1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-out.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/mrset.h"
28 #include "data/subcase.h"
29 #include "data/value-labels.h"
30 #include "language/command.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/hmap.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "libpspp/string-array.h"
41 #include "math/mode.h"
42 #include "math/moments.h"
43 #include "math/percentiles.h"
44 #include "math/sort.h"
45 #include "output/pivot-table.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
52 #define N_(msgid) (msgid)
56 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
57 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
58 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
59 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
63 - unweighted summaries (U*)
64 - lower confidence limits (*.LCL)
65 - upper confidence limits (*.UCL)
66 - standard error (*.SE)
69 /* All variables. */ \
70 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
71 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
73 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
94 /* Scale variables, totals, and subtotals. */ \
95 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
96 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
106 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
107 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
111 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
112 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
119 #if 0 /* Multiple response sets not yet implemented. */
120 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
121 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
122 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
123 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
144 enum ctables_summary_function
146 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
152 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
153 N_CTSF_FUNCTIONS = SUMMARIES
157 static bool ctables_summary_function_is_count (enum ctables_summary_function);
159 enum ctables_domain_type
161 /* Within a section, where stacked variables divide one section from
163 CTDT_TABLE, /* All layers of a whole section. */
164 CTDT_LAYER, /* One layer within a section. */
165 CTDT_LAYERROW, /* Row in one layer within a section. */
166 CTDT_LAYERCOL, /* Column in one layer within a section. */
168 /* Within a subtable, where a subtable pairs an innermost row variable with
169 an innermost column variable within a single layer. */
170 CTDT_SUBTABLE, /* Whole subtable. */
171 CTDT_ROW, /* Row within a subtable. */
172 CTDT_COL, /* Column within a subtable. */
176 struct ctables_domain
178 struct hmap_node node;
180 const struct ctables_cell *example;
182 double d_valid; /* Dictionary weight. */
185 double e_valid; /* Effective weight */
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The domains that contain this cell. */
204 uint32_t omit_domains;
205 struct ctables_domain *domains[N_CTDTS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition.  Each one carries a hash map node, a name, the
   source location of its definition, and an expression tree. */
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_RANGE, /* [LO THRU 5] */
296 CTPO_CAT_MISSING, /* MISSING */
297 CTPO_CAT_OTHERNM, /* OTHERNM */
298 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
299 CTPO_CAT_TOTAL, /* TOTAL */
313 /* CTPO_CAT_NUMBER. */
316 /* CTPO_CAT_STRING. */
319 /* CTPO_CAT_RANGE. */
322 /* CTPO_CAT_SUBTOTAL. */
323 size_t subtotal_index;
325 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
326 One element: CTPO_NEG. */
327 struct ctables_pcexpr *subs[2];
330 /* Source location. */
331 struct msg_location *location;
334 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
335 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
336 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
337 struct ctables_pcexpr *sub1);
339 struct ctables_summary_spec_set
341 struct ctables_summary_spec *specs;
345 /* The variable to which the summary specs are applied. */
346 struct variable *var;
348 /* Whether the variable to which the summary specs are applied is a scale
349 variable for the purpose of summarization.
351 (VALIDN and TOTALN act differently for summarizing scale and categorical
356 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
357 const struct ctables_summary_spec_set *);
358 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
360 /* A nested sequence of variables, e.g. a > b > c. */
363 struct variable **vars;
366 size_t *domains[N_CTDTS];
367 size_t n_domains[N_CTDTS];
369 struct ctables_summary_spec_set specs[N_CSVS];
372 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
375 struct ctables_nest *nests;
381 struct hmap_node node;
386 struct ctables_occurrence
388 struct hmap_node node;
/* One section of a table.  The per-axis members are arrays with one element
   for each pivot axis (PIVOT_N_AXES); 'domains' has one hash map for each
   domain type (N_CTDTS). */
392 struct ctables_section
394 struct ctables_table *table;
395 struct ctables_nest *nests[PIVOT_N_AXES];
396 struct hmap *occurrences[PIVOT_N_AXES];
397 struct hmap cells; /* Contains "struct ctables_cell"s. */
398 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
403 struct ctables *ctables;
404 struct ctables_axis *axes[PIVOT_N_AXES];
405 struct ctables_stack stacks[PIVOT_N_AXES];
406 struct ctables_section *sections;
408 enum pivot_axis_type summary_axis;
409 struct ctables_summary_spec_set summary_specs;
411 const struct variable *clabels_example;
412 struct hmap clabels_values_map;
413 struct ctables_value **clabels_values;
414 size_t n_clabels_values;
416 enum pivot_axis_type slabels_axis;
417 bool slabels_visible;
419 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
421 Most commonly, label_axis[a] == a, and in particular we always have
422 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
424 If ROWLABELS or COLLABELS is specified, then one of
425 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
426 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
428 enum pivot_axis_type label_axis[PIVOT_N_AXES];
429 enum pivot_axis_type clabels_from_axis;
431 /* Indexed by variable dictionary index. */
432 struct ctables_categories **categories;
441 struct ctables_chisq *chisq;
442 struct ctables_pairwise *pairwise;
450 struct variable *var;
451 const struct mrset *mrset;
455 static const struct fmt_spec *
456 ctables_var_get_print_format (const struct ctables_var *var)
458 return (var->is_mrset
459 ? var_get_print_format (var->mrset->vars[0])
460 : var_get_print_format (var->var));
464 ctables_var_name (const struct ctables_var *var)
466 return var->is_mrset ? var->mrset->name : var_get_name (var->var);
469 struct ctables_categories
472 struct ctables_category *cats;
477 struct ctables_category
479 enum ctables_category_type
481 /* Explicit category lists. */
489 /* Totals and subtotals. */
493 /* Implicit category lists. */
498 /* For contributing to TOTALN. */
499 CCT_EXCLUDED_MISSING,
503 struct ctables_category *subtotal;
509 double number; /* CCT_NUMBER. */
510 char *string; /* CCT_STRING. */
511 double range[2]; /* CCT_RANGE. */
515 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
516 bool hide_subcategories; /* CCT_SUBTOTAL. */
519 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
521 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
524 bool include_missing;
528 enum ctables_summary_function sort_function;
529 struct variable *sort_var;
534 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
535 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
536 struct msg_location *location;
540 ctables_category_uninit (struct ctables_category *cat)
551 case CCT_POSTCOMPUTE:
560 free (cat->total_label);
568 case CCT_EXCLUDED_MISSING:
574 ctables_category_equal (const struct ctables_category *a,
575 const struct ctables_category *b)
577 if (a->type != b->type)
583 return a->number == b->number;
586 return strcmp (a->string, b->string);
589 return a->range[0] == b->range[0] && a->range[1] == b->range[1];
595 case CCT_POSTCOMPUTE:
596 return a->pc == b->pc;
600 return !strcmp (a->total_label, b->total_label);
605 return (a->include_missing == b->include_missing
606 && a->sort_ascending == b->sort_ascending
607 && a->sort_function == b->sort_function
608 && a->sort_var == b->sort_var
609 && a->percentile == b->percentile);
611 case CCT_EXCLUDED_MISSING:
619 ctables_categories_unref (struct ctables_categories *c)
624 assert (c->n_refs > 0);
628 for (size_t i = 0; i < c->n_cats; i++)
629 ctables_category_uninit (&c->cats[i]);
635 ctables_categories_equal (const struct ctables_categories *a,
636 const struct ctables_categories *b)
638 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
641 for (size_t i = 0; i < a->n_cats; i++)
642 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
648 /* Chi-square test (SIGTEST). */
656 /* Pairwise comparison test (COMPARETEST). */
657 struct ctables_pairwise
659 enum { PROP, MEAN } type;
662 bool meansvariance_allcats;
664 enum { BONFERRONI = 1, BH } adjust;
688 struct ctables_var var;
690 struct ctables_summary_spec_set specs[N_CSVS];
694 struct ctables_axis *subs[2];
697 struct msg_location *loc;
700 static void ctables_axis_destroy (struct ctables_axis *);
709 enum ctables_function_availability
711 CTFA_ALL, /* Any variables. */
712 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
713 CTFA_MRSETS, /* Only multiple-response sets */
716 struct ctables_summary_spec
718 enum ctables_summary_function function;
719 double percentile; /* CTSF_PTILE only. */
722 struct fmt_spec format;
723 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
729 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
730 const struct ctables_summary_spec *src)
733 dst->label = xstrdup (src->label);
737 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
744 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
745 const struct ctables_summary_spec_set *src)
747 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
748 for (size_t i = 0; i < src->n; i++)
749 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
751 *dst = (struct ctables_summary_spec_set) {
756 .is_scale = src->is_scale,
761 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
763 for (size_t i = 0; i < set->n; i++)
764 ctables_summary_spec_uninit (&set->specs[i]);
769 parse_col_width (struct lexer *lexer, const char *name, double *width)
771 lex_match (lexer, T_EQUALS);
772 if (lex_match_id (lexer, "DEFAULT"))
774 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
776 *width = lex_number (lexer);
786 parse_bool (struct lexer *lexer, bool *b)
788 if (lex_match_id (lexer, "NO"))
790 else if (lex_match_id (lexer, "YES"))
794 lex_error_expecting (lexer, "YES", "NO");
800 static enum ctables_function_availability
801 ctables_function_availability (enum ctables_summary_function f)
803 static enum ctables_function_availability availability[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
809 return availability[f];
813 ctables_summary_function_is_count (enum ctables_summary_function f)
819 case CTSF_ROWPCT_COUNT:
820 case CTSF_COLPCT_COUNT:
821 case CTSF_TABLEPCT_COUNT:
822 case CTSF_SUBTABLEPCT_COUNT:
823 case CTSF_LAYERPCT_COUNT:
824 case CTSF_LAYERROWPCT_COUNT:
825 case CTSF_LAYERCOLPCT_COUNT:
828 case CTSF_ROWPCT_VALIDN:
829 case CTSF_COLPCT_VALIDN:
830 case CTSF_TABLEPCT_VALIDN:
831 case CTSF_SUBTABLEPCT_VALIDN:
832 case CTSF_LAYERPCT_VALIDN:
833 case CTSF_LAYERROWPCT_VALIDN:
834 case CTSF_LAYERCOLPCT_VALIDN:
835 case CTSF_ROWPCT_TOTALN:
836 case CTSF_COLPCT_TOTALN:
837 case CTSF_TABLEPCT_TOTALN:
838 case CTSF_SUBTABLEPCT_TOTALN:
839 case CTSF_LAYERPCT_TOTALN:
840 case CTSF_LAYERROWPCT_TOTALN:
841 case CTSF_LAYERCOLPCT_TOTALN:
858 case CTSF_ROWPCT_SUM:
859 case CTSF_COLPCT_SUM:
860 case CTSF_TABLEPCT_SUM:
861 case CTSF_SUBTABLEPCT_SUM:
862 case CTSF_LAYERPCT_SUM:
863 case CTSF_LAYERROWPCT_SUM:
864 case CTSF_LAYERCOLPCT_SUM:
872 parse_ctables_summary_function (struct lexer *lexer,
873 enum ctables_summary_function *f)
877 enum ctables_summary_function function;
878 struct substring name;
880 static struct pair names[] = {
881 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
882 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
885 /* The .COUNT suffix may be omitted. */
886 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
887 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
888 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
889 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
890 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
891 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
892 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
896 if (!lex_force_id (lexer))
899 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
900 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
902 *f = names[i].function;
907 lex_error (lexer, _("Expecting summary function name."));
912 ctables_axis_destroy (struct ctables_axis *axis)
920 for (size_t i = 0; i < N_CSVS; i++)
921 ctables_summary_spec_set_uninit (&axis->specs[i]);
926 ctables_axis_destroy (axis->subs[0]);
927 ctables_axis_destroy (axis->subs[1]);
930 msg_location_destroy (axis->loc);
934 static struct ctables_axis *
935 ctables_axis_new_nonterminal (enum ctables_axis_op op,
936 struct ctables_axis *sub0,
937 struct ctables_axis *sub1,
938 struct lexer *lexer, int start_ofs)
940 struct ctables_axis *axis = xmalloc (sizeof *axis);
941 *axis = (struct ctables_axis) {
943 .subs = { sub0, sub1 },
944 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
949 struct ctables_axis_parse_ctx
952 struct dictionary *dict;
954 struct ctables_table *t;
957 static struct fmt_spec
958 ctables_summary_default_format (enum ctables_summary_function function,
959 const struct ctables_var *var)
961 static const enum ctables_format default_formats[] = {
962 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
966 switch (default_formats[function])
969 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
972 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
975 return *ctables_var_get_print_format (var);
983 ctables_summary_default_label (enum ctables_summary_function function,
986 static const char *default_labels[] = {
987 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
992 return (function == CTSF_PTILE
993 ? xasprintf (_("Percentile %.2f"), percentile)
994 : xstrdup (gettext (default_labels[function])));
998 ctables_summary_function_name (enum ctables_summary_function function)
1000 static const char *names[] = {
1001 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1005 return names[function];
1009 add_summary_spec (struct ctables_axis *axis,
1010 enum ctables_summary_function function, double percentile,
1011 const char *label, const struct fmt_spec *format,
1012 bool is_ctables_format, const struct msg_location *loc,
1013 enum ctables_summary_variant sv)
1015 if (axis->op == CTAO_VAR)
1017 const char *function_name = ctables_summary_function_name (function);
1018 const char *var_name = ctables_var_name (&axis->var);
1019 switch (ctables_function_availability (function))
1022 if (!axis->var.is_mrset)
1024 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1025 "response sets."), function_name);
1026 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1037 _("Summary function %s applies only to scale variables."),
1039 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1050 struct ctables_summary_spec_set *set = &axis->specs[sv];
1051 if (set->n >= set->allocated)
1052 set->specs = x2nrealloc (set->specs, &set->allocated,
1053 sizeof *set->specs);
1055 struct ctables_summary_spec *dst = &set->specs[set->n++];
1056 *dst = (struct ctables_summary_spec) {
1057 .function = function,
1058 .percentile = percentile,
1059 .label = xstrdup (label),
1060 .format = (format ? *format
1061 : ctables_summary_default_format (function, &axis->var)),
1062 .is_ctables_format = is_ctables_format,
1068 for (size_t i = 0; i < 2; i++)
1069 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1070 format, is_ctables_format, loc, sv))
1076 static struct ctables_axis *ctables_axis_parse_stack (
1077 struct ctables_axis_parse_ctx *);
1080 ctables_var_parse (struct lexer *lexer, struct dictionary *dict,
1081 struct ctables_var *var)
1083 if (ss_starts_with (lex_tokss (lexer), ss_cstr ("$")))
1085 *var = (struct ctables_var) {
1087 .mrset = dict_lookup_mrset (dict, lex_tokcstr (lexer))
1091 lex_error (lexer, _("'%s' does not name a multiple-response set "
1092 "in the active file dictionary."),
1093 lex_tokcstr (lexer));
1101 *var = (struct ctables_var) {
1103 .var = parse_variable (lexer, dict),
1105 return var->var != NULL;
1109 static struct ctables_axis *
1110 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1112 if (lex_match (ctx->lexer, T_LPAREN))
1114 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1115 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1117 ctables_axis_destroy (sub);
1123 if (!lex_force_id (ctx->lexer))
1126 int start_ofs = lex_ofs (ctx->lexer);
1127 struct ctables_var var;
1128 if (!ctables_var_parse (ctx->lexer, ctx->dict, &var))
1131 struct ctables_axis *axis = xmalloc (sizeof *axis);
1132 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1134 /* XXX should figure out default measures by reading data */
1135 axis->scale = (var.is_mrset ? false
1136 : lex_match_phrase (ctx->lexer, "[S]") ? true
1137 : lex_match_phrase (ctx->lexer, "[C]") ? false
1138 : var_get_measure (var.var) == MEASURE_SCALE);
1139 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1140 lex_ofs (ctx->lexer) - 1);
/* Returns true if S contains at least one ASCII decimal digit ('0' through
   '9'), false otherwise.  An empty string contains no digit. */
static bool
has_digit (const char *s)
{
  for (const char *p = s; *p != '\0'; p++)
    if (*p >= '0' && *p <= '9')
      return true;

  return false;
}
1151 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1152 bool *is_ctables_format)
1154 char type[FMT_TYPE_LEN_MAX + 1];
1155 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1158 if (!strcasecmp (type, "NEGPAREN"))
1159 format->type = CTEF_NEGPAREN;
1160 else if (!strcasecmp (type, "NEQUAL"))
1161 format->type = CTEF_NEQUAL;
1162 else if (!strcasecmp (type, "PAREN"))
1163 format->type = CTEF_PAREN;
1164 else if (!strcasecmp (type, "PCTPAREN"))
1165 format->type = CTEF_PCTPAREN;
1168 *is_ctables_format = false;
1169 return (parse_format_specifier (lexer, format)
1170 && fmt_check_output (format)
1171 && fmt_check_type_compat (format, VAL_NUMERIC));
1176 msg (SE, _("Output format %s requires width 2 or greater."), type);
1179 else if (format->d > format->w - 1)
1181 msg (SE, _("Output format %s requires width greater than decimals."),
1187 *is_ctables_format = true;
1192 static struct ctables_axis *
1193 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1195 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1196 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1199 enum ctables_summary_variant sv = CSV_CELL;
1202 int start_ofs = lex_ofs (ctx->lexer);
1204 /* Parse function. */
1205 enum ctables_summary_function function;
1206 if (!parse_ctables_summary_function (ctx->lexer, &function))
1209 /* Parse percentile. */
1210 double percentile = 0;
1211 if (function == CTSF_PTILE)
1213 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1215 percentile = lex_number (ctx->lexer);
1216 lex_get (ctx->lexer);
1221 if (lex_is_string (ctx->lexer))
1223 label = ss_xstrdup (lex_tokss (ctx->lexer));
1224 lex_get (ctx->lexer);
1227 label = ctables_summary_default_label (function, percentile);
1230 struct fmt_spec format;
1231 const struct fmt_spec *formatp;
1232 bool is_ctables_format = false;
1233 if (lex_token (ctx->lexer) == T_ID
1234 && has_digit (lex_tokcstr (ctx->lexer)))
1236 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1237 &is_ctables_format))
1247 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1248 lex_ofs (ctx->lexer) - 1);
1249 add_summary_spec (sub, function, percentile, label, formatp,
1250 is_ctables_format, loc, sv);
1252 msg_location_destroy (loc);
1254 lex_match (ctx->lexer, T_COMMA);
1255 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1257 if (!lex_force_match (ctx->lexer, T_LBRACK))
1261 else if (lex_match (ctx->lexer, T_RBRACK))
1263 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1270 ctables_axis_destroy (sub);
1274 static const struct ctables_axis *
1275 find_scale (const struct ctables_axis *axis)
1279 else if (axis->op == CTAO_VAR)
1283 assert (!axis->var.is_mrset);
1291 for (size_t i = 0; i < 2; i++)
1293 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1301 static const struct ctables_axis *
1302 find_categorical_summary_spec (const struct ctables_axis *axis)
1306 else if (axis->op == CTAO_VAR)
1307 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1310 for (size_t i = 0; i < 2; i++)
1312 const struct ctables_axis *sum
1313 = find_categorical_summary_spec (axis->subs[i]);
1321 static struct ctables_axis *
1322 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1324 int start_ofs = lex_ofs (ctx->lexer);
1325 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1329 while (lex_match (ctx->lexer, T_GT))
1331 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1335 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1336 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1338 const struct ctables_axis *outer_scale = find_scale (lhs);
1339 const struct ctables_axis *inner_scale = find_scale (rhs);
1340 if (outer_scale && inner_scale)
1342 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1343 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1344 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1345 ctables_axis_destroy (nest);
1349 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1352 msg_at (SE, nest->loc,
1353 _("Summaries may only be requested for categorical variables "
1354 "at the innermost nesting level."));
1355 msg_at (SN, outer_sum->loc,
1356 _("This outer categorical variable has a summary."));
1357 ctables_axis_destroy (nest);
1367 static struct ctables_axis *
1368 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1370 int start_ofs = lex_ofs (ctx->lexer);
1371 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1375 while (lex_match (ctx->lexer, T_PLUS))
1377 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1381 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1382 ctx->lexer, start_ofs);
1389 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1390 struct ctables *ct, struct ctables_table *t,
1391 enum pivot_axis_type a)
1393 if (lex_token (lexer) == T_BY
1394 || lex_token (lexer) == T_SLASH
1395 || lex_token (lexer) == T_ENDCMD)
1398 struct ctables_axis_parse_ctx ctx = {
1404 t->axes[a] = ctables_axis_parse_stack (&ctx);
1405 return t->axes[a] != NULL;
1409 ctables_chisq_destroy (struct ctables_chisq *chisq)
1415 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: drops one reference on each
   entry of T->categories and frees the array, destroys the column, row and
   layer axes, and destroys the chi-square and pairwise settings.
   NOTE(review): several statements of this function are not visible in this
   excerpt, so this list may be incomplete. */
1421 ctables_table_destroy (struct ctables_table *t)
1426 for (size_t i = 0; i < t->n_categories; i++)
1427 ctables_categories_unref (t->categories[i]);
1428 free (t->categories);
1430 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1431 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1432 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1436 ctables_chisq_destroy (t->chisq);
1437 ctables_pairwise_destroy (t->pairwise);
/* Releases CT: drops the reference on its pivot table look and destroys
   each of its CT->n_tables tables.
   NOTE(review): further cleanup statements are not visible in this
   excerpt. */
1442 ctables_destroy (struct ctables *ct)
1447 pivot_table_look_unref (ct->look);
1451 for (size_t i = 0; i < ct->n_tables; i++)
1452 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose range member is { LOW, HIGH }.
   Callers in this file pass -DBL_MAX for LOW or DBL_MAX for HIGH to express
   an open-ended "LO THRU x" or "x THRU HI" range.
   NOTE(review): the initializer line that sets the category type is not
   visible in this excerpt. */
1457 static struct ctables_category
1458 cct_range (double low, double high)
1460 return (struct ctables_category) {
1462 .range = { low, high }
/* Parses the optional "= 'label'" that may follow SUBTOTAL or HSUBTOTAL and
   fills *CAT with a CCT_SUBTOTAL category.  HIDE_SUBCATEGORIES is copied
   into the category unchanged (callers pass true for HSUBTOTAL).  When an
   equals sign is present a string token is required and becomes the label;
   otherwise the label is a fresh copy of the translated word "Subtotal".
   Either way the label is heap-allocated and stored in CAT.
   NOTE(review): the return statements are not visible in this excerpt. */
1467 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1468 struct ctables_category *cat)
1471 if (lex_match (lexer, T_EQUALS))
1473 if (!lex_force_string (lexer))
1476 total_label = ss_xstrdup (lex_tokss (lexer));
1480 total_label = xstrdup (_("Subtotal"));
1482 *cat = (struct ctables_category) {
1483 .type = CCT_SUBTOTAL,
1484 .hide_subcategories = hide_subcategories,
1485 .total_label = total_label
/* Parses one entry of an explicit category list into *CAT.  Accepted forms:
   OTHERNM, MISSING, SUBTOTAL and HSUBTOTAL (both delegated to
   ctables_table_parse_subtotal()), the ranges "LO THRU <num>",
   "<num> THRU HI" and "<num> THRU <num>", a single number, a string, or
   "&name" referring to a postcompute already defined in CT.
   An unknown postcompute name is reported with msg_at(); any other token is
   reported through lex_error().
   NOTE(review): the return type and the return statements are not visible
   in this excerpt; the caller tests the result as a success flag. */
1491 ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct,
1492 struct ctables_category *cat)
1494 if (lex_match_id (lexer, "OTHERNM"))
1495 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1496 else if (lex_match_id (lexer, "MISSING"))
1497 *cat = (struct ctables_category) { .type = CCT_MISSING };
1498 else if (lex_match_id (lexer, "SUBTOTAL"))
1499 return ctables_table_parse_subtotal (lexer, false, cat);
1500 else if (lex_match_id (lexer, "HSUBTOTAL"))
1501 return ctables_table_parse_subtotal (lexer, true, cat);
1502 else if (lex_match_id (lexer, "LO"))
/* Both the THRU keyword and a following number are required.  The second
   test has to be negated like the first: lex_force_num() reports success,
   so the un-negated form rejected every well-formed "LO THRU <num>" and let
   a non-numeric token through to lex_number() below. */
1504 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
1506 *cat = cct_range (-DBL_MAX, lex_number (lexer));
1509 else if (lex_is_number (lexer))
1511 double number = lex_number (lexer);
/* A number may stand alone or open a range: "<num> THRU HI" or
   "<num> THRU <num>". */
1513 if (lex_match_id (lexer, "THRU"))
1515 if (lex_match_id (lexer, "HI"))
1516 *cat = cct_range (number, DBL_MAX);
1519 if (!lex_force_num (lexer))
1521 *cat = cct_range (number, lex_number (lexer));
1526 *cat = (struct ctables_category) {
1531 else if (lex_is_string (lexer))
1533 *cat = (struct ctables_category) {
1535 .string = ss_xstrdup (lex_tokss (lexer)),
1539 else if (lex_match (lexer, T_AND))
/* "&name": the identifier must name an existing postcompute. */
1541 if (!lex_force_id (lexer))
1543 struct ctables_postcompute *pc = ctables_find_postcompute (
1544 ct, lex_tokcstr (lexer));
1547 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1548 msg_at (SE, loc, _("Unknown postcompute &%s."),
1549 lex_tokcstr (lexer));
1550 msg_location_destroy (loc);
1555 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1559 lex_error (lexer, NULL);
/* Searches CATS for the category that the postcompute leaf expression E
   refers to.  Each category is compared against E according to E's kind:
   numbers and range bounds by equality, strings with strcmp(), and MISSING,
   OTHERNM, SUBTOTAL and TOTAL by category type.  For subtotals,
   E->subtotal_index is compared against the running subtotal count, with
   index 0 treated as a separate case.
   NOTE(review): the statements executed on a match and the return
   statements are not visible in this excerpt; presumably the match is
   recorded in BEST and returned -- confirm. */
1566 static struct ctables_category *
1567 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1568 const struct ctables_pcexpr *e)
1570 struct ctables_category *best = NULL;
1571 size_t n_subtotals = 0;
1572 for (size_t i = 0; i < cats->n_cats; i++)
1574 struct ctables_category *cat = &cats->cats[i];
1577 case CTPO_CAT_NUMBER:
1578 if (cat->type == CCT_NUMBER && cat->number == e->number)
1582 case CTPO_CAT_STRING:
1583 if (cat->type == CCT_STRING && !strcmp (cat->string, e->string))
1587 case CTPO_CAT_RANGE:
1588 if (cat->type == CCT_RANGE
1589 && cat->range[0] == e->range[0]
1590 && cat->range[1] == e->range[1])
1594 case CTPO_CAT_MISSING:
1595 if (cat->type == CCT_MISSING)
1599 case CTPO_CAT_OTHERNM:
1600 if (cat->type == CCT_OTHERNM)
1604 case CTPO_CAT_SUBTOTAL:
1605 if (cat->type == CCT_SUBTOTAL)
1608 if (e->subtotal_index == n_subtotals)
1610 else if (e->subtotal_index == 0)
1615 case CTPO_CAT_TOTAL:
1616 if (cat->type == CCT_TOTAL)
/* A subtotal reference without a position (index 0) is singled out when
   CATS contains more than one subtotal. */
1630 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks that every category referenced by postcompute expression E, which
   belongs to computed category PC_CAT, can be resolved within CATS.
   CATS_LOCATION is the source location of the category list and is used in
   diagnostics.  Leaf expressions are looked up with
   ctables_find_category_for_postcompute(); other expressions recurse into
   both sub-expressions, and the use in that condition shows a boolean
   result where false means failure.
   NOTE(review): the return statements and the guards around the two error
   paths are not visible in this excerpt. */
1636 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1637 struct ctables_category *pc_cat,
1638 const struct ctables_categories *cats,
1639 const struct msg_location *cats_location)
1643 case CTPO_CAT_NUMBER:
1644 case CTPO_CAT_STRING:
1645 case CTPO_CAT_RANGE:
1646 case CTPO_CAT_MISSING:
1647 case CTPO_CAT_OTHERNM:
1648 case CTPO_CAT_SUBTOTAL:
1649 case CTPO_CAT_TOTAL:
1651 struct ctables_category *cat = ctables_find_category_for_postcompute (
/* First error path: an unpositioned subtotal reference while the list holds
   several subtotals.  The count is recomputed here for the message. */
1655 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1657 size_t n_subtotals = 0;
1658 for (size_t i = 0; i < cats->n_cats; i++)
1659 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1660 if (n_subtotals > 1)
1662 msg_at (SE, cats_location,
1663 ngettext ("These categories include %zu instance "
1664 "of SUBTOTAL or HSUBTOTAL, so references "
1665 "from computed categories must refer to "
1666 "subtotals by position.",
1667 "These categories include %zu instances "
1668 "of SUBTOTAL or HSUBTOTAL, so references "
1669 "from computed categories must refer to "
1670 "subtotals by position.",
1673 msg_at (SN, e->location,
1674 _("This is the reference that lacks a position."));
/* Second error path: the referenced category is absent from the list. */
1679 msg_at (SE, pc_cat->location,
1680 _("Computed category &%s references a category not included "
1681 "in the category list."),
1683 msg_at (SN, e->location, _("This is the missing category."));
1684 msg_at (SN, cats_location,
1685 _("To fix the problem, add the missing category to the "
1686 "list of categories here."));
1689 if (pc_cat->pc->hide_source_cats)
/* Non-leaf expression: validate each operand that is present. */
1703 for (size_t i = 0; i < 2; i++)
1704 if (e->subs[i] && !ctables_recursive_check_postcompute (
1705 e->subs[i], pc_cat, cats, cats_location))
/* Parses a CATEGORIES specification for table T: the keyword VARIABLES, an
   optional equals sign, a variable list, then either an explicit category
   list in square brackets or the ORDER / KEY / MISSING settings for
   implicit categories, followed by the TOTAL, LABEL, POSITION and EMPTY
   settings.  The resulting categories object is installed in
   T->categories[] at the dictionary index of every listed variable.
   NOTE(review): the return statements and a number of guards and braces are
   not visible in this excerpt; comments below describe only what the
   visible statements do. */
1715 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1716 struct ctables *ct, struct ctables_table *t)
1718 if (!lex_match_id (lexer, "VARIABLES"))
1720 lex_match (lexer, T_EQUALS);
1722 struct variable **vars;
1724 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* One categories object is created with a reference count equal to the
   number of variables; each variable's previous entry is unreferenced. */
1727 struct ctables_categories *c = xmalloc (sizeof *c);
1728 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1729 for (size_t i = 0; i < n_vars; i++)
1731 struct ctables_categories **cp
1732 = &t->categories[var_get_dict_index (vars[i])];
1733 ctables_categories_unref (*cp);
/* Explicit category list: entries up to the closing bracket, with optional
   commas between them.  Each entry records its own source location. */
1738 size_t allocated_cats = 0;
1739 if (lex_match (lexer, T_LBRACK))
1741 int cats_start_ofs = lex_ofs (lexer);
1744 if (c->n_cats >= allocated_cats)
1745 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1747 int start_ofs = lex_ofs (lexer);
1748 struct ctables_category *cat = &c->cats[c->n_cats];
1749 if (!ctables_table_parse_explicit_category (lexer, ct, cat))
1751 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1754 lex_match (lexer, T_COMMA);
1756 while (!lex_match (lexer, T_RBRACK));
/* With the whole list known, verify that each computed category only
   references categories that appear in it. */
1758 struct msg_location *cats_location
1759 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1760 for (size_t i = 0; i < c->n_cats; i++)
1762 struct ctables_category *cat = &c->cats[i];
1763 if (cat->type == CCT_POSTCOMPUTE
1764 && !ctables_recursive_check_postcompute (cat->pc->expr, cat,
/* Template for an implicit category, adjusted by ORDER, KEY and MISSING. */
1770 struct ctables_category cat = {
1772 .include_missing = false,
1773 .sort_ascending = true,
1775 bool show_totals = false;
1776 char *total_label = NULL;
1777 bool totals_before = false;
/* Settings loop.  ORDER, KEY and MISSING are matched only when no explicit
   categories were given (c->n_cats is zero). */
1778 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1780 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1782 lex_match (lexer, T_EQUALS);
1783 if (lex_match_id (lexer, "A"))
1784 cat.sort_ascending = true;
1785 else if (lex_match_id (lexer, "D"))
1786 cat.sort_ascending = false;
1789 lex_error_expecting (lexer, "A", "D");
1793 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1795 lex_match (lexer, T_EQUALS);
1796 if (lex_match_id (lexer, "VALUE"))
1797 cat.type = CCT_VALUE;
1798 else if (lex_match_id (lexer, "LABEL"))
1799 cat.type = CCT_LABEL;
/* Any other key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
1802 cat.type = CCT_FUNCTION;
1803 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1806 if (lex_match (lexer, T_LPAREN))
1808 cat.sort_var = parse_variable (lexer, dict);
1812 if (cat.sort_function == CTSF_PTILE)
1814 lex_match (lexer, T_COMMA);
1815 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1817 cat.percentile = lex_number (lexer);
1821 if (!lex_force_match (lexer, T_RPAREN))
1824 else if (ctables_function_availability (cat.sort_function)
1827 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1832 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1834 lex_match (lexer, T_EQUALS);
1835 if (lex_match_id (lexer, "INCLUDE"))
1836 cat.include_missing = true;
1837 else if (lex_match_id (lexer, "EXCLUDE"))
1838 cat.include_missing = false;
1841 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1845 else if (lex_match_id (lexer, "TOTAL"))
1847 lex_match (lexer, T_EQUALS);
1848 if (!parse_bool (lexer, &show_totals))
1851 else if (lex_match_id (lexer, "LABEL"))
1853 lex_match (lexer, T_EQUALS);
1854 if (!lex_force_string (lexer))
1857 total_label = ss_xstrdup (lex_tokss (lexer));
1860 else if (lex_match_id (lexer, "POSITION"))
1862 lex_match (lexer, T_EQUALS);
1863 if (lex_match_id (lexer, "BEFORE"))
1864 totals_before = true;
1865 else if (lex_match_id (lexer, "AFTER"))
1866 totals_before = false;
1869 lex_error_expecting (lexer, "BEFORE", "AFTER");
1873 else if (lex_match_id (lexer, "EMPTY"))
1875 lex_match (lexer, T_EQUALS);
1876 if (lex_match_id (lexer, "INCLUDE"))
1877 c->show_empty = true;
1878 else if (lex_match_id (lexer, "EXCLUDE"))
1879 c->show_empty = false;
1882 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expected keyword" error: the longer list includes
   the settings that apply only to implicit categories. */
1889 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1890 "TOTAL", "LABEL", "POSITION", "EMPTY");
1892 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the template category.  NOTE(review): the enclosing condition is
   not visible; presumably this happens only when no explicit list was
   given -- confirm. */
1899 if (c->n_cats >= allocated_cats)
1900 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1901 c->cats[c->n_cats++] = cat;
/* Add the totals category at the front or the back of the array; its label
   defaults to the translated word "Total".  NOTE(review): the guard on
   SHOW_TOTALS and the POSITION test are not visible here. */
1906 if (c->n_cats >= allocated_cats)
1907 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1909 struct ctables_category *totals;
1912 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
1913 totals = &c->cats[0];
1916 totals = &c->cats[c->n_cats];
1919 *totals = (struct ctables_category) {
1921 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, storing the current SUBTOTAL pointer into categories.
   NOTE(review): the switch that updates SUBTOTAL is not visible here. */
1925 struct ctables_category *subtotal = NULL;
1926 for (size_t i = totals_before ? 0 : c->n_cats;
1927 totals_before ? i < c->n_cats : i-- > 0;
1928 totals_before ? i++ : 0)
1930 struct ctables_category *cat = &c->cats[i];
1938 cat->subtotal = subtotal;
1941 case CCT_POSTCOMPUTE:
1952 case CCT_EXCLUDED_MISSING:
/* Releases the contents of NEST; called for every element by
   ctables_stack_uninit().
   NOTE(review): the body is not visible in this excerpt -- confirm what it
   frees. */
1961 ctables_nest_uninit (struct ctables_nest *nest)
/* Releases the contents of STACK: uninitializes each of its STACK->n nests
   and then frees the nests array.  STACK itself is not freed by the
   visible statements.
   NOTE(review): any guard at the top of the function is not visible in this
   excerpt. */
1968 ctables_stack_uninit (struct ctables_stack *stack)
1972 for (size_t i = 0; i < stack->n; i++)
1973 ctables_nest_uninit (&stack->nests[i]);
1974 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new nest for
   every pair (A from S0, B from S1), whose variable list is A's variables
   followed by B's.  The scale index is taken from A if A has one, otherwise
   from B shifted by A's variable count.  Summary specifications are cloned
   from whichever of A or B is selected as SUMMARY_SRC.  Both input stacks
   are uninitialized before returning, so the function consumes them.
   NOTE(review): the early-exit checks, the assignments to SUMMARY_SRC and
   the return statement are not visible in this excerpt. */
1978 static struct ctables_stack
1979 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
1986 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
1987 for (size_t i = 0; i < s0.n; i++)
1988 for (size_t j = 0; j < s1.n; j++)
1990 const struct ctables_nest *a = &s0.nests[i];
1991 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
1993 size_t allocate = a->n + b->n;
1994 struct variable **vars = xnmalloc (allocate, sizeof *vars);
1995 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
1997 for (size_t k = 0; k < a->n; k++)
1998 vars[n++] = a->vars[k];
1999 for (size_t k = 0; k < b->n; k++)
2000 vars[n++] = b->vars[k];
2001 assert (n == allocate);
/* Choose which side supplies the summary specifications, based on which
   side has no summary variable set. */
2003 const struct ctables_nest *summary_src;
2004 if (!a->specs[CSV_CELL].var)
2006 else if (!b->specs[CSV_CELL].var)
2011 struct ctables_nest *new = &stack.nests[stack.n++];
2012 *new = (struct ctables_nest) {
2014 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2015 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2019 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2020 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2022 ctables_stack_uninit (&s0);
2023 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S0 and S1: a new array that
   holds S0's nests followed by S1's nests.  The nest structures are copied
   by value into the new array.
   NOTE(review): the cleanup of the input arrays and the return statement
   are not visible in this excerpt. */
2027 static struct ctables_stack
2028 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2030 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2031 for (size_t i = 0; i < s0.n; i++)
2032 stack.nests[stack.n++] = s0.nests[i];
2033 for (size_t i = 0; i < s1.n; i++)
2034 stack.nests[stack.n++] = s1.nests[i];
2035 assert (stack.n == s0.n + s1.n);
/* Expands axis expression A into a stack of nests.  A variable leaf
   becomes a one-nest stack holding that variable, with scale index 0 if the
   variable is scale and SIZE_MAX otherwise; summary specifications are
   cloned into the nest when the leaf has any or is scale.  Stack and nest
   nonterminals are expanded recursively and combined with stack_fts() and
   nest_fts().  Multiple response sets are not supported (asserted).
   NOTE(review): the conditions selecting each branch, including the one
   that yields the empty stack, are not visible in this excerpt. */
2041 static struct ctables_stack
2042 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2045 return (struct ctables_stack) { .n = 0 };
2050 assert (!a->var.is_mrset);
2052 struct variable **vars = xmalloc (sizeof *vars);
2055 struct ctables_nest *nest = xmalloc (sizeof *nest);
2056 *nest = (struct ctables_nest) {
2059 .scale_idx = a->scale ? 0 : SIZE_MAX,
/* Each summary variant gets its own clone, tagged with the leaf's variable
   and scale flag. */
2061 if (a->specs[CSV_CELL].n || a->scale)
2062 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2064 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2065 nest->specs[sv].var = a->var.var;
2066 nest->specs[sv].is_scale = a->scale;
2068 return (struct ctables_stack) { .nests = nest, .n = 1 };
2071 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2072 enumerate_fts (axis_type, a->subs[1]));
2075 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2076 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for a single summary function in a single cell.  Which
   member is in use depends on the summary function, as the per-member
   comments indicate; ctables_summary_init(), ctables_summary_add() and
   ctables_summary_uninit() switch on the function to pick the member.
   NOTE(review): several member declarations are not visible in this
   excerpt. */
2082 union ctables_summary
2084 /* COUNT, VALIDN, TOTALN. */
2087 /* MINIMUM, MAXIMUM, RANGE. */
2094 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2095 struct moments1 *moments;
2097 /* MEDIAN, MODE, PTILE. */
2100 struct casewriter *writer;
2105 /* XXX multiple response */
/* Initializes accumulator S for the summary function named by SS.  The
   visible branches set the minimum and maximum to SYSMIS, create a moments
   accumulator up to the variance, or create a sorting case writer.
   NOTE(review): the initialization for the count-style functions and some
   case labels are not visible in this excerpt. */
2109 ctables_summary_init (union ctables_summary *s,
2110 const struct ctables_summary_spec *ss)
2112 switch (ss->function)
2116 case CTSF_ROWPCT_COUNT:
2117 case CTSF_COLPCT_COUNT:
2118 case CTSF_TABLEPCT_COUNT:
2119 case CTSF_SUBTABLEPCT_COUNT:
2120 case CTSF_LAYERPCT_COUNT:
2121 case CTSF_LAYERROWPCT_COUNT:
2122 case CTSF_LAYERCOLPCT_COUNT:
2123 case CTSF_ROWPCT_VALIDN:
2124 case CTSF_COLPCT_VALIDN:
2125 case CTSF_TABLEPCT_VALIDN:
2126 case CTSF_SUBTABLEPCT_VALIDN:
2127 case CTSF_LAYERPCT_VALIDN:
2128 case CTSF_LAYERROWPCT_VALIDN:
2129 case CTSF_LAYERCOLPCT_VALIDN:
2130 case CTSF_ROWPCT_TOTALN:
2131 case CTSF_COLPCT_TOTALN:
2132 case CTSF_TABLEPCT_TOTALN:
2133 case CTSF_SUBTABLEPCT_TOTALN:
2134 case CTSF_LAYERPCT_TOTALN:
2135 case CTSF_LAYERROWPCT_TOTALN:
2136 case CTSF_LAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum;
   ctables_summary_add() tests for it. */
2148 s->min = s->max = SYSMIS;
2156 case CTSF_ROWPCT_SUM:
2157 case CTSF_COLPCT_SUM:
2158 case CTSF_TABLEPCT_SUM:
2159 case CTSF_SUBTABLEPCT_SUM:
2160 case CTSF_LAYERPCT_SUM:
2161 case CTSF_LAYERROWPCT_SUM:
2162 case CTSF_LAYERCOLPCT_SUM:
2163 s->moments = moments1_create (MOMENT_VARIANCE);
/* Order statistics: cases have two numeric columns (width 0) and are
   sorted ascending on column 0.  ctables_summary_add() writes the value to
   column 0 and the weight to column 1. */
2170 struct caseproto *proto = caseproto_create ();
2171 proto = caseproto_add_width (proto, 0);
2172 proto = caseproto_add_width (proto, 0);
2174 struct subcase ordering;
2175 subcase_init (&ordering, 0, 0, SC_ASCEND);
2176 s->writer = sort_create_writer (&ordering, proto);
2177 subcase_uninit (&ordering);
2178 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function named by SS: the moments accumulator for the moment-based
   functions, or the case writer for the order statistics.
   NOTE(review): the remaining case labels are not visible in this excerpt;
   presumably the count-style and min/max functions need no cleanup --
   confirm. */
2188 ctables_summary_uninit (union ctables_summary *s,
2189 const struct ctables_summary_spec *ss)
2191 switch (ss->function)
2195 case CTSF_ROWPCT_COUNT:
2196 case CTSF_COLPCT_COUNT:
2197 case CTSF_TABLEPCT_COUNT:
2198 case CTSF_SUBTABLEPCT_COUNT:
2199 case CTSF_LAYERPCT_COUNT:
2200 case CTSF_LAYERROWPCT_COUNT:
2201 case CTSF_LAYERCOLPCT_COUNT:
2202 case CTSF_ROWPCT_VALIDN:
2203 case CTSF_COLPCT_VALIDN:
2204 case CTSF_TABLEPCT_VALIDN:
2205 case CTSF_SUBTABLEPCT_VALIDN:
2206 case CTSF_LAYERPCT_VALIDN:
2207 case CTSF_LAYERROWPCT_VALIDN:
2208 case CTSF_LAYERCOLPCT_VALIDN:
2209 case CTSF_ROWPCT_TOTALN:
2210 case CTSF_COLPCT_TOTALN:
2211 case CTSF_TABLEPCT_TOTALN:
2212 case CTSF_SUBTABLEPCT_TOTALN:
2213 case CTSF_LAYERPCT_TOTALN:
2214 case CTSF_LAYERROWPCT_TOTALN:
2215 case CTSF_LAYERCOLPCT_TOTALN:
2233 case CTSF_ROWPCT_SUM:
2234 case CTSF_COLPCT_SUM:
2235 case CTSF_TABLEPCT_SUM:
2236 case CTSF_SUBTABLEPCT_SUM:
2237 case CTSF_LAYERPCT_SUM:
2238 case CTSF_LAYERROWPCT_SUM:
2239 case CTSF_LAYERCOLPCT_SUM:
2240 moments1_destroy (s->moments);
2246 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function named by
   SS.  VALUE is the case's value of VAR, the summarized variable.
   IS_SCALE, IS_MISSING and EXCLUDED_MISSING classify the observation, and
   D_WEIGHT and E_WEIGHT are the two case weights: the visible count
   branches add one or the other to S->count, while the moment and order
   statistic branches use E_WEIGHT.
   NOTE(review): many case labels and conditions are not visible in this
   excerpt, so which functions use which weight cannot be confirmed here. */
2252 ctables_summary_add (union ctables_summary *s,
2253 const struct ctables_summary_spec *ss,
2254 const struct variable *var, const union value *value,
2255 bool is_scale, bool is_missing, bool excluded_missing,
2256 double d_weight, double e_weight)
2258 /* To determine whether a case is included in a given table for a particular
2259 kind of summary, consider the following charts for each variable in the
2260 table. Only if "yes" appears for every variable for the summary is the
2263 Categorical variables: VALIDN COUNT TOTALN
2264 Valid values in included categories yes yes yes
2265 Missing values in included categories --- yes yes
2266 Missing values in excluded categories --- --- yes
2267 Valid values in excluded categories --- --- ---
2269 Scale variables: VALIDN COUNT TOTALN
2270 Valid value yes yes yes
2271 Missing value --- yes yes
2273 Missing values include both user- and system-missing. (The system-missing
2274 value is always in an excluded category.)
2276 switch (ss->function)
2279 case CTSF_ROWPCT_TOTALN:
2280 case CTSF_COLPCT_TOTALN:
2281 case CTSF_TABLEPCT_TOTALN:
2282 case CTSF_SUBTABLEPCT_TOTALN:
2283 case CTSF_LAYERPCT_TOTALN:
2284 case CTSF_LAYERROWPCT_TOTALN:
2285 case CTSF_LAYERCOLPCT_TOTALN:
2286 s->count += d_weight;
2290 case CTSF_ROWPCT_COUNT:
2291 case CTSF_COLPCT_COUNT:
2292 case CTSF_TABLEPCT_COUNT:
2293 case CTSF_SUBTABLEPCT_COUNT:
2294 case CTSF_LAYERPCT_COUNT:
2295 case CTSF_LAYERROWPCT_COUNT:
2296 case CTSF_LAYERCOLPCT_COUNT:
2297 if (is_scale || !excluded_missing)
2298 s->count += d_weight;
2302 case CTSF_ROWPCT_VALIDN:
2303 case CTSF_COLPCT_VALIDN:
2304 case CTSF_TABLEPCT_VALIDN:
2305 case CTSF_SUBTABLEPCT_VALIDN:
2306 case CTSF_LAYERPCT_VALIDN:
2307 case CTSF_LAYERROWPCT_VALIDN:
2308 case CTSF_LAYERCOLPCT_VALIDN:
2310 ? !var_is_value_missing (var, value)
2312 s->count += d_weight;
2317 s->count += d_weight;
2321 if (is_scale || !excluded_missing)
2322 s->count += e_weight;
2327 ? !var_is_value_missing (var, value)
2329 s->count += e_weight;
2333 s->count += e_weight;
2339 if (!var_is_value_missing (var, value))
2341 assert (!var_is_alpha (var)); /* XXX? */
/* Track the running extremes; SYSMIS means no value has been seen yet. */
2342 if (s->min == SYSMIS || value->f < s->min)
2344 if (s->max == SYSMIS || value->f > s->max)
2354 case CTSF_ROWPCT_SUM:
2355 case CTSF_COLPCT_SUM:
2356 case CTSF_TABLEPCT_SUM:
2357 case CTSF_SUBTABLEPCT_SUM:
2358 case CTSF_LAYERPCT_SUM:
2359 case CTSF_LAYERROWPCT_SUM:
2360 case CTSF_LAYERCOLPCT_SUM:
/* Moment-based summaries accumulate only non-missing values. */
2361 if (!var_is_value_missing (var, value))
2362 moments1_add (s->moments, value->f, e_weight);
2368 if (var_is_value_missing (var, value))
/* Order statistics: add the weight to the valid total and queue the
   (value, weight) pair as a two-column case on the sorting writer. */
2370 s->ovalid += e_weight;
2372 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2373 *case_num_rw_idx (c, 0) = value->f;
2374 *case_num_rw_idx (c, 1) = e_weight;
2375 casewriter_write (s->writer, c);
/* Maps a percentage summary FUNCTION to the kind of domain that serves as
   its denominator.  The four variants of a percentage family (COUNT, SUM,
   TOTALN and VALIDN) share one domain type; for example, every
   CTSF_LAYERCOLPCT_ function yields CTDT_LAYERCOL.
   NOTE(review): the return statements for the COLPCT, LAYERPCT, ROWPCT and
   TABLEPCT families, and the handling of non-percentage functions, are not
   visible in this excerpt. */
2381 static enum ctables_domain_type
2382 ctables_function_domain (enum ctables_summary_function function)
2406 case CTSF_COLPCT_COUNT:
2407 case CTSF_COLPCT_SUM:
2408 case CTSF_COLPCT_TOTALN:
2409 case CTSF_COLPCT_VALIDN:
2412 case CTSF_LAYERCOLPCT_COUNT:
2413 case CTSF_LAYERCOLPCT_SUM:
2414 case CTSF_LAYERCOLPCT_TOTALN:
2415 case CTSF_LAYERCOLPCT_VALIDN:
2416 return CTDT_LAYERCOL;
2418 case CTSF_LAYERPCT_COUNT:
2419 case CTSF_LAYERPCT_SUM:
2420 case CTSF_LAYERPCT_TOTALN:
2421 case CTSF_LAYERPCT_VALIDN:
2424 case CTSF_LAYERROWPCT_COUNT:
2425 case CTSF_LAYERROWPCT_SUM:
2426 case CTSF_LAYERROWPCT_TOTALN:
2427 case CTSF_LAYERROWPCT_VALIDN:
2428 return CTDT_LAYERROW;
2430 case CTSF_ROWPCT_COUNT:
2431 case CTSF_ROWPCT_SUM:
2432 case CTSF_ROWPCT_TOTALN:
2433 case CTSF_ROWPCT_VALIDN:
2436 case CTSF_SUBTABLEPCT_COUNT:
2437 case CTSF_SUBTABLEPCT_SUM:
2438 case CTSF_SUBTABLEPCT_TOTALN:
2439 case CTSF_SUBTABLEPCT_VALIDN:
2440 return CTDT_SUBTABLE;
2442 case CTSF_TABLEPCT_COUNT:
2443 case CTSF_TABLEPCT_SUM:
2444 case CTSF_TABLEPCT_TOTALN:
2445 case CTSF_TABLEPCT_VALIDN:
/* Computes the final value of summary S in CELL for the function named by
   SS.  Percentages divide S->count by the matching total of the cell's
   domain and multiply by 100; moment-based results come from
   moments1_calculate(); the order statistics read back the sorted cases
   and store their result in S->ovalue.
   NOTE(review): the return type, many case labels and several return
   statements are not visible in this excerpt. */
2453 ctables_summary_value (const struct ctables_cell *cell,
2454 union ctables_summary *s,
2455 const struct ctables_summary_spec *ss)
2457 switch (ss->function)
2463 case CTSF_ROWPCT_COUNT:
2464 case CTSF_COLPCT_COUNT:
2465 case CTSF_TABLEPCT_COUNT:
2466 case CTSF_SUBTABLEPCT_COUNT:
2467 case CTSF_LAYERPCT_COUNT:
2468 case CTSF_LAYERROWPCT_COUNT:
2469 case CTSF_LAYERCOLPCT_COUNT:
/* The zero test on the denominator selects an alternative result (not
   visible here) instead of dividing by zero. */
2471 enum ctables_domain_type d = ctables_function_domain (ss->function);
2472 return (cell->domains[d]->e_count
2473 ? s->count / cell->domains[d]->e_count * 100
2477 case CTSF_ROWPCT_VALIDN:
2478 case CTSF_COLPCT_VALIDN:
2479 case CTSF_TABLEPCT_VALIDN:
2480 case CTSF_SUBTABLEPCT_VALIDN:
2481 case CTSF_LAYERPCT_VALIDN:
2482 case CTSF_LAYERROWPCT_VALIDN:
2483 case CTSF_LAYERCOLPCT_VALIDN:
2485 enum ctables_domain_type d = ctables_function_domain (ss->function);
2486 return (cell->domains[d]->e_valid
2487 ? s->count / cell->domains[d]->e_valid * 100
2491 case CTSF_ROWPCT_TOTALN:
2492 case CTSF_COLPCT_TOTALN:
2493 case CTSF_TABLEPCT_TOTALN:
2494 case CTSF_SUBTABLEPCT_TOTALN:
2495 case CTSF_LAYERPCT_TOTALN:
2496 case CTSF_LAYERROWPCT_TOTALN:
2497 case CTSF_LAYERCOLPCT_TOTALN:
2499 enum ctables_domain_type d = ctables_function_domain (ss->function);
2500 return (cell->domains[d]->e_total
2501 ? s->count / cell->domains[d]->e_total * 100
/* Range: defined only when both extremes were observed. */
2525 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2530 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2536 double weight, variance;
2537 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2538 return calc_semean (variance, weight);
2544 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2545 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is reconstructed as total weight times mean. */
2550 double weight, mean;
2551 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2552 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2558 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2562 case CTSF_ROWPCT_SUM:
2563 case CTSF_COLPCT_SUM:
2564 case CTSF_TABLEPCT_SUM:
2565 case CTSF_SUBTABLEPCT_SUM:
2566 case CTSF_LAYERPCT_SUM:
2567 case CTSF_LAYERROWPCT_SUM:
2568 case CTSF_LAYERCOLPCT_SUM:
/* Percentile: PTILE uses the percentile stored in SS, any other function
   reaching this code uses 0.5.  The writer is converted into a reader and
   fed to the order-statistics accumulator.
   NOTE(review): SS->percentile is parsed elsewhere in this file as a number
   in 0...100 while the fallback here is 0.5; confirm which scale
   percentile_create() expects. */
2575 struct casereader *reader = casewriter_make_reader (s->writer);
2578 struct percentile *ptile = percentile_create (
2579 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2580 struct order_stats *os = &ptile->parent;
2581 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2582 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2583 statistic_destroy (&ptile->parent.parent);
/* Mode: same reader-based accumulation with a mode statistic. */
2590 struct casereader *reader = casewriter_make_reader (s->writer);
2593 struct mode *mode = mode_create ();
2594 struct order_stats *os = &mode->parent;
2595 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2596 s->ovalue = mode->mode;
2597 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared, and the axis A whose cell values are read. */
2605 struct ctables_cell_sort_aux
2607 const struct ctables_nest *nest;
2608 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   pointers to struct ctables_cell; AUX_ points to a struct
   ctables_cell_sort_aux.  The cells are compared variable by variable along
   axis AUX->a, skipping the nest's scale variable.  Cells in different
   categories are ordered by the addresses of the category objects.  Within
   a category the order depends on the category type: by value, or by value
   label with unlabeled values ordered before labeled ones, each reversed
   when the category does not sort ascending.
   NOTE(review): most case labels and the final return are not visible in
   this excerpt. */
2612 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2614 const struct ctables_cell_sort_aux *aux = aux_;
2615 struct ctables_cell *const *ap = a_;
2616 struct ctables_cell *const *bp = b_;
2617 const struct ctables_cell *a = *ap;
2618 const struct ctables_cell *b = *bp;
2620 const struct ctables_nest *nest = aux->nest;
2621 for (size_t i = 0; i < nest->n; i++)
2622 if (i != nest->scale_idx)
2624 const struct variable *var = nest->vars[i];
2625 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2626 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2627 if (a_cv->category != b_cv->category)
2628 return a_cv->category > b_cv->category ? 1 : -1;
2630 const union value *a_val = &a_cv->value;
2631 const union value *b_val = &b_cv->value;
2632 switch (a_cv->category->type)
2638 case CCT_POSTCOMPUTE:
2639 case CCT_EXCLUDED_MISSING:
2640 /* Must be equal. */
2647 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2655 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2657 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Label ordering: compare labels when both exist; a value with a label
   sorts after one without; two unlabeled values fall back to comparing the
   values themselves. */
2663 const char *a_label = var_lookup_value_label (var, a_val);
2664 const char *b_label = var_lookup_value_label (var, b_val);
2666 ? (b_label ? strcmp (a_label, b_label) : 1)
2667 : (b_label ? -1 : value_compare_3way (
2668 a_val, b_val, var_get_width (var))));
2670 return a_cv->category->sort_ascending ? cmp : -cmp;
2684 For each ctables_table:
2685 For each combination of row vars:
2686 For each combination of column vars:
2687 For each combination of layer vars:
2689 Make a table of row values:
2690 Sort entries by row values
2691 Assign a 0-based index to each actual value
2692 Construct a dimension
2693 Make a table of column values
2694 Make a table of layer values
2696 Fill the table entry using the indexes from before.
/* Finds or creates the domain of kind DOMAIN that CELL belongs to in
   section S.  A domain is identified by the categories, and for categories
   other than totals, subtotals and postcomputes also the values, of the
   variables listed in each nest's domains[DOMAIN] index array.  These are
   hashed, the matching hash bucket of S->domains[DOMAIN] is searched by
   comparing against each existing domain's example cell, and if nothing
   matches a new domain with CELL as its example is inserted.
   NOTE(review): the declaration of HASH and the return statements are not
   visible in this excerpt. */
2699 static struct ctables_domain *
2700 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2701 enum ctables_domain_type domain)
2704 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2706 const struct ctables_nest *nest = s->nests[a];
2707 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2709 size_t v_idx = nest->domains[domain][i];
2710 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2711 hash = hash_pointer (cv->category, hash);
2712 if (cv->category->type != CCT_TOTAL
2713 && cv->category->type != CCT_SUBTOTAL
2714 && cv->category->type != CCT_POSTCOMPUTE)
2715 hash = value_hash (&cv->value,
2716 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing domain whose example cell agrees with CELL on every
   domain variable, using the same criteria that were hashed above. */
2720 struct ctables_domain *d;
2721 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2723 const struct ctables_cell *df = d->example;
2724 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2726 const struct ctables_nest *nest = s->nests[a];
2727 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2729 size_t v_idx = nest->domains[domain][i];
2730 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2731 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
2732 if (cv1->category != cv2->category
2733 || (cv1->category->type != CCT_TOTAL
2734 && cv1->category->type != CCT_SUBTOTAL
2735 && cv1->category->type != CCT_POSTCOMPUTE
2736 && !value_equal (&cv1->value, &cv2->value,
2737 var_get_width (nest->vars[v_idx]))))
/* No match: create a new domain represented by CELL. */
2746 d = xmalloc (sizeof *d);
2747 *d = (struct ctables_domain) { .example = cell };
2748 hmap_insert (&s->domains[domain], &d->node, hash);
/* Returns the category in C that value V of variable VAR falls into.
   System-missing numeric values are tested for first.  The categories are
   then scanned from last to first: numbers match by equality, ranges match
   inclusively with -DBL_MAX and DBL_MAX acting as open ends, and the
   implicit categories match when they include missing values or V is not
   missing.  If nothing matches, the result is NULL for a missing value and
   OTHERNM otherwise; OTHERNM is initialized to NULL.
   NOTE(review): the case labels, the statements taken on a match and the
   assignment to OTHERNM are not visible in this excerpt. */
2752 static const struct ctables_category *
2753 ctables_categories_match (const struct ctables_categories *c,
2754 const union value *v, const struct variable *var)
2756 if (var_is_numeric (var) && v->f == SYSMIS)
2759 const struct ctables_category *othernm = NULL;
2760 for (size_t i = c->n_cats; i-- > 0; )
2762 const struct ctables_category *cat = &c->cats[i];
2766 if (cat->number == v->f)
2774 if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
2775 && (cat->range[1] == DBL_MAX || v->f <= cat->range[1]))
2780 if (var_is_value_missing (var, v))
2784 case CCT_POSTCOMPUTE:
2799 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2802 case CCT_EXCLUDED_MISSING:
2807 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's CCT_TOTAL category when it is the first or the last category
   in C.  C must contain at least one category, because the last element is
   addressed as cats[n_cats - 1] and dereferenced.
   NOTE(review): the final alternative of the conditional is not visible in
   this excerpt; presumably NULL when there is no total -- confirm. */
2810 static const struct ctables_category *
2811 ctables_categories_total (const struct ctables_categories *c)
2813 const struct ctables_category *first = &c->cats[0];
2814 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2815 return (first->type == CCT_TOTAL ? first
2816 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell of section S that case C belongs to, given the
   category CATS[a][i] chosen for variable I on axis A.  A cell is
   identified, for every non-scale variable, by its category and, unless
   the category is a total, subtotal or postcompute, by the case's value.
   A new cell records category and value for every variable, accumulates
   the set of domains it must not contribute to, allocates and initializes
   one summary per summary specification, is linked to its domains, and is
   inserted into S->cells.
   NOTE(review): the declaration of HASH, the early return for an existing
   cell, the handling of hidden categories and the final return are not
   visible in this excerpt. */
2820 static struct ctables_cell *
2821 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
2822 const struct ctables_category *cats[PIVOT_N_AXES][10])
2825 enum ctables_summary_variant sv = CSV_CELL;
2826 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2828 const struct ctables_nest *nest = s->nests[a];
2829 for (size_t i = 0; i < nest->n; i++)
2830 if (i != nest->scale_idx)
2832 hash = hash_pointer (cats[a][i], hash);
2833 if (cats[a][i]->type != CCT_TOTAL
2834 && cats[a][i]->type != CCT_SUBTOTAL
2835 && cats[a][i]->type != CCT_POSTCOMPUTE)
2836 hash = value_hash (case_data (c, nest->vars[i]),
2837 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for a cell with the same categories and values. */
2843 struct ctables_cell *cell;
2844 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
2846 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2848 const struct ctables_nest *nest = s->nests[a];
2849 for (size_t i = 0; i < nest->n; i++)
2850 if (i != nest->scale_idx
2851 && (cats[a][i] != cell->axes[a].cvs[i].category
2852 || (cats[a][i]->type != CCT_TOTAL
2853 && cats[a][i]->type != CCT_SUBTOTAL
2854 && cats[a][i]->type != CCT_POSTCOMPUTE
2855 && !value_equal (case_data (c, nest->vars[i]),
2856 &cell->axes[a].cvs[i].value,
2857 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
2866 cell = xmalloc (sizeof *cell);
2869 cell->omit_domains = 0;
2870 cell->postcompute = false;
2871 //struct string name = DS_EMPTY_INITIALIZER;
2872 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2874 const struct ctables_nest *nest = s->nests[a];
2875 cell->axes[a].cvs = (nest->n
2876 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
2878 for (size_t i = 0; i < nest->n; i++)
2880 const struct ctables_category *cat = cats[a][i];
2881 const struct variable *var = nest->vars[i];
2882 const union value *value = case_data (c, var);
2883 if (i != nest->scale_idx)
2885 const struct ctables_category *subtotal = cat->subtotal;
2886 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal or postcompute on an axis marks domains, as a bit mask
   indexed by domain type, that this cell must not be added to. */
2889 if (cat->type == CCT_TOTAL
2890 || cat->type == CCT_SUBTOTAL
2891 || cat->type == CCT_POSTCOMPUTE)
2893 /* XXX these should be more encompassing I think.*/
2897 case PIVOT_AXIS_COLUMN:
2898 cell->omit_domains |= ((1u << CTDT_TABLE) |
2899 (1u << CTDT_LAYER) |
2900 (1u << CTDT_LAYERCOL) |
2901 (1u << CTDT_SUBTABLE) |
2904 case PIVOT_AXIS_ROW:
2905 cell->omit_domains |= ((1u << CTDT_TABLE) |
2906 (1u << CTDT_LAYER) |
2907 (1u << CTDT_LAYERROW) |
2908 (1u << CTDT_SUBTABLE) |
2911 case PIVOT_AXIS_LAYER:
2912 cell->omit_domains |= ((1u << CTDT_TABLE) |
2913 (1u << CTDT_LAYER));
2917 if (cat->type == CCT_POSTCOMPUTE)
2918 cell->postcompute = true;
2921 cell->axes[a].cvs[i].category = cat;
2922 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
2925 if (i != nest->scale_idx)
2927 if (!ds_is_empty (&name))
2928 ds_put_cstr (&name, ", ");
2929 char *value_s = data_out (value, var_get_encoding (var),
2930 var_get_print_format (var),
2931 settings_get_fmt_settings ());
2932 if (cat->type == CCT_TOTAL
2933 || cat->type == CCT_SUBTOTAL
2934 || cat->type == CCT_POSTCOMPUTE)
2935 ds_put_format (&name, "%s=total", var_get_name (var));
2937 ds_put_format (&name, "%s=%s", var_get_name (var),
2938 value_s + strspn (value_s, " "));
2944 //cell->name = ds_steal_cstr (&name);
/* One accumulator per summary specification of the summary axis, then
   link the cell to a domain of every kind and publish it. */
2946 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2947 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2948 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
2949 for (size_t i = 0; i < specs->n; i++)
2950 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
2951 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2952 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
2953 hmap_insert (&s->cells, &cell->node, hash);
/* Adds case C to the cell of section S selected by CATS, creating the cell
   if necessary.  Every summary of the cell receives the case's value of
   the summary variable, and every domain the cell contributes to has its
   totals increased by D_WEIGHT and E_WEIGHT: always the "total" pair, the
   "count" pair unless EXCLUDED_MISSING is set, and the "valid" pair under
   a further condition.
   NOTE(review): the guard around the "valid" pair is not visible in this
   excerpt; presumably it tests IS_MISSING -- confirm. */
2958 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
2959 const struct ctables_category *cats[PIVOT_N_AXES][10],
2960 bool is_missing, bool excluded_missing,
2961 double d_weight, double e_weight)
2963 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
2964 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2966 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2967 for (size_t i = 0; i < specs->n; i++)
2968 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
2969 specs->var, case_data (c, specs->var), specs->is_scale,
2970 is_missing, excluded_missing, d_weight, e_weight);
2971 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bit mask indexed by domain type, so the membership
   test must be a bitwise AND.  A logical AND is true whenever any bit at
   all is set, which made a cell with a single omitted domain skip every
   domain. */
2972 if (!(cell->omit_domains & (1u << dt)))
2974 struct ctables_domain *d = cell->domains[dt];
2975 d->d_total += d_weight;
2976 d->e_total += e_weight;
2977 if (!excluded_missing)
2979 d->d_count += d_weight;
2980 d->e_count += e_weight;
2984 d->d_valid += d_weight;
2985 d->e_valid += e_weight;
2991 recurse_totals (struct ctables_section *s, const struct ccase *c,
2992 const struct ctables_category *cats[PIVOT_N_AXES][10],
2993 bool is_missing, bool excluded_missing,
2994 double d_weight, double e_weight,
2995 enum pivot_axis_type start_axis, size_t start_nest)
2997 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
2999 const struct ctables_nest *nest = s->nests[a];
3000 for (size_t i = start_nest; i < nest->n; i++)
3002 if (i == nest->scale_idx)
3005 const struct variable *var = nest->vars[i];
3007 const struct ctables_category *total = ctables_categories_total (
3008 s->table->categories[var_get_dict_index (var)]);
3011 const struct ctables_category *save = cats[a][i];
3013 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3014 d_weight, e_weight);
3015 recurse_totals (s, c, cats, is_missing, excluded_missing,
3016 d_weight, e_weight, a, i + 1);
3025 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3026 const struct ctables_category *cats[PIVOT_N_AXES][10],
3027 bool is_missing, bool excluded_missing,
3028 double d_weight, double e_weight,
3029 enum pivot_axis_type start_axis, size_t start_nest)
3031 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3033 const struct ctables_nest *nest = s->nests[a];
3034 for (size_t i = start_nest; i < nest->n; i++)
3036 if (i == nest->scale_idx)
3039 const struct ctables_category *save = cats[a][i];
3042 cats[a][i] = save->subtotal;
3043 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3044 d_weight, e_weight);
3045 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3046 d_weight, e_weight, a, i + 1);
3055 ctables_add_occurrence (const struct variable *var,
3056 const union value *value,
3057 struct hmap *occurrences)
3059 int width = var_get_width (var);
3060 unsigned int hash = value_hash (value, width, 0);
3062 struct ctables_occurrence *o;
3063 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3065 if (value_equal (value, &o->value, width))
3068 o = xmalloc (sizeof *o);
3069 value_clone (&o->value, value, width);
3070 hmap_insert (occurrences, &o->node, hash);
// Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.  Each
// categorical variable's value is matched against that variable's category
// specification; the observed values are recorded as occurrences; and the case
// is then accumulated into the matching cell and into the related total and
// subtotal cells.
3074 ctables_cell_insert (struct ctables_section *s,
3075 const struct ccase *c,
3076 double d_weight, double e_weight)
// NOTE(review): this array holds at most 10 categories per axis, and nothing
// visible in this function checks nest->n against that limit.
3078 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3080 /* Does at least one categorical variable have a missing value in an included
3081 or excluded category? */
3082 bool is_missing = false;
3084 /* Does at least one categorical variable have a missing value in an excluded
3086 bool excluded_missing = false;
// First pass: pick a category for every categorical (non-scale) variable.
3088 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3090 const struct ctables_nest *nest = s->nests[a];
3091 for (size_t i = 0; i < nest->n; i++)
3093 if (i == nest->scale_idx)
3096 const struct variable *var = nest->vars[i];
3097 const union value *value = case_data (c, var);
3099 bool var_missing = var_is_value_missing (var, value) != 0;
3103 cats[a][i] = ctables_categories_match (
3104 s->table->categories[var_get_dict_index (var)], value, var);
// Shared placeholder category; assigning it also flags the case as
// excluded_missing.  Presumably used when a missing value matched no
// category -- confirm against the guarding condition.
3110 static const struct ctables_category cct_excluded_missing = {
3111 .type = CCT_EXCLUDED_MISSING,
3114 cats[a][i] = &cct_excluded_missing;
3115 excluded_missing = true;
// Second pass: remember every categorical value seen, but only for cases that
// did not hit an excluded missing value.
3120 if (!excluded_missing)
3121 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3123 const struct ctables_nest *nest = s->nests[a];
3124 for (size_t i = 0; i < nest->n; i++)
3125 if (i != nest->scale_idx)
3127 const struct variable *var = nest->vars[i];
3128 const union value *value = case_data (c, var);
3129 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
// Accumulate into the cell selected by CATS itself...
3133 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3134 d_weight, e_weight);
// ...and into total and subtotal cells.  NOTE(review): the guard below is
// commented out, so this happens even when excluded_missing is set.
3136 //if (!excluded_missing)
3138 recurse_totals (s, c, cats, is_missing, excluded_missing,
3139 d_weight, e_weight, 0, 0);
3140 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3141 d_weight, e_weight, 0, 0);
// Spec set that this merge item reads from; merge_item_compare_3way() looks at
// set->specs[ofs].
3147 const struct ctables_summary_spec_set *set;
3152 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3154 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3155 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3156 if (as->function != bs->function)
3157 return as->function > bs->function ? 1 : -1;
3158 else if (as->percentile != bs->percentile)
3159 return as->percentile < bs->percentile ? 1 : -1;
3160 return strcmp (as->label, bs->label);
3163 static struct pivot_value *
3164 ctables_category_create_label (const struct ctables_category *cat,
3165 const struct variable *var,
3166 const union value *value)
3168 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3169 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3170 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3171 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3172 : pivot_value_new_var_value (var, value));
3175 static struct ctables_value *
3176 ctables_value_find__ (struct ctables_table *t, const union value *value,
3177 int width, unsigned int hash)
3179 struct ctables_value *clv;
3180 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3181 hash, &t->clabels_values_map)
3182 if (value_equal (value, &clv->value, width))
3188 ctables_value_insert (struct ctables_table *t, const union value *value,
3191 unsigned int hash = value_hash (value, width, 0);
3192 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3195 clv = xmalloc (sizeof *clv);
3196 value_clone (&clv->value, value, width);
3197 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T's map of category-label
   values.  Returns the matching entry or a null pointer. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
// Recursively enumerates one nest index per axis into IX, starting at axis A,
// and appends one section to T->sections for every combination of indexes.
// NOTE(review): T->sections must already have room for every section; see the
// allocation in ctables_execute().
3210 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3211 size_t ix[PIVOT_N_AXES])
3213 if (a < PIVOT_N_AXES)
// An axis whose stack is empty still contributes exactly one iteration.
3215 size_t limit = MAX (t->stacks[a].n, 1);
3216 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3217 ctables_table_add_section (t, a + 1, ix);
// All axes have an index: claim the next slot and initialize the section.
3221 struct ctables_section *s = &t->sections[t->n_sections++];
3222 *s = (struct ctables_section) {
3224 .cells = HMAP_INITIALIZER (s->cells),
// Per axis: one occurrence map for each variable in the chosen nest.
3226 for (a = 0; a < PIVOT_N_AXES; a++)
3229 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3231 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3232 for (size_t i = 0; i < nest->n; i++)
3233 hmap_init (&s->occurrences[a][i]);
// One domain map per domain type.
3235 for (size_t i = 0; i < N_CTDTS; i++)
3236 hmap_init (&s->domains[i]);
// Binary operator callback for postcompute expressions;
// ctables_pcexpr_evaluate() passes it to
// ctables_pcexpr_evaluate_nonterminal() with two arguments.
3241 ctpo_add (double a, double b)
// Binary operator callback for postcompute expressions;
// ctables_pcexpr_evaluate() passes it to
// ctables_pcexpr_evaluate_nonterminal() with two arguments.
3247 ctpo_sub (double a, double b)
// Binary operator callback for postcompute expressions;
// ctables_pcexpr_evaluate() passes it to
// ctables_pcexpr_evaluate_nonterminal() with two arguments.
3253 ctpo_mul (double a, double b)
// Division callback for postcompute expressions.  Returns A divided by B, or
// SYSMIS when B is zero so that no division by zero takes place.
3259 ctpo_div (double a, double b)
3261 return b ? a / b : SYSMIS;
// Exponentiation callback for postcompute expressions, based on pow().
3265 ctpo_pow (double a, double b)
// errno is saved before the call; presumably it is used to detect a pow()
// error and restored afterward -- confirm against the rest of the body.
3267 int save_errno = errno;
3269 double result = pow (a, b);
// Unary operator callback for postcompute expressions.  B is unused; it is
// there so the function fits the two-argument callback signature.
// ctables_pcexpr_evaluate() passes it with an argument count of 1.
3277 ctpo_neg (double a, double b UNUSED)
// Context shared by the functions that evaluate a postcompute expression.
3282 struct ctables_pcexpr_evaluate_ctx
// Cell being computed; supplies the category and value for every variable
// other than the postcompute one.
3284 const struct ctables_cell *cell;
// Section whose cells and occurrence maps are searched.
3285 const struct ctables_section *section;
// Category specification of the postcompute variable.
3286 const struct ctables_categories *cats;
// Axis of the postcompute variable; paired with pc_a_idx, its position in the
// nest on that axis.
3287 enum pivot_axis_type pc_a;
// Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
// function before its definition appears.
3291 static double ctables_pcexpr_evaluate (
3292 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3295 ctables_pcexpr_evaluate_nonterminal (
3296 const struct ctables_pcexpr_evaluate_ctx *ctx,
3297 const struct ctables_pcexpr *e, size_t n_args,
3298 double evaluate (double, double))
3300 double args[2] = { 0, 0 };
3301 for (size_t i = 0; i < n_args; i++)
3303 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3304 if (!isfinite (args[i]) || args[i] == SYSMIS)
3307 return evaluate (args[0], args[1]);
// Finds the cell in CTX's section that matches CTX's own cell on every
// categorical variable, except that the postcompute variable (axis pc_a,
// position pc_a_idx) takes its category and value from PC_CV instead.  When
// such a cell is found, returns the value of one of its summaries.
3311 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3312 const struct ctables_cell_value *pc_cv)
3314 const struct ctables_section *s = ctx->section;
// Step 1: hash the category pointers, plus the values for categories that
// are not totals, subtotals, or postcomputes.
3317 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3319 const struct ctables_nest *nest = s->nests[a];
3320 for (size_t i = 0; i < nest->n; i++)
3321 if (i != nest->scale_idx)
3323 const struct ctables_cell_value *cv
3324 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3325 : &ctx->cell->axes[a].cvs[i]);
3326 hash = hash_pointer (cv->category, hash);
3327 if (cv->category->type != CCT_TOTAL
3328 && cv->category->type != CCT_SUBTOTAL
3329 && cv->category->type != CCT_POSTCOMPUTE)
3330 hash = value_hash (&cv->value,
3331 var_get_width (nest->vars[i]), hash);
// Step 2: among the cells with that hash, compare categories (and values,
// under the same rule as above) variable by variable.
3335 struct ctables_cell *tc;
3336 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3338 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3340 const struct ctables_nest *nest = s->nests[a];
3341 for (size_t i = 0; i < nest->n; i++)
3342 if (i != nest->scale_idx)
3344 const struct ctables_cell_value *p_cv
3345 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3346 : &ctx->cell->axes[a].cvs[i]);
3347 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3348 if (p_cv->category != t_cv->category
3349 || (p_cv->category->type != CCT_TOTAL
3350 && p_cv->category->type != CCT_SUBTOTAL
3351 && p_cv->category->type != CCT_POSTCOMPUTE
3352 && !value_equal (&p_cv->value,
3354 var_get_width (nest->vars[i]))))
// Step 3: evaluate a summary of the matching cell.
3366 const struct ctables_table *t = s->table;
3367 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3368 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
// NOTE(review): only the first summary (index 0) is ever used here; the XXX
// marks this as unfinished.
3369 size_t j = 0 /* XXX */;
3370 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
// Evaluates postcompute expression E in context CTX and returns its value.
// Category references are resolved through
// ctables_pcexpr_evaluate_category(); operators go through
// ctables_pcexpr_evaluate_nonterminal().
3374 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3375 const struct ctables_pcexpr *e)
// Range of categories: sum the result over every observed value of the
// postcompute variable that matches the range's category.
3382 case CTPO_CAT_RANGE:
3384 struct ctables_cell_value cv = {
3385 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3387 assert (cv.category != NULL);
3389 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3390 const struct ctables_occurrence *o;
3393 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3394 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3395 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3397 cv.value = o->value;
3398 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
// Single-category references: evaluate the one cell for that category.
3403 case CTPO_CAT_NUMBER:
3404 case CTPO_CAT_STRING:
3405 case CTPO_CAT_MISSING:
3406 case CTPO_CAT_OTHERNM:
3407 case CTPO_CAT_SUBTOTAL:
3408 case CTPO_CAT_TOTAL:
3410 struct ctables_cell_value cv = {
3411 .category = ctables_find_category_for_postcompute (ctx->cats, e),
// NOTE(review): the value is always built from e->number, including for
// CTPO_CAT_STRING -- confirm that string categories are handled correctly.
3412 .value = { .f = e->number },
3414 assert (cv.category != NULL);
3415 return ctables_pcexpr_evaluate_category (ctx, &cv);
// Operators: five binary (two operands) and one unary (one operand).
3419 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3422 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3425 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3428 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3431 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3434 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3441 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3442 const struct ctables_cell *cell)
3444 enum pivot_axis_type pc_a;
3446 const struct ctables_postcompute *pc;
3447 for (pc_a = 0; ; pc_a++)
3449 assert (pc_a < PIVOT_N_AXES);
3450 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3452 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3453 if (cv->category->type == CCT_POSTCOMPUTE)
3455 pc = cv->category->pc;
3462 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3463 const struct ctables_categories *cats = s->table->categories[
3464 var_get_dict_index (var)];
3465 struct ctables_pcexpr_evaluate_ctx ctx = {
3470 .pc_a_idx = pc_a_idx,
3472 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3476 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3478 struct pivot_table *pt = pivot_table_create__ (
3480 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3481 : pivot_value_new_text (N_("Custom Tables"))),
3484 pivot_table_set_caption (
3485 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3487 pivot_table_set_caption (
3488 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
3490 bool summary_dimension = (t->summary_axis != t->slabels_axis
3491 || (!t->slabels_visible
3492 && t->summary_specs.n > 1));
3493 if (summary_dimension)
3495 struct pivot_dimension *d = pivot_dimension_create (
3496 pt, t->slabels_axis, N_("Statistics"));
3497 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3498 if (!t->slabels_visible)
3499 d->hide_all_labels = true;
3500 for (size_t i = 0; i < specs->n; i++)
3501 pivot_category_create_leaf (
3502 d->root, pivot_value_new_text (specs->specs[i].label));
3505 bool categories_dimension = t->clabels_example != NULL;
3506 if (categories_dimension)
3508 struct pivot_dimension *d = pivot_dimension_create (
3509 pt, t->label_axis[t->clabels_from_axis],
3510 t->clabels_from_axis == PIVOT_AXIS_ROW
3511 ? N_("Row Categories")
3512 : N_("Column Categories"));
3513 const struct variable *var = t->clabels_example;
3514 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3515 for (size_t i = 0; i < t->n_clabels_values; i++)
3517 const struct ctables_value *value = t->clabels_values[i];
3518 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3519 assert (cat != NULL);
3520 pivot_category_create_leaf (d->root, ctables_category_create_label (
3521 cat, t->clabels_example, &value->value));
3525 pivot_table_set_look (pt, ct->look);
3526 struct pivot_dimension *d[PIVOT_N_AXES];
3527 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3529 static const char *names[] = {
3530 [PIVOT_AXIS_ROW] = N_("Rows"),
3531 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3532 [PIVOT_AXIS_LAYER] = N_("Layers"),
3534 d[a] = (t->axes[a] || a == t->summary_axis
3535 ? pivot_dimension_create (pt, a, names[a])
3540 assert (t->axes[a]);
3542 for (size_t i = 0; i < t->stacks[a].n; i++)
3544 struct ctables_nest *nest = &t->stacks[a].nests[i];
3545 struct ctables_section **sections = xnmalloc (t->n_sections,
3547 size_t n_sections = 0;
3549 size_t n_total_cells = 0;
3550 size_t max_depth = 0;
3551 for (size_t j = 0; j < t->n_sections; j++)
3552 if (t->sections[j].nests[a] == nest)
3554 struct ctables_section *s = &t->sections[j];
3555 sections[n_sections++] = s;
3556 n_total_cells += s->cells.count;
3558 size_t depth = s->nests[a]->n;
3559 max_depth = MAX (depth, max_depth);
3562 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3564 size_t n_sorted = 0;
3566 for (size_t j = 0; j < n_sections; j++)
3568 struct ctables_section *s = sections[j];
3570 struct ctables_cell *cell;
3571 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3573 sorted[n_sorted++] = cell;
3574 assert (n_sorted <= n_total_cells);
3577 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3578 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
3581 for (size_t j = 0; j < n_sorted; j++)
3583 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
3588 struct ctables_level
3590 enum ctables_level_type
3592 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3593 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3594 CTL_SUMMARY, /* Summary functions. */
3598 enum settings_value_show vlabel; /* CTL_VAR only. */
3601 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3602 size_t n_levels = 0;
3603 for (size_t k = 0; k < nest->n; k++)
3605 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3606 if (vlabel != CTVL_NONE)
3608 levels[n_levels++] = (struct ctables_level) {
3610 .vlabel = (enum settings_value_show) vlabel,
3615 if (nest->scale_idx != k
3616 && (k != nest->n - 1 || t->label_axis[a] == a))
3618 levels[n_levels++] = (struct ctables_level) {
3619 .type = CTL_CATEGORY,
3625 if (!summary_dimension && a == t->slabels_axis)
3627 levels[n_levels++] = (struct ctables_level) {
3628 .type = CTL_SUMMARY,
3629 .var_idx = SIZE_MAX,
3633 /* Pivot categories:
3635 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3636 - category for nest->vars[0], if nest->scale_idx != 0
3637 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3638 - category for nest->vars[1], if nest->scale_idx != 1
3640 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3641 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3642 - summary function, if 'a == t->slabels_axis && a ==
3645 Additional dimensions:
3647 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3649 - If 't->label_axis[b] == a' for some 'b != a', add a category
3654 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3656 for (size_t j = 0; j < n_sorted; j++)
3658 struct ctables_cell *cell = sorted[j];
3659 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
3661 size_t n_common = 0;
3664 for (; n_common < n_levels; n_common++)
3666 const struct ctables_level *level = &levels[n_common];
3667 if (level->type == CTL_CATEGORY)
3669 size_t var_idx = level->var_idx;
3670 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3671 if (prev->axes[a].cvs[var_idx].category != c)
3673 else if (c->type != CCT_SUBTOTAL
3674 && c->type != CCT_TOTAL
3675 && c->type != CCT_POSTCOMPUTE
3676 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3677 &cell->axes[a].cvs[var_idx].value,
3678 var_get_type (nest->vars[var_idx])))
3684 for (size_t k = n_common; k < n_levels; k++)
3686 const struct ctables_level *level = &levels[k];
3687 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3688 if (level->type == CTL_SUMMARY)
3690 assert (k == n_levels - 1);
3692 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3693 for (size_t m = 0; m < specs->n; m++)
3695 int leaf = pivot_category_create_leaf (
3696 parent, pivot_value_new_text (specs->specs[m].label));
3703 const struct variable *var = nest->vars[level->var_idx];
3704 struct pivot_value *label;
3705 if (level->type == CTL_VAR)
3707 label = pivot_value_new_variable (var);
3708 label->variable.show = level->vlabel;
3710 else if (level->type == CTL_CATEGORY)
3712 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3713 label = ctables_category_create_label (cv->category,
3719 if (k == n_levels - 1)
3720 prev_leaf = pivot_category_create_leaf (parent, label);
3722 groups[k] = pivot_category_create_group__ (parent, label);
3726 cell->axes[a].leaf = prev_leaf;
3733 for (size_t i = 0; i < t->n_sections; i++)
3735 struct ctables_section *s = &t->sections[i];
3737 struct ctables_cell *cell;
3738 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3743 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3744 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3745 for (size_t j = 0; j < specs->n; j++)
3748 size_t n_dindexes = 0;
3750 if (summary_dimension)
3751 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3753 if (categories_dimension)
3755 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3756 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3757 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3758 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3761 dindexes[n_dindexes++] = ctv->leaf;
3764 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3767 int leaf = cell->axes[a].leaf;
3768 if (a == t->summary_axis && !summary_dimension)
3770 dindexes[n_dindexes++] = leaf;
3773 const struct ctables_summary_spec *ss = &specs->specs[j];
3775 double d = (cell->postcompute
3776 ? ctables_cell_calculate_postcompute (s, cell)
3777 : ctables_summary_value (cell, &cell->summaries[j], ss));
3778 struct pivot_value *value;
3779 if (ct->hide_threshold != 0
3780 && d < ct->hide_threshold
3781 && (cell->postcompute
3783 : ctables_summary_function_is_count (ss->function)))
3785 value = pivot_value_new_user_text_nocopy (
3786 xasprintf ("<%d", ct->hide_threshold));
3788 else if (d == 0 && ct->zero)
3789 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3790 else if (d == SYSMIS && ct->missing)
3791 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3792 else if (specs->specs[j].is_ctables_format)
3794 char *s = data_out_stretchy (&(union value) { .f = d },
3796 &specs->specs[j].format,
3797 &ct->ctables_formats, NULL);
3798 value = pivot_value_new_user_text_nocopy (s);
3802 value = pivot_value_new_number (d);
3803 value->numeric.format = specs->specs[j].format;
3805 pivot_table_put (pt, dindexes, n_dindexes, value);
3810 pivot_table_submit (pt);
// Validates moving the category labels of axis A in table T to the position
// given by T->label_axis[A].  Each failed check reports an error with msg().
// The result is used as a boolean by ctables_prepare_table().
3814 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
3816 enum pivot_axis_type label_pos = t->label_axis[a];
// NOTE(review): T->clabels_from_axis, and T->clabels_example further down,
// are assigned before the checks below have run.
3820 t->clabels_from_axis = a;
// Names used in the error messages.
3822 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
3823 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
3825 const struct ctables_stack *stack = &t->stacks[a];
// The innermost variable of the first nest is the reference that all the
// other nests are compared against.
3829 const struct ctables_nest *n0 = &stack->nests[0];
3831 const struct variable *v0 = n0->vars[n0->n - 1];
3832 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3833 t->clabels_example = v0;
// Check: no category may be sorted by a summary function.
3835 for (size_t i = 0; i < c0->n_cats; i++)
3836 if (c0->cats[i].type == CCT_FUNCTION)
3838 msg (SE, _("%s=%s is not allowed with sorting based "
3839 "on a summary function."),
3840 subcommand_name, pos_name);
// Check: the reference variable must not be the nest's scale variable.
3843 if (n0->n - 1 == n0->scale_idx)
3845 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
3846 "but %s is a scale variable."),
3847 subcommand_name, pos_name, var_get_name (v0));
// Every other nest's innermost variable must be categorical and must agree
// with the reference on width, value labels, and category specification.
3851 for (size_t i = 1; i < stack->n; i++)
3853 const struct ctables_nest *ni = &stack->nests[i];
3855 const struct variable *vi = ni->vars[ni->n - 1];
3856 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
3858 if (ni->n - 1 == ni->scale_idx)
3860 msg (SE, _("%s=%s requires the variables to be moved to be "
3861 "categorical, but %s is a scale variable."),
3862 subcommand_name, pos_name, var_get_name (vi));
3865 if (var_get_width (v0) != var_get_width (vi))
3867 msg (SE, _("%s=%s requires the variables to be "
3868 "moved to have the same width, but %s has "
3869 "width %d and %s has width %d."),
3870 subcommand_name, pos_name,
3871 var_get_name (v0), var_get_width (v0),
3872 var_get_name (vi), var_get_width (vi));
3875 if (!val_labs_equal (var_get_value_labels (v0),
3876 var_get_value_labels (vi)))
3878 msg (SE, _("%s=%s requires the variables to be "
3879 "moved to have the same value labels, but %s "
3880 "and %s have different value labels."),
3881 subcommand_name, pos_name,
3882 var_get_name (v0), var_get_name (vi));
3885 if (!ctables_categories_equal (c0, ci))
3887 msg (SE, _("%s=%s requires the variables to be "
3888 "moved to have the same category "
3889 "specifications, but %s and %s have different "
3890 "category specifications."),
3891 subcommand_name, pos_name,
3892 var_get_name (v0), var_get_name (vi));
// Prepares table T for execution in four steps: builds the stack of nests for
// each axis, fills in default summary specifications, merges all summary
// specifications into T->summary_specs, and validates the label positions.
// The return value is the combined result of the two label-position checks.
3901 ctables_prepare_table (struct ctables_table *t)
// Step 1: enumerate the nests of each axis and, for each nest and domain
// type, list the variable positions that define that domain.
3903 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3906 t->stacks[a] = enumerate_fts (a, t->axes[a]);
3908 for (size_t j = 0; j < t->stacks[a].n; j++)
3910 struct ctables_nest *nest = &t->stacks[a].nests[j];
3911 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
// Sized for the worst case, in which every variable is listed.
3913 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
3914 nest->n_domains[dt] = 0;
3916 for (size_t k = 0; k < nest->n; k++)
3918 if (k == nest->scale_idx)
3927 if (a != PIVOT_AXIS_LAYER)
3934 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
3935 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
3936 : a == PIVOT_AXIS_ROW)
// True for the innermost categorical variable: the last variable, or the one
// before it when the last is the scale variable.
3938 if (k == nest->n - 1
3939 || (nest->scale_idx == nest->n - 1
3940 && k == nest->n - 2))
3946 if (a == PIVOT_AXIS_COLUMN)
3951 if (a == PIVOT_AXIS_ROW)
3956 nest->domains[dt][nest->n_domains[dt]++] = k;
// An axis without an expression gets a stack holding one empty nest.
3963 struct ctables_nest *nest = xmalloc (sizeof *nest);
3964 *nest = (struct ctables_nest) { .n = 0 };
3965 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
// Step 2: a nest on the summary axis with no cell summaries gets one default
// summary, the mean for a scale variable and the count otherwise.  Total
// summaries default to a copy of the cell summaries.
3968 struct ctables_stack *stack = &t->stacks[t->summary_axis];
3969 for (size_t i = 0; i < stack->n; i++)
3971 struct ctables_nest *nest = &stack->nests[i];
3972 if (!nest->specs[CSV_CELL].n)
3974 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
3975 specs->specs = xmalloc (sizeof *specs->specs);
3978 enum ctables_summary_function function
3979 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
// NOTE(review): `var' captures specs->var before the assignment to
// specs->var further down; confirm that specs->var is already set whenever
// the default format depends on it.
3980 struct ctables_var var = { .is_mrset = false, .var = specs->var };
3982 *specs->specs = (struct ctables_summary_spec) {
3983 .function = function,
3984 .format = ctables_summary_default_format (function, &var),
3985 .label = ctables_summary_default_label (function, 0),
3988 specs->var = nest->vars[0];
3990 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3991 &nest->specs[CSV_CELL]);
3993 else if (!nest->specs[CSV_TOTAL].n)
3994 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3995 &nest->specs[CSV_CELL]);
// Step 3: merge the spec sets of all nests into T->summary_specs.  Each
// round takes the smallest current spec by merge_item_compare_3way(), appends
// it to the merged set, and advances every item that compares equal to it,
// recording the merged index in that spec's axis_idx.
3998 struct ctables_summary_spec_set *merged = &t->summary_specs;
// NOTE(review): the capacity 2 * stack->n assumes that N_CSVS is at most 2,
// since the loop below adds N_CSVS items per nest -- confirm.
3999 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4001 for (size_t j = 0; j < stack->n; j++)
4003 const struct ctables_nest *nest = &stack->nests[j];
4005 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4006 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4011 struct merge_item min = items[0];
4012 for (size_t j = 1; j < n_left; j++)
4013 if (merge_item_compare_3way (&items[j], &min) < 0)
4016 if (merged->n >= merged->allocated)
4017 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4018 sizeof *merged->specs);
4019 merged->specs[merged->n++] = min.set->specs[min.ofs];
4021 for (size_t j = 0; j < n_left; )
4023 if (merge_item_compare_3way (&items[j], &min) == 0)
4025 struct merge_item *item = &items[j];
4026 item->set->specs[item->ofs].axis_idx = merged->n - 1;
// An exhausted item is replaced by the last one still in play.
4027 if (++item->ofs >= item->set->n)
4029 items[j] = items[--n_left];
// Debugging output: the merged functions, then each spec with its axis
// index.
4038 for (size_t j = 0; j < merged->n; j++)
4039 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4041 for (size_t j = 0; j < stack->n; j++)
4043 const struct ctables_nest *nest = &stack->nests[j];
4044 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4046 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4047 for (size_t k = 0; k < specs->n; k++)
4048 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4049 specs->specs[k].axis_idx);
// Step 4: validate label positions.  The column check is skipped if the row
// check fails.
4055 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4056 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
4060 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4061 enum pivot_axis_type a)
4063 struct ctables_stack *stack = &t->stacks[a];
4064 for (size_t i = 0; i < stack->n; i++)
4066 const struct ctables_nest *nest = &stack->nests[i];
4067 const struct variable *var = nest->vars[nest->n - 1];
4068 const union value *value = case_data (c, var);
4070 if (var_is_numeric (var) && value->f == SYSMIS)
4073 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4075 ctables_value_insert (t, value, var_get_width (var));
4080 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4082 const struct ctables_value *const *ap = a_;
4083 const struct ctables_value *const *bp = b_;
4084 const struct ctables_value *a = *ap;
4085 const struct ctables_value *b = *bp;
4086 const int *width = width_;
4087 return value_compare_3way (&a->value, &b->value, *width);
// Turns T's map of collected category-label values into the sorted array
// T->clabels_values and gives each value its index in that array as its leaf.
4091 ctables_sort_clabels_values (struct ctables_table *t)
// The example variable supplies the width and the category specification.
4093 const struct variable *v0 = t->clabels_example;
4094 int width = var_get_width (v0);
4096 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
// Also insert labeled values that match a category, which may not occur in
// the data.  NOTE(review): the condition guarding this loop is not visible
// here; presumably it depends on the show-empty setting -- confirm.
4099 const struct val_labs *val_labs = var_get_value_labels (v0);
4100 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4101 vl = val_labs_next (val_labs, vl))
4102 if (ctables_categories_match (c0, &vl->value, v0))
4103 ctables_value_insert (t, &vl->value, width);
// Flatten the map into an array...
4106 size_t n = hmap_count (&t->clabels_values_map);
4107 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4109 struct ctables_value *clv;
4111 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4112 t->clabels_values[i++] = clv;
4113 t->n_clabels_values = n;
// ...sort it by value...
4116 sort (t->clabels_values, n, sizeof *t->clabels_values,
4117 compare_clabels_values_3way, &width);
// ...and number the values in sorted order.
4119 for (size_t i = 0; i < n; i++)
4120 t->clabels_values[i]->leaf = i;
// Adds to OCCURRENCES the values of VAR implied by each category in CATS.
// Apart from the explicit number of a numeric category, every value added
// comes from VAR's value labels.
4124 ctables_add_category_occurrences (const struct variable *var,
4125 struct hmap *occurrences,
4126 const struct ctables_categories *cats)
4128 const struct val_labs *val_labs = var_get_value_labels (var);
4130 for (size_t i = 0; i < cats->n_cats; i++)
4132 const struct ctables_category *c = &cats->cats[i];
// A category that names one number: add that number.
4136 ctables_add_occurrence (var, &(const union value) { .f = c->number },
// A numeric range: add the labeled values that fall inside it, bounds
// included.
4144 assert (var_is_numeric (var));
4145 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4146 vl = val_labs_next (val_labs, vl))
4147 if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1])
4148 ctables_add_occurrence (var, &vl->value, occurrences);
// Add the labeled values that are missing values of VAR.
4152 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4153 vl = val_labs_next (val_labs, vl))
4154 if (var_is_value_missing (var, &vl->value))
4155 ctables_add_occurrence (var, &vl->value, occurrences);
// Add every labeled value.
4159 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4160 vl = val_labs_next (val_labs, vl))
4161 ctables_add_occurrence (var, &vl->value, occurrences);
4164 case CCT_POSTCOMPUTE:
// Add the labeled values, leaving out missing ones unless the category
// includes missing values.
4174 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4175 vl = val_labs_next (val_labs, vl))
4176 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4177 ctables_add_occurrence (var, &vl->value, occurrences);
4180 case CCT_EXCLUDED_MISSING:
4187 ctables_section_recurse_add_empty_categories (
4188 struct ctables_section *s,
4189 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4190 enum pivot_axis_type a, size_t a_idx)
4192 if (a >= PIVOT_N_AXES)
4193 ctables_cell_insert__ (s, c, cats);
4194 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4195 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4198 const struct variable *var = s->nests[a]->vars[a_idx];
4199 const struct ctables_categories *categories = s->table->categories[
4200 var_get_dict_index (var)];
4201 int width = var_get_width (var);
4202 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4203 const struct ctables_occurrence *o;
4204 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4206 union value *value = case_data_rw (c, var);
4207 value_destroy (value, width);
4208 value_clone (value, &o->value, width);
4209 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4210 assert (cats[a][a_idx] != NULL);
4211 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4214 for (size_t i = 0; i < categories->n_cats; i++)
4216 const struct ctables_category *cat = &categories->cats[i];
4217 if (cat->type == CCT_POSTCOMPUTE)
4219 cats[a][a_idx] = cat;
4220 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
// For each categorical variable in section S whose category specification
// asks for empty categories, adds the specification's values to that
// variable's occurrences.  It then enumerates the combinations with
// ctables_section_recurse_add_empty_categories().
4227 ctables_section_add_empty_categories (struct ctables_section *s)
// Tracks whether any variable has show_empty set.
4229 bool show_empty = false;
4230 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4232 for (size_t k = 0; k < s->nests[a]->n; k++)
4233 if (k != s->nests[a]->scale_idx)
4235 const struct variable *var = s->nests[a]->vars[k];
4236 const struct ctables_categories *cats = s->table->categories[
4237 var_get_dict_index (var)];
4238 if (cats->show_empty)
4241 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
// Enumerate the combinations using a scratch case built from the
// dictionary's prototype.  NOTE(review): the same fixed limit of 10
// categories per axis applies here.
4247 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4248 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4249 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Executes the tables in CT against dataset DS.  Allocates and builds the
   sections of every table, reads each case from the procedure's input and
   inserts it into every section of every table, then, per table, sorts
   category-label values where applicable, adds empty categories, and
   outputs the table.  Returns the value of proc_commit(). */
4254 ctables_execute (struct dataset *ds, struct ctables *ct)
4256 for (size_t i = 0; i < ct->n_tables; i++)
4258 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of row, column, and layer stack
   entries, counting an empty stack as one. */
4259 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4260 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4261 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4262 sizeof *t->sections);
4263 size_t ix[PIVOT_N_AXES];
4264 ctables_table_add_section (t, 0, ix);
4267 struct casereader *input = proc_open (ds);
4268 bool warn_on_invalid = true;
/* Each case is unreferenced before the next one is read. */
4269 for (struct ccase *c = casereader_read (input); c;
4270 case_unref (c), c = casereader_read (input))
/* d_weight comes from the dictionary's weighting.  e_weight is taken from
   ct->e_weight when that variable is set; the alternative is on a line not
   shown in this listing. */
4272 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4274 double e_weight = (ct->e_weight
4275 ? var_force_valid_weight (ct->e_weight,
4276 case_num (c, ct->e_weight),
4280 for (size_t i = 0; i < ct->n_tables; i++)
4282 struct ctables_table *t = ct->tables[i];
4284 for (size_t j = 0; j < t->n_sections; j++)
4285 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect category-label values for axes whose labels are displayed on a
   different axis. */
4287 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4288 if (t->label_axis[a] != a)
4289 ctables_insert_clabels_values (t, c, a);
4292 casereader_destroy (input);
4294 for (size_t i = 0; i < ct->n_tables; i++)
4296 struct ctables_table *t = ct->tables[i];
4298 if (t->clabels_example)
4299 ctables_sort_clabels_values (t);
4301 for (size_t j = 0; j < t->n_sections; j++)
4302 ctables_section_add_empty_categories (&t->sections[j]);
4304 ctables_table_output (ct, ct->tables[i]);
4306 return proc_commit (ds);
/* Type of a function that parses part of a postcompute expression from a
   lexer and returns the resulting expression node.  The binary-operator
   parsers below take one of these as `parse_next_level'. */
4311 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
/* Destroys postcompute expression E.  For the operator cases that precede
   the loop below, both subexpressions are destroyed recursively; E's
   location is destroyed as well.

   NOTE(review): several case labels and short statements (presumably the
   free() calls, `break's, and a null check on E) are elided from this
   listing -- confirm against the full file. */
4314 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4320 case CTPO_CAT_STRING:
4330 for (size_t i = 0; i < 2; i++)
4331 ctables_pcexpr_destroy (e->subs[i]);
/* The visible terminal operators below need no recursive cleanup. */
4335 case CTPO_CAT_NUMBER:
4336 case CTPO_CAT_RANGE:
4337 case CTPO_CAT_MISSING:
4338 case CTPO_CAT_OTHERNM:
4339 case CTPO_CAT_SUBTOTAL:
4340 case CTPO_CAT_TOTAL:
4344 msg_location_destroy (e->location);
/* Allocates a new expression node with SUB0 and SUB1 as its two
   subexpressions.  The node's location is the merger of the two operands'
   locations.

   NOTE(review): the initializer that stores OP and the return statement
   are on lines elided from this listing; presumably the new node is
   returned. */
4349 static struct ctables_pcexpr *
4350 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4351 struct ctables_pcexpr *sub0,
4352 struct ctables_pcexpr *sub1)
4354 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4355 *e = (struct ctables_pcexpr) {
4357 .subs = { sub0, sub1 },
4358 .location = msg_location_merged (sub0->location, sub1->location),
4363 /* How to parse an operator. */
/* Pairs a lexer token with the postcompute operation it stands for.  (The
   `struct operator' line itself is elided from this listing.) */
4366 enum token_type token; /* Token that denotes the operator. */
4367 enum ctables_postcompute_op op; /* Operation built for that token. */
/* Searches the N_OPS elements of OPS for an operator whose token equals
   LEXER's current token.

   NOTE(review): the statements guarded by the checks below are elided from
   this listing.  Presumably a matched token other than T_NEG_NUM is
   consumed and the matching entry (or null if none) is returned. */
4370 static const struct operator *
4371 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4373 for (const struct operator *op = ops; op < ops + n_ops; op++)
4374 if (lex_token (lexer) == op->token)
/* T_NEG_NUM is treated differently from every other operator token. */
4376 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS elements of OPS,
   given an already parsed left operand LHS.  Each right operand is parsed
   with PARSE_NEXT_LEVEL, and the result is folded into LHS with
   ctables_pcexpr_allocate_binary(), so the chain associates to the left.
   If CHAIN_WARNING is nonnull, it may be issued as a warning at LHS's
   location when op_count exceeds 1.

   NOTE(review): the checks for "no operator matched" and "right operand
   failed to parse", and the return statements, are on lines elided from
   this listing. */
4385 static struct ctables_pcexpr *
4386 parse_binary_operators__ (struct lexer *lexer,
4387 const struct operator ops[], size_t n_ops,
4388 parse_recursively_func *parse_next_level,
4389 const char *chain_warning,
4390 struct ctables_pcexpr *lhs)
/* op_count is the number of operators folded into LHS so far. */
4392 for (int op_count = 0; ; op_count++)
4394 const struct operator *op = match_operator (lexer, ops, n_ops);
4397 if (op_count > 1 && chain_warning)
4398 msg_at (SW, lhs->location, "%s", chain_warning);
4403 struct ctables_pcexpr *rhs = parse_next_level (lexer);
4406 ctables_pcexpr_destroy (lhs);
4410 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one operand with PARSE_NEXT_LEVEL, then hands it to
   parse_binary_operators__() to parse any following chain of operators
   from OPS.  The remaining arguments are passed through unchanged.

   NOTE(review): a check on the first operand (presumably for a null result)
   sits on lines elided from this listing. */
4414 static struct ctables_pcexpr *
4415 parse_binary_operators (struct lexer *lexer,
4416 const struct operator ops[], size_t n_ops,
4417 parse_recursively_func *parse_next_level,
4418 const char *chain_warning)
4420 struct ctables_pcexpr *lhs = parse_next_level (lexer);
4424 return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
4425 chain_warning, lhs);
/* Forward declaration: parse_primary() calls parse_add() for parenthesized
   subexpressions before parse_add() is defined. */
4428 static struct ctables_pcexpr *parse_add (struct lexer *);
/* Returns, by value, a CTPO_CAT_RANGE expression whose range bounds are LOW
   and HIGH.  No other members are set explicitly in the visible lines. */
4430 static struct ctables_pcexpr
4431 ctpo_cat_range (double low, double high)
4433 return (struct ctables_pcexpr) {
4434 .op = CTPO_CAT_RANGE,
4435 .range = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node.  The forms recognized are:

     - a numeric constant;
     - the keywords MISSING, OTHERNM, TOTAL, and SUBTOTAL (the last
       optionally followed by a bracketed integer index of at least 1);
     - a bracketed category: [LO THRU n], [n THRU HI], [n THRU m], [n], or
       ["string"];
     - a parenthesized subexpression, parsed with parse_add().

   For every form except the parenthesized one, the node's location spans
   the tokens consumed.

   NOTE(review): this listing omits short lines (braces, lex_get() calls,
   failure returns), so the failure paths are not visible here. */
4439 static struct ctables_pcexpr *
4440 parse_primary (struct lexer *lexer)
4442 int start_ofs = lex_ofs (lexer);
4443 struct ctables_pcexpr e;
4444 if (lex_is_number (lexer))
4446 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4447 .number = lex_number (lexer) };
4450 else if (lex_match_id (lexer, "MISSING"))
4451 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4452 else if (lex_match_id (lexer, "OTHERNM"))
4453 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4454 else if (lex_match_id (lexer, "TOTAL"))
4455 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4456 else if (lex_match_id (lexer, "SUBTOTAL"))
/* An index of 0 means that no bracketed index was given. */
4458 size_t subtotal_index = 0;
4459 if (lex_match (lexer, T_LBRACK))
4461 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4463 subtotal_index = lex_integer (lexer);
4465 if (!lex_force_match (lexer, T_RBRACK))
4468 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4469 .subtotal_index = subtotal_index };
4471 else if (lex_match (lexer, T_LBRACK))
4473 if (lex_match_id (lexer, "LO"))
/* Both THRU and a following number are required.  The number check must be
   negated, like the lex_force_num() check in the "n THRU m" branch below;
   otherwise the failure statement runs when a number is present, and
   lex_number() is reached when it is not. */
4475 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
4477 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4480 else if (lex_is_number (lexer))
4482 double number = lex_number (lexer);
4484 if (lex_match_id (lexer, "THRU"))
4486 if (lex_match_id (lexer, "HI"))
4487 e = ctpo_cat_range (number, DBL_MAX);
4490 if (!lex_force_num (lexer))
4492 e = ctpo_cat_range (number, lex_number (lexer));
4497 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4500 else if (lex_is_string (lexer))
/* The string is copied, so the node holds its own allocation. */
4502 e = (struct ctables_pcexpr) {
4503 .op = CTPO_CAT_STRING,
4504 .string = ss_xstrdup (lex_tokss (lexer)),
4510 lex_error (lexer, NULL);
4514 if (!lex_force_match (lexer, T_RBRACK))
/* Only the string form has an allocation to deal with on this failure. */
4516 if (e.op == CTPO_CAT_STRING)
4521 else if (lex_match (lexer, T_LPAREN))
4523 struct ctables_pcexpr *ep = parse_add (lexer);
4526 if (!lex_force_match (lexer, T_RPAREN))
4528 ctables_pcexpr_destroy (ep);
4535 lex_error (lexer, NULL);
4539 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4540 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for SUB.  Its location runs from token
   offset START_OFS to the token just before LEXER's current one.

   NOTE(review): the initializers for the operator and the subexpression,
   and the return statement, are on lines elided from this listing.
   Judging by the function name, the node is presumably a negation of SUB
   and is returned. */
4543 static struct ctables_pcexpr *
4544 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4545 struct lexer *lexer, int start_ofs)
4547 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4548 *e = (struct ctables_pcexpr) {
4551 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Operands are parsed with
   parse_primary(), and chains of `**' get the left-associativity warning
   defined below.  When the current token is T_NEG_NUM and the next one is
   T_EXP, the function instead builds a constant holding the negation of the
   token's value, parses the rest of the `**' chain with that constant as
   left operand, and wraps the result with ctables_pcexpr_allocate_neg().

   NOTE(review): the comment opened at the "Special case" note below is cut
   off in this listing (its closing line is elided), as are a few short
   statements between the visible lines. */
4556 static struct ctables_pcexpr *
4557 parse_exp (struct lexer *lexer)
4559 static const struct operator op = { T_EXP, CTPO_POW };
4561 const char *chain_warning =
4562 _("The exponentiation operator (`**') is left-associative: "
4563 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4564 "To disable this warning, insert parentheses.");
4566 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4567 return parse_binary_operators (lexer, &op, 1,
4568 parse_primary, chain_warning);
4570 /* Special case for situations like "-5**6", which must be parsed as
4573 int start_ofs = lex_ofs (lexer);
4574 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4575 *lhs = (struct ctables_pcexpr) {
4576 .op = CTPO_CONSTANT,
4577 .number = -lex_tokval (lexer),
4578 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4582 struct ctables_pcexpr *node = parse_binary_operators__ (
4583 lexer, &op, 1, parse_primary, chain_warning, lhs);
4587 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4590 /* Parses the unary minus level. */
/* Without a leading T_DASH this simply defers to parse_exp().  Otherwise it
   recurses to parse the operand, so repeated minus signs nest, and wraps
   the operand with ctables_pcexpr_allocate_neg().

   NOTE(review): a check on the operand (presumably for a null result) is on
   lines elided from this listing. */
4591 static struct ctables_pcexpr *
4592 parse_neg (struct lexer *lexer)
4594 int start_ofs = lex_ofs (lexer);
4595 if (!lex_match (lexer, T_DASH))
4596 return parse_exp (lexer);
4598 struct ctables_pcexpr *inner = parse_neg (lexer);
4602 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4605 /* Parses the multiplication and division level. */
/* Recognizes T_ASTERISK as CTPO_MUL and T_SLASH as CTPO_DIV and delegates to
   parse_binary_operators().  The remaining arguments of that call are on a
   line elided from this listing. */
4606 static struct ctables_pcexpr *
4607 parse_mul (struct lexer *lexer)
4609 static const struct operator ops[] =
4611 { T_ASTERISK, CTPO_MUL },
4612 { T_SLASH, CTPO_DIV },
4615 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4619 /* Parses the addition and subtraction level. */
/* Recognizes T_PLUS as CTPO_ADD and T_DASH as CTPO_SUB and delegates to
   parse_binary_operators().  The remaining arguments of that call are on a
   line elided from this listing. */
4620 static struct ctables_pcexpr *
4621 parse_add (struct lexer *lexer)
4623 static const struct operator ops[] =
4625 { T_PLUS, CTPO_ADD },
4626 { T_DASH, CTPO_SUB },
/* A negative-number token in operator position is mapped to addition.
   Presumably the negative number itself then serves as the right operand,
   since match_operator() singles out T_NEG_NUM -- confirm. */
4627 { T_NEG_NUM, CTPO_ADD },
4630 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
/* Looks up a postcompute named NAME in CT's `postcomputes' hash map.  Both
   the hash and the name comparison are case-insensitive.

   NOTE(review): the return statements are on lines elided from this
   listing; presumably the match is returned, or null if there is none. */
4634 static struct ctables_postcompute *
4635 ctables_find_postcompute (struct ctables *ct, const char *name)
4637 struct ctables_postcompute *pc;
4638 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4639 utf8_hash_case_string (name, 0), &ct->postcomputes)
4640 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form `&name = EXPR
   (expression)', and records the resulting postcompute in CT.  If a
   postcompute with the same name already exists, a warning is issued and
   its old expression and location are destroyed; otherwise a new entry is
   inserted into CT's `postcomputes' map.

   NOTE(review): the return type, the failure paths, and the assignment of
   the parsed expression are on lines elided from this listing. */
4646 ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
/* Start one token back; presumably that covers the PCOMPUTE keyword, which
   the caller has already consumed -- confirm. */
4648 int pcompute_start = lex_ofs (lexer) - 1;
4650 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4653 char *name = ss_xstrdup (lex_tokss (lexer));
4656 if (!lex_force_match (lexer, T_EQUALS)
4657 || !lex_force_match_id (lexer, "EXPR")
4658 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression; it is used for the label
   below. */
4664 int expr_start = lex_ofs (lexer);
4665 struct ctables_pcexpr *expr = parse_add (lexer);
4666 int expr_end = lex_ofs (lexer) - 1;
4667 if (!expr || !lex_force_match (lexer, T_RPAREN))
4672 int pcompute_end = lex_ofs (lexer) - 1;
4674 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4677 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
4680 msg_at (SW, location, _("New definition of &%s will override the "
4681 "previous definition."),
4683 msg_at (SN, pc->location, _("This is the previous definition."));
4685 ctables_pcexpr_destroy (pc->expr);
4686 msg_location_destroy (pc->location);
4691 pc = xmalloc (sizeof *pc);
4692 *pc = (struct ctables_postcompute) { .name = name };
4693 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4694 utf8_hash_case_string (pc->name, 0));
4697 pc->location = location;
/* The label is the source text of the expression.  A guard on a line not
   shown here may make this conditional. */
4699 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary functions that follows PPROPERTIES FORMAT into
   SSS, which is reset to an empty set first.  Each entry is a summary
   function, a percentile (for CTSF_PTILE only), and a format specifier that
   must be valid for output and compatible with numeric values.  Parsing
   stops at the end of the command, at a slash, or at a LABEL or
   HIDESOURCECATS keyword.

   NOTE(review): the return type, failure jumps, and return statements are on
   lines elided from this listing.  The final uninit call presumably belongs
   to the error path. */
4704 ctables_parse_pproperties_format (struct lexer *lexer,
4705 struct ctables_summary_spec_set *sss)
4707 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4709 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4710 && !(lex_token (lexer) == T_ID
4711 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4712 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4713 lex_tokss (lexer)))))
4715 /* Parse function. */
4716 enum ctables_summary_function function;
4717 if (!parse_ctables_summary_function (lexer, &function))
4720 /* Parse percentile. */
4721 double percentile = 0;
4722 if (function == CTSF_PTILE)
4724 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4726 percentile = lex_number (lexer);
4731 struct fmt_spec format;
4732 if (!parse_format_specifier (lexer, &format)
4733 || !fmt_check_output (&format)
4734 || !fmt_check_type_compat (&format, VAL_NUMERIC))
/* Grow the array when full, then append the new spec. */
4737 if (sss->n >= sss->allocated)
4738 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4739 sizeof *sss->specs);
4740 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4741 .function = function,
4742 .percentile = percentile,
4749 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: one or more `&name'
   references to existing postcomputes in CT, followed by any of LABEL,
   FORMAT, and HIDESOURCECATS settings.  Each setting is applied to every
   referenced postcompute.

   NOTE(review): the return type, the declaration of `n_pcs', the failure
   paths, and the cleanup of `pcs' are on lines elided from this listing. */
4754 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
4756 struct ctables_postcompute **pcs = NULL;
4758 size_t allocated_pcs = 0;
/* Collect the postcomputes named by the `&name' references. */
4760 while (lex_match (lexer, T_AND))
4762 if (!lex_force_id (lexer))
4764 struct ctables_postcompute *pc
4765 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
4768 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
4773 if (n_pcs >= allocated_pcs)
4774 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
4778 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4780 if (lex_match_id (lexer, "LABEL"))
4782 lex_match (lexer, T_EQUALS);
4783 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
4786 for (size_t i = 0; i < n_pcs; i++)
4788 free (pcs[i]->label);
4789 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
4794 else if (lex_match_id (lexer, "FORMAT"))
4796 lex_match (lexer, T_EQUALS);
4798 struct ctables_summary_spec_set sss;
4799 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute receives a clone of SSS.  The conditions that choose
   between reusing and allocating `specs' are on lines not shown here. */
4802 for (size_t i = 0; i < n_pcs; i++)
4805 ctables_summary_spec_set_uninit (pcs[i]->specs);
4807 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
4808 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
4810 ctables_summary_spec_set_uninit (&sss);
4812 else if (lex_match_id (lexer, "HIDESOURCECATS"))
4814 lex_match (lexer, T_EQUALS);
4815 bool hide_source_cats;
4816 if (!parse_bool (lexer, &hide_source_cats))
4818 for (size_t i = 0; i < n_pcs; i++)
4819 pcs[i]->hide_source_cats = hide_source_cats;
4823 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Parses and executes the CTABLES command for dataset DS using LEXER.

   Parsing has two phases.  First come the global subcommands (FORMAT,
   VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT,
   HIDESMALLCOUNTS), which are accepted until the first TABLE keyword.  Then,
   for each TABLE, the row, column, and layer axes are parsed, followed by
   that table's own subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES,
   TITLES, SIGTEST, COMPARETEST).

   After parsing, ctables_execute() runs the tables.  The function returns
   CMD_SUCCESS if execution succeeds and CMD_FAILURE otherwise.

   NOTE(review): this listing omits short lines (braces, `goto's, some
   initializers, the return type), so the error paths are only partly
   visible. */
4836 cmd_ctables (struct lexer *lexer, struct dataset *ds)
/* Every variable starts with the global "show variables" setting. */
4838 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
4839 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
4840 enum settings_value_show tvars = settings_get_show_variables ();
4841 for (size_t i = 0; i < n_vars; i++)
4842 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on a private copy of the default look, with empty rows and columns
   kept in the output. */
4844 struct pivot_table_look *look = pivot_table_look_unshare (
4845 pivot_table_look_ref (pivot_table_look_get_default ()));
4846 look->omit_empty = false;
4848 struct ctables *ct = xmalloc (sizeof *ct);
4849 *ct = (struct ctables) {
4850 .dict = dataset_dict (ds),
4852 .ctables_formats = FMT_SETTINGS_INIT,
4854 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Custom-currency styles for the CTABLES-specific formats.  One string set
   is used when the decimal point is `.' and the other when it is not. */
4860 const char *dot_string;
4861 const char *comma_string;
4863 static const struct ctf ctfs[4] = {
4864 { CTEF_NEGPAREN, "(,,,)", "(...)" },
4865 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
4866 { CTEF_PAREN, "-,(,),", "-.(.)." },
4867 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
4869 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
4870 for (size_t i = 0; i < 4; i++)
4872 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
4873 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
4874 fmt_number_style_from_string (s));
4877 if (!lex_force_match (lexer, T_SLASH))
/* Phase 1: global subcommands, up to the first TABLE. */
4880 while (!lex_match_id (lexer, "TABLE"))
4882 if (lex_match_id (lexer, "FORMAT"))
4884 double widths[2] = { SYSMIS, SYSMIS };
4885 double units_per_inch = 72.0;
4887 while (lex_token (lexer) != T_SLASH)
4889 if (lex_match_id (lexer, "MINCOLWIDTH"))
4891 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
4894 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
4896 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
4899 else if (lex_match_id (lexer, "UNITS"))
4901 lex_match (lexer, T_EQUALS);
4902 if (lex_match_id (lexer, "POINTS"))
4903 units_per_inch = 72.0;
4904 else if (lex_match_id (lexer, "INCHES"))
4905 units_per_inch = 1.0;
4906 else if (lex_match_id (lexer, "CM"))
4907 units_per_inch = 2.54;
4910 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
4914 else if (lex_match_id (lexer, "EMPTY"))
4919 lex_match (lexer, T_EQUALS);
4920 if (lex_match_id (lexer, "ZERO"))
4922 /* Nothing to do. */
4924 else if (lex_match_id (lexer, "BLANK"))
4925 ct->zero = xstrdup ("");
4926 else if (lex_force_string (lexer))
4928 ct->zero = ss_xstrdup (lex_tokss (lexer));
4934 else if (lex_match_id (lexer, "MISSING"))
4936 lex_match (lexer, T_EQUALS);
4937 if (!lex_force_string (lexer))
/* A string other than "." is copied; the alternative for "." is on a line
   not shown in this listing. */
4941 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
4942 ? ss_xstrdup (lex_tokss (lexer))
4948 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
4949 "UNITS", "EMPTY", "MISSING");
4954 if (widths[0] != SYSMIS && widths[1] != SYSMIS
4955 && widths[0] > widths[1])
4957 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each specified width from the chosen units to 1/96-inch units. */
4961 for (size_t i = 0; i < 2; i++)
4962 if (widths[i] != SYSMIS)
4964 int *wr = ct->look->width_ranges[TABLE_HORZ];
4965 wr[i] = widths[i] / units_per_inch * 96.0;
4970 else if (lex_match_id (lexer, "VLABELS"))
4972 if (!lex_force_match_id (lexer, "VARIABLES"))
4974 lex_match (lexer, T_EQUALS);
4976 struct variable **vars;
4978 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
4982 if (!lex_force_match_id (lexer, "DISPLAY"))
4987 lex_match (lexer, T_EQUALS);
4989 enum ctables_vlabel vlabel;
4990 if (lex_match_id (lexer, "DEFAULT"))
4991 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
4992 else if (lex_match_id (lexer, "NAME"))
4994 else if (lex_match_id (lexer, "LABEL"))
4995 vlabel = CTVL_LABEL;
4996 else if (lex_match_id (lexer, "BOTH"))
4998 else if (lex_match_id (lexer, "NONE"))
5002 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5008 for (size_t i = 0; i < n_vars; i++)
5009 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5012 else if (lex_match_id (lexer, "MRSETS"))
5014 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5016 lex_match (lexer, T_EQUALS);
5017 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5020 else if (lex_match_id (lexer, "SMISSING"))
5022 if (lex_match_id (lexer, "VARIABLE"))
5023 ct->smissing_listwise = false;
5024 else if (lex_match_id (lexer, "LISTWISE"))
5025 ct->smissing_listwise = true;
5028 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5032 else if (lex_match_id (lexer, "PCOMPUTE"))
5034 if (!ctables_parse_pcompute (lexer, ct))
5037 else if (lex_match_id (lexer, "PPROPERTIES"))
5039 if (!ctables_parse_pproperties (lexer, ct))
5042 else if (lex_match_id (lexer, "WEIGHT"))
5044 if (!lex_force_match_id (lexer, "VARIABLE"))
5046 lex_match (lexer, T_EQUALS);
5047 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* The keyword must not begin with a blank.  It is compared against an
   identifier token, so " HIDESMALLCOUNTS" could never match and the
   subcommand was unreachable.  The error message below spells it
   "HIDESMALLCOUNTS". */
5051 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
5053 if (lex_match_id (lexer, "COUNT"))
5055 lex_match (lexer, T_EQUALS);
5056 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5059 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT, a threshold that is still 0 becomes 5. */
5062 else if (ct->hide_threshold == 0)
5063 ct->hide_threshold = 5;
5067 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5068 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5069 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5073 if (!lex_force_match (lexer, T_SLASH))
/* Phase 2: one iteration per TABLE subcommand. */
5077 size_t allocated_tables = 0;
5080 if (ct->n_tables >= allocated_tables)
5081 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5082 sizeof *ct->tables);
/* Each table gets a `categories' array with one entry per dictionary
   variable.  What the entries are set to is on lines not shown here. */
5084 struct ctables_category *cat = xmalloc (sizeof *cat);
5085 *cat = (struct ctables_category) {
5087 .include_missing = false,
5088 .sort_ascending = true,
5091 struct ctables_categories *c = xmalloc (sizeof *c);
5092 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5093 *c = (struct ctables_categories) {
5100 struct ctables_categories **categories = xnmalloc (n_vars,
5101 sizeof *categories);
5102 for (size_t i = 0; i < n_vars; i++)
5105 struct ctables_table *t = xmalloc (sizeof *t);
5106 *t = (struct ctables_table) {
5108 .slabels_axis = PIVOT_AXIS_COLUMN,
5109 .slabels_visible = true,
5110 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5112 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5113 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5114 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5116 .clabels_from_axis = PIVOT_AXIS_LAYER,
5117 .categories = categories,
5118 .n_categories = n_vars,
5121 ct->tables[ct->n_tables++] = t;
/* Axes are given as: rows [BY columns [BY layers]]. */
5123 lex_match (lexer, T_EQUALS);
5124 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5126 if (lex_match (lexer, T_BY))
5128 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5129 ct, t, PIVOT_AXIS_COLUMN))
5132 if (lex_match (lexer, T_BY))
5134 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5135 ct, t, PIVOT_AXIS_LAYER))
5140 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5141 && !t->axes[PIVOT_AXIS_LAYER])
5143 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables are restricted to a single axis. */
5147 const struct ctables_axis *scales[PIVOT_N_AXES];
5148 size_t n_scales = 0;
5149 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5151 scales[a] = find_scale (t->axes[a]);
5157 msg (SE, _("Scale variables may appear only on one axis."));
5158 if (scales[PIVOT_AXIS_ROW])
5159 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5160 _("This scale variable appears on the rows axis."));
5161 if (scales[PIVOT_AXIS_COLUMN])
5162 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5163 _("This scale variable appears on the columns axis."));
5164 if (scales[PIVOT_AXIS_LAYER])
5165 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5166 _("This scale variable appears on the layer axis."));
/* Summaries are likewise restricted to a single axis. */
5170 const struct ctables_axis *summaries[PIVOT_N_AXES];
5171 size_t n_summaries = 0;
5172 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5174 summaries[a] = (scales[a]
5176 : find_categorical_summary_spec (t->axes[a]));
5180 if (n_summaries > 1)
5182 msg (SE, _("Summaries may appear only on one axis."));
5183 if (summaries[PIVOT_AXIS_ROW])
5184 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5185 _("This variable on the rows axis has a summary."));
5186 if (summaries[PIVOT_AXIS_COLUMN])
5187 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5188 _("This variable on the columns axis has a summary."));
5189 if (summaries[PIVOT_AXIS_LAYER])
5190 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5191 _("This variable on the layers axis has a summary."));
/* The summary axis is one that has a summary; when no axis has one, it is
   one that has any variables at all. */
5194 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5195 if (n_summaries ? summaries[a] : t->axes[a])
5197 t->summary_axis = a;
5201 if (lex_token (lexer) == T_ENDCMD)
5203 if (!ctables_prepare_table (t))
5207 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, until the next TABLE or the end of the command. */
5210 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5212 if (lex_match_id (lexer, "SLABELS"))
5214 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5216 if (lex_match_id (lexer, "POSITION"))
5218 lex_match (lexer, T_EQUALS);
5219 if (lex_match_id (lexer, "COLUMN"))
5220 t->slabels_axis = PIVOT_AXIS_COLUMN;
5221 else if (lex_match_id (lexer, "ROW"))
5222 t->slabels_axis = PIVOT_AXIS_ROW;
5223 else if (lex_match_id (lexer, "LAYER"))
5224 t->slabels_axis = PIVOT_AXIS_LAYER;
5227 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5231 else if (lex_match_id (lexer, "VISIBLE"))
5233 lex_match (lexer, T_EQUALS);
5234 if (!parse_bool (lexer, &t->slabels_visible))
5239 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5244 else if (lex_match_id (lexer, "CLABELS"))
5246 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5248 if (lex_match_id (lexer, "AUTO"))
5250 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5251 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5253 else if (lex_match_id (lexer, "ROWLABELS"))
5255 lex_match (lexer, T_EQUALS);
5256 if (lex_match_id (lexer, "OPPOSITE"))
5257 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5258 else if (lex_match_id (lexer, "LAYER"))
5259 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5262 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5266 else if (lex_match_id (lexer, "COLLABELS"))
5268 lex_match (lexer, T_EQUALS);
5269 if (lex_match_id (lexer, "OPPOSITE"))
5270 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5271 else if (lex_match_id (lexer, "LAYER"))
5272 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5275 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5281 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5287 else if (lex_match_id (lexer, "CRITERIA"))
5289 if (!lex_force_match_id (lexer, "CILEVEL"))
5291 lex_match (lexer, T_EQUALS);
5293 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5295 t->cilevel = lex_number (lexer);
5298 else if (lex_match_id (lexer, "CATEGORIES"))
5300 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5304 else if (lex_match_id (lexer, "TITLES"))
5309 if (lex_match_id (lexer, "CAPTION"))
5310 textp = &t->caption;
5311 else if (lex_match_id (lexer, "CORNER"))
5313 else if (lex_match_id (lexer, "TITLE"))
5317 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5320 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces. */
5322 struct string s = DS_EMPTY_INITIALIZER;
5323 while (lex_is_string (lexer))
5325 if (!ds_is_empty (&s))
5326 ds_put_byte (&s, ' ');
5327 ds_put_substring (&s, lex_tokss (lexer));
5331 *textp = ds_steal_cstr (&s);
5333 while (lex_token (lexer) != T_SLASH
5334 && lex_token (lexer) != T_ENDCMD);
5336 else if (lex_match_id (lexer, "SIGTEST"))
5340 t->chisq = xmalloc (sizeof *t->chisq);
5341 *t->chisq = (struct ctables_chisq) {
5343 .include_mrsets = true,
5344 .all_visible = true,
5350 if (lex_match_id (lexer, "TYPE"))
5352 lex_match (lexer, T_EQUALS);
5353 if (!lex_force_match_id (lexer, "CHISQUARE"))
5356 else if (lex_match_id (lexer, "ALPHA"))
5358 lex_match (lexer, T_EQUALS);
5359 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5361 t->chisq->alpha = lex_number (lexer);
5364 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5366 lex_match (lexer, T_EQUALS);
/* Negated like every other parse_bool() call in this function, so that the
   statement it guards (elided here; presumably the error exit) runs when
   parsing fails rather than when it succeeds. */
5367 if (!parse_bool (lexer, &t->chisq->include_mrsets))
5370 else if (lex_match_id (lexer, "CATEGORIES"))
5372 lex_match (lexer, T_EQUALS);
5373 if (lex_match_id (lexer, "ALLVISIBLE"))
5374 t->chisq->all_visible = true;
5375 else if (lex_match_id (lexer, "SUBTOTALS"))
5376 t->chisq->all_visible = false;
5379 lex_error_expecting (lexer,
5380 "ALLVISIBLE", "SUBTOTALS");
5386 lex_error_expecting (lexer, "TYPE", "ALPHA",
5387 "INCLUDEMRSETS", "CATEGORIES");
5391 while (lex_token (lexer) != T_SLASH
5392 && lex_token (lexer) != T_ENDCMD);
5394 else if (lex_match_id (lexer, "COMPARETEST"))
5398 t->pairwise = xmalloc (sizeof *t->pairwise);
5399 *t->pairwise = (struct ctables_pairwise) {
5401 .alpha = { .05, .05 },
5402 .adjust = BONFERRONI,
5403 .include_mrsets = true,
5404 .meansvariance_allcats = true,
5405 .all_visible = true,
5414 if (lex_match_id (lexer, "TYPE"))
5416 lex_match (lexer, T_EQUALS);
5417 if (lex_match_id (lexer, "PROP"))
5418 t->pairwise->type = PROP;
5419 else if (lex_match_id (lexer, "MEAN"))
5420 t->pairwise->type = MEAN;
5423 lex_error_expecting (lexer, "PROP", "MEAN");
5427 else if (lex_match_id (lexer, "ALPHA"))
5429 lex_match (lexer, T_EQUALS);
5431 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5433 double a0 = lex_number (lexer);
5436 lex_match (lexer, T_COMMA);
/* With two values the smaller is stored first; with one value it is used
   for both slots. */
5437 if (lex_is_number (lexer))
5439 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5441 double a1 = lex_number (lexer);
5444 t->pairwise->alpha[0] = MIN (a0, a1);
5445 t->pairwise->alpha[1] = MAX (a0, a1);
5448 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5450 else if (lex_match_id (lexer, "ADJUST"))
5452 lex_match (lexer, T_EQUALS);
5453 if (lex_match_id (lexer, "BONFERRONI"))
5454 t->pairwise->adjust = BONFERRONI;
5455 else if (lex_match_id (lexer, "BH"))
5456 t->pairwise->adjust = BH;
5457 else if (lex_match_id (lexer, "NONE"))
5458 t->pairwise->adjust = 0;
5461 lex_error_expecting (lexer, "BONFERRONI", "BH",
5466 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5468 lex_match (lexer, T_EQUALS);
5469 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5472 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5474 lex_match (lexer, T_EQUALS);
5475 if (lex_match_id (lexer, "ALLCATS"))
5476 t->pairwise->meansvariance_allcats = true;
5477 else if (lex_match_id (lexer, "TESTEDCATS"))
5478 t->pairwise->meansvariance_allcats = false;
5481 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5485 else if (lex_match_id (lexer, "CATEGORIES"))
5487 lex_match (lexer, T_EQUALS);
5488 if (lex_match_id (lexer, "ALLVISIBLE"))
5489 t->pairwise->all_visible = true;
5490 else if (lex_match_id (lexer, "SUBTOTALS"))
5491 t->pairwise->all_visible = false;
5494 lex_error_expecting (lexer, "ALLVISIBLE",
5499 else if (lex_match_id (lexer, "MERGE"))
5501 lex_match (lexer, T_EQUALS);
5502 if (!parse_bool (lexer, &t->pairwise->merge))
5505 else if (lex_match_id (lexer, "STYLE"))
5507 lex_match (lexer, T_EQUALS);
5508 if (lex_match_id (lexer, "APA"))
5509 t->pairwise->apa_style = true;
5510 else if (lex_match_id (lexer, "SIMPLE"))
5511 t->pairwise->apa_style = false;
5514 lex_error_expecting (lexer, "APA", "SIMPLE");
5518 else if (lex_match_id (lexer, "SHOWSIG"))
5520 lex_match (lexer, T_EQUALS);
5521 if (!parse_bool (lexer, &t->pairwise->show_sig))
5526 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5527 "INCLUDEMRSETS", "MEANSVARIANCE",
5528 "CATEGORIES", "MERGE", "STYLE",
5533 while (lex_token (lexer) != T_SLASH
5534 && lex_token (lexer) != T_ENDCMD);
5538 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5539 "CRITERIA", "CATEGORIES", "TITLES",
5540 "SIGTEST", "COMPARETEST");
5544 if (!lex_match (lexer, T_SLASH))
/* Only one of the row and column label axes may be moved. */
5548 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5549 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5551 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5555 if (!ctables_prepare_table (t))
5558 while (lex_token (lexer) != T_ENDCMD);
/* Parsing succeeded: run the tables, then release everything. */
5560 bool ok = ctables_execute (ds, ct);
5561 ctables_destroy (ct);
5562 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Cleanup call reached after the normal return above; presumably the target
   of the elided error jumps. */
5565 ctables_destroy (ct);