1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-out.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/mrset.h"
28 #include "data/subcase.h"
29 #include "data/value-labels.h"
30 #include "language/command.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/hmap.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "libpspp/string-array.h"
41 #include "math/mode.h"
42 #include "math/moments.h"
43 #include "math/percentiles.h"
44 #include "math/sort.h"
45 #include "output/pivot-table.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
52 #define N_(msgid) (msgid)
56 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
57 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
58 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
59 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
63 - unweighted summaries (U*)
64 - lower confidence limits (*.LCL)
65 - upper confidence limits (*.UCL)
66 - standard error (*.SE)
69 /* All variables. */ \
70 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
71 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
73 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
94 /* Scale variables, totals, and subtotals. */ \
95 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
96 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
106 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
107 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
111 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
112 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
119 #if 0 /* Multiple response sets not yet implemented. */
120 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
121 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
122 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
123 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
144 enum ctables_summary_function
146 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
152 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
153 N_CTSF_FUNCTIONS = SUMMARIES
157 static bool ctables_summary_function_is_count (enum ctables_summary_function);
159 enum ctables_domain_type
161 /* Within a section, where stacked variables divide one section from
163 CTDT_TABLE, /* All layers of a whole section. */
164 CTDT_LAYER, /* One layer within a section. */
165 CTDT_LAYERROW, /* Row in one layer within a section. */
166 CTDT_LAYERCOL, /* Column in one layer within a section. */
168 /* Within a subtable, where a subtable pairs an innermost row variable with
169 an innermost column variable within a single layer. */
170 CTDT_SUBTABLE, /* Whole subtable. */
171 CTDT_ROW, /* Row within a subtable. */
172 CTDT_COL, /* Column within a subtable. */
176 struct ctables_domain
178 struct hmap_node node;
180 const struct ctables_cell *example;
182 double d_valid; /* Dictionary weight. */
185 double e_valid; /* Effective weight */
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The domains that contain this cell. */
204 uint32_t omit_domains;
205 struct ctables_domain *domains[N_CTDTS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_RANGE, /* [LO THRU 5] */
296 CTPO_CAT_MISSING, /* MISSING */
297 CTPO_CAT_OTHERNM, /* OTHERNM */
298 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
299 CTPO_CAT_TOTAL, /* TOTAL */
313 /* CTPO_CAT_NUMBER. */
316 /* CTPO_CAT_STRING. */
319 /* CTPO_CAT_RANGE. */
322 /* CTPO_CAT_SUBTOTAL. */
323 size_t subtotal_index;
325 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
326 One element: CTPO_NEG. */
327 struct ctables_pcexpr *subs[2];
330 /* Source location. */
331 struct msg_location *location;
334 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
335 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
336 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
337 struct ctables_pcexpr *sub1);
339 struct ctables_summary_spec_set
341 struct ctables_summary_spec *specs;
345 /* The variable to which the summary specs are applied. */
346 struct variable *var;
348 /* Whether the variable to which the summary specs are applied is a scale
349 variable for the purpose of summarization.
351 (VALIDN and TOTALN act differently for summarizing scale and categorical
355 /* If any of these optional additional scale variables are missing, then
356 treat 'var' as if it's missing too. This is for implementing
357 SMISSING=LISTWISE. */
358 struct variable **listwise_vars;
359 size_t n_listwise_vars;
362 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
363 const struct ctables_summary_spec_set *);
364 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
366 /* A nested sequence of variables, e.g. a > b > c. */
369 struct variable **vars;
372 size_t *domains[N_CTDTS];
373 size_t n_domains[N_CTDTS];
376 struct ctables_summary_spec_set specs[N_CSVS];
379 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
382 struct ctables_nest *nests;
388 struct hmap_node node;
393 struct ctables_occurrence
395 struct hmap_node node;
399 struct ctables_section
401 struct ctables_table *table;
402 struct ctables_nest *nests[PIVOT_N_AXES];
403 struct hmap *occurrences[PIVOT_N_AXES];
404 struct hmap cells; /* Contains "struct ctables_cell"s. */
405 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
410 struct ctables *ctables;
411 struct ctables_axis *axes[PIVOT_N_AXES];
412 struct ctables_stack stacks[PIVOT_N_AXES];
413 struct ctables_section *sections;
415 enum pivot_axis_type summary_axis;
416 struct ctables_summary_spec_set summary_specs;
418 const struct variable *clabels_example;
419 struct hmap clabels_values_map;
420 struct ctables_value **clabels_values;
421 size_t n_clabels_values;
423 enum pivot_axis_type slabels_axis;
424 bool slabels_visible;
426 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
428 Most commonly, label_axis[a] == a, and in particular we always have
429 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
431 If ROWLABELS or COLLABELS is specified, then one of
432 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
433 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
435 enum pivot_axis_type label_axis[PIVOT_N_AXES];
436 enum pivot_axis_type clabels_from_axis;
438 /* Indexed by variable dictionary index. */
439 struct ctables_categories **categories;
448 struct ctables_chisq *chisq;
449 struct ctables_pairwise *pairwise;
457 struct variable *var;
458 const struct mrset *mrset;
462 static const struct fmt_spec *
463 ctables_var_get_print_format (const struct ctables_var *var)
465 return (var->is_mrset
466 ? var_get_print_format (var->mrset->vars[0])
467 : var_get_print_format (var->var));
471 ctables_var_name (const struct ctables_var *var)
473 return var->is_mrset ? var->mrset->name : var_get_name (var->var);
476 struct ctables_categories
479 struct ctables_category *cats;
484 struct ctables_category
486 enum ctables_category_type
488 /* Explicit category lists. */
496 /* Totals and subtotals. */
500 /* Implicit category lists. */
505 /* For contributing to TOTALN. */
506 CCT_EXCLUDED_MISSING,
510 struct ctables_category *subtotal;
516 double number; /* CCT_NUMBER. */
517 char *string; /* CCT_STRING. */
518 double range[2]; /* CCT_RANGE. */
522 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
523 bool hide_subcategories; /* CCT_SUBTOTAL. */
526 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
528 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
531 bool include_missing;
535 enum ctables_summary_function sort_function;
536 struct variable *sort_var;
541 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
542 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
543 struct msg_location *location;
547 ctables_category_uninit (struct ctables_category *cat)
558 case CCT_POSTCOMPUTE:
567 free (cat->total_label);
575 case CCT_EXCLUDED_MISSING:
581 ctables_category_equal (const struct ctables_category *a,
582 const struct ctables_category *b)
584 if (a->type != b->type)
590 return a->number == b->number;
/* strcmp() yields 0 for identical strings, so negate it to report equality
   (same convention as the total_label comparison in this function). */
593 return !strcmp (a->string, b->string);
596 return a->range[0] == b->range[0] && a->range[1] == b->range[1];
602 case CCT_POSTCOMPUTE:
603 return a->pc == b->pc;
607 return !strcmp (a->total_label, b->total_label);
612 return (a->include_missing == b->include_missing
613 && a->sort_ascending == b->sort_ascending
614 && a->sort_function == b->sort_function
615 && a->sort_var == b->sort_var
616 && a->percentile == b->percentile);
618 case CCT_EXCLUDED_MISSING:
626 ctables_categories_unref (struct ctables_categories *c)
631 assert (c->n_refs > 0);
635 for (size_t i = 0; i < c->n_cats; i++)
636 ctables_category_uninit (&c->cats[i]);
642 ctables_categories_equal (const struct ctables_categories *a,
643 const struct ctables_categories *b)
645 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
648 for (size_t i = 0; i < a->n_cats; i++)
649 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
655 /* Chi-square test (SIGTEST). */
663 /* Pairwise comparison test (COMPARETEST). */
664 struct ctables_pairwise
666 enum { PROP, MEAN } type;
669 bool meansvariance_allcats;
671 enum { BONFERRONI = 1, BH } adjust;
695 struct ctables_var var;
697 struct ctables_summary_spec_set specs[N_CSVS];
701 struct ctables_axis *subs[2];
704 struct msg_location *loc;
707 static void ctables_axis_destroy (struct ctables_axis *);
716 enum ctables_function_availability
718 CTFA_ALL, /* Any variables. */
719 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
720 CTFA_MRSETS, /* Only multiple-response sets */
723 struct ctables_summary_spec
725 enum ctables_summary_function function;
726 double percentile; /* CTSF_PTILE only. */
729 struct fmt_spec format;
730 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
736 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
737 const struct ctables_summary_spec *src)
740 dst->label = xstrdup (src->label);
744 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
751 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
752 const struct ctables_summary_spec_set *src)
754 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
755 for (size_t i = 0; i < src->n; i++)
756 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
758 *dst = (struct ctables_summary_spec_set) {
763 .is_scale = src->is_scale,
768 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
770 for (size_t i = 0; i < set->n; i++)
771 ctables_summary_spec_uninit (&set->specs[i]);
776 parse_col_width (struct lexer *lexer, const char *name, double *width)
778 lex_match (lexer, T_EQUALS);
779 if (lex_match_id (lexer, "DEFAULT"))
781 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
783 *width = lex_number (lexer);
793 parse_bool (struct lexer *lexer, bool *b)
795 if (lex_match_id (lexer, "NO"))
797 else if (lex_match_id (lexer, "YES"))
801 lex_error_expecting (lexer, "YES", "NO");
807 static enum ctables_function_availability
808 ctables_function_availability (enum ctables_summary_function f)
/* Read-only lookup table indexed by summary function. */
810 static const enum ctables_function_availability availability[] = {
811 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
816 return availability[f];
820 ctables_summary_function_is_count (enum ctables_summary_function f)
826 case CTSF_ROWPCT_COUNT:
827 case CTSF_COLPCT_COUNT:
828 case CTSF_TABLEPCT_COUNT:
829 case CTSF_SUBTABLEPCT_COUNT:
830 case CTSF_LAYERPCT_COUNT:
831 case CTSF_LAYERROWPCT_COUNT:
832 case CTSF_LAYERCOLPCT_COUNT:
835 case CTSF_ROWPCT_VALIDN:
836 case CTSF_COLPCT_VALIDN:
837 case CTSF_TABLEPCT_VALIDN:
838 case CTSF_SUBTABLEPCT_VALIDN:
839 case CTSF_LAYERPCT_VALIDN:
840 case CTSF_LAYERROWPCT_VALIDN:
841 case CTSF_LAYERCOLPCT_VALIDN:
842 case CTSF_ROWPCT_TOTALN:
843 case CTSF_COLPCT_TOTALN:
844 case CTSF_TABLEPCT_TOTALN:
845 case CTSF_SUBTABLEPCT_TOTALN:
846 case CTSF_LAYERPCT_TOTALN:
847 case CTSF_LAYERROWPCT_TOTALN:
848 case CTSF_LAYERCOLPCT_TOTALN:
865 case CTSF_ROWPCT_SUM:
866 case CTSF_COLPCT_SUM:
867 case CTSF_TABLEPCT_SUM:
868 case CTSF_SUBTABLEPCT_SUM:
869 case CTSF_LAYERPCT_SUM:
870 case CTSF_LAYERROWPCT_SUM:
871 case CTSF_LAYERCOLPCT_SUM:
879 parse_ctables_summary_function (struct lexer *lexer,
880 enum ctables_summary_function *f)
884 enum ctables_summary_function function;
885 struct substring name;
/* Read-only table pairing each accepted spelling with its summary
   function; it is only scanned, never modified. */
887 static const struct pair names[] = {
888 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
889 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
892 /* The .COUNT suffix may be omitted. */
893 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
894 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
895 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
896 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
897 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
898 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
899 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
903 if (!lex_force_id (lexer))
906 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
907 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
909 *f = names[i].function;
914 lex_error (lexer, _("Expecting summary function name."));
919 ctables_axis_destroy (struct ctables_axis *axis)
927 for (size_t i = 0; i < N_CSVS; i++)
928 ctables_summary_spec_set_uninit (&axis->specs[i]);
933 ctables_axis_destroy (axis->subs[0]);
934 ctables_axis_destroy (axis->subs[1]);
937 msg_location_destroy (axis->loc);
941 static struct ctables_axis *
942 ctables_axis_new_nonterminal (enum ctables_axis_op op,
943 struct ctables_axis *sub0,
944 struct ctables_axis *sub1,
945 struct lexer *lexer, int start_ofs)
947 struct ctables_axis *axis = xmalloc (sizeof *axis);
948 *axis = (struct ctables_axis) {
950 .subs = { sub0, sub1 },
951 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
956 struct ctables_axis_parse_ctx
959 struct dictionary *dict;
961 struct ctables_table *t;
964 static struct fmt_spec
965 ctables_summary_default_format (enum ctables_summary_function function,
966 const struct ctables_var *var)
968 static const enum ctables_format default_formats[] = {
969 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
973 switch (default_formats[function])
976 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
979 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
982 return *ctables_var_get_print_format (var);
990 ctables_summary_default_label (enum ctables_summary_function function,
993 static const char *default_labels[] = {
994 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
999 return (function == CTSF_PTILE
1000 ? xasprintf (_("Percentile %.2f"), percentile)
1001 : xstrdup (gettext (default_labels[function])));
1005 ctables_summary_function_name (enum ctables_summary_function function)
1007 static const char *names[] = {
1008 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1012 return names[function];
1016 add_summary_spec (struct ctables_axis *axis,
1017 enum ctables_summary_function function, double percentile,
1018 const char *label, const struct fmt_spec *format,
1019 bool is_ctables_format, const struct msg_location *loc,
1020 enum ctables_summary_variant sv)
1022 if (axis->op == CTAO_VAR)
1024 const char *function_name = ctables_summary_function_name (function);
1025 const char *var_name = ctables_var_name (&axis->var);
1026 switch (ctables_function_availability (function))
1029 if (!axis->var.is_mrset)
1031 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1032 "response sets."), function_name);
1033 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1044 _("Summary function %s applies only to scale variables."),
1046 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1057 struct ctables_summary_spec_set *set = &axis->specs[sv];
1058 if (set->n >= set->allocated)
1059 set->specs = x2nrealloc (set->specs, &set->allocated,
1060 sizeof *set->specs);
1062 struct ctables_summary_spec *dst = &set->specs[set->n++];
1063 *dst = (struct ctables_summary_spec) {
1064 .function = function,
1065 .percentile = percentile,
1066 .label = xstrdup (label),
1067 .format = (format ? *format
1068 : ctables_summary_default_format (function, &axis->var)),
1069 .is_ctables_format = is_ctables_format,
1075 for (size_t i = 0; i < 2; i++)
1076 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1077 format, is_ctables_format, loc, sv))
1083 static struct ctables_axis *ctables_axis_parse_stack (
1084 struct ctables_axis_parse_ctx *);
1087 ctables_var_parse (struct lexer *lexer, struct dictionary *dict,
1088 struct ctables_var *var)
1090 if (ss_starts_with (lex_tokss (lexer), ss_cstr ("$")))
1092 *var = (struct ctables_var) {
1094 .mrset = dict_lookup_mrset (dict, lex_tokcstr (lexer))
1098 lex_error (lexer, _("'%s' does not name a multiple-response set "
1099 "in the active file dictionary."),
1100 lex_tokcstr (lexer));
1108 *var = (struct ctables_var) {
1110 .var = parse_variable (lexer, dict),
1112 return var->var != NULL;
1116 static struct ctables_axis *
1117 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1119 if (lex_match (ctx->lexer, T_LPAREN))
1121 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1122 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1124 ctables_axis_destroy (sub);
1130 if (!lex_force_id (ctx->lexer))
1133 int start_ofs = lex_ofs (ctx->lexer);
1134 struct ctables_var var;
1135 if (!ctables_var_parse (ctx->lexer, ctx->dict, &var))
1138 struct ctables_axis *axis = xmalloc (sizeof *axis);
1139 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1141 /* XXX should figure out default measures by reading data */
1142 axis->scale = (var.is_mrset ? false
1143 : lex_match_phrase (ctx->lexer, "[S]") ? true
1144 : lex_match_phrase (ctx->lexer, "[C]") ? false
1145 : var_get_measure (var.var) == MEASURE_SCALE);
1146 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1147 lex_ofs (ctx->lexer) - 1);
1152 has_digit (const char *s)
1154 return s[strcspn (s, "0123456789")] != '\0';
1158 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1159 bool *is_ctables_format)
1161 char type[FMT_TYPE_LEN_MAX + 1];
1162 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1165 if (!strcasecmp (type, "NEGPAREN"))
1166 format->type = CTEF_NEGPAREN;
1167 else if (!strcasecmp (type, "NEQUAL"))
1168 format->type = CTEF_NEQUAL;
1169 else if (!strcasecmp (type, "PAREN"))
1170 format->type = CTEF_PAREN;
1171 else if (!strcasecmp (type, "PCTPAREN"))
1172 format->type = CTEF_PCTPAREN;
1175 *is_ctables_format = false;
1176 return (parse_format_specifier (lexer, format)
1177 && fmt_check_output (format)
1178 && fmt_check_type_compat (format, VAL_NUMERIC));
1183 msg (SE, _("Output format %s requires width 2 or greater."), type);
1186 else if (format->d > format->w - 1)
1188 msg (SE, _("Output format %s requires width greater than decimals."),
1194 *is_ctables_format = true;
1199 static struct ctables_axis *
1200 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1202 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1203 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1206 enum ctables_summary_variant sv = CSV_CELL;
1209 int start_ofs = lex_ofs (ctx->lexer);
1211 /* Parse function. */
1212 enum ctables_summary_function function;
1213 if (!parse_ctables_summary_function (ctx->lexer, &function))
1216 /* Parse percentile. */
1217 double percentile = 0;
1218 if (function == CTSF_PTILE)
1220 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1222 percentile = lex_number (ctx->lexer);
1223 lex_get (ctx->lexer);
1228 if (lex_is_string (ctx->lexer))
1230 label = ss_xstrdup (lex_tokss (ctx->lexer));
1231 lex_get (ctx->lexer);
1234 label = ctables_summary_default_label (function, percentile);
1237 struct fmt_spec format;
1238 const struct fmt_spec *formatp;
1239 bool is_ctables_format = false;
1240 if (lex_token (ctx->lexer) == T_ID
1241 && has_digit (lex_tokcstr (ctx->lexer)))
1243 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1244 &is_ctables_format))
1254 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1255 lex_ofs (ctx->lexer) - 1);
1256 add_summary_spec (sub, function, percentile, label, formatp,
1257 is_ctables_format, loc, sv);
1259 msg_location_destroy (loc);
1261 lex_match (ctx->lexer, T_COMMA);
1262 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1264 if (!lex_force_match (ctx->lexer, T_LBRACK))
1268 else if (lex_match (ctx->lexer, T_RBRACK))
1270 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1277 ctables_axis_destroy (sub);
1281 static const struct ctables_axis *
1282 find_scale (const struct ctables_axis *axis)
1286 else if (axis->op == CTAO_VAR)
1290 assert (!axis->var.is_mrset);
1298 for (size_t i = 0; i < 2; i++)
1300 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1308 static const struct ctables_axis *
1309 find_categorical_summary_spec (const struct ctables_axis *axis)
1313 else if (axis->op == CTAO_VAR)
1314 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1317 for (size_t i = 0; i < 2; i++)
1319 const struct ctables_axis *sum
1320 = find_categorical_summary_spec (axis->subs[i]);
1328 static struct ctables_axis *
1329 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1331 int start_ofs = lex_ofs (ctx->lexer);
1332 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1336 while (lex_match (ctx->lexer, T_GT))
1338 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1342 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1343 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1345 const struct ctables_axis *outer_scale = find_scale (lhs);
1346 const struct ctables_axis *inner_scale = find_scale (rhs);
1347 if (outer_scale && inner_scale)
1349 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1350 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1351 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1352 ctables_axis_destroy (nest);
1356 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1359 msg_at (SE, nest->loc,
1360 _("Summaries may only be requested for categorical variables "
1361 "at the innermost nesting level."));
1362 msg_at (SN, outer_sum->loc,
1363 _("This outer categorical variable has a summary."));
1364 ctables_axis_destroy (nest);
1374 static struct ctables_axis *
1375 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1377 int start_ofs = lex_ofs (ctx->lexer);
1378 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1382 while (lex_match (ctx->lexer, T_PLUS))
1384 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1388 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1389 ctx->lexer, start_ofs);
1396 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1397 struct ctables *ct, struct ctables_table *t,
1398 enum pivot_axis_type a)
1400 if (lex_token (lexer) == T_BY
1401 || lex_token (lexer) == T_SLASH
1402 || lex_token (lexer) == T_ENDCMD)
1405 struct ctables_axis_parse_ctx ctx = {
1411 t->axes[a] = ctables_axis_parse_stack (&ctx);
1412 return t->axes[a] != NULL;
1416 ctables_chisq_destroy (struct ctables_chisq *chisq)
1422 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T that are visible here: drops one
   reference on each of the T->n_categories entries of T->categories and frees
   the array itself, destroys the column, row, and layer axes, and destroys
   T->chisq and T->pairwise. */
1428 ctables_table_destroy (struct ctables_table *t)
/* Category sets are reference-counted, so they are unreferenced rather than
   freed directly. */
1433 for (size_t i = 0; i < t->n_categories; i++)
1434 ctables_categories_unref (t->categories[i]);
1435 free (t->categories);
1437 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1438 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1439 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1443 ctables_chisq_destroy (t->chisq);
1444 ctables_pairwise_destroy (t->pairwise);
1449 ctables_destroy (struct ctables *ct)
1454 pivot_table_look_unref (ct->look);
1458 for (size_t i = 0; i < ct->n_tables; i++)
1459 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose `range' member is { LOW, HIGH }.
   Callers in this file pass -DBL_MAX for an open lower bound ("LO THRU n")
   and DBL_MAX for an open upper bound ("n THRU HI"). */
1464 static struct ctables_category
1465 cct_range (double low, double high)
1467 return (struct ctables_category) {
1469 .range = { low, high }
/* Parses the optional `="label"' that may follow SUBTOTAL or HSUBTOTAL and
   stores a CCT_SUBTOTAL category in *CAT.

   HIDE_SUBCATEGORIES is copied into the category unchanged; the caller passes
   true for HSUBTOTAL and false for SUBTOTAL.  The label is a newly allocated
   string: a copy of the string token when one is given, otherwise a copy of
   the translated default "Subtotal". */
1474 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1475 struct ctables_category *cat)
/* An explicit label is given only when `=' follows the keyword. */
1478 if (lex_match (lexer, T_EQUALS))
1480 if (!lex_force_string (lexer))
1483 total_label = ss_xstrdup (lex_tokss (lexer));
/* Default label. */
1487 total_label = xstrdup (_("Subtotal"));
1489 *cat = (struct ctables_category) {
1490 .type = CCT_SUBTOTAL,
1491 .hide_subcategories = hide_subcategories,
1492 .total_label = total_label
/* Parses a single explicit category at LEXER's current token and stores the
   result in *CAT.  The forms recognized below are:

     OTHERNM
     MISSING
     SUBTOTAL[="label"]      (delegated to ctables_table_parse_subtotal)
     HSUBTOTAL[="label"]     (same, with subcategories hidden)
     LO THRU number
     number [THRU HI | THRU number]
     "string"
     &name                   (reference to a postcompute known to CT)

   A &name that ctables_find_postcompute() does not resolve is reported as
   "Unknown postcompute &name." at the location of the reference.  Any other
   token is reported through lex_error(). */
1498 ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct,
1499 struct ctables_category *cat)
1501 if (lex_match_id (lexer, "OTHERNM"))
1502 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1503 else if (lex_match_id (lexer, "MISSING"))
1504 *cat = (struct ctables_category) { .type = CCT_MISSING };
1505 else if (lex_match_id (lexer, "SUBTOTAL"))
1506 return ctables_table_parse_subtotal (lexer, false, cat);
1507 else if (lex_match_id (lexer, "HSUBTOTAL"))
1508 return ctables_table_parse_subtotal (lexer, true, cat);
1509 else if (lex_match_id (lexer, "LO"))
/* Both steps must succeed before the upper bound is read: THRU has to follow
   LO, and the token after it has to be a number.  lex_force_num() is negated
   here exactly as it is in the "number THRU number" case further down;
   without the `!', a valid "LO THRU n" satisfied this condition and a
   non-number fell through to lex_number(). */
1511 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
/* -DBL_MAX stands for the open lower bound. */
1513 *cat = cct_range (-DBL_MAX, lex_number (lexer));
1516 else if (lex_is_number (lexer))
1518 double number = lex_number (lexer);
1520 if (lex_match_id (lexer, "THRU"))
/* DBL_MAX stands for the open upper bound. */
1522 if (lex_match_id (lexer, "HI"))
1523 *cat = cct_range (number, DBL_MAX);
1526 if (!lex_force_num (lexer))
1528 *cat = cct_range (number, lex_number (lexer));
1533 *cat = (struct ctables_category) {
1538 else if (lex_is_string (lexer))
1540 *cat = (struct ctables_category) {
/* The category owns its own copy of the string token. */
1542 .string = ss_xstrdup (lex_tokss (lexer)),
1546 else if (lex_match (lexer, T_AND))
1548 if (!lex_force_id (lexer))
1550 struct ctables_postcompute *pc = ctables_find_postcompute (
1551 ct, lex_tokcstr (lexer));
/* The location covers the previous token through the current one (offsets -1
   to 0), i.e. the token matched as T_AND plus the identifier. */
1554 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1555 msg_at (SE, loc, _("Unknown postcompute &%s."),
1556 lex_tokcstr (lexer));
1557 msg_location_destroy (loc);
1562 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
/* None of the category forms matched. */
1566 lex_error (lexer, NULL);
/* Scans every category in CATS for the one that category-reference
   expression E designates.  The comparison applied depends on the kind of
   reference:

     CTPO_CAT_NUMBER    CCT_NUMBER category with the same number
     CTPO_CAT_STRING    CCT_STRING category with an equal string
     CTPO_CAT_RANGE     CCT_RANGE category with both bounds equal
     CTPO_CAT_MISSING   a CCT_MISSING category
     CTPO_CAT_OTHERNM   a CCT_OTHERNM category
     CTPO_CAT_SUBTOTAL  a CCT_SUBTOTAL category, selected by position
     CTPO_CAT_TOTAL     a CCT_TOTAL category

   For subtotals, E->subtotal_index is compared against the running count
   N_SUBTOTALS; an index of 0 is handled separately and is singled out again
   after the loop when more than one subtotal was seen.
   NOTE(review): index 0 appears to mean "no position given", which is
   ambiguous with several subtotals -- confirm against the statement guarded
   by the final `if'. */
1573 static struct ctables_category *
1574 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1575 const struct ctables_pcexpr *e)
1577 struct ctables_category *best = NULL;
1578 size_t n_subtotals = 0;
1579 for (size_t i = 0; i < cats->n_cats; i++)
1581 struct ctables_category *cat = &cats->cats[i];
1584 case CTPO_CAT_NUMBER:
1585 if (cat->type == CCT_NUMBER && cat->number == e->number)
1589 case CTPO_CAT_STRING:
1590 if (cat->type == CCT_STRING && !strcmp (cat->string, e->string))
1594 case CTPO_CAT_RANGE:
/* Ranges match only when both endpoints are exactly equal. */
1595 if (cat->type == CCT_RANGE
1596 && cat->range[0] == e->range[0]
1597 && cat->range[1] == e->range[1])
1601 case CTPO_CAT_MISSING:
1602 if (cat->type == CCT_MISSING)
1606 case CTPO_CAT_OTHERNM:
1607 if (cat->type == CCT_OTHERNM)
1611 case CTPO_CAT_SUBTOTAL:
1612 if (cat->type == CCT_SUBTOTAL)
1615 if (e->subtotal_index == n_subtotals)
1617 else if (e->subtotal_index == 0)
1622 case CTPO_CAT_TOTAL:
1623 if (cat->type == CCT_TOTAL)
/* A subtotal reference with index 0 and more than one subtotal in CATS. */
1637 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1643 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1644 struct ctables_category *pc_cat,
1645 const struct ctables_categories *cats,
1646 const struct msg_location *cats_location)
1650 case CTPO_CAT_NUMBER:
1651 case CTPO_CAT_STRING:
1652 case CTPO_CAT_RANGE:
1653 case CTPO_CAT_MISSING:
1654 case CTPO_CAT_OTHERNM:
1655 case CTPO_CAT_SUBTOTAL:
1656 case CTPO_CAT_TOTAL:
1658 struct ctables_category *cat = ctables_find_category_for_postcompute (
1662 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1664 size_t n_subtotals = 0;
1665 for (size_t i = 0; i < cats->n_cats; i++)
1666 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1667 if (n_subtotals > 1)
1669 msg_at (SE, cats_location,
1670 ngettext ("These categories include %zu instance "
1671 "of SUBTOTAL or HSUBTOTAL, so references "
1672 "from computed categories must refer to "
1673 "subtotals by position.",
1674 "These categories include %zu instances "
1675 "of SUBTOTAL or HSUBTOTAL, so references "
1676 "from computed categories must refer to "
1677 "subtotals by position.",
1680 msg_at (SN, e->location,
1681 _("This is the reference that lacks a position."));
1686 msg_at (SE, pc_cat->location,
1687 _("Computed category &%s references a category not included "
1688 "in the category list."),
1690 msg_at (SN, e->location, _("This is the missing category."));
1691 msg_at (SN, cats_location,
1692 _("To fix the problem, add the missing category to the "
1693 "list of categories here."));
1696 if (pc_cat->pc->hide_source_cats)
1710 for (size_t i = 0; i < 2; i++)
1711 if (e->subs[i] && !ctables_recursive_check_postcompute (
1712 e->subs[i], pc_cat, cats, cats_location))
1722 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1723 struct ctables *ct, struct ctables_table *t)
1725 if (!lex_match_id (lexer, "VARIABLES"))
1727 lex_match (lexer, T_EQUALS);
1729 struct variable **vars;
1731 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1734 struct ctables_categories *c = xmalloc (sizeof *c);
1735 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1736 for (size_t i = 0; i < n_vars; i++)
1738 struct ctables_categories **cp
1739 = &t->categories[var_get_dict_index (vars[i])];
1740 ctables_categories_unref (*cp);
1745 size_t allocated_cats = 0;
1746 if (lex_match (lexer, T_LBRACK))
1748 int cats_start_ofs = lex_ofs (lexer);
1751 if (c->n_cats >= allocated_cats)
1752 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1754 int start_ofs = lex_ofs (lexer);
1755 struct ctables_category *cat = &c->cats[c->n_cats];
1756 if (!ctables_table_parse_explicit_category (lexer, ct, cat))
1758 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1761 lex_match (lexer, T_COMMA);
1763 while (!lex_match (lexer, T_RBRACK));
1765 struct msg_location *cats_location
1766 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1767 for (size_t i = 0; i < c->n_cats; i++)
1769 struct ctables_category *cat = &c->cats[i];
1770 if (cat->type == CCT_POSTCOMPUTE
1771 && !ctables_recursive_check_postcompute (cat->pc->expr, cat,
1777 struct ctables_category cat = {
1779 .include_missing = false,
1780 .sort_ascending = true,
1782 bool show_totals = false;
1783 char *total_label = NULL;
1784 bool totals_before = false;
1785 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1787 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1789 lex_match (lexer, T_EQUALS);
1790 if (lex_match_id (lexer, "A"))
1791 cat.sort_ascending = true;
1792 else if (lex_match_id (lexer, "D"))
1793 cat.sort_ascending = false;
1796 lex_error_expecting (lexer, "A", "D");
1800 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1802 lex_match (lexer, T_EQUALS);
1803 if (lex_match_id (lexer, "VALUE"))
1804 cat.type = CCT_VALUE;
1805 else if (lex_match_id (lexer, "LABEL"))
1806 cat.type = CCT_LABEL;
1809 cat.type = CCT_FUNCTION;
1810 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1813 if (lex_match (lexer, T_LPAREN))
1815 cat.sort_var = parse_variable (lexer, dict);
1819 if (cat.sort_function == CTSF_PTILE)
1821 lex_match (lexer, T_COMMA);
1822 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1824 cat.percentile = lex_number (lexer);
1828 if (!lex_force_match (lexer, T_RPAREN))
1831 else if (ctables_function_availability (cat.sort_function)
1834 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1839 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1841 lex_match (lexer, T_EQUALS);
1842 if (lex_match_id (lexer, "INCLUDE"))
1843 cat.include_missing = true;
1844 else if (lex_match_id (lexer, "EXCLUDE"))
1845 cat.include_missing = false;
1848 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1852 else if (lex_match_id (lexer, "TOTAL"))
1854 lex_match (lexer, T_EQUALS);
1855 if (!parse_bool (lexer, &show_totals))
1858 else if (lex_match_id (lexer, "LABEL"))
1860 lex_match (lexer, T_EQUALS);
1861 if (!lex_force_string (lexer))
1864 total_label = ss_xstrdup (lex_tokss (lexer));
1867 else if (lex_match_id (lexer, "POSITION"))
1869 lex_match (lexer, T_EQUALS);
1870 if (lex_match_id (lexer, "BEFORE"))
1871 totals_before = true;
1872 else if (lex_match_id (lexer, "AFTER"))
1873 totals_before = false;
1876 lex_error_expecting (lexer, "BEFORE", "AFTER");
1880 else if (lex_match_id (lexer, "EMPTY"))
1882 lex_match (lexer, T_EQUALS);
1883 if (lex_match_id (lexer, "INCLUDE"))
1884 c->show_empty = true;
1885 else if (lex_match_id (lexer, "EXCLUDE"))
1886 c->show_empty = false;
1889 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1896 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1897 "TOTAL", "LABEL", "POSITION", "EMPTY");
1899 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
1906 if (c->n_cats >= allocated_cats)
1907 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1908 c->cats[c->n_cats++] = cat;
1913 if (c->n_cats >= allocated_cats)
1914 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1916 struct ctables_category *totals;
1919 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
1920 totals = &c->cats[0];
1923 totals = &c->cats[c->n_cats];
1926 *totals = (struct ctables_category) {
1928 .total_label = total_label ? total_label : xstrdup (_("Total")),
1932 struct ctables_category *subtotal = NULL;
1933 for (size_t i = totals_before ? 0 : c->n_cats;
1934 totals_before ? i < c->n_cats : i-- > 0;
1935 totals_before ? i++ : 0)
1937 struct ctables_category *cat = &c->cats[i];
1945 cat->subtotal = subtotal;
1948 case CCT_POSTCOMPUTE:
1959 case CCT_EXCLUDED_MISSING:
1968 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of the STACK->n nests in STACK->nests and then frees the
   array.  STACK itself is not freed here. */
1975 ctables_stack_uninit (struct ctables_stack *stack)
1979 for (size_t i = 0; i < stack->n; i++)
1980 ctables_nest_uninit (&stack->nests[i]);
1981 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: for every pair
   (A from S0, B from S1) a new nest is appended whose variable list is A's
   variables followed by B's.  Both input stacks are uninitialized before
   returning, so the caller gives up ownership of S0 and S1.

   Summary specifications for the new nest are cloned from one of the two
   source nests, chosen by which of them has specs[CSV_CELL].var set.  The
   scale index is taken from A when A has one, otherwise from B offset by
   A->n so that it indexes the combined variable list. */
1985 static struct ctables_stack
1986 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for s0.n * s1.n nests.
   NOTE(review): xnmalloc() checks only the product of its two arguments, so
   the inner `s1.n * sizeof' multiplication is itself unchecked. */
1993 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
1994 for (size_t i = 0; i < s0.n; i++)
1995 for (size_t j = 0; j < s1.n; j++)
1997 const struct ctables_nest *a = &s0.nests[i];
1998 const struct ctables_nest *b = &s1.nests[j];
2000 size_t allocate = a->n + b->n;
2001 struct variable **vars = xnmalloc (allocate, sizeof *vars);
/* NOTE(review): AXES is allocated here but not referenced by any other line
   visible in this function -- confirm that it is stored or freed. */
2002 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
/* Outer variables first, then inner ones. */
2004 for (size_t k = 0; k < a->n; k++)
2005 vars[n++] = a->vars[k];
2006 for (size_t k = 0; k < b->n; k++)
2007 vars[n++] = b->vars[k];
2008 assert (n == allocate);
2010 const struct ctables_nest *summary_src;
2011 if (!a->specs[CSV_CELL].var)
2013 else if (!b->specs[CSV_CELL].var)
2018 struct ctables_nest *new = &stack.nests[stack.n++];
2019 *new = (struct ctables_nest) {
/* SIZE_MAX marks "no scale variable". */
2021 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2022 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2026 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2027 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2029 ctables_stack_uninit (&s0);
2030 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S0 and S1: a new array holding
   S0's nests followed by S1's.  The nest structures are copied by value.  The
   group_head of each nest that comes from S1 is increased by S0.n.
   NOTE(review): presumably group_head is an index into the owning stack's
   nest array, which is why it must be shifted -- confirm at its definition. */
2034 static struct ctables_stack
2035 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2037 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2038 for (size_t i = 0; i < s0.n; i++)
2039 stack.nests[stack.n++] = s0.nests[i];
2040 for (size_t i = 0; i < s1.n; i++)
2042 stack.nests[stack.n] = s1.nests[i];
2043 stack.nests[stack.n].group_head += s0.n;
/* Every input nest must have been copied exactly once. */
2046 assert (stack.n == s0.n + s1.n);
2052 static struct ctables_stack
2053 var_fts (const struct ctables_axis *a)
2055 assert (!a->var.is_mrset);
2057 struct variable **vars = xmalloc (sizeof *vars);
2060 struct ctables_nest *nest = xmalloc (sizeof *nest);
2061 *nest = (struct ctables_nest) {
2064 .scale_idx = a->scale ? 0 : SIZE_MAX,
2066 if (a->specs[CSV_CELL].n || a->scale)
2067 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2069 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2070 nest->specs[sv].var = a->var.var;
2071 nest->specs[sv].is_scale = a->scale;
2073 return (struct ctables_stack) { .nests = nest, .n = 1 };
2076 static struct ctables_stack
2077 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2080 return (struct ctables_stack) { .n = 0 };
2088 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2089 enumerate_fts (axis_type, a->subs[1]));
2092 /* This should consider any of the scale variables found in the result to
2093 be linked to each other listwise for SMISSING=LISTWISE. */
2094 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2095 enumerate_fts (axis_type, a->subs[1]));
2101 union ctables_summary
2103 /* COUNT, VALIDN, TOTALN. */
2106 /* MINIMUM, MAXIMUM, RANGE. */
2113 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2114 struct moments1 *moments;
2116 /* MEDIAN, MODE, PTILE. */
2119 struct casewriter *writer;
2124 /* XXX multiple response */
2128 ctables_summary_init (union ctables_summary *s,
2129 const struct ctables_summary_spec *ss)
2131 switch (ss->function)
2135 case CTSF_ROWPCT_COUNT:
2136 case CTSF_COLPCT_COUNT:
2137 case CTSF_TABLEPCT_COUNT:
2138 case CTSF_SUBTABLEPCT_COUNT:
2139 case CTSF_LAYERPCT_COUNT:
2140 case CTSF_LAYERROWPCT_COUNT:
2141 case CTSF_LAYERCOLPCT_COUNT:
2142 case CTSF_ROWPCT_VALIDN:
2143 case CTSF_COLPCT_VALIDN:
2144 case CTSF_TABLEPCT_VALIDN:
2145 case CTSF_SUBTABLEPCT_VALIDN:
2146 case CTSF_LAYERPCT_VALIDN:
2147 case CTSF_LAYERROWPCT_VALIDN:
2148 case CTSF_LAYERCOLPCT_VALIDN:
2149 case CTSF_ROWPCT_TOTALN:
2150 case CTSF_COLPCT_TOTALN:
2151 case CTSF_TABLEPCT_TOTALN:
2152 case CTSF_SUBTABLEPCT_TOTALN:
2153 case CTSF_LAYERPCT_TOTALN:
2154 case CTSF_LAYERROWPCT_TOTALN:
2155 case CTSF_LAYERCOLPCT_TOTALN:
2167 s->min = s->max = SYSMIS;
2175 case CTSF_ROWPCT_SUM:
2176 case CTSF_COLPCT_SUM:
2177 case CTSF_TABLEPCT_SUM:
2178 case CTSF_SUBTABLEPCT_SUM:
2179 case CTSF_LAYERPCT_SUM:
2180 case CTSF_LAYERROWPCT_SUM:
2181 case CTSF_LAYERCOLPCT_SUM:
2182 s->moments = moments1_create (MOMENT_VARIANCE);
2189 struct caseproto *proto = caseproto_create ();
2190 proto = caseproto_add_width (proto, 0);
2191 proto = caseproto_add_width (proto, 0);
2193 struct subcase ordering;
2194 subcase_init (&ordering, 0, 0, SC_ASCEND);
2195 s->writer = sort_create_writer (&ordering, proto);
2196 subcase_uninit (&ordering);
2197 caseproto_unref (proto);
2207 ctables_summary_uninit (union ctables_summary *s,
2208 const struct ctables_summary_spec *ss)
2210 switch (ss->function)
2214 case CTSF_ROWPCT_COUNT:
2215 case CTSF_COLPCT_COUNT:
2216 case CTSF_TABLEPCT_COUNT:
2217 case CTSF_SUBTABLEPCT_COUNT:
2218 case CTSF_LAYERPCT_COUNT:
2219 case CTSF_LAYERROWPCT_COUNT:
2220 case CTSF_LAYERCOLPCT_COUNT:
2221 case CTSF_ROWPCT_VALIDN:
2222 case CTSF_COLPCT_VALIDN:
2223 case CTSF_TABLEPCT_VALIDN:
2224 case CTSF_SUBTABLEPCT_VALIDN:
2225 case CTSF_LAYERPCT_VALIDN:
2226 case CTSF_LAYERROWPCT_VALIDN:
2227 case CTSF_LAYERCOLPCT_VALIDN:
2228 case CTSF_ROWPCT_TOTALN:
2229 case CTSF_COLPCT_TOTALN:
2230 case CTSF_TABLEPCT_TOTALN:
2231 case CTSF_SUBTABLEPCT_TOTALN:
2232 case CTSF_LAYERPCT_TOTALN:
2233 case CTSF_LAYERROWPCT_TOTALN:
2234 case CTSF_LAYERCOLPCT_TOTALN:
2252 case CTSF_ROWPCT_SUM:
2253 case CTSF_COLPCT_SUM:
2254 case CTSF_TABLEPCT_SUM:
2255 case CTSF_SUBTABLEPCT_SUM:
2256 case CTSF_LAYERPCT_SUM:
2257 case CTSF_LAYERROWPCT_SUM:
2258 case CTSF_LAYERCOLPCT_SUM:
2259 moments1_destroy (s->moments);
2265 casewriter_destroy (s->writer);
2271 ctables_summary_add (union ctables_summary *s,
2272 const struct ctables_summary_spec *ss,
2273 const struct variable *var, const union value *value,
2274 bool is_scale, bool is_scale_missing,
2275 bool is_missing, bool excluded_missing,
2276 double d_weight, double e_weight)
2278 /* To determine whether a case is included in a given table for a particular
2279 kind of summary, consider the following charts for each variable in the
2280 table. Only if "yes" appears for every variable for the summary is the
2283 Categorical variables: VALIDN COUNT TOTALN
2284 Valid values in included categories yes yes yes
2285 Missing values in included categories --- yes yes
2286 Missing values in excluded categories --- --- yes
2287 Valid values in excluded categories --- --- ---
2289 Scale variables: VALIDN COUNT TOTALN
2290 Valid value yes yes yes
2291 Missing value --- yes yes
2293 Missing values include both user- and system-missing. (The system-missing
2294 value is always in an excluded category.)
2296 switch (ss->function)
2299 case CTSF_ROWPCT_TOTALN:
2300 case CTSF_COLPCT_TOTALN:
2301 case CTSF_TABLEPCT_TOTALN:
2302 case CTSF_SUBTABLEPCT_TOTALN:
2303 case CTSF_LAYERPCT_TOTALN:
2304 case CTSF_LAYERROWPCT_TOTALN:
2305 case CTSF_LAYERCOLPCT_TOTALN:
2306 s->count += d_weight;
2310 case CTSF_ROWPCT_COUNT:
2311 case CTSF_COLPCT_COUNT:
2312 case CTSF_TABLEPCT_COUNT:
2313 case CTSF_SUBTABLEPCT_COUNT:
2314 case CTSF_LAYERPCT_COUNT:
2315 case CTSF_LAYERROWPCT_COUNT:
2316 case CTSF_LAYERCOLPCT_COUNT:
2317 if (is_scale || !excluded_missing)
2318 s->count += d_weight;
2322 case CTSF_ROWPCT_VALIDN:
2323 case CTSF_COLPCT_VALIDN:
2324 case CTSF_TABLEPCT_VALIDN:
2325 case CTSF_SUBTABLEPCT_VALIDN:
2326 case CTSF_LAYERPCT_VALIDN:
2327 case CTSF_LAYERROWPCT_VALIDN:
2328 case CTSF_LAYERCOLPCT_VALIDN:
2332 s->count += d_weight;
2337 s->count += d_weight;
2341 if (is_scale || !excluded_missing)
2342 s->count += e_weight;
2349 s->count += e_weight;
2353 s->count += e_weight;
2359 if (!is_scale_missing)
2361 assert (!var_is_alpha (var)); /* XXX? */
2362 if (s->min == SYSMIS || value->f < s->min)
2364 if (s->max == SYSMIS || value->f > s->max)
2374 case CTSF_ROWPCT_SUM:
2375 case CTSF_COLPCT_SUM:
2376 case CTSF_TABLEPCT_SUM:
2377 case CTSF_SUBTABLEPCT_SUM:
2378 case CTSF_LAYERPCT_SUM:
2379 case CTSF_LAYERROWPCT_SUM:
2380 case CTSF_LAYERCOLPCT_SUM:
2381 if (!is_scale_missing)
2382 moments1_add (s->moments, value->f, e_weight);
2388 if (!is_scale_missing)
2390 s->ovalid += e_weight;
2392 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2393 *case_num_rw_idx (c, 0) = value->f;
2394 *case_num_rw_idx (c, 1) = e_weight;
2395 casewriter_write (s->writer, c);
2401 static enum ctables_domain_type
2402 ctables_function_domain (enum ctables_summary_function function)
2426 case CTSF_COLPCT_COUNT:
2427 case CTSF_COLPCT_SUM:
2428 case CTSF_COLPCT_TOTALN:
2429 case CTSF_COLPCT_VALIDN:
2432 case CTSF_LAYERCOLPCT_COUNT:
2433 case CTSF_LAYERCOLPCT_SUM:
2434 case CTSF_LAYERCOLPCT_TOTALN:
2435 case CTSF_LAYERCOLPCT_VALIDN:
2436 return CTDT_LAYERCOL;
2438 case CTSF_LAYERPCT_COUNT:
2439 case CTSF_LAYERPCT_SUM:
2440 case CTSF_LAYERPCT_TOTALN:
2441 case CTSF_LAYERPCT_VALIDN:
2444 case CTSF_LAYERROWPCT_COUNT:
2445 case CTSF_LAYERROWPCT_SUM:
2446 case CTSF_LAYERROWPCT_TOTALN:
2447 case CTSF_LAYERROWPCT_VALIDN:
2448 return CTDT_LAYERROW;
2450 case CTSF_ROWPCT_COUNT:
2451 case CTSF_ROWPCT_SUM:
2452 case CTSF_ROWPCT_TOTALN:
2453 case CTSF_ROWPCT_VALIDN:
2456 case CTSF_SUBTABLEPCT_COUNT:
2457 case CTSF_SUBTABLEPCT_SUM:
2458 case CTSF_SUBTABLEPCT_TOTALN:
2459 case CTSF_SUBTABLEPCT_VALIDN:
2460 return CTDT_SUBTABLE;
2462 case CTSF_TABLEPCT_COUNT:
2463 case CTSF_TABLEPCT_SUM:
2464 case CTSF_TABLEPCT_TOTALN:
2465 case CTSF_TABLEPCT_VALIDN:
2473 ctables_summary_value (const struct ctables_cell *cell,
2474 union ctables_summary *s,
2475 const struct ctables_summary_spec *ss)
2477 switch (ss->function)
2483 case CTSF_ROWPCT_COUNT:
2484 case CTSF_COLPCT_COUNT:
2485 case CTSF_TABLEPCT_COUNT:
2486 case CTSF_SUBTABLEPCT_COUNT:
2487 case CTSF_LAYERPCT_COUNT:
2488 case CTSF_LAYERROWPCT_COUNT:
2489 case CTSF_LAYERCOLPCT_COUNT:
2491 enum ctables_domain_type d = ctables_function_domain (ss->function);
2492 return (cell->domains[d]->e_count
2493 ? s->count / cell->domains[d]->e_count * 100
2497 case CTSF_ROWPCT_VALIDN:
2498 case CTSF_COLPCT_VALIDN:
2499 case CTSF_TABLEPCT_VALIDN:
2500 case CTSF_SUBTABLEPCT_VALIDN:
2501 case CTSF_LAYERPCT_VALIDN:
2502 case CTSF_LAYERROWPCT_VALIDN:
2503 case CTSF_LAYERCOLPCT_VALIDN:
2505 enum ctables_domain_type d = ctables_function_domain (ss->function);
2506 return (cell->domains[d]->e_valid
2507 ? s->count / cell->domains[d]->e_valid * 100
2511 case CTSF_ROWPCT_TOTALN:
2512 case CTSF_COLPCT_TOTALN:
2513 case CTSF_TABLEPCT_TOTALN:
2514 case CTSF_SUBTABLEPCT_TOTALN:
2515 case CTSF_LAYERPCT_TOTALN:
2516 case CTSF_LAYERROWPCT_TOTALN:
2517 case CTSF_LAYERCOLPCT_TOTALN:
2519 enum ctables_domain_type d = ctables_function_domain (ss->function);
2520 return (cell->domains[d]->e_total
2521 ? s->count / cell->domains[d]->e_total * 100
2545 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2550 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2556 double weight, variance;
2557 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2558 return calc_semean (variance, weight);
2564 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2565 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2570 double weight, mean;
2571 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2572 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2578 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2582 case CTSF_ROWPCT_SUM:
2583 case CTSF_COLPCT_SUM:
2584 case CTSF_TABLEPCT_SUM:
2585 case CTSF_SUBTABLEPCT_SUM:
2586 case CTSF_LAYERPCT_SUM:
2587 case CTSF_LAYERROWPCT_SUM:
2588 case CTSF_LAYERCOLPCT_SUM:
2595 struct casereader *reader = casewriter_make_reader (s->writer);
2598 struct percentile *ptile = percentile_create (
2599 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2600 struct order_stats *os = &ptile->parent;
2601 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2602 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2603 statistic_destroy (&ptile->parent.parent);
2610 struct casereader *reader = casewriter_make_reader (s->writer);
2613 struct mode *mode = mode_create ();
2614 struct order_stats *os = &mode->parent;
2615 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2616 s->ovalue = mode->mode;
2617 statistic_destroy (&mode->parent.parent);
2625 struct ctables_cell_sort_aux
2627 const struct ctables_nest *nest;
2628 enum pivot_axis_type a;
2632 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2634 const struct ctables_cell_sort_aux *aux = aux_;
2635 struct ctables_cell *const *ap = a_;
2636 struct ctables_cell *const *bp = b_;
2637 const struct ctables_cell *a = *ap;
2638 const struct ctables_cell *b = *bp;
2640 const struct ctables_nest *nest = aux->nest;
2641 for (size_t i = 0; i < nest->n; i++)
2642 if (i != nest->scale_idx)
2644 const struct variable *var = nest->vars[i];
2645 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2646 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2647 if (a_cv->category != b_cv->category)
2648 return a_cv->category > b_cv->category ? 1 : -1;
2650 const union value *a_val = &a_cv->value;
2651 const union value *b_val = &b_cv->value;
2652 switch (a_cv->category->type)
2658 case CCT_POSTCOMPUTE:
2659 case CCT_EXCLUDED_MISSING:
2660 /* Must be equal. */
2667 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2675 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2677 return a_cv->category->sort_ascending ? cmp : -cmp;
2683 const char *a_label = var_lookup_value_label (var, a_val);
2684 const char *b_label = var_lookup_value_label (var, b_val);
2686 ? (b_label ? strcmp (a_label, b_label) : 1)
2687 : (b_label ? -1 : value_compare_3way (
2688 a_val, b_val, var_get_width (var))));
2690 return a_cv->category->sort_ascending ? cmp : -cmp;
2704 For each ctables_table:
2705 For each combination of row vars:
2706 For each combination of column vars:
2707 For each combination of layer vars:
2709 Make a table of row values:
2710 Sort entries by row values
2711 Assign a 0-based index to each actual value
2712 Construct a dimension
2713 Make a table of column values
2714 Make a table of layer values
2716 Fill the table entry using the indexes from before.
/* Finds or creates, in section S, the domain of kind DOMAIN to which CELL
   belongs.

   A domain is identified by the subset of CELL's category values selected,
   for each axis, by nest->domains[DOMAIN].  The hash combines each selected
   value's category pointer and, unless the category is a total, subtotal, or
   postcompute, the data value itself.  Existing domains with the same hash
   are compared field by field against their `example' cell.  If none is
   equal, a new domain is allocated with CELL as its example and inserted
   into S->domains[DOMAIN]. */
2719 static struct ctables_domain *
2720 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2721 enum ctables_domain_type domain)
/* Pass 1: compute the hash over the values that define this domain. */
2724 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2726 const struct ctables_nest *nest = s->nests[a];
2727 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2729 size_t v_idx = nest->domains[domain][i];
2730 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2731 hash = hash_pointer (cv->category, hash);
/* Totals, subtotals, and postcomputes are identified by category alone. */
2732 if (cv->category->type != CCT_TOTAL
2733 && cv->category->type != CCT_SUBTOTAL
2734 && cv->category->type != CCT_POSTCOMPUTE)
2735 hash = value_hash (&cv->value,
2736 var_get_width (nest->vars[v_idx]), hash);
/* Pass 2: look for an existing domain whose example cell agrees with CELL on
   every selected value. */
2740 struct ctables_domain *d;
2741 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2743 const struct ctables_cell *df = d->example;
2744 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2746 const struct ctables_nest *nest = s->nests[a];
2747 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2749 size_t v_idx = nest->domains[domain][i];
2750 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2751 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
/* Mismatch if the categories differ, or if they are ordinary categories
   whose data values differ. */
2752 if (cv1->category != cv2->category
2753 || (cv1->category->type != CCT_TOTAL
2754 && cv1->category->type != CCT_SUBTOTAL
2755 && cv1->category->type != CCT_POSTCOMPUTE
2756 && !value_equal (&cv1->value, &cv2->value,
2757 var_get_width (nest->vars[v_idx]))))
/* No existing domain matched: create one, zero-initialized except for its
   example cell. */
2766 d = xmalloc (sizeof *d);
2767 *d = (struct ctables_domain) { .example = cell };
2768 hmap_insert (&s->domains[domain], &d->node, hash);
2772 static const struct ctables_category *
2773 ctables_categories_match (const struct ctables_categories *c,
2774 const union value *v, const struct variable *var)
2776 if (var_is_numeric (var) && v->f == SYSMIS)
2779 const struct ctables_category *othernm = NULL;
2780 for (size_t i = c->n_cats; i-- > 0; )
2782 const struct ctables_category *cat = &c->cats[i];
2786 if (cat->number == v->f)
2794 if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
2795 && (cat->range[1] == DBL_MAX || v->f <= cat->range[1]))
2800 if (var_is_value_missing (var, v))
2804 case CCT_POSTCOMPUTE:
2819 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2822 case CCT_EXCLUDED_MISSING:
2827 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's total category, looking only at the first and the last entry
   of C->cats: the first entry if its type is CCT_TOTAL, otherwise the last
   entry if its type is CCT_TOTAL.

   NOTE(review): both entries are dereferenced unconditionally, so this relies
   on C->n_cats being at least 1; with n_cats == 0 the index `n_cats - 1'
   wraps around.  Confirm that every category list reaching this function is
   nonempty. */
2830 static const struct ctables_category *
2831 ctables_categories_total (const struct ctables_categories *c)
2833 const struct ctables_category *first = &c->cats[0];
2834 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2835 return (first->type == CCT_TOTAL ? first
2836 : last->type == CCT_TOTAL ? last
2840 static struct ctables_cell *
2841 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
2842 const struct ctables_category *cats[PIVOT_N_AXES][10])
2845 enum ctables_summary_variant sv = CSV_CELL;
2846 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2848 const struct ctables_nest *nest = s->nests[a];
2849 for (size_t i = 0; i < nest->n; i++)
2850 if (i != nest->scale_idx)
2852 hash = hash_pointer (cats[a][i], hash);
2853 if (cats[a][i]->type != CCT_TOTAL
2854 && cats[a][i]->type != CCT_SUBTOTAL
2855 && cats[a][i]->type != CCT_POSTCOMPUTE)
2856 hash = value_hash (case_data (c, nest->vars[i]),
2857 var_get_width (nest->vars[i]), hash);
2863 struct ctables_cell *cell;
2864 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
2866 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2868 const struct ctables_nest *nest = s->nests[a];
2869 for (size_t i = 0; i < nest->n; i++)
2870 if (i != nest->scale_idx
2871 && (cats[a][i] != cell->axes[a].cvs[i].category
2872 || (cats[a][i]->type != CCT_TOTAL
2873 && cats[a][i]->type != CCT_SUBTOTAL
2874 && cats[a][i]->type != CCT_POSTCOMPUTE
2875 && !value_equal (case_data (c, nest->vars[i]),
2876 &cell->axes[a].cvs[i].value,
2877 var_get_width (nest->vars[i])))))
2886 cell = xmalloc (sizeof *cell);
2889 cell->omit_domains = 0;
2890 cell->postcompute = false;
2891 //struct string name = DS_EMPTY_INITIALIZER;
2892 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2894 const struct ctables_nest *nest = s->nests[a];
2895 cell->axes[a].cvs = (nest->n
2896 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
2898 for (size_t i = 0; i < nest->n; i++)
2900 const struct ctables_category *cat = cats[a][i];
2901 const struct variable *var = nest->vars[i];
2902 const union value *value = case_data (c, var);
2903 if (i != nest->scale_idx)
2905 const struct ctables_category *subtotal = cat->subtotal;
2906 if (cat->hide || (subtotal && subtotal->hide_subcategories))
2909 if (cat->type == CCT_TOTAL
2910 || cat->type == CCT_SUBTOTAL
2911 || cat->type == CCT_POSTCOMPUTE)
2913 /* XXX these should be more encompassing I think.*/
2917 case PIVOT_AXIS_COLUMN:
2918 cell->omit_domains |= ((1u << CTDT_TABLE) |
2919 (1u << CTDT_LAYER) |
2920 (1u << CTDT_LAYERCOL) |
2921 (1u << CTDT_SUBTABLE) |
2924 case PIVOT_AXIS_ROW:
2925 cell->omit_domains |= ((1u << CTDT_TABLE) |
2926 (1u << CTDT_LAYER) |
2927 (1u << CTDT_LAYERROW) |
2928 (1u << CTDT_SUBTABLE) |
2931 case PIVOT_AXIS_LAYER:
2932 cell->omit_domains |= ((1u << CTDT_TABLE) |
2933 (1u << CTDT_LAYER));
2937 if (cat->type == CCT_POSTCOMPUTE)
2938 cell->postcompute = true;
2941 cell->axes[a].cvs[i].category = cat;
2942 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
2945 if (i != nest->scale_idx)
2947 if (!ds_is_empty (&name))
2948 ds_put_cstr (&name, ", ");
2949 char *value_s = data_out (value, var_get_encoding (var),
2950 var_get_print_format (var),
2951 settings_get_fmt_settings ());
2952 if (cat->type == CCT_TOTAL
2953 || cat->type == CCT_SUBTOTAL
2954 || cat->type == CCT_POSTCOMPUTE)
2955 ds_put_format (&name, "%s=total", var_get_name (var));
2957 ds_put_format (&name, "%s=%s", var_get_name (var),
2958 value_s + strspn (value_s, " "));
2964 //cell->name = ds_steal_cstr (&name);
2966 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2967 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2968 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
2969 for (size_t i = 0; i < specs->n; i++)
2970 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
2971 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2972 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
2973 hmap_insert (&s->cells, &cell->node, hash);
/* Tests case C against the scale variable described by SPECS.  Three checks
   are made in order: whether SPECS is for a scale variable at all, whether
   C's value of SPECS->var is missing, and whether C's value of any of the
   SPECS->n_listwise_vars variables in SPECS->listwise_vars is missing.  All
   values are read as numbers with case_num().
   NOTE(review): from the name and its use in ctables_cell_add__, the result
   is presumably true when any of the missing-value checks hits and false
   when SPECS is not scale -- confirm against the return statements. */
2978 is_scale_missing (const struct ctables_summary_spec_set *specs,
2979 const struct ccase *c)
2981 if (!specs->is_scale)
2984 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
/* Variables linked listwise to the scale variable are checked the same way. */
2987 for (size_t i = 0; i < specs->n_listwise_vars; i++)
2989 const struct variable *var = specs->listwise_vars[i];
2990 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell of section S identified by CATS, creating the cell
   if necessary via ctables_cell_insert__().

   Every summary in the cell's summary spec set is updated with the case's
   value of the summary variable, and then each domain the cell belongs to
   has its totals updated, except for domains whose bit is set in
   CELL->omit_domains.

   D_WEIGHT and E_WEIGHT are accumulated into the d_* and e_* domain counters
   respectively.  IS_MISSING and EXCLUDED_MISSING are forwarded to
   ctables_summary_add(); EXCLUDED_MISSING additionally gates the domain
   count updates. */
2998 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
2999 const struct ctables_category *cats[PIVOT_N_AXES][10],
3000 bool is_missing, bool excluded_missing,
3001 double d_weight, double e_weight)
3003 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3004 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3006 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
/* Scale missingness is computed once per case, not once per summary. */
3008 bool scale_missing = is_scale_missing (specs, c);
3009 for (size_t i = 0; i < specs->n; i++)
3010 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3011 specs->var, case_data (c, specs->var), specs->is_scale,
3012 scale_missing, is_missing, excluded_missing,
3013 d_weight, e_weight);
3014 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bitmask with one bit per domain type (it is built with
   `|= 1u << CTDT_...'), so the test for domain DT must be a bitwise AND.  A
   logical AND is true whenever any bit is set, because `1u << dt' is never
   zero, and that skipped every domain instead of only the omitted ones. */
3015 if (!(cell->omit_domains & (1u << dt)))
3017 struct ctables_domain *d = cell->domains[dt];
3018 d->d_total += d_weight;
3019 d->e_total += e_weight;
3020 if (!excluded_missing)
3022 d->d_count += d_weight;
3023 d->e_count += e_weight;
3027 d->d_valid += d_weight;
3028 d->e_valid += e_weight;
3034 recurse_totals (struct ctables_section *s, const struct ccase *c,
3035 const struct ctables_category *cats[PIVOT_N_AXES][10],
3036 bool is_missing, bool excluded_missing,
3037 double d_weight, double e_weight,
3038 enum pivot_axis_type start_axis, size_t start_nest)
3040 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3042 const struct ctables_nest *nest = s->nests[a];
3043 for (size_t i = start_nest; i < nest->n; i++)
3045 if (i == nest->scale_idx)
3048 const struct variable *var = nest->vars[i];
3050 const struct ctables_category *total = ctables_categories_total (
3051 s->table->categories[var_get_dict_index (var)]);
3054 const struct ctables_category *save = cats[a][i];
3056 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3057 d_weight, e_weight);
3058 recurse_totals (s, c, cats, is_missing, excluded_missing,
3059 d_weight, e_weight, a, i + 1);
3068 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3069 const struct ctables_category *cats[PIVOT_N_AXES][10],
3070 bool is_missing, bool excluded_missing,
3071 double d_weight, double e_weight,
3072 enum pivot_axis_type start_axis, size_t start_nest)
3074 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3076 const struct ctables_nest *nest = s->nests[a];
3077 for (size_t i = start_nest; i < nest->n; i++)
3079 if (i == nest->scale_idx)
3082 const struct ctables_category *save = cats[a][i];
3085 cats[a][i] = save->subtotal;
3086 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3087 d_weight, e_weight);
3088 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3089 d_weight, e_weight, a, i + 1);
3098 ctables_add_occurrence (const struct variable *var,
3099 const union value *value,
3100 struct hmap *occurrences)
3102 int width = var_get_width (var);
3103 unsigned int hash = value_hash (value, width, 0);
3105 struct ctables_occurrence *o;
3106 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3108 if (value_equal (value, &o->value, width))
3111 o = xmalloc (sizeof *o);
3112 value_clone (&o->value, value, width);
3113 hmap_insert (occurrences, &o->node, hash);
3117 ctables_cell_insert (struct ctables_section *s,
3118 const struct ccase *c,
3119 double d_weight, double e_weight)
3121 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3123 /* Does at least one categorical variable have a missing value in an included
3124 or excluded category? */
3125 bool is_missing = false;
3127 /* Does at least one categorical variable have a missing value in an excluded
3129 bool excluded_missing = false;
3131 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3133 const struct ctables_nest *nest = s->nests[a];
3134 for (size_t i = 0; i < nest->n; i++)
3136 if (i == nest->scale_idx)
3139 const struct variable *var = nest->vars[i];
3140 const union value *value = case_data (c, var);
3142 bool var_missing = var_is_value_missing (var, value) != 0;
3146 cats[a][i] = ctables_categories_match (
3147 s->table->categories[var_get_dict_index (var)], value, var);
3153 static const struct ctables_category cct_excluded_missing = {
3154 .type = CCT_EXCLUDED_MISSING,
3157 cats[a][i] = &cct_excluded_missing;
3158 excluded_missing = true;
3163 if (!excluded_missing)
3164 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3166 const struct ctables_nest *nest = s->nests[a];
3167 for (size_t i = 0; i < nest->n; i++)
3168 if (i != nest->scale_idx)
3170 const struct variable *var = nest->vars[i];
3171 const union value *value = case_data (c, var);
3172 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3176 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3177 d_weight, e_weight);
3179 //if (!excluded_missing)
3181 recurse_totals (s, c, cats, is_missing, excluded_missing,
3182 d_weight, e_weight, 0, 0);
3183 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3184 d_weight, e_weight, 0, 0);
/* A position within a summary specification set. */
struct merge_item
  {
    const struct ctables_summary_spec_set *set; /* The set. */
    size_t ofs;                 /* Index into set->specs[]. */
  };
3195 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3197 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3198 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3199 if (as->function != bs->function)
3200 return as->function > bs->function ? 1 : -1;
3201 else if (as->percentile != bs->percentile)
3202 return as->percentile < bs->percentile ? 1 : -1;
3203 return strcmp (as->label, bs->label);
3206 static struct pivot_value *
3207 ctables_category_create_label (const struct ctables_category *cat,
3208 const struct variable *var,
3209 const union value *value)
3211 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3212 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3213 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3214 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3215 : pivot_value_new_var_value (var, value));
3218 static struct ctables_value *
3219 ctables_value_find__ (struct ctables_table *t, const union value *value,
3220 int width, unsigned int hash)
3222 struct ctables_value *clv;
3223 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3224 hash, &t->clabels_values_map)
3225 if (value_equal (value, &clv->value, width))
3231 ctables_value_insert (struct ctables_table *t, const union value *value,
3234 unsigned int hash = value_hash (value, width, 0);
3235 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3238 clv = xmalloc (sizeof *clv);
3239 value_clone (&clv->value, value, width);
3240 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T's category-label value map that equals VALUE, which
   has the given WIDTH, or a null pointer if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3253 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3254 size_t ix[PIVOT_N_AXES])
3256 if (a < PIVOT_N_AXES)
3258 size_t limit = MAX (t->stacks[a].n, 1);
3259 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3260 ctables_table_add_section (t, a + 1, ix);
3264 struct ctables_section *s = &t->sections[t->n_sections++];
3265 *s = (struct ctables_section) {
3267 .cells = HMAP_INITIALIZER (s->cells),
3269 for (a = 0; a < PIVOT_N_AXES; a++)
3272 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3274 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3275 for (size_t i = 0; i < nest->n; i++)
3276 hmap_init (&s->occurrences[a][i]);
3278 for (size_t i = 0; i < N_CTDTS; i++)
3279 hmap_init (&s->domains[i]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
3302 ctpo_div (double a, double b)
3304 return b ? a / b : SYSMIS;
3308 ctpo_pow (double a, double b)
3310 int save_errno = errno;
3312 double result = pow (a, b);
3320 ctpo_neg (double a, double b UNUSED)
3325 struct ctables_pcexpr_evaluate_ctx
3327 const struct ctables_cell *cell;
3328 const struct ctables_section *section;
3329 const struct ctables_categories *cats;
3330 enum pivot_axis_type pc_a;
3334 static double ctables_pcexpr_evaluate (
3335 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3338 ctables_pcexpr_evaluate_nonterminal (
3339 const struct ctables_pcexpr_evaluate_ctx *ctx,
3340 const struct ctables_pcexpr *e, size_t n_args,
3341 double evaluate (double, double))
3343 double args[2] = { 0, 0 };
3344 for (size_t i = 0; i < n_args; i++)
3346 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3347 if (!isfinite (args[i]) || args[i] == SYSMIS)
3350 return evaluate (args[0], args[1]);
3354 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3355 const struct ctables_cell_value *pc_cv)
3357 const struct ctables_section *s = ctx->section;
3360 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3362 const struct ctables_nest *nest = s->nests[a];
3363 for (size_t i = 0; i < nest->n; i++)
3364 if (i != nest->scale_idx)
3366 const struct ctables_cell_value *cv
3367 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3368 : &ctx->cell->axes[a].cvs[i]);
3369 hash = hash_pointer (cv->category, hash);
3370 if (cv->category->type != CCT_TOTAL
3371 && cv->category->type != CCT_SUBTOTAL
3372 && cv->category->type != CCT_POSTCOMPUTE)
3373 hash = value_hash (&cv->value,
3374 var_get_width (nest->vars[i]), hash);
3378 struct ctables_cell *tc;
3379 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3381 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3383 const struct ctables_nest *nest = s->nests[a];
3384 for (size_t i = 0; i < nest->n; i++)
3385 if (i != nest->scale_idx)
3387 const struct ctables_cell_value *p_cv
3388 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3389 : &ctx->cell->axes[a].cvs[i]);
3390 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3391 if (p_cv->category != t_cv->category
3392 || (p_cv->category->type != CCT_TOTAL
3393 && p_cv->category->type != CCT_SUBTOTAL
3394 && p_cv->category->type != CCT_POSTCOMPUTE
3395 && !value_equal (&p_cv->value,
3397 var_get_width (nest->vars[i]))))
3409 const struct ctables_table *t = s->table;
3410 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3411 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3412 size_t j = 0 /* XXX */;
3413 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
3417 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3418 const struct ctables_pcexpr *e)
3425 case CTPO_CAT_RANGE:
3427 struct ctables_cell_value cv = {
3428 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3430 assert (cv.category != NULL);
3432 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3433 const struct ctables_occurrence *o;
3436 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3437 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3438 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3440 cv.value = o->value;
3441 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3446 case CTPO_CAT_NUMBER:
3447 case CTPO_CAT_STRING:
3448 case CTPO_CAT_MISSING:
3449 case CTPO_CAT_OTHERNM:
3450 case CTPO_CAT_SUBTOTAL:
3451 case CTPO_CAT_TOTAL:
3453 struct ctables_cell_value cv = {
3454 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3455 .value = { .f = e->number },
3457 assert (cv.category != NULL);
3458 return ctables_pcexpr_evaluate_category (ctx, &cv);
3462 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3465 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3468 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3471 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3474 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3477 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3484 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3485 const struct ctables_cell *cell)
3487 enum pivot_axis_type pc_a;
3489 const struct ctables_postcompute *pc;
3490 for (pc_a = 0; ; pc_a++)
3492 assert (pc_a < PIVOT_N_AXES);
3493 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3495 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3496 if (cv->category->type == CCT_POSTCOMPUTE)
3498 pc = cv->category->pc;
3505 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3506 const struct ctables_categories *cats = s->table->categories[
3507 var_get_dict_index (var)];
3508 struct ctables_pcexpr_evaluate_ctx ctx = {
3513 .pc_a_idx = pc_a_idx,
3515 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3519 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3521 struct pivot_table *pt = pivot_table_create__ (
3523 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3524 : pivot_value_new_text (N_("Custom Tables"))),
3527 pivot_table_set_caption (
3528 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3530 pivot_table_set_caption (
3531 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
3533 bool summary_dimension = (t->summary_axis != t->slabels_axis
3534 || (!t->slabels_visible
3535 && t->summary_specs.n > 1));
3536 if (summary_dimension)
3538 struct pivot_dimension *d = pivot_dimension_create (
3539 pt, t->slabels_axis, N_("Statistics"));
3540 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3541 if (!t->slabels_visible)
3542 d->hide_all_labels = true;
3543 for (size_t i = 0; i < specs->n; i++)
3544 pivot_category_create_leaf (
3545 d->root, pivot_value_new_text (specs->specs[i].label));
3548 bool categories_dimension = t->clabels_example != NULL;
3549 if (categories_dimension)
3551 struct pivot_dimension *d = pivot_dimension_create (
3552 pt, t->label_axis[t->clabels_from_axis],
3553 t->clabels_from_axis == PIVOT_AXIS_ROW
3554 ? N_("Row Categories")
3555 : N_("Column Categories"));
3556 const struct variable *var = t->clabels_example;
3557 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3558 for (size_t i = 0; i < t->n_clabels_values; i++)
3560 const struct ctables_value *value = t->clabels_values[i];
3561 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3562 assert (cat != NULL);
3563 pivot_category_create_leaf (d->root, ctables_category_create_label (
3564 cat, t->clabels_example, &value->value));
3568 pivot_table_set_look (pt, ct->look);
3569 struct pivot_dimension *d[PIVOT_N_AXES];
3570 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3572 static const char *names[] = {
3573 [PIVOT_AXIS_ROW] = N_("Rows"),
3574 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3575 [PIVOT_AXIS_LAYER] = N_("Layers"),
3577 d[a] = (t->axes[a] || a == t->summary_axis
3578 ? pivot_dimension_create (pt, a, names[a])
3583 assert (t->axes[a]);
3585 for (size_t i = 0; i < t->stacks[a].n; i++)
3587 struct ctables_nest *nest = &t->stacks[a].nests[i];
3588 struct ctables_section **sections = xnmalloc (t->n_sections,
3590 size_t n_sections = 0;
3592 size_t n_total_cells = 0;
3593 size_t max_depth = 0;
3594 for (size_t j = 0; j < t->n_sections; j++)
3595 if (t->sections[j].nests[a] == nest)
3597 struct ctables_section *s = &t->sections[j];
3598 sections[n_sections++] = s;
3599 n_total_cells += s->cells.count;
3601 size_t depth = s->nests[a]->n;
3602 max_depth = MAX (depth, max_depth);
3605 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3607 size_t n_sorted = 0;
3609 for (size_t j = 0; j < n_sections; j++)
3611 struct ctables_section *s = sections[j];
3613 struct ctables_cell *cell;
3614 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3616 sorted[n_sorted++] = cell;
3617 assert (n_sorted <= n_total_cells);
3620 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3621 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
3624 for (size_t j = 0; j < n_sorted; j++)
3626 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
3631 struct ctables_level
3633 enum ctables_level_type
3635 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3636 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3637 CTL_SUMMARY, /* Summary functions. */
3641 enum settings_value_show vlabel; /* CTL_VAR only. */
3644 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3645 size_t n_levels = 0;
3646 for (size_t k = 0; k < nest->n; k++)
3648 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3649 if (vlabel != CTVL_NONE)
3651 levels[n_levels++] = (struct ctables_level) {
3653 .vlabel = (enum settings_value_show) vlabel,
3658 if (nest->scale_idx != k
3659 && (k != nest->n - 1 || t->label_axis[a] == a))
3661 levels[n_levels++] = (struct ctables_level) {
3662 .type = CTL_CATEGORY,
3668 if (!summary_dimension && a == t->slabels_axis)
3670 levels[n_levels++] = (struct ctables_level) {
3671 .type = CTL_SUMMARY,
3672 .var_idx = SIZE_MAX,
3676 /* Pivot categories:
3678 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3679 - category for nest->vars[0], if nest->scale_idx != 0
3680 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3681 - category for nest->vars[1], if nest->scale_idx != 1
3683 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3684 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3685 - summary function, if 'a == t->slabels_axis && a ==
3688 Additional dimensions:
3690 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3692 - If 't->label_axis[b] == a' for some 'b != a', add a category
3697 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3699 for (size_t j = 0; j < n_sorted; j++)
3701 struct ctables_cell *cell = sorted[j];
3702 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
3704 size_t n_common = 0;
3707 for (; n_common < n_levels; n_common++)
3709 const struct ctables_level *level = &levels[n_common];
3710 if (level->type == CTL_CATEGORY)
3712 size_t var_idx = level->var_idx;
3713 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3714 if (prev->axes[a].cvs[var_idx].category != c)
3716 else if (c->type != CCT_SUBTOTAL
3717 && c->type != CCT_TOTAL
3718 && c->type != CCT_POSTCOMPUTE
3719 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3720 &cell->axes[a].cvs[var_idx].value,
3721 var_get_type (nest->vars[var_idx])))
3727 for (size_t k = n_common; k < n_levels; k++)
3729 const struct ctables_level *level = &levels[k];
3730 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3731 if (level->type == CTL_SUMMARY)
3733 assert (k == n_levels - 1);
3735 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3736 for (size_t m = 0; m < specs->n; m++)
3738 int leaf = pivot_category_create_leaf (
3739 parent, pivot_value_new_text (specs->specs[m].label));
3746 const struct variable *var = nest->vars[level->var_idx];
3747 struct pivot_value *label;
3748 if (level->type == CTL_VAR)
3750 label = pivot_value_new_variable (var);
3751 label->variable.show = level->vlabel;
3753 else if (level->type == CTL_CATEGORY)
3755 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3756 label = ctables_category_create_label (cv->category,
3762 if (k == n_levels - 1)
3763 prev_leaf = pivot_category_create_leaf (parent, label);
3765 groups[k] = pivot_category_create_group__ (parent, label);
3769 cell->axes[a].leaf = prev_leaf;
3776 for (size_t i = 0; i < t->n_sections; i++)
3778 struct ctables_section *s = &t->sections[i];
3780 struct ctables_cell *cell;
3781 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3786 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3787 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3788 for (size_t j = 0; j < specs->n; j++)
3791 size_t n_dindexes = 0;
3793 if (summary_dimension)
3794 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3796 if (categories_dimension)
3798 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3799 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3800 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3801 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3804 dindexes[n_dindexes++] = ctv->leaf;
3807 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3810 int leaf = cell->axes[a].leaf;
3811 if (a == t->summary_axis && !summary_dimension)
3813 dindexes[n_dindexes++] = leaf;
3816 const struct ctables_summary_spec *ss = &specs->specs[j];
3818 double d = (cell->postcompute
3819 ? ctables_cell_calculate_postcompute (s, cell)
3820 : ctables_summary_value (cell, &cell->summaries[j], ss));
3821 struct pivot_value *value;
3822 if (ct->hide_threshold != 0
3823 && d < ct->hide_threshold
3824 && (cell->postcompute
3826 : ctables_summary_function_is_count (ss->function)))
3828 value = pivot_value_new_user_text_nocopy (
3829 xasprintf ("<%d", ct->hide_threshold));
3831 else if (d == 0 && ct->zero)
3832 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3833 else if (d == SYSMIS && ct->missing)
3834 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3835 else if (specs->specs[j].is_ctables_format)
3837 char *s = data_out_stretchy (&(union value) { .f = d },
3839 &specs->specs[j].format,
3840 &ct->ctables_formats, NULL);
3841 value = pivot_value_new_user_text_nocopy (s);
3845 value = pivot_value_new_number (d);
3846 value->numeric.format = specs->specs[j].format;
3848 pivot_table_put (pt, dindexes, n_dindexes, value);
3853 pivot_table_submit (pt);
3857 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
3859 enum pivot_axis_type label_pos = t->label_axis[a];
3863 t->clabels_from_axis = a;
3865 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
3866 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
3868 const struct ctables_stack *stack = &t->stacks[a];
3872 const struct ctables_nest *n0 = &stack->nests[0];
3874 const struct variable *v0 = n0->vars[n0->n - 1];
3875 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3876 t->clabels_example = v0;
3878 for (size_t i = 0; i < c0->n_cats; i++)
3879 if (c0->cats[i].type == CCT_FUNCTION)
3881 msg (SE, _("%s=%s is not allowed with sorting based "
3882 "on a summary function."),
3883 subcommand_name, pos_name);
3886 if (n0->n - 1 == n0->scale_idx)
3888 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
3889 "but %s is a scale variable."),
3890 subcommand_name, pos_name, var_get_name (v0));
3894 for (size_t i = 1; i < stack->n; i++)
3896 const struct ctables_nest *ni = &stack->nests[i];
3898 const struct variable *vi = ni->vars[ni->n - 1];
3899 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
3901 if (ni->n - 1 == ni->scale_idx)
3903 msg (SE, _("%s=%s requires the variables to be moved to be "
3904 "categorical, but %s is a scale variable."),
3905 subcommand_name, pos_name, var_get_name (vi));
3908 if (var_get_width (v0) != var_get_width (vi))
3910 msg (SE, _("%s=%s requires the variables to be "
3911 "moved to have the same width, but %s has "
3912 "width %d and %s has width %d."),
3913 subcommand_name, pos_name,
3914 var_get_name (v0), var_get_width (v0),
3915 var_get_name (vi), var_get_width (vi));
3918 if (!val_labs_equal (var_get_value_labels (v0),
3919 var_get_value_labels (vi)))
3921 msg (SE, _("%s=%s requires the variables to be "
3922 "moved to have the same value labels, but %s "
3923 "and %s have different value labels."),
3924 subcommand_name, pos_name,
3925 var_get_name (v0), var_get_name (vi));
3928 if (!ctables_categories_equal (c0, ci))
3930 msg (SE, _("%s=%s requires the variables to be "
3931 "moved to have the same category "
3932 "specifications, but %s and %s have different "
3933 "category specifications."),
3934 subcommand_name, pos_name,
3935 var_get_name (v0), var_get_name (vi));
3944 ctables_prepare_table (struct ctables_table *t)
3946 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3949 t->stacks[a] = enumerate_fts (a, t->axes[a]);
3951 for (size_t j = 0; j < t->stacks[a].n; j++)
3953 struct ctables_nest *nest = &t->stacks[a].nests[j];
3954 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3956 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
3957 nest->n_domains[dt] = 0;
3959 for (size_t k = 0; k < nest->n; k++)
3961 if (k == nest->scale_idx)
3970 if (a != PIVOT_AXIS_LAYER)
3977 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
3978 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
3979 : a == PIVOT_AXIS_ROW)
3981 if (k == nest->n - 1
3982 || (nest->scale_idx == nest->n - 1
3983 && k == nest->n - 2))
3989 if (a == PIVOT_AXIS_COLUMN)
3994 if (a == PIVOT_AXIS_ROW)
3999 nest->domains[dt][nest->n_domains[dt]++] = k;
4006 struct ctables_nest *nest = xmalloc (sizeof *nest);
4007 *nest = (struct ctables_nest) { .n = 0 };
4008 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4011 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4012 for (size_t i = 0; i < stack->n; i++)
4014 struct ctables_nest *nest = &stack->nests[i];
4015 if (!nest->specs[CSV_CELL].n)
4017 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4018 specs->specs = xmalloc (sizeof *specs->specs);
4021 enum ctables_summary_function function
4022 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4023 struct ctables_var var = { .is_mrset = false, .var = specs->var };
4025 *specs->specs = (struct ctables_summary_spec) {
4026 .function = function,
4027 .format = ctables_summary_default_format (function, &var),
4028 .label = ctables_summary_default_label (function, 0),
4031 specs->var = nest->vars[0];
4033 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4034 &nest->specs[CSV_CELL]);
4036 else if (!nest->specs[CSV_TOTAL].n)
4037 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4038 &nest->specs[CSV_CELL]);
4040 if (t->ctables->smissing_listwise)
4042 struct variable **listwise_vars = NULL;
4044 size_t allocated = 0;
4046 for (size_t j = nest->group_head; j < stack->n; j++)
4048 const struct ctables_nest *other_nest = &stack->nests[j];
4049 if (other_nest->group_head != nest->group_head)
4052 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4055 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4056 sizeof *listwise_vars);
4057 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4060 for (size_t j = 0; j < N_CSVS; j++)
4062 nest->specs[j].listwise_vars = listwise_vars;
4063 nest->specs[j].n_listwise_vars = n;
4068 struct ctables_summary_spec_set *merged = &t->summary_specs;
4069 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4071 for (size_t j = 0; j < stack->n; j++)
4073 const struct ctables_nest *nest = &stack->nests[j];
4075 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4076 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4081 struct merge_item min = items[0];
4082 for (size_t j = 1; j < n_left; j++)
4083 if (merge_item_compare_3way (&items[j], &min) < 0)
4086 if (merged->n >= merged->allocated)
4087 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4088 sizeof *merged->specs);
4089 merged->specs[merged->n++] = min.set->specs[min.ofs];
4091 for (size_t j = 0; j < n_left; )
4093 if (merge_item_compare_3way (&items[j], &min) == 0)
4095 struct merge_item *item = &items[j];
4096 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4097 if (++item->ofs >= item->set->n)
4099 items[j] = items[--n_left];
4108 for (size_t j = 0; j < merged->n; j++)
4109 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4111 for (size_t j = 0; j < stack->n; j++)
4113 const struct ctables_nest *nest = &stack->nests[j];
4114 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4116 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4117 for (size_t k = 0; k < specs->n; k++)
4118 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4119 specs->specs[k].axis_idx);
4125 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4126 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
4130 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4131 enum pivot_axis_type a)
4133 struct ctables_stack *stack = &t->stacks[a];
4134 for (size_t i = 0; i < stack->n; i++)
4136 const struct ctables_nest *nest = &stack->nests[i];
4137 const struct variable *var = nest->vars[nest->n - 1];
4138 const union value *value = case_data (c, var);
4140 if (var_is_numeric (var) && value->f == SYSMIS)
4143 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4145 ctables_value_insert (t, value, var_get_width (var));
4150 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4152 const struct ctables_value *const *ap = a_;
4153 const struct ctables_value *const *bp = b_;
4154 const struct ctables_value *a = *ap;
4155 const struct ctables_value *b = *bp;
4156 const int *width = width_;
4157 return value_compare_3way (&a->value, &b->value, *width);
4161 ctables_sort_clabels_values (struct ctables_table *t)
4163 const struct variable *v0 = t->clabels_example;
4164 int width = var_get_width (v0);
4166 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4169 const struct val_labs *val_labs = var_get_value_labels (v0);
4170 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4171 vl = val_labs_next (val_labs, vl))
4172 if (ctables_categories_match (c0, &vl->value, v0))
4173 ctables_value_insert (t, &vl->value, width);
4176 size_t n = hmap_count (&t->clabels_values_map);
4177 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4179 struct ctables_value *clv;
4181 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4182 t->clabels_values[i++] = clv;
4183 t->n_clabels_values = n;
4186 sort (t->clabels_values, n, sizeof *t->clabels_values,
4187 compare_clabels_values_3way, &width);
4189 for (size_t i = 0; i < n; i++)
4190 t->clabels_values[i]->leaf = i;
4194 ctables_add_category_occurrences (const struct variable *var,
4195 struct hmap *occurrences,
4196 const struct ctables_categories *cats)
4198 const struct val_labs *val_labs = var_get_value_labels (var);
4200 for (size_t i = 0; i < cats->n_cats; i++)
4202 const struct ctables_category *c = &cats->cats[i];
4206 ctables_add_occurrence (var, &(const union value) { .f = c->number },
4214 assert (var_is_numeric (var));
4215 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4216 vl = val_labs_next (val_labs, vl))
4217 if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1])
4218 ctables_add_occurrence (var, &vl->value, occurrences);
4222 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4223 vl = val_labs_next (val_labs, vl))
4224 if (var_is_value_missing (var, &vl->value))
4225 ctables_add_occurrence (var, &vl->value, occurrences);
4229 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4230 vl = val_labs_next (val_labs, vl))
4231 ctables_add_occurrence (var, &vl->value, occurrences);
4234 case CCT_POSTCOMPUTE:
4244 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4245 vl = val_labs_next (val_labs, vl))
4246 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4247 ctables_add_occurrence (var, &vl->value, occurrences);
4250 case CCT_EXCLUDED_MISSING:
/* Recursive worker that walks the nest positions of section S axis by axis.
   A is the axis being processed and A_IDX the position within that axis's
   nest.  For the variable at each position it tries every recorded
   occurrence value (writing it into case C and storing the matching category
   in CATS[A][A_IDX]) and every postcompute category, recursing to the next
   position each time.  Once all axes are exhausted it calls
   ctables_cell_insert__() with the assembled case and categories. */
4257 ctables_section_recurse_add_empty_categories (
4258 struct ctables_section *s,
4259 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4260 enum pivot_axis_type a, size_t a_idx)
/* Base case: every axis has been assigned, so insert the cell. */
4262 if (a >= PIVOT_N_AXES)
4263 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest, or its positions are used up: move to the first
   position of the next axis. */
4264 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4265 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4268 const struct variable *var = s->nests[a]->vars[a_idx];
/* Categories are looked up by the variable's dictionary index. */
4269 const struct ctables_categories *categories = s->table->categories[
4270 var_get_dict_index (var)];
4271 int width = var_get_width (var);
4272 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4273 const struct ctables_occurrence *o;
4274 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in C with this occurrence's value before matching. */
4276 union value *value = case_data_rw (c, var);
4277 value_destroy (value, width);
4278 value_clone (value, &o->value, width);
4279 cats[a][a_idx] = ctables_categories_match (categories, value, var);
/* Every recorded occurrence is expected to match some category. */
4280 assert (cats[a][a_idx] != NULL);
4281 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are taken directly from the category list rather
   than from an occurrence value. */
4284 for (size_t i = 0; i < categories->n_cats; i++)
4286 const struct ctables_category *cat = &categories->cats[i];
4287 if (cat->type == CCT_POSTCOMPUTE)
4289 cats[a][a_idx] = cat;
4290 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that may not have been
   seen in the data.  It first feeds the categories of the non-scale
   variables on each axis through ctables_add_category_occurrences(), then
   builds a scratch case from the dictionary's prototype and lets
   ctables_section_recurse_add_empty_categories() enumerate the combinations.

   NOTE(review): the lines that update and test `show_empty' are not visible
   in this excerpt; presumably occurrences are only added for variables whose
   categories set show_empty and the enumeration is skipped when none do --
   confirm against the full source. */
4297 ctables_section_add_empty_categories (struct ctables_section *s)
4299 bool show_empty = false;
4300 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4302 for (size_t k = 0; k < s->nests[a]->n; k++)
/* The nest's scale variable is skipped. */
4303 if (k != s->nests[a]->scale_idx)
4305 const struct variable *var = s->nests[a]->vars[k];
4306 const struct ctables_categories *cats = s->table->categories[
4307 var_get_dict_index (var)];
4308 if (cats->show_empty)
4311 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Fixed capacity of 10 nest positions per axis (flagged XXX by the author). */
4317 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4318 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4319 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Runs the tables in CT against the active dataset DS.

   The work happens in three phases: allocate and set up the sections of
   every table, read every case once and insert it into each section of each
   table, then (per table) sort the collected category-label values, add
   empty categories, and output the table.  Returns the result of
   proc_commit(). */
4324 ctables_execute (struct dataset *ds, struct ctables *ct)
4326 for (size_t i = 0; i < ct->n_tables; i++)
4328 struct ctables_table *t = ct->tables[i];
/* One section slot per combination of row, column, and layer stacks; an
   empty axis still counts as one. */
4329 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4330 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4331 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4332 sizeof *t->sections);
4333 size_t ix[PIVOT_N_AXES];
4334 ctables_table_add_section (t, 0, ix);
/* Single pass over the data. */
4337 struct casereader *input = proc_open (ds);
4338 bool warn_on_invalid = true;
4339 for (struct ccase *c = casereader_read (input); c;
4340 case_unref (c), c = casereader_read (input))
/* d_weight comes from the dictionary's case weight; e_weight comes from
   CT's own e_weight variable when one is set. */
4342 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4344 double e_weight = (ct->e_weight
4345 ? var_force_valid_weight (ct->e_weight,
4346 case_num (c, ct->e_weight),
4350 for (size_t i = 0; i < ct->n_tables; i++)
4352 struct ctables_table *t = ct->tables[i];
4354 for (size_t j = 0; j < t->n_sections; j++)
4355 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* For an axis whose category labels are displayed on a different axis,
   record this case's label values. */
4357 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4358 if (t->label_axis[a] != a)
4359 ctables_insert_clabels_values (t, c, a);
4362 casereader_destroy (input);
/* Post-processing and output, one table at a time. */
4364 for (size_t i = 0; i < ct->n_tables; i++)
4366 struct ctables_table *t = ct->tables[i];
4368 if (t->clabels_example)
4369 ctables_sort_clabels_values (t);
4371 for (size_t j = 0; j < t->n_sections; j++)
4372 ctables_section_add_empty_categories (&t->sections[j]);
4374 ctables_table_output (ct, ct->tables[i]);
4376 return proc_commit (ds);
/* Type of a parser function for one precedence level of a postcompute
   expression: it takes the lexer and returns the parsed expression node.
   parse_binary_operators() receives one of these as its `parse_next_level'
   argument. */
4381 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
/* Frees postcompute expression E.  Nodes with operands recursively destroy
   both entries of E->subs; the node's source location is released with
   msg_location_destroy().

   NOTE(review): the switch header, several case labels, and the final
   deallocation of E itself are not visible in this excerpt. */
4384 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4390 case CTPO_CAT_STRING:
/* Operator nodes own up to two subexpressions. */
4400 for (size_t i = 0; i < 2; i++)
4401 ctables_pcexpr_destroy (e->subs[i]);
/* Category-reference nodes listed together here. */
4405 case CTPO_CAT_NUMBER:
4406 case CTPO_CAT_RANGE:
4407 case CTPO_CAT_MISSING:
4408 case CTPO_CAT_OTHERNM:
4409 case CTPO_CAT_SUBTOTAL:
4410 case CTPO_CAT_TOTAL:
4414 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP with operands SUB0
   and SUB1.  The node stores both operand pointers in `subs', and its
   location is the merge of the two operands' locations.  Both operands are
   dereferenced, so neither may be null. */
4419 static struct ctables_pcexpr *
4420 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4421 struct ctables_pcexpr *sub0,
4422 struct ctables_pcexpr *sub1)
4424 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4425 *e = (struct ctables_pcexpr) {
4427 .subs = { sub0, sub1 },
4428 .location = msg_location_merged (sub0->location, sub1->location),
4433 /* How to parse an operator. */
/* Lexer token that match_operator() compares against the current token. */
4436 enum token_type token;
/* Operation given to ctables_pcexpr_allocate_binary() when `token' matches. */
4437 enum ctables_postcompute_op op;
/* Scans the N_OPS entries of OPS for one whose token equals the lexer's
   current token.

   NOTE(review): the return statements are not visible in this excerpt.
   T_NEG_NUM is singled out below; presumably every other token is consumed
   on a match while a negative-number token is left in place so that it can
   serve as the right-hand operand -- confirm against the full source. */
4440 static const struct operator *
4441 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4443 for (const struct operator *op = ops; op < ops + n_ops; op++)
4444 if (lex_token (lexer) == op->token)
4446 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators at one precedence level,
   given an already-parsed left operand LHS.  Each iteration looks for one of
   the N_OPS operators in OPS, parses the right operand with
   PARSE_NEXT_LEVEL, and folds the pair into a new binary node that becomes
   the next left operand, which makes the chain left-associative.

   If CHAIN_WARNING is nonnull it is issued at LHS's location when the loop
   counter has passed 1 at the point the warning check runs.  When the right
   operand cannot be parsed, LHS is destroyed.

   NOTE(review): the loop-exit and return lines are not visible here. */
4455 static struct ctables_pcexpr *
4456 parse_binary_operators__ (struct lexer *lexer,
4457 const struct operator ops[], size_t n_ops,
4458 parse_recursively_func *parse_next_level,
4459 const char *chain_warning,
4460 struct ctables_pcexpr *lhs)
4462 for (int op_count = 0; ; op_count++)
4464 const struct operator *op = match_operator (lexer, ops, n_ops);
4467 if (op_count > 1 && chain_warning)
4468 msg_at (SW, lhs->location, "%s", chain_warning);
4473 struct ctables_pcexpr *rhs = parse_next_level (lexer);
4476 ctables_pcexpr_destroy (lhs);
4480 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: obtains the first operand
   from PARSE_NEXT_LEVEL and hands it, with the remaining arguments unchanged,
   to parse_binary_operators__() to consume any operators that follow. */
4484 static struct ctables_pcexpr *
4485 parse_binary_operators (struct lexer *lexer,
4486 const struct operator ops[], size_t n_ops,
4487 parse_recursively_func *parse_next_level,
4488 const char *chain_warning)
4490 struct ctables_pcexpr *lhs = parse_next_level (lexer);
4494 return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
4495 chain_warning, lhs);
/* Forward declaration: parse_primary() calls parse_add() for parenthesized
   subexpressions before parse_add() is defined. */
4498 static struct ctables_pcexpr *parse_add (struct lexer *);
/* Returns, by value, a CTPO_CAT_RANGE expression node whose range is
   { LOW, HIGH }. */
4500 static struct ctables_pcexpr
4501 ctpo_cat_range (double low, double high)
4503 return (struct ctables_pcexpr) {
4504 .op = CTPO_CAT_RANGE,
4505 .range = { low, high },
/* Parses a primary in a postcompute expression and returns a newly
   allocated node for it.  The forms visible here are:

     - a numeric constant;
     - the keywords MISSING, OTHERNM, TOTAL, and SUBTOTAL, the last one
       optionally followed by a bracketed index of at least 1;
     - a bracketed category reference: [LO THRU n], [n THRU HI], [n THRU m],
       [n], or a string;
     - a parenthesized subexpression, parsed with parse_add().

   For every form except the parenthesized one, the node is built in the
   local E, stamped with the source location from the starting token to the
   last token consumed, and then copied to the heap. */
4509 static struct ctables_pcexpr *
4510 parse_primary (struct lexer *lexer)
4512 int start_ofs = lex_ofs (lexer);
4513 struct ctables_pcexpr e;
4514 if (lex_is_number (lexer))
4516 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4517 .number = lex_number (lexer) };
4520 else if (lex_match_id (lexer, "MISSING"))
4521 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4522 else if (lex_match_id (lexer, "OTHERNM"))
4523 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4524 else if (lex_match_id (lexer, "TOTAL"))
4525 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4526 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows SUBTOTAL. */
4528 size_t subtotal_index = 0;
4529 if (lex_match (lexer, T_LBRACK))
4531 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4533 subtotal_index = lex_integer (lexer);
4535 if (!lex_force_match (lexer, T_RBRACK))
4538 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4539 .subtotal_index = subtotal_index };
4541 else if (lex_match (lexer, T_LBRACK))
4543 if (lex_match_id (lexer, "LO"))
/* Fail unless THRU is followed by a number.  The lex_force_num() test must
   be negated, as in the [n THRU m] case below; without the negation a valid
   upper bound was rejected and a non-number fell through to lex_number(). */
4545 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
/* LO is represented as -DBL_MAX. */
4547 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4550 else if (lex_is_number (lexer))
4552 double number = lex_number (lexer);
4554 if (lex_match_id (lexer, "THRU"))
/* HI is represented as DBL_MAX. */
4556 if (lex_match_id (lexer, "HI"))
4557 e = ctpo_cat_range (number, DBL_MAX);
4560 if (!lex_force_num (lexer))
4562 e = ctpo_cat_range (number, lex_number (lexer));
/* A lone number in brackets names a single numeric category. */
4567 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4570 else if (lex_is_string (lexer))
/* The node owns a private copy of the string token. */
4572 e = (struct ctables_pcexpr) {
4573 .op = CTPO_CAT_STRING,
4574 .string = ss_xstrdup (lex_tokss (lexer)),
4580 lex_error (lexer, NULL);
/* A missing closing bracket is an error; a string node is the only kind that
   gets special handling on this path. */
4584 if (!lex_force_match (lexer, T_RBRACK))
4586 if (e.op == CTPO_CAT_STRING)
4591 else if (lex_match (lexer, T_LPAREN))
4593 struct ctables_pcexpr *ep = parse_add (lexer);
/* Discard the subexpression if its closing parenthesis is missing. */
4596 if (!lex_force_match (lexer, T_RPAREN))
4598 ctables_pcexpr_destroy (ep);
4605 lex_error (lexer, NULL);
/* The location covers the tokens from START_OFS through the last one
   consumed. */
4609 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4610 return xmemdup (&e, sizeof e);
/* Allocates a new expression node that wraps operand SUB.  The node's
   location runs from token offset START_OFS through the token just before
   LEXER's current offset.

   NOTE(review): the initializer lines that set the operator and attach SUB
   are not visible in this excerpt; the name indicates unary negation. */
4613 static struct ctables_pcexpr *
4614 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4615 struct lexer *lexer, int start_ofs)
4617 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4618 *e = (struct ctables_pcexpr) {
4621 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of `**' operators over primaries,
   handled by parse_binary_operators() with a warning about
   left-associativity for chained uses.

   When the current token is a negative number immediately followed by `**',
   the number's sign is stripped to form the left operand, the `**' chain is
   parsed on that positive constant, and the whole result is wrapped in a
   negation node. */
4626 static struct ctables_pcexpr *
4627 parse_exp (struct lexer *lexer)
4629 static const struct operator op = { T_EXP, CTPO_POW };
4631 const char *chain_warning =
4632 _("The exponentiation operator (`**') is left-associative: "
4633 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4634 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number followed by `**'. */
4636 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4637 return parse_binary_operators (lexer, &op, 1,
4638 parse_primary, chain_warning);
4640 /* Special case for situations like "-5**6", which must be parsed as
   -(5**6). */
4643 int start_ofs = lex_ofs (lexer);
/* The left operand is the magnitude of the negative-number token. */
4644 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4645 *lhs = (struct ctables_pcexpr) {
4646 .op = CTPO_CONSTANT,
4647 .number = -lex_tokval (lexer),
4648 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4652 struct ctables_pcexpr *node = parse_binary_operators__ (
4653 lexer, &op, 1, parse_primary, chain_warning, lhs);
/* Apply the sign that was stripped from the first operand. */
4657 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4660 /* Parses the unary minus level. */
/* Without a leading dash this defers to parse_exp().  With one, it recurses
   on itself, so any number of stacked dashes is accepted, and wraps the
   inner result in a negation node whose location starts at the dash. */
4661 static struct ctables_pcexpr *
4662 parse_neg (struct lexer *lexer)
4664 int start_ofs = lex_ofs (lexer);
4665 if (!lex_match (lexer, T_DASH))
4666 return parse_exp (lexer);
4668 struct ctables_pcexpr *inner = parse_neg (lexer);
4672 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4675 /* Parses the multiplication and division level. */
/* Operators recognized at this level: `*' maps to CTPO_MUL and `/' to
   CTPO_DIV.  The table is passed to parse_binary_operators(). */
4676 static struct ctables_pcexpr *
4677 parse_mul (struct lexer *lexer)
4679 static const struct operator ops[] =
4681 { T_ASTERISK, CTPO_MUL },
4682 { T_SLASH, CTPO_DIV },
4685 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4689 /* Parses the addition and subtraction level. */
/* Operators recognized at this level: `+' maps to CTPO_ADD and `-' to
   CTPO_SUB.  A negative-number token is also listed and maps to CTPO_ADD, so
   that input such as "a -5" adds the negative constant. */
4690 static struct ctables_pcexpr *
4691 parse_add (struct lexer *lexer)
4693 static const struct operator ops[] =
4695 { T_PLUS, CTPO_ADD },
4696 { T_DASH, CTPO_SUB },
4697 { T_NEG_NUM, CTPO_ADD },
4700 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
/* Searches CT's postcompute table for an entry named NAME.  The hash is
   computed with utf8_hash_case_string() and candidates are compared with
   utf8_strcasecmp(), so the lookup ignores case.

   NOTE(review): the return statements are not visible in this excerpt. */
4704 static struct ctables_postcompute *
4705 ctables_find_postcompute (struct ctables *ct, const char *name)
4707 struct ctables_postcompute *pc;
4708 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4709 utf8_hash_case_string (name, 0), &ct->postcomputes)
4710 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, whose visible syntax is
   "&name = EXPR ( expression )", and records the result in CT's table of
   postcomputes.

   If a postcompute with the same name already exists, a warning is issued
   together with a note pointing at the earlier definition, and the old
   expression and location are destroyed; otherwise a new entry is allocated
   and inserted.  The entry's label is the source text of the expression.

   NOTE(review): the error-return lines and the line that stores the new
   expression in the entry are not visible in this excerpt. */
4716 ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
/* Start one token back, so that the location includes the preceding token. */
4718 int pcompute_start = lex_ofs (lexer) - 1;
4720 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4723 char *name = ss_xstrdup (lex_tokss (lexer));
4726 if (!lex_force_match (lexer, T_EQUALS)
4727 || !lex_force_match_id (lexer, "EXPR")
4728 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets that bracket the expression; they are used for the label. */
4734 int expr_start = lex_ofs (lexer);
4735 struct ctables_pcexpr *expr = parse_add (lexer);
4736 int expr_end = lex_ofs (lexer) - 1;
4737 if (!expr || !lex_force_match (lexer, T_RPAREN))
4742 int pcompute_end = lex_ofs (lexer) - 1;
4744 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4747 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and drop its data. */
4750 msg_at (SW, location, _("New definition of &%s will override the "
4751 "previous definition."),
4753 msg_at (SN, pc->location, _("This is the previous definition."));
4755 ctables_pcexpr_destroy (pc->expr);
4756 msg_location_destroy (pc->location);
/* First definition: create the entry, which takes over NAME. */
4761 pc = xmalloc (sizeof *pc);
4762 *pc = (struct ctables_postcompute) { .name = name };
4763 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4764 utf8_hash_case_string (pc->name, 0));
4767 pc->location = location;
4769 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications that follow PPROPERTIES FORMAT into SSS,
   which is reset to empty first.  Each specification is a summary function,
   a percentile in the closed range 0 to 100 when the function is CTSF_PTILE,
   and a numeric output format.  Parsing stops at the end of the command, at
   a slash, or at a LABEL or HIDESOURCECATS keyword.

   NOTE(review): the return lines are not visible in this excerpt.  The final
   ctables_summary_spec_set_uninit() call is presumably the error path. */
4774 ctables_parse_pproperties_format (struct lexer *lexer,
4775 struct ctables_summary_spec_set *sss)
4777 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4779 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4780 && !(lex_token (lexer) == T_ID
4781 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4782 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4783 lex_tokss (lexer)))))
4785 /* Parse function. */
4786 enum ctables_summary_function function;
4787 if (!parse_ctables_summary_function (lexer, &function))
4790 /* Parse percentile. */
4791 double percentile = 0;
4792 if (function == CTSF_PTILE)
4794 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4796 percentile = lex_number (lexer);
/* The format must be valid for output and compatible with numeric values. */
4801 struct fmt_spec format;
4802 if (!parse_format_specifier (lexer, &format)
4803 || !fmt_check_output (&format)
4804 || !fmt_check_type_compat (&format, VAL_NUMERIC))
/* Grow the array when it is full, then append the new specification. */
4807 if (sss->n >= sss->allocated)
4808 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4809 sizeof *sss->specs);
4810 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4811 .function = function,
4812 .percentile = percentile,
4819 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.  It first collects the
   postcomputes named by a run of "&name" tokens, reporting an error for any
   name not defined in CT, and then applies each of the settings that follow
   (LABEL, FORMAT, HIDESOURCECATS) to every collected postcompute.

   NOTE(review): the return and error-path lines are not visible in this
   excerpt. */
4824 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
4826 struct ctables_postcompute **pcs = NULL;
4828 size_t allocated_pcs = 0;
4830 while (lex_match (lexer, T_AND))
4832 if (!lex_force_id (lexer))
4834 struct ctables_postcompute *pc
4835 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
4838 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
4843 if (n_pcs >= allocated_pcs)
4844 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
4848 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4850 if (lex_match_id (lexer, "LABEL"))
4852 lex_match (lexer, T_EQUALS);
4853 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label, replacing any old one. */
4856 for (size_t i = 0; i < n_pcs; i++)
4858 free (pcs[i]->label);
4859 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
4864 else if (lex_match_id (lexer, "FORMAT"))
4866 lex_match (lexer, T_EQUALS);
4868 struct ctables_summary_spec_set sss;
4869 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets a clone of the parsed set; the parsed original is
   released afterward. */
4872 for (size_t i = 0; i < n_pcs; i++)
4875 ctables_summary_spec_set_uninit (pcs[i]->specs);
4877 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
4878 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
4880 ctables_summary_spec_set_uninit (&sss);
4882 else if (lex_match_id (lexer, "HIDESOURCECATS"))
4884 lex_match (lexer, T_EQUALS);
4885 bool hide_source_cats;
4886 if (!parse_bool (lexer, &hide_source_cats))
4888 for (size_t i = 0; i < n_pcs; i++)
4889 pcs[i]->hide_source_cats = hide_source_cats;
4893 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
4906 cmd_ctables (struct lexer *lexer, struct dataset *ds)
4908 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
4909 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
4910 enum settings_value_show tvars = settings_get_show_variables ();
4911 for (size_t i = 0; i < n_vars; i++)
4912 vlabels[i] = (enum ctables_vlabel) tvars;
4914 struct pivot_table_look *look = pivot_table_look_unshare (
4915 pivot_table_look_ref (pivot_table_look_get_default ()));
4916 look->omit_empty = false;
4918 struct ctables *ct = xmalloc (sizeof *ct);
4919 *ct = (struct ctables) {
4920 .dict = dataset_dict (ds),
4922 .ctables_formats = FMT_SETTINGS_INIT,
4924 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
4930 const char *dot_string;
4931 const char *comma_string;
4933 static const struct ctf ctfs[4] = {
4934 { CTEF_NEGPAREN, "(,,,)", "(...)" },
4935 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
4936 { CTEF_PAREN, "-,(,),", "-.(.)." },
4937 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
4939 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
4940 for (size_t i = 0; i < 4; i++)
4942 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
4943 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
4944 fmt_number_style_from_string (s));
4947 if (!lex_force_match (lexer, T_SLASH))
4950 while (!lex_match_id (lexer, "TABLE"))
4952 if (lex_match_id (lexer, "FORMAT"))
4954 double widths[2] = { SYSMIS, SYSMIS };
4955 double units_per_inch = 72.0;
4957 while (lex_token (lexer) != T_SLASH)
4959 if (lex_match_id (lexer, "MINCOLWIDTH"))
4961 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
4964 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
4966 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
4969 else if (lex_match_id (lexer, "UNITS"))
4971 lex_match (lexer, T_EQUALS);
4972 if (lex_match_id (lexer, "POINTS"))
4973 units_per_inch = 72.0;
4974 else if (lex_match_id (lexer, "INCHES"))
4975 units_per_inch = 1.0;
4976 else if (lex_match_id (lexer, "CM"))
4977 units_per_inch = 2.54;
4980 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
4984 else if (lex_match_id (lexer, "EMPTY"))
4989 lex_match (lexer, T_EQUALS);
4990 if (lex_match_id (lexer, "ZERO"))
4992 /* Nothing to do. */
4994 else if (lex_match_id (lexer, "BLANK"))
4995 ct->zero = xstrdup ("");
4996 else if (lex_force_string (lexer))
4998 ct->zero = ss_xstrdup (lex_tokss (lexer));
5004 else if (lex_match_id (lexer, "MISSING"))
5006 lex_match (lexer, T_EQUALS);
5007 if (!lex_force_string (lexer))
5011 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5012 ? ss_xstrdup (lex_tokss (lexer))
5018 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5019 "UNITS", "EMPTY", "MISSING");
5024 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5025 && widths[0] > widths[1])
5027 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
5031 for (size_t i = 0; i < 2; i++)
5032 if (widths[i] != SYSMIS)
5034 int *wr = ct->look->width_ranges[TABLE_HORZ];
5035 wr[i] = widths[i] / units_per_inch * 96.0;
5040 else if (lex_match_id (lexer, "VLABELS"))
5042 if (!lex_force_match_id (lexer, "VARIABLES"))
5044 lex_match (lexer, T_EQUALS);
5046 struct variable **vars;
5048 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5052 if (!lex_force_match_id (lexer, "DISPLAY"))
5057 lex_match (lexer, T_EQUALS);
5059 enum ctables_vlabel vlabel;
5060 if (lex_match_id (lexer, "DEFAULT"))
5061 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5062 else if (lex_match_id (lexer, "NAME"))
5064 else if (lex_match_id (lexer, "LABEL"))
5065 vlabel = CTVL_LABEL;
5066 else if (lex_match_id (lexer, "BOTH"))
5068 else if (lex_match_id (lexer, "NONE"))
5072 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5078 for (size_t i = 0; i < n_vars; i++)
5079 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5082 else if (lex_match_id (lexer, "MRSETS"))
5084 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5086 lex_match (lexer, T_EQUALS);
5087 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5090 else if (lex_match_id (lexer, "SMISSING"))
5092 if (lex_match_id (lexer, "VARIABLE"))
5093 ct->smissing_listwise = false;
5094 else if (lex_match_id (lexer, "LISTWISE"))
5095 ct->smissing_listwise = true;
5098 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5102 else if (lex_match_id (lexer, "PCOMPUTE"))
5104 if (!ctables_parse_pcompute (lexer, ct))
5107 else if (lex_match_id (lexer, "PPROPERTIES"))
5109 if (!ctables_parse_pproperties (lexer, ct))
5112 else if (lex_match_id (lexer, "WEIGHT"))
5114 if (!lex_force_match_id (lexer, "VARIABLE"))
5116 lex_match (lexer, T_EQUALS);
5117 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS [COUNT=n]: with COUNT, the threshold is the given integer;
   without it, a threshold that is still 0 becomes 5.  The keyword literal
   must not contain a leading space, because an identifier token can never
   match one and the subcommand would be unreachable. */
5121 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
5123 if (lex_match_id (lexer, "COUNT"))
5125 lex_match (lexer, T_EQUALS);
5126 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5129 ct->hide_threshold = lex_integer (lexer);
5132 else if (ct->hide_threshold == 0)
5133 ct->hide_threshold = 5;
5137 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5138 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5139 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5143 if (!lex_force_match (lexer, T_SLASH))
5147 size_t allocated_tables = 0;
5150 if (ct->n_tables >= allocated_tables)
5151 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5152 sizeof *ct->tables);
5154 struct ctables_category *cat = xmalloc (sizeof *cat);
5155 *cat = (struct ctables_category) {
5157 .include_missing = false,
5158 .sort_ascending = true,
5161 struct ctables_categories *c = xmalloc (sizeof *c);
5162 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5163 *c = (struct ctables_categories) {
5170 struct ctables_categories **categories = xnmalloc (n_vars,
5171 sizeof *categories);
5172 for (size_t i = 0; i < n_vars; i++)
5175 struct ctables_table *t = xmalloc (sizeof *t);
5176 *t = (struct ctables_table) {
5178 .slabels_axis = PIVOT_AXIS_COLUMN,
5179 .slabels_visible = true,
5180 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5182 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5183 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5184 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5186 .clabels_from_axis = PIVOT_AXIS_LAYER,
5187 .categories = categories,
5188 .n_categories = n_vars,
5191 ct->tables[ct->n_tables++] = t;
5193 lex_match (lexer, T_EQUALS);
5194 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5196 if (lex_match (lexer, T_BY))
5198 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5199 ct, t, PIVOT_AXIS_COLUMN))
5202 if (lex_match (lexer, T_BY))
5204 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5205 ct, t, PIVOT_AXIS_LAYER))
5210 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5211 && !t->axes[PIVOT_AXIS_LAYER])
5213 lex_error (lexer, _("At least one variable must be specified."));
5217 const struct ctables_axis *scales[PIVOT_N_AXES];
5218 size_t n_scales = 0;
5219 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5221 scales[a] = find_scale (t->axes[a]);
5227 msg (SE, _("Scale variables may appear only on one axis."));
5228 if (scales[PIVOT_AXIS_ROW])
5229 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5230 _("This scale variable appears on the rows axis."));
5231 if (scales[PIVOT_AXIS_COLUMN])
5232 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5233 _("This scale variable appears on the columns axis."));
5234 if (scales[PIVOT_AXIS_LAYER])
5235 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5236 _("This scale variable appears on the layer axis."));
5240 const struct ctables_axis *summaries[PIVOT_N_AXES];
5241 size_t n_summaries = 0;
5242 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5244 summaries[a] = (scales[a]
5246 : find_categorical_summary_spec (t->axes[a]));
5250 if (n_summaries > 1)
5252 msg (SE, _("Summaries may appear only on one axis."));
5253 if (summaries[PIVOT_AXIS_ROW])
5254 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5255 _("This variable on the rows axis has a summary."));
5256 if (summaries[PIVOT_AXIS_COLUMN])
5257 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5258 _("This variable on the columns axis has a summary."));
5259 if (summaries[PIVOT_AXIS_LAYER])
5260 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5261 _("This variable on the layers axis has a summary."));
5264 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5265 if (n_summaries ? summaries[a] : t->axes[a])
5267 t->summary_axis = a;
5271 if (lex_token (lexer) == T_ENDCMD)
5273 if (!ctables_prepare_table (t))
5277 if (!lex_force_match (lexer, T_SLASH))
5280 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5282 if (lex_match_id (lexer, "SLABELS"))
5284 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5286 if (lex_match_id (lexer, "POSITION"))
5288 lex_match (lexer, T_EQUALS);
5289 if (lex_match_id (lexer, "COLUMN"))
5290 t->slabels_axis = PIVOT_AXIS_COLUMN;
5291 else if (lex_match_id (lexer, "ROW"))
5292 t->slabels_axis = PIVOT_AXIS_ROW;
5293 else if (lex_match_id (lexer, "LAYER"))
5294 t->slabels_axis = PIVOT_AXIS_LAYER;
5297 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5301 else if (lex_match_id (lexer, "VISIBLE"))
5303 lex_match (lexer, T_EQUALS);
5304 if (!parse_bool (lexer, &t->slabels_visible))
5309 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5314 else if (lex_match_id (lexer, "CLABELS"))
5316 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5318 if (lex_match_id (lexer, "AUTO"))
5320 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5321 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5323 else if (lex_match_id (lexer, "ROWLABELS"))
5325 lex_match (lexer, T_EQUALS);
5326 if (lex_match_id (lexer, "OPPOSITE"))
5327 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5328 else if (lex_match_id (lexer, "LAYER"))
5329 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5332 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5336 else if (lex_match_id (lexer, "COLLABELS"))
5338 lex_match (lexer, T_EQUALS);
5339 if (lex_match_id (lexer, "OPPOSITE"))
5340 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5341 else if (lex_match_id (lexer, "LAYER"))
5342 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5345 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5351 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5357 else if (lex_match_id (lexer, "CRITERIA"))
5359 if (!lex_force_match_id (lexer, "CILEVEL"))
5361 lex_match (lexer, T_EQUALS);
5363 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5365 t->cilevel = lex_number (lexer);
5368 else if (lex_match_id (lexer, "CATEGORIES"))
5370 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5374 else if (lex_match_id (lexer, "TITLES"))
5379 if (lex_match_id (lexer, "CAPTION"))
5380 textp = &t->caption;
5381 else if (lex_match_id (lexer, "CORNER"))
5383 else if (lex_match_id (lexer, "TITLE"))
5387 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5390 lex_match (lexer, T_EQUALS);
5392 struct string s = DS_EMPTY_INITIALIZER;
5393 while (lex_is_string (lexer))
5395 if (!ds_is_empty (&s))
5396 ds_put_byte (&s, ' ');
5397 ds_put_substring (&s, lex_tokss (lexer));
5401 *textp = ds_steal_cstr (&s);
5403 while (lex_token (lexer) != T_SLASH
5404 && lex_token (lexer) != T_ENDCMD);
5406 else if (lex_match_id (lexer, "SIGTEST"))
5410 t->chisq = xmalloc (sizeof *t->chisq);
5411 *t->chisq = (struct ctables_chisq) {
5413 .include_mrsets = true,
5414 .all_visible = true,
5420 if (lex_match_id (lexer, "TYPE"))
5422 lex_match (lexer, T_EQUALS);
5423 if (!lex_force_match_id (lexer, "CHISQUARE"))
5426 else if (lex_match_id (lexer, "ALPHA"))
5428 lex_match (lexer, T_EQUALS);
5429 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5431 t->chisq->alpha = lex_number (lexer);
/* SIGTEST INCLUDEMRSETS=bool.  The parse_bool() result is negated so that
   the failure handling applies when parsing fails, as with the other
   parse_bool() callers in this function. */
5434 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5436 lex_match (lexer, T_EQUALS);
5437 if (!parse_bool (lexer, &t->chisq->include_mrsets))
5440 else if (lex_match_id (lexer, "CATEGORIES"))
5442 lex_match (lexer, T_EQUALS);
5443 if (lex_match_id (lexer, "ALLVISIBLE"))
5444 t->chisq->all_visible = true;
5445 else if (lex_match_id (lexer, "SUBTOTALS"))
5446 t->chisq->all_visible = false;
5449 lex_error_expecting (lexer,
5450 "ALLVISIBLE", "SUBTOTALS");
5456 lex_error_expecting (lexer, "TYPE", "ALPHA",
5457 "INCLUDEMRSETS", "CATEGORIES");
5461 while (lex_token (lexer) != T_SLASH
5462 && lex_token (lexer) != T_ENDCMD);
5464 else if (lex_match_id (lexer, "COMPARETEST"))
5468 t->pairwise = xmalloc (sizeof *t->pairwise);
5469 *t->pairwise = (struct ctables_pairwise) {
5471 .alpha = { .05, .05 },
5472 .adjust = BONFERRONI,
5473 .include_mrsets = true,
5474 .meansvariance_allcats = true,
5475 .all_visible = true,
5484 if (lex_match_id (lexer, "TYPE"))
5486 lex_match (lexer, T_EQUALS);
5487 if (lex_match_id (lexer, "PROP"))
5488 t->pairwise->type = PROP;
5489 else if (lex_match_id (lexer, "MEAN"))
5490 t->pairwise->type = MEAN;
5493 lex_error_expecting (lexer, "PROP", "MEAN");
5497 else if (lex_match_id (lexer, "ALPHA"))
5499 lex_match (lexer, T_EQUALS);
5501 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5503 double a0 = lex_number (lexer);
5506 lex_match (lexer, T_COMMA);
5507 if (lex_is_number (lexer))
5509 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5511 double a1 = lex_number (lexer);
5514 t->pairwise->alpha[0] = MIN (a0, a1);
5515 t->pairwise->alpha[1] = MAX (a0, a1);
5518 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5520 else if (lex_match_id (lexer, "ADJUST"))
5522 lex_match (lexer, T_EQUALS);
5523 if (lex_match_id (lexer, "BONFERRONI"))
5524 t->pairwise->adjust = BONFERRONI;
5525 else if (lex_match_id (lexer, "BH"))
5526 t->pairwise->adjust = BH;
5527 else if (lex_match_id (lexer, "NONE"))
5528 t->pairwise->adjust = 0;
5531 lex_error_expecting (lexer, "BONFERRONI", "BH",
5536 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5538 lex_match (lexer, T_EQUALS);
5539 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5542 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5544 lex_match (lexer, T_EQUALS);
5545 if (lex_match_id (lexer, "ALLCATS"))
5546 t->pairwise->meansvariance_allcats = true;
5547 else if (lex_match_id (lexer, "TESTEDCATS"))
5548 t->pairwise->meansvariance_allcats = false;
5551 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5555 else if (lex_match_id (lexer, "CATEGORIES"))
5557 lex_match (lexer, T_EQUALS);
5558 if (lex_match_id (lexer, "ALLVISIBLE"))
5559 t->pairwise->all_visible = true;
5560 else if (lex_match_id (lexer, "SUBTOTALS"))
5561 t->pairwise->all_visible = false;
5564 lex_error_expecting (lexer, "ALLVISIBLE",
5569 else if (lex_match_id (lexer, "MERGE"))
5571 lex_match (lexer, T_EQUALS);
5572 if (!parse_bool (lexer, &t->pairwise->merge))
5575 else if (lex_match_id (lexer, "STYLE"))
5577 lex_match (lexer, T_EQUALS);
5578 if (lex_match_id (lexer, "APA"))
5579 t->pairwise->apa_style = true;
5580 else if (lex_match_id (lexer, "SIMPLE"))
5581 t->pairwise->apa_style = false;
5584 lex_error_expecting (lexer, "APA", "SIMPLE");
5588 else if (lex_match_id (lexer, "SHOWSIG"))
5590 lex_match (lexer, T_EQUALS);
5591 if (!parse_bool (lexer, &t->pairwise->show_sig))
5596 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5597 "INCLUDEMRSETS", "MEANSVARIANCE",
5598 "CATEGORIES", "MERGE", "STYLE",
5603 while (lex_token (lexer) != T_SLASH
5604 && lex_token (lexer) != T_ENDCMD);
5608 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5609 "CRITERIA", "CATEGORIES", "TITLES",
5610 "SIGTEST", "COMPARETEST");
5614 if (!lex_match (lexer, T_SLASH))
5618 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5619 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5621 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5625 if (!ctables_prepare_table (t))
5628 while (lex_token (lexer) != T_ENDCMD);
5630 bool ok = ctables_execute (ds, ct);
5631 ctables_destroy (ct);
5632 return ok ? CMD_SUCCESS : CMD_FAILURE;
5635 ctables_destroy (ct);