1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_weighting
74 enum ctables_function_type
76 /* A function that operates on data in a single cell. It operates on
77 effective weights. It does not have an unweighted version. */
80 /* A function that operates on data in a single cell. The function
81 operates on effective weights and has a U-prefixed unweighted
85 /* A function that operates on data in a single cell. It operates on
86 dictionary weights, and has a U-prefixed unweighted version and an
87 E-prefixed effective weight version. */
90 /* A function that operates on an area of cells. It operates on effective
91 weights and has a U-prefixed unweighted version. */
102 enum ctables_function_availability
104 CTFA_ALL, /* Any variables. */
105 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
106 //CTFA_MRSETS, /* Only multiple-response sets */
109 enum ctables_summary_function
111 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
112 #include "ctables.inc"
117 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
119 #include "ctables.inc"
123 struct ctables_function_info
125 struct substring basename;
126 enum ctables_function_type type;
127 enum ctables_format format;
128 enum ctables_function_availability availability;
130 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
131 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
132 bool is_area; /* Needs an area prefix. */
134 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
135 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
137 .basename = SS_LITERAL_INITIALIZER (NAME), \
140 .availability = AVAILABILITY, \
141 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
142 .e_prefix = (TYPE) == CTFT_UECELL, \
143 .is_area = (TYPE) == CTFT_AREA \
145 #include "ctables.inc"
149 enum ctables_area_type
151 /* Within a section, where stacked variables divide one section from
154 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
155 parse_ctables_summary_function() parses correctly. */
156 CTAT_TABLE, /* All layers of a whole section. */
157 CTAT_LAYERROW, /* Row in one layer within a section. */
158 CTAT_LAYERCOL, /* Column in one layer within a section. */
159 CTAT_LAYER, /* One layer within a section. */
161 /* Within a subtable, where a subtable pairs an innermost row variable with
162 an innermost column variable within a single layer. */
163 CTAT_SUBTABLE, /* Whole subtable. */
164 CTAT_ROW, /* Row within a subtable. */
165 CTAT_COL, /* Column within a subtable. */
169 static const char *ctables_area_type_name[N_CTATS] = {
170 [CTAT_TABLE] = "TABLE",
171 [CTAT_LAYER] = "LAYER",
172 [CTAT_LAYERROW] = "LAYERROW",
173 [CTAT_LAYERCOL] = "LAYERCOL",
174 [CTAT_SUBTABLE] = "SUBTABLE",
181 struct hmap_node node;
183 const struct ctables_cell *example;
186 double count[N_CTWS];
187 double valid[N_CTWS];
188 double total[N_CTWS];
189 struct ctables_sum *sums;
197 enum ctables_summary_variant
206 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
207 all the axes (except the scalar variable, if any). */
208 struct hmap_node node;
210 /* The areas that contain this cell. */
212 struct ctables_area *areas[N_CTATS];
217 enum ctables_summary_variant sv;
219 struct ctables_cell_axis
221 struct ctables_cell_value
223 const struct ctables_category *category;
231 union ctables_summary *summaries;
236 const struct dictionary *dict;
237 struct pivot_table_look *look;
239 /* CTABLES has a number of extra formats that we implement via custom
240 currency specifications on an alternate fmt_settings. */
241 #define CTEF_NEGPAREN FMT_CCA
242 #define CTEF_NEQUAL FMT_CCB
243 #define CTEF_PAREN FMT_CCC
244 #define CTEF_PCTPAREN FMT_CCD
245 struct fmt_settings ctables_formats;
247 /* If this is NULL, zeros are displayed using the normal print format.
248 Otherwise, this string is displayed. */
251 /* If this is NULL, missing values are displayed using the normal print
252 format. Otherwise, this string is displayed. */
255 /* Indexed by variable dictionary index. */
256 enum ctables_vlabel *vlabels;
258 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
260 bool mrsets_count_duplicates; /* MRSETS. */
261 bool smissing_listwise; /* SMISSING. */
262 struct variable *e_weight; /* WEIGHT. */
263 int hide_threshold; /* HIDESMALLCOUNTS. */
265 struct ctables_table **tables;
269 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
272 struct ctables_postcompute
274 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
275 char *name; /* Name, without leading &. */
277 struct msg_location *location; /* Location of definition. */
278 struct ctables_pcexpr *expr;
280 struct ctables_summary_spec_set *specs;
281 bool hide_source_cats;
284 struct ctables_pcexpr
294 enum ctables_postcompute_op
297 CTPO_CONSTANT, /* 5 */
298 CTPO_CAT_NUMBER, /* [5] */
299 CTPO_CAT_STRING, /* ["STRING"] */
300 CTPO_CAT_NRANGE, /* [LO THRU 5] */
301 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
302 CTPO_CAT_MISSING, /* MISSING */
303 CTPO_CAT_OTHERNM, /* OTHERNM */
304 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
305 CTPO_CAT_TOTAL, /* TOTAL */
319 /* CTPO_CAT_NUMBER. */
322 /* CTPO_CAT_STRING, in dictionary encoding. */
323 struct substring string;
325 /* CTPO_CAT_NRANGE. */
328 /* CTPO_CAT_SRANGE. */
329 struct substring srange[2];
331 /* CTPO_CAT_SUBTOTAL. */
332 size_t subtotal_index;
334 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
335 One element: CTPO_NEG. */
336 struct ctables_pcexpr *subs[2];
339 /* Source location. */
340 struct msg_location *location;
343 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
344 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
345 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
346 struct ctables_pcexpr *sub1);
348 struct ctables_summary_spec_set
350 struct ctables_summary_spec *specs;
354 /* The variable to which the summary specs are applied. */
355 struct variable *var;
357 /* Whether the variable to which the summary specs are applied is a scale
358 variable for the purpose of summarization.
360 (VALIDN and TOTALN act differently for summarizing scale and categorical
364 /* If any of these optional additional scale variables are missing, then
365 treat 'var' as if it's missing too. This is for implementing
366 SMISSING=LISTWISE. */
367 struct variable **listwise_vars;
368 size_t n_listwise_vars;
371 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
372 const struct ctables_summary_spec_set *);
373 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
375 /* A nested sequence of variables, e.g. a > b > c. */
378 struct variable **vars;
382 size_t *areas[N_CTATS];
383 size_t n_areas[N_CTATS];
386 struct ctables_summary_spec_set specs[N_CSVS];
389 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
392 struct ctables_nest *nests;
396 static void ctables_stack_uninit (struct ctables_stack *);
400 struct hmap_node node;
405 struct ctables_occurrence
407 struct hmap_node node;
411 struct ctables_section
414 struct ctables_table *table;
415 struct ctables_nest *nests[PIVOT_N_AXES];
418 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
419 struct hmap cells; /* Contains "struct ctables_cell"s. */
420 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
423 static void ctables_section_uninit (struct ctables_section *);
427 struct ctables *ctables;
428 struct ctables_axis *axes[PIVOT_N_AXES];
429 struct ctables_stack stacks[PIVOT_N_AXES];
430 struct ctables_section *sections;
432 enum pivot_axis_type summary_axis;
433 struct ctables_summary_spec_set summary_specs;
434 struct variable **sum_vars;
437 enum pivot_axis_type slabels_axis;
438 bool slabels_visible;
440 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
442 Most commonly, label_axis[a] == a, and in particular we always have
443 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
445 If ROWLABELS or COLLABELS is specified, then one of
446 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
447 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
449 If any category labels are moved, then 'clabels_example' is one of the
450 variables being moved (and it is otherwise NULL). All of the variables
451 being moved have the same width, value labels, and categories, so this
452 example variable can be used to find those out.
454 The remaining members in this group are relevant only if category labels
457 'clabels_values_map' holds a "struct ctables_value" for all the values
458 that appear in all of the variables in the moved categories. It is
459 accumulated as the data is read. Once the data is fully read, its
460 sorted values are put into 'clabels_values' and 'n_clabels_values'.
462 enum pivot_axis_type label_axis[PIVOT_N_AXES];
463 enum pivot_axis_type clabels_from_axis;
464 enum pivot_axis_type clabels_to_axis;
465 const struct variable *clabels_example;
466 struct hmap clabels_values_map;
467 struct ctables_value **clabels_values;
468 size_t n_clabels_values;
470 /* Indexed by variable dictionary index. */
471 struct ctables_categories **categories;
480 struct ctables_chisq *chisq;
481 struct ctables_pairwise *pairwise;
484 struct ctables_categories
487 struct ctables_category *cats;
492 struct ctables_category
494 enum ctables_category_type
496 /* Explicit category lists. */
499 CCT_NRANGE, /* Numerical range. */
500 CCT_SRANGE, /* String range. */
505 /* Totals and subtotals. */
509 /* Implicit category lists. */
514 /* For contributing to TOTALN. */
515 CCT_EXCLUDED_MISSING,
519 struct ctables_category *subtotal;
525 double number; /* CCT_NUMBER. */
526 struct substring string; /* CCT_STRING, in dictionary encoding. */
527 double nrange[2]; /* CCT_NRANGE. */
528 struct substring srange[2]; /* CCT_SRANGE. */
532 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
533 bool hide_subcategories; /* CCT_SUBTOTAL. */
536 /* CCT_POSTCOMPUTE. */
539 const struct ctables_postcompute *pc;
540 enum fmt_type parse_format;
543 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
546 bool include_missing;
550 enum ctables_summary_function sort_function;
551 enum ctables_weighting weighting;
552 enum ctables_area_type area;
553 struct variable *sort_var;
558 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
559 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
560 struct msg_location *location;
564 ctables_category_uninit (struct ctables_category *cat)
569 msg_location_destroy (cat->location);
576 case CCT_POSTCOMPUTE:
580 ss_dealloc (&cat->string);
584 ss_dealloc (&cat->srange[0]);
585 ss_dealloc (&cat->srange[1]);
590 free (cat->total_label);
598 case CCT_EXCLUDED_MISSING:
/* Compares *A and *B, where a null 'string' member marks a substring as
   absent.  Two absent substrings compare equal; an absent one never equals a
   present one; two present substrings are compared with ss_equals(). */
604 nullable_substring_equal (const struct substring *a,
605 const struct substring *b)
607 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
611 ctables_category_equal (const struct ctables_category *a,
612 const struct ctables_category *b)
614 if (a->type != b->type)
620 return a->number == b->number;
623 return ss_equals (a->string, b->string);
626 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
629 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
630 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
636 case CCT_POSTCOMPUTE:
637 return a->pc == b->pc;
641 return !strcmp (a->total_label, b->total_label);
646 return (a->include_missing == b->include_missing
647 && a->sort_ascending == b->sort_ascending
648 && a->sort_function == b->sort_function
649 && a->sort_var == b->sort_var
650 && a->percentile == b->percentile);
652 case CCT_EXCLUDED_MISSING:
660 ctables_categories_unref (struct ctables_categories *c)
665 assert (c->n_refs > 0);
669 for (size_t i = 0; i < c->n_cats; i++)
670 ctables_category_uninit (&c->cats[i]);
676 ctables_categories_equal (const struct ctables_categories *a,
677 const struct ctables_categories *b)
679 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
682 for (size_t i = 0; i < a->n_cats; i++)
683 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
689 /* Chi-square test (SIGTEST). */
697 /* Pairwise comparison test (COMPARETEST). */
698 struct ctables_pairwise
700 enum { PROP, MEAN } type;
703 bool meansvariance_allcats;
705 enum { BONFERRONI = 1, BH } adjust;
729 struct variable *var;
731 struct ctables_summary_spec_set specs[N_CSVS];
735 struct ctables_axis *subs[2];
738 struct msg_location *loc;
741 static void ctables_axis_destroy (struct ctables_axis *);
743 struct ctables_summary_spec
745 /* The calculation to be performed.
747 'function' is the function to calculate. 'weighted' specifies whether
748 to use weighted or unweighted data (for functions that do not support a
749 choice, it must be true). 'calc_area' is the area over which the
750 calculation takes place (for functions that target only an individual
751 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
752 percentile between 0 and 100 (for other functions it must be 0). */
753 enum ctables_summary_function function;
754 enum ctables_weighting weighting;
755 enum ctables_area_type calc_area;
756 double percentile; /* CTSF_PTILE only. */
758 /* How to display the result of the calculation.
760 'label' is a user-specified label, NULL if the user didn't specify
763 'user_area' is usually the same as 'calc_area', but when category labels
764 are rotated from one axis to another it swaps rows and columns.
766 'format' is the format for displaying the output. If
767 'is_ctables_format' is true, then 'format.type' is one of the special
768 CTEF_* formats instead of the standard ones. */
770 enum ctables_area_type user_area;
771 struct fmt_spec format;
772 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
779 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
780 const struct ctables_summary_spec *src)
783 dst->label = xstrdup_if_nonnull (src->label);
787 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
794 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
795 const struct ctables_summary_spec_set *src)
797 struct ctables_summary_spec *specs
798 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
799 for (size_t i = 0; i < src->n; i++)
800 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
802 *dst = (struct ctables_summary_spec_set) {
807 .is_scale = src->is_scale,
812 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
814 for (size_t i = 0; i < set->n; i++)
815 ctables_summary_spec_uninit (&set->specs[i]);
816 free (set->listwise_vars);
821 parse_col_width (struct lexer *lexer, const char *name, double *width)
823 lex_match (lexer, T_EQUALS);
824 if (lex_match_id (lexer, "DEFAULT"))
826 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
828 *width = lex_number (lexer);
838 parse_bool (struct lexer *lexer, bool *b)
840 if (lex_match_id (lexer, "NO"))
842 else if (lex_match_id (lexer, "YES"))
846 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F, looked up in a table
   indexed by function that is built from the S() entries in "ctables.inc". */
852 static enum ctables_function_availability
853 ctables_function_availability (enum ctables_summary_function f)
855 static enum ctables_function_availability availability[] = {
856 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
857 #include "ctables.inc"
861 return availability[f];
865 parse_ctables_summary_function (struct lexer *lexer,
866 enum ctables_summary_function *function,
867 enum ctables_weighting *weighting,
868 enum ctables_area_type *area)
870 if (!lex_force_id (lexer))
873 struct substring name = lex_tokss (lexer);
874 if (ss_ends_with_case (name, ss_cstr (".LCL"))
875 || ss_ends_with_case (name, ss_cstr (".UCL"))
876 || ss_ends_with_case (name, ss_cstr (".SE")))
878 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
879 "is not yet implemented."));
883 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
884 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
886 bool has_area = false;
888 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
889 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
894 if (ss_equals_case (name, ss_cstr ("PCT")))
896 /* Special case where .COUNT suffix is omitted. */
897 *function = CTSF_areaPCT_COUNT;
898 *weighting = CTW_EFFECTIVE;
905 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
907 const struct ctables_function_info *cfi = &ctables_function_info[f];
908 if (ss_equals_case (cfi->basename, name))
911 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
914 *weighting = (e ? CTW_EFFECTIVE
916 : cfi->e_prefix ? CTW_DICTIONARY
923 lex_error (lexer, _("Expecting summary function name."));
928 ctables_axis_destroy (struct ctables_axis *axis)
936 for (size_t i = 0; i < N_CSVS; i++)
937 ctables_summary_spec_set_uninit (&axis->specs[i]);
942 ctables_axis_destroy (axis->subs[0]);
943 ctables_axis_destroy (axis->subs[1]);
946 msg_location_destroy (axis->loc);
950 static struct ctables_axis *
951 ctables_axis_new_nonterminal (enum ctables_axis_op op,
952 struct ctables_axis *sub0,
953 struct ctables_axis *sub1,
954 struct lexer *lexer, int start_ofs)
956 struct ctables_axis *axis = xmalloc (sizeof *axis);
957 *axis = (struct ctables_axis) {
959 .subs = { sub0, sub1 },
960 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
965 struct ctables_axis_parse_ctx
968 struct dictionary *dict;
970 struct ctables_table *t;
973 static struct fmt_spec
974 ctables_summary_default_format (enum ctables_summary_function function,
975 const struct variable *var)
977 static const enum ctables_format default_formats[] = {
978 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
979 #include "ctables.inc"
982 switch (default_formats[function])
985 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
988 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
991 return *var_get_print_format (var);
999 ctables_summary_label__ (const struct ctables_summary_spec *spec)
1001 bool w = spec->weighting != CTW_UNWEIGHTED;
1002 bool d = spec->weighting == CTW_DICTIONARY;
1003 enum ctables_area_type a = spec->user_area;
1004 switch (spec->function)
1007 return (d ? N_("Count")
1008 : w ? N_("Adjusted Count")
1009 : N_("Unweighted Count"));
1011 case CTSF_areaPCT_COUNT:
1014 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1015 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1016 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1017 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1018 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1019 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1020 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1024 case CTSF_areaPCT_VALIDN:
1027 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1028 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1029 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1030 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1031 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1032 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1033 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1037 case CTSF_areaPCT_TOTALN:
1040 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1041 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1042 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1043 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1044 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1045 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1046 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1050 case CTSF_MAXIMUM: return N_("Maximum");
1051 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1052 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
1053 case CTSF_MINIMUM: return N_("Minimum");
1054 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
1055 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
1056 case CTSF_PTILE: NOT_REACHED ();
1057 case CTSF_RANGE: return N_("Range");
1058 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
1059 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
1060 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
1061 case CTSF_TOTALN: return (d ? N_("Total N")
1062 : w ? N_("Adjusted Total N")
1063 : N_("Unweighted Total N"));
1064 case CTSF_VALIDN: return (d ? N_("Valid N")
1065 : w ? N_("Adjusted Valid N")
1066 : N_("Unweighted Valid N"));
1067 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
1068 case CTSF_areaPCT_SUM:
1071 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1072 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1073 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1074 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1075 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1076 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1077 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1084 /* Don't bother translating these: they are for developers only. */
1085 case CTAT_TABLE: return "Table ID";
1086 case CTAT_LAYER: return "Layer ID";
1087 case CTAT_LAYERROW: return "Layer Row ID";
1088 case CTAT_LAYERCOL: return "Layer Column ID";
1089 case CTAT_SUBTABLE: return "Subtable ID";
1090 case CTAT_ROW: return "Row ID";
1091 case CTAT_COL: return "Column ID";
1099 static struct pivot_value *
1100 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1104 if (spec->function == CTSF_PTILE)
1106 double p = spec->percentile;
1107 char *s = (spec->weighting != CTW_UNWEIGHTED
1108 ? xasprintf (_("Percentile %.2f"), p)
1109 : xasprintf (_("Unweighted Percentile %.2f"), p));
1110 return pivot_value_new_user_text_nocopy (s);
1113 return pivot_value_new_text (ctables_summary_label__ (spec));
1117 struct substring in = ss_cstr (spec->label);
1118 struct substring target = ss_cstr (")CILEVEL");
1120 struct string out = DS_EMPTY_INITIALIZER;
1123 size_t chunk = ss_find_substring (in, target);
1124 ds_put_substring (&out, ss_head (in, chunk));
1125 ss_advance (&in, chunk);
1127 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1129 ss_advance (&in, target.length);
1130 ds_put_format (&out, "%g", cilevel);
1136 ctables_summary_function_name (enum ctables_summary_function function,
1137 enum ctables_weighting weighting,
1138 enum ctables_area_type area,
1139 char *buffer, size_t bufsize)
1141 const struct ctables_function_info *cfi = &ctables_function_info[function];
1142 snprintf (buffer, bufsize, "%s%s%s",
1143 (weighting == CTW_UNWEIGHTED ? "U"
1144 : weighting == CTW_DICTIONARY ? ""
1145 : cfi->e_prefix ? "E"
1147 cfi->is_area ? ctables_area_type_name[area] : "",
1148 cfi->basename.string);
1153 add_summary_spec (struct ctables_axis *axis,
1154 enum ctables_summary_function function,
1155 enum ctables_weighting weighting,
1156 enum ctables_area_type area, double percentile,
1157 const char *label, const struct fmt_spec *format,
1158 bool is_ctables_format, const struct msg_location *loc,
1159 enum ctables_summary_variant sv)
1161 if (axis->op == CTAO_VAR)
1163 char function_name[128];
1164 ctables_summary_function_name (function, weighting, area,
1165 function_name, sizeof function_name);
1166 const char *var_name = var_get_name (axis->var);
1167 switch (ctables_function_availability (function))
1171 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1172 "response sets."), function_name);
1173 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1179 if (!axis->scale && sv != CSV_TOTAL)
1182 _("Summary function %s applies only to scale variables."),
1184 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1194 struct ctables_summary_spec_set *set = &axis->specs[sv];
1195 if (set->n >= set->allocated)
1196 set->specs = x2nrealloc (set->specs, &set->allocated,
1197 sizeof *set->specs);
1199 struct ctables_summary_spec *dst = &set->specs[set->n++];
1200 *dst = (struct ctables_summary_spec) {
1201 .function = function,
1202 .weighting = weighting,
1205 .percentile = percentile,
1206 .label = xstrdup_if_nonnull (label),
1207 .format = (format ? *format
1208 : ctables_summary_default_format (function, axis->var)),
1209 .is_ctables_format = is_ctables_format,
1215 for (size_t i = 0; i < 2; i++)
1216 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1217 percentile, label, format, is_ctables_format,
1224 static struct ctables_axis *ctables_axis_parse_stack (
1225 struct ctables_axis_parse_ctx *);
1227 static struct ctables_axis *
1228 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1230 if (lex_match (ctx->lexer, T_LPAREN))
1232 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1233 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1235 ctables_axis_destroy (sub);
1241 if (!lex_force_id (ctx->lexer))
1244 if (lex_tokcstr (ctx->lexer)[0] == '$')
1246 lex_error (ctx->lexer,
1247 _("Multiple response set support not implemented."));
1251 int start_ofs = lex_ofs (ctx->lexer);
1252 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1256 struct ctables_axis *axis = xmalloc (sizeof *axis);
1257 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1259 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1260 : lex_match_phrase (ctx->lexer, "[C]") ? false
1261 : var_get_measure (var) == MEASURE_SCALE);
1262 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1263 lex_ofs (ctx->lexer) - 1);
1264 if (axis->scale && var_is_alpha (var))
1266 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1268 var_get_name (var));
1269 ctables_axis_destroy (axis);
/* Tests whether S contains at least one of the ASCII decimal digits '0'
   through '9'.  strcspn() yields the length of the leading run of S that has
   no digit, so the byte at that offset is the null terminator only when S
   has no digit at all. */
1277 has_digit (const char *s)
1279 return s[strcspn (s, "0123456789")] != '\0';
1283 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1284 bool *is_ctables_format)
1286 char type[FMT_TYPE_LEN_MAX + 1];
1287 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1290 if (!strcasecmp (type, "NEGPAREN"))
1291 format->type = CTEF_NEGPAREN;
1292 else if (!strcasecmp (type, "NEQUAL"))
1293 format->type = CTEF_NEQUAL;
1294 else if (!strcasecmp (type, "PAREN"))
1295 format->type = CTEF_PAREN;
1296 else if (!strcasecmp (type, "PCTPAREN"))
1297 format->type = CTEF_PCTPAREN;
1300 *is_ctables_format = false;
1301 return (parse_format_specifier (lexer, format)
1302 && fmt_check_output (format)
1303 && fmt_check_type_compat (format, VAL_NUMERIC));
1309 lex_next_error (lexer, -1, -1,
1310 _("Output format %s requires width 2 or greater."), type);
1313 else if (format->d > format->w - 1)
1315 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1316 "greater than decimals."), type);
1321 *is_ctables_format = true;
1326 static struct ctables_axis *
1327 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1329 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1330 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1333 enum ctables_summary_variant sv = CSV_CELL;
1336 int start_ofs = lex_ofs (ctx->lexer);
1338 /* Parse function. */
1339 enum ctables_summary_function function;
1340 enum ctables_weighting weighting;
1341 enum ctables_area_type area;
1342 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1346 /* Parse percentile. */
1347 double percentile = 0;
1348 if (function == CTSF_PTILE)
1350 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1352 percentile = lex_number (ctx->lexer);
1353 lex_get (ctx->lexer);
1358 if (lex_is_string (ctx->lexer))
1360 label = ss_xstrdup (lex_tokss (ctx->lexer));
1361 lex_get (ctx->lexer);
1365 struct fmt_spec format;
1366 const struct fmt_spec *formatp;
1367 bool is_ctables_format = false;
1368 if (lex_token (ctx->lexer) == T_ID
1369 && has_digit (lex_tokcstr (ctx->lexer)))
1371 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1372 &is_ctables_format))
1382 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1383 lex_ofs (ctx->lexer) - 1);
1384 add_summary_spec (sub, function, weighting, area, percentile, label,
1385 formatp, is_ctables_format, loc, sv);
1387 msg_location_destroy (loc);
1389 lex_match (ctx->lexer, T_COMMA);
1390 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1392 if (!lex_force_match (ctx->lexer, T_LBRACK))
1396 else if (lex_match (ctx->lexer, T_RBRACK))
1398 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1405 ctables_axis_destroy (sub);
1409 static const struct ctables_axis *
1410 find_scale (const struct ctables_axis *axis)
1414 else if (axis->op == CTAO_VAR)
1415 return axis->scale ? axis : NULL;
1418 for (size_t i = 0; i < 2; i++)
1420 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1428 static const struct ctables_axis *
1429 find_categorical_summary_spec (const struct ctables_axis *axis)
1433 else if (axis->op == CTAO_VAR)
1434 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1437 for (size_t i = 0; i < 2; i++)
1439 const struct ctables_axis *sum
1440 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one postfix axis expression followed by zero
   or more further postfix expressions, each introduced by a T_GT token.  The
   left and right operands are combined into a CTAO_NEST node with
   ctables_axis_new_nonterminal().

   Two constraints are diagnosed with msg_at(), after which the new node is
   destroyed: scale variables on both sides of a nest, and a categorical
   variable with its own summary specification on the left-hand (outer) side.

   NOTE(review): the return statements are outside this excerpt; presumably
   the result is the parsed axis, or NULL after an error -- confirm. */
1448 static struct ctables_axis *
1449 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
/* Remember where the expression starts so that the nest node's location can
   span the whole expression. */
1451 int start_ofs = lex_ofs (ctx->lexer);
1452 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
/* Each T_GT token introduces another operand to nest inside LHS. */
1456 while (lex_match (ctx->lexer, T_GT))
1458 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* NOTE(review): presumably the cleanup for a failed right-hand parse; the
   guarding condition is not visible in this excerpt. */
1461 ctables_axis_destroy (lhs);
1465 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1466 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* A scale variable on both sides of the nest is an error; the notes point at
   each offending variable. */
1468 const struct ctables_axis *outer_scale = find_scale (lhs);
1469 const struct ctables_axis *inner_scale = find_scale (rhs);
1470 if (outer_scale && inner_scale)
1472 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1473 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1474 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1475 ctables_axis_destroy (nest);
/* Look on the outer (left-hand) side for a categorical variable that carries
   summaries; the error below reports it. */
1479 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1482 msg_at (SE, nest->loc,
1483 _("Summaries may only be requested for categorical variables "
1484 "at the innermost nesting level."));
1485 msg_at (SN, outer_sum->loc,
1486 _("This outer categorical variable has a summary."));
1487 ctables_axis_destroy (nest);
/* Parses a stacking expression: one nesting expression followed by zero or
   more further nesting expressions, each introduced by a T_PLUS token.  After
   each one, LHS is replaced by a CTAO_STACK node that joins the previous LHS
   with the newly parsed right-hand side, so the result associates to the
   left.

   NOTE(review): the return statements are outside this excerpt; presumably
   the result is the parsed axis, or NULL after an error -- confirm. */
1497 static struct ctables_axis *
1498 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1500 int start_ofs = lex_ofs (ctx->lexer);
1501 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
/* Each T_PLUS token introduces another operand to stack after LHS. */
1505 while (lex_match (ctx->lexer, T_PLUS))
1507 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* NOTE(review): presumably the cleanup for a failed right-hand parse; the
   guarding condition is not visible in this excerpt. */
1510 ctables_axis_destroy (lhs);
1514 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1515 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T from LEXER and stores the
   result in T->axes[A].

   When the next token is T_BY, T_SLASH, or T_ENDCMD, an early branch is taken
   whose body is not visible in this excerpt (presumably the axis is treated
   as empty and parsing succeeds -- confirm).  Otherwise the expression is
   parsed with ctables_axis_parse_stack() and the function returns true only
   if that produced a non-null axis. */
1522 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1523 struct ctables *ct, struct ctables_table *t,
1524 enum pivot_axis_type a)
1526 if (lex_token (lexer) == T_BY
1527 || lex_token (lexer) == T_SLASH
1528 || lex_token (lexer) == T_ENDCMD)
1531 struct ctables_axis_parse_ctx ctx = {
1537 t->axes[a] = ctables_axis_parse_stack (&ctx);
1538 return t->axes[a] != NULL;
/* NOTE(review): only the signature is visible in this excerpt.  It is called
   from ctables_table_destroy() on the table's chisq member, so it presumably
   releases CHISQ -- confirm against the full body. */
1542 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* NOTE(review): only the signature is visible in this excerpt.  It is called
   from ctables_table_destroy() on the table's pairwise member, so it
   presumably releases PAIRWISE -- confirm against the full body. */
1548 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T that are visible in this excerpt:
   its sections, the categories array and one reference on each of its
   entries, the axis tree and stack for every pivot axis, the summary spec
   array, the CLABELS value map and array, and the chisq and pairwise
   structures.

   NOTE(review): some statements of the original function are missing from
   this excerpt, so this list may be incomplete. */
1554 ctables_table_destroy (struct ctables_table *t)
1559 for (size_t i = 0; i < t->n_sections; i++)
1560 ctables_section_uninit (&t->sections[i]);
/* Drop one reference per slot, then free the array of slots itself. */
1563 for (size_t i = 0; i < t->n_categories; i++)
1564 ctables_categories_unref (t->categories[i]);
1565 free (t->categories);
1567 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1569 ctables_axis_destroy (t->axes[a]);
1570 ctables_stack_uninit (&t->stacks[a]);
1572 free (t->summary_specs.specs);
/* The _SAFE iteration form is used because each node is removed from the map
   while iterating.  Values are destroyed using the width of
   t->clabels_example. */
1574 struct ctables_value *ctv, *next_ctv;
1575 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1576 &t->clabels_values_map)
1578 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1579 hmap_delete (&t->clabels_values_map, &ctv->node);
1582 hmap_destroy (&t->clabels_values_map);
1583 free (t->clabels_values);
1589 ctables_chisq_destroy (t->chisq);
1590 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT that are visible in this excerpt: every
   postcompute in CT->postcomputes (its location, expression, and summary
   specs), the postcompute map itself, the CTABLES format settings, a
   reference on the pivot table look, and each table in CT->tables.

   NOTE(review): some statements of the original function are missing from
   this excerpt, so this list may be incomplete. */
1595 ctables_destroy (struct ctables *ct)
/* The _SAFE iteration form is used because each postcompute is removed from
   the map while iterating. */
1600 struct ctables_postcompute *pc, *next_pc;
1601 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1605 msg_location_destroy (pc->location);
1606 ctables_pcexpr_destroy (pc->expr);
1610 ctables_summary_spec_set_uninit (pc->specs);
1613 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1616 hmap_destroy (&ct->postcomputes);
1618 fmt_settings_uninit (&ct->ctables_formats);
1619 pivot_table_look_unref (ct->look);
1623 for (size_t i = 0; i < ct->n_tables; i++)
1624 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose nrange member holds the numeric bounds
   LOW and HIGH, in that order.

   NOTE(review): the other initializers of the compound literal are not
   visible in this excerpt; presumably the type is set to CCT_NRANGE --
   confirm. */
1629 static struct ctables_category
1630 cct_nrange (double low, double high)
1632 return (struct ctables_category) {
1634 .nrange = { low, high }
/* Returns, by value, a category whose srange member holds the string bounds
   LOW and HIGH, in that order.  The substrings are stored as given, without
   copying their contents.

   NOTE(review): the other initializers of the compound literal are not
   visible in this excerpt; presumably the type is set to CCT_SRANGE --
   confirm. */
1638 static struct ctables_category
1639 cct_srange (struct substring low, struct substring high)
1641 return (struct ctables_category) {
1643 .srange = { low, high }
/* Parses the optional label that follows SUBTOTAL or HSUBTOTAL and stores a
   CCT_SUBTOTAL category in *CAT.

   If the next token is T_EQUALS, a string token is required and a copy of it
   becomes the label.  The translated default "Subtotal" is the alternative
   label.  HIDE_SUBCATEGORIES is copied into the category unchanged.

   NOTE(review): the branch structure around the default label and the return
   statements are not visible in this excerpt; presumably the default applies
   when no T_EQUALS follows and the function reports success as a bool --
   confirm. */
1648 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1649 struct ctables_category *cat)
1652 if (lex_match (lexer, T_EQUALS))
1654 if (!lex_force_string (lexer))
1657 total_label = ss_xstrdup (lex_tokss (lexer));
1661 total_label = xstrdup (_("Subtotal"));
/* The category stores the heap-allocated label pointer directly. */
1663 *cat = (struct ctables_category) {
1664 .type = CCT_SUBTOTAL,
1665 .hide_subcategories = hide_subcategories,
1666 .total_label = total_label
/* Takes the string in LEXER's current token, recodes it with
   recode_substring_pool() using DICT's encoding and "UTF-8", and trims
   trailing spaces from the result.

   NOTE(review): presumably the recoding direction is from UTF-8 to the
   dictionary encoding, the NULL pool argument means the result is allocated
   with malloc, and the trimmed substring is returned -- none of these can be
   confirmed from this excerpt. */
1671 static struct substring
1672 parse_substring (struct lexer *lexer, struct dictionary *dict)
1674 struct substring s = recode_substring_pool (
1675 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1676 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category from LEXER and stores it in *CAT.

   The forms are tried in this order:
     - OTHERNM and MISSING keywords;
     - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal();
     - LO THRU followed by a string or a number;
     - a number, optionally followed by THRU HI or THRU and another number;
     - a string, optionally followed by THRU HI or THRU and another string;
     - T_AND followed by the name of a postcompute, looked up with
       ctables_find_postcompute().
   Anything else is reported with lex_error().

   Open-ended numeric ranges use -DBL_MAX and DBL_MAX as bounds; open-ended
   string ranges use a substring whose string member is NULL.

   NOTE(review): the return statements and token-advancing calls are mostly
   outside this excerpt; presumably the function returns true on success and
   false after an error -- confirm. */
1682 ctables_table_parse_explicit_category (struct lexer *lexer,
1683 struct dictionary *dict,
1685 struct ctables_category *cat)
1687 if (lex_match_id (lexer, "OTHERNM"))
1688 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1689 else if (lex_match_id (lexer, "MISSING"))
1690 *cat = (struct ctables_category) { .type = CCT_MISSING };
/* The two subtotal keywords differ only in the hide_subcategories flag. */
1691 else if (lex_match_id (lexer, "SUBTOTAL"))
1692 return ctables_table_parse_subtotal (lexer, false, cat);
1693 else if (lex_match_id (lexer, "HSUBTOTAL"))
1694 return ctables_table_parse_subtotal (lexer, true, cat);
/* Range with an open lower bound. */
1695 else if (lex_match_id (lexer, "LO"))
1697 if (!lex_force_match_id (lexer, "THRU"))
1699 if (lex_is_string (lexer))
1701 struct substring sr0 = { .string = NULL };
1702 struct substring sr1 = parse_substring (lexer, dict);
1703 *cat = cct_srange (sr0, sr1);
1705 else if (lex_force_num (lexer))
1707 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
/* Single number, or a numeric range that starts with this number. */
1713 else if (lex_is_number (lexer))
1715 double number = lex_number (lexer);
1717 if (lex_match_id (lexer, "THRU"))
1719 if (lex_match_id (lexer, "HI"))
1720 *cat = cct_nrange (number, DBL_MAX);
1723 if (!lex_force_num (lexer))
1725 *cat = cct_nrange (number, lex_number (lexer));
1730 *cat = (struct ctables_category) {
/* Single string, or a string range that starts with this string. */
1735 else if (lex_is_string (lexer))
1737 struct substring s = parse_substring (lexer, dict);
1738 if (lex_match_id (lexer, "THRU"))
1740 if (lex_match_id (lexer, "HI"))
1742 struct substring sr1 = { .string = NULL };
1743 *cat = cct_srange (s, sr1);
1747 if (!lex_force_string (lexer))
1752 struct substring sr1 = parse_substring (lexer, dict);
1753 *cat = cct_srange (s, sr1);
1757 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* Reference to a postcompute by name. */
1759 else if (lex_match (lexer, T_AND))
1761 if (!lex_force_id (lexer))
1763 struct ctables_postcompute *pc = ctables_find_postcompute (
1764 ct, lex_tokcstr (lexer));
/* The error location runs from the previous token to the current one. */
1767 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1768 msg_at (SE, loc, _("Unknown postcompute &%s."),
1769 lex_tokcstr (lexer));
1770 msg_location_destroy (loc);
1775 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1779 lex_error (lexer, NULL);
/* Parses the text S as a value in format FORMAT by calling data_in() with
   DICT's encoding and the current format settings.  If data_in() returns an
   error string, it is reported at LOCATION together with the format's name.

   NOTE(review): the handling of the parsed value and the return statements
   are not visible in this excerpt; presumably the number is stored in *N and
   the function returns true on success, false on failure -- confirm. */
1787 parse_category_string (struct msg_location *location,
1788 struct substring s, const struct dictionary *dict,
1789 enum fmt_type format, double *n)
1792 char *error = data_in (s, dict_get_encoding (dict), format,
1793 settings_get_fmt_settings (), &v, 0, NULL);
1796 msg_at (SE, location,
1797 _("Failed to parse category specification as format %s: %s."),
1798 fmt_name (format), error);
/* Scans the categories in CATS for the one that postcompute expression node
   E refers to.  The test applied to each category depends on E->op:

     CTPO_CAT_NUMBER    a CCT_NUMBER category with an equal number
     CTPO_CAT_STRING    a CCT_STRING category with an equal string
     CTPO_CAT_NRANGE    a CCT_NRANGE category with both bounds equal
     CTPO_CAT_SRANGE    a CCT_SRANGE category with both bounds equal, as
                        judged by nullable_substring_equal()
     CTPO_CAT_MISSING   a CCT_MISSING category
     CTPO_CAT_OTHERNM   a CCT_OTHERNM category
     CTPO_CAT_SUBTOTAL  a CCT_SUBTOTAL category, selected using
                        E->subtotal_index and the n_subtotals counter
     CTPO_CAT_TOTAL     a CCT_TOTAL category

   NOTE(review): the statements executed on a match and the return statements
   are not visible in this excerpt; presumably the match is recorded in BEST
   and returned, with NULL for no match -- confirm. */
1807 static struct ctables_category *
1808 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1809 const struct ctables_pcexpr *e)
1811 struct ctables_category *best = NULL;
1812 size_t n_subtotals = 0;
1813 for (size_t i = 0; i < cats->n_cats; i++)
1815 struct ctables_category *cat = &cats->cats[i];
1818 case CTPO_CAT_NUMBER:
1819 if (cat->type == CCT_NUMBER && cat->number == e->number)
1823 case CTPO_CAT_STRING:
1824 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1828 case CTPO_CAT_NRANGE:
1829 if (cat->type == CCT_NRANGE
1830 && cat->nrange[0] == e->nrange[0]
1831 && cat->nrange[1] == e->nrange[1])
1835 case CTPO_CAT_SRANGE:
1836 if (cat->type == CCT_SRANGE
1837 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1838 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1842 case CTPO_CAT_MISSING:
1843 if (cat->type == CCT_MISSING)
1847 case CTPO_CAT_OTHERNM:
1848 if (cat->type == CCT_OTHERNM)
1852 case CTPO_CAT_SUBTOTAL:
1853 if (cat->type == CCT_SUBTOTAL)
/* A reference either names a subtotal by position or, with index 0, gives no
   position at all. */
1856 if (e->subtotal_index == n_subtotals)
1858 else if (e->subtotal_index == 0)
1863 case CTPO_CAT_TOTAL:
1864 if (cat->type == CCT_TOTAL)
/* Special handling for a reference without a position when the list holds
   more than one subtotal. */
1878 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string-based references are first
   converted to numeric ones with parse_category_string(): a CTPO_CAT_STRING
   node becomes a temporary CTPO_CAT_NUMBER node, and a CTPO_CAT_SRANGE node
   becomes a temporary CTPO_CAT_NRANGE node, in which a null lower or upper
   string maps to -DBL_MAX or DBL_MAX respectively.  The temporary node keeps
   E's location.  The lookup itself is done by
   ctables_find_category_for_postcompute__().

   NOTE(review): the statements that follow a failed parse_category_string()
   call are not visible in this excerpt; presumably NULL is returned --
   confirm. */
1883 static struct ctables_category *
1884 ctables_find_category_for_postcompute (const struct dictionary *dict,
1885 const struct ctables_categories *cats,
1886 enum fmt_type parse_format,
1887 const struct ctables_pcexpr *e)
1889 if (parse_format != FMT_F)
1891 if (e->op == CTPO_CAT_STRING)
1894 if (!parse_category_string (e->location, e->string, dict,
1895 parse_format, &number))
1898 struct ctables_pcexpr e2 = {
1899 .op = CTPO_CAT_NUMBER,
1901 .location = e->location,
1903 return ctables_find_category_for_postcompute__ (cats, &e2);
1905 else if (e->op == CTPO_CAT_SRANGE)
/* A null bound string means the range is open at that end. */
1908 if (!e->srange[0].string)
1909 nrange[0] = -DBL_MAX;
1910 else if (!parse_category_string (e->location, e->srange[0], dict,
1911 parse_format, &nrange[0]))
1914 if (!e->srange[1].string)
1915 nrange[1] = DBL_MAX;
1916 else if (!parse_category_string (e->location, e->srange[1], dict,
1917 parse_format, &nrange[1]))
1920 struct ctables_pcexpr e2 = {
1921 .op = CTPO_CAT_NRANGE,
1922 .nrange = { nrange[0], nrange[1] },
1923 .location = e->location,
1925 return ctables_find_category_for_postcompute__ (cats, &e2);
/* No conversion applies: look up E as it stands. */
1928 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that the category references in postcompute expression E, used by
   computed category PC_CAT, can be resolved within CATS.  CATS_LOCATION is
   the syntax location of the category list and is used in diagnostics.

   For the category-reference operators, the referenced category is looked up
   with ctables_find_category_for_postcompute() using PC_CAT->parse_format.
   Two kinds of failure are diagnosed:

     - A SUBTOTAL reference without a position (subtotal_index 0) when CATS
       holds more than one subtotal.

     - A reference to a category that is not in the list, followed by a note
       whose advice depends on whether a subtotal, a total, or some other
       category is missing.

   Other expression nodes are checked by recursing into both elements of
   E->subs.

   NOTE(review): the conditions guarding the diagnostics, the code controlled
   by PC_CAT->pc->hide_source_cats, and the return statements are not visible
   in this excerpt; presumably the function returns true when every reference
   resolves -- confirm. */
1932 ctables_recursive_check_postcompute (struct dictionary *dict,
1933 const struct ctables_pcexpr *e,
1934 struct ctables_category *pc_cat,
1935 const struct ctables_categories *cats,
1936 const struct msg_location *cats_location)
1940 case CTPO_CAT_NUMBER:
1941 case CTPO_CAT_STRING:
1942 case CTPO_CAT_NRANGE:
1943 case CTPO_CAT_SRANGE:
1944 case CTPO_CAT_MISSING:
1945 case CTPO_CAT_OTHERNM:
1946 case CTPO_CAT_SUBTOTAL:
1947 case CTPO_CAT_TOTAL:
1949 struct ctables_category *cat = ctables_find_category_for_postcompute (
1950 dict, cats, pc_cat->parse_format, e);
/* Distinguish an ambiguous subtotal reference from a missing category. */
1953 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1955 size_t n_subtotals = 0;
1956 for (size_t i = 0; i < cats->n_cats; i++)
1957 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1958 if (n_subtotals > 1)
1960 msg_at (SE, cats_location,
1961 ngettext ("These categories include %zu instance "
1962 "of SUBTOTAL or HSUBTOTAL, so references "
1963 "from computed categories must refer to "
1964 "subtotals by position, "
1965 "e.g. SUBTOTAL[1].",
1966 "These categories include %zu instances "
1967 "of SUBTOTAL or HSUBTOTAL, so references "
1968 "from computed categories must refer to "
1969 "subtotals by position, "
1970 "e.g. SUBTOTAL[1].",
1973 msg_at (SN, e->location,
1974 _("This is the reference that lacks a position."));
/* The referenced category is absent from the list. */
1979 msg_at (SE, pc_cat->location,
1980 _("Computed category &%s references a category not included "
1981 "in the category list."),
1983 msg_at (SN, e->location, _("This is the missing category."));
1984 if (e->op == CTPO_CAT_SUBTOTAL)
1985 msg_at (SN, cats_location,
1986 _("To fix the problem, add subtotals to the "
1987 "list of categories here."));
/* This note is issued with msg(), so unlike its siblings it carries no
   location. */
1988 else if (e->op == CTPO_CAT_TOTAL)
1989 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1990 "CATEGORIES specification."));
1992 msg_at (SN, cats_location,
1993 _("To fix the problem, add the missing category to the "
1994 "list of categories here."));
1997 if (pc_cat->pc->hide_source_cats)
/* Check both operands of a non-leaf expression node, skipping null ones. */
2011 for (size_t i = 0; i < 2; i++)
2012 if (e->subs[i] && !ctables_recursive_check_postcompute (
2013 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric ones.  When a numeric
   variable is found, an error naming it is reported at CAT's location,
   because CAT is a category specification that applies only to string
   variables.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the function returns false after reporting an error and true
   when every variable is a string variable -- confirm. */
2022 all_strings (struct variable **vars, size_t n_vars,
2023 const struct ctables_category *cat)
2025 for (size_t j = 0; j < n_vars; j++)
2026 if (var_is_numeric (vars[j]))
2028 msg_at (SE, cat->location,
2029 _("This category specification may be applied only to string "
2030 "variables, but this subcommand tries to apply it to "
2031 "numeric variable %s."),
2032 var_get_name (vars[j]));
/* Parses a category specification for a list of variables and installs the
   resulting category set in table T.

   The steps visible in this excerpt are:

     1. Parse VARIABLES, an optional T_EQUALS, and a variable list.

     2. Work out whether all the variables share one print format type.  The
        result feeds an expression testing the date, time, and date-component
        format categories, and the parse_strings flag used later.

     3. Allocate one struct ctables_categories with a reference count equal
        to the number of variables, and release the previous entry in
        T->categories for each variable's dictionary index.

     4. Parse an optional bracketed list of explicit categories, recording
        each category's location and the token range of the whole list.

     5. Parse settings until T_SLASH or T_ENDCMD: ORDER, KEY, and MISSING
        (accepted only when no explicit categories were given), plus TOTAL,
        LABEL, POSITION, and EMPTY.

     6. Append the implicit category and, for totals, a total category placed
        first or last according to POSITION.

     7. Walk the categories to attach subtotals, then validate explicit
        categories against the variables' types, converting string
        categories to numbers when strings are parsed using the common
        format.

   NOTE(review): many statements, including the error paths, several
   conditions, and the return statements, are missing from this excerpt.  The
   step descriptions above cover only what is visible. */
2039 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2040 struct ctables *ct, struct ctables_table *t)
2042 if (!lex_match_id (lexer, "VARIABLES"))
2044 lex_match (lexer, T_EQUALS);
2046 struct variable **vars;
2048 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* common_format starts as the first variable's print format and becomes NULL
   as soon as another variable's format type differs. */
2051 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2052 for (size_t i = 1; i < n_vars; i++)
2054 const struct fmt_spec *f = var_get_print_format (vars[i]);
2055 if (f->type != common_format->type)
2057 common_format = NULL;
2063 && (fmt_get_category (common_format->type)
2064 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One category set is shared by all the listed variables, hence n_refs is
   n_vars.  Empty categories are shown by default. */
2066 struct ctables_categories *c = xmalloc (sizeof *c);
2067 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2068 for (size_t i = 0; i < n_vars; i++)
2070 struct ctables_categories **cp
2071 = &t->categories[var_get_dict_index (vars[i])];
2072 ctables_categories_unref (*cp);
/* cats_start_ofs and cats_end_ofs stay at -1 unless an explicit category
   list is present. */
2076 size_t allocated_cats = 0;
2077 int cats_start_ofs = -1;
2078 int cats_end_ofs = -1;
2079 if (lex_match (lexer, T_LBRACK))
2081 cats_start_ofs = lex_ofs (lexer);
/* Grow the array as needed before parsing into the next free slot. */
2084 if (c->n_cats >= allocated_cats)
2085 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2087 int start_ofs = lex_ofs (lexer);
2088 struct ctables_category *cat = &c->cats[c->n_cats];
2089 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2091 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* A comma between categories is optional. */
2094 lex_match (lexer, T_COMMA);
2096 while (!lex_match (lexer, T_RBRACK));
2097 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category; the ORDER, KEY, and MISSING settings
   below modify it. */
2100 struct ctables_category cat = {
2102 .include_missing = false,
2103 .sort_ascending = true,
2105 bool show_totals = false;
2106 char *total_label = NULL;
2107 bool totals_before = false;
2108 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER, KEY, and MISSING are recognized only when there are no explicit
   categories. */
2110 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2112 lex_match (lexer, T_EQUALS);
2113 if (lex_match_id (lexer, "A"))
2114 cat.sort_ascending = true;
2115 else if (lex_match_id (lexer, "D"))
2116 cat.sort_ascending = false;
2119 lex_error_expecting (lexer, "A", "D");
2123 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
/* start_ofs points at the KEY token, which has already been consumed. */
2125 int start_ofs = lex_ofs (lexer) - 1;
2126 lex_match (lexer, T_EQUALS);
2127 if (lex_match_id (lexer, "VALUE"))
2128 cat.type = CCT_VALUE;
2129 else if (lex_match_id (lexer, "LABEL"))
2130 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in the closed range
   0 to 100. */
2133 cat.type = CCT_FUNCTION;
2134 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2135 &cat.weighting, &cat.area))
2138 if (lex_match (lexer, T_LPAREN))
2140 cat.sort_var = parse_variable (lexer, dict);
2144 if (cat.sort_function == CTSF_PTILE)
2146 lex_match (lexer, T_COMMA);
2147 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2149 cat.percentile = lex_number (lexer);
2153 if (!lex_force_match (lexer, T_RPAREN))
2156 else if (ctables_function_availability (cat.sort_function)
/* The result is deliberately unused: the call is made to report the missing
   left parenthesis. */
2159 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
/* Sorting by a summary function is parsed but then rejected. */
2163 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
2164 _("Data-dependent sorting is not implemented."));
2168 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2170 lex_match (lexer, T_EQUALS);
2171 if (lex_match_id (lexer, "INCLUDE"))
2172 cat.include_missing = true;
2173 else if (lex_match_id (lexer, "EXCLUDE"))
2174 cat.include_missing = false;
2177 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* The remaining settings are accepted with or without explicit categories. */
2181 else if (lex_match_id (lexer, "TOTAL"))
2183 lex_match (lexer, T_EQUALS);
2184 if (!parse_bool (lexer, &show_totals))
2187 else if (lex_match_id (lexer, "LABEL"))
2189 lex_match (lexer, T_EQUALS);
2190 if (!lex_force_string (lexer))
2193 total_label = ss_xstrdup (lex_tokss (lexer));
2196 else if (lex_match_id (lexer, "POSITION"))
2198 lex_match (lexer, T_EQUALS);
2199 if (lex_match_id (lexer, "BEFORE"))
2200 totals_before = true;
2201 else if (lex_match_id (lexer, "AFTER"))
2202 totals_before = false;
2205 lex_error_expecting (lexer, "BEFORE", "AFTER");
2209 else if (lex_match_id (lexer, "EMPTY"))
2211 lex_match (lexer, T_EQUALS);
2212 if (lex_match_id (lexer, "INCLUDE"))
2213 c->show_empty = true;
2214 else if (lex_match_id (lexer, "EXCLUDE"))
2215 c->show_empty = false;
2218 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two alternative "expecting" messages: one lists every setting, the other
   only the settings allowed alongside explicit categories. */
2225 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2226 "TOTAL", "LABEL", "POSITION", "EMPTY");
2228 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the template. */
2235 if (c->n_cats >= allocated_cats)
2236 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2237 c->cats[c->n_cats++] = cat;
/* Make room for a total category, at index 0 or in the slot after the last
   category. */
2242 if (c->n_cats >= allocated_cats)
2243 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2245 struct ctables_category *totals;
2248 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2249 totals = &c->cats[0];
2252 totals = &c->cats[c->n_cats];
/* The label given with LABEL is used if present, otherwise a copy of the
   translated default "Total". */
2255 *totals = (struct ctables_category) {
2257 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward otherwise,
   using one loop header for both directions. */
2261 struct ctables_category *subtotal = NULL;
2262 for (size_t i = totals_before ? 0 : c->n_cats;
2263 totals_before ? i < c->n_cats : i-- > 0;
2264 totals_before ? i++ : 0)
2266 struct ctables_category *cat = &c->cats[i];
2275 cat->subtotal = subtotal;
2278 case CCT_POSTCOMPUTE:
2289 case CCT_EXCLUDED_MISSING:
/* Validation that applies only when an explicit category list was parsed. */
2294 if (cats_start_ofs != -1)
2296 for (size_t i = 0; i < c->n_cats; i++)
2298 struct ctables_category *cat = &c->cats[i];
/* Postcomputes: record the format used to parse strings, then check every
   category the expression refers to. */
2301 case CCT_POSTCOMPUTE:
2302 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2303 struct msg_location *cats_location
2304 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2305 bool ok = ctables_recursive_check_postcompute (
2306 dict, cat->pc->expr, cat, c, cats_location);
2307 msg_location_destroy (cats_location);
/* Numeric-only category specifications are rejected for string variables. */
2314 for (size_t j = 0; j < n_vars; j++)
2315 if (var_is_alpha (vars[j]))
2317 msg_at (SE, cat->location,
2318 _("This category specification may be applied "
2319 "only to numeric variables, but this "
2320 "subcommand tries to apply it to string "
2322 var_get_name (vars[j]));
/* A string category is parsed in the common format and converted in place to
   a numeric category; the alternative path requires string variables. */
2331 if (!parse_category_string (cat->location, cat->string, dict,
2332 common_format->type, &n))
2335 ss_dealloc (&cat->string);
2337 cat->type = CCT_NUMBER;
2340 else if (!all_strings (vars, n_vars, cat))
/* Likewise a string range becomes a numeric range; a null bound string marks
   an open end. */
2349 if (!cat->srange[0].string)
2351 else if (!parse_category_string (cat->location,
2352 cat->srange[0], dict,
2353 common_format->type, &n[0]))
2356 if (!cat->srange[1].string)
2358 else if (!parse_category_string (cat->location,
2359 cat->srange[1], dict,
2360 common_format->type, &n[1]))
2363 ss_dealloc (&cat->srange[0]);
2364 ss_dealloc (&cat->srange[1]);
2366 cat->type = CCT_NRANGE;
2367 cat->nrange[0] = n[0];
2368 cat->nrange[1] = n[1];
2370 else if (!all_strings (vars, n_vars, cat))
2381 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: the summary spec set for every summary
   variant and the areas array for every area type.

   NOTE(review): at least one statement of the original function is missing
   from this excerpt, so this list may be incomplete. */
2396 ctables_nest_uninit (struct ctables_nest *nest)
2399 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2400 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2401 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2402 free (nest->areas[at]);
/* Releases the contents of STACK: each of its STACK->n nests is
   uninitialized with ctables_nest_uninit(), then the nests array is freed.
   STACK itself is not freed by the visible code.

   NOTE(review): a guard preceding the loop may exist but is not visible in
   this excerpt. */
2406 ctables_stack_uninit (struct ctables_stack *stack)
2410 for (size_t i = 0; i < stack->n; i++)
2411 ctables_nest_uninit (&stack->nests[i]);
2412 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new nest for
   every pair (A, B) with A taken from S0 and B taken from S1.

   Each new nest's variable list is A's variables followed by B's.  Its
   scale_idx and summary_idx come from A when A has one and otherwise from B,
   offset by A->n so that they index into the concatenated list.  Summary
   specs are cloned from whichever of A and B is chosen as summary_src.

   Both S0 and S1 are uninitialized before returning, so the function
   consumes its arguments.

   NOTE(review): an early-exit path, the assignments to summary_src, some
   initializers of the new nest, and the return statement are not visible in
   this excerpt. */
2416 static struct ctables_stack
2417 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* NOTE(review): the product s1.n * sizeof *stack.nests is computed before
   xnmalloc() sees it, so any overflow check inside xnmalloc() would not
   cover that multiplication -- confirm whether this matters in practice. */
2424 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2425 for (size_t i = 0; i < s0.n; i++)
2426 for (size_t j = 0; j < s1.n; j++)
2428 const struct ctables_nest *a = &s0.nests[i];
2429 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists, outer variables first. */
2431 size_t allocate = a->n + b->n;
2432 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2434 for (size_t k = 0; k < a->n; k++)
2435 vars[n++] = a->vars[k];
2436 for (size_t k = 0; k < b->n; k++)
2437 vars[n++] = b->vars[k];
2438 assert (n == allocate);
/* Choose which operand supplies the summary specs, based on whether each
   one's CSV_CELL spec set has a variable. */
2440 const struct ctables_nest *summary_src;
2441 if (!a->specs[CSV_CELL].var)
2443 else if (!b->specs[CSV_CELL].var)
2448 struct ctables_nest *new = &stack.nests[stack.n++];
2449 *new = (struct ctables_nest) {
/* SIZE_MAX in either index means "none". */
2451 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2452 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2454 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2455 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2459 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2460 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
/* The inputs are no longer needed once every pair has been combined. */
2462 ctables_stack_uninit (&s0);
2463 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S1 after S0: a new array
   holding S0's nests followed by S1's.  The nest structures are copied by
   value.  The group_head of each nest that comes from S1 is increased by
   S0.n, which matches its shift in position within the combined array.

   NOTE(review): the statements that release the input arrays and the return
   statement are not visible in this excerpt. */
2467 static struct ctables_stack
2468 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2470 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2471 for (size_t i = 0; i < s0.n; i++)
2472 stack.nests[stack.n++] = s0.nests[i];
2473 for (size_t i = 0; i < s1.n; i++)
2475 stack.nests[stack.n] = s1.nests[i];
2476 stack.nests[stack.n].group_head += s0.n;
2479 assert (stack.n == s0.n + s1.n);
/* Builds the single-nest stack for variable axis node A.

   The nest's scale_idx is 0 when A is a scale variable and SIZE_MAX
   otherwise.  Its summary_idx is 0 when A is a scale variable or has
   CSV_CELL summary specs, and SIZE_MAX otherwise.  For every summary variant
   the spec set is cloned from A and then tagged with A's variable and scale
   flag.

   The returned stack has n == 1 and takes over the newly allocated nest.

   NOTE(review): the statement that fills VARS and some initializers of the
   nest are not visible in this excerpt; presumably the one-element variable
   array holds A->var -- confirm. */
2485 static struct ctables_stack
2486 var_fts (const struct ctables_axis *a)
2488 struct variable **vars = xmalloc (sizeof *vars);
2491 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2492 struct ctables_nest *nest = xmalloc (sizeof *nest);
2493 *nest = (struct ctables_nest) {
2496 .scale_idx = a->scale ? 0 : SIZE_MAX,
2497 .summary_idx = is_summary ? 0 : SIZE_MAX,
2500 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2502 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2503 nest->specs[sv].var = a->var;
2504 nest->specs[sv].is_scale = a->scale;
2506 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree rooted at A into a stack of nests.  One path
   returns an empty stack (n == 0).  A stacking node is handled by combining
   the results for its two sub-axes with stack_fts(), and a nesting node by
   combining them with nest_fts().

   NOTE(review): the conditions and case labels that select each path,
   including the one for variable nodes, are not visible in this excerpt;
   presumably the empty stack corresponds to a null A and variable nodes go
   through var_fts() -- confirm. */
2509 static struct ctables_stack
2510 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2513 return (struct ctables_stack) { .n = 0 };
2521 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2522 enumerate_fts (axis_type, a->subs[1]));
2525 /* This should consider any of the scale variables found in the result to
2526 be linked to each other listwise for SMISSING=LISTWISE. */
2527 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2528 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary statistic.  Being a union, only the member
   group that matches the summary function in use is meaningful; the existing
   comments below name the functions that use each group.

   NOTE(review): several member declarations are missing from this excerpt.
   Other code in this file accesses count, min, max, ovalid, and ovalue in
   addition to the two members shown. */
2534 union ctables_summary
2536 /* COUNT, VALIDN, TOTALN. */
2539 /* MINIMUM, MAXIMUM, RANGE. */
2546 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2547 struct moments1 *moments;
2549 /* MEDIAN, MODE, PTILE. */
2552 struct casewriter *writer;
/* Initializes accumulator S for the summary function named by SS->function.

   The initializations visible in this excerpt are:
     - min and max are both set to SYSMIS, which serves as "no value yet";
     - a moments1 accumulator is created with MOMENT_MEAN for one group of
       functions and with MOMENT_VARIANCE for another;
     - a sorting casewriter is created over cases with two width-0 values,
       ordered ascending on the value at index 0.

   NOTE(review): most case labels and the initialization for the counting
   functions are not visible in this excerpt. */
2559 ctables_summary_init (union ctables_summary *s,
2560 const struct ctables_summary_spec *ss)
2562 switch (ss->function)
2565 case CTSF_areaPCT_COUNT:
2566 case CTSF_areaPCT_VALIDN:
2567 case CTSF_areaPCT_TOTALN:
2580 s->min = s->max = SYSMIS;
2585 case CTSF_areaPCT_SUM:
2586 s->moments = moments1_create (MOMENT_MEAN);
2592 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 values per case; ctables_summary_add() stores the data value
   at index 0 and its weight at index 1. */
2599 struct caseproto *proto = caseproto_create ();
2600 proto = caseproto_add_width (proto, 0);
2601 proto = caseproto_add_width (proto, 0);
/* Sort on the value at index 0, ascending. */
2603 struct subcase ordering;
2604 subcase_init (&ordering, 0, 0, SC_ASCEND);
2605 s->writer = sort_create_writer (&ordering, proto);
/* The ordering and the local reference to the prototype are released once
   the writer has been created. */
2606 subcase_uninit (&ordering);
2607 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function named by
   SS->function: the moments1 accumulator for the moment-based functions and
   the casewriter for the order-statistic functions.

   NOTE(review): most case labels are not visible in this excerpt; presumably
   the counting and min/max functions need no cleanup -- confirm. */
2617 ctables_summary_uninit (union ctables_summary *s,
2618 const struct ctables_summary_spec *ss)
2620 switch (ss->function)
2623 case CTSF_areaPCT_COUNT:
2624 case CTSF_areaPCT_VALIDN:
2625 case CTSF_areaPCT_TOTALN:
2644 case CTSF_areaPCT_SUM:
2645 moments1_destroy (s->moments);
2651 casewriter_destroy (s->writer);
/* Adds one observation, VALUE with a weight, to accumulator S for the
   summary function named by SS->function.  IS_MISSING and IS_INCLUDED
   describe the observation's status; the chart in the comment inside the
   function explains which statuses count for which kind of summary.

   The updates visible in this excerpt are:
     - min and max are replaced when they still hold SYSMIS or when VALUE is
       smaller or larger respectively;
     - the moment-based functions pass VALUE and the weight to moments1_add();
     - the order-statistic functions add the weight to ovalid and write a
       case holding VALUE at index 0 and the weight at index 1 to the
       casewriter.

   NOTE(review): the final parameter, most case labels, and the conditions
   that test IS_MISSING and IS_INCLUDED are not visible in this excerpt.  The
   block comment inside the function also has no visible terminator here, so
   no further comments are added below it. */
2657 ctables_summary_add (union ctables_summary *s,
2658 const struct ctables_summary_spec *ss,
2659 const union value *value,
2660 bool is_missing, bool is_included,
2663 /* To determine whether a case is included in a given table for a particular
2664 kind of summary, consider the following charts for the variable being
2665 summarized. Only if "yes" appears is the case counted.
2667 Categorical variables: VALIDN other TOTALN
2668 Valid values in included categories yes yes yes
2669 Missing values in included categories --- yes yes
2670 Missing values in excluded categories --- --- yes
2671 Valid values in excluded categories --- --- ---
2673 Scale variables: VALIDN other TOTALN
2674 Valid value yes yes yes
2675 Missing value --- yes yes
2677 Missing values include both user- and system-missing. (The system-missing
2678 value is always in an excluded category.)
2680 One way to interpret the above table is that scale variables are like
2681 categorical variables in which all values are in included categories.
2683 switch (ss->function)
2686 case CTSF_areaPCT_TOTALN:
2691 case CTSF_areaPCT_COUNT:
2697 case CTSF_areaPCT_VALIDN:
2715 if (s->min == SYSMIS || value->f < s->min)
2717 if (s->max == SYSMIS || value->f > s->max)
2728 moments1_add (s->moments, value->f, weight);
2731 case CTSF_areaPCT_SUM:
2733 moments1_add (s->moments, value->f, weight);
2741 s->ovalid += weight;
2743 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2744 *case_num_rw_idx (c, 0) = value->f;
2745 *case_num_rw_idx (c, 1) = weight;
2746 casewriter_write (s->writer, c);
/* Computes the final value of the summary accumulated in S for the function
   named by SS->function, using CELL for statistics that depend on the cell's
   areas.  SYSMIS is returned where a statistic is undefined, for example
   when a percentage's denominator is zero.

   NOTE(review): most case labels and several return statements are not
   visible in this excerpt, so the mapping from functions to the computations
   below is partly inferred from the variable names. */
2753 ctables_summary_value (const struct ctables_cell *cell,
2754 union ctables_summary *s,
2755 const struct ctables_summary_spec *ss)
2757 switch (ss->function)
2763 return cell->areas[ss->calc_area]->sequence;
/* Percentages: the cell's count as a share of the matching total for the
   area selected by ss->calc_area, using the weighting in ss->weighting. */
2765 case CTSF_areaPCT_COUNT:
2767 const struct ctables_area *a = cell->areas[ss->calc_area];
2768 double a_count = a->count[ss->weighting];
2769 return a_count ? s->count / a_count * 100 : SYSMIS;
2772 case CTSF_areaPCT_VALIDN:
2774 const struct ctables_area *a = cell->areas[ss->calc_area];
2775 double a_valid = a->valid[ss->weighting];
2776 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2779 case CTSF_areaPCT_TOTALN:
2781 const struct ctables_area *a = cell->areas[ss->calc_area];
2782 double a_total = a->total[ss->weighting];
2783 return a_total ? s->count / a_total * 100 : SYSMIS;
/* Difference of the extremes, defined only when both have been set. */
2798 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2803 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2809 double weight, variance;
2810 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2811 return calc_semean (variance, weight);
/* Square root of the variance. */
2817 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2818 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Total weight times mean. */
2823 double weight, mean;
2824 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2825 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2831 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Percentage of a sum: weight times mean, as a share of the area's sum for
   the variable at ss->sum_var_idx. */
2835 case CTSF_areaPCT_SUM:
2837 double weight, mean;
2838 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2839 if (weight == SYSMIS || mean == SYSMIS)
2842 const struct ctables_area *a = cell->areas[ss->calc_area];
2843 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2844 double denom = sum->sum[ss->weighting];
2845 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Percentile or median: read back the sorted cases and run them through a
   percentile order statistic, caching the result in s->ovalue. */
2852 struct casereader *reader = casewriter_make_reader (s->writer);
/* NOTE(review): ss->percentile is passed unscaled next to the constant 0.5,
   while the PTILE argument is parsed elsewhere in this file with the closed
   range 0 to 100 -- confirm which scale percentile_create() expects. */
2855 struct percentile *ptile = percentile_create (
2856 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2857 struct order_stats *os = &ptile->parent;
2858 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2859 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2860 statistic_destroy (&ptile->parent.parent);
/* Mode: the same approach with a mode order statistic. */
2867 struct casereader *reader = casewriter_make_reader (s->writer);
2870 struct mode *mode = mode_create ();
2871 struct order_stats *os = &mode->parent;
2872 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2873 s->ovalue = mode->mode;
2874 statistic_destroy (&mode->parent.parent);
/* Auxiliary data passed to ctables_cell_compare_3way(). */
2882 struct ctables_cell_sort_aux
/* The nest whose variables are compared, in order. */
2884 const struct ctables_nest *nest;
/* The axis whose cell values are compared. */
2885 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   pointer to a struct ctables_cell, and AUX_ points to a struct
   ctables_cell_sort_aux that names the nest and axis to compare on.

   The nest's variables are examined in order, skipping the one at the nest's
   scale_idx.  For each variable, cells in different categories are ordered
   by the addresses of their category structures.  Cells in the same category
   are ordered according to the category's type: by value, or by value label
   with the value apparently used as a fallback, honoring the category's
   sort_ascending flag where it applies.

   NOTE(review): most case labels, the conditions around the label
   comparison, and the final return are not visible in this excerpt;
   presumably 0 is returned when no variable distinguishes the cells --
   confirm. */
2889 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2891 const struct ctables_cell_sort_aux *aux = aux_;
2892 struct ctables_cell *const *ap = a_;
2893 struct ctables_cell *const *bp = b_;
2894 const struct ctables_cell *a = *ap;
2895 const struct ctables_cell *b = *bp;
2897 const struct ctables_nest *nest = aux->nest;
2898 for (size_t i = 0; i < nest->n; i++)
2899 if (i != nest->scale_idx)
2901 const struct variable *var = nest->vars[i];
2902 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2903 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category pointers themselves. */
2904 if (a_cv->category != b_cv->category)
2905 return a_cv->category > b_cv->category ? 1 : -1;
2907 const union value *a_val = &a_cv->value;
2908 const union value *b_val = &b_cv->value;
2909 switch (a_cv->category->type)
2915 case CCT_POSTCOMPUTE:
2916 case CCT_EXCLUDED_MISSING:
2917 /* Must be equal. */
2925 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2933 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Reverse the comparison result for descending order. */
2935 return a_cv->category->sort_ascending ? cmp : -cmp;
2941 const char *a_label = var_lookup_value_label (var, a_val);
2942 const char *b_label = var_lookup_value_label (var, b_val);
2948 cmp = strcmp (a_label, b_label);
2954 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2957 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes.  A_ and B_ each
   point to a pointer to a struct ctables_cell; AUX is unused.

   The axes are examined in increasing order of axis number and the leaf
   member of each cell's per-axis data is compared.

   NOTE(review): the condition guarding the visible return and the final
   return are not visible in this excerpt; presumably the first axis whose
   leaves differ decides the result and 0 is returned when all are equal --
   confirm. */
2969 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2970 const void *aux UNUSED)
2972 struct ctables_cell *const *ap = a_;
2973 struct ctables_cell *const *bp = b_;
2974 const struct ctables_cell *a = *ap;
2975 const struct ctables_cell *b = *bp;
2977 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2979 int al = a->axes[axis].leaf;
2980 int bl = b->axes[axis].leaf;
2982 return al > bl ? 1 : -1;
/* Finds the area of type AREA in section S that CELL belongs to, creating it
   when there is none.

   An area is identified by the categories, and for ordinary categories also
   the values, of the variables listed in each nest's areas[AREA] array.
   Total, subtotal, and postcompute categories are identified by category
   alone.  The same rule is used both for hashing and for comparing against
   each candidate's example cell.

   A new area records CELL as its example and gets a zero-initialized sums
   array with one element per sum variable of the table, when the table has
   any.

   NOTE(review): the initialization of HASH, the statements run on a match or
   mismatch, the alternative value for SUMS, and the return statements are
   not visible in this excerpt; presumably an existing or new area is
   returned -- confirm. */
2987 static struct ctables_area *
2988 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
2989 enum ctables_area_type area)
/* Hash the identifying categories and values across all axes. */
2992 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2994 const struct ctables_nest *nest = s->nests[a];
2995 for (size_t i = 0; i < nest->n_areas[area]; i++)
2997 size_t v_idx = nest->areas[area][i];
2998 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2999 hash = hash_pointer (cv->category, hash);
3000 if (cv->category->type != CCT_TOTAL
3001 && cv->category->type != CCT_SUBTOTAL
3002 && cv->category->type != CCT_POSTCOMPUTE)
3003 hash = value_hash (&cv->value,
3004 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area with the same hash whose example cell agrees
   with CELL on every identifying variable.  Inside the inner loop, the axis
   variable `a' shadows the area pointer `a' declared here. */
3008 struct ctables_area *a;
3009 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3011 const struct ctables_cell *df = a->example;
3012 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3014 const struct ctables_nest *nest = s->nests[a];
3015 for (size_t i = 0; i < nest->n_areas[area]; i++)
3017 size_t v_idx = nest->areas[area][i];
3018 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3019 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3020 if (cv1->category != cv2->category
3021 || (cv1->category->type != CCT_TOTAL
3022 && cv1->category->type != CCT_SUBTOTAL
3023 && cv1->category->type != CCT_POSTCOMPUTE
3024 && !value_equal (&cv1->value, &cv2->value,
3025 var_get_width (nest->vars[v_idx]))))
/* Create a new area with CELL as its example. */
3034 struct ctables_sum *sums = (s->table->n_sum_vars
3035 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3038 a = xmalloc (sizeof *a);
3039 *a = (struct ctables_area) { .example = cell, .sums = sums };
3040 hmap_insert (&s->areas[area], &a->node, hash);
/* Views the string value V of variable VAR as a substring as wide as VAR,
   then trims trailing spaces from that view.  The substring refers to V's
   own bytes; nothing is copied.

   NOTE(review): the return statement is not visible in this excerpt;
   presumably the trimmed substring is returned -- confirm. */
3044 static struct substring
3045 rtrim_value (const union value *v, const struct variable *var)
3047 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3048 var_get_width (var));
3049 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of variable VAR, with trailing spaces
   trimmed by rtrim_value(), lies within the range SRANGE.  SRANGE[0] is the
   lower bound and SRANGE[1] the upper bound; both are inclusive, and a bound
   whose string member is null is treated as absent, leaving that end of the
   range open.  Comparison is done with ss_compare(). */
3054 in_string_range (const union value *v, const struct variable *var,
3055 const struct substring *srange)
3057 struct substring s = rtrim_value (v, var);
3058 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3059 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The system-missing value of a numeric variable gets special handling
   first.  Otherwise the categories are scanned from last to first, and the
   test applied depends on each category's type: equality with a number,
   equality with a string after trimming trailing spaces from V, membership
   in a numeric range whose bounds of -DBL_MAX and DBL_MAX mean "open",
   membership in a string range, or whether V is a missing value of VAR.

   If the scan ends without a result, the function returns NULL for a missing
   value and otherwise the category remembered in OTHERNM, which may itself
   be NULL.

   NOTE(review): the case labels, the statements run on each match, and the
   handling of the system-missing value are not visible in this excerpt;
   presumably a matching category is returned immediately -- confirm. */
3062 static const struct ctables_category *
3063 ctables_categories_match (const struct ctables_categories *c,
3064 const union value *v, const struct variable *var)
3066 if (var_is_numeric (var) && v->f == SYSMIS)
3069 const struct ctables_category *othernm = NULL;
/* Counts down from n_cats - 1 to 0 without underflowing the unsigned
   index. */
3070 for (size_t i = c->n_cats; i-- > 0; )
3072 const struct ctables_category *cat = &c->cats[i];
3076 if (cat->number == v->f)
3081 if (ss_equals (cat->string, rtrim_value (v, var)))
3086 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3087 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3092 if (in_string_range (v, var, cat->srange))
3097 if (var_is_value_missing (var, v))
3101 case CCT_POSTCOMPUTE:
/* This category accepts missing values only when include_missing is set. */
3116 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3119 case CCT_EXCLUDED_MISSING:
/* No category matched: OTHERNM is the fallback for non-missing values
   only. */
3124 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C when it is C's first or last category.
   The result when neither is a total is not visible in this listing.

   NOTE(review): cats[0] and cats[n_cats - 1] are read unconditionally, so
   this presumably requires C to have at least one category -- confirm. */
3127 static const struct ctables_category *
3128 ctables_categories_total (const struct ctables_categories *c)
3130 const struct ctables_category *first = &c->cats[0];
3131 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3132 return (first->type == CCT_TOTAL ? first
3133 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S identified by the categories in CATS and the
   data values in case C, creating and registering a new cell if none exists
   yet.

   CATS[a][i] is the category for variable I of the nest on axis A.  Scale
   variables (nest->scale_idx) take no part in identifying a cell. */
3137 static struct ctables_cell *
3138 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3139 const struct ctables_category **cats[PIVOT_N_AXES])
3142 enum ctables_summary_variant sv = CSV_CELL;
/* Hash each non-scale variable's category.  For ordinary categories the data
   value is hashed as well; totals, subtotals, and postcomputes are identified
   by the category alone. */
3143 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3145 const struct ctables_nest *nest = s->nests[a];
3146 for (size_t i = 0; i < nest->n; i++)
3147 if (i != nest->scale_idx)
3149 hash = hash_pointer (cats[a][i], hash);
3150 if (cats[a][i]->type != CCT_TOTAL
3151 && cats[a][i]->type != CCT_SUBTOTAL
3152 && cats[a][i]->type != CCT_POSTCOMPUTE)
3153 hash = value_hash (case_data (c, nest->vars[i]),
3154 var_get_width (nest->vars[i]), hash);
/* Look among the cells with this hash for one whose categories and values
   all agree, using the same rules as the hash above. */
3160 struct ctables_cell *cell;
3161 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3163 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3165 const struct ctables_nest *nest = s->nests[a];
3166 for (size_t i = 0; i < nest->n; i++)
3167 if (i != nest->scale_idx
3168 && (cats[a][i] != cell->axes[a].cvs[i].category
3169 || (cats[a][i]->type != CCT_TOTAL
3170 && cats[a][i]->type != CCT_SUBTOTAL
3171 && cats[a][i]->type != CCT_POSTCOMPUTE
3172 && !value_equal (case_data (c, nest->vars[i]),
3173 &cell->axes[a].cvs[i].value,
3174 var_get_width (nest->vars[i])))))
/* Build a new cell. */
3183 cell = xmalloc (sizeof *cell);
3186 cell->omit_areas = 0;
3187 cell->postcompute = false;
3188 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3190 const struct ctables_nest *nest = s->nests[a];
3191 cell->axes[a].cvs = (nest->n
3192 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3194 for (size_t i = 0; i < nest->n; i++)
3196 const struct ctables_category *cat = cats[a][i];
3197 const struct variable *var = nest->vars[i];
3198 const union value *value = case_data (c, var);
3199 if (i != nest->scale_idx)
3201 const struct ctables_category *subtotal = cat->subtotal;
3202 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category marks the cell as omitted from
   a set of area types that depends on the axis the category is on.
   omit_areas is a bitmask with one bit per area type. */
3205 if (cat->type == CCT_TOTAL
3206 || cat->type == CCT_SUBTOTAL
3207 || cat->type == CCT_POSTCOMPUTE)
3211 case PIVOT_AXIS_COLUMN:
3212 cell->omit_areas |= ((1u << CTAT_TABLE) |
3213 (1u << CTAT_LAYER) |
3214 (1u << CTAT_LAYERCOL) |
3215 (1u << CTAT_SUBTABLE) |
3218 case PIVOT_AXIS_ROW:
3219 cell->omit_areas |= ((1u << CTAT_TABLE) |
3220 (1u << CTAT_LAYER) |
3221 (1u << CTAT_LAYERROW) |
3222 (1u << CTAT_SUBTABLE) |
3225 case PIVOT_AXIS_LAYER:
3226 cell->omit_areas |= ((1u << CTAT_TABLE) |
3227 (1u << CTAT_LAYER));
3231 if (cat->type == CCT_POSTCOMPUTE)
3232 cell->postcompute = true;
/* Remember the category and a clone of the case's value for this variable. */
3235 cell->axes[a].cvs[i].category = cat;
3236 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* One summary per summary spec, taken from the spec set for this cell's
   summary variant on the summary axis. */
3240 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3241 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3242 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3243 for (size_t i = 0; i < specs->n; i++)
3244 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its area of every area type. */
3245 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3246 cell->areas[at] = ctables_area_insert (s, cell, at);
3247 hmap_insert (&s->cells, &cell->node, hash);
/* Checks each of SPECS's listwise variables for a missing numeric value in
   case C.  The values returned are not visible in this listing; the caller
   uses the result as a boolean. */
3252 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3253 const struct ccase *c)
3255 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3257 const struct variable *var = specs->listwise_vars[i];
3258 if (var_is_num_missing (var, case_num (c, var)))
/* Visits every weighting type for DST and SRC, which each have N_CTWS
   elements.  The loop body is not visible in this listing; presumably it
   accumulates SRC[wt] into DST[wt] -- confirm. */
3266 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3268 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C to the cell of section S identified by CATS, creating the cell
   if necessary, and to the areas that the cell belongs to.

   WEIGHT holds the case weight for each weighting type.  IS_INCLUDED is
   passed through to ctables_summary_add().

   NOTE(review): some guard lines in the area loop are not visible in this
   listing, so which of the add_weight() calls below are conditional cannot
   be told from here. */
3273 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3274 const struct ctables_category **cats[PIVOT_N_AXES],
3275 bool is_included, double weight[N_CTWS])
3277 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3278 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3280 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3281 const union value *value = case_data (c, specs->var);
3282 bool is_missing = var_is_value_missing (specs->var, value);
/* For a scale summary variable, a missing value in any listwise variable
   counts as missing too. */
3283 bool is_scale_missing
3284 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
/* Feed the case to each of the cell's summaries, using the weight for that
   summary's weighting type. */
3286 for (size_t i = 0; i < specs->n; i++)
3287 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3288 is_scale_missing, is_included,
3289 weight[specs->specs[i].weighting]);
3290 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* omit_areas is a bitmask with one bit per area type, so this area's bit
   must be tested with bitwise AND.  Logical AND would be true whenever any
   bit at all was set, because (1u << at) is never zero, and the cell would
   then be left out of every area instead of only the flagged ones. */
3291 if (!(cell->omit_areas & (1u << at)))
3293 struct ctables_area *a = cell->areas[at];
3295 add_weight (a->total, weight);
3297 add_weight (a->count, weight);
3300 add_weight (a->valid, weight);
/* Accumulate weighted sums of the table's sum variables, skipping any whose
   value in this case is missing. */
3302 if (!is_scale_missing)
3303 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3305 const struct variable *var = s->table->sum_vars[i];
3306 double addend = case_num (c, var);
3307 if (!var_is_num_missing (var, addend))
3308 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3309 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the cells for total categories.  Walks the axes from
   START_AXIS and, within an axis, the nest's variables from START_NEST.  For
   a variable that is handled it looks up the variable's total category, adds
   the case to the resulting cell, and recurses starting at the next variable
   so that combinations of totals are covered as well.

   NOTE(review): the lines that substitute the total into CATS[a][i] and
   restore it from SAVE afterward are not visible in this listing. */
3316 recurse_totals (struct ctables_section *s, const struct ccase *c,
3317 const struct ctables_category **cats[PIVOT_N_AXES],
3318 bool is_included, double weight[N_CTWS],
3319 enum pivot_axis_type start_axis, size_t start_nest)
3321 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3323 const struct ctables_nest *nest = s->nests[a];
3324 for (size_t i = start_nest; i < nest->n; i++)
/* Scale variables have no categories. */
3326 if (i == nest->scale_idx)
3329 const struct variable *var = nest->vars[i];
3331 const struct ctables_category *total = ctables_categories_total (
3332 s->table->categories[var_get_dict_index (var)]);
3335 const struct ctables_category *save = cats[a][i];
3337 ctables_cell_add__ (s, c, cats, is_included, weight);
3338 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the cells for subtotal categories.  Walks the axes from
   START_AXIS and, within an axis, the nest's variables from START_NEST.  For
   a variable that is handled it replaces the current category by that
   category's subtotal, adds the case to the resulting cell, and recurses
   starting at the next variable.

   NOTE(review): the test for whether the category has a subtotal and the
   restore of CATS[a][i] from SAVE are not visible in this listing. */
3347 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3348 const struct ctables_category **cats[PIVOT_N_AXES],
3349 bool is_included, double weight[N_CTWS],
3350 enum pivot_axis_type start_axis, size_t start_nest)
3352 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3354 const struct ctables_nest *nest = s->nests[a];
3355 for (size_t i = start_nest; i < nest->n; i++)
/* Scale variables have no categories. */
3357 if (i == nest->scale_idx)
3360 const struct ctables_category *save = cats[a][i];
3363 cats[a][i] = save->subtotal;
3364 ctables_cell_add__ (s, c, cats, is_included, weight);
3365 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Records that VALUE, a value of variable VAR, occurred, by adding a clone
   of it to hash map OCCURRENCES.  Entries with the same hash are first
   compared against VALUE; presumably an equal one ends the function without
   adding a duplicate (that statement is not visible here). */
3374 ctables_add_occurrence (const struct variable *var,
3375 const union value *value,
3376 struct hmap *occurrences)
3378 int width = var_get_width (var);
3379 unsigned int hash = value_hash (value, width, 0);
3381 struct ctables_occurrence *o;
3382 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3384 if (value_equal (value, &o->value, width))
3387 o = xmalloc (sizeof *o);
3388 value_clone (&o->value, value, width);
3389 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with per-weighting weights WEIGHT, to section S: to its own
   cell and to the cells for totals and subtotals.

   Each non-scale variable's value is first matched to a category and then
   recorded in the section's occurrence sets. */
3393 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3394 double weight[N_CTWS])
/* Per-axis category arrays, sized by the number of variables in each nest. */
3396 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3397 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3398 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3399 const struct ctables_category **cats[PIVOT_N_AXES] =
3401 [PIVOT_AXIS_LAYER] = layer_cats,
3402 [PIVOT_AXIS_ROW] = row_cats,
3403 [PIVOT_AXIS_COLUMN] = column_cats,
3406 bool is_included = true;
/* Match every non-scale variable's value to a category. */
3408 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3410 const struct ctables_nest *nest = s->nests[a];
3411 for (size_t i = 0; i < nest->n; i++)
3412 if (i != nest->scale_idx)
3414 const struct variable *var = nest->vars[i];
3415 const union value *value = case_data (c, var);
3417 cats[a][i] = ctables_categories_match (
3418 s->table->categories[var_get_dict_index (var)], value, var);
/* NOTE(review): the enclosing test and the statements guarded by the next
   two conditions are not visible in this listing.  What can be seen is that
   a placeholder CCT_EXCLUDED_MISSING category is substituted and the case is
   marked as not included. */
3421 if (i != nest->summary_idx)
3424 if (!var_is_value_missing (var, value))
3427 static const struct ctables_category cct_excluded_missing = {
3428 .type = CCT_EXCLUDED_MISSING,
3431 cats[a][i] = &cct_excluded_missing;
3432 is_included = false;
/* Record each non-scale variable's value as having occurred. */
3438 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3440 const struct ctables_nest *nest = s->nests[a];
3441 for (size_t i = 0; i < nest->n; i++)
3442 if (i != nest->scale_idx)
3444 const struct variable *var = nest->vars[i];
3445 const union value *value = case_data (c, var);
3446 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* The case's own cell, then totals and subtotals starting from the first
   variable of the first axis. */
3450 ctables_cell_add__ (s, c, cats, is_included, weight);
3451 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3452 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* Summary spec set that an item refers into; merge_item_compare_3way() reads
   it as set->specs[ofs]. */
3457 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that A and B refer to.  The keys,
   in order of precedence, are function, weighting, calc_area, percentile,
   and finally label, where a null label compares as the empty string. */
3462 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3464 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3465 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3466 if (as->function != bs->function)
3467 return as->function > bs->function ? 1 : -1;
3468 else if (as->weighting != bs->weighting)
3469 return as->weighting > bs->weighting ? 1 : -1;
3470 else if (as->calc_area != bs->calc_area)
3471 return as->calc_area > bs->calc_area ? 1 : -1;
/* Unlike the keys above, a smaller percentile compares as greater, so
   percentiles order from high to low.  NOTE(review): confirm that this is
   intentional. */
3472 else if (as->percentile != bs->percentile)
3473 return as->percentile < bs->percentile ? 1 : -1;
3475 const char *as_label = as->label ? as->label : "";
3476 const char *bs_label = bs->label ? bs->label : "";
3477 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of variable VAR into S, by way of a temporary
   pivot_value that is destroyed before returning. */
3481 ctables_category_format_number (double number, const struct variable *var,
3484 struct pivot_value *pv = pivot_value_new_var_value (
3485 var, &(union value) { .f = number });
3486 pivot_value_format (pv, NULL, s);
3487 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR into OUT.  STRING is
   first copied into a buffer of VAR's width, padded on the right with
   spaces, so that it has the layout of a value of VAR.

   NOTE(review): the release of buffer S is not visible in this listing --
   confirm that it is freed after the pivot_value is destroyed. */
3491 ctables_category_format_string (struct substring string,
3492 const struct variable *var, struct string *out)
3494 int width = var_get_width (var);
3495 char *s = xmalloc (width);
3496 buf_copy_rpad (s, width, string.string, string.length, ' ');
3497 struct pivot_value *pv = pivot_value_new_var_value (
3498 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3499 pivot_value_format (pv, NULL, out);
3500 pivot_value_destroy (pv);
/* Appends a textual label for category CAT of variable VAR to S, in a form
   that depends on the kind of category.  ctables_postcompute_label() tests
   the result as a boolean.

   NOTE(review): most of the case labels of the switch are not visible in
   this listing; the notes below follow the category members each visible
   statement uses. */
3505 ctables_category_format_label (const struct ctables_category *cat,
3506 const struct variable *var,
/* A single number. */
3512 ctables_category_format_number (cat->number, var, s);
/* A single string. */
3516 ctables_category_format_string (cat->string, var, s);
/* A numeric range, written as "low THRU high". */
3520 ctables_category_format_number (cat->nrange[0], var, s);
3521 ds_put_format (s, " THRU ");
3522 ctables_category_format_number (cat->nrange[1], var, s);
/* A string range, written as "low THRU high". */
3526 ctables_category_format_string (cat->srange[0], var, s);
3527 ds_put_format (s, " THRU ");
3528 ctables_category_format_string (cat->srange[1], var, s);
3532 ds_put_cstr (s, "MISSING");
3536 ds_put_cstr (s, "OTHERNM");
/* A postcompute is labeled with its name, prefixed by '&'. */
3539 case CCT_POSTCOMPUTE:
3540 ds_put_format (s, "&%s", cat->pc->name);
3545 ds_put_cstr (s, cat->total_label);
3551 case CCT_EXCLUDED_MISSING:
/* Creates the label for postcompute category CAT of variable VAR.  Each
   ")LABEL[n]" in the postcompute's label text is replaced by the label of
   the Nth category in CATS, counting from 1.  A label that contains no such
   marker is used as is.

   The final return uses the label text unexpanded; presumably it is reached
   when a marker is malformed or formatting fails (the jumps to it are not
   visible in this listing). */
3558 static struct pivot_value *
3559 ctables_postcompute_label (const struct ctables_categories *cats,
3560 const struct ctables_category *cat,
3561 const struct variable *var)
3563 struct substring in = ss_cstr (cat->pc->label);
3564 struct substring target = ss_cstr (")LABEL[");
3566 struct string out = DS_EMPTY_INITIALIZER;
3569 size_t chunk = ss_find_substring (in, target);
/* No marker remains: finish with whatever text is left. */
3570 if (chunk == SIZE_MAX)
/* Nothing was expanded, so IN is still the whole label. */
3572 if (ds_is_empty (&out))
3573 return pivot_value_new_user_text (in.string, in.length);
3576 ds_put_substring (&out, in);
3577 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker, then skip past the marker itself. */
3581 ds_put_substring (&out, ss_head (in, chunk));
3582 ss_advance (&in, chunk + target.length);
/* The index is the text up to the closing ']'. */
3584 struct substring idx_s;
3585 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a decimal number from 1 to the number of categories,
   with nothing left over after the digits. */
3588 long int idx = strtol (idx_s.string, &tail, 10);
3589 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3592 struct ctables_category *cat2 = &cats->cats[idx - 1];
3593 if (!ctables_category_format_label (cat2, var, &out))
3599 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels category CAT of variable VAR:

   - For a postcompute that has a label, the expanded postcompute label.
   - For a total or subtotal, the category's total_label.
   - Otherwise, VALUE presented as a value of VAR. */
3602 static struct pivot_value *
3603 ctables_category_create_value_label (const struct ctables_categories *cats,
3604 const struct ctables_category *cat,
3605 const struct variable *var,
3606 const union value *value)
3608 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3609 ? ctables_postcompute_label (cats, cat, var)
3610 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3611 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3612 : pivot_value_new_var_value (var, value));
/* Searches T's clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and whose hash the caller supplies as HASH.  The return
   statements are not visible in this listing; callers treat the result as
   the matching entry or null. */
3615 static struct ctables_value *
3616 ctables_value_find__ (struct ctables_table *t, const union value *value,
3617 int width, unsigned int hash)
3619 struct ctables_value *clv;
3620 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3621 hash, &t->clabels_values_map)
3622 if (value_equal (value, &clv->value, width))
/* Adds a clone of VALUE to T's clabels_values_map.  An existing entry is
   looked up first; presumably the insertion is skipped when one is found
   (that test is not visible in this listing). */
3628 ctables_value_insert (struct ctables_table *t, const union value *value,
3631 unsigned int hash = value_hash (value, width, 0);
3632 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3635 clv = xmalloc (sizeof *clv);
3636 value_clone (&clv->value, value, width);
3637 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's clabels_values_map.  This
   computes the hash with the same seed that ctables_value_insert() uses and
   hands the search to ctables_value_find__(). */
3641 static struct ctables_value *
3642 ctables_value_find (struct ctables_table *t,
3643 const union value *value, int width)
3645 return ctables_value_find__ (t, value, width,
3646 value_hash (value, width, 0));
/* Adds to T one section for every combination of one nest per axis.

   The function recurses over the axes: for axis A it tries each nest index
   in turn, storing it in IX[A], and recurses for axis A + 1.  Once A reaches
   PIVOT_N_AXES, IX is complete and a section is appended to T->sections and
   initialized.  An axis with no nests still gets one iteration. */
3650 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3651 size_t ix[PIVOT_N_AXES])
3653 if (a < PIVOT_N_AXES)
3655 size_t limit = MAX (t->stacks[a].n, 1);
3656 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3657 ctables_table_add_section (t, a + 1, ix);
3661 struct ctables_section *s = &t->sections[t->n_sections++];
3662 *s = (struct ctables_section) {
3664 .cells = HMAP_INITIALIZER (s->cells),
/* Per axis: one occurrence map for each variable in the chosen nest. */
3666 for (a = 0; a < PIVOT_N_AXES; a++)
3669 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3671 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3672 for (size_t i = 0; i < nest->n; i++)
3673 hmap_init (&s->occurrences[a][i]);
3675 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3676 hmap_init (&s->areas[at]);
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() for
   postcompute expressions.  The body is not visible in this listing;
   presumably it yields A + B. */
3681 ctpo_add (double a, double b)
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() for
   postcompute expressions.  The body is not visible in this listing;
   presumably it yields A - B. */
3687 ctpo_sub (double a, double b)
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() for
   postcompute expressions.  The body is not visible in this listing;
   presumably it yields A * B. */
3693 ctpo_mul (double a, double b)
/* Division for postcompute expressions: A / B, or SYSMIS when B is zero. */
3699 ctpo_div (double a, double b)
3701 return b ? a / b : SYSMIS;
/* Exponentiation for postcompute expressions, computed with pow().  errno is
   saved before the call; presumably it is examined and restored afterward
   (those lines are not visible in this listing). */
3705 ctpo_pow (double a, double b)
3707 int save_errno = errno;
3709 double result = pow (a, b);
/* Unary operator callback for postcompute expressions.  B is unused, which
   lets it share the two-argument callback signature.  The body is not
   visible in this listing; presumably it yields -A. */
3717 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell.  Some
   members (pc_a_idx, summary_idx) are used by the evaluation functions but
   not visible in this listing. */
3722 struct ctables_pcexpr_evaluate_ctx
3724 const struct ctables_cell *cell; /* Cell being computed. */
3725 const struct ctables_section *section; /* Section that contains CELL. */
3726 const struct ctables_categories *cats; /* Categories searched for operands. */
3727 enum pivot_axis_type pc_a; /* Axis that the postcompute is on. */
3730 enum fmt_type parse_format; /* Passed to the category lookup. */
/* Declared ahead of its definition because it and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
3733 static double ctables_pcexpr_evaluate (
3734 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS (1 or 2) operands: evaluates
   each operand in CTX and then applies EVALUATE to the results.  An operand
   slot that is not used is passed as 0.

   An operand that is not finite or that is SYSMIS is special-cased; the
   action taken is not visible in this listing. */
3737 ctables_pcexpr_evaluate_nonterminal (
3738 const struct ctables_pcexpr_evaluate_ctx *ctx,
3739 const struct ctables_pcexpr *e, size_t n_args,
3740 double evaluate (double, double))
3742 double args[2] = { 0, 0 };
3743 for (size_t i = 0; i < n_args; i++)
3745 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3746 if (!isfinite (args[i]) || args[i] == SYSMIS)
3749 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary CTX->summary_idx, of the cell that
   has the same categories and values as CTX->cell except that the variable
   at (CTX->pc_a, CTX->pc_a_idx) takes PC_CV instead.

   The cell is found with the hashing and equality rules that cell insertion
   uses.  What happens when no such cell exists is not visible in this
   listing. */
3753 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3754 const struct ctables_cell_value *pc_cv)
3756 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity, substituting PC_CV at the postcompute's
   position. */
3759 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3761 const struct ctables_nest *nest = s->nests[a];
3762 for (size_t i = 0; i < nest->n; i++)
3763 if (i != nest->scale_idx)
3765 const struct ctables_cell_value *cv
3766 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3767 : &ctx->cell->axes[a].cvs[i]);
3768 hash = hash_pointer (cv->category, hash);
3769 if (cv->category->type != CCT_TOTAL
3770 && cv->category->type != CCT_SUBTOTAL
3771 && cv->category->type != CCT_POSTCOMPUTE)
3772 hash = value_hash (&cv->value,
3773 var_get_width (nest->vars[i]), hash);
/* Find the candidate cell TC that matches in every position. */
3777 struct ctables_cell *tc;
3778 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3780 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3782 const struct ctables_nest *nest = s->nests[a];
3783 for (size_t i = 0; i < nest->n; i++)
3784 if (i != nest->scale_idx)
3786 const struct ctables_cell_value *p_cv
3787 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3788 : &ctx->cell->axes[a].cvs[i]);
3789 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3790 if (p_cv->category != t_cv->category
3791 || (p_cv->category->type != CCT_TOTAL
3792 && p_cv->category->type != CCT_SUBTOTAL
3793 && p_cv->category->type != CCT_POSTCOMPUTE
3794 && !value_equal (&p_cv->value,
3796 var_get_width (nest->vars[i]))))
/* Read the requested summary out of the cell that was found, using the spec
   set for that cell's summary variant. */
3808 const struct ctables_table *t = s->table;
3809 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3810 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3811 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3812 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and then to one or more cells;
   operator nodes are handed to ctables_pcexpr_evaluate_nonterminal() with
   the matching ctpo_*() function.

   NOTE(review): several case labels and statements of the switch are not
   visible in this listing. */
3816 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3817 const struct ctables_pcexpr *e)
/* These operands can cover several data values, so the result is summed over
   every value that occurred for the postcompute's variable and that matches
   the operand's category. */
3824 case CTPO_CAT_NRANGE:
3825 case CTPO_CAT_SRANGE:
3826 case CTPO_CAT_MISSING:
3827 case CTPO_CAT_OTHERNM:
3829 struct ctables_cell_value cv = {
3830 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3832 assert (cv.category != NULL);
3834 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3835 const struct ctables_occurrence *o;
3838 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3839 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3840 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3842 cv.value = o->value;
3843 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* These operands identify a single cell directly. */
3848 case CTPO_CAT_NUMBER:
3849 case CTPO_CAT_SUBTOTAL:
3850 case CTPO_CAT_TOTAL:
3852 struct ctables_cell_value cv = {
3853 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3854 .value = { .f = e->number },
3856 assert (cv.category != NULL);
3857 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* A string operand shorter than the variable's width is padded on the right
   with spaces in a temporary buffer, giving it the layout of a value of that
   variable. */
3860 case CTPO_CAT_STRING:
3862 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3864 if (width > e->string.length)
3866 s = xmalloc (width);
3867 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3870 const struct ctables_category *category
3871 = ctables_find_category_for_postcompute (
3872 ctx->section->table->ctables->dict,
3873 ctx->cats, ctx->parse_format, e);
3874 assert (category != NULL);
/* The category that was found may be numeric or string, so take the value
   from whichever form applies. */
3876 struct ctables_cell_value cv = { .category = category };
3877 if (category->type == CCT_NUMBER)
3878 cv.value.f = category->number;
3879 else if (category->type == CCT_STRING)
3880 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3884 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Binary operators take two operands; negation takes one. */
3890 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3893 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3896 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3899 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3902 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3905 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category among the categories of CELL, which must be
   a postcompute cell, in section S.  The position of the category is
   reported through the output parameters: PC_A_P for the axis, and an index
   parameter whose declaration is not visible in this listing.

   NOTE(review): the handling of a cell that has more than one postcompute
   category is only partly visible here. */
3911 static const struct ctables_category *
3912 ctables_cell_postcompute (const struct ctables_section *s,
3913 const struct ctables_cell *cell,
3914 enum pivot_axis_type *pc_a_p,
3917 assert (cell->postcompute);
3918 const struct ctables_category *pc_cat = NULL;
3919 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3920 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3922 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3923 if (cv->category->type == CCT_POSTCOMPUTE)
3927 /* Multiple postcomputes cross each other. The value is
3932 pc_cat = cv->category;
3936 *pc_a_idx_p = pc_a_idx;
3940 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary spec
   SS and returns it.

   If the postcompute has its own summary specs and one of them has the same
   function, weighting, calc_area, and percentile as SS, then *FORMAT and
   *IS_CTABLES_FORMAT are replaced by that spec's settings.

   NOTE(review): the last parameter and some guards are not visible in this
   listing; the context initializer uses a value named summary_idx. */
3945 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3946 const struct ctables_cell *cell,
3947 const struct ctables_summary_spec *ss,
3948 struct fmt_spec *format,
3949 bool *is_ctables_format,
/* Locate the postcompute category and its position in the cell. */
3952 enum pivot_axis_type pc_a = 0;
3953 size_t pc_a_idx = 0;
3954 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3955 s, cell, &pc_a, &pc_a_idx);
3959 const struct ctables_postcompute *pc = pc_cat->pc;
/* Let a matching postcompute-specific summary spec override the format. */
3962 for (size_t i = 0; i < pc->specs->n; i++)
3964 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3965 if (ss->function == ss2->function
3966 && ss->weighting == ss2->weighting
3967 && ss->calc_area == ss2->calc_area
3968 && ss->percentile == ss2->percentile)
3970 *format = ss2->format;
3971 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the postcompute's expression against the categories of the
   variable that the postcompute is on. */
3977 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3978 const struct ctables_categories *cats = s->table->categories[
3979 var_get_dict_index (var)];
3980 struct ctables_pcexpr_evaluate_ctx ctx = {
3985 .pc_a_idx = pc_a_idx,
3986 .summary_idx = summary_idx,
3987 .parse_format = pc_cat->parse_format,
3989 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS as a UTF-8 string, then moves
   a leading minus sign to the position that the CTABLES-specific formats
   call for.  The string S is allocated by data_out_stretchy(); the return
   statement is not visible in this listing. */
3993 ctables_format (double d, const struct fmt_spec *format,
3994 const struct fmt_settings *settings)
3996 const union value v = { .f = d };
3997 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
3999 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4000 produce the results we want for negative numbers, putting the negative
4001 sign in the wrong spot, before the prefix instead of after it. We can't,
4002 in fact, produce the desired results using a custom-currency
4003 specification. Instead, we postprocess the output, moving the negative
   sign so that it follows the prefix:
4006 NEQUAL: "-N=3" => "N=-3"
4007 PAREN: "-(3)" => "(-3)"
4008 PCTPAREN: "-(3%)" => "(-3%)"
4010 This transformation doesn't affect NEGPAREN. */
4011 char *minus_src = strchr (s, '-');
/* Ignore a '-' that directly follows 'E', which is the sign of an exponent
   and not of the number. */
4012 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The sign's destination is the '=' of "N=" when present, otherwise the
   '('.  Moving the sign there shifts the characters in between one place to
   the left, which leaves the sign just after the prefix. */
4014 char *n_equals = strstr (s, "N=");
4015 char *lparen = strchr (s, '(');
4016 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4018 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A, testing whether the nest is a
   single scale variable (n == 1 and scale_idx == 0) whose variable label
   setting is CTVL_NONE.  The values returned are not visible in this
   listing; ctables_table_output() stores the result in a dimension's
   hide_all_labels member. */
4024 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4026 for (size_t i = 0; i < t->stacks[a].n; i++)
4028 struct ctables_nest *nest = &t->stacks[a].nests[i];
4029 if (nest->n != 1 || nest->scale_idx != 0)
4032 enum ctables_vlabel vlabel
4033 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4034 if (vlabel != CTVL_NONE)
/* Builds a pivot table for table T of CTABLES command CT from the cells
   accumulated in T's sections, and submits it for output.

   The visible steps are:

   - Create the pivot table and set its title, caption, and corner text.
   - Add a "Statistics" dimension and a category-labels dimension if needed.
   - For each axis, sort the cells that belong to each nest and build the
     nested variable, category, and summary labels, recording each cell's
     leaf on that axis.
   - Number the areas in the order their cells appear.
   - Put one value into the table for every cell and summary spec.

   NOTE(review): many lines of this function (braces, guards, and some
   statements) are not visible in this listing. */
4041 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4043 struct pivot_table *pt = pivot_table_create__ (
4045 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4046 : pivot_value_new_text (N_("Custom Tables"))),
4049 pivot_table_set_caption (
4050 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4052 pivot_table_set_corner_text (
4053 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary statistics get a dimension of their own when their labels go on a
   different axis from the summaries, or when the labels are hidden and there
   is more than one summary. */
4055 bool summary_dimension = (t->summary_axis != t->slabels_axis
4056 || (!t->slabels_visible
4057 && t->summary_specs.n > 1));
4058 if (summary_dimension)
4060 struct pivot_dimension *d = pivot_dimension_create (
4061 pt, t->slabels_axis, N_("Statistics"));
4062 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4063 if (!t->slabels_visible)
4064 d->hide_all_labels = true;
4065 for (size_t i = 0; i < specs->n; i++)
4066 pivot_category_create_leaf (
4067 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* Category labels get a dimension of their own when T has a clabels example
   variable. */
4070 bool categories_dimension = t->clabels_example != NULL;
4071 if (categories_dimension)
4073 struct pivot_dimension *d = pivot_dimension_create (
4074 pt, t->label_axis[t->clabels_from_axis],
4075 t->clabels_from_axis == PIVOT_AXIS_ROW
4076 ? N_("Row Categories")
4077 : N_("Column Categories"));
4078 const struct variable *var = t->clabels_example;
4079 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4080 for (size_t i = 0; i < t->n_clabels_values; i++)
4082 const struct ctables_value *value = t->clabels_values[i];
4083 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4084 assert (cat != NULL);
4085 pivot_category_create_leaf (
4086 d->root, ctables_category_create_value_label (c, cat,
4092 pivot_table_set_look (pt, ct->look);
/* One dimension per axis that has variables or that holds the summaries. */
4093 struct pivot_dimension *d[PIVOT_N_AXES];
4094 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4096 static const char *names[] = {
4097 [PIVOT_AXIS_ROW] = N_("Rows"),
4098 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4099 [PIVOT_AXIS_LAYER] = N_("Layers"),
4101 d[a] = (t->axes[a] || a == t->summary_axis
4102 ? pivot_dimension_create (pt, a, names[a])
4107 assert (t->axes[a]);
4109 for (size_t i = 0; i < t->stacks[a].n; i++)
4111 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on this axis, counting their cells
   and finding the deepest nest among them. */
4112 struct ctables_section **sections = xnmalloc (t->n_sections,
4114 size_t n_sections = 0;
4116 size_t n_total_cells = 0;
4117 size_t max_depth = 0;
4118 for (size_t j = 0; j < t->n_sections; j++)
4119 if (t->sections[j].nests[a] == nest)
4121 struct ctables_section *s = &t->sections[j];
4122 sections[n_sections++] = s;
4123 n_total_cells += hmap_count (&s->cells);
4125 size_t depth = s->nests[a]->n;
4126 max_depth = MAX (depth, max_depth);
/* Gather those sections' cells and sort them for this axis. */
4129 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4131 size_t n_sorted = 0;
4133 for (size_t j = 0; j < n_sections; j++)
4135 struct ctables_section *s = sections[j];
4137 struct ctables_cell *cell;
4138 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4140 sorted[n_sorted++] = cell;
4141 assert (n_sorted <= n_total_cells);
4144 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4145 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* A level is one layer of nesting in the pivot categories. */
4147 struct ctables_level
4149 enum ctables_level_type
4151 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4152 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4153 CTL_SUMMARY, /* Summary functions. */
4157 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most two levels per variable (label and category) plus one for the
   summaries. */
4160 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4161 size_t n_levels = 0;
4162 for (size_t k = 0; k < nest->n; k++)
4164 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4165 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4167 if (vlabel != CTVL_NONE)
4169 levels[n_levels++] = (struct ctables_level) {
4171 .vlabel = (enum settings_value_show) vlabel,
4176 if (nest->scale_idx != k
4177 && (k != nest->n - 1 || t->label_axis[a] == a))
4179 levels[n_levels++] = (struct ctables_level) {
4180 .type = CTL_CATEGORY,
4186 if (!summary_dimension && a == t->slabels_axis)
4188 levels[n_levels++] = (struct ctables_level) {
4189 .type = CTL_SUMMARY,
4190 .var_idx = SIZE_MAX,
4194 /* Pivot categories:
4196 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4197 - category for nest->vars[0], if nest->scale_idx != 0
4198 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4199 - category for nest->vars[1], if nest->scale_idx != 1
4201 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4202 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4203 - summary function, if 'a == t->slabels_axis && a ==
   t->summary_axis'.
4206 Additional dimensions:
4208 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
   dimension.
4210 - If 't->label_axis[b] == a' for some 'b != a', add a category
   dimension. */
/* groups[k] is the pivot category most recently created for level K. */
4215 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4217 for (size_t j = 0; j < n_sorted; j++)
4219 struct ctables_cell *cell = sorted[j];
4220 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
/* Count the leading levels that this cell shares with the previous one, so
   that only the levels that differ get new categories. */
4222 size_t n_common = 0;
4225 for (; n_common < n_levels; n_common++)
4227 const struct ctables_level *level = &levels[n_common];
4228 if (level->type == CTL_CATEGORY)
4230 size_t var_idx = level->var_idx;
4231 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4232 if (prev->axes[a].cvs[var_idx].category != c)
/* value_equal() takes the width of the values as its third argument, as at
   every other call in this file.  Passing the variable's type instead made
   the comparison use the wrong width for string variables. */
4234 else if (c->type != CCT_SUBTOTAL
4235 && c->type != CCT_TOTAL
4236 && c->type != CCT_POSTCOMPUTE
4237 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4238 &cell->axes[a].cvs[var_idx].value,
4239 var_get_width (nest->vars[var_idx])))
/* Create categories for the levels that are not shared. */
4245 for (size_t k = n_common; k < n_levels; k++)
4247 const struct ctables_level *level = &levels[k];
4248 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4249 if (level->type == CTL_SUMMARY)
4251 assert (k == n_levels - 1);
4253 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4254 for (size_t m = 0; m < specs->n; m++)
4256 int leaf = pivot_category_create_leaf (
4257 parent, ctables_summary_label (&specs->specs[m],
4265 const struct variable *var = nest->vars[level->var_idx];
4266 struct pivot_value *label;
4267 if (level->type == CTL_VAR)
4269 label = pivot_value_new_variable (var);
4270 label->variable.show = level->vlabel;
4272 else if (level->type == CTL_CATEGORY)
4274 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4275 label = ctables_category_create_value_label (
4276 t->categories[var_get_dict_index (var)],
4277 cv->category, var, &cv->value);
/* The last level is a leaf; the levels above it are groups. */
4282 if (k == n_levels - 1)
4283 prev_leaf = pivot_category_create_leaf (parent, label);
4285 groups[k] = pivot_category_create_group__ (parent, label);
4289 cell->axes[a].leaf = prev_leaf;
4298 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
/* Give each area a sequence number, per area type, in the order in which the
   areas are first met when the cells are sorted by leaf.  A sequence of zero
   means that the area has not been numbered yet. */
4302 size_t n_total_cells = 0;
4303 for (size_t j = 0; j < t->n_sections; j++)
4304 n_total_cells += hmap_count (&t->sections[j].cells);
4306 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4307 size_t n_sorted = 0;
4308 for (size_t j = 0; j < t->n_sections; j++)
4310 const struct ctables_section *s = &t->sections[j];
4311 struct ctables_cell *cell;
4312 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4314 sorted[n_sorted++] = cell;
4316 assert (n_sorted <= n_total_cells);
4317 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4319 size_t ids[N_CTATS];
4320 memset (ids, 0, sizeof ids);
4321 for (size_t j = 0; j < n_sorted; j++)
4323 struct ctables_cell *cell = sorted[j];
4324 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4326 struct ctables_area *area = cell->areas[at];
4327 if (!area->sequence)
4328 area->sequence = ++ids[at];
/* Put the data into the table: one value per cell and summary spec. */
4335 for (size_t i = 0; i < t->n_sections; i++)
4337 struct ctables_section *s = &t->sections[i];
4339 struct ctables_cell *cell;
4340 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4345 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4346 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4347 for (size_t j = 0; j < specs->n; j++)
/* Assemble the value's index in each dimension, in the order in which the
   dimensions were created above. */
4350 size_t n_dindexes = 0;
4352 if (summary_dimension)
4353 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4355 if (categories_dimension)
4357 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4358 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4359 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4360 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4363 dindexes[n_dindexes++] = ctv->leaf;
4366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4369 int leaf = cell->axes[a].leaf;
4370 if (a == t->summary_axis && !summary_dimension)
4372 dindexes[n_dindexes++] = leaf;
4375 const struct ctables_summary_spec *ss = &specs->specs[j];
/* A postcompute cell is calculated from other cells and may override the
   format; any other cell reports its own summary. */
4377 struct fmt_spec format = specs->specs[j].format;
4378 bool is_ctables_format = ss->is_ctables_format;
4379 double d = (cell->postcompute
4380 ? ctables_cell_calculate_postcompute (
4381 s, cell, ss, &format, &is_ctables_format, j)
4382 : ctables_summary_value (cell, &cell->summaries[j],
/* Choose how to present the value: a count below the hide threshold is shown
   as "<threshold", zero and system-missing values use the configured
   replacement text if there is any, CTABLES-specific formats are rendered as
   text, and everything else is an ordinary number. */
4385 struct pivot_value *value;
4386 if (ct->hide_threshold != 0
4387 && d < ct->hide_threshold
4388 && ss->function == CTSF_COUNT)
4390 value = pivot_value_new_user_text_nocopy (
4391 xasprintf ("<%d", ct->hide_threshold));
4393 else if (d == 0 && ct->zero)
4394 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4395 else if (d == SYSMIS && ct->missing)
4396 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4397 else if (is_ctables_format)
4398 value = pivot_value_new_user_text_nocopy (
4399 ctables_format (d, &format, &ct->ctables_formats));
4402 value = pivot_value_new_number (d);
4403 value->numeric.format = format;
4405 /* XXX should text values be right-justified? */
4406 pivot_table_put (pt, dindexes, n_dindexes, value);
4411 pivot_table_submit (pt);
/* Validates table T's request to move the category labels of axis A
   elsewhere (the ROWLABELS or COLLABELS subcommand, chosen from A).

   Using the last variable V0 of the first nest in T's stack for A as the
   reference (and recording it in T->clabels_example), this emits an SE
   message when:

     - any category of V0 has type CCT_FUNCTION (sorting by a summary
       function);
     - the last variable of a nest is that nest's scale variable;
     - the last variable of a later nest differs from V0 in width, value
       labels, or category specifications.

   NOTE(review): the early exits and return statements are not visible in
   this excerpt; ctables_prepare_table() combines two calls with `&&', so the
   result is used as a success flag -- confirm against the full source. */
4415 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4417 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only to build the diagnostics below. */
4421 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4422 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4424 const struct ctables_stack *stack = &t->stacks[a];
4428 const struct ctables_nest *n0 = &stack->nests[0];
4431 assert (stack->n == 1);
/* V0 and its category specification C0 are the reference that every other
   nest is compared against. */
4435 const struct variable *v0 = n0->vars[n0->n - 1];
4436 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4437 t->clabels_example = v0;
4439 for (size_t i = 0; i < c0->n_cats; i++)
4440 if (c0->cats[i].type == CCT_FUNCTION)
4442 msg (SE, _("%s=%s is not allowed with sorting based "
4443 "on a summary function."),
4444 subcommand_name, pos_name);
4447 if (n0->n - 1 == n0->scale_idx)
4449 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4450 "but %s is a scale variable."),
4451 subcommand_name, pos_name, var_get_name (v0));
/* Compare the last variable of each remaining nest against V0. */
4455 for (size_t i = 1; i < stack->n; i++)
4457 const struct ctables_nest *ni = &stack->nests[i];
4459 const struct variable *vi = ni->vars[ni->n - 1];
4460 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4462 if (ni->n - 1 == ni->scale_idx)
4464 msg (SE, _("%s=%s requires the variables to be moved to be "
4465 "categorical, but %s is a scale variable."),
4466 subcommand_name, pos_name, var_get_name (vi));
4469 if (var_get_width (v0) != var_get_width (vi))
4471 msg (SE, _("%s=%s requires the variables to be "
4472 "moved to have the same width, but %s has "
4473 "width %d and %s has width %d."),
4474 subcommand_name, pos_name,
4475 var_get_name (v0), var_get_width (v0),
4476 var_get_name (vi), var_get_width (vi));
4479 if (!val_labs_equal (var_get_value_labels (v0),
4480 var_get_value_labels (vi)))
4482 msg (SE, _("%s=%s requires the variables to be "
4483 "moved to have the same value labels, but %s "
4484 "and %s have different value labels."),
4485 subcommand_name, pos_name,
4486 var_get_name (v0), var_get_name (vi));
4489 if (!ctables_categories_equal (c0, ci))
4491 msg (SE, _("%s=%s requires the variables to be "
4492 "moved to have the same category "
4493 "specifications, but %s and %s have different "
4494 "category specifications."),
4495 subcommand_name, pos_name,
4496 var_get_name (v0), var_get_name (vi));
/* Searches the *N-element array *SUM_VARS for VAR.  If the search does not
   end the function, VAR is stored at position *N, after growing the array
   with x2nrealloc() (which updates *ALLOCATED) when it is full.

   NOTE(review): the return statements and the increment of *N are not
   visible in this excerpt.  The caller stores the result in
   `sum_var_idx', so it is presumably VAR's index in the array -- confirm. */
4505 add_sum_var (struct variable *var,
4506 struct variable ***sum_vars, size_t *n, size_t *allocated)
4508 for (size_t i = 0; i < *n; i++)
4509 if (var == (*sum_vars)[i])
4512 if (*n >= *allocated)
4513 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4514 (*sum_vars)[*n] = var;
/* Maps one area type to another.  ctables_prepare_table() applies it to each
   summary spec's calc_area when row labels are moved to the column axis or
   column labels to the row axis.

   NOTE(review): only two of the results (CTAT_LAYERCOL and CTAT_LAYERROW)
   are visible in this excerpt; the switch and its other cases are elided. */
4518 static enum ctables_area_type
4519 rotate_area (enum ctables_area_type area)
4530 return CTAT_LAYERCOL;
4533 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A.  For every summary spec whose function is
   CTSF_areaPCT_SUM, registers A->var through add_sum_var() and records the
   returned value in the spec's sum_var_idx.  Also recurses into both of
   A->subs[].

   NOTE(review): the conditions that choose between the "scan specs" part and
   the "recurse into subs" part (and any null check on A) are elided here. */
4546 enumerate_sum_vars (const struct ctables_axis *a,
4547 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Scan every summary variant's spec list on this node. */
4555 for (size_t i = 0; i < N_CSVS; i++)
4556 for (size_t j = 0; j < a->specs[i].n; j++)
4558 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4559 if (spec->function == CTSF_areaPCT_SUM)
4560 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Descend into both children. */
4566 for (size_t i = 0; i < 2; i++)
4567 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Derives the data structures for table T that are needed before any case is
   read.  In order, the visible code:

     1. Builds T->stacks[a] for each axis with enumerate_fts() and fills each
        nest's per-area-type index arrays (nest->areas[]), or installs a
        single empty nest for an axis.
     2. Gives every nest on the summary axis a default summary spec when it
        has none, and clones CSV_CELL specs into CSV_TOTAL when the latter is
        empty.
     3. Rotates each spec's calc_area when row/column labels are swapped.
     4. Collects listwise-missing variables when smissing_listwise is set.
     5. Merges all nests' summary specs into T->summary_specs.
     6. Enumerates sum variables for the summary axis.

   Returns the conjunction of ctables_check_label_position() for the row and
   column axes.

   NOTE(review): many short lines (braces, `else', some conditions) are
   elided in this excerpt, so the exact nesting of the steps above should be
   confirmed against the full source. */
4573 ctables_prepare_table (struct ctables_table *t)
4575 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4578 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4580 for (size_t j = 0; j < t->stacks[a].n; j++)
4582 struct ctables_nest *nest = &t->stacks[a].nests[j];
4583 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for one index per variable in the nest; n_areas[] counts how many
   entries are actually in use. */
4585 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4586 nest->n_areas[at] = 0;
4588 enum pivot_axis_type ata, atb;
4589 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4591 ata = PIVOT_AXIS_ROW;
4592 atb = PIVOT_AXIS_COLUMN;
4594 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4596 ata = PIVOT_AXIS_COLUMN;
4597 atb = PIVOT_AXIS_ROW;
4600 if (at == CTAT_LAYER
4601 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4602 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4603 ? a == atb && t->label_axis[a] != a
/* Counts down from the last variable; because K is unsigned, decrementing
   past 0 wraps around and `k < nest->n' ends the loop. */
4606 for (size_t k = nest->n - 1; k < nest->n; k--)
4607 if (k != nest->scale_idx)
4609 nest->areas[at][nest->n_areas[at]++] = k;
4615 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4616 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4617 : at == CTAT_TABLE ? true
/* Record the index of every non-scale variable in the nest. */
4621 for (size_t k = 0; k < nest->n; k++)
4622 if (k != nest->scale_idx)
4623 nest->areas[at][nest->n_areas[at]++] = k;
4629 #define L PIVOT_AXIS_LAYER
4630 n_drop = (t->clabels_from_axis == L ? a != L
4631 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4632 : t->clabels_from_axis == a ? 2
4639 n_drop = a == ata && t->label_axis[ata] == atb;
4644 n_drop = (a == ata ? t->label_axis[ata] == atb
4646 : t->clabels_from_axis == atb ? -1
4647 : t->clabels_to_axis != atb ? 1
4659 size_t n = nest->n_areas[at];
/* Overwrites the second-to-last entry with the last one and shortens the
   list by one. */
4662 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4663 nest->n_areas[at]--;
/* Trim up to N_DROP trailing entries, never going below zero. */
4668 for (int i = 0; i < n_drop; i++)
4669 if (nest->n_areas[at] > 0)
4670 nest->n_areas[at]--;
/* Stack consisting of a single nest with no scale or summary variable. */
4677 struct ctables_nest *nest = xmalloc (sizeof *nest);
4678 *nest = (struct ctables_nest) {
4680 .scale_idx = SIZE_MAX,
4681 .summary_idx = SIZE_MAX
4683 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4685 /* There's no point in moving labels away from an axis that has no
4686 labels, so avoid dealing with the special cases around that. */
4687 t->label_axis[a] = a;
/* Summary specs: supply defaults for each nest on the summary axis. */
4690 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4691 for (size_t i = 0; i < stack->n; i++)
4693 struct ctables_nest *nest = &stack->nests[i];
4694 if (!nest->specs[CSV_CELL].n)
4696 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4697 ss->specs = xmalloc (sizeof *ss->specs);
/* Default function: mean for a scale summary, otherwise count. */
4700 enum ctables_summary_function function
4701 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
4705 nest->summary_idx = nest->n - 1;
4706 ss->var = nest->vars[nest->summary_idx];
4708 *ss->specs = (struct ctables_summary_spec) {
4709 .function = function,
4710 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
4711 .format = ctables_summary_default_format (function, ss->var),
4714 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4715 &nest->specs[CSV_CELL]);
4717 else if (!nest->specs[CSV_TOTAL].n)
4718 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4719 &nest->specs[CSV_CELL]);
/* When row labels go to the column axis, or column labels to the row axis,
   rotate each spec's calc_area to match. */
4721 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4722 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4724 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4725 for (size_t i = 0; i < nest->specs[sv].n; i++)
4727 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4728 const struct ctables_function_info *cfi =
4729 &ctables_function_info[ss->function];
4731 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise missing-value handling: gather the scale variables of the other
   nests that share this nest's group_head. */
4735 if (t->ctables->smissing_listwise)
4737 struct variable **listwise_vars = NULL;
4739 size_t allocated = 0;
4741 for (size_t j = nest->group_head; j < stack->n; j++)
4743 const struct ctables_nest *other_nest = &stack->nests[j];
4744 if (other_nest->group_head != nest->group_head)
4747 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4750 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4751 sizeof *listwise_vars);
4752 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4755 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4758 listwise_vars = xmemdup (listwise_vars,
4759 n * sizeof *listwise_vars);
4760 nest->specs[sv].listwise_vars = listwise_vars;
4761 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec lists of all nests and variants into T->summary_specs:
   repeatedly pick the smallest pending item (per merge_item_compare_3way),
   append its spec, and assign the new position as axis_idx to every pending
   item that compares equal to it. */
4766 struct ctables_summary_spec_set *merged = &t->summary_specs;
4767 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4769 for (size_t j = 0; j < stack->n; j++)
4771 const struct ctables_nest *nest = &stack->nests[j];
4773 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4774 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4779 struct merge_item min = items[0];
4780 for (size_t j = 1; j < n_left; j++)
4781 if (merge_item_compare_3way (&items[j], &min) < 0)
4784 if (merged->n >= merged->allocated)
4785 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4786 sizeof *merged->specs);
4787 merged->specs[merged->n++] = min.set->specs[min.ofs];
4789 for (size_t j = 0; j < n_left; )
4791 if (merge_item_compare_3way (&items[j], &min) == 0)
4793 struct merge_item *item = &items[j];
4794 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4795 if (++item->ofs >= item->set->n)
/* This item is exhausted: replace it with the last pending item. */
4797 items[j] = items[--n_left];
4806 size_t allocated_sum_vars = 0;
4807 enumerate_sum_vars (t->axes[t->summary_axis],
4808 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4810 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4811 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, looks at the last variable of every nest in T's stack for axis
   A and, when that variable's value in C matches one of the variable's
   categories, inserts the value into T with ctables_value_insert().

   NOTE(review): the statement controlled by the SYSMIS test and the
   remaining arguments of ctables_categories_match() are elided in this
   excerpt; system-missing numeric values are presumably skipped. */
4815 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4816 enum pivot_axis_type a)
4818 struct ctables_stack *stack = &t->stacks[a];
4819 for (size_t i = 0; i < stack->n; i++)
4821 const struct ctables_nest *nest = &stack->nests[i];
4822 const struct variable *var = nest->vars[nest->n - 1];
4823 const union value *value = case_data (c, var);
4825 if (var_is_numeric (var) && value->f == SYSMIS)
4828 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4830 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback used by ctables_sort_clabels_values().  A_ and B_ each
   point to a `struct ctables_value *' array element; WIDTH_ points to the
   int width passed as auxiliary data.  Returns the result of
   value_compare_3way() on the two values at that width. */
4835 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4837 const struct ctables_value *const *ap = a_;
4838 const struct ctables_value *const *bp = b_;
4839 const struct ctables_value *a = *ap;
4840 const struct ctables_value *b = *bp;
4841 const int *width = width_;
4842 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, a sorted array of the values collected in
   T->clabels_values_map, and numbers them.

   First, every value label of T->clabels_example that matches one of its
   categories is inserted into the map.  Then all map entries are copied into
   a newly allocated array, sorted with compare_clabels_values_3way() at the
   example variable's width, and each entry's `leaf' is set to its position
   in the sorted array.

   NOTE(review): the condition guarding the value-label pass and the
   declaration of the copy index `i' are elided in this excerpt. */
4846 ctables_sort_clabels_values (struct ctables_table *t)
4848 const struct variable *v0 = t->clabels_example;
4849 int width = var_get_width (v0);
4851 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4854 const struct val_labs *val_labs = var_get_value_labels (v0);
4855 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4856 vl = val_labs_next (val_labs, vl))
4857 if (ctables_categories_match (c0, &vl->value, v0))
4858 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array so that it can be sorted. */
4861 size_t n = hmap_count (&t->clabels_values_map);
4862 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4864 struct ctables_value *clv;
4866 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4867 t->clabels_values[i++] = clv;
4868 t->n_clabels_values = n;
4871 sort (t->clabels_values, n, sizeof *t->clabels_values,
4872 compare_clabels_values_3way, &width);
/* A value's leaf number is its rank in sorted order. */
4874 for (size_t i = 0; i < n; i++)
4875 t->clabels_values[i]->leaf = i;
/* For each category in CATS, adds to OCCURRENCES (through
   ctables_add_occurrence()) values of VAR associated with that category.
   Depending on the category, the values come from the category itself (a
   literal number, or a string padded with spaces to VAR's width) or from
   those value labels of VAR that satisfy the category's test (numeric range,
   string range, missing, all, or all subject to include_missing).

   NOTE(review): most `case' labels and `break' statements of the switch are
   elided in this excerpt, so which category type selects which branch should
   be confirmed against the full source. */
4879 ctables_add_category_occurrences (const struct variable *var,
4880 struct hmap *occurrences,
4881 const struct ctables_categories *cats)
4883 const struct val_labs *val_labs = var_get_value_labels (var);
4885 for (size_t i = 0; i < cats->n_cats; i++)
4887 const struct ctables_category *c = &cats->cats[i];
/* Literal numeric category. */
4891 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Literal string category: build a temporary value of VAR's width, padded on
   the right with spaces, and destroy it after adding the occurrence. */
4897 int width = var_get_width (var);
4899 value_init (&value, width);
4900 value_copy_buf_rpad (&value, width,
4901 CHAR_CAST (uint8_t *, c->string.string),
4902 c->string.length, ' ');
4903 ctables_add_occurrence (var, &value, occurrences);
4904 value_destroy (&value, width);
/* Numeric range: labeled values within [nrange[0], nrange[1]]. */
4909 assert (var_is_numeric (var));
4910 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4911 vl = val_labs_next (val_labs, vl))
4912 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4913 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
4917 assert (var_is_alpha (var));
4918 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4919 vl = val_labs_next (val_labs, vl))
4920 if (in_string_range (&vl->value, var, c->srange))
4921 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
4925 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4926 vl = val_labs_next (val_labs, vl))
4927 if (var_is_value_missing (var, &vl->value))
4928 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
4932 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4933 vl = val_labs_next (val_labs, vl))
4934 ctables_add_occurrence (var, &vl->value, occurrences);
4937 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless include_missing is set. */
4947 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4948 vl = val_labs_next (val_labs, vl))
4949 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4950 ctables_add_occurrence (var, &vl->value, occurrences);
4953 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().  Enumerates
   combinations of category values for section S, one variable at a time,
   where A is the axis currently being processed and A_IDX the variable index
   within that axis's nest.

     - Once A has run past the last axis, a cell is inserted for the
       combination now held in case C and CATS, via ctables_cell_insert__().
     - When axis A has no nest, or A_IDX is past the nest's last variable,
       the recursion moves on to the first variable of the next axis.
     - Otherwise, each recorded occurrence of the variable is written into C,
       its matching category is stored in CATS[A][A_IDX] (asserted non-null),
       and the recursion continues with the next variable.  The same is then
       done for each CCT_POSTCOMPUTE category of the variable.

   C is scratch space: the variable's previous value in C is destroyed before
   each occurrence is cloned into it. */
4960 ctables_section_recurse_add_empty_categories (
4961 struct ctables_section *s,
4962 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
4963 enum pivot_axis_type a, size_t a_idx)
4965 if (a >= PIVOT_N_AXES)
4966 ctables_cell_insert__ (s, c, cats);
4967 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4968 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4971 const struct variable *var = s->nests[a]->vars[a_idx];
4972 const struct ctables_categories *categories = s->table->categories[
4973 var_get_dict_index (var)];
4974 int width = var_get_width (var);
4975 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4976 const struct ctables_occurrence *o;
4977 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the variable's value in the scratch case with this occurrence. */
4979 union value *value = case_data_rw (c, var);
4980 value_destroy (value, width);
4981 value_clone (value, &o->value, width);
4982 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4983 assert (cats[a][a_idx] != NULL);
4984 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated directly from the category list
   rather than from the recorded occurrences. */
4987 for (size_t i = 0; i < categories->n_cats; i++)
4989 const struct ctables_category *cat = &categories->cats[i];
4990 if (cat->type == CCT_POSTCOMPUTE)
4992 cats[a][a_idx] = cat;
4993 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that did not occur in
   the data.

   For every non-scale variable in S's nests whose categories have show_empty
   set, the values associated with its categories are added to the matching
   occurrence map.  Then a blank case with the dictionary's prototype is
   created and ctables_section_recurse_add_empty_categories() enumerates the
   combinations, using per-axis scratch arrays sized to each nest.

   NOTE(review): the statements that update `show_empty', any test of it
   before the enumeration, and the release of the scratch case are elided in
   this excerpt -- confirm against the full source. */
5000 ctables_section_add_empty_categories (struct ctables_section *s)
5002 bool show_empty = false;
5003 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5005 for (size_t k = 0; k < s->nests[a]->n; k++)
5006 if (k != s->nests[a]->scale_idx)
5008 const struct variable *var = s->nests[a]->vars[k];
5009 const struct ctables_categories *cats = s->table->categories[
5010 var_get_dict_index (var)];
5011 if (cats->show_empty)
5014 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Variable-length scratch arrays, one slot per variable in each axis's
   nest, filled in by the recursive helper. */
5020 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5021 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5022 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5023 const struct ctables_category **cats[PIVOT_N_AXES] =
5025 [PIVOT_AXIS_LAYER] = layer_cats,
5026 [PIVOT_AXIS_ROW] = row_cats,
5027 [PIVOT_AXIS_COLUMN] = column_cats,
5029 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5030 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data while keeping its hash maps usable:

     - destroys the value of every occurrence of each non-scale variable and
       removes the occurrence from its map;
     - for every cell, destroys its per-axis category values, frees the
       per-axis value arrays, uninitializes and frees its summaries, and
       removes it from S->cells;
     - removes every area from each S->areas[] map;
     - shrinks the cell and area maps afterwards.

   NOTE(review): the calls that free each occurrence, cell, and area
   structure itself are not visible in this excerpt (short lines are
   elided); confirm they are present in the full source. */
5035 ctables_section_clear (struct ctables_section *s)
5037 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5039 const struct ctables_nest *nest = s->nests[a];
5040 for (size_t i = 0; i < nest->n; i++)
5041 if (i != nest->scale_idx)
5043 const struct variable *var = nest->vars[i];
5044 int width = var_get_width (var);
5045 struct ctables_occurrence *o, *next;
5046 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iterator is used because entries are deleted during the walk. */
5047 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5049 value_destroy (&o->value, width);
5050 hmap_delete (map, &o->node);
5057 struct ctables_cell *cell, *next_cell;
5058 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5060 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5062 const struct ctables_nest *nest = s->nests[a];
5063 for (size_t i = 0; i < nest->n; i++)
5064 if (i != nest->scale_idx)
5065 value_destroy (&cell->axes[a].cvs[i].value,
5066 var_get_width (nest->vars[i]));
5067 free (cell->axes[a].cvs);
/* The cell's summaries parallel the spec set for its summary variant on the
   summary axis. */
5070 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5071 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5072 for (size_t i = 0; i < specs->n; i++)
5073 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5074 free (cell->summaries);
5076 hmap_delete (&s->cells, &cell->node);
5079 hmap_shrink (&s->cells);
5081 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5083 struct ctables_area *area, *next_area;
5084 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5088 hmap_delete (&s->areas[at], &area->node);
5091 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: clears its contents with
   ctables_section_clear(), then destroys each per-variable occurrence map
   and frees the per-axis arrays holding them, and finally destroys the cell
   map and every area map.  The visible code does not free S itself. */
5096 ctables_section_uninit (struct ctables_section *s)
5098 ctables_section_clear (s);
5100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5102 struct ctables_nest *nest = s->nests[a];
5103 for (size_t i = 0; i < nest->n; i++)
5104 hmap_destroy (&s->occurrences[a][i]);
5105 free (s->occurrences[a]);
5108 hmap_destroy (&s->cells);
5109 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5110 hmap_destroy (&s->areas[at]);
/* Resets table T's accumulated data: clears each of its sections and, when
   T has a category-label example variable (T->clabels_example), empties
   T->clabels_values_map (destroying each value at the example variable's
   width), shrinks the map, and frees and resets the T->clabels_values
   array.

   NOTE(review): the call that frees each `struct ctables_value' itself is
   not visible in this excerpt -- confirm against the full source. */
5114 ctables_table_clear (struct ctables_table *t)
5116 for (size_t i = 0; i < t->n_sections; i++)
5117 ctables_section_clear (&t->sections[i]);
5119 if (t->clabels_example)
5121 int width = var_get_width (t->clabels_example);
5122 struct ctables_value *value, *next_value;
5123 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5124 &t->clabels_values_map)
5126 value_destroy (&value->value, width);
5127 hmap_delete (&t->clabels_values_map, &value->node);
5130 hmap_shrink (&t->clabels_values_map);
5132 free (t->clabels_values);
5133 t->clabels_values = NULL;
5134 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases from INPUT.

   Setup: for each table, allocates room for one section per combination of
   row, column, and layer nests (at least one per axis) and creates the
   sections with ctables_table_add_section().

   Processing: INPUT is divided into groups by a casegrouper.  For each case,
   the dictionary weight and the effective weight are computed, the case is
   inserted into every section of every table, and category-label values are
   recorded for each axis whose labels are displayed on a different axis.

   Output: for each table, category-label values are sorted (if used), empty
   categories are added, and the table is output and then cleared.

   Returns the result of casegrouper_destroy().

   NOTE(review): short lines are elided in this excerpt -- including the rest
   of the parameter list, the condition selecting between the two
   casegrouper constructors, the fallback effective weight, and the braces
   that show whether output happens once per group -- so confirm those
   details against the full source. */
5139 ctables_execute (struct dataset *ds, struct casereader *input,
5142 for (size_t i = 0; i < ct->n_tables; i++)
5144 struct ctables_table *t = ct->tables[i];
5145 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5146 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5147 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5148 sizeof *t->sections);
5149 size_t ix[PIVOT_N_AXES];
5150 ctables_table_add_section (t, 0, ix);
5153 struct dictionary *dict = dataset_dict (ds);
5155 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5156 struct casegrouper *grouper
5158 ? casegrouper_create_splits (input, dict)
5159 : casegrouper_create_vars (input, NULL, 0));
5160 struct casereader *group;
5161 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case without consuming it, for the split-file
   header. */
5165 struct ccase *c = casereader_peek (group, 0);
5168 output_split_file_values (ds, c);
5173 bool warn_on_invalid = true;
5174 for (struct ccase *c = casereader_read (group); c;
5175 case_unref (c), c = casereader_read (group))
5177 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5178 double e_weight = (ct->e_weight
5179 ? var_force_valid_weight (ct->e_weight,
5180 case_num (c, ct->e_weight),
/* Weights indexed by weighting kind. */
5184 [CTW_DICTIONARY] = d_weight,
5185 [CTW_EFFECTIVE] = e_weight,
5186 [CTW_UNWEIGHTED] = 1.0,
5189 for (size_t i = 0; i < ct->n_tables; i++)
5191 struct ctables_table *t = ct->tables[i];
5193 for (size_t j = 0; j < t->n_sections; j++)
5194 ctables_cell_insert (&t->sections[j], c, weight);
/* Only axes whose labels are shown elsewhere need their values recorded. */
5196 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5197 if (t->label_axis[a] != a)
5198 ctables_insert_clabels_values (t, c, a);
5201 casereader_destroy (group);
5203 for (size_t i = 0; i < ct->n_tables; i++)
5205 struct ctables_table *t = ct->tables[i];
5207 if (t->clabels_example)
5208 ctables_sort_clabels_values (t);
5210 for (size_t j = 0; j < t->n_sections; j++)
5211 ctables_section_add_empty_categories (&t->sections[j]);
5213 ctables_table_output (ct, t);
5214 ctables_table_clear (t);
5217 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the resulting
   expression node.  The binary-operator parsers below take a pointer to such
   a function as their "next level" callback. */
5222 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5223 struct dictionary *);
/* Releases the resources owned by postcompute expression node E: the string
   of a CTPO_CAT_STRING node, both bounds of a CTPO_CAT_SRANGE node, both
   subexpressions (recursively) of the operator nodes, and E's location.  The
   listed numeric and keyword category nodes own nothing besides the
   location.

   NOTE(review): the null check, the operator `case' labels, and the final
   free of E are elided in this excerpt.  ctables_parse_pcompute() calls this
   with a possibly-null pointer, so a null check is presumably present --
   confirm against the full source. */
5226 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5232 case CTPO_CAT_STRING:
5233 ss_dealloc (&e->string);
5236 case CTPO_CAT_SRANGE:
5237 for (size_t i = 0; i < 2; i++)
5238 ss_dealloc (&e->srange[i]);
/* Nodes with subexpressions: destroy both of them. */
5247 for (size_t i = 0; i < 2; i++)
5248 ctables_pcexpr_destroy (e->subs[i]);
5252 case CTPO_CAT_NUMBER:
5253 case CTPO_CAT_NRANGE:
5254 case CTPO_CAT_MISSING:
5255 case CTPO_CAT_OTHERNM:
5256 case CTPO_CAT_SUBTOTAL:
5257 case CTPO_CAT_TOTAL:
5261 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP with operands SUB0
   and SUB1.  The node's location is the merge of the two operands'
   locations.  Both operands must be non-null, since their locations are
   dereferenced. */
5266 static struct ctables_pcexpr *
5267 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5268 struct ctables_pcexpr *sub0,
5269 struct ctables_pcexpr *sub1)
5271 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5272 *e = (struct ctables_pcexpr) {
5274 .subs = { sub0, sub1 },
5275 .location = msg_location_merged (sub0->location, sub1->location),
5280 /* How to parse an operator. */
/* Pairs the lexer token that spells an operator with the postcompute
   operation it denotes. */
5283 enum token_type token;
5284 enum ctables_postcompute_op op;
/* Looks through the N_OPS operators in OPS for one whose token equals the
   lexer's current token.

   NOTE(review): the statements after the T_NEG_NUM test and the return
   statements are elided in this excerpt.  Presumably the matched token is
   consumed unless it is T_NEG_NUM, and the matching entry (or null) is
   returned -- confirm against the full source. */
5287 static const struct operator *
5288 ctables_pcexpr_match_operator (struct lexer *lexer,
5289 const struct operator ops[], size_t n_ops)
5291 for (const struct operator *op = ops; op < ops + n_ops; op++)
5292 if (lex_token (lexer) == op->token)
5294 if (op->token != T_NEG_NUM)
/* Parses the rest of a chain of binary operators at one precedence level,
   given the already-parsed left operand LHS.  Repeatedly matches one of the
   N_OPS operators in OPS, parses the right operand with PARSE_NEXT_LEVEL,
   and folds the pair into a new binary node that becomes the next left
   operand, so the chain nests to the left.

   When CHAIN_WARNING is nonnull and OP_COUNT exceeds 1 at the point where
   the test is made, CHAIN_WARNING is issued as a warning at LHS's location.
   LHS is destroyed on the visible error path that follows the right-operand
   parse.

   NOTE(review): the loop-exit test and the return statements are elided in
   this excerpt. */
5303 static struct ctables_pcexpr *
5304 ctables_pcexpr_parse_binary_operators__ (
5305 struct lexer *lexer, struct dictionary *dict,
5306 const struct operator ops[], size_t n_ops,
5307 parse_recursively_func *parse_next_level,
5308 const char *chain_warning, struct ctables_pcexpr *lhs)
5310 for (int op_count = 0; ; op_count++)
5312 const struct operator *op
5313 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5316 if (op_count > 1 && chain_warning)
5317 msg_at (SW, lhs->location, "%s", chain_warning);
5322 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5325 ctables_pcexpr_destroy (lhs);
5329 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: parses the first operand
   with PARSE_NEXT_LEVEL, then hands it to
   ctables_pcexpr_parse_binary_operators__() to parse any operators from OPS
   and further operands that follow.  CHAIN_WARNING is passed through
   unchanged.

   NOTE(review): the handling of a failed first-operand parse is elided in
   this excerpt. */
5333 static struct ctables_pcexpr *
5334 ctables_pcexpr_parse_binary_operators (
5335 struct lexer *lexer, struct dictionary *dict,
5336 const struct operator ops[], size_t n_ops,
5337 parse_recursively_func *parse_next_level, const char *chain_warning)
5339 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5343 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5345 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls back into the
   addition level when it parses a parenthesized subexpression. */
5348 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5349 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node for the numeric range
   from LOW to HIGH.  Fields not named in the initializer (such as the
   location) are zero-initialized. */
5351 static struct ctables_pcexpr
5352 ctpo_cat_nrange (double low, double high)
5354 return (struct ctables_pcexpr) {
5355 .op = CTPO_CAT_NRANGE,
5356 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node for the string range
   from LOW to HIGH.  The substrings are stored as given, without copying.
   Callers in this file pass a substring with a null `string' member for an
   open bound (LO or HI). */
5360 static struct ctables_pcexpr
5361 ctpo_cat_srange (struct substring low, struct substring high)
5363 return (struct ctables_pcexpr) {
5364 .op = CTPO_CAT_SRANGE,
5365 .srange = { low, high },
/* Parses a primary postcompute expression and returns it as a newly
   allocated node whose location spans the tokens consumed.  Forms handled by
   the visible code:

     - a numeric constant (CTPO_CONSTANT);
     - the keywords MISSING, OTHERNM, TOTAL;
     - SUBTOTAL, optionally followed by a bracketed integer index of at least
       1 (the index stays 0 when no bracket follows);
     - a bracketed category reference: LO THRU followed by a string or a
       number; a number, optionally followed by THRU and HI or a number; or a
       string, optionally followed by THRU and HI or a string;
     - a parenthesized subexpression parsed by ctables_pcexpr_parse_add().

   Anything else is reported with lex_error().  If the closing bracket of a
   category reference is missing, strings already allocated for the node are
   released.

   NOTE(review): the error-path return statements and the token-advance calls
   are elided in this excerpt. */
5369 static struct ctables_pcexpr *
5370 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5372 int start_ofs = lex_ofs (lexer);
/* The node is built on the stack and copied to the heap at the end. */
5373 struct ctables_pcexpr e;
5374 if (lex_is_number (lexer))
5376 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5377 .number = lex_number (lexer) };
5380 else if (lex_match_id (lexer, "MISSING"))
5381 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5382 else if (lex_match_id (lexer, "OTHERNM"))
5383 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5384 else if (lex_match_id (lexer, "TOTAL"))
5385 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5386 else if (lex_match_id (lexer, "SUBTOTAL"))
5388 size_t subtotal_index = 0;
5389 if (lex_match (lexer, T_LBRACK))
5391 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5393 subtotal_index = lex_integer (lexer);
5395 if (!lex_force_match (lexer, T_RBRACK))
5398 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5399 .subtotal_index = subtotal_index };
5401 else if (lex_match (lexer, T_LBRACK))
5403 if (lex_match_id (lexer, "LO"))
5405 if (!lex_force_match_id (lexer, "THRU"))
5408 if (lex_is_string (lexer))
/* An open lower bound on a string range is a substring with a null
   `string' member. */
5410 struct substring low = { .string = NULL };
5411 struct substring high = parse_substring (lexer, dict);
5412 e = ctpo_cat_srange (low, high);
5416 if (!lex_force_num (lexer))
/* An open lower bound on a numeric range is -DBL_MAX. */
5418 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5422 else if (lex_is_number (lexer))
5424 double number = lex_number (lexer);
5426 if (lex_match_id (lexer, "THRU"))
5428 if (lex_match_id (lexer, "HI"))
5429 e = ctpo_cat_nrange (number, DBL_MAX);
5432 if (!lex_force_num (lexer))
5434 e = ctpo_cat_nrange (number, lex_number (lexer));
5439 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5442 else if (lex_is_string (lexer))
5444 struct substring s = parse_substring (lexer, dict);
5446 if (lex_match_id (lexer, "THRU"))
5448 struct substring high;
5450 if (lex_match_id (lexer, "HI"))
5451 high = (struct substring) { .string = NULL };
5454 if (!lex_force_string (lexer))
5459 high = parse_substring (lexer, dict);
5462 e = ctpo_cat_srange (s, high);
5465 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5469 lex_error (lexer, NULL);
/* Missing `]': release any strings the node owns before bailing out. */
5473 if (!lex_force_match (lexer, T_RBRACK))
5475 if (e.op == CTPO_CAT_STRING)
5476 ss_dealloc (&e.string);
5477 else if (e.op == CTPO_CAT_SRANGE)
5479 ss_dealloc (&e.srange[0]);
5480 ss_dealloc (&e.srange[1]);
5485 else if (lex_match (lexer, T_LPAREN))
5487 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5490 if (!lex_force_match (lexer, T_RPAREN))
5492 ctables_pcexpr_destroy (ep);
5499 lex_error (lexer, NULL);
/* The location runs from the first token of the primary through the last
   token consumed. */
5503 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5504 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for the negation of SUB.  The node's
   location spans from token offset START_OFS through the token just before
   LEXER's current position.

   NOTE(review): the initializer lines that set the operator and attach SUB
   are elided in this excerpt. */
5507 static struct ctables_pcexpr *
5508 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5509 struct lexer *lexer, int start_ofs)
5511 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5512 *e = (struct ctables_pcexpr) {
5515 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this parses a left-associative chain of `**'
   operators over primaries, passing along a warning about that
   associativity.

   When the current token is a negative number immediately followed by `**',
   the number's magnitude is used as the left operand of the chain and the
   whole chain is wrapped in a negation node, so that the sign applies to the
   result of the exponentiation.

   NOTE(review): the comment that introduces the special case has lost its
   closing line in this excerpt, and the failure check after the chain parse
   is also elided. */
5520 static struct ctables_pcexpr *
5521 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5523 static const struct operator op = { T_EXP, CTPO_POW };
5525 const char *chain_warning =
5526 _("The exponentiation operator (`**') is left-associative: "
5527 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5528 "To disable this warning, insert parentheses.");
5530 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5531 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5532 ctables_pcexpr_parse_primary,
5535 /* Special case for situations like "-5**6", which must be parsed as
5538 int start_ofs = lex_ofs (lexer);
5539 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5540 *lhs = (struct ctables_pcexpr) {
5541 .op = CTPO_CONSTANT,
5542 .number = -lex_tokval (lexer),
5543 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5547 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5548 lexer, dict, &op, 1,
5549 ctables_pcexpr_parse_primary, chain_warning, lhs);
5553 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5556 /* Parses the unary minus level. */
/* Without a leading `-' token this defers to the exponentiation level.
   Otherwise it consumes the `-', parses the operand by calling itself (so
   repeated minus signs nest), and wraps the operand in a negation node whose
   location starts at the `-'.

   NOTE(review): the check for a failed operand parse is elided in this
   excerpt. */
5557 static struct ctables_pcexpr *
5558 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5560 int start_ofs = lex_ofs (lexer);
5561 if (!lex_match (lexer, T_DASH))
5562 return ctables_pcexpr_parse_exp (lexer, dict);
5564 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5568 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5571 /* Parses the multiplication and division level. */
/* Operands come from the unary minus level.  No chain warning is passed for
   this level. */
5572 static struct ctables_pcexpr *
5573 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5575 static const struct operator ops[] =
5577 { T_ASTERISK, CTPO_MUL },
5578 { T_SLASH, CTPO_DIV },
5581 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5582 sizeof ops / sizeof *ops,
5583 ctables_pcexpr_parse_neg, NULL);
5586 /* Parses the addition and subtraction level. */
/* Operands come from the multiplication level.  Besides `+' and `-', a
   negative-number token (T_NEG_NUM) is treated as an addition operator.
   NOTE(review): presumably this is so that input such as `a -5' adds the
   negative number as the right operand; the operator matcher appears not to
   consume a T_NEG_NUM token -- confirm against the full source. */
5587 static struct ctables_pcexpr *
5588 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5590 static const struct operator ops[] =
5592 { T_PLUS, CTPO_ADD },
5593 { T_DASH, CTPO_SUB },
5594 { T_NEG_NUM, CTPO_ADD },
5597 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5598 ops, sizeof ops / sizeof *ops,
5599 ctables_pcexpr_parse_mul, NULL);
/* Searches CT->postcomputes for a postcompute named NAME.  Both the hash
   (utf8_hash_case_string) and the comparison (utf8_strcasecmp) ignore
   case.

   NOTE(review): the return statements are elided in this excerpt; the
   matching entry, or null when none matches, is presumably returned. */
5602 static struct ctables_postcompute *
5603 ctables_find_postcompute (struct ctables *ct, const char *name)
5605 struct ctables_postcompute *pc;
5606 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5607 utf8_hash_case_string (name, 0), &ct->postcomputes)
5608 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form `&NAME = EXPR (expression)'
   and records it in CT->postcomputes.

   If a postcompute with the same name already exists, a warning is issued at
   the new definition, a note points at the previous one, and the previous
   expression and location are destroyed before being replaced.  Otherwise a
   new entry is allocated and inserted into the map under the case-insensitive
   hash of its name.

   The entry's label is set to the source text of the expression, and its
   location covers the whole definition (starting one token before the point
   where this function begins parsing).

   NOTE(review): the rest of the parameter list, the error-path cleanup of
   NAME, the assignment of the parsed expression to the entry, and the return
   statements are elided in this excerpt. */
5614 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5617 int pcompute_start = lex_ofs (lexer) - 1;
5619 if (!lex_match (lexer, T_AND))
5621 lex_error_expecting (lexer, "&");
5624 if (!lex_force_id (lexer))
5627 char *name = ss_xstrdup (lex_tokss (lexer));
5630 if (!lex_force_match (lexer, T_EQUALS)
5631 || !lex_force_match_id (lexer, "EXPR")
5632 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression itself, so that its source
   text can become the label. */
5638 int expr_start = lex_ofs (lexer);
5639 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5640 int expr_end = lex_ofs (lexer) - 1;
5641 if (!expr || !lex_force_match (lexer, T_RPAREN))
5643 ctables_pcexpr_destroy (expr);
5647 int pcompute_end = lex_ofs (lexer) - 1;
5649 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5652 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5655 msg_at (SW, location, _("New definition of &%s will override the "
5656 "previous definition."),
5658 msg_at (SN, pc->location, _("This is the previous definition."));
5660 ctables_pcexpr_destroy (pc->expr);
5661 msg_location_destroy (pc->location);
5666 pc = xmalloc (sizeof *pc);
5667 *pc = (struct ctables_postcompute) { .name = name };
5668 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5669 utf8_hash_case_string (pc->name, 0));
5672 pc->location = location;
5674 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary-function/format pairs into *SSS, which is first
   reset to an empty set.

   Parsing continues until the end of the command, a `/' token, or an
   identifier matching LABEL or HIDESOURCECATS.  Each iteration parses a
   summary function (with its weighting and area), a percentile in the range
   0 through 100 when the function is CTSF_PTILE, and a format specifier,
   then appends the resulting spec to *SSS, growing the array as needed.

   ctables_summary_spec_set_uninit() is called on *SSS, presumably on the
   error path.

   NOTE(review): the error-path jumps, the return statements, and some
   initializer lines of the appended spec are elided in this excerpt. */
5679 ctables_parse_pproperties_format (struct lexer *lexer,
5680 struct ctables_summary_spec_set *sss)
5682 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5684 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5685 && !(lex_token (lexer) == T_ID
5686 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5687 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5688 lex_tokss (lexer)))))
5690 /* Parse function. */
5691 enum ctables_summary_function function;
5692 enum ctables_weighting weighting;
5693 enum ctables_area_type area;
5694 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5697 /* Parse percentile. */
5698 double percentile = 0;
5699 if (function == CTSF_PTILE)
5701 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5703 percentile = lex_number (lexer);
5708 struct fmt_spec format;
5709 bool is_ctables_format;
5710 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the parsed spec, growing the array when it is full. */
5713 if (sss->n >= sss->allocated)
5714 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5715 sizeof *sss->specs);
5716 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5717 .function = function,
5718 .weighting = weighting,
5721 .percentile = percentile,
5723 .is_ctables_format = is_ctables_format,
5729 ctables_summary_spec_set_uninit (sss);
/* Parses the PPROPERTIES subcommand from LEXER.

   The subcommand starts with a list of "&NAME" references, each of which
   must name a postcompute that ctables_find_postcompute() can find in CT.
   The LABEL, FORMAT, and HIDESOURCECATS settings that follow are applied
   to every postcompute in that list. */
5734 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5736 struct ctables_postcompute **pcs = NULL;
5738 size_t allocated_pcs = 0;
/* Collect the postcomputes that the settings will apply to. */
5740 while (lex_match (lexer, T_AND))
5742 if (!lex_force_id (lexer))
5744 struct ctables_postcompute *pc
5745 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5748 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5753 if (n_pcs >= allocated_pcs)
5754 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings run until the next slash or the end of the command. */
5758 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5760 if (lex_match_id (lexer, "LABEL"))
5762 lex_match (lexer, T_EQUALS);
5763 if (!lex_force_string (lexer))
/* Replace each label with its own copy of the string token. */
5766 for (size_t i = 0; i < n_pcs; i++)
5768 free (pcs[i]->label);
5769 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5774 else if (lex_match_id (lexer, "FORMAT"))
5776 lex_match (lexer, T_EQUALS);
/* Parse into a temporary set, give each postcompute its own clone, and
   then release the temporary. */
5778 struct ctables_summary_spec_set sss;
5779 if (!ctables_parse_pproperties_format (lexer, &sss))
5782 for (size_t i = 0; i < n_pcs; i++)
5785 ctables_summary_spec_set_uninit (pcs[i]->specs);
5787 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5788 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5790 ctables_summary_spec_set_uninit (&sss);
5792 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5794 lex_match (lexer, T_EQUALS);
5795 bool hide_source_cats;
5796 if (!parse_bool (lexer, &hide_source_cats))
5798 for (size_t i = 0; i < n_pcs; i++)
5799 pcs[i]->hide_source_cats = hide_source_cats;
5803 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime() according to
   FORMAT into the buffer `value', and appends the result to OUT. */
5816 put_strftime (struct string *out, time_t now, const char *format)
5818 const struct tm *tm = localtime (&now);
5820 strftime (value, sizeof value, format, tm);
5821 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.

   NOTE(review): callers test the result as a truth value, so it presumably
   reports whether the prefix was found and skipped -- confirm. */
5825 skip_prefix (struct substring *s, struct substring prefix)
5827 if (ss_starts_with (*s, prefix))
5829 ss_advance (s, prefix.length);
/* Appends to OUT a display form of the tokens in LEXER at offsets
   EXPR_START up to, but not including, EXPR_END.

   An identifier that names a variable in DICT with a variable label is
   replaced by that label; any other identifier is copied as written.  The
   remaining statements copy a token's source representation, putting a
   space before it (unless it is the first token, a right parenthesis, or
   OUT already ends in a space) and after it (unless it is the last token
   or a left parenthesis). */
5837 put_table_expression (struct string *out, struct lexer *lexer,
5838 struct dictionary *dict, int expr_start, int expr_end)
5841 for (int ofs = expr_start; ofs < expr_end; ofs++)
5843 const struct token *t = lex_ofs_token (lexer, ofs);
/* NOTE(review): `nest' looks like a '[' ... ']' depth counter -- confirm how
   tokens inside brackets are treated. */
5844 if (t->type == T_LBRACK)
5846 else if (t->type == T_RBRACK && nest > 0)
5852 else if (t->type == T_ID)
/* Prefer the variable's label, falling back to the identifier itself. */
5854 const struct variable *var
5855 = dict_lookup_var (dict, t->string.string);
5856 const char *label = var ? var_get_label (var) : NULL;
5857 ds_put_cstr (out, label ? label : t->string.string);
/* Separate from the previous output, but never before ")" and never with
   two spaces in a row. */
5861 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5862 ds_put_byte (out, ' ');
5864 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5865 ds_put_cstr (out, repr);
/* Separate from the next token, but never after "(" or at the very end. */
5868 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5869 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding the keywords )DATE, )TIME, and )TABLE.

   )DATE and )TIME become NOW formatted by put_strftime() with "%x" and
   "%X", respectively.  )TABLE becomes the output of put_table_expression()
   for the tokens between EXPR_START and EXPR_END in LEXER, using DICT. */
5875 put_title_text (struct string *out, struct substring in, time_t now,
5876 struct lexer *lexer, struct dictionary *dict,
5877 int expr_start, int expr_end)
/* Copy everything up to the next ')' verbatim and step past what was
   copied. */
5881 size_t chunk = ss_find_byte (in, ')');
5882 ds_put_substring (out, ss_head (in, chunk));
5883 ss_advance (&in, chunk);
5884 if (ss_is_empty (in))
/* IN now starts with ')': check for each keyword in turn. */
5887 if (skip_prefix (&in, ss_cstr (")DATE")))
5888 put_strftime (out, now, "%x");
5889 else if (skip_prefix (&in, ss_cstr (")TIME")))
5890 put_strftime (out, now, "%X");
5891 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5892 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Presumably the fall-through case: an ordinary ')' is copied and one byte
   of IN is consumed. */
5895 ds_put_byte (out, ')');
5896 ss_advance (&in, 1);
/* Parses and runs the CTABLES command for dataset DS, reading syntax from
   LEXER.

   The visible flow is: run the measure guesser; set up per-variable label
   display defaults, an unshared reference to the default table look, and
   the CTABLES-specific number styles; parse the global subcommands that may
   precede the first TABLE; parse each TABLE specification and its per-table
   subcommands; then pass everything to ctables_execute().

   Returns CMD_SUCCESS when both ctables_execute() and proc_commit() report
   success and CMD_FAILURE when either does not. */
5902 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5904 struct casereader *input = NULL;
/* Run the measure guesser on a reader opened from DS before any syntax is
   parsed. */
5906 struct measure_guesser *mg = measure_guesser_create (ds);
5909 input = proc_open (ds);
5910 measure_guesser_run (mg, input);
5911 measure_guesser_destroy (mg);
/* Every dictionary variable starts with the label display mode taken from
   the global show-variables setting. */
5914 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5915 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5916 enum settings_value_show tvars = settings_get_show_variables ();
5917 for (size_t i = 0; i < n_vars; i++)
5918 vlabels[i] = (enum ctables_vlabel) tvars;
/* Take an unshared reference to the default look, presumably so that
   changing omit_empty does not affect the default itself. */
5920 struct pivot_table_look *look = pivot_table_look_unshare (
5921 pivot_table_look_ref (pivot_table_look_get_default ()));
5922 look->omit_empty = false;
5924 struct ctables *ct = xmalloc (sizeof *ct);
5925 *ct = (struct ctables) {
5926 .dict = dataset_dict (ds),
5928 .ctables_formats = FMT_SETTINGS_INIT,
5930 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once; later passed to put_title_text() for )DATE and )TIME. */
5933 time_t now = time (NULL);
5938 const char *dot_string;
5939 const char *comma_string;
/* The four CTABLES custom number styles.  Each has two spellings, and the
   one registered depends on whether the current decimal point is '.'. */
5941 static const struct ctf ctfs[4] = {
5942 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5943 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5944 { CTEF_PAREN, "-,(,),", "-.(.)." },
5945 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5947 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5948 for (size_t i = 0; i < 4; i++)
5950 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5951 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5952 fmt_number_style_from_string (s));
/* The subcommand list must begin with a slash. */
5955 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands: everything up to the first TABLE keyword. */
5958 while (!lex_match_id (lexer, "TABLE"))
5960 if (lex_match_id (lexer, "FORMAT"))
/* widths[0] and widths[1] receive MINCOLWIDTH and MAXCOLWIDTH; SYSMIS
   marks a width that was not given.  Units default to 72 per inch, the
   same value that POINTS selects. */
5962 double widths[2] = { SYSMIS, SYSMIS };
5963 double units_per_inch = 72.0;
5965 while (lex_token (lexer) != T_SLASH)
5967 if (lex_match_id (lexer, "MINCOLWIDTH"))
5969 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5972 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5974 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5977 else if (lex_match_id (lexer, "UNITS"))
5979 lex_match (lexer, T_EQUALS);
5980 if (lex_match_id (lexer, "POINTS"))
5981 units_per_inch = 72.0;
5982 else if (lex_match_id (lexer, "INCHES"))
5983 units_per_inch = 1.0;
5984 else if (lex_match_id (lexer, "CM"))
5985 units_per_inch = 2.54;
5988 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
/* EMPTY: ZERO leaves ct->zero alone, BLANK stores an empty string, and
   anything else must be a string token, which is copied. */
5992 else if (lex_match_id (lexer, "EMPTY"))
5997 lex_match (lexer, T_EQUALS);
5998 if (lex_match_id (lexer, "ZERO"))
6000 /* Nothing to do. */
6002 else if (lex_match_id (lexer, "BLANK"))
6003 ct->zero = xstrdup ("");
6004 else if (lex_force_string (lexer))
6006 ct->zero = ss_xstrdup (lex_tokss (lexer));
/* MISSING: a string other than "." is copied; "." itself selects the other
   arm of the conditional. */
6012 else if (lex_match_id (lexer, "MISSING"))
6014 lex_match (lexer, T_EQUALS);
6015 if (!lex_force_string (lexer))
6019 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6020 ? ss_xstrdup (lex_tokss (lexer))
6026 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6027 "UNITS", "EMPTY", "MISSING");
/* Reject a minimum that exceeds the maximum when both were given. */
6032 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6033 && widths[0] > widths[1])
6035 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each given width to inches and scale by 96 before storing it in
   the look's horizontal width range (presumably 1/96-inch units -- TODO
   confirm). */
6039 for (size_t i = 0; i < 2; i++)
6040 if (widths[i] != SYSMIS)
6042 int *wr = ct->look->width_ranges[TABLE_HORZ];
6043 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS VARIABLES=... DISPLAY=...: set the label display mode for the
   listed variables. */
6048 else if (lex_match_id (lexer, "VLABELS"))
6050 if (!lex_force_match_id (lexer, "VARIABLES"))
6052 lex_match (lexer, T_EQUALS);
6054 struct variable **vars;
6056 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6060 if (!lex_force_match_id (lexer, "DISPLAY"))
6065 lex_match (lexer, T_EQUALS);
6067 enum ctables_vlabel vlabel;
6068 if (lex_match_id (lexer, "DEFAULT"))
6069 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6070 else if (lex_match_id (lexer, "NAME"))
6072 else if (lex_match_id (lexer, "LABEL"))
6073 vlabel = CTVL_LABEL;
6074 else if (lex_match_id (lexer, "BOTH"))
6076 else if (lex_match_id (lexer, "NONE"))
6080 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* ct->vlabels is indexed by each variable's dictionary index. */
6086 for (size_t i = 0; i < n_vars; i++)
6087 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6090 else if (lex_match_id (lexer, "MRSETS"))
6092 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6094 lex_match (lexer, T_EQUALS);
6095 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6098 else if (lex_match_id (lexer, "SMISSING"))
6100 if (lex_match_id (lexer, "VARIABLE"))
6101 ct->smissing_listwise = false;
6102 else if (lex_match_id (lexer, "LISTWISE"))
6103 ct->smissing_listwise = true;
6106 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6110 else if (lex_match_id (lexer, "PCOMPUTE"))
6112 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6115 else if (lex_match_id (lexer, "PPROPERTIES"))
6117 if (!ctables_parse_pproperties (lexer, ct))
6120 else if (lex_match_id (lexer, "WEIGHT"))
6122 if (!lex_force_match_id (lexer, "VARIABLE"))
6124 lex_match (lexer, T_EQUALS);
6125 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS: an explicit COUNT sets the threshold; without COUNT, a
   threshold that is still 0 becomes 5. */
6129 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6131 if (lex_match_id (lexer, "COUNT"))
6133 lex_match (lexer, T_EQUALS);
6134 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6137 ct->hide_threshold = lex_integer (lexer);
6140 else if (ct->hide_threshold == 0)
6141 ct->hide_threshold = 5;
6145 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6146 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6147 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
/* A slash must follow each global subcommand. */
6151 if (!lex_force_match (lexer, T_SLASH))
6155 size_t allocated_tables = 0;
/* Per-table work starts here: make room for another table pointer. */
6158 if (ct->n_tables >= allocated_tables)
6159 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6160 sizeof *ct->tables);
/* Build the default category and categories objects, then an array with
   one categories slot per dictionary variable. */
6162 struct ctables_category *cat = xmalloc (sizeof *cat);
6163 *cat = (struct ctables_category) {
6165 .include_missing = false,
6166 .sort_ascending = true,
6169 struct ctables_categories *c = xmalloc (sizeof *c);
6170 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6171 *c = (struct ctables_categories) {
6178 struct ctables_categories **categories = xnmalloc (n_vars,
6179 sizeof *categories);
6180 for (size_t i = 0; i < n_vars; i++)
6183 struct ctables_table *t = xmalloc (sizeof *t);
6184 *t = (struct ctables_table) {
6186 .slabels_axis = PIVOT_AXIS_COLUMN,
6187 .slabels_visible = true,
6188 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6190 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6191 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6192 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6194 .clabels_from_axis = PIVOT_AXIS_LAYER,
6195 .clabels_to_axis = PIVOT_AXIS_LAYER,
6196 .categories = categories,
6197 .n_categories = n_vars,
6200 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: rows, then optionally BY columns, then
   optionally BY layers.  expr_start and expr_end delimit its tokens for
   later )TABLE expansion in titles. */
6202 lex_match (lexer, T_EQUALS);
6203 int expr_start = lex_ofs (lexer);
6204 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6206 if (lex_match (lexer, T_BY))
6208 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6209 ct, t, PIVOT_AXIS_COLUMN))
6212 if (lex_match (lexer, T_BY))
6214 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6215 ct, t, PIVOT_AXIS_LAYER))
6219 int expr_end = lex_ofs (lexer);
6221 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6222 && !t->axes[PIVOT_AXIS_LAYER])
6224 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis.  The error below reports
   scale variables appearing on more than one axis. */
6228 const struct ctables_axis *scales[PIVOT_N_AXES];
6229 size_t n_scales = 0;
6230 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6232 scales[a] = find_scale (t->axes[a]);
6238 msg (SE, _("Scale variables may appear only on one axis."));
6239 if (scales[PIVOT_AXIS_ROW])
6240 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6241 _("This scale variable appears on the rows axis."));
6242 if (scales[PIVOT_AXIS_COLUMN])
6243 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6244 _("This scale variable appears on the columns axis."));
6245 if (scales[PIVOT_AXIS_LAYER])
6246 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6247 _("This scale variable appears on the layer axis."));
/* Likewise for summaries, which may appear on only one axis. */
6251 const struct ctables_axis *summaries[PIVOT_N_AXES];
6252 size_t n_summaries = 0;
6253 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6255 summaries[a] = (scales[a]
6257 : find_categorical_summary_spec (t->axes[a]));
6261 if (n_summaries > 1)
6263 msg (SE, _("Summaries may appear only on one axis."));
6264 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6267 msg_at (SN, summaries[a]->loc,
6269 ? _("This variable on the rows axis has a summary.")
6270 : a == PIVOT_AXIS_COLUMN
6271 ? _("This variable on the columns axis has a summary.")
6272 : _("This variable on the layers axis has a summary."));
6274 msg_at (SN, summaries[a]->loc,
6275 _("This is a scale variable, so it always has a "
6276 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary when any summaries
   exist, otherwise an axis that is not empty. */
6281 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6282 if (n_summaries ? summaries[a] : t->axes[a])
6284 t->summary_axis = a;
/* A TABLE with no subcommands at the very end of the command is prepared
   right away. */
6288 if (lex_token (lexer) == T_ENDCMD)
6290 if (!ctables_prepare_table (t))
6294 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6297 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6299 if (lex_match_id (lexer, "SLABELS"))
6301 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6303 if (lex_match_id (lexer, "POSITION"))
6305 lex_match (lexer, T_EQUALS);
6306 if (lex_match_id (lexer, "COLUMN"))
6307 t->slabels_axis = PIVOT_AXIS_COLUMN;
6308 else if (lex_match_id (lexer, "ROW"))
6309 t->slabels_axis = PIVOT_AXIS_ROW;
6310 else if (lex_match_id (lexer, "LAYER"))
6311 t->slabels_axis = PIVOT_AXIS_LAYER;
6314 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6318 else if (lex_match_id (lexer, "VISIBLE"))
6320 lex_match (lexer, T_EQUALS);
6321 if (!parse_bool (lexer, &t->slabels_visible))
6326 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS: AUTO restores the row and column defaults; ROWLABELS and
   COLLABELS send that axis's labels to the opposite axis or to the
   layer axis. */
6331 else if (lex_match_id (lexer, "CLABELS"))
6333 if (lex_match_id (lexer, "AUTO"))
6335 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6336 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6338 else if (lex_match_id (lexer, "ROWLABELS"))
6340 lex_match (lexer, T_EQUALS);
6341 if (lex_match_id (lexer, "OPPOSITE"))
6342 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6343 else if (lex_match_id (lexer, "LAYER"))
6344 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6347 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6351 else if (lex_match_id (lexer, "COLLABELS"))
6353 lex_match (lexer, T_EQUALS);
6354 if (lex_match_id (lexer, "OPPOSITE"))
6355 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6356 else if (lex_match_id (lexer, "LAYER"))
6357 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6360 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6366 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6371 else if (lex_match_id (lexer, "CRITERIA"))
6373 if (!lex_force_match_id (lexer, "CILEVEL"))
6375 lex_match (lexer, T_EQUALS);
6377 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6379 t->cilevel = lex_number (lexer);
6382 else if (lex_match_id (lexer, "CATEGORIES"))
6384 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: each of CAPTION, CORNER, and TITLE takes one or more strings,
   which are joined with single spaces after keyword expansion by
   put_title_text(). */
6388 else if (lex_match_id (lexer, "TITLES"))
6393 if (lex_match_id (lexer, "CAPTION"))
6394 textp = &t->caption;
6395 else if (lex_match_id (lexer, "CORNER"))
6397 else if (lex_match_id (lexer, "TITLE"))
6401 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6404 lex_match (lexer, T_EQUALS);
6406 struct string s = DS_EMPTY_INITIALIZER;
6407 while (lex_is_string (lexer))
6409 if (!ds_is_empty (&s))
6410 ds_put_byte (&s, ' ');
6411 put_title_text (&s, lex_tokss (lexer), now,
6412 lexer, dataset_dict (ds),
6413 expr_start, expr_end);
6417 *textp = ds_steal_cstr (&s);
6419 while (lex_token (lexer) != T_SLASH
6420 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST is parsed in full, but the whole subcommand is then reported as
   not yet implemented.  start_ofs is the offset of the SIGTEST keyword
   itself, which was just matched. */
6422 else if (lex_match_id (lexer, "SIGTEST"))
6424 int start_ofs = lex_ofs (lexer) - 1;
6427 t->chisq = xmalloc (sizeof *t->chisq);
6428 *t->chisq = (struct ctables_chisq) {
6430 .include_mrsets = true,
6431 .all_visible = true,
6437 if (lex_match_id (lexer, "TYPE"))
6439 lex_match (lexer, T_EQUALS);
6440 if (!lex_force_match_id (lexer, "CHISQUARE"))
6443 else if (lex_match_id (lexer, "ALPHA"))
6445 lex_match (lexer, T_EQUALS);
6446 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6448 t->chisq->alpha = lex_number (lexer);
6451 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6453 lex_match (lexer, T_EQUALS);
6454 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6457 else if (lex_match_id (lexer, "CATEGORIES"))
6459 lex_match (lexer, T_EQUALS);
6460 if (lex_match_id (lexer, "ALLVISIBLE"))
6461 t->chisq->all_visible = true;
6462 else if (lex_match_id (lexer, "SUBTOTALS"))
6463 t->chisq->all_visible = false;
6466 lex_error_expecting (lexer,
6467 "ALLVISIBLE", "SUBTOTALS");
6473 lex_error_expecting (lexer, "TYPE", "ALPHA",
6474 "INCLUDEMRSETS", "CATEGORIES");
6478 while (lex_token (lexer) != T_SLASH
6479 && lex_token (lexer) != T_ENDCMD);
6481 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6482 _("Support for SIGTEST not yet implemented."));
/* COMPARETEST is likewise parsed in full and then reported as not yet
   implemented.

   NOTE(review): SIGTEST computes start_ofs as lex_ofs (lexer) - 1, which
   includes the keyword in the reported range; here it is lex_ofs (lexer),
   which starts one token later -- confirm whether the difference is
   intended. */
6485 else if (lex_match_id (lexer, "COMPARETEST"))
6487 int start_ofs = lex_ofs (lexer);
6490 t->pairwise = xmalloc (sizeof *t->pairwise);
6491 *t->pairwise = (struct ctables_pairwise) {
6493 .alpha = { .05, .05 },
6494 .adjust = BONFERRONI,
6495 .include_mrsets = true,
6496 .meansvariance_allcats = true,
6497 .all_visible = true,
6506 if (lex_match_id (lexer, "TYPE"))
6508 lex_match (lexer, T_EQUALS);
6509 if (lex_match_id (lexer, "PROP"))
6510 t->pairwise->type = PROP;
6511 else if (lex_match_id (lexer, "MEAN"))
6512 t->pairwise->type = MEAN;
6515 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA accepts one or two values.  Two values are stored in ascending
   order; a single value is used for both slots. */
6519 else if (lex_match_id (lexer, "ALPHA"))
6521 lex_match (lexer, T_EQUALS);
6523 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6525 double a0 = lex_number (lexer);
6528 lex_match (lexer, T_COMMA);
6529 if (lex_is_number (lexer))
6531 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6533 double a1 = lex_number (lexer);
6536 t->pairwise->alpha[0] = MIN (a0, a1);
6537 t->pairwise->alpha[1] = MAX (a0, a1);
6540 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6542 else if (lex_match_id (lexer, "ADJUST"))
6544 lex_match (lexer, T_EQUALS);
6545 if (lex_match_id (lexer, "BONFERRONI"))
6546 t->pairwise->adjust = BONFERRONI;
6547 else if (lex_match_id (lexer, "BH"))
6548 t->pairwise->adjust = BH;
6549 else if (lex_match_id (lexer, "NONE"))
6550 t->pairwise->adjust = 0;
6553 lex_error_expecting (lexer, "BONFERRONI", "BH",
6558 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6560 lex_match (lexer, T_EQUALS);
6561 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6564 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6566 lex_match (lexer, T_EQUALS);
6567 if (lex_match_id (lexer, "ALLCATS"))
6568 t->pairwise->meansvariance_allcats = true;
6569 else if (lex_match_id (lexer, "TESTEDCATS"))
6570 t->pairwise->meansvariance_allcats = false;
6573 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6577 else if (lex_match_id (lexer, "CATEGORIES"))
6579 lex_match (lexer, T_EQUALS);
6580 if (lex_match_id (lexer, "ALLVISIBLE"))
6581 t->pairwise->all_visible = true;
6582 else if (lex_match_id (lexer, "SUBTOTALS"))
6583 t->pairwise->all_visible = false;
6586 lex_error_expecting (lexer, "ALLVISIBLE",
6591 else if (lex_match_id (lexer, "MERGE"))
6593 lex_match (lexer, T_EQUALS);
6594 if (!parse_bool (lexer, &t->pairwise->merge))
6597 else if (lex_match_id (lexer, "STYLE"))
6599 lex_match (lexer, T_EQUALS);
6600 if (lex_match_id (lexer, "APA"))
6601 t->pairwise->apa_style = true;
6602 else if (lex_match_id (lexer, "SIMPLE"))
6603 t->pairwise->apa_style = false;
6606 lex_error_expecting (lexer, "APA", "SIMPLE");
6610 else if (lex_match_id (lexer, "SHOWSIG"))
6612 lex_match (lexer, T_EQUALS);
6613 if (!parse_bool (lexer, &t->pairwise->show_sig))
6618 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6619 "INCLUDEMRSETS", "MEANSVARIANCE",
6620 "CATEGORIES", "MERGE", "STYLE",
6625 while (lex_token (lexer) != T_SLASH
6626 && lex_token (lexer) != T_ENDCMD);
6628 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6629 _("Support for COMPARETEST not yet implemented."));
6634 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6635 "CRITERIA", "CATEGORIES", "TITLES",
6636 "SIGTEST", "COMPARETEST");
6640 if (!lex_match (lexer, T_SLASH))
/* Resolve CLABELS: labels may be moved off the row axis or off the column
   axis, but not both.  clabels_to_axis is wherever the labels of the
   chosen source axis were sent. */
6644 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6646 t->clabels_from_axis = PIVOT_AXIS_ROW;
6647 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6649 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6653 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6654 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6655 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6657 if (!ctables_prepare_table (t))
6660 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: execute against a reader opened from DS and commit
   the procedure.  Both steps must succeed for the command to succeed.
   NOTE(review): `input' was also assigned from proc_open() near the top of
   the function -- confirm this second open is conditional. */
6663 input = proc_open (ds);
6664 bool ok = ctables_execute (ds, input, ct);
6665 ok = proc_commit (ds) && ok;
6667 ctables_destroy (ct);
6668 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Presumably the shared error exit: release the parsed state. */
6673 ctables_destroy (ct);