1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_weighting
74 enum ctables_function_type
76 /* A function that operates on data in a single cell. It operates on
77 effective weights. It does not have an unweighted version. */
80 /* A function that operates on data in a single cell. The function
81 operates on effective weights and has a U-prefixed unweighted
85 /* A function that operates on data in a single cell. It operates on
86 dictionary weights, and has a U-prefixed unweighted version and an
87 E-prefixed effective weight version. */
90 /* A function that operates on an area of cells. It operates on effective
91 weights and has a U-prefixed unweighted version. */
102 enum ctables_function_availability
104 CTFA_ALL, /* Any variables. */
105 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
106 //CTFA_MRSETS, /* Only multiple-response sets */
109 enum ctables_summary_function
111 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
112 #include "ctables.inc"
117 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
119 #include "ctables.inc"
123 struct ctables_function_info
125 struct substring basename;
126 enum ctables_function_type type;
127 enum ctables_format format;
128 enum ctables_function_availability availability;
130 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
131 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
132 bool is_area; /* Needs an area prefix. */
134 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
135 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
137 .basename = SS_LITERAL_INITIALIZER (NAME), \
140 .availability = AVAILABILITY, \
141 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
142 .e_prefix = (TYPE) == CTFT_UECELL, \
143 .is_area = (TYPE) == CTFT_AREA \
145 #include "ctables.inc"
149 enum ctables_area_type
151 /* Within a section, where stacked variables divide one section from
154 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
155 parse_ctables_summary_function() parses correctly. */
156 CTAT_TABLE, /* All layers of a whole section. */
157 CTAT_LAYERROW, /* Row in one layer within a section. */
158 CTAT_LAYERCOL, /* Column in one layer within a section. */
159 CTAT_LAYER, /* One layer within a section. */
161 /* Within a subtable, where a subtable pairs an innermost row variable with
162 an innermost column variable within a single layer. */
163 CTAT_SUBTABLE, /* Whole subtable. */
164 CTAT_ROW, /* Row within a subtable. */
165 CTAT_COL, /* Column within a subtable. */
169 static const char *ctables_area_type_name[N_CTATS] = {
170 [CTAT_TABLE] = "TABLE",
171 [CTAT_LAYER] = "LAYER",
172 [CTAT_LAYERROW] = "LAYERROW",
173 [CTAT_LAYERCOL] = "LAYERCOL",
174 [CTAT_SUBTABLE] = "SUBTABLE",
181 struct hmap_node node;
183 const struct ctables_cell *example;
186 double count[N_CTWS];
187 double valid[N_CTWS];
188 double total[N_CTWS];
189 struct ctables_sum *sums;
197 enum ctables_summary_variant
206 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
207 all the axes (except the scalar variable, if any). */
208 struct hmap_node node;
210 /* The areas that contain this cell. */
212 struct ctables_area *areas[N_CTATS];
217 enum ctables_summary_variant sv;
219 struct ctables_cell_axis
221 struct ctables_cell_value
223 const struct ctables_category *category;
231 union ctables_summary *summaries;
238 const struct dictionary *dict;
239 struct pivot_table_look *look;
241 /* CTABLES has a number of extra formats that we implement via custom
242 currency specifications on an alternate fmt_settings. */
243 #define CTEF_NEGPAREN FMT_CCA
244 #define CTEF_NEQUAL FMT_CCB
245 #define CTEF_PAREN FMT_CCC
246 #define CTEF_PCTPAREN FMT_CCD
247 struct fmt_settings ctables_formats;
249 /* If this is NULL, zeros are displayed using the normal print format.
250 Otherwise, this string is displayed. */
253 /* If this is NULL, missing values are displayed using the normal print
254 format. Otherwise, this string is displayed. */
257 /* Indexed by variable dictionary index. */
258 enum ctables_vlabel *vlabels;
260 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
262 bool mrsets_count_duplicates; /* MRSETS. */
263 bool smissing_listwise; /* SMISSING. */
264 struct variable *e_weight; /* WEIGHT. */
265 int hide_threshold; /* HIDESMALLCOUNTS. */
267 struct ctables_table **tables;
271 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
274 struct ctables_postcompute
276 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
277 char *name; /* Name, without leading &. */
279 struct msg_location *location; /* Location of definition. */
280 struct ctables_pcexpr *expr;
282 struct ctables_summary_spec_set *specs;
283 bool hide_source_cats;
286 struct ctables_pcexpr
296 enum ctables_postcompute_op
299 CTPO_CONSTANT, /* 5 */
300 CTPO_CAT_NUMBER, /* [5] */
301 CTPO_CAT_STRING, /* ["STRING"] */
302 CTPO_CAT_NRANGE, /* [LO THRU 5] */
303 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
304 CTPO_CAT_MISSING, /* MISSING */
305 CTPO_CAT_OTHERNM, /* OTHERNM */
306 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
307 CTPO_CAT_TOTAL, /* TOTAL */
321 /* CTPO_CAT_NUMBER. */
324 /* CTPO_CAT_STRING, in dictionary encoding. */
325 struct substring string;
327 /* CTPO_CAT_NRANGE. */
330 /* CTPO_CAT_SRANGE. */
331 struct substring srange[2];
333 /* CTPO_CAT_SUBTOTAL. */
334 size_t subtotal_index;
336 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
337 One element: CTPO_NEG. */
338 struct ctables_pcexpr *subs[2];
341 /* Source location. */
342 struct msg_location *location;
345 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
346 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
347 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
348 struct ctables_pcexpr *sub1);
350 struct ctables_summary_spec_set
352 struct ctables_summary_spec *specs;
356 /* The variable to which the summary specs are applied. */
357 struct variable *var;
359 /* Whether the variable to which the summary specs are applied is a scale
360 variable for the purpose of summarization.
362 (VALIDN and TOTALN act differently for summarizing scale and categorical
366 /* If any of these optional additional scale variables are missing, then
367 treat 'var' as if it's missing too. This is for implementing
368 SMISSING=LISTWISE. */
369 struct variable **listwise_vars;
370 size_t n_listwise_vars;
373 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
374 const struct ctables_summary_spec_set *);
375 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
377 /* A nested sequence of variables, e.g. a > b > c. */
380 struct variable **vars;
384 size_t *areas[N_CTATS];
385 size_t n_areas[N_CTATS];
388 struct ctables_summary_spec_set specs[N_CSVS];
391 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
394 struct ctables_nest *nests;
398 static void ctables_stack_uninit (struct ctables_stack *);
402 struct hmap_node node;
407 struct ctables_occurrence
409 struct hmap_node node;
413 struct ctables_section
416 struct ctables_table *table;
417 struct ctables_nest *nests[PIVOT_N_AXES];
420 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
421 struct hmap cells; /* Contains "struct ctables_cell"s. */
422 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
425 static void ctables_section_uninit (struct ctables_section *);
429 struct ctables *ctables;
430 struct ctables_axis *axes[PIVOT_N_AXES];
431 struct ctables_stack stacks[PIVOT_N_AXES];
432 struct ctables_section *sections;
434 enum pivot_axis_type summary_axis;
435 struct ctables_summary_spec_set summary_specs;
436 struct variable **sum_vars;
439 enum pivot_axis_type slabels_axis;
440 bool slabels_visible;
442 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
444 Most commonly, label_axis[a] == a, and in particular we always have
445 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
447 If ROWLABELS or COLLABELS is specified, then one of
448 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
449 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
451 If any category labels are moved, then 'clabels_example' is one of the
452 variables being moved (and it is otherwise NULL). All of the variables
453 being moved have the same width, value labels, and categories, so this
454 example variable can be used to find those out.
456 The remaining members in this group are relevant only if category labels
459 'clabels_values_map' holds a "struct ctables_value" for all the values
460 that appear in all of the variables in the moved categories. It is
461 accumulated as the data is read. Once the data is fully read, its
462 sorted values are put into 'clabels_values' and 'n_clabels_values'.
464 enum pivot_axis_type label_axis[PIVOT_N_AXES];
465 enum pivot_axis_type clabels_from_axis;
466 enum pivot_axis_type clabels_to_axis;
467 const struct variable *clabels_example;
468 struct hmap clabels_values_map;
469 struct ctables_value **clabels_values;
470 size_t n_clabels_values;
472 /* Indexed by variable dictionary index. */
473 struct ctables_categories **categories;
482 struct ctables_chisq *chisq;
483 struct ctables_pairwise *pairwise;
486 struct ctables_categories
489 struct ctables_category *cats;
494 struct ctables_category
496 enum ctables_category_type
498 /* Explicit category lists. */
501 CCT_NRANGE, /* Numerical range. */
502 CCT_SRANGE, /* String range. */
507 /* Totals and subtotals. */
511 /* Implicit category lists. */
516 /* For contributing to TOTALN. */
517 CCT_EXCLUDED_MISSING,
521 struct ctables_category *subtotal;
527 double number; /* CCT_NUMBER. */
528 struct substring string; /* CCT_STRING, in dictionary encoding. */
529 double nrange[2]; /* CCT_NRANGE. */
530 struct substring srange[2]; /* CCT_SRANGE. */
534 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
535 bool hide_subcategories; /* CCT_SUBTOTAL. */
538 /* CCT_POSTCOMPUTE. */
541 const struct ctables_postcompute *pc;
542 enum fmt_type parse_format;
545 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
548 bool include_missing;
552 enum ctables_summary_function sort_function;
553 enum ctables_weighting weighting;
554 enum ctables_area_type area;
555 struct variable *sort_var;
560 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
561 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
562 struct msg_location *location;
566 ctables_category_uninit (struct ctables_category *cat)
571 msg_location_destroy (cat->location);
578 case CCT_POSTCOMPUTE:
582 ss_dealloc (&cat->string);
586 ss_dealloc (&cat->srange[0]);
587 ss_dealloc (&cat->srange[1]);
592 free (cat->total_label);
600 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B for equality, treating a null 'string' pointer
   as a distinct value: two null substrings are equal, a null and a non-null
   substring never are, and two non-null substrings are compared with
   ss_equals(). */
606 nullable_substring_equal (const struct substring *a,
607 const struct substring *b)
609 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
613 ctables_category_equal (const struct ctables_category *a,
614 const struct ctables_category *b)
616 if (a->type != b->type)
622 return a->number == b->number;
625 return ss_equals (a->string, b->string);
628 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
631 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
632 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
638 case CCT_POSTCOMPUTE:
639 return a->pc == b->pc;
643 return !strcmp (a->total_label, b->total_label);
648 return (a->include_missing == b->include_missing
649 && a->sort_ascending == b->sort_ascending
650 && a->sort_function == b->sort_function
651 && a->sort_var == b->sort_var
652 && a->percentile == b->percentile);
654 case CCT_EXCLUDED_MISSING:
662 ctables_categories_unref (struct ctables_categories *c)
667 assert (c->n_refs > 0);
671 for (size_t i = 0; i < c->n_cats; i++)
672 ctables_category_uninit (&c->cats[i]);
678 ctables_categories_equal (const struct ctables_categories *a,
679 const struct ctables_categories *b)
681 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
684 for (size_t i = 0; i < a->n_cats; i++)
685 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
691 /* Chi-square test (SIGTEST). */
699 /* Pairwise comparison test (COMPARETEST). */
700 struct ctables_pairwise
702 enum { PROP, MEAN } type;
705 bool meansvariance_allcats;
707 enum { BONFERRONI = 1, BH } adjust;
731 struct variable *var;
733 struct ctables_summary_spec_set specs[N_CSVS];
737 struct ctables_axis *subs[2];
740 struct msg_location *loc;
743 static void ctables_axis_destroy (struct ctables_axis *);
745 struct ctables_summary_spec
747 /* The calculation to be performed.
749 'function' is the function to calculate. 'weighting' specifies which
750 weights to use (for functions that do not support a choice, it must be
751 the one kind they support). 'calc_area' is the area over which the
752 calculation takes place (for functions that target only an individual
753 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
754 percentile between 0 and 100 (for other functions it must be 0). */
755 enum ctables_summary_function function;
756 enum ctables_weighting weighting;
757 enum ctables_area_type calc_area;
758 double percentile; /* CTSF_PTILE only. */
760 /* How to display the result of the calculation.
762 'label' is a user-specified label, NULL if the user didn't specify
765 'user_area' is usually the same as 'calc_area', but when category labels
766 are rotated from one axis to another it swaps rows and columns.
768 'format' is the format for displaying the output. If
769 'is_ctables_format' is true, then 'format.type' is one of the special
770 CTEF_* formats instead of the standard ones. */
772 enum ctables_area_type user_area;
773 struct fmt_spec format;
774 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
781 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
782 const struct ctables_summary_spec *src)
785 dst->label = xstrdup_if_nonnull (src->label);
789 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
796 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
797 const struct ctables_summary_spec_set *src)
799 struct ctables_summary_spec *specs
800 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
801 for (size_t i = 0; i < src->n; i++)
802 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
804 *dst = (struct ctables_summary_spec_set) {
809 .is_scale = src->is_scale,
814 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
816 for (size_t i = 0; i < set->n; i++)
817 ctables_summary_spec_uninit (&set->specs[i]);
818 free (set->listwise_vars);
823 parse_col_width (struct lexer *lexer, const char *name, double *width)
825 lex_match (lexer, T_EQUALS);
826 if (lex_match_id (lexer, "DEFAULT"))
828 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
830 *width = lex_number (lexer);
840 parse_bool (struct lexer *lexer, bool *b)
842 if (lex_match_id (lexer, "NO"))
844 else if (lex_match_id (lexer, "YES"))
848 lex_error_expecting (lexer, "YES", "NO");
854 static enum ctables_function_availability
855 ctables_function_availability (enum ctables_summary_function f)
857 static enum ctables_function_availability availability[] = {
858 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
859 #include "ctables.inc"
863 return availability[f];
867 parse_ctables_summary_function (struct lexer *lexer,
868 enum ctables_summary_function *function,
869 enum ctables_weighting *weighting,
870 enum ctables_area_type *area)
872 if (!lex_force_id (lexer))
875 struct substring name = lex_tokss (lexer);
876 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
877 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
879 bool has_area = false;
881 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
882 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
887 if (ss_equals_case (name, ss_cstr ("PCT")))
889 /* Special case where .COUNT suffix is omitted. */
890 *function = CTSF_areaPCT_COUNT;
891 *weighting = CTW_EFFECTIVE;
898 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
900 const struct ctables_function_info *cfi = &ctables_function_info[f];
901 if (ss_equals_case (cfi->basename, name))
904 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
907 *weighting = (e ? CTW_EFFECTIVE
909 : cfi->e_prefix ? CTW_DICTIONARY
916 lex_error (lexer, _("Expecting summary function name."));
921 ctables_axis_destroy (struct ctables_axis *axis)
929 for (size_t i = 0; i < N_CSVS; i++)
930 ctables_summary_spec_set_uninit (&axis->specs[i]);
935 ctables_axis_destroy (axis->subs[0]);
936 ctables_axis_destroy (axis->subs[1]);
939 msg_location_destroy (axis->loc);
943 static struct ctables_axis *
944 ctables_axis_new_nonterminal (enum ctables_axis_op op,
945 struct ctables_axis *sub0,
946 struct ctables_axis *sub1,
947 struct lexer *lexer, int start_ofs)
949 struct ctables_axis *axis = xmalloc (sizeof *axis);
950 *axis = (struct ctables_axis) {
952 .subs = { sub0, sub1 },
953 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
958 struct ctables_axis_parse_ctx
961 struct dictionary *dict;
963 struct ctables_table *t;
966 static struct fmt_spec
967 ctables_summary_default_format (enum ctables_summary_function function,
968 const struct variable *var)
970 static const enum ctables_format default_formats[] = {
971 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
972 #include "ctables.inc"
975 switch (default_formats[function])
978 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
981 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
984 return *var_get_print_format (var);
992 ctables_summary_label__ (const struct ctables_summary_spec *spec)
994 bool w = spec->weighting != CTW_UNWEIGHTED;
995 bool d = spec->weighting == CTW_DICTIONARY;
996 enum ctables_area_type a = spec->user_area;
997 switch (spec->function)
1000 return (d ? N_("Count")
1001 : w ? N_("Adjusted Count")
1002 : N_("Unweighted Count"));
1004 case CTSF_areaPCT_COUNT:
1007 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1008 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1009 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1010 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1011 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1012 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1013 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1017 case CTSF_areaPCT_VALIDN:
1020 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1021 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1022 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1023 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1024 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1025 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1026 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1030 case CTSF_areaPCT_TOTALN:
1033 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1034 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1035 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1036 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1037 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1038 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1039 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1043 case CTSF_MAXIMUM: return N_("Maximum");
1044 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1045 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
1046 case CTSF_MINIMUM: return N_("Minimum");
1047 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
1048 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
1049 case CTSF_PTILE: NOT_REACHED ();
1050 case CTSF_RANGE: return N_("Range");
1051 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
1052 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
1053 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
1054 case CTSF_TOTALN: return (d ? N_("Total N")
1055 : w ? N_("Adjusted Total N")
1056 : N_("Unweighted Total N"));
1057 case CTSF_VALIDN: return (d ? N_("Valid N")
1058 : w ? N_("Adjusted Valid N")
1059 : N_("Unweighted Valid N"));
1060 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
1061 case CTSF_areaPCT_SUM:
1064 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1065 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1066 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1067 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1068 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1069 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1070 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1077 /* Don't bother translating these: they are for developers only. */
1078 case CTAT_TABLE: return "Table ID";
1079 case CTAT_LAYER: return "Layer ID";
1080 case CTAT_LAYERROW: return "Layer Row ID";
1081 case CTAT_LAYERCOL: return "Layer Column ID";
1082 case CTAT_SUBTABLE: return "Subtable ID";
1083 case CTAT_ROW: return "Row ID";
1084 case CTAT_COL: return "Column ID";
1092 static struct pivot_value *
1093 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1097 if (spec->function == CTSF_PTILE)
1099 double p = spec->percentile;
1100 char *s = (spec->weighting != CTW_UNWEIGHTED
1101 ? xasprintf (_("Percentile %.2f"), p)
1102 : xasprintf (_("Unweighted Percentile %.2f"), p));
1103 return pivot_value_new_user_text_nocopy (s);
1106 return pivot_value_new_text (ctables_summary_label__ (spec));
1110 struct substring in = ss_cstr (spec->label);
1111 struct substring target = ss_cstr (")CILEVEL");
1113 struct string out = DS_EMPTY_INITIALIZER;
1116 size_t chunk = ss_find_substring (in, target);
1117 ds_put_substring (&out, ss_head (in, chunk));
1118 ss_advance (&in, chunk);
1120 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1122 ss_advance (&in, target.length);
1123 ds_put_format (&out, "%g", cilevel);
1129 ctables_summary_function_name (enum ctables_summary_function function,
1130 enum ctables_weighting weighting,
1131 enum ctables_area_type area,
1132 char *buffer, size_t bufsize)
1134 const struct ctables_function_info *cfi = &ctables_function_info[function];
1135 snprintf (buffer, bufsize, "%s%s%s",
1136 (weighting == CTW_UNWEIGHTED ? "U"
1137 : weighting == CTW_DICTIONARY ? ""
1138 : cfi->e_prefix ? "E"
1140 cfi->is_area ? ctables_area_type_name[area] : "",
1141 cfi->basename.string);
1146 add_summary_spec (struct ctables_axis *axis,
1147 enum ctables_summary_function function,
1148 enum ctables_weighting weighting,
1149 enum ctables_area_type area, double percentile,
1150 const char *label, const struct fmt_spec *format,
1151 bool is_ctables_format, const struct msg_location *loc,
1152 enum ctables_summary_variant sv)
1154 if (axis->op == CTAO_VAR)
1156 char function_name[128];
1157 ctables_summary_function_name (function, weighting, area,
1158 function_name, sizeof function_name);
1159 const char *var_name = var_get_name (axis->var);
1160 switch (ctables_function_availability (function))
1164 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1165 "response sets."), function_name);
1166 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1172 if (!axis->scale && sv != CSV_TOTAL)
1175 _("Summary function %s applies only to scale variables."),
1177 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1187 struct ctables_summary_spec_set *set = &axis->specs[sv];
1188 if (set->n >= set->allocated)
1189 set->specs = x2nrealloc (set->specs, &set->allocated,
1190 sizeof *set->specs);
1192 struct ctables_summary_spec *dst = &set->specs[set->n++];
1193 *dst = (struct ctables_summary_spec) {
1194 .function = function,
1195 .weighting = weighting,
1198 .percentile = percentile,
1199 .label = xstrdup_if_nonnull (label),
1200 .format = (format ? *format
1201 : ctables_summary_default_format (function, axis->var)),
1202 .is_ctables_format = is_ctables_format,
1208 for (size_t i = 0; i < 2; i++)
1209 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1210 percentile, label, format, is_ctables_format,
1217 static struct ctables_axis *ctables_axis_parse_stack (
1218 struct ctables_axis_parse_ctx *);
1221 static struct ctables_axis *
1222 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1224 if (lex_match (ctx->lexer, T_LPAREN))
1226 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1227 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1229 ctables_axis_destroy (sub);
1235 if (!lex_force_id (ctx->lexer))
1238 int start_ofs = lex_ofs (ctx->lexer);
1239 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1243 struct ctables_axis *axis = xmalloc (sizeof *axis);
1244 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1246 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1247 : lex_match_phrase (ctx->lexer, "[C]") ? false
1248 : var_get_measure (var) == MEASURE_SCALE);
1249 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1250 lex_ofs (ctx->lexer) - 1);
1251 if (axis->scale && var_is_alpha (var))
1253 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1255 var_get_name (var));
1256 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false
   otherwise.  strcspn() yields the length of the leading digit-free run of S,
   so the byte at that offset is the null terminator only if S has no digit. */
1264 has_digit (const char *s)
1266 return s[strcspn (s, "0123456789")] != '\0';
1270 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1271 bool *is_ctables_format)
1273 char type[FMT_TYPE_LEN_MAX + 1];
1274 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1277 if (!strcasecmp (type, "NEGPAREN"))
1278 format->type = CTEF_NEGPAREN;
1279 else if (!strcasecmp (type, "NEQUAL"))
1280 format->type = CTEF_NEQUAL;
1281 else if (!strcasecmp (type, "PAREN"))
1282 format->type = CTEF_PAREN;
1283 else if (!strcasecmp (type, "PCTPAREN"))
1284 format->type = CTEF_PCTPAREN;
1287 *is_ctables_format = false;
1288 return (parse_format_specifier (lexer, format)
1289 && fmt_check_output (format)
1290 && fmt_check_type_compat (format, VAL_NUMERIC));
1296 lex_next_error (lexer, -1, -1,
1297 _("Output format %s requires width 2 or greater."), type);
1300 else if (format->d > format->w - 1)
1302 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1303 "greater than decimals."), type);
1308 *is_ctables_format = true;
1313 static struct ctables_axis *
1314 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1316 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1317 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1320 enum ctables_summary_variant sv = CSV_CELL;
1323 int start_ofs = lex_ofs (ctx->lexer);
1325 /* Parse function. */
1326 enum ctables_summary_function function;
1327 enum ctables_weighting weighting;
1328 enum ctables_area_type area;
1329 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1333 /* Parse percentile. */
1334 double percentile = 0;
1335 if (function == CTSF_PTILE)
1337 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1339 percentile = lex_number (ctx->lexer);
1340 lex_get (ctx->lexer);
1345 if (lex_is_string (ctx->lexer))
1347 label = ss_xstrdup (lex_tokss (ctx->lexer));
1348 lex_get (ctx->lexer);
1352 struct fmt_spec format;
1353 const struct fmt_spec *formatp;
1354 bool is_ctables_format = false;
1355 if (lex_token (ctx->lexer) == T_ID
1356 && has_digit (lex_tokcstr (ctx->lexer)))
1358 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1359 &is_ctables_format))
1369 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1370 lex_ofs (ctx->lexer) - 1);
1371 add_summary_spec (sub, function, weighting, area, percentile, label,
1372 formatp, is_ctables_format, loc, sv);
1374 msg_location_destroy (loc);
1376 lex_match (ctx->lexer, T_COMMA);
1377 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1379 if (!lex_force_match (ctx->lexer, T_LBRACK))
1383 else if (lex_match (ctx->lexer, T_RBRACK))
1385 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1392 ctables_axis_destroy (sub);
1396 static const struct ctables_axis *
1397 find_scale (const struct ctables_axis *axis)
1401 else if (axis->op == CTAO_VAR)
1402 return axis->scale ? axis : NULL;
1405 for (size_t i = 0; i < 2; i++)
1407 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1415 static const struct ctables_axis *
1416 find_categorical_summary_spec (const struct ctables_axis *axis)
1420 else if (axis->op == CTAO_VAR)
1421 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1424 for (size_t i = 0; i < 2; i++)
1426 const struct ctables_axis *sum
1427 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one postfix axis expression followed by any
   number of T_GT-separated postfix expressions.  Each pair is combined
   into a CTAO_NEST node via ctables_axis_new_nonterminal.

   Two constraints are diagnosed below, and the new node is destroyed when
   either is violated:
     - scale variables may not appear on both sides of a nest;
     - a categorical variable on the outer (left) side may not carry
       summaries.
   NOTE(review): return statements and the update of lhs are not visible
   here; presumably NULL is returned on error and the nest becomes the new
   left operand on success. */
1435 static struct ctables_axis *
1436 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
/* Remember where the expression starts so that the nonterminal built below
   can be given a source location. */
1438 int start_ofs = lex_ofs (ctx->lexer);
1439 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1443 while (lex_match (ctx->lexer, T_GT))
1445 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* NOTE(review): presumably reached only when parsing rhs failed. */
1448 ctables_axis_destroy (lhs);
1452 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1453 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Reject a scale variable on both the outer and the inner side. */
1455 const struct ctables_axis *outer_scale = find_scale (lhs);
1456 const struct ctables_axis *inner_scale = find_scale (rhs);
1457 if (outer_scale && inner_scale)
1459 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1460 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1461 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1462 ctables_axis_destroy (nest);
/* Reject summaries attached to a categorical variable on the outer side. */
1466 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1469 msg_at (SE, nest->loc,
1470 _("Summaries may only be requested for categorical variables "
1471 "at the innermost nesting level."));
1472 msg_at (SN, outer_sum->loc,
1473 _("This outer categorical variable has a summary."));
1474 ctables_axis_destroy (nest);
/* Parses a stacking expression: one nesting expression followed by any
   number of T_PLUS-separated nesting expressions.  Each additional operand
   is folded into lhs as a CTAO_STACK node, so the resulting tree is
   left-associative.
   NOTE(review): the error and return paths are not visible here. */
1484 static struct ctables_axis *
1485 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1487 int start_ofs = lex_ofs (ctx->lexer);
1488 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1492 while (lex_match (ctx->lexer, T_PLUS))
1494 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* NOTE(review): presumably reached only when parsing rhs failed. */
1497 ctables_axis_destroy (lhs);
1501 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1502 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   resulting tree in t->axes[a].  The final return value is true when the
   stored tree is non-null.
   NOTE(review): DICT and CT are presumably passed on through the parse
   context, whose initializer is not visible here. */
1509 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1510 struct ctables *ct, struct ctables_table *t,
1511 enum pivot_axis_type a)
/* BY, a slash, or end of command means nothing is written for this axis.
   NOTE(review): the value returned in this case is not visible here. */
1513 if (lex_token (lexer) == T_BY
1514 || lex_token (lexer) == T_SLASH
1515 || lex_token (lexer) == T_ENDCMD)
1518 struct ctables_axis_parse_ctx ctx = {
1524 t->axes[a] = ctables_axis_parse_stack (&ctx);
1525 return t->axes[a] != NULL;
/* Destructor for CHISQ.  ctables_table_destroy calls it on t->chisq.
   NOTE(review): the body is not visible here. */
1529 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE.  ctables_table_destroy calls it on t->pairwise.
   NOTE(review): the body is not visible here. */
1535 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, its per-variable
   category references, the axis trees and stacks of every pivot axis, the
   summary spec array, the category-label value map, and the chisq and
   pairwise objects.
   NOTE(review): whether T itself is freed is not visible here. */
1541 ctables_table_destroy (struct ctables_table *t)
1546 for (size_t i = 0; i < t->n_sections; i++)
1547 ctables_section_uninit (&t->sections[i]);
/* Categories objects are reference counted, so each slot drops one
   reference before the array itself is freed. */
1550 for (size_t i = 0; i < t->n_categories; i++)
1551 ctables_categories_unref (t->categories[i]);
1552 free (t->categories);
1554 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1556 ctables_axis_destroy (t->axes[a]);
1557 ctables_stack_uninit (&t->stacks[a]);
1559 free (t->summary_specs.specs);
/* Drain the map with the SAFE iterator because nodes are deleted while
   iterating.  Each value is destroyed using the width of
   t->clabels_example. */
1561 struct ctables_value *ctv, *next_ctv;
1562 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1563 &t->clabels_values_map)
1565 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1566 hmap_delete (&t->clabels_values_map, &ctv->node);
1569 hmap_destroy (&t->clabels_values_map);
1570 free (t->clabels_values);
1576 ctables_chisq_destroy (t->chisq);
1577 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: every postcompute in ct->postcomputes
   (location, expression, summary specs), the map itself, the CTABLES format
   settings, the reference to the pivot table look, and every table in
   ct->tables.
   NOTE(review): whether CT itself and the tables array are freed is not
   visible here. */
1582 ctables_destroy (struct ctables *ct)
/* SAFE iteration is required because each node is deleted inside the loop. */
1587 struct ctables_postcompute *pc, *next_pc;
1588 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1592 msg_location_destroy (pc->location);
1593 ctables_pcexpr_destroy (pc->expr);
1597 ctables_summary_spec_set_uninit (pc->specs);
1600 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1603 hmap_destroy (&ct->postcomputes);
1605 fmt_settings_uninit (&ct->ctables_formats);
1606 pivot_table_look_unref (ct->look);
1610 for (size_t i = 0; i < ct->n_tables; i++)
1611 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose nrange member is { LOW, HIGH }.
   Callers in this file pass -DBL_MAX or DBL_MAX for an open end.
   NOTE(review): the type initializer is not visible here; presumably
   CCT_NRANGE. */
1616 static struct ctables_category
1617 cct_nrange (double low, double high)
1619 return (struct ctables_category) {
1621 .nrange = { low, high }
/* Returns, by value, a category whose srange member is { LOW, HIGH }.
   Callers in this file pass a substring with a null string pointer for an
   open end.  The substrings are stored as given, not copied.
   NOTE(review): the type initializer is not visible here; presumably
   CCT_SRANGE. */
1625 static struct ctables_category
1626 cct_srange (struct substring low, struct substring high)
1628 return (struct ctables_category) {
1630 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category after the
   keyword itself and stores a CCT_SUBTOTAL category in *CAT.
   HIDE_SUBCATEGORIES is copied into the category unchanged.

   When an equals sign follows, the next token must be a string and becomes
   the label; otherwise the translated default label Subtotal is used.  In
   both cases the label is a fresh heap copy stored in cat->total_label.
   NOTE(review): the return statements are not visible here. */
1635 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1636 struct ctables_category *cat)
1639 if (lex_match (lexer, T_EQUALS))
1641 if (!lex_force_string (lexer))
1644 total_label = ss_xstrdup (lex_tokss (lexer));
1648 total_label = xstrdup (_("Subtotal"));
1650 *cat = (struct ctables_category) {
1651 .type = CCT_SUBTOTAL,
1652 .hide_subcategories = hide_subcategories,
1653 .total_label = total_label
/* Converts the string token currently at the head of LEXER into a
   substring: the token text is recoded by recode_substring_pool, given the
   encoding of DICT and UTF-8 with a null pool, and trailing spaces are then
   trimmed.
   NOTE(review): the token is presumably consumed and the substring returned
   on lines not visible here; callers release the result with ss_dealloc. */
1658 static struct substring
1659 parse_substring (struct lexer *lexer, struct dictionary *dict)
1661 struct substring s = recode_substring_pool (
1662 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1663 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list and stores it in *CAT.
   The forms recognized by the visible code are:

     OTHERNM, MISSING               keyword categories
     SUBTOTAL, HSUBTOTAL            via ctables_table_parse_subtotal
     LO THRU string-or-number       range open at the low end
     number [THRU HI | THRU number] single number or numeric range
     string [THRU HI | THRU string] single string or string range
     T_AND followed by an identifier, naming a postcompute

   Anything else is reported through lex_error.
   NOTE(review): one parameter line and the return statements are not
   visible here; CT is used below to look up postcomputes. */
1669 ctables_table_parse_explicit_category (struct lexer *lexer,
1670 struct dictionary *dict,
1672 struct ctables_category *cat)
1674 if (lex_match_id (lexer, "OTHERNM"))
1675 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1676 else if (lex_match_id (lexer, "MISSING"))
1677 *cat = (struct ctables_category) { .type = CCT_MISSING };
/* The only difference between the two subtotal keywords is the
   hide_subcategories flag. */
1678 else if (lex_match_id (lexer, "SUBTOTAL"))
1679 return ctables_table_parse_subtotal (lexer, false, cat);
1680 else if (lex_match_id (lexer, "HSUBTOTAL"))
1681 return ctables_table_parse_subtotal (lexer, true, cat);
1682 else if (lex_match_id (lexer, "LO"))
1684 if (!lex_force_match_id (lexer, "THRU"))
1686 if (lex_is_string (lexer))
/* A null string pointer marks the open low end of a string range. */
1688 struct substring sr0 = { .string = NULL };
1689 struct substring sr1 = parse_substring (lexer, dict);
1690 *cat = cct_srange (sr0, sr1);
1692 else if (lex_force_num (lexer))
/* -DBL_MAX marks the open low end of a numeric range. */
1694 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1700 else if (lex_is_number (lexer))
1702 double number = lex_number (lexer);
1704 if (lex_match_id (lexer, "THRU"))
1706 if (lex_match_id (lexer, "HI"))
1707 *cat = cct_nrange (number, DBL_MAX);
1710 if (!lex_force_num (lexer))
1712 *cat = cct_nrange (number, lex_number (lexer));
1717 *cat = (struct ctables_category) {
1722 else if (lex_is_string (lexer))
1724 struct substring s = parse_substring (lexer, dict);
1725 if (lex_match_id (lexer, "THRU"))
1727 if (lex_match_id (lexer, "HI"))
1729 struct substring sr1 = { .string = NULL };
1730 *cat = cct_srange (s, sr1);
/* NOTE(review): s is heap-allocated by parse_substring; confirm that the
   failure path here releases it (the statements are not visible). */
1734 if (!lex_force_string (lexer))
1739 struct substring sr1 = parse_substring (lexer, dict);
1740 *cat = cct_srange (s, sr1);
1744 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1746 else if (lex_match (lexer, T_AND))
1748 if (!lex_force_id (lexer))
1750 struct ctables_postcompute *pc = ctables_find_postcompute (
1751 ct, lex_tokcstr (lexer));
/* The location passed to msg_at spans the previous token through the
   current one (offsets -1 to 0). */
1754 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1755 msg_at (SE, loc, _("Unknown postcompute &%s."),
1756 lex_tokcstr (lexer));
1757 msg_location_destroy (loc);
1762 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1766 lex_error (lexer, NULL);
/* Parses S as data in FORMAT by calling data_in with the encoding of DICT
   and the current format settings.  When data_in reports an error, a
   message naming the format and the error text is issued at LOCATION.
   NOTE(review): presumably the parsed number is stored in *N and a bool
   reports success; those statements are not visible here. */
1774 parse_category_string (struct msg_location *location,
1775 struct substring s, const struct dictionary *dict,
1776 enum fmt_type format, double *n)
1779 char *error = data_in (s, dict_get_encoding (dict), format,
1780 settings_get_fmt_settings (), &v, 0, NULL);
1783 msg_at (SE, location,
1784 _("Failed to parse category specification as format %s: %s."),
1785 fmt_name (format), error);
/* Scans CATS linearly for the category that postcompute leaf expression E
   refers to.  Each case of e->op is paired with the category type of the
   same kind and, where applicable, an equality test on the number, string,
   or range bounds.

   Subtotals are counted in n_subtotals as they are met, and
   e->subtotal_index is compared against that count; an index of 0 is
   handled separately.
   NOTE(review): the assignments to best and the return statements are not
   visible here.  The final test suggests that an unindexed subtotal
   reference is rejected when more than one subtotal exists; confirm. */
1794 static struct ctables_category *
1795 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1796 const struct ctables_pcexpr *e)
1798 struct ctables_category *best = NULL;
1799 size_t n_subtotals = 0;
1800 for (size_t i = 0; i < cats->n_cats; i++)
1802 struct ctables_category *cat = &cats->cats[i];
1805 case CTPO_CAT_NUMBER:
1806 if (cat->type == CCT_NUMBER && cat->number == e->number)
1810 case CTPO_CAT_STRING:
1811 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
/* Numeric ranges match only when both bounds are exactly equal. */
1815 case CTPO_CAT_NRANGE:
1816 if (cat->type == CCT_NRANGE
1817 && cat->nrange[0] == e->nrange[0]
1818 && cat->nrange[1] == e->nrange[1])
/* String range bounds may be null (open end), hence the nullable compare. */
1822 case CTPO_CAT_SRANGE:
1823 if (cat->type == CCT_SRANGE
1824 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1825 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1829 case CTPO_CAT_MISSING:
1830 if (cat->type == CCT_MISSING)
1834 case CTPO_CAT_OTHERNM:
1835 if (cat->type == CCT_OTHERNM)
1839 case CTPO_CAT_SUBTOTAL:
1840 if (cat->type == CCT_SUBTOTAL)
1843 if (e->subtotal_index == n_subtotals)
1845 else if (e->subtotal_index == 0)
1850 case CTPO_CAT_TOTAL:
1851 if (cat->type == CCT_TOTAL)
1865 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute leaf expression E refers to.

   When PARSE_FORMAT is not FMT_F, string-based references are first
   converted to numeric ones by parsing their text with
   parse_category_string in PARSE_FORMAT: a CTPO_CAT_STRING becomes a
   CTPO_CAT_NUMBER and a CTPO_CAT_SRANGE becomes a CTPO_CAT_NRANGE.  The
   converted expression keeps the location of E.  All other cases are
   looked up unchanged.
   NOTE(review): the behavior when parsing fails is not visible here. */
1870 static struct ctables_category *
1871 ctables_find_category_for_postcompute (const struct dictionary *dict,
1872 const struct ctables_categories *cats,
1873 enum fmt_type parse_format,
1874 const struct ctables_pcexpr *e)
1876 if (parse_format != FMT_F)
1878 if (e->op == CTPO_CAT_STRING)
1881 if (!parse_category_string (e->location, e->string, dict,
1882 parse_format, &number))
1885 struct ctables_pcexpr e2 = {
1886 .op = CTPO_CAT_NUMBER,
1888 .location = e->location,
1890 return ctables_find_category_for_postcompute__ (cats, &e2);
1892 else if (e->op == CTPO_CAT_SRANGE)
/* A null bound string is an open end and maps to -DBL_MAX or DBL_MAX;
   otherwise the bound text is parsed as a number. */
1895 if (!e->srange[0].string)
1896 nrange[0] = -DBL_MAX;
1897 else if (!parse_category_string (e->location, e->srange[0], dict,
1898 parse_format, &nrange[0]))
1901 if (!e->srange[1].string)
1902 nrange[1] = DBL_MAX;
1903 else if (!parse_category_string (e->location, e->srange[1], dict,
1904 parse_format, &nrange[1]))
1907 struct ctables_pcexpr e2 = {
1908 .op = CTPO_CAT_NRANGE,
1909 .nrange = { nrange[0], nrange[1] },
1910 .location = e->location,
1912 return ctables_find_category_for_postcompute__ (cats, &e2);
1915 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, used by computed category PC_CAT,
   against the category list CATS, recursing into both operands of
   non-leaf nodes.  CATS_LOCATION is where the category list appears in the
   syntax and is used for diagnostics.

   For each category-reference leaf, the referenced category is looked up
   with ctables_find_category_for_postcompute using the parse format of
   PC_CAT.  The visible diagnostics cover two situations:
     - an unindexed subtotal reference when CATS holds more than one
       subtotal;
     - a reference to a category that is not in CATS, followed by a note
       suggesting how to fix it.
   NOTE(review): the conditions guarding these branches, the handling of
   hide_source_cats, and the return statements are not visible here. */
1919 ctables_recursive_check_postcompute (struct dictionary *dict,
1920 const struct ctables_pcexpr *e,
1921 struct ctables_category *pc_cat,
1922 const struct ctables_categories *cats,
1923 const struct msg_location *cats_location)
1927 case CTPO_CAT_NUMBER:
1928 case CTPO_CAT_STRING:
1929 case CTPO_CAT_NRANGE:
1930 case CTPO_CAT_SRANGE:
1931 case CTPO_CAT_MISSING:
1932 case CTPO_CAT_OTHERNM:
1933 case CTPO_CAT_SUBTOTAL:
1934 case CTPO_CAT_TOTAL:
1936 struct ctables_category *cat = ctables_find_category_for_postcompute (
1937 dict, cats, pc_cat->parse_format, e);
1940 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so the message can state how many there are. */
1942 size_t n_subtotals = 0;
1943 for (size_t i = 0; i < cats->n_cats; i++)
1944 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1945 if (n_subtotals > 1)
1947 msg_at (SE, cats_location,
1948 ngettext ("These categories include %zu instance "
1949 "of SUBTOTAL or HSUBTOTAL, so references "
1950 "from computed categories must refer to "
1951 "subtotals by position, "
1952 "e.g. SUBTOTAL[1].",
1953 "These categories include %zu instances "
1954 "of SUBTOTAL or HSUBTOTAL, so references "
1955 "from computed categories must refer to "
1956 "subtotals by position, "
1957 "e.g. SUBTOTAL[1].",
1960 msg_at (SN, e->location,
1961 _("This is the reference that lacks a position."));
1966 msg_at (SE, pc_cat->location,
1967 _("Computed category &%s references a category not included "
1968 "in the category list."),
1970 msg_at (SN, e->location, _("This is the missing category."));
/* The follow-up note depends on the kind of the missing reference.  Only
   the TOTAL case uses msg rather than msg_at, so that note carries no
   source location. */
1971 if (e->op == CTPO_CAT_SUBTOTAL)
1972 msg_at (SN, cats_location,
1973 _("To fix the problem, add subtotals to the "
1974 "list of categories here."));
1975 else if (e->op == CTPO_CAT_TOTAL)
1976 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1977 "CATEGORIES specification."));
1979 msg_at (SN, cats_location,
1980 _("To fix the problem, add the missing category to the "
1981 "list of categories here."));
1984 if (pc_cat->pc->hide_source_cats)
/* Non-leaf node: check each non-null operand, stopping at the first one
   that fails. */
1998 for (size_t i = 0; i < 2; i++)
1999 if (e->subs[i] && !ctables_recursive_check_postcompute (
2000 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric ones.  When a numeric
   variable is found, an error naming it is issued at the location of CAT,
   because CAT is a specification that applies only to string variables.
   NOTE(review): the return statements are not visible here; presumably
   false on the first numeric variable and true otherwise. */
2009 all_strings (struct variable **vars, size_t n_vars,
2010 const struct ctables_category *cat)
2012 for (size_t j = 0; j < n_vars; j++)
2013 if (var_is_numeric (vars[j]))
2015 msg_at (SE, cat->location,
2016 _("This category specification may be applied only to string "
2017 "variables, but this subcommand tries to apply it to "
2018 "numeric variable %s."),
2019 var_get_name (vars[j]));
/* Parses a CATEGORIES subcommand for table T.  The visible steps are:

   1. Require the VARIABLES keyword and parse a variable list.
   2. Determine the print format type shared by all the variables, if any,
      and test whether it is a date, time, or date-component format.
   3. Allocate one categories object shared by all listed variables
      (n_refs starts at the number of variables) and install it in
      t->categories at the dictionary index of each variable, dropping the
      reference previously held there.
   4. Parse an optional bracketed explicit category list.
   5. Parse options.  ORDER, KEY and MISSING are accepted only when no
      explicit categories were given; TOTAL, LABEL, POSITION and EMPTY are
      always accepted.
   6. Append the implicit category and the totals category.
   7. Link categories to subtotals and validate explicit categories against
      the variable types, converting strings to numbers where the common
      format calls for it.

   NOTE(review): many guards, cleanup paths and the return statements are
   not visible here; comments below describe only what is shown. */
2026 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2027 struct ctables *ct, struct ctables_table *t)
2029 if (!lex_match_id (lexer, "VARIABLES"))
2031 lex_match (lexer, T_EQUALS);
2033 struct variable **vars;
2035 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Start from the print format of the first variable and clear
   common_format when any later variable has a different format type. */
2038 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2039 for (size_t i = 1; i < n_vars; i++)
2041 const struct fmt_spec *f = var_get_print_format (vars[i]);
2042 if (f->type != common_format->type)
2044 common_format = NULL;
2050 && (fmt_get_category (common_format->type)
2051 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
2053 struct ctables_categories *c = xmalloc (sizeof *c);
2054 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2055 for (size_t i = 0; i < n_vars; i++)
2057 struct ctables_categories **cp
2058 = &t->categories[var_get_dict_index (vars[i])];
2059 ctables_categories_unref (*cp);
/* Token offsets of the explicit list; -1 means no list was given. */
2063 size_t allocated_cats = 0;
2064 int cats_start_ofs = -1;
2065 int cats_end_ofs = -1;
2066 if (lex_match (lexer, T_LBRACK))
2068 cats_start_ofs = lex_ofs (lexer);
/* Grow the array before parsing into the next free slot. */
2071 if (c->n_cats >= allocated_cats)
2072 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2074 int start_ofs = lex_ofs (lexer);
2075 struct ctables_category *cat = &c->cats[c->n_cats];
2076 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2078 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* The comma between list entries is optional. */
2081 lex_match (lexer, T_COMMA);
2083 while (!lex_match (lexer, T_RBRACK));
2084 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category; the options below adjust it. */
2087 struct ctables_category cat = {
2089 .include_missing = false,
2090 .sort_ascending = true,
2092 bool show_totals = false;
2093 char *total_label = NULL;
2094 bool totals_before = false;
2095 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2097 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2099 lex_match (lexer, T_EQUALS);
2100 if (lex_match_id (lexer, "A"))
2101 cat.sort_ascending = true;
2102 else if (lex_match_id (lexer, "D"))
2103 cat.sort_ascending = false;
2106 lex_error_expecting (lexer, "A", "D");
2110 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2112 lex_match (lexer, T_EQUALS);
2113 if (lex_match_id (lexer, "VALUE"))
2114 cat.type = CCT_VALUE;
2115 else if (lex_match_id (lexer, "LABEL"))
2116 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function to sort by. */
2119 cat.type = CCT_FUNCTION;
2120 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2121 &cat.weighting, &cat.area))
2124 if (lex_match (lexer, T_LPAREN))
2126 cat.sort_var = parse_variable (lexer, dict);
/* PTILE takes an extra numeric argument in the closed range 0 to 100. */
2130 if (cat.sort_function == CTSF_PTILE)
2132 lex_match (lexer, T_COMMA);
2133 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2135 cat.percentile = lex_number (lexer);
2139 if (!lex_force_match (lexer, T_RPAREN))
2142 else if (ctables_function_availability (cat.sort_function)
/* The result is deliberately unused: the call is made for the error message
   that lex_force_match issues when the parenthesis is absent. */
2145 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2150 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2152 lex_match (lexer, T_EQUALS);
2153 if (lex_match_id (lexer, "INCLUDE"))
2154 cat.include_missing = true;
2155 else if (lex_match_id (lexer, "EXCLUDE"))
2156 cat.include_missing = false;
2159 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2163 else if (lex_match_id (lexer, "TOTAL"))
2165 lex_match (lexer, T_EQUALS);
2166 if (!parse_bool (lexer, &show_totals))
2169 else if (lex_match_id (lexer, "LABEL"))
2171 lex_match (lexer, T_EQUALS);
2172 if (!lex_force_string (lexer))
/* NOTE(review): if LABEL may be given more than once, confirm that the
   earlier total_label is freed before this assignment. */
2175 total_label = ss_xstrdup (lex_tokss (lexer));
2178 else if (lex_match_id (lexer, "POSITION"))
2180 lex_match (lexer, T_EQUALS);
2181 if (lex_match_id (lexer, "BEFORE"))
2182 totals_before = true;
2183 else if (lex_match_id (lexer, "AFTER"))
2184 totals_before = false;
2187 lex_error_expecting (lexer, "BEFORE", "AFTER");
2191 else if (lex_match_id (lexer, "EMPTY"))
2193 lex_match (lexer, T_EQUALS);
2194 if (lex_match_id (lexer, "INCLUDE"))
2195 c->show_empty = true;
2196 else if (lex_match_id (lexer, "EXCLUDE"))
2197 c->show_empty = false;
2200 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the expected-keyword message: the full option list and
   the shorter one without ORDER, KEY and MISSING.
   NOTE(review): the condition selecting between them is not visible;
   presumably whether explicit categories were given. */
2207 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2208 "TOTAL", "LABEL", "POSITION", "EMPTY");
2210 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the options above.
   NOTE(review): the guard for this step is not visible here. */
2217 if (c->n_cats >= allocated_cats)
2218 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2219 c->cats[c->n_cats++] = cat;
/* Make room for the totals category, placed at index 0 or at the end.
   NOTE(review): the guard on show_totals and the condition choosing the
   position are not visible; presumably totals_before selects index 0. */
2224 if (c->n_cats >= allocated_cats)
2225 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2227 struct ctables_category *totals;
2230 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2231 totals = &c->cats[0];
2234 totals = &c->cats[c->n_cats];
2237 *totals = (struct ctables_category) {
2239 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come before and backward
   otherwise.  In the backward form the decrement happens inside the loop
   condition, so the third clause is a no-op. */
2243 struct ctables_category *subtotal = NULL;
2244 for (size_t i = totals_before ? 0 : c->n_cats;
2245 totals_before ? i < c->n_cats : i-- > 0;
2246 totals_before ? i++ : 0)
2248 struct ctables_category *cat = &c->cats[i];
2257 cat->subtotal = subtotal;
2260 case CCT_POSTCOMPUTE:
2271 case CCT_EXCLUDED_MISSING:
/* Validation applies only when an explicit list was given. */
2276 if (cats_start_ofs != -1)
2278 for (size_t i = 0; i < c->n_cats; i++)
2280 struct ctables_category *cat = &c->cats[i];
2283 case CCT_POSTCOMPUTE:
2284 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2285 struct msg_location *cats_location
2286 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2287 bool ok = ctables_recursive_check_postcompute (
2288 dict, cat->pc->expr, cat, c, cats_location);
2289 msg_location_destroy (cats_location);
/* Numeric-only specifications are rejected for string variables. */
2296 for (size_t j = 0; j < n_vars; j++)
2297 if (var_is_alpha (vars[j]))
2299 msg_at (SE, cat->location,
2300 _("This category specification may be applied "
2301 "only to numeric variables, but this "
2302 "subcommand tries to apply it to string "
2304 var_get_name (vars[j]));
/* A string category is parsed in the common format and converted in place
   to a numeric category; its string storage is released. */
2313 if (!parse_category_string (cat->location, cat->string, dict,
2314 common_format->type, &n))
2317 ss_dealloc (&cat->string);
2319 cat->type = CCT_NUMBER;
2322 else if (!all_strings (vars, n_vars, cat))
/* Same conversion for string ranges; a null bound is an open end. */
2331 if (!cat->srange[0].string)
2333 else if (!parse_category_string (cat->location,
2334 cat->srange[0], dict,
2335 common_format->type, &n[0]))
2338 if (!cat->srange[1].string)
2340 else if (!parse_category_string (cat->location,
2341 cat->srange[1], dict,
2342 common_format->type, &n[1]))
2345 ss_dealloc (&cat->srange[0]);
2346 ss_dealloc (&cat->srange[1]);
2348 cat->type = CCT_NRANGE;
2349 cat->nrange[0] = n[0];
2350 cat->nrange[1] = n[1];
2352 else if (!all_strings (vars, n_vars, cat))
2363 case CCT_EXCLUDED_MISSING:
/* Releases resources owned by NEST: the summary spec set of every summary
   variant and the areas array of every area type.
   NOTE(review): further frees may exist on lines not visible here. */
2378 ctables_nest_uninit (struct ctables_nest *nest)
2381 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2382 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2383 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2384 free (nest->areas[at]);
/* Uninitializes every nest in STACK and frees the nests array.  STACK
   itself is not freed by the visible code. */
2388 ctables_stack_uninit (struct ctables_stack *stack)
2392 for (size_t i = 0; i < stack->n; i++)
2393 ctables_nest_uninit (&stack->nests[i]);
2394 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new nest
   for every pair formed from a nest of S0 and a nest of S1.  The variables
   of each new nest are those of the S0 nest followed by those of the S1
   nest.  Both inputs are passed by value and are uninitialized before
   returning, so the caller must not use them afterward.
   NOTE(review): the early-out cases and the return are not visible here. */
2398 static struct ctables_stack
2399 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* NOTE(review): the second argument multiplies s1.n by the element size
   before xnmalloc sees it, so that product is computed outside of the
   call. */
2406 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2407 for (size_t i = 0; i < s0.n; i++)
2408 for (size_t j = 0; j < s1.n; j++)
2410 const struct ctables_nest *a = &s0.nests[i];
2411 const struct ctables_nest *b = &s1.nests[j];
2413 size_t allocate = a->n + b->n;
2414 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2416 for (size_t k = 0; k < a->n; k++)
2417 vars[n++] = a->vars[k];
2418 for (size_t k = 0; k < b->n; k++)
2419 vars[n++] = b->vars[k];
2420 assert (n == allocate);
/* Choose which nest supplies the summary specs, based on which side has a
   summary variable.  NOTE(review): the assignments are not visible. */
2422 const struct ctables_nest *summary_src;
2423 if (!a->specs[CSV_CELL].var)
2425 else if (!b->specs[CSV_CELL].var)
2430 struct ctables_nest *new = &stack.nests[stack.n++];
2431 *new = (struct ctables_nest) {
/* Indexes from b are shifted by a->n because its variables follow those of
   a in the combined array.  SIZE_MAX is tested as the value meaning no
   index. */
2433 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2434 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2436 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2437 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2441 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2442 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2444 ctables_stack_uninit (&s0);
2445 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S0 and S1: the nests of S0
   followed by the nests of S1, copied by structure assignment into a newly
   allocated array.  The group_head of each nest taken from S1 is increased
   by s0.n to account for its new position.
   NOTE(review): disposal of the input arrays and the return are not
   visible here. */
2449 static struct ctables_stack
2450 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2452 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2453 for (size_t i = 0; i < s0.n; i++)
2454 stack.nests[stack.n++] = s0.nests[i];
2455 for (size_t i = 0; i < s1.n; i++)
2457 stack.nests[stack.n] = s1.nests[i];
2458 stack.nests[stack.n].group_head += s0.n;
2461 assert (stack.n == s0.n + s1.n);
/* Builds a stack containing a single nest for the variable axis node A.
   The nest has room for one variable.  Its scale_idx is 0 when A is a scale
   variable, and its summary_idx is 0 when A has cell summary specs or is a
   scale variable; otherwise each is SIZE_MAX.  The summary specs of every
   variant are cloned from A and tagged with the variable and scale flag. */
2467 static struct ctables_stack
2468 var_fts (const struct ctables_axis *a)
2470 struct variable **vars = xmalloc (sizeof *vars);
2473 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2474 struct ctables_nest *nest = xmalloc (sizeof *nest);
2475 *nest = (struct ctables_nest) {
2478 .scale_idx = a->scale ? 0 : SIZE_MAX,
2479 .summary_idx = is_summary ? 0 : SIZE_MAX,
2482 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2484 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2485 nest->specs[sv].var = a->var;
2486 nest->specs[sv].is_scale = a->scale;
2488 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts axis expression A into a stack of nests.  One path
   yields an empty stack, one combines the stacks of both operands with
   stack_fts, and one combines them with nest_fts.  AXIS_TYPE is passed
   unchanged to the recursive calls.
   NOTE(review): the conditions selecting each path are not visible here;
   presumably a null A, CTAO_STACK and CTAO_NEST respectively, with
   CTAO_VAR handled by var_fts. */
2491 static struct ctables_stack
2492 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2495 return (struct ctables_stack) { .n = 0 };
2503 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2504 enumerate_fts (axis_type, a->subs[1]));
2507 /* This should consider any of the scale variables found in the result to
2508 be linked to each other listwise for SMISSING=LISTWISE. */
2509 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2510 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for a single summary.  Which members are in use depends on
   the summary function, as the comments on the members indicate; the same
   function must be used to initialize, update, read and release a given
   accumulator.
   NOTE(review): several member declarations are not visible here. */
2516 union ctables_summary
2518 /* COUNT, VALIDN, TOTALN. */
2521 /* MINIMUM, MAXIMUM, RANGE. */
2528 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2529 struct moments1 *moments;
2531 /* MEDIAN, MODE, PTILE. */
2534 struct casewriter *writer;
2539 /* XXX multiple response */
/* Initializes accumulator S for the summary function named by SS.
   The visible cases show three kinds of state: min and max both start as
   SYSMIS; moment-based functions create a moments1 object tracking either
   the mean or the variance; order-statistic functions create a sorting
   casewriter over two-column numeric cases ordered ascending on column 0.
   NOTE(review): most case labels are not visible here. */
2543 ctables_summary_init (union ctables_summary *s,
2544 const struct ctables_summary_spec *ss)
2546 switch (ss->function)
2549 case CTSF_areaPCT_COUNT:
2550 case CTSF_areaPCT_VALIDN:
2551 case CTSF_areaPCT_TOTALN:
2564 s->min = s->max = SYSMIS;
2569 case CTSF_areaPCT_SUM:
2570 s->moments = moments1_create (MOMENT_MEAN);
2576 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric (width 0) columns; ctables_summary_add writes the value to
   column 0 and the weight to column 1. */
2583 struct caseproto *proto = caseproto_create ();
2584 proto = caseproto_add_width (proto, 0);
2585 proto = caseproto_add_width (proto, 0);
2587 struct subcase ordering;
2588 subcase_init (&ordering, 0, 0, SC_ASCEND);
2589 s->writer = sort_create_writer (&ordering, proto);
2590 subcase_uninit (&ordering);
2591 caseproto_unref (proto);
/* Releases the state that ctables_summary_init created in S for the summary
   function named by SS: the moments1 object for moment-based functions and
   the casewriter for order-statistic functions.
   NOTE(review): most case labels are not visible here; count-based
   functions presumably need no cleanup. */
2601 ctables_summary_uninit (union ctables_summary *s,
2602 const struct ctables_summary_spec *ss)
2604 switch (ss->function)
2607 case CTSF_areaPCT_COUNT:
2608 case CTSF_areaPCT_VALIDN:
2609 case CTSF_areaPCT_TOTALN:
2628 case CTSF_areaPCT_SUM:
2629 moments1_destroy (s->moments);
2635 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function named by
   SS.  VALUE is the observed value of VAR.  The boolean flags describe how
   the observation relates to missing values and included categories and
   decide whether it contributes; the inclusion rules are tabulated in the
   comment inside the body.

   Visible behavior by function family:
     - minimum and maximum are updated only for values that are not
       scale-missing, with SYSMIS meaning no value seen yet;
     - moment-based functions add the value with its weight when it is not
       scale-missing (the area percentage sum also requires not missing);
     - order-statistic functions add the weight to s->ovalid and write a
       case holding the value and the weight to the casewriter.
   NOTE(review): the last parameter line and most case labels are not
   visible here; a weight variable is used by the body. */
2641 ctables_summary_add (union ctables_summary *s,
2642 const struct ctables_summary_spec *ss,
2643 const struct variable *var, const union value *value,
2644 bool is_scale, bool is_scale_missing,
2645 bool is_missing, bool is_included,
2648 /* To determine whether a case is included in a given table for a particular
2649 kind of summary, consider the following charts for each variable in the
2650 table. Only if "yes" appears for every variable for the summary is the
2653 Categorical variables: VALIDN COUNT TOTALN
2654 Valid values in included categories yes yes yes
2655 Missing values in included categories --- yes yes
2656 Missing values in excluded categories --- --- yes
2657 Valid values in excluded categories --- --- ---
2659 Scale variables: VALIDN COUNT TOTALN
2660 Valid value yes yes yes
2661 Missing value --- yes yes
2663 Missing values include both user- and system-missing. (The system-missing
2664 value is always in an excluded category.)
2666 switch (ss->function)
2672 case CTSF_areaPCT_TOTALN:
2677 if (is_scale || is_included)
2681 case CTSF_areaPCT_COUNT:
2682 if (is_scale || is_included)
2693 case CTSF_areaPCT_VALIDN:
2713 if (!is_scale_missing)
2715 assert (!var_is_alpha (var)); /* XXX? */
2716 if (s->min == SYSMIS || value->f < s->min)
2718 if (s->max == SYSMIS || value->f > s->max)
2728 if (!is_scale_missing)
2729 moments1_add (s->moments, value->f, weight);
2732 case CTSF_areaPCT_SUM:
2733 if (!is_missing && !is_scale_missing)
2734 moments1_add (s->moments, value->f, weight);
2740 if (!is_scale_missing)
2742 s->ovalid += weight;
/* Column 0 holds the value and column 1 the weight, matching the prototype
   built in ctables_summary_init. */
2744 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2745 *case_num_rw_idx (c, 0) = value->f;
2746 *case_num_rw_idx (c, 1) = weight;
2747 casewriter_write (s->writer, c);
/* Computes the final value of accumulator S for the summary function named
   by SS, in the context of CELL.

   Visible behavior by function family:
     - area percentages divide the cell count by the count, valid or total
       of the area selected by ss->calc_area and ss->weighting, times 100,
       and yield SYSMIS when that denominator is zero;
     - range is max minus min, or SYSMIS when either is SYSMIS;
     - moment-based results come from moments1_calculate, and the sum is
       computed as weight times mean;
     - order statistics turn the casewriter into a reader and compute the
       result into s->ovalue.
   NOTE(review): most case labels and return statements are not visible
   here. */
2754 ctables_summary_value (const struct ctables_cell *cell,
2755 union ctables_summary *s,
2756 const struct ctables_summary_spec *ss)
2758 switch (ss->function)
2764 return cell->areas[ss->calc_area]->sequence;
2766 case CTSF_areaPCT_COUNT:
2768 const struct ctables_area *a = cell->areas[ss->calc_area];
2769 double a_count = a->count[ss->weighting];
2770 return a_count ? s->count / a_count * 100 : SYSMIS;
2773 case CTSF_areaPCT_VALIDN:
2775 const struct ctables_area *a = cell->areas[ss->calc_area];
2776 double a_valid = a->valid[ss->weighting];
2777 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2780 case CTSF_areaPCT_TOTALN:
2782 const struct ctables_area *a = cell->areas[ss->calc_area];
2783 double a_total = a->total[ss->weighting];
2784 return a_total ? s->count / a_total * 100 : SYSMIS;
2799 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2804 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2810 double weight, variance;
2811 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2812 return calc_semean (variance, weight);
2818 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2819 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2824 double weight, mean;
2825 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2826 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2832 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2836 case CTSF_areaPCT_SUM:
2838 double weight, mean;
2839 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2840 if (weight == SYSMIS || mean == SYSMIS)
/* Percentage of the area sum for the summed variable at ss->sum_var_idx. */
2843 const struct ctables_area *a = cell->areas[ss->calc_area];
2844 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2845 double denom = sum->sum[ss->weighting];
2846 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2853 struct casereader *reader = casewriter_make_reader (s->writer);
/* NOTE(review): ss->percentile is passed through unscaled next to the
   literal 0.5 used otherwise, while the PTILE parsers in this file accept
   values from 0 to 100.  Confirm which scale percentile_create expects. */
2856 struct percentile *ptile = percentile_create (
2857 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2858 struct order_stats *os = &ptile->parent;
2859 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2860 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2861 statistic_destroy (&ptile->parent.parent);
2868 struct casereader *reader = casewriter_make_reader (s->writer);
2871 struct mode *mode = mode_create ();
2872 struct order_stats *os = &mode->parent;
2873 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2874 s->ovalue = mode->mode;
2875 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way: the nest whose variables
   define the sort keys and the pivot axis whose cell values are compared. */
2883 struct ctables_cell_sort_aux
2885 const struct ctables_nest *nest;
2886 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting along one axis.  A_ and B_
   point to pointers to struct ctables_cell, and AUX_ points to a struct
   ctables_cell_sort_aux.

   The variables of the nest are compared in order, skipping the scale
   variable.  Cells whose categories differ are ordered by category
   pointer.  Within the same category the ordering depends on the category
   type: some types always compare equal, others compare the values, and
   label ordering compares value labels with strcmp.  Where sort_ascending
   is consulted, a false value negates the result.
   NOTE(review): most case labels and the handling of missing labels are not
   visible here. */
2890 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2892 const struct ctables_cell_sort_aux *aux = aux_;
2893 struct ctables_cell *const *ap = a_;
2894 struct ctables_cell *const *bp = b_;
2895 const struct ctables_cell *a = *ap;
2896 const struct ctables_cell *b = *bp;
2898 const struct ctables_nest *nest = aux->nest;
2899 for (size_t i = 0; i < nest->n; i++)
2900 if (i != nest->scale_idx)
2902 const struct variable *var = nest->vars[i];
2903 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2904 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by address of the category object. */
2905 if (a_cv->category != b_cv->category)
2906 return a_cv->category > b_cv->category ? 1 : -1;
2908 const union value *a_val = &a_cv->value;
2909 const union value *b_val = &b_cv->value;
2910 switch (a_cv->category->type)
2916 case CCT_POSTCOMPUTE:
2917 case CCT_EXCLUDED_MISSING:
2918 /* Must be equal. */
2926 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2934 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2936 return a_cv->category->sort_ascending ? cmp : -cmp;
2942 const char *a_label = var_lookup_value_label (var, a_val);
2943 const char *b_label = var_lookup_value_label (var, b_val);
2949 cmp = strcmp (a_label, b_label);
2955 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2958 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, taking the pivot
   axes in order.  A_ and B_ point to pointers to struct ctables_cell.  AUX
   is unused.
   NOTE(review): the test guarding the return below and the final return are
   not visible here; presumably the first axis whose leaves differ decides
   and equal cells yield 0. */
2970 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2971 const void *aux UNUSED)
2973 struct ctables_cell *const *ap = a_;
2974 struct ctables_cell *const *bp = b_;
2975 const struct ctables_cell *a = *ap;
2976 const struct ctables_cell *b = *bp;
2978 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2980 int al = a->axes[axis].leaf;
2981 int bl = b->axes[axis].leaf;
2983 return al > bl ? 1 : -1;
2991 For each ctables_table:
2992 For each combination of row vars:
2993 For each combination of column vars:
2994 For each combination of layer vars:
2996 Make a table of row values:
2997 Sort entries by row values
2998 Assign a 0-based index to each actual value
2999 Construct a dimension
3000 Make a table of column values
3001 Make a table of layer values
3003 Fill the table entry using the indexes from before.
/* Finds or creates the area of type AREA in section S that CELL belongs
   to.

   The key consists of the cell values of the variables listed in
   nest->areas[AREA] for every pivot axis.  Each contributes its category
   pointer and, unless the category is a total, subtotal or postcompute,
   its data value.  The hash and the equality test below use the same rule.

   When no existing area matches, a new one is allocated with CELL as its
   example and a zeroed sums array sized by the number of summed variables
   of the table, then inserted into s->areas[AREA].
   NOTE(review): the initialization of hash, the mismatch handling and the
   return statements are not visible here. */
3006 static struct ctables_area *
3007 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3008 enum ctables_area_type area)
3011 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3013 const struct ctables_nest *nest = s->nests[a];
3014 for (size_t i = 0; i < nest->n_areas[area]; i++)
3016 size_t v_idx = nest->areas[area][i];
3017 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3018 hash = hash_pointer (cv->category, hash);
3019 if (cv->category->type != CCT_TOTAL
3020 && cv->category->type != CCT_SUBTOTAL
3021 && cv->category->type != CCT_POSTCOMPUTE)
3022 hash = value_hash (&cv->value,
3023 var_get_width (nest->vars[v_idx]), hash);
3027 struct ctables_area *a;
3028 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3030 const struct ctables_cell *df = a->example;
/* NOTE(review): the loop index below is also named a and shadows the area
   pointer declared above for the duration of the loop. */
3031 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3033 const struct ctables_nest *nest = s->nests[a];
3034 for (size_t i = 0; i < nest->n_areas[area]; i++)
3036 size_t v_idx = nest->areas[area][i];
3037 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3038 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3039 if (cv1->category != cv2->category
3040 || (cv1->category->type != CCT_TOTAL
3041 && cv1->category->type != CCT_SUBTOTAL
3042 && cv1->category->type != CCT_POSTCOMPUTE
3043 && !value_equal (&cv1->value, &cv2->value,
3044 var_get_width (nest->vars[v_idx]))))
/* No match found: create and register a new area. */
3053 struct ctables_sum *sums = (s->table->n_sum_vars
3054 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3057 a = xmalloc (sizeof *a);
3058 *a = (struct ctables_area) { .example = cell, .sums = sums };
3059 hmap_insert (&s->areas[area], &a->node, hash);
/* Views the string data of V as a substring whose length is the width of
   VAR, with trailing spaces trimmed.  The substring refers to the storage
   of V; nothing is copied.
   NOTE(review): the return statement is not visible here. */
3063 static struct substring
3064 rtrim_value (const union value *v, const struct variable *var)
3066 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3067 var_get_width (var));
3068 ss_rtrim (&s, ss_cstr (" "));
/* Returns true when the string value V of VAR, with trailing spaces
   trimmed, lies within SRANGE.  SRANGE points to two substrings, the lower
   and the upper bound, both inclusive; a bound whose string pointer is null
   is treated as open. */
3073 in_string_range (const union value *v, const struct variable *var,
3074 const struct substring *srange)
3076 struct substring s = rtrim_value (v, var);
3077 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3078 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The categories are scanned from the last to the first.  The visible
   tests are: exact number, exact string after trimming trailing spaces,
   numeric range with -DBL_MAX and DBL_MAX as open bounds, string range,
   and missing value.  When nothing matches, the result is NULL for a
   missing value and othernm otherwise.
   NOTE(review): the case labels, the returns inside the loop, the
   assignment of othernm and the result for a system-missing numeric value
   are not visible here. */
3081 static const struct ctables_category *
3082 ctables_categories_match (const struct ctables_categories *c,
3083 const union value *v, const struct variable *var)
3085 if (var_is_numeric (var) && v->f == SYSMIS)
3088 const struct ctables_category *othernm = NULL;
/* Counting down with an unsigned index: the decrement is in the condition. */
3089 for (size_t i = c->n_cats; i-- > 0; )
3091 const struct ctables_category *cat = &c->cats[i];
3095 if (cat->number == v->f)
3100 if (ss_equals (cat->string, rtrim_value (v, var)))
3105 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3106 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3111 if (in_string_range (v, var, cat->srange))
3116 if (var_is_value_missing (var, v))
3120 case CCT_POSTCOMPUTE:
3135 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3138 case CCT_EXCLUDED_MISSING:
3143 return var_is_value_missing (var, v) ? NULL : othernm;
3146 static const struct ctables_category *
3147 ctables_categories_total (const struct ctables_categories *c)
3149 const struct ctables_category *first = &c->cats[0];
3150 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3151 return (first->type == CCT_TOTAL ? first
3152 : last->type == CCT_TOTAL ? last
/* Looks up, and creates if necessary, the cell in section S that is
   identified by the categories CATS together with the data values in case C.

   A cell's identity covers every non-scale variable on every axis: the
   category pointer always participates, and the data value participates too
   unless the category is a total, subtotal, or postcompute.

   A newly created cell gets a copy of each variable's value, its summaries
   initialized from the summary axis's specs, and one area per area type,
   and is then inserted into S->cells. */
3156 static struct ctables_cell *
3157 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3158 const struct ctables_category *cats[PIVOT_N_AXES][10])
3161 enum ctables_summary_variant sv = CSV_CELL;
/* Step 1: hash the cell's identity. */
3162 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3164 const struct ctables_nest *nest = s->nests[a];
3165 for (size_t i = 0; i < nest->n; i++)
3166 if (i != nest->scale_idx)
3168 hash = hash_pointer (cats[a][i], hash);
3169 if (cats[a][i]->type != CCT_TOTAL
3170 && cats[a][i]->type != CCT_SUBTOTAL
3171 && cats[a][i]->type != CCT_POSTCOMPUTE)
3172 hash = value_hash (case_data (c, nest->vars[i]),
3173 var_get_width (nest->vars[i]), hash);
/* Step 2: search the hash bucket for an existing cell, using the same
   identity rule as the hash above. */
3179 struct ctables_cell *cell;
3180 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3182 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3184 const struct ctables_nest *nest = s->nests[a];
3185 for (size_t i = 0; i < nest->n; i++)
3186 if (i != nest->scale_idx
3187 && (cats[a][i] != cell->axes[a].cvs[i].category
3188 || (cats[a][i]->type != CCT_TOTAL
3189 && cats[a][i]->type != CCT_SUBTOTAL
3190 && cats[a][i]->type != CCT_POSTCOMPUTE
3191 && !value_equal (case_data (c, nest->vars[i]),
3192 &cell->axes[a].cvs[i].value,
3193 var_get_width (nest->vars[i])))))
/* Step 3: no existing cell, so build a new one. */
3202 cell = xmalloc (sizeof *cell);
3205 cell->omit_areas = 0;
3206 cell->postcompute = false;
3207 //struct string name = DS_EMPTY_INITIALIZER;
3208 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3210 const struct ctables_nest *nest = s->nests[a];
3211 cell->axes[a].cvs = (nest->n
3212 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3214 for (size_t i = 0; i < nest->n; i++)
3216 const struct ctables_category *cat = cats[a][i];
3217 const struct variable *var = nest->vars[i];
3218 const union value *value = case_data (c, var);
3219 if (i != nest->scale_idx)
3221 const struct ctables_category *subtotal = cat->subtotal;
3222 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total, subtotal, and postcompute categories set bits in 'omit_areas', one
   bit per area type, chosen according to the axis the category is on. */
3225 if (cat->type == CCT_TOTAL
3226 || cat->type == CCT_SUBTOTAL
3227 || cat->type == CCT_POSTCOMPUTE)
3229 /* XXX these should be more encompassing I think.*/
3233 case PIVOT_AXIS_COLUMN:
3234 cell->omit_areas |= ((1u << CTAT_TABLE) |
3235 (1u << CTAT_LAYER) |
3236 (1u << CTAT_LAYERCOL) |
3237 (1u << CTAT_SUBTABLE) |
3240 case PIVOT_AXIS_ROW:
3241 cell->omit_areas |= ((1u << CTAT_TABLE) |
3242 (1u << CTAT_LAYER) |
3243 (1u << CTAT_LAYERROW) |
3244 (1u << CTAT_SUBTABLE) |
3247 case PIVOT_AXIS_LAYER:
3248 cell->omit_areas |= ((1u << CTAT_TABLE) |
3249 (1u << CTAT_LAYER));
3253 if (cat->type == CCT_POSTCOMPUTE)
3254 cell->postcompute = true;
/* The cell keeps its own copy of the value, made with value_clone(). */
3257 cell->axes[a].cvs[i].category = cat;
3258 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below use 'name', whose declaration and final
   use are both commented out in this function.  Presumably this is debugging
   code that is compiled out -- confirm. */
3261 if (i != nest->scale_idx)
3263 if (!ds_is_empty (&name))
3264 ds_put_cstr (&name, ", ");
3265 char *value_s = data_out (value, var_get_encoding (var),
3266 var_get_print_format (var),
3267 settings_get_fmt_settings ());
3268 if (cat->type == CCT_TOTAL
3269 || cat->type == CCT_SUBTOTAL
3270 || cat->type == CCT_POSTCOMPUTE)
3271 ds_put_format (&name, "%s=total", var_get_name (var));
3273 ds_put_format (&name, "%s=%s", var_get_name (var),
3274 value_s + strspn (value_s, " "));
3280 //cell->name = ds_steal_cstr (&name);
/* One summary per spec in the spec set selected by the cell's variant on the
   table's summary axis. */
3282 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3283 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3284 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3285 for (size_t i = 0; i < specs->n; i++)
3286 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3287 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3288 cell->areas[at] = ctables_area_insert (s, cell, at);
3289 hmap_insert (&s->cells, &cell->node, hash);
3294 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3295 const struct ccase *c)
3297 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3299 const struct variable *var = specs->listwise_vars[i];
3300 if (var_is_num_missing (var, case_num (c, var)))
3308 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3310 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Accumulates case C, with weights WEIGHT, into the cell of section S that
   is identified by CATS, creating the cell if needed.  Every summary of the
   cell is updated, and then the cell's areas receive the weight in their
   'total', 'count', and 'valid' accumulators as well as in their per-variable
   sums. */
3315 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3316 const struct ctables_category *cats[PIVOT_N_AXES][10],
3317 bool is_included, double weight[N_CTWS])
3319 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3320 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3322 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3323 const union value *value = case_data (c, specs->var);
3324 bool is_missing = var_is_value_missing (specs->var, value);
/* For a scale summary variable, a listwise-missing case also counts as
   missing. */
3325 bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c));
/* Each spec chooses which of the N_CTWS weights it uses. */
3327 for (size_t i = 0; i < specs->n; i++)
3328 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3329 specs->var, value, specs->is_scale,
3330 scale_missing, is_missing, is_included,
3331 weight[specs->specs[i].weighting]);
/* NOTE(review): 'omit_areas' is built as a bit mask with one bit per area
   type, but the test below uses logical '&&', so it is true whenever *any*
   bit is set and a cell with a nonzero mask is left out of *every* area.
   Bitwise '&' looks like what was meant, but switching would change which
   areas totals and subtotals feed into, and the masks themselves are marked
   XXX as incomplete.  Confirm the intended semantics before changing it. */
3332 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3333 if (!(cell->omit_areas && (1u << at)))
3335 struct ctables_area *a = cell->areas[at];
3337 add_weight (a->total, weight);
3339 add_weight (a->count, weight);
3342 add_weight (a->valid, weight);
/* Weighted sums of the table's sum variables; a missing addend is skipped. */
3344 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3346 /* XXX listwise_missing??? */
3347 const struct variable *var = s->table->sum_vars[i];
3348 double addend = case_num (c, var);
3349 if (!var_is_num_missing (var, addend))
3351 struct ctables_sum *sum = &a->sums[i];
3352 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3353 sum->sum[wt] += addend * weight[wt];
/* Adds case C to the cells formed by replacing categories in CATS with their
   variables' total categories.

   Starting at axis START_AXIS and nest position START_NEST, the function
   looks up each variable's total category with ctables_categories_total(),
   adds the case to the resulting cell, and recurses from the following
   position (A, I + 1), so that combinations of totals over several variables
   are reached as well.  The original category is held in 'save' meanwhile.

   NOTE(review): the handling of the scale variable and of variables that
   have no total category is not visible in this excerpt. */
3361 recurse_totals (struct ctables_section *s, const struct ccase *c,
3362 const struct ctables_category *cats[PIVOT_N_AXES][10],
3363 bool is_included, double weight[N_CTWS],
3364 enum pivot_axis_type start_axis, size_t start_nest)
3366 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3368 const struct ctables_nest *nest = s->nests[a];
3369 for (size_t i = start_nest; i < nest->n; i++)
3371 if (i == nest->scale_idx)
3374 const struct variable *var = nest->vars[i];
/* The categories for VAR are found through its dictionary index. */
3376 const struct ctables_category *total = ctables_categories_total (
3377 s->table->categories[var_get_dict_index (var)]);
3380 const struct ctables_category *save = cats[a][i];
3382 ctables_cell_add__ (s, c, cats, is_included, weight);
3383 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the cells formed by replacing categories in CATS with their
   subtotal categories.

   Starting at axis START_AXIS and nest position START_NEST, the function
   substitutes each category's 'subtotal' member for the category itself,
   adds the case to the resulting cell, and recurses from the following
   position (A, I + 1).  The original category is held in 'save' meanwhile.

   NOTE(review): the handling of the scale variable and of categories that
   have no subtotal is not visible in this excerpt. */
3392 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3393 const struct ctables_category *cats[PIVOT_N_AXES][10],
3394 bool is_included, double weight[N_CTWS],
3395 enum pivot_axis_type start_axis, size_t start_nest)
3397 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3399 const struct ctables_nest *nest = s->nests[a];
3400 for (size_t i = start_nest; i < nest->n; i++)
3402 if (i == nest->scale_idx)
3405 const struct ctables_category *save = cats[a][i];
3408 cats[a][i] = save->subtotal;
3409 ctables_cell_add__ (s, c, cats, is_included, weight);
3410 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3419 ctables_add_occurrence (const struct variable *var,
3420 const union value *value,
3421 struct hmap *occurrences)
3423 int width = var_get_width (var);
3424 unsigned int hash = value_hash (value, width, 0);
3426 struct ctables_occurrence *o;
3427 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3429 if (value_equal (value, &o->value, width))
3432 o = xmalloc (sizeof *o);
3433 value_clone (&o->value, value, width);
3434 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights WEIGHT, to section S.

   First every non-scale variable on every axis is matched against its
   categories with ctables_categories_match().  Then each variable's value is
   recorded in the section's occurrence tables, and finally the case is added
   to its own cell and to the related total and subtotal cells.

   NOTE(review): CATS has room for only 10 variables per axis (marked XXX
   below), and nothing visible here checks nest->n against that bound. */
3438 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3439 double weight[N_CTWS])
3441 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3443 bool is_included = true;
3445 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3447 const struct ctables_nest *nest = s->nests[a];
3448 for (size_t i = 0; i < nest->n; i++)
3449 if (i != nest->scale_idx)
3451 const struct variable *var = nest->vars[i];
3452 const union value *value = case_data (c, var);
3454 cats[a][i] = ctables_categories_match (
3455 s->table->categories[var_get_dict_index (var)], value, var);
/* The tests below concern the summary variable and missing values.  When the
   placeholder category is used, the case is flagged as not included. */
3458 if (i != nest->summary_idx)
3461 if (!var_is_value_missing (var, value))
3464 static const struct ctables_category cct_excluded_missing = {
3465 .type = CCT_EXCLUDED_MISSING,
3468 cats[a][i] = &cct_excluded_missing;
3469 is_included = false;
/* Record each value that occurs, per axis and per variable position. */
3475 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3477 const struct ctables_nest *nest = s->nests[a];
3478 for (size_t i = 0; i < nest->n; i++)
3479 if (i != nest->scale_idx)
3481 const struct variable *var = nest->vars[i];
3482 const union value *value = case_data (c, var);
3483 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* The case's own cell, then the total cells, then the subtotal cells. */
3487 ctables_cell_add__ (s, c, cats, is_included, weight);
3488 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3489 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* The summary spec set that a merge item refers to; merge_item_compare_3way()
   indexes its 'specs' array with the item's 'ofs'. */
3494 const struct ctables_summary_spec_set *set;
3499 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3501 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3502 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3503 if (as->function != bs->function)
3504 return as->function > bs->function ? 1 : -1;
3505 else if (as->weighting != bs->weighting)
3506 return as->weighting > bs->weighting ? 1 : -1;
3507 else if (as->calc_area != bs->calc_area)
3508 return as->calc_area > bs->calc_area ? 1 : -1;
3509 else if (as->percentile != bs->percentile)
3510 return as->percentile < bs->percentile ? 1 : -1;
3512 const char *as_label = as->label ? as->label : "";
3513 const char *bs_label = bs->label ? bs->label : "";
3514 return strcmp (as_label, bs_label);
3518 ctables_category_format_number (double number, const struct variable *var,
3521 struct pivot_value *pv = pivot_value_new_var_value (
3522 var, &(union value) { .f = number });
3523 pivot_value_format (pv, NULL, s);
3524 pivot_value_destroy (pv);
3528 ctables_category_format_string (struct substring string,
3529 const struct variable *var, struct string *out)
3531 int width = var_get_width (var);
3532 char *s = xmalloc (width);
3533 buf_copy_rpad (s, width, string.string, string.length, ' ');
3534 struct pivot_value *pv = pivot_value_new_var_value (
3535 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3536 pivot_value_format (pv, NULL, out);
3537 pivot_value_destroy (pv);
/* Appends to S a textual label for category CAT of variable VAR.  Which text
   is produced depends on the kind of category:

   - a number or a string is formatted as a value of VAR;
   - a range is formatted as its two endpoints joined by " THRU ";
   - the missing and other-nonmissing categories yield the keywords MISSING
     and OTHERNM;
   - a postcompute yields '&' followed by the postcompute's name;
   - a total or subtotal yields its 'total_label'.

   ctables_postcompute_label() treats a false return value from this
   function as a failure. */
3542 ctables_category_format_label (const struct ctables_category *cat,
3543 const struct variable *var,
3549 ctables_category_format_number (cat->number, var, s);
3553 ctables_category_format_string (cat->string, var, s);
3557 ctables_category_format_number (cat->nrange[0], var, s);
3558 ds_put_format (s, " THRU ");
3559 ctables_category_format_number (cat->nrange[1], var, s);
3563 ctables_category_format_string (cat->srange[0], var, s);
3564 ds_put_format (s, " THRU ");
3565 ctables_category_format_string (cat->srange[1], var, s);
3569 ds_put_cstr (s, "MISSING");
3573 ds_put_cstr (s, "OTHERNM");
3576 case CCT_POSTCOMPUTE:
3577 ds_put_format (s, "&%s", cat->pc->name);
3582 ds_put_cstr (s, cat->total_label);
3588 case CCT_EXCLUDED_MISSING:
/* Builds the label to display for postcompute category CAT of variable VAR.

   The postcompute's label text may contain references of the form
   ")LABEL[n]", where n is a 1-based index into CATS.  Each reference is
   replaced by the label of the category it names, as produced by
   ctables_category_format_label().  If the text contains no reference, it is
   used as is.  The final statement of the function returns the unexpanded
   label text. */
3595 static struct pivot_value *
3596 ctables_postcompute_label (const struct ctables_categories *cats,
3597 const struct ctables_category *cat,
3598 const struct variable *var)
3600 struct substring in = ss_cstr (cat->pc->label);
3601 struct substring target = ss_cstr (")LABEL[");
3603 struct string out = DS_EMPTY_INITIALIZER;
/* Find the next reference in what remains of the input. */
3606 size_t chunk = ss_find_substring (in, target);
3607 if (chunk == SIZE_MAX)
/* No reference left.  If nothing has been written to OUT, the remaining
   input is returned directly; otherwise it is appended to OUT first. */
3609 if (ds_is_empty (&out))
3610 return pivot_value_new_user_text (in.string, in.length);
3613 ds_put_substring (&out, in);
3614 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the literal text that precedes the reference, then step past the
   ")LABEL[" marker itself. */
3618 ds_put_substring (&out, ss_head (in, chunk));
3619 ss_advance (&in, chunk + target.length);
/* The index runs up to the closing ']'. */
3621 struct substring idx_s;
3622 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a decimal number from 1 to the number of categories,
   with no trailing characters before the ']'. */
3625 long int idx = strtol (idx_s.string, &tail, 10);
3626 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3629 struct ctables_category *cat2 = &cats->cats[idx - 1];
3630 if (!ctables_category_format_label (cat2, var, &out))
3636 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3639 static struct pivot_value *
3640 ctables_category_create_value_label (const struct ctables_categories *cats,
3641 const struct ctables_category *cat,
3642 const struct variable *var,
3643 const union value *value)
3645 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3646 ? ctables_postcompute_label (cats, cat, var)
3647 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3648 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3649 : pivot_value_new_var_value (var, value));
3652 static struct ctables_value *
3653 ctables_value_find__ (struct ctables_table *t, const union value *value,
3654 int width, unsigned int hash)
3656 struct ctables_value *clv;
3657 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3658 hash, &t->clabels_values_map)
3659 if (value_equal (value, &clv->value, width))
3665 ctables_value_insert (struct ctables_table *t, const union value *value,
3668 unsigned int hash = value_hash (value, width, 0);
3669 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3672 clv = xmalloc (sizeof *clv);
3673 value_clone (&clv->value, value, width);
3674 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry for VALUE, which has the given WIDTH, in T's
   category-label value map, or NULL if there is none.  This is
   ctables_value_find__() with the hash computed here. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  return ctables_value_find__ (t, value, width,
                               value_hash (value, width, 0));
}
/* Adds to table T one section for every combination of nests across the
   axes.

   The function recurses over the axes.  While A is a valid axis, it tries
   each index IX[A] for that axis (an axis with no nests is still iterated
   once) and recurses to the next axis.  Once all axes have been assigned an
   index, it appends a new section to T->sections and initializes its hash
   maps: 'cells', one occurrence map per variable of each axis's nest, and
   one area map per area type. */
3687 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3688 size_t ix[PIVOT_N_AXES])
3690 if (a < PIVOT_N_AXES)
/* MAX with 1 so that an axis without nests still yields one iteration. */
3692 size_t limit = MAX (t->stacks[a].n, 1);
3693 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3694 ctables_table_add_section (t, a + 1, ix);
/* NOTE(review): the caller is presumed to have sized T->sections for all of
   the combinations; no bound is checked here -- confirm. */
3698 struct ctables_section *s = &t->sections[t->n_sections++];
3699 *s = (struct ctables_section) {
3701 .cells = HMAP_INITIALIZER (s->cells),
3703 for (a = 0; a < PIVOT_N_AXES; a++)
3706 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3708 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3709 for (size_t i = 0; i < nest->n; i++)
3710 hmap_init (&s->occurrences[a][i]);
3712 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3713 hmap_init (&s->areas[at]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  return a + b;
}
/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  return a - b;
}
/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  return a * b;
}
3736 ctpo_div (double a, double b)
3738 return b ? a / b : SYSMIS;
/* Postcompute operator: raises A to the power B using pow().  The caller's
   errno is captured in 'save_errno' before the call.
   NOTE(review): presumably errno is cleared beforehand, checked afterward to
   detect a failed pow(), and then restored -- those statements are not
   visible in this excerpt. */
3742 ctpo_pow (double a, double b)
3744 int save_errno = errno;
3746 double result = pow (a, b);
3754 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell. */
3759 struct ctables_pcexpr_evaluate_ctx
/* The cell being computed and the section that contains it. */
3761 const struct ctables_cell *cell;
3762 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
3763 const struct ctables_categories *cats;
/* Axis on which the postcompute category lies. */
3764 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute(). */
3767 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function, which is defined further down. */
3770 static double ctables_pcexpr_evaluate (
3771 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3774 ctables_pcexpr_evaluate_nonterminal (
3775 const struct ctables_pcexpr_evaluate_ctx *ctx,
3776 const struct ctables_pcexpr *e, size_t n_args,
3777 double evaluate (double, double))
3779 double args[2] = { 0, 0 };
3780 for (size_t i = 0; i < n_args; i++)
3782 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3783 if (!isfinite (args[i]) || args[i] == SYSMIS)
3786 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that a postcompute operand refers
   to.

   The target cell has the same category and value as CTX->cell at every
   position except one: at axis CTX->pc_a, index CTX->pc_a_idx, it has PC_CV
   instead.  The cell is looked up in the section's 'cells' map using the
   same hash and equality rule that ctables_cell_insert__() uses, and the
   value of its summary number CTX->summary_idx is returned.

   NOTE(review): what is returned when no such cell exists is not visible in
   this excerpt. */
3790 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3791 const struct ctables_cell_value *pc_cv)
3793 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity. */
3796 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3798 const struct ctables_nest *nest = s->nests[a];
3799 for (size_t i = 0; i < nest->n; i++)
3800 if (i != nest->scale_idx)
3802 const struct ctables_cell_value *cv
3803 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3804 : &ctx->cell->axes[a].cvs[i]);
3805 hash = hash_pointer (cv->category, hash);
3806 if (cv->category->type != CCT_TOTAL
3807 && cv->category->type != CCT_SUBTOTAL
3808 && cv->category->type != CCT_POSTCOMPUTE)
3809 hash = value_hash (&cv->value,
3810 var_get_width (nest->vars[i]), hash);
/* Search the bucket for a cell whose identity matches. */
3814 struct ctables_cell *tc;
3815 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3817 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3819 const struct ctables_nest *nest = s->nests[a];
3820 for (size_t i = 0; i < nest->n; i++)
3821 if (i != nest->scale_idx)
3823 const struct ctables_cell_value *p_cv
3824 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3825 : &ctx->cell->axes[a].cvs[i]);
3826 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3827 if (p_cv->category != t_cv->category
3828 || (p_cv->category->type != CCT_TOTAL
3829 && p_cv->category->type != CCT_SUBTOTAL
3830 && p_cv->category->type != CCT_POSTCOMPUTE
3831 && !value_equal (&p_cv->value,
3833 var_get_width (nest->vars[i]))))
/* Found: the spec comes from the spec set for the target cell's own summary
   variant. */
3845 const struct ctables_table *t = s->table;
3846 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3847 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3848 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3849 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and then evaluated with
   ctables_pcexpr_evaluate_category().  Operator nodes are passed to
   ctables_pcexpr_evaluate_nonterminal() together with the matching ctpo_*
   function. */
3853 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3854 const struct ctables_pcexpr *e)
/* Operands that can cover many data values: the result accumulates over
   every value that occurred for the postcompute variable and that falls
   into the operand's category. */
3861 case CTPO_CAT_NRANGE:
3862 case CTPO_CAT_SRANGE:
3863 case CTPO_CAT_MISSING:
3864 case CTPO_CAT_OTHERNM:
3866 struct ctables_cell_value cv = {
3867 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3869 assert (cv.category != NULL);
3871 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3872 const struct ctables_occurrence *o;
3875 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3876 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3877 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3879 cv.value = o->value;
3880 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that name exactly one cell. */
3885 case CTPO_CAT_NUMBER:
3886 case CTPO_CAT_SUBTOTAL:
3887 case CTPO_CAT_TOTAL:
3889 struct ctables_cell_value cv = {
3890 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3891 .value = { .f = e->number },
3893 assert (cv.category != NULL);
3894 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand.  When the string is shorter than the variable's width, a
   copy padded on the right with spaces is made, so that it has the width of
   the variable's values. */
3897 case CTPO_CAT_STRING:
3899 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3901 if (width > e->string.length)
3903 s = xmalloc (width);
3904 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3907 const struct ctables_category *category
3908 = ctables_find_category_for_postcompute (
3909 ctx->section->table->ctables->dict,
3910 ctx->cats, ctx->parse_format, e);
3911 assert (category != NULL);
/* The category found may be numeric or string; the cell value is filled in
   to match. */
3913 struct ctables_cell_value cv = { .category = category };
3914 if (category->type == CCT_NUMBER)
3915 cv.value.f = category->number;
3916 else if (category->type == CCT_STRING)
3917 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3921 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary ones followed by unary negation. */
3927 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3930 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3933 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3936 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3939 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3942 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3948 static const struct ctables_category *
3949 ctables_cell_postcompute (const struct ctables_section *s,
3950 const struct ctables_cell *cell,
3951 enum pivot_axis_type *pc_a_p,
3954 assert (cell->postcompute);
3955 const struct ctables_category *pc_cat = NULL;
3956 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3957 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3959 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3960 if (cv->category->type == CCT_POSTCOMPUTE)
3964 /* Multiple postcomputes cross each other. The value is
3969 pc_cat = cv->category;
3973 *pc_a_idx_p = pc_a_idx;
3977 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS.

   If the postcompute has summary specs of its own and one of them matches SS
   in function, weighting, calc_area, and percentile, that spec's format
   settings are stored into *FORMAT and *IS_CTABLES_FORMAT.  The value itself
   comes from evaluating the postcompute's expression.

   NOTE(review): the handling of a cell for which ctables_cell_postcompute()
   finds no single postcompute category is not visible in this excerpt. */
3982 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3983 const struct ctables_cell *cell,
3984 const struct ctables_summary_spec *ss,
3985 struct fmt_spec *format,
3986 bool *is_ctables_format,
/* Locate the postcompute category and where it sits. */
3989 enum pivot_axis_type pc_a = 0;
3990 size_t pc_a_idx = 0;
3991 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3992 s, cell, &pc_a, &pc_a_idx);
3996 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a postcompute spec that matches SS. */
3999 for (size_t i = 0; i < pc->specs->n; i++)
4001 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4002 if (ss->function == ss2->function
4003 && ss->weighting == ss2->weighting
4004 && ss->calc_area == ss2->calc_area
4005 && ss->percentile == ss2->percentile)
4007 *format = ss2->format;
4008 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4014 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4015 const struct ctables_categories *cats = s->table->categories[
4016 var_get_dict_index (var)];
4017 struct ctables_pcexpr_evaluate_ctx ctx = {
4022 .pc_a_idx = pc_a_idx,
4023 .summary_idx = summary_idx,
4024 .parse_format = pc_cat->parse_format,
4026 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS, producing UTF-8 text
   with data_out_stretchy(), and then adjusts where the minus sign appears in
   the CTABLES-specific formats, as explained in the comment below. */
4030 ctables_format (double d, const struct fmt_spec *format,
4031 const struct fmt_settings *settings)
4033 const union value v = { .f = d };
4034 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4036 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
   produce the results we want for negative numbers, putting the negative
   sign in the wrong spot, before the prefix instead of after it.  We can't,
   in fact, produce the desired results using a custom-currency
   specification.  Instead, we postprocess the output, moving the negative
   sign to just after the prefix:

       NEQUAL:   "-N=3"  => "N=-3"
       PAREN:    "-(3)"  => "(-3)"
       PCTPAREN: "-(3%)" => "(-3%)"

   This transformation doesn't affect NEGPAREN. */
/* A '-' directly after 'E' is left alone; that would be the sign of an
   exponent rather than of the number. */
4048 char *minus_src = strchr (s, '-');
4049 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The sign goes after the '=' of "N=" if that prefix is present, otherwise
   after the first '('. */
4051 char *n_equals = strstr (s, "N=");
4052 char *lparen = strchr (s, '(');
4053 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4055 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4061 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4063 for (size_t i = 0; i < t->stacks[a].n; i++)
4065 struct ctables_nest *nest = &t->stacks[a].nests[i];
4066 if (nest->n != 1 || nest->scale_idx != 0)
4069 enum ctables_vlabel vlabel
4070 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4071 if (vlabel != CTVL_NONE)
4078 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4080 struct pivot_table *pt = pivot_table_create__ (
4082 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4083 : pivot_value_new_text (N_("Custom Tables"))),
4086 pivot_table_set_caption (
4087 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4089 pivot_table_set_corner_text (
4090 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4092 bool summary_dimension = (t->summary_axis != t->slabels_axis
4093 || (!t->slabels_visible
4094 && t->summary_specs.n > 1));
4095 if (summary_dimension)
4097 struct pivot_dimension *d = pivot_dimension_create (
4098 pt, t->slabels_axis, N_("Statistics"));
4099 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4100 if (!t->slabels_visible)
4101 d->hide_all_labels = true;
4102 for (size_t i = 0; i < specs->n; i++)
4103 pivot_category_create_leaf (
4104 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4107 bool categories_dimension = t->clabels_example != NULL;
4108 if (categories_dimension)
4110 struct pivot_dimension *d = pivot_dimension_create (
4111 pt, t->label_axis[t->clabels_from_axis],
4112 t->clabels_from_axis == PIVOT_AXIS_ROW
4113 ? N_("Row Categories")
4114 : N_("Column Categories"));
4115 const struct variable *var = t->clabels_example;
4116 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4117 for (size_t i = 0; i < t->n_clabels_values; i++)
4119 const struct ctables_value *value = t->clabels_values[i];
4120 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4121 assert (cat != NULL);
4122 pivot_category_create_leaf (
4123 d->root, ctables_category_create_value_label (c, cat,
4129 pivot_table_set_look (pt, ct->look);
4130 struct pivot_dimension *d[PIVOT_N_AXES];
4131 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4133 static const char *names[] = {
4134 [PIVOT_AXIS_ROW] = N_("Rows"),
4135 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4136 [PIVOT_AXIS_LAYER] = N_("Layers"),
4138 d[a] = (t->axes[a] || a == t->summary_axis
4139 ? pivot_dimension_create (pt, a, names[a])
4144 assert (t->axes[a]);
4146 for (size_t i = 0; i < t->stacks[a].n; i++)
4148 struct ctables_nest *nest = &t->stacks[a].nests[i];
4149 struct ctables_section **sections = xnmalloc (t->n_sections,
4151 size_t n_sections = 0;
4153 size_t n_total_cells = 0;
4154 size_t max_depth = 0;
4155 for (size_t j = 0; j < t->n_sections; j++)
4156 if (t->sections[j].nests[a] == nest)
4158 struct ctables_section *s = &t->sections[j];
4159 sections[n_sections++] = s;
4160 n_total_cells += hmap_count (&s->cells);
4162 size_t depth = s->nests[a]->n;
4163 max_depth = MAX (depth, max_depth);
4166 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4168 size_t n_sorted = 0;
4170 for (size_t j = 0; j < n_sections; j++)
4172 struct ctables_section *s = sections[j];
4174 struct ctables_cell *cell;
4175 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4177 sorted[n_sorted++] = cell;
4178 assert (n_sorted <= n_total_cells);
4181 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4182 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4185 if (a == PIVOT_AXIS_ROW)
4187 size_t ids[N_CTATS];
4188 memset (ids, 0, sizeof ids);
4189 for (size_t j = 0; j < n_sorted; j++)
4191 struct ctables_cell *cell = sorted[j];
4192 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4194 struct ctables_area *area = cell->areas[at];
4195 if (!area->sequence)
4196 area->sequence = ++ids[at];
4203 for (size_t j = 0; j < n_sorted; j++)
4205 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4210 struct ctables_level
4212 enum ctables_level_type
4214 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4215 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4216 CTL_SUMMARY, /* Summary functions. */
4220 enum settings_value_show vlabel; /* CTL_VAR only. */
4223 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4224 size_t n_levels = 0;
4225 for (size_t k = 0; k < nest->n; k++)
4227 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4228 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4230 if (vlabel != CTVL_NONE)
4232 levels[n_levels++] = (struct ctables_level) {
4234 .vlabel = (enum settings_value_show) vlabel,
4239 if (nest->scale_idx != k
4240 && (k != nest->n - 1 || t->label_axis[a] == a))
4242 levels[n_levels++] = (struct ctables_level) {
4243 .type = CTL_CATEGORY,
4249 if (!summary_dimension && a == t->slabels_axis)
4251 levels[n_levels++] = (struct ctables_level) {
4252 .type = CTL_SUMMARY,
4253 .var_idx = SIZE_MAX,
4257 /* Pivot categories:
4259 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4260 - category for nest->vars[0], if nest->scale_idx != 0
4261 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4262 - category for nest->vars[1], if nest->scale_idx != 1
4264 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4265 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4266 - summary function, if 'a == t->slabels_axis && a ==
4269 Additional dimensions:
4271 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4273 - If 't->label_axis[b] == a' for some 'b != a', add a category
4278 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4280 for (size_t j = 0; j < n_sorted; j++)
4282 struct ctables_cell *cell = sorted[j];
4283 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4285 size_t n_common = 0;
4288 for (; n_common < n_levels; n_common++)
4290 const struct ctables_level *level = &levels[n_common];
4291 if (level->type == CTL_CATEGORY)
4293 size_t var_idx = level->var_idx;
4294 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4295 if (prev->axes[a].cvs[var_idx].category != c)
4297 else if (c->type != CCT_SUBTOTAL
4298 && c->type != CCT_TOTAL
4299 && c->type != CCT_POSTCOMPUTE
4300 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4301 &cell->axes[a].cvs[var_idx].value,
4302 var_get_type (nest->vars[var_idx])))
4308 for (size_t k = n_common; k < n_levels; k++)
4310 const struct ctables_level *level = &levels[k];
4311 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4312 if (level->type == CTL_SUMMARY)
4314 assert (k == n_levels - 1);
4316 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4317 for (size_t m = 0; m < specs->n; m++)
4319 int leaf = pivot_category_create_leaf (
4320 parent, ctables_summary_label (&specs->specs[m],
4328 const struct variable *var = nest->vars[level->var_idx];
4329 struct pivot_value *label;
4330 if (level->type == CTL_VAR)
4332 label = pivot_value_new_variable (var);
4333 label->variable.show = level->vlabel;
4335 else if (level->type == CTL_CATEGORY)
4337 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4338 label = ctables_category_create_value_label (
4339 t->categories[var_get_dict_index (var)],
4340 cv->category, var, &cv->value);
4345 if (k == n_levels - 1)
4346 prev_leaf = pivot_category_create_leaf (parent, label);
4348 groups[k] = pivot_category_create_group__ (parent, label);
4352 cell->axes[a].leaf = prev_leaf;
4361 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4365 size_t n_total_cells = 0;
4366 for (size_t j = 0; j < t->n_sections; j++)
4367 n_total_cells += hmap_count (&t->sections[j].cells);
4369 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4370 size_t n_sorted = 0;
4371 for (size_t j = 0; j < t->n_sections; j++)
4373 const struct ctables_section *s = &t->sections[j];
4374 struct ctables_cell *cell;
4375 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4377 sorted[n_sorted++] = cell;
4379 assert (n_sorted <= n_total_cells);
4380 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4382 size_t ids[N_CTATS];
4383 memset (ids, 0, sizeof ids);
4384 for (size_t j = 0; j < n_sorted; j++)
4386 struct ctables_cell *cell = sorted[j];
4387 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4389 struct ctables_area *area = cell->areas[at];
4390 if (!area->sequence)
4391 area->sequence = ++ids[at];
4398 for (size_t i = 0; i < t->n_sections; i++)
4400 struct ctables_section *s = &t->sections[i];
4402 struct ctables_cell *cell;
4403 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4408 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4409 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4410 for (size_t j = 0; j < specs->n; j++)
4413 size_t n_dindexes = 0;
4415 if (summary_dimension)
4416 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4418 if (categories_dimension)
4420 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4421 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4422 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4423 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4426 dindexes[n_dindexes++] = ctv->leaf;
4429 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4432 int leaf = cell->axes[a].leaf;
4433 if (a == t->summary_axis && !summary_dimension)
4435 dindexes[n_dindexes++] = leaf;
4438 const struct ctables_summary_spec *ss = &specs->specs[j];
4440 struct fmt_spec format = specs->specs[j].format;
4441 bool is_ctables_format = ss->is_ctables_format;
4442 double d = (cell->postcompute
4443 ? ctables_cell_calculate_postcompute (
4444 s, cell, ss, &format, &is_ctables_format, j)
4445 : ctables_summary_value (cell, &cell->summaries[j],
4448 struct pivot_value *value;
4449 if (ct->hide_threshold != 0
4450 && d < ct->hide_threshold
4451 && ss->function == CTSF_COUNT)
4453 value = pivot_value_new_user_text_nocopy (
4454 xasprintf ("<%d", ct->hide_threshold));
4456 else if (d == 0 && ct->zero)
4457 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4458 else if (d == SYSMIS && ct->missing)
4459 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4460 else if (is_ctables_format)
4461 value = pivot_value_new_user_text_nocopy (
4462 ctables_format (d, &format, &ct->ctables_formats));
4465 value = pivot_value_new_number (d);
4466 value->numeric.format = format;
4468 /* XXX should text values be right-justified? */
4469 pivot_table_put (pt, dindexes, n_dindexes, value);
4474 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be moved to the
   axis named by T->label_axis[A] (the ROWLABELS or COLLABELS subcommand with
   LAYER or OPPOSITE).

   Records the innermost variable of the first nest on axis A as
   T->clabels_example.  Reports an error with msg() if a category of that
   variable has type CCT_FUNCTION, if an innermost variable is the nest's
   scale variable, or if the innermost variable of any other nest differs
   from the first in width, value labels, or category specifications.

   NOTE(review): the return statements and some early-exit tests are not
   visible in this listing.  ctables_prepare_table() combines the results for
   the row and column axes with `&&', so a truth value is presumably
   returned -- confirm against the full source. */
4478 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4480 enum pivot_axis_type label_pos = t->label_axis[a];
/* These two names are used only in the error messages below. */
4484 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4485 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4487 const struct ctables_stack *stack = &t->stacks[a];
4491 const struct ctables_nest *n0 = &stack->nests[0];
4494 assert (stack->n == 1);
/* V0 is the innermost variable of the first nest and C0 its category
   specifications; the other nests are compared against them. */
4498 const struct variable *v0 = n0->vars[n0->n - 1];
4499 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4500 t->clabels_example = v0;
4502 for (size_t i = 0; i < c0->n_cats; i++)
4503 if (c0->cats[i].type == CCT_FUNCTION)
4505 msg (SE, _("%s=%s is not allowed with sorting based "
4506 "on a summary function."),
4507 subcommand_name, pos_name);
4510 if (n0->n - 1 == n0->scale_idx)
4512 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4513 "but %s is a scale variable."),
4514 subcommand_name, pos_name, var_get_name (v0));
/* Compare the innermost variable of every remaining nest with V0. */
4518 for (size_t i = 1; i < stack->n; i++)
4520 const struct ctables_nest *ni = &stack->nests[i];
4522 const struct variable *vi = ni->vars[ni->n - 1];
4523 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4525 if (ni->n - 1 == ni->scale_idx)
4527 msg (SE, _("%s=%s requires the variables to be moved to be "
4528 "categorical, but %s is a scale variable."),
4529 subcommand_name, pos_name, var_get_name (vi));
4532 if (var_get_width (v0) != var_get_width (vi))
4534 msg (SE, _("%s=%s requires the variables to be "
4535 "moved to have the same width, but %s has "
4536 "width %d and %s has width %d."),
4537 subcommand_name, pos_name,
4538 var_get_name (v0), var_get_width (v0),
4539 var_get_name (vi), var_get_width (vi));
4542 if (!val_labs_equal (var_get_value_labels (v0),
4543 var_get_value_labels (vi)))
4545 msg (SE, _("%s=%s requires the variables to be "
4546 "moved to have the same value labels, but %s "
4547 "and %s have different value labels."),
4548 subcommand_name, pos_name,
4549 var_get_name (v0), var_get_name (vi));
4552 if (!ctables_categories_equal (c0, ci))
4554 msg (SE, _("%s=%s requires the variables to be "
4555 "moved to have the same category "
4556 "specifications, but %s and %s have different "
4557 "category specifications."),
4558 subcommand_name, pos_name,
4559 var_get_name (v0), var_get_name (vi));
/* Returns the index of VAR within the array *SUM_VARS, which currently holds
   *N elements and has room for *ALLOCATED.  If VAR is not in the array yet,
   appends it, growing the array with x2nrealloc() when it is full, and
   returns the index of the new element. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  size_t idx = 0;
  while (idx < *n && (*sum_vars)[idx] != var)
    idx++;

  if (idx == *n)
    {
      /* Not found: append VAR at the end. */
      if (*n >= *allocated)
        *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
      (*sum_vars)[idx] = var;
      ++*n;
    }
  return idx;
}
/* Maps AREA to another area type.  ctables_prepare_table() applies this to a
   summary specification's calc_area when row labels are moved to columns or
   column labels are moved to rows.

   NOTE(review): only two of the results (CTAT_LAYERCOL and CTAT_LAYERROW)
   are visible in this listing; the selection logic and the remaining return
   statements are not shown, so the full mapping must be confirmed against
   the complete source. */
4581 static enum ctables_area_type
4582 rotate_area (enum ctables_area_type area)
4593 return CTAT_LAYERCOL;
4596 return CTAT_LAYERROW;
/* Walks axis expression A.  For every summary specification in A->specs[]
   whose function is CTSF_areaPCT_SUM, stores in the specification's
   sum_var_idx the index of A->var within *SUM_VARS, as returned by
   add_sum_var() (which appends the variable if it is new).  Also recurses
   into A->subs[0] and A->subs[1].

   NOTE(review): the tests that choose between the two loops (presumably a
   dispatch on the kind of axis node, plus a null check) are not visible in
   this listing -- confirm against the full source. */
4609 enumerate_sum_vars (const struct ctables_axis *a,
4610 struct variable ***sum_vars, size_t *n, size_t *allocated)
4618 for (size_t i = 0; i < N_CSVS; i++)
4619 for (size_t j = 0; j < a->specs[i].n; j++)
4621 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4622 if (spec->function == CTSF_areaPCT_SUM)
4623 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4629 for (size_t i = 0; i < 2; i++)
4630 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The visible steps are:

   - For each axis, builds T->stacks[] with enumerate_fts() and, for every
     nest and area type, fills in the nest's areas[] and n_areas[] with the
     indexes of its non-scale variables, then drops or adjusts trailing
     entries depending on where category labels are moved.

   - For an axis without a stack from enumerate_fts(), installs a single
     nest with scale_idx and summary_idx set to SIZE_MAX and forces
     T->label_axis[a] back to the axis itself.

   - On the summary axis, gives each nest that has no cell summary
     specifications a default one (CTSF_MEAN if the set is scale, otherwise
     CTSF_COUNT) and clones the cell specifications to the totals when the
     totals have none.

   - When labels are moved between rows and columns, passes each summary
     specification's calc_area through rotate_area().

   - When T->ctables->smissing_listwise is set, collects the scale variables
     of the other nests in the same group as each nest's listwise_vars.

   - Merges the summary specifications of all nests into T->summary_specs
     and records each specification's position there in its axis_idx.

   - Collects the variables used by sum-percentage functions with
     enumerate_sum_vars().

   Returns the combined result of ctables_check_label_position() for the row
   and column axes.

   NOTE(review): this listing omits short lines (braces, `else', `break',
   some conditions), so the exact nesting of the statements below cannot be
   read off here. */
4636 ctables_prepare_table (struct ctables_table *t)
4638 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4641 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4643 for (size_t j = 0; j < t->stacks[a].n; j++)
4645 struct ctables_nest *nest = &t->stacks[a].nests[j];
4646 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4648 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4649 nest->n_areas[at] = 0;
/* ATA is the axis that the area type AT is named for and ATB the opposite
   one of rows and columns. */
4651 enum pivot_axis_type ata, atb;
4652 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4654 ata = PIVOT_AXIS_ROW;
4655 atb = PIVOT_AXIS_COLUMN;
4657 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4659 ata = PIVOT_AXIS_COLUMN;
4660 atb = PIVOT_AXIS_ROW;
4663 if (at == CTAT_LAYER
4664 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4665 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4666 ? a == atb && t->label_axis[a] != a
/* Counts K down from the last variable; because K is unsigned it wraps
   around past zero, which makes `k < nest->n' false and ends the loop. */
4669 for (size_t k = nest->n - 1; k < nest->n; k--)
4670 if (k != nest->scale_idx)
4672 nest->areas[at][nest->n_areas[at]++] = k;
4678 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4679 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4680 : at == CTAT_TABLE ? true
/* Add every non-scale variable of the nest to this area. */
4684 for (size_t k = 0; k < nest->n; k++)
4685 if (k != nest->scale_idx)
4686 nest->areas[at][nest->n_areas[at]++] = k;
4692 #define L PIVOT_AXIS_LAYER
4693 n_drop = (t->clabels_from_axis == L ? a != L
4694 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4695 : t->clabels_from_axis == a ? 2
4702 n_drop = a == ata && t->label_axis[ata] == atb;
4707 n_drop = (a == ata ? t->label_axis[ata] == atb
4709 : t->clabels_from_axis == atb ? -1
4710 : t->clabels_to_axis != atb ? 1
4722 size_t n = nest->n_areas[at];
/* Removes the second-to-last entry by overwriting it with the last one. */
4725 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4726 nest->n_areas[at]--;
/* Drops up to N_DROP trailing entries, stopping when none are left. */
4731 for (int i = 0; i < n_drop; i++)
4732 if (nest->n_areas[at] > 0)
4733 nest->n_areas[at]--;
4740 struct ctables_nest *nest = xmalloc (sizeof *nest);
4741 *nest = (struct ctables_nest) {
4743 .scale_idx = SIZE_MAX,
4744 .summary_idx = SIZE_MAX
4746 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4748 /* There's no point in moving labels away from an axis that has no
4749 labels, so avoid dealing with the special cases around that. */
4750 t->label_axis[a] = a;
/* Summary specifications: defaults, totals, rotation, listwise variables. */
4753 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4754 for (size_t i = 0; i < stack->n; i++)
4756 struct ctables_nest *nest = &stack->nests[i];
4757 if (!nest->specs[CSV_CELL].n)
4759 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4760 ss->specs = xmalloc (sizeof *ss->specs);
4763 enum ctables_summary_function function
4764 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
4768 nest->summary_idx = nest->n - 1;
4769 ss->var = nest->vars[nest->summary_idx];
4771 *ss->specs = (struct ctables_summary_spec) {
4772 .function = function,
4773 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
4774 .format = ctables_summary_default_format (function, ss->var),
4777 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4778 &nest->specs[CSV_CELL]);
4780 else if (!nest->specs[CSV_TOTAL].n)
4781 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4782 &nest->specs[CSV_CELL]);
4784 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4785 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4787 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4788 for (size_t i = 0; i < nest->specs[sv].n; i++)
4790 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4791 const struct ctables_function_info *cfi =
4792 &ctables_function_info[ss->function];
/* NOTE(review): CFI is presumably tested on a line not visible here before
   the rotation is applied -- confirm. */
4794 ss->calc_area = rotate_area (ss->calc_area);
4798 if (t->ctables->smissing_listwise)
4800 struct variable **listwise_vars = NULL;
4802 size_t allocated = 0;
4804 for (size_t j = nest->group_head; j < stack->n; j++)
4806 const struct ctables_nest *other_nest = &stack->nests[j];
4807 if (other_nest->group_head != nest->group_head)
4810 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4813 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4814 sizeof *listwise_vars);
4815 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4818 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4821 listwise_vars = xmemdup (listwise_vars,
4822 n * sizeof *listwise_vars);
4823 nest->specs[sv].listwise_vars = listwise_vars;
4824 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specifications of every nest and variant into
   T->summary_specs.  Each round picks the smallest pending item, appends its
   current specification to MERGED, and advances every item that compares
   equal to it, recording the merged position as that specification's
   axis_idx. */
4829 struct ctables_summary_spec_set *merged = &t->summary_specs;
4830 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4832 for (size_t j = 0; j < stack->n; j++)
4834 const struct ctables_nest *nest = &stack->nests[j];
4836 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4837 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4842 struct merge_item min = items[0];
4843 for (size_t j = 1; j < n_left; j++)
4844 if (merge_item_compare_3way (&items[j], &min) < 0)
4847 if (merged->n >= merged->allocated)
4848 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4849 sizeof *merged->specs);
4850 merged->specs[merged->n++] = min.set->specs[min.ofs];
4852 for (size_t j = 0; j < n_left; )
4854 if (merge_item_compare_3way (&items[j], &min) == 0)
4856 struct merge_item *item = &items[j];
4857 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4858 if (++item->ofs >= item->set->n)
/* An exhausted item is replaced by the last pending one. */
4860 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below look like debugging output and are
   presumably enclosed in a preprocessor conditional that is not visible in
   this listing -- confirm. */
4870 for (size_t j = 0; j < merged->n; j++)
4871 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4873 for (size_t j = 0; j < stack->n; j++)
4875 const struct ctables_nest *nest = &stack->nests[j];
4876 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4878 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4879 for (size_t k = 0; k < specs->n; k++)
4880 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4881 specs->specs[k].axis_idx);
4887 size_t allocated_sum_vars = 0;
4888 enumerate_sum_vars (t->axes[t->summary_axis],
4889 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4891 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4892 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, takes the value of the nest's
   innermost variable from case C and, if it matches one of that variable's
   categories, adds it to T's collection of category-label values with
   ctables_value_insert().

   NOTE(review): the statement controlled by the SYSMIS test is not visible
   in this listing; presumably system-missing numeric values are skipped --
   confirm. */
4896 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4897 enum pivot_axis_type a)
4899 struct ctables_stack *stack = &t->stacks[a];
4900 for (size_t i = 0; i < stack->n; i++)
4902 const struct ctables_nest *nest = &stack->nests[i];
4903 const struct variable *var = nest->vars[nest->n - 1];
4904 const union value *value = case_data (c, var);
4906 if (var_is_numeric (var) && value->f == SYSMIS)
4909 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4911 ctables_value_insert (t, value, var_get_width (var));
4916 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4918 const struct ctables_value *const *ap = a_;
4919 const struct ctables_value *const *bp = b_;
4920 const struct ctables_value *a = *ap;
4921 const struct ctables_value *b = *bp;
4922 const int *width = width_;
4923 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of pointers to all the values in
   T->clabels_values_map sorted with compare_clabels_values_3way(), and sets
   each value's leaf to its position in the sorted array.

   Before the array is built, the labeled values of T->clabels_example that
   match its category specifications are inserted into the map.

   NOTE(review): a short line may be missing ahead of that insertion loop, so
   it may be conditional; the declaration of the index `i' used to fill the
   array is also not visible here. */
4927 ctables_sort_clabels_values (struct ctables_table *t)
4929 const struct variable *v0 = t->clabels_example;
4930 int width = var_get_width (v0);
4932 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4935 const struct val_labs *val_labs = var_get_value_labels (v0);
4936 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4937 vl = val_labs_next (val_labs, vl))
4938 if (ctables_categories_match (c0, &vl->value, v0))
4939 ctables_value_insert (t, &vl->value, width);
4942 size_t n = hmap_count (&t->clabels_values_map);
4943 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4945 struct ctables_value *clv;
4947 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4948 t->clabels_values[i++] = clv;
4949 t->n_clabels_values = n;
4952 sort (t->clabels_values, n, sizeof *t->clabels_values,
4953 compare_clabels_values_3way, &width);
/* The sorted position becomes the value's leaf index. */
4955 for (size_t i = 0; i < n; i++)
4956 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the category specifications in
   CATS call for, using ctables_add_occurrence().

   A category that names a single number or string contributes that value
   directly (a string is copied into a value of VAR's width, padded on the
   right with spaces).  The other visible branches draw their candidates
   from VAR's value labels: labeled values within a numeric range, within a
   string range, that are missing, all labeled values, or labeled values
   that are non-missing unless the category's include_missing is set.

   NOTE(review): most of the `case' labels of the switch on the category
   type are not visible in this listing, so which branch serves which
   category type must be confirmed against the full source. */
4960 ctables_add_category_occurrences (const struct variable *var,
4961 struct hmap *occurrences,
4962 const struct ctables_categories *cats)
4964 const struct val_labs *val_labs = var_get_value_labels (var);
4966 for (size_t i = 0; i < cats->n_cats; i++)
4968 const struct ctables_category *c = &cats->cats[i];
4972 ctables_add_occurrence (var, &(const union value) { .f = c->number },
4978 int width = var_get_width (var);
4980 value_init (&value, width);
4981 value_copy_buf_rpad (&value, width,
4982 CHAR_CAST (uint8_t *, c->string.string),
4983 c->string.length, ' ');
4984 ctables_add_occurrence (var, &value, occurrences);
4985 value_destroy (&value, width);
4990 assert (var_is_numeric (var));
4991 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4992 vl = val_labs_next (val_labs, vl))
4993 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4994 ctables_add_occurrence (var, &vl->value, occurrences);
4998 assert (var_is_alpha (var));
4999 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5000 vl = val_labs_next (val_labs, vl))
5001 if (in_string_range (&vl->value, var, c->srange))
5002 ctables_add_occurrence (var, &vl->value, occurrences);
5006 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5007 vl = val_labs_next (val_labs, vl))
5008 if (var_is_value_missing (var, &vl->value))
5009 ctables_add_occurrence (var, &vl->value, occurrences);
5013 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5014 vl = val_labs_next (val_labs, vl))
5015 ctables_add_occurrence (var, &vl->value, occurrences);
5018 case CCT_POSTCOMPUTE:
5028 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5029 vl = val_labs_next (val_labs, vl))
5030 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5031 ctables_add_occurrence (var, &vl->value, occurrences);
5034 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Enumerates combinations of categories for the variables in section S's
   nests, starting at variable A_IDX of axis A.  For the current variable it
   tries every value recorded in S->occurrences[A][A_IDX], storing the value
   in the scratch case C and the matching category in CATS[A][A_IDX], and
   also every CCT_POSTCOMPUTE category of the variable, recursing to the next
   variable each time.  When the variables of an axis are exhausted (or the
   axis has no nest) it moves to the next axis; once A reaches PIVOT_N_AXES
   it inserts a cell for the combination with ctables_cell_insert__(). */
5041 ctables_section_recurse_add_empty_categories (
5042 struct ctables_section *s,
5043 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5044 enum pivot_axis_type a, size_t a_idx)
5046 if (a >= PIVOT_N_AXES)
5047 ctables_cell_insert__ (s, c, cats);
5048 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5049 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5052 const struct variable *var = s->nests[a]->vars[a_idx];
5053 const struct ctables_categories *categories = s->table->categories[
5054 var_get_dict_index (var)];
5055 int width = var_get_width (var);
5056 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5057 const struct ctables_occurrence *o;
5058 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence. */
5060 union value *value = case_data_rw (c, var);
5061 value_destroy (value, width);
5062 value_clone (value, &o->value, width);
5063 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5064 assert (cats[a][a_idx] != NULL);
5065 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are tried in addition to the occurring values. */
5068 for (size_t i = 0; i < categories->n_cats; i++)
5070 const struct ctables_category *cat = &categories->cats[i];
5071 if (cat->type == CCT_POSTCOMPUTE)
5073 cats[a][a_idx] = cat;
5074 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should be shown
   even though no case fell into them.

   For each non-scale variable in S's nests whose category specifications
   have show_empty set, records the values its categories call for in
   S->occurrences[][] via ctables_add_category_occurrences().  Then creates a
   scratch case from the dictionary's prototype and lets
   ctables_section_recurse_add_empty_categories() insert a cell for each
   combination.

   NOTE(review): the lines that set and test `show_empty', and any release of
   the scratch case, are not visible in this listing.  The second dimension
   of CATS is a fixed 10 (marked XXX in the source); nothing visible here
   checks a nest's variable count against it. */
5081 ctables_section_add_empty_categories (struct ctables_section *s)
5083 bool show_empty = false;
5084 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5086 for (size_t k = 0; k < s->nests[a]->n; k++)
5087 if (k != s->nests[a]->scale_idx)
5089 const struct variable *var = s->nests[a]->vars[k];
5090 const struct ctables_categories *cats = s->table->categories[
5091 var_get_dict_index (var)];
5092 if (cats->show_empty)
5095 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5101 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5102 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5103 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in section S while leaving its hash maps in
   place (ctables_section_uninit() is what destroys the maps):

   - Every occurrence of every non-scale variable is removed from
     S->occurrences[][] and its value destroyed.
   - Every cell is removed from S->cells; its per-axis values are destroyed,
     its cvs arrays and summaries freed, and each summary uninitialized.
   - Every area is removed from S->areas[].

   S->cells and S->areas[] are then shrunk with hmap_shrink().

   NOTE(review): the free() calls for the occurrence, cell, and area objects
   themselves are not visible in this listing -- confirm they exist in the
   full source. */
5108 ctables_section_clear (struct ctables_section *s)
5110 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5112 const struct ctables_nest *nest = s->nests[a];
5113 for (size_t i = 0; i < nest->n; i++)
5114 if (i != nest->scale_idx)
5116 const struct variable *var = nest->vars[i];
5117 int width = var_get_width (var);
5118 struct ctables_occurrence *o, *next;
5119 struct hmap *map = &s->occurrences[a][i];
5120 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5122 value_destroy (&o->value, width);
5123 hmap_delete (map, &o->node);
5130 struct ctables_cell *cell, *next_cell;
5131 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5133 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5135 const struct ctables_nest *nest = s->nests[a];
5136 for (size_t i = 0; i < nest->n; i++)
5137 if (i != nest->scale_idx)
5138 value_destroy (&cell->axes[a].cvs[i].value,
5139 var_get_width (nest->vars[i]));
5140 free (cell->axes[a].cvs);
/* The summaries were laid out according to the summary-axis nest's
   specification set for this cell's variant (cell->sv). */
5143 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5144 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5145 for (size_t i = 0; i < specs->n; i++)
5146 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5147 free (cell->summaries);
5149 hmap_delete (&s->cells, &cell->node);
5152 hmap_shrink (&s->cells);
5154 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5156 struct ctables_area *area, *next_area;
5157 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5161 hmap_delete (&s->areas[at], &area->node);
5164 hmap_shrink (&s->areas[at]);
5169 ctables_section_uninit (struct ctables_section *s)
5171 ctables_section_clear (s);
5173 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5175 struct ctables_nest *nest = s->nests[a];
5176 for (size_t i = 0; i < nest->n; i++)
5177 hmap_destroy (&s->occurrences[a][i]);
5178 free (s->occurrences[a]);
5181 hmap_destroy (&s->cells);
5182 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5183 hmap_destroy (&s->areas[at]);
/* Discards the per-run data of table T so that it can be reused: clears
   every section with ctables_section_clear() and, if T->clabels_example is
   set, empties T->clabels_values_map (destroying each value), shrinks the
   map, frees the T->clabels_values array, and resets it to empty.

   NOTE(review): the free() of each `struct ctables_value' removed from the
   map is not visible in this listing -- confirm it exists in the full
   source. */
5187 ctables_table_clear (struct ctables_table *t)
5189 for (size_t i = 0; i < t->n_sections; i++)
5190 ctables_section_clear (&t->sections[i]);
5192 if (t->clabels_example)
/* All the values in the map have the width of the example variable. */
5194 int width = var_get_width (t->clabels_example);
5195 struct ctables_value *value, *next_value;
5196 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5197 &t->clabels_values_map)
5199 value_destroy (&value->value, width);
5200 hmap_delete (&t->clabels_values_map, &value->node);
5203 hmap_shrink (&t->clabels_values_map);
5205 free (t->clabels_values);
5206 t->clabels_values = NULL;
5207 t->n_clabels_values = 0;
/* Runs the CTABLES procedure over the cases in INPUT, which come from
   dataset DS.

   First allocates the sections of every table: one slot per combination of
   row, column, and layer stacks, counting an empty stack as 1.  Then reads
   the input one group at a time (groups follow the split-file variables
   when the dictionary's split type is SPLIT_SEPARATE, otherwise a grouper
   over no variables is used).  Each case is inserted into every section of
   every table with its dictionary, effective, and unweighted weights, and
   its values are recorded for any axis whose labels are moved elsewhere.
   After each group, every table has its category-label values sorted (if
   any), its empty categories added, and is then output and cleared.

   Returns the result of casegrouper_destroy().

   NOTE(review): the last parameter line of the signature (presumably the
   `struct ctables *ct' used throughout) and several conditions are not
   visible in this listing. */
5212 ctables_execute (struct dataset *ds, struct casereader *input,
5215 for (size_t i = 0; i < ct->n_tables; i++)
5217 struct ctables_table *t = ct->tables[i];
5218 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5219 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5220 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5221 sizeof *t->sections);
5222 size_t ix[PIVOT_N_AXES];
5223 ctables_table_add_section (t, 0, ix);
5226 struct dictionary *dict = dataset_dict (ds);
5228 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5229 struct casegrouper *grouper
5231 ? casegrouper_create_splits (input, dict)
5232 : casegrouper_create_vars (input, NULL, 0));
5233 struct casereader *group;
5234 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to report the split-file values. */
5238 struct ccase *c = casereader_peek (group, 0);
5241 output_split_file_values (ds, c);
5246 bool warn_on_invalid = true;
5247 for (struct ccase *c = casereader_read (group); c;
5248 case_unref (c), c = casereader_read (group))
5250 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5251 double e_weight = (ct->e_weight
5252 ? var_force_valid_weight (ct->e_weight,
5253 case_num (c, ct->e_weight),
/* Weights indexed by enum ctables_weighting. */
5257 [CTW_DICTIONARY] = d_weight,
5258 [CTW_EFFECTIVE] = e_weight,
5259 [CTW_UNWEIGHTED] = 1.0,
5262 for (size_t i = 0; i < ct->n_tables; i++)
5264 struct ctables_table *t = ct->tables[i];
5266 for (size_t j = 0; j < t->n_sections; j++)
5267 ctables_cell_insert (&t->sections[j], c, weight);
5269 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5270 if (t->label_axis[a] != a)
5271 ctables_insert_clabels_values (t, c, a);
5274 casereader_destroy (group);
/* The group has been read completely: produce output for each table. */
5276 for (size_t i = 0; i < ct->n_tables; i++)
5278 struct ctables_table *t = ct->tables[i];
5280 if (t->clabels_example)
5281 ctables_sort_clabels_values (t);
5283 for (size_t j = 0; j < t->n_sections; j++)
5284 ctables_section_add_empty_categories (&t->sections[j]);
5286 ctables_table_output (ct, t);
5287 ctables_table_clear (t);
5290 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the parsed
   expression.  The binary-operator parsers call such a function to obtain
   each operand and treat a null return as failure. */
5295 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5296 struct dictionary *);
/* Destroys postcompute expression E, releasing what it owns according to
   its operator: the string of a CTPO_CAT_STRING node, both strings of a
   CTPO_CAT_SRANGE node, and (recursively) the subexpressions of the
   operator nodes.  The listed category and constant node types own nothing
   extra.  The expression's location is destroyed in every case.

   NOTE(review): the null check, some `case' labels (the operator node
   types), and the final free of E itself are not visible in this listing.
   Callers here pass a possibly-null expression (see
   ctables_parse_pcompute()), so a null E is presumably accepted --
   confirm. */
5299 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5305 case CTPO_CAT_STRING:
5306 ss_dealloc (&e->string);
5309 case CTPO_CAT_SRANGE:
5310 for (size_t i = 0; i < 2; i++)
5311 ss_dealloc (&e->srange[i]);
/* Operator nodes: destroy both subexpressions. */
5320 for (size_t i = 0; i < 2; i++)
5321 ctables_pcexpr_destroy (e->subs[i]);
5325 case CTPO_CAT_NUMBER:
5326 case CTPO_CAT_NRANGE:
5327 case CTPO_CAT_MISSING:
5328 case CTPO_CAT_OTHERNM:
5329 case CTPO_CAT_SUBTOTAL:
5330 case CTPO_CAT_TOTAL:
5334 msg_location_destroy (e->location);
5339 static struct ctables_pcexpr *
5340 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5341 struct ctables_pcexpr *sub0,
5342 struct ctables_pcexpr *sub1)
5344 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5345 *e = (struct ctables_pcexpr) {
5347 .subs = { sub0, sub1 },
5348 .location = msg_location_merged (sub0->location, sub1->location),
5353 /* How to parse an operator. */
5356 enum token_type token; /* Token that represents the operator. */
5357 enum ctables_postcompute_op op; /* Operation for the resulting node. */
5360 static const struct operator *
5361 ctables_pcexpr_match_operator (struct lexer *lexer,
5362 const struct operator ops[], size_t n_ops)
5364 for (const struct operator *op = ops; op < ops + n_ops; op++)
5365 if (lex_token (lexer) == op->token)
5367 if (op->token != T_NEG_NUM)
5376 static struct ctables_pcexpr *
5377 ctables_pcexpr_parse_binary_operators__ (
5378 struct lexer *lexer, struct dictionary *dict,
5379 const struct operator ops[], size_t n_ops,
5380 parse_recursively_func *parse_next_level,
5381 const char *chain_warning, struct ctables_pcexpr *lhs)
5383 for (int op_count = 0; ; op_count++)
5385 const struct operator *op
5386 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5389 if (op_count > 1 && chain_warning)
5390 msg_at (SW, lhs->location, "%s", chain_warning);
5395 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5398 ctables_pcexpr_destroy (lhs);
5402 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5406 static struct ctables_pcexpr *
5407 ctables_pcexpr_parse_binary_operators (
5408 struct lexer *lexer, struct dictionary *dict,
5409 const struct operator ops[], size_t n_ops,
5410 parse_recursively_func *parse_next_level, const char *chain_warning)
5412 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5416 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5418 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this to parse a
   parenthesized subexpression, and it is defined after the parsers for the
   higher-precedence levels. */
5421 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5422 struct dictionary *);
5424 static struct ctables_pcexpr
5425 ctpo_cat_nrange (double low, double high)
5427 return (struct ctables_pcexpr) {
5428 .op = CTPO_CAT_NRANGE,
5429 .nrange = { low, high },
5433 static struct ctables_pcexpr
5434 ctpo_cat_srange (struct substring low, struct substring high)
5436 return (struct ctables_pcexpr) {
5437 .op = CTPO_CAT_SRANGE,
5438 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node.  The visible forms are:

     - a number, yielding a CTPO_CONSTANT node;
     - the keywords MISSING, OTHERNM, and TOTAL;
     - SUBTOTAL, optionally followed by a bracketed integer index of at
       least 1 (the index stays 0 when no brackets are given);
     - a bracketed category reference: [LO THRU string-or-number],
       [number], [number THRU HI], [number THRU number], [string],
       [string THRU HI], or [string THRU string];
     - a parenthesized expression, parsed with ctables_pcexpr_parse_add().

   Open-ended numeric ranges use -DBL_MAX and DBL_MAX as their bounds, and
   open-ended string ranges use a substring with a null `string' member.
   Anything else is reported with lex_error().

   On success the node's location spans from the first token of the primary
   through the last token consumed.

   NOTE(review): the failure returns and several cleanup statements are not
   visible in this listing; callers in this file treat a null return as
   failure. */
5442 static struct ctables_pcexpr *
5443 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5445 int start_ofs = lex_ofs (lexer);
5446 struct ctables_pcexpr e;
5447 if (lex_is_number (lexer))
5449 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5450 .number = lex_number (lexer) };
5453 else if (lex_match_id (lexer, "MISSING"))
5454 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5455 else if (lex_match_id (lexer, "OTHERNM"))
5456 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5457 else if (lex_match_id (lexer, "TOTAL"))
5458 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5459 else if (lex_match_id (lexer, "SUBTOTAL"))
5461 size_t subtotal_index = 0;
5462 if (lex_match (lexer, T_LBRACK))
5464 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5466 subtotal_index = lex_integer (lexer);
5468 if (!lex_force_match (lexer, T_RBRACK))
5471 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5472 .subtotal_index = subtotal_index };
5474 else if (lex_match (lexer, T_LBRACK))
/* Range that is open at the low end: [LO THRU high]. */
5476 if (lex_match_id (lexer, "LO"))
5478 if (!lex_force_match_id (lexer, "THRU"))
5481 if (lex_is_string (lexer))
5483 struct substring low = { .string = NULL };
5484 struct substring high = parse_substring (lexer, dict);
5485 e = ctpo_cat_srange (low, high);
5489 if (!lex_force_num (lexer))
5491 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* Numeric category: a single number or a range starting at a number. */
5495 else if (lex_is_number (lexer))
5497 double number = lex_number (lexer);
5499 if (lex_match_id (lexer, "THRU"))
5501 if (lex_match_id (lexer, "HI"))
5502 e = ctpo_cat_nrange (number, DBL_MAX);
5505 if (!lex_force_num (lexer))
5507 e = ctpo_cat_nrange (number, lex_number (lexer));
5512 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* String category: a single string or a range starting at a string. */
5515 else if (lex_is_string (lexer))
5517 struct substring s = parse_substring (lexer, dict);
5519 if (lex_match_id (lexer, "THRU"))
5521 struct substring high;
5523 if (lex_match_id (lexer, "HI"))
5524 high = (struct substring) { .string = NULL };
5527 if (!lex_force_string (lexer))
5532 high = parse_substring (lexer, dict);
5535 e = ctpo_cat_srange (s, high);
5538 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5542 lex_error (lexer, NULL);
/* Without the closing bracket, release any strings parsed above. */
5546 if (!lex_force_match (lexer, T_RBRACK))
5548 if (e.op == CTPO_CAT_STRING)
5549 ss_dealloc (&e.string);
5550 else if (e.op == CTPO_CAT_SRANGE)
5552 ss_dealloc (&e.srange[0]);
5553 ss_dealloc (&e.srange[1]);
5558 else if (lex_match (lexer, T_LPAREN))
5560 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5563 if (!lex_force_match (lexer, T_RPAREN))
5565 ctables_pcexpr_destroy (ep);
5572 lex_error (lexer, NULL);
/* Record the tokens covered by the primary and return a heap copy of E. */
5576 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5577 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for a negation whose operand is SUB.  The
   node's location spans the tokens from offset START_OFS in LEXER through
   the token just before the lexer's current one.

   NOTE(review): the initializers that set the node's operator and attach
   SUB, and the return statement, are not visible in this listing; both
   callers return this function's result as the parsed expression. */
5580 static struct ctables_pcexpr *
5581 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5582 struct lexer *lexer, int start_ofs)
5584 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5585 *e = (struct ctables_pcexpr) {
5588 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level: a chain of primaries joined by T_EXP
   (`**'), which is left-associative.  A warning explaining the
   associativity is passed down to be issued when the operator is chained.

   When the expression starts with a negative-number token immediately
   followed by `**' (as in "-5**6"), the number's magnitude is used as the
   left operand of the exponentiation and the whole result is wrapped in a
   negation node.

   NOTE(review): a few short lines (the end of the special-case comment, the
   step that advances past the negative number, and the failure check on
   NODE) are not visible in this listing. */
5593 static struct ctables_pcexpr *
5594 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5596 static const struct operator op = { T_EXP, CTPO_POW };
5598 const char *chain_warning =
5599 _("The exponentiation operator (`**') is left-associative: "
5600 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5601 "To disable this warning, insert parentheses.");
5603 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5604 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5605 ctables_pcexpr_parse_primary,
5608 /* Special case for situations like "-5**6", which must be parsed as
5611 int start_ofs = lex_ofs (lexer);
5612 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5613 *lhs = (struct ctables_pcexpr) {
5614 .op = CTPO_CONSTANT,
5615 .number = -lex_tokval (lexer),
5616 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5620 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5621 lexer, dict, &op, 1,
5622 ctables_pcexpr_parse_primary, chain_warning, lhs);
5626 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5629 /* Parses the unary minus level. */
5630 static struct ctables_pcexpr *
5631 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5633 int start_ofs = lex_ofs (lexer);
5634 if (!lex_match (lexer, T_DASH))
5635 return ctables_pcexpr_parse_exp (lexer, dict);
5637 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5641 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5644 /* Parses the multiplication and division level. */
5645 static struct ctables_pcexpr *
5646 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5648 static const struct operator ops[] =
5650 { T_ASTERISK, CTPO_MUL },
5651 { T_SLASH, CTPO_DIV },
5654 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5655 sizeof ops / sizeof *ops,
5656 ctables_pcexpr_parse_neg, NULL);
5659 /* Parses the addition and subtraction level. */
5660 static struct ctables_pcexpr *
5661 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5663 static const struct operator ops[] =
5665 { T_PLUS, CTPO_ADD },
5666 { T_DASH, CTPO_SUB },
5667 { T_NEG_NUM, CTPO_ADD },
5670 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5671 ops, sizeof ops / sizeof *ops,
5672 ctables_pcexpr_parse_mul, NULL);
5675 static struct ctables_postcompute *
5676 ctables_find_postcompute (struct ctables *ct, const char *name)
5678 struct ctables_postcompute *pc;
5679 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5680 utf8_hash_case_string (name, 0), &ct->postcomputes)
5681 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form

       &name = EXPR(expression)

   from LEXER and records it in CT's table of postcomputes.

   If a postcompute with the same name already exists, warns that the new
   definition overrides it, points at the previous definition, and destroys
   the previous expression and location; otherwise a new postcompute is
   allocated and inserted into CT->postcomputes under the case-insensitive
   hash of its name.  The source location of the whole definition is saved,
   and the source text of the expression is used for the label.

   NOTE(review): the remaining parameter line of the signature, the return
   statements, the release of NAME on the failure and override paths, the
   assignment of the parsed expression, and any condition on the label
   assignment are not visible in this listing -- confirm against the full
   source. */
5687 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The definition's location starts one token before the current one. */
5690 int pcompute_start = lex_ofs (lexer) - 1;
5692 if (!lex_match (lexer, T_AND))
5694 lex_error_expecting (lexer, "&");
5697 if (!lex_force_id (lexer))
5700 char *name = ss_xstrdup (lex_tokss (lexer));
5703 if (!lex_force_match (lexer, T_EQUALS)
5704 || !lex_force_match_id (lexer, "EXPR")
5705 || !lex_force_match (lexer, T_LPAREN))
/* EXPR_START and EXPR_END delimit the tokens of the expression itself. */
5711 int expr_start = lex_ofs (lexer);
5712 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5713 int expr_end = lex_ofs (lexer) - 1;
5714 if (!expr || !lex_force_match (lexer, T_RPAREN))
5716 ctables_pcexpr_destroy (expr);
5720 int pcompute_end = lex_ofs (lexer) - 1;
5722 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5725 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5728 msg_at (SW, location, _("New definition of &%s will override the "
5729 "previous definition."),
5731 msg_at (SN, pc->location, _("This is the previous definition."));
5733 ctables_pcexpr_destroy (pc->expr);
5734 msg_location_destroy (pc->location);
5739 pc = xmalloc (sizeof *pc);
5740 *pc = (struct ctables_postcompute) { .name = name };
5741 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5742 utf8_hash_case_string (pc->name, 0));
5745 pc->location = location;
5747 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5752 ctables_parse_pproperties_format (struct lexer *lexer,
5753 struct ctables_summary_spec_set *sss)
5755 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5757 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5758 && !(lex_token (lexer) == T_ID
5759 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5760 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5761 lex_tokss (lexer)))))
5763 /* Parse function. */
5764 enum ctables_summary_function function;
5765 enum ctables_weighting weighting;
5766 enum ctables_area_type area;
5767 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5770 /* Parse percentile. */
5771 double percentile = 0;
5772 if (function == CTSF_PTILE)
5774 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5776 percentile = lex_number (lexer);
5781 struct fmt_spec format;
5782 bool is_ctables_format;
5783 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5786 if (sss->n >= sss->allocated)
5787 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5788 sizeof *sss->specs);
5789 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5790 .function = function,
5791 .weighting = weighting,
5794 .percentile = percentile,
5796 .is_ctables_format = is_ctables_format,
5802 ctables_summary_spec_set_uninit (sss);
5807 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5809 struct ctables_postcompute **pcs = NULL;
5811 size_t allocated_pcs = 0;
5813 while (lex_match (lexer, T_AND))
5815 if (!lex_force_id (lexer))
5817 struct ctables_postcompute *pc
5818 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5821 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5826 if (n_pcs >= allocated_pcs)
5827 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5831 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5833 if (lex_match_id (lexer, "LABEL"))
5835 lex_match (lexer, T_EQUALS);
5836 if (!lex_force_string (lexer))
5839 for (size_t i = 0; i < n_pcs; i++)
5841 free (pcs[i]->label);
5842 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5847 else if (lex_match_id (lexer, "FORMAT"))
5849 lex_match (lexer, T_EQUALS);
5851 struct ctables_summary_spec_set sss;
5852 if (!ctables_parse_pproperties_format (lexer, &sss))
5855 for (size_t i = 0; i < n_pcs; i++)
5858 ctables_summary_spec_set_uninit (pcs[i]->specs);
5860 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5861 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5863 ctables_summary_spec_set_uninit (&sss);
5865 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5867 lex_match (lexer, T_EQUALS);
5868 bool hide_source_cats;
5869 if (!parse_bool (lexer, &hide_source_cats))
5871 for (size_t i = 0; i < n_pcs; i++)
5872 pcs[i]->hide_source_cats = hide_source_cats;
5876 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time and formatted according
   to strftime() template FORMAT.

   Appends nothing if NOW cannot be converted to local time, or if the
   formatted text (including its null terminator) does not fit in the internal
   128-byte buffer. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  /* localtime() returns a null pointer if the conversion fails; passing that
     on to strftime() would be undefined behavior. */
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  /* strftime() returns 0 when the result does not fit, in which case the
     buffer contents are indeterminate and must not be used as a string.  A
     result that is legitimately empty also yields 0, and then there is nothing
     to append anyway. */
  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
5898 skip_prefix (struct substring *s, struct substring prefix)
5900 if (ss_starts_with (*s, prefix))
5902 ss_advance (s, prefix.length);
5910 put_table_expression (struct string *out, struct lexer *lexer,
5911 struct dictionary *dict, int expr_start, int expr_end)
5914 for (int ofs = expr_start; ofs < expr_end; ofs++)
5916 const struct token *t = lex_ofs_token (lexer, ofs);
5917 if (t->type == T_LBRACK)
5919 else if (t->type == T_RBRACK && nest > 0)
5925 else if (t->type == T_ID)
5927 const struct variable *var
5928 = dict_lookup_var (dict, t->string.string);
5929 const char *label = var ? var_get_label (var) : NULL;
5930 ds_put_cstr (out, label ? label : t->string.string);
5934 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5935 ds_put_byte (out, ' ');
5937 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5938 ds_put_cstr (out, repr);
5941 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5942 ds_put_byte (out, ' ');
5948 put_title_text (struct string *out, struct substring in, time_t now,
5949 struct lexer *lexer, struct dictionary *dict,
5950 int expr_start, int expr_end)
5954 size_t chunk = ss_find_byte (in, ')');
5955 ds_put_substring (out, ss_head (in, chunk));
5956 ss_advance (&in, chunk);
5957 if (ss_is_empty (in))
5960 if (skip_prefix (&in, ss_cstr (")DATE")))
5961 put_strftime (out, now, "%x");
5962 else if (skip_prefix (&in, ss_cstr (")TIME")))
5963 put_strftime (out, now, "%X");
5964 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5965 put_table_expression (out, lexer, dict, expr_start, expr_end);
5968 ds_put_byte (out, ')');
5969 ss_advance (&in, 1);
5975 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5977 struct casereader *input = NULL;
5979 struct measure_guesser *mg = measure_guesser_create (ds);
5982 input = proc_open (ds);
5983 measure_guesser_run (mg, input);
5984 measure_guesser_destroy (mg);
5987 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5988 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5989 enum settings_value_show tvars = settings_get_show_variables ();
5990 for (size_t i = 0; i < n_vars; i++)
5991 vlabels[i] = (enum ctables_vlabel) tvars;
5993 struct pivot_table_look *look = pivot_table_look_unshare (
5994 pivot_table_look_ref (pivot_table_look_get_default ()));
5995 look->omit_empty = false;
5997 struct ctables *ct = xmalloc (sizeof *ct);
5998 *ct = (struct ctables) {
5999 .dict = dataset_dict (ds),
6001 .ctables_formats = FMT_SETTINGS_INIT,
6003 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6006 time_t now = time (NULL);
6011 const char *dot_string;
6012 const char *comma_string;
6014 static const struct ctf ctfs[4] = {
6015 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6016 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6017 { CTEF_PAREN, "-,(,),", "-.(.)." },
6018 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6020 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6021 for (size_t i = 0; i < 4; i++)
6023 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6024 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6025 fmt_number_style_from_string (s));
6028 if (!lex_force_match (lexer, T_SLASH))
6031 while (!lex_match_id (lexer, "TABLE"))
6033 if (lex_match_id (lexer, "FORMAT"))
6035 double widths[2] = { SYSMIS, SYSMIS };
6036 double units_per_inch = 72.0;
6038 while (lex_token (lexer) != T_SLASH)
6040 if (lex_match_id (lexer, "MINCOLWIDTH"))
6042 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6045 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6047 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6050 else if (lex_match_id (lexer, "UNITS"))
6052 lex_match (lexer, T_EQUALS);
6053 if (lex_match_id (lexer, "POINTS"))
6054 units_per_inch = 72.0;
6055 else if (lex_match_id (lexer, "INCHES"))
6056 units_per_inch = 1.0;
6057 else if (lex_match_id (lexer, "CM"))
6058 units_per_inch = 2.54;
6061 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6065 else if (lex_match_id (lexer, "EMPTY"))
6070 lex_match (lexer, T_EQUALS);
6071 if (lex_match_id (lexer, "ZERO"))
6073 /* Nothing to do. */
6075 else if (lex_match_id (lexer, "BLANK"))
6076 ct->zero = xstrdup ("");
6077 else if (lex_force_string (lexer))
6079 ct->zero = ss_xstrdup (lex_tokss (lexer));
6085 else if (lex_match_id (lexer, "MISSING"))
6087 lex_match (lexer, T_EQUALS);
6088 if (!lex_force_string (lexer))
6092 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6093 ? ss_xstrdup (lex_tokss (lexer))
6099 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6100 "UNITS", "EMPTY", "MISSING");
6105 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6106 && widths[0] > widths[1])
6108 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6112 for (size_t i = 0; i < 2; i++)
6113 if (widths[i] != SYSMIS)
6115 int *wr = ct->look->width_ranges[TABLE_HORZ];
6116 wr[i] = widths[i] / units_per_inch * 96.0;
6121 else if (lex_match_id (lexer, "VLABELS"))
6123 if (!lex_force_match_id (lexer, "VARIABLES"))
6125 lex_match (lexer, T_EQUALS);
6127 struct variable **vars;
6129 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6133 if (!lex_force_match_id (lexer, "DISPLAY"))
6138 lex_match (lexer, T_EQUALS);
6140 enum ctables_vlabel vlabel;
6141 if (lex_match_id (lexer, "DEFAULT"))
6142 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6143 else if (lex_match_id (lexer, "NAME"))
6145 else if (lex_match_id (lexer, "LABEL"))
6146 vlabel = CTVL_LABEL;
6147 else if (lex_match_id (lexer, "BOTH"))
6149 else if (lex_match_id (lexer, "NONE"))
6153 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6159 for (size_t i = 0; i < n_vars; i++)
6160 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6163 else if (lex_match_id (lexer, "MRSETS"))
6165 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6167 lex_match (lexer, T_EQUALS);
6168 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6171 else if (lex_match_id (lexer, "SMISSING"))
6173 if (lex_match_id (lexer, "VARIABLE"))
6174 ct->smissing_listwise = false;
6175 else if (lex_match_id (lexer, "LISTWISE"))
6176 ct->smissing_listwise = true;
6179 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6183 else if (lex_match_id (lexer, "PCOMPUTE"))
6185 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6188 else if (lex_match_id (lexer, "PPROPERTIES"))
6190 if (!ctables_parse_pproperties (lexer, ct))
6193 else if (lex_match_id (lexer, "WEIGHT"))
6195 if (!lex_force_match_id (lexer, "VARIABLE"))
6197 lex_match (lexer, T_EQUALS);
6198 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6202 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6204 if (lex_match_id (lexer, "COUNT"))
6206 lex_match (lexer, T_EQUALS);
6207 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6210 ct->hide_threshold = lex_integer (lexer);
6213 else if (ct->hide_threshold == 0)
6214 ct->hide_threshold = 5;
6218 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6219 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6220 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6224 if (!lex_force_match (lexer, T_SLASH))
6228 size_t allocated_tables = 0;
6231 if (ct->n_tables >= allocated_tables)
6232 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6233 sizeof *ct->tables);
6235 struct ctables_category *cat = xmalloc (sizeof *cat);
6236 *cat = (struct ctables_category) {
6238 .include_missing = false,
6239 .sort_ascending = true,
6242 struct ctables_categories *c = xmalloc (sizeof *c);
6243 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6244 *c = (struct ctables_categories) {
6251 struct ctables_categories **categories = xnmalloc (n_vars,
6252 sizeof *categories);
6253 for (size_t i = 0; i < n_vars; i++)
6256 struct ctables_table *t = xmalloc (sizeof *t);
6257 *t = (struct ctables_table) {
6259 .slabels_axis = PIVOT_AXIS_COLUMN,
6260 .slabels_visible = true,
6261 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6263 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6264 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6265 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6267 .clabels_from_axis = PIVOT_AXIS_LAYER,
6268 .clabels_to_axis = PIVOT_AXIS_LAYER,
6269 .categories = categories,
6270 .n_categories = n_vars,
6273 ct->tables[ct->n_tables++] = t;
6275 lex_match (lexer, T_EQUALS);
6276 int expr_start = lex_ofs (lexer);
6277 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6279 if (lex_match (lexer, T_BY))
6281 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6282 ct, t, PIVOT_AXIS_COLUMN))
6285 if (lex_match (lexer, T_BY))
6287 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6288 ct, t, PIVOT_AXIS_LAYER))
6292 int expr_end = lex_ofs (lexer);
6294 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6295 && !t->axes[PIVOT_AXIS_LAYER])
6297 lex_error (lexer, _("At least one variable must be specified."));
6301 const struct ctables_axis *scales[PIVOT_N_AXES];
6302 size_t n_scales = 0;
6303 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6305 scales[a] = find_scale (t->axes[a]);
6311 msg (SE, _("Scale variables may appear only on one axis."));
6312 if (scales[PIVOT_AXIS_ROW])
6313 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6314 _("This scale variable appears on the rows axis."));
6315 if (scales[PIVOT_AXIS_COLUMN])
6316 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6317 _("This scale variable appears on the columns axis."));
6318 if (scales[PIVOT_AXIS_LAYER])
6319 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6320 _("This scale variable appears on the layer axis."));
6324 const struct ctables_axis *summaries[PIVOT_N_AXES];
6325 size_t n_summaries = 0;
6326 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6328 summaries[a] = (scales[a]
6330 : find_categorical_summary_spec (t->axes[a]));
6334 if (n_summaries > 1)
6336 msg (SE, _("Summaries may appear only on one axis."));
6337 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6340 msg_at (SN, summaries[a]->loc,
6342 ? _("This variable on the rows axis has a summary.")
6343 : a == PIVOT_AXIS_COLUMN
6344 ? _("This variable on the columns axis has a summary.")
6345 : _("This variable on the layers axis has a summary."));
6347 msg_at (SN, summaries[a]->loc,
6348 _("This is a scale variable, so it always has a "
6349 "summary even if the syntax does not explicitly "
6354 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6355 if (n_summaries ? summaries[a] : t->axes[a])
6357 t->summary_axis = a;
6361 if (lex_token (lexer) == T_ENDCMD)
6363 if (!ctables_prepare_table (t))
6367 if (!lex_force_match (lexer, T_SLASH))
6370 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6372 if (lex_match_id (lexer, "SLABELS"))
6374 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6376 if (lex_match_id (lexer, "POSITION"))
6378 lex_match (lexer, T_EQUALS);
6379 if (lex_match_id (lexer, "COLUMN"))
6380 t->slabels_axis = PIVOT_AXIS_COLUMN;
6381 else if (lex_match_id (lexer, "ROW"))
6382 t->slabels_axis = PIVOT_AXIS_ROW;
6383 else if (lex_match_id (lexer, "LAYER"))
6384 t->slabels_axis = PIVOT_AXIS_LAYER;
6387 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6391 else if (lex_match_id (lexer, "VISIBLE"))
6393 lex_match (lexer, T_EQUALS);
6394 if (!parse_bool (lexer, &t->slabels_visible))
6399 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6404 else if (lex_match_id (lexer, "CLABELS"))
6406 if (lex_match_id (lexer, "AUTO"))
6408 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6409 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6411 else if (lex_match_id (lexer, "ROWLABELS"))
6413 lex_match (lexer, T_EQUALS);
6414 if (lex_match_id (lexer, "OPPOSITE"))
6415 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6416 else if (lex_match_id (lexer, "LAYER"))
6417 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6420 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6424 else if (lex_match_id (lexer, "COLLABELS"))
6426 lex_match (lexer, T_EQUALS);
6427 if (lex_match_id (lexer, "OPPOSITE"))
6428 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6429 else if (lex_match_id (lexer, "LAYER"))
6430 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6433 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6439 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6444 else if (lex_match_id (lexer, "CRITERIA"))
6446 if (!lex_force_match_id (lexer, "CILEVEL"))
6448 lex_match (lexer, T_EQUALS);
6450 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6452 t->cilevel = lex_number (lexer);
6455 else if (lex_match_id (lexer, "CATEGORIES"))
6457 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6461 else if (lex_match_id (lexer, "TITLES"))
6466 if (lex_match_id (lexer, "CAPTION"))
6467 textp = &t->caption;
6468 else if (lex_match_id (lexer, "CORNER"))
6470 else if (lex_match_id (lexer, "TITLE"))
6474 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6477 lex_match (lexer, T_EQUALS);
6479 struct string s = DS_EMPTY_INITIALIZER;
6480 while (lex_is_string (lexer))
6482 if (!ds_is_empty (&s))
6483 ds_put_byte (&s, ' ');
6484 put_title_text (&s, lex_tokss (lexer), now,
6485 lexer, dataset_dict (ds),
6486 expr_start, expr_end);
6490 *textp = ds_steal_cstr (&s);
6492 while (lex_token (lexer) != T_SLASH
6493 && lex_token (lexer) != T_ENDCMD);
6495 else if (lex_match_id (lexer, "SIGTEST"))
6499 t->chisq = xmalloc (sizeof *t->chisq);
6500 *t->chisq = (struct ctables_chisq) {
6502 .include_mrsets = true,
6503 .all_visible = true,
6509 if (lex_match_id (lexer, "TYPE"))
6511 lex_match (lexer, T_EQUALS);
6512 if (!lex_force_match_id (lexer, "CHISQUARE"))
6515 else if (lex_match_id (lexer, "ALPHA"))
6517 lex_match (lexer, T_EQUALS);
6518 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6520 t->chisq->alpha = lex_number (lexer);
6523 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6525 lex_match (lexer, T_EQUALS);
6526 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6529 else if (lex_match_id (lexer, "CATEGORIES"))
6531 lex_match (lexer, T_EQUALS);
6532 if (lex_match_id (lexer, "ALLVISIBLE"))
6533 t->chisq->all_visible = true;
6534 else if (lex_match_id (lexer, "SUBTOTALS"))
6535 t->chisq->all_visible = false;
6538 lex_error_expecting (lexer,
6539 "ALLVISIBLE", "SUBTOTALS");
6545 lex_error_expecting (lexer, "TYPE", "ALPHA",
6546 "INCLUDEMRSETS", "CATEGORIES");
6550 while (lex_token (lexer) != T_SLASH
6551 && lex_token (lexer) != T_ENDCMD);
6553 else if (lex_match_id (lexer, "COMPARETEST"))
6557 t->pairwise = xmalloc (sizeof *t->pairwise);
6558 *t->pairwise = (struct ctables_pairwise) {
6560 .alpha = { .05, .05 },
6561 .adjust = BONFERRONI,
6562 .include_mrsets = true,
6563 .meansvariance_allcats = true,
6564 .all_visible = true,
6573 if (lex_match_id (lexer, "TYPE"))
6575 lex_match (lexer, T_EQUALS);
6576 if (lex_match_id (lexer, "PROP"))
6577 t->pairwise->type = PROP;
6578 else if (lex_match_id (lexer, "MEAN"))
6579 t->pairwise->type = MEAN;
6582 lex_error_expecting (lexer, "PROP", "MEAN");
6586 else if (lex_match_id (lexer, "ALPHA"))
6588 lex_match (lexer, T_EQUALS);
6590 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6592 double a0 = lex_number (lexer);
6595 lex_match (lexer, T_COMMA);
6596 if (lex_is_number (lexer))
6598 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6600 double a1 = lex_number (lexer);
6603 t->pairwise->alpha[0] = MIN (a0, a1);
6604 t->pairwise->alpha[1] = MAX (a0, a1);
6607 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6609 else if (lex_match_id (lexer, "ADJUST"))
6611 lex_match (lexer, T_EQUALS);
6612 if (lex_match_id (lexer, "BONFERRONI"))
6613 t->pairwise->adjust = BONFERRONI;
6614 else if (lex_match_id (lexer, "BH"))
6615 t->pairwise->adjust = BH;
6616 else if (lex_match_id (lexer, "NONE"))
6617 t->pairwise->adjust = 0;
6620 lex_error_expecting (lexer, "BONFERRONI", "BH",
6625 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6627 lex_match (lexer, T_EQUALS);
6628 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6631 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6633 lex_match (lexer, T_EQUALS);
6634 if (lex_match_id (lexer, "ALLCATS"))
6635 t->pairwise->meansvariance_allcats = true;
6636 else if (lex_match_id (lexer, "TESTEDCATS"))
6637 t->pairwise->meansvariance_allcats = false;
6640 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6644 else if (lex_match_id (lexer, "CATEGORIES"))
6646 lex_match (lexer, T_EQUALS);
6647 if (lex_match_id (lexer, "ALLVISIBLE"))
6648 t->pairwise->all_visible = true;
6649 else if (lex_match_id (lexer, "SUBTOTALS"))
6650 t->pairwise->all_visible = false;
6653 lex_error_expecting (lexer, "ALLVISIBLE",
6658 else if (lex_match_id (lexer, "MERGE"))
6660 lex_match (lexer, T_EQUALS);
6661 if (!parse_bool (lexer, &t->pairwise->merge))
6664 else if (lex_match_id (lexer, "STYLE"))
6666 lex_match (lexer, T_EQUALS);
6667 if (lex_match_id (lexer, "APA"))
6668 t->pairwise->apa_style = true;
6669 else if (lex_match_id (lexer, "SIMPLE"))
6670 t->pairwise->apa_style = false;
6673 lex_error_expecting (lexer, "APA", "SIMPLE");
6677 else if (lex_match_id (lexer, "SHOWSIG"))
6679 lex_match (lexer, T_EQUALS);
6680 if (!parse_bool (lexer, &t->pairwise->show_sig))
6685 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6686 "INCLUDEMRSETS", "MEANSVARIANCE",
6687 "CATEGORIES", "MERGE", "STYLE",
6692 while (lex_token (lexer) != T_SLASH
6693 && lex_token (lexer) != T_ENDCMD);
6697 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6698 "CRITERIA", "CATEGORIES", "TITLES",
6699 "SIGTEST", "COMPARETEST");
6703 if (!lex_match (lexer, T_SLASH))
6707 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6709 t->clabels_from_axis = PIVOT_AXIS_ROW;
6710 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6712 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6716 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6717 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6718 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6720 if (!ctables_prepare_table (t))
6723 while (lex_token (lexer) != T_ENDCMD);
6726 input = proc_open (ds);
6727 bool ok = ctables_execute (ds, input, ct);
6728 ok = proc_commit (ds) && ok;
6730 ctables_destroy (ct);
6731 return ok ? CMD_SUCCESS : CMD_FAILURE;
6736 ctables_destroy (ct);