1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_weighting
74 enum ctables_function_type
76 /* A function that operates on data in a single cell. It operates on
77 effective weights. It does not have an unweighted version. */
80 /* A function that operates on data in a single cell. The function
81 operates on effective weights and has a U-prefixed unweighted
85 /* A function that operates on data in a single cell. It operates on
86 dictionary weights, and has a U-prefixed unweighted version and an
87 E-prefixed effective weight version. */
90 /* A function that operates on an area of cells. It operates on effective
91 weights and has a U-prefixed unweighted version. */
102 enum ctables_function_availability
104 CTFA_ALL, /* Any variables. */
105 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
106 //CTFA_MRSETS, /* Only multiple-response sets */
109 enum ctables_summary_function
111 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
112 #include "ctables.inc"
117 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
119 #include "ctables.inc"
123 struct ctables_function_info
125 struct substring basename;
126 enum ctables_function_type type;
127 enum ctables_format format;
128 enum ctables_function_availability availability;
130 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
131 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
132 bool is_area; /* Needs an area prefix. */
134 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
135 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
137 .basename = SS_LITERAL_INITIALIZER (NAME), \
140 .availability = AVAILABILITY, \
141 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
142 .e_prefix = (TYPE) == CTFT_UECELL, \
143 .is_area = (TYPE) == CTFT_AREA \
145 #include "ctables.inc"
149 enum ctables_area_type
151 /* Within a section, where stacked variables divide one section from
154 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
155 parse_ctables_summary_function() parses correctly. */
156 CTAT_TABLE, /* All layers of a whole section. */
157 CTAT_LAYERROW, /* Row in one layer within a section. */
158 CTAT_LAYERCOL, /* Column in one layer within a section. */
159 CTAT_LAYER, /* One layer within a section. */
161 /* Within a subtable, where a subtable pairs an innermost row variable with
162 an innermost column variable within a single layer. */
163 CTAT_SUBTABLE, /* Whole subtable. */
164 CTAT_ROW, /* Row within a subtable. */
165 CTAT_COL, /* Column within a subtable. */
169 static const char *ctables_area_type_name[N_CTATS] = {
170 [CTAT_TABLE] = "TABLE",
171 [CTAT_LAYER] = "LAYER",
172 [CTAT_LAYERROW] = "LAYERROW",
173 [CTAT_LAYERCOL] = "LAYERCOL",
174 [CTAT_SUBTABLE] = "SUBTABLE",
181 struct hmap_node node;
183 const struct ctables_cell *example;
186 double count[N_CTWS];
187 double valid[N_CTWS];
188 double total[N_CTWS];
189 struct ctables_sum *sums;
197 enum ctables_summary_variant
206 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
207 all the axes (except the scalar variable, if any). */
208 struct hmap_node node;
210 /* The areas that contain this cell. */
212 struct ctables_area *areas[N_CTATS];
217 enum ctables_summary_variant sv;
219 struct ctables_cell_axis
221 struct ctables_cell_value
223 const struct ctables_category *category;
231 union ctables_summary *summaries;
238 const struct dictionary *dict;
239 struct pivot_table_look *look;
241 /* CTABLES has a number of extra formats that we implement via custom
242 currency specifications on an alternate fmt_settings. */
243 #define CTEF_NEGPAREN FMT_CCA
244 #define CTEF_NEQUAL FMT_CCB
245 #define CTEF_PAREN FMT_CCC
246 #define CTEF_PCTPAREN FMT_CCD
247 struct fmt_settings ctables_formats;
249 /* If this is NULL, zeros are displayed using the normal print format.
250 Otherwise, this string is displayed. */
253 /* If this is NULL, missing values are displayed using the normal print
254 format. Otherwise, this string is displayed. */
257 /* Indexed by variable dictionary index. */
258 enum ctables_vlabel *vlabels;
260 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
262 bool mrsets_count_duplicates; /* MRSETS. */
263 bool smissing_listwise; /* SMISSING. */
264 struct variable *e_weight; /* WEIGHT. */
265 int hide_threshold; /* HIDESMALLCOUNTS. */
267 struct ctables_table **tables;
271 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition.  Instances are kept in struct ctables's
   'postcomputes' hmap, linked through 'hmap_node'. */
274 struct ctables_postcompute
276 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
277 char *name; /* Name, without leading &. */
279 struct msg_location *location; /* Location of definition. */
280 struct ctables_pcexpr *expr;
282 struct ctables_summary_spec_set *specs;
283 bool hide_source_cats;
286 struct ctables_pcexpr
296 enum ctables_postcompute_op
299 CTPO_CONSTANT, /* 5 */
300 CTPO_CAT_NUMBER, /* [5] */
301 CTPO_CAT_STRING, /* ["STRING"] */
302 CTPO_CAT_NRANGE, /* [LO THRU 5] */
303 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
304 CTPO_CAT_MISSING, /* MISSING */
305 CTPO_CAT_OTHERNM, /* OTHERNM */
306 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
307 CTPO_CAT_TOTAL, /* TOTAL */
321 /* CTPO_CAT_NUMBER. */
324 /* CTPO_CAT_STRING, in dictionary encoding. */
325 struct substring string;
327 /* CTPO_CAT_NRANGE. */
330 /* CTPO_CAT_SRANGE. */
331 struct substring srange[2];
333 /* CTPO_CAT_SUBTOTAL. */
334 size_t subtotal_index;
336 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
337 One element: CTPO_NEG. */
338 struct ctables_pcexpr *subs[2];
341 /* Source location. */
342 struct msg_location *location;
345 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
346 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
347 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
348 struct ctables_pcexpr *sub1);
350 struct ctables_summary_spec_set
352 struct ctables_summary_spec *specs;
356 /* The variable to which the summary specs are applied. */
357 struct variable *var;
359 /* Whether the variable to which the summary specs are applied is a scale
360 variable for the purpose of summarization.
362 (VALIDN and TOTALN act differently for summarizing scale and categorical
366 /* If any of these optional additional scale variables are missing, then
367 treat 'var' as if it's missing too. This is for implementing
368 SMISSING=LISTWISE. */
369 struct variable **listwise_vars;
370 size_t n_listwise_vars;
373 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
374 const struct ctables_summary_spec_set *);
375 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
377 /* A nested sequence of variables, e.g. a > b > c. */
380 struct variable **vars;
384 size_t *areas[N_CTATS];
385 size_t n_areas[N_CTATS];
388 struct ctables_summary_spec_set specs[N_CSVS];
391 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
394 struct ctables_nest *nests;
398 static void ctables_stack_uninit (struct ctables_stack *);
402 struct hmap_node node;
407 struct ctables_occurrence
409 struct hmap_node node;
413 struct ctables_section
416 struct ctables_table *table;
417 struct ctables_nest *nests[PIVOT_N_AXES];
420 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
421 struct hmap cells; /* Contains "struct ctables_cell"s. */
422 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
425 static void ctables_section_uninit (struct ctables_section *);
429 struct ctables *ctables;
430 struct ctables_axis *axes[PIVOT_N_AXES];
431 struct ctables_stack stacks[PIVOT_N_AXES];
432 struct ctables_section *sections;
434 enum pivot_axis_type summary_axis;
435 struct ctables_summary_spec_set summary_specs;
436 struct variable **sum_vars;
439 enum pivot_axis_type slabels_axis;
440 bool slabels_visible;
442 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
444 Most commonly, label_axis[a] == a, and in particular we always have
445 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
447 If ROWLABELS or COLLABELS is specified, then one of
448 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
449 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
451 If any category labels are moved, then 'clabels_example' is one of the
452 variables being moved (and it is otherwise NULL). All of the variables
453 being moved have the same width, value labels, and categories, so this
454 example variable can be used to find those out.
456 The remaining members in this group are relevant only if category labels
459 'clabels_values_map' holds a "struct ctables_value" for all the values
460 that appear in all of the variables in the moved categories. It is
461 accumulated as the data is read. Once the data is fully read, its
462 sorted values are put into 'clabels_values' and 'n_clabels_values'.
464 enum pivot_axis_type label_axis[PIVOT_N_AXES];
465 enum pivot_axis_type clabels_from_axis;
466 enum pivot_axis_type clabels_to_axis;
467 const struct variable *clabels_example;
468 struct hmap clabels_values_map;
469 struct ctables_value **clabels_values;
470 size_t n_clabels_values;
472 /* Indexed by variable dictionary index. */
473 struct ctables_categories **categories;
482 struct ctables_chisq *chisq;
483 struct ctables_pairwise *pairwise;
486 struct ctables_categories
489 struct ctables_category *cats;
494 struct ctables_category
496 enum ctables_category_type
498 /* Explicit category lists. */
501 CCT_NRANGE, /* Numerical range. */
502 CCT_SRANGE, /* String range. */
507 /* Totals and subtotals. */
511 /* Implicit category lists. */
516 /* For contributing to TOTALN. */
517 CCT_EXCLUDED_MISSING,
521 struct ctables_category *subtotal;
527 double number; /* CCT_NUMBER. */
528 struct substring string; /* CCT_STRING, in dictionary encoding. */
529 double nrange[2]; /* CCT_NRANGE. */
530 struct substring srange[2]; /* CCT_SRANGE. */
534 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
535 bool hide_subcategories; /* CCT_SUBTOTAL. */
538 /* CCT_POSTCOMPUTE. */
541 const struct ctables_postcompute *pc;
542 enum fmt_type parse_format;
545 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
548 bool include_missing;
552 enum ctables_summary_function sort_function;
553 enum ctables_weighting weighting;
554 enum ctables_area_type area;
555 struct variable *sort_var;
560 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
561 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
562 struct msg_location *location;
566 ctables_category_uninit (struct ctables_category *cat)
571 msg_location_destroy (cat->location);
578 case CCT_POSTCOMPUTE:
582 ss_dealloc (&cat->string);
586 ss_dealloc (&cat->srange[0]);
587 ss_dealloc (&cat->srange[1]);
592 free (cat->total_label);
600 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   member.  If A's 'string' is null, the result is true only when B's is null
   too.  Otherwise the result is true only when B's 'string' is non-null and
   ss_equals() reports the two substrings equal. */
606 nullable_substring_equal (const struct substring *a,
607 const struct substring *b)
609 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
613 ctables_category_equal (const struct ctables_category *a,
614 const struct ctables_category *b)
616 if (a->type != b->type)
622 return a->number == b->number;
625 return ss_equals (a->string, b->string);
628 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
631 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
632 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
638 case CCT_POSTCOMPUTE:
639 return a->pc == b->pc;
643 return !strcmp (a->total_label, b->total_label);
648 return (a->include_missing == b->include_missing
649 && a->sort_ascending == b->sort_ascending
650 && a->sort_function == b->sort_function
651 && a->sort_var == b->sort_var
652 && a->percentile == b->percentile);
654 case CCT_EXCLUDED_MISSING:
662 ctables_categories_unref (struct ctables_categories *c)
667 assert (c->n_refs > 0);
671 for (size_t i = 0; i < c->n_cats; i++)
672 ctables_category_uninit (&c->cats[i]);
678 ctables_categories_equal (const struct ctables_categories *a,
679 const struct ctables_categories *b)
681 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
684 for (size_t i = 0; i < a->n_cats; i++)
685 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
691 /* Chi-square test (SIGTEST). */
699 /* Pairwise comparison test (COMPARETEST). */
700 struct ctables_pairwise
702 enum { PROP, MEAN } type;
705 bool meansvariance_allcats;
707 enum { BONFERRONI = 1, BH } adjust;
731 struct variable *var;
733 struct ctables_summary_spec_set specs[N_CSVS];
737 struct ctables_axis *subs[2];
740 struct msg_location *loc;
743 static void ctables_axis_destroy (struct ctables_axis *);
745 struct ctables_summary_spec
747 /* The calculation to be performed.
749 'function' is the function to calculate. 'weighting' specifies whether
750 to use weighted or unweighted data (for functions that do not support a
751 choice, it must be true). 'calc_area' is the area over which the
752 calculation takes place (for functions that target only an individual
753 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
754 percentile between 0 and 100 (for other functions it must be 0). */
755 enum ctables_summary_function function;
756 enum ctables_weighting weighting;
757 enum ctables_area_type calc_area;
758 double percentile; /* CTSF_PTILE only. */
760 /* How to display the result of the calculation.
762 'label' is a user-specified label, NULL if the user didn't specify
765 'user_area' is usually the same as 'calc_area', but when category labels
766 are rotated from one axis to another it swaps rows and columns.
768 'format' is the format for displaying the output. If
769 'is_ctables_format' is true, then 'format.type' is one of the special
770 CTEF_* formats instead of the standard ones. */
772 enum ctables_area_type user_area;
773 struct fmt_spec format;
774 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
781 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
782 const struct ctables_summary_spec *src)
785 dst->label = xstrdup_if_nonnull (src->label);
789 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
796 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
797 const struct ctables_summary_spec_set *src)
799 struct ctables_summary_spec *specs
800 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
801 for (size_t i = 0; i < src->n; i++)
802 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
804 *dst = (struct ctables_summary_spec_set) {
809 .is_scale = src->is_scale,
814 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
816 for (size_t i = 0; i < set->n; i++)
817 ctables_summary_spec_uninit (&set->specs[i]);
818 free (set->listwise_vars);
823 parse_col_width (struct lexer *lexer, const char *name, double *width)
825 lex_match (lexer, T_EQUALS);
826 if (lex_match_id (lexer, "DEFAULT"))
828 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
830 *width = lex_number (lexer);
840 parse_bool (struct lexer *lexer, bool *b)
842 if (lex_match_id (lexer, "NO"))
844 else if (lex_match_id (lexer, "YES"))
848 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability (one of the CTFA_* values) of summary function F.
   The lookup table is generated from the S() entries in ctables.inc, indexed
   by each entry's ENUM and holding its AVAILABILITY. */
854 static enum ctables_function_availability
855 ctables_function_availability (enum ctables_summary_function f)
857 static enum ctables_function_availability availability[] = {
858 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
859 #include "ctables.inc"
863 return availability[f];
867 parse_ctables_summary_function (struct lexer *lexer,
868 enum ctables_summary_function *function,
869 enum ctables_weighting *weighting,
870 enum ctables_area_type *area)
872 if (!lex_force_id (lexer))
875 struct substring name = lex_tokss (lexer);
876 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
877 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
879 bool has_area = false;
881 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
882 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
887 if (ss_equals_case (name, ss_cstr ("PCT")))
889 /* Special case where .COUNT suffix is omitted. */
890 *function = CTSF_areaPCT_COUNT;
891 *weighting = CTW_EFFECTIVE;
898 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
900 const struct ctables_function_info *cfi = &ctables_function_info[f];
901 if (ss_equals_case (cfi->basename, name))
904 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
907 *weighting = (e ? CTW_EFFECTIVE
909 : cfi->e_prefix ? CTW_DICTIONARY
916 lex_error (lexer, _("Expecting summary function name."));
921 ctables_axis_destroy (struct ctables_axis *axis)
929 for (size_t i = 0; i < N_CSVS; i++)
930 ctables_summary_spec_set_uninit (&axis->specs[i]);
935 ctables_axis_destroy (axis->subs[0]);
936 ctables_axis_destroy (axis->subs[1]);
939 msg_location_destroy (axis->loc);
943 static struct ctables_axis *
944 ctables_axis_new_nonterminal (enum ctables_axis_op op,
945 struct ctables_axis *sub0,
946 struct ctables_axis *sub1,
947 struct lexer *lexer, int start_ofs)
949 struct ctables_axis *axis = xmalloc (sizeof *axis);
950 *axis = (struct ctables_axis) {
952 .subs = { sub0, sub1 },
953 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
958 struct ctables_axis_parse_ctx
961 struct dictionary *dict;
963 struct ctables_table *t;
966 static struct fmt_spec
967 ctables_summary_default_format (enum ctables_summary_function function,
968 const struct variable *var)
970 static const enum ctables_format default_formats[] = {
971 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
972 #include "ctables.inc"
975 switch (default_formats[function])
978 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
981 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
984 return *var_get_print_format (var);
992 ctables_summary_label__ (const struct ctables_summary_spec *spec)
994 bool w = spec->weighting != CTW_UNWEIGHTED;
995 bool d = spec->weighting == CTW_DICTIONARY;
996 enum ctables_area_type a = spec->user_area;
997 switch (spec->function)
1000 return (d ? N_("Count")
1001 : w ? N_("Adjusted Count")
1002 : N_("Unweighted Count"));
1004 case CTSF_areaPCT_COUNT:
1007 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1008 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1009 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1010 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1011 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1012 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1013 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1017 case CTSF_areaPCT_VALIDN:
1020 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1021 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1022 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1023 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1024 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1025 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1026 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1030 case CTSF_areaPCT_TOTALN:
1033 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1034 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1035 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1036 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1037 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1038 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1039 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1043 case CTSF_MAXIMUM: return N_("Maximum");
1044 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1045 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
1046 case CTSF_MINIMUM: return N_("Minimum");
1047 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
1048 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
1049 case CTSF_PTILE: NOT_REACHED ();
1050 case CTSF_RANGE: return N_("Range");
1051 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
1052 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
1053 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
1054 case CTSF_TOTALN: return (d ? N_("Total N")
1055 : w ? N_("Adjusted Total N")
1056 : N_("Unweighted Total N"));
1057 case CTSF_VALIDN: return (d ? N_("Valid N")
1058 : w ? N_("Adjusted Valid N")
1059 : N_("Unweighted Valid N"));
1060 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
1061 case CTSF_areaPCT_SUM:
1064 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1065 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1066 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1067 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1068 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1069 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1070 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1077 /* Don't bother translating these: they are for developers only. */
1078 case CTAT_TABLE: return "Table ID";
1079 case CTAT_LAYER: return "Layer ID";
1080 case CTAT_LAYERROW: return "Layer Row ID";
1081 case CTAT_LAYERCOL: return "Layer Column ID";
1082 case CTAT_SUBTABLE: return "Subtable ID";
1083 case CTAT_ROW: return "Row ID";
1084 case CTAT_COL: return "Column ID";
1092 static struct pivot_value *
1093 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1097 if (spec->function == CTSF_PTILE)
1099 double p = spec->percentile;
1100 char *s = (spec->weighting != CTW_UNWEIGHTED
1101 ? xasprintf (_("Percentile %.2f"), p)
1102 : xasprintf (_("Unweighted Percentile %.2f"), p));
1103 return pivot_value_new_user_text_nocopy (s);
1106 return pivot_value_new_text (ctables_summary_label__ (spec));
1110 struct substring in = ss_cstr (spec->label);
1111 struct substring target = ss_cstr (")CILEVEL");
1113 struct string out = DS_EMPTY_INITIALIZER;
1116 size_t chunk = ss_find_substring (in, target);
1117 ds_put_substring (&out, ss_head (in, chunk));
1118 ss_advance (&in, chunk);
1120 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1122 ss_advance (&in, target.length);
1123 ds_put_format (&out, "%g", cilevel);
/* Formats the user-visible name of summary FUNCTION into BUFFER, writing at
   most BUFSIZE bytes with snprintf().  The name is the concatenation of:

   - A weighting prefix: "U" for CTW_UNWEIGHTED, nothing for CTW_DICTIONARY,
     and otherwise "E" when the function accepts an E prefix.

   - The name of AREA from ctables_area_type_name[], but only if the function
     is an area function.

   - The function's base name from ctables_function_info[]. */
1129 ctables_summary_function_name (enum ctables_summary_function function,
1130 enum ctables_weighting weighting,
1131 enum ctables_area_type area,
1132 char *buffer, size_t bufsize)
1134 const struct ctables_function_info *cfi = &ctables_function_info[function];
1135 snprintf (buffer, bufsize, "%s%s%s",
1136 (weighting == CTW_UNWEIGHTED ? "U"
1137 : weighting == CTW_DICTIONARY ? ""
1138 : cfi->e_prefix ? "E"
1140 cfi->is_area ? ctables_area_type_name[area] : "",
1141 cfi->basename.string);
1146 add_summary_spec (struct ctables_axis *axis,
1147 enum ctables_summary_function function,
1148 enum ctables_weighting weighting,
1149 enum ctables_area_type area, double percentile,
1150 const char *label, const struct fmt_spec *format,
1151 bool is_ctables_format, const struct msg_location *loc,
1152 enum ctables_summary_variant sv)
1154 if (axis->op == CTAO_VAR)
1156 char function_name[128];
1157 ctables_summary_function_name (function, weighting, area,
1158 function_name, sizeof function_name);
1159 const char *var_name = var_get_name (axis->var);
1160 switch (ctables_function_availability (function))
1164 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1165 "response sets."), function_name);
1166 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1172 if (!axis->scale && sv != CSV_TOTAL)
1175 _("Summary function %s applies only to scale variables."),
1177 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1187 struct ctables_summary_spec_set *set = &axis->specs[sv];
1188 if (set->n >= set->allocated)
1189 set->specs = x2nrealloc (set->specs, &set->allocated,
1190 sizeof *set->specs);
1192 struct ctables_summary_spec *dst = &set->specs[set->n++];
1193 *dst = (struct ctables_summary_spec) {
1194 .function = function,
1195 .weighting = weighting,
1198 .percentile = percentile,
1199 .label = xstrdup_if_nonnull (label),
1200 .format = (format ? *format
1201 : ctables_summary_default_format (function, axis->var)),
1202 .is_ctables_format = is_ctables_format,
1208 for (size_t i = 0; i < 2; i++)
1209 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1210 percentile, label, format, is_ctables_format,
1217 static struct ctables_axis *ctables_axis_parse_stack (
1218 struct ctables_axis_parse_ctx *);
1221 static struct ctables_axis *
1222 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1224 if (lex_match (ctx->lexer, T_LPAREN))
1226 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1227 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1229 ctables_axis_destroy (sub);
1235 if (!lex_force_id (ctx->lexer))
1238 int start_ofs = lex_ofs (ctx->lexer);
1239 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1243 struct ctables_axis *axis = xmalloc (sizeof *axis);
1244 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1246 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1247 : lex_match_phrase (ctx->lexer, "[C]") ? false
1248 : var_get_measure (var) == MEASURE_SCALE);
1249 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1250 lex_ofs (ctx->lexer) - 1);
1251 if (axis->scale && var_is_alpha (var))
1253 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1255 var_get_name (var));
1256 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the ASCII digits 0 through 9,
   false otherwise.  strcspn() yields the length of the leading digit-free
   span, so the byte at that index is the null terminator exactly when S has
   no digit. */
1264 has_digit (const char *s)
1266 return s[strcspn (s, "0123456789")] != '\0';
1270 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1271 bool *is_ctables_format)
1273 char type[FMT_TYPE_LEN_MAX + 1];
1274 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1277 if (!strcasecmp (type, "NEGPAREN"))
1278 format->type = CTEF_NEGPAREN;
1279 else if (!strcasecmp (type, "NEQUAL"))
1280 format->type = CTEF_NEQUAL;
1281 else if (!strcasecmp (type, "PAREN"))
1282 format->type = CTEF_PAREN;
1283 else if (!strcasecmp (type, "PCTPAREN"))
1284 format->type = CTEF_PCTPAREN;
1287 *is_ctables_format = false;
1288 return (parse_format_specifier (lexer, format)
1289 && fmt_check_output (format)
1290 && fmt_check_type_compat (format, VAL_NUMERIC));
1296 lex_next_error (lexer, -1, -1,
1297 _("Output format %s requires width 2 or greater."), type);
1300 else if (format->d > format->w - 1)
1302 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1303 "greater than decimals."), type);
1308 *is_ctables_format = true;
1313 static struct ctables_axis *
1314 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1316 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1317 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1320 enum ctables_summary_variant sv = CSV_CELL;
1323 int start_ofs = lex_ofs (ctx->lexer);
1325 /* Parse function. */
1326 enum ctables_summary_function function;
1327 enum ctables_weighting weighting;
1328 enum ctables_area_type area;
1329 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1333 /* Parse percentile. */
1334 double percentile = 0;
1335 if (function == CTSF_PTILE)
1337 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1339 percentile = lex_number (ctx->lexer);
1340 lex_get (ctx->lexer);
1345 if (lex_is_string (ctx->lexer))
1347 label = ss_xstrdup (lex_tokss (ctx->lexer));
1348 lex_get (ctx->lexer);
1352 struct fmt_spec format;
1353 const struct fmt_spec *formatp;
1354 bool is_ctables_format = false;
1355 if (lex_token (ctx->lexer) == T_ID
1356 && has_digit (lex_tokcstr (ctx->lexer)))
1358 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1359 &is_ctables_format))
1369 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1370 lex_ofs (ctx->lexer) - 1);
1371 add_summary_spec (sub, function, weighting, area, percentile, label,
1372 formatp, is_ctables_format, loc, sv);
1374 msg_location_destroy (loc);
1376 lex_match (ctx->lexer, T_COMMA);
1377 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1379 if (!lex_force_match (ctx->lexer, T_LBRACK))
1383 else if (lex_match (ctx->lexer, T_RBRACK))
1385 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1392 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.

   A CTAO_VAR node yields the node itself if its 'scale' flag is set and a
   null pointer otherwise.  For other node types, find_scale() is called
   recursively on each of the two sub-axes.  NOTE(review): presumably the
   first non-null recursive result is returned -- confirm against the full
   function body. */
1396 static const struct ctables_axis *
1397 find_scale (const struct ctables_axis *axis)
1401 else if (axis->op == CTAO_VAR)
1402 return axis->scale ? axis : NULL;
1405 for (size_t i = 0; i < 2; i++)
1407 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1415 static const struct ctables_axis *
1416 find_categorical_summary_spec (const struct ctables_axis *axis)
1420 else if (axis->op == CTAO_VAR)
1421 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1424 for (size_t i = 0; i < 2; i++)
1426 const struct ctables_axis *sum
1427 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: a postfix expression followed by zero or
   more T_GT tokens, each followed by another postfix expression.  Each pair
   is wrapped in a CTAO_NEST nonterminal whose location starts at the token
   offset recorded on entry.

   Two constraints are diagnosed with msg_at, after which the freshly built
   nest node is destroyed:
   - both operands contain a scale variable (as reported by find_scale);
   - the outer (left) operand contains a categorical variable with a
     summary (as reported by find_categorical_summary_spec).
   NOTE(review): return statements and the guarding conditions for the
   failure paths are on lines not shown in this excerpt. */
1435 static struct ctables_axis *
1436 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1438 int start_ofs = lex_ofs (ctx->lexer);
1439 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1443 while (lex_match (ctx->lexer, T_GT))
1445 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* Presumably the cleanup path for a failed right-hand parse; the
   condition is not shown. */
1448 ctables_axis_destroy (lhs);
1452 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1453 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Reject nesting when each side contributes a scale variable. */
1455 const struct ctables_axis *outer_scale = find_scale (lhs);
1456 const struct ctables_axis *inner_scale = find_scale (rhs);
1457 if (outer_scale && inner_scale)
1459 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1460 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1461 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1462 ctables_axis_destroy (nest);
/* Reject summaries attached to a categorical variable on the outer side. */
1466 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1469 msg_at (SE, nest->loc,
1470 _("Summaries may only be requested for categorical variables "
1471 "at the innermost nesting level."));
1472 msg_at (SN, outer_sum->loc,
1473 _("This outer categorical variable has a summary."));
1474 ctables_axis_destroy (nest);
/* Parses a stacking expression: a nesting expression followed by zero or
   more T_PLUS tokens, each followed by another nesting expression.  Each
   pair is folded into a CTAO_STACK nonterminal that becomes the new
   left-hand side, so the result is built left to right.
   NOTE(review): failure checks and the final return are on lines not shown
   in this excerpt. */
1484 static struct ctables_axis *
1485 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1487 int start_ofs = lex_ofs (ctx->lexer);
1488 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1492 while (lex_match (ctx->lexer, T_PLUS))
1494 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* Presumably the cleanup path for a failed right-hand parse; the
   condition is not shown. */
1497 ctables_axis_destroy (lhs);
1501 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1502 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T from LEXER and stores
   the resulting tree in t->axes[a].  The visible return yields true when
   the stored tree is non-null.

   The current token is first tested against T_BY, T_SLASH and T_ENDCMD.
   NOTE(review): the action taken for those tokens, the return type line and
   the initializer of the parse context are on lines not shown in this
   excerpt. */
1509 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1510 struct ctables *ct, struct ctables_table *t,
1511 enum pivot_axis_type a)
1513 if (lex_token (lexer) == T_BY
1514 || lex_token (lexer) == T_SLASH
1515 || lex_token (lexer) == T_ENDCMD)
1518 struct ctables_axis_parse_ctx ctx = {
1524 t->axes[a] = ctables_axis_parse_stack (&ctx);
1525 return t->axes[a] != NULL;
/* NOTE(review): only the signature is visible in this excerpt.  The
   function is called from ctables_table_destroy with t->chisq, so it
   presumably releases CHISQ - confirm against the full body. */
1529 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* NOTE(review): only the signature is visible in this excerpt.  The
   function is called from ctables_table_destroy with t->pairwise, so it
   presumably releases PAIRWISE - confirm against the full body. */
1535 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T.  The visible steps are, in order:
   uninitialize every section, drop one reference on every entry of
   t->categories and free that array, destroy the axis tree and stack for
   each pivot axis, free the summary spec array, empty and destroy the
   clabels value map, free t->clabels_values, and destroy the chisq and
   pairwise members.
   NOTE(review): other frees, including whatever releases each map entry and
   T itself, are on lines not shown in this excerpt. */
1541 ctables_table_destroy (struct ctables_table *t)
1546 for (size_t i = 0; i < t->n_sections; i++)
1547 ctables_section_uninit (&t->sections[i]);
1550 for (size_t i = 0; i < t->n_categories; i++)
1551 ctables_categories_unref (t->categories[i]);
1552 free (t->categories);
1554 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1556 ctables_axis_destroy (t->axes[a]);
1557 ctables_stack_uninit (&t->stacks[a]);
1559 free (t->summary_specs.specs);
/* The SAFE iteration form is used because entries are deleted from the map
   while it is being walked. */
1561 struct ctables_value *ctv, *next_ctv;
1562 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1563 &t->clabels_values_map)
/* Values are destroyed using the width of t->clabels_example. */
1565 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1566 hmap_delete (&t->clabels_values_map, &ctv->node);
1569 hmap_destroy (&t->clabels_values_map);
1570 free (t->clabels_values);
1576 ctables_chisq_destroy (t->chisq);
1577 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT.  The visible steps are: for every
   postcompute in ct->postcomputes, destroy its location and expression,
   uninitialize its summary spec set and delete it from the map; destroy the
   map; uninitialize ct->ctables_formats; drop a reference on ct->look; and
   destroy every table in ct->tables.
   NOTE(review): remaining frees, any null guards and the map argument of
   the iteration macro are on lines not shown in this excerpt. */
1582 ctables_destroy (struct ctables *ct)
/* The SAFE iteration form is used because entries are deleted from the map
   while it is being walked. */
1587 struct ctables_postcompute *pc, *next_pc;
1588 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1592 msg_location_destroy (pc->location);
1593 ctables_pcexpr_destroy (pc->expr);
1597 ctables_summary_spec_set_uninit (pc->specs);
1600 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1603 hmap_destroy (&ct->postcomputes);
1605 fmt_settings_uninit (&ct->ctables_formats);
1606 pivot_table_look_unref (ct->look);
1610 for (size_t i = 0; i < ct->n_tables; i++)
1611 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose nrange member holds the bounds LOW
   and HIGH, in that order.
   NOTE(review): the remaining designated initializers (presumably the type
   tag) are on a line not shown in this excerpt. */
1616 static struct ctables_category
1617 cct_nrange (double low, double high)
1619 return (struct ctables_category) {
1621 .nrange = { low, high }
/* Returns, by value, a category whose srange member holds the substrings
   LOW and HIGH, in that order.  The substrings are stored as passed, with
   no copy made here.
   NOTE(review): the remaining designated initializers (presumably the type
   tag) are on a line not shown in this excerpt. */
1625 static struct ctables_category
1626 cct_srange (struct substring low, struct substring high)
1628 return (struct ctables_category) {
1630 .srange = { low, high }
/* Parses the optional label that may follow a subtotal keyword and stores a
   CCT_SUBTOTAL category in *CAT.  When T_EQUALS is matched a string token
   is required and duplicated as the label; the other visible assignment
   uses a copy of the translated default label.  HIDE_SUBCATEGORIES is
   copied into the category unchanged.
   NOTE(review): the return type, return statements and token advancement
   are on lines not shown in this excerpt. */
1635 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1636 struct ctables_category *cat)
1639 if (lex_match (lexer, T_EQUALS))
1641 if (!lex_force_string (lexer))
1644 total_label = ss_xstrdup (lex_tokss (lexer));
/* Default label; the enclosing branch is not shown. */
1648 total_label = xstrdup (_("Subtotal"));
1650 *cat = (struct ctables_category) {
1651 .type = CCT_SUBTOTAL,
1652 .hide_subcategories = hide_subcategories,
1653 .total_label = total_label
/* Takes the string of the current token in LEXER, recodes it with
   recode_substring_pool between the encoding of DICT and UTF-8 (passing
   NULL as the pool), and right-trims it with ss_rtrim using a
   single-space trim set.
   NOTE(review): presumably the conversion is from UTF-8 to the dictionary
   encoding - confirm against the parameter order of recode_substring_pool.
   The return statement and any token advancement are on lines not shown
   in this excerpt. */
1658 static struct substring
1659 parse_substring (struct lexer *lexer, struct dictionary *dict)
1661 struct substring s = recode_substring_pool (
1662 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1663 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list from LEXER into *CAT.
   The visible forms are:
   - OTHERNM and MISSING keywords;
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal with
     hide_subcategories false and true respectively;
   - LO THRU followed by a string or a number (open lower bound);
   - a number, optionally followed by THRU and then HI or a second number;
   - a string, optionally followed by THRU and then HI or a second string;
   - T_AND followed by an identifier naming a postcompute, looked up with
     ctables_find_postcompute.
   Open string bounds are represented by a substring whose string member is
   NULL; open numeric bounds by -DBL_MAX and DBL_MAX.  Anything else is
   reported with lex_error.
   NOTE(review): the return type, one parameter line, return statements and
   token advancement are on lines not shown in this excerpt. */
1669 ctables_table_parse_explicit_category (struct lexer *lexer,
1670 struct dictionary *dict,
1672 struct ctables_category *cat)
1674 if (lex_match_id (lexer, "OTHERNM"))
1675 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1676 else if (lex_match_id (lexer, "MISSING"))
1677 *cat = (struct ctables_category) { .type = CCT_MISSING };
1678 else if (lex_match_id (lexer, "SUBTOTAL"))
1679 return ctables_table_parse_subtotal (lexer, false, cat);
1680 else if (lex_match_id (lexer, "HSUBTOTAL"))
1681 return ctables_table_parse_subtotal (lexer, true, cat);
1682 else if (lex_match_id (lexer, "LO"))
1684 if (!lex_force_match_id (lexer, "THRU"))
1686 if (lex_is_string (lexer))
/* String range with no lower bound. */
1688 struct substring sr0 = { .string = NULL };
1689 struct substring sr1 = parse_substring (lexer, dict);
1690 *cat = cct_srange (sr0, sr1);
1692 else if (lex_force_num (lexer))
/* Numeric range with no lower bound. */
1694 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1700 else if (lex_is_number (lexer))
1702 double number = lex_number (lexer);
1704 if (lex_match_id (lexer, "THRU"))
1706 if (lex_match_id (lexer, "HI"))
1707 *cat = cct_nrange (number, DBL_MAX);
1710 if (!lex_force_num (lexer))
1712 *cat = cct_nrange (number, lex_number (lexer));
/* Single number without THRU; the initializer fields are not shown. */
1717 *cat = (struct ctables_category) {
1722 else if (lex_is_string (lexer))
1724 struct substring s = parse_substring (lexer, dict);
1725 if (lex_match_id (lexer, "THRU"))
1727 if (lex_match_id (lexer, "HI"))
/* String range with no upper bound. */
1729 struct substring sr1 = { .string = NULL };
1730 *cat = cct_srange (s, sr1);
1734 if (!lex_force_string (lexer))
1739 struct substring sr1 = parse_substring (lexer, dict);
1740 *cat = cct_srange (s, sr1);
1744 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1746 else if (lex_match (lexer, T_AND))
1748 if (!lex_force_id (lexer))
1750 struct ctables_postcompute *pc = ctables_find_postcompute (
1751 ct, lex_tokcstr (lexer));
/* Diagnostic for a name with no matching postcompute; the location spans
   the previous and current tokens. */
1754 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1755 msg_at (SE, loc, _("Unknown postcompute &%s."),
1756 lex_tokcstr (lexer));
1757 msg_location_destroy (loc);
1762 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1766 lex_error (lexer, NULL);
/* Parses substring S as a value in input format FORMAT by calling data_in
   with the encoding of DICT and the current format settings.  When data_in
   reports an error, a message naming the format and quoting the error text
   is issued at LOCATION.
   NOTE(review): the return type, the declaration of the parsed value, the
   store through N, the release of the error string and the return
   statements are on lines not shown in this excerpt. */
1774 parse_category_string (struct msg_location *location,
1775 struct substring s, const struct dictionary *dict,
1776 enum fmt_type format, double *n)
1779 char *error = data_in (s, dict_get_encoding (dict), format,
1780 settings_get_fmt_settings (), &v, 0, NULL);
1783 msg_at (SE, location,
1784 _("Failed to parse category specification as format %s: %s."),
1785 fmt_name (format), error);
/* Scans the categories in CATS for one that matches the category reference
   expression E.  Matching depends on the operator of E:
   - CTPO_CAT_NUMBER: a CCT_NUMBER category with an equal number;
   - CTPO_CAT_STRING: a CCT_STRING category with an equal string;
   - CTPO_CAT_NRANGE: a CCT_NRANGE category with both bounds equal;
   - CTPO_CAT_SRANGE: a CCT_SRANGE category with both bounds equal, using
     nullable_substring_equal;
   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
     corresponding type;
   - CTPO_CAT_SUBTOTAL: a CCT_SUBTOTAL category, selected either by
     comparing e->subtotal_index with a running subtotal count or, when the
     index is 0, without regard to position.
   After the scan, a subtotal reference with index 0 is checked against
   n_subtotals greater than 1.
   NOTE(review): the statements executed on a match, the running count
   update, the outcome of the final check and the return are on lines not
   shown in this excerpt. */
1794 static struct ctables_category *
1795 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1796 const struct ctables_pcexpr *e)
1798 struct ctables_category *best = NULL;
1799 size_t n_subtotals = 0;
1800 for (size_t i = 0; i < cats->n_cats; i++)
1802 struct ctables_category *cat = &cats->cats[i];
1805 case CTPO_CAT_NUMBER:
1806 if (cat->type == CCT_NUMBER && cat->number == e->number)
1810 case CTPO_CAT_STRING:
1811 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1815 case CTPO_CAT_NRANGE:
1816 if (cat->type == CCT_NRANGE
1817 && cat->nrange[0] == e->nrange[0]
1818 && cat->nrange[1] == e->nrange[1])
1822 case CTPO_CAT_SRANGE:
1823 if (cat->type == CCT_SRANGE
1824 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1825 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1829 case CTPO_CAT_MISSING:
1830 if (cat->type == CCT_MISSING)
1834 case CTPO_CAT_OTHERNM:
1835 if (cat->type == CCT_OTHERNM)
1839 case CTPO_CAT_SUBTOTAL:
1840 if (cat->type == CCT_SUBTOTAL)
1843 if (e->subtotal_index == n_subtotals)
1845 else if (e->subtotal_index == 0)
1850 case CTPO_CAT_TOTAL:
1851 if (cat->type == CCT_TOTAL)
/* A positionless subtotal reference combined with several subtotals. */
1865 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS referenced by expression E, first converting
   string references to numeric ones when PARSE_FORMAT is not FMT_F:
   - a CTPO_CAT_STRING reference is parsed with parse_category_string and
     looked up as a CTPO_CAT_NUMBER;
   - a CTPO_CAT_SRANGE reference has each bound parsed the same way, with a
     bound whose string member is NULL mapped to -DBL_MAX (lower) or DBL_MAX
     (upper), and is looked up as a CTPO_CAT_NRANGE.
   The converted expression reuses the location of E.  All other cases are
   passed to ctables_find_category_for_postcompute__ unchanged.
   NOTE(review): the results returned when parsing fails, and some local
   declarations, are on lines not shown in this excerpt. */
1870 static struct ctables_category *
1871 ctables_find_category_for_postcompute (const struct dictionary *dict,
1872 const struct ctables_categories *cats,
1873 enum fmt_type parse_format,
1874 const struct ctables_pcexpr *e)
1876 if (parse_format != FMT_F)
1878 if (e->op == CTPO_CAT_STRING)
1881 if (!parse_category_string (e->location, e->string, dict,
1882 parse_format, &number))
1885 struct ctables_pcexpr e2 = {
1886 .op = CTPO_CAT_NUMBER,
1888 .location = e->location,
1890 return ctables_find_category_for_postcompute__ (cats, &e2);
1892 else if (e->op == CTPO_CAT_SRANGE)
/* Lower bound: open when no string was given. */
1895 if (!e->srange[0].string)
1896 nrange[0] = -DBL_MAX;
1897 else if (!parse_category_string (e->location, e->srange[0], dict,
1898 parse_format, &nrange[0]))
/* Upper bound: open when no string was given. */
1901 if (!e->srange[1].string)
1902 nrange[1] = DBL_MAX;
1903 else if (!parse_category_string (e->location, e->srange[1], dict,
1904 parse_format, &nrange[1]))
1907 struct ctables_pcexpr e2 = {
1908 .op = CTPO_CAT_NRANGE,
1909 .nrange = { nrange[0], nrange[1] },
1910 .location = e->location,
1912 return ctables_find_category_for_postcompute__ (cats, &e2);
1915 return ctables_find_category_for_postcompute__ (cats, e);
/* Recursively validates postcompute expression E, used by category PC_CAT,
   against the category list CATS, whose source location is CATS_LOCATION.

   For category reference operators the referenced category is looked up
   with ctables_find_category_for_postcompute using the parse format of
   PC_CAT.  The visible diagnostics cover two situations:
   - a subtotal reference with index 0 while CATS holds more than one
     CCT_SUBTOTAL entry, which must instead refer to subtotals by position;
   - a reference to a category that is not in CATS, followed by a note that
     depends on whether the reference is a subtotal, a total, or something
     else.
   For other operators, both entries of e->subs that are non-null are
   checked recursively.
   NOTE(review): the return type, the conditions selecting between the two
   diagnostics, the effect of hide_source_cats and the return statements are
   on lines not shown in this excerpt. */
1919 ctables_recursive_check_postcompute (struct dictionary *dict,
1920 const struct ctables_pcexpr *e,
1921 struct ctables_category *pc_cat,
1922 const struct ctables_categories *cats,
1923 const struct msg_location *cats_location)
1927 case CTPO_CAT_NUMBER:
1928 case CTPO_CAT_STRING:
1929 case CTPO_CAT_NRANGE:
1930 case CTPO_CAT_SRANGE:
1931 case CTPO_CAT_MISSING:
1932 case CTPO_CAT_OTHERNM:
1933 case CTPO_CAT_SUBTOTAL:
1934 case CTPO_CAT_TOTAL:
1936 struct ctables_category *cat = ctables_find_category_for_postcompute (
1937 dict, cats, pc_cat->parse_format, e);
1940 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotal entries to decide whether the reference is
   ambiguous. */
1942 size_t n_subtotals = 0;
1943 for (size_t i = 0; i < cats->n_cats; i++)
1944 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1945 if (n_subtotals > 1)
1947 msg_at (SE, cats_location,
1948 ngettext ("These categories include %zu instance "
1949 "of SUBTOTAL or HSUBTOTAL, so references "
1950 "from computed categories must refer to "
1951 "subtotals by position, "
1952 "e.g. SUBTOTAL[1].",
1953 "These categories include %zu instances "
1954 "of SUBTOTAL or HSUBTOTAL, so references "
1955 "from computed categories must refer to "
1956 "subtotals by position, "
1957 "e.g. SUBTOTAL[1].",
1960 msg_at (SN, e->location,
1961 _("This is the reference that lacks a position."));
1966 msg_at (SE, pc_cat->location,
1967 _("Computed category &%s references a category not included "
1968 "in the category list."),
1970 msg_at (SN, e->location, _("This is the missing category."));
1971 if (e->op == CTPO_CAT_SUBTOTAL)
1972 msg_at (SN, cats_location,
1973 _("To fix the problem, add subtotals to the "
1974 "list of categories here."));
1975 else if (e->op == CTPO_CAT_TOTAL)
/* Unlike the sibling notes, this one is issued with msg and so carries no
   location. */
1976 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1977 "CATEGORIES specification."));
1979 msg_at (SN, cats_location,
1980 _("To fix the problem, add the missing category to the "
1981 "list of categories here."));
1984 if (pc_cat->pc->hide_source_cats)
/* Operators with operands: check each non-null operand. */
1998 for (size_t i = 0; i < 2; i++)
1999 if (e->subs[i] && !ctables_recursive_check_postcompute (
2000 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric ones.  For a numeric
   variable, an error naming that variable is issued at the location of CAT.
   NOTE(review): the return type and return statements are on lines not
   shown in this excerpt; by the name and the negated use at the call sites,
   the result is presumably true only when every variable is a string
   variable - confirm. */
2009 all_strings (struct variable **vars, size_t n_vars,
2010 const struct ctables_category *cat)
2012 for (size_t j = 0; j < n_vars; j++)
2013 if (var_is_numeric (vars[j]))
2015 msg_at (SE, cat->location,
2016 _("This category specification may be applied only to string "
2017 "variables, but this subcommand tries to apply it to "
2018 "numeric variable %s."),
2019 var_get_name (vars[j]));
/* Parses a category specification that begins with the VARIABLES keyword
   and applies it to table T.  The visible stages are:

   1. Parse the variable list and determine whether all variables share one
      print format type; common_format is set to NULL when they differ.
   2. Allocate a ctables_categories with n_refs equal to the number of
      variables and show_empty true, and visit the slot of t->categories for
      each variable, dropping the reference held there.
   3. If T_LBRACK follows, parse explicit categories until T_RBRACK,
      recording the token offsets of the list for later diagnostics.
   4. Parse settings until T_SLASH or T_ENDCMD.  ORDER, KEY and MISSING are
      accepted only while no explicit categories exist; TOTAL, LABEL,
      POSITION and EMPTY are always accepted.
   5. Append the implicit category and, in a section whose condition is not
      shown, a totals category placed first or last according to
      totals_before.
   6. Walk the categories, in a direction chosen by totals_before, storing a
      subtotal pointer in each.
   7. When explicit categories were given, validate each one: postcomputes
      are checked recursively, numeric-only forms are rejected for string
      variables, and string forms are either converted to numbers using the
      common format or required to apply only to string variables.

   NOTE(review): the return type, many switch labels, break and return
   statements, and all failure cleanup are on lines not shown in this
   excerpt, so ownership on error paths cannot be confirmed from here. */
2026 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2027 struct ctables *ct, struct ctables_table *t)
2029 if (!lex_match_id (lexer, "VARIABLES"))
2031 lex_match (lexer, T_EQUALS);
2033 struct variable **vars;
2035 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables, if any. */
2038 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2039 for (size_t i = 1; i < n_vars; i++)
2041 const struct fmt_spec *f = var_get_print_format (vars[i]);
2042 if (f->type != common_format->type)
2044 common_format = NULL;
/* Tail of an expression testing for a date, time or date component format;
   its first line is not shown.  It presumably initializes the
   parse_strings flag used during validation below. */
2050 && (fmt_get_category (common_format->type)
2051 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
2053 struct ctables_categories *c = xmalloc (sizeof *c);
2054 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2055 for (size_t i = 0; i < n_vars; i++)
2057 struct ctables_categories **cp
2058 = &t->categories[var_get_dict_index (vars[i])];
2059 ctables_categories_unref (*cp);
2063 size_t allocated_cats = 0;
2064 int cats_start_ofs = -1;
2065 int cats_end_ofs = -1;
/* Explicit category list. */
2066 if (lex_match (lexer, T_LBRACK))
2068 cats_start_ofs = lex_ofs (lexer);
2071 if (c->n_cats >= allocated_cats)
2072 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2074 int start_ofs = lex_ofs (lexer);
2075 struct ctables_category *cat = &c->cats[c->n_cats];
2076 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2078 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2081 lex_match (lexer, T_COMMA);
2083 while (!lex_match (lexer, T_RBRACK));
2084 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category; it is modified by the ORDER, KEY and
   MISSING settings below. */
2087 struct ctables_category cat = {
2089 .include_missing = false,
2090 .sort_ascending = true,
2092 bool show_totals = false;
2093 char *total_label = NULL;
2094 bool totals_before = false;
2095 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2097 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2099 lex_match (lexer, T_EQUALS);
2100 if (lex_match_id (lexer, "A"))
2101 cat.sort_ascending = true;
2102 else if (lex_match_id (lexer, "D"))
2103 cat.sort_ascending = false;
2106 lex_error_expecting (lexer, "A", "D");
2110 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2112 lex_match (lexer, T_EQUALS);
2113 if (lex_match_id (lexer, "VALUE"))
2114 cat.type = CCT_VALUE;
2115 else if (lex_match_id (lexer, "LABEL"))
2116 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for CTSF_PTILE, a percentile from 0 to 100. */
2119 cat.type = CCT_FUNCTION;
2120 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2121 &cat.weighting, &cat.area))
2124 if (lex_match (lexer, T_LPAREN))
2126 cat.sort_var = parse_variable (lexer, dict);
2130 if (cat.sort_function == CTSF_PTILE)
2132 lex_match (lexer, T_COMMA);
2133 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2135 cat.percentile = lex_number (lexer);
2139 if (!lex_force_match (lexer, T_RPAREN))
2142 else if (ctables_function_availability (cat.sort_function)
/* The result is deliberately unused; the call appears to serve only to
   report the missing parenthesis. */
2145 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2150 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2152 lex_match (lexer, T_EQUALS);
2153 if (lex_match_id (lexer, "INCLUDE"))
2154 cat.include_missing = true;
2155 else if (lex_match_id (lexer, "EXCLUDE"))
2156 cat.include_missing = false;
2159 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2163 else if (lex_match_id (lexer, "TOTAL"))
2165 lex_match (lexer, T_EQUALS);
2166 if (!parse_bool (lexer, &show_totals))
2169 else if (lex_match_id (lexer, "LABEL"))
2171 lex_match (lexer, T_EQUALS);
2172 if (!lex_force_string (lexer))
/* NOTE(review): no release of an earlier total_label is visible before this
   assignment; check whether a repeated LABEL setting leaks the first
   string. */
2175 total_label = ss_xstrdup (lex_tokss (lexer));
2178 else if (lex_match_id (lexer, "POSITION"))
2180 lex_match (lexer, T_EQUALS);
2181 if (lex_match_id (lexer, "BEFORE"))
2182 totals_before = true;
2183 else if (lex_match_id (lexer, "AFTER"))
2184 totals_before = false;
2187 lex_error_expecting (lexer, "BEFORE", "AFTER");
2191 else if (lex_match_id (lexer, "EMPTY"))
2193 lex_match (lexer, T_EQUALS);
2194 if (lex_match_id (lexer, "INCLUDE"))
2195 c->show_empty = true;
2196 else if (lex_match_id (lexer, "EXCLUDE"))
2197 c->show_empty = false;
2200 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two alternative error messages; the condition choosing between them is
   not shown, but the shorter list omits the settings that require an empty
   explicit list. */
2207 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2208 "TOTAL", "LABEL", "POSITION", "EMPTY");
2210 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category. */
2217 if (c->n_cats >= allocated_cats)
2218 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2219 c->cats[c->n_cats++] = cat;
/* Make room for, and then fill in, the totals category. */
2224 if (c->n_cats >= allocated_cats)
2225 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2227 struct ctables_category *totals;
2230 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2231 totals = &c->cats[0];
2234 totals = &c->cats[c->n_cats];
2237 *totals = (struct ctables_category) {
2239 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Iterates forward when totals come first and backward otherwise. */
2243 struct ctables_category *subtotal = NULL;
2244 for (size_t i = totals_before ? 0 : c->n_cats;
2245 totals_before ? i < c->n_cats : i-- > 0;
2246 totals_before ? i++ : 0)
2248 struct ctables_category *cat = &c->cats[i];
2257 cat->subtotal = subtotal;
2260 case CCT_POSTCOMPUTE:
2271 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories; runs only when a bracketed list was
   parsed. */
2276 if (cats_start_ofs != -1)
2278 for (size_t i = 0; i < c->n_cats; i++)
2280 struct ctables_category *cat = &c->cats[i];
2283 case CCT_POSTCOMPUTE:
2284 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2285 struct msg_location *cats_location
2286 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2287 bool ok = ctables_recursive_check_postcompute (
2288 dict, cat->pc->expr, cat, c, cats_location);
2289 msg_location_destroy (cats_location);
2296 for (size_t j = 0; j < n_vars; j++)
2297 if (var_is_alpha (vars[j]))
2299 msg_at (SE, cat->location,
2300 _("This category specification may be applied "
2301 "only to numeric variables, but this "
2302 "subcommand tries to apply it to string "
2304 var_get_name (vars[j]));
/* A string category is converted to a number using the common format; the
   original string is then released. */
2313 if (!parse_category_string (cat->location, cat->string, dict,
2314 common_format->type, &n))
2317 ss_dealloc (&cat->string);
2319 cat->type = CCT_NUMBER;
2322 else if (!all_strings (vars, n_vars, cat))
/* A string range is converted to a numeric range the same way; a bound with
   a NULL string member is handled separately on lines not shown. */
2331 if (!cat->srange[0].string)
2333 else if (!parse_category_string (cat->location,
2334 cat->srange[0], dict,
2335 common_format->type, &n[0]))
2338 if (!cat->srange[1].string)
2340 else if (!parse_category_string (cat->location,
2341 cat->srange[1], dict,
2342 common_format->type, &n[1]))
2345 ss_dealloc (&cat->srange[0]);
2346 ss_dealloc (&cat->srange[1]);
2348 cat->type = CCT_NRANGE;
2349 cat->nrange[0] = n[0];
2350 cat->nrange[1] = n[1];
2352 else if (!all_strings (vars, n_vars, cat))
2363 case CCT_EXCLUDED_MISSING:
/* Releases resources owned by NEST: uninitializes the summary spec set for
   each summary variant and frees the index array for each area type.
   NOTE(review): other frees are on lines not shown in this excerpt. */
2378 ctables_nest_uninit (struct ctables_nest *nest)
2381 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2382 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2383 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2384 free (nest->areas[at]);
/* Uninitializes every nest in STACK and then frees the nests array.  STACK
   itself is not freed by the visible lines.
   NOTE(review): any null guard is on lines not shown in this excerpt. */
2388 ctables_stack_uninit (struct ctables_stack *stack)
2392 for (size_t i = 0; i < stack->n; i++)
2393 ctables_nest_uninit (&stack->nests[i]);
2394 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new nest for
   every pair of a nest from S0 and a nest from S1.  Each new nest holds the
   variables of the S0 nest followed by those of the S1 nest.  Its scale and
   summary indexes are taken from the S0 nest when set there; otherwise the
   index from the S1 nest is shifted by the variable count of the S0 nest.
   Summary spec sets are cloned from whichever source nest is chosen as
   summary_src.  Both S0 and S1 are uninitialized before returning, so the
   caller gives up their contents.
   NOTE(review): handling of empty inputs, the choice of summary_src and
   several initializer fields are on lines not shown in this excerpt. */
2398 static struct ctables_stack
2399 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for s0.n times s1.n nests. */
2406 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2407 for (size_t i = 0; i < s0.n; i++)
2408 for (size_t j = 0; j < s1.n; j++)
2410 const struct ctables_nest *a = &s0.nests[i];
2411 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists of the two nests. */
2413 size_t allocate = a->n + b->n;
2414 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2416 for (size_t k = 0; k < a->n; k++)
2417 vars[n++] = a->vars[k];
2418 for (size_t k = 0; k < b->n; k++)
2419 vars[n++] = b->vars[k];
2420 assert (n == allocate);
/* Pick which nest supplies the summary specs, based on which one has a
   CSV_CELL summary variable. */
2422 const struct ctables_nest *summary_src;
2423 if (!a->specs[CSV_CELL].var)
2425 else if (!b->specs[CSV_CELL].var)
2430 struct ctables_nest *new = &stack.nests[stack.n++];
2431 *new = (struct ctables_nest) {
2433 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2434 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2436 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2437 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2441 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2442 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2444 ctables_stack_uninit (&s0);
2445 ctables_stack_uninit (&s1);
/* Concatenates S0 and S1 into a newly allocated stack holding the nests of
   S0 followed by the nests of S1.  Nests are copied by structure
   assignment, so the new stack takes over whatever they point to.  The
   group_head of each nest taken from S1 is shifted by s0.n to account for
   its new position.
   NOTE(review): the increment of stack.n in the second loop, the release of
   the input arrays and the return are on lines not shown in this excerpt. */
2449 static struct ctables_stack
2450 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2452 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2453 for (size_t i = 0; i < s0.n; i++)
2454 stack.nests[stack.n++] = s0.nests[i];
2455 for (size_t i = 0; i < s1.n; i++)
2457 stack.nests[stack.n] = s1.nests[i];
2458 stack.nests[stack.n].group_head += s0.n;
2461 assert (stack.n == s0.n + s1.n);
/* Builds a stack containing a single nest for the variable leaf A.  The
   nest has scale_idx 0 when A is scale and SIZE_MAX otherwise, and
   summary_idx 0 when A is scale or has CSV_CELL summary specs and SIZE_MAX
   otherwise.  The summary spec set of each variant is cloned from A and
   tagged with the variable and scale flag of A.
   NOTE(review): the store into the one-element vars array and several
   initializer fields are on lines not shown in this excerpt. */
2467 static struct ctables_stack
2468 var_fts (const struct ctables_axis *a)
2470 struct variable **vars = xmalloc (sizeof *vars);
2473 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2474 struct ctables_nest *nest = xmalloc (sizeof *nest);
2475 *nest = (struct ctables_nest) {
2478 .scale_idx = a->scale ? 0 : SIZE_MAX,
2479 .summary_idx = is_summary ? 0 : SIZE_MAX,
2482 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2484 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2485 nest->specs[sv].var = a->var;
2486 nest->specs[sv].is_scale = a->scale;
2488 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  The visible cases return
   an empty stack, the stack_fts combination of the two operands, or the
   nest_fts combination of the two operands; AXIS_TYPE is passed through
   unchanged to the recursive calls.
   NOTE(review): the conditions and switch labels selecting each case,
   including the variable leaf case, are on lines not shown in this
   excerpt. */
2491 static struct ctables_stack
2492 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2495 return (struct ctables_stack) { .n = 0 };
2503 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2504 enumerate_fts (axis_type, a->subs[1]));
2507 /* This should consider any of the scale variables found in the result to
2508 be linked to each other listwise for SMISSING=LISTWISE. */
2509 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2510 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary statistic.  Which member is live
   depends on the summary function, as grouped by the comments below.
   NOTE(review): most member declarations are on lines not shown in this
   excerpt; other code in this file also accesses count, min, max, ovalid
   and ovalue. */
2516 union ctables_summary
2518 /* COUNT, VALIDN, TOTALN. */
2521 /* MINIMUM, MAXIMUM, RANGE. */
2528 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2529 struct moments1 *moments;
2531 /* MEDIAN, MODE, PTILE. */
2534 struct casewriter *writer;
/* Initializes accumulator S for the summary function of SS.  The visible
   cases are:
   - minimum and maximum both start as SYSMIS;
   - one group of functions, ending with CTSF_areaPCT_SUM, creates a moments
     accumulator for MOMENT_MEAN;
   - another group creates a moments accumulator for MOMENT_VARIANCE;
   - the order statistic group creates a sorting casewriter over a
     two-column numeric prototype, sorted ascending on column 0.
   NOTE(review): most case labels and the initialization for the counting
   functions are on lines not shown in this excerpt. */
2541 ctables_summary_init (union ctables_summary *s,
2542 const struct ctables_summary_spec *ss)
2544 switch (ss->function)
2547 case CTSF_areaPCT_COUNT:
2548 case CTSF_areaPCT_VALIDN:
2549 case CTSF_areaPCT_TOTALN:
2562 s->min = s->max = SYSMIS;
2567 case CTSF_areaPCT_SUM:
2568 s->moments = moments1_create (MOMENT_MEAN);
2574 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric columns; ctables_summary_add writes the value to column 0 and
   the weight to column 1. */
2581 struct caseproto *proto = caseproto_create ();
2582 proto = caseproto_add_width (proto, 0);
2583 proto = caseproto_add_width (proto, 0);
2585 struct subcase ordering;
2586 subcase_init (&ordering, 0, 0, SC_ASCEND);
2587 s->writer = sort_create_writer (&ordering, proto);
/* The local ordering and prototype are released once the writer exists. */
2588 subcase_uninit (&ordering);
2589 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function of SS.
   The visible cases destroy the moments accumulator for one group of
   functions, ending with CTSF_areaPCT_SUM, and destroy the casewriter for
   another group.
   NOTE(review): most case labels are on lines not shown in this excerpt. */
2599 ctables_summary_uninit (union ctables_summary *s,
2600 const struct ctables_summary_spec *ss)
2602 switch (ss->function)
2605 case CTSF_areaPCT_COUNT:
2606 case CTSF_areaPCT_VALIDN:
2607 case CTSF_areaPCT_TOTALN:
2626 case CTSF_areaPCT_SUM:
2627 moments1_destroy (s->moments);
2633 casewriter_destroy (s->writer);
/* Adds one observation, VALUE, to accumulator S according to the summary
   function of SS.  The flags describe the case: IS_SCALE and
   IS_SCALE_MISSING concern the scale variable, IS_MISSING and IS_INCLUDED
   concern the categorical variables; the inclusion rules are tabulated in
   the comment inside the body.  Visible behavior:
   - minimum and maximum are updated only when the scale value is not
     missing, treating SYSMIS as not yet set;
   - moments are updated with the value and a weight under the same
     condition, and for the group ending with CTSF_areaPCT_SUM additionally
     only when IS_MISSING is false;
   - order statistics add the weight to ovalid and write a case holding the
     value and the weight to the casewriter.
   NOTE(review): the final parameter line (which must declare the weight
   used in the body), most case labels and the counting logic are on lines
   not shown in this excerpt.  No comments are added inside the body because
   the closing line of its block comment is also missing. */
2639 ctables_summary_add (union ctables_summary *s,
2640 const struct ctables_summary_spec *ss,
2641 const union value *value,
2642 bool is_scale, bool is_scale_missing,
2643 bool is_missing, bool is_included,
2646 /* To determine whether a case is included in a given table for a particular
2647 kind of summary, consider the following charts for each variable in the
2648 table. Only if "yes" appears for every variable for the summary is the
2651 Categorical variables: VALIDN COUNT TOTALN
2652 Valid values in included categories yes yes yes
2653 Missing values in included categories --- yes yes
2654 Missing values in excluded categories --- --- yes
2655 Valid values in excluded categories --- --- ---
2657 Scale variables: VALIDN COUNT TOTALN
2658 Valid value yes yes yes
2659 Missing value --- yes yes
2661 Missing values include both user- and system-missing. (The system-missing
2662 value is always in an excluded category.)
2664 switch (ss->function)
2670 case CTSF_areaPCT_TOTALN:
2675 if (is_scale || is_included)
2679 case CTSF_areaPCT_COUNT:
2680 if (is_scale || is_included)
2691 case CTSF_areaPCT_VALIDN:
2711 if (!is_scale_missing)
2713 if (s->min == SYSMIS || value->f < s->min)
2715 if (s->max == SYSMIS || value->f > s->max)
2725 if (!is_scale_missing)
2726 moments1_add (s->moments, value->f, weight);
2729 case CTSF_areaPCT_SUM:
2730 if (!is_missing && !is_scale_missing)
2731 moments1_add (s->moments, value->f, weight);
2737 if (!is_scale_missing)
2739 s->ovalid += weight;
2741 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2742 *case_num_rw_idx (c, 0) = value->f;
2743 *case_num_rw_idx (c, 1) = weight;
2744 casewriter_write (s->writer, c);
/* Computes the final value of the summary described by SS from accumulator
   S, using the areas of CELL where a denominator is needed.  Visible cases:
   - one case returns the sequence member of the calculation area;
   - the three area percentage counts divide s->count by the count, valid or
     total of the calculation area for the chosen weighting, times 100,
     yielding SYSMIS when the denominator is zero;
   - range is max minus min, or SYSMIS when either is SYSMIS;
   - mean, standard error, standard deviation, sum and variance are derived
     from moments1_calculate, with the sum computed as weight times mean;
   - the area percentage sum divides weight times mean by the area sum for
     the chosen variable and weighting, times 100, yielding SYSMIS for a
     zero denominator;
   - percentiles (including the median at 0.5) and the mode are computed by
     reading back the casewriter; the result is cached in s->ovalue.
   NOTE(review): casewriter_make_reader is called on s->writer; whether the
   writer is cleared afterwards, and how a repeated call is handled, is on
   lines not shown in this excerpt.  Most case labels are also not shown. */
2751 ctables_summary_value (const struct ctables_cell *cell,
2752 union ctables_summary *s,
2753 const struct ctables_summary_spec *ss)
2755 switch (ss->function)
2761 return cell->areas[ss->calc_area]->sequence;
2763 case CTSF_areaPCT_COUNT:
2765 const struct ctables_area *a = cell->areas[ss->calc_area];
2766 double a_count = a->count[ss->weighting];
2767 return a_count ? s->count / a_count * 100 : SYSMIS;
2770 case CTSF_areaPCT_VALIDN:
2772 const struct ctables_area *a = cell->areas[ss->calc_area];
2773 double a_valid = a->valid[ss->weighting];
2774 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2777 case CTSF_areaPCT_TOTALN:
2779 const struct ctables_area *a = cell->areas[ss->calc_area];
2780 double a_total = a->total[ss->weighting];
2781 return a_total ? s->count / a_total * 100 : SYSMIS;
2796 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2801 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2807 double weight, variance;
2808 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2809 return calc_semean (variance, weight);
2815 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2816 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Sum, recovered as total weight times mean. */
2821 double weight, mean;
2822 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2823 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2829 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2833 case CTSF_areaPCT_SUM:
2835 double weight, mean;
2836 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2837 if (weight == SYSMIS || mean == SYSMIS)
2840 const struct ctables_area *a = cell->areas[ss->calc_area];
2841 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2842 double denom = sum->sum[ss->weighting];
2843 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Percentile: the requested percentile for CTSF_PTILE, otherwise 0.5. */
2850 struct casereader *reader = casewriter_make_reader (s->writer);
2853 struct percentile *ptile = percentile_create (
2854 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2855 struct order_stats *os = &ptile->parent;
2856 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2857 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2858 statistic_destroy (&ptile->parent.parent);
/* Mode. */
2865 struct casereader *reader = casewriter_make_reader (s->writer);
2868 struct mode *mode = mode_create ();
2869 struct order_stats *os = &mode->parent;
2870 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2871 s->ovalue = mode->mode;
2872 statistic_destroy (&mode->parent.parent);
/* Auxiliary data passed to ctables_cell_compare_3way: the nest whose
   variables are compared and the pivot axis whose cell values are read. */
2880 struct ctables_cell_sort_aux
2882 const struct ctables_nest *nest;
2883 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   pointers to struct ctables_cell; AUX_ points to a struct
   ctables_cell_sort_aux.  For each variable of the nest other than the
   scale variable, the cells are ordered first by the address of their
   category and then, within the same category, in a way that depends on the
   category type: by value, by value honoring sort_ascending, or by value
   label with the value as a fallback, again honoring sort_ascending.
   NOTE(review): most case labels, the label null handling and the final
   return are on lines not shown in this excerpt. */
2887 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2889 const struct ctables_cell_sort_aux *aux = aux_;
2890 struct ctables_cell *const *ap = a_;
2891 struct ctables_cell *const *bp = b_;
2892 const struct ctables_cell *a = *ap;
2893 const struct ctables_cell *b = *bp;
2895 const struct ctables_nest *nest = aux->nest;
2896 for (size_t i = 0; i < nest->n; i++)
2897 if (i != nest->scale_idx)
2899 const struct variable *var = nest->vars[i];
2900 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2901 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Categories are compared by pointer, so this orders cells by the position
   of their category in memory. */
2902 if (a_cv->category != b_cv->category)
2903 return a_cv->category > b_cv->category ? 1 : -1;
2905 const union value *a_val = &a_cv->value;
2906 const union value *b_val = &b_cv->value;
2907 switch (a_cv->category->type)
2913 case CCT_POSTCOMPUTE:
2914 case CCT_EXCLUDED_MISSING:
2915 /* Must be equal. */
2923 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2931 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2933 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label. */
2939 const char *a_label = var_lookup_value_label (var, a_val);
2940 const char *b_label = var_lookup_value_label (var, b_val);
2946 cmp = strcmp (a_label, b_label);
2952 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2955 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by the leaf member of each pivot axis,
   taking the axes in increasing order.  A_ and B_ point to pointers to
   struct ctables_cell; AUX is unused.
   NOTE(review): the test guarding the visible return and the final return
   for equal cells are on lines not shown in this excerpt. */
2967 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2968 const void *aux UNUSED)
2970 struct ctables_cell *const *ap = a_;
2971 struct ctables_cell *const *bp = b_;
2972 const struct ctables_cell *a = *ap;
2973 const struct ctables_cell *b = *bp;
2975 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2977 int al = a->axes[axis].leaf;
2978 int bl = b->axes[axis].leaf;
2980 return al > bl ? 1 : -1;
/* Finds or creates the area of type AREA in section S that CELL belongs to.

   The key consists of, for every pivot axis, the category and value of each
   variable listed in the areas array of the nest for that axis.  Values are
   left out of the key for CCT_TOTAL, CCT_SUBTOTAL and CCT_POSTCOMPUTE
   categories.  The hash map s->areas[area] is probed with that hash and
   each candidate is compared against its example cell.  When no match is
   found, a new area is allocated with CELL as its example and a zeroed sums
   array sized by the number of sum variables of the table, and inserted.
   NOTE(review): the initial hash value, the match and return statements and
   the null alternative for sums are on lines not shown in this excerpt. */
2985 static struct ctables_area *
2986 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
2987 enum ctables_area_type area)
/* Hash the key. */
2990 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2992 const struct ctables_nest *nest = s->nests[a];
2993 for (size_t i = 0; i < nest->n_areas[area]; i++)
2995 size_t v_idx = nest->areas[area][i];
2996 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2997 hash = hash_pointer (cv->category, hash);
2998 if (cv->category->type != CCT_TOTAL
2999 && cv->category->type != CCT_SUBTOTAL
3000 && cv->category->type != CCT_POSTCOMPUTE)
3001 hash = value_hash (&cv->value,
3002 var_get_width (nest->vars[v_idx]), hash);
/* Probe for an existing area with the same key.  The loop variable of the
   inner axis loop shadows the area pointer declared here. */
3006 struct ctables_area *a;
3007 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3009 const struct ctables_cell *df = a->example;
3010 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3012 const struct ctables_nest *nest = s->nests[a];
3013 for (size_t i = 0; i < nest->n_areas[area]; i++)
3015 size_t v_idx = nest->areas[area][i];
3016 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3017 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3018 if (cv1->category != cv2->category
3019 || (cv1->category->type != CCT_TOTAL
3020 && cv1->category->type != CCT_SUBTOTAL
3021 && cv1->category->type != CCT_POSTCOMPUTE
3022 && !value_equal (&cv1->value, &cv2->value,
3023 var_get_width (nest->vars[v_idx]))))
/* Not found: create and insert a new area. */
3032 struct ctables_sum *sums = (s->table->n_sum_vars
3033 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3036 a = xmalloc (sizeof *a);
3037 *a = (struct ctables_area) { .example = cell, .sums = sums };
3038 hmap_insert (&s->areas[area], &a->node, hash);
/* Builds a substring over the string bytes of V, sized by the width of VAR,
   and right-trims it with ss_rtrim using a single-space trim set.  The
   substring is built over the bytes of V itself, with no copy made here.
   NOTE(review): the return statement is on a line not shown in this
   excerpt. */
3042 static struct substring
3043 rtrim_value (const union value *v, const struct variable *var)
3045 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3046 var_get_width (var));
3047 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the right-trimmed string value V of VAR lies within the
   inclusive range SRANGE[0] to SRANGE[1], compared with ss_compare.  A
   bound whose string member is NULL is treated as open, so that side of the
   range always passes.
   NOTE(review): the return type is on a line not shown in this excerpt. */
3052 in_string_range (const union value *v, const struct variable *var,
3053 const struct substring *srange)
3055 struct substring s = rtrim_value (v, var);
3056 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3057 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.  The
   categories are scanned from last to first.  The visible tests compare the
   number, the right-trimmed string, the inclusive numeric range (with
   -DBL_MAX and DBL_MAX acting as open bounds), the string range, and
   whether the value is missing for VAR.  One case yields the category only
   if it includes missing values or the value is not missing.  When nothing
   matched, the result is NULL for a missing value and otherwise the
   remembered othernm category, which may itself be NULL.
   NOTE(review): the result for a numeric SYSMIS value, the case labels and
   the statements executed on each match are on lines not shown in this
   excerpt. */
3060 static const struct ctables_category *
3061 ctables_categories_match (const struct ctables_categories *c,
3062 const union value *v, const struct variable *var)
3064 if (var_is_numeric (var) && v->f == SYSMIS)
3067 const struct ctables_category *othernm = NULL;
/* Reverse scan. */
3068 for (size_t i = c->n_cats; i-- > 0; )
3070 const struct ctables_category *cat = &c->cats[i];
3074 if (cat->number == v->f)
3079 if (ss_equals (cat->string, rtrim_value (v, var)))
3084 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3085 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3090 if (in_string_range (v, var, cat->srange))
3095 if (var_is_value_missing (var, v))
3099 case CCT_POSTCOMPUTE:
3114 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3117 case CCT_EXCLUDED_MISSING:
3122 return var_is_value_missing (var, v) ? NULL : othernm;
3125 static const struct ctables_category *
3126 ctables_categories_total (const struct ctables_categories *c)
3128 const struct ctables_category *first = &c->cats[0];
3129 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3130 return (first->type == CCT_TOTAL ? first
3131 : last->type == CCT_TOTAL ? last
3135 static struct ctables_cell *
3136 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3137 const struct ctables_category **cats[PIVOT_N_AXES])
3140 enum ctables_summary_variant sv = CSV_CELL;
3141 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3143 const struct ctables_nest *nest = s->nests[a];
3144 for (size_t i = 0; i < nest->n; i++)
3145 if (i != nest->scale_idx)
3147 hash = hash_pointer (cats[a][i], hash);
3148 if (cats[a][i]->type != CCT_TOTAL
3149 && cats[a][i]->type != CCT_SUBTOTAL
3150 && cats[a][i]->type != CCT_POSTCOMPUTE)
3151 hash = value_hash (case_data (c, nest->vars[i]),
3152 var_get_width (nest->vars[i]), hash);
3158 struct ctables_cell *cell;
3159 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3161 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3163 const struct ctables_nest *nest = s->nests[a];
3164 for (size_t i = 0; i < nest->n; i++)
3165 if (i != nest->scale_idx
3166 && (cats[a][i] != cell->axes[a].cvs[i].category
3167 || (cats[a][i]->type != CCT_TOTAL
3168 && cats[a][i]->type != CCT_SUBTOTAL
3169 && cats[a][i]->type != CCT_POSTCOMPUTE
3170 && !value_equal (case_data (c, nest->vars[i]),
3171 &cell->axes[a].cvs[i].value,
3172 var_get_width (nest->vars[i])))))
3181 cell = xmalloc (sizeof *cell);
3184 cell->omit_areas = 0;
3185 cell->postcompute = false;
3186 //struct string name = DS_EMPTY_INITIALIZER;
3187 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3189 const struct ctables_nest *nest = s->nests[a];
3190 cell->axes[a].cvs = (nest->n
3191 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3193 for (size_t i = 0; i < nest->n; i++)
3195 const struct ctables_category *cat = cats[a][i];
3196 const struct variable *var = nest->vars[i];
3197 const union value *value = case_data (c, var);
3198 if (i != nest->scale_idx)
3200 const struct ctables_category *subtotal = cat->subtotal;
3201 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3204 if (cat->type == CCT_TOTAL
3205 || cat->type == CCT_SUBTOTAL
3206 || cat->type == CCT_POSTCOMPUTE)
3210 case PIVOT_AXIS_COLUMN:
3211 cell->omit_areas |= ((1u << CTAT_TABLE) |
3212 (1u << CTAT_LAYER) |
3213 (1u << CTAT_LAYERCOL) |
3214 (1u << CTAT_SUBTABLE) |
3217 case PIVOT_AXIS_ROW:
3218 cell->omit_areas |= ((1u << CTAT_TABLE) |
3219 (1u << CTAT_LAYER) |
3220 (1u << CTAT_LAYERROW) |
3221 (1u << CTAT_SUBTABLE) |
3224 case PIVOT_AXIS_LAYER:
3225 cell->omit_areas |= ((1u << CTAT_TABLE) |
3226 (1u << CTAT_LAYER));
3230 if (cat->type == CCT_POSTCOMPUTE)
3231 cell->postcompute = true;
3234 cell->axes[a].cvs[i].category = cat;
3235 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3238 if (i != nest->scale_idx)
3240 if (!ds_is_empty (&name))
3241 ds_put_cstr (&name, ", ");
3242 char *value_s = data_out (value, var_get_encoding (var),
3243 var_get_print_format (var),
3244 settings_get_fmt_settings ());
3245 if (cat->type == CCT_TOTAL
3246 || cat->type == CCT_SUBTOTAL
3247 || cat->type == CCT_POSTCOMPUTE)
3248 ds_put_format (&name, "%s=total", var_get_name (var));
3250 ds_put_format (&name, "%s=%s", var_get_name (var),
3251 value_s + strspn (value_s, " "));
3257 //cell->name = ds_steal_cstr (&name);
3259 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3260 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3261 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3262 for (size_t i = 0; i < specs->n; i++)
3263 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3264 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3265 cell->areas[at] = ctables_area_insert (s, cell, at);
3266 hmap_insert (&s->cells, &cell->node, hash);
3271 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3272 const struct ccase *c)
3274 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3276 const struct variable *var = specs->listwise_vars[i];
3277 if (var_is_num_missing (var, case_num (c, var)))
3285 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3287 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3292 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3293 const struct ctables_category **cats[PIVOT_N_AXES],
3294 bool is_included, double weight[N_CTWS])
3296 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3297 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3299 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3300 const union value *value = case_data (c, specs->var);
3301 bool is_missing = var_is_value_missing (specs->var, value);
3302 bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c));
3304 for (size_t i = 0; i < specs->n; i++)
3305 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3306 specs->is_scale, scale_missing, is_missing,
3307 is_included, weight[specs->specs[i].weighting]);
3308 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3309 if (!(cell->omit_areas && (1u << at)))
3311 struct ctables_area *a = cell->areas[at];
3313 add_weight (a->total, weight);
3315 add_weight (a->count, weight);
3318 add_weight (a->valid, weight);
3321 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3323 const struct variable *var = s->table->sum_vars[i];
3324 double addend = case_num (c, var);
3325 if (!var_is_num_missing (var, addend))
3326 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3327 a->sums[i].sum[wt] += addend * weight[wt];
3334 recurse_totals (struct ctables_section *s, const struct ccase *c,
3335 const struct ctables_category **cats[PIVOT_N_AXES],
3336 bool is_included, double weight[N_CTWS],
3337 enum pivot_axis_type start_axis, size_t start_nest)
3339 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3341 const struct ctables_nest *nest = s->nests[a];
3342 for (size_t i = start_nest; i < nest->n; i++)
3344 if (i == nest->scale_idx)
3347 const struct variable *var = nest->vars[i];
3349 const struct ctables_category *total = ctables_categories_total (
3350 s->table->categories[var_get_dict_index (var)]);
3353 const struct ctables_category *save = cats[a][i];
3355 ctables_cell_add__ (s, c, cats, is_included, weight);
3356 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3365 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3366 const struct ctables_category **cats[PIVOT_N_AXES],
3367 bool is_included, double weight[N_CTWS],
3368 enum pivot_axis_type start_axis, size_t start_nest)
3370 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3372 const struct ctables_nest *nest = s->nests[a];
3373 for (size_t i = start_nest; i < nest->n; i++)
3375 if (i == nest->scale_idx)
3378 const struct ctables_category *save = cats[a][i];
3381 cats[a][i] = save->subtotal;
3382 ctables_cell_add__ (s, c, cats, is_included, weight);
3383 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3392 ctables_add_occurrence (const struct variable *var,
3393 const union value *value,
3394 struct hmap *occurrences)
3396 int width = var_get_width (var);
3397 unsigned int hash = value_hash (value, width, 0);
3399 struct ctables_occurrence *o;
3400 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3402 if (value_equal (value, &o->value, width))
3405 o = xmalloc (sizeof *o);
3406 value_clone (&o->value, value, width);
3407 hmap_insert (occurrences, &o->node, hash);
3411 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3412 double weight[N_CTWS])
3414 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3415 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3416 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3417 const struct ctables_category **cats[PIVOT_N_AXES] =
3419 [PIVOT_AXIS_LAYER] = layer_cats,
3420 [PIVOT_AXIS_ROW] = row_cats,
3421 [PIVOT_AXIS_COLUMN] = column_cats,
3424 bool is_included = true;
3426 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3428 const struct ctables_nest *nest = s->nests[a];
3429 for (size_t i = 0; i < nest->n; i++)
3430 if (i != nest->scale_idx)
3432 const struct variable *var = nest->vars[i];
3433 const union value *value = case_data (c, var);
3435 cats[a][i] = ctables_categories_match (
3436 s->table->categories[var_get_dict_index (var)], value, var);
3439 if (i != nest->summary_idx)
3442 if (!var_is_value_missing (var, value))
3445 static const struct ctables_category cct_excluded_missing = {
3446 .type = CCT_EXCLUDED_MISSING,
3449 cats[a][i] = &cct_excluded_missing;
3450 is_included = false;
3456 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3458 const struct ctables_nest *nest = s->nests[a];
3459 for (size_t i = 0; i < nest->n; i++)
3460 if (i != nest->scale_idx)
3462 const struct variable *var = nest->vars[i];
3463 const union value *value = case_data (c, var);
3464 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3468 ctables_cell_add__ (s, c, cats, is_included, weight);
3469 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3470 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* Identifies one summary specification: element OFS of SET->specs. */
struct merge_item
  {
    const struct ctables_summary_spec_set *set;
    size_t ofs;
  };
3480 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3482 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3483 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3484 if (as->function != bs->function)
3485 return as->function > bs->function ? 1 : -1;
3486 else if (as->weighting != bs->weighting)
3487 return as->weighting > bs->weighting ? 1 : -1;
3488 else if (as->calc_area != bs->calc_area)
3489 return as->calc_area > bs->calc_area ? 1 : -1;
3490 else if (as->percentile != bs->percentile)
3491 return as->percentile < bs->percentile ? 1 : -1;
3493 const char *as_label = as->label ? as->label : "";
3494 const char *bs_label = bs->label ? bs->label : "";
3495 return strcmp (as_label, bs_label);
3499 ctables_category_format_number (double number, const struct variable *var,
3502 struct pivot_value *pv = pivot_value_new_var_value (
3503 var, &(union value) { .f = number });
3504 pivot_value_format (pv, NULL, s);
3505 pivot_value_destroy (pv);
3509 ctables_category_format_string (struct substring string,
3510 const struct variable *var, struct string *out)
3512 int width = var_get_width (var);
3513 char *s = xmalloc (width);
3514 buf_copy_rpad (s, width, string.string, string.length, ' ');
3515 struct pivot_value *pv = pivot_value_new_var_value (
3516 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3517 pivot_value_format (pv, NULL, out);
3518 pivot_value_destroy (pv);
3523 ctables_category_format_label (const struct ctables_category *cat,
3524 const struct variable *var,
3530 ctables_category_format_number (cat->number, var, s);
3534 ctables_category_format_string (cat->string, var, s);
3538 ctables_category_format_number (cat->nrange[0], var, s);
3539 ds_put_format (s, " THRU ");
3540 ctables_category_format_number (cat->nrange[1], var, s);
3544 ctables_category_format_string (cat->srange[0], var, s);
3545 ds_put_format (s, " THRU ");
3546 ctables_category_format_string (cat->srange[1], var, s);
3550 ds_put_cstr (s, "MISSING");
3554 ds_put_cstr (s, "OTHERNM");
3557 case CCT_POSTCOMPUTE:
3558 ds_put_format (s, "&%s", cat->pc->name);
3563 ds_put_cstr (s, cat->total_label);
3569 case CCT_EXCLUDED_MISSING:
3576 static struct pivot_value *
3577 ctables_postcompute_label (const struct ctables_categories *cats,
3578 const struct ctables_category *cat,
3579 const struct variable *var)
3581 struct substring in = ss_cstr (cat->pc->label);
3582 struct substring target = ss_cstr (")LABEL[");
3584 struct string out = DS_EMPTY_INITIALIZER;
3587 size_t chunk = ss_find_substring (in, target);
3588 if (chunk == SIZE_MAX)
3590 if (ds_is_empty (&out))
3591 return pivot_value_new_user_text (in.string, in.length);
3594 ds_put_substring (&out, in);
3595 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3599 ds_put_substring (&out, ss_head (in, chunk));
3600 ss_advance (&in, chunk + target.length);
3602 struct substring idx_s;
3603 if (!ss_get_until (&in, ']', &idx_s))
3606 long int idx = strtol (idx_s.string, &tail, 10);
3607 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3610 struct ctables_category *cat2 = &cats->cats[idx - 1];
3611 if (!ctables_category_format_label (cat2, var, &out))
3617 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3620 static struct pivot_value *
3621 ctables_category_create_value_label (const struct ctables_categories *cats,
3622 const struct ctables_category *cat,
3623 const struct variable *var,
3624 const union value *value)
3626 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3627 ? ctables_postcompute_label (cats, cat, var)
3628 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3629 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3630 : pivot_value_new_var_value (var, value));
3633 static struct ctables_value *
3634 ctables_value_find__ (struct ctables_table *t, const union value *value,
3635 int width, unsigned int hash)
3637 struct ctables_value *clv;
3638 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3639 hash, &t->clabels_values_map)
3640 if (value_equal (value, &clv->value, width))
3646 ctables_value_insert (struct ctables_table *t, const union value *value,
3649 unsigned int hash = value_hash (value, width, 0);
3650 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3653 clv = xmalloc (sizeof *clv);
3654 value_clone (&clv->value, value, width);
3655 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T->clabels_values_map that equals VALUE, which has
   the given WIDTH, or a null pointer if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  return ctables_value_find__ (t, value, width,
                               value_hash (value, width, 0));
}
3668 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3669 size_t ix[PIVOT_N_AXES])
3671 if (a < PIVOT_N_AXES)
3673 size_t limit = MAX (t->stacks[a].n, 1);
3674 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3675 ctables_table_add_section (t, a + 1, ix);
3679 struct ctables_section *s = &t->sections[t->n_sections++];
3680 *s = (struct ctables_section) {
3682 .cells = HMAP_INITIALIZER (s->cells),
3684 for (a = 0; a < PIVOT_N_AXES; a++)
3687 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3689 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3690 for (size_t i = 0; i < nest->n; i++)
3691 hmap_init (&s->occurrences[a][i]);
3693 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3694 hmap_init (&s->areas[at]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  return a + b;
}
/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  return a - b;
}
/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  return a * b;
}
3717 ctpo_div (double a, double b)
3719 return b ? a / b : SYSMIS;
3723 ctpo_pow (double a, double b)
3725 int save_errno = errno;
3727 double result = pow (a, b);
3735 ctpo_neg (double a, double b UNUSED)
3740 struct ctables_pcexpr_evaluate_ctx
3742 const struct ctables_cell *cell;
3743 const struct ctables_section *section;
3744 const struct ctables_categories *cats;
3745 enum pivot_axis_type pc_a;
3748 enum fmt_type parse_format;
3751 static double ctables_pcexpr_evaluate (
3752 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3755 ctables_pcexpr_evaluate_nonterminal (
3756 const struct ctables_pcexpr_evaluate_ctx *ctx,
3757 const struct ctables_pcexpr *e, size_t n_args,
3758 double evaluate (double, double))
3760 double args[2] = { 0, 0 };
3761 for (size_t i = 0; i < n_args; i++)
3763 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3764 if (!isfinite (args[i]) || args[i] == SYSMIS)
3767 return evaluate (args[0], args[1]);
3771 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3772 const struct ctables_cell_value *pc_cv)
3774 const struct ctables_section *s = ctx->section;
3777 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3779 const struct ctables_nest *nest = s->nests[a];
3780 for (size_t i = 0; i < nest->n; i++)
3781 if (i != nest->scale_idx)
3783 const struct ctables_cell_value *cv
3784 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3785 : &ctx->cell->axes[a].cvs[i]);
3786 hash = hash_pointer (cv->category, hash);
3787 if (cv->category->type != CCT_TOTAL
3788 && cv->category->type != CCT_SUBTOTAL
3789 && cv->category->type != CCT_POSTCOMPUTE)
3790 hash = value_hash (&cv->value,
3791 var_get_width (nest->vars[i]), hash);
3795 struct ctables_cell *tc;
3796 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3798 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3800 const struct ctables_nest *nest = s->nests[a];
3801 for (size_t i = 0; i < nest->n; i++)
3802 if (i != nest->scale_idx)
3804 const struct ctables_cell_value *p_cv
3805 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3806 : &ctx->cell->axes[a].cvs[i]);
3807 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3808 if (p_cv->category != t_cv->category
3809 || (p_cv->category->type != CCT_TOTAL
3810 && p_cv->category->type != CCT_SUBTOTAL
3811 && p_cv->category->type != CCT_POSTCOMPUTE
3812 && !value_equal (&p_cv->value,
3814 var_get_width (nest->vars[i]))))
3826 const struct ctables_table *t = s->table;
3827 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3828 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3829 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3830 &specs->specs[ctx->summary_idx]);
3834 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3835 const struct ctables_pcexpr *e)
3842 case CTPO_CAT_NRANGE:
3843 case CTPO_CAT_SRANGE:
3844 case CTPO_CAT_MISSING:
3845 case CTPO_CAT_OTHERNM:
3847 struct ctables_cell_value cv = {
3848 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3850 assert (cv.category != NULL);
3852 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3853 const struct ctables_occurrence *o;
3856 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3857 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3858 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3860 cv.value = o->value;
3861 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3866 case CTPO_CAT_NUMBER:
3867 case CTPO_CAT_SUBTOTAL:
3868 case CTPO_CAT_TOTAL:
3870 struct ctables_cell_value cv = {
3871 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3872 .value = { .f = e->number },
3874 assert (cv.category != NULL);
3875 return ctables_pcexpr_evaluate_category (ctx, &cv);
3878 case CTPO_CAT_STRING:
3880 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3882 if (width > e->string.length)
3884 s = xmalloc (width);
3885 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3888 const struct ctables_category *category
3889 = ctables_find_category_for_postcompute (
3890 ctx->section->table->ctables->dict,
3891 ctx->cats, ctx->parse_format, e);
3892 assert (category != NULL);
3894 struct ctables_cell_value cv = { .category = category };
3895 if (category->type == CCT_NUMBER)
3896 cv.value.f = category->number;
3897 else if (category->type == CCT_STRING)
3898 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3902 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3908 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3911 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3914 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3917 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3920 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3923 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3929 static const struct ctables_category *
3930 ctables_cell_postcompute (const struct ctables_section *s,
3931 const struct ctables_cell *cell,
3932 enum pivot_axis_type *pc_a_p,
3935 assert (cell->postcompute);
3936 const struct ctables_category *pc_cat = NULL;
3937 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3938 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3940 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3941 if (cv->category->type == CCT_POSTCOMPUTE)
3945 /* Multiple postcomputes cross each other. The value is
3950 pc_cat = cv->category;
3954 *pc_a_idx_p = pc_a_idx;
3958 assert (pc_cat != NULL);
3963 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3964 const struct ctables_cell *cell,
3965 const struct ctables_summary_spec *ss,
3966 struct fmt_spec *format,
3967 bool *is_ctables_format,
3970 enum pivot_axis_type pc_a = 0;
3971 size_t pc_a_idx = 0;
3972 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3973 s, cell, &pc_a, &pc_a_idx);
3977 const struct ctables_postcompute *pc = pc_cat->pc;
3980 for (size_t i = 0; i < pc->specs->n; i++)
3982 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3983 if (ss->function == ss2->function
3984 && ss->weighting == ss2->weighting
3985 && ss->calc_area == ss2->calc_area
3986 && ss->percentile == ss2->percentile)
3988 *format = ss2->format;
3989 *is_ctables_format = ss2->is_ctables_format;
3995 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3996 const struct ctables_categories *cats = s->table->categories[
3997 var_get_dict_index (var)];
3998 struct ctables_pcexpr_evaluate_ctx ctx = {
4003 .pc_a_idx = pc_a_idx,
4004 .summary_idx = summary_idx,
4005 .parse_format = pc_cat->parse_format,
4007 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4011 ctables_format (double d, const struct fmt_spec *format,
4012 const struct fmt_settings *settings)
4014 const union value v = { .f = d };
4015 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4017 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4018 produce the results we want for negative numbers, putting the negative
4019 sign in the wrong spot, before the prefix instead of after it. We can't,
4020 in fact, produce the desired results using a custom-currency
4021 specification. Instead, we postprocess the output, moving the negative
4024 NEQUAL: "-N=3" => "N=-3"
4025 PAREN: "-(3)" => "(-3)"
4026 PCTPAREN: "-(3%)" => "(-3%)"
4028 This transformation doesn't affect NEGPAREN. */
4029 char *minus_src = strchr (s, '-');
4030 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4032 char *n_equals = strstr (s, "N=");
4033 char *lparen = strchr (s, '(');
4034 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4036 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4042 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4044 for (size_t i = 0; i < t->stacks[a].n; i++)
4046 struct ctables_nest *nest = &t->stacks[a].nests[i];
4047 if (nest->n != 1 || nest->scale_idx != 0)
4050 enum ctables_vlabel vlabel
4051 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4052 if (vlabel != CTVL_NONE)
4059 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4061 struct pivot_table *pt = pivot_table_create__ (
4063 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4064 : pivot_value_new_text (N_("Custom Tables"))),
4067 pivot_table_set_caption (
4068 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4070 pivot_table_set_corner_text (
4071 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4073 bool summary_dimension = (t->summary_axis != t->slabels_axis
4074 || (!t->slabels_visible
4075 && t->summary_specs.n > 1));
4076 if (summary_dimension)
4078 struct pivot_dimension *d = pivot_dimension_create (
4079 pt, t->slabels_axis, N_("Statistics"));
4080 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4081 if (!t->slabels_visible)
4082 d->hide_all_labels = true;
4083 for (size_t i = 0; i < specs->n; i++)
4084 pivot_category_create_leaf (
4085 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4088 bool categories_dimension = t->clabels_example != NULL;
4089 if (categories_dimension)
4091 struct pivot_dimension *d = pivot_dimension_create (
4092 pt, t->label_axis[t->clabels_from_axis],
4093 t->clabels_from_axis == PIVOT_AXIS_ROW
4094 ? N_("Row Categories")
4095 : N_("Column Categories"));
4096 const struct variable *var = t->clabels_example;
4097 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4098 for (size_t i = 0; i < t->n_clabels_values; i++)
4100 const struct ctables_value *value = t->clabels_values[i];
4101 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4102 assert (cat != NULL);
4103 pivot_category_create_leaf (
4104 d->root, ctables_category_create_value_label (c, cat,
4110 pivot_table_set_look (pt, ct->look);
4111 struct pivot_dimension *d[PIVOT_N_AXES];
4112 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4114 static const char *names[] = {
4115 [PIVOT_AXIS_ROW] = N_("Rows"),
4116 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4117 [PIVOT_AXIS_LAYER] = N_("Layers"),
4119 d[a] = (t->axes[a] || a == t->summary_axis
4120 ? pivot_dimension_create (pt, a, names[a])
4125 assert (t->axes[a]);
4127 for (size_t i = 0; i < t->stacks[a].n; i++)
4129 struct ctables_nest *nest = &t->stacks[a].nests[i];
4130 struct ctables_section **sections = xnmalloc (t->n_sections,
4132 size_t n_sections = 0;
4134 size_t n_total_cells = 0;
4135 size_t max_depth = 0;
4136 for (size_t j = 0; j < t->n_sections; j++)
4137 if (t->sections[j].nests[a] == nest)
4139 struct ctables_section *s = &t->sections[j];
4140 sections[n_sections++] = s;
4141 n_total_cells += hmap_count (&s->cells);
4143 size_t depth = s->nests[a]->n;
4144 max_depth = MAX (depth, max_depth);
4147 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4149 size_t n_sorted = 0;
4151 for (size_t j = 0; j < n_sections; j++)
4153 struct ctables_section *s = sections[j];
4155 struct ctables_cell *cell;
4156 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4158 sorted[n_sorted++] = cell;
4159 assert (n_sorted <= n_total_cells);
4162 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4163 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4166 if (a == PIVOT_AXIS_ROW)
4168 size_t ids[N_CTATS];
4169 memset (ids, 0, sizeof ids);
4170 for (size_t j = 0; j < n_sorted; j++)
4172 struct ctables_cell *cell = sorted[j];
4173 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4175 struct ctables_area *area = cell->areas[at];
4176 if (!area->sequence)
4177 area->sequence = ++ids[at];
4184 for (size_t j = 0; j < n_sorted; j++)
4186 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4191 struct ctables_level
4193 enum ctables_level_type
4195 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4196 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4197 CTL_SUMMARY, /* Summary functions. */
4201 enum settings_value_show vlabel; /* CTL_VAR only. */
4204 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4205 size_t n_levels = 0;
4206 for (size_t k = 0; k < nest->n; k++)
4208 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4209 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4211 if (vlabel != CTVL_NONE)
4213 levels[n_levels++] = (struct ctables_level) {
4215 .vlabel = (enum settings_value_show) vlabel,
4220 if (nest->scale_idx != k
4221 && (k != nest->n - 1 || t->label_axis[a] == a))
4223 levels[n_levels++] = (struct ctables_level) {
4224 .type = CTL_CATEGORY,
4230 if (!summary_dimension && a == t->slabels_axis)
4232 levels[n_levels++] = (struct ctables_level) {
4233 .type = CTL_SUMMARY,
4234 .var_idx = SIZE_MAX,
4238 /* Pivot categories:
4240 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4241 - category for nest->vars[0], if nest->scale_idx != 0
4242 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4243 - category for nest->vars[1], if nest->scale_idx != 1
4245 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4246 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4247 - summary function, if 'a == t->slabels_axis && a ==
4250 Additional dimensions:
4252 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4254 - If 't->label_axis[b] == a' for some 'b != a', add a category
4259 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4261 for (size_t j = 0; j < n_sorted; j++)
4263 struct ctables_cell *cell = sorted[j];
4264 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4266 size_t n_common = 0;
4269 for (; n_common < n_levels; n_common++)
4271 const struct ctables_level *level = &levels[n_common];
4272 if (level->type == CTL_CATEGORY)
4274 size_t var_idx = level->var_idx;
4275 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4276 if (prev->axes[a].cvs[var_idx].category != c)
4278 else if (c->type != CCT_SUBTOTAL
4279 && c->type != CCT_TOTAL
4280 && c->type != CCT_POSTCOMPUTE
4281 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4282 &cell->axes[a].cvs[var_idx].value,
4283 var_get_type (nest->vars[var_idx])))
4289 for (size_t k = n_common; k < n_levels; k++)
4291 const struct ctables_level *level = &levels[k];
4292 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4293 if (level->type == CTL_SUMMARY)
4295 assert (k == n_levels - 1);
4297 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4298 for (size_t m = 0; m < specs->n; m++)
4300 int leaf = pivot_category_create_leaf (
4301 parent, ctables_summary_label (&specs->specs[m],
4309 const struct variable *var = nest->vars[level->var_idx];
4310 struct pivot_value *label;
4311 if (level->type == CTL_VAR)
4313 label = pivot_value_new_variable (var);
4314 label->variable.show = level->vlabel;
4316 else if (level->type == CTL_CATEGORY)
4318 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4319 label = ctables_category_create_value_label (
4320 t->categories[var_get_dict_index (var)],
4321 cv->category, var, &cv->value);
4326 if (k == n_levels - 1)
4327 prev_leaf = pivot_category_create_leaf (parent, label);
4329 groups[k] = pivot_category_create_group__ (parent, label);
4333 cell->axes[a].leaf = prev_leaf;
4342 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4346 size_t n_total_cells = 0;
4347 for (size_t j = 0; j < t->n_sections; j++)
4348 n_total_cells += hmap_count (&t->sections[j].cells);
4350 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4351 size_t n_sorted = 0;
4352 for (size_t j = 0; j < t->n_sections; j++)
4354 const struct ctables_section *s = &t->sections[j];
4355 struct ctables_cell *cell;
4356 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4358 sorted[n_sorted++] = cell;
4360 assert (n_sorted <= n_total_cells);
4361 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4363 size_t ids[N_CTATS];
4364 memset (ids, 0, sizeof ids);
4365 for (size_t j = 0; j < n_sorted; j++)
4367 struct ctables_cell *cell = sorted[j];
4368 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4370 struct ctables_area *area = cell->areas[at];
4371 if (!area->sequence)
4372 area->sequence = ++ids[at];
4379 for (size_t i = 0; i < t->n_sections; i++)
4381 struct ctables_section *s = &t->sections[i];
4383 struct ctables_cell *cell;
4384 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4389 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4390 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4391 for (size_t j = 0; j < specs->n; j++)
4394 size_t n_dindexes = 0;
4396 if (summary_dimension)
4397 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4399 if (categories_dimension)
4401 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4402 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4403 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4404 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4407 dindexes[n_dindexes++] = ctv->leaf;
4410 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4413 int leaf = cell->axes[a].leaf;
4414 if (a == t->summary_axis && !summary_dimension)
4416 dindexes[n_dindexes++] = leaf;
4419 const struct ctables_summary_spec *ss = &specs->specs[j];
4421 struct fmt_spec format = specs->specs[j].format;
4422 bool is_ctables_format = ss->is_ctables_format;
4423 double d = (cell->postcompute
4424 ? ctables_cell_calculate_postcompute (
4425 s, cell, ss, &format, &is_ctables_format, j)
4426 : ctables_summary_value (cell, &cell->summaries[j],
4429 struct pivot_value *value;
4430 if (ct->hide_threshold != 0
4431 && d < ct->hide_threshold
4432 && ss->function == CTSF_COUNT)
4434 value = pivot_value_new_user_text_nocopy (
4435 xasprintf ("<%d", ct->hide_threshold));
4437 else if (d == 0 && ct->zero)
4438 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4439 else if (d == SYSMIS && ct->missing)
4440 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4441 else if (is_ctables_format)
4442 value = pivot_value_new_user_text_nocopy (
4443 ctables_format (d, &format, &ct->ctables_formats));
4446 value = pivot_value_new_number (d);
4447 value->numeric.format = format;
4449 /* XXX should text values be right-justified? */
4450 pivot_table_put (pt, dindexes, n_dindexes, value);
4455 pivot_table_submit (pt);
/* Checks that the category labels on axis A of table T may be moved to
   T->label_axis[A], as requested with ROWLABELS (for the row axis) or
   COLLABELS (otherwise).  Each violated requirement is reported with
   msg (SE, ...).  Also records the last variable of the first nest on the
   axis as T->clabels_example.  ctables_prepare_table() uses the result as
   a boolean. */
4459 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4461 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only to build the error messages below. */
4465 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4466 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4468 const struct ctables_stack *stack = &t->stacks[a];
4472 const struct ctables_nest *n0 = &stack->nests[0];
4475 assert (stack->n == 1);
/* V0 is the variable whose category labels would be moved; C0 is its
   category specification. */
4479 const struct variable *v0 = n0->vars[n0->n - 1];
4480 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4481 t->clabels_example = v0;
/* A category of type CCT_FUNCTION (sorting based on a summary function) is
   incompatible with moving labels. */
4483 for (size_t i = 0; i < c0->n_cats; i++)
4484 if (c0->cats[i].type == CCT_FUNCTION)
4486 msg (SE, _("%s=%s is not allowed with sorting based "
4487 "on a summary function."),
4488 subcommand_name, pos_name);
/* The last variable of the nest must not be the nest's scale variable. */
4491 if (n0->n - 1 == n0->scale_idx)
4493 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4494 "but %s is a scale variable."),
4495 subcommand_name, pos_name, var_get_name (v0));
/* The last variable of every other nest must be compatible with V0:
   categorical, same width, same value labels, and same category
   specifications. */
4499 for (size_t i = 1; i < stack->n; i++)
4501 const struct ctables_nest *ni = &stack->nests[i];
4503 const struct variable *vi = ni->vars[ni->n - 1];
4504 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4506 if (ni->n - 1 == ni->scale_idx)
4508 msg (SE, _("%s=%s requires the variables to be moved to be "
4509 "categorical, but %s is a scale variable."),
4510 subcommand_name, pos_name, var_get_name (vi));
4513 if (var_get_width (v0) != var_get_width (vi))
4515 msg (SE, _("%s=%s requires the variables to be "
4516 "moved to have the same width, but %s has "
4517 "width %d and %s has width %d."),
4518 subcommand_name, pos_name,
4519 var_get_name (v0), var_get_width (v0),
4520 var_get_name (vi), var_get_width (vi));
4523 if (!val_labs_equal (var_get_value_labels (v0),
4524 var_get_value_labels (vi)))
4526 msg (SE, _("%s=%s requires the variables to be "
4527 "moved to have the same value labels, but %s "
4528 "and %s have different value labels."),
4529 subcommand_name, pos_name,
4530 var_get_name (v0), var_get_name (vi));
4533 if (!ctables_categories_equal (c0, ci))
4535 msg (SE, _("%s=%s requires the variables to be "
4536 "moved to have the same category "
4537 "specifications, but %s and %s have different "
4538 "category specifications."),
4539 subcommand_name, pos_name,
4540 var_get_name (v0), var_get_name (vi));
/* Looks for VAR among the *N variables in *SUM_VARS.  If it is not already
   there, stores it at index *N, first growing the array with x2nrealloc()
   when *N has reached *ALLOCATED.
   NOTE(review): enumerate_sum_vars() stores the result as a summary spec's
   sum_var_idx, so the return value is presumably VAR's index in
   *SUM_VARS -- confirm. */
4549 add_sum_var (struct variable *var,
4550 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4552 for (size_t i = 0; i < *n; i++)
4553 if (var == (*sum_vars)[i])
/* Not found: append, expanding the array if it is full. */
4556 if (*n >= *allocated)
4557 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4558 (*sum_vars)[*n] = var;
/* Maps AREA to another area type; the results include CTAT_LAYERCOL and
   CTAT_LAYERROW.  ctables_prepare_table() applies this to a summary spec's
   calc_area in the branch taken when row labels are moved to the column
   axis or column labels to the row axis.
   NOTE(review): presumably exchanges row-oriented and column-oriented
   areas -- confirm. */
4562 static enum ctables_area_type
4563 rotate_area (enum ctables_area_type area)
4574 return CTAT_LAYERCOL;
4577 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A.  For each summary spec in A->specs[]
   whose function is CTSF_areaPCT_SUM, registers A->var in *SUM_VARS with
   add_sum_var() and records the result in the spec's sum_var_idx.  Also
   recurses into both elements of A->subs[].  N and ALLOCATED are passed
   through to add_sum_var(). */
4590 enumerate_sum_vars (const struct ctables_axis *a,
4591 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* One summary spec set per summary variant (N_CSVS of them). */
4599 for (size_t i = 0; i < N_CSVS; i++)
4600 for (size_t j = 0; j < a->specs[i].n; j++)
4602 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4603 if (spec->function == CTSF_areaPCT_SUM)
4604 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Visit both children. */
4610 for (size_t i = 0; i < 2; i++)
4611 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution:

   - Builds the stack of nests for each axis with enumerate_fts() and fills
     in each nest's areas[] and n_areas[] for every area type.

   - Supplies a default summary spec for each nest on the summary axis that
     has no cell summary specs, and makes sure the CSV_TOTAL specs exist by
     cloning the CSV_CELL ones.

   - Merges the summary specs of all nests on the summary axis into
     T->summary_specs, assigning each spec's axis_idx.

   - Collects the variables used by CTSF_areaPCT_SUM summaries into
     T->sum_vars.

   The result is the conjunction of ctables_check_label_position() for the
   row and column axes. */
4617 ctables_prepare_table (struct ctables_table *t)
4619 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4622 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4624 for (size_t j = 0; j < t->stacks[a].n; j++)
4626 struct ctables_nest *nest = &t->stacks[a].nests[j];
4627 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* areas[at] holds indexes into the nest's variables; at most nest->n of
   them are stored, so nest->n elements suffice. */
4629 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4630 nest->n_areas[at] = 0;
/* For row and column oriented area types, ATA is the axis the area type
   refers to and ATB is the opposite one. */
4632 enum pivot_axis_type ata, atb;
4633 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4635 ata = PIVOT_AXIS_ROW;
4636 atb = PIVOT_AXIS_COLUMN;
4638 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4640 ata = PIVOT_AXIS_COLUMN;
4641 atb = PIVOT_AXIS_ROW;
4644 if (at == CTAT_LAYER
4645 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4646 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4647 ? a == atb && t->label_axis[a] != a
/* K is unsigned, so `k < nest->n' ends this descending loop once K wraps
   around past zero. */
4650 for (size_t k = nest->n - 1; k < nest->n; k--)
4651 if (k != nest->scale_idx)
4653 nest->areas[at][nest->n_areas[at]++] = k;
4659 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4660 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4661 : at == CTAT_TABLE ? true
/* Record every non-scale variable of the nest. */
4665 for (size_t k = 0; k < nest->n; k++)
4666 if (k != nest->scale_idx)
4667 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP is the number of trailing entries to remove from areas[at]; the
   -1 values are handled separately from the dropping loop below. */
4673 #define L PIVOT_AXIS_LAYER
4674 n_drop = (t->clabels_from_axis == L ? a != L
4675 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4676 : t->clabels_from_axis == a ? 2
4683 n_drop = a == ata && t->label_axis[ata] == atb;
4688 n_drop = (a == ata ? t->label_axis[ata] == atb
4690 : t->clabels_from_axis == atb ? -1
4691 : t->clabels_to_axis != atb ? 1
/* Remove the second-to-last entry by moving the last one over it. */
4703 size_t n = nest->n_areas[at];
4706 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4707 nest->n_areas[at]--;
/* Drop up to N_DROP trailing entries, never going below zero. */
4712 for (int i = 0; i < n_drop; i++)
4713 if (nest->n_areas[at] > 0)
4714 nest->n_areas[at]--;
/* Give the axis a stack consisting of one nest that has no scale or
   summary variable. */
4721 struct ctables_nest *nest = xmalloc (sizeof *nest);
4722 *nest = (struct ctables_nest) {
4724 .scale_idx = SIZE_MAX,
4725 .summary_idx = SIZE_MAX
4727 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4729 /* There's no point in moving labels away from an axis that has no
4730 labels, so avoid dealing with the special cases around that. */
4731 t->label_axis[a] = a;
4734 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4735 for (size_t i = 0; i < stack->n; i++)
4737 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary specs were given for this nest: supply a single default
   one, CTSF_MEAN for a scale summary and CTSF_COUNT otherwise, and reuse
   it for totals. */
4738 if (!nest->specs[CSV_CELL].n)
4740 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4741 ss->specs = xmalloc (sizeof *ss->specs);
4744 enum ctables_summary_function function
4745 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
4749 nest->summary_idx = nest->n - 1;
4750 ss->var = nest->vars[nest->summary_idx];
4752 *ss->specs = (struct ctables_summary_spec) {
4753 .function = function,
4754 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
4755 .format = ctables_summary_default_format (function, ss->var),
4758 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4759 &nest->specs[CSV_CELL]);
/* Cell specs were given but total specs were not: totals use the same
   specs as cells. */
4761 else if (!nest->specs[CSV_TOTAL].n)
4762 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4763 &nest->specs[CSV_CELL]);
/* Row labels are shown on the column axis, or column labels on the row
   axis: adjust the area that summary specs are calculated over. */
4765 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4766 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4768 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4769 for (size_t i = 0; i < nest->specs[sv].n; i++)
4771 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4772 const struct ctables_function_info *cfi =
4773 &ctables_function_info[ss->function];
4775 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise handling of scale missing values: collect the scale variables
   of the other nests that share this nest's group_head, and attach the
   list to each summary variant of this nest. */
4779 if (t->ctables->smissing_listwise)
4781 struct variable **listwise_vars = NULL;
4783 size_t allocated = 0;
4785 for (size_t j = nest->group_head; j < stack->n; j++)
4787 const struct ctables_nest *other_nest = &stack->nests[j];
4788 if (other_nest->group_head != nest->group_head)
/* scale_idx is SIZE_MAX for a nest without a scale variable, so the
   comparison against other_nest->n selects nests that have one. */
4791 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4794 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4795 sizeof *listwise_vars);
4796 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4799 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4802 listwise_vars = xmemdup (listwise_vars,
4803 n * sizeof *listwise_vars);
4804 nest->specs[sv].listwise_vars = listwise_vars;
4805 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specs of every nest and summary variant into MERGED.
   Each merge_item tracks a position (ofs) within one spec set.  On each
   round, the smallest pending spec according to merge_item_compare_3way()
   is appended to MERGED, and every item that compares equal to it records
   the new position as its spec's axis_idx and advances. */
4810 struct ctables_summary_spec_set *merged = &t->summary_specs;
4811 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4813 for (size_t j = 0; j < stack->n; j++)
4815 const struct ctables_nest *nest = &stack->nests[j];
4817 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4818 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4823 struct merge_item min = items[0];
4824 for (size_t j = 1; j < n_left; j++)
4825 if (merge_item_compare_3way (&items[j], &min) < 0)
4828 if (merged->n >= merged->allocated)
4829 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4830 sizeof *merged->specs);
4831 merged->specs[merged->n++] = min.set->specs[min.ofs];
4833 for (size_t j = 0; j < n_left; )
4835 if (merge_item_compare_3way (&items[j], &min) == 0)
4837 struct merge_item *item = &items[j];
4838 item->set->specs[item->ofs].axis_idx = merged->n - 1;
/* An exhausted item is removed by overwriting it with the last one. */
4839 if (++item->ofs >= item->set->n)
4841 items[j] = items[--n_left];
/* Prints the merged function names and each nest's (function, axis_idx)
   pairs to stdout.
   NOTE(review): looks like debugging output; confirm whether it is
   compiled in. */
4851 for (size_t j = 0; j < merged->n; j++)
4852 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4854 for (size_t j = 0; j < stack->n; j++)
4856 const struct ctables_nest *nest = &stack->nests[j];
4857 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4859 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4860 for (size_t k = 0; k < specs->n; k++)
4861 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4862 specs->specs[k].axis_idx);
/* Collect the variables used by CTSF_areaPCT_SUM summaries. */
4868 size_t allocated_sum_vars = 0;
4869 enumerate_sum_vars (t->axes[t->summary_axis],
4870 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4872 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4873 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack for axis A, looks at the value in case C of
   the nest's last variable and, if it matches one of that variable's
   categories, adds it to T with ctables_value_insert().  ctables_execute()
   calls this for each axis whose labels are displayed on another axis. */
4877 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4878 enum pivot_axis_type a)
4880 struct ctables_stack *stack = &t->stacks[a];
4881 for (size_t i = 0; i < stack->n; i++)
4883 const struct ctables_nest *nest = &stack->nests[i];
4884 const struct variable *var = nest->vars[nest->n - 1];
4885 const union value *value = case_data (c, var);
/* System-missing numeric values get special treatment.
   NOTE(review): presumably they are skipped -- confirm. */
4887 if (var_is_numeric (var) && value->f == SYSMIS)
4890 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4892 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort() over an array of pointers to struct
   ctables_value.  A_ and B_ each point to an array element; WIDTH_ points
   to an int holding the width of the values.  The result is that of
   value_compare_3way() on the two values. */
4897 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4899 const struct ctables_value *const *ap = a_;
4900 const struct ctables_value *const *bp = b_;
4901 const struct ctables_value *a = *ap;
4902 const struct ctables_value *b = *bp;
4903 const int *width = width_;
4904 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of the values in
   T->clabels_values_map sorted with compare_clabels_values_3way(), and
   assigns each value's `leaf' as its index in the sorted array.  Widths
   and value labels are taken from T->clabels_example, which must be
   nonnull. */
4908 ctables_sort_clabels_values (struct ctables_table *t)
4910 const struct variable *v0 = t->clabels_example;
4911 int width = var_get_width (v0);
4913 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add the labeled values of V0 that match its categories, so that they
   appear even if no case contained them.
   NOTE(review): this is presumably conditional on a setting of C0 --
   confirm. */
4916 const struct val_labs *val_labs = var_get_value_labels (v0);
4917 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4918 vl = val_labs_next (val_labs, vl))
4919 if (ctables_categories_match (c0, &vl->value, v0))
4920 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array. */
4923 size_t n = hmap_count (&t->clabels_values_map);
4924 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4926 struct ctables_value *clv;
4928 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4929 t->clabels_values[i++] = clv;
4930 t->n_clabels_values = n;
4933 sort (t->clabels_values, n, sizeof *t->clabels_values,
4934 compare_clabels_values_3way, &width);
/* Leaf indexes follow the sorted order. */
4936 for (size_t i = 0; i < n; i++)
4937 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, via ctables_add_occurrence(), values of VAR that
   the categories in CATS call for.  Depending on the category, these are
   the category's own number or string, or those of VAR's labeled values
   that fall in the category's numeric or string range, that are missing,
   or that satisfy the category's include_missing setting.
   NOTE(review): the mapping from category type to the code below is
   inferred from the order of the code; confirm against the case labels. */
4941 ctables_add_category_occurrences (const struct variable *var,
4942 struct hmap *occurrences,
4943 const struct ctables_categories *cats)
4945 const struct val_labs *val_labs = var_get_value_labels (var);
4947 for (size_t i = 0; i < cats->n_cats; i++)
4949 const struct ctables_category *c = &cats->cats[i];
/* A single numeric value. */
4953 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string value, padded on the right with spaces to VAR's width. */
4959 int width = var_get_width (var);
4961 value_init (&value, width);
4962 value_copy_buf_rpad (&value, width,
4963 CHAR_CAST (uint8_t *, c->string.string),
4964 c->string.length, ' ');
4965 ctables_add_occurrence (var, &value, occurrences);
4966 value_destroy (&value, width);
/* Labeled values within the inclusive numeric range. */
4971 assert (var_is_numeric (var));
4972 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4973 vl = val_labs_next (val_labs, vl))
4974 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4975 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values within the string range. */
4979 assert (var_is_alpha (var));
4980 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4981 vl = val_labs_next (val_labs, vl))
4982 if (in_string_range (&vl->value, var, c->srange))
4983 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing for VAR. */
4987 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4988 vl = val_labs_next (val_labs, vl))
4989 if (var_is_value_missing (var, &vl->value))
4990 ctables_add_occurrence (var, &vl->value, occurrences);
/* All labeled values. */
4994 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4995 vl = val_labs_next (val_labs, vl))
4996 ctables_add_occurrence (var, &vl->value, occurrences);
4999 case CCT_POSTCOMPUTE:
/* Labeled values, excluding missing ones unless the category includes
   missing values. */
5009 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5010 vl = val_labs_next (val_labs, vl))
5011 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5012 ctables_add_occurrence (var, &vl->value, occurrences);
5015 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().  Enumerates
   combinations of occurrences (and postcompute categories) across the
   variables of section S, starting from variable A_IDX on axis A, and
   inserts a cell for each complete combination with
   ctables_cell_insert__().

   CATS[axis][idx] receives the category chosen for each variable, and case
   C is used as scratch space: the value under consideration is written
   into C for each variable. */
5022 ctables_section_recurse_add_empty_categories (
5023 struct ctables_section *s,
5024 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5025 enum pivot_axis_type a, size_t a_idx)
/* All axes done: a complete combination has been chosen. */
5027 if (a >= PIVOT_N_AXES)
5028 ctables_cell_insert__ (s, c, cats);
/* Nothing (more) on this axis: move on to the next axis. */
5029 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5030 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5033 const struct variable *var = s->nests[a]->vars[a_idx];
5034 const struct ctables_categories *categories = s->table->categories[
5035 var_get_dict_index (var)];
5036 int width = var_get_width (var);
5037 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5038 const struct ctables_occurrence *o;
/* Try each recorded occurrence of VAR in turn. */
5039 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5041 union value *value = case_data_rw (c, var);
5042 value_destroy (value, width);
5043 value_clone (value, &o->value, width);
5044 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5045 assert (cats[a][a_idx] != NULL);
5046 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories do not correspond to a data value, so they are
   enumerated separately. */
5049 for (size_t i = 0; i < categories->n_cats; i++)
5051 const struct ctables_category *cat = &categories->cats[i];
5052 if (cat->type == CCT_POSTCOMPUTE)
5054 cats[a][a_idx] = cat;
5055 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that did not occur in
   the data.  For the non-scale variables whose categories have show_empty
   set, first adds the categories' values to S->occurrences with
   ctables_add_category_occurrences(); then enumerates combinations with
   ctables_section_recurse_add_empty_categories(). */
5062 ctables_section_add_empty_categories (struct ctables_section *s)
5064 bool show_empty = false;
5065 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5067 for (size_t k = 0; k < s->nests[a]->n; k++)
5068 if (k != s->nests[a]->scale_idx)
5070 const struct variable *var = s->nests[a]->vars[k];
5071 const struct ctables_categories *cats = s->table->categories[
5072 var_get_dict_index (var)];
5073 if (cats->show_empty)
5076 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Scratch arrays, one element per variable on each axis, that receive the
   category chosen for each variable during the enumeration. */
5082 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5083 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5084 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5085 const struct ctables_category **cats[PIVOT_N_AXES] =
5087 [PIVOT_AXIS_LAYER] = layer_cats,
5088 [PIVOT_AXIS_ROW] = row_cats,
5089 [PIVOT_AXIS_COLUMN] = column_cats,
/* Scratch case based on the dictionary's prototype. */
5091 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5092 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S: removes all occurrences, cells, and areas from its
   hash maps, releasing the values and arrays that the cells own.  The
   hash maps themselves stay initialized (ctables_section_uninit() destroys
   them). */
5097 ctables_section_clear (struct ctables_section *s)
/* Occurrences, for each non-scale variable on each axis. */
5099 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5101 const struct ctables_nest *nest = s->nests[a];
5102 for (size_t i = 0; i < nest->n; i++)
5103 if (i != nest->scale_idx)
5105 const struct variable *var = nest->vars[i];
5106 int width = var_get_width (var);
5107 struct ctables_occurrence *o, *next;
5108 struct hmap *map = &s->occurrences[a][i];
5109 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5111 value_destroy (&o->value, width);
5112 hmap_delete (map, &o->node);
/* Cells. */
5119 struct ctables_cell *cell, *next_cell;
5120 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5122 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5124 const struct ctables_nest *nest = s->nests[a];
5125 for (size_t i = 0; i < nest->n; i++)
5126 if (i != nest->scale_idx)
5127 value_destroy (&cell->axes[a].cvs[i].value,
5128 var_get_width (nest->vars[i]));
5129 free (cell->axes[a].cvs);
/* The cell's summaries correspond one-to-one with the summary specs for
   the cell's summary variant. */
5132 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5133 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5134 for (size_t i = 0; i < specs->n; i++)
5135 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5136 free (cell->summaries);
5138 hmap_delete (&s->cells, &cell->node);
5141 hmap_shrink (&s->cells);
/* Areas, for each area type. */
5143 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5145 struct ctables_area *area, *next_area;
5146 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5150 hmap_delete (&s->areas[at], &area->node);
5153 hmap_shrink (&s->areas[at]);
/* Releases everything section S owns: clears it with
   ctables_section_clear(), then destroys the per-variable occurrence maps,
   frees the arrays holding them, and destroys the cell and area maps. */
5158 ctables_section_uninit (struct ctables_section *s)
5160 ctables_section_clear (s);
5162 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5164 struct ctables_nest *nest = s->nests[a];
5165 for (size_t i = 0; i < nest->n; i++)
5166 hmap_destroy (&s->occurrences[a][i]);
5167 free (s->occurrences[a]);
5170 hmap_destroy (&s->cells);
5171 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5172 hmap_destroy (&s->areas[at]);
/* Empties table T so that it can be filled again: clears every section
   and, if T has a clabels_example variable, discards the collected
   category label values and the sorted array built from them. */
5176 ctables_table_clear (struct ctables_table *t)
5178 for (size_t i = 0; i < t->n_sections; i++)
5179 ctables_section_clear (&t->sections[i]);
5181 if (t->clabels_example)
/* All of the values have the width of the example variable. */
5183 int width = var_get_width (t->clabels_example);
5184 struct ctables_value *value, *next_value;
5185 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5186 &t->clabels_values_map)
5188 value_destroy (&value->value, width);
5189 hmap_delete (&t->clabels_values_map, &value->node);
5192 hmap_shrink (&t->clabels_values_map);
5194 free (t->clabels_values);
5195 t->clabels_values = NULL;
5196 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases in INPUT, which belong to dataset
   DS.  The cases are processed in groups obtained from a casegrouper; for
   each group, every case is added to every section of every table, and
   then each table is output and cleared for the next group.  The result
   is that of casegrouper_destroy(). */
5201 ctables_execute (struct dataset *ds, struct casereader *input,
/* Allocate room for every combination of one nest per axis, treating an
   axis with an empty stack as if it had one nest, then create the
   sections. */
5204 for (size_t i = 0; i < ct->n_tables; i++)
5206 struct ctables_table *t = ct->tables[i];
5207 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5208 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5209 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5210 sizeof *t->sections);
5211 size_t ix[PIVOT_N_AXES];
5212 ctables_table_add_section (t, 0, ix);
5215 struct dictionary *dict = dataset_dict (ds);
/* Group by the split variables only for SPLIT_SEPARATE; otherwise group
   on no variables at all. */
5217 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5218 struct casegrouper *grouper
5220 ? casegrouper_create_splits (input, dict)
5221 : casegrouper_create_vars (input, NULL, 0));
5222 struct casereader *group;
5223 while (casegrouper_get_next_group (grouper, &group))
/* Report the split values using the group's first case. */
5227 struct ccase *c = casereader_peek (group, 0);
5230 output_split_file_values (ds, c);
5235 bool warn_on_invalid = true;
5236 for (struct ccase *c = casereader_read (group); c;
5237 case_unref (c), c = casereader_read (group))
/* Compute the case's weight under each weighting scheme, indexed by enum
   ctables_weighting. */
5239 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5240 double e_weight = (ct->e_weight
5241 ? var_force_valid_weight (ct->e_weight,
5242 case_num (c, ct->e_weight),
5246 [CTW_DICTIONARY] = d_weight,
5247 [CTW_EFFECTIVE] = e_weight,
5248 [CTW_UNWEIGHTED] = 1.0,
5251 for (size_t i = 0; i < ct->n_tables; i++)
5253 struct ctables_table *t = ct->tables[i];
5255 for (size_t j = 0; j < t->n_sections; j++)
5256 ctables_cell_insert (&t->sections[j], c, weight);
/* Collect category label values for axes whose labels are displayed on
   a different axis. */
5258 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5259 if (t->label_axis[a] != a)
5260 ctables_insert_clabels_values (t, c, a);
5263 casereader_destroy (group);
/* The group is complete: finish, output, and reset each table. */
5265 for (size_t i = 0; i < ct->n_tables; i++)
5267 struct ctables_table *t = ct->tables[i];
5269 if (t->clabels_example)
5270 ctables_sort_clabels_values (t);
5272 for (size_t j = 0; j < t->n_sections; j++)
5273 ctables_section_add_empty_categories (&t->sections[j]);
5275 ctables_table_output (ct, t);
5276 ctables_table_clear (t);
5279 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and yields an expression
   node.  Passed as `parse_next_level' to the binary operator parsers. */
5284 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5285 struct dictionary *);
/* Destroys postcompute expression E: releases the string or strings owned
   by string and string-range category nodes, recursively destroys the
   subexpressions of operator nodes, and destroys E's location.  The other
   category node types own nothing besides the location. */
5288 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5294 case CTPO_CAT_STRING:
5295 ss_dealloc (&e->string);
5298 case CTPO_CAT_SRANGE:
5299 for (size_t i = 0; i < 2; i++)
5300 ss_dealloc (&e->srange[i]);
/* Operator nodes: destroy both operands. */
5309 for (size_t i = 0; i < 2; i++)
5310 ctables_pcexpr_destroy (e->subs[i]);
/* Nothing node-specific to release for these. */
5314 case CTPO_CAT_NUMBER:
5315 case CTPO_CAT_NRANGE:
5316 case CTPO_CAT_MISSING:
5317 case CTPO_CAT_OTHERNM:
5318 case CTPO_CAT_SUBTOTAL:
5319 case CTPO_CAT_TOTAL:
5323 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP with operands
   SUB0 and SUB1.  The node's location is obtained by merging the
   locations of the two operands with msg_location_merged(). */
5328 static struct ctables_pcexpr *
5329 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5330 struct ctables_pcexpr *sub0,
5331 struct ctables_pcexpr *sub1)
5333 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5334 *e = (struct ctables_pcexpr) {
5336 .subs = { sub0, sub1 },
5337 .location = msg_location_merged (sub0->location, sub1->location),
5342 /* How to parse an operator: pairs a lexer token with the postcompute
   operation that it denotes. */
5345 enum token_type token;
5346 enum ctables_postcompute_op op;
/* Searches the N_OPS operators in OPS for one whose token is LEXER's
   current token.  T_NEG_NUM is treated differently from the other tokens
   when it matches.
   NOTE(review): presumably the other tokens are consumed here, whereas a
   negative number is left in place to be parsed as the right operand;
   presumably the result is the matching operator, or null if none
   matches -- confirm. */
5349 static const struct operator *
5350 ctables_pcexpr_match_operator (struct lexer *lexer,
5351 const struct operator ops[], size_t n_ops)
5353 for (const struct operator *op = ops; op < ops + n_ops; op++)
5354 if (lex_token (lexer) == op->token)
5356 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS operators in
   OPS, given an already-parsed left operand LHS.  Each right operand is
   parsed with PARSE_NEXT_LEVEL, and the chain is combined
   left-associatively with ctables_pcexpr_allocate_binary().

   If CHAIN_WARNING is nonnull and more than one operator was parsed, it is
   issued as a warning at LHS's location.  If a right operand fails to
   parse, LHS is destroyed. */
5365 static struct ctables_pcexpr *
5366 ctables_pcexpr_parse_binary_operators__ (
5367 struct lexer *lexer, struct dictionary *dict,
5368 const struct operator ops[], size_t n_ops,
5369 parse_recursively_func *parse_next_level,
5370 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators parsed so far. */
5372 for (int op_count = 0; ; op_count++)
5374 const struct operator *op
5375 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5378 if (op_count > 1 && chain_warning)
5379 msg_at (SW, lhs->location, "%s", chain_warning);
5384 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5387 ctables_pcexpr_destroy (lhs);
/* The combined node becomes the left operand of the next operator. */
5391 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: a first operand parsed
   with PARSE_NEXT_LEVEL, followed by any number of (operator, operand)
   pairs handled by ctables_pcexpr_parse_binary_operators__().  OPS, N_OPS,
   and CHAIN_WARNING are passed through to that function. */
5395 static struct ctables_pcexpr *
5396 ctables_pcexpr_parse_binary_operators (
5397 struct lexer *lexer, struct dictionary *dict,
5398 const struct operator ops[], size_t n_ops,
5399 parse_recursively_func *parse_next_level, const char *chain_warning)
5401 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5405 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5407 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this for a
   parenthesized subexpression, before the definition appears. */
5410 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5411 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node for the numeric
   range from LOW to HIGH.  The parser passes -DBL_MAX or DBL_MAX for a
   bound given as LO or HI. */
5413 static struct ctables_pcexpr
5414 ctpo_cat_nrange (double low, double high)
5416 return (struct ctables_pcexpr) {
5417 .op = CTPO_CAT_NRANGE,
5418 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node for the string
   range from LOW to HIGH.  The substrings are stored in the node as given,
   not copied.  The parser passes a substring with a null `string' member
   for a bound given as LO or HI. */
5422 static struct ctables_pcexpr
5423 ctpo_cat_srange (struct substring low, struct substring high)
5425 return (struct ctables_pcexpr) {
5426 .op = CTPO_CAT_SRANGE,
5427 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER:

   - A numeric constant.

   - MISSING, OTHERNM, or TOTAL.

   - SUBTOTAL, optionally followed by a bracketed integer index that must
     be at least 1 (the index is 0 if omitted).

   - A bracketed category: a number, a string, or a range written with
     THRU in which LO may stand for the low end and HI for the high end.

   - A parenthesized subexpression.

   Otherwise, reports a syntax error.  On success the node's location is
   set to the tokens consumed and a heap-allocated copy of the node is
   returned. */
5431 static struct ctables_pcexpr *
5432 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5434 int start_ofs = lex_ofs (lexer);
5435 struct ctables_pcexpr e;
5436 if (lex_is_number (lexer))
5438 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5439 .number = lex_number (lexer) };
5442 else if (lex_match_id (lexer, "MISSING"))
5443 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5444 else if (lex_match_id (lexer, "OTHERNM"))
5445 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5446 else if (lex_match_id (lexer, "TOTAL"))
5447 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5448 else if (lex_match_id (lexer, "SUBTOTAL"))
5450 size_t subtotal_index = 0;
5451 if (lex_match (lexer, T_LBRACK))
5453 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5455 subtotal_index = lex_integer (lexer);
5457 if (!lex_force_match (lexer, T_RBRACK))
5460 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5461 .subtotal_index = subtotal_index };
/* Bracketed category. */
5463 else if (lex_match (lexer, T_LBRACK))
/* LO THRU <string or number>. */
5465 if (lex_match_id (lexer, "LO"))
5467 if (!lex_force_match_id (lexer, "THRU"))
5470 if (lex_is_string (lexer))
5472 struct substring low = { .string = NULL };
5473 struct substring high = parse_substring (lexer, dict);
5474 e = ctpo_cat_srange (low, high);
5478 if (!lex_force_num (lexer))
5480 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* <number>, optionally followed by THRU and HI or another number. */
5484 else if (lex_is_number (lexer))
5486 double number = lex_number (lexer);
5488 if (lex_match_id (lexer, "THRU"))
5490 if (lex_match_id (lexer, "HI"))
5491 e = ctpo_cat_nrange (number, DBL_MAX);
5494 if (!lex_force_num (lexer))
5496 e = ctpo_cat_nrange (number, lex_number (lexer));
5501 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* <string>, optionally followed by THRU and HI or another string. */
5504 else if (lex_is_string (lexer))
5506 struct substring s = parse_substring (lexer, dict);
5508 if (lex_match_id (lexer, "THRU"))
5510 struct substring high;
5512 if (lex_match_id (lexer, "HI"))
5513 high = (struct substring) { .string = NULL };
5516 if (!lex_force_string (lexer))
5521 high = parse_substring (lexer, dict);
5524 e = ctpo_cat_srange (s, high);
5527 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5531 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings that the node
   already owns. */
5535 if (!lex_force_match (lexer, T_RBRACK))
5537 if (e.op == CTPO_CAT_STRING)
5538 ss_dealloc (&e.string);
5539 else if (e.op == CTPO_CAT_SRANGE)
5541 ss_dealloc (&e.srange[0]);
5542 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression. */
5547 else if (lex_match (lexer, T_LPAREN))
5549 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5552 if (!lex_force_match (lexer, T_RPAREN))
5554 ctables_pcexpr_destroy (ep);
5561 lex_error (lexer, NULL);
/* The location runs from the first token through the last token consumed. */
5565 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5566 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for the negation level, whose location
   covers the tokens in LEXER from offset START_OFS through the last token
   consumed so far.
   NOTE(review): presumably the node negates SUB -- confirm against the
   node's initializers. */
5569 static struct ctables_pcexpr *
5570 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5571 struct lexer *lexer, int start_ofs)
5573 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5574 *e = (struct ctables_pcexpr) {
5577 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Chains of `**' are combined
   left-associatively, with a warning for chains because users might expect
   the opposite.

   A negative number token directly followed by `**' is handled specially:
   the number's absolute value becomes the left operand of `**' and the
   whole result is wrapped with ctables_pcexpr_allocate_neg(). */
5582 static struct ctables_pcexpr *
5583 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5585 static const struct operator op = { T_EXP, CTPO_POW };
5587 const char *chain_warning =
5588 _("The exponentiation operator (`**') is left-associative: "
5589 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5590 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number followed by `**'. */
5592 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5593 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5594 ctables_pcexpr_parse_primary,
5597 /* Special case for situations like "-5**6", which must be parsed as
5600 int start_ofs = lex_ofs (lexer);
5601 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5602 *lhs = (struct ctables_pcexpr) {
5603 .op = CTPO_CONSTANT,
5604 .number = -lex_tokval (lexer),
5605 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5609 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5610 lexer, dict, &op, 1,
5611 ctables_pcexpr_parse_primary, chain_warning, lhs);
5615 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5618 /* Parses the unary minus level.  If the next token is not `-', this
   just parses the exponentiation level.  Otherwise it consumes the `-',
   parses the operand by calling itself (so that minus signs may be
   repeated), and wraps the operand with ctables_pcexpr_allocate_neg(). */
5619 static struct ctables_pcexpr *
5620 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5622 int start_ofs = lex_ofs (lexer);
5623 if (!lex_match (lexer, T_DASH))
5624 return ctables_pcexpr_parse_exp (lexer, dict);
5626 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5630 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5633 /* Parses the multiplication and division level: operands at the unary
   minus level separated by `*' (CTPO_MUL) or `/' (CTPO_DIV).  No chain
   warning is issued at this level. */
5634 static struct ctables_pcexpr *
5635 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5637 static const struct operator ops[] =
5639 { T_ASTERISK, CTPO_MUL },
5640 { T_SLASH, CTPO_DIV },
5643 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5644 sizeof ops / sizeof *ops,
5645 ctables_pcexpr_parse_neg, NULL);
5648 /* Parses the addition and subtraction level: operands at the
   multiplication level separated by `+' (CTPO_ADD) or `-' (CTPO_SUB).
   This is the top level of the postcompute expression grammar.

   A negative number token in operator position is treated as CTPO_ADD.
   NOTE(review): presumably this makes `a -5' parse as a + (-5), with the
   negative number itself becoming the right operand -- confirm. */
5649 static struct ctables_pcexpr *
5650 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5652 static const struct operator ops[] =
5654 { T_PLUS, CTPO_ADD },
5655 { T_DASH, CTPO_SUB },
5656 { T_NEG_NUM, CTPO_ADD },
5659 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5660 ops, sizeof ops / sizeof *ops,
5661 ctables_pcexpr_parse_mul, NULL);
/* Searches CT->postcomputes for a postcompute named NAME.  Names are
   hashed with utf8_hash_case_string() and compared with
   utf8_strcasecmp(), so the match ignores case.
   NOTE(review): presumably the result is the match, or null if there is
   none -- confirm. */
5664 static struct ctables_postcompute *
5665 ctables_find_postcompute (struct ctables *ct, const char *name)
5667 struct ctables_postcompute *pc;
5668 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5669 utf8_hash_case_string (name, 0), &ct->postcomputes)
5670 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form `&name = EXPR(expression)'
   from LEXER and records it in CT.

   If a postcompute with the same name already exists, warns that the new
   definition overrides it, points out the previous definition, and
   discards the old expression and location.  Otherwise creates a new
   postcompute and inserts it into CT->postcomputes.  The label is set to
   the source text of the expression. */
5676 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* NOTE(review): the location starts one token before the current one,
   presumably so that it includes the subcommand name that the caller has
   already consumed -- confirm. */
5679 int pcompute_start = lex_ofs (lexer) - 1;
5681 if (!lex_match (lexer, T_AND))
5683 lex_error_expecting (lexer, "&");
5686 if (!lex_force_id (lexer))
5689 char *name = ss_xstrdup (lex_tokss (lexer));
5692 if (!lex_force_match (lexer, T_EQUALS)
5693 || !lex_force_match_id (lexer, "EXPR")
5694 || !lex_force_match (lexer, T_LPAREN))
/* EXPR_START and EXPR_END delimit the expression's tokens, not including
   the surrounding parentheses. */
5700 int expr_start = lex_ofs (lexer);
5701 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5702 int expr_end = lex_ofs (lexer) - 1;
5703 if (!expr || !lex_force_match (lexer, T_RPAREN))
5705 ctables_pcexpr_destroy (expr);
5709 int pcompute_end = lex_ofs (lexer) - 1;
5711 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5714 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition of an existing postcompute. */
5717 msg_at (SW, location, _("New definition of &%s will override the "
5718 "previous definition."),
5720 msg_at (SN, pc->location, _("This is the previous definition."));
5722 ctables_pcexpr_destroy (pc->expr);
5723 msg_location_destroy (pc->location);
/* First definition under this name. */
5728 pc = xmalloc (sizeof *pc);
5729 *pc = (struct ctables_postcompute) { .name = name };
5730 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5731 utf8_hash_case_string (pc->name, 0));
5734 pc->location = location;
5736 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5741 ctables_parse_pproperties_format (struct lexer *lexer,
5742 struct ctables_summary_spec_set *sss)
5744 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5746 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5747 && !(lex_token (lexer) == T_ID
5748 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5749 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5750 lex_tokss (lexer)))))
5752 /* Parse function. */
5753 enum ctables_summary_function function;
5754 enum ctables_weighting weighting;
5755 enum ctables_area_type area;
5756 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5759 /* Parse percentile. */
5760 double percentile = 0;
5761 if (function == CTSF_PTILE)
5763 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5765 percentile = lex_number (lexer);
5770 struct fmt_spec format;
5771 bool is_ctables_format;
5772 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5775 if (sss->n >= sss->allocated)
5776 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5777 sizeof *sss->specs);
5778 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5779 .function = function,
5780 .weighting = weighting,
5783 .percentile = percentile,
5785 .is_ctables_format = is_ctables_format,
5791 ctables_summary_spec_set_uninit (sss);
5796 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5798 struct ctables_postcompute **pcs = NULL;
5800 size_t allocated_pcs = 0;
5802 while (lex_match (lexer, T_AND))
5804 if (!lex_force_id (lexer))
5806 struct ctables_postcompute *pc
5807 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5810 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5815 if (n_pcs >= allocated_pcs)
5816 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5820 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5822 if (lex_match_id (lexer, "LABEL"))
5824 lex_match (lexer, T_EQUALS);
5825 if (!lex_force_string (lexer))
5828 for (size_t i = 0; i < n_pcs; i++)
5830 free (pcs[i]->label);
5831 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5836 else if (lex_match_id (lexer, "FORMAT"))
5838 lex_match (lexer, T_EQUALS);
5840 struct ctables_summary_spec_set sss;
5841 if (!ctables_parse_pproperties_format (lexer, &sss))
5844 for (size_t i = 0; i < n_pcs; i++)
5847 ctables_summary_spec_set_uninit (pcs[i]->specs);
5849 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5850 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5852 ctables_summary_spec_set_uninit (&sss);
5854 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5856 lex_match (lexer, T_EQUALS);
5857 bool hide_source_cats;
5858 if (!parse_bool (lexer, &hide_source_cats))
5860 for (size_t i = 0; i < n_pcs; i++)
5861 pcs[i]->hide_source_cats = hide_source_cats;
5865 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats NOW, converted to local time, according to strftime() template
   FORMAT and appends the result to OUT.

   Appends nothing if the time cannot be converted or if the formatted text
   is empty or does not fit in the internal buffer. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  /* localtime() returns a null pointer if NOW cannot be represented as a
     broken-down time; passing that on to strftime() would be undefined. */
  const struct tm *tm = localtime (&now);
  if (tm == NULL)
    return;

  /* strftime() returns 0 when the result (including the terminator) does not
     fit, and then the buffer contents are indeterminate, so only use the
     buffer on a nonzero return.  A zero return for a legitimately empty
     result needs no output either. */
  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
5887 skip_prefix (struct substring *s, struct substring prefix)
5889 if (ss_starts_with (*s, prefix))
5891 ss_advance (s, prefix.length);
5899 put_table_expression (struct string *out, struct lexer *lexer,
5900 struct dictionary *dict, int expr_start, int expr_end)
5903 for (int ofs = expr_start; ofs < expr_end; ofs++)
5905 const struct token *t = lex_ofs_token (lexer, ofs);
5906 if (t->type == T_LBRACK)
5908 else if (t->type == T_RBRACK && nest > 0)
5914 else if (t->type == T_ID)
5916 const struct variable *var
5917 = dict_lookup_var (dict, t->string.string);
5918 const char *label = var ? var_get_label (var) : NULL;
5919 ds_put_cstr (out, label ? label : t->string.string);
5923 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5924 ds_put_byte (out, ' ');
5926 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5927 ds_put_cstr (out, repr);
5930 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5931 ds_put_byte (out, ' ');
5937 put_title_text (struct string *out, struct substring in, time_t now,
5938 struct lexer *lexer, struct dictionary *dict,
5939 int expr_start, int expr_end)
5943 size_t chunk = ss_find_byte (in, ')');
5944 ds_put_substring (out, ss_head (in, chunk));
5945 ss_advance (&in, chunk);
5946 if (ss_is_empty (in))
5949 if (skip_prefix (&in, ss_cstr (")DATE")))
5950 put_strftime (out, now, "%x");
5951 else if (skip_prefix (&in, ss_cstr (")TIME")))
5952 put_strftime (out, now, "%X");
5953 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5954 put_table_expression (out, lexer, dict, expr_start, expr_end);
5957 ds_put_byte (out, ')');
5958 ss_advance (&in, 1);
5964 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5966 struct casereader *input = NULL;
5968 struct measure_guesser *mg = measure_guesser_create (ds);
5971 input = proc_open (ds);
5972 measure_guesser_run (mg, input);
5973 measure_guesser_destroy (mg);
5976 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5977 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5978 enum settings_value_show tvars = settings_get_show_variables ();
5979 for (size_t i = 0; i < n_vars; i++)
5980 vlabels[i] = (enum ctables_vlabel) tvars;
5982 struct pivot_table_look *look = pivot_table_look_unshare (
5983 pivot_table_look_ref (pivot_table_look_get_default ()));
5984 look->omit_empty = false;
5986 struct ctables *ct = xmalloc (sizeof *ct);
5987 *ct = (struct ctables) {
5988 .dict = dataset_dict (ds),
5990 .ctables_formats = FMT_SETTINGS_INIT,
5992 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5995 time_t now = time (NULL);
6000 const char *dot_string;
6001 const char *comma_string;
6003 static const struct ctf ctfs[4] = {
6004 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6005 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6006 { CTEF_PAREN, "-,(,),", "-.(.)." },
6007 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6009 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6010 for (size_t i = 0; i < 4; i++)
6012 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6013 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6014 fmt_number_style_from_string (s));
6017 if (!lex_force_match (lexer, T_SLASH))
6020 while (!lex_match_id (lexer, "TABLE"))
6022 if (lex_match_id (lexer, "FORMAT"))
6024 double widths[2] = { SYSMIS, SYSMIS };
6025 double units_per_inch = 72.0;
6027 while (lex_token (lexer) != T_SLASH)
6029 if (lex_match_id (lexer, "MINCOLWIDTH"))
6031 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6034 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6036 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6039 else if (lex_match_id (lexer, "UNITS"))
6041 lex_match (lexer, T_EQUALS);
6042 if (lex_match_id (lexer, "POINTS"))
6043 units_per_inch = 72.0;
6044 else if (lex_match_id (lexer, "INCHES"))
6045 units_per_inch = 1.0;
6046 else if (lex_match_id (lexer, "CM"))
6047 units_per_inch = 2.54;
6050 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6054 else if (lex_match_id (lexer, "EMPTY"))
6059 lex_match (lexer, T_EQUALS);
6060 if (lex_match_id (lexer, "ZERO"))
6062 /* Nothing to do. */
6064 else if (lex_match_id (lexer, "BLANK"))
6065 ct->zero = xstrdup ("");
6066 else if (lex_force_string (lexer))
6068 ct->zero = ss_xstrdup (lex_tokss (lexer));
6074 else if (lex_match_id (lexer, "MISSING"))
6076 lex_match (lexer, T_EQUALS);
6077 if (!lex_force_string (lexer))
6081 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6082 ? ss_xstrdup (lex_tokss (lexer))
6088 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6089 "UNITS", "EMPTY", "MISSING");
6094 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6095 && widths[0] > widths[1])
6097 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6101 for (size_t i = 0; i < 2; i++)
6102 if (widths[i] != SYSMIS)
6104 int *wr = ct->look->width_ranges[TABLE_HORZ];
6105 wr[i] = widths[i] / units_per_inch * 96.0;
6110 else if (lex_match_id (lexer, "VLABELS"))
6112 if (!lex_force_match_id (lexer, "VARIABLES"))
6114 lex_match (lexer, T_EQUALS);
6116 struct variable **vars;
6118 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6122 if (!lex_force_match_id (lexer, "DISPLAY"))
6127 lex_match (lexer, T_EQUALS);
6129 enum ctables_vlabel vlabel;
6130 if (lex_match_id (lexer, "DEFAULT"))
6131 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6132 else if (lex_match_id (lexer, "NAME"))
6134 else if (lex_match_id (lexer, "LABEL"))
6135 vlabel = CTVL_LABEL;
6136 else if (lex_match_id (lexer, "BOTH"))
6138 else if (lex_match_id (lexer, "NONE"))
6142 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6148 for (size_t i = 0; i < n_vars; i++)
6149 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6152 else if (lex_match_id (lexer, "MRSETS"))
6154 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6156 lex_match (lexer, T_EQUALS);
6157 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6160 else if (lex_match_id (lexer, "SMISSING"))
6162 if (lex_match_id (lexer, "VARIABLE"))
6163 ct->smissing_listwise = false;
6164 else if (lex_match_id (lexer, "LISTWISE"))
6165 ct->smissing_listwise = true;
6168 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6172 else if (lex_match_id (lexer, "PCOMPUTE"))
6174 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6177 else if (lex_match_id (lexer, "PPROPERTIES"))
6179 if (!ctables_parse_pproperties (lexer, ct))
6182 else if (lex_match_id (lexer, "WEIGHT"))
6184 if (!lex_force_match_id (lexer, "VARIABLE"))
6186 lex_match (lexer, T_EQUALS);
6187 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6191 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6193 if (lex_match_id (lexer, "COUNT"))
6195 lex_match (lexer, T_EQUALS);
6196 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6199 ct->hide_threshold = lex_integer (lexer);
6202 else if (ct->hide_threshold == 0)
6203 ct->hide_threshold = 5;
6207 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6208 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6209 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6213 if (!lex_force_match (lexer, T_SLASH))
6217 size_t allocated_tables = 0;
6220 if (ct->n_tables >= allocated_tables)
6221 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6222 sizeof *ct->tables);
6224 struct ctables_category *cat = xmalloc (sizeof *cat);
6225 *cat = (struct ctables_category) {
6227 .include_missing = false,
6228 .sort_ascending = true,
6231 struct ctables_categories *c = xmalloc (sizeof *c);
6232 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6233 *c = (struct ctables_categories) {
6240 struct ctables_categories **categories = xnmalloc (n_vars,
6241 sizeof *categories);
6242 for (size_t i = 0; i < n_vars; i++)
6245 struct ctables_table *t = xmalloc (sizeof *t);
6246 *t = (struct ctables_table) {
6248 .slabels_axis = PIVOT_AXIS_COLUMN,
6249 .slabels_visible = true,
6250 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6252 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6253 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6254 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6256 .clabels_from_axis = PIVOT_AXIS_LAYER,
6257 .clabels_to_axis = PIVOT_AXIS_LAYER,
6258 .categories = categories,
6259 .n_categories = n_vars,
6262 ct->tables[ct->n_tables++] = t;
6264 lex_match (lexer, T_EQUALS);
6265 int expr_start = lex_ofs (lexer);
6266 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6268 if (lex_match (lexer, T_BY))
6270 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6271 ct, t, PIVOT_AXIS_COLUMN))
6274 if (lex_match (lexer, T_BY))
6276 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6277 ct, t, PIVOT_AXIS_LAYER))
6281 int expr_end = lex_ofs (lexer);
6283 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6284 && !t->axes[PIVOT_AXIS_LAYER])
6286 lex_error (lexer, _("At least one variable must be specified."));
6290 const struct ctables_axis *scales[PIVOT_N_AXES];
6291 size_t n_scales = 0;
6292 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6294 scales[a] = find_scale (t->axes[a]);
6300 msg (SE, _("Scale variables may appear only on one axis."));
6301 if (scales[PIVOT_AXIS_ROW])
6302 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6303 _("This scale variable appears on the rows axis."));
6304 if (scales[PIVOT_AXIS_COLUMN])
6305 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6306 _("This scale variable appears on the columns axis."));
6307 if (scales[PIVOT_AXIS_LAYER])
6308 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6309 _("This scale variable appears on the layer axis."));
6313 const struct ctables_axis *summaries[PIVOT_N_AXES];
6314 size_t n_summaries = 0;
6315 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6317 summaries[a] = (scales[a]
6319 : find_categorical_summary_spec (t->axes[a]));
6323 if (n_summaries > 1)
6325 msg (SE, _("Summaries may appear only on one axis."));
6326 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6329 msg_at (SN, summaries[a]->loc,
6331 ? _("This variable on the rows axis has a summary.")
6332 : a == PIVOT_AXIS_COLUMN
6333 ? _("This variable on the columns axis has a summary.")
6334 : _("This variable on the layers axis has a summary."));
6336 msg_at (SN, summaries[a]->loc,
6337 _("This is a scale variable, so it always has a "
6338 "summary even if the syntax does not explicitly "
6343 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6344 if (n_summaries ? summaries[a] : t->axes[a])
6346 t->summary_axis = a;
6350 if (lex_token (lexer) == T_ENDCMD)
6352 if (!ctables_prepare_table (t))
6356 if (!lex_force_match (lexer, T_SLASH))
6359 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6361 if (lex_match_id (lexer, "SLABELS"))
6363 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6365 if (lex_match_id (lexer, "POSITION"))
6367 lex_match (lexer, T_EQUALS);
6368 if (lex_match_id (lexer, "COLUMN"))
6369 t->slabels_axis = PIVOT_AXIS_COLUMN;
6370 else if (lex_match_id (lexer, "ROW"))
6371 t->slabels_axis = PIVOT_AXIS_ROW;
6372 else if (lex_match_id (lexer, "LAYER"))
6373 t->slabels_axis = PIVOT_AXIS_LAYER;
6376 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6380 else if (lex_match_id (lexer, "VISIBLE"))
6382 lex_match (lexer, T_EQUALS);
6383 if (!parse_bool (lexer, &t->slabels_visible))
6388 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6393 else if (lex_match_id (lexer, "CLABELS"))
6395 if (lex_match_id (lexer, "AUTO"))
6397 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6398 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6400 else if (lex_match_id (lexer, "ROWLABELS"))
6402 lex_match (lexer, T_EQUALS);
6403 if (lex_match_id (lexer, "OPPOSITE"))
6404 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6405 else if (lex_match_id (lexer, "LAYER"))
6406 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6409 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6413 else if (lex_match_id (lexer, "COLLABELS"))
6415 lex_match (lexer, T_EQUALS);
6416 if (lex_match_id (lexer, "OPPOSITE"))
6417 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6418 else if (lex_match_id (lexer, "LAYER"))
6419 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6422 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6428 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6433 else if (lex_match_id (lexer, "CRITERIA"))
6435 if (!lex_force_match_id (lexer, "CILEVEL"))
6437 lex_match (lexer, T_EQUALS);
6439 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6441 t->cilevel = lex_number (lexer);
6444 else if (lex_match_id (lexer, "CATEGORIES"))
6446 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6450 else if (lex_match_id (lexer, "TITLES"))
6455 if (lex_match_id (lexer, "CAPTION"))
6456 textp = &t->caption;
6457 else if (lex_match_id (lexer, "CORNER"))
6459 else if (lex_match_id (lexer, "TITLE"))
6463 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6466 lex_match (lexer, T_EQUALS);
6468 struct string s = DS_EMPTY_INITIALIZER;
6469 while (lex_is_string (lexer))
6471 if (!ds_is_empty (&s))
6472 ds_put_byte (&s, ' ');
6473 put_title_text (&s, lex_tokss (lexer), now,
6474 lexer, dataset_dict (ds),
6475 expr_start, expr_end);
6479 *textp = ds_steal_cstr (&s);
6481 while (lex_token (lexer) != T_SLASH
6482 && lex_token (lexer) != T_ENDCMD);
6484 else if (lex_match_id (lexer, "SIGTEST"))
6488 t->chisq = xmalloc (sizeof *t->chisq);
6489 *t->chisq = (struct ctables_chisq) {
6491 .include_mrsets = true,
6492 .all_visible = true,
6498 if (lex_match_id (lexer, "TYPE"))
6500 lex_match (lexer, T_EQUALS);
6501 if (!lex_force_match_id (lexer, "CHISQUARE"))
6504 else if (lex_match_id (lexer, "ALPHA"))
6506 lex_match (lexer, T_EQUALS);
6507 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6509 t->chisq->alpha = lex_number (lexer);
6512 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6514 lex_match (lexer, T_EQUALS);
6515 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6518 else if (lex_match_id (lexer, "CATEGORIES"))
6520 lex_match (lexer, T_EQUALS);
6521 if (lex_match_id (lexer, "ALLVISIBLE"))
6522 t->chisq->all_visible = true;
6523 else if (lex_match_id (lexer, "SUBTOTALS"))
6524 t->chisq->all_visible = false;
6527 lex_error_expecting (lexer,
6528 "ALLVISIBLE", "SUBTOTALS");
6534 lex_error_expecting (lexer, "TYPE", "ALPHA",
6535 "INCLUDEMRSETS", "CATEGORIES");
6539 while (lex_token (lexer) != T_SLASH
6540 && lex_token (lexer) != T_ENDCMD);
6542 else if (lex_match_id (lexer, "COMPARETEST"))
6546 t->pairwise = xmalloc (sizeof *t->pairwise);
6547 *t->pairwise = (struct ctables_pairwise) {
6549 .alpha = { .05, .05 },
6550 .adjust = BONFERRONI,
6551 .include_mrsets = true,
6552 .meansvariance_allcats = true,
6553 .all_visible = true,
6562 if (lex_match_id (lexer, "TYPE"))
6564 lex_match (lexer, T_EQUALS);
6565 if (lex_match_id (lexer, "PROP"))
6566 t->pairwise->type = PROP;
6567 else if (lex_match_id (lexer, "MEAN"))
6568 t->pairwise->type = MEAN;
6571 lex_error_expecting (lexer, "PROP", "MEAN");
6575 else if (lex_match_id (lexer, "ALPHA"))
6577 lex_match (lexer, T_EQUALS);
6579 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6581 double a0 = lex_number (lexer);
6584 lex_match (lexer, T_COMMA);
6585 if (lex_is_number (lexer))
6587 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6589 double a1 = lex_number (lexer);
6592 t->pairwise->alpha[0] = MIN (a0, a1);
6593 t->pairwise->alpha[1] = MAX (a0, a1);
6596 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6598 else if (lex_match_id (lexer, "ADJUST"))
6600 lex_match (lexer, T_EQUALS);
6601 if (lex_match_id (lexer, "BONFERRONI"))
6602 t->pairwise->adjust = BONFERRONI;
6603 else if (lex_match_id (lexer, "BH"))
6604 t->pairwise->adjust = BH;
6605 else if (lex_match_id (lexer, "NONE"))
6606 t->pairwise->adjust = 0;
6609 lex_error_expecting (lexer, "BONFERRONI", "BH",
6614 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6616 lex_match (lexer, T_EQUALS);
6617 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6620 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6622 lex_match (lexer, T_EQUALS);
6623 if (lex_match_id (lexer, "ALLCATS"))
6624 t->pairwise->meansvariance_allcats = true;
6625 else if (lex_match_id (lexer, "TESTEDCATS"))
6626 t->pairwise->meansvariance_allcats = false;
6629 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6633 else if (lex_match_id (lexer, "CATEGORIES"))
6635 lex_match (lexer, T_EQUALS);
6636 if (lex_match_id (lexer, "ALLVISIBLE"))
6637 t->pairwise->all_visible = true;
6638 else if (lex_match_id (lexer, "SUBTOTALS"))
6639 t->pairwise->all_visible = false;
6642 lex_error_expecting (lexer, "ALLVISIBLE",
6647 else if (lex_match_id (lexer, "MERGE"))
6649 lex_match (lexer, T_EQUALS);
6650 if (!parse_bool (lexer, &t->pairwise->merge))
6653 else if (lex_match_id (lexer, "STYLE"))
6655 lex_match (lexer, T_EQUALS);
6656 if (lex_match_id (lexer, "APA"))
6657 t->pairwise->apa_style = true;
6658 else if (lex_match_id (lexer, "SIMPLE"))
6659 t->pairwise->apa_style = false;
6662 lex_error_expecting (lexer, "APA", "SIMPLE");
6666 else if (lex_match_id (lexer, "SHOWSIG"))
6668 lex_match (lexer, T_EQUALS);
6669 if (!parse_bool (lexer, &t->pairwise->show_sig))
6674 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6675 "INCLUDEMRSETS", "MEANSVARIANCE",
6676 "CATEGORIES", "MERGE", "STYLE",
6681 while (lex_token (lexer) != T_SLASH
6682 && lex_token (lexer) != T_ENDCMD);
6686 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6687 "CRITERIA", "CATEGORIES", "TITLES",
6688 "SIGTEST", "COMPARETEST");
6692 if (!lex_match (lexer, T_SLASH))
6696 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6698 t->clabels_from_axis = PIVOT_AXIS_ROW;
6699 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6701 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6705 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6706 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6707 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6709 if (!ctables_prepare_table (t))
6712 while (lex_token (lexer) != T_ENDCMD);
6715 input = proc_open (ds);
6716 bool ok = ctables_execute (ds, input, ct);
6717 ok = proc_commit (ds) && ok;
6719 ctables_destroy (ct);
6720 return ok ? CMD_SUCCESS : CMD_FAILURE;
6725 ctables_destroy (ct);