1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_weighting
74 enum ctables_function_type
76 /* A function that operates on data in a single cell. It operates on
77 effective weights. It does not have an unweighted version. */
80 /* A function that operates on data in a single cell. The function
81 operates on effective weights and has a U-prefixed unweighted
85 /* A function that operates on data in a single cell. It operates on
86 dictionary weights, and has a U-prefixed unweighted version and an
87 E-prefixed effective weight version. */
90 /* A function that operates on an area of cells. It operates on effective
91 weights and has a U-prefixed unweighted version. */
102 enum ctables_function_availability
104 CTFA_ALL, /* Any variables. */
105 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
106 //CTFA_MRSETS, /* Only multiple-response sets */
109 enum ctables_summary_function
111 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
112 #include "ctables.inc"
117 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
119 #include "ctables.inc"
123 struct ctables_function_info
125 struct substring basename;
126 enum ctables_function_type type;
127 enum ctables_format format;
128 enum ctables_function_availability availability;
130 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
131 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
132 bool is_area; /* Needs an area prefix. */
134 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
135 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
137 .basename = SS_LITERAL_INITIALIZER (NAME), \
140 .availability = AVAILABILITY, \
141 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
142 .e_prefix = (TYPE) == CTFT_UECELL, \
143 .is_area = (TYPE) == CTFT_AREA \
145 #include "ctables.inc"
149 enum ctables_area_type
151 /* Within a section, where stacked variables divide one section from
154 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
155 parse_ctables_summary_function() parses correctly. */
156 CTAT_TABLE, /* All layers of a whole section. */
157 CTAT_LAYERROW, /* Row in one layer within a section. */
158 CTAT_LAYERCOL, /* Column in one layer within a section. */
159 CTAT_LAYER, /* One layer within a section. */
161 /* Within a subtable, where a subtable pairs an innermost row variable with
162 an innermost column variable within a single layer. */
163 CTAT_SUBTABLE, /* Whole subtable. */
164 CTAT_ROW, /* Row within a subtable. */
165 CTAT_COL, /* Column within a subtable. */
169 static const char *ctables_area_type_name[N_CTATS] = {
170 [CTAT_TABLE] = "TABLE",
171 [CTAT_LAYER] = "LAYER",
172 [CTAT_LAYERROW] = "LAYERROW",
173 [CTAT_LAYERCOL] = "LAYERCOL",
174 [CTAT_SUBTABLE] = "SUBTABLE",
181 struct hmap_node node;
183 const struct ctables_cell *example;
186 double count[N_CTWS];
187 double valid[N_CTWS];
188 double total[N_CTWS];
189 struct ctables_sum *sums;
197 enum ctables_summary_variant
206 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
207 all the axes (except the scalar variable, if any). */
208 struct hmap_node node;
210 /* The areas that contain this cell. */
212 struct ctables_area *areas[N_CTATS];
217 enum ctables_summary_variant sv;
219 struct ctables_cell_axis
221 struct ctables_cell_value
223 const struct ctables_category *category;
231 union ctables_summary *summaries;
238 const struct dictionary *dict;
239 struct pivot_table_look *look;
241 /* CTABLES has a number of extra formats that we implement via custom
242 currency specifications on an alternate fmt_settings. */
243 #define CTEF_NEGPAREN FMT_CCA
244 #define CTEF_NEQUAL FMT_CCB
245 #define CTEF_PAREN FMT_CCC
246 #define CTEF_PCTPAREN FMT_CCD
247 struct fmt_settings ctables_formats;
249 /* If this is NULL, zeros are displayed using the normal print format.
250 Otherwise, this string is displayed. */
253 /* If this is NULL, missing values are displayed using the normal print
254 format. Otherwise, this string is displayed. */
257 /* Indexed by variable dictionary index. */
258 enum ctables_vlabel *vlabels;
260 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
262 bool mrsets_count_duplicates; /* MRSETS. */
263 bool smissing_listwise; /* SMISSING. */
264 struct variable *e_weight; /* WEIGHT. */
265 int hide_threshold; /* HIDESMALLCOUNTS. */
267 struct ctables_table **tables;
271 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition, kept in struct ctables's 'postcomputes'
   hmap. */
274 struct ctables_postcompute
276 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
277 char *name; /* Name, without leading &. */
279 struct msg_location *location; /* Location of definition. */
280 struct ctables_pcexpr *expr;
282 struct ctables_summary_spec_set *specs;
283 bool hide_source_cats;
286 struct ctables_pcexpr
296 enum ctables_postcompute_op
299 CTPO_CONSTANT, /* 5 */
300 CTPO_CAT_NUMBER, /* [5] */
301 CTPO_CAT_STRING, /* ["STRING"] */
302 CTPO_CAT_NRANGE, /* [LO THRU 5] */
303 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
304 CTPO_CAT_MISSING, /* MISSING */
305 CTPO_CAT_OTHERNM, /* OTHERNM */
306 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
307 CTPO_CAT_TOTAL, /* TOTAL */
321 /* CTPO_CAT_NUMBER. */
324 /* CTPO_CAT_STRING, in dictionary encoding. */
325 struct substring string;
327 /* CTPO_CAT_NRANGE. */
330 /* CTPO_CAT_SRANGE. */
331 struct substring srange[2];
333 /* CTPO_CAT_SUBTOTAL. */
334 size_t subtotal_index;
336 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
337 One element: CTPO_NEG. */
338 struct ctables_pcexpr *subs[2];
341 /* Source location. */
342 struct msg_location *location;
345 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
346 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
347 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
348 struct ctables_pcexpr *sub1);
350 struct ctables_summary_spec_set
352 struct ctables_summary_spec *specs;
356 /* The variable to which the summary specs are applied. */
357 struct variable *var;
359 /* Whether the variable to which the summary specs are applied is a scale
360 variable for the purpose of summarization.
362 (VALIDN and TOTALN act differently for summarizing scale and categorical
366 /* If any of these optional additional scale variables are missing, then
367 treat 'var' as if it's missing too. This is for implementing
368 SMISSING=LISTWISE. */
369 struct variable **listwise_vars;
370 size_t n_listwise_vars;
373 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
374 const struct ctables_summary_spec_set *);
375 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
377 /* A nested sequence of variables, e.g. a > b > c. */
380 struct variable **vars;
384 size_t *areas[N_CTATS];
385 size_t n_areas[N_CTATS];
388 struct ctables_summary_spec_set specs[N_CSVS];
391 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
394 struct ctables_nest *nests;
398 static void ctables_stack_uninit (struct ctables_stack *);
402 struct hmap_node node;
407 struct ctables_occurrence
409 struct hmap_node node;
413 struct ctables_section
416 struct ctables_table *table;
417 struct ctables_nest *nests[PIVOT_N_AXES];
420 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
421 struct hmap cells; /* Contains "struct ctables_cell"s. */
422 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
425 static void ctables_section_uninit (struct ctables_section *);
429 struct ctables *ctables;
430 struct ctables_axis *axes[PIVOT_N_AXES];
431 struct ctables_stack stacks[PIVOT_N_AXES];
432 struct ctables_section *sections;
434 enum pivot_axis_type summary_axis;
435 struct ctables_summary_spec_set summary_specs;
436 struct variable **sum_vars;
439 enum pivot_axis_type slabels_axis;
440 bool slabels_visible;
442 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
444 Most commonly, label_axis[a] == a, and in particular we always have
445 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
447 If ROWLABELS or COLLABELS is specified, then one of
448 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
449 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
451 If any category labels are moved, then 'clabels_example' is one of the
452 variables being moved (and it is otherwise NULL). All of the variables
453 being moved have the same width, value labels, and categories, so this
454 example variable can be used to find those out.
456 The remaining members in this group are relevant only if category labels
459 'clabels_values_map' holds a "struct ctables_value" for all the values
460 that appear in all of the variables in the moved categories. It is
461 accumulated as the data is read. Once the data is fully read, its
462 sorted values are put into 'clabels_values' and 'n_clabels_values'.
464 enum pivot_axis_type label_axis[PIVOT_N_AXES];
465 enum pivot_axis_type clabels_from_axis;
466 enum pivot_axis_type clabels_to_axis;
467 const struct variable *clabels_example;
468 struct hmap clabels_values_map;
469 struct ctables_value **clabels_values;
470 size_t n_clabels_values;
472 /* Indexed by variable dictionary index. */
473 struct ctables_categories **categories;
482 struct ctables_chisq *chisq;
483 struct ctables_pairwise *pairwise;
486 struct ctables_categories
489 struct ctables_category *cats;
494 struct ctables_category
496 enum ctables_category_type
498 /* Explicit category lists. */
501 CCT_NRANGE, /* Numerical range. */
502 CCT_SRANGE, /* String range. */
507 /* Totals and subtotals. */
511 /* Implicit category lists. */
516 /* For contributing to TOTALN. */
517 CCT_EXCLUDED_MISSING,
521 struct ctables_category *subtotal;
527 double number; /* CCT_NUMBER. */
528 struct substring string; /* CCT_STRING, in dictionary encoding. */
529 double nrange[2]; /* CCT_NRANGE. */
530 struct substring srange[2]; /* CCT_SRANGE. */
534 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
535 bool hide_subcategories; /* CCT_SUBTOTAL. */
538 /* CCT_POSTCOMPUTE. */
541 const struct ctables_postcompute *pc;
542 enum fmt_type parse_format;
545 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
548 bool include_missing;
552 enum ctables_summary_function sort_function;
553 enum ctables_weighting weighting;
554 enum ctables_area_type area;
555 struct variable *sort_var;
560 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
561 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
562 struct msg_location *location;
/* Releases the resources owned by CAT: its source location and, depending on
   the category type, its string, its two string-range endpoints, or its
   total label. */
566 ctables_category_uninit (struct ctables_category *cat)
571 msg_location_destroy (cat->location);
578 case CCT_POSTCOMPUTE:
582 ss_dealloc (&cat->string);
586 ss_dealloc (&cat->srange[0]);
587 ss_dealloc (&cat->srange[1]);
592 free (cat->total_label);
600 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings are equal; a null and a nonnull substring are
   unequal; two nonnull substrings are compared with ss_equals(). */
606 nullable_substring_equal (const struct substring *a,
607 const struct substring *b)
609 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Their types are compared first; after that
   only the members relevant to the type are examined: numbers and numeric
   range endpoints with ==, strings with ss_equals(), string range endpoints
   with nullable_substring_equal() (an endpoint may be null), postcomputes by
   pointer identity, total labels with strcmp(), and the sort-related settings
   member by member. */
613 ctables_category_equal (const struct ctables_category *a,
614 const struct ctables_category *b)
616 if (a->type != b->type)
622 return a->number == b->number;
625 return ss_equals (a->string, b->string);
628 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
631 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
632 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
638 case CCT_POSTCOMPUTE:
639 return a->pc == b->pc;
643 return !strcmp (a->total_label, b->total_label);
648 return (a->include_missing == b->include_missing
649 && a->sort_ascending == b->sort_ascending
650 && a->sort_function == b->sort_function
651 && a->sort_var == b->sort_var
652 && a->percentile == b->percentile);
654 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C.  C's reference count must be positive on entry
   (this is asserted).  Cleanup calls ctables_category_uninit() on each of C's
   'n_cats' categories. */
662 ctables_categories_unref (struct ctables_categories *c)
667 assert (c->n_refs > 0);
671 for (size_t i = 0; i < c->n_cats; i++)
672 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must agree in the number of
   categories and in 'show_empty', and each pair of categories at the same
   index must satisfy ctables_category_equal(). */
678 ctables_categories_equal (const struct ctables_categories *a,
679 const struct ctables_categories *b)
681 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
684 for (size_t i = 0; i < a->n_cats; i++)
685 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
691 /* Chi-square test (SIGTEST). */
699 /* Pairwise comparison test (COMPARETEST). */
700 struct ctables_pairwise
702 enum { PROP, MEAN } type;
705 bool meansvariance_allcats;
707 enum { BONFERRONI = 1, BH } adjust;
731 struct variable *var;
733 struct ctables_summary_spec_set specs[N_CSVS];
737 struct ctables_axis *subs[2];
740 struct msg_location *loc;
743 static void ctables_axis_destroy (struct ctables_axis *);
745 struct ctables_summary_spec
747 /* The calculation to be performed.
749 'function' is the function to calculate. 'weighting' specifies which
750 weights to use (functions that do not support a choice operate on
751 effective weights, CTW_EFFECTIVE). 'calc_area' is the area over which the
752 calculation takes place (for functions that target only an individual
753 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
754 percentile between 0 and 100 (for other functions it must be 0). */
755 enum ctables_summary_function function;
756 enum ctables_weighting weighting;
757 enum ctables_area_type calc_area;
758 double percentile; /* CTSF_PTILE only. */
760 /* How to display the result of the calculation.
762 'label' is a user-specified label, NULL if the user didn't specify
765 'user_area' is usually the same as 'calc_area', but when category labels
766 are rotated from one axis to another it swaps rows and columns.
768 'format' is the format for displaying the output. If
769 'is_ctables_format' is true, then 'format.type' is one of the special
770 CTEF_* formats instead of the standard ones. */
772 enum ctables_area_type user_area;
773 struct fmt_spec format;
774 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Clones SRC into DST.  DST receives its own copy of SRC's label, made with
   xstrdup_if_nonnull(), so a null label stays null. */
781 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
782 const struct ctables_summary_spec *src)
785 dst->label = xstrdup_if_nonnull (src->label);
789 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a clone of SRC.  SRC's 'n' summary specs are cloned one
   by one with ctables_summary_spec_clone() into a newly allocated array (a
   null pointer is used instead when SRC has no specs), and 'is_scale' is
   carried over from SRC. */
796 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
797 const struct ctables_summary_spec_set *src)
799 struct ctables_summary_spec *specs
800 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
801 for (size_t i = 0; i < src->n; i++)
802 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
804 *dst = (struct ctables_summary_spec_set) {
809 .is_scale = src->is_scale,
/* Releases the contents of SET: uninitializes each of its 'n' summary specs
   with ctables_summary_spec_uninit() and frees the 'listwise_vars' array. */
814 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
816 for (size_t i = 0; i < set->n; i++)
817 ctables_summary_spec_uninit (&set->specs[i]);
818 free (set->listwise_vars);
/* Parses a column width setting from LEXER: an optional equals sign followed
   by either the keyword DEFAULT or a number in the closed range
   [0, DBL_MAX].  A number is stored in *WIDTH.  NAME is handed to
   lex_force_num_range_closed() (presumably to identify the setting in its
   error message). */
823 parse_col_width (struct lexer *lexer, const char *name, double *width)
825 lex_match (lexer, T_EQUALS);
826 if (lex_match_id (lexer, "DEFAULT"))
828 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
830 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER, with the result going to *B
   (presumably false for NO and true for YES).  Any other token is reported
   with lex_error_expecting(). */
840 parse_bool (struct lexer *lexer, bool *b)
842 if (lex_match_id (lexer, "NO"))
844 else if (lex_match_id (lexer, "YES"))
848 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table is
   generated from the AVAILABILITY column of "ctables.inc", indexed by the
   function's enumeration value. */
854 static enum ctables_function_availability
855 ctables_function_availability (enum ctables_summary_function f)
857 static enum ctables_function_availability availability[] = {
858 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
859 #include "ctables.inc"
863 return availability[f];
/* Parses a summary function name from the identifier at the current token of
   LEXER, with results going to *FUNCTION, *WEIGHTING, and *AREA.

   The name is taken apart from the left: an optional U or E prefix (in either
   case), then an area type name from ctables_area_type_name[], and finally a
   base name that is matched without regard to case against the entries of
   ctables_function_info[].  The prefixes found are checked against what the
   matched function accepts ('u_prefix', 'e_prefix', 'is_area').  A remaining
   name of just "PCT" selects CTSF_areaPCT_COUNT with effective weighting.

   If no function name is recognized, the error "Expecting summary function
   name." is reported. */
867 parse_ctables_summary_function (struct lexer *lexer,
868 enum ctables_summary_function *function,
869 enum ctables_weighting *weighting,
870 enum ctables_area_type *area)
872 if (!lex_force_id (lexer))
875 struct substring name = lex_tokss (lexer);
/* An E prefix is looked for only if no U prefix was consumed. */
876 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
877 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
879 bool has_area = false;
881 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
882 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
887 if (ss_equals_case (name, ss_cstr ("PCT")))
889 /* Special case where .COUNT suffix is omitted. */
890 *function = CTSF_areaPCT_COUNT;
891 *weighting = CTW_EFFECTIVE;
898 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
900 const struct ctables_function_info *cfi = &ctables_function_info[f];
901 if (ss_equals_case (cfi->basename, name))
904 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
907 *weighting = (e ? CTW_EFFECTIVE
909 : cfi->e_prefix ? CTW_DICTIONARY
916 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  Cleanup covers the N_CSVS summary spec sets in
   AXIS->specs[], the two sub-axes in AXIS->subs[] (destroyed recursively),
   and the source location AXIS->loc. */
921 ctables_axis_destroy (struct ctables_axis *axis)
929 for (size_t i = 0; i < N_CSVS; i++)
930 ctables_summary_spec_set_uninit (&axis->specs[i]);
935 ctables_axis_destroy (axis->subs[0]);
936 ctables_axis_destroy (axis->subs[1]);
939 msg_location_destroy (axis->loc);
/* Allocates a new axis node for operator OP whose operands are SUB0 and SUB1.
   The node's source location spans the tokens in LEXER from offset START_OFS
   through the token just before the current one. */
943 static struct ctables_axis *
944 ctables_axis_new_nonterminal (enum ctables_axis_op op,
945 struct ctables_axis *sub0,
946 struct ctables_axis *sub1,
947 struct lexer *lexer, int start_ofs)
949 struct ctables_axis *axis = xmalloc (sizeof *axis);
950 *axis = (struct ctables_axis) {
952 .subs = { sub0, sub1 },
953 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
958 struct ctables_axis_parse_ctx
961 struct dictionary *dict;
963 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.

   Each function's format class comes from the FORMAT column of "ctables.inc".
   Depending on that class, the result is an F format of width 40 with no
   decimals, a PCT format of width 40 with 1 decimal, or VAR's own print
   format. */
966 static struct fmt_spec
967 ctables_summary_default_format (enum ctables_summary_function function,
968 const struct variable *var)
970 static const enum ctables_format default_formats[] = {
971 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
972 #include "ctables.inc"
975 switch (default_formats[function])
978 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
981 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
984 return *var_get_print_format (var);
/* Returns the default label for SPEC, chosen from SPEC's function, weighting,
   and 'user_area'.  The user-visible labels are marked for translation with
   N_() but are returned untranslated; the "... ID" labels are plain English
   for developers.

   CTSF_PTILE must not be passed here (its label embeds the percentile, which
   ctables_summary_label() formats itself). */
992 ctables_summary_label__ (const struct ctables_summary_spec *spec)
/* W: any weighting other than unweighted.  D: dictionary weighting. */
994 bool w = spec->weighting != CTW_UNWEIGHTED;
995 bool d = spec->weighting == CTW_DICTIONARY;
996 enum ctables_area_type a = spec->user_area;
997 switch (spec->function)
1000 return (d ? N_("Count")
1001 : w ? N_("Adjusted Count")
1002 : N_("Unweighted Count"));
1004 case CTSF_areaPCT_COUNT:
1007 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1008 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1009 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1010 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1011 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1012 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1013 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1017 case CTSF_areaPCT_VALIDN:
1020 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1021 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1022 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1023 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1024 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1025 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1026 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1030 case CTSF_areaPCT_TOTALN:
1033 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1034 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1035 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1036 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1037 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1038 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1039 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1043 case CTSF_MAXIMUM: return N_("Maximum");
1044 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1045 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
1046 case CTSF_MINIMUM: return N_("Minimum");
1047 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
1048 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
1049 case CTSF_PTILE: NOT_REACHED ();
1050 case CTSF_RANGE: return N_("Range");
1051 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
1052 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
1053 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
1054 case CTSF_TOTALN: return (d ? N_("Total N")
1055 : w ? N_("Adjusted Total N")
1056 : N_("Unweighted Total N"));
1057 case CTSF_VALIDN: return (d ? N_("Valid N")
1058 : w ? N_("Adjusted Valid N")
1059 : N_("Unweighted Valid N"));
1060 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
1061 case CTSF_areaPCT_SUM:
1064 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1065 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1066 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1067 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1068 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1069 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1070 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1077 /* Don't bother translating these: they are for developers only. */
1078 case CTAT_TABLE: return "Table ID";
1079 case CTAT_LAYER: return "Layer ID";
1080 case CTAT_LAYERROW: return "Layer Row ID";
1081 case CTAT_LAYERCOL: return "Layer Column ID";
1082 case CTAT_SUBTABLE: return "Subtable ID";
1083 case CTAT_ROW: return "Row ID";
1084 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value to label the summary described by SPEC.

   For the default label, CTSF_PTILE yields "Percentile" (or "Unweighted
   Percentile") followed by SPEC's percentile with two decimals, and every
   other function yields the text from ctables_summary_label__().

   For SPEC's own label, each occurrence of the text ")CILEVEL" is replaced
   by CILEVEL formatted with %g, and everything else is copied through
   unchanged. */
1092 static struct pivot_value *
1093 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1097 if (spec->function == CTSF_PTILE)
1099 double p = spec->percentile;
1100 char *s = (spec->weighting != CTW_UNWEIGHTED
1101 ? xasprintf (_("Percentile %.2f"), p)
1102 : xasprintf (_("Unweighted Percentile %.2f"), p));
1103 return pivot_value_new_user_text_nocopy (s);
1106 return pivot_value_new_text (ctables_summary_label__ (spec));
1110 struct substring in = ss_cstr (spec->label);
1111 struct substring target = ss_cstr (")CILEVEL");
1113 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next TARGET, then step past it. */
1116 size_t chunk = ss_find_substring (in, target);
1117 ds_put_substring (&out, ss_head (in, chunk));
1118 ss_advance (&in, chunk);
1120 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Substitute the confidence level for TARGET. */
1122 ss_advance (&in, target.length);
1123 ds_put_format (&out, "%g", cilevel);
/* Formats the syntax name of summary FUNCTION into BUFFER, which has room for
   BUFSIZE bytes, using snprintf().  The name consists of a weighting prefix
   ("U" for unweighted, none for dictionary weighting, "E" where the function
   accepts an E prefix), the name of AREA if the function takes an area
   prefix, and the function's base name. */
1129 ctables_summary_function_name (enum ctables_summary_function function,
1130 enum ctables_weighting weighting,
1131 enum ctables_area_type area,
1132 char *buffer, size_t bufsize)
1134 const struct ctables_function_info *cfi = &ctables_function_info[function];
1135 snprintf (buffer, bufsize, "%s%s%s",
1136 (weighting == CTW_UNWEIGHTED ? "U"
1137 : weighting == CTW_DICTIONARY ? ""
1138 : cfi->e_prefix ? "E"
1140 cfi->is_area ? ctables_area_type_name[area] : "",
1141 cfi->basename.string);
/* Adds a summary specification to AXIS for summary variant SV.

   If AXIS is a variable (CTAO_VAR), the availability of FUNCTION is checked
   first.  Problems are reported as an error at LOC plus a note at AXIS->loc:
   for example, a scale-only function applied to a non-scale variable, unless
   SV is CSV_TOTAL.  The new spec is then appended to AXIS->specs[SV], growing
   the array as needed.  LABEL is duplicated if nonnull.  If FORMAT is null,
   the default format for FUNCTION and the axis variable is used.

   For any other kind of axis, the spec is added recursively to both
   sub-axes. */
1146 add_summary_spec (struct ctables_axis *axis,
1147 enum ctables_summary_function function,
1148 enum ctables_weighting weighting,
1149 enum ctables_area_type area, double percentile,
1150 const char *label, const struct fmt_spec *format,
1151 bool is_ctables_format, const struct msg_location *loc,
1152 enum ctables_summary_variant sv)
1154 if (axis->op == CTAO_VAR)
/* Spell out the function name as the user would write it, for messages. */
1156 char function_name[128];
1157 ctables_summary_function_name (function, weighting, area,
1158 function_name, sizeof function_name);
1159 const char *var_name = var_get_name (axis->var);
1160 switch (ctables_function_availability (function))
1164 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1165 "response sets."), function_name);
1166 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1172 if (!axis->scale && sv != CSV_TOTAL)
1175 _("Summary function %s applies only to scale variables."),
1177 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1187 struct ctables_summary_spec_set *set = &axis->specs[sv];
1188 if (set->n >= set->allocated)
1189 set->specs = x2nrealloc (set->specs, &set->allocated,
1190 sizeof *set->specs);
1192 struct ctables_summary_spec *dst = &set->specs[set->n++];
1193 *dst = (struct ctables_summary_spec) {
1194 .function = function,
1195 .weighting = weighting,
1198 .percentile = percentile,
1199 .label = xstrdup_if_nonnull (label),
1200 .format = (format ? *format
1201 : ctables_summary_default_format (function, axis->var)),
1202 .is_ctables_format = is_ctables_format,
1208 for (size_t i = 0; i < 2; i++)
1209 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1210 percentile, label, format, is_ctables_format,
1217 static struct ctables_axis *ctables_axis_parse_stack (
1218 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable from CTX->dict.

   A variable may be followed by "[S]" to treat it as scale or "[C]" to treat
   it as categorical; without either, it is scale exactly when its
   measurement level is MEASURE_SCALE.  A string variable that ends up as
   scale is reported as an error and the axis is destroyed. */
1221 static struct ctables_axis *
1222 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1224 if (lex_match (ctx->lexer, T_LPAREN))
1226 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1227 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1229 ctables_axis_destroy (sub);
1235 if (!lex_force_id (ctx->lexer))
1238 int start_ofs = lex_ofs (ctx->lexer);
1239 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1243 struct ctables_axis *axis = xmalloc (sizeof *axis);
1244 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1246 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1247 : lex_match_phrase (ctx->lexer, "[C]") ? false
1248 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S] or [C] marker. */
1249 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1250 lex_ofs (ctx->lexer) - 1);
1251 if (axis->scale && var_is_alpha (var))
1253 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1255 var_get_name (var));
1256 ctables_axis_destroy (axis);
/* Returns true if the null-terminated string S contains at least one of the
   decimal digits 0 through 9, false otherwise.

   strcspn() yields the length of the leading run of non-digits, so the byte
   at that offset is the terminator only when S has no digit at all. */
1264 has_digit (const char *s)
1266 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary from LEXER into FORMAT.

   The type name, width, and decimals are obtained with
   parse_abstract_format_specifier__().  The type names NEGPAREN, NEQUAL,
   PAREN, and PCTPAREN (in any case) select the corresponding CTEF_* type; for
   these, width and decimals are checked (see the error messages below) and
   *IS_CTABLES_FORMAT is set to true.

   For any other type name, *IS_CTABLES_FORMAT is set to false and the result
   is whether parse_format_specifier() succeeds and yields a valid output
   format that is compatible with numeric values. */
1270 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1271 bool *is_ctables_format)
1273 char type[FMT_TYPE_LEN_MAX + 1];
1274 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1277 if (!strcasecmp (type, "NEGPAREN"))
1278 format->type = CTEF_NEGPAREN;
1279 else if (!strcasecmp (type, "NEQUAL"))
1280 format->type = CTEF_NEQUAL;
1281 else if (!strcasecmp (type, "PAREN"))
1282 format->type = CTEF_PAREN;
1283 else if (!strcasecmp (type, "PCTPAREN"))
1284 format->type = CTEF_PCTPAREN;
1287 *is_ctables_format = false;
1288 return (parse_format_specifier (lexer, format)
1289 && fmt_check_output (format)
1290 && fmt_check_type_compat (format, VAL_NUMERIC));
1296 lex_next_error (lexer, -1, -1,
1297 _("Output format %s requires width 2 or greater."), type);
1300 else if (format->d > format->w - 1)
1302 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1303 "greater than decimals."), type);
1308 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list of
   summary specifications.

   Each specification consists of a summary function name; for PTILE only, a
   percentile between 0 and 100; an optional string label; and an optional
   output format, recognized as an identifier that contains a digit.  Each one
   is added to the parsed axis with add_summary_spec(), along with the source
   location of its tokens.  A comma between specifications is accepted but
   not required.

   Within the cell specifications, TOTALS followed by a left bracket opens a
   nested list (presumably recorded under CSV_TOTAL, given the check for a
   second right bracket when SV is CSV_TOTAL). */
1313 static struct ctables_axis *
1314 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1316 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1317 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1320 enum ctables_summary_variant sv = CSV_CELL;
1323 int start_ofs = lex_ofs (ctx->lexer);
1325 /* Parse function. */
1326 enum ctables_summary_function function;
1327 enum ctables_weighting weighting;
1328 enum ctables_area_type area;
1329 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1333 /* Parse percentile. */
1334 double percentile = 0;
1335 if (function == CTSF_PTILE)
1337 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1339 percentile = lex_number (ctx->lexer);
1340 lex_get (ctx->lexer);
1345 if (lex_is_string (ctx->lexer))
1347 label = ss_xstrdup (lex_tokss (ctx->lexer));
1348 lex_get (ctx->lexer);
1352 struct fmt_spec format;
1353 const struct fmt_spec *formatp;
1354 bool is_ctables_format = false;
/* A format such as F8.2 lexes as an identifier that contains a digit. */
1355 if (lex_token (ctx->lexer) == T_ID
1356 && has_digit (lex_tokcstr (ctx->lexer)))
1358 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1359 &is_ctables_format))
1369 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1370 lex_ofs (ctx->lexer) - 1);
1371 add_summary_spec (sub, function, weighting, area, percentile, label,
1372 formatp, is_ctables_format, loc, sv);
1374 msg_location_destroy (loc);
1376 lex_match (ctx->lexer, T_COMMA);
1377 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1379 if (!lex_force_match (ctx->lexer, T_LBRACK))
1383 else if (lex_match (ctx->lexer, T_RBRACK))
1385 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1392 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node (CTAO_VAR) yields itself if it is marked as scale and NULL otherwise;
   for other nodes, both sub-axes are searched recursively. */
1396 static const struct ctables_axis *
1397 find_scale (const struct ctables_axis *axis)
1401 else if (axis->op == CTAO_VAR)
1402 return axis->scale ? axis : NULL;
1405 for (size_t i = 0; i < 2; i++)
1407 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical variable that has
   summary specifications.  A variable node (CTAO_VAR) yields itself if it is
   not scale and has at least one CSV_CELL summary spec, and NULL otherwise;
   for other nodes, both sub-axes are searched recursively. */
1415 static const struct ctables_axis *
1416 find_categorical_summary_spec (const struct ctables_axis *axis)
1420 else if (axis->op == CTAO_VAR)
1421 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1424 for (size_t i = 0; i < 2; i++)
1426 const struct ctables_axis *sum
1427 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix axis expressions joined by the nesting
   operator '>', building a CTAO_NEST node for each operator.

   Two restrictions are enforced on each nesting, each reported as an error on
   the nest's location with notes that point at the offending variables: scale
   variables may not appear on both the outer and the inner side, and an outer
   categorical variable may not carry summary specifications. */
1435 static struct ctables_axis *
1436 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1438 int start_ofs = lex_ofs (ctx->lexer);
1439 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1443 while (lex_match (ctx->lexer, T_GT))
1445 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1448 ctables_axis_destroy (lhs);
1452 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1453 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1455 const struct ctables_axis *outer_scale = find_scale (lhs);
1456 const struct ctables_axis *inner_scale = find_scale (rhs);
1457 if (outer_scale && inner_scale)
1459 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1460 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1461 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1462 ctables_axis_destroy (nest);
1466 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1469 msg_at (SE, nest->loc,
1470 _("Summaries may only be requested for categorical variables "
1471 "at the innermost nesting level."));
1472 msg_at (SN, outer_sum->loc,
1473 _("This outer categorical variable has a summary."));
1474 ctables_axis_destroy (nest);
1484 static struct ctables_axis *
1485 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1487 int start_ofs = lex_ofs (ctx->lexer);
1488 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1492 while (lex_match (ctx->lexer, T_PLUS))
1494 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1497 ctables_axis_destroy (lhs);
1501 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1502 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T and stores the result in
   T->axes[A].

   If the current token is BY, "/", or the end of the command, the axis is
   treated specially before any parsing happens (NOTE(review): the statement
   taken in that case is not visible in this excerpt; presumably an empty axis
   is accepted).  Otherwise a parse context is built and handed to
   ctables_axis_parse_stack(), and the result is true exactly when a non-null
   axis came back. */
1509 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1510 struct ctables *ct, struct ctables_table *t,
1511 enum pivot_axis_type a)
1513 if (lex_token (lexer) == T_BY
1514 || lex_token (lexer) == T_SLASH
1515 || lex_token (lexer) == T_ENDCMD)
/* NOTE(review): the initializer fields of CTX are not visible here;
   presumably they are filled in from this function's parameters. */
1518 struct ctables_axis_parse_ctx ctx = {
1524 t->axes[a] = ctables_axis_parse_stack (&ctx);
1525 return t->axes[a] != NULL;
/* Destroys CHISQ; ctables_table_destroy() calls this on a table's `chisq'
   member.  NOTE(review): the body is not visible in this excerpt. */
1529 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE; ctables_table_destroy() calls this on a table's
   `pairwise' member.  NOTE(review): the body is not visible in this
   excerpt. */
1535 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, per-variable category
   sets, axis expressions and the stacks derived from them, summary specs, the
   category-label value map and array, and the chi-square and pairwise
   settings.  NOTE(review): any null check on T and the freeing of T itself
   are not visible in this excerpt. */
1541 ctables_table_destroy (struct ctables_table *t)
1546 for (size_t i = 0; i < t->n_sections; i++)
1547 ctables_section_uninit (&t->sections[i]);
/* Category sets are reference-counted, so each slot only drops its
   reference. */
1550 for (size_t i = 0; i < t->n_categories; i++)
1551 ctables_categories_unref (t->categories[i]);
1552 free (t->categories);
1554 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1556 ctables_axis_destroy (t->axes[a]);
1557 ctables_stack_uninit (&t->stacks[a]);
1559 free (t->summary_specs.specs);
/* Empty the value map using the "safe" iterator, because nodes are deleted
   while iterating.  Values are destroyed using the width of
   T->clabels_example. */
1561 struct ctables_value *ctv, *next_ctv;
1562 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1563 &t->clabels_values_map)
1565 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1566 hmap_delete (&t->clabels_values_map, &ctv->node);
1569 hmap_destroy (&t->clabels_values_map);
1570 free (t->clabels_values);
1576 ctables_chisq_destroy (t->chisq);
1577 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: every postcompute in CT->postcomputes
   (its location, expression, and summary specs), the map itself, the CTABLES
   format settings, the reference to the pivot table look, and every table in
   CT->tables.  NOTE(review): several statements of this function, including
   any null check on CT and the freeing of CT itself, are not visible in this
   excerpt. */
1582 ctables_destroy (struct ctables *ct)
/* "Safe" iteration is required because each postcompute is removed from the
   map inside the loop. */
1587 struct ctables_postcompute *pc, *next_pc;
1588 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1592 msg_location_destroy (pc->location);
1593 ctables_pcexpr_destroy (pc->expr);
1597 ctables_summary_spec_set_uninit (pc->specs);
1600 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1603 hmap_destroy (&ct->postcomputes);
1605 fmt_settings_uninit (&ct->ctables_formats);
1606 pivot_table_look_unref (ct->look);
1610 for (size_t i = 0; i < ct->n_tables; i++)
1611 ctables_table_destroy (ct->tables[i]);
1616 static struct ctables_category
1617 cct_nrange (double low, double high)
1619 return (struct ctables_category) {
1621 .nrange = { low, high }
1625 static struct ctables_category
1626 cct_srange (struct substring low, struct substring high)
1628 return (struct ctables_category) {
1630 .srange = { low, high }
1635 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1636 struct ctables_category *cat)
1639 if (lex_match (lexer, T_EQUALS))
1641 if (!lex_force_string (lexer))
1644 total_label = ss_xstrdup (lex_tokss (lexer));
1648 total_label = xstrdup (_("Subtotal"));
1650 *cat = (struct ctables_category) {
1651 .type = CCT_SUBTOTAL,
1652 .hide_subcategories = hide_subcategories,
1653 .total_label = total_label
1658 static struct substring
1659 parse_substring (struct lexer *lexer, struct dictionary *dict)
1661 struct substring s = recode_substring_pool (
1662 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1663 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification at the current token and stores
   it in *CAT.  The forms visible here are:

   - The keywords OTHERNM, MISSING, SUBTOTAL, and HSUBTOTAL.
   - LO THRU followed by a string or a number.
   - A number, optionally followed by THRU and either HI or another number.
   - A string, optionally followed by THRU and either HI or another string.
   - "&" followed by the name of a postcompute, which is looked up in CT with
     ctables_find_postcompute().

   Anything else is reported with lex_error().

   NOTE(review): the CT parameter, the return statements, and several
   lex_get() calls are not visible in this excerpt.  The caller treats a false
   result as failure. */
1669 ctables_table_parse_explicit_category (struct lexer *lexer,
1670 struct dictionary *dict,
1672 struct ctables_category *cat)
1674 if (lex_match_id (lexer, "OTHERNM"))
1675 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1676 else if (lex_match_id (lexer, "MISSING"))
1677 *cat = (struct ctables_category) { .type = CCT_MISSING };
1678 else if (lex_match_id (lexer, "SUBTOTAL"))
1679 return ctables_table_parse_subtotal (lexer, false, cat);
1680 else if (lex_match_id (lexer, "HSUBTOTAL"))
1681 return ctables_table_parse_subtotal (lexer, true, cat);
1682 else if (lex_match_id (lexer, "LO"))
1684 if (!lex_force_match_id (lexer, "THRU"))
1686 if (lex_is_string (lexer))
/* A null string stands for the open (LO) end of a string range. */
1688 struct substring sr0 = { .string = NULL };
1689 struct substring sr1 = parse_substring (lexer, dict);
1690 *cat = cct_srange (sr0, sr1);
1692 else if (lex_force_num (lexer))
/* -DBL_MAX stands for the open (LO) end of a numeric range. */
1694 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1700 else if (lex_is_number (lexer))
1702 double number = lex_number (lexer);
1704 if (lex_match_id (lexer, "THRU"))
1706 if (lex_match_id (lexer, "HI"))
/* DBL_MAX stands for the open (HI) end of a numeric range. */
1707 *cat = cct_nrange (number, DBL_MAX);
1710 if (!lex_force_num (lexer))
1712 *cat = cct_nrange (number, lex_number (lexer));
1717 *cat = (struct ctables_category) {
1722 else if (lex_is_string (lexer))
/* parse_substring() also advances past the string token. */
1724 struct substring s = parse_substring (lexer, dict);
1725 if (lex_match_id (lexer, "THRU"))
1727 if (lex_match_id (lexer, "HI"))
/* A null string stands for the open (HI) end of a string range. */
1729 struct substring sr1 = { .string = NULL };
1730 *cat = cct_srange (s, sr1);
1734 if (!lex_force_string (lexer))
1739 struct substring sr1 = parse_substring (lexer, dict);
1740 *cat = cct_srange (s, sr1);
1744 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* "&name" refers to a previously defined postcompute. */
1746 else if (lex_match (lexer, T_AND))
1748 if (!lex_force_id (lexer))
1750 struct ctables_postcompute *pc = ctables_find_postcompute (
1751 ct, lex_tokcstr (lexer));
/* The error location spans the "&" token and the identifier. */
1754 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1755 msg_at (SE, loc, _("Unknown postcompute &%s."),
1756 lex_tokcstr (lexer));
1757 msg_location_destroy (loc);
1762 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1766 lex_error (lexer, NULL);
1774 parse_category_string (struct msg_location *location,
1775 struct substring s, const struct dictionary *dict,
1776 enum fmt_type format, double *n)
1779 char *error = data_in (s, dict_get_encoding (dict), format,
1780 settings_get_fmt_settings (), &v, 0, NULL);
1783 msg_at (SE, location,
1784 _("Failed to parse category specification as format %s: %s."),
1785 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  How a category is matched depends on E->op:

   - CTPO_CAT_NUMBER, CTPO_CAT_STRING: same type and equal number or string.
   - CTPO_CAT_NRANGE, CTPO_CAT_SRANGE: same type and both bounds equal (string
     bounds are compared with nullable_substring_equal()).
   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: same type.
   - CTPO_CAT_SUBTOTAL: subtotal categories are counted in `n_subtotals' and
     compared against E->subtotal_index; index 0 is handled separately.

   NOTE(review): the statements that record a match in `best', the handling
   inside each branch, and the return statements are not visible in this
   excerpt.  The last visible test singles out a subtotal reference with index
   0 when CATS contains more than one subtotal. */
1794 static struct ctables_category *
1795 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1796 const struct ctables_pcexpr *e)
1798 struct ctables_category *best = NULL;
1799 size_t n_subtotals = 0;
1800 for (size_t i = 0; i < cats->n_cats; i++)
1802 struct ctables_category *cat = &cats->cats[i];
1805 case CTPO_CAT_NUMBER:
1806 if (cat->type == CCT_NUMBER && cat->number == e->number)
1810 case CTPO_CAT_STRING:
1811 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1815 case CTPO_CAT_NRANGE:
1816 if (cat->type == CCT_NRANGE
1817 && cat->nrange[0] == e->nrange[0]
1818 && cat->nrange[1] == e->nrange[1])
1822 case CTPO_CAT_SRANGE:
1823 if (cat->type == CCT_SRANGE
1824 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1825 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1829 case CTPO_CAT_MISSING:
1830 if (cat->type == CCT_MISSING)
1834 case CTPO_CAT_OTHERNM:
1835 if (cat->type == CCT_OTHERNM)
1839 case CTPO_CAT_SUBTOTAL:
1840 if (cat->type == CCT_SUBTOTAL)
1843 if (e->subtotal_index == n_subtotals)
1845 else if (e->subtotal_index == 0)
1850 case CTPO_CAT_TOTAL:
1851 if (cat->type == CCT_TOTAL)
/* A reference without a position (index 0) cannot be resolved uniquely when
   more than one subtotal exists. */
1865 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to.

   When PARSE_FORMAT is not FMT_F, string references are first converted to
   numbers with parse_category_string() (using DICT's encoding and
   PARSE_FORMAT) and looked up as numeric references instead: a
   CTPO_CAT_STRING becomes a CTPO_CAT_NUMBER, and a CTPO_CAT_SRANGE becomes a
   CTPO_CAT_NRANGE.  Every other reference is looked up unchanged.

   NOTE(review): what is returned when parsing fails is not visible in this
   excerpt. */
1870 static struct ctables_category *
1871 ctables_find_category_for_postcompute (const struct dictionary *dict,
1872 const struct ctables_categories *cats,
1873 enum fmt_type parse_format,
1874 const struct ctables_pcexpr *e)
1876 if (parse_format != FMT_F)
1878 if (e->op == CTPO_CAT_STRING)
1881 if (!parse_category_string (e->location, e->string, dict,
1882 parse_format, &number))
/* Look up a temporary numeric reference that keeps E's location. */
1885 struct ctables_pcexpr e2 = {
1886 .op = CTPO_CAT_NUMBER,
1888 .location = e->location,
1890 return ctables_find_category_for_postcompute__ (cats, &e2);
1892 else if (e->op == CTPO_CAT_SRANGE)
/* A null bound string means the range is open at that end; it maps to
   -DBL_MAX (low) or DBL_MAX (high). */
1895 if (!e->srange[0].string)
1896 nrange[0] = -DBL_MAX;
1897 else if (!parse_category_string (e->location, e->srange[0], dict,
1898 parse_format, &nrange[0]))
1901 if (!e->srange[1].string)
1902 nrange[1] = DBL_MAX;
1903 else if (!parse_category_string (e->location, e->srange[1], dict,
1904 parse_format, &nrange[1]))
1907 struct ctables_pcexpr e2 = {
1908 .op = CTPO_CAT_NRANGE,
1909 .nrange = { nrange[0], nrange[1] },
1910 .location = e->location,
1912 return ctables_find_category_for_postcompute__ (cats, &e2);
1915 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, and recursively its operands E->subs[0]
   and E->subs[1], against category list CATS on behalf of computed category
   PC_CAT.  CATS_LOCATION is the source location of the category list and is
   used in diagnostics.

   For each category reference, the referenced category is looked up with
   ctables_find_category_for_postcompute().  Two kinds of errors are visible:
   a SUBTOTAL reference without a position when CATS contains more than one
   subtotal, and a reference to a category that CATS does not include, which
   is followed by a hint on how to fix it.

   NOTE(review): the test on the lookup result, the handling under
   PC_CAT->pc->hide_source_cats, and the return statements are not visible in
   this excerpt.  The recursion shows that a false result from an operand is
   treated as failure. */
1919 ctables_recursive_check_postcompute (struct dictionary *dict,
1920 const struct ctables_pcexpr *e,
1921 struct ctables_category *pc_cat,
1922 const struct ctables_categories *cats,
1923 const struct msg_location *cats_location)
1927 case CTPO_CAT_NUMBER:
1928 case CTPO_CAT_STRING:
1929 case CTPO_CAT_NRANGE:
1930 case CTPO_CAT_SRANGE:
1931 case CTPO_CAT_MISSING:
1932 case CTPO_CAT_OTHERNM:
1933 case CTPO_CAT_SUBTOTAL:
1934 case CTPO_CAT_TOTAL:
1936 struct ctables_category *cat = ctables_find_category_for_postcompute (
1937 dict, cats, pc_cat->parse_format, e);
/* A subtotal reference without a position: count the subtotals to tell an
   ambiguous reference apart from a missing one. */
1940 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1942 size_t n_subtotals = 0;
1943 for (size_t i = 0; i < cats->n_cats; i++)
1944 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1945 if (n_subtotals > 1)
1947 msg_at (SE, cats_location,
1948 ngettext ("These categories include %zu instance "
1949 "of SUBTOTAL or HSUBTOTAL, so references "
1950 "from computed categories must refer to "
1951 "subtotals by position, "
1952 "e.g. SUBTOTAL[1].",
1953 "These categories include %zu instances "
1954 "of SUBTOTAL or HSUBTOTAL, so references "
1955 "from computed categories must refer to "
1956 "subtotals by position, "
1957 "e.g. SUBTOTAL[1].",
1960 msg_at (SN, e->location,
1961 _("This is the reference that lacks a position."));
1966 msg_at (SE, pc_cat->location,
1967 _("Computed category &%s references a category not included "
1968 "in the category list."),
1970 msg_at (SN, e->location, _("This is the missing category."));
1971 if (e->op == CTPO_CAT_SUBTOTAL)
1972 msg_at (SN, cats_location,
1973 _("To fix the problem, add subtotals to the "
1974 "list of categories here."));
1975 else if (e->op == CTPO_CAT_TOTAL)
/* Unlike the neighboring hints, this one is issued with msg(), that is,
   without a source location. */
1976 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1977 "CATEGORIES specification."));
1979 msg_at (SN, cats_location,
1980 _("To fix the problem, add the missing category to the "
1981 "list of categories here."));
1984 if (pc_cat->pc->hide_source_cats)
/* Operators: check both operands, skipping null ones. */
1998 for (size_t i = 0; i < 2; i++)
1999 if (e->subs[i] && !ctables_recursive_check_postcompute (
2000 dict, e->subs[i], pc_cat, cats, cats_location))
2009 all_strings (struct variable **vars, size_t n_vars,
2010 const struct ctables_category *cat)
2012 for (size_t j = 0; j < n_vars; j++)
2013 if (var_is_numeric (vars[j]))
2015 msg_at (SE, cat->location,
2016 _("This category specification may be applied only to string "
2017 "variables, but this subcommand tries to apply it to "
2018 "numeric variable %s."),
2019 var_get_name (vars[j]));
/* Parses a CATEGORIES subcommand for table T.

   Syntax handled here: VARIABLES=varlist, then an optional bracketed list of
   explicit categories, then any of the settings ORDER, KEY, MISSING, TOTAL,
   LABEL, POSITION, and EMPTY.  ORDER, KEY, and MISSING are accepted only when
   no explicit categories were given.

   A new `struct ctables_categories' is created with one reference per listed
   variable.  For each listed variable, the previous entry in T->categories
   (indexed by dictionary index) is unreferenced.  After the settings are
   parsed, a totals category may be added before or after the others, and
   explicit categories are validated against the variables' types.

   NOTE(review): many short statements (returns, error exits, `break's, the
   declaration of `parse_strings', and some conditions) are not visible in
   this excerpt, so the comments below describe only what is shown. */
2026 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2027 struct ctables *ct, struct ctables_table *t)
2029 if (!lex_match_id (lexer, "VARIABLES"))
2031 lex_match (lexer, T_EQUALS);
2033 struct variable **vars;
2035 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all of the variables, or null if they
   do not all share one. */
2038 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2039 for (size_t i = 1; i < n_vars; i++)
2041 const struct fmt_spec *f = var_get_print_format (vars[i]);
2042 if (f->type != common_format->type)
2044 common_format = NULL;
/* This condition tests for a date, time, or date-component format category;
   presumably it is the tail of the `parse_strings' initializer. */
2050 && (fmt_get_category (common_format->type)
2051 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One category set is shared by all of the variables, hence n_refs. */
2053 struct ctables_categories *c = xmalloc (sizeof *c);
2054 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2055 for (size_t i = 0; i < n_vars; i++)
2057 struct ctables_categories **cp
2058 = &t->categories[var_get_dict_index (vars[i])];
2059 ctables_categories_unref (*cp);
/* Explicit category list.  The token offsets of the list are kept so that
   diagnostics can point at it later; -1 means no list was given. */
2063 size_t allocated_cats = 0;
2064 int cats_start_ofs = -1;
2065 int cats_end_ofs = -1;
2066 if (lex_match (lexer, T_LBRACK))
2068 cats_start_ofs = lex_ofs (lexer);
2071 if (c->n_cats >= allocated_cats)
2072 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2074 int start_ofs = lex_ofs (lexer);
2075 struct ctables_category *cat = &c->cats[c->n_cats];
2076 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2078 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* Commas between categories are optional. */
2081 lex_match (lexer, T_COMMA);
2083 while (!lex_match (lexer, T_RBRACK));
2084 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category; ORDER, KEY, and MISSING modify it. */
2087 struct ctables_category cat = {
2089 .include_missing = false,
2090 .sort_ascending = true,
2092 bool show_totals = false;
2093 char *total_label = NULL;
2094 bool totals_before = false;
2095 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2097 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2099 lex_match (lexer, T_EQUALS);
2100 if (lex_match_id (lexer, "A"))
2101 cat.sort_ascending = true;
2102 else if (lex_match_id (lexer, "D"))
2103 cat.sort_ascending = false;
2106 lex_error_expecting (lexer, "A", "D");
2110 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2112 lex_match (lexer, T_EQUALS);
2113 if (lex_match_id (lexer, "VALUE"))
2114 cat.type = CCT_VALUE;
2115 else if (lex_match_id (lexer, "LABEL"))
2116 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in the range 0 to
   100. */
2119 cat.type = CCT_FUNCTION;
2120 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2121 &cat.weighting, &cat.area))
2124 if (lex_match (lexer, T_LPAREN))
2126 cat.sort_var = parse_variable (lexer, dict);
2130 if (cat.sort_function == CTSF_PTILE)
2132 lex_match (lexer, T_COMMA);
2133 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2135 cat.percentile = lex_number (lexer);
2139 if (!lex_force_match (lexer, T_RPAREN))
2142 else if (ctables_function_availability (cat.sort_function)
/* The "(" was already found absent above, so this call can only fail; it is
   used to report the error, and its result is deliberately unused. */
2145 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2150 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2152 lex_match (lexer, T_EQUALS);
2153 if (lex_match_id (lexer, "INCLUDE"))
2154 cat.include_missing = true;
2155 else if (lex_match_id (lexer, "EXCLUDE"))
2156 cat.include_missing = false;
2159 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2163 else if (lex_match_id (lexer, "TOTAL"))
2165 lex_match (lexer, T_EQUALS);
2166 if (!parse_bool (lexer, &show_totals))
2169 else if (lex_match_id (lexer, "LABEL"))
2171 lex_match (lexer, T_EQUALS);
2172 if (!lex_force_string (lexer))
/* NOTE(review): if LABEL may be given more than once, confirm that the
   previous total_label is freed before this assignment (a statement just
   above this line is not visible in this excerpt). */
2175 total_label = ss_xstrdup (lex_tokss (lexer));
2178 else if (lex_match_id (lexer, "POSITION"))
2180 lex_match (lexer, T_EQUALS);
2181 if (lex_match_id (lexer, "BEFORE"))
2182 totals_before = true;
2183 else if (lex_match_id (lexer, "AFTER"))
2184 totals_before = false;
2187 lex_error_expecting (lexer, "BEFORE", "AFTER");
2191 else if (lex_match_id (lexer, "EMPTY"))
2193 lex_match (lexer, T_EQUALS);
2194 if (lex_match_id (lexer, "INCLUDE"))
2195 c->show_empty = true;
2196 else if (lex_match_id (lexer, "EXCLUDE"))
2197 c->show_empty = false;
2200 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expecting" error: the first also lists the settings
   that are valid only without explicit categories. */
2207 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2208 "TOTAL", "LABEL", "POSITION", "EMPTY");
2210 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the settings above. */
2217 if (c->n_cats >= allocated_cats)
2218 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2219 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, placed first or last. */
2224 if (c->n_cats >= allocated_cats)
2225 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2227 struct ctables_category *totals;
2230 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2231 totals = &c->cats[0];
2234 totals = &c->cats[c->n_cats];
/* The label defaults to the translated string "Total". */
2237 *totals = (struct ctables_category) {
2239 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first, backward otherwise,
   recording in each category the subtotal tracked in `subtotal'. */
2243 struct ctables_category *subtotal = NULL;
2244 for (size_t i = totals_before ? 0 : c->n_cats;
2245 totals_before ? i < c->n_cats : i-- > 0;
2246 totals_before ? i++ : 0)
2248 struct ctables_category *cat = &c->cats[i];
2257 cat->subtotal = subtotal;
2260 case CCT_POSTCOMPUTE:
2271 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories, done only if a bracketed list was
   given. */
2276 if (cats_start_ofs != -1)
2278 for (size_t i = 0; i < c->n_cats; i++)
2280 struct ctables_category *cat = &c->cats[i];
2283 case CCT_POSTCOMPUTE:
/* Record how string references inside the postcompute should be parsed, then
   check its expression against this category list. */
2284 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2285 struct msg_location *cats_location
2286 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2287 bool ok = ctables_recursive_check_postcompute (
2288 dict, cat->pc->expr, cat, c, cats_location);
2289 msg_location_destroy (cats_location);
2296 for (size_t j = 0; j < n_vars; j++)
2297 if (var_is_alpha (vars[j]))
2299 msg_at (SE, cat->location,
2300 _("This category specification may be applied "
2301 "only to numeric variables, but this "
2302 "subcommand tries to apply it to string "
2304 var_get_name (vars[j]));
/* A string category is either converted to a number using the common format
   or required to apply to string variables only. */
2313 if (!parse_category_string (cat->location, cat->string, dict,
2314 common_format->type, &n))
2317 ss_dealloc (&cat->string);
2319 cat->type = CCT_NUMBER;
2322 else if (!all_strings (vars, n_vars, cat))
/* Likewise for string ranges; a null bound is an open end of the range. */
2331 if (!cat->srange[0].string)
2333 else if (!parse_category_string (cat->location,
2334 cat->srange[0], dict,
2335 common_format->type, &n[0]))
2338 if (!cat->srange[1].string)
2340 else if (!parse_category_string (cat->location,
2341 cat->srange[1], dict,
2342 common_format->type, &n[1]))
2345 ss_dealloc (&cat->srange[0]);
2346 ss_dealloc (&cat->srange[1]);
2348 cat->type = CCT_NRANGE;
2349 cat->nrange[0] = n[0];
2350 cat->nrange[1] = n[1];
2352 else if (!all_strings (vars, n_vars, cat))
2363 case CCT_EXCLUDED_MISSING:
/* Releases the resources held by NEST: the summary spec set for each summary
   variant and the per-area-type index arrays in NEST->areas.  NOTE(review): a
   statement before the first loop is not visible in this excerpt. */
2378 ctables_nest_uninit (struct ctables_nest *nest)
2381 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2382 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2383 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2384 free (nest->areas[at]);
/* Uninitializes every nest in STACK and frees the nest array.  STACK itself
   is not freed here.  NOTE(review): a guard around these statements is not
   visible in this excerpt. */
2388 ctables_stack_uninit (struct ctables_stack *stack)
2392 for (size_t i = 0; i < stack->n; i++)
2393 ctables_nest_uninit (&stack->nests[i]);
2394 free (stack->nests);
/* Returns the stack obtained by nesting every nest of S0 with every nest of
   S1, in that order.  Each resulting nest lists the variables of the S0 nest
   followed by those of the S1 nest.  Its scale and summary indexes come from
   the S0 nest if it has one, otherwise from the S1 nest offset by the S0
   nest's variable count.  Summary specs are cloned from `summary_src'.

   S0 and S1 are consumed: both are uninitialized before returning.

   NOTE(review): the statements at the start of the function, the assignments
   to `summary_src', and the return are not visible in this excerpt. */
2398 static struct ctables_stack
2399 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for one nest per (S0 nest, S1 nest) pair. */
2406 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2407 for (size_t i = 0; i < s0.n; i++)
2408 for (size_t j = 0; j < s1.n; j++)
2410 const struct ctables_nest *a = &s0.nests[i];
2411 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2413 size_t allocate = a->n + b->n;
2414 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2416 for (size_t k = 0; k < a->n; k++)
2417 vars[n++] = a->vars[k];
2418 for (size_t k = 0; k < b->n; k++)
2419 vars[n++] = b->vars[k];
2420 assert (n == allocate);
/* Choose which of the two nests supplies the summary specs, depending on
   which one lacks a cell summary variable. */
2422 const struct ctables_nest *summary_src;
2423 if (!a->specs[CSV_CELL].var)
2425 else if (!b->specs[CSV_CELL].var)
2430 struct ctables_nest *new = &stack.nests[stack.n++];
2431 *new = (struct ctables_nest) {
/* SIZE_MAX means "no index"; an index taken from B is shifted past A's
   variables. */
2433 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2434 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2436 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2437 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2441 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2442 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
/* Everything needed was copied or cloned, so the inputs can be destroyed. */
2444 ctables_stack_uninit (&s0);
2445 ctables_stack_uninit (&s1);
2449 static struct ctables_stack
2450 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2452 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2453 for (size_t i = 0; i < s0.n; i++)
2454 stack.nests[stack.n++] = s0.nests[i];
2455 for (size_t i = 0; i < s1.n; i++)
2457 stack.nests[stack.n] = s1.nests[i];
2458 stack.nests[stack.n].group_head += s0.n;
2461 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single nest for variable axis node A.

   The nest's scale index is 0 if A is scale, and its summary index is 0 if A
   is scale or has cell summary specs; otherwise each is SIZE_MAX.  The
   summary spec sets are cloned from A, and their `var' and `is_scale' members
   are taken from A.

   NOTE(review): the statement that fills in `vars', some initializer fields,
   and any condition guarding the loop are not visible in this excerpt. */
2467 static struct ctables_stack
2468 var_fts (const struct ctables_axis *a)
2470 struct variable **vars = xmalloc (sizeof *vars);
2473 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2474 struct ctables_nest *nest = xmalloc (sizeof *nest);
2475 *nest = (struct ctables_nest) {
2478 .scale_idx = a->scale ? 0 : SIZE_MAX,
2479 .summary_idx = is_summary ? 0 : SIZE_MAX,
2482 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2484 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2485 nest->specs[sv].var = a->var;
2486 nest->specs[sv].is_scale = a->scale;
2488 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts axis expression A into a `struct ctables_stack'.

   An empty stack is returned in one case.  Otherwise the stacks for A's two
   operands are combined with stack_fts() or nest_fts().

   NOTE(review): the conditions and case labels that select among these
   results, and any use of AXIS_TYPE other than passing it down, are not
   visible in this excerpt.  Presumably the empty stack is for a null axis,
   stack_fts() for stacking, and nest_fts() for nesting. */
2491 static struct ctables_stack
2492 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2495 return (struct ctables_stack) { .n = 0 };
2503 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2504 enumerate_fts (axis_type, a->subs[1]));
2507 /* This should consider any of the scale variables found in the result to
2508 be linked to each other listwise for SMISSING=LISTWISE. */
2509 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2510 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function in one cell.  Which member is in use
   depends on the summary function, as the comments on the members say.
   NOTE(review): several members are not visible in this excerpt. */
2516 union ctables_summary
2518 /* COUNT, VALIDN, TOTALN. */
2521 /* MINIMUM, MAXIMUM, RANGE. */
2528 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2529 struct moments1 *moments;
2531 /* MEDIAN, MODE, PTILE. */
2534 struct casewriter *writer;
/* Prepares S for accumulating the summary function SS->function.

   NOTE(review): most case labels are not visible in this excerpt, so which
   functions map to which initialization below is only partly shown. */
2541 ctables_summary_init (union ctables_summary *s,
2542 const struct ctables_summary_spec *ss)
2544 switch (ss->function)
2547 case CTSF_areaPCT_COUNT:
2548 case CTSF_areaPCT_VALIDN:
2549 case CTSF_areaPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the minimum and maximum. */
2562 s->min = s->max = SYSMIS;
/* Functions that need only the mean (and weight). */
2567 case CTSF_areaPCT_SUM:
2568 s->moments = moments1_create (MOMENT_MEAN);
/* Functions that also need the variance. */
2574 s->moments = moments1_create (MOMENT_VARIANCE);
/* Order statistics: cases with two numeric values are collected in a writer
   that sorts on the first value in ascending order.  ctables_summary_add()
   stores the data value first and the weight second. */
2581 struct caseproto *proto = caseproto_create ();
2582 proto = caseproto_add_width (proto, 0);
2583 proto = caseproto_add_width (proto, 0);
2585 struct subcase ordering;
2586 subcase_init (&ordering, 0, 0, SC_ASCEND);
2587 s->writer = sort_create_writer (&ordering, proto);
2588 subcase_uninit (&ordering);
2589 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for SS->function:
   the moments for moment-based functions, the case writer for order
   statistics.  NOTE(review): most case labels are not visible in this
   excerpt. */
2599 ctables_summary_uninit (union ctables_summary *s,
2600 const struct ctables_summary_spec *ss)
2602 switch (ss->function)
2605 case CTSF_areaPCT_COUNT:
2606 case CTSF_areaPCT_VALIDN:
2607 case CTSF_areaPCT_TOTALN:
2626 case CTSF_areaPCT_SUM:
2627 moments1_destroy (s->moments);
2633 casewriter_destroy (s->writer);
/* Adds one observation, VALUE of VAR, to accumulator S for summary function
   SS->function.  The flags IS_SCALE, IS_SCALE_MISSING, IS_MISSING, and
   IS_INCLUDED decide whether the observation counts for that function; the
   chart in the function body gives the rules for the counting functions.
   Moment-based functions and order statistics skip observations for which
   IS_SCALE_MISSING is set.

   NOTE(review): the remaining parameters (including the weight used below)
   and most case labels are not visible in this excerpt. */
2639 ctables_summary_add (union ctables_summary *s,
2640 const struct ctables_summary_spec *ss,
2641 const struct variable *var, const union value *value,
2642 bool is_scale, bool is_scale_missing,
2643 bool is_missing, bool is_included,
2646 /* To determine whether a case is included in a given table for a particular
2647 kind of summary, consider the following charts for each variable in the
2648 table. Only if "yes" appears for every variable for the summary is the
2651 Categorical variables: VALIDN COUNT TOTALN
2652 Valid values in included categories yes yes yes
2653 Missing values in included categories --- yes yes
2654 Missing values in excluded categories --- --- yes
2655 Valid values in excluded categories --- --- ---
2657 Scale variables: VALIDN COUNT TOTALN
2658 Valid value yes yes yes
2659 Missing value --- yes yes
2661 Missing values include both user- and system-missing. (The system-missing
2662 value is always in an excluded category.)
2664 switch (ss->function)
2670 case CTSF_areaPCT_TOTALN:
2675 if (is_scale || is_included)
2679 case CTSF_areaPCT_COUNT:
2680 if (is_scale || is_included)
2691 case CTSF_areaPCT_VALIDN:
2711 if (!is_scale_missing)
2713 assert (!var_is_alpha (var)); /* XXX? */
2714 if (s->min == SYSMIS || value->f < s->min)
2716 if (s->max == SYSMIS || value->f > s->max)
2726 if (!is_scale_missing)
2727 moments1_add (s->moments, value->f, weight);
2730 case CTSF_areaPCT_SUM:
2731 if (!is_missing && !is_scale_missing)
2732 moments1_add (s->moments, value->f, weight);
2738 if (!is_scale_missing)
2740 s->ovalid += weight;
2742 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2743 *case_num_rw_idx (c, 0) = value->f;
2744 *case_num_rw_idx (c, 1) = weight;
2745 casewriter_write (s->writer, c);
/* Computes the final value of summary function SS->function from accumulator
   S, for CELL.  Percentage functions divide by a total taken from the cell's
   area CELL->areas[SS->calc_area] and yield SYSMIS when that total is zero.

   NOTE(review): most case labels and several return statements are not
   visible in this excerpt. */
2752 ctables_summary_value (const struct ctables_cell *cell,
2753 union ctables_summary *s,
2754 const struct ctables_summary_spec *ss)
2756 switch (ss->function)
2762 return cell->areas[ss->calc_area]->sequence;
/* Count as a percentage of the area's count, valid count, or total count. */
2764 case CTSF_areaPCT_COUNT:
2766 const struct ctables_area *a = cell->areas[ss->calc_area];
2767 double a_count = a->count[ss->weighting];
2768 return a_count ? s->count / a_count * 100 : SYSMIS;
2771 case CTSF_areaPCT_VALIDN:
2773 const struct ctables_area *a = cell->areas[ss->calc_area];
2774 double a_valid = a->valid[ss->weighting];
2775 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2778 case CTSF_areaPCT_TOTALN:
2780 const struct ctables_area *a = cell->areas[ss->calc_area];
2781 double a_total = a->total[ss->weighting];
2782 return a_total ? s->count / a_total * 100 : SYSMIS;
/* The range is defined only if both a minimum and a maximum were seen. */
2797 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2802 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2808 double weight, variance;
2809 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2810 return calc_semean (variance, weight);
2816 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2817 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2822 double weight, mean;
2823 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2824 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2830 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* This cell's sum as a percentage of the area's sum for the same summary
   variable. */
2834 case CTSF_areaPCT_SUM:
2836 double weight, mean;
2837 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2838 if (weight == SYSMIS || mean == SYSMIS)
2841 const struct ctables_area *a = cell->areas[ss->calc_area];
2842 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2843 double denom = sum->sum[ss->weighting];
2844 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Percentiles: read the accumulated cases back in sorted order and compute
   the percentile, storing the result in S->ovalue. */
2851 struct casereader *reader = casewriter_make_reader (s->writer);
/* NOTE(review): any function other than PTILE uses 0.5 here, while the
   parsers in this file accept PTILE values in the range 0 to 100.  If
   SS->percentile holds the value as parsed, confirm which scale
   percentile_create() expects. */
2854 struct percentile *ptile = percentile_create (
2855 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2856 struct order_stats *os = &ptile->parent;
2857 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2858 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2859 statistic_destroy (&ptile->parent.parent);
/* Mode: same approach, with a mode statistic. */
2866 struct casereader *reader = casewriter_make_reader (s->writer);
2869 struct mode *mode = mode_create ();
2870 struct order_stats *os = &mode->parent;
2871 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2872 s->ovalue = mode->mode;
2873 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis along which cells are compared. */
2881 struct ctables_cell_sort_aux
2883 const struct ctables_nest *nest;
2884 enum pivot_axis_type a;
/* Three-way comparison of two cells along one axis.  A_ and B_ point to
   `struct ctables_cell *' elements, and AUX_ points to a `struct
   ctables_cell_sort_aux' that names the nest and the axis.

   The nest's variables are compared in order, skipping the scale variable.
   Cells in different categories are ordered by the addresses of their
   category structures.  Cells in the same category are ordered, depending on
   the category type, by value or by value label, and for sorted categories
   the result is reversed when the category's sort_ascending is false.

   NOTE(review): most case labels, the handling of missing labels, and the
   final return are not visible in this excerpt. */
2888 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2890 const struct ctables_cell_sort_aux *aux = aux_;
2891 struct ctables_cell *const *ap = a_;
2892 struct ctables_cell *const *bp = b_;
2893 const struct ctables_cell *a = *ap;
2894 const struct ctables_cell *b = *bp;
2896 const struct ctables_nest *nest = aux->nest;
2897 for (size_t i = 0; i < nest->n; i++)
2898 if (i != nest->scale_idx)
2900 const struct variable *var = nest->vars[i];
2901 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2902 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category pointers themselves. */
2903 if (a_cv->category != b_cv->category)
2904 return a_cv->category > b_cv->category ? 1 : -1;
2906 const union value *a_val = &a_cv->value;
2907 const union value *b_val = &b_cv->value;
2908 switch (a_cv->category->type)
2914 case CCT_POSTCOMPUTE:
2915 case CCT_EXCLUDED_MISSING:
2916 /* Must be equal. */
2924 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2932 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2934 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label. */
2940 const char *a_label = var_lookup_value_label (var, a_val);
2941 const char *b_label = var_lookup_value_label (var, b_val);
2947 cmp = strcmp (a_label, b_label);
2953 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2956 return a_cv->category->sort_ascending ? cmp : -cmp;
2968 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2969 const void *aux UNUSED)
2971 struct ctables_cell *const *ap = a_;
2972 struct ctables_cell *const *bp = b_;
2973 const struct ctables_cell *a = *ap;
2974 const struct ctables_cell *b = *bp;
2976 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2978 int al = a->axes[axis].leaf;
2979 int bl = b->axes[axis].leaf;
2981 return al > bl ? 1 : -1;
2989 For each ctables_table:
2990 For each combination of row vars:
2991 For each combination of column vars:
2992 For each combination of layer vars:
2994 Make a table of row values:
2995 Sort entries by row values
2996 Assign a 0-based index to each actual value
2997 Construct a dimension
2998 Make a table of column values
2999 Make a table of layer values
3001 Fill the table entry using the indexes from before.
/* Finds or creates, in section S, the area of type AREA that CELL belongs to.

   An area is identified by the categories of the variables listed, for each
   axis, in nest->areas[AREA], together with their values.  Values are ignored
   for total, subtotal, and postcompute categories.  A hash over those keys is
   used to look the area up in S->areas[AREA].

   A newly created area records CELL as its example and gets a zero-filled
   array with one sum per S->table->n_sum_vars.

   NOTE(review): the initialization of `hash', the code reached when an
   existing area matches, and the return statements are not visible in this
   excerpt. */
3004 static struct ctables_area *
3005 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3006 enum ctables_area_type area)
3009 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3011 const struct ctables_nest *nest = s->nests[a];
3012 for (size_t i = 0; i < nest->n_areas[area]; i++)
3014 size_t v_idx = nest->areas[area][i];
3015 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3016 hash = hash_pointer (cv->category, hash);
3017 if (cv->category->type != CCT_TOTAL
3018 && cv->category->type != CCT_SUBTOTAL
3019 && cv->category->type != CCT_POSTCOMPUTE)
3020 hash = value_hash (&cv->value,
3021 var_get_width (nest->vars[v_idx]), hash);
/* Compare each candidate with the same hash against CELL, key by key, using
   the candidate's example cell. */
3025 struct ctables_area *a;
3026 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3028 const struct ctables_cell *df = a->example;
/* The loop variable below shadows the area pointer `a' declared above. */
3029 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3031 const struct ctables_nest *nest = s->nests[a];
3032 for (size_t i = 0; i < nest->n_areas[area]; i++)
3034 size_t v_idx = nest->areas[area][i];
3035 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3036 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3037 if (cv1->category != cv2->category
3038 || (cv1->category->type != CCT_TOTAL
3039 && cv1->category->type != CCT_SUBTOTAL
3040 && cv1->category->type != CCT_POSTCOMPUTE
3041 && !value_equal (&cv1->value, &cv2->value,
3042 var_get_width (nest->vars[v_idx]))))
/* No match: create a new area. */
3051 struct ctables_sum *sums = (s->table->n_sum_vars
3052 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3055 a = xmalloc (sizeof *a);
3056 *a = (struct ctables_area) { .example = cell, .sums = sums };
3057 hmap_insert (&s->areas[area], &a->node, hash);
3061 static struct substring
3062 rtrim_value (const union value *v, const struct variable *var)
3064 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3065 var_get_width (var));
3066 ss_rtrim (&s, ss_cstr (" "));
3071 in_string_range (const union value *v, const struct variable *var,
3072 const struct substring *srange)
3074 struct substring s = rtrim_value (v, var);
3075 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3076 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The categories are examined from last to first.  Numbers and strings match
   by equality (strings after trailing spaces are removed), ranges by
   inclusive bounds where -DBL_MAX and DBL_MAX mean "unbounded", and a missing
   category by VAR's missing-value test.  If nothing matches, the result is
   the category remembered in `othernm', or NULL if V is a missing value.

   NOTE(review): the case labels, the statements taken on each match, the
   assignment to `othernm', and the result for a system-missing numeric value
   are not visible in this excerpt. */
3079 static const struct ctables_category *
3080 ctables_categories_match (const struct ctables_categories *c,
3081 const union value *v, const struct variable *var)
3083 if (var_is_numeric (var) && v->f == SYSMIS)
3086 const struct ctables_category *othernm = NULL;
3087 for (size_t i = c->n_cats; i-- > 0; )
3089 const struct ctables_category *cat = &c->cats[i];
3093 if (cat->number == v->f)
3098 if (ss_equals (cat->string, rtrim_value (v, var)))
3103 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3104 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3109 if (in_string_range (v, var, cat->srange))
3114 if (var_is_value_missing (var, v))
3118 case CCT_POSTCOMPUTE:
/* This category accepts any value, except that missing values are accepted
   only when the category includes them. */
3133 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3136 case CCT_EXCLUDED_MISSING:
3141 return var_is_value_missing (var, v) ? NULL : othernm;
3144 static const struct ctables_category *
3145 ctables_categories_total (const struct ctables_categories *c)
3147 const struct ctables_category *first = &c->cats[0];
3148 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3149 return (first->type == CCT_TOTAL ? first
3150 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S that corresponds to the categories
   in CATS (one per axis and nesting level) together with the data values in
   case C.

   The first loop hashes each non-scale variable's category pointer and, for
   categories other than totals, subtotals and postcomputes, its data value.
   The second loop probes S->cells for a cell with the same categories and
   values.  The remaining lines allocate a new cell, record its categories,
   values, omitted areas and postcompute flag, initialize its summaries and
   areas, and insert it into S->cells.

   The control flow joining these phases (and the return statements) is not
   visible in this excerpt; presumably an existing cell is returned as-is --
   TODO confirm.  The caller ctables_cell_add__ uses the result as the cell
   to accumulate into. */
3154 static struct ctables_cell *
3155 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3156 const struct ctables_category *cats[PIVOT_N_AXES][10])
3159 enum ctables_summary_variant sv = CSV_CELL;
/* Phase 1: compute the hash that identifies the cell. */
3160 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3162 const struct ctables_nest *nest = s->nests[a];
3163 for (size_t i = 0; i < nest->n; i++)
3164 if (i != nest->scale_idx)
3166 hash = hash_pointer (cats[a][i], hash);
3167 if (cats[a][i]->type != CCT_TOTAL
3168 && cats[a][i]->type != CCT_SUBTOTAL
3169 && cats[a][i]->type != CCT_POSTCOMPUTE)
3170 hash = value_hash (case_data (c, nest->vars[i]),
3171 var_get_width (nest->vars[i]), hash);
/* Phase 2: look for an existing cell with identical categories and, for
   ordinary categories, identical values. */
3177 struct ctables_cell *cell;
3178 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3180 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3182 const struct ctables_nest *nest = s->nests[a];
3183 for (size_t i = 0; i < nest->n; i++)
3184 if (i != nest->scale_idx
3185 && (cats[a][i] != cell->axes[a].cvs[i].category
3186 || (cats[a][i]->type != CCT_TOTAL
3187 && cats[a][i]->type != CCT_SUBTOTAL
3188 && cats[a][i]->type != CCT_POSTCOMPUTE
3189 && !value_equal (case_data (c, nest->vars[i]),
3190 &cell->axes[a].cvs[i].value,
3191 var_get_width (nest->vars[i])))))
/* Phase 3: build a new cell. */
3200 cell = xmalloc (sizeof *cell);
3203 cell->omit_areas = 0;
3204 cell->postcompute = false;
3205 //struct string name = DS_EMPTY_INITIALIZER;
3206 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3208 const struct ctables_nest *nest = s->nests[a];
3209 cell->axes[a].cvs = (nest->n
3210 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3212 for (size_t i = 0; i < nest->n; i++)
/* NOTE(review): CAT is read for every I, including nest->scale_idx, but the
   visible caller ctables_cell_insert only fills cats[a][i] for non-scale
   indexes, so the pointer stored below for the scale variable is
   indeterminate.  Code that walks cvs[] must skip scale_idx. */
3214 const struct ctables_category *cat = cats[a][i];
3215 const struct variable *var = nest->vars[i];
3216 const union value *value = case_data (c, var);
3217 if (i != nest->scale_idx)
3219 const struct ctables_category *subtotal = cat->subtotal;
3220 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal or postcompute category marks, per axis, a set of area
   types in the cell's omit_areas bit mask. */
3223 if (cat->type == CCT_TOTAL
3224 || cat->type == CCT_SUBTOTAL
3225 || cat->type == CCT_POSTCOMPUTE)
3229 case PIVOT_AXIS_COLUMN:
3230 cell->omit_areas |= ((1u << CTAT_TABLE) |
3231 (1u << CTAT_LAYER) |
3232 (1u << CTAT_LAYERCOL) |
3233 (1u << CTAT_SUBTABLE) |
3236 case PIVOT_AXIS_ROW:
3237 cell->omit_areas |= ((1u << CTAT_TABLE) |
3238 (1u << CTAT_LAYER) |
3239 (1u << CTAT_LAYERROW) |
3240 (1u << CTAT_SUBTABLE) |
3243 case PIVOT_AXIS_LAYER:
3244 cell->omit_areas |= ((1u << CTAT_TABLE) |
3245 (1u << CTAT_LAYER));
3249 if (cat->type == CCT_POSTCOMPUTE)
3250 cell->postcompute = true;
/* The cell keeps its own clone of the value. */
3253 cell->axes[a].cvs[i].category = cat;
3254 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a debugging name in `name',
   whose declaration above is commented out; presumably this region is
   compiled out by a preprocessor conditional that is not visible here --
   confirm, and consider deleting the dead code. */
3257 if (i != nest->scale_idx)
3259 if (!ds_is_empty (&name))
3260 ds_put_cstr (&name, ", ");
3261 char *value_s = data_out (value, var_get_encoding (var),
3262 var_get_print_format (var),
3263 settings_get_fmt_settings ());
3264 if (cat->type == CCT_TOTAL
3265 || cat->type == CCT_SUBTOTAL
3266 || cat->type == CCT_POSTCOMPUTE)
3267 ds_put_format (&name, "%s=total", var_get_name (var));
3269 ds_put_format (&name, "%s=%s", var_get_name (var),
3270 value_s + strspn (value_s, " "));
3276 //cell->name = ds_steal_cstr (&name);
/* One summary accumulator per summary spec of the cell's variant, then one
   area per area type, and finally registration in the section's cell map. */
3278 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3279 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3280 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3281 for (size_t i = 0; i < specs->n; i++)
3282 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3283 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3284 cell->areas[at] = ctables_area_insert (s, cell, at);
3285 hmap_insert (&s->cells, &cell->node, hash);
3290 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3291 const struct ccase *c)
3293 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3295 const struct variable *var = specs->listwise_vars[i];
3296 if (var_is_num_missing (var, case_num (c, var)))
3304 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3306 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3311 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3312 const struct ctables_category *cats[PIVOT_N_AXES][10],
3313 bool is_included, double weight[N_CTWS])
3315 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3316 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3318 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3319 const union value *value = case_data (c, specs->var);
3320 bool is_missing = var_is_value_missing (specs->var, value);
3321 bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c));
3323 for (size_t i = 0; i < specs->n; i++)
3324 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3325 specs->var, value, specs->is_scale,
3326 scale_missing, is_missing, is_included,
3327 weight[specs->specs[i].weighting]);
3328 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3329 if (!(cell->omit_areas && (1u << at)))
3331 struct ctables_area *a = cell->areas[at];
3333 add_weight (a->total, weight);
3335 add_weight (a->count, weight);
3338 add_weight (a->valid, weight);
3341 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3343 const struct variable *var = s->table->sum_vars[i];
3344 double addend = case_num (c, var);
3345 if (!var_is_num_missing (var, addend))
3346 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3347 a->sums[i].sum[wt] += addend * weight[wt];
3354 recurse_totals (struct ctables_section *s, const struct ccase *c,
3355 const struct ctables_category *cats[PIVOT_N_AXES][10],
3356 bool is_included, double weight[N_CTWS],
3357 enum pivot_axis_type start_axis, size_t start_nest)
3359 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3361 const struct ctables_nest *nest = s->nests[a];
3362 for (size_t i = start_nest; i < nest->n; i++)
3364 if (i == nest->scale_idx)
3367 const struct variable *var = nest->vars[i];
3369 const struct ctables_category *total = ctables_categories_total (
3370 s->table->categories[var_get_dict_index (var)]);
3373 const struct ctables_category *save = cats[a][i];
3375 ctables_cell_add__ (s, c, cats, is_included, weight);
3376 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3385 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3386 const struct ctables_category *cats[PIVOT_N_AXES][10],
3387 bool is_included, double weight[N_CTWS],
3388 enum pivot_axis_type start_axis, size_t start_nest)
3390 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3392 const struct ctables_nest *nest = s->nests[a];
3393 for (size_t i = start_nest; i < nest->n; i++)
3395 if (i == nest->scale_idx)
3398 const struct ctables_category *save = cats[a][i];
3401 cats[a][i] = save->subtotal;
3402 ctables_cell_add__ (s, c, cats, is_included, weight);
3403 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3412 ctables_add_occurrence (const struct variable *var,
3413 const union value *value,
3414 struct hmap *occurrences)
3416 int width = var_get_width (var);
3417 unsigned int hash = value_hash (value, width, 0);
3419 struct ctables_occurrence *o;
3420 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3422 if (value_equal (value, &o->value, width))
3425 o = xmalloc (sizeof *o);
3426 value_clone (&o->value, value, width);
3427 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights WEIGHT, to section S.

   First the category of each non-scale variable is determined with
   ctables_categories_match.  Then each non-scale variable's value is passed
   to ctables_add_occurrence.  Finally the case is added to its own cell and
   to the cells for every combination of totals and of subtotals.

   Several branch and return statements of this function are not visible in
   this excerpt, so the exact conditions under which a case is dropped or
   marked as excluded are not restated here. */
3431 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3432 double weight[N_CTWS])
/* NOTE(review): this array has room for only 10 nesting levels per axis and
   nothing visible here checks nest->n against that bound.  It is also left
   uninitialized and only the non-scale slots are assigned below, while
   ctables_cell_insert__ copies every slot up to nest->n into the cell.
   Consider zero-initializing it and asserting nest->n <= 10. */
3434 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3436 bool is_included = true;
3438 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3440 const struct ctables_nest *nest = s->nests[a];
3441 for (size_t i = 0; i < nest->n; i++)
3442 if (i != nest->scale_idx)
3444 const struct variable *var = nest->vars[i];
3445 const union value *value = case_data (c, var);
3447 cats[a][i] = ctables_categories_match (
3448 s->table->categories[var_get_dict_index (var)], value, var);
/* The tests below involve the summary variable position and whether the
   value is missing; the statements they guard are not visible here. */
3451 if (i != nest->summary_idx)
3454 if (!var_is_value_missing (var, value))
/* Shared placeholder category of type CCT_EXCLUDED_MISSING; when it is
   assigned, the case is flagged as not included. */
3457 static const struct ctables_category cct_excluded_missing = {
3458 .type = CCT_EXCLUDED_MISSING,
3461 cats[a][i] = &cct_excluded_missing;
3462 is_included = false;
/* Record the values that occurred for each non-scale variable. */
3468 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3470 const struct ctables_nest *nest = s->nests[a];
3471 for (size_t i = 0; i < nest->n; i++)
3472 if (i != nest->scale_idx)
3474 const struct variable *var = nest->vars[i];
3475 const union value *value = case_data (c, var);
3476 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* The cell itself, then all total combinations, then all subtotal
   combinations. */
3480 ctables_cell_add__ (s, c, cats, is_included, weight);
3481 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3482 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* Summary spec set that a merge item refers to; merge_item_compare_3way
   reads the spec at set->specs[ofs] of each item it compares. */
3487 const struct ctables_summary_spec_set *set;
3492 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3494 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3495 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3496 if (as->function != bs->function)
3497 return as->function > bs->function ? 1 : -1;
3498 else if (as->weighting != bs->weighting)
3499 return as->weighting > bs->weighting ? 1 : -1;
3500 else if (as->calc_area != bs->calc_area)
3501 return as->calc_area > bs->calc_area ? 1 : -1;
3502 else if (as->percentile != bs->percentile)
3503 return as->percentile < bs->percentile ? 1 : -1;
3505 const char *as_label = as->label ? as->label : "";
3506 const char *bs_label = bs->label ? bs->label : "";
3507 return strcmp (as_label, bs_label);
3511 ctables_category_format_number (double number, const struct variable *var,
3514 struct pivot_value *pv = pivot_value_new_var_value (
3515 var, &(union value) { .f = number });
3516 pivot_value_format (pv, NULL, s);
3517 pivot_value_destroy (pv);
3521 ctables_category_format_string (struct substring string,
3522 const struct variable *var, struct string *out)
3524 int width = var_get_width (var);
3525 char *s = xmalloc (width);
3526 buf_copy_rpad (s, width, string.string, string.length, ' ');
3527 struct pivot_value *pv = pivot_value_new_var_value (
3528 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3529 pivot_value_format (pv, NULL, out);
3530 pivot_value_destroy (pv);
3535 ctables_category_format_label (const struct ctables_category *cat,
3536 const struct variable *var,
3542 ctables_category_format_number (cat->number, var, s);
3546 ctables_category_format_string (cat->string, var, s);
3550 ctables_category_format_number (cat->nrange[0], var, s);
3551 ds_put_format (s, " THRU ");
3552 ctables_category_format_number (cat->nrange[1], var, s);
3556 ctables_category_format_string (cat->srange[0], var, s);
3557 ds_put_format (s, " THRU ");
3558 ctables_category_format_string (cat->srange[1], var, s);
3562 ds_put_cstr (s, "MISSING");
3566 ds_put_cstr (s, "OTHERNM");
3569 case CCT_POSTCOMPUTE:
3570 ds_put_format (s, "&%s", cat->pc->name);
3575 ds_put_cstr (s, cat->total_label);
3581 case CCT_EXCLUDED_MISSING:
3588 static struct pivot_value *
3589 ctables_postcompute_label (const struct ctables_categories *cats,
3590 const struct ctables_category *cat,
3591 const struct variable *var)
3593 struct substring in = ss_cstr (cat->pc->label);
3594 struct substring target = ss_cstr (")LABEL[");
3596 struct string out = DS_EMPTY_INITIALIZER;
3599 size_t chunk = ss_find_substring (in, target);
3600 if (chunk == SIZE_MAX)
3602 if (ds_is_empty (&out))
3603 return pivot_value_new_user_text (in.string, in.length);
3606 ds_put_substring (&out, in);
3607 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3611 ds_put_substring (&out, ss_head (in, chunk));
3612 ss_advance (&in, chunk + target.length);
3614 struct substring idx_s;
3615 if (!ss_get_until (&in, ']', &idx_s))
3618 long int idx = strtol (idx_s.string, &tail, 10);
3619 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3622 struct ctables_category *cat2 = &cats->cats[idx - 1];
3623 if (!ctables_category_format_label (cat2, var, &out))
3629 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3632 static struct pivot_value *
3633 ctables_category_create_value_label (const struct ctables_categories *cats,
3634 const struct ctables_category *cat,
3635 const struct variable *var,
3636 const union value *value)
3638 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3639 ? ctables_postcompute_label (cats, cat, var)
3640 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3641 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3642 : pivot_value_new_var_value (var, value));
3645 static struct ctables_value *
3646 ctables_value_find__ (struct ctables_table *t, const union value *value,
3647 int width, unsigned int hash)
3649 struct ctables_value *clv;
3650 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3651 hash, &t->clabels_values_map)
3652 if (value_equal (value, &clv->value, width))
3658 ctables_value_insert (struct ctables_table *t, const union value *value,
3661 unsigned int hash = value_hash (value, width, 0);
3662 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3665 clv = xmalloc (sizeof *clv);
3666 value_clone (&clv->value, value, width);
3667 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's map of category-label values,
   computing the hash itself.  Returns whatever ctables_value_find__
   returns for that hash. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  const unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3680 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3681 size_t ix[PIVOT_N_AXES])
3683 if (a < PIVOT_N_AXES)
3685 size_t limit = MAX (t->stacks[a].n, 1);
3686 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3687 ctables_table_add_section (t, a + 1, ix);
3691 struct ctables_section *s = &t->sections[t->n_sections++];
3692 *s = (struct ctables_section) {
3694 .cells = HMAP_INITIALIZER (s->cells),
3696 for (a = 0; a < PIVOT_N_AXES; a++)
3699 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3701 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3702 for (size_t i = 0; i < nest->n; i++)
3703 hmap_init (&s->occurrences[a][i]);
3705 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3706 hmap_init (&s->areas[at]);
/* Binary operators for postcompute expressions.  Each has the
   (double, double) -> double shape that ctables_pcexpr_evaluate_nonterminal
   expects for its callback. */

/* Returns A + B. */
static double
ctpo_add (double a, double b)
{
  return a + b;
}

/* Returns A - B. */
static double
ctpo_sub (double a, double b)
{
  return a - b;
}

/* Returns A * B. */
static double
ctpo_mul (double a, double b)
{
  return a * b;
}
3729 ctpo_div (double a, double b)
3731 return b ? a / b : SYSMIS;
3735 ctpo_pow (double a, double b)
3737 int save_errno = errno;
3739 double result = pow (a, b);
3747 ctpo_neg (double a, double b UNUSED)
/* Context passed through the evaluation of one postcompute expression.  It
   is filled in by ctables_cell_calculate_postcompute. */
3752 struct ctables_pcexpr_evaluate_ctx
/* The cell being computed.  For every variable other than the postcompute's
   own, its category and value are used to locate the cells that the
   expression refers to. */
3754 const struct ctables_cell *cell;
/* Section that contains CELL; its cell map, nests and occurrence maps are
   consulted during evaluation. */
3755 const struct ctables_section *section;
/* Categories used to resolve the category references in the expression and
   to classify occurring values. */
3756 const struct ctables_categories *cats;
/* Axis on which the postcompute category sits. */
3757 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute when resolving category
   references. */
3760 enum fmt_type parse_format;
/* Forward declaration: expression evaluation is mutually recursive with
   ctables_pcexpr_evaluate_nonterminal. */
3763 static double ctables_pcexpr_evaluate (
3764 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3767 ctables_pcexpr_evaluate_nonterminal (
3768 const struct ctables_pcexpr_evaluate_ctx *ctx,
3769 const struct ctables_pcexpr *e, size_t n_args,
3770 double evaluate (double, double))
3772 double args[2] = { 0, 0 };
3773 for (size_t i = 0; i < n_args; i++)
3775 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3776 if (!isfinite (args[i]) || args[i] == SYSMIS)
3779 return evaluate (args[0], args[1]);
3783 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3784 const struct ctables_cell_value *pc_cv)
3786 const struct ctables_section *s = ctx->section;
3789 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3791 const struct ctables_nest *nest = s->nests[a];
3792 for (size_t i = 0; i < nest->n; i++)
3793 if (i != nest->scale_idx)
3795 const struct ctables_cell_value *cv
3796 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3797 : &ctx->cell->axes[a].cvs[i]);
3798 hash = hash_pointer (cv->category, hash);
3799 if (cv->category->type != CCT_TOTAL
3800 && cv->category->type != CCT_SUBTOTAL
3801 && cv->category->type != CCT_POSTCOMPUTE)
3802 hash = value_hash (&cv->value,
3803 var_get_width (nest->vars[i]), hash);
3807 struct ctables_cell *tc;
3808 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3810 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3812 const struct ctables_nest *nest = s->nests[a];
3813 for (size_t i = 0; i < nest->n; i++)
3814 if (i != nest->scale_idx)
3816 const struct ctables_cell_value *p_cv
3817 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3818 : &ctx->cell->axes[a].cvs[i]);
3819 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3820 if (p_cv->category != t_cv->category
3821 || (p_cv->category->type != CCT_TOTAL
3822 && p_cv->category->type != CCT_SUBTOTAL
3823 && p_cv->category->type != CCT_POSTCOMPUTE
3824 && !value_equal (&p_cv->value,
3826 var_get_width (nest->vars[i]))))
3838 const struct ctables_table *t = s->table;
3839 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3840 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3841 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3842 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category references are resolved with
   ctables_find_category_for_postcompute and evaluated through
   ctables_pcexpr_evaluate_category; operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal with the matching ctpo_* function. */
3846 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3847 const struct ctables_pcexpr *e)
/* Range, MISSING and OTHERNM references can cover several data values: every
   value that occurred for the postcompute's variable and that
   ctables_categories_match maps to the referenced category is evaluated in
   turn and the results are accumulated into `sum'. */
3854 case CTPO_CAT_NRANGE:
3855 case CTPO_CAT_SRANGE:
3856 case CTPO_CAT_MISSING:
3857 case CTPO_CAT_OTHERNM:
3859 struct ctables_cell_value cv = {
3860 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3862 assert (cv.category != NULL);
3864 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3865 const struct ctables_occurrence *o;
3868 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3869 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3870 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
/* The occurrence's value is borrowed, not cloned, for the lookup. */
3872 cv.value = o->value;
3873 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Number, subtotal and total references name a single cell; E->number is
   used as the cell value to look up. */
3878 case CTPO_CAT_NUMBER:
3879 case CTPO_CAT_SUBTOTAL:
3880 case CTPO_CAT_TOTAL:
3882 struct ctables_cell_value cv = {
3883 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3884 .value = { .f = e->number },
3886 assert (cv.category != NULL);
3887 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String reference.  When the string is shorter than the variable's width,
   a space-padded copy is made in S so that it can be compared as a value of
   that width; otherwise E's own string is used directly. */
3890 case CTPO_CAT_STRING:
3892 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3894 if (width > e->string.length)
3896 s = xmalloc (width);
3897 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3900 const struct ctables_category *category
3901 = ctables_find_category_for_postcompute (
3902 ctx->section->table->ctables->dict,
3903 ctx->cats, ctx->parse_format, e);
3904 assert (category != NULL);
/* The resolved category may be numeric or string, so the lookup value is
   taken from the category's number or from the (possibly padded) string
   accordingly. */
3906 struct ctables_cell_value cv = { .category = category };
3907 if (category->type == CCT_NUMBER)
3908 cv.value.f = category->number;
3909 else if (category->type == CCT_STRING)
3910 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3914 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: binary ones take two operands, negation takes one. */
3920 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3923 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3926 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3929 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3932 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3935 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3941 static const struct ctables_category *
3942 ctables_cell_postcompute (const struct ctables_section *s,
3943 const struct ctables_cell *cell,
3944 enum pivot_axis_type *pc_a_p,
3947 assert (cell->postcompute);
3948 const struct ctables_category *pc_cat = NULL;
3949 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3950 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3952 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3953 if (cv->category->type == CCT_POSTCOMPUTE)
3957 /* Multiple postcomputes cross each other. The value is
3962 pc_cat = cv->category;
3966 *pc_a_idx_p = pc_a_idx;
3970 assert (pc_cat != NULL);
3975 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3976 const struct ctables_cell *cell,
3977 const struct ctables_summary_spec *ss,
3978 struct fmt_spec *format,
3979 bool *is_ctables_format,
3982 enum pivot_axis_type pc_a = 0;
3983 size_t pc_a_idx = 0;
3984 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3985 s, cell, &pc_a, &pc_a_idx);
3989 const struct ctables_postcompute *pc = pc_cat->pc;
3992 for (size_t i = 0; i < pc->specs->n; i++)
3994 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3995 if (ss->function == ss2->function
3996 && ss->weighting == ss2->weighting
3997 && ss->calc_area == ss2->calc_area
3998 && ss->percentile == ss2->percentile)
4000 *format = ss2->format;
4001 *is_ctables_format = ss2->is_ctables_format;
4007 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4008 const struct ctables_categories *cats = s->table->categories[
4009 var_get_dict_index (var)];
4010 struct ctables_pcexpr_evaluate_ctx ctx = {
4015 .pc_a_idx = pc_a_idx,
4016 .summary_idx = summary_idx,
4017 .parse_format = pc_cat->parse_format,
4019 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4023 ctables_format (double d, const struct fmt_spec *format,
4024 const struct fmt_settings *settings)
4026 const union value v = { .f = d };
4027 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4029 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4030 produce the results we want for negative numbers, putting the negative
4031 sign in the wrong spot, before the prefix instead of after it. We can't,
4032 in fact, produce the desired results using a custom-currency
4033 specification. Instead, we postprocess the output, moving the negative
4036 NEQUAL: "-N=3" => "N=-3"
4037 PAREN: "-(3)" => "(-3)"
4038 PCTPAREN: "-(3%)" => "(-3%)"
4040 This transformation doesn't affect NEGPAREN. */
4041 char *minus_src = strchr (s, '-');
4042 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4044 char *n_equals = strstr (s, "N=");
4045 char *lparen = strchr (s, '(');
4046 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4048 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4054 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4056 for (size_t i = 0; i < t->stacks[a].n; i++)
4058 struct ctables_nest *nest = &t->stacks[a].nests[i];
4059 if (nest->n != 1 || nest->scale_idx != 0)
4062 enum ctables_vlabel vlabel
4063 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4064 if (vlabel != CTVL_NONE)
4071 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4073 struct pivot_table *pt = pivot_table_create__ (
4075 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4076 : pivot_value_new_text (N_("Custom Tables"))),
4079 pivot_table_set_caption (
4080 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4082 pivot_table_set_corner_text (
4083 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4085 bool summary_dimension = (t->summary_axis != t->slabels_axis
4086 || (!t->slabels_visible
4087 && t->summary_specs.n > 1));
4088 if (summary_dimension)
4090 struct pivot_dimension *d = pivot_dimension_create (
4091 pt, t->slabels_axis, N_("Statistics"));
4092 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4093 if (!t->slabels_visible)
4094 d->hide_all_labels = true;
4095 for (size_t i = 0; i < specs->n; i++)
4096 pivot_category_create_leaf (
4097 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4100 bool categories_dimension = t->clabels_example != NULL;
4101 if (categories_dimension)
4103 struct pivot_dimension *d = pivot_dimension_create (
4104 pt, t->label_axis[t->clabels_from_axis],
4105 t->clabels_from_axis == PIVOT_AXIS_ROW
4106 ? N_("Row Categories")
4107 : N_("Column Categories"));
4108 const struct variable *var = t->clabels_example;
4109 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4110 for (size_t i = 0; i < t->n_clabels_values; i++)
4112 const struct ctables_value *value = t->clabels_values[i];
4113 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4114 assert (cat != NULL);
4115 pivot_category_create_leaf (
4116 d->root, ctables_category_create_value_label (c, cat,
4122 pivot_table_set_look (pt, ct->look);
4123 struct pivot_dimension *d[PIVOT_N_AXES];
4124 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4126 static const char *names[] = {
4127 [PIVOT_AXIS_ROW] = N_("Rows"),
4128 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4129 [PIVOT_AXIS_LAYER] = N_("Layers"),
4131 d[a] = (t->axes[a] || a == t->summary_axis
4132 ? pivot_dimension_create (pt, a, names[a])
4137 assert (t->axes[a]);
4139 for (size_t i = 0; i < t->stacks[a].n; i++)
4141 struct ctables_nest *nest = &t->stacks[a].nests[i];
4142 struct ctables_section **sections = xnmalloc (t->n_sections,
4144 size_t n_sections = 0;
4146 size_t n_total_cells = 0;
4147 size_t max_depth = 0;
4148 for (size_t j = 0; j < t->n_sections; j++)
4149 if (t->sections[j].nests[a] == nest)
4151 struct ctables_section *s = &t->sections[j];
4152 sections[n_sections++] = s;
4153 n_total_cells += hmap_count (&s->cells);
4155 size_t depth = s->nests[a]->n;
4156 max_depth = MAX (depth, max_depth);
4159 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4161 size_t n_sorted = 0;
4163 for (size_t j = 0; j < n_sections; j++)
4165 struct ctables_section *s = sections[j];
4167 struct ctables_cell *cell;
4168 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4170 sorted[n_sorted++] = cell;
4171 assert (n_sorted <= n_total_cells);
4174 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4175 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4178 if (a == PIVOT_AXIS_ROW)
4180 size_t ids[N_CTATS];
4181 memset (ids, 0, sizeof ids);
4182 for (size_t j = 0; j < n_sorted; j++)
4184 struct ctables_cell *cell = sorted[j];
4185 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4187 struct ctables_area *area = cell->areas[at];
4188 if (!area->sequence)
4189 area->sequence = ++ids[at];
4196 for (size_t j = 0; j < n_sorted; j++)
4198 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4203 struct ctables_level
4205 enum ctables_level_type
4207 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4208 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4209 CTL_SUMMARY, /* Summary functions. */
4213 enum settings_value_show vlabel; /* CTL_VAR only. */
4216 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4217 size_t n_levels = 0;
4218 for (size_t k = 0; k < nest->n; k++)
4220 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4221 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4223 if (vlabel != CTVL_NONE)
4225 levels[n_levels++] = (struct ctables_level) {
4227 .vlabel = (enum settings_value_show) vlabel,
4232 if (nest->scale_idx != k
4233 && (k != nest->n - 1 || t->label_axis[a] == a))
4235 levels[n_levels++] = (struct ctables_level) {
4236 .type = CTL_CATEGORY,
4242 if (!summary_dimension && a == t->slabels_axis)
4244 levels[n_levels++] = (struct ctables_level) {
4245 .type = CTL_SUMMARY,
4246 .var_idx = SIZE_MAX,
4250 /* Pivot categories:
4252 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4253 - category for nest->vars[0], if nest->scale_idx != 0
4254 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4255 - category for nest->vars[1], if nest->scale_idx != 1
4257 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4258 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4259 - summary function, if 'a == t->slabels_axis && a ==
4262 Additional dimensions:
4264 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4266 - If 't->label_axis[b] == a' for some 'b != a', add a category
4271 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4273 for (size_t j = 0; j < n_sorted; j++)
4275 struct ctables_cell *cell = sorted[j];
4276 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4278 size_t n_common = 0;
4281 for (; n_common < n_levels; n_common++)
4283 const struct ctables_level *level = &levels[n_common];
4284 if (level->type == CTL_CATEGORY)
4286 size_t var_idx = level->var_idx;
4287 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4288 if (prev->axes[a].cvs[var_idx].category != c)
4290 else if (c->type != CCT_SUBTOTAL
4291 && c->type != CCT_TOTAL
4292 && c->type != CCT_POSTCOMPUTE
4293 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4294 &cell->axes[a].cvs[var_idx].value,
4295 var_get_type (nest->vars[var_idx])))
4301 for (size_t k = n_common; k < n_levels; k++)
4303 const struct ctables_level *level = &levels[k];
4304 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4305 if (level->type == CTL_SUMMARY)
4307 assert (k == n_levels - 1);
4309 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4310 for (size_t m = 0; m < specs->n; m++)
4312 int leaf = pivot_category_create_leaf (
4313 parent, ctables_summary_label (&specs->specs[m],
4321 const struct variable *var = nest->vars[level->var_idx];
4322 struct pivot_value *label;
4323 if (level->type == CTL_VAR)
4325 label = pivot_value_new_variable (var);
4326 label->variable.show = level->vlabel;
4328 else if (level->type == CTL_CATEGORY)
4330 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4331 label = ctables_category_create_value_label (
4332 t->categories[var_get_dict_index (var)],
4333 cv->category, var, &cv->value);
4338 if (k == n_levels - 1)
4339 prev_leaf = pivot_category_create_leaf (parent, label);
4341 groups[k] = pivot_category_create_group__ (parent, label);
4345 cell->axes[a].leaf = prev_leaf;
4354 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4358 size_t n_total_cells = 0;
4359 for (size_t j = 0; j < t->n_sections; j++)
4360 n_total_cells += hmap_count (&t->sections[j].cells);
4362 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4363 size_t n_sorted = 0;
4364 for (size_t j = 0; j < t->n_sections; j++)
4366 const struct ctables_section *s = &t->sections[j];
4367 struct ctables_cell *cell;
4368 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4370 sorted[n_sorted++] = cell;
4372 assert (n_sorted <= n_total_cells);
4373 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4375 size_t ids[N_CTATS];
4376 memset (ids, 0, sizeof ids);
4377 for (size_t j = 0; j < n_sorted; j++)
4379 struct ctables_cell *cell = sorted[j];
4380 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4382 struct ctables_area *area = cell->areas[at];
4383 if (!area->sequence)
4384 area->sequence = ++ids[at];
4391 for (size_t i = 0; i < t->n_sections; i++)
4393 struct ctables_section *s = &t->sections[i];
4395 struct ctables_cell *cell;
4396 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4401 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4402 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4403 for (size_t j = 0; j < specs->n; j++)
4406 size_t n_dindexes = 0;
4408 if (summary_dimension)
4409 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4411 if (categories_dimension)
4413 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4414 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4415 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4416 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4419 dindexes[n_dindexes++] = ctv->leaf;
4422 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4425 int leaf = cell->axes[a].leaf;
4426 if (a == t->summary_axis && !summary_dimension)
4428 dindexes[n_dindexes++] = leaf;
4431 const struct ctables_summary_spec *ss = &specs->specs[j];
4433 struct fmt_spec format = specs->specs[j].format;
4434 bool is_ctables_format = ss->is_ctables_format;
4435 double d = (cell->postcompute
4436 ? ctables_cell_calculate_postcompute (
4437 s, cell, ss, &format, &is_ctables_format, j)
4438 : ctables_summary_value (cell, &cell->summaries[j],
4441 struct pivot_value *value;
4442 if (ct->hide_threshold != 0
4443 && d < ct->hide_threshold
4444 && ss->function == CTSF_COUNT)
4446 value = pivot_value_new_user_text_nocopy (
4447 xasprintf ("<%d", ct->hide_threshold));
4449 else if (d == 0 && ct->zero)
4450 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4451 else if (d == SYSMIS && ct->missing)
4452 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4453 else if (is_ctables_format)
4454 value = pivot_value_new_user_text_nocopy (
4455 ctables_format (d, &format, &ct->ctables_formats));
4458 value = pivot_value_new_number (d);
4459 value->numeric.format = format;
4461 /* XXX should text values be right-justified? */
4462 pivot_table_put (pt, dindexes, n_dindexes, value);
4467 pivot_table_submit (pt);
/* Checks the label position requested for axis A of table T (the
   ROWLABELS subcommand for the row axis, COLLABELS otherwise).

   Reads the requested position from T->label_axis[A], records the last
   variable of the first nest on the axis in T->clabels_example, and
   reports an error with msg() when one of these requirements is violated:

   - No category of that variable may be of type CCT_FUNCTION.
   - The last variable of each nest must not be the nest's scale variable.
   - The last variables of all nests must have the same width, the same
     value labels, and equal category specifications.

   NOTE(review): ctables_prepare_table() uses the result as a boolean;
   presumably true means the label position is acceptable -- confirm. */
4471 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4473 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only to build the error messages below. */
4477 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4478 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4480 const struct ctables_stack *stack = &t->stacks[a];
4484 const struct ctables_nest *n0 = &stack->nests[0];
4487 assert (stack->n == 1);
/* V0 is the last variable in the first nest; C0 is its category
   specification, looked up by dictionary index. */
4491 const struct variable *v0 = n0->vars[n0->n - 1];
4492 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4493 t->clabels_example = v0;
4495 for (size_t i = 0; i < c0->n_cats; i++)
4496 if (c0->cats[i].type == CCT_FUNCTION)
4498 msg (SE, _("%s=%s is not allowed with sorting based "
4499 "on a summary function."),
4500 subcommand_name, pos_name);
/* True when the last variable in the nest is the nest's scale variable. */
4503 if (n0->n - 1 == n0->scale_idx)
4505 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4506 "but %s is a scale variable."),
4507 subcommand_name, pos_name, var_get_name (v0));
/* Compare the last variable of every other nest against V0. */
4511 for (size_t i = 1; i < stack->n; i++)
4513 const struct ctables_nest *ni = &stack->nests[i];
4515 const struct variable *vi = ni->vars[ni->n - 1];
4516 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4518 if (ni->n - 1 == ni->scale_idx)
4520 msg (SE, _("%s=%s requires the variables to be moved to be "
4521 "categorical, but %s is a scale variable."),
4522 subcommand_name, pos_name, var_get_name (vi));
4525 if (var_get_width (v0) != var_get_width (vi))
4527 msg (SE, _("%s=%s requires the variables to be "
4528 "moved to have the same width, but %s has "
4529 "width %d and %s has width %d."),
4530 subcommand_name, pos_name,
4531 var_get_name (v0), var_get_width (v0),
4532 var_get_name (vi), var_get_width (vi));
4535 if (!val_labs_equal (var_get_value_labels (v0),
4536 var_get_value_labels (vi)))
4538 msg (SE, _("%s=%s requires the variables to be "
4539 "moved to have the same value labels, but %s "
4540 "and %s have different value labels."),
4541 subcommand_name, pos_name,
4542 var_get_name (v0), var_get_name (vi));
4545 if (!ctables_categories_equal (c0, ci))
4547 msg (SE, _("%s=%s requires the variables to be "
4548 "moved to have the same category "
4549 "specifications, but %s and %s have different "
4550 "category specifications."),
4551 subcommand_name, pos_name,
4552 var_get_name (v0), var_get_name (vi));
/* Searches the *N variables in *SUM_VARS for VAR and, as far as the code
   below shows, appends VAR at index *N, growing the array (whose capacity
   is *ALLOCATED) with x2nrealloc() when it is full.

   NOTE(review): enumerate_sum_vars() stores the return value in
   spec->sum_var_idx, so this presumably returns VAR's index in *SUM_VARS,
   whether it was already present or newly added -- confirm. */
4561 add_sum_var (struct variable *var,
4562 struct variable ***sum_vars, size_t *n, size_t *allocated)
4564 for (size_t i = 0; i < *n; i++)
4565 if (var == (*sum_vars)[i])
/* Make room if the array is full, then store VAR in slot *N. */
4568 if (*n >= *allocated)
4569 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4570 (*sum_vars)[*n] = var;
/* Maps AREA to another area type and returns it.  CTAT_LAYERCOL and
   CTAT_LAYERROW are among the possible results.

   ctables_prepare_table() applies this to each summary spec's calc_area
   when row labels are moved to the column axis or column labels to the row
   axis.

   NOTE(review): presumably this exchanges row-oriented and column-oriented
   area types and leaves the others unchanged -- confirm. */
4574 static enum ctables_area_type
4575 rotate_area (enum ctables_area_type area)
4586 return CTAT_LAYERCOL;
4589 return CTAT_LAYERROW;
/* Walks the axis expression rooted at A.  For every summary specification
   whose function is CTSF_areaPCT_SUM, registers A->var in *SUM_VARS (via
   add_sum_var(), which maintains *N and *ALLOCATED) and stores the value
   that add_sum_var() returns in the spec's sum_var_idx.  Recurses into
   both of A->subs[].

   NOTE(review): which kinds of axis node take the "specs" path and which
   take the "subs" path is decided by code not shown here -- confirm. */
4602 enumerate_sum_vars (const struct ctables_axis *a,
4603 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every spec in every summary variant attached to this node. */
4611 for (size_t i = 0; i < N_CSVS; i++)
4612 for (size_t j = 0; j < a->specs[i].n; j++)
4614 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4615 if (spec->function == CTSF_areaPCT_SUM)
4616 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into the two operands of this node. */
4622 for (size_t i = 0; i < 2; i++)
4623 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The steps visible below are:

   - For each axis, builds T->stacks[a] with enumerate_fts() and, for every
     nest and every area type, fills nest->areas[at] with the indexes of
     the nest's non-scale variables that belong to that area, then trims
     entries according to n_drop.

   - Where an axis gets a placeholder stack holding a single nest with no
     scale or summary variable, resets T->label_axis[a] to A, so that
     labels are not moved away from an axis that has none.

   - For each nest on the summary axis that has no cell summary specs,
     installs one default spec (CTSF_MEAN if the set is marked is_scale,
     otherwise CTSF_COUNT) and clones the cell specs into the total specs.
     A nest that has cell specs but no total specs also gets its cell
     specs cloned into the total specs.

   - When row labels are moved to the column axis or column labels to the
     row axis, rotates each spec's calc_area with rotate_area().

   - With SMISSING=LISTWISE, gives each spec set the scale variables of the
     other nests that share the nest's group_head.

   - Merges the spec sets of all summary-axis nests into T->summary_specs
     and records, in each original spec's axis_idx, its position in the
     merged set.

   - Collects the variables needed by CTSF_areaPCT_SUM specs into
     T->sum_vars.

   Returns true only if ctables_check_label_position() accepts both the
   row and the column axis. */
4629 ctables_prepare_table (struct ctables_table *t)
4631 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4634 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4636 for (size_t j = 0; j < t->stacks[a].n; j++)
4638 struct ctables_nest *nest = &t->stacks[a].nests[j];
4639 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Allocate room for one index per variable in the nest; n_areas[at]
   counts how many are actually used. */
4641 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4642 nest->n_areas[at] = 0;
4644 enum pivot_axis_type ata, atb;
4645 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4647 ata = PIVOT_AXIS_ROW;
4648 atb = PIVOT_AXIS_COLUMN;
4650 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4652 ata = PIVOT_AXIS_COLUMN;
4653 atb = PIVOT_AXIS_ROW;
4656 if (at == CTAT_LAYER
4657 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4658 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4659 ? a == atb && t->label_axis[a] != a
/* K is unsigned, so this counts down from nest->n - 1 to 0 and stops when
   decrementing past zero wraps K around to a value >= nest->n. */
4662 for (size_t k = nest->n - 1; k < nest->n; k--)
4663 if (k != nest->scale_idx)
4665 nest->areas[at][nest->n_areas[at]++] = k;
4671 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4672 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4673 : at == CTAT_TABLE ? true
/* Add every non-scale variable in the nest, in order. */
4677 for (size_t k = 0; k < nest->n; k++)
4678 if (k != nest->scale_idx)
4679 nest->areas[at][nest->n_areas[at]++] = k;
4685 #define L PIVOT_AXIS_LAYER
4686 n_drop = (t->clabels_from_axis == L ? a != L
4687 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4688 : t->clabels_from_axis == a ? 2
4695 n_drop = a == ata && t->label_axis[ata] == atb;
4700 n_drop = (a == ata ? t->label_axis[ata] == atb
4702 : t->clabels_from_axis == atb ? -1
4703 : t->clabels_to_axis != atb ? 1
/* Remove the next-to-last entry by moving the last entry down over it. */
4715 size_t n = nest->n_areas[at];
4718 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4719 nest->n_areas[at]--;
/* Drop up to n_drop entries from the end, never going below zero. */
4724 for (int i = 0; i < n_drop; i++)
4725 if (nest->n_areas[at] > 0)
4726 nest->n_areas[at]--;
/* Placeholder stack: a single nest with no scale or summary variable. */
4733 struct ctables_nest *nest = xmalloc (sizeof *nest);
4734 *nest = (struct ctables_nest) {
4736 .scale_idx = SIZE_MAX,
4737 .summary_idx = SIZE_MAX
4739 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4741 /* There's no point in moving labels away from an axis that has no
4742 labels, so avoid dealing with the special cases around that. */
4743 t->label_axis[a] = a;
/* From here on, STACK is the stack of nests on the summary axis. */
4746 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4747 for (size_t i = 0; i < stack->n; i++)
4749 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary specs were given for this nest: supply one default. */
4750 if (!nest->specs[CSV_CELL].n)
4752 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4753 ss->specs = xmalloc (sizeof *ss->specs);
4756 enum ctables_summary_function function
4757 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
4761 nest->summary_idx = nest->n - 1;
4762 ss->var = nest->vars[nest->summary_idx];
4764 *ss->specs = (struct ctables_summary_spec) {
4765 .function = function,
4766 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
4767 .format = ctables_summary_default_format (function, ss->var),
4770 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4771 &nest->specs[CSV_CELL]);
4773 else if (!nest->specs[CSV_TOTAL].n)
4774 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4775 &nest->specs[CSV_CELL]);
/* Labels moved between the row and column axes: rotate each spec's
   calculation area to match. */
4777 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4778 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4780 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4781 for (size_t i = 0; i < nest->specs[sv].n; i++)
4783 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4784 const struct ctables_function_info *cfi =
4785 &ctables_function_info[ss->function];
4787 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise missing-value handling: gather the scale variables of the
   other nests that have the same group_head as this nest. */
4791 if (t->ctables->smissing_listwise)
4793 struct variable **listwise_vars = NULL;
4795 size_t allocated = 0;
4797 for (size_t j = nest->group_head; j < stack->n; j++)
4799 const struct ctables_nest *other_nest = &stack->nests[j];
4800 if (other_nest->group_head != nest->group_head)
4803 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4806 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4807 sizeof *listwise_vars);
4808 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4811 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4814 listwise_vars = xmemdup (listwise_vars,
4815 n * sizeof *listwise_vars);
4816 nest->specs[sv].listwise_vars = listwise_vars;
4817 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests into T->summary_specs.  Each merge item
   tracks one spec set and an offset into it. */
4822 struct ctables_summary_spec_set *merged = &t->summary_specs;
4823 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4825 for (size_t j = 0; j < stack->n; j++)
4827 const struct ctables_nest *nest = &stack->nests[j];
4829 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4830 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the least remaining item according to merge_item_compare_3way()
   and append its current spec to the merged set. */
4835 struct merge_item min = items[0];
4836 for (size_t j = 1; j < n_left; j++)
4837 if (merge_item_compare_3way (&items[j], &min) < 0)
4840 if (merged->n >= merged->allocated)
4841 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4842 sizeof *merged->specs);
4843 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to MIN maps to the merged spec just
   added.  Advance such items, and replace an exhausted item with the last
   remaining one. */
4845 for (size_t j = 0; j < n_left; )
4847 if (merge_item_compare_3way (&items[j], &min) == 0)
4849 struct merge_item *item = &items[j];
4850 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4851 if (++item->ofs >= item->set->n)
4853 items[j] = items[--n_left];
/* Diagnostic output of the merged specs and of each spec's axis_idx. */
4863 for (size_t j = 0; j < merged->n; j++)
4864 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4866 for (size_t j = 0; j < stack->n; j++)
4868 const struct ctables_nest *nest = &stack->nests[j];
4869 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4871 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4872 for (size_t k = 0; k < specs->n; k++)
4873 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4874 specs->specs[k].axis_idx);
/* Collect the variables used by CTSF_areaPCT_SUM specs on the summary
   axis. */
4880 size_t allocated_sum_vars = 0;
4881 enumerate_sum_vars (t->axes[t->summary_axis],
4882 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4884 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4885 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack on axis A, takes the value in case C of the
   nest's last variable and, if ctables_categories_match() finds a matching
   category for that variable, records the value with
   ctables_value_insert().

   ctables_execute() calls this for axes whose labels are moved elsewhere
   (T->label_axis[a] != a).

   NOTE(review): a system-missing numeric value is tested for first and is
   presumably skipped -- confirm. */
4889 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4890 enum pivot_axis_type a)
4892 struct ctables_stack *stack = &t->stacks[a];
4893 for (size_t i = 0; i < stack->n; i++)
4895 const struct ctables_nest *nest = &stack->nests[i];
4896 const struct variable *var = nest->vars[nest->n - 1];
4897 const union value *value = case_data (c, var);
4899 if (var_is_numeric (var) && value->f == SYSMIS)
4902 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4904 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sorting an array of pointers to struct
   ctables_value.  A_ and B_ each point to an array element (that is, to a
   pointer to a ctables_value), and WIDTH_ points to an int holding the
   width of the values.  Returns the result of value_compare_3way() on the
   two values. */
4909 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4911 const struct ctables_value *const *ap = a_;
4912 const struct ctables_value *const *bp = b_;
4913 const struct ctables_value *a = *ap;
4914 const struct ctables_value *b = *bp;
4915 const int *width = width_;
4916 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, a sorted array of the values collected in
   T->clabels_values_map, and numbers the values.

   Uses T->clabels_example as the representative variable for width, value
   labels, and category specification.  After sorting with
   compare_clabels_values_3way(), each value's leaf member is set to its
   index in the sorted array.

   NOTE(review): the insertion of labeled values that match the category
   specification is presumably conditional (e.g. on showing empty
   categories) -- confirm. */
4920 ctables_sort_clabels_values (struct ctables_table *t)
4922 const struct variable *v0 = t->clabels_example;
4923 int width = var_get_width (v0);
4925 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add each value-labeled value of V0 that matches one of its categories. */
4928 const struct val_labs *val_labs = var_get_value_labels (v0);
4929 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4930 vl = val_labs_next (val_labs, vl))
4931 if (ctables_categories_match (c0, &vl->value, v0))
4932 ctables_value_insert (t, &vl->value, width);
/* Copy pointers to all of the map's values into a flat array. */
4935 size_t n = hmap_count (&t->clabels_values_map);
4936 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4938 struct ctables_value *clv;
4940 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4941 t->clabels_values[i++] = clv;
4942 t->n_clabels_values = n;
4945 sort (t->clabels_values, n, sizeof *t->clabels_values,
4946 compare_clabels_values_3way, &width);
/* A value's leaf is its position in sorted order. */
4948 for (size_t i = 0; i < n; i++)
4949 t->clabels_values[i]->leaf = i;
/* For each category in CATS, adds values of VAR to OCCURRENCES with
   ctables_add_occurrence().  Depending on the category, the values added
   are:

   - the category's explicit number;
   - the category's explicit string, padded with spaces to VAR's width;
   - VAR's value-labeled values that fall in a numeric range (VAR must be
     numeric) or a string range (VAR must be a string variable);
   - VAR's value-labeled values that are missing values;
   - all of VAR's value-labeled values;
   - VAR's value-labeled values, excluding missing values unless the
     category sets include_missing.

   NOTE(review): most of the case labels that select among these branches
   are not shown here -- confirm which category type maps to which
   branch. */
4953 ctables_add_category_occurrences (const struct variable *var,
4954 struct hmap *occurrences,
4955 const struct ctables_categories *cats)
4957 const struct val_labs *val_labs = var_get_value_labels (var);
4959 for (size_t i = 0; i < cats->n_cats; i++)
4961 const struct ctables_category *c = &cats->cats[i];
4965 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Build a temporary value of VAR's width from the category's string,
   add it, and then release it. */
4971 int width = var_get_width (var);
4973 value_init (&value, width);
4974 value_copy_buf_rpad (&value, width,
4975 CHAR_CAST (uint8_t *, c->string.string),
4976 c->string.length, ' ');
4977 ctables_add_occurrence (var, &value, occurrences);
4978 value_destroy (&value, width);
/* Labeled values within the inclusive numeric range nrange[0]...nrange[1]. */
4983 assert (var_is_numeric (var));
4984 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4985 vl = val_labs_next (val_labs, vl))
4986 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4987 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values accepted by in_string_range() for the string range. */
4991 assert (var_is_alpha (var));
4992 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4993 vl = val_labs_next (val_labs, vl))
4994 if (in_string_range (&vl->value, var, c->srange))
4995 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
4999 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5000 vl = val_labs_next (val_labs, vl))
5001 if (var_is_value_missing (var, &vl->value))
5002 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5006 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5007 vl = val_labs_next (val_labs, vl))
5008 ctables_add_occurrence (var, &vl->value, occurrences);
5011 case CCT_POSTCOMPUTE:
/* Labeled values, skipping missing values unless include_missing is set. */
5021 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5022 vl = val_labs_next (val_labs, vl))
5023 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5024 ctables_add_occurrence (var, &vl->value, occurrences);
5027 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Enumerates combinations of recorded occurrences across the variables of
   section S, one variable per recursion level.  A is the current axis and
   A_IDX the index of the current variable within S->nests[A].  C is a
   scratch case whose data is overwritten with each occurrence's value,
   and CATS[A][A_IDX] receives the category chosen for that variable.

   - Once A has passed the last axis, inserts a cell for the current
     combination with ctables_cell_insert__().
   - If the axis has no nest, or A_IDX is past the nest's last variable,
     continues with the first variable of the next axis.
   - Otherwise, recurses once for each occurrence recorded for the
     variable, and then once for each CCT_POSTCOMPUTE category of the
     variable. */
5034 ctables_section_recurse_add_empty_categories (
5035 struct ctables_section *s,
5036 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5037 enum pivot_axis_type a, size_t a_idx)
5039 if (a >= PIVOT_N_AXES)
5040 ctables_cell_insert__ (s, c, cats);
5041 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5042 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5045 const struct variable *var = s->nests[a]->vars[a_idx];
5046 const struct ctables_categories *categories = s->table->categories[
5047 var_get_dict_index (var)];
5048 int width = var_get_width (var);
5049 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5050 const struct ctables_occurrence *o;
5051 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence's value.
   Every occurrence is expected to match some category. */
5053 union value *value = case_data_rw (c, var);
5054 value_destroy (value, width);
5055 value_clone (value, &o->value, width);
5056 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5057 assert (cats[a][a_idx] != NULL);
5058 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are visited separately from the occurrences. */
5061 for (size_t i = 0; i < categories->n_cats; i++)
5063 const struct ctables_category *cat = &categories->cats[i];
5064 if (cat->type == CCT_POSTCOMPUTE)
5066 cats[a][a_idx] = cat;
5067 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for categories that should be shown even when
   empty.

   For every non-scale variable on every axis whose category specification
   has show_empty set, adds the specification's category values to the
   variable's occurrence map with ctables_add_category_occurrences().
   Then creates a scratch case from the dictionary's prototype and calls
   ctables_section_recurse_add_empty_categories() to insert cells.

   NOTE(review): the local show_empty flag presumably gates the second
   step, and the scratch case is presumably released afterward --
   confirm. */
5074 ctables_section_add_empty_categories (struct ctables_section *s)
5076 bool show_empty = false;
5077 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5079 for (size_t k = 0; k < s->nests[a]->n; k++)
5080 if (k != s->nests[a]->scale_idx)
5082 const struct variable *var = s->nests[a]->vars[k];
5083 const struct ctables_categories *cats = s->table->categories[
5084 var_get_dict_index (var)];
5085 if (cats->show_empty)
5088 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension caps the number of variables per axis at 10; the
   XXX marks this as a known limitation. */
5094 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5095 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5096 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of its accumulated data: the occurrences recorded for
   each non-scale variable, all of the cells (with their per-axis values
   and their summaries), and all of the areas.  The hash maps themselves
   are kept; the cell and area maps are shrunk afterward.

   NOTE(review): the occurrence, cell, and area structures are presumably
   also freed inside the loops below -- confirm. */
5101 ctables_section_clear (struct ctables_section *s)
/* Remove every occurrence from each non-scale variable's map. */
5103 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5105 const struct ctables_nest *nest = s->nests[a];
5106 for (size_t i = 0; i < nest->n; i++)
5107 if (i != nest->scale_idx)
5109 const struct variable *var = nest->vars[i];
5110 int width = var_get_width (var);
5111 struct ctables_occurrence *o, *next;
5112 struct hmap *map = &s->occurrences[a][i];
5113 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5115 value_destroy (&o->value, width);
5116 hmap_delete (map, &o->node);
/* Remove every cell. */
5123 struct ctables_cell *cell, *next_cell;
5124 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Release the cell's values for the non-scale variables on each axis. */
5126 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5128 const struct ctables_nest *nest = s->nests[a];
5129 for (size_t i = 0; i < nest->n; i++)
5130 if (i != nest->scale_idx)
5131 value_destroy (&cell->axes[a].cvs[i].value,
5132 var_get_width (nest->vars[i]));
5133 free (cell->axes[a].cvs);
/* The cell's summaries correspond to the specs for its summary variant
   (cell->sv) on the summary axis. */
5136 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5137 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5138 for (size_t i = 0; i < specs->n; i++)
5139 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5140 free (cell->summaries);
5142 hmap_delete (&s->cells, &cell->node);
5145 hmap_shrink (&s->cells);
/* Remove every area of every area type. */
5147 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5149 struct ctables_area *area, *next_area;
5150 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5154 hmap_delete (&s->areas[at], &area->node);
5157 hmap_shrink (&s->areas[at]);
/* Releases the storage owned by section S: clears its contents with
   ctables_section_clear(), then destroys the per-variable occurrence maps
   and frees their arrays, and destroys the cell map and the area maps. */
5162 ctables_section_uninit (struct ctables_section *s)
5164 ctables_section_clear (s);
5166 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5168 struct ctables_nest *nest = s->nests[a];
5169 for (size_t i = 0; i < nest->n; i++)
5170 hmap_destroy (&s->occurrences[a][i]);
5171 free (s->occurrences[a]);
5174 hmap_destroy (&s->cells);
5175 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5176 hmap_destroy (&s->areas[at]);
/* Empties table T of its accumulated data: clears every section and, if
   T has a category-labels example variable, removes all the values from
   T->clabels_values_map and frees and resets the T->clabels_values
   array.

   NOTE(review): the ctables_value structures are presumably freed inside
   the loop below as well -- confirm. */
5180 ctables_table_clear (struct ctables_table *t)
5182 for (size_t i = 0; i < t->n_sections; i++)
5183 ctables_section_clear (&t->sections[i]);
5185 if (t->clabels_example)
/* All values in the map have the example variable's width. */
5187 int width = var_get_width (t->clabels_example);
5188 struct ctables_value *value, *next_value;
5189 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5190 &t->clabels_values_map)
5192 value_destroy (&value->value, width);
5193 hmap_delete (&t->clabels_values_map, &value->node);
5196 hmap_shrink (&t->clabels_values_map);
5198 free (t->clabels_values);
5199 t->clabels_values = NULL;
5200 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases in INPUT, which belong to dataset
   DS.

   First allocates and creates each table's sections.  Then reads INPUT in
   groups: one group per split-file combination if the dictionary's split
   type is SPLIT_SEPARATE, otherwise without any grouping variables.  Each
   case, with its dictionary, effective, and unweighted weights, is
   inserted into every section of every table; for axes whose category
   labels are moved elsewhere, the case's label values are recorded too.
   Each table then has its moved-label values sorted (if any), empty
   categories added, its output produced with ctables_table_output(), and
   its data cleared with ctables_table_clear().

   Returns the result of casegrouper_destroy(). */
5205 ctables_execute (struct dataset *ds, struct casereader *input,
5208 for (size_t i = 0; i < ct->n_tables; i++)
5210 struct ctables_table *t = ct->tables[i];
/* The section array is sized as the product of the number of nests on the
   three axes, counting each axis as at least one. */
5211 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5212 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5213 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5214 sizeof *t->sections);
5215 size_t ix[PIVOT_N_AXES];
5216 ctables_table_add_section (t, 0, ix);
5219 struct dictionary *dict = dataset_dict (ds);
5221 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5222 struct casegrouper *grouper
5224 ? casegrouper_create_splits (input, dict)
5225 : casegrouper_create_vars (input, NULL, 0));
5226 struct casereader *group;
5227 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to report the split-file values. */
5231 struct ccase *c = casereader_peek (group, 0);
5234 output_split_file_values (ds, c);
5239 bool warn_on_invalid = true;
5240 for (struct ccase *c = casereader_read (group); c;
5241 case_unref (c), c = casereader_read (group))
/* Compute the case's weight under each weighting scheme. */
5243 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5244 double e_weight = (ct->e_weight
5245 ? var_force_valid_weight (ct->e_weight,
5246 case_num (c, ct->e_weight),
5250 [CTW_DICTIONARY] = d_weight,
5251 [CTW_EFFECTIVE] = e_weight,
5252 [CTW_UNWEIGHTED] = 1.0,
5255 for (size_t i = 0; i < ct->n_tables; i++)
5257 struct ctables_table *t = ct->tables[i];
5259 for (size_t j = 0; j < t->n_sections; j++)
5260 ctables_cell_insert (&t->sections[j], c, weight);
/* Record label values only for axes whose labels are displayed on a
   different axis. */
5262 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5263 if (t->label_axis[a] != a)
5264 ctables_insert_clabels_values (t, c, a);
5267 casereader_destroy (group);
5269 for (size_t i = 0; i < ct->n_tables; i++)
5271 struct ctables_table *t = ct->tables[i];
5273 if (t->clabels_example)
5274 ctables_sort_clabels_values (t);
5276 for (size_t j = 0; j < t->n_sections; j++)
5277 ctables_section_add_empty_categories (&t->sections[j]);
5279 ctables_table_output (ct, t);
5280 ctables_table_clear (t);
5283 return casegrouper_destroy (grouper);
/* Type of a function that parses one level of a postcompute expression
   from a lexer, given a dictionary, and returns the parsed expression.
   The binary-operator parsers take one of these as PARSE_NEXT_LEVEL to
   parse their operands. */
5288 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5289 struct dictionary *);
/* Destroys postcompute expression E, releasing what its operation owns:
   the string of a CTPO_CAT_STRING node, both strings of a CTPO_CAT_SRANGE
   node, and (recursively) both subexpressions of a node that has them.
   The node's location is destroyed as well.

   NOTE(review): presumably a null E is accepted (callers pass the result
   of a failed parse) and E itself is freed at the end -- confirm. */
5292 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5298 case CTPO_CAT_STRING:
5299 ss_dealloc (&e->string);
5302 case CTPO_CAT_SRANGE:
5303 for (size_t i = 0; i < 2; i++)
5304 ss_dealloc (&e->srange[i]);
/* Nodes with subexpressions: destroy both operands. */
5313 for (size_t i = 0; i < 2; i++)
5314 ctables_pcexpr_destroy (e->subs[i]);
/* The remaining category operations are grouped together. */
5318 case CTPO_CAT_NUMBER:
5319 case CTPO_CAT_NRANGE:
5320 case CTPO_CAT_MISSING:
5321 case CTPO_CAT_OTHERNM:
5322 case CTPO_CAT_SUBTOTAL:
5323 case CTPO_CAT_TOTAL:
5327 msg_location_destroy (e->location);
/* Allocates a new postcompute expression node for binary operation OP
   with operands SUB0 and SUB1.  The new node's location is the merger of
   the two operands' locations, so both operands must be non-null.

   NOTE(review): presumably the new node is returned and takes ownership
   of SUB0 and SUB1 -- confirm. */
5332 static struct ctables_pcexpr *
5333 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5334 struct ctables_pcexpr *sub0,
5335 struct ctables_pcexpr *sub1)
5337 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5338 *e = (struct ctables_pcexpr) {
5340 .subs = { sub0, sub1 },
5341 .location = msg_location_merged (sub0->location, sub1->location),
5346 /* How to parse an operator. */
/* Token that represents the operator in the input. */
5349 enum token_type token;
/* Postcompute operation that the token stands for. */
5350 enum ctables_postcompute_op op;
/* Looks through the N_OPS operators in OPS for one whose token is the
   lexer's current token.  T_NEG_NUM is treated differently from other
   operator tokens.

   NOTE(review): presumably returns the matching operator, or a null
   pointer if none matches, and consumes the token unless it is T_NEG_NUM
   (a negative number that also has to serve as the right-hand operand)
   -- confirm. */
5353 static const struct operator *
5354 ctables_pcexpr_match_operator (struct lexer *lexer,
5355 const struct operator ops[], size_t n_ops)
5357 for (const struct operator *op = ops; op < ops + n_ops; op++)
5358 if (lex_token (lexer) == op->token)
5360 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators at one precedence level, given an
   already parsed left-hand operand LHS.

   OPS lists the N_OPS operators at this level, and PARSE_NEXT_LEVEL parses
   each right-hand operand.  Each operator that matches combines the
   expression so far with the new operand into a binary node, which then
   becomes the left-hand side for the next operator.

   If CHAIN_WARNING is nonnull, it is issued as a warning at the
   expression's location when the chain is found to contain more than one
   operator.  If parsing an operand fails, LHS is destroyed.

   NOTE(review): presumably returns the final expression, or a null pointer
   on failure -- confirm. */
5369 static struct ctables_pcexpr *
5370 ctables_pcexpr_parse_binary_operators__ (
5371 struct lexer *lexer, struct dictionary *dict,
5372 const struct operator ops[], size_t n_ops,
5373 parse_recursively_func *parse_next_level,
5374 const char *chain_warning, struct ctables_pcexpr *lhs)
/* op_count is the number of operators consumed so far. */
5376 for (int op_count = 0; ; op_count++)
5378 const struct operator *op
5379 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5382 if (op_count > 1 && chain_warning)
5383 msg_at (SW, lhs->location, "%s", chain_warning);
5388 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5391 ctables_pcexpr_destroy (lhs);
5395 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first parses the
   left-hand operand with PARSE_NEXT_LEVEL, then hands it to
   ctables_pcexpr_parse_binary_operators__() along with the N_OPS
   operators in OPS and the optional CHAIN_WARNING.

   NOTE(review): presumably returns a null pointer without going further if
   the left-hand operand fails to parse -- confirm. */
5399 static struct ctables_pcexpr *
5400 ctables_pcexpr_parse_binary_operators (
5401 struct lexer *lexer, struct dictionary *dict,
5402 const struct operator ops[], size_t n_ops,
5403 parse_recursively_func *parse_next_level, const char *chain_warning)
5405 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5409 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5411 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this to parse
   a parenthesized subexpression, before the function is defined. */
5414 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5415 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node for the numeric
   range with bounds LOW and HIGH.  Members not given here take their
   default (zero) values. */
5417 static struct ctables_pcexpr
5418 ctpo_cat_nrange (double low, double high)
5420 return (struct ctables_pcexpr) {
5421 .op = CTPO_CAT_NRANGE,
5422 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node for the string
   range with bounds LOW and HIGH.  The substrings are stored as given,
   not copied; ctables_pcexpr_parse_primary() passes a substring with a
   null string pointer for a LO or HI bound. */
5426 static struct ctables_pcexpr
5427 ctpo_cat_srange (struct substring low, struct substring high)
5429 return (struct ctables_pcexpr) {
5430 .op = CTPO_CAT_SRANGE,
5431 .srange = { low, high },
/* Parses a primary postcompute expression, one of:

   - a number, which becomes a CTPO_CONSTANT;
   - MISSING, OTHERNM, or TOTAL;
   - SUBTOTAL, optionally followed by a bracketed integer index of at
     least 1 (the index is 0 when omitted);
   - a bracketed category reference: [LO THRU number], [LO THRU string],
     [number], [number THRU HI], [number THRU number], [string],
     [string THRU HI], or [string THRU string];
   - a parenthesized expression, parsed with ctables_pcexpr_parse_add().

   For an open-ended numeric range, LO is represented as -DBL_MAX and HI as
   DBL_MAX.  For an open-ended string range, the open bound is a substring
   with a null string pointer.

   Except for a parenthesized expression, the node is built in a local
   variable, given a location that spans the tokens consumed, and copied
   to the heap with xmemdup() for return.

   NOTE(review): presumably returns a null pointer after reporting a
   syntax error -- confirm. */
5435 static struct ctables_pcexpr *
5436 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the expression starts, for its location. */
5438 int start_ofs = lex_ofs (lexer);
5439 struct ctables_pcexpr e;
5440 if (lex_is_number (lexer))
5442 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5443 .number = lex_number (lexer) };
5446 else if (lex_match_id (lexer, "MISSING"))
5447 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5448 else if (lex_match_id (lexer, "OTHERNM"))
5449 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5450 else if (lex_match_id (lexer, "TOTAL"))
5451 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5452 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The bracketed index is optional; it stays 0 if absent. */
5454 size_t subtotal_index = 0;
5455 if (lex_match (lexer, T_LBRACK))
5457 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5459 subtotal_index = lex_integer (lexer);
5461 if (!lex_force_match (lexer, T_RBRACK))
5464 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5465 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5467 else if (lex_match (lexer, T_LBRACK))
5469 if (lex_match_id (lexer, "LO"))
5471 if (!lex_force_match_id (lexer, "THRU"))
/* LO THRU string: the low bound is left as a null substring. */
5474 if (lex_is_string (lexer))
5476 struct substring low = { .string = NULL };
5477 struct substring high = parse_substring (lexer, dict);
5478 e = ctpo_cat_srange (low, high);
/* LO THRU number: the low bound is -DBL_MAX. */
5482 if (!lex_force_num (lexer))
5484 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5488 else if (lex_is_number (lexer))
5490 double number = lex_number (lexer);
5492 if (lex_match_id (lexer, "THRU"))
5494 if (lex_match_id (lexer, "HI"))
5495 e = ctpo_cat_nrange (number, DBL_MAX);
5498 if (!lex_force_num (lexer))
5500 e = ctpo_cat_nrange (number, lex_number (lexer));
5505 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5508 else if (lex_is_string (lexer))
5510 struct substring s = parse_substring (lexer, dict);
5512 if (lex_match_id (lexer, "THRU"))
5514 struct substring high;
/* string THRU HI: the high bound is left as a null substring. */
5516 if (lex_match_id (lexer, "HI"))
5517 high = (struct substring) { .string = NULL };
5520 if (!lex_force_string (lexer))
5525 high = parse_substring (lexer, dict);
5528 e = ctpo_cat_srange (s, high);
5531 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5535 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings that the node
   built so far owns. */
5539 if (!lex_force_match (lexer, T_RBRACK))
5541 if (e.op == CTPO_CAT_STRING)
5542 ss_dealloc (&e.string);
5543 else if (e.op == CTPO_CAT_SRANGE)
5545 ss_dealloc (&e.srange[0]);
5546 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression: restart at the lowest-precedence level. */
5551 else if (lex_match (lexer, T_LPAREN))
5553 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5556 if (!lex_force_match (lexer, T_RPAREN))
5558 ctables_pcexpr_destroy (ep);
5565 lex_error (lexer, NULL);
/* The location covers the tokens from the start through the last token
   consumed; the node is then copied to the heap. */
5569 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5570 return xmemdup (&e, sizeof e);
/* Allocates a new postcompute expression node that wraps SUB.  Its
   location spans the tokens from offset START_OFS in LEXER through the
   token just before the lexer's current one.

   NOTE(review): the callers use this for negation, so presumably the
   node's operation is the negation operator with SUB as its only operand,
   and the new node is returned -- confirm. */
5573 static struct ctables_pcexpr *
5574 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5575 struct lexer *lexer, int start_ofs)
5577 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5578 *e = (struct ctables_pcexpr) {
5581 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.

   Ordinarily this defers to ctables_pcexpr_parse_binary_operators() with
   the single operator T_EXP (CTPO_POW), with primaries as operands, and
   with a warning that chained exponentiation is left-associative.

   When the current token is a negative number (T_NEG_NUM) immediately
   followed by T_EXP, the function instead builds a CTPO_CONSTANT from the
   negated token value as the left-hand operand, parses the rest of the
   chain with ctables_pcexpr_parse_binary_operators__(), and wraps the
   result using ctables_pcexpr_allocate_neg(), so that the negation
   applies to the whole power.

   NOTE(review): presumably the negative-number token is consumed between
   building the constant and parsing the chain, and a failed parse yields
   a null pointer -- confirm. */
5586 static struct ctables_pcexpr *
5587 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5589 static const struct operator op = { T_EXP, CTPO_POW };
5591 const char *chain_warning =
5592 _("The exponentiation operator (`**') is left-associative: "
5593 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5594 "To disable this warning, insert parentheses.");
/* Common case: anything other than a negative number followed by T_EXP. */
5596 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5597 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5598 ctables_pcexpr_parse_primary,
5601 /* Special case for situations like "-5**6", which must be parsed as
5604 int start_ofs = lex_ofs (lexer);
5605 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5606 *lhs = (struct ctables_pcexpr) {
5607 .op = CTPO_CONSTANT,
5608 .number = -lex_tokval (lexer),
5609 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5613 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5614 lexer, dict, &op, 1,
5615 ctables_pcexpr_parse_primary, chain_warning, lhs);
5619 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5622 /* Parses the unary minus level. */
/* Without a leading T_DASH, this defers to ctables_pcexpr_parse_exp().
   With one, it consumes the dash, parses the operand by calling itself
   (so that repeated minus signs nest), and wraps the operand using
   ctables_pcexpr_allocate_neg(), with a location that starts at the dash.

   NOTE(review): presumably returns a null pointer if the operand fails to
   parse -- confirm. */
5623 static struct ctables_pcexpr *
5624 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5626 int start_ofs = lex_ofs (lexer);
5627 if (!lex_match (lexer, T_DASH))
5628 return ctables_pcexpr_parse_exp (lexer, dict);
5630 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5634 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5637 /* Parses the multiplication and division level. */
/* The operators at this level are T_ASTERISK (CTPO_MUL) and T_SLASH
   (CTPO_DIV).  Operands are parsed at the unary minus level, and no chain
   warning is issued. */
5638 static struct ctables_pcexpr *
5639 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5641 static const struct operator ops[] =
5643 { T_ASTERISK, CTPO_MUL },
5644 { T_SLASH, CTPO_DIV },
5647 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5648 sizeof ops / sizeof *ops,
5649 ctables_pcexpr_parse_neg, NULL);
5652 /* Parses the addition and subtraction level. */
/* The operators at this level are T_PLUS (CTPO_ADD) and T_DASH
   (CTPO_SUB).  A negative-number token (T_NEG_NUM) also counts as CTPO_ADD,
   so that input such as "a -5" adds the negative number to "a".  Operands
   are parsed at the multiplication level, and no chain warning is
   issued. */
5653 static struct ctables_pcexpr *
5654 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5656 static const struct operator ops[] =
5658 { T_PLUS, CTPO_ADD },
5659 { T_DASH, CTPO_SUB },
5660 { T_NEG_NUM, CTPO_ADD },
5663 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5664 ops, sizeof ops / sizeof *ops,
5665 ctables_pcexpr_parse_mul, NULL);
/* Searches CT->postcomputes for a postcompute named NAME.  Names are
   hashed with utf8_hash_case_string() and compared with
   utf8_strcasecmp(), so the lookup does not depend on letter case.

   NOTE(review): presumably returns the matching postcompute, or a null
   pointer if there is none -- confirm. */
5668 static struct ctables_postcompute *
5669 ctables_find_postcompute (struct ctables *ct, const char *name)
5671 struct ctables_postcompute *pc;
5672 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5673 utf8_hash_case_string (name, 0), &ct->postcomputes)
5674 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form "&name = EXPR(expression)"
   and records it in CT->postcomputes.

   If a postcompute with the same name already exists, warns that the new
   definition overrides it, notes where the previous definition was, and
   discards the old expression and location.  Otherwise allocates a new
   postcompute and inserts it into the hash map under its name.  Either
   way, the postcompute gets the new location and a label taken from the
   source text of the expression.

   NOTE(review): the start offset is taken one token before the current
   one, presumably so that the location includes the subcommand keyword,
   and the function presumably returns false on a syntax error --
   confirm. */
5680 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5683 int pcompute_start = lex_ofs (lexer) - 1;
5685 if (!lex_match (lexer, T_AND))
5687 lex_error_expecting (lexer, "&");
5690 if (!lex_force_id (lexer))
5693 char *name = ss_xstrdup (lex_tokss (lexer));
5696 if (!lex_force_match (lexer, T_EQUALS)
5697 || !lex_force_match_id (lexer, "EXPR")
5698 || !lex_force_match (lexer, T_LPAREN))
/* Record the token offsets that bracket the expression, for the label. */
5704 int expr_start = lex_ofs (lexer);
5705 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5706 int expr_end = lex_ofs (lexer) - 1;
5707 if (!expr || !lex_force_match (lexer, T_RPAREN))
5709 ctables_pcexpr_destroy (expr);
5713 int pcompute_end = lex_ofs (lexer) - 1;
5715 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5718 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then drop what the previous definition owned. */
5721 msg_at (SW, location, _("New definition of &%s will override the "
5722 "previous definition."),
5724 msg_at (SN, pc->location, _("This is the previous definition."));
5726 ctables_pcexpr_destroy (pc->expr);
5727 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes ownership of NAME. */
5732 pc = xmalloc (sizeof *pc);
5733 *pc = (struct ctables_postcompute) { .name = name };
5734 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5735 utf8_hash_case_string (pc->name, 0));
5738 pc->location = location;
5740 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary specifications into *SSS.

   *SSS is first reset to an empty set.  Specifications are then read until
   the lexer reaches the end of the command, a `/', or an identifier matching
   LABEL or HIDESOURCECATS.  Each specification consists of a summary
   function, a percentile value when the function is CTSF_PTILE, and a format
   specifier. */
5745 ctables_parse_pproperties_format (struct lexer *lexer,
5746 struct ctables_summary_spec_set *sss)
5748 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Stop at anything that ends this list: end of command, `/', or one of the
   identifiers LABEL and HIDESOURCECATS. */
5750 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5751 && !(lex_token (lexer) == T_ID
5752 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5753 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5754 lex_tokss (lexer)))))
5756 /* Parse function. */
5757 enum ctables_summary_function function;
5758 enum ctables_weighting weighting;
5759 enum ctables_area_type area;
5760 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5763 /* Parse percentile. */
/* Stays 0 for every function other than CTSF_PTILE. */
5764 double percentile = 0;
5765 if (function == CTSF_PTILE)
/* The percentile must be a number accepted by lex_force_num_range_closed()
   for the bounds 0 and 100. */
5767 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5769 percentile = lex_number (lexer);
/* Parse the format specifier that follows the function. */
5774 struct fmt_spec format;
5775 bool is_ctables_format;
5776 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when full, then append the new specification. */
5779 if (sss->n >= sss->allocated)
5780 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5781 sizeof *sss->specs);
5782 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5783 .function = function,
5784 .weighting = weighting,
5787 .percentile = percentile,
5789 .is_ctables_format = is_ctables_format,
/* NOTE(review): presumably the error path (its label and return are not
   visible here): releases whatever was accumulated in *SSS. */
5795 ctables_summary_spec_set_uninit (sss);
/* Parses a list of `&name' postcompute references followed by property
   settings, and applies each setting to every postcompute that was named.

   The recognized settings are LABEL (a string), FORMAT (a list of summary
   specifications), and HIDESOURCECATS (a boolean).  A name that
   ctables_find_postcompute() does not find in CT is reported with an
   "Unknown computed category" error. */
5800 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5802 struct ctables_postcompute **pcs = NULL;
5804 size_t allocated_pcs = 0;
/* Each reference is written as `&' followed by an identifier. */
5806 while (lex_match (lexer, T_AND))
5808 if (!lex_force_id (lexer))
5810 struct ctables_postcompute *pc
5811 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5814 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5819 if (n_pcs >= allocated_pcs)
5820 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings continue until the next `/' or the end of the command. */
5824 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5826 if (lex_match_id (lexer, "LABEL"))
/* The `=' is optional. */
5828 lex_match (lexer, T_EQUALS);
5829 if (!lex_force_string (lexer))
/* Every named postcompute gets its own copy of the label string, replacing
   whatever label it had before. */
5832 for (size_t i = 0; i < n_pcs; i++)
5834 free (pcs[i]->label);
5835 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5840 else if (lex_match_id (lexer, "FORMAT"))
5842 lex_match (lexer, T_EQUALS);
/* Parse once into a temporary set, then clone it for each postcompute. */
5844 struct ctables_summary_spec_set sss;
5845 if (!ctables_parse_pproperties_format (lexer, &sss))
5848 for (size_t i = 0; i < n_pcs; i++)
/* Release the contents of the set being replaced before installing a newly
   allocated clone. */
5851 ctables_summary_spec_set_uninit (pcs[i]->specs);
5853 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5854 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
/* The temporary set is released here, after the per-postcompute clones have
   been made. */
5856 ctables_summary_spec_set_uninit (&sss);
5858 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5860 lex_match (lexer, T_EQUALS);
5861 bool hide_source_cats;
5862 if (!parse_bool (lexer, &hide_source_cats))
5864 for (size_t i = 0; i < n_pcs; i++)
5865 pcs[i]->hide_source_cats = hide_source_cats;
/* Anything else is a syntax error that lists the accepted keywords. */
5869 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted with localtime() and formatted by
   strftime() according to FORMAT.

   The text is built in the local buffer `value' (its declaration is not
   visible in this excerpt) and then copied into OUT. */
5882 put_strftime (struct string *out, time_t now, const char *format)
/* NOTE(review): the result of localtime() is passed to strftime() without a
   null check in the lines visible here. */
5884 const struct tm *tm = localtime (&now);
5886 strftime (value, sizeof value, format, tm);
5887 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.

   The return statements are not visible in this excerpt; put_title_text()
   uses the result as the condition of an `if', so it is presumably true
   exactly when the prefix was found and skipped. */
5891 skip_prefix (struct substring *s, struct substring prefix)
5893 if (ss_starts_with (*s, prefix))
/* Consume exactly the bytes of the prefix. */
5895 ss_advance (s, prefix.length);
/* Appends to OUT a text rendering of the tokens in LEXER at offsets
   EXPR_START up to, but not including, EXPR_END.

   Identifier tokens are looked up in DICT: when a variable of that name
   exists and has a label, the label is written in place of the identifier.
   Square brackets receive special treatment through a nesting counter whose
   handling falls on lines not visible in this excerpt.  Other tokens are
   written using their source representation, with spaces around them as
   described below. */
5903 put_table_expression (struct string *out, struct lexer *lexer,
5904 struct dictionary *dict, int expr_start, int expr_end)
/* EXPR_END is exclusive. */
5907 for (int ofs = expr_start; ofs < expr_end; ofs++)
5909 const struct token *t = lex_ofs_token (lexer, ofs);
5910 if (t->type == T_LBRACK)
/* A closing bracket is only special while inside an open one. */
5912 else if (t->type == T_RBRACK && nest > 0)
5918 else if (t->type == T_ID)
/* Prefer the variable's label; fall back to the identifier as written when
   there is no such variable or it has no label. */
5920 const struct variable *var
5921 = dict_lookup_var (dict, t->string.string);
5922 const char *label = var ? var_get_label (var) : NULL;
5923 ds_put_cstr (out, label ? label : t->string.string);
/* Leading space, except before the first token, before `)', or when OUT
   already ends in a space. */
5927 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5928 ds_put_byte (out, ' ');
/* Copy the token's source text verbatim. */
5930 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5931 ds_put_cstr (out, repr);
/* Trailing space, except after the last token or after `('. */
5934 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5935 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding the keywords `)DATE', `)TIME', and `)TABLE'.

   `)DATE' becomes NOW formatted with strftime's "%x", `)TIME' becomes NOW
   formatted with "%X", and `)TABLE' becomes the rendering produced by
   put_table_expression() for the tokens EXPR_START to EXPR_END in LEXER.  A
   `)' that does not begin one of these keywords is copied as is. */
5941 put_title_text (struct string *out, struct substring in, time_t now,
5942 struct lexer *lexer, struct dictionary *dict,
5943 int expr_start, int expr_end)
/* Copy everything up to the next `)' unchanged. */
5947 size_t chunk = ss_find_byte (in, ')');
5948 ds_put_substring (out, ss_head (in, chunk));
5949 ss_advance (&in, chunk);
/* Nothing left means there was no further `)' in the input. */
5950 if (ss_is_empty (in))
/* IN now begins with `)'; test for each keyword in turn. */
5953 if (skip_prefix (&in, ss_cstr (")DATE")))
5954 put_strftime (out, now, "%x");
5955 else if (skip_prefix (&in, ss_cstr (")TIME")))
5956 put_strftime (out, now, "%X");
5957 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5958 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Fallback: emit the `)' literally and step over it. */
5961 ds_put_byte (out, ')');
5962 ss_advance (&in, 1);
5968 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5970 struct casereader *input = NULL;
5972 struct measure_guesser *mg = measure_guesser_create (ds);
5975 input = proc_open (ds);
5976 measure_guesser_run (mg, input);
5977 measure_guesser_destroy (mg);
5980 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5981 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5982 enum settings_value_show tvars = settings_get_show_variables ();
5983 for (size_t i = 0; i < n_vars; i++)
5984 vlabels[i] = (enum ctables_vlabel) tvars;
5986 struct pivot_table_look *look = pivot_table_look_unshare (
5987 pivot_table_look_ref (pivot_table_look_get_default ()));
5988 look->omit_empty = false;
5990 struct ctables *ct = xmalloc (sizeof *ct);
5991 *ct = (struct ctables) {
5992 .dict = dataset_dict (ds),
5994 .ctables_formats = FMT_SETTINGS_INIT,
5996 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5999 time_t now = time (NULL);
6004 const char *dot_string;
6005 const char *comma_string;
6007 static const struct ctf ctfs[4] = {
6008 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6009 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6010 { CTEF_PAREN, "-,(,),", "-.(.)." },
6011 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6013 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6014 for (size_t i = 0; i < 4; i++)
6016 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6017 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6018 fmt_number_style_from_string (s));
6021 if (!lex_force_match (lexer, T_SLASH))
6024 while (!lex_match_id (lexer, "TABLE"))
6026 if (lex_match_id (lexer, "FORMAT"))
6028 double widths[2] = { SYSMIS, SYSMIS };
6029 double units_per_inch = 72.0;
6031 while (lex_token (lexer) != T_SLASH)
6033 if (lex_match_id (lexer, "MINCOLWIDTH"))
6035 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6038 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6040 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6043 else if (lex_match_id (lexer, "UNITS"))
6045 lex_match (lexer, T_EQUALS);
6046 if (lex_match_id (lexer, "POINTS"))
6047 units_per_inch = 72.0;
6048 else if (lex_match_id (lexer, "INCHES"))
6049 units_per_inch = 1.0;
6050 else if (lex_match_id (lexer, "CM"))
6051 units_per_inch = 2.54;
6054 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6058 else if (lex_match_id (lexer, "EMPTY"))
6063 lex_match (lexer, T_EQUALS);
6064 if (lex_match_id (lexer, "ZERO"))
6066 /* Nothing to do. */
6068 else if (lex_match_id (lexer, "BLANK"))
6069 ct->zero = xstrdup ("");
6070 else if (lex_force_string (lexer))
6072 ct->zero = ss_xstrdup (lex_tokss (lexer));
6078 else if (lex_match_id (lexer, "MISSING"))
6080 lex_match (lexer, T_EQUALS);
6081 if (!lex_force_string (lexer))
6085 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6086 ? ss_xstrdup (lex_tokss (lexer))
6092 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6093 "UNITS", "EMPTY", "MISSING");
6098 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6099 && widths[0] > widths[1])
6101 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6105 for (size_t i = 0; i < 2; i++)
6106 if (widths[i] != SYSMIS)
6108 int *wr = ct->look->width_ranges[TABLE_HORZ];
6109 wr[i] = widths[i] / units_per_inch * 96.0;
6114 else if (lex_match_id (lexer, "VLABELS"))
6116 if (!lex_force_match_id (lexer, "VARIABLES"))
6118 lex_match (lexer, T_EQUALS);
6120 struct variable **vars;
6122 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6126 if (!lex_force_match_id (lexer, "DISPLAY"))
6131 lex_match (lexer, T_EQUALS);
6133 enum ctables_vlabel vlabel;
6134 if (lex_match_id (lexer, "DEFAULT"))
6135 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6136 else if (lex_match_id (lexer, "NAME"))
6138 else if (lex_match_id (lexer, "LABEL"))
6139 vlabel = CTVL_LABEL;
6140 else if (lex_match_id (lexer, "BOTH"))
6142 else if (lex_match_id (lexer, "NONE"))
6146 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6152 for (size_t i = 0; i < n_vars; i++)
6153 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6156 else if (lex_match_id (lexer, "MRSETS"))
6158 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6160 lex_match (lexer, T_EQUALS);
6161 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6164 else if (lex_match_id (lexer, "SMISSING"))
6166 if (lex_match_id (lexer, "VARIABLE"))
6167 ct->smissing_listwise = false;
6168 else if (lex_match_id (lexer, "LISTWISE"))
6169 ct->smissing_listwise = true;
6172 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6176 else if (lex_match_id (lexer, "PCOMPUTE"))
6178 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6181 else if (lex_match_id (lexer, "PPROPERTIES"))
6183 if (!ctables_parse_pproperties (lexer, ct))
6186 else if (lex_match_id (lexer, "WEIGHT"))
6188 if (!lex_force_match_id (lexer, "VARIABLE"))
6190 lex_match (lexer, T_EQUALS);
6191 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6195 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6197 if (lex_match_id (lexer, "COUNT"))
6199 lex_match (lexer, T_EQUALS);
6200 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6203 ct->hide_threshold = lex_integer (lexer);
6206 else if (ct->hide_threshold == 0)
6207 ct->hide_threshold = 5;
6211 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6212 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6213 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6217 if (!lex_force_match (lexer, T_SLASH))
6221 size_t allocated_tables = 0;
6224 if (ct->n_tables >= allocated_tables)
6225 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6226 sizeof *ct->tables);
6228 struct ctables_category *cat = xmalloc (sizeof *cat);
6229 *cat = (struct ctables_category) {
6231 .include_missing = false,
6232 .sort_ascending = true,
6235 struct ctables_categories *c = xmalloc (sizeof *c);
6236 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6237 *c = (struct ctables_categories) {
6244 struct ctables_categories **categories = xnmalloc (n_vars,
6245 sizeof *categories);
6246 for (size_t i = 0; i < n_vars; i++)
6249 struct ctables_table *t = xmalloc (sizeof *t);
6250 *t = (struct ctables_table) {
6252 .slabels_axis = PIVOT_AXIS_COLUMN,
6253 .slabels_visible = true,
6254 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6256 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6257 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6258 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6260 .clabels_from_axis = PIVOT_AXIS_LAYER,
6261 .clabels_to_axis = PIVOT_AXIS_LAYER,
6262 .categories = categories,
6263 .n_categories = n_vars,
6266 ct->tables[ct->n_tables++] = t;
6268 lex_match (lexer, T_EQUALS);
6269 int expr_start = lex_ofs (lexer);
6270 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6272 if (lex_match (lexer, T_BY))
6274 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6275 ct, t, PIVOT_AXIS_COLUMN))
6278 if (lex_match (lexer, T_BY))
6280 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6281 ct, t, PIVOT_AXIS_LAYER))
6285 int expr_end = lex_ofs (lexer);
6287 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6288 && !t->axes[PIVOT_AXIS_LAYER])
6290 lex_error (lexer, _("At least one variable must be specified."));
6294 const struct ctables_axis *scales[PIVOT_N_AXES];
6295 size_t n_scales = 0;
6296 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6298 scales[a] = find_scale (t->axes[a]);
6304 msg (SE, _("Scale variables may appear only on one axis."));
6305 if (scales[PIVOT_AXIS_ROW])
6306 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6307 _("This scale variable appears on the rows axis."));
6308 if (scales[PIVOT_AXIS_COLUMN])
6309 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6310 _("This scale variable appears on the columns axis."));
6311 if (scales[PIVOT_AXIS_LAYER])
6312 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6313 _("This scale variable appears on the layer axis."));
6317 const struct ctables_axis *summaries[PIVOT_N_AXES];
6318 size_t n_summaries = 0;
6319 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6321 summaries[a] = (scales[a]
6323 : find_categorical_summary_spec (t->axes[a]));
6327 if (n_summaries > 1)
6329 msg (SE, _("Summaries may appear only on one axis."));
6330 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6333 msg_at (SN, summaries[a]->loc,
6335 ? _("This variable on the rows axis has a summary.")
6336 : a == PIVOT_AXIS_COLUMN
6337 ? _("This variable on the columns axis has a summary.")
6338 : _("This variable on the layers axis has a summary."));
6340 msg_at (SN, summaries[a]->loc,
6341 _("This is a scale variable, so it always has a "
6342 "summary even if the syntax does not explicitly "
6347 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6348 if (n_summaries ? summaries[a] : t->axes[a])
6350 t->summary_axis = a;
6354 if (lex_token (lexer) == T_ENDCMD)
6356 if (!ctables_prepare_table (t))
6360 if (!lex_force_match (lexer, T_SLASH))
6363 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6365 if (lex_match_id (lexer, "SLABELS"))
6367 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6369 if (lex_match_id (lexer, "POSITION"))
6371 lex_match (lexer, T_EQUALS);
6372 if (lex_match_id (lexer, "COLUMN"))
6373 t->slabels_axis = PIVOT_AXIS_COLUMN;
6374 else if (lex_match_id (lexer, "ROW"))
6375 t->slabels_axis = PIVOT_AXIS_ROW;
6376 else if (lex_match_id (lexer, "LAYER"))
6377 t->slabels_axis = PIVOT_AXIS_LAYER;
6380 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6384 else if (lex_match_id (lexer, "VISIBLE"))
6386 lex_match (lexer, T_EQUALS);
6387 if (!parse_bool (lexer, &t->slabels_visible))
6392 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6397 else if (lex_match_id (lexer, "CLABELS"))
6399 if (lex_match_id (lexer, "AUTO"))
6401 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6402 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6404 else if (lex_match_id (lexer, "ROWLABELS"))
6406 lex_match (lexer, T_EQUALS);
6407 if (lex_match_id (lexer, "OPPOSITE"))
6408 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6409 else if (lex_match_id (lexer, "LAYER"))
6410 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6413 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6417 else if (lex_match_id (lexer, "COLLABELS"))
6419 lex_match (lexer, T_EQUALS);
6420 if (lex_match_id (lexer, "OPPOSITE"))
6421 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6422 else if (lex_match_id (lexer, "LAYER"))
6423 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6426 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6432 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6437 else if (lex_match_id (lexer, "CRITERIA"))
6439 if (!lex_force_match_id (lexer, "CILEVEL"))
6441 lex_match (lexer, T_EQUALS);
6443 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6445 t->cilevel = lex_number (lexer);
6448 else if (lex_match_id (lexer, "CATEGORIES"))
6450 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6454 else if (lex_match_id (lexer, "TITLES"))
6459 if (lex_match_id (lexer, "CAPTION"))
6460 textp = &t->caption;
6461 else if (lex_match_id (lexer, "CORNER"))
6463 else if (lex_match_id (lexer, "TITLE"))
6467 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6470 lex_match (lexer, T_EQUALS);
6472 struct string s = DS_EMPTY_INITIALIZER;
6473 while (lex_is_string (lexer))
6475 if (!ds_is_empty (&s))
6476 ds_put_byte (&s, ' ');
6477 put_title_text (&s, lex_tokss (lexer), now,
6478 lexer, dataset_dict (ds),
6479 expr_start, expr_end);
6483 *textp = ds_steal_cstr (&s);
6485 while (lex_token (lexer) != T_SLASH
6486 && lex_token (lexer) != T_ENDCMD);
6488 else if (lex_match_id (lexer, "SIGTEST"))
6492 t->chisq = xmalloc (sizeof *t->chisq);
6493 *t->chisq = (struct ctables_chisq) {
6495 .include_mrsets = true,
6496 .all_visible = true,
6502 if (lex_match_id (lexer, "TYPE"))
6504 lex_match (lexer, T_EQUALS);
6505 if (!lex_force_match_id (lexer, "CHISQUARE"))
6508 else if (lex_match_id (lexer, "ALPHA"))
6510 lex_match (lexer, T_EQUALS);
6511 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6513 t->chisq->alpha = lex_number (lexer);
6516 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6518 lex_match (lexer, T_EQUALS);
6519 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6522 else if (lex_match_id (lexer, "CATEGORIES"))
6524 lex_match (lexer, T_EQUALS);
6525 if (lex_match_id (lexer, "ALLVISIBLE"))
6526 t->chisq->all_visible = true;
6527 else if (lex_match_id (lexer, "SUBTOTALS"))
6528 t->chisq->all_visible = false;
6531 lex_error_expecting (lexer,
6532 "ALLVISIBLE", "SUBTOTALS");
6538 lex_error_expecting (lexer, "TYPE", "ALPHA",
6539 "INCLUDEMRSETS", "CATEGORIES");
6543 while (lex_token (lexer) != T_SLASH
6544 && lex_token (lexer) != T_ENDCMD);
6546 else if (lex_match_id (lexer, "COMPARETEST"))
6550 t->pairwise = xmalloc (sizeof *t->pairwise);
6551 *t->pairwise = (struct ctables_pairwise) {
6553 .alpha = { .05, .05 },
6554 .adjust = BONFERRONI,
6555 .include_mrsets = true,
6556 .meansvariance_allcats = true,
6557 .all_visible = true,
6566 if (lex_match_id (lexer, "TYPE"))
6568 lex_match (lexer, T_EQUALS);
6569 if (lex_match_id (lexer, "PROP"))
6570 t->pairwise->type = PROP;
6571 else if (lex_match_id (lexer, "MEAN"))
6572 t->pairwise->type = MEAN;
6575 lex_error_expecting (lexer, "PROP", "MEAN");
6579 else if (lex_match_id (lexer, "ALPHA"))
6581 lex_match (lexer, T_EQUALS);
6583 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6585 double a0 = lex_number (lexer);
6588 lex_match (lexer, T_COMMA);
6589 if (lex_is_number (lexer))
6591 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6593 double a1 = lex_number (lexer);
6596 t->pairwise->alpha[0] = MIN (a0, a1);
6597 t->pairwise->alpha[1] = MAX (a0, a1);
6600 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6602 else if (lex_match_id (lexer, "ADJUST"))
6604 lex_match (lexer, T_EQUALS);
6605 if (lex_match_id (lexer, "BONFERRONI"))
6606 t->pairwise->adjust = BONFERRONI;
6607 else if (lex_match_id (lexer, "BH"))
6608 t->pairwise->adjust = BH;
6609 else if (lex_match_id (lexer, "NONE"))
6610 t->pairwise->adjust = 0;
6613 lex_error_expecting (lexer, "BONFERRONI", "BH",
6618 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6620 lex_match (lexer, T_EQUALS);
6621 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6624 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6626 lex_match (lexer, T_EQUALS);
6627 if (lex_match_id (lexer, "ALLCATS"))
6628 t->pairwise->meansvariance_allcats = true;
6629 else if (lex_match_id (lexer, "TESTEDCATS"))
6630 t->pairwise->meansvariance_allcats = false;
6633 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6637 else if (lex_match_id (lexer, "CATEGORIES"))
6639 lex_match (lexer, T_EQUALS);
6640 if (lex_match_id (lexer, "ALLVISIBLE"))
6641 t->pairwise->all_visible = true;
6642 else if (lex_match_id (lexer, "SUBTOTALS"))
6643 t->pairwise->all_visible = false;
6646 lex_error_expecting (lexer, "ALLVISIBLE",
6651 else if (lex_match_id (lexer, "MERGE"))
6653 lex_match (lexer, T_EQUALS);
6654 if (!parse_bool (lexer, &t->pairwise->merge))
6657 else if (lex_match_id (lexer, "STYLE"))
6659 lex_match (lexer, T_EQUALS);
6660 if (lex_match_id (lexer, "APA"))
6661 t->pairwise->apa_style = true;
6662 else if (lex_match_id (lexer, "SIMPLE"))
6663 t->pairwise->apa_style = false;
6666 lex_error_expecting (lexer, "APA", "SIMPLE");
6670 else if (lex_match_id (lexer, "SHOWSIG"))
6672 lex_match (lexer, T_EQUALS);
6673 if (!parse_bool (lexer, &t->pairwise->show_sig))
6678 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6679 "INCLUDEMRSETS", "MEANSVARIANCE",
6680 "CATEGORIES", "MERGE", "STYLE",
6685 while (lex_token (lexer) != T_SLASH
6686 && lex_token (lexer) != T_ENDCMD);
6690 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6691 "CRITERIA", "CATEGORIES", "TITLES",
6692 "SIGTEST", "COMPARETEST");
6696 if (!lex_match (lexer, T_SLASH))
6700 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6702 t->clabels_from_axis = PIVOT_AXIS_ROW;
6703 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6705 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6709 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6710 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6711 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6713 if (!ctables_prepare_table (t))
6716 while (lex_token (lexer) != T_ENDCMD);
6719 input = proc_open (ds);
6720 bool ok = ctables_execute (ds, input, ct);
6721 ok = proc_commit (ds) && ok;
6723 ctables_destroy (ct);
6724 return ok ? CMD_SUCCESS : CMD_FAILURE;
6729 ctables_destroy (ct);