1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_weighting
74 enum ctables_function_type
76 /* A function that operates on data in a single cell. It operates on
77 effective weights. It does not have an unweighted version. */
80 /* A function that operates on data in a single cell. The function
81 operates on effective weights and has a U-prefixed unweighted
85 /* A function that operates on data in a single cell. It operates on
86 dictionary weights, and has a U-prefixed unweighted version and an
87 E-prefixed effective weight version. */
90 /* A function that operates on an area of cells. It operates on effective
91 weights and has a U-prefixed unweighted version. */
102 enum ctables_function_availability
104 CTFA_ALL, /* Any variables. */
105 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
106 //CTFA_MRSETS, /* Only multiple-response sets */
109 enum ctables_summary_function
111 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
112 #include "ctables.inc"
117 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
119 #include "ctables.inc"
123 struct ctables_function_info
125 struct substring basename;
126 enum ctables_function_type type;
127 enum ctables_format format;
128 enum ctables_function_availability availability;
130 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
131 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
132 bool is_area; /* Needs an area prefix. */
134 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
135 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
137 .basename = SS_LITERAL_INITIALIZER (NAME), \
140 .availability = AVAILABILITY, \
141 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
142 .e_prefix = (TYPE) == CTFT_UECELL, \
143 .is_area = (TYPE) == CTFT_AREA \
145 #include "ctables.inc"
149 enum ctables_area_type
151 /* Within a section, where stacked variables divide one section from
154 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
155 parse_ctables_summary_function() parses correctly. */
156 CTAT_TABLE, /* All layers of a whole section. */
157 CTAT_LAYERROW, /* Row in one layer within a section. */
158 CTAT_LAYERCOL, /* Column in one layer within a section. */
159 CTAT_LAYER, /* One layer within a section. */
161 /* Within a subtable, where a subtable pairs an innermost row variable with
162 an innermost column variable within a single layer. */
163 CTAT_SUBTABLE, /* Whole subtable. */
164 CTAT_ROW, /* Row within a subtable. */
165 CTAT_COL, /* Column within a subtable. */
169 static const char *ctables_area_type_name[N_CTATS] = {
170 [CTAT_TABLE] = "TABLE",
171 [CTAT_LAYER] = "LAYER",
172 [CTAT_LAYERROW] = "LAYERROW",
173 [CTAT_LAYERCOL] = "LAYERCOL",
174 [CTAT_SUBTABLE] = "SUBTABLE",
181 struct hmap_node node;
183 const struct ctables_cell *example;
186 double count[N_CTWS];
187 double valid[N_CTWS];
188 double total[N_CTWS];
189 struct ctables_sum *sums;
197 enum ctables_summary_variant
206 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
207 all the axes (except the scalar variable, if any). */
208 struct hmap_node node;
210 /* The areas that contain this cell. */
212 struct ctables_area *areas[N_CTATS];
217 enum ctables_summary_variant sv;
219 struct ctables_cell_axis
221 struct ctables_cell_value
223 const struct ctables_category *category;
231 union ctables_summary *summaries;
236 const struct dictionary *dict;
237 struct pivot_table_look *look;
239 /* CTABLES has a number of extra formats that we implement via custom
240 currency specifications on an alternate fmt_settings. */
241 #define CTEF_NEGPAREN FMT_CCA
242 #define CTEF_NEQUAL FMT_CCB
243 #define CTEF_PAREN FMT_CCC
244 #define CTEF_PCTPAREN FMT_CCD
245 struct fmt_settings ctables_formats;
247 /* If this is NULL, zeros are displayed using the normal print format.
248 Otherwise, this string is displayed. */
251 /* If this is NULL, missing values are displayed using the normal print
252 format. Otherwise, this string is displayed. */
255 /* Indexed by variable dictionary index. */
256 enum ctables_vlabel *vlabels;
258 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
260 bool mrsets_count_duplicates; /* MRSETS. */
261 bool smissing_listwise; /* SMISSING. */
262 struct variable *e_weight; /* WEIGHT. */
263 int hide_threshold; /* HIDESMALLCOUNTS. */
265 struct ctables_table **tables;
269 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
272 struct ctables_postcompute
274 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
275 char *name; /* Name, without leading &. */
277 struct msg_location *location; /* Location of definition. */
278 struct ctables_pcexpr *expr;
280 struct ctables_summary_spec_set *specs;
281 bool hide_source_cats;
284 struct ctables_pcexpr
294 enum ctables_postcompute_op
297 CTPO_CONSTANT, /* 5 */
298 CTPO_CAT_NUMBER, /* [5] */
299 CTPO_CAT_STRING, /* ["STRING"] */
300 CTPO_CAT_NRANGE, /* [LO THRU 5] */
301 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
302 CTPO_CAT_MISSING, /* MISSING */
303 CTPO_CAT_OTHERNM, /* OTHERNM */
304 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
305 CTPO_CAT_TOTAL, /* TOTAL */
319 /* CTPO_CAT_NUMBER. */
322 /* CTPO_CAT_STRING, in dictionary encoding. */
323 struct substring string;
325 /* CTPO_CAT_NRANGE. */
328 /* CTPO_CAT_SRANGE. */
329 struct substring srange[2];
331 /* CTPO_CAT_SUBTOTAL. */
332 size_t subtotal_index;
334 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
335 One element: CTPO_NEG. */
336 struct ctables_pcexpr *subs[2];
339 /* Source location. */
340 struct msg_location *location;
343 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
344 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
345 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
346 struct ctables_pcexpr *sub1);
348 struct ctables_summary_spec_set
350 struct ctables_summary_spec *specs;
354 /* The variable to which the summary specs are applied. */
355 struct variable *var;
357 /* Whether the variable to which the summary specs are applied is a scale
358 variable for the purpose of summarization.
360 (VALIDN and TOTALN act differently for summarizing scale and categorical
364 /* If any of these optional additional scale variables are missing, then
365 treat 'var' as if it's missing too. This is for implementing
366 SMISSING=LISTWISE. */
367 struct variable **listwise_vars;
368 size_t n_listwise_vars;
371 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
372 const struct ctables_summary_spec_set *);
373 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
375 /* A nested sequence of variables, e.g. a > b > c. */
378 struct variable **vars;
382 size_t *areas[N_CTATS];
383 size_t n_areas[N_CTATS];
386 struct ctables_summary_spec_set specs[N_CSVS];
389 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
392 struct ctables_nest *nests;
396 static void ctables_stack_uninit (struct ctables_stack *);
400 struct hmap_node node;
405 struct ctables_occurrence
407 struct hmap_node node;
411 struct ctables_section
414 struct ctables_table *table;
415 struct ctables_nest *nests[PIVOT_N_AXES];
418 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
419 struct hmap cells; /* Contains "struct ctables_cell"s. */
420 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
423 static void ctables_section_uninit (struct ctables_section *);
427 struct ctables *ctables;
428 struct ctables_axis *axes[PIVOT_N_AXES];
429 struct ctables_stack stacks[PIVOT_N_AXES];
430 struct ctables_section *sections;
432 enum pivot_axis_type summary_axis;
433 struct ctables_summary_spec_set summary_specs;
434 struct variable **sum_vars;
437 enum pivot_axis_type slabels_axis;
438 bool slabels_visible;
440 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
442 Most commonly, label_axis[a] == a, and in particular we always have
443 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
445 If ROWLABELS or COLLABELS is specified, then one of
446 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
447 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
449 If any category labels are moved, then 'clabels_example' is one of the
450 variables being moved (and it is otherwise NULL). All of the variables
451 being moved have the same width, value labels, and categories, so this
452 example variable can be used to find those out.
454 The remaining members in this group are relevant only if category labels
457 'clabels_values_map' holds a "struct ctables_value" for all the values
458 that appear in all of the variables in the moved categories. It is
459 accumulated as the data is read. Once the data is fully read, its
460 sorted values are put into 'clabels_values' and 'n_clabels_values'.
462 enum pivot_axis_type label_axis[PIVOT_N_AXES];
463 enum pivot_axis_type clabels_from_axis;
464 enum pivot_axis_type clabels_to_axis;
465 const struct variable *clabels_example;
466 struct hmap clabels_values_map;
467 struct ctables_value **clabels_values;
468 size_t n_clabels_values;
470 /* Indexed by variable dictionary index. */
471 struct ctables_categories **categories;
480 struct ctables_chisq *chisq;
481 struct ctables_pairwise *pairwise;
484 struct ctables_categories
487 struct ctables_category *cats;
492 struct ctables_category
494 enum ctables_category_type
496 /* Explicit category lists. */
499 CCT_NRANGE, /* Numerical range. */
500 CCT_SRANGE, /* String range. */
505 /* Totals and subtotals. */
509 /* Implicit category lists. */
514 /* For contributing to TOTALN. */
515 CCT_EXCLUDED_MISSING,
519 struct ctables_category *subtotal;
525 double number; /* CCT_NUMBER. */
526 struct substring string; /* CCT_STRING, in dictionary encoding. */
527 double nrange[2]; /* CCT_NRANGE. */
528 struct substring srange[2]; /* CCT_SRANGE. */
532 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
533 bool hide_subcategories; /* CCT_SUBTOTAL. */
536 /* CCT_POSTCOMPUTE. */
539 const struct ctables_postcompute *pc;
540 enum fmt_type parse_format;
543 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
546 bool include_missing;
550 enum ctables_summary_function sort_function;
551 enum ctables_weighting weighting;
552 enum ctables_area_type area;
553 struct variable *sort_var;
558 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
559 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
560 struct msg_location *location;
/* Releases the resources owned by CAT.

   The source location is destroyed with msg_location_destroy().  The other
   visible calls release the category's string and both string-range
   endpoints (ss_dealloc()) and its total label (free()); presumably each of
   these is selected by the category's type -- TODO confirm against the full
   switch statement. */
564 ctables_category_uninit (struct ctables_category *cat)
569 msg_location_destroy (cat->location);
576 case CCT_POSTCOMPUTE:
580 ss_dealloc (&cat->string);
584 ss_dealloc (&cat->srange[0]);
585 ss_dealloc (&cat->srange[1]);
590 free (cat->total_label);
598 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  A null substring is equal only to another null substring; two
   non-null substrings are compared with ss_equals(). */
604 nullable_substring_equal (const struct substring *a,
605 const struct substring *b)
607 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.

   The types are compared first (presumably a mismatch yields false -- TODO
   confirm).  After that, only the members relevant to the category's kind
   are compared: the number, the string, the numeric range endpoints, the
   string range endpoints (null-aware), the postcompute pointer, the total
   label, or the sorting and missing-value settings. */
611 ctables_category_equal (const struct ctables_category *a,
612 const struct ctables_category *b)
614 if (a->type != b->type)
620 return a->number == b->number;
623 return ss_equals (a->string, b->string);
626 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* Either endpoint of a string range may be a null substring, so use the
   null-aware comparison. */
629 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
630 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
636 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by pointer identity, not by content. */
637 return a->pc == b->pc;
/* NOTE(review): strcmp() requires both labels to be non-null -- confirm that
   this is guaranteed for the category types that reach this line. */
641 return !strcmp (a->total_label, b->total_label);
/* NOTE(review): the 'weighting' and 'area' members of struct
   ctables_category are not part of this comparison -- confirm that this is
   intended. */
646 return (a->include_missing == b->include_missing
647 && a->sort_ascending == b->sort_ascending
648 && a->sort_function == b->sort_function
649 && a->sort_var == b->sort_var
650 && a->percentile == b->percentile);
652 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C.

   Asserts that C's reference count is positive and uninitializes every
   category in C's 'cats' array.  Presumably the teardown happens only once
   the reference count drops to zero; the decrement and that test are not
   visible here -- TODO confirm. */
660 ctables_categories_unref (struct ctables_categories *c)
665 assert (c->n_refs > 0);
669 for (size_t i = 0; i < c->n_cats; i++)
670 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must have the same number of
   categories and the same 'show_empty' setting, and each pair of
   corresponding categories is checked with ctables_category_equal()
   (presumably any mismatch yields false -- TODO confirm). */
676 ctables_categories_equal (const struct ctables_categories *a,
677 const struct ctables_categories *b)
679 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
682 for (size_t i = 0; i < a->n_cats; i++)
683 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
689 /* Chi-square test (SIGTEST). */
697 /* Pairwise comparison test (COMPARETEST). */
698 struct ctables_pairwise
700 enum { PROP, MEAN } type;
703 bool meansvariance_allcats;
705 enum { BONFERRONI = 1, BH } adjust;
729 struct variable *var;
731 struct ctables_summary_spec_set specs[N_CSVS];
735 struct ctables_axis *subs[2];
738 struct msg_location *loc;
741 static void ctables_axis_destroy (struct ctables_axis *);
743 struct ctables_summary_spec
745 /* The calculation to be performed.
747 'function' is the function to calculate. 'weighting' specifies which
748 weights to use, if any (functions that do not support a choice accept
749 only one setting). 'calc_area' is the area over which the
750 calculation takes place (for functions that target only an individual
751 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
752 percentile between 0 and 100 (for other functions it must be 0). */
753 enum ctables_summary_function function;
754 enum ctables_weighting weighting;
755 enum ctables_area_type calc_area;
756 double percentile; /* CTSF_PTILE only. */
758 /* How to display the result of the calculation.
760 'label' is a user-specified label, NULL if the user didn't specify
763 'user_area' is usually the same as 'calc_area', but when category labels
764 are rotated from one axis to another it swaps rows and columns.
766 'format' is the format for displaying the output. If
767 'is_ctables_format' is true, then 'format.type' is one of the special
768 CTEF_* formats instead of the standard ones. */
770 enum ctables_area_type user_area;
771 struct fmt_spec format;
772 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Clones SRC into DST.  DST receives its own duplicate of SRC's label (or
   NULL if SRC has none), so the two specs can be uninitialized
   independently.  How the remaining members are copied is not visible
   here -- TODO confirm. */
779 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
780 const struct ctables_summary_spec *src)
783 dst->label = xstrdup_if_nonnull (src->label);
/* Releases whatever S owns.  S itself is presumably not freed, because
   ctables_summary_spec_set_uninit() calls this on the elements of an array
   -- TODO confirm. */
787 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST from SRC.

   A new array of SRC->n specs is allocated (NULL if SRC is empty) and each
   element is cloned with ctables_summary_spec_clone().  *DST is then
   overwritten with a fresh aggregate that carries over, at least, SRC's
   'is_scale' flag. */
794 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
795 const struct ctables_summary_spec_set *src)
797 struct ctables_summary_spec *specs
798 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
799 for (size_t i = 0; i < src->n; i++)
800 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
802 *dst = (struct ctables_summary_spec_set) {
807 .is_scale = src->is_scale,
/* Releases the contents of SET: uninitializes each of its SET->n specs and
   frees its 'listwise_vars' array. */
812 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
814 for (size_t i = 0; i < set->n; i++)
815 ctables_summary_spec_uninit (&set->specs[i]);
816 free (set->listwise_vars);
/* Parses a column width setting from LEXER: an optional '=', followed by
   either the keyword DEFAULT or a number in the closed range [0, DBL_MAX].
   A number is stored in *WIDTH.  NAME is handed to
   lex_force_num_range_closed() (presumably to identify the setting in its
   error message). */
821 parse_col_width (struct lexer *lexer, const char *name, double *width)
823 lex_match (lexer, T_EQUALS);
824 if (lex_match_id (lexer, "DEFAULT"))
826 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
828 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER into *B (presumably false and
   true, respectively -- the assignments are not visible here).  Any other
   token is reported as an error that names the two expected keywords. */
838 parse_bool (struct lexer *lexer, bool *b)
840 if (lex_match_id (lexer, "NO"))
842 else if (lex_match_id (lexer, "YES"))
846 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table
   is generated from "ctables.inc", using the S() macro to pick out each
   function's AVAILABILITY column. */
852 static enum ctables_function_availability
853 ctables_function_availability (enum ctables_summary_function f)
855 static enum ctables_function_availability availability[] = {
856 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
857 #include "ctables.inc"
861 return availability[f];
/* Parses a summary function name from the identifier at LEXER's current
   token.

   The name is decomposed as an optional 'U' or 'E' weighting prefix, an
   optional area name from ctables_area_type_name[], and a base name that is
   looked up case-insensitively in ctables_function_info[].  On a match,
   *FUNCTION, *WEIGHTING, and *AREA describe the result.  Names ending in
   .LCL, .UCL, or .SE are rejected as not yet implemented, and a name that
   matches nothing is reported as an error. */
865 parse_ctables_summary_function (struct lexer *lexer,
866 enum ctables_summary_function *function,
867 enum ctables_weighting *weighting,
868 enum ctables_area_type *area)
870 if (!lex_force_id (lexer))
873 struct substring name = lex_tokss (lexer);
874 if (ss_ends_with_case (name, ss_cstr (".LCL"))
875 || ss_ends_with_case (name, ss_cstr (".UCL"))
876 || ss_ends_with_case (name, ss_cstr (".SE")))
878 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
879 "is not yet implemented."));
/* Strip at most one weighting prefix; 'E' is tried only if 'U' was not
   present. */
883 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
884 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
886 bool has_area = false;
/* Area names are tried as prefixes in enum order, which is why an area
   whose name is a prefix of another (LAYER versus LAYERROW and LAYERCOL)
   has to come later in enum ctables_area_type. */
888 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
889 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
894 if (ss_equals_case (name, ss_cstr ("PCT")))
896 /* Special case where .COUNT suffix is omitted. */
897 *function = CTSF_areaPCT_COUNT;
898 *weighting = CTW_EFFECTIVE;
905 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
907 const struct ctables_function_info *cfi = &ctables_function_info[f];
908 if (ss_equals_case (cfi->basename, name))
/* The base name matched; now check that the prefixes that were supplied are
   ones this function accepts, and that an area was given exactly when the
   function needs one. */
911 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
914 *weighting = (e ? CTW_EFFECTIVE
916 : cfi->e_prefix ? CTW_DICTIONARY
923 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS and the tree beneath it.

   The visible steps uninitialize the summary spec set for each summary
   variant, recursively destroy both sub-axes, and destroy the axis's source
   location.  Which steps apply to which kind of axis, and the handling of a
   null AXIS (callers in this file pass possibly-null pointers), are on lines
   not shown here -- TODO confirm. */
928 ctables_axis_destroy (struct ctables_axis *axis)
936 for (size_t i = 0; i < N_CSVS; i++)
937 ctables_summary_spec_set_uninit (&axis->specs[i]);
942 ctables_axis_destroy (axis->subs[0]);
943 ctables_axis_destroy (axis->subs[1]);
946 msg_location_destroy (axis->loc);
/* Allocates a new axis node whose sub-axes are SUB0 and SUB1 (OP is
   presumably stored as the node's operator -- that member's initializer is
   not visible here).  The node's source location covers the tokens from
   START_OFS through the token just before LEXER's current offset. */
950 static struct ctables_axis *
951 ctables_axis_new_nonterminal (enum ctables_axis_op op,
952 struct ctables_axis *sub0,
953 struct ctables_axis *sub1,
954 struct lexer *lexer, int start_ofs)
956 struct ctables_axis *axis = xmalloc (sizeof *axis);
957 *axis = (struct ctables_axis) {
959 .subs = { sub0, sub1 },
960 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State passed through the mutually recursive ctables_axis_parse_*()
   functions. */
965 struct ctables_axis_parse_ctx
968 struct dictionary *dict;
970 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.

   Each function's format class comes from the FORMAT column of
   "ctables.inc".  Depending on that class, the result is F with width 40,
   PCT with width 40 and 1 decimal, or VAR's own print format. */
973 static struct fmt_spec
974 ctables_summary_default_format (enum ctables_summary_function function,
975 const struct variable *var)
977 static const enum ctables_format default_formats[] = {
978 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
979 #include "ctables.inc"
982 switch (default_formats[function])
985 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
988 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
991 return *var_get_print_format (var);
/* Returns the default label for SPEC's summary function as an untranslated
   string.  Most strings are marked with N_() so that they can be translated
   later.

   The wording depends on SPEC's weighting, and for area functions on
   SPEC's 'user_area'.  CTSF_PTILE must not reach this function (its label
   embeds the percentile, which ctables_summary_label() formats itself). */
999 ctables_summary_label__ (const struct ctables_summary_spec *spec)
/* W is true for any weighting other than unweighted; D is true only for
   dictionary weighting. */
1001 bool w = spec->weighting != CTW_UNWEIGHTED;
1002 bool d = spec->weighting == CTW_DICTIONARY;
1003 enum ctables_area_type a = spec->user_area;
1004 switch (spec->function)
1007 return (d ? N_("Count")
1008 : w ? N_("Adjusted Count")
1009 : N_("Unweighted Count"));
1011 case CTSF_areaPCT_COUNT:
1014 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1015 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1016 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1017 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1018 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1019 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1020 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1024 case CTSF_areaPCT_VALIDN:
1027 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1028 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1029 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1030 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1031 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1032 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1033 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1037 case CTSF_areaPCT_TOTALN:
1040 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1041 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1042 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1043 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1044 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1045 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1046 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1050 case CTSF_MAXIMUM: return N_("Maximum");
1051 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1052 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
1053 case CTSF_MINIMUM: return N_("Minimum");
1054 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
1055 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
1056 case CTSF_PTILE: NOT_REACHED ();
1057 case CTSF_RANGE: return N_("Range");
1058 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
1059 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
1060 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
1061 case CTSF_TOTALN: return (d ? N_("Total N")
1062 : w ? N_("Adjusted Total N")
1063 : N_("Unweighted Total N"));
1064 case CTSF_VALIDN: return (d ? N_("Valid N")
1065 : w ? N_("Adjusted Valid N")
1066 : N_("Unweighted Valid N"));
1067 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
1068 case CTSF_areaPCT_SUM:
1071 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1072 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1073 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1074 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1075 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1076 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1077 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1084 /* Don't bother translating these: they are for developers only. */
1085 case CTAT_TABLE: return "Table ID";
1086 case CTAT_LAYER: return "Layer ID";
1087 case CTAT_LAYERROW: return "Layer Row ID";
1088 case CTAT_LAYERCOL: return "Layer Column ID";
1089 case CTAT_SUBTABLE: return "Subtable ID";
1090 case CTAT_ROW: return "Row ID";
1091 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value to use as the label for SPEC.

   Two paths are visible.  On one (presumably taken when SPEC has no
   user-specified label -- TODO confirm), CTSF_PTILE gets a translated label
   that embeds the percentile, and every other function gets its default
   label from ctables_summary_label__().  On the other, SPEC's own label is
   copied with each occurrence of ")CILEVEL" replaced by CILEVEL formatted
   with "%g". */
1099 static struct pivot_value *
1100 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1104 if (spec->function == CTSF_PTILE)
1106 double p = spec->percentile;
1107 char *s = (spec->weighting != CTW_UNWEIGHTED
1108 ? xasprintf (_("Percentile %.2f"), p)
1109 : xasprintf (_("Unweighted Percentile %.2f"), p));
1110 return pivot_value_new_user_text_nocopy (s);
1113 return pivot_value_new_text (ctables_summary_label__ (spec));
1117 struct substring in = ss_cstr (spec->label);
1118 struct substring target = ss_cstr (")CILEVEL");
1120 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next ")CILEVEL" marker to OUT and drop it
   from IN. */
1123 size_t chunk = ss_find_substring (in, target);
1124 ds_put_substring (&out, ss_head (in, chunk));
1125 ss_advance (&in, chunk);
1127 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip over the marker and emit the confidence level in its place. */
1129 ss_advance (&in, target.length);
1130 ds_put_format (&out, "%g", cilevel);
/* Formats the syntax name of summary FUNCTION into BUFFER, writing at most
   BUFSIZE bytes via snprintf().

   The name consists of a weighting prefix ("U" for unweighted, nothing for
   dictionary weights, "E" otherwise if the function accepts an E prefix),
   then the name of AREA if FUNCTION is an area function, then the
   function's base name. */
1136 ctables_summary_function_name (enum ctables_summary_function function,
1137 enum ctables_weighting weighting,
1138 enum ctables_area_type area,
1139 char *buffer, size_t bufsize)
1141 const struct ctables_function_info *cfi = &ctables_function_info[function];
1142 snprintf (buffer, bufsize, "%s%s%s",
1143 (weighting == CTW_UNWEIGHTED ? "U"
1144 : weighting == CTW_DICTIONARY ? ""
1145 : cfi->e_prefix ? "E"
1147 cfi->is_area ? ctables_area_type_name[area] : "",
1148 cfi->basename.string);
/* Adds a summary specification to AXIS for summary variant SV.

   If AXIS is a variable (CTAO_VAR), FUNCTION's availability is checked
   against the variable first, reporting an error at LOC with a note at the
   axis's own location if it does not apply.  Then a new spec is appended to
   AXIS's spec set for SV.  LABEL is duplicated, so the caller keeps
   ownership of its copy.  If FORMAT is null, the function's default format
   for the variable is used.

   For any other kind of axis, the spec is added recursively to both
   sub-axes.

   The recursive call tests the result with '!', so the function evidently
   returns a success indicator (presumably bool -- the return type line is
   not visible here). */
1153 add_summary_spec (struct ctables_axis *axis,
1154 enum ctables_summary_function function,
1155 enum ctables_weighting weighting,
1156 enum ctables_area_type area, double percentile,
1157 const char *label, const struct fmt_spec *format,
1158 bool is_ctables_format, const struct msg_location *loc,
1159 enum ctables_summary_variant sv)
1161 if (axis->op == CTAO_VAR)
/* The function's full syntax name is needed only for error messages. */
1163 char function_name[128];
1164 ctables_summary_function_name (function, weighting, area,
1165 function_name, sizeof function_name);
1166 const char *var_name = var_get_name (axis->var);
1167 switch (ctables_function_availability (function))
1171 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1172 "response sets."), function_name);
1173 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* Scale-only functions are still accepted on non-scale variables in the
   CSV_TOTAL variant. */
1179 if (!axis->scale && sv != CSV_TOTAL)
1182 _("Summary function %s applies only to scale variables."),
1184 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append a new spec, growing the array if it is full. */
1194 struct ctables_summary_spec_set *set = &axis->specs[sv];
1195 if (set->n >= set->allocated)
1196 set->specs = x2nrealloc (set->specs, &set->allocated,
1197 sizeof *set->specs);
1199 struct ctables_summary_spec *dst = &set->specs[set->n++];
1200 *dst = (struct ctables_summary_spec) {
1201 .function = function,
1202 .weighting = weighting,
1205 .percentile = percentile,
1206 .label = xstrdup_if_nonnull (label),
1207 .format = (format ? *format
1208 : ctables_summary_default_format (function, axis->var)),
1209 .is_ctables_format = is_ctables_format,
/* Not a variable: apply the same spec to both sub-axes. */
1215 for (size_t i = 0; i < 2; i++)
1216 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1217 percentile, label, format, is_ctables_format,
/* Forward declaration: ctables_axis_parse_primary() calls this to parse a
   parenthesized axis expression. */
1224 static struct ctables_axis *ctables_axis_parse_stack (
1225 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression
   or a single variable name, optionally followed by "[S]" or "[C]".

   For a variable, a new CTAO_VAR axis is allocated.  Its 'scale' flag is
   true after "[S]", false after "[C]", and otherwise follows the variable's
   measurement level.  A string variable marked as scale is rejected, as are
   names that start with '$' (multiple response sets are not implemented). */
1228 static struct ctables_axis *
1229 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1231 if (lex_match (ctx->lexer, T_LPAREN))
1233 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* SUB may be null here, so ctables_axis_destroy() is evidently expected to
   tolerate a null argument. */
1234 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1236 ctables_axis_destroy (sub);
1242 if (!lex_force_id (ctx->lexer))
1245 if (lex_tokcstr (ctx->lexer)[0] == '$')
1247 lex_error (ctx->lexer,
1248 _("Multiple response set support not implemented."));
1252 int start_ofs = lex_ofs (ctx->lexer);
1253 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1257 struct ctables_axis *axis = xmalloc (sizeof *axis);
1258 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] marker overrides the variable's measurement
   level. */
1260 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1261 : lex_match_phrase (ctx->lexer, "[C]") ? false
1262 : var_get_measure (var) == MEASURE_SCALE);
1263 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1264 lex_ofs (ctx->lexer) - 1);
1265 if (axis->scale && var_is_alpha (var))
1267 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1269 var_get_name (var));
1270 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the ASCII digits 0 through 9,
   false otherwise. */
1278 has_digit (const char *s)
1280 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The CTABLES-specific type names NEGPAREN, NEQUAL, PAREN, and PCTPAREN
   (matched case-insensitively) map to the corresponding CTEF_* types.  For
   these, the width and decimals are validated and *IS_CTABLES_FORMAT is set
   to true.  Any other type name is parsed as a regular format with
   parse_format_specifier(), which must also pass fmt_check_output() and be
   compatible with numeric values; *IS_CTABLES_FORMAT is then false. */
1284 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1285 bool *is_ctables_format)
1287 char type[FMT_TYPE_LEN_MAX + 1];
1288 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1291 if (!strcasecmp (type, "NEGPAREN"))
1292 format->type = CTEF_NEGPAREN;
1293 else if (!strcasecmp (type, "NEQUAL"))
1294 format->type = CTEF_NEQUAL;
1295 else if (!strcasecmp (type, "PAREN"))
1296 format->type = CTEF_PAREN;
1297 else if (!strcasecmp (type, "PCTPAREN"))
1298 format->type = CTEF_PCTPAREN;
1301 *is_ctables_format = false;
1302 return (parse_format_specifier (lexer, format)
1303 && fmt_check_output (format)
1304 && fmt_check_type_compat (format, VAL_NUMERIC));
1310 lex_next_error (lexer, -1, -1,
1311 _("Output format %s requires width 2 or greater."), type);
1314 else if (format->d > format->w - 1)
1316 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1317 "greater than decimals."), type);
1322 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a summary function name, then a percentile (for
   PTILE only, in the range 0 to 100), then an optional quoted label, then an
   optional output format.  Specifications may be separated by commas.
   Within the list, TOTALS followed by '[' introduces specifications for
   the CSV_TOTAL variant instead of CSV_CELL.  Each parsed specification is
   passed to add_summary_spec() for the primary expression. */
1327 static struct ctables_axis *
1328 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1330 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1331 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1334 enum ctables_summary_variant sv = CSV_CELL;
1337 int start_ofs = lex_ofs (ctx->lexer);
1339 /* Parse function. */
1340 enum ctables_summary_function function;
1341 enum ctables_weighting weighting;
1342 enum ctables_area_type area;
1343 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1347 /* Parse percentile. */
1348 double percentile = 0;
1349 if (function == CTSF_PTILE)
1351 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1353 percentile = lex_number (ctx->lexer);
1354 lex_get (ctx->lexer);
1359 if (lex_is_string (ctx->lexer))
1361 label = ss_xstrdup (lex_tokss (ctx->lexer));
1362 lex_get (ctx->lexer);
/* An identifier token that contains a digit is taken to be an output
   format specifier. */
1366 struct fmt_spec format;
1367 const struct fmt_spec *formatp;
1368 bool is_ctables_format = false;
1369 if (lex_token (ctx->lexer) == T_ID
1370 && has_digit (lex_tokcstr (ctx->lexer)))
1372 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1373 &is_ctables_format))
/* NOTE(review): the result of add_summary_spec() is not checked here, even
   though that function reports errors and tests its own recursive result --
   confirm whether a failure should abort parsing. */
1383 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1384 lex_ofs (ctx->lexer) - 1);
1385 add_summary_spec (sub, function, weighting, area, percentile, label,
1386 formatp, is_ctables_format, loc, sv);
1388 msg_location_destroy (loc);
1390 lex_match (ctx->lexer, T_COMMA);
1391 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1393 if (!lex_force_match (ctx->lexer, T_LBRACK))
1397 else if (lex_match (ctx->lexer, T_RBRACK))
/* A TOTALS list has its own closing bracket, so a second ']' is required to
   close the enclosing list. */
1399 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1406 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node is returned if its 'scale' flag is set, and yields NULL otherwise.
   For other nodes both sub-axes are searched recursively (presumably the
   first match is returned, or NULL if there is none -- TODO confirm). */
1410 static const struct ctables_axis *
1411 find_scale (const struct ctables_axis *axis)
1415 else if (axis->op == CTAO_VAR)
1416 return axis->scale ? axis : NULL;
1419 for (size_t i = 0; i < 2; i++)
1421 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale variable that has at
   least one CSV_CELL summary specification.  A variable node is returned if
   it qualifies, and yields NULL otherwise.  For other nodes both sub-axes
   are searched recursively (presumably the first match is returned, or NULL
   if there is none -- TODO confirm). */
1429 static const struct ctables_axis *
1430 find_categorical_summary_spec (const struct ctables_axis *axis)
1434 else if (axis->op == CTAO_VAR)
1435 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1438 for (size_t i = 0; i < 2; i++)
1440 const struct ctables_axis *sum
1441 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a chain of postfix axis expressions joined by `>' (T_GT),
   combining each new right-hand operand with what has been parsed so far
   into a CTAO_NEST node.

   Two restrictions are enforced on every nest node, each reported with an
   error at the nest's location plus notes pointing at the offending
   operands, after which the nest node is destroyed:

   - Scale variables may not appear on both the outer and inner side.

   - A categorical variable on the outer side may not have summaries. */
1449 static struct ctables_axis *
1450 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1452 int start_ofs = lex_ofs (ctx->lexer);
1453 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1457 while (lex_match (ctx->lexer, T_GT))
1459 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* NOTE(review): presumably the cleanup for a failed right-hand operand;
   confirm against the guarding condition. */
1462 ctables_axis_destroy (lhs);
1466 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1467 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Restriction 1: at most one side may contain a scale variable. */
1469 const struct ctables_axis *outer_scale = find_scale (lhs);
1470 const struct ctables_axis *inner_scale = find_scale (rhs);
1471 if (outer_scale && inner_scale)
1473 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1474 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1475 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1476 ctables_axis_destroy (nest);
/* Restriction 2: only the innermost categorical variable may carry
   summaries, so look for one on the outer side. */
1480 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1483 msg_at (SE, nest->loc,
1484 _("Summaries may only be requested for categorical variables "
1485 "at the innermost nesting level."));
1486 msg_at (SN, outer_sum->loc,
1487 _("This outer categorical variable has a summary."));
1488 ctables_axis_destroy (nest);
/* Parses a chain of nest expressions joined by `+' (T_PLUS), folding each
   new right-hand operand into the accumulated left-hand side as a
   CTAO_STACK node whose location starts at the first token parsed. */
1498 static struct ctables_axis *
1499 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1501 int start_ofs = lex_ofs (ctx->lexer);
1502 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1506 while (lex_match (ctx->lexer, T_PLUS))
1508 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* NOTE(review): presumably the cleanup for a failed right-hand operand;
   confirm against the guarding condition. */
1511 ctables_axis_destroy (lhs);
1515 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1516 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T from LEXER and stores
   the resulting tree in T->axes[A].  The tokens BY, `/', and end of command
   are tested first (NOTE(review): presumably they denote an empty axis;
   confirm).  When an expression is parsed, the result is true if a tree was
   produced and false if ctables_axis_parse_stack returned NULL. */
1523 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1524 struct ctables *ct, struct ctables_table *t,
1525 enum pivot_axis_type a)
1527 if (lex_token (lexer) == T_BY
1528 || lex_token (lexer) == T_SLASH
1529 || lex_token (lexer) == T_ENDCMD)
1532 struct ctables_axis_parse_ctx ctx = {
1538 t->axes[a] = ctables_axis_parse_stack (&ctx);
1539 return t->axes[a] != NULL;
/* Destructor for CHISQ, called from ctables_table_destroy.
   NOTE(review): confirm what, if anything, CHISQ owns besides itself. */
1543 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE, called from ctables_table_destroy.
   NOTE(review): confirm what, if anything, PAIRWISE owns besides itself. */
1549 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Tears down the pieces of table T handled here: every section, the
   per-variable category references and the array holding them, the axis
   tree and stack of each pivot axis, the summary spec array, the map and
   array of category-label values, and the chisq and pairwise settings. */
1555 ctables_table_destroy (struct ctables_table *t)
1560 for (size_t i = 0; i < t->n_sections; i++)
1561 ctables_section_uninit (&t->sections[i]);
/* Categories are reference counted, so drop one reference per slot. */
1564 for (size_t i = 0; i < t->n_categories; i++)
1565 ctables_categories_unref (t->categories[i]);
1566 free (t->categories);
1568 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1570 ctables_axis_destroy (t->axes[a]);
1571 ctables_stack_uninit (&t->stacks[a]);
1573 free (t->summary_specs.specs);
/* Empty the map node by node; each stored value is destroyed using the
   width of T's clabels_example variable. */
1575 struct ctables_value *ctv, *next_ctv;
1576 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1577 &t->clabels_values_map)
1579 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1580 hmap_delete (&t->clabels_values_map, &ctv->node);
1583 hmap_destroy (&t->clabels_values_map);
1584 free (t->clabels_values);
1590 ctables_chisq_destroy (t->chisq);
1591 ctables_pairwise_destroy (t->pairwise);
/* Tears down the pieces of CT handled here: every postcompute in the
   `postcomputes' map (its location, expression, and summary specs), the
   map itself, the CTABLES format settings, the reference to the pivot
   table look, and each table. */
1596 ctables_destroy (struct ctables *ct)
1601 struct ctables_postcompute *pc, *next_pc;
1602 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1606 msg_location_destroy (pc->location);
1607 ctables_pcexpr_destroy (pc->expr);
1611 ctables_summary_spec_set_uninit (pc->specs);
1614 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1617 hmap_destroy (&ct->postcomputes);
1619 fmt_settings_uninit (&ct->ctables_formats);
1620 pivot_table_look_unref (ct->look);
1624 for (size_t i = 0; i < ct->n_tables; i++)
1625 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose numeric range member is
   { LOW, HIGH }. */
1630 static struct ctables_category
1631 cct_nrange (double low, double high)
1633 return (struct ctables_category) {
1635 .nrange = { low, high }
/* Returns a category, by value, whose string range member is
   { LOW, HIGH }.  The substrings are stored as passed, not copied. */
1639 static struct ctables_category
1640 cct_srange (struct substring low, struct substring high)
1642 return (struct ctables_category) {
1644 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category, after the
   keyword itself, and stores a CCT_SUBTOTAL category in *CAT.

   If `=' follows, a string token is required and a copy of it becomes the
   label; otherwise the label is a copy of the translated default
   "Subtotal".  HIDE_SUBCATEGORIES is recorded in the category as given. */
1649 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1650 struct ctables_category *cat)
1653 if (lex_match (lexer, T_EQUALS))
1655 if (!lex_force_string (lexer))
1658 total_label = ss_xstrdup (lex_tokss (lexer));
1662 total_label = xstrdup (_("Subtotal"));
1664 *cat = (struct ctables_category) {
1665 .type = CCT_SUBTOTAL,
1666 .hide_subcategories = hide_subcategories,
1667 .total_label = total_label
/* Takes the string of LEXER's current token, recodes it between "UTF-8"
   and DICT's encoding (NOTE(review): presumably from UTF-8 into the
   dictionary encoding; confirm the argument order of
   recode_substring_pool), and strips trailing spaces from the result. */
1672 static struct substring
1673 parse_substring (struct lexer *lexer, struct dictionary *dict)
1675 struct substring s = recode_substring_pool (
1676 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1677 ss_rtrim (&s, ss_cstr (" "));
/* Parses a single explicit category specification from LEXER into *CAT.

   The forms handled are:

   - OTHERNM and MISSING, which produce CCT_OTHERNM and CCT_MISSING.

   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal
     with hide_subcategories false and true, respectively.

   - LO THRU followed by a string or a number.

   - A number, optionally followed by THRU HI or THRU and another number.

   - A string, optionally followed by THRU HI or THRU and another string.

   - `&' (T_AND) followed by the name of a postcompute, which is looked up
     in CT; an unknown name is reported as an error.

   The open end of a numeric range is represented by -DBL_MAX or DBL_MAX,
   and the open end of a string range by a substring whose `string' member
   is null.  Any other token is reported with lex_error. */
1683 ctables_table_parse_explicit_category (struct lexer *lexer,
1684 struct dictionary *dict,
1686 struct ctables_category *cat)
1688 if (lex_match_id (lexer, "OTHERNM"))
1689 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1690 else if (lex_match_id (lexer, "MISSING"))
1691 *cat = (struct ctables_category) { .type = CCT_MISSING };
1692 else if (lex_match_id (lexer, "SUBTOTAL"))
1693 return ctables_table_parse_subtotal (lexer, false, cat);
1694 else if (lex_match_id (lexer, "HSUBTOTAL"))
1695 return ctables_table_parse_subtotal (lexer, true, cat);
1696 else if (lex_match_id (lexer, "LO"))
1698 if (!lex_force_match_id (lexer, "THRU"))
1700 if (lex_is_string (lexer))
/* Null lower bound: the string range is open at the low end. */
1702 struct substring sr0 = { .string = NULL };
1703 struct substring sr1 = parse_substring (lexer, dict);
1704 *cat = cct_srange (sr0, sr1);
1706 else if (lex_force_num (lexer))
1708 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1714 else if (lex_is_number (lexer))
1716 double number = lex_number (lexer);
1718 if (lex_match_id (lexer, "THRU"))
1720 if (lex_match_id (lexer, "HI"))
1721 *cat = cct_nrange (number, DBL_MAX);
1724 if (!lex_force_num (lexer))
1726 *cat = cct_nrange (number, lex_number (lexer));
1731 *cat = (struct ctables_category) {
1736 else if (lex_is_string (lexer))
1738 struct substring s = parse_substring (lexer, dict);
1739 if (lex_match_id (lexer, "THRU"))
1741 if (lex_match_id (lexer, "HI"))
/* Null upper bound: the string range is open at the high end. */
1743 struct substring sr1 = { .string = NULL };
1744 *cat = cct_srange (s, sr1);
1748 if (!lex_force_string (lexer))
1753 struct substring sr1 = parse_substring (lexer, dict);
1754 *cat = cct_srange (s, sr1);
1758 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1760 else if (lex_match (lexer, T_AND))
1762 if (!lex_force_id (lexer))
1764 struct ctables_postcompute *pc = ctables_find_postcompute (
1765 ct, lex_tokcstr (lexer));
/* The error location covers the `&' token and the identifier. */
1768 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1769 msg_at (SE, loc, _("Unknown postcompute &%s."),
1770 lex_tokcstr (lexer));
1771 msg_location_destroy (loc);
1776 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1780 lex_error (lexer, NULL);
/* Interprets the text S, in DICT's encoding, as a value in input format
   FORMAT by passing it to data_in with the current format settings.  If
   data_in returns an error string, an error naming FORMAT and that string
   is reported at LOCATION.  NOTE(review): the parsed number is presumably
   stored through N on success; confirm. */
1788 parse_category_string (struct msg_location *location,
1789 struct substring s, const struct dictionary *dict,
1790 enum fmt_type format, double *n)
1793 char *error = data_in (s, dict_get_encoding (dict), format,
1794 settings_get_fmt_settings (), &v, 0, NULL);
1797 msg_at (SE, location,
1798 _("Failed to parse category specification as format %s: %s."),
1799 fmt_name (format), error);
/* Scans the categories in CATS for one that corresponds to the category
   reference expression E.  The kind of reference (E->op) determines the
   test applied to each category:

   - Number, string, numeric range, and string range references require a
     category of the matching type with equal contents.

   - MISSING, OTHERNM, and TOTAL references match on category type alone.

   - SUBTOTAL references match CCT_SUBTOTAL categories, taking
     E->subtotal_index into account; an index of 0 is treated separately,
     and the final test singles out the case where such a reference meets
     more than one subtotal. */
1808 static struct ctables_category *
1809 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1810 const struct ctables_pcexpr *e)
1812 struct ctables_category *best = NULL;
1813 size_t n_subtotals = 0;
1814 for (size_t i = 0; i < cats->n_cats; i++)
1816 struct ctables_category *cat = &cats->cats[i];
1819 case CTPO_CAT_NUMBER:
1820 if (cat->type == CCT_NUMBER && cat->number == e->number)
1824 case CTPO_CAT_STRING:
1825 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1829 case CTPO_CAT_NRANGE:
1830 if (cat->type == CCT_NRANGE
1831 && cat->nrange[0] == e->nrange[0]
1832 && cat->nrange[1] == e->nrange[1])
1836 case CTPO_CAT_SRANGE:
1837 if (cat->type == CCT_SRANGE
1838 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1839 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1843 case CTPO_CAT_MISSING:
1844 if (cat->type == CCT_MISSING)
1848 case CTPO_CAT_OTHERNM:
1849 if (cat->type == CCT_OTHERNM)
1853 case CTPO_CAT_SUBTOTAL:
1854 if (cat->type == CCT_SUBTOTAL)
1857 if (e->subtotal_index == n_subtotals)
1859 else if (e->subtotal_index == 0)
1864 case CTPO_CAT_TOTAL:
1865 if (cat->type == CCT_TOTAL)
/* A SUBTOTAL reference without a position cannot pick among several
   subtotals. */
1879 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that the category reference expression E
   refers to, using ctables_find_category_for_postcompute__ for the actual
   search.

   When PARSE_FORMAT is not FMT_F, string-based references are first
   converted to numeric ones by parsing their strings in PARSE_FORMAT: a
   CTPO_CAT_STRING reference is looked up as a CTPO_CAT_NUMBER, and a
   CTPO_CAT_SRANGE reference as a CTPO_CAT_NRANGE in which a null bound
   string becomes -DBL_MAX (lower) or DBL_MAX (upper).  All other
   references are looked up unchanged. */
1884 static struct ctables_category *
1885 ctables_find_category_for_postcompute (const struct dictionary *dict,
1886 const struct ctables_categories *cats,
1887 enum fmt_type parse_format,
1888 const struct ctables_pcexpr *e)
1890 if (parse_format != FMT_F)
1892 if (e->op == CTPO_CAT_STRING)
1895 if (!parse_category_string (e->location, e->string, dict,
1896 parse_format, &number))
/* Temporary numeric reference that reuses E's location for messages. */
1899 struct ctables_pcexpr e2 = {
1900 .op = CTPO_CAT_NUMBER,
1902 .location = e->location,
1904 return ctables_find_category_for_postcompute__ (cats, &e2);
1906 else if (e->op == CTPO_CAT_SRANGE)
1909 if (!e->srange[0].string)
1910 nrange[0] = -DBL_MAX;
1911 else if (!parse_category_string (e->location, e->srange[0], dict,
1912 parse_format, &nrange[0]))
1915 if (!e->srange[1].string)
1916 nrange[1] = DBL_MAX;
1917 else if (!parse_category_string (e->location, e->srange[1], dict,
1918 parse_format, &nrange[1]))
1921 struct ctables_pcexpr e2 = {
1922 .op = CTPO_CAT_NRANGE,
1923 .nrange = { nrange[0], nrange[1] },
1924 .location = e->location,
1926 return ctables_find_category_for_postcompute__ (cats, &e2);
1929 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks the postcompute expression E, used by the computed category
   PC_CAT, against the category list CATS, whose source location is
   CATS_LOCATION.

   For each category reference in E, the matching category is looked up
   with ctables_find_category_for_postcompute using PC_CAT's parse format.
   The diagnostics issued here are:

   - A SUBTOTAL reference without a position when CATS holds more than one
     subtotal: an error at CATS_LOCATION plus a note at the reference.

   - A reference to a category absent from CATS: an error at PC_CAT's
     location, a note at the reference, and a hint on how to fix it that
     depends on whether a subtotal, the total, or another category is
     missing.

   For other expression nodes, both subexpressions are checked
   recursively. */
1933 ctables_recursive_check_postcompute (struct dictionary *dict,
1934 const struct ctables_pcexpr *e,
1935 struct ctables_category *pc_cat,
1936 const struct ctables_categories *cats,
1937 const struct msg_location *cats_location)
1941 case CTPO_CAT_NUMBER:
1942 case CTPO_CAT_STRING:
1943 case CTPO_CAT_NRANGE:
1944 case CTPO_CAT_SRANGE:
1945 case CTPO_CAT_MISSING:
1946 case CTPO_CAT_OTHERNM:
1947 case CTPO_CAT_SUBTOTAL:
1948 case CTPO_CAT_TOTAL:
1950 struct ctables_category *cat = ctables_find_category_for_postcompute (
1951 dict, cats, pc_cat->parse_format, e);
1954 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals to tell an ambiguous reference apart from a
   missing one. */
1956 size_t n_subtotals = 0;
1957 for (size_t i = 0; i < cats->n_cats; i++)
1958 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1959 if (n_subtotals > 1)
1961 msg_at (SE, cats_location,
1962 ngettext ("These categories include %zu instance "
1963 "of SUBTOTAL or HSUBTOTAL, so references "
1964 "from computed categories must refer to "
1965 "subtotals by position, "
1966 "e.g. SUBTOTAL[1].",
1967 "These categories include %zu instances "
1968 "of SUBTOTAL or HSUBTOTAL, so references "
1969 "from computed categories must refer to "
1970 "subtotals by position, "
1971 "e.g. SUBTOTAL[1].",
1974 msg_at (SN, e->location,
1975 _("This is the reference that lacks a position."));
1980 msg_at (SE, pc_cat->location,
1981 _("Computed category &%s references a category not included "
1982 "in the category list."),
1984 msg_at (SN, e->location, _("This is the missing category."));
1985 if (e->op == CTPO_CAT_SUBTOTAL)
1986 msg_at (SN, cats_location,
1987 _("To fix the problem, add subtotals to the "
1988 "list of categories here."));
1989 else if (e->op == CTPO_CAT_TOTAL)
1990 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1991 "CATEGORIES specification."));
1993 msg_at (SN, cats_location,
1994 _("To fix the problem, add the missing category to the "
1995 "list of categories here."));
1998 if (pc_cat->pc->hide_source_cats)
/* Non-leaf nodes: check each operand that is present. */
2012 for (size_t i = 0; i < 2; i++)
2013 if (e->subs[i] && !ctables_recursive_check_postcompute (
2014 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that none of the N_VARS variables in VARS is numeric.  When a
   numeric variable is found, an error naming it is reported at CAT's
   location, explaining that the category specification applies only to
   string variables. */
2023 all_strings (struct variable **vars, size_t n_vars,
2024 const struct ctables_category *cat)
2026 for (size_t j = 0; j < n_vars; j++)
2027 if (var_is_numeric (vars[j]))
2029 msg_at (SE, cat->location,
2030 _("This category specification may be applied only to string "
2031 "variables, but this subcommand tries to apply it to "
2032 "numeric variable %s."),
2033 var_get_name (vars[j]));
/* Parses a CATEGORIES specification for table T from LEXER.

   The syntax handled is VARIABLES followed by an optional `=' and a list
   of variables, then either a bracketed list of explicit categories or
   settings for implicit categories, then further settings up to the next
   `/' or the end of the command:

   - ORDER=A|D, KEY=VALUE|LABEL|<summary function>, and
     MISSING=INCLUDE|EXCLUDE are accepted only when no explicit categories
     were given.  KEY with a summary function is parsed and then reported
     as not implemented.

   - TOTAL=<bool>, LABEL=<string>, POSITION=BEFORE|AFTER, and
     EMPTY=INCLUDE|EXCLUDE are accepted in either case.

   A new category set is created with one reference per listed variable,
   and the set previously attached to each of those variables in T is
   unreferenced.  Explicit categories are validated at the end against the
   types and print formats of the listed variables. */
2040 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2041 struct ctables *ct, struct ctables_table *t)
2043 if (!lex_match_id (lexer, "VARIABLES"))
2045 lex_match (lexer, T_EQUALS);
2047 struct variable **vars;
2049 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables, or null if they
   do not all agree. */
2052 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2053 for (size_t i = 1; i < n_vars; i++)
2055 const struct fmt_spec *f = var_get_print_format (vars[i]);
2056 if (f->type != common_format->type)
2058 common_format = NULL;
2064 && (fmt_get_category (common_format->type)
2065 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
2067 struct ctables_categories *c = xmalloc (sizeof *c);
2068 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2069 for (size_t i = 0; i < n_vars; i++)
2071 struct ctables_categories **cp
2072 = &t->categories[var_get_dict_index (vars[i])];
2073 ctables_categories_unref (*cp);
/* Explicit categories: a bracketed list with optional commas between
   items.  The token offsets of the list are kept for later messages. */
2077 size_t allocated_cats = 0;
2078 int cats_start_ofs = -1;
2079 int cats_end_ofs = -1;
2080 if (lex_match (lexer, T_LBRACK))
2082 cats_start_ofs = lex_ofs (lexer);
2085 if (c->n_cats >= allocated_cats)
2086 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2088 int start_ofs = lex_ofs (lexer);
2089 struct ctables_category *cat = &c->cats[c->n_cats];
2090 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2092 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2095 lex_match (lexer, T_COMMA);
2097 while (!lex_match (lexer, T_RBRACK));
2098 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for an implicit category, adjusted by the settings below. */
2101 struct ctables_category cat = {
2103 .include_missing = false,
2104 .sort_ascending = true,
2106 bool show_totals = false;
2107 char *total_label = NULL;
2108 bool totals_before = false;
2109 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2111 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2113 lex_match (lexer, T_EQUALS);
2114 if (lex_match_id (lexer, "A"))
2115 cat.sort_ascending = true;
2116 else if (lex_match_id (lexer, "D"))
2117 cat.sort_ascending = false;
2120 lex_error_expecting (lexer, "A", "D");
2124 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2126 int start_ofs = lex_ofs (lexer) - 1;
2127 lex_match (lexer, T_EQUALS);
2128 if (lex_match_id (lexer, "VALUE"))
2129 cat.type = CCT_VALUE;
2130 else if (lex_match_id (lexer, "LABEL"))
2131 cat.type = CCT_LABEL;
2134 cat.type = CCT_FUNCTION;
2135 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2136 &cat.weighting, &cat.area))
2139 if (lex_match (lexer, T_LPAREN))
2141 cat.sort_var = parse_variable (lexer, dict);
2145 if (cat.sort_function == CTSF_PTILE)
2147 lex_match (lexer, T_COMMA);
2148 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2150 cat.percentile = lex_number (lexer);
2154 if (!lex_force_match (lexer, T_RPAREN))
2157 else if (ctables_function_availability (cat.sort_function)
2160 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
/* Sorting by a summary function is parsed above but not supported. */
2164 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
2165 _("Data-dependent sorting is not implemented."));
2169 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2171 lex_match (lexer, T_EQUALS);
2172 if (lex_match_id (lexer, "INCLUDE"))
2173 cat.include_missing = true;
2174 else if (lex_match_id (lexer, "EXCLUDE"))
2175 cat.include_missing = false;
2178 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2182 else if (lex_match_id (lexer, "TOTAL"))
2184 lex_match (lexer, T_EQUALS);
2185 if (!parse_bool (lexer, &show_totals))
2188 else if (lex_match_id (lexer, "LABEL"))
2190 lex_match (lexer, T_EQUALS);
2191 if (!lex_force_string (lexer))
2194 total_label = ss_xstrdup (lex_tokss (lexer));
2197 else if (lex_match_id (lexer, "POSITION"))
2199 lex_match (lexer, T_EQUALS);
2200 if (lex_match_id (lexer, "BEFORE"))
2201 totals_before = true;
2202 else if (lex_match_id (lexer, "AFTER"))
2203 totals_before = false;
2206 lex_error_expecting (lexer, "BEFORE", "AFTER");
2210 else if (lex_match_id (lexer, "EMPTY"))
2212 lex_match (lexer, T_EQUALS);
2213 if (lex_match_id (lexer, "INCLUDE"))
2214 c->show_empty = true;
2215 else if (lex_match_id (lexer, "EXCLUDE"))
2216 c->show_empty = false;
2219 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2226 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2227 "TOTAL", "LABEL", "POSITION", "EMPTY");
2229 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the settings above. */
2236 if (c->n_cats >= allocated_cats)
2237 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2238 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, placed either first or last. */
2243 if (c->n_cats >= allocated_cats)
2244 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2246 struct ctables_category *totals;
2249 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2250 totals = &c->cats[0];
2253 totals = &c->cats[c->n_cats];
2256 *totals = (struct ctables_category) {
2258 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, recording a `subtotal' pointer in categories along the
   way. */
2262 struct ctables_category *subtotal = NULL;
2263 for (size_t i = totals_before ? 0 : c->n_cats;
2264 totals_before ? i < c->n_cats : i-- > 0;
2265 totals_before ? i++ : 0)
2267 struct ctables_category *cat = &c->cats[i];
2276 cat->subtotal = subtotal;
2279 case CCT_POSTCOMPUTE:
2290 case CCT_EXCLUDED_MISSING:
/* Validation, performed only when an explicit category list was given. */
2295 if (cats_start_ofs != -1)
2297 for (size_t i = 0; i < c->n_cats; i++)
2299 struct ctables_category *cat = &c->cats[i];
2302 case CCT_POSTCOMPUTE:
2303 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2304 struct msg_location *cats_location
2305 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2306 bool ok = ctables_recursive_check_postcompute (
2307 dict, cat->pc->expr, cat, c, cats_location);
2308 msg_location_destroy (cats_location);
2315 for (size_t j = 0; j < n_vars; j++)
2316 if (var_is_alpha (vars[j]))
2318 msg_at (SE, cat->location,
2319 _("This category specification may be applied "
2320 "only to numeric variables, but this "
2321 "subcommand tries to apply it to string "
2323 var_get_name (vars[j]));
/* A string category is converted in place to a number by parsing it in
   the common format; the alternative branch requires string variables. */
2332 if (!parse_category_string (cat->location, cat->string, dict,
2333 common_format->type, &n))
2336 ss_dealloc (&cat->string);
2338 cat->type = CCT_NUMBER;
2341 else if (!all_strings (vars, n_vars, cat))
/* Likewise for a string range, bound by bound; a null bound string is
   handled separately from a bound that must be parsed. */
2350 if (!cat->srange[0].string)
2352 else if (!parse_category_string (cat->location,
2353 cat->srange[0], dict,
2354 common_format->type, &n[0]))
2357 if (!cat->srange[1].string)
2359 else if (!parse_category_string (cat->location,
2360 cat->srange[1], dict,
2361 common_format->type, &n[1]))
2364 ss_dealloc (&cat->srange[0]);
2365 ss_dealloc (&cat->srange[1]);
2367 cat->type = CCT_NRANGE;
2368 cat->nrange[0] = n[0];
2369 cat->nrange[1] = n[1];
2371 else if (!all_strings (vars, n_vars, cat))
2382 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: the summary spec set for every summary
   variant and the area array for every area type. */
2397 ctables_nest_uninit (struct ctables_nest *nest)
2400 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2401 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2402 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2403 free (nest->areas[at]);
/* Uninitializes each of STACK's nests and frees the array that holds
   them. */
2407 ctables_stack_uninit (struct ctables_stack *stack)
2411 for (size_t i = 0; i < stack->n; i++)
2412 ctables_nest_uninit (&stack->nests[i]);
2413 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one nest for
   every pairing of a nest A from S0 with a nest B from S1.  Each new nest
   lists A's variables followed by B's.  Its scale and summary indexes come
   from A when A has them, otherwise from B shifted by the number of
   variables in A.  Its summary specs are cloned from whichever of A and B
   is chosen as the summary source.

   S0 and S1 are passed by value and uninitialized before returning, so the
   caller must not use them afterward. */
2417 static struct ctables_stack
2418 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2425 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2426 for (size_t i = 0; i < s0.n; i++)
2427 for (size_t j = 0; j < s1.n; j++)
2429 const struct ctables_nest *a = &s0.nests[i];
2430 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists, outer variables first. */
2432 size_t allocate = a->n + b->n;
2433 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2435 for (size_t k = 0; k < a->n; k++)
2436 vars[n++] = a->vars[k];
2437 for (size_t k = 0; k < b->n; k++)
2438 vars[n++] = b->vars[k];
2439 assert (n == allocate);
/* Choose the source of summary specs based on which side has a CSV_CELL
   summary variable. */
2441 const struct ctables_nest *summary_src;
2442 if (!a->specs[CSV_CELL].var)
2444 else if (!b->specs[CSV_CELL].var)
2449 struct ctables_nest *new = &stack.nests[stack.n++];
2450 *new = (struct ctables_nest) {
2452 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2453 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2455 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2456 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2460 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2461 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2463 ctables_stack_uninit (&s0);
2464 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S1 after S0: the nests of S0
   followed by the nests of S1, copied by structure assignment.  The
   group_head of each nest taken from S1 is shifted by the number of nests
   in S0. */
2468 static struct ctables_stack
2469 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2471 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2472 for (size_t i = 0; i < s0.n; i++)
2473 stack.nests[stack.n++] = s0.nests[i];
2474 for (size_t i = 0; i < s1.n; i++)
2476 stack.nests[stack.n] = s1.nests[i];
2477 stack.nests[stack.n].group_head += s0.n;
2480 assert (stack.n == s0.n + s1.n);
/* Builds a stack containing a single nest for the variable axis node A.
   The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise; its summary index is 0 if A has cell summary specs or is a
   scale variable and SIZE_MAX otherwise.  The summary specs of every
   variant are cloned from A and tagged with A's variable and scale
   flag. */
2486 static struct ctables_stack
2487 var_fts (const struct ctables_axis *a)
2489 struct variable **vars = xmalloc (sizeof *vars);
2492 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2493 struct ctables_nest *nest = xmalloc (sizeof *nest);
2494 *nest = (struct ctables_nest) {
2497 .scale_idx = a->scale ? 0 : SIZE_MAX,
2498 .summary_idx = is_summary ? 0 : SIZE_MAX,
2501 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2503 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2504 nest->specs[sv].var = a->var;
2505 nest->specs[sv].is_scale = a->scale;
2507 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis expression A into a stack of nests.  One case yields
   an empty stack; nodes with two operands are converted by enumerating
   both operands recursively and combining the results with stack_fts or
   nest_fts (NOTE(review): presumably selected by whether the node is a
   stacking or a nesting operator; confirm). */
2510 static struct ctables_stack
2511 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2514 return (struct ctables_stack) { .n = 0 };
2522 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2523 enumerate_fts (axis_type, a->subs[1]));
2526 /* This should consider any of the scale variables found in the result to
2527 be linked to each other listwise for SMISSING=LISTWISE. */
2528 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2529 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary.  Which group of members is in use
   depends on the summary function, as labelled on each group below. */
2535 union ctables_summary
2537 /* COUNT, VALIDN, TOTALN. */
2540 /* MINIMUM, MAXIMUM, RANGE. */
2547 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2548 struct moments1 *moments;
2550 /* MEDIAN, MODE, PTILE. */
2553 struct casewriter *writer;
/* Prepares S to accumulate data for the summary function in SS.

   The setup visible here: the minimum and maximum start out as SYSMIS;
   moment-based functions get a moments1 object created with MOMENT_MEAN or
   MOMENT_VARIANCE, depending on the function; and the remaining functions
   get a sorting casewriter over cases with two numeric values, ordered
   ascending by the first. */
2560 ctables_summary_init (union ctables_summary *s,
2561 const struct ctables_summary_spec *ss)
2563 switch (ss->function)
2566 case CTSF_areaPCT_COUNT:
2567 case CTSF_areaPCT_VALIDN:
2568 case CTSF_areaPCT_TOTALN:
2581 s->min = s->max = SYSMIS;
2586 case CTSF_areaPCT_SUM:
2587 s->moments = moments1_create (MOMENT_MEAN);
2593 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 (numeric) columns; ctables_summary_add writes the value to
   column 0 and the weight to column 1. */
2600 struct caseproto *proto = caseproto_create ();
2601 proto = caseproto_add_width (proto, 0);
2602 proto = caseproto_add_width (proto, 0);
2604 struct subcase ordering;
2605 subcase_init (&ordering, 0, 0, SC_ASCEND);
2606 s->writer = sort_create_writer (&ordering, proto);
2607 subcase_uninit (&ordering);
2608 caseproto_unref (proto);
/* Releases whatever ctables_summary_init set up in S for the summary
   function in SS: the moments1 object for moment-based functions and the
   casewriter for functions that use one. */
2618 ctables_summary_uninit (union ctables_summary *s,
2619 const struct ctables_summary_spec *ss)
2621 switch (ss->function)
2624 case CTSF_areaPCT_COUNT:
2625 case CTSF_areaPCT_VALIDN:
2626 case CTSF_areaPCT_TOTALN:
2645 case CTSF_areaPCT_SUM:
2646 moments1_destroy (s->moments);
2652 casewriter_destroy (s->writer);
/* Adds one observation, VALUE, to the summary S for the summary function
   in SS.  The flags describe how the case relates to the table: whether
   the summarized variable is a scale variable, whether its value is
   missing, and whether the case falls in an included category.  Minimum,
   maximum, moment, and order-statistic summaries skip cases where
   IS_SCALE_MISSING is set; order-statistic summaries record each value
   together with its weight in the casewriter. */
2658 ctables_summary_add (union ctables_summary *s,
2659 const struct ctables_summary_spec *ss,
2660 const union value *value,
2661 bool is_scale, bool is_scale_missing,
2662 bool is_missing, bool is_included,
2665 /* To determine whether a case is included in a given table for a particular
2666 kind of summary, consider the following charts for each variable in the
2667 table. Only if "yes" appears for every variable for the summary is the
2670 Categorical variables: VALIDN COUNT TOTALN
2671 Valid values in included categories yes yes yes
2672 Missing values in included categories --- yes yes
2673 Missing values in excluded categories --- --- yes
2674 Valid values in excluded categories --- --- ---
2676 Scale variables: VALIDN COUNT TOTALN
2677 Valid value yes yes yes
2678 Missing value --- yes yes
2680 Missing values include both user- and system-missing. (The system-missing
2681 value is always in an excluded category.)
2683 switch (ss->function)
2689 case CTSF_areaPCT_TOTALN:
2694 if (is_scale || is_included)
2698 case CTSF_areaPCT_COUNT:
2699 if (is_scale || is_included)
2710 case CTSF_areaPCT_VALIDN:
2730 if (!is_scale_missing)
2732 if (s->min == SYSMIS || value->f < s->min)
2734 if (s->max == SYSMIS || value->f > s->max)
2744 if (!is_scale_missing)
2745 moments1_add (s->moments, value->f, weight);
2748 case CTSF_areaPCT_SUM:
2749 if (!is_missing && !is_scale_missing)
2750 moments1_add (s->moments, value->f, weight);
2756 if (!is_scale_missing)
2758 s->ovalid += weight;
2760 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2761 *case_num_rw_idx (c, 0) = value->f;
2762 *case_num_rw_idx (c, 1) = weight;
2763 casewriter_write (s->writer, c);
// Computes the final value of summary S, for the summary function in SS,
// within CELL.
//
// Percentage functions divide the cell's figure by the corresponding
// figure of the cell's area for SS->calc_area and multiply by 100; a zero
// denominator yields SYSMIS.  Moment-based functions read their results
// from the moments1 object.  Order-statistic functions turn the casewriter
// into a reader and feed it to a percentile or mode statistic; MEDIAN is
// computed as the 0.5 percentile, and the result is kept in S->ovalue.
2770 ctables_summary_value (const struct ctables_cell *cell,
2771 union ctables_summary *s,
2772 const struct ctables_summary_spec *ss)
2774 switch (ss->function)
2780 return cell->areas[ss->calc_area]->sequence;
2782 case CTSF_areaPCT_COUNT:
2784 const struct ctables_area *a = cell->areas[ss->calc_area];
2785 double a_count = a->count[ss->weighting];
2786 return a_count ? s->count / a_count * 100 : SYSMIS;
2789 case CTSF_areaPCT_VALIDN:
2791 const struct ctables_area *a = cell->areas[ss->calc_area];
2792 double a_valid = a->valid[ss->weighting];
2793 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2796 case CTSF_areaPCT_TOTALN:
2798 const struct ctables_area *a = cell->areas[ss->calc_area];
2799 double a_total = a->total[ss->weighting];
2800 return a_total ? s->count / a_total * 100 : SYSMIS;
// The range is defined only when both extremes were observed.
2815 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2820 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2826 double weight, variance;
2827 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2828 return calc_semean (variance, weight);
2834 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2835 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
// The sum is recovered as total weight times mean.
2840 double weight, mean;
2841 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2842 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2848 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2852 case CTSF_areaPCT_SUM:
2854 double weight, mean;
2855 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2856 if (weight == SYSMIS || mean == SYSMIS)
2859 const struct ctables_area *a = cell->areas[ss->calc_area];
2860 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2861 double denom = sum->sum[ss->weighting];
2862 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2869 struct casereader *reader = casewriter_make_reader (s->writer);
2872 struct percentile *ptile = percentile_create (
2873 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2874 struct order_stats *os = &ptile->parent;
2875 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2876 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2877 statistic_destroy (&ptile->parent.parent);
2884 struct casereader *reader = casewriter_make_reader (s->writer);
2887 struct mode *mode = mode_create ();
2888 struct order_stats *os = &mode->parent;
2889 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2890 s->ovalue = mode->mode;
2891 statistic_destroy (&mode->parent.parent);
// Auxiliary data for ctables_cell_compare_3way: the nest whose variables
// are compared and the pivot axis whose cell values are examined.
2899 struct ctables_cell_sort_aux
2901 const struct ctables_nest *nest;
2902 enum pivot_axis_type a;
// Three-way comparison of two cells for sorting.  A_ and B_ point to
// pointers to cells, and AUX_ points to a struct ctables_cell_sort_aux.
//
// The cells are compared variable by variable along AUX's axis, skipping
// the nest's scale variable.  Cells whose values belong to different
// categories are ordered by the addresses of the categories.  Within one
// category the ordering depends on the category type: by value, or by
// value label with the value used as a further key, reversed when the
// category is not sorted ascending.
2906 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2908 const struct ctables_cell_sort_aux *aux = aux_;
2909 struct ctables_cell *const *ap = a_;
2910 struct ctables_cell *const *bp = b_;
2911 const struct ctables_cell *a = *ap;
2912 const struct ctables_cell *b = *bp;
2914 const struct ctables_nest *nest = aux->nest;
2915 for (size_t i = 0; i < nest->n; i++)
2916 if (i != nest->scale_idx)
2918 const struct variable *var = nest->vars[i];
2919 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2920 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2921 if (a_cv->category != b_cv->category)
2922 return a_cv->category > b_cv->category ? 1 : -1;
2924 const union value *a_val = &a_cv->value;
2925 const union value *b_val = &b_cv->value;
2926 switch (a_cv->category->type)
2932 case CCT_POSTCOMPUTE:
2933 case CCT_EXCLUDED_MISSING:
2934 /* Must be equal. */
2942 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2950 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2952 return a_cv->category->sort_ascending ? cmp : -cmp;
2958 const char *a_label = var_lookup_value_label (var, a_val);
2959 const char *b_label = var_lookup_value_label (var, b_val);
2965 cmp = strcmp (a_label, b_label);
2971 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2974 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes.  A_ and B_
   point to pointers to cells; AUX is unused.  The axes are examined in
   order, and the comparison is decided by the `leaf' members of the cells
   on an axis. */
2986 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2987 const void *aux UNUSED)
2989 struct ctables_cell *const *ap = a_;
2990 struct ctables_cell *const *bp = b_;
2991 const struct ctables_cell *a = *ap;
2992 const struct ctables_cell *b = *bp;
2994 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2996 int al = a->axes[axis].leaf;
2997 int bl = b->axes[axis].leaf;
2999 return al > bl ? 1 : -1;
/* Finds or creates, in section S, the area of type AREA that CELL belongs
   to.

   An area is identified by the categories, and for ordinary categories
   also the values, of the variables listed for AREA in each axis's nest.
   Total, subtotal, and postcompute categories are identified by category
   alone.  A newly created area records CELL as its example and gets a
   zeroed `sums' array when the table has sum variables. */
3004 static struct ctables_area *
3005 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3006 enum ctables_area_type area)
/* Hash the identifying categories and values. */
3009 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3011 const struct ctables_nest *nest = s->nests[a];
3012 for (size_t i = 0; i < nest->n_areas[area]; i++)
3014 size_t v_idx = nest->areas[area][i];
3015 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3016 hash = hash_pointer (cv->category, hash);
3017 if (cv->category->type != CCT_TOTAL
3018 && cv->category->type != CCT_SUBTOTAL
3019 && cv->category->type != CCT_POSTCOMPUTE)
3020 hash = value_hash (&cv->value,
3021 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area whose example cell agrees with CELL on every
   identifying category and value. */
3025 struct ctables_area *a;
3026 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3028 const struct ctables_cell *df = a->example;
3029 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3031 const struct ctables_nest *nest = s->nests[a];
3032 for (size_t i = 0; i < nest->n_areas[area]; i++)
3034 size_t v_idx = nest->areas[area][i];
3035 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3036 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3037 if (cv1->category != cv2->category
3038 || (cv1->category->type != CCT_TOTAL
3039 && cv1->category->type != CCT_SUBTOTAL
3040 && cv1->category->type != CCT_POSTCOMPUTE
3041 && !value_equal (&cv1->value, &cv2->value,
3042 var_get_width (nest->vars[v_idx]))))
/* No existing area matched, so create one. */
3051 struct ctables_sum *sums = (s->table->n_sum_vars
3052 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3055 a = xmalloc (sizeof *a);
3056 *a = (struct ctables_area) { .example = cell, .sums = sums };
3057 hmap_insert (&s->areas[area], &a->node, hash);
/* Makes a substring over the string data of V, sized to VAR's width, and
   strips trailing spaces from it.  The substring refers to V's own storage
   rather than a copy. */
3061 static struct substring
3062 rtrim_value (const union value *v, const struct variable *var)
3064 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3065 var_get_width (var));
3066 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces removed,
   lies within the inclusive range SRANGE[0]...SRANGE[1].  A bound whose
   `string' member is null is treated as open. */
3071 in_string_range (const union value *v, const struct variable *var,
3072 const struct substring *srange)
3074 struct substring s = rtrim_value (v, var);
3075 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3076 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The system-missing value of a numeric variable is tested for first.
   Otherwise the categories are examined from last to first, each by the
   rule for its type: equality for numbers and strings (strings compared
   with trailing spaces removed), inclusive bounds for ranges (with
   -DBL_MAX and DBL_MAX, or a null string, as open ends), and the
   variable's missing-value test for the missing category.

   If no category claims V, the result is the `othernm' category unless V
   is a missing value for VAR, in which case it is NULL. */
3079 static const struct ctables_category *
3080 ctables_categories_match (const struct ctables_categories *c,
3081 const union value *v, const struct variable *var)
3083 if (var_is_numeric (var) && v->f == SYSMIS)
3086 const struct ctables_category *othernm = NULL;
3087 for (size_t i = c->n_cats; i-- > 0; )
3089 const struct ctables_category *cat = &c->cats[i];
3093 if (cat->number == v->f)
3098 if (ss_equals (cat->string, rtrim_value (v, var)))
3103 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3104 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3109 if (in_string_range (v, var, cat->srange))
3114 if (var_is_value_missing (var, v))
3118 case CCT_POSTCOMPUTE:
3133 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3136 case CCT_EXCLUDED_MISSING:
3141 return var_is_value_missing (var, v) ? NULL : othernm;
3144 static const struct ctables_category *
3145 ctables_categories_total (const struct ctables_categories *c)
3147 const struct ctables_category *first = &c->cats[0];
3148 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3149 return (first->type == CCT_TOTAL ? first
3150 : last->type == CCT_TOTAL ? last
3154 static struct ctables_cell *
3155 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3156 const struct ctables_category **cats[PIVOT_N_AXES])
3159 enum ctables_summary_variant sv = CSV_CELL;
3160 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3162 const struct ctables_nest *nest = s->nests[a];
3163 for (size_t i = 0; i < nest->n; i++)
3164 if (i != nest->scale_idx)
3166 hash = hash_pointer (cats[a][i], hash);
3167 if (cats[a][i]->type != CCT_TOTAL
3168 && cats[a][i]->type != CCT_SUBTOTAL
3169 && cats[a][i]->type != CCT_POSTCOMPUTE)
3170 hash = value_hash (case_data (c, nest->vars[i]),
3171 var_get_width (nest->vars[i]), hash);
3177 struct ctables_cell *cell;
3178 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3180 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3182 const struct ctables_nest *nest = s->nests[a];
3183 for (size_t i = 0; i < nest->n; i++)
3184 if (i != nest->scale_idx
3185 && (cats[a][i] != cell->axes[a].cvs[i].category
3186 || (cats[a][i]->type != CCT_TOTAL
3187 && cats[a][i]->type != CCT_SUBTOTAL
3188 && cats[a][i]->type != CCT_POSTCOMPUTE
3189 && !value_equal (case_data (c, nest->vars[i]),
3190 &cell->axes[a].cvs[i].value,
3191 var_get_width (nest->vars[i])))))
3200 cell = xmalloc (sizeof *cell);
3203 cell->omit_areas = 0;
3204 cell->postcompute = false;
3205 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3207 const struct ctables_nest *nest = s->nests[a];
3208 cell->axes[a].cvs = (nest->n
3209 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3211 for (size_t i = 0; i < nest->n; i++)
3213 const struct ctables_category *cat = cats[a][i];
3214 const struct variable *var = nest->vars[i];
3215 const union value *value = case_data (c, var);
3216 if (i != nest->scale_idx)
3218 const struct ctables_category *subtotal = cat->subtotal;
3219 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3222 if (cat->type == CCT_TOTAL
3223 || cat->type == CCT_SUBTOTAL
3224 || cat->type == CCT_POSTCOMPUTE)
3228 case PIVOT_AXIS_COLUMN:
3229 cell->omit_areas |= ((1u << CTAT_TABLE) |
3230 (1u << CTAT_LAYER) |
3231 (1u << CTAT_LAYERCOL) |
3232 (1u << CTAT_SUBTABLE) |
3235 case PIVOT_AXIS_ROW:
3236 cell->omit_areas |= ((1u << CTAT_TABLE) |
3237 (1u << CTAT_LAYER) |
3238 (1u << CTAT_LAYERROW) |
3239 (1u << CTAT_SUBTABLE) |
3242 case PIVOT_AXIS_LAYER:
3243 cell->omit_areas |= ((1u << CTAT_TABLE) |
3244 (1u << CTAT_LAYER));
3248 if (cat->type == CCT_POSTCOMPUTE)
3249 cell->postcompute = true;
3252 cell->axes[a].cvs[i].category = cat;
3253 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3257 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3258 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3259 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3260 for (size_t i = 0; i < specs->n; i++)
3261 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3262 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3263 cell->areas[at] = ctables_area_insert (s, cell, at);
3264 hmap_insert (&s->cells, &cell->node, hash);
3269 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3270 const struct ccase *c)
3272 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3274 const struct variable *var = specs->listwise_vars[i];
3275 if (var_is_num_missing (var, case_num (c, var)))
3283 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3285 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3290 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3291 const struct ctables_category **cats[PIVOT_N_AXES],
3292 bool is_included, double weight[N_CTWS])
3294 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3295 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3297 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3298 const union value *value = case_data (c, specs->var);
3299 bool is_missing = var_is_value_missing (specs->var, value);
3300 bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c));
3302 for (size_t i = 0; i < specs->n; i++)
3303 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3304 specs->is_scale, scale_missing, is_missing,
3305 is_included, weight[specs->specs[i].weighting]);
3306 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3307 if (!(cell->omit_areas && (1u << at)))
3309 struct ctables_area *a = cell->areas[at];
3311 add_weight (a->total, weight);
3313 add_weight (a->count, weight);
3316 add_weight (a->valid, weight);
3319 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3321 const struct variable *var = s->table->sum_vars[i];
3322 double addend = case_num (c, var);
3323 if (!var_is_num_missing (var, addend))
3324 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3325 a->sums[i].sum[wt] += addend * weight[wt];
3332 recurse_totals (struct ctables_section *s, const struct ccase *c,
3333 const struct ctables_category **cats[PIVOT_N_AXES],
3334 bool is_included, double weight[N_CTWS],
3335 enum pivot_axis_type start_axis, size_t start_nest)
3337 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3339 const struct ctables_nest *nest = s->nests[a];
3340 for (size_t i = start_nest; i < nest->n; i++)
3342 if (i == nest->scale_idx)
3345 const struct variable *var = nest->vars[i];
3347 const struct ctables_category *total = ctables_categories_total (
3348 s->table->categories[var_get_dict_index (var)]);
3351 const struct ctables_category *save = cats[a][i];
3353 ctables_cell_add__ (s, c, cats, is_included, weight);
3354 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3363 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3364 const struct ctables_category **cats[PIVOT_N_AXES],
3365 bool is_included, double weight[N_CTWS],
3366 enum pivot_axis_type start_axis, size_t start_nest)
3368 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3370 const struct ctables_nest *nest = s->nests[a];
3371 for (size_t i = start_nest; i < nest->n; i++)
3373 if (i == nest->scale_idx)
3376 const struct ctables_category *save = cats[a][i];
3379 cats[a][i] = save->subtotal;
3380 ctables_cell_add__ (s, c, cats, is_included, weight);
3381 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3390 ctables_add_occurrence (const struct variable *var,
3391 const union value *value,
3392 struct hmap *occurrences)
3394 int width = var_get_width (var);
3395 unsigned int hash = value_hash (value, width, 0);
3397 struct ctables_occurrence *o;
3398 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3400 if (value_equal (value, &o->value, width))
3403 o = xmalloc (sizeof *o);
3404 value_clone (&o->value, value, width);
3405 hmap_insert (occurrences, &o->node, hash);
3409 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3410 double weight[N_CTWS])
3412 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3413 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3414 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3415 const struct ctables_category **cats[PIVOT_N_AXES] =
3417 [PIVOT_AXIS_LAYER] = layer_cats,
3418 [PIVOT_AXIS_ROW] = row_cats,
3419 [PIVOT_AXIS_COLUMN] = column_cats,
3422 bool is_included = true;
3424 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3426 const struct ctables_nest *nest = s->nests[a];
3427 for (size_t i = 0; i < nest->n; i++)
3428 if (i != nest->scale_idx)
3430 const struct variable *var = nest->vars[i];
3431 const union value *value = case_data (c, var);
3433 cats[a][i] = ctables_categories_match (
3434 s->table->categories[var_get_dict_index (var)], value, var);
3437 if (i != nest->summary_idx)
3440 if (!var_is_value_missing (var, value))
3443 static const struct ctables_category cct_excluded_missing = {
3444 .type = CCT_EXCLUDED_MISSING,
3447 cats[a][i] = &cct_excluded_missing;
3448 is_included = false;
3454 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3456 const struct ctables_nest *nest = s->nests[a];
3457 for (size_t i = 0; i < nest->n; i++)
3458 if (i != nest->scale_idx)
3460 const struct variable *var = nest->vars[i];
3461 const union value *value = case_data (c, var);
3462 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3466 ctables_cell_add__ (s, c, cats, is_included, weight);
3467 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3468 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
3473 const struct ctables_summary_spec_set *set;
3478 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3480 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3481 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3482 if (as->function != bs->function)
3483 return as->function > bs->function ? 1 : -1;
3484 else if (as->weighting != bs->weighting)
3485 return as->weighting > bs->weighting ? 1 : -1;
3486 else if (as->calc_area != bs->calc_area)
3487 return as->calc_area > bs->calc_area ? 1 : -1;
3488 else if (as->percentile != bs->percentile)
3489 return as->percentile < bs->percentile ? 1 : -1;
3491 const char *as_label = as->label ? as->label : "";
3492 const char *bs_label = bs->label ? bs->label : "";
3493 return strcmp (as_label, bs_label);
3497 ctables_category_format_number (double number, const struct variable *var,
3500 struct pivot_value *pv = pivot_value_new_var_value (
3501 var, &(union value) { .f = number });
3502 pivot_value_format (pv, NULL, s);
3503 pivot_value_destroy (pv);
3507 ctables_category_format_string (struct substring string,
3508 const struct variable *var, struct string *out)
3510 int width = var_get_width (var);
3511 char *s = xmalloc (width);
3512 buf_copy_rpad (s, width, string.string, string.length, ' ');
3513 struct pivot_value *pv = pivot_value_new_var_value (
3514 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3515 pivot_value_format (pv, NULL, out);
3516 pivot_value_destroy (pv);
3521 ctables_category_format_label (const struct ctables_category *cat,
3522 const struct variable *var,
3528 ctables_category_format_number (cat->number, var, s);
3532 ctables_category_format_string (cat->string, var, s);
3536 ctables_category_format_number (cat->nrange[0], var, s);
3537 ds_put_format (s, " THRU ");
3538 ctables_category_format_number (cat->nrange[1], var, s);
3542 ctables_category_format_string (cat->srange[0], var, s);
3543 ds_put_format (s, " THRU ");
3544 ctables_category_format_string (cat->srange[1], var, s);
3548 ds_put_cstr (s, "MISSING");
3552 ds_put_cstr (s, "OTHERNM");
3555 case CCT_POSTCOMPUTE:
3556 ds_put_format (s, "&%s", cat->pc->name);
3561 ds_put_cstr (s, cat->total_label);
3567 case CCT_EXCLUDED_MISSING:
3574 static struct pivot_value *
3575 ctables_postcompute_label (const struct ctables_categories *cats,
3576 const struct ctables_category *cat,
3577 const struct variable *var)
3579 struct substring in = ss_cstr (cat->pc->label);
3580 struct substring target = ss_cstr (")LABEL[");
3582 struct string out = DS_EMPTY_INITIALIZER;
3585 size_t chunk = ss_find_substring (in, target);
3586 if (chunk == SIZE_MAX)
3588 if (ds_is_empty (&out))
3589 return pivot_value_new_user_text (in.string, in.length);
3592 ds_put_substring (&out, in);
3593 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3597 ds_put_substring (&out, ss_head (in, chunk));
3598 ss_advance (&in, chunk + target.length);
3600 struct substring idx_s;
3601 if (!ss_get_until (&in, ']', &idx_s))
3604 long int idx = strtol (idx_s.string, &tail, 10);
3605 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3608 struct ctables_category *cat2 = &cats->cats[idx - 1];
3609 if (!ctables_category_format_label (cat2, var, &out))
3615 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3618 static struct pivot_value *
3619 ctables_category_create_value_label (const struct ctables_categories *cats,
3620 const struct ctables_category *cat,
3621 const struct variable *var,
3622 const union value *value)
3624 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3625 ? ctables_postcompute_label (cats, cat, var)
3626 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3627 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3628 : pivot_value_new_var_value (var, value));
3631 static struct ctables_value *
3632 ctables_value_find__ (struct ctables_table *t, const union value *value,
3633 int width, unsigned int hash)
3635 struct ctables_value *clv;
3636 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3637 hash, &t->clabels_values_map)
3638 if (value_equal (value, &clv->value, width))
3644 ctables_value_insert (struct ctables_table *t, const union value *value,
3647 unsigned int hash = value_hash (value, width, 0);
3648 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3651 clv = xmalloc (sizeof *clv);
3652 value_clone (&clv->value, value, width);
3653 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry for VALUE, of the given WIDTH, in T's category-label
   value map, or a null pointer if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3666 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3667 size_t ix[PIVOT_N_AXES])
3669 if (a < PIVOT_N_AXES)
3671 size_t limit = MAX (t->stacks[a].n, 1);
3672 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3673 ctables_table_add_section (t, a + 1, ix);
3677 struct ctables_section *s = &t->sections[t->n_sections++];
3678 *s = (struct ctables_section) {
3680 .cells = HMAP_INITIALIZER (s->cells),
3682 for (a = 0; a < PIVOT_N_AXES; a++)
3685 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3687 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3688 for (size_t i = 0; i < nest->n; i++)
3689 hmap_init (&s->occurrences[a][i]);
3691 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3692 hmap_init (&s->areas[at]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  double sum = a + b;
  return sum;
}
/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  double difference = a - b;
  return difference;
}
/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  double product = a * b;
  return product;
}
3715 ctpo_div (double a, double b)
3717 return b ? a / b : SYSMIS;
3721 ctpo_pow (double a, double b)
3723 int save_errno = errno;
3725 double result = pow (a, b);
3733 ctpo_neg (double a, double b UNUSED)
3738 struct ctables_pcexpr_evaluate_ctx
3740 const struct ctables_cell *cell;
3741 const struct ctables_section *section;
3742 const struct ctables_categories *cats;
3743 enum pivot_axis_type pc_a;
3746 enum fmt_type parse_format;
3749 static double ctables_pcexpr_evaluate (
3750 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3753 ctables_pcexpr_evaluate_nonterminal (
3754 const struct ctables_pcexpr_evaluate_ctx *ctx,
3755 const struct ctables_pcexpr *e, size_t n_args,
3756 double evaluate (double, double))
3758 double args[2] = { 0, 0 };
3759 for (size_t i = 0; i < n_args; i++)
3761 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3762 if (!isfinite (args[i]) || args[i] == SYSMIS)
3765 return evaluate (args[0], args[1]);
3769 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3770 const struct ctables_cell_value *pc_cv)
3772 const struct ctables_section *s = ctx->section;
3775 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3777 const struct ctables_nest *nest = s->nests[a];
3778 for (size_t i = 0; i < nest->n; i++)
3779 if (i != nest->scale_idx)
3781 const struct ctables_cell_value *cv
3782 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3783 : &ctx->cell->axes[a].cvs[i]);
3784 hash = hash_pointer (cv->category, hash);
3785 if (cv->category->type != CCT_TOTAL
3786 && cv->category->type != CCT_SUBTOTAL
3787 && cv->category->type != CCT_POSTCOMPUTE)
3788 hash = value_hash (&cv->value,
3789 var_get_width (nest->vars[i]), hash);
3793 struct ctables_cell *tc;
3794 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3796 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3798 const struct ctables_nest *nest = s->nests[a];
3799 for (size_t i = 0; i < nest->n; i++)
3800 if (i != nest->scale_idx)
3802 const struct ctables_cell_value *p_cv
3803 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3804 : &ctx->cell->axes[a].cvs[i]);
3805 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3806 if (p_cv->category != t_cv->category
3807 || (p_cv->category->type != CCT_TOTAL
3808 && p_cv->category->type != CCT_SUBTOTAL
3809 && p_cv->category->type != CCT_POSTCOMPUTE
3810 && !value_equal (&p_cv->value,
3812 var_get_width (nest->vars[i]))))
3824 const struct ctables_table *t = s->table;
3825 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3826 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3827 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3828 &specs->specs[ctx->summary_idx]);
3832 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3833 const struct ctables_pcexpr *e)
3840 case CTPO_CAT_NRANGE:
3841 case CTPO_CAT_SRANGE:
3842 case CTPO_CAT_MISSING:
3843 case CTPO_CAT_OTHERNM:
3845 struct ctables_cell_value cv = {
3846 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3848 assert (cv.category != NULL);
3850 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3851 const struct ctables_occurrence *o;
3854 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3855 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3856 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3858 cv.value = o->value;
3859 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3864 case CTPO_CAT_NUMBER:
3865 case CTPO_CAT_SUBTOTAL:
3866 case CTPO_CAT_TOTAL:
3868 struct ctables_cell_value cv = {
3869 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3870 .value = { .f = e->number },
3872 assert (cv.category != NULL);
3873 return ctables_pcexpr_evaluate_category (ctx, &cv);
3876 case CTPO_CAT_STRING:
3878 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3880 if (width > e->string.length)
3882 s = xmalloc (width);
3883 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3886 const struct ctables_category *category
3887 = ctables_find_category_for_postcompute (
3888 ctx->section->table->ctables->dict,
3889 ctx->cats, ctx->parse_format, e);
3890 assert (category != NULL);
3892 struct ctables_cell_value cv = { .category = category };
3893 if (category->type == CCT_NUMBER)
3894 cv.value.f = category->number;
3895 else if (category->type == CCT_STRING)
3896 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3900 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3906 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3909 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3912 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3915 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3918 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3921 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3927 static const struct ctables_category *
3928 ctables_cell_postcompute (const struct ctables_section *s,
3929 const struct ctables_cell *cell,
3930 enum pivot_axis_type *pc_a_p,
3933 assert (cell->postcompute);
3934 const struct ctables_category *pc_cat = NULL;
3935 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3936 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3938 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3939 if (cv->category->type == CCT_POSTCOMPUTE)
3943 /* Multiple postcomputes cross each other. The value is
3948 pc_cat = cv->category;
3952 *pc_a_idx_p = pc_a_idx;
3956 assert (pc_cat != NULL);
3961 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3962 const struct ctables_cell *cell,
3963 const struct ctables_summary_spec *ss,
3964 struct fmt_spec *format,
3965 bool *is_ctables_format,
3968 enum pivot_axis_type pc_a = 0;
3969 size_t pc_a_idx = 0;
3970 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3971 s, cell, &pc_a, &pc_a_idx);
3975 const struct ctables_postcompute *pc = pc_cat->pc;
3978 for (size_t i = 0; i < pc->specs->n; i++)
3980 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3981 if (ss->function == ss2->function
3982 && ss->weighting == ss2->weighting
3983 && ss->calc_area == ss2->calc_area
3984 && ss->percentile == ss2->percentile)
3986 *format = ss2->format;
3987 *is_ctables_format = ss2->is_ctables_format;
3993 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3994 const struct ctables_categories *cats = s->table->categories[
3995 var_get_dict_index (var)];
3996 struct ctables_pcexpr_evaluate_ctx ctx = {
4001 .pc_a_idx = pc_a_idx,
4002 .summary_idx = summary_idx,
4003 .parse_format = pc_cat->parse_format,
4005 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4009 ctables_format (double d, const struct fmt_spec *format,
4010 const struct fmt_settings *settings)
4012 const union value v = { .f = d };
4013 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4015 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4016 produce the results we want for negative numbers, putting the negative
4017 sign in the wrong spot, before the prefix instead of after it. We can't,
4018 in fact, produce the desired results using a custom-currency
4019 specification. Instead, we postprocess the output, moving the negative
4022 NEQUAL: "-N=3" => "N=-3"
4023 PAREN: "-(3)" => "(-3)"
4024 PCTPAREN: "-(3%)" => "(-3%)"
4026 This transformation doesn't affect NEGPAREN. */
4027 char *minus_src = strchr (s, '-');
4028 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4030 char *n_equals = strstr (s, "N=");
4031 char *lparen = strchr (s, '(');
4032 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4034 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4040 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4042 for (size_t i = 0; i < t->stacks[a].n; i++)
4044 struct ctables_nest *nest = &t->stacks[a].nests[i];
4045 if (nest->n != 1 || nest->scale_idx != 0)
4048 enum ctables_vlabel vlabel
4049 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4050 if (vlabel != CTVL_NONE)
4057 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4059 struct pivot_table *pt = pivot_table_create__ (
4061 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4062 : pivot_value_new_text (N_("Custom Tables"))),
4065 pivot_table_set_caption (
4066 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4068 pivot_table_set_corner_text (
4069 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4071 bool summary_dimension = (t->summary_axis != t->slabels_axis
4072 || (!t->slabels_visible
4073 && t->summary_specs.n > 1));
4074 if (summary_dimension)
4076 struct pivot_dimension *d = pivot_dimension_create (
4077 pt, t->slabels_axis, N_("Statistics"));
4078 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4079 if (!t->slabels_visible)
4080 d->hide_all_labels = true;
4081 for (size_t i = 0; i < specs->n; i++)
4082 pivot_category_create_leaf (
4083 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4086 bool categories_dimension = t->clabels_example != NULL;
4087 if (categories_dimension)
4089 struct pivot_dimension *d = pivot_dimension_create (
4090 pt, t->label_axis[t->clabels_from_axis],
4091 t->clabels_from_axis == PIVOT_AXIS_ROW
4092 ? N_("Row Categories")
4093 : N_("Column Categories"));
4094 const struct variable *var = t->clabels_example;
4095 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4096 for (size_t i = 0; i < t->n_clabels_values; i++)
4098 const struct ctables_value *value = t->clabels_values[i];
4099 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4100 assert (cat != NULL);
4101 pivot_category_create_leaf (
4102 d->root, ctables_category_create_value_label (c, cat,
4108 pivot_table_set_look (pt, ct->look);
4109 struct pivot_dimension *d[PIVOT_N_AXES];
4110 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4112 static const char *names[] = {
4113 [PIVOT_AXIS_ROW] = N_("Rows"),
4114 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4115 [PIVOT_AXIS_LAYER] = N_("Layers"),
4117 d[a] = (t->axes[a] || a == t->summary_axis
4118 ? pivot_dimension_create (pt, a, names[a])
4123 assert (t->axes[a]);
4125 for (size_t i = 0; i < t->stacks[a].n; i++)
4127 struct ctables_nest *nest = &t->stacks[a].nests[i];
4128 struct ctables_section **sections = xnmalloc (t->n_sections,
4130 size_t n_sections = 0;
4132 size_t n_total_cells = 0;
4133 size_t max_depth = 0;
4134 for (size_t j = 0; j < t->n_sections; j++)
4135 if (t->sections[j].nests[a] == nest)
4137 struct ctables_section *s = &t->sections[j];
4138 sections[n_sections++] = s;
4139 n_total_cells += hmap_count (&s->cells);
4141 size_t depth = s->nests[a]->n;
4142 max_depth = MAX (depth, max_depth);
4145 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4147 size_t n_sorted = 0;
4149 for (size_t j = 0; j < n_sections; j++)
4151 struct ctables_section *s = sections[j];
4153 struct ctables_cell *cell;
4154 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4156 sorted[n_sorted++] = cell;
4157 assert (n_sorted <= n_total_cells);
4160 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4161 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4163 struct ctables_level
4165 enum ctables_level_type
4167 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4168 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4169 CTL_SUMMARY, /* Summary functions. */
4173 enum settings_value_show vlabel; /* CTL_VAR only. */
4176 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4177 size_t n_levels = 0;
4178 for (size_t k = 0; k < nest->n; k++)
4180 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4181 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4183 if (vlabel != CTVL_NONE)
4185 levels[n_levels++] = (struct ctables_level) {
4187 .vlabel = (enum settings_value_show) vlabel,
4192 if (nest->scale_idx != k
4193 && (k != nest->n - 1 || t->label_axis[a] == a))
4195 levels[n_levels++] = (struct ctables_level) {
4196 .type = CTL_CATEGORY,
4202 if (!summary_dimension && a == t->slabels_axis)
4204 levels[n_levels++] = (struct ctables_level) {
4205 .type = CTL_SUMMARY,
4206 .var_idx = SIZE_MAX,
4210 /* Pivot categories:
4212 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4213 - category for nest->vars[0], if nest->scale_idx != 0
4214 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4215 - category for nest->vars[1], if nest->scale_idx != 1
4217 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4218 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4219 - summary function, if 'a == t->slabels_axis && a ==
4222 Additional dimensions:
4224 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4226 - If 't->label_axis[b] == a' for some 'b != a', add a category
4231 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4233 for (size_t j = 0; j < n_sorted; j++)
4235 struct ctables_cell *cell = sorted[j];
4236 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4238 size_t n_common = 0;
4241 for (; n_common < n_levels; n_common++)
4243 const struct ctables_level *level = &levels[n_common];
4244 if (level->type == CTL_CATEGORY)
4246 size_t var_idx = level->var_idx;
4247 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4248 if (prev->axes[a].cvs[var_idx].category != c)
4250 else if (c->type != CCT_SUBTOTAL
4251 && c->type != CCT_TOTAL
4252 && c->type != CCT_POSTCOMPUTE
4253 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4254 &cell->axes[a].cvs[var_idx].value,
4255 var_get_type (nest->vars[var_idx])))
4261 for (size_t k = n_common; k < n_levels; k++)
4263 const struct ctables_level *level = &levels[k];
4264 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4265 if (level->type == CTL_SUMMARY)
4267 assert (k == n_levels - 1);
4269 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4270 for (size_t m = 0; m < specs->n; m++)
4272 int leaf = pivot_category_create_leaf (
4273 parent, ctables_summary_label (&specs->specs[m],
4281 const struct variable *var = nest->vars[level->var_idx];
4282 struct pivot_value *label;
4283 if (level->type == CTL_VAR)
4285 label = pivot_value_new_variable (var);
4286 label->variable.show = level->vlabel;
4288 else if (level->type == CTL_CATEGORY)
4290 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4291 label = ctables_category_create_value_label (
4292 t->categories[var_get_dict_index (var)],
4293 cv->category, var, &cv->value);
4298 if (k == n_levels - 1)
4299 prev_leaf = pivot_category_create_leaf (parent, label);
4301 groups[k] = pivot_category_create_group__ (parent, label);
4305 cell->axes[a].leaf = prev_leaf;
4314 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4318 size_t n_total_cells = 0;
4319 for (size_t j = 0; j < t->n_sections; j++)
4320 n_total_cells += hmap_count (&t->sections[j].cells);
4322 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4323 size_t n_sorted = 0;
4324 for (size_t j = 0; j < t->n_sections; j++)
4326 const struct ctables_section *s = &t->sections[j];
4327 struct ctables_cell *cell;
4328 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4330 sorted[n_sorted++] = cell;
4332 assert (n_sorted <= n_total_cells);
4333 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4335 size_t ids[N_CTATS];
4336 memset (ids, 0, sizeof ids);
4337 for (size_t j = 0; j < n_sorted; j++)
4339 struct ctables_cell *cell = sorted[j];
4340 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4342 struct ctables_area *area = cell->areas[at];
4343 if (!area->sequence)
4344 area->sequence = ++ids[at];
4351 for (size_t i = 0; i < t->n_sections; i++)
4353 struct ctables_section *s = &t->sections[i];
4355 struct ctables_cell *cell;
4356 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4361 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4362 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4363 for (size_t j = 0; j < specs->n; j++)
4366 size_t n_dindexes = 0;
4368 if (summary_dimension)
4369 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4371 if (categories_dimension)
4373 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4374 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4375 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4376 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4379 dindexes[n_dindexes++] = ctv->leaf;
4382 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4385 int leaf = cell->axes[a].leaf;
4386 if (a == t->summary_axis && !summary_dimension)
4388 dindexes[n_dindexes++] = leaf;
4391 const struct ctables_summary_spec *ss = &specs->specs[j];
4393 struct fmt_spec format = specs->specs[j].format;
4394 bool is_ctables_format = ss->is_ctables_format;
4395 double d = (cell->postcompute
4396 ? ctables_cell_calculate_postcompute (
4397 s, cell, ss, &format, &is_ctables_format, j)
4398 : ctables_summary_value (cell, &cell->summaries[j],
4401 struct pivot_value *value;
4402 if (ct->hide_threshold != 0
4403 && d < ct->hide_threshold
4404 && ss->function == CTSF_COUNT)
4406 value = pivot_value_new_user_text_nocopy (
4407 xasprintf ("<%d", ct->hide_threshold));
4409 else if (d == 0 && ct->zero)
4410 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4411 else if (d == SYSMIS && ct->missing)
4412 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4413 else if (is_ctables_format)
4414 value = pivot_value_new_user_text_nocopy (
4415 ctables_format (d, &format, &ct->ctables_formats));
4418 value = pivot_value_new_number (d);
4419 value->numeric.format = format;
4421 /* XXX should text values be right-justified? */
4422 pivot_table_put (pt, dindexes, n_dindexes, value);
4427 pivot_table_submit (pt);
4431 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4433 enum pivot_axis_type label_pos = t->label_axis[a];
4437 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4438 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4440 const struct ctables_stack *stack = &t->stacks[a];
4444 const struct ctables_nest *n0 = &stack->nests[0];
4447 assert (stack->n == 1);
4451 const struct variable *v0 = n0->vars[n0->n - 1];
4452 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4453 t->clabels_example = v0;
4455 for (size_t i = 0; i < c0->n_cats; i++)
4456 if (c0->cats[i].type == CCT_FUNCTION)
4458 msg (SE, _("%s=%s is not allowed with sorting based "
4459 "on a summary function."),
4460 subcommand_name, pos_name);
4463 if (n0->n - 1 == n0->scale_idx)
4465 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4466 "but %s is a scale variable."),
4467 subcommand_name, pos_name, var_get_name (v0));
4471 for (size_t i = 1; i < stack->n; i++)
4473 const struct ctables_nest *ni = &stack->nests[i];
4475 const struct variable *vi = ni->vars[ni->n - 1];
4476 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4478 if (ni->n - 1 == ni->scale_idx)
4480 msg (SE, _("%s=%s requires the variables to be moved to be "
4481 "categorical, but %s is a scale variable."),
4482 subcommand_name, pos_name, var_get_name (vi));
4485 if (var_get_width (v0) != var_get_width (vi))
4487 msg (SE, _("%s=%s requires the variables to be "
4488 "moved to have the same width, but %s has "
4489 "width %d and %s has width %d."),
4490 subcommand_name, pos_name,
4491 var_get_name (v0), var_get_width (v0),
4492 var_get_name (vi), var_get_width (vi));
4495 if (!val_labs_equal (var_get_value_labels (v0),
4496 var_get_value_labels (vi)))
4498 msg (SE, _("%s=%s requires the variables to be "
4499 "moved to have the same value labels, but %s "
4500 "and %s have different value labels."),
4501 subcommand_name, pos_name,
4502 var_get_name (v0), var_get_name (vi));
4505 if (!ctables_categories_equal (c0, ci))
4507 msg (SE, _("%s=%s requires the variables to be "
4508 "moved to have the same category "
4509 "specifications, but %s and %s have different "
4510 "category specifications."),
4511 subcommand_name, pos_name,
4512 var_get_name (v0), var_get_name (vi));
/* Returns the index of VAR within the array *SUM_VARS, which holds *N
   elements and has room for *ALLOCATED.  If VAR is not yet in the array, it
   is appended (growing the array with x2nrealloc() when it is full), *N is
   incremented, and the index of the new element is returned. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  size_t idx = 0;
  while (idx < *n && (*sum_vars)[idx] != var)
    idx++;
  if (idx < *n)
    return idx;

  /* Not found, so IDX equals *N here: append at the end. */
  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  (*sum_vars)[idx] = var;
  *n = idx + 1;
  return idx;
}
/* Maps area type AREA to an area type and returns it.  The return
   statements visible here yield CTAT_LAYERCOL and CTAT_LAYERROW.
   NOTE(review): presumably this exchanges row-oriented and column-oriented
   area types; the conditions selecting each return are not visible in this
   view, so confirm before relying on that. */
4534 static enum ctables_area_type
4535 rotate_area (enum ctables_area_type area)
4546 return CTAT_LAYERCOL;
4549 return CTAT_LAYERROW;
4562 enumerate_sum_vars (const struct ctables_axis *a,
4563 struct variable ***sum_vars, size_t *n, size_t *allocated)
4571 for (size_t i = 0; i < N_CSVS; i++)
4572 for (size_t j = 0; j < a->specs[i].n; j++)
4574 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4575 if (spec->function == CTSF_areaPCT_SUM)
4576 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4582 for (size_t i = 0; i < 2; i++)
4583 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  In the order visible below, this:

   - Expands each axis into a stack of nests with enumerate_fts() and, for
     every nest and area type, fills nest->areas[] with indexes of the
     nest's variables.

   - Supplies default summary specifications for nests on the summary axis
     that have none, and clones cell specifications into total
     specifications where the latter are missing.

   - Collects listwise-missing variables when smissing_listwise is set.

   - Merges all of the summary specifications into T->summary_specs.

   - Enumerates the variables summed by CTSF_areaPCT_SUM into T->sum_vars.

   Returns true only if ctables_check_label_position() accepts both the row
   and the column axis. */
4589 ctables_prepare_table (struct ctables_table *t)
4591 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
/* Expand the axis expression into its stack of nests. */
4594 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4596 for (size_t j = 0; j < t->stacks[a].n; j++)
4598 struct ctables_nest *nest = &t->stacks[a].nests[j];
4599 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* nest->areas[at] has room for one index per variable in the nest;
   nest->n_areas[at] counts the entries actually stored. */
4601 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4602 nest->n_areas[at] = 0;
/* For row- and column-oriented area types, ATA is the axis the area type
   is named for and ATB is the other one of row/column. */
4604 enum pivot_axis_type ata, atb;
4605 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4607 ata = PIVOT_AXIS_ROW;
4608 atb = PIVOT_AXIS_COLUMN;
4610 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4612 ata = PIVOT_AXIS_COLUMN;
4613 atb = PIVOT_AXIS_ROW;
4616 if (at == CTAT_LAYER
4617 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4618 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4619 ? a == atb && t->label_axis[a] != a
/* K is unsigned, so "k < nest->n" ends this descending scan when K wraps
   around below zero.  The scale variable is skipped.
   NOTE(review): lines are missing from this view here; confirm whether the
   scan stops after the first index it stores. */
4622 for (size_t k = nest->n - 1; k < nest->n; k--)
4623 if (k != nest->scale_idx)
4625 nest->areas[at][nest->n_areas[at]++] = k;
4631 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4632 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4633 : at == CTAT_TABLE ? true
/* Stores the index of every variable in the nest except the scale
   variable, in ascending order. */
4637 for (size_t k = 0; k < nest->n; k++)
4638 if (k != nest->scale_idx)
4639 nest->areas[at][nest->n_areas[at]++] = k;
/* n_drop is computed from the area type and from where category labels
   are moved from and to; it controls how the list of indexes is trimmed
   below. */
4645 #define L PIVOT_AXIS_LAYER
4646 n_drop = (t->clabels_from_axis == L ? a != L
4647 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4648 : t->clabels_from_axis == a ? 2
4655 n_drop = a == ata && t->label_axis[ata] == atb;
4660 n_drop = (a == ata ? t->label_axis[ata] == atb
4662 : t->clabels_from_axis == atb ? -1
4663 : t->clabels_to_axis != atb ? 1
/* Removes the second-to-last index by overwriting it with the last one.
   NOTE(review): presumably this is the branch for a negative n_drop; the
   guarding conditions are not visible in this view. */
4675 size_t n = nest->n_areas[at];
4678 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4679 nest->n_areas[at]--;
/* Drops up to n_drop indexes from the end of the list. */
4684 for (int i = 0; i < n_drop; i++)
4685 if (nest->n_areas[at] > 0)
4686 nest->n_areas[at]--;
/* Gives the axis a stack that consists of a single nest with no scale or
   summary variable.
   NOTE(review): presumably the branch for an axis without an expression;
   the condition is not visible in this view. */
4693 struct ctables_nest *nest = xmalloc (sizeof *nest);
4694 *nest = (struct ctables_nest) {
4696 .scale_idx = SIZE_MAX,
4697 .summary_idx = SIZE_MAX
4699 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4701 /* There's no point in moving labels away from an axis that has no
4702 labels, so avoid dealing with the special cases around that. */
4703 t->label_axis[a] = a;
/* Walk the nests on the summary axis to fill in summary specifications. */
4706 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4707 for (size_t i = 0; i < stack->n; i++)
4709 struct ctables_nest *nest = &stack->nests[i];
/* No cell specifications were given: create one default specification,
   CTSF_MEAN for a scale summary and CTSF_COUNT otherwise, on the nest's
   last variable. */
4710 if (!nest->specs[CSV_CELL].n)
4712 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4713 ss->specs = xmalloc (sizeof *ss->specs);
4716 enum ctables_summary_function function
4717 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
4721 nest->summary_idx = nest->n - 1;
4722 ss->var = nest->vars[nest->summary_idx];
4724 *ss->specs = (struct ctables_summary_spec) {
4725 .function = function,
4726 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
4727 .format = ctables_summary_default_format (function, ss->var),
4730 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4731 &nest->specs[CSV_CELL]);
/* Cell specifications were given but total specifications were not: totals
   use a clone of the cell specifications. */
4733 else if (!nest->specs[CSV_TOTAL].n)
4734 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4735 &nest->specs[CSV_CELL]);
/* When labels move between the row and column axes, each specification's
   calc_area is passed through rotate_area(). */
4737 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4738 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4740 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4741 for (size_t i = 0; i < nest->specs[sv].n; i++)
4743 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4744 const struct ctables_function_info *cfi =
4745 &ctables_function_info[ss->function];
4747 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise handling of scale missing values, gathers the scale
   variables of the other nests that share this nest's group_head. */
4751 if (t->ctables->smissing_listwise)
4753 struct variable **listwise_vars = NULL;
4755 size_t allocated = 0;
4757 for (size_t j = nest->group_head; j < stack->n; j++)
4759 const struct ctables_nest *other_nest = &stack->nests[j];
4760 if (other_nest->group_head != nest->group_head)
4763 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4766 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4767 sizeof *listwise_vars);
4768 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Stores the list in the nest's specification sets; xmemdup() makes a
   separate copy of the array.
   NOTE(review): the condition that decides when to copy is not visible in
   this view. */
4771 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4774 listwise_vars = xmemdup (listwise_vars,
4775 n * sizeof *listwise_vars);
4776 nest->specs[sv].listwise_vars = listwise_vars;
4777 nest->specs[sv].n_listwise_vars = n;
/* Merges the specification sets of all nests on the summary axis into
   t->summary_specs.  Each merge item tracks a position (ofs) within one
   set. */
4782 struct ctables_summary_spec_set *merged = &t->summary_specs;
4783 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4785 for (size_t j = 0; j < stack->n; j++)
4787 const struct ctables_nest *nest = &stack->nests[j];
4789 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4790 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Finds the smallest remaining item according to
   merge_item_compare_3way() and appends its current specification to the
   merged set. */
4795 struct merge_item min = items[0];
4796 for (size_t j = 1; j < n_left; j++)
4797 if (merge_item_compare_3way (&items[j], &min) < 0)
4800 if (merged->n >= merged->allocated)
4801 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4802 sizeof *merged->specs);
4803 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to the minimum records the index of the
   merged specification as its axis_idx and advances; an item that reaches
   the end of its set is replaced by the last remaining item. */
4805 for (size_t j = 0; j < n_left; )
4807 if (merge_item_compare_3way (&items[j], &min) == 0)
4809 struct merge_item *item = &items[j];
4810 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4811 if (++item->ofs >= item->set->n)
4813 items[j] = items[--n_left];
4822 size_t allocated_sum_vars = 0;
4823 enumerate_sum_vars (t->axes[t->summary_axis],
4824 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4826 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4827 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
4831 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4832 enum pivot_axis_type a)
4834 struct ctables_stack *stack = &t->stacks[a];
4835 for (size_t i = 0; i < stack->n; i++)
4837 const struct ctables_nest *nest = &stack->nests[i];
4838 const struct variable *var = nest->vars[nest->n - 1];
4839 const union value *value = case_data (c, var);
4841 if (var_is_numeric (var) && value->f == SYSMIS)
4844 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4846 ctables_value_insert (t, value, var_get_width (var));
4851 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4853 const struct ctables_value *const *ap = a_;
4854 const struct ctables_value *const *bp = b_;
4855 const struct ctables_value *a = *ap;
4856 const struct ctables_value *b = *bp;
4857 const int *width = width_;
4858 return value_compare_3way (&a->value, &b->value, *width);
4862 ctables_sort_clabels_values (struct ctables_table *t)
4864 const struct variable *v0 = t->clabels_example;
4865 int width = var_get_width (v0);
4867 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4870 const struct val_labs *val_labs = var_get_value_labels (v0);
4871 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4872 vl = val_labs_next (val_labs, vl))
4873 if (ctables_categories_match (c0, &vl->value, v0))
4874 ctables_value_insert (t, &vl->value, width);
4877 size_t n = hmap_count (&t->clabels_values_map);
4878 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4880 struct ctables_value *clv;
4882 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4883 t->clabels_values[i++] = clv;
4884 t->n_clabels_values = n;
4887 sort (t->clabels_values, n, sizeof *t->clabels_values,
4888 compare_clabels_values_3way, &width);
4890 for (size_t i = 0; i < n; i++)
4891 t->clabels_values[i]->leaf = i;
/* Adds values of VAR to OCCURRENCES, via ctables_add_occurrence(), as
   called for by each category specification in CATS.  A specification's
   explicit number or string is added directly; the other visible branches
   draw their candidate values from VAR's value labels.
   NOTE(review): most of the case labels that select each branch are not
   visible in this view; the notes below describe only what each visible
   branch does. */
4895 ctables_add_category_occurrences (const struct variable *var,
4896 struct hmap *occurrences,
4897 const struct ctables_categories *cats)
4899 const struct val_labs *val_labs = var_get_value_labels (var);
4901 for (size_t i = 0; i < cats->n_cats; i++)
4903 const struct ctables_category *c = &cats->cats[i];
/* Adds the category's number as a numeric value. */
4907 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Adds the category's string, padded on the right with spaces to VAR's
   width. */
4913 int width = var_get_width (var);
4915 value_init (&value, width);
4916 value_copy_buf_rpad (&value, width,
4917 CHAR_CAST (uint8_t *, c->string.string),
4918 c->string.length, ' ');
4919 ctables_add_occurrence (var, &value, occurrences);
4920 value_destroy (&value, width);
/* Adds each labeled value within the closed numeric range
   [c->nrange[0], c->nrange[1]]. */
4925 assert (var_is_numeric (var));
4926 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4927 vl = val_labs_next (val_labs, vl))
4928 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4929 ctables_add_occurrence (var, &vl->value, occurrences);
/* Adds each labeled value that in_string_range() accepts for c->srange. */
4933 assert (var_is_alpha (var));
4934 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4935 vl = val_labs_next (val_labs, vl))
4936 if (in_string_range (&vl->value, var, c->srange))
4937 ctables_add_occurrence (var, &vl->value, occurrences);
/* Adds each labeled value that is missing for VAR. */
4941 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4942 vl = val_labs_next (val_labs, vl))
4943 if (var_is_value_missing (var, &vl->value))
4944 ctables_add_occurrence (var, &vl->value, occurrences);
/* Adds every labeled value. */
4948 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4949 vl = val_labs_next (val_labs, vl))
4950 ctables_add_occurrence (var, &vl->value, occurrences);
4953 case CCT_POSTCOMPUTE:
/* Adds each labeled value, leaving out missing values unless the category
   includes them. */
4963 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4964 vl = val_labs_next (val_labs, vl))
4965 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4966 ctables_add_occurrence (var, &vl->value, occurrences);
4969 case CCT_EXCLUDED_MISSING:
4976 ctables_section_recurse_add_empty_categories (
4977 struct ctables_section *s,
4978 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
4979 enum pivot_axis_type a, size_t a_idx)
4981 if (a >= PIVOT_N_AXES)
4982 ctables_cell_insert__ (s, c, cats);
4983 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4984 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4987 const struct variable *var = s->nests[a]->vars[a_idx];
4988 const struct ctables_categories *categories = s->table->categories[
4989 var_get_dict_index (var)];
4990 int width = var_get_width (var);
4991 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4992 const struct ctables_occurrence *o;
4993 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4995 union value *value = case_data_rw (c, var);
4996 value_destroy (value, width);
4997 value_clone (value, &o->value, width);
4998 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4999 assert (cats[a][a_idx] != NULL);
5000 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5003 for (size_t i = 0; i < categories->n_cats; i++)
5005 const struct ctables_category *cat = &categories->cats[i];
5006 if (cat->type == CCT_POSTCOMPUTE)
5008 cats[a][a_idx] = cat;
5009 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5016 ctables_section_add_empty_categories (struct ctables_section *s)
5018 bool show_empty = false;
5019 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5021 for (size_t k = 0; k < s->nests[a]->n; k++)
5022 if (k != s->nests[a]->scale_idx)
5024 const struct variable *var = s->nests[a]->vars[k];
5025 const struct ctables_categories *cats = s->table->categories[
5026 var_get_dict_index (var)];
5027 if (cats->show_empty)
5030 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5036 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5037 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5038 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5039 const struct ctables_category **cats[PIVOT_N_AXES] =
5041 [PIVOT_AXIS_LAYER] = layer_cats,
5042 [PIVOT_AXIS_ROW] = row_cats,
5043 [PIVOT_AXIS_COLUMN] = column_cats,
5045 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5046 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of the data accumulated in it: the occurrences
   recorded for each non-scale variable, all of the cells, and all of the
   areas.  The hash maps themselves remain usable afterward;
   ctables_section_uninit() is what destroys them. */
5051 ctables_section_clear (struct ctables_section *s)
/* Occurrences: destroy each recorded value and remove it from its map. */
5053 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5055 const struct ctables_nest *nest = s->nests[a];
5056 for (size_t i = 0; i < nest->n; i++)
5057 if (i != nest->scale_idx)
5059 const struct variable *var = nest->vars[i];
5060 int width = var_get_width (var);
5061 struct ctables_occurrence *o, *next;
5062 struct hmap *map = &s->occurrences[a][i];
5063 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5065 value_destroy (&o->value, width);
5066 hmap_delete (map, &o->node);
/* Cells: destroy the category value held for each non-scale variable, free
   each axis's cvs array, uninitialize and free the summaries, and remove
   the cell from the map. */
5073 struct ctables_cell *cell, *next_cell;
5074 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5076 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5078 const struct ctables_nest *nest = s->nests[a];
5079 for (size_t i = 0; i < nest->n; i++)
5080 if (i != nest->scale_idx)
5081 value_destroy (&cell->axes[a].cvs[i].value,
5082 var_get_width (nest->vars[i]));
5083 free (cell->axes[a].cvs);
/* The number of summaries in a cell is that of the summary axis nest's
   specification set for the cell's summary variant. */
5086 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5087 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5088 for (size_t i = 0; i < specs->n; i++)
5089 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5090 free (cell->summaries);
5092 hmap_delete (&s->cells, &cell->node);
5095 hmap_shrink (&s->cells);
/* Areas: remove every area of every area type. */
5097 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5099 struct ctables_area *area, *next_area;
5100 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5104 hmap_delete (&s->areas[at], &area->node);
5107 hmap_shrink (&s->areas[at]);
5112 ctables_section_uninit (struct ctables_section *s)
5114 ctables_section_clear (s);
5116 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5118 struct ctables_nest *nest = s->nests[a];
5119 for (size_t i = 0; i < nest->n; i++)
5120 hmap_destroy (&s->occurrences[a][i]);
5121 free (s->occurrences[a]);
5124 hmap_destroy (&s->cells);
5125 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5126 hmap_destroy (&s->areas[at]);
5130 ctables_table_clear (struct ctables_table *t)
5132 for (size_t i = 0; i < t->n_sections; i++)
5133 ctables_section_clear (&t->sections[i]);
5135 if (t->clabels_example)
5137 int width = var_get_width (t->clabels_example);
5138 struct ctables_value *value, *next_value;
5139 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5140 &t->clabels_values_map)
5142 value_destroy (&value->value, width);
5143 hmap_delete (&t->clabels_values_map, &value->node);
5146 hmap_shrink (&t->clabels_values_map);
5148 free (t->clabels_values);
5149 t->clabels_values = NULL;
5150 t->n_clabels_values = 0;
/* Runs the tables in CT (a struct ctables, going by the accesses below)
   over the cases in INPUT, which belong to dataset DS.

   The input is divided into groups with a casegrouper: by split-file
   variables when the dictionary's split type is SPLIT_SEPARATE, and as a
   single group otherwise.  Every case is inserted into every section of
   every table; each table is then output and cleared.
   NOTE(review): presumably output happens once per group; the loop
   boundaries are not visible in this view.

   Returns the result of casegrouper_destroy(). */
5155 ctables_execute (struct dataset *ds, struct casereader *input,
/* Allocates each table's sections, one per combination of nests across the
   three axes, counting an axis with an empty stack as one. */
5158 for (size_t i = 0; i < ct->n_tables; i++)
5160 struct ctables_table *t = ct->tables[i];
5161 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5162 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5163 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5164 sizeof *t->sections);
5165 size_t ix[PIVOT_N_AXES];
5166 ctables_table_add_section (t, 0, ix);
5169 struct dictionary *dict = dataset_dict (ds);
5171 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5172 struct casegrouper *grouper
5174 ? casegrouper_create_splits (input, dict)
5175 : casegrouper_create_vars (input, NULL, 0));
5176 struct casereader *group;
5177 while (casegrouper_get_next_group (grouper, &group))
/* Peeks at the group's first case to output the split file values. */
5181 struct ccase *c = casereader_peek (group, 0);
5184 output_split_file_values (ds, c);
5189 bool warn_on_invalid = true;
5190 for (struct ccase *c = casereader_read (group); c;
5191 case_unref (c), c = casereader_read (group))
/* The case's weight under each weighting scheme: the rounded dictionary
   weight, the effective weight from ct->e_weight, and unweighted (1.0).
   NOTE(review): the effective weight's fallback when ct->e_weight is unset
   is not visible in this view. */
5193 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5194 double e_weight = (ct->e_weight
5195 ? var_force_valid_weight (ct->e_weight,
5196 case_num (c, ct->e_weight),
5200 [CTW_DICTIONARY] = d_weight,
5201 [CTW_EFFECTIVE] = e_weight,
5202 [CTW_UNWEIGHTED] = 1.0,
5205 for (size_t i = 0; i < ct->n_tables; i++)
5207 struct ctables_table *t = ct->tables[i];
5209 for (size_t j = 0; j < t->n_sections; j++)
5210 ctables_cell_insert (&t->sections[j], c, weight);
/* Axes whose category labels are moved elsewhere also collect the values
   that will label the destination. */
5212 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5213 if (t->label_axis[a] != a)
5214 ctables_insert_clabels_values (t, c, a);
5217 casereader_destroy (group);
/* Finishes each table: sorts moved category label values, adds cells for
   empty categories, outputs the table, and clears it. */
5219 for (size_t i = 0; i < ct->n_tables; i++)
5221 struct ctables_table *t = ct->tables[i];
5223 if (t->clabels_example)
5224 ctables_sort_clabels_values (t);
5226 for (size_t j = 0; j < t->n_sections; j++)
5227 ctables_section_add_empty_categories (&t->sections[j]);
5229 ctables_table_output (ct, t);
5230 ctables_table_clear (t);
5233 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the parsed
   expression.  The binary operator parsers below take one of these as the
   parser for their operands, and treat a null return as failure. */
5238 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5239 struct dictionary *);
5242 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5248 case CTPO_CAT_STRING:
5249 ss_dealloc (&e->string);
5252 case CTPO_CAT_SRANGE:
5253 for (size_t i = 0; i < 2; i++)
5254 ss_dealloc (&e->srange[i]);
5263 for (size_t i = 0; i < 2; i++)
5264 ctables_pcexpr_destroy (e->subs[i]);
5268 case CTPO_CAT_NUMBER:
5269 case CTPO_CAT_NRANGE:
5270 case CTPO_CAT_MISSING:
5271 case CTPO_CAT_OTHERNM:
5272 case CTPO_CAT_SUBTOTAL:
5273 case CTPO_CAT_TOTAL:
5277 msg_location_destroy (e->location);
5282 static struct ctables_pcexpr *
5283 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5284 struct ctables_pcexpr *sub0,
5285 struct ctables_pcexpr *sub1)
5287 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5288 *e = (struct ctables_pcexpr) {
5290 .subs = { sub0, sub1 },
5291 .location = msg_location_merged (sub0->location, sub1->location),
5296 /* How to parse an operator. */
/* TOKEN is the lexer token type to recognize. */
5299 enum token_type token;
/* OP is the postcompute operation that the token stands for. */
5300 enum ctables_postcompute_op op;
5303 static const struct operator *
5304 ctables_pcexpr_match_operator (struct lexer *lexer,
5305 const struct operator ops[], size_t n_ops)
5307 for (const struct operator *op = ops; op < ops + n_ops; op++)
5308 if (lex_token (lexer) == op->token)
5310 if (op->token != T_NEG_NUM)
5319 static struct ctables_pcexpr *
5320 ctables_pcexpr_parse_binary_operators__ (
5321 struct lexer *lexer, struct dictionary *dict,
5322 const struct operator ops[], size_t n_ops,
5323 parse_recursively_func *parse_next_level,
5324 const char *chain_warning, struct ctables_pcexpr *lhs)
5326 for (int op_count = 0; ; op_count++)
5328 const struct operator *op
5329 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5332 if (op_count > 1 && chain_warning)
5333 msg_at (SW, lhs->location, "%s", chain_warning);
5338 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5341 ctables_pcexpr_destroy (lhs);
5345 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5349 static struct ctables_pcexpr *
5350 ctables_pcexpr_parse_binary_operators (
5351 struct lexer *lexer, struct dictionary *dict,
5352 const struct operator ops[], size_t n_ops,
5353 parse_recursively_func *parse_next_level, const char *chain_warning)
5355 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5359 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5361 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this to parse a
   parenthesized subexpression, but it is defined after that function. */
5364 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5365 struct dictionary *);
5367 static struct ctables_pcexpr
5368 ctpo_cat_nrange (double low, double high)
5370 return (struct ctables_pcexpr) {
5371 .op = CTPO_CAT_NRANGE,
5372 .nrange = { low, high },
5376 static struct ctables_pcexpr
5377 ctpo_cat_srange (struct substring low, struct substring high)
5379 return (struct ctables_pcexpr) {
5380 .op = CTPO_CAT_SRANGE,
5381 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER.  The visible branches
   accept:

   - A number, as a constant.

   - The keywords MISSING, OTHERNM, and TOTAL, and SUBTOTAL optionally
     followed by a positive index in square brackets.

   - A category in square brackets: a number or string, or a range written
     with THRU, where LO may stand in for the lower bound and HI for the
     upper one.

   - A parenthesized subexpression, parsed with ctables_pcexpr_parse_add().

   Returns a newly allocated expression whose location spans the tokens
   consumed.
   NOTE(review): the error returns are not visible in this view; presumably
   a null pointer is returned on a syntax error. */
5385 static struct ctables_pcexpr *
5386 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5388 int start_ofs = lex_ofs (lexer);
5389 struct ctables_pcexpr e;
5390 if (lex_is_number (lexer))
5392 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5393 .number = lex_number (lexer) };
5396 else if (lex_match_id (lexer, "MISSING"))
5397 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5398 else if (lex_match_id (lexer, "OTHERNM"))
5399 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5400 else if (lex_match_id (lexer, "TOTAL"))
5401 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5402 else if (lex_match_id (lexer, "SUBTOTAL"))
/* An index of 0 means that no explicit subtotal index was given. */
5404 size_t subtotal_index = 0;
5405 if (lex_match (lexer, T_LBRACK))
5407 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5409 subtotal_index = lex_integer (lexer);
5411 if (!lex_force_match (lexer, T_RBRACK))
5414 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5415 .subtotal_index = subtotal_index };
5417 else if (lex_match (lexer, T_LBRACK))
/* [LO THRU high]: the upper bound decides whether this is a string or a
   numeric range.  An open lower bound is a null string for strings and
   -DBL_MAX for numbers. */
5419 if (lex_match_id (lexer, "LO"))
5421 if (!lex_force_match_id (lexer, "THRU"))
5424 if (lex_is_string (lexer))
5426 struct substring low = { .string = NULL };
5427 struct substring high = parse_substring (lexer, dict);
5428 e = ctpo_cat_srange (low, high);
5432 if (!lex_force_num (lexer))
5434 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* [number], [number THRU HI], or [number THRU number]. */
5438 else if (lex_is_number (lexer))
5440 double number = lex_number (lexer);
5442 if (lex_match_id (lexer, "THRU"))
5444 if (lex_match_id (lexer, "HI"))
5445 e = ctpo_cat_nrange (number, DBL_MAX);
5448 if (!lex_force_num (lexer))
5450 e = ctpo_cat_nrange (number, lex_number (lexer));
5455 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* [string], [string THRU HI], or [string THRU string].  An open upper
   bound is a null string. */
5458 else if (lex_is_string (lexer))
5460 struct substring s = parse_substring (lexer, dict);
5462 if (lex_match_id (lexer, "THRU"))
5464 struct substring high;
5466 if (lex_match_id (lexer, "HI"))
5467 high = (struct substring) { .string = NULL };
5470 if (!lex_force_string (lexer))
5475 high = parse_substring (lexer, dict);
5478 e = ctpo_cat_srange (s, high);
5481 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5485 lex_error (lexer, NULL);
/* Without the closing bracket, any strings already parsed into E are
   released. */
5489 if (!lex_force_match (lexer, T_RBRACK))
5491 if (e.op == CTPO_CAT_STRING)
5492 ss_dealloc (&e.string);
5493 else if (e.op == CTPO_CAT_SRANGE)
5495 ss_dealloc (&e.srange[0]);
5496 ss_dealloc (&e.srange[1]);
5501 else if (lex_match (lexer, T_LPAREN))
5503 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5506 if (!lex_force_match (lexer, T_RPAREN))
5508 ctables_pcexpr_destroy (ep);
5515 lex_error (lexer, NULL);
/* The location covers START_OFS through the last token consumed; E is
   copied to the heap for the caller. */
5519 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5520 return xmemdup (&e, sizeof e);
5523 static struct ctables_pcexpr *
5524 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5525 struct lexer *lexer, int start_ofs)
5527 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5528 *e = (struct ctables_pcexpr) {
5531 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5536 static struct ctables_pcexpr *
5537 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5539 static const struct operator op = { T_EXP, CTPO_POW };
5541 const char *chain_warning =
5542 _("The exponentiation operator (`**') is left-associative: "
5543 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5544 "To disable this warning, insert parentheses.");
5546 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5547 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5548 ctables_pcexpr_parse_primary,
5551 /* Special case for situations like "-5**6", which must be parsed as
5554 int start_ofs = lex_ofs (lexer);
5555 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5556 *lhs = (struct ctables_pcexpr) {
5557 .op = CTPO_CONSTANT,
5558 .number = -lex_tokval (lexer),
5559 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5563 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5564 lexer, dict, &op, 1,
5565 ctables_pcexpr_parse_primary, chain_warning, lhs);
5569 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5572 /* Parses the unary minus level. */
5573 static struct ctables_pcexpr *
5574 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5576 int start_ofs = lex_ofs (lexer);
5577 if (!lex_match (lexer, T_DASH))
5578 return ctables_pcexpr_parse_exp (lexer, dict);
5580 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5584 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5587 /* Parses the multiplication and division level. */
5588 static struct ctables_pcexpr *
5589 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5591 static const struct operator ops[] =
5593 { T_ASTERISK, CTPO_MUL },
5594 { T_SLASH, CTPO_DIV },
5597 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5598 sizeof ops / sizeof *ops,
5599 ctables_pcexpr_parse_neg, NULL);
5602 /* Parses the addition and subtraction level. */
5603 static struct ctables_pcexpr *
5604 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5606 static const struct operator ops[] =
5608 { T_PLUS, CTPO_ADD },
5609 { T_DASH, CTPO_SUB },
5610 { T_NEG_NUM, CTPO_ADD },
5613 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5614 ops, sizeof ops / sizeof *ops,
5615 ctables_pcexpr_parse_mul, NULL);
5618 static struct ctables_postcompute *
5619 ctables_find_postcompute (struct ctables *ct, const char *name)
5621 struct ctables_postcompute *pc;
5622 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5623 utf8_hash_case_string (name, 0), &ct->postcomputes)
5624 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form "&name = EXPR(expression)"
   from LEXER and records it among CT's postcomputes (CT is the object
   passed to ctables_find_postcompute() below).

   If a postcompute with the same name already exists, a warning and a note
   are issued and its expression and location are replaced; otherwise a new
   postcompute is inserted into ct->postcomputes.
   NOTE(review): the return statements are not visible in this view;
   presumably a bool reporting success. */
5630 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The location is meant to start one token before the current one. */
5633 int pcompute_start = lex_ofs (lexer) - 1;
5635 if (!lex_match (lexer, T_AND))
5637 lex_error_expecting (lexer, "&");
5640 if (!lex_force_id (lexer))
/* NAME is a heap-allocated copy of the identifier token. */
5643 char *name = ss_xstrdup (lex_tokss (lexer));
5646 if (!lex_force_match (lexer, T_EQUALS)
5647 || !lex_force_match_id (lexer, "EXPR")
5648 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets of the expression are remembered so that its source text
   can serve as the default label below. */
5654 int expr_start = lex_ofs (lexer);
5655 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5656 int expr_end = lex_ofs (lexer) - 1;
5657 if (!expr || !lex_force_match (lexer, T_RPAREN))
5659 ctables_pcexpr_destroy (expr);
5663 int pcompute_end = lex_ofs (lexer) - 1;
5665 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5668 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the previous definition, and release the
   old expression and location. */
5671 msg_at (SW, location, _("New definition of &%s will override the "
5672 "previous definition."),
5674 msg_at (SN, pc->location, _("This is the previous definition."));
5676 ctables_pcexpr_destroy (pc->expr);
5677 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes NAME. */
5682 pc = xmalloc (sizeof *pc);
5683 *pc = (struct ctables_postcompute) { .name = name };
5684 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5685 utf8_hash_case_string (pc->name, 0));
5688 pc->location = location;
/* The label is set to the expression's source text.
   NOTE(review): presumably only when no label is set yet; the guard is not
   visible in this view. */
5690 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary function and format pairs from LEXER into *SSS,
   which is reset to empty first.

   Parsing stops at the end of the command, at a slash, or at the
   identifier LABEL or HIDESOURCECATS.  Each item is a summary function, a
   percentile in the range 0 to 100 if the function is CTSF_PTILE, and a
   format specifier.
   NOTE(review): the return statements are not visible in this view;
   presumably a bool reporting success, with *SSS uninitialized again on
   failure. */
5695 ctables_parse_pproperties_format (struct lexer *lexer,
5696 struct ctables_summary_spec_set *sss)
5698 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5700 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5701 && !(lex_token (lexer) == T_ID
5702 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5703 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5704 lex_tokss (lexer)))))
5706 /* Parse function. */
5707 enum ctables_summary_function function;
5708 enum ctables_weighting weighting;
5709 enum ctables_area_type area;
5710 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5713 /* Parse percentile. */
5714 double percentile = 0;
5715 if (function == CTSF_PTILE)
5717 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5719 percentile = lex_number (lexer);
5724 struct fmt_spec format;
5725 bool is_ctables_format;
5726 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Appends the new specification, growing the array when it is full. */
5729 if (sss->n >= sss->allocated)
5730 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5731 sizeof *sss->specs);
5732 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5733 .function = function,
5734 .weighting = weighting,
5737 .percentile = percentile,
5739 .is_ctables_format = is_ctables_format,
/* Releases whatever was accumulated in *SSS. */
5745 ctables_summary_spec_set_uninit (sss);
/* Parses a PPROPERTIES subcommand from LEXER: a list of "&NAME" references to
   postcomputes looked up in CT, followed by any number of LABEL, FORMAT, and
   HIDESOURCECATS settings.  Each setting is applied to every postcompute in
   the list.  A name that ctables_find_postcompute does not know is reported
   with an "Unknown computed category" error. */
5750 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5752 struct ctables_postcompute **pcs = NULL;
5754 size_t allocated_pcs = 0;
/* Gather the referenced postcomputes into PCS, growing the array on demand. */
5756 while (lex_match (lexer, T_AND))
5758 if (!lex_force_id (lexer))
5760 struct ctables_postcompute *pc
5761 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5764 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5769 if (n_pcs >= allocated_pcs)
5770 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply settings until the subcommand ends at "/" or at the end of the
   command.  The "=" after each setting name is optional. */
5774 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5776 if (lex_match_id (lexer, "LABEL"))
5778 lex_match (lexer, T_EQUALS);
5779 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string; its old label is
   freed first. */
5782 for (size_t i = 0; i < n_pcs; i++)
5784 free (pcs[i]->label);
5785 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5790 else if (lex_match_id (lexer, "FORMAT"))
5792 lex_match (lexer, T_EQUALS);
5794 struct ctables_summary_spec_set sss;
5795 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Every postcompute receives a separately allocated clone of SSS, after
   which the parsed original is released. */
5798 for (size_t i = 0; i < n_pcs; i++)
5801 ctables_summary_spec_set_uninit (pcs[i]->specs);
5803 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5804 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5806 ctables_summary_spec_set_uninit (&sss);
5808 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5810 lex_match (lexer, T_EQUALS);
5811 bool hide_source_cats;
5812 if (!parse_bool (lexer, &hide_source_cats))
5814 for (size_t i = 0; i < n_pcs; i++)
5815 pcs[i]->hide_source_cats = hide_source_cats;
5819 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime according to FORMAT,
   and appends the resulting text to OUT.

   NOTE(review): the localtime result is handed to strftime with no visible
   NULL check, and strftime's return value is not examined before the buffer
   is appended -- confirm that both are acceptable here. */
5832 put_strftime (struct string *out, time_t now, const char *format)
5834 const struct tm *tm = localtime (&now);
5836 strftime (value, sizeof value, format, tm);
5837 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past those PREFIX.length bytes.

   NOTE(review): put_title_text uses the result as a condition, so this
   presumably reports whether the prefix was found and skipped -- confirm. */
5841 skip_prefix (struct substring *s, struct substring prefix)
5843 if (ss_starts_with (*s, prefix))
5845 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens at LEXER offsets EXPR_START up to,
   but not including, EXPR_END.

   An identifier token is looked up in DICT: when it names a variable that
   has a variable label, the label is written, otherwise the identifier text
   itself.  Left and right square brackets are special-cased through NEST.
   NOTE(review): presumably this tracks bracket nesting so that bracketed
   material is left out -- confirm against the full function. */
5853 put_table_expression (struct string *out, struct lexer *lexer,
5854 struct dictionary *dict, int expr_start, int expr_end)
5857 for (int ofs = expr_start; ofs < expr_end; ofs++)
5859 const struct token *t = lex_ofs_token (lexer, ofs);
5860 if (t->type == T_LBRACK)
5862 else if (t->type == T_RBRACK && nest > 0)
5868 else if (t->type == T_ID)
5870 const struct variable *var
5871 = dict_lookup_var (dict, t->string.string);
5872 const char *label = var ? var_get_label (var) : NULL;
5873 ds_put_cstr (out, label ? label : t->string.string);
/* Write the token's source representation.  A space goes in front unless
   this is the first token, the token is ")", or OUT already ends in a
   space. */
5877 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5878 ds_put_byte (out, ' ');
5880 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5881 ds_put_cstr (out, repr);
/* A space follows unless this is the last token or the token is "(". */
5884 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5885 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding three keywords that begin with
   ")":

     )DATE   NOW formatted by put_strftime with "%x".
     )TIME   NOW formatted by put_strftime with "%X".
     )TABLE  The table expression between LEXER offsets EXPR_START and
             EXPR_END, rendered by put_table_expression using DICT.

   A ")" that does not start one of these keywords is copied literally. */
5891 put_title_text (struct string *out, struct substring in, time_t now,
5892 struct lexer *lexer, struct dictionary *dict,
5893 int expr_start, int expr_end)
/* Copy everything up to the next ")" verbatim and drop it from IN. */
5897 size_t chunk = ss_find_byte (in, ')');
5898 ds_put_substring (out, ss_head (in, chunk));
5899 ss_advance (&in, chunk);
5900 if (ss_is_empty (in))
/* IN now starts with ")": try each keyword in turn. */
5903 if (skip_prefix (&in, ss_cstr (")DATE")))
5904 put_strftime (out, now, "%x");
5905 else if (skip_prefix (&in, ss_cstr (")TIME")))
5906 put_strftime (out, now, "%X");
5907 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5908 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* No keyword matched: emit the ")" itself and move past it. */
5911 ds_put_byte (out, ')');
5912 ss_advance (&in, 1);
/* Implements the CTABLES command for the dataset DS, reading syntax from
   LEXER.

   Parsing proceeds in two stages.  First come the subcommands that precede
   the first TABLE (FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES,
   WEIGHT, HIDESMALLCOUNTS).  Then comes each TABLE subcommand with the
   per-table subcommands that may follow it (SLABELS, CLABELS, CRITERIA,
   CATEGORIES, TITLES, SIGTEST, COMPARETEST).  After parsing, the tables are
   produced by ctables_execute.

   Returns CMD_SUCCESS when both ctables_execute and proc_commit succeed and
   CMD_FAILURE otherwise. */
5918 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5920 struct casereader *input = NULL;
/* Run the measure guesser over the dataset's cases. */
5922 struct measure_guesser *mg = measure_guesser_create (ds);
5925 input = proc_open (ds);
5926 measure_guesser_run (mg, input);
5927 measure_guesser_destroy (mg);
/* Every variable starts with the variable-label display mode taken from the
   current settings. */
5930 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5931 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5932 enum settings_value_show tvars = settings_get_show_variables ();
5933 for (size_t i = 0; i < n_vars; i++)
5934 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default look, with omit_empty turned
   off. */
5936 struct pivot_table_look *look = pivot_table_look_unshare (
5937 pivot_table_look_ref (pivot_table_look_get_default ()));
5938 look->omit_empty = false;
5940 struct ctables *ct = xmalloc (sizeof *ct);
5941 *ct = (struct ctables) {
5942 .dict = dataset_dict (ds),
5944 .ctables_formats = FMT_SETTINGS_INIT,
5946 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once so that every title expansion sees the same time. */
5949 time_t now = time (NULL);
5954 const char *dot_string;
5955 const char *comma_string;
5957 static const struct ctf ctfs[4] = {
5958 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5959 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5960 { CTEF_PAREN, "-,(,),", "-.(.)." },
5961 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
/* Register the four CTABLES-specific number styles, picking the style string
   according to whether the current decimal point is '.'. */
5963 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5964 for (size_t i = 0; i < 4; i++)
5966 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5967 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5968 fmt_number_style_from_string (s));
/* The first subcommand must be introduced by "/". */
5971 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that precede the first TABLE. */
5974 while (!lex_match_id (lexer, "TABLE"))
5976 if (lex_match_id (lexer, "FORMAT"))
5978 double widths[2] = { SYSMIS, SYSMIS };
5979 double units_per_inch = 72.0;
5981 while (lex_token (lexer) != T_SLASH)
5983 if (lex_match_id (lexer, "MINCOLWIDTH"))
5985 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5988 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5990 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5993 else if (lex_match_id (lexer, "UNITS"))
5995 lex_match (lexer, T_EQUALS);
5996 if (lex_match_id (lexer, "POINTS"))
5997 units_per_inch = 72.0;
5998 else if (lex_match_id (lexer, "INCHES"))
5999 units_per_inch = 1.0;
6000 else if (lex_match_id (lexer, "CM"))
6001 units_per_inch = 2.54;
6004 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6008 else if (lex_match_id (lexer, "EMPTY"))
6013 lex_match (lexer, T_EQUALS);
6014 if (lex_match_id (lexer, "ZERO"))
6016 /* Nothing to do. */
6018 else if (lex_match_id (lexer, "BLANK"))
6019 ct->zero = xstrdup ("");
6020 else if (lex_force_string (lexer))
6022 ct->zero = ss_xstrdup (lex_tokss (lexer));
6028 else if (lex_match_id (lexer, "MISSING"))
6030 lex_match (lexer, T_EQUALS);
6031 if (!lex_force_string (lexer))
6035 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6036 ? ss_xstrdup (lex_tokss (lexer))
6042 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6043 "UNITS", "EMPTY", "MISSING");
/* When both widths were given, the minimum may not exceed the maximum. */
6048 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6049 && widths[0] > widths[1])
6051 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was given from the selected unit to inches, scale
   by 96.0, and store it in the look's horizontal width range. */
6055 for (size_t i = 0; i < 2; i++)
6056 if (widths[i] != SYSMIS)
6058 int *wr = ct->look->width_ranges[TABLE_HORZ];
6059 wr[i] = widths[i] / units_per_inch * 96.0;
6064 else if (lex_match_id (lexer, "VLABELS"))
6066 if (!lex_force_match_id (lexer, "VARIABLES"))
6068 lex_match (lexer, T_EQUALS);
6070 struct variable **vars;
6072 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6076 if (!lex_force_match_id (lexer, "DISPLAY"))
6081 lex_match (lexer, T_EQUALS);
6083 enum ctables_vlabel vlabel;
6084 if (lex_match_id (lexer, "DEFAULT"))
6085 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6086 else if (lex_match_id (lexer, "NAME"))
6088 else if (lex_match_id (lexer, "LABEL"))
6089 vlabel = CTVL_LABEL;
6090 else if (lex_match_id (lexer, "BOTH"))
6092 else if (lex_match_id (lexer, "NONE"))
6096 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen display mode for each listed variable, indexed by its
   position in the dictionary. */
6102 for (size_t i = 0; i < n_vars; i++)
6103 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6106 else if (lex_match_id (lexer, "MRSETS"))
6108 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6110 lex_match (lexer, T_EQUALS);
6111 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6114 else if (lex_match_id (lexer, "SMISSING"))
6116 if (lex_match_id (lexer, "VARIABLE"))
6117 ct->smissing_listwise = false;
6118 else if (lex_match_id (lexer, "LISTWISE"))
6119 ct->smissing_listwise = true;
6122 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6126 else if (lex_match_id (lexer, "PCOMPUTE"))
6128 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6131 else if (lex_match_id (lexer, "PPROPERTIES"))
6133 if (!ctables_parse_pproperties (lexer, ct))
6136 else if (lex_match_id (lexer, "WEIGHT"))
6138 if (!lex_force_match_id (lexer, "VARIABLE"))
6140 lex_match (lexer, T_EQUALS);
6141 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6145 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6147 if (lex_match_id (lexer, "COUNT"))
6149 lex_match (lexer, T_EQUALS);
6150 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6153 ct->hide_threshold = lex_integer (lexer);
/* Without COUNT, a threshold that is still 0 becomes 5. */
6156 else if (ct->hide_threshold == 0)
6157 ct->hide_threshold = 5;
6161 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6162 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6163 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6167 if (!lex_force_match (lexer, T_SLASH))
/* TABLE subcommands.  Each one appends a new table to CT->tables. */
6171 size_t allocated_tables = 0;
6174 if (ct->n_tables >= allocated_tables)
6175 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6176 sizeof *ct->tables);
6178 struct ctables_category *cat = xmalloc (sizeof *cat);
6179 *cat = (struct ctables_category) {
6181 .include_missing = false,
6182 .sort_ascending = true,
6185 struct ctables_categories *c = xmalloc (sizeof *c);
6186 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6187 *c = (struct ctables_categories) {
/* One categories slot per dictionary variable. */
6194 struct ctables_categories **categories = xnmalloc (n_vars,
6195 sizeof *categories);
6196 for (size_t i = 0; i < n_vars; i++)
6199 struct ctables_table *t = xmalloc (sizeof *t);
6200 *t = (struct ctables_table) {
6202 .slabels_axis = PIVOT_AXIS_COLUMN,
6203 .slabels_visible = true,
6204 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6206 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6207 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6208 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6210 .clabels_from_axis = PIVOT_AXIS_LAYER,
6211 .clabels_to_axis = PIVOT_AXIS_LAYER,
6212 .categories = categories,
6213 .n_categories = n_vars,
6216 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: a row axis, optionally followed by BY and a
   column axis, optionally followed by BY and a layer axis.  The token range
   is remembered for the )TABLE keyword in titles. */
6218 lex_match (lexer, T_EQUALS);
6219 int expr_start = lex_ofs (lexer);
6220 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6222 if (lex_match (lexer, T_BY))
6224 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6225 ct, t, PIVOT_AXIS_COLUMN))
6228 if (lex_match (lexer, T_BY))
6230 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6231 ct, t, PIVOT_AXIS_LAYER))
6235 int expr_end = lex_ofs (lexer);
6237 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6238 && !t->axes[PIVOT_AXIS_LAYER])
6240 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis; the error below points at
   each axis that has one. */
6244 const struct ctables_axis *scales[PIVOT_N_AXES];
6245 size_t n_scales = 0;
6246 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6248 scales[a] = find_scale (t->axes[a]);
6254 msg (SE, _("Scale variables may appear only on one axis."));
6255 if (scales[PIVOT_AXIS_ROW])
6256 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6257 _("This scale variable appears on the rows axis."));
6258 if (scales[PIVOT_AXIS_COLUMN])
6259 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6260 _("This scale variable appears on the columns axis."));
6261 if (scales[PIVOT_AXIS_LAYER])
6262 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6263 _("This scale variable appears on the layer axis."));
/* Likewise, summaries may appear on only one axis. */
6267 const struct ctables_axis *summaries[PIVOT_N_AXES];
6268 size_t n_summaries = 0;
6269 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6271 summaries[a] = (scales[a]
6273 : find_categorical_summary_spec (t->axes[a]));
6277 if (n_summaries > 1)
6279 msg (SE, _("Summaries may appear only on one axis."));
6280 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6283 msg_at (SN, summaries[a]->loc,
6285 ? _("This variable on the rows axis has a summary.")
6286 : a == PIVOT_AXIS_COLUMN
6287 ? _("This variable on the columns axis has a summary.")
6288 : _("This variable on the layers axis has a summary."));
6290 msg_at (SN, summaries[a]->loc,
6291 _("This is a scale variable, so it always has a "
6292 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis with a summary when any summary exists,
   otherwise an axis that is present at all. */
6297 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6298 if (n_summaries ? summaries[a] : t->axes[a])
6300 t->summary_axis = a;
/* If the command ends right after the table expression, the table is
   prepared here with no per-table subcommands. */
6304 if (lex_token (lexer) == T_ENDCMD)
6306 if (!ctables_prepare_table (t))
6310 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6313 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6315 if (lex_match_id (lexer, "SLABELS"))
6317 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6319 if (lex_match_id (lexer, "POSITION"))
6321 lex_match (lexer, T_EQUALS);
6322 if (lex_match_id (lexer, "COLUMN"))
6323 t->slabels_axis = PIVOT_AXIS_COLUMN;
6324 else if (lex_match_id (lexer, "ROW"))
6325 t->slabels_axis = PIVOT_AXIS_ROW;
6326 else if (lex_match_id (lexer, "LAYER"))
6327 t->slabels_axis = PIVOT_AXIS_LAYER;
6330 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6334 else if (lex_match_id (lexer, "VISIBLE"))
6336 lex_match (lexer, T_EQUALS);
6337 if (!parse_bool (lexer, &t->slabels_visible))
6342 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6347 else if (lex_match_id (lexer, "CLABELS"))
6349 if (lex_match_id (lexer, "AUTO"))
6351 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6352 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6354 else if (lex_match_id (lexer, "ROWLABELS"))
6356 lex_match (lexer, T_EQUALS);
6357 if (lex_match_id (lexer, "OPPOSITE"))
6358 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6359 else if (lex_match_id (lexer, "LAYER"))
6360 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6363 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6367 else if (lex_match_id (lexer, "COLLABELS"))
6369 lex_match (lexer, T_EQUALS);
6370 if (lex_match_id (lexer, "OPPOSITE"))
6371 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6372 else if (lex_match_id (lexer, "LAYER"))
6373 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6376 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6382 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6387 else if (lex_match_id (lexer, "CRITERIA"))
6389 if (!lex_force_match_id (lexer, "CILEVEL"))
6391 lex_match (lexer, T_EQUALS);
6393 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6395 t->cilevel = lex_number (lexer);
6398 else if (lex_match_id (lexer, "CATEGORIES"))
6400 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6404 else if (lex_match_id (lexer, "TITLES"))
6409 if (lex_match_id (lexer, "CAPTION"))
6410 textp = &t->caption;
6411 else if (lex_match_id (lexer, "CORNER"))
6413 else if (lex_match_id (lexer, "TITLE"))
6417 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6420 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are expanded by put_title_text and joined with
   single spaces; the result replaces the selected text. */
6422 struct string s = DS_EMPTY_INITIALIZER;
6423 while (lex_is_string (lexer))
6425 if (!ds_is_empty (&s))
6426 ds_put_byte (&s, ' ');
6427 put_title_text (&s, lex_tokss (lexer), now,
6428 lexer, dataset_dict (ds),
6429 expr_start, expr_end);
6433 *textp = ds_steal_cstr (&s);
6435 while (lex_token (lexer) != T_SLASH
6436 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST is parsed in full but then rejected as not yet implemented. */
6438 else if (lex_match_id (lexer, "SIGTEST"))
6440 int start_ofs = lex_ofs (lexer) - 1;
6443 t->chisq = xmalloc (sizeof *t->chisq);
6444 *t->chisq = (struct ctables_chisq) {
6446 .include_mrsets = true,
6447 .all_visible = true,
6453 if (lex_match_id (lexer, "TYPE"))
6455 lex_match (lexer, T_EQUALS);
6456 if (!lex_force_match_id (lexer, "CHISQUARE"))
6459 else if (lex_match_id (lexer, "ALPHA"))
6461 lex_match (lexer, T_EQUALS);
6462 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6464 t->chisq->alpha = lex_number (lexer);
6467 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6469 lex_match (lexer, T_EQUALS);
6470 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6473 else if (lex_match_id (lexer, "CATEGORIES"))
6475 lex_match (lexer, T_EQUALS);
6476 if (lex_match_id (lexer, "ALLVISIBLE"))
6477 t->chisq->all_visible = true;
6478 else if (lex_match_id (lexer, "SUBTOTALS"))
6479 t->chisq->all_visible = false;
6482 lex_error_expecting (lexer,
6483 "ALLVISIBLE", "SUBTOTALS");
6489 lex_error_expecting (lexer, "TYPE", "ALPHA",
6490 "INCLUDEMRSETS", "CATEGORIES");
6494 while (lex_token (lexer) != T_SLASH
6495 && lex_token (lexer) != T_ENDCMD);
6497 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6498 _("Support for SIGTEST not yet implemented."));
/* COMPARETEST is likewise parsed in full and then rejected.
   NOTE(review): the SIGTEST branch computes start_ofs as
   lex_ofs (lexer) - 1, whereas this branch omits the "- 1", so the two
   error ranges start at different tokens relative to their keywords --
   confirm which is intended. */
6501 else if (lex_match_id (lexer, "COMPARETEST"))
6503 int start_ofs = lex_ofs (lexer);
6506 t->pairwise = xmalloc (sizeof *t->pairwise);
6507 *t->pairwise = (struct ctables_pairwise) {
6509 .alpha = { .05, .05 },
6510 .adjust = BONFERRONI,
6511 .include_mrsets = true,
6512 .meansvariance_allcats = true,
6513 .all_visible = true,
6522 if (lex_match_id (lexer, "TYPE"))
6524 lex_match (lexer, T_EQUALS);
6525 if (lex_match_id (lexer, "PROP"))
6526 t->pairwise->type = PROP;
6527 else if (lex_match_id (lexer, "MEAN"))
6528 t->pairwise->type = MEAN;
6531 lex_error_expecting (lexer, "PROP", "MEAN");
6535 else if (lex_match_id (lexer, "ALPHA"))
6537 lex_match (lexer, T_EQUALS);
6539 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6541 double a0 = lex_number (lexer);
/* A second number is optional, with an optional comma in front of it.  When
   two are given, the smaller is stored first; when one is given, it is used
   for both. */
6544 lex_match (lexer, T_COMMA);
6545 if (lex_is_number (lexer))
6547 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6549 double a1 = lex_number (lexer);
6552 t->pairwise->alpha[0] = MIN (a0, a1);
6553 t->pairwise->alpha[1] = MAX (a0, a1);
6556 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6558 else if (lex_match_id (lexer, "ADJUST"))
6560 lex_match (lexer, T_EQUALS);
6561 if (lex_match_id (lexer, "BONFERRONI"))
6562 t->pairwise->adjust = BONFERRONI;
6563 else if (lex_match_id (lexer, "BH"))
6564 t->pairwise->adjust = BH;
6565 else if (lex_match_id (lexer, "NONE"))
6566 t->pairwise->adjust = 0;
6569 lex_error_expecting (lexer, "BONFERRONI", "BH",
6574 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6576 lex_match (lexer, T_EQUALS);
6577 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6580 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6582 lex_match (lexer, T_EQUALS);
6583 if (lex_match_id (lexer, "ALLCATS"))
6584 t->pairwise->meansvariance_allcats = true;
6585 else if (lex_match_id (lexer, "TESTEDCATS"))
6586 t->pairwise->meansvariance_allcats = false;
6589 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6593 else if (lex_match_id (lexer, "CATEGORIES"))
6595 lex_match (lexer, T_EQUALS);
6596 if (lex_match_id (lexer, "ALLVISIBLE"))
6597 t->pairwise->all_visible = true;
6598 else if (lex_match_id (lexer, "SUBTOTALS"))
6599 t->pairwise->all_visible = false;
6602 lex_error_expecting (lexer, "ALLVISIBLE",
6607 else if (lex_match_id (lexer, "MERGE"))
6609 lex_match (lexer, T_EQUALS);
6610 if (!parse_bool (lexer, &t->pairwise->merge))
6613 else if (lex_match_id (lexer, "STYLE"))
6615 lex_match (lexer, T_EQUALS);
6616 if (lex_match_id (lexer, "APA"))
6617 t->pairwise->apa_style = true;
6618 else if (lex_match_id (lexer, "SIMPLE"))
6619 t->pairwise->apa_style = false;
6622 lex_error_expecting (lexer, "APA", "SIMPLE");
6626 else if (lex_match_id (lexer, "SHOWSIG"))
6628 lex_match (lexer, T_EQUALS);
6629 if (!parse_bool (lexer, &t->pairwise->show_sig))
6634 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6635 "INCLUDEMRSETS", "MEANSVARIANCE",
6636 "CATEGORIES", "MERGE", "STYLE",
6641 while (lex_token (lexer) != T_SLASH
6642 && lex_token (lexer) != T_ENDCMD);
6644 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6645 _("Support for COMPARETEST not yet implemented."));
6650 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6651 "CRITERIA", "CATEGORIES", "TITLES",
6652 "SIGTEST", "COMPARETEST");
6656 if (!lex_match (lexer, T_SLASH))
/* Work out where category labels move from and to.  Moving both the row and
   the column labels is rejected. */
6660 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6662 t->clabels_from_axis = PIVOT_AXIS_ROW;
6663 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6665 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6669 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6670 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6671 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6673 if (!ctables_prepare_table (t))
6676 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: read the cases, build the tables, and commit the
   procedure.  The command succeeds only if both steps succeed. */
6679 input = proc_open (ds);
6680 bool ok = ctables_execute (ds, input, ct);
6681 ok = proc_commit (ds) && ok;
6683 ctables_destroy (ct);
6684 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): presumably the failure path, which also releases CT --
   confirm against the full function. */
6689 ctables_destroy (ct);