1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  Instances are linked into struct
   ctables's 'postcomputes' hmap through 'hmap_node'. */
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
376 size_t *areas[N_CTATS];
377 size_t n_areas[N_CTATS];
380 struct ctables_summary_spec_set specs[N_CSVS];
383 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
386 struct ctables_nest *nests;
390 static void ctables_stack_uninit (struct ctables_stack *);
394 struct hmap_node node;
399 struct ctables_occurrence
401 struct hmap_node node;
405 struct ctables_section
408 struct ctables_table *table;
409 struct ctables_nest *nests[PIVOT_N_AXES];
412 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
413 struct hmap cells; /* Contains "struct ctables_cell"s. */
414 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
417 static void ctables_section_uninit (struct ctables_section *);
421 struct ctables *ctables;
422 struct ctables_axis *axes[PIVOT_N_AXES];
423 struct ctables_stack stacks[PIVOT_N_AXES];
424 struct ctables_section *sections;
426 enum pivot_axis_type summary_axis;
427 struct ctables_summary_spec_set summary_specs;
428 struct variable **sum_vars;
431 enum pivot_axis_type slabels_axis;
432 bool slabels_visible;
434 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
436 Most commonly, label_axis[a] == a, and in particular we always have
437 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
439 If ROWLABELS or COLLABELS is specified, then one of
440 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
441 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
443 If any category labels are moved, then 'clabels_example' is one of the
444 variables being moved (and it is otherwise NULL). All of the variables
445 being moved have the same width, value labels, and categories, so this
446 example variable can be used to find those out.
448 The remaining members in this group are relevant only if category labels
451 'clabels_values_map' holds a "struct ctables_value" for all the values
452 that appear in all of the variables in the moved categories. It is
453 accumulated as the data is read. Once the data is fully read, its
454 sorted values are put into 'clabels_values' and 'n_clabels_values'.
456 enum pivot_axis_type label_axis[PIVOT_N_AXES];
457 enum pivot_axis_type clabels_from_axis;
458 const struct variable *clabels_example;
459 struct hmap clabels_values_map;
460 struct ctables_value **clabels_values;
461 size_t n_clabels_values;
463 /* Indexed by variable dictionary index. */
464 struct ctables_categories **categories;
473 struct ctables_chisq *chisq;
474 struct ctables_pairwise *pairwise;
477 struct ctables_categories
480 struct ctables_category *cats;
485 struct ctables_category
487 enum ctables_category_type
489 /* Explicit category lists. */
492 CCT_NRANGE, /* Numerical range. */
493 CCT_SRANGE, /* String range. */
498 /* Totals and subtotals. */
502 /* Implicit category lists. */
507 /* For contributing to TOTALN. */
508 CCT_EXCLUDED_MISSING,
512 struct ctables_category *subtotal;
518 double number; /* CCT_NUMBER. */
519 struct substring string; /* CCT_STRING, in dictionary encoding. */
520 double nrange[2]; /* CCT_NRANGE. */
521 struct substring srange[2]; /* CCT_SRANGE. */
525 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
526 bool hide_subcategories; /* CCT_SUBTOTAL. */
529 /* CCT_POSTCOMPUTE. */
532 const struct ctables_postcompute *pc;
533 enum fmt_type parse_format;
536 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
539 bool include_missing;
543 enum ctables_summary_function sort_function;
545 enum ctables_area_type area;
546 struct variable *sort_var;
551 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
552 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
553 struct msg_location *location;
/* Releases the resources owned by CAT.  The source location is destroyed
   first; the type-specific members released below are the string, both
   string-range bounds, and the total label.  Several case labels of the
   switch are not visible in this listing, so which category types reach
   which release cannot be confirmed from here. */
557 ctables_category_uninit (struct ctables_category *cat)
562 msg_location_destroy (cat->location);
569 case CCT_POSTCOMPUTE:
573 ss_dealloc (&cat->string);
577 ss_dealloc (&cat->srange[0]);
578 ss_dealloc (&cat->srange[1]);
583 free (cat->total_label);
591 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings compare equal, a null and a non-null one
   compare unequal, and two non-null ones are compared with ss_equals(). */
597 nullable_substring_equal (const struct substring *a,
598 const struct substring *b)
600 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are handled
   by the type check at the top (presumably by reporting inequality; the
   statement itself is not visible here).  Otherwise the comparison depends
   on the type: numbers and numeric ranges are compared by value, strings
   with ss_equals(), string ranges with nullable_substring_equal() on each
   bound, postcomputes by pointer identity, total labels with strcmp(), and
   the remaining visible case by its missing-value, sort and percentile
   settings. */
604 ctables_category_equal (const struct ctables_category *a,
605 const struct ctables_category *b)
607 if (a->type != b->type)
613 return a->number == b->number;
616 return ss_equals (a->string, b->string);
619 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
622 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
623 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
629 case CCT_POSTCOMPUTE:
630 return a->pc == b->pc;
634 return !strcmp (a->total_label, b->total_label);
639 return (a->include_missing == b->include_missing
640 && a->sort_ascending == b->sort_ascending
641 && a->sort_function == b->sort_function
642 && a->sort_var == b->sort_var
643 && a->percentile == b->percentile);
645 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The visible code asserts that the reference
   count is positive and uninitializes every category in C->cats.
   NOTE(review): presumably the teardown runs only once the count reaches
   zero and C may be null; the lines that would establish this are not
   visible in this listing -- confirm against the full file. */
653 ctables_categories_unref (struct ctables_categories *c)
658 assert (c->n_refs > 0);
662 for (size_t i = 0; i < c->n_cats; i++)
663 ctables_category_uninit (&c->cats[i]);
/* Compares category sets A and B.  The sets are first checked for the same
   number of categories and the same 'show_empty' setting, and then each
   pair of corresponding categories is compared, in order, with
   ctables_category_equal(). */
669 ctables_categories_equal (const struct ctables_categories *a,
670 const struct ctables_categories *b)
672 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
675 for (size_t i = 0; i < a->n_cats; i++)
676 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
682 /* Chi-square test (SIGTEST). */
690 /* Pairwise comparison test (COMPARETEST). */
691 struct ctables_pairwise
693 enum { PROP, MEAN } type;
696 bool meansvariance_allcats;
698 enum { BONFERRONI = 1, BH } adjust;
722 struct variable *var;
724 struct ctables_summary_spec_set specs[N_CSVS];
728 struct ctables_axis *subs[2];
731 struct msg_location *loc;
734 static void ctables_axis_destroy (struct ctables_axis *);
736 struct ctables_summary_spec
738 enum ctables_summary_function function;
740 enum ctables_area_type area;
741 double percentile; /* CTSF_PTILE only. */
744 struct fmt_spec format;
745 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Initializes DST as a copy of SRC.  The label is duplicated with
   xstrdup_if_nonnull(), so DST gets its own copy of it (or a null label if
   SRC has none). */
752 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
753 const struct ctables_summary_spec *src)
756 dst->label = xstrdup_if_nonnull (src->label);
760 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is
   allocated (or left null when SRC is empty) and each element is cloned
   with ctables_summary_spec_clone(); DST is then overwritten with a fresh
   struct that, among members not visible here, copies 'is_scale'. */
767 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
768 const struct ctables_summary_spec_set *src)
770 struct ctables_summary_spec *specs
771 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
772 for (size_t i = 0; i < src->n; i++)
773 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
775 *dst = (struct ctables_summary_spec_set) {
780 .is_scale = src->is_scale,
/* Releases the contents of SET: every spec is uninitialized with
   ctables_summary_spec_uninit() and the 'listwise_vars' array is freed.
   Any further releases are on lines not visible in this listing. */
785 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
787 for (size_t i = 0; i < set->n; i++)
788 ctables_summary_spec_uninit (&set->specs[i]);
789 free (set->listwise_vars);
/* Parses a column width setting from LEXER: an optional equals sign
   followed by either the DEFAULT keyword or a number in the closed range
   [0, DBL_MAX].  NAME is passed to the range check.  A parsed number is
   stored in *WIDTH.  What DEFAULT stores and what the function returns are
   on lines not visible in this listing. */
794 parse_col_width (struct lexer *lexer, const char *name, double *width)
796 lex_match (lexer, T_EQUALS);
797 if (lex_match_id (lexer, "DEFAULT"))
799 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
801 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER into *B; any other token
   produces an error that names the two expected keywords.
   NOTE(review): the assignments to *B are not visible here -- presumably
   NO yields false and YES yields true; confirm. */
811 parse_bool (struct lexer *lexer, bool *b)
813 if (lex_match_id (lexer, "NO"))
815 else if (lex_match_id (lexer, "YES"))
819 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table
   is generated from ctables.inc by expanding each S() entry to an
   initializer indexed by the function's enumerator. */
825 static enum ctables_function_availability
826 ctables_function_availability (enum ctables_summary_function f)
828 static enum ctables_function_availability availability[] = {
829 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
830 #include "ctables.inc"
834 return availability[f];
/* Returns true if F is one of the count functions, CTSF_COUNT or
   CTSF_ECOUNT. */
838 ctables_summary_function_is_count (enum ctables_summary_function f)
840 return f == CTSF_COUNT || f == CTSF_ECOUNT;
/* Parses a summary function name from the identifier at the current token
   of LEXER.

   The name is decomposed left to right: a leading U (either case) marks the
   function as unweighted, then an area type name from
   ctables_area_type_name[] may follow, and the remainder is the function's
   base name.  A remainder of just PCT is accepted as shorthand for the
   count percentage function.  Otherwise the remainder is looked up,
   ignoring case, in ctables_function_info[]; a match is also checked
   against whether the function may be unweighted and whether it takes an
   area.  If nothing matches, an error is reported.

   Results are stored through FUNCTION, the weighted flag, and AREA.  The
   statements that store them and the return values are on lines not
   visible in this listing. */
844 parse_ctables_summary_function (struct lexer *lexer,
845 enum ctables_summary_function *function,
847 enum ctables_area_type *area)
849 if (!lex_force_id (lexer))
852 struct substring name = lex_tokss (lexer);
853 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
855 bool has_area = false;
857 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
858 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
863 if (ss_equals_case (name, ss_cstr ("PCT")))
865 /* Special case where .COUNT suffix is omitted. */
866 *function = CTSF_areaPCT_COUNT;
873 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
875 const struct ctables_function_info *cfi = &ctables_function_info[f];
876 if (ss_equals_case (cfi->basename, name))
879 if (!*weighted && !cfi->may_be_unweighted)
881 if (has_area != cfi->is_area)
889 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS and everything it owns.  The visible code uninitializes
   each of the N_CSVS summary spec sets, recursively destroys both
   sub-axes, and destroys the source location.  The switch that selects
   between the variable and non-terminal cases is not visible in this
   listing. */
894 ctables_axis_destroy (struct ctables_axis *axis)
902 for (size_t i = 0; i < N_CSVS; i++)
903 ctables_summary_spec_set_uninit (&axis->specs[i]);
908 ctables_axis_destroy (axis->subs[0]);
909 ctables_axis_destroy (axis->subs[1]);
912 msg_location_destroy (axis->loc);
/* Allocates a new non-terminal axis node for operator OP whose children
   are SUB0 and SUB1.  Its source location spans the tokens of LEXER from
   offset START_OFS through the token just before the current one. */
916 static struct ctables_axis *
917 ctables_axis_new_nonterminal (enum ctables_axis_op op,
918 struct ctables_axis *sub0,
919 struct ctables_axis *sub1,
920 struct lexer *lexer, int start_ofs)
922 struct ctables_axis *axis = xmalloc (sizeof *axis);
923 *axis = (struct ctables_axis) {
925 .subs = { sub0, sub1 },
926 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
931 struct ctables_axis_parse_ctx
934 struct dictionary *dict;
936 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The function's format class is looked up in a table generated from
   ctables.inc.  The visible results are F format of width 40, PCT format
   of width 40 with 1 decimal, and VAR's own print format; the case labels
   that select among them are not visible in this listing. */
939 static struct fmt_spec
940 ctables_summary_default_format (enum ctables_summary_function function,
941 const struct variable *var)
943 static const enum ctables_format default_formats[] = {
944 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
945 #include "ctables.inc"
948 switch (default_formats[function])
951 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
954 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
957 return *var_get_print_format (var);
/* Returns the default (untranslated) label for SPEC, chosen from its
   function, its weighted flag and, for area functions, its area type.
   Labels are marked with N_() for later translation, except the ID labels
   at the end, which are for developers only.  CTSF_PTILE must not reach
   this function: its label embeds the percentile and is built by the
   caller. */
965 ctables_summary_label__ (const struct ctables_summary_spec *spec)
967 bool w = spec->weighted;
968 enum ctables_area_type a = spec->area;
969 switch (spec->function)
972 return w ? N_("Count") : N_("Unweighted Count");
975 return N_("Adjusted Count");
977 case CTSF_areaPCT_COUNT:
980 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
981 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
982 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
983 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
984 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
985 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
986 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
990 case CTSF_areaPCT_VALIDN:
993 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
994 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
995 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
996 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
997 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
998 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
999 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1003 case CTSF_areaPCT_TOTALN:
1006 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1007 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1008 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1009 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1010 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1011 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1012 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1016 case CTSF_MAXIMUM: return N_("Maximum");
1017 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1018 case CTSF_MEDIAN: return N_("Median");
1019 case CTSF_MINIMUM: return N_("Minimum");
1020 case CTSF_MISSING: return N_("Missing");
1021 case CTSF_MODE: return N_("Mode");
1022 case CTSF_PTILE: NOT_REACHED ();
1023 case CTSF_RANGE: return N_("Range");
1024 case CTSF_SEMEAN: return N_("Std Error of Mean");
1025 case CTSF_STDDEV: return N_("Std Deviation");
1026 case CTSF_SUM: return N_("Sum");
1027 case CTSF_TOTALN: return N_("Total N");
1028 case CTSF_ETOTALN: return N_("Adjusted Total N");
1029 case CTSF_VALIDN: return N_("Valid N");
1030 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1031 case CTSF_VARIANCE: return N_("Variance");
1032 case CTSF_areaPCT_SUM:
1035 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1036 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1037 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1038 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1039 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1040 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1041 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1048 /* Don't bother translating these: they are for developers only. */
1049 case CTAT_TABLE: return "Table ID";
1050 case CTAT_LAYER: return "Layer ID";
1051 case CTAT_LAYERROW: return "Layer Row ID";
1052 case CTAT_LAYERCOL: return "Layer Column ID";
1053 case CTAT_SUBTABLE: return "Subtable ID";
1054 case CTAT_ROW: return "Row ID";
1055 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the label for SPEC.

   Two paths are visible.  One produces a default label: for CTSF_PTILE it
   formats the percentile to two decimals (with an Unweighted prefix when
   SPEC is unweighted), and for every other function it uses the text from
   ctables_summary_label__().  The other copies SPEC->label into a new
   string, replacing each occurrence of the marker )CILEVEL with CILEVEL
   formatted with %g.
   NOTE(review): the condition choosing between the paths is not visible
   here -- presumably it is whether SPEC->label is null; confirm. */
1063 static struct pivot_value *
1064 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1068 if (spec->function == CTSF_PTILE)
1070 double p = spec->percentile;
1071 char *s = (spec->weighted
1072 ? xasprintf (_("Percentile %.2f"), p)
1073 : xasprintf (_("Unweighted Percentile %.2f"), p));
1074 return pivot_value_new_user_text_nocopy (s);
1077 return pivot_value_new_text (ctables_summary_label__ (spec));
1081 struct substring in = ss_cstr (spec->label);
1082 struct substring target = ss_cstr (")CILEVEL");
1084 struct string out = DS_EMPTY_INITIALIZER;
1087 size_t chunk = ss_find_substring (in, target);
1088 ds_put_substring (&out, ss_head (in, chunk));
1089 ss_advance (&in, chunk);
1091 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1093 ss_advance (&in, target.length);
1094 ds_put_format (&out, "%g", cilevel);
/* Formats the syntax name of summary FUNCTION into BUFFER, writing at most
   BUFSIZE bytes with snprintf().  The name is a U prefix when the function
   is unweighted, then the name of AREA when the function takes an area,
   then the function's base name from ctables_function_info[]. */
1100 ctables_summary_function_name (enum ctables_summary_function function,
1102 enum ctables_area_type area,
1103 char *buffer, size_t bufsize)
1105 const struct ctables_function_info *cfi = &ctables_function_info[function];
1106 snprintf (buffer, bufsize, "%s%s%s",
1107 weighted ? "" : "U",
1108 cfi->is_area ? ctables_area_type_name[area] : "",
1109 cfi->basename.string);
/* Adds a summary specification to AXIS for summary variant SV.

   When AXIS is a single variable, the function's availability is checked
   first: errors are reported at LOC (with a note at the axis location) for
   a function limited to multiple response sets, or for a function limited
   to scale variables when the axis is not scale and SV is not CSV_TOTAL.
   The spec is then appended to AXIS->specs[SV], growing the array as
   needed.  The label is duplicated, and FORMAT, when null, is replaced by
   the function's default format for the axis variable.

   When AXIS is not a single variable, the spec is added recursively to
   both of its sub-axes.

   Return statements are on lines not visible in this listing. */
1114 add_summary_spec (struct ctables_axis *axis,
1115 enum ctables_summary_function function, bool weighted,
1116 enum ctables_area_type area, double percentile,
1117 const char *label, const struct fmt_spec *format,
1118 bool is_ctables_format, const struct msg_location *loc,
1119 enum ctables_summary_variant sv)
1121 if (axis->op == CTAO_VAR)
1123 char function_name[128];
1124 ctables_summary_function_name (function, weighted, area,
1125 function_name, sizeof function_name);
1126 const char *var_name = var_get_name (axis->var);
1127 switch (ctables_function_availability (function))
1131 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1132 "response sets."), function_name);
1133 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1139 if (!axis->scale && sv != CSV_TOTAL)
1142 _("Summary function %s applies only to scale variables."),
1144 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1154 struct ctables_summary_spec_set *set = &axis->specs[sv];
1155 if (set->n >= set->allocated)
1156 set->specs = x2nrealloc (set->specs, &set->allocated,
1157 sizeof *set->specs);
1159 struct ctables_summary_spec *dst = &set->specs[set->n++];
1160 *dst = (struct ctables_summary_spec) {
1161 .function = function,
1162 .weighted = weighted,
1164 .percentile = percentile,
1165 .label = xstrdup_if_nonnull (label),
1166 .format = (format ? *format
1167 : ctables_summary_default_format (function, axis->var)),
1168 .is_ctables_format = is_ctables_format,
1174 for (size_t i = 0; i < 2; i++)
1175 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1176 percentile, label, format, is_ctables_format,
1183 static struct ctables_axis *ctables_axis_parse_stack (
1184 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack
   expression or a single variable name.

   For a variable, a new CTAO_VAR axis is allocated.  It is treated as
   scale if followed by [S], as categorical if followed by [C], and
   otherwise according to whether the variable's measurement level is
   MEASURE_SCALE.  A string variable that ends up marked as scale is
   reported as an error and the axis is destroyed.

   Return statements are on lines not visible in this listing. */
1187 static struct ctables_axis *
1188 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1190 if (lex_match (ctx->lexer, T_LPAREN))
1192 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1193 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1195 ctables_axis_destroy (sub);
1201 if (!lex_force_id (ctx->lexer))
1204 int start_ofs = lex_ofs (ctx->lexer);
1205 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1209 struct ctables_axis *axis = xmalloc (sizeof *axis);
1210 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1212 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1213 : lex_match_phrase (ctx->lexer, "[C]") ? false
1214 : var_get_measure (var) == MEASURE_SCALE);
1215 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1216 lex_ofs (ctx->lexer) - 1);
1217 if (axis->scale && var_is_alpha (var))
1219 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1221 var_get_name (var));
1222 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false
   otherwise (including for the empty string).  S must be a non-null,
   null-terminated string.

   strcspn() yields the length of the initial run of S that contains no
   digit, so the byte at that offset is either the first digit or the null
   terminator. */
static bool
has_digit (const char *s)
{
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format specifier from LEXER into FORMAT.

   The type name is compared, ignoring case, with the CTABLES-only formats
   NEGPAREN, NEQUAL, PAREN and PCTPAREN, which map to the CTEF_* format
   types.  Any other name is parsed as an ordinary format, which must be a
   valid numeric output format, and *IS_CTABLES_FORMAT is set to false.

   For the CTABLES-only formats, a width that is too small or a decimal
   count greater than the width minus 1 is reported as an error; otherwise
   *IS_CTABLES_FORMAT is set to true.  The width test and the return
   statements are on lines not visible in this listing. */
1236 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1237 bool *is_ctables_format)
1239 char type[FMT_TYPE_LEN_MAX + 1];
1240 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1243 if (!strcasecmp (type, "NEGPAREN"))
1244 format->type = CTEF_NEGPAREN;
1245 else if (!strcasecmp (type, "NEQUAL"))
1246 format->type = CTEF_NEQUAL;
1247 else if (!strcasecmp (type, "PAREN"))
1248 format->type = CTEF_PAREN;
1249 else if (!strcasecmp (type, "PCTPAREN"))
1250 format->type = CTEF_PCTPAREN;
1253 *is_ctables_format = false;
1254 return (parse_format_specifier (lexer, format)
1255 && fmt_check_output (format)
1256 && fmt_check_type_compat (format, VAL_NUMERIC));
1262 lex_next_error (lexer, -1, -1,
1263 _("Output format %s requires width 2 or greater."), type);
1266 else if (format->d > format->w - 1)
1268 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1269 "greater than decimals."), type);
1274 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a summary function name, then a percentile in
   [0, 100] if the function is PTILE, then an optional string label, then
   an optional format specifier (recognized as an identifier containing a
   digit).  It is added to the axis with add_summary_spec(), using a source
   location that spans the specification's tokens.  Specifications may be
   separated by commas.  The list starts in the CSV_CELL variant; a TOTALS
   keyword followed by a left bracket is accepted only while in that
   variant.  On the visible error path the partially built axis is
   destroyed.

   The loop structure and return statements are on lines not visible in
   this listing. */
1279 static struct ctables_axis *
1280 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1282 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1283 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1286 enum ctables_summary_variant sv = CSV_CELL;
1289 int start_ofs = lex_ofs (ctx->lexer);
1291 /* Parse function. */
1292 enum ctables_summary_function function;
1294 enum ctables_area_type area;
1295 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1299 /* Parse percentile. */
1300 double percentile = 0;
1301 if (function == CTSF_PTILE)
1303 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1305 percentile = lex_number (ctx->lexer);
1306 lex_get (ctx->lexer);
1311 if (lex_is_string (ctx->lexer))
1313 label = ss_xstrdup (lex_tokss (ctx->lexer));
1314 lex_get (ctx->lexer);
1318 struct fmt_spec format;
1319 const struct fmt_spec *formatp;
1320 bool is_ctables_format = false;
1321 if (lex_token (ctx->lexer) == T_ID
1322 && has_digit (lex_tokcstr (ctx->lexer)))
1324 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1325 &is_ctables_format))
1335 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1336 lex_ofs (ctx->lexer) - 1);
1337 add_summary_spec (sub, function, weighted, area, percentile, label,
1338 formatp, is_ctables_format, loc, sv);
1340 msg_location_destroy (loc);
1342 lex_match (ctx->lexer, T_COMMA);
1343 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1345 if (!lex_force_match (ctx->lexer, T_LBRACK))
1349 else if (lex_match (ctx->lexer, T_RBRACK))
1351 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1358 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node yields itself if it is scale and null otherwise; any other node is
   searched through its two sub-axes in order.  The handling of a null AXIS
   and the final return are on lines not visible in this listing. */
1362 static const struct ctables_axis *
1363 find_scale (const struct ctables_axis *axis)
1367 else if (axis->op == CTAO_VAR)
1368 return axis->scale ? axis : NULL;
1371 for (size_t i = 0; i < 2; i++)
1373 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A
   variable node yields itself if it qualifies and null otherwise; any
   other node is searched through its two sub-axes in order.  The handling
   of a null AXIS and the final return are on lines not visible in this
   listing. */
1381 static const struct ctables_axis *
1382 find_categorical_summary_spec (const struct ctables_axis *axis)
1386 else if (axis->op == CTAO_VAR)
1387 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1390 for (size_t i = 0; i < 2; i++)
1392 const struct ctables_axis *sum
1393 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix axis expressions joined by the nesting
   operator (the T_GT token), building a CTAO_NEST node for each operator.

   Two constraints are enforced on every nest.  Scale variables may not
   appear on both the outer and inner side, and a categorical variable on
   the outer side may not carry summary specifications.  A violation is
   reported at the nest's location, with notes pointing at the offending
   variables, and the nest is destroyed.

   If the right-hand side fails to parse, the left-hand side is destroyed.
   Return statements are on lines not visible in this listing. */
1401 static struct ctables_axis *
1402 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1404 int start_ofs = lex_ofs (ctx->lexer);
1405 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1409 while (lex_match (ctx->lexer, T_GT))
1411 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1414 ctables_axis_destroy (lhs);
1418 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1419 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1421 const struct ctables_axis *outer_scale = find_scale (lhs);
1422 const struct ctables_axis *inner_scale = find_scale (rhs);
1423 if (outer_scale && inner_scale)
1425 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1426 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1427 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1428 ctables_axis_destroy (nest);
1432 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1435 msg_at (SE, nest->loc,
1436 _("Summaries may only be requested for categorical variables "
1437 "at the innermost nesting level."));
1438 msg_at (SN, outer_sum->loc,
1439 _("This outer categorical variable has a summary."));
1440 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by the stacking operator
   (the T_PLUS token).  Each operator folds the expression parsed so far
   and the next one into a new CTAO_STACK node.  If a right-hand side fails
   to parse, the left-hand side is destroyed.  Return statements are on
   lines not visible in this listing. */
1450 static struct ctables_axis *
1451 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1453 int start_ofs = lex_ofs (ctx->lexer);
1454 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1458 while (lex_match (ctx->lexer, T_PLUS))
1460 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1463 ctables_axis_destroy (lhs);
1467 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1468 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   result in T->axes[A], returning whether that result is non-null.

   A current token of BY, a slash, or the end of the command is tested
   first.
   NOTE(review): presumably this means the axis is empty and the function
   succeeds without parsing anything; the statement is not visible here --
   confirm. */
1475 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1476 struct ctables *ct, struct ctables_table *t,
1477 enum pivot_axis_type a)
1479 if (lex_token (lexer) == T_BY
1480 || lex_token (lexer) == T_SLASH
1481 || lex_token (lexer) == T_ENDCMD)
1484 struct ctables_axis_parse_ctx ctx = {
1490 t->axes[a] = ctables_axis_parse_stack (&ctx);
1491 return t->axes[a] != NULL;
1495 ctables_chisq_destroy (struct ctables_chisq *chisq)
1501 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T and what it owns.  The visible code releases, in order:
   every section, every categories reference and the array holding them,
   the axis tree and stack for each pivot axis, the summary specs array,
   each value in 'clabels_values_map' (destroyed using the width of
   'clabels_example') followed by the map itself, the sorted
   'clabels_values' array, and the chi-square and pairwise test settings.
   Further releases may be on lines not visible in this listing. */
1507 ctables_table_destroy (struct ctables_table *t)
1512 for (size_t i = 0; i < t->n_sections; i++)
1513 ctables_section_uninit (&t->sections[i]);
1516 for (size_t i = 0; i < t->n_categories; i++)
1517 ctables_categories_unref (t->categories[i]);
1518 free (t->categories);
1520 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1522 ctables_axis_destroy (t->axes[a]);
1523 ctables_stack_uninit (&t->stacks[a]);
1525 free (t->summary_specs.specs);
1527 struct ctables_value *ctv, *next_ctv;
1528 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1529 &t->clabels_values_map)
1531 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1532 hmap_delete (&t->clabels_values_map, &ctv->node);
1535 hmap_destroy (&t->clabels_values_map);
1536 free (t->clabels_values);
1542 ctables_chisq_destroy (t->chisq);
1543 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in CT->postcomputes
   (its location, expression and summary specs), the map itself, the format
   settings, the reference to the table look, and each table in CT->tables.

   NOTE(review): additional cleanup statements (and possibly a null check on
   CT) are on lines not shown in this excerpt. */
1548 ctables_destroy (struct ctables *ct)
1553 struct ctables_postcompute *pc, *next_pc;
1554 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1558 msg_location_destroy (pc->location);
1559 ctables_pcexpr_destroy (pc->expr);
1563 ctables_summary_spec_set_uninit (pc->specs);
/* Unlink the node; the "safe" iteration form permits deletion during the
   walk. */
1566 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1569 hmap_destroy (&ct->postcomputes);
1571 fmt_settings_uninit (&ct->ctables_formats);
1572 pivot_table_look_unref (ct->look);
1576 for (size_t i = 0; i < ct->n_tables; i++)
1577 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose numeric range member is { LOW, HIGH }.
   Callers in this file pass -DBL_MAX or DBL_MAX for an open-ended bound.

   NOTE(review): the .type initializer is on a line not shown here;
   presumably CCT_NRANGE. */
1582 static struct ctables_category
1583 cct_nrange (double low, double high)
1585 return (struct ctables_category) {
1587 .nrange = { low, high }
/* Returns, by value, a category whose string range member is { LOW, HIGH }.
   Callers in this file pass a substring with a null .string for an
   open-ended bound.  The substrings are stored as given, not copied.

   NOTE(review): the .type initializer is on a line not shown here;
   presumably CCT_SRANGE. */
1591 static struct ctables_category
1592 cct_srange (struct substring low, struct substring high)
1594 return (struct ctables_category) {
1596 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category after its keyword
   and initializes *CAT as a CCT_SUBTOTAL category.  If T_EQUALS follows, a
   string token is required and a copy of it becomes the label; otherwise the
   label is a copy of the translated default "Subtotal".  HIDE_SUBCATEGORIES
   is stored in the category unchanged.

   NOTE(review): the return statements and the code that advances past the
   label token are on lines not shown in this excerpt. */
1601 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1602 struct ctables_category *cat)
1605 if (lex_match (lexer, T_EQUALS))
1607 if (!lex_force_string (lexer))
1610 total_label = ss_xstrdup (lex_tokss (lexer));
/* No explicit label was given. */
1614 total_label = xstrdup (_("Subtotal"));
1616 *cat = (struct ctables_category) {
1617 .type = CCT_SUBTOTAL,
1618 .hide_subcategories = hide_subcategories,
1619 .total_label = total_label
/* Converts the current string token of LEXER between "UTF-8" and DICT's
   encoding with recode_substring_pool() (no pool is supplied) and strips
   trailing spaces from the result.

   NOTE(review): presumably the conversion is from UTF-8 into the dictionary
   encoding (target encoding first) -- confirm against
   recode_substring_pool()'s declaration.  The return statement and any token
   advance are on lines not shown here. */
1624 static struct substring
1625 parse_substring (struct lexer *lexer, struct dictionary *dict)
1627 struct substring s = recode_substring_pool (
1628 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1629 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list into *CAT.  The forms are
   tried in this order:

     OTHERNM                      -> CCT_OTHERNM
     MISSING                      -> CCT_MISSING
     SUBTOTAL / HSUBTOTAL         -> ctables_table_parse_subtotal()
     LO THRU string-or-number     -> range with an open lower bound
     number [THRU HI | number]    -> numeric range, or a single number
     string [THRU HI | string]    -> string range, or CCT_STRING
     & identifier                 -> CCT_POSTCOMPUTE, looked up by name

   Any other token is reported with lex_error().  Open string bounds are
   represented by a substring whose .string is null; open numeric bounds by
   -DBL_MAX or DBL_MAX.

   NOTE(review): the CT parameter used below is declared on a line not shown
   in this excerpt, as are the return statements and token advances. */
1635 ctables_table_parse_explicit_category (struct lexer *lexer,
1636 struct dictionary *dict,
1638 struct ctables_category *cat)
1640 if (lex_match_id (lexer, "OTHERNM"))
1641 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1642 else if (lex_match_id (lexer, "MISSING"))
1643 *cat = (struct ctables_category) { .type = CCT_MISSING };
1644 else if (lex_match_id (lexer, "SUBTOTAL"))
1645 return ctables_table_parse_subtotal (lexer, false, cat);
1646 else if (lex_match_id (lexer, "HSUBTOTAL"))
1647 return ctables_table_parse_subtotal (lexer, true, cat);
1648 else if (lex_match_id (lexer, "LO"))
1650 if (!lex_force_match_id (lexer, "THRU"))
/* The type of the upper bound decides whether this is a string or a numeric
   range. */
1652 if (lex_is_string (lexer))
1654 struct substring sr0 = { .string = NULL };
1655 struct substring sr1 = parse_substring (lexer, dict);
1656 *cat = cct_srange (sr0, sr1);
1658 else if (lex_force_num (lexer))
1660 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1666 else if (lex_is_number (lexer))
1668 double number = lex_number (lexer);
1670 if (lex_match_id (lexer, "THRU"))
1672 if (lex_match_id (lexer, "HI"))
1673 *cat = cct_nrange (number, DBL_MAX);
1676 if (!lex_force_num (lexer))
1678 *cat = cct_nrange (number, lex_number (lexer));
/* NOTE(review): presumably the plain-number case (no THRU); the initializer
   fields are on lines not shown. */
1683 *cat = (struct ctables_category) {
1688 else if (lex_is_string (lexer))
1690 struct substring s = parse_substring (lexer, dict);
1691 if (lex_match_id (lexer, "THRU"))
1693 if (lex_match_id (lexer, "HI"))
1695 struct substring sr1 = { .string = NULL };
1696 *cat = cct_srange (s, sr1);
1700 if (!lex_force_string (lexer))
1705 struct substring sr1 = parse_substring (lexer, dict);
1706 *cat = cct_srange (s, sr1);
1710 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* "&name" refers to a postcompute registered in CT. */
1712 else if (lex_match (lexer, T_AND))
1714 if (!lex_force_id (lexer))
1716 struct ctables_postcompute *pc = ctables_find_postcompute (
1717 ct, lex_tokcstr (lexer));
/* Error location covers the previous token through the current one. */
1720 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1721 msg_at (SE, loc, _("Unknown postcompute &%s."),
1722 lex_tokcstr (lexer));
1723 msg_location_destroy (loc);
1728 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1732 lex_error (lexer, NULL);
/* Parses category text S as data in FORMAT, using DICT's encoding and the
   current format settings, through data_in().  If data_in() yields an error
   string, an error that names FORMAT and includes that string is reported at
   LOCATION.

   NOTE(review): the lines that store the parsed value through N, free the
   error string, and return the result are not shown in this excerpt. */
1740 parse_category_string (struct msg_location *location,
1741 struct substring s, const struct dictionary *dict,
1742 enum fmt_type format, double *n)
1745 char *error = data_in (s, dict_get_encoding (dict), format,
1746 settings_get_fmt_settings (), &v, 0, NULL);
1749 msg_at (SE, location,
1750 _("Failed to parse category specification as format %s: %s."),
1751 fmt_name (format), error);
/* Searches CATS for the category that category-reference expression E
   designates.  The comparison depends on E->op:

     CTPO_CAT_NUMBER    CCT_NUMBER with an equal number
     CTPO_CAT_STRING    CCT_STRING with an equal string
     CTPO_CAT_NRANGE    CCT_NRANGE with both bounds equal
     CTPO_CAT_SRANGE    CCT_SRANGE with both bounds equal (null-aware)
     CTPO_CAT_MISSING   CCT_MISSING
     CTPO_CAT_OTHERNM   CCT_OTHERNM
     CTPO_CAT_SUBTOTAL  CCT_SUBTOTAL, selected through E->subtotal_index
     CTPO_CAT_TOTAL     CCT_TOTAL

   NOTE(review): the statements executed on a match, the update of
   N_SUBTOTALS, and the returns are on lines not shown in this excerpt.
   Presumably N_SUBTOTALS counts subtotal categories seen so far and the
   final test rejects an unindexed subtotal reference when more than one
   subtotal exists -- confirm. */
1760 static struct ctables_category *
1761 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1762 const struct ctables_pcexpr *e)
1764 struct ctables_category *best = NULL;
1765 size_t n_subtotals = 0;
1766 for (size_t i = 0; i < cats->n_cats; i++)
1768 struct ctables_category *cat = &cats->cats[i];
1771 case CTPO_CAT_NUMBER:
1772 if (cat->type == CCT_NUMBER && cat->number == e->number)
1776 case CTPO_CAT_STRING:
1777 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1781 case CTPO_CAT_NRANGE:
1782 if (cat->type == CCT_NRANGE
1783 && cat->nrange[0] == e->nrange[0]
1784 && cat->nrange[1] == e->nrange[1])
1788 case CTPO_CAT_SRANGE:
1789 if (cat->type == CCT_SRANGE
1790 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1791 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1795 case CTPO_CAT_MISSING:
1796 if (cat->type == CCT_MISSING)
1800 case CTPO_CAT_OTHERNM:
1801 if (cat->type == CCT_OTHERNM)
1805 case CTPO_CAT_SUBTOTAL:
1806 if (cat->type == CCT_SUBTOTAL)
/* A reference either names a subtotal by position or, with index 0, carries
   no position. */
1809 if (e->subtotal_index == n_subtotals)
1811 else if (e->subtotal_index == 0)
1816 case CTPO_CAT_TOTAL:
1817 if (cat->type == CCT_TOTAL)
1831 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS designated by category reference E, taking
   PARSE_FORMAT into account.

   When PARSE_FORMAT is FMT_F, E is looked up unchanged.  Otherwise a string
   reference (CTPO_CAT_STRING) is first parsed into a number with
   parse_category_string() and looked up as CTPO_CAT_NUMBER, and a string
   range (CTPO_CAT_SRANGE) is converted endpoint by endpoint and looked up as
   CTPO_CAT_NRANGE.  A range endpoint whose .string is null becomes -DBL_MAX
   (lower) or DBL_MAX (upper).  The temporary expression reuses E->location.

   NOTE(review): the result returned when parsing fails is on lines not shown
   in this excerpt; presumably NULL. */
1836 static struct ctables_category *
1837 ctables_find_category_for_postcompute (const struct dictionary *dict,
1838 const struct ctables_categories *cats,
1839 enum fmt_type parse_format,
1840 const struct ctables_pcexpr *e)
1842 if (parse_format != FMT_F)
1844 if (e->op == CTPO_CAT_STRING)
1847 if (!parse_category_string (e->location, e->string, dict,
1848 parse_format, &number))
1851 struct ctables_pcexpr e2 = {
1852 .op = CTPO_CAT_NUMBER,
1854 .location = e->location,
1856 return ctables_find_category_for_postcompute__ (cats, &e2);
1858 else if (e->op == CTPO_CAT_SRANGE)
/* Lower bound: open if absent, otherwise parsed in PARSE_FORMAT. */
1861 if (!e->srange[0].string)
1862 nrange[0] = -DBL_MAX;
1863 else if (!parse_category_string (e->location, e->srange[0], dict,
1864 parse_format, &nrange[0]))
/* Upper bound, handled the same way. */
1867 if (!e->srange[1].string)
1868 nrange[1] = DBL_MAX;
1869 else if (!parse_category_string (e->location, e->srange[1], dict,
1870 parse_format, &nrange[1]))
1873 struct ctables_pcexpr e2 = {
1874 .op = CTPO_CAT_NRANGE,
1875 .nrange = { nrange[0], nrange[1] },
1876 .location = e->location,
1878 return ctables_find_category_for_postcompute__ (cats, &e2);
/* All other cases: look up E as written. */
1881 return ctables_find_category_for_postcompute__ (cats, e);
/* Walks postcompute expression E, which belongs to computed category PC_CAT,
   and checks that every category reference in it can be resolved against
   CATS.  CATS_LOCATION is the syntax location of the category list and is
   used in diagnostics.

   For a category-reference node the category is looked up with
   ctables_find_category_for_postcompute().  The diagnostics visible below
   cover two problems: an unindexed SUBTOTAL reference when CATS holds more
   than one subtotal, and a reference to a category absent from the list, in
   which case a hint suited to the kind of reference is added.  Other nodes
   are checked by recursing into both E->subs[] operands.

   NOTE(review): the switch header, the conditions that select between the
   diagnostics, the effect of PC_CAT->pc->hide_source_cats, and the return
   statements are on lines not shown in this excerpt. */
1885 ctables_recursive_check_postcompute (struct dictionary *dict,
1886 const struct ctables_pcexpr *e,
1887 struct ctables_category *pc_cat,
1888 const struct ctables_categories *cats,
1889 const struct msg_location *cats_location)
1893 case CTPO_CAT_NUMBER:
1894 case CTPO_CAT_STRING:
1895 case CTPO_CAT_NRANGE:
1896 case CTPO_CAT_SRANGE:
1897 case CTPO_CAT_MISSING:
1898 case CTPO_CAT_OTHERNM:
1899 case CTPO_CAT_SUBTOTAL:
1900 case CTPO_CAT_TOTAL:
1902 struct ctables_category *cat = ctables_find_category_for_postcompute (
1903 dict, cats, pc_cat->parse_format, e);
1906 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so that the message can report how many there are. */
1908 size_t n_subtotals = 0;
1909 for (size_t i = 0; i < cats->n_cats; i++)
1910 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1911 if (n_subtotals > 1)
1913 msg_at (SE, cats_location,
1914 ngettext ("These categories include %zu instance "
1915 "of SUBTOTAL or HSUBTOTAL, so references "
1916 "from computed categories must refer to "
1917 "subtotals by position, "
1918 "e.g. SUBTOTAL[1].",
1919 "These categories include %zu instances "
1920 "of SUBTOTAL or HSUBTOTAL, so references "
1921 "from computed categories must refer to "
1922 "subtotals by position, "
1923 "e.g. SUBTOTAL[1].",
1926 msg_at (SN, e->location,
1927 _("This is the reference that lacks a position."));
1932 msg_at (SE, pc_cat->location,
1933 _("Computed category &%s references a category not included "
1934 "in the category list."),
1936 msg_at (SN, e->location, _("This is the missing category."));
1937 if (e->op == CTPO_CAT_SUBTOTAL)
1938 msg_at (SN, cats_location,
1939 _("To fix the problem, add subtotals to the "
1940 "list of categories here."));
1941 else if (e->op == CTPO_CAT_TOTAL)
/* This hint is emitted with msg(), without a location, unlike the others. */
1942 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1943 "CATEGORIES specification."));
1945 msg_at (SN, cats_location,
1946 _("To fix the problem, add the missing category to the "
1947 "list of categories here."));
1950 if (pc_cat->pc->hide_source_cats)
/* Recurse into whichever operands are present. */
1964 for (size_t i = 0; i < 2; i++)
1965 if (e->subs[i] && !ctables_recursive_check_postcompute (
1966 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that none of the N_VARS variables in VARS is numeric.  When a
   numeric variable is found, an error naming it is reported at
   CAT->location.

   NOTE(review): the return statements are on lines not shown here.  Callers
   in this file test "!all_strings (...)" as the failure case, so presumably
   the result is false after the error and true otherwise. */
1975 all_strings (struct variable **vars, size_t n_vars,
1976 const struct ctables_category *cat)
1978 for (size_t j = 0; j < n_vars; j++)
1979 if (var_is_numeric (vars[j]))
1981 msg_at (SE, cat->location,
1982 _("This category specification may be applied only to string "
1983 "variables, but this subcommand tries to apply it to "
1984 "numeric variable %s."),
1985 var_get_name (vars[j]));
/* Parses the body of a CATEGORIES subcommand for table T.

   The syntax handled is VARIABLES=var-list, then optionally an explicit
   category list in square brackets, then keyword settings up to the next
   T_SLASH or T_ENDCMD: ORDER, KEY, MISSING, TOTAL, LABEL, POSITION, EMPTY.
   ORDER, KEY and MISSING are accepted only while no explicit categories have
   been parsed (C->n_cats is zero).

   A single category set C is created with one reference per listed variable,
   and the slots of T->categories[] for those variables are updated.  After
   parsing, a totals category may be added at the start or the end, each
   category is linked to a subtotal, and explicit categories are validated
   against the variable types.  In that last step string categories and
   string ranges may be converted to CCT_NUMBER / CCT_NRANGE through
   parse_category_string() using the variables' common print format.

   NOTE(review): many control-flow lines (braces, else, break, return, short
   declarations and assignments) are not shown in this excerpt, so the
   conditions guarding several statements below cannot be confirmed here. */
1992 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1993 struct ctables *ct, struct ctables_table *t)
1995 if (!lex_match_id (lexer, "VARIABLES"))
1997 lex_match (lexer, T_EQUALS);
1999 struct variable **vars;
2001 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all the variables share one print format type;
   COMMON_FORMAT becomes null as soon as one differs. */
2004 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2005 for (size_t i = 1; i < n_vars; i++)
2007 const struct fmt_spec *f = var_get_print_format (vars[i]);
2008 if (f->type != common_format->type)
2010 common_format = NULL;
/* NOTE(review): tail of an expression whose start is not shown; presumably
   it initializes PARSE_STRINGS (used further below) to whether the common
   format is a date, time, or date-component format. */
2016 && (fmt_get_category (common_format->type)
2017 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One shared category set, with one reference per variable. */
2019 struct ctables_categories *c = xmalloc (sizeof *c);
2020 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2021 for (size_t i = 0; i < n_vars; i++)
2023 struct ctables_categories **cp
2024 = &t->categories[var_get_dict_index (vars[i])];
/* Drop the variable's previous category set.  NOTE(review): the assignment
   of C to the slot is presumably on the next, hidden, line. */
2025 ctables_categories_unref (*cp);
/* Token offsets of the explicit list, or -1 if there is none. */
2029 size_t allocated_cats = 0;
2030 int cats_start_ofs = -1;
2031 int cats_end_ofs = -1;
2032 if (lex_match (lexer, T_LBRACK))
2034 cats_start_ofs = lex_ofs (lexer);
/* Grow the array as needed and parse the next category in place. */
2037 if (c->n_cats >= allocated_cats)
2038 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2040 int start_ofs = lex_ofs (lexer);
2041 struct ctables_category *cat = &c->cats[c->n_cats];
2042 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2044 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* A comma between categories is optional. */
2047 lex_match (lexer, T_COMMA);
2049 while (!lex_match (lexer, T_RBRACK));
2050 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category, adjusted by ORDER, KEY and MISSING
   below. */
2053 struct ctables_category cat = {
2055 .include_missing = false,
2056 .sort_ascending = true,
2058 bool show_totals = false;
2059 char *total_label = NULL;
2060 bool totals_before = false;
2061 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2063 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2065 lex_match (lexer, T_EQUALS);
2066 if (lex_match_id (lexer, "A"))
2067 cat.sort_ascending = true;
2068 else if (lex_match_id (lexer, "D"))
2069 cat.sort_ascending = false;
2072 lex_error_expecting (lexer, "A", "D");
2076 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2078 lex_match (lexer, T_EQUALS);
2079 if (lex_match_id (lexer, "VALUE"))
2080 cat.type = CCT_VALUE;
2081 else if (lex_match_id (lexer, "LABEL"))
2082 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
2085 cat.type = CCT_FUNCTION;
2086 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2087 &cat.weighted, &cat.area))
2090 if (lex_match (lexer, T_LPAREN))
2092 cat.sort_var = parse_variable (lexer, dict);
2096 if (cat.sort_function == CTSF_PTILE)
2098 lex_match (lexer, T_COMMA);
2099 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2101 cat.percentile = lex_number (lexer);
2105 if (!lex_force_match (lexer, T_RPAREN))
2108 else if (ctables_function_availability (cat.sort_function)
/* The result is deliberately unused; the call is made for the error it
   reports about the missing parenthesis. */
2111 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2116 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2118 lex_match (lexer, T_EQUALS);
2119 if (lex_match_id (lexer, "INCLUDE"))
2120 cat.include_missing = true;
2121 else if (lex_match_id (lexer, "EXCLUDE"))
2122 cat.include_missing = false;
2125 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2129 else if (lex_match_id (lexer, "TOTAL"))
2131 lex_match (lexer, T_EQUALS);
2132 if (!parse_bool (lexer, &show_totals))
2135 else if (lex_match_id (lexer, "LABEL"))
2137 lex_match (lexer, T_EQUALS);
2138 if (!lex_force_string (lexer))
2141 total_label = ss_xstrdup (lex_tokss (lexer));
2144 else if (lex_match_id (lexer, "POSITION"))
2146 lex_match (lexer, T_EQUALS);
2147 if (lex_match_id (lexer, "BEFORE"))
2148 totals_before = true;
2149 else if (lex_match_id (lexer, "AFTER"))
2150 totals_before = false;
2153 lex_error_expecting (lexer, "BEFORE", "AFTER");
2157 else if (lex_match_id (lexer, "EMPTY"))
2159 lex_match (lexer, T_EQUALS);
2160 if (lex_match_id (lexer, "INCLUDE"))
2161 c->show_empty = true;
2162 else if (lex_match_id (lexer, "EXCLUDE"))
2163 c->show_empty = false;
2166 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unknown keyword.  Two lists of expected keywords exist; the shorter one
   omits the keywords that are unavailable with explicit categories.
   NOTE(review): the selecting condition is not shown. */
2173 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2174 "TOTAL", "LABEL", "POSITION", "EMPTY");
2176 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* NOTE(review): presumably executed only when no explicit categories were
   given, so that the template becomes the sole category. */
2183 if (c->n_cats >= allocated_cats)
2184 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2185 c->cats[c->n_cats++] = cat;
/* Make room for a totals category and place it first or last.
   NOTE(review): presumably guarded by SHOW_TOTALS and TOTALS_BEFORE. */
2190 if (c->n_cats >= allocated_cats)
2191 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2193 struct ctables_category *totals;
2196 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2197 totals = &c->cats[0];
2200 totals = &c->cats[c->n_cats];
2203 *totals = (struct ctables_category) {
2205 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link categories to a subtotal.  The loop runs forward when totals come
   first and backward otherwise. */
2209 struct ctables_category *subtotal = NULL;
2210 for (size_t i = totals_before ? 0 : c->n_cats;
2211 totals_before ? i < c->n_cats : i-- > 0;
2212 totals_before ? i++ : 0)
2214 struct ctables_category *cat = &c->cats[i];
2223 cat->subtotal = subtotal;
2226 case CCT_POSTCOMPUTE:
2237 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories; skipped when no bracketed list was
   parsed. */
2242 if (cats_start_ofs != -1)
2244 for (size_t i = 0; i < c->n_cats; i++)
2246 struct ctables_category *cat = &c->cats[i];
2249 case CCT_POSTCOMPUTE:
2250 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2251 struct msg_location *cats_location
2252 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2253 bool ok = ctables_recursive_check_postcompute (
2254 dict, cat->pc->expr, cat, c, cats_location);
2255 msg_location_destroy (cats_location);
/* Numeric category specifications may not be applied to string variables. */
2262 for (size_t j = 0; j < n_vars; j++)
2263 if (var_is_alpha (vars[j]))
2265 msg_at (SE, cat->location,
2266 _("This category specification may be applied "
2267 "only to numeric variables, but this "
2268 "subcommand tries to apply it to string "
2270 var_get_name (vars[j]));
/* String category: convert it to a number through the common format, or
   else require that every variable is a string variable.  NOTE(review): the
   condition choosing between the two is not shown; presumably
   PARSE_STRINGS. */
2279 if (!parse_category_string (cat->location, cat->string, dict,
2280 common_format->type, &n))
2283 ss_dealloc (&cat->string);
2285 cat->type = CCT_NUMBER;
2288 else if (!all_strings (vars, n_vars, cat))
/* String range: the same treatment for each endpoint; a null endpoint is
   open. */
2297 if (!cat->srange[0].string)
2299 else if (!parse_category_string (cat->location,
2300 cat->srange[0], dict,
2301 common_format->type, &n[0]))
2304 if (!cat->srange[1].string)
2306 else if (!parse_category_string (cat->location,
2307 cat->srange[1], dict,
2308 common_format->type, &n[1]))
2311 ss_dealloc (&cat->srange[0]);
2312 ss_dealloc (&cat->srange[1]);
2314 cat->type = CCT_NRANGE;
2315 cat->nrange[0] = n[0];
2316 cat->nrange[1] = n[1];
2318 else if (!all_strings (vars, n_vars, cat))
2329 case CCT_EXCLUDED_MISSING:
/* Releases resources held inside NEST: the summary spec set for every summary
   variant and the area index array for every area type.

   NOTE(review): other cleanup (for example of NEST->vars) may be on lines
   not shown in this excerpt. */
2344 ctables_nest_uninit (struct ctables_nest *nest)
2347 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2348 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2349 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2350 free (nest->areas[at]);
/* Uninitializes each of the STACK->n nests in STACK and frees the array that
   holds them.  STACK itself is not freed by the lines visible here.

   NOTE(review): a guard on STACK may be on a line not shown in this
   excerpt. */
2354 ctables_stack_uninit (struct ctables_stack *stack)
2358 for (size_t i = 0; i < stack->n; i++)
2359 ctables_nest_uninit (&stack->nests[i]);
2360 free (stack->nests);
/* Builds the nesting of stacks S0 and S1.  For every pair (a, b) with A from
   S0 and B from S1, a nest is produced whose variable list is A's variables
   followed by B's.  Space for s0.n * s1.n nests is allocated up front.

   The new nest's scale index is A's if A has one (not SIZE_MAX), else B's
   shifted by A's variable count.  Its summary specs are cloned from
   SUMMARY_SRC, chosen by testing which of A and B has a variable in
   specs[CSV_CELL].  Both input stacks are uninitialized before returning, so
   the caller gives up ownership of them.

   NOTE(review): the early-exit handling, the assignments to SUMMARY_SRC, and
   most of the new nest's initializer are on lines not shown here. */
2364 static struct ctables_stack
2365 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2372 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2373 for (size_t i = 0; i < s0.n; i++)
2374 for (size_t j = 0; j < s1.n; j++)
2376 const struct ctables_nest *a = &s0.nests[i];
2377 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists into a fresh array. */
2379 size_t allocate = a->n + b->n;
2380 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2382 for (size_t k = 0; k < a->n; k++)
2383 vars[n++] = a->vars[k];
2384 for (size_t k = 0; k < b->n; k++)
2385 vars[n++] = b->vars[k];
2386 assert (n == allocate);
2388 const struct ctables_nest *summary_src;
2389 if (!a->specs[CSV_CELL].var)
2391 else if (!b->specs[CSV_CELL].var)
2396 struct ctables_nest *new = &stack.nests[stack.n++];
2397 *new = (struct ctables_nest) {
2399 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2400 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2404 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2405 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
/* The inputs have been fully copied; release them. */
2407 ctables_stack_uninit (&s0);
2408 ctables_stack_uninit (&s1);
/* Builds the concatenation of stacks S0 and S1: a new array of s0.n + s1.n
   nests holding S0's nests followed by S1's.  Nests are copied by structure
   assignment, so the new stack takes over whatever the copied nests point
   to.  The group_head of each nest from S1 is increased by s0.n to account
   for its new position.

   NOTE(review): the increment of stack.n in the second loop, the release of
   the input arrays, and the return are on lines not shown in this excerpt. */
2412 static struct ctables_stack
2413 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2415 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2416 for (size_t i = 0; i < s0.n; i++)
2417 stack.nests[stack.n++] = s0.nests[i];
2418 for (size_t i = 0; i < s1.n; i++)
2420 stack.nests[stack.n] = s1.nests[i];
2421 stack.nests[stack.n].group_head += s0.n;
2424 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing exactly one nest, built from the single-variable
   axis node A.  The nest's scale index is 0 when A is a scale variable and
   SIZE_MAX otherwise.  When A has cell summary specs or is a scale variable,
   A's spec set is cloned for every summary variant and tagged with A's
   variable and scale flag.

   NOTE(review): the initialization of VARS and the remaining nest fields are
   on lines not shown in this excerpt. */
2430 static struct ctables_stack
2431 var_fts (const struct ctables_axis *a)
2433 struct variable **vars = xmalloc (sizeof *vars);
2436 struct ctables_nest *nest = xmalloc (sizeof *nest);
2437 *nest = (struct ctables_nest) {
2440 .scale_idx = a->scale ? 0 : SIZE_MAX,
2442 if (a->specs[CSV_CELL].n || a->scale)
2443 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2445 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2446 nest->specs[sv].var = a->var;
2447 nest->specs[sv].is_scale = a->scale;
2449 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively expands axis tree A into a stack of nests.  One case yields an
   empty stack; the two binary cases recurse into A->subs[0] and A->subs[1]
   and combine the results with stack_fts() and nest_fts() respectively.

   NOTE(review): the conditions and case labels that select each branch
   (presumably a null axis, CTAO_STACK and CTAO_NEST), and the handling of a
   variable node, are on lines not shown in this excerpt. */
2452 static struct ctables_stack
2453 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2456 return (struct ctables_stack) { .n = 0 };
2464 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2465 enumerate_fts (axis_type, a->subs[1]));
2468 /* This should consider any of the scale variables found in the result to
2469 be linked to each other listwise for SMISSING=LISTWISE. */
2470 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2471 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  The members are grouped by the
   family of functions that uses them, as the comments below indicate.  Code
   elsewhere in this file accesses count, min, max, moments, writer, ovalid
   and ovalue.

   NOTE(review): most member declarations are on lines not shown in this
   excerpt. */
2477 union ctables_summary
2479 /* COUNT, VALIDN, TOTALN. */
2482 /* MINIMUM, MAXIMUM, RANGE. */
2489 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2490 struct moments1 *moments;
2492 /* MEDIAN, MODE, PTILE. */
2495 struct casewriter *writer;
2500 /* XXX multiple response */
/* Prepares accumulator S for the summary function named by SS->function.
   Among the initializations visible here: min and max start as SYSMIS; the
   moment-based functions get a moments1 object created with MOMENT_VARIANCE;
   and the order-statistic functions get a sorting casewriter.

   NOTE(review): most case labels and the count initialization are on lines
   not shown in this excerpt. */
2504 ctables_summary_init (union ctables_summary *s,
2505 const struct ctables_summary_spec *ss)
2507 switch (ss->function)
2511 case CTSF_areaPCT_COUNT:
2512 case CTSF_areaPCT_VALIDN:
2513 case CTSF_areaPCT_TOTALN:
2528 s->min = s->max = SYSMIS;
2536 case CTSF_areaPCT_SUM:
2537 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases written to the writer have two numeric (width 0) values;
   ctables_summary_add() stores the data value at index 0 and its weight at
   index 1.  The writer sorts ascending on index 0. */
2544 struct caseproto *proto = caseproto_create ();
2545 proto = caseproto_add_width (proto, 0);
2546 proto = caseproto_add_width (proto, 0);
2548 struct subcase ordering;
2549 subcase_init (&ordering, 0, 0, SC_ASCEND);
2550 s->writer = sort_create_writer (&ordering, proto);
/* The temporary ordering and this function's reference to the prototype are
   released once the writer has been created. */
2551 subcase_uninit (&ordering);
2552 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for
   SS->function: the moments1 object for the moment-based functions and the
   casewriter for the order-statistic functions.

   NOTE(review): most case labels are on lines not shown in this excerpt. */
2562 ctables_summary_uninit (union ctables_summary *s,
2563 const struct ctables_summary_spec *ss)
2565 switch (ss->function)
2569 case CTSF_areaPCT_COUNT:
2570 case CTSF_areaPCT_VALIDN:
2571 case CTSF_areaPCT_TOTALN:
2592 case CTSF_areaPCT_SUM:
2593 moments1_destroy (s->moments);
2599 casewriter_destroy (s->writer);
/* Adds one case to accumulator S for summary spec SS.  VALUE is the case's
   value of VAR; D_WEIGHT and E_WEIGHT are two case weights supplied by the
   caller.

   Counting functions add either a weight or 1.0 to S->count.  In the lines
   visible here the TOTALN percentage family always counts, and the COUNT
   percentage family counts when IS_SCALE is set or EXCLUDED_MISSING is
   clear.  Range functions track the smallest and largest VALUE->f, moment
   functions feed VALUE->f to moments1_add(), and order-statistic functions
   write a (value, weight) case to S->writer; these three kinds skip the case
   when IS_SCALE_MISSING is set.  Where SS->weighted is false the weight used
   is 1.0.

   NOTE(review): most case labels, and the conditions that use IS_MISSING,
   are on lines not shown in this excerpt. */
2605 ctables_summary_add (union ctables_summary *s,
2606 const struct ctables_summary_spec *ss,
2607 const struct variable *var, const union value *value,
2608 bool is_scale, bool is_scale_missing,
2609 bool is_missing, bool excluded_missing,
2610 double d_weight, double e_weight)
2612 /* To determine whether a case is included in a given table for a particular
2613 kind of summary, consider the following charts for each variable in the
2614 table. Only if "yes" appears for every variable for the summary is the
2617 Categorical variables: VALIDN COUNT TOTALN
2618 Valid values in included categories yes yes yes
2619 Missing values in included categories --- yes yes
2620 Missing values in excluded categories --- --- yes
2621 Valid values in excluded categories --- --- ---
2623 Scale variables: VALIDN COUNT TOTALN
2624 Valid value yes yes yes
2625 Missing value --- yes yes
2627 Missing values include both user- and system-missing. (The system-missing
2628 value is always in an excluded category.)
2630 switch (ss->function)
2633 case CTSF_areaPCT_TOTALN:
2634 s->count += ss->weighted ? d_weight : 1.0;
2638 case CTSF_areaPCT_COUNT:
2639 if (is_scale || !excluded_missing)
2640 s->count += ss->weighted ? d_weight : 1.0;
2644 case CTSF_areaPCT_VALIDN:
2648 s->count += ss->weighted ? d_weight : 1.0;
2658 s->count += ss->weighted ? d_weight : 1.0;
2662 if (is_scale || !excluded_missing)
2663 s->count += e_weight;
2670 s->count += e_weight;
2674 s->count += e_weight;
2680 if (!is_scale_missing)
2682 assert (!var_is_alpha (var)); /* XXX? */
2683 if (s->min == SYSMIS || value->f < s->min)
2685 if (s->max == SYSMIS || value->f > s->max)
2695 case CTSF_areaPCT_SUM:
2696 if (!is_scale_missing)
2697 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2703 if (!is_scale_missing)
2705 double w = ss->weighted ? e_weight : 1.0;
/* Record the value (index 0) and its weight (index 1) for later sorting. */
2708 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2709 *case_num_rw_idx (c, 0) = value->f;
2710 *case_num_rw_idx (c, 1) = w;
2711 casewriter_write (s->writer, c);
/* Computes the final value of summary S for spec SS in CELL.

   Percentage functions divide by a total taken from the area
   CELL->areas[SS->area], using the "e_" total when SS->weighted is set and
   the "u_" total otherwise, and yield SYSMIS when that denominator is zero.
   Moment-based functions obtain their inputs from moments1_calculate().
   Order-statistic functions turn S->writer into a reader and run it through
   a percentile or mode statistic, storing the result in S->ovalue.

   NOTE(review): most case labels, and some return statements, are on lines
   not shown in this excerpt. */
2718 ctables_summary_value (const struct ctables_cell *cell,
2719 union ctables_summary *s,
2720 const struct ctables_summary_spec *ss)
2722 switch (ss->function)
2729 return cell->areas[ss->area]->sequence;
2731 case CTSF_areaPCT_COUNT:
2733 const struct ctables_area *a = cell->areas[ss->area];
2734 double a_count = ss->weighted ? a->e_count : a->u_count;
2735 return a_count ? s->count / a_count * 100 : SYSMIS;
2738 case CTSF_areaPCT_VALIDN:
2740 const struct ctables_area *a = cell->areas[ss->area];
2741 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2742 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2745 case CTSF_areaPCT_TOTALN:
2747 const struct ctables_area *a = cell->areas[ss->area];
2748 double a_total = ss->weighted ? a->e_total : a->u_total;
2749 return a_total ? s->count / a_total * 100 : SYSMIS;
/* The range is defined only when both extremes have been observed. */
2766 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2771 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2777 double weight, variance;
2778 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2779 return calc_semean (variance, weight);
2785 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2786 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2791 double weight, mean;
2792 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2793 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2799 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2803 case CTSF_areaPCT_SUM:
2805 double weight, mean;
2806 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2807 if (weight == SYSMIS || mean == SYSMIS)
/* The denominator is the area's sum for the variable selected by
   SS->sum_var_idx. */
2810 const struct ctables_area *a = cell->areas[ss->area];
2811 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2812 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2813 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2820 struct casereader *reader = casewriter_make_reader (s->writer);
/* Any function other than PTILE that reaches this code uses the 0.5
   quantile. */
2823 struct percentile *ptile = percentile_create (
2824 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2825 struct order_stats *os = &ptile->parent;
2826 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2827 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2828 statistic_destroy (&ptile->parent.parent);
2835 struct casereader *reader = casewriter_make_reader (s->writer);
2838 struct mode *mode = mode_create ();
2839 struct order_stats *os = &mode->parent;
2840 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2841 s->ovalue = mode->mode;
2842 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis whose cell values are read. */
2850 struct ctables_cell_sort_aux
2852 const struct ctables_nest *nest;
2853 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting along one axis.  A_ and B_
   each point to a "struct ctables_cell *"; AUX_ points to a
   struct ctables_cell_sort_aux.

   The variables of AUX->nest are compared in order, skipping the scale
   variable.  Cells whose categories differ are ordered by the addresses of
   the category objects.  Within the same category the order depends on the
   category type: by value, or by value label with the value as a fallback,
   negated when the category's sort_ascending flag is false.

   NOTE(review): several case labels, the handling of missing labels, and the
   return statements for equal results are on lines not shown here. */
2857 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2859 const struct ctables_cell_sort_aux *aux = aux_;
2860 struct ctables_cell *const *ap = a_;
2861 struct ctables_cell *const *bp = b_;
2862 const struct ctables_cell *a = *ap;
2863 const struct ctables_cell *b = *bp;
2865 const struct ctables_nest *nest = aux->nest;
2866 for (size_t i = 0; i < nest->n; i++)
2867 if (i != nest->scale_idx)
2869 const struct variable *var = nest->vars[i];
2870 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2871 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category pointers themselves. */
2872 if (a_cv->category != b_cv->category)
2873 return a_cv->category > b_cv->category ? 1 : -1;
2875 const union value *a_val = &a_cv->value;
2876 const union value *b_val = &b_cv->value;
2877 switch (a_cv->category->type)
2883 case CCT_POSTCOMPUTE:
2884 case CCT_EXCLUDED_MISSING:
2885 /* Must be equal. */
2893 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2901 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2903 return a_cv->category->sort_ascending ? cmp : -cmp;
2909 const char *a_label = var_lookup_value_label (var, a_val);
2910 const char *b_label = var_lookup_value_label (var, b_val);
2916 cmp = strcmp (a_label, b_label);
2922 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2925 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, axis by axis in
   pivot_axis_type order.  A_ and B_ each point to a "struct ctables_cell *";
   AUX is unused.

   NOTE(review): the test that the two leaf indexes differ and the final
   return for equal cells are on lines not shown in this excerpt. */
2937 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2938 const void *aux UNUSED)
2940 struct ctables_cell *const *ap = a_;
2941 struct ctables_cell *const *bp = b_;
2942 const struct ctables_cell *a = *ap;
2943 const struct ctables_cell *b = *bp;
2945 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2947 int al = a->axes[axis].leaf;
2948 int bl = b->axes[axis].leaf;
2950 return al > bl ? 1 : -1;
2958 For each ctables_table:
2959 For each combination of row vars:
2960 For each combination of column vars:
2961 For each combination of layer vars:
2963 Make a table of row values:
2964 Sort entries by row values
2965 Assign a 0-based index to each actual value
2966 Construct a dimension
2967 Make a table of column values
2968 Make a table of layer values
2970 Fill the table entry using the indexes from before.
/* Finds or creates, in section S, the area of type AREA that CELL belongs
   to.

   An area is identified by the categories, and for ordinary categories also
   the values, of the variables listed in each nest's areas[AREA] index
   array.  Values are ignored for CCT_TOTAL, CCT_SUBTOTAL and CCT_POSTCOMPUTE
   categories, both when hashing and when comparing.  A new area records CELL
   as its example and gets a zeroed array with one sum per table sum
   variable.

   NOTE(review): the declaration of HASH, the statements that leave the
   search loop on a match or mismatch, and the return statements are on
   lines not shown in this excerpt. */
2973 static struct ctables_area *
2974 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
2975 enum ctables_area_type area)
/* Hash the identifying categories and values. */
2978 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2980 const struct ctables_nest *nest = s->nests[a];
2981 for (size_t i = 0; i < nest->n_areas[area]; i++)
2983 size_t v_idx = nest->areas[area][i];
2984 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2985 hash = hash_pointer (cv->category, hash);
2986 if (cv->category->type != CCT_TOTAL
2987 && cv->category->type != CCT_SUBTOTAL
2988 && cv->category->type != CCT_POSTCOMPUTE)
2989 hash = value_hash (&cv->value,
2990 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area, comparing against each candidate's example
   cell. */
2994 struct ctables_area *a;
2995 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
2997 const struct ctables_cell *df = a->example;
/* Note that the loop variable below shadows the area pointer A declared
   above; the example cell was captured in DF beforehand. */
2998 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3000 const struct ctables_nest *nest = s->nests[a];
3001 for (size_t i = 0; i < nest->n_areas[area]; i++)
3003 size_t v_idx = nest->areas[area][i];
3004 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3005 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3006 if (cv1->category != cv2->category
3007 || (cv1->category->type != CCT_TOTAL
3008 && cv1->category->type != CCT_SUBTOTAL
3009 && cv1->category->type != CCT_POSTCOMPUTE
3010 && !value_equal (&cv1->value, &cv2->value,
3011 var_get_width (nest->vars[v_idx]))))
/* Not found: create the area. */
3020 struct ctables_sum *sums = (s->table->n_sum_vars
3021 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3024 a = xmalloc (sizeof *a);
3025 *a = (struct ctables_area) { .example = cell, .sums = sums };
3026 hmap_insert (&s->areas[area], &a->node, hash);
/* Builds a substring from the string data of value V, using VAR's width as
   the length, and strips trailing spaces from it.

   NOTE(review): the substring is constructed over V's own bytes with
   ss_buffer(), so presumably no copy is made and the result is valid only
   while V is -- confirm.  The return statement is on a line not shown. */
3030 static struct substring
3031 rtrim_value (const union value *v, const struct variable *var)
3033 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3034 var_get_width (var));
3035 ss_rtrim (&s, ss_cstr (" "));
/* Returns whether the string value V of VAR, with trailing spaces removed,
   lies within the range SRANGE[0]...SRANGE[1].  Both bounds are inclusive,
   and a bound whose .string is null is treated as unbounded on that side.
   Ordering is determined by ss_compare(). */
3040 in_string_range (const union value *v, const struct variable *var,
3041 const struct substring *srange)
3043 struct substring s = rtrim_value (v, var);
3044 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3045 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   The system-missing value of a numeric variable is treated specially
   before the search.  Otherwise the categories are examined from last to
   first, each by a test suited to its type: numeric equality, equality with
   the trimmed string, inclusive numeric range with -DBL_MAX and DBL_MAX as
   open bounds, inclusive string range, or membership in VAR's missing
   values.  If nothing matches, the result is the OTHERNM category, except
   that a missing value yields NULL.

   NOTE(review): the case labels, the result for system-missing values, the
   assignment to OTHERNM, and the returns on a match are on lines not shown
   in this excerpt. */
3048 static const struct ctables_category *
3049 ctables_categories_match (const struct ctables_categories *c,
3050 const union value *v, const struct variable *var)
3052 if (var_is_numeric (var) && v->f == SYSMIS)
3055 const struct ctables_category *othernm = NULL;
3056 for (size_t i = c->n_cats; i-- > 0; )
3058 const struct ctables_category *cat = &c->cats[i];
3062 if (cat->number == v->f)
3067 if (ss_equals (cat->string, rtrim_value (v, var)))
3072 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3073 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3078 if (in_string_range (v, var, cat->srange))
3083 if (var_is_value_missing (var, v))
3087 case CCT_POSTCOMPUTE:
/* This category accepts a missing value only if it includes missing
   values. */
3102 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3105 case CCT_EXCLUDED_MISSING:
3110 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the totals category of C, which can only be the first or the last
   category: the first one if it has type CCT_TOTAL, else the last one if it
   has that type.

   NOTE(review): the final alternative of the conditional is on a line not
   shown here; presumably NULL.  The code reads c->cats[0] and
   c->cats[c->n_cats - 1] unconditionally, so it assumes that C has at least
   one category -- confirm that callers guarantee this. */
3113 static const struct ctables_category *
3114 ctables_categories_total (const struct ctables_categories *c)
3116 const struct ctables_category *first = &c->cats[0];
3117 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3118 return (first->type == CCT_TOTAL ? first
3119 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S that corresponds to case C classified into
   categories CATS and, if there is none yet, creates one and inserts it into
   S->cells.

   A cell is keyed on the category pointer of every non-scale variable on
   every axis plus, for categories other than totals, subtotals, and
   postcomputes, the case's data value for that variable.

   The statements that return the existing or new cell are not visible in
   this excerpt. */
3123 static struct ctables_cell *
3124 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3125 const struct ctables_category *cats[PIVOT_N_AXES][10])
3128 enum ctables_summary_variant sv = CSV_CELL;
/* Compute the hash of the key: category pointers and, where they are part of
   the key, data values. */
3129 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3131 const struct ctables_nest *nest = s->nests[a];
3132 for (size_t i = 0; i < nest->n; i++)
3133 if (i != nest->scale_idx)
3135 hash = hash_pointer (cats[a][i], hash);
3136 if (cats[a][i]->type != CCT_TOTAL
3137 && cats[a][i]->type != CCT_SUBTOTAL
3138 && cats[a][i]->type != CCT_POSTCOMPUTE)
3139 hash = value_hash (case_data (c, nest->vars[i]),
3140 var_get_width (nest->vars[i]), hash);
/* Search the cells with this hash for one whose key matches: same category
   everywhere and, where values are part of the key, equal values. */
3146 struct ctables_cell *cell;
3147 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3149 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3151 const struct ctables_nest *nest = s->nests[a];
3152 for (size_t i = 0; i < nest->n; i++)
3153 if (i != nest->scale_idx
3154 && (cats[a][i] != cell->axes[a].cvs[i].category
3155 || (cats[a][i]->type != CCT_TOTAL
3156 && cats[a][i]->type != CCT_SUBTOTAL
3157 && cats[a][i]->type != CCT_POSTCOMPUTE
3158 && !value_equal (case_data (c, nest->vars[i]),
3159 &cell->axes[a].cvs[i].value,
3160 var_get_width (nest->vars[i])))))
/* Build a new cell. */
3169 cell = xmalloc (sizeof *cell);
3172 cell->omit_areas = 0;
3173 cell->postcompute = false;
3174 //struct string name = DS_EMPTY_INITIALIZER;
3175 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3177 const struct ctables_nest *nest = s->nests[a];
3178 cell->axes[a].cvs = (nest->n
3179 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3181 for (size_t i = 0; i < nest->n; i++)
3183 const struct ctables_category *cat = cats[a][i];
3184 const struct variable *var = nest->vars[i];
3185 const union value *value = case_data (c, var);
3186 if (i != nest->scale_idx)
3188 const struct ctables_category *subtotal = cat->subtotal;
3189 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category sets bits in cell->omit_areas
   according to the axis it is on. */
3192 if (cat->type == CCT_TOTAL
3193 || cat->type == CCT_SUBTOTAL
3194 || cat->type == CCT_POSTCOMPUTE)
3196 /* XXX these should be more encompassing I think.*/
3200 case PIVOT_AXIS_COLUMN:
3201 cell->omit_areas |= ((1u << CTAT_TABLE) |
3202 (1u << CTAT_LAYER) |
3203 (1u << CTAT_LAYERCOL) |
3204 (1u << CTAT_SUBTABLE) |
3207 case PIVOT_AXIS_ROW:
3208 cell->omit_areas |= ((1u << CTAT_TABLE) |
3209 (1u << CTAT_LAYER) |
3210 (1u << CTAT_LAYERROW) |
3211 (1u << CTAT_SUBTABLE) |
3214 case PIVOT_AXIS_LAYER:
3215 cell->omit_areas |= ((1u << CTAT_TABLE) |
3216 (1u << CTAT_LAYER));
3220 if (cat->type == CCT_POSTCOMPUTE)
3221 cell->postcompute = true;
/* Remember the category and a copy of the case's value for this variable. */
3224 cell->axes[a].cvs[i].category = cat;
3225 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the code below builds a descriptive name in `name', but the
   only visible declaration of `name' is commented out above, as is the
   assignment to cell->name below.  Presumably this code is disabled by
   preprocessor lines not visible in this excerpt -- confirm. */
3228 if (i != nest->scale_idx)
3230 if (!ds_is_empty (&name))
3231 ds_put_cstr (&name, ", ");
3232 char *value_s = data_out (value, var_get_encoding (var),
3233 var_get_print_format (var),
3234 settings_get_fmt_settings ());
3235 if (cat->type == CCT_TOTAL
3236 || cat->type == CCT_SUBTOTAL
3237 || cat->type == CCT_POSTCOMPUTE)
3238 ds_put_format (&name, "%s=total", var_get_name (var));
3240 ds_put_format (&name, "%s=%s", var_get_name (var),
3241 value_s + strspn (value_s, " "));
3247 //cell->name = ds_steal_cstr (&name);
/* Initialize one summary per summary specification for the cell's summary
   variant, record the area of each type that ctables_area_insert() yields
   for the cell, and add the cell to the section's hash map. */
3249 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3250 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3251 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3252 for (size_t i = 0; i < specs->n; i++)
3253 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3254 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3255 cell->areas[at] = ctables_area_insert (s, cell, at);
3256 hmap_insert (&s->cells, &cell->node, hash);
/* Tests whether case C should be treated as missing for the scale variable
   of SPECS.  Three conditions are checked in order: SPECS is not for a scale
   variable, SPECS->var is missing in C, and any of SPECS's listwise variables
   is missing in C.  The value returned for each condition is on lines not
   visible in this excerpt. */
3261 is_scale_missing (const struct ctables_summary_spec_set *specs,
3262 const struct ccase *c)
3264 if (!specs->is_scale)
3267 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3270 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3272 const struct variable *var = specs->listwise_vars[i];
3273 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell in section S
   identified by CATS (creating the cell if necessary), updating each of the
   cell's summaries and the totals kept in the cell's areas.

   IS_MISSING and EXCLUDED_MISSING are passed through to
   ctables_summary_add() and also control which area totals are updated. */
3281 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3282 const struct ctables_category *cats[PIVOT_N_AXES][10],
3283 bool is_missing, bool excluded_missing,
3284 double d_weight, double e_weight)
3286 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3287 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3289 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3291 bool scale_missing = is_scale_missing (specs, c);
3292 for (size_t i = 0; i < specs->n; i++)
3293 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3294 specs->var, case_data (c, specs->var), specs->is_scale,
3295 scale_missing, is_missing, excluded_missing,
3296 d_weight, e_weight);
/* NOTE(review): the test below uses logical `&&', not bitwise `&'.  Because
   `1u << at' is never zero, the condition is equivalent to
   `!cell->omit_areas': a cell with any omit bit set contributes to no area
   at all, and the individual bits are ignored.  A bitwise test may have been
   intended, but changing it would make such cells start contributing to the
   areas whose bits are not set, so verify the intended semantics (and the
   masks built in ctables_cell_insert__()) before touching this. */
3297 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3298 if (!(cell->omit_areas && (1u << at)))
3300 struct ctables_area *a = cell->areas[at];
3301 a->d_total += d_weight;
3302 a->e_total += e_weight;
/* Counts leave out cases that have an excluded missing value. */
3304 if (!excluded_missing)
3306 a->d_count += d_weight;
3307 a->e_count += e_weight;
/* The condition that guards the valid totals and the per-variable sums below
   is on a line not visible in this excerpt. */
3312 a->d_valid += d_weight;
3313 a->e_valid += e_weight;
3316 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3318 /* XXX listwise_missing??? */
3319 const struct variable *var = s->table->sum_vars[i];
3320 double addend = case_num (c, var);
3321 if (!var_is_num_missing (var, addend))
3323 struct ctables_sum *sum = &a->sums[i];
3324 sum->e_sum += addend * e_weight;
3325 sum->u_sum += addend;
/* Adds case C to the cells obtained by substituting total categories into
   CATS.  Walks the non-scale variables starting at nest index START_NEST on
   axis START_AXIS; for each, looks up the variable's total category with
   ctables_categories_total(), adds the case to the corresponding cell with
   ctables_cell_add__(), and recurses starting at the following variable so
   that combinations of totals are covered too.

   The lines that test `total', store it into cats[a][i], and restore the
   saved category afterward are not visible in this excerpt; `save' below
   holds the original category, presumably for that restore. */
3333 recurse_totals (struct ctables_section *s, const struct ccase *c,
3334 const struct ctables_category *cats[PIVOT_N_AXES][10],
3335 bool is_missing, bool excluded_missing,
3336 double d_weight, double e_weight,
3337 enum pivot_axis_type start_axis, size_t start_nest)
3339 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3341 const struct ctables_nest *nest = s->nests[a];
3342 for (size_t i = start_nest; i < nest->n; i++)
3344 if (i == nest->scale_idx)
3347 const struct variable *var = nest->vars[i];
3349 const struct ctables_category *total = ctables_categories_total (
3350 s->table->categories[var_get_dict_index (var)]);
3353 const struct ctables_category *save = cats[a][i];
3355 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3356 d_weight, e_weight);
3357 recurse_totals (s, c, cats, is_missing, excluded_missing,
3358 d_weight, e_weight, a, i + 1);
/* Like recurse_totals(), but for subtotals: walks the non-scale variables
   starting at nest index START_NEST on axis START_AXIS, replaces the
   variable's category in CATS by that category's `subtotal', adds case C to
   the resulting cell, and recurses starting at the following variable.

   The condition under which the substitution happens and the restore of the
   saved category are on lines not visible in this excerpt. */
3367 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3368 const struct ctables_category *cats[PIVOT_N_AXES][10],
3369 bool is_missing, bool excluded_missing,
3370 double d_weight, double e_weight,
3371 enum pivot_axis_type start_axis, size_t start_nest)
3373 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3375 const struct ctables_nest *nest = s->nests[a];
3376 for (size_t i = start_nest; i < nest->n; i++)
3378 if (i == nest->scale_idx)
3381 const struct ctables_category *save = cats[a][i];
3384 cats[a][i] = save->subtotal;
3385 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3386 d_weight, e_weight);
3387 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3388 d_weight, e_weight, a, i + 1);
/* Records that VALUE, a value of VAR, occurs in the data, by adding a copy of
   it to the OCCURRENCES hash map.  Existing entries with the same hash are
   compared with value_equal() first; what happens on a match is on a line not
   visible in this excerpt (presumably an early return, so that each value is
   stored once -- TODO confirm). */
3397 ctables_add_occurrence (const struct variable *var,
3398 const union value *value,
3399 struct hmap *occurrences)
3401 int width = var_get_width (var);
3402 unsigned int hash = value_hash (value, width, 0);
3404 struct ctables_occurrence *o;
3405 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3407 if (value_equal (value, &o->value, width))
3410 o = xmalloc (sizeof *o);
3411 value_clone (&o->value, value, width);
3412 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First, every non-scale variable on every axis is classified with
   ctables_categories_match().  In a branch whose condition is not visible in
   this excerpt, a static placeholder category of type CCT_EXCLUDED_MISSING
   is used instead and `excluded_missing' is set.

   Then, unless an excluded missing value was seen, each variable's value is
   recorded with ctables_add_occurrence().  Finally the case is added to its
   own cell and, through recurse_totals() and recurse_subtotals(), to the
   related total and subtotal cells.

   NOTE(review): `cats' has room for 10 variables per axis (see the XXX
   below) and is indexed by nest position without a visible bounds check;
   confirm that the nesting depth is limited elsewhere. */
3416 ctables_cell_insert (struct ctables_section *s,
3417 const struct ccase *c,
3418 double d_weight, double e_weight)
3420 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3422 /* Does at least one categorical variable have a missing value in an included
3423 or excluded category? */
3424 bool is_missing = false;
3426 /* Does at least one categorical variable have a missing value in an excluded
3428 bool excluded_missing = false;
3430 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3432 const struct ctables_nest *nest = s->nests[a];
3433 for (size_t i = 0; i < nest->n; i++)
3435 if (i == nest->scale_idx)
3438 const struct variable *var = nest->vars[i];
3439 const union value *value = case_data (c, var);
3441 bool var_missing = var_is_value_missing (var, value) != 0;
3445 cats[a][i] = ctables_categories_match (
3446 s->table->categories[var_get_dict_index (var)], value, var);
3452 static const struct ctables_category cct_excluded_missing = {
3453 .type = CCT_EXCLUDED_MISSING,
3456 cats[a][i] = &cct_excluded_missing;
3457 excluded_missing = true;
3462 if (!excluded_missing)
3463 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3465 const struct ctables_nest *nest = s->nests[a];
3466 for (size_t i = 0; i < nest->n; i++)
3467 if (i != nest->scale_idx)
3469 const struct variable *var = nest->vars[i];
3470 const union value *value = case_data (c, var);
3471 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3475 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3476 d_weight, e_weight);
3478 //if (!excluded_missing)
3480 recurse_totals (s, c, cats, is_missing, excluded_missing,
3481 d_weight, e_weight, 0, 0);
3482 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3483 d_weight, e_weight, 0, 0);
/* Summary specification set that the item refers into.
   merge_item_compare_3way() selects one specification from it as
   set->specs[ofs]; the `ofs' member itself is declared on a line not visible
   in this excerpt. */
3489 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that A and B refer to.
   Compares, in order: function, weighted, and area (each ascending), then
   percentile with the sense reversed (the item with the smaller percentile
   compares greater), and finally the labels with strcmp(), treating a null
   label as an empty string. */
3494 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3496 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3497 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3498 if (as->function != bs->function)
3499 return as->function > bs->function ? 1 : -1;
3500 else if (as->weighted != bs->weighted)
3501 return as->weighted > bs->weighted ? 1 : -1;
3502 else if (as->area != bs->area)
3503 return as->area > bs->area ? 1 : -1;
3504 else if (as->percentile != bs->percentile)
3505 return as->percentile < bs->percentile ? 1 : -1;
3507 const char *as_label = as->label ? as->label : "";
3508 const char *bs_label = bs->label ? bs->label : "";
3509 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of VAR into S.  Does so by wrapping NUMBER in a
   temporary pivot_value for VAR, passing that to pivot_value_format(), and
   then destroying it.  (The declaration of parameter S is on a line not
   visible in this excerpt.) */
3513 ctables_category_format_number (double number, const struct variable *var,
3516 struct pivot_value *pv = pivot_value_new_var_value (
3517 var, &(union value) { .f = number });
3518 pivot_value_format (pv, NULL, s);
3519 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR into OUT.  STRING is first
   copied into a temporary buffer as wide as VAR, padded on the right with
   spaces, and then wrapped in a temporary pivot_value that is formatted with
   pivot_value_format() and destroyed.

   NOTE(review): the release of the temporary buffer `s' is not visible in
   this excerpt; confirm that it is freed after the pivot_value is
   destroyed. */
3523 ctables_category_format_string (struct substring string,
3524 const struct variable *var, struct string *out)
3526 int width = var_get_width (var);
3527 char *s = xmalloc (width);
3528 buf_copy_rpad (s, width, string.string, string.length, ' ');
3529 struct pivot_value *pv = pivot_value_new_var_value (
3530 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3531 pivot_value_format (pv, NULL, out);
3532 pivot_value_destroy (pv);
/* Appends to S a textual description of category CAT of variable VAR.  The
   text depends on the category type: a formatted number or string, a
   "lo THRU hi" range, the keywords MISSING or OTHERNM, "&name" for a
   postcompute, or the category's total label.

   Most of the `case' labels that select among these, and the values
   returned, are on lines not visible in this excerpt.  The caller in
   ctables_postcompute_label() tests the result as a boolean. */
3537 ctables_category_format_label (const struct ctables_category *cat,
3538 const struct variable *var,
3544 ctables_category_format_number (cat->number, var, s);
3548 ctables_category_format_string (cat->string, var, s);
3552 ctables_category_format_number (cat->nrange[0], var, s);
3553 ds_put_format (s, " THRU ");
3554 ctables_category_format_number (cat->nrange[1], var, s);
3558 ctables_category_format_string (cat->srange[0], var, s);
3559 ds_put_format (s, " THRU ");
3560 ctables_category_format_string (cat->srange[1], var, s);
3564 ds_put_cstr (s, "MISSING");
3568 ds_put_cstr (s, "OTHERNM");
3571 case CCT_POSTCOMPUTE:
3572 ds_put_format (s, "&%s", cat->pc->name);
3577 ds_put_cstr (s, cat->total_label);
3583 case CCT_EXCLUDED_MISSING:
/* Returns a new pivot_value holding the label of postcompute category CAT,
   with each ")LABEL[n]" reference in the label replaced by the formatted
   label of the Nth category (counting from 1) in CATS for variable VAR.

   If the label contains no reference at all, it is returned as is.  The
   final statement returns the unmodified label; it is presumably reached
   when a reference is malformed (missing ']', index that is not a number in
   1...CATS->n_cats, or a category that cannot be formatted), through jumps on
   lines not visible in this excerpt -- TODO confirm. */
3590 static struct pivot_value *
3591 ctables_postcompute_label (const struct ctables_categories *cats,
3592 const struct ctables_category *cat,
3593 const struct variable *var)
3595 struct substring in = ss_cstr (cat->pc->label);
3596 struct substring target = ss_cstr (")LABEL[");
3598 struct string out = DS_EMPTY_INITIALIZER;
/* Find the next reference; SIZE_MAX means there is none left. */
3601 size_t chunk = ss_find_substring (in, target);
3602 if (chunk == SIZE_MAX)
3604 if (ds_is_empty (&out))
3605 return pivot_value_new_user_text (in.string, in.length);
3608 ds_put_substring (&out, in);
3609 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then skip past ")LABEL[". */
3613 ds_put_substring (&out, ss_head (in, chunk));
3614 ss_advance (&in, chunk + target.length);
/* Parse the index up to ']' and require that it be entirely numeric and in
   range. */
3616 struct substring idx_s;
3617 if (!ss_get_until (&in, ']', &idx_s))
3620 long int idx = strtol (idx_s.string, &tail, 10);
3621 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3624 struct ctables_category *cat2 = &cats->cats[idx - 1];
3625 if (!ctables_category_format_label (cat2, var, &out))
3631 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Returns a new pivot_value to label category CAT (one of CATS) of variable
   VAR whose data value is VALUE:

   - For a postcompute that has a label, the expanded postcompute label.
   - For a total or subtotal, the category's total label.
   - Otherwise, VALUE itself as a value of VAR. */
3634 static struct pivot_value *
3635 ctables_category_create_value_label (const struct ctables_categories *cats,
3636 const struct ctables_category *cat,
3637 const struct variable *var,
3638 const union value *value)
3640 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3641 ? ctables_postcompute_label (cats, cat, var)
3642 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3643 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3644 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry whose value equals VALUE, which
   has the given WIDTH and whose hash is HASH.  The return statements are on
   lines not visible in this excerpt; ctables_value_insert() and
   ctables_value_find() use the result as a pointer to the entry. */
3647 static struct ctables_value *
3648 ctables_value_find__ (struct ctables_table *t, const union value *value,
3649 int width, unsigned int hash)
3651 struct ctables_value *clv;
3652 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3653 hash, &t->clabels_values_map)
3654 if (value_equal (value, &clv->value, width))
/* Adds a copy of VALUE to T->clabels_values_map.  The map is searched with
   ctables_value_find__() first; the test of that result is on a line not
   visible in this excerpt (presumably a new entry is created only when none
   exists -- TODO confirm). */
3660 ctables_value_insert (struct ctables_table *t, const union value *value,
3663 unsigned int hash = value_hash (value, width, 0);
3664 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3667 clv = xmalloc (sizeof *clv);
3668 value_clone (&clv->value, value, width);
3669 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T->clabels_values_map, computing the
   hash itself and delegating the search to ctables_value_find__(). */
3673 static struct ctables_value *
3674 ctables_value_find (struct ctables_table *t,
3675 const union value *value, int width)
3677 return ctables_value_find__ (t, value, width,
3678 value_hash (value, width, 0));
/* Recursively adds sections to T, one for each combination of one nest index
   per axis.  A is the axis currently being enumerated and IX holds the
   indexes chosen so far.  An axis with an empty stack is still iterated once
   (the limit is at least 1).

   Once A has passed the last axis, a new section is appended to T->sections
   and initialized, including one occurrences hash map per variable in each
   axis's nest and one hash map per area type. */
3682 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3683 size_t ix[PIVOT_N_AXES])
3685 if (a < PIVOT_N_AXES)
3687 size_t limit = MAX (t->stacks[a].n, 1);
3688 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3689 ctables_table_add_section (t, a + 1, ix);
/* Every axis has an index: create the section for this combination. */
3693 struct ctables_section *s = &t->sections[t->n_sections++];
3694 *s = (struct ctables_section) {
3696 .cells = HMAP_INITIALIZER (s->cells),
3698 for (a = 0; a < PIVOT_N_AXES; a++)
3701 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3703 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3704 for (size_t i = 0; i < nest->n; i++)
3705 hmap_init (&s->occurrences[a][i]);
3707 for (size_t i = 0; i < N_CTATS; i++)
3708 hmap_init (&s->areas[i]);
/* Binary operator on A and B for postcompute expressions; passed as the
   `evaluate' callback to ctables_pcexpr_evaluate_nonterminal().  The body is
   not visible in this excerpt (by its name, presumably addition). */
3713 ctpo_add (double a, double b)
/* Binary operator on A and B for postcompute expressions; passed as the
   `evaluate' callback to ctables_pcexpr_evaluate_nonterminal().  The body is
   not visible in this excerpt (by its name, presumably subtraction). */
3719 ctpo_sub (double a, double b)
/* Binary operator on A and B for postcompute expressions; passed as the
   `evaluate' callback to ctables_pcexpr_evaluate_nonterminal().  The body is
   not visible in this excerpt (by its name, presumably multiplication). */
3725 ctpo_mul (double a, double b)
/* Division operator for postcompute expressions: returns A / B, or SYSMIS
   when B is zero. */
3731 ctpo_div (double a, double b)
3733 return b ? a / b : SYSMIS;
/* Exponentiation operator for postcompute expressions: computes pow (A, B).
   errno is saved before the call; how errno is inspected and restored, and
   what is returned, is on lines not visible in this excerpt. */
3737 ctpo_pow (double a, double b)
3739 int save_errno = errno;
3741 double result = pow (a, b);
/* Unary operator on A for postcompute expressions; B is unused and exists
   only so that the function matches the two-argument `evaluate' callback
   type.  The body is not visible in this excerpt (by its name, presumably
   negation). */
3749 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell.  Some
   members (including `pc_a_idx' and `summary_idx', which are used by the
   evaluation functions) are declared on lines not visible in this excerpt. */
3754 struct ctables_pcexpr_evaluate_ctx
/* Cell whose value is being computed. */
3756 const struct ctables_cell *cell;
/* Section that contains the cell. */
3757 const struct ctables_section *section;
/* Categories of the variable that has the postcompute category. */
3758 const struct ctables_categories *cats;
/* Axis on which the postcompute category lies. */
3759 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute() when resolving the
   categories that the expression refers to. */
3762 enum fmt_type parse_format;
3765 static double ctables_pcexpr_evaluate (
3766 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates the first N_ARGS (1 or 2) subexpressions of E in context CTX and
   combines them with EVALUATE.  An unused second argument is passed as 0.

   Each argument is checked for being non-finite or SYSMIS right after it is
   evaluated; the statement executed in that case is on a line not visible in
   this excerpt (presumably an early return of SYSMIS -- TODO confirm). */
3769 ctables_pcexpr_evaluate_nonterminal (
3770 const struct ctables_pcexpr_evaluate_ctx *ctx,
3771 const struct ctables_pcexpr *e, size_t n_args,
3772 double evaluate (double, double))
3774 double args[2] = { 0, 0 };
3775 for (size_t i = 0; i < n_args; i++)
3777 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3778 if (!isfinite (args[i]) || args[i] == SYSMIS)
3781 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary CTX->summary_idx, of the cell that
   is identical to CTX->cell except that the category and value at position
   CTX->pc_a_idx on axis CTX->pc_a are replaced by PC_CV.

   The cell is located in the section's hash map using the same key as
   ctables_cell_insert__(): category pointers of non-scale variables plus
   data values for categories other than totals, subtotals, and postcomputes.
   What happens when no such cell exists is on lines not visible in this
   excerpt. */
3785 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3786 const struct ctables_cell_value *pc_cv)
3788 const struct ctables_section *s = ctx->section;
/* Hash the key of the target cell. */
3791 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3793 const struct ctables_nest *nest = s->nests[a];
3794 for (size_t i = 0; i < nest->n; i++)
3795 if (i != nest->scale_idx)
3797 const struct ctables_cell_value *cv
3798 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3799 : &ctx->cell->axes[a].cvs[i]);
3800 hash = hash_pointer (cv->category, hash);
3801 if (cv->category->type != CCT_TOTAL
3802 && cv->category->type != CCT_SUBTOTAL
3803 && cv->category->type != CCT_POSTCOMPUTE)
3804 hash = value_hash (&cv->value,
3805 var_get_width (nest->vars[i]), hash);
/* Find the cell with that hash whose key matches at every position. */
3809 struct ctables_cell *tc;
3810 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3812 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3814 const struct ctables_nest *nest = s->nests[a];
3815 for (size_t i = 0; i < nest->n; i++)
3816 if (i != nest->scale_idx)
3818 const struct ctables_cell_value *p_cv
3819 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3820 : &ctx->cell->axes[a].cvs[i]);
3821 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3822 if (p_cv->category != t_cv->category
3823 || (p_cv->category->type != CCT_TOTAL
3824 && p_cv->category->type != CCT_SUBTOTAL
3825 && p_cv->category->type != CCT_POSTCOMPUTE
3826 && !value_equal (&p_cv->value,
3828 var_get_width (nest->vars[i]))))
/* Report the requested summary of the cell that was found. */
3840 const struct ctables_table *t = s->table;
3841 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3842 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3843 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3844 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   - A reference to a range, MISSING, or OTHERNM category adds up the values
     of the cells for every value that occurred in the data (per the
     section's occurrences map) and that falls into that category.
   - A reference to a number, subtotal, or total category evaluates the
     single matching cell.
   - A reference to a string category does the same, after padding the
     string to the variable's width when it is shorter.
   - Operators evaluate their operands recursively via
     ctables_pcexpr_evaluate_nonterminal().

   Some `case' labels and the `switch' itself are on lines not visible in
   this excerpt. */
3848 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3849 const struct ctables_pcexpr *e)
3856 case CTPO_CAT_NRANGE:
3857 case CTPO_CAT_SRANGE:
3858 case CTPO_CAT_MISSING:
3859 case CTPO_CAT_OTHERNM:
3861 struct ctables_cell_value cv = {
3862 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3864 assert (cv.category != NULL);
/* Sum over the observed values that belong to the category. */
3866 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3867 const struct ctables_occurrence *o;
3870 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3871 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3872 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3874 cv.value = o->value;
3875 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3880 case CTPO_CAT_NUMBER:
3881 case CTPO_CAT_SUBTOTAL:
3882 case CTPO_CAT_TOTAL:
3884 struct ctables_cell_value cv = {
3885 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3886 .value = { .f = e->number },
3888 assert (cv.category != NULL);
3889 return ctables_pcexpr_evaluate_category (ctx, &cv);
3892 case CTPO_CAT_STRING:
/* If the string is shorter than the variable, compare using a copy padded
   with spaces to the variable's width. */
3894 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3896 if (width > e->string.length)
3898 s = xmalloc (width);
3899 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3902 const struct ctables_category *category
3903 = ctables_find_category_for_postcompute (
3904 ctx->section->table->ctables->dict,
3905 ctx->cats, ctx->parse_format, e);
3906 assert (category != NULL);
/* The category that was found may be numeric or string; set the cell value
   accordingly. */
3908 struct ctables_cell_value cv = { .category = category };
3909 if (category->type == CCT_NUMBER)
3910 cv.value.f = category->number;
3911 else if (category->type == CCT_STRING)
3912 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3916 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3922 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3925 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3928 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3931 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3934 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3937 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that determines the value of CELL, which
   must have its `postcompute' flag set, by scanning every variable on every
   axis of section S for a category of type CCT_POSTCOMPUTE.  The position of
   the category is stored through PC_A_P (axis) and PC_A_IDX_P (index within
   the nest); the latter parameter is declared on a line not visible in this
   excerpt.

   How the case of more than one postcompute in a cell is resolved, and the
   return statements, are on lines not visible in this excerpt. */
3943 static const struct ctables_category *
3944 ctables_cell_postcompute (const struct ctables_section *s,
3945 const struct ctables_cell *cell,
3946 enum pivot_axis_type *pc_a_p,
3949 assert (cell->postcompute);
3950 const struct ctables_category *pc_cat = NULL;
3951 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3952 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3954 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3955 if (cv->category->type == CCT_POSTCOMPUTE)
3959 /* Multiple postcomputes cross each other. The value is
3964 pc_cat = cv->category;
3968 *pc_a_idx_p = pc_a_idx;
3972 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS by evaluating the postcompute's expression.

   If the postcompute has its own summary specifications and one of them
   matches SS in function, weighting, area, and percentile, then that
   specification's format is stored into *FORMAT and its is_ctables_format
   flag into *IS_CTABLES_FORMAT.

   The last parameter (used below as `summary_idx') is declared on a line not
   visible in this excerpt, as is the handling of the case where
   ctables_cell_postcompute() finds no usable category. */
3977 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3978 const struct ctables_cell *cell,
3979 const struct ctables_summary_spec *ss,
3980 struct fmt_spec *format,
3981 bool *is_ctables_format,
3984 enum pivot_axis_type pc_a = 0;
3985 size_t pc_a_idx = 0;
3986 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3987 s, cell, &pc_a, &pc_a_idx);
3991 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a format override among the postcompute's own summary specs. */
3994 for (size_t i = 0; i < pc->specs->n; i++)
3996 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3997 if (ss->function == ss2->function
3998 && ss->weighted == ss2->weighted
3999 && ss->area == ss2->area
4000 && ss->percentile == ss2->percentile)
4002 *format = ss2->format;
4003 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression in terms of the categories of the variable that
   carries the postcompute. */
4009 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4010 const struct ctables_categories *cats = s->table->categories[
4011 var_get_dict_index (var)];
4012 struct ctables_pcexpr_evaluate_ctx ctx = {
4017 .pc_a_idx = pc_a_idx,
4018 .summary_idx = summary_idx,
4019 .parse_format = pc_cat->parse_format,
4021 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS as a UTF-8 string obtained from
   data_out_stretchy(), then adjusts where a minus sign appears, as explained
   in the comment below.  The return statement is on a line not visible in
   this excerpt. */
4025 ctables_format (double d, const struct fmt_spec *format,
4026 const struct fmt_settings *settings)
4028 const union value v = { .f = d };
4029 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4031 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4032 produce the results we want for negative numbers, putting the negative
4033 sign in the wrong spot, before the prefix instead of after it. We can't,
4034 in fact, produce the desired results using a custom-currency
4035 specification. Instead, we postprocess the output, moving the negative
4038 NEQUAL: "-N=3" => "N=-3"
4039 PAREN: "-(3)" => "(-3)"
4040 PCTPAREN: "-(3%)" => "(-3%)"
4042 This transformation doesn't affect NEGPAREN. */
/* Only the first '-' is considered, and not when it directly follows 'E',
   that is, when it is the sign of an exponent. */
4043 char *minus_src = strchr (s, '-');
4044 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The sign moves to just after "N=" if present, otherwise to just after the
   first '('.  The guard for the case where neither is present is on a line
   not visible in this excerpt. */
4046 char *n_equals = strstr (s, "N=");
4047 char *lparen = strchr (s, '(');
4048 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4050 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A, checking whether the nest
   consists of exactly one variable that is its scale variable and whether
   that variable's label setting is CTVL_NONE.  The values returned are on
   lines not visible in this excerpt; judging by the name and by the caller,
   which assigns the result to a dimension's hide_all_labels, presumably true
   only if every nest passes both checks -- TODO confirm. */
4056 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4058 for (size_t i = 0; i < t->stacks[a].n; i++)
4060 struct ctables_nest *nest = &t->stacks[a].nests[i];
4061 if (nest->n != 1 || nest->scale_idx != 0)
4064 enum ctables_vlabel vlabel
4065 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4066 if (vlabel != CTVL_NONE)
/* Builds a pivot table for table T of CTABLES command CT and submits it for
   output.

   The steps visible here are:

   1. Create the pivot table with its title, caption, and corner text.
   2. Create a separate dimension for summary statistics and/or for category
      labels that were moved to another axis, when the table needs them.
   3. For each axis, create a dimension, sort the cells that belong to each
      nest, and create the pivot categories (variable labels, categories, and
      summary functions) that label them, recording each cell's leaf index.
   4. Assign sequence numbers to the areas in sorted cell order.
   5. Put each cell's summary values (or postcompute results) into the
      table. */
4073 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4075 struct pivot_table *pt = pivot_table_create__ (
4077 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4078 : pivot_value_new_text (N_("Custom Tables"))),
4081 pivot_table_set_caption (
4082 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4084 pivot_table_set_corner_text (
4085 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary statistics get a dimension of their own when their labels go on an
   axis other than the summary axis, or when the labels are hidden and there
   is more than one summary. */
4087 bool summary_dimension = (t->summary_axis != t->slabels_axis
4088 || (!t->slabels_visible
4089 && t->summary_specs.n > 1));
4090 if (summary_dimension)
4092 struct pivot_dimension *d = pivot_dimension_create (
4093 pt, t->slabels_axis, N_("Statistics"));
4094 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4095 if (!t->slabels_visible)
4096 d->hide_all_labels = true;
4097 for (size_t i = 0; i < specs->n; i++)
4098 pivot_category_create_leaf (
4099 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* Category labels get a dimension of their own when t->clabels_example is
   set. */
4102 bool categories_dimension = t->clabels_example != NULL;
4103 if (categories_dimension)
4105 struct pivot_dimension *d = pivot_dimension_create (
4106 pt, t->label_axis[t->clabels_from_axis],
4107 t->clabels_from_axis == PIVOT_AXIS_ROW
4108 ? N_("Row Categories")
4109 : N_("Column Categories"));
4110 const struct variable *var = t->clabels_example;
4111 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4112 for (size_t i = 0; i < t->n_clabels_values; i++)
4114 const struct ctables_value *value = t->clabels_values[i];
4115 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4116 assert (cat != NULL);
4117 pivot_category_create_leaf (
4118 d->root, ctables_category_create_value_label (c, cat,
/* Create a dimension for each axis that has variables or that holds the
   summaries. */
4124 pivot_table_set_look (pt, ct->look);
4125 struct pivot_dimension *d[PIVOT_N_AXES];
4126 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4128 static const char *names[] = {
4129 [PIVOT_AXIS_ROW] = N_("Rows"),
4130 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4131 [PIVOT_AXIS_LAYER] = N_("Layers"),
4133 d[a] = (t->axes[a] || a == t->summary_axis
4134 ? pivot_dimension_create (pt, a, names[a])
4139 assert (t->axes[a]);
/* For each nest on this axis, gather the cells of every section that uses
   the nest and sort them. */
4141 for (size_t i = 0; i < t->stacks[a].n; i++)
4143 struct ctables_nest *nest = &t->stacks[a].nests[i];
4144 struct ctables_section **sections = xnmalloc (t->n_sections,
4146 size_t n_sections = 0;
4148 size_t n_total_cells = 0;
4149 size_t max_depth = 0;
4150 for (size_t j = 0; j < t->n_sections; j++)
4151 if (t->sections[j].nests[a] == nest)
4153 struct ctables_section *s = &t->sections[j];
4154 sections[n_sections++] = s;
4155 n_total_cells += hmap_count (&s->cells);
4157 size_t depth = s->nests[a]->n;
4158 max_depth = MAX (depth, max_depth);
4161 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4163 size_t n_sorted = 0;
4165 for (size_t j = 0; j < n_sections; j++)
4167 struct ctables_section *s = sections[j];
4169 struct ctables_cell *cell;
4170 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4172 sorted[n_sorted++] = cell;
4173 assert (n_sorted <= n_total_cells);
4176 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4177 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* Number the areas of each type in the order in which the sorted cells
   first reach them. */
4180 if (a == PIVOT_AXIS_ROW)
4182 size_t ids[N_CTATS];
4183 memset (ids, 0, sizeof ids);
4184 for (size_t j = 0; j < n_sorted; j++)
4186 struct ctables_cell *cell = sorted[j];
4187 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4189 struct ctables_area *area = cell->areas[at];
4190 if (!area->sequence)
4191 area->sequence = ++ids[at];
/* NOTE(review): debugging output.  It reads cell->name, whose assignment in
   ctables_cell_insert__() is commented out, so it is presumably disabled by
   preprocessor lines not visible in this excerpt -- confirm. */
4198 for (size_t j = 0; j < n_sorted; j++)
4200 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4205 struct ctables_level
4207 enum ctables_level_type
4209 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4210 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4211 CTL_SUMMARY, /* Summary functions. */
4215 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Work out the levels of labels for this nest: at most a variable label and
   a category per variable, plus one level for summary functions. */
4218 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4219 size_t n_levels = 0;
4220 for (size_t k = 0; k < nest->n; k++)
4222 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4223 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4225 if (vlabel != CTVL_NONE)
4227 levels[n_levels++] = (struct ctables_level) {
4229 .vlabel = (enum settings_value_show) vlabel,
4234 if (nest->scale_idx != k
4235 && (k != nest->n - 1 || t->label_axis[a] == a))
4237 levels[n_levels++] = (struct ctables_level) {
4238 .type = CTL_CATEGORY,
4244 if (!summary_dimension && a == t->slabels_axis)
4246 levels[n_levels++] = (struct ctables_level) {
4247 .type = CTL_SUMMARY,
4248 .var_idx = SIZE_MAX,
4252 /* Pivot categories:
4254 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4255 - category for nest->vars[0], if nest->scale_idx != 0
4256 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4257 - category for nest->vars[1], if nest->scale_idx != 1
4259 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4260 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4261 - summary function, if 'a == t->slabels_axis && a ==
4264 Additional dimensions:
4266 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4268 - If 't->label_axis[b] == a' for some 'b != a', add a category
4273 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
/* Walk the sorted cells.  For each one, count how many leading levels it
   shares with the previous cell (n_common) and create pivot categories only
   for the levels after that. */
4275 for (size_t j = 0; j < n_sorted; j++)
4277 struct ctables_cell *cell = sorted[j];
4278 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4280 size_t n_common = 0;
4283 for (; n_common < n_levels; n_common++)
4285 const struct ctables_level *level = &levels[n_common];
4286 if (level->type == CTL_CATEGORY)
4288 size_t var_idx = level->var_idx;
4289 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4290 if (prev->axes[a].cvs[var_idx].category != c)
/* The third argument of value_equal() is the variable's width, as in every
   other call in this file.  Passing the variable's type here instead made
   string values compare on a truncated width, so that distinct strings in
   the same category could be treated as a common level. */
4292 else if (c->type != CCT_SUBTOTAL
4293 && c->type != CCT_TOTAL
4294 && c->type != CCT_POSTCOMPUTE
4295 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4296 &cell->axes[a].cvs[var_idx].value,
4297 var_get_width (nest->vars[var_idx])))
4303 for (size_t k = n_common; k < n_levels; k++)
4305 const struct ctables_level *level = &levels[k];
4306 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4307 if (level->type == CTL_SUMMARY)
4309 assert (k == n_levels - 1);
4311 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4312 for (size_t m = 0; m < specs->n; m++)
4314 int leaf = pivot_category_create_leaf (
4315 parent, ctables_summary_label (&specs->specs[m],
4323 const struct variable *var = nest->vars[level->var_idx];
4324 struct pivot_value *label;
4325 if (level->type == CTL_VAR)
4327 label = pivot_value_new_variable (var);
4328 label->variable.show = level->vlabel;
4330 else if (level->type == CTL_CATEGORY)
4332 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4333 label = ctables_category_create_value_label (
4334 t->categories[var_get_dict_index (var)],
4335 cv->category, var, &cv->value);
/* The last level becomes a leaf; earlier levels become groups that hold the
   levels below them. */
4340 if (k == n_levels - 1)
4341 prev_leaf = pivot_category_create_leaf (parent, label);
4343 groups[k] = pivot_category_create_group__ (parent, label);
4347 cell->axes[a].leaf = prev_leaf;
4356 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
/* Sort all of the table's cells by leaf and number the areas of each type
   in the order in which the sorted cells first reach them. */
4360 size_t n_total_cells = 0;
4361 for (size_t j = 0; j < t->n_sections; j++)
4362 n_total_cells += hmap_count (&t->sections[j].cells);
4364 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4365 size_t n_sorted = 0;
4366 for (size_t j = 0; j < t->n_sections; j++)
4368 const struct ctables_section *s = &t->sections[j];
4369 struct ctables_cell *cell;
4370 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4372 sorted[n_sorted++] = cell;
4374 assert (n_sorted <= n_total_cells);
4375 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4377 size_t ids[N_CTATS];
4378 memset (ids, 0, sizeof ids);
4379 for (size_t j = 0; j < n_sorted; j++)
4381 struct ctables_cell *cell = sorted[j];
4382 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4384 struct ctables_area *area = cell->areas[at];
4385 if (!area->sequence)
4386 area->sequence = ++ids[at];
/* Fill in the data: one pivot table entry per cell per summary. */
4393 for (size_t i = 0; i < t->n_sections; i++)
4395 struct ctables_section *s = &t->sections[i];
4397 struct ctables_cell *cell;
4398 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4403 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4404 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4405 for (size_t j = 0; j < specs->n; j++)
/* Assemble the index of the entry in each dimension, in the order in which
   the dimensions were created above. */
4408 size_t n_dindexes = 0;
4410 if (summary_dimension)
4411 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4413 if (categories_dimension)
4415 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4416 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4417 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4418 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4421 dindexes[n_dindexes++] = ctv->leaf;
4424 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4427 int leaf = cell->axes[a].leaf;
4428 if (a == t->summary_axis && !summary_dimension)
4430 dindexes[n_dindexes++] = leaf;
4433 const struct ctables_summary_spec *ss = &specs->specs[j];
/* A postcompute cell is calculated from its expression and may override the
   format; any other cell reports its accumulated summary. */
4435 struct fmt_spec format = specs->specs[j].format;
4436 bool is_ctables_format = ss->is_ctables_format;
4437 double d = (cell->postcompute
4438 ? ctables_cell_calculate_postcompute (
4439 s, cell, ss, &format, &is_ctables_format, j)
4440 : ctables_summary_value (cell, &cell->summaries[j],
/* Choose how to present the value: a "<N" placeholder for counts under the
   hide threshold, the configured text for zero or missing values, a string
   produced by ctables_format() for CTABLES-specific formats, or an ordinary
   number. */
4443 struct pivot_value *value;
4444 if (ct->hide_threshold != 0
4445 && d < ct->hide_threshold
4446 && ctables_summary_function_is_count (ss->function))
4448 value = pivot_value_new_user_text_nocopy (
4449 xasprintf ("<%d", ct->hide_threshold));
4451 else if (d == 0 && ct->zero)
4452 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4453 else if (d == SYSMIS && ct->missing)
4454 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4455 else if (is_ctables_format)
4456 value = pivot_value_new_user_text_nocopy (
4457 ctables_format (d, &format, &ct->ctables_formats));
4460 value = pivot_value_new_number (d);
4461 value->numeric.format = format;
4463 /* XXX should text values be right-justified? */
4464 pivot_table_put (pt, dindexes, n_dindexes, value);
4469 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T can be moved to
   the position requested by T->label_axis[A], issuing an SE message for each
   requirement that is violated.  As a side effect it records A in
   T->clabels_from_axis and the last variable of the first nest in
   T->clabels_example.

   ctables_prepare_table() combines the results for the row and column axes
   with `&&', so the result is used as a success flag.

   NOTE(review): this listing appears to have lost its shortest lines (return
   type, braces, `return' statements and some guards); confirm the exact
   control flow against the complete file. */
4473 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4475 enum pivot_axis_type label_pos = t->label_axis[a];
/* NOTE(review): lines are missing between the statement above and the one
   below; presumably an early exit when no move is requested -- confirm. */
4479 t->clabels_from_axis = a;
/* Keyword names substituted into the diagnostics below. */
4481 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4482 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4484 const struct ctables_stack *stack = &t->stacks[a];
4488 const struct ctables_nest *n0 = &stack->nests[0];
/* NOTE(review): the condition guarding this assertion is not visible. */
4491 assert (stack->n == 1);
/* v0, the last variable of the first nest, is the reference that the last
   variable of every other nest is compared against; c0 is its category
   specification. */
4495 const struct variable *v0 = n0->vars[n0->n - 1];
4496 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4497 t->clabels_example = v0;
/* A category of type CCT_FUNCTION in c0 is rejected. */
4499 for (size_t i = 0; i < c0->n_cats; i++)
4500 if (c0->cats[i].type == CCT_FUNCTION)
4502 msg (SE, _("%s=%s is not allowed with sorting based "
4503 "on a summary function."),
4504 subcommand_name, pos_name);
/* The variable to be moved must not be the nest's scale variable. */
4507 if (n0->n - 1 == n0->scale_idx)
4509 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4510 "but %s is a scale variable."),
4511 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest's last variable must also be categorical and must
   agree with v0 in width, value labels and category specification. */
4515 for (size_t i = 1; i < stack->n; i++)
4517 const struct ctables_nest *ni = &stack->nests[i];
4519 const struct variable *vi = ni->vars[ni->n - 1];
4520 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4522 if (ni->n - 1 == ni->scale_idx)
4524 msg (SE, _("%s=%s requires the variables to be moved to be "
4525 "categorical, but %s is a scale variable."),
4526 subcommand_name, pos_name, var_get_name (vi));
4529 if (var_get_width (v0) != var_get_width (vi))
4531 msg (SE, _("%s=%s requires the variables to be "
4532 "moved to have the same width, but %s has "
4533 "width %d and %s has width %d."),
4534 subcommand_name, pos_name,
4535 var_get_name (v0), var_get_width (v0),
4536 var_get_name (vi), var_get_width (vi));
4539 if (!val_labs_equal (var_get_value_labels (v0),
4540 var_get_value_labels (vi)))
4542 msg (SE, _("%s=%s requires the variables to be "
4543 "moved to have the same value labels, but %s "
4544 "and %s have different value labels."),
4545 subcommand_name, pos_name,
4546 var_get_name (v0), var_get_name (vi));
4549 if (!ctables_categories_equal (c0, ci))
4551 msg (SE, _("%s=%s requires the variables to be "
4552 "moved to have the same category "
4553 "specifications, but %s and %s have different "
4554 "category specifications."),
4555 subcommand_name, pos_name,
4556 var_get_name (v0), var_get_name (vi));
/* Searches the *N pointers in *SUM_VARS for VAR.  When the search does not
   end early, stores VAR at index *N, first growing the array with
   x2nrealloc() if *N has reached *ALLOCATED.

   NOTE(review): the return type, the `return' statements and the increment
   of *N are not visible in this listing.  enumerate_sum_vars() stores the
   result in spec->sum_var_idx, so it is presumably VAR's index in
   *SUM_VARS -- confirm against the complete file. */
4565 add_sum_var (struct variable *var,
4566 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4568 for (size_t i = 0; i < *n; i++)
4569 if (var == (*sum_vars)[i])
/* Not found (presumably): append. */
4572 if (*n >= *allocated)
4573 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4574 (*sum_vars)[*n] = var;
/* Walks the axis tree rooted at A.  For every summary specification of A
   whose function is CTSF_areaPCT_SUM, registers A->var through add_sum_var()
   and stores the result in the specification's sum_var_idx.  Both A->subs[]
   children are visited recursively with the same accumulator arguments.

   NOTE(review): several lines between the parameter list and the first loop
   are missing here (presumably a NULL check and a dispatch on the kind of
   axis node that decides which of the two parts below runs) -- confirm. */
4579 enumerate_sum_vars (const struct ctables_axis *a,
4580 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* All summary variants (N_CSVS of them) are scanned. */
4588 for (size_t i = 0; i < N_CSVS; i++)
4589 for (size_t j = 0; j < a->specs[i].n; j++)
4591 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4592 if (spec->function == CTSF_areaPCT_SUM)
4593 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recursion into the two sub-axes. */
4599 for (size_t i = 0; i < 2; i++)
4600 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The visible code does the following:

   - builds T->stacks[] for each pivot axis with enumerate_fts() and fills in
     each nest's areas[] index lists;
   - installs a one-nest stack with an empty nest, and resets
     T->label_axis[], for an axis handled by the fallback code;
   - gives every nest on the summary axis a default summary specification
     when it has none, and a CSV_TOTAL set cloned from CSV_CELL;
   - collects listwise-missing variables when smissing_listwise is set;
   - merges the summary specifications of all nests into T->summary_specs,
     assigning each specification its axis_idx;
   - enumerates the summed variables into T->sum_vars;
   - returns whether label positions are acceptable for rows and columns.

   NOTE(review): this listing appears to have lost its shortest lines
   (braces, `else', `continue', short declarations and conditions), so the
   exact nesting and several branch bodies cannot be read here; confirm
   against the complete file. */
4606 ctables_prepare_table (struct ctables_table *t)
4608 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4611 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4613 for (size_t j = 0; j < t->stacks[a].n; j++)
4615 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each area type, record in nest->areas[at] the indexes K of the nest's
   variables that pass the tests below.  Each list is allocated with room for
   nest->n entries. */
4616 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4618 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4619 nest->n_areas[at] = 0;
4621 for (size_t k = 0; k < nest->n; k++)
/* NOTE(review): the bodies of the following conditions are not visible. */
4623 if (k == nest->scale_idx)
4632 if (a != PIVOT_AXIS_LAYER)
4639 if (at == CTAT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4640 : at == CTAT_ROW ? a == PIVOT_AXIS_COLUMN
4641 : a == PIVOT_AXIS_ROW)
/* True for the last variable, or for the next-to-last one when the last
   variable is the scale variable. */
4643 if (k == nest->n - 1
4644 || (nest->scale_idx == nest->n - 1
4645 && k == nest->n - 2))
4651 if (a == PIVOT_AXIS_COLUMN)
4656 if (a == PIVOT_AXIS_ROW)
4661 nest->areas[at][nest->n_areas[at]++] = k;
/* Fallback (presumably taken when the axis has no specification): a stack
   holding a single nest with no variables. */
4668 struct ctables_nest *nest = xmalloc (sizeof *nest);
4669 *nest = (struct ctables_nest) { .n = 0 };
4670 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4672 /* There's no point in moving labels away from an axis that has no
4673 labels, so avoid dealing with the special cases around that. */
4674 t->label_axis[a] = a;
/* Defaults for the nests on the summary axis. */
4677 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4678 for (size_t i = 0; i < stack->n; i++)
4680 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary was specified: create a single default one, CTSF_MEAN if
   the set is marked is_scale, otherwise CTSF_COUNT. */
4681 if (!nest->specs[CSV_CELL].n)
4683 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4684 specs->specs = xmalloc (sizeof *specs->specs);
4687 enum ctables_summary_function function
4688 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4690 *specs->specs = (struct ctables_summary_spec) {
4691 .function = function,
4693 .format = ctables_summary_default_format (function, specs->var),
/* NOTE(review): a line is missing just above this assignment, so it may be
   conditional. */
4696 specs->var = nest->vars[0];
4698 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4699 &nest->specs[CSV_CELL]);
/* Cell summaries exist but no totals: totals reuse the cell summaries. */
4701 else if (!nest->specs[CSV_TOTAL].n)
4702 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4703 &nest->specs[CSV_CELL]);
/* Listwise handling of missing values: gather the scale variables of the
   other nests that share this nest's group_head. */
4705 if (t->ctables->smissing_listwise)
4707 struct variable **listwise_vars = NULL;
4709 size_t allocated = 0;
4711 for (size_t j = nest->group_head; j < stack->n; j++)
4713 const struct ctables_nest *other_nest = &stack->nests[j];
4714 if (other_nest->group_head != nest->group_head)
4717 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4720 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4721 sizeof *listwise_vars);
4722 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Hand the list to each summary variant.  NOTE(review): the line before the
   xmemdup() call is missing, so the copy may be conditional. */
4725 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4728 listwise_vars = xmemdup (listwise_vars,
4729 n * sizeof *listwise_vars);
4730 nest->specs[sv].listwise_vars = listwise_vars;
4731 nest->specs[sv].n_listwise_vars = n;
/* Merge the specification sets of all nests and variants into
   T->summary_specs.  One merge_item per set tracks the set and an offset
   into it. */
4736 struct ctables_summary_spec_set *merged = &t->summary_specs;
4737 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4739 for (size_t j = 0; j < stack->n; j++)
4741 const struct ctables_nest *nest = &stack->nests[j];
4743 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4744 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest current item according to merge_item_compare_3way() and
   append its specification to the merged set.  NOTE(review): the enclosing
   loop header is not visible. */
4749 struct merge_item min = items[0];
4750 for (size_t j = 1; j < n_left; j++)
4751 if (merge_item_compare_3way (&items[j], &min) < 0)
4754 if (merged->n >= merged->allocated)
4755 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4756 sizeof *merged->specs);
4757 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to the minimum gets the new merged index
   and advances; an exhausted item is replaced by the last live one. */
4759 for (size_t j = 0; j < n_left; )
4761 if (merge_item_compare_3way (&items[j], &min) == 0)
4763 struct merge_item *item = &items[j];
4764 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4765 if (++item->ofs >= item->set->n)
4767 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below look like debugging output; whether
   they are compiled in (for example behind a preprocessor guard on a line
   not shown) cannot be determined from this listing. */
4777 for (size_t j = 0; j < merged->n; j++)
4778 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4780 for (size_t j = 0; j < stack->n; j++)
4782 const struct ctables_nest *nest = &stack->nests[j];
4783 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4785 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4786 for (size_t k = 0; k < specs->n; k++)
4787 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4788 specs->specs[k].axis_idx);
/* Collect the variables used by CTSF_areaPCT_SUM summaries on the summary
   axis into T->sum_vars. */
4794 size_t allocated_sum_vars = 0;
4795 enumerate_sum_vars (t->axes[t->summary_axis],
4796 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
/* The table is usable only if both label moves are acceptable. */
4798 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4799 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, looks at the last variable of every nest in the stack of axis
   A of table T and, when the variable's value in C matches one of the
   variable's categories, inserts that value into T through
   ctables_value_insert().

   NOTE(review): the statement controlled by the SYSMIS test and the trailing
   arguments of ctables_categories_match() are on lines missing from this
   listing; presumably system-missing numeric values are skipped -- confirm. */
4803 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4804 enum pivot_axis_type a)
4806 struct ctables_stack *stack = &t->stacks[a];
4807 for (size_t i = 0; i < stack->n; i++)
4809 const struct ctables_nest *nest = &stack->nests[i];
4810 const struct variable *var = nest->vars[nest->n - 1];
4811 const union value *value = case_data (c, var);
4813 if (var_is_numeric (var) && value->f == SYSMIS)
4816 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4818 ctables_value_insert (t, value, var_get_width (var));
4823 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4825 const struct ctables_value *const *ap = a_;
4826 const struct ctables_value *const *bp = b_;
4827 const struct ctables_value *a = *ap;
4828 const struct ctables_value *b = *bp;
4829 const int *width = width_;
4830 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of pointers to every entry of
   T->clabels_values_map, sorts it by value (using the width of
   T->clabels_example) and stores each entry's position in the sorted array
   in its `leaf' member.  Before that, labeled values of the example variable
   that match its category specification are inserted into T.

   NOTE(review): lines are missing before the value-label loop (so the
   insertion may be conditional) and the declaration of `i' is not visible;
   confirm against the complete file. */
4834 ctables_sort_clabels_values (struct ctables_table *t)
4836 const struct variable *v0 = t->clabels_example;
4837 int width = var_get_width (v0);
4839 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add values that have value labels and match a category of c0. */
4842 const struct val_labs *val_labs = var_get_value_labels (v0);
4843 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4844 vl = val_labs_next (val_labs, vl))
4845 if (ctables_categories_match (c0, &vl->value, v0))
4846 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array. */
4849 size_t n = hmap_count (&t->clabels_values_map);
4850 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4852 struct ctables_value *clv;
4854 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4855 t->clabels_values[i++] = clv;
4856 t->n_clabels_values = n;
/* Sort by value; the width is passed as the comparison's auxiliary data. */
4859 sort (t->clabels_values, n, sizeof *t->clabels_values,
4860 compare_clabels_values_3way, &width);
/* Leaf indexes follow sorted order. */
4862 for (size_t i = 0; i < n; i++)
4863 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, through ctables_add_occurrence(), values of VAR that
   are selected by the categories in CATS.  Explicit numeric and string
   categories contribute their own value; the remaining visible branches scan
   VAR's value labels and add the labeled values that satisfy the branch's
   test (numeric range, string range, missing, all, or non-missing unless
   include_missing is set).

   NOTE(review): most `case' labels, `break' statements and braces are
   missing from this listing, so which category type selects which branch is
   not shown here; confirm against the complete file. */
4867 ctables_add_category_occurrences (const struct variable *var,
4868 struct hmap *occurrences,
4869 const struct ctables_categories *cats)
4871 const struct val_labs *val_labs = var_get_value_labels (var);
4873 for (size_t i = 0; i < cats->n_cats; i++)
4875 const struct ctables_category *c = &cats->cats[i];
/* A single number, wrapped in a temporary union value. */
4879 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string, copied into a value of VAR's width and padded on the
   right with spaces. */
4885 int width = var_get_width (var);
4887 value_init (&value, width);
4888 value_copy_buf_rpad (&value, width,
4889 CHAR_CAST (uint8_t *, c->string.string),
4890 c->string.length, ' ');
4891 ctables_add_occurrence (var, &value, occurrences);
4892 value_destroy (&value, width);
/* Labeled numeric values within [c->nrange[0], c->nrange[1]]. */
4897 assert (var_is_numeric (var));
4898 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4899 vl = val_labs_next (val_labs, vl))
4900 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4901 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled string values accepted by in_string_range() for c->srange. */
4905 assert (var_is_alpha (var));
4906 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4907 vl = val_labs_next (val_labs, vl))
4908 if (in_string_range (&vl->value, var, c->srange))
4909 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing for VAR. */
4913 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4914 vl = val_labs_next (val_labs, vl))
4915 if (var_is_value_missing (var, &vl->value))
4916 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
4920 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4921 vl = val_labs_next (val_labs, vl))
4922 ctables_add_occurrence (var, &vl->value, occurrences);
4925 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless c->include_missing. */
4935 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4936 vl = val_labs_next (val_labs, vl))
4937 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4938 ctables_add_occurrence (var, &vl->value, occurrences);
4941 case CCT_EXCLUDED_MISSING:
/* Recursive helper that enumerates combinations of category values for
   section S, one nest variable at a time.  A is the axis being processed and
   A_IDX the index of the variable within S->nests[A].  For each recorded
   occurrence of the current variable, the value is written into scratch case
   C, the matching category is stored in CATS[A][A_IDX], and the recursion
   continues with the next variable.  Once every axis is done, the
   combination is passed to ctables_cell_insert__().  CCT_POSTCOMPUTE
   categories are enumerated as well, without changing C.

   NOTE(review): braces and `else' lines are missing from this listing, so
   the exact branch structure is not visible; CATS has room for only 10
   variables per axis. */
4948 ctables_section_recurse_add_empty_categories (
4949 struct ctables_section *s,
4950 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4951 enum pivot_axis_type a, size_t a_idx)
/* All axes handled: insert the cell for this combination. */
4953 if (a >= PIVOT_N_AXES)
4954 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or all its variables handled: go to the next axis. */
4955 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4956 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4959 const struct variable *var = s->nests[a]->vars[a_idx];
4960 const struct ctables_categories *categories = s->table->categories[
4961 var_get_dict_index (var)];
4962 int width = var_get_width (var);
4963 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4964 const struct ctables_occurrence *o;
4965 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case by the occurrence's value. */
4967 union value *value = case_data_rw (c, var);
4968 value_destroy (value, width);
4969 value_clone (value, &o->value, width);
4970 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4971 assert (cats[a][a_idx] != NULL);
4972 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories get their own combinations. */
4975 for (size_t i = 0; i < categories->n_cats; i++)
4977 const struct ctables_category *cat = &categories->cats[i];
4978 if (cat->type == CCT_POSTCOMPUTE)
4980 cats[a][a_idx] = cat;
4981 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells for empty categories to section S.  For every non-scale
   variable in S's nests whose category specification has show_empty set, the
   categories' values are added to that variable's occurrence map; then the
   recursive helper is run over a freshly created scratch case to insert a
   cell for each combination.

   NOTE(review): `show_empty' is declared but its uses, any early return, and
   the release of the scratch case are on lines missing from this listing;
   confirm against the complete file. */
4988 ctables_section_add_empty_categories (struct ctables_section *s)
4990 bool show_empty = false;
4991 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4993 for (size_t k = 0; k < s->nests[a]->n; k++)
4994 if (k != s->nests[a]->scale_idx)
4996 const struct variable *var = s->nests[a]->vars[k];
4997 const struct ctables_categories *cats = s->table->categories[
4998 var_get_dict_index (var)];
4999 if (cats->show_empty)
5002 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension caps the number of variables per axis at 10 (marked
   XXX by the original author). */
5008 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5009 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5010 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S so that it can be reused: removes every occurrence from
   the per-variable occurrence maps, every cell from S->cells (destroying the
   cell's category values and summaries) and every area from S->areas[].
   The hash maps themselves are kept and shrunk, not destroyed.

   NOTE(review): short lines such as `free (...)' calls for the removed
   nodes are presumably among the lines missing from this listing --
   confirm against the complete file. */
5015 ctables_section_clear (struct ctables_section *s)
/* Occurrences: one map per non-scale variable of each axis's nest. */
5017 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5019 const struct ctables_nest *nest = s->nests[a];
5020 for (size_t i = 0; i < nest->n; i++)
5021 if (i != nest->scale_idx)
5023 const struct variable *var = nest->vars[i];
5024 int width = var_get_width (var);
5025 struct ctables_occurrence *o, *next;
5026 struct hmap *map = &s->occurrences[a][i];
5027 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5029 value_destroy (&o->value, width);
5030 hmap_delete (map, &o->node);
/* Cells. */
5037 struct ctables_cell *cell, *next_cell;
5038 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5040 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5042 const struct ctables_nest *nest = s->nests[a];
5043 for (size_t i = 0; i < nest->n; i++)
5044 if (i != nest->scale_idx)
5045 value_destroy (&cell->axes[a].cvs[i].value,
5046 var_get_width (nest->vars[i]));
5047 free (cell->axes[a].cvs);
/* The summaries are laid out according to the summary-axis nest's
   specification set for the cell's variant (cell->sv). */
5050 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5051 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5052 for (size_t i = 0; i < specs->n; i++)
5053 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5054 free (cell->summaries);
5056 hmap_delete (&s->cells, &cell->node);
5059 hmap_shrink (&s->cells);
/* Areas. */
5061 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5063 struct ctables_area *area, *next_area;
5064 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5068 hmap_delete (&s->areas[at], &area->node);
5071 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: first clears its contents with
   ctables_section_clear(), then destroys the per-variable occurrence maps
   and frees their arrays, and finally destroys the cell map and the area
   maps. */
5076 ctables_section_uninit (struct ctables_section *s)
5078 ctables_section_clear (s);
5080 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5082 struct ctables_nest *nest = s->nests[a];
5083 for (size_t i = 0; i < nest->n; i++)
5084 hmap_destroy (&s->occurrences[a][i]);
5085 free (s->occurrences[a]);
5088 hmap_destroy (&s->cells);
5089 for (size_t i = 0; i < N_CTATS; i++)
5090 hmap_destroy (&s->areas[i]);
/* Empties table T for reuse: clears each of its sections and, when the table
   has a category-label example variable, removes every entry from
   T->clabels_values_map and drops the T->clabels_values array.

   NOTE(review): braces and possibly a `free' of each removed entry are on
   lines missing from this listing, so it is not visible which of the final
   statements are inside the `if' -- confirm against the complete file. */
5094 ctables_table_clear (struct ctables_table *t)
5096 for (size_t i = 0; i < t->n_sections; i++)
5097 ctables_section_clear (&t->sections[i]);
5099 if (t->clabels_example)
/* All stored values have the example variable's width. */
5101 int width = var_get_width (t->clabels_example);
5102 struct ctables_value *value, *next_value;
5103 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5104 &t->clabels_values_map)
5106 value_destroy (&value->value, width);
5107 hmap_delete (&t->clabels_values_map, &value->node);
5110 hmap_shrink (&t->clabels_values_map);
5112 free (t->clabels_values);
5113 t->clabels_values = NULL;
5114 t->n_clabels_values = 0;
/* Runs the tables: reads the cases from INPUT group by group, feeds every
   case to every section of every table, and after each group sorts the moved
   category labels, adds empty categories, outputs each table and clears it
   for the next group.  Returns the result of casegrouper_destroy().

   Groups come from the dictionary's split variables when the split type is
   SPLIT_SEPARATE, otherwise from a grouper created with no variables.

   NOTE(review): the end of the parameter list (which presumably declares
   `ct'), the return type and several short lines are missing from this
   listing; confirm against the complete file. */
5119 ctables_execute (struct dataset *ds, struct casereader *input,
/* Allocate room for one section per combination of row, column and layer
   nests (at least one per axis), then create the sections. */
5122 for (size_t i = 0; i < ct->n_tables; i++)
5124 struct ctables_table *t = ct->tables[i];
5125 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5126 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5127 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5128 sizeof *t->sections);
5129 size_t ix[PIVOT_N_AXES];
5130 ctables_table_add_section (t, 0, ix);
5133 struct dictionary *dict = dataset_dict (ds);
5135 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5136 struct casegrouper *grouper
5138 ? casegrouper_create_splits (input, dict)
5139 : casegrouper_create_vars (input, NULL, 0));
5140 struct casereader *group;
5141 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to print the split-file values.
   NOTE(review): the guarding conditions are not visible. */
5145 struct ccase *c = casereader_peek (group, 0);
5148 output_split_file_values (ds, c);
5153 bool warn_on_invalid = true;
5154 for (struct ccase *c = casereader_read (group); c;
5155 case_unref (c), c = casereader_read (group))
/* d_weight is the dictionary's case weight; e_weight comes from
   ct->e_weight when that is set (the alternative is not visible). */
5157 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5158 double e_weight = (ct->e_weight
5159 ? var_force_valid_weight (ct->e_weight,
5160 case_num (c, ct->e_weight),
5164 for (size_t i = 0; i < ct->n_tables; i++)
5166 struct ctables_table *t = ct->tables[i];
5168 for (size_t j = 0; j < t->n_sections; j++)
5169 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose labels are moved elsewhere also record the values seen. */
5171 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5172 if (t->label_axis[a] != a)
5173 ctables_insert_clabels_values (t, c, a);
5176 casereader_destroy (group);
/* End of group: finish, output and reset every table. */
5178 for (size_t i = 0; i < ct->n_tables; i++)
5180 struct ctables_table *t = ct->tables[i];
5182 if (t->clabels_example)
5183 ctables_sort_clabels_values (t);
5185 for (size_t j = 0; j < t->n_sections; j++)
5186 ctables_section_add_empty_categories (&t->sections[j]);
5188 ctables_table_output (ct, t);
5189 ctables_table_clear (t);
5192 return casegrouper_destroy (grouper);
/* Type of a postcompute-expression parsing function for one precedence
   level.  Functions of this type are passed as `parse_next_level' to the
   binary-operator parsing helpers, which call them to parse each operand. */
5197 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5198 struct dictionary *);
/* Frees postcompute expression E.  Depending on the kind of node, this
   releases the owned string (CTPO_CAT_STRING), both range endpoints
   (CTPO_CAT_SRANGE), or recursively destroys both sub-expressions; the
   category kinds listed at the end own nothing extra.  The node's location
   is destroyed as well.

   NOTE(review): the `switch', the `case' labels of the operator nodes, the
   `break' statements, any NULL check on E and the final free of E itself
   are on lines missing from this listing.  ctables_parse_pcompute() calls
   this on an expression that may be NULL, so a NULL check presumably
   exists -- confirm against the complete file. */
5201 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5207 case CTPO_CAT_STRING:
5208 ss_dealloc (&e->string);
5211 case CTPO_CAT_SRANGE:
5212 for (size_t i = 0; i < 2; i++)
5213 ss_dealloc (&e->srange[i]);
/* Nodes with sub-expressions. */
5222 for (size_t i = 0; i < 2; i++)
5223 ctables_pcexpr_destroy (e->subs[i]);
5227 case CTPO_CAT_NUMBER:
5228 case CTPO_CAT_NRANGE:
5229 case CTPO_CAT_MISSING:
5230 case CTPO_CAT_OTHERNM:
5231 case CTPO_CAT_SUBTOTAL:
5232 case CTPO_CAT_TOTAL:
5236 msg_location_destroy (e->location);
5241 static struct ctables_pcexpr *
5242 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5243 struct ctables_pcexpr *sub0,
5244 struct ctables_pcexpr *sub1)
5246 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5247 *e = (struct ctables_pcexpr) {
5249 .subs = { sub0, sub1 },
5250 .location = msg_location_merged (sub0->location, sub1->location),
5255 /* How to parse an operator. */
/* `token' is the lexer token that spells the operator; `op' is the
   postcompute operation that the token stands for.
   NOTE(review): the `struct operator' line and the braces are missing from
   this listing. */
5258 enum token_type token;
5259 enum ctables_postcompute_op op;
/* Looks through the N_OPS entries of OPS for one whose token equals the
   lexer's current token.

   NOTE(review): the statements that consume the token and return are on
   lines missing from this listing.  T_NEG_NUM is treated specially;
   presumably the token is left in place so that the negative number can be
   parsed as the right-hand operand, and NULL is returned when nothing
   matches -- confirm against the complete file. */
5262 static const struct operator *
5263 ctables_pcexpr_match_operator (struct lexer *lexer,
5264 const struct operator ops[], size_t n_ops)
5266 for (const struct operator *op = ops; op < ops + n_ops; op++)
5267 if (lex_token (lexer) == op->token)
5269 if (op->token != T_NEG_NUM)
/* Parses a left-associative chain of binary operators drawn from the N_OPS
   entries in OPS, starting from the already parsed left operand LHS.  Each
   right operand is parsed with PARSE_NEXT_LEVEL and folded into LHS as a new
   binary node.  If CHAIN_WARNING is nonnull and more than one operator was
   chained, CHAIN_WARNING is issued as a warning at LHS's location.  When a
   right operand fails to parse, LHS is destroyed.

   NOTE(review): the loop exit, the `return' statements and the braces are on
   lines missing from this listing; confirm against the complete file. */
5278 static struct ctables_pcexpr *
5279 ctables_pcexpr_parse_binary_operators__ (
5280 struct lexer *lexer, struct dictionary *dict,
5281 const struct operator ops[], size_t n_ops,
5282 parse_recursively_func *parse_next_level,
5283 const char *chain_warning, struct ctables_pcexpr *lhs)
/* op_count is the number of operators folded into LHS so far. */
5285 for (int op_count = 0; ; op_count++)
5287 const struct operator *op
5288 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5291 if (op_count > 1 && chain_warning)
5292 msg_at (SW, lhs->location, "%s", chain_warning);
5297 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5300 ctables_pcexpr_destroy (lhs);
5304 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses an operand with PARSE_NEXT_LEVEL and then any chain of binary
   operators from OPS that follows it, delegating the chain to
   ctables_pcexpr_parse_binary_operators__().

   NOTE(review): the handling of a failed first operand and one argument line
   of the final call are missing from this listing. */
5308 static struct ctables_pcexpr *
5309 ctables_pcexpr_parse_binary_operators (
5310 struct lexer *lexer, struct dictionary *dict,
5311 const struct operator ops[], size_t n_ops,
5312 parse_recursively_func *parse_next_level, const char *chain_warning)
5314 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5318 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5320 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this function
   for parenthesized subexpressions before it is defined. */
5323 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5324 struct dictionary *);
5326 static struct ctables_pcexpr
5327 ctpo_cat_nrange (double low, double high)
5329 return (struct ctables_pcexpr) {
5330 .op = CTPO_CAT_NRANGE,
5331 .nrange = { low, high },
5335 static struct ctables_pcexpr
5336 ctpo_cat_srange (struct substring low, struct substring high)
5338 return (struct ctables_pcexpr) {
5339 .op = CTPO_CAT_SRANGE,
5340 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node whose location spans the tokens consumed.  The forms
   recognized by the visible code are:

     - a number (CTPO_CONSTANT);
     - MISSING, OTHERNM, TOTAL;
     - SUBTOTAL, optionally followed by [N] with N >= 1;
     - a bracketed category reference: [LO THRU x], [n], [n THRU m],
       [n THRU HI], ['s'], ['s' THRU 't'] or ['s' THRU HI];
     - a parenthesized subexpression, parsed with ctables_pcexpr_parse_add().

   Anything else is reported with lex_error().

   NOTE(review): `return' statements, `else' lines, token-advancing calls and
   braces are on lines missing from this listing, so the error paths and the
   value returned for a parenthesized subexpression are not visible; confirm
   against the complete file. */
5344 static struct ctables_pcexpr *
5345 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remembered so the node's location can start at the first token. */
5347 int start_ofs = lex_ofs (lexer);
5348 struct ctables_pcexpr e;
5349 if (lex_is_number (lexer))
5351 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5352 .number = lex_number (lexer) };
5355 else if (lex_match_id (lexer, "MISSING"))
5356 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5357 else if (lex_match_id (lexer, "OTHERNM"))
5358 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5359 else if (lex_match_id (lexer, "TOTAL"))
5360 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5361 else if (lex_match_id (lexer, "SUBTOTAL"))
/* subtotal_index stays 0 when no [N] follows SUBTOTAL. */
5363 size_t subtotal_index = 0;
5364 if (lex_match (lexer, T_LBRACK))
5366 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5368 subtotal_index = lex_integer (lexer);
5370 if (!lex_force_match (lexer, T_RBRACK))
5373 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5374 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5376 else if (lex_match (lexer, T_LBRACK))
5378 if (lex_match_id (lexer, "LO"))
5380 if (!lex_force_match_id (lexer, "THRU"))
/* An open lower bound is a null substring for strings and -DBL_MAX for
   numbers. */
5383 if (lex_is_string (lexer))
5385 struct substring low = { .string = NULL };
5386 struct substring high = parse_substring (lexer, dict);
5387 e = ctpo_cat_srange (low, high);
5391 if (!lex_force_num (lexer))
5393 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5397 else if (lex_is_number (lexer))
5399 double number = lex_number (lexer);
5401 if (lex_match_id (lexer, "THRU"))
/* An open upper bound is DBL_MAX. */
5403 if (lex_match_id (lexer, "HI"))
5404 e = ctpo_cat_nrange (number, DBL_MAX);
5407 if (!lex_force_num (lexer))
5409 e = ctpo_cat_nrange (number, lex_number (lexer));
5414 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5417 else if (lex_is_string (lexer))
5419 struct substring s = parse_substring (lexer, dict);
5421 if (lex_match_id (lexer, "THRU"))
5423 struct substring high;
/* An open upper bound is a null substring. */
5425 if (lex_match_id (lexer, "HI"))
5426 high = (struct substring) { .string = NULL };
5429 if (!lex_force_string (lexer))
5434 high = parse_substring (lexer, dict);
5437 e = ctpo_cat_srange (s, high);
5440 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5444 lex_error (lexer, NULL);
/* A missing `]' releases any strings already stored in the node. */
5448 if (!lex_force_match (lexer, T_RBRACK))
5450 if (e.op == CTPO_CAT_STRING)
5451 ss_dealloc (&e.string);
5452 else if (e.op == CTPO_CAT_SRANGE)
5454 ss_dealloc (&e.srange[0]);
5455 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression. */
5460 else if (lex_match (lexer, T_LPAREN))
5462 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5465 if (!lex_force_match (lexer, T_RPAREN))
5467 ctables_pcexpr_destroy (ep);
5474 lex_error (lexer, NULL);
/* The location runs from the first token to the last token consumed; the
   result is a heap copy of the local node. */
5478 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5479 return xmemdup (&e, sizeof e);
/* Allocates a new expression node on the heap whose location spans the
   tokens from START_OFS up to the last token consumed from LEXER.  The
   callers use it to wrap SUB in a unary negation.

   NOTE(review): the initializer lines that set the node's operation and
   sub-expression, and the `return', are missing from this listing; confirm
   against the complete file. */
5482 static struct ctables_pcexpr *
5483 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5484 struct lexer *lexer, int start_ofs)
5486 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5487 *e = (struct ctables_pcexpr) {
5490 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of primaries joined by T_EXP, parsed
   with ctables_pcexpr_parse_binary_operators(), passing a warning about
   left-associativity.

   When the current token is a negative number that is directly followed by
   T_EXP, the number's magnitude (the negated token value) becomes the left
   operand of the exponentiation chain and the whole chain is then wrapped
   with ctables_pcexpr_allocate_neg().

   NOTE(review): the `Special case' comment inside the body is cut off in
   this listing (its closing line is missing), as are the call that advances
   the lexer past the number and the check for a failed parse; confirm
   against the complete file. */
5495 static struct ctables_pcexpr *
5496 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5498 static const struct operator op = { T_EXP, CTPO_POW };
5500 const char *chain_warning =
5501 _("The exponentiation operator (`**') is left-associative: "
5502 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5503 "To disable this warning, insert parentheses.");
5505 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5506 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5507 ctables_pcexpr_parse_primary,
5510 /* Special case for situations like "-5**6", which must be parsed as
5513 int start_ofs = lex_ofs (lexer);
5514 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5515 *lhs = (struct ctables_pcexpr) {
5516 .op = CTPO_CONSTANT,
5517 .number = -lex_tokval (lexer),
5518 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5522 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5523 lexer, dict, &op, 1,
5524 ctables_pcexpr_parse_primary, chain_warning, lhs);
5528 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5531 /* Parses the unary minus level. */
/* Without a leading T_DASH this is just the exponentiation level.  With
   one, the operand is parsed by a recursive call (so repeated minus signs
   nest) and wrapped by ctables_pcexpr_allocate_neg(), with a location
   starting at the minus sign.
   NOTE(review): the handling of a failed operand parse is on lines missing
   from this listing. */
5532 static struct ctables_pcexpr *
5533 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5535 int start_ofs = lex_ofs (lexer);
5536 if (!lex_match (lexer, T_DASH))
5537 return ctables_pcexpr_parse_exp (lexer, dict);
5539 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5543 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5546 /* Parses the multiplication and division level. */
5547 static struct ctables_pcexpr *
5548 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5550 static const struct operator ops[] =
5552 { T_ASTERISK, CTPO_MUL },
5553 { T_SLASH, CTPO_DIV },
5556 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5557 sizeof ops / sizeof *ops,
5558 ctables_pcexpr_parse_neg, NULL);
5561 /* Parses the addition and subtraction level. */
5562 static struct ctables_pcexpr *
5563 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5565 static const struct operator ops[] =
5567 { T_PLUS, CTPO_ADD },
5568 { T_DASH, CTPO_SUB },
5569 { T_NEG_NUM, CTPO_ADD },
5572 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5573 ops, sizeof ops / sizeof *ops,
5574 ctables_pcexpr_parse_mul, NULL);
5577 static struct ctables_postcompute *
5578 ctables_find_postcompute (struct ctables *ct, const char *name)
5580 struct ctables_postcompute *pc;
5581 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5582 utf8_hash_case_string (name, 0), &ct->postcomputes)
5583 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form `&NAME = EXPR (expression)'
   and records it under NAME in the postcompute table.  A definition for a
   name that already exists replaces the earlier one: a warning is issued at
   the new definition and a note at the previous one, and the old expression
   and location are destroyed.  A new name gets a freshly allocated entry
   inserted into ct->postcomputes, hashed case-insensitively.

   NOTE(review): the end of the parameter list (which presumably declares
   `ct'), the return type, the `return' statements, cleanup on the error
   paths and the lines around the label assignment are missing from this
   listing; confirm against the complete file. */
5589 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Start one token back, presumably at the subcommand keyword. */
5592 int pcompute_start = lex_ofs (lexer) - 1;
5594 if (!lex_match (lexer, T_AND))
5596 lex_error_expecting (lexer, "&");
5599 if (!lex_force_id (lexer))
5602 char *name = ss_xstrdup (lex_tokss (lexer));
5605 if (!lex_force_match (lexer, T_EQUALS)
5606 || !lex_force_match_id (lexer, "EXPR")
5607 || !lex_force_match (lexer, T_LPAREN))
/* expr_start..expr_end delimit the tokens of the expression itself, used
   below for the label text. */
5613 int expr_start = lex_ofs (lexer);
5614 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5615 int expr_end = lex_ofs (lexer) - 1;
5616 if (!expr || !lex_force_match (lexer, T_RPAREN))
5618 ctables_pcexpr_destroy (expr);
5622 int pcompute_end = lex_ofs (lexer) - 1;
5624 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5627 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition of an existing postcompute. */
5630 msg_at (SW, location, _("New definition of &%s will override the "
5631 "previous definition."),
5633 msg_at (SN, pc->location, _("This is the previous definition."));
5635 ctables_pcexpr_destroy (pc->expr);
5636 msg_location_destroy (pc->location);
/* First definition: the new entry takes NAME. */
5641 pc = xmalloc (sizeof *pc);
5642 *pc = (struct ctables_postcompute) { .name = name };
5643 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5644 utf8_hash_case_string (pc->name, 0));
5647 pc->location = location;
/* The label is the source text of the expression.  NOTE(review): the line
   just before this assignment is missing, so it may be conditional. */
5649 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary-function/format pairs into *SSS, which is first
   reset to an empty set.  Parsing continues until the end of the command, a
   slash, or an identifier matching LABEL or HIDESOURCECATS.  Each item is a
   summary function, then a percentile in [0,100] when the function is
   CTSF_PTILE, then a format specifier; the result is appended to SSS->specs.

   NOTE(review): the return type, the `return'/`goto' statements and a few
   initializer fields are on lines missing from this listing.  The final
   ctables_summary_spec_set_uninit() call is presumably the failure path --
   confirm against the complete file. */
5654 ctables_parse_pproperties_format (struct lexer *lexer,
5655 struct ctables_summary_spec_set *sss)
5657 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5659 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5660 && !(lex_token (lexer) == T_ID
5661 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5662 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5663 lex_tokss (lexer)))))
5665 /* Parse function. */
5666 enum ctables_summary_function function;
5668 enum ctables_area_type area;
5669 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5672 /* Parse percentile. */
5673 double percentile = 0;
5674 if (function == CTSF_PTILE)
5676 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5678 percentile = lex_number (lexer);
5683 struct fmt_spec format;
5684 bool is_ctables_format;
5685 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the parsed specification, growing the array when it is full. */
5688 if (sss->n >= sss->allocated)
5689 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5690 sizeof *sss->specs);
5691 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5692 .function = function,
5693 .weighted = weighted,
5695 .percentile = percentile,
5697 .is_ctables_format = is_ctables_format,
5703 ctables_summary_spec_set_uninit (sss);
/* Parses a list of `&NAME' references to existing postcomputes followed by
   settings that are applied to every listed postcompute:

     LABEL [=] 'string'        replaces the label;
     FORMAT [=] function...    replaces the summary specifications;
     HIDESOURCECATS [=] bool   sets hide_source_cats.

   The `=' after each keyword is optional.  An unknown postcompute name is
   reported with an SE message, and any other keyword is a syntax error.

   NOTE(review): the return type, the `return'/`goto' statements, the
   declaration of `n_pcs', the statement that stores each postcompute in
   `pcs', and the conditions around the specs handling are on lines missing
   from this listing; confirm against the complete file. */
5708 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5710 struct ctables_postcompute **pcs = NULL;
5712 size_t allocated_pcs = 0;
/* Collect the postcomputes named by `&NAME'. */
5714 while (lex_match (lexer, T_AND))
5716 if (!lex_force_id (lexer))
5718 struct ctables_postcompute *pc
5719 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5722 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5727 if (n_pcs >= allocated_pcs)
5728 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings, up to the next slash or the end of the command. */
5732 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5734 if (lex_match_id (lexer, "LABEL"))
5736 lex_match (lexer, T_EQUALS);
5737 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
5740 for (size_t i = 0; i < n_pcs; i++)
5742 free (pcs[i]->label);
5743 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5748 else if (lex_match_id (lexer, "FORMAT"))
5750 lex_match (lexer, T_EQUALS);
5752 struct ctables_summary_spec_set sss;
5753 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets a clone of the parsed set; the temporary set is
   released afterward.  NOTE(review): lines between the two statements below
   are missing, so they may be alternatives rather than a sequence. */
5756 for (size_t i = 0; i < n_pcs; i++)
5759 ctables_summary_spec_set_uninit (pcs[i]->specs);
5761 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5762 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5764 ctables_summary_spec_set_uninit (&sss);
5766 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5768 lex_match (lexer, T_EQUALS);
5769 bool hide_source_cats;
5770 if (!parse_bool (lexer, &hide_source_cats))
5772 for (size_t i = 0; i < n_pcs; i++)
5773 pcs[i]->hide_source_cats = hide_source_cats;
5777 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time with localtime() and
   formatted by strftime() according to FORMAT.

   NOTE(review): the declaration of the `value' buffer is not visible in this
   listing, so its size is unknown here.  The results of localtime() and
   strftime() are used without being checked. */
5790 put_strftime (struct string *out, time_t now, const char *format)
5792 const struct tm *tm = localtime (&now);
5794 strftime (value, sizeof value, format, tm);
5795 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it (by PREFIX's length).

   NOTE(review): the return statements are not visible in this listing.
   put_title_text() uses the result as a condition, so it presumably reports
   whether the prefix was found and skipped -- confirm. */
5799 skip_prefix (struct substring *s, struct substring prefix)
5801 if (ss_starts_with (*s, prefix))
5803 ss_advance (s, prefix.length);
/* Appends to OUT a text rendering of the tokens at lexer offsets EXPR_START
   (inclusive) through EXPR_END (exclusive).  DICT is used to replace variable
   names by their variable labels where a label exists.

   NOTE(review): the bodies of the T_LBRACK/T_RBRACK branches and the
   declaration of `nest' are not visible in this listing; they presumably
   track bracket nesting -- confirm. */
5811 put_table_expression (struct string *out, struct lexer *lexer,
5812 struct dictionary *dict, int expr_start, int expr_end)
5815 for (int ofs = expr_start; ofs < expr_end; ofs++)
5817 const struct token *t = lex_ofs_token (lexer, ofs);
5818 if (t->type == T_LBRACK)
5820 else if (t->type == T_RBRACK && nest > 0)
5826 else if (t->type == T_ID)
/* An identifier that names a variable with a label is shown as that label;
   any other identifier is shown as written. */
5828 const struct variable *var
5829 = dict_lookup_var (dict, t->string.string);
5830 const char *label = var ? var_get_label (var) : NULL;
5831 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space before the token unless it is the first token, it is a right
   parenthesis, or OUT already ends in a space. */
5835 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5836 ds_put_byte (out, ' ');
/* Emit the token's syntax representation as returned by the lexer. */
5838 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5839 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token or a left
   parenthesis. */
5842 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5843 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding these keywords along the way:

     )DATE   NOW formatted with strftime() "%x".
     )TIME   NOW formatted with strftime() "%X".
     )TABLE  The table expression at lexer offsets EXPR_START through
             EXPR_END, rendered by put_table_expression() using DICT.

   A ')' that does not begin one of these keywords is copied literally.

   NOTE(review): the enclosing loop and its exit statement are not visible in
   this listing. */
5849 put_title_text (struct string *out, struct substring in, time_t now,
5850 struct lexer *lexer, struct dictionary *dict,
5851 int expr_start, int expr_end)
/* Copy the text before the next ')' verbatim and step past it. */
5855 size_t chunk = ss_find_byte (in, ')');
5856 ds_put_substring (out, ss_head (in, chunk));
5857 ss_advance (&in, chunk);
5858 if (ss_is_empty (in))
/* IN now starts with ')': try each keyword in turn. */
5861 if (skip_prefix (&in, ss_cstr (")DATE")))
5862 put_strftime (out, now, "%x");
5863 else if (skip_prefix (&in, ss_cstr (")TIME")))
5864 put_strftime (out, now, "%X");
5865 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5866 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ')' itself and continue after it. */
5869 ds_put_byte (out, ')');
5870 ss_advance (&in, 1);
/* Parses and executes the CTABLES command read from LEXER against dataset
   DS.

   Visible steps: run a measure guesser over the data, build a `struct
   ctables' with default settings, parse the global subcommands that precede
   the first TABLE, parse each TABLE together with its per-table subcommands,
   and finally pass the cases to ctables_execute().  Returns CMD_SUCCESS if
   both ctables_execute() and proc_commit() succeed, otherwise CMD_FAILURE.

   NOTE(review): this listing omits many short lines (braces, `goto'/`return'
   statements, some declarations), so the error exits are not visible; the
   final ctables_destroy() call presumably belongs to the shared error path
   -- confirm. */
5876 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5878 struct casereader *input = NULL;
/* Run the measure guesser over the cases.  NOTE(review): any condition
   guarding these calls is not visible in this listing. */
5880 struct measure_guesser *mg = measure_guesser_create (ds);
5883 input = proc_open (ds);
5884 measure_guesser_run (mg, input);
5885 measure_guesser_destroy (mg);
/* Every dictionary variable starts with the variable-label display mode
   taken from the global show-variables setting. */
5888 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5889 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5890 enum settings_value_show tvars = settings_get_show_variables ();
5891 for (size_t i = 0; i < n_vars; i++)
5892 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on a private copy of the default pivot table look, with omit_empty
   turned off. */
5894 struct pivot_table_look *look = pivot_table_look_unshare (
5895 pivot_table_look_ref (pivot_table_look_get_default ()));
5896 look->omit_empty = false;
5898 struct ctables *ct = xmalloc (sizeof *ct);
5899 *ct = (struct ctables) {
5900 .dict = dataset_dict (ds),
5902 .ctables_formats = FMT_SETTINGS_INIT,
5904 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp passed to put_title_text() when TITLES text is expanded. */
5907 time_t now = time (NULL);
5912 const char *dot_string;
5913 const char *comma_string;
/* Number style strings for the CTABLES-specific formats.  Each entry has two
   variants; the one used depends on whether the decimal point setting is
   '.'. */
5915 static const struct ctf ctfs[4] = {
5916 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5917 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5918 { CTEF_PAREN, "-,(,),", "-.(.)." },
5919 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5921 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5922 for (size_t i = 0; i < 4; i++)
5924 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5925 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5926 fmt_number_style_from_string (s));
5929 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands: everything up to the first TABLE subcommand. */
5932 while (!lex_match_id (lexer, "TABLE"))
/* FORMAT: column width limits and their units, plus the text displayed for
   empty and missing cells. */
5934 if (lex_match_id (lexer, "FORMAT"))
5936 double widths[2] = { SYSMIS, SYSMIS };
5937 double units_per_inch = 72.0;
5939 while (lex_token (lexer) != T_SLASH)
5941 if (lex_match_id (lexer, "MINCOLWIDTH"))
5943 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5946 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5948 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5951 else if (lex_match_id (lexer, "UNITS"))
5953 lex_match (lexer, T_EQUALS);
5954 if (lex_match_id (lexer, "POINTS"))
5955 units_per_inch = 72.0;
5956 else if (lex_match_id (lexer, "INCHES"))
5957 units_per_inch = 1.0;
5958 else if (lex_match_id (lexer, "CM"))
5959 units_per_inch = 2.54;
5962 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5966 else if (lex_match_id (lexer, "EMPTY"))
5971 lex_match (lexer, T_EQUALS);
5972 if (lex_match_id (lexer, "ZERO"))
5974 /* Nothing to do. */
5976 else if (lex_match_id (lexer, "BLANK"))
5977 ct->zero = xstrdup ("");
5978 else if (lex_force_string (lexer))
5980 ct->zero = ss_xstrdup (lex_tokss (lexer));
5986 else if (lex_match_id (lexer, "MISSING"))
5988 lex_match (lexer, T_EQUALS);
5989 if (!lex_force_string (lexer))
/* A string other than "." is copied as the missing-value text.  The
   alternative for "." is not visible in this listing. */
5993 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5994 ? ss_xstrdup (lex_tokss (lexer))
6000 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6001 "UNITS", "EMPTY", "MISSING");
/* When both limits were given, the minimum may not exceed the maximum. */
6006 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6007 && widths[0] > widths[1])
6009 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Store each width that was given: divide by units-per-inch to get inches,
   then scale by 96.  NOTE(review): presumably width_ranges is in units of
   1/96 inch -- confirm. */
6013 for (size_t i = 0; i < 2; i++)
6014 if (widths[i] != SYSMIS)
6016 int *wr = ct->look->width_ranges[TABLE_HORZ];
6017 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS VARIABLES=... DISPLAY=...: how the listed variables are
   labeled. */
6022 else if (lex_match_id (lexer, "VLABELS"))
6024 if (!lex_force_match_id (lexer, "VARIABLES"))
6026 lex_match (lexer, T_EQUALS);
6028 struct variable **vars;
6030 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6034 if (!lex_force_match_id (lexer, "DISPLAY"))
6039 lex_match (lexer, T_EQUALS);
6041 enum ctables_vlabel vlabel;
6042 if (lex_match_id (lexer, "DEFAULT"))
6043 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6044 else if (lex_match_id (lexer, "NAME"))
6046 else if (lex_match_id (lexer, "LABEL"))
6047 vlabel = CTVL_LABEL;
6048 else if (lex_match_id (lexer, "BOTH"))
6050 else if (lex_match_id (lexer, "NONE"))
6054 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen mode for each listed variable, indexed by the variable's
   position in the dictionary. */
6060 for (size_t i = 0; i < n_vars; i++)
6061 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
/* MRSETS COUNTDUPLICATES=<boolean>. */
6064 else if (lex_match_id (lexer, "MRSETS"))
6066 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6068 lex_match (lexer, T_EQUALS);
6069 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
/* SMISSING VARIABLE|LISTWISE. */
6072 else if (lex_match_id (lexer, "SMISSING"))
6074 if (lex_match_id (lexer, "VARIABLE"))
6075 ct->smissing_listwise = false;
6076 else if (lex_match_id (lexer, "LISTWISE"))
6077 ct->smissing_listwise = true;
6080 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
/* PCOMPUTE and PPROPERTIES are handled by their own parsers. */
6084 else if (lex_match_id (lexer, "PCOMPUTE"))
6086 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6089 else if (lex_match_id (lexer, "PPROPERTIES"))
6091 if (!ctables_parse_pproperties (lexer, ct))
/* WEIGHT VARIABLE=<variable>. */
6094 else if (lex_match_id (lexer, "WEIGHT"))
6096 if (!lex_force_match_id (lexer, "VARIABLE"))
6098 lex_match (lexer, T_EQUALS);
6099 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS [COUNT=<integer>].  Without COUNT, a threshold that is
   still 0 becomes 5. */
6103 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6105 if (lex_match_id (lexer, "COUNT"))
6107 lex_match (lexer, T_EQUALS);
6108 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6111 ct->hide_threshold = lex_integer (lexer);
6114 else if (ct->hide_threshold == 0)
6115 ct->hide_threshold = 5;
6119 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6120 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6121 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6125 if (!lex_force_match (lexer, T_SLASH))
/* Per-table parsing.  Each pass handles one TABLE subcommand: make room in
   ct->tables, create the table with default settings, parse its axes, and
   then parse its own subcommands. */
6129 size_t allocated_tables = 0;
6132 if (ct->n_tables >= allocated_tables)
6133 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6134 sizeof *ct->tables);
/* Default category settings.  NOTE(review): the loop body that fills
   `categories' is not visible in this listing. */
6136 struct ctables_category *cat = xmalloc (sizeof *cat);
6137 *cat = (struct ctables_category) {
6139 .include_missing = false,
6140 .sort_ascending = true,
6143 struct ctables_categories *c = xmalloc (sizeof *c);
6144 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6145 *c = (struct ctables_categories) {
6152 struct ctables_categories **categories = xnmalloc (n_vars,
6153 sizeof *categories);
6154 for (size_t i = 0; i < n_vars; i++)
/* New table: summary labels visible on the column axis, and each axis
   carrying its own category labels. */
6157 struct ctables_table *t = xmalloc (sizeof *t);
6158 *t = (struct ctables_table) {
6160 .slabels_axis = PIVOT_AXIS_COLUMN,
6161 .slabels_visible = true,
6162 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6164 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6165 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6166 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6168 .clabels_from_axis = PIVOT_AXIS_LAYER,
6169 .categories = categories,
6170 .n_categories = n_vars,
6173 ct->tables[ct->n_tables++] = t;
/* Table expression: rows [BY columns [BY layers]].  The token offsets of its
   start and end are kept so that TITLES can expand )TABLE later. */
6175 lex_match (lexer, T_EQUALS);
6176 int expr_start = lex_ofs (lexer);
6177 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6179 if (lex_match (lexer, T_BY))
6181 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6182 ct, t, PIVOT_AXIS_COLUMN))
6185 if (lex_match (lexer, T_BY))
6187 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6188 ct, t, PIVOT_AXIS_LAYER))
6192 int expr_end = lex_ofs (lexer);
/* A table with all three axes empty is an error. */
6194 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6195 && !t->axes[PIVOT_AXIS_LAYER])
6197 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables may appear on only one axis.  On error, a note points at
   the scale variable found on each axis. */
6201 const struct ctables_axis *scales[PIVOT_N_AXES];
6202 size_t n_scales = 0;
6203 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6205 scales[a] = find_scale (t->axes[a]);
6211 msg (SE, _("Scale variables may appear only on one axis."));
6212 if (scales[PIVOT_AXIS_ROW])
6213 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6214 _("This scale variable appears on the rows axis."));
6215 if (scales[PIVOT_AXIS_COLUMN])
6216 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6217 _("This scale variable appears on the columns axis."));
6218 if (scales[PIVOT_AXIS_LAYER])
6219 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6220 _("This scale variable appears on the layer axis."));
/* Summaries may appear on only one axis as well. */
6224 const struct ctables_axis *summaries[PIVOT_N_AXES];
6225 size_t n_summaries = 0;
6226 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6228 summaries[a] = (scales[a]
6230 : find_categorical_summary_spec (t->axes[a]));
6234 if (n_summaries > 1)
6236 msg (SE, _("Summaries may appear only on one axis."));
6237 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6240 msg_at (SN, summaries[a]->loc,
6242 ? _("This variable on the rows axis has a summary.")
6243 : a == PIVOT_AXIS_COLUMN
6244 ? _("This variable on the columns axis has a summary.")
6245 : _("This variable on the layers axis has a summary."));
6247 msg_at (SN, summaries[a]->loc,
6248 _("This is a scale variable, so it always has a "
6249 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary if there are any
   summaries, otherwise an axis that is present at all. */
6254 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6255 if (n_summaries ? summaries[a] : t->axes[a])
6257 t->summary_axis = a;
/* If the command ends right after the table expression, the table only needs
   to be prepared. */
6261 if (lex_token (lexer) == T_ENDCMD)
6263 if (!ctables_prepare_table (t))
6267 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that belong to this table, up to the next TABLE or the end of
   the command. */
6270 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
/* SLABELS: position and visibility of summary labels. */
6272 if (lex_match_id (lexer, "SLABELS"))
6274 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6276 if (lex_match_id (lexer, "POSITION"))
6278 lex_match (lexer, T_EQUALS);
6279 if (lex_match_id (lexer, "COLUMN"))
6280 t->slabels_axis = PIVOT_AXIS_COLUMN;
6281 else if (lex_match_id (lexer, "ROW"))
6282 t->slabels_axis = PIVOT_AXIS_ROW;
6283 else if (lex_match_id (lexer, "LAYER"))
6284 t->slabels_axis = PIVOT_AXIS_LAYER;
6287 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6291 else if (lex_match_id (lexer, "VISIBLE"))
6293 lex_match (lexer, T_EQUALS);
6294 if (!parse_bool (lexer, &t->slabels_visible))
6299 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS: the axis on which row or column category labels are shown. */
6304 else if (lex_match_id (lexer, "CLABELS"))
6306 if (lex_match_id (lexer, "AUTO"))
6308 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6309 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6311 else if (lex_match_id (lexer, "ROWLABELS"))
6313 lex_match (lexer, T_EQUALS);
6314 if (lex_match_id (lexer, "OPPOSITE"))
6315 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6316 else if (lex_match_id (lexer, "LAYER"))
6317 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6320 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6324 else if (lex_match_id (lexer, "COLLABELS"))
6326 lex_match (lexer, T_EQUALS);
6327 if (lex_match_id (lexer, "OPPOSITE"))
6328 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6329 else if (lex_match_id (lexer, "LAYER"))
6330 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6333 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6339 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
/* CRITERIA CILEVEL=<number>, range-checked against 0 and 100 by
   lex_force_num_range_halfopen(). */
6344 else if (lex_match_id (lexer, "CRITERIA"))
6346 if (!lex_force_match_id (lexer, "CILEVEL"))
6348 lex_match (lexer, T_EQUALS);
6350 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6352 t->cilevel = lex_number (lexer);
6355 else if (lex_match_id (lexer, "CATEGORIES"))
6357 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: CAPTION, CORNER, and TITLE text.  Consecutive string tokens are
   joined with single spaces after put_title_text() has expanded )DATE,
   )TIME, and )TABLE in each of them. */
6361 else if (lex_match_id (lexer, "TITLES"))
6366 if (lex_match_id (lexer, "CAPTION"))
6367 textp = &t->caption;
6368 else if (lex_match_id (lexer, "CORNER"))
6370 else if (lex_match_id (lexer, "TITLE"))
6374 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6377 lex_match (lexer, T_EQUALS);
6379 struct string s = DS_EMPTY_INITIALIZER;
6380 while (lex_is_string (lexer))
6382 if (!ds_is_empty (&s))
6383 ds_put_byte (&s, ' ');
6384 put_title_text (&s, lex_tokss (lexer), now,
6385 lexer, dataset_dict (ds),
6386 expr_start, expr_end);
6390 *textp = ds_steal_cstr (&s);
6392 while (lex_token (lexer) != T_SLASH
6393 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: settings for the chi-square test, stored in a newly allocated
   t->chisq. */
6395 else if (lex_match_id (lexer, "SIGTEST"))
6399 t->chisq = xmalloc (sizeof *t->chisq);
6400 *t->chisq = (struct ctables_chisq) {
6402 .include_mrsets = true,
6403 .all_visible = true,
6409 if (lex_match_id (lexer, "TYPE"))
6411 lex_match (lexer, T_EQUALS);
6412 if (!lex_force_match_id (lexer, "CHISQUARE"))
6415 else if (lex_match_id (lexer, "ALPHA"))
6417 lex_match (lexer, T_EQUALS);
6418 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6420 t->chisq->alpha = lex_number (lexer);
6423 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6425 lex_match (lexer, T_EQUALS);
6426 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6429 else if (lex_match_id (lexer, "CATEGORIES"))
6431 lex_match (lexer, T_EQUALS);
6432 if (lex_match_id (lexer, "ALLVISIBLE"))
6433 t->chisq->all_visible = true;
6434 else if (lex_match_id (lexer, "SUBTOTALS"))
6435 t->chisq->all_visible = false;
6438 lex_error_expecting (lexer,
6439 "ALLVISIBLE", "SUBTOTALS");
6445 lex_error_expecting (lexer, "TYPE", "ALPHA",
6446 "INCLUDEMRSETS", "CATEGORIES");
6450 while (lex_token (lexer) != T_SLASH
6451 && lex_token (lexer) != T_ENDCMD);
/* COMPARETEST: settings for pairwise comparison tests, stored in a newly
   allocated t->pairwise. */
6453 else if (lex_match_id (lexer, "COMPARETEST"))
6457 t->pairwise = xmalloc (sizeof *t->pairwise);
6458 *t->pairwise = (struct ctables_pairwise) {
6460 .alpha = { .05, .05 },
6461 .adjust = BONFERRONI,
6462 .include_mrsets = true,
6463 .meansvariance_allcats = true,
6464 .all_visible = true,
6473 if (lex_match_id (lexer, "TYPE"))
6475 lex_match (lexer, T_EQUALS);
6476 if (lex_match_id (lexer, "PROP"))
6477 t->pairwise->type = PROP;
6478 else if (lex_match_id (lexer, "MEAN"))
6479 t->pairwise->type = MEAN;
6482 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two numbers, with an optional comma between them.  Two
   values are stored in ascending order; a single value fills both slots. */
6486 else if (lex_match_id (lexer, "ALPHA"))
6488 lex_match (lexer, T_EQUALS);
6490 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6492 double a0 = lex_number (lexer);
6495 lex_match (lexer, T_COMMA);
6496 if (lex_is_number (lexer))
6498 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6500 double a1 = lex_number (lexer);
6503 t->pairwise->alpha[0] = MIN (a0, a1);
6504 t->pairwise->alpha[1] = MAX (a0, a1);
6507 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6509 else if (lex_match_id (lexer, "ADJUST"))
6511 lex_match (lexer, T_EQUALS);
6512 if (lex_match_id (lexer, "BONFERRONI"))
6513 t->pairwise->adjust = BONFERRONI;
6514 else if (lex_match_id (lexer, "BH"))
6515 t->pairwise->adjust = BH;
6516 else if (lex_match_id (lexer, "NONE"))
6517 t->pairwise->adjust = 0;
6520 lex_error_expecting (lexer, "BONFERRONI", "BH",
6525 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6527 lex_match (lexer, T_EQUALS);
6528 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6531 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6533 lex_match (lexer, T_EQUALS);
6534 if (lex_match_id (lexer, "ALLCATS"))
6535 t->pairwise->meansvariance_allcats = true;
6536 else if (lex_match_id (lexer, "TESTEDCATS"))
6537 t->pairwise->meansvariance_allcats = false;
6540 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6544 else if (lex_match_id (lexer, "CATEGORIES"))
6546 lex_match (lexer, T_EQUALS);
6547 if (lex_match_id (lexer, "ALLVISIBLE"))
6548 t->pairwise->all_visible = true;
6549 else if (lex_match_id (lexer, "SUBTOTALS"))
6550 t->pairwise->all_visible = false;
6553 lex_error_expecting (lexer, "ALLVISIBLE",
6558 else if (lex_match_id (lexer, "MERGE"))
6560 lex_match (lexer, T_EQUALS);
6561 if (!parse_bool (lexer, &t->pairwise->merge))
6564 else if (lex_match_id (lexer, "STYLE"))
6566 lex_match (lexer, T_EQUALS);
6567 if (lex_match_id (lexer, "APA"))
6568 t->pairwise->apa_style = true;
6569 else if (lex_match_id (lexer, "SIMPLE"))
6570 t->pairwise->apa_style = false;
6573 lex_error_expecting (lexer, "APA", "SIMPLE");
6577 else if (lex_match_id (lexer, "SHOWSIG"))
6579 lex_match (lexer, T_EQUALS);
6580 if (!parse_bool (lexer, &t->pairwise->show_sig))
6585 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6586 "INCLUDEMRSETS", "MEANSVARIANCE",
6587 "CATEGORIES", "MERGE", "STYLE",
6592 while (lex_token (lexer) != T_SLASH
6593 && lex_token (lexer) != T_ENDCMD);
6597 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6598 "CRITERIA", "CATEGORIES", "TITLES",
6599 "SIGTEST", "COMPARETEST");
6603 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved off the row axis or off the column axis, but
   not both. */
6607 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6608 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6610 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6614 if (!ctables_prepare_table (t))
6617 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: read the cases, produce the tables, and commit the
   procedure.  The command succeeds only if both steps succeed. */
6620 input = proc_open (ds);
6621 bool ok = ctables_execute (ds, input, ct);
6622 ok = proc_commit (ds) && ok;
6624 ctables_destroy (ct);
6625 return ok ? CMD_SUCCESS : CMD_FAILURE;
6630 ctables_destroy (ct);