1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition.  These are kept in struct ctables's
   'postcomputes' hmap. */
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
376 size_t *areas[N_CTATS];
377 size_t n_areas[N_CTATS];
380 struct ctables_summary_spec_set specs[N_CSVS];
383 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
386 struct ctables_nest *nests;
390 static void ctables_stack_uninit (struct ctables_stack *);
394 struct hmap_node node;
399 struct ctables_occurrence
401 struct hmap_node node;
405 struct ctables_section
408 struct ctables_table *table;
409 struct ctables_nest *nests[PIVOT_N_AXES];
412 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
413 struct hmap cells; /* Contains "struct ctables_cell"s. */
414 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
417 static void ctables_section_uninit (struct ctables_section *);
421 struct ctables *ctables;
422 struct ctables_axis *axes[PIVOT_N_AXES];
423 struct ctables_stack stacks[PIVOT_N_AXES];
424 struct ctables_section *sections;
426 enum pivot_axis_type summary_axis;
427 struct ctables_summary_spec_set summary_specs;
428 struct variable **sum_vars;
431 enum pivot_axis_type slabels_axis;
432 bool slabels_visible;
434 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
436 Most commonly, label_axis[a] == a, and in particular we always have
437 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
439 If ROWLABELS or COLLABELS is specified, then one of
440 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
441 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
443 If any category labels are moved, then 'clabels_example' is one of the
444 variables being moved (and it is otherwise NULL). All of the variables
445 being moved have the same width, value labels, and categories, so this
446 example variable can be used to find those out.
448 The remaining members in this group are relevant only if category labels
451 'clabels_values_map' holds a "struct ctables_value" for all the values
452 that appear in all of the variables in the moved categories. It is
453 accumulated as the data is read. Once the data is fully read, its
454 sorted values are put into 'clabels_values' and 'n_clabels_values'.
456 enum pivot_axis_type label_axis[PIVOT_N_AXES];
457 enum pivot_axis_type clabels_from_axis;
458 const struct variable *clabels_example;
459 struct hmap clabels_values_map;
460 struct ctables_value **clabels_values;
461 size_t n_clabels_values;
463 /* Indexed by variable dictionary index. */
464 struct ctables_categories **categories;
473 struct ctables_chisq *chisq;
474 struct ctables_pairwise *pairwise;
477 struct ctables_categories
480 struct ctables_category *cats;
485 struct ctables_category
487 enum ctables_category_type
489 /* Explicit category lists. */
492 CCT_NRANGE, /* Numerical range. */
493 CCT_SRANGE, /* String range. */
498 /* Totals and subtotals. */
502 /* Implicit category lists. */
507 /* For contributing to TOTALN. */
508 CCT_EXCLUDED_MISSING,
512 struct ctables_category *subtotal;
518 double number; /* CCT_NUMBER. */
519 struct substring string; /* CCT_STRING, in dictionary encoding. */
520 double nrange[2]; /* CCT_NRANGE. */
521 struct substring srange[2]; /* CCT_SRANGE. */
525 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
526 bool hide_subcategories; /* CCT_SUBTOTAL. */
529 /* CCT_POSTCOMPUTE. */
532 const struct ctables_postcompute *pc;
533 enum fmt_type parse_format;
536 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
539 bool include_missing;
543 enum ctables_summary_function sort_function;
545 enum ctables_area_type area;
546 struct variable *sort_var;
551 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
552 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
553 struct msg_location *location;
/* Releases the resources held inside CAT: its source location and the
   type-specific members (the string, both string-range endpoints, or the
   total label). */
557 ctables_category_uninit (struct ctables_category *cat)
562 msg_location_destroy (cat->location);
569 case CCT_POSTCOMPUTE:
573 ss_dealloc (&cat->string);
577 ss_dealloc (&cat->srange[0]);
578 ss_dealloc (&cat->srange[1]);
583 free (cat->total_label);
591 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be null (that is, have a
   null 'string' pointer).  Two null substrings are equal, a null and a
   non-null substring are unequal, and two non-null substrings are compared
   with ss_equals(). */
597 nullable_substring_equal (const struct substring *a,
598 const struct substring *b)
600 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Their types are compared first; after that
   only the members that matter for the type are compared: the number, the
   string, the numeric range endpoints, the (possibly null) string range
   endpoints, the postcompute pointer, the total label, or the
   sorting/missing-value settings. */
604 ctables_category_equal (const struct ctables_category *a,
605 const struct ctables_category *b)
607 if (a->type != b->type)
613 return a->number == b->number;
616 return ss_equals (a->string, b->string);
619 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range endpoints may be null, so plain ss_equals() is not used. */
622 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
623 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
629 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by contents. */
630 return a->pc == b->pc;
634 return !strcmp (a->total_label, b->total_label);
639 return (a->include_missing == b->include_missing
640 && a->sort_ascending == b->sort_ascending
641 && a->sort_function == b->sort_function
642 && a->sort_var == b->sort_var
643 && a->percentile == b->percentile);
645 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C.  C's reference count must be positive on entry.
   The loop uninitializes every category in C (NOTE(review): presumably only
   once the last reference is dropped -- confirm). */
653 ctables_categories_unref (struct ctables_categories *c)
658 assert (c->n_refs > 0);
662 for (size_t i = 0; i < c->n_cats; i++)
663 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must have the same number of
   categories and the same 'show_empty' setting, and corresponding categories
   are then compared pairwise, in order, with ctables_category_equal(). */
669 ctables_categories_equal (const struct ctables_categories *a,
670 const struct ctables_categories *b)
672 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
675 for (size_t i = 0; i < a->n_cats; i++)
676 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
682 /* Chi-square test (SIGTEST). */
690 /* Pairwise comparison test (COMPARETEST). */
691 struct ctables_pairwise
693 enum { PROP, MEAN } type;
696 bool meansvariance_allcats;
698 enum { BONFERRONI = 1, BH } adjust;
722 struct variable *var;
724 struct ctables_summary_spec_set specs[N_CSVS];
728 struct ctables_axis *subs[2];
731 struct msg_location *loc;
734 static void ctables_axis_destroy (struct ctables_axis *);
736 struct ctables_summary_spec
738 /* The calculation to be performed.
740 'function' is the function to calculate. 'weighted' specifies whether
741 to use weighted or unweighted data (for functions that do not support a
742 choice, it must be true). 'calc_area' is the area over which the
743 calculation takes place (for functions that target only an individual
744 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
745 percentile between 0 and 100 (for other functions it must be 0). */
746 enum ctables_summary_function function;
748 enum ctables_area_type calc_area;
749 double percentile; /* CTSF_PTILE only. */
751 /* How to display the result of the calculation.
753 'label' is a user-specified label, NULL if the user didn't specify
756 'user_area' is usually the same as 'calc_area', but when category labels
757 are rotated from one axis to another it swaps rows and columns.
759 'format' is the format for displaying the output. If
760 'is_ctables_format' is true, then 'format.type' is one of the special
761 CTEF_* formats instead of the standard ones. */
763 enum ctables_area_type user_area;
764 struct fmt_spec format;
765 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies summary spec SRC into DST.  DST receives its own duplicate of SRC's
   label (or NULL if SRC has none) rather than sharing the pointer. */
772 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
773 const struct ctables_summary_spec *src)
776 dst->label = xstrdup_if_nonnull (src->label);
780 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of specs is allocated (none
   if SRC is empty) and each spec is cloned individually with
   ctables_summary_spec_clone(). */
787 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
788 const struct ctables_summary_spec_set *src)
790 struct ctables_summary_spec *specs
791 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
792 for (size_t i = 0; i < src->n; i++)
793 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
795 *dst = (struct ctables_summary_spec_set) {
800 .is_scale = src->is_scale,
/* Releases the contents of SET: each of its 'n' summary specs is
   uninitialized and the 'listwise_vars' array is freed. */
805 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
807 for (size_t i = 0; i < set->n; i++)
808 ctables_summary_spec_uninit (&set->specs[i]);
809 free (set->listwise_vars);
/* Parses a column width setting for the subcommand keyword NAME.  An
   optional equals sign is skipped, then either the keyword DEFAULT or a
   number in the closed range [0, DBL_MAX] is accepted; a number is stored
   into *WIDTH.  NAME is passed on to the lexer's range check. */
814 parse_col_width (struct lexer *lexer, const char *name, double *width)
816 lex_match (lexer, T_EQUALS);
817 if (lex_match_id (lexer, "DEFAULT"))
819 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
821 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER for the boolean setting *B.  If
   neither keyword is present, reports an error saying that YES or NO was
   expected. */
831 parse_bool (struct lexer *lexer, bool *b)
833 if (lex_match_id (lexer, "NO"))
835 else if (lex_match_id (lexer, "YES"))
839 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table
   is generated from "ctables.inc": each S() entry there contributes one
   element, indexed by the function's enum value. */
845 static enum ctables_function_availability
846 ctables_function_availability (enum ctables_summary_function f)
848 static enum ctables_function_availability availability[] = {
849 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
850 #include "ctables.inc"
854 return availability[f];
/* Returns true if F is one of the plain count functions, CTSF_COUNT or
   CTSF_ECOUNT, false for any other summary function. */
858 ctables_summary_function_is_count (enum ctables_summary_function f)
860 return f == CTSF_COUNT || f == CTSF_ECOUNT;
/* Parses a summary function name from the identifier at the current token.

   The name is taken apart from left to right:

   - A leading "U" or "u" makes *WEIGHTED false; otherwise it is true.

   - An area type prefix from ctables_area_type_name[] is matched next,
     trying the area types in enum order.

   - What remains is either "PCT", which stands for CTSF_areaPCT_COUNT, or
     is looked up by basename in ctables_function_info[], taking into
     account whether the function may be unweighted and whether it is an
     area function.

   The error message at the end reports that a summary function name was
   expected. */
864 parse_ctables_summary_function (struct lexer *lexer,
865 enum ctables_summary_function *function,
867 enum ctables_area_type *area)
869 if (!lex_force_id (lexer))
872 struct substring name = lex_tokss (lexer);
873 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
875 bool has_area = false;
877 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
878 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
883 if (ss_equals_case (name, ss_cstr ("PCT")))
885 /* Special case where .COUNT suffix is omitted. */
886 *function = CTSF_areaPCT_COUNT;
893 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
895 const struct ctables_function_info *cfi = &ctables_function_info[f];
896 if (ss_equals_case (cfi->basename, name))
899 if (!*weighted && !cfi->may_be_unweighted)
901 if (has_area != cfi->is_area)
909 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  Depending on the kind of node, this uninitializes its
   N_CSVS summary spec sets or recursively destroys its two sub-axes; the
   node's source location is destroyed as well. */
914 ctables_axis_destroy (struct ctables_axis *axis)
922 for (size_t i = 0; i < N_CSVS; i++)
923 ctables_summary_spec_set_uninit (&axis->specs[i]);
928 ctables_axis_destroy (axis->subs[0]);
929 ctables_axis_destroy (axis->subs[1]);
932 msg_location_destroy (axis->loc);
/* Allocates a new axis node with SUB0 and SUB1 as its two sub-axes.  The
   node's source location runs from token offset START_OFS up to the token
   just before LEXER's current one. */
936 static struct ctables_axis *
937 ctables_axis_new_nonterminal (enum ctables_axis_op op,
938 struct ctables_axis *sub0,
939 struct ctables_axis *sub1,
940 struct lexer *lexer, int start_ofs)
942 struct ctables_axis *axis = xmalloc (sizeof *axis);
943 *axis = (struct ctables_axis) {
945 .subs = { sub0, sub1 },
946 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
951 struct ctables_axis_parse_ctx
954 struct dictionary *dict;
956 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The format class of each function comes from "ctables.inc".  Depending on
   that class the result is F40.0, PCT40.1, or VAR's own print format. */
959 static struct fmt_spec
960 ctables_summary_default_format (enum ctables_summary_function function,
961 const struct variable *var)
963 static const enum ctables_format default_formats[] = {
964 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
965 #include "ctables.inc"
968 switch (default_formats[function])
971 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
974 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
977 return *var_get_print_format (var);
/* Returns the default, untranslated label for SPEC.  The label depends on
   SPEC's function, on whether it is weighted (several functions get an
   "Unweighted" prefix otherwise), and, for area functions, on SPEC's
   'user_area'.  The strings are only marked for translation with N_().

   CTSF_PTILE must not be passed here: its label includes the percentile and
   is built by the caller instead. */
985 ctables_summary_label__ (const struct ctables_summary_spec *spec)
987 bool w = spec->weighted;
988 enum ctables_area_type a = spec->user_area;
989 switch (spec->function)
992 return w ? N_("Count") : N_("Unweighted Count");
995 return N_("Adjusted Count");
997 case CTSF_areaPCT_COUNT:
1000 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1001 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1002 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1003 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1004 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1005 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1006 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1010 case CTSF_areaPCT_VALIDN:
1013 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1014 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1015 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1016 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1017 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1018 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1019 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1023 case CTSF_areaPCT_TOTALN:
1026 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1027 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1028 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1029 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1030 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1031 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1032 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1036 case CTSF_MAXIMUM: return N_("Maximum");
1037 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1038 case CTSF_MEDIAN: return N_("Median");
1039 case CTSF_MINIMUM: return N_("Minimum");
1040 case CTSF_MISSING: return N_("Missing");
1041 case CTSF_MODE: return N_("Mode");
1042 case CTSF_PTILE: NOT_REACHED ();
1043 case CTSF_RANGE: return N_("Range");
1044 case CTSF_SEMEAN: return N_("Std Error of Mean");
1045 case CTSF_STDDEV: return N_("Std Deviation");
1046 case CTSF_SUM: return N_("Sum");
1047 case CTSF_TOTALN: return N_("Total N");
1048 case CTSF_ETOTALN: return N_("Adjusted Total N");
1049 case CTSF_VALIDN: return N_("Valid N");
1050 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1051 case CTSF_VARIANCE: return N_("Variance");
1052 case CTSF_areaPCT_SUM:
1055 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1056 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1057 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1058 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1059 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1060 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1061 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1068 /* Don't bother translating these: they are for developers only. */
1069 case CTAT_TABLE: return "Table ID";
1070 case CTAT_LAYER: return "Layer ID";
1071 case CTAT_LAYERROW: return "Layer Row ID";
1072 case CTAT_LAYERCOL: return "Layer Column ID";
1073 case CTAT_SUBTABLE: return "Subtable ID";
1074 case CTAT_ROW: return "Row ID";
1075 case CTAT_COL: return "Column ID";
/* Returns a new pivot value to use as the label for SPEC.

   The default label is "Percentile <p>" or "Unweighted Percentile <p>" (two
   decimals) for CTSF_PTILE, and the text from ctables_summary_label__() for
   every other function.

   The label taken from 'spec->label' is copied with every occurrence of
   ")CILEVEL" replaced by CILEVEL formatted with "%g".  (NOTE(review):
   presumably the default is used exactly when 'spec->label' is null --
   confirm.) */
1083 static struct pivot_value *
1084 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1088 if (spec->function == CTSF_PTILE)
1090 double p = spec->percentile;
1091 char *s = (spec->weighted
1092 ? xasprintf (_("Percentile %.2f"), p)
1093 : xasprintf (_("Unweighted Percentile %.2f"), p));
1094 return pivot_value_new_user_text_nocopy (s);
1097 return pivot_value_new_text (ctables_summary_label__ (spec));
1101 struct substring in = ss_cstr (spec->label);
1102 struct substring target = ss_cstr (")CILEVEL");
1104 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text up to the next ")CILEVEL" marker, then step past it. */
1107 size_t chunk = ss_find_substring (in, target);
1108 ds_put_substring (&out, ss_head (in, chunk));
1109 ss_advance (&in, chunk);
1111 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1113 ss_advance (&in, target.length);
1114 ds_put_format (&out, "%g", cilevel);
/* Formats the syntax name of summary FUNCTION into BUFFER, which has room
   for BUFSIZE bytes: "U" if the function is unweighted, then the name of
   AREA if FUNCTION is an area function, then the function's basename.  The
   output is truncated by snprintf() if it does not fit. */
1120 ctables_summary_function_name (enum ctables_summary_function function,
1122 enum ctables_area_type area,
1123 char *buffer, size_t bufsize)
1125 const struct ctables_function_info *cfi = &ctables_function_info[function];
1126 snprintf (buffer, bufsize, "%s%s%s",
1127 weighted ? "" : "U",
1128 cfi->is_area ? ctables_area_type_name[area] : "",
1129 cfi->basename.string);
/* Adds a summary specification to every variable in AXIS, for summary
   variant SV.

   For a variable node (CTAO_VAR), the function's availability is checked
   first: a function restricted to scale variables is rejected, with an
   error at LOC and a note at the variable's location, when the variable is
   not scale and SV is not CSV_TOTAL.  A new spec is then appended to the
   variable's spec set for SV, growing the array as needed.  The spec gets
   its own copy of LABEL, and uses the function's default format for the
   variable if FORMAT is null.

   For any other node, the request is applied recursively to both
   sub-axes. */
1134 add_summary_spec (struct ctables_axis *axis,
1135 enum ctables_summary_function function, bool weighted,
1136 enum ctables_area_type area, double percentile,
1137 const char *label, const struct fmt_spec *format,
1138 bool is_ctables_format, const struct msg_location *loc,
1139 enum ctables_summary_variant sv)
1141 if (axis->op == CTAO_VAR)
1143 char function_name[128];
1144 ctables_summary_function_name (function, weighted, area,
1145 function_name, sizeof function_name);
1146 const char *var_name = var_get_name (axis->var);
1147 switch (ctables_function_availability (function))
1151 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1152 "response sets."), function_name);
1153 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1159 if (!axis->scale && sv != CSV_TOTAL)
1162 _("Summary function %s applies only to scale variables."),
1164 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1174 struct ctables_summary_spec_set *set = &axis->specs[sv];
1175 if (set->n >= set->allocated)
1176 set->specs = x2nrealloc (set->specs, &set->allocated,
1177 sizeof *set->specs);
1179 struct ctables_summary_spec *dst = &set->specs[set->n++];
1180 *dst = (struct ctables_summary_spec) {
1181 .function = function,
1182 .weighted = weighted,
1185 .percentile = percentile,
1186 .label = xstrdup_if_nonnull (label),
1187 .format = (format ? *format
1188 : ctables_summary_default_format (function, axis->var)),
1189 .is_ctables_format = is_ctables_format,
1195 for (size_t i = 0; i < 2; i++)
1196 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1197 percentile, label, format, is_ctables_format,
1204 static struct ctables_axis *ctables_axis_parse_stack (
1205 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   A variable may be followed by "[S]" or "[C]" to force it to be treated as
   scale or categorical, respectively; otherwise its measurement level in
   the dictionary decides.  A string variable may not be used as a scale
   variable; that case is reported as an error and the new axis is
   destroyed. */
1208 static struct ctables_axis *
1209 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1211 if (lex_match (ctx->lexer, T_LPAREN))
1213 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1214 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1216 ctables_axis_destroy (sub);
1222 if (!lex_force_id (ctx->lexer))
1225 int start_ofs = lex_ofs (ctx->lexer);
1226 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1230 struct ctables_axis *axis = xmalloc (sizeof *axis);
1231 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1233 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1234 : lex_match_phrase (ctx->lexer, "[C]") ? false
1235 : var_get_measure (var) == MEASURE_SCALE);
1236 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1237 lex_ofs (ctx->lexer) - 1);
1238 if (axis->scale && var_is_alpha (var))
1240 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1242 var_get_name (var));
1243 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the ASCII digits 0 through 9,
   false otherwise (including when S is the empty string). */
1251 has_digit (const char *s)
1253 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary specification into FORMAT.

   The CTABLES-specific format names NEGPAREN, NEQUAL, PAREN, and PCTPAREN
   (matched without regard to case) map to the corresponding CTEF_* types
   and set *IS_CTABLES_FORMAT to true.  These formats are rejected, with an
   error message, when the width is too small or when the number of decimals
   exceeds the width minus 1.

   Any other name is parsed as an ordinary format specifier, which must be a
   valid output format compatible with numeric values; *IS_CTABLES_FORMAT is
   then false. */
1257 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1258 bool *is_ctables_format)
1260 char type[FMT_TYPE_LEN_MAX + 1];
1261 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1264 if (!strcasecmp (type, "NEGPAREN"))
1265 format->type = CTEF_NEGPAREN;
1266 else if (!strcasecmp (type, "NEQUAL"))
1267 format->type = CTEF_NEQUAL;
1268 else if (!strcasecmp (type, "PAREN"))
1269 format->type = CTEF_PAREN;
1270 else if (!strcasecmp (type, "PCTPAREN"))
1271 format->type = CTEF_PCTPAREN;
1274 *is_ctables_format = false;
1275 return (parse_format_specifier (lexer, format)
1276 && fmt_check_output (format)
1277 && fmt_check_type_compat (format, VAL_NUMERIC));
1283 lex_next_error (lexer, -1, -1,
1284 _("Output format %s requires width 2 or greater."), type);
1287 else if (format->d > format->w - 1)
1289 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1290 "greater than decimals."), type);
1295 *is_ctables_format = true;
/* Parses a primary axis expression, optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function name; for PTILE, a
   percentile between 0 and 100; an optional quoted label; and an optional
   output format, recognized as an identifier that contains a digit.  Each
   specification is handed to add_summary_spec() together with its source
   location.  Specifications may be separated by commas.

   While cell summaries (CSV_CELL) are being parsed, the keyword TOTALS
   followed by '[' is also accepted.  (NOTE(review): presumably the
   specifications inside that group are recorded as CSV_TOTAL -- confirm.)

   The sub-axis is destroyed on the error path at the end. */
1300 static struct ctables_axis *
1301 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1303 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1304 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1307 enum ctables_summary_variant sv = CSV_CELL;
1310 int start_ofs = lex_ofs (ctx->lexer);
1312 /* Parse function. */
1313 enum ctables_summary_function function;
1315 enum ctables_area_type area;
1316 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1320 /* Parse percentile. */
1321 double percentile = 0;
1322 if (function == CTSF_PTILE)
1324 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1326 percentile = lex_number (ctx->lexer);
1327 lex_get (ctx->lexer);
1332 if (lex_is_string (ctx->lexer))
1334 label = ss_xstrdup (lex_tokss (ctx->lexer));
1335 lex_get (ctx->lexer);
1339 struct fmt_spec format;
1340 const struct fmt_spec *formatp;
1341 bool is_ctables_format = false;
1342 if (lex_token (ctx->lexer) == T_ID
1343 && has_digit (lex_tokcstr (ctx->lexer)))
1345 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1346 &is_ctables_format))
1356 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1357 lex_ofs (ctx->lexer) - 1);
1358 add_summary_spec (sub, function, weighted, area, percentile, label,
1359 formatp, is_ctables_format, loc, sv);
1361 msg_location_destroy (loc);
1363 lex_match (ctx->lexer, T_COMMA);
1364 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1366 if (!lex_force_match (ctx->lexer, T_LBRACK))
1370 else if (lex_match (ctx->lexer, T_RBRACK))
1372 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1379 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A variable node is returned if it is
   marked as scale and yields NULL otherwise; any other node is searched
   through its two sub-axes in order. */
1383 static const struct ctables_axis *
1384 find_scale (const struct ctables_axis *axis)
1388 else if (axis->op == CTAO_VAR)
1389 return axis->scale ? axis : NULL;
1392 for (size_t i = 0; i < 2; i++)
1394 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a categorical (non-scale) variable that has at least
   one cell summary specification (CSV_CELL).  A variable node is returned
   if it qualifies and yields NULL otherwise; any other node is searched
   through its two sub-axes in order. */
1402 static const struct ctables_axis *
1403 find_categorical_summary_spec (const struct ctables_axis *axis)
1407 else if (axis->op == CTAO_VAR)
1408 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1411 for (size_t i = 0; i < 2; i++)
1413 const struct ctables_axis *sum
1414 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one or more postfix expressions separated by
   '>'.  Each '>' produces a CTAO_NEST node.

   Two restrictions are enforced on each nest, with an error at the nest's
   location and notes at the offending variables:

   - The outer and inner sides may not both contain a scale variable.

   - An outer categorical variable may not have summaries; summaries for
     categorical variables are only allowed at the innermost nesting
     level. */
1422 static struct ctables_axis *
1423 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1425 int start_ofs = lex_ofs (ctx->lexer);
1426 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1430 while (lex_match (ctx->lexer, T_GT))
1432 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1435 ctables_axis_destroy (lhs);
1439 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1440 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1442 const struct ctables_axis *outer_scale = find_scale (lhs);
1443 const struct ctables_axis *inner_scale = find_scale (rhs);
1444 if (outer_scale && inner_scale)
1446 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1447 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1448 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1449 ctables_axis_destroy (nest);
1453 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1456 msg_at (SE, nest->loc,
1457 _("Summaries may only be requested for categorical variables "
1458 "at the innermost nesting level."));
1459 msg_at (SN, outer_sum->loc,
1460 _("This outer categorical variable has a summary."));
1461 ctables_axis_destroy (nest);
/* Parses a stacking expression: one or more nesting expressions separated
   by '+'.  Each '+' wraps the expression parsed so far and the new
   right-hand side in a CTAO_STACK node, so the result is left-associative.
   The left side is destroyed on the failure path for a right-hand side. */
1471 static struct ctables_axis *
1472 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1474 int start_ofs = lex_ofs (ctx->lexer);
1475 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1479 while (lex_match (ctx->lexer, T_PLUS))
1481 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1484 ctables_axis_destroy (lhs);
1488 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1489 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   resulting tree in T->axes[A].  The final return shown yields true if
   that tree is nonnull. */
1496 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1497 struct ctables *ct, struct ctables_table *t,
1498 enum pivot_axis_type a)
/* T_BY, a slash, or the end of the command means that no expression was
   written for this axis.  NOTE(review): presumably this case succeeds
   with an empty axis; confirm against the statement that follows. */
1500 if (lex_token (lexer) == T_BY
1501 || lex_token (lexer) == T_SLASH
1502 || lex_token (lexer) == T_ENDCMD)
1505 struct ctables_axis_parse_ctx ctx = {
1511 t->axes[a] = ctables_axis_parse_stack (&ctx);
1512 return t->axes[a] != NULL;
/* Destructor for CHISQ; ctables_table_destroy() calls it on a table's
   chisq member.  NOTE(review): presumably just releases CHISQ's memory;
   confirm against the body. */
1516 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE; ctables_table_destroy() calls it on a table's
   pairwise member.  NOTE(review): presumably just releases PAIRWISE's
   memory; confirm against the body. */
1522 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: its sections, its references
   to per-variable categories, its axis trees and stacks, its summary
   specs, the category-label value map, and its chisq and pairwise
   settings. */
1528 ctables_table_destroy (struct ctables_table *t)
1533 for (size_t i = 0; i < t->n_sections; i++)
1534 ctables_section_uninit (&t->sections[i]);
/* Categories are reference-counted, so only drop this table's
   references. */
1537 for (size_t i = 0; i < t->n_categories; i++)
1538 ctables_categories_unref (t->categories[i]);
1539 free (t->categories);
1541 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1543 ctables_axis_destroy (t->axes[a]);
1544 ctables_stack_uninit (&t->stacks[a]);
1546 free (t->summary_specs.specs);
/* Each value in the map is destroyed using the width of
   t->clabels_example before its node is removed. */
1548 struct ctables_value *ctv, *next_ctv;
1549 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1550 &t->clabels_values_map)
1552 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1553 hmap_delete (&t->clabels_values_map, &ctv->node);
1556 hmap_destroy (&t->clabels_values_map);
1557 free (t->clabels_values);
1563 ctables_chisq_destroy (t->chisq);
1564 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in
   ct->postcomputes (location, expression, and summary specs), the
   CTABLES format settings, the reference to the pivot table look, and
   each of the tables. */
1569 ctables_destroy (struct ctables *ct)
/* The "safe" iteration form is used because each node is deleted from
   the map while the map is being walked. */
1574 struct ctables_postcompute *pc, *next_pc;
1575 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1579 msg_location_destroy (pc->location);
1580 ctables_pcexpr_destroy (pc->expr);
1584 ctables_summary_spec_set_uninit (pc->specs);
1587 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1590 hmap_destroy (&ct->postcomputes);
1592 fmt_settings_uninit (&ct->ctables_formats);
1593 pivot_table_look_unref (ct->look);
1597 for (size_t i = 0; i < ct->n_tables; i++)
1598 ctables_table_destroy (ct->tables[i]);
1603 static struct ctables_category
1604 cct_nrange (double low, double high)
1606 return (struct ctables_category) {
1608 .nrange = { low, high }
1612 static struct ctables_category
1613 cct_srange (struct substring low, struct substring high)
1615 return (struct ctables_category) {
1617 .srange = { low, high }
/* Parses the optional "= 'label'" that may follow SUBTOTAL or HSUBTOTAL
   and initializes *CAT as a CCT_SUBTOTAL category.  HIDE_SUBCATEGORIES
   is copied into the category.  The label is a newly allocated copy of
   the string token if one was given, otherwise of the translated default
   "Subtotal". */
1622 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1623 struct ctables_category *cat)
1626 if (lex_match (lexer, T_EQUALS))
/* An equals sign must be followed by a string. */
1628 if (!lex_force_string (lexer))
1631 total_label = ss_xstrdup (lex_tokss (lexer));
1635 total_label = xstrdup (_("Subtotal"));
1637 *cat = (struct ctables_category) {
1638 .type = CCT_SUBTOTAL,
1639 .hide_subcategories = hide_subcategories,
1640 .total_label = total_label
/* Converts the string in LEXER's current token between UTF-8 and DICT's
   encoding with recode_substring_pool() (no pool is passed), then strips
   trailing spaces from the result.

   NOTE(review): presumably the conversion is from UTF-8 to the
   dictionary encoding; confirm against recode_substring_pool()'s
   parameter order. */
1645 static struct substring
1646 parse_substring (struct lexer *lexer, struct dictionary *dict)
1648 struct substring s = recode_substring_pool (
1649 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1650 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list into *CAT.  The forms
   recognized below are:

     OTHERNM
     MISSING
     SUBTOTAL and HSUBTOTAL (handled by ctables_table_parse_subtotal())
     LO THRU string, LO THRU number
     number, number THRU HI, number THRU number
     string, string THRU HI, string THRU string
     a T_AND token followed by the name of a postcompute

   An open-ended numeric bound is stored as -DBL_MAX or DBL_MAX and an
   open-ended string bound as a substring whose string pointer is NULL.
   Anything else is reported with lex_error(). */
1656 ctables_table_parse_explicit_category (struct lexer *lexer,
1657 struct dictionary *dict,
1659 struct ctables_category *cat)
1661 if (lex_match_id (lexer, "OTHERNM"))
1662 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1663 else if (lex_match_id (lexer, "MISSING"))
1664 *cat = (struct ctables_category) { .type = CCT_MISSING };
1665 else if (lex_match_id (lexer, "SUBTOTAL"))
1666 return ctables_table_parse_subtotal (lexer, false, cat);
1667 else if (lex_match_id (lexer, "HSUBTOTAL"))
1668 return ctables_table_parse_subtotal (lexer, true, cat);
1669 else if (lex_match_id (lexer, "LO"))
/* LO must be followed by THRU and then an upper bound, which may be a
   string or a number. */
1671 if (!lex_force_match_id (lexer, "THRU"))
1673 if (lex_is_string (lexer))
1675 struct substring sr0 = { .string = NULL };
1676 struct substring sr1 = parse_substring (lexer, dict);
1677 *cat = cct_srange (sr0, sr1);
1679 else if (lex_force_num (lexer))
1681 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1687 else if (lex_is_number (lexer))
/* A number alone, or the lower bound of a numeric range. */
1689 double number = lex_number (lexer);
1691 if (lex_match_id (lexer, "THRU"))
1693 if (lex_match_id (lexer, "HI"))
1694 *cat = cct_nrange (number, DBL_MAX);
1697 if (!lex_force_num (lexer))
1699 *cat = cct_nrange (number, lex_number (lexer));
1704 *cat = (struct ctables_category) {
1709 else if (lex_is_string (lexer))
/* A string alone, or the lower bound of a string range. */
1711 struct substring s = parse_substring (lexer, dict);
1712 if (lex_match_id (lexer, "THRU"))
1714 if (lex_match_id (lexer, "HI"))
1716 struct substring sr1 = { .string = NULL };
1717 *cat = cct_srange (s, sr1);
1721 if (!lex_force_string (lexer))
1726 struct substring sr1 = parse_substring (lexer, dict);
1727 *cat = cct_srange (s, sr1);
1731 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1733 else if (lex_match (lexer, T_AND))
/* Reference to a postcompute, which is looked up by name. */
1735 if (!lex_force_id (lexer))
1737 struct ctables_postcompute *pc = ctables_find_postcompute (
1738 ct, lex_tokcstr (lexer));
1741 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1742 msg_at (SE, loc, _("Unknown postcompute &%s."),
1743 lex_tokcstr (lexer));
1744 msg_location_destroy (loc);
1749 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1753 lex_error (lexer, NULL);
/* Parses S as data in format FORMAT with data_in(), using DICT's
   encoding and the current format settings.  data_in() yields an error
   string; the message below reports it at LOCATION together with the
   format's name.

   NOTE(review): presumably the parsed number is stored in *N on success;
   confirm against the rest of the body. */
1761 parse_category_string (struct msg_location *location,
1762 struct substring s, const struct dictionary *dict,
1763 enum fmt_type format, double *n)
1766 char *error = data_in (s, dict_get_encoding (dict), format,
1767 settings_get_fmt_settings (), &v, 0, NULL);
1770 msg_at (SE, location,
1771 _("Failed to parse category specification as format %s: %s."),
1772 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E
   refers to.  Each CTPO_CAT_* operation is matched against categories of
   the corresponding CCT_* type:

   - Numbers and strings must be equal to the category's value.

   - Numeric and string ranges must have both bounds equal, where string
     bounds are compared with nullable_substring_equal().

   - MISSING, OTHERNM and TOTAL match on category type alone.

   - Subtotals are matched using E->subtotal_index and a running count of
     the subtotals seen so far; an index of 0 gets separate treatment. */
1781 static struct ctables_category *
1782 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1783 const struct ctables_pcexpr *e)
1785 struct ctables_category *best = NULL;
1786 size_t n_subtotals = 0;
1787 for (size_t i = 0; i < cats->n_cats; i++)
1789 struct ctables_category *cat = &cats->cats[i];
1792 case CTPO_CAT_NUMBER:
1793 if (cat->type == CCT_NUMBER && cat->number == e->number)
1797 case CTPO_CAT_STRING:
1798 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1802 case CTPO_CAT_NRANGE:
1803 if (cat->type == CCT_NRANGE
1804 && cat->nrange[0] == e->nrange[0]
1805 && cat->nrange[1] == e->nrange[1])
1809 case CTPO_CAT_SRANGE:
1810 if (cat->type == CCT_SRANGE
1811 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1812 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1816 case CTPO_CAT_MISSING:
1817 if (cat->type == CCT_MISSING)
1821 case CTPO_CAT_OTHERNM:
1822 if (cat->type == CCT_OTHERNM)
1826 case CTPO_CAT_SUBTOTAL:
1827 if (cat->type == CCT_SUBTOTAL)
1830 if (e->subtotal_index == n_subtotals)
1832 else if (e->subtotal_index == 0)
1837 case CTPO_CAT_TOTAL:
1838 if (cat->type == CCT_TOTAL)
/* A subtotal reference without a position (index 0) when CATS holds more
   than one subtotal.  NOTE(review): presumably treated as ambiguous and
   rejected; confirm against the statement that follows. */
1852 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers
   to.

   When PARSE_FORMAT is not FMT_F, a string or string-range reference in
   E is first converted with parse_category_string() into the equivalent
   numeric reference (CTPO_CAT_NUMBER or CTPO_CAT_NRANGE), keeping E's
   location, and the lookup is done with that.  A string range bound with
   a null string pointer becomes -DBL_MAX (lower) or DBL_MAX (upper).
   Otherwise E is looked up as-is. */
1857 static struct ctables_category *
1858 ctables_find_category_for_postcompute (const struct dictionary *dict,
1859 const struct ctables_categories *cats,
1860 enum fmt_type parse_format,
1861 const struct ctables_pcexpr *e)
1863 if (parse_format != FMT_F)
1865 if (e->op == CTPO_CAT_STRING)
1868 if (!parse_category_string (e->location, e->string, dict,
1869 parse_format, &number))
1872 struct ctables_pcexpr e2 = {
1873 .op = CTPO_CAT_NUMBER,
1875 .location = e->location,
1877 return ctables_find_category_for_postcompute__ (cats, &e2);
1879 else if (e->op == CTPO_CAT_SRANGE)
/* Convert each bound separately, since either may be open-ended. */
1882 if (!e->srange[0].string)
1883 nrange[0] = -DBL_MAX;
1884 else if (!parse_category_string (e->location, e->srange[0], dict,
1885 parse_format, &nrange[0]))
1888 if (!e->srange[1].string)
1889 nrange[1] = DBL_MAX;
1890 else if (!parse_category_string (e->location, e->srange[1], dict,
1891 parse_format, &nrange[1]))
1894 struct ctables_pcexpr e2 = {
1895 .op = CTPO_CAT_NRANGE,
1896 .nrange = { nrange[0], nrange[1] },
1897 .location = e->location,
1899 return ctables_find_category_for_postcompute__ (cats, &e2);
1902 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, which belongs to computed category
   PC_CAT, against category list CATS.  CATS_LOCATION is the source
   location of the category list, used in diagnostics.

   For a node that references a category (the CTPO_CAT_* operations), the
   category is looked up in CATS using PC_CAT's parse format.  The
   diagnostics below cover two situations: a SUBTOTAL reference without a
   position when CATS contains more than one subtotal, and a reference to
   a category that is not in the list, with a note suggesting a fix.

   For other nodes, the two operands in subs[] are checked recursively. */
1906 ctables_recursive_check_postcompute (struct dictionary *dict,
1907 const struct ctables_pcexpr *e,
1908 struct ctables_category *pc_cat,
1909 const struct ctables_categories *cats,
1910 const struct msg_location *cats_location)
1914 case CTPO_CAT_NUMBER:
1915 case CTPO_CAT_STRING:
1916 case CTPO_CAT_NRANGE:
1917 case CTPO_CAT_SRANGE:
1918 case CTPO_CAT_MISSING:
1919 case CTPO_CAT_OTHERNM:
1920 case CTPO_CAT_SUBTOTAL:
1921 case CTPO_CAT_TOTAL:
1923 struct ctables_category *cat = ctables_find_category_for_postcompute (
1924 dict, cats, pc_cat->parse_format, e);
1927 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals, because a reference without a position is only
   acceptable when there is no more than one. */
1929 size_t n_subtotals = 0;
1930 for (size_t i = 0; i < cats->n_cats; i++)
1931 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1932 if (n_subtotals > 1)
1934 msg_at (SE, cats_location,
1935 ngettext ("These categories include %zu instance "
1936 "of SUBTOTAL or HSUBTOTAL, so references "
1937 "from computed categories must refer to "
1938 "subtotals by position, "
1939 "e.g. SUBTOTAL[1].",
1940 "These categories include %zu instances "
1941 "of SUBTOTAL or HSUBTOTAL, so references "
1942 "from computed categories must refer to "
1943 "subtotals by position, "
1944 "e.g. SUBTOTAL[1].",
1947 msg_at (SN, e->location,
1948 _("This is the reference that lacks a position."));
1953 msg_at (SE, pc_cat->location,
1954 _("Computed category &%s references a category not included "
1955 "in the category list."),
1957 msg_at (SN, e->location, _("This is the missing category."));
/* The suggested fix depends on the kind of category that is missing. */
1958 if (e->op == CTPO_CAT_SUBTOTAL)
1959 msg_at (SN, cats_location,
1960 _("To fix the problem, add subtotals to the "
1961 "list of categories here."));
1962 else if (e->op == CTPO_CAT_TOTAL)
1963 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1964 "CATEGORIES specification."));
1966 msg_at (SN, cats_location,
1967 _("To fix the problem, add the missing category to the "
1968 "list of categories here."));
1971 if (pc_cat->pc->hide_source_cats)
/* Recurse into whichever of the two operands are present. */
1985 for (size_t i = 0; i < 2; i++)
1986 if (e->subs[i] && !ctables_recursive_check_postcompute (
1987 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that none of the N_VARS variables in VARS is numeric.  For a
   numeric variable, reports an error at CAT's location that names the
   variable and explains that CAT applies only to string variables. */
1996 all_strings (struct variable **vars, size_t n_vars,
1997 const struct ctables_category *cat)
1999 for (size_t j = 0; j < n_vars; j++)
2000 if (var_is_numeric (vars[j]))
2002 msg_at (SE, cat->location,
2003 _("This category specification may be applied only to string "
2004 "variables, but this subcommand tries to apply it to "
2005 "numeric variable %s."),
2006 var_get_name (vars[j]));
/* Parses a CATEGORIES specification for table T: the VARIABLES keyword
   and a variable list, then either an explicit category list in square
   brackets or settings for implicit categories (ORDER, KEY, MISSING),
   along with the TOTAL, LABEL, POSITION and EMPTY settings.

   A single ctables_categories object is created with one reference per
   listed variable and installed in T->categories[] at each variable's
   dictionary index, after dropping that slot's previous reference.

   Explicit categories are then checked against the variables: computed
   categories are validated with ctables_recursive_check_postcompute(),
   and string categories and string ranges are converted to numbers or
   numeric ranges when strings are to be parsed using the variables'
   common format. */
2013 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2014 struct ctables *ct, struct ctables_table *t)
2016 if (!lex_match_id (lexer, "VARIABLES"))
2018 lex_match (lexer, T_EQUALS);
2020 struct variable **vars;
2022 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find out whether all of the variables have the same print format type.
   If not, common_format becomes NULL. */
2025 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2026 for (size_t i = 1; i < n_vars; i++)
2028 const struct fmt_spec *f = var_get_print_format (vars[i]);
2029 if (f->type != common_format->type)
2031 common_format = NULL;
2037 && (fmt_get_category (common_format->type)
2038 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* The new categories object is shared by all of the variables. */
2040 struct ctables_categories *c = xmalloc (sizeof *c);
2041 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2042 for (size_t i = 0; i < n_vars; i++)
2044 struct ctables_categories **cp
2045 = &t->categories[var_get_dict_index (vars[i])];
2046 ctables_categories_unref (*cp);
2050 size_t allocated_cats = 0;
2051 int cats_start_ofs = -1;
2052 int cats_end_ofs = -1;
2053 if (lex_match (lexer, T_LBRACK))
/* Explicit category list.  The token offsets of the list are recorded
   for use in diagnostics later. */
2055 cats_start_ofs = lex_ofs (lexer);
2058 if (c->n_cats >= allocated_cats)
2059 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2061 int start_ofs = lex_ofs (lexer);
2062 struct ctables_category *cat = &c->cats[c->n_cats];
2063 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2065 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2068 lex_match (lexer, T_COMMA);
2070 while (!lex_match (lexer, T_RBRACK));
2071 cats_end_ofs = lex_ofs (lexer) - 1;
/* Settings for an implicit category, which ORDER, KEY and MISSING
   adjust below. */
2074 struct ctables_category cat = {
2076 .include_missing = false,
2077 .sort_ascending = true,
2079 bool show_totals = false;
2080 char *total_label = NULL;
2081 bool totals_before = false;
2082 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER, KEY and MISSING are recognized only when no explicit categories
   were given, that is, when c->n_cats is zero. */
2084 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2086 lex_match (lexer, T_EQUALS);
2087 if (lex_match_id (lexer, "A"))
2088 cat.sort_ascending = true;
2089 else if (lex_match_id (lexer, "D"))
2090 cat.sort_ascending = false;
2093 lex_error_expecting (lexer, "A", "D");
2097 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2099 lex_match (lexer, T_EQUALS);
2100 if (lex_match_id (lexer, "VALUE"))
2101 cat.type = CCT_VALUE;
2102 else if (lex_match_id (lexer, "LABEL"))
2103 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by
   a parenthesized variable and, for PTILE, a percentile from 0 to 100. */
2106 cat.type = CCT_FUNCTION;
2107 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2108 &cat.weighted, &cat.area))
2111 if (lex_match (lexer, T_LPAREN))
2113 cat.sort_var = parse_variable (lexer, dict);
2117 if (cat.sort_function == CTSF_PTILE)
2119 lex_match (lexer, T_COMMA);
2120 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2122 cat.percentile = lex_number (lexer);
2126 if (!lex_force_match (lexer, T_RPAREN))
2129 else if (ctables_function_availability (cat.sort_function)
2132 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2137 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2139 lex_match (lexer, T_EQUALS);
2140 if (lex_match_id (lexer, "INCLUDE"))
2141 cat.include_missing = true;
2142 else if (lex_match_id (lexer, "EXCLUDE"))
2143 cat.include_missing = false;
2146 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2150 else if (lex_match_id (lexer, "TOTAL"))
2152 lex_match (lexer, T_EQUALS);
2153 if (!parse_bool (lexer, &show_totals))
2156 else if (lex_match_id (lexer, "LABEL"))
2158 lex_match (lexer, T_EQUALS);
2159 if (!lex_force_string (lexer))
2162 total_label = ss_xstrdup (lex_tokss (lexer));
2165 else if (lex_match_id (lexer, "POSITION"))
2167 lex_match (lexer, T_EQUALS);
2168 if (lex_match_id (lexer, "BEFORE"))
2169 totals_before = true;
2170 else if (lex_match_id (lexer, "AFTER"))
2171 totals_before = false;
2174 lex_error_expecting (lexer, "BEFORE", "AFTER");
2178 else if (lex_match_id (lexer, "EMPTY"))
2180 lex_match (lexer, T_EQUALS);
2181 if (lex_match_id (lexer, "INCLUDE"))
2182 c->show_empty = true;
2183 else if (lex_match_id (lexer, "EXCLUDE"))
2184 c->show_empty = false;
2187 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* The list of expected keywords differs according to whether the
   implicit-category settings are available. */
2194 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2195 "TOTAL", "LABEL", "POSITION", "EMPTY");
2197 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category set up above. */
2204 if (c->n_cats >= allocated_cats)
2205 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2206 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, which goes either in front of the
   other categories or after them. */
2211 if (c->n_cats >= allocated_cats)
2212 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2214 struct ctables_category *totals;
2217 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2218 totals = &c->cats[0];
2221 totals = &c->cats[c->n_cats];
2224 *totals = (struct ctables_category) {
2226 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward from index 0 when totals come first, and
   otherwise backward from the end, recording a subtotal in each
   category's subtotal member. */
2230 struct ctables_category *subtotal = NULL;
2231 for (size_t i = totals_before ? 0 : c->n_cats;
2232 totals_before ? i < c->n_cats : i-- > 0;
2233 totals_before ? i++ : 0)
2235 struct ctables_category *cat = &c->cats[i];
2244 cat->subtotal = subtotal;
2247 case CCT_POSTCOMPUTE:
2258 case CCT_EXCLUDED_MISSING:
/* Only an explicit category list (one whose start offset was recorded)
   needs to be checked against the variables. */
2263 if (cats_start_ofs != -1)
2265 for (size_t i = 0; i < c->n_cats; i++)
2267 struct ctables_category *cat = &c->cats[i];
2270 case CCT_POSTCOMPUTE:
2271 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2272 struct msg_location *cats_location
2273 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2274 bool ok = ctables_recursive_check_postcompute (
2275 dict, cat->pc->expr, cat, c, cats_location);
2276 msg_location_destroy (cats_location);
2283 for (size_t j = 0; j < n_vars; j++)
2284 if (var_is_alpha (vars[j]))
2286 msg_at (SE, cat->location,
2287 _("This category specification may be applied "
2288 "only to numeric variables, but this "
2289 "subcommand tries to apply it to string "
2291 var_get_name (vars[j]));
/* A string category is parsed in the common format and replaced by the
   resulting number. */
2300 if (!parse_category_string (cat->location, cat->string, dict,
2301 common_format->type, &n))
2304 ss_dealloc (&cat->string);
2306 cat->type = CCT_NUMBER;
2309 else if (!all_strings (vars, n_vars, cat))
/* Likewise for a string range, whose bounds are parsed separately
   because either one may be open-ended (null string pointer). */
2318 if (!cat->srange[0].string)
2320 else if (!parse_category_string (cat->location,
2321 cat->srange[0], dict,
2322 common_format->type, &n[0]))
2325 if (!cat->srange[1].string)
2327 else if (!parse_category_string (cat->location,
2328 cat->srange[1], dict,
2329 common_format->type, &n[1]))
2332 ss_dealloc (&cat->srange[0]);
2333 ss_dealloc (&cat->srange[1]);
2335 cat->type = CCT_NRANGE;
2336 cat->nrange[0] = n[0];
2337 cat->nrange[1] = n[1];
2339 else if (!all_strings (vars, n_vars, cat))
2350 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: the summary spec set for each summary
   variant and the array for each area type. */
2365 ctables_nest_uninit (struct ctables_nest *nest)
2368 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2369 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2370 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2371 free (nest->areas[at]);
/* Uninitializes each of the nests in STACK and frees the array that
   holds them. */
2375 ctables_stack_uninit (struct ctables_stack *stack)
2379 for (size_t i = 0; i < stack->n; i++)
2380 ctables_nest_uninit (&stack->nests[i]);
2381 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: for each pair of a nest A from
   S0 and a nest B from S1, in that order, the result has one nest whose
   variables are A's followed by B's.  Room is allocated for s0.n * s1.n
   nests.

   The new nest's scale index is A's if A has one, or else B's shifted by
   the number of variables in A.  Its summary specs are cloned from one
   of the two source nests, chosen according to which of them has a
   summary variable.

   S0 and S1 are uninitialized before returning. */
2385 static struct ctables_stack
2386 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2393 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2394 for (size_t i = 0; i < s0.n; i++)
2395 for (size_t j = 0; j < s1.n; j++)
2397 const struct ctables_nest *a = &s0.nests[i];
2398 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists, outer nest first. */
2400 size_t allocate = a->n + b->n;
2401 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2403 for (size_t k = 0; k < a->n; k++)
2404 vars[n++] = a->vars[k];
2405 for (size_t k = 0; k < b->n; k++)
2406 vars[n++] = b->vars[k];
2407 assert (n == allocate);
2409 const struct ctables_nest *summary_src;
2410 if (!a->specs[CSV_CELL].var)
2412 else if (!b->specs[CSV_CELL].var)
2417 struct ctables_nest *new = &stack.nests[stack.n++];
2418 *new = (struct ctables_nest) {
2420 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2421 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2425 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2426 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2428 ctables_stack_uninit (&s0);
2429 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by stacking: the result holds S0's nests
   followed by S1's.  The nests are copied by structure assignment.  The
   group_head of each nest taken from S1 is increased by s0.n, the number
   of nests placed in front of it. */
2433 static struct ctables_stack
2434 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2436 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2437 for (size_t i = 0; i < s0.n; i++)
2438 stack.nests[stack.n++] = s0.nests[i];
2439 for (size_t i = 0; i < s1.n; i++)
2441 stack.nests[stack.n] = s1.nests[i];
2442 stack.nests[stack.n].group_head += s0.n;
2445 assert (stack.n == s0.n + s1.n);
/* Returns a stack that contains a single newly allocated nest for
   variable axis node A.  The nest's scale index is 0 if A is a scale
   variable and SIZE_MAX otherwise.

   If A has cell summary specs or is a scale variable, the specs for each
   summary variant are cloned from A and tagged with A's variable and
   scale flag. */
2451 static struct ctables_stack
2452 var_fts (const struct ctables_axis *a)
2454 struct variable **vars = xmalloc (sizeof *vars);
2457 struct ctables_nest *nest = xmalloc (sizeof *nest);
2458 *nest = (struct ctables_nest) {
2461 .scale_idx = a->scale ? 0 : SIZE_MAX,
2463 if (a->specs[CSV_CELL].n || a->scale)
2464 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2466 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2467 nest->specs[sv].var = a->var;
2468 nest->specs[sv].is_scale = a->scale;
2470 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Builds the stack of nests for axis tree A by recursing into its two
   operands and combining the results with stack_fts() or nest_fts().
   One path returns an empty stack.

   NOTE(review): presumably the empty stack is for a null A and the two
   combinations are for CTAO_STACK and CTAO_NEST nodes respectively;
   confirm against the conditions that select each return. */
2473 static struct ctables_stack
2474 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2477 return (struct ctables_stack) { .n = 0 };
2485 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2486 enumerate_fts (axis_type, a->subs[1]));
2489 /* This should consider any of the scale variables found in the result to
2490 be linked to each other listwise for SMISSING=LISTWISE. */
2491 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2492 enumerate_fts (axis_type, a->subs[1]));
/* Accumulated state for one summary.  Which member is in use depends on
   the summary function, as the comments on the members indicate. */
2498 union ctables_summary
2500 /* COUNT, VALIDN, TOTALN. */
2503 /* MINIMUM, MAXIMUM, RANGE. */
2510 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2511 struct moments1 *moments;
2513 /* MEDIAN, MODE, PTILE. */
2516 struct casewriter *writer;
2521 /* XXX multiple response */
/* Initializes S for accumulating summary function SS->function:

   - Minimum and maximum start out as SYSMIS.

   - Moment-based summaries get a moments1 accumulator created with
     MOMENT_VARIANCE.

   - Summaries that need the sorted data get a sorting casewriter whose
     cases have two width-0 values and are sorted in ascending order on
     the first one. */
2525 ctables_summary_init (union ctables_summary *s,
2526 const struct ctables_summary_spec *ss)
2528 switch (ss->function)
2532 case CTSF_areaPCT_COUNT:
2533 case CTSF_areaPCT_VALIDN:
2534 case CTSF_areaPCT_TOTALN:
2549 s->min = s->max = SYSMIS;
2557 case CTSF_areaPCT_SUM:
2558 s->moments = moments1_create (MOMENT_VARIANCE);
2565 struct caseproto *proto = caseproto_create ();
2566 proto = caseproto_add_width (proto, 0);
2567 proto = caseproto_add_width (proto, 0);
/* Sort on value index 0, ascending. */
2569 struct subcase ordering;
2570 subcase_init (&ordering, 0, 0, SC_ASCEND);
2571 s->writer = sort_create_writer (&ordering, proto);
2572 subcase_uninit (&ordering);
2573 caseproto_unref (proto);
/* Releases whatever S holds for summary function SS->function: the
   moments1 accumulator for moment-based summaries, or the casewriter for
   summaries that collect the data. */
2583 ctables_summary_uninit (union ctables_summary *s,
2584 const struct ctables_summary_spec *ss)
2586 switch (ss->function)
2590 case CTSF_areaPCT_COUNT:
2591 case CTSF_areaPCT_VALIDN:
2592 case CTSF_areaPCT_TOTALN:
2613 case CTSF_areaPCT_SUM:
2614 moments1_destroy (s->moments);
2620 casewriter_destroy (s->writer);
/* Adds one case to summary S according to summary function
   SS->function.  VAR and VALUE are the summarized variable and its value
   in the case, and D_WEIGHT and E_WEIGHT are two forms of the case's
   weight.

   Counting functions add D_WEIGHT (or 1.0 if SS is unweighted) or
   E_WEIGHT to the count, depending on the function, and several of them
   count the case only under conditions on IS_SCALE and EXCLUDED_MISSING.
   Minimum and maximum track VALUE->f.  Moment-based functions add
   VALUE->f to the moments with weight E_WEIGHT (or 1.0 if unweighted).
   Functions that need the sorted data write a two-value case, the value
   followed by its weight, to the casewriter.  The last three groups do
   so only if IS_SCALE_MISSING is false. */
2626 ctables_summary_add (union ctables_summary *s,
2627 const struct ctables_summary_spec *ss,
2628 const struct variable *var, const union value *value,
2629 bool is_scale, bool is_scale_missing,
2630 bool is_missing, bool excluded_missing,
2631 double d_weight, double e_weight)
2633 /* To determine whether a case is included in a given table for a particular
2634 kind of summary, consider the following charts for each variable in the
2635 table. Only if "yes" appears for every variable for the summary is the
2638 Categorical variables: VALIDN COUNT TOTALN
2639 Valid values in included categories yes yes yes
2640 Missing values in included categories --- yes yes
2641 Missing values in excluded categories --- --- yes
2642 Valid values in excluded categories --- --- ---
2644 Scale variables: VALIDN COUNT TOTALN
2645 Valid value yes yes yes
2646 Missing value --- yes yes
2648 Missing values include both user- and system-missing. (The system-missing
2649 value is always in an excluded category.)
2651 switch (ss->function)
2654 case CTSF_areaPCT_TOTALN:
2655 s->count += ss->weighted ? d_weight : 1.0;
2659 case CTSF_areaPCT_COUNT:
2660 if (is_scale || !excluded_missing)
2661 s->count += ss->weighted ? d_weight : 1.0;
2665 case CTSF_areaPCT_VALIDN:
2669 s->count += ss->weighted ? d_weight : 1.0;
2679 s->count += ss->weighted ? d_weight : 1.0;
2683 if (is_scale || !excluded_missing)
2684 s->count += e_weight;
2691 s->count += e_weight;
2695 s->count += e_weight;
2701 if (!is_scale_missing)
2703 assert (!var_is_alpha (var)); /* XXX? */
2704 if (s->min == SYSMIS || value->f < s->min)
2706 if (s->max == SYSMIS || value->f > s->max)
2716 case CTSF_areaPCT_SUM:
2717 if (!is_scale_missing)
2718 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2724 if (!is_scale_missing)
2726 double w = ss->weighted ? e_weight : 1.0;
2729 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2730 *case_num_rw_idx (c, 0) = value->f;
2731 *case_num_rw_idx (c, 1) = w;
2732 casewriter_write (s->writer, c);
/* Computes the value of summary S, accumulated for summary function
   SS->function, in CELL.

   Percentage functions divide by a total taken from the area
   CELL->areas[SS->calc_area], using the weighted (e_*) or unweighted
   (u_*) member according to SS->weighted, and yield SYSMIS when that
   total is zero.  Moment-based functions derive their results from
   moments1_calculate().  Percentile and mode functions read the sorted
   data back from the casewriter and store their result in S->ovalue. */
2739 ctables_summary_value (const struct ctables_cell *cell,
2740 union ctables_summary *s,
2741 const struct ctables_summary_spec *ss)
2743 switch (ss->function)
2750 return cell->areas[ss->calc_area]->sequence;
2752 case CTSF_areaPCT_COUNT:
2754 const struct ctables_area *a = cell->areas[ss->calc_area];
2755 double a_count = ss->weighted ? a->e_count : a->u_count;
2756 return a_count ? s->count / a_count * 100 : SYSMIS;
2759 case CTSF_areaPCT_VALIDN:
2761 const struct ctables_area *a = cell->areas[ss->calc_area];
2762 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2763 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2766 case CTSF_areaPCT_TOTALN:
2768 const struct ctables_area *a = cell->areas[ss->calc_area];
2769 double a_total = ss->weighted ? a->e_total : a->u_total;
2770 return a_total ? s->count / a_total * 100 : SYSMIS;
/* The range is available only if both extremes were recorded. */
2787 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2792 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2798 double weight, variance;
2799 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2800 return calc_semean (variance, weight);
2806 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2807 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2812 double weight, mean;
2813 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2814 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2820 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2824 case CTSF_areaPCT_SUM:
/* The sum as a percentage of the area's sum for the variable at
   SS->sum_var_idx. */
2826 double weight, mean;
2827 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2828 if (weight == SYSMIS || mean == SYSMIS)
2831 const struct ctables_area *a = cell->areas[ss->calc_area];
2832 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2833 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2834 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2841 struct casereader *reader = casewriter_make_reader (s->writer);
/* CTSF_PTILE uses SS->percentile; otherwise 0.5 is used. */
2844 struct percentile *ptile = percentile_create (
2845 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2846 struct order_stats *os = &ptile->parent;
2847 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2848 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2849 statistic_destroy (&ptile->parent.parent);
2856 struct casereader *reader = casewriter_make_reader (s->writer);
2859 struct mode *mode = mode_create ();
2860 struct order_stats *os = &mode->parent;
2861 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2862 s->ovalue = mode->mode;
2863 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose
   variables are compared and the axis whose cell values are examined. */
2871 struct ctables_cell_sort_aux
2873 const struct ctables_nest *nest;
2874 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   pointers to struct ctables_cell and AUX_ points to a struct
   ctables_cell_sort_aux.

   The cells' values on axis AUX->a are compared one variable at a time,
   in nest order, skipping the nest's scale variable.  Cells in different
   categories are ordered by category address.  Cells in the same
   category are ordered in a way that depends on the category's type: by
   value, or by value label, in both cases reversed if the category is
   not sort_ascending. */
2878 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2880 const struct ctables_cell_sort_aux *aux = aux_;
2881 struct ctables_cell *const *ap = a_;
2882 struct ctables_cell *const *bp = b_;
2883 const struct ctables_cell *a = *ap;
2884 const struct ctables_cell *b = *bp;
2886 const struct ctables_nest *nest = aux->nest;
2887 for (size_t i = 0; i < nest->n; i++)
2888 if (i != nest->scale_idx)
2890 const struct variable *var = nest->vars[i];
2891 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2892 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2893 if (a_cv->category != b_cv->category)
2894 return a_cv->category > b_cv->category ? 1 : -1;
2896 const union value *a_val = &a_cv->value;
2897 const union value *b_val = &b_cv->value;
2898 switch (a_cv->category->type)
2904 case CCT_POSTCOMPUTE:
2905 case CCT_EXCLUDED_MISSING:
2906 /* Must be equal. */
2914 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2922 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2924 return a_cv->category->sort_ascending ? cmp : -cmp;
2930 const char *a_label = var_lookup_value_label (var, a_val);
2931 const char *b_label = var_lookup_value_label (var, b_val);
2937 cmp = strcmp (a_label, b_label);
2943 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2946 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, considering
   each pivot axis in turn.  A_ and B_ point to pointers to struct
   ctables_cell.  AUX is unused. */
2958 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2959 const void *aux UNUSED)
2961 struct ctables_cell *const *ap = a_;
2962 struct ctables_cell *const *bp = b_;
2963 const struct ctables_cell *a = *ap;
2964 const struct ctables_cell *b = *bp;
2966 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2968 int al = a->axes[axis].leaf;
2969 int bl = b->axes[axis].leaf;
2971 return al > bl ? 1 : -1;
2979 For each ctables_table:
2980 For each combination of row vars:
2981 For each combination of column vars:
2982 For each combination of layer vars:
2984 Make a table of row values:
2985 Sort entries by row values
2986 Assign a 0-based index to each actual value
2987 Construct a dimension
2988 Make a table of column values
2989 Make a table of layer values
2991 Fill the table entry using the indexes from before.
/* Finds or creates the area of type AREA in section S that CELL belongs
   to.

   An area is identified by the categories, and for most category types
   also the values, that CELL has for the variables listed in each nest's
   areas[AREA].  Values are ignored for total, subtotal and postcompute
   categories.  These are hashed to look in S->areas[AREA] for an
   existing area whose example cell agrees on all of them.

   A new area is created with CELL as its example and, if the table has
   any sum variables, a zeroed array with one ctables_sum per sum
   variable.  It is then inserted into the map. */
2994 static struct ctables_area *
2995 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
2996 enum ctables_area_type area)
2999 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3001 const struct ctables_nest *nest = s->nests[a];
3002 for (size_t i = 0; i < nest->n_areas[area]; i++)
3004 size_t v_idx = nest->areas[area][i];
3005 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3006 hash = hash_pointer (cv->category, hash);
3007 if (cv->category->type != CCT_TOTAL
3008 && cv->category->type != CCT_SUBTOTAL
3009 && cv->category->type != CCT_POSTCOMPUTE)
3010 hash = value_hash (&cv->value,
3011 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area with the same key.  The comparison below
   mirrors the hash computation above. */
3015 struct ctables_area *a;
3016 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3018 const struct ctables_cell *df = a->example;
3019 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3021 const struct ctables_nest *nest = s->nests[a];
3022 for (size_t i = 0; i < nest->n_areas[area]; i++)
3024 size_t v_idx = nest->areas[area][i];
3025 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3026 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3027 if (cv1->category != cv2->category
3028 || (cv1->category->type != CCT_TOTAL
3029 && cv1->category->type != CCT_SUBTOTAL
3030 && cv1->category->type != CCT_POSTCOMPUTE
3031 && !value_equal (&cv1->value, &cv2->value,
3032 var_get_width (nest->vars[v_idx]))))
/* Create a new area. */
3041 struct ctables_sum *sums = (s->table->n_sum_vars
3042 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3045 a = xmalloc (sizeof *a);
3046 *a = (struct ctables_area) { .example = cell, .sums = sums };
3047 hmap_insert (&s->areas[area], &a->node, hash);
3051 static struct substring
3052 rtrim_value (const union value *v, const struct variable *var)
3054 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3055 var_get_width (var));
3056 ss_rtrim (&s, ss_cstr (" "));
3061 in_string_range (const union value *v, const struct variable *var,
3062 const struct substring *srange)
3064 struct substring s = rtrim_value (v, var);
3065 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3066 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.  A
   system-missing numeric value is handled separately, before the
   categories are examined.

   The categories are examined from last to first.  Numbers and strings
   match by equality, with trailing spaces in V ignored for strings, and
   ranges match by inclusion, where -DBL_MAX and DBL_MAX mark open-ended
   numeric bounds.  A missing-value test is applied for a further
   category type, and implicit categories take the value if they include
   missing values or the value is not missing.

   If no category matched, the result is OTHERNM, except that a missing
   value yields NULL. */
3069 static const struct ctables_category *
3070 ctables_categories_match (const struct ctables_categories *c,
3071 const union value *v, const struct variable *var)
3073 if (var_is_numeric (var) && v->f == SYSMIS)
3076 const struct ctables_category *othernm = NULL;
3077 for (size_t i = c->n_cats; i-- > 0; )
3079 const struct ctables_category *cat = &c->cats[i];
3083 if (cat->number == v->f)
3088 if (ss_equals (cat->string, rtrim_value (v, var)))
3093 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3094 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3099 if (in_string_range (v, var, cat->srange))
3104 if (var_is_value_missing (var, v))
3108 case CCT_POSTCOMPUTE:
3123 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3126 case CCT_EXCLUDED_MISSING:
3131 return var_is_value_missing (var, v) ? NULL : othernm;
3134 static const struct ctables_category *
3135 ctables_categories_total (const struct ctables_categories *c)
3137 const struct ctables_category *first = &c->cats[0];
3138 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3139 return (first->type == CCT_TOTAL ? first
3140 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell of section S that corresponds to CATS and to
   case C.

   A cell is identified, for every non-scale variable of every axis nest, by
   its category pointer and -- unless that category is a total, subtotal, or
   postcompute -- by C's value for the variable.  The same rule is used for
   the hash, for the comparison against existing cells, and when the new
   cell's values are recorded. */
3144 static struct ctables_cell *
3145 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3146 const struct ctables_category *cats[PIVOT_N_AXES][10])
3149 enum ctables_summary_variant sv = CSV_CELL;
/* Build the hash from the identifying categories and values. */
3150 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3152 const struct ctables_nest *nest = s->nests[a];
3153 for (size_t i = 0; i < nest->n; i++)
3154 if (i != nest->scale_idx)
3156 hash = hash_pointer (cats[a][i], hash);
3157 if (cats[a][i]->type != CCT_TOTAL
3158 && cats[a][i]->type != CCT_SUBTOTAL
3159 && cats[a][i]->type != CCT_POSTCOMPUTE)
3160 hash = value_hash (case_data (c, nest->vars[i]),
3161 var_get_width (nest->vars[i]), hash);
/* Look through the cells that share this hash for one whose categories
   (and, where relevant, values) all agree. */
3167 struct ctables_cell *cell;
3168 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3170 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3172 const struct ctables_nest *nest = s->nests[a];
3173 for (size_t i = 0; i < nest->n; i++)
3174 if (i != nest->scale_idx
3175 && (cats[a][i] != cell->axes[a].cvs[i].category
3176 || (cats[a][i]->type != CCT_TOTAL
3177 && cats[a][i]->type != CCT_SUBTOTAL
3178 && cats[a][i]->type != CCT_POSTCOMPUTE
3179 && !value_equal (case_data (c, nest->vars[i]),
3180 &cell->axes[a].cvs[i].value,
3181 var_get_width (nest->vars[i])))))
/* Set up a fresh cell. */
3190 cell = xmalloc (sizeof *cell);
3193 cell->omit_areas = 0;
3194 cell->postcompute = false;
3195 //struct string name = DS_EMPTY_INITIALIZER;
3196 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3198 const struct ctables_nest *nest = s->nests[a];
/* One ctables_cell_value slot per variable in this axis's nest. */
3199 cell->axes[a].cvs = (nest->n
3200 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3202 for (size_t i = 0; i < nest->n; i++)
3204 const struct ctables_category *cat = cats[a][i];
3205 const struct variable *var = nest->vars[i];
3206 const union value *value = case_data (c, var);
3207 if (i != nest->scale_idx)
3209 const struct ctables_category *subtotal = cat->subtotal;
3210 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total-like categories add bits to 'omit_areas'; which bits depends on
   the axis the category sits on. */
3213 if (cat->type == CCT_TOTAL
3214 || cat->type == CCT_SUBTOTAL
3215 || cat->type == CCT_POSTCOMPUTE)
3217 /* XXX these should be more encompassing I think.*/
3221 case PIVOT_AXIS_COLUMN:
3222 cell->omit_areas |= ((1u << CTAT_TABLE) |
3223 (1u << CTAT_LAYER) |
3224 (1u << CTAT_LAYERCOL) |
3225 (1u << CTAT_SUBTABLE) |
3228 case PIVOT_AXIS_ROW:
3229 cell->omit_areas |= ((1u << CTAT_TABLE) |
3230 (1u << CTAT_LAYER) |
3231 (1u << CTAT_LAYERROW) |
3232 (1u << CTAT_SUBTABLE) |
3235 case PIVOT_AXIS_LAYER:
3236 cell->omit_areas |= ((1u << CTAT_TABLE) |
3237 (1u << CTAT_LAYER));
3241 if (cat->type == CCT_POSTCOMPUTE)
3242 cell->postcompute = true;
/* Record the category and a clone of the case's value for this variable. */
3245 cell->axes[a].cvs[i].category = cat;
3246 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a textual cell name in 'name',
   but the only visible declaration of 'name' is the commented-out line
   above and the final assignment to cell->name is commented out too;
   presumably this region is compiled out -- confirm. */
3249 if (i != nest->scale_idx)
3251 if (!ds_is_empty (&name))
3252 ds_put_cstr (&name, ", ");
3253 char *value_s = data_out (value, var_get_encoding (var),
3254 var_get_print_format (var),
3255 settings_get_fmt_settings ());
3256 if (cat->type == CCT_TOTAL
3257 || cat->type == CCT_SUBTOTAL
3258 || cat->type == CCT_POSTCOMPUTE)
3259 ds_put_format (&name, "%s=total", var_get_name (var));
3261 ds_put_format (&name, "%s=%s", var_get_name (var),
3262 value_s + strspn (value_s, " "));
3268 //cell->name = ds_steal_cstr (&name);
/* One summary per summary spec; the spec set is taken from the summary
   axis's nest and selected by cell->sv. */
3270 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3271 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3272 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3273 for (size_t i = 0; i < specs->n; i++)
3274 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to one area of every area type, then publish it. */
3275 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3276 cell->areas[at] = ctables_area_insert (s, cell, at);
3277 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C for missing values that matter to the scale variable of
   SPECS.  Nothing is checked unless SPECS->is_scale is set.  Otherwise the
   check covers SPECS->var itself and then each of SPECS's listwise
   variables, using the numeric missing-value test on C's value for each. */
3282 is_scale_missing (const struct ctables_summary_spec_set *specs,
3283 const struct ccase *c)
3285 if (!specs->is_scale)
3288 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3291 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3293 const struct variable *var = specs->listwise_vars[i];
3294 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with dictionary weight D_WEIGHT and effective weight
   E_WEIGHT, to the cell of section S selected by CATS (obtained from
   ctables_cell_insert__()) and to the areas that cell belongs to.

   Every summary of the cell receives the case.  Then each area whose bit is
   clear in the cell's 'omit_areas' mask has its totals increased; its counts
   are increased unless EXCLUDED_MISSING; and further down its valid weights
   and the sums over the table's sum variables are updated. */
3302 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3303 const struct ctables_category *cats[PIVOT_N_AXES][10],
3304 bool is_missing, bool excluded_missing,
3305 double d_weight, double e_weight)
3307 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3308 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3310 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3312 bool scale_missing = is_scale_missing (specs, c);
3313 for (size_t i = 0; i < specs->n; i++)
3314 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3315 specs->var, case_data (c, specs->var), specs->is_scale,
3316 scale_missing, is_missing, excluded_missing,
3317 d_weight, e_weight);
3318 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* 'omit_areas' is a bit mask indexed by area type (it is built with
   '|= 1u << CTAT_...'), so membership must be tested with a bitwise AND.
   A logical AND is true for every AT as soon as any bit is set, because
   '1u << at' is never zero. */
3319 if (!(cell->omit_areas & (1u << at)))
3321 struct ctables_area *a = cell->areas[at];
3322 a->d_total += d_weight;
3323 a->e_total += e_weight;
3325 if (!excluded_missing)
3327 a->d_count += d_weight;
3328 a->e_count += e_weight;
3333 a->d_valid += d_weight;
3334 a->e_valid += e_weight;
/* Accumulate each sum variable whose value in C is not missing: 'e_sum'
   is scaled by the effective weight, 'u_sum' is unweighted. */
3337 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3339 /* XXX listwise_missing??? */
3340 const struct variable *var = s->table->sum_vars[i];
3341 double addend = case_num (c, var);
3342 if (!var_is_num_missing (var, addend))
3344 struct ctables_sum *sum = &a->sums[i];
3345 sum->e_sum += addend * e_weight;
3346 sum->u_sum += addend;
/* Walks the axes of section S starting at START_AXIS and the nest positions
   starting at START_NEST, skipping each nest's scale variable.  For each
   remaining variable it looks up the total category among the table's
   categories for that variable, adds case C through ctables_cell_add__(),
   and recurses from the following nest position so that later variables are
   processed as well.  SAVE holds the entry of CATS in effect on entry to
   the iteration (presumably so it can be put back afterwards). */
3354 recurse_totals (struct ctables_section *s, const struct ccase *c,
3355 const struct ctables_category *cats[PIVOT_N_AXES][10],
3356 bool is_missing, bool excluded_missing,
3357 double d_weight, double e_weight,
3358 enum pivot_axis_type start_axis, size_t start_nest)
3360 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3362 const struct ctables_nest *nest = s->nests[a];
3363 for (size_t i = start_nest; i < nest->n; i++)
3365 if (i == nest->scale_idx)
3368 const struct variable *var = nest->vars[i];
3370 const struct ctables_category *total = ctables_categories_total (
3371 s->table->categories[var_get_dict_index (var)]);
3374 const struct ctables_category *save = cats[a][i];
3376 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3377 d_weight, e_weight);
3378 recurse_totals (s, c, cats, is_missing, excluded_missing,
3379 d_weight, e_weight, a, i + 1);
/* Counterpart of recurse_totals() for subtotals.  Walks the axes of section
   S from START_AXIS and the nest positions from START_NEST, skipping each
   nest's scale variable.  For a visited position the entry of CATS is
   replaced by the 'subtotal' member of the category it held, case C is
   added through ctables_cell_add__(), and the function recurses from the
   following nest position. */
3388 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3389 const struct ctables_category *cats[PIVOT_N_AXES][10],
3390 bool is_missing, bool excluded_missing,
3391 double d_weight, double e_weight,
3392 enum pivot_axis_type start_axis, size_t start_nest)
3394 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3396 const struct ctables_nest *nest = s->nests[a];
3397 for (size_t i = start_nest; i < nest->n; i++)
3399 if (i == nest->scale_idx)
/* SAVE remembers the original category while its subtotal stands in. */
3402 const struct ctables_category *save = cats[a][i];
3405 cats[a][i] = save->subtotal;
3406 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3407 d_weight, e_weight);
3408 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3409 d_weight, e_weight, a, i + 1);
/* Records VALUE of variable VAR in the hash map OCCURRENCES.  Entries are
   keyed by the value's hash and compared with value_equal() at VAR's width;
   a new entry holding a clone of VALUE is allocated and inserted when the
   search does not end the function first. */
3418 ctables_add_occurrence (const struct variable *var,
3419 const union value *value,
3420 struct hmap *occurrences)
3422 int width = var_get_width (var);
3423 unsigned int hash = value_hash (value, width, 0);
3425 struct ctables_occurrence *o;
3426 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3428 if (value_equal (value, &o->value, width))
3431 o = xmalloc (sizeof *o);
3432 value_clone (&o->value, value, width);
3433 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with dictionary weight D_WEIGHT and effective weight
   E_WEIGHT, to section S.

   The category of every non-scale variable in every axis nest is found with
   ctables_categories_match() and stored in a local CATS table.  The case is
   then added to its cell, and recurse_totals() and recurse_subtotals() add
   it to the related total and subtotal cells.

   NOTE(review): CATS has a fixed second dimension of 10 and no visible line
   checks nest->n against it -- confirm the bound is enforced elsewhere. */
3437 ctables_cell_insert (struct ctables_section *s,
3438 const struct ccase *c,
3439 double d_weight, double e_weight)
3441 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3443 /* Does at least one categorical variable have a missing value in an included
3444 or excluded category? */
3445 bool is_missing = false;
3447 /* Does at least one categorical variable have a missing value in an excluded
3449 bool excluded_missing = false;
3451 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3453 const struct ctables_nest *nest = s->nests[a];
3454 for (size_t i = 0; i < nest->n; i++)
3456 if (i == nest->scale_idx)
3459 const struct variable *var = nest->vars[i];
3460 const union value *value = case_data (c, var);
3462 bool var_missing = var_is_value_missing (var, value) != 0;
3466 cats[a][i] = ctables_categories_match (
3467 s->table->categories[var_get_dict_index (var)], value, var);
/* A shared static placeholder category of type CCT_EXCLUDED_MISSING can be
   stored instead, in which case 'excluded_missing' is raised. */
3473 static const struct ctables_category cct_excluded_missing = {
3474 .type = CCT_EXCLUDED_MISSING,
3477 cats[a][i] = &cct_excluded_missing;
3478 excluded_missing = true;
/* Only cases without an excluded missing value feed the per-variable
   occurrence maps. */
3483 if (!excluded_missing)
3484 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3486 const struct ctables_nest *nest = s->nests[a];
3487 for (size_t i = 0; i < nest->n; i++)
3488 if (i != nest->scale_idx)
3490 const struct variable *var = nest->vars[i];
3491 const union value *value = case_data (c, var);
3492 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3496 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3497 d_weight, e_weight);
3499 //if (!excluded_missing)
3501 recurse_totals (s, c, cats, is_missing, excluded_missing,
3502 d_weight, e_weight, 0, 0);
3503 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3504 d_weight, e_weight, 0, 0);
/* Summary spec set that a merge item refers to; merge_item_compare_3way()
   indexes its 'specs' array with the item's 'ofs' member. */
3510 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B point
   at (set->specs[ofs]).  Keys are compared in this order: function,
   weighted, calc_area, percentile, and finally label, where a null label
   compares as the empty string.

   Note that the percentile key is compared in the opposite direction from
   the others: the spec with the smaller percentile yields 1. */
3515 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3517 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3518 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3519 if (as->function != bs->function)
3520 return as->function > bs->function ? 1 : -1;
3521 else if (as->weighted != bs->weighted)
3522 return as->weighted > bs->weighted ? 1 : -1;
3523 else if (as->calc_area != bs->calc_area)
3524 return as->calc_area > bs->calc_area ? 1 : -1;
3525 else if (as->percentile != bs->percentile)
3526 return as->percentile < bs->percentile ? 1 : -1;
3528 const char *as_label = as->label ? as->label : "";
3529 const char *bs_label = bs->label ? bs->label : "";
3530 return strcmp (as_label, bs_label);
/* Appends a text rendering of NUMBER, treated as a value of variable VAR,
   to S.  A temporary pivot value is created for the purpose, formatted into
   S, and destroyed again. */
3534 ctables_category_format_number (double number, const struct variable *var,
3537 struct pivot_value *pv = pivot_value_new_var_value (
3538 var, &(union value) { .f = number });
3539 pivot_value_format (pv, NULL, s);
3540 pivot_value_destroy (pv);
/* Appends a text rendering of STRING, treated as a value of variable VAR,
   to OUT.  STRING is first copied into a buffer of VAR's width, padded on
   the right with spaces, so that it has the layout of a value of VAR; a
   temporary pivot value built from that buffer is formatted and destroyed.

   NOTE(review): no release of the xmalloc'd buffer S is visible in these
   lines -- confirm that it is freed after the pivot value is destroyed. */
3544 ctables_category_format_string (struct substring string,
3545 const struct variable *var, struct string *out)
3547 int width = var_get_width (var);
3548 char *s = xmalloc (width);
3549 buf_copy_rpad (s, width, string.string, string.length, ' ');
3550 struct pivot_value *pv = pivot_value_new_var_value (
3551 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3552 pivot_value_format (pv, NULL, out);
3553 pivot_value_destroy (pv);
/* Appends a textual label for category CAT of variable VAR to S.  The text
   depends on the kind of category: a formatted number or string, two
   formatted bounds joined by " THRU " for ranges, the fixed words "MISSING"
   or "OTHERNM", "&" followed by the postcompute's name, or the category's
   total label. */
3558 ctables_category_format_label (const struct ctables_category *cat,
3559 const struct variable *var,
3565 ctables_category_format_number (cat->number, var, s);
3569 ctables_category_format_string (cat->string, var, s);
3573 ctables_category_format_number (cat->nrange[0], var, s);
3574 ds_put_format (s, " THRU ");
3575 ctables_category_format_number (cat->nrange[1], var, s);
3579 ctables_category_format_string (cat->srange[0], var, s);
3580 ds_put_format (s, " THRU ");
3581 ctables_category_format_string (cat->srange[1], var, s);
3585 ds_put_cstr (s, "MISSING");
3589 ds_put_cstr (s, "OTHERNM");
3592 case CCT_POSTCOMPUTE:
3593 ds_put_format (s, "&%s", cat->pc->name);
3598 ds_put_cstr (s, cat->total_label);
3604 case CCT_EXCLUDED_MISSING:
/* Builds the pivot value used as the label of postcompute category CAT of
   variable VAR.

   The postcompute's label text is scanned for ")LABEL[" markers.  Each
   marker is followed by a decimal index terminated by ']'; an index in the
   range 1...CATS->n_cats selects a category of CATS whose formatted label is
   substituted.  Text between markers is copied through unchanged.  The last
   statement returns the label text unmodified as user text. */
3611 static struct pivot_value *
3612 ctables_postcompute_label (const struct ctables_categories *cats,
3613 const struct ctables_category *cat,
3614 const struct variable *var)
3616 struct substring in = ss_cstr (cat->pc->label);
3617 struct substring target = ss_cstr (")LABEL[");
3619 struct string out = DS_EMPTY_INITIALIZER;
3622 size_t chunk = ss_find_substring (in, target);
/* No further marker: finish.  If nothing has been accumulated yet, the
   remaining input is used directly; otherwise it is appended to OUT and
   OUT's buffer is handed over to the new value. */
3623 if (chunk == SIZE_MAX)
3625 if (ds_is_empty (&out))
3626 return pivot_value_new_user_text (in.string, in.length);
3629 ds_put_substring (&out, in);
3630 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker, then step past the marker. */
3634 ds_put_substring (&out, ss_head (in, chunk));
3635 ss_advance (&in, chunk + target.length);
3637 struct substring idx_s;
3638 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a number that consumes all of IDX_S and names an
   existing category (1-based). */
3641 long int idx = strtol (idx_s.string, &tail, 10);
3642 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3645 struct ctables_category *cat2 = &cats->cats[idx - 1];
3646 if (!ctables_category_format_label (cat2, var, &out))
3652 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels category CAT of variable VAR:
   - a postcompute category that has a label gets the label produced by
     ctables_postcompute_label();
   - a total or subtotal category gets its 'total_label' as user text;
   - anything else gets a variable-value pivot value for VALUE. */
3655 static struct pivot_value *
3656 ctables_category_create_value_label (const struct ctables_categories *cats,
3657 const struct ctables_category *cat,
3658 const struct variable *var,
3659 const union value *value)
3661 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3662 ? ctables_postcompute_label (cats, cat, var)
3663 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3664 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3665 : pivot_value_new_var_value (var, value));
/* Searches T's 'clabels_values_map' for an entry equal to VALUE, which has
   the given WIDTH and the precomputed HASH.  Only entries stored under HASH
   are compared. */
3668 static struct ctables_value *
3669 ctables_value_find__ (struct ctables_table *t, const union value *value,
3670 int width, unsigned int hash)
3672 struct ctables_value *clv;
3673 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3674 hash, &t->clabels_values_map)
3675 if (value_equal (value, &clv->value, width))
/* Makes sure T's 'clabels_values_map' has an entry for VALUE: the map is
   searched with ctables_value_find__() first, and a new entry holding a
   clone of VALUE is allocated and inserted under the value's hash. */
3681 ctables_value_insert (struct ctables_table *t, const union value *value,
3684 unsigned int hash = value_hash (value, width, 0);
3685 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3688 clv = xmalloc (sizeof *clv);
3689 value_clone (&clv->value, value, width);
3690 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper around ctables_value_find__() that computes the hash
   of VALUE (of the given WIDTH) itself. */
3694 static struct ctables_value *
3695 ctables_value_find (struct ctables_table *t,
3696 const union value *value, int width)
3698 return ctables_value_find__ (t, value, width,
3699 value_hash (value, width, 0));
/* Recursively enumerates one index per axis into IX and appends a section
   to T for each complete combination.

   While A is a valid axis, IX[A] runs from 0 up to the number of nests in
   T's stack for that axis (at least one iteration even when the stack is
   empty) and the function recurses to the next axis.  Once A is past the
   last axis, the next free slot of T->sections is initialized: its cell map,
   one occurrence map per variable of each axis's selected nest, and one
   area map per area type. */
3703 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3704 size_t ix[PIVOT_N_AXES])
3706 if (a < PIVOT_N_AXES)
3708 size_t limit = MAX (t->stacks[a].n, 1);
3709 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3710 ctables_table_add_section (t, a + 1, ix);
3714 struct ctables_section *s = &t->sections[t->n_sections++];
3715 *s = (struct ctables_section) {
3717 .cells = HMAP_INITIALIZER (s->cells),
3719 for (a = 0; a < PIVOT_N_AXES; a++)
3722 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3724 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3725 for (size_t i = 0; i < nest->n; i++)
3726 hmap_init (&s->occurrences[a][i]);
3728 for (size_t i = 0; i < N_CTATS; i++)
3729 hmap_init (&s->areas[i]);
/* Binary operator callback handed to ctables_pcexpr_evaluate_nonterminal()
   for postcompute expressions; presumably the sum of A and B (the body is
   not visible here). */
3734 ctpo_add (double a, double b)
/* Binary operator callback handed to ctables_pcexpr_evaluate_nonterminal()
   for postcompute expressions; presumably A minus B (the body is not
   visible here). */
3740 ctpo_sub (double a, double b)
/* Binary operator callback handed to ctables_pcexpr_evaluate_nonterminal()
   for postcompute expressions; presumably the product of A and B (the body
   is not visible here). */
3746 ctpo_mul (double a, double b)
/* Division callback for postcompute expressions: A divided by B, except
   that a zero divisor yields SYSMIS instead of dividing. */
3752 ctpo_div (double a, double b)
3754 return b ? a / b : SYSMIS;
/* Exponentiation callback for postcompute expressions, built on pow().  The
   value of errno on entry is kept in SAVE_ERRNO (presumably so that pow()'s
   error reporting can be inspected and errno restored afterwards). */
3758 ctpo_pow (double a, double b)
3760 int save_errno = errno;
3762 double result = pow (a, b);
/* Unary operator callback for postcompute expressions.  B is unused; it is
   there so the function has the same two-argument shape as the binary
   callbacks.  Presumably the negation of A (the body is not visible
   here). */
3770 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute
   expression. */
3775 struct ctables_pcexpr_evaluate_ctx
/* Cell for which the expression is being evaluated. */
3777 const struct ctables_cell *cell;
/* Section whose cells and occurrence maps are consulted. */
3778 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
3779 const struct ctables_categories *cats;
/* Axis on which the postcompute category sits. */
3780 enum pivot_axis_type pc_a;
/* Passed on to ctables_find_category_for_postcompute(). */
3783 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function before its definition appears. */
3786 static double ctables_pcexpr_evaluate (
3787 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates an operator node E of a postcompute expression.  The first
   N_ARGS subexpressions of E (at most two) are evaluated in order; an
   operand that is not finite or equals SYSMIS is singled out before the
   operator runs.  Otherwise the result is EVALUATE applied to the operands,
   with an unused second operand left at 0. */
3790 ctables_pcexpr_evaluate_nonterminal (
3791 const struct ctables_pcexpr_evaluate_ctx *ctx,
3792 const struct ctables_pcexpr *e, size_t n_args,
3793 double evaluate (double, double))
3795 double args[2] = { 0, 0 };
3796 for (size_t i = 0; i < n_args; i++)
3798 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3799 if (!isfinite (args[i]) || args[i] == SYSMIS)
3802 return evaluate (args[0], args[1]);
/* Evaluates one category reference inside a postcompute expression.

   The target cell is the one that has the same categories and values as
   CTX->cell, except that at position (CTX->pc_a, CTX->pc_a_idx) -- where the
   postcompute itself sits -- PC_CV is used instead.  The hash and the
   comparison follow the same identity rule as cell insertion: category
   pointer always, value only for categories that are not total, subtotal,
   or postcompute.  The result is the target cell's summary value at
   CTX->summary_idx. */
3806 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3807 const struct ctables_cell_value *pc_cv)
3809 const struct ctables_section *s = ctx->section;
/* Hash of the target cell's identity. */
3812 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3814 const struct ctables_nest *nest = s->nests[a];
3815 for (size_t i = 0; i < nest->n; i++)
3816 if (i != nest->scale_idx)
3818 const struct ctables_cell_value *cv
3819 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3820 : &ctx->cell->axes[a].cvs[i]);
3821 hash = hash_pointer (cv->category, hash);
3822 if (cv->category->type != CCT_TOTAL
3823 && cv->category->type != CCT_SUBTOTAL
3824 && cv->category->type != CCT_POSTCOMPUTE)
3825 hash = value_hash (&cv->value,
3826 var_get_width (nest->vars[i]), hash);
/* Search the section's cells under that hash for a full match. */
3830 struct ctables_cell *tc;
3831 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3833 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3835 const struct ctables_nest *nest = s->nests[a];
3836 for (size_t i = 0; i < nest->n; i++)
3837 if (i != nest->scale_idx)
3839 const struct ctables_cell_value *p_cv
3840 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3841 : &ctx->cell->axes[a].cvs[i]);
3842 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3843 if (p_cv->category != t_cv->category
3844 || (p_cv->category->type != CCT_TOTAL
3845 && p_cv->category->type != CCT_SUBTOTAL
3846 && p_cv->category->type != CCT_POSTCOMPUTE
3847 && !value_equal (&p_cv->value,
3849 var_get_width (nest->vars[i]))))
/* Read the requested summary from the target cell, using the spec set that
   matches the target cell's summary variant. */
3861 const struct ctables_table *t = s->table;
3862 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3863 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3864 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3865 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX.

   Category references are resolved to a category with
   ctables_find_category_for_postcompute() and then evaluated against the
   matching cell(s); operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the corresponding ctpo_*
   callback. */
3869 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3870 const struct ctables_pcexpr *e)
/* Categories that can cover several data values: every value recorded in
   the occurrence map for the postcompute's variable that matches the
   category is evaluated and the results are added into 'sum'. */
3877 case CTPO_CAT_NRANGE:
3878 case CTPO_CAT_SRANGE:
3879 case CTPO_CAT_MISSING:
3880 case CTPO_CAT_OTHERNM:
3882 struct ctables_cell_value cv = {
3883 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3885 assert (cv.category != NULL);
3887 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3888 const struct ctables_occurrence *o;
3891 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3892 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3893 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3895 cv.value = o->value;
3896 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Categories identified by a single number, or by being a (sub)total:
   evaluated directly, with the expression's number as the value. */
3901 case CTPO_CAT_NUMBER:
3902 case CTPO_CAT_SUBTOTAL:
3903 case CTPO_CAT_TOTAL:
3905 struct ctables_cell_value cv = {
3906 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3907 .value = { .f = e->number },
3909 assert (cv.category != NULL);
3910 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String category: when the variable is wider than the expression's string,
   a copy padded with spaces to the variable's width is made so that it can
   stand in for a value of the variable. */
3913 case CTPO_CAT_STRING:
3915 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3917 if (width > e->string.length)
3919 s = xmalloc (width);
3920 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3923 const struct ctables_category *category
3924 = ctables_find_category_for_postcompute (
3925 ctx->section->table->ctables->dict,
3926 ctx->cats, ctx->parse_format, e);
3927 assert (category != NULL);
/* The category that was found decides which member of the value is set. */
3929 struct ctables_cell_value cv = { .category = category };
3930 if (category->type == CCT_NUMBER)
3931 cv.value.f = category->number;
3932 else if (category->type == CCT_STRING)
3933 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3937 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary ones and one unary one. */
3943 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3946 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3949 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3952 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3955 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3958 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Locates the postcompute category of CELL, which must have its
   'postcompute' flag set.  Every variable position of every axis nest of S
   is inspected for a category of type CCT_POSTCOMPUTE; the category found
   is kept in 'pc_cat' and its nest index is stored through PC_A_IDX_P.  At
   least one postcompute category must be found (asserted). */
3964 static const struct ctables_category *
3965 ctables_cell_postcompute (const struct ctables_section *s,
3966 const struct ctables_cell *cell,
3967 enum pivot_axis_type *pc_a_p,
3970 assert (cell->postcompute);
3971 const struct ctables_category *pc_cat = NULL;
3972 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3973 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3975 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3976 if (cv->category->type == CCT_POSTCOMPUTE)
3980 /* Multiple postcomputes cross each other. The value is
3985 pc_cat = cv->category;
3989 *pc_a_idx_p = pc_a_idx;
3993 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   spec SS.

   The cell's postcompute category is located with
   ctables_cell_postcompute().  The postcompute's own summary specs are
   searched for one that agrees with SS on function, weighting, calculation
   area, and percentile; a match overrides *FORMAT and *IS_CTABLES_FORMAT.
   The postcompute's expression is then evaluated in a context built from
   the cell, the section, and the categories of the variable that carries
   the postcompute. */
3998 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3999 const struct ctables_cell *cell,
4000 const struct ctables_summary_spec *ss,
4001 struct fmt_spec *format,
4002 bool *is_ctables_format,
4005 enum pivot_axis_type pc_a = 0;
4006 size_t pc_a_idx = 0;
4007 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4008 s, cell, &pc_a, &pc_a_idx);
4012 const struct ctables_postcompute *pc = pc_cat->pc;
4015 for (size_t i = 0; i < pc->specs->n; i++)
4017 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4018 if (ss->function == ss2->function
4019 && ss->weighted == ss2->weighted
4020 && ss->calc_area == ss2->calc_area
4021 && ss->percentile == ss2->percentile)
4023 *format = ss2->format;
4024 *is_ctables_format = ss2->is_ctables_format;
4030 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4031 const struct ctables_categories *cats = s->table->categories[
4032 var_get_dict_index (var)];
4033 struct ctables_pcexpr_evaluate_ctx ctx = {
4038 .pc_a_idx = pc_a_idx,
4039 .summary_idx = summary_idx,
4040 .parse_format = pc_cat->parse_format,
4042 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS into a string obtained from
   data_out_stretchy(), then repositions a leading minus sign as explained
   in the comment below. */
4046 ctables_format (double d, const struct fmt_spec *format,
4047 const struct fmt_settings *settings)
4049 const union value v = { .f = d };
4050 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4052 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4053 produce the results we want for negative numbers, putting the negative
4054 sign in the wrong spot, before the prefix instead of after it. We can't,
4055 in fact, produce the desired results using a custom-currency
4056 specification. Instead, we postprocess the output, moving the negative
4059 NEQUAL: "-N=3" => "N=-3"
4060 PAREN: "-(3)" => "(-3)"
4061 PCTPAREN: "-(3%)" => "(-3%)"
4063 This transformation doesn't affect NEGPAREN. */
/* A '-' that directly follows 'E' is left alone (presumably the sign of an
   exponent). */
4064 char *minus_src = strchr (s, '-');
4065 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* Destination of the sign: the '=' of "N=" when present, otherwise the
   first '('. */
4067 char *n_equals = strstr (s, "N=");
4068 char *lparen = strchr (s, '(');
4069 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4071 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  A nest is checked for
   consisting of exactly one variable that is also its scale variable, and
   the variable-label setting of that variable is checked against
   CTVL_NONE. */
4077 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4079 for (size_t i = 0; i < t->stacks[a].n; i++)
4081 struct ctables_nest *nest = &t->stacks[a].nests[i];
4082 if (nest->n != 1 || nest->scale_idx != 0)
4085 enum ctables_vlabel vlabel
4086 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4087 if (vlabel != CTVL_NONE)
4094 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4096 struct pivot_table *pt = pivot_table_create__ (
4098 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4099 : pivot_value_new_text (N_("Custom Tables"))),
4102 pivot_table_set_caption (
4103 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4105 pivot_table_set_corner_text (
4106 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4108 bool summary_dimension = (t->summary_axis != t->slabels_axis
4109 || (!t->slabels_visible
4110 && t->summary_specs.n > 1));
4111 if (summary_dimension)
4113 struct pivot_dimension *d = pivot_dimension_create (
4114 pt, t->slabels_axis, N_("Statistics"));
4115 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4116 if (!t->slabels_visible)
4117 d->hide_all_labels = true;
4118 for (size_t i = 0; i < specs->n; i++)
4119 pivot_category_create_leaf (
4120 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4123 bool categories_dimension = t->clabels_example != NULL;
4124 if (categories_dimension)
4126 struct pivot_dimension *d = pivot_dimension_create (
4127 pt, t->label_axis[t->clabels_from_axis],
4128 t->clabels_from_axis == PIVOT_AXIS_ROW
4129 ? N_("Row Categories")
4130 : N_("Column Categories"));
4131 const struct variable *var = t->clabels_example;
4132 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4133 for (size_t i = 0; i < t->n_clabels_values; i++)
4135 const struct ctables_value *value = t->clabels_values[i];
4136 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4137 assert (cat != NULL);
4138 pivot_category_create_leaf (
4139 d->root, ctables_category_create_value_label (c, cat,
4145 pivot_table_set_look (pt, ct->look);
4146 struct pivot_dimension *d[PIVOT_N_AXES];
4147 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4149 static const char *names[] = {
4150 [PIVOT_AXIS_ROW] = N_("Rows"),
4151 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4152 [PIVOT_AXIS_LAYER] = N_("Layers"),
4154 d[a] = (t->axes[a] || a == t->summary_axis
4155 ? pivot_dimension_create (pt, a, names[a])
4160 assert (t->axes[a]);
4162 for (size_t i = 0; i < t->stacks[a].n; i++)
4164 struct ctables_nest *nest = &t->stacks[a].nests[i];
4165 struct ctables_section **sections = xnmalloc (t->n_sections,
4167 size_t n_sections = 0;
4169 size_t n_total_cells = 0;
4170 size_t max_depth = 0;
4171 for (size_t j = 0; j < t->n_sections; j++)
4172 if (t->sections[j].nests[a] == nest)
4174 struct ctables_section *s = &t->sections[j];
4175 sections[n_sections++] = s;
4176 n_total_cells += hmap_count (&s->cells);
4178 size_t depth = s->nests[a]->n;
4179 max_depth = MAX (depth, max_depth);
4182 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4184 size_t n_sorted = 0;
4186 for (size_t j = 0; j < n_sections; j++)
4188 struct ctables_section *s = sections[j];
4190 struct ctables_cell *cell;
4191 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4193 sorted[n_sorted++] = cell;
4194 assert (n_sorted <= n_total_cells);
4197 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4198 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4201 if (a == PIVOT_AXIS_ROW)
4203 size_t ids[N_CTATS];
4204 memset (ids, 0, sizeof ids);
4205 for (size_t j = 0; j < n_sorted; j++)
4207 struct ctables_cell *cell = sorted[j];
4208 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4210 struct ctables_area *area = cell->areas[at];
4211 if (!area->sequence)
4212 area->sequence = ++ids[at];
4219 for (size_t j = 0; j < n_sorted; j++)
4221 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4226 struct ctables_level
4228 enum ctables_level_type
4230 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4231 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4232 CTL_SUMMARY, /* Summary functions. */
4236 enum settings_value_show vlabel; /* CTL_VAR only. */
4239 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4240 size_t n_levels = 0;
4241 for (size_t k = 0; k < nest->n; k++)
4243 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4244 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4246 if (vlabel != CTVL_NONE)
4248 levels[n_levels++] = (struct ctables_level) {
4250 .vlabel = (enum settings_value_show) vlabel,
4255 if (nest->scale_idx != k
4256 && (k != nest->n - 1 || t->label_axis[a] == a))
4258 levels[n_levels++] = (struct ctables_level) {
4259 .type = CTL_CATEGORY,
4265 if (!summary_dimension && a == t->slabels_axis)
4267 levels[n_levels++] = (struct ctables_level) {
4268 .type = CTL_SUMMARY,
4269 .var_idx = SIZE_MAX,
4273 /* Pivot categories:
4275 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4276 - category for nest->vars[0], if nest->scale_idx != 0
4277 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4278 - category for nest->vars[1], if nest->scale_idx != 1
4280 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4281 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4282 - summary function, if 'a == t->slabels_axis && a ==
4285 Additional dimensions:
4287 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4289 - If 't->label_axis[b] == a' for some 'b != a', add a category
4294 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4296 for (size_t j = 0; j < n_sorted; j++)
4298 struct ctables_cell *cell = sorted[j];
4299 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4301 size_t n_common = 0;
4304 for (; n_common < n_levels; n_common++)
4306 const struct ctables_level *level = &levels[n_common];
4307 if (level->type == CTL_CATEGORY)
4309 size_t var_idx = level->var_idx;
4310 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4311 if (prev->axes[a].cvs[var_idx].category != c)
4313 else if (c->type != CCT_SUBTOTAL
4314 && c->type != CCT_TOTAL
4315 && c->type != CCT_POSTCOMPUTE
4316 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4317 &cell->axes[a].cvs[var_idx].value,
4318 var_get_type (nest->vars[var_idx])))
4324 for (size_t k = n_common; k < n_levels; k++)
4326 const struct ctables_level *level = &levels[k];
4327 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4328 if (level->type == CTL_SUMMARY)
4330 assert (k == n_levels - 1);
4332 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4333 for (size_t m = 0; m < specs->n; m++)
4335 int leaf = pivot_category_create_leaf (
4336 parent, ctables_summary_label (&specs->specs[m],
4344 const struct variable *var = nest->vars[level->var_idx];
4345 struct pivot_value *label;
4346 if (level->type == CTL_VAR)
4348 label = pivot_value_new_variable (var);
4349 label->variable.show = level->vlabel;
4351 else if (level->type == CTL_CATEGORY)
4353 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4354 label = ctables_category_create_value_label (
4355 t->categories[var_get_dict_index (var)],
4356 cv->category, var, &cv->value);
4361 if (k == n_levels - 1)
4362 prev_leaf = pivot_category_create_leaf (parent, label);
4364 groups[k] = pivot_category_create_group__ (parent, label);
4368 cell->axes[a].leaf = prev_leaf;
4377 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4381 size_t n_total_cells = 0;
4382 for (size_t j = 0; j < t->n_sections; j++)
4383 n_total_cells += hmap_count (&t->sections[j].cells);
4385 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4386 size_t n_sorted = 0;
4387 for (size_t j = 0; j < t->n_sections; j++)
4389 const struct ctables_section *s = &t->sections[j];
4390 struct ctables_cell *cell;
4391 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4393 sorted[n_sorted++] = cell;
4395 assert (n_sorted <= n_total_cells);
4396 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4398 size_t ids[N_CTATS];
4399 memset (ids, 0, sizeof ids);
4400 for (size_t j = 0; j < n_sorted; j++)
4402 struct ctables_cell *cell = sorted[j];
4403 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4405 struct ctables_area *area = cell->areas[at];
4406 if (!area->sequence)
4407 area->sequence = ++ids[at];
4414 for (size_t i = 0; i < t->n_sections; i++)
4416 struct ctables_section *s = &t->sections[i];
4418 struct ctables_cell *cell;
4419 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4424 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4425 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4426 for (size_t j = 0; j < specs->n; j++)
4429 size_t n_dindexes = 0;
4431 if (summary_dimension)
4432 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4434 if (categories_dimension)
4436 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4437 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4438 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4439 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4442 dindexes[n_dindexes++] = ctv->leaf;
4445 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4448 int leaf = cell->axes[a].leaf;
4449 if (a == t->summary_axis && !summary_dimension)
4451 dindexes[n_dindexes++] = leaf;
4454 const struct ctables_summary_spec *ss = &specs->specs[j];
4456 struct fmt_spec format = specs->specs[j].format;
4457 bool is_ctables_format = ss->is_ctables_format;
4458 double d = (cell->postcompute
4459 ? ctables_cell_calculate_postcompute (
4460 s, cell, ss, &format, &is_ctables_format, j)
4461 : ctables_summary_value (cell, &cell->summaries[j],
4464 struct pivot_value *value;
4465 if (ct->hide_threshold != 0
4466 && d < ct->hide_threshold
4467 && ctables_summary_function_is_count (ss->function))
4469 value = pivot_value_new_user_text_nocopy (
4470 xasprintf ("<%d", ct->hide_threshold));
4472 else if (d == 0 && ct->zero)
4473 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4474 else if (d == SYSMIS && ct->missing)
4475 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4476 else if (is_ctables_format)
4477 value = pivot_value_new_user_text_nocopy (
4478 ctables_format (d, &format, &ct->ctables_formats));
4481 value = pivot_value_new_number (d);
4482 value->numeric.format = format;
4484 /* XXX should text values be right-justified? */
4485 pivot_table_put (pt, dindexes, n_dindexes, value);
4490 pivot_table_submit (pt);
/* Validates a request to move the category labels of axis A of table T to
   another axis (t->label_axis[a]).  Records A in t->clabels_from_axis and the
   last variable of the first nest in t->clabels_example, and reports an error
   with msg() for each requirement checked below that is violated.

   NOTE(review): the return type, the early exits, and the return statements
   are not visible in this excerpt.  ctables_prepare_table() combines two calls
   to this function with `&&', so the result is presumably a success flag --
   confirm against the full source. */
4494 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
/* Axis on which A's category labels were asked to appear. */
4496 enum pivot_axis_type label_pos = t->label_axis[a];
/* Remember which axis the labels are being moved away from. */
4500 t->clabels_from_axis = a;
/* These two names are used only to build the diagnostics below. */
4502 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4503 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4505 const struct ctables_stack *stack = &t->stacks[a];
4509 const struct ctables_nest *n0 = &stack->nests[0];
/* NOTE(review): the condition guarding this assertion is not visible here. */
4512 assert (stack->n == 1);
/* V0, the last variable of the first nest, and its category specification C0
   are the reference against which every other nest is compared. */
4516 const struct variable *v0 = n0->vars[n0->n - 1];
4517 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4518 t->clabels_example = v0;
/* A category of type CCT_FUNCTION in C0 is rejected. */
4520 for (size_t i = 0; i < c0->n_cats; i++)
4521 if (c0->cats[i].type == CCT_FUNCTION)
4523 msg (SE, _("%s=%s is not allowed with sorting based "
4524 "on a summary function."),
4525 subcommand_name, pos_name);
/* The variable whose labels move must not be the nest's scale variable. */
4528 if (n0->n - 1 == n0->scale_idx)
4530 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4531 "but %s is a scale variable."),
4532 subcommand_name, pos_name, var_get_name (v0));
/* The last variable VI of every other nest must be categorical and must agree
   with V0 in width, value labels, and category specification. */
4536 for (size_t i = 1; i < stack->n; i++)
4538 const struct ctables_nest *ni = &stack->nests[i];
4540 const struct variable *vi = ni->vars[ni->n - 1];
4541 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4543 if (ni->n - 1 == ni->scale_idx)
4545 msg (SE, _("%s=%s requires the variables to be moved to be "
4546 "categorical, but %s is a scale variable."),
4547 subcommand_name, pos_name, var_get_name (vi));
4550 if (var_get_width (v0) != var_get_width (vi))
4552 msg (SE, _("%s=%s requires the variables to be "
4553 "moved to have the same width, but %s has "
4554 "width %d and %s has width %d."),
4555 subcommand_name, pos_name,
4556 var_get_name (v0), var_get_width (v0),
4557 var_get_name (vi), var_get_width (vi));
4560 if (!val_labs_equal (var_get_value_labels (v0),
4561 var_get_value_labels (vi)))
4563 msg (SE, _("%s=%s requires the variables to be "
4564 "moved to have the same value labels, but %s "
4565 "and %s have different value labels."),
4566 subcommand_name, pos_name,
4567 var_get_name (v0), var_get_name (vi));
4570 if (!ctables_categories_equal (c0, ci))
4572 msg (SE, _("%s=%s requires the variables to be "
4573 "moved to have the same category "
4574 "specifications, but %s and %s have different "
4575 "category specifications."),
4576 subcommand_name, pos_name,
4577 var_get_name (v0), var_get_name (vi));
/* Registers VAR in the array *SUM_VARS, which has *N elements in use and room
   for *ALLOCATED elements.  The array is searched linearly for VAR; the append
   path grows the array with x2nrealloc() when it is full and stores VAR at
   index *N.

   NOTE(review): the return type and the return statements (including the
   early exit when VAR is already present and the increment of *N) are not
   visible in this excerpt.  enumerate_sum_vars() stores the result in a
   spec's sum_var_idx, so this presumably returns VAR's index -- confirm. */
4586 add_sum_var (struct variable *var,
4587 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Look for an existing entry for VAR. */
4589 for (size_t i = 0; i < *n; i++)
4590 if (var == (*sum_vars)[i])
/* Make room if the array is full, then store VAR in the next free slot. */
4593 if (*n >= *allocated)
4594 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4595 (*sum_vars)[*n] = var;
/* Maps AREA to another ctables_area_type value.

   NOTE(review): only two of the results (CTAT_LAYERCOL and CTAT_LAYERROW) are
   visible in this excerpt; the switch and its case labels are not, so which
   inputs produce which outputs must be confirmed against the full source. */
4599 static enum ctables_area_type
4600 rotate_area (enum ctables_area_type area)
4611 return CTAT_LAYERCOL;
4614 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A.  For every summary spec on a node whose
   function is CTSF_areaPCT_SUM, registers the node's variable through
   add_sum_var() and stores the result in the spec's sum_var_idx.  Both entries
   of a->subs[] are visited recursively.  SUM_VARS, N, and ALLOCATED are passed
   through to add_sum_var() unchanged.

   NOTE(review): the return type and the conditions that choose between the
   spec loop and the recursion (and any check for a null A) are not visible in
   this excerpt. */
4627 enumerate_sum_vars (const struct ctables_axis *a,
4628 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every spec in every summary variant of this node. */
4636 for (size_t i = 0; i < N_CSVS; i++)
4637 for (size_t j = 0; j < a->specs[i].n; j++)
4639 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4640 if (spec->function == CTSF_areaPCT_SUM)
4641 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both children. */
4647 for (size_t i = 0; i < 2; i++)
4648 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The visible steps are:

   - Build the stack of nests for each axis with enumerate_fts() and fill in
     each nest's areas[] index lists, one list per area type.

   - On the summary axis, give each nest default summary specs when it has
     none, rotate each spec's calc_area when labels are swapped between rows
     and columns, and set up listwise-missing variable lists.

   - Merge the summary specs of all nests into t->summary_specs, assigning
     each spec its axis_idx.

   - Enumerate the variables needed for sum-based percentages.

   Returns true only if ctables_check_label_position() succeeds for both the
   row and the column axis.

   NOTE(review): braces, several conditions, and some statements are missing
   from this excerpt, so the comments below describe only the mechanics that
   are visible. */
4654 ctables_prepare_table (struct ctables_table *t)
4656 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4659 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4661 for (size_t j = 0; j < t->stacks[a].n; j++)
4663 struct ctables_nest *nest = &t->stacks[a].nests[j];
4664 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Each index list can hold at most one entry per variable in the nest. */
4666 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4667 nest->n_areas[at] = 0;
/* Whether the variables on axis A belong to area type AT at all. */
4669 bool add_vars = (at == CTAT_LAYER ? a == PIVOT_AXIS_LAYER
4670 : at == CTAT_LAYERROW ? a != PIVOT_AXIS_COLUMN
4671 : at == CTAT_LAYERCOL ? a != PIVOT_AXIS_ROW
4672 : at == CTAT_TABLE ? false
/* Forward pass over the nest's variables; the scale variable is tested for
   specially.  NOTE(review): the action taken for it is not visible here. */
4675 for (size_t k = 0; k < nest->n; k++)
4677 if (k == nest->scale_idx)
4679 nest->areas[at][nest->n_areas[at]++] = k;
/* The special cases below walk the nest from its last variable downward.  K
   is unsigned, so `k < nest->n' ends the loop when K wraps around past zero. */
4681 else if (at == CTAT_LAYERCOL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
4683 for (size_t k = nest->n - 1; k < nest->n; k--)
4685 if (k == nest->scale_idx)
4687 nest->areas[at][nest->n_areas[at]++] = k;
4691 else if (at == CTAT_LAYERCOL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
4693 for (size_t k = nest->n - 1; k < nest->n; k--)
4695 if (k == nest->scale_idx)
4697 nest->areas[at][nest->n_areas[at]++] = k;
4701 else if (at == CTAT_LAYERROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
4703 for (size_t k = nest->n - 1; k < nest->n; k--)
4705 if (k == nest->scale_idx)
4707 nest->areas[at][nest->n_areas[at]++] = k;
4711 else if (at == CTAT_LAYERROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4713 for (size_t k = nest->n - 1; k < nest->n; k--)
4715 if (k == nest->scale_idx)
4717 nest->areas[at][nest->n_areas[at]++] = k;
4721 else if (at == CTAT_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER)
4723 for (size_t k = nest->n - 1; k < nest->n; k--)
4725 if (k == nest->scale_idx)
4727 nest->areas[at][nest->n_areas[at]++] = k;
/* Adjustments to the list just built, keyed on the area type, the axis, and
   where labels were moved.  Two mechanics recur: dropping trailing entries by
   decrementing n_areas, and removing the second-to-last entry by copying the
   last entry over it.  NOTE(review): the guards that ensure at least two
   entries exist before the `n - 2' accesses are not visible in this excerpt. */
4733 if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
4735 size_t n_drop = (a == PIVOT_AXIS_LAYER ? 0
4736 : a == PIVOT_AXIS_ROW ? 2
4738 for (size_t i = 0; i < n_drop; i++)
4739 if (nest->n_areas[at] > 0)
4740 nest->n_areas[at]--;
4743 else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
4745 if (a == PIVOT_AXIS_ROW)
4747 size_t n = nest->n_areas[at];
4750 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4751 nest->n_areas[at]--;
4754 else if (a == PIVOT_AXIS_COLUMN)
4756 if (nest->n_areas[at] > 0)
4757 nest->n_areas[at]--;
4761 else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
4763 if (a == PIVOT_AXIS_COLUMN)
4765 size_t n = nest->n_areas[at];
4768 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4769 nest->n_areas[at]--;
4772 else if (a == PIVOT_AXIS_ROW)
4774 if (nest->n_areas[at] > 0)
4775 nest->n_areas[at]--;
4779 else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4781 size_t n_drop = (a == PIVOT_AXIS_LAYER ? 0
4782 : a == PIVOT_AXIS_COLUMN ? 2
4784 for (size_t i = 0; i < n_drop; i++)
4785 if (nest->n_areas[at] > 0)
4786 nest->n_areas[at]--;
4789 else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
4791 else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
4793 size_t n = nest->n_areas[at];
4796 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4797 nest->n_areas[at]--;
4801 else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4803 else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4805 size_t n = nest->n_areas[at];
4808 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4809 nest->n_areas[at]--;
4813 else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
4815 size_t n = nest->n_areas[at];
4818 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4819 nest->n_areas[at]--;
4823 else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
4825 size_t n = nest->n_areas[at];
4828 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4829 nest->n_areas[at]--;
/* Fallback adjustment: drop the final entry for the listed area/axis
   combinations, then possibly one more when labels have been moved between
   the row and column axes. */
4834 bool drop_last = (at == CTAT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4835 : at == CTAT_ROW ? a == PIVOT_AXIS_COLUMN
4836 : at == CTAT_COL ? a == PIVOT_AXIS_ROW
4838 if (drop_last && nest->n_areas[at] > 0)
4839 nest->n_areas[at]--;
4841 bool drop_additional
4842 = ((t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN && (at == CTAT_ROW || at == CTAT_LAYERROW) && a == PIVOT_AXIS_ROW)
4843 || (t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW && (at == CTAT_COL || at == CTAT_LAYERCOL) && a == PIVOT_AXIS_COLUMN));
4844 if (drop_additional && nest->n_areas[at] > 0)
4845 nest->n_areas[at]--;
/* Give the axis a stack consisting of a single nest with no variables.
   NOTE(review): the condition selecting this path is not visible here. */
4851 struct ctables_nest *nest = xmalloc (sizeof *nest);
4852 *nest = (struct ctables_nest) { .n = 0 };
4853 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4855 /* There's no point in moving labels away from an axis that has no
4856 labels, so avoid dealing with the special cases around that. */
4857 t->label_axis[a] = a;
/* Summary specifications live on the summary axis. */
4860 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4861 for (size_t i = 0; i < stack->n; i++)
4863 struct ctables_nest *nest = &stack->nests[i];
/* No cell specs were given: supply one default spec, CTSF_MEAN for a scale
   variable and CTSF_COUNT otherwise, and reuse it for totals. */
4864 if (!nest->specs[CSV_CELL].n)
4866 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4867 specs->specs = xmalloc (sizeof *specs->specs);
4870 enum ctables_summary_function function
4871 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4873 *specs->specs = (struct ctables_summary_spec) {
4874 .function = function,
4876 .format = ctables_summary_default_format (function, specs->var),
4879 specs->var = nest->vars[0];
4881 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4882 &nest->specs[CSV_CELL]);
/* Cell specs were given but total specs were not: totals copy the cell specs. */
4884 else if (!nest->specs[CSV_TOTAL].n)
4885 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4886 &nest->specs[CSV_CELL]);
/* When row labels go to the column axis or column labels go to the row axis,
   each spec's calc_area is passed through rotate_area().  NOTE(review): any
   condition involving CFI that guards the rotation is not visible here. */
4888 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4889 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4891 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4892 for (size_t i = 0; i < nest->specs[sv].n; i++)
4894 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4895 const struct ctables_function_info *cfi =
4896 &ctables_function_info[ss->function];
4898 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise treatment of missing scale values: collect the scale variables of
   the other nests that share this nest's group_head and attach the list to
   each of this nest's summary variants. */
4902 if (t->ctables->smissing_listwise)
4904 struct variable **listwise_vars = NULL;
4906 size_t allocated = 0;
4908 for (size_t j = nest->group_head; j < stack->n; j++)
4910 const struct ctables_nest *other_nest = &stack->nests[j];
4911 if (other_nest->group_head != nest->group_head)
4914 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4917 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4918 sizeof *listwise_vars);
4919 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* NOTE(review): the condition under which the list is duplicated with
   xmemdup() before being stored is not visible here; presumably it gives
   each variant after the first its own copy -- confirm. */
4922 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4925 listwise_vars = xmemdup (listwise_vars,
4926 n * sizeof *listwise_vars);
4927 nest->specs[sv].listwise_vars = listwise_vars;
4928 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of every nest and variant into t->summary_specs.  Each
   merge_item tracks a position (ofs) within one spec set. */
4933 struct ctables_summary_spec_set *merged = &t->summary_specs;
4934 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4936 for (size_t j = 0; j < stack->n; j++)
4938 const struct ctables_nest *nest = &stack->nests[j];
4940 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4941 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest pending item according to merge_item_compare_3way() and
   append its spec to the merged set. */
4946 struct merge_item min = items[0];
4947 for (size_t j = 1; j < n_left; j++)
4948 if (merge_item_compare_3way (&items[j], &min) < 0)
4951 if (merged->n >= merged->allocated)
4952 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4953 sizeof *merged->specs);
4954 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every pending item that compares equal to the minimum maps to the merged
   spec just appended.  An item whose set is exhausted is removed by moving
   the final item into its slot. */
4956 for (size_t j = 0; j < n_left; )
4958 if (merge_item_compare_3way (&items[j], &min) == 0)
4960 struct merge_item *item = &items[j];
4961 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4962 if (++item->ofs >= item->set->n)
4964 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below write the merged function names and
   each spec's (name, axis_idx) pair to stdout.  Whether a preprocessor guard
   compiles them out is not visible in this excerpt. */
4974 for (size_t j = 0; j < merged->n; j++)
4975 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4977 for (size_t j = 0; j < stack->n; j++)
4979 const struct ctables_nest *nest = &stack->nests[j];
4980 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4982 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4983 for (size_t k = 0; k < specs->n; k++)
4984 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4985 specs->specs[k].axis_idx);
/* Collect into t->sum_vars the variables used by sum-based percentages. */
4991 size_t allocated_sum_vars = 0;
4992 enumerate_sum_vars (t->axes[t->summary_axis],
4993 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
/* Both label-position checks must pass; `&&' skips the column check when the
   row check fails. */
4995 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4996 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack for axis A, reads from case C the value of the
   nest's last variable and, when ctables_categories_match() accepts it for
   that variable's category specification, records it with
   ctables_value_insert().

   NOTE(review): the return type and the action taken for a system-missing
   numeric value are not visible in this excerpt. */
5000 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5001 enum pivot_axis_type a)
5003 struct ctables_stack *stack = &t->stacks[a];
5004 for (size_t i = 0; i < stack->n; i++)
5006 const struct ctables_nest *nest = &stack->nests[i];
/* The last variable in the nest is the one examined. */
5007 const struct variable *var = nest->vars[nest->n - 1];
5008 const union value *value = case_data (c, var);
5010 if (var_is_numeric (var) && value->f == SYSMIS)
5013 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5015 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback passed to sort() by ctables_sort_clabels_values().  A_
   and B_ each point to an array element of type `struct ctables_value *';
   WIDTH_ points to the int width of the values.  Returns the result of
   value_compare_3way() on the two values. */
5020 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
/* The array elements are pointers, so each argument is a pointer to a pointer. */
5022 const struct ctables_value *const *ap = a_;
5023 const struct ctables_value *const *bp = b_;
5024 const struct ctables_value *a = *ap;
5025 const struct ctables_value *b = *bp;
5026 const int *width = width_;
5027 return value_compare_3way (&a->value, &b->value, *width);
/* Builds t->clabels_values from t->clabels_values_map: copies every entry of
   the map into a newly allocated array, sorts the array by value, and gives
   each entry a `leaf' equal to its position in the sorted array.  Before
   that, labeled values of t->clabels_example that match its category
   specification are inserted into the map.

   NOTE(review): the return type and the condition guarding the value-label
   insertion are not visible in this excerpt. */
5031 ctables_sort_clabels_values (struct ctables_table *t)
5033 const struct variable *v0 = t->clabels_example;
5034 int width = var_get_width (v0);
5036 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add each labeled value of V0 that its categories accept. */
5039 const struct val_labs *val_labs = var_get_value_labels (v0);
5040 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5041 vl = val_labs_next (val_labs, vl))
5042 if (ctables_categories_match (c0, &vl->value, v0))
5043 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array with one slot per entry. */
5046 size_t n = hmap_count (&t->clabels_values_map);
5047 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5049 struct ctables_value *clv;
5051 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5052 t->clabels_values[i++] = clv;
5053 t->n_clabels_values = n;
/* Order by value; WIDTH is handed to the comparison callback. */
5056 sort (t->clabels_values, n, sizeof *t->clabels_values,
5057 compare_clabels_values_3way, &width);
/* The sorted position becomes each value's leaf index. */
5059 for (size_t i = 0; i < n; i++)
5060 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, through ctables_add_occurrence(), values of VAR that
   are implied by the category specification CATS, dispatching on each
   category in turn.  Explicit number and string categories contribute their
   own value; the remaining visible branches contribute values drawn from
   VAR's value labels.

   NOTE(review): the return type, the switch, and most case labels are not
   visible in this excerpt, so the comments below describe each branch by what
   its code does rather than by the category type that selects it. */
5064 ctables_add_category_occurrences (const struct variable *var,
5065 struct hmap *occurrences,
5066 const struct ctables_categories *cats)
5068 const struct val_labs *val_labs = var_get_value_labels (var);
5070 for (size_t i = 0; i < cats->n_cats; i++)
5072 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric value: record c->number directly. */
5076 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string value: pad it on the right with spaces to VAR's width in a
   temporary value, record it, then destroy the temporary. */
5082 int width = var_get_width (var);
5084 value_init (&value, width);
5085 value_copy_buf_rpad (&value, width,
5086 CHAR_CAST (uint8_t *, c->string.string),
5087 c->string.length, ' ');
5088 ctables_add_occurrence (var, &value, occurrences);
5089 value_destroy (&value, width);
/* Numeric range: record each labeled value within
   [c->nrange[0], c->nrange[1]], both ends inclusive. */
5094 assert (var_is_numeric (var));
5095 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5096 vl = val_labs_next (val_labs, vl))
5097 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5098 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: record each labeled value accepted by in_string_range(). */
5102 assert (var_is_alpha (var));
5103 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5104 vl = val_labs_next (val_labs, vl))
5105 if (in_string_range (&vl->value, var, c->srange))
5106 ctables_add_occurrence (var, &vl->value, occurrences);
/* Record each labeled value that is missing for VAR. */
5110 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5111 vl = val_labs_next (val_labs, vl))
5112 if (var_is_value_missing (var, &vl->value))
5113 ctables_add_occurrence (var, &vl->value, occurrences);
/* Record every labeled value. */
5117 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5118 vl = val_labs_next (val_labs, vl))
5119 ctables_add_occurrence (var, &vl->value, occurrences);
5122 case CCT_POSTCOMPUTE:
/* Record each labeled value, skipping missing values unless
   c->include_missing is set. */
5132 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5133 vl = val_labs_next (val_labs, vl))
5134 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5135 ctables_add_occurrence (var, &vl->value, occurrences);
5138 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().  Enumerates
   combinations of the recorded occurrences for each variable in each axis
   nest of section S.  A is the axis currently being expanded and A_IDX the
   index of the variable within that axis's nest.  Each candidate value is
   written into the scratch case C and its matching category into
   CATS[A][A_IDX].  Once every axis has been processed, a cell is inserted
   with ctables_cell_insert__().

   NOTE(review): the return type is not visible in this excerpt. */
5145 ctables_section_recurse_add_empty_categories (
5146 struct ctables_section *s,
5147 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5148 enum pivot_axis_type a, size_t a_idx)
/* Base case: all axes have been assigned, so insert the cell. */
5150 if (a >= PIVOT_N_AXES)
5151 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest or no more variables: continue with the next axis. */
5152 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5153 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5156 const struct variable *var = s->nests[a]->vars[a_idx];
5157 const struct ctables_categories *categories = s->table->categories[
5158 var_get_dict_index (var)];
5159 int width = var_get_width (var);
5160 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5161 const struct ctables_occurrence *o;
5162 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in C with a copy of this occurrence's value, then look
   up the category it falls in and recurse to the next variable. */
5164 union value *value = case_data_rw (c, var);
5165 value_destroy (value, width);
5166 value_clone (value, &o->value, width);
5167 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5168 assert (cats[a][a_idx] != NULL);
5169 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* CCT_POSTCOMPUTE categories are also enumerated: each one is placed in CATS
   and recursed on without changing the case. */
5172 for (size_t i = 0; i < categories->n_cats; i++)
5174 const struct ctables_category *cat = &categories->cats[i];
5175 if (cat->type == CCT_POSTCOMPUTE)
5177 cats[a][a_idx] = cat;
5178 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations, using each non-scale
   variable's category specification to extend s->occurrences and then
   enumerating combinations with
   ctables_section_recurse_add_empty_categories().

   NOTE(review): the return type, the statements controlled by
   `cats->show_empty' (presumably setting SHOW_EMPTY), any early exit when
   nothing needs to be shown, and the release of the scratch case are not
   visible in this excerpt. */
5185 ctables_section_add_empty_categories (struct ctables_section *s)
5187 bool show_empty = false;
5188 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5190 for (size_t k = 0; k < s->nests[a]->n; k++)
/* The scale variable has no categories, so it is skipped. */
5191 if (k != s->nests[a]->scale_idx)
5193 const struct variable *var = s->nests[a]->vars[k];
5194 const struct ctables_categories *cats = s->table->categories[
5195 var_get_dict_index (var)];
5196 if (cats->show_empty)
5199 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The fixed second dimension of 10 matches the parameter type of the
   recursive helper. */
5205 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
/* Scratch case whose values the recursion overwrites. */
5206 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5207 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S so that it can accumulate data again: removes every
   recorded occurrence, every cell (destroying its category values and
   summaries), and every area from the section's hash maps, then shrinks the
   cell and area maps.

   NOTE(review): the return type and the free() calls for the occurrence,
   cell, and area structures themselves are not visible in this excerpt. */
5212 ctables_section_clear (struct ctables_section *s)
/* Occurrences: one hash map per non-scale variable per axis. */
5214 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5216 const struct ctables_nest *nest = s->nests[a];
5217 for (size_t i = 0; i < nest->n; i++)
5218 if (i != nest->scale_idx)
5220 const struct variable *var = nest->vars[i];
5221 int width = var_get_width (var);
5222 struct ctables_occurrence *o, *next;
5223 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iterator is used because entries are deleted during the walk. */
5224 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5226 value_destroy (&o->value, width);
5227 hmap_delete (map, &o->node);
/* Cells. */
5234 struct ctables_cell *cell, *next_cell;
5235 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Destroy the category value held for each non-scale variable on each axis,
   then free the per-axis value array. */
5237 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5239 const struct ctables_nest *nest = s->nests[a];
5240 for (size_t i = 0; i < nest->n; i++)
5241 if (i != nest->scale_idx)
5242 value_destroy (&cell->axes[a].cvs[i].value,
5243 var_get_width (nest->vars[i]));
5244 free (cell->axes[a].cvs);
/* Summaries are released against the spec set selected by the cell's summary
   variant (cell->sv) on the summary axis. */
5247 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5248 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5249 for (size_t i = 0; i < specs->n; i++)
5250 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5251 free (cell->summaries);
5253 hmap_delete (&s->cells, &cell->node);
5256 hmap_shrink (&s->cells);
/* Areas: one hash map per area type. */
5258 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5260 struct ctables_area *area, *next_area;
5261 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5265 hmap_delete (&s->areas[at], &area->node);
5268 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: clears its contents with
   ctables_section_clear(), then destroys the occurrence maps of every axis
   (freeing each axis's array of maps), the cell map, and the area maps.

   NOTE(review): the return type is not visible in this excerpt. */
5273 ctables_section_uninit (struct ctables_section *s)
/* Empty the maps first so that only the map structures remain to destroy. */
5275 ctables_section_clear (s);
5277 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5279 struct ctables_nest *nest = s->nests[a];
5280 for (size_t i = 0; i < nest->n; i++)
5281 hmap_destroy (&s->occurrences[a][i]);
5282 free (s->occurrences[a]);
5285 hmap_destroy (&s->cells);
5286 for (size_t i = 0; i < N_CTATS; i++)
5287 hmap_destroy (&s->areas[i]);
/* Empties table T of accumulated data: clears each of its sections and, when
   category labels were moved (t->clabels_example is set), removes every entry
   from t->clabels_values_map and releases the t->clabels_values array.

   NOTE(review): the return type and the free() of each ctables_value entry
   are not visible in this excerpt. */
5291 ctables_table_clear (struct ctables_table *t)
5293 for (size_t i = 0; i < t->n_sections; i++)
5294 ctables_section_clear (&t->sections[i]);
5296 if (t->clabels_example)
/* All stored values have the width of the example variable. */
5298 int width = var_get_width (t->clabels_example);
5299 struct ctables_value *value, *next_value;
5300 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5301 &t->clabels_values_map)
5303 value_destroy (&value->value, width);
5304 hmap_delete (&t->clabels_values_map, &value->node);
5307 hmap_shrink (&t->clabels_values_map);
/* Reset the sorted array so that it can be rebuilt later. */
5309 free (t->clabels_values);
5310 t->clabels_values = NULL;
5311 t->n_clabels_values = 0;
/* Runs the tables described by CT over the cases in INPUT, which belong to
   dataset DS.  Sections are created for every table, then the cases are read
   group by group.  Every case is inserted into every section of every table,
   and values for moved category labels are recorded.  Each table is then
   finished: moved labels are sorted, empty categories are added, and the
   table is output and cleared.  Returns the result of casegrouper_destroy().

   NOTE(review): the return type, the remaining parameters, and the braces
   are not visible in this excerpt.  The per-table finishing loop presumably
   runs once per group, inside the `while' loop -- confirm. */
5316 ctables_execute (struct dataset *ds, struct casereader *input,
5319 for (size_t i = 0; i < ct->n_tables; i++)
5321 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of row, column, and layer nests; an
   axis with no nests counts as one. */
5322 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5323 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5324 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5325 sizeof *t->sections);
5326 size_t ix[PIVOT_N_AXES];
5327 ctables_table_add_section (t, 0, ix);
5330 struct dictionary *dict = dataset_dict (ds);
/* Cases are grouped by split variables or taken as a single group.
   NOTE(review): the ternary's condition is not visible; presumably it is
   SPLITTING. */
5332 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5333 struct casegrouper *grouper
5335 ? casegrouper_create_splits (input, dict)
5336 : casegrouper_create_vars (input, NULL, 0));
5337 struct casereader *group;
5338 while (casegrouper_get_next_group (grouper, &group))
/* The first case of the group supplies the split-file values to report. */
5342 struct ccase *c = casereader_peek (group, 0);
5345 output_split_file_values (ds, c);
5350 bool warn_on_invalid = true;
5351 for (struct ccase *c = casereader_read (group); c;
5352 case_unref (c), c = casereader_read (group))
/* Two weights per case: one from the dictionary's weighting and one from
   ct->e_weight when that is set. */
5354 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5355 double e_weight = (ct->e_weight
5356 ? var_force_valid_weight (ct->e_weight,
5357 case_num (c, ct->e_weight),
5361 for (size_t i = 0; i < ct->n_tables; i++)
5363 struct ctables_table *t = ct->tables[i];
5365 for (size_t j = 0; j < t->n_sections; j++)
5366 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* An axis whose labels were moved elsewhere contributes its values to the
   table's set of moved category labels. */
5368 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5369 if (t->label_axis[a] != a)
5370 ctables_insert_clabels_values (t, c, a);
5373 casereader_destroy (group);
/* Finish, output, and reset each table. */
5375 for (size_t i = 0; i < ct->n_tables; i++)
5377 struct ctables_table *t = ct->tables[i];
5379 if (t->clabels_example)
5380 ctables_sort_clabels_values (t);
5382 for (size_t j = 0; j < t->n_sections; j++)
5383 ctables_section_add_empty_categories (&t->sections[j]);
5385 ctables_table_output (ct, t);
5386 ctables_table_clear (t);
5389 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the parsed
   expression.  The binary-operator parsers take a pointer to such a function
   to parse their operands. */
5394 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5395 struct dictionary *);
/* Releases the resources owned by postcompute expression E, according to its
   operation: the string of a CTPO_CAT_STRING node, both strings of a
   CTPO_CAT_SRANGE node, and, recursively, both sub-expressions of a node that
   has them.  The node's location is destroyed as well.

   NOTE(review): the return type, any null check on E, the case labels that
   select the recursive branch, and the freeing of E itself are not visible in
   this excerpt. */
5398 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5404 case CTPO_CAT_STRING:
5405 ss_dealloc (&e->string);
5408 case CTPO_CAT_SRANGE:
5409 for (size_t i = 0; i < 2; i++)
5410 ss_dealloc (&e->srange[i]);
/* Nodes with sub-expressions: destroy both operands. */
5419 for (size_t i = 0; i < 2; i++)
5420 ctables_pcexpr_destroy (e->subs[i]);
/* No statements are visible between these labels and the code after the
   switch. */
5424 case CTPO_CAT_NUMBER:
5425 case CTPO_CAT_NRANGE:
5426 case CTPO_CAT_MISSING:
5427 case CTPO_CAT_OTHERNM:
5428 case CTPO_CAT_SUBTOTAL:
5429 case CTPO_CAT_TOTAL:
5433 msg_location_destroy (e->location);
/* Allocates a new expression node whose operands are SUB0 and SUB1 and whose
   location is merged from the locations of both operands.

   NOTE(review): the initializer line that stores OP and the return statement
   are not visible in this excerpt; presumably the node's operation is OP and
   the new node is returned -- confirm. */
5438 static struct ctables_pcexpr *
5439 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5440 struct ctables_pcexpr *sub0,
5441 struct ctables_pcexpr *sub1)
5443 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5444 *e = (struct ctables_pcexpr) {
5446 .subs = { sub0, sub1 },
5447 .location = msg_location_merged (sub0->location, sub1->location),
5452 /* How to parse an operator. */
/* Lexer token that spells the operator. */
5455 enum token_type token;
/* Postcompute operation that the token maps to. */
5456 enum ctables_postcompute_op op;
/* Searches the N_OPS entries of OPS for one whose token equals the lexer's
   current token.

   NOTE(review): the statements controlled by the T_NEG_NUM test and the
   return statements are not visible in this excerpt.  Presumably a matched
   token is consumed unless it is T_NEG_NUM, the matching entry is returned,
   and a null pointer is returned when nothing matches -- confirm. */
5459 static const struct operator *
5460 ctables_pcexpr_match_operator (struct lexer *lexer,
5461 const struct operator ops[], size_t n_ops)
5463 for (const struct operator *op = ops; op < ops + n_ops; op++)
5464 if (lex_token (lexer) == op->token)
5466 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators that continues an already parsed
   left-hand operand LHS.  The operators accepted are the N_OPS entries of
   OPS.  Each right-hand operand is parsed with PARSE_NEXT_LEVEL, and each
   operator folds the expression so far together with the new operand into a
   binary node, which makes the chain left-associative.  If CHAIN_WARNING is
   nonnull, it is issued as a warning at the expression's location when
   OP_COUNT exceeds 1.

   NOTE(review): the tests for "no operator matched" and "operand failed to
   parse", and the return statements, are not visible in this excerpt. */
5475 static struct ctables_pcexpr *
5476 ctables_pcexpr_parse_binary_operators__ (
5477 struct lexer *lexer, struct dictionary *dict,
5478 const struct operator ops[], size_t n_ops,
5479 parse_recursively_func *parse_next_level,
5480 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT counts the operators consumed so far. */
5482 for (int op_count = 0; ; op_count++)
5484 const struct operator *op
5485 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5488 if (op_count > 1 && chain_warning)
5489 msg_at (SW, lhs->location, "%s", chain_warning);
5494 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
/* Presumably reached only when RHS failed to parse: the expression built so
   far is discarded. */
5497 ctables_pcexpr_destroy (lhs);
/* The combined node becomes the left operand of the next operator. */
5501 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses an operand with PARSE_NEXT_LEVEL and then any chain of the binary
   operators in OPS (N_OPS entries) that follows it, delegating the chain to
   ctables_pcexpr_parse_binary_operators__().  CHAIN_WARNING is passed through
   unchanged.

   NOTE(review): the handling of a failed first operand and one argument line
   of the final call are not visible in this excerpt. */
5505 static struct ctables_pcexpr *
5506 ctables_pcexpr_parse_binary_operators (
5507 struct lexer *lexer, struct dictionary *dict,
5508 const struct operator ops[], size_t n_ops,
5509 parse_recursively_func *parse_next_level, const char *chain_warning)
5511 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5515 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5517 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this function to
   parse a parenthesized subexpression before its definition appears. */
5520 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5521 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node whose numeric range
   runs from LOW to HIGH.  Members not named in the initializer are
   zero-initialized, as for any compound literal. */
5523 static struct ctables_pcexpr
5524 ctpo_cat_nrange (double low, double high)
5526 return (struct ctables_pcexpr) {
5527 .op = CTPO_CAT_NRANGE,
5528 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node whose string range
   runs from LOW to HIGH.  The substrings are stored as given, without being
   copied.  Callers in this file pass a substring with a null `string' member
   for an open end of the range. */
5532 static struct ctables_pcexpr
5533 ctpo_cat_srange (struct substring low, struct substring high)
5535 return (struct ctables_pcexpr) {
5536 .op = CTPO_CAT_SRANGE,
5537 .srange = { low, high },
/* Parses a primary postcompute expression: a number, one of the keywords
   MISSING, OTHERNM, TOTAL, or SUBTOTAL (optionally followed by a bracketed
   index), a bracketed category reference (a number, a string, or a range
   written with LO, THRU, and HI), or a parenthesized subexpression.  On
   success, returns a heap-allocated copy of the node with its location set to
   the span of tokens consumed.

   NOTE(review): the token-advancing calls after numbers, the error returns,
   and the return of the parenthesized subexpression are not visible in this
   excerpt. */
5541 static struct ctables_pcexpr *
5542 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the primary starts so that its location can be computed. */
5544 int start_ofs = lex_ofs (lexer);
5545 struct ctables_pcexpr e;
/* Numeric constant. */
5546 if (lex_is_number (lexer))
5548 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5549 .number = lex_number (lexer) };
/* Keywords that name a category class. */
5552 else if (lex_match_id (lexer, "MISSING"))
5553 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5554 else if (lex_match_id (lexer, "OTHERNM"))
5555 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5556 else if (lex_match_id (lexer, "TOTAL"))
5557 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5558 else if (lex_match_id (lexer, "SUBTOTAL"))
/* SUBTOTAL may carry a bracketed index of at least 1; the index stays 0 when
   no bracket follows. */
5560 size_t subtotal_index = 0;
5561 if (lex_match (lexer, T_LBRACK))
5563 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5565 subtotal_index = lex_integer (lexer);
5567 if (!lex_force_match (lexer, T_RBRACK))
5570 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5571 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5573 else if (lex_match (lexer, T_LBRACK))
/* LO THRU x: a range open at the low end.  A string upper bound gives a
   string range whose low substring is null; otherwise a number is required
   and the low end is -DBL_MAX. */
5575 if (lex_match_id (lexer, "LO"))
5577 if (!lex_force_match_id (lexer, "THRU"))
5580 if (lex_is_string (lexer))
5582 struct substring low = { .string = NULL };
5583 struct substring high = parse_substring (lexer, dict);
5584 e = ctpo_cat_srange (low, high);
5588 if (!lex_force_num (lexer))
5590 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number: either the start of a numeric range (x THRU HI, x THRU y) or a
   single numeric category. */
5594 else if (lex_is_number (lexer))
5596 double number = lex_number (lexer);
5598 if (lex_match_id (lexer, "THRU"))
5600 if (lex_match_id (lexer, "HI"))
5601 e = ctpo_cat_nrange (number, DBL_MAX);
5604 if (!lex_force_num (lexer))
5606 e = ctpo_cat_nrange (number, lex_number (lexer));
5611 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string: either the start of a string range or a single string category.
   HI is represented by a null high substring. */
5614 else if (lex_is_string (lexer))
5616 struct substring s = parse_substring (lexer, dict);
5618 if (lex_match_id (lexer, "THRU"))
5620 struct substring high;
5622 if (lex_match_id (lexer, "HI"))
5623 high = (struct substring) { .string = NULL };
5626 if (!lex_force_string (lexer))
5631 high = parse_substring (lexer, dict);
5634 e = ctpo_cat_srange (s, high);
5637 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5641 lex_error (lexer, NULL);
/* A missing closing bracket is an error; strings already parsed into the
   node are released before giving up. */
5645 if (!lex_force_match (lexer, T_RBRACK))
5647 if (e.op == CTPO_CAT_STRING)
5648 ss_dealloc (&e.string);
5649 else if (e.op == CTPO_CAT_SRANGE)
5651 ss_dealloc (&e.srange[0]);
5652 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression, parsed from the lowest-precedence level; it is
   destroyed again if the closing parenthesis is missing. */
5657 else if (lex_match (lexer, T_LPAREN))
5659 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5662 if (!lex_force_match (lexer, T_RPAREN))
5664 ctables_pcexpr_destroy (ep);
5671 lex_error (lexer, NULL);
/* The location runs from the first token of the primary to the last token
   consumed, which is the one before the lexer's current offset. */
5675 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5676 return xmemdup (&e, sizeof e);
/* Allocates a new expression node whose location spans from token offset
   START_OFS to the token just before LEXER's current offset.

   NOTE(review): the initializer lines that set the node's operation and
   operand, and the return statement, are not visible in this excerpt.  From
   the name and the callers, the node presumably negates SUB -- confirm. */
5679 static struct ctables_pcexpr *
5680 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5681 struct lexer *lexer, int start_ofs)
5683 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5684 *e = (struct ctables_pcexpr) {
5687 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level: a chain of `**' operators over primary
   expressions.  The chain is left-associative, and CHAIN_WARNING is supplied
   so that chained use can be warned about.  An input that begins with a
   negative number immediately followed by `**' takes a separate path that
   builds a positive constant from the token, parses the rest of the chain,
   and wraps the result with ctables_pcexpr_allocate_neg(). */
5692 static struct ctables_pcexpr *
5693 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
/* The single operator recognized at this level. */
5695 static const struct operator op = { T_EXP, CTPO_POW };
5697 const char *chain_warning =
5698 _("The exponentiation operator (`**') is left-associative: "
5699 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5700 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number followed directly by
   the exponentiation operator. */
5702 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5703 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5704 ctables_pcexpr_parse_primary,
5707 /* Special case for situations like "-5**6", which must be parsed as
5710 int start_ofs = lex_ofs (lexer);
5711 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5712 *lhs = (struct ctables_pcexpr) {
5713 .op = CTPO_CONSTANT,
5714 .number = -lex_tokval (lexer),
5715 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5719 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5720 lexer, dict, &op, 1,
5721 ctables_pcexpr_parse_primary, chain_warning, lhs);
5725 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5728 /* Parses the unary minus level. */
5729 static struct ctables_pcexpr *
5730 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
/* Saved so that a negation node's location can start at the `-' token. */
5732 int start_ofs = lex_ofs (lexer);
/* Without a leading `-', this level is just the exponentiation level. */
5733 if (!lex_match (lexer, T_DASH))
5734 return ctables_pcexpr_parse_exp (lexer, dict);
/* The operand is parsed at this same level, so repeated minus signs nest.
   NOTE(review): the handling of a failed operand is not visible here. */
5736 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5740 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5743 /* Parses the multiplication and division level.

   Delegates to ctables_pcexpr_parse_binary_operators() with two operators,
   T_ASTERISK (CTPO_MUL) and T_SLASH (CTPO_DIV), using
   ctables_pcexpr_parse_neg to parse each operand.  The final argument is
   NULL; judging by the call in ctables_pcexpr_parse_exp() that slot is the
   chain warning, so presumably no warning is issued for chains at this
   level -- confirm against the callee. */
5744 static struct ctables_pcexpr *
5745 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
/* Token-to-operation table for this precedence level. */
5747 static const struct operator ops[] =
5749 { T_ASTERISK, CTPO_MUL },
5750 { T_SLASH, CTPO_DIV },
5753 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5754 sizeof ops / sizeof *ops,
5755 ctables_pcexpr_parse_neg, NULL);
5758 /* Parses the addition and subtraction level.

   Delegates to ctables_pcexpr_parse_binary_operators() with
   ctables_pcexpr_parse_mul as the operand parser and NULL as the final
   argument.  Three tokens act as operators here: T_PLUS (CTPO_ADD), T_DASH
   (CTPO_SUB) and T_NEG_NUM (CTPO_ADD).

   NOTE(review): the T_NEG_NUM entry presumably covers input such as a -5,
   where the lexer delivers the minus sign and the digits as one
   negative-number token, so that adding that number gives the subtraction
   the user wrote -- confirm against the operator parser. */
5759 static struct ctables_pcexpr *
5760 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
/* Token-to-operation table for this precedence level. */
5762 static const struct operator ops[] =
5764 { T_PLUS, CTPO_ADD },
5765 { T_DASH, CTPO_SUB },
5766 { T_NEG_NUM, CTPO_ADD },
5769 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5770 ops, sizeof ops / sizeof *ops,
5771 ctables_pcexpr_parse_mul, NULL);
/* Looks for a postcompute named NAME in the postcomputes hash map of CT.

   The lookup is case-insensitive: candidates are selected by
   utf8_hash_case_string (NAME, 0) and then compared with utf8_strcasecmp().

   NOTE(review): the return statements are elided from this excerpt;
   presumably the matching entry is returned, or a null pointer when there
   is none -- confirm against the full file. */
5774 static struct ctables_postcompute *
5775 ctables_find_postcompute (struct ctables *ct, const char *name)
5777 struct ctables_postcompute *pc;
/* Visit only the entries in the bucket for the case-folded hash of NAME. */
5778 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5779 utf8_hash_case_string (name, 0), &ct->postcomputes)
5780 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which as read here has the form
   &name = EXPR (expression), and records the result in CT.

   The steps visible in this excerpt are:
     - require T_AND (reporting that & was expected otherwise), then an
       identifier, which is copied as the postcompute name;
     - require T_EQUALS, the identifier EXPR, and T_LPAREN;
     - parse the expression with ctables_pcexpr_parse_add() and require
       T_RPAREN, destroying the expression on failure;
     - look up an existing postcompute with the same name.  A warning about
       the override plus a note pointing at the previous definition are
       issued and the old expression and location are destroyed; a new
       entry is allocated and inserted into the postcomputes map of CT
       under the case-folded hash of its name;
     - store the new location and set the label to the source text of the
       expression, from EXPR_START through EXPR_END.

   NOTE(review): the return type line, the conditions that select between
   the override and the new-entry paths, the cleanup of NAME on error, and
   the return statements are elided from this excerpt. */
5786 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* One token before the current one, so that the reported location also
   covers the token that precedes the ampersand (presumably the PCOMPUTE
   keyword already consumed by the caller). */
5789 int pcompute_start = lex_ofs (lexer) - 1;
5791 if (!lex_match (lexer, T_AND))
5793 lex_error_expecting (lexer, "&");
5796 if (!lex_force_id (lexer))
5799 char *name = ss_xstrdup (lex_tokss (lexer));
5802 if (!lex_force_match (lexer, T_EQUALS)
5803 || !lex_force_match_id (lexer, "EXPR")
5804 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets bracketing the expression; they are used for the default
   label at the end of the function. */
5810 int expr_start = lex_ofs (lexer);
5811 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5812 int expr_end = lex_ofs (lexer) - 1;
5813 if (!expr || !lex_force_match (lexer, T_RPAREN))
5815 ctables_pcexpr_destroy (expr);
5819 int pcompute_end = lex_ofs (lexer) - 1;
5821 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5824 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition path: warn, point at the earlier definition, and release
   what the earlier definition owned. */
5827 msg_at (SW, location, _("New definition of &%s will override the "
5828 "previous definition."),
5830 msg_at (SN, pc->location, _("This is the previous definition."));
5832 ctables_pcexpr_destroy (pc->expr);
5833 msg_location_destroy (pc->location);
/* First-definition path: create the entry and register it by name. */
5838 pc = xmalloc (sizeof *pc);
5839 *pc = (struct ctables_postcompute) { .name = name };
5840 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5841 utf8_hash_case_string (pc->name, 0));
5844 pc->location = location;
/* The label starts out as the literal source text of the expression. */
5846 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary specifications that follows FORMAT in a
   PPROPERTIES subcommand and stores them in SSS.

   SSS is first reset to an empty set.  Entries are then parsed until the
   current token is T_ENDCMD, T_SLASH, or an identifier matching LABEL or
   HIDESOURCECATS.  Each entry consists of a summary function (parsed by
   parse_ctables_summary_function()), a percentile in the closed range 0 to
   100 when the function is CTSF_PTILE, and a format specifier (parsed by
   parse_ctables_format_specifier()).  The specs array is grown with
   x2nrealloc() as needed.

   NOTE(review): the return type line, the error jumps, and the return
   statements are elided from this excerpt; the trailing
   ctables_summary_spec_set_uninit() call is presumably the failure path
   that releases a partially filled SSS. */
5851 ctables_parse_pproperties_format (struct lexer *lexer,
5852 struct ctables_summary_spec_set *sss)
5854 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Keep going until something that cannot start another summary spec. */
5856 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5857 && !(lex_token (lexer) == T_ID
5858 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5859 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5860 lex_tokss (lexer)))))
5862 /* Parse function. */
5863 enum ctables_summary_function function;
5865 enum ctables_area_type area;
5866 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5869 /* Parse percentile. */
5870 double percentile = 0;
5871 if (function == CTSF_PTILE)
5873 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5875 percentile = lex_number (lexer);
/* Parse the output format for this summary. */
5880 struct fmt_spec format;
5881 bool is_ctables_format;
5882 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new spec, enlarging the array when it is full. */
5885 if (sss->n >= sss->allocated)
5886 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5887 sizeof *sss->specs);
5888 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5889 .function = function,
5890 .weighted = weighted,
5893 .percentile = percentile,
5895 .is_ctables_format = is_ctables_format,
5901 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand and applies the properties to
   postcomputes already defined in CT.

   First, every &name in the leading list is looked up with
   ctables_find_postcompute(); an unknown name produces the error message
   about an unknown computed category.  The postcomputes found are gathered
   in the PCS array, which is grown with x2nrealloc().

   Then, until T_SLASH or T_ENDCMD, the following settings are accepted, each
   with an optional equals sign, and applied to every gathered postcompute:
     - LABEL: a string that replaces the label (the old label is freed);
     - FORMAT: a summary spec set parsed by
       ctables_parse_pproperties_format(), cloned into each postcompute;
     - HIDESOURCECATS: a boolean stored in hide_source_cats.
   Anything else is reported with lex_error_expecting().

   NOTE(review): the return type line, the declaration of n_pcs, the token
   advances, the error jumps, the cleanup of PCS, and the return statements
   are elided from this excerpt. */
5906 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5908 struct ctables_postcompute **pcs = NULL;
5910 size_t allocated_pcs = 0;
/* Collect the postcomputes named by the &name list. */
5912 while (lex_match (lexer, T_AND))
5914 if (!lex_force_id (lexer))
5916 struct ctables_postcompute *pc
5917 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5920 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5925 if (n_pcs >= allocated_pcs)
5926 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply each property setting to all of the collected postcomputes. */
5930 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5932 if (lex_match_id (lexer, "LABEL"))
5934 lex_match (lexer, T_EQUALS);
5935 if (!lex_force_string (lexer))
5938 for (size_t i = 0; i < n_pcs; i++)
5940 free (pcs[i]->label);
5941 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5946 else if (lex_match_id (lexer, "FORMAT"))
5948 lex_match (lexer, T_EQUALS);
5950 struct ctables_summary_spec_set sss;
5951 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own copy of the parsed set; the local set is
   released once the copies are made. */
5954 for (size_t i = 0; i < n_pcs; i++)
5957 ctables_summary_spec_set_uninit (pcs[i]->specs);
5959 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5960 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5962 ctables_summary_spec_set_uninit (&sss);
5964 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5966 lex_match (lexer, T_EQUALS);
5967 bool hide_source_cats;
5968 if (!parse_bool (lexer, &hide_source_cats))
5970 for (size_t i = 0; i < n_pcs; i++)
5971 pcs[i]->hide_source_cats = hide_source_cats;
5975 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime() according to
   FORMAT, and appends the result to OUT.

   NOTE(review): the return type line and the declaration of the VALUE
   buffer are elided from this excerpt, so the maximum formatted length
   cannot be stated here. */
5988 put_strftime (struct string *out, time_t now, const char *format)
5990 const struct tm *tm = localtime (&now);
5992 strftime (value, sizeof value, format, tm);
5993 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it; otherwise *S is left
   untouched.

   NOTE(review): the return type line and the return statements are elided
   from this excerpt.  Callers use the result as a condition, so presumably
   it reports whether the prefix was found -- confirm against the full
   file. */
5997 skip_prefix (struct substring *s, struct substring prefix)
5999 if (ss_starts_with (*s, prefix))
6001 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START up to but not including EXPR_END.

   Per token, as far as this excerpt shows:
     - T_LBRACK, and T_RBRACK while NEST is positive, get special handling
       whose statements are elided here (presumably bracketed summary
       specifications are tracked by nesting depth and left out -- TODO
       confirm);
     - an identifier is looked up in DICT, and the variable label is written
       when the variable exists and has one, the identifier text otherwise;
     - any other token is written using its source representation, preceded
       by a space unless it is the first token, is T_RPAREN, or OUT already
       ends in a space, and followed by a space unless it is the last token
       or is T_LPAREN.

   NOTE(review): the return type line, the declaration of NEST, and the
   release of REPR are elided from this excerpt. */
6009 put_table_expression (struct string *out, struct lexer *lexer,
6010 struct dictionary *dict, int expr_start, int expr_end)
6013 for (int ofs = expr_start; ofs < expr_end; ofs++)
6015 const struct token *t = lex_ofs_token (lexer, ofs);
6016 if (t->type == T_LBRACK)
6018 else if (t->type == T_RBRACK && nest > 0)
6024 else if (t->type == T_ID)
/* Prefer the variable label over the bare name when there is one. */
6026 const struct variable *var
6027 = dict_lookup_var (dict, t->string.string);
6028 const char *label = var ? var_get_label (var) : NULL;
6029 ds_put_cstr (out, label ? label : t->string.string);
/* Separate from the previous output, but never put a space right before a
   closing parenthesis or double up on spaces. */
6033 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6034 ds_put_byte (out, ' ');
6036 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6037 ds_put_cstr (out, repr);
/* Likewise no space right after an opening parenthesis or at the end. */
6040 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6041 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the markers that start with a
   closing parenthesis:
     - )DATE becomes NOW formatted with the strftime conversion %x;
     - )TIME becomes NOW formatted with the strftime conversion %X;
     - )TABLE becomes the table expression between token offsets EXPR_START
       and EXPR_END, as rendered by put_table_expression().
   A closing parenthesis that does not start one of these markers is copied
   through unchanged.  Marker matching uses skip_prefix(), which compares
   with ss_starts_with().

   NOTE(review): the return type line and the enclosing loop construct are
   elided from this excerpt; the emptiness check after each chunk is
   presumably what ends the loop. */
6047 put_title_text (struct string *out, struct substring in, time_t now,
6048 struct lexer *lexer, struct dictionary *dict,
6049 int expr_start, int expr_end)
/* Copy everything up to the next closing parenthesis verbatim. */
6053 size_t chunk = ss_find_byte (in, ')');
6054 ds_put_substring (out, ss_head (in, chunk));
6055 ss_advance (&in, chunk);
6056 if (ss_is_empty (in))
6059 if (skip_prefix (&in, ss_cstr (")DATE")))
6060 put_strftime (out, now, "%x");
6061 else if (skip_prefix (&in, ss_cstr (")TIME")))
6062 put_strftime (out, now, "%X");
6063 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6064 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a recognized marker: emit the parenthesis itself and move on. */
6067 ds_put_byte (out, ')');
6068 ss_advance (&in, 1);
6074 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6076 struct casereader *input = NULL;
6078 struct measure_guesser *mg = measure_guesser_create (ds);
6081 input = proc_open (ds);
6082 measure_guesser_run (mg, input);
6083 measure_guesser_destroy (mg);
6086 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6087 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6088 enum settings_value_show tvars = settings_get_show_variables ();
6089 for (size_t i = 0; i < n_vars; i++)
6090 vlabels[i] = (enum ctables_vlabel) tvars;
6092 struct pivot_table_look *look = pivot_table_look_unshare (
6093 pivot_table_look_ref (pivot_table_look_get_default ()));
6094 look->omit_empty = false;
6096 struct ctables *ct = xmalloc (sizeof *ct);
6097 *ct = (struct ctables) {
6098 .dict = dataset_dict (ds),
6100 .ctables_formats = FMT_SETTINGS_INIT,
6102 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6105 time_t now = time (NULL);
6110 const char *dot_string;
6111 const char *comma_string;
6113 static const struct ctf ctfs[4] = {
6114 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6115 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6116 { CTEF_PAREN, "-,(,),", "-.(.)." },
6117 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6119 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6120 for (size_t i = 0; i < 4; i++)
6122 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6123 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6124 fmt_number_style_from_string (s));
6127 if (!lex_force_match (lexer, T_SLASH))
6130 while (!lex_match_id (lexer, "TABLE"))
6132 if (lex_match_id (lexer, "FORMAT"))
6134 double widths[2] = { SYSMIS, SYSMIS };
6135 double units_per_inch = 72.0;
6137 while (lex_token (lexer) != T_SLASH)
6139 if (lex_match_id (lexer, "MINCOLWIDTH"))
6141 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6144 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6146 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6149 else if (lex_match_id (lexer, "UNITS"))
6151 lex_match (lexer, T_EQUALS);
6152 if (lex_match_id (lexer, "POINTS"))
6153 units_per_inch = 72.0;
6154 else if (lex_match_id (lexer, "INCHES"))
6155 units_per_inch = 1.0;
6156 else if (lex_match_id (lexer, "CM"))
6157 units_per_inch = 2.54;
6160 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6164 else if (lex_match_id (lexer, "EMPTY"))
6169 lex_match (lexer, T_EQUALS);
6170 if (lex_match_id (lexer, "ZERO"))
6172 /* Nothing to do. */
6174 else if (lex_match_id (lexer, "BLANK"))
6175 ct->zero = xstrdup ("");
6176 else if (lex_force_string (lexer))
6178 ct->zero = ss_xstrdup (lex_tokss (lexer));
6184 else if (lex_match_id (lexer, "MISSING"))
6186 lex_match (lexer, T_EQUALS);
6187 if (!lex_force_string (lexer))
6191 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6192 ? ss_xstrdup (lex_tokss (lexer))
6198 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6199 "UNITS", "EMPTY", "MISSING");
6204 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6205 && widths[0] > widths[1])
6207 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6211 for (size_t i = 0; i < 2; i++)
6212 if (widths[i] != SYSMIS)
6214 int *wr = ct->look->width_ranges[TABLE_HORZ];
6215 wr[i] = widths[i] / units_per_inch * 96.0;
6220 else if (lex_match_id (lexer, "VLABELS"))
6222 if (!lex_force_match_id (lexer, "VARIABLES"))
6224 lex_match (lexer, T_EQUALS);
6226 struct variable **vars;
6228 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6232 if (!lex_force_match_id (lexer, "DISPLAY"))
6237 lex_match (lexer, T_EQUALS);
6239 enum ctables_vlabel vlabel;
6240 if (lex_match_id (lexer, "DEFAULT"))
6241 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6242 else if (lex_match_id (lexer, "NAME"))
6244 else if (lex_match_id (lexer, "LABEL"))
6245 vlabel = CTVL_LABEL;
6246 else if (lex_match_id (lexer, "BOTH"))
6248 else if (lex_match_id (lexer, "NONE"))
6252 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6258 for (size_t i = 0; i < n_vars; i++)
6259 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6262 else if (lex_match_id (lexer, "MRSETS"))
6264 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6266 lex_match (lexer, T_EQUALS);
6267 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6270 else if (lex_match_id (lexer, "SMISSING"))
6272 if (lex_match_id (lexer, "VARIABLE"))
6273 ct->smissing_listwise = false;
6274 else if (lex_match_id (lexer, "LISTWISE"))
6275 ct->smissing_listwise = true;
6278 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6282 else if (lex_match_id (lexer, "PCOMPUTE"))
6284 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6287 else if (lex_match_id (lexer, "PPROPERTIES"))
6289 if (!ctables_parse_pproperties (lexer, ct))
6292 else if (lex_match_id (lexer, "WEIGHT"))
6294 if (!lex_force_match_id (lexer, "VARIABLE"))
6296 lex_match (lexer, T_EQUALS);
6297 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6301 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6303 if (lex_match_id (lexer, "COUNT"))
6305 lex_match (lexer, T_EQUALS);
6306 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6309 ct->hide_threshold = lex_integer (lexer);
6312 else if (ct->hide_threshold == 0)
6313 ct->hide_threshold = 5;
6317 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6318 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6319 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6323 if (!lex_force_match (lexer, T_SLASH))
6327 size_t allocated_tables = 0;
6330 if (ct->n_tables >= allocated_tables)
6331 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6332 sizeof *ct->tables);
6334 struct ctables_category *cat = xmalloc (sizeof *cat);
6335 *cat = (struct ctables_category) {
6337 .include_missing = false,
6338 .sort_ascending = true,
6341 struct ctables_categories *c = xmalloc (sizeof *c);
6342 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6343 *c = (struct ctables_categories) {
6350 struct ctables_categories **categories = xnmalloc (n_vars,
6351 sizeof *categories);
6352 for (size_t i = 0; i < n_vars; i++)
6355 struct ctables_table *t = xmalloc (sizeof *t);
6356 *t = (struct ctables_table) {
6358 .slabels_axis = PIVOT_AXIS_COLUMN,
6359 .slabels_visible = true,
6360 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6362 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6363 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6364 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6366 .clabels_from_axis = PIVOT_AXIS_LAYER,
6367 .categories = categories,
6368 .n_categories = n_vars,
6371 ct->tables[ct->n_tables++] = t;
6373 lex_match (lexer, T_EQUALS);
6374 int expr_start = lex_ofs (lexer);
6375 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6377 if (lex_match (lexer, T_BY))
6379 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6380 ct, t, PIVOT_AXIS_COLUMN))
6383 if (lex_match (lexer, T_BY))
6385 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6386 ct, t, PIVOT_AXIS_LAYER))
6390 int expr_end = lex_ofs (lexer);
6392 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6393 && !t->axes[PIVOT_AXIS_LAYER])
6395 lex_error (lexer, _("At least one variable must be specified."));
6399 const struct ctables_axis *scales[PIVOT_N_AXES];
6400 size_t n_scales = 0;
6401 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6403 scales[a] = find_scale (t->axes[a]);
6409 msg (SE, _("Scale variables may appear only on one axis."));
6410 if (scales[PIVOT_AXIS_ROW])
6411 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6412 _("This scale variable appears on the rows axis."));
6413 if (scales[PIVOT_AXIS_COLUMN])
6414 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6415 _("This scale variable appears on the columns axis."));
6416 if (scales[PIVOT_AXIS_LAYER])
6417 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6418 _("This scale variable appears on the layer axis."));
6422 const struct ctables_axis *summaries[PIVOT_N_AXES];
6423 size_t n_summaries = 0;
6424 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6426 summaries[a] = (scales[a]
6428 : find_categorical_summary_spec (t->axes[a]));
6432 if (n_summaries > 1)
6434 msg (SE, _("Summaries may appear only on one axis."));
6435 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6438 msg_at (SN, summaries[a]->loc,
6440 ? _("This variable on the rows axis has a summary.")
6441 : a == PIVOT_AXIS_COLUMN
6442 ? _("This variable on the columns axis has a summary.")
6443 : _("This variable on the layers axis has a summary."));
6445 msg_at (SN, summaries[a]->loc,
6446 _("This is a scale variable, so it always has a "
6447 "summary even if the syntax does not explicitly "
6452 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6453 if (n_summaries ? summaries[a] : t->axes[a])
6455 t->summary_axis = a;
6459 if (lex_token (lexer) == T_ENDCMD)
6461 if (!ctables_prepare_table (t))
6465 if (!lex_force_match (lexer, T_SLASH))
6468 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6470 if (lex_match_id (lexer, "SLABELS"))
6472 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6474 if (lex_match_id (lexer, "POSITION"))
6476 lex_match (lexer, T_EQUALS);
6477 if (lex_match_id (lexer, "COLUMN"))
6478 t->slabels_axis = PIVOT_AXIS_COLUMN;
6479 else if (lex_match_id (lexer, "ROW"))
6480 t->slabels_axis = PIVOT_AXIS_ROW;
6481 else if (lex_match_id (lexer, "LAYER"))
6482 t->slabels_axis = PIVOT_AXIS_LAYER;
6485 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6489 else if (lex_match_id (lexer, "VISIBLE"))
6491 lex_match (lexer, T_EQUALS);
6492 if (!parse_bool (lexer, &t->slabels_visible))
6497 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6502 else if (lex_match_id (lexer, "CLABELS"))
6504 if (lex_match_id (lexer, "AUTO"))
6506 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6507 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6509 else if (lex_match_id (lexer, "ROWLABELS"))
6511 lex_match (lexer, T_EQUALS);
6512 if (lex_match_id (lexer, "OPPOSITE"))
6513 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6514 else if (lex_match_id (lexer, "LAYER"))
6515 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6518 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6522 else if (lex_match_id (lexer, "COLLABELS"))
6524 lex_match (lexer, T_EQUALS);
6525 if (lex_match_id (lexer, "OPPOSITE"))
6526 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6527 else if (lex_match_id (lexer, "LAYER"))
6528 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6531 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6537 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6542 else if (lex_match_id (lexer, "CRITERIA"))
6544 if (!lex_force_match_id (lexer, "CILEVEL"))
6546 lex_match (lexer, T_EQUALS);
6548 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6550 t->cilevel = lex_number (lexer);
6553 else if (lex_match_id (lexer, "CATEGORIES"))
6555 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6559 else if (lex_match_id (lexer, "TITLES"))
6564 if (lex_match_id (lexer, "CAPTION"))
6565 textp = &t->caption;
6566 else if (lex_match_id (lexer, "CORNER"))
6568 else if (lex_match_id (lexer, "TITLE"))
6572 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6575 lex_match (lexer, T_EQUALS);
6577 struct string s = DS_EMPTY_INITIALIZER;
6578 while (lex_is_string (lexer))
6580 if (!ds_is_empty (&s))
6581 ds_put_byte (&s, ' ');
6582 put_title_text (&s, lex_tokss (lexer), now,
6583 lexer, dataset_dict (ds),
6584 expr_start, expr_end);
6588 *textp = ds_steal_cstr (&s);
6590 while (lex_token (lexer) != T_SLASH
6591 && lex_token (lexer) != T_ENDCMD);
6593 else if (lex_match_id (lexer, "SIGTEST"))
6597 t->chisq = xmalloc (sizeof *t->chisq);
6598 *t->chisq = (struct ctables_chisq) {
6600 .include_mrsets = true,
6601 .all_visible = true,
6607 if (lex_match_id (lexer, "TYPE"))
6609 lex_match (lexer, T_EQUALS);
6610 if (!lex_force_match_id (lexer, "CHISQUARE"))
6613 else if (lex_match_id (lexer, "ALPHA"))
6615 lex_match (lexer, T_EQUALS);
6616 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6618 t->chisq->alpha = lex_number (lexer);
6621 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6623 lex_match (lexer, T_EQUALS);
6624 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6627 else if (lex_match_id (lexer, "CATEGORIES"))
6629 lex_match (lexer, T_EQUALS);
6630 if (lex_match_id (lexer, "ALLVISIBLE"))
6631 t->chisq->all_visible = true;
6632 else if (lex_match_id (lexer, "SUBTOTALS"))
6633 t->chisq->all_visible = false;
6636 lex_error_expecting (lexer,
6637 "ALLVISIBLE", "SUBTOTALS");
6643 lex_error_expecting (lexer, "TYPE", "ALPHA",
6644 "INCLUDEMRSETS", "CATEGORIES");
6648 while (lex_token (lexer) != T_SLASH
6649 && lex_token (lexer) != T_ENDCMD);
6651 else if (lex_match_id (lexer, "COMPARETEST"))
6655 t->pairwise = xmalloc (sizeof *t->pairwise);
6656 *t->pairwise = (struct ctables_pairwise) {
6658 .alpha = { .05, .05 },
6659 .adjust = BONFERRONI,
6660 .include_mrsets = true,
6661 .meansvariance_allcats = true,
6662 .all_visible = true,
6671 if (lex_match_id (lexer, "TYPE"))
6673 lex_match (lexer, T_EQUALS);
6674 if (lex_match_id (lexer, "PROP"))
6675 t->pairwise->type = PROP;
6676 else if (lex_match_id (lexer, "MEAN"))
6677 t->pairwise->type = MEAN;
6680 lex_error_expecting (lexer, "PROP", "MEAN");
6684 else if (lex_match_id (lexer, "ALPHA"))
6686 lex_match (lexer, T_EQUALS);
6688 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6690 double a0 = lex_number (lexer);
6693 lex_match (lexer, T_COMMA);
6694 if (lex_is_number (lexer))
6696 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6698 double a1 = lex_number (lexer);
6701 t->pairwise->alpha[0] = MIN (a0, a1);
6702 t->pairwise->alpha[1] = MAX (a0, a1);
6705 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6707 else if (lex_match_id (lexer, "ADJUST"))
6709 lex_match (lexer, T_EQUALS);
6710 if (lex_match_id (lexer, "BONFERRONI"))
6711 t->pairwise->adjust = BONFERRONI;
6712 else if (lex_match_id (lexer, "BH"))
6713 t->pairwise->adjust = BH;
6714 else if (lex_match_id (lexer, "NONE"))
6715 t->pairwise->adjust = 0;
6718 lex_error_expecting (lexer, "BONFERRONI", "BH",
6723 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6725 lex_match (lexer, T_EQUALS);
6726 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6729 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6731 lex_match (lexer, T_EQUALS);
6732 if (lex_match_id (lexer, "ALLCATS"))
6733 t->pairwise->meansvariance_allcats = true;
6734 else if (lex_match_id (lexer, "TESTEDCATS"))
6735 t->pairwise->meansvariance_allcats = false;
6738 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6742 else if (lex_match_id (lexer, "CATEGORIES"))
6744 lex_match (lexer, T_EQUALS);
6745 if (lex_match_id (lexer, "ALLVISIBLE"))
6746 t->pairwise->all_visible = true;
6747 else if (lex_match_id (lexer, "SUBTOTALS"))
6748 t->pairwise->all_visible = false;
6751 lex_error_expecting (lexer, "ALLVISIBLE",
6756 else if (lex_match_id (lexer, "MERGE"))
6758 lex_match (lexer, T_EQUALS);
6759 if (!parse_bool (lexer, &t->pairwise->merge))
6762 else if (lex_match_id (lexer, "STYLE"))
6764 lex_match (lexer, T_EQUALS);
6765 if (lex_match_id (lexer, "APA"))
6766 t->pairwise->apa_style = true;
6767 else if (lex_match_id (lexer, "SIMPLE"))
6768 t->pairwise->apa_style = false;
6771 lex_error_expecting (lexer, "APA", "SIMPLE");
6775 else if (lex_match_id (lexer, "SHOWSIG"))
6777 lex_match (lexer, T_EQUALS);
6778 if (!parse_bool (lexer, &t->pairwise->show_sig))
6783 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6784 "INCLUDEMRSETS", "MEANSVARIANCE",
6785 "CATEGORIES", "MERGE", "STYLE",
6790 while (lex_token (lexer) != T_SLASH
6791 && lex_token (lexer) != T_ENDCMD);
6795 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6796 "CRITERIA", "CATEGORIES", "TITLES",
6797 "SIGTEST", "COMPARETEST");
6801 if (!lex_match (lexer, T_SLASH))
6805 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6806 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6808 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6812 if (!ctables_prepare_table (t))
6815 while (lex_token (lexer) != T_ENDCMD);
6818 input = proc_open (ds);
6819 bool ok = ctables_execute (ds, input, ct);
6820 ok = proc_commit (ds) && ok;
6822 ctables_destroy (ct);
6823 return ok ? CMD_SUCCESS : CMD_FAILURE;
6828 ctables_destroy (ct);