1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
/* The ordering matters because parse_ctables_summary_function() tries the
   area type names in enumeration order against the start of the function
   name, and "LAYER" is itself the start of "LAYERROW" and "LAYERCOL". */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A postcompute definition, kept in struct ctables's 'postcomputes' hmap. */
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
376 size_t *areas[N_CTATS];
377 size_t n_areas[N_CTATS];
380 struct ctables_summary_spec_set specs[N_CSVS];
383 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
386 struct ctables_nest *nests;
390 static void ctables_stack_uninit (struct ctables_stack *);
394 struct hmap_node node;
399 struct ctables_occurrence
401 struct hmap_node node;
405 struct ctables_section
408 struct ctables_table *table;
409 struct ctables_nest *nests[PIVOT_N_AXES];
412 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
413 struct hmap cells; /* Contains "struct ctables_cell"s. */
414 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
417 static void ctables_section_uninit (struct ctables_section *);
421 struct ctables *ctables;
422 struct ctables_axis *axes[PIVOT_N_AXES];
423 struct ctables_stack stacks[PIVOT_N_AXES];
424 struct ctables_section *sections;
426 enum pivot_axis_type summary_axis;
427 struct ctables_summary_spec_set summary_specs;
428 struct variable **sum_vars;
431 enum pivot_axis_type slabels_axis;
432 bool slabels_visible;
434 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
436 Most commonly, label_axis[a] == a, and in particular we always have
437 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
439 If ROWLABELS or COLLABELS is specified, then one of
440 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
441 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
443 If any category labels are moved, then 'clabels_example' is one of the
444 variables being moved (and it is otherwise NULL). All of the variables
445 being moved have the same width, value labels, and categories, so this
446 example variable can be used to find those out.
448 The remaining members in this group are relevant only if category labels
451 'clabels_values_map' holds a "struct ctables_value" for all the values
452 that appear in all of the variables in the moved categories. It is
453 accumulated as the data is read. Once the data is fully read, its
454 sorted values are put into 'clabels_values' and 'n_clabels_values'.
456 enum pivot_axis_type label_axis[PIVOT_N_AXES];
457 enum pivot_axis_type clabels_from_axis;
458 enum pivot_axis_type clabels_to_axis;
459 const struct variable *clabels_example;
460 struct hmap clabels_values_map;
461 struct ctables_value **clabels_values;
462 size_t n_clabels_values;
464 /* Indexed by variable dictionary index. */
465 struct ctables_categories **categories;
474 struct ctables_chisq *chisq;
475 struct ctables_pairwise *pairwise;
478 struct ctables_categories
481 struct ctables_category *cats;
486 struct ctables_category
488 enum ctables_category_type
490 /* Explicit category lists. */
493 CCT_NRANGE, /* Numerical range. */
494 CCT_SRANGE, /* String range. */
499 /* Totals and subtotals. */
503 /* Implicit category lists. */
508 /* For contributing to TOTALN. */
509 CCT_EXCLUDED_MISSING,
513 struct ctables_category *subtotal;
519 double number; /* CCT_NUMBER. */
520 struct substring string; /* CCT_STRING, in dictionary encoding. */
521 double nrange[2]; /* CCT_NRANGE. */
522 struct substring srange[2]; /* CCT_SRANGE. */
526 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
527 bool hide_subcategories; /* CCT_SUBTOTAL. */
530 /* CCT_POSTCOMPUTE. */
533 const struct ctables_postcompute *pc;
534 enum fmt_type parse_format;
537 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
540 bool include_missing;
544 enum ctables_summary_function sort_function;
546 enum ctables_area_type area;
547 struct variable *sort_var;
552 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
553 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
554 struct msg_location *location;
/* Releases the resources held by CAT: its source location and, in the
   branches of the type switch, the string, both string range bounds, or the
   total label.  CAT itself is not freed here. */
558 ctables_category_uninit (struct ctables_category *cat)
563 msg_location_destroy (cat->location);
570 case CCT_POSTCOMPUTE:
574 ss_dealloc (&cat->string);
578 ss_dealloc (&cat->srange[0]);
579 ss_dealloc (&cat->srange[1]);
584 free (cat->total_label);
592 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings are equal, a null and a non-null substring
   are unequal, and two non-null substrings are compared with ss_equals(). */
598 nullable_substring_equal (const struct substring *a,
599 const struct substring *b)
601 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are checked first; after that the
   members examined depend on the category type: the number, the string, the
   numeric range bounds, the string range bounds, the postcompute pointer, the
   total label, or the missing-value and sort settings. */
605 ctables_category_equal (const struct ctables_category *a,
606 const struct ctables_category *b)
608 if (a->type != b->type)
614 return a->number == b->number;
617 return ss_equals (a->string, b->string);
620 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range bounds go through the helper that tolerates a null 'string'
   pointer. */
623 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
624 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
/* Postcomputes are compared by pointer identity, not by contents. */
630 case CCT_POSTCOMPUTE:
631 return a->pc == b->pc;
635 return !strcmp (a->total_label, b->total_label);
640 return (a->include_missing == b->include_missing
641 && a->sort_ascending == b->sort_ascending
642 && a->sort_function == b->sort_function
643 && a->sort_var == b->sort_var
644 && a->percentile == b->percentile);
646 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry
   (asserted).  Every category in C is uninitialized with
   ctables_category_uninit().
   NOTE(review): presumably the categories are only torn down once the last
   reference is gone -- confirm against the reference-count handling. */
654 ctables_categories_unref (struct ctables_categories *c)
659 assert (c->n_refs > 0);
663 for (size_t i = 0; i < c->n_cats; i++)
664 ctables_category_uninit (&c->cats[i]);
/* Compares category sets A and B: the number of categories and the
   'show_empty' setting are checked first, then the categories are compared
   pairwise, in order, with ctables_category_equal(). */
670 ctables_categories_equal (const struct ctables_categories *a,
671 const struct ctables_categories *b)
673 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
676 for (size_t i = 0; i < a->n_cats; i++)
677 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
683 /* Chi-square test (SIGTEST). */
691 /* Pairwise comparison test (COMPARETEST). */
692 struct ctables_pairwise
694 enum { PROP, MEAN } type;
697 bool meansvariance_allcats;
699 enum { BONFERRONI = 1, BH } adjust;
723 struct variable *var;
725 struct ctables_summary_spec_set specs[N_CSVS];
729 struct ctables_axis *subs[2];
732 struct msg_location *loc;
735 static void ctables_axis_destroy (struct ctables_axis *);
737 struct ctables_summary_spec
739 /* The calculation to be performed.
741 'function' is the function to calculate. 'weighted' specifies whether
742 to use weighted or unweighted data (for functions that do not support a
743 choice, it must be true). 'calc_area' is the area over which the
744 calculation takes place (for functions that target only an individual
745 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
746 percentile between 0 and 100 (for other functions it must be 0). */
747 enum ctables_summary_function function;
749 enum ctables_area_type calc_area;
750 double percentile; /* CTSF_PTILE only. */
752 /* How to display the result of the calculation.
754 'label' is a user-specified label, NULL if the user didn't specify
757 'user_area' is usually the same as 'calc_area', but when category labels
758 are rotated from one axis to another it swaps rows and columns.
760 'format' is the format for displaying the output. If
761 'is_ctables_format' is true, then 'format.type' is one of the special
762 CTEF_* formats instead of the standard ones. */
764 enum ctables_area_type user_area;
765 struct fmt_spec format;
766 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies summary specification SRC into DST.  The label is duplicated with
   xstrdup_if_nonnull() rather than shared, so a null label stays null. */
773 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
774 const struct ctables_summary_spec *src)
777 dst->label = xstrdup_if_nonnull (src->label);
781 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A fresh array of SRC->n specs is
   allocated (NULL when SRC has no specs) and each spec is cloned with
   ctables_summary_spec_clone(); DST is then assigned as a whole from a
   compound literal that carries over, among other members, 'is_scale'. */
788 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
789 const struct ctables_summary_spec_set *src)
791 struct ctables_summary_spec *specs
792 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
793 for (size_t i = 0; i < src->n; i++)
794 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
796 *dst = (struct ctables_summary_spec_set) {
801 .is_scale = src->is_scale,
/* Releases the contents of SET: each of its specs is uninitialized with
   ctables_summary_spec_uninit() and the 'listwise_vars' array is freed. */
806 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
808 for (size_t i = 0; i < set->n; i++)
809 ctables_summary_spec_uninit (&set->specs[i]);
810 free (set->listwise_vars);
/* Parses a column width setting.  An optional '=' is skipped, then either the
   keyword DEFAULT or a number in the closed range [0, DBL_MAX] is accepted;
   NAME is passed to the lexer's range check.  A parsed number is stored in
   *WIDTH. */
815 parse_col_width (struct lexer *lexer, const char *name, double *width)
817 lex_match (lexer, T_EQUALS);
818 if (lex_match_id (lexer, "DEFAULT"))
820 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
822 *width = lex_number (lexer);
/* Parses the keyword NO or YES for the setting *B.  Any other input is
   reported through lex_error_expecting() as expecting "YES" or "NO". */
832 parse_bool (struct lexer *lexer, bool *b)
834 if (lex_match_id (lexer, "NO"))
836 else if (lex_match_id (lexer, "YES"))
840 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table is
   generated by expanding S() over "ctables.inc", so it is indexed by each
   function's enumeration value. */
846 static enum ctables_function_availability
847 ctables_function_availability (enum ctables_summary_function f)
849 static enum ctables_function_availability availability[] = {
850 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
851 #include "ctables.inc"
855 return availability[f];
/* Returns true if F is one of the count functions, CTSF_COUNT or
   CTSF_ECOUNT. */
859 ctables_summary_function_is_count (enum ctables_summary_function f)
861 return f == CTSF_COUNT || f == CTSF_ECOUNT;
/* Parses a summary function name from the current identifier token.

   The name is decomposed from left to right: a leading 'U' or 'u' marks the
   function as unweighted (*WEIGHTED becomes false), then each area type name
   in ctables_area_type_name[] is tried with ss_match_string_case(), and what
   remains is compared with ss_equals_case() against the basename of every
   entry in ctables_function_info[].  A matching entry is further checked
   against the unweighted flag and against whether an area prefix was seen.

   lex_error() reports "Expecting summary function name." on the failure
   path. */
865 parse_ctables_summary_function (struct lexer *lexer,
866 enum ctables_summary_function *function,
868 enum ctables_area_type *area)
870 if (!lex_force_id (lexer))
873 struct substring name = lex_tokss (lexer);
/* ss_match_byte() is tried for 'U' first and only then for 'u', so at most
   one leading letter is consumed from NAME. */
874 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
876 bool has_area = false;
878 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
879 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
884 if (ss_equals_case (name, ss_cstr ("PCT")))
886 /* Special case where .COUNT suffix is omitted. */
887 *function = CTSF_areaPCT_COUNT;
894 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
896 const struct ctables_function_info *cfi = &ctables_function_info[f];
897 if (ss_equals_case (cfi->basename, name))
/* A 'U' prefix is only acceptable for functions that have an unweighted
   version, and an area prefix must be present exactly when the function is an
   area function. */
900 if (!*weighted && !cfi->may_be_unweighted)
902 if (has_area != cfi->is_area)
910 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  Its N_CSVS summary specification sets are uninitialized,
   its two sub-axes are destroyed recursively, and its source location is
   destroyed.
   NOTE(review): the spec sets and the sub-axes are presumably handled in
   different branches depending on axis->op -- confirm. */
915 ctables_axis_destroy (struct ctables_axis *axis)
923 for (size_t i = 0; i < N_CSVS; i++)
924 ctables_summary_spec_set_uninit (&axis->specs[i]);
929 ctables_axis_destroy (axis->subs[0]);
930 ctables_axis_destroy (axis->subs[1]);
933 msg_location_destroy (axis->loc);
/* Allocates a new axis node with SUB0 and SUB1 as its two sub-axes.  The
   node's source location runs from token offset START_OFS through the token
   just before LEXER's current one. */
937 static struct ctables_axis *
938 ctables_axis_new_nonterminal (enum ctables_axis_op op,
939 struct ctables_axis *sub0,
940 struct ctables_axis *sub1,
941 struct lexer *lexer, int start_ofs)
943 struct ctables_axis *axis = xmalloc (sizeof *axis);
944 *axis = (struct ctables_axis) {
946 .subs = { sub0, sub1 },
947 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
952 struct ctables_axis_parse_ctx
955 struct dictionary *dict;
957 struct ctables_table *t;
/* Returns the default output format for FUNCTION applied to VAR.  Each
   function's format class comes from a table generated from "ctables.inc";
   depending on the class the result is F with width 40, PCT with width 40 and
   1 decimal, or VAR's own print format. */
960 static struct fmt_spec
961 ctables_summary_default_format (enum ctables_summary_function function,
962 const struct variable *var)
964 static const enum ctables_format default_formats[] = {
965 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
966 #include "ctables.inc"
969 switch (default_formats[function])
972 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
975 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
978 return *var_get_print_format (var);
/* Returns the default label for SPEC as a string marked for translation with
   N_() but not yet translated.  The label depends on SPEC's function, on
   whether it is weighted, and, for the area functions, on its 'user_area'.

   CTSF_PTILE must not be passed here (NOT_REACHED): ctables_summary_label()
   formats percentile labels itself because they embed the percentile. */
986 ctables_summary_label__ (const struct ctables_summary_spec *spec)
988 bool w = spec->weighted;
989 enum ctables_area_type a = spec->user_area;
990 switch (spec->function)
993 return w ? N_("Count") : N_("Unweighted Count");
996 return N_("Adjusted Count");
998 case CTSF_areaPCT_COUNT:
1001 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1002 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1003 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1004 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1005 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1006 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1007 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1011 case CTSF_areaPCT_VALIDN:
1014 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1015 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1016 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1017 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1018 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1019 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1020 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1024 case CTSF_areaPCT_TOTALN:
1027 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1028 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1029 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1030 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1031 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1032 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1033 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
/* Of the single-cell functions below, only the mean has a distinct
   "Unweighted" label. */
1037 case CTSF_MAXIMUM: return N_("Maximum");
1038 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1039 case CTSF_MEDIAN: return N_("Median");
1040 case CTSF_MINIMUM: return N_("Minimum");
1041 case CTSF_MISSING: return N_("Missing");
1042 case CTSF_MODE: return N_("Mode");
1043 case CTSF_PTILE: NOT_REACHED ();
1044 case CTSF_RANGE: return N_("Range");
1045 case CTSF_SEMEAN: return N_("Std Error of Mean");
1046 case CTSF_STDDEV: return N_("Std Deviation");
1047 case CTSF_SUM: return N_("Sum");
1048 case CTSF_TOTALN: return N_("Total N");
1049 case CTSF_ETOTALN: return N_("Adjusted Total N");
1050 case CTSF_VALIDN: return N_("Valid N");
1051 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1052 case CTSF_VARIANCE: return N_("Variance");
1053 case CTSF_areaPCT_SUM:
1056 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1057 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1058 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1059 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1060 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1061 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1062 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1069 /* Don't bother translating these: they are for developers only. */
1070 case CTAT_TABLE: return "Table ID";
1071 case CTAT_LAYER: return "Layer ID";
1072 case CTAT_LAYERROW: return "Layer Row ID";
1073 case CTAT_LAYERCOL: return "Layer Column ID";
1074 case CTAT_SUBTABLE: return "Subtable ID";
1075 case CTAT_ROW: return "Row ID";
1076 case CTAT_COL: return "Column ID";
/* Creates the pivot_value used as the label for SPEC.

   For CTSF_PTILE a label of the form "Percentile P" or "Unweighted
   Percentile P" is formatted with two decimals.  For other functions the
   default label comes from ctables_summary_label__().  A user-specified label
   (spec->label) is copied piece by piece, with each occurrence of the text
   ")CILEVEL" replaced by CILEVEL formatted with "%g".
   NOTE(review): the default-label path is presumably taken only when SPEC has
   no user label -- confirm. */
1084 static struct pivot_value *
1085 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1089 if (spec->function == CTSF_PTILE)
1091 double p = spec->percentile;
1092 char *s = (spec->weighted
1093 ? xasprintf (_("Percentile %.2f"), p)
1094 : xasprintf (_("Unweighted Percentile %.2f"), p));
1095 return pivot_value_new_user_text_nocopy (s);
1098 return pivot_value_new_text (ctables_summary_label__ (spec));
1102 struct substring in = ss_cstr (spec->label);
1103 struct substring target = ss_cstr (")CILEVEL");
1105 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next occurrence of TARGET, then step past
   it in IN. */
1108 size_t chunk = ss_find_substring (in, target);
1109 ds_put_substring (&out, ss_head (in, chunk));
1110 ss_advance (&in, chunk);
1112 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the placeholder itself and emit the confidence level in its place. */
1114 ss_advance (&in, target.length);
1115 ds_put_format (&out, "%g", cilevel);
/* Formats the name of FUNCTION into BUFFER, which has room for BUFSIZE bytes
   (snprintf() is given that limit).  The name is composed of "U" if the
   function is not weighted, the name of AREA if the function is an area
   function, and the function's basename. */
1121 ctables_summary_function_name (enum ctables_summary_function function,
1123 enum ctables_area_type area,
1124 char *buffer, size_t bufsize)
1126 const struct ctables_function_info *cfi = &ctables_function_info[function];
1127 snprintf (buffer, bufsize, "%s%s%s",
1128 weighted ? "" : "U",
1129 cfi->is_area ? ctables_area_type_name[area] : "",
1130 cfi->basename.string);
/* Adds a summary specification to AXIS for summary variant SV.

   For a variable axis (CTAO_VAR) the function's availability is checked
   first; violations are reported with an error message plus a note at the
   axis's own location.  The spec is then appended to axis->specs[SV], growing
   the array with x2nrealloc() when it is full.  The stored spec gets its own
   copy of LABEL and, when FORMAT is null, the function's default format for
   the axis variable.

   For any other axis the request is forwarded to both sub-axes. */
1135 add_summary_spec (struct ctables_axis *axis,
1136 enum ctables_summary_function function, bool weighted,
1137 enum ctables_area_type area, double percentile,
1138 const char *label, const struct fmt_spec *format,
1139 bool is_ctables_format, const struct msg_location *loc,
1140 enum ctables_summary_variant sv)
1142 if (axis->op == CTAO_VAR)
1144 char function_name[128];
1145 ctables_summary_function_name (function, weighted, area,
1146 function_name, sizeof function_name);
1147 const char *var_name = var_get_name (axis->var);
1148 switch (ctables_function_availability (function))
1152 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1153 "response sets."), function_name);
1154 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* The scale-only restriction is not applied to specs in the CSV_TOTAL
   variant. */
1160 if (!axis->scale && sv != CSV_TOTAL)
1163 _("Summary function %s applies only to scale variables."),
1165 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1175 struct ctables_summary_spec_set *set = &axis->specs[sv];
1176 if (set->n >= set->allocated)
1177 set->specs = x2nrealloc (set->specs, &set->allocated,
1178 sizeof *set->specs);
1180 struct ctables_summary_spec *dst = &set->specs[set->n++];
1181 *dst = (struct ctables_summary_spec) {
1182 .function = function,
1183 .weighted = weighted,
1186 .percentile = percentile,
1187 .label = xstrdup_if_nonnull (label),
1188 .format = (format ? *format
1189 : ctables_summary_default_format (function, axis->var)),
1190 .is_ctables_format = is_ctables_format,
/* Non-variable axis: apply the same spec to each of the two sub-axes. */
1196 for (size_t i = 0; i < 2; i++)
1197 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1198 percentile, label, format, is_ctables_format,
1205 static struct ctables_axis *ctables_axis_parse_stack (
1206 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   For a variable, a CTAO_VAR axis is allocated.  The variable is treated as
   scale if it is followed by "[S]", as categorical if followed by "[C]", and
   otherwise according to whether its measurement level is MEASURE_SCALE.  A
   string variable that ends up as scale is rejected with an error and the
   axis is destroyed. */
1209 static struct ctables_axis *
1210 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1212 if (lex_match (ctx->lexer, T_LPAREN))
1214 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* Either the inner expression failed or the closing parenthesis is missing;
   destroy whatever was parsed. */
1215 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1217 ctables_axis_destroy (sub);
1223 if (!lex_force_id (ctx->lexer))
1226 int start_ofs = lex_ofs (ctx->lexer);
1227 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1231 struct ctables_axis *axis = xmalloc (sizeof *axis);
1232 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1234 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1235 : lex_match_phrase (ctx->lexer, "[C]") ? false
1236 : var_get_measure (var) == MEASURE_SCALE);
/* The location is computed after the optional [S]/[C] marker has been
   consumed, so it covers the marker too. */
1237 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1238 lex_ofs (ctx->lexer) - 1);
1239 if (axis->scale && var_is_alpha (var))
1241 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1243 var_get_name (var));
1244 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the characters '0' through '9'.
   strcspn() yields the index of the first digit, or of the terminating null
   byte when there is none. */
1252 has_digit (const char *s)
1254 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary specification into FORMAT.

   The type name is first compared, ignoring case, against the CTABLES-only
   formats NEGPAREN, NEQUAL, PAREN, and PCTPAREN, which map to the CTEF_*
   format types; for these *IS_CTABLES_FORMAT is set to true after width and
   decimals have been validated.  Any other name is parsed as a regular format
   specifier that must be valid for output and compatible with numeric values,
   and *IS_CTABLES_FORMAT is set to false. */
1258 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1259 bool *is_ctables_format)
1261 char type[FMT_TYPE_LEN_MAX + 1];
1262 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1265 if (!strcasecmp (type, "NEGPAREN"))
1266 format->type = CTEF_NEGPAREN;
1267 else if (!strcasecmp (type, "NEQUAL"))
1268 format->type = CTEF_NEQUAL;
1269 else if (!strcasecmp (type, "PAREN"))
1270 format->type = CTEF_PAREN;
1271 else if (!strcasecmp (type, "PCTPAREN"))
1272 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific format: fall back to the standard format parser and
   checks. */
1275 *is_ctables_format = false;
1276 return (parse_format_specifier (lexer, format)
1277 && fmt_check_output (format)
1278 && fmt_check_type_compat (format, VAL_NUMERIC));
1284 lex_next_error (lexer, -1, -1,
1285 _("Output format %s requires width 2 or greater."), type);
/* The number of decimals has to be smaller than the width. */
1288 else if (format->d > format->w - 1)
1290 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1291 "greater than decimals."), type);
1296 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list of
   summary specifications.

   Each specification consists of a summary function, a percentile in [0, 100]
   for PTILE only, an optional string label, and an optional format (an
   identifier token that contains a digit).  Each one is passed to
   add_summary_spec() for the current variant, which starts as CSV_CELL.
   Specifications may be separated by commas.  While in the CSV_CELL variant,
   the keyword TOTALS followed by '[' introduces further specifications; the
   code that switches the variant is assumed to sit in that branch -- TODO
   confirm.  In the CSV_TOTAL variant an additional ']' is required after the
   one that ends the list.

   The parsed sub-axis is destroyed on the error path. */
1301 static struct ctables_axis *
1302 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1304 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1305 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1308 enum ctables_summary_variant sv = CSV_CELL;
1311 int start_ofs = lex_ofs (ctx->lexer);
1313 /* Parse function. */
1314 enum ctables_summary_function function;
1316 enum ctables_area_type area;
1317 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1321 /* Parse percentile. */
1322 double percentile = 0;
1323 if (function == CTSF_PTILE)
1325 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1327 percentile = lex_number (ctx->lexer);
1328 lex_get (ctx->lexer);
1333 if (lex_is_string (ctx->lexer))
1335 label = ss_xstrdup (lex_tokss (ctx->lexer));
1336 lex_get (ctx->lexer);
1340 struct fmt_spec format;
1341 const struct fmt_spec *formatp;
1342 bool is_ctables_format = false;
/* A format is recognized as an identifier that contains at least one
   digit. */
1343 if (lex_token (ctx->lexer) == T_ID
1344 && has_digit (lex_tokcstr (ctx->lexer)))
1346 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1347 &is_ctables_format))
/* The location covers every token of this summary specification. */
1357 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1358 lex_ofs (ctx->lexer) - 1);
1359 add_summary_spec (sub, function, weighted, area, percentile, label,
1360 formatp, is_ctables_format, loc, sv);
1362 msg_location_destroy (loc);
1364 lex_match (ctx->lexer, T_COMMA);
1365 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1367 if (!lex_force_match (ctx->lexer, T_LBRACK))
1371 else if (lex_match (ctx->lexer, T_RBRACK))
1373 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1380 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A variable axis (CTAO_VAR) yields
   itself if it is marked as scale and NULL otherwise; any other axis is
   searched recursively through both of its sub-axes. */
1384 static const struct ctables_axis *
1385 find_scale (const struct ctables_axis *axis)
1389 else if (axis->op == CTAO_VAR)
1390 return axis->scale ? axis : NULL;
1393 for (size_t i = 0; i < 2; i++)
1395 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a categorical variable that has summary specifications.
   A variable axis (CTAO_VAR) yields itself if it is not scale and has at
   least one spec in its CSV_CELL set, and NULL otherwise; any other axis is
   searched recursively through both of its sub-axes. */
1403 static const struct ctables_axis *
1404 find_categorical_summary_spec (const struct ctables_axis *axis)
1408 else if (axis->op == CTAO_VAR)
1409 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1412 for (size_t i = 0; i < 2; i++)
1414 const struct ctables_axis *sum
1415 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix expressions separated by '>' into CTAO_NEST
   nodes.

   Two restrictions are enforced on every new nest, each reported with an
   error at the nest's location plus explanatory notes, after which the nest
   is destroyed:

   - The outer and inner sides may not both contain a scale variable.

   - A categorical variable on the outer side may not have summary
     specifications.

   If the right-hand side fails to parse, the left-hand side is destroyed. */
1423 static struct ctables_axis *
1424 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1426 int start_ofs = lex_ofs (ctx->lexer);
1427 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1431 while (lex_match (ctx->lexer, T_GT))
1433 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1436 ctables_axis_destroy (lhs);
1440 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1441 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1443 const struct ctables_axis *outer_scale = find_scale (lhs);
1444 const struct ctables_axis *inner_scale = find_scale (rhs);
1445 if (outer_scale && inner_scale)
1447 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1448 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1449 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
/* NEST was built from LHS and RHS, so destroying it also destroys both of
   them through ctables_axis_destroy()'s recursion into the sub-axes. */
1450 ctables_axis_destroy (nest);
1454 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1457 msg_at (SE, nest->loc,
1458 _("Summaries may only be requested for categorical variables "
1459 "at the innermost nesting level."));
1460 msg_at (SN, outer_sum->loc,
1461 _("This outer categorical variable has a summary."));
1462 ctables_axis_destroy (nest);
/* Parses one or more nest expressions separated by '+'.  Each additional
   operand is combined with everything parsed so far into a new CTAO_STACK
   node, so the resulting tree is left-associative.  If an operand after '+'
   fails to parse, what was parsed so far is destroyed. */
1472 static struct ctables_axis *
1473 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1475 int start_ofs = lex_ofs (ctx->lexer);
1476 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1480 while (lex_match (ctx->lexer, T_PLUS))
1482 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1485 ctables_axis_destroy (lhs);
1489 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1490 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores the resulting tree
   in T->axes[A].  Returns true if a tree was produced, that is, if
   ctables_axis_parse_stack() returned non-null.

   The current token is first compared against T_BY, T_SLASH and T_ENDCMD.
   NOTE(review): the action taken for those tokens is not visible in this
   excerpt; presumably the axis is treated as empty and parsing is skipped. */
1497 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1498 struct ctables *ct, struct ctables_table *t,
1499 enum pivot_axis_type a)
1501 if (lex_token (lexer) == T_BY
1502 || lex_token (lexer) == T_SLASH
1503 || lex_token (lexer) == T_ENDCMD)
1506 struct ctables_axis_parse_ctx ctx = {
1512 t->axes[a] = ctables_axis_parse_stack (&ctx);
1513 return t->axes[a] != NULL;
/* Destructor for CHISQ; ctables_table_destroy() calls it on T->chisq.
   NOTE(review): the body is not visible in this excerpt; presumably it
   releases the storage owned by CHISQ. */
1517 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE; ctables_table_destroy() calls it on T->pairwise.
   NOTE(review): the body is not visible in this excerpt; presumably it
   releases the storage owned by PAIRWISE. */
1523 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, its per-variable
   category references, the axis trees and stacks for every pivot axis, the
   summary spec array, the map and array of category-label values, and the
   chisq and pairwise members.
   NOTE(review): some statements of this function are not visible in this
   excerpt, so this list may be incomplete. */
1529 ctables_table_destroy (struct ctables_table *t)
1534 for (size_t i = 0; i < t->n_sections; i++)
1535 ctables_section_uninit (&t->sections[i]);
/* Categories are reference counted, so each slot only drops its reference. */
1538 for (size_t i = 0; i < t->n_categories; i++)
1539 ctables_categories_unref (t->categories[i]);
1540 free (t->categories);
1542 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1544 ctables_axis_destroy (t->axes[a]);
1545 ctables_stack_uninit (&t->stacks[a]);
1547 free (t->summary_specs.specs);
/* The _SAFE iteration form is used because nodes are deleted while the map
   is being walked.  Values are destroyed using the width of
   T->clabels_example. */
1549 struct ctables_value *ctv, *next_ctv;
1550 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1551 &t->clabels_values_map)
1553 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1554 hmap_delete (&t->clabels_values_map, &ctv->node);
1557 hmap_destroy (&t->clabels_values_map);
1558 free (t->clabels_values);
1564 ctables_chisq_destroy (t->chisq);
1565 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: every postcompute in CT->postcomputes
   (its location, expression and summary specs), the map itself, the format
   settings, the reference to the pivot table look, and each table in
   CT->tables.
   NOTE(review): some statements of this function are not visible in this
   excerpt, so this list may be incomplete. */
1570 ctables_destroy (struct ctables *ct)
/* The _SAFE iteration form is used because nodes are deleted while the map
   is being walked. */
1575 struct ctables_postcompute *pc, *next_pc;
1576 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1580 msg_location_destroy (pc->location);
1581 ctables_pcexpr_destroy (pc->expr);
1585 ctables_summary_spec_set_uninit (pc->specs);
1588 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1591 hmap_destroy (&ct->postcomputes);
1593 fmt_settings_uninit (&ct->ctables_formats);
1594 pivot_table_look_unref (ct->look);
1598 for (size_t i = 0; i < ct->n_tables; i++)
1599 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose nrange member is { LOW, HIGH }.
   NOTE(review): the type initializer is not visible in this excerpt;
   presumably it is CCT_NRANGE. */
1604 static struct ctables_category
1605 cct_nrange (double low, double high)
1607 return (struct ctables_category) {
1609 .nrange = { low, high }
/* Returns a category, by value, whose srange member is { LOW, HIGH }.  The
   substrings are stored as given, without copying.
   NOTE(review): the type initializer is not visible in this excerpt;
   presumably it is CCT_SRANGE. */
1613 static struct ctables_category
1614 cct_srange (struct substring low, struct substring high)
1616 return (struct ctables_category) {
1618 .srange = { low, high }
/* Parses the optional label that may follow SUBTOTAL or HSUBTOTAL and
   stores a CCT_SUBTOTAL category in *CAT.  If T_EQUALS follows, a string
   token is required and a copy of it becomes the label; otherwise a copy of
   the translated default label is used.  HIDE_SUBCATEGORIES is recorded in
   the category unchanged.
   NOTE(review): the return statements are not visible in this excerpt;
   callers use the result as a success flag. */
1623 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1624 struct ctables_category *cat)
1627 if (lex_match (lexer, T_EQUALS))
1629 if (!lex_force_string (lexer))
1632 total_label = ss_xstrdup (lex_tokss (lexer));
1636 total_label = xstrdup (_("Subtotal"));
1638 *cat = (struct ctables_category) {
1639 .type = CCT_SUBTOTAL,
1640 .hide_subcategories = hide_subcategories,
1641 .total_label = total_label
/* Converts the string of the current token with recode_substring_pool(),
   passing the encoding of DICT and UTF-8 as the two encodings and no pool,
   then strips trailing spaces from the result with ss_rtrim().
   NOTE(review): presumably the conversion is from UTF-8 to the dictionary
   encoding and the result is returned after the token is consumed; confirm
   against recode_substring_pool() and the lines not shown here. */
1646 static struct substring
1647 parse_substring (struct lexer *lexer, struct dictionary *dict)
1649 struct substring s = recode_substring_pool (
1650 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1651 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list and stores it in *CAT.

   Forms recognized by the visible code:
     - OTHERNM and MISSING keywords;
     - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal();
     - LO THRU followed by a string or a number (open lower bound);
     - a number, optionally followed by THRU HI or THRU and a number;
     - a string, optionally followed by THRU HI or THRU and a string;
     - T_AND followed by an identifier naming an existing postcompute.
   Any other token is reported with lex_error().

   Open-ended numeric bounds are stored as -DBL_MAX and DBL_MAX.  Open-ended
   string bounds are stored as a substring with a null string pointer.
   NOTE(review): the return statements and token advances on most paths are
   not visible in this excerpt. */
1657 ctables_table_parse_explicit_category (struct lexer *lexer,
1658 struct dictionary *dict,
1660 struct ctables_category *cat)
1662 if (lex_match_id (lexer, "OTHERNM"))
1663 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1664 else if (lex_match_id (lexer, "MISSING"))
1665 *cat = (struct ctables_category) { .type = CCT_MISSING };
1666 else if (lex_match_id (lexer, "SUBTOTAL"))
1667 return ctables_table_parse_subtotal (lexer, false, cat);
1668 else if (lex_match_id (lexer, "HSUBTOTAL"))
1669 return ctables_table_parse_subtotal (lexer, true, cat);
1670 else if (lex_match_id (lexer, "LO"))
1672 if (!lex_force_match_id (lexer, "THRU"))
1674 if (lex_is_string (lexer))
/* The null string pointer marks the lower bound as open. */
1676 struct substring sr0 = { .string = NULL };
1677 struct substring sr1 = parse_substring (lexer, dict);
1678 *cat = cct_srange (sr0, sr1);
1680 else if (lex_force_num (lexer))
1682 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1688 else if (lex_is_number (lexer))
1690 double number = lex_number (lexer);
1692 if (lex_match_id (lexer, "THRU"))
1694 if (lex_match_id (lexer, "HI"))
1695 *cat = cct_nrange (number, DBL_MAX);
1698 if (!lex_force_num (lexer))
1700 *cat = cct_nrange (number, lex_number (lexer));
1705 *cat = (struct ctables_category) {
1710 else if (lex_is_string (lexer))
1712 struct substring s = parse_substring (lexer, dict);
1713 if (lex_match_id (lexer, "THRU"))
1715 if (lex_match_id (lexer, "HI"))
/* The null string pointer marks the upper bound as open. */
1717 struct substring sr1 = { .string = NULL };
1718 *cat = cct_srange (s, sr1);
1722 if (!lex_force_string (lexer))
1727 struct substring sr1 = parse_substring (lexer, dict);
1728 *cat = cct_srange (s, sr1);
1732 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1734 else if (lex_match (lexer, T_AND))
1736 if (!lex_force_id (lexer))
1738 struct ctables_postcompute *pc = ctables_find_postcompute (
1739 ct, lex_tokcstr (lexer));
/* The error location spans from the previous token through the current
   one (offsets -1 to 0 relative to the current token). */
1742 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1743 msg_at (SE, loc, _("Unknown postcompute &%s."),
1744 lex_tokcstr (lexer));
1745 msg_location_destroy (loc);
1750 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1754 lex_error (lexer, NULL);
/* Parses S as a value in FORMAT by calling data_in() with the encoding of
   DICT and the global format settings.  If data_in() reports an error, a
   message naming FORMAT and including the error text is issued at LOCATION.
   NOTE(review): presumably the parsed number is stored in *N and a success
   flag is returned; those lines are not visible in this excerpt. */
1762 parse_category_string (struct msg_location *location,
1763 struct substring s, const struct dictionary *dict,
1764 enum fmt_type format, double *n)
1767 char *error = data_in (s, dict_get_encoding (dict), format,
1768 settings_get_fmt_settings (), &v, 0, NULL);
1771 msg_at (SE, location,
1772 _("Failed to parse category specification as format %s: %s."),
1773 fmt_name (format), error);
/* Scans CATS for the category that the postcompute expression node E refers
   to.  The kind of reference (E->op) selects the test applied to each
   category:
     - number and string references compare type and value;
     - numeric and string range references compare type and both bounds;
     - MISSING, OTHERNM and TOTAL references compare type only;
     - SUBTOTAL references compare E->subtotal_index against a running count
       of subtotals (n_subtotals), with index 0 handled separately.
   After the scan, a SUBTOTAL reference with index 0 is treated specially
   when more than one subtotal was seen.
   NOTE(review): how BEST is updated and what is returned is not visible in
   this excerpt; presumably the match or NULL. */
1782 static struct ctables_category *
1783 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1784 const struct ctables_pcexpr *e)
1786 struct ctables_category *best = NULL;
1787 size_t n_subtotals = 0;
1788 for (size_t i = 0; i < cats->n_cats; i++)
1790 struct ctables_category *cat = &cats->cats[i];
1793 case CTPO_CAT_NUMBER:
1794 if (cat->type == CCT_NUMBER && cat->number == e->number)
1798 case CTPO_CAT_STRING:
1799 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1803 case CTPO_CAT_NRANGE:
1804 if (cat->type == CCT_NRANGE
1805 && cat->nrange[0] == e->nrange[0]
1806 && cat->nrange[1] == e->nrange[1])
1810 case CTPO_CAT_SRANGE:
1811 if (cat->type == CCT_SRANGE
1812 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1813 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1817 case CTPO_CAT_MISSING:
1818 if (cat->type == CCT_MISSING)
1822 case CTPO_CAT_OTHERNM:
1823 if (cat->type == CCT_OTHERNM)
1827 case CTPO_CAT_SUBTOTAL:
1828 if (cat->type == CCT_SUBTOTAL)
1831 if (e->subtotal_index == n_subtotals)
1833 else if (e->subtotal_index == 0)
1838 case CTPO_CAT_TOTAL:
1839 if (cat->type == CCT_TOTAL)
/* An unindexed subtotal reference cannot be resolved uniquely when the list
   holds several subtotals. */
1853 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to.

   When PARSE_FORMAT is FMT_F, E is looked up as written.  Otherwise string
   references are first converted to numeric ones by parsing their text in
   PARSE_FORMAT with parse_category_string():
     - a string reference becomes a CTPO_CAT_NUMBER reference;
     - a string range becomes a CTPO_CAT_NRANGE reference, with a null lower
       bound mapped to -DBL_MAX and a null upper bound mapped to DBL_MAX.
   The converted node keeps the location of E.
   NOTE(review): the result on a parse failure is not visible in this
   excerpt; presumably NULL. */
1858 static struct ctables_category *
1859 ctables_find_category_for_postcompute (const struct dictionary *dict,
1860 const struct ctables_categories *cats,
1861 enum fmt_type parse_format,
1862 const struct ctables_pcexpr *e)
1864 if (parse_format != FMT_F)
1866 if (e->op == CTPO_CAT_STRING)
1869 if (!parse_category_string (e->location, e->string, dict,
1870 parse_format, &number))
1873 struct ctables_pcexpr e2 = {
1874 .op = CTPO_CAT_NUMBER,
1876 .location = e->location,
1878 return ctables_find_category_for_postcompute__ (cats, &e2);
1880 else if (e->op == CTPO_CAT_SRANGE)
1883 if (!e->srange[0].string)
1884 nrange[0] = -DBL_MAX;
1885 else if (!parse_category_string (e->location, e->srange[0], dict,
1886 parse_format, &nrange[0]))
1889 if (!e->srange[1].string)
1890 nrange[1] = DBL_MAX;
1891 else if (!parse_category_string (e->location, e->srange[1], dict,
1892 parse_format, &nrange[1]))
1895 struct ctables_pcexpr e2 = {
1896 .op = CTPO_CAT_NRANGE,
1897 .nrange = { nrange[0], nrange[1] },
1898 .location = e->location,
1900 return ctables_find_category_for_postcompute__ (cats, &e2);
/* All remaining references are looked up exactly as written. */
1903 return ctables_find_category_for_postcompute__ (cats, e);
/* Recursively checks that every category reference in postcompute
   expression E can be resolved against the category list CATS.  PC_CAT is
   the postcompute category being checked, and CATS_LOCATION is the syntax
   location of the category list, used in diagnostics.

   For a category reference node, the category is looked up with
   ctables_find_category_for_postcompute() using the parse format of PC_CAT.
   Two diagnostics are visible:
     - an unindexed SUBTOTAL reference when CATS holds more than one
       subtotal;
     - a reference to a category that is not in the list, followed by a note
       suggesting a fix that depends on the kind of reference.
   For other nodes, both subexpressions in E->subs[] are checked in turn.
   NOTE(review): the return statements and the action taken when
   hide_source_cats is set are not visible in this excerpt. */
1907 ctables_recursive_check_postcompute (struct dictionary *dict,
1908 const struct ctables_pcexpr *e,
1909 struct ctables_category *pc_cat,
1910 const struct ctables_categories *cats,
1911 const struct msg_location *cats_location)
1915 case CTPO_CAT_NUMBER:
1916 case CTPO_CAT_STRING:
1917 case CTPO_CAT_NRANGE:
1918 case CTPO_CAT_SRANGE:
1919 case CTPO_CAT_MISSING:
1920 case CTPO_CAT_OTHERNM:
1921 case CTPO_CAT_SUBTOTAL:
1922 case CTPO_CAT_TOTAL:
1924 struct ctables_category *cat = ctables_find_category_for_postcompute (
1925 dict, cats, pc_cat->parse_format, e);
1928 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so an ambiguous unindexed reference gets its own,
   more specific message. */
1930 size_t n_subtotals = 0;
1931 for (size_t i = 0; i < cats->n_cats; i++)
1932 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1933 if (n_subtotals > 1)
1935 msg_at (SE, cats_location,
1936 ngettext ("These categories include %zu instance "
1937 "of SUBTOTAL or HSUBTOTAL, so references "
1938 "from computed categories must refer to "
1939 "subtotals by position, "
1940 "e.g. SUBTOTAL[1].",
1941 "These categories include %zu instances "
1942 "of SUBTOTAL or HSUBTOTAL, so references "
1943 "from computed categories must refer to "
1944 "subtotals by position, "
1945 "e.g. SUBTOTAL[1].",
1948 msg_at (SN, e->location,
1949 _("This is the reference that lacks a position."));
1954 msg_at (SE, pc_cat->location,
1955 _("Computed category &%s references a category not included "
1956 "in the category list."),
1958 msg_at (SN, e->location, _("This is the missing category."));
1959 if (e->op == CTPO_CAT_SUBTOTAL)
1960 msg_at (SN, cats_location,
1961 _("To fix the problem, add subtotals to the "
1962 "list of categories here."));
/* NOTE(review): unlike the sibling notes, this one is issued with msg() and
   so carries no location. */
1963 else if (e->op == CTPO_CAT_TOTAL)
1964 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1965 "CATEGORIES specification."));
1967 msg_at (SN, cats_location,
1968 _("To fix the problem, add the missing category to the "
1969 "list of categories here."));
1972 if (pc_cat->pc->hide_source_cats)
1986 for (size_t i = 0; i < 2; i++)
1987 if (e->subs[i] && !ctables_recursive_check_postcompute (
1988 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables.  When one is
   found, an error naming that variable is reported at the location of CAT,
   explaining that the category specification applies only to string
   variables.
   NOTE(review): the return statements are not visible in this excerpt;
   callers treat the result as true when every variable is a string. */
1997 all_strings (struct variable **vars, size_t n_vars,
1998 const struct ctables_category *cat)
2000 for (size_t j = 0; j < n_vars; j++)
2001 if (var_is_numeric (vars[j]))
2003 msg_at (SE, cat->location,
2004 _("This category specification may be applied only to string "
2005 "variables, but this subcommand tries to apply it to "
2006 "numeric variable %s."),
2007 var_get_name (vars[j]));
/* Parses a categories specification for table T.

   Visible stages:
     1. VARIABLES, an optional T_EQUALS and a variable list.
     2. The common print format type of the variables is determined.
        COMMON_FORMAT becomes null when the types differ.
     3. A new categories object is created with one reference per variable.
        For each variable, the reference previously held in T->categories is
        dropped.
     4. An optional bracketed list of explicit categories is parsed with
        ctables_table_parse_explicit_category(), recording the location of
        each one.
     5. Options are parsed until T_SLASH or T_ENDCMD.  ORDER, KEY and MISSING
        are accepted only when no explicit categories were given.  TOTAL,
        LABEL, POSITION and EMPTY are always accepted.
     6. Implicit and total categories are appended, and each category is
        linked to a subtotal.
     7. When an explicit list was given, each category is validated against
        the variable types, and string categories are converted to numeric
        ones by parsing them in the common format.
   NOTE(review): many statements, including every return and several
   conditions, are not visible in this excerpt. */
2014 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2015 struct ctables *ct, struct ctables_table *t)
2017 if (!lex_match_id (lexer, "VARIABLES"))
2019 lex_match (lexer, T_EQUALS);
2021 struct variable **vars;
2023 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all of the variables share one print format type. */
2026 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2027 for (size_t i = 1; i < n_vars; i++)
2029 const struct fmt_spec *f = var_get_print_format (vars[i]);
2030 if (f->type != common_format->type)
2032 common_format = NULL;
2038 && (fmt_get_category (common_format->type)
2039 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* The new object starts with one reference for each variable that will
   point to it. */
2041 struct ctables_categories *c = xmalloc (sizeof *c);
2042 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2043 for (size_t i = 0; i < n_vars; i++)
2045 struct ctables_categories **cp
2046 = &t->categories[var_get_dict_index (vars[i])];
2047 ctables_categories_unref (*cp);
/* The token offsets of the explicit list stay at -1 when no list is given. */
2051 size_t allocated_cats = 0;
2052 int cats_start_ofs = -1;
2053 int cats_end_ofs = -1;
2054 if (lex_match (lexer, T_LBRACK))
2056 cats_start_ofs = lex_ofs (lexer);
2059 if (c->n_cats >= allocated_cats)
2060 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2062 int start_ofs = lex_ofs (lexer);
2063 struct ctables_category *cat = &c->cats[c->n_cats];
2064 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2066 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2069 lex_match (lexer, T_COMMA);
2071 while (!lex_match (lexer, T_RBRACK));
2072 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category: missing values excluded and ascending
   sort order. */
2075 struct ctables_category cat = {
2077 .include_missing = false,
2078 .sort_ascending = true,
2080 bool show_totals = false;
2081 char *total_label = NULL;
2082 bool totals_before = false;
2083 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2085 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2087 lex_match (lexer, T_EQUALS);
2088 if (lex_match_id (lexer, "A"))
2089 cat.sort_ascending = true;
2090 else if (lex_match_id (lexer, "D"))
2091 cat.sort_ascending = false;
2094 lex_error_expecting (lexer, "A", "D");
2098 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2100 lex_match (lexer, T_EQUALS);
2101 if (lex_match_id (lexer, "VALUE"))
2102 cat.type = CCT_VALUE;
2103 else if (lex_match_id (lexer, "LABEL"))
2104 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable. */
2107 cat.type = CCT_FUNCTION;
2108 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2109 &cat.weighted, &cat.area))
2112 if (lex_match (lexer, T_LPAREN))
2114 cat.sort_var = parse_variable (lexer, dict);
2118 if (cat.sort_function == CTSF_PTILE)
2120 lex_match (lexer, T_COMMA);
2121 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2123 cat.percentile = lex_number (lexer);
2127 if (!lex_force_match (lexer, T_RPAREN))
2130 else if (ctables_function_availability (cat.sort_function)
2133 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2138 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2140 lex_match (lexer, T_EQUALS);
2141 if (lex_match_id (lexer, "INCLUDE"))
2142 cat.include_missing = true;
2143 else if (lex_match_id (lexer, "EXCLUDE"))
2144 cat.include_missing = false;
2147 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2151 else if (lex_match_id (lexer, "TOTAL"))
2153 lex_match (lexer, T_EQUALS);
2154 if (!parse_bool (lexer, &show_totals))
2157 else if (lex_match_id (lexer, "LABEL"))
2159 lex_match (lexer, T_EQUALS);
2160 if (!lex_force_string (lexer))
2163 total_label = ss_xstrdup (lex_tokss (lexer));
2166 else if (lex_match_id (lexer, "POSITION"))
2168 lex_match (lexer, T_EQUALS);
2169 if (lex_match_id (lexer, "BEFORE"))
2170 totals_before = true;
2171 else if (lex_match_id (lexer, "AFTER"))
2172 totals_before = false;
2175 lex_error_expecting (lexer, "BEFORE", "AFTER");
2179 else if (lex_match_id (lexer, "EMPTY"))
2181 lex_match (lexer, T_EQUALS);
2182 if (lex_match_id (lexer, "INCLUDE"))
2183 c->show_empty = true;
2184 else if (lex_match_id (lexer, "EXCLUDE"))
2185 c->show_empty = false;
2188 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* The list of expected keywords differs between the two calls below;
   NOTE(review): presumably the shorter list is used when explicit
   categories were given. */
2195 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2196 "TOTAL", "LABEL", "POSITION", "EMPTY");
2198 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the options above.
   NOTE(review): the condition guarding this is not shown here. */
2205 if (c->n_cats >= allocated_cats)
2206 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2207 c->cats[c->n_cats++] = cat;
/* Make room for the totals category, then place it first or last according
   to TOTALS_BEFORE. */
2212 if (c->n_cats >= allocated_cats)
2213 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2215 struct ctables_category *totals;
2218 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2219 totals = &c->cats[0];
2222 totals = &c->cats[c->n_cats];
2225 *totals = (struct ctables_category) {
2227 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, attaching the current subtotal to each category. */
2231 struct ctables_category *subtotal = NULL;
2232 for (size_t i = totals_before ? 0 : c->n_cats;
2233 totals_before ? i < c->n_cats : i-- > 0;
2234 totals_before ? i++ : 0)
2236 struct ctables_category *cat = &c->cats[i];
2245 cat->subtotal = subtotal;
2248 case CCT_POSTCOMPUTE:
2259 case CCT_EXCLUDED_MISSING:
/* Validation below applies only when an explicit category list was given. */
2264 if (cats_start_ofs != -1)
2266 for (size_t i = 0; i < c->n_cats; i++)
2268 struct ctables_category *cat = &c->cats[i];
2271 case CCT_POSTCOMPUTE:
2272 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2273 struct msg_location *cats_location
2274 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2275 bool ok = ctables_recursive_check_postcompute (
2276 dict, cat->pc->expr, cat, c, cats_location);
2277 msg_location_destroy (cats_location);
2284 for (size_t j = 0; j < n_vars; j++)
2285 if (var_is_alpha (vars[j]))
2287 msg_at (SE, cat->location,
2288 _("This category specification may be applied "
2289 "only to numeric variables, but this "
2290 "subcommand tries to apply it to string "
2292 var_get_name (vars[j]));
/* A string category that parses in the common format is converted to a
   numeric category and its string storage is released. */
2301 if (!parse_category_string (cat->location, cat->string, dict,
2302 common_format->type, &n))
2305 ss_dealloc (&cat->string);
2307 cat->type = CCT_NUMBER;
2310 else if (!all_strings (vars, n_vars, cat))
/* A string range is converted the same way, one bound at a time, with a
   null string pointer denoting an open bound. */
2319 if (!cat->srange[0].string)
2321 else if (!parse_category_string (cat->location,
2322 cat->srange[0], dict,
2323 common_format->type, &n[0]))
2326 if (!cat->srange[1].string)
2328 else if (!parse_category_string (cat->location,
2329 cat->srange[1], dict,
2330 common_format->type, &n[1]))
2333 ss_dealloc (&cat->srange[0]);
2334 ss_dealloc (&cat->srange[1]);
2336 cat->type = CCT_NRANGE;
2337 cat->nrange[0] = n[0];
2338 cat->nrange[1] = n[1];
2340 else if (!all_strings (vars, n_vars, cat))
2351 case CCT_EXCLUDED_MISSING:
/* Releases the storage owned by NEST: the summary spec set for every
   summary variant and the area index array for every area type.
   NOTE(review): at least one statement of this function is not visible in
   this excerpt, so this list may be incomplete. */
2366 ctables_nest_uninit (struct ctables_nest *nest)
2369 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2370 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2371 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2372 free (nest->areas[at]);
/* Releases the storage owned by STACK: each of its nests is uninitialized
   with ctables_nest_uninit() and the nests array is freed.
   NOTE(review): a guard on STACK may precede the loop; it is not visible in
   this excerpt. */
2376 ctables_stack_uninit (struct ctables_stack *stack)
2380 for (size_t i = 0; i < stack->n; i++)
2381 ctables_nest_uninit (&stack->nests[i]);
2382 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0.  For every pair
   of a nest A from S0 and a nest B from S1, a new nest is created whose
   variable list is the variables of A followed by the variables of B.

   The scale index of the new nest is that of A when A has one; otherwise it
   is the scale index of B shifted by the number of variables in A.  Summary
   specs are cloned from whichever input nest is chosen as SUMMARY_SRC.

   Both input stacks are uninitialized before returning, so the caller gives
   up ownership of them.
   NOTE(review): how SUMMARY_SRC is chosen and any early exits are not fully
   visible in this excerpt. */
2386 static struct ctables_stack
2387 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for one nest per pair, that is s0.n times s1.n entries. */
2394 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2395 for (size_t i = 0; i < s0.n; i++)
2396 for (size_t j = 0; j < s1.n; j++)
2398 const struct ctables_nest *a = &s0.nests[i];
2399 const struct ctables_nest *b = &s1.nests[j];
2401 size_t allocate = a->n + b->n;
2402 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2404 for (size_t k = 0; k < a->n; k++)
2405 vars[n++] = a->vars[k];
2406 for (size_t k = 0; k < b->n; k++)
2407 vars[n++] = b->vars[k];
2408 assert (n == allocate);
2410 const struct ctables_nest *summary_src;
2411 if (!a->specs[CSV_CELL].var)
2413 else if (!b->specs[CSV_CELL].var)
2418 struct ctables_nest *new = &stack.nests[stack.n++];
2419 *new = (struct ctables_nest) {
2421 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2422 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2426 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2427 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2429 ctables_stack_uninit (&s0);
2430 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S1 after S0.  The nests of S0
   are copied first, followed by the nests of S1.  Each nest taken from S1
   has its group_head increased by the number of nests in S0, so that it
   still refers to the same nest after the move.  Nests are copied by plain
   structure assignment.
   NOTE(review): the release of the input arrays and the return statement
   are not visible in this excerpt. */
2434 static struct ctables_stack
2435 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2437 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2438 for (size_t i = 0; i < s0.n; i++)
2439 stack.nests[stack.n++] = s0.nests[i];
2440 for (size_t i = 0; i < s1.n; i++)
2442 stack.nests[stack.n] = s1.nests[i];
2443 stack.nests[stack.n].group_head += s0.n;
2446 assert (stack.n == s0.n + s1.n);
/* Builds a one-nest stack for the variable axis node A.  The nest gets a
   freshly allocated variable array with room for one variable.  Its scale
   index is 0 when A is a scale variable and SIZE_MAX otherwise.

   When A has cell summary specs or is a scale variable, the summary specs
   of every variant are cloned from A into the nest, and each set is tagged
   with the variable and the scale flag of A.
   NOTE(review): the remaining nest initializers are not visible in this
   excerpt. */
2452 static struct ctables_stack
2453 var_fts (const struct ctables_axis *a)
2455 struct variable **vars = xmalloc (sizeof *vars);
2458 struct ctables_nest *nest = xmalloc (sizeof *nest);
2459 *nest = (struct ctables_nest) {
2462 .scale_idx = a->scale ? 0 : SIZE_MAX,
2464 if (a->specs[CSV_CELL].n || a->scale)
2465 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2467 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2468 nest->specs[sv].var = a->var;
2469 nest->specs[sv].is_scale = a->scale;
2471 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  One visible path returns
   an empty stack.  Two others combine the stacks of the two subtrees in
   A->subs[], one with stack_fts() and one with nest_fts().
   NOTE(review): the conditions selecting each path are not visible in this
   excerpt; presumably a null A yields the empty stack and the operator of A
   selects between stacking and nesting. */
2474 static struct ctables_stack
2475 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2478 return (struct ctables_stack) { .n = 0 };
2486 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2487 enumerate_fts (axis_type, a->subs[1]));
2490 /* This should consider any of the scale variables found in the result to
2491 be linked to each other listwise for SMISSING=LISTWISE. */
2492 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2493 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  The member in use depends on the
   summary function, as listed in the comment above each member.
   NOTE(review): several members are not visible in this excerpt. */
2499 union ctables_summary
2501 /* COUNT, VALIDN, TOTALN. */
2504 /* MINIMUM, MAXIMUM, RANGE. */
2511 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2512 struct moments1 *moments;
2514 /* MEDIAN, MODE, PTILE. */
2517 struct casewriter *writer;
2522 /* XXX multiple response */
/* Initializes accumulator S for the summary function named by SS.

   Visible cases:
     - minimum and maximum start out as SYSMIS;
     - moment accumulators are created with MOMENT_MEAN or MOMENT_VARIANCE,
       depending on the function;
     - order statistics get a sorting casewriter whose cases hold two
       numeric values and are ordered ascending on the first.
   NOTE(review): most case labels are not visible in this excerpt. */
2526 ctables_summary_init (union ctables_summary *s,
2527 const struct ctables_summary_spec *ss)
2529 switch (ss->function)
2533 case CTSF_areaPCT_COUNT:
2534 case CTSF_areaPCT_VALIDN:
2535 case CTSF_areaPCT_TOTALN:
2550 s->min = s->max = SYSMIS;
2555 case CTSF_areaPCT_SUM:
2556 s->moments = moments1_create (MOMENT_MEAN);
2562 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 (numeric) columns; ctables_summary_add() stores the value in
   column 0 and its weight in column 1. */
2569 struct caseproto *proto = caseproto_create ();
2570 proto = caseproto_add_width (proto, 0);
2571 proto = caseproto_add_width (proto, 0);
2573 struct subcase ordering;
2574 subcase_init (&ordering, 0, 0, SC_ASCEND);
2575 s->writer = sort_create_writer (&ordering, proto);
2576 subcase_uninit (&ordering);
2577 caseproto_unref (proto);
/* Releases what accumulator S holds for the summary function named by SS.
   The visible code destroys the moments object for moment-based functions
   and the casewriter for order statistics.
   NOTE(review): most case labels are not visible in this excerpt. */
2587 ctables_summary_uninit (union ctables_summary *s,
2588 const struct ctables_summary_spec *ss)
2590 switch (ss->function)
2594 case CTSF_areaPCT_COUNT:
2595 case CTSF_areaPCT_VALIDN:
2596 case CTSF_areaPCT_TOTALN:
2617 case CTSF_areaPCT_SUM:
2618 moments1_destroy (s->moments);
2624 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function named by
   SS.  VALUE is the value of VAR for the case.  The flags describe whether
   the summarized variable is a scale variable and whether the value is
   missing or falls in an excluded category.  D_WEIGHT and E_WEIGHT are the
   two weights available for the case.

   Visible behavior:
     - counting functions add a weight, or 1.0 when SS is unweighted, to
       S->count, in some cases only when IS_SCALE is set or EXCLUDED_MISSING
       is clear;
     - minimum and maximum track VALUE->f and skip scale-missing values;
     - moment-based functions pass VALUE->f and the weight to moments1_add();
     - order statistics write a case holding the value and its weight to
       S->writer.
   NOTE(review): most case labels are not visible in this excerpt, so the
   mapping from function to branch cannot be confirmed here.  Presumably
   D_WEIGHT is the dictionary weight and E_WEIGHT the effective weight. */
2630 ctables_summary_add (union ctables_summary *s,
2631 const struct ctables_summary_spec *ss,
2632 const struct variable *var, const union value *value,
2633 bool is_scale, bool is_scale_missing,
2634 bool is_missing, bool excluded_missing,
2635 double d_weight, double e_weight)
2637 /* To determine whether a case is included in a given table for a particular
2638 kind of summary, consider the following charts for each variable in the
2639 table. Only if "yes" appears for every variable for the summary is the
2642 Categorical variables: VALIDN COUNT TOTALN
2643 Valid values in included categories yes yes yes
2644 Missing values in included categories --- yes yes
2645 Missing values in excluded categories --- --- yes
2646 Valid values in excluded categories --- --- ---
2648 Scale variables: VALIDN COUNT TOTALN
2649 Valid value yes yes yes
2650 Missing value --- yes yes
2652 Missing values include both user- and system-missing. (The system-missing
2653 value is always in an excluded category.)
2655 switch (ss->function)
2658 s->count += ss->weighted ? d_weight : 1.0;
2661 case CTSF_areaPCT_TOTALN:
2662 s->count += ss->weighted ? e_weight : 1.0;
2666 if (is_scale || !excluded_missing)
2667 s->count += ss->weighted ? d_weight : 1.0;
2670 case CTSF_areaPCT_COUNT:
2671 if (is_scale || !excluded_missing)
2672 s->count += ss->weighted ? e_weight : 1.0;
2679 s->count += ss->weighted ? d_weight : 1.0;
2682 case CTSF_areaPCT_VALIDN:
2686 s->count += ss->weighted ? e_weight : 1.0;
2696 s->count += ss->weighted ? e_weight : 1.0;
2700 if (is_scale || !excluded_missing)
2701 s->count += e_weight;
2708 s->count += e_weight;
2712 s->count += e_weight;
2718 if (!is_scale_missing)
2720 assert (!var_is_alpha (var)); /* XXX? */
2721 if (s->min == SYSMIS || value->f < s->min)
2723 if (s->max == SYSMIS || value->f > s->max)
2733 if (!is_scale_missing)
2734 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
/* This case additionally requires that the value not be missing. */
2737 case CTSF_areaPCT_SUM:
2738 if (!is_missing && !is_scale_missing)
2739 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2745 if (!is_scale_missing)
2747 double w = ss->weighted ? e_weight : 1.0;
/* Store the observation as a two-value case: the value at index 0 and its
   weight at index 1. */
2750 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2751 *case_num_rw_idx (c, 0) = value->f;
2752 *case_num_rw_idx (c, 1) = w;
2753 casewriter_write (s->writer, c);
/* Computes the final value of the summary function named by SS from
   accumulator S, for the given CELL.

   Visible computations:
     - percentages divide S->count by the matching count, valid count or
       total of the area selected by SS->calc_area (weighted or unweighted
       per SS->weighted), multiply by 100, and yield SYSMIS when the
       denominator is zero;
     - the range is the maximum minus the minimum, or SYSMIS when either is
       SYSMIS;
     - moment-based results come from moments1_calculate(): the standard
       error goes through calc_semean(), the standard deviation is the
       square root of the variance, and the sum is weight times mean;
     - the percentage-of-sum result divides weight times mean by the area
       sum for SS->sum_var_idx;
     - percentiles and the mode read the sorted cases back from S->writer
       and accumulate order statistics over them, storing the result in
       S->ovalue.
   NOTE(review): most case labels and return statements are not visible in
   this excerpt. */
2760 ctables_summary_value (const struct ctables_cell *cell,
2761 union ctables_summary *s,
2762 const struct ctables_summary_spec *ss)
2764 switch (ss->function)
2771 return cell->areas[ss->calc_area]->sequence;
2773 case CTSF_areaPCT_COUNT:
2775 const struct ctables_area *a = cell->areas[ss->calc_area];
2776 double a_count = ss->weighted ? a->e_count : a->u_count;
2777 return a_count ? s->count / a_count * 100 : SYSMIS;
2780 case CTSF_areaPCT_VALIDN:
2782 const struct ctables_area *a = cell->areas[ss->calc_area];
2783 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2784 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2787 case CTSF_areaPCT_TOTALN:
2789 const struct ctables_area *a = cell->areas[ss->calc_area];
2790 double a_total = ss->weighted ? a->e_total : a->u_total;
2791 return a_total ? s->count / a_total * 100 : SYSMIS;
2808 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2813 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2819 double weight, variance;
2820 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2821 return calc_semean (variance, weight);
2827 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2828 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered from the moments as total weight times mean. */
2833 double weight, mean;
2834 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2835 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2841 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2845 case CTSF_areaPCT_SUM:
2847 double weight, mean;
2848 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2849 if (weight == SYSMIS || mean == SYSMIS)
2852 const struct ctables_area *a = cell->areas[ss->calc_area];
2853 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2854 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2855 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* NOTE(review): turning the writer into a reader presumably consumes it;
   the lines that reset S->writer are not shown here. */
2862 struct casereader *reader = casewriter_make_reader (s->writer);
/* The requested percentile is used for CTSF_PTILE and 0.5 otherwise. */
2865 struct percentile *ptile = percentile_create (
2866 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2867 struct order_stats *os = &ptile->parent;
2868 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2869 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2870 statistic_destroy (&ptile->parent.parent);
2877 struct casereader *reader = casewriter_make_reader (s->writer);
2880 struct mode *mode = mode_create ();
2881 struct order_stats *os = &mode->parent;
2882 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2883 s->ovalue = mode->mode;
2884 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   drive the comparison and the pivot axis whose cell values are compared. */
2892 struct ctables_cell_sort_aux
2894 const struct ctables_nest *nest;
2895 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   pointers to struct ctables_cell, and AUX_ points to a struct
   ctables_cell_sort_aux.

   For every variable of the nest except the scale variable, the cell values
   on axis AUX->a are compared.  Cells in different categories are ordered
   by the addresses of their category objects.  Within one category the
   comparison depends on the category type: some types need no further
   comparison, some compare the values, and some compare value labels.
   Where a sort direction applies, the result is negated for descending
   order.
   NOTE(review): most case labels and the handling of missing labels are not
   visible in this excerpt. */
2899 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2901 const struct ctables_cell_sort_aux *aux = aux_;
2902 struct ctables_cell *const *ap = a_;
2903 struct ctables_cell *const *bp = b_;
2904 const struct ctables_cell *a = *ap;
2905 const struct ctables_cell *b = *bp;
2907 const struct ctables_nest *nest = aux->nest;
2908 for (size_t i = 0; i < nest->n; i++)
2909 if (i != nest->scale_idx)
2911 const struct variable *var = nest->vars[i];
2912 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2913 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Pointer order of the category objects decides between categories. */
2914 if (a_cv->category != b_cv->category)
2915 return a_cv->category > b_cv->category ? 1 : -1;
2917 const union value *a_val = &a_cv->value;
2918 const union value *b_val = &b_cv->value;
2919 switch (a_cv->category->type)
2925 case CCT_POSTCOMPUTE:
2926 case CCT_EXCLUDED_MISSING:
2927 /* Must be equal. */
2935 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2943 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2945 return a_cv->category->sort_ascending ? cmp : -cmp;
2951 const char *a_label = var_lookup_value_label (var, a_val);
2952 const char *b_label = var_lookup_value_label (var, b_val);
2958 cmp = strcmp (a_label, b_label);
2964 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2967 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, taking the pivot
   axes in order.  A_ and B_ point to pointers to struct ctables_cell, and
   AUX is unused.
   NOTE(review): the test that guards the return inside the loop and the
   final return are not visible in this excerpt; presumably the first axis
   whose leaf indexes differ decides the order and equal cells yield 0. */
2979 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2980 const void *aux UNUSED)
2982 struct ctables_cell *const *ap = a_;
2983 struct ctables_cell *const *bp = b_;
2984 const struct ctables_cell *a = *ap;
2985 const struct ctables_cell *b = *bp;
2987 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2989 int al = a->axes[axis].leaf;
2990 int bl = b->axes[axis].leaf;
2992 return al > bl ? 1 : -1;
3000 For each ctables_table:
3001 For each combination of row vars:
3002 For each combination of column vars:
3003 For each combination of layer vars:
3005 Make a table of row values:
3006 Sort entries by row values
3007 Assign a 0-based index to each actual value
3008 Construct a dimension
3009 Make a table of column values
3010 Make a table of layer values
3012 Fill the table entry using the indexes from before.
/* Finds or creates the area of type AREA in section S that CELL belongs to.

   The lookup key is built from the cell values at the variable indexes
   listed in nest->areas[AREA] on every axis.  The category pointer is
   always part of the key.  The value itself is included only when the
   category is not a total, subtotal or postcompute category.  An existing
   area matches when its example cell agrees with CELL on all of these.

   When no area matches, a new one is allocated with CELL as its example and
   a zero-filled sums array sized by the number of sum variables of the
   table, then inserted into S->areas[AREA].
   NOTE(review): the return statements and the sums value used when there
   are no sum variables are not visible in this excerpt. */
3015 static struct ctables_area *
3016 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3017 enum ctables_area_type area)
3020 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3022 const struct ctables_nest *nest = s->nests[a];
3023 for (size_t i = 0; i < nest->n_areas[area]; i++)
3025 size_t v_idx = nest->areas[area][i];
3026 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3027 hash = hash_pointer (cv->category, hash);
3028 if (cv->category->type != CCT_TOTAL
3029 && cv->category->type != CCT_SUBTOTAL
3030 && cv->category->type != CCT_POSTCOMPUTE)
3031 hash = value_hash (&cv->value,
3032 var_get_width (nest->vars[v_idx]), hash);
/* Search the hash bucket for an area whose example cell has the same key.
   The comparison mirrors the hashing above. */
3036 struct ctables_area *a;
3037 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3039 const struct ctables_cell *df = a->example;
3040 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3042 const struct ctables_nest *nest = s->nests[a];
3043 for (size_t i = 0; i < nest->n_areas[area]; i++)
3045 size_t v_idx = nest->areas[area][i];
3046 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3047 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3048 if (cv1->category != cv2->category
3049 || (cv1->category->type != CCT_TOTAL
3050 && cv1->category->type != CCT_SUBTOTAL
3051 && cv1->category->type != CCT_POSTCOMPUTE
3052 && !value_equal (&cv1->value, &cv2->value,
3053 var_get_width (nest->vars[v_idx]))))
/* No match was found, so create and register a new area. */
3062 struct ctables_sum *sums = (s->table->n_sum_vars
3063 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3066 a = xmalloc (sizeof *a);
3067 *a = (struct ctables_area) { .example = cell, .sums = sums };
3068 hmap_insert (&s->areas[area], &a->node, hash);
/* Wraps the string bytes of V, using the width of VAR as the length, in a
   substring and strips trailing spaces from it with ss_rtrim().  The
   substring refers to the storage of V; nothing is copied.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the trimmed substring is returned. */
3072 static struct substring
3073 rtrim_value (const union value *v, const struct variable *var)
3075 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3076 var_get_width (var));
3077 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of VAR, with trailing spaces removed,
   lies within the range SRANGE[0] through SRANGE[1], both ends included.  A
   bound with a null string pointer is open, so that side is not checked. */
3082 in_string_range (const union value *v, const struct variable *var,
3083 const struct substring *srange)
3085 struct substring s = rtrim_value (v, var);
3086 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3087 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is handled before the scan.  The
   categories are then examined from last to first.  The tests visible here
   are: equality with a number, equality with a string after trailing spaces
   are trimmed, membership in an inclusive numeric range where -DBL_MAX and
   DBL_MAX act as open bounds, membership in a string range, and whether the
   value is missing for VAR.

   When the scan finds no match, the result is NULL for a missing value and
   the remembered OTHERNM category, which may be null, otherwise.
   NOTE(review): most case labels and returns inside the loop are not
   visible in this excerpt. */
3090 static const struct ctables_category *
3091 ctables_categories_match (const struct ctables_categories *c,
3092 const union value *v, const struct variable *var)
3094 if (var_is_numeric (var) && v->f == SYSMIS)
3097 const struct ctables_category *othernm = NULL;
3098 for (size_t i = c->n_cats; i-- > 0; )
3100 const struct ctables_category *cat = &c->cats[i];
3104 if (cat->number == v->f)
3109 if (ss_equals (cat->string, rtrim_value (v, var)))
3114 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3115 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3120 if (in_string_range (v, var, cat->srange))
3125 if (var_is_value_missing (var, v))
3129 case CCT_POSTCOMPUTE:
3144 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3147 case CCT_EXCLUDED_MISSING:
3152 return var_is_value_missing (var, v) ? NULL : othernm;
/* Looks for a CCT_TOTAL category at either end of C's category array.  The
   first category is preferred over the last.

   NOTE(review): the final arm of the conditional is not visible here;
   presumably it yields null when neither end is a total -- confirm.

   NOTE(review): c->cats[0] and c->cats[c->n_cats - 1] are read
   unconditionally, so this assumes C has at least one category -- confirm
   that callers guarantee it. */
3155 static const struct ctables_category *
3156 ctables_categories_total (const struct ctables_categories *c)
3158 const struct ctables_category *first = &c->cats[0];
3159 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3160 return (first->type == CCT_TOTAL ? first
3161 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S that is identified by the categories in CATS
   and, for ordinary categories, the corresponding values in case C.  The
   visible code searches S's cell map and contains the code that allocates,
   initializes, and inserts a new cell.  ctables_cell_add__() uses the value
   returned from here as the cell to update.

   The lookup hash covers every non-scale variable in each axis's nest: the
   category pointer always contributes, and the case's value contributes
   unless the category is a total, subtotal, or postcompute.

   NOTE(review): CATS has a fixed second dimension of 10; no check of nest->n
   against that bound is visible here.

   NOTE(review): several lines (declaration of 'hash', the early return on a
   match, the 'switch' on the axis, the final return) are not visible here. */
3165 static struct ctables_cell *
3166 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3167 const struct ctables_category *cats[PIVOT_N_AXES][10])
3170 enum ctables_summary_variant sv = CSV_CELL;
/* Compute the hash that identifies the cell. */
3171 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3173 const struct ctables_nest *nest = s->nests[a];
3174 for (size_t i = 0; i < nest->n; i++)
3175 if (i != nest->scale_idx)
3177 hash = hash_pointer (cats[a][i], hash);
3178 if (cats[a][i]->type != CCT_TOTAL
3179 && cats[a][i]->type != CCT_SUBTOTAL
3180 && cats[a][i]->type != CCT_POSTCOMPUTE)
3181 hash = value_hash (case_data (c, nest->vars[i]),
3182 var_get_width (nest->vars[i]), hash);
/* Search the cells that share this hash.  A candidate is rejected if, for
   any non-scale variable, its category differs or (for ordinary categories)
   its stored value differs from the case's value. */
3188 struct ctables_cell *cell;
3189 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3191 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3193 const struct ctables_nest *nest = s->nests[a];
3194 for (size_t i = 0; i < nest->n; i++)
3195 if (i != nest->scale_idx
3196 && (cats[a][i] != cell->axes[a].cvs[i].category
3197 || (cats[a][i]->type != CCT_TOTAL
3198 && cats[a][i]->type != CCT_SUBTOTAL
3199 && cats[a][i]->type != CCT_POSTCOMPUTE
3200 && !value_equal (case_data (c, nest->vars[i]),
3201 &cell->axes[a].cvs[i].value,
3202 var_get_width (nest->vars[i])))))
/* Create a new cell (presumably reached only when the search above found no
   match -- confirm). */
3211 cell = xmalloc (sizeof *cell);
3214 cell->omit_areas = 0;
3215 cell->postcompute = false;
3216 //struct string name = DS_EMPTY_INITIALIZER;
3217 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3219 const struct ctables_nest *nest = s->nests[a];
3220 cell->axes[a].cvs = (nest->n
3221 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3223 for (size_t i = 0; i < nest->n; i++)
/* NOTE(review): cats[a][i] is read here for the scale variable as well, and
   stored into the cell below; confirm that callers initialize that slot. */
3225 const struct ctables_category *cat = cats[a][i];
3226 const struct variable *var = nest->vars[i];
3227 const union value *value = case_data (c, var);
3228 if (i != nest->scale_idx)
3230 const struct ctables_category *subtotal = cat->subtotal;
3231 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total, subtotal, and postcompute categories set bits in cell->omit_areas;
   which bits depends on the axis that the category is on. */
3234 if (cat->type == CCT_TOTAL
3235 || cat->type == CCT_SUBTOTAL
3236 || cat->type == CCT_POSTCOMPUTE)
3238 /* XXX these should be more encompassing I think.*/
3242 case PIVOT_AXIS_COLUMN:
3243 cell->omit_areas |= ((1u << CTAT_TABLE) |
3244 (1u << CTAT_LAYER) |
3245 (1u << CTAT_LAYERCOL) |
3246 (1u << CTAT_SUBTABLE) |
3249 case PIVOT_AXIS_ROW:
3250 cell->omit_areas |= ((1u << CTAT_TABLE) |
3251 (1u << CTAT_LAYER) |
3252 (1u << CTAT_LAYERROW) |
3253 (1u << CTAT_SUBTABLE) |
3256 case PIVOT_AXIS_LAYER:
3257 cell->omit_areas |= ((1u << CTAT_TABLE) |
3258 (1u << CTAT_LAYER));
3262 if (cat->type == CCT_POSTCOMPUTE)
3263 cell->postcompute = true;
/* Record the category and a private copy of the case's value. */
3266 cell->axes[a].cvs[i].category = cat;
3267 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below use 'name', whose declaration is
   commented out above; presumably this region is compiled out by
   preprocessor lines that are not visible here -- confirm. */
3270 if (i != nest->scale_idx)
3272 if (!ds_is_empty (&name))
3273 ds_put_cstr (&name, ", ");
3274 char *value_s = data_out (value, var_get_encoding (var),
3275 var_get_print_format (var),
3276 settings_get_fmt_settings ());
3277 if (cat->type == CCT_TOTAL
3278 || cat->type == CCT_SUBTOTAL
3279 || cat->type == CCT_POSTCOMPUTE)
3280 ds_put_format (&name, "%s=total", var_get_name (var));
3282 ds_put_format (&name, "%s=%s", var_get_name (var),
3283 value_s + strspn (value_s, " "));
3289 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per spec in the summary axis's spec set
   for this cell's variant, attach the cell to an area of every type, and add
   the cell to the section's cell map. */
3291 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3292 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3293 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3294 for (size_t i = 0; i < specs->n; i++)
3295 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3296 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3297 cell->areas[at] = ctables_area_insert (s, cell, at);
3298 hmap_insert (&s->cells, &cell->node, hash);
/* Examines case C for missing scale data under SPECS.  The visible tests are,
   in order: whether SPECS is for a scale variable at all (specs->is_scale),
   whether C's value of specs->var is missing, and whether C's value of any of
   SPECS's listwise variables is missing.

   NOTE(review): the 'return' statements are not visible here; presumably the
   result is false when SPECS is not scale and true as soon as any tested
   value is missing -- confirm. */
3303 is_scale_missing (const struct ctables_summary_spec_set *specs,
3304 const struct ccase *c)
3306 if (!specs->is_scale)
3309 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3312 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3314 const struct variable *var = specs->listwise_vars[i];
3315 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of S selected
   by CATS.  The cell is obtained from ctables_cell_insert__().

   Each of the cell's summaries is updated with ctables_summary_add().  Then,
   for the cell's areas, the totals, counts, valid counts, and per-variable
   sums are accumulated.  IS_MISSING and EXCLUDED_MISSING are passed through
   to the summaries; EXCLUDED_MISSING also gates the area counts. */
3323 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3324 const struct ctables_category *cats[PIVOT_N_AXES][10],
3325 bool is_missing, bool excluded_missing,
3326 double d_weight, double e_weight)
3328 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3329 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3331 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3333 bool scale_missing = is_scale_missing (specs, c);
3334 for (size_t i = 0; i < specs->n; i++)
3335 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3336 specs->var, case_data (c, specs->var), specs->is_scale,
3337 scale_missing, is_missing, excluded_missing,
3338 d_weight, e_weight);
/* NOTE(review): '&&' below is a logical AND, and '1u << at' is never zero, so
   the condition is equivalent to 'cell->omit_areas == 0': a cell with any
   omit bit set skips every area, regardless of 'at'.  omit_areas is built as
   a bit mask in ctables_cell_insert__(), so a bitwise '&' may have been
   intended.  Confirm before changing: switching to '&' alters which areas
   total, subtotal, and postcompute cells contribute to. */
3339 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3340 if (!(cell->omit_areas && (1u << at)))
3342 struct ctables_area *a = cell->areas[at];
3343 a->d_total += d_weight;
3344 a->e_total += e_weight;
3346 if (!excluded_missing)
3348 a->d_count += d_weight;
3349 a->e_count += e_weight;
/* NOTE(review): the condition guarding the valid counts and sums below is
   not visible here. */
3354 a->d_valid += d_weight;
3355 a->e_valid += e_weight;
/* For each of the table's sum variables whose value in C is not missing,
   accumulate the value weighted by E_WEIGHT (e_sum) and unweighted (u_sum). */
3358 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3360 /* XXX listwise_missing??? */
3361 const struct variable *var = s->table->sum_vars[i];
3362 double addend = case_num (c, var);
3363 if (!var_is_num_missing (var, addend))
3365 struct ctables_sum *sum = &a->sums[i];
3366 sum->e_sum += addend * e_weight;
3367 sum->u_sum += addend;
/* Adds case C to the total cells of S.  Starting at axis START_AXIS and nest
   position START_NEST, each variable is tested against the nest's scale
   index, and the variable's total category is looked up with
   ctables_categories_total().  The visible code saves the current entry of
   CATS, adds the case with ctables_cell_add__(), and recurses from the next
   nest position (A, I + 1) so that combinations of totals are covered too.

   NOTE(review): the lines that skip the scale variable, test whether a total
   exists, substitute it into CATS, and restore the saved entry are not
   visible here -- confirm against the full source. */
3375 recurse_totals (struct ctables_section *s, const struct ccase *c,
3376 const struct ctables_category *cats[PIVOT_N_AXES][10],
3377 bool is_missing, bool excluded_missing,
3378 double d_weight, double e_weight,
3379 enum pivot_axis_type start_axis, size_t start_nest)
3381 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3383 const struct ctables_nest *nest = s->nests[a];
3384 for (size_t i = start_nest; i < nest->n; i++)
3386 if (i == nest->scale_idx)
3389 const struct variable *var = nest->vars[i];
3391 const struct ctables_category *total = ctables_categories_total (
3392 s->table->categories[var_get_dict_index (var)]);
3395 const struct ctables_category *save = cats[a][i];
3397 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3398 d_weight, e_weight);
3399 recurse_totals (s, c, cats, is_missing, excluded_missing,
3400 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells of S.  Starting at axis START_AXIS and
   nest position START_NEST, the entry of CATS for each variable is saved,
   replaced by that category's 'subtotal', the case is added with
   ctables_cell_add__(), and the function recurses from the next nest
   position (A, I + 1) so that combinations of subtotals are covered too.

   NOTE(review): the lines that skip the scale variable, test whether the
   category has a subtotal, and restore the saved entry are not visible here
   -- confirm against the full source. */
3409 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3410 const struct ctables_category *cats[PIVOT_N_AXES][10],
3411 bool is_missing, bool excluded_missing,
3412 double d_weight, double e_weight,
3413 enum pivot_axis_type start_axis, size_t start_nest)
3415 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3417 const struct ctables_nest *nest = s->nests[a];
3418 for (size_t i = start_nest; i < nest->n; i++)
3420 if (i == nest->scale_idx)
3423 const struct ctables_category *save = cats[a][i];
3426 cats[a][i] = save->subtotal;
3427 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3428 d_weight, e_weight);
3429 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3430 d_weight, e_weight, a, i + 1);
/* Records VALUE, a value of VAR, in the hash map OCCURRENCES.  The map is
   searched for an entry equal to VALUE at VAR's width; the code that follows
   allocates a new ctables_occurrence, stores a clone of VALUE in it, and
   inserts it under VALUE's hash.

   NOTE(review): the action taken when an equal entry is found is not visible
   here; presumably the function returns without inserting a duplicate. */
3439 ctables_add_occurrence (const struct variable *var,
3440 const union value *value,
3441 struct hmap *occurrences)
3443 int width = var_get_width (var);
3444 unsigned int hash = value_hash (value, width, 0);
3446 struct ctables_occurrence *o;
3447 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3449 if (value_equal (value, &o->value, width))
3452 o = xmalloc (sizeof *o);
3453 value_clone (&o->value, value, width);
3454 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First, each non-scale variable in each axis's nest is matched to a category
   with ctables_categories_match() and the result is stored in a local
   category table.  A static placeholder category of type
   CCT_EXCLUDED_MISSING can be stored instead, which also sets
   'excluded_missing'; the condition that selects it is not visible here.

   Unless 'excluded_missing' is set, every non-scale value of the case is then
   recorded in S's occurrence maps with ctables_add_occurrence().

   Finally the case is added to its own cell with ctables_cell_add__() and to
   total and subtotal cells through recurse_totals() and recurse_subtotals().

   NOTE(review): the local category table has a fixed second dimension of 10
   (marked XXX below); no check of nest->n against that bound is visible.

   NOTE(review): the second block comment in the body appears to have lost
   its final line, and the lines that update 'is_missing' are not visible
   here. */
3458 ctables_cell_insert (struct ctables_section *s,
3459 const struct ccase *c,
3460 double d_weight, double e_weight)
3462 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3464 /* Does at least one categorical variable have a missing value in an included
3465 or excluded category? */
3466 bool is_missing = false;
3468 /* Does at least one categorical variable have a missing value in an excluded
3470 bool excluded_missing = false;
3472 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3474 const struct ctables_nest *nest = s->nests[a];
3475 for (size_t i = 0; i < nest->n; i++)
3477 if (i == nest->scale_idx)
3480 const struct variable *var = nest->vars[i];
3481 const union value *value = case_data (c, var);
3483 bool var_missing = var_is_value_missing (var, value) != 0;
3487 cats[a][i] = ctables_categories_match (
3488 s->table->categories[var_get_dict_index (var)], value, var);
3494 static const struct ctables_category cct_excluded_missing = {
3495 .type = CCT_EXCLUDED_MISSING,
3498 cats[a][i] = &cct_excluded_missing;
3499 excluded_missing = true;
3504 if (!excluded_missing)
3505 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3507 const struct ctables_nest *nest = s->nests[a];
3508 for (size_t i = 0; i < nest->n; i++)
3509 if (i != nest->scale_idx)
3511 const struct variable *var = nest->vars[i];
3512 const union value *value = case_data (c, var);
3513 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3517 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3518 d_weight, e_weight);
3520 //if (!excluded_missing)
3522 recurse_totals (s, c, cats, is_missing, excluded_missing,
3523 d_weight, e_weight, 0, 0);
3524 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3525 d_weight, e_weight, 0, 0);
/* Member of struct merge_item: the summary spec set that the item refers to.
   merge_item_compare_3way() indexes set->specs[] with the item's 'ofs'
   member.

   NOTE(review): the struct's opening line and its other members are not
   visible here. */
3531 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that A and B refer to, that is,
   set->specs[ofs] for each.  The keys, in priority order, are the spec's
   function, its 'weighted' flag, its calc_area, its percentile, and finally
   its label compared with strcmp(), where a null label is treated as an
   empty string.

   NOTE(review): function, weighted, and calc_area order ascending (1 when A's
   is greater), but the percentile test returns 1 when A's percentile is the
   smaller, i.e. descending.  Confirm that this reversal is intended. */
3536 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3538 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3539 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3540 if (as->function != bs->function)
3541 return as->function > bs->function ? 1 : -1;
3542 else if (as->weighted != bs->weighted)
3543 return as->weighted > bs->weighted ? 1 : -1;
3544 else if (as->calc_area != bs->calc_area)
3545 return as->calc_area > bs->calc_area ? 1 : -1;
3546 else if (as->percentile != bs->percentile)
3547 return as->percentile < bs->percentile ? 1 : -1;
3549 const char *as_label = as->label ? as->label : "";
3550 const char *bs_label = bs->label ? bs->label : "";
3551 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of VAR: a temporary pivot value is created for
   VAR with NUMBER as its value, formatted into S with pivot_value_format(),
   and then destroyed.

   NOTE(review): the declaration of the S parameter is not visible here;
   callers in this file pass a 'struct string *'. */
3555 ctables_category_format_number (double number, const struct variable *var,
3558 struct pivot_value *pv = pivot_value_new_var_value (
3559 var, &(union value) { .f = number });
3560 pivot_value_format (pv, NULL, s);
3561 pivot_value_destroy (pv);
/* Formats STRING as a value of VAR into OUT.  STRING is first copied into a
   newly allocated buffer of VAR's width with buf_copy_rpad(), using a space
   as the pad character; a temporary pivot value over that buffer is then
   formatted into OUT with pivot_value_format() and destroyed.

   NOTE(review): the buffer S comes from xmalloc(); its release is not visible
   here -- confirm that it is freed after pivot_value_destroy(). */
3565 ctables_category_format_string (struct substring string,
3566 const struct variable *var, struct string *out)
3568 int width = var_get_width (var);
3569 char *s = xmalloc (width);
3570 buf_copy_rpad (s, width, string.string, string.length, ' ');
3571 struct pivot_value *pv = pivot_value_new_var_value (
3572 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3573 pivot_value_format (pv, NULL, out);
3574 pivot_value_destroy (pv);
/* Writes a textual label for category CAT of VAR into S.  The visible
   branches produce: the category's number; its string; its numeric or string
   range as "<low> THRU <high>"; the literal "MISSING"; the literal
   "OTHERNM"; "&" followed by the postcompute's name; or the category's
   total_label.

   ctables_postcompute_label() tests this function's result with '!', so it
   reports success or failure to its caller.

   NOTE(review): the 'switch', most 'case' labels, and the 'return'
   statements are not visible here, so which category type selects which
   branch should be confirmed against the full source. */
3579 ctables_category_format_label (const struct ctables_category *cat,
3580 const struct variable *var,
3586 ctables_category_format_number (cat->number, var, s);
3590 ctables_category_format_string (cat->string, var, s);
3594 ctables_category_format_number (cat->nrange[0], var, s);
3595 ds_put_format (s, " THRU ");
3596 ctables_category_format_number (cat->nrange[1], var, s);
3600 ctables_category_format_string (cat->srange[0], var, s);
3601 ds_put_format (s, " THRU ");
3602 ctables_category_format_string (cat->srange[1], var, s);
3606 ds_put_cstr (s, "MISSING");
3610 ds_put_cstr (s, "OTHERNM");
3613 case CCT_POSTCOMPUTE:
3614 ds_put_format (s, "&%s", cat->pc->name);
3619 ds_put_cstr (s, cat->total_label);
3625 case CCT_EXCLUDED_MISSING:
/* Creates the label for postcompute category CAT of VAR from the
   postcompute's label text, expanding each ")LABEL[n]" reference in it.

   The text is scanned for ")LABEL[".  When no further reference is found, the
   result is the remaining text as user text, either directly (if nothing has
   been accumulated yet) or appended to the accumulated output.  Otherwise the
   text before the reference is copied to the output, the index up to ']' is
   parsed with strtol(), checked to be in 1...cats->n_cats with no trailing
   junk, and the label of category number INDEX (1-based) in CATS is appended
   with ctables_category_format_label().

   The final statement returns the postcompute's label unexpanded.

   NOTE(review): the loop construct and the failure branches are not visible
   here; presumably a malformed or out-of-range reference jumps to the final
   statement, and 'out' is released on that path -- confirm. */
3632 static struct pivot_value *
3633 ctables_postcompute_label (const struct ctables_categories *cats,
3634 const struct ctables_category *cat,
3635 const struct variable *var)
3637 struct substring in = ss_cstr (cat->pc->label);
3638 struct substring target = ss_cstr (")LABEL[");
3640 struct string out = DS_EMPTY_INITIALIZER;
3643 size_t chunk = ss_find_substring (in, target);
3644 if (chunk == SIZE_MAX)
3646 if (ds_is_empty (&out))
3647 return pivot_value_new_user_text (in.string, in.length);
3650 ds_put_substring (&out, in);
3651 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text that precedes the reference, then step past ")LABEL[". */
3655 ds_put_substring (&out, ss_head (in, chunk));
3656 ss_advance (&in, chunk + target.length);
3658 struct substring idx_s;
3659 if (!ss_get_until (&in, ']', &idx_s))
/* 'tail' must land exactly on the end of the index text, so anything other
   than a plain decimal number between the brackets is rejected. */
3662 long int idx = strtol (idx_s.string, &tail, 10);
3663 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3666 struct ctables_category *cat2 = &cats->cats[idx - 1];
3667 if (!ctables_category_format_label (cat2, var, &out))
3673 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns the pivot value that labels category CAT of VAR:

   - For a postcompute category whose postcompute has a label, the expanded
     label from ctables_postcompute_label().

   - For a total or subtotal category, CAT's total_label as user text.

   - Otherwise (including a postcompute category without a label), VALUE
     presented as a value of VAR. */
3676 static struct pivot_value *
3677 ctables_category_create_value_label (const struct ctables_categories *cats,
3678 const struct ctables_category *cat,
3679 const struct variable *var,
3680 const union value *value)
3682 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3683 ? ctables_postcompute_label (cats, cat, var)
3684 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3685 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3686 : pivot_value_new_var_value (var, value));
/* Searches T's clabels_values_map for an entry whose value equals VALUE at
   the given WIDTH.  HASH must be VALUE's hash as computed by the callers in
   this file, value_hash (value, width, 0).

   NOTE(review): the 'return' statements are not visible here; presumably the
   matching entry is returned, or null if there is none -- confirm. */
3689 static struct ctables_value *
3690 ctables_value_find__ (struct ctables_table *t, const union value *value,
3691 int width, unsigned int hash)
3693 struct ctables_value *clv;
3694 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3695 hash, &t->clabels_values_map)
3696 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T's clabels_values_map.  The map is first searched with
   ctables_value_find__(); the code that follows allocates a new
   ctables_value, stores a clone of VALUE in it, and inserts it under VALUE's
   hash.

   NOTE(review): the WIDTH parameter's declaration and the test on the search
   result are not visible here; presumably nothing is inserted when VALUE is
   already present -- confirm. */
3702 ctables_value_insert (struct ctables_table *t, const union value *value,
3705 unsigned int hash = value_hash (value, width, 0);
3706 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3709 clv = xmalloc (sizeof *clv);
3710 value_clone (&clv->value, value, width);
3711 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper around ctables_value_find__() that computes VALUE's
   hash itself, with value_hash (value, width, 0), and returns whatever
   ctables_value_find__() returns for T, VALUE, and WIDTH. */
3715 static struct ctables_value *
3716 ctables_value_find (struct ctables_table *t,
3717 const union value *value, int width)
3719 return ctables_value_find__ (t, value, width,
3720 value_hash (value, width, 0));
/* Recursively enumerates one nest index per axis in IX and sets up a section
   of T for the combination.

   While A is a valid axis, IX[A] is iterated over the nests in T's stack for
   axis A -- at least once even if that stack is empty, because the limit is
   MAX (n, 1) -- and the function recurses on A + 1.

   The remaining code takes the next free element of t->sections
   (incrementing t->n_sections), initializes it including its cell map,
   allocates and initializes one occurrence map per nest variable for each
   axis, and initializes one area map per area type.

   NOTE(review): the 'else' that separates the recursive case from the
   section setup, and any guard for an axis whose stack is empty before
   indexing nests[ix[a]], are not visible here.  t->sections is presumably
   sized by the caller -- confirm. */
3724 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3725 size_t ix[PIVOT_N_AXES])
3727 if (a < PIVOT_N_AXES)
3729 size_t limit = MAX (t->stacks[a].n, 1);
3730 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3731 ctables_table_add_section (t, a + 1, ix);
3735 struct ctables_section *s = &t->sections[t->n_sections++];
3736 *s = (struct ctables_section) {
3738 .cells = HMAP_INITIALIZER (s->cells),
3740 for (a = 0; a < PIVOT_N_AXES; a++)
3743 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3745 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3746 for (size_t i = 0; i < nest->n; i++)
3747 hmap_init (&s->occurrences[a][i]);
3749 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3750 hmap_init (&s->areas[at]);
/* Postcompute addition operator.  ctables_pcexpr_evaluate() passes this to
   ctables_pcexpr_evaluate_nonterminal() as its two-argument callback.

   NOTE(review): the body is not visible here; presumably returns A + B. */
3755 ctpo_add (double a, double b)
/* Postcompute subtraction operator.  ctables_pcexpr_evaluate() passes this to
   ctables_pcexpr_evaluate_nonterminal() as its two-argument callback.

   NOTE(review): the body is not visible here; presumably returns A - B. */
3761 ctpo_sub (double a, double b)
/* Postcompute multiplication operator.  ctables_pcexpr_evaluate() passes this
   to ctables_pcexpr_evaluate_nonterminal() as its two-argument callback.

   NOTE(review): the body is not visible here; presumably returns A * B. */
3767 ctpo_mul (double a, double b)
/* Postcompute division operator: returns A / B, or SYSMIS when B is zero, so
   that division by zero yields the system-missing value rather than an
   infinity or NaN. */
3773 ctpo_div (double a, double b)
3775 return b ? a / b : SYSMIS;
/* Postcompute exponentiation operator: computes pow (A, B), saving the
   caller's errno beforehand.

   NOTE(review): the rest of the body is not visible here; presumably errno is
   inspected to map a failed pow() to SYSMIS and then restored from
   'save_errno' -- confirm. */
3779 ctpo_pow (double a, double b)
3781 int save_errno = errno;
3783 double result = pow (a, b);
/* Postcompute negation operator.  B is unused; it is there so that this
   function has the same two-argument shape as the binary operators.
   ctables_pcexpr_evaluate() passes it to
   ctables_pcexpr_evaluate_nonterminal() with an argument count of 1.

   NOTE(review): the body is not visible here; presumably returns -A. */
3791 ctpo_neg (double a, double b UNUSED)
/* Context shared by the postcompute expression evaluation functions.
   ctables_cell_calculate_postcompute() fills one in before calling
   ctables_pcexpr_evaluate().

   NOTE(review): some members (pc_a_idx and summary_idx are used by the
   evaluation functions) are not visible here. */
3796 struct ctables_pcexpr_evaluate_ctx
/* The cell whose value is being computed; its category values are used for
   every variable other than the postcompute's own. */
3798 const struct ctables_cell *cell;
/* The section whose cells and occurrence maps are searched. */
3799 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
3800 const struct ctables_categories *cats;
/* Axis of the variable that carries the postcompute. */
3801 enum pivot_axis_type pc_a;
/* Passed through to ctables_find_category_for_postcompute(). */
3804 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function on subexpressions before its definition appears. */
3807 static double ctables_pcexpr_evaluate (
3808 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates an operator node E.  The first N_ARGS subexpressions of E (at
   most 2, the size of the local argument array) are evaluated with
   ctables_pcexpr_evaluate() in context CTX, and EVALUATE is applied to the
   results.  Arguments beyond N_ARGS are passed to EVALUATE as 0.

   Each argument is tested for being non-finite or SYSMIS.

   NOTE(review): the action taken when that test succeeds is not visible here;
   presumably SYSMIS is returned without calling EVALUATE -- confirm. */
3811 ctables_pcexpr_evaluate_nonterminal (
3812 const struct ctables_pcexpr_evaluate_ctx *ctx,
3813 const struct ctables_pcexpr *e, size_t n_args,
3814 double evaluate (double, double))
3816 double args[2] = { 0, 0 };
3817 for (size_t i = 0; i < n_args; i++)
3819 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3820 if (!isfinite (args[i]) || args[i] == SYSMIS)
3823 return evaluate (args[0], args[1]);
/* Looks up the summary value of the cell that is like CTX's cell except that
   the variable carrying the postcompute (axis ctx->pc_a, position
   ctx->pc_a_idx) takes category and value PC_CV instead.

   The target cell is located the same way cells are keyed elsewhere in this
   file: a hash over each non-scale variable's category pointer, plus its
   value unless the category is a total, subtotal, or postcompute, followed
   by a field-by-field comparison of the candidates with that hash.  The value
   returned at the end is ctables_summary_value() for summary number
   ctx->summary_idx of the cell found.

   NOTE(review): the declaration of 'hash', one argument line of the
   value_equal() call, the handling of a mismatch, and the result when no cell
   matches are not visible here -- confirm against the full source. */
3827 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3828 const struct ctables_cell_value *pc_cv)
3830 const struct ctables_section *s = ctx->section;
/* Hash the identity of the target cell. */
3833 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3835 const struct ctables_nest *nest = s->nests[a];
3836 for (size_t i = 0; i < nest->n; i++)
3837 if (i != nest->scale_idx)
3839 const struct ctables_cell_value *cv
3840 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3841 : &ctx->cell->axes[a].cvs[i]);
3842 hash = hash_pointer (cv->category, hash);
3843 if (cv->category->type != CCT_TOTAL
3844 && cv->category->type != CCT_SUBTOTAL
3845 && cv->category->type != CCT_POSTCOMPUTE)
3846 hash = value_hash (&cv->value,
3847 var_get_width (nest->vars[i]), hash);
/* Compare each candidate cell with that hash against the target identity. */
3851 struct ctables_cell *tc;
3852 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3854 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3856 const struct ctables_nest *nest = s->nests[a];
3857 for (size_t i = 0; i < nest->n; i++)
3858 if (i != nest->scale_idx)
3860 const struct ctables_cell_value *p_cv
3861 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3862 : &ctx->cell->axes[a].cvs[i]);
3863 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3864 if (p_cv->category != t_cv->category
3865 || (p_cv->category->type != CCT_TOTAL
3866 && p_cv->category->type != CCT_SUBTOTAL
3867 && p_cv->category->type != CCT_POSTCOMPUTE
3868 && !value_equal (&p_cv->value,
3870 var_get_width (nest->vars[i]))))
/* Read the requested summary from the cell that was found. */
3882 const struct ctables_table *t = s->table;
3883 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3884 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3885 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3886 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category of ctx->cats with
   ctables_find_category_for_postcompute() and evaluated through
   ctables_pcexpr_evaluate_category().  Operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*() function;
   negation takes one argument and the others take two.

   NOTE(review): the 'switch', several 'case' labels (including those for
   constants and for the operators), and some 'return' statements are not
   visible here. */
3890 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3891 const struct ctables_pcexpr *e)
/* Range, missing, and other-nonmissing operands: add up the value of every
   recorded occurrence of the postcompute's variable that
   ctables_categories_match() maps to the operand's category. */
3898 case CTPO_CAT_NRANGE:
3899 case CTPO_CAT_SRANGE:
3900 case CTPO_CAT_MISSING:
3901 case CTPO_CAT_OTHERNM:
3903 struct ctables_cell_value cv = {
3904 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3906 assert (cv.category != NULL);
3908 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3909 const struct ctables_occurrence *o;
3912 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3913 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3914 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3916 cv.value = o->value;
3917 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Number, subtotal, and total operands: a single lookup, with the operand's
   number as the value. */
3922 case CTPO_CAT_NUMBER:
3923 case CTPO_CAT_SUBTOTAL:
3924 case CTPO_CAT_TOTAL:
3926 struct ctables_cell_value cv = {
3927 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3928 .value = { .f = e->number },
3930 assert (cv.category != NULL);
3931 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operands: if the variable is wider than the operand's string, the
   string is copied into a buffer of the variable's width padded with spaces,
   so that it can be used as a value of that width.
   NOTE(review): the declaration of 's' and the release of the padded buffer
   are not visible here -- confirm that it is freed after the lookup. */
3934 case CTPO_CAT_STRING:
3936 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3938 if (width > e->string.length)
3940 s = xmalloc (width);
3941 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3944 const struct ctables_category *category
3945 = ctables_find_category_for_postcompute (
3946 ctx->section->table->ctables->dict,
3947 ctx->cats, ctx->parse_format, e);
3948 assert (category != NULL);
/* The category found may be numeric or string; the cell value is filled in
   to match the category's type. */
3950 struct ctables_cell_value cv = { .category = category };
3951 if (category->type == CCT_NUMBER)
3952 cv.value.f = category->number;
3953 else if (category->type == CCT_STRING)
3954 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3958 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators. */
3964 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3967 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3970 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3973 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3976 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3979 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL in section S.  CELL
   must be flagged as a postcompute cell (asserted on entry).

   Every position of every axis's nest is scanned for a cell value whose
   category has type CCT_POSTCOMPUTE.  The category found is kept in 'pc_cat'
   and its position is stored through the output parameters; 'pc_cat' is
   asserted to be non-null once the scan is done.

   ctables_cell_calculate_postcompute() uses the return value as the
   postcompute category and passes '&pc_a' and '&pc_a_idx' for the outputs.

   NOTE(review): the declaration of the last parameter, the store through
   PC_A_P, the handling of a cell where several postcomputes cross (described
   by the truncated comment below), and the 'return' statements are not
   visible here. */
3985 static const struct ctables_category *
3986 ctables_cell_postcompute (const struct ctables_section *s,
3987 const struct ctables_cell *cell,
3988 enum pivot_axis_type *pc_a_p,
3991 assert (cell->postcompute);
3992 const struct ctables_category *pc_cat = NULL;
3993 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3994 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3996 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3997 if (cv->category->type == CCT_POSTCOMPUTE)
4001 /* Multiple postcomputes cross each other. The value is
4006 pc_cat = cv->category;
4010 *pc_a_idx_p = pc_a_idx;
4014 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary spec
   SS and returns the result of evaluating the postcompute's expression.

   The postcompute category and its position are obtained from
   ctables_cell_postcompute().  The postcompute's own summary specs are then
   searched for one with the same function, 'weighted' flag, calc_area, and
   percentile as SS; when one is found, its format and is_ctables_format
   setting are stored in *FORMAT and *IS_CTABLES_FORMAT, which otherwise keep
   the values supplied by the caller as far as the visible code shows.

   Finally an evaluation context is built for the variable that carries the
   postcompute and pc->expr is evaluated with ctables_pcexpr_evaluate().

   NOTE(review): the declaration of the last parameter (used as
   'summary_idx'), any test of 'pc_cat' or 'pc->specs' for null before they
   are dereferenced, and some of the context's initializers are not visible
   here -- confirm against the full source. */
4019 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4020 const struct ctables_cell *cell,
4021 const struct ctables_summary_spec *ss,
4022 struct fmt_spec *format,
4023 bool *is_ctables_format,
4026 enum pivot_axis_type pc_a = 0;
4027 size_t pc_a_idx = 0;
4028 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4029 s, cell, &pc_a, &pc_a_idx);
4033 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a format override among the postcompute's own summary specs. */
4036 for (size_t i = 0; i < pc->specs->n; i++)
4038 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4039 if (ss->function == ss2->function
4040 && ss->weighted == ss2->weighted
4041 && ss->calc_area == ss2->calc_area
4042 && ss->percentile == ss2->percentile)
4044 *format = ss2->format;
4045 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression in terms of the postcompute variable's categories. */
4051 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4052 const struct ctables_categories *cats = s->table->categories[
4053 var_get_dict_index (var)];
4054 struct ctables_pcexpr_evaluate_ctx ctx = {
4059 .pc_a_idx = pc_a_idx,
4060 .summary_idx = summary_idx,
4061 .parse_format = pc_cat->parse_format,
4063 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a UTF-8 string
   obtained from data_out_stretchy(), then adjusts the position of a leading
   minus sign for the CTABLES-specific formats described below.

   NOTE(review): the return type and 'return' statement are not visible here;
   presumably the string S is returned and the caller owns it -- confirm. */
4067 ctables_format (double d, const struct fmt_spec *format,
4068 const struct fmt_settings *settings)
4070 const union value v = { .f = d };
4071 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4073 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4074 produce the results we want for negative numbers, putting the negative
4075 sign in the wrong spot, before the prefix instead of after it. We can't,
4076 in fact, produce the desired results using a custom-currency
4077 specification. Instead, we postprocess the output, moving the negative
   sign to just after the prefix:

4080 NEQUAL: "-N=3" => "N=-3"
4081 PAREN: "-(3)" => "(-3)"
4082 PCTPAREN: "-(3%)" => "(-3%)"

4084 This transformation doesn't affect NEGPAREN. */
/* Only the first '-' in the string is considered, and one that directly
   follows an 'E' is left alone (presumably the sign of an exponent). */
4085 char *minus_src = strchr (s, '-');
4086 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination is the '=' of "N=" if present, otherwise the first '('.
   NOTE(review): 'lparen' may be null when the string has neither; the guard
   on 'minus_dst' before the move is not visible here. */
4088 char *n_equals = strstr (s, "N=");
4089 char *lparen = strchr (s, '(');
4090 char *minus_dst = n_equals ? n_equals + 1 : lparen;
/* Treat the prefix of S up to and including the destination as an array of
   single bytes and move the minus sign's byte to the destination index. */
4092 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  Two tests are applied to each
   nest: whether it consists of exactly one variable that is also the scale
   variable (n == 1 and scale_idx == 0), and whether that variable's vlabel
   setting in the CTABLES command is CTVL_NONE.

   ctables_table_output() assigns the result to a dimension's
   hide_all_labels.

   NOTE(review): the return type and 'return' statements are not visible here;
   presumably the result is true only if every nest passes both tests. */
4098 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4100 for (size_t i = 0; i < t->stacks[a].n; i++)
4102 struct ctables_nest *nest = &t->stacks[a].nests[i];
4103 if (nest->n != 1 || nest->scale_idx != 0)
4106 enum ctables_vlabel vlabel
4107 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4108 if (vlabel != CTVL_NONE)
4115 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4117 struct pivot_table *pt = pivot_table_create__ (
4119 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4120 : pivot_value_new_text (N_("Custom Tables"))),
4123 pivot_table_set_caption (
4124 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4126 pivot_table_set_corner_text (
4127 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4129 bool summary_dimension = (t->summary_axis != t->slabels_axis
4130 || (!t->slabels_visible
4131 && t->summary_specs.n > 1));
4132 if (summary_dimension)
4134 struct pivot_dimension *d = pivot_dimension_create (
4135 pt, t->slabels_axis, N_("Statistics"));
4136 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4137 if (!t->slabels_visible)
4138 d->hide_all_labels = true;
4139 for (size_t i = 0; i < specs->n; i++)
4140 pivot_category_create_leaf (
4141 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4144 bool categories_dimension = t->clabels_example != NULL;
4145 if (categories_dimension)
4147 struct pivot_dimension *d = pivot_dimension_create (
4148 pt, t->label_axis[t->clabels_from_axis],
4149 t->clabels_from_axis == PIVOT_AXIS_ROW
4150 ? N_("Row Categories")
4151 : N_("Column Categories"));
4152 const struct variable *var = t->clabels_example;
4153 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4154 for (size_t i = 0; i < t->n_clabels_values; i++)
4156 const struct ctables_value *value = t->clabels_values[i];
4157 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4158 assert (cat != NULL);
4159 pivot_category_create_leaf (
4160 d->root, ctables_category_create_value_label (c, cat,
4166 pivot_table_set_look (pt, ct->look);
4167 struct pivot_dimension *d[PIVOT_N_AXES];
4168 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4170 static const char *names[] = {
4171 [PIVOT_AXIS_ROW] = N_("Rows"),
4172 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4173 [PIVOT_AXIS_LAYER] = N_("Layers"),
4175 d[a] = (t->axes[a] || a == t->summary_axis
4176 ? pivot_dimension_create (pt, a, names[a])
4181 assert (t->axes[a]);
4183 for (size_t i = 0; i < t->stacks[a].n; i++)
4185 struct ctables_nest *nest = &t->stacks[a].nests[i];
4186 struct ctables_section **sections = xnmalloc (t->n_sections,
4188 size_t n_sections = 0;
4190 size_t n_total_cells = 0;
4191 size_t max_depth = 0;
4192 for (size_t j = 0; j < t->n_sections; j++)
4193 if (t->sections[j].nests[a] == nest)
4195 struct ctables_section *s = &t->sections[j];
4196 sections[n_sections++] = s;
4197 n_total_cells += hmap_count (&s->cells);
4199 size_t depth = s->nests[a]->n;
4200 max_depth = MAX (depth, max_depth);
4203 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4205 size_t n_sorted = 0;
4207 for (size_t j = 0; j < n_sections; j++)
4209 struct ctables_section *s = sections[j];
4211 struct ctables_cell *cell;
4212 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4214 sorted[n_sorted++] = cell;
4215 assert (n_sorted <= n_total_cells);
4218 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4219 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4222 if (a == PIVOT_AXIS_ROW)
4224 size_t ids[N_CTATS];
4225 memset (ids, 0, sizeof ids);
4226 for (size_t j = 0; j < n_sorted; j++)
4228 struct ctables_cell *cell = sorted[j];
4229 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4231 struct ctables_area *area = cell->areas[at];
4232 if (!area->sequence)
4233 area->sequence = ++ids[at];
4240 for (size_t j = 0; j < n_sorted; j++)
4242 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4247 struct ctables_level
4249 enum ctables_level_type
4251 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4252 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4253 CTL_SUMMARY, /* Summary functions. */
4257 enum settings_value_show vlabel; /* CTL_VAR only. */
4260 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4261 size_t n_levels = 0;
4262 for (size_t k = 0; k < nest->n; k++)
4264 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4265 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4267 if (vlabel != CTVL_NONE)
4269 levels[n_levels++] = (struct ctables_level) {
4271 .vlabel = (enum settings_value_show) vlabel,
4276 if (nest->scale_idx != k
4277 && (k != nest->n - 1 || t->label_axis[a] == a))
4279 levels[n_levels++] = (struct ctables_level) {
4280 .type = CTL_CATEGORY,
4286 if (!summary_dimension && a == t->slabels_axis)
4288 levels[n_levels++] = (struct ctables_level) {
4289 .type = CTL_SUMMARY,
4290 .var_idx = SIZE_MAX,
4294 /* Pivot categories:
4296 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4297 - category for nest->vars[0], if nest->scale_idx != 0
4298 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4299 - category for nest->vars[1], if nest->scale_idx != 1
4301 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4302 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4303 - summary function, if 'a == t->slabels_axis && a ==
4306 Additional dimensions:
4308 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4310 - If 't->label_axis[b] == a' for some 'b != a', add a category
4315 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4317 for (size_t j = 0; j < n_sorted; j++)
4319 struct ctables_cell *cell = sorted[j];
4320 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4322 size_t n_common = 0;
4325 for (; n_common < n_levels; n_common++)
4327 const struct ctables_level *level = &levels[n_common];
4328 if (level->type == CTL_CATEGORY)
4330 size_t var_idx = level->var_idx;
4331 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4332 if (prev->axes[a].cvs[var_idx].category != c)
4334 else if (c->type != CCT_SUBTOTAL
4335 && c->type != CCT_TOTAL
4336 && c->type != CCT_POSTCOMPUTE
4337 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4338 &cell->axes[a].cvs[var_idx].value,
4339 var_get_type (nest->vars[var_idx])))
4345 for (size_t k = n_common; k < n_levels; k++)
4347 const struct ctables_level *level = &levels[k];
4348 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4349 if (level->type == CTL_SUMMARY)
4351 assert (k == n_levels - 1);
4353 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4354 for (size_t m = 0; m < specs->n; m++)
4356 int leaf = pivot_category_create_leaf (
4357 parent, ctables_summary_label (&specs->specs[m],
4365 const struct variable *var = nest->vars[level->var_idx];
4366 struct pivot_value *label;
4367 if (level->type == CTL_VAR)
4369 label = pivot_value_new_variable (var);
4370 label->variable.show = level->vlabel;
4372 else if (level->type == CTL_CATEGORY)
4374 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4375 label = ctables_category_create_value_label (
4376 t->categories[var_get_dict_index (var)],
4377 cv->category, var, &cv->value);
4382 if (k == n_levels - 1)
4383 prev_leaf = pivot_category_create_leaf (parent, label);
4385 groups[k] = pivot_category_create_group__ (parent, label);
4389 cell->axes[a].leaf = prev_leaf;
4398 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4402 size_t n_total_cells = 0;
4403 for (size_t j = 0; j < t->n_sections; j++)
4404 n_total_cells += hmap_count (&t->sections[j].cells);
4406 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4407 size_t n_sorted = 0;
4408 for (size_t j = 0; j < t->n_sections; j++)
4410 const struct ctables_section *s = &t->sections[j];
4411 struct ctables_cell *cell;
4412 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4414 sorted[n_sorted++] = cell;
4416 assert (n_sorted <= n_total_cells);
4417 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4419 size_t ids[N_CTATS];
4420 memset (ids, 0, sizeof ids);
4421 for (size_t j = 0; j < n_sorted; j++)
4423 struct ctables_cell *cell = sorted[j];
4424 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4426 struct ctables_area *area = cell->areas[at];
4427 if (!area->sequence)
4428 area->sequence = ++ids[at];
4435 for (size_t i = 0; i < t->n_sections; i++)
4437 struct ctables_section *s = &t->sections[i];
4439 struct ctables_cell *cell;
4440 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4445 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4446 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4447 for (size_t j = 0; j < specs->n; j++)
4450 size_t n_dindexes = 0;
4452 if (summary_dimension)
4453 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4455 if (categories_dimension)
4457 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4458 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4459 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4460 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4463 dindexes[n_dindexes++] = ctv->leaf;
4466 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4469 int leaf = cell->axes[a].leaf;
4470 if (a == t->summary_axis && !summary_dimension)
4472 dindexes[n_dindexes++] = leaf;
4475 const struct ctables_summary_spec *ss = &specs->specs[j];
4477 struct fmt_spec format = specs->specs[j].format;
4478 bool is_ctables_format = ss->is_ctables_format;
4479 double d = (cell->postcompute
4480 ? ctables_cell_calculate_postcompute (
4481 s, cell, ss, &format, &is_ctables_format, j)
4482 : ctables_summary_value (cell, &cell->summaries[j],
4485 struct pivot_value *value;
4486 if (ct->hide_threshold != 0
4487 && d < ct->hide_threshold
4488 && ctables_summary_function_is_count (ss->function))
4490 value = pivot_value_new_user_text_nocopy (
4491 xasprintf ("<%d", ct->hide_threshold));
4493 else if (d == 0 && ct->zero)
4494 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4495 else if (d == SYSMIS && ct->missing)
4496 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4497 else if (is_ctables_format)
4498 value = pivot_value_new_user_text_nocopy (
4499 ctables_format (d, &format, &ct->ctables_formats));
4502 value = pivot_value_new_number (d);
4503 value->numeric.format = format;
4505 /* XXX should text values be right-justified? */
4506 pivot_table_put (pt, dindexes, n_dindexes, value);
4511 pivot_table_submit (pt);
/* Checks whether the category labels for axis A of table T can be placed
   where T->label_axis[A] requests.  The checks below report an error with
   msg (SE, ...) when the innermost variable's categories contain a
   CCT_FUNCTION entry, when an innermost variable is the nest's scale
   variable, or when the innermost variables of the stacked nests differ in
   width, value labels, or category specifications.  As a side effect, stores
   the first nest's innermost variable in T->clabels_example.

   NOTE(review): presumably returns true when every check passes and false
   after reporting an error (the caller combines the row and column results
   with &&) -- confirm. */
4515 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4517 enum pivot_axis_type label_pos = t->label_axis[a];
/* Subcommand and position names, used only to build the error messages. */
4521 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4522 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4524 const struct ctables_stack *stack = &t->stacks[a];
4528 const struct ctables_nest *n0 = &stack->nests[0];
4531 assert (stack->n == 1);
/* V0, the innermost variable of the first nest, is the reference that the
   innermost variables of all the other nests are compared against. */
4535 const struct variable *v0 = n0->vars[n0->n - 1];
4536 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4537 t->clabels_example = v0;
/* Categories sorted by a summary function cannot be moved. */
4539 for (size_t i = 0; i < c0->n_cats; i++)
4540 if (c0->cats[i].type == CCT_FUNCTION)
4542 msg (SE, _("%s=%s is not allowed with sorting based "
4543 "on a summary function."),
4544 subcommand_name, pos_name);
/* The innermost variable must not be the nest's scale variable. */
4547 if (n0->n - 1 == n0->scale_idx)
4549 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4550 "but %s is a scale variable."),
4551 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest's innermost variable must be categorical and agree
   with V0 in width, value labels, and category specifications. */
4555 for (size_t i = 1; i < stack->n; i++)
4557 const struct ctables_nest *ni = &stack->nests[i];
4559 const struct variable *vi = ni->vars[ni->n - 1];
4560 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4562 if (ni->n - 1 == ni->scale_idx)
4564 msg (SE, _("%s=%s requires the variables to be moved to be "
4565 "categorical, but %s is a scale variable."),
4566 subcommand_name, pos_name, var_get_name (vi));
4569 if (var_get_width (v0) != var_get_width (vi))
4571 msg (SE, _("%s=%s requires the variables to be "
4572 "moved to have the same width, but %s has "
4573 "width %d and %s has width %d."),
4574 subcommand_name, pos_name,
4575 var_get_name (v0), var_get_width (v0),
4576 var_get_name (vi), var_get_width (vi));
4579 if (!val_labs_equal (var_get_value_labels (v0),
4580 var_get_value_labels (vi)))
4582 msg (SE, _("%s=%s requires the variables to be "
4583 "moved to have the same value labels, but %s "
4584 "and %s have different value labels."),
4585 subcommand_name, pos_name,
4586 var_get_name (v0), var_get_name (vi));
4589 if (!ctables_categories_equal (c0, ci))
4591 msg (SE, _("%s=%s requires the variables to be "
4592 "moved to have the same category "
4593 "specifications, but %s and %s have different "
4594 "category specifications."),
4595 subcommand_name, pos_name,
4596 var_get_name (v0), var_get_name (vi));
/* Looks for VAR among the *N elements of *SUM_VARS.  When the array is full
   (*N >= *ALLOCATED) it is grown with x2nrealloc, which also updates
   *ALLOCATED, and VAR is stored at index *N.

   NOTE(review): presumably returns VAR's index in *SUM_VARS, reusing the
   existing index when VAR is already present and otherwise incrementing *N
   (the caller stores the result in a spec's sum_var_idx) -- confirm. */
4605 add_sum_var (struct variable *var,
4606 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4608 for (size_t i = 0; i < *n; i++)
4609 if (var == (*sum_vars)[i])
/* Not found: make room if necessary, then append. */
4612 if (*n >= *allocated)
4613 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4614 (*sum_vars)[*n] = var;
/* Maps area type AREA to another area type; CTAT_LAYERCOL and CTAT_LAYERROW
   are among the possible results.

   NOTE(review): presumably exchanges row-oriented and column-oriented area
   types and leaves the others unchanged (it is applied to summary specs when
   row or column labels are moved to the opposite axis) -- confirm. */
4618 static enum ctables_area_type
4619 rotate_area (enum ctables_area_type area)
4630 return CTAT_LAYERCOL;
4633 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A.  For every summary spec in A->specs[]
   whose function is CTSF_areaPCT_SUM, registers A->var in *SUM_VARS through
   add_sum_var and records the result in the spec's sum_var_idx.  Recurses
   into both elements of A->subs[].

   NOTE(review): presumably the two loops below belong to different kinds of
   axis node (a variable node versus an operator node) -- confirm. */
4646 enumerate_sum_vars (const struct ctables_axis *a,
4647 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every spec of every summary variant attached to this node. */
4655 for (size_t i = 0; i < N_CSVS; i++)
4656 for (size_t j = 0; j < a->specs[i].n; j++)
4658 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4659 if (spec->function == CTSF_areaPCT_SUM)
4660 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Visit both subtrees. */
4666 for (size_t i = 0; i < 2; i++)
4667 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution:

   - Builds T->stacks[] from T->axes[] with enumerate_fts and fills in, for
     each nest and each area type, the list of variable indexes
     (nest->areas[], nest->n_areas[]) that belong to that area.

   - Supplies a default summary spec (CTSF_MEAN for scale, CTSF_COUNT
     otherwise) for nests on the summary axis that have no cell specs, and
     clones the cell specs into the total specs when the latter are empty.

   - Merges the summary specs of all nests on the summary axis into
     T->summary_specs, assigning each spec its axis_idx.

   - Collects the variables needed by CTSF_areaPCT_SUM into T->sum_vars.

   Returns true only if ctables_check_label_position succeeds for both the
   row and the column axis. */
4673 ctables_prepare_table (struct ctables_table *t)
4675 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4678 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4680 for (size_t j = 0; j < t->stacks[a].n; j++)
4682 struct ctables_nest *nest = &t->stacks[a].nests[j];
4683 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for at most one index per variable in the nest. */
4685 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4686 nest->n_areas[at] = 0;
/* For row- and column-oriented area types, ATA is the area's own axis and
   ATB the opposite one. */
4688 enum pivot_axis_type ata, atb;
4689 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4691 ata = PIVOT_AXIS_ROW;
4692 atb = PIVOT_AXIS_COLUMN;
4694 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4696 ata = PIVOT_AXIS_COLUMN;
4697 atb = PIVOT_AXIS_ROW;
4700 if (at == CTAT_LAYER
4701 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4702 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4703 ? a == atb && t->label_axis[a] != a
/* K counts down from nest->n - 1.  Because size_t is unsigned, decrementing
   past zero wraps around and makes K < nest->n false, ending the loop. */
4706 for (size_t k = nest->n - 1; k < nest->n; k--)
4707 if (k != nest->scale_idx)
4709 nest->areas[at][nest->n_areas[at]++] = k;
4715 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4716 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4717 : at == CTAT_TABLE ? true
/* Add every variable in the nest except the scale variable. */
4721 for (size_t k = 0; k < nest->n; k++)
4722 if (k != nest->scale_idx)
4723 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP is computed per area type below; the final loop removes up to
   N_DROP trailing entries from the list just built.
   NOTE(review): a negative N_DROP appears to select the special case further
   down that removes the second-to-last entry instead -- confirm. */
4729 #define L PIVOT_AXIS_LAYER
4730 n_drop = (t->clabels_from_axis == L ? a != L
4731 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4732 : t->clabels_from_axis == a ? 2
4739 n_drop = a == ata && t->label_axis[ata] == atb;
4744 n_drop = (a == ata ? t->label_axis[ata] == atb
4746 : t->clabels_from_axis == atb ? -1
4747 : t->clabels_to_axis != atb ? 1
4759 size_t n = nest->n_areas[at];
/* Overwrite the second-to-last entry with the last and shrink by one. */
4762 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4763 nest->n_areas[at]--;
/* Drop up to N_DROP trailing entries, never going below zero. */
4768 for (int i = 0; i < n_drop; i++)
4769 if (nest->n_areas[at] > 0)
4770 nest->n_areas[at]--;
/* Give the axis a stack that consists of a single nest with no variables.
   NOTE(review): presumably this is the branch for an axis that has no
   variables at all -- confirm. */
4777 struct ctables_nest *nest = xmalloc (sizeof *nest);
4778 *nest = (struct ctables_nest) { .n = 0 };
4779 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4781 /* There's no point in moving labels away from an axis that has no
4782 labels, so avoid dealing with the special cases around that. */
4783 t->label_axis[a] = a;
/* Everything from here on concerns the nests on the summary axis. */
4786 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4787 for (size_t i = 0; i < stack->n; i++)
4789 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary specs were given: install a single default spec. */
4790 if (!nest->specs[CSV_CELL].n)
4792 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4793 specs->specs = xmalloc (sizeof *specs->specs);
4796 enum ctables_summary_function function
4797 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): the default format is computed from specs->var before the
   assignment to specs->var a few lines below -- confirm that specs->var
   holds the intended value at this point. */
4799 *specs->specs = (struct ctables_summary_spec) {
4800 .function = function,
4802 .format = ctables_summary_default_format (function, specs->var),
4805 specs->var = nest->vars[0];
/* Totals use the same specs as ordinary cells unless given separately. */
4807 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4808 &nest->specs[CSV_CELL]);
4810 else if (!nest->specs[CSV_TOTAL].n)
4811 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4812 &nest->specs[CSV_CELL]);
/* When row or column labels are moved to the opposite axis, rotate the area
   that each summary spec is calculated over. */
4814 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4815 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4817 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4818 for (size_t i = 0; i < nest->specs[sv].n; i++)
4820 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4821 const struct ctables_function_info *cfi =
4822 &ctables_function_info[ss->function];
4824 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise scale-missing handling, collect the scale variables of the
   other nests that share this nest's group_head. */
4828 if (t->ctables->smissing_listwise)
4830 struct variable **listwise_vars = NULL;
4832 size_t allocated = 0;
4834 for (size_t j = nest->group_head; j < stack->n; j++)
4836 const struct ctables_nest *other_nest = &stack->nests[j];
4837 if (other_nest->group_head != nest->group_head)
4840 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4843 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4844 sizeof *listwise_vars);
4845 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Attach the collected list to each summary variant; xmemdup makes a copy of
   the array.
   NOTE(review): presumably the copy is made only for variants after the
   first, so that each variant owns its own array -- confirm. */
4848 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4851 listwise_vars = xmemdup (listwise_vars,
4852 n * sizeof *listwise_vars);
4853 nest->specs[sv].listwise_vars = listwise_vars;
4854 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into T->summary_specs.
   ITEMS holds one cursor (set plus offset) per spec set. */
4859 struct ctables_summary_spec_set *merged = &t->summary_specs;
4860 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4862 for (size_t j = 0; j < stack->n; j++)
4864 const struct ctables_nest *nest = &stack->nests[j];
4866 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4867 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Pick the smallest current item according to merge_item_compare_3way and
   append its spec to MERGED. */
4872 struct merge_item min = items[0];
4873 for (size_t j = 1; j < n_left; j++)
4874 if (merge_item_compare_3way (&items[j], &min) < 0)
4877 if (merged->n >= merged->allocated)
4878 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4879 sizeof *merged->specs);
4880 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every cursor whose current spec compares equal to MIN gets the new merged
   index as its axis_idx and advances; an exhausted cursor is replaced by the
   last one in the array. */
4882 for (size_t j = 0; j < n_left; )
4884 if (merge_item_compare_3way (&items[j], &min) == 0)
4886 struct merge_item *item = &items[j];
4887 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4888 if (++item->ofs >= item->set->n)
4890 items[j] = items[--n_left];
/* NOTE(review): the printf calls below look like debugging output for the
   merge result -- confirm that they are compiled out in normal builds. */
4900 for (size_t j = 0; j < merged->n; j++)
4901 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4903 for (size_t j = 0; j < stack->n; j++)
4905 const struct ctables_nest *nest = &stack->nests[j];
4906 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4908 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4909 for (size_t k = 0; k < specs->n; k++)
4910 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4911 specs->specs[k].axis_idx);
/* Collect the variables used by CTSF_areaPCT_SUM specs on the summary
   axis. */
4917 size_t allocated_sum_vars = 0;
4918 enumerate_sum_vars (t->axes[t->summary_axis],
4919 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4921 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4922 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack for axis A, takes case C's value of the nest's
   innermost variable and, if that value matches the variable's category
   specification, records it in T with ctables_value_insert.

   NOTE(review): presumably a system-missing numeric value is skipped --
   confirm. */
4926 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4927 enum pivot_axis_type a)
4929 struct ctables_stack *stack = &t->stacks[a];
4930 for (size_t i = 0; i < stack->n; i++)
4932 const struct ctables_nest *nest = &stack->nests[i];
/* Only the innermost variable of each nest is considered. */
4933 const struct variable *var = nest->vars[nest->n - 1];
4934 const union value *value = case_data (c, var);
4936 if (var_is_numeric (var) && value->f == SYSMIS)
4939 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4941 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort ().  A_ and B_ each point to a pointer to a
   struct ctables_value, and WIDTH_ points to the int width of the values.
   Returns the result of value_compare_3way on the two values. */
4946 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4948 const struct ctables_value *const *ap = a_;
4949 const struct ctables_value *const *bp = b_;
4950 const struct ctables_value *a = *ap;
4951 const struct ctables_value *b = *bp;
4952 const int *width = width_;
4953 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of the entries in
   T->clabels_values_map sorted by value, and numbers the entries by storing
   each one's position in the sorted array in its leaf member.  Before that,
   inserts the labeled values of T->clabels_example that match its category
   specification.

   NOTE(review): presumably the value-label loop runs only when empty
   categories are to be shown -- confirm. */
4957 ctables_sort_clabels_values (struct ctables_table *t)
4959 const struct variable *v0 = t->clabels_example;
4960 int width = var_get_width (v0);
4962 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Insert every labeled value that matches V0's category specification. */
4965 const struct val_labs *val_labs = var_get_value_labels (v0);
4966 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4967 vl = val_labs_next (val_labs, vl))
4968 if (ctables_categories_match (c0, &vl->value, v0))
4969 ctables_value_insert (t, &vl->value, width);
/* Copy the map's entries into a newly allocated array. */
4972 size_t n = hmap_count (&t->clabels_values_map);
4973 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4975 struct ctables_value *clv;
4977 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4978 t->clabels_values[i++] = clv;
4979 t->n_clabels_values = n;
/* Sort by value, then use each entry's sorted position as its leaf index. */
4982 sort (t->clabels_values, n, sizeof *t->clabels_values,
4983 compare_clabels_values_3way, &width);
4985 for (size_t i = 0; i < n; i++)
4986 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, through ctables_add_occurrence, values of VAR implied
   by the categories in CATS: a category's own number or string, or those of
   VAR's labeled values that satisfy the category's condition (a numeric
   range, a string range, being missing, and so on).

   NOTE(review): the branches below are presumably selected by the category's
   type; confirm which type each branch belongs to. */
4990 ctables_add_category_occurrences (const struct variable *var,
4991 struct hmap *occurrences,
4992 const struct ctables_categories *cats)
4994 const struct val_labs *val_labs = var_get_value_labels (var);
4996 for (size_t i = 0; i < cats->n_cats; i++)
4998 const struct ctables_category *c = &cats->cats[i];
/* A single number: add it directly. */
5002 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string: pad it with spaces to VAR's width in a temporary value,
   add that, then destroy the temporary. */
5008 int width = var_get_width (var);
5010 value_init (&value, width);
5011 value_copy_buf_rpad (&value, width,
5012 CHAR_CAST (uint8_t *, c->string.string),
5013 c->string.length, ' ');
5014 ctables_add_occurrence (var, &value, occurrences);
5015 value_destroy (&value, width);
/* Numeric range: add each labeled value within [nrange[0], nrange[1]]. */
5020 assert (var_is_numeric (var));
5021 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5022 vl = val_labs_next (val_labs, vl))
5023 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5024 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: add each labeled value accepted by in_string_range. */
5028 assert (var_is_alpha (var));
5029 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5030 vl = val_labs_next (val_labs, vl))
5031 if (in_string_range (&vl->value, var, c->srange))
5032 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each labeled value that is missing for VAR. */
5036 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5037 vl = val_labs_next (val_labs, vl))
5038 if (var_is_value_missing (var, &vl->value))
5039 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value unconditionally. */
5043 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5044 vl = val_labs_next (val_labs, vl))
5045 ctables_add_occurrence (var, &vl->value, occurrences);
5048 case CCT_POSTCOMPUTE:
/* Add each labeled value that is not missing, or every labeled value when
   the category includes missing values. */
5058 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5059 vl = val_labs_next (val_labs, vl))
5060 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5061 ctables_add_occurrence (var, &vl->value, occurrences);
5064 case CCT_EXCLUDED_MISSING:
/* Recursive helper that enumerates combinations of occurring values for the
   variables of section S's nests, starting at variable A_IDX of axis A.  For
   each combination it writes the chosen values into case C, records the
   matching category in CATS[axis][index], and finally inserts a cell with
   ctables_cell_insert__.  CCT_POSTCOMPUTE categories of each variable are
   also enumerated.

   NOTE(review): presumably the scale variable, if any, is skipped without
   iterating over occurrences -- confirm. */
5071 ctables_section_recurse_add_empty_categories (
5072 struct ctables_section *s,
5073 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5074 enum pivot_axis_type a, size_t a_idx)
/* All axes handled: C and CATS describe one complete cell. */
5076 if (a >= PIVOT_N_AXES)
5077 ctables_cell_insert__ (s, c, cats);
/* This axis has no (more) variables: continue with the next axis. */
5078 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5079 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5082 const struct variable *var = s->nests[a]->vars[a_idx];
5083 const struct ctables_categories *categories = s->table->categories[
5084 var_get_dict_index (var)];
5085 int width = var_get_width (var);
5086 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5087 const struct ctables_occurrence *o;
5088 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace C's value of VAR with this occurrence, then recurse for the next
   variable on the same axis. */
5090 union value *value = case_data_rw (c, var);
5091 value_destroy (value, width);
5092 value_clone (value, &o->value, width);
5093 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5094 assert (cats[a][a_idx] != NULL);
5095 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated separately, without changing C. */
5098 for (size_t i = 0; i < categories->n_cats; i++)
5100 const struct ctables_category *cat = &categories->cats[i];
5101 if (cat->type == CCT_POSTCOMPUTE)
5103 cats[a][a_idx] = cat;
5104 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* For each non-scale variable in section S's nests whose category
   specification has show_empty set, adds the occurrences implied by its
   categories to S->occurrences[][], and then enumerates cells with
   ctables_section_recurse_add_empty_categories using a case created from the
   dictionary's prototype.

   NOTE(review): presumably the enumeration is skipped when no variable has
   show_empty set, and the temporary case is released afterward --
   confirm. */
5111 ctables_section_add_empty_categories (struct ctables_section *s)
5113 bool show_empty = false;
5114 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5116 for (size_t k = 0; k < s->nests[a]->n; k++)
5117 if (k != s->nests[a]->scale_idx)
5119 const struct variable *var = s->nests[a]->vars[k];
5120 const struct ctables_categories *cats = s->table->categories[
5121 var_get_dict_index (var)];
5122 if (cats->show_empty)
5125 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* NOTE(review): the second dimension is fixed at 10 (flagged XXX), which
   limits the number of variables per nest that can be recorded here --
   confirm that deeper nests cannot reach this point. */
5131 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5132 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5133 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S without destroying its hash maps: removes every
   occurrence, every cell (destroying the cell's values, freeing its per-axis
   value arrays, and uninitializing and freeing its summaries), and every
   area, then shrinks the cell and area maps.

   NOTE(review): presumably each removed occurrence, cell, and area is also
   freed -- confirm. */
5138 ctables_section_clear (struct ctables_section *s)
/* Remove the recorded occurrences of every non-scale variable. */
5140 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5142 const struct ctables_nest *nest = s->nests[a];
5143 for (size_t i = 0; i < nest->n; i++)
5144 if (i != nest->scale_idx)
5146 const struct variable *var = nest->vars[i];
5147 int width = var_get_width (var);
5148 struct ctables_occurrence *o, *next;
5149 struct hmap *map = &s->occurrences[a][i];
5150 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5152 value_destroy (&o->value, width);
5153 hmap_delete (map, &o->node);
/* Remove the cells. */
5160 struct ctables_cell *cell, *next_cell;
5161 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5163 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5165 const struct ctables_nest *nest = s->nests[a];
5166 for (size_t i = 0; i < nest->n; i++)
5167 if (i != nest->scale_idx)
5168 value_destroy (&cell->axes[a].cvs[i].value,
5169 var_get_width (nest->vars[i]));
5170 free (cell->axes[a].cvs);
/* The cell's summaries follow the spec set selected by its summary
   variant. */
5173 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5174 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5175 for (size_t i = 0; i < specs->n; i++)
5176 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5177 free (cell->summaries);
5179 hmap_delete (&s->cells, &cell->node);
5182 hmap_shrink (&s->cells);
/* Remove the areas of every area type. */
5184 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5186 struct ctables_area *area, *next_area;
5187 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5191 hmap_delete (&s->areas[at], &area->node);
5194 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: clears its contents with
   ctables_section_clear, then destroys the per-variable occurrence maps (and
   frees the arrays that hold them), the cell map, and the area maps. */
5199 ctables_section_uninit (struct ctables_section *s)
5201 ctables_section_clear (s);
5203 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5205 struct ctables_nest *nest = s->nests[a];
5206 for (size_t i = 0; i < nest->n; i++)
5207 hmap_destroy (&s->occurrences[a][i]);
5208 free (s->occurrences[a]);
5211 hmap_destroy (&s->cells);
5212 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5213 hmap_destroy (&s->areas[at]);
/* Clears the per-run state of table T: empties each of its sections and, if
   T has moved category labels (T->clabels_example is set), removes the
   collected label values from T->clabels_values_map and releases the sorted
   T->clabels_values array.

   NOTE(review): presumably each removed struct ctables_value is also freed
   -- confirm. */
5217 ctables_table_clear (struct ctables_table *t)
5219 for (size_t i = 0; i < t->n_sections; i++)
5220 ctables_section_clear (&t->sections[i]);
5222 if (t->clabels_example)
5224 int width = var_get_width (t->clabels_example);
5225 struct ctables_value *value, *next_value;
5226 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5227 &t->clabels_values_map)
5229 value_destroy (&value->value, width);
5230 hmap_delete (&t->clabels_values_map, &value->node);
5233 hmap_shrink (&t->clabels_values_map);
/* Reset the array so that the next run starts from scratch. */
5235 free (t->clabels_values);
5236 t->clabels_values = NULL;
5237 t->n_clabels_values = 0;
/* Runs all of the tables in CT over the cases read from INPUT, using the
   dictionary of dataset DS.

   First allocates and sets up the sections of every table.  Then divides
   INPUT into groups with a casegrouper; within each group, every case is
   inserted into every section of every table, and afterward each table is
   completed (moved category labels sorted, empty categories added), output,
   and cleared for the next group.

   Returns the result of casegrouper_destroy. */
5242 ctables_execute (struct dataset *ds, struct casereader *input,
5245 for (size_t i = 0; i < ct->n_tables; i++)
5247 struct ctables_table *t = ct->tables[i];
/* One section per combination of row, column, and layer nests; an axis with
   no nests counts as one. */
5248 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5249 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5250 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5251 sizeof *t->sections);
5252 size_t ix[PIVOT_N_AXES];
5253 ctables_table_add_section (t, 0, ix);
5256 struct dictionary *dict = dataset_dict (ds);
/* Group by split variables only when split files are shown separately;
   otherwise all of the cases form a single group. */
5258 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5259 struct casegrouper *grouper
5261 ? casegrouper_create_splits (input, dict)
5262 : casegrouper_create_vars (input, NULL, 0));
5263 struct casereader *group;
5264 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to output the split file values. */
5268 struct ccase *c = casereader_peek (group, 0);
5271 output_split_file_values (ds, c);
5276 bool warn_on_invalid = true;
5277 for (struct ccase *c = casereader_read (group); c;
5278 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the rounded case weight from the dictionary.  E_WEIGHT is
   derived from CT->e_weight when that variable is set. */
5280 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5281 double e_weight = (ct->e_weight
5282 ? var_force_valid_weight (ct->e_weight,
5283 case_num (c, ct->e_weight),
5287 for (size_t i = 0; i < ct->n_tables; i++)
5289 struct ctables_table *t = ct->tables[i];
5291 for (size_t j = 0; j < t->n_sections; j++)
5292 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* For axes whose labels are moved elsewhere, remember the values seen. */
5294 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5295 if (t->label_axis[a] != a)
5296 ctables_insert_clabels_values (t, c, a);
5299 casereader_destroy (group);
/* The group is complete: finish, output, and reset each table. */
5301 for (size_t i = 0; i < ct->n_tables; i++)
5303 struct ctables_table *t = ct->tables[i];
5305 if (t->clabels_example)
5306 ctables_sort_clabels_values (t);
5308 for (size_t j = 0; j < t->n_sections; j++)
5309 ctables_section_add_empty_categories (&t->sections[j]);
5311 ctables_table_output (ct, t);
5312 ctables_table_clear (t);
5315 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression and returns the resulting expression tree.  The binary-operator
   parsers take a function of this type to parse their operands. */
5320 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5321 struct dictionary *);
/* Destroys postcompute expression E: deallocates the string of a
   CTPO_CAT_STRING node or both strings of a CTPO_CAT_SRANGE node, recursively
   destroys both subexpressions of a node that has them, and destroys E's
   location.  The category node types listed at the end own nothing beyond
   the location.

   NOTE(review): presumably a null E is accepted and E itself is freed
   (ctables_parse_pcompute may call this with a null expression) --
   confirm. */
5324 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5330 case CTPO_CAT_STRING:
5331 ss_dealloc (&e->string);
5334 case CTPO_CAT_SRANGE:
5335 for (size_t i = 0; i < 2; i++)
5336 ss_dealloc (&e->srange[i]);
/* Nodes with operands: destroy both subexpressions. */
5345 for (size_t i = 0; i < 2; i++)
5346 ctables_pcexpr_destroy (e->subs[i]);
5350 case CTPO_CAT_NUMBER:
5351 case CTPO_CAT_NRANGE:
5352 case CTPO_CAT_MISSING:
5353 case CTPO_CAT_OTHERNM:
5354 case CTPO_CAT_SUBTOTAL:
5355 case CTPO_CAT_TOTAL:
5359 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP with operands SUB0
   and SUB1.  The new node's location is the merger of the two operands'
   locations.  Both operands must be nonnull, since their locations are
   dereferenced. */
5364 static struct ctables_pcexpr *
5365 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5366 struct ctables_pcexpr *sub0,
5367 struct ctables_pcexpr *sub1)
5369 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5370 *e = (struct ctables_pcexpr) {
5372 .subs = { sub0, sub1 },
5373 .location = msg_location_merged (sub0->location, sub1->location),
5378 /* How to parse an operator. */
/* TOKEN is the lexer token that spells the operator; OP is the postcompute
   operation created when that token is matched. */
5381 enum token_type token;
5382 enum ctables_postcompute_op op;
/* Searches the N_OPS elements of OPS for the operator whose token equals
   LEXER's current token.  T_NEG_NUM is handled differently from the other
   tokens.

   NOTE(review): presumably a matched token is consumed unless it is
   T_NEG_NUM, which stays in place so that it can be parsed as the next
   operand, and a null pointer is returned when nothing matches --
   confirm. */
5385 static const struct operator *
5386 ctables_pcexpr_match_operator (struct lexer *lexer,
5387 const struct operator ops[], size_t n_ops)
5389 for (const struct operator *op = ops; op < ops + n_ops; op++)
5390 if (lex_token (lexer) == op->token)
5392 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators at one precedence level,
   given the already-parsed left-hand operand LHS.  Repeatedly matches one of
   the N_OPS operators in OPS, parses the right-hand operand with
   PARSE_NEXT_LEVEL, and combines the two into a new binary node that becomes
   the next LHS, so the operators associate to the left.

   If CHAIN_WARNING is nonnull and more than one operator was chained, it is
   issued as a warning at LHS's location.

   NOTE(review): presumably returns the accumulated expression when no
   further operator matches, and destroys LHS and returns a null pointer when
   an operand fails to parse -- confirm. */
5401 static struct ctables_pcexpr *
5402 ctables_pcexpr_parse_binary_operators__ (
5403 struct lexer *lexer, struct dictionary *dict,
5404 const struct operator ops[], size_t n_ops,
5405 parse_recursively_func *parse_next_level,
5406 const char *chain_warning, struct ctables_pcexpr *lhs)
5408 for (int op_count = 0; ; op_count++)
5410 const struct operator *op
5411 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5414 if (op_count > 1 && chain_warning)
5415 msg_at (SW, lhs->location, "%s", chain_warning);
5420 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5423 ctables_pcexpr_destroy (lhs);
5427 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: parses the first operand
   with PARSE_NEXT_LEVEL, then hands it to
   ctables_pcexpr_parse_binary_operators__ to parse any operators from OPS
   (N_OPS elements) and further operands.  CHAIN_WARNING is passed through
   unchanged.

   NOTE(review): presumably returns a null pointer if the first operand fails
   to parse -- confirm. */
5431 static struct ctables_pcexpr *
5432 ctables_pcexpr_parse_binary_operators (
5433 struct lexer *lexer, struct dictionary *dict,
5434 const struct operator ops[], size_t n_ops,
5435 parse_recursively_func *parse_next_level, const char *chain_warning)
5437 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5441 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5443 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary calls this function to
   parse a parenthesized subexpression, and it is defined later in the
   file. */
5446 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5447 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node for the numeric
   category range from LOW to HIGH.  Members not named in the initializer are
   zero. */
5449 static struct ctables_pcexpr
5450 ctpo_cat_nrange (double low, double high)
5452 return (struct ctables_pcexpr) {
5453 .op = CTPO_CAT_NRANGE,
5454 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node for the string
   category range from LOW to HIGH.  The node stores the substrings
   themselves rather than copies.  Callers pass a substring with a null
   string member for an open end (LO or HI). */
5458 static struct ctables_pcexpr
5459 ctpo_cat_srange (struct substring low, struct substring high)
5461 return (struct ctables_pcexpr) {
5462 .op = CTPO_CAT_SRANGE,
5463 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node whose location spans the tokens consumed.  The forms
   recognized are:

   - A numeric constant.
   - The keywords MISSING, OTHERNM, and TOTAL.
   - SUBTOTAL, optionally followed by a positive index in square brackets.
   - A bracketed category reference: a number, a string, or a range written
     with THRU, where LO and HI denote open ends.
   - A parenthesized subexpression.

   Anything else is reported with lex_error.

   NOTE(review): presumably returns a null pointer after any syntax error --
   confirm. */
5467 static struct ctables_pcexpr *
5468 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5470 int start_ofs = lex_ofs (lexer);
/* The node is assembled in E and copied to the heap at the end. */
5471 struct ctables_pcexpr e;
5472 if (lex_is_number (lexer))
5474 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5475 .number = lex_number (lexer) };
5478 else if (lex_match_id (lexer, "MISSING"))
5479 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5480 else if (lex_match_id (lexer, "OTHERNM"))
5481 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5482 else if (lex_match_id (lexer, "TOTAL"))
5483 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5484 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows SUBTOTAL. */
5486 size_t subtotal_index = 0;
5487 if (lex_match (lexer, T_LBRACK))
5489 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5491 subtotal_index = lex_integer (lexer);
5493 if (!lex_force_match (lexer, T_RBRACK))
5496 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5497 .subtotal_index = subtotal_index };
5499 else if (lex_match (lexer, T_LBRACK))
/* [LO THRU x]: an open-ended range whose upper end X is a string or a
   number. */
5501 if (lex_match_id (lexer, "LO"))
5503 if (!lex_force_match_id (lexer, "THRU"))
5506 if (lex_is_string (lexer))
5508 struct substring low = { .string = NULL };
5509 struct substring high = parse_substring (lexer, dict);
5510 e = ctpo_cat_srange (low, high);
5514 if (!lex_force_num (lexer))
5516 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* [n], [n THRU HI], or [n THRU m]. */
5520 else if (lex_is_number (lexer))
5522 double number = lex_number (lexer);
5524 if (lex_match_id (lexer, "THRU"))
5526 if (lex_match_id (lexer, "HI"))
5527 e = ctpo_cat_nrange (number, DBL_MAX);
5530 if (!lex_force_num (lexer))
5532 e = ctpo_cat_nrange (number, lex_number (lexer));
5537 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* ['s'], ['s' THRU HI], or ['s' THRU 't']. */
5540 else if (lex_is_string (lexer))
5542 struct substring s = parse_substring (lexer, dict);
5544 if (lex_match_id (lexer, "THRU"))
5546 struct substring high;
5548 if (lex_match_id (lexer, "HI"))
5549 high = (struct substring) { .string = NULL };
5552 if (!lex_force_string (lexer))
5557 high = parse_substring (lexer, dict);
5560 e = ctpo_cat_srange (s, high);
5563 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5567 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings already parsed. */
5571 if (!lex_force_match (lexer, T_RBRACK))
5573 if (e.op == CTPO_CAT_STRING)
5574 ss_dealloc (&e.string);
5575 else if (e.op == CTPO_CAT_SRANGE)
5577 ss_dealloc (&e.srange[0]);
5578 ss_dealloc (&e.srange[1]);
/* A parenthesized subexpression restarts at the lowest precedence level. */
5583 else if (lex_match (lexer, T_LPAREN))
5585 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5588 if (!lex_force_match (lexer, T_RPAREN))
5590 ctables_pcexpr_destroy (ep);
5597 lex_error (lexer, NULL);
/* Record the span of tokens consumed and return a heap copy of E. */
5601 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5602 return xmemdup (&e, sizeof e);
/* Allocates a new expression node with operand SUB.  Its location runs from
   token offset START_OFS in LEXER through the token just before the lexer's
   current position.

   NOTE(review): judging by the name and the callers, the node's operation is
   arithmetic negation -- confirm. */
5605 static struct ctables_pcexpr *
5606 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5607 struct lexer *lexer, int start_ofs)
5609 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5610 *e = (struct ctables_pcexpr) {
5613 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Operands are primary expressions and the
   only operator is T_EXP, which produces CTPO_POW.  A chain of more than one
   `**' draws a warning that the operator is left-associative.

   In the ordinary case this just delegates to
   ctables_pcexpr_parse_binary_operators.  When the current token is a
   negative number immediately followed by `**', the number's magnitude
   becomes the left-hand operand of the exponentiation and the whole result
   is wrapped with ctables_pcexpr_allocate_neg. */
5618 static struct ctables_pcexpr *
5619 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5621 static const struct operator op = { T_EXP, CTPO_POW };
5623 const char *chain_warning =
5624 _("The exponentiation operator (`**') is left-associative: "
5625 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5626 "To disable this warning, insert parentheses.");
5628 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5629 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5630 ctables_pcexpr_parse_primary,
5633 /* Special case for situations like "-5**6", which must be parsed as
5636 int start_ofs = lex_ofs (lexer);
5637 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5638 *lhs = (struct ctables_pcexpr) {
5639 .op = CTPO_CONSTANT,
5640 .number = -lex_tokval (lexer),
5641 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5645 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5646 lexer, dict, &op, 1,
5647 ctables_pcexpr_parse_primary, chain_warning, lhs);
5651 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5654 /* Parses the unary minus level. */
/* Without a leading T_DASH, this delegates to the exponentiation level.
   Otherwise it recurses, so that repeated minus signs nest, and wraps the
   operand with ctables_pcexpr_allocate_neg.

   NOTE(review): presumably returns a null pointer if the operand fails to
   parse -- confirm. */
5655 static struct ctables_pcexpr *
5656 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5658 int start_ofs = lex_ofs (lexer);
5659 if (!lex_match (lexer, T_DASH))
5660 return ctables_pcexpr_parse_exp (lexer, dict);
5662 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5666 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5669 /* Parses the multiplication and division level. */
/* Operands come from the unary minus level.  `*' yields CTPO_MUL and `/'
   yields CTPO_DIV.  No chain warning applies at this level. */
5670 static struct ctables_pcexpr *
5671 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5673 static const struct operator ops[] =
5675 { T_ASTERISK, CTPO_MUL },
5676 { T_SLASH, CTPO_DIV },
5679 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5680 sizeof ops / sizeof *ops,
5681 ctables_pcexpr_parse_neg, NULL);
5684 /* Parses the addition and subtraction level. */
/* Operands come from the multiplication level.  `+' yields CTPO_ADD and `-'
   yields CTPO_SUB.  A negative-number token (T_NEG_NUM) in operator position
   is also treated as CTPO_ADD, so that input such as "a -5" adds the
   negative number. */
5685 static struct ctables_pcexpr *
5686 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5688 static const struct operator ops[] =
5690 { T_PLUS, CTPO_ADD },
5691 { T_DASH, CTPO_SUB },
5692 { T_NEG_NUM, CTPO_ADD },
5695 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5696 ops, sizeof ops / sizeof *ops,
5697 ctables_pcexpr_parse_mul, NULL);
/* Searches CT->postcomputes for the postcompute named NAME.  Names are
   hashed with utf8_hash_case_string and compared with utf8_strcasecmp, so
   the lookup ignores case.

   NOTE(review): presumably returns the matching postcompute, or a null
   pointer if there is none -- confirm. */
5700 static struct ctables_postcompute *
5701 ctables_find_postcompute (struct ctables *ct, const char *name)
5703 struct ctables_postcompute *pc;
5704 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5705 utf8_hash_case_string (name, 0), &ct->postcomputes)
5706 if (!utf8_strcasecmp (pc->name, name))
/* Parses a computed-category definition of the form

       &NAME = EXPR (expression)

   and records it in CT->postcomputes.  If NAME is already defined, warns
   that the new definition overrides the previous one, points at the previous
   definition, and destroys the old expression and location.  Otherwise a new
   postcompute is allocated and inserted under the case-insensitive hash of
   its name.  The postcompute's location covers the whole definition, and its
   label is the source text of the expression.

   NOTE(review): presumably returns false after a syntax error, having freed
   NAME and any parsed expression, and true on success -- confirm. */
5712 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The definition's location starts one token before the current one. */
5715 int pcompute_start = lex_ofs (lexer) - 1;
5717 if (!lex_match (lexer, T_AND))
5719 lex_error_expecting (lexer, "&");
5722 if (!lex_force_id (lexer))
5725 char *name = ss_xstrdup (lex_tokss (lexer));
5728 if (!lex_force_match (lexer, T_EQUALS)
5729 || !lex_force_match_id (lexer, "EXPR")
5730 || !lex_force_match (lexer, T_LPAREN))
/* Remember which tokens make up the expression, for use as the label. */
5736 int expr_start = lex_ofs (lexer);
5737 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5738 int expr_end = lex_ofs (lexer) - 1;
5739 if (!expr || !lex_force_match (lexer, T_RPAREN))
5741 ctables_pcexpr_destroy (expr);
5745 int pcompute_end = lex_ofs (lexer) - 1;
5747 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5750 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then discard the old expression and location. */
5753 msg_at (SW, location, _("New definition of &%s will override the "
5754 "previous definition."),
5756 msg_at (SN, pc->location, _("This is the previous definition."));
5758 ctables_pcexpr_destroy (pc->expr);
5759 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes ownership of NAME. */
5764 pc = xmalloc (sizeof *pc);
5765 *pc = (struct ctables_postcompute) { .name = name };
5766 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5767 utf8_hash_case_string (pc->name, 0));
5770 pc->location = location;
5772 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary specifications that follows FORMAT in a
   PPROPERTIES subcommand and stores them in *SSS, which is first reset to an
   empty set.

   Each specification consists of a summary function, then a percentile in
   the closed range 0 to 100 if the function is CTSF_PTILE, then a format
   specifier.  Parsing stops at the end of the command, at a slash, or at a
   LABEL or HIDESOURCECATS identifier, which introduce the other PPROPERTIES
   settings.

   NOTE(review): the return statements are not visible in this view; the
   caller tests the result with "!".  The final call to
   ctables_summary_spec_set_uninit is presumably the error path that
   releases whatever was accumulated -- confirm. */
5777 ctables_parse_pproperties_format (struct lexer *lexer,
5778 struct ctables_summary_spec_set *sss)
5780 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5782 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5783 && !(lex_token (lexer) == T_ID
5784 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5785 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5786 lex_tokss (lexer)))))
5788 /* Parse function. */
5789 enum ctables_summary_function function;
5791 enum ctables_area_type area;
5792 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5795 /* Parse percentile. */
5796 double percentile = 0;
5797 if (function == CTSF_PTILE)
5799 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5801 percentile = lex_number (lexer);
/* Parse the format specifier for this summary. */
5806 struct fmt_spec format;
5807 bool is_ctables_format;
5808 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
5811 if (sss->n >= sss->allocated)
5812 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5813 sizeof *sss->specs);
5814 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5815 .function = function,
5816 .weighted = weighted,
5819 .percentile = percentile,
5821 .is_ctables_format = is_ctables_format,
5827 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.

   The subcommand begins with one or more "&NAME" references, each of which
   must name a postcompute already present in CT (otherwise "Unknown computed
   category" is reported).  These are collected in PCS.  The settings that
   follow, up to the next slash or the end of the command, are applied to
   every collected postcompute:

   - LABEL: replaces the label with a copy of the given string.
   - FORMAT: parses summary specifications and gives each postcompute its own
     clone of them.
   - HIDESOURCECATS: sets the hide_source_cats flag.

   An optional "=" is accepted after each keyword.

   NOTE(review): the return type, the return statements and the release of
   PCS are not visible in this view; cmd_ctables tests the result with "!" --
   confirm the exact contract. */
5832 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5834 struct ctables_postcompute **pcs = NULL;
5836 size_t allocated_pcs = 0;
5838 while (lex_match (lexer, T_AND))
5840 if (!lex_force_id (lexer))
5842 struct ctables_postcompute *pc
5843 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5846 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5851 if (n_pcs >= allocated_pcs)
5852 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5856 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5858 if (lex_match_id (lexer, "LABEL"))
5860 lex_match (lexer, T_EQUALS);
5861 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
5864 for (size_t i = 0; i < n_pcs; i++)
5866 free (pcs[i]->label);
5867 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5872 else if (lex_match_id (lexer, "FORMAT"))
5874 lex_match (lexer, T_EQUALS);
5876 struct ctables_summary_spec_set sss;
5877 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets a separately allocated clone of SSS, after which the
   temporary set itself is released. */
5880 for (size_t i = 0; i < n_pcs; i++)
5883 ctables_summary_spec_set_uninit (pcs[i]->specs);
5885 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5886 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5888 ctables_summary_spec_set_uninit (&sss);
5890 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5892 lex_match (lexer, T_EQUALS);
5893 bool hide_source_cats;
5894 if (!parse_bool (lexer, &hide_source_cats))
5896 for (size_t i = 0; i < n_pcs; i++)
5897 pcs[i]->hide_source_cats = hide_source_cats;
5901 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to broken-down local time, formats it with strftime according
   to FORMAT into the local buffer VALUE, and appends the result to OUT.

   NOTE(review): the pointer returned by localtime is passed to strftime
   without a null check, and the return value of strftime is ignored, so a
   result that does not fit in VALUE is not detected.  The declaration of
   VALUE (and therefore its size) is not visible in this view. */
5914 put_strftime (struct string *out, time_t now, const char *format)
5916 const struct tm *tm = localtime (&now);
5918 strftime (value, sizeof value, format, tm);
5919 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past the prefix.

   NOTE(review): the return statements are not visible in this view.
   put_title_text uses the result as a condition, so this presumably returns
   true when the prefix was present and consumed and false otherwise --
   confirm. */
5923 skip_prefix (struct substring *s, struct substring prefix)
5925 if (ss_starts_with (*s, prefix))
5927 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the lexer tokens at offsets
   EXPR_START (inclusive) through EXPR_END (exclusive).  put_title_text uses
   it to expand ")TABLE" in title text.

   An identifier token that names a variable in DICT with a variable label is
   written as that label; any other identifier is written as it appears in
   the token.  Remaining tokens are written using their syntax representation,
   with a space before the token unless it is the first token, a right
   parenthesis, or OUT already ends in a space, and a space after it unless it
   is the last token or a left parenthesis.

   NOTE(review): the bodies of the T_LBRACK and T_RBRACK branches, and the
   declaration of NEST, are not visible in this view; presumably they track
   bracket nesting so that bracketed text is left out of the output --
   confirm. */
5935 put_table_expression (struct string *out, struct lexer *lexer,
5936 struct dictionary *dict, int expr_start, int expr_end)
5939 for (int ofs = expr_start; ofs < expr_end; ofs++)
5941 const struct token *t = lex_ofs_token (lexer, ofs);
5942 if (t->type == T_LBRACK)
5944 else if (t->type == T_RBRACK && nest > 0)
5950 else if (t->type == T_ID)
/* Prefer the variable label, falling back to the identifier text. */
5952 const struct variable *var
5953 = dict_lookup_var (dict, t->string.string);
5954 const char *label = var ? var_get_label (var) : NULL;
5955 ds_put_cstr (out, label ? label : t->string.string);
5959 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5960 ds_put_byte (out, ' ');
5962 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5963 ds_put_cstr (out, repr);
5966 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5967 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the keywords that start with a
   right parenthesis:

   - ")DATE" becomes NOW formatted with strftime "%x".
   - ")TIME" becomes NOW formatted with strftime "%X".
   - ")TABLE" becomes the table expression, that is, the tokens from
     EXPR_START up to EXPR_END rendered by put_table_expression.

   Text up to the next ')' is copied unchanged, and a ')' that does not begin
   one of the keywords above is copied literally.

   NOTE(review): the enclosing loop and its exit are not visible in this
   view; presumably the steps below repeat until IN is exhausted --
   confirm. */
5973 put_title_text (struct string *out, struct substring in, time_t now,
5974 struct lexer *lexer, struct dictionary *dict,
5975 int expr_start, int expr_end)
/* Copy everything before the next ')' and then look at what follows. */
5979 size_t chunk = ss_find_byte (in, ')');
5980 ds_put_substring (out, ss_head (in, chunk));
5981 ss_advance (&in, chunk);
5982 if (ss_is_empty (in))
5985 if (skip_prefix (&in, ss_cstr (")DATE")))
5986 put_strftime (out, now, "%x");
5987 else if (skip_prefix (&in, ss_cstr (")TIME")))
5988 put_strftime (out, now, "%X");
5989 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5990 put_table_expression (out, lexer, dict, expr_start, expr_end);
5993 ds_put_byte (out, ')');
5994 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   Outline of the visible code:

   1. Runs the measure guesser over the active dataset.
   2. Sets up defaults: a per-variable label display mode taken from the
      global settings, a private copy of the default pivot table look with
      omit_empty turned off, the struct ctables itself, and the number styles
      for the CTEF_* formats.
   3. Parses the subcommands that may precede the first TABLE subcommand:
      FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT and
      HIDESMALLCOUNTS.
   4. For each TABLE subcommand, parses up to three axis expressions separated
      by BY (rows, columns, layers), checks that scale variables and summaries
      are each confined to a single axis, then parses the per-table
      subcommands SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST and
      COMPARETEST, and finally prepares the table.
   5. Opens the active dataset, executes the tables and commits the procedure.

   Returns CMD_SUCCESS if execution and commit both succeed, otherwise
   CMD_FAILURE.

   NOTE(review): many short lines (braces, else, break, goto, and the error
   label) are not visible in this view, so the exact error-handling flow is
   not documented here.  The last visible line destroys CT, presumably on the
   error path -- confirm. */
6000 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6002 struct casereader *input = NULL;
/* Let the measure guesser read the data before parsing starts.
   NOTE(review): any condition guarding the next three lines is not visible in
   this view. */
6004 struct measure_guesser *mg = measure_guesser_create (ds);
6007 input = proc_open (ds);
6008 measure_guesser_run (mg, input);
6009 measure_guesser_destroy (mg);
/* Every variable starts with the variable display mode from the global
   settings; VLABELS can override it per variable. */
6012 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6013 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6014 enum settings_value_show tvars = settings_get_show_variables ();
6015 for (size_t i = 0; i < n_vars; i++)
6016 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default look so that it can be modified. */
6018 struct pivot_table_look *look = pivot_table_look_unshare (
6019 pivot_table_look_ref (pivot_table_look_get_default ()));
6020 look->omit_empty = false;
6022 struct ctables *ct = xmalloc (sizeof *ct);
6023 *ct = (struct ctables) {
6024 .dict = dataset_dict (ds),
6026 .ctables_formats = FMT_SETTINGS_INIT,
6028 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp used later to expand )DATE and )TIME in titles. */
6031 time_t now = time (NULL);
6036 const char *dot_string;
6037 const char *comma_string;
/* Number style strings for the CTEF_* formats.  The first string of each
   entry is used when the decimal point is '.', the second otherwise. */
6039 static const struct ctf ctfs[4] = {
6040 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6041 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6042 { CTEF_PAREN, "-,(,),", "-.(.)." },
6043 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6045 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6046 for (size_t i = 0; i < 4; i++)
6048 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6049 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6050 fmt_number_style_from_string (s));
6053 if (!lex_force_match (lexer, T_SLASH))
/* Parse the subcommands that precede the first TABLE subcommand. */
6056 while (!lex_match_id (lexer, "TABLE"))
6058 if (lex_match_id (lexer, "FORMAT"))
/* widths[0] is MINCOLWIDTH and widths[1] is MAXCOLWIDTH; SYSMIS means that
   the setting was not given.  The default unit is points. */
6060 double widths[2] = { SYSMIS, SYSMIS };
6061 double units_per_inch = 72.0;
6063 while (lex_token (lexer) != T_SLASH)
6065 if (lex_match_id (lexer, "MINCOLWIDTH"))
6067 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6070 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6072 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6075 else if (lex_match_id (lexer, "UNITS"))
6077 lex_match (lexer, T_EQUALS);
6078 if (lex_match_id (lexer, "POINTS"))
6079 units_per_inch = 72.0;
6080 else if (lex_match_id (lexer, "INCHES"))
6081 units_per_inch = 1.0;
6082 else if (lex_match_id (lexer, "CM"))
6083 units_per_inch = 2.54;
6086 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6090 else if (lex_match_id (lexer, "EMPTY"))
6095 lex_match (lexer, T_EQUALS);
6096 if (lex_match_id (lexer, "ZERO"))
6098 /* Nothing to do. */
6100 else if (lex_match_id (lexer, "BLANK"))
6101 ct->zero = xstrdup ("");
6102 else if (lex_force_string (lexer))
6104 ct->zero = ss_xstrdup (lex_tokss (lexer));
6110 else if (lex_match_id (lexer, "MISSING"))
6112 lex_match (lexer, T_EQUALS);
6113 if (!lex_force_string (lexer))
6117 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6118 ? ss_xstrdup (lex_tokss (lexer))
6124 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6125 "UNITS", "EMPTY", "MISSING");
6130 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6131 && widths[0] > widths[1])
6133 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was given to inches and scale it by 96.0.
   NOTE(review): width_ranges is presumably measured in 1/96 inch units --
   confirm against the pivot table look definition. */
6137 for (size_t i = 0; i < 2; i++)
6138 if (widths[i] != SYSMIS)
6140 int *wr = ct->look->width_ranges[TABLE_HORZ];
6141 wr[i] = widths[i] / units_per_inch * 96.0;
6146 else if (lex_match_id (lexer, "VLABELS"))
6148 if (!lex_force_match_id (lexer, "VARIABLES"))
6150 lex_match (lexer, T_EQUALS);
6152 struct variable **vars;
6154 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6158 if (!lex_force_match_id (lexer, "DISPLAY"))
6163 lex_match (lexer, T_EQUALS);
6165 enum ctables_vlabel vlabel;
6166 if (lex_match_id (lexer, "DEFAULT"))
6167 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6168 else if (lex_match_id (lexer, "NAME"))
6170 else if (lex_match_id (lexer, "LABEL"))
6171 vlabel = CTVL_LABEL;
6172 else if (lex_match_id (lexer, "BOTH"))
6174 else if (lex_match_id (lexer, "NONE"))
6178 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the display mode for each listed variable, indexed by its position
   in the dictionary. */
6184 for (size_t i = 0; i < n_vars; i++)
6185 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6188 else if (lex_match_id (lexer, "MRSETS"))
6190 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6192 lex_match (lexer, T_EQUALS);
6193 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6196 else if (lex_match_id (lexer, "SMISSING"))
6198 if (lex_match_id (lexer, "VARIABLE"))
6199 ct->smissing_listwise = false;
6200 else if (lex_match_id (lexer, "LISTWISE"))
6201 ct->smissing_listwise = true;
6204 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6208 else if (lex_match_id (lexer, "PCOMPUTE"))
6210 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6213 else if (lex_match_id (lexer, "PPROPERTIES"))
6215 if (!ctables_parse_pproperties (lexer, ct))
6218 else if (lex_match_id (lexer, "WEIGHT"))
6220 if (!lex_force_match_id (lexer, "VARIABLE"))
6222 lex_match (lexer, T_EQUALS);
6223 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6227 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6229 if (lex_match_id (lexer, "COUNT"))
6231 lex_match (lexer, T_EQUALS);
6232 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6235 ct->hide_threshold = lex_integer (lexer);
/* HIDESMALLCOUNTS without COUNT: use a threshold of 5 unless a threshold has
   already been set. */
6238 else if (ct->hide_threshold == 0)
6239 ct->hide_threshold = 5;
6243 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6244 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6245 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6249 if (!lex_force_match (lexer, T_SLASH))
/* Parse each TABLE subcommand together with the subcommands that apply to
   it.  A new struct ctables_table is appended to ct->tables for each one. */
6253 size_t allocated_tables = 0;
6256 if (ct->n_tables >= allocated_tables)
6257 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6258 sizeof *ct->tables);
6260 struct ctables_category *cat = xmalloc (sizeof *cat);
6261 *cat = (struct ctables_category) {
6263 .include_missing = false,
6264 .sort_ascending = true,
6267 struct ctables_categories *c = xmalloc (sizeof *c);
6268 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6269 *c = (struct ctables_categories) {
/* One categories slot per dictionary variable. */
6276 struct ctables_categories **categories = xnmalloc (n_vars,
6277 sizeof *categories);
6278 for (size_t i = 0; i < n_vars; i++)
6281 struct ctables_table *t = xmalloc (sizeof *t);
6282 *t = (struct ctables_table) {
6284 .slabels_axis = PIVOT_AXIS_COLUMN,
6285 .slabels_visible = true,
6286 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6288 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6289 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6290 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6292 .clabels_from_axis = PIVOT_AXIS_LAYER,
6293 .clabels_to_axis = PIVOT_AXIS_LAYER,
6294 .categories = categories,
6295 .n_categories = n_vars,
6298 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: rows, then optionally BY columns, then
   optionally BY layers.  EXPR_START and EXPR_END delimit its tokens so that
   )TABLE in titles can reproduce it. */
6300 lex_match (lexer, T_EQUALS);
6301 int expr_start = lex_ofs (lexer);
6302 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6304 if (lex_match (lexer, T_BY))
6306 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6307 ct, t, PIVOT_AXIS_COLUMN))
6310 if (lex_match (lexer, T_BY))
6312 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6313 ct, t, PIVOT_AXIS_LAYER))
6317 int expr_end = lex_ofs (lexer);
6319 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6320 && !t->axes[PIVOT_AXIS_LAYER])
6322 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis; scale variables on more
   than one axis are reported as an error. */
6326 const struct ctables_axis *scales[PIVOT_N_AXES];
6327 size_t n_scales = 0;
6328 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6330 scales[a] = find_scale (t->axes[a]);
6336 msg (SE, _("Scale variables may appear only on one axis."));
6337 if (scales[PIVOT_AXIS_ROW])
6338 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6339 _("This scale variable appears on the rows axis."));
6340 if (scales[PIVOT_AXIS_COLUMN])
6341 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6342 _("This scale variable appears on the columns axis."));
6343 if (scales[PIVOT_AXIS_LAYER])
6344 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6345 _("This scale variable appears on the layer axis."));
/* Find the summary on each axis.  When an axis has a scale variable the
   first alternative (not visible in this view) is used; otherwise the
   categorical summary specification is looked up. */
6349 const struct ctables_axis *summaries[PIVOT_N_AXES];
6350 size_t n_summaries = 0;
6351 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6353 summaries[a] = (scales[a]
6355 : find_categorical_summary_spec (t->axes[a]));
6359 if (n_summaries > 1)
6361 msg (SE, _("Summaries may appear only on one axis."));
6362 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6365 msg_at (SN, summaries[a]->loc,
6367 ? _("This variable on the rows axis has a summary.")
6368 : a == PIVOT_AXIS_COLUMN
6369 ? _("This variable on the columns axis has a summary.")
6370 : _("This variable on the layers axis has a summary."));
6372 msg_at (SN, summaries[a]->loc,
6373 _("This is a scale variable, so it always has a "
6374 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary or, if no axis has
   one, an axis that is in use. */
6379 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6380 if (n_summaries ? summaries[a] : t->axes[a])
6382 t->summary_axis = a;
6386 if (lex_token (lexer) == T_ENDCMD)
6388 if (!ctables_prepare_table (t))
6392 if (!lex_force_match (lexer, T_SLASH))
/* Parse the subcommands that apply to this table, until the next TABLE
   subcommand or the end of the command. */
6395 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6397 if (lex_match_id (lexer, "SLABELS"))
6399 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6401 if (lex_match_id (lexer, "POSITION"))
6403 lex_match (lexer, T_EQUALS);
6404 if (lex_match_id (lexer, "COLUMN"))
6405 t->slabels_axis = PIVOT_AXIS_COLUMN;
6406 else if (lex_match_id (lexer, "ROW"))
6407 t->slabels_axis = PIVOT_AXIS_ROW;
6408 else if (lex_match_id (lexer, "LAYER"))
6409 t->slabels_axis = PIVOT_AXIS_LAYER;
6412 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6416 else if (lex_match_id (lexer, "VISIBLE"))
6418 lex_match (lexer, T_EQUALS);
6419 if (!parse_bool (lexer, &t->slabels_visible))
6424 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6429 else if (lex_match_id (lexer, "CLABELS"))
6431 if (lex_match_id (lexer, "AUTO"))
6433 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6434 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6436 else if (lex_match_id (lexer, "ROWLABELS"))
6438 lex_match (lexer, T_EQUALS);
6439 if (lex_match_id (lexer, "OPPOSITE"))
6440 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6441 else if (lex_match_id (lexer, "LAYER"))
6442 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6445 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6449 else if (lex_match_id (lexer, "COLLABELS"))
6451 lex_match (lexer, T_EQUALS);
6452 if (lex_match_id (lexer, "OPPOSITE"))
6453 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6454 else if (lex_match_id (lexer, "LAYER"))
6455 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6458 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6464 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6469 else if (lex_match_id (lexer, "CRITERIA"))
6471 if (!lex_force_match_id (lexer, "CILEVEL"))
6473 lex_match (lexer, T_EQUALS);
6475 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6477 t->cilevel = lex_number (lexer);
6480 else if (lex_match_id (lexer, "CATEGORIES"))
6482 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6486 else if (lex_match_id (lexer, "TITLES"))
/* Each of CAPTION, CORNER and TITLE selects the string to set through TEXTP.
   The value is built from consecutive string tokens joined by single spaces,
   each expanded by put_title_text. */
6491 if (lex_match_id (lexer, "CAPTION"))
6492 textp = &t->caption;
6493 else if (lex_match_id (lexer, "CORNER"))
6495 else if (lex_match_id (lexer, "TITLE"))
6499 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6502 lex_match (lexer, T_EQUALS);
6504 struct string s = DS_EMPTY_INITIALIZER;
6505 while (lex_is_string (lexer))
6507 if (!ds_is_empty (&s))
6508 ds_put_byte (&s, ' ');
6509 put_title_text (&s, lex_tokss (lexer), now,
6510 lexer, dataset_dict (ds),
6511 expr_start, expr_end);
6515 *textp = ds_steal_cstr (&s);
6517 while (lex_token (lexer) != T_SLASH
6518 && lex_token (lexer) != T_ENDCMD);
6520 else if (lex_match_id (lexer, "SIGTEST"))
/* SIGTEST settings are stored in t->chisq.
   NOTE(review): any condition guarding this allocation is not visible in
   this view. */
6524 t->chisq = xmalloc (sizeof *t->chisq);
6525 *t->chisq = (struct ctables_chisq) {
6527 .include_mrsets = true,
6528 .all_visible = true,
6534 if (lex_match_id (lexer, "TYPE"))
6536 lex_match (lexer, T_EQUALS);
6537 if (!lex_force_match_id (lexer, "CHISQUARE"))
6540 else if (lex_match_id (lexer, "ALPHA"))
6542 lex_match (lexer, T_EQUALS);
6543 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6545 t->chisq->alpha = lex_number (lexer);
6548 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6550 lex_match (lexer, T_EQUALS);
6551 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6554 else if (lex_match_id (lexer, "CATEGORIES"))
6556 lex_match (lexer, T_EQUALS);
6557 if (lex_match_id (lexer, "ALLVISIBLE"))
6558 t->chisq->all_visible = true;
6559 else if (lex_match_id (lexer, "SUBTOTALS"))
6560 t->chisq->all_visible = false;
6563 lex_error_expecting (lexer,
6564 "ALLVISIBLE", "SUBTOTALS");
6570 lex_error_expecting (lexer, "TYPE", "ALPHA",
6571 "INCLUDEMRSETS", "CATEGORIES");
6575 while (lex_token (lexer) != T_SLASH
6576 && lex_token (lexer) != T_ENDCMD);
6578 else if (lex_match_id (lexer, "COMPARETEST"))
/* COMPARETEST settings are stored in t->pairwise.
   NOTE(review): any condition guarding this allocation is not visible in
   this view. */
6582 t->pairwise = xmalloc (sizeof *t->pairwise);
6583 *t->pairwise = (struct ctables_pairwise) {
6585 .alpha = { .05, .05 },
6586 .adjust = BONFERRONI,
6587 .include_mrsets = true,
6588 .meansvariance_allcats = true,
6589 .all_visible = true,
6598 if (lex_match_id (lexer, "TYPE"))
6600 lex_match (lexer, T_EQUALS);
6601 if (lex_match_id (lexer, "PROP"))
6602 t->pairwise->type = PROP;
6603 else if (lex_match_id (lexer, "MEAN"))
6604 t->pairwise->type = MEAN;
6607 lex_error_expecting (lexer, "PROP", "MEAN");
6611 else if (lex_match_id (lexer, "ALPHA"))
/* ALPHA takes one or two values in the open range 0 to 1, optionally
   separated by a comma.  Two values are stored in ascending order; a single
   value is stored in both slots. */
6613 lex_match (lexer, T_EQUALS);
6615 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6617 double a0 = lex_number (lexer);
6620 lex_match (lexer, T_COMMA);
6621 if (lex_is_number (lexer))
6623 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6625 double a1 = lex_number (lexer);
6628 t->pairwise->alpha[0] = MIN (a0, a1);
6629 t->pairwise->alpha[1] = MAX (a0, a1);
6632 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6634 else if (lex_match_id (lexer, "ADJUST"))
6636 lex_match (lexer, T_EQUALS);
6637 if (lex_match_id (lexer, "BONFERRONI"))
6638 t->pairwise->adjust = BONFERRONI;
6639 else if (lex_match_id (lexer, "BH"))
6640 t->pairwise->adjust = BH;
6641 else if (lex_match_id (lexer, "NONE"))
6642 t->pairwise->adjust = 0;
6645 lex_error_expecting (lexer, "BONFERRONI", "BH",
6650 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6652 lex_match (lexer, T_EQUALS);
6653 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6656 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6658 lex_match (lexer, T_EQUALS);
6659 if (lex_match_id (lexer, "ALLCATS"))
6660 t->pairwise->meansvariance_allcats = true;
6661 else if (lex_match_id (lexer, "TESTEDCATS"))
6662 t->pairwise->meansvariance_allcats = false;
6665 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6669 else if (lex_match_id (lexer, "CATEGORIES"))
6671 lex_match (lexer, T_EQUALS);
6672 if (lex_match_id (lexer, "ALLVISIBLE"))
6673 t->pairwise->all_visible = true;
6674 else if (lex_match_id (lexer, "SUBTOTALS"))
6675 t->pairwise->all_visible = false;
6678 lex_error_expecting (lexer, "ALLVISIBLE",
6683 else if (lex_match_id (lexer, "MERGE"))
6685 lex_match (lexer, T_EQUALS);
6686 if (!parse_bool (lexer, &t->pairwise->merge))
6689 else if (lex_match_id (lexer, "STYLE"))
6691 lex_match (lexer, T_EQUALS);
6692 if (lex_match_id (lexer, "APA"))
6693 t->pairwise->apa_style = true;
6694 else if (lex_match_id (lexer, "SIMPLE"))
6695 t->pairwise->apa_style = false;
6698 lex_error_expecting (lexer, "APA", "SIMPLE");
6702 else if (lex_match_id (lexer, "SHOWSIG"))
6704 lex_match (lexer, T_EQUALS);
6705 if (!parse_bool (lexer, &t->pairwise->show_sig))
6710 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6711 "INCLUDEMRSETS", "MEANSVARIANCE",
6712 "CATEGORIES", "MERGE", "STYLE",
6717 while (lex_token (lexer) != T_SLASH
6718 && lex_token (lexer) != T_ENDCMD);
6722 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6723 "CRITERIA", "CATEGORIES", "TITLES",
6724 "SIGTEST", "COMPARETEST");
6728 if (!lex_match (lexer, T_SLASH))
/* Work out which axis, if any, has its category labels moved by CLABELS.
   Moving both the row labels and the column labels is an error. */
6732 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6734 t->clabels_from_axis = PIVOT_AXIS_ROW;
6735 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6737 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6741 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6742 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6743 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6745 if (!ctables_prepare_table (t))
6748 while (lex_token (lexer) != T_ENDCMD);
/* All tables have been parsed: read the data, produce the output, and commit
   the procedure.  The command succeeds only if both steps succeed. */
6751 input = proc_open (ds);
6752 bool ok = ctables_execute (ds, input, ct);
6753 ok = proc_commit (ds) && ok;
6755 ctables_destroy (ct);
6756 return ok ? CMD_SUCCESS : CMD_FAILURE;
6761 ctables_destroy (ct);