1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition, kept in struct ctables's 'postcomputes'
   hmap. */
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr; /* Released with ctables_pcexpr_destroy(). */
275 struct ctables_summary_spec_set *specs; /* Released with ctables_summary_spec_set_uninit(). */
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
376 size_t *areas[N_CTATS];
377 size_t n_areas[N_CTATS];
380 struct ctables_summary_spec_set specs[N_CSVS];
383 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
386 struct ctables_nest *nests;
390 static void ctables_stack_uninit (struct ctables_stack *);
394 struct hmap_node node;
399 struct ctables_occurrence
401 struct hmap_node node;
405 struct ctables_section
408 struct ctables_table *table;
409 struct ctables_nest *nests[PIVOT_N_AXES];
412 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
413 struct hmap cells; /* Contains "struct ctables_cell"s. */
414 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
417 static void ctables_section_uninit (struct ctables_section *);
421 struct ctables *ctables;
422 struct ctables_axis *axes[PIVOT_N_AXES];
423 struct ctables_stack stacks[PIVOT_N_AXES];
424 struct ctables_section *sections;
426 enum pivot_axis_type summary_axis;
427 struct ctables_summary_spec_set summary_specs;
428 struct variable **sum_vars;
431 enum pivot_axis_type slabels_axis;
432 bool slabels_visible;
434 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
436 Most commonly, label_axis[a] == a, and in particular we always have
437 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
439 If ROWLABELS or COLLABELS is specified, then one of
440 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
441 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
443 If any category labels are moved, then 'clabels_example' is one of the
444 variables being moved (and it is otherwise NULL). All of the variables
445 being moved have the same width, value labels, and categories, so this
446 example variable can be used to find those out.
448 The remaining members in this group are relevant only if category labels
451 'clabels_values_map' holds a "struct ctables_value" for all the values
452 that appear in all of the variables in the moved categories. It is
453 accumulated as the data is read. Once the data is fully read, its
454 sorted values are put into 'clabels_values' and 'n_clabels_values'.
456 enum pivot_axis_type label_axis[PIVOT_N_AXES];
457 enum pivot_axis_type clabels_from_axis;
458 enum pivot_axis_type clabels_to_axis;
459 const struct variable *clabels_example;
460 struct hmap clabels_values_map;
461 struct ctables_value **clabels_values;
462 size_t n_clabels_values;
464 /* Indexed by variable dictionary index. */
465 struct ctables_categories **categories;
474 struct ctables_chisq *chisq;
475 struct ctables_pairwise *pairwise;
478 struct ctables_categories
481 struct ctables_category *cats;
486 struct ctables_category
488 enum ctables_category_type
490 /* Explicit category lists. */
493 CCT_NRANGE, /* Numerical range. */
494 CCT_SRANGE, /* String range. */
499 /* Totals and subtotals. */
503 /* Implicit category lists. */
508 /* For contributing to TOTALN. */
509 CCT_EXCLUDED_MISSING,
513 struct ctables_category *subtotal;
519 double number; /* CCT_NUMBER. */
520 struct substring string; /* CCT_STRING, in dictionary encoding. */
521 double nrange[2]; /* CCT_NRANGE. */
522 struct substring srange[2]; /* CCT_SRANGE. */
526 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
527 bool hide_subcategories; /* CCT_SUBTOTAL. */
530 /* CCT_POSTCOMPUTE. */
533 const struct ctables_postcompute *pc;
534 enum fmt_type parse_format;
537 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
540 bool include_missing;
544 enum ctables_summary_function sort_function;
546 enum ctables_area_type area;
547 struct variable *sort_var;
552 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
553 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
554 struct msg_location *location;
/* Releases the storage owned by CAT.  The source location is destroyed
   first; the remaining releases sit under per-type case labels and cover the
   category's string, both endpoints of its string range, and its total
   label. */
558 ctables_category_uninit (struct ctables_category *cat)
563 msg_location_destroy (cat->location);
570 case CCT_POSTCOMPUTE:
574 ss_dealloc (&cat->string);
/* Both endpoints of a string range are released. */
578 ss_dealloc (&cat->srange[0]);
579 ss_dealloc (&cat->srange[1]);
584 free (cat->total_label);
592 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings compare equal, a null and a non-null
   substring compare unequal, and two non-null substrings are compared with
   ss_equals(). */
598 nullable_substring_equal (const struct substring *a,
599 const struct substring *b)
601 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are compared first; after that,
   each kind of category is compared on the members that carry its
   payload. */
605 ctables_category_equal (const struct ctables_category *a,
606 const struct ctables_category *b)
608 if (a->type != b->type)
614 return a->number == b->number;
617 return ss_equals (a->string, b->string);
/* Numeric ranges match only if both endpoints are exactly equal. */
620 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range endpoints are compared with the null-tolerant helper, so an
   endpoint with a null 'string' only matches another null endpoint. */
623 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
624 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
630 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by pointer identity, not by content. */
631 return a->pc == b->pc;
/* strcmp() is called without a null check, so both labels must be non-null
   on this path. */
635 return !strcmp (a->total_label, b->total_label);
/* Categories described by a sort specification match if every sort-related
   member is equal. */
640 return (a->include_missing == b->include_missing
641 && a->sort_ascending == b->sort_ascending
642 && a->sort_function == b->sort_function
643 && a->sort_var == b->sort_var
644 && a->percentile == b->percentile);
646 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry
   (asserted).  The visible cleanup uninitializes every element of
   c->cats with ctables_category_uninit().
   NOTE(review): presumably the cleanup runs only when the last reference is
   dropped -- confirm against the full function. */
654 ctables_categories_unref (struct ctables_categories *c)
659 assert (c->n_refs > 0);
663 for (size_t i = 0; i < c->n_cats; i++)
664 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They can only be equal if they have the
   same number of categories and the same 'show_empty' setting; the
   categories are then compared pairwise, in order, with
   ctables_category_equal(). */
670 ctables_categories_equal (const struct ctables_categories *a,
671 const struct ctables_categories *b)
673 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
676 for (size_t i = 0; i < a->n_cats; i++)
677 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
683 /* Chi-square test (SIGTEST). */
691 /* Pairwise comparison test (COMPARETEST). */
692 struct ctables_pairwise
694 enum { PROP, MEAN } type;
697 bool meansvariance_allcats;
699 enum { BONFERRONI = 1, BH } adjust;
723 struct variable *var;
725 struct ctables_summary_spec_set specs[N_CSVS];
729 struct ctables_axis *subs[2];
732 struct msg_location *loc;
735 static void ctables_axis_destroy (struct ctables_axis *);
737 struct ctables_summary_spec
739 /* The calculation to be performed.
741 'function' is the function to calculate. 'weighted' specifies whether
742 to use weighted or unweighted data (for functions that do not support a
743 choice, it must be true). 'calc_area' is the area over which the
744 calculation takes place (for functions that target only an individual
745 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
746 percentile between 0 and 100 (for other functions it must be 0). */
747 enum ctables_summary_function function;
749 enum ctables_area_type calc_area;
750 double percentile; /* CTSF_PTILE only. */
752 /* How to display the result of the calculation.
754 'label' is a user-specified label, NULL if the user didn't specify
757 'user_area' is usually the same as 'calc_area', but when category labels
758 are rotated from one axis to another it swaps rows and columns.
760 'format' is the format for displaying the output. If
761 'is_ctables_format' is true, then 'format.type' is one of the special
762 CTEF_* formats instead of the standard ones. */
764 enum ctables_area_type user_area;
765 struct fmt_spec format;
766 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Initializes DST as a copy of SRC.  The label is duplicated with
   xstrdup_if_nonnull(), so DST gets its own copy of the string (or a null
   label if SRC has none). */
773 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
774 const struct ctables_summary_spec *src)
777 dst->label = xstrdup_if_nonnull (src->label);
781 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is
   allocated (or left null when SRC is empty) and each spec is cloned
   individually with ctables_summary_spec_clone().  *DST is then assigned
   from a compound literal, so any member not named in that initializer
   ends up zeroed. */
788 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
789 const struct ctables_summary_spec_set *src)
791 struct ctables_summary_spec *specs
792 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
793 for (size_t i = 0; i < src->n; i++)
794 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
796 *dst = (struct ctables_summary_spec_set) {
801 .is_scale = src->is_scale,
/* Releases the contents of SET: every spec in set->specs is uninitialized
   with ctables_summary_spec_uninit() and the 'listwise_vars' array is
   freed. */
806 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
808 for (size_t i = 0; i < set->n; i++)
809 ctables_summary_spec_uninit (&set->specs[i]);
810 free (set->listwise_vars);
/* Parses a column width setting.  An optional '=' is skipped, and then the
   syntax is either the keyword DEFAULT or a number in the closed range
   [0, DBL_MAX].  NAME is passed to lex_force_num_range_closed(), which
   enforces the range.  A number is stored in *WIDTH. */
815 parse_col_width (struct lexer *lexer, const char *name, double *width)
817 lex_match (lexer, T_EQUALS);
818 if (lex_match_id (lexer, "DEFAULT"))
820 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
822 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER; B receives the parsed setting.
   Any other token is reported as a syntax error that names both
   keywords. */
832 parse_bool (struct lexer *lexer, bool *b)
834 if (lex_match_id (lexer, "NO"))
836 else if (lex_match_id (lexer, "YES"))
840 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table
   is generated from "ctables.inc": the S() macro is defined so that each
   entry expands to a designated initializer mapping the function's enum
   value to its AVAILABILITY field. */
846 static enum ctables_function_availability
847 ctables_function_availability (enum ctables_summary_function f)
849 static enum ctables_function_availability availability[] = {
850 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
851 #include "ctables.inc"
855 return availability[f];
/* Returns true if F is one of the two count functions, CTSF_COUNT or
   CTSF_ECOUNT. */
859 ctables_summary_function_is_count (enum ctables_summary_function f)
861 return f == CTSF_COUNT || f == CTSF_ECOUNT;
/* Parses a summary function name from LEXER and decomposes it into
   *FUNCTION, *WEIGHTED, and *AREA.

   The identifier is taken apart from left to right:

   - A leading "U" or "u" is stripped and makes *WEIGHTED false.

   - An area name from ctables_area_type_name[] is then matched, ignoring
     case.  The names are tried in enum order, which is why the enum keeps
     CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL: the longer names must
     be tried before their common prefix.

   - What remains is compared, ignoring case, against the base name of every
     entry in ctables_function_info[].

   If no function matches, "Expecting summary function name." is
   reported. */
865 parse_ctables_summary_function (struct lexer *lexer,
866 enum ctables_summary_function *function,
868 enum ctables_area_type *area)
870 if (!lex_force_id (lexer))
873 struct substring name = lex_tokss (lexer);
874 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
876 bool has_area = false;
878 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
879 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
884 if (ss_equals_case (name, ss_cstr ("PCT")))
886 /* Special case where .COUNT suffix is omitted. */
887 *function = CTSF_areaPCT_COUNT;
894 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
896 const struct ctables_function_info *cfi = &ctables_function_info[f];
897 if (ss_equals_case (cfi->basename, name))
/* A base-name match is further checked against the prefixes that were
   stripped: the unweighted prefix is tested against 'may_be_unweighted', and
   the presence of an area prefix against 'is_area'. */
900 if (!*weighted && !cfi->may_be_unweighted)
902 if (has_area != cfi->is_area)
910 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS and everything it owns: the summary spec sets for each
   summary variant, both sub-axes (recursively), and the source location.
   NOTE(review): ctables_axis_parse_primary() calls this with a pointer that
   may be null, so a null AXIS is presumably accepted -- confirm. */
915 ctables_axis_destroy (struct ctables_axis *axis)
923 for (size_t i = 0; i < N_CSVS; i++)
924 ctables_summary_spec_set_uninit (&axis->specs[i]);
929 ctables_axis_destroy (axis->subs[0]);
930 ctables_axis_destroy (axis->subs[1]);
933 msg_location_destroy (axis->loc);
/* Allocates a new axis node that combines SUB0 and SUB1.  The sub-axes are
   stored in the node, and ctables_axis_destroy() destroys them along with
   it, so the caller gives up ownership of both.  The node's source location
   runs from token offset START_OFS through the token just before LEXER's
   current one. */
937 static struct ctables_axis *
938 ctables_axis_new_nonterminal (enum ctables_axis_op op,
939 struct ctables_axis *sub0,
940 struct ctables_axis *sub1,
941 struct lexer *lexer, int start_ofs)
943 struct ctables_axis *axis = xmalloc (sizeof *axis);
944 *axis = (struct ctables_axis) {
946 .subs = { sub0, sub1 },
947 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
952 struct ctables_axis_parse_ctx
955 struct dictionary *dict;
957 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   Each function's format class comes from the FORMAT field of its
   "ctables.inc" entry.  Depending on that class, the result is F40.0,
   PCT40.1, or VAR's own print format. */
960 static struct fmt_spec
961 ctables_summary_default_format (enum ctables_summary_function function,
962 const struct variable *var)
964 static const enum ctables_format default_formats[] = {
965 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
966 #include "ctables.inc"
969 switch (default_formats[function])
972 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
975 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
978 return *var_get_print_format (var);
/* Returns the default label for SPEC as a string that is marked for
   translation with N_() but not yet translated.

   The label depends on the function, on whether the spec is weighted ('w'),
   and, for the area functions, on the user-visible area ('a', taken from
   spec->user_area rather than spec->calc_area).

   CTSF_PTILE must not reach this function: its label embeds the percentile
   value, which ctables_summary_label() formats itself. */
986 ctables_summary_label__ (const struct ctables_summary_spec *spec)
988 bool w = spec->weighted;
989 enum ctables_area_type a = spec->user_area;
990 switch (spec->function)
993 return w ? N_("Count") : N_("Unweighted Count");
996 return N_("Adjusted Count");
/* Percentage of count within an area. */
998 case CTSF_areaPCT_COUNT:
1001 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1002 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1003 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1004 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1005 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1006 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1007 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
/* Percentage of valid N within an area. */
1011 case CTSF_areaPCT_VALIDN:
1014 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1015 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1016 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1017 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1018 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1019 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1020 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
/* Percentage of total N within an area. */
1024 case CTSF_areaPCT_TOTALN:
1027 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1028 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1029 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1030 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1031 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1032 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1033 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
/* Functions of a single cell.  Of these, only the mean has a distinct
   unweighted label here. */
1037 case CTSF_MAXIMUM: return N_("Maximum");
1038 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1039 case CTSF_MEDIAN: return N_("Median");
1040 case CTSF_MINIMUM: return N_("Minimum");
1041 case CTSF_MISSING: return N_("Missing");
1042 case CTSF_MODE: return N_("Mode");
1043 case CTSF_PTILE: NOT_REACHED ();
1044 case CTSF_RANGE: return N_("Range");
1045 case CTSF_SEMEAN: return N_("Std Error of Mean");
1046 case CTSF_STDDEV: return N_("Std Deviation");
1047 case CTSF_SUM: return N_("Sum");
1048 case CTSF_TOTALN: return N_("Total N");
1049 case CTSF_ETOTALN: return N_("Adjusted Total N");
1050 case CTSF_VALIDN: return N_("Valid N");
1051 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1052 case CTSF_VARIANCE: return N_("Variance");
/* Percentage of sum within an area. */
1053 case CTSF_areaPCT_SUM:
1056 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1057 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1058 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1059 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1060 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1061 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1062 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1069 /* Don't bother translating these: they are for developers only. */
1070 case CTAT_TABLE: return "Table ID";
1071 case CTAT_LAYER: return "Layer ID";
1072 case CTAT_LAYERROW: return "Layer Row ID";
1073 case CTAT_LAYERCOL: return "Layer Column ID";
1074 case CTAT_SUBTABLE: return "Subtable ID";
1075 case CTAT_ROW: return "Row ID";
1076 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value that labels SPEC.

   Two kinds of label are produced:

   - A default label.  For CTSF_PTILE it is built here, because it embeds the
     percentile with two decimals; every other function takes its text from
     ctables_summary_label__().

   - A label built from the user-specified spec->label, in which every
     occurrence of the marker ")CILEVEL" is replaced by CILEVEL formatted
     with "%g".

   NOTE(review): presumably the default is used exactly when spec->label is
   null -- confirm against the guarding condition. */
1084 static struct pivot_value *
1085 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1089 if (spec->function == CTSF_PTILE)
1091 double p = spec->percentile;
1092 char *s = (spec->weighted
1093 ? xasprintf (_("Percentile %.2f"), p)
1094 : xasprintf (_("Unweighted Percentile %.2f"), p));
1095 return pivot_value_new_user_text_nocopy (s);
1098 return pivot_value_new_text (ctables_summary_label__ (spec));
1102 struct substring in = ss_cstr (spec->label);
1103 struct substring target = ss_cstr (")CILEVEL");
1105 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next marker, then step past it. */
1108 size_t chunk = ss_find_substring (in, target);
1109 ds_put_substring (&out, ss_head (in, chunk));
1110 ss_advance (&in, chunk);
/* The accumulated string is handed over to the new pivot_value. */
1112 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the marker itself and emit the confidence level in its place. */
1114 ss_advance (&in, target.length);
1115 ds_put_format (&out, "%g", cilevel);
/* Formats the syntax name of a summary function into BUFFER, which has room
   for BUFSIZE bytes.  The name is assembled from an optional "U" prefix for
   unweighted functions, the name of AREA for area functions only, and the
   function's base name.  snprintf() truncates the result if it does not
   fit. */
1121 ctables_summary_function_name (enum ctables_summary_function function,
1123 enum ctables_area_type area,
1124 char *buffer, size_t bufsize)
1126 const struct ctables_function_info *cfi = &ctables_function_info[function];
1127 snprintf (buffer, bufsize, "%s%s%s",
1128 weighted ? "" : "U",
1129 cfi->is_area ? ctables_area_type_name[area] : "",
1130 cfi->basename.string);
/* Adds a summary specification to AXIS for summary variant SV.

   For a variable axis (CTAO_VAR), the function's availability is checked
   first.  A function that needs a multiple response set, or one that needs a
   scale variable when AXIS is not scale and SV is not CSV_TOTAL, is reported
   as an error at LOC with a follow-up note at the axis's own location.
   The new spec is then appended to axis->specs[SV], growing the array as
   needed.  LABEL is copied, and a null FORMAT selects the default format for
   FUNCTION and the axis variable.

   For any other axis, the spec is added recursively to both sub-axes. */
1135 add_summary_spec (struct ctables_axis *axis,
1136 enum ctables_summary_function function, bool weighted,
1137 enum ctables_area_type area, double percentile,
1138 const char *label, const struct fmt_spec *format,
1139 bool is_ctables_format, const struct msg_location *loc,
1140 enum ctables_summary_variant sv)
1142 if (axis->op == CTAO_VAR)
/* The printable function name is only needed for the error messages
   below. */
1144 char function_name[128];
1145 ctables_summary_function_name (function, weighted, area,
1146 function_name, sizeof function_name);
1147 const char *var_name = var_get_name (axis->var);
1148 switch (ctables_function_availability (function))
1152 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1153 "response sets."), function_name);
1154 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* Totals are exempt from the scale-only restriction. */
1160 if (!axis->scale && sv != CSV_TOTAL)
1163 _("Summary function %s applies only to scale variables."),
1165 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1175 struct ctables_summary_spec_set *set = &axis->specs[sv];
1176 if (set->n >= set->allocated)
1177 set->specs = x2nrealloc (set->specs, &set->allocated,
1178 sizeof *set->specs);
1180 struct ctables_summary_spec *dst = &set->specs[set->n++];
1181 *dst = (struct ctables_summary_spec) {
1182 .function = function,
1183 .weighted = weighted,
1186 .percentile = percentile,
1187 .label = xstrdup_if_nonnull (label),
1188 .format = (format ? *format
1189 : ctables_summary_default_format (function, axis->var)),
1190 .is_ctables_format = is_ctables_format,
1196 for (size_t i = 0; i < 2; i++)
1197 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1198 percentile, label, format, is_ctables_format,
1205 static struct ctables_axis *ctables_axis_parse_stack (
1206 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name optionally followed by [S] or [C].

   For a variable, [S] forces it to be treated as scale and [C] as
   categorical; with neither, the variable's measurement level decides.  A
   string variable cannot be scale, so that combination is reported as an
   error and the new axis is destroyed. */
1209 static struct ctables_axis *
1210 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1212 if (lex_match (ctx->lexer, T_LPAREN))
1214 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* 'sub' may be null here, if the nested parse failed. */
1215 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1217 ctables_axis_destroy (sub);
1223 if (!lex_force_id (ctx->lexer))
1226 int start_ofs = lex_ofs (ctx->lexer);
1227 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1231 struct ctables_axis *axis = xmalloc (sizeof *axis);
1232 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1234 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1235 : lex_match_phrase (ctx->lexer, "[C]") ? false
1236 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S] or [C] suffix. */
1237 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1238 lex_ofs (ctx->lexer) - 1);
1239 if (axis->scale && var_is_alpha (var))
1241 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1243 var_get_name (var));
1244 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit.  strcspn()
   yields the length of the leading run without digits, so the byte at that
   index is the terminating null exactly when S has no digit. */
1252 has_digit (const char *s)
1254 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier for a summary into FORMAT.

   The format name is first checked, ignoring case, against the
   CTABLES-specific formats NEGPAREN, NEQUAL, PAREN, and PCTPAREN, which map
   to the CTEF_* types.  Any other name is handed to the ordinary format
   parser and must be a valid numeric output format; in that case
   *IS_CTABLES_FORMAT is set to false.

   A CTABLES-specific format is rejected if its width is too small or if its
   decimals exceed width minus one, with the error attached to the previous
   token.  Otherwise *IS_CTABLES_FORMAT is set to true. */
1258 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1259 bool *is_ctables_format)
1261 char type[FMT_TYPE_LEN_MAX + 1];
1262 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1265 if (!strcasecmp (type, "NEGPAREN"))
1266 format->type = CTEF_NEGPAREN;
1267 else if (!strcasecmp (type, "NEQUAL"))
1268 format->type = CTEF_NEQUAL;
1269 else if (!strcasecmp (type, "PAREN"))
1270 format->type = CTEF_PAREN;
1271 else if (!strcasecmp (type, "PCTPAREN"))
1272 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific name: fall back to a standard numeric output
   format. */
1275 *is_ctables_format = false;
1276 return (parse_format_specifier (lexer, format)
1277 && fmt_check_output (format)
1278 && fmt_check_type_compat (format, VAL_NUMERIC));
1284 lex_next_error (lexer, -1, -1,
1285 _("Output format %s requires width 2 or greater."), type);
1288 else if (format->d > format->w - 1)
1290 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1291 "greater than decimals."), type);
1296 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a summary function name, then (for PTILE only) a
   percentile between 0 and 100, then an optional quoted label, then an
   optional output format.  An identifier containing a digit is taken to be a
   format.  Each specification is added to the axis with
   add_summary_spec().

   Specifications may be separated by commas.  The list starts out describing
   ordinary cells (CSV_CELL); the keyword TOTALS followed by '[' starts a
   nested list for totals (CSV_TOTAL), which needs its own closing ']'.

   On failure the partially built axis is destroyed. */
1301 static struct ctables_axis *
1302 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1304 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1305 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1308 enum ctables_summary_variant sv = CSV_CELL;
1311 int start_ofs = lex_ofs (ctx->lexer);
1313 /* Parse function. */
1314 enum ctables_summary_function function;
1316 enum ctables_area_type area;
1317 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1321 /* Parse percentile. */
1322 double percentile = 0;
1323 if (function == CTSF_PTILE)
1325 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1327 percentile = lex_number (ctx->lexer);
1328 lex_get (ctx->lexer);
/* Optional label: a copy of the string token's text. */
1333 if (lex_is_string (ctx->lexer))
1335 label = ss_xstrdup (lex_tokss (ctx->lexer));
1336 lex_get (ctx->lexer);
/* Optional format: recognized by an identifier that contains a digit. */
1340 struct fmt_spec format;
1341 const struct fmt_spec *formatp;
1342 bool is_ctables_format = false;
1343 if (lex_token (ctx->lexer) == T_ID
1344 && has_digit (lex_tokcstr (ctx->lexer)))
1346 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1347 &is_ctables_format))
/* The location spans the whole specification and is only needed while
   add_summary_spec() runs. */
1357 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1358 lex_ofs (ctx->lexer) - 1);
1359 add_summary_spec (sub, function, weighted, area, percentile, label,
1360 formatp, is_ctables_format, loc, sv);
1362 msg_location_destroy (loc);
1364 lex_match (ctx->lexer, T_COMMA);
1365 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1367 if (!lex_force_match (ctx->lexer, T_LBRACK))
1371 else if (lex_match (ctx->lexer, T_RBRACK))
/* A TOTALS list is nested in the outer list, so a second ']' is required. */
1373 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1380 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a variable that is treated as
   scale.  A variable node yields itself if it is scale and null otherwise;
   any other node is searched through its two sub-axes, in order. */
1384 static const struct ctables_axis *
1385 find_scale (const struct ctables_axis *axis)
1389 else if (axis->op == CTAO_VAR)
1390 return axis->scale ? axis : NULL;
1393 for (size_t i = 0; i < 2; i++)
1395 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A variable
   node yields itself if it qualifies and null otherwise; any other node is
   searched through its two sub-axes, in order. */
1403 static const struct ctables_axis *
1404 find_categorical_summary_spec (const struct ctables_axis *axis)
1408 else if (axis->op == CTAO_VAR)
1409 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1412 for (size_t i = 0; i < 2; i++)
1414 const struct ctables_axis *sum
1415 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: postfix expressions joined by '>'.

   Each '>' creates a CTAO_NEST node over the left and right operands.  Two
   restrictions are enforced on each new node:

   - The outer and inner operands may not both contain a scale variable.

   - A categorical variable in the outer operand may not carry summary
     specifications; they are only allowed at the innermost nesting level.

   A violation is reported with follow-up notes that point at the offending
   variables.  The new nest node is then destroyed, which also destroys both
   of its operands. */
1423 static struct ctables_axis *
1424 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1426 int start_ofs = lex_ofs (ctx->lexer);
1427 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1431 while (lex_match (ctx->lexer, T_GT))
1433 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* No node owns 'lhs' yet on this path, so it is destroyed explicitly. */
1436 ctables_axis_destroy (lhs);
1440 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1441 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1443 const struct ctables_axis *outer_scale = find_scale (lhs);
1444 const struct ctables_axis *inner_scale = find_scale (rhs);
1445 if (outer_scale && inner_scale)
1447 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1448 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1449 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1450 ctables_axis_destroy (nest);
1454 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1457 msg_at (SE, nest->loc,
1458 _("Summaries may only be requested for categorical variables "
1459 "at the innermost nesting level."));
1460 msg_at (SN, outer_sum->loc,
1461 _("This outer categorical variable has a summary."));
1462 ctables_axis_destroy (nest);
/* Parses a stacking expression: nest expressions joined by '+'.  Each '+'
   wraps the expression parsed so far and the next operand in a new
   CTAO_STACK node, so the resulting tree is left-associative.  Every node's
   location starts at the first token of the whole expression. */
1472 static struct ctables_axis *
1473 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1475 int start_ofs = lex_ofs (ctx->lexer);
1476 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1480 while (lex_match (ctx->lexer, T_PLUS))
1482 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* No node owns 'lhs' yet on this path, so it is destroyed explicitly. */
1485 ctables_axis_destroy (lhs);
1489 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1490 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores the resulting
   tree in t->axes[a].  On the main path the result is whether
   t->axes[a] is non-null after parsing.
   When the current token is T_BY, T_SLASH, or T_ENDCMD a separate early
   path is taken.  NOTE(review): that path is not visible in this
   listing; presumably it treats the axis as empty. */
1497 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1498 struct ctables *ct, struct ctables_table *t,
1499 enum pivot_axis_type a)
1501 if (lex_token (lexer) == T_BY
1502 || lex_token (lexer) == T_SLASH
1503 || lex_token (lexer) == T_ENDCMD)
1506 struct ctables_axis_parse_ctx ctx = {
1512 t->axes[a] = ctables_axis_parse_stack (&ctx);
1513 return t->axes[a] != NULL;
/* Called by ctables_table_destroy() on t->chisq.
   NOTE(review): the body is not visible in this listing; presumably it
   releases CHISQ. */
1517 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Called by ctables_table_destroy() on t->pairwise.
   NOTE(review): the body is not visible in this listing; presumably it
   releases PAIRWISE. */
1523 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources that table T holds, in this order: its
   sections, its category references and the array holding them, the
   axis tree and stack for each pivot axis, the summary spec array, the
   category-label value map and array, and finally the chisq and
   pairwise settings.
   NOTE(review): further frees, including that of T itself, may sit on
   lines not visible in this listing. */
1529 ctables_table_destroy (struct ctables_table *t)
1534 for (size_t i = 0; i < t->n_sections; i++)
1535 ctables_section_uninit (&t->sections[i]);
/* Category sets are reference counted, so only a reference is dropped. */
1538 for (size_t i = 0; i < t->n_categories; i++)
1539 ctables_categories_unref (t->categories[i]);
1540 free (t->categories);
1542 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1544 ctables_axis_destroy (t->axes[a]);
1545 ctables_stack_uninit (&t->stacks[a]);
1547 free (t->summary_specs.specs);
/* The _SAFE iteration form is used because nodes are deleted while
   walking the map. */
1549 struct ctables_value *ctv, *next_ctv;
1550 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1551 &t->clabels_values_map)
/* Values are destroyed using the width of t->clabels_example. */
1553 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1554 hmap_delete (&t->clabels_values_map, &ctv->node);
1557 hmap_destroy (&t->clabels_values_map);
1558 free (t->clabels_values);
1564 ctables_chisq_destroy (t->chisq);
1565 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources that CT holds: every postcompute in
   ct->postcomputes (its location, expression, and summary specs), the
   map itself, the CTABLES format settings, the reference to the pivot
   table look, and every table in ct->tables.
   NOTE(review): additional frees, including that of CT itself, may sit
   on lines not visible in this listing. */
1570 ctables_destroy (struct ctables *ct)
1575 struct ctables_postcompute *pc, *next_pc;
1576 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1580 msg_location_destroy (pc->location);
1581 ctables_pcexpr_destroy (pc->expr);
1585 ctables_summary_spec_set_uninit (pc->specs);
1588 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1591 hmap_destroy (&ct->postcomputes);
1593 fmt_settings_uninit (&ct->ctables_formats);
1594 pivot_table_look_unref (ct->look);
1598 for (size_t i = 0; i < ct->n_tables; i++)
1599 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose numeric range bounds are LOW and
   HIGH.
   NOTE(review): any other members set by the initializer are on a line
   not visible in this listing. */
1604 static struct ctables_category
1605 cct_nrange (double low, double high)
1607 return (struct ctables_category) {
1609 .nrange = { low, high }
/* Returns, by value, a category whose string range bounds are LOW and
   HIGH.  The substrings are stored as passed, without copying.
   NOTE(review): any other members set by the initializer are on a line
   not visible in this listing. */
1613 static struct ctables_category
1614 cct_srange (struct substring low, struct substring high)
1616 return (struct ctables_category) {
1618 .srange = { low, high }
/* Parses the optional label that follows SUBTOTAL or HSUBTOTAL and
   stores a CCT_SUBTOTAL category in *CAT.  If a T_EQUALS token follows,
   a string token is required and a copy of it becomes the label;
   otherwise the label is a copy of the translated word "Subtotal".
   HIDE_SUBCATEGORIES is recorded in the category unchanged.
   NOTE(review): the return statements are not visible in this listing;
   the caller returns this function's result directly. */
1623 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1624 struct ctables_category *cat)
1627 if (lex_match (lexer, T_EQUALS))
1629 if (!lex_force_string (lexer))
1632 total_label = ss_xstrdup (lex_tokss (lexer));
1636 total_label = xstrdup (_("Subtotal"));
1638 *cat = (struct ctables_category) {
1639 .type = CCT_SUBTOTAL,
1640 .hide_subcategories = hide_subcategories,
1641 .total_label = total_label
/* Recodes the current token's string between "UTF-8" and DICT's
   encoding and trims trailing spaces from the result.
   NOTE(review): presumably the conversion is from UTF-8 into the
   dictionary encoding; confirm against recode_substring_pool()'s
   parameter order.  Advancing the lexer and returning the result are
   not visible in this listing. */
1646 static struct substring
1647 parse_substring (struct lexer *lexer, struct dictionary *dict)
1649 struct substring s = recode_substring_pool (
1650 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1651 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list and stores it in *CAT.

   Forms recognized, in the order tested:
   - OTHERNM and MISSING keywords;
   - SUBTOTAL and HSUBTOTAL (delegated to
     ctables_table_parse_subtotal(), with hide_subcategories false and
     true respectively);
   - LO THRU followed by a string or a number (a range open below);
   - a number, optionally followed by THRU and then HI or a number;
   - a string, optionally followed by THRU and then HI or a string;
   - T_AND followed by an identifier that names a known postcompute.

   Open range ends are represented by -DBL_MAX / DBL_MAX for numeric
   ranges and by a substring whose .string is NULL for string ranges.
   Any other token is reported with lex_error().

   NOTE(review): the return statements and the declaration of the `ct'
   parameter are on lines not visible in this listing. */
1657 ctables_table_parse_explicit_category (struct lexer *lexer,
1658 struct dictionary *dict,
1660 struct ctables_category *cat)
1662 if (lex_match_id (lexer, "OTHERNM"))
1663 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1664 else if (lex_match_id (lexer, "MISSING"))
1665 *cat = (struct ctables_category) { .type = CCT_MISSING };
1666 else if (lex_match_id (lexer, "SUBTOTAL"))
1667 return ctables_table_parse_subtotal (lexer, false, cat);
1668 else if (lex_match_id (lexer, "HSUBTOTAL"))
1669 return ctables_table_parse_subtotal (lexer, true, cat);
1670 else if (lex_match_id (lexer, "LO"))
1672 if (!lex_force_match_id (lexer, "THRU"))
1674 if (lex_is_string (lexer))
/* A null .string marks the open lower end of the string range. */
1676 struct substring sr0 = { .string = NULL };
1677 struct substring sr1 = parse_substring (lexer, dict);
1678 *cat = cct_srange (sr0, sr1);
1680 else if (lex_force_num (lexer))
1682 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1688 else if (lex_is_number (lexer))
1690 double number = lex_number (lexer);
1692 if (lex_match_id (lexer, "THRU"))
1694 if (lex_match_id (lexer, "HI"))
1695 *cat = cct_nrange (number, DBL_MAX);
1698 if (!lex_force_num (lexer))
1700 *cat = cct_nrange (number, lex_number (lexer));
1705 *cat = (struct ctables_category) {
1710 else if (lex_is_string (lexer))
1712 struct substring s = parse_substring (lexer, dict);
1713 if (lex_match_id (lexer, "THRU"))
1715 if (lex_match_id (lexer, "HI"))
/* A null .string marks the open upper end of the string range. */
1717 struct substring sr1 = { .string = NULL };
1718 *cat = cct_srange (s, sr1);
1722 if (!lex_force_string (lexer))
1727 struct substring sr1 = parse_substring (lexer, dict);
1728 *cat = cct_srange (s, sr1);
1732 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1734 else if (lex_match (lexer, T_AND))
1736 if (!lex_force_id (lexer))
1738 struct ctables_postcompute *pc = ctables_find_postcompute (
1739 ct, lex_tokcstr (lexer));
/* The location passed to msg_at() is obtained from lex_get_location()
   with offsets -1 and 0. */
1742 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1743 msg_at (SE, loc, _("Unknown postcompute &%s."),
1744 lex_tokcstr (lexer));
1745 msg_location_destroy (loc);
1750 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1754 lex_error (lexer, NULL);
/* Parses S, in DICT's encoding, as input format FORMAT using data_in()
   with the current format settings.  If data_in() yields an error
   string, it is reported at LOCATION together with the format's name.
   NOTE(review): how the parsed value is stored through N, how the
   error string is released, and the return statements are not visible
   in this listing; callers treat a false result as failure. */
1762 parse_category_string (struct msg_location *location,
1763 struct substring s, const struct dictionary *dict,
1764 enum fmt_type format, double *n)
1767 char *error = data_in (s, dict_get_encoding (dict), format,
1768 settings_get_fmt_settings (), &v, 0, NULL);
1771 msg_at (SE, location,
1772 _("Failed to parse category specification as format %s: %s."),
1773 fmt_name (format), error);
/* Scans CATS for the category that category-reference expression E
   designates.  The comparison depends on e->op:
   - CTPO_CAT_NUMBER: a CCT_NUMBER category with an equal number;
   - CTPO_CAT_STRING: a CCT_STRING category with an equal string;
   - CTPO_CAT_NRANGE / CTPO_CAT_SRANGE: a range category whose two
     bounds both match (string bounds via nullable_substring_equal());
   - CTPO_CAT_MISSING / CTPO_CAT_OTHERNM / CTPO_CAT_TOTAL: a category
     of the corresponding type;
   - CTPO_CAT_SUBTOTAL: CCT_SUBTOTAL categories are compared against
     e->subtotal_index using the running n_subtotals count, with index
     0 handled as a separate case.

   After the scan, a subtotal reference with index 0 gets special
   treatment when more than one subtotal exists.
   NOTE(review): the statements that update `best' and n_subtotals, the
   action taken in that final case, and the return are not visible in
   this listing. */
1782 static struct ctables_category *
1783 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1784 const struct ctables_pcexpr *e)
1786 struct ctables_category *best = NULL;
1787 size_t n_subtotals = 0;
1788 for (size_t i = 0; i < cats->n_cats; i++)
1790 struct ctables_category *cat = &cats->cats[i];
1793 case CTPO_CAT_NUMBER:
1794 if (cat->type == CCT_NUMBER && cat->number == e->number)
1798 case CTPO_CAT_STRING:
1799 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1803 case CTPO_CAT_NRANGE:
1804 if (cat->type == CCT_NRANGE
1805 && cat->nrange[0] == e->nrange[0]
1806 && cat->nrange[1] == e->nrange[1])
1810 case CTPO_CAT_SRANGE:
1811 if (cat->type == CCT_SRANGE
1812 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1813 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1817 case CTPO_CAT_MISSING:
1818 if (cat->type == CCT_MISSING)
1822 case CTPO_CAT_OTHERNM:
1823 if (cat->type == CCT_OTHERNM)
1827 case CTPO_CAT_SUBTOTAL:
1828 if (cat->type == CCT_SUBTOTAL)
1831 if (e->subtotal_index == n_subtotals)
1833 else if (e->subtotal_index == 0)
1838 case CTPO_CAT_TOTAL:
1839 if (cat->type == CCT_TOTAL)
1853 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Looks up in CATS the category that expression E refers to, first
   converting string references to numeric ones when PARSE_FORMAT is
   not FMT_F:
   - a CTPO_CAT_STRING reference is parsed with parse_category_string()
     and looked up as a CTPO_CAT_NUMBER reference;
   - a CTPO_CAT_SRANGE reference has each bound parsed the same way and
     is looked up as a CTPO_CAT_NRANGE reference.  A bound whose .string
     is null maps to -DBL_MAX (lower) or DBL_MAX (upper).
   The converted reference keeps E's location.  All other references
   are looked up as given.
   NOTE(review): the result when parse_category_string() fails is not
   visible in this listing. */
1858 static struct ctables_category *
1859 ctables_find_category_for_postcompute (const struct dictionary *dict,
1860 const struct ctables_categories *cats,
1861 enum fmt_type parse_format,
1862 const struct ctables_pcexpr *e)
1864 if (parse_format != FMT_F)
1866 if (e->op == CTPO_CAT_STRING)
1869 if (!parse_category_string (e->location, e->string, dict,
1870 parse_format, &number))
1873 struct ctables_pcexpr e2 = {
1874 .op = CTPO_CAT_NUMBER,
1876 .location = e->location,
1878 return ctables_find_category_for_postcompute__ (cats, &e2);
1880 else if (e->op == CTPO_CAT_SRANGE)
1883 if (!e->srange[0].string)
1884 nrange[0] = -DBL_MAX;
1885 else if (!parse_category_string (e->location, e->srange[0], dict,
1886 parse_format, &nrange[0]))
1889 if (!e->srange[1].string)
1890 nrange[1] = DBL_MAX;
1891 else if (!parse_category_string (e->location, e->srange[1], dict,
1892 parse_format, &nrange[1]))
1895 struct ctables_pcexpr e2 = {
1896 .op = CTPO_CAT_NRANGE,
1897 .nrange = { nrange[0], nrange[1] },
1898 .location = e->location,
1900 return ctables_find_category_for_postcompute__ (cats, &e2);
1903 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that the category references in postcompute expression E,
   used by computed category PC_CAT, can be resolved against CATS.

   For a category-reference node the category is looked up using
   pc_cat->parse_format.  Two diagnostics are visible for unresolved
   references:
   - a SUBTOTAL reference without a position (subtotal_index 0) while
     CATS contains more than one subtotal is reported at CATS_LOCATION,
     with a note at the reference;
   - otherwise the missing category is reported at pc_cat->location,
     followed by a note that depends on whether the reference is to a
     subtotal, the total, or an ordinary category.

   Other node types recurse into both sub-expressions.
   NOTE(review): the condition that selects the diagnostics, the effect
   of pc_cat->pc->hide_source_cats, and the return statements are not
   visible in this listing. */
1907 ctables_recursive_check_postcompute (struct dictionary *dict,
1908 const struct ctables_pcexpr *e,
1909 struct ctables_category *pc_cat,
1910 const struct ctables_categories *cats,
1911 const struct msg_location *cats_location)
1915 case CTPO_CAT_NUMBER:
1916 case CTPO_CAT_STRING:
1917 case CTPO_CAT_NRANGE:
1918 case CTPO_CAT_SRANGE:
1919 case CTPO_CAT_MISSING:
1920 case CTPO_CAT_OTHERNM:
1921 case CTPO_CAT_SUBTOTAL:
1922 case CTPO_CAT_TOTAL:
1924 struct ctables_category *cat = ctables_find_category_for_postcompute (
1925 dict, cats, pc_cat->parse_format, e);
1928 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals to tell an ambiguous reference from a missing
   one. */
1930 size_t n_subtotals = 0;
1931 for (size_t i = 0; i < cats->n_cats; i++)
1932 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1933 if (n_subtotals > 1)
1935 msg_at (SE, cats_location,
1936 ngettext ("These categories include %zu instance "
1937 "of SUBTOTAL or HSUBTOTAL, so references "
1938 "from computed categories must refer to "
1939 "subtotals by position, "
1940 "e.g. SUBTOTAL[1].",
1941 "These categories include %zu instances "
1942 "of SUBTOTAL or HSUBTOTAL, so references "
1943 "from computed categories must refer to "
1944 "subtotals by position, "
1945 "e.g. SUBTOTAL[1].",
1948 msg_at (SN, e->location,
1949 _("This is the reference that lacks a position."));
1954 msg_at (SE, pc_cat->location,
1955 _("Computed category &%s references a category not included "
1956 "in the category list."),
1958 msg_at (SN, e->location, _("This is the missing category."));
1959 if (e->op == CTPO_CAT_SUBTOTAL)
1960 msg_at (SN, cats_location,
1961 _("To fix the problem, add subtotals to the "
1962 "list of categories here."));
1963 else if (e->op == CTPO_CAT_TOTAL)
/* This note is issued with msg() and so carries no location, unlike the
   neighbouring notes. */
1964 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1965 "CATEGORIES specification."));
1967 msg_at (SN, cats_location,
1968 _("To fix the problem, add the missing category to the "
1969 "list of categories here."));
1972 if (pc_cat->pc->hide_source_cats)
1986 for (size_t i = 0; i < 2; i++)
1987 if (e->subs[i] && !ctables_recursive_check_postcompute (
1988 dict, e->subs[i], pc_cat, cats, cats_location))
/* Examines the N_VARS variables in VARS and, for a numeric variable,
   reports at cat->location that CAT's specification applies only to
   string variables, naming the offending variable.
   NOTE(review): the return statements are not visible in this listing;
   callers treat a false result as failure, so presumably the function
   returns false at the first numeric variable and true otherwise. */
1997 all_strings (struct variable **vars, size_t n_vars,
1998 const struct ctables_category *cat)
2000 for (size_t j = 0; j < n_vars; j++)
2001 if (var_is_numeric (vars[j]))
2003 msg_at (SE, cat->location,
2004 _("This category specification may be applied only to string "
2005 "variables, but this subcommand tries to apply it to "
2006 "numeric variable %s."),
2007 var_get_name (vars[j]));
/* Parses a CATEGORIES specification for table T.

   Syntax handled, in order:
   1. VARIABLES, an optional T_EQUALS, and a variable list (scratch
      variables excluded).
   2. An optional explicit category list enclosed in T_LBRACK and
      T_RBRACK, with entries optionally separated by T_COMMA.  Each
      entry records the source location it was parsed from.
   3. Settings, repeated until T_SLASH or T_ENDCMD: ORDER, KEY and
      MISSING (accepted only when no explicit categories were given),
      and TOTAL, LABEL, POSITION and EMPTY.

   One struct ctables_categories is created with n_refs equal to the
   number of variables and installed in t->categories at each variable's
   dictionary index, after dropping a reference to the set previously
   stored there.

   Afterwards a total category is inserted before or after the other
   categories according to POSITION, and when an explicit list was
   given each category is validated against the variables' types.
   String categories and string ranges are converted to numbers with
   the variables' common print format on the parse_strings path.

   NOTE(review): many short lines (error exits, `break', `else',
   several case labels, the start of the parse_strings definition) are
   not visible in this listing, so the exact control flow and cleanup
   on error cannot be confirmed from here. */
2014 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2015 struct ctables *ct, struct ctables_table *t)
2017 if (!lex_match_id (lexer, "VARIABLES"))
2019 lex_match (lexer, T_EQUALS);
2021 struct variable **vars;
2023 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all variables share one print format type;
   common_format becomes NULL as soon as two of them differ. */
2026 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2027 for (size_t i = 1; i < n_vars; i++)
2029 const struct fmt_spec *f = var_get_print_format (vars[i]);
2030 if (f->type != common_format->type)
2032 common_format = NULL;
2038 && (fmt_get_category (common_format->type)
2039 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* The new category set is shared by every listed variable. */
2041 struct ctables_categories *c = xmalloc (sizeof *c);
2042 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2043 for (size_t i = 0; i < n_vars; i++)
2045 struct ctables_categories **cp
2046 = &t->categories[var_get_dict_index (vars[i])];
2047 ctables_categories_unref (*cp);
2051 size_t allocated_cats = 0;
/* Token offsets of the explicit category list; -1 means no list was
   given. */
2052 int cats_start_ofs = -1;
2053 int cats_end_ofs = -1;
2054 if (lex_match (lexer, T_LBRACK))
2056 cats_start_ofs = lex_ofs (lexer);
2059 if (c->n_cats >= allocated_cats)
2060 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2062 int start_ofs = lex_ofs (lexer);
2063 struct ctables_category *cat = &c->cats[c->n_cats];
2064 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2066 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2069 lex_match (lexer, T_COMMA);
2071 while (!lex_match (lexer, T_RBRACK));
2072 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category; the ORDER, KEY and MISSING
   settings below modify it. */
2075 struct ctables_category cat = {
2077 .include_missing = false,
2078 .sort_ascending = true,
2080 bool show_totals = false;
2081 char *total_label = NULL;
2082 bool totals_before = false;
2083 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2085 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2087 lex_match (lexer, T_EQUALS);
2088 if (lex_match_id (lexer, "A"))
2089 cat.sort_ascending = true;
2090 else if (lex_match_id (lexer, "D"))
2091 cat.sort_ascending = false;
2094 lex_error_expecting (lexer, "A", "D");
2098 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2100 lex_match (lexer, T_EQUALS);
2101 if (lex_match_id (lexer, "VALUE"))
2102 cat.type = CCT_VALUE;
2103 else if (lex_match_id (lexer, "LABEL"))
2104 cat.type = CCT_LABEL;
/* Any other KEY is parsed as a summary function, optionally followed
   by a parenthesized variable and, for CTSF_PTILE, a percentile in
   the closed range 0 to 100. */
2107 cat.type = CCT_FUNCTION;
2108 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2109 &cat.weighted, &cat.area))
2112 if (lex_match (lexer, T_LPAREN))
2114 cat.sort_var = parse_variable (lexer, dict);
2118 if (cat.sort_function == CTSF_PTILE)
2120 lex_match (lexer, T_COMMA);
2121 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2123 cat.percentile = lex_number (lexer);
2127 if (!lex_force_match (lexer, T_RPAREN))
2130 else if (ctables_function_availability (cat.sort_function)
/* NOTE(review): the result is deliberately unused; the call appears to
   serve only to emit the "expecting (" diagnostic. */
2133 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2138 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2140 lex_match (lexer, T_EQUALS);
2141 if (lex_match_id (lexer, "INCLUDE"))
2142 cat.include_missing = true;
2143 else if (lex_match_id (lexer, "EXCLUDE"))
2144 cat.include_missing = false;
2147 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2151 else if (lex_match_id (lexer, "TOTAL"))
2153 lex_match (lexer, T_EQUALS);
2154 if (!parse_bool (lexer, &show_totals))
2157 else if (lex_match_id (lexer, "LABEL"))
2159 lex_match (lexer, T_EQUALS);
2160 if (!lex_force_string (lexer))
/* NOTE(review): confirm that a previously parsed total_label is freed
   before this assignment; no such line is visible in this listing. */
2163 total_label = ss_xstrdup (lex_tokss (lexer));
2166 else if (lex_match_id (lexer, "POSITION"))
2168 lex_match (lexer, T_EQUALS);
2169 if (lex_match_id (lexer, "BEFORE"))
2170 totals_before = true;
2171 else if (lex_match_id (lexer, "AFTER"))
2172 totals_before = false;
2175 lex_error_expecting (lexer, "BEFORE", "AFTER");
2179 else if (lex_match_id (lexer, "EMPTY"))
2181 lex_match (lexer, T_EQUALS);
2182 if (lex_match_id (lexer, "INCLUDE"))
2183 c->show_empty = true;
2184 else if (lex_match_id (lexer, "EXCLUDE"))
2185 c->show_empty = false;
2188 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expecting" message exist; the shorter one omits
   ORDER, KEY and MISSING, the settings that are gated on an empty
   category list above. */
2195 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2196 "TOTAL", "LABEL", "POSITION", "EMPTY");
2198 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2205 if (c->n_cats >= allocated_cats)
2206 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2207 c->cats[c->n_cats++] = cat;
2212 if (c->n_cats >= allocated_cats)
2213 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
/* The total goes in slot 0, after shifting the existing categories up,
   or in the slot just past the last category. */
2215 struct ctables_category *totals;
2218 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2219 totals = &c->cats[0];
2222 totals = &c->cats[c->n_cats];
2225 *totals = (struct ctables_category) {
2227 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, recording in each category a pointer to a subtotal. */
2231 struct ctables_category *subtotal = NULL;
2232 for (size_t i = totals_before ? 0 : c->n_cats;
2233 totals_before ? i < c->n_cats : i-- > 0;
2234 totals_before ? i++ : 0)
2236 struct ctables_category *cat = &c->cats[i];
2245 cat->subtotal = subtotal;
2248 case CCT_POSTCOMPUTE:
2259 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories against the variables' types. */
2264 if (cats_start_ofs != -1)
2266 for (size_t i = 0; i < c->n_cats; i++)
2268 struct ctables_category *cat = &c->cats[i];
2271 case CCT_POSTCOMPUTE:
2272 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2273 struct msg_location *cats_location
2274 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2275 bool ok = ctables_recursive_check_postcompute (
2276 dict, cat->pc->expr, cat, c, cats_location);
2277 msg_location_destroy (cats_location);
2284 for (size_t j = 0; j < n_vars; j++)
2285 if (var_is_alpha (vars[j]))
2287 msg_at (SE, cat->location,
2288 _("This category specification may be applied "
2289 "only to numeric variables, but this "
2290 "subcommand tries to apply it to string "
2292 var_get_name (vars[j]));
/* A string category is converted in place to a number: the string is
   parsed, its storage released, and the type switched to CCT_NUMBER. */
2301 if (!parse_category_string (cat->location, cat->string, dict,
2302 common_format->type, &n))
2305 ss_dealloc (&cat->string);
2307 cat->type = CCT_NUMBER;
2310 else if (!all_strings (vars, n_vars, cat))
/* A string range is converted in place to a numeric range in the same
   way; null bounds are handled separately from parsed ones. */
2319 if (!cat->srange[0].string)
2321 else if (!parse_category_string (cat->location,
2322 cat->srange[0], dict,
2323 common_format->type, &n[0]))
2326 if (!cat->srange[1].string)
2328 else if (!parse_category_string (cat->location,
2329 cat->srange[1], dict,
2330 common_format->type, &n[1]))
2333 ss_dealloc (&cat->srange[0]);
2334 ss_dealloc (&cat->srange[1]);
2336 cat->type = CCT_NRANGE;
2337 cat->nrange[0] = n[0];
2338 cat->nrange[1] = n[1];
2340 else if (!all_strings (vars, n_vars, cat))
2351 case CCT_EXCLUDED_MISSING:
/* Releases resources held inside NEST: the summary spec set of every
   summary variant and the index array of every area type.  NEST itself
   is not freed by the visible lines.
   NOTE(review): further frees may sit on lines not visible in this
   listing. */
2366 ctables_nest_uninit (struct ctables_nest *nest)
2369 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2370 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2371 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2372 free (nest->areas[at]);
/* Uninitializes each of the stack->n nests in STACK and then frees the
   array that holds them.  STACK itself is not freed by the visible
   lines.
   NOTE(review): any guard at the top of the body is not visible in
   this listing. */
2376 ctables_stack_uninit (struct ctables_stack *stack)
2380 for (size_t i = 0; i < stack->n; i++)
2381 ctables_nest_uninit (&stack->nests[i]);
2382 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new
   nest for every pair (a, b) with a taken from S0 and b from S1.

   Each new nest lists a's variables followed by b's.  Its scale index
   is a's when a has one; otherwise b's index shifted by a->n when b
   has one.  Summary specs for every variant are cloned from whichever
   nest was chosen as summary_src, a choice that depends on which of
   a and b has a variable in its CSV_CELL spec set.

   Both S0 and S1 are uninitialized before returning, so the caller
   must not use them afterwards.

   NOTE(review): the early exits, the assignments to summary_src, and
   the remaining initializer fields are not visible in this listing.
   The allocation passes s1.n * sizeof as the element size, so that
   product is computed before xnmalloc() can check it for overflow. */
2386 static struct ctables_stack
2387 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2394 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2395 for (size_t i = 0; i < s0.n; i++)
2396 for (size_t j = 0; j < s1.n; j++)
2398 const struct ctables_nest *a = &s0.nests[i];
2399 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists: a's first, then b's. */
2401 size_t allocate = a->n + b->n;
2402 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2404 for (size_t k = 0; k < a->n; k++)
2405 vars[n++] = a->vars[k];
2406 for (size_t k = 0; k < b->n; k++)
2407 vars[n++] = b->vars[k];
2408 assert (n == allocate);
2410 const struct ctables_nest *summary_src;
2411 if (!a->specs[CSV_CELL].var)
2413 else if (!b->specs[CSV_CELL].var)
2418 struct ctables_nest *new = &stack.nests[stack.n++];
2419 *new = (struct ctables_nest) {
/* b's variables follow a's, so b's scale index is offset by a->n. */
2421 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2422 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2426 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2427 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2429 ctables_stack_uninit (&s0);
2430 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S1 after S0: a new array
   holding S0's nests followed by S1's.  The nest structures are copied
   by assignment, so the new stack takes over whatever the originals
   pointed to.  Each nest taken from S1 has its group_head increased by
   s0.n to account for its new position.
   NOTE(review): the counter increment in the second loop, the release
   of the old arrays, and the return are not visible in this listing. */
2434 static struct ctables_stack
2435 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2437 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2438 for (size_t i = 0; i < s0.n; i++)
2439 stack.nests[stack.n++] = s0.nests[i];
2440 for (size_t i = 0; i < s1.n; i++)
2442 stack.nests[stack.n] = s1.nests[i];
2443 stack.nests[stack.n].group_head += s0.n;
2446 assert (stack.n == s0.n + s1.n);
/* Builds a stack containing exactly one nest for variable axis node A.
   The nest's scale index is 0 when A is a scale variable and SIZE_MAX
   otherwise.  If A has CSV_CELL summary specs or is a scale variable,
   the spec set of every summary variant is cloned from A and tagged
   with A's variable and scale flag.
   NOTE(review): the remaining initializer fields and the filling of
   the one-element `vars' array are not visible in this listing. */
2452 static struct ctables_stack
2453 var_fts (const struct ctables_axis *a)
2455 struct variable **vars = xmalloc (sizeof *vars);
2458 struct ctables_nest *nest = xmalloc (sizeof *nest);
2459 *nest = (struct ctables_nest) {
2462 .scale_idx = a->scale ? 0 : SIZE_MAX,
2464 if (a->specs[CSV_CELL].n || a->scale)
2465 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2467 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2468 nest->specs[sv].var = a->var;
2469 nest->specs[sv].is_scale = a->scale;
2471 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  One path yields an
   empty stack; the other visible paths combine the stacks enumerated
   from A's two sub-axes with stack_fts() or nest_fts().
   NOTE(review): the conditions that select each path (presumably a
   null A and a switch on a->op) are not visible in this listing.
   AXIS_TYPE is only passed through to the recursive calls in the
   visible lines. */
2474 static struct ctables_stack
2475 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2478 return (struct ctables_stack) { .n = 0 };
2486 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2487 enumerate_fts (axis_type, a->subs[1]));
2490 /* This should consider any of the scale variables found in the result to
2491 be linked to each other listwise for SMISSING=LISTWISE. */
2492 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2493 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function in one cell.  Which
   member is in use depends on the summary function, as the group
   comments below indicate.
   NOTE(review): several member declarations (such as the count and the
   min/max fields used elsewhere in this file) are on lines not visible
   in this listing. */
2499 union ctables_summary
2501 /* COUNT, VALIDN, TOTALN. */
2504 /* MINIMUM, MAXIMUM, RANGE. */
2511 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2512 struct moments1 *moments;
2514 /* MEDIAN, MODE, PTILE. */
2517 struct casewriter *writer;
2522 /* XXX multiple response */
/* Initializes accumulator S for the summary function in SS.  In the
   visible lines:
   - the minimum/maximum group starts with both bounds at SYSMIS;
   - the moments group creates a moments1 accumulator that tracks up to
     the variance;
   - the order-statistics group creates a sorting case writer over
     two-column numeric cases, sorted ascending on column 0.
   NOTE(review): most case labels and the count initialization are not
   visible in this listing. */
2526 ctables_summary_init (union ctables_summary *s,
2527 const struct ctables_summary_spec *ss)
2529 switch (ss->function)
2533 case CTSF_areaPCT_COUNT:
2534 case CTSF_areaPCT_VALIDN:
2535 case CTSF_areaPCT_TOTALN:
2550 s->min = s->max = SYSMIS;
2558 case CTSF_areaPCT_SUM:
2559 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric (width 0) columns; ctables_summary_add() writes the value
   to column 0 and the weight to column 1. */
2566 struct caseproto *proto = caseproto_create ();
2567 proto = caseproto_add_width (proto, 0);
2568 proto = caseproto_add_width (proto, 0);
2570 struct subcase ordering;
2571 subcase_init (&ordering, 0, 0, SC_ASCEND);
2572 s->writer = sort_create_writer (&ordering, proto);
2573 subcase_uninit (&ordering);
2574 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the
   summary function in SS: the moments accumulator for the moments
   group, the case writer for the order-statistics group.
   NOTE(review): most case labels are not visible in this listing. */
2584 ctables_summary_uninit (union ctables_summary *s,
2585 const struct ctables_summary_spec *ss)
2587 switch (ss->function)
2591 case CTSF_areaPCT_COUNT:
2592 case CTSF_areaPCT_VALIDN:
2593 case CTSF_areaPCT_TOTALN:
2614 case CTSF_areaPCT_SUM:
2615 moments1_destroy (s->moments);
2621 casewriter_destroy (s->writer);
/* Folds one observation into accumulator S for the summary function in
   SS.  VALUE is the observation of VAR.  The flags describe the case:
   IS_SCALE and IS_SCALE_MISSING concern the scale variable, IS_MISSING
   and EXCLUDED_MISSING the categorical classification.  D_WEIGHT and
   E_WEIGHT are two case weights; in the visible lines counts add
   D_WEIGHT (or 1.0 when SS is unweighted) in some branches and
   E_WEIGHT in others, while moments and order statistics use E_WEIGHT
   (or 1.0 when unweighted).
   Minimum/maximum, moments, and order statistics skip observations
   whose scale value is missing.  For order statistics the value and
   weight are written as columns 0 and 1 of a new case.
   NOTE(review): most case labels and several conditions are not
   visible in this listing, so which function uses which weight cannot
   be confirmed from here. */
2627 ctables_summary_add (union ctables_summary *s,
2628 const struct ctables_summary_spec *ss,
2629 const struct variable *var, const union value *value,
2630 bool is_scale, bool is_scale_missing,
2631 bool is_missing, bool excluded_missing,
2632 double d_weight, double e_weight)
2634 /* To determine whether a case is included in a given table for a particular
2635 kind of summary, consider the following charts for each variable in the
2636 table. Only if "yes" appears for every variable for the summary is the
2639 Categorical variables: VALIDN COUNT TOTALN
2640 Valid values in included categories yes yes yes
2641 Missing values in included categories --- yes yes
2642 Missing values in excluded categories --- --- yes
2643 Valid values in excluded categories --- --- ---
2645 Scale variables: VALIDN COUNT TOTALN
2646 Valid value yes yes yes
2647 Missing value --- yes yes
2649 Missing values include both user- and system-missing. (The system-missing
2650 value is always in an excluded category.)
2652 switch (ss->function)
2655 case CTSF_areaPCT_TOTALN:
2656 s->count += ss->weighted ? d_weight : 1.0;
2660 case CTSF_areaPCT_COUNT:
2661 if (is_scale || !excluded_missing)
2662 s->count += ss->weighted ? d_weight : 1.0;
2666 case CTSF_areaPCT_VALIDN:
2670 s->count += ss->weighted ? d_weight : 1.0;
2680 s->count += ss->weighted ? d_weight : 1.0;
2684 if (is_scale || !excluded_missing)
2685 s->count += e_weight;
2692 s->count += e_weight;
2696 s->count += e_weight;
2702 if (!is_scale_missing)
2704 assert (!var_is_alpha (var)); /* XXX? */
2705 if (s->min == SYSMIS || value->f < s->min)
2707 if (s->max == SYSMIS || value->f > s->max)
2717 case CTSF_areaPCT_SUM:
2718 if (!is_scale_missing)
2719 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2725 if (!is_scale_missing)
2727 double w = ss->weighted ? e_weight : 1.0;
2730 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2731 *case_num_rw_idx (c, 0) = value->f;
2732 *case_num_rw_idx (c, 1) = w;
2733 casewriter_write (s->writer, c);
/* Computes the final value of summary SS for CELL from accumulator S.

   Visible computations:
   - area percentages of COUNT, VALIDN and TOTALN divide s->count by
     the matching total of the calculation area (effective or
     unweighted, depending on ss->weighted) and multiply by 100,
     yielding SYSMIS when the denominator is zero;
   - the range is max minus min, or SYSMIS if either is SYSMIS;
   - moment-based results come from moments1_calculate(); the sum is
     weight times mean, and the standard deviation is the square root
     of the variance, each yielding SYSMIS if an input is SYSMIS;
   - the area percentage of a sum divides weight times mean by the
     area's sum for ss->sum_var_idx, yielding SYSMIS for a zero
     denominator;
   - percentile-type results and the mode read the accumulated cases
     back through a reader made from the writer and store the result in
     s->ovalue.

   NOTE(review): most case labels and the guards around the
   order-statistics blocks are not visible in this listing. */
2740 ctables_summary_value (const struct ctables_cell *cell,
2741 union ctables_summary *s,
2742 const struct ctables_summary_spec *ss)
2744 switch (ss->function)
2751 return cell->areas[ss->calc_area]->sequence;
2753 case CTSF_areaPCT_COUNT:
2755 const struct ctables_area *a = cell->areas[ss->calc_area];
2756 double a_count = ss->weighted ? a->e_count : a->u_count;
2757 return a_count ? s->count / a_count * 100 : SYSMIS;
2760 case CTSF_areaPCT_VALIDN:
2762 const struct ctables_area *a = cell->areas[ss->calc_area];
2763 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2764 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2767 case CTSF_areaPCT_TOTALN:
2769 const struct ctables_area *a = cell->areas[ss->calc_area];
2770 double a_total = ss->weighted ? a->e_total : a->u_total;
2771 return a_total ? s->count / a_total * 100 : SYSMIS;
2788 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2793 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2799 double weight, variance;
2800 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2801 return calc_semean (variance, weight);
2807 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2808 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2813 double weight, mean;
2814 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2815 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2821 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2825 case CTSF_areaPCT_SUM:
2827 double weight, mean;
2828 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2829 if (weight == SYSMIS || mean == SYSMIS)
2832 const struct ctables_area *a = cell->areas[ss->calc_area];
2833 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2834 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2835 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2842 struct casereader *reader = casewriter_make_reader (s->writer);
/* CTSF_PTILE uses the requested percentile; the other function sharing
   this code uses 0.5. */
2845 struct percentile *ptile = percentile_create (
2846 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2847 struct order_stats *os = &ptile->parent;
2848 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2849 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2850 statistic_destroy (&ptile->parent.parent);
2857 struct casereader *reader = casewriter_make_reader (s->writer);
2860 struct mode *mode = mode_create ();
2861 struct order_stats *os = &mode->parent;
2862 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2863 s->ovalue = mode->mode;
2864 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose
   variables are compared and the pivot axis whose cell values are
   examined. */
2872 struct ctables_cell_sort_aux
2874 const struct ctables_nest *nest;
2875 enum pivot_axis_type a;
/* Three-way comparison of two cells.  A_ and B_ point to pointers to
   struct ctables_cell; AUX_ points to a struct ctables_cell_sort_aux.

   The cells are compared variable by variable over aux->nest, skipping
   the scale variable, using the cell values on axis aux->a.  For each
   variable:
   - cells whose categories differ are ordered by comparing the
     category pointers;
   - otherwise the order depends on the category type: some types
     always compare equal, some compare the data values, and some
     compare value labels with strcmp() or fall back to the data
     values.  Where the category has a sort direction, the result is
     negated for descending order.

   NOTE(review): most case labels and the handling of missing labels
   are not visible in this listing. */
2879 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2881 const struct ctables_cell_sort_aux *aux = aux_;
2882 struct ctables_cell *const *ap = a_;
2883 struct ctables_cell *const *bp = b_;
2884 const struct ctables_cell *a = *ap;
2885 const struct ctables_cell *b = *bp;
2887 const struct ctables_nest *nest = aux->nest;
2888 for (size_t i = 0; i < nest->n; i++)
2889 if (i != nest->scale_idx)
2891 const struct variable *var = nest->vars[i];
2892 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2893 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories are ordered by pointer comparison.
   NOTE(review): presumably both point into the same category array, so
   this matches array order. */
2894 if (a_cv->category != b_cv->category)
2895 return a_cv->category > b_cv->category ? 1 : -1;
2897 const union value *a_val = &a_cv->value;
2898 const union value *b_val = &b_cv->value;
2899 switch (a_cv->category->type)
2905 case CCT_POSTCOMPUTE:
2906 case CCT_EXCLUDED_MISSING:
2907 /* Must be equal. */
2915 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2923 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2925 return a_cv->category->sort_ascending ? cmp : -cmp;
2931 const char *a_label = var_lookup_value_label (var, a_val);
2932 const char *b_label = var_lookup_value_label (var, b_val);
2938 cmp = strcmp (a_label, b_label);
2944 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2947 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes.  A_ and B_
   point to pointers to struct ctables_cell; AUX is unused.  The axes
   are examined in order, and when the leaf indexes on an axis differ
   the result is 1 if A's is greater and -1 otherwise.
   NOTE(review): the inequality test and the final return for equal
   cells are not visible in this listing. */
2959 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2960 const void *aux UNUSED)
2962 struct ctables_cell *const *ap = a_;
2963 struct ctables_cell *const *bp = b_;
2964 const struct ctables_cell *a = *ap;
2965 const struct ctables_cell *b = *bp;
2967 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2969 int al = a->axes[axis].leaf;
2970 int bl = b->axes[axis].leaf;
2972 return al > bl ? 1 : -1;
2980 For each ctables_table:
2981 For each combination of row vars:
2982 For each combination of column vars:
2983 For each combination of layer vars:
2985 Make a table of row values:
2986 Sort entries by row values
2987 Assign a 0-based index to each actual value
2988 Construct a dimension
2989 Make a table of column values
2990 Make a table of layer values
2992 Fill the table entry using the indexes from before.
/* Finds or creates the area of type AREA in section S to which CELL
   belongs.

   The lookup key is built from the variables that each axis's nest
   lists for AREA: the category pointer of each, plus the data value
   unless the category is a total, subtotal, or postcompute.  Existing
   areas with the same hash are compared against CELL through their
   example cell using the same rule.

   If no area matches, a new one is allocated with CELL as its example
   and, when the table has sum variables, a zeroed array of sums, and
   is inserted into s->areas[area].

   NOTE(review): the hash seed, the early return for a match, and the
   final return are not visible in this listing. */
2995 static struct ctables_area *
2996 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
2997 enum ctables_area_type area)
3000 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3002 const struct ctables_nest *nest = s->nests[a];
3003 for (size_t i = 0; i < nest->n_areas[area]; i++)
3005 size_t v_idx = nest->areas[area][i];
3006 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3007 hash = hash_pointer (cv->category, hash);
/* The data value is part of the key only for ordinary categories. */
3008 if (cv->category->type != CCT_TOTAL
3009 && cv->category->type != CCT_SUBTOTAL
3010 && cv->category->type != CCT_POSTCOMPUTE)
3011 hash = value_hash (&cv->value,
3012 var_get_width (nest->vars[v_idx]), hash);
3016 struct ctables_area *a;
3017 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3019 const struct ctables_cell *df = a->example;
/* NOTE(review): the loop variable `a' below shadows the area pointer
   `a' declared above for the duration of the loop. */
3020 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3022 const struct ctables_nest *nest = s->nests[a];
3023 for (size_t i = 0; i < nest->n_areas[area]; i++)
3025 size_t v_idx = nest->areas[area][i];
3026 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3027 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3028 if (cv1->category != cv2->category
3029 || (cv1->category->type != CCT_TOTAL
3030 && cv1->category->type != CCT_SUBTOTAL
3031 && cv1->category->type != CCT_POSTCOMPUTE
3032 && !value_equal (&cv1->value, &cv2->value,
3033 var_get_width (nest->vars[v_idx]))))
3042 struct ctables_sum *sums = (s->table->n_sum_vars
3043 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3046 a = xmalloc (sizeof *a);
3047 *a = (struct ctables_area) { .example = cell, .sums = sums };
3048 hmap_insert (&s->areas[area], &a->node, hash);
/* Wraps the string data of V, whose length is VAR's width, in a
   substring and trims trailing spaces from it.  The substring refers
   to V's own storage; no copy is made.
   NOTE(review): the return statement is not visible in this listing. */
3052 static struct substring
3053 rtrim_value (const union value *v, const struct variable *var)
3055 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3056 var_get_width (var));
3057 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of VAR, with trailing spaces
   removed, lies within the inclusive range SRANGE[0]..SRANGE[1] as
   ordered by ss_compare().  A bound whose .string is null is open, so
   that side is not checked. */
3062 in_string_range (const union value *v, const struct variable *var,
3063 const struct substring *srange)
3065 struct substring s = rtrim_value (v, var);
3066 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3067 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of VAR falls into.

   A numeric system-missing value is handled before the search.  The
   categories are then examined from last to first and matched by
   type: equal number, equal right-trimmed string, numeric range (with
   -DBL_MAX / DBL_MAX acting as open ends), string range, or missing
   value.  Implicit categories accept the value if it is not missing or
   if the category includes missing values.

   If nothing matches, the result is NULL for a missing value and
   otherwise the category remembered in `othernm'.

   NOTE(review): most case labels, the result for system-missing
   values, and the assignment to `othernm' are not visible in this
   listing. */
3070 static const struct ctables_category *
3071 ctables_categories_match (const struct ctables_categories *c,
3072 const union value *v, const struct variable *var)
3074 if (var_is_numeric (var) && v->f == SYSMIS)
3077 const struct ctables_category *othernm = NULL;
3078 for (size_t i = c->n_cats; i-- > 0; )
3080 const struct ctables_category *cat = &c->cats[i];
3084 if (cat->number == v->f)
3089 if (ss_equals (cat->string, rtrim_value (v, var)))
3094 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3095 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3100 if (in_string_range (v, var, cat->srange))
3105 if (var_is_value_missing (var, v))
3109 case CCT_POSTCOMPUTE:
3124 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3127 case CCT_EXCLUDED_MISSING:
3132 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C if it is the first or the last
   category, checking the first one first.
   NOTE(review): the final alternative of the conditional is not
   visible in this listing (presumably NULL).  The expression
   c->n_cats - 1 assumes C has at least one category; confirm that
   callers guarantee this. */
3135 static const struct ctables_category *
3136 ctables_categories_total (const struct ctables_categories *c)
3138 const struct ctables_category *first = &c->cats[0];
3139 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3140 return (first->type == CCT_TOTAL ? first
3141 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S that corresponds to case C and the per-axis
   categories in CATS, and sets up a new cell for that combination.

   For every variable in the nest of each axis, other than the scale variable
   of the nest, a cell is identified by its category pointer plus, unless the
   category is a total, subtotal, or postcompute, the value that the case has
   for that variable.  The same rule is used for hashing and for comparing. */
3145 static struct ctables_cell *
3146 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3147 const struct ctables_category *cats[PIVOT_N_AXES][10])
3150 enum ctables_summary_variant sv = CSV_CELL;
/* Fold the identifying categories and values into the hash. */
3151 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3153 const struct ctables_nest *nest = s->nests[a];
3154 for (size_t i = 0; i < nest->n; i++)
3155 if (i != nest->scale_idx)
3157 hash = hash_pointer (cats[a][i], hash);
3158 if (cats[a][i]->type != CCT_TOTAL
3159 && cats[a][i]->type != CCT_SUBTOTAL
3160 && cats[a][i]->type != CCT_POSTCOMPUTE)
3161 hash = value_hash (case_data (c, nest->vars[i]),
3162 var_get_width (nest->vars[i]), hash);
/* Compare each cell that shares the hash against the case, position by
   position. */
3168 struct ctables_cell *cell;
3169 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3171 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3173 const struct ctables_nest *nest = s->nests[a];
3174 for (size_t i = 0; i < nest->n; i++)
3175 if (i != nest->scale_idx
3176 && (cats[a][i] != cell->axes[a].cvs[i].category
3177 || (cats[a][i]->type != CCT_TOTAL
3178 && cats[a][i]->type != CCT_SUBTOTAL
3179 && cats[a][i]->type != CCT_POSTCOMPUTE
3180 && !value_equal (case_data (c, nest->vars[i]),
3181 &cell->axes[a].cvs[i].value,
3182 var_get_width (nest->vars[i])))))
/* Allocate and initialize a new cell. */
3191 cell = xmalloc (sizeof *cell);
3194 cell->omit_areas = 0;
3195 cell->postcompute = false;
3196 //struct string name = DS_EMPTY_INITIALIZER;
3197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3199 const struct ctables_nest *nest = s->nests[a];
3200 cell->axes[a].cvs = (nest->n
3201 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3203 for (size_t i = 0; i < nest->n; i++)
3205 const struct ctables_category *cat = cats[a][i];
3206 const struct variable *var = nest->vars[i];
3207 const union value *value = case_data (c, var);
3208 if (i != nest->scale_idx)
3210 const struct ctables_category *subtotal = cat->subtotal;
3211 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category adds bits to omit_areas,
   one bit per area type, chosen according to the axis. */
3214 if (cat->type == CCT_TOTAL
3215 || cat->type == CCT_SUBTOTAL
3216 || cat->type == CCT_POSTCOMPUTE)
3218 /* XXX these should be more encompassing I think.*/
3222 case PIVOT_AXIS_COLUMN:
3223 cell->omit_areas |= ((1u << CTAT_TABLE) |
3224 (1u << CTAT_LAYER) |
3225 (1u << CTAT_LAYERCOL) |
3226 (1u << CTAT_SUBTABLE) |
3229 case PIVOT_AXIS_ROW:
3230 cell->omit_areas |= ((1u << CTAT_TABLE) |
3231 (1u << CTAT_LAYER) |
3232 (1u << CTAT_LAYERROW) |
3233 (1u << CTAT_SUBTABLE) |
3236 case PIVOT_AXIS_LAYER:
3237 cell->omit_areas |= ((1u << CTAT_TABLE) |
3238 (1u << CTAT_LAYER));
3242 if (cat->type == CCT_POSTCOMPUTE)
3243 cell->postcompute = true;
/* The cell keeps the category pointer and its own clone of the value. */
3246 cell->axes[a].cvs[i].category = cat;
3247 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a debugging name in a variable
   called name, but the only visible declaration of it is commented out above
   and the assignment to cell->name is commented out below, so presumably
   this region is compiled out -- confirm. */
3250 if (i != nest->scale_idx)
3252 if (!ds_is_empty (&name))
3253 ds_put_cstr (&name, ", ");
3254 char *value_s = data_out (value, var_get_encoding (var),
3255 var_get_print_format (var),
3256 settings_get_fmt_settings ());
3257 if (cat->type == CCT_TOTAL
3258 || cat->type == CCT_SUBTOTAL
3259 || cat->type == CCT_POSTCOMPUTE)
3260 ds_put_format (&name, "%s=total", var_get_name (var));
3262 ds_put_format (&name, "%s=%s", var_get_name (var),
3263 value_s + strspn (value_s, " "));
3269 //cell->name = ds_steal_cstr (&name);
/* One summary is initialized per spec in the spec set that the nest on the
   summary axis holds for the summary variant of this cell. */
3271 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3272 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3273 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3274 for (size_t i = 0; i < specs->n; i++)
3275 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to an area of each type, then add it to the cell table of
   the section under the hash computed above. */
3276 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3277 cell->areas[at] = ctables_area_insert (s, cell, at);
3278 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C for missing values in the variables that matter to SPECS:
   the summary variable specs->var and each of specs->listwise_vars, all read
   as numbers.  The test of specs->is_scale comes first, so presumably the
   variables are examined only for scale summaries -- TODO confirm. */
3283 is_scale_missing (const struct ctables_summary_spec_set *specs,
3284 const struct ccase *c)
3286 if (!specs->is_scale)
3289 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3292 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3294 const struct variable *var = specs->listwise_vars[i];
3295 if (var_is_num_missing (var, case_num (c, var)))
/* Accumulates case C, with weights D_WEIGHT and E_WEIGHT, into the cell of S
   that ctables_cell_insert__() selects for C and CATS.

   Every summary of the cell is updated through ctables_summary_add().  Then,
   for each area type that passes the omit_areas test, the weights are added
   to d_total and e_total of the area, to d_count and e_count unless
   EXCLUDED_MISSING is set, and d_valid and e_valid are updated as well.  The
   sum variables of the table are accumulated into the sums of the area. */
3303 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3304 const struct ctables_category *cats[PIVOT_N_AXES][10],
3305 bool is_missing, bool excluded_missing,
3306 double d_weight, double e_weight)
3308 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3309 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3311 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3313 bool scale_missing = is_scale_missing (specs, c);
3314 for (size_t i = 0; i < specs->n; i++)
3315 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3316 specs->var, case_data (c, specs->var), specs->is_scale,
3317 scale_missing, is_missing, excluded_missing,
3318 d_weight, e_weight);
/* NOTE(review): && is the logical AND, and (1u << at) is never zero, so this
   test is equivalent to !cell->omit_areas: a cell with any omit bit set is
   left out of every area type.  omit_areas is built as a bit mask, so a
   bitwise test may have been meant, but the two give different results --
   confirm the intent before changing it. */
3319 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3320 if (!(cell->omit_areas && (1u << at)))
3322 struct ctables_area *a = cell->areas[at];
3323 a->d_total += d_weight;
3324 a->e_total += e_weight;
3326 if (!excluded_missing)
3328 a->d_count += d_weight;
3329 a->e_count += e_weight;
3334 a->d_valid += d_weight;
3335 a->e_valid += e_weight;
3338 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3340 /* XXX listwise_missing??? */
3341 const struct variable *var = s->table->sum_vars[i];
3342 double addend = case_num (c, var);
/* A value that is missing for the sum variable is left out of both sums.
   e_sum is scaled by E_WEIGHT and u_sum is not weighted. */
3343 if (!var_is_num_missing (var, addend))
3345 struct ctables_sum *sum = &a->sums[i];
3346 sum->e_sum += addend * e_weight;
3347 sum->u_sum += addend;
/* Adds case C to the cells that involve total categories.

   Walks the axes from START_AXIS onward and, within a nest, the variables
   from START_NEST onward.  For each variable the total category is looked up
   with ctables_categories_total(); the case is then added with
   ctables_cell_add__() and the function recurses starting just past that
   variable, which also covers combinations of totals on several variables.
   The current entry of CATS is kept in save, presumably so that it can be
   replaced by the total for the duration and restored afterward -- TODO
   confirm. */
3355 recurse_totals (struct ctables_section *s, const struct ccase *c,
3356 const struct ctables_category *cats[PIVOT_N_AXES][10],
3357 bool is_missing, bool excluded_missing,
3358 double d_weight, double e_weight,
3359 enum pivot_axis_type start_axis, size_t start_nest)
3361 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3363 const struct ctables_nest *nest = s->nests[a];
3364 for (size_t i = start_nest; i < nest->n; i++)
3366 if (i == nest->scale_idx)
3369 const struct variable *var = nest->vars[i];
3371 const struct ctables_category *total = ctables_categories_total (
3372 s->table->categories[var_get_dict_index (var)]);
3375 const struct ctables_category *save = cats[a][i];
3377 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3378 d_weight, e_weight);
3379 recurse_totals (s, c, cats, is_missing, excluded_missing,
3380 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells that involve subtotal categories.

   Walks the axes from START_AXIS onward and, within a nest, the variables
   from START_NEST onward.  The entry of CATS for a variable is replaced by
   the subtotal member of that category, the case is added with
   ctables_cell_add__(), and the function recurses starting just past that
   variable.  The replaced entry is kept in save, presumably to be restored
   afterward -- TODO confirm. */
3389 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3390 const struct ctables_category *cats[PIVOT_N_AXES][10],
3391 bool is_missing, bool excluded_missing,
3392 double d_weight, double e_weight,
3393 enum pivot_axis_type start_axis, size_t start_nest)
3395 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3397 const struct ctables_nest *nest = s->nests[a];
3398 for (size_t i = start_nest; i < nest->n; i++)
3400 if (i == nest->scale_idx)
3403 const struct ctables_category *save = cats[a][i];
3406 cats[a][i] = save->subtotal;
3407 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3408 d_weight, e_weight);
3409 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3410 d_weight, e_weight, a, i + 1);
/* Records VALUE, a value of VAR, in the hash map OCCURRENCES, whose entries
   are struct ctables_occurrence.  Entries with the same hash are compared to
   VALUE with value_equal() at the width of VAR.  A new entry holds a clone of
   VALUE; presumably it is added only when no existing entry is equal -- TODO
   confirm. */
3419 ctables_add_occurrence (const struct variable *var,
3420 const union value *value,
3421 struct hmap *occurrences)
3423 int width = var_get_width (var);
3424 unsigned int hash = value_hash (value, width, 0);
3426 struct ctables_occurrence *o;
3427 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3429 if (value_equal (value, &o->value, width))
3432 o = xmalloc (sizeof *o);
3433 value_clone (&o->value, value, width);
3434 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Each variable in the nest of each axis, other than the scale variable, is
   matched to a category with ctables_categories_match().  The static
   cct_excluded_missing placeholder can stand in as the category, which also
   sets excluded_missing.  Unless excluded_missing is set, the values of the
   case are recorded in the per-variable occurrence maps of S.  Finally the
   case is added to its own cell with ctables_cell_add__() and then to total
   and subtotal cells through recurse_totals() and recurse_subtotals(). */
3438 ctables_cell_insert (struct ctables_section *s,
3439 const struct ccase *c,
3440 double d_weight, double e_weight)
/* NOTE(review): the second dimension is fixed at 10 and nothing visible here
   checks nest->n against it -- confirm that the nesting depth is limited
   elsewhere. */
3442 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3444 /* Does at least one categorical variable have a missing value in an included
3445 or excluded category? */
3446 bool is_missing = false;
3448 /* Does at least one categorical variable have a missing value in an excluded
3450 bool excluded_missing = false;
3452 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3454 const struct ctables_nest *nest = s->nests[a];
3455 for (size_t i = 0; i < nest->n; i++)
3457 if (i == nest->scale_idx)
3460 const struct variable *var = nest->vars[i];
3461 const union value *value = case_data (c, var);
3463 bool var_missing = var_is_value_missing (var, value) != 0;
3467 cats[a][i] = ctables_categories_match (
3468 s->table->categories[var_get_dict_index (var)], value, var);
3474 static const struct ctables_category cct_excluded_missing = {
3475 .type = CCT_EXCLUDED_MISSING,
3478 cats[a][i] = &cct_excluded_missing;
3479 excluded_missing = true;
3484 if (!excluded_missing)
3485 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3487 const struct ctables_nest *nest = s->nests[a];
3488 for (size_t i = 0; i < nest->n; i++)
3489 if (i != nest->scale_idx)
3491 const struct variable *var = nest->vars[i];
3492 const union value *value = case_data (c, var);
3493 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3497 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3498 d_weight, e_weight);
3500 //if (!excluded_missing)
3502 recurse_totals (s, c, cats, is_missing, excluded_missing,
3503 d_weight, e_weight, 0, 0);
3504 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3505 d_weight, e_weight, 0, 0);
/* The spec set that a merge item refers to.  merge_item_compare_3way() reads
   set->specs[ofs] to reach the summary spec of the item. */
3511 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that A and B refer to, that is,
   set->specs[ofs] of each.  The keys, in order, are function, weighted,
   calc_area, percentile, and finally label as compared by strcmp(), where a
   null label counts as the empty string.  Every key sorts in ascending order
   except percentile, for which the larger value sorts first. */
3516 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3518 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3519 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3520 if (as->function != bs->function)
3521 return as->function > bs->function ? 1 : -1;
3522 else if (as->weighted != bs->weighted)
3523 return as->weighted > bs->weighted ? 1 : -1;
3524 else if (as->calc_area != bs->calc_area)
3525 return as->calc_area > bs->calc_area ? 1 : -1;
3526 else if (as->percentile != bs->percentile)
3527 return as->percentile < bs->percentile ? 1 : -1;
3529 const char *as_label = as->label ? as->label : "";
3530 const char *bs_label = bs->label ? bs->label : "";
3531 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of VAR: wraps it in a temporary pivot value made
   by pivot_value_new_var_value(), passes that to pivot_value_format() with S
   as the string argument, and destroys the pivot value. */
3535 ctables_category_format_number (double number, const struct variable *var,
3538 struct pivot_value *pv = pivot_value_new_var_value (
3539 var, &(union value) { .f = number });
3540 pivot_value_format (pv, NULL, s);
3541 pivot_value_destroy (pv);
/* Formats STRING as a value of VAR, passing OUT to pivot_value_format().

   STRING is first copied by buf_copy_rpad() into a newly allocated buffer
   that is exactly as long as the width of VAR, padded on the right with
   spaces, and the pivot value is created from that buffer.

   NOTE(review): the buffer S comes from xmalloc(); check that it is released
   once the pivot value no longer needs it. */
3545 ctables_category_format_string (struct substring string,
3546 const struct variable *var, struct string *out)
3548 int width = var_get_width (var);
3549 char *s = xmalloc (width);
3550 buf_copy_rpad (s, width, string.string, string.length, ' ');
3551 struct pivot_value *pv = pivot_value_new_var_value (
3552 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3553 pivot_value_format (pv, NULL, out);
3554 pivot_value_destroy (pv);
/* Writes a textual label for category CAT of variable VAR to S, according
   to the kind of category:

   - a number or a string: the value, formatted as a value of VAR;
   - a numeric or string range: both endpoints, formatted as values of VAR,
     with THRU between them;
   - the fixed words MISSING or OTHERNM;
   - a postcompute: an ampersand followed by the name of the postcompute;
   - a category that carries a total label: that label.

   The result is tested by the caller in ctables_postcompute_label(), so
   presumably it reports whether a label could be produced -- TODO confirm.

   The THRU separator is a constant string, so it is appended with
   ds_put_cstr(), as the other constant labels are, instead of being passed
   through printf-style formatting. */
3559 ctables_category_format_label (const struct ctables_category *cat,
3560 const struct variable *var,
3566 ctables_category_format_number (cat->number, var, s);
3570 ctables_category_format_string (cat->string, var, s);
3574 ctables_category_format_number (cat->nrange[0], var, s);
3575 ds_put_cstr (s, " THRU ");
3576 ctables_category_format_number (cat->nrange[1], var, s);
3580 ctables_category_format_string (cat->srange[0], var, s);
3581 ds_put_cstr (s, " THRU ");
3582 ctables_category_format_string (cat->srange[1], var, s);
3586 ds_put_cstr (s, "MISSING");
3590 ds_put_cstr (s, "OTHERNM");
3593 case CCT_POSTCOMPUTE:
3594 ds_put_format (s, "&%s", cat->pc->name);
3599 ds_put_cstr (s, cat->total_label);
3605 case CCT_EXCLUDED_MISSING:
/* Builds the label for postcompute category CAT from cat->pc->label.

   The label text is scanned for the marker )LABEL[ followed by a decimal
   index and a closing bracket.  The text ahead of each marker is copied
   through, and the reference itself is replaced by the label that
   ctables_category_format_label() produces for category number INDEX of
   CATS, counting from 1.  When no marker remains, the rest of the text is
   appended; a label without any marker is returned as is, without copying it
   through the temporary string.

   The index must parse completely and lie between 1 and cats->n_cats.  The
   last return statement yields the raw label text unchanged; presumably that
   is the fallback for a malformed reference -- TODO confirm. */
3612 static struct pivot_value *
3613 ctables_postcompute_label (const struct ctables_categories *cats,
3614 const struct ctables_category *cat,
3615 const struct variable *var)
3617 struct substring in = ss_cstr (cat->pc->label);
3618 struct substring target = ss_cstr (")LABEL[");
3620 struct string out = DS_EMPTY_INITIALIZER;
3623 size_t chunk = ss_find_substring (in, target);
3624 if (chunk == SIZE_MAX)
3626 if (ds_is_empty (&out))
3627 return pivot_value_new_user_text (in.string, in.length);
3630 ds_put_substring (&out, in);
3631 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text ahead of the marker, then step past the marker itself. */
3635 ds_put_substring (&out, ss_head (in, chunk));
3636 ss_advance (&in, chunk + target.length);
3638 struct substring idx_s;
3639 if (!ss_get_until (&in, ']', &idx_s))
3642 long int idx = strtol (idx_s.string, &tail, 10);
/* The number has to end exactly where the bracketed text ends. */
3643 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3646 struct ctables_category *cat2 = &cats->cats[idx - 1];
3647 if (!ctables_category_format_label (cat2, var, &out))
3653 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels category CAT of variable VAR:

   - for a postcompute that has a label, the result of
     ctables_postcompute_label();
   - for a total or subtotal, the total_label of the category as user text;
   - for anything else, including a postcompute without a label, VALUE as a
     value of VAR. */
3656 static struct pivot_value *
3657 ctables_category_create_value_label (const struct ctables_categories *cats,
3658 const struct ctables_category *cat,
3659 const struct variable *var,
3660 const union value *value)
3662 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3663 ? ctables_postcompute_label (cats, cat, var)
3664 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3665 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3666 : pivot_value_new_var_value (var, value));
/* Searches the clabels_values_map of T, among the entries that have the given
   HASH, for one whose value equals VALUE as compared by value_equal() at
   WIDTH.  HASH is supplied by the caller so that it is computed only once. */
3669 static struct ctables_value *
3670 ctables_value_find__ (struct ctables_table *t, const union value *value,
3671 int width, unsigned int hash)
3673 struct ctables_value *clv;
3674 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3675 hash, &t->clabels_values_map)
3676 if (value_equal (value, &clv->value, width))
/* Makes sure that VALUE has an entry in the clabels_values_map of T.  The map
   is searched first with ctables_value_find__().  A new entry holds a clone
   of VALUE and goes in under the same hash; presumably it is created only
   when the search finds nothing -- TODO confirm. */
3682 ctables_value_insert (struct ctables_table *t, const union value *value,
3685 unsigned int hash = value_hash (value, width, 0);
3686 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3689 clv = xmalloc (sizeof *clv);
3690 value_clone (&clv->value, value, width);
3691 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in the clabels_values_map of T.  This
   computes the hash with value_hash() and seed 0, the same way that
   ctables_value_insert() does, and defers to ctables_value_find__(). */
3695 static struct ctables_value *
3696 ctables_value_find (struct ctables_table *t,
3697 const union value *value, int width)
3699 return ctables_value_find__ (t, value, width,
3700 value_hash (value, width, 0));
/* Adds to T one section for every combination of one nest per axis.

   While A is a valid axis, the function loops ix[A] over the nests in the
   stack of that axis, treating an empty stack as a single iteration, and
   recurses to the next axis.  The remaining statements take the next unused
   element of t->sections, initialize it, and set up one occurrence map per
   nest variable on each axis and one area map per area type.

   NOTE(review): t->sections is indexed at t->n_sections without any growth
   here, so presumably the caller allocates enough room in advance --
   confirm. */
3704 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3705 size_t ix[PIVOT_N_AXES])
3707 if (a < PIVOT_N_AXES)
3709 size_t limit = MAX (t->stacks[a].n, 1);
3710 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3711 ctables_table_add_section (t, a + 1, ix);
3715 struct ctables_section *s = &t->sections[t->n_sections++];
3716 *s = (struct ctables_section) {
3718 .cells = HMAP_INITIALIZER (s->cells),
3720 for (a = 0; a < PIVOT_N_AXES; a++)
3723 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3725 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3726 for (size_t i = 0; i < nest->n; i++)
3727 hmap_init (&s->occurrences[a][i]);
3729 for (size_t i = 0; i < N_CTATS; i++)
3730 hmap_init (&s->areas[i]);
/* Addition operator for postcompute expressions.  ctables_pcexpr_evaluate()
   passes it to ctables_pcexpr_evaluate_nonterminal() with two operands. */
3735 ctpo_add (double a, double b)
/* Subtraction operator for postcompute expressions.  ctables_pcexpr_evaluate()
   passes it to ctables_pcexpr_evaluate_nonterminal() with two operands. */
3741 ctpo_sub (double a, double b)
/* Multiplication operator for postcompute expressions.
   ctables_pcexpr_evaluate() passes it to
   ctables_pcexpr_evaluate_nonterminal() with two operands. */
3747 ctpo_mul (double a, double b)
/* Division operator for postcompute expressions.  Returns A divided by B, or
   SYSMIS when B is zero. */
3753 ctpo_div (double a, double b)
3755 return b ? a / b : SYSMIS;
/* Exponentiation operator for postcompute expressions, computed with pow().
   The value of errno is saved in save_errno beforehand, presumably so that a
   failure reported by pow() through errno can be detected and the previous
   errno restored afterward -- TODO confirm. */
3759 ctpo_pow (double a, double b)
3761 int save_errno = errno;
3763 double result = pow (a, b);
/* Negation operator for postcompute expressions.  B is unused; the function
   takes two parameters only to match the operator signature, and
   ctables_pcexpr_evaluate() passes it with a single operand. */
3771 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.  It is filled in by
   ctables_cell_calculate_postcompute() and read by ctables_pcexpr_evaluate()
   and ctables_pcexpr_evaluate_category(). */
3776 struct ctables_pcexpr_evaluate_ctx
/* The cell whose value is being computed, and the section that holds it. */
3778 const struct ctables_cell *cell;
3779 const struct ctables_section *section;
/* The categories of the variable that carries the postcompute. */
3780 const struct ctables_categories *cats;
/* The axis on which the postcompute category sits. */
3781 enum pivot_axis_type pc_a;
/* Copied from the postcompute category and passed along to
   ctables_find_category_for_postcompute(). */
3784 enum fmt_type parse_format;
3787 static double ctables_pcexpr_evaluate (
3788 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates the first N_ARGS subexpressions of E, where N_ARGS is 1 or 2, and
   combines the results with EVALUATE.  An operand slot that is not filled in
   stays 0.  An operand that is not finite or that equals SYSMIS is handled
   before EVALUATE is reached; presumably the result is then SYSMIS -- TODO
   confirm. */
3791 ctables_pcexpr_evaluate_nonterminal (
3792 const struct ctables_pcexpr_evaluate_ctx *ctx,
3793 const struct ctables_pcexpr *e, size_t n_args,
3794 double evaluate (double, double))
3796 double args[2] = { 0, 0 };
3797 for (size_t i = 0; i < n_args; i++)
3799 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3800 if (!isfinite (args[i]) || args[i] == SYSMIS)
3803 return evaluate (args[0], args[1]);
/* Returns the value of summary ctx->summary_idx for the cell that has the
   same categories and values as ctx->cell, except that the position given by
   ctx->pc_a and ctx->pc_a_idx is taken from PC_CV instead.

   The hash and the comparison follow the same rule that
   ctables_cell_insert__() uses: for each variable other than the scale
   variable, the category pointer plus, unless the category is a total,
   subtotal, or postcompute, the value. */
3807 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3808 const struct ctables_cell_value *pc_cv)
3810 const struct ctables_section *s = ctx->section;
/* Compute the hash of the cell being sought. */
3813 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3815 const struct ctables_nest *nest = s->nests[a];
3816 for (size_t i = 0; i < nest->n; i++)
3817 if (i != nest->scale_idx)
3819 const struct ctables_cell_value *cv
3820 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3821 : &ctx->cell->axes[a].cvs[i]);
3822 hash = hash_pointer (cv->category, hash);
3823 if (cv->category->type != CCT_TOTAL
3824 && cv->category->type != CCT_SUBTOTAL
3825 && cv->category->type != CCT_POSTCOMPUTE)
3826 hash = value_hash (&cv->value,
3827 var_get_width (nest->vars[i]), hash);
/* Compare each candidate cell TC with that hash, position by position. */
3831 struct ctables_cell *tc;
3832 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3834 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3836 const struct ctables_nest *nest = s->nests[a];
3837 for (size_t i = 0; i < nest->n; i++)
3838 if (i != nest->scale_idx)
3840 const struct ctables_cell_value *p_cv
3841 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3842 : &ctx->cell->axes[a].cvs[i]);
3843 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3844 if (p_cv->category != t_cv->category
3845 || (p_cv->category->type != CCT_TOTAL
3846 && p_cv->category->type != CCT_SUBTOTAL
3847 && p_cv->category->type != CCT_POSTCOMPUTE
3848 && !value_equal (&p_cv->value,
3850 var_get_width (nest->vars[i]))))
/* The summary spec comes from the spec set for the summary variant of TC,
   within the nest on the summary axis. */
3862 const struct ctables_table *t = s->table;
3863 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3864 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3865 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3866 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX.

   Operands that name a category are resolved to a category of ctx->cats with
   ctables_find_category_for_postcompute() and then to cell values with
   ctables_pcexpr_evaluate_category():

   - numeric range, string range, MISSING, and OTHERNM operands add up the
     values for every recorded occurrence of the postcompute variable that
     ctables_categories_match() assigns to the operand category;
   - number, subtotal, and total operands look up a single cell, using
     e->number as the value;
   - string operands look up a single cell as well, using a copy of the
     string padded with spaces when the variable is wider than the string.

   Operators are evaluated through ctables_pcexpr_evaluate_nonterminal()
   with the matching ctpo function; negation takes one operand and the others
   take two. */
3870 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3871 const struct ctables_pcexpr *e)
3878 case CTPO_CAT_NRANGE:
3879 case CTPO_CAT_SRANGE:
3880 case CTPO_CAT_MISSING:
3881 case CTPO_CAT_OTHERNM:
3883 struct ctables_cell_value cv = {
3884 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3886 assert (cv.category != NULL);
/* The occurrence map holds the values seen for the postcompute variable. */
3888 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3889 const struct ctables_occurrence *o;
3892 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3893 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3894 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3896 cv.value = o->value;
3897 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3902 case CTPO_CAT_NUMBER:
3903 case CTPO_CAT_SUBTOTAL:
3904 case CTPO_CAT_TOTAL:
3906 struct ctables_cell_value cv = {
3907 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3908 .value = { .f = e->number },
3910 assert (cv.category != NULL);
3911 return ctables_pcexpr_evaluate_category (ctx, &cv);
3914 case CTPO_CAT_STRING:
3916 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
/* A string shorter than the variable is copied into a buffer of the full
   width, padded on the right with spaces. */
3918 if (width > e->string.length)
3920 s = xmalloc (width);
3921 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3924 const struct ctables_category *category
3925 = ctables_find_category_for_postcompute (
3926 ctx->section->table->ctables->dict,
3927 ctx->cats, ctx->parse_format, e);
3928 assert (category != NULL);
/* The value for the lookup depends on the kind of category that was found:
   its number, or the string, padded if a padded copy was made. */
3930 struct ctables_cell_value cv = { .category = category };
3931 if (category->type == CCT_NUMBER)
3932 cv.value.f = category->number;
3933 else if (category->type == CCT_STRING)
3934 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3938 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3944 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3947 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3950 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3953 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3956 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3959 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category of CELL, a cell of section S that must have
   its postcompute flag set (asserted).

   Every category value of the cell, on every axis, is checked for type
   CCT_POSTCOMPUTE.  The index of the match within its nest is stored in
   *PC_A_IDX_P; presumably the axis is stored in *PC_A_P in the same way --
   TODO confirm.  Finding a second postcompute is treated as a special case,
   as the comment inside the loop explains.  The function asserts at the end
   that a postcompute category was found. */
3965 static const struct ctables_category *
3966 ctables_cell_postcompute (const struct ctables_section *s,
3967 const struct ctables_cell *cell,
3968 enum pivot_axis_type *pc_a_p,
3971 assert (cell->postcompute);
3972 const struct ctables_category *pc_cat = NULL;
3973 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3974 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3976 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3977 if (cv->category->type == CCT_POSTCOMPUTE)
3981 /* Multiple postcomputes cross each other. The value is
3986 pc_cat = cv->category;
3990 *pc_a_idx_p = pc_a_idx;
3994 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary spec
   SS.

   The postcompute category of the cell and its position are obtained from
   ctables_cell_postcompute().  If the postcompute has a spec with the same
   function, weighted flag, calc_area, and percentile as SS, then *FORMAT and
   *IS_CTABLES_FORMAT are overwritten from that spec; otherwise they are left
   as the caller set them.  The expression of the postcompute is then
   evaluated with a context built from the cell, the section, the categories
   of the postcompute variable, and the position of the postcompute. */
3999 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4000 const struct ctables_cell *cell,
4001 const struct ctables_summary_spec *ss,
4002 struct fmt_spec *format,
4003 bool *is_ctables_format,
4006 enum pivot_axis_type pc_a = 0;
4007 size_t pc_a_idx = 0;
4008 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4009 s, cell, &pc_a, &pc_a_idx);
4013 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a spec of the postcompute that corresponds to SS. */
4016 for (size_t i = 0; i < pc->specs->n; i++)
4018 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4019 if (ss->function == ss2->function
4020 && ss->weighted == ss2->weighted
4021 && ss->calc_area == ss2->calc_area
4022 && ss->percentile == ss2->percentile)
4024 *format = ss2->format;
4025 *is_ctables_format = ss2->is_ctables_format;
/* The categories are those of the variable at the position of the
   postcompute. */
4031 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4032 const struct ctables_categories *cats = s->table->categories[
4033 var_get_dict_index (var)];
4034 struct ctables_pcexpr_evaluate_ctx ctx = {
4039 .pc_a_idx = pc_a_idx,
4040 .summary_idx = summary_idx,
4041 .parse_format = pc_cat->parse_format,
4043 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D as UTF-8 text according to FORMAT and SETTINGS, using
   data_out_stretchy(), and then adjusts the position of a minus sign in the
   output as the comment in the body describes. */
4047 ctables_format (double d, const struct fmt_spec *format,
4048 const struct fmt_settings *settings)
4050 const union value v = { .f = d };
4051 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4053 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4054 produce the results we want for negative numbers, putting the negative
4055 sign in the wrong spot, before the prefix instead of after it. We can't,
4056 in fact, produce the desired results using a custom-currency
4057 specification. Instead, we postprocess the output, moving the negative
4060 NEQUAL: "-N=3" => "N=-3"
4061 PAREN: "-(3)" => "(-3)"
4062 PCTPAREN: "-(3%)" => "(-3%)"
4064 This transformation doesn't affect NEGPAREN. */
4065 char *minus_src = strchr (s, '-');
/* A minus sign directly after E is left alone; presumably it is the sign of
   an exponent. */
4066 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4068 char *n_equals = strstr (s, "N=");
4069 char *lparen = strchr (s, '(');
/* The destination is the position of the equals sign if N= is present, and
   otherwise the position of the left parenthesis.  Moving the sign forward
   to that position shifts the characters in between one place left, which
   puts the sign directly after that character. */
4070 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4072 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in the stack of T on axis A.  Two tests are applied to
   each nest: whether it consists of exactly one variable that is also its
   scale variable (n equal to 1 and scale_idx equal to 0), and whether the
   variable label setting for that variable is CTVL_NONE.  Presumably the
   result is true only if every nest passes both tests -- TODO confirm.  The
   caller stores the result in hide_all_labels of the dimension for the
   axis. */
4078 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4080 for (size_t i = 0; i < t->stacks[a].n; i++)
4082 struct ctables_nest *nest = &t->stacks[a].nests[i];
4083 if (nest->n != 1 || nest->scale_idx != 0)
4086 enum ctables_vlabel vlabel
4087 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4088 if (vlabel != CTVL_NONE)
4095 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4097 struct pivot_table *pt = pivot_table_create__ (
4099 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4100 : pivot_value_new_text (N_("Custom Tables"))),
4103 pivot_table_set_caption (
4104 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4106 pivot_table_set_corner_text (
4107 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4109 bool summary_dimension = (t->summary_axis != t->slabels_axis
4110 || (!t->slabels_visible
4111 && t->summary_specs.n > 1));
4112 if (summary_dimension)
4114 struct pivot_dimension *d = pivot_dimension_create (
4115 pt, t->slabels_axis, N_("Statistics"));
4116 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4117 if (!t->slabels_visible)
4118 d->hide_all_labels = true;
4119 for (size_t i = 0; i < specs->n; i++)
4120 pivot_category_create_leaf (
4121 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4124 bool categories_dimension = t->clabels_example != NULL;
4125 if (categories_dimension)
4127 struct pivot_dimension *d = pivot_dimension_create (
4128 pt, t->label_axis[t->clabels_from_axis],
4129 t->clabels_from_axis == PIVOT_AXIS_ROW
4130 ? N_("Row Categories")
4131 : N_("Column Categories"));
4132 const struct variable *var = t->clabels_example;
4133 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4134 for (size_t i = 0; i < t->n_clabels_values; i++)
4136 const struct ctables_value *value = t->clabels_values[i];
4137 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4138 assert (cat != NULL);
4139 pivot_category_create_leaf (
4140 d->root, ctables_category_create_value_label (c, cat,
4146 pivot_table_set_look (pt, ct->look);
4147 struct pivot_dimension *d[PIVOT_N_AXES];
4148 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4150 static const char *names[] = {
4151 [PIVOT_AXIS_ROW] = N_("Rows"),
4152 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4153 [PIVOT_AXIS_LAYER] = N_("Layers"),
4155 d[a] = (t->axes[a] || a == t->summary_axis
4156 ? pivot_dimension_create (pt, a, names[a])
4161 assert (t->axes[a]);
4163 for (size_t i = 0; i < t->stacks[a].n; i++)
4165 struct ctables_nest *nest = &t->stacks[a].nests[i];
4166 struct ctables_section **sections = xnmalloc (t->n_sections,
4168 size_t n_sections = 0;
4170 size_t n_total_cells = 0;
4171 size_t max_depth = 0;
4172 for (size_t j = 0; j < t->n_sections; j++)
4173 if (t->sections[j].nests[a] == nest)
4175 struct ctables_section *s = &t->sections[j];
4176 sections[n_sections++] = s;
4177 n_total_cells += hmap_count (&s->cells);
4179 size_t depth = s->nests[a]->n;
4180 max_depth = MAX (depth, max_depth);
4183 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4185 size_t n_sorted = 0;
4187 for (size_t j = 0; j < n_sections; j++)
4189 struct ctables_section *s = sections[j];
4191 struct ctables_cell *cell;
4192 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4194 sorted[n_sorted++] = cell;
4195 assert (n_sorted <= n_total_cells);
4198 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4199 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4202 if (a == PIVOT_AXIS_ROW)
4204 size_t ids[N_CTATS];
4205 memset (ids, 0, sizeof ids);
4206 for (size_t j = 0; j < n_sorted; j++)
4208 struct ctables_cell *cell = sorted[j];
4209 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4211 struct ctables_area *area = cell->areas[at];
4212 if (!area->sequence)
4213 area->sequence = ++ids[at];
4220 for (size_t j = 0; j < n_sorted; j++)
4222 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4227 struct ctables_level
4229 enum ctables_level_type
4231 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4232 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4233 CTL_SUMMARY, /* Summary functions. */
4237 enum settings_value_show vlabel; /* CTL_VAR only. */
4240 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4241 size_t n_levels = 0;
4242 for (size_t k = 0; k < nest->n; k++)
4244 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4245 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4247 if (vlabel != CTVL_NONE)
4249 levels[n_levels++] = (struct ctables_level) {
4251 .vlabel = (enum settings_value_show) vlabel,
4256 if (nest->scale_idx != k
4257 && (k != nest->n - 1 || t->label_axis[a] == a))
4259 levels[n_levels++] = (struct ctables_level) {
4260 .type = CTL_CATEGORY,
4266 if (!summary_dimension && a == t->slabels_axis)
4268 levels[n_levels++] = (struct ctables_level) {
4269 .type = CTL_SUMMARY,
4270 .var_idx = SIZE_MAX,
4274 /* Pivot categories:
4276 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4277 - category for nest->vars[0], if nest->scale_idx != 0
4278 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4279 - category for nest->vars[1], if nest->scale_idx != 1
4281 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4282 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4283 - summary function, if 'a == t->slabels_axis && a ==
4286 Additional dimensions:
4288 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4290 - If 't->label_axis[b] == a' for some 'b != a', add a category
4295 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4297 for (size_t j = 0; j < n_sorted; j++)
4299 struct ctables_cell *cell = sorted[j];
4300 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4302 size_t n_common = 0;
4305 for (; n_common < n_levels; n_common++)
4307 const struct ctables_level *level = &levels[n_common];
4308 if (level->type == CTL_CATEGORY)
4310 size_t var_idx = level->var_idx;
4311 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4312 if (prev->axes[a].cvs[var_idx].category != c)
4314 else if (c->type != CCT_SUBTOTAL
4315 && c->type != CCT_TOTAL
4316 && c->type != CCT_POSTCOMPUTE
4317 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4318 &cell->axes[a].cvs[var_idx].value,
4319 var_get_type (nest->vars[var_idx])))
4325 for (size_t k = n_common; k < n_levels; k++)
4327 const struct ctables_level *level = &levels[k];
4328 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4329 if (level->type == CTL_SUMMARY)
4331 assert (k == n_levels - 1);
4333 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4334 for (size_t m = 0; m < specs->n; m++)
4336 int leaf = pivot_category_create_leaf (
4337 parent, ctables_summary_label (&specs->specs[m],
4345 const struct variable *var = nest->vars[level->var_idx];
4346 struct pivot_value *label;
4347 if (level->type == CTL_VAR)
4349 label = pivot_value_new_variable (var);
4350 label->variable.show = level->vlabel;
4352 else if (level->type == CTL_CATEGORY)
4354 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4355 label = ctables_category_create_value_label (
4356 t->categories[var_get_dict_index (var)],
4357 cv->category, var, &cv->value);
4362 if (k == n_levels - 1)
4363 prev_leaf = pivot_category_create_leaf (parent, label);
4365 groups[k] = pivot_category_create_group__ (parent, label);
4369 cell->axes[a].leaf = prev_leaf;
4378 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4382 size_t n_total_cells = 0;
4383 for (size_t j = 0; j < t->n_sections; j++)
4384 n_total_cells += hmap_count (&t->sections[j].cells);
4386 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4387 size_t n_sorted = 0;
4388 for (size_t j = 0; j < t->n_sections; j++)
4390 const struct ctables_section *s = &t->sections[j];
4391 struct ctables_cell *cell;
4392 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4394 sorted[n_sorted++] = cell;
4396 assert (n_sorted <= n_total_cells);
4397 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4399 size_t ids[N_CTATS];
4400 memset (ids, 0, sizeof ids);
4401 for (size_t j = 0; j < n_sorted; j++)
4403 struct ctables_cell *cell = sorted[j];
4404 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4406 struct ctables_area *area = cell->areas[at];
4407 if (!area->sequence)
4408 area->sequence = ++ids[at];
4415 for (size_t i = 0; i < t->n_sections; i++)
4417 struct ctables_section *s = &t->sections[i];
4419 struct ctables_cell *cell;
4420 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4425 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4426 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4427 for (size_t j = 0; j < specs->n; j++)
4430 size_t n_dindexes = 0;
4432 if (summary_dimension)
4433 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4435 if (categories_dimension)
4437 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4438 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4439 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4440 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4443 dindexes[n_dindexes++] = ctv->leaf;
4446 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4449 int leaf = cell->axes[a].leaf;
4450 if (a == t->summary_axis && !summary_dimension)
4452 dindexes[n_dindexes++] = leaf;
4455 const struct ctables_summary_spec *ss = &specs->specs[j];
4457 struct fmt_spec format = specs->specs[j].format;
4458 bool is_ctables_format = ss->is_ctables_format;
4459 double d = (cell->postcompute
4460 ? ctables_cell_calculate_postcompute (
4461 s, cell, ss, &format, &is_ctables_format, j)
4462 : ctables_summary_value (cell, &cell->summaries[j],
4465 struct pivot_value *value;
4466 if (ct->hide_threshold != 0
4467 && d < ct->hide_threshold
4468 && ctables_summary_function_is_count (ss->function))
4470 value = pivot_value_new_user_text_nocopy (
4471 xasprintf ("<%d", ct->hide_threshold));
4473 else if (d == 0 && ct->zero)
4474 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4475 else if (d == SYSMIS && ct->missing)
4476 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4477 else if (is_ctables_format)
4478 value = pivot_value_new_user_text_nocopy (
4479 ctables_format (d, &format, &ct->ctables_formats));
4482 value = pivot_value_new_number (d);
4483 value->numeric.format = format;
4485 /* XXX should text values be right-justified? */
4486 pivot_table_put (pt, dindexes, n_dindexes, value);
4491 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be moved to the
   position requested in t->label_axis[a].  Each violated requirement visible
   below is reported with msg (SE, ...): categories sorted by a summary
   function (CCT_FUNCTION), a scale variable in the innermost position of a
   nest, and, across the nests of the stack, innermost variables that differ
   in width, value labels, or category specifications.  As a side effect the
   innermost variable of the first nest is stored in t->clabels_example.

   NOTE(review): this listing omits short lines (return type, braces, early
   exits, return statements), so the exact value returned on each path is not
   visible here.  ctables_prepare_table() combines the results for rows and
   columns with &&, so the result is used as a success flag. */
4495 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4497 enum pivot_axis_type label_pos = t->label_axis[a];
/* Both names are used only to build the diagnostics below. */
4501 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4502 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4504 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest supplies the reference variable (v0) and reference
   categories (c0) that every other nest is compared against. */
4508 const struct ctables_nest *n0 = &stack->nests[0];
4511 assert (stack->n == 1);
4515 const struct variable *v0 = n0->vars[n0->n - 1];
4516 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4517 t->clabels_example = v0;
/* Moving labels is rejected when any category of v0 is a CCT_FUNCTION
   category (sorting based on a summary function). */
4519 for (size_t i = 0; i < c0->n_cats; i++)
4520 if (c0->cats[i].type == CCT_FUNCTION)
4522 msg (SE, _("%s=%s is not allowed with sorting based "
4523 "on a summary function."),
4524 subcommand_name, pos_name);
/* The innermost variable of the first nest must not be its scale variable. */
4527 if (n0->n - 1 == n0->scale_idx)
4529 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4530 "but %s is a scale variable."),
4531 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest's innermost variable (vi) must be categorical and
   agree with v0 in width, value labels, and category specification. */
4535 for (size_t i = 1; i < stack->n; i++)
4537 const struct ctables_nest *ni = &stack->nests[i];
4539 const struct variable *vi = ni->vars[ni->n - 1];
4540 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4542 if (ni->n - 1 == ni->scale_idx)
4544 msg (SE, _("%s=%s requires the variables to be moved to be "
4545 "categorical, but %s is a scale variable."),
4546 subcommand_name, pos_name, var_get_name (vi));
4549 if (var_get_width (v0) != var_get_width (vi))
4551 msg (SE, _("%s=%s requires the variables to be "
4552 "moved to have the same width, but %s has "
4553 "width %d and %s has width %d."),
4554 subcommand_name, pos_name,
4555 var_get_name (v0), var_get_width (v0),
4556 var_get_name (vi), var_get_width (vi));
4559 if (!val_labs_equal (var_get_value_labels (v0),
4560 var_get_value_labels (vi)))
4562 msg (SE, _("%s=%s requires the variables to be "
4563 "moved to have the same value labels, but %s "
4564 "and %s have different value labels."),
4565 subcommand_name, pos_name,
4566 var_get_name (v0), var_get_name (vi));
4569 if (!ctables_categories_equal (c0, ci))
4571 msg (SE, _("%s=%s requires the variables to be "
4572 "moved to have the same category "
4573 "specifications, but %s and %s have different "
4574 "category specifications."),
4575 subcommand_name, pos_name,
4576 var_get_name (v0), var_get_name (vi));
/* Searches the array *SUM_VARS, which holds *N entries in *ALLOCATED allocated
   slots, for VAR, and stores VAR in slot *N when the append path is reached,
   growing the array with x2nrealloc() if it is full.

   NOTE(review): the return type and both return statements are missing from
   this listing.  enumerate_sum_vars() stores the result in
   spec->sum_var_idx, so this presumably returns VAR's index in the array and
   bumps *N after appending -- confirm against the full file. */
4585 add_sum_var (struct variable *var,
4586 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry equal to VAR. */
4588 for (size_t i = 0; i < *n; i++)
4589 if (var == (*sum_vars)[i])
/* Append path: make room when the array is full, then store VAR at the end. */
4592 if (*n >= *allocated)
4593 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4594 (*sum_vars)[*n] = var;
/* Maps one area type to another.  ctables_prepare_table() applies it to each
   summary spec's calc_area when row labels are moved to the column axis or
   column labels to the row axis.

   NOTE(review): nearly all of the body (the switch and its case labels) is
   missing from this listing; only two of the results are visible, so the full
   mapping must be confirmed against the complete file. */
4598 static enum ctables_area_type
4599 rotate_area (enum ctables_area_type area)
4610 return CTAT_LAYERCOL;
4613 return CTAT_LAYERROW;
/* Walks the axis expression tree rooted at A.  For every summary spec of A
   whose function is CTSF_areaPCT_SUM, registers A's variable in *SUM_VARS
   through add_sum_var() and records the result in the spec's sum_var_idx.
   The two sub-axes in a->subs[] are visited recursively.  *N and *ALLOCATED
   are the in-use and allocated sizes of *SUM_VARS.

   NOTE(review): the tests that choose between the "own specs" part and the
   recursive part (and any NULL check on A) are missing from this listing. */
4626 enumerate_sum_vars (const struct ctables_axis *a,
4627 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Scan every summary variant (N_CSVS of them) and every spec within it. */
4635 for (size_t i = 0; i < N_CSVS; i++)
4636 for (size_t j = 0; j < a->specs[i].n; j++)
4638 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4639 if (spec->function == CTSF_areaPCT_SUM)
4640 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both operands of a compound axis node. */
4646 for (size_t i = 0; i < 2; i++)
4647 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The visible steps are:

   1. For each pivot axis, build t->stacks[a] with enumerate_fts() and, for
      each nest and each area type, fill nest->areas[at] with the indexes of
      the nest's variables that belong to that area (nest->n_areas[at] counts
      them).  An alternative branch instead gives the axis a single nest with
      no variables and resets t->label_axis[a] to the axis itself.
   2. On the summary axis, supply a default summary spec where none was given
      (CTSF_MEAN for a scale spec set, CTSF_COUNT otherwise), clone cell specs
      into the totals variant, rotate calc_area when labels move between rows
      and columns, and record listwise variables when smissing_listwise is set.
   3. Merge the per-nest summary spec sets into t->summary_specs, storing each
      spec's position in the merged list in its axis_idx.
   4. Collect the variables needed by CTSF_areaPCT_SUM into t->sum_vars.
   5. Return whether the label positions for rows and columns are both valid.

   NOTE(review): many short lines (return type, braces, else, break,
   continue, n_drop updates, ...) are missing from this listing, so the
   comments below describe only what the visible lines establish. */
4653 ctables_prepare_table (struct ctables_table *t)
4655 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
/* Build this axis's stack of nests from its axis expression. */
4658 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4660 for (size_t j = 0; j < t->stacks[a].n; j++)
4662 struct ctables_nest *nest = &t->stacks[a].nests[j];
4663 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for up to nest->n variable indexes; n_areas[at] counts those used. */
4665 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4666 nest->n_areas[at] = 0;
4668 bool add_vars = (at == CTAT_LAYER ? a == PIVOT_AXIS_LAYER
4669 : at == CTAT_LAYERROW ? a != PIVOT_AXIS_COLUMN
4670 : at == CTAT_LAYERCOL ? a != PIVOT_AXIS_ROW
4671 : at == CTAT_TABLE ? false
4674 for (size_t k = 0; k < nest->n; k++)
4676 if (k == nest->scale_idx)
4678 nest->areas[at][nest->n_areas[at]++] = k;
4680 else if ((at == CTAT_LAYERCOL && a == PIVOT_AXIS_ROW && t->label_axis[a] != a)
4681 || (at == CTAT_LAYERROW && a == PIVOT_AXIS_COLUMN && t->label_axis[a] != a)
4682 || (at == CTAT_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER))
/* k starts at the innermost variable and counts down; because size_t is
   unsigned, decrementing past 0 wraps around and fails `k < nest->n'. */
4684 for (size_t k = nest->n - 1; k < nest->n; k--)
4686 if (k == nest->scale_idx)
4688 nest->areas[at][nest->n_areas[at]++] = k;
/* The chain below adjusts the area for moved category labels: it either sets
   how many trailing entries to drop (n_drop) or overwrites the second-to-last
   entry with the last one and shortens the list by one. */
4695 if (at == CTAT_SUBTABLE
4696 && t->clabels_from_axis != PIVOT_AXIS_LAYER
4697 && t->clabels_to_axis != PIVOT_AXIS_LAYER)
4698 n_drop = a == t->clabels_from_axis ? 2 : 0;
4699 else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
4701 if (a == PIVOT_AXIS_COLUMN)
4705 if (a == PIVOT_AXIS_ROW)
4707 size_t n = nest->n_areas[at];
4710 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4711 nest->n_areas[at]--;
4717 else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
4719 if (a == PIVOT_AXIS_ROW)
4723 if (a == PIVOT_AXIS_COLUMN)
4725 size_t n = nest->n_areas[at];
4728 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4729 nest->n_areas[at]--;
4735 else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
4737 else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
4739 size_t n = nest->n_areas[at];
4742 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4743 nest->n_areas[at]--;
4747 else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4749 else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4751 size_t n = nest->n_areas[at];
4754 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4755 nest->n_areas[at]--;
4759 else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
4761 size_t n = nest->n_areas[at];
4764 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4765 nest->n_areas[at]--;
4769 else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
4771 size_t n = nest->n_areas[at];
4774 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4775 nest->n_areas[at]--;
4781 bool drop_last = (at == CTAT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4782 : at == CTAT_ROW ? a == PIVOT_AXIS_COLUMN
4783 : at == CTAT_COL ? a == PIVOT_AXIS_ROW
4785 bool drop_additional
4786 = ((t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN && (at == CTAT_ROW || at == CTAT_LAYERROW) && a == PIVOT_AXIS_ROW)
4787 || (t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW && (at == CTAT_COL || at == CTAT_LAYERCOL) && a == PIVOT_AXIS_COLUMN));
4792 if (drop_additional)
/* Drop up to n_drop trailing entries, never going below an empty list. */
4796 for (size_t i = 0; i < n_drop; i++)
4797 if (nest->n_areas[at] > 0)
4798 nest->n_areas[at]--;
/* Alternative branch: the axis gets one nest that contains no variables.
   NOTE(review): the condition selecting this branch is not visible here;
   presumably it applies when the axis has no expression -- confirm. */
4804 struct ctables_nest *nest = xmalloc (sizeof *nest);
4805 *nest = (struct ctables_nest) { .n = 0 };
4806 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4808 /* There's no point in moving labels away from an axis that has no
4809 labels, so avoid dealing with the special cases around that. */
4810 t->label_axis[a] = a;
/* Second phase: complete the summary specifications of every nest on the
   summary axis. */
4813 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4814 for (size_t i = 0; i < stack->n; i++)
4816 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified for this nest: create one default spec. */
4817 if (!nest->specs[CSV_CELL].n)
4819 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4820 specs->specs = xmalloc (sizeof *specs->specs);
4823 enum ctables_summary_function function
4824 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4826 *specs->specs = (struct ctables_summary_spec) {
4827 .function = function,
4829 .format = ctables_summary_default_format (function, specs->var),
4832 specs->var = nest->vars[0];
/* Totals use a copy of the cell specs. */
4834 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4835 &nest->specs[CSV_CELL]);
/* Cell summaries exist but no total summaries: totals reuse the cell specs. */
4837 else if (!nest->specs[CSV_TOTAL].n)
4838 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4839 &nest->specs[CSV_CELL]);
/* When labels are swapped between rows and columns, the area each summary is
   calculated over is remapped with rotate_area(). */
4841 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4842 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4844 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4845 for (size_t i = 0; i < nest->specs[sv].n; i++)
4847 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4848 const struct ctables_function_info *cfi =
4849 &ctables_function_info[ss->function];
4851 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise handling of scale missing values, gather the scale variables
   of the other nests that share this nest's group_head and attach the list to
   each summary variant of this nest. */
4855 if (t->ctables->smissing_listwise)
4857 struct variable **listwise_vars = NULL;
4859 size_t allocated = 0;
4861 for (size_t j = nest->group_head; j < stack->n; j++)
4863 const struct ctables_nest *other_nest = &stack->nests[j];
4864 if (other_nest->group_head != nest->group_head)
4867 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4870 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4871 sizeof *listwise_vars);
4872 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4875 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4878 listwise_vars = xmemdup (listwise_vars,
4879 n * sizeof *listwise_vars);
4880 nest->specs[sv].listwise_vars = listwise_vars;
4881 nest->specs[sv].n_listwise_vars = n;
/* Third phase: merge all spec sets of the summary axis into
   t->summary_specs.  Each merge item tracks one spec set and an offset (ofs)
   into it. */
4886 struct ctables_summary_spec_set *merged = &t->summary_specs;
4887 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4889 for (size_t j = 0; j < stack->n; j++)
4891 const struct ctables_nest *nest = &stack->nests[j];
4893 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4894 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Select the smallest current item according to merge_item_compare_3way(). */
4899 struct merge_item min = items[0];
4900 for (size_t j = 1; j < n_left; j++)
4901 if (merge_item_compare_3way (&items[j], &min) < 0)
/* Append the selected spec to the merged list. */
4904 if (merged->n >= merged->allocated)
4905 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4906 sizeof *merged->specs);
4907 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to the selected one shares the new merged
   index and advances; an exhausted item is replaced by the last live item. */
4909 for (size_t j = 0; j < n_left; )
4911 if (merge_item_compare_3way (&items[j], &min) == 0)
4913 struct merge_item *item = &items[j];
4914 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4915 if (++item->ofs >= item->set->n)
4917 items[j] = items[--n_left];
/* NOTE(review): the printf calls below look like debugging output; whether
   they are compiled in depends on lines not visible in this listing. */
4927 for (size_t j = 0; j < merged->n; j++)
4928 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4930 for (size_t j = 0; j < stack->n; j++)
4932 const struct ctables_nest *nest = &stack->nests[j];
4933 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4935 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4936 for (size_t k = 0; k < specs->n; k++)
4937 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4938 specs->specs[k].axis_idx);
/* Fourth phase: collect the variables used by CTSF_areaPCT_SUM summaries on
   the summary axis into t->sum_vars / t->n_sum_vars. */
4944 size_t allocated_sum_vars = 0;
4945 enumerate_sum_vars (t->axes[t->summary_axis],
4946 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
/* The table is usable only if both label-position checks succeed. */
4948 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4949 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, looks at the innermost variable of every nest in the stack of
   axis A of table T and passes its value to ctables_value_insert() when the
   variable's category specification matches that value.
   ctables_execute() calls this for each axis whose labels are moved
   elsewhere (t->label_axis[a] != a).

   NOTE(review): the statement executed for a numeric system-missing value
   and the tail of the ctables_categories_match() call are missing from this
   listing; presumably such values are skipped -- confirm. */
4953 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4954 enum pivot_axis_type a)
4956 struct ctables_stack *stack = &t->stacks[a];
4957 for (size_t i = 0; i < stack->n; i++)
4959 const struct ctables_nest *nest = &stack->nests[i];
/* Only the last (innermost) variable of the nest is considered. */
4960 const struct variable *var = nest->vars[nest->n - 1];
4961 const union value *value = case_data (c, var);
4963 if (var_is_numeric (var) && value->f == SYSMIS)
4966 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4968 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback used by ctables_sort_clabels_values() with sort().
   A_ and B_ each point to an array element, that is, to a pointer to a
   struct ctables_value; WIDTH_ points to the int width of the values.
   Returns the result of value_compare_3way() on the two values. */
4973 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
/* The array holds pointers, so the callback receives pointers to pointers. */
4975 const struct ctables_value *const *ap = a_;
4976 const struct ctables_value *const *bp = b_;
4977 const struct ctables_value *a = *ap;
4978 const struct ctables_value *b = *bp;
4979 const int *width = width_;
4980 return value_compare_3way (&a->value, &b->value, *width);
/* Builds the sorted array t->clabels_values from the values collected in
   t->clabels_values_map and numbers them.  The width and category
   specification come from the example variable t->clabels_example.

   NOTE(review): the condition guarding the value-label loop is missing from
   this listing (the original line falls between the c0 declaration and the
   loop); confirm when labeled values are added. */
4984 ctables_sort_clabels_values (struct ctables_table *t)
4986 const struct variable *v0 = t->clabels_example;
4987 int width = var_get_width (v0);
4989 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add every labeled value of v0 that matches its category specification,
   whether or not it occurred in the data. */
4992 const struct val_labs *val_labs = var_get_value_labels (v0);
4993 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4994 vl = val_labs_next (val_labs, vl))
4995 if (ctables_categories_match (c0, &vl->value, v0))
4996 ctables_value_insert (t, &vl->value, width);
/* Copy the map's entries into a flat array of pointers. */
4999 size_t n = hmap_count (&t->clabels_values_map);
5000 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5002 struct ctables_value *clv;
5004 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5005 t->clabels_values[i++] = clv;
5006 t->n_clabels_values = n;
/* Order the array by value; the width is passed through as auxiliary data. */
5009 sort (t->clabels_values, n, sizeof *t->clabels_values,
5010 compare_clabels_values_3way, &width);
/* Each value's leaf is its position in the sorted array. */
5012 for (size_t i = 0; i < n; i++)
5013 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, through ctables_add_occurrence(), the values of VAR
   that the category specification CATS refers to.  Explicit numbers and
   strings are added directly; the other visible branches walk VAR's value
   labels and add those labeled values that fall in a numeric range, fall in a
   string range, are missing values, or (for one branch) are either
   non-missing or allowed by the category's include_missing flag.

   NOTE(review): the switch statement and most of its case labels are missing
   from this listing, so which category type selects each branch below cannot
   be read off here; confirm against the full file. */
5017 ctables_add_category_occurrences (const struct variable *var,
5018 struct hmap *occurrences,
5019 const struct ctables_categories *cats)
5021 const struct val_labs *val_labs = var_get_value_labels (var);
5023 for (size_t i = 0; i < cats->n_cats; i++)
5025 const struct ctables_category *c = &cats->cats[i];
/* Explicit number: wrap it in a temporary union value. */
5029 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string: build a value of VAR's width, padded on the right with
   spaces, add it, and release the temporary. */
5035 int width = var_get_width (var);
5037 value_init (&value, width);
5038 value_copy_buf_rpad (&value, width,
5039 CHAR_CAST (uint8_t *, c->string.string),
5040 c->string.length, ' ');
5041 ctables_add_occurrence (var, &value, occurrences);
5042 value_destroy (&value, width);
/* Numeric range: labeled values within [nrange[0], nrange[1]]. */
5047 assert (var_is_numeric (var));
5048 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5049 vl = val_labs_next (val_labs, vl))
5050 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5051 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
5055 assert (var_is_alpha (var));
5056 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5057 vl = val_labs_next (val_labs, vl))
5058 if (in_string_range (&vl->value, var, c->srange))
5059 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5063 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5064 vl = val_labs_next (val_labs, vl))
5065 if (var_is_value_missing (var, &vl->value))
5066 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value, without filtering. */
5070 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5071 vl = val_labs_next (val_labs, vl))
5072 ctables_add_occurrence (var, &vl->value, occurrences);
5075 case CCT_POSTCOMPUTE:
/* Labeled values that are non-missing, or any labeled value when the
   category includes missing values. */
5085 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5086 vl = val_labs_next (val_labs, vl))
5087 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5088 ctables_add_occurrence (var, &vl->value, occurrences);
5091 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().  Enumerates
   combinations of category values across all axes of section S, one variable
   per recursion level: A is the current axis and A_IDX the index of the
   current variable within that axis's nest.  For each recorded occurrence of
   the current variable, the value is written into the scratch case C and the
   matching category into CATS[A][A_IDX] before recursing to the next
   variable; CCT_POSTCOMPUTE categories are additionally tried as categories
   in their own right.  When all axes have been handled, the combination is
   inserted with ctables_cell_insert__().

   NOTE(review): the return type, braces and the `else' keyword before the
   final branch are missing from this listing. */
5098 ctables_section_recurse_add_empty_categories (
5099 struct ctables_section *s,
5100 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5101 enum pivot_axis_type a, size_t a_idx)
/* Past the last axis: the combination is complete. */
5103 if (a >= PIVOT_N_AXES)
5104 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or no more variables in it: move to the next axis. */
5105 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5106 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5109 const struct variable *var = s->nests[a]->vars[a_idx];
5110 const struct ctables_categories *categories = s->table->categories[
5111 var_get_dict_index (var)];
5112 int width = var_get_width (var);
5113 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5114 const struct ctables_occurrence *o;
5115 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence's value. */
5117 union value *value = case_data_rw (c, var);
5118 value_destroy (value, width);
5119 value_clone (value, &o->value, width);
5120 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5121 assert (cats[a][a_idx] != NULL);
5122 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are used directly as the category for this level. */
5125 for (size_t i = 0; i < categories->n_cats; i++)
5127 const struct ctables_category *cat = &categories->cats[i];
5128 if (cat->type == CCT_POSTCOMPUTE)
5130 cats[a][a_idx] = cat;
5131 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* For every non-scale variable in the nests of section S whose category
   specification has show_empty set, adds the specification's values to the
   variable's occurrence map with ctables_add_category_occurrences().  It then
   creates a scratch case from the dictionary's prototype and starts
   ctables_section_recurse_add_empty_categories() at axis 0, variable 0.

   NOTE(review): short lines are missing from this listing (for example the
   statement that sets show_empty, any test of it, and the release of the
   scratch case).  The second dimension of `cats' is a fixed 10, which the
   original author flagged with XXX; nests with more variables than that are
   presumably not expected -- confirm. */
5138 ctables_section_add_empty_categories (struct ctables_section *s)
5140 bool show_empty = false;
5141 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5143 for (size_t k = 0; k < s->nests[a]->n; k++)
/* The scale variable has no categories, so it is skipped. */
5144 if (k != s->nests[a]->scale_idx)
5146 const struct variable *var = s->nests[a]->vars[k];
5147 const struct ctables_categories *cats = s->table->categories[
5148 var_get_dict_index (var)];
5149 if (cats->show_empty)
5152 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5158 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5159 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5160 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S while leaving its hash maps initialized.  Three kinds of
   content are removed: the per-variable occurrence maps, the cells (with
   their per-axis category values and their summaries), and the areas of every
   area type.  The cell and area maps are shrunk afterwards.

   NOTE(review): the lines that free each occurrence, cell and area object
   itself are missing from this listing; only the visible cleanup is
   described. */
5165 ctables_section_clear (struct ctables_section *s)
/* Occurrences: destroy each stored value and unlink it from its map. */
5167 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5169 const struct ctables_nest *nest = s->nests[a];
5170 for (size_t i = 0; i < nest->n; i++)
5171 if (i != nest->scale_idx)
5173 const struct variable *var = nest->vars[i];
5174 int width = var_get_width (var);
5175 struct ctables_occurrence *o, *next;
5176 struct hmap *map = &s->occurrences[a][i];
5177 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5179 value_destroy (&o->value, width);
5180 hmap_delete (map, &o->node);
/* Cells: release category values per axis (the scale variable has none),
   then the summaries, then unlink the cell. */
5187 struct ctables_cell *cell, *next_cell;
5188 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5190 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5192 const struct ctables_nest *nest = s->nests[a];
5193 for (size_t i = 0; i < nest->n; i++)
5194 if (i != nest->scale_idx)
5195 value_destroy (&cell->axes[a].cvs[i].value,
5196 var_get_width (nest->vars[i]));
5197 free (cell->axes[a].cvs);
/* The spec set matching the cell's summary variant tells how many summaries
   the cell holds and how each is uninitialized. */
5200 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5201 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5202 for (size_t i = 0; i < specs->n; i++)
5203 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5204 free (cell->summaries);
5206 hmap_delete (&s->cells, &cell->node);
5209 hmap_shrink (&s->cells);
/* Areas: unlink every area of every area type. */
5211 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5213 struct ctables_area *area, *next_area;
5214 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5218 hmap_delete (&s->areas[at], &area->node);
5221 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: first clears its contents with
   ctables_section_clear(), then destroys the occurrence maps of each axis,
   frees the per-axis arrays of maps, and destroys the cell map and the area
   maps.  The struct itself is not freed by the visible lines. */
5226 ctables_section_uninit (struct ctables_section *s)
5228 ctables_section_clear (s);
5230 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5232 struct ctables_nest *nest = s->nests[a];
/* One occurrence map exists per variable of the nest. */
5233 for (size_t i = 0; i < nest->n; i++)
5234 hmap_destroy (&s->occurrences[a][i]);
5235 free (s->occurrences[a]);
5238 hmap_destroy (&s->cells);
5239 for (size_t i = 0; i < N_CTATS; i++)
5240 hmap_destroy (&s->areas[i]);
/* Resets the per-run data of table T: clears every section and, when category
   labels were moved (t->clabels_example is set), empties the map of label
   values and discards the sorted array built from it.
   ctables_execute() calls this after producing the table's output.

   NOTE(review): the line that frees each struct ctables_value is missing
   from this listing. */
5244 ctables_table_clear (struct ctables_table *t)
5246 for (size_t i = 0; i < t->n_sections; i++)
5247 ctables_section_clear (&t->sections[i]);
5249 if (t->clabels_example)
/* All label values share the width of the example variable. */
5251 int width = var_get_width (t->clabels_example);
5252 struct ctables_value *value, *next_value;
5253 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5254 &t->clabels_values_map)
5256 value_destroy (&value->value, width);
5257 hmap_delete (&t->clabels_values_map, &value->node);
5260 hmap_shrink (&t->clabels_values_map);
/* The array only pointed at map entries, so just the array is freed. */
5262 free (t->clabels_values);
5263 t->clabels_values = NULL;
5264 t->n_clabels_values = 0;
/* Runs the tables in `ct' over the cases read from INPUT, using dataset DS
   for its dictionary.  Sections are first allocated for every table.  The
   input is then split into groups (by split variables when the dictionary's
   split type is SPLIT_SEPARATE, otherwise one group).  For each group every
   case is fed to every section of every table, after which each table is
   sorted, completed with empty categories, output, and cleared.  Returns the
   result of casegrouper_destroy().

   NOTE(review): the return type, the remainder of the parameter list (which
   declares `ct') and several short lines are missing from this listing. */
5269 ctables_execute (struct dataset *ds, struct casereader *input,
5272 for (size_t i = 0; i < ct->n_tables; i++)
5274 struct ctables_table *t = ct->tables[i];
/* One section per combination of nests across the three axes; MAX (1, ...)
   keeps the product nonzero for an axis with an empty stack. */
5275 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5276 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5277 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5278 sizeof *t->sections);
5279 size_t ix[PIVOT_N_AXES];
5280 ctables_table_add_section (t, 0, ix);
5283 struct dictionary *dict = dataset_dict (ds);
5285 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5286 struct casegrouper *grouper
5288 ? casegrouper_create_splits (input, dict)
5289 : casegrouper_create_vars (input, NULL, 0));
5290 struct casereader *group;
5291 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to report the split-file values. */
5295 struct ccase *c = casereader_peek (group, 0);
5298 output_split_file_values (ds, c);
5303 bool warn_on_invalid = true;
/* The loop's step expression releases each case before reading the next. */
5304 for (struct ccase *c = casereader_read (group); c;
5305 case_unref (c), c = casereader_read (group))
/* d_weight is the dictionary case weight; e_weight comes from ct->e_weight
   when that variable is set. */
5307 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5308 double e_weight = (ct->e_weight
5309 ? var_force_valid_weight (ct->e_weight,
5310 case_num (c, ct->e_weight),
5314 for (size_t i = 0; i < ct->n_tables; i++)
5316 struct ctables_table *t = ct->tables[i];
5318 for (size_t j = 0; j < t->n_sections; j++)
5319 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose labels are displayed elsewhere also record the label values. */
5321 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5322 if (t->label_axis[a] != a)
5323 ctables_insert_clabels_values (t, c, a);
5326 casereader_destroy (group);
/* All cases of the group are in: finish, output and reset each table. */
5328 for (size_t i = 0; i < ct->n_tables; i++)
5330 struct ctables_table *t = ct->tables[i];
5332 if (t->clabels_example)
5333 ctables_sort_clabels_values (t);
5335 for (size_t j = 0; j < t->n_sections; j++)
5336 ctables_section_add_empty_categories (&t->sections[j]);
5338 ctables_table_output (ct, t);
5339 ctables_table_clear (t);
5342 return casegrouper_destroy (grouper);
/* Type of a parsing function for one precedence level of a postcompute
   expression: it takes the lexer and the dictionary and returns an expression
   node.  The binary-operator helpers receive such a function as
   `parse_next_level' and treat a null result as failure. */
5347 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5348 struct dictionary *);
/* Destroys postcompute expression E along with what it owns: the string of a
   CTPO_CAT_STRING node, both bounds of a CTPO_CAT_SRANGE node, both
   subexpressions of an operator node (recursively), and the node's location.
   The node types listed at the end own nothing besides the location.

   NOTE(review): the null check on E, the switch header, the operator case
   labels, the break statements and the final free of E are missing from this
   listing; confirm them against the full file. */
5351 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5357 case CTPO_CAT_STRING:
5358 ss_dealloc (&e->string);
5361 case CTPO_CAT_SRANGE:
5362 for (size_t i = 0; i < 2; i++)
5363 ss_dealloc (&e->srange[i]);
/* Operator nodes own their operands. */
5372 for (size_t i = 0; i < 2; i++)
5373 ctables_pcexpr_destroy (e->subs[i]);
5377 case CTPO_CAT_NUMBER:
5378 case CTPO_CAT_NRANGE:
5379 case CTPO_CAT_MISSING:
5380 case CTPO_CAT_OTHERNM:
5381 case CTPO_CAT_SUBTOTAL:
5382 case CTPO_CAT_TOTAL:
5386 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP whose operands are
   SUB0 and SUB1.  The node's location is the merge of the operands'
   locations.  The operands are stored by pointer, not copied.

   NOTE(review): the initializer line that stores OP and the return statement
   are missing from this listing. */
5391 static struct ctables_pcexpr *
5392 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5393 struct ctables_pcexpr *sub0,
5394 struct ctables_pcexpr *sub1)
5396 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5397 *e = (struct ctables_pcexpr) {
5399 .subs = { sub0, sub1 },
5400 .location = msg_location_merged (sub0->location, sub1->location),
5405 /* How to parse an operator. */
/* `token' is the lexer token that ctables_pcexpr_match_operator() compares
   against the current token. */
5408 enum token_type token;
/* `op' is the postcompute operation used to build the node for a matched
   operator. */
5409 enum ctables_postcompute_op op;
/* Searches the N_OPS entries of OPS for one whose token equals the lexer's
   current token.  T_NEG_NUM is treated specially: the statement guarded by
   `op->token != T_NEG_NUM' is skipped for it.

   NOTE(review): that guarded statement and the return statements are missing
   from this listing.  Presumably a matched token is consumed unless it is a
   negative number, and a null pointer is returned when nothing matches --
   confirm. */
5412 static const struct operator *
5413 ctables_pcexpr_match_operator (struct lexer *lexer,
5414 const struct operator ops[], size_t n_ops)
5416 for (const struct operator *op = ops; op < ops + n_ops; op++)
5417 if (lex_token (lexer) == op->token)
5419 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators at one precedence level, given an
   already parsed left operand LHS.  OPS (N_OPS entries) lists the operators
   of this level and PARSE_NEXT_LEVEL parses each right operand.  Each matched
   operator combines the accumulated LHS with the new operand into a binary
   node that becomes the new LHS.  If CHAIN_WARNING is nonnull, it is issued
   as a warning at LHS's location when the loop counter exceeds 1 at the
   visible check.  When a right operand is missing, LHS is destroyed.

   NOTE(review): the tests around the warning and around the failed operand,
   and all return statements, are missing from this listing. */
5428 static struct ctables_pcexpr *
5429 ctables_pcexpr_parse_binary_operators__ (
5430 struct lexer *lexer, struct dictionary *dict,
5431 const struct operator ops[], size_t n_ops,
5432 parse_recursively_func *parse_next_level,
5433 const char *chain_warning, struct ctables_pcexpr *lhs)
/* op_count is the number of operators consumed so far. */
5435 for (int op_count = 0; ; op_count++)
5437 const struct operator *op
5438 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5441 if (op_count > 1 && chain_warning)
5442 msg_at (SW, lhs->location, "%s", chain_warning);
5447 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5450 ctables_pcexpr_destroy (lhs);
5454 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: obtains the first operand
   from PARSE_NEXT_LEVEL and hands it, with the operator table OPS (N_OPS
   entries) and the optional CHAIN_WARNING, to
   ctables_pcexpr_parse_binary_operators__() for the rest of the chain.

   NOTE(review): the check of the first operand and one argument line of the
   final call are missing from this listing. */
5458 static struct ctables_pcexpr *
5459 ctables_pcexpr_parse_binary_operators (
5460 struct lexer *lexer, struct dictionary *dict,
5461 const struct operator ops[], size_t n_ops,
5462 parse_recursively_func *parse_next_level, const char *chain_warning)
5464 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5468 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5470 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this function
   for a parenthesized subexpression before its definition appears. */
5473 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5474 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node whose numeric range
   runs from LOW to HIGH.  The parser passes -DBL_MAX or DBL_MAX for an
   open-ended bound.  Members not named in the initializer are zeroed. */
5476 static struct ctables_pcexpr
5477 ctpo_cat_nrange (double low, double high)
5479 return (struct ctables_pcexpr) {
5480 .op = CTPO_CAT_NRANGE,
5481 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node whose string range
   runs from LOW to HIGH.  The substrings are stored as given, not copied.
   The parser passes a substring with a null `string' member for an
   open-ended bound. */
5485 static struct ctables_pcexpr
5486 ctpo_cat_srange (struct substring low, struct substring high)
5488 return (struct ctables_pcexpr) {
5489 .op = CTPO_CAT_SRANGE,
5490 .srange = { low, high },
/* Parses a primary postcompute expression.  The forms visible below are:

     - a number, giving a CTPO_CONSTANT node;
     - the keywords MISSING, OTHERNM and TOTAL;
     - SUBTOTAL, optionally followed by a bracketed integer of at least 1;
     - a bracketed category reference: LO THRU followed by a string or a
       number; a number, optionally followed by THRU and either HI or a
       number; or a string, optionally followed by THRU and either HI or a
       string;
     - a parenthesized expression, parsed with ctables_pcexpr_parse_add().

   Anything else is reported with lex_error().  On success the node is given
   a location covering the consumed tokens and returned as a heap copy.

   NOTE(review): the failure returns, the token-advancing calls, and the
   return of the parenthesized subexpression are missing from this listing. */
5494 static struct ctables_pcexpr *
5495 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the expression starts so its location can be computed. */
5497 int start_ofs = lex_ofs (lexer);
5498 struct ctables_pcexpr e;
5499 if (lex_is_number (lexer))
5501 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5502 .number = lex_number (lexer) };
5505 else if (lex_match_id (lexer, "MISSING"))
5506 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5507 else if (lex_match_id (lexer, "OTHERNM"))
5508 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5509 else if (lex_match_id (lexer, "TOTAL"))
5510 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5511 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows SUBTOTAL. */
5513 size_t subtotal_index = 0;
5514 if (lex_match (lexer, T_LBRACK))
5516 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5518 subtotal_index = lex_integer (lexer);
5520 if (!lex_force_match (lexer, T_RBRACK))
5523 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5524 .subtotal_index = subtotal_index };
5526 else if (lex_match (lexer, T_LBRACK))
/* Ranges open at the low end: a null low string, or -DBL_MAX for numbers. */
5528 if (lex_match_id (lexer, "LO"))
5530 if (!lex_force_match_id (lexer, "THRU"))
5533 if (lex_is_string (lexer))
5535 struct substring low = { .string = NULL };
5536 struct substring high = parse_substring (lexer, dict);
5537 e = ctpo_cat_srange (low, high);
5541 if (!lex_force_num (lexer))
5543 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5547 else if (lex_is_number (lexer))
5549 double number = lex_number (lexer);
5551 if (lex_match_id (lexer, "THRU"))
5553 if (lex_match_id (lexer, "HI"))
5554 e = ctpo_cat_nrange (number, DBL_MAX);
5557 if (!lex_force_num (lexer))
5559 e = ctpo_cat_nrange (number, lex_number (lexer));
5564 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5567 else if (lex_is_string (lexer))
5569 struct substring s = parse_substring (lexer, dict);
5571 if (lex_match_id (lexer, "THRU"))
5573 struct substring high;
5575 if (lex_match_id (lexer, "HI"))
5576 high = (struct substring) { .string = NULL };
5579 if (!lex_force_string (lexer))
5584 high = parse_substring (lexer, dict);
5587 e = ctpo_cat_srange (s, high);
5590 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5594 lex_error (lexer, NULL);
/* A missing `]' discards any strings the category node already owns. */
5598 if (!lex_force_match (lexer, T_RBRACK))
5600 if (e.op == CTPO_CAT_STRING)
5601 ss_dealloc (&e.string);
5602 else if (e.op == CTPO_CAT_SRANGE)
5604 ss_dealloc (&e.srange[0]);
5605 ss_dealloc (&e.srange[1]);
5610 else if (lex_match (lexer, T_LPAREN))
5612 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5615 if (!lex_force_match (lexer, T_RPAREN))
5617 ctables_pcexpr_destroy (ep);
5624 lex_error (lexer, NULL);
/* The location runs from the first token to the last consumed token. */
5628 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5629 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for the negation of SUB.  Its location
   covers the tokens from START_OFS up to the token before LEXER's current
   position.

   NOTE(review): the initializer lines that set the node's operation and
   operand, and the return statement, are missing from this listing. */
5632 static struct ctables_pcexpr *
5633 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5634 struct lexer *lexer, int start_ofs)
5636 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5637 *e = (struct ctables_pcexpr) {
5640 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level (operator T_EXP, operation CTPO_POW).
   Chained exponentiation produces the left-associativity warning below.

   In the ordinary case this defers to
   ctables_pcexpr_parse_binary_operators() with
   ctables_pcexpr_parse_primary() as the operand parser.  When the current
   token is a negative number immediately followed by T_EXP, the code instead
   builds a constant from the negated token value (that is, the positive
   magnitude), parses the exponent chain with that constant as the left
   operand, and wraps the result in a negation node.

   NOTE(review): the final argument of the first call, the token-advancing
   call, the null check on `node', and the end of the "Special case" comment
   are missing from this listing. */
5645 static struct ctables_pcexpr *
5646 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5648 static const struct operator op = { T_EXP, CTPO_POW };
5650 const char *chain_warning =
5651 _("The exponentiation operator (`**') is left-associative: "
5652 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5653 "To disable this warning, insert parentheses.");
5655 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5656 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5657 ctables_pcexpr_parse_primary,
5660 /* Special case for situations like "-5**6", which must be parsed as
5663 int start_ofs = lex_ofs (lexer);
5664 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5665 *lhs = (struct ctables_pcexpr) {
5666 .op = CTPO_CONSTANT,
5667 .number = -lex_tokval (lexer),
5668 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5672 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5673 lexer, dict, &op, 1,
5674 ctables_pcexpr_parse_primary, chain_warning, lhs);
5678 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5681 /* Parses the unary minus level. */
/* Without a leading T_DASH this simply parses the exponentiation level.
   With one, the operand is parsed by calling this function again, so
   repeated minus signs nest, and the operand is wrapped in a negation node
   whose location starts at the dash.

   NOTE(review): the null check on `inner' is missing from this listing. */
5682 static struct ctables_pcexpr *
5683 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5685 int start_ofs = lex_ofs (lexer);
5686 if (!lex_match (lexer, T_DASH))
5687 return ctables_pcexpr_parse_exp (lexer, dict);
5689 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5693 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5696 /* Parses the multiplication and division level. */
/* Operands are parsed at the unary minus level; no chain warning is given
   for this level (the chain_warning argument is NULL). */
5697 static struct ctables_pcexpr *
5698 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
/* Maps each operator token of this level to its postcompute operation. */
5700 static const struct operator ops[] =
5702 { T_ASTERISK, CTPO_MUL },
5703 { T_SLASH, CTPO_DIV },
5706 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5707 sizeof ops / sizeof *ops,
5708 ctables_pcexpr_parse_neg, NULL);
5711 /* Parses the addition and subtraction level. */
/* This is the entry point used for a whole postcompute expression and for
   parenthesized subexpressions.  Operands are parsed at the multiplication
   level; no chain warning is given for this level. */
5712 static struct ctables_pcexpr *
5713 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
/* A negative-number token after an operand is mapped to CTPO_ADD: the token
   itself carries the minus sign, so the negative operand is added. */
5715 static const struct operator ops[] =
5717 { T_PLUS, CTPO_ADD },
5718 { T_DASH, CTPO_SUB },
5719 { T_NEG_NUM, CTPO_ADD },
5722 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5723 ops, sizeof ops / sizeof *ops,
5724 ctables_pcexpr_parse_mul, NULL);
/* Looks up the postcompute called NAME in CT's table of postcomputes.  The
   hash is computed with utf8_hash_case_string() and candidates are compared
   with utf8_strcasecmp(), so the lookup ignores case.

   NOTE(review): both return statements are missing from this listing;
   presumably the match is returned, or a null pointer when there is none. */
5727 static struct ctables_postcompute *
5728 ctables_find_postcompute (struct ctables *ct, const char *name)
5730 struct ctables_postcompute *pc;
/* Only entries in the bucket for NAME's hash are visited. */
5731 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5732 utf8_hash_case_string (name, 0), &ct->postcomputes)
5733 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form

       &NAME = EXPR(expression)

   and records the result as a postcompute in CT->postcomputes.  A name that
   is already defined gets its expression and location replaced, with a
   warning.

   NOTE(review): several short lines of this function (error returns,
   cleanup, some assignments) are not visible in this listing. */
5739 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* One token before the current one.  The caller in cmd_ctables() matches
   the PCOMPUTE keyword just before calling, so this is presumably that
   keyword's offset -- confirm. */
5742 int pcompute_start = lex_ofs (lexer) - 1;
5744 if (!lex_match (lexer, T_AND))
5746 lex_error_expecting (lexer, "&");
5749 if (!lex_force_id (lexer))
/* Copy of the identifier token's text. */
5752 char *name = ss_xstrdup (lex_tokss (lexer));
5755 if (!lex_force_match (lexer, T_EQUALS)
5756 || !lex_force_match_id (lexer, "EXPR")
5757 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets of the first and last tokens of the expression itself; the
   enclosing parentheses are excluded.  They are used below to build the
   default label. */
5763 int expr_start = lex_ofs (lexer);
5764 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5765 int expr_end = lex_ofs (lexer) - 1;
5766 if (!expr || !lex_force_match (lexer, T_RPAREN))
/* EXPR may be null on this path (see the condition just above). */
5768 ctables_pcexpr_destroy (expr);
/* Offset of the closing parenthesis that was just matched. */
5772 int pcompute_end = lex_ofs (lexer) - 1;
5774 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5777 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn at the new definition, point at the old one, and then
   discard the old expression and location. */
5780 msg_at (SW, location, _("New definition of &%s will override the "
5781 "previous definition."),
5783 msg_at (SN, pc->location, _("This is the previous definition."));
5785 ctables_pcexpr_destroy (pc->expr);
5786 msg_location_destroy (pc->location);
/* First definition: create a record whose members other than `name' are
   zero-initialized and add it to the map under the name's hash from
   utf8_hash_case_string(). */
5791 pc = xmalloc (sizeof *pc);
5792 *pc = (struct ctables_postcompute) { .name = name };
5793 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5794 utf8_hash_case_string (pc->name, 0));
5797 pc->location = location;
/* The label is the syntax text of the expression.
   NOTE(review): the line just before this one is not visible; this
   assignment may be conditional -- confirm. */
5799 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES ... FORMAT into SSS: a sequence of
   summary function names, each followed by a percentile (for PTILE only) and
   a format specifier.

   SSS is overwritten at the start, so the caller need not initialize it.

   NOTE(review): the lines that leave the function (the error jumps, the
   success return, and the label before the final cleanup call) are not
   visible in this listing. */
5804 ctables_parse_pproperties_format (struct lexer *lexer,
5805 struct ctables_summary_spec_set *sss)
/* Start from an empty set; the members not named are zero-initialized. */
5807 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Continue until the end of the command, a slash, or an identifier that
   matches LABEL or HIDESOURCECATS (the other settings that
   ctables_parse_pproperties() accepts). */
5809 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5810 && !(lex_token (lexer) == T_ID
5811 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5812 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5813 lex_tokss (lexer)))))
5815 /* Parse function. */
5816 enum ctables_summary_function function;
5818 enum ctables_area_type area;
5819 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5822 /* Parse percentile. */
/* Only CTSF_PTILE takes a percentile, a number in the closed range 0 to 100;
   for every other function it stays 0. */
5823 double percentile = 0;
5824 if (function == CTSF_PTILE)
5826 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5828 percentile = lex_number (lexer);
5833 struct fmt_spec format;
5834 bool is_ctables_format;
5835 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification.
   NOTE(review): some initializer fields are not visible in this listing. */
5838 if (sss->n >= sss->allocated)
5839 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5840 sizeof *sss->specs);
5841 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5842 .function = function,
5843 .weighted = weighted,
5846 .percentile = percentile,
5848 .is_ctables_format = is_ctables_format,
/* Presumably the error path: releases whatever was accumulated in SSS --
   confirm. */
5854 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of `&NAME' references
   to postcomputes already defined in CT, followed by any number of LABEL,
   FORMAT, and HIDESOURCECATS settings.  Each setting is applied to every
   postcompute in the list.

   NOTE(review): several short lines of this function (error jumps, some
   conditions, list bookkeeping, final cleanup) are not visible in this
   listing. */
5859 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5861 struct ctables_postcompute **pcs = NULL;
5863 size_t allocated_pcs = 0;
/* First part: the list of `&NAME' references. */
5865 while (lex_match (lexer, T_AND))
5867 if (!lex_force_id (lexer))
5869 struct ctables_postcompute *pc
5870 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5873 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5878 if (n_pcs >= allocated_pcs)
5879 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Second part: settings, up to the next slash or the end of the command. */
5883 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5885 if (lex_match_id (lexer, "LABEL"))
/* The equals sign is optional. */
5887 lex_match (lexer, T_EQUALS);
5888 if (!lex_force_string (lexer))
/* Every listed postcompute gets its own copy of the string token, replacing
   any label it had before. */
5891 for (size_t i = 0; i < n_pcs; i++)
5893 free (pcs[i]->label);
5894 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5899 else if (lex_match_id (lexer, "FORMAT"))
5901 lex_match (lexer, T_EQUALS);
/* Parse once into a temporary set, clone it into each listed postcompute,
   and then release the temporary. */
5903 struct ctables_summary_spec_set sss;
5904 if (!ctables_parse_pproperties_format (lexer, &sss))
5907 for (size_t i = 0; i < n_pcs; i++)
/* NOTE(review): the conditions that choose between the next two statements
   are not visible here; presumably an existing set is uninitialized for
   reuse and a missing one is allocated -- confirm. */
5910 ctables_summary_spec_set_uninit (pcs[i]->specs);
5912 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5913 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5915 ctables_summary_spec_set_uninit (&sss);
5917 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5919 lex_match (lexer, T_EQUALS);
5920 bool hide_source_cats;
5921 if (!parse_bool (lexer, &hide_source_cats))
5923 for (size_t i = 0; i < n_pcs; i++)
5924 pcs[i]->hide_source_cats = hide_source_cats;
5928 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats NOW, converted to local time, according to the strftime() template
   FORMAT and appends the resulting text to OUT.

   Nothing is appended when NOW cannot be converted to local time or when the
   formatted text is empty or does not fit in the internal buffer.
   localtime() may return a null pointer, which must not be passed on to
   strftime(), and when strftime() returns 0 the contents of the buffer are
   indeterminate, so in neither case is there a valid string to append. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
/* If *S begins with PREFIX, advances *S past it by PREFIX.length bytes.

   NOTE(review): the return statements are not visible in this listing.
   put_title_text() uses the result as a condition, so presumably it is true
   exactly when the prefix was found and skipped -- confirm. */
5950 skip_prefix (struct substring *s, struct substring prefix)
5952 if (ss_starts_with (*s, prefix))
5954 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens of LEXER at offsets EXPR_START up
   to, but not including, EXPR_END.

   An identifier token that names a variable in DICT which has a variable
   label is replaced by that label; any other identifier is copied as is.
   Tokens of other types are copied using their syntax representation, with
   spaces inserted around them as described below.

   NOTE(review): the statements that maintain `nest' are not visible in this
   listing; presumably it counts open square brackets so that bracketed
   material can be treated differently -- confirm. */
5962 put_table_expression (struct string *out, struct lexer *lexer,
5963 struct dictionary *dict, int expr_start, int expr_end)
5966 for (int ofs = expr_start; ofs < expr_end; ofs++)
5968 const struct token *t = lex_ofs_token (lexer, ofs);
5969 if (t->type == T_LBRACK)
5971 else if (t->type == T_RBRACK && nest > 0)
5977 else if (t->type == T_ID)
/* Use the variable label when the identifier names a variable that has one,
   otherwise the identifier text itself. */
5979 const struct variable *var
5980 = dict_lookup_var (dict, t->string.string);
5981 const char *label = var ? var_get_label (var) : NULL;
5982 ds_put_cstr (out, label ? label : t->string.string);
/* A space goes before the token unless it is the first token, it is a right
   parenthesis, or OUT already ends in a space. */
5986 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5987 ds_put_byte (out, ' ');
/* NOTE(review): the release of REPR is not visible in this listing. */
5989 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5990 ds_put_cstr (out, repr);
/* A space goes after the token unless it is the last token or it is a left
   parenthesis. */
5993 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5994 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding these keywords on the way:

     )DATE   replaced by NOW formatted with strftime()'s "%x" conversion.
     )TIME   replaced by NOW formatted with strftime()'s "%X" conversion.
     )TABLE  replaced by the output of put_table_expression() for the
             tokens at offsets EXPR_START to EXPR_END of LEXER.

   A right parenthesis that does not begin one of these keywords is copied
   unchanged.  The keywords are matched with ss_starts_with(), via
   skip_prefix().

   NOTE(review): the loop header and the statement that leaves the function
   are not visible in this listing; presumably the steps below repeat until
   IN is exhausted -- confirm. */
6000 put_title_text (struct string *out, struct substring in, time_t now,
6001 struct lexer *lexer, struct dictionary *dict,
6002 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis verbatim and drop it from
   IN. */
6006 size_t chunk = ss_find_byte (in, ')');
6007 ds_put_substring (out, ss_head (in, chunk));
6008 ss_advance (&in, chunk);
/* Nothing is left once the copied chunk reached the end of IN. */
6009 if (ss_is_empty (in))
/* IN is not empty here; try each keyword in turn. */
6012 if (skip_prefix (&in, ss_cstr (")DATE")))
6013 put_strftime (out, now, "%x");
6014 else if (skip_prefix (&in, ss_cstr (")TIME")))
6015 put_strftime (out, now, "%X");
6016 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6017 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: copy one right parenthesis literally and move past it. */
6020 ds_put_byte (out, ')');
6021 ss_advance (&in, 1);
6027 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6029 struct casereader *input = NULL;
6031 struct measure_guesser *mg = measure_guesser_create (ds);
6034 input = proc_open (ds);
6035 measure_guesser_run (mg, input);
6036 measure_guesser_destroy (mg);
6039 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6040 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6041 enum settings_value_show tvars = settings_get_show_variables ();
6042 for (size_t i = 0; i < n_vars; i++)
6043 vlabels[i] = (enum ctables_vlabel) tvars;
6045 struct pivot_table_look *look = pivot_table_look_unshare (
6046 pivot_table_look_ref (pivot_table_look_get_default ()));
6047 look->omit_empty = false;
6049 struct ctables *ct = xmalloc (sizeof *ct);
6050 *ct = (struct ctables) {
6051 .dict = dataset_dict (ds),
6053 .ctables_formats = FMT_SETTINGS_INIT,
6055 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6058 time_t now = time (NULL);
6063 const char *dot_string;
6064 const char *comma_string;
6066 static const struct ctf ctfs[4] = {
6067 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6068 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6069 { CTEF_PAREN, "-,(,),", "-.(.)." },
6070 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6072 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6073 for (size_t i = 0; i < 4; i++)
6075 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6076 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6077 fmt_number_style_from_string (s));
6080 if (!lex_force_match (lexer, T_SLASH))
6083 while (!lex_match_id (lexer, "TABLE"))
6085 if (lex_match_id (lexer, "FORMAT"))
6087 double widths[2] = { SYSMIS, SYSMIS };
6088 double units_per_inch = 72.0;
6090 while (lex_token (lexer) != T_SLASH)
6092 if (lex_match_id (lexer, "MINCOLWIDTH"))
6094 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6097 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6099 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6102 else if (lex_match_id (lexer, "UNITS"))
6104 lex_match (lexer, T_EQUALS);
6105 if (lex_match_id (lexer, "POINTS"))
6106 units_per_inch = 72.0;
6107 else if (lex_match_id (lexer, "INCHES"))
6108 units_per_inch = 1.0;
6109 else if (lex_match_id (lexer, "CM"))
6110 units_per_inch = 2.54;
6113 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6117 else if (lex_match_id (lexer, "EMPTY"))
6122 lex_match (lexer, T_EQUALS);
6123 if (lex_match_id (lexer, "ZERO"))
6125 /* Nothing to do. */
6127 else if (lex_match_id (lexer, "BLANK"))
6128 ct->zero = xstrdup ("");
6129 else if (lex_force_string (lexer))
6131 ct->zero = ss_xstrdup (lex_tokss (lexer));
6137 else if (lex_match_id (lexer, "MISSING"))
6139 lex_match (lexer, T_EQUALS);
6140 if (!lex_force_string (lexer))
6144 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6145 ? ss_xstrdup (lex_tokss (lexer))
6151 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6152 "UNITS", "EMPTY", "MISSING");
6157 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6158 && widths[0] > widths[1])
6160 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6164 for (size_t i = 0; i < 2; i++)
6165 if (widths[i] != SYSMIS)
6167 int *wr = ct->look->width_ranges[TABLE_HORZ];
6168 wr[i] = widths[i] / units_per_inch * 96.0;
6173 else if (lex_match_id (lexer, "VLABELS"))
6175 if (!lex_force_match_id (lexer, "VARIABLES"))
6177 lex_match (lexer, T_EQUALS);
6179 struct variable **vars;
6181 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6185 if (!lex_force_match_id (lexer, "DISPLAY"))
6190 lex_match (lexer, T_EQUALS);
6192 enum ctables_vlabel vlabel;
6193 if (lex_match_id (lexer, "DEFAULT"))
6194 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6195 else if (lex_match_id (lexer, "NAME"))
6197 else if (lex_match_id (lexer, "LABEL"))
6198 vlabel = CTVL_LABEL;
6199 else if (lex_match_id (lexer, "BOTH"))
6201 else if (lex_match_id (lexer, "NONE"))
6205 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6211 for (size_t i = 0; i < n_vars; i++)
6212 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6215 else if (lex_match_id (lexer, "MRSETS"))
6217 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6219 lex_match (lexer, T_EQUALS);
6220 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6223 else if (lex_match_id (lexer, "SMISSING"))
6225 if (lex_match_id (lexer, "VARIABLE"))
6226 ct->smissing_listwise = false;
6227 else if (lex_match_id (lexer, "LISTWISE"))
6228 ct->smissing_listwise = true;
6231 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6235 else if (lex_match_id (lexer, "PCOMPUTE"))
6237 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6240 else if (lex_match_id (lexer, "PPROPERTIES"))
6242 if (!ctables_parse_pproperties (lexer, ct))
6245 else if (lex_match_id (lexer, "WEIGHT"))
6247 if (!lex_force_match_id (lexer, "VARIABLE"))
6249 lex_match (lexer, T_EQUALS);
6250 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6254 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6256 if (lex_match_id (lexer, "COUNT"))
6258 lex_match (lexer, T_EQUALS);
6259 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6262 ct->hide_threshold = lex_integer (lexer);
6265 else if (ct->hide_threshold == 0)
6266 ct->hide_threshold = 5;
6270 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6271 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6272 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6276 if (!lex_force_match (lexer, T_SLASH))
6280 size_t allocated_tables = 0;
6283 if (ct->n_tables >= allocated_tables)
6284 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6285 sizeof *ct->tables);
6287 struct ctables_category *cat = xmalloc (sizeof *cat);
6288 *cat = (struct ctables_category) {
6290 .include_missing = false,
6291 .sort_ascending = true,
6294 struct ctables_categories *c = xmalloc (sizeof *c);
6295 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6296 *c = (struct ctables_categories) {
6303 struct ctables_categories **categories = xnmalloc (n_vars,
6304 sizeof *categories);
6305 for (size_t i = 0; i < n_vars; i++)
6308 struct ctables_table *t = xmalloc (sizeof *t);
6309 *t = (struct ctables_table) {
6311 .slabels_axis = PIVOT_AXIS_COLUMN,
6312 .slabels_visible = true,
6313 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6315 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6316 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6317 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6319 .clabels_from_axis = PIVOT_AXIS_LAYER,
6320 .clabels_to_axis = PIVOT_AXIS_LAYER,
6321 .categories = categories,
6322 .n_categories = n_vars,
6325 ct->tables[ct->n_tables++] = t;
6327 lex_match (lexer, T_EQUALS);
6328 int expr_start = lex_ofs (lexer);
6329 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6331 if (lex_match (lexer, T_BY))
6333 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6334 ct, t, PIVOT_AXIS_COLUMN))
6337 if (lex_match (lexer, T_BY))
6339 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6340 ct, t, PIVOT_AXIS_LAYER))
6344 int expr_end = lex_ofs (lexer);
6346 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6347 && !t->axes[PIVOT_AXIS_LAYER])
6349 lex_error (lexer, _("At least one variable must be specified."));
6353 const struct ctables_axis *scales[PIVOT_N_AXES];
6354 size_t n_scales = 0;
6355 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6357 scales[a] = find_scale (t->axes[a]);
6363 msg (SE, _("Scale variables may appear only on one axis."));
6364 if (scales[PIVOT_AXIS_ROW])
6365 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6366 _("This scale variable appears on the rows axis."));
6367 if (scales[PIVOT_AXIS_COLUMN])
6368 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6369 _("This scale variable appears on the columns axis."));
6370 if (scales[PIVOT_AXIS_LAYER])
6371 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6372 _("This scale variable appears on the layer axis."));
6376 const struct ctables_axis *summaries[PIVOT_N_AXES];
6377 size_t n_summaries = 0;
6378 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6380 summaries[a] = (scales[a]
6382 : find_categorical_summary_spec (t->axes[a]));
6386 if (n_summaries > 1)
6388 msg (SE, _("Summaries may appear only on one axis."));
6389 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6392 msg_at (SN, summaries[a]->loc,
6394 ? _("This variable on the rows axis has a summary.")
6395 : a == PIVOT_AXIS_COLUMN
6396 ? _("This variable on the columns axis has a summary.")
6397 : _("This variable on the layers axis has a summary."));
6399 msg_at (SN, summaries[a]->loc,
6400 _("This is a scale variable, so it always has a "
6401 "summary even if the syntax does not explicitly "
6406 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6407 if (n_summaries ? summaries[a] : t->axes[a])
6409 t->summary_axis = a;
6413 if (lex_token (lexer) == T_ENDCMD)
6415 if (!ctables_prepare_table (t))
6419 if (!lex_force_match (lexer, T_SLASH))
6422 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6424 if (lex_match_id (lexer, "SLABELS"))
6426 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6428 if (lex_match_id (lexer, "POSITION"))
6430 lex_match (lexer, T_EQUALS);
6431 if (lex_match_id (lexer, "COLUMN"))
6432 t->slabels_axis = PIVOT_AXIS_COLUMN;
6433 else if (lex_match_id (lexer, "ROW"))
6434 t->slabels_axis = PIVOT_AXIS_ROW;
6435 else if (lex_match_id (lexer, "LAYER"))
6436 t->slabels_axis = PIVOT_AXIS_LAYER;
6439 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6443 else if (lex_match_id (lexer, "VISIBLE"))
6445 lex_match (lexer, T_EQUALS);
6446 if (!parse_bool (lexer, &t->slabels_visible))
6451 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6456 else if (lex_match_id (lexer, "CLABELS"))
6458 if (lex_match_id (lexer, "AUTO"))
6460 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6461 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6463 else if (lex_match_id (lexer, "ROWLABELS"))
6465 lex_match (lexer, T_EQUALS);
6466 if (lex_match_id (lexer, "OPPOSITE"))
6467 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6468 else if (lex_match_id (lexer, "LAYER"))
6469 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6472 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6476 else if (lex_match_id (lexer, "COLLABELS"))
6478 lex_match (lexer, T_EQUALS);
6479 if (lex_match_id (lexer, "OPPOSITE"))
6480 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6481 else if (lex_match_id (lexer, "LAYER"))
6482 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6485 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6491 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6496 else if (lex_match_id (lexer, "CRITERIA"))
6498 if (!lex_force_match_id (lexer, "CILEVEL"))
6500 lex_match (lexer, T_EQUALS);
6502 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6504 t->cilevel = lex_number (lexer);
6507 else if (lex_match_id (lexer, "CATEGORIES"))
6509 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6513 else if (lex_match_id (lexer, "TITLES"))
6518 if (lex_match_id (lexer, "CAPTION"))
6519 textp = &t->caption;
6520 else if (lex_match_id (lexer, "CORNER"))
6522 else if (lex_match_id (lexer, "TITLE"))
6526 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6529 lex_match (lexer, T_EQUALS);
6531 struct string s = DS_EMPTY_INITIALIZER;
6532 while (lex_is_string (lexer))
6534 if (!ds_is_empty (&s))
6535 ds_put_byte (&s, ' ');
6536 put_title_text (&s, lex_tokss (lexer), now,
6537 lexer, dataset_dict (ds),
6538 expr_start, expr_end);
6542 *textp = ds_steal_cstr (&s);
6544 while (lex_token (lexer) != T_SLASH
6545 && lex_token (lexer) != T_ENDCMD);
6547 else if (lex_match_id (lexer, "SIGTEST"))
6551 t->chisq = xmalloc (sizeof *t->chisq);
6552 *t->chisq = (struct ctables_chisq) {
6554 .include_mrsets = true,
6555 .all_visible = true,
6561 if (lex_match_id (lexer, "TYPE"))
6563 lex_match (lexer, T_EQUALS);
6564 if (!lex_force_match_id (lexer, "CHISQUARE"))
6567 else if (lex_match_id (lexer, "ALPHA"))
6569 lex_match (lexer, T_EQUALS);
6570 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6572 t->chisq->alpha = lex_number (lexer);
6575 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6577 lex_match (lexer, T_EQUALS);
6578 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6581 else if (lex_match_id (lexer, "CATEGORIES"))
6583 lex_match (lexer, T_EQUALS);
6584 if (lex_match_id (lexer, "ALLVISIBLE"))
6585 t->chisq->all_visible = true;
6586 else if (lex_match_id (lexer, "SUBTOTALS"))
6587 t->chisq->all_visible = false;
6590 lex_error_expecting (lexer,
6591 "ALLVISIBLE", "SUBTOTALS");
6597 lex_error_expecting (lexer, "TYPE", "ALPHA",
6598 "INCLUDEMRSETS", "CATEGORIES");
6602 while (lex_token (lexer) != T_SLASH
6603 && lex_token (lexer) != T_ENDCMD);
6605 else if (lex_match_id (lexer, "COMPARETEST"))
6609 t->pairwise = xmalloc (sizeof *t->pairwise);
6610 *t->pairwise = (struct ctables_pairwise) {
6612 .alpha = { .05, .05 },
6613 .adjust = BONFERRONI,
6614 .include_mrsets = true,
6615 .meansvariance_allcats = true,
6616 .all_visible = true,
6625 if (lex_match_id (lexer, "TYPE"))
6627 lex_match (lexer, T_EQUALS);
6628 if (lex_match_id (lexer, "PROP"))
6629 t->pairwise->type = PROP;
6630 else if (lex_match_id (lexer, "MEAN"))
6631 t->pairwise->type = MEAN;
6634 lex_error_expecting (lexer, "PROP", "MEAN");
6638 else if (lex_match_id (lexer, "ALPHA"))
6640 lex_match (lexer, T_EQUALS);
6642 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6644 double a0 = lex_number (lexer);
6647 lex_match (lexer, T_COMMA);
6648 if (lex_is_number (lexer))
6650 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6652 double a1 = lex_number (lexer);
6655 t->pairwise->alpha[0] = MIN (a0, a1);
6656 t->pairwise->alpha[1] = MAX (a0, a1);
6659 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6661 else if (lex_match_id (lexer, "ADJUST"))
6663 lex_match (lexer, T_EQUALS);
6664 if (lex_match_id (lexer, "BONFERRONI"))
6665 t->pairwise->adjust = BONFERRONI;
6666 else if (lex_match_id (lexer, "BH"))
6667 t->pairwise->adjust = BH;
6668 else if (lex_match_id (lexer, "NONE"))
6669 t->pairwise->adjust = 0;
6672 lex_error_expecting (lexer, "BONFERRONI", "BH",
6677 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6679 lex_match (lexer, T_EQUALS);
6680 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6683 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6685 lex_match (lexer, T_EQUALS);
6686 if (lex_match_id (lexer, "ALLCATS"))
6687 t->pairwise->meansvariance_allcats = true;
6688 else if (lex_match_id (lexer, "TESTEDCATS"))
6689 t->pairwise->meansvariance_allcats = false;
6692 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6696 else if (lex_match_id (lexer, "CATEGORIES"))
6698 lex_match (lexer, T_EQUALS);
6699 if (lex_match_id (lexer, "ALLVISIBLE"))
6700 t->pairwise->all_visible = true;
6701 else if (lex_match_id (lexer, "SUBTOTALS"))
6702 t->pairwise->all_visible = false;
6705 lex_error_expecting (lexer, "ALLVISIBLE",
6710 else if (lex_match_id (lexer, "MERGE"))
6712 lex_match (lexer, T_EQUALS);
6713 if (!parse_bool (lexer, &t->pairwise->merge))
6716 else if (lex_match_id (lexer, "STYLE"))
6718 lex_match (lexer, T_EQUALS);
6719 if (lex_match_id (lexer, "APA"))
6720 t->pairwise->apa_style = true;
6721 else if (lex_match_id (lexer, "SIMPLE"))
6722 t->pairwise->apa_style = false;
6725 lex_error_expecting (lexer, "APA", "SIMPLE");
6729 else if (lex_match_id (lexer, "SHOWSIG"))
6731 lex_match (lexer, T_EQUALS);
6732 if (!parse_bool (lexer, &t->pairwise->show_sig))
6737 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6738 "INCLUDEMRSETS", "MEANSVARIANCE",
6739 "CATEGORIES", "MERGE", "STYLE",
6744 while (lex_token (lexer) != T_SLASH
6745 && lex_token (lexer) != T_ENDCMD);
6749 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6750 "CRITERIA", "CATEGORIES", "TITLES",
6751 "SIGTEST", "COMPARETEST");
6755 if (!lex_match (lexer, T_SLASH))
6759 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6761 t->clabels_from_axis = PIVOT_AXIS_ROW;
6762 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6764 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6768 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6769 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6770 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6772 if (!ctables_prepare_table (t))
6775 while (lex_token (lexer) != T_ENDCMD);
6778 input = proc_open (ds);
6779 bool ok = ctables_execute (ds, input, ct);
6780 ok = proc_commit (ds) && ok;
6782 ctables_destroy (ct);
6783 return ok ? CMD_SUCCESS : CMD_FAILURE;
6788 ctables_destroy (ct);