1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
376 size_t *areas[N_CTATS];
377 size_t n_areas[N_CTATS];
380 struct ctables_summary_spec_set specs[N_CSVS];
383 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
386 struct ctables_nest *nests;
390 static void ctables_stack_uninit (struct ctables_stack *);
394 struct hmap_node node;
399 struct ctables_occurrence
401 struct hmap_node node;
405 struct ctables_section
408 struct ctables_table *table;
409 struct ctables_nest *nests[PIVOT_N_AXES];
412 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
413 struct hmap cells; /* Contains "struct ctables_cell"s. */
414 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
417 static void ctables_section_uninit (struct ctables_section *);
421 struct ctables *ctables;
422 struct ctables_axis *axes[PIVOT_N_AXES];
423 struct ctables_stack stacks[PIVOT_N_AXES];
424 struct ctables_section *sections;
426 enum pivot_axis_type summary_axis;
427 struct ctables_summary_spec_set summary_specs;
428 struct variable **sum_vars;
431 enum pivot_axis_type slabels_axis;
432 bool slabels_visible;
434 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
436 Most commonly, label_axis[a] == a, and in particular we always have
437 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
439 If ROWLABELS or COLLABELS is specified, then one of
440 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
441 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
443 If any category labels are moved, then 'clabels_example' is one of the
444 variables being moved (and it is otherwise NULL). All of the variables
445 being moved have the same width, value labels, and categories, so this
446 example variable can be used to find those out.
448 The remaining members in this group are relevant only if category labels
451 'clabels_values_map' holds a "struct ctables_value" for all the values
452 that appear in all of the variables in the moved categories. It is
453 accumulated as the data is read. Once the data is fully read, its
454 sorted values are put into 'clabels_values' and 'n_clabels_values'.
456 enum pivot_axis_type label_axis[PIVOT_N_AXES];
457 enum pivot_axis_type clabels_from_axis;
458 enum pivot_axis_type clabels_to_axis;
459 const struct variable *clabels_example;
460 struct hmap clabels_values_map;
461 struct ctables_value **clabels_values;
462 size_t n_clabels_values;
464 /* Indexed by variable dictionary index. */
465 struct ctables_categories **categories;
474 struct ctables_chisq *chisq;
475 struct ctables_pairwise *pairwise;
478 struct ctables_categories
481 struct ctables_category *cats;
486 struct ctables_category
488 enum ctables_category_type
490 /* Explicit category lists. */
493 CCT_NRANGE, /* Numerical range. */
494 CCT_SRANGE, /* String range. */
499 /* Totals and subtotals. */
503 /* Implicit category lists. */
508 /* For contributing to TOTALN. */
509 CCT_EXCLUDED_MISSING,
513 struct ctables_category *subtotal;
519 double number; /* CCT_NUMBER. */
520 struct substring string; /* CCT_STRING, in dictionary encoding. */
521 double nrange[2]; /* CCT_NRANGE. */
522 struct substring srange[2]; /* CCT_SRANGE. */
526 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
527 bool hide_subcategories; /* CCT_SUBTOTAL. */
530 /* CCT_POSTCOMPUTE. */
533 const struct ctables_postcompute *pc;
534 enum fmt_type parse_format;
537 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
540 bool include_missing;
544 enum ctables_summary_function sort_function;
546 enum ctables_area_type area;
547 struct variable *sort_var;
552 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
553 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
554 struct msg_location *location;
558 ctables_category_uninit (struct ctables_category *cat)
563 msg_location_destroy (cat->location);
570 case CCT_POSTCOMPUTE:
574 ss_dealloc (&cat->string);
578 ss_dealloc (&cat->srange[0]);
579 ss_dealloc (&cat->srange[1]);
584 free (cat->total_label);
592 case CCT_EXCLUDED_MISSING:
598 nullable_substring_equal (const struct substring *a,
599 const struct substring *b)
601 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
605 ctables_category_equal (const struct ctables_category *a,
606 const struct ctables_category *b)
608 if (a->type != b->type)
614 return a->number == b->number;
617 return ss_equals (a->string, b->string);
620 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
623 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
624 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
630 case CCT_POSTCOMPUTE:
631 return a->pc == b->pc;
635 return !strcmp (a->total_label, b->total_label);
640 return (a->include_missing == b->include_missing
641 && a->sort_ascending == b->sort_ascending
642 && a->sort_function == b->sort_function
643 && a->sort_var == b->sort_var
644 && a->percentile == b->percentile);
646 case CCT_EXCLUDED_MISSING:
654 ctables_categories_unref (struct ctables_categories *c)
659 assert (c->n_refs > 0);
663 for (size_t i = 0; i < c->n_cats; i++)
664 ctables_category_uninit (&c->cats[i]);
670 ctables_categories_equal (const struct ctables_categories *a,
671 const struct ctables_categories *b)
673 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
676 for (size_t i = 0; i < a->n_cats; i++)
677 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
683 /* Chi-square test (SIGTEST). */
691 /* Pairwise comparison test (COMPARETEST). */
692 struct ctables_pairwise
694 enum { PROP, MEAN } type;
697 bool meansvariance_allcats;
699 enum { BONFERRONI = 1, BH } adjust;
723 struct variable *var;
725 struct ctables_summary_spec_set specs[N_CSVS];
729 struct ctables_axis *subs[2];
732 struct msg_location *loc;
735 static void ctables_axis_destroy (struct ctables_axis *);
737 struct ctables_summary_spec
739 /* The calculation to be performed.
741 'function' is the function to calculate. 'weighted' specifies whether
742 to use weighted or unweighted data (for functions that do not support a
743 choice, it must be true). 'calc_area' is the area over which the
744 calculation takes place (for functions that target only an individual
745 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
746 percentile between 0 and 100 (for other functions it must be 0). */
747 enum ctables_summary_function function;
749 enum ctables_area_type calc_area;
750 double percentile; /* CTSF_PTILE only. */
752 /* How to display the result of the calculation.
754 'label' is a user-specified label, NULL if the user didn't specify
757 'user_area' is usually the same as 'calc_area', but when category labels
758 are rotated from one axis to another it swaps rows and columns.
760 'format' is the format for displaying the output. If
761 'is_ctables_format' is true, then 'format.type' is one of the special
762 CTEF_* formats instead of the standard ones. */
764 enum ctables_area_type user_area;
765 struct fmt_spec format;
766 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
773 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
774 const struct ctables_summary_spec *src)
777 dst->label = xstrdup_if_nonnull (src->label);
781 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
788 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
789 const struct ctables_summary_spec_set *src)
791 struct ctables_summary_spec *specs
792 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
793 for (size_t i = 0; i < src->n; i++)
794 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
796 *dst = (struct ctables_summary_spec_set) {
801 .is_scale = src->is_scale,
806 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
808 for (size_t i = 0; i < set->n; i++)
809 ctables_summary_spec_uninit (&set->specs[i]);
810 free (set->listwise_vars);
815 parse_col_width (struct lexer *lexer, const char *name, double *width)
817 lex_match (lexer, T_EQUALS);
818 if (lex_match_id (lexer, "DEFAULT"))
820 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
822 *width = lex_number (lexer);
832 parse_bool (struct lexer *lexer, bool *b)
834 if (lex_match_id (lexer, "NO"))
836 else if (lex_match_id (lexer, "YES"))
840 lex_error_expecting (lexer, "YES", "NO");
846 static enum ctables_function_availability
847 ctables_function_availability (enum ctables_summary_function f)
849 static enum ctables_function_availability availability[] = {
850 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
851 #include "ctables.inc"
855 return availability[f];
859 ctables_summary_function_is_count (enum ctables_summary_function f)
861 return f == CTSF_COUNT || f == CTSF_ECOUNT;
865 parse_ctables_summary_function (struct lexer *lexer,
866 enum ctables_summary_function *function,
868 enum ctables_area_type *area)
870 if (!lex_force_id (lexer))
873 struct substring name = lex_tokss (lexer);
874 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
876 bool has_area = false;
878 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
879 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
884 if (ss_equals_case (name, ss_cstr ("PCT")))
886 /* Special case where .COUNT suffix is omitted. */
887 *function = CTSF_areaPCT_COUNT;
894 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
896 const struct ctables_function_info *cfi = &ctables_function_info[f];
897 if (ss_equals_case (cfi->basename, name))
900 if (!*weighted && !cfi->may_be_unweighted)
902 if (has_area != cfi->is_area)
910 lex_error (lexer, _("Expecting summary function name."));
915 ctables_axis_destroy (struct ctables_axis *axis)
923 for (size_t i = 0; i < N_CSVS; i++)
924 ctables_summary_spec_set_uninit (&axis->specs[i]);
929 ctables_axis_destroy (axis->subs[0]);
930 ctables_axis_destroy (axis->subs[1]);
933 msg_location_destroy (axis->loc);
937 static struct ctables_axis *
938 ctables_axis_new_nonterminal (enum ctables_axis_op op,
939 struct ctables_axis *sub0,
940 struct ctables_axis *sub1,
941 struct lexer *lexer, int start_ofs)
943 struct ctables_axis *axis = xmalloc (sizeof *axis);
944 *axis = (struct ctables_axis) {
946 .subs = { sub0, sub1 },
947 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
952 struct ctables_axis_parse_ctx
955 struct dictionary *dict;
957 struct ctables_table *t;
960 static struct fmt_spec
961 ctables_summary_default_format (enum ctables_summary_function function,
962 const struct variable *var)
964 static const enum ctables_format default_formats[] = {
965 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
966 #include "ctables.inc"
969 switch (default_formats[function])
972 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
975 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
978 return *var_get_print_format (var);
986 ctables_summary_label__ (const struct ctables_summary_spec *spec)
988 bool w = spec->weighted;
989 enum ctables_area_type a = spec->user_area;
990 switch (spec->function)
993 return w ? N_("Count") : N_("Unweighted Count");
996 return N_("Adjusted Count");
998 case CTSF_areaPCT_COUNT:
1001 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1002 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1003 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1004 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1005 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1006 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1007 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1011 case CTSF_areaPCT_VALIDN:
1014 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1015 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1016 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1017 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1018 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1019 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1020 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1024 case CTSF_areaPCT_TOTALN:
1027 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1028 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1029 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1030 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1031 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1032 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1033 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1037 case CTSF_MAXIMUM: return N_("Maximum");
1038 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1039 case CTSF_MEDIAN: return N_("Median");
1040 case CTSF_MINIMUM: return N_("Minimum");
1041 case CTSF_MISSING: return N_("Missing");
1042 case CTSF_MODE: return N_("Mode");
1043 case CTSF_PTILE: NOT_REACHED ();
1044 case CTSF_RANGE: return N_("Range");
1045 case CTSF_SEMEAN: return N_("Std Error of Mean");
1046 case CTSF_STDDEV: return N_("Std Deviation");
1047 case CTSF_SUM: return N_("Sum");
1048 case CTSF_TOTALN: return N_("Total N");
1049 case CTSF_ETOTALN: return N_("Adjusted Total N");
1050 case CTSF_VALIDN: return N_("Valid N");
1051 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1052 case CTSF_VARIANCE: return N_("Variance");
1053 case CTSF_areaPCT_SUM:
1056 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1057 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1058 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1059 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1060 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1061 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1062 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1069 /* Don't bother translating these: they are for developers only. */
1070 case CTAT_TABLE: return "Table ID";
1071 case CTAT_LAYER: return "Layer ID";
1072 case CTAT_LAYERROW: return "Layer Row ID";
1073 case CTAT_LAYERCOL: return "Layer Column ID";
1074 case CTAT_SUBTABLE: return "Subtable ID";
1075 case CTAT_ROW: return "Row ID";
1076 case CTAT_COL: return "Column ID";
1084 static struct pivot_value *
1085 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1089 if (spec->function == CTSF_PTILE)
1091 double p = spec->percentile;
1092 char *s = (spec->weighted
1093 ? xasprintf (_("Percentile %.2f"), p)
1094 : xasprintf (_("Unweighted Percentile %.2f"), p));
1095 return pivot_value_new_user_text_nocopy (s);
1098 return pivot_value_new_text (ctables_summary_label__ (spec));
1102 struct substring in = ss_cstr (spec->label);
1103 struct substring target = ss_cstr (")CILEVEL");
1105 struct string out = DS_EMPTY_INITIALIZER;
1108 size_t chunk = ss_find_substring (in, target);
1109 ds_put_substring (&out, ss_head (in, chunk));
1110 ss_advance (&in, chunk);
1112 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1114 ss_advance (&in, target.length);
1115 ds_put_format (&out, "%g", cilevel);
1121 ctables_summary_function_name (enum ctables_summary_function function,
1123 enum ctables_area_type area,
1124 char *buffer, size_t bufsize)
1126 const struct ctables_function_info *cfi = &ctables_function_info[function];
1127 snprintf (buffer, bufsize, "%s%s%s",
1128 weighted ? "" : "U",
1129 cfi->is_area ? ctables_area_type_name[area] : "",
1130 cfi->basename.string);
1135 add_summary_spec (struct ctables_axis *axis,
1136 enum ctables_summary_function function, bool weighted,
1137 enum ctables_area_type area, double percentile,
1138 const char *label, const struct fmt_spec *format,
1139 bool is_ctables_format, const struct msg_location *loc,
1140 enum ctables_summary_variant sv)
1142 if (axis->op == CTAO_VAR)
1144 char function_name[128];
1145 ctables_summary_function_name (function, weighted, area,
1146 function_name, sizeof function_name);
1147 const char *var_name = var_get_name (axis->var);
1148 switch (ctables_function_availability (function))
1152 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1153 "response sets."), function_name);
1154 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1160 if (!axis->scale && sv != CSV_TOTAL)
1163 _("Summary function %s applies only to scale variables."),
1165 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1175 struct ctables_summary_spec_set *set = &axis->specs[sv];
1176 if (set->n >= set->allocated)
1177 set->specs = x2nrealloc (set->specs, &set->allocated,
1178 sizeof *set->specs);
1180 struct ctables_summary_spec *dst = &set->specs[set->n++];
1181 *dst = (struct ctables_summary_spec) {
1182 .function = function,
1183 .weighted = weighted,
1186 .percentile = percentile,
1187 .label = xstrdup_if_nonnull (label),
1188 .format = (format ? *format
1189 : ctables_summary_default_format (function, axis->var)),
1190 .is_ctables_format = is_ctables_format,
1196 for (size_t i = 0; i < 2; i++)
1197 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1198 percentile, label, format, is_ctables_format,
1205 static struct ctables_axis *ctables_axis_parse_stack (
1206 struct ctables_axis_parse_ctx *);
1209 static struct ctables_axis *
1210 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1212 if (lex_match (ctx->lexer, T_LPAREN))
1214 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1215 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1217 ctables_axis_destroy (sub);
1223 if (!lex_force_id (ctx->lexer))
1226 int start_ofs = lex_ofs (ctx->lexer);
1227 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1231 struct ctables_axis *axis = xmalloc (sizeof *axis);
1232 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1234 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1235 : lex_match_phrase (ctx->lexer, "[C]") ? false
1236 : var_get_measure (var) == MEASURE_SCALE);
1237 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1238 lex_ofs (ctx->lexer) - 1);
1239 if (axis->scale && var_is_alpha (var))
1241 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1243 var_get_name (var));
1244 ctables_axis_destroy (axis);
1252 has_digit (const char *s)
1254 return s[strcspn (s, "0123456789")] != '\0';
1258 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1259 bool *is_ctables_format)
1261 char type[FMT_TYPE_LEN_MAX + 1];
1262 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1265 if (!strcasecmp (type, "NEGPAREN"))
1266 format->type = CTEF_NEGPAREN;
1267 else if (!strcasecmp (type, "NEQUAL"))
1268 format->type = CTEF_NEQUAL;
1269 else if (!strcasecmp (type, "PAREN"))
1270 format->type = CTEF_PAREN;
1271 else if (!strcasecmp (type, "PCTPAREN"))
1272 format->type = CTEF_PCTPAREN;
1275 *is_ctables_format = false;
1276 return (parse_format_specifier (lexer, format)
1277 && fmt_check_output (format)
1278 && fmt_check_type_compat (format, VAL_NUMERIC));
1284 lex_next_error (lexer, -1, -1,
1285 _("Output format %s requires width 2 or greater."), type);
1288 else if (format->d > format->w - 1)
1290 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1291 "greater than decimals."), type);
1296 *is_ctables_format = true;
1301 static struct ctables_axis *
1302 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1304 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1305 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1308 enum ctables_summary_variant sv = CSV_CELL;
1311 int start_ofs = lex_ofs (ctx->lexer);
1313 /* Parse function. */
1314 enum ctables_summary_function function;
1316 enum ctables_area_type area;
1317 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1321 /* Parse percentile. */
1322 double percentile = 0;
1323 if (function == CTSF_PTILE)
1325 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1327 percentile = lex_number (ctx->lexer);
1328 lex_get (ctx->lexer);
1333 if (lex_is_string (ctx->lexer))
1335 label = ss_xstrdup (lex_tokss (ctx->lexer));
1336 lex_get (ctx->lexer);
1340 struct fmt_spec format;
1341 const struct fmt_spec *formatp;
1342 bool is_ctables_format = false;
1343 if (lex_token (ctx->lexer) == T_ID
1344 && has_digit (lex_tokcstr (ctx->lexer)))
1346 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1347 &is_ctables_format))
1357 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1358 lex_ofs (ctx->lexer) - 1);
1359 add_summary_spec (sub, function, weighted, area, percentile, label,
1360 formatp, is_ctables_format, loc, sv);
1362 msg_location_destroy (loc);
1364 lex_match (ctx->lexer, T_COMMA);
1365 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1367 if (!lex_force_match (ctx->lexer, T_LBRACK))
1371 else if (lex_match (ctx->lexer, T_RBRACK))
1373 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1380 ctables_axis_destroy (sub);
1384 static const struct ctables_axis *
1385 find_scale (const struct ctables_axis *axis)
1389 else if (axis->op == CTAO_VAR)
1390 return axis->scale ? axis : NULL;
1393 for (size_t i = 0; i < 2; i++)
1395 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1403 static const struct ctables_axis *
1404 find_categorical_summary_spec (const struct ctables_axis *axis)
1408 else if (axis->op == CTAO_VAR)
1409 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1412 for (size_t i = 0; i < 2; i++)
1414 const struct ctables_axis *sum
1415 = find_categorical_summary_spec (axis->subs[i]);
1423 static struct ctables_axis *
1424 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1426 int start_ofs = lex_ofs (ctx->lexer);
1427 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1431 while (lex_match (ctx->lexer, T_GT))
1433 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1436 ctables_axis_destroy (lhs);
1440 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1441 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1443 const struct ctables_axis *outer_scale = find_scale (lhs);
1444 const struct ctables_axis *inner_scale = find_scale (rhs);
1445 if (outer_scale && inner_scale)
1447 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1448 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1449 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1450 ctables_axis_destroy (nest);
1454 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1457 msg_at (SE, nest->loc,
1458 _("Summaries may only be requested for categorical variables "
1459 "at the innermost nesting level."));
1460 msg_at (SN, outer_sum->loc,
1461 _("This outer categorical variable has a summary."));
1462 ctables_axis_destroy (nest);
/* Parses nest-level axis expressions joined by T_PLUS, folding them
   left-to-right into CTAO_STACK nodes.  The location of each stack node
   starts at the lexer offset recorded on entry. */
1472 static struct ctables_axis *
1473 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1475 int start_ofs = lex_ofs (ctx->lexer);
1476 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1480 while (lex_match (ctx->lexer, T_PLUS))
1482 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1485 ctables_axis_destroy (lhs);
1489 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1490 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T from LEXER and stores
   the resulting tree in T->axes[A].  The tokens T_BY, T_SLASH and T_ENDCMD
   are tested first (presumably indicating an empty axis -- confirm).
   Otherwise returns true if a tree was produced, false if
   ctables_axis_parse_stack returned NULL. */
1497 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1498 struct ctables *ct, struct ctables_table *t,
1499 enum pivot_axis_type a)
1501 if (lex_token (lexer) == T_BY
1502 || lex_token (lexer) == T_SLASH
1503 || lex_token (lexer) == T_ENDCMD)
1506 struct ctables_axis_parse_ctx ctx = {
1512 t->axes[a] = ctables_axis_parse_stack (&ctx);
1513 return t->axes[a] != NULL;
/* Destructor for CHISQ; ctables_table_destroy calls it on a table's 'chisq'
   member.  NOTE(review): presumably just frees CHISQ -- confirm. */
1517 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE; ctables_table_destroy calls it on a table's
   'pairwise' member.  NOTE(review): presumably just frees PAIRWISE --
   confirm. */
1523 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases what table T owns: its sections, its references to category
   sets, the axis tree and stack for each pivot axis, the summary spec
   array, the category-label value map and array, and the chi-square and
   pairwise settings. */
1529 ctables_table_destroy (struct ctables_table *t)
1534 for (size_t i = 0; i < t->n_sections; i++)
1535 ctables_section_uninit (&t->sections[i]);
/* Category sets are reference counted, so only this table's references
   are dropped. */
1538 for (size_t i = 0; i < t->n_categories; i++)
1539 ctables_categories_unref (t->categories[i]);
1540 free (t->categories);
1542 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1544 ctables_axis_destroy (t->axes[a]);
1545 ctables_stack_uninit (&t->stacks[a]);
1547 free (t->summary_specs.specs);
/* The _SAFE iterator is used because each node is removed from the map
   while iterating.  Values are destroyed using the width of
   T->clabels_example. */
1549 struct ctables_value *ctv, *next_ctv;
1550 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1551 &t->clabels_values_map)
1553 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1554 hmap_delete (&t->clabels_values_map, &ctv->node);
1557 hmap_destroy (&t->clabels_values_map);
1558 free (t->clabels_values);
1564 ctables_chisq_destroy (t->chisq);
1565 ctables_pairwise_destroy (t->pairwise);
/* Releases what CT owns: every postcompute in CT->postcomputes (its
   location, expression and summary specs), the CTABLES format settings,
   the reference to the pivot table look, and each table in CT->tables. */
1570 ctables_destroy (struct ctables *ct)
/* The _SAFE iterator is used because each postcompute is removed from the
   map while iterating. */
1575 struct ctables_postcompute *pc, *next_pc;
1576 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1580 msg_location_destroy (pc->location);
1581 ctables_pcexpr_destroy (pc->expr);
1585 ctables_summary_spec_set_uninit (pc->specs);
1588 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1591 hmap_destroy (&ct->postcomputes);
1593 fmt_settings_uninit (&ct->ctables_formats);
1594 pivot_table_look_unref (ct->look);
1598 for (size_t i = 0; i < ct->n_tables; i++)
1599 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose 'nrange' member holds the numeric
   bounds LOW and HIGH.  Elsewhere in this file -DBL_MAX and DBL_MAX are
   passed to mean "no lower bound" and "no upper bound". */
1604 static struct ctables_category
1605 cct_nrange (double low, double high)
1607 return (struct ctables_category) {
1609 .nrange = { low, high }
/* Returns a category, by value, whose 'srange' member holds the string
   bounds LOW and HIGH.  The substrings are stored as given (not copied).
   Callers in this file pass a substring with a null 'string' member for an
   open bound. */
1613 static struct ctables_category
1614 cct_srange (struct substring low, struct substring high)
1616 return (struct ctables_category) {
1618 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category and stores a
   CCT_SUBTOTAL category in *CAT.  If T_EQUALS follows, a string token is
   required and a copy of it becomes the label; otherwise the label is a
   copy of the translated string "Subtotal".  HIDE_SUBCATEGORIES is
   recorded in the category unchanged. */
1623 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1624 struct ctables_category *cat)
1627 if (lex_match (lexer, T_EQUALS))
1629 if (!lex_force_string (lexer))
1632 total_label = ss_xstrdup (lex_tokss (lexer));
1636 total_label = xstrdup (_("Subtotal"));
1638 *cat = (struct ctables_category) {
1639 .type = CCT_SUBTOTAL,
1640 .hide_subcategories = hide_subcategories,
1641 .total_label = total_label
/* Recodes the current token's string with recode_substring_pool, passing
   DICT's encoding and "UTF-8" (presumably converting from UTF-8 into the
   dictionary encoding -- confirm against recode_substring_pool's
   parameter order), using no pool.  Trailing spaces are then trimmed from
   the result. */
1646 static struct substring
1647 parse_substring (struct lexer *lexer, struct dictionary *dict)
1649 struct substring s = recode_substring_pool (
1650 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1651 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification from LEXER into *CAT.  The
   forms handled are:

   - OTHERNM and MISSING.
   - SUBTOTAL and HSUBTOTAL (delegated to ctables_table_parse_subtotal).
   - LO THRU followed by a string or a number.
   - A number, optionally followed by THRU HI or THRU and a number.
   - A string, optionally followed by THRU HI or THRU and a string.
   - T_AND followed by the identifier of a postcompute.

   Any other token is reported with lex_error.

   In string ranges, a bound whose 'string' member is null stands for the
   open end (LO or HI); in numeric ranges -DBL_MAX and DBL_MAX play that
   role. */
1657 ctables_table_parse_explicit_category (struct lexer *lexer,
1658 struct dictionary *dict,
1660 struct ctables_category *cat)
1662 if (lex_match_id (lexer, "OTHERNM"))
1663 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1664 else if (lex_match_id (lexer, "MISSING"))
1665 *cat = (struct ctables_category) { .type = CCT_MISSING };
1666 else if (lex_match_id (lexer, "SUBTOTAL"))
1667 return ctables_table_parse_subtotal (lexer, false, cat);
1668 else if (lex_match_id (lexer, "HSUBTOTAL"))
1669 return ctables_table_parse_subtotal (lexer, true, cat);
1670 else if (lex_match_id (lexer, "LO"))
1672 if (!lex_force_match_id (lexer, "THRU"))
1674 if (lex_is_string (lexer))
1676 struct substring sr0 = { .string = NULL };
1677 struct substring sr1 = parse_substring (lexer, dict);
1678 *cat = cct_srange (sr0, sr1);
1680 else if (lex_force_num (lexer))
1682 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1688 else if (lex_is_number (lexer))
1690 double number = lex_number (lexer);
1692 if (lex_match_id (lexer, "THRU"))
1694 if (lex_match_id (lexer, "HI"))
1695 *cat = cct_nrange (number, DBL_MAX);
1698 if (!lex_force_num (lexer))
1700 *cat = cct_nrange (number, lex_number (lexer));
1705 *cat = (struct ctables_category) {
1710 else if (lex_is_string (lexer))
1712 struct substring s = parse_substring (lexer, dict);
1713 if (lex_match_id (lexer, "THRU"))
1715 if (lex_match_id (lexer, "HI"))
1717 struct substring sr1 = { .string = NULL };
1718 *cat = cct_srange (s, sr1);
1722 if (!lex_force_string (lexer))
1727 struct substring sr1 = parse_substring (lexer, dict);
1728 *cat = cct_srange (s, sr1);
1732 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* Reference to a postcompute by name. */
1734 else if (lex_match (lexer, T_AND))
1736 if (!lex_force_id (lexer))
1738 struct ctables_postcompute *pc = ctables_find_postcompute (
1739 ct, lex_tokcstr (lexer));
1742 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1743 msg_at (SE, loc, _("Unknown postcompute &%s."),
1744 lex_tokcstr (lexer));
1745 msg_location_destroy (loc);
1750 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1754 lex_error (lexer, NULL);
/* Parses string S as a value in input format FORMAT using data_in, with
   DICT's encoding and the current format settings.  If data_in returns an
   error string, an error naming FORMAT and that string is reported at
   LOCATION.  N is the output parameter for the parsed number (presumably
   stored on success -- the assignment is not in the lines shown). */
1762 parse_category_string (struct msg_location *location,
1763 struct substring s, const struct dictionary *dict,
1764 enum fmt_type format, double *n)
1767 char *error = data_in (s, dict_get_encoding (dict), format,
1768 settings_get_fmt_settings (), &v, 0, NULL);
1771 msg_at (SE, location,
1772 _("Failed to parse category specification as format %s: %s."),
1773 fmt_name (format), error);
/* Searches the categories in CATS for one that matches the category
   reference in postcompute expression E.  The comparison depends on E->op:
   numbers and numeric ranges compare by value, strings with ss_equals,
   string ranges with nullable_substring_equal on both bounds, and MISSING,
   OTHERNM, SUBTOTAL and TOTAL by category type.

   For CTPO_CAT_SUBTOTAL, E->subtotal_index is compared against a running
   count of the subtotals seen, with index 0 handled as a separate case.
   After the loop, a subtotal reference with index 0 is tested again when
   more than one subtotal was seen. */
1782 static struct ctables_category *
1783 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1784 const struct ctables_pcexpr *e)
1786 struct ctables_category *best = NULL;
1787 size_t n_subtotals = 0;
1788 for (size_t i = 0; i < cats->n_cats; i++)
1790 struct ctables_category *cat = &cats->cats[i];
1793 case CTPO_CAT_NUMBER:
1794 if (cat->type == CCT_NUMBER && cat->number == e->number)
1798 case CTPO_CAT_STRING:
1799 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1803 case CTPO_CAT_NRANGE:
1804 if (cat->type == CCT_NRANGE
1805 && cat->nrange[0] == e->nrange[0]
1806 && cat->nrange[1] == e->nrange[1])
1810 case CTPO_CAT_SRANGE:
1811 if (cat->type == CCT_SRANGE
1812 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1813 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1817 case CTPO_CAT_MISSING:
1818 if (cat->type == CCT_MISSING)
1822 case CTPO_CAT_OTHERNM:
1823 if (cat->type == CCT_OTHERNM)
1827 case CTPO_CAT_SUBTOTAL:
1828 if (cat->type == CCT_SUBTOTAL)
1831 if (e->subtotal_index == n_subtotals)
1833 else if (e->subtotal_index == 0)
1838 case CTPO_CAT_TOTAL:
1839 if (cat->type == CCT_TOTAL)
1853 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Looks up in CATS the category referenced by postcompute expression E.

   When PARSE_FORMAT is not FMT_F, string references are first converted to
   numeric ones by parsing them with parse_category_string in
   PARSE_FORMAT: a CTPO_CAT_STRING becomes a CTPO_CAT_NUMBER, and a
   CTPO_CAT_SRANGE becomes a CTPO_CAT_NRANGE, with a null bound mapped to
   -DBL_MAX (lower) or DBL_MAX (upper).  The converted expression keeps E's
   location.  All other references are looked up unchanged. */
1858 static struct ctables_category *
1859 ctables_find_category_for_postcompute (const struct dictionary *dict,
1860 const struct ctables_categories *cats,
1861 enum fmt_type parse_format,
1862 const struct ctables_pcexpr *e)
1864 if (parse_format != FMT_F)
1866 if (e->op == CTPO_CAT_STRING)
1869 if (!parse_category_string (e->location, e->string, dict,
1870 parse_format, &number))
1873 struct ctables_pcexpr e2 = {
1874 .op = CTPO_CAT_NUMBER,
1876 .location = e->location,
1878 return ctables_find_category_for_postcompute__ (cats, &e2);
1880 else if (e->op == CTPO_CAT_SRANGE)
1883 if (!e->srange[0].string)
1884 nrange[0] = -DBL_MAX;
1885 else if (!parse_category_string (e->location, e->srange[0], dict,
1886 parse_format, &nrange[0]))
1889 if (!e->srange[1].string)
1890 nrange[1] = DBL_MAX;
1891 else if (!parse_category_string (e->location, e->srange[1], dict,
1892 parse_format, &nrange[1]))
1895 struct ctables_pcexpr e2 = {
1896 .op = CTPO_CAT_NRANGE,
1897 .nrange = { nrange[0], nrange[1] },
1898 .location = e->location,
1900 return ctables_find_category_for_postcompute__ (cats, &e2);
1903 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, used by category PC_CAT, against the
   category list CATS, whose source location is CATS_LOCATION.

   For a category reference (the CTPO_CAT_* operators), the referenced
   category is looked up in CATS using PC_CAT->parse_format.  Two failures
   are diagnosed:

   - A SUBTOTAL reference without a position (subtotal_index 0) while CATS
     contains more than one subtotal.

   - A reference to a category that is not in CATS, with a hint that
     depends on whether the reference was to a subtotal, the total, or
     another category.

   For other operators, both sub-expressions are checked recursively. */
1907 ctables_recursive_check_postcompute (struct dictionary *dict,
1908 const struct ctables_pcexpr *e,
1909 struct ctables_category *pc_cat,
1910 const struct ctables_categories *cats,
1911 const struct msg_location *cats_location)
1915 case CTPO_CAT_NUMBER:
1916 case CTPO_CAT_STRING:
1917 case CTPO_CAT_NRANGE:
1918 case CTPO_CAT_SRANGE:
1919 case CTPO_CAT_MISSING:
1920 case CTPO_CAT_OTHERNM:
1921 case CTPO_CAT_SUBTOTAL:
1922 case CTPO_CAT_TOTAL:
1924 struct ctables_category *cat = ctables_find_category_for_postcompute (
1925 dict, cats, pc_cat->parse_format, e);
1928 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals to tell an ambiguous reference from a missing
   one. */
1930 size_t n_subtotals = 0;
1931 for (size_t i = 0; i < cats->n_cats; i++)
1932 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1933 if (n_subtotals > 1)
1935 msg_at (SE, cats_location,
1936 ngettext ("These categories include %zu instance "
1937 "of SUBTOTAL or HSUBTOTAL, so references "
1938 "from computed categories must refer to "
1939 "subtotals by position, "
1940 "e.g. SUBTOTAL[1].",
1941 "These categories include %zu instances "
1942 "of SUBTOTAL or HSUBTOTAL, so references "
1943 "from computed categories must refer to "
1944 "subtotals by position, "
1945 "e.g. SUBTOTAL[1].",
1948 msg_at (SN, e->location,
1949 _("This is the reference that lacks a position."));
1954 msg_at (SE, pc_cat->location,
1955 _("Computed category &%s references a category not included "
1956 "in the category list."),
1958 msg_at (SN, e->location, _("This is the missing category."));
1959 if (e->op == CTPO_CAT_SUBTOTAL)
1960 msg_at (SN, cats_location,
1961 _("To fix the problem, add subtotals to the "
1962 "list of categories here."));
1963 else if (e->op == CTPO_CAT_TOTAL)
1964 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1965 "CATEGORIES specification."));
1967 msg_at (SN, cats_location,
1968 _("To fix the problem, add the missing category to the "
1969 "list of categories here."));
1972 if (pc_cat->pc->hide_source_cats)
/* Non-leaf expression: check whichever operands are present. */
1986 for (size_t i = 0; i < 2; i++)
1987 if (e->subs[i] && !ctables_recursive_check_postcompute (
1988 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for a numeric one.  When one is
   found, an error at CAT->location explains that the category
   specification applies only to string variables and names the offending
   variable. */
1997 all_strings (struct variable **vars, size_t n_vars,
1998 const struct ctables_category *cat)
2000 for (size_t j = 0; j < n_vars; j++)
2001 if (var_is_numeric (vars[j]))
2003 msg_at (SE, cat->location,
2004 _("This category specification may be applied only to string "
2005 "variables, but this subcommand tries to apply it to "
2006 "numeric variable %s."),
2007 var_get_name (vars[j]));
/* Parses a CATEGORIES specification for table T: VARIABLES=<variable list>,
   an optional bracketed list of explicit categories, and the options ORDER,
   KEY, MISSING, TOTAL, LABEL, POSITION and EMPTY.

   The resulting category set is shared by all the listed variables: it is
   created with one reference per variable and installed in T->categories
   at each variable's dictionary index, after dropping the reference
   previously held there.

   ORDER, KEY and MISSING are accepted only while no explicit categories
   have been parsed (C->n_cats is zero).

   After parsing, each explicit category is checked against the variables:
   postcomputes are validated with ctables_recursive_check_postcompute,
   numeric specifications are rejected for string variables, and string
   specifications are either parsed into numbers using the variables'
   common print format or required to apply only to string variables. */
2014 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2015 struct ctables *ct, struct ctables_table *t)
2017 if (!lex_match_id (lexer, "VARIABLES"))
2019 lex_match (lexer, T_EQUALS);
2021 struct variable **vars;
2023 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables, if there is
   one; COMMON_FORMAT becomes NULL when the types differ. */
2026 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2027 for (size_t i = 1; i < n_vars; i++)
2029 const struct fmt_spec *f = var_get_print_format (vars[i]);
2030 if (f->type != common_format->type)
2032 common_format = NULL;
2038 && (fmt_get_category (common_format->type)
2039 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One reference per variable that will point at the new set. */
2041 struct ctables_categories *c = xmalloc (sizeof *c);
2042 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2043 for (size_t i = 0; i < n_vars; i++)
2045 struct ctables_categories **cp
2046 = &t->categories[var_get_dict_index (vars[i])];
2047 ctables_categories_unref (*cp);
/* CATS_START_OFS and CATS_END_OFS stay -1 unless a bracketed list is
   present. */
2051 size_t allocated_cats = 0;
2052 int cats_start_ofs = -1;
2053 int cats_end_ofs = -1;
2054 if (lex_match (lexer, T_LBRACK))
2056 cats_start_ofs = lex_ofs (lexer);
2059 if (c->n_cats >= allocated_cats)
2060 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2062 int start_ofs = lex_ofs (lexer);
2063 struct ctables_category *cat = &c->cats[c->n_cats];
2064 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2066 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* Commas between categories are optional. */
2069 lex_match (lexer, T_COMMA);
2071 while (!lex_match (lexer, T_RBRACK));
2072 cats_end_ofs = lex_ofs (lexer) - 1;
2075 struct ctables_category cat = {
2077 .include_missing = false,
2078 .sort_ascending = true,
2080 bool show_totals = false;
2081 char *total_label = NULL;
2082 bool totals_before = false;
/* Options continue until the end of the subcommand or command. */
2083 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2085 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2087 lex_match (lexer, T_EQUALS);
2088 if (lex_match_id (lexer, "A"))
2089 cat.sort_ascending = true;
2090 else if (lex_match_id (lexer, "D"))
2091 cat.sort_ascending = false;
2094 lex_error_expecting (lexer, "A", "D");
2098 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2100 lex_match (lexer, T_EQUALS);
2101 if (lex_match_id (lexer, "VALUE"))
2102 cat.type = CCT_VALUE;
2103 else if (lex_match_id (lexer, "LABEL"))
2104 cat.type = CCT_LABEL;
2107 cat.type = CCT_FUNCTION;
2108 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2109 &cat.weighted, &cat.area))
2112 if (lex_match (lexer, T_LPAREN))
2114 cat.sort_var = parse_variable (lexer, dict);
2118 if (cat.sort_function == CTSF_PTILE)
2120 lex_match (lexer, T_COMMA);
2121 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2123 cat.percentile = lex_number (lexer);
2127 if (!lex_force_match (lexer, T_RPAREN))
2130 else if (ctables_function_availability (cat.sort_function)
2133 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2138 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2140 lex_match (lexer, T_EQUALS);
2141 if (lex_match_id (lexer, "INCLUDE"))
2142 cat.include_missing = true;
2143 else if (lex_match_id (lexer, "EXCLUDE"))
2144 cat.include_missing = false;
2147 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2151 else if (lex_match_id (lexer, "TOTAL"))
2153 lex_match (lexer, T_EQUALS);
2154 if (!parse_bool (lexer, &show_totals))
2157 else if (lex_match_id (lexer, "LABEL"))
2159 lex_match (lexer, T_EQUALS);
2160 if (!lex_force_string (lexer))
2163 total_label = ss_xstrdup (lex_tokss (lexer));
2166 else if (lex_match_id (lexer, "POSITION"))
2168 lex_match (lexer, T_EQUALS);
2169 if (lex_match_id (lexer, "BEFORE"))
2170 totals_before = true;
2171 else if (lex_match_id (lexer, "AFTER"))
2172 totals_before = false;
2175 lex_error_expecting (lexer, "BEFORE", "AFTER");
2179 else if (lex_match_id (lexer, "EMPTY"))
2181 lex_match (lexer, T_EQUALS);
2182 if (lex_match_id (lexer, "INCLUDE"))
2183 c->show_empty = true;
2184 else if (lex_match_id (lexer, "EXCLUDE"))
2185 c->show_empty = false;
2188 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2195 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2196 "TOTAL", "LABEL", "POSITION", "EMPTY");
2198 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2205 if (c->n_cats >= allocated_cats)
2206 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2207 c->cats[c->n_cats++] = cat;
2212 if (c->n_cats >= allocated_cats)
2213 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
/* The totals category goes either first or last in the list. */
2215 struct ctables_category *totals;
2218 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2219 totals = &c->cats[0];
2222 totals = &c->cats[c->n_cats];
2225 *totals = (struct ctables_category) {
2227 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward when
   they come last, linking categories to a subtotal. */
2231 struct ctables_category *subtotal = NULL;
2232 for (size_t i = totals_before ? 0 : c->n_cats;
2233 totals_before ? i < c->n_cats : i-- > 0;
2234 totals_before ? i++ : 0)
2236 struct ctables_category *cat = &c->cats[i];
2245 cat->subtotal = subtotal;
2248 case CCT_POSTCOMPUTE:
2259 case CCT_EXCLUDED_MISSING:
/* Validate the explicit categories, if a bracketed list was given. */
2264 if (cats_start_ofs != -1)
2266 for (size_t i = 0; i < c->n_cats; i++)
2268 struct ctables_category *cat = &c->cats[i];
2271 case CCT_POSTCOMPUTE:
2272 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2273 struct msg_location *cats_location
2274 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2275 bool ok = ctables_recursive_check_postcompute (
2276 dict, cat->pc->expr, cat, c, cats_location);
2277 msg_location_destroy (cats_location);
2284 for (size_t j = 0; j < n_vars; j++)
2285 if (var_is_alpha (vars[j]))
2287 msg_at (SE, cat->location,
2288 _("This category specification may be applied "
2289 "only to numeric variables, but this "
2290 "subcommand tries to apply it to string "
2292 var_get_name (vars[j]));
2301 if (!parse_category_string (cat->location, cat->string, dict,
2302 common_format->type, &n))
/* The string is released because the category becomes a number. */
2305 ss_dealloc (&cat->string);
2307 cat->type = CCT_NUMBER;
2310 else if (!all_strings (vars, n_vars, cat))
2319 if (!cat->srange[0].string)
2321 else if (!parse_category_string (cat->location,
2322 cat->srange[0], dict,
2323 common_format->type, &n[0]))
2326 if (!cat->srange[1].string)
2328 else if (!parse_category_string (cat->location,
2329 cat->srange[1], dict,
2330 common_format->type, &n[1]))
/* Both bounds are released because the category becomes a numeric
   range. */
2333 ss_dealloc (&cat->srange[0]);
2334 ss_dealloc (&cat->srange[1]);
2336 cat->type = CCT_NRANGE;
2337 cat->nrange[0] = n[0];
2338 cat->nrange[1] = n[1];
2340 else if (!all_strings (vars, n_vars, cat))
2351 case CCT_EXCLUDED_MISSING:
/* Releases the contents of NEST: the summary spec set for every summary
   variant and the index array for every area type.  NEST itself is not
   freed by the lines shown. */
2366 ctables_nest_uninit (struct ctables_nest *nest)
2369 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2370 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2371 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2372 free (nest->areas[at]);
/* Releases the contents of STACK: uninitializes each of its STACK->n nests
   and frees the nest array. */
2376 ctables_stack_uninit (struct ctables_stack *stack)
2380 for (size_t i = 0; i < stack->n; i++)
2381 ctables_nest_uninit (&stack->nests[i]);
2382 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: for every pair of a nest A from S0
   and a nest B from S1, creates a new nest whose variables are A's
   followed by B's.  The new nest's scale index is A's if A has one,
   otherwise B's shifted by A's variable count.  Its summary specs are
   cloned from whichever source nest is selected as SUMMARY_SRC, based on
   which of A and B has a CSV_CELL spec variable.

   S0 and S1 are passed by value and their contents are uninitialized
   before returning, so the caller gives up ownership of both. */
2386 static struct ctables_stack
2387 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for S0.n * S1.n nests. */
2394 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2395 for (size_t i = 0; i < s0.n; i++)
2396 for (size_t j = 0; j < s1.n; j++)
2398 const struct ctables_nest *a = &s0.nests[i];
2399 const struct ctables_nest *b = &s1.nests[j];
2401 size_t allocate = a->n + b->n;
2402 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2404 for (size_t k = 0; k < a->n; k++)
2405 vars[n++] = a->vars[k];
2406 for (size_t k = 0; k < b->n; k++)
2407 vars[n++] = b->vars[k];
2408 assert (n == allocate);
2410 const struct ctables_nest *summary_src;
2411 if (!a->specs[CSV_CELL].var)
2413 else if (!b->specs[CSV_CELL].var)
2418 struct ctables_nest *new = &stack.nests[stack.n++];
2419 *new = (struct ctables_nest) {
2421 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2422 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2426 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2427 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2429 ctables_stack_uninit (&s0);
2430 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by concatenation: the result holds S0's nests
   followed by S1's.  Nests are copied shallowly (by structure assignment),
   and the group_head of each nest taken from S1 is increased by S0.n to
   account for its new position. */
2434 static struct ctables_stack
2435 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2437 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2438 for (size_t i = 0; i < s0.n; i++)
2439 stack.nests[stack.n++] = s0.nests[i];
2440 for (size_t i = 0; i < s1.n; i++)
2442 stack.nests[stack.n] = s1.nests[i];
2443 stack.nests[stack.n].group_head += s0.n;
2446 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single nest built from variable axis node
   A.  The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   (meaning "none") otherwise.  If A has CSV_CELL summary specs or is a
   scale variable, the specs for every summary variant are cloned from A
   and tagged with A's variable and scale flag. */
2452 static struct ctables_stack
2453 var_fts (const struct ctables_axis *a)
2455 struct variable **vars = xmalloc (sizeof *vars);
2458 struct ctables_nest *nest = xmalloc (sizeof *nest);
2459 *nest = (struct ctables_nest) {
2462 .scale_idx = a->scale ? 0 : SIZE_MAX,
2464 if (a->specs[CSV_CELL].n || a->scale)
2465 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2467 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2468 nest->specs[sv].var = a->var;
2469 nest->specs[sv].is_scale = a->scale;
2471 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis tree A into a stack of nests.  An empty
   stack is one possible result (presumably for a null axis -- confirm);
   one node type concatenates the stacks of its two sub-axes with
   stack_fts, and another combines them pairwise with nest_fts. */
2474 static struct ctables_stack
2475 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2478 return (struct ctables_stack) { .n = 0 };
2486 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2487 enumerate_fts (axis_type, a->subs[1]));
2490 /* This should consider any of the scale variables found in the result to
2491 be linked to each other listwise for SMISSING=LISTWISE. */
2492 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2493 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  It is a union, so which member is
   in use depends on the summary function, as grouped by the comments
   below. */
2499 union ctables_summary
2501 /* COUNT, VALIDN, TOTALN. */
2504 /* MINIMUM, MAXIMUM, RANGE. */
2511 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2512 struct moments1 *moments;
2514 /* MEDIAN, MODE, PTILE. */
2517 struct casewriter *writer;
2522 /* XXX multiple response */
/* Initializes summary accumulator S for the function in SS.  Depending on
   the function this sets the minimum and maximum to SYSMIS, creates a
   moments calculator (through the mean or through the variance), or
   creates a sorting case writer for two numeric columns ordered ascending
   by column 0. */
2526 ctables_summary_init (union ctables_summary *s,
2527 const struct ctables_summary_spec *ss)
2529 switch (ss->function)
2533 case CTSF_areaPCT_COUNT:
2534 case CTSF_areaPCT_VALIDN:
2535 case CTSF_areaPCT_TOTALN:
2550 s->min = s->max = SYSMIS;
2555 case CTSF_areaPCT_SUM:
2556 s->moments = moments1_create (MOMENT_MEAN);
2562 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 (numeric) columns; ctables_summary_add writes the value to
   column 0 and its weight to column 1. */
2569 struct caseproto *proto = caseproto_create ();
2570 proto = caseproto_add_width (proto, 0);
2571 proto = caseproto_add_width (proto, 0);
2573 struct subcase ordering;
2574 subcase_init (&ordering, 0, 0, SC_ASCEND);
2575 s->writer = sort_create_writer (&ordering, proto);
2576 subcase_uninit (&ordering);
2577 caseproto_unref (proto);
/* Releases whatever ctables_summary_init allocated in S for the function
   in SS: the moments calculator or the case writer, for the functions that
   use one. */
2587 ctables_summary_uninit (union ctables_summary *s,
2588 const struct ctables_summary_spec *ss)
2590 switch (ss->function)
2594 case CTSF_areaPCT_COUNT:
2595 case CTSF_areaPCT_VALIDN:
2596 case CTSF_areaPCT_TOTALN:
2617 case CTSF_areaPCT_SUM:
2618 moments1_destroy (s->moments);
2624 casewriter_destroy (s->writer);
/* Adds one case's VALUE of VAR to summary accumulator S according to the
   function in SS.

   Counting functions add D_WEIGHT when SS->weighted is set and 1.0
   otherwise; some branches add E_WEIGHT directly.  Minimum/maximum, the
   moments-based functions and the order statistics skip the value when
   IS_SCALE_MISSING is set; the area-percentage sum also skips it when
   IS_MISSING is set.  Order statistics append the value and its weight to
   S's case writer. */
2630 ctables_summary_add (union ctables_summary *s,
2631 const struct ctables_summary_spec *ss,
2632 const struct variable *var, const union value *value,
2633 bool is_scale, bool is_scale_missing,
2634 bool is_missing, bool excluded_missing,
2635 double d_weight, double e_weight)
2637 /* To determine whether a case is included in a given table for a particular
2638 kind of summary, consider the following charts for each variable in the
2639 table. Only if "yes" appears for every variable for the summary is the
2642 Categorical variables: VALIDN COUNT TOTALN
2643 Valid values in included categories yes yes yes
2644 Missing values in included categories --- yes yes
2645 Missing values in excluded categories --- --- yes
2646 Valid values in excluded categories --- --- ---
2648 Scale variables: VALIDN COUNT TOTALN
2649 Valid value yes yes yes
2650 Missing value --- yes yes
2652 Missing values include both user- and system-missing. (The system-missing
2653 value is always in an excluded category.)
2655 switch (ss->function)
2658 case CTSF_areaPCT_TOTALN:
2659 s->count += ss->weighted ? d_weight : 1.0;
2663 case CTSF_areaPCT_COUNT:
2664 if (is_scale || !excluded_missing)
2665 s->count += ss->weighted ? d_weight : 1.0;
2669 case CTSF_areaPCT_VALIDN:
2673 s->count += ss->weighted ? d_weight : 1.0;
2683 s->count += ss->weighted ? d_weight : 1.0;
2687 if (is_scale || !excluded_missing)
2688 s->count += e_weight;
2695 s->count += e_weight;
2699 s->count += e_weight;
2705 if (!is_scale_missing)
2707 assert (!var_is_alpha (var)); /* XXX? */
2708 if (s->min == SYSMIS || value->f < s->min)
2710 if (s->max == SYSMIS || value->f > s->max)
2720 if (!is_scale_missing)
2721 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2724 case CTSF_areaPCT_SUM:
2725 if (!is_missing && !is_scale_missing)
2726 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2732 if (!is_scale_missing)
2734 double w = ss->weighted ? e_weight : 1.0;
2737 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2738 *case_num_rw_idx (c, 0) = value->f;
2739 *case_num_rw_idx (c, 1) = w;
2740 casewriter_write (s->writer, c);
/* Computes the final value of summary S for CELL according to the function
   in SS.

   Area percentages divide S's count by the matching count of the area
   CELL->areas[SS->calc_area] (weighted or unweighted per SS->weighted),
   times 100, and yield SYSMIS when that denominator is zero.
   Moments-based functions read their results from moments1_calculate and
   propagate SYSMIS.  Percentile and mode results are computed from a
   reader made from S's case writer and stored in S->ovalue. */
2747 ctables_summary_value (const struct ctables_cell *cell,
2748 union ctables_summary *s,
2749 const struct ctables_summary_spec *ss)
2751 switch (ss->function)
2758 return cell->areas[ss->calc_area]->sequence;
2760 case CTSF_areaPCT_COUNT:
2762 const struct ctables_area *a = cell->areas[ss->calc_area];
2763 double a_count = ss->weighted ? a->e_count : a->u_count;
2764 return a_count ? s->count / a_count * 100 : SYSMIS;
2767 case CTSF_areaPCT_VALIDN:
2769 const struct ctables_area *a = cell->areas[ss->calc_area];
2770 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2771 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2774 case CTSF_areaPCT_TOTALN:
2776 const struct ctables_area *a = cell->areas[ss->calc_area];
2777 double a_total = ss->weighted ? a->e_total : a->u_total;
2778 return a_total ? s->count / a_total * 100 : SYSMIS;
2795 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2800 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2806 double weight, variance;
2807 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2808 return calc_semean (variance, weight);
2814 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2815 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2820 double weight, mean;
2821 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2822 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2828 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2832 case CTSF_areaPCT_SUM:
2834 double weight, mean;
2835 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2836 if (weight == SYSMIS || mean == SYSMIS)
/* The denominator is the area's sum for this summary's sum variable. */
2839 const struct ctables_area *a = cell->areas[ss->calc_area];
2840 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2841 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2842 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2849 struct casereader *reader = casewriter_make_reader (s->writer);
/* Any function other than CTSF_PTILE that reaches this point uses the
   0.5 percentile. */
2852 struct percentile *ptile = percentile_create (
2853 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2854 struct order_stats *os = &ptile->parent;
2855 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2856 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2857 statistic_destroy (&ptile->parent.parent);
2864 struct casereader *reader = casewriter_make_reader (s->writer);
2867 struct mode *mode = mode_create ();
2868 struct order_stats *os = &mode->parent;
2869 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2870 s->ovalue = mode->mode;
2871 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way: the nest whose variables
   are compared and the pivot axis whose cell values are read. */
2879 struct ctables_cell_sort_aux
2881 const struct ctables_nest *nest;
2882 enum pivot_axis_type a;
/* Three-way comparison of two cells, for sorting.  A_ and B_ point to
   pointers to cells; AUX_ points to a struct ctables_cell_sort_aux.

   The cells are compared variable by variable along axis AUX->a, skipping
   the nest's scale variable.  Cells in different categories are ordered by
   the categories' addresses.  Within one category the comparison depends
   on the category type: by value, by value respecting the category's sort
   direction, or by value label (strcmp) respecting the sort direction. */
2886 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2888 const struct ctables_cell_sort_aux *aux = aux_;
2889 struct ctables_cell *const *ap = a_;
2890 struct ctables_cell *const *bp = b_;
2891 const struct ctables_cell *a = *ap;
2892 const struct ctables_cell *b = *bp;
2894 const struct ctables_nest *nest = aux->nest;
2895 for (size_t i = 0; i < nest->n; i++)
2896 if (i != nest->scale_idx)
2898 const struct variable *var = nest->vars[i];
2899 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2900 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2901 if (a_cv->category != b_cv->category)
2902 return a_cv->category > b_cv->category ? 1 : -1;
2904 const union value *a_val = &a_cv->value;
2905 const union value *b_val = &b_cv->value;
2906 switch (a_cv->category->type)
2912 case CCT_POSTCOMPUTE:
2913 case CCT_EXCLUDED_MISSING:
2914 /* Must be equal. */
2922 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2930 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2932 return a_cv->category->sort_ascending ? cmp : -cmp;
2938 const char *a_label = var_lookup_value_label (var, a_val);
2939 const char *b_label = var_lookup_value_label (var, b_val);
2945 cmp = strcmp (a_label, b_label);
2951 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2954 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, for sorting.
   A_ and B_ point to pointers to cells; AUX is unused.  The axes are
   compared in order from axis 0 upward. */
2966 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2967 const void *aux UNUSED)
2969 struct ctables_cell *const *ap = a_;
2970 struct ctables_cell *const *bp = b_;
2971 const struct ctables_cell *a = *ap;
2972 const struct ctables_cell *b = *bp;
2974 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2976 int al = a->axes[axis].leaf;
2977 int bl = b->axes[axis].leaf;
2979 return al > bl ? 1 : -1;
2987 For each ctables_table:
2988 For each combination of row vars:
2989 For each combination of column vars:
2990 For each combination of layer vars:
2992 Make a table of row values:
2993 Sort entries by row values
2994 Assign a 0-based index to each actual value
2995 Construct a dimension
2996 Make a table of column values
2997 Make a table of layer values
2999 Fill the table entry using the indexes from before.
/* Finds or creates, in section S, the area of type AREA that CELL belongs
   to.

   An area is identified by the categories, and for most categories also
   the values, of the variables that each axis's nest lists for AREA.
   Values are ignored for total, subtotal and postcompute categories.  The
   same key is used both for hashing and for the equality test against each
   candidate's example cell.

   When no existing area matches, a new one is created with CELL as its
   example and a zero-filled sums array (allocated only if the table has
   sum variables), and inserted into S->areas[AREA]. */
3002 static struct ctables_area *
3003 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3004 enum ctables_area_type area)
3007 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3009 const struct ctables_nest *nest = s->nests[a];
3010 for (size_t i = 0; i < nest->n_areas[area]; i++)
3012 size_t v_idx = nest->areas[area][i];
3013 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3014 hash = hash_pointer (cv->category, hash);
3015 if (cv->category->type != CCT_TOTAL
3016 && cv->category->type != CCT_SUBTOTAL
3017 && cv->category->type != CCT_POSTCOMPUTE)
3018 hash = value_hash (&cv->value,
3019 var_get_width (nest->vars[v_idx]), hash);
3023 struct ctables_area *a;
3024 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3026 const struct ctables_cell *df = a->example;
/* Note that this loop variable shadows the area pointer A declared
   above. */
3027 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3029 const struct ctables_nest *nest = s->nests[a];
3030 for (size_t i = 0; i < nest->n_areas[area]; i++)
3032 size_t v_idx = nest->areas[area][i];
3033 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3034 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3035 if (cv1->category != cv2->category
3036 || (cv1->category->type != CCT_TOTAL
3037 && cv1->category->type != CCT_SUBTOTAL
3038 && cv1->category->type != CCT_POSTCOMPUTE
3039 && !value_equal (&cv1->value, &cv2->value,
3040 var_get_width (nest->vars[v_idx]))))
3049 struct ctables_sum *sums = (s->table->n_sum_vars
3050 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3053 a = xmalloc (sizeof *a);
3054 *a = (struct ctables_area) { .example = cell, .sums = sums };
3055 hmap_insert (&s->areas[area], &a->node, hash);
/* Makes a substring over the string data of V (V->s) with VAR's width as
   its length, then trims trailing spaces from it.  The substring is built
   with ss_buffer directly over V->s; no copy is visible here. */
3059 static struct substring
3060 rtrim_value (const union value *v, const struct variable *var)
3062 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3063 var_get_width (var));
3064 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of VAR, with trailing spaces trimmed,
   lies within SRANGE, a two-element array of lower and upper bounds.  Both
   bounds are inclusive, and a bound whose 'string' member is null imposes
   no limit on that side. */
3069 in_string_range (const union value *v, const struct variable *var,
3070 const struct substring *srange)
3072 struct substring s = rtrim_value (v, var);
3073 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3074 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is tested for first.  The categories are
   then scanned from last to first, each tested according to its type:
   exact number, exact string (trailing spaces trimmed), inclusive numeric
   range (where -DBL_MAX and DBL_MAX mean "unbounded"), string range, or
   missing value.  If the scan falls through, a missing value yields NULL
   and any other value yields OTHERNM, which starts out NULL. */
3077 static const struct ctables_category *
3078 ctables_categories_match (const struct ctables_categories *c,
3079 const union value *v, const struct variable *var)
3081 if (var_is_numeric (var) && v->f == SYSMIS)
3084 const struct ctables_category *othernm = NULL;
3085 for (size_t i = c->n_cats; i-- > 0; )
3087 const struct ctables_category *cat = &c->cats[i];
3091 if (cat->number == v->f)
3096 if (ss_equals (cat->string, rtrim_value (v, var)))
3101 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3102 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3107 if (in_string_range (v, var, cat->srange))
3112 if (var_is_value_missing (var, v))
3116 case CCT_POSTCOMPUTE:
3131 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3134 case CCT_EXCLUDED_MISSING:
3139 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's totals category, which can only be its first or its last
   category: the first is checked, then the last.

   NOTE(review): cats[0] and cats[n_cats - 1] are read unconditionally, so
   this appears to assume C->n_cats >= 1 -- confirm that callers guarantee
   a nonempty list. */
3142 static const struct ctables_category *
3143 ctables_categories_total (const struct ctables_categories *c)
3145 const struct ctables_category *first = &c->cats[0];
3146 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3147 return (first->type == CCT_TOTAL ? first
3148 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S that is identified by the categories in
   CATS together with the data in case C, creating and inserting it into
   S->cells if it does not exist yet.

   A cell's identity is, for every non-scale variable on every axis, the
   category pointer plus, unless the category is a total, subtotal, or
   postcompute, the variable's value in C.  The hash and the equality test
   below are built from exactly those components.

   A newly created cell gets a clone of each variable's value, one summary
   per summary specification (initialized with ctables_summary_init), and one
   area per area type (from ctables_area_insert).

   NOTE(review): the second dimension of CATS is hard-coded to 10; nothing
   visible here checks nest->n against it. */
3152 static struct ctables_cell *
3153 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3154 const struct ctables_category *cats[PIVOT_N_AXES][10])
3157 enum ctables_summary_variant sv = CSV_CELL;
/* Compute the hash of the cell's identity. */
3158 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3160 const struct ctables_nest *nest = s->nests[a];
3161 for (size_t i = 0; i < nest->n; i++)
3162 if (i != nest->scale_idx)
3164 hash = hash_pointer (cats[a][i], hash);
3165 if (cats[a][i]->type != CCT_TOTAL
3166 && cats[a][i]->type != CCT_SUBTOTAL
3167 && cats[a][i]->type != CCT_POSTCOMPUTE)
3168 hash = value_hash (case_data (c, nest->vars[i]),
3169 var_get_width (nest->vars[i]), hash);
/* Search the cells with that hash for one with the same identity. */
3175 struct ctables_cell *cell;
3176 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3178 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3180 const struct ctables_nest *nest = s->nests[a];
3181 for (size_t i = 0; i < nest->n; i++)
3182 if (i != nest->scale_idx
3183 && (cats[a][i] != cell->axes[a].cvs[i].category
3184 || (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE
3187 && !value_equal (case_data (c, nest->vars[i]),
3188 &cell->axes[a].cvs[i].value,
3189 var_get_width (nest->vars[i])))))
/* No existing cell matched: build a new one. */
3198 cell = xmalloc (sizeof *cell);
3201 cell->omit_areas = 0;
3202 cell->postcompute = false;
3203 //struct string name = DS_EMPTY_INITIALIZER;
3204 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3206 const struct ctables_nest *nest = s->nests[a];
3207 cell->axes[a].cvs = (nest->n
3208 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3210 for (size_t i = 0; i < nest->n; i++)
3212 const struct ctables_category *cat = cats[a][i];
3213 const struct variable *var = nest->vars[i];
3214 const union value *value = case_data (c, var);
3215 if (i != nest->scale_idx)
3217 const struct ctables_category *subtotal = cat->subtotal;
3218 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total, subtotal, and postcompute categories set bits in omit_areas; which
   bits depends on the axis the category is on. */
3221 if (cat->type == CCT_TOTAL
3222 || cat->type == CCT_SUBTOTAL
3223 || cat->type == CCT_POSTCOMPUTE)
3225 /* XXX these should be more encompassing I think.*/
3229 case PIVOT_AXIS_COLUMN:
3230 cell->omit_areas |= ((1u << CTAT_TABLE) |
3231 (1u << CTAT_LAYER) |
3232 (1u << CTAT_LAYERCOL) |
3233 (1u << CTAT_SUBTABLE) |
3236 case PIVOT_AXIS_ROW:
3237 cell->omit_areas |= ((1u << CTAT_TABLE) |
3238 (1u << CTAT_LAYER) |
3239 (1u << CTAT_LAYERROW) |
3240 (1u << CTAT_SUBTABLE) |
3243 case PIVOT_AXIS_LAYER:
3244 cell->omit_areas |= ((1u << CTAT_TABLE) |
3245 (1u << CTAT_LAYER));
3249 if (cat->type == CCT_POSTCOMPUTE)
3250 cell->postcompute = true;
3253 cell->axes[a].cvs[i].category = cat;
3254 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the lines below build a human-readable cell name in 'name',
   whose declaration is commented out above.  Presumably they are compiled
   out by a preprocessor guard that is not visible in this excerpt; confirm. */
3257 if (i != nest->scale_idx)
3259 if (!ds_is_empty (&name))
3260 ds_put_cstr (&name, ", ");
3261 char *value_s = data_out (value, var_get_encoding (var),
3262 var_get_print_format (var),
3263 settings_get_fmt_settings ());
3264 if (cat->type == CCT_TOTAL
3265 || cat->type == CCT_SUBTOTAL
3266 || cat->type == CCT_POSTCOMPUTE)
3267 ds_put_format (&name, "%s=total", var_get_name (var));
3269 ds_put_format (&name, "%s=%s", var_get_name (var),
3270 value_s + strspn (value_s, " "));
3276 //cell->name = ds_steal_cstr (&name);
/* Set up one summary per specification on the summary axis, attach the
   cell's areas, and register the cell in the section. */
3278 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3279 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3280 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3281 for (size_t i = 0; i < specs->n; i++)
3282 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3283 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3284 cell->areas[at] = ctables_area_insert (s, cell, at);
3285 hmap_insert (&s->cells, &cell->node, hash);
3290 is_scale_missing (const struct ctables_summary_spec_set *specs,
3291 const struct ccase *c)
3293 if (!specs->is_scale)
3296 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3299 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3301 const struct variable *var = specs->listwise_vars[i];
3302 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell in section S
   identified by CATS (found or created via ctables_cell_insert__).

   Every summary of the cell is updated with ctables_summary_add.  Then, for
   the cell's areas, the total, count, and valid accumulators and the
   per-variable sums are updated.  The count accumulators are updated only
   when EXCLUDED_MISSING is false.

   NOTE(review): the condition guarding the valid accumulators and the sums
   is not visible in this excerpt; presumably it tests IS_MISSING. */
3310 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3311 const struct ctables_category *cats[PIVOT_N_AXES][10],
3312 bool is_missing, bool excluded_missing,
3313 double d_weight, double e_weight)
3315 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3316 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3318 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3320 bool scale_missing = is_scale_missing (specs, c);
3321 for (size_t i = 0; i < specs->n; i++)
3322 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3323 specs->var, case_data (c, specs->var), specs->is_scale,
3324 scale_missing, is_missing, excluded_missing,
3325 d_weight, e_weight);
/* NOTE(review): this uses logical '&&', not bitwise '&'.  Since (1u << at)
   is always nonzero, the test reduces to '!cell->omit_areas': a cell with any
   bit set in omit_areas contributes to no area at all, and the individual
   bits are never consulted.  Switching to '&' would make cells with total,
   subtotal, or postcompute categories contribute to the areas whose bit is
   not set, which changes the accumulated totals; decide which is intended
   before touching it. */
3326 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3327 if (!(cell->omit_areas && (1u << at)))
3329 struct ctables_area *a = cell->areas[at];
3330 a->d_total += d_weight;
3331 a->e_total += e_weight;
3333 if (!excluded_missing)
3335 a->d_count += d_weight;
3336 a->e_count += e_weight;
3341 a->d_valid += d_weight;
3342 a->e_valid += e_weight;
/* Accumulate each sum variable's value, skipping values that are missing for
   that variable: e_sum is weighted by E_WEIGHT, u_sum is unweighted. */
3345 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3347 /* XXX listwise_missing??? */
3348 const struct variable *var = s->table->sum_vars[i];
3349 double addend = case_num (c, var);
3350 if (!var_is_num_missing (var, addend))
3352 struct ctables_sum *sum = &a->sums[i];
3353 sum->e_sum += addend * e_weight;
3354 sum->u_sum += addend;
/* Adds case C to the total cells of section S.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable position is visited.  For each one, the variable's total category
   is looked up with ctables_categories_total, the case is added with
   ctables_cell_add__, and the function recurses from the following nest
   position (i + 1) on the same axis.

   NOTE(review): the current entry of CATS is saved in 'save', but the lines
   that substitute the total category and later restore 'save' are not
   visible in this excerpt; presumably CATS is temporarily modified in place
   around the two calls. */
3362 recurse_totals (struct ctables_section *s, const struct ccase *c,
3363 const struct ctables_category *cats[PIVOT_N_AXES][10],
3364 bool is_missing, bool excluded_missing,
3365 double d_weight, double e_weight,
3366 enum pivot_axis_type start_axis, size_t start_nest)
3368 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3370 const struct ctables_nest *nest = s->nests[a];
3371 for (size_t i = start_nest; i < nest->n; i++)
3373 if (i == nest->scale_idx)
3376 const struct variable *var = nest->vars[i];
/* The categories for a variable are indexed by its dictionary index. */
3378 const struct ctables_category *total = ctables_categories_total (
3379 s->table->categories[var_get_dict_index (var)]);
3382 const struct ctables_category *save = cats[a][i];
3384 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3385 d_weight, e_weight);
3386 recurse_totals (s, c, cats, is_missing, excluded_missing,
3387 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells of section S.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable position is visited.  The current category at that position is
   replaced in CATS by its 'subtotal' category, the case is added with
   ctables_cell_add__, and the function recurses from the following nest
   position (i + 1) on the same axis.

   NOTE(review): the guard that skips categories without a subtotal and the
   line that restores 'save' into CATS are not visible in this excerpt. */
3396 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3397 const struct ctables_category *cats[PIVOT_N_AXES][10],
3398 bool is_missing, bool excluded_missing,
3399 double d_weight, double e_weight,
3400 enum pivot_axis_type start_axis, size_t start_nest)
3402 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3404 const struct ctables_nest *nest = s->nests[a];
3405 for (size_t i = start_nest; i < nest->n; i++)
3407 if (i == nest->scale_idx)
3410 const struct ctables_category *save = cats[a][i];
/* Temporarily stand in the subtotal category for the matched one. */
3413 cats[a][i] = save->subtotal;
3414 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3415 d_weight, e_weight);
3416 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3417 d_weight, e_weight, a, i + 1);
3426 ctables_add_occurrence (const struct variable *var,
3427 const union value *value,
3428 struct hmap *occurrences)
3430 int width = var_get_width (var);
3431 unsigned int hash = value_hash (value, width, 0);
3433 struct ctables_occurrence *o;
3434 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3436 if (value_equal (value, &o->value, width))
3439 o = xmalloc (sizeof *o);
3440 value_clone (&o->value, value, width);
3441 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   For every non-scale variable on every axis, the variable's value in C is
   matched to a category with ctables_categories_match.  A static
   CCT_EXCLUDED_MISSING placeholder category can be substituted, in which case
   'excluded_missing' is set.  Unless 'excluded_missing' is set, each value is
   also recorded in the section's per-variable occurrence sets.  The case is
   then added to its own cell and, via recurse_totals and recurse_subtotals,
   to total and subtotal cells.

   NOTE(review): the conditions under which 'is_missing' is set and under
   which the placeholder category is used are not visible in this excerpt. */
3445 ctables_cell_insert (struct ctables_section *s,
3446 const struct ccase *c,
3447 double d_weight, double e_weight)
3449 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3451 /* Does at least one categorical variable have a missing value in an included
3452 or excluded category? */
3453 bool is_missing = false;
3455 /* Does at least one categorical variable have a missing value in an excluded
3457 bool excluded_missing = false;
3459 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3461 const struct ctables_nest *nest = s->nests[a];
3462 for (size_t i = 0; i < nest->n; i++)
3464 if (i == nest->scale_idx)
3467 const struct variable *var = nest->vars[i];
3468 const union value *value = case_data (c, var);
3470 bool var_missing = var_is_value_missing (var, value) != 0;
3474 cats[a][i] = ctables_categories_match (
3475 s->table->categories[var_get_dict_index (var)], value, var);
/* A single shared placeholder stands in for the category here. */
3481 static const struct ctables_category cct_excluded_missing = {
3482 .type = CCT_EXCLUDED_MISSING,
3485 cats[a][i] = &cct_excluded_missing;
3486 excluded_missing = true;
/* Remember which values actually occurred for each categorical variable. */
3491 if (!excluded_missing)
3492 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3494 const struct ctables_nest *nest = s->nests[a];
3495 for (size_t i = 0; i < nest->n; i++)
3496 if (i != nest->scale_idx)
3498 const struct variable *var = nest->vars[i];
3499 const union value *value = case_data (c, var);
3500 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3504 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3505 d_weight, e_weight);
3507 //if (!excluded_missing)
3509 recurse_totals (s, c, cats, is_missing, excluded_missing,
3510 d_weight, e_weight, 0, 0);
3511 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3512 d_weight, e_weight, 0, 0);
/* The summary specification set that the item refers to.
   merge_item_compare_3way indexes its 'specs' array with the item's 'ofs'
   member (whose declaration is not visible in this excerpt). */
3518 const struct ctables_summary_spec_set *set;
3523 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3525 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3526 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3527 if (as->function != bs->function)
3528 return as->function > bs->function ? 1 : -1;
3529 else if (as->weighted != bs->weighted)
3530 return as->weighted > bs->weighted ? 1 : -1;
3531 else if (as->calc_area != bs->calc_area)
3532 return as->calc_area > bs->calc_area ? 1 : -1;
3533 else if (as->percentile != bs->percentile)
3534 return as->percentile < bs->percentile ? 1 : -1;
3536 const char *as_label = as->label ? as->label : "";
3537 const char *bs_label = bs->label ? bs->label : "";
3538 return strcmp (as_label, bs_label);
3542 ctables_category_format_number (double number, const struct variable *var,
3545 struct pivot_value *pv = pivot_value_new_var_value (
3546 var, &(union value) { .f = number });
3547 pivot_value_format (pv, NULL, s);
3548 pivot_value_destroy (pv);
3552 ctables_category_format_string (struct substring string,
3553 const struct variable *var, struct string *out)
3555 int width = var_get_width (var);
3556 char *s = xmalloc (width);
3557 buf_copy_rpad (s, width, string.string, string.length, ' ');
3558 struct pivot_value *pv = pivot_value_new_var_value (
3559 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3560 pivot_value_format (pv, NULL, out);
3561 pivot_value_destroy (pv);
/* Appends to S a textual description of category CAT for variable VAR.

   The visible forms are: a formatted number; a formatted string; two
   formatted numbers or two formatted strings joined by " THRU "; the
   keywords "MISSING" and "OTHERNM"; "&" followed by the postcompute's name;
   and the category's total label.

   NOTE(review): the case labels of the switch and the return statements are
   not visible in this excerpt.  The caller in ctables_postcompute_label
   tests the result with '!', so the function presumably returns false for
   category types it cannot describe; confirm. */
3566 ctables_category_format_label (const struct ctables_category *cat,
3567 const struct variable *var,
3573 ctables_category_format_number (cat->number, var, s);
3577 ctables_category_format_string (cat->string, var, s);
3581 ctables_category_format_number (cat->nrange[0], var, s);
3582 ds_put_format (s, " THRU ");
3583 ctables_category_format_number (cat->nrange[1], var, s);
3587 ctables_category_format_string (cat->srange[0], var, s);
3588 ds_put_format (s, " THRU ");
3589 ctables_category_format_string (cat->srange[1], var, s);
3593 ds_put_cstr (s, "MISSING");
3597 ds_put_cstr (s, "OTHERNM");
3600 case CCT_POSTCOMPUTE:
3601 ds_put_format (s, "&%s", cat->pc->name);
3606 ds_put_cstr (s, cat->total_label);
3612 case CCT_EXCLUDED_MISSING:
/* Builds the label for postcompute category CAT of variable VAR, expanding
   each ")LABEL[n]" marker in cat->pc->label into the description of the
   n'th category (1-based) of CATS, as produced by
   ctables_category_format_label.

   If the label contains no marker at all, it is returned as-is.  The final
   return statement also yields the unexpanded label.

   NOTE(review): the jumps that lead to that final return are not visible in
   this excerpt; presumably it is the fallback for a malformed marker (no
   closing ']', a non-numeric or out-of-range index, or a category that
   cannot be described).  Whether 'out' is released on that path must be
   checked in the full source. */
3619 static struct pivot_value *
3620 ctables_postcompute_label (const struct ctables_categories *cats,
3621 const struct ctables_category *cat,
3622 const struct variable *var)
3624 struct substring in = ss_cstr (cat->pc->label);
3625 struct substring target = ss_cstr (")LABEL[");
3627 struct string out = DS_EMPTY_INITIALIZER;
3630 size_t chunk = ss_find_substring (in, target);
3631 if (chunk == SIZE_MAX)
/* No (further) marker.  If nothing was expanded, reuse the remaining input
   directly; otherwise append the remainder and hand over the built string. */
3633 if (ds_is_empty (&out))
3634 return pivot_value_new_user_text (in.string, in.length);
3637 ds_put_substring (&out, in);
3638 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker, then skip past the marker itself. */
3642 ds_put_substring (&out, ss_head (in, chunk));
3643 ss_advance (&in, chunk + target.length);
3645 struct substring idx_s;
3646 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a decimal number in 1...n_cats that spans all of the
   text between the brackets. */
3649 long int idx = strtol (idx_s.string, &tail, 10);
3650 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3653 struct ctables_category *cat2 = &cats->cats[idx - 1];
3654 if (!ctables_category_format_label (cat2, var, &out))
3660 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3663 static struct pivot_value *
3664 ctables_category_create_value_label (const struct ctables_categories *cats,
3665 const struct ctables_category *cat,
3666 const struct variable *var,
3667 const union value *value)
3669 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3670 ? ctables_postcompute_label (cats, cat, var)
3671 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3672 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3673 : pivot_value_new_var_value (var, value));
3676 static struct ctables_value *
3677 ctables_value_find__ (struct ctables_table *t, const union value *value,
3678 int width, unsigned int hash)
3680 struct ctables_value *clv;
3681 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3682 hash, &t->clabels_values_map)
3683 if (value_equal (value, &clv->value, width))
3689 ctables_value_insert (struct ctables_table *t, const union value *value,
3692 unsigned int hash = value_hash (value, width, 0);
3693 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3696 clv = xmalloc (sizeof *clv);
3697 value_clone (&clv->value, value, width);
3698 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T's category-label value map that equals VALUE, which
   has the given WIDTH, or NULL if there is none.  The hash is computed the
   same way as in ctables_value_insert (value_hash with basis 0). */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  return ctables_value_find__ (t, value, width,
                               value_hash (value, width, 0));
}
/* Recursively creates the sections of table T, one for every combination of
   nests across the axes.

   While A is a valid axis, IX[A] iterates over the nests in T's stack for
   that axis (a stack with no nests still gets one iteration) and the function
   recurses on the next axis.  Once every axis has an index in IX, a new
   section is appended to T->sections and initialized: an empty cell map, one
   occurrence set per variable of each axis's nest, and one empty area map
   per area type.

   NOTE(review): T->sections must already have room for the new section;
   nothing visible here grows the array.  The line that sets the section's
   nests, and any handling of an axis whose stack is empty, is not visible in
   this excerpt. */
3711 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3712 size_t ix[PIVOT_N_AXES])
3714 if (a < PIVOT_N_AXES)
/* MAX (..., 1) gives an axis without nests a single pass. */
3716 size_t limit = MAX (t->stacks[a].n, 1);
3717 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3718 ctables_table_add_section (t, a + 1, ix);
3722 struct ctables_section *s = &t->sections[t->n_sections++];
3723 *s = (struct ctables_section) {
3725 .cells = HMAP_INITIALIZER (s->cells),
3727 for (a = 0; a < PIVOT_N_AXES; a++)
3730 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3732 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3733 for (size_t i = 0; i < nest->n; i++)
3734 hmap_init (&s->occurrences[a][i]);
3736 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3737 hmap_init (&s->areas[at]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  return a + b;
}
/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  return a - b;
}
/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  return a * b;
}
3760 ctpo_div (double a, double b)
3762 return b ? a / b : SYSMIS;
3766 ctpo_pow (double a, double b)
3768 int save_errno = errno;
3770 double result = pow (a, b);
3778 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell.

   NOTE(review): code elsewhere in this file also reads 'pc_a_idx' and
   'summary_idx' members of this structure; their declarations are not
   visible in this excerpt. */
3783 struct ctables_pcexpr_evaluate_ctx
/* The cell being computed and the section that contains it. */
3785 const struct ctables_cell *cell;
3786 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
3787 const struct ctables_categories *cats;
/* Axis on which the postcompute category sits. */
3788 enum pivot_axis_type pc_a;
/* Passed through to ctables_find_category_for_postcompute. */
3791 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal, below, calls
   ctables_pcexpr_evaluate before its definition appears. */
3794 static double ctables_pcexpr_evaluate (
3795 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3798 ctables_pcexpr_evaluate_nonterminal (
3799 const struct ctables_pcexpr_evaluate_ctx *ctx,
3800 const struct ctables_pcexpr *e, size_t n_args,
3801 double evaluate (double, double))
3803 double args[2] = { 0, 0 };
3804 for (size_t i = 0; i < n_args; i++)
3806 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3807 if (!isfinite (args[i]) || args[i] == SYSMIS)
3810 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary index ctx->summary_idx, of the cell
   that has the same categories and values as ctx->cell except that, at the
   postcompute's position (axis ctx->pc_a, index ctx->pc_a_idx), it has
   PC_CV's category and value instead.

   The target cell is located in the section's cell map using the same hash
   and equality rules as ctables_cell_insert__: category pointers, plus
   values for categories that are not total, subtotal, or postcompute.

   NOTE(review): what is returned when no such cell exists is not visible in
   this excerpt. */
3814 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3815 const struct ctables_cell_value *pc_cv)
3817 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity. */
3820 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3822 const struct ctables_nest *nest = s->nests[a];
3823 for (size_t i = 0; i < nest->n; i++)
3824 if (i != nest->scale_idx)
3826 const struct ctables_cell_value *cv
3827 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3828 : &ctx->cell->axes[a].cvs[i]);
3829 hash = hash_pointer (cv->category, hash);
3830 if (cv->category->type != CCT_TOTAL
3831 && cv->category->type != CCT_SUBTOTAL
3832 && cv->category->type != CCT_POSTCOMPUTE)
3833 hash = value_hash (&cv->value,
3834 var_get_width (nest->vars[i]), hash);
/* Find the cell with that identity among those sharing the hash. */
3838 struct ctables_cell *tc;
3839 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3841 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3843 const struct ctables_nest *nest = s->nests[a];
3844 for (size_t i = 0; i < nest->n; i++)
3845 if (i != nest->scale_idx)
3847 const struct ctables_cell_value *p_cv
3848 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3849 : &ctx->cell->axes[a].cvs[i]);
3850 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3851 if (p_cv->category != t_cv->category
3852 || (p_cv->category->type != CCT_TOTAL
3853 && p_cv->category->type != CCT_SUBTOTAL
3854 && p_cv->category->type != CCT_POSTCOMPUTE
3855 && !value_equal (&p_cv->value,
3857 var_get_width (nest->vars[i]))))
/* Found: report the requested summary of the target cell. */
3869 const struct ctables_table *t = s->table;
3870 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3871 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3872 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3873 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   - Range, MISSING, and OTHERNM category references: each value that
     occurred for the postcompute's variable and that matches the referenced
     category is evaluated with ctables_pcexpr_evaluate_category, and the
     results are added into 'sum'.

   - Number, subtotal, and total category references: a single cell value is
     built from the referenced category and E's number, and evaluated.

   - String category references: E's string is padded with spaces to the
     variable's width when it is shorter, then evaluated against the
     referenced category.

   - Operators: delegated to ctables_pcexpr_evaluate_nonterminal with the
     matching ctpo_* function (two operands, except one for negation).

   In every category case the referenced category is asserted to exist.

   NOTE(review): the switch header, the operator case labels, the constant
   case, and the cleanup of the padded string are not visible in this
   excerpt. */
3877 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3878 const struct ctables_pcexpr *e)
3885 case CTPO_CAT_NRANGE:
3886 case CTPO_CAT_SRANGE:
3887 case CTPO_CAT_MISSING:
3888 case CTPO_CAT_OTHERNM:
3890 struct ctables_cell_value cv = {
3891 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3893 assert (cv.category != NULL);
3895 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3896 const struct ctables_occurrence *o;
3899 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
/* Only values that were actually observed can belong to a cell. */
3900 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3901 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3903 cv.value = o->value;
3904 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3909 case CTPO_CAT_NUMBER:
3910 case CTPO_CAT_SUBTOTAL:
3911 case CTPO_CAT_TOTAL:
3913 struct ctables_cell_value cv = {
3914 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3915 .value = { .f = e->number },
3917 assert (cv.category != NULL);
3918 return ctables_pcexpr_evaluate_category (ctx, &cv);
3921 case CTPO_CAT_STRING:
3923 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
/* A string shorter than the variable is space-padded into a scratch buffer
   so that it can be compared as a full-width value. */
3925 if (width > e->string.length)
3927 s = xmalloc (width);
3928 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3931 const struct ctables_category *category
3932 = ctables_find_category_for_postcompute (
3933 ctx->section->table->ctables->dict,
3934 ctx->cats, ctx->parse_format, e);
3935 assert (category != NULL);
/* The category found for a string reference may be numeric or string; use
   the category's own number in the former case. */
3937 struct ctables_cell_value cv = { .category = category };
3938 if (category->type == CCT_NUMBER)
3939 cv.value.f = category->number;
3940 else if (category->type == CCT_STRING)
3941 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3945 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3951 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3954 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3957 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3960 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3963 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3966 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category among the category values of CELL, which
   must have its 'postcompute' flag set, by scanning every variable position
   on every axis of section S.  The index of the postcompute within its axis
   is stored in *PC_A_IDX_P.

   NOTE(review): the store to *PC_A_P, the handling of a cell in which several
   postcomputes cross, and the return statement are not visible in this
   excerpt.  The trailing assertion shows that a category is expected to have
   been found on the path that reaches it. */
3972 static const struct ctables_category *
3973 ctables_cell_postcompute (const struct ctables_section *s,
3974 const struct ctables_cell *cell,
3975 enum pivot_axis_type *pc_a_p,
3978 assert (cell->postcompute);
3979 const struct ctables_category *pc_cat = NULL;
3980 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3981 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3983 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3984 if (cv->category->type == CCT_POSTCOMPUTE)
3988 /* Multiple postcomputes cross each other. The value is
3993 pc_cat = cv->category;
3997 *pc_a_idx_p = pc_a_idx;
4001 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS.

   The cell's postcompute category is located with ctables_cell_postcompute.
   If one of the postcompute's own summary specifications has the same
   function, weighting, calculation area, and percentile as SS, its format
   and is_ctables_format flag are stored into *FORMAT and *IS_CTABLES_FORMAT.
   The postcompute's expression is then evaluated with
   ctables_pcexpr_evaluate and the result returned.

   NOTE(review): the last parameter (presumably the summary index used as
   'summary_idx' below), the handling of a missing postcompute category, and
   any guard on pc->specs being null are not visible in this excerpt. */
4006 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4007 const struct ctables_cell *cell,
4008 const struct ctables_summary_spec *ss,
4009 struct fmt_spec *format,
4010 bool *is_ctables_format,
4013 enum pivot_axis_type pc_a = 0;
4014 size_t pc_a_idx = 0;
4015 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4016 s, cell, &pc_a, &pc_a_idx);
4020 const struct ctables_postcompute *pc = pc_cat->pc;
/* Let the postcompute override the output format for a matching summary. */
4023 for (size_t i = 0; i < pc->specs->n; i++)
4025 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4026 if (ss->function == ss2->function
4027 && ss->weighted == ss2->weighted
4028 && ss->calc_area == ss2->calc_area
4029 && ss->percentile == ss2->percentile)
4031 *format = ss2->format;
4032 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4038 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4039 const struct ctables_categories *cats = s->table->categories[
4040 var_get_dict_index (var)];
4041 struct ctables_pcexpr_evaluate_ctx ctx = {
4046 .pc_a_idx = pc_a_idx,
4047 .summary_idx = summary_idx,
4048 .parse_format = pc_cat->parse_format,
4050 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a UTF-8 string, then
   post-processes the result to relocate a leading minus sign as described in
   the comment below.

   NOTE(review): the return statement is not visible in this excerpt;
   presumably the formatted string 's' is returned and owned by the caller. */
4054 ctables_format (double d, const struct fmt_spec *format,
4055 const struct fmt_settings *settings)
4057 const union value v = { .f = d };
4058 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4060 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4061 produce the results we want for negative numbers, putting the negative
4062 sign in the wrong spot, before the prefix instead of after it. We can't,
4063 in fact, produce the desired results using a custom-currency
4064 specification. Instead, we postprocess the output, moving the negative
4067 NEQUAL: "-N=3" => "N=-3"
4068 PAREN: "-(3)" => "(-3)"
4069 PCTPAREN: "-(3%)" => "(-3%)"
4071 This transformation doesn't affect NEGPAREN. */
/* A '-' directly after 'E' is an exponent sign, not the number's sign, so it
   is left alone. */
4072 char *minus_src = strchr (s, '-');
4073 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The minus sign goes right after "N=" if present, else after the first
   '('.  NOTE(review): 'lparen' is null when the string contains neither; the
   guard for that case is not visible in this excerpt. */
4075 char *n_equals = strstr (s, "N=");
4076 char *lparen = strchr (s, '(');
4077 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4079 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4085 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4087 for (size_t i = 0; i < t->stacks[a].n; i++)
4089 struct ctables_nest *nest = &t->stacks[a].nests[i];
4090 if (nest->n != 1 || nest->scale_idx != 0)
4093 enum ctables_vlabel vlabel
4094 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4095 if (vlabel != CTVL_NONE)
4102 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4104 struct pivot_table *pt = pivot_table_create__ (
4106 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4107 : pivot_value_new_text (N_("Custom Tables"))),
4110 pivot_table_set_caption (
4111 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4113 pivot_table_set_corner_text (
4114 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4116 bool summary_dimension = (t->summary_axis != t->slabels_axis
4117 || (!t->slabels_visible
4118 && t->summary_specs.n > 1));
4119 if (summary_dimension)
4121 struct pivot_dimension *d = pivot_dimension_create (
4122 pt, t->slabels_axis, N_("Statistics"));
4123 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4124 if (!t->slabels_visible)
4125 d->hide_all_labels = true;
4126 for (size_t i = 0; i < specs->n; i++)
4127 pivot_category_create_leaf (
4128 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4131 bool categories_dimension = t->clabels_example != NULL;
4132 if (categories_dimension)
4134 struct pivot_dimension *d = pivot_dimension_create (
4135 pt, t->label_axis[t->clabels_from_axis],
4136 t->clabels_from_axis == PIVOT_AXIS_ROW
4137 ? N_("Row Categories")
4138 : N_("Column Categories"));
4139 const struct variable *var = t->clabels_example;
4140 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4141 for (size_t i = 0; i < t->n_clabels_values; i++)
4143 const struct ctables_value *value = t->clabels_values[i];
4144 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4145 assert (cat != NULL);
4146 pivot_category_create_leaf (
4147 d->root, ctables_category_create_value_label (c, cat,
4153 pivot_table_set_look (pt, ct->look);
4154 struct pivot_dimension *d[PIVOT_N_AXES];
4155 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4157 static const char *names[] = {
4158 [PIVOT_AXIS_ROW] = N_("Rows"),
4159 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4160 [PIVOT_AXIS_LAYER] = N_("Layers"),
4162 d[a] = (t->axes[a] || a == t->summary_axis
4163 ? pivot_dimension_create (pt, a, names[a])
4168 assert (t->axes[a]);
4170 for (size_t i = 0; i < t->stacks[a].n; i++)
4172 struct ctables_nest *nest = &t->stacks[a].nests[i];
4173 struct ctables_section **sections = xnmalloc (t->n_sections,
4175 size_t n_sections = 0;
4177 size_t n_total_cells = 0;
4178 size_t max_depth = 0;
4179 for (size_t j = 0; j < t->n_sections; j++)
4180 if (t->sections[j].nests[a] == nest)
4182 struct ctables_section *s = &t->sections[j];
4183 sections[n_sections++] = s;
4184 n_total_cells += hmap_count (&s->cells);
4186 size_t depth = s->nests[a]->n;
4187 max_depth = MAX (depth, max_depth);
4190 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4192 size_t n_sorted = 0;
4194 for (size_t j = 0; j < n_sections; j++)
4196 struct ctables_section *s = sections[j];
4198 struct ctables_cell *cell;
4199 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4201 sorted[n_sorted++] = cell;
4202 assert (n_sorted <= n_total_cells);
4205 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4206 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4209 if (a == PIVOT_AXIS_ROW)
4211 size_t ids[N_CTATS];
4212 memset (ids, 0, sizeof ids);
4213 for (size_t j = 0; j < n_sorted; j++)
4215 struct ctables_cell *cell = sorted[j];
4216 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4218 struct ctables_area *area = cell->areas[at];
4219 if (!area->sequence)
4220 area->sequence = ++ids[at];
4227 for (size_t j = 0; j < n_sorted; j++)
4229 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4234 struct ctables_level
4236 enum ctables_level_type
4238 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4239 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4240 CTL_SUMMARY, /* Summary functions. */
4244 enum settings_value_show vlabel; /* CTL_VAR only. */
4247 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4248 size_t n_levels = 0;
4249 for (size_t k = 0; k < nest->n; k++)
4251 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4252 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4254 if (vlabel != CTVL_NONE)
4256 levels[n_levels++] = (struct ctables_level) {
4258 .vlabel = (enum settings_value_show) vlabel,
4263 if (nest->scale_idx != k
4264 && (k != nest->n - 1 || t->label_axis[a] == a))
4266 levels[n_levels++] = (struct ctables_level) {
4267 .type = CTL_CATEGORY,
4273 if (!summary_dimension && a == t->slabels_axis)
4275 levels[n_levels++] = (struct ctables_level) {
4276 .type = CTL_SUMMARY,
4277 .var_idx = SIZE_MAX,
4281 /* Pivot categories:
4283 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4284 - category for nest->vars[0], if nest->scale_idx != 0
4285 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4286 - category for nest->vars[1], if nest->scale_idx != 1
4288 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4289 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4290 - summary function, if 'a == t->slabels_axis && a ==
4293 Additional dimensions:
4295 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4297 - If 't->label_axis[b] == a' for some 'b != a', add a category
4302 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4304 for (size_t j = 0; j < n_sorted; j++)
4306 struct ctables_cell *cell = sorted[j];
4307 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4309 size_t n_common = 0;
4312 for (; n_common < n_levels; n_common++)
4314 const struct ctables_level *level = &levels[n_common];
4315 if (level->type == CTL_CATEGORY)
4317 size_t var_idx = level->var_idx;
4318 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4319 if (prev->axes[a].cvs[var_idx].category != c)
4321 else if (c->type != CCT_SUBTOTAL
4322 && c->type != CCT_TOTAL
4323 && c->type != CCT_POSTCOMPUTE
4324 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4325 &cell->axes[a].cvs[var_idx].value,
4326 var_get_type (nest->vars[var_idx])))
4332 for (size_t k = n_common; k < n_levels; k++)
4334 const struct ctables_level *level = &levels[k];
4335 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4336 if (level->type == CTL_SUMMARY)
4338 assert (k == n_levels - 1);
4340 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4341 for (size_t m = 0; m < specs->n; m++)
4343 int leaf = pivot_category_create_leaf (
4344 parent, ctables_summary_label (&specs->specs[m],
4352 const struct variable *var = nest->vars[level->var_idx];
4353 struct pivot_value *label;
4354 if (level->type == CTL_VAR)
4356 label = pivot_value_new_variable (var);
4357 label->variable.show = level->vlabel;
4359 else if (level->type == CTL_CATEGORY)
4361 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4362 label = ctables_category_create_value_label (
4363 t->categories[var_get_dict_index (var)],
4364 cv->category, var, &cv->value);
4369 if (k == n_levels - 1)
4370 prev_leaf = pivot_category_create_leaf (parent, label);
4372 groups[k] = pivot_category_create_group__ (parent, label);
4376 cell->axes[a].leaf = prev_leaf;
4385 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4389 size_t n_total_cells = 0;
4390 for (size_t j = 0; j < t->n_sections; j++)
4391 n_total_cells += hmap_count (&t->sections[j].cells);
4393 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4394 size_t n_sorted = 0;
4395 for (size_t j = 0; j < t->n_sections; j++)
4397 const struct ctables_section *s = &t->sections[j];
4398 struct ctables_cell *cell;
4399 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4401 sorted[n_sorted++] = cell;
4403 assert (n_sorted <= n_total_cells);
4404 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4406 size_t ids[N_CTATS];
4407 memset (ids, 0, sizeof ids);
4408 for (size_t j = 0; j < n_sorted; j++)
4410 struct ctables_cell *cell = sorted[j];
4411 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4413 struct ctables_area *area = cell->areas[at];
4414 if (!area->sequence)
4415 area->sequence = ++ids[at];
4422 for (size_t i = 0; i < t->n_sections; i++)
4424 struct ctables_section *s = &t->sections[i];
4426 struct ctables_cell *cell;
4427 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4432 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4433 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4434 for (size_t j = 0; j < specs->n; j++)
4437 size_t n_dindexes = 0;
4439 if (summary_dimension)
4440 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4442 if (categories_dimension)
4444 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4445 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4446 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4447 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4450 dindexes[n_dindexes++] = ctv->leaf;
4453 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4456 int leaf = cell->axes[a].leaf;
4457 if (a == t->summary_axis && !summary_dimension)
4459 dindexes[n_dindexes++] = leaf;
4462 const struct ctables_summary_spec *ss = &specs->specs[j];
4464 struct fmt_spec format = specs->specs[j].format;
4465 bool is_ctables_format = ss->is_ctables_format;
4466 double d = (cell->postcompute
4467 ? ctables_cell_calculate_postcompute (
4468 s, cell, ss, &format, &is_ctables_format, j)
4469 : ctables_summary_value (cell, &cell->summaries[j],
4472 struct pivot_value *value;
4473 if (ct->hide_threshold != 0
4474 && d < ct->hide_threshold
4475 && ctables_summary_function_is_count (ss->function))
4477 value = pivot_value_new_user_text_nocopy (
4478 xasprintf ("<%d", ct->hide_threshold));
4480 else if (d == 0 && ct->zero)
4481 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4482 else if (d == SYSMIS && ct->missing)
4483 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4484 else if (is_ctables_format)
4485 value = pivot_value_new_user_text_nocopy (
4486 ctables_format (d, &format, &ct->ctables_formats));
4489 value = pivot_value_new_number (d);
4490 value->numeric.format = format;
4492 /* XXX should text values be right-justified? */
4493 pivot_table_put (pt, dindexes, n_dindexes, value);
4498 pivot_table_submit (pt);
/* Checks whether the category labels for axis A of table T can be moved to
   the position requested in T->label_axis[A].  Error messages name the
   subcommand (ROWLABELS for the row axis, COLLABELS otherwise) and the
   requested position (LAYER or OPPOSITE).

   The innermost variable of the first nest on the axis is recorded in
   T->clabels_example.  An error is reported with msg (SE, ...) if that
   variable has a CCT_FUNCTION category, if the innermost variable of a nest
   is that nest scale variable, or if the innermost variable of a later nest
   differs from the first one in width, value labels, or category
   specifications.

   NOTE(review): the return type and the return statements are not visible
   in this view.  ctables_prepare_table combines two calls with &&, so this
   presumably returns true when the position is acceptable -- confirm. */
4502 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4504 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only to build the error messages below. */
4508 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4509 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4511 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest supplies the reference variable V0 and its categories C0,
   against which every other nest is compared. */
4515 const struct ctables_nest *n0 = &stack->nests[0];
4518 assert (stack->n == 1);
4522 const struct variable *v0 = n0->vars[n0->n - 1];
4523 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4524 t->clabels_example = v0;
/* Categories sorted by a summary function cannot have their labels moved. */
4526 for (size_t i = 0; i < c0->n_cats; i++)
4527 if (c0->cats[i].type == CCT_FUNCTION)
4529 msg (SE, _("%s=%s is not allowed with sorting based "
4530 "on a summary function."),
4531 subcommand_name, pos_name);
/* The innermost variable is the one whose labels move; it must not be the
   scale variable of the nest. */
4534 if (n0->n - 1 == n0->scale_idx)
4536 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4537 "but %s is a scale variable."),
4538 subcommand_name, pos_name, var_get_name (v0));
/* Every later nest must end in a variable that is compatible with V0. */
4542 for (size_t i = 1; i < stack->n; i++)
4544 const struct ctables_nest *ni = &stack->nests[i];
4546 const struct variable *vi = ni->vars[ni->n - 1];
4547 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4549 if (ni->n - 1 == ni->scale_idx)
4551 msg (SE, _("%s=%s requires the variables to be moved to be "
4552 "categorical, but %s is a scale variable."),
4553 subcommand_name, pos_name, var_get_name (vi));
4556 if (var_get_width (v0) != var_get_width (vi))
4558 msg (SE, _("%s=%s requires the variables to be "
4559 "moved to have the same width, but %s has "
4560 "width %d and %s has width %d."),
4561 subcommand_name, pos_name,
4562 var_get_name (v0), var_get_width (v0),
4563 var_get_name (vi), var_get_width (vi));
4566 if (!val_labs_equal (var_get_value_labels (v0),
4567 var_get_value_labels (vi)))
4569 msg (SE, _("%s=%s requires the variables to be "
4570 "moved to have the same value labels, but %s "
4571 "and %s have different value labels."),
4572 subcommand_name, pos_name,
4573 var_get_name (v0), var_get_name (vi));
4576 if (!ctables_categories_equal (c0, ci))
4578 msg (SE, _("%s=%s requires the variables to be "
4579 "moved to have the same category "
4580 "specifications, but %s and %s have different "
4581 "category specifications."),
4582 subcommand_name, pos_name,
4583 var_get_name (v0), var_get_name (vi));
/* Returns the position of VAR within the *N-element array *SUM_VARS.  If VAR
   is not yet in the array, it is appended first, growing the array (whose
   capacity is tracked in *ALLOCATED) as needed and incrementing *N. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  /* Look for an existing entry. */
  struct variable **known = *sum_vars;
  size_t idx = 0;
  while (idx < *n && known[idx] != var)
    idx++;
  if (idx < *n)
    return idx;

  /* Not found: append VAR at the end. */
  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  idx = (*n)++;
  (*sum_vars)[idx] = var;
  return idx;
}
/* Maps AREA to another area type.  ctables_prepare_table may apply this to
   the calc_area of summary specs when row labels are moved to the column
   axis or column labels to the row axis.

   NOTE(review): of the mapping only the CTAT_LAYERCOL and CTAT_LAYERROW
   results are visible in this view; presumably row-based and column-based
   areas are exchanged and other areas are returned unchanged -- confirm. */
4605 static enum ctables_area_type
4606 rotate_area (enum ctables_area_type area)
4617 return CTAT_LAYERCOL;
4620 return CTAT_LAYERROW;
/* Walks the axis expression tree rooted at A and registers summed variables.

   For a node with summary specs, each spec whose function is
   CTSF_areaPCT_SUM has spec->sum_var_idx set to the index of A->var in
   *SUM_VARS, as returned by add_sum_var ().  For a node with subexpressions,
   recurses into both entries of A->subs[].

   *N and *ALLOCATED are passed through to add_sum_var () as the used and
   allocated sizes of *SUM_VARS.

   NOTE(review): the test that selects between the two kinds of node is not
   visible in this view. */
4633 enumerate_sum_vars (const struct ctables_axis *a,
4634 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit the specs of every summary variant of this node. */
4642 for (size_t i = 0; i < N_CSVS; i++)
4643 for (size_t j = 0; j < a->specs[i].n; j++)
4645 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4646 if (spec->function == CTSF_areaPCT_SUM)
4647 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both operands. */
4653 for (size_t i = 0; i < 2; i++)
4654 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  In order, the visible code:

   - Enumerates the nests of each axis into T->stacks[] and fills in, for
     every nest and area type, the nest->areas[] list of variable indexes.

   - Installs a single nest with no variables in T->stacks[] and resets
     T->label_axis[] for an axis (NOTE(review): the condition is not visible;
     presumably an axis without an expression -- confirm).

   - For each nest on the summary axis, supplies a default summary spec
     (CTSF_MEAN when the spec set is scale, otherwise CTSF_COUNT) if the nest
     has no cell specs, and clones the cell specs into the total specs if the
     nest has no total specs.

   - Passes calc_area through rotate_area () when labels are moved between
     the row and column axes.

   - When T->ctables->smissing_listwise is set, gives each spec set the list
     of scale variables of the other nests in the same group.

   - Merges the summary specs of all nests into T->summary_specs and records
     in each spec its index within the merged list (axis_idx).

   - Collects summed variables into T->sum_vars.

   The result is that of ctables_check_label_position () for the row axis
   and then, only if that succeeded, for the column axis. */
4660 ctables_prepare_table (struct ctables_table *t)
4662 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4665 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4667 for (size_t j = 0; j < t->stacks[a].n; j++)
4669 struct ctables_nest *nest = &t->stacks[a].nests[j];
4670 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for one index per variable in the nest; n_areas[at] counts how many
   are actually used. */
4672 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4673 nest->n_areas[at] = 0;
/* For row-based and column-based area types, ATA is the axis the area is
   named after and ATB is the other one of the row/column pair. */
4675 enum pivot_axis_type ata, atb;
4676 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4678 ata = PIVOT_AXIS_ROW;
4679 atb = PIVOT_AXIS_COLUMN;
4681 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4683 ata = PIVOT_AXIS_COLUMN;
4684 atb = PIVOT_AXIS_ROW;
4687 if (at == CTAT_LAYER
4688 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4689 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4690 ? a == atb && t->label_axis[a] != a
/* Searches from the innermost variable outward: K starts at nest->n - 1 and
   decreases; because K is unsigned, decrementing past zero wraps around and
   ends the loop. */
4693 for (size_t k = nest->n - 1; k < nest->n; k--)
4694 if (k != nest->scale_idx)
4696 nest->areas[at][nest->n_areas[at]++] = k;
4702 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4703 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4704 : at == CTAT_TABLE ? true
/* Start from all of the non-scale variables in the nest; some of the
   trailing ones are dropped below according to n_drop. */
4708 for (size_t k = 0; k < nest->n; k++)
4709 if (k != nest->scale_idx)
4710 nest->areas[at][nest->n_areas[at]++] = k;
4716 #define L PIVOT_AXIS_LAYER
4717 n_drop = (t->clabels_from_axis == L ? a != L
4718 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4719 : t->clabels_from_axis == a ? 2
4726 n_drop = a == ata && t->label_axis[ata] == atb;
4731 n_drop = (a == ata ? t->label_axis[ata] == atb
4733 : t->clabels_from_axis == atb ? -1
4734 : t->clabels_to_axis != atb ? 1
/* Removes the second-to-last entry by overwriting it with the last one. */
4746 size_t n = nest->n_areas[at];
4749 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4750 nest->n_areas[at]--;
/* Removes up to n_drop entries from the end, stopping at an empty list. */
4755 for (int i = 0; i < n_drop; i++)
4756 if (nest->n_areas[at] > 0)
4757 nest->n_areas[at]--;
4764 struct ctables_nest *nest = xmalloc (sizeof *nest);
4765 *nest = (struct ctables_nest) { .n = 0 };
4766 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4768 /* There's no point in moving labels away from an axis that has no
4769 labels, so avoid dealing with the special cases around that. */
4770 t->label_axis[a] = a;
4773 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4774 for (size_t i = 0; i < stack->n; i++)
4776 struct ctables_nest *nest = &stack->nests[i];
/* No summary functions were specified for this nest: supply one default
   spec and use the same specs for totals. */
4777 if (!nest->specs[CSV_CELL].n)
4779 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4780 specs->specs = xmalloc (sizeof *specs->specs);
4783 enum ctables_summary_function function
4784 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4786 *specs->specs = (struct ctables_summary_spec) {
4787 .function = function,
4789 .format = ctables_summary_default_format (function, specs->var),
4792 specs->var = nest->vars[0];
4794 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4795 &nest->specs[CSV_CELL]);
4797 else if (!nest->specs[CSV_TOTAL].n)
4798 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4799 &nest->specs[CSV_CELL]);
/* Row labels shown on the column axis, or column labels on the row axis. */
4801 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4802 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4804 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4805 for (size_t i = 0; i < nest->specs[sv].n; i++)
4807 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4808 const struct ctables_function_info *cfi =
4809 &ctables_function_info[ss->function];
4811 ss->calc_area = rotate_area (ss->calc_area);
4815 if (t->ctables->smissing_listwise)
4817 struct variable **listwise_vars = NULL;
4819 size_t allocated = 0;
/* Scans the nests starting at the head of this nest group.  A nest whose
   group_head differs belongs to a different group. */
4821 for (size_t j = nest->group_head; j < stack->n; j++)
4823 const struct ctables_nest *other_nest = &stack->nests[j];
4824 if (other_nest->group_head != nest->group_head)
/* Collects the scale variable of every other nest that has one. */
4827 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4830 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4831 sizeof *listwise_vars);
4832 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* NOTE(review): the xmemdup below gives a spec set its own copy of the
   array; the condition under which the copy is made is not visible here. */
4835 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4838 listwise_vars = xmemdup (listwise_vars,
4839 n * sizeof *listwise_vars);
4840 nest->specs[sv].listwise_vars = listwise_vars;
4841 nest->specs[sv].n_listwise_vars = n;
/* Merges the spec sets of all nests and summary variants into
   T->summary_specs.  Each merge_item tracks a position (ofs) within one spec
   set. */
4846 struct ctables_summary_spec_set *merged = &t->summary_specs;
4847 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4849 for (size_t j = 0; j < stack->n; j++)
4851 const struct ctables_nest *nest = &stack->nests[j];
4853 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4854 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Finds the smallest remaining item according to
   merge_item_compare_3way () and appends its current spec to MERGED. */
4859 struct merge_item min = items[0];
4860 for (size_t j = 1; j < n_left; j++)
4861 if (merge_item_compare_3way (&items[j], &min) < 0)
4864 if (merged->n >= merged->allocated)
4865 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4866 sizeof *merged->specs);
4867 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to the minimum shares the merged entry:
   its spec records the merged index in axis_idx and the item advances.  An
   item that runs out of specs is replaced by the last item in the array. */
4869 for (size_t j = 0; j < n_left; )
4871 if (merge_item_compare_3way (&items[j], &min) == 0)
4873 struct merge_item *item = &items[j];
4874 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4875 if (++item->ofs >= item->set->n)
4877 items[j] = items[--n_left];
/* NOTE(review): the printf calls below look like debugging output;
   presumably they are disabled by a preprocessor conditional that is not
   visible in this view -- confirm. */
4887 for (size_t j = 0; j < merged->n; j++)
4888 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4890 for (size_t j = 0; j < stack->n; j++)
4892 const struct ctables_nest *nest = &stack->nests[j];
4893 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4895 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4896 for (size_t k = 0; k < specs->n; k++)
4897 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4898 specs->specs[k].axis_idx);
4904 size_t allocated_sum_vars = 0;
4905 enumerate_sum_vars (t->axes[t->summary_axis],
4906 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4908 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4909 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, looks at the innermost variable of every nest on axis A of
   table T and records its value in T with ctables_value_insert (), using the
   width of the variable.

   NOTE(review): the statements controlled by the two tests below are not
   visible in this view.  Presumably a system-missing numeric value is
   skipped and a value is recorded only if it matches one of the categories
   of the variable -- confirm. */
4913 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4914 enum pivot_axis_type a)
4916 struct ctables_stack *stack = &t->stacks[a];
4917 for (size_t i = 0; i < stack->n; i++)
4919 const struct ctables_nest *nest = &stack->nests[i];
/* The innermost variable of the nest is the last one in nest->vars[]. */
4920 const struct variable *var = nest->vars[nest->n - 1];
4921 const union value *value = case_data (c, var);
4923 if (var_is_numeric (var) && value->f == SYSMIS)
4926 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4928 ctables_value_insert (t, value, var_get_width (var));
4933 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4935 const struct ctables_value *const *ap = a_;
4936 const struct ctables_value *const *bp = b_;
4937 const struct ctables_value *a = *ap;
4938 const struct ctables_value *b = *bp;
4939 const int *width = width_;
4940 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array with one pointer for every entry in
   T->clabels_values_map, sorted by value with compare_clabels_values_3way ()
   using the width of T->clabels_example.  Afterward each entry records its
   position in the sorted array in its `leaf' member.

   Before the array is built, the values of T->clabels_example that have
   value labels and that match one of its categories are inserted into T.
   NOTE(review): that step may be guarded by a condition that is not visible
   in this view. */
4944 ctables_sort_clabels_values (struct ctables_table *t)
4946 const struct variable *v0 = t->clabels_example;
4947 int width = var_get_width (v0);
4949 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4952 const struct val_labs *val_labs = var_get_value_labels (v0);
4953 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4954 vl = val_labs_next (val_labs, vl))
4955 if (ctables_categories_match (c0, &vl->value, v0))
4956 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array so that it can be sorted. */
4959 size_t n = hmap_count (&t->clabels_values_map);
4960 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4962 struct ctables_value *clv;
4964 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4965 t->clabels_values[i++] = clv;
4966 t->n_clabels_values = n;
4969 sort (t->clabels_values, n, sizeof *t->clabels_values,
4970 compare_clabels_values_3way, &width);
/* The sorted position of a value becomes its leaf index. */
4972 for (size_t i = 0; i < n; i++)
4973 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, with ctables_add_occurrence (), values of VAR that
   are implied by the category list CATS even if they never occur in the
   data.  Each category in CATS is handled according to its type:

   - An explicit number or string category contributes its own value (a
     string is copied into a value of the width of VAR, padded on the right
     with spaces).

   - The remaining kinds shown here contribute those values that have a
     value label for VAR and that satisfy the condition of the category
     (inside a numeric range, inside a string range, missing, and so on).

   NOTE(review): most of the case labels of the switch are not visible in
   this view, so the mapping from category type to branch is inferred from
   the bodies -- confirm. */
4977 ctables_add_category_occurrences (const struct variable *var,
4978 struct hmap *occurrences,
4979 const struct ctables_categories *cats)
4981 const struct val_labs *val_labs = var_get_value_labels (var);
4983 for (size_t i = 0; i < cats->n_cats; i++)
4985 const struct ctables_category *c = &cats->cats[i];
/* A single number: wrap it in a temporary value. */
4989 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string: build a temporary value of the width of VAR. */
4995 int width = var_get_width (var);
4997 value_init (&value, width);
4998 value_copy_buf_rpad (&value, width,
4999 CHAR_CAST (uint8_t *, c->string.string),
5000 c->string.length, ' ');
5001 ctables_add_occurrence (var, &value, occurrences);
5002 value_destroy (&value, width);
/* Numeric range: labeled values within [nrange[0], nrange[1]]. */
5007 assert (var_is_numeric (var));
5008 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5009 vl = val_labs_next (val_labs, vl))
5010 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5011 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range (). */
5015 assert (var_is_alpha (var));
5016 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5017 vl = val_labs_next (val_labs, vl))
5018 if (in_string_range (&vl->value, var, c->srange))
5019 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5023 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5024 vl = val_labs_next (val_labs, vl))
5025 if (var_is_value_missing (var, &vl->value))
5026 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5030 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5031 vl = val_labs_next (val_labs, vl))
5032 ctables_add_occurrence (var, &vl->value, occurrences);
5035 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   missing values. */
5045 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5046 vl = val_labs_next (val_labs, vl))
5047 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5048 ctables_add_occurrence (var, &vl->value, occurrences);
5051 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories ().

   Enumerates combinations of the values recorded in S->occurrences[][] for
   the variables of the nests of section S, one variable per level of
   recursion.  A is the axis currently being processed and A_IDX the index
   of the variable within the nest on that axis.  C is a scratch case: at
   each level the value of the current variable in C is replaced by one
   occurrence, and the category that the value matches is stored in
   CATS[A][A_IDX].  Once A has moved past the last axis, the combination is
   handed to ctables_cell_insert__ ().

   After the occurrences, each CCT_POSTCOMPUTE category of the variable is
   also tried as CATS[A][A_IDX].  NOTE(review): lines between the visible
   ones may modify C for that case; not visible in this view. */
5058 ctables_section_recurse_add_empty_categories (
5059 struct ctables_section *s,
5060 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5061 enum pivot_axis_type a, size_t a_idx)
/* All axes are done: C and CATS describe one complete combination. */
5063 if (a >= PIVOT_N_AXES)
5064 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or no more variables in it: go to the next axis. */
5065 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5066 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5069 const struct variable *var = s->nests[a]->vars[a_idx];
5070 const struct ctables_categories *categories = s->table->categories[
5071 var_get_dict_index (var)];
5072 int width = var_get_width (var);
5073 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5074 const struct ctables_occurrence *o;
5075 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Overwrite the value of VAR in the scratch case with this occurrence. */
5077 union value *value = case_data_rw (c, var);
5078 value_destroy (value, width);
5079 value_clone (value, &o->value, width);
5080 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5081 assert (cats[a][a_idx] != NULL);
5082 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5085 for (size_t i = 0; i < categories->n_cats; i++)
5087 const struct ctables_category *cat = &categories->cats[i];
5088 if (cat->type == CCT_POSTCOMPUTE)
5090 cats[a][a_idx] = cat;
5091 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that did not occur in
   the data.  For every non-scale variable in the nests of S whose category
   list has show_empty set, the values implied by the categories are added
   to the occurrences of the variable.  Then a scratch case is created from
   the dictionary prototype and
   ctables_section_recurse_add_empty_categories () enumerates the
   combinations.

   NOTE(review): the use of the show_empty flag declared below and the
   release of the scratch case are not visible in this view.  The CATS array
   has room for 10 variables per axis (marked XXX by the author); nothing in
   the visible lines checks nest->n against that bound. */
5098 ctables_section_add_empty_categories (struct ctables_section *s)
5100 bool show_empty = false;
5101 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5103 for (size_t k = 0; k < s->nests[a]->n; k++)
5104 if (k != s->nests[a]->scale_idx)
5106 const struct variable *var = s->nests[a]->vars[k];
5107 const struct ctables_categories *cats = s->table->categories[
5108 var_get_dict_index (var)];
5109 if (cats->show_empty)
5112 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5118 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5119 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5120 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data while leaving its hash maps in
   place: removes every occurrence of every non-scale variable, every cell
   (destroying the values of the non-scale variables, the per-axis value
   arrays, and the summaries of the cell), and every area.  The cell and
   area maps are shrunk afterward.

   NOTE(review): the calls that free the occurrence, cell, and area objects
   themselves are not visible in this view; only their removal from the maps
   is. */
5125 ctables_section_clear (struct ctables_section *s)
/* Occurrences, per axis and per non-scale variable. */
5127 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5129 const struct ctables_nest *nest = s->nests[a];
5130 for (size_t i = 0; i < nest->n; i++)
5131 if (i != nest->scale_idx)
5133 const struct variable *var = nest->vars[i];
5134 int width = var_get_width (var);
5135 struct ctables_occurrence *o, *next;
5136 struct hmap *map = &s->occurrences[a][i];
5137 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5139 value_destroy (&o->value, width);
5140 hmap_delete (map, &o->node);
/* Cells. */
5147 struct ctables_cell *cell, *next_cell;
5148 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5150 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5152 const struct ctables_nest *nest = s->nests[a];
5153 for (size_t i = 0; i < nest->n; i++)
5154 if (i != nest->scale_idx)
5155 value_destroy (&cell->axes[a].cvs[i].value,
5156 var_get_width (nest->vars[i]));
5157 free (cell->axes[a].cvs);
/* The summaries of the cell follow the spec set of the summary-axis nest
   for the summary variant of the cell. */
5160 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5161 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5162 for (size_t i = 0; i < specs->n; i++)
5163 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5164 free (cell->summaries);
5166 hmap_delete (&s->cells, &cell->node);
5169 hmap_shrink (&s->cells);
/* Areas, one map per area type. */
5171 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5173 struct ctables_area *area, *next_area;
5174 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5178 hmap_delete (&s->areas[at], &area->node);
5181 hmap_shrink (&s->areas[at]);
5186 ctables_section_uninit (struct ctables_section *s)
5188 ctables_section_clear (s);
5190 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5192 struct ctables_nest *nest = s->nests[a];
5193 for (size_t i = 0; i < nest->n; i++)
5194 hmap_destroy (&s->occurrences[a][i]);
5195 free (s->occurrences[a]);
5198 hmap_destroy (&s->cells);
5199 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5200 hmap_destroy (&s->areas[at]);
/* Clears the accumulated data from table T: clears each of its sections
   and, if T has a category-label example variable, empties
   T->clabels_values_map (destroying each value with the width of that
   variable) and discards the T->clabels_values array.

   NOTE(review): the call that frees each struct ctables_value is not
   visible in this view; only its removal from the map is. */
5204 ctables_table_clear (struct ctables_table *t)
5206 for (size_t i = 0; i < t->n_sections; i++)
5207 ctables_section_clear (&t->sections[i]);
5209 if (t->clabels_example)
5211 int width = var_get_width (t->clabels_example);
5212 struct ctables_value *value, *next_value;
5213 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5214 &t->clabels_values_map)
5216 value_destroy (&value->value, width);
5217 hmap_delete (&t->clabels_values_map, &value->node);
5220 hmap_shrink (&t->clabels_values_map);
/* Reset the array to empty so that it is not freed or read again. */
5222 free (t->clabels_values);
5223 t->clabels_values = NULL;
5224 t->n_clabels_values = 0;
/* Runs all of the tables in CT over the cases in INPUT, which belong to
   dataset DS.

   First, sections are allocated and created for every table.  Then the
   input is divided into groups (by split-file variables when the dictionary
   split type is SPLIT_SEPARATE, otherwise as a single group).  For each
   group, every case is inserted into every section of every table, along
   with its dictionary weight and effective weight, and values for moved
   category labels are recorded.  At the end of a group each table is
   sorted, completed with empty categories, output, and cleared.

   Returns the result of casegrouper_destroy ().

   NOTE(review): CT is presumably declared on a continuation line of the
   parameter list that is not visible in this view. */
5229 ctables_execute (struct dataset *ds, struct casereader *input,
5232 for (size_t i = 0; i < ct->n_tables; i++)
5234 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of a row nest, a column nest, and a
   layer nest, counting an axis without nests as one. */
5235 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5236 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5237 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5238 sizeof *t->sections);
5239 size_t ix[PIVOT_N_AXES];
5240 ctables_table_add_section (t, 0, ix);
5243 struct dictionary *dict = dataset_dict (ds);
5245 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5246 struct casegrouper *grouper
5248 ? casegrouper_create_splits (input, dict)
5249 : casegrouper_create_vars (input, NULL, 0));
5250 struct casereader *group;
5251 while (casegrouper_get_next_group (grouper, &group))
/* The first case of the group supplies the split-file values to report. */
5255 struct ccase *c = casereader_peek (group, 0);
5258 output_split_file_values (ds, c);
5263 bool warn_on_invalid = true;
5264 for (struct ccase *c = casereader_read (group); c;
5265 case_unref (c), c = casereader_read (group))
5267 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5268 double e_weight = (ct->e_weight
5269 ? var_force_valid_weight (ct->e_weight,
5270 case_num (c, ct->e_weight),
5274 for (size_t i = 0; i < ct->n_tables; i++)
5276 struct ctables_table *t = ct->tables[i];
5278 for (size_t j = 0; j < t->n_sections; j++)
5279 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Only axes whose labels are shown on a different axis need their label
   values collected. */
5281 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5282 if (t->label_axis[a] != a)
5283 ctables_insert_clabels_values (t, c, a);
5286 casereader_destroy (group);
/* End of group: finish, output, and reset every table. */
5288 for (size_t i = 0; i < ct->n_tables; i++)
5290 struct ctables_table *t = ct->tables[i];
5292 if (t->clabels_example)
5293 ctables_sort_clabels_values (t);
5295 for (size_t j = 0; j < t->n_sections; j++)
5296 ctables_section_add_empty_categories (&t->sections[j]);
5298 ctables_table_output (ct, t);
5299 ctables_table_clear (t);
5302 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the parsed
   expression.  The binary-operator parsers below take a function of this
   type as the parser for operands at the next level. */
5307 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5308 struct dictionary *);
/* Releases the resources held by postcompute expression E according to its
   operation: the string of a string category, both strings of a string
   range category, and (recursively) both subexpressions of a node that has
   them.  The category kinds listed at the end of the switch own nothing
   extra.  The location of E is destroyed in every case.

   NOTE(review): a null check on E, the case labels for nodes with
   subexpressions, and the call that frees E itself are not visible in this
   view.  ctables_parse_pcompute passes a possibly-null expression, so E
   presumably may be NULL -- confirm. */
5311 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5317 case CTPO_CAT_STRING:
5318 ss_dealloc (&e->string);
5321 case CTPO_CAT_SRANGE:
5322 for (size_t i = 0; i < 2; i++)
5323 ss_dealloc (&e->srange[i]);
5332 for (size_t i = 0; i < 2; i++)
5333 ctables_pcexpr_destroy (e->subs[i]);
5337 case CTPO_CAT_NUMBER:
5338 case CTPO_CAT_NRANGE:
5339 case CTPO_CAT_MISSING:
5340 case CTPO_CAT_OTHERNM:
5341 case CTPO_CAT_SUBTOTAL:
5342 case CTPO_CAT_TOTAL:
5346 msg_location_destroy (e->location);
5351 static struct ctables_pcexpr *
5352 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5353 struct ctables_pcexpr *sub0,
5354 struct ctables_pcexpr *sub1)
5356 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5357 *e = (struct ctables_pcexpr) {
5359 .subs = { sub0, sub1 },
5360 .location = msg_location_merged (sub0->location, sub1->location),
5365 /* How to parse an operator. */
5368 enum token_type token; /* Lexer token that denotes the operator. */
5369 enum ctables_postcompute_op op; /* Operation for a node built from it. */
/* Searches the N_OPS elements of OPS for an operator whose token is the
   current token in LEXER.

   NOTE(review): the statements that consume the token and produce the
   result are not visible in this view.  The visible test singles out
   T_NEG_NUM; presumably every other token is skipped in the lexer while a
   negative number is left in place to be parsed as the right operand, and
   the function returns the matching operator or NULL -- confirm. */
5372 static const struct operator *
5373 ctables_pcexpr_match_operator (struct lexer *lexer,
5374 const struct operator ops[], size_t n_ops)
5376 for (const struct operator *op = ops; op < ops + n_ops; op++)
5377 if (lex_token (lexer) == op->token)
5379 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators at one precedence level, given that
   the leftmost operand LHS has already been parsed.

   OPS and N_OPS list the operators at this level and PARSE_NEXT_LEVEL
   parses each right-hand operand.  Every operator that is matched combines
   the expression so far with the new operand into a binary node, so the
   chain associates to the left.  If parsing an operand fails, the
   expression built so far is destroyed.

   If CHAIN_WARNING is nonnull, it is issued as a warning at the location of
   the expression when the loop counter exceeds 1 at the point where the
   test is made.  NOTE(review): that point is presumably where no further
   operator matches, which is also where the result would be returned; the
   return statements are not visible in this view. */
5388 static struct ctables_pcexpr *
5389 ctables_pcexpr_parse_binary_operators__ (
5390 struct lexer *lexer, struct dictionary *dict,
5391 const struct operator ops[], size_t n_ops,
5392 parse_recursively_func *parse_next_level,
5393 const char *chain_warning, struct ctables_pcexpr *lhs)
5395 for (int op_count = 0; ; op_count++)
5397 const struct operator *op
5398 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5401 if (op_count > 1 && chain_warning)
5402 msg_at (SW, lhs->location, "%s", chain_warning);
5407 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5410 ctables_pcexpr_destroy (lhs);
5414 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5418 static struct ctables_pcexpr *
5419 ctables_pcexpr_parse_binary_operators (
5420 struct lexer *lexer, struct dictionary *dict,
5421 const struct operator ops[], size_t n_ops,
5422 parse_recursively_func *parse_next_level, const char *chain_warning)
5424 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5428 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5430 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary () calls this function
   for a parenthesized subexpression, before the definition appears. */
5433 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5434 struct dictionary *);
5436 static struct ctables_pcexpr
5437 ctpo_cat_nrange (double low, double high)
5439 return (struct ctables_pcexpr) {
5440 .op = CTPO_CAT_NRANGE,
5441 .nrange = { low, high },
5445 static struct ctables_pcexpr
5446 ctpo_cat_srange (struct substring low, struct substring high)
5448 return (struct ctables_pcexpr) {
5449 .op = CTPO_CAT_SRANGE,
5450 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node whose location spans the tokens consumed.  The forms
   recognized by the visible code are:

   - A number, which yields a CTPO_CONSTANT.

   - One of the keywords MISSING, OTHERNM, or TOTAL.

   - SUBTOTAL, optionally followed by a bracketed integer that is at least 1.
     Without the brackets the subtotal index is 0.

   - A bracketed category reference: LO THRU followed by a string or number,
     a number optionally followed by THRU and HI or a number, or a string
     optionally followed by THRU and HI or a string.  In a string range a
     null string stands for the open end (LO or HI).

   - A parenthesized expression, parsed with ctables_pcexpr_parse_add ().

   Anything else is reported with lex_error ().  When the closing bracket of
   a category reference is missing, strings already parsed are released.

   NOTE(review): the failure returns and the calls that advance the lexer
   past numbers are not visible in this view. */
5454 static struct ctables_pcexpr *
5455 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the expression starts, for its location. */
5457 int start_ofs = lex_ofs (lexer);
5458 struct ctables_pcexpr e;
5459 if (lex_is_number (lexer))
5461 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5462 .number = lex_number (lexer) };
5465 else if (lex_match_id (lexer, "MISSING"))
5466 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5467 else if (lex_match_id (lexer, "OTHERNM"))
5468 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5469 else if (lex_match_id (lexer, "TOTAL"))
5470 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5471 else if (lex_match_id (lexer, "SUBTOTAL"))
5473 size_t subtotal_index = 0;
5474 if (lex_match (lexer, T_LBRACK))
5476 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5478 subtotal_index = lex_integer (lexer);
5480 if (!lex_force_match (lexer, T_RBRACK))
5483 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5484 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5486 else if (lex_match (lexer, T_LBRACK))
5488 if (lex_match_id (lexer, "LO"))
5490 if (!lex_force_match_id (lexer, "THRU"))
5493 if (lex_is_string (lexer))
5495 struct substring low = { .string = NULL };
5496 struct substring high = parse_substring (lexer, dict);
5497 e = ctpo_cat_srange (low, high);
5501 if (!lex_force_num (lexer))
5503 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5507 else if (lex_is_number (lexer))
5509 double number = lex_number (lexer);
5511 if (lex_match_id (lexer, "THRU"))
5513 if (lex_match_id (lexer, "HI"))
5514 e = ctpo_cat_nrange (number, DBL_MAX);
5517 if (!lex_force_num (lexer))
5519 e = ctpo_cat_nrange (number, lex_number (lexer));
5524 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5527 else if (lex_is_string (lexer))
5529 struct substring s = parse_substring (lexer, dict);
5531 if (lex_match_id (lexer, "THRU"))
5533 struct substring high;
5535 if (lex_match_id (lexer, "HI"))
5536 high = (struct substring) { .string = NULL };
5539 if (!lex_force_string (lexer))
5544 high = parse_substring (lexer, dict);
5547 e = ctpo_cat_srange (s, high);
5550 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5554 lex_error (lexer, NULL);
/* On a missing closing bracket, release any strings owned by E. */
5558 if (!lex_force_match (lexer, T_RBRACK))
5560 if (e.op == CTPO_CAT_STRING)
5561 ss_dealloc (&e.string);
5562 else if (e.op == CTPO_CAT_SRANGE)
5564 ss_dealloc (&e.srange[0]);
5565 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression. */
5570 else if (lex_match (lexer, T_LPAREN))
5572 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5575 if (!lex_force_match (lexer, T_RPAREN))
5577 ctables_pcexpr_destroy (ep);
5584 lex_error (lexer, NULL);
/* Record the tokens consumed as the location and return a heap copy. */
5588 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5589 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for operand SUB.  The location of the
   node runs from token offset START_OFS in LEXER up to the token just
   before the current one.

   NOTE(review): the initializers other than the location, and the return
   statement, are not visible in this view; presumably the node is a
   negation whose single operand is SUB and the new node is returned --
   confirm. */
5592 static struct ctables_pcexpr *
5593 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5594 struct lexer *lexer, int start_ofs)
5596 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5597 *e = (struct ctables_pcexpr) {
5600 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of primaries joined by the T_EXP
   operator, parsed as a left-associative chain; CHAIN_WARNING is passed
   along so that a chain of exponentiations can be warned about.

   The special case is input that begins with a negative-number token
   immediately followed by T_EXP.  Then the left operand is built as a
   constant that holds the negated token value, the rest of the chain is
   parsed with that constant as the leftmost operand, and the whole result
   is wrapped with ctables_pcexpr_allocate_neg ().

   NOTE(review): the call that advances the lexer past the number and the
   handling of a failed parse in the special case are not visible in this
   view. */
5605 static struct ctables_pcexpr *
5606 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5608 static const struct operator op = { T_EXP, CTPO_POW };
5610 const char *chain_warning =
5611 _("The exponentiation operator (`**') is left-associative: "
5612 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5613 "To disable this warning, insert parentheses.");
5615 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5616 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5617 ctables_pcexpr_parse_primary,
5620 /* Special case for situations like "-5**6", which must be parsed as
5623 int start_ofs = lex_ofs (lexer);
5624 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5625 *lhs = (struct ctables_pcexpr) {
5626 .op = CTPO_CONSTANT,
5627 .number = -lex_tokval (lexer),
5628 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5632 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5633 lexer, dict, &op, 1,
5634 ctables_pcexpr_parse_primary, chain_warning, lhs);
5638 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5641 /* Parses the unary minus level. */
5642 static struct ctables_pcexpr *
5643 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5645 int start_ofs = lex_ofs (lexer);
5646 if (!lex_match (lexer, T_DASH))
5647 return ctables_pcexpr_parse_exp (lexer, dict);
5649 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5653 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5656 /* Parses the multiplication and division level. */
5657 static struct ctables_pcexpr *
5658 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5660 static const struct operator ops[] =
5662 { T_ASTERISK, CTPO_MUL },
5663 { T_SLASH, CTPO_DIV },
5666 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5667 sizeof ops / sizeof *ops,
5668 ctables_pcexpr_parse_neg, NULL);
5671 /* Parses the addition and subtraction level. */
5672 static struct ctables_pcexpr *
5673 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5675 static const struct operator ops[] =
5677 { T_PLUS, CTPO_ADD },
5678 { T_DASH, CTPO_SUB },
5679 { T_NEG_NUM, CTPO_ADD },
5682 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5683 ops, sizeof ops / sizeof *ops,
5684 ctables_pcexpr_parse_mul, NULL);
5687 static struct ctables_postcompute *
5688 ctables_find_postcompute (struct ctables *ct, const char *name)
5690 struct ctables_postcompute *pc;
5691 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5692 utf8_hash_case_string (name, 0), &ct->postcomputes)
5693 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form

       &NAME = EXPR (expression)

   from LEXER and records it in the postcomputes of CT.  If a postcompute
   with the same name already exists, a warning is issued that points at
   both definitions, and the old expression and location are destroyed so
   that the existing entry can take the new ones.  Otherwise a new entry is
   created and inserted under the case-insensitive hash of its name.  The
   label of the entry is taken from the source text of the expression.

   NOTE(review): the failure returns, the release of NAME on the paths that
   do not keep it, the assignment of the expression to the entry, and any
   condition on setting the label are not visible in this view.  CT is
   presumably declared on a continuation line of the parameter list. */
5699 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The location starts one token before the current one.  NOTE(review):
   presumably that token is the subcommand keyword that the caller already
   consumed -- confirm. */
5702 int pcompute_start = lex_ofs (lexer) - 1;
5704 if (!lex_match (lexer, T_AND))
5706 lex_error_expecting (lexer, "&");
5709 if (!lex_force_id (lexer))
5712 char *name = ss_xstrdup (lex_tokss (lexer));
5715 if (!lex_force_match (lexer, T_EQUALS)
5716 || !lex_force_match_id (lexer, "EXPR")
5717 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets of the expression are kept so that its source text can be
   used as the label. */
5723 int expr_start = lex_ofs (lexer);
5724 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5725 int expr_end = lex_ofs (lexer) - 1;
5726 if (!expr || !lex_force_match (lexer, T_RPAREN))
5728 ctables_pcexpr_destroy (expr);
5732 int pcompute_end = lex_ofs (lexer) - 1;
5734 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5737 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5740 msg_at (SW, location, _("New definition of &%s will override the "
5741 "previous definition."),
5743 msg_at (SN, pc->location, _("This is the previous definition."));
5745 ctables_pcexpr_destroy (pc->expr);
5746 msg_location_destroy (pc->location);
5751 pc = xmalloc (sizeof *pc);
5752 *pc = (struct ctables_postcompute) { .name = name };
5753 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5754 utf8_hash_case_string (pc->name, 0));
5757 pc->location = location;
5759 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary functions, each followed by a format
   specifier, from LEXER into *SSS.  *SSS is reset to an empty set first.

   Parsing stops at the end of the command, at a slash, or at an
   identifier that matches LABEL or HIDESOURCECATS.

   NOTE(review): the return type and the return statements are not visible
   in this excerpt; ctables_parse_pproperties() tests the result with "!",
   so a false result presumably reports failure -- confirm. */
5764 ctables_parse_pproperties_format (struct lexer *lexer,
5765 struct ctables_summary_spec_set *sss)
5767 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Continue until a token that cannot start another function/format
   pair. */
5769 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5770 && !(lex_token (lexer) == T_ID
5771 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5772 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5773 lex_tokss (lexer)))))
5775 /* Parse function. */
5776 enum ctables_summary_function function;
5778 enum ctables_area_type area;
5779 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5782 /* Parse percentile.  Only CTSF_PTILE takes one; the number must pass
   lex_force_num_range_closed() with bounds 0 and 100.  For every other
   function PERCENTILE keeps its initial value 0. */
5783 double percentile = 0;
5784 if (function == CTSF_PTILE)
5786 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5788 percentile = lex_number (lexer);
/* Parse the format specifier that goes with the function. */
5793 struct fmt_spec format;
5794 bool is_ctables_format;
5795 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
5798 if (sss->n >= sss->allocated)
5799 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5800 sizeof *sss->specs);
5801 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5802 .function = function,
5803 .weighted = weighted,
5806 .percentile = percentile,
5808 .is_ctables_format = is_ctables_format,
/* NOTE(review): presumably the cleanup for the failure paths above, which
   releases whatever was accumulated in *SSS; the label and return around
   it are not visible in this excerpt. */
5814 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of "&NAME"
   references to postcomputes already present in CT, followed by any number
   of LABEL, FORMAT and HIDESOURCECATS settings.  Each setting is applied
   to every postcompute in the list.

   NOTE(review): the return type, the error returns and the release of PCS
   are not visible in this excerpt; cmd_ctables() tests the result with
   "!", so a false result presumably reports failure -- confirm. */
5819 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5821 struct ctables_postcompute **pcs = NULL;
5823 size_t allocated_pcs = 0;
/* Collect the "&NAME" list. */
5825 while (lex_match (lexer, T_AND))
5827 if (!lex_force_id (lexer))
5829 struct ctables_postcompute *pc
5830 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
/* Reported when the name has no definition (the guard is not visible
   here; presumably PC is null on this path). */
5833 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5838 if (n_pcs >= allocated_pcs)
5839 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply settings until the next subcommand or the end of the command. */
5843 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5845 if (lex_match_id (lexer, "LABEL"))
/* The "=" is optional. */
5847 lex_match (lexer, T_EQUALS);
5848 if (!lex_force_string (lexer))
/* Replace every listed postcompute's label with its own copy of the
   string token. */
5851 for (size_t i = 0; i < n_pcs; i++)
5853 free (pcs[i]->label);
5854 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5859 else if (lex_match_id (lexer, "FORMAT"))
5861 lex_match (lexer, T_EQUALS);
/* Parse the specification once into a temporary set, give each
   postcompute its own clone, then release the temporary. */
5863 struct ctables_summary_spec_set sss;
5864 if (!ctables_parse_pproperties_format (lexer, &sss))
5867 for (size_t i = 0; i < n_pcs; i++)
/* NOTE(review): any guard around releasing the previous specs (for
   example a null check) is not visible in this excerpt. */
5870 ctables_summary_spec_set_uninit (pcs[i]->specs);
5872 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5873 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5875 ctables_summary_spec_set_uninit (&sss);
5877 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5879 lex_match (lexer, T_EQUALS);
5880 bool hide_source_cats;
5881 if (!parse_bool (lexer, &hide_source_cats))
5883 for (size_t i = 0; i < n_pcs; i++)
5884 pcs[i]->hide_source_cats = hide_source_cats;
/* Anything else is a syntax error. */
5888 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats the time NOW, converted to local time, according to the
   strftime() format string FORMAT and appends the result to OUT.

   NOTE(review): on the visible lines the result of localtime() is handed
   to strftime() without a null check, and strftime()'s return value is not
   used; the declaration of VALUE is not visible in this excerpt. */
5901 put_strftime (struct string *out, time_t now, const char *format)
5903 const struct tm *tm = localtime (&now);
5905 strftime (value, sizeof value, format, tm);
5906 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.

   NOTE(review): the return type and return statements are not visible in
   this excerpt; put_title_text() uses the result as a condition, so it
   presumably reports whether the prefix was found -- confirm. */
5910 skip_prefix (struct substring *s, struct substring prefix)
5912 if (ss_starts_with (*s, prefix))
5914 ss_advance (s, prefix.length);
/* Appends to OUT a textual rendering of the tokens in LEXER at offsets
   EXPR_START (inclusive) through EXPR_END (exclusive).

   An identifier is looked up in DICT; when it names a variable that has a
   label, the label is written, otherwise the identifier text itself.
   Tokens reaching the final branch are written using their source
   representation, with single spaces inserted around them as described
   below. */
5922 put_table_expression (struct string *out, struct lexer *lexer,
5923 struct dictionary *dict, int expr_start, int expr_end)
5926 for (int ofs = expr_start; ofs < expr_end; ofs++)
5928 const struct token *t = lex_ofs_token (lexer, ofs);
/* NOTE(review): the bodies of the two bracket branches, and the
   declaration of NEST, are not visible in this excerpt.  Presumably NEST
   counts open "[" so that bracketed text can be handled specially --
   confirm. */
5929 if (t->type == T_LBRACK)
5931 else if (t->type == T_RBRACK && nest > 0)
5937 else if (t->type == T_ID)
5939 const struct variable *var
5940 = dict_lookup_var (dict, t->string.string);
/* Prefer the variable label; fall back to the identifier as written. */
5941 const char *label = var ? var_get_label (var) : NULL;
5942 ds_put_cstr (out, label ? label : t->string.string);
/* Separate this token from the preceding output with one space, except
   at the very start, before ")", or when OUT already ends in a space. */
5946 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5947 ds_put_byte (out, ' ');
5949 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5950 ds_put_cstr (out, repr);
/* Follow the token with a space unless it is the last token in the range
   or an opening parenthesis. */
5953 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5954 ds_put_byte (out, ' ');
/* Copies the text IN to OUT, expanding the markers ")DATE", ")TIME" and
   ")TABLE":

     )DATE   becomes NOW formatted with the strftime() format "%x";
     )TIME   becomes NOW formatted with the strftime() format "%X";
     )TABLE  becomes the rendering produced by put_table_expression() for
             the token range EXPR_START..EXPR_END of LEXER.

   NOTE(review): the enclosing loop and its exit are not visible in this
   excerpt; the visible statements handle one ")" at a time. */
5960 put_title_text (struct string *out, struct substring in, time_t now,
5961 struct lexer *lexer, struct dictionary *dict,
5962 int expr_start, int expr_end)
/* Copy everything up to the next ")" verbatim and step past the copied
   text. */
5966 size_t chunk = ss_find_byte (in, ')');
5967 ds_put_substring (out, ss_head (in, chunk));
5968 ss_advance (&in, chunk);
/* Nothing left means no further ")" was found. */
5969 if (ss_is_empty (in))
/* IN now starts with ")"; check for each recognized marker in turn.
   skip_prefix() also advances IN past a marker that it finds. */
5972 if (skip_prefix (&in, ss_cstr (")DATE")))
5973 put_strftime (out, now, "%x");
5974 else if (skip_prefix (&in, ss_cstr (")TIME")))
5975 put_strftime (out, now, "%X");
5976 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5977 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* NOTE(review): presumably the fallback for a ")" that does not begin a
   recognized marker: it is copied literally and skipped.  The "else" that
   would select this path is not visible in this excerpt. */
5980 ds_put_byte (out, ')');
5981 ss_advance (&in, 1);
5987 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5989 struct casereader *input = NULL;
5991 struct measure_guesser *mg = measure_guesser_create (ds);
5994 input = proc_open (ds);
5995 measure_guesser_run (mg, input);
5996 measure_guesser_destroy (mg);
5999 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6000 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6001 enum settings_value_show tvars = settings_get_show_variables ();
6002 for (size_t i = 0; i < n_vars; i++)
6003 vlabels[i] = (enum ctables_vlabel) tvars;
6005 struct pivot_table_look *look = pivot_table_look_unshare (
6006 pivot_table_look_ref (pivot_table_look_get_default ()));
6007 look->omit_empty = false;
6009 struct ctables *ct = xmalloc (sizeof *ct);
6010 *ct = (struct ctables) {
6011 .dict = dataset_dict (ds),
6013 .ctables_formats = FMT_SETTINGS_INIT,
6015 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6018 time_t now = time (NULL);
6023 const char *dot_string;
6024 const char *comma_string;
6026 static const struct ctf ctfs[4] = {
6027 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6028 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6029 { CTEF_PAREN, "-,(,),", "-.(.)." },
6030 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6032 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6033 for (size_t i = 0; i < 4; i++)
6035 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6036 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6037 fmt_number_style_from_string (s));
6040 if (!lex_force_match (lexer, T_SLASH))
6043 while (!lex_match_id (lexer, "TABLE"))
6045 if (lex_match_id (lexer, "FORMAT"))
6047 double widths[2] = { SYSMIS, SYSMIS };
6048 double units_per_inch = 72.0;
6050 while (lex_token (lexer) != T_SLASH)
6052 if (lex_match_id (lexer, "MINCOLWIDTH"))
6054 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6057 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6059 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6062 else if (lex_match_id (lexer, "UNITS"))
6064 lex_match (lexer, T_EQUALS);
6065 if (lex_match_id (lexer, "POINTS"))
6066 units_per_inch = 72.0;
6067 else if (lex_match_id (lexer, "INCHES"))
6068 units_per_inch = 1.0;
6069 else if (lex_match_id (lexer, "CM"))
6070 units_per_inch = 2.54;
6073 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6077 else if (lex_match_id (lexer, "EMPTY"))
6082 lex_match (lexer, T_EQUALS);
6083 if (lex_match_id (lexer, "ZERO"))
6085 /* Nothing to do. */
6087 else if (lex_match_id (lexer, "BLANK"))
6088 ct->zero = xstrdup ("");
6089 else if (lex_force_string (lexer))
6091 ct->zero = ss_xstrdup (lex_tokss (lexer));
6097 else if (lex_match_id (lexer, "MISSING"))
6099 lex_match (lexer, T_EQUALS);
6100 if (!lex_force_string (lexer))
6104 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6105 ? ss_xstrdup (lex_tokss (lexer))
6111 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6112 "UNITS", "EMPTY", "MISSING");
6117 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6118 && widths[0] > widths[1])
6120 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6124 for (size_t i = 0; i < 2; i++)
6125 if (widths[i] != SYSMIS)
6127 int *wr = ct->look->width_ranges[TABLE_HORZ];
6128 wr[i] = widths[i] / units_per_inch * 96.0;
6133 else if (lex_match_id (lexer, "VLABELS"))
6135 if (!lex_force_match_id (lexer, "VARIABLES"))
6137 lex_match (lexer, T_EQUALS);
6139 struct variable **vars;
6141 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6145 if (!lex_force_match_id (lexer, "DISPLAY"))
6150 lex_match (lexer, T_EQUALS);
6152 enum ctables_vlabel vlabel;
6153 if (lex_match_id (lexer, "DEFAULT"))
6154 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6155 else if (lex_match_id (lexer, "NAME"))
6157 else if (lex_match_id (lexer, "LABEL"))
6158 vlabel = CTVL_LABEL;
6159 else if (lex_match_id (lexer, "BOTH"))
6161 else if (lex_match_id (lexer, "NONE"))
6165 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6171 for (size_t i = 0; i < n_vars; i++)
6172 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6175 else if (lex_match_id (lexer, "MRSETS"))
6177 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6179 lex_match (lexer, T_EQUALS);
6180 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6183 else if (lex_match_id (lexer, "SMISSING"))
6185 if (lex_match_id (lexer, "VARIABLE"))
6186 ct->smissing_listwise = false;
6187 else if (lex_match_id (lexer, "LISTWISE"))
6188 ct->smissing_listwise = true;
6191 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6195 else if (lex_match_id (lexer, "PCOMPUTE"))
6197 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6200 else if (lex_match_id (lexer, "PPROPERTIES"))
6202 if (!ctables_parse_pproperties (lexer, ct))
6205 else if (lex_match_id (lexer, "WEIGHT"))
6207 if (!lex_force_match_id (lexer, "VARIABLE"))
6209 lex_match (lexer, T_EQUALS);
6210 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6214 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6216 if (lex_match_id (lexer, "COUNT"))
6218 lex_match (lexer, T_EQUALS);
6219 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6222 ct->hide_threshold = lex_integer (lexer);
6225 else if (ct->hide_threshold == 0)
6226 ct->hide_threshold = 5;
6230 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6231 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6232 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6236 if (!lex_force_match (lexer, T_SLASH))
6240 size_t allocated_tables = 0;
6243 if (ct->n_tables >= allocated_tables)
6244 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6245 sizeof *ct->tables);
6247 struct ctables_category *cat = xmalloc (sizeof *cat);
6248 *cat = (struct ctables_category) {
6250 .include_missing = false,
6251 .sort_ascending = true,
6254 struct ctables_categories *c = xmalloc (sizeof *c);
6255 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6256 *c = (struct ctables_categories) {
6263 struct ctables_categories **categories = xnmalloc (n_vars,
6264 sizeof *categories);
6265 for (size_t i = 0; i < n_vars; i++)
6268 struct ctables_table *t = xmalloc (sizeof *t);
6269 *t = (struct ctables_table) {
6271 .slabels_axis = PIVOT_AXIS_COLUMN,
6272 .slabels_visible = true,
6273 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6275 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6276 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6277 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6279 .clabels_from_axis = PIVOT_AXIS_LAYER,
6280 .clabels_to_axis = PIVOT_AXIS_LAYER,
6281 .categories = categories,
6282 .n_categories = n_vars,
6285 ct->tables[ct->n_tables++] = t;
6287 lex_match (lexer, T_EQUALS);
6288 int expr_start = lex_ofs (lexer);
6289 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6291 if (lex_match (lexer, T_BY))
6293 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6294 ct, t, PIVOT_AXIS_COLUMN))
6297 if (lex_match (lexer, T_BY))
6299 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6300 ct, t, PIVOT_AXIS_LAYER))
6304 int expr_end = lex_ofs (lexer);
6306 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6307 && !t->axes[PIVOT_AXIS_LAYER])
6309 lex_error (lexer, _("At least one variable must be specified."));
6313 const struct ctables_axis *scales[PIVOT_N_AXES];
6314 size_t n_scales = 0;
6315 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6317 scales[a] = find_scale (t->axes[a]);
6323 msg (SE, _("Scale variables may appear only on one axis."));
6324 if (scales[PIVOT_AXIS_ROW])
6325 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6326 _("This scale variable appears on the rows axis."));
6327 if (scales[PIVOT_AXIS_COLUMN])
6328 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6329 _("This scale variable appears on the columns axis."));
6330 if (scales[PIVOT_AXIS_LAYER])
6331 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6332 _("This scale variable appears on the layer axis."));
6336 const struct ctables_axis *summaries[PIVOT_N_AXES];
6337 size_t n_summaries = 0;
6338 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6340 summaries[a] = (scales[a]
6342 : find_categorical_summary_spec (t->axes[a]));
6346 if (n_summaries > 1)
6348 msg (SE, _("Summaries may appear only on one axis."));
6349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6352 msg_at (SN, summaries[a]->loc,
6354 ? _("This variable on the rows axis has a summary.")
6355 : a == PIVOT_AXIS_COLUMN
6356 ? _("This variable on the columns axis has a summary.")
6357 : _("This variable on the layers axis has a summary."));
6359 msg_at (SN, summaries[a]->loc,
6360 _("This is a scale variable, so it always has a "
6361 "summary even if the syntax does not explicitly "
6366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6367 if (n_summaries ? summaries[a] : t->axes[a])
6369 t->summary_axis = a;
6373 if (lex_token (lexer) == T_ENDCMD)
6375 if (!ctables_prepare_table (t))
6379 if (!lex_force_match (lexer, T_SLASH))
6382 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6384 if (lex_match_id (lexer, "SLABELS"))
6386 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6388 if (lex_match_id (lexer, "POSITION"))
6390 lex_match (lexer, T_EQUALS);
6391 if (lex_match_id (lexer, "COLUMN"))
6392 t->slabels_axis = PIVOT_AXIS_COLUMN;
6393 else if (lex_match_id (lexer, "ROW"))
6394 t->slabels_axis = PIVOT_AXIS_ROW;
6395 else if (lex_match_id (lexer, "LAYER"))
6396 t->slabels_axis = PIVOT_AXIS_LAYER;
6399 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6403 else if (lex_match_id (lexer, "VISIBLE"))
6405 lex_match (lexer, T_EQUALS);
6406 if (!parse_bool (lexer, &t->slabels_visible))
6411 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6416 else if (lex_match_id (lexer, "CLABELS"))
6418 if (lex_match_id (lexer, "AUTO"))
6420 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6421 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6423 else if (lex_match_id (lexer, "ROWLABELS"))
6425 lex_match (lexer, T_EQUALS);
6426 if (lex_match_id (lexer, "OPPOSITE"))
6427 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6428 else if (lex_match_id (lexer, "LAYER"))
6429 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6432 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6436 else if (lex_match_id (lexer, "COLLABELS"))
6438 lex_match (lexer, T_EQUALS);
6439 if (lex_match_id (lexer, "OPPOSITE"))
6440 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6441 else if (lex_match_id (lexer, "LAYER"))
6442 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6445 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6451 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6456 else if (lex_match_id (lexer, "CRITERIA"))
6458 if (!lex_force_match_id (lexer, "CILEVEL"))
6460 lex_match (lexer, T_EQUALS);
6462 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6464 t->cilevel = lex_number (lexer);
6467 else if (lex_match_id (lexer, "CATEGORIES"))
6469 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6473 else if (lex_match_id (lexer, "TITLES"))
6478 if (lex_match_id (lexer, "CAPTION"))
6479 textp = &t->caption;
6480 else if (lex_match_id (lexer, "CORNER"))
6482 else if (lex_match_id (lexer, "TITLE"))
6486 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6489 lex_match (lexer, T_EQUALS);
6491 struct string s = DS_EMPTY_INITIALIZER;
6492 while (lex_is_string (lexer))
6494 if (!ds_is_empty (&s))
6495 ds_put_byte (&s, ' ');
6496 put_title_text (&s, lex_tokss (lexer), now,
6497 lexer, dataset_dict (ds),
6498 expr_start, expr_end);
6502 *textp = ds_steal_cstr (&s);
6504 while (lex_token (lexer) != T_SLASH
6505 && lex_token (lexer) != T_ENDCMD);
6507 else if (lex_match_id (lexer, "SIGTEST"))
6511 t->chisq = xmalloc (sizeof *t->chisq);
6512 *t->chisq = (struct ctables_chisq) {
6514 .include_mrsets = true,
6515 .all_visible = true,
6521 if (lex_match_id (lexer, "TYPE"))
6523 lex_match (lexer, T_EQUALS);
6524 if (!lex_force_match_id (lexer, "CHISQUARE"))
6527 else if (lex_match_id (lexer, "ALPHA"))
6529 lex_match (lexer, T_EQUALS);
6530 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6532 t->chisq->alpha = lex_number (lexer);
6535 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6537 lex_match (lexer, T_EQUALS);
6538 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6541 else if (lex_match_id (lexer, "CATEGORIES"))
6543 lex_match (lexer, T_EQUALS);
6544 if (lex_match_id (lexer, "ALLVISIBLE"))
6545 t->chisq->all_visible = true;
6546 else if (lex_match_id (lexer, "SUBTOTALS"))
6547 t->chisq->all_visible = false;
6550 lex_error_expecting (lexer,
6551 "ALLVISIBLE", "SUBTOTALS");
6557 lex_error_expecting (lexer, "TYPE", "ALPHA",
6558 "INCLUDEMRSETS", "CATEGORIES");
6562 while (lex_token (lexer) != T_SLASH
6563 && lex_token (lexer) != T_ENDCMD);
6565 else if (lex_match_id (lexer, "COMPARETEST"))
6569 t->pairwise = xmalloc (sizeof *t->pairwise);
6570 *t->pairwise = (struct ctables_pairwise) {
6572 .alpha = { .05, .05 },
6573 .adjust = BONFERRONI,
6574 .include_mrsets = true,
6575 .meansvariance_allcats = true,
6576 .all_visible = true,
6585 if (lex_match_id (lexer, "TYPE"))
6587 lex_match (lexer, T_EQUALS);
6588 if (lex_match_id (lexer, "PROP"))
6589 t->pairwise->type = PROP;
6590 else if (lex_match_id (lexer, "MEAN"))
6591 t->pairwise->type = MEAN;
6594 lex_error_expecting (lexer, "PROP", "MEAN");
6598 else if (lex_match_id (lexer, "ALPHA"))
6600 lex_match (lexer, T_EQUALS);
6602 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6604 double a0 = lex_number (lexer);
6607 lex_match (lexer, T_COMMA);
6608 if (lex_is_number (lexer))
6610 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6612 double a1 = lex_number (lexer);
6615 t->pairwise->alpha[0] = MIN (a0, a1);
6616 t->pairwise->alpha[1] = MAX (a0, a1);
6619 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6621 else if (lex_match_id (lexer, "ADJUST"))
6623 lex_match (lexer, T_EQUALS);
6624 if (lex_match_id (lexer, "BONFERRONI"))
6625 t->pairwise->adjust = BONFERRONI;
6626 else if (lex_match_id (lexer, "BH"))
6627 t->pairwise->adjust = BH;
6628 else if (lex_match_id (lexer, "NONE"))
6629 t->pairwise->adjust = 0;
6632 lex_error_expecting (lexer, "BONFERRONI", "BH",
6637 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6639 lex_match (lexer, T_EQUALS);
6640 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6643 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6645 lex_match (lexer, T_EQUALS);
6646 if (lex_match_id (lexer, "ALLCATS"))
6647 t->pairwise->meansvariance_allcats = true;
6648 else if (lex_match_id (lexer, "TESTEDCATS"))
6649 t->pairwise->meansvariance_allcats = false;
6652 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6656 else if (lex_match_id (lexer, "CATEGORIES"))
6658 lex_match (lexer, T_EQUALS);
6659 if (lex_match_id (lexer, "ALLVISIBLE"))
6660 t->pairwise->all_visible = true;
6661 else if (lex_match_id (lexer, "SUBTOTALS"))
6662 t->pairwise->all_visible = false;
6665 lex_error_expecting (lexer, "ALLVISIBLE",
6670 else if (lex_match_id (lexer, "MERGE"))
6672 lex_match (lexer, T_EQUALS);
6673 if (!parse_bool (lexer, &t->pairwise->merge))
6676 else if (lex_match_id (lexer, "STYLE"))
6678 lex_match (lexer, T_EQUALS);
6679 if (lex_match_id (lexer, "APA"))
6680 t->pairwise->apa_style = true;
6681 else if (lex_match_id (lexer, "SIMPLE"))
6682 t->pairwise->apa_style = false;
6685 lex_error_expecting (lexer, "APA", "SIMPLE");
6689 else if (lex_match_id (lexer, "SHOWSIG"))
6691 lex_match (lexer, T_EQUALS);
6692 if (!parse_bool (lexer, &t->pairwise->show_sig))
6697 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6698 "INCLUDEMRSETS", "MEANSVARIANCE",
6699 "CATEGORIES", "MERGE", "STYLE",
6704 while (lex_token (lexer) != T_SLASH
6705 && lex_token (lexer) != T_ENDCMD);
6709 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6710 "CRITERIA", "CATEGORIES", "TITLES",
6711 "SIGTEST", "COMPARETEST");
6715 if (!lex_match (lexer, T_SLASH))
6719 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6721 t->clabels_from_axis = PIVOT_AXIS_ROW;
6722 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6724 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6728 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6729 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6730 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6732 if (!ctables_prepare_table (t))
6735 while (lex_token (lexer) != T_ENDCMD);
6738 input = proc_open (ds);
6739 bool ok = ctables_execute (ds, input, ct);
6740 ok = proc_commit (ds) && ok;
6742 ctables_destroy (ct);
6743 return ok ? CMD_SUCCESS : CMD_FAILURE;
6748 ctables_destroy (ct);