1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight. */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
377 size_t *areas[N_CTATS];
378 size_t n_areas[N_CTATS];
381 struct ctables_summary_spec_set specs[N_CSVS];
384 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
387 struct ctables_nest *nests;
391 static void ctables_stack_uninit (struct ctables_stack *);
395 struct hmap_node node;
400 struct ctables_occurrence
402 struct hmap_node node;
406 struct ctables_section
409 struct ctables_table *table;
410 struct ctables_nest *nests[PIVOT_N_AXES];
413 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
414 struct hmap cells; /* Contains "struct ctables_cell"s. */
415 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
418 static void ctables_section_uninit (struct ctables_section *);
422 struct ctables *ctables;
423 struct ctables_axis *axes[PIVOT_N_AXES];
424 struct ctables_stack stacks[PIVOT_N_AXES];
425 struct ctables_section *sections;
427 enum pivot_axis_type summary_axis;
428 struct ctables_summary_spec_set summary_specs;
429 struct variable **sum_vars;
432 enum pivot_axis_type slabels_axis;
433 bool slabels_visible;
435 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
437 Most commonly, label_axis[a] == a, and in particular we always have
438 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
440 If ROWLABELS or COLLABELS is specified, then one of
441 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
442 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
444 If any category labels are moved, then 'clabels_example' is one of the
445 variables being moved (and it is otherwise NULL). All of the variables
446 being moved have the same width, value labels, and categories, so this
447 example variable can be used to find those out.
449 The remaining members in this group are relevant only if category labels
452 'clabels_values_map' holds a "struct ctables_value" for all the values
453 that appear in all of the variables in the moved categories. It is
454 accumulated as the data is read. Once the data is fully read, its
455 sorted values are put into 'clabels_values' and 'n_clabels_values'.
457 enum pivot_axis_type label_axis[PIVOT_N_AXES];
458 enum pivot_axis_type clabels_from_axis;
459 enum pivot_axis_type clabels_to_axis;
460 const struct variable *clabels_example;
461 struct hmap clabels_values_map;
462 struct ctables_value **clabels_values;
463 size_t n_clabels_values;
465 /* Indexed by variable dictionary index. */
466 struct ctables_categories **categories;
475 struct ctables_chisq *chisq;
476 struct ctables_pairwise *pairwise;
479 struct ctables_categories
482 struct ctables_category *cats;
487 struct ctables_category
489 enum ctables_category_type
491 /* Explicit category lists. */
494 CCT_NRANGE, /* Numerical range. */
495 CCT_SRANGE, /* String range. */
500 /* Totals and subtotals. */
504 /* Implicit category lists. */
509 /* For contributing to TOTALN. */
510 CCT_EXCLUDED_MISSING,
514 struct ctables_category *subtotal;
520 double number; /* CCT_NUMBER. */
521 struct substring string; /* CCT_STRING, in dictionary encoding. */
522 double nrange[2]; /* CCT_NRANGE. */
523 struct substring srange[2]; /* CCT_SRANGE. */
527 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
528 bool hide_subcategories; /* CCT_SUBTOTAL. */
531 /* CCT_POSTCOMPUTE. */
534 const struct ctables_postcompute *pc;
535 enum fmt_type parse_format;
538 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
541 bool include_missing;
545 enum ctables_summary_function sort_function;
547 enum ctables_area_type area;
548 struct variable *sort_var;
553 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
554 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
555 struct msg_location *location;
559 ctables_category_uninit (struct ctables_category *cat)
564 msg_location_destroy (cat->location);
571 case CCT_POSTCOMPUTE:
575 ss_dealloc (&cat->string);
579 ss_dealloc (&cat->srange[0]);
580 ss_dealloc (&cat->srange[1]);
585 free (cat->total_label);
593 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, allowing either one to have a null 'string'
   pointer.  If A's 'string' is null, the result is true only when B's
   'string' is null too.  Otherwise the result is true only when B's 'string'
   is non-null and ss_equals() considers *A and *B equal. */
599 nullable_substring_equal (const struct substring *a,
600 const struct substring *b)
602 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
606 ctables_category_equal (const struct ctables_category *a,
607 const struct ctables_category *b)
609 if (a->type != b->type)
615 return a->number == b->number;
618 return ss_equals (a->string, b->string);
621 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
624 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
625 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
631 case CCT_POSTCOMPUTE:
632 return a->pc == b->pc;
636 return !strcmp (a->total_label, b->total_label);
641 return (a->include_missing == b->include_missing
642 && a->sort_ascending == b->sort_ascending
643 && a->sort_function == b->sort_function
644 && a->sort_var == b->sort_var
645 && a->percentile == b->percentile);
647 case CCT_EXCLUDED_MISSING:
655 ctables_categories_unref (struct ctables_categories *c)
660 assert (c->n_refs > 0);
664 for (size_t i = 0; i < c->n_cats; i++)
665 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  The number of categories and the
   'show_empty' setting are checked first; the individual categories are then
   compared pairwise, in order, with ctables_category_equal(). */
671 ctables_categories_equal (const struct ctables_categories *a,
672 const struct ctables_categories *b)
674 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
677 for (size_t i = 0; i < a->n_cats; i++)
678 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
684 /* Chi-square test (SIGTEST). */
692 /* Pairwise comparison test (COMPARETEST). */
693 struct ctables_pairwise
695 enum { PROP, MEAN } type;
698 bool meansvariance_allcats;
700 enum { BONFERRONI = 1, BH } adjust;
724 struct variable *var;
726 struct ctables_summary_spec_set specs[N_CSVS];
730 struct ctables_axis *subs[2];
733 struct msg_location *loc;
736 static void ctables_axis_destroy (struct ctables_axis *);
738 struct ctables_summary_spec
740 /* The calculation to be performed.
742 'function' is the function to calculate. 'weighted' specifies whether
743 to use weighted or unweighted data (for functions that do not support a
744 choice, it must be true). 'calc_area' is the area over which the
745 calculation takes place (for functions that target only an individual
746 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
747 percentile between 0 and 100 (for other functions it must be 0). */
748 enum ctables_summary_function function;
750 enum ctables_area_type calc_area;
751 double percentile; /* CTSF_PTILE only. */
753 /* How to display the result of the calculation.
755 'label' is a user-specified label, NULL if the user didn't specify
758 'user_area' is usually the same as 'calc_area', but when category labels
759 are rotated from one axis to another it swaps rows and columns.
761 'format' is the format for displaying the output. If
762 'is_ctables_format' is true, then 'format.type' is one of the special
763 CTEF_* formats instead of the standard ones. */
765 enum ctables_area_type user_area;
766 struct fmt_spec format;
767 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
774 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
775 const struct ctables_summary_spec *src)
778 dst->label = xstrdup_if_nonnull (src->label);
782 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
789 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
790 const struct ctables_summary_spec_set *src)
792 struct ctables_summary_spec *specs
793 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
794 for (size_t i = 0; i < src->n; i++)
795 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
797 *dst = (struct ctables_summary_spec_set) {
802 .is_scale = src->is_scale,
807 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
809 for (size_t i = 0; i < set->n; i++)
810 ctables_summary_spec_uninit (&set->specs[i]);
811 free (set->listwise_vars);
816 parse_col_width (struct lexer *lexer, const char *name, double *width)
818 lex_match (lexer, T_EQUALS);
819 if (lex_match_id (lexer, "DEFAULT"))
821 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
823 *width = lex_number (lexer);
833 parse_bool (struct lexer *lexer, bool *b)
835 if (lex_match_id (lexer, "NO"))
837 else if (lex_match_id (lexer, "YES"))
841 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class recorded for summary function F.

   The lookup table is generated by expanding the S() entries in
   "ctables.inc", so that each function's ENUM indexes its AVAILABILITY
   value. */
847 static enum ctables_function_availability
848 ctables_function_availability (enum ctables_summary_function f)
850 static enum ctables_function_availability availability[] = {
851 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
852 #include "ctables.inc"
856 return availability[f];
/* Returns true if F is one of the two plain count functions, CTSF_COUNT or
   CTSF_ECOUNT, and false for every other summary function. */
860 ctables_summary_function_is_count (enum ctables_summary_function f)
862 return f == CTSF_COUNT || f == CTSF_ECOUNT;
866 parse_ctables_summary_function (struct lexer *lexer,
867 enum ctables_summary_function *function,
869 enum ctables_area_type *area)
871 if (!lex_force_id (lexer))
874 struct substring name = lex_tokss (lexer);
875 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
877 bool has_area = false;
879 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
880 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
885 if (ss_equals_case (name, ss_cstr ("PCT")))
887 /* Special case where .COUNT suffix is omitted. */
888 *function = CTSF_areaPCT_COUNT;
895 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
897 const struct ctables_function_info *cfi = &ctables_function_info[f];
898 if (ss_equals_case (cfi->basename, name))
901 if (!*weighted && !cfi->may_be_unweighted)
903 if (has_area != cfi->is_area)
911 lex_error (lexer, _("Expecting summary function name."));
916 ctables_axis_destroy (struct ctables_axis *axis)
924 for (size_t i = 0; i < N_CSVS; i++)
925 ctables_summary_spec_set_uninit (&axis->specs[i]);
930 ctables_axis_destroy (axis->subs[0]);
931 ctables_axis_destroy (axis->subs[1]);
934 msg_location_destroy (axis->loc);
938 static struct ctables_axis *
939 ctables_axis_new_nonterminal (enum ctables_axis_op op,
940 struct ctables_axis *sub0,
941 struct ctables_axis *sub1,
942 struct lexer *lexer, int start_ofs)
944 struct ctables_axis *axis = xmalloc (sizeof *axis);
945 *axis = (struct ctables_axis) {
947 .subs = { sub0, sub1 },
948 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
953 struct ctables_axis_parse_ctx
956 struct dictionary *dict;
958 struct ctables_table *t;
961 static struct fmt_spec
962 ctables_summary_default_format (enum ctables_summary_function function,
963 const struct variable *var)
965 static const enum ctables_format default_formats[] = {
966 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
967 #include "ctables.inc"
970 switch (default_formats[function])
973 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
976 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
979 return *var_get_print_format (var);
987 ctables_summary_label__ (const struct ctables_summary_spec *spec)
989 bool w = spec->weighted;
990 enum ctables_area_type a = spec->user_area;
991 switch (spec->function)
994 return w ? N_("Count") : N_("Unweighted Count");
997 return N_("Adjusted Count");
999 case CTSF_areaPCT_COUNT:
1002 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1003 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1004 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1005 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1006 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1007 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1008 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1012 case CTSF_areaPCT_VALIDN:
1015 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1016 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1017 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1018 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1019 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1020 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1021 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1025 case CTSF_areaPCT_TOTALN:
1028 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1029 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1030 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1031 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1032 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1033 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1034 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1038 case CTSF_MAXIMUM: return N_("Maximum");
1039 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1040 case CTSF_MEDIAN: return N_("Median");
1041 case CTSF_MINIMUM: return N_("Minimum");
1042 case CTSF_MISSING: return N_("Missing");
1043 case CTSF_MODE: return N_("Mode");
1044 case CTSF_PTILE: NOT_REACHED ();
1045 case CTSF_RANGE: return N_("Range");
1046 case CTSF_SEMEAN: return N_("Std Error of Mean");
1047 case CTSF_STDDEV: return N_("Std Deviation");
1048 case CTSF_SUM: return N_("Sum");
1049 case CTSF_TOTALN: return N_("Total N");
1050 case CTSF_ETOTALN: return N_("Adjusted Total N");
1051 case CTSF_VALIDN: return N_("Valid N");
1052 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1053 case CTSF_VARIANCE: return N_("Variance");
1054 case CTSF_areaPCT_SUM:
1057 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1058 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1059 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1060 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1061 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1062 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1063 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1070 /* Don't bother translating these: they are for developers only. */
1071 case CTAT_TABLE: return "Table ID";
1072 case CTAT_LAYER: return "Layer ID";
1073 case CTAT_LAYERROW: return "Layer Row ID";
1074 case CTAT_LAYERCOL: return "Layer Column ID";
1075 case CTAT_SUBTABLE: return "Subtable ID";
1076 case CTAT_ROW: return "Row ID";
1077 case CTAT_COL: return "Column ID";
1085 static struct pivot_value *
1086 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1090 if (spec->function == CTSF_PTILE)
1092 double p = spec->percentile;
1093 char *s = (spec->weighted
1094 ? xasprintf (_("Percentile %.2f"), p)
1095 : xasprintf (_("Unweighted Percentile %.2f"), p));
1096 return pivot_value_new_user_text_nocopy (s);
1099 return pivot_value_new_text (ctables_summary_label__ (spec));
1103 struct substring in = ss_cstr (spec->label);
1104 struct substring target = ss_cstr (")CILEVEL");
1106 struct string out = DS_EMPTY_INITIALIZER;
1109 size_t chunk = ss_find_substring (in, target);
1110 ds_put_substring (&out, ss_head (in, chunk));
1111 ss_advance (&in, chunk);
1113 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1115 ss_advance (&in, target.length);
1116 ds_put_format (&out, "%g", cilevel);
/* Formats the syntax name of summary function FUNCTION into BUFFER, which
   has room for BUFSIZE bytes (snprintf() enforces the limit).

   The name is built from three parts, in order:

   - "U" if WEIGHTED is false, otherwise nothing.

   - The name of AREA from ctables_area_type_name[], but only if the
     function's ctables_function_info entry has 'is_area' set.

   - The function's base name from ctables_function_info[]. */
1122 ctables_summary_function_name (enum ctables_summary_function function,
1124 enum ctables_area_type area,
1125 char *buffer, size_t bufsize)
1127 const struct ctables_function_info *cfi = &ctables_function_info[function];
1128 snprintf (buffer, bufsize, "%s%s%s",
1129 weighted ? "" : "U",
1130 cfi->is_area ? ctables_area_type_name[area] : "",
1131 cfi->basename.string);
1136 add_summary_spec (struct ctables_axis *axis,
1137 enum ctables_summary_function function, bool weighted,
1138 enum ctables_area_type area, double percentile,
1139 const char *label, const struct fmt_spec *format,
1140 bool is_ctables_format, const struct msg_location *loc,
1141 enum ctables_summary_variant sv)
1143 if (axis->op == CTAO_VAR)
1145 char function_name[128];
1146 ctables_summary_function_name (function, weighted, area,
1147 function_name, sizeof function_name);
1148 const char *var_name = var_get_name (axis->var);
1149 switch (ctables_function_availability (function))
1153 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1154 "response sets."), function_name);
1155 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1161 if (!axis->scale && sv != CSV_TOTAL)
1164 _("Summary function %s applies only to scale variables."),
1166 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1176 struct ctables_summary_spec_set *set = &axis->specs[sv];
1177 if (set->n >= set->allocated)
1178 set->specs = x2nrealloc (set->specs, &set->allocated,
1179 sizeof *set->specs);
1181 struct ctables_summary_spec *dst = &set->specs[set->n++];
1182 *dst = (struct ctables_summary_spec) {
1183 .function = function,
1184 .weighted = weighted,
1187 .percentile = percentile,
1188 .label = xstrdup_if_nonnull (label),
1189 .format = (format ? *format
1190 : ctables_summary_default_format (function, axis->var)),
1191 .is_ctables_format = is_ctables_format,
1197 for (size_t i = 0; i < 2; i++)
1198 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1199 percentile, label, format, is_ctables_format,
1206 static struct ctables_axis *ctables_axis_parse_stack (
1207 struct ctables_axis_parse_ctx *);
1210 static struct ctables_axis *
1211 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1213 if (lex_match (ctx->lexer, T_LPAREN))
1215 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1216 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1218 ctables_axis_destroy (sub);
1224 if (!lex_force_id (ctx->lexer))
1227 int start_ofs = lex_ofs (ctx->lexer);
1228 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1232 struct ctables_axis *axis = xmalloc (sizeof *axis);
1233 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1235 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1236 : lex_match_phrase (ctx->lexer, "[C]") ? false
1237 : var_get_measure (var) == MEASURE_SCALE);
1238 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1239 lex_ofs (ctx->lexer) - 1);
1240 if (axis->scale && var_is_alpha (var))
1242 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1244 var_get_name (var));
1245 ctables_axis_destroy (axis);
/* Returns true if null-terminated string S contains at least one of the
   ASCII decimal digits '0' through '9', false otherwise.

   strcspn() yields the length of the leading run of non-digit bytes, so the
   byte at that index is the null terminator exactly when S has no digit. */
1253 has_digit (const char *s)
1255 return s[strcspn (s, "0123456789")] != '\0';
1259 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1260 bool *is_ctables_format)
1262 char type[FMT_TYPE_LEN_MAX + 1];
1263 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1266 if (!strcasecmp (type, "NEGPAREN"))
1267 format->type = CTEF_NEGPAREN;
1268 else if (!strcasecmp (type, "NEQUAL"))
1269 format->type = CTEF_NEQUAL;
1270 else if (!strcasecmp (type, "PAREN"))
1271 format->type = CTEF_PAREN;
1272 else if (!strcasecmp (type, "PCTPAREN"))
1273 format->type = CTEF_PCTPAREN;
1276 *is_ctables_format = false;
1277 return (parse_format_specifier (lexer, format)
1278 && fmt_check_output (format)
1279 && fmt_check_type_compat (format, VAL_NUMERIC));
1285 lex_next_error (lexer, -1, -1,
1286 _("Output format %s requires width 2 or greater."), type);
1289 else if (format->d > format->w - 1)
1291 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1292 "greater than decimals."), type);
1297 *is_ctables_format = true;
1302 static struct ctables_axis *
1303 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1305 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1306 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1309 enum ctables_summary_variant sv = CSV_CELL;
1312 int start_ofs = lex_ofs (ctx->lexer);
1314 /* Parse function. */
1315 enum ctables_summary_function function;
1317 enum ctables_area_type area;
1318 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1322 /* Parse percentile. */
1323 double percentile = 0;
1324 if (function == CTSF_PTILE)
1326 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1328 percentile = lex_number (ctx->lexer);
1329 lex_get (ctx->lexer);
1334 if (lex_is_string (ctx->lexer))
1336 label = ss_xstrdup (lex_tokss (ctx->lexer));
1337 lex_get (ctx->lexer);
1341 struct fmt_spec format;
1342 const struct fmt_spec *formatp;
1343 bool is_ctables_format = false;
1344 if (lex_token (ctx->lexer) == T_ID
1345 && has_digit (lex_tokcstr (ctx->lexer)))
1347 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1348 &is_ctables_format))
1358 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1359 lex_ofs (ctx->lexer) - 1);
1360 add_summary_spec (sub, function, weighted, area, percentile, label,
1361 formatp, is_ctables_format, loc, sv);
1363 msg_location_destroy (loc);
1365 lex_match (ctx->lexer, T_COMMA);
1366 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1368 if (!lex_force_match (ctx->lexer, T_LBRACK))
1372 else if (lex_match (ctx->lexer, T_RBRACK))
1374 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1381 ctables_axis_destroy (sub);
1385 static const struct ctables_axis *
1386 find_scale (const struct ctables_axis *axis)
1390 else if (axis->op == CTAO_VAR)
1391 return axis->scale ? axis : NULL;
1394 for (size_t i = 0; i < 2; i++)
1396 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1404 static const struct ctables_axis *
1405 find_categorical_summary_spec (const struct ctables_axis *axis)
1409 else if (axis->op == CTAO_VAR)
1410 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1413 for (size_t i = 0; i < 2; i++)
1415 const struct ctables_axis *sum
1416 = find_categorical_summary_spec (axis->subs[i]);
1424 static struct ctables_axis *
1425 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1427 int start_ofs = lex_ofs (ctx->lexer);
1428 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1432 while (lex_match (ctx->lexer, T_GT))
1434 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1437 ctables_axis_destroy (lhs);
1441 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1442 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1444 const struct ctables_axis *outer_scale = find_scale (lhs);
1445 const struct ctables_axis *inner_scale = find_scale (rhs);
1446 if (outer_scale && inner_scale)
1448 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1449 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1450 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1451 ctables_axis_destroy (nest);
1455 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1458 msg_at (SE, nest->loc,
1459 _("Summaries may only be requested for categorical variables "
1460 "at the innermost nesting level."));
1461 msg_at (SN, outer_sum->loc,
1462 _("This outer categorical variable has a summary."));
1463 ctables_axis_destroy (nest);
/* Parses a stacking expression: a nesting expression optionally followed by
   any number of '+' (T_PLUS) operators, each followed by another nesting
   expression.  Each '+' folds the left-hand side parsed so far and the new
   right-hand side into a CTAO_STACK node, so the resulting tree is
   left-associative.  The node's location starts at the first token of the
   whole expression. */
1473 static struct ctables_axis *
1474 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1476 int start_ofs = lex_ofs (ctx->lexer);
1477 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1481 while (lex_match (ctx->lexer, T_PLUS))
1483 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1486 ctables_axis_destroy (lhs);
1490 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1491 ctx->lexer, start_ofs);
/* Parses an axis expression from LEXER and stores the resulting tree in
   T->axes[A].  In the normal path, returns true if the stored axis is
   nonnull.

   The next token is first tested against BY, '/', and end of command, which
   mark the points where an axis expression can be absent.
   NOTE(review): presumably that case leaves the axis empty and reports
   success; confirm. */
1498 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1499 struct ctables *ct, struct ctables_table *t,
1500 enum pivot_axis_type a)
1502 if (lex_token (lexer) == T_BY
1503 || lex_token (lexer) == T_SLASH
1504 || lex_token (lexer) == T_ENDCMD)
1507 struct ctables_axis_parse_ctx ctx = {
1513 t->axes[a] = ctables_axis_parse_stack (&ctx);
1514 return t->axes[a] != NULL;
/* Destroys CHISQ.  ctables_table_destroy() calls this on a table's 'chisq'
   member.
   NOTE(review): presumably frees CHISQ itself; confirm whether a null
   pointer is accepted. */
1518 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  ctables_table_destroy() calls this on a table's
   'pairwise' member.
   NOTE(review): presumably frees PAIRWISE itself; confirm whether a null
   pointer is accepted. */
1524 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: its sections, its references to
   per-variable categories (plus the array that holds them), the axis tree
   and stack for every pivot axis, the summary spec array, the category-label
   value map and array, and the chi-square and pairwise settings. */
1530 ctables_table_destroy (struct ctables_table *t)
1535 for (size_t i = 0; i < t->n_sections; i++)
1536 ctables_section_uninit (&t->sections[i]);
/* Categories are reference-counted, so each slot only drops one
   reference. */
1539 for (size_t i = 0; i < t->n_categories; i++)
1540 ctables_categories_unref (t->categories[i]);
1541 free (t->categories);
1543 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1545 ctables_axis_destroy (t->axes[a]);
1546 ctables_stack_uninit (&t->stacks[a]);
1548 free (t->summary_specs.specs);
/* Each stored value has the width of the variable T->clabels_example. */
1550 struct ctables_value *ctv, *next_ctv;
1551 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1552 &t->clabels_values_map)
1554 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1555 hmap_delete (&t->clabels_values_map, &ctv->node);
1558 hmap_destroy (&t->clabels_values_map);
1559 free (t->clabels_values);
1565 ctables_chisq_destroy (t->chisq);
1566 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in its hash map
   (location, expression, and summary specs), the map itself, the
   CTABLES-specific format settings, its reference to the table look, and
   each of its tables. */
1571 ctables_destroy (struct ctables *ct)
/* The _SAFE iterator is needed because each node is deleted during the
   walk. */
1576 struct ctables_postcompute *pc, *next_pc;
1577 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1581 msg_location_destroy (pc->location);
1582 ctables_pcexpr_destroy (pc->expr);
1586 ctables_summary_spec_set_uninit (pc->specs);
1589 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1592 hmap_destroy (&ct->postcomputes);
1594 fmt_settings_uninit (&ct->ctables_formats);
1595 pivot_table_look_unref (ct->look);
1599 for (size_t i = 0; i < ct->n_tables; i++)
1600 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose numeric range runs from LOW to HIGH.  Callers in
   this file pass -DBL_MAX for LOW or DBL_MAX for HIGH to express a range
   that is open at that end. */
1605 static struct ctables_category
1606 cct_nrange (double low, double high)
1608 return (struct ctables_category) {
1610 .nrange = { low, high }
/* Returns a category whose string range runs from LOW to HIGH.  Callers in
   this file pass a substring with a null 'string' member for LOW or HIGH to
   express a range that is open at that end.  The substrings are stored as
   given, without copying. */
1614 static struct ctables_category
1615 cct_srange (struct substring low, struct substring high)
1617 return (struct ctables_category) {
1619 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category specification,
   after the keyword itself, and stores the result in *CAT as a CCT_SUBTOTAL
   category.

   If '=' follows, a string token is required and a copy of it becomes the
   subtotal's label; otherwise the label defaults to the translated string
   "Subtotal".  HIDE_SUBCATEGORIES is recorded in the category unchanged. */
1624 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1625 struct ctables_category *cat)
1628 if (lex_match (lexer, T_EQUALS))
1630 if (!lex_force_string (lexer))
1633 total_label = ss_xstrdup (lex_tokss (lexer));
1637 total_label = xstrdup (_("Subtotal"));
1639 *cat = (struct ctables_category) {
1640 .type = CCT_SUBTOTAL,
1641 .hide_subcategories = hide_subcategories,
1642 .total_label = total_label
/* Takes the current string token from LEXER, recodes it between "UTF-8" and
   DICT's encoding, and trims trailing spaces from the result.  No pool is
   used (the pool argument is NULL).
   NOTE(review): the recoding direction follows recode_substring_pool()'s
   argument order (presumably to the dictionary encoding from UTF-8), and
   the caller presumably owns the returned substring; confirm both. */
1647 static struct substring
1648 parse_substring (struct lexer *lexer, struct dictionary *dict)
1650 struct substring s = recode_substring_pool (
1651 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1652 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category from LEXER and stores it in *CAT.  The
   forms tried, in order, are:

   - OTHERNM and MISSING keywords.

   - SUBTOTAL and HSUBTOTAL, handled by ctables_table_parse_subtotal() with
     subcategory hiding off and on, respectively.

   - LO THRU followed by a string or a number.

   - A number, optionally followed by THRU and then HI or another number.

   - A string, optionally followed by THRU and then HI or another string.

   - '&' followed by an identifier that must name a known postcompute.

   Open-ended numeric ranges use -DBL_MAX or DBL_MAX as the missing bound;
   open-ended string ranges use a substring whose 'string' member is null.
   Anything else is reported through lex_error(). */
1658 ctables_table_parse_explicit_category (struct lexer *lexer,
1659 struct dictionary *dict,
1661 struct ctables_category *cat)
1663 if (lex_match_id (lexer, "OTHERNM"))
1664 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1665 else if (lex_match_id (lexer, "MISSING"))
1666 *cat = (struct ctables_category) { .type = CCT_MISSING };
1667 else if (lex_match_id (lexer, "SUBTOTAL"))
1668 return ctables_table_parse_subtotal (lexer, false, cat);
1669 else if (lex_match_id (lexer, "HSUBTOTAL"))
1670 return ctables_table_parse_subtotal (lexer, true, cat);
1671 else if (lex_match_id (lexer, "LO"))
1673 if (!lex_force_match_id (lexer, "THRU"))
1675 if (lex_is_string (lexer))
1677 struct substring sr0 = { .string = NULL };
1678 struct substring sr1 = parse_substring (lexer, dict);
1679 *cat = cct_srange (sr0, sr1);
1681 else if (lex_force_num (lexer))
1683 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1689 else if (lex_is_number (lexer))
1691 double number = lex_number (lexer);
1693 if (lex_match_id (lexer, "THRU"))
1695 if (lex_match_id (lexer, "HI"))
1696 *cat = cct_nrange (number, DBL_MAX);
1699 if (!lex_force_num (lexer))
1701 *cat = cct_nrange (number, lex_number (lexer));
1706 *cat = (struct ctables_category) {
1711 else if (lex_is_string (lexer))
1713 struct substring s = parse_substring (lexer, dict);
1714 if (lex_match_id (lexer, "THRU"))
1716 if (lex_match_id (lexer, "HI"))
1718 struct substring sr1 = { .string = NULL };
1719 *cat = cct_srange (s, sr1);
1723 if (!lex_force_string (lexer))
1728 struct substring sr1 = parse_substring (lexer, dict);
1729 *cat = cct_srange (s, sr1);
1733 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* '&' is lexed as T_AND; here it introduces a postcompute reference. */
1735 else if (lex_match (lexer, T_AND))
1737 if (!lex_force_id (lexer))
1739 struct ctables_postcompute *pc = ctables_find_postcompute (
1740 ct, lex_tokcstr (lexer));
/* The error location spans the '&' (offset -1) through the name. */
1743 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1744 msg_at (SE, loc, _("Unknown postcompute &%s."),
1745 lex_tokcstr (lexer));
1746 msg_location_destroy (loc);
1751 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1755 lex_error (lexer, NULL);
/* Parses S as data in the given FORMAT, using DICT's encoding and the
   current format settings.  If data_in() reports an error, a message naming
   the format and quoting that error is issued at LOCATION.
   NOTE(review): callers test the result as a success flag and pass the
   address of a double as N, so the parsed number is presumably stored in
   *N on success; confirm. */
1763 parse_category_string (struct msg_location *location,
1764 struct substring s, const struct dictionary *dict,
1765 enum fmt_type format, double *n)
1768 char *error = data_in (s, dict_get_encoding (dict), format,
1769 settings_get_fmt_settings (), &v, 0, NULL);
1772 msg_at (SE, location,
1773 _("Failed to parse category specification as format %s: %s."),
1774 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  E->op selects how categories are matched:

   - CTPO_CAT_NUMBER: a CCT_NUMBER category with an equal number.

   - CTPO_CAT_STRING: a CCT_STRING category with an equal string.

   - CTPO_CAT_NRANGE, CTPO_CAT_SRANGE: a range category of the same kind
     whose two endpoints both equal E's (string endpoints may be null).

   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
     corresponding type.

   - CTPO_CAT_SUBTOTAL: a CCT_SUBTOTAL category selected by position, by
     comparing E->subtotal_index with a running count of the subtotals
     seen.  Index 0 is handled separately, and after the loop an index-0
     reference is special-cased when CATS holds more than one subtotal.
     NOTE(review): presumably index 0 means "the only subtotal" and is
     rejected as ambiguous in that case; confirm. */
1783 static struct ctables_category *
1784 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1785 const struct ctables_pcexpr *e)
1787 struct ctables_category *best = NULL;
1788 size_t n_subtotals = 0;
1789 for (size_t i = 0; i < cats->n_cats; i++)
1791 struct ctables_category *cat = &cats->cats[i];
1794 case CTPO_CAT_NUMBER:
1795 if (cat->type == CCT_NUMBER && cat->number == e->number)
1799 case CTPO_CAT_STRING:
1800 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1804 case CTPO_CAT_NRANGE:
1805 if (cat->type == CCT_NRANGE
1806 && cat->nrange[0] == e->nrange[0]
1807 && cat->nrange[1] == e->nrange[1])
1811 case CTPO_CAT_SRANGE:
1812 if (cat->type == CCT_SRANGE
1813 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1814 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1818 case CTPO_CAT_MISSING:
1819 if (cat->type == CCT_MISSING)
1823 case CTPO_CAT_OTHERNM:
1824 if (cat->type == CCT_OTHERNM)
1828 case CTPO_CAT_SUBTOTAL:
1829 if (cat->type == CCT_SUBTOTAL)
1832 if (e->subtotal_index == n_subtotals)
1834 else if (e->subtotal_index == 0)
1839 case CTPO_CAT_TOTAL:
1840 if (cat->type == CCT_TOTAL)
1854 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references in E are first
   converted to numeric ones by parsing them in PARSE_FORMAT: a
   CTPO_CAT_STRING node becomes a temporary CTPO_CAT_NUMBER node, and a
   CTPO_CAT_SRANGE node becomes a temporary CTPO_CAT_NRANGE node in which a
   null lower or upper endpoint maps to -DBL_MAX or DBL_MAX, respectively.
   The temporary node keeps E's location.

   Every other case is looked up with E as given. */
1859 static struct ctables_category *
1860 ctables_find_category_for_postcompute (const struct dictionary *dict,
1861 const struct ctables_categories *cats,
1862 enum fmt_type parse_format,
1863 const struct ctables_pcexpr *e)
1865 if (parse_format != FMT_F)
1867 if (e->op == CTPO_CAT_STRING)
1870 if (!parse_category_string (e->location, e->string, dict,
1871 parse_format, &number))
1874 struct ctables_pcexpr e2 = {
1875 .op = CTPO_CAT_NUMBER,
1877 .location = e->location,
1879 return ctables_find_category_for_postcompute__ (cats, &e2);
1881 else if (e->op == CTPO_CAT_SRANGE)
1884 if (!e->srange[0].string)
1885 nrange[0] = -DBL_MAX;
1886 else if (!parse_category_string (e->location, e->srange[0], dict,
1887 parse_format, &nrange[0]))
1890 if (!e->srange[1].string)
1891 nrange[1] = DBL_MAX;
1892 else if (!parse_category_string (e->location, e->srange[1], dict,
1893 parse_format, &nrange[1]))
1896 struct ctables_pcexpr e2 = {
1897 .op = CTPO_CAT_NRANGE,
1898 .nrange = { nrange[0], nrange[1] },
1899 .location = e->location,
1901 return ctables_find_category_for_postcompute__ (cats, &e2);
1904 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks, recursively over postcompute expression E, that every category
   the expression refers to can be found in CATS.  PC_CAT is the computed
   category that owns the expression; its parse_format governs how string
   references are interpreted.  CATS_LOCATION is the syntax location of the
   category list, used in diagnostics.

   For a category-reference node, the category is looked up with
   ctables_find_category_for_postcompute().  The diagnostics visible here
   are:

   - A SUBTOTAL reference without a position (subtotal_index 0) when CATS
     holds more than one subtotal.

   - A reference to a category not in the list, followed by a note that
     suggests the fix, chosen according to the kind of reference.

   PC_CAT->pc->hide_source_cats is also consulted for such nodes.
   NOTE(review): presumably to mark the referenced category hidden;
   confirm.

   For other nodes, both elements of E->subs[] are checked if nonnull. */
1908 ctables_recursive_check_postcompute (struct dictionary *dict,
1909 const struct ctables_pcexpr *e,
1910 struct ctables_category *pc_cat,
1911 const struct ctables_categories *cats,
1912 const struct msg_location *cats_location)
1916 case CTPO_CAT_NUMBER:
1917 case CTPO_CAT_STRING:
1918 case CTPO_CAT_NRANGE:
1919 case CTPO_CAT_SRANGE:
1920 case CTPO_CAT_MISSING:
1921 case CTPO_CAT_OTHERNM:
1922 case CTPO_CAT_SUBTOTAL:
1923 case CTPO_CAT_TOTAL:
1925 struct ctables_category *cat = ctables_find_category_for_postcompute (
1926 dict, cats, pc_cat->parse_format, e);
1929 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1931 size_t n_subtotals = 0;
1932 for (size_t i = 0; i < cats->n_cats; i++)
1933 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
/* Although this branch requires more than one subtotal, ngettext()
   still takes both a singular and a plural message. */
1934 if (n_subtotals > 1)
1936 msg_at (SE, cats_location,
1937 ngettext ("These categories include %zu instance "
1938 "of SUBTOTAL or HSUBTOTAL, so references "
1939 "from computed categories must refer to "
1940 "subtotals by position, "
1941 "e.g. SUBTOTAL[1].",
1942 "These categories include %zu instances "
1943 "of SUBTOTAL or HSUBTOTAL, so references "
1944 "from computed categories must refer to "
1945 "subtotals by position, "
1946 "e.g. SUBTOTAL[1].",
1949 msg_at (SN, e->location,
1950 _("This is the reference that lacks a position."));
1955 msg_at (SE, pc_cat->location,
1956 _("Computed category &%s references a category not included "
1957 "in the category list."),
1959 msg_at (SN, e->location, _("This is the missing category."));
1960 if (e->op == CTPO_CAT_SUBTOTAL)
1961 msg_at (SN, cats_location,
1962 _("To fix the problem, add subtotals to the "
1963 "list of categories here."));
/* Unlike its siblings, this note is issued with msg(), without a
   location. */
1964 else if (e->op == CTPO_CAT_TOTAL)
1965 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1966 "CATEGORIES specification."));
1968 msg_at (SN, cats_location,
1969 _("To fix the problem, add the missing category to the "
1970 "list of categories here."));
1973 if (pc_cat->pc->hide_source_cats)
1987 for (size_t i = 0; i < 2; i++)
1988 if (e->subs[i] && !ctables_recursive_check_postcompute (
1989 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that none of the N_VARS variables in VARS is numeric.  For a
   numeric variable, an error that names the variable is reported at CAT's
   location, since CAT may only be applied to string variables.
   NOTE(review): callers test the result as a boolean, so this presumably
   returns false after reporting and true otherwise; confirm. */
1998 all_strings (struct variable **vars, size_t n_vars,
1999 const struct ctables_category *cat)
2001 for (size_t j = 0; j < n_vars; j++)
2002 if (var_is_numeric (vars[j]))
2004 msg_at (SE, cat->location,
2005 _("This category specification may be applied only to string "
2006 "variables, but this subcommand tries to apply it to "
2007 "numeric variable %s."),
2008 var_get_name (vars[j]));
/* Parses a CATEGORIES specification from LEXER for table T:

     VARIABLES=varlist [explicit categories...] options...

   A new ctables_categories object, with one reference per listed variable,
   takes the place of each variable's previous categories in
   T->categories[].

   Explicit categories, if present, are enclosed in square brackets and may
   be separated by commas.  The options ORDER, KEY, and MISSING are accepted
   only when no explicit categories were given; TOTAL, LABEL, POSITION, and
   EMPTY are always accepted.  Option parsing stops at '/' or end of
   command.

   When explicit categories were given, each is then checked against the
   variables: postcompute references are validated, numeric categories may
   not be applied to string variables, and string categories are either
   converted to numbers by parsing them in the variables' common print
   format or required to apply only to string variables. */
2015 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2016 struct ctables *ct, struct ctables_table *t)
2018 if (!lex_match_id (lexer, "VARIABLES"))
2020 lex_match (lexer, T_EQUALS);
2022 struct variable **vars;
2024 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables; a mismatch
   leaves no common format. */
2027 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2028 for (size_t i = 1; i < n_vars; i++)
2030 const struct fmt_spec *f = var_get_print_format (vars[i]);
2031 if (f->type != common_format->type)
2033 common_format = NULL;
2039 && (fmt_get_category (common_format->type)
2040 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One reference to C per variable; each variable's previous categories
   lose a reference. */
2042 struct ctables_categories *c = xmalloc (sizeof *c);
2043 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2044 for (size_t i = 0; i < n_vars; i++)
2046 struct ctables_categories **cp
2047 = &t->categories[var_get_dict_index (vars[i])];
2048 ctables_categories_unref (*cp);
/* Optional bracketed list of explicit categories.  The token offsets of
   the list are kept for use in later diagnostics. */
2052 size_t allocated_cats = 0;
2053 int cats_start_ofs = -1;
2054 int cats_end_ofs = -1;
2055 if (lex_match (lexer, T_LBRACK))
2057 cats_start_ofs = lex_ofs (lexer);
2060 if (c->n_cats >= allocated_cats)
2061 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2063 int start_ofs = lex_ofs (lexer);
2064 struct ctables_category *cat = &c->cats[c->n_cats];
2065 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2067 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2070 lex_match (lexer, T_COMMA);
2072 while (!lex_match (lexer, T_RBRACK));
2073 cats_end_ofs = lex_ofs (lexer) - 1;
/* Settings for the implicit category, which is used when no explicit
   categories were given. */
2076 struct ctables_category cat = {
2078 .include_missing = false,
2079 .sort_ascending = true,
2081 bool show_totals = false;
2082 char *total_label = NULL;
2083 bool totals_before = false;
2084 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2086 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2088 lex_match (lexer, T_EQUALS);
2089 if (lex_match_id (lexer, "A"))
2090 cat.sort_ascending = true;
2091 else if (lex_match_id (lexer, "D"))
2092 cat.sort_ascending = false;
2095 lex_error_expecting (lexer, "A", "D");
2099 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2101 lex_match (lexer, T_EQUALS);
2102 if (lex_match_id (lexer, "VALUE"))
2103 cat.type = CCT_VALUE;
2104 else if (lex_match_id (lexer, "LABEL"))
2105 cat.type = CCT_LABEL;
2108 cat.type = CCT_FUNCTION;
2109 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2110 &cat.weighted, &cat.area))
2113 if (lex_match (lexer, T_LPAREN))
2115 cat.sort_var = parse_variable (lexer, dict);
2119 if (cat.sort_function == CTSF_PTILE)
2121 lex_match (lexer, T_COMMA);
2122 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2124 cat.percentile = lex_number (lexer);
2128 if (!lex_force_match (lexer, T_RPAREN))
2131 else if (ctables_function_availability (cat.sort_function)
2134 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2139 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2141 lex_match (lexer, T_EQUALS);
2142 if (lex_match_id (lexer, "INCLUDE"))
2143 cat.include_missing = true;
2144 else if (lex_match_id (lexer, "EXCLUDE"))
2145 cat.include_missing = false;
2148 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2152 else if (lex_match_id (lexer, "TOTAL"))
2154 lex_match (lexer, T_EQUALS);
2155 if (!parse_bool (lexer, &show_totals))
2158 else if (lex_match_id (lexer, "LABEL"))
2160 lex_match (lexer, T_EQUALS);
2161 if (!lex_force_string (lexer))
2164 total_label = ss_xstrdup (lex_tokss (lexer));
2167 else if (lex_match_id (lexer, "POSITION"))
2169 lex_match (lexer, T_EQUALS);
2170 if (lex_match_id (lexer, "BEFORE"))
2171 totals_before = true;
2172 else if (lex_match_id (lexer, "AFTER"))
2173 totals_before = false;
2176 lex_error_expecting (lexer, "BEFORE", "AFTER");
2180 else if (lex_match_id (lexer, "EMPTY"))
2182 lex_match (lexer, T_EQUALS);
2183 if (lex_match_id (lexer, "INCLUDE"))
2184 c->show_empty = true;
2185 else if (lex_match_id (lexer, "EXCLUDE"))
2186 c->show_empty = false;
2189 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2196 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2197 "TOTAL", "LABEL", "POSITION", "EMPTY");
2199 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2206 if (c->n_cats >= allocated_cats)
2207 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2208 c->cats[c->n_cats++] = cat;
/* The totals category goes at the beginning or the end of the list.  Its
   label defaults to the translated string "Total". */
2213 if (c->n_cats >= allocated_cats)
2214 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2216 struct ctables_category *totals;
2219 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2220 totals = &c->cats[0];
2223 totals = &c->cats[c->n_cats];
2226 *totals = (struct ctables_category) {
2228 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come before and backward
   otherwise, recording a subtotal in each category visited. */
2232 struct ctables_category *subtotal = NULL;
2233 for (size_t i = totals_before ? 0 : c->n_cats;
2234 totals_before ? i < c->n_cats : i-- > 0;
2235 totals_before ? i++ : 0)
2237 struct ctables_category *cat = &c->cats[i];
2246 cat->subtotal = subtotal;
2249 case CCT_POSTCOMPUTE:
2260 case CCT_EXCLUDED_MISSING:
/* Explicit categories were given: check each against the variables. */
2265 if (cats_start_ofs != -1)
2267 for (size_t i = 0; i < c->n_cats; i++)
2269 struct ctables_category *cat = &c->cats[i];
2272 case CCT_POSTCOMPUTE:
2273 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2274 struct msg_location *cats_location
2275 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2276 bool ok = ctables_recursive_check_postcompute (
2277 dict, cat->pc->expr, cat, c, cats_location);
2278 msg_location_destroy (cats_location);
2285 for (size_t j = 0; j < n_vars; j++)
2286 if (var_is_alpha (vars[j]))
2288 msg_at (SE, cat->location,
2289 _("This category specification may be applied "
2290 "only to numeric variables, but this "
2291 "subcommand tries to apply it to string "
2293 var_get_name (vars[j]));
/* A string category parsed in the common format becomes a number
   category, and its string is released. */
2302 if (!parse_category_string (cat->location, cat->string, dict,
2303 common_format->type, &n))
2306 ss_dealloc (&cat->string);
2308 cat->type = CCT_NUMBER;
2311 else if (!all_strings (vars, n_vars, cat))
/* Likewise, a string range becomes a numeric range. */
2320 if (!cat->srange[0].string)
2322 else if (!parse_category_string (cat->location,
2323 cat->srange[0], dict,
2324 common_format->type, &n[0]))
2327 if (!cat->srange[1].string)
2329 else if (!parse_category_string (cat->location,
2330 cat->srange[1], dict,
2331 common_format->type, &n[1]))
2334 ss_dealloc (&cat->srange[0]);
2335 ss_dealloc (&cat->srange[1]);
2337 cat->type = CCT_NRANGE;
2338 cat->nrange[0] = n[0];
2339 cat->nrange[1] = n[1];
2341 else if (!all_strings (vars, n_vars, cat))
2352 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: the summary spec set for each summary
   variant and the index array for each area type.  NEST itself is not
   freed by the statements shown here. */
2367 ctables_nest_uninit (struct ctables_nest *nest)
2370 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2371 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2372 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2373 free (nest->areas[at]);
/* Uninitializes each of the STACK->n nests in STACK and frees the array
   that holds them. */
2377 ctables_stack_uninit (struct ctables_stack *stack)
2381 for (size_t i = 0; i < stack->n; i++)
2382 ctables_nest_uninit (&stack->nests[i]);
2383 free (stack->nests);
/* Returns the stack formed by nesting every nest in S1 inside every nest in
   S0, that is, their cross product with S0.n * S1.n nests.  Each new nest's
   variables are those of the S0 nest followed by those of the S1 nest.  Its
   scale and summary indexes come from the S0 nest when that has one, and
   otherwise from the S1 nest, offset by the number of S0 variables.

   The summary specs are cloned from one of the two source nests, chosen by
   testing which of them has no CSV_CELL summary variable.

   S0 and S1 are both uninitialized here, so the caller must not use them
   afterward. */
2387 static struct ctables_stack
2388 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2395 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2396 for (size_t i = 0; i < s0.n; i++)
2397 for (size_t j = 0; j < s1.n; j++)
2399 const struct ctables_nest *a = &s0.nests[i];
2400 const struct ctables_nest *b = &s1.nests[j];
/* Outer variables first, then inner ones. */
2402 size_t allocate = a->n + b->n;
2403 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2405 for (size_t k = 0; k < a->n; k++)
2406 vars[n++] = a->vars[k];
2407 for (size_t k = 0; k < b->n; k++)
2408 vars[n++] = b->vars[k];
2409 assert (n == allocate);
2411 const struct ctables_nest *summary_src;
2412 if (!a->specs[CSV_CELL].var)
2414 else if (!b->specs[CSV_CELL].var)
2419 struct ctables_nest *new = &stack.nests[stack.n++];
2420 *new = (struct ctables_nest) {
/* SIZE_MAX means "no such variable".  Indexes taken from B are shifted
   past A's variables. */
2422 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2423 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2425 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2426 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2430 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2431 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2433 ctables_stack_uninit (&s0);
2434 ctables_stack_uninit (&s1);
/* Returns the stack formed by concatenating S0's nests and S1's nests, in
   that order.  The nests are copied by value into a newly allocated array.
   Each nest copied from S1 has its group_head increased by S0.n, since its
   position in the combined stack shifts by that amount. */
2438 static struct ctables_stack
2439 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2441 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2442 for (size_t i = 0; i < s0.n; i++)
2443 stack.nests[stack.n++] = s0.nests[i];
2444 for (size_t i = 0; i < s1.n; i++)
2446 stack.nests[stack.n] = s1.nests[i];
2447 stack.nests[stack.n].group_head += s0.n;
2450 assert (stack.n == s0.n + s1.n);
/* Returns a stack that holds a single nest for variable axis node A.  The
   nest's scale index is 0 if A is a scale variable and SIZE_MAX otherwise.
   Its summary index is 0 if A has CSV_CELL summary specs or is a scale
   variable, and SIZE_MAX otherwise.

   Each summary variant's specs are cloned from A and tagged with A's
   variable and scale flag. */
2456 static struct ctables_stack
2457 var_fts (const struct ctables_axis *a)
2459 struct variable **vars = xmalloc (sizeof *vars);
2462 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2463 struct ctables_nest *nest = xmalloc (sizeof *nest);
2464 *nest = (struct ctables_nest) {
2467 .scale_idx = a->scale ? 0 : SIZE_MAX,
2468 .summary_idx = is_summary ? 0 : SIZE_MAX,
2471 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2473 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2474 nest->specs[sv].var = a->var;
2475 nest->specs[sv].is_scale = a->scale;
2477 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Expands the axis tree rooted at A into a stack of nests.  One case yields
   an empty stack (NOTE(review): presumably a null A; confirm).  The two
   subtrees of A are enumerated recursively and combined with stack_fts()
   or with nest_fts().
   NOTE(review): presumably selected by A->op being CTAO_STACK or CTAO_NEST;
   confirm. */
2480 static struct ctables_stack
2481 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2484 return (struct ctables_stack) { .n = 0 };
2492 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2493 enumerate_fts (axis_type, a->subs[1]));
2496 /* This should consider any of the scale variables found in the result to
2497 be linked to each other listwise for SMISSING=LISTWISE. */
2498 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2499 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  Which member is in use depends on
   the summary function, as the comments on the members indicate. */
2505 union ctables_summary
2507 /* COUNT, VALIDN, TOTALN. */
2510 /* MINIMUM, MAXIMUM, RANGE. */
2517 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2518 struct moments1 *moments;
2520 /* MEDIAN, MODE, PTILE. */
2523 struct casewriter *writer;
2528 /* XXX multiple response */
/* Initializes accumulator S for the summary function SS->function.  The
   initializations visible here are:

   - Minimum/maximum tracking starts with both set to SYSMIS.

   - Moment-based functions get a moments1 accumulator, created for
     MOMENT_MEAN or MOMENT_VARIANCE depending on the function.

   - Order-statistic functions get a sorting casewriter over cases with two
     numeric columns, sorted in ascending order on column 0. */
2532 ctables_summary_init (union ctables_summary *s,
2533 const struct ctables_summary_spec *ss)
2535 switch (ss->function)
2539 case CTSF_areaPCT_COUNT:
2540 case CTSF_areaPCT_VALIDN:
2541 case CTSF_areaPCT_TOTALN:
2556 s->min = s->max = SYSMIS;
2561 case CTSF_areaPCT_SUM:
2562 s->moments = moments1_create (MOMENT_MEAN);
2568 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric (width 0) columns. */
2575 struct caseproto *proto = caseproto_create ();
2576 proto = caseproto_add_width (proto, 0);
2577 proto = caseproto_add_width (proto, 0);
2579 struct subcase ordering;
2580 subcase_init (&ordering, 0, 0, SC_ASCEND);
2581 s->writer = sort_create_writer (&ordering, proto);
2582 subcase_uninit (&ordering);
2583 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function
   SS->function: the moments1 object for moment-based functions or the
   casewriter for order-statistic functions. */
2593 ctables_summary_uninit (union ctables_summary *s,
2594 const struct ctables_summary_spec *ss)
2596 switch (ss->function)
2600 case CTSF_areaPCT_COUNT:
2601 case CTSF_areaPCT_VALIDN:
2602 case CTSF_areaPCT_TOTALN:
2623 case CTSF_areaPCT_SUM:
2624 moments1_destroy (s->moments);
2630 casewriter_destroy (s->writer);
/* Adds one case's contribution to accumulator S for the summary function
   SS->function.

   VAR and VALUE are the summarized variable and the case's value for it.
   IS_SCALE, IS_SCALE_MISSING, IS_MISSING, and IS_INCLUDED are flags that
   the branches below test to decide whether the case counts toward this
   summary.  D_WEIGHT and E_WEIGHT are two case weights; when SS->weighted
   is false, most branches substitute 1.0 for the weight.

   Counts are added to S->count.  Minimum and maximum treat SYSMIS as "no
   value yet".  Moment-based functions feed VALUE into S->moments.
   Order-statistic functions write a two-column case, value then weight, to
   S->writer. */
2636 ctables_summary_add (union ctables_summary *s,
2637 const struct ctables_summary_spec *ss,
2638 const struct variable *var, const union value *value,
2639 bool is_scale, bool is_scale_missing,
2640 bool is_missing, bool is_included,
2641 double d_weight, double e_weight)
2643 /* To determine whether a case is included in a given table for a particular
2644 kind of summary, consider the following charts for each variable in the
2645 table. Only if "yes" appears for every variable for the summary is the
2648 Categorical variables: VALIDN COUNT TOTALN
2649 Valid values in included categories yes yes yes
2650 Missing values in included categories --- yes yes
2651 Missing values in excluded categories --- --- yes
2652 Valid values in excluded categories --- --- ---
2654 Scale variables: VALIDN COUNT TOTALN
2655 Valid value yes yes yes
2656 Missing value --- yes yes
2658 Missing values include both user- and system-missing. (The system-missing
2659 value is always in an excluded category.)
2661 switch (ss->function)
2664 s->count += ss->weighted ? d_weight : 1.0;
2667 case CTSF_areaPCT_TOTALN:
2668 s->count += ss->weighted ? e_weight : 1.0;
2672 if (is_scale || is_included)
2673 s->count += ss->weighted ? d_weight : 1.0;
2676 case CTSF_areaPCT_COUNT:
2677 if (is_scale || is_included)
2678 s->count += ss->weighted ? e_weight : 1.0;
2685 s->count += ss->weighted ? d_weight : 1.0;
2688 case CTSF_areaPCT_VALIDN:
2692 s->count += ss->weighted ? e_weight : 1.0;
2702 s->count += ss->weighted ? e_weight : 1.0;
2706 if (is_scale || is_included)
2707 s->count += e_weight;
2714 s->count += e_weight;
2718 s->count += e_weight;
2724 if (!is_scale_missing)
2726 assert (!var_is_alpha (var)); /* XXX? */
2727 if (s->min == SYSMIS || value->f < s->min)
2729 if (s->max == SYSMIS || value->f > s->max)
2739 if (!is_scale_missing)
2740 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2743 case CTSF_areaPCT_SUM:
2744 if (!is_missing && !is_scale_missing)
2745 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2751 if (!is_scale_missing)
2753 double w = ss->weighted ? e_weight : 1.0;
2756 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2757 *case_num_rw_idx (c, 0) = value->f;
2758 *case_num_rw_idx (c, 1) = w;
2759 casewriter_write (s->writer, c);
/* Computes the final value of accumulator S in CELL for the summary
   function SS->function.  The computations visible here are:

   - Percentages of counts divide S->count by the matching count (weighted
     or unweighted, per SS->weighted) of the area CELL->areas[SS->calc_area]
     and multiply by 100, yielding SYSMIS for a zero denominator.

   - The range is maximum minus minimum, or SYSMIS if either is SYSMIS.

   - Moment-based results come from moments1_calculate().  The sum is the
     weight times the mean.  The percentage of a sum divides that by the
     area's sum for SS->sum_var_idx, yielding SYSMIS for a zero denominator.

   - Order statistics turn S->writer into a reader and accumulate it into a
     percentile (0.5 unless the function is CTSF_PTILE) or a mode.  The
     result is stored in S->ovalue. */
2766 ctables_summary_value (const struct ctables_cell *cell,
2767 union ctables_summary *s,
2768 const struct ctables_summary_spec *ss)
2770 switch (ss->function)
2777 return cell->areas[ss->calc_area]->sequence;
2779 case CTSF_areaPCT_COUNT:
2781 const struct ctables_area *a = cell->areas[ss->calc_area];
2782 double a_count = ss->weighted ? a->e_count : a->u_count;
2783 return a_count ? s->count / a_count * 100 : SYSMIS;
2786 case CTSF_areaPCT_VALIDN:
2788 const struct ctables_area *a = cell->areas[ss->calc_area];
2789 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2790 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2793 case CTSF_areaPCT_TOTALN:
2795 const struct ctables_area *a = cell->areas[ss->calc_area];
2796 double a_total = ss->weighted ? a->e_total : a->u_total;
2797 return a_total ? s->count / a_total * 100 : SYSMIS;
2814 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2819 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2825 double weight, variance;
2826 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2827 return calc_semean (variance, weight);
2833 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2834 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2839 double weight, mean;
2840 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2841 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2847 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2851 case CTSF_areaPCT_SUM:
2853 double weight, mean;
2854 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2855 if (weight == SYSMIS || mean == SYSMIS)
2858 const struct ctables_area *a = cell->areas[ss->calc_area];
2859 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2860 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2861 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2868 struct casereader *reader = casewriter_make_reader (s->writer);
2871 struct percentile *ptile = percentile_create (
2872 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2873 struct order_stats *os = &ptile->parent;
2874 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2875 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2876 statistic_destroy (&ptile->parent.parent);
2883 struct casereader *reader = casewriter_make_reader (s->writer);
2886 struct mode *mode = mode_create ();
2887 struct order_stats *os = &mode->parent;
2888 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2889 s->ovalue = mode->mode;
2890 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   define the ordering and the axis whose cell values are compared. */
2898 struct ctables_cell_sort_aux
2900 const struct ctables_nest *nest;
2901 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   pointer to a struct ctables_cell.  AUX_ points to a struct
   ctables_cell_sort_aux that supplies the nest and the axis to compare on.

   The nest's variables are compared in order, skipping the scale variable.
   For each one, cells in different categories are ordered by category
   address.  Cells in the same category are ordered according to the
   category's type: by value, or by value label with the value as a
   fallback, with the result negated for categories that do not sort in
   ascending order. */
2905 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2907 const struct ctables_cell_sort_aux *aux = aux_;
2908 struct ctables_cell *const *ap = a_;
2909 struct ctables_cell *const *bp = b_;
2910 const struct ctables_cell *a = *ap;
2911 const struct ctables_cell *b = *bp;
2913 const struct ctables_nest *nest = aux->nest;
2914 for (size_t i = 0; i < nest->n; i++)
2915 if (i != nest->scale_idx)
2917 const struct variable *var = nest->vars[i];
2918 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2919 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2920 if (a_cv->category != b_cv->category)
2921 return a_cv->category > b_cv->category ? 1 : -1;
2923 const union value *a_val = &a_cv->value;
2924 const union value *b_val = &b_cv->value;
2925 switch (a_cv->category->type)
2931 case CCT_POSTCOMPUTE:
2932 case CCT_EXCLUDED_MISSING:
2933 /* Must be equal. */
2941 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2949 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2951 return a_cv->category->sort_ascending ? cmp : -cmp;
2957 const char *a_label = var_lookup_value_label (var, a_val);
2958 const char *b_label = var_lookup_value_label (var, b_val);
2964 cmp = strcmp (a_label, b_label);
2970 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2973 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes.  A_ and B_ each
   point to a pointer to a struct ctables_cell.  The 'leaf' members are
   compared axis by axis, in pivot axis order.  AUX is unused. */
2985 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2986 const void *aux UNUSED)
2988 struct ctables_cell *const *ap = a_;
2989 struct ctables_cell *const *bp = b_;
2990 const struct ctables_cell *a = *ap;
2991 const struct ctables_cell *b = *bp;
2993 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2995 int al = a->axes[axis].leaf;
2996 int bl = b->axes[axis].leaf;
2998 return al > bl ? 1 : -1;
3006 For each ctables_table:
3007 For each combination of row vars:
3008 For each combination of column vars:
3009 For each combination of layer vars:
3011 Make a table of row values:
3012 Sort entries by row values
3013 Assign a 0-based index to each actual value
3014 Construct a dimension
3015 Make a table of column values
3016 Make a table of layer values
3018 Fill the table entry using the indexes from before.
/* Finds or creates the area of type AREA in section S that CELL belongs to.

   An area is identified by the categories and values of the variables that
   each axis's nest lists in areas[AREA].  The category pointer always
   contributes to the hash and to the comparison.  The value contributes
   too, except for total, subtotal, and postcompute categories.

   If no existing area in S->areas[AREA] matches, a new one is allocated
   with CELL as its example and, when the table has sum variables, a zeroed
   array of S->table->n_sum_vars sums.  It is then inserted into the map. */
3021 static struct ctables_area *
3022 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3023 enum ctables_area_type area)
3026 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3028 const struct ctables_nest *nest = s->nests[a];
3029 for (size_t i = 0; i < nest->n_areas[area]; i++)
3031 size_t v_idx = nest->areas[area][i];
3032 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3033 hash = hash_pointer (cv->category, hash);
3034 if (cv->category->type != CCT_TOTAL
3035 && cv->category->type != CCT_SUBTOTAL
3036 && cv->category->type != CCT_POSTCOMPUTE)
3037 hash = value_hash (&cv->value,
3038 var_get_width (nest->vars[v_idx]), hash);
3042 struct ctables_area *a;
3043 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3045 const struct ctables_cell *df = a->example;
/* The axis loop variable 'a' below shadows the area pointer 'a' declared
   above for the extent of the loop. */
3046 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3048 const struct ctables_nest *nest = s->nests[a];
3049 for (size_t i = 0; i < nest->n_areas[area]; i++)
3051 size_t v_idx = nest->areas[area][i];
3052 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3053 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3054 if (cv1->category != cv2->category
3055 || (cv1->category->type != CCT_TOTAL
3056 && cv1->category->type != CCT_SUBTOTAL
3057 && cv1->category->type != CCT_POSTCOMPUTE
3058 && !value_equal (&cv1->value, &cv2->value,
3059 var_get_width (nest->vars[v_idx]))))
3068 struct ctables_sum *sums = (s->table->n_sum_vars
3069 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3072 a = xmalloc (sizeof *a);
3073 *a = (struct ctables_area) { .example = cell, .sums = sums };
3074 hmap_insert (&s->areas[area], &a->node, hash);
/* Views string value V, whose width is that of VAR, as a substring with
   trailing spaces trimmed.  The substring is built over V's own bytes
   rather than a copy. */
3078 static struct substring
3079 rtrim_value (const union value *v, const struct variable *var)
3081 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3082 var_get_width (var));
3083 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if string value V of variable VAR, with trailing spaces
   trimmed, lies within the inclusive range SRANGE[0]...SRANGE[1].  An
   endpoint whose 'string' member is null leaves that end of the range
   unbounded. */
3088 in_string_range (const union value *v, const struct variable *var,
3089 const struct substring *srange)
3091 struct substring s = rtrim_value (v, var);
3092 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3093 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is handled first, before any category is
   examined.  The categories are then scanned from last to first, so that
   when several could match, the one listed latest is considered first.
   Numbers match on equality, strings on equality after trimming trailing
   spaces, and ranges on inclusive bounds, with -DBL_MAX, DBL_MAX, and null
   strings marking open ends.

   If the scan ends without a match, the result is NULL for a missing value
   and otherwise the OTHERNM category noted during the scan, if any. */
3096 static const struct ctables_category *
3097 ctables_categories_match (const struct ctables_categories *c,
3098 const union value *v, const struct variable *var)
3100 if (var_is_numeric (var) && v->f == SYSMIS)
3103 const struct ctables_category *othernm = NULL;
3104 for (size_t i = c->n_cats; i-- > 0; )
3106 const struct ctables_category *cat = &c->cats[i];
3110 if (cat->number == v->f)
3115 if (ss_equals (cat->string, rtrim_value (v, var)))
3120 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3121 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3126 if (in_string_range (v, var, cat->srange))
3131 if (var_is_value_missing (var, v))
3135 case CCT_POSTCOMPUTE:
3150 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3153 case CCT_EXCLUDED_MISSING:
3158 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C when it is C's first or last category.
   Only those two positions are examined.  The final alternative of the
   conditional is not visible in this excerpt; presumably it is NULL.

   NOTE(review): this indexes cats[n_cats - 1] unconditionally, so it assumes
   C has at least one category -- confirm that callers guarantee this. */
3161 static const struct ctables_category *
3162 ctables_categories_total (const struct ctables_categories *c)
3164 const struct ctables_category *first = &c->cats[0];
3165 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3166 return (first->type == CCT_TOTAL ? first
3167 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S identified by categories CATS and,
   for categories that are not totals, subtotals, or postcomputes, by case C's
   values of the corresponding variables.

   Lookup is through the S->cells hash map.  A new cell gets a copy of each
   value, a set of initialized summaries, and an area for every area type, and
   is then inserted into the map.  Scale variables (nest->scale_idx) take no
   part in hashing or comparison.

   Some statements (including the return statements) are not visible in this
   excerpt; presumably the found or newly created cell is returned. */
3171 static struct ctables_cell *
3172 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3173 const struct ctables_category *cats[PIVOT_N_AXES][10])
3176 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the category pointer of every non-scale variable, plus the case's data
   value when the category is an ordinary one. */
3177 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3179 const struct ctables_nest *nest = s->nests[a];
3180 for (size_t i = 0; i < nest->n; i++)
3181 if (i != nest->scale_idx)
3183 hash = hash_pointer (cats[a][i], hash);
3184 if (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE)
3187 hash = value_hash (case_data (c, nest->vars[i]),
3188 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for a cell with the same categories and (for
   ordinary categories) equal values. */
3194 struct ctables_cell *cell;
3195 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3199 const struct ctables_nest *nest = s->nests[a];
3200 for (size_t i = 0; i < nest->n; i++)
3201 if (i != nest->scale_idx
3202 && (cats[a][i] != cell->axes[a].cvs[i].category
3203 || (cats[a][i]->type != CCT_TOTAL
3204 && cats[a][i]->type != CCT_SUBTOTAL
3205 && cats[a][i]->type != CCT_POSTCOMPUTE
3206 && !value_equal (case_data (c, nest->vars[i]),
3207 &cell->axes[a].cvs[i].value,
3208 var_get_width (nest->vars[i])))))
/* Create and initialize a new cell. */
3217 cell = xmalloc (sizeof *cell);
3220 cell->omit_areas = 0;
3221 cell->postcompute = false;
3222 //struct string name = DS_EMPTY_INITIALIZER;
3223 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3225 const struct ctables_nest *nest = s->nests[a];
3226 cell->axes[a].cvs = (nest->n
3227 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3229 for (size_t i = 0; i < nest->n; i++)
3231 const struct ctables_category *cat = cats[a][i];
3232 const struct variable *var = nest->vars[i];
3233 const union value *value = case_data (c, var);
3234 if (i != nest->scale_idx)
/* A category that is hidden, or whose subtotal hides its subcategories,
   affects the cell (the action taken is not visible here). */
3236 const struct ctables_category *subtotal = cat->subtotal;
3237 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Totals, subtotals, and postcomputes mark, per axis, a set of area types in
   the cell's omit_areas bitmask (one bit per area type). */
3240 if (cat->type == CCT_TOTAL
3241 || cat->type == CCT_SUBTOTAL
3242 || cat->type == CCT_POSTCOMPUTE)
3244 /* XXX these should be more encompassing I think.*/
3248 case PIVOT_AXIS_COLUMN:
3249 cell->omit_areas |= ((1u << CTAT_TABLE) |
3250 (1u << CTAT_LAYER) |
3251 (1u << CTAT_LAYERCOL) |
3252 (1u << CTAT_SUBTABLE) |
3255 case PIVOT_AXIS_ROW:
3256 cell->omit_areas |= ((1u << CTAT_TABLE) |
3257 (1u << CTAT_LAYER) |
3258 (1u << CTAT_LAYERROW) |
3259 (1u << CTAT_SUBTABLE) |
3262 case PIVOT_AXIS_LAYER:
3263 cell->omit_areas |= ((1u << CTAT_TABLE) |
3264 (1u << CTAT_LAYER));
3268 if (cat->type == CCT_POSTCOMPUTE)
3269 cell->postcompute = true;
/* Record the category and a private copy of the value for this variable. */
3272 cell->axes[a].cvs[i].category = cat;
3273 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the passage below builds a debugging name in NAME, whose only
   visible declaration (above) is commented out.  Presumably it is disabled by
   preprocessor directives that are not visible in this excerpt -- confirm. */
3276 if (i != nest->scale_idx)
3278 if (!ds_is_empty (&name))
3279 ds_put_cstr (&name, ", ");
3280 char *value_s = data_out (value, var_get_encoding (var),
3281 var_get_print_format (var),
3282 settings_get_fmt_settings ());
3283 if (cat->type == CCT_TOTAL
3284 || cat->type == CCT_SUBTOTAL
3285 || cat->type == CCT_POSTCOMPUTE)
3286 ds_put_format (&name, "%s=total", var_get_name (var));
3288 ds_put_format (&name, "%s=%s", var_get_name (var),
3289 value_s + strspn (value_s, " "));
3295 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per spec in the summary axis's spec set
   for this cell's summary variant, attach an area of each type, and add the
   cell to the section's hash map. */
3297 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3298 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3299 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3300 for (size_t i = 0; i < specs->n; i++)
3301 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3302 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3303 cell->areas[at] = ctables_area_insert (s, cell, at);
3304 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C's numeric value for each of SPECS's listwise variables
   against that variable's missing values (var_is_num_missing).

   The return statements are not visible in this excerpt; presumably the
   function returns true as soon as one value is missing and false if none
   is. */
3309 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3310 const struct ccase *c)
3312 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3314 const struct variable *var = specs->listwise_vars[i];
3315 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of section S
   selected by CATS (found or created by ctables_cell_insert__).

   Every summary of the cell is updated through ctables_summary_add, and the
   totals, counts, valid counts, and per-variable sums of the cell's areas are
   updated, except for areas whose bit is set in the cell's omit_areas mask.

   For a scale summary variable the case counts as included when its value is
   not missing, and as scale-missing when the value or any listwise variable
   is missing.  Otherwise it counts as included unless the category on the
   summary variable is CCT_EXCLUDED_MISSING.

   Some statements (declarations and conditionals) are not visible in this
   excerpt. */
3323 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3324 const struct ctables_category *cats[PIVOT_N_AXES][10],
3325 double d_weight, double e_weight)
3327 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3328 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3330 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3331 const union value *value = case_data (c, specs->var);
3332 bool is_missing = var_is_value_missing (specs->var, value);
3335 if (specs->is_scale)
3337 is_included = !is_missing;
3338 scale_missing = is_missing || is_listwise_missing (specs, c);
3342 is_included = (cats[s->table->summary_axis][ss->summary_idx]->type
3343 != CCT_EXCLUDED_MISSING);
3344 scale_missing = false;
3347 for (size_t i = 0; i < specs->n; i++)
3348 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3349 specs->var, value, specs->is_scale,
3350 scale_missing, is_missing, is_included,
3351 d_weight, e_weight);
3352 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* omit_areas is a bitmask with one (1u << area type) bit per area, so it must
   be tested with bitwise '&'.  A logical '&&' against the always-nonzero
   (1u << at) would skip every area as soon as any single bit was set. */
3353 if (!(cell->omit_areas & (1u << at)))
3355 struct ctables_area *a = cell->areas[at];
3356 a->d_total += d_weight;
3357 a->e_total += e_weight;
3361 a->d_count += d_weight;
3362 a->e_count += e_weight;
3367 a->d_valid += d_weight;
3368 a->e_valid += e_weight;
/* Accumulate weighted (e_sum) and unweighted (u_sum) sums of each summed
   variable whose value in C is not missing. */
3371 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3373 /* XXX listwise_missing??? */
3374 const struct variable *var = s->table->sum_vars[i];
3375 double addend = case_num (c, var);
3376 if (!var_is_num_missing (var, addend))
3378 struct ctables_sum *sum = &a->sums[i];
3379 sum->e_sum += addend * e_weight;
3380 sum->u_sum += addend;
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the total cells that it
   contributes to in section S.

   Walks the non-scale variables starting at axis START_AXIS and nest position
   START_NEST.  For each variable it looks up the total category in that
   variable's categories; the visible code then adds the case to a cell via
   ctables_cell_add__ and recurses from the next nest position, so that totals
   on several variables combine.  The lines that substitute the total into
   CATS and restore the saved category are not visible in this excerpt. */
3388 recurse_totals (struct ctables_section *s, const struct ccase *c,
3389 const struct ctables_category *cats[PIVOT_N_AXES][10],
3390 double d_weight, double e_weight,
3391 enum pivot_axis_type start_axis, size_t start_nest)
3393 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3395 const struct ctables_nest *nest = s->nests[a];
3396 for (size_t i = start_nest; i < nest->n; i++)
3398 if (i == nest->scale_idx)
3401 const struct variable *var = nest->vars[i];
3403 const struct ctables_category *total = ctables_categories_total (
3404 s->table->categories[var_get_dict_index (var)]);
/* Remember the current category for this variable; it is only replaced when
   it is not the excluded-missing placeholder. */
3407 const struct ctables_category *save = cats[a][i];
3408 if (save->type != CCT_EXCLUDED_MISSING)
3409 /* XXX ^^ this shows why we need to keep track of
3410 'excluded_missing' (or 'is_included') at a high level,
3411 because it gets replaced by a total category. So we need to
3412 restore that and plumb it through again. */
3415 ctables_cell_add__ (s, c, cats, d_weight, e_weight);
3416 recurse_totals (s, c, cats, d_weight, e_weight, a, i + 1);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the subtotal cells that
   it contributes to in section S.

   Walks the non-scale variables starting at axis START_AXIS and nest position
   START_NEST.  For a variable, the current category in CATS is replaced by
   that category's 'subtotal', the case is added via ctables_cell_add__, and
   the function recurses from the next nest position.  The condition guarding
   the replacement and the restoration of the saved category are not visible
   in this excerpt. */
3426 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3427 const struct ctables_category *cats[PIVOT_N_AXES][10],
3428 double d_weight, double e_weight,
3429 enum pivot_axis_type start_axis, size_t start_nest)
3431 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3433 const struct ctables_nest *nest = s->nests[a];
3434 for (size_t i = start_nest; i < nest->n; i++)
3436 if (i == nest->scale_idx)
3439 const struct ctables_category *save = cats[a][i];
3442 cats[a][i] = save->subtotal;
3443 ctables_cell_add__ (s, c, cats, d_weight, e_weight);
3444 recurse_subtotals (s, c, cats, d_weight, e_weight, a, i + 1);
/* Records VALUE, a value of VAR, in the hash map OCCURRENCES.

   The map is keyed on the value's hash (seed 0) at VAR's width.  Existing
   entries with the same hash are compared with value_equal; the action on a
   match is not visible in this excerpt, but presumably the function returns
   early so that each distinct value is stored once.  Otherwise a new entry
   holding a clone of VALUE is inserted. */
3453 ctables_add_occurrence (const struct variable *var,
3454 const union value *value,
3455 struct hmap *occurrences)
3457 int width = var_get_width (var);
3458 unsigned int hash = value_hash (value, width, 0);
3460 struct ctables_occurrence *o;
3461 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3463 if (value_equal (value, &o->value, width))
3466 o = xmalloc (sizeof *o);
3467 value_clone (&o->value, value, width);
3468 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Each non-scale variable's value in C is first classified into a category
   with ctables_categories_match.  Then, unless some value was mapped to the
   excluded-missing placeholder, every such value is recorded in
   S->occurrences.  Finally the case is added to its own cell and to the total
   and subtotal cells it contributes to.

   Some conditionals are not visible in this excerpt, notably the test of
   whether a category was found and what happens when one was not. */
3472 ctables_cell_insert (struct ctables_section *s,
3473 const struct ccase *c,
3474 double d_weight, double e_weight)
/* NOTE(review): the second dimension is a fixed 10, flagged XXX by the
   original author; nothing visible here checks nest->n against it. */
3476 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3478 bool excluded_missing = false;
3480 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3482 const struct ctables_nest *nest = s->nests[a];
3483 for (size_t i = 0; i < nest->n; i++)
3484 if (i != nest->scale_idx)
3486 const struct variable *var = nest->vars[i];
3487 const union value *value = case_data (c, var);
3489 cats[a][i] = ctables_categories_match (
3490 s->table->categories[var_get_dict_index (var)], value, var);
/* The tests below distinguish the summary variable and missing values; their
   branch bodies are not visible here.  The code that follows substitutes a
   shared placeholder category of type CCT_EXCLUDED_MISSING. */
3493 if (i != nest->summary_idx)
3496 if (!var_is_value_missing (var, value))
3499 static const struct ctables_category cct_excluded_missing = {
3500 .type = CCT_EXCLUDED_MISSING,
3503 cats[a][i] = &cct_excluded_missing;
3504 excluded_missing = true;
/* Track which values of each variable actually occurred, skipping cases that
   hit the excluded-missing placeholder. */
3509 if (!excluded_missing)
3510 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3512 const struct ctables_nest *nest = s->nests[a];
3513 for (size_t i = 0; i < nest->n; i++)
3514 if (i != nest->scale_idx)
3516 const struct variable *var = nest->vars[i];
3517 const union value *value = case_data (c, var);
3518 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add to the case's own cell, then to totals and subtotals, each starting
   from the first axis and first nest position. */
3522 ctables_cell_add__ (s, c, cats, d_weight, e_weight);
3523 recurse_totals (s, c, cats, d_weight, e_weight, 0, 0);
3524 recurse_subtotals (s, c, cats, d_weight, e_weight, 0, 0);
/* Summary spec set that this item refers into; merge_item_compare_3way reads
   set->specs[ofs].  (The enclosing struct header and its other members are
   not visible in this excerpt.) */
3529 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer
   to, i.e. set->specs[ofs] of each.

   Keys are compared in this order: function, weighted, calc_area, percentile,
   and finally label (with a null label treated as the empty string).  The
   first three keys order ascending, whereas percentile orders descending: a
   smaller percentile compares as greater. */
3534 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3536 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3537 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3538 if (as->function != bs->function)
3539 return as->function > bs->function ? 1 : -1;
3540 else if (as->weighted != bs->weighted)
3541 return as->weighted > bs->weighted ? 1 : -1;
3542 else if (as->calc_area != bs->calc_area)
3543 return as->calc_area > bs->calc_area ? 1 : -1;
3544 else if (as->percentile != bs->percentile)
3545 return as->percentile < bs->percentile ? 1 : -1;
3547 const char *as_label = as->label ? as->label : "";
3548 const char *bs_label = bs->label ? bs->label : "";
3549 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of variable VAR and passes the output string S to
   pivot_value_format, which presumably appends the text to it.  (The
   parameter line declaring S is not visible in this excerpt.)

   A temporary pivot value is created for the formatting and destroyed
   afterward. */
3553 ctables_category_format_number (double number, const struct variable *var,
3556 struct pivot_value *pv = pivot_value_new_var_value (
3557 var, &(union value) { .f = number });
3558 pivot_value_format (pv, NULL, s);
3559 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR and passes OUT to
   pivot_value_format, which presumably appends the text to it.

   STRING is first copied into a buffer of VAR's width, padded on the right
   with spaces, because the value is built at the variable's full width.

   NOTE(review): buffer S comes from xmalloc and its release is not visible in
   this excerpt -- confirm that it is freed after pivot_value_destroy. */
3563 ctables_category_format_string (struct substring string,
3564 const struct variable *var, struct string *out)
3566 int width = var_get_width (var);
3567 char *s = xmalloc (width);
3568 buf_copy_rpad (s, width, string.string, string.length, ' ');
3569 struct pivot_value *pv = pivot_value_new_var_value (
3570 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3571 pivot_value_format (pv, NULL, out);
3572 pivot_value_destroy (pv);
/* Writes a textual label for category CAT of variable VAR to string S.

   The visible branches produce: the formatted number or string for a
   single-value category; "low THRU high" for numeric and string ranges; the
   literal words "MISSING" and "OTHERNM"; "&" followed by the postcompute's
   name; and the category's total_label.

   Most case labels and all return statements are not visible in this excerpt.
   ctables_postcompute_label tests this function's result for truth, so it
   presumably reports whether a label could be produced -- confirm. */
3577 ctables_category_format_label (const struct ctables_category *cat,
3578 const struct variable *var,
3584 ctables_category_format_number (cat->number, var, s);
3588 ctables_category_format_string (cat->string, var, s);
3592 ctables_category_format_number (cat->nrange[0], var, s);
3593 ds_put_format (s, " THRU ");
3594 ctables_category_format_number (cat->nrange[1], var, s);
3598 ctables_category_format_string (cat->srange[0], var, s);
3599 ds_put_format (s, " THRU ");
3600 ctables_category_format_string (cat->srange[1], var, s);
3604 ds_put_cstr (s, "MISSING");
3608 ds_put_cstr (s, "OTHERNM");
3611 case CCT_POSTCOMPUTE:
3612 ds_put_format (s, "&%s", cat->pc->name);
3617 ds_put_cstr (s, cat->total_label);
3623 case CCT_EXCLUDED_MISSING:
/* Creates the label for postcompute category CAT, expanding each occurrence
   of ")LABEL[n]" in the postcompute's label text to the label of the Nth
   (1-based) category in CATS, as formatted for variable VAR.

   If the text has no ")LABEL[" at all, it is used as is.  The final statement
   returns the unexpanded label text; presumably it is reached when an
   expansion is malformed (no closing ']', an index that is not a whole number
   in 1...n_cats, or a category that cannot be formatted) -- the jumps to it
   are not visible in this excerpt. */
3630 static struct pivot_value *
3631 ctables_postcompute_label (const struct ctables_categories *cats,
3632 const struct ctables_category *cat,
3633 const struct variable *var)
3635 struct substring in = ss_cstr (cat->pc->label);
3636 struct substring target = ss_cstr (")LABEL[");
3638 struct string out = DS_EMPTY_INITIALIZER;
/* Find the next marker; SIZE_MAX means there are no more. */
3641 size_t chunk = ss_find_substring (in, target);
3642 if (chunk == SIZE_MAX)
/* Nothing was expanded at all: use the input text directly. */
3644 if (ds_is_empty (&out))
3645 return pivot_value_new_user_text (in.string, in.length);
/* Append the tail after the last marker and hand OUT's buffer to the new
   value. */
3648 ds_put_substring (&out, in);
3649 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker, then skip past the marker itself. */
3653 ds_put_substring (&out, ss_head (in, chunk));
3654 ss_advance (&in, chunk + target.length);
/* Parse the decimal index up to ']' and require that the whole token was
   consumed and that it names an existing category. */
3656 struct substring idx_s;
3657 if (!ss_get_until (&in, ']', &idx_s))
3660 long int idx = strtol (idx_s.string, &tail, 10);
3661 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3664 struct ctables_category *cat2 = &cats->cats[idx - 1];
3665 if (!ctables_category_format_label (cat2, var, &out))
3671 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value used to label category CAT of variable VAR:

   - For a postcompute that has a label, the expanded postcompute label
     (see ctables_postcompute_label, which also needs CATS).

   - For a total or subtotal, the category's total_label as user text.

   - Otherwise (including a postcompute without a label), VALUE shown as a
     value of VAR. */
3674 static struct pivot_value *
3675 ctables_category_create_value_label (const struct ctables_categories *cats,
3676 const struct ctables_category *cat,
3677 const struct variable *var,
3678 const union value *value)
3680 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3681 ? ctables_postcompute_label (cats, cat, var)
3682 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3683 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3684 : pivot_value_new_var_value (var, value));
/* Searches T's clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and precomputed HASH.

   The return statements are not visible in this excerpt; presumably the
   matching entry is returned, or NULL if there is none. */
3687 static struct ctables_value *
3688 ctables_value_find__ (struct ctables_table *t, const union value *value,
3689 int width, unsigned int hash)
3691 struct ctables_value *clv;
3692 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3693 hash, &t->clabels_values_map)
3694 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T's clabels_values_map.

   The value is hashed (seed 0) and looked up first.  The visible code then
   allocates a new entry holding a clone of VALUE and inserts it under that
   hash; presumably this happens only when the lookup found nothing (the guard
   and the WIDTH parameter line are not visible in this excerpt). */
3700 ctables_value_insert (struct ctables_table *t, const union value *value,
3703 unsigned int hash = value_hash (value, width, 0);
3704 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3707 clv = xmalloc (sizeof *clv);
3708 value_clone (&clv->value, value, width);
3709 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's clabels_values_map.  This is a
   convenience wrapper that computes the hash (seed 0) and returns whatever
   ctables_value_find__ returns. */
3713 static struct ctables_value *
3714 ctables_value_find (struct ctables_table *t,
3715 const union value *value, int width)
3717 return ctables_value_find__ (t, value, width,
3718 value_hash (value, width, 0));
/* Recursively creates one section of T for every combination of nests across
   the axes.

   While A is a valid axis, IX[A] iterates over the nests in T's stack for
   that axis (at least once, even if the stack is empty) and the function
   recurses on the next axis.  Once all axes have an index, a new section is
   appended at T->sections[T->n_sections] and its cell map, per-variable
   occurrence maps, and per-area maps are initialized.

   NOTE(review): the caller presumably sizes T->sections for all combinations
   beforehand; nothing visible here grows the array -- confirm. */
3722 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3723 size_t ix[PIVOT_N_AXES])
3725 if (a < PIVOT_N_AXES)
3727 size_t limit = MAX (t->stacks[a].n, 1);
3728 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3729 ctables_table_add_section (t, a + 1, ix);
/* Base case: all axes have an index, so create the section. */
3733 struct ctables_section *s = &t->sections[t->n_sections++];
3734 *s = (struct ctables_section) {
3736 .cells = HMAP_INITIALIZER (s->cells),
3738 for (a = 0; a < PIVOT_N_AXES; a++)
3741 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable in the nest. */
3743 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3744 for (size_t i = 0; i < nest->n; i++)
3745 hmap_init (&s->occurrences[a][i]);
3747 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3748 hmap_init (&s->areas[at]);
/* Binary operator callback for postcompute expressions (passed to
   ctables_pcexpr_evaluate_nonterminal).  The body is not visible in this
   excerpt; presumably it returns A + B. */
3753 ctpo_add (double a, double b)
/* Binary operator callback for postcompute expressions (passed to
   ctables_pcexpr_evaluate_nonterminal).  The body is not visible in this
   excerpt; presumably it returns A - B. */
3759 ctpo_sub (double a, double b)
/* Binary operator callback for postcompute expressions (passed to
   ctables_pcexpr_evaluate_nonterminal).  The body is not visible in this
   excerpt; presumably it returns A * B. */
3765 ctpo_mul (double a, double b)
/* Division callback for postcompute expressions: returns A / B, or SYSMIS
   when B is zero. */
3771 ctpo_div (double a, double b)
3773 return b ? a / b : SYSMIS;
/* Exponentiation callback for postcompute expressions: computes pow (A, B).

   errno is saved beforehand; the rest of the body is not visible in this
   excerpt, but presumably errno is inspected to detect a failed pow and then
   restored -- confirm. */
3777 ctpo_pow (double a, double b)
3779 int save_errno = errno;
3781 double result = pow (a, b);
/* Unary operator callback for postcompute expressions; B is unused so that
   the signature matches the binary callbacks.  The body is not visible in
   this excerpt; presumably it returns -A. */
3789 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute expression.
   (Some members, such as pc_a_idx and summary_idx, which the evaluation code
   reads, are declared on lines not visible in this excerpt.) */
3794 struct ctables_pcexpr_evaluate_ctx
/* Cell being computed; its category values are reused for every variable
   other than the postcompute's own. */
3796 const struct ctables_cell *cell;
/* Section whose cells and occurrences are searched. */
3797 const struct ctables_section *section;
/* Categories of the postcompute's variable. */
3798 const struct ctables_categories *cats;
/* Axis that holds the postcompute's variable. */
3799 enum pivot_axis_type pc_a;
/* Passed through to ctables_find_category_for_postcompute. */
3802 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal calls this before
   its definition appears. */
3805 static double ctables_pcexpr_evaluate (
3806 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E in context CTX: evaluates its first N_ARGS
   subexpressions and applies EVALUATE to the results.  N_ARGS may be at most
   2; an unused second argument is passed as 0.

   Each operand is checked for being non-finite or SYSMIS.  The action taken
   is not visible in this excerpt; presumably SYSMIS is returned without
   calling EVALUATE. */
3809 ctables_pcexpr_evaluate_nonterminal (
3810 const struct ctables_pcexpr_evaluate_ctx *ctx,
3811 const struct ctables_pcexpr *e, size_t n_args,
3812 double evaluate (double, double))
3814 double args[2] = { 0, 0 };
3815 for (size_t i = 0; i < n_args; i++)
3817 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3818 if (!isfinite (args[i]) || args[i] == SYSMIS)
3821 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary CTX->summary_idx, of the cell that
   has the same category values as CTX->cell except that the value at the
   postcompute's position (CTX->pc_a, CTX->pc_a_idx) is replaced by PC_CV.

   The target cell is located in the section's cell map using the same hashing
   and equality rules as ctables_cell_insert__: category pointers always
   count, values count only for categories that are not totals, subtotals, or
   postcomputes, and scale variables are skipped.

   What happens when no such cell exists is not visible in this excerpt. */
3825 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3826 const struct ctables_cell_value *pc_cv)
3828 const struct ctables_section *s = ctx->section;
/* Compute the hash of the target cell. */
3831 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3833 const struct ctables_nest *nest = s->nests[a];
3834 for (size_t i = 0; i < nest->n; i++)
3835 if (i != nest->scale_idx)
3837 const struct ctables_cell_value *cv
3838 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3839 : &ctx->cell->axes[a].cvs[i]);
3840 hash = hash_pointer (cv->category, hash);
3841 if (cv->category->type != CCT_TOTAL
3842 && cv->category->type != CCT_SUBTOTAL
3843 && cv->category->type != CCT_POSTCOMPUTE)
3844 hash = value_hash (&cv->value,
3845 var_get_width (nest->vars[i]), hash);
/* Search the bucket for a cell whose categories and values all match. */
3849 struct ctables_cell *tc;
3850 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3852 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3854 const struct ctables_nest *nest = s->nests[a];
3855 for (size_t i = 0; i < nest->n; i++)
3856 if (i != nest->scale_idx)
3858 const struct ctables_cell_value *p_cv
3859 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3860 : &ctx->cell->axes[a].cvs[i]);
3861 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3862 if (p_cv->category != t_cv->category
3863 || (p_cv->category->type != CCT_TOTAL
3864 && p_cv->category->type != CCT_SUBTOTAL
3865 && p_cv->category->type != CCT_POSTCOMPUTE
3866 && !value_equal (&p_cv->value,
3868 var_get_width (nest->vars[i]))))
/* Read the requested summary from the target cell, using the spec set for
   that cell's own summary variant. */
3880 const struct ctables_table *t = s->table;
3881 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3882 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3883 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3884 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute (asserted non-null) and then to cell
   summary values with ctables_pcexpr_evaluate_category.  Operator nodes are
   delegated to ctables_pcexpr_evaluate_nonterminal with the matching ctpo_*
   callback.

   The switch statement and several case labels are not visible in this
   excerpt. */
3888 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3889 const struct ctables_pcexpr *e)
/* Ranges, MISSING, and OTHERNM can cover many values: add up the summary
   value over every observed value of the postcompute's variable that falls
   into the category. */
3896 case CTPO_CAT_NRANGE:
3897 case CTPO_CAT_SRANGE:
3898 case CTPO_CAT_MISSING:
3899 case CTPO_CAT_OTHERNM:
3901 struct ctables_cell_value cv = {
3902 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3904 assert (cv.category != NULL);
3906 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3907 const struct ctables_occurrence *o;
3910 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3911 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3912 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3914 cv.value = o->value;
3915 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* A single number, a subtotal, or a total names exactly one cell. */
3920 case CTPO_CAT_NUMBER:
3921 case CTPO_CAT_SUBTOTAL:
3922 case CTPO_CAT_TOTAL:
3924 struct ctables_cell_value cv = {
3925 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3926 .value = { .f = e->number },
3928 assert (cv.category != NULL);
3929 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* A string operand shorter than the variable's width is copied into a
   temporary buffer padded on the right with spaces. */
3932 case CTPO_CAT_STRING:
3934 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3936 if (width > e->string.length)
3938 s = xmalloc (width);
3939 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3942 const struct ctables_category *category
3943 = ctables_find_category_for_postcompute (
3944 ctx->section->table->ctables->dict,
3945 ctx->cats, ctx->parse_format, e);
3946 assert (category != NULL);
/* The category found may be numeric or string; fill in the matching member
   of the value. */
3948 struct ctables_cell_value cv = { .category = category };
3949 if (category->type == CCT_NUMBER)
3950 cv.value.f = category->number;
3951 else if (category->type == CCT_STRING)
3952 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3956 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary (add, subtract, multiply, divide, power) and one
   unary (negate). */
3962 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3965 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3968 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3971 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3974 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3977 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category among CELL's category values in section S,
   scanning each axis and each position within the axis's nest.

   CELL must be flagged as a postcompute cell, and a postcompute category must
   be found; both are asserted.  The position of the category is stored
   through the output parameters (*PC_A_IDX_P is assigned in the visible code;
   the assignment to *PC_A_P and the return statements are not visible in this
   excerpt). */
3983 static const struct ctables_category *
3984 ctables_cell_postcompute (const struct ctables_section *s,
3985 const struct ctables_cell *cell,
3986 enum pivot_axis_type *pc_a_p,
3989 assert (cell->postcompute);
3990 const struct ctables_category *pc_cat = NULL;
3991 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3992 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3994 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3995 if (cv->category->type == CCT_POSTCOMPUTE)
3999 /* Multiple postcomputes cross each other. The value is
4004 pc_cat = cv->category;
4008 *pc_a_idx_p = pc_a_idx;
4012 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary spec
   SS, whose index within the cell's summaries is SUMMARY_IDX (that parameter
   line is not visible in this excerpt).

   If the postcompute has its own summary specs and one of them matches SS on
   function, weighted, calc_area, and percentile, that spec's format and
   is_ctables_format are stored in *FORMAT and *IS_CTABLES_FORMAT.  Whether
   the search stops at the first match, and what guards it, is not visible
   here.

   Returns the value of the postcompute's expression. */
4017 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4018 const struct ctables_cell *cell,
4019 const struct ctables_summary_spec *ss,
4020 struct fmt_spec *format,
4021 bool *is_ctables_format,
/* Locate the postcompute category and its position within the cell. */
4024 enum pivot_axis_type pc_a = 0;
4025 size_t pc_a_idx = 0;
4026 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4027 s, cell, &pc_a, &pc_a_idx);
4031 const struct ctables_postcompute *pc = pc_cat->pc;
4034 for (size_t i = 0; i < pc->specs->n; i++)
4036 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4037 if (ss->function == ss2->function
4038 && ss->weighted == ss2->weighted
4039 && ss->calc_area == ss2->calc_area
4040 && ss->percentile == ss2->percentile)
4042 *format = ss2->format;
4043 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the postcompute's
   variable. */
4049 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4050 const struct ctables_categories *cats = s->table->categories[
4051 var_get_dict_index (var)];
4052 struct ctables_pcexpr_evaluate_ctx ctx = {
4057 .pc_a_idx = pc_a_idx,
4058 .summary_idx = summary_idx,
4059 .parse_format = pc_cat->parse_format,
4061 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a "UTF-8" string
   obtained from data_out_stretchy, then repositions a leading minus sign for
   the CTABLES-specific formats, as described in the comment below.

   The return statement is not visible in this excerpt; presumably the string
   is returned and the caller owns it -- confirm. */
4065 ctables_format (double d, const struct fmt_spec *format,
4066 const struct fmt_settings *settings)
4068 const union value v = { .f = d };
4069 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4071 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4072 produce the results we want for negative numbers, putting the negative
4073 sign in the wrong spot, before the prefix instead of after it. We can't,
4074 in fact, produce the desired results using a custom-currency
4075 specification. Instead, we postprocess the output, moving the negative
4078 NEQUAL: "-N=3" => "N=-3"
4079 PAREN: "-(3)" => "(-3)"
4080 PCTPAREN: "-(3%)" => "(-3%)"
4082 This transformation doesn't affect NEGPAREN. */
/* A '-' directly after 'E' is skipped, presumably because it belongs to an
   exponent rather than being the number's sign. */
4083 char *minus_src = strchr (s, '-');
4084 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination is the '=' of "N=" if present, otherwise the '(' (which
   may be absent; the guard for that is not visible here). */
4086 char *n_equals = strstr (s, "N=");
4087 char *lparen = strchr (s, '(');
4088 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4090 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  A nest passes only if it has
   exactly one variable, that variable is the nest's scale variable
   (scale_idx == 0), and the variable's label setting in T->ctables->vlabels
   is CTVL_NONE.

   The return statements are not visible in this excerpt; presumably the
   function returns false at the first nest that fails and true if all
   pass. */
4096 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4098 for (size_t i = 0; i < t->stacks[a].n; i++)
4100 struct ctables_nest *nest = &t->stacks[a].nests[i];
4101 if (nest->n != 1 || nest->scale_idx != 0)
4104 enum ctables_vlabel vlabel
4105 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4106 if (vlabel != CTVL_NONE)
4113 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4115 struct pivot_table *pt = pivot_table_create__ (
4117 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4118 : pivot_value_new_text (N_("Custom Tables"))),
4121 pivot_table_set_caption (
4122 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4124 pivot_table_set_corner_text (
4125 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4127 bool summary_dimension = (t->summary_axis != t->slabels_axis
4128 || (!t->slabels_visible
4129 && t->summary_specs.n > 1));
4130 if (summary_dimension)
4132 struct pivot_dimension *d = pivot_dimension_create (
4133 pt, t->slabels_axis, N_("Statistics"));
4134 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4135 if (!t->slabels_visible)
4136 d->hide_all_labels = true;
4137 for (size_t i = 0; i < specs->n; i++)
4138 pivot_category_create_leaf (
4139 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4142 bool categories_dimension = t->clabels_example != NULL;
4143 if (categories_dimension)
4145 struct pivot_dimension *d = pivot_dimension_create (
4146 pt, t->label_axis[t->clabels_from_axis],
4147 t->clabels_from_axis == PIVOT_AXIS_ROW
4148 ? N_("Row Categories")
4149 : N_("Column Categories"));
4150 const struct variable *var = t->clabels_example;
4151 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4152 for (size_t i = 0; i < t->n_clabels_values; i++)
4154 const struct ctables_value *value = t->clabels_values[i];
4155 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4156 assert (cat != NULL);
4157 pivot_category_create_leaf (
4158 d->root, ctables_category_create_value_label (c, cat,
4164 pivot_table_set_look (pt, ct->look);
4165 struct pivot_dimension *d[PIVOT_N_AXES];
4166 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4168 static const char *names[] = {
4169 [PIVOT_AXIS_ROW] = N_("Rows"),
4170 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4171 [PIVOT_AXIS_LAYER] = N_("Layers"),
4173 d[a] = (t->axes[a] || a == t->summary_axis
4174 ? pivot_dimension_create (pt, a, names[a])
4179 assert (t->axes[a]);
4181 for (size_t i = 0; i < t->stacks[a].n; i++)
4183 struct ctables_nest *nest = &t->stacks[a].nests[i];
4184 struct ctables_section **sections = xnmalloc (t->n_sections,
4186 size_t n_sections = 0;
4188 size_t n_total_cells = 0;
4189 size_t max_depth = 0;
4190 for (size_t j = 0; j < t->n_sections; j++)
4191 if (t->sections[j].nests[a] == nest)
4193 struct ctables_section *s = &t->sections[j];
4194 sections[n_sections++] = s;
4195 n_total_cells += hmap_count (&s->cells);
4197 size_t depth = s->nests[a]->n;
4198 max_depth = MAX (depth, max_depth);
4201 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4203 size_t n_sorted = 0;
4205 for (size_t j = 0; j < n_sections; j++)
4207 struct ctables_section *s = sections[j];
4209 struct ctables_cell *cell;
4210 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4212 sorted[n_sorted++] = cell;
4213 assert (n_sorted <= n_total_cells);
4216 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4217 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4220 if (a == PIVOT_AXIS_ROW)
4222 size_t ids[N_CTATS];
4223 memset (ids, 0, sizeof ids);
4224 for (size_t j = 0; j < n_sorted; j++)
4226 struct ctables_cell *cell = sorted[j];
4227 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4229 struct ctables_area *area = cell->areas[at];
4230 if (!area->sequence)
4231 area->sequence = ++ids[at];
4238 for (size_t j = 0; j < n_sorted; j++)
4240 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4245 struct ctables_level
4247 enum ctables_level_type
4249 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4250 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4251 CTL_SUMMARY, /* Summary functions. */
4255 enum settings_value_show vlabel; /* CTL_VAR only. */
4258 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4259 size_t n_levels = 0;
4260 for (size_t k = 0; k < nest->n; k++)
4262 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4263 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4265 if (vlabel != CTVL_NONE)
4267 levels[n_levels++] = (struct ctables_level) {
4269 .vlabel = (enum settings_value_show) vlabel,
4274 if (nest->scale_idx != k
4275 && (k != nest->n - 1 || t->label_axis[a] == a))
4277 levels[n_levels++] = (struct ctables_level) {
4278 .type = CTL_CATEGORY,
4284 if (!summary_dimension && a == t->slabels_axis)
4286 levels[n_levels++] = (struct ctables_level) {
4287 .type = CTL_SUMMARY,
4288 .var_idx = SIZE_MAX,
4292 /* Pivot categories:
4294 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4295 - category for nest->vars[0], if nest->scale_idx != 0
4296 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4297 - category for nest->vars[1], if nest->scale_idx != 1
4299 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4300 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4301 - summary function, if 'a == t->slabels_axis && a ==
4304 Additional dimensions:
4306 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4308 - If 't->label_axis[b] == a' for some 'b != a', add a category
4313 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4315 for (size_t j = 0; j < n_sorted; j++)
4317 struct ctables_cell *cell = sorted[j];
4318 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4320 size_t n_common = 0;
4323 for (; n_common < n_levels; n_common++)
4325 const struct ctables_level *level = &levels[n_common];
4326 if (level->type == CTL_CATEGORY)
4328 size_t var_idx = level->var_idx;
4329 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4330 if (prev->axes[a].cvs[var_idx].category != c)
4332 else if (c->type != CCT_SUBTOTAL
4333 && c->type != CCT_TOTAL
4334 && c->type != CCT_POSTCOMPUTE
4335 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4336 &cell->axes[a].cvs[var_idx].value,
4337 var_get_type (nest->vars[var_idx])))
4343 for (size_t k = n_common; k < n_levels; k++)
4345 const struct ctables_level *level = &levels[k];
4346 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4347 if (level->type == CTL_SUMMARY)
4349 assert (k == n_levels - 1);
4351 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4352 for (size_t m = 0; m < specs->n; m++)
4354 int leaf = pivot_category_create_leaf (
4355 parent, ctables_summary_label (&specs->specs[m],
4363 const struct variable *var = nest->vars[level->var_idx];
4364 struct pivot_value *label;
4365 if (level->type == CTL_VAR)
4367 label = pivot_value_new_variable (var);
4368 label->variable.show = level->vlabel;
4370 else if (level->type == CTL_CATEGORY)
4372 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4373 label = ctables_category_create_value_label (
4374 t->categories[var_get_dict_index (var)],
4375 cv->category, var, &cv->value);
4380 if (k == n_levels - 1)
4381 prev_leaf = pivot_category_create_leaf (parent, label);
4383 groups[k] = pivot_category_create_group__ (parent, label);
4387 cell->axes[a].leaf = prev_leaf;
4396 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4400 size_t n_total_cells = 0;
4401 for (size_t j = 0; j < t->n_sections; j++)
4402 n_total_cells += hmap_count (&t->sections[j].cells);
4404 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4405 size_t n_sorted = 0;
4406 for (size_t j = 0; j < t->n_sections; j++)
4408 const struct ctables_section *s = &t->sections[j];
4409 struct ctables_cell *cell;
4410 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4412 sorted[n_sorted++] = cell;
4414 assert (n_sorted <= n_total_cells);
4415 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4417 size_t ids[N_CTATS];
4418 memset (ids, 0, sizeof ids);
4419 for (size_t j = 0; j < n_sorted; j++)
4421 struct ctables_cell *cell = sorted[j];
4422 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4424 struct ctables_area *area = cell->areas[at];
4425 if (!area->sequence)
4426 area->sequence = ++ids[at];
4433 for (size_t i = 0; i < t->n_sections; i++)
4435 struct ctables_section *s = &t->sections[i];
4437 struct ctables_cell *cell;
4438 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4443 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4444 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4445 for (size_t j = 0; j < specs->n; j++)
4448 size_t n_dindexes = 0;
4450 if (summary_dimension)
4451 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4453 if (categories_dimension)
4455 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4456 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4457 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4458 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4461 dindexes[n_dindexes++] = ctv->leaf;
4464 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4467 int leaf = cell->axes[a].leaf;
4468 if (a == t->summary_axis && !summary_dimension)
4470 dindexes[n_dindexes++] = leaf;
4473 const struct ctables_summary_spec *ss = &specs->specs[j];
4475 struct fmt_spec format = specs->specs[j].format;
4476 bool is_ctables_format = ss->is_ctables_format;
4477 double d = (cell->postcompute
4478 ? ctables_cell_calculate_postcompute (
4479 s, cell, ss, &format, &is_ctables_format, j)
4480 : ctables_summary_value (cell, &cell->summaries[j],
4483 struct pivot_value *value;
4484 if (ct->hide_threshold != 0
4485 && d < ct->hide_threshold
4486 && ctables_summary_function_is_count (ss->function))
4488 value = pivot_value_new_user_text_nocopy (
4489 xasprintf ("<%d", ct->hide_threshold));
4491 else if (d == 0 && ct->zero)
4492 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4493 else if (d == SYSMIS && ct->missing)
4494 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4495 else if (is_ctables_format)
4496 value = pivot_value_new_user_text_nocopy (
4497 ctables_format (d, &format, &ct->ctables_formats));
4500 value = pivot_value_new_number (d);
4501 value->numeric.format = format;
4503 /* XXX should text values be right-justified? */
4504 pivot_table_put (pt, dindexes, n_dindexes, value);
4509 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be moved to
   the position recorded in T->label_axis[A].  Each failed check reports an
   SE message that names the subcommand (ROWLABELS or COLLABELS) and the
   target position (LAYER or OPPOSITE).  As a side effect, the innermost
   variable of the first nest is stored in T->clabels_example.
   NOTE(review): the return statements and the early exits are on lines not
   shown here; the caller combines two calls with &&, so a boolean result is
   presumed. */
4513 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4515 enum pivot_axis_type label_pos = t->label_axis[a];
/* These two strings are used only to build the diagnostics below. */
4519 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4520 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4522 const struct ctables_stack *stack = &t->stacks[a];
4526 const struct ctables_nest *n0 = &stack->nests[0];
4529 assert (stack->n == 1);
/* v0 is the last (innermost) variable of the first nest and c0 is its
   category specification; the remaining nests are compared against them. */
4533 const struct variable *v0 = n0->vars[n0->n - 1];
4534 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4535 t->clabels_example = v0;
/* A category of type CCT_FUNCTION in c0 is reported as an error. */
4537 for (size_t i = 0; i < c0->n_cats; i++)
4538 if (c0->cats[i].type == CCT_FUNCTION)
4540 msg (SE, _("%s=%s is not allowed with sorting based "
4541 "on a summary function."),
4542 subcommand_name, pos_name);
/* The innermost variable must not be the scale variable of its nest. */
4545 if (n0->n - 1 == n0->scale_idx)
4547 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4548 "but %s is a scale variable."),
4549 subcommand_name, pos_name, var_get_name (v0));
/* Compare the innermost variable of every other nest with v0: it must be
   categorical and agree with v0 in width, value labels, and category
   specification. */
4553 for (size_t i = 1; i < stack->n; i++)
4555 const struct ctables_nest *ni = &stack->nests[i];
4557 const struct variable *vi = ni->vars[ni->n - 1];
4558 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4560 if (ni->n - 1 == ni->scale_idx)
4562 msg (SE, _("%s=%s requires the variables to be moved to be "
4563 "categorical, but %s is a scale variable."),
4564 subcommand_name, pos_name, var_get_name (vi));
4567 if (var_get_width (v0) != var_get_width (vi))
4569 msg (SE, _("%s=%s requires the variables to be "
4570 "moved to have the same width, but %s has "
4571 "width %d and %s has width %d."),
4572 subcommand_name, pos_name,
4573 var_get_name (v0), var_get_width (v0),
4574 var_get_name (vi), var_get_width (vi));
4577 if (!val_labs_equal (var_get_value_labels (v0),
4578 var_get_value_labels (vi)))
4580 msg (SE, _("%s=%s requires the variables to be "
4581 "moved to have the same value labels, but %s "
4582 "and %s have different value labels."),
4583 subcommand_name, pos_name,
4584 var_get_name (v0), var_get_name (vi));
4587 if (!ctables_categories_equal (c0, ci))
4589 msg (SE, _("%s=%s requires the variables to be "
4590 "moved to have the same category "
4591 "specifications, but %s and %s have different "
4592 "category specifications."),
4593 subcommand_name, pos_name,
4594 var_get_name (v0), var_get_name (vi));
/* Searches the first *N entries of *SUM_VARS for VAR.  When VAR is not
   found, the array is grown with x2nrealloc if *N has reached *ALLOCATED and
   VAR is stored at index *N.
   NOTE(review): the return statements and any update of *N are on lines not
   shown here; enumerate_sum_vars stores the result in sum_var_idx, so the
   index of VAR is presumably what is returned -- confirm. */
4603 add_sum_var (struct variable *var,
4604 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear scan: VAR may already be registered. */
4606 for (size_t i = 0; i < *n; i++)
4607 if (var == (*sum_vars)[i])
/* Not registered yet: make room if needed, then store VAR at the end. */
4610 if (*n >= *allocated)
4611 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4612 (*sum_vars)[*n] = var;
/* Converts one area type into another.  ctables_prepare_table applies it to
   the calc_area of summary specs when row or column labels are moved to the
   opposite axis.  Only two results are visible here, CTAT_LAYERCOL and
   CTAT_LAYERROW; the inputs that select them, and any other cases, are on
   lines not shown. */
4616 static enum ctables_area_type
4617 rotate_area (enum ctables_area_type area)
4628 return CTAT_LAYERCOL;
4631 return CTAT_LAYERROW;
/* Visits axis tree A and, for every summary spec whose function is
   CTSF_areaPCT_SUM, registers A->var through add_sum_var and records the
   result in the sum_var_idx field of that spec.  Both entries of A->subs are
   visited recursively.  SUM_VARS, N and ALLOCATED are handed to add_sum_var
   unchanged.
   NOTE(review): the test that chooses between the spec scan and the
   recursion is on lines not shown. */
4644 enumerate_sum_vars (const struct ctables_axis *a,
4645 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Scan every summary variant (N_CSVS of them) of this axis node. */
4653 for (size_t i = 0; i < N_CSVS; i++)
4654 for (size_t j = 0; j < a->specs[i].n; j++)
4656 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4657 if (spec->function == CTSF_areaPCT_SUM)
4658 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into the two child axes. */
4664 for (size_t i = 0; i < 2; i++)
4665 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  In order, it:
   - enumerates the nests of every axis into T->stacks and fills in, for each
     nest and each area type, the list nest->areas[at] of variable indexes;
   - supplies default summary specs on the summary axis where none were
     given, and copies cell specs to total specs where totals have none;
   - rotates the calc_area of summary specs when row or column labels are
     moved to the opposite axis;
   - collects listwise scale variables when smissing_listwise is set;
   - merges the summary specs of all nests into T->summary_specs;
   - enumerates the sum variables of the summary axis;
   and finally returns whether the label positions of the row and column
   axes both pass ctables_check_label_position.
   NOTE(review): many braces and branch lines are not shown in this view, so
   the exact nesting of the steps below should be confirmed. */
4671 ctables_prepare_table (struct ctables_table *t)
4673 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4676 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4678 for (size_t j = 0; j < t->stacks[a].n; j++)
4680 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* Build nest->areas[at]: the indexes of the variables in this nest that
   take part in area type AT.  The array is sized for all nest->n variables
   and n_areas[at] counts the entries actually used. */
4681 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4683 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4684 nest->n_areas[at] = 0;
/* For row-like and column-like area types, ata is the axis the area type
   belongs to and atb the opposite one. */
4686 enum pivot_axis_type ata, atb;
4687 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4689 ata = PIVOT_AXIS_ROW;
4690 atb = PIVOT_AXIS_COLUMN;
4692 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4694 ata = PIVOT_AXIS_COLUMN;
4695 atb = PIVOT_AXIS_ROW;
4698 if (at == CTAT_LAYER
4699 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4700 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4701 ? a == atb && t->label_axis[a] != a
/* k is a size_t counting down, so the loop stops when k wraps around past
   zero and is no longer below nest->n. */
4704 for (size_t k = nest->n - 1; k < nest->n; k--)
4705 if (k != nest->scale_idx)
4707 nest->areas[at][nest->n_areas[at]++] = k;
4713 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4714 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4715 : at == CTAT_TABLE ? true
/* Include every variable of the nest except its scale variable. */
4719 for (size_t k = 0; k < nest->n; k++)
4720 if (k != nest->scale_idx)
4721 nest->areas[at][nest->n_areas[at]++] = k;
4727 #define L PIVOT_AXIS_LAYER
4728 n_drop = (t->clabels_from_axis == L ? a != L
4729 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4730 : t->clabels_from_axis == a ? 2
4737 n_drop = a == ata && t->label_axis[ata] == atb;
4742 n_drop = (a == ata ? t->label_axis[ata] == atb
4744 : t->clabels_from_axis == atb ? -1
4745 : t->clabels_to_axis != atb ? 1
4757 size_t n = nest->n_areas[at];
/* Overwrite the second-to-last entry with the last one and shorten the
   list by one.  NOTE(review): the guarding condition is not shown. */
4760 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4761 nest->n_areas[at]--;
/* Drop up to n_drop trailing entries, never going below zero. */
4766 for (int i = 0; i < n_drop; i++)
4767 if (nest->n_areas[at] > 0)
4768 nest->n_areas[at]--;
/* Give the axis a stack with one nest whose scale_idx and summary_idx are
   both SIZE_MAX.  NOTE(review): the condition that selects this branch is
   not shown. */
4775 struct ctables_nest *nest = xmalloc (sizeof *nest);
4776 *nest = (struct ctables_nest) {
4778 .scale_idx = SIZE_MAX,
4779 .summary_idx = SIZE_MAX
4781 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4783 /* There's no point in moving labels away from an axis that has no
4784 labels, so avoid dealing with the special cases around that. */
4785 t->label_axis[a] = a;
/* From here on, work on the nests of the summary axis. */
4788 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4789 for (size_t i = 0; i < stack->n; i++)
4791 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary specs were given: synthesize one default spec, CTSF_MEAN
   when the spec set is marked is_scale and CTSF_COUNT otherwise, applied to
   the last variable of the nest, then clone it for totals. */
4792 if (!nest->specs[CSV_CELL].n)
4794 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4795 ss->specs = xmalloc (sizeof *ss->specs);
4798 enum ctables_summary_function function
4799 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
4803 nest->summary_idx = nest->n - 1;
4804 ss->var = nest->vars[nest->summary_idx];
4806 *ss->specs = (struct ctables_summary_spec) {
4807 .function = function,
4809 .format = ctables_summary_default_format (function, ss->var),
4812 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4813 &nest->specs[CSV_CELL]);
/* Cell specs exist but total specs do not: totals reuse the cell specs. */
4815 else if (!nest->specs[CSV_TOTAL].n)
4816 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4817 &nest->specs[CSV_CELL]);
/* Row labels shown in columns, or column labels shown in rows: rotate the
   calc_area of the specs.  NOTE(review): a test involving cfi appears to sit
   on a line not shown before the rotation. */
4819 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4820 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4822 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4823 for (size_t i = 0; i < nest->specs[sv].n; i++)
4825 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4826 const struct ctables_function_info *cfi =
4827 &ctables_function_info[ss->function];
4829 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise handling: gather the scale variables of the other nests that
   share this nest group_head, then attach the list to every summary
   variant of this nest. */
4833 if (t->ctables->smissing_listwise)
4835 struct variable **listwise_vars = NULL;
4837 size_t allocated = 0;
4839 for (size_t j = nest->group_head; j < stack->n; j++)
4841 const struct ctables_nest *other_nest = &stack->nests[j];
4842 if (other_nest->group_head != nest->group_head)
4845 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4848 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4849 sizeof *listwise_vars);
4850 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* NOTE(review): the condition guarding the xmemdup copy is not shown. */
4853 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4856 listwise_vars = xmemdup (listwise_vars,
4857 n * sizeof *listwise_vars);
4858 nest->specs[sv].listwise_vars = listwise_vars;
4859 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into T->summary_specs.
   Repeatedly pick the smallest pending item according to
   merge_item_compare_3way, append its current spec to MERGED, and give every
   pending item that compares equal the axis_idx of that merged entry. */
4864 struct ctables_summary_spec_set *merged = &t->summary_specs;
4865 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4867 for (size_t j = 0; j < stack->n; j++)
4869 const struct ctables_nest *nest = &stack->nests[j];
4871 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4872 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4877 struct merge_item min = items[0];
4878 for (size_t j = 1; j < n_left; j++)
4879 if (merge_item_compare_3way (&items[j], &min) < 0)
4882 if (merged->n >= merged->allocated)
4883 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4884 sizeof *merged->specs);
4885 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* j is advanced inside the body (not shown); an exhausted item is replaced
   by the last pending one. */
4887 for (size_t j = 0; j < n_left; )
4889 if (merge_item_compare_3way (&items[j], &min) == 0)
4891 struct merge_item *item = &items[j];
4892 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4893 if (++item->ofs >= item->set->n)
4895 items[j] = items[--n_left];
/* Diagnostic dump of the merged specs and of each nest axis_idx mapping.
   NOTE(review): presumably disabled by a preprocessor conditional on lines
   not shown -- confirm. */
4905 for (size_t j = 0; j < merged->n; j++)
4906 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4908 for (size_t j = 0; j < stack->n; j++)
4910 const struct ctables_nest *nest = &stack->nests[j];
4911 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4913 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4914 for (size_t k = 0; k < specs->n; k++)
4915 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4916 specs->specs[k].axis_idx);
/* Collect the variables used by CTSF_areaPCT_SUM specs on the summary
   axis into T->sum_vars. */
4922 size_t allocated_sum_vars = 0;
4923 enumerate_sum_vars (t->axes[t->summary_axis],
4924 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4926 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4927 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, reads from case C the value of the
   innermost (last) variable of the nest and, if the value matches the
   category specification of that variable, inserts it into T with
   ctables_value_insert.
   NOTE(review): a numeric system-missing value is tested first; the action
   taken for it is on a line not shown (presumably the value is skipped). */
4931 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4932 enum pivot_axis_type a)
4934 struct ctables_stack *stack = &t->stacks[a];
4935 for (size_t i = 0; i < stack->n; i++)
4937 const struct ctables_nest *nest = &stack->nests[i];
4938 const struct variable *var = nest->vars[nest->n - 1];
4939 const union value *value = case_data (c, var);
4941 if (var_is_numeric (var) && value->f == SYSMIS)
4944 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4946 ctables_value_insert (t, value, var_get_width (var));
/* Three-way comparison callback used by ctables_sort_clabels_values.  A_ and
   B_ each point to a pointer to struct ctables_value; WIDTH_ points to the
   int width of the values.  Returns the result of value_compare_3way on the
   two values. */
4951 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4953 const struct ctables_value *const *ap = a_;
4954 const struct ctables_value *const *bp = b_;
4955 const struct ctables_value *a = *ap;
4956 const struct ctables_value *b = *bp;
4957 const int *width = width_;
4958 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array holding every entry of
   T->clabels_values_map sorted by value, and stores in each entry its
   position in that array as its leaf index.  T->clabels_example supplies the
   variable, width, and category specification used throughout. */
4962 ctables_sort_clabels_values (struct ctables_table *t)
4964 const struct variable *v0 = t->clabels_example;
4965 int width = var_get_width (v0);
4967 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Insert every labeled value of v0 that matches its categories, so that it
   gets a slot even if no case carried it.  NOTE(review): the condition
   guarding this step is on a line not shown. */
4970 const struct val_labs *val_labs = var_get_value_labels (v0);
4971 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4972 vl = val_labs_next (val_labs, vl))
4973 if (ctables_categories_match (c0, &vl->value, v0))
4974 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array. */
4977 size_t n = hmap_count (&t->clabels_values_map);
4978 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4980 struct ctables_value *clv;
4982 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4983 t->clabels_values[i++] = clv;
4984 t->n_clabels_values = n;
/* Sort by value, then number the entries in sorted order. */
4987 sort (t->clabels_values, n, sizeof *t->clabels_values,
4988 compare_clabels_values_3way, &width);
4990 for (size_t i = 0; i < n; i++)
4991 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the category specification
   CATS implies.  Explicit number and string categories contribute their own
   value; the remaining kinds contribute those value-labeled values of VAR
   that satisfy the category.
   NOTE(review): most case labels of the switch on the category type are on
   lines not shown, so the comments below describe each group by what its
   visible code does rather than by category type. */
4995 ctables_add_category_occurrences (const struct variable *var,
4996 struct hmap *occurrences,
4997 const struct ctables_categories *cats)
4999 const struct val_labs *val_labs = var_get_value_labels (var);
5001 for (size_t i = 0; i < cats->n_cats; i++)
5003 const struct ctables_category *c = &cats->cats[i];
5007 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* String category: build a temporary value of the variable width, padded
   on the right with spaces, add it, and release it. */
5013 int width = var_get_width (var);
5015 value_init (&value, width);
5016 value_copy_buf_rpad (&value, width,
5017 CHAR_CAST (uint8_t *, c->string.string),
5018 c->string.length, ' ');
5019 ctables_add_occurrence (var, &value, occurrences);
5020 value_destroy (&value, width);
/* Numeric range: labeled values between nrange[0] and nrange[1], both
   bounds inclusive. */
5025 assert (var_is_numeric (var));
5026 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5027 vl = val_labs_next (val_labs, vl))
5028 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5029 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range. */
5033 assert (var_is_alpha (var));
5034 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5035 vl = val_labs_next (val_labs, vl))
5036 if (in_string_range (&vl->value, var, c->srange))
5037 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5041 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5042 vl = val_labs_next (val_labs, vl))
5043 if (var_is_value_missing (var, &vl->value))
5044 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value, unconditionally. */
5048 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5049 vl = val_labs_next (val_labs, vl))
5050 ctables_add_occurrence (var, &vl->value, occurrences);
5053 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless include_missing is set. */
5063 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5064 vl = val_labs_next (val_labs, vl))
5065 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5066 ctables_add_occurrence (var, &vl->value, occurrences);
5069 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates combinations of category values for section S.
   A is the axis currently being expanded and A_IDX the variable position
   within the nest of that axis.  For every occurrence recorded for that
   variable, the value is written into scratch case C, the matching category
   is stored in CATS[A][A_IDX], and the recursion moves on to the next
   variable.  Every CCT_POSTCOMPUTE category of the variable is also tried.
   When all axes are done, a cell is inserted with ctables_cell_insert__.
   The second dimension of CATS is declared with a fixed size of 10. */
5076 ctables_section_recurse_add_empty_categories (
5077 struct ctables_section *s,
5078 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5079 enum pivot_axis_type a, size_t a_idx)
/* Every axis has been assigned: emit the cell. */
5081 if (a >= PIVOT_N_AXES)
5082 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest or is exhausted: continue with the next axis. */
5083 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5084 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5087 const struct variable *var = s->nests[a]->vars[a_idx];
5088 const struct ctables_categories *categories = s->table->categories[
5089 var_get_dict_index (var)];
5090 int width = var_get_width (var);
5091 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5092 const struct ctables_occurrence *o;
5093 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence. */
5095 union value *value = case_data_rw (c, var);
5096 value_destroy (value, width);
5097 value_clone (value, &o->value, width);
5098 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5099 assert (cats[a][a_idx] != NULL);
5100 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not tied to a data value, so they are tried
   separately. */
5103 for (size_t i = 0; i < categories->n_cats; i++)
5105 const struct ctables_category *cat = &categories->cats[i];
5106 if (cat->type == CCT_POSTCOMPUTE)
5108 cats[a][a_idx] = cat;
5109 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should appear even
   though no case produced them.  For every non-scale variable whose category
   specification has show_empty set, the occurrences implied by the
   specification are added; then the combinations are enumerated with
   ctables_section_recurse_add_empty_categories using a freshly created case.
   NOTE(review): how the local show_empty flag gates the second step, and
   where the created case is released, are on lines not shown. */
5116 ctables_section_add_empty_categories (struct ctables_section *s)
5118 bool show_empty = false;
5119 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5121 for (size_t k = 0; k < s->nests[a]->n; k++)
5122 if (k != s->nests[a]->scale_idx)
5124 const struct variable *var = s->nests[a]->vars[k];
5125 const struct ctables_categories *cats = s->table->categories[
5126 var_get_dict_index (var)];
5127 if (cats->show_empty)
5130 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5136 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX fixed capacity of 10 per axis */
5137 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5138 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S so that it can be reused: removes every occurrence,
   every cell, and every area from their hash maps, destroying the values and
   summaries they own, and shrinks the cell and area maps afterwards.
   NOTE(review): the calls that free the removed node objects themselves are
   presumably on lines not shown. */
5143 ctables_section_clear (struct ctables_section *s)
/* Occurrences: one map per non-scale variable of each axis nest. */
5145 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5147 const struct ctables_nest *nest = s->nests[a];
5148 for (size_t i = 0; i < nest->n; i++)
5149 if (i != nest->scale_idx)
5151 const struct variable *var = nest->vars[i];
5152 int width = var_get_width (var);
5153 struct ctables_occurrence *o, *next;
5154 struct hmap *map = &s->occurrences[a][i];
5155 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5157 value_destroy (&o->value, width);
5158 hmap_delete (map, &o->node);
/* Cells: destroy the per-axis category values, then the summaries. */
5165 struct ctables_cell *cell, *next_cell;
5166 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5168 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5170 const struct ctables_nest *nest = s->nests[a];
5171 for (size_t i = 0; i < nest->n; i++)
5172 if (i != nest->scale_idx)
5173 value_destroy (&cell->axes[a].cvs[i].value,
5174 var_get_width (nest->vars[i]));
5175 free (cell->axes[a].cvs);
/* The summaries of a cell follow the spec set selected by cell->sv on the
   summary axis nest. */
5178 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5179 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5180 for (size_t i = 0; i < specs->n; i++)
5181 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5182 free (cell->summaries);
5184 hmap_delete (&s->cells, &cell->node);
5187 hmap_shrink (&s->cells);
/* Areas: one map per area type. */
5189 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5191 struct ctables_area *area, *next_area;
5192 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5196 hmap_delete (&s->areas[at], &area->node);
5199 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S.  The contents are removed first
   with ctables_section_clear; then the occurrence maps of every axis are
   destroyed and their arrays freed, followed by the cell map and the area
   maps. */
5204 ctables_section_uninit (struct ctables_section *s)
5206 ctables_section_clear (s);
5208 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5210 struct ctables_nest *nest = s->nests[a];
5211 for (size_t i = 0; i < nest->n; i++)
5212 hmap_destroy (&s->occurrences[a][i]);
5213 free (s->occurrences[a]);
5216 hmap_destroy (&s->cells);
5217 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5218 hmap_destroy (&s->areas[at]);
/* Clears the per-run data of table T: every section is cleared and, when
   T->clabels_example is set, the entries of T->clabels_values_map are
   destroyed and removed, the map is shrunk, and the sorted array
   T->clabels_values is freed and reset to empty.
   NOTE(review): freeing of the removed map entries themselves is presumably
   on a line not shown. */
5222 ctables_table_clear (struct ctables_table *t)
5224 for (size_t i = 0; i < t->n_sections; i++)
5225 ctables_section_clear (&t->sections[i]);
5227 if (t->clabels_example)
5229 int width = var_get_width (t->clabels_example);
5230 struct ctables_value *value, *next_value;
5231 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5232 &t->clabels_values_map)
5234 value_destroy (&value->value, width);
5235 hmap_delete (&t->clabels_values_map, &value->node);
5238 hmap_shrink (&t->clabels_values_map);
5240 free (t->clabels_values);
5241 t->clabels_values = NULL;
5242 t->n_clabels_values = 0;
/* Runs the tables of CT over the cases in INPUT from dataset DS.  CT is
   declared on a signature line not shown in this view.
   Sections are allocated for each table first.  The input is then read
   group by group; every case is inserted into every section of every table
   and, for axes whose labels are moved elsewhere, its category-label values
   are recorded.  Afterwards each table has its category-label values sorted,
   empty categories added, its output produced, and its data cleared.
   Returns the result of casegrouper_destroy.
   NOTE(review): braces are not shown, so whether the output step runs once
   per group or once overall should be confirmed. */
5247 ctables_execute (struct dataset *ds, struct casereader *input,
/* One section per combination of row, column and layer nests; an axis
   without nests still counts as one. */
5250 for (size_t i = 0; i < ct->n_tables; i++)
5252 struct ctables_table *t = ct->tables[i];
5253 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5254 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5255 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5256 sizeof *t->sections);
5257 size_t ix[PIVOT_N_AXES];
5258 ctables_table_add_section (t, 0, ix);
5261 struct dictionary *dict = dataset_dict (ds);
/* Group by split variables only for SPLIT_SEPARATE; otherwise the grouper
   is created with no variables. */
5263 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5264 struct casegrouper *grouper
5266 ? casegrouper_create_splits (input, dict)
5267 : casegrouper_create_vars (input, NULL, 0));
5268 struct casereader *group;
5269 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the first case of the group to report the split values. */
5273 struct ccase *c = casereader_peek (group, 0);
5276 output_split_file_values (ds, c);
5281 bool warn_on_invalid = true;
5282 for (struct ccase *c = casereader_read (group); c;
5283 case_unref (c), c = casereader_read (group))
/* d_weight is the rounded dictionary case weight; e_weight comes from the
   variable CT->e_weight when one is set (the alternative is not shown). */
5285 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5286 double e_weight = (ct->e_weight
5287 ? var_force_valid_weight (ct->e_weight,
5288 case_num (c, ct->e_weight),
5292 for (size_t i = 0; i < ct->n_tables; i++)
5294 struct ctables_table *t = ct->tables[i];
5296 for (size_t j = 0; j < t->n_sections; j++)
5297 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose labels are displayed on another axis need their values
   collected separately. */
5299 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5300 if (t->label_axis[a] != a)
5301 ctables_insert_clabels_values (t, c, a);
5304 casereader_destroy (group);
5306 for (size_t i = 0; i < ct->n_tables; i++)
5308 struct ctables_table *t = ct->tables[i];
5310 if (t->clabels_example)
5311 ctables_sort_clabels_values (t);
5313 for (size_t j = 0; j < t->n_sections; j++)
5314 ctables_section_add_empty_categories (&t->sections[j]);
5316 ctables_table_output (ct, t);
5317 ctables_table_clear (t);
5320 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression.  ctables_pcexpr_parse_binary_operators receives one of these
   and calls it to parse each operand. */
5325 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5326 struct dictionary *);
/* Releases postcompute expression E together with what it owns: the string
   of a CTPO_CAT_STRING node, both bound strings of a CTPO_CAT_SRANGE node,
   both subexpressions of an operator node (recursively), and the message
   location.
   NOTE(review): the null check on E, some case labels, and the final free
   are on lines not shown; callers in this file pass a possibly null
   expression, so a null E is presumably accepted -- confirm. */
5329 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5335 case CTPO_CAT_STRING:
5336 ss_dealloc (&e->string);
5339 case CTPO_CAT_SRANGE:
5340 for (size_t i = 0; i < 2; i++)
5341 ss_dealloc (&e->srange[i]);
/* Operator nodes: destroy both operands. */
5350 for (size_t i = 0; i < 2; i++)
5351 ctables_pcexpr_destroy (e->subs[i]);
/* The kinds listed below share one case group. */
5355 case CTPO_CAT_NUMBER:
5356 case CTPO_CAT_NRANGE:
5357 case CTPO_CAT_MISSING:
5358 case CTPO_CAT_OTHERNM:
5359 case CTPO_CAT_SUBTOTAL:
5360 case CTPO_CAT_TOTAL:
5364 msg_location_destroy (e->location);
/* Allocates a new expression node with SUB0 and SUB1 as its operands.  The
   location of the node is the merge of both operand locations, so neither
   operand may be null.
   NOTE(review): the initializer that stores OP and the return statement are
   on lines not shown. */
5369 static struct ctables_pcexpr *
5370 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5371 struct ctables_pcexpr *sub0,
5372 struct ctables_pcexpr *sub1)
5374 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5375 *e = (struct ctables_pcexpr) {
5377 .subs = { sub0, sub1 },
5378 .location = msg_location_merged (sub0->location, sub1->location),
5383 /* How to parse an operator: pairs a lexer token with the postcompute
   operation built when that token is seen between two operands. */
5386 enum token_type token; /* Compared with the current lexer token. */
5387 enum ctables_postcompute_op op; /* Passed to ctables_pcexpr_allocate_binary. */
/* Looks through the N_OPS entries of OPS for one whose token equals the
   current token of LEXER.
   NOTE(review): what happens on a match is on lines not shown.  Entries with
   token T_NEG_NUM are treated differently from the others; presumably other
   tokens are consumed and the matching entry is returned, with a null result
   when nothing matches -- confirm. */
5390 static const struct operator *
5391 ctables_pcexpr_match_operator (struct lexer *lexer,
5392 const struct operator ops[], size_t n_ops)
5394 for (const struct operator *op = ops; op < ops + n_ops; op++)
5395 if (lex_token (lexer) == op->token)
5397 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators taken from OPS (N_OPS entries), given
   an already parsed left operand LHS.  Each operator found is followed by an
   operand parsed with PARSE_NEXT_LEVEL, and the pair is folded into LHS with
   ctables_pcexpr_allocate_binary, so the chain associates to the left.
   If CHAIN_WARNING is nonnull it is issued as an SW message at the location
   of LHS when op_count exceeds 1.
   NOTE(review): the tests for a missing operator and for a failed operand
   parse, and the return statements, are on lines not shown; LHS is destroyed
   on what is presumably the failure path. */
5406 static struct ctables_pcexpr *
5407 ctables_pcexpr_parse_binary_operators__ (
5408 struct lexer *lexer, struct dictionary *dict,
5409 const struct operator ops[], size_t n_ops,
5410 parse_recursively_func *parse_next_level,
5411 const char *chain_warning, struct ctables_pcexpr *lhs)
/* op_count is the number of operators folded so far. */
5413 for (int op_count = 0; ; op_count++)
5415 const struct operator *op
5416 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5419 if (op_count > 1 && chain_warning)
5420 msg_at (SW, lhs->location, "%s", chain_warning);
5425 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5428 ctables_pcexpr_destroy (lhs);
5432 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level: a first operand obtained from
   PARSE_NEXT_LEVEL followed by any chain of the operators in OPS, which is
   handled by ctables_pcexpr_parse_binary_operators__.  CHAIN_WARNING is
   forwarded unchanged.
   NOTE(review): the handling of a failed first operand is on lines not
   shown. */
5436 static struct ctables_pcexpr *
5437 ctables_pcexpr_parse_binary_operators (
5438 struct lexer *lexer, struct dictionary *dict,
5439 const struct operator ops[], size_t n_ops,
5440 parse_recursively_func *parse_next_level, const char *chain_warning)
5442 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5446 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5448 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary calls this for a
   parenthesized subexpression before the definition appears. */
5451 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5452 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node whose numeric range
   runs from LOW to HIGH.  Fields not named in the initializer are zero. */
5454 static struct ctables_pcexpr
5455 ctpo_cat_nrange (double low, double high)
5457 return (struct ctables_pcexpr) {
5458 .op = CTPO_CAT_NRANGE,
5459 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node whose string range
   runs from LOW to HIGH.  The substrings are stored as given, not copied.
   Callers in this file pass a substring with a null string pointer for an
   open bound. */
5463 static struct ctables_pcexpr
5464 ctpo_cat_srange (struct substring low, struct substring high)
5466 return (struct ctables_pcexpr) {
5467 .op = CTPO_CAT_SRANGE,
5468 .srange = { low, high },
/* Parses a primary postcompute expression and returns it as a newly
   allocated node.  The forms recognized, in the order tested, are:
   - a number, giving CTPO_CONSTANT;
   - the keywords MISSING, OTHERNM and TOTAL;
   - SUBTOTAL, optionally followed by an index of at least 1 in brackets;
   - a bracketed category reference: LO THRU x, x THRU HI, x THRU y, or a
     single value, where the values are numbers or strings;
   - a parenthesized subexpression parsed with ctables_pcexpr_parse_add.
   Anything else is reported with lex_error.  The location stored in the node
   runs from the first token of the primary through the last token consumed.
   NOTE(review): the return statements of the failure paths and several
   else branches are on lines not shown. */
5472 static struct ctables_pcexpr *
5473 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5475 int start_ofs = lex_ofs (lexer);
5476 struct ctables_pcexpr e;
5477 if (lex_is_number (lexer))
5479 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5480 .number = lex_number (lexer) };
5483 else if (lex_match_id (lexer, "MISSING"))
5484 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5485 else if (lex_match_id (lexer, "OTHERNM"))
5486 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5487 else if (lex_match_id (lexer, "TOTAL"))
5488 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5489 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows. */
5491 size_t subtotal_index = 0;
5492 if (lex_match (lexer, T_LBRACK))
5494 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5496 subtotal_index = lex_integer (lexer);
5498 if (!lex_force_match (lexer, T_RBRACK))
5501 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5502 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5504 else if (lex_match (lexer, T_LBRACK))
/* LO THRU x: open lower bound, represented by a null string or -DBL_MAX. */
5506 if (lex_match_id (lexer, "LO"))
5508 if (!lex_force_match_id (lexer, "THRU"))
5511 if (lex_is_string (lexer))
5513 struct substring low = { .string = NULL };
5514 struct substring high = parse_substring (lexer, dict);
5515 e = ctpo_cat_srange (low, high);
5519 if (!lex_force_num (lexer))
5521 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* Number, optionally followed by THRU HI or THRU number. */
5525 else if (lex_is_number (lexer))
5527 double number = lex_number (lexer);
5529 if (lex_match_id (lexer, "THRU"))
5531 if (lex_match_id (lexer, "HI"))
5532 e = ctpo_cat_nrange (number, DBL_MAX);
5535 if (!lex_force_num (lexer))
5537 e = ctpo_cat_nrange (number, lex_number (lexer));
5542 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* String, optionally followed by THRU HI or THRU string. */
5545 else if (lex_is_string (lexer))
5547 struct substring s = parse_substring (lexer, dict);
5549 if (lex_match_id (lexer, "THRU"))
5551 struct substring high;
5553 if (lex_match_id (lexer, "HI"))
5554 high = (struct substring) { .string = NULL };
5557 if (!lex_force_string (lexer))
5562 high = parse_substring (lexer, dict);
5565 e = ctpo_cat_srange (s, high);
5568 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5572 lex_error (lexer, NULL);
/* Missing closing bracket: release any strings already stored in E. */
5576 if (!lex_force_match (lexer, T_RBRACK))
5578 if (e.op == CTPO_CAT_STRING)
5579 ss_dealloc (&e.string);
5580 else if (e.op == CTPO_CAT_SRANGE)
5582 ss_dealloc (&e.srange[0]);
5583 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression; it is destroyed if the closing parenthesis
   is missing. */
5588 else if (lex_match (lexer, T_LPAREN))
5590 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5593 if (!lex_force_match (lexer, T_RPAREN))
5595 ctables_pcexpr_destroy (ep);
5602 lex_error (lexer, NULL);
/* Record the source span and move the stack copy to the heap. */
5606 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5607 return xmemdup (&e, sizeof e);
/* Allocates a new expression node whose location runs from token offset
   START_OFS through the token just before the current one in LEXER.
   NOTE(review): the initializers for the operation and for SUB, and the
   return statement, are on lines not shown; going by the name and the
   callers, this is presumably a negation node wrapping SUB -- confirm. */
5610 static struct ctables_pcexpr *
5611 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5612 struct lexer *lexer, int start_ofs)
5614 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5615 *e = (struct ctables_pcexpr) {
5618 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level (operator token T_EXP, operation
   CTPO_POW), with operands parsed by ctables_pcexpr_parse_primary.  The
   chain warning about left associativity is passed to the chain parser.
   When the current token is T_NEG_NUM and the next one is T_EXP, the negated
   token value is turned into a CTPO_CONSTANT that serves as the left
   operand, the operator chain is parsed from it, and the result is wrapped
   with ctables_pcexpr_allocate_neg.
   NOTE(review): the token advance and the failure check in that special
   case are on lines not shown. */
5623 static struct ctables_pcexpr *
5624 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5626 static const struct operator op = { T_EXP, CTPO_POW };
5628 const char *chain_warning =
5629 _("The exponentiation operator (`**') is left-associative: "
5630 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5631 "To disable this warning, insert parentheses.");
/* Ordinary case: no negative number directly followed by T_EXP. */
5633 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5634 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5635 ctables_pcexpr_parse_primary,
5638 /* Special case for situations like "-5**6", which must be parsed as
5641 int start_ofs = lex_ofs (lexer);
5642 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5643 *lhs = (struct ctables_pcexpr) {
5644 .op = CTPO_CONSTANT,
5645 .number = -lex_tokval (lexer),
5646 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5650 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5651 lexer, dict, &op, 1,
5652 ctables_pcexpr_parse_primary, chain_warning, lhs);
5656 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5659 /* Parses the unary minus level.  Without a leading T_DASH this defers to
   ctables_pcexpr_parse_exp.  Otherwise the operand is parsed recursively at
   this same level, so repeated minus signs nest, and the result is wrapped
   with ctables_pcexpr_allocate_neg.
   NOTE(review): the check for a failed operand parse is on lines not
   shown. */
5660 static struct ctables_pcexpr *
5661 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5663 int start_ofs = lex_ofs (lexer);
5664 if (!lex_match (lexer, T_DASH))
5665 return ctables_pcexpr_parse_exp (lexer, dict);
5667 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5671 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5674 /* Parses the multiplication and division level: operands come from
   ctables_pcexpr_parse_neg, T_ASTERISK builds CTPO_MUL and T_SLASH builds
   CTPO_DIV.  No chain warning is used at this level. */
5675 static struct ctables_pcexpr *
5676 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5678 static const struct operator ops[] =
5680 { T_ASTERISK, CTPO_MUL },
5681 { T_SLASH, CTPO_DIV },
5684 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5685 sizeof ops / sizeof *ops,
5686 ctables_pcexpr_parse_neg, NULL);
5689 /* Parses the addition and subtraction level: operands come from
   ctables_pcexpr_parse_mul, T_PLUS builds CTPO_ADD and T_DASH builds
   CTPO_SUB.  T_NEG_NUM is also listed and builds CTPO_ADD, presumably so
   that a minus sign lexed as part of a negative number still joins the two
   operands -- confirm against ctables_pcexpr_match_operator. */
5690 static struct ctables_pcexpr *
5691 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5693 static const struct operator ops[] =
5695 { T_PLUS, CTPO_ADD },
5696 { T_DASH, CTPO_SUB },
5697 { T_NEG_NUM, CTPO_ADD },
5700 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5701 ops, sizeof ops / sizeof *ops,
5702 ctables_pcexpr_parse_mul, NULL);
/* Looks up the postcompute called NAME in CT->postcomputes.  Candidates are
   selected by the hash from utf8_hash_case_string and confirmed by comparing
   names with utf8_strcasecmp.
   NOTE(review): the return statements are on lines not shown; callers test
   the result for null, so null presumably means no match. */
5705 static struct ctables_postcompute *
5706 ctables_find_postcompute (struct ctables *ct, const char *name)
5708 struct ctables_postcompute *pc;
5709 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5710 utf8_hash_case_string (name, 0), &ct->postcomputes)
5711 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form
       & name = EXPR ( expression )
   and records it in CT, which is declared on a signature line not shown.
   A definition whose name already exists replaces the earlier one after a
   warning that points at both definitions; otherwise a new postcompute is
   created and inserted into CT->postcomputes.  The source text of the
   expression is stored as the label.
   NOTE(review): the failure returns, the release of NAME on those paths, the
   assignment of the expression to the postcompute, and any condition on
   setting the label are on lines not shown. */
5717 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The location reported for the definition starts one token before the
   current one. */
5720 int pcompute_start = lex_ofs (lexer) - 1;
5722 if (!lex_match (lexer, T_AND))
5724 lex_error_expecting (lexer, "&");
5727 if (!lex_force_id (lexer))
5730 char *name = ss_xstrdup (lex_tokss (lexer));
5733 if (!lex_force_match (lexer, T_EQUALS)
5734 || !lex_force_match_id (lexer, "EXPR")
5735 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression for the label. */
5741 int expr_start = lex_ofs (lexer);
5742 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5743 int expr_end = lex_ofs (lexer) - 1;
5744 if (!expr || !lex_force_match (lexer, T_RPAREN))
5746 ctables_pcexpr_destroy (expr);
5750 int pcompute_end = lex_ofs (lexer) - 1;
5752 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5755 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then drop the old expression and location. */
5758 msg_at (SW, location, _("New definition of &%s will override the "
5759 "previous definition."),
5761 msg_at (SN, pc->location, _("This is the previous definition."));
5763 ctables_pcexpr_destroy (pc->expr);
5764 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes ownership of NAME. */
5769 pc = xmalloc (sizeof *pc);
5770 *pc = (struct ctables_postcompute) { .name = name };
5771 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5772 utf8_hash_case_string (pc->name, 0));
5775 pc->location = location;
5777 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES FORMAT into *SSS: a sequence of summary
   functions, each followed by an output format (and, for PTILE, preceded by
   a percentile).  *SSS is reset to an empty set first.  Parsing stops at the
   end of the command, at a slash, or at a LABEL or HIDESOURCECATS
   identifier.

   NOTE(review): the return statements and the label that leads to the final
   ctables_summary_spec_set_uninit call are not visible in this excerpt;
   that call presumably belongs to the failure path.  Confirm. */
5782 ctables_parse_pproperties_format (struct lexer *lexer,
5783 struct ctables_summary_spec_set *sss)
5785 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5787 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5788 && !(lex_token (lexer) == T_ID
5789 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5790 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5791 lex_tokss (lexer)))))
5793 /* Parse function. */
5794 enum ctables_summary_function function;
5796 enum ctables_area_type area;
5797 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5800 /* Parse percentile. */
5801 double percentile = 0;
5802 if (function == CTSF_PTILE)
/* The percentile must be a number in the closed range 0..100. */
5804 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5806 percentile = lex_number (lexer);
5811 struct fmt_spec format;
5812 bool is_ctables_format;
5813 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new spec, growing the array with x2nrealloc when it is full. */
5816 if (sss->n >= sss->allocated)
5817 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5818 sizeof *sss->specs);
5819 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5820 .function = function,
5821 .weighted = weighted,
5824 .percentile = percentile,
5826 .is_ctables_format = is_ctables_format,
5832 ctables_summary_spec_set_uninit (sss);
/* Parses the PPROPERTIES subcommand.  The syntax begins with a list of
   &NAME references, each of which must name a postcompute already present
   in CT (otherwise "Unknown computed category" is reported).  The LABEL,
   FORMAT and HIDESOURCECATS settings that follow are applied to every
   postcompute in that list.

   NOTE(review): the return type, the return statements and the cleanup of
   the PCS array are on lines not visible in this excerpt. */
5837 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5839 struct ctables_postcompute **pcs = NULL;
5841 size_t allocated_pcs = 0;
5843 while (lex_match (lexer, T_AND))
5845 if (!lex_force_id (lexer))
5847 struct ctables_postcompute *pc
5848 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5851 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5856 if (n_pcs >= allocated_pcs)
5857 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings, repeated until a slash or the end of the command. */
5861 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5863 if (lex_match_id (lexer, "LABEL"))
5865 lex_match (lexer, T_EQUALS);
5866 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string; the previous
   label is freed first. */
5869 for (size_t i = 0; i < n_pcs; i++)
5871 free (pcs[i]->label);
5872 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5877 else if (lex_match_id (lexer, "FORMAT"))
5879 lex_match (lexer, T_EQUALS);
5881 struct ctables_summary_spec_set sss;
5882 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute receives a freshly allocated clone of the parsed set;
   the temporary set itself is released after the loop. */
5885 for (size_t i = 0; i < n_pcs; i++)
5888 ctables_summary_spec_set_uninit (pcs[i]->specs);
5890 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5891 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5893 ctables_summary_spec_set_uninit (&sss);
5895 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5897 lex_match (lexer, T_EQUALS);
5898 bool hide_source_cats;
5899 if (!parse_bool (lexer, &hide_source_cats))
5901 for (size_t i = 0; i < n_pcs; i++)
5902 pcs[i]->hide_source_cats = hide_source_cats;
5906 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime according to FORMAT,
   and appends the resulting text to OUT.

   NOTE(review): neither the localtime result nor the strftime return value
   is checked in the visible lines.  If strftime returns 0 the buffer
   contents are unspecified, and localtime may return a null pointer.  The
   declaration of `value` is on a line not visible in this excerpt. */
5919 put_strftime (struct string *out, time_t now, const char *format)
5921 const struct tm *tm = localtime (&now);
5923 strftime (value, sizeof value, format, tm);
5924 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this excerpt.
   put_title_text uses the result as a condition, so it presumably reports
   whether the prefix was found; confirm. */
5928 skip_prefix (struct substring *s, struct substring prefix)
5930 if (ss_starts_with (*s, prefix))
5932 ss_advance (s, prefix.length);
/* Appends to OUT a textual rendering of the tokens at offsets EXPR_START up
   to, but not including, EXPR_END in LEXER.  An identifier token is replaced
   by the label of the variable of that name in DICT when such a variable
   exists and has a label; otherwise the identifier text itself is used.
   Other tokens are written using their source representation, separated by
   single spaces except around parentheses as coded below.

   NOTE(review): the declaration of `nest` and the bodies of the bracket
   branches are on lines not visible in this excerpt; `nest` presumably
   tracks square-bracket depth.  Confirm. */
5940 put_table_expression (struct string *out, struct lexer *lexer,
5941 struct dictionary *dict, int expr_start, int expr_end)
5944 for (int ofs = expr_start; ofs < expr_end; ofs++)
5946 const struct token *t = lex_ofs_token (lexer, ofs);
5947 if (t->type == T_LBRACK)
5949 else if (t->type == T_RBRACK && nest > 0)
5955 else if (t->type == T_ID)
5957 const struct variable *var
5958 = dict_lookup_var (dict, t->string.string);
5959 const char *label = var ? var_get_label (var) : NULL;
5960 ds_put_cstr (out, label ? label : t->string.string);
/* Leading space: not before the first token, not before a right
   parenthesis, and not when OUT already ends in a space. */
5964 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5965 ds_put_byte (out, ' ');
5967 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5968 ds_put_cstr (out, repr);
/* Trailing space: not after the last token and not after a left
   parenthesis. */
5971 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5972 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding the substitution keywords that start with a
   right parenthesis:

       )DATE   the time NOW formatted with strftime "%x"
       )TIME   the time NOW formatted with strftime "%X"
       )TABLE  the table expression at token offsets EXPR_START..EXPR_END,
               as rendered by put_table_expression

   A right parenthesis that does not start one of these keywords is copied
   through unchanged.

   NOTE(review): the enclosing loop and its exit are on lines not visible in
   this excerpt. */
5978 put_title_text (struct string *out, struct substring in, time_t now,
5979 struct lexer *lexer, struct dictionary *dict,
5980 int expr_start, int expr_end)
/* Copy everything up to the next ')' verbatim. */
5984 size_t chunk = ss_find_byte (in, ')');
5985 ds_put_substring (out, ss_head (in, chunk));
5986 ss_advance (&in, chunk);
5987 if (ss_is_empty (in))
5990 if (skip_prefix (&in, ss_cstr (")DATE")))
5991 put_strftime (out, now, "%x");
5992 else if (skip_prefix (&in, ss_cstr (")TIME")))
5993 put_strftime (out, now, "%X");
5994 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5995 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ')' itself and step over it. */
5998 ds_put_byte (out, ')');
5999 ss_advance (&in, 1);
/* Implements the CTABLES command.  Parsing happens in two stages: first the
   global subcommands that may precede the first TABLE (FORMAT, VLABELS,
   MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS), then
   one or more TABLE specifications, each followed by its own subcommands
   (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST).
   The parsed tables are then handed to ctables_execute.

   On the visible success path the result is CMD_SUCCESS only if both
   ctables_execute and proc_commit report success, and CMD_FAILURE otherwise.

   NOTE(review): this excerpt omits many short lines (braces, goto/return
   statements, some declarations), so the error paths are only partly
   visible.  The trailing ctables_destroy call presumably belongs to the
   shared failure exit; confirm. */
6005 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6007 struct casereader *input = NULL;
/* The measure guesser reads the data through its own proc_open pass before
   any syntax is parsed.  NOTE(review): whatever guards this pass is not
   visible in this excerpt. */
6009 struct measure_guesser *mg = measure_guesser_create (ds);
6012 input = proc_open (ds);
6013 measure_guesser_run (mg, input);
6014 measure_guesser_destroy (mg);
/* Every variable's label display mode starts out as the global
   settings_get_show_variables () value. */
6017 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6018 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6019 enum settings_value_show tvars = settings_get_show_variables ();
6020 for (size_t i = 0; i < n_vars; i++)
6021 vlabels[i] = (enum ctables_vlabel) tvars;
/* Take an unshared reference to the default look and clear omit_empty on
   it. */
6023 struct pivot_table_look *look = pivot_table_look_unshare (
6024 pivot_table_look_ref (pivot_table_look_get_default ()));
6025 look->omit_empty = false;
6027 struct ctables *ct = xmalloc (sizeof *ct);
6028 *ct = (struct ctables) {
6029 .dict = dataset_dict (ds),
6031 .ctables_formats = FMT_SETTINGS_INIT,
6033 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once; TITLES passes it to put_title_text for every title
   string. */
6036 time_t now = time (NULL);
6041 const char *dot_string;
6042 const char *comma_string;
/* Number-style strings for the four CTABLES-specific formats.  The loop
   below installs the dot_string variant when the decimal setting is '.',
   and the comma_string variant otherwise. */
6044 static const struct ctf ctfs[4] = {
6045 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6046 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6047 { CTEF_PAREN, "-,(,),", "-.(.)." },
6048 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6050 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6051 for (size_t i = 0; i < 4; i++)
6053 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6054 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6055 fmt_number_style_from_string (s));
6058 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands, repeated until the TABLE keyword is matched. */
6061 while (!lex_match_id (lexer, "TABLE"))
6063 if (lex_match_id (lexer, "FORMAT"))
/* SYSMIS marks a column width that was not specified. */
6065 double widths[2] = { SYSMIS, SYSMIS };
6066 double units_per_inch = 72.0;
6068 while (lex_token (lexer) != T_SLASH)
6070 if (lex_match_id (lexer, "MINCOLWIDTH"))
6072 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6075 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6077 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6080 else if (lex_match_id (lexer, "UNITS"))
6082 lex_match (lexer, T_EQUALS);
6083 if (lex_match_id (lexer, "POINTS"))
6084 units_per_inch = 72.0;
6085 else if (lex_match_id (lexer, "INCHES"))
6086 units_per_inch = 1.0;
6087 else if (lex_match_id (lexer, "CM"))
6088 units_per_inch = 2.54;
6091 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6095 else if (lex_match_id (lexer, "EMPTY"))
6100 lex_match (lexer, T_EQUALS);
6101 if (lex_match_id (lexer, "ZERO"))
6103 /* Nothing to do. */
6105 else if (lex_match_id (lexer, "BLANK"))
6106 ct->zero = xstrdup ("");
6107 else if (lex_force_string (lexer))
6109 ct->zero = ss_xstrdup (lex_tokss (lexer));
6115 else if (lex_match_id (lexer, "MISSING"))
6117 lex_match (lexer, T_EQUALS);
6118 if (!lex_force_string (lexer))
/* Any string other than "." is copied; the alternative used for "." is on
   a line not visible in this excerpt. */
6122 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6123 ? ss_xstrdup (lex_tokss (lexer))
6129 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6130 "UNITS", "EMPTY", "MISSING");
6135 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6136 && widths[0] > widths[1])
6138 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Each width that was given is stored in the look's horizontal width range
   as width / units_per_inch * 96 (presumably 1/96-inch units; confirm). */
6142 for (size_t i = 0; i < 2; i++)
6143 if (widths[i] != SYSMIS)
6145 int *wr = ct->look->width_ranges[TABLE_HORZ];
6146 wr[i] = widths[i] / units_per_inch * 96.0;
6151 else if (lex_match_id (lexer, "VLABELS"))
6153 if (!lex_force_match_id (lexer, "VARIABLES"))
6155 lex_match (lexer, T_EQUALS);
6157 struct variable **vars;
6159 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6163 if (!lex_force_match_id (lexer, "DISPLAY"))
6168 lex_match (lexer, T_EQUALS);
6170 enum ctables_vlabel vlabel;
6171 if (lex_match_id (lexer, "DEFAULT"))
6172 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6173 else if (lex_match_id (lexer, "NAME"))
6175 else if (lex_match_id (lexer, "LABEL"))
6176 vlabel = CTVL_LABEL;
6177 else if (lex_match_id (lexer, "BOTH"))
6179 else if (lex_match_id (lexer, "NONE"))
6183 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Apply the chosen display mode to each listed variable, indexed by its
   position in the dictionary. */
6189 for (size_t i = 0; i < n_vars; i++)
6190 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6193 else if (lex_match_id (lexer, "MRSETS"))
6195 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6197 lex_match (lexer, T_EQUALS);
6198 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6201 else if (lex_match_id (lexer, "SMISSING"))
6203 if (lex_match_id (lexer, "VARIABLE"))
6204 ct->smissing_listwise = false;
6205 else if (lex_match_id (lexer, "LISTWISE"))
6206 ct->smissing_listwise = true;
6209 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6213 else if (lex_match_id (lexer, "PCOMPUTE"))
6215 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6218 else if (lex_match_id (lexer, "PPROPERTIES"))
6220 if (!ctables_parse_pproperties (lexer, ct))
6223 else if (lex_match_id (lexer, "WEIGHT"))
6225 if (!lex_force_match_id (lexer, "VARIABLE"))
6227 lex_match (lexer, T_EQUALS);
6228 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6232 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6234 if (lex_match_id (lexer, "COUNT"))
6236 lex_match (lexer, T_EQUALS);
6237 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6240 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT, a threshold that is still 0 becomes 5. */
6243 else if (ct->hide_threshold == 0)
6244 ct->hide_threshold = 5;
6248 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6249 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6250 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6254 if (!lex_force_match (lexer, T_SLASH))
/* TABLE specifications, repeated until the end of the command. */
6258 size_t allocated_tables = 0;
6261 if (ct->n_tables >= allocated_tables)
6262 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6263 sizeof *ct->tables);
/* Default category and category set for the new table. */
6265 struct ctables_category *cat = xmalloc (sizeof *cat);
6266 *cat = (struct ctables_category) {
6268 .include_missing = false,
6269 .sort_ascending = true,
6272 struct ctables_categories *c = xmalloc (sizeof *c);
6273 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6274 *c = (struct ctables_categories) {
/* One categories pointer per dictionary variable. */
6281 struct ctables_categories **categories = xnmalloc (n_vars,
6282 sizeof *categories);
6283 for (size_t i = 0; i < n_vars; i++)
/* New table with its defaults: summary labels visible on the column axis,
   and category labels left on their own axes. */
6286 struct ctables_table *t = xmalloc (sizeof *t);
6287 *t = (struct ctables_table) {
6289 .slabels_axis = PIVOT_AXIS_COLUMN,
6290 .slabels_visible = true,
6291 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6293 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6294 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6295 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6297 .clabels_from_axis = PIVOT_AXIS_LAYER,
6298 .clabels_to_axis = PIVOT_AXIS_LAYER,
6299 .categories = categories,
6300 .n_categories = n_vars,
6303 ct->tables[ct->n_tables++] = t;
/* Table expression: rows, optionally BY columns, optionally BY layers.
   expr_start and expr_end bracket its tokens; TITLES later passes them to
   put_title_text. */
6305 lex_match (lexer, T_EQUALS);
6306 int expr_start = lex_ofs (lexer);
6307 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6309 if (lex_match (lexer, T_BY))
6311 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6312 ct, t, PIVOT_AXIS_COLUMN))
6315 if (lex_match (lexer, T_BY))
6317 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6318 ct, t, PIVOT_AXIS_LAYER))
6322 int expr_end = lex_ofs (lexer);
6324 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6325 && !t->axes[PIVOT_AXIS_LAYER])
6327 lex_error (lexer, _("At least one variable must be specified."));
/* Look for a scale variable on each axis.  The diagnostics below point at
   each axis on which one was found. */
6331 const struct ctables_axis *scales[PIVOT_N_AXES];
6332 size_t n_scales = 0;
6333 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6335 scales[a] = find_scale (t->axes[a]);
6341 msg (SE, _("Scale variables may appear only on one axis."));
6342 if (scales[PIVOT_AXIS_ROW])
6343 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6344 _("This scale variable appears on the rows axis."));
6345 if (scales[PIVOT_AXIS_COLUMN])
6346 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6347 _("This scale variable appears on the columns axis."));
6348 if (scales[PIVOT_AXIS_LAYER])
6349 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6350 _("This scale variable appears on the layer axis."));
/* Look for a summary on each axis, with separate handling for axes that
   have a scale variable.  More than one axis with a summary is an error. */
6354 const struct ctables_axis *summaries[PIVOT_N_AXES];
6355 size_t n_summaries = 0;
6356 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6358 summaries[a] = (scales[a]
6360 : find_categorical_summary_spec (t->axes[a]));
6364 if (n_summaries > 1)
6366 msg (SE, _("Summaries may appear only on one axis."));
6367 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6370 msg_at (SN, summaries[a]->loc,
6372 ? _("This variable on the rows axis has a summary.")
6373 : a == PIVOT_AXIS_COLUMN
6374 ? _("This variable on the columns axis has a summary.")
6375 : _("This variable on the layers axis has a summary."));
6377 msg_at (SN, summaries[a]->loc,
6378 _("This is a scale variable, so it always has a "
6379 "summary even if the syntax does not explicitly "
/* The summary axis is an axis that has a summary or, when no axis has one,
   an axis that is present.  NOTE(review): whether the loop stops at the
   first match is on a line not visible in this excerpt. */
6384 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6385 if (n_summaries ? summaries[a] : t->axes[a])
6387 t->summary_axis = a;
6391 if (lex_token (lexer) == T_ENDCMD)
6393 if (!ctables_prepare_table (t))
6397 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6400 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6402 if (lex_match_id (lexer, "SLABELS"))
6404 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6406 if (lex_match_id (lexer, "POSITION"))
6408 lex_match (lexer, T_EQUALS);
6409 if (lex_match_id (lexer, "COLUMN"))
6410 t->slabels_axis = PIVOT_AXIS_COLUMN;
6411 else if (lex_match_id (lexer, "ROW"))
6412 t->slabels_axis = PIVOT_AXIS_ROW;
6413 else if (lex_match_id (lexer, "LAYER"))
6414 t->slabels_axis = PIVOT_AXIS_LAYER;
6417 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6421 else if (lex_match_id (lexer, "VISIBLE"))
6423 lex_match (lexer, T_EQUALS);
6424 if (!parse_bool (lexer, &t->slabels_visible))
6429 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6434 else if (lex_match_id (lexer, "CLABELS"))
6436 if (lex_match_id (lexer, "AUTO"))
6438 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6439 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6441 else if (lex_match_id (lexer, "ROWLABELS"))
6443 lex_match (lexer, T_EQUALS);
6444 if (lex_match_id (lexer, "OPPOSITE"))
6445 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6446 else if (lex_match_id (lexer, "LAYER"))
6447 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6450 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6454 else if (lex_match_id (lexer, "COLLABELS"))
6456 lex_match (lexer, T_EQUALS);
6457 if (lex_match_id (lexer, "OPPOSITE"))
6458 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6459 else if (lex_match_id (lexer, "LAYER"))
6460 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6463 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6469 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6474 else if (lex_match_id (lexer, "CRITERIA"))
6476 if (!lex_force_match_id (lexer, "CILEVEL"))
6478 lex_match (lexer, T_EQUALS);
6480 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6482 t->cilevel = lex_number (lexer);
6485 else if (lex_match_id (lexer, "CATEGORIES"))
6487 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6491 else if (lex_match_id (lexer, "TITLES"))
/* textp selects which of the table's text fields receives the text. */
6496 if (lex_match_id (lexer, "CAPTION"))
6497 textp = &t->caption;
6498 else if (lex_match_id (lexer, "CORNER"))
6500 else if (lex_match_id (lexer, "TITLE"))
6504 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6507 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces after keyword
   expansion by put_title_text. */
6509 struct string s = DS_EMPTY_INITIALIZER;
6510 while (lex_is_string (lexer))
6512 if (!ds_is_empty (&s))
6513 ds_put_byte (&s, ' ');
6514 put_title_text (&s, lex_tokss (lexer), now,
6515 lexer, dataset_dict (ds),
6516 expr_start, expr_end);
6520 *textp = ds_steal_cstr (&s);
6522 while (lex_token (lexer) != T_SLASH
6523 && lex_token (lexer) != T_ENDCMD);
6525 else if (lex_match_id (lexer, "SIGTEST"))
/* Chi-square test settings, allocated with their defaults before the
   options are parsed. */
6529 t->chisq = xmalloc (sizeof *t->chisq);
6530 *t->chisq = (struct ctables_chisq) {
6532 .include_mrsets = true,
6533 .all_visible = true,
6539 if (lex_match_id (lexer, "TYPE"))
6541 lex_match (lexer, T_EQUALS);
6542 if (!lex_force_match_id (lexer, "CHISQUARE"))
6545 else if (lex_match_id (lexer, "ALPHA"))
6547 lex_match (lexer, T_EQUALS);
6548 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6550 t->chisq->alpha = lex_number (lexer);
6553 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6555 lex_match (lexer, T_EQUALS);
6556 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6559 else if (lex_match_id (lexer, "CATEGORIES"))
6561 lex_match (lexer, T_EQUALS);
6562 if (lex_match_id (lexer, "ALLVISIBLE"))
6563 t->chisq->all_visible = true;
6564 else if (lex_match_id (lexer, "SUBTOTALS"))
6565 t->chisq->all_visible = false;
6568 lex_error_expecting (lexer,
6569 "ALLVISIBLE", "SUBTOTALS");
6575 lex_error_expecting (lexer, "TYPE", "ALPHA",
6576 "INCLUDEMRSETS", "CATEGORIES");
6580 while (lex_token (lexer) != T_SLASH
6581 && lex_token (lexer) != T_ENDCMD);
6583 else if (lex_match_id (lexer, "COMPARETEST"))
/* Pairwise comparison settings, allocated with their defaults before the
   options are parsed. */
6587 t->pairwise = xmalloc (sizeof *t->pairwise);
6588 *t->pairwise = (struct ctables_pairwise) {
6590 .alpha = { .05, .05 },
6591 .adjust = BONFERRONI,
6592 .include_mrsets = true,
6593 .meansvariance_allcats = true,
6594 .all_visible = true,
6603 if (lex_match_id (lexer, "TYPE"))
6605 lex_match (lexer, T_EQUALS);
6606 if (lex_match_id (lexer, "PROP"))
6607 t->pairwise->type = PROP;
6608 else if (lex_match_id (lexer, "MEAN"))
6609 t->pairwise->type = MEAN;
6612 lex_error_expecting (lexer, "PROP", "MEAN");
6616 else if (lex_match_id (lexer, "ALPHA"))
6618 lex_match (lexer, T_EQUALS);
/* One or two values, each in the open range (0,1).  With two values the
   smaller is stored first; with one value both slots receive it. */
6620 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6622 double a0 = lex_number (lexer);
6625 lex_match (lexer, T_COMMA);
6626 if (lex_is_number (lexer))
6628 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6630 double a1 = lex_number (lexer);
6633 t->pairwise->alpha[0] = MIN (a0, a1);
6634 t->pairwise->alpha[1] = MAX (a0, a1);
6637 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6639 else if (lex_match_id (lexer, "ADJUST"))
6641 lex_match (lexer, T_EQUALS);
6642 if (lex_match_id (lexer, "BONFERRONI"))
6643 t->pairwise->adjust = BONFERRONI;
6644 else if (lex_match_id (lexer, "BH"))
6645 t->pairwise->adjust = BH;
6646 else if (lex_match_id (lexer, "NONE"))
6647 t->pairwise->adjust = 0;
6650 lex_error_expecting (lexer, "BONFERRONI", "BH",
6655 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6657 lex_match (lexer, T_EQUALS);
6658 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6661 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6663 lex_match (lexer, T_EQUALS);
6664 if (lex_match_id (lexer, "ALLCATS"))
6665 t->pairwise->meansvariance_allcats = true;
6666 else if (lex_match_id (lexer, "TESTEDCATS"))
6667 t->pairwise->meansvariance_allcats = false;
6670 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6674 else if (lex_match_id (lexer, "CATEGORIES"))
6676 lex_match (lexer, T_EQUALS);
6677 if (lex_match_id (lexer, "ALLVISIBLE"))
6678 t->pairwise->all_visible = true;
6679 else if (lex_match_id (lexer, "SUBTOTALS"))
6680 t->pairwise->all_visible = false;
6683 lex_error_expecting (lexer, "ALLVISIBLE",
6688 else if (lex_match_id (lexer, "MERGE"))
6690 lex_match (lexer, T_EQUALS);
6691 if (!parse_bool (lexer, &t->pairwise->merge))
6694 else if (lex_match_id (lexer, "STYLE"))
6696 lex_match (lexer, T_EQUALS);
6697 if (lex_match_id (lexer, "APA"))
6698 t->pairwise->apa_style = true;
6699 else if (lex_match_id (lexer, "SIMPLE"))
6700 t->pairwise->apa_style = false;
6703 lex_error_expecting (lexer, "APA", "SIMPLE");
6707 else if (lex_match_id (lexer, "SHOWSIG"))
6709 lex_match (lexer, T_EQUALS);
6710 if (!parse_bool (lexer, &t->pairwise->show_sig))
6715 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6716 "INCLUDEMRSETS", "MEANSVARIANCE",
6717 "CATEGORIES", "MERGE", "STYLE",
6722 while (lex_token (lexer) != T_SLASH
6723 && lex_token (lexer) != T_ENDCMD);
6727 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6728 "CRITERIA", "CATEGORIES", "TITLES",
6729 "SIGTEST", "COMPARETEST");
6733 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved off only one of the row and column axes.
   clabels_from_axis records which one, and clabels_to_axis is the axis that
   label_axis maps it to. */
6737 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6739 t->clabels_from_axis = PIVOT_AXIS_ROW;
6740 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6742 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6746 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6747 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6748 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6750 if (!ctables_prepare_table (t))
6753 while (lex_token (lexer) != T_ENDCMD);
/* Run the tables over the data.  proc_commit is always called, and a
   failure from either step makes the command fail. */
6756 input = proc_open (ds);
6757 bool ok = ctables_execute (ds, input, ct);
6758 ok = proc_commit (ds) && ok;
6760 ctables_destroy (ct);
6761 return ok ? CMD_SUCCESS : CMD_FAILURE;
6766 ctables_destroy (ct);