1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
66 enum ctables_function_type
68 /* A function that operates on data in a single cell. The function does
69 not have an unweighted version. */
72 /* A function that operates on data in a single cell. The function has an
73 unweighted version. */
76 /* A function that operates on an area of cells. The function has an
77 unweighted version. */
88 enum ctables_function_availability
90 CTFA_ALL, /* Any variables. */
91 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
92 //CTFA_MRSETS, /* Only multiple-response sets */
95 enum ctables_summary_function
97 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
98 #include "ctables.inc"
103 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
105 #include "ctables.inc"
109 struct ctables_function_info
111 struct substring basename;
112 enum ctables_function_type type;
113 enum ctables_format format;
114 enum ctables_function_availability availability;
116 bool may_be_unweighted;
119 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
120 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
122 .basename = SS_LITERAL_INITIALIZER (NAME), \
125 .availability = AVAILABILITY, \
126 .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \
127 .is_area = (TYPE) == CTFT_AREA \
129 #include "ctables.inc"
133 static bool ctables_summary_function_is_count (enum ctables_summary_function);
135 enum ctables_area_type
137 /* Within a section, where stacked variables divide one section from
140 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
141 parse_ctables_summary_function() parses correctly. */
142 CTAT_TABLE, /* All layers of a whole section. */
143 CTAT_LAYERROW, /* Row in one layer within a section. */
144 CTAT_LAYERCOL, /* Column in one layer within a section. */
145 CTAT_LAYER, /* One layer within a section. */
147 /* Within a subtable, where a subtable pairs an innermost row variable with
148 an innermost column variable within a single layer. */
149 CTAT_SUBTABLE, /* Whole subtable. */
150 CTAT_ROW, /* Row within a subtable. */
151 CTAT_COL, /* Column within a subtable. */
155 static const char *ctables_area_type_name[N_CTATS] = {
156 [CTAT_TABLE] = "TABLE",
157 [CTAT_LAYER] = "LAYER",
158 [CTAT_LAYERROW] = "LAYERROW",
159 [CTAT_LAYERCOL] = "LAYERCOL",
160 [CTAT_SUBTABLE] = "SUBTABLE",
167 struct hmap_node node;
169 const struct ctables_cell *example;
172 double d_valid; /* Dictionary weight. */
175 double e_valid; /* Effective weight */
178 double u_valid; /* Unweighted. */
181 struct ctables_sum *sums;
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The areas that contain this cell. */
205 struct ctables_area *areas[N_CTATS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_NRANGE, /* [LO THRU 5] */
296 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_NRANGE. */
323 /* CTPO_CAT_SRANGE. */
324 struct substring srange[2];
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
359 /* If any of these optional additional scale variables are missing, then
360 treat 'var' as if it's missing too. This is for implementing
361 SMISSING=LISTWISE. */
362 struct variable **listwise_vars;
363 size_t n_listwise_vars;
366 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
367 const struct ctables_summary_spec_set *);
368 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
370 /* A nested sequence of variables, e.g. a > b > c. */
373 struct variable **vars;
377 size_t *areas[N_CTATS];
378 size_t n_areas[N_CTATS];
381 struct ctables_summary_spec_set specs[N_CSVS];
384 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
387 struct ctables_nest *nests;
391 static void ctables_stack_uninit (struct ctables_stack *);
395 struct hmap_node node;
400 struct ctables_occurrence
402 struct hmap_node node;
406 struct ctables_section
409 struct ctables_table *table;
410 struct ctables_nest *nests[PIVOT_N_AXES];
413 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
414 struct hmap cells; /* Contains "struct ctables_cell"s. */
415 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
418 static void ctables_section_uninit (struct ctables_section *);
422 struct ctables *ctables;
423 struct ctables_axis *axes[PIVOT_N_AXES];
424 struct ctables_stack stacks[PIVOT_N_AXES];
425 struct ctables_section *sections;
427 enum pivot_axis_type summary_axis;
428 struct ctables_summary_spec_set summary_specs;
429 struct variable **sum_vars;
432 enum pivot_axis_type slabels_axis;
433 bool slabels_visible;
435 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
437 Most commonly, label_axis[a] == a, and in particular we always have
438 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
440 If ROWLABELS or COLLABELS is specified, then one of
441 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
442 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
444 If any category labels are moved, then 'clabels_example' is one of the
445 variables being moved (and it is otherwise NULL). All of the variables
446 being moved have the same width, value labels, and categories, so this
447 example variable can be used to find those out.
449 The remaining members in this group are relevant only if category labels
452 'clabels_values_map' holds a "struct ctables_value" for all the values
453 that appear in all of the variables in the moved categories. It is
454 accumulated as the data is read. Once the data is fully read, its
455 sorted values are put into 'clabels_values' and 'n_clabels_values'.
457 enum pivot_axis_type label_axis[PIVOT_N_AXES];
458 enum pivot_axis_type clabels_from_axis;
459 enum pivot_axis_type clabels_to_axis;
460 const struct variable *clabels_example;
461 struct hmap clabels_values_map;
462 struct ctables_value **clabels_values;
463 size_t n_clabels_values;
465 /* Indexed by variable dictionary index. */
466 struct ctables_categories **categories;
475 struct ctables_chisq *chisq;
476 struct ctables_pairwise *pairwise;
479 struct ctables_categories
482 struct ctables_category *cats;
487 struct ctables_category
489 enum ctables_category_type
491 /* Explicit category lists. */
494 CCT_NRANGE, /* Numerical range. */
495 CCT_SRANGE, /* String range. */
500 /* Totals and subtotals. */
504 /* Implicit category lists. */
509 /* For contributing to TOTALN. */
510 CCT_EXCLUDED_MISSING,
514 struct ctables_category *subtotal;
520 double number; /* CCT_NUMBER. */
521 struct substring string; /* CCT_STRING, in dictionary encoding. */
522 double nrange[2]; /* CCT_NRANGE. */
523 struct substring srange[2]; /* CCT_SRANGE. */
527 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
528 bool hide_subcategories; /* CCT_SUBTOTAL. */
531 /* CCT_POSTCOMPUTE. */
534 const struct ctables_postcompute *pc;
535 enum fmt_type parse_format;
538 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
541 bool include_missing;
545 enum ctables_summary_function sort_function;
547 enum ctables_area_type area;
548 struct variable *sort_var;
553 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
554 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
555 struct msg_location *location;
559 ctables_category_uninit (struct ctables_category *cat)
564 msg_location_destroy (cat->location);
571 case CCT_POSTCOMPUTE:
575 ss_dealloc (&cat->string);
579 ss_dealloc (&cat->srange[0]);
580 ss_dealloc (&cat->srange[1]);
585 free (cat->total_label);
593 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings compare equal, a null and a non-null
   substring compare unequal, and two non-null substrings are compared with
   ss_equals(). */
599 nullable_substring_equal (const struct substring *a,
600 const struct substring *b)
602 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
606 ctables_category_equal (const struct ctables_category *a,
607 const struct ctables_category *b)
609 if (a->type != b->type)
615 return a->number == b->number;
618 return ss_equals (a->string, b->string);
621 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
624 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
625 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
631 case CCT_POSTCOMPUTE:
632 return a->pc == b->pc;
636 return !strcmp (a->total_label, b->total_label);
641 return (a->include_missing == b->include_missing
642 && a->sort_ascending == b->sort_ascending
643 && a->sort_function == b->sort_function
644 && a->sort_var == b->sort_var
645 && a->percentile == b->percentile);
647 case CCT_EXCLUDED_MISSING:
655 ctables_categories_unref (struct ctables_categories *c)
660 assert (c->n_refs > 0);
664 for (size_t i = 0; i < c->n_cats; i++)
665 ctables_category_uninit (&c->cats[i]);
671 ctables_categories_equal (const struct ctables_categories *a,
672 const struct ctables_categories *b)
674 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
677 for (size_t i = 0; i < a->n_cats; i++)
678 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
684 /* Chi-square test (SIGTEST). */
692 /* Pairwise comparison test (COMPARETEST). */
693 struct ctables_pairwise
695 enum { PROP, MEAN } type;
698 bool meansvariance_allcats;
700 enum { BONFERRONI = 1, BH } adjust;
724 struct variable *var;
726 struct ctables_summary_spec_set specs[N_CSVS];
730 struct ctables_axis *subs[2];
733 struct msg_location *loc;
736 static void ctables_axis_destroy (struct ctables_axis *);
738 struct ctables_summary_spec
740 /* The calculation to be performed.
742 'function' is the function to calculate. 'weighted' specifies whether
743 to use weighted or unweighted data (for functions that do not support a
744 choice, it must be true). 'calc_area' is the area over which the
745 calculation takes place (for functions that target only an individual
746 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
747 percentile between 0 and 100 (for other functions it must be 0). */
748 enum ctables_summary_function function;
750 enum ctables_area_type calc_area;
751 double percentile; /* CTSF_PTILE only. */
753 /* How to display the result of the calculation.
755 'label' is a user-specified label, NULL if the user didn't specify
758 'user_area' is usually the same as 'calc_area', but when category labels
759 are rotated from one axis to another it swaps rows and columns.
761 'format' is the format for displaying the output. If
762 'is_ctables_format' is true, then 'format.type' is one of the special
763 CTEF_* formats instead of the standard ones. */
765 enum ctables_area_type user_area;
766 struct fmt_spec format;
767 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
774 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
775 const struct ctables_summary_spec *src)
778 dst->label = xstrdup_if_nonnull (src->label);
782 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
789 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
790 const struct ctables_summary_spec_set *src)
792 struct ctables_summary_spec *specs
793 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
794 for (size_t i = 0; i < src->n; i++)
795 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
797 *dst = (struct ctables_summary_spec_set) {
802 .is_scale = src->is_scale,
807 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
809 for (size_t i = 0; i < set->n; i++)
810 ctables_summary_spec_uninit (&set->specs[i]);
811 free (set->listwise_vars);
816 parse_col_width (struct lexer *lexer, const char *name, double *width)
818 lex_match (lexer, T_EQUALS);
819 if (lex_match_id (lexer, "DEFAULT"))
821 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
823 *width = lex_number (lexer);
833 parse_bool (struct lexer *lexer, bool *b)
835 if (lex_match_id (lexer, "NO"))
837 else if (lex_match_id (lexer, "YES"))
841 lex_error_expecting (lexer, "YES", "NO");
847 static enum ctables_function_availability
848 ctables_function_availability (enum ctables_summary_function f)
850 static enum ctables_function_availability availability[] = {
851 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
852 #include "ctables.inc"
856 return availability[f];
/* Returns true if F is CTSF_COUNT or CTSF_ECOUNT, false for every other
   summary function. */
860 ctables_summary_function_is_count (enum ctables_summary_function f)
862 return f == CTSF_COUNT || f == CTSF_ECOUNT;
866 parse_ctables_summary_function (struct lexer *lexer,
867 enum ctables_summary_function *function,
869 enum ctables_area_type *area)
871 if (!lex_force_id (lexer))
874 struct substring name = lex_tokss (lexer);
875 *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
877 bool has_area = false;
879 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
880 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
885 if (ss_equals_case (name, ss_cstr ("PCT")))
887 /* Special case where .COUNT suffix is omitted. */
888 *function = CTSF_areaPCT_COUNT;
895 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
897 const struct ctables_function_info *cfi = &ctables_function_info[f];
898 if (ss_equals_case (cfi->basename, name))
901 if (!*weighted && !cfi->may_be_unweighted)
903 if (has_area != cfi->is_area)
911 lex_error (lexer, _("Expecting summary function name."));
916 ctables_axis_destroy (struct ctables_axis *axis)
924 for (size_t i = 0; i < N_CSVS; i++)
925 ctables_summary_spec_set_uninit (&axis->specs[i]);
930 ctables_axis_destroy (axis->subs[0]);
931 ctables_axis_destroy (axis->subs[1]);
934 msg_location_destroy (axis->loc);
938 static struct ctables_axis *
939 ctables_axis_new_nonterminal (enum ctables_axis_op op,
940 struct ctables_axis *sub0,
941 struct ctables_axis *sub1,
942 struct lexer *lexer, int start_ofs)
944 struct ctables_axis *axis = xmalloc (sizeof *axis);
945 *axis = (struct ctables_axis) {
947 .subs = { sub0, sub1 },
948 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
953 struct ctables_axis_parse_ctx
956 struct dictionary *dict;
958 struct ctables_table *t;
961 static struct fmt_spec
962 ctables_summary_default_format (enum ctables_summary_function function,
963 const struct variable *var)
965 static const enum ctables_format default_formats[] = {
966 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
967 #include "ctables.inc"
970 switch (default_formats[function])
973 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
976 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
979 return *var_get_print_format (var);
987 ctables_summary_label__ (const struct ctables_summary_spec *spec)
989 bool w = spec->weighted;
990 enum ctables_area_type a = spec->user_area;
991 switch (spec->function)
994 return w ? N_("Count") : N_("Unweighted Count");
997 return N_("Adjusted Count");
999 case CTSF_areaPCT_COUNT:
1002 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
1003 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
1004 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
1005 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
1006 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
1007 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
1008 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
1012 case CTSF_areaPCT_VALIDN:
1015 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
1016 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
1017 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
1018 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
1019 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
1020 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
1021 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
1025 case CTSF_areaPCT_TOTALN:
1028 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
1029 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
1030 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
1031 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
1032 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
1033 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
1034 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
1038 case CTSF_MAXIMUM: return N_("Maximum");
1039 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
1040 case CTSF_MEDIAN: return N_("Median");
1041 case CTSF_MINIMUM: return N_("Minimum");
1042 case CTSF_MISSING: return N_("Missing");
1043 case CTSF_MODE: return N_("Mode");
1044 case CTSF_PTILE: NOT_REACHED ();
1045 case CTSF_RANGE: return N_("Range");
1046 case CTSF_SEMEAN: return N_("Std Error of Mean");
1047 case CTSF_STDDEV: return N_("Std Deviation");
1048 case CTSF_SUM: return N_("Sum");
1049 case CTSF_TOTALN: return N_("Total N");
1050 case CTSF_ETOTALN: return N_("Adjusted Total N");
1051 case CTSF_VALIDN: return N_("Valid N");
1052 case CTSF_EVALIDN: return N_("Adjusted Valid N");
1053 case CTSF_VARIANCE: return N_("Variance");
1054 case CTSF_areaPCT_SUM:
1057 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
1058 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
1059 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
1060 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
1061 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
1062 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
1063 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
1070 /* Don't bother translating these: they are for developers only. */
1071 case CTAT_TABLE: return "Table ID";
1072 case CTAT_LAYER: return "Layer ID";
1073 case CTAT_LAYERROW: return "Layer Row ID";
1074 case CTAT_LAYERCOL: return "Layer Column ID";
1075 case CTAT_SUBTABLE: return "Subtable ID";
1076 case CTAT_ROW: return "Row ID";
1077 case CTAT_COL: return "Column ID";
1085 static struct pivot_value *
1086 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1090 if (spec->function == CTSF_PTILE)
1092 double p = spec->percentile;
1093 char *s = (spec->weighted
1094 ? xasprintf (_("Percentile %.2f"), p)
1095 : xasprintf (_("Unweighted Percentile %.2f"), p));
1096 return pivot_value_new_user_text_nocopy (s);
1099 return pivot_value_new_text (ctables_summary_label__ (spec));
1103 struct substring in = ss_cstr (spec->label);
1104 struct substring target = ss_cstr (")CILEVEL");
1106 struct string out = DS_EMPTY_INITIALIZER;
1109 size_t chunk = ss_find_substring (in, target);
1110 ds_put_substring (&out, ss_head (in, chunk));
1111 ss_advance (&in, chunk);
1113 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1115 ss_advance (&in, target.length);
1116 ds_put_format (&out, "%g", cilevel);
/* Formats the name of FUNCTION into BUFFER, writing at most BUFSIZE bytes
   via snprintf().  The name is the concatenation of:

   - "U" if the function is not weighted, otherwise nothing;
   - the ctables_area_type_name[] entry for AREA if the function's
     ctables_function_info entry has 'is_area' set, otherwise nothing;
   - the function's 'basename' from ctables_function_info[].

   NOTE(review): 'basename.string' is printed with %s, so it is assumed to
   be null-terminated -- confirm against SS_LITERAL_INITIALIZER. */
1122 ctables_summary_function_name (enum ctables_summary_function function,
1124 enum ctables_area_type area,
1125 char *buffer, size_t bufsize)
1127 const struct ctables_function_info *cfi = &ctables_function_info[function];
1128 snprintf (buffer, bufsize, "%s%s%s",
1129 weighted ? "" : "U",
1130 cfi->is_area ? ctables_area_type_name[area] : "",
1131 cfi->basename.string);
1136 add_summary_spec (struct ctables_axis *axis,
1137 enum ctables_summary_function function, bool weighted,
1138 enum ctables_area_type area, double percentile,
1139 const char *label, const struct fmt_spec *format,
1140 bool is_ctables_format, const struct msg_location *loc,
1141 enum ctables_summary_variant sv)
1143 if (axis->op == CTAO_VAR)
1145 char function_name[128];
1146 ctables_summary_function_name (function, weighted, area,
1147 function_name, sizeof function_name);
1148 const char *var_name = var_get_name (axis->var);
1149 switch (ctables_function_availability (function))
1153 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1154 "response sets."), function_name);
1155 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1161 if (!axis->scale && sv != CSV_TOTAL)
1164 _("Summary function %s applies only to scale variables."),
1166 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1176 struct ctables_summary_spec_set *set = &axis->specs[sv];
1177 if (set->n >= set->allocated)
1178 set->specs = x2nrealloc (set->specs, &set->allocated,
1179 sizeof *set->specs);
1181 struct ctables_summary_spec *dst = &set->specs[set->n++];
1182 *dst = (struct ctables_summary_spec) {
1183 .function = function,
1184 .weighted = weighted,
1187 .percentile = percentile,
1188 .label = xstrdup_if_nonnull (label),
1189 .format = (format ? *format
1190 : ctables_summary_default_format (function, axis->var)),
1191 .is_ctables_format = is_ctables_format,
1197 for (size_t i = 0; i < 2; i++)
1198 if (!add_summary_spec (axis->subs[i], function, weighted, area,
1199 percentile, label, format, is_ctables_format,
1206 static struct ctables_axis *ctables_axis_parse_stack (
1207 struct ctables_axis_parse_ctx *);
1210 static struct ctables_axis *
1211 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1213 if (lex_match (ctx->lexer, T_LPAREN))
1215 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1216 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1218 ctables_axis_destroy (sub);
1224 if (!lex_force_id (ctx->lexer))
1227 int start_ofs = lex_ofs (ctx->lexer);
1228 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1232 struct ctables_axis *axis = xmalloc (sizeof *axis);
1233 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1235 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1236 : lex_match_phrase (ctx->lexer, "[C]") ? false
1237 : var_get_measure (var) == MEASURE_SCALE);
1238 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1239 lex_ofs (ctx->lexer) - 1);
1240 if (axis->scale && var_is_alpha (var))
1242 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1244 var_get_name (var));
1245 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the ASCII digits 0 through 9,
   false otherwise (including when S is the empty string). */
1253 has_digit (const char *s)
1255 return s[strcspn (s, "0123456789")] != '\0';
1259 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1260 bool *is_ctables_format)
1262 char type[FMT_TYPE_LEN_MAX + 1];
1263 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1266 if (!strcasecmp (type, "NEGPAREN"))
1267 format->type = CTEF_NEGPAREN;
1268 else if (!strcasecmp (type, "NEQUAL"))
1269 format->type = CTEF_NEQUAL;
1270 else if (!strcasecmp (type, "PAREN"))
1271 format->type = CTEF_PAREN;
1272 else if (!strcasecmp (type, "PCTPAREN"))
1273 format->type = CTEF_PCTPAREN;
1276 *is_ctables_format = false;
1277 return (parse_format_specifier (lexer, format)
1278 && fmt_check_output (format)
1279 && fmt_check_type_compat (format, VAL_NUMERIC));
1285 lex_next_error (lexer, -1, -1,
1286 _("Output format %s requires width 2 or greater."), type);
1289 else if (format->d > format->w - 1)
1291 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1292 "greater than decimals."), type);
1297 *is_ctables_format = true;
1302 static struct ctables_axis *
1303 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1305 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1306 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1309 enum ctables_summary_variant sv = CSV_CELL;
1312 int start_ofs = lex_ofs (ctx->lexer);
1314 /* Parse function. */
1315 enum ctables_summary_function function;
1317 enum ctables_area_type area;
1318 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
1322 /* Parse percentile. */
1323 double percentile = 0;
1324 if (function == CTSF_PTILE)
1326 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1328 percentile = lex_number (ctx->lexer);
1329 lex_get (ctx->lexer);
1334 if (lex_is_string (ctx->lexer))
1336 label = ss_xstrdup (lex_tokss (ctx->lexer));
1337 lex_get (ctx->lexer);
1341 struct fmt_spec format;
1342 const struct fmt_spec *formatp;
1343 bool is_ctables_format = false;
1344 if (lex_token (ctx->lexer) == T_ID
1345 && has_digit (lex_tokcstr (ctx->lexer)))
1347 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1348 &is_ctables_format))
1358 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1359 lex_ofs (ctx->lexer) - 1);
1360 add_summary_spec (sub, function, weighted, area, percentile, label,
1361 formatp, is_ctables_format, loc, sv);
1363 msg_location_destroy (loc);
1365 lex_match (ctx->lexer, T_COMMA);
1366 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1368 if (!lex_force_match (ctx->lexer, T_LBRACK))
1372 else if (lex_match (ctx->lexer, T_RBRACK))
1374 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1381 ctables_axis_destroy (sub);
1385 static const struct ctables_axis *
1386 find_scale (const struct ctables_axis *axis)
1390 else if (axis->op == CTAO_VAR)
1391 return axis->scale ? axis : NULL;
1394 for (size_t i = 0; i < 2; i++)
1396 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1404 static const struct ctables_axis *
1405 find_categorical_summary_spec (const struct ctables_axis *axis)
1409 else if (axis->op == CTAO_VAR)
1410 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1413 for (size_t i = 0; i < 2; i++)
1415 const struct ctables_axis *sum
1416 = find_categorical_summary_spec (axis->subs[i]);
1424 static struct ctables_axis *
1425 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1427 int start_ofs = lex_ofs (ctx->lexer);
1428 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1432 while (lex_match (ctx->lexer, T_GT))
1434 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1437 ctables_axis_destroy (lhs);
1441 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1442 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1444 const struct ctables_axis *outer_scale = find_scale (lhs);
1445 const struct ctables_axis *inner_scale = find_scale (rhs);
1446 if (outer_scale && inner_scale)
1448 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1449 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1450 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1451 ctables_axis_destroy (nest);
1455 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1458 msg_at (SE, nest->loc,
1459 _("Summaries may only be requested for categorical variables "
1460 "at the innermost nesting level."));
1461 msg_at (SN, outer_sum->loc,
1462 _("This outer categorical variable has a summary."));
1463 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by T_PLUS tokens.  Each
   additional operand is folded, together with everything parsed so far,
   into a CTAO_STACK node created with the token offset at which the first
   operand began. */
1473 static struct ctables_axis *
1474 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1476 int start_ofs = lex_ofs (ctx->lexer);
1477 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1481 while (lex_match (ctx->lexer, T_PLUS))
1483 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1486 ctables_axis_destroy (lhs);
1490 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1491 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores it in T->axes[A].
   A current token of T_BY, T_SLASH, or T_ENDCMD is tested for first, before
   any parsing context is built.  NOTE(review): presumably that case means
   the axis is empty; confirm.  After parsing, the result is true if and
   only if T->axes[A] is non-null. */
1498 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1499 struct ctables *ct, struct ctables_table *t,
1500 enum pivot_axis_type a)
1502 if (lex_token (lexer) == T_BY
1503 || lex_token (lexer) == T_SLASH
1504 || lex_token (lexer) == T_ENDCMD)
1507 struct ctables_axis_parse_ctx ctx = {
1513 t->axes[a] = ctables_axis_parse_stack (&ctx);
1514 return t->axes[a] != NULL;
/* NOTE(review): presumably releases CHISQ; ctables_table_destroy() passes
   T->chisq to this function.  Confirm null handling and ownership. */
1518 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* NOTE(review): presumably releases PAIRWISE; ctables_table_destroy() passes
   T->pairwise to this function.  Confirm null handling and ownership. */
1524 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases what table T holds: its sections, one reference to each entry of
   T->categories plus the array itself, the axis expression and stack for
   every pivot axis, the summary spec array, the category-label value map
   and array, and the chi-square and pairwise specifications. */
1530 ctables_table_destroy (struct ctables_table *t)
1535 for (size_t i = 0; i < t->n_sections; i++)
1536 ctables_section_uninit (&t->sections[i]);
1539 for (size_t i = 0; i < t->n_categories; i++)
1540 ctables_categories_unref (t->categories[i]);
1541 free (t->categories);
1543 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1545 ctables_axis_destroy (t->axes[a]);
1546 ctables_stack_uninit (&t->stacks[a]);
1548 free (t->summary_specs.specs);
/* The values in the category-label map are destroyed using the width of
   T->clabels_example. */
1550 struct ctables_value *ctv, *next_ctv;
1551 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1552 &t->clabels_values_map)
1554 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1555 hmap_delete (&t->clabels_values_map, &ctv->node);
1558 hmap_destroy (&t->clabels_values_map);
1559 free (t->clabels_values);
1565 ctables_chisq_destroy (t->chisq);
1566 ctables_pairwise_destroy (t->pairwise);
/* Releases what CT holds.  Every postcompute in CT->postcomputes has its
   location, expression, and summary specs released and is removed from the
   map, which is then destroyed.  The format settings, the reference to the
   table look, and each table in CT->tables are released as well. */
1571 ctables_destroy (struct ctables *ct)
1576 struct ctables_postcompute *pc, *next_pc;
1577 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1581 msg_location_destroy (pc->location);
1582 ctables_pcexpr_destroy (pc->expr);
1586 ctables_summary_spec_set_uninit (pc->specs);
1589 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1592 hmap_destroy (&ct->postcomputes);
1594 fmt_settings_uninit (&ct->ctables_formats);
1595 pivot_table_look_unref (ct->look);
1599 for (size_t i = 0; i < ct->n_tables; i++)
1600 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose nrange member holds the numeric bounds LOW and
   HIGH, in that order. */
1605 static struct ctables_category
1606 cct_nrange (double low, double high)
1608 return (struct ctables_category) {
1610 .nrange = { low, high }
/* Returns a category whose srange member holds the string bounds LOW and
   HIGH, in that order.  The substrings are stored as passed, without
   copying. */
1614 static struct ctables_category
1615 cct_srange (struct substring low, struct substring high)
1617 return (struct ctables_category) {
1619 .srange = { low, high }
/* Fills *CAT with a CCT_SUBTOTAL category whose hide_subcategories flag is
   HIDE_SUBCATEGORIES.  If an equals sign follows, a string token is
   required and a copy of it becomes the label; otherwise the label is a
   copy of the translated string "Subtotal". */
1624 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1625 struct ctables_category *cat)
1628 if (lex_match (lexer, T_EQUALS))
1630 if (!lex_force_string (lexer))
1633 total_label = ss_xstrdup (lex_tokss (lexer));
1637 total_label = xstrdup (_("Subtotal"));
1639 *cat = (struct ctables_category) {
1640 .type = CCT_SUBTOTAL,
1641 .hide_subcategories = hide_subcategories,
1642 .total_label = total_label
/* Converts the current token's string with recode_substring_pool(), passing
   DICT's encoding and "UTF-8" as its two encoding arguments and a null
   pool, then strips trailing spaces from the converted string. */
1647 static struct substring
1648 parse_substring (struct lexer *lexer, struct dictionary *dict)
1650 struct substring s = recode_substring_pool (
1651 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1652 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list into *CAT.  The forms
   tested for, in order, are:

     OTHERNM
     MISSING
     SUBTOTAL and HSUBTOTAL (delegated to ctables_table_parse_subtotal())
     LO THRU followed by a string or a number
     a number, optionally followed by THRU and either HI or a number
     a string, optionally followed by THRU and either HI or a string
     T_AND followed by the identifier of a known postcompute

   Open-ended numeric ranges use -DBL_MAX or DBL_MAX for the missing bound;
   open-ended string ranges use a substring whose string member is null.
   Any other token is reported with lex_error(). */
1658 ctables_table_parse_explicit_category (struct lexer *lexer,
1659 struct dictionary *dict,
1661 struct ctables_category *cat)
1663 if (lex_match_id (lexer, "OTHERNM"))
1664 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1665 else if (lex_match_id (lexer, "MISSING"))
1666 *cat = (struct ctables_category) { .type = CCT_MISSING };
1667 else if (lex_match_id (lexer, "SUBTOTAL"))
1668 return ctables_table_parse_subtotal (lexer, false, cat);
1669 else if (lex_match_id (lexer, "HSUBTOTAL"))
1670 return ctables_table_parse_subtotal (lexer, true, cat);
1671 else if (lex_match_id (lexer, "LO"))
1673 if (!lex_force_match_id (lexer, "THRU"))
1675 if (lex_is_string (lexer))
1677 struct substring sr0 = { .string = NULL };
1678 struct substring sr1 = parse_substring (lexer, dict);
1679 *cat = cct_srange (sr0, sr1);
1681 else if (lex_force_num (lexer))
1683 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1689 else if (lex_is_number (lexer))
1691 double number = lex_number (lexer);
1693 if (lex_match_id (lexer, "THRU"))
1695 if (lex_match_id (lexer, "HI"))
1696 *cat = cct_nrange (number, DBL_MAX);
1699 if (!lex_force_num (lexer))
1701 *cat = cct_nrange (number, lex_number (lexer));
1706 *cat = (struct ctables_category) {
1711 else if (lex_is_string (lexer))
1713 struct substring s = parse_substring (lexer, dict);
1714 if (lex_match_id (lexer, "THRU"))
1716 if (lex_match_id (lexer, "HI"))
1718 struct substring sr1 = { .string = NULL };
1719 *cat = cct_srange (s, sr1);
1723 if (!lex_force_string (lexer))
1728 struct substring sr1 = parse_substring (lexer, dict);
1729 *cat = cct_srange (s, sr1);
1733 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* Reference to a postcompute by name; the name must already be known to
   ctables_find_postcompute(). */
1735 else if (lex_match (lexer, T_AND))
1737 if (!lex_force_id (lexer))
1739 struct ctables_postcompute *pc = ctables_find_postcompute (
1740 ct, lex_tokcstr (lexer));
1743 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1744 msg_at (SE, loc, _("Unknown postcompute &%s."),
1745 lex_tokcstr (lexer));
1746 msg_location_destroy (loc);
1751 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1755 lex_error (lexer, NULL);
/* Parses S with data_in() as FORMAT, using DICT's encoding and the global
   format settings.  An error from data_in() is reported at LOCATION in a
   message that names FORMAT and includes data_in()'s error text.
   NOTE(review): presumably the parsed number is stored in *N on success;
   confirm. */
1763 parse_category_string (struct msg_location *location,
1764 struct substring s, const struct dictionary *dict,
1765 enum fmt_type format, double *n)
1768 char *error = data_in (s, dict_get_encoding (dict), format,
1769 settings_get_fmt_settings (), &v, 0, NULL);
1772 msg_at (SE, location,
1773 _("Failed to parse category specification as format %s: %s."),
1774 fmt_name (format), error);
/* Scans CATS for the category designated by the category-reference
   expression E.  A candidate must have the category type that corresponds
   to E's operator and, where there is a payload, an equal one: the number,
   the string (ss_equals()), both numeric range bounds, or both string range
   bounds (nullable_substring_equal()).  For CTPO_CAT_SUBTOTAL,
   E->subtotal_index is compared against a running subtotal count, with
   index 0 handled as a separate case.  A zero index together with more than
   one subtotal in CATS gets special treatment after the scan. */
1783 static struct ctables_category *
1784 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1785 const struct ctables_pcexpr *e)
1787 struct ctables_category *best = NULL;
1788 size_t n_subtotals = 0;
1789 for (size_t i = 0; i < cats->n_cats; i++)
1791 struct ctables_category *cat = &cats->cats[i];
1794 case CTPO_CAT_NUMBER:
1795 if (cat->type == CCT_NUMBER && cat->number == e->number)
1799 case CTPO_CAT_STRING:
1800 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1804 case CTPO_CAT_NRANGE:
1805 if (cat->type == CCT_NRANGE
1806 && cat->nrange[0] == e->nrange[0]
1807 && cat->nrange[1] == e->nrange[1])
1811 case CTPO_CAT_SRANGE:
1812 if (cat->type == CCT_SRANGE
1813 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1814 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1818 case CTPO_CAT_MISSING:
1819 if (cat->type == CCT_MISSING)
1823 case CTPO_CAT_OTHERNM:
1824 if (cat->type == CCT_OTHERNM)
1828 case CTPO_CAT_SUBTOTAL:
1829 if (cat->type == CCT_SUBTOTAL)
1832 if (e->subtotal_index == n_subtotals)
1834 else if (e->subtotal_index == 0)
1839 case CTPO_CAT_TOTAL:
1840 if (cat->type == CCT_TOTAL)
1854 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Wrapper around ctables_find_category_for_postcompute__() that first
   converts string operands when PARSE_FORMAT is not FMT_F.  A
   CTPO_CAT_STRING reference is parsed as PARSE_FORMAT and looked up as
   CTPO_CAT_NUMBER.  A CTPO_CAT_SRANGE reference has each bound parsed the
   same way and is looked up as CTPO_CAT_NRANGE, with a null lower bound
   becoming -DBL_MAX and a null upper bound becoming DBL_MAX.  The
   substituted expression keeps E's location.  All other references are
   looked up unchanged. */
1859 static struct ctables_category *
1860 ctables_find_category_for_postcompute (const struct dictionary *dict,
1861 const struct ctables_categories *cats,
1862 enum fmt_type parse_format,
1863 const struct ctables_pcexpr *e)
1865 if (parse_format != FMT_F)
1867 if (e->op == CTPO_CAT_STRING)
1870 if (!parse_category_string (e->location, e->string, dict,
1871 parse_format, &number))
1874 struct ctables_pcexpr e2 = {
1875 .op = CTPO_CAT_NUMBER,
1877 .location = e->location,
1879 return ctables_find_category_for_postcompute__ (cats, &e2);
1881 else if (e->op == CTPO_CAT_SRANGE)
1884 if (!e->srange[0].string)
1885 nrange[0] = -DBL_MAX;
1886 else if (!parse_category_string (e->location, e->srange[0], dict,
1887 parse_format, &nrange[0]))
1890 if (!e->srange[1].string)
1891 nrange[1] = DBL_MAX;
1892 else if (!parse_category_string (e->location, e->srange[1], dict,
1893 parse_format, &nrange[1]))
1896 struct ctables_pcexpr e2 = {
1897 .op = CTPO_CAT_NRANGE,
1898 .nrange = { nrange[0], nrange[1] },
1899 .location = e->location,
1901 return ctables_find_category_for_postcompute__ (cats, &e2);
1904 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, used by computed category PC_CAT,
   against the category list CATS.

   For the category-reference operators, the referenced category is looked
   up using PC_CAT->parse_format.  The diagnostics visible here cover two
   situations: a SUBTOTAL reference without a position when CATS contains
   more than one subtotal, and a reference to a category that the list does
   not include.  The latter is followed by a note on how to fix it, chosen
   by operator.  CATS_LOCATION is where the list-related messages point.

   For the remaining operators, both subexpressions are checked
   recursively.  NOTE(review): presumably the result is false as soon as any
   check fails; confirm. */
1908 ctables_recursive_check_postcompute (struct dictionary *dict,
1909 const struct ctables_pcexpr *e,
1910 struct ctables_category *pc_cat,
1911 const struct ctables_categories *cats,
1912 const struct msg_location *cats_location)
1916 case CTPO_CAT_NUMBER:
1917 case CTPO_CAT_STRING:
1918 case CTPO_CAT_NRANGE:
1919 case CTPO_CAT_SRANGE:
1920 case CTPO_CAT_MISSING:
1921 case CTPO_CAT_OTHERNM:
1922 case CTPO_CAT_SUBTOTAL:
1923 case CTPO_CAT_TOTAL:
1925 struct ctables_category *cat = ctables_find_category_for_postcompute (
1926 dict, cats, pc_cat->parse_format, e);
1929 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1931 size_t n_subtotals = 0;
1932 for (size_t i = 0; i < cats->n_cats; i++)
1933 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1934 if (n_subtotals > 1)
1936 msg_at (SE, cats_location,
1937 ngettext ("These categories include %zu instance "
1938 "of SUBTOTAL or HSUBTOTAL, so references "
1939 "from computed categories must refer to "
1940 "subtotals by position, "
1941 "e.g. SUBTOTAL[1].",
1942 "These categories include %zu instances "
1943 "of SUBTOTAL or HSUBTOTAL, so references "
1944 "from computed categories must refer to "
1945 "subtotals by position, "
1946 "e.g. SUBTOTAL[1].",
1949 msg_at (SN, e->location,
1950 _("This is the reference that lacks a position."));
1955 msg_at (SE, pc_cat->location,
1956 _("Computed category &%s references a category not included "
1957 "in the category list."),
1959 msg_at (SN, e->location, _("This is the missing category."));
1960 if (e->op == CTPO_CAT_SUBTOTAL)
1961 msg_at (SN, cats_location,
1962 _("To fix the problem, add subtotals to the "
1963 "list of categories here."));
/* Unlike the neighboring notes, this one is issued with msg() and so has no
   location attached. */
1964 else if (e->op == CTPO_CAT_TOTAL)
1965 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1966 "CATEGORIES specification."));
1968 msg_at (SN, cats_location,
1969 _("To fix the problem, add the missing category to the "
1970 "list of categories here."));
1973 if (pc_cat->pc->hide_source_cats)
1987 for (size_t i = 0; i < 2; i++)
1988 if (e->subs[i] && !ctables_recursive_check_postcompute (
1989 dict, e->subs[i], pc_cat, cats, cats_location))
/* Examines each of the N_VARS variables in VARS.  For a numeric variable,
   an error at CAT's location explains that the category specification
   applies only to string variables and names the offending variable.
   NOTE(review): presumably returns false in that case and true when every
   variable is a string variable; confirm. */
1998 all_strings (struct variable **vars, size_t n_vars,
1999 const struct ctables_category *cat)
2001 for (size_t j = 0; j < n_vars; j++)
2002 if (var_is_numeric (vars[j]))
2004 msg_at (SE, cat->location,
2005 _("This category specification may be applied only to string "
2006 "variables, but this subcommand tries to apply it to "
2007 "numeric variable %s."),
2008 var_get_name (vars[j]));
/* Parses a category specification for table T: the VARIABLES list, an
   optional bracketed list of explicit categories, and then any of the
   ORDER, KEY, MISSING, TOTAL, LABEL, POSITION, and EMPTY settings up to the
   next T_SLASH or T_ENDCMD.  ORDER, KEY, and MISSING are recognized only
   when no explicit categories were given.

   A single ctables_categories object is created with a reference count
   equal to the number of variables, and the previous entry for each
   variable in T->categories[] (indexed by the variable's dictionary index)
   is unreferenced.

   Explicit categories are validated afterward against the variables' types.
   String categories and string ranges are converted to numeric ones via the
   variables' common print format when parse_strings is set.  Otherwise they
   require all variables to be strings. */
2015 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2016 struct ctables *ct, struct ctables_table *t)
2018 if (!lex_match_id (lexer, "VARIABLES"))
2020 lex_match (lexer, T_EQUALS);
2022 struct variable **vars;
2024 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find out whether all the variables share one print format type;
   common_format becomes null if they do not. */
2027 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2028 for (size_t i = 1; i < n_vars; i++)
2030 const struct fmt_spec *f = var_get_print_format (vars[i]);
2031 if (f->type != common_format->type)
2033 common_format = NULL;
2039 && (fmt_get_category (common_format->type)
2040 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
2042 struct ctables_categories *c = xmalloc (sizeof *c);
2043 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2044 for (size_t i = 0; i < n_vars; i++)
2046 struct ctables_categories **cp
2047 = &t->categories[var_get_dict_index (vars[i])];
2048 ctables_categories_unref (*cp);
/* Explicit category list.  The token offsets of its first and last tokens
   are kept for use in later diagnostics. */
2052 size_t allocated_cats = 0;
2053 int cats_start_ofs = -1;
2054 int cats_end_ofs = -1;
2055 if (lex_match (lexer, T_LBRACK))
2057 cats_start_ofs = lex_ofs (lexer);
2060 if (c->n_cats >= allocated_cats)
2061 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2063 int start_ofs = lex_ofs (lexer);
2064 struct ctables_category *cat = &c->cats[c->n_cats];
2065 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2067 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2070 lex_match (lexer, T_COMMA);
2072 while (!lex_match (lexer, T_RBRACK));
2073 cats_end_ofs = lex_ofs (lexer) - 1;
/* Settings for the implicit category and for totals. */
2076 struct ctables_category cat = {
2078 .include_missing = false,
2079 .sort_ascending = true,
2081 bool show_totals = false;
2082 char *total_label = NULL;
2083 bool totals_before = false;
2084 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2086 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2088 lex_match (lexer, T_EQUALS);
2089 if (lex_match_id (lexer, "A"))
2090 cat.sort_ascending = true;
2091 else if (lex_match_id (lexer, "D"))
2092 cat.sort_ascending = false;
2095 lex_error_expecting (lexer, "A", "D");
2099 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2101 lex_match (lexer, T_EQUALS);
2102 if (lex_match_id (lexer, "VALUE"))
2103 cat.type = CCT_VALUE;
2104 else if (lex_match_id (lexer, "LABEL"))
2105 cat.type = CCT_LABEL;
2108 cat.type = CCT_FUNCTION;
2109 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2110 &cat.weighted, &cat.area))
2113 if (lex_match (lexer, T_LPAREN))
2115 cat.sort_var = parse_variable (lexer, dict);
2119 if (cat.sort_function == CTSF_PTILE)
2121 lex_match (lexer, T_COMMA);
2122 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2124 cat.percentile = lex_number (lexer);
2128 if (!lex_force_match (lexer, T_RPAREN))
2131 else if (ctables_function_availability (cat.sort_function)
2134 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2139 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2141 lex_match (lexer, T_EQUALS);
2142 if (lex_match_id (lexer, "INCLUDE"))
2143 cat.include_missing = true;
2144 else if (lex_match_id (lexer, "EXCLUDE"))
2145 cat.include_missing = false;
2148 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2152 else if (lex_match_id (lexer, "TOTAL"))
2154 lex_match (lexer, T_EQUALS);
2155 if (!parse_bool (lexer, &show_totals))
2158 else if (lex_match_id (lexer, "LABEL"))
2160 lex_match (lexer, T_EQUALS);
2161 if (!lex_force_string (lexer))
2164 total_label = ss_xstrdup (lex_tokss (lexer));
2167 else if (lex_match_id (lexer, "POSITION"))
2169 lex_match (lexer, T_EQUALS);
2170 if (lex_match_id (lexer, "BEFORE"))
2171 totals_before = true;
2172 else if (lex_match_id (lexer, "AFTER"))
2173 totals_before = false;
2176 lex_error_expecting (lexer, "BEFORE", "AFTER");
2180 else if (lex_match_id (lexer, "EMPTY"))
2182 lex_match (lexer, T_EQUALS);
2183 if (lex_match_id (lexer, "INCLUDE"))
2184 c->show_empty = true;
2185 else if (lex_match_id (lexer, "EXCLUDE"))
2186 c->show_empty = false;
2189 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2196 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2197 "TOTAL", "LABEL", "POSITION", "EMPTY");
2199 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2206 if (c->n_cats >= allocated_cats)
2207 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2208 c->cats[c->n_cats++] = cat;
/* Totals category: placed in slot 0 after shifting the others up, or in the
   slot after the last category.  Its label is the LABEL setting if given,
   else a copy of the translated string "Total". */
2213 if (c->n_cats >= allocated_cats)
2214 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2216 struct ctables_category *totals;
2219 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2220 totals = &c->cats[0];
2223 totals = &c->cats[c->n_cats];
2226 *totals = (struct ctables_category) {
2228 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward when they
   come last, attaching a subtotal pointer to categories along the way. */
2232 struct ctables_category *subtotal = NULL;
2233 for (size_t i = totals_before ? 0 : c->n_cats;
2234 totals_before ? i < c->n_cats : i-- > 0;
2235 totals_before ? i++ : 0)
2237 struct ctables_category *cat = &c->cats[i];
2246 cat->subtotal = subtotal;
2249 case CCT_POSTCOMPUTE:
2260 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories; runs only if a bracketed list was
   present. */
2265 if (cats_start_ofs != -1)
2267 for (size_t i = 0; i < c->n_cats; i++)
2269 struct ctables_category *cat = &c->cats[i];
2272 case CCT_POSTCOMPUTE:
2273 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2274 struct msg_location *cats_location
2275 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2276 bool ok = ctables_recursive_check_postcompute (
2277 dict, cat->pc->expr, cat, c, cats_location);
2278 msg_location_destroy (cats_location);
2285 for (size_t j = 0; j < n_vars; j++)
2286 if (var_is_alpha (vars[j]))
2288 msg_at (SE, cat->location,
2289 _("This category specification may be applied "
2290 "only to numeric variables, but this "
2291 "subcommand tries to apply it to string "
2293 var_get_name (vars[j]));
2302 if (!parse_category_string (cat->location, cat->string, dict,
2303 common_format->type, &n))
2306 ss_dealloc (&cat->string);
2308 cat->type = CCT_NUMBER;
2311 else if (!all_strings (vars, n_vars, cat))
2320 if (!cat->srange[0].string)
2322 else if (!parse_category_string (cat->location,
2323 cat->srange[0], dict,
2324 common_format->type, &n[0]))
2327 if (!cat->srange[1].string)
2329 else if (!parse_category_string (cat->location,
2330 cat->srange[1], dict,
2331 common_format->type, &n[1]))
2334 ss_dealloc (&cat->srange[0]);
2335 ss_dealloc (&cat->srange[1]);
2337 cat->type = CCT_NRANGE;
2338 cat->nrange[0] = n[0];
2339 cat->nrange[1] = n[1];
2341 else if (!all_strings (vars, n_vars, cat))
2352 case CCT_EXCLUDED_MISSING:
/* Releases what NEST holds: the summary spec set for every summary variant
   and the index array for every area type.  NEST itself is not freed by the
   statements shown. */
2367 ctables_nest_uninit (struct ctables_nest *nest)
2370 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2371 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2372 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2373 free (nest->areas[at]);
/* Uninitializes each of STACK's nests and frees the nests array. */
2377 ctables_stack_uninit (struct ctables_stack *stack)
2381 for (size_t i = 0; i < stack->n; i++)
2382 ctables_nest_uninit (&stack->nests[i]);
2383 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0.  One new nest is
   produced for every pair of a nest A from S0 and a nest B from S1, in
   S0-major order.  Its variable list is A's variables followed by B's.  The
   scale and summary indexes are A's if set (not SIZE_MAX), otherwise B's
   offset by A->n.  Summary specs for every variant are cloned from
   whichever input nest is chosen as the summary source, based on which one
   lacks a CSV_CELL summary variable.

   Both S0 and S1 are uninitialized before returning, so the caller gives up
   ownership of them.

   NOTE(review): the allocation passes S1.n * sizeof *stack.nests as the
   element size, so only the outer multiplication by S0.n is
   overflow-checked by xnmalloc(). */
2387 static struct ctables_stack
2388 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2395 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2396 for (size_t i = 0; i < s0.n; i++)
2397 for (size_t j = 0; j < s1.n; j++)
2399 const struct ctables_nest *a = &s0.nests[i];
2400 const struct ctables_nest *b = &s1.nests[j];
2402 size_t allocate = a->n + b->n;
2403 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2405 for (size_t k = 0; k < a->n; k++)
2406 vars[n++] = a->vars[k];
2407 for (size_t k = 0; k < b->n; k++)
2408 vars[n++] = b->vars[k];
2409 assert (n == allocate);
2411 const struct ctables_nest *summary_src;
2412 if (!a->specs[CSV_CELL].var)
2414 else if (!b->specs[CSV_CELL].var)
2419 struct ctables_nest *new = &stack.nests[stack.n++];
2420 *new = (struct ctables_nest) {
2422 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2423 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2425 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2426 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2430 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2431 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2433 ctables_stack_uninit (&s0);
2434 ctables_stack_uninit (&s1);
/* Builds the concatenation of S0 and S1 as a new stack.  S0's nests are
   copied first, by structure assignment, followed by S1's.  Each nest taken
   from S1 has its group_head increased by S0.n so that it still refers to
   the same nest in the combined array. */
2438 static struct ctables_stack
2439 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2441 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2442 for (size_t i = 0; i < s0.n; i++)
2443 stack.nests[stack.n++] = s0.nests[i];
2444 for (size_t i = 0; i < s1.n; i++)
2446 stack.nests[stack.n] = s1.nests[i];
2447 stack.nests[stack.n].group_head += s0.n;
2450 assert (stack.n == s0.n + s1.n);
/* Builds a one-nest stack for the variable axis node A.  The nest's
   scale_idx is 0 if A is a scale variable and SIZE_MAX otherwise.  Its
   summary_idx is 0 if A has CSV_CELL summary specs or is a scale variable,
   and SIZE_MAX otherwise.  The spec set for every summary variant is cloned
   from A and then tagged with A's variable and scale flag. */
2456 static struct ctables_stack
2457 var_fts (const struct ctables_axis *a)
2459 struct variable **vars = xmalloc (sizeof *vars);
2462 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2463 struct ctables_nest *nest = xmalloc (sizeof *nest);
2464 *nest = (struct ctables_nest) {
2467 .scale_idx = a->scale ? 0 : SIZE_MAX,
2468 .summary_idx = is_summary ? 0 : SIZE_MAX,
2471 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2473 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2474 nest->specs[sv].var = a->var;
2475 nest->specs[sv].is_scale = a->scale;
2477 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts axis expression A into a stack of nests.  One path yields an
   empty stack.  The two-operand paths enumerate A->subs[0] and A->subs[1]
   recursively and combine the results with stack_fts() or nest_fts().
   AXIS_TYPE is passed through unchanged to the recursive calls. */
2480 static struct ctables_stack
2481 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2484 return (struct ctables_stack) { .n = 0 };
2492 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2493 enumerate_fts (axis_type, a->subs[1]));
2496 /* This should consider any of the scale variables found in the result to
2497 be linked to each other listwise for SMISSING=LISTWISE. */
2498 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2499 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function in one cell.  The member in
   use depends on the summary function, as listed in the comment above each
   group of members.  ctables_summary_init() sets up the member,
   ctables_summary_add() feeds it one case at a time, and
   ctables_summary_uninit() releases it. */
2505 union ctables_summary
2507 /* COUNT, VALIDN, TOTALN. */
2510 /* MINIMUM, MAXIMUM, RANGE. */
2517 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2518 struct moments1 *moments;
2520 /* MEDIAN, MODE, PTILE. */
2523 struct casewriter *writer;
2528 /* XXX multiple response */
/* Prepares S for accumulating the function SS->function.  Minimum and
   maximum start out as SYSMIS.  Moment-based functions get a moments1
   accumulator, created with MOMENT_MEAN for one group of functions and with
   MOMENT_VARIANCE for another.  Order-statistic functions get a sorting
   casewriter over cases with two numeric (width 0) values, sorted ascending
   on the first. */
2532 ctables_summary_init (union ctables_summary *s,
2533 const struct ctables_summary_spec *ss)
2535 switch (ss->function)
2539 case CTSF_areaPCT_COUNT:
2540 case CTSF_areaPCT_VALIDN:
2541 case CTSF_areaPCT_TOTALN:
2556 s->min = s->max = SYSMIS;
2561 case CTSF_areaPCT_SUM:
2562 s->moments = moments1_create (MOMENT_MEAN);
2568 s->moments = moments1_create (MOMENT_VARIANCE);
2575 struct caseproto *proto = caseproto_create ();
2576 proto = caseproto_add_width (proto, 0);
2577 proto = caseproto_add_width (proto, 0);
2579 struct subcase ordering;
2580 subcase_init (&ordering, 0, 0, SC_ASCEND);
2581 s->writer = sort_create_writer (&ordering, proto);
2582 subcase_uninit (&ordering);
2583 caseproto_unref (proto);
/* Releases the accumulator in S that ctables_summary_init() created for
   SS->function: the moments1 object for moment-based functions and the
   casewriter for order-statistic functions. */
2593 ctables_summary_uninit (union ctables_summary *s,
2594 const struct ctables_summary_spec *ss)
2596 switch (ss->function)
2600 case CTSF_areaPCT_COUNT:
2601 case CTSF_areaPCT_VALIDN:
2602 case CTSF_areaPCT_TOTALN:
2623 case CTSF_areaPCT_SUM:
2624 moments1_destroy (s->moments);
2630 casewriter_destroy (s->writer);
/* Adds one case to accumulator S for summary spec SS.

   Count-style accumulators add a weight to S->count.  In most cases that is
   D_WEIGHT or E_WEIGHT (depending on the function) when SS->weighted is set
   and 1.0 otherwise; a few cases add E_WEIGHT regardless of SS->weighted.
   Several of them apply only when IS_SCALE or IS_INCLUDED is set.

   Minimum/maximum, moments, and order statistics read VALUE->f and skip the
   case when IS_SCALE_MISSING is set.  CTSF_areaPCT_SUM also skips it when
   IS_MISSING is set.  For order statistics the value and its weight are
   written to S->writer as a two-value case. */
2636 ctables_summary_add (union ctables_summary *s,
2637 const struct ctables_summary_spec *ss,
2638 const struct variable *var, const union value *value,
2639 bool is_scale, bool is_scale_missing,
2640 bool is_missing, bool is_included,
2641 double d_weight, double e_weight)
2643 /* To determine whether a case is included in a given table for a particular
2644 kind of summary, consider the following charts for each variable in the
2645 table. Only if "yes" appears for every variable for the summary is the
2648 Categorical variables: VALIDN COUNT TOTALN
2649 Valid values in included categories yes yes yes
2650 Missing values in included categories --- yes yes
2651 Missing values in excluded categories --- --- yes
2652 Valid values in excluded categories --- --- ---
2654 Scale variables: VALIDN COUNT TOTALN
2655 Valid value yes yes yes
2656 Missing value --- yes yes
2658 Missing values include both user- and system-missing. (The system-missing
2659 value is always in an excluded category.)
2661 switch (ss->function)
2664 s->count += ss->weighted ? d_weight : 1.0;
2667 case CTSF_areaPCT_TOTALN:
2668 s->count += ss->weighted ? e_weight : 1.0;
2672 if (is_scale || is_included)
2673 s->count += ss->weighted ? d_weight : 1.0;
2676 case CTSF_areaPCT_COUNT:
2677 if (is_scale || is_included)
2678 s->count += ss->weighted ? e_weight : 1.0;
2685 s->count += ss->weighted ? d_weight : 1.0;
2688 case CTSF_areaPCT_VALIDN:
2692 s->count += ss->weighted ? e_weight : 1.0;
2702 s->count += ss->weighted ? e_weight : 1.0;
2706 if (is_scale || is_included)
2707 s->count += e_weight;
2714 s->count += e_weight;
2718 s->count += e_weight;
2724 if (!is_scale_missing)
2726 assert (!var_is_alpha (var)); /* XXX? */
2727 if (s->min == SYSMIS || value->f < s->min)
2729 if (s->max == SYSMIS || value->f > s->max)
2739 if (!is_scale_missing)
2740 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2743 case CTSF_areaPCT_SUM:
2744 if (!is_missing && !is_scale_missing)
2745 moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
2751 if (!is_scale_missing)
2753 double w = ss->weighted ? e_weight : 1.0;
2756 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2757 *case_num_rw_idx (c, 0) = value->f;
2758 *case_num_rw_idx (c, 1) = w;
2759 casewriter_write (s->writer, c);
/* Computes the value of summary S in CELL for summary spec SS.

   The percentage functions divide S->count by a denominator taken from the
   area CELL->areas[SS->calc_area] and multiply by 100.  The denominator is
   the weighted (e_*) or unweighted (u_*) count, valid count, or total,
   chosen by SS->weighted.  The result is SYSMIS when the denominator is
   zero.  CTSF_areaPCT_SUM works the same way, with weight * mean as the
   numerator and the area's sum for SS->sum_var_idx as the denominator.

   Range is SYSMIS unless both the minimum and maximum are set.
   Moment-based results come from moments1_calculate(), with SYSMIS inputs
   propagated.

   The order-statistic paths turn S->writer into a reader, run it through
   order_stats_accumulate_idx(), and store the result in S->ovalue.  The
   percentile path uses SS->percentile for CTSF_PTILE and 0.5 otherwise. */
2766 ctables_summary_value (const struct ctables_cell *cell,
2767 union ctables_summary *s,
2768 const struct ctables_summary_spec *ss)
2770 switch (ss->function)
2777 return cell->areas[ss->calc_area]->sequence;
2779 case CTSF_areaPCT_COUNT:
2781 const struct ctables_area *a = cell->areas[ss->calc_area];
2782 double a_count = ss->weighted ? a->e_count : a->u_count;
2783 return a_count ? s->count / a_count * 100 : SYSMIS;
2786 case CTSF_areaPCT_VALIDN:
2788 const struct ctables_area *a = cell->areas[ss->calc_area];
2789 double a_valid = ss->weighted ? a->e_valid : a->u_valid;
2790 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2793 case CTSF_areaPCT_TOTALN:
2795 const struct ctables_area *a = cell->areas[ss->calc_area];
2796 double a_total = ss->weighted ? a->e_total : a->u_total;
2797 return a_total ? s->count / a_total * 100 : SYSMIS;
2814 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2819 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2825 double weight, variance;
2826 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2827 return calc_semean (variance, weight);
2833 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2834 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2839 double weight, mean;
2840 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2841 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2847 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2851 case CTSF_areaPCT_SUM:
2853 double weight, mean;
2854 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2855 if (weight == SYSMIS || mean == SYSMIS)
2858 const struct ctables_area *a = cell->areas[ss->calc_area];
2859 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2860 double denom = ss->weighted ? sum->e_sum : sum->u_sum;
2861 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2868 struct casereader *reader = casewriter_make_reader (s->writer);
2871 struct percentile *ptile = percentile_create (
2872 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2873 struct order_stats *os = &ptile->parent;
2874 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2875 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2876 statistic_destroy (&ptile->parent.parent);
2883 struct casereader *reader = casewriter_make_reader (s->writer);
2886 struct mode *mode = mode_create ();
2887 struct order_stats *os = &mode->parent;
2888 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2889 s->ovalue = mode->mode;
2890 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the pivot axis whose cell values are read. */
2898 struct ctables_cell_sort_aux
2900 const struct ctables_nest *nest;
2901 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   pointers to cells; AUX_ points to a struct ctables_cell_sort_aux.

   The variables of AUX->nest are compared in order, skipping the one at
   scale_idx, using the cells' values on axis AUX->a.  Cells in different
   categories are ordered by category address.  Within the same category the
   order depends on the category type: by value, or by value label with the
   value as another comparison, in both cases reversed when the category's
   sort_ascending flag is false.  Some category types always compare
   equal. */
2905 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2907 const struct ctables_cell_sort_aux *aux = aux_;
2908 struct ctables_cell *const *ap = a_;
2909 struct ctables_cell *const *bp = b_;
2910 const struct ctables_cell *a = *ap;
2911 const struct ctables_cell *b = *bp;
2913 const struct ctables_nest *nest = aux->nest;
2914 for (size_t i = 0; i < nest->n; i++)
2915 if (i != nest->scale_idx)
2917 const struct variable *var = nest->vars[i];
2918 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2919 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2920 if (a_cv->category != b_cv->category)
2921 return a_cv->category > b_cv->category ? 1 : -1;
2923 const union value *a_val = &a_cv->value;
2924 const union value *b_val = &b_cv->value;
2925 switch (a_cv->category->type)
2931 case CCT_POSTCOMPUTE:
2932 case CCT_EXCLUDED_MISSING:
2933 /* Must be equal. */
2941 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2949 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2951 return a_cv->category->sort_ascending ? cmp : -cmp;
2957 const char *a_label = var_lookup_value_label (var, a_val);
2958 const char *b_label = var_lookup_value_label (var, b_val);
2964 cmp = strcmp (a_label, b_label);
2970 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2973 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, taking the pivot
   axes in order.  A_ and B_ point to pointers to cells; AUX is unused. */
2985 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
2986 const void *aux UNUSED)
2988 struct ctables_cell *const *ap = a_;
2989 struct ctables_cell *const *bp = b_;
2990 const struct ctables_cell *a = *ap;
2991 const struct ctables_cell *b = *bp;
2993 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
2995 int al = a->axes[axis].leaf;
2996 int bl = b->axes[axis].leaf;
2998 return al > bl ? 1 : -1;
3006 For each ctables_table:
3007 For each combination of row vars:
3008 For each combination of column vars:
3009 For each combination of layer vars:
3011 Make a table of row values:
3012 Sort entries by row values
3013 Assign a 0-based index to each actual value
3014 Construct a dimension
3015 Make a table of column values
3016 Make a table of layer values
3018 Fill the table entry using the indexes from before.
/* Looks up, in S->areas[AREA], the area of type AREA that CELL belongs to,
   creating it if there is none.

   Both the hash and the equality test look, on each axis, only at the
   variables whose indexes are listed in that axis's nest->areas[AREA].  For
   each such variable the category pointer always takes part.  The data
   value takes part too unless the category is a total, subtotal, or
   postcompute.

   A newly created area records CELL as its example and receives a zeroed
   array of S->table->n_sum_vars sums when that count is nonzero.

   NOTE(review): inside the lookup loop, the axis loop variable `a' shadows
   the `struct ctables_area *a' being iterated.  Renaming one of them would
   make the code easier to follow. */
3021 static struct ctables_area *
3022 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3023 enum ctables_area_type area)
3026 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3028 const struct ctables_nest *nest = s->nests[a];
3029 for (size_t i = 0; i < nest->n_areas[area]; i++)
3031 size_t v_idx = nest->areas[area][i];
3032 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3033 hash = hash_pointer (cv->category, hash);
3034 if (cv->category->type != CCT_TOTAL
3035 && cv->category->type != CCT_SUBTOTAL
3036 && cv->category->type != CCT_POSTCOMPUTE)
3037 hash = value_hash (&cv->value,
3038 var_get_width (nest->vars[v_idx]), hash);
3042 struct ctables_area *a;
3043 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3045 const struct ctables_cell *df = a->example;
3046 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3048 const struct ctables_nest *nest = s->nests[a];
3049 for (size_t i = 0; i < nest->n_areas[area]; i++)
3051 size_t v_idx = nest->areas[area][i];
3052 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3053 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3054 if (cv1->category != cv2->category
3055 || (cv1->category->type != CCT_TOTAL
3056 && cv1->category->type != CCT_SUBTOTAL
3057 && cv1->category->type != CCT_POSTCOMPUTE
3058 && !value_equal (&cv1->value, &cv2->value,
3059 var_get_width (nest->vars[v_idx]))))
3068 struct ctables_sum *sums = (s->table->n_sum_vars
3069 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3072 a = xmalloc (sizeof *a);
3073 *a = (struct ctables_area) { .example = cell, .sums = sums };
3074 hmap_insert (&s->areas[area], &a->node, hash);
/* Wraps the string storage of V, using VAR's width as the length, in a
   substring and strips trailing spaces from it.  The substring points into
   V; nothing is copied. */
3078 static struct substring
3079 rtrim_value (const union value *v, const struct variable *var)
3081 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3082 var_get_width (var));
3083 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of VAR, with trailing spaces removed,
   lies within the range SRANGE[0] to SRANGE[1], both ends inclusive.  A
   bound whose string member is null is open, so that side is not
   checked. */
3088 in_string_range (const union value *v, const struct variable *var,
3089 const struct substring *srange)
3091 struct substring s = rtrim_value (v, var);
3092 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3093 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is tested for first.  The categories are
   then scanned from the last to the first.  Numbers match by equality.
   Strings match the value with trailing spaces removed.  Numeric ranges are
   inclusive, with -DBL_MAX and DBL_MAX acting as open ends.  String ranges
   are tested with in_string_range().  Tests involving missingness use
   var_is_value_missing().

   If the scan does not settle on a category, the result is null when V is
   missing for VAR, and otherwise the OTHERNM category recorded during the
   scan (which may itself be null). */
3096 static const struct ctables_category *
3097 ctables_categories_match (const struct ctables_categories *c,
3098 const union value *v, const struct variable *var)
3100 if (var_is_numeric (var) && v->f == SYSMIS)
3103 const struct ctables_category *othernm = NULL;
3104 for (size_t i = c->n_cats; i-- > 0; )
3106 const struct ctables_category *cat = &c->cats[i];
3110 if (cat->number == v->f)
3115 if (ss_equals (cat->string, rtrim_value (v, var)))
3120 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3121 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3126 if (in_string_range (v, var, cat->srange))
3131 if (var_is_value_missing (var, v))
3135 case CCT_POSTCOMPUTE:
3150 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3153 case CCT_EXCLUDED_MISSING:
3158 return var_is_value_missing (var, v) ? NULL : othernm;
/* Looks for a CCT_TOTAL category at either end of C's category array.  The
   first category is preferred over the last.

   NOTE(review): &c->cats[c->n_cats - 1] is formed and dereferenced without
   checking n_cats; if C can ever have zero categories this reads before the
   start of the array -- confirm that callers guarantee n_cats >= 1.
   NOTE(review): the final arm of the conditional is not visible in this
   extract; presumably it is NULL when neither end is a total. */
3161 static const struct ctables_category *
3162 ctables_categories_total (const struct ctables_categories *c)
3164 const struct ctables_category *first = &c->cats[0];
3165 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3166 return (first->type == CCT_TOTAL ? first
3167 : last->type == CCT_TOTAL ? last
/* Finds, or creates and inserts, the cell in section S that case C falls into
   given the per-axis, per-nest-position categories in CATS.

   A cell is identified by, for every non-scale variable on every axis, the
   category pointer and (unless the category is a total, subtotal or
   postcompute) the case's value for that variable.  The same recipe is used
   for the hash and for the equality check against existing cells.

   NOTE(review): CATS has a fixed inner dimension of 10, so this only works
   for nests with at most 10 variables; nothing visible here checks nest->n
   against that bound. */
3171 static struct ctables_cell *
3172 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3173 const struct ctables_category *cats[PIVOT_N_AXES][10])
3176 enum ctables_summary_variant sv = CSV_CELL;
/* Pass 1: compute the hash over category pointers and ordinary values. */
3177 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3179 const struct ctables_nest *nest = s->nests[a];
3180 for (size_t i = 0; i < nest->n; i++)
3181 if (i != nest->scale_idx)
3183 hash = hash_pointer (cats[a][i], hash);
3184 if (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE)
3187 hash = value_hash (case_data (c, nest->vars[i]),
3188 var_get_width (nest->vars[i]), hash);
/* Pass 2: look for an existing cell with that hash whose categories and
   ordinary values all agree.  The statements taken on mismatch and on match
   are not visible in this extract. */
3194 struct ctables_cell *cell;
3195 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3199 const struct ctables_nest *nest = s->nests[a];
3200 for (size_t i = 0; i < nest->n; i++)
3201 if (i != nest->scale_idx
3202 && (cats[a][i] != cell->axes[a].cvs[i].category
3203 || (cats[a][i]->type != CCT_TOTAL
3204 && cats[a][i]->type != CCT_SUBTOTAL
3205 && cats[a][i]->type != CCT_POSTCOMPUTE
3206 && !value_equal (case_data (c, nest->vars[i]),
3207 &cell->axes[a].cvs[i].value,
3208 var_get_width (nest->vars[i])))))
/* No match: build a new cell. */
3217 cell = xmalloc (sizeof *cell);
3220 cell->omit_areas = 0;
3221 cell->postcompute = false;
3222 //struct string name = DS_EMPTY_INITIALIZER;
3223 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3225 const struct ctables_nest *nest = s->nests[a];
3226 cell->axes[a].cvs = (nest->n
3227 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3229 for (size_t i = 0; i < nest->n; i++)
3231 const struct ctables_category *cat = cats[a][i];
3232 const struct variable *var = nest->vars[i];
3233 const union value *value = case_data (c, var);
3234 if (i != nest->scale_idx)
3236 const struct ctables_category *subtotal = cat->subtotal;
/* Hidden categories, and categories under a subtotal that hides its
   subcategories, get special treatment; the statement is not visible here. */
3237 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total-like categories mark, as bits of (1u << CTAT_*), the areas this cell
   is to be left out of.  The set depends on the axis the category is on. */
3240 if (cat->type == CCT_TOTAL
3241 || cat->type == CCT_SUBTOTAL
3242 || cat->type == CCT_POSTCOMPUTE)
3244 /* XXX these should be more encompassing I think.*/
3248 case PIVOT_AXIS_COLUMN:
3249 cell->omit_areas |= ((1u << CTAT_TABLE) |
3250 (1u << CTAT_LAYER) |
3251 (1u << CTAT_LAYERCOL) |
3252 (1u << CTAT_SUBTABLE) |
3255 case PIVOT_AXIS_ROW:
3256 cell->omit_areas |= ((1u << CTAT_TABLE) |
3257 (1u << CTAT_LAYER) |
3258 (1u << CTAT_LAYERROW) |
3259 (1u << CTAT_SUBTABLE) |
3262 case PIVOT_AXIS_LAYER:
3263 cell->omit_areas |= ((1u << CTAT_TABLE) |
3264 (1u << CTAT_LAYER));
3268 if (cat->type == CCT_POSTCOMPUTE)
3269 cell->postcompute = true;
/* Record the category and a private copy of the case's value. */
3272 cell->axes[a].cvs[i].category = cat;
3273 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the following lines build a debugging name in `name', whose
   declaration is commented out above; presumably they sit inside a disabled
   preprocessor region that is not visible in this extract -- confirm. */
3276 if (i != nest->scale_idx)
3278 if (!ds_is_empty (&name))
3279 ds_put_cstr (&name, ", ");
3280 char *value_s = data_out (value, var_get_encoding (var),
3281 var_get_print_format (var),
3282 settings_get_fmt_settings ());
3283 if (cat->type == CCT_TOTAL
3284 || cat->type == CCT_SUBTOTAL
3285 || cat->type == CCT_POSTCOMPUTE)
3286 ds_put_format (&name, "%s=total", var_get_name (var));
3288 ds_put_format (&name, "%s=%s", var_get_name (var),
3289 value_s + strspn (value_s, " "));
3295 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per spec in the summary axis's spec set
   selected by cell->sv, attach the cell to its area of every type, and
   finally insert it into the section's cell map.
   NOTE(review): the assignment to cell->sv is not visible in this extract. */
3297 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3298 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3299 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3300 for (size_t i = 0; i < specs->n; i++)
3301 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3302 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3303 cell->areas[at] = ctables_area_insert (s, cell, at);
3304 hmap_insert (&s->cells, &cell->node, hash);
/* Checks the listwise variables of SPECS against case C: walks
   specs->listwise_vars[0 .. n_listwise_vars) and tests each variable's
   numeric value in C with var_is_num_missing.
   NOTE(review): the return statements are not visible in this extract;
   presumably the result is true as soon as one variable is missing and false
   otherwise -- confirm. */
3309 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3310 const struct ccase *c)
3312 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3314 const struct variable *var = specs->listwise_vars[i];
3315 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell in section S selected by CATS.  The cell is found
   or created by ctables_cell_insert__.  Every summary of the cell is updated
   through ctables_summary_add, and then the totals, counts, valid counts and
   per-variable sums of the cell's areas are updated.

   D_WEIGHT and E_WEIGHT are accumulated separately into the d_* and e_* area
   members.  IS_INCLUDED is forwarded to ctables_summary_add.

   NOTE(review): the conditions guarding the count and valid updates are not
   visible in this extract. */
3323 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3324 const struct ctables_category *cats[PIVOT_N_AXES][10],
3325 bool is_included, double d_weight, double e_weight)
3327 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3328 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3330 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3331 const union value *value = case_data (c, specs->var);
3332 bool is_missing = var_is_value_missing (specs->var, value);
/* For scale summaries, a case also counts as missing when any listwise
   variable is missing. */
3333 bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c));
3335 for (size_t i = 0; i < specs->n; i++)
3336 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3337 specs->var, value, specs->is_scale,
3338 scale_missing, is_missing, is_included,
3339 d_weight, e_weight);
3340 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* NOTE(review): `&&' is a logical AND and (1u << at) is never zero, so this
   test reduces to `!cell->omit_areas': a cell with any omitted area updates
   no area at all.  omit_areas is built as a mask of (1u << CTAT_*) bits in
   ctables_cell_insert__, so a bitwise `&' looks intended.  Confirm before
   changing, because results for total, subtotal and postcompute cells would
   change. */
3341 if (!(cell->omit_areas && (1u << at)))
3343 struct ctables_area *a = cell->areas[at];
3344 a->d_total += d_weight;
3345 a->e_total += e_weight;
3349 a->d_count += d_weight;
3350 a->e_count += e_weight;
3355 a->d_valid += d_weight;
3356 a->e_valid += e_weight;
/* Accumulate, for each of the table's sum variables that is not missing in C,
   the value weighted by E_WEIGHT (e_sum) and the raw value (u_sum). */
3359 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3361 /* XXX listwise_missing??? */
3362 const struct variable *var = s->table->sum_vars[i];
3363 double addend = case_num (c, var);
3364 if (!var_is_num_missing (var, addend))
3366 struct ctables_sum *sum = &a->sums[i];
3367 sum->e_sum += addend * e_weight;
3368 sum->u_sum += addend;
/* Adds case C to the cells obtained by replacing categories in CATS with the
   corresponding variables' total categories.

   Walks the axes from START_AXIS and the nest positions from START_NEST,
   skipping each nest's scale variable.  For a position, the variable's total
   category is looked up with ctables_categories_total; the current
   cats[a][i] is saved, the case is added with ctables_cell_add__, and the
   function recurses starting at position i + 1 of the same axis so that
   combinations of totals are covered too.

   NOTE(review): the test on `total', the substitution into cats[a][i], the
   restore from `save' and any reset of start_nest between axes are not
   visible in this extract. */
3376 recurse_totals (struct ctables_section *s, const struct ccase *c,
3377 const struct ctables_category *cats[PIVOT_N_AXES][10],
3378 bool is_included, double d_weight, double e_weight,
3379 enum pivot_axis_type start_axis, size_t start_nest)
3381 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3383 const struct ctables_nest *nest = s->nests[a];
3384 for (size_t i = start_nest; i < nest->n; i++)
3386 if (i == nest->scale_idx)
3389 const struct variable *var = nest->vars[i];
3391 const struct ctables_category *total = ctables_categories_total (
3392 s->table->categories[var_get_dict_index (var)]);
3395 const struct ctables_category *save = cats[a][i];
3397 ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight);
3398 recurse_totals (s, c, cats, is_included, d_weight, e_weight, a, i + 1);
/* Counterpart of recurse_totals for subtotals: walks the axes from START_AXIS
   and the nest positions from START_NEST, skipping each nest's scale
   variable.  At a position it temporarily replaces cats[a][i] with that
   category's `subtotal' member, adds case C with ctables_cell_add__, and
   recurses from position i + 1 of the same axis.

   NOTE(review): the test that save->subtotal is non-null, the restore of
   cats[a][i] and any reset of start_nest between axes are not visible in this
   extract. */
3407 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3408 const struct ctables_category *cats[PIVOT_N_AXES][10],
3409 bool is_included, double d_weight, double e_weight,
3410 enum pivot_axis_type start_axis, size_t start_nest)
3412 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3414 const struct ctables_nest *nest = s->nests[a];
3415 for (size_t i = start_nest; i < nest->n; i++)
3417 if (i == nest->scale_idx)
3420 const struct ctables_category *save = cats[a][i];
3423 cats[a][i] = save->subtotal;
3424 ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight);
3425 recurse_subtotals (s, c, cats, is_included, d_weight, e_weight, a, i + 1);
/* Records VALUE of variable VAR in the OCCURRENCES hash map.  The value is
   hashed using VAR's width; entries with the same hash are compared with
   value_equal.  When the code reaches the allocation below, a new
   ctables_occurrence holding a clone of VALUE is inserted.
   NOTE(review): the statement taken when an equal value is found is not
   visible in this extract; presumably the function returns without
   inserting a duplicate -- confirm. */
3434 ctables_add_occurrence (const struct variable *var,
3435 const union value *value,
3436 struct hmap *occurrences)
3438 int width = var_get_width (var);
3439 unsigned int hash = value_hash (value, width, 0);
3441 struct ctables_occurrence *o;
3442 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3444 if (value_equal (value, &o->value, width))
3447 o = xmalloc (sizeof *o);
3448 value_clone (&o->value, value, width);
3449 hmap_insert (occurrences, &o->node, hash);
/* Classifies case C within section S and adds it, with weights D_WEIGHT and
   E_WEIGHT, to every cell it contributes to.

   Step 1: for each non-scale variable on each axis, the case's value is
   matched to a category with ctables_categories_match.  In a branch that
   also tests the nest's summary_idx and whether the value is missing, the
   static placeholder category cct_excluded_missing is substituted and
   is_included is cleared.
   Step 2: each of those values is recorded in the section's occurrences maps.
   Step 3: the case is added to its ordinary cell and then, through
   recurse_totals and recurse_subtotals, to total and subtotal cells.

   NOTE(review): the test on the result of ctables_categories_match and the
   statements taken in the branches of step 1 are not visible in this
   extract. */
3453 ctables_cell_insert (struct ctables_section *s,
3454 const struct ccase *c,
3455 double d_weight, double e_weight)
/* NOTE(review): the inner bound of 10 is a hard-coded limit on variables per
   nest (the original author marked it XXX); nothing visible here enforces
   it. */
3457 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3459 bool is_included = true;
3461 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3463 const struct ctables_nest *nest = s->nests[a];
3464 for (size_t i = 0; i < nest->n; i++)
3465 if (i != nest->scale_idx)
3467 const struct variable *var = nest->vars[i];
3468 const union value *value = case_data (c, var);
3470 cats[a][i] = ctables_categories_match (
3471 s->table->categories[var_get_dict_index (var)], value, var);
3474 if (i != nest->summary_idx)
3477 if (!var_is_value_missing (var, value))
3480 static const struct ctables_category cct_excluded_missing = {
3481 .type = CCT_EXCLUDED_MISSING,
3484 cats[a][i] = &cct_excluded_missing;
3485 is_included = false;
3491 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3493 const struct ctables_nest *nest = s->nests[a];
3494 for (size_t i = 0; i < nest->n; i++)
3495 if (i != nest->scale_idx)
3497 const struct variable *var = nest->vars[i];
3498 const union value *value = case_data (c, var);
3499 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3503 ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight);
3504 recurse_totals (s, c, cats, is_included, d_weight, e_weight, 0, 0);
3505 recurse_subtotals (s, c, cats, is_included, d_weight, e_weight, 0, 0);
/* Member of a struct whose header is not visible in this extract; presumably
   struct merge_item, since merge_item_compare_3way reads a->set->specs[a->ofs]
   -- confirm.  Points at the summary spec set an item indexes into. */
3510 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer to
   (set->specs[ofs] of each).  Keys are compared in this order: function,
   weighted, calc_area, percentile, and finally label by strcmp, with a null
   label treated as the empty string.  The first key that differs decides the
   result. */
3515 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3517 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3518 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3519 if (as->function != bs->function)
3520 return as->function > bs->function ? 1 : -1;
3521 else if (as->weighted != bs->weighted)
3522 return as->weighted > bs->weighted ? 1 : -1;
3523 else if (as->calc_area != bs->calc_area)
3524 return as->calc_area > bs->calc_area ? 1 : -1;
3525 else if (as->percentile != bs->percentile)
/* Unlike the keys above, percentile uses `<', so a smaller percentile yields
   a positive result. */
3526 return as->percentile < bs->percentile ? 1 : -1;
3528 const char *as_label = as->label ? as->label : "";
3529 const char *bs_label = bs->label ? bs->label : "";
3530 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of variable VAR: wraps it in a temporary pivot
   value created with pivot_value_new_var_value, formats that with
   pivot_value_format into the string S, and destroys the pivot value.
   NOTE(review): the remainder of the parameter list (which declares S) is not
   visible in this extract. */
3534 ctables_category_format_number (double number, const struct variable *var,
3537 struct pivot_value *pv = pivot_value_new_var_value (
3538 var, &(union value) { .f = number });
3539 pivot_value_format (pv, NULL, s);
3540 pivot_value_destroy (pv);
/* Formats STRING as a value of variable VAR into OUT.  STRING is first copied
   into a buffer of VAR's width, padded on the right with spaces
   (buf_copy_rpad), and that buffer is wrapped in a temporary pivot value which
   is formatted with pivot_value_format and then destroyed.
   NOTE(review): no release of the xmalloc'd buffer `s' is visible in this
   extract; confirm that the full function frees it after
   pivot_value_destroy. */
3544 ctables_category_format_string (struct substring string,
3545 const struct variable *var, struct string *out)
3547 int width = var_get_width (var);
3548 char *s = xmalloc (width);
3549 buf_copy_rpad (s, width, string.string, string.length, ' ');
3550 struct pivot_value *pv = pivot_value_new_var_value (
3551 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3552 pivot_value_format (pv, NULL, out);
3553 pivot_value_destroy (pv);
/* Appends a textual label for category CAT of variable VAR to the string S.
   The visible outputs are: a formatted number, a formatted string, a numeric
   or string range written as "<low> THRU <high>", the literal "MISSING", the
   literal "OTHERNM", "&" followed by the postcompute's name, and
   cat->total_label.
   NOTE(review): most case labels, the remainder of the parameter list and the
   return statements are not visible in this extract, so the mapping from
   category type to output must be confirmed against the full file. */
3558 ctables_category_format_label (const struct ctables_category *cat,
3559 const struct variable *var,
3565 ctables_category_format_number (cat->number, var, s);
3569 ctables_category_format_string (cat->string, var, s);
3573 ctables_category_format_number (cat->nrange[0], var, s);
3574 ds_put_format (s, " THRU ");
3575 ctables_category_format_number (cat->nrange[1], var, s);
3579 ctables_category_format_string (cat->srange[0], var, s);
3580 ds_put_format (s, " THRU ");
3581 ctables_category_format_string (cat->srange[1], var, s);
3585 ds_put_cstr (s, "MISSING");
3589 ds_put_cstr (s, "OTHERNM");
3592 case CCT_POSTCOMPUTE:
3593 ds_put_format (s, "&%s", cat->pc->name);
3598 ds_put_cstr (s, cat->total_label);
3604 case CCT_EXCLUDED_MISSING:
/* Builds the display label for postcompute category CAT of variable VAR by
   expanding each ")LABEL[n]" marker in cat->pc->label into the label of the
   n-th category (1-based) of CATS, as produced by
   ctables_category_format_label.

   When the label contains no marker at all, it is returned as-is.  The last
   statement returns the unexpanded label; presumably it is the error path
   taken when a marker is malformed -- the jumps to it are not visible in this
   extract. */
3611 static struct pivot_value *
3612 ctables_postcompute_label (const struct ctables_categories *cats,
3613 const struct ctables_category *cat,
3614 const struct variable *var)
3616 struct substring in = ss_cstr (cat->pc->label);
3617 struct substring target = ss_cstr (")LABEL[");
3619 struct string out = DS_EMPTY_INITIALIZER;
3622 size_t chunk = ss_find_substring (in, target);
/* No further marker: finish.  If nothing has been accumulated, the remaining
   input is returned directly; otherwise it is appended to OUT first. */
3623 if (chunk == SIZE_MAX)
3625 if (ds_is_empty (&out))
3626 return pivot_value_new_user_text (in.string, in.length);
3629 ds_put_substring (&out, in);
3630 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker and step past the marker itself. */
3634 ds_put_substring (&out, ss_head (in, chunk));
3635 ss_advance (&in, chunk + target.length);
/* The index runs up to the closing bracket. */
3637 struct substring idx_s;
3638 if (!ss_get_until (&in, ']', &idx_s))
3641 long int idx = strtol (idx_s.string, &tail, 10);
/* Reject indexes outside 1..n_cats and indexes with trailing garbage before
   the bracket. */
3642 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3645 struct ctables_category *cat2 = &cats->cats[idx - 1];
3646 if (!ctables_category_format_label (cat2, var, &out))
3652 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3655 static struct pivot_value *
3656 ctables_category_create_value_label (const struct ctables_categories *cats,
3657 const struct ctables_category *cat,
3658 const struct variable *var,
3659 const union value *value)
3661 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3662 ? ctables_postcompute_label (cats, cat, var)
3663 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3664 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3665 : pivot_value_new_var_value (var, value));
/* Searches T's clabels_values_map for an entry whose value equals VALUE, of
   the given WIDTH.  HASH must be the hash of VALUE; only entries with that
   hash are compared.
   NOTE(review): the return statements are not visible in this extract;
   presumably the matching entry is returned, or NULL when there is none --
   confirm. */
3668 static struct ctables_value *
3669 ctables_value_find__ (struct ctables_table *t, const union value *value,
3670 int width, unsigned int hash)
3672 struct ctables_value *clv;
3673 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3674 hash, &t->clabels_values_map)
3675 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T's clabels_values_map.  The map is searched first with
   ctables_value_find__; the allocation below stores a clone of VALUE in a new
   ctables_value and inserts it under the value's hash.
   NOTE(review): the remainder of the parameter list (which declares `width')
   and the test on the search result are not visible in this extract;
   presumably insertion happens only when the value is not already present --
   confirm. */
3681 ctables_value_insert (struct ctables_table *t, const union value *value,
3684 unsigned int hash = value_hash (value, width, 0);
3685 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3688 clv = xmalloc (sizeof *clv);
3689 value_clone (&clv->value, value, width);
3690 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's clabels_values_map.  Computes the
   hash of VALUE (with a zero basis) and hands the search to
   ctables_value_find__, returning whatever that returns. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively enumerates combinations of nest indexes and appends a section
   to T for each combination.

   While A is a valid axis, IX[A] is stepped from 0 up to the number of nests
   in T's stack for that axis (at least once, even when the stack is empty),
   recursing with A + 1 each time.  Once A has run past the last axis, the
   next slot of t->sections is claimed and initialized: its cell map, one
   occurrences map per variable of each axis's selected nest, and one area map
   per area type.

   NOTE(review): nothing visible here checks t->n_sections against the
   capacity of t->sections; presumably the caller sizes the array beforehand
   -- confirm. */
3703 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3704 size_t ix[PIVOT_N_AXES])
3706 if (a < PIVOT_N_AXES)
3708 size_t limit = MAX (t->stacks[a].n, 1);
3709 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3710 ctables_table_add_section (t, a + 1, ix);
3714 struct ctables_section *s = &t->sections[t->n_sections++];
3715 *s = (struct ctables_section) {
3717 .cells = HMAP_INITIALIZER (s->cells),
/* A is reused as the loop variable here; it is past the last axis on entry to
   this part. */
3719 for (a = 0; a < PIVOT_N_AXES; a++)
3722 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3724 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3725 for (size_t i = 0; i < nest->n; i++)
3726 hmap_init (&s->occurrences[a][i]);
3728 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3729 hmap_init (&s->areas[at]);
/* Binary operator callbacks for postcompute expressions.  They have the
   (double, double) shape that ctables_pcexpr_evaluate_nonterminal takes as
   its `evaluate' argument, and ctables_pcexpr_evaluate passes each of them
   there.  Their bodies are not visible in this extract; presumably they
   return the sum, difference and product of A and B -- confirm. */
3734 ctpo_add (double a, double b)
3740 ctpo_sub (double a, double b)
3746 ctpo_mul (double a, double b)
/* Division callback for postcompute expressions: A / B, or SYSMIS when B is
   zero. */
3752 ctpo_div (double a, double b)
3754 return b ? a / b : SYSMIS;
/* Exponentiation callback for postcompute expressions: computes pow (A, B).
   The caller's errno is captured before the call.
   NOTE(review): the statements that clear, test and restore errno, and the
   return, are not visible in this extract; presumably a pow failure reported
   through errno is mapped to a special result -- confirm. */
3758 ctpo_pow (double a, double b)
3760 int save_errno = errno;
3762 double result = pow (a, b);
/* Unary callback for postcompute expressions, given the two-argument shape
   shared by the other ctpo_* callbacks; B is marked UNUSED.  The body is not
   visible in this extract; presumably it returns the negation of A --
   confirm. */
3770 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute expression for
   one cell.
   NOTE(review): the members pc_a_idx and summary_idx, which the evaluation
   code reads through this struct, are not visible in this extract. */
3775 struct ctables_pcexpr_evaluate_ctx
/* The cell being computed; supplies the category values for every position
   other than the postcompute's own. */
3777 const struct ctables_cell *cell;
/* The section whose cells and occurrences are searched. */
3778 const struct ctables_section *section;
/* The categories of the variable that carries the postcompute. */
3779 const struct ctables_categories *cats;
/* The axis on which the postcompute category sits. */
3780 enum pivot_axis_type pc_a;
/* Passed through to ctables_find_category_for_postcompute. */
3783 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal calls this to
   evaluate subexpressions before its definition appears. */
static double ctables_pcexpr_evaluate (
  const struct ctables_pcexpr_evaluate_ctx *ctx,
  const struct ctables_pcexpr *e);
/* Evaluates an operator node E of a postcompute expression: evaluates its
   first N_ARGS subexpressions with ctables_pcexpr_evaluate and applies
   EVALUATE to the results.  Arguments that are not evaluated are passed as
   0.  N_ARGS must not exceed 2, the size of the local argument array; the
   calls visible in this file pass 1 or 2.
   NOTE(review): the statement taken when an argument is non-finite or SYSMIS
   is not visible in this extract; presumably SYSMIS is returned without
   calling EVALUATE -- confirm. */
3790 ctables_pcexpr_evaluate_nonterminal (
3791 const struct ctables_pcexpr_evaluate_ctx *ctx,
3792 const struct ctables_pcexpr *e, size_t n_args,
3793 double evaluate (double, double))
3795 double args[2] = { 0, 0 };
3796 for (size_t i = 0; i < n_args; i++)
3798 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3799 if (!isfinite (args[i]) || args[i] == SYSMIS)
3802 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary index ctx->summary_idx, of the cell
   that has the same category values as ctx->cell except that the position
   (ctx->pc_a, ctx->pc_a_idx) is replaced by PC_CV.

   The target cell is located in the section's cell map with the same
   hash-and-compare recipe that ctables_cell_insert__ uses: category pointer
   for every non-scale variable, plus the value unless the category is a
   total, subtotal or postcompute.

   NOTE(review): the statements taken on a mismatch, on a match and when no
   cell is found are not visible in this extract. */
3806 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3807 const struct ctables_cell_value *pc_cv)
3809 const struct ctables_section *s = ctx->section;
/* Hash of the target cell's identity. */
3812 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3814 const struct ctables_nest *nest = s->nests[a];
3815 for (size_t i = 0; i < nest->n; i++)
3816 if (i != nest->scale_idx)
3818 const struct ctables_cell_value *cv
3819 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3820 : &ctx->cell->axes[a].cvs[i]);
3821 hash = hash_pointer (cv->category, hash);
3822 if (cv->category->type != CCT_TOTAL
3823 && cv->category->type != CCT_SUBTOTAL
3824 && cv->category->type != CCT_POSTCOMPUTE)
3825 hash = value_hash (&cv->value,
3826 var_get_width (nest->vars[i]), hash);
/* Compare candidates with that hash position by position. */
3830 struct ctables_cell *tc;
3831 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3833 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3835 const struct ctables_nest *nest = s->nests[a];
3836 for (size_t i = 0; i < nest->n; i++)
3837 if (i != nest->scale_idx)
3839 const struct ctables_cell_value *p_cv
3840 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3841 : &ctx->cell->axes[a].cvs[i]);
3842 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3843 if (p_cv->category != t_cv->category
3844 || (p_cv->category->type != CCT_TOTAL
3845 && p_cv->category->type != CCT_SUBTOTAL
3846 && p_cv->category->type != CCT_POSTCOMPUTE
3847 && !value_equal (&p_cv->value,
3849 var_get_width (nest->vars[i]))))
/* Read the requested summary from the target cell, using the spec set that
   matches the target cell's summary variant. */
3861 const struct ctables_table *t = s->table;
3862 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3863 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3864 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3865 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression node E in context CTX and returns its
   value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute and then read from the matching
   cell(s) through ctables_pcexpr_evaluate_category.  Operator nodes are
   delegated to ctables_pcexpr_evaluate_nonterminal with the matching ctpo_*
   callback.

   NOTE(review): the switch header, several case labels (including those of
   the operator nodes) and some returns are not visible in this extract. */
3869 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3870 const struct ctables_pcexpr *e)
/* Range-like operands can cover many distinct values: every value that
   occurred for the postcompute's variable and that matches the operand's
   category contributes its cell's value to a running sum. */
3877 case CTPO_CAT_NRANGE:
3878 case CTPO_CAT_SRANGE:
3879 case CTPO_CAT_MISSING:
3880 case CTPO_CAT_OTHERNM:
3882 struct ctables_cell_value cv = {
3883 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3885 assert (cv.category != NULL);
3887 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3888 const struct ctables_occurrence *o;
3891 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3892 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3893 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3895 cv.value = o->value;
3896 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Single-cell operands: the cell value carries the operand's number. */
3901 case CTPO_CAT_NUMBER:
3902 case CTPO_CAT_SUBTOTAL:
3903 case CTPO_CAT_TOTAL:
3905 struct ctables_cell_value cv = {
3906 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3907 .value = { .f = e->number },
3909 assert (cv.category != NULL);
3910 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: when the variable is wider than the operand, a copy padded
   with spaces to the variable's width is made so that the value compares
   equal to stored values.
   NOTE(review): the declaration of `s' and the release of the padded copy are
   not visible in this extract. */
3913 case CTPO_CAT_STRING:
3915 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3917 if (width > e->string.length)
3919 s = xmalloc (width);
3920 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3923 const struct ctables_category *category
3924 = ctables_find_category_for_postcompute (
3925 ctx->section->table->ctables->dict,
3926 ctx->cats, ctx->parse_format, e);
3927 assert (category != NULL);
/* The resolved category may be numeric or string; fill in the value member
   that corresponds to its type. */
3929 struct ctables_cell_value cv = { .category = category };
3930 if (category->type == CCT_NUMBER)
3931 cv.value.f = category->number;
3932 else if (category->type == CCT_STRING)
3933 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3937 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operator nodes: five binary operators and one unary operator. */
3943 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3946 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3949 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3952 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3955 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3958 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that CELL, in section S, belongs to.  CELL
   must have its postcompute flag set (asserted).  Every position of every
   axis is scanned for a category of type CCT_POSTCOMPUTE; the category found
   is remembered in pc_cat and its nest index is stored through an output
   pointer.  A postcompute category must be found (asserted at the end).

   NOTE(review): the remainder of the parameter list, the store through
   PC_A_P, the handling of a second postcompute (see the original comment
   inside the loop) and the return statements are not visible in this
   extract. */
3964 static const struct ctables_category *
3965 ctables_cell_postcompute (const struct ctables_section *s,
3966 const struct ctables_cell *cell,
3967 enum pivot_axis_type *pc_a_p,
3970 assert (cell->postcompute);
3971 const struct ctables_category *pc_cat = NULL;
3972 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3973 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3975 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3976 if (cv->category->type == CCT_POSTCOMPUTE)
3980 /* Multiple postcomputes cross each other. The value is
3985 pc_cat = cv->category;
3989 *pc_a_idx_p = pc_a_idx;
3993 assert (pc_cat != NULL);
// Computes the value of postcompute cell CELL in section S for summary spec
// SS by evaluating the postcompute's expression.
//
// The postcompute category and its position are located with
// ctables_cell_postcompute.  If the postcompute carries its own summary specs
// (pc->specs), the one matching SS on function, weighted, calc_area and
// percentile supplies *FORMAT and *IS_CTABLES_FORMAT.  The expression is then
// evaluated with a context built from the cell, the section and the
// categories of the variable that carries the postcompute.
//
// NOTE(review): the remainder of the parameter list (which declares
// summary_idx), the tests on pc_cat and pc->specs, and several context
// initializers are not visible in this extract.
3998 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3999 const struct ctables_cell *cell,
4000 const struct ctables_summary_spec *ss,
4001 struct fmt_spec *format,
4002 bool *is_ctables_format,
4005 enum pivot_axis_type pc_a = 0;
4006 size_t pc_a_idx = 0;
4007 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4008 s, cell, &pc_a, &pc_a_idx);
4012 const struct ctables_postcompute *pc = pc_cat->pc;
// Look for a postcompute-specific spec equivalent to SS.
4015 for (size_t i = 0; i < pc->specs->n; i++)
4017 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4018 if (ss->function == ss2->function
4019 && ss->weighted == ss2->weighted
4020 && ss->calc_area == ss2->calc_area
4021 && ss->percentile == ss2->percentile)
4023 *format = ss2->format;
4024 *is_ctables_format = ss2->is_ctables_format;
4030 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4031 const struct ctables_categories *cats = s->table->categories[
4032 var_get_dict_index (var)];
4033 struct ctables_pcexpr_evaluate_ctx ctx = {
4038 .pc_a_idx = pc_a_idx,
4039 .summary_idx = summary_idx,
4040 .parse_format = pc_cat->parse_format,
4042 return ctables_pcexpr_evaluate (&ctx, pc->expr);
// Formats the number D according to FORMAT and SETTINGS with
// data_out_stretchy (encoding "UTF-8"), then post-processes the text so that
// a leading minus sign ends up after the "N=" or "(" prefix, as the original
// comment below explains.
//
// NOTE(review): the test that a destination for the minus sign was found, and
// the return statement, are not visible in this extract; presumably the
// formatted string is returned for the caller to own -- confirm.
4046 ctables_format (double d, const struct fmt_spec *format,
4047 const struct fmt_settings *settings)
4049 const union value v = { .f = d };
4050 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4052 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4053 produce the results we want for negative numbers, putting the negative
4054 sign in the wrong spot, before the prefix instead of after it. We can't,
4055 in fact, produce the desired results using a custom-currency
4056 specification. Instead, we postprocess the output, moving the negative
4059 NEQUAL: "-N=3" => "N=-3"
4060 PAREN: "-(3)" => "(-3)"
4061 PCTPAREN: "-(3%)" => "(-3%)"
4063 This transformation doesn't affect NEGPAREN. */
// A minus sign directly after 'E' is skipped here; presumably that is the
// sign of an exponent -- confirm.
4064 char *minus_src = strchr (s, '-');
4065 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
// Destination: just after the 'N' of "N=" when present, otherwise the opening
// parenthesis (which may be absent, leaving minus_dst null).
4067 char *n_equals = strstr (s, "N=");
4068 char *lparen = strchr (s, '(');
4069 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4071 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  Two conditions are tested per
   nest: whether it is anything other than a single variable that is the scale
   variable (n != 1 or scale_idx != 0), and whether that variable's vlabel
   setting differs from CTVL_NONE.
   NOTE(review): the return statements are not visible in this extract;
   presumably either condition makes the result false and the result is true
   only when no nest trips them -- confirm. */
4077 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4079 for (size_t i = 0; i < t->stacks[a].n; i++)
4081 struct ctables_nest *nest = &t->stacks[a].nests[i];
4082 if (nest->n != 1 || nest->scale_idx != 0)
4085 enum ctables_vlabel vlabel
4086 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4087 if (vlabel != CTVL_NONE)
4094 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4096 struct pivot_table *pt = pivot_table_create__ (
4098 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4099 : pivot_value_new_text (N_("Custom Tables"))),
4102 pivot_table_set_caption (
4103 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4105 pivot_table_set_corner_text (
4106 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4108 bool summary_dimension = (t->summary_axis != t->slabels_axis
4109 || (!t->slabels_visible
4110 && t->summary_specs.n > 1));
4111 if (summary_dimension)
4113 struct pivot_dimension *d = pivot_dimension_create (
4114 pt, t->slabels_axis, N_("Statistics"));
4115 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4116 if (!t->slabels_visible)
4117 d->hide_all_labels = true;
4118 for (size_t i = 0; i < specs->n; i++)
4119 pivot_category_create_leaf (
4120 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4123 bool categories_dimension = t->clabels_example != NULL;
4124 if (categories_dimension)
4126 struct pivot_dimension *d = pivot_dimension_create (
4127 pt, t->label_axis[t->clabels_from_axis],
4128 t->clabels_from_axis == PIVOT_AXIS_ROW
4129 ? N_("Row Categories")
4130 : N_("Column Categories"));
4131 const struct variable *var = t->clabels_example;
4132 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4133 for (size_t i = 0; i < t->n_clabels_values; i++)
4135 const struct ctables_value *value = t->clabels_values[i];
4136 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4137 assert (cat != NULL);
4138 pivot_category_create_leaf (
4139 d->root, ctables_category_create_value_label (c, cat,
4145 pivot_table_set_look (pt, ct->look);
4146 struct pivot_dimension *d[PIVOT_N_AXES];
4147 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4149 static const char *names[] = {
4150 [PIVOT_AXIS_ROW] = N_("Rows"),
4151 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4152 [PIVOT_AXIS_LAYER] = N_("Layers"),
4154 d[a] = (t->axes[a] || a == t->summary_axis
4155 ? pivot_dimension_create (pt, a, names[a])
4160 assert (t->axes[a]);
4162 for (size_t i = 0; i < t->stacks[a].n; i++)
4164 struct ctables_nest *nest = &t->stacks[a].nests[i];
4165 struct ctables_section **sections = xnmalloc (t->n_sections,
4167 size_t n_sections = 0;
4169 size_t n_total_cells = 0;
4170 size_t max_depth = 0;
4171 for (size_t j = 0; j < t->n_sections; j++)
4172 if (t->sections[j].nests[a] == nest)
4174 struct ctables_section *s = &t->sections[j];
4175 sections[n_sections++] = s;
4176 n_total_cells += hmap_count (&s->cells);
4178 size_t depth = s->nests[a]->n;
4179 max_depth = MAX (depth, max_depth);
4182 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4184 size_t n_sorted = 0;
4186 for (size_t j = 0; j < n_sections; j++)
4188 struct ctables_section *s = sections[j];
4190 struct ctables_cell *cell;
4191 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4193 sorted[n_sorted++] = cell;
4194 assert (n_sorted <= n_total_cells);
4197 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4198 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4201 if (a == PIVOT_AXIS_ROW)
4203 size_t ids[N_CTATS];
4204 memset (ids, 0, sizeof ids);
4205 for (size_t j = 0; j < n_sorted; j++)
4207 struct ctables_cell *cell = sorted[j];
4208 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4210 struct ctables_area *area = cell->areas[at];
4211 if (!area->sequence)
4212 area->sequence = ++ids[at];
4219 for (size_t j = 0; j < n_sorted; j++)
4221 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0);
4226 struct ctables_level
4228 enum ctables_level_type
4230 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4231 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4232 CTL_SUMMARY, /* Summary functions. */
4236 enum settings_value_show vlabel; /* CTL_VAR only. */
4239 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4240 size_t n_levels = 0;
4241 for (size_t k = 0; k < nest->n; k++)
4243 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4244 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4246 if (vlabel != CTVL_NONE)
4248 levels[n_levels++] = (struct ctables_level) {
4250 .vlabel = (enum settings_value_show) vlabel,
4255 if (nest->scale_idx != k
4256 && (k != nest->n - 1 || t->label_axis[a] == a))
4258 levels[n_levels++] = (struct ctables_level) {
4259 .type = CTL_CATEGORY,
4265 if (!summary_dimension && a == t->slabels_axis)
4267 levels[n_levels++] = (struct ctables_level) {
4268 .type = CTL_SUMMARY,
4269 .var_idx = SIZE_MAX,
4273 /* Pivot categories:
4275 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4276 - category for nest->vars[0], if nest->scale_idx != 0
4277 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4278 - category for nest->vars[1], if nest->scale_idx != 1
4280 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4281 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4282 - summary function, if 'a == t->slabels_axis && a ==
4285 Additional dimensions:
4287 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4289 - If 't->label_axis[b] == a' for some 'b != a', add a category
4294 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4296 for (size_t j = 0; j < n_sorted; j++)
4298 struct ctables_cell *cell = sorted[j];
4299 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4301 size_t n_common = 0;
4304 for (; n_common < n_levels; n_common++)
4306 const struct ctables_level *level = &levels[n_common];
4307 if (level->type == CTL_CATEGORY)
4309 size_t var_idx = level->var_idx;
4310 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4311 if (prev->axes[a].cvs[var_idx].category != c)
4313 else if (c->type != CCT_SUBTOTAL
4314 && c->type != CCT_TOTAL
4315 && c->type != CCT_POSTCOMPUTE
4316 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4317 &cell->axes[a].cvs[var_idx].value,
4318 var_get_type (nest->vars[var_idx])))
4324 for (size_t k = n_common; k < n_levels; k++)
4326 const struct ctables_level *level = &levels[k];
4327 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4328 if (level->type == CTL_SUMMARY)
4330 assert (k == n_levels - 1);
4332 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4333 for (size_t m = 0; m < specs->n; m++)
4335 int leaf = pivot_category_create_leaf (
4336 parent, ctables_summary_label (&specs->specs[m],
4344 const struct variable *var = nest->vars[level->var_idx];
4345 struct pivot_value *label;
4346 if (level->type == CTL_VAR)
4348 label = pivot_value_new_variable (var);
4349 label->variable.show = level->vlabel;
4351 else if (level->type == CTL_CATEGORY)
4353 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4354 label = ctables_category_create_value_label (
4355 t->categories[var_get_dict_index (var)],
4356 cv->category, var, &cv->value);
4361 if (k == n_levels - 1)
4362 prev_leaf = pivot_category_create_leaf (parent, label);
4364 groups[k] = pivot_category_create_group__ (parent, label);
4368 cell->axes[a].leaf = prev_leaf;
4377 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4381 size_t n_total_cells = 0;
4382 for (size_t j = 0; j < t->n_sections; j++)
4383 n_total_cells += hmap_count (&t->sections[j].cells);
4385 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4386 size_t n_sorted = 0;
4387 for (size_t j = 0; j < t->n_sections; j++)
4389 const struct ctables_section *s = &t->sections[j];
4390 struct ctables_cell *cell;
4391 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4393 sorted[n_sorted++] = cell;
4395 assert (n_sorted <= n_total_cells);
4396 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4398 size_t ids[N_CTATS];
4399 memset (ids, 0, sizeof ids);
4400 for (size_t j = 0; j < n_sorted; j++)
4402 struct ctables_cell *cell = sorted[j];
4403 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4405 struct ctables_area *area = cell->areas[at];
4406 if (!area->sequence)
4407 area->sequence = ++ids[at];
4414 for (size_t i = 0; i < t->n_sections; i++)
4416 struct ctables_section *s = &t->sections[i];
4418 struct ctables_cell *cell;
4419 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4424 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4425 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4426 for (size_t j = 0; j < specs->n; j++)
4429 size_t n_dindexes = 0;
4431 if (summary_dimension)
4432 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4434 if (categories_dimension)
4436 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4437 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4438 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4439 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4442 dindexes[n_dindexes++] = ctv->leaf;
4445 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4448 int leaf = cell->axes[a].leaf;
4449 if (a == t->summary_axis && !summary_dimension)
4451 dindexes[n_dindexes++] = leaf;
4454 const struct ctables_summary_spec *ss = &specs->specs[j];
4456 struct fmt_spec format = specs->specs[j].format;
4457 bool is_ctables_format = ss->is_ctables_format;
4458 double d = (cell->postcompute
4459 ? ctables_cell_calculate_postcompute (
4460 s, cell, ss, &format, &is_ctables_format, j)
4461 : ctables_summary_value (cell, &cell->summaries[j],
4464 struct pivot_value *value;
4465 if (ct->hide_threshold != 0
4466 && d < ct->hide_threshold
4467 && ctables_summary_function_is_count (ss->function))
4469 value = pivot_value_new_user_text_nocopy (
4470 xasprintf ("<%d", ct->hide_threshold));
4472 else if (d == 0 && ct->zero)
4473 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4474 else if (d == SYSMIS && ct->missing)
4475 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4476 else if (is_ctables_format)
4477 value = pivot_value_new_user_text_nocopy (
4478 ctables_format (d, &format, &ct->ctables_formats));
4481 value = pivot_value_new_number (d);
4482 value->numeric.format = format;
4484 /* XXX should text values be right-justified? */
4485 pivot_table_put (pt, dindexes, n_dindexes, value);
4490 pivot_table_submit (pt);
/* Checks whether the category labels on axis A of table T may be moved to the
   position recorded in T->label_axis[A].  SUBCOMMAND_NAME and POS_NAME exist
   only to word the error messages.

   The checks visible here report an error with msg() when:
     - a category of the first nest's innermost variable has type CCT_FUNCTION
       (sorting based on a summary function),
     - the innermost variable of a nest is that nest's scale variable,
     - the innermost variables of two nests differ in width, value labels, or
       category specifications.

   As a side effect, T->clabels_example is set to the first nest's innermost
   variable.  ctables_prepare_table() uses this function's result as a truth
   value. */
4494 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4496 enum pivot_axis_type label_pos = t->label_axis[a];
4500 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4501 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4503 const struct ctables_stack *stack = &t->stacks[a];
4507 const struct ctables_nest *n0 = &stack->nests[0];
4510 assert (stack->n == 1);
/* V0, the last variable in the first nest, is the reference against which
   the last variable of every other nest is compared below. */
4514 const struct variable *v0 = n0->vars[n0->n - 1];
4515 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4516 t->clabels_example = v0;
4518 for (size_t i = 0; i < c0->n_cats; i++)
4519 if (c0->cats[i].type == CCT_FUNCTION)
4521 msg (SE, _("%s=%s is not allowed with sorting based "
4522 "on a summary function."),
4523 subcommand_name, pos_name);
/* The last variable index coinciding with scale_idx means the variable to be
   moved is the nest's scale variable. */
4526 if (n0->n - 1 == n0->scale_idx)
4528 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4529 "but %s is a scale variable."),
4530 subcommand_name, pos_name, var_get_name (v0));
/* Compare every remaining nest's innermost variable VI against V0. */
4534 for (size_t i = 1; i < stack->n; i++)
4536 const struct ctables_nest *ni = &stack->nests[i];
4538 const struct variable *vi = ni->vars[ni->n - 1];
4539 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4541 if (ni->n - 1 == ni->scale_idx)
4543 msg (SE, _("%s=%s requires the variables to be moved to be "
4544 "categorical, but %s is a scale variable."),
4545 subcommand_name, pos_name, var_get_name (vi));
4548 if (var_get_width (v0) != var_get_width (vi))
4550 msg (SE, _("%s=%s requires the variables to be "
4551 "moved to have the same width, but %s has "
4552 "width %d and %s has width %d."),
4553 subcommand_name, pos_name,
4554 var_get_name (v0), var_get_width (v0),
4555 var_get_name (vi), var_get_width (vi));
4558 if (!val_labs_equal (var_get_value_labels (v0),
4559 var_get_value_labels (vi)))
4561 msg (SE, _("%s=%s requires the variables to be "
4562 "moved to have the same value labels, but %s "
4563 "and %s have different value labels."),
4564 subcommand_name, pos_name,
4565 var_get_name (v0), var_get_name (vi));
4568 if (!ctables_categories_equal (c0, ci))
4570 msg (SE, _("%s=%s requires the variables to be "
4571 "moved to have the same category "
4572 "specifications, but %s and %s have different "
4573 "category specifications."),
4574 subcommand_name, pos_name,
4575 var_get_name (v0), var_get_name (vi));
/* Registers VAR in the array *SUM_VARS, which currently holds *N variables
   and has room for *ALLOCATED.  A linear scan first looks for VAR (compared
   by pointer).  Otherwise the array is grown with x2nrealloc() when it is
   full and VAR is stored at index *N.

   enumerate_sum_vars() stores this function's result in a summary spec's
   sum_var_idx, so the result is presumably VAR's index in the array --
   TODO confirm. */
4584 add_sum_var (struct variable *var,
4585 struct variable ***sum_vars, size_t *n, size_t *allocated)
4587 for (size_t i = 0; i < *n; i++)
4588 if (var == (*sum_vars)[i])
4591 if (*n >= *allocated)
4592 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4593 (*sum_vars)[*n] = var;
/* Maps area type AREA to another area type.  CTAT_LAYERCOL and CTAT_LAYERROW
   are among the possible results.  ctables_prepare_table() applies this to a
   summary spec's calc_area when row labels are moved to the column axis or
   column labels to the row axis.
   NOTE(review): presumably this swaps the row-based and column-based area
   types -- confirm against the full mapping. */
4597 static enum ctables_area_type
4598 rotate_area (enum ctables_area_type area)
4609 return CTAT_LAYERCOL;
4612 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A and collects the variables needed by
   summary specs whose function is CTSF_areaPCT_SUM.

   For such a spec, A->var is registered in *SUM_VARS through add_sum_var()
   and the value returned is stored in the spec's sum_var_idx.  Both entries
   of A->subs[] are visited recursively with the same output arguments.
   *N and *ALLOCATED have the same meaning as for add_sum_var(). */
4625 enumerate_sum_vars (const struct ctables_axis *a,
4626 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Every summary variant (N_CSVS of them) has its own list of specs. */
4634 for (size_t i = 0; i < N_CSVS; i++)
4635 for (size_t j = 0; j < a->specs[i].n; j++)
4637 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4638 if (spec->function == CTSF_areaPCT_SUM)
4639 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4645 for (size_t i = 0; i < 2; i++)
4646 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The work visible in this function:

   1. For each pivot axis, builds T->stacks[a] with enumerate_fts().  For each
      nest and each area type, nest->areas[at] is filled with indexes of the
      nest's variables other than its scale variable, and entries are then
      trimmed according to where category labels are being moved (the N_DROP
      logic).

   2. An axis may instead receive a single placeholder nest that has neither
      a scale nor a summary variable; its label axis is reset to itself.

   3. Each nest on the summary axis that has no CSV_CELL summary specs gets
      one default spec (CTSF_MEAN when the spec set is scale, otherwise
      CTSF_COUNT), and the cell specs are cloned into CSV_TOTAL.  A nest that
      lacks only CSV_TOTAL specs gets a clone of its cell specs.

   4. When row labels are moved to columns or column labels to rows, summary
      specs' calc_area is passed through rotate_area().

   5. When listwise scale-missing handling is on, the scale variables of the
      other nests in the same group are recorded as each spec set's
      listwise_vars.

   6. All nests' summary spec sets are merged into T->summary_specs, and each
      source spec's axis_idx is set to its position in the merged list.

   7. The variables needed for sum percentages are collected into T->sum_vars.

   Returns true only if ctables_check_label_position() accepts both the row
   and the column axis. */
4652 ctables_prepare_table (struct ctables_table *t)
4654 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4657 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4659 for (size_t j = 0; j < t->stacks[a].n; j++)
4661 struct ctables_nest *nest = &t->stacks[a].nests[j];
4662 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for one entry per variable in the nest; n_areas[at] counts how many
   entries are actually in use. */
4664 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4665 nest->n_areas[at] = 0;
4667 enum pivot_axis_type ata, atb;
4668 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4670 ata = PIVOT_AXIS_ROW;
4671 atb = PIVOT_AXIS_COLUMN;
4673 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4675 ata = PIVOT_AXIS_COLUMN;
4676 atb = PIVOT_AXIS_ROW;
4679 if (at == CTAT_LAYER
4680 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4681 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
4682 ? a == atb && t->label_axis[a] != a
/* K counts down from the last variable; because it is unsigned, stepping
   below zero wraps around and the K < nest->n test ends the loop. */
4685 for (size_t k = nest->n - 1; k < nest->n; k--)
4686 if (k != nest->scale_idx)
4688 nest->areas[at][nest->n_areas[at]++] = k;
4694 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
4695 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
4696 : at == CTAT_TABLE ? true
4700 for (size_t k = 0; k < nest->n; k++)
4701 if (k != nest->scale_idx)
4702 nest->areas[at][nest->n_areas[at]++] = k;
4708 #define L PIVOT_AXIS_LAYER
4709 n_drop = (t->clabels_from_axis == L ? a != L
4710 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
4711 : t->clabels_from_axis == a ? 2
4718 n_drop = a == ata && t->label_axis[ata] == atb;
4723 n_drop = (a == ata ? t->label_axis[ata] == atb
4725 : t->clabels_from_axis == atb ? -1
4726 : t->clabels_to_axis != atb ? 1
4738 size_t n = nest->n_areas[at];
4741 nest->areas[at][n - 2] = nest->areas[at][n - 1];
4742 nest->n_areas[at]--;
/* Drop up to N_DROP trailing entries, never going below zero entries. */
4747 for (int i = 0; i < n_drop; i++)
4748 if (nest->n_areas[at] > 0)
4749 nest->n_areas[at]--;
/* Placeholder nest: SIZE_MAX marks "no scale variable" and "no summary
   variable". */
4756 struct ctables_nest *nest = xmalloc (sizeof *nest);
4757 *nest = (struct ctables_nest) {
4759 .scale_idx = SIZE_MAX,
4760 .summary_idx = SIZE_MAX
4762 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4764 /* There's no point in moving labels away from an axis that has no
4765 labels, so avoid dealing with the special cases around that. */
4766 t->label_axis[a] = a;
/* Supply default summary specifications on the summary axis. */
4769 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4770 for (size_t i = 0; i < stack->n; i++)
4772 struct ctables_nest *nest = &stack->nests[i];
4773 if (!nest->specs[CSV_CELL].n)
4775 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
4776 ss->specs = xmalloc (sizeof *ss->specs);
4779 enum ctables_summary_function function
4780 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* The innermost variable of the nest becomes the summary variable. */
4784 nest->summary_idx = nest->n - 1;
4785 ss->var = nest->vars[nest->summary_idx];
4787 *ss->specs = (struct ctables_summary_spec) {
4788 .function = function,
4790 .format = ctables_summary_default_format (function, ss->var),
4793 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4794 &nest->specs[CSV_CELL]);
4796 else if (!nest->specs[CSV_TOTAL].n)
4797 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4798 &nest->specs[CSV_CELL]);
/* Labels moving between the row and column axes: adjust calc areas. */
4800 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
4801 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
4803 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4804 for (size_t i = 0; i < nest->specs[sv].n; i++)
4806 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
4807 const struct ctables_function_info *cfi =
4808 &ctables_function_info[ss->function];
4810 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise handling of missing scale values: gather the scale variables of
   the other nests that share this nest's group_head. */
4814 if (t->ctables->smissing_listwise)
4816 struct variable **listwise_vars = NULL;
4818 size_t allocated = 0;
4820 for (size_t j = nest->group_head; j < stack->n; j++)
4822 const struct ctables_nest *other_nest = &stack->nests[j];
4823 if (other_nest->group_head != nest->group_head)
4826 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4829 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4830 sizeof *listwise_vars);
4831 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4834 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4837 listwise_vars = xmemdup (listwise_vars,
4838 n * sizeof *listwise_vars);
4839 nest->specs[sv].listwise_vars = listwise_vars;
4840 nest->specs[sv].n_listwise_vars = n;
/* Merge the per-nest spec sets into T->summary_specs.  ITEMS holds one
   cursor per (nest, variant) spec set.  Each round picks the smallest
   pending item, appends its spec to MERGED, and advances every cursor whose
   current item compares equal to it; exhausted cursors are removed by
   swapping in the last one. */
4845 struct ctables_summary_spec_set *merged = &t->summary_specs;
4846 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4848 for (size_t j = 0; j < stack->n; j++)
4850 const struct ctables_nest *nest = &stack->nests[j];
4852 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4853 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4858 struct merge_item min = items[0];
4859 for (size_t j = 1; j < n_left; j++)
4860 if (merge_item_compare_3way (&items[j], &min) < 0)
4863 if (merged->n >= merged->allocated)
4864 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4865 sizeof *merged->specs);
4866 merged->specs[merged->n++] = min.set->specs[min.ofs];
4868 for (size_t j = 0; j < n_left; )
4870 if (merge_item_compare_3way (&items[j], &min) == 0)
4872 struct merge_item *item = &items[j];
4873 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4874 if (++item->ofs >= item->set->n)
4876 items[j] = items[--n_left];
/* Diagnostic dump: the merged function names, then each nest's specs with
   the axis index assigned to them. */
4886 for (size_t j = 0; j < merged->n; j++)
4887 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4889 for (size_t j = 0; j < stack->n; j++)
4891 const struct ctables_nest *nest = &stack->nests[j];
4892 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4894 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4895 for (size_t k = 0; k < specs->n; k++)
4896 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4897 specs->specs[k].axis_idx);
4903 size_t allocated_sum_vars = 0;
4904 enumerate_sum_vars (t->axes[t->summary_axis],
4905 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
4907 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4908 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* Records, for table T, the values that case C has for the innermost variable
   of each nest on axis A.  A value is handed to ctables_value_insert() after
   being matched against the variable's category specification with
   ctables_categories_match().  Numeric system-missing values are tested for
   separately before that match.

   ctables_execute() calls this for each axis whose labels are moved to a
   different axis. */
4912 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4913 enum pivot_axis_type a)
4915 struct ctables_stack *stack = &t->stacks[a];
4916 for (size_t i = 0; i < stack->n; i++)
4918 const struct ctables_nest *nest = &stack->nests[i];
4919 const struct variable *var = nest->vars[nest->n - 1];
4920 const union value *value = case_data (c, var);
4922 if (var_is_numeric (var) && value->f == SYSMIS)
4925 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4927 ctables_value_insert (t, value, var_get_width (var));
4932 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4934 const struct ctables_value *const *ap = a_;
4935 const struct ctables_value *const *bp = b_;
4936 const struct ctables_value *a = *ap;
4937 const struct ctables_value *b = *bp;
4938 const int *width = width_;
4939 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, the sorted array of the values collected in
   T->clabels_values_map for moved category labels.

   T->clabels_example supplies the variable whose width, value labels, and
   category specification are used.  The steps visible here:
     - labeled values of that variable that match its categories are inserted
       into the map (NOTE(review): the condition guarding this step is not
       evident here -- presumably it applies only when empty categories are
       to be shown; confirm),
     - every map entry is copied into a newly allocated array,
     - the array is sorted by value,
     - each entry's leaf is set to its position in the sorted array. */
4943 ctables_sort_clabels_values (struct ctables_table *t)
4945 const struct variable *v0 = t->clabels_example;
4946 int width = var_get_width (v0);
4948 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4951 const struct val_labs *val_labs = var_get_value_labels (v0);
4952 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4953 vl = val_labs_next (val_labs, vl))
4954 if (ctables_categories_match (c0, &vl->value, v0))
4955 ctables_value_insert (t, &vl->value, width);
4958 size_t n = hmap_count (&t->clabels_values_map);
4959 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4961 struct ctables_value *clv;
4963 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4964 t->clabels_values[i++] = clv;
4965 t->n_clabels_values = n;
/* WIDTH is passed by address as the auxiliary argument that the comparison
   callback dereferences. */
4968 sort (t->clabels_values, n, sizeof *t->clabels_values,
4969 compare_clabels_values_3way, &width);
4971 for (size_t i = 0; i < n; i++)
4972 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the category specification CATS
   names, so that they are present even if no case has them.

   Each category in CATS is handled according to its type.  The handling
   visible here:
     - a category that carries a number adds that number,
     - a category that carries a string adds the string, right-padded with
       spaces to VAR's width,
     - a numeric range adds every labeled value of VAR inside
       [nrange[0], nrange[1]],
     - a string range adds every labeled value for which in_string_range()
       holds,
     - other branches add the labeled values that are missing, all labeled
       values, or the labeled values that are not missing unless the category
       has include_missing set.
   Only values that have value labels are considered in the range and
   "all values" branches, because those branches iterate over VAR's value
   labels. */
4976 ctables_add_category_occurrences (const struct variable *var,
4977 struct hmap *occurrences,
4978 const struct ctables_categories *cats)
4980 const struct val_labs *val_labs = var_get_value_labels (var);
4982 for (size_t i = 0; i < cats->n_cats; i++)
4984 const struct ctables_category *c = &cats->cats[i];
4988 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Build a temporary value of VAR's width from the category's string. */
4994 int width = var_get_width (var);
4996 value_init (&value, width);
4997 value_copy_buf_rpad (&value, width,
4998 CHAR_CAST (uint8_t *, c->string.string),
4999 c->string.length, ' ');
5000 ctables_add_occurrence (var, &value, occurrences);
5001 value_destroy (&value, width);
5006 assert (var_is_numeric (var));
5007 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5008 vl = val_labs_next (val_labs, vl))
5009 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5010 ctables_add_occurrence (var, &vl->value, occurrences);
5014 assert (var_is_alpha (var));
5015 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5016 vl = val_labs_next (val_labs, vl))
5017 if (in_string_range (&vl->value, var, c->srange))
5018 ctables_add_occurrence (var, &vl->value, occurrences);
5022 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5023 vl = val_labs_next (val_labs, vl))
5024 if (var_is_value_missing (var, &vl->value))
5025 ctables_add_occurrence (var, &vl->value, occurrences);
5029 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5030 vl = val_labs_next (val_labs, vl))
5031 ctables_add_occurrence (var, &vl->value, occurrences);
5034 case CCT_POSTCOMPUTE:
5044 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5045 vl = val_labs_next (val_labs, vl))
5046 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5047 ctables_add_occurrence (var, &vl->value, occurrences);
5050 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Enumerates combinations of category values for section S, one variable at
   a time.  A is the axis currently being filled and A_IDX the index of the
   variable within that axis's nest.  CATS[a][a_idx] receives the category
   chosen for each variable, and case C is used as scratch space whose data
   for each variable is overwritten with the value being tried.

   - Once A has run past the last axis, the combination is complete and a cell
     is inserted with ctables_cell_insert__().
   - When the axis has no nest or A_IDX is past the nest's last variable, the
     recursion moves on to the first variable of the next axis.
   - Otherwise every recorded occurrence of the variable is tried in turn,
     followed by each of the variable's CCT_POSTCOMPUTE categories.

   NOTE(review): the second dimension of CATS is fixed at 10, so a nest with
   more than 10 variables would index past it -- confirm that callers bound
   the nest size. */
5057 ctables_section_recurse_add_empty_categories (
5058 struct ctables_section *s,
5059 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5060 enum pivot_axis_type a, size_t a_idx)
5062 if (a >= PIVOT_N_AXES)
5063 ctables_cell_insert__ (s, c, cats);
5064 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5065 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5068 const struct variable *var = s->nests[a]->vars[a_idx];
5069 const struct ctables_categories *categories = s->table->categories[
5070 var_get_dict_index (var)];
5071 int width = var_get_width (var);
5072 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5073 const struct ctables_occurrence *o;
5074 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace C's value for VAR with the occurrence's value, releasing the old
   value first. */
5076 union value *value = case_data_rw (c, var);
5077 value_destroy (value, width);
5078 value_clone (value, &o->value, width);
5079 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5080 assert (cats[a][a_idx] != NULL);
5081 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5084 for (size_t i = 0; i < categories->n_cats; i++)
5086 const struct ctables_category *cat = &categories->cats[i];
5087 if (cat->type == CCT_POSTCOMPUTE)
5089 cats[a][a_idx] = cat;
5090 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should appear in
   the output even though no case fell into them.

   For every non-scale variable in S's nests whose category specification has
   show_empty set, the values named by the specification are added to that
   variable's occurrence map.  A scratch case built from the dictionary's
   prototype is then passed to
   ctables_section_recurse_add_empty_categories(), which enumerates the
   combinations and inserts the cells.
   NOTE(review): SHOW_EMPTY presumably lets the function stop early when no
   variable asks for empty categories -- confirm. */
5097 ctables_section_add_empty_categories (struct ctables_section *s)
5099 bool show_empty = false;
5100 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5102 for (size_t k = 0; k < s->nests[a]->n; k++)
5103 if (k != s->nests[a]->scale_idx)
5105 const struct variable *var = s->nests[a]->vars[k];
5106 const struct ctables_categories *cats = s->table->categories[
5107 var_get_dict_index (var)];
5108 if (cats->show_empty)
5111 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5117 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5118 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5119 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in section S so that it can be reused, while
   leaving the hash maps themselves in place.

   - Occurrences: for every non-scale variable on every axis, each
     occurrence's value is destroyed and the occurrence is removed from its
     map.
   - Cells: for every cell, the category values on each axis are destroyed,
     the per-axis value arrays are freed, each summary is uninitialized with
     the matching spec from the summary axis, the summary array is freed, and
     the cell is removed from S->cells, which is then shrunk.
   - Areas: every area is removed from each of the N_CTATS area maps, which
     are then shrunk. */
5124 ctables_section_clear (struct ctables_section *s)
5126 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5128 const struct ctables_nest *nest = s->nests[a];
5129 for (size_t i = 0; i < nest->n; i++)
5130 if (i != nest->scale_idx)
5132 const struct variable *var = nest->vars[i];
5133 int width = var_get_width (var);
5134 struct ctables_occurrence *o, *next;
5135 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iteration form is needed because entries are deleted while
   iterating. */
5136 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5138 value_destroy (&o->value, width);
5139 hmap_delete (map, &o->node);
5146 struct ctables_cell *cell, *next_cell;
5147 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5149 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5151 const struct ctables_nest *nest = s->nests[a];
5152 for (size_t i = 0; i < nest->n; i++)
5153 if (i != nest->scale_idx)
5154 value_destroy (&cell->axes[a].cvs[i].value,
5155 var_get_width (nest->vars[i]));
5156 free (cell->axes[a].cvs);
/* The cell's summaries parallel the spec set selected by the cell's summary
   variant (cell->sv) on the summary axis. */
5159 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5160 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5161 for (size_t i = 0; i < specs->n; i++)
5162 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5163 free (cell->summaries);
5165 hmap_delete (&s->cells, &cell->node);
5168 hmap_shrink (&s->cells);
5170 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5172 struct ctables_area *area, *next_area;
5173 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5177 hmap_delete (&s->areas[at], &area->node);
5180 hmap_shrink (&s->areas[at]);
/* Releases the storage owned by section S.  The contents are first discarded
   with ctables_section_clear().  Then, for each axis, every per-variable
   occurrence map is destroyed and the array of maps is freed.  Finally the
   cell map and each of the N_CTATS area maps are destroyed. */
5185 ctables_section_uninit (struct ctables_section *s)
5187 ctables_section_clear (s);
5189 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5191 struct ctables_nest *nest = s->nests[a];
5192 for (size_t i = 0; i < nest->n; i++)
5193 hmap_destroy (&s->occurrences[a][i]);
5194 free (s->occurrences[a]);
5197 hmap_destroy (&s->cells);
5198 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5199 hmap_destroy (&s->areas[at]);
/* Discards the per-run data held by table T: clears each of its sections
   and resets the bookkeeping for moved category labels.

   When T has a clabels example variable, every value in
   T->clabels_values_map is destroyed (using that variable's width) and
   removed from the map, and the map is shrunk.  The sorted array
   T->clabels_values is freed and reset to a null pointer with a zero
   count. */
5203 ctables_table_clear (struct ctables_table *t)
5205 for (size_t i = 0; i < t->n_sections; i++)
5206 ctables_section_clear (&t->sections[i]);
5208 if (t->clabels_example)
5210 int width = var_get_width (t->clabels_example);
5211 struct ctables_value *value, *next_value;
5212 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5213 &t->clabels_values_map)
5215 value_destroy (&value->value, width);
5216 hmap_delete (&t->clabels_values_map, &value->node);
5219 hmap_shrink (&t->clabels_values_map);
5221 free (t->clabels_values);
5222 t->clabels_values = NULL;
5223 t->n_clabels_values = 0;
/* Runs the analysis: reads the cases from INPUT, which belongs to dataset DS,
   and produces output for every table in the CTABLES specification.

   Setup: each table's section array is sized as the product of the nest
   counts on the row, column, and layer axes (each counted as at least 1),
   and the sections are created by ctables_table_add_section().

   Processing: the cases are divided into groups by a casegrouper, either by
   the dictionary's split variables or as one single group.  For each group:
     - the first case is peeked at, and output_split_file_values() is called
       with it,
     - every case is inserted, with its dictionary weight and its effective
       weight, into every section of every table, and its values for moved
       category labels are recorded,
     - each table then has its moved-label values sorted, its empty
       categories added, its output produced, and its per-run data cleared.

   Returns the result of casegrouper_destroy(). */
5228 ctables_execute (struct dataset *ds, struct casereader *input,
5231 for (size_t i = 0; i < ct->n_tables; i++)
5233 struct ctables_table *t = ct->tables[i];
5234 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5235 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5236 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5237 sizeof *t->sections);
5238 size_t ix[PIVOT_N_AXES];
5239 ctables_table_add_section (t, 0, ix);
5242 struct dictionary *dict = dataset_dict (ds);
/* SPLITTING is true when split-file output goes to separate tables. */
5244 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5245 struct casegrouper *grouper
5247 ? casegrouper_create_splits (input, dict)
5248 : casegrouper_create_vars (input, NULL, 0));
5249 struct casereader *group;
5250 while (casegrouper_get_next_group (grouper, &group))
5254 struct ccase *c = casereader_peek (group, 0);
5257 output_split_file_values (ds, c);
/* WARN_ON_INVALID is passed by address to the weight helper for every case
   in the group. */
5262 bool warn_on_invalid = true;
5263 for (struct ccase *c = casereader_read (group); c;
5264 case_unref (c), c = casereader_read (group))
5266 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5267 double e_weight = (ct->e_weight
5268 ? var_force_valid_weight (ct->e_weight,
5269 case_num (c, ct->e_weight),
5273 for (size_t i = 0; i < ct->n_tables; i++)
5275 struct ctables_table *t = ct->tables[i];
5277 for (size_t j = 0; j < t->n_sections; j++)
5278 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* An axis whose label axis differs from itself has its category labels
   moved elsewhere, so the values to label must be collected. */
5280 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5281 if (t->label_axis[a] != a)
5282 ctables_insert_clabels_values (t, c, a);
5285 casereader_destroy (group);
5287 for (size_t i = 0; i < ct->n_tables; i++)
5289 struct ctables_table *t = ct->tables[i];
5291 if (t->clabels_example)
5292 ctables_sort_clabels_values (t);
5294 for (size_t j = 0; j < t->n_sections; j++)
5295 ctables_section_add_empty_categories (&t->sections[j]);
5297 ctables_table_output (ct, t);
5298 ctables_table_clear (t);
5301 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the parsed
   expression.  The binary-operator parsers take a pointer to such a function
   to parse their operands. */
5306 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5307 struct dictionary *);
/* Releases the resources held by postcompute expression E, according to its
   operation type:
     - CTPO_CAT_STRING: the string is deallocated,
     - CTPO_CAT_SRANGE: both range endpoint strings are deallocated,
     - operator nodes: both subexpressions are destroyed recursively,
     - CTPO_CAT_NUMBER, CTPO_CAT_NRANGE, CTPO_CAT_MISSING, CTPO_CAT_OTHERNM,
       CTPO_CAT_SUBTOTAL, CTPO_CAT_TOTAL: no payload of their own.
   E's source location is destroyed as well.

   ctables_parse_pcompute() calls this on an expression that may be null
   after a failed parse, so a null E is presumably accepted -- TODO
   confirm. */
5310 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5316 case CTPO_CAT_STRING:
5317 ss_dealloc (&e->string);
5320 case CTPO_CAT_SRANGE:
5321 for (size_t i = 0; i < 2; i++)
5322 ss_dealloc (&e->srange[i]);
5331 for (size_t i = 0; i < 2; i++)
5332 ctables_pcexpr_destroy (e->subs[i]);
5336 case CTPO_CAT_NUMBER:
5337 case CTPO_CAT_NRANGE:
5338 case CTPO_CAT_MISSING:
5339 case CTPO_CAT_OTHERNM:
5340 case CTPO_CAT_SUBTOTAL:
5341 case CTPO_CAT_TOTAL:
5345 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operation OP whose operands are
   SUB0 and SUB1.  The node stores both operand pointers in its subs[] array.
   Its location is the merger of the two operands' locations, so both
   operands must be non-null. */
5350 static struct ctables_pcexpr *
5351 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5352 struct ctables_pcexpr *sub0,
5353 struct ctables_pcexpr *sub1)
5355 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5356 *e = (struct ctables_pcexpr) {
5358 .subs = { sub0, sub1 },
5359 .location = msg_location_merged (sub0->location, sub1->location),
5364 /* How to parse an operator: pairs a lexer token with the postcompute
   operation that the token stands for. */
/* Token that introduces the operator in the syntax. */
5367 enum token_type token;
/* Operation assigned to the expression node built for the operator. */
5368 enum ctables_postcompute_op op;
/* Looks for the entry among the N_OPS operators in OPS whose token equals
   LEXER's current token, scanning OPS in order.

   T_NEG_NUM is treated differently from the other tokens.
   NOTE(review): presumably every other operator token is consumed, whereas a
   negative-number token is left in place so that it can be parsed as the
   right-hand operand -- confirm. */
5371 static const struct operator *
5372 ctables_pcexpr_match_operator (struct lexer *lexer,
5373 const struct operator ops[], size_t n_ops)
5375 for (const struct operator *op = ops; op < ops + n_ops; op++)
5376 if (lex_token (lexer) == op->token)
5378 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators of one precedence level,
   given that the leftmost operand LHS has already been parsed.

   Each round matches one of the N_OPS operators in OPS with
   ctables_pcexpr_match_operator(), parses the right-hand operand with
   PARSE_NEXT_LEVEL, and folds the two into a new binary node that becomes
   the left-hand side for the next round.  The chain is therefore built
   left-associatively.

   OP_COUNT counts the operators folded so far.  If CHAIN_WARNING is non-null
   and OP_COUNT exceeds 1, the warning is issued at LHS's location.  LHS is
   destroyed on the path that follows a failed right-hand operand. */
5387 static struct ctables_pcexpr *
5388 ctables_pcexpr_parse_binary_operators__ (
5389 struct lexer *lexer, struct dictionary *dict,
5390 const struct operator ops[], size_t n_ops,
5391 parse_recursively_func *parse_next_level,
5392 const char *chain_warning, struct ctables_pcexpr *lhs)
5394 for (int op_count = 0; ; op_count++)
5396 const struct operator *op
5397 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5400 if (op_count > 1 && chain_warning)
5401 msg_at (SW, lhs->location, "%s", chain_warning);
5406 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5409 ctables_pcexpr_destroy (lhs);
5413 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators.  The first operand is
   parsed with PARSE_NEXT_LEVEL.  The remainder of the chain is then handled
   by ctables_pcexpr_parse_binary_operators__() with the same OPS, N_OPS, and
   CHAIN_WARNING. */
5417 static struct ctables_pcexpr *
5418 ctables_pcexpr_parse_binary_operators (
5419 struct lexer *lexer, struct dictionary *dict,
5420 const struct operator ops[], size_t n_ops,
5421 parse_recursively_func *parse_next_level, const char *chain_warning)
5423 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5427 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5429 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls the top-level
   (addition) parser to handle a parenthesized subexpression, before that
   parser is defined. */
5432 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5433 struct dictionary *);
5435 static struct ctables_pcexpr
5436 ctpo_cat_nrange (double low, double high)
5438 return (struct ctables_pcexpr) {
5439 .op = CTPO_CAT_NRANGE,
5440 .nrange = { low, high },
5444 static struct ctables_pcexpr
5445 ctpo_cat_srange (struct substring low, struct substring high)
5447 return (struct ctables_pcexpr) {
5448 .op = CTPO_CAT_SRANGE,
5449 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER.  The forms visible
   here:

     number                       a constant (CTPO_CONSTANT)
     MISSING, OTHERNM, TOTAL      the corresponding category reference
     SUBTOTAL                     a subtotal, optionally followed by a
     SUBTOTAL[n]                  bracketed index of at least 1
     [LO THRU x]                  a range open at the low end, numeric or
                                  string depending on X
     [n]  [n THRU HI]  [n THRU m] a numeric category or range
     ['s'] ['s' THRU HI]
     ['s' THRU 't']               a string category or range
     ( expression )               a parenthesized subexpression, parsed with
                                  ctables_pcexpr_parse_add()

   An open numeric end is stored as -DBL_MAX or DBL_MAX.  An open string end
   is stored as a substring whose string pointer is null.

   Anything else is reported with lex_error().  For the forms built in the
   local E, the result is a heap copy of E whose location spans the tokens
   from the start of the primary to the last token consumed.  If the closing
   bracket of a bracketed category is missing, any string payload already
   parsed is deallocated. */
5453 static struct ctables_pcexpr *
5454 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5456 int start_ofs = lex_ofs (lexer);
5457 struct ctables_pcexpr e;
5458 if (lex_is_number (lexer))
5460 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5461 .number = lex_number (lexer) };
5464 else if (lex_match_id (lexer, "MISSING"))
5465 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5466 else if (lex_match_id (lexer, "OTHERNM"))
5467 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5468 else if (lex_match_id (lexer, "TOTAL"))
5469 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5470 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index is given. */
5472 size_t subtotal_index = 0;
5473 if (lex_match (lexer, T_LBRACK))
5475 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5477 subtotal_index = lex_integer (lexer);
5479 if (!lex_force_match (lexer, T_RBRACK))
5482 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5483 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5485 else if (lex_match (lexer, T_LBRACK))
5487 if (lex_match_id (lexer, "LO"))
5489 if (!lex_force_match_id (lexer, "THRU"))
5492 if (lex_is_string (lexer))
5494 struct substring low = { .string = NULL };
5495 struct substring high = parse_substring (lexer, dict);
5496 e = ctpo_cat_srange (low, high);
5500 if (!lex_force_num (lexer))
5502 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5506 else if (lex_is_number (lexer))
5508 double number = lex_number (lexer);
5510 if (lex_match_id (lexer, "THRU"))
5512 if (lex_match_id (lexer, "HI"))
5513 e = ctpo_cat_nrange (number, DBL_MAX);
5516 if (!lex_force_num (lexer))
5518 e = ctpo_cat_nrange (number, lex_number (lexer));
5523 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5526 else if (lex_is_string (lexer))
5528 struct substring s = parse_substring (lexer, dict);
5530 if (lex_match_id (lexer, "THRU"))
5532 struct substring high;
5534 if (lex_match_id (lexer, "HI"))
5535 high = (struct substring) { .string = NULL };
5538 if (!lex_force_string (lexer))
5543 high = parse_substring (lexer, dict);
5546 e = ctpo_cat_srange (s, high);
5549 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5553 lex_error (lexer, NULL);
/* On a missing `]', release whatever strings the category already owns. */
5557 if (!lex_force_match (lexer, T_RBRACK))
5559 if (e.op == CTPO_CAT_STRING)
5560 ss_dealloc (&e.string);
5561 else if (e.op == CTPO_CAT_SRANGE)
5563 ss_dealloc (&e.srange[0]);
5564 ss_dealloc (&e.srange[1]);
5569 else if (lex_match (lexer, T_LPAREN))
5571 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5574 if (!lex_force_match (lexer, T_RPAREN))
5576 ctables_pcexpr_destroy (ep);
5583 lex_error (lexer, NULL);
/* The location ends at the token before the current one, which is the last
   token consumed. */
5587 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5588 return xmemdup (&e, sizeof e);
/* Allocates a new expression node that wraps SUB, for use by the unary-minus
   and exponentiation parsers.  The node's location spans LEXER's tokens from
   offset START_OFS through the token just before the current one. */
5591 static struct ctables_pcexpr *
5592 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5593 struct lexer *lexer, int start_ofs)
5595 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5596 *e = (struct ctables_pcexpr) {
5599 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of primary expressions joined by the
   `**' operator (T_EXP, producing CTPO_POW), parsed by
   ctables_pcexpr_parse_binary_operators() with a warning text that explains
   that `**' is left-associative.

   The special case applies when the current token is a negative number that
   is immediately followed by `**'.  The left operand is then built by hand
   as a CTPO_CONSTANT holding the negation of the token's value, the rest of
   the chain is parsed with ctables_pcexpr_parse_binary_operators__(), and
   the result is wrapped with ctables_pcexpr_allocate_neg(). */
5604 static struct ctables_pcexpr *
5605 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5607 static const struct operator op = { T_EXP, CTPO_POW };
5609 const char *chain_warning =
5610 _("The exponentiation operator (`**') is left-associative: "
5611 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5612 "To disable this warning, insert parentheses.");
5614 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5615 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5616 ctables_pcexpr_parse_primary,
5619 /* Special case for situations like "-5**6", which must be parsed as
5622 int start_ofs = lex_ofs (lexer);
5623 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5624 *lhs = (struct ctables_pcexpr) {
5625 .op = CTPO_CONSTANT,
5626 .number = -lex_tokval (lexer),
5627 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5631 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5632 lexer, dict, &op, 1,
5633 ctables_pcexpr_parse_primary, chain_warning, lhs);
5637 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5640 /* Parses the unary minus level.

   Without a leading `-' (T_DASH) this is simply the exponentiation level.
   With one, the operand is parsed by a recursive call, so that repeated
   minus signs nest, and the result is wrapped with
   ctables_pcexpr_allocate_neg() using the offset of the minus sign as the
   start of the location. */
5641 static struct ctables_pcexpr *
5642 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5644 int start_ofs = lex_ofs (lexer);
5645 if (!lex_match (lexer, T_DASH))
5646 return ctables_pcexpr_parse_exp (lexer, dict);
5648 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5652 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5655 /* Parses the multiplication and division level. */
5656 static struct ctables_pcexpr *
5657 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5659 static const struct operator ops[] =
5661 { T_ASTERISK, CTPO_MUL },
5662 { T_SLASH, CTPO_DIV },
5665 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5666 sizeof ops / sizeof *ops,
5667 ctables_pcexpr_parse_neg, NULL);
5670 /* Parses the addition and subtraction level. */
5671 static struct ctables_pcexpr *
5672 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5674 static const struct operator ops[] =
5676 { T_PLUS, CTPO_ADD },
5677 { T_DASH, CTPO_SUB },
5678 { T_NEG_NUM, CTPO_ADD },
5681 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5682 ops, sizeof ops / sizeof *ops,
5683 ctables_pcexpr_parse_mul, NULL);
/* Searches CT's table of postcomputes for one called NAME.  The name is
   hashed and compared without regard to case, using utf8_hash_case_string()
   for the hash bucket and utf8_strcasecmp() for the comparison. */
5686 static struct ctables_postcompute *
5687 ctables_find_postcompute (struct ctables *ct, const char *name)
5689 struct ctables_postcompute *pc;
5690 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5691 utf8_hash_case_string (name, 0), &ct->postcomputes)
5692 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form

       &name = EXPR(expression)

   from LEXER and records it in the CTABLES specification.

   The expression is parsed with ctables_pcexpr_parse_add().  If a
   postcompute with the same name (ignoring case) already exists, a warning
   is issued at the new definition together with a note at the previous one,
   and the previous expression and location are destroyed.  Otherwise a new
   postcompute is allocated and inserted into the table of postcomputes.

   In both cases the postcompute's location is set to the span of the whole
   definition, and its label to the source text of the expression between
   the parentheses. */
5698 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Start one token back from the current one.
   NOTE(review): presumably that token is the subcommand keyword already
   consumed by the caller -- confirm. */
5701 int pcompute_start = lex_ofs (lexer) - 1;
5703 if (!lex_match (lexer, T_AND))
5705 lex_error_expecting (lexer, "&");
5708 if (!lex_force_id (lexer))
5711 char *name = ss_xstrdup (lex_tokss (lexer));
5714 if (!lex_force_match (lexer, T_EQUALS)
5715 || !lex_force_match_id (lexer, "EXPR")
5716 || !lex_force_match (lexer, T_LPAREN))
/* EXPR_START and EXPR_END delimit the expression's tokens, excluding the
   surrounding parentheses. */
5722 int expr_start = lex_ofs (lexer);
5723 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5724 int expr_end = lex_ofs (lexer) - 1;
5725 if (!expr || !lex_force_match (lexer, T_RPAREN))
5727 ctables_pcexpr_destroy (expr);
5731 int pcompute_end = lex_ofs (lexer) - 1;
5733 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5736 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5739 msg_at (SW, location, _("New definition of &%s will override the "
5740 "previous definition."),
5742 msg_at (SN, pc->location, _("This is the previous definition."));
5744 ctables_pcexpr_destroy (pc->expr);
5745 msg_location_destroy (pc->location);
5750 pc = xmalloc (sizeof *pc);
5751 *pc = (struct ctables_postcompute) { .name = name };
5752 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5753 utf8_hash_case_string (pc->name, 0));
5756 pc->location = location;
5758 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES FORMAT into *SSS.

   *SSS is first reset to an empty set.  Then summary specifications are read
   one after another until the end of the command, a slash, or one of the
   identifiers LABEL or HIDESOURCECATS is reached.  Each specification is a
   summary function, a percentile value if the function is CTSF_PTILE, and a
   format specifier.

   NOTE(review): the return type and return statements are not visible in
   this listing.  The last visible statement uninitializes *SSS, which is
   presumably the error path -- confirm. */
5763 ctables_parse_pproperties_format (struct lexer *lexer,
5764 struct ctables_summary_spec_set *sss)
5766 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Stop at anything that cannot begin another summary specification: end of
   command, "/", or the keywords of the sibling PPROPERTIES settings. */
5768 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5769 && !(lex_token (lexer) == T_ID
5770 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5771 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5772 lex_tokss (lexer)))))
5774 /* Parse function. */
5775 enum ctables_summary_function function;
5777 enum ctables_area_type area;
5778 if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
5781 /* Parse percentile. */
5782 double percentile = 0;
5783 if (function == CTSF_PTILE)
/* The percentile must lie in the closed range [0, 100]. */
5785 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5787 percentile = lex_number (lexer);
5792 struct fmt_spec format;
5793 bool is_ctables_format;
5794 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the specs array when it is full, then append the new entry. */
5797 if (sss->n >= sss->allocated)
5798 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5799 sizeof *sss->specs);
5800 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5801 .function = function,
5802 .weighted = weighted,
5805 .percentile = percentile,
5807 .is_ctables_format = is_ctables_format,
/* Release whatever was accumulated in *SSS. */
5813 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.

   First a list of "&name" references is collected; each name must already
   exist in CT->postcomputes, otherwise "Unknown computed category" is
   reported.  Then any number of LABEL, FORMAT and HIDESOURCECATS settings
   are read until a slash or the end of the command, and each setting is
   applied to every postcompute in the collected list.

   NOTE(review): the return type, the return/error-exit statements and the
   release of the `pcs' array are not visible in this listing. */
5818 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5820 struct ctables_postcompute **pcs = NULL;
5822 size_t allocated_pcs = 0;
5824 while (lex_match (lexer, T_AND))
5826 if (!lex_force_id (lexer))
5828 struct ctables_postcompute *pc
5829 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5832 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5837 if (n_pcs >= allocated_pcs)
5838 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5842 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5844 if (lex_match_id (lexer, "LABEL"))
/* The "=" after the keyword is optional. */
5846 lex_match (lexer, T_EQUALS);
5847 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string; the previous
   label is freed first. */
5850 for (size_t i = 0; i < n_pcs; i++)
5852 free (pcs[i]->label);
5853 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5858 else if (lex_match_id (lexer, "FORMAT"))
5860 lex_match (lexer, T_EQUALS);
/* Parse the summary specs once into a temporary set, clone the set into each
   postcompute, then release the temporary. */
5862 struct ctables_summary_spec_set sss;
5863 if (!ctables_parse_pproperties_format (lexer, &sss))
5866 for (size_t i = 0; i < n_pcs; i++)
/* NOTE(review): the conditions choosing between the uninit and the xmalloc
   below are not visible; presumably an existing set is uninitialized for
   reuse and a missing one is allocated -- confirm. */
5869 ctables_summary_spec_set_uninit (pcs[i]->specs);
5871 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5872 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5874 ctables_summary_spec_set_uninit (&sss);
5876 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5878 lex_match (lexer, T_EQUALS);
5879 bool hide_source_cats;
5880 if (!parse_bool (lexer, &hide_source_cats))
5882 for (size_t i = 0; i < n_pcs; i++)
5883 pcs[i]->hide_source_cats = hide_source_cats;
5887 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the local-time rendering of NOW, formatted by strftime()
   according to FORMAT.

   Nothing is appended when NOW cannot be converted to local time, or when
   strftime() reports that it stored no characters.  In the latter case
   either the result is legitimately empty or it did not fit, and when it
   does not fit the C standard leaves the buffer contents indeterminate, so
   the buffer must not be read as a string. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  /* localtime() may return a null pointer; passing that on to strftime()
     would be undefined behavior. */
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
/* If *S begins with PREFIX, advances *S past it by PREFIX's length.

   NOTE(review): the return type and return statements are not visible in
   this listing.  put_title_text() uses the result as a condition, so it
   presumably reports whether the prefix was present and skipped --
   confirm. */
5909 skip_prefix (struct substring *s, struct substring prefix)
5911 if (ss_starts_with (*s, prefix))
5913 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START (inclusive) up to EXPR_END (exclusive).

   Identifiers that name a variable in DICT with a variable label are
   replaced by that label; other identifiers are written as they appear.
   Remaining tokens are written using their source representation, with
   spaces inserted around them except after "(" and before ")".

   NOTE(review): square brackets are tracked through a counter named `nest'
   whose declaration and updates are not visible in this listing; presumably
   tokens inside brackets are omitted from the output -- confirm. */
5921 put_table_expression (struct string *out, struct lexer *lexer,
5922 struct dictionary *dict, int expr_start, int expr_end)
5925 for (int ofs = expr_start; ofs < expr_end; ofs++)
5927 const struct token *t = lex_ofs_token (lexer, ofs);
5928 if (t->type == T_LBRACK)
5930 else if (t->type == T_RBRACK && nest > 0)
5936 else if (t->type == T_ID)
/* Prefer the variable's label, falling back to the identifier text when the
   name is not a variable or the variable has no label. */
5938 const struct variable *var
5939 = dict_lookup_var (dict, t->string.string);
5940 const char *label = var ? var_get_label (var) : NULL;
5941 ds_put_cstr (out, label ? label : t->string.string);
/* Leading space: not before the first token, not before ")", and not when
   the output already ends in a space. */
5945 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5946 ds_put_byte (out, ' ');
/* NOTE(review): `repr' is obtained from lex_ofs_representation(); its
   release is not visible in this listing -- confirm it is freed. */
5948 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5949 ds_put_cstr (out, repr);
/* Trailing space: not after the last token and not after "(". */
5952 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5953 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the ")"-prefixed keywords
   that it contains:

     )DATE   the date of NOW, via put_strftime() with "%x"
     )TIME   the time of NOW, via put_strftime() with "%X"
     )TABLE  the table expression spanning token offsets EXPR_START to
             EXPR_END, via put_table_expression()

   A ")" that does not start one of these keywords is copied literally.

   NOTE(review): the enclosing loop construct and the exit statement taken
   when IN is exhausted are not visible in this listing. */
5959 put_title_text (struct string *out, struct substring in, time_t now,
5960 struct lexer *lexer, struct dictionary *dict,
5961 int expr_start, int expr_end)
/* Copy everything up to the next ")" verbatim and drop it from IN. */
5965 size_t chunk = ss_find_byte (in, ')');
5966 ds_put_substring (out, ss_head (in, chunk));
5967 ss_advance (&in, chunk);
/* Nothing left means there was no further ")" in the text. */
5968 if (ss_is_empty (in))
5971 if (skip_prefix (&in, ss_cstr (")DATE")))
5972 put_strftime (out, now, "%x");
5973 else if (skip_prefix (&in, ss_cstr (")TIME")))
5974 put_strftime (out, now, "%X");
5975 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5976 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ")" itself and step over it. */
5979 ds_put_byte (out, ')');
5980 ss_advance (&in, 1);
/* Parses and executes the CTABLES command read from LEXER against DS.

   Visible phases:

   1. Set-up: run a measure guesser over the active dataset, seed the
      per-variable label display modes, take a private copy of the default
      pivot table look, allocate the command state CT and install four
      custom-currency formats.

   2. Global subcommands up to the first TABLE: FORMAT, VLABELS, MRSETS,
      SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT and HIDESMALLCOUNTS.

   3. One iteration per TABLE: parse up to three axes separated by BY,
      validate the placement of scale variables and summaries, then parse
      the per-table subcommands SLABELS, CLABELS, CRITERIA, CATEGORIES,
      TITLES, SIGTEST and COMPARETEST.

   4. Execution through ctables_execute() and proc_commit().

   Returns CMD_SUCCESS if execution and commit both succeed, otherwise
   CMD_FAILURE.

   NOTE(review): the embedded line numbering has many gaps, so short lines
   (braces, breaks, error exits, small declarations and assignments) are
   missing from this listing, and the end of the function is not visible.
   The comments below describe only what the visible lines show. */
5986 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5988 struct casereader *input = NULL;
/* Measure guessing over a reader opened on the dataset.  NOTE(review): any
   guard around these calls and the clean-up of the reader are not visible. */
5990 struct measure_guesser *mg = measure_guesser_create (ds);
5993 input = proc_open (ds);
5994 measure_guesser_run (mg, input);
5995 measure_guesser_destroy (mg);
/* One label display mode per dictionary variable, each seeded from
   settings_get_show_variables().  The cast relies on the CTVL_* constants
   being defined as the corresponding SETTINGS_VALUE_SHOW_* values. */
5998 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5999 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6000 enum settings_value_show tvars = settings_get_show_variables ();
6001 for (size_t i = 0; i < n_vars; i++)
6002 vlabels[i] = (enum ctables_vlabel) tvars;
/* Private, modifiable copy of the default look, with omit_empty off. */
6004 struct pivot_table_look *look = pivot_table_look_unshare (
6005 pivot_table_look_ref (pivot_table_look_get_default ()));
6006 look->omit_empty = false;
6008 struct ctables *ct = xmalloc (sizeof *ct);
6009 *ct = (struct ctables) {
6010 .dict = dataset_dict (ds),
6012 .ctables_formats = FMT_SETTINGS_INIT,
6014 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once and passed to put_title_text() for every title string. */
6017 time_t now = time (NULL);
/* Custom-currency style strings for the CTEF_* formats.  The first string
   of each entry is used when the decimal point is '.', the second
   otherwise. */
6022 const char *dot_string;
6023 const char *comma_string;
6025 static const struct ctf ctfs[4] = {
6026 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6027 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6028 { CTEF_PAREN, "-,(,),", "-.(.)." },
6029 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6031 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6032 for (size_t i = 0; i < 4; i++)
6034 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6035 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6036 fmt_number_style_from_string (s));
6039 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands: everything up to the first TABLE keyword. */
6042 while (!lex_match_id (lexer, "TABLE"))
6044 if (lex_match_id (lexer, "FORMAT"))
/* widths[0] is the minimum and widths[1] the maximum column width; SYSMIS
   means "not specified".  units_per_inch converts the user's unit. */
6046 double widths[2] = { SYSMIS, SYSMIS };
6047 double units_per_inch = 72.0;
6049 while (lex_token (lexer) != T_SLASH)
6051 if (lex_match_id (lexer, "MINCOLWIDTH"))
6053 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6056 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6058 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6061 else if (lex_match_id (lexer, "UNITS"))
6063 lex_match (lexer, T_EQUALS);
6064 if (lex_match_id (lexer, "POINTS"))
6065 units_per_inch = 72.0;
6066 else if (lex_match_id (lexer, "INCHES"))
6067 units_per_inch = 1.0;
6068 else if (lex_match_id (lexer, "CM"))
6069 units_per_inch = 2.54;
6072 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
/* EMPTY selects the text stored in ct->zero: ZERO leaves it alone, BLANK
   stores an empty string, and a quoted string is stored as given. */
6076 else if (lex_match_id (lexer, "EMPTY"))
6081 lex_match (lexer, T_EQUALS);
6082 if (lex_match_id (lexer, "ZERO"))
6084 /* Nothing to do. */
6086 else if (lex_match_id (lexer, "BLANK"))
6087 ct->zero = xstrdup ("");
6088 else if (lex_force_string (lexer))
6090 ct->zero = ss_xstrdup (lex_tokss (lexer));
/* MISSING takes a string.  Any string other than "." is copied into
   ct->missing; NOTE(review): the value used for "." is not visible here. */
6096 else if (lex_match_id (lexer, "MISSING"))
6098 lex_match (lexer, T_EQUALS);
6099 if (!lex_force_string (lexer))
6103 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6104 ? ss_xstrdup (lex_tokss (lexer))
6110 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6111 "UNITS", "EMPTY", "MISSING");
/* When both widths were given, the minimum may not exceed the maximum. */
6116 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6117 && widths[0] > widths[1])
6119 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each specified width to inches and scale by 96.0 before storing it
   in the look's horizontal width range.  NOTE(review): 96.0 is presumably
   the look's units per inch -- confirm. */
6123 for (size_t i = 0; i < 2; i++)
6124 if (widths[i] != SYSMIS)
6126 int *wr = ct->look->width_ranges[TABLE_HORZ];
6127 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS VARIABLES=list DISPLAY=mode sets the label display mode of each
   listed variable, indexed by its position in the dictionary. */
6132 else if (lex_match_id (lexer, "VLABELS"))
6134 if (!lex_force_match_id (lexer, "VARIABLES"))
6136 lex_match (lexer, T_EQUALS);
6138 struct variable **vars;
6140 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6144 if (!lex_force_match_id (lexer, "DISPLAY"))
6149 lex_match (lexer, T_EQUALS);
6151 enum ctables_vlabel vlabel;
6152 if (lex_match_id (lexer, "DEFAULT"))
6153 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6154 else if (lex_match_id (lexer, "NAME"))
6156 else if (lex_match_id (lexer, "LABEL"))
6157 vlabel = CTVL_LABEL;
6158 else if (lex_match_id (lexer, "BOTH"))
6160 else if (lex_match_id (lexer, "NONE"))
6164 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6170 for (size_t i = 0; i < n_vars; i++)
6171 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6174 else if (lex_match_id (lexer, "MRSETS"))
6176 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6178 lex_match (lexer, T_EQUALS);
6179 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6182 else if (lex_match_id (lexer, "SMISSING"))
6184 if (lex_match_id (lexer, "VARIABLE"))
6185 ct->smissing_listwise = false;
6186 else if (lex_match_id (lexer, "LISTWISE"))
6187 ct->smissing_listwise = true;
6190 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6194 else if (lex_match_id (lexer, "PCOMPUTE"))
6196 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6199 else if (lex_match_id (lexer, "PPROPERTIES"))
6201 if (!ctables_parse_pproperties (lexer, ct))
6204 else if (lex_match_id (lexer, "WEIGHT"))
6206 if (!lex_force_match_id (lexer, "VARIABLE"))
6208 lex_match (lexer, T_EQUALS);
6209 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS COUNT=n sets the threshold explicitly.  Without COUNT, a
   threshold that is still 0 becomes 5. */
6213 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6215 if (lex_match_id (lexer, "COUNT"))
6217 lex_match (lexer, T_EQUALS);
6218 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6221 ct->hide_threshold = lex_integer (lexer);
6224 else if (ct->hide_threshold == 0)
6225 ct->hide_threshold = 5;
6229 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6230 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6231 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
/* Every global subcommand must be followed by a slash. */
6235 if (!lex_force_match (lexer, T_SLASH))
/* Per-table parsing.  The loop's closing condition further down repeats it
   until the end of the command. */
6239 size_t allocated_tables = 0;
6242 if (ct->n_tables >= allocated_tables)
6243 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6244 sizeof *ct->tables);
/* Default category specification, category set, and a per-variable array of
   category sets for the new table.  NOTE(review): several initializer
   fields and the loop body filling `categories' are not visible. */
6246 struct ctables_category *cat = xmalloc (sizeof *cat);
6247 *cat = (struct ctables_category) {
6249 .include_missing = false,
6250 .sort_ascending = true,
6253 struct ctables_categories *c = xmalloc (sizeof *c);
6254 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6255 *c = (struct ctables_categories) {
6262 struct ctables_categories **categories = xnmalloc (n_vars,
6263 sizeof *categories);
6264 for (size_t i = 0; i < n_vars; i++)
/* New table with defaults: summary labels visible on the column axis, and
   each axis carrying its own category labels. */
6267 struct ctables_table *t = xmalloc (sizeof *t);
6268 *t = (struct ctables_table) {
6270 .slabels_axis = PIVOT_AXIS_COLUMN,
6271 .slabels_visible = true,
6272 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6274 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6275 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6276 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6278 .clabels_from_axis = PIVOT_AXIS_LAYER,
6279 .clabels_to_axis = PIVOT_AXIS_LAYER,
6280 .categories = categories,
6281 .n_categories = n_vars,
6284 ct->tables[ct->n_tables++] = t;
/* Table expression: rows [BY columns [BY layers]].  expr_start and expr_end
   bracket its tokens so that a title can later expand ")TABLE". */
6286 lex_match (lexer, T_EQUALS);
6287 int expr_start = lex_ofs (lexer);
6288 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6290 if (lex_match (lexer, T_BY))
6292 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6293 ct, t, PIVOT_AXIS_COLUMN))
6296 if (lex_match (lexer, T_BY))
6298 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6299 ct, t, PIVOT_AXIS_LAYER))
6303 int expr_end = lex_ofs (lexer);
6305 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6306 && !t->axes[PIVOT_AXIS_LAYER])
6308 lex_error (lexer, _("At least one variable must be specified."));
/* Locate a scale variable on each axis.  NOTE(review): the counting of
   n_scales and the condition guarding the error below are not visible;
   the message implies at most one axis may hold scale variables. */
6312 const struct ctables_axis *scales[PIVOT_N_AXES];
6313 size_t n_scales = 0;
6314 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6316 scales[a] = find_scale (t->axes[a]);
6322 msg (SE, _("Scale variables may appear only on one axis."));
6323 if (scales[PIVOT_AXIS_ROW])
6324 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6325 _("This scale variable appears on the rows axis."));
6326 if (scales[PIVOT_AXIS_COLUMN])
6327 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6328 _("This scale variable appears on the columns axis."));
6329 if (scales[PIVOT_AXIS_LAYER])
6330 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6331 _("This scale variable appears on the layer axis."));
/* Locate a summary on each axis.  When the axis has no scale variable, the
   result of find_categorical_summary_spec() is used; the value used when it
   has one is not visible here. */
6335 const struct ctables_axis *summaries[PIVOT_N_AXES];
6336 size_t n_summaries = 0;
6337 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6339 summaries[a] = (scales[a]
6341 : find_categorical_summary_spec (t->axes[a]));
6345 if (n_summaries > 1)
6347 msg (SE, _("Summaries may appear only on one axis."));
6348 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6351 msg_at (SN, summaries[a]->loc,
6353 ? _("This variable on the rows axis has a summary.")
6354 : a == PIVOT_AXIS_COLUMN
6355 ? _("This variable on the columns axis has a summary.")
6356 : _("This variable on the layers axis has a summary."));
6358 msg_at (SN, summaries[a]->loc,
6359 _("This is a scale variable, so it always has a "
6360 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary if any summary
   exists, otherwise an axis that is present at all. */
6365 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6366 if (n_summaries ? summaries[a] : t->axes[a])
6368 t->summary_axis = a;
/* A table with no subcommands can be prepared right away. */
6372 if (lex_token (lexer) == T_ENDCMD)
6374 if (!ctables_prepare_table (t))
6378 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6381 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6383 if (lex_match_id (lexer, "SLABELS"))
6385 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6387 if (lex_match_id (lexer, "POSITION"))
6389 lex_match (lexer, T_EQUALS);
6390 if (lex_match_id (lexer, "COLUMN"))
6391 t->slabels_axis = PIVOT_AXIS_COLUMN;
6392 else if (lex_match_id (lexer, "ROW"))
6393 t->slabels_axis = PIVOT_AXIS_ROW;
6394 else if (lex_match_id (lexer, "LAYER"))
6395 t->slabels_axis = PIVOT_AXIS_LAYER;
6398 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6402 else if (lex_match_id (lexer, "VISIBLE"))
6404 lex_match (lexer, T_EQUALS);
6405 if (!parse_bool (lexer, &t->slabels_visible))
6410 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS moves category labels between axes.  AUTO restores the defaults;
   ROWLABELS and COLLABELS send that axis's labels to the opposite axis or
   to the layer axis. */
6415 else if (lex_match_id (lexer, "CLABELS"))
6417 if (lex_match_id (lexer, "AUTO"))
6419 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6420 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6422 else if (lex_match_id (lexer, "ROWLABELS"))
6424 lex_match (lexer, T_EQUALS);
6425 if (lex_match_id (lexer, "OPPOSITE"))
6426 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6427 else if (lex_match_id (lexer, "LAYER"))
6428 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6431 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6435 else if (lex_match_id (lexer, "COLLABELS"))
6437 lex_match (lexer, T_EQUALS);
6438 if (lex_match_id (lexer, "OPPOSITE"))
6439 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6440 else if (lex_match_id (lexer, "LAYER"))
6441 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6444 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6450 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
/* CRITERIA CILEVEL=number stores the number in t->cilevel after a
   half-open range check against 0 and 100. */
6455 else if (lex_match_id (lexer, "CRITERIA"))
6457 if (!lex_force_match_id (lexer, "CILEVEL"))
6459 lex_match (lexer, T_EQUALS);
6461 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6463 t->cilevel = lex_number (lexer);
6466 else if (lex_match_id (lexer, "CATEGORIES"))
6468 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: each of CAPTION, CORNER and TITLE takes one or more strings, which
   are expanded by put_title_text() and joined with single spaces. */
6472 else if (lex_match_id (lexer, "TITLES"))
6477 if (lex_match_id (lexer, "CAPTION"))
6478 textp = &t->caption;
6479 else if (lex_match_id (lexer, "CORNER"))
6481 else if (lex_match_id (lexer, "TITLE"))
6485 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6488 lex_match (lexer, T_EQUALS);
6490 struct string s = DS_EMPTY_INITIALIZER;
6491 while (lex_is_string (lexer))
6493 if (!ds_is_empty (&s))
6494 ds_put_byte (&s, ' ');
6495 put_title_text (&s, lex_tokss (lexer), now,
6496 lexer, dataset_dict (ds),
6497 expr_start, expr_end);
/* NOTE(review): whether a previous value of *textp is released before this
   assignment is not visible in this listing. */
6501 *textp = ds_steal_cstr (&s);
6503 while (lex_token (lexer) != T_SLASH
6504 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST settings are stored in t->chisq.  NOTE(review): any guard around
   the allocation below is not visible. */
6506 else if (lex_match_id (lexer, "SIGTEST"))
6510 t->chisq = xmalloc (sizeof *t->chisq);
6511 *t->chisq = (struct ctables_chisq) {
6513 .include_mrsets = true,
6514 .all_visible = true,
6520 if (lex_match_id (lexer, "TYPE"))
6522 lex_match (lexer, T_EQUALS);
6523 if (!lex_force_match_id (lexer, "CHISQUARE"))
6526 else if (lex_match_id (lexer, "ALPHA"))
6528 lex_match (lexer, T_EQUALS);
6529 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6531 t->chisq->alpha = lex_number (lexer);
6534 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6536 lex_match (lexer, T_EQUALS);
6537 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6540 else if (lex_match_id (lexer, "CATEGORIES"))
6542 lex_match (lexer, T_EQUALS);
6543 if (lex_match_id (lexer, "ALLVISIBLE"))
6544 t->chisq->all_visible = true;
6545 else if (lex_match_id (lexer, "SUBTOTALS"))
6546 t->chisq->all_visible = false;
6549 lex_error_expecting (lexer,
6550 "ALLVISIBLE", "SUBTOTALS");
6556 lex_error_expecting (lexer, "TYPE", "ALPHA",
6557 "INCLUDEMRSETS", "CATEGORIES");
6561 while (lex_token (lexer) != T_SLASH
6562 && lex_token (lexer) != T_ENDCMD);
/* COMPARETEST settings are stored in t->pairwise, which starts out with both
   alphas at .05 and the BONFERRONI adjustment.  NOTE(review): any guard
   around the allocation below is not visible. */
6564 else if (lex_match_id (lexer, "COMPARETEST"))
6568 t->pairwise = xmalloc (sizeof *t->pairwise);
6569 *t->pairwise = (struct ctables_pairwise) {
6571 .alpha = { .05, .05 },
6572 .adjust = BONFERRONI,
6573 .include_mrsets = true,
6574 .meansvariance_allcats = true,
6575 .all_visible = true,
6584 if (lex_match_id (lexer, "TYPE"))
6586 lex_match (lexer, T_EQUALS);
6587 if (lex_match_id (lexer, "PROP"))
6588 t->pairwise->type = PROP;
6589 else if (lex_match_id (lexer, "MEAN"))
6590 t->pairwise->type = MEAN;
6593 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two numbers, optionally separated by a comma.  With
   two, the smaller goes into alpha[0] and the larger into alpha[1]; the
   last assignment below stores a single value in both slots. */
6597 else if (lex_match_id (lexer, "ALPHA"))
6599 lex_match (lexer, T_EQUALS);
6601 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6603 double a0 = lex_number (lexer);
6606 lex_match (lexer, T_COMMA);
6607 if (lex_is_number (lexer))
6609 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6611 double a1 = lex_number (lexer);
6614 t->pairwise->alpha[0] = MIN (a0, a1);
6615 t->pairwise->alpha[1] = MAX (a0, a1);
6618 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6620 else if (lex_match_id (lexer, "ADJUST"))
6622 lex_match (lexer, T_EQUALS);
6623 if (lex_match_id (lexer, "BONFERRONI"))
6624 t->pairwise->adjust = BONFERRONI;
6625 else if (lex_match_id (lexer, "BH"))
6626 t->pairwise->adjust = BH;
6627 else if (lex_match_id (lexer, "NONE"))
6628 t->pairwise->adjust = 0;
6631 lex_error_expecting (lexer, "BONFERRONI", "BH",
6636 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6638 lex_match (lexer, T_EQUALS);
6639 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6642 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6644 lex_match (lexer, T_EQUALS);
6645 if (lex_match_id (lexer, "ALLCATS"))
6646 t->pairwise->meansvariance_allcats = true;
6647 else if (lex_match_id (lexer, "TESTEDCATS"))
6648 t->pairwise->meansvariance_allcats = false;
6651 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6655 else if (lex_match_id (lexer, "CATEGORIES"))
6657 lex_match (lexer, T_EQUALS);
6658 if (lex_match_id (lexer, "ALLVISIBLE"))
6659 t->pairwise->all_visible = true;
6660 else if (lex_match_id (lexer, "SUBTOTALS"))
6661 t->pairwise->all_visible = false;
6664 lex_error_expecting (lexer, "ALLVISIBLE",
6669 else if (lex_match_id (lexer, "MERGE"))
6671 lex_match (lexer, T_EQUALS);
6672 if (!parse_bool (lexer, &t->pairwise->merge))
6675 else if (lex_match_id (lexer, "STYLE"))
6677 lex_match (lexer, T_EQUALS);
6678 if (lex_match_id (lexer, "APA"))
6679 t->pairwise->apa_style = true;
6680 else if (lex_match_id (lexer, "SIMPLE"))
6681 t->pairwise->apa_style = false;
6684 lex_error_expecting (lexer, "APA", "SIMPLE");
6688 else if (lex_match_id (lexer, "SHOWSIG"))
6690 lex_match (lexer, T_EQUALS);
6691 if (!parse_bool (lexer, &t->pairwise->show_sig))
6696 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6697 "INCLUDEMRSETS", "MEANSVARIANCE",
6698 "CATEGORIES", "MERGE", "STYLE",
6703 while (lex_token (lexer) != T_SLASH
6704 && lex_token (lexer) != T_ENDCMD);
6708 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6709 "CRITERIA", "CATEGORIES", "TITLES",
6710 "SIGTEST", "COMPARETEST");
6714 if (!lex_match (lexer, T_SLASH))
/* Resolve CLABELS: category labels may be moved away from the row axis or
   from the column axis, but not from both.  clabels_from_axis is the axis
   whose labels move and clabels_to_axis is where they go. */
6718 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6720 t->clabels_from_axis = PIVOT_AXIS_ROW;
6721 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6723 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6727 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6728 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6729 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6731 if (!ctables_prepare_table (t))
6734 while (lex_token (lexer) != T_ENDCMD);
/* Execute over a fresh reader.  proc_commit() is called even if execution
   failed, and the command succeeds only if both succeed. */
6737 input = proc_open (ds);
6738 bool ok = ctables_execute (ds, input, ct);
6739 ok = proc_commit (ds) && ok;
6741 ctables_destroy (ct);
6742 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): this second destroy presumably belongs to the error exit
   reached from the parsing failures above; its label and the return that
   follows it are not visible in this listing. */
6747 ctables_destroy (ct);