1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1244 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1245 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1247 const struct ctables_axis *outer_scale = find_scale (lhs);
1248 const struct ctables_axis *inner_scale = find_scale (rhs);
1249 if (outer_scale && inner_scale)
1251 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1252 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1253 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1254 ctables_axis_destroy (nest);
1258 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1261 msg_at (SE, nest->loc,
1262 _("Summaries may only be requested for categorical variables "
1263 "at the innermost nesting level."));
1264 msg_at (SN, outer_sum->loc,
1265 _("This outer categorical variable has a summary."));
1266 ctables_axis_destroy (nest);
1276 static struct ctables_axis *
1277 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1279 int start_ofs = lex_ofs (ctx->lexer);
1280 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1284 while (lex_match (ctx->lexer, T_PLUS))
1286 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1290 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1291 ctx->lexer, start_ofs);
1298 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1299 struct ctables *ct, struct ctables_table *t,
1300 enum pivot_axis_type a)
1302 if (lex_token (lexer) == T_BY
1303 || lex_token (lexer) == T_SLASH
1304 || lex_token (lexer) == T_ENDCMD)
1307 struct ctables_axis_parse_ctx ctx = {
1313 t->axes[a] = ctables_axis_parse_stack (&ctx);
1314 return t->axes[a] != NULL;
1318 ctables_chisq_destroy (struct ctables_chisq *chisq)
1324 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1330 ctables_table_destroy (struct ctables_table *t)
1335 for (size_t i = 0; i < t->n_sections; i++)
1336 ctables_section_uninit (&t->sections[i]);
1339 for (size_t i = 0; i < t->n_categories; i++)
1340 ctables_categories_unref (t->categories[i]);
1341 free (t->categories);
1343 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1345 ctables_axis_destroy (t->axes[a]);
1346 ctables_stack_uninit (&t->stacks[a]);
1348 free (t->summary_specs.specs);
1350 struct ctables_value *ctv, *next_ctv;
1351 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1352 &t->clabels_values_map)
1354 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1355 hmap_delete (&t->clabels_values_map, &ctv->node);
1358 hmap_destroy (&t->clabels_values_map);
1359 free (t->clabels_values);
1365 ctables_chisq_destroy (t->chisq);
1366 ctables_pairwise_destroy (t->pairwise);
1371 ctables_destroy (struct ctables *ct)
1376 struct ctables_postcompute *pc, *next_pc;
1377 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1381 msg_location_destroy (pc->location);
1382 ctables_pcexpr_destroy (pc->expr);
1386 ctables_summary_spec_set_uninit (pc->specs);
1389 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1393 fmt_settings_uninit (&ct->ctables_formats);
1394 pivot_table_look_unref (ct->look);
1398 for (size_t i = 0; i < ct->n_tables; i++)
1399 ctables_table_destroy (ct->tables[i]);
1404 static struct ctables_category
1405 cct_nrange (double low, double high)
1407 return (struct ctables_category) {
1409 .nrange = { low, high }
1413 static struct ctables_category
1414 cct_srange (struct substring low, struct substring high)
1416 return (struct ctables_category) {
1418 .srange = { low, high }
1423 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1424 struct ctables_category *cat)
1427 if (lex_match (lexer, T_EQUALS))
1429 if (!lex_force_string (lexer))
1432 total_label = ss_xstrdup (lex_tokss (lexer));
1436 total_label = xstrdup (_("Subtotal"));
1438 *cat = (struct ctables_category) {
1439 .type = CCT_SUBTOTAL,
1440 .hide_subcategories = hide_subcategories,
1441 .total_label = total_label
1446 static struct substring
1447 parse_substring (struct lexer *lexer, struct dictionary *dict)
1449 struct substring s = recode_substring_pool (
1450 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1451 ss_rtrim (&s, ss_cstr (" "));
1457 ctables_table_parse_explicit_category (struct lexer *lexer,
1458 struct dictionary *dict,
1460 struct ctables_category *cat)
1462 if (lex_match_id (lexer, "OTHERNM"))
1463 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1464 else if (lex_match_id (lexer, "MISSING"))
1465 *cat = (struct ctables_category) { .type = CCT_MISSING };
1466 else if (lex_match_id (lexer, "SUBTOTAL"))
1467 return ctables_table_parse_subtotal (lexer, false, cat);
1468 else if (lex_match_id (lexer, "HSUBTOTAL"))
1469 return ctables_table_parse_subtotal (lexer, true, cat);
1470 else if (lex_match_id (lexer, "LO"))
1472 if (!lex_force_match_id (lexer, "THRU"))
1474 if (lex_is_string (lexer))
1476 struct substring sr0 = { .string = NULL };
1477 struct substring sr1 = parse_substring (lexer, dict);
1478 *cat = cct_srange (sr0, sr1);
1480 else if (lex_force_num (lexer))
1482 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1488 else if (lex_is_number (lexer))
1490 double number = lex_number (lexer);
1492 if (lex_match_id (lexer, "THRU"))
1494 if (lex_match_id (lexer, "HI"))
1495 *cat = cct_nrange (number, DBL_MAX);
1498 if (!lex_force_num (lexer))
1500 *cat = cct_nrange (number, lex_number (lexer));
1505 *cat = (struct ctables_category) {
1510 else if (lex_is_string (lexer))
1512 struct substring s = parse_substring (lexer, dict);
1513 if (lex_match_id (lexer, "THRU"))
1515 if (lex_match_id (lexer, "HI"))
1517 struct substring sr1 = { .string = NULL };
1518 *cat = cct_srange (s, sr1);
1522 if (!lex_force_string (lexer))
1527 struct substring sr1 = parse_substring (lexer, dict);
1528 *cat = cct_srange (s, sr1);
1532 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1534 else if (lex_match (lexer, T_AND))
1536 if (!lex_force_id (lexer))
1538 struct ctables_postcompute *pc = ctables_find_postcompute (
1539 ct, lex_tokcstr (lexer));
1542 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1543 msg_at (SE, loc, _("Unknown postcompute &%s."),
1544 lex_tokcstr (lexer));
1545 msg_location_destroy (loc);
1550 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1554 lex_error (lexer, NULL);
1562 parse_category_string (struct msg_location *location,
1563 struct substring s, const struct dictionary *dict,
1564 enum fmt_type format, double *n)
1567 char *error = data_in (s, dict_get_encoding (dict), format,
1568 settings_get_fmt_settings (), &v, 0, NULL);
1571 msg_at (SE, location,
1572 _("Failed to parse category specification as format %s: %s."),
1573 fmt_name (format), error);
1582 static struct ctables_category *
1583 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1584 const struct ctables_pcexpr *e)
1586 struct ctables_category *best = NULL;
1587 size_t n_subtotals = 0;
1588 for (size_t i = 0; i < cats->n_cats; i++)
1590 struct ctables_category *cat = &cats->cats[i];
1593 case CTPO_CAT_NUMBER:
1594 if (cat->type == CCT_NUMBER && cat->number == e->number)
1598 case CTPO_CAT_STRING:
1599 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1603 case CTPO_CAT_NRANGE:
1604 if (cat->type == CCT_NRANGE
1605 && cat->nrange[0] == e->nrange[0]
1606 && cat->nrange[1] == e->nrange[1])
1610 case CTPO_CAT_SRANGE:
1611 if (cat->type == CCT_SRANGE
1612 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1613 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1617 case CTPO_CAT_MISSING:
1618 if (cat->type == CCT_MISSING)
1622 case CTPO_CAT_OTHERNM:
1623 if (cat->type == CCT_OTHERNM)
1627 case CTPO_CAT_SUBTOTAL:
1628 if (cat->type == CCT_SUBTOTAL)
1631 if (e->subtotal_index == n_subtotals)
1633 else if (e->subtotal_index == 0)
1638 case CTPO_CAT_TOTAL:
1639 if (cat->type == CCT_TOTAL)
1653 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1658 static struct ctables_category *
1659 ctables_find_category_for_postcompute (const struct dictionary *dict,
1660 const struct ctables_categories *cats,
1661 enum fmt_type parse_format,
1662 const struct ctables_pcexpr *e)
1664 if (parse_format != FMT_F)
1666 if (e->op == CTPO_CAT_STRING)
1669 if (!parse_category_string (e->location, e->string, dict,
1670 parse_format, &number))
1673 struct ctables_pcexpr e2 = {
1674 .op = CTPO_CAT_NUMBER,
1676 .location = e->location,
1678 return ctables_find_category_for_postcompute__ (cats, &e2);
1680 else if (e->op == CTPO_CAT_SRANGE)
1683 if (!e->srange[0].string)
1684 nrange[0] = -DBL_MAX;
1685 else if (!parse_category_string (e->location, e->srange[0], dict,
1686 parse_format, &nrange[0]))
1689 if (!e->srange[1].string)
1690 nrange[1] = DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[1], dict,
1692 parse_format, &nrange[1]))
1695 struct ctables_pcexpr e2 = {
1696 .op = CTPO_CAT_NRANGE,
1697 .nrange = { nrange[0], nrange[1] },
1698 .location = e->location,
1700 return ctables_find_category_for_postcompute__ (cats, &e2);
1703 return ctables_find_category_for_postcompute__ (cats, e);
1707 ctables_recursive_check_postcompute (struct dictionary *dict,
1708 const struct ctables_pcexpr *e,
1709 struct ctables_category *pc_cat,
1710 const struct ctables_categories *cats,
1711 const struct msg_location *cats_location)
1715 case CTPO_CAT_NUMBER:
1716 case CTPO_CAT_STRING:
1717 case CTPO_CAT_NRANGE:
1718 case CTPO_CAT_SRANGE:
1719 case CTPO_CAT_MISSING:
1720 case CTPO_CAT_OTHERNM:
1721 case CTPO_CAT_SUBTOTAL:
1722 case CTPO_CAT_TOTAL:
1724 struct ctables_category *cat = ctables_find_category_for_postcompute (
1725 dict, cats, pc_cat->parse_format, e);
1728 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1730 size_t n_subtotals = 0;
1731 for (size_t i = 0; i < cats->n_cats; i++)
1732 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1733 if (n_subtotals > 1)
1735 msg_at (SE, cats_location,
1736 ngettext ("These categories include %zu instance "
1737 "of SUBTOTAL or HSUBTOTAL, so references "
1738 "from computed categories must refer to "
1739 "subtotals by position, "
1740 "e.g. SUBTOTAL[1].",
1741 "These categories include %zu instances "
1742 "of SUBTOTAL or HSUBTOTAL, so references "
1743 "from computed categories must refer to "
1744 "subtotals by position, "
1745 "e.g. SUBTOTAL[1].",
1748 msg_at (SN, e->location,
1749 _("This is the reference that lacks a position."));
1754 msg_at (SE, pc_cat->location,
1755 _("Computed category &%s references a category not included "
1756 "in the category list."),
1758 msg_at (SN, e->location, _("This is the missing category."));
1759 if (e->op == CTPO_CAT_SUBTOTAL)
1760 msg_at (SN, cats_location,
1761 _("To fix the problem, add subtotals to the "
1762 "list of categories here."));
1763 else if (e->op == CTPO_CAT_TOTAL)
1764 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1765 "CATEGORIES specification."));
1767 msg_at (SN, cats_location,
1768 _("To fix the problem, add the missing category to the "
1769 "list of categories here."));
1772 if (pc_cat->pc->hide_source_cats)
1786 for (size_t i = 0; i < 2; i++)
1787 if (e->subs[i] && !ctables_recursive_check_postcompute (
1788 dict, e->subs[i], pc_cat, cats, cats_location))
1797 all_strings (struct variable **vars, size_t n_vars,
1798 const struct ctables_category *cat)
1800 for (size_t j = 0; j < n_vars; j++)
1801 if (var_is_numeric (vars[j]))
1803 msg_at (SE, cat->location,
1804 _("This category specification may be applied only to string "
1805 "variables, but this subcommand tries to apply it to "
1806 "numeric variable %s."),
1807 var_get_name (vars[j]));
1814 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1815 struct ctables *ct, struct ctables_table *t)
1817 if (!lex_match_id (lexer, "VARIABLES"))
1819 lex_match (lexer, T_EQUALS);
1821 struct variable **vars;
1823 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1826 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1827 for (size_t i = 1; i < n_vars; i++)
1829 const struct fmt_spec *f = var_get_print_format (vars[i]);
1830 if (f->type != common_format->type)
1832 common_format = NULL;
1838 && (fmt_get_category (common_format->type)
1839 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1841 struct ctables_categories *c = xmalloc (sizeof *c);
1842 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1843 for (size_t i = 0; i < n_vars; i++)
1845 struct ctables_categories **cp
1846 = &t->categories[var_get_dict_index (vars[i])];
1847 ctables_categories_unref (*cp);
1851 size_t allocated_cats = 0;
1852 int cats_start_ofs = -1;
1853 int cats_end_ofs = -1;
1854 if (lex_match (lexer, T_LBRACK))
1856 cats_start_ofs = lex_ofs (lexer);
1859 if (c->n_cats >= allocated_cats)
1860 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1862 int start_ofs = lex_ofs (lexer);
1863 struct ctables_category *cat = &c->cats[c->n_cats];
1864 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1866 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1869 lex_match (lexer, T_COMMA);
1871 while (!lex_match (lexer, T_RBRACK));
1872 cats_end_ofs = lex_ofs (lexer) - 1;
1875 struct ctables_category cat = {
1877 .include_missing = false,
1878 .sort_ascending = true,
1880 bool show_totals = false;
1881 char *total_label = NULL;
1882 bool totals_before = false;
1883 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1885 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1887 lex_match (lexer, T_EQUALS);
1888 if (lex_match_id (lexer, "A"))
1889 cat.sort_ascending = true;
1890 else if (lex_match_id (lexer, "D"))
1891 cat.sort_ascending = false;
1894 lex_error_expecting (lexer, "A", "D");
1898 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1900 lex_match (lexer, T_EQUALS);
1901 if (lex_match_id (lexer, "VALUE"))
1902 cat.type = CCT_VALUE;
1903 else if (lex_match_id (lexer, "LABEL"))
1904 cat.type = CCT_LABEL;
1907 cat.type = CCT_FUNCTION;
1908 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1911 if (lex_match (lexer, T_LPAREN))
1913 cat.sort_var = parse_variable (lexer, dict);
1917 if (cat.sort_function == CTSF_PTILE)
1919 lex_match (lexer, T_COMMA);
1920 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1922 cat.percentile = lex_number (lexer);
1926 if (!lex_force_match (lexer, T_RPAREN))
1929 else if (ctables_function_availability (cat.sort_function)
1932 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1937 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1939 lex_match (lexer, T_EQUALS);
1940 if (lex_match_id (lexer, "INCLUDE"))
1941 cat.include_missing = true;
1942 else if (lex_match_id (lexer, "EXCLUDE"))
1943 cat.include_missing = false;
1946 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1950 else if (lex_match_id (lexer, "TOTAL"))
1952 lex_match (lexer, T_EQUALS);
1953 if (!parse_bool (lexer, &show_totals))
1956 else if (lex_match_id (lexer, "LABEL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!lex_force_string (lexer))
1962 total_label = ss_xstrdup (lex_tokss (lexer));
1965 else if (lex_match_id (lexer, "POSITION"))
1967 lex_match (lexer, T_EQUALS);
1968 if (lex_match_id (lexer, "BEFORE"))
1969 totals_before = true;
1970 else if (lex_match_id (lexer, "AFTER"))
1971 totals_before = false;
1974 lex_error_expecting (lexer, "BEFORE", "AFTER");
1978 else if (lex_match_id (lexer, "EMPTY"))
1980 lex_match (lexer, T_EQUALS);
1981 if (lex_match_id (lexer, "INCLUDE"))
1982 c->show_empty = true;
1983 else if (lex_match_id (lexer, "EXCLUDE"))
1984 c->show_empty = false;
1987 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1994 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1995 "TOTAL", "LABEL", "POSITION", "EMPTY");
1997 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2004 if (c->n_cats >= allocated_cats)
2005 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2006 c->cats[c->n_cats++] = cat;
2011 if (c->n_cats >= allocated_cats)
2012 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2014 struct ctables_category *totals;
2017 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2018 totals = &c->cats[0];
2021 totals = &c->cats[c->n_cats];
2024 *totals = (struct ctables_category) {
2026 .total_label = total_label ? total_label : xstrdup (_("Total")),
2030 struct ctables_category *subtotal = NULL;
2031 for (size_t i = totals_before ? 0 : c->n_cats;
2032 totals_before ? i < c->n_cats : i-- > 0;
2033 totals_before ? i++ : 0)
2035 struct ctables_category *cat = &c->cats[i];
2044 cat->subtotal = subtotal;
2047 case CCT_POSTCOMPUTE:
2058 case CCT_EXCLUDED_MISSING:
2063 if (cats_start_ofs != -1)
2065 for (size_t i = 0; i < c->n_cats; i++)
2067 struct ctables_category *cat = &c->cats[i];
2070 case CCT_POSTCOMPUTE:
2071 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2072 struct msg_location *cats_location
2073 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2074 bool ok = ctables_recursive_check_postcompute (
2075 dict, cat->pc->expr, cat, c, cats_location);
2076 msg_location_destroy (cats_location);
2083 for (size_t j = 0; j < n_vars; j++)
2084 if (var_is_alpha (vars[j]))
2086 msg_at (SE, cat->location,
2087 _("This category specification may be applied "
2088 "only to numeric variables, but this "
2089 "subcommand tries to apply it to string "
2091 var_get_name (vars[j]));
2100 if (!parse_category_string (cat->location, cat->string, dict,
2101 common_format->type, &n))
2104 ss_dealloc (&cat->string);
2106 cat->type = CCT_NUMBER;
2109 else if (!all_strings (vars, n_vars, cat))
2118 if (!cat->srange[0].string)
2120 else if (!parse_category_string (cat->location,
2121 cat->srange[0], dict,
2122 common_format->type, &n[0]))
2125 if (!cat->srange[1].string)
2127 else if (!parse_category_string (cat->location,
2128 cat->srange[1], dict,
2129 common_format->type, &n[1]))
2132 ss_dealloc (&cat->srange[0]);
2133 ss_dealloc (&cat->srange[1]);
2135 cat->type = CCT_NRANGE;
2136 cat->nrange[0] = n[0];
2137 cat->nrange[1] = n[1];
2139 else if (!all_strings (vars, n_vars, cat))
2150 case CCT_EXCLUDED_MISSING:
2165 ctables_nest_uninit (struct ctables_nest *nest)
2168 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2169 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2170 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2171 free (nest->domains[dt]);
2175 ctables_stack_uninit (struct ctables_stack *stack)
2179 for (size_t i = 0; i < stack->n; i++)
2180 ctables_nest_uninit (&stack->nests[i]);
2181 free (stack->nests);
2185 static struct ctables_stack
2186 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2193 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2194 for (size_t i = 0; i < s0.n; i++)
2195 for (size_t j = 0; j < s1.n; j++)
2197 const struct ctables_nest *a = &s0.nests[i];
2198 const struct ctables_nest *b = &s1.nests[j];
2200 size_t allocate = a->n + b->n;
2201 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2203 for (size_t k = 0; k < a->n; k++)
2204 vars[n++] = a->vars[k];
2205 for (size_t k = 0; k < b->n; k++)
2206 vars[n++] = b->vars[k];
2207 assert (n == allocate);
2209 const struct ctables_nest *summary_src;
2210 if (!a->specs[CSV_CELL].var)
2212 else if (!b->specs[CSV_CELL].var)
2217 struct ctables_nest *new = &stack.nests[stack.n++];
2218 *new = (struct ctables_nest) {
2220 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2221 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2225 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2226 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2228 ctables_stack_uninit (&s0);
2229 ctables_stack_uninit (&s1);
2233 static struct ctables_stack
2234 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2236 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2237 for (size_t i = 0; i < s0.n; i++)
2238 stack.nests[stack.n++] = s0.nests[i];
2239 for (size_t i = 0; i < s1.n; i++)
2241 stack.nests[stack.n] = s1.nests[i];
2242 stack.nests[stack.n].group_head += s0.n;
2245 assert (stack.n == s0.n + s1.n);
2251 static struct ctables_stack
2252 var_fts (const struct ctables_axis *a)
2254 struct variable **vars = xmalloc (sizeof *vars);
2257 struct ctables_nest *nest = xmalloc (sizeof *nest);
2258 *nest = (struct ctables_nest) {
2261 .scale_idx = a->scale ? 0 : SIZE_MAX,
2263 if (a->specs[CSV_CELL].n || a->scale)
2264 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2266 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2267 nest->specs[sv].var = a->var;
2268 nest->specs[sv].is_scale = a->scale;
2270 return (struct ctables_stack) { .nests = nest, .n = 1 };
2273 static struct ctables_stack
2274 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2277 return (struct ctables_stack) { .n = 0 };
2285 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2286 enumerate_fts (axis_type, a->subs[1]));
2289 /* This should consider any of the scale variables found in the result to
2290 be linked to each other listwise for SMISSING=LISTWISE. */
2291 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
2298 union ctables_summary
2300 /* COUNT, VALIDN, TOTALN. */
2303 /* MINIMUM, MAXIMUM, RANGE. */
2310 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2311 struct moments1 *moments;
2313 /* MEDIAN, MODE, PTILE. */
2316 struct casewriter *writer;
2321 /* XXX multiple response */
2325 ctables_summary_init (union ctables_summary *s,
2326 const struct ctables_summary_spec *ss)
2328 switch (ss->function)
2332 case CTSF_ROWPCT_COUNT:
2333 case CTSF_COLPCT_COUNT:
2334 case CTSF_TABLEPCT_COUNT:
2335 case CTSF_SUBTABLEPCT_COUNT:
2336 case CTSF_LAYERPCT_COUNT:
2337 case CTSF_LAYERROWPCT_COUNT:
2338 case CTSF_LAYERCOLPCT_COUNT:
2339 case CTSF_ROWPCT_VALIDN:
2340 case CTSF_COLPCT_VALIDN:
2341 case CTSF_TABLEPCT_VALIDN:
2342 case CTSF_SUBTABLEPCT_VALIDN:
2343 case CTSF_LAYERPCT_VALIDN:
2344 case CTSF_LAYERROWPCT_VALIDN:
2345 case CTSF_LAYERCOLPCT_VALIDN:
2346 case CTSF_ROWPCT_TOTALN:
2347 case CTSF_COLPCT_TOTALN:
2348 case CTSF_TABLEPCT_TOTALN:
2349 case CTSF_SUBTABLEPCT_TOTALN:
2350 case CTSF_LAYERPCT_TOTALN:
2351 case CTSF_LAYERROWPCT_TOTALN:
2352 case CTSF_LAYERCOLPCT_TOTALN:
2359 case CTSF_UROWPCT_COUNT:
2360 case CTSF_UCOLPCT_COUNT:
2361 case CTSF_UTABLEPCT_COUNT:
2362 case CTSF_USUBTABLEPCT_COUNT:
2363 case CTSF_ULAYERPCT_COUNT:
2364 case CTSF_ULAYERROWPCT_COUNT:
2365 case CTSF_ULAYERCOLPCT_COUNT:
2366 case CTSF_UROWPCT_VALIDN:
2367 case CTSF_UCOLPCT_VALIDN:
2368 case CTSF_UTABLEPCT_VALIDN:
2369 case CTSF_USUBTABLEPCT_VALIDN:
2370 case CTSF_ULAYERPCT_VALIDN:
2371 case CTSF_ULAYERROWPCT_VALIDN:
2372 case CTSF_ULAYERCOLPCT_VALIDN:
2373 case CTSF_UROWPCT_TOTALN:
2374 case CTSF_UCOLPCT_TOTALN:
2375 case CTSF_UTABLEPCT_TOTALN:
2376 case CTSF_USUBTABLEPCT_TOTALN:
2377 case CTSF_ULAYERPCT_TOTALN:
2378 case CTSF_ULAYERROWPCT_TOTALN:
2379 case CTSF_ULAYERCOLPCT_TOTALN:
2389 case CTSF_SUBTABLE_ID:
2391 case CTSF_LAYERROW_ID:
2392 case CTSF_LAYERCOL_ID:
2398 s->min = s->max = SYSMIS;
2406 case CTSF_ROWPCT_SUM:
2407 case CTSF_COLPCT_SUM:
2408 case CTSF_TABLEPCT_SUM:
2409 case CTSF_SUBTABLEPCT_SUM:
2410 case CTSF_LAYERPCT_SUM:
2411 case CTSF_LAYERROWPCT_SUM:
2412 case CTSF_LAYERCOLPCT_SUM:
2417 case CTSF_UVARIANCE:
2418 case CTSF_UROWPCT_SUM:
2419 case CTSF_UCOLPCT_SUM:
2420 case CTSF_UTABLEPCT_SUM:
2421 case CTSF_USUBTABLEPCT_SUM:
2422 case CTSF_ULAYERPCT_SUM:
2423 case CTSF_ULAYERROWPCT_SUM:
2424 case CTSF_ULAYERCOLPCT_SUM:
2425 s->moments = moments1_create (MOMENT_VARIANCE);
2435 struct caseproto *proto = caseproto_create ();
2436 proto = caseproto_add_width (proto, 0);
2437 proto = caseproto_add_width (proto, 0);
2439 struct subcase ordering;
2440 subcase_init (&ordering, 0, 0, SC_ASCEND);
2441 s->writer = sort_create_writer (&ordering, proto);
2442 subcase_uninit (&ordering);
2443 caseproto_unref (proto);
2453 ctables_summary_uninit (union ctables_summary *s,
2454 const struct ctables_summary_spec *ss)
2456 switch (ss->function)
2460 case CTSF_ROWPCT_COUNT:
2461 case CTSF_COLPCT_COUNT:
2462 case CTSF_TABLEPCT_COUNT:
2463 case CTSF_SUBTABLEPCT_COUNT:
2464 case CTSF_LAYERPCT_COUNT:
2465 case CTSF_LAYERROWPCT_COUNT:
2466 case CTSF_LAYERCOLPCT_COUNT:
2467 case CTSF_ROWPCT_VALIDN:
2468 case CTSF_COLPCT_VALIDN:
2469 case CTSF_TABLEPCT_VALIDN:
2470 case CTSF_SUBTABLEPCT_VALIDN:
2471 case CTSF_LAYERPCT_VALIDN:
2472 case CTSF_LAYERROWPCT_VALIDN:
2473 case CTSF_LAYERCOLPCT_VALIDN:
2474 case CTSF_ROWPCT_TOTALN:
2475 case CTSF_COLPCT_TOTALN:
2476 case CTSF_TABLEPCT_TOTALN:
2477 case CTSF_SUBTABLEPCT_TOTALN:
2478 case CTSF_LAYERPCT_TOTALN:
2479 case CTSF_LAYERROWPCT_TOTALN:
2480 case CTSF_LAYERCOLPCT_TOTALN:
2487 case CTSF_UROWPCT_COUNT:
2488 case CTSF_UCOLPCT_COUNT:
2489 case CTSF_UTABLEPCT_COUNT:
2490 case CTSF_USUBTABLEPCT_COUNT:
2491 case CTSF_ULAYERPCT_COUNT:
2492 case CTSF_ULAYERROWPCT_COUNT:
2493 case CTSF_ULAYERCOLPCT_COUNT:
2494 case CTSF_UROWPCT_VALIDN:
2495 case CTSF_UCOLPCT_VALIDN:
2496 case CTSF_UTABLEPCT_VALIDN:
2497 case CTSF_USUBTABLEPCT_VALIDN:
2498 case CTSF_ULAYERPCT_VALIDN:
2499 case CTSF_ULAYERROWPCT_VALIDN:
2500 case CTSF_ULAYERCOLPCT_VALIDN:
2501 case CTSF_UROWPCT_TOTALN:
2502 case CTSF_UCOLPCT_TOTALN:
2503 case CTSF_UTABLEPCT_TOTALN:
2504 case CTSF_USUBTABLEPCT_TOTALN:
2505 case CTSF_ULAYERPCT_TOTALN:
2506 case CTSF_ULAYERROWPCT_TOTALN:
2507 case CTSF_ULAYERCOLPCT_TOTALN:
2516 case CTSF_SUBTABLE_ID:
2518 case CTSF_LAYERROW_ID:
2519 case CTSF_LAYERCOL_ID:
2532 case CTSF_ROWPCT_SUM:
2533 case CTSF_COLPCT_SUM:
2534 case CTSF_TABLEPCT_SUM:
2535 case CTSF_SUBTABLEPCT_SUM:
2536 case CTSF_LAYERPCT_SUM:
2537 case CTSF_LAYERROWPCT_SUM:
2538 case CTSF_LAYERCOLPCT_SUM:
2543 case CTSF_UVARIANCE:
2544 case CTSF_UROWPCT_SUM:
2545 case CTSF_UCOLPCT_SUM:
2546 case CTSF_UTABLEPCT_SUM:
2547 case CTSF_USUBTABLEPCT_SUM:
2548 case CTSF_ULAYERPCT_SUM:
2549 case CTSF_ULAYERROWPCT_SUM:
2550 case CTSF_ULAYERCOLPCT_SUM:
2551 moments1_destroy (s->moments);
2560 casewriter_destroy (s->writer);
2566 ctables_summary_add (union ctables_summary *s,
2567 const struct ctables_summary_spec *ss,
2568 const struct variable *var, const union value *value,
2569 bool is_scale, bool is_scale_missing,
2570 bool is_missing, bool excluded_missing,
2571 double d_weight, double e_weight)
2573 /* To determine whether a case is included in a given table for a particular
2574 kind of summary, consider the following charts for each variable in the
2575 table. Only if "yes" appears for every variable for the summary is the
2578 Categorical variables: VALIDN COUNT TOTALN
2579 Valid values in included categories yes yes yes
2580 Missing values in included categories --- yes yes
2581 Missing values in excluded categories --- --- yes
2582 Valid values in excluded categories --- --- ---
2584 Scale variables: VALIDN COUNT TOTALN
2585 Valid value yes yes yes
2586 Missing value --- yes yes
2588 Missing values include both user- and system-missing. (The system-missing
2589 value is always in an excluded category.)
2591 switch (ss->function)
2594 case CTSF_ROWPCT_TOTALN:
2595 case CTSF_COLPCT_TOTALN:
2596 case CTSF_TABLEPCT_TOTALN:
2597 case CTSF_SUBTABLEPCT_TOTALN:
2598 case CTSF_LAYERPCT_TOTALN:
2599 case CTSF_LAYERROWPCT_TOTALN:
2600 case CTSF_LAYERCOLPCT_TOTALN:
2601 s->count += d_weight;
2605 case CTSF_UROWPCT_TOTALN:
2606 case CTSF_UCOLPCT_TOTALN:
2607 case CTSF_UTABLEPCT_TOTALN:
2608 case CTSF_USUBTABLEPCT_TOTALN:
2609 case CTSF_ULAYERPCT_TOTALN:
2610 case CTSF_ULAYERROWPCT_TOTALN:
2611 case CTSF_ULAYERCOLPCT_TOTALN:
2616 case CTSF_ROWPCT_COUNT:
2617 case CTSF_COLPCT_COUNT:
2618 case CTSF_TABLEPCT_COUNT:
2619 case CTSF_SUBTABLEPCT_COUNT:
2620 case CTSF_LAYERPCT_COUNT:
2621 case CTSF_LAYERROWPCT_COUNT:
2622 case CTSF_LAYERCOLPCT_COUNT:
2623 if (is_scale || !excluded_missing)
2624 s->count += d_weight;
2628 case CTSF_UROWPCT_COUNT:
2629 case CTSF_UCOLPCT_COUNT:
2630 case CTSF_UTABLEPCT_COUNT:
2631 case CTSF_USUBTABLEPCT_COUNT:
2632 case CTSF_ULAYERPCT_COUNT:
2633 case CTSF_ULAYERROWPCT_COUNT:
2634 case CTSF_ULAYERCOLPCT_COUNT:
2635 if (is_scale || !excluded_missing)
2640 case CTSF_ROWPCT_VALIDN:
2641 case CTSF_COLPCT_VALIDN:
2642 case CTSF_TABLEPCT_VALIDN:
2643 case CTSF_SUBTABLEPCT_VALIDN:
2644 case CTSF_LAYERPCT_VALIDN:
2645 case CTSF_LAYERROWPCT_VALIDN:
2646 case CTSF_LAYERCOLPCT_VALIDN:
2650 s->count += d_weight;
2654 case CTSF_UROWPCT_VALIDN:
2655 case CTSF_UCOLPCT_VALIDN:
2656 case CTSF_UTABLEPCT_VALIDN:
2657 case CTSF_USUBTABLEPCT_VALIDN:
2658 case CTSF_ULAYERPCT_VALIDN:
2659 case CTSF_ULAYERROWPCT_VALIDN:
2660 case CTSF_ULAYERCOLPCT_VALIDN:
2670 case CTSF_SUBTABLE_ID:
2672 case CTSF_LAYERROW_ID:
2673 case CTSF_LAYERCOL_ID:
2678 s->count += d_weight;
2687 if (is_scale || !excluded_missing)
2688 s->count += e_weight;
2695 s->count += e_weight;
2699 s->count += e_weight;
2705 if (!is_scale_missing)
2707 assert (!var_is_alpha (var)); /* XXX? */
2708 if (s->min == SYSMIS || value->f < s->min)
2710 if (s->max == SYSMIS || value->f > s->max)
2720 case CTSF_ROWPCT_SUM:
2721 case CTSF_COLPCT_SUM:
2722 case CTSF_TABLEPCT_SUM:
2723 case CTSF_SUBTABLEPCT_SUM:
2724 case CTSF_LAYERPCT_SUM:
2725 case CTSF_LAYERROWPCT_SUM:
2726 case CTSF_LAYERCOLPCT_SUM:
2727 if (!is_scale_missing)
2728 moments1_add (s->moments, value->f, e_weight);
2735 case CTSF_UVARIANCE:
2736 case CTSF_UROWPCT_SUM:
2737 case CTSF_UCOLPCT_SUM:
2738 case CTSF_UTABLEPCT_SUM:
2739 case CTSF_USUBTABLEPCT_SUM:
2740 case CTSF_ULAYERPCT_SUM:
2741 case CTSF_ULAYERROWPCT_SUM:
2742 case CTSF_ULAYERCOLPCT_SUM:
2743 if (!is_scale_missing)
2744 moments1_add (s->moments, value->f, 1.0);
2750 d_weight = e_weight = 1.0;
2755 if (!is_scale_missing)
2757 s->ovalid += e_weight;
2759 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2760 *case_num_rw_idx (c, 0) = value->f;
2761 *case_num_rw_idx (c, 1) = e_weight;
2762 casewriter_write (s->writer, c);
2768 static enum ctables_domain_type
2769 ctables_function_domain (enum ctables_summary_function function)
2799 case CTSF_UVARIANCE:
2805 case CTSF_COLPCT_COUNT:
2806 case CTSF_COLPCT_SUM:
2807 case CTSF_COLPCT_TOTALN:
2808 case CTSF_COLPCT_VALIDN:
2809 case CTSF_UCOLPCT_COUNT:
2810 case CTSF_UCOLPCT_SUM:
2811 case CTSF_UCOLPCT_TOTALN:
2812 case CTSF_UCOLPCT_VALIDN:
2816 case CTSF_LAYERCOLPCT_COUNT:
2817 case CTSF_LAYERCOLPCT_SUM:
2818 case CTSF_LAYERCOLPCT_TOTALN:
2819 case CTSF_LAYERCOLPCT_VALIDN:
2820 case CTSF_ULAYERCOLPCT_COUNT:
2821 case CTSF_ULAYERCOLPCT_SUM:
2822 case CTSF_ULAYERCOLPCT_TOTALN:
2823 case CTSF_ULAYERCOLPCT_VALIDN:
2824 case CTSF_LAYERCOL_ID:
2825 return CTDT_LAYERCOL;
2827 case CTSF_LAYERPCT_COUNT:
2828 case CTSF_LAYERPCT_SUM:
2829 case CTSF_LAYERPCT_TOTALN:
2830 case CTSF_LAYERPCT_VALIDN:
2831 case CTSF_ULAYERPCT_COUNT:
2832 case CTSF_ULAYERPCT_SUM:
2833 case CTSF_ULAYERPCT_TOTALN:
2834 case CTSF_ULAYERPCT_VALIDN:
2838 case CTSF_LAYERROWPCT_COUNT:
2839 case CTSF_LAYERROWPCT_SUM:
2840 case CTSF_LAYERROWPCT_TOTALN:
2841 case CTSF_LAYERROWPCT_VALIDN:
2842 case CTSF_ULAYERROWPCT_COUNT:
2843 case CTSF_ULAYERROWPCT_SUM:
2844 case CTSF_ULAYERROWPCT_TOTALN:
2845 case CTSF_ULAYERROWPCT_VALIDN:
2846 case CTSF_LAYERROW_ID:
2847 return CTDT_LAYERROW;
2849 case CTSF_ROWPCT_COUNT:
2850 case CTSF_ROWPCT_SUM:
2851 case CTSF_ROWPCT_TOTALN:
2852 case CTSF_ROWPCT_VALIDN:
2853 case CTSF_UROWPCT_COUNT:
2854 case CTSF_UROWPCT_SUM:
2855 case CTSF_UROWPCT_TOTALN:
2856 case CTSF_UROWPCT_VALIDN:
2860 case CTSF_SUBTABLEPCT_COUNT:
2861 case CTSF_SUBTABLEPCT_SUM:
2862 case CTSF_SUBTABLEPCT_TOTALN:
2863 case CTSF_SUBTABLEPCT_VALIDN:
2864 case CTSF_USUBTABLEPCT_COUNT:
2865 case CTSF_USUBTABLEPCT_SUM:
2866 case CTSF_USUBTABLEPCT_TOTALN:
2867 case CTSF_USUBTABLEPCT_VALIDN:
2868 case CTSF_SUBTABLE_ID:
2869 return CTDT_SUBTABLE;
2871 case CTSF_TABLEPCT_COUNT:
2872 case CTSF_TABLEPCT_SUM:
2873 case CTSF_TABLEPCT_TOTALN:
2874 case CTSF_TABLEPCT_VALIDN:
2875 case CTSF_UTABLEPCT_COUNT:
2876 case CTSF_UTABLEPCT_SUM:
2877 case CTSF_UTABLEPCT_TOTALN:
2878 case CTSF_UTABLEPCT_VALIDN:
2886 static enum ctables_domain_type
2887 ctables_function_is_pctsum (enum ctables_summary_function function)
2917 case CTSF_UVARIANCE:
2921 case CTSF_COLPCT_COUNT:
2922 case CTSF_COLPCT_TOTALN:
2923 case CTSF_COLPCT_VALIDN:
2924 case CTSF_UCOLPCT_COUNT:
2925 case CTSF_UCOLPCT_TOTALN:
2926 case CTSF_UCOLPCT_VALIDN:
2927 case CTSF_LAYERCOLPCT_COUNT:
2928 case CTSF_LAYERCOLPCT_TOTALN:
2929 case CTSF_LAYERCOLPCT_VALIDN:
2930 case CTSF_ULAYERCOLPCT_COUNT:
2931 case CTSF_ULAYERCOLPCT_TOTALN:
2932 case CTSF_ULAYERCOLPCT_VALIDN:
2933 case CTSF_LAYERPCT_COUNT:
2934 case CTSF_LAYERPCT_TOTALN:
2935 case CTSF_LAYERPCT_VALIDN:
2936 case CTSF_ULAYERPCT_COUNT:
2937 case CTSF_ULAYERPCT_TOTALN:
2938 case CTSF_ULAYERPCT_VALIDN:
2939 case CTSF_LAYERROWPCT_COUNT:
2940 case CTSF_LAYERROWPCT_TOTALN:
2941 case CTSF_LAYERROWPCT_VALIDN:
2942 case CTSF_ULAYERROWPCT_COUNT:
2943 case CTSF_ULAYERROWPCT_TOTALN:
2944 case CTSF_ULAYERROWPCT_VALIDN:
2945 case CTSF_ROWPCT_COUNT:
2946 case CTSF_ROWPCT_TOTALN:
2947 case CTSF_ROWPCT_VALIDN:
2948 case CTSF_UROWPCT_COUNT:
2949 case CTSF_UROWPCT_TOTALN:
2950 case CTSF_UROWPCT_VALIDN:
2951 case CTSF_SUBTABLEPCT_COUNT:
2952 case CTSF_SUBTABLEPCT_TOTALN:
2953 case CTSF_SUBTABLEPCT_VALIDN:
2954 case CTSF_USUBTABLEPCT_COUNT:
2955 case CTSF_USUBTABLEPCT_TOTALN:
2956 case CTSF_USUBTABLEPCT_VALIDN:
2957 case CTSF_TABLEPCT_COUNT:
2958 case CTSF_TABLEPCT_TOTALN:
2959 case CTSF_TABLEPCT_VALIDN:
2960 case CTSF_UTABLEPCT_COUNT:
2961 case CTSF_UTABLEPCT_TOTALN:
2962 case CTSF_UTABLEPCT_VALIDN:
2966 case CTSF_SUBTABLE_ID:
2968 case CTSF_LAYERROW_ID:
2969 case CTSF_LAYERCOL_ID:
2972 case CTSF_COLPCT_SUM:
2973 case CTSF_UCOLPCT_SUM:
2974 case CTSF_LAYERCOLPCT_SUM:
2975 case CTSF_ULAYERCOLPCT_SUM:
2976 case CTSF_LAYERPCT_SUM:
2977 case CTSF_ULAYERPCT_SUM:
2978 case CTSF_LAYERROWPCT_SUM:
2979 case CTSF_ULAYERROWPCT_SUM:
2980 case CTSF_ROWPCT_SUM:
2981 case CTSF_UROWPCT_SUM:
2982 case CTSF_SUBTABLEPCT_SUM:
2983 case CTSF_USUBTABLEPCT_SUM:
2984 case CTSF_TABLEPCT_SUM:
2985 case CTSF_UTABLEPCT_SUM:
2993 ctables_summary_value (const struct ctables_cell *cell,
2994 union ctables_summary *s,
2995 const struct ctables_summary_spec *ss)
2997 switch (ss->function)
3007 case CTSF_SUBTABLE_ID:
3009 case CTSF_LAYERROW_ID:
3010 case CTSF_LAYERCOL_ID:
3011 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3013 case CTSF_ROWPCT_COUNT:
3014 case CTSF_COLPCT_COUNT:
3015 case CTSF_TABLEPCT_COUNT:
3016 case CTSF_SUBTABLEPCT_COUNT:
3017 case CTSF_LAYERPCT_COUNT:
3018 case CTSF_LAYERROWPCT_COUNT:
3019 case CTSF_LAYERCOLPCT_COUNT:
3021 enum ctables_domain_type d = ctables_function_domain (ss->function);
3022 return (cell->domains[d]->e_count
3023 ? s->count / cell->domains[d]->e_count * 100
3027 case CTSF_UROWPCT_COUNT:
3028 case CTSF_UCOLPCT_COUNT:
3029 case CTSF_UTABLEPCT_COUNT:
3030 case CTSF_USUBTABLEPCT_COUNT:
3031 case CTSF_ULAYERPCT_COUNT:
3032 case CTSF_ULAYERROWPCT_COUNT:
3033 case CTSF_ULAYERCOLPCT_COUNT:
3035 enum ctables_domain_type d = ctables_function_domain (ss->function);
3036 return (cell->domains[d]->u_count
3037 ? s->count / cell->domains[d]->u_count * 100
3041 case CTSF_ROWPCT_VALIDN:
3042 case CTSF_COLPCT_VALIDN:
3043 case CTSF_TABLEPCT_VALIDN:
3044 case CTSF_SUBTABLEPCT_VALIDN:
3045 case CTSF_LAYERPCT_VALIDN:
3046 case CTSF_LAYERROWPCT_VALIDN:
3047 case CTSF_LAYERCOLPCT_VALIDN:
3049 enum ctables_domain_type d = ctables_function_domain (ss->function);
3050 return (cell->domains[d]->e_valid
3051 ? s->count / cell->domains[d]->e_valid * 100
3055 case CTSF_UROWPCT_VALIDN:
3056 case CTSF_UCOLPCT_VALIDN:
3057 case CTSF_UTABLEPCT_VALIDN:
3058 case CTSF_USUBTABLEPCT_VALIDN:
3059 case CTSF_ULAYERPCT_VALIDN:
3060 case CTSF_ULAYERROWPCT_VALIDN:
3061 case CTSF_ULAYERCOLPCT_VALIDN:
3063 enum ctables_domain_type d = ctables_function_domain (ss->function);
3064 return (cell->domains[d]->u_valid
3065 ? s->count / cell->domains[d]->u_valid * 100
3069 case CTSF_ROWPCT_TOTALN:
3070 case CTSF_COLPCT_TOTALN:
3071 case CTSF_TABLEPCT_TOTALN:
3072 case CTSF_SUBTABLEPCT_TOTALN:
3073 case CTSF_LAYERPCT_TOTALN:
3074 case CTSF_LAYERROWPCT_TOTALN:
3075 case CTSF_LAYERCOLPCT_TOTALN:
3077 enum ctables_domain_type d = ctables_function_domain (ss->function);
3078 return (cell->domains[d]->e_total
3079 ? s->count / cell->domains[d]->e_total * 100
3083 case CTSF_UROWPCT_TOTALN:
3084 case CTSF_UCOLPCT_TOTALN:
3085 case CTSF_UTABLEPCT_TOTALN:
3086 case CTSF_USUBTABLEPCT_TOTALN:
3087 case CTSF_ULAYERPCT_TOTALN:
3088 case CTSF_ULAYERROWPCT_TOTALN:
3089 case CTSF_ULAYERCOLPCT_TOTALN:
3091 enum ctables_domain_type d = ctables_function_domain (ss->function);
3092 return (cell->domains[d]->u_total
3093 ? s->count / cell->domains[d]->u_total * 100
3114 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3120 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3127 double weight, variance;
3128 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3129 return calc_semean (variance, weight);
3136 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3137 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3143 double weight, mean;
3144 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3145 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3149 case CTSF_UVARIANCE:
3152 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3156 case CTSF_ROWPCT_SUM:
3157 case CTSF_COLPCT_SUM:
3158 case CTSF_TABLEPCT_SUM:
3159 case CTSF_SUBTABLEPCT_SUM:
3160 case CTSF_LAYERPCT_SUM:
3161 case CTSF_LAYERROWPCT_SUM:
3162 case CTSF_LAYERCOLPCT_SUM:
3164 double weight, mean;
3165 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3166 if (weight == SYSMIS || mean == SYSMIS)
3168 enum ctables_domain_type d = ctables_function_domain (ss->function);
3169 double num = weight * mean;
3170 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3171 return denom != 0 ? num / denom * 100 : SYSMIS;
3173 case CTSF_UROWPCT_SUM:
3174 case CTSF_UCOLPCT_SUM:
3175 case CTSF_UTABLEPCT_SUM:
3176 case CTSF_USUBTABLEPCT_SUM:
3177 case CTSF_ULAYERPCT_SUM:
3178 case CTSF_ULAYERROWPCT_SUM:
3179 case CTSF_ULAYERCOLPCT_SUM:
3181 double weight, mean;
3182 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3183 if (weight == SYSMIS || mean == SYSMIS)
3185 enum ctables_domain_type d = ctables_function_domain (ss->function);
3186 double num = weight * mean;
3187 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3188 return denom != 0 ? num / denom * 100 : SYSMIS;
3197 struct casereader *reader = casewriter_make_reader (s->writer);
3200 struct percentile *ptile = percentile_create (
3201 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3202 struct order_stats *os = &ptile->parent;
3203 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3204 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3205 statistic_destroy (&ptile->parent.parent);
3213 struct casereader *reader = casewriter_make_reader (s->writer);
3216 struct mode *mode = mode_create ();
3217 struct order_stats *os = &mode->parent;
3218 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3219 s->ovalue = mode->mode;
3220 statistic_destroy (&mode->parent.parent);
3228 struct ctables_cell_sort_aux
3230 const struct ctables_nest *nest;
3231 enum pivot_axis_type a;
3235 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3237 const struct ctables_cell_sort_aux *aux = aux_;
3238 struct ctables_cell *const *ap = a_;
3239 struct ctables_cell *const *bp = b_;
3240 const struct ctables_cell *a = *ap;
3241 const struct ctables_cell *b = *bp;
3243 const struct ctables_nest *nest = aux->nest;
3244 for (size_t i = 0; i < nest->n; i++)
3245 if (i != nest->scale_idx)
3247 const struct variable *var = nest->vars[i];
3248 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3249 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3250 if (a_cv->category != b_cv->category)
3251 return a_cv->category > b_cv->category ? 1 : -1;
3253 const union value *a_val = &a_cv->value;
3254 const union value *b_val = &b_cv->value;
3255 switch (a_cv->category->type)
3261 case CCT_POSTCOMPUTE:
3262 case CCT_EXCLUDED_MISSING:
3263 /* Must be equal. */
3271 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3279 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3281 return a_cv->category->sort_ascending ? cmp : -cmp;
3287 const char *a_label = var_lookup_value_label (var, a_val);
3288 const char *b_label = var_lookup_value_label (var, b_val);
3294 cmp = strcmp (a_label, b_label);
3300 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3303 return a_cv->category->sort_ascending ? cmp : -cmp;
3317 For each ctables_table:
3318 For each combination of row vars:
3319 For each combination of column vars:
3320 For each combination of layer vars:
3322 Make a table of row values:
3323 Sort entries by row values
3324 Assign a 0-based index to each actual value
3325 Construct a dimension
3326 Make a table of column values
3327 Make a table of layer values
3329 Fill the table entry using the indexes from before.
3332 static struct ctables_domain *
3333 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3334 enum ctables_domain_type domain)
3337 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3339 const struct ctables_nest *nest = s->nests[a];
3340 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3342 size_t v_idx = nest->domains[domain][i];
3343 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3344 hash = hash_pointer (cv->category, hash);
3345 if (cv->category->type != CCT_TOTAL
3346 && cv->category->type != CCT_SUBTOTAL
3347 && cv->category->type != CCT_POSTCOMPUTE)
3348 hash = value_hash (&cv->value,
3349 var_get_width (nest->vars[v_idx]), hash);
3353 struct ctables_domain *d;
3354 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3356 const struct ctables_cell *df = d->example;
3357 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3359 const struct ctables_nest *nest = s->nests[a];
3360 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3362 size_t v_idx = nest->domains[domain][i];
3363 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3364 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3365 if (cv1->category != cv2->category
3366 || (cv1->category->type != CCT_TOTAL
3367 && cv1->category->type != CCT_SUBTOTAL
3368 && cv1->category->type != CCT_POSTCOMPUTE
3369 && !value_equal (&cv1->value, &cv2->value,
3370 var_get_width (nest->vars[v_idx]))))
3379 struct ctables_sum *sums = (s->table->n_sum_vars
3380 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3383 d = xmalloc (sizeof *d);
3384 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3385 hmap_insert (&s->domains[domain], &d->node, hash);
3389 static struct substring
3390 rtrim_value (const union value *v, const struct variable *var)
3392 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3393 var_get_width (var));
3394 ss_rtrim (&s, ss_cstr (" "));
3399 in_string_range (const union value *v, const struct variable *var,
3400 const struct substring *srange)
3402 struct substring s = rtrim_value (v, var);
3403 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3404 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3407 static const struct ctables_category *
3408 ctables_categories_match (const struct ctables_categories *c,
3409 const union value *v, const struct variable *var)
3411 if (var_is_numeric (var) && v->f == SYSMIS)
3414 const struct ctables_category *othernm = NULL;
3415 for (size_t i = c->n_cats; i-- > 0; )
3417 const struct ctables_category *cat = &c->cats[i];
3421 if (cat->number == v->f)
3426 if (ss_equals (cat->string, rtrim_value (v, var)))
3431 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3432 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3437 if (in_string_range (v, var, cat->srange))
3442 if (var_is_value_missing (var, v))
3446 case CCT_POSTCOMPUTE:
3461 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3464 case CCT_EXCLUDED_MISSING:
3469 return var_is_value_missing (var, v) ? NULL : othernm;
3472 static const struct ctables_category *
3473 ctables_categories_total (const struct ctables_categories *c)
3475 const struct ctables_category *first = &c->cats[0];
3476 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3477 return (first->type == CCT_TOTAL ? first
3478 : last->type == CCT_TOTAL ? last
3482 static struct ctables_cell *
3483 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3484 const struct ctables_category *cats[PIVOT_N_AXES][10])
3487 enum ctables_summary_variant sv = CSV_CELL;
3488 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3490 const struct ctables_nest *nest = s->nests[a];
3491 for (size_t i = 0; i < nest->n; i++)
3492 if (i != nest->scale_idx)
3494 hash = hash_pointer (cats[a][i], hash);
3495 if (cats[a][i]->type != CCT_TOTAL
3496 && cats[a][i]->type != CCT_SUBTOTAL
3497 && cats[a][i]->type != CCT_POSTCOMPUTE)
3498 hash = value_hash (case_data (c, nest->vars[i]),
3499 var_get_width (nest->vars[i]), hash);
3505 struct ctables_cell *cell;
3506 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3508 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3510 const struct ctables_nest *nest = s->nests[a];
3511 for (size_t i = 0; i < nest->n; i++)
3512 if (i != nest->scale_idx
3513 && (cats[a][i] != cell->axes[a].cvs[i].category
3514 || (cats[a][i]->type != CCT_TOTAL
3515 && cats[a][i]->type != CCT_SUBTOTAL
3516 && cats[a][i]->type != CCT_POSTCOMPUTE
3517 && !value_equal (case_data (c, nest->vars[i]),
3518 &cell->axes[a].cvs[i].value,
3519 var_get_width (nest->vars[i])))))
3528 cell = xmalloc (sizeof *cell);
3531 cell->omit_domains = 0;
3532 cell->postcompute = false;
3533 //struct string name = DS_EMPTY_INITIALIZER;
3534 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3536 const struct ctables_nest *nest = s->nests[a];
3537 cell->axes[a].cvs = (nest->n
3538 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3540 for (size_t i = 0; i < nest->n; i++)
3542 const struct ctables_category *cat = cats[a][i];
3543 const struct variable *var = nest->vars[i];
3544 const union value *value = case_data (c, var);
3545 if (i != nest->scale_idx)
3547 const struct ctables_category *subtotal = cat->subtotal;
3548 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3551 if (cat->type == CCT_TOTAL
3552 || cat->type == CCT_SUBTOTAL
3553 || cat->type == CCT_POSTCOMPUTE)
3555 /* XXX these should be more encompassing I think.*/
3559 case PIVOT_AXIS_COLUMN:
3560 cell->omit_domains |= ((1u << CTDT_TABLE) |
3561 (1u << CTDT_LAYER) |
3562 (1u << CTDT_LAYERCOL) |
3563 (1u << CTDT_SUBTABLE) |
3566 case PIVOT_AXIS_ROW:
3567 cell->omit_domains |= ((1u << CTDT_TABLE) |
3568 (1u << CTDT_LAYER) |
3569 (1u << CTDT_LAYERROW) |
3570 (1u << CTDT_SUBTABLE) |
3573 case PIVOT_AXIS_LAYER:
3574 cell->omit_domains |= ((1u << CTDT_TABLE) |
3575 (1u << CTDT_LAYER));
3579 if (cat->type == CCT_POSTCOMPUTE)
3580 cell->postcompute = true;
3583 cell->axes[a].cvs[i].category = cat;
3584 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3587 if (i != nest->scale_idx)
3589 if (!ds_is_empty (&name))
3590 ds_put_cstr (&name, ", ");
3591 char *value_s = data_out (value, var_get_encoding (var),
3592 var_get_print_format (var),
3593 settings_get_fmt_settings ());
3594 if (cat->type == CCT_TOTAL
3595 || cat->type == CCT_SUBTOTAL
3596 || cat->type == CCT_POSTCOMPUTE)
3597 ds_put_format (&name, "%s=total", var_get_name (var));
3599 ds_put_format (&name, "%s=%s", var_get_name (var),
3600 value_s + strspn (value_s, " "));
3606 //cell->name = ds_steal_cstr (&name);
3608 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3609 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3610 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3611 for (size_t i = 0; i < specs->n; i++)
3612 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3613 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3614 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3615 hmap_insert (&s->cells, &cell->node, hash);
3620 is_scale_missing (const struct ctables_summary_spec_set *specs,
3621 const struct ccase *c)
3623 if (!specs->is_scale)
3626 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3629 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3631 const struct variable *var = specs->listwise_vars[i];
3632 if (var_is_num_missing (var, case_num (c, var)))
3640 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3641 const struct ctables_category *cats[PIVOT_N_AXES][10],
3642 bool is_missing, bool excluded_missing,
3643 double d_weight, double e_weight)
3645 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3646 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3648 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3650 bool scale_missing = is_scale_missing (specs, c);
3651 for (size_t i = 0; i < specs->n; i++)
3652 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3653 specs->var, case_data (c, specs->var), specs->is_scale,
3654 scale_missing, is_missing, excluded_missing,
3655 d_weight, e_weight);
3656 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3657 if (!(cell->omit_domains && (1u << dt)))
3659 struct ctables_domain *d = cell->domains[dt];
3660 d->d_total += d_weight;
3661 d->e_total += e_weight;
3663 if (!excluded_missing)
3665 d->d_count += d_weight;
3666 d->e_count += e_weight;
3671 d->d_valid += d_weight;
3672 d->e_valid += e_weight;
3675 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3677 /* XXX listwise_missing??? */
3678 const struct variable *var = s->table->sum_vars[i];
3679 double addend = case_num (c, var);
3680 if (!var_is_num_missing (var, addend))
3682 struct ctables_sum *sum = &d->sums[i];
3683 sum->e_sum += addend * e_weight;
3684 sum->u_sum += addend;
3692 recurse_totals (struct ctables_section *s, const struct ccase *c,
3693 const struct ctables_category *cats[PIVOT_N_AXES][10],
3694 bool is_missing, bool excluded_missing,
3695 double d_weight, double e_weight,
3696 enum pivot_axis_type start_axis, size_t start_nest)
3698 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3700 const struct ctables_nest *nest = s->nests[a];
3701 for (size_t i = start_nest; i < nest->n; i++)
3703 if (i == nest->scale_idx)
3706 const struct variable *var = nest->vars[i];
3708 const struct ctables_category *total = ctables_categories_total (
3709 s->table->categories[var_get_dict_index (var)]);
3712 const struct ctables_category *save = cats[a][i];
3714 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3715 d_weight, e_weight);
3716 recurse_totals (s, c, cats, is_missing, excluded_missing,
3717 d_weight, e_weight, a, i + 1);
3726 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3727 const struct ctables_category *cats[PIVOT_N_AXES][10],
3728 bool is_missing, bool excluded_missing,
3729 double d_weight, double e_weight,
3730 enum pivot_axis_type start_axis, size_t start_nest)
3732 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3734 const struct ctables_nest *nest = s->nests[a];
3735 for (size_t i = start_nest; i < nest->n; i++)
3737 if (i == nest->scale_idx)
3740 const struct ctables_category *save = cats[a][i];
3743 cats[a][i] = save->subtotal;
3744 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3745 d_weight, e_weight);
3746 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3747 d_weight, e_weight, a, i + 1);
3756 ctables_add_occurrence (const struct variable *var,
3757 const union value *value,
3758 struct hmap *occurrences)
3760 int width = var_get_width (var);
3761 unsigned int hash = value_hash (value, width, 0);
3763 struct ctables_occurrence *o;
3764 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3766 if (value_equal (value, &o->value, width))
3769 o = xmalloc (sizeof *o);
3770 value_clone (&o->value, value, width);
3771 hmap_insert (occurrences, &o->node, hash);
3775 ctables_cell_insert (struct ctables_section *s,
3776 const struct ccase *c,
3777 double d_weight, double e_weight)
3779 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3781 /* Does at least one categorical variable have a missing value in an included
3782 or excluded category? */
3783 bool is_missing = false;
3785 /* Does at least one categorical variable have a missing value in an excluded
3787 bool excluded_missing = false;
3789 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3791 const struct ctables_nest *nest = s->nests[a];
3792 for (size_t i = 0; i < nest->n; i++)
3794 if (i == nest->scale_idx)
3797 const struct variable *var = nest->vars[i];
3798 const union value *value = case_data (c, var);
3800 bool var_missing = var_is_value_missing (var, value) != 0;
3804 cats[a][i] = ctables_categories_match (
3805 s->table->categories[var_get_dict_index (var)], value, var);
3811 static const struct ctables_category cct_excluded_missing = {
3812 .type = CCT_EXCLUDED_MISSING,
3815 cats[a][i] = &cct_excluded_missing;
3816 excluded_missing = true;
3821 if (!excluded_missing)
3822 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3824 const struct ctables_nest *nest = s->nests[a];
3825 for (size_t i = 0; i < nest->n; i++)
3826 if (i != nest->scale_idx)
3828 const struct variable *var = nest->vars[i];
3829 const union value *value = case_data (c, var);
3830 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3834 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3835 d_weight, e_weight);
3837 //if (!excluded_missing)
3839 recurse_totals (s, c, cats, is_missing, excluded_missing,
3840 d_weight, e_weight, 0, 0);
3841 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3842 d_weight, e_weight, 0, 0);
3848 const struct ctables_summary_spec_set *set;
3853 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3855 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3856 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3857 if (as->function != bs->function)
3858 return as->function > bs->function ? 1 : -1;
3859 else if (as->percentile != bs->percentile)
3860 return as->percentile < bs->percentile ? 1 : -1;
3862 const char *as_label = as->label ? as->label : "";
3863 const char *bs_label = bs->label ? bs->label : "";
3864 return strcmp (as_label, bs_label);
3867 static struct pivot_value *
3868 ctables_category_create_label__ (const struct ctables_category *cat,
3869 const struct variable *var,
3870 const union value *value)
3872 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3873 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3874 : pivot_value_new_var_value (var, value));
3877 static struct pivot_value *
3878 ctables_postcompute_label (const struct ctables_categories *cats,
3879 const struct ctables_category *cat,
3880 const struct variable *var,
3881 const union value *value)
3883 struct substring in = ss_cstr (cat->pc->label);
3884 struct substring target = ss_cstr (")LABEL[");
3886 struct string out = DS_EMPTY_INITIALIZER;
3889 size_t chunk = ss_find_substring (in, target);
3890 if (chunk == SIZE_MAX)
3892 if (ds_is_empty (&out))
3893 return pivot_value_new_user_text (in.string, in.length);
3896 ds_put_substring (&out, in);
3897 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3901 ds_put_substring (&out, ss_head (in, chunk));
3902 ss_advance (&in, chunk + target.length);
3904 struct substring idx_s;
3905 if (!ss_get_until (&in, ']', &idx_s))
3908 long int idx = strtol (idx_s.string, &tail, 10);
3909 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3912 struct ctables_category *cat2 = &cats->cats[idx - 1];
3913 struct pivot_value *label2
3914 = ctables_category_create_label__ (cat2, var, value);
3915 char *label2_s = pivot_value_to_string_defaults (label2);
3916 ds_put_cstr (&out, label2_s);
3918 pivot_value_destroy (label2);
3923 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3926 static struct pivot_value *
3927 ctables_category_create_label (const struct ctables_categories *cats,
3928 const struct ctables_category *cat,
3929 const struct variable *var,
3930 const union value *value)
3932 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3933 ? ctables_postcompute_label (cats, cat, var, value)
3934 : ctables_category_create_label__ (cat, var, value));
3937 static struct ctables_value *
3938 ctables_value_find__ (struct ctables_table *t, const union value *value,
3939 int width, unsigned int hash)
3941 struct ctables_value *clv;
3942 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3943 hash, &t->clabels_values_map)
3944 if (value_equal (value, &clv->value, width))
3950 ctables_value_insert (struct ctables_table *t, const union value *value,
3953 unsigned int hash = value_hash (value, width, 0);
3954 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3957 clv = xmalloc (sizeof *clv);
3958 value_clone (&clv->value, value, width);
3959 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3963 static struct ctables_value *
3964 ctables_value_find (struct ctables_table *t,
3965 const union value *value, int width)
3967 return ctables_value_find__ (t, value, width,
3968 value_hash (value, width, 0));
3972 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3973 size_t ix[PIVOT_N_AXES])
3975 if (a < PIVOT_N_AXES)
3977 size_t limit = MAX (t->stacks[a].n, 1);
3978 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3979 ctables_table_add_section (t, a + 1, ix);
3983 struct ctables_section *s = &t->sections[t->n_sections++];
3984 *s = (struct ctables_section) {
3986 .cells = HMAP_INITIALIZER (s->cells),
3988 for (a = 0; a < PIVOT_N_AXES; a++)
3991 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3993 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3994 for (size_t i = 0; i < nest->n; i++)
3995 hmap_init (&s->occurrences[a][i]);
3997 for (size_t i = 0; i < N_CTDTS; i++)
3998 hmap_init (&s->domains[i]);
4003 ctpo_add (double a, double b)
4009 ctpo_sub (double a, double b)
4015 ctpo_mul (double a, double b)
4021 ctpo_div (double a, double b)
4023 return b ? a / b : SYSMIS;
4027 ctpo_pow (double a, double b)
4029 int save_errno = errno;
4031 double result = pow (a, b);
4039 ctpo_neg (double a, double b UNUSED)
4044 struct ctables_pcexpr_evaluate_ctx
4046 const struct ctables_cell *cell;
4047 const struct ctables_section *section;
4048 const struct ctables_categories *cats;
4049 enum pivot_axis_type pc_a;
4052 enum fmt_type parse_format;
4055 static double ctables_pcexpr_evaluate (
4056 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4059 ctables_pcexpr_evaluate_nonterminal (
4060 const struct ctables_pcexpr_evaluate_ctx *ctx,
4061 const struct ctables_pcexpr *e, size_t n_args,
4062 double evaluate (double, double))
4064 double args[2] = { 0, 0 };
4065 for (size_t i = 0; i < n_args; i++)
4067 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4068 if (!isfinite (args[i]) || args[i] == SYSMIS)
4071 return evaluate (args[0], args[1]);
4075 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4076 const struct ctables_cell_value *pc_cv)
4078 const struct ctables_section *s = ctx->section;
4081 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4083 const struct ctables_nest *nest = s->nests[a];
4084 for (size_t i = 0; i < nest->n; i++)
4085 if (i != nest->scale_idx)
4087 const struct ctables_cell_value *cv
4088 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4089 : &ctx->cell->axes[a].cvs[i]);
4090 hash = hash_pointer (cv->category, hash);
4091 if (cv->category->type != CCT_TOTAL
4092 && cv->category->type != CCT_SUBTOTAL
4093 && cv->category->type != CCT_POSTCOMPUTE)
4094 hash = value_hash (&cv->value,
4095 var_get_width (nest->vars[i]), hash);
4099 struct ctables_cell *tc;
4100 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4102 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4104 const struct ctables_nest *nest = s->nests[a];
4105 for (size_t i = 0; i < nest->n; i++)
4106 if (i != nest->scale_idx)
4108 const struct ctables_cell_value *p_cv
4109 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4110 : &ctx->cell->axes[a].cvs[i]);
4111 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4112 if (p_cv->category != t_cv->category
4113 || (p_cv->category->type != CCT_TOTAL
4114 && p_cv->category->type != CCT_SUBTOTAL
4115 && p_cv->category->type != CCT_POSTCOMPUTE
4116 && !value_equal (&p_cv->value,
4118 var_get_width (nest->vars[i]))))
4130 const struct ctables_table *t = s->table;
4131 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4132 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4133 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4134 &specs->specs[ctx->summary_idx]);
4138 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4139 const struct ctables_pcexpr *e)
4146 case CTPO_CAT_NRANGE:
4147 case CTPO_CAT_SRANGE:
4149 struct ctables_cell_value cv = {
4150 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4152 assert (cv.category != NULL);
4154 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4155 const struct ctables_occurrence *o;
4158 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4159 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4160 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4162 cv.value = o->value;
4163 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4168 case CTPO_CAT_NUMBER:
4169 case CTPO_CAT_MISSING:
4170 case CTPO_CAT_OTHERNM:
4171 case CTPO_CAT_SUBTOTAL:
4172 case CTPO_CAT_TOTAL:
4174 struct ctables_cell_value cv = {
4175 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4176 .value = { .f = e->number },
4178 assert (cv.category != NULL);
4179 return ctables_pcexpr_evaluate_category (ctx, &cv);
4182 case CTPO_CAT_STRING:
4184 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4186 if (width > e->string.length)
4188 s = xmalloc (width);
4189 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4191 struct ctables_cell_value cv = {
4192 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4193 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4195 assert (cv.category != NULL);
4196 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4202 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4205 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4208 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4211 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4214 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4217 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4223 static const struct ctables_category *
4224 ctables_cell_postcompute (const struct ctables_section *s,
4225 const struct ctables_cell *cell,
4226 enum pivot_axis_type *pc_a_p,
4229 assert (cell->postcompute);
4230 const struct ctables_category *pc_cat = NULL;
4231 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4232 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4234 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4235 if (cv->category->type == CCT_POSTCOMPUTE)
4239 /* Multiple postcomputes cross each other. The value is
4244 pc_cat = cv->category;
4248 *pc_a_idx_p = pc_a_idx;
4252 assert (pc_cat != NULL);
4257 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4258 const struct ctables_cell *cell,
4259 const struct ctables_summary_spec *ss,
4260 struct fmt_spec *format,
4261 bool *is_ctables_format,
4264 enum pivot_axis_type pc_a = 0;
4265 size_t pc_a_idx = 0;
4266 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4267 s, cell, &pc_a, &pc_a_idx);
4271 const struct ctables_postcompute *pc = pc_cat->pc;
4274 for (size_t i = 0; i < pc->specs->n; i++)
4276 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4277 if (ss->function == ss2->function
4278 && ss->percentile == ss2->percentile)
4280 *format = ss2->format;
4281 *is_ctables_format = ss2->is_ctables_format;
4287 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4288 const struct ctables_categories *cats = s->table->categories[
4289 var_get_dict_index (var)];
4290 struct ctables_pcexpr_evaluate_ctx ctx = {
4295 .pc_a_idx = pc_a_idx,
4296 .summary_idx = summary_idx,
4297 .parse_format = pc_cat->parse_format,
4299 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4303 ctables_format (double d, const struct fmt_spec *format,
4304 const struct fmt_settings *settings)
4306 const union value v = { .f = d };
4307 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4309 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4310 produce the results we want for negative numbers, putting the negative
4311 sign in the wrong spot, before the prefix instead of after it. We can't,
4312 in fact, produce the desired results using a custom-currency
4313 specification. Instead, we postprocess the output, moving the negative
4316 NEQUAL: "-N=3" => "N=-3"
4317 PAREN: "-(3)" => "(-3)"
4318 PCTPAREN: "-(3%)" => "(-3%)"
4320 This transformation doesn't affect NEGPAREN. */
4321 char *minus_src = strchr (s, '-');
4322 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4324 char *n_equals = strstr (s, "N=");
4325 char *lparen = strchr (s, '(');
4326 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4328 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4334 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4336 struct pivot_table *pt = pivot_table_create__ (
4338 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4339 : pivot_value_new_text (N_("Custom Tables"))),
4342 pivot_table_set_caption (
4343 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4345 pivot_table_set_corner_text (
4346 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4348 bool summary_dimension = (t->summary_axis != t->slabels_axis
4349 || (!t->slabels_visible
4350 && t->summary_specs.n > 1));
4351 if (summary_dimension)
4353 struct pivot_dimension *d = pivot_dimension_create (
4354 pt, t->slabels_axis, N_("Statistics"));
4355 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4356 if (!t->slabels_visible)
4357 d->hide_all_labels = true;
4358 for (size_t i = 0; i < specs->n; i++)
4359 pivot_category_create_leaf (
4360 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4363 bool categories_dimension = t->clabels_example != NULL;
4364 if (categories_dimension)
4366 struct pivot_dimension *d = pivot_dimension_create (
4367 pt, t->label_axis[t->clabels_from_axis],
4368 t->clabels_from_axis == PIVOT_AXIS_ROW
4369 ? N_("Row Categories")
4370 : N_("Column Categories"));
4371 const struct variable *var = t->clabels_example;
4372 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4373 for (size_t i = 0; i < t->n_clabels_values; i++)
4375 const struct ctables_value *value = t->clabels_values[i];
4376 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4377 assert (cat != NULL);
4378 pivot_category_create_leaf (d->root, ctables_category_create_label (
4379 c, cat, t->clabels_example,
4384 pivot_table_set_look (pt, ct->look);
4385 struct pivot_dimension *d[PIVOT_N_AXES];
4386 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4388 static const char *names[] = {
4389 [PIVOT_AXIS_ROW] = N_("Rows"),
4390 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4391 [PIVOT_AXIS_LAYER] = N_("Layers"),
4393 d[a] = (t->axes[a] || a == t->summary_axis
4394 ? pivot_dimension_create (pt, a, names[a])
4399 assert (t->axes[a]);
4401 for (size_t i = 0; i < t->stacks[a].n; i++)
4403 struct ctables_nest *nest = &t->stacks[a].nests[i];
4404 struct ctables_section **sections = xnmalloc (t->n_sections,
4406 size_t n_sections = 0;
4408 size_t n_total_cells = 0;
4409 size_t max_depth = 0;
4410 for (size_t j = 0; j < t->n_sections; j++)
4411 if (t->sections[j].nests[a] == nest)
4413 struct ctables_section *s = &t->sections[j];
4414 sections[n_sections++] = s;
4415 n_total_cells += s->cells.count;
4417 size_t depth = s->nests[a]->n;
4418 max_depth = MAX (depth, max_depth);
4421 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4423 size_t n_sorted = 0;
4425 for (size_t j = 0; j < n_sections; j++)
4427 struct ctables_section *s = sections[j];
4429 struct ctables_cell *cell;
4430 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4432 sorted[n_sorted++] = cell;
4433 assert (n_sorted <= n_total_cells);
4436 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4437 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4440 for (size_t j = 0; j < n_sorted; j++)
4442 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4447 struct ctables_level
4449 enum ctables_level_type
4451 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4452 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4453 CTL_SUMMARY, /* Summary functions. */
4457 enum settings_value_show vlabel; /* CTL_VAR only. */
4460 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4461 size_t n_levels = 0;
4462 for (size_t k = 0; k < nest->n; k++)
4464 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4465 if (vlabel != CTVL_NONE)
4467 levels[n_levels++] = (struct ctables_level) {
4469 .vlabel = (enum settings_value_show) vlabel,
4474 if (nest->scale_idx != k
4475 && (k != nest->n - 1 || t->label_axis[a] == a))
4477 levels[n_levels++] = (struct ctables_level) {
4478 .type = CTL_CATEGORY,
4484 if (!summary_dimension && a == t->slabels_axis)
4486 levels[n_levels++] = (struct ctables_level) {
4487 .type = CTL_SUMMARY,
4488 .var_idx = SIZE_MAX,
4492 /* Pivot categories:
4494 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4495 - category for nest->vars[0], if nest->scale_idx != 0
4496 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4497 - category for nest->vars[1], if nest->scale_idx != 1
4499 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4500 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4501 - summary function, if 'a == t->slabels_axis && a ==
4504 Additional dimensions:
4506 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4508 - If 't->label_axis[b] == a' for some 'b != a', add a category
4513 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4515 for (size_t j = 0; j < n_sorted; j++)
4517 struct ctables_cell *cell = sorted[j];
4518 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4520 struct ctables_domain *domain = cell->domains[CTDT_SUBTABLE];
4521 if (!domain->sequence)
4524 domain->sequence = ++x;
4526 domain = cell->domains[CTDT_TABLE];
4527 if (!domain->sequence)
4530 domain->sequence = ++x;
4533 size_t n_common = 0;
4536 for (; n_common < n_levels; n_common++)
4538 const struct ctables_level *level = &levels[n_common];
4539 if (level->type == CTL_CATEGORY)
4541 size_t var_idx = level->var_idx;
4542 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4543 if (prev->axes[a].cvs[var_idx].category != c)
4545 else if (c->type != CCT_SUBTOTAL
4546 && c->type != CCT_TOTAL
4547 && c->type != CCT_POSTCOMPUTE
4548 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4549 &cell->axes[a].cvs[var_idx].value,
4550 var_get_type (nest->vars[var_idx])))
4556 for (size_t k = n_common; k < n_levels; k++)
4558 const struct ctables_level *level = &levels[k];
4559 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4560 if (level->type == CTL_SUMMARY)
4562 assert (k == n_levels - 1);
4564 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4565 for (size_t m = 0; m < specs->n; m++)
4567 int leaf = pivot_category_create_leaf (
4568 parent, ctables_summary_label (&specs->specs[m],
4576 const struct variable *var = nest->vars[level->var_idx];
4577 struct pivot_value *label;
4578 if (level->type == CTL_VAR)
4580 label = pivot_value_new_variable (var);
4581 label->variable.show = level->vlabel;
4583 else if (level->type == CTL_CATEGORY)
4585 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4586 label = ctables_category_create_label (
4587 t->categories[var_get_dict_index (var)],
4588 cv->category, var, &cv->value);
4593 if (k == n_levels - 1)
4594 prev_leaf = pivot_category_create_leaf (parent, label);
4596 groups[k] = pivot_category_create_group__ (parent, label);
4600 cell->axes[a].leaf = prev_leaf;
4609 for (size_t i = 0; i < t->n_sections; i++)
4611 struct ctables_section *s = &t->sections[i];
4613 struct ctables_cell *cell;
4614 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4619 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4620 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4621 for (size_t j = 0; j < specs->n; j++)
4624 size_t n_dindexes = 0;
4626 if (summary_dimension)
4627 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4629 if (categories_dimension)
4631 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4632 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4633 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4634 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4637 dindexes[n_dindexes++] = ctv->leaf;
4640 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4643 int leaf = cell->axes[a].leaf;
4644 if (a == t->summary_axis && !summary_dimension)
4646 dindexes[n_dindexes++] = leaf;
4649 const struct ctables_summary_spec *ss = &specs->specs[j];
4651 struct fmt_spec format = specs->specs[j].format;
4652 bool is_ctables_format = ss->is_ctables_format;
4653 double d = (cell->postcompute
4654 ? ctables_cell_calculate_postcompute (
4655 s, cell, ss, &format, &is_ctables_format, j)
4656 : ctables_summary_value (cell, &cell->summaries[j],
4659 struct pivot_value *value;
4660 if (ct->hide_threshold != 0
4661 && d < ct->hide_threshold
4662 && ctables_summary_function_is_count (ss->function))
4664 value = pivot_value_new_user_text_nocopy (
4665 xasprintf ("<%d", ct->hide_threshold));
4667 else if (d == 0 && ct->zero)
4668 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4669 else if (d == SYSMIS && ct->missing)
4670 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4671 else if (is_ctables_format)
4672 value = pivot_value_new_user_text_nocopy (
4673 ctables_format (d, &format, &ct->ctables_formats));
4676 value = pivot_value_new_number (d);
4677 value->numeric.format = format;
4679 /* XXX should text values be right-justified? */
4680 pivot_table_put (pt, dindexes, n_dindexes, value);
4685 pivot_table_submit (pt);
4689 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4691 enum pivot_axis_type label_pos = t->label_axis[a];
4695 t->clabels_from_axis = a;
4697 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4698 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4700 const struct ctables_stack *stack = &t->stacks[a];
4704 const struct ctables_nest *n0 = &stack->nests[0];
4707 assert (stack->n == 1);
4711 const struct variable *v0 = n0->vars[n0->n - 1];
4712 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4713 t->clabels_example = v0;
4715 for (size_t i = 0; i < c0->n_cats; i++)
4716 if (c0->cats[i].type == CCT_FUNCTION)
4718 msg (SE, _("%s=%s is not allowed with sorting based "
4719 "on a summary function."),
4720 subcommand_name, pos_name);
4723 if (n0->n - 1 == n0->scale_idx)
4725 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4726 "but %s is a scale variable."),
4727 subcommand_name, pos_name, var_get_name (v0));
4731 for (size_t i = 1; i < stack->n; i++)
4733 const struct ctables_nest *ni = &stack->nests[i];
4735 const struct variable *vi = ni->vars[ni->n - 1];
4736 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4738 if (ni->n - 1 == ni->scale_idx)
4740 msg (SE, _("%s=%s requires the variables to be moved to be "
4741 "categorical, but %s is a scale variable."),
4742 subcommand_name, pos_name, var_get_name (vi));
4745 if (var_get_width (v0) != var_get_width (vi))
4747 msg (SE, _("%s=%s requires the variables to be "
4748 "moved to have the same width, but %s has "
4749 "width %d and %s has width %d."),
4750 subcommand_name, pos_name,
4751 var_get_name (v0), var_get_width (v0),
4752 var_get_name (vi), var_get_width (vi));
4755 if (!val_labs_equal (var_get_value_labels (v0),
4756 var_get_value_labels (vi)))
4758 msg (SE, _("%s=%s requires the variables to be "
4759 "moved to have the same value labels, but %s "
4760 "and %s have different value labels."),
4761 subcommand_name, pos_name,
4762 var_get_name (v0), var_get_name (vi));
4765 if (!ctables_categories_equal (c0, ci))
4767 msg (SE, _("%s=%s requires the variables to be "
4768 "moved to have the same category "
4769 "specifications, but %s and %s have different "
4770 "category specifications."),
4771 subcommand_name, pos_name,
4772 var_get_name (v0), var_get_name (vi));
4781 add_sum_var (struct variable *var,
4782 struct variable ***sum_vars, size_t *n, size_t *allocated)
4784 for (size_t i = 0; i < *n; i++)
4785 if (var == (*sum_vars)[i])
4788 if (*n >= *allocated)
4789 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4790 (*sum_vars)[*n] = var;
4795 enumerate_sum_vars (const struct ctables_axis *a,
4796 struct variable ***sum_vars, size_t *n, size_t *allocated)
4804 for (size_t i = 0; i < N_CSVS; i++)
4805 for (size_t j = 0; j < a->specs[i].n; j++)
4807 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4808 if (ctables_function_is_pctsum (spec->function))
4809 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4815 for (size_t i = 0; i < 2; i++)
4816 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4822 ctables_prepare_table (struct ctables_table *t)
4824 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4827 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4829 for (size_t j = 0; j < t->stacks[a].n; j++)
4831 struct ctables_nest *nest = &t->stacks[a].nests[j];
4832 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4834 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4835 nest->n_domains[dt] = 0;
4837 for (size_t k = 0; k < nest->n; k++)
4839 if (k == nest->scale_idx)
4848 if (a != PIVOT_AXIS_LAYER)
4855 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4856 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4857 : a == PIVOT_AXIS_ROW)
4859 if (k == nest->n - 1
4860 || (nest->scale_idx == nest->n - 1
4861 && k == nest->n - 2))
4867 if (a == PIVOT_AXIS_COLUMN)
4872 if (a == PIVOT_AXIS_ROW)
4877 nest->domains[dt][nest->n_domains[dt]++] = k;
4884 struct ctables_nest *nest = xmalloc (sizeof *nest);
4885 *nest = (struct ctables_nest) { .n = 0 };
4886 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4888 /* There's no point in moving labels away from an axis that has no
4889 labels, so avoid dealing with the special cases around that. */
4890 t->label_axis[a] = a;
4893 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4894 for (size_t i = 0; i < stack->n; i++)
4896 struct ctables_nest *nest = &stack->nests[i];
4897 if (!nest->specs[CSV_CELL].n)
4899 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4900 specs->specs = xmalloc (sizeof *specs->specs);
4903 enum ctables_summary_function function
4904 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4906 *specs->specs = (struct ctables_summary_spec) {
4907 .function = function,
4908 .format = ctables_summary_default_format (function, specs->var),
4911 specs->var = nest->vars[0];
4913 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4914 &nest->specs[CSV_CELL]);
4916 else if (!nest->specs[CSV_TOTAL].n)
4917 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4918 &nest->specs[CSV_CELL]);
4920 if (t->ctables->smissing_listwise)
4922 struct variable **listwise_vars = NULL;
4924 size_t allocated = 0;
4926 for (size_t j = nest->group_head; j < stack->n; j++)
4928 const struct ctables_nest *other_nest = &stack->nests[j];
4929 if (other_nest->group_head != nest->group_head)
4932 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4935 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4936 sizeof *listwise_vars);
4937 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4940 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4943 listwise_vars = xmemdup (listwise_vars,
4944 n * sizeof *listwise_vars);
4945 nest->specs[sv].listwise_vars = listwise_vars;
4946 nest->specs[sv].n_listwise_vars = n;
4951 struct ctables_summary_spec_set *merged = &t->summary_specs;
4952 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4954 for (size_t j = 0; j < stack->n; j++)
4956 const struct ctables_nest *nest = &stack->nests[j];
4958 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4959 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4964 struct merge_item min = items[0];
4965 for (size_t j = 1; j < n_left; j++)
4966 if (merge_item_compare_3way (&items[j], &min) < 0)
4969 if (merged->n >= merged->allocated)
4970 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4971 sizeof *merged->specs);
4972 merged->specs[merged->n++] = min.set->specs[min.ofs];
4974 for (size_t j = 0; j < n_left; )
4976 if (merge_item_compare_3way (&items[j], &min) == 0)
4978 struct merge_item *item = &items[j];
4979 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4980 if (++item->ofs >= item->set->n)
4982 items[j] = items[--n_left];
4992 for (size_t j = 0; j < merged->n; j++)
4993 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4995 for (size_t j = 0; j < stack->n; j++)
4997 const struct ctables_nest *nest = &stack->nests[j];
4998 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5000 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5001 for (size_t k = 0; k < specs->n; k++)
5002 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5003 specs->specs[k].axis_idx);
5009 size_t allocated_sum_vars = 0;
5010 enumerate_sum_vars (t->axes[t->summary_axis],
5011 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5013 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5014 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5018 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5019 enum pivot_axis_type a)
5021 struct ctables_stack *stack = &t->stacks[a];
5022 for (size_t i = 0; i < stack->n; i++)
5024 const struct ctables_nest *nest = &stack->nests[i];
5025 const struct variable *var = nest->vars[nest->n - 1];
5026 const union value *value = case_data (c, var);
5028 if (var_is_numeric (var) && value->f == SYSMIS)
5031 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5033 ctables_value_insert (t, value, var_get_width (var));
5038 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5040 const struct ctables_value *const *ap = a_;
5041 const struct ctables_value *const *bp = b_;
5042 const struct ctables_value *a = *ap;
5043 const struct ctables_value *b = *bp;
5044 const int *width = width_;
5045 return value_compare_3way (&a->value, &b->value, *width);
5049 ctables_sort_clabels_values (struct ctables_table *t)
5051 const struct variable *v0 = t->clabels_example;
5052 int width = var_get_width (v0);
5054 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5057 const struct val_labs *val_labs = var_get_value_labels (v0);
5058 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5059 vl = val_labs_next (val_labs, vl))
5060 if (ctables_categories_match (c0, &vl->value, v0))
5061 ctables_value_insert (t, &vl->value, width);
5064 size_t n = hmap_count (&t->clabels_values_map);
5065 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5067 struct ctables_value *clv;
5069 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5070 t->clabels_values[i++] = clv;
5071 t->n_clabels_values = n;
5074 sort (t->clabels_values, n, sizeof *t->clabels_values,
5075 compare_clabels_values_3way, &width);
5077 for (size_t i = 0; i < n; i++)
5078 t->clabels_values[i]->leaf = i;
5082 ctables_add_category_occurrences (const struct variable *var,
5083 struct hmap *occurrences,
5084 const struct ctables_categories *cats)
5086 const struct val_labs *val_labs = var_get_value_labels (var);
5088 for (size_t i = 0; i < cats->n_cats; i++)
5090 const struct ctables_category *c = &cats->cats[i];
5094 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5100 int width = var_get_width (var);
5102 value_init (&value, width);
5103 value_copy_buf_rpad (&value, width,
5104 CHAR_CAST (uint8_t *, c->string.string),
5105 c->string.length, ' ');
5106 ctables_add_occurrence (var, &value, occurrences);
5107 value_destroy (&value, width);
5112 assert (var_is_numeric (var));
5113 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5114 vl = val_labs_next (val_labs, vl))
5115 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5116 ctables_add_occurrence (var, &vl->value, occurrences);
5120 assert (var_is_alpha (var));
5121 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5122 vl = val_labs_next (val_labs, vl))
5123 if (in_string_range (&vl->value, var, c->srange))
5124 ctables_add_occurrence (var, &vl->value, occurrences);
5128 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5129 vl = val_labs_next (val_labs, vl))
5130 if (var_is_value_missing (var, &vl->value))
5131 ctables_add_occurrence (var, &vl->value, occurrences);
5135 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5136 vl = val_labs_next (val_labs, vl))
5137 ctables_add_occurrence (var, &vl->value, occurrences);
5140 case CCT_POSTCOMPUTE:
5150 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5151 vl = val_labs_next (val_labs, vl))
5152 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5153 ctables_add_occurrence (var, &vl->value, occurrences);
5156 case CCT_EXCLUDED_MISSING:
5163 ctables_section_recurse_add_empty_categories (
5164 struct ctables_section *s,
5165 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5166 enum pivot_axis_type a, size_t a_idx)
5168 if (a >= PIVOT_N_AXES)
5169 ctables_cell_insert__ (s, c, cats);
5170 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5171 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5174 const struct variable *var = s->nests[a]->vars[a_idx];
5175 const struct ctables_categories *categories = s->table->categories[
5176 var_get_dict_index (var)];
5177 int width = var_get_width (var);
5178 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5179 const struct ctables_occurrence *o;
5180 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5182 union value *value = case_data_rw (c, var);
5183 value_destroy (value, width);
5184 value_clone (value, &o->value, width);
5185 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5186 assert (cats[a][a_idx] != NULL);
5187 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5190 for (size_t i = 0; i < categories->n_cats; i++)
5192 const struct ctables_category *cat = &categories->cats[i];
5193 if (cat->type == CCT_POSTCOMPUTE)
5195 cats[a][a_idx] = cat;
5196 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5203 ctables_section_add_empty_categories (struct ctables_section *s)
5205 bool show_empty = false;
5206 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5208 for (size_t k = 0; k < s->nests[a]->n; k++)
5209 if (k != s->nests[a]->scale_idx)
5211 const struct variable *var = s->nests[a]->vars[k];
5212 const struct ctables_categories *cats = s->table->categories[
5213 var_get_dict_index (var)];
5214 if (cats->show_empty)
5217 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5223 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5224 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5225 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5230 ctables_section_clear (struct ctables_section *s)
5232 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5234 const struct ctables_nest *nest = s->nests[a];
5235 for (size_t i = 0; i < nest->n; i++)
5236 if (i != nest->scale_idx)
5238 const struct variable *var = nest->vars[i];
5239 int width = var_get_width (var);
5240 struct ctables_occurrence *o, *next;
5241 struct hmap *map = &s->occurrences[a][i];
5242 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5244 value_destroy (&o->value, width);
5245 hmap_delete (map, &o->node);
5252 struct ctables_cell *cell, *next_cell;
5253 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5255 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5257 const struct ctables_nest *nest = s->nests[a];
5258 for (size_t i = 0; i < nest->n; i++)
5259 if (i != nest->scale_idx)
5260 value_destroy (&cell->axes[a].cvs[i].value,
5261 var_get_width (nest->vars[i]));
5262 free (cell->axes[a].cvs);
5265 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5266 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5267 for (size_t i = 0; i < specs->n; i++)
5268 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5269 free (cell->summaries);
5271 hmap_delete (&s->cells, &cell->node);
5274 hmap_shrink (&s->cells);
5276 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5278 struct ctables_domain *domain, *next_domain;
5279 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5282 free (domain->sums);
5283 hmap_delete (&s->domains[dt], &domain->node);
5286 hmap_shrink (&s->domains[dt]);
5291 ctables_section_uninit (struct ctables_section *s)
5293 ctables_section_clear (s);
5295 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5297 struct ctables_nest *nest = s->nests[a];
5298 for (size_t i = 0; i < nest->n; i++)
5299 hmap_destroy (&s->occurrences[a][i]);
5300 free (s->occurrences[a]);
5303 hmap_destroy (&s->cells);
5304 for (size_t i = 0; i < N_CTDTS; i++)
5305 hmap_destroy (&s->domains[i]);
5309 ctables_table_clear (struct ctables_table *t)
5311 for (size_t i = 0; i < t->n_sections; i++)
5312 ctables_section_clear (&t->sections[i]);
5314 if (t->clabels_example)
5316 int width = var_get_width (t->clabels_example);
5317 struct ctables_value *value, *next_value;
5318 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5319 &t->clabels_values_map)
5321 value_destroy (&value->value, width);
5322 hmap_delete (&t->clabels_values_map, &value->node);
5325 hmap_shrink (&t->clabels_values_map);
5327 free (t->clabels_values);
5328 t->clabels_values = NULL;
5329 t->n_clabels_values = 0;
5334 ctables_execute (struct dataset *ds, struct casereader *input,
5337 for (size_t i = 0; i < ct->n_tables; i++)
5339 struct ctables_table *t = ct->tables[i];
5340 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5341 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5342 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5343 sizeof *t->sections);
5344 size_t ix[PIVOT_N_AXES];
5345 ctables_table_add_section (t, 0, ix);
5348 struct dictionary *dict = dataset_dict (ds);
5349 struct casegrouper *grouper
5350 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5351 ? casegrouper_create_splits (input, dict)
5352 : casegrouper_create_vars (input, NULL, 0));
5353 struct casereader *group;
5354 while (casegrouper_get_next_group (grouper, &group))
5356 /* Output SPLIT FILE variables. */
5357 struct ccase *c = casereader_peek (group, 0);
5360 output_split_file_values (ds, c);
5364 bool warn_on_invalid = true;
5365 for (c = casereader_read (group); c;
5366 case_unref (c), c = casereader_read (group))
5368 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5369 double e_weight = (ct->e_weight
5370 ? var_force_valid_weight (ct->e_weight,
5371 case_num (c, ct->e_weight),
5375 for (size_t i = 0; i < ct->n_tables; i++)
5377 struct ctables_table *t = ct->tables[i];
5379 for (size_t j = 0; j < t->n_sections; j++)
5380 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5382 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5383 if (t->label_axis[a] != a)
5384 ctables_insert_clabels_values (t, c, a);
5387 casereader_destroy (group);
5389 for (size_t i = 0; i < ct->n_tables; i++)
5391 struct ctables_table *t = ct->tables[i];
5393 if (t->clabels_example)
5394 ctables_sort_clabels_values (t);
5396 for (size_t j = 0; j < t->n_sections; j++)
5397 ctables_section_add_empty_categories (&t->sections[j]);
5399 ctables_table_output (ct, t);
5400 ctables_table_clear (t);
5403 return casegrouper_destroy (grouper);
5408 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5409 struct dictionary *);
5412 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5418 case CTPO_CAT_STRING:
5419 ss_dealloc (&e->string);
5422 case CTPO_CAT_SRANGE:
5423 for (size_t i = 0; i < 2; i++)
5424 ss_dealloc (&e->srange[i]);
5433 for (size_t i = 0; i < 2; i++)
5434 ctables_pcexpr_destroy (e->subs[i]);
5438 case CTPO_CAT_NUMBER:
5439 case CTPO_CAT_NRANGE:
5440 case CTPO_CAT_MISSING:
5441 case CTPO_CAT_OTHERNM:
5442 case CTPO_CAT_SUBTOTAL:
5443 case CTPO_CAT_TOTAL:
5447 msg_location_destroy (e->location);
5452 static struct ctables_pcexpr *
5453 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5454 struct ctables_pcexpr *sub0,
5455 struct ctables_pcexpr *sub1)
5457 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5458 *e = (struct ctables_pcexpr) {
5460 .subs = { sub0, sub1 },
5461 .location = msg_location_merged (sub0->location, sub1->location),
5466 /* How to parse an operator. */
5469 enum token_type token;
5470 enum ctables_postcompute_op op;
5473 static const struct operator *
5474 ctables_pcexpr_match_operator (struct lexer *lexer,
5475 const struct operator ops[], size_t n_ops)
5477 for (const struct operator *op = ops; op < ops + n_ops; op++)
5478 if (lex_token (lexer) == op->token)
5480 if (op->token != T_NEG_NUM)
5489 static struct ctables_pcexpr *
5490 ctables_pcexpr_parse_binary_operators__ (
5491 struct lexer *lexer, struct dictionary *dict,
5492 const struct operator ops[], size_t n_ops,
5493 parse_recursively_func *parse_next_level,
5494 const char *chain_warning, struct ctables_pcexpr *lhs)
5496 for (int op_count = 0; ; op_count++)
5498 const struct operator *op
5499 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5502 if (op_count > 1 && chain_warning)
5503 msg_at (SW, lhs->location, "%s", chain_warning);
5508 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5511 ctables_pcexpr_destroy (lhs);
5515 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5519 static struct ctables_pcexpr *
5520 ctables_pcexpr_parse_binary_operators (
5521 struct lexer *lexer, struct dictionary *dict,
5522 const struct operator ops[], size_t n_ops,
5523 parse_recursively_func *parse_next_level, const char *chain_warning)
5525 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5529 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5531 chain_warning, lhs);
5534 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5535 struct dictionary *);
5537 static struct ctables_pcexpr
5538 ctpo_cat_nrange (double low, double high)
5540 return (struct ctables_pcexpr) {
5541 .op = CTPO_CAT_NRANGE,
5542 .nrange = { low, high },
5546 static struct ctables_pcexpr
5547 ctpo_cat_srange (struct substring low, struct substring high)
5549 return (struct ctables_pcexpr) {
5550 .op = CTPO_CAT_SRANGE,
5551 .srange = { low, high },
5555 static struct ctables_pcexpr *
5556 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5558 int start_ofs = lex_ofs (lexer);
5559 struct ctables_pcexpr e;
5560 if (lex_is_number (lexer))
5562 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5563 .number = lex_number (lexer) };
5566 else if (lex_match_id (lexer, "MISSING"))
5567 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5568 else if (lex_match_id (lexer, "OTHERNM"))
5569 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5570 else if (lex_match_id (lexer, "TOTAL"))
5571 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5572 else if (lex_match_id (lexer, "SUBTOTAL"))
5574 size_t subtotal_index = 0;
5575 if (lex_match (lexer, T_LBRACK))
5577 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5579 subtotal_index = lex_integer (lexer);
5581 if (!lex_force_match (lexer, T_RBRACK))
5584 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5585 .subtotal_index = subtotal_index };
5587 else if (lex_match (lexer, T_LBRACK))
5589 if (lex_match_id (lexer, "LO"))
5591 if (!lex_force_match_id (lexer, "THRU"))
5594 if (lex_is_string (lexer))
5596 struct substring low = { .string = NULL };
5597 struct substring high = parse_substring (lexer, dict);
5598 e = ctpo_cat_srange (low, high);
5602 if (!lex_force_num (lexer))
5604 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5608 else if (lex_is_number (lexer))
5610 double number = lex_number (lexer);
5612 if (lex_match_id (lexer, "THRU"))
5614 if (lex_match_id (lexer, "HI"))
5615 e = ctpo_cat_nrange (number, DBL_MAX);
5618 if (!lex_force_num (lexer))
5620 e = ctpo_cat_nrange (number, lex_number (lexer));
5625 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5628 else if (lex_is_string (lexer))
5630 struct substring s = parse_substring (lexer, dict);
5632 if (lex_match_id (lexer, "THRU"))
5634 struct substring high;
5636 if (lex_match_id (lexer, "HI"))
5637 high = (struct substring) { .string = NULL };
5640 if (!lex_force_string (lexer))
5645 high = parse_substring (lexer, dict);
5648 e = ctpo_cat_srange (s, high);
5651 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5655 lex_error (lexer, NULL);
5659 if (!lex_force_match (lexer, T_RBRACK))
5661 if (e.op == CTPO_CAT_STRING)
5662 ss_dealloc (&e.string);
5663 else if (e.op == CTPO_CAT_SRANGE)
5665 ss_dealloc (&e.srange[0]);
5666 ss_dealloc (&e.srange[1]);
5671 else if (lex_match (lexer, T_LPAREN))
5673 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5676 if (!lex_force_match (lexer, T_RPAREN))
5678 ctables_pcexpr_destroy (ep);
5685 lex_error (lexer, NULL);
5689 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5690 return xmemdup (&e, sizeof e);
5693 static struct ctables_pcexpr *
5694 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5695 struct lexer *lexer, int start_ofs)
5697 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5698 *e = (struct ctables_pcexpr) {
5701 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5706 static struct ctables_pcexpr *
5707 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5709 static const struct operator op = { T_EXP, CTPO_POW };
5711 const char *chain_warning =
5712 _("The exponentiation operator (`**') is left-associative: "
5713 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5714 "To disable this warning, insert parentheses.");
5716 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5717 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5718 ctables_pcexpr_parse_primary,
5721 /* Special case for situations like "-5**6", which must be parsed as
5724 int start_ofs = lex_ofs (lexer);
5725 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5726 *lhs = (struct ctables_pcexpr) {
5727 .op = CTPO_CONSTANT,
5728 .number = -lex_tokval (lexer),
5729 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5733 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5734 lexer, dict, &op, 1,
5735 ctables_pcexpr_parse_primary, chain_warning, lhs);
5739 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5742 /* Parses the unary minus level. */
5743 static struct ctables_pcexpr *
5744 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5746 int start_ofs = lex_ofs (lexer);
5747 if (!lex_match (lexer, T_DASH))
5748 return ctables_pcexpr_parse_exp (lexer, dict);
5750 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5754 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5757 /* Parses the multiplication and division level. */
5758 static struct ctables_pcexpr *
5759 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5761 static const struct operator ops[] =
5763 { T_ASTERISK, CTPO_MUL },
5764 { T_SLASH, CTPO_DIV },
5767 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5768 sizeof ops / sizeof *ops,
5769 ctables_pcexpr_parse_neg, NULL);
5772 /* Parses the addition and subtraction level. */
5773 static struct ctables_pcexpr *
5774 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5776 static const struct operator ops[] =
5778 { T_PLUS, CTPO_ADD },
5779 { T_DASH, CTPO_SUB },
5780 { T_NEG_NUM, CTPO_ADD },
5783 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5784 ops, sizeof ops / sizeof *ops,
5785 ctables_pcexpr_parse_mul, NULL);
5788 static struct ctables_postcompute *
5789 ctables_find_postcompute (struct ctables *ct, const char *name)
5791 struct ctables_postcompute *pc;
5792 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5793 utf8_hash_case_string (name, 0), &ct->postcomputes)
5794 if (!utf8_strcasecmp (pc->name, name))
5800 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5803 int pcompute_start = lex_ofs (lexer) - 1;
5805 if (!lex_match (lexer, T_AND))
5807 lex_error_expecting (lexer, "&");
5810 if (!lex_force_id (lexer))
5813 char *name = ss_xstrdup (lex_tokss (lexer));
5816 if (!lex_force_match (lexer, T_EQUALS)
5817 || !lex_force_match_id (lexer, "EXPR")
5818 || !lex_force_match (lexer, T_LPAREN))
5824 int expr_start = lex_ofs (lexer);
5825 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5826 int expr_end = lex_ofs (lexer) - 1;
5827 if (!expr || !lex_force_match (lexer, T_RPAREN))
5829 ctables_pcexpr_destroy (expr);
5833 int pcompute_end = lex_ofs (lexer) - 1;
5835 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5838 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5841 msg_at (SW, location, _("New definition of &%s will override the "
5842 "previous definition."),
5844 msg_at (SN, pc->location, _("This is the previous definition."));
5846 ctables_pcexpr_destroy (pc->expr);
5847 msg_location_destroy (pc->location);
5852 pc = xmalloc (sizeof *pc);
5853 *pc = (struct ctables_postcompute) { .name = name };
5854 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5855 utf8_hash_case_string (pc->name, 0));
5858 pc->location = location;
5860 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5865 ctables_parse_pproperties_format (struct lexer *lexer,
5866 struct ctables_summary_spec_set *sss)
5868 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5870 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5871 && !(lex_token (lexer) == T_ID
5872 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5873 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5874 lex_tokss (lexer)))))
5876 /* Parse function. */
5877 enum ctables_summary_function function;
5878 if (!parse_ctables_summary_function (lexer, &function))
5881 /* Parse percentile. */
5882 double percentile = 0;
5883 if (function == CTSF_PTILE)
5885 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5887 percentile = lex_number (lexer);
5892 struct fmt_spec format;
5893 bool is_ctables_format;
5894 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5897 if (sss->n >= sss->allocated)
5898 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5899 sizeof *sss->specs);
5900 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5901 .function = function,
5902 .percentile = percentile,
5904 .is_ctables_format = is_ctables_format,
5910 ctables_summary_spec_set_uninit (sss);
5915 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5917 struct ctables_postcompute **pcs = NULL;
5919 size_t allocated_pcs = 0;
5921 while (lex_match (lexer, T_AND))
5923 if (!lex_force_id (lexer))
5925 struct ctables_postcompute *pc
5926 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5929 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5934 if (n_pcs >= allocated_pcs)
5935 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5939 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5941 if (lex_match_id (lexer, "LABEL"))
5943 lex_match (lexer, T_EQUALS);
5944 if (!lex_force_string (lexer))
5947 for (size_t i = 0; i < n_pcs; i++)
5949 free (pcs[i]->label);
5950 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5955 else if (lex_match_id (lexer, "FORMAT"))
5957 lex_match (lexer, T_EQUALS);
5959 struct ctables_summary_spec_set sss;
5960 if (!ctables_parse_pproperties_format (lexer, &sss))
5963 for (size_t i = 0; i < n_pcs; i++)
5966 ctables_summary_spec_set_uninit (pcs[i]->specs);
5968 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5969 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5971 ctables_summary_spec_set_uninit (&sss);
5973 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5975 lex_match (lexer, T_EQUALS);
5976 bool hide_source_cats;
5977 if (!parse_bool (lexer, &hide_source_cats))
5979 for (size_t i = 0; i < n_pcs; i++)
5980 pcs[i]->hide_source_cats = hide_source_cats;
5984 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
5997 put_strftime (struct string *out, time_t now, const char *format)
5999 const struct tm *tm = localtime (&now);
6001 strftime (value, sizeof value, format, tm);
6002 ds_put_cstr (out, value);
6006 skip_prefix (struct substring *s, struct substring prefix)
6008 if (ss_starts_with (*s, prefix))
6010 ss_advance (s, prefix.length);
6018 put_table_expression (struct string *out, struct lexer *lexer,
6019 struct dictionary *dict, int expr_start, int expr_end)
6022 for (int ofs = expr_start; ofs < expr_end; ofs++)
6024 const struct token *t = lex_ofs_token (lexer, ofs);
6025 if (t->type == T_LBRACK)
6027 else if (t->type == T_RBRACK && nest > 0)
6033 else if (t->type == T_ID)
6035 const struct variable *var
6036 = dict_lookup_var (dict, t->string.string);
6037 const char *label = var ? var_get_label (var) : NULL;
6038 ds_put_cstr (out, label ? label : t->string.string);
6042 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6043 ds_put_byte (out, ' ');
6045 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6046 ds_put_cstr (out, repr);
6049 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6050 ds_put_byte (out, ' ');
6056 put_title_text (struct string *out, struct substring in, time_t now,
6057 struct lexer *lexer, struct dictionary *dict,
6058 int expr_start, int expr_end)
6062 size_t chunk = ss_find_byte (in, ')');
6063 ds_put_substring (out, ss_head (in, chunk));
6064 ss_advance (&in, chunk);
6065 if (ss_is_empty (in))
6068 if (skip_prefix (&in, ss_cstr (")DATE")))
6069 put_strftime (out, now, "%x");
6070 else if (skip_prefix (&in, ss_cstr (")TIME")))
6071 put_strftime (out, now, "%X");
6072 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6073 put_table_expression (out, lexer, dict, expr_start, expr_end);
6076 ds_put_byte (out, ')');
6077 ss_advance (&in, 1);
6083 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6085 struct casereader *input = NULL;
6087 struct measure_guesser *mg = measure_guesser_create (ds);
6090 input = proc_open (ds);
6091 measure_guesser_run (mg, input);
6092 measure_guesser_destroy (mg);
6095 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6096 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6097 enum settings_value_show tvars = settings_get_show_variables ();
6098 for (size_t i = 0; i < n_vars; i++)
6099 vlabels[i] = (enum ctables_vlabel) tvars;
6101 struct pivot_table_look *look = pivot_table_look_unshare (
6102 pivot_table_look_ref (pivot_table_look_get_default ()));
6103 look->omit_empty = false;
6105 struct ctables *ct = xmalloc (sizeof *ct);
6106 *ct = (struct ctables) {
6107 .dict = dataset_dict (ds),
6109 .ctables_formats = FMT_SETTINGS_INIT,
6111 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6114 time_t now = time (NULL);
6119 const char *dot_string;
6120 const char *comma_string;
6122 static const struct ctf ctfs[4] = {
6123 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6124 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6125 { CTEF_PAREN, "-,(,),", "-.(.)." },
6126 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6128 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6129 for (size_t i = 0; i < 4; i++)
6131 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6132 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6133 fmt_number_style_from_string (s));
6136 if (!lex_force_match (lexer, T_SLASH))
6139 while (!lex_match_id (lexer, "TABLE"))
6141 if (lex_match_id (lexer, "FORMAT"))
6143 double widths[2] = { SYSMIS, SYSMIS };
6144 double units_per_inch = 72.0;
6146 while (lex_token (lexer) != T_SLASH)
6148 if (lex_match_id (lexer, "MINCOLWIDTH"))
6150 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6153 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6155 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6158 else if (lex_match_id (lexer, "UNITS"))
6160 lex_match (lexer, T_EQUALS);
6161 if (lex_match_id (lexer, "POINTS"))
6162 units_per_inch = 72.0;
6163 else if (lex_match_id (lexer, "INCHES"))
6164 units_per_inch = 1.0;
6165 else if (lex_match_id (lexer, "CM"))
6166 units_per_inch = 2.54;
6169 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6173 else if (lex_match_id (lexer, "EMPTY"))
6178 lex_match (lexer, T_EQUALS);
6179 if (lex_match_id (lexer, "ZERO"))
6181 /* Nothing to do. */
6183 else if (lex_match_id (lexer, "BLANK"))
6184 ct->zero = xstrdup ("");
6185 else if (lex_force_string (lexer))
6187 ct->zero = ss_xstrdup (lex_tokss (lexer));
6193 else if (lex_match_id (lexer, "MISSING"))
6195 lex_match (lexer, T_EQUALS);
6196 if (!lex_force_string (lexer))
6200 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6201 ? ss_xstrdup (lex_tokss (lexer))
6207 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6208 "UNITS", "EMPTY", "MISSING");
6213 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6214 && widths[0] > widths[1])
6216 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6220 for (size_t i = 0; i < 2; i++)
6221 if (widths[i] != SYSMIS)
6223 int *wr = ct->look->width_ranges[TABLE_HORZ];
6224 wr[i] = widths[i] / units_per_inch * 96.0;
6229 else if (lex_match_id (lexer, "VLABELS"))
6231 if (!lex_force_match_id (lexer, "VARIABLES"))
6233 lex_match (lexer, T_EQUALS);
6235 struct variable **vars;
6237 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6241 if (!lex_force_match_id (lexer, "DISPLAY"))
6246 lex_match (lexer, T_EQUALS);
6248 enum ctables_vlabel vlabel;
6249 if (lex_match_id (lexer, "DEFAULT"))
6250 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6251 else if (lex_match_id (lexer, "NAME"))
6253 else if (lex_match_id (lexer, "LABEL"))
6254 vlabel = CTVL_LABEL;
6255 else if (lex_match_id (lexer, "BOTH"))
6257 else if (lex_match_id (lexer, "NONE"))
6261 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6267 for (size_t i = 0; i < n_vars; i++)
6268 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6271 else if (lex_match_id (lexer, "MRSETS"))
6273 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6275 lex_match (lexer, T_EQUALS);
6276 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6279 else if (lex_match_id (lexer, "SMISSING"))
6281 if (lex_match_id (lexer, "VARIABLE"))
6282 ct->smissing_listwise = false;
6283 else if (lex_match_id (lexer, "LISTWISE"))
6284 ct->smissing_listwise = true;
6287 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6291 else if (lex_match_id (lexer, "PCOMPUTE"))
6293 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6296 else if (lex_match_id (lexer, "PPROPERTIES"))
6298 if (!ctables_parse_pproperties (lexer, ct))
6301 else if (lex_match_id (lexer, "WEIGHT"))
6303 if (!lex_force_match_id (lexer, "VARIABLE"))
6305 lex_match (lexer, T_EQUALS);
6306 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6310 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6312 if (lex_match_id (lexer, "COUNT"))
6314 lex_match (lexer, T_EQUALS);
6315 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6318 ct->hide_threshold = lex_integer (lexer);
6321 else if (ct->hide_threshold == 0)
6322 ct->hide_threshold = 5;
6326 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6327 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6328 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6332 if (!lex_force_match (lexer, T_SLASH))
6336 size_t allocated_tables = 0;
6339 if (ct->n_tables >= allocated_tables)
6340 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6341 sizeof *ct->tables);
6343 struct ctables_category *cat = xmalloc (sizeof *cat);
6344 *cat = (struct ctables_category) {
6346 .include_missing = false,
6347 .sort_ascending = true,
6350 struct ctables_categories *c = xmalloc (sizeof *c);
6351 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6352 *c = (struct ctables_categories) {
6359 struct ctables_categories **categories = xnmalloc (n_vars,
6360 sizeof *categories);
6361 for (size_t i = 0; i < n_vars; i++)
6364 struct ctables_table *t = xmalloc (sizeof *t);
6365 *t = (struct ctables_table) {
6367 .slabels_axis = PIVOT_AXIS_COLUMN,
6368 .slabels_visible = true,
6369 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6371 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6372 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6373 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6375 .clabels_from_axis = PIVOT_AXIS_LAYER,
6376 .categories = categories,
6377 .n_categories = n_vars,
6380 ct->tables[ct->n_tables++] = t;
6382 lex_match (lexer, T_EQUALS);
6383 int expr_start = lex_ofs (lexer);
6384 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6386 if (lex_match (lexer, T_BY))
6388 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6389 ct, t, PIVOT_AXIS_COLUMN))
6392 if (lex_match (lexer, T_BY))
6394 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6395 ct, t, PIVOT_AXIS_LAYER))
6399 int expr_end = lex_ofs (lexer);
6401 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6402 && !t->axes[PIVOT_AXIS_LAYER])
6404 lex_error (lexer, _("At least one variable must be specified."));
6408 const struct ctables_axis *scales[PIVOT_N_AXES];
6409 size_t n_scales = 0;
6410 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6412 scales[a] = find_scale (t->axes[a]);
6418 msg (SE, _("Scale variables may appear only on one axis."));
6419 if (scales[PIVOT_AXIS_ROW])
6420 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6421 _("This scale variable appears on the rows axis."));
6422 if (scales[PIVOT_AXIS_COLUMN])
6423 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6424 _("This scale variable appears on the columns axis."));
6425 if (scales[PIVOT_AXIS_LAYER])
6426 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6427 _("This scale variable appears on the layer axis."));
6431 const struct ctables_axis *summaries[PIVOT_N_AXES];
6432 size_t n_summaries = 0;
6433 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6435 summaries[a] = (scales[a]
6437 : find_categorical_summary_spec (t->axes[a]));
6441 if (n_summaries > 1)
6443 msg (SE, _("Summaries may appear only on one axis."));
6444 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6447 msg_at (SN, summaries[a]->loc,
6449 ? _("This variable on the rows axis has a summary.")
6450 : a == PIVOT_AXIS_COLUMN
6451 ? _("This variable on the columns axis has a summary.")
6452 : _("This variable on the layers axis has a summary."));
6454 msg_at (SN, summaries[a]->loc,
6455 _("This is a scale variable, so it always has a "
6456 "summary even if the syntax does not explicitly "
6461 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6462 if (n_summaries ? summaries[a] : t->axes[a])
6464 t->summary_axis = a;
6468 if (lex_token (lexer) == T_ENDCMD)
6470 if (!ctables_prepare_table (t))
6474 if (!lex_force_match (lexer, T_SLASH))
6477 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6479 if (lex_match_id (lexer, "SLABELS"))
6481 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6483 if (lex_match_id (lexer, "POSITION"))
6485 lex_match (lexer, T_EQUALS);
6486 if (lex_match_id (lexer, "COLUMN"))
6487 t->slabels_axis = PIVOT_AXIS_COLUMN;
6488 else if (lex_match_id (lexer, "ROW"))
6489 t->slabels_axis = PIVOT_AXIS_ROW;
6490 else if (lex_match_id (lexer, "LAYER"))
6491 t->slabels_axis = PIVOT_AXIS_LAYER;
6494 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6498 else if (lex_match_id (lexer, "VISIBLE"))
6500 lex_match (lexer, T_EQUALS);
6501 if (!parse_bool (lexer, &t->slabels_visible))
6506 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6511 else if (lex_match_id (lexer, "CLABELS"))
6513 if (lex_match_id (lexer, "AUTO"))
6515 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6516 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6518 else if (lex_match_id (lexer, "ROWLABELS"))
6520 lex_match (lexer, T_EQUALS);
6521 if (lex_match_id (lexer, "OPPOSITE"))
6522 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6523 else if (lex_match_id (lexer, "LAYER"))
6524 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6527 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6531 else if (lex_match_id (lexer, "COLLABELS"))
6533 lex_match (lexer, T_EQUALS);
6534 if (lex_match_id (lexer, "OPPOSITE"))
6535 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6536 else if (lex_match_id (lexer, "LAYER"))
6537 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6540 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6546 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6551 else if (lex_match_id (lexer, "CRITERIA"))
6553 if (!lex_force_match_id (lexer, "CILEVEL"))
6555 lex_match (lexer, T_EQUALS);
6557 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6559 t->cilevel = lex_number (lexer);
6562 else if (lex_match_id (lexer, "CATEGORIES"))
6564 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6568 else if (lex_match_id (lexer, "TITLES"))
6573 if (lex_match_id (lexer, "CAPTION"))
6574 textp = &t->caption;
6575 else if (lex_match_id (lexer, "CORNER"))
6577 else if (lex_match_id (lexer, "TITLE"))
6581 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6584 lex_match (lexer, T_EQUALS);
6586 struct string s = DS_EMPTY_INITIALIZER;
6587 while (lex_is_string (lexer))
6589 if (!ds_is_empty (&s))
6590 ds_put_byte (&s, ' ');
6591 put_title_text (&s, lex_tokss (lexer), now,
6592 lexer, dataset_dict (ds),
6593 expr_start, expr_end);
6597 *textp = ds_steal_cstr (&s);
6599 while (lex_token (lexer) != T_SLASH
6600 && lex_token (lexer) != T_ENDCMD);
6602 else if (lex_match_id (lexer, "SIGTEST"))
6606 t->chisq = xmalloc (sizeof *t->chisq);
6607 *t->chisq = (struct ctables_chisq) {
6609 .include_mrsets = true,
6610 .all_visible = true,
6616 if (lex_match_id (lexer, "TYPE"))
6618 lex_match (lexer, T_EQUALS);
6619 if (!lex_force_match_id (lexer, "CHISQUARE"))
6622 else if (lex_match_id (lexer, "ALPHA"))
6624 lex_match (lexer, T_EQUALS);
6625 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6627 t->chisq->alpha = lex_number (lexer);
6630 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6632 lex_match (lexer, T_EQUALS);
6633 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6636 else if (lex_match_id (lexer, "CATEGORIES"))
6638 lex_match (lexer, T_EQUALS);
6639 if (lex_match_id (lexer, "ALLVISIBLE"))
6640 t->chisq->all_visible = true;
6641 else if (lex_match_id (lexer, "SUBTOTALS"))
6642 t->chisq->all_visible = false;
6645 lex_error_expecting (lexer,
6646 "ALLVISIBLE", "SUBTOTALS");
6652 lex_error_expecting (lexer, "TYPE", "ALPHA",
6653 "INCLUDEMRSETS", "CATEGORIES");
6657 while (lex_token (lexer) != T_SLASH
6658 && lex_token (lexer) != T_ENDCMD);
6660 else if (lex_match_id (lexer, "COMPARETEST"))
6664 t->pairwise = xmalloc (sizeof *t->pairwise);
6665 *t->pairwise = (struct ctables_pairwise) {
6667 .alpha = { .05, .05 },
6668 .adjust = BONFERRONI,
6669 .include_mrsets = true,
6670 .meansvariance_allcats = true,
6671 .all_visible = true,
6680 if (lex_match_id (lexer, "TYPE"))
6682 lex_match (lexer, T_EQUALS);
6683 if (lex_match_id (lexer, "PROP"))
6684 t->pairwise->type = PROP;
6685 else if (lex_match_id (lexer, "MEAN"))
6686 t->pairwise->type = MEAN;
6689 lex_error_expecting (lexer, "PROP", "MEAN");
6693 else if (lex_match_id (lexer, "ALPHA"))
6695 lex_match (lexer, T_EQUALS);
6697 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6699 double a0 = lex_number (lexer);
6702 lex_match (lexer, T_COMMA);
6703 if (lex_is_number (lexer))
6705 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6707 double a1 = lex_number (lexer);
6710 t->pairwise->alpha[0] = MIN (a0, a1);
6711 t->pairwise->alpha[1] = MAX (a0, a1);
6714 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6716 else if (lex_match_id (lexer, "ADJUST"))
6718 lex_match (lexer, T_EQUALS);
6719 if (lex_match_id (lexer, "BONFERRONI"))
6720 t->pairwise->adjust = BONFERRONI;
6721 else if (lex_match_id (lexer, "BH"))
6722 t->pairwise->adjust = BH;
6723 else if (lex_match_id (lexer, "NONE"))
6724 t->pairwise->adjust = 0;
6727 lex_error_expecting (lexer, "BONFERRONI", "BH",
6732 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6734 lex_match (lexer, T_EQUALS);
6735 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6738 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6740 lex_match (lexer, T_EQUALS);
6741 if (lex_match_id (lexer, "ALLCATS"))
6742 t->pairwise->meansvariance_allcats = true;
6743 else if (lex_match_id (lexer, "TESTEDCATS"))
6744 t->pairwise->meansvariance_allcats = false;
6747 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6751 else if (lex_match_id (lexer, "CATEGORIES"))
6753 lex_match (lexer, T_EQUALS);
6754 if (lex_match_id (lexer, "ALLVISIBLE"))
6755 t->pairwise->all_visible = true;
6756 else if (lex_match_id (lexer, "SUBTOTALS"))
6757 t->pairwise->all_visible = false;
6760 lex_error_expecting (lexer, "ALLVISIBLE",
6765 else if (lex_match_id (lexer, "MERGE"))
6767 lex_match (lexer, T_EQUALS);
6768 if (!parse_bool (lexer, &t->pairwise->merge))
6771 else if (lex_match_id (lexer, "STYLE"))
6773 lex_match (lexer, T_EQUALS);
6774 if (lex_match_id (lexer, "APA"))
6775 t->pairwise->apa_style = true;
6776 else if (lex_match_id (lexer, "SIMPLE"))
6777 t->pairwise->apa_style = false;
6780 lex_error_expecting (lexer, "APA", "SIMPLE");
6784 else if (lex_match_id (lexer, "SHOWSIG"))
6786 lex_match (lexer, T_EQUALS);
6787 if (!parse_bool (lexer, &t->pairwise->show_sig))
6792 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6793 "INCLUDEMRSETS", "MEANSVARIANCE",
6794 "CATEGORIES", "MERGE", "STYLE",
6799 while (lex_token (lexer) != T_SLASH
6800 && lex_token (lexer) != T_ENDCMD);
6804 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6805 "CRITERIA", "CATEGORIES", "TITLES",
6806 "SIGTEST", "COMPARETEST");
6810 if (!lex_match (lexer, T_SLASH))
6814 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6815 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6817 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6821 if (!ctables_prepare_table (t))
6824 while (lex_token (lexer) != T_ENDCMD);
6827 input = proc_open (ds);
6828 bool ok = ctables_execute (ds, input, ct);
6829 ok = proc_commit (ds) && ok;
6831 ctables_destroy (ct);
6832 return ok ? CMD_SUCCESS : CMD_FAILURE;
6837 ctables_destroy (ct);