1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1399 fmt_settings_uninit (&ct->ctables_formats);
1400 pivot_table_look_unref (ct->look);
1404 for (size_t i = 0; i < ct->n_tables; i++)
1405 ctables_table_destroy (ct->tables[i]);
1410 static struct ctables_category
1411 cct_nrange (double low, double high)
1413 return (struct ctables_category) {
1415 .nrange = { low, high }
1419 static struct ctables_category
1420 cct_srange (struct substring low, struct substring high)
1422 return (struct ctables_category) {
1424 .srange = { low, high }
1429 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1430 struct ctables_category *cat)
1433 if (lex_match (lexer, T_EQUALS))
1435 if (!lex_force_string (lexer))
1438 total_label = ss_xstrdup (lex_tokss (lexer));
1442 total_label = xstrdup (_("Subtotal"));
1444 *cat = (struct ctables_category) {
1445 .type = CCT_SUBTOTAL,
1446 .hide_subcategories = hide_subcategories,
1447 .total_label = total_label
1452 static struct substring
1453 parse_substring (struct lexer *lexer, struct dictionary *dict)
1455 struct substring s = recode_substring_pool (
1456 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1457 ss_rtrim (&s, ss_cstr (" "));
1463 ctables_table_parse_explicit_category (struct lexer *lexer,
1464 struct dictionary *dict,
1466 struct ctables_category *cat)
1468 if (lex_match_id (lexer, "OTHERNM"))
1469 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1470 else if (lex_match_id (lexer, "MISSING"))
1471 *cat = (struct ctables_category) { .type = CCT_MISSING };
1472 else if (lex_match_id (lexer, "SUBTOTAL"))
1473 return ctables_table_parse_subtotal (lexer, false, cat);
1474 else if (lex_match_id (lexer, "HSUBTOTAL"))
1475 return ctables_table_parse_subtotal (lexer, true, cat);
1476 else if (lex_match_id (lexer, "LO"))
1478 if (!lex_force_match_id (lexer, "THRU"))
1480 if (lex_is_string (lexer))
1482 struct substring sr0 = { .string = NULL };
1483 struct substring sr1 = parse_substring (lexer, dict);
1484 *cat = cct_srange (sr0, sr1);
1486 else if (lex_force_num (lexer))
1488 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1494 else if (lex_is_number (lexer))
1496 double number = lex_number (lexer);
1498 if (lex_match_id (lexer, "THRU"))
1500 if (lex_match_id (lexer, "HI"))
1501 *cat = cct_nrange (number, DBL_MAX);
1504 if (!lex_force_num (lexer))
1506 *cat = cct_nrange (number, lex_number (lexer));
1511 *cat = (struct ctables_category) {
1516 else if (lex_is_string (lexer))
1518 struct substring s = parse_substring (lexer, dict);
1519 if (lex_match_id (lexer, "THRU"))
1521 if (lex_match_id (lexer, "HI"))
1523 struct substring sr1 = { .string = NULL };
1524 *cat = cct_srange (s, sr1);
1528 if (!lex_force_string (lexer))
1533 struct substring sr1 = parse_substring (lexer, dict);
1534 *cat = cct_srange (s, sr1);
1538 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1540 else if (lex_match (lexer, T_AND))
1542 if (!lex_force_id (lexer))
1544 struct ctables_postcompute *pc = ctables_find_postcompute (
1545 ct, lex_tokcstr (lexer));
1548 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1549 msg_at (SE, loc, _("Unknown postcompute &%s."),
1550 lex_tokcstr (lexer));
1551 msg_location_destroy (loc);
1556 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1560 lex_error (lexer, NULL);
1568 parse_category_string (struct msg_location *location,
1569 struct substring s, const struct dictionary *dict,
1570 enum fmt_type format, double *n)
1573 char *error = data_in (s, dict_get_encoding (dict), format,
1574 settings_get_fmt_settings (), &v, 0, NULL);
1577 msg_at (SE, location,
1578 _("Failed to parse category specification as format %s: %s."),
1579 fmt_name (format), error);
1588 static struct ctables_category *
1589 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1590 const struct ctables_pcexpr *e)
1592 struct ctables_category *best = NULL;
1593 size_t n_subtotals = 0;
1594 for (size_t i = 0; i < cats->n_cats; i++)
1596 struct ctables_category *cat = &cats->cats[i];
1599 case CTPO_CAT_NUMBER:
1600 if (cat->type == CCT_NUMBER && cat->number == e->number)
1604 case CTPO_CAT_STRING:
1605 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1609 case CTPO_CAT_NRANGE:
1610 if (cat->type == CCT_NRANGE
1611 && cat->nrange[0] == e->nrange[0]
1612 && cat->nrange[1] == e->nrange[1])
1616 case CTPO_CAT_SRANGE:
1617 if (cat->type == CCT_SRANGE
1618 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1619 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1623 case CTPO_CAT_MISSING:
1624 if (cat->type == CCT_MISSING)
1628 case CTPO_CAT_OTHERNM:
1629 if (cat->type == CCT_OTHERNM)
1633 case CTPO_CAT_SUBTOTAL:
1634 if (cat->type == CCT_SUBTOTAL)
1637 if (e->subtotal_index == n_subtotals)
1639 else if (e->subtotal_index == 0)
1644 case CTPO_CAT_TOTAL:
1645 if (cat->type == CCT_TOTAL)
1659 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1664 static struct ctables_category *
1665 ctables_find_category_for_postcompute (const struct dictionary *dict,
1666 const struct ctables_categories *cats,
1667 enum fmt_type parse_format,
1668 const struct ctables_pcexpr *e)
1670 if (parse_format != FMT_F)
1672 if (e->op == CTPO_CAT_STRING)
1675 if (!parse_category_string (e->location, e->string, dict,
1676 parse_format, &number))
1679 struct ctables_pcexpr e2 = {
1680 .op = CTPO_CAT_NUMBER,
1682 .location = e->location,
1684 return ctables_find_category_for_postcompute__ (cats, &e2);
1686 else if (e->op == CTPO_CAT_SRANGE)
1689 if (!e->srange[0].string)
1690 nrange[0] = -DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[0], dict,
1692 parse_format, &nrange[0]))
1695 if (!e->srange[1].string)
1696 nrange[1] = DBL_MAX;
1697 else if (!parse_category_string (e->location, e->srange[1], dict,
1698 parse_format, &nrange[1]))
1701 struct ctables_pcexpr e2 = {
1702 .op = CTPO_CAT_NRANGE,
1703 .nrange = { nrange[0], nrange[1] },
1704 .location = e->location,
1706 return ctables_find_category_for_postcompute__ (cats, &e2);
1709 return ctables_find_category_for_postcompute__ (cats, e);
1713 ctables_recursive_check_postcompute (struct dictionary *dict,
1714 const struct ctables_pcexpr *e,
1715 struct ctables_category *pc_cat,
1716 const struct ctables_categories *cats,
1717 const struct msg_location *cats_location)
1721 case CTPO_CAT_NUMBER:
1722 case CTPO_CAT_STRING:
1723 case CTPO_CAT_NRANGE:
1724 case CTPO_CAT_SRANGE:
1725 case CTPO_CAT_MISSING:
1726 case CTPO_CAT_OTHERNM:
1727 case CTPO_CAT_SUBTOTAL:
1728 case CTPO_CAT_TOTAL:
1730 struct ctables_category *cat = ctables_find_category_for_postcompute (
1731 dict, cats, pc_cat->parse_format, e);
1734 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1736 size_t n_subtotals = 0;
1737 for (size_t i = 0; i < cats->n_cats; i++)
1738 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1739 if (n_subtotals > 1)
1741 msg_at (SE, cats_location,
1742 ngettext ("These categories include %zu instance "
1743 "of SUBTOTAL or HSUBTOTAL, so references "
1744 "from computed categories must refer to "
1745 "subtotals by position, "
1746 "e.g. SUBTOTAL[1].",
1747 "These categories include %zu instances "
1748 "of SUBTOTAL or HSUBTOTAL, so references "
1749 "from computed categories must refer to "
1750 "subtotals by position, "
1751 "e.g. SUBTOTAL[1].",
1754 msg_at (SN, e->location,
1755 _("This is the reference that lacks a position."));
1760 msg_at (SE, pc_cat->location,
1761 _("Computed category &%s references a category not included "
1762 "in the category list."),
1764 msg_at (SN, e->location, _("This is the missing category."));
1765 if (e->op == CTPO_CAT_SUBTOTAL)
1766 msg_at (SN, cats_location,
1767 _("To fix the problem, add subtotals to the "
1768 "list of categories here."));
1769 else if (e->op == CTPO_CAT_TOTAL)
1770 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1771 "CATEGORIES specification."));
1773 msg_at (SN, cats_location,
1774 _("To fix the problem, add the missing category to the "
1775 "list of categories here."));
1778 if (pc_cat->pc->hide_source_cats)
1792 for (size_t i = 0; i < 2; i++)
1793 if (e->subs[i] && !ctables_recursive_check_postcompute (
1794 dict, e->subs[i], pc_cat, cats, cats_location))
1803 all_strings (struct variable **vars, size_t n_vars,
1804 const struct ctables_category *cat)
1806 for (size_t j = 0; j < n_vars; j++)
1807 if (var_is_numeric (vars[j]))
1809 msg_at (SE, cat->location,
1810 _("This category specification may be applied only to string "
1811 "variables, but this subcommand tries to apply it to "
1812 "numeric variable %s."),
1813 var_get_name (vars[j]));
1820 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1821 struct ctables *ct, struct ctables_table *t)
1823 if (!lex_match_id (lexer, "VARIABLES"))
1825 lex_match (lexer, T_EQUALS);
1827 struct variable **vars;
1829 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1832 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1833 for (size_t i = 1; i < n_vars; i++)
1835 const struct fmt_spec *f = var_get_print_format (vars[i]);
1836 if (f->type != common_format->type)
1838 common_format = NULL;
1844 && (fmt_get_category (common_format->type)
1845 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1847 struct ctables_categories *c = xmalloc (sizeof *c);
1848 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1849 for (size_t i = 0; i < n_vars; i++)
1851 struct ctables_categories **cp
1852 = &t->categories[var_get_dict_index (vars[i])];
1853 ctables_categories_unref (*cp);
1857 size_t allocated_cats = 0;
1858 int cats_start_ofs = -1;
1859 int cats_end_ofs = -1;
1860 if (lex_match (lexer, T_LBRACK))
1862 cats_start_ofs = lex_ofs (lexer);
1865 if (c->n_cats >= allocated_cats)
1866 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1868 int start_ofs = lex_ofs (lexer);
1869 struct ctables_category *cat = &c->cats[c->n_cats];
1870 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1872 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1875 lex_match (lexer, T_COMMA);
1877 while (!lex_match (lexer, T_RBRACK));
1878 cats_end_ofs = lex_ofs (lexer) - 1;
1881 struct ctables_category cat = {
1883 .include_missing = false,
1884 .sort_ascending = true,
1886 bool show_totals = false;
1887 char *total_label = NULL;
1888 bool totals_before = false;
1889 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1891 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1893 lex_match (lexer, T_EQUALS);
1894 if (lex_match_id (lexer, "A"))
1895 cat.sort_ascending = true;
1896 else if (lex_match_id (lexer, "D"))
1897 cat.sort_ascending = false;
1900 lex_error_expecting (lexer, "A", "D");
1904 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1906 lex_match (lexer, T_EQUALS);
1907 if (lex_match_id (lexer, "VALUE"))
1908 cat.type = CCT_VALUE;
1909 else if (lex_match_id (lexer, "LABEL"))
1910 cat.type = CCT_LABEL;
1913 cat.type = CCT_FUNCTION;
1914 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1917 if (lex_match (lexer, T_LPAREN))
1919 cat.sort_var = parse_variable (lexer, dict);
1923 if (cat.sort_function == CTSF_PTILE)
1925 lex_match (lexer, T_COMMA);
1926 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1928 cat.percentile = lex_number (lexer);
1932 if (!lex_force_match (lexer, T_RPAREN))
1935 else if (ctables_function_availability (cat.sort_function)
1938 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1943 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1945 lex_match (lexer, T_EQUALS);
1946 if (lex_match_id (lexer, "INCLUDE"))
1947 cat.include_missing = true;
1948 else if (lex_match_id (lexer, "EXCLUDE"))
1949 cat.include_missing = false;
1952 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1956 else if (lex_match_id (lexer, "TOTAL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!parse_bool (lexer, &show_totals))
1962 else if (lex_match_id (lexer, "LABEL"))
1964 lex_match (lexer, T_EQUALS);
1965 if (!lex_force_string (lexer))
1968 total_label = ss_xstrdup (lex_tokss (lexer));
1971 else if (lex_match_id (lexer, "POSITION"))
1973 lex_match (lexer, T_EQUALS);
1974 if (lex_match_id (lexer, "BEFORE"))
1975 totals_before = true;
1976 else if (lex_match_id (lexer, "AFTER"))
1977 totals_before = false;
1980 lex_error_expecting (lexer, "BEFORE", "AFTER");
1984 else if (lex_match_id (lexer, "EMPTY"))
1986 lex_match (lexer, T_EQUALS);
1987 if (lex_match_id (lexer, "INCLUDE"))
1988 c->show_empty = true;
1989 else if (lex_match_id (lexer, "EXCLUDE"))
1990 c->show_empty = false;
1993 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2000 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2001 "TOTAL", "LABEL", "POSITION", "EMPTY");
2003 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2010 if (c->n_cats >= allocated_cats)
2011 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2012 c->cats[c->n_cats++] = cat;
2017 if (c->n_cats >= allocated_cats)
2018 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2020 struct ctables_category *totals;
2023 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2024 totals = &c->cats[0];
2027 totals = &c->cats[c->n_cats];
2030 *totals = (struct ctables_category) {
2032 .total_label = total_label ? total_label : xstrdup (_("Total")),
2036 struct ctables_category *subtotal = NULL;
2037 for (size_t i = totals_before ? 0 : c->n_cats;
2038 totals_before ? i < c->n_cats : i-- > 0;
2039 totals_before ? i++ : 0)
2041 struct ctables_category *cat = &c->cats[i];
2050 cat->subtotal = subtotal;
2053 case CCT_POSTCOMPUTE:
2064 case CCT_EXCLUDED_MISSING:
2069 if (cats_start_ofs != -1)
2071 for (size_t i = 0; i < c->n_cats; i++)
2073 struct ctables_category *cat = &c->cats[i];
2076 case CCT_POSTCOMPUTE:
2077 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2078 struct msg_location *cats_location
2079 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2080 bool ok = ctables_recursive_check_postcompute (
2081 dict, cat->pc->expr, cat, c, cats_location);
2082 msg_location_destroy (cats_location);
2089 for (size_t j = 0; j < n_vars; j++)
2090 if (var_is_alpha (vars[j]))
2092 msg_at (SE, cat->location,
2093 _("This category specification may be applied "
2094 "only to numeric variables, but this "
2095 "subcommand tries to apply it to string "
2097 var_get_name (vars[j]));
2106 if (!parse_category_string (cat->location, cat->string, dict,
2107 common_format->type, &n))
2110 ss_dealloc (&cat->string);
2112 cat->type = CCT_NUMBER;
2115 else if (!all_strings (vars, n_vars, cat))
2124 if (!cat->srange[0].string)
2126 else if (!parse_category_string (cat->location,
2127 cat->srange[0], dict,
2128 common_format->type, &n[0]))
2131 if (!cat->srange[1].string)
2133 else if (!parse_category_string (cat->location,
2134 cat->srange[1], dict,
2135 common_format->type, &n[1]))
2138 ss_dealloc (&cat->srange[0]);
2139 ss_dealloc (&cat->srange[1]);
2141 cat->type = CCT_NRANGE;
2142 cat->nrange[0] = n[0];
2143 cat->nrange[1] = n[1];
2145 else if (!all_strings (vars, n_vars, cat))
2156 case CCT_EXCLUDED_MISSING:
2171 ctables_nest_uninit (struct ctables_nest *nest)
2174 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2175 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2176 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2177 free (nest->domains[dt]);
2181 ctables_stack_uninit (struct ctables_stack *stack)
2185 for (size_t i = 0; i < stack->n; i++)
2186 ctables_nest_uninit (&stack->nests[i]);
2187 free (stack->nests);
2191 static struct ctables_stack
2192 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2199 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2200 for (size_t i = 0; i < s0.n; i++)
2201 for (size_t j = 0; j < s1.n; j++)
2203 const struct ctables_nest *a = &s0.nests[i];
2204 const struct ctables_nest *b = &s1.nests[j];
2206 size_t allocate = a->n + b->n;
2207 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2209 for (size_t k = 0; k < a->n; k++)
2210 vars[n++] = a->vars[k];
2211 for (size_t k = 0; k < b->n; k++)
2212 vars[n++] = b->vars[k];
2213 assert (n == allocate);
2215 const struct ctables_nest *summary_src;
2216 if (!a->specs[CSV_CELL].var)
2218 else if (!b->specs[CSV_CELL].var)
2223 struct ctables_nest *new = &stack.nests[stack.n++];
2224 *new = (struct ctables_nest) {
2226 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2227 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2231 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2232 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2234 ctables_stack_uninit (&s0);
2235 ctables_stack_uninit (&s1);
2239 static struct ctables_stack
2240 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2242 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2243 for (size_t i = 0; i < s0.n; i++)
2244 stack.nests[stack.n++] = s0.nests[i];
2245 for (size_t i = 0; i < s1.n; i++)
2247 stack.nests[stack.n] = s1.nests[i];
2248 stack.nests[stack.n].group_head += s0.n;
2251 assert (stack.n == s0.n + s1.n);
2257 static struct ctables_stack
2258 var_fts (const struct ctables_axis *a)
2260 struct variable **vars = xmalloc (sizeof *vars);
2263 struct ctables_nest *nest = xmalloc (sizeof *nest);
2264 *nest = (struct ctables_nest) {
2267 .scale_idx = a->scale ? 0 : SIZE_MAX,
2269 if (a->specs[CSV_CELL].n || a->scale)
2270 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2272 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2273 nest->specs[sv].var = a->var;
2274 nest->specs[sv].is_scale = a->scale;
2276 return (struct ctables_stack) { .nests = nest, .n = 1 };
2279 static struct ctables_stack
2280 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2283 return (struct ctables_stack) { .n = 0 };
2291 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
2295 /* This should consider any of the scale variables found in the result to
2296 be linked to each other listwise for SMISSING=LISTWISE. */
2297 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2298 enumerate_fts (axis_type, a->subs[1]));
2304 union ctables_summary
2306 /* COUNT, VALIDN, TOTALN. */
2309 /* MINIMUM, MAXIMUM, RANGE. */
2316 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2317 struct moments1 *moments;
2319 /* MEDIAN, MODE, PTILE. */
2322 struct casewriter *writer;
2327 /* XXX multiple response */
2331 ctables_summary_init (union ctables_summary *s,
2332 const struct ctables_summary_spec *ss)
2334 switch (ss->function)
2338 case CTSF_ROWPCT_COUNT:
2339 case CTSF_COLPCT_COUNT:
2340 case CTSF_TABLEPCT_COUNT:
2341 case CTSF_SUBTABLEPCT_COUNT:
2342 case CTSF_LAYERPCT_COUNT:
2343 case CTSF_LAYERROWPCT_COUNT:
2344 case CTSF_LAYERCOLPCT_COUNT:
2345 case CTSF_ROWPCT_VALIDN:
2346 case CTSF_COLPCT_VALIDN:
2347 case CTSF_TABLEPCT_VALIDN:
2348 case CTSF_SUBTABLEPCT_VALIDN:
2349 case CTSF_LAYERPCT_VALIDN:
2350 case CTSF_LAYERROWPCT_VALIDN:
2351 case CTSF_LAYERCOLPCT_VALIDN:
2352 case CTSF_ROWPCT_TOTALN:
2353 case CTSF_COLPCT_TOTALN:
2354 case CTSF_TABLEPCT_TOTALN:
2355 case CTSF_SUBTABLEPCT_TOTALN:
2356 case CTSF_LAYERPCT_TOTALN:
2357 case CTSF_LAYERROWPCT_TOTALN:
2358 case CTSF_LAYERCOLPCT_TOTALN:
2365 case CTSF_UROWPCT_COUNT:
2366 case CTSF_UCOLPCT_COUNT:
2367 case CTSF_UTABLEPCT_COUNT:
2368 case CTSF_USUBTABLEPCT_COUNT:
2369 case CTSF_ULAYERPCT_COUNT:
2370 case CTSF_ULAYERROWPCT_COUNT:
2371 case CTSF_ULAYERCOLPCT_COUNT:
2372 case CTSF_UROWPCT_VALIDN:
2373 case CTSF_UCOLPCT_VALIDN:
2374 case CTSF_UTABLEPCT_VALIDN:
2375 case CTSF_USUBTABLEPCT_VALIDN:
2376 case CTSF_ULAYERPCT_VALIDN:
2377 case CTSF_ULAYERROWPCT_VALIDN:
2378 case CTSF_ULAYERCOLPCT_VALIDN:
2379 case CTSF_UROWPCT_TOTALN:
2380 case CTSF_UCOLPCT_TOTALN:
2381 case CTSF_UTABLEPCT_TOTALN:
2382 case CTSF_USUBTABLEPCT_TOTALN:
2383 case CTSF_ULAYERPCT_TOTALN:
2384 case CTSF_ULAYERROWPCT_TOTALN:
2385 case CTSF_ULAYERCOLPCT_TOTALN:
2395 case CTSF_SUBTABLE_ID:
2397 case CTSF_LAYERROW_ID:
2398 case CTSF_LAYERCOL_ID:
2404 s->min = s->max = SYSMIS;
2412 case CTSF_ROWPCT_SUM:
2413 case CTSF_COLPCT_SUM:
2414 case CTSF_TABLEPCT_SUM:
2415 case CTSF_SUBTABLEPCT_SUM:
2416 case CTSF_LAYERPCT_SUM:
2417 case CTSF_LAYERROWPCT_SUM:
2418 case CTSF_LAYERCOLPCT_SUM:
2423 case CTSF_UVARIANCE:
2424 case CTSF_UROWPCT_SUM:
2425 case CTSF_UCOLPCT_SUM:
2426 case CTSF_UTABLEPCT_SUM:
2427 case CTSF_USUBTABLEPCT_SUM:
2428 case CTSF_ULAYERPCT_SUM:
2429 case CTSF_ULAYERROWPCT_SUM:
2430 case CTSF_ULAYERCOLPCT_SUM:
2431 s->moments = moments1_create (MOMENT_VARIANCE);
2441 struct caseproto *proto = caseproto_create ();
2442 proto = caseproto_add_width (proto, 0);
2443 proto = caseproto_add_width (proto, 0);
2445 struct subcase ordering;
2446 subcase_init (&ordering, 0, 0, SC_ASCEND);
2447 s->writer = sort_create_writer (&ordering, proto);
2448 subcase_uninit (&ordering);
2449 caseproto_unref (proto);
2459 ctables_summary_uninit (union ctables_summary *s,
2460 const struct ctables_summary_spec *ss)
2462 switch (ss->function)
2466 case CTSF_ROWPCT_COUNT:
2467 case CTSF_COLPCT_COUNT:
2468 case CTSF_TABLEPCT_COUNT:
2469 case CTSF_SUBTABLEPCT_COUNT:
2470 case CTSF_LAYERPCT_COUNT:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERCOLPCT_COUNT:
2473 case CTSF_ROWPCT_VALIDN:
2474 case CTSF_COLPCT_VALIDN:
2475 case CTSF_TABLEPCT_VALIDN:
2476 case CTSF_SUBTABLEPCT_VALIDN:
2477 case CTSF_LAYERPCT_VALIDN:
2478 case CTSF_LAYERROWPCT_VALIDN:
2479 case CTSF_LAYERCOLPCT_VALIDN:
2480 case CTSF_ROWPCT_TOTALN:
2481 case CTSF_COLPCT_TOTALN:
2482 case CTSF_TABLEPCT_TOTALN:
2483 case CTSF_SUBTABLEPCT_TOTALN:
2484 case CTSF_LAYERPCT_TOTALN:
2485 case CTSF_LAYERROWPCT_TOTALN:
2486 case CTSF_LAYERCOLPCT_TOTALN:
2493 case CTSF_UROWPCT_COUNT:
2494 case CTSF_UCOLPCT_COUNT:
2495 case CTSF_UTABLEPCT_COUNT:
2496 case CTSF_USUBTABLEPCT_COUNT:
2497 case CTSF_ULAYERPCT_COUNT:
2498 case CTSF_ULAYERROWPCT_COUNT:
2499 case CTSF_ULAYERCOLPCT_COUNT:
2500 case CTSF_UROWPCT_VALIDN:
2501 case CTSF_UCOLPCT_VALIDN:
2502 case CTSF_UTABLEPCT_VALIDN:
2503 case CTSF_USUBTABLEPCT_VALIDN:
2504 case CTSF_ULAYERPCT_VALIDN:
2505 case CTSF_ULAYERROWPCT_VALIDN:
2506 case CTSF_ULAYERCOLPCT_VALIDN:
2507 case CTSF_UROWPCT_TOTALN:
2508 case CTSF_UCOLPCT_TOTALN:
2509 case CTSF_UTABLEPCT_TOTALN:
2510 case CTSF_USUBTABLEPCT_TOTALN:
2511 case CTSF_ULAYERPCT_TOTALN:
2512 case CTSF_ULAYERROWPCT_TOTALN:
2513 case CTSF_ULAYERCOLPCT_TOTALN:
2522 case CTSF_SUBTABLE_ID:
2524 case CTSF_LAYERROW_ID:
2525 case CTSF_LAYERCOL_ID:
2538 case CTSF_ROWPCT_SUM:
2539 case CTSF_COLPCT_SUM:
2540 case CTSF_TABLEPCT_SUM:
2541 case CTSF_SUBTABLEPCT_SUM:
2542 case CTSF_LAYERPCT_SUM:
2543 case CTSF_LAYERROWPCT_SUM:
2544 case CTSF_LAYERCOLPCT_SUM:
2549 case CTSF_UVARIANCE:
2550 case CTSF_UROWPCT_SUM:
2551 case CTSF_UCOLPCT_SUM:
2552 case CTSF_UTABLEPCT_SUM:
2553 case CTSF_USUBTABLEPCT_SUM:
2554 case CTSF_ULAYERPCT_SUM:
2555 case CTSF_ULAYERROWPCT_SUM:
2556 case CTSF_ULAYERCOLPCT_SUM:
2557 moments1_destroy (s->moments);
2566 casewriter_destroy (s->writer);
2572 ctables_summary_add (union ctables_summary *s,
2573 const struct ctables_summary_spec *ss,
2574 const struct variable *var, const union value *value,
2575 bool is_scale, bool is_scale_missing,
2576 bool is_missing, bool excluded_missing,
2577 double d_weight, double e_weight)
2579 /* To determine whether a case is included in a given table for a particular
2580 kind of summary, consider the following charts for each variable in the
2581 table. Only if "yes" appears for every variable for the summary is the
2584 Categorical variables: VALIDN COUNT TOTALN
2585 Valid values in included categories yes yes yes
2586 Missing values in included categories --- yes yes
2587 Missing values in excluded categories --- --- yes
2588 Valid values in excluded categories --- --- ---
2590 Scale variables: VALIDN COUNT TOTALN
2591 Valid value yes yes yes
2592 Missing value --- yes yes
2594 Missing values include both user- and system-missing. (The system-missing
2595 value is always in an excluded category.)
2597 switch (ss->function)
2600 case CTSF_ROWPCT_TOTALN:
2601 case CTSF_COLPCT_TOTALN:
2602 case CTSF_TABLEPCT_TOTALN:
2603 case CTSF_SUBTABLEPCT_TOTALN:
2604 case CTSF_LAYERPCT_TOTALN:
2605 case CTSF_LAYERROWPCT_TOTALN:
2606 case CTSF_LAYERCOLPCT_TOTALN:
2607 s->count += d_weight;
2611 case CTSF_UROWPCT_TOTALN:
2612 case CTSF_UCOLPCT_TOTALN:
2613 case CTSF_UTABLEPCT_TOTALN:
2614 case CTSF_USUBTABLEPCT_TOTALN:
2615 case CTSF_ULAYERPCT_TOTALN:
2616 case CTSF_ULAYERROWPCT_TOTALN:
2617 case CTSF_ULAYERCOLPCT_TOTALN:
2622 case CTSF_ROWPCT_COUNT:
2623 case CTSF_COLPCT_COUNT:
2624 case CTSF_TABLEPCT_COUNT:
2625 case CTSF_SUBTABLEPCT_COUNT:
2626 case CTSF_LAYERPCT_COUNT:
2627 case CTSF_LAYERROWPCT_COUNT:
2628 case CTSF_LAYERCOLPCT_COUNT:
2629 if (is_scale || !excluded_missing)
2630 s->count += d_weight;
2634 case CTSF_UROWPCT_COUNT:
2635 case CTSF_UCOLPCT_COUNT:
2636 case CTSF_UTABLEPCT_COUNT:
2637 case CTSF_USUBTABLEPCT_COUNT:
2638 case CTSF_ULAYERPCT_COUNT:
2639 case CTSF_ULAYERROWPCT_COUNT:
2640 case CTSF_ULAYERCOLPCT_COUNT:
2641 if (is_scale || !excluded_missing)
2646 case CTSF_ROWPCT_VALIDN:
2647 case CTSF_COLPCT_VALIDN:
2648 case CTSF_TABLEPCT_VALIDN:
2649 case CTSF_SUBTABLEPCT_VALIDN:
2650 case CTSF_LAYERPCT_VALIDN:
2651 case CTSF_LAYERROWPCT_VALIDN:
2652 case CTSF_LAYERCOLPCT_VALIDN:
2656 s->count += d_weight;
2660 case CTSF_UROWPCT_VALIDN:
2661 case CTSF_UCOLPCT_VALIDN:
2662 case CTSF_UTABLEPCT_VALIDN:
2663 case CTSF_USUBTABLEPCT_VALIDN:
2664 case CTSF_ULAYERPCT_VALIDN:
2665 case CTSF_ULAYERROWPCT_VALIDN:
2666 case CTSF_ULAYERCOLPCT_VALIDN:
2676 case CTSF_SUBTABLE_ID:
2678 case CTSF_LAYERROW_ID:
2679 case CTSF_LAYERCOL_ID:
2686 s->count += d_weight;
2697 if (is_scale || !excluded_missing)
2698 s->count += e_weight;
2705 s->count += e_weight;
2709 s->count += e_weight;
2715 if (!is_scale_missing)
2717 assert (!var_is_alpha (var)); /* XXX? */
2718 if (s->min == SYSMIS || value->f < s->min)
2720 if (s->max == SYSMIS || value->f > s->max)
2730 case CTSF_ROWPCT_SUM:
2731 case CTSF_COLPCT_SUM:
2732 case CTSF_TABLEPCT_SUM:
2733 case CTSF_SUBTABLEPCT_SUM:
2734 case CTSF_LAYERPCT_SUM:
2735 case CTSF_LAYERROWPCT_SUM:
2736 case CTSF_LAYERCOLPCT_SUM:
2737 if (!is_scale_missing)
2738 moments1_add (s->moments, value->f, e_weight);
2745 case CTSF_UVARIANCE:
2746 case CTSF_UROWPCT_SUM:
2747 case CTSF_UCOLPCT_SUM:
2748 case CTSF_UTABLEPCT_SUM:
2749 case CTSF_USUBTABLEPCT_SUM:
2750 case CTSF_ULAYERPCT_SUM:
2751 case CTSF_ULAYERROWPCT_SUM:
2752 case CTSF_ULAYERCOLPCT_SUM:
2753 if (!is_scale_missing)
2754 moments1_add (s->moments, value->f, 1.0);
2760 d_weight = e_weight = 1.0;
2765 if (!is_scale_missing)
2767 s->ovalid += e_weight;
2769 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2770 *case_num_rw_idx (c, 0) = value->f;
2771 *case_num_rw_idx (c, 1) = e_weight;
2772 casewriter_write (s->writer, c);
2778 static enum ctables_domain_type
2779 ctables_function_domain (enum ctables_summary_function function)
2809 case CTSF_UVARIANCE:
2815 case CTSF_COLPCT_COUNT:
2816 case CTSF_COLPCT_SUM:
2817 case CTSF_COLPCT_TOTALN:
2818 case CTSF_COLPCT_VALIDN:
2819 case CTSF_UCOLPCT_COUNT:
2820 case CTSF_UCOLPCT_SUM:
2821 case CTSF_UCOLPCT_TOTALN:
2822 case CTSF_UCOLPCT_VALIDN:
2826 case CTSF_LAYERCOLPCT_COUNT:
2827 case CTSF_LAYERCOLPCT_SUM:
2828 case CTSF_LAYERCOLPCT_TOTALN:
2829 case CTSF_LAYERCOLPCT_VALIDN:
2830 case CTSF_ULAYERCOLPCT_COUNT:
2831 case CTSF_ULAYERCOLPCT_SUM:
2832 case CTSF_ULAYERCOLPCT_TOTALN:
2833 case CTSF_ULAYERCOLPCT_VALIDN:
2834 case CTSF_LAYERCOL_ID:
2835 return CTDT_LAYERCOL;
2837 case CTSF_LAYERPCT_COUNT:
2838 case CTSF_LAYERPCT_SUM:
2839 case CTSF_LAYERPCT_TOTALN:
2840 case CTSF_LAYERPCT_VALIDN:
2841 case CTSF_ULAYERPCT_COUNT:
2842 case CTSF_ULAYERPCT_SUM:
2843 case CTSF_ULAYERPCT_TOTALN:
2844 case CTSF_ULAYERPCT_VALIDN:
2848 case CTSF_LAYERROWPCT_COUNT:
2849 case CTSF_LAYERROWPCT_SUM:
2850 case CTSF_LAYERROWPCT_TOTALN:
2851 case CTSF_LAYERROWPCT_VALIDN:
2852 case CTSF_ULAYERROWPCT_COUNT:
2853 case CTSF_ULAYERROWPCT_SUM:
2854 case CTSF_ULAYERROWPCT_TOTALN:
2855 case CTSF_ULAYERROWPCT_VALIDN:
2856 case CTSF_LAYERROW_ID:
2857 return CTDT_LAYERROW;
2859 case CTSF_ROWPCT_COUNT:
2860 case CTSF_ROWPCT_SUM:
2861 case CTSF_ROWPCT_TOTALN:
2862 case CTSF_ROWPCT_VALIDN:
2863 case CTSF_UROWPCT_COUNT:
2864 case CTSF_UROWPCT_SUM:
2865 case CTSF_UROWPCT_TOTALN:
2866 case CTSF_UROWPCT_VALIDN:
2870 case CTSF_SUBTABLEPCT_COUNT:
2871 case CTSF_SUBTABLEPCT_SUM:
2872 case CTSF_SUBTABLEPCT_TOTALN:
2873 case CTSF_SUBTABLEPCT_VALIDN:
2874 case CTSF_USUBTABLEPCT_COUNT:
2875 case CTSF_USUBTABLEPCT_SUM:
2876 case CTSF_USUBTABLEPCT_TOTALN:
2877 case CTSF_USUBTABLEPCT_VALIDN:
2878 case CTSF_SUBTABLE_ID:
2879 return CTDT_SUBTABLE;
2881 case CTSF_TABLEPCT_COUNT:
2882 case CTSF_TABLEPCT_SUM:
2883 case CTSF_TABLEPCT_TOTALN:
2884 case CTSF_TABLEPCT_VALIDN:
2885 case CTSF_UTABLEPCT_COUNT:
2886 case CTSF_UTABLEPCT_SUM:
2887 case CTSF_UTABLEPCT_TOTALN:
2888 case CTSF_UTABLEPCT_VALIDN:
2896 static enum ctables_domain_type
2897 ctables_function_is_pctsum (enum ctables_summary_function function)
2927 case CTSF_UVARIANCE:
2931 case CTSF_COLPCT_COUNT:
2932 case CTSF_COLPCT_TOTALN:
2933 case CTSF_COLPCT_VALIDN:
2934 case CTSF_UCOLPCT_COUNT:
2935 case CTSF_UCOLPCT_TOTALN:
2936 case CTSF_UCOLPCT_VALIDN:
2937 case CTSF_LAYERCOLPCT_COUNT:
2938 case CTSF_LAYERCOLPCT_TOTALN:
2939 case CTSF_LAYERCOLPCT_VALIDN:
2940 case CTSF_ULAYERCOLPCT_COUNT:
2941 case CTSF_ULAYERCOLPCT_TOTALN:
2942 case CTSF_ULAYERCOLPCT_VALIDN:
2943 case CTSF_LAYERPCT_COUNT:
2944 case CTSF_LAYERPCT_TOTALN:
2945 case CTSF_LAYERPCT_VALIDN:
2946 case CTSF_ULAYERPCT_COUNT:
2947 case CTSF_ULAYERPCT_TOTALN:
2948 case CTSF_ULAYERPCT_VALIDN:
2949 case CTSF_LAYERROWPCT_COUNT:
2950 case CTSF_LAYERROWPCT_TOTALN:
2951 case CTSF_LAYERROWPCT_VALIDN:
2952 case CTSF_ULAYERROWPCT_COUNT:
2953 case CTSF_ULAYERROWPCT_TOTALN:
2954 case CTSF_ULAYERROWPCT_VALIDN:
2955 case CTSF_ROWPCT_COUNT:
2956 case CTSF_ROWPCT_TOTALN:
2957 case CTSF_ROWPCT_VALIDN:
2958 case CTSF_UROWPCT_COUNT:
2959 case CTSF_UROWPCT_TOTALN:
2960 case CTSF_UROWPCT_VALIDN:
2961 case CTSF_SUBTABLEPCT_COUNT:
2962 case CTSF_SUBTABLEPCT_TOTALN:
2963 case CTSF_SUBTABLEPCT_VALIDN:
2964 case CTSF_USUBTABLEPCT_COUNT:
2965 case CTSF_USUBTABLEPCT_TOTALN:
2966 case CTSF_USUBTABLEPCT_VALIDN:
2967 case CTSF_TABLEPCT_COUNT:
2968 case CTSF_TABLEPCT_TOTALN:
2969 case CTSF_TABLEPCT_VALIDN:
2970 case CTSF_UTABLEPCT_COUNT:
2971 case CTSF_UTABLEPCT_TOTALN:
2972 case CTSF_UTABLEPCT_VALIDN:
2976 case CTSF_SUBTABLE_ID:
2978 case CTSF_LAYERROW_ID:
2979 case CTSF_LAYERCOL_ID:
2982 case CTSF_COLPCT_SUM:
2983 case CTSF_UCOLPCT_SUM:
2984 case CTSF_LAYERCOLPCT_SUM:
2985 case CTSF_ULAYERCOLPCT_SUM:
2986 case CTSF_LAYERPCT_SUM:
2987 case CTSF_ULAYERPCT_SUM:
2988 case CTSF_LAYERROWPCT_SUM:
2989 case CTSF_ULAYERROWPCT_SUM:
2990 case CTSF_ROWPCT_SUM:
2991 case CTSF_UROWPCT_SUM:
2992 case CTSF_SUBTABLEPCT_SUM:
2993 case CTSF_USUBTABLEPCT_SUM:
2994 case CTSF_TABLEPCT_SUM:
2995 case CTSF_UTABLEPCT_SUM:
3003 ctables_summary_value (const struct ctables_cell *cell,
3004 union ctables_summary *s,
3005 const struct ctables_summary_spec *ss)
3007 switch (ss->function)
3017 case CTSF_SUBTABLE_ID:
3019 case CTSF_LAYERROW_ID:
3020 case CTSF_LAYERCOL_ID:
3021 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3023 case CTSF_ROWPCT_COUNT:
3024 case CTSF_COLPCT_COUNT:
3025 case CTSF_TABLEPCT_COUNT:
3026 case CTSF_SUBTABLEPCT_COUNT:
3027 case CTSF_LAYERPCT_COUNT:
3028 case CTSF_LAYERROWPCT_COUNT:
3029 case CTSF_LAYERCOLPCT_COUNT:
3031 enum ctables_domain_type d = ctables_function_domain (ss->function);
3032 return (cell->domains[d]->e_count
3033 ? s->count / cell->domains[d]->e_count * 100
3037 case CTSF_UROWPCT_COUNT:
3038 case CTSF_UCOLPCT_COUNT:
3039 case CTSF_UTABLEPCT_COUNT:
3040 case CTSF_USUBTABLEPCT_COUNT:
3041 case CTSF_ULAYERPCT_COUNT:
3042 case CTSF_ULAYERROWPCT_COUNT:
3043 case CTSF_ULAYERCOLPCT_COUNT:
3045 enum ctables_domain_type d = ctables_function_domain (ss->function);
3046 return (cell->domains[d]->u_count
3047 ? s->count / cell->domains[d]->u_count * 100
3051 case CTSF_ROWPCT_VALIDN:
3052 case CTSF_COLPCT_VALIDN:
3053 case CTSF_TABLEPCT_VALIDN:
3054 case CTSF_SUBTABLEPCT_VALIDN:
3055 case CTSF_LAYERPCT_VALIDN:
3056 case CTSF_LAYERROWPCT_VALIDN:
3057 case CTSF_LAYERCOLPCT_VALIDN:
3059 enum ctables_domain_type d = ctables_function_domain (ss->function);
3060 return (cell->domains[d]->e_valid
3061 ? s->count / cell->domains[d]->e_valid * 100
3065 case CTSF_UROWPCT_VALIDN:
3066 case CTSF_UCOLPCT_VALIDN:
3067 case CTSF_UTABLEPCT_VALIDN:
3068 case CTSF_USUBTABLEPCT_VALIDN:
3069 case CTSF_ULAYERPCT_VALIDN:
3070 case CTSF_ULAYERROWPCT_VALIDN:
3071 case CTSF_ULAYERCOLPCT_VALIDN:
3073 enum ctables_domain_type d = ctables_function_domain (ss->function);
3074 return (cell->domains[d]->u_valid
3075 ? s->count / cell->domains[d]->u_valid * 100
3079 case CTSF_ROWPCT_TOTALN:
3080 case CTSF_COLPCT_TOTALN:
3081 case CTSF_TABLEPCT_TOTALN:
3082 case CTSF_SUBTABLEPCT_TOTALN:
3083 case CTSF_LAYERPCT_TOTALN:
3084 case CTSF_LAYERROWPCT_TOTALN:
3085 case CTSF_LAYERCOLPCT_TOTALN:
3087 enum ctables_domain_type d = ctables_function_domain (ss->function);
3088 return (cell->domains[d]->e_total
3089 ? s->count / cell->domains[d]->e_total * 100
3093 case CTSF_UROWPCT_TOTALN:
3094 case CTSF_UCOLPCT_TOTALN:
3095 case CTSF_UTABLEPCT_TOTALN:
3096 case CTSF_USUBTABLEPCT_TOTALN:
3097 case CTSF_ULAYERPCT_TOTALN:
3098 case CTSF_ULAYERROWPCT_TOTALN:
3099 case CTSF_ULAYERCOLPCT_TOTALN:
3101 enum ctables_domain_type d = ctables_function_domain (ss->function);
3102 return (cell->domains[d]->u_total
3103 ? s->count / cell->domains[d]->u_total * 100
3124 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3130 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3137 double weight, variance;
3138 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3139 return calc_semean (variance, weight);
3146 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3147 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3153 double weight, mean;
3154 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3155 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3159 case CTSF_UVARIANCE:
3162 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3166 case CTSF_ROWPCT_SUM:
3167 case CTSF_COLPCT_SUM:
3168 case CTSF_TABLEPCT_SUM:
3169 case CTSF_SUBTABLEPCT_SUM:
3170 case CTSF_LAYERPCT_SUM:
3171 case CTSF_LAYERROWPCT_SUM:
3172 case CTSF_LAYERCOLPCT_SUM:
3174 double weight, mean;
3175 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3176 if (weight == SYSMIS || mean == SYSMIS)
3178 enum ctables_domain_type d = ctables_function_domain (ss->function);
3179 double num = weight * mean;
3180 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3181 return denom != 0 ? num / denom * 100 : SYSMIS;
3183 case CTSF_UROWPCT_SUM:
3184 case CTSF_UCOLPCT_SUM:
3185 case CTSF_UTABLEPCT_SUM:
3186 case CTSF_USUBTABLEPCT_SUM:
3187 case CTSF_ULAYERPCT_SUM:
3188 case CTSF_ULAYERROWPCT_SUM:
3189 case CTSF_ULAYERCOLPCT_SUM:
3191 double weight, mean;
3192 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3193 if (weight == SYSMIS || mean == SYSMIS)
3195 enum ctables_domain_type d = ctables_function_domain (ss->function);
3196 double num = weight * mean;
3197 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3198 return denom != 0 ? num / denom * 100 : SYSMIS;
3207 struct casereader *reader = casewriter_make_reader (s->writer);
3210 struct percentile *ptile = percentile_create (
3211 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3212 struct order_stats *os = &ptile->parent;
3213 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3214 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3215 statistic_destroy (&ptile->parent.parent);
3223 struct casereader *reader = casewriter_make_reader (s->writer);
3226 struct mode *mode = mode_create ();
3227 struct order_stats *os = &mode->parent;
3228 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3229 s->ovalue = mode->mode;
3230 statistic_destroy (&mode->parent.parent);
3238 struct ctables_cell_sort_aux
3240 const struct ctables_nest *nest;
3241 enum pivot_axis_type a;
3245 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3247 const struct ctables_cell_sort_aux *aux = aux_;
3248 struct ctables_cell *const *ap = a_;
3249 struct ctables_cell *const *bp = b_;
3250 const struct ctables_cell *a = *ap;
3251 const struct ctables_cell *b = *bp;
3253 const struct ctables_nest *nest = aux->nest;
3254 for (size_t i = 0; i < nest->n; i++)
3255 if (i != nest->scale_idx)
3257 const struct variable *var = nest->vars[i];
3258 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3259 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3260 if (a_cv->category != b_cv->category)
3261 return a_cv->category > b_cv->category ? 1 : -1;
3263 const union value *a_val = &a_cv->value;
3264 const union value *b_val = &b_cv->value;
3265 switch (a_cv->category->type)
3271 case CCT_POSTCOMPUTE:
3272 case CCT_EXCLUDED_MISSING:
3273 /* Must be equal. */
3281 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3289 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3291 return a_cv->category->sort_ascending ? cmp : -cmp;
3297 const char *a_label = var_lookup_value_label (var, a_val);
3298 const char *b_label = var_lookup_value_label (var, b_val);
3304 cmp = strcmp (a_label, b_label);
3310 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3313 return a_cv->category->sort_ascending ? cmp : -cmp;
3325 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3326 const void *aux UNUSED)
3328 struct ctables_cell *const *ap = a_;
3329 struct ctables_cell *const *bp = b_;
3330 const struct ctables_cell *a = *ap;
3331 const struct ctables_cell *b = *bp;
3333 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3335 int al = a->axes[axis].leaf;
3336 int bl = b->axes[axis].leaf;
3338 return al > bl ? 1 : -1;
3346 For each ctables_table:
3347 For each combination of row vars:
3348 For each combination of column vars:
3349 For each combination of layer vars:
3351 Make a table of row values:
3352 Sort entries by row values
3353 Assign a 0-based index to each actual value
3354 Construct a dimension
3355 Make a table of column values
3356 Make a table of layer values
3358 Fill the table entry using the indexes from before.
3361 static struct ctables_domain *
3362 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3363 enum ctables_domain_type domain)
3366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3368 const struct ctables_nest *nest = s->nests[a];
3369 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3371 size_t v_idx = nest->domains[domain][i];
3372 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3373 hash = hash_pointer (cv->category, hash);
3374 if (cv->category->type != CCT_TOTAL
3375 && cv->category->type != CCT_SUBTOTAL
3376 && cv->category->type != CCT_POSTCOMPUTE)
3377 hash = value_hash (&cv->value,
3378 var_get_width (nest->vars[v_idx]), hash);
3382 struct ctables_domain *d;
3383 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3385 const struct ctables_cell *df = d->example;
3386 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3388 const struct ctables_nest *nest = s->nests[a];
3389 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3391 size_t v_idx = nest->domains[domain][i];
3392 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3393 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3394 if (cv1->category != cv2->category
3395 || (cv1->category->type != CCT_TOTAL
3396 && cv1->category->type != CCT_SUBTOTAL
3397 && cv1->category->type != CCT_POSTCOMPUTE
3398 && !value_equal (&cv1->value, &cv2->value,
3399 var_get_width (nest->vars[v_idx]))))
3408 struct ctables_sum *sums = (s->table->n_sum_vars
3409 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3412 d = xmalloc (sizeof *d);
3413 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3414 hmap_insert (&s->domains[domain], &d->node, hash);
3418 static struct substring
3419 rtrim_value (const union value *v, const struct variable *var)
3421 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3422 var_get_width (var));
3423 ss_rtrim (&s, ss_cstr (" "));
3428 in_string_range (const union value *v, const struct variable *var,
3429 const struct substring *srange)
3431 struct substring s = rtrim_value (v, var);
3432 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3433 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3436 static const struct ctables_category *
3437 ctables_categories_match (const struct ctables_categories *c,
3438 const union value *v, const struct variable *var)
3440 if (var_is_numeric (var) && v->f == SYSMIS)
3443 const struct ctables_category *othernm = NULL;
3444 for (size_t i = c->n_cats; i-- > 0; )
3446 const struct ctables_category *cat = &c->cats[i];
3450 if (cat->number == v->f)
3455 if (ss_equals (cat->string, rtrim_value (v, var)))
3460 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3461 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3466 if (in_string_range (v, var, cat->srange))
3471 if (var_is_value_missing (var, v))
3475 case CCT_POSTCOMPUTE:
3490 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3493 case CCT_EXCLUDED_MISSING:
3498 return var_is_value_missing (var, v) ? NULL : othernm;
3501 static const struct ctables_category *
3502 ctables_categories_total (const struct ctables_categories *c)
3504 const struct ctables_category *first = &c->cats[0];
3505 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3506 return (first->type == CCT_TOTAL ? first
3507 : last->type == CCT_TOTAL ? last
3511 static struct ctables_cell *
3512 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3513 const struct ctables_category *cats[PIVOT_N_AXES][10])
3516 enum ctables_summary_variant sv = CSV_CELL;
3517 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3519 const struct ctables_nest *nest = s->nests[a];
3520 for (size_t i = 0; i < nest->n; i++)
3521 if (i != nest->scale_idx)
3523 hash = hash_pointer (cats[a][i], hash);
3524 if (cats[a][i]->type != CCT_TOTAL
3525 && cats[a][i]->type != CCT_SUBTOTAL
3526 && cats[a][i]->type != CCT_POSTCOMPUTE)
3527 hash = value_hash (case_data (c, nest->vars[i]),
3528 var_get_width (nest->vars[i]), hash);
3534 struct ctables_cell *cell;
3535 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3537 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3539 const struct ctables_nest *nest = s->nests[a];
3540 for (size_t i = 0; i < nest->n; i++)
3541 if (i != nest->scale_idx
3542 && (cats[a][i] != cell->axes[a].cvs[i].category
3543 || (cats[a][i]->type != CCT_TOTAL
3544 && cats[a][i]->type != CCT_SUBTOTAL
3545 && cats[a][i]->type != CCT_POSTCOMPUTE
3546 && !value_equal (case_data (c, nest->vars[i]),
3547 &cell->axes[a].cvs[i].value,
3548 var_get_width (nest->vars[i])))))
3557 cell = xmalloc (sizeof *cell);
3560 cell->omit_domains = 0;
3561 cell->postcompute = false;
3562 //struct string name = DS_EMPTY_INITIALIZER;
3563 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3565 const struct ctables_nest *nest = s->nests[a];
3566 cell->axes[a].cvs = (nest->n
3567 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3569 for (size_t i = 0; i < nest->n; i++)
3571 const struct ctables_category *cat = cats[a][i];
3572 const struct variable *var = nest->vars[i];
3573 const union value *value = case_data (c, var);
3574 if (i != nest->scale_idx)
3576 const struct ctables_category *subtotal = cat->subtotal;
3577 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3580 if (cat->type == CCT_TOTAL
3581 || cat->type == CCT_SUBTOTAL
3582 || cat->type == CCT_POSTCOMPUTE)
3584 /* XXX these should be more encompassing I think.*/
3588 case PIVOT_AXIS_COLUMN:
3589 cell->omit_domains |= ((1u << CTDT_TABLE) |
3590 (1u << CTDT_LAYER) |
3591 (1u << CTDT_LAYERCOL) |
3592 (1u << CTDT_SUBTABLE) |
3595 case PIVOT_AXIS_ROW:
3596 cell->omit_domains |= ((1u << CTDT_TABLE) |
3597 (1u << CTDT_LAYER) |
3598 (1u << CTDT_LAYERROW) |
3599 (1u << CTDT_SUBTABLE) |
3602 case PIVOT_AXIS_LAYER:
3603 cell->omit_domains |= ((1u << CTDT_TABLE) |
3604 (1u << CTDT_LAYER));
3608 if (cat->type == CCT_POSTCOMPUTE)
3609 cell->postcompute = true;
3612 cell->axes[a].cvs[i].category = cat;
3613 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3616 if (i != nest->scale_idx)
3618 if (!ds_is_empty (&name))
3619 ds_put_cstr (&name, ", ");
3620 char *value_s = data_out (value, var_get_encoding (var),
3621 var_get_print_format (var),
3622 settings_get_fmt_settings ());
3623 if (cat->type == CCT_TOTAL
3624 || cat->type == CCT_SUBTOTAL
3625 || cat->type == CCT_POSTCOMPUTE)
3626 ds_put_format (&name, "%s=total", var_get_name (var));
3628 ds_put_format (&name, "%s=%s", var_get_name (var),
3629 value_s + strspn (value_s, " "));
3635 //cell->name = ds_steal_cstr (&name);
3637 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3638 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3639 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3640 for (size_t i = 0; i < specs->n; i++)
3641 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3642 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3643 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3644 hmap_insert (&s->cells, &cell->node, hash);
3649 is_scale_missing (const struct ctables_summary_spec_set *specs,
3650 const struct ccase *c)
3652 if (!specs->is_scale)
3655 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3658 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3660 const struct variable *var = specs->listwise_vars[i];
3661 if (var_is_num_missing (var, case_num (c, var)))
3669 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3670 const struct ctables_category *cats[PIVOT_N_AXES][10],
3671 bool is_missing, bool excluded_missing,
3672 double d_weight, double e_weight)
3674 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3675 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3677 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3679 bool scale_missing = is_scale_missing (specs, c);
3680 for (size_t i = 0; i < specs->n; i++)
3681 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3682 specs->var, case_data (c, specs->var), specs->is_scale,
3683 scale_missing, is_missing, excluded_missing,
3684 d_weight, e_weight);
3685 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3686 if (!(cell->omit_domains && (1u << dt)))
3688 struct ctables_domain *d = cell->domains[dt];
3689 d->d_total += d_weight;
3690 d->e_total += e_weight;
3692 if (!excluded_missing)
3694 d->d_count += d_weight;
3695 d->e_count += e_weight;
3700 d->d_valid += d_weight;
3701 d->e_valid += e_weight;
3704 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3706 /* XXX listwise_missing??? */
3707 const struct variable *var = s->table->sum_vars[i];
3708 double addend = case_num (c, var);
3709 if (!var_is_num_missing (var, addend))
3711 struct ctables_sum *sum = &d->sums[i];
3712 sum->e_sum += addend * e_weight;
3713 sum->u_sum += addend;
3721 recurse_totals (struct ctables_section *s, const struct ccase *c,
3722 const struct ctables_category *cats[PIVOT_N_AXES][10],
3723 bool is_missing, bool excluded_missing,
3724 double d_weight, double e_weight,
3725 enum pivot_axis_type start_axis, size_t start_nest)
3727 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3729 const struct ctables_nest *nest = s->nests[a];
3730 for (size_t i = start_nest; i < nest->n; i++)
3732 if (i == nest->scale_idx)
3735 const struct variable *var = nest->vars[i];
3737 const struct ctables_category *total = ctables_categories_total (
3738 s->table->categories[var_get_dict_index (var)]);
3741 const struct ctables_category *save = cats[a][i];
3743 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3744 d_weight, e_weight);
3745 recurse_totals (s, c, cats, is_missing, excluded_missing,
3746 d_weight, e_weight, a, i + 1);
3755 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3756 const struct ctables_category *cats[PIVOT_N_AXES][10],
3757 bool is_missing, bool excluded_missing,
3758 double d_weight, double e_weight,
3759 enum pivot_axis_type start_axis, size_t start_nest)
3761 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3763 const struct ctables_nest *nest = s->nests[a];
3764 for (size_t i = start_nest; i < nest->n; i++)
3766 if (i == nest->scale_idx)
3769 const struct ctables_category *save = cats[a][i];
3772 cats[a][i] = save->subtotal;
3773 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3774 d_weight, e_weight);
3775 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3776 d_weight, e_weight, a, i + 1);
3785 ctables_add_occurrence (const struct variable *var,
3786 const union value *value,
3787 struct hmap *occurrences)
3789 int width = var_get_width (var);
3790 unsigned int hash = value_hash (value, width, 0);
3792 struct ctables_occurrence *o;
3793 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3795 if (value_equal (value, &o->value, width))
3798 o = xmalloc (sizeof *o);
3799 value_clone (&o->value, value, width);
3800 hmap_insert (occurrences, &o->node, hash);
3804 ctables_cell_insert (struct ctables_section *s,
3805 const struct ccase *c,
3806 double d_weight, double e_weight)
3808 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3810 /* Does at least one categorical variable have a missing value in an included
3811 or excluded category? */
3812 bool is_missing = false;
3814 /* Does at least one categorical variable have a missing value in an excluded
3816 bool excluded_missing = false;
3818 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3820 const struct ctables_nest *nest = s->nests[a];
3821 for (size_t i = 0; i < nest->n; i++)
3823 if (i == nest->scale_idx)
3826 const struct variable *var = nest->vars[i];
3827 const union value *value = case_data (c, var);
3829 bool var_missing = var_is_value_missing (var, value) != 0;
3833 cats[a][i] = ctables_categories_match (
3834 s->table->categories[var_get_dict_index (var)], value, var);
3840 static const struct ctables_category cct_excluded_missing = {
3841 .type = CCT_EXCLUDED_MISSING,
3844 cats[a][i] = &cct_excluded_missing;
3845 excluded_missing = true;
3850 if (!excluded_missing)
3851 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3853 const struct ctables_nest *nest = s->nests[a];
3854 for (size_t i = 0; i < nest->n; i++)
3855 if (i != nest->scale_idx)
3857 const struct variable *var = nest->vars[i];
3858 const union value *value = case_data (c, var);
3859 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3863 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3864 d_weight, e_weight);
3866 //if (!excluded_missing)
3868 recurse_totals (s, c, cats, is_missing, excluded_missing,
3869 d_weight, e_weight, 0, 0);
3870 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3871 d_weight, e_weight, 0, 0);
3877 const struct ctables_summary_spec_set *set;
3882 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3884 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3885 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3886 if (as->function != bs->function)
3887 return as->function > bs->function ? 1 : -1;
3888 else if (as->percentile != bs->percentile)
3889 return as->percentile < bs->percentile ? 1 : -1;
3891 const char *as_label = as->label ? as->label : "";
3892 const char *bs_label = bs->label ? bs->label : "";
3893 return strcmp (as_label, bs_label);
3897 ctables_category_format_number (double number, const struct variable *var,
3900 struct pivot_value *pv = pivot_value_new_var_value (
3901 var, &(union value) { .f = number });
3902 pivot_value_format (pv, NULL, s);
3903 pivot_value_destroy (pv);
3907 ctables_category_format_string (struct substring string,
3908 const struct variable *var, struct string *out)
3910 int width = var_get_width (var);
3911 char *s = xmalloc (width);
3912 buf_copy_rpad (s, width, string.string, string.length, ' ');
3913 struct pivot_value *pv = pivot_value_new_var_value (
3914 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3915 pivot_value_format (pv, NULL, out);
3916 pivot_value_destroy (pv);
3921 ctables_category_format_label (const struct ctables_category *cat,
3922 const struct variable *var,
3928 ctables_category_format_number (cat->number, var, s);
3932 ctables_category_format_string (cat->string, var, s);
3936 ctables_category_format_number (cat->nrange[0], var, s);
3937 ds_put_format (s, " THRU ");
3938 ctables_category_format_number (cat->nrange[1], var, s);
3942 ctables_category_format_string (cat->srange[0], var, s);
3943 ds_put_format (s, " THRU ");
3944 ctables_category_format_string (cat->srange[1], var, s);
3948 ds_put_cstr (s, "MISSING");
3952 ds_put_cstr (s, "OTHERNM");
3955 case CCT_POSTCOMPUTE:
3956 ds_put_format (s, "&%s", cat->pc->name);
3961 ds_put_cstr (s, cat->total_label);
3967 case CCT_EXCLUDED_MISSING:
3974 static struct pivot_value *
3975 ctables_postcompute_label (const struct ctables_categories *cats,
3976 const struct ctables_category *cat,
3977 const struct variable *var)
3979 struct substring in = ss_cstr (cat->pc->label);
3980 struct substring target = ss_cstr (")LABEL[");
3982 struct string out = DS_EMPTY_INITIALIZER;
3985 size_t chunk = ss_find_substring (in, target);
3986 if (chunk == SIZE_MAX)
3988 if (ds_is_empty (&out))
3989 return pivot_value_new_user_text (in.string, in.length);
3992 ds_put_substring (&out, in);
3993 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3997 ds_put_substring (&out, ss_head (in, chunk));
3998 ss_advance (&in, chunk + target.length);
4000 struct substring idx_s;
4001 if (!ss_get_until (&in, ']', &idx_s))
4004 long int idx = strtol (idx_s.string, &tail, 10);
4005 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4008 struct ctables_category *cat2 = &cats->cats[idx - 1];
4009 if (!ctables_category_format_label (cat2, var, &out))
4015 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4018 static struct pivot_value *
4019 ctables_category_create_value_label (const struct ctables_categories *cats,
4020 const struct ctables_category *cat,
4021 const struct variable *var,
4022 const union value *value)
4024 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4025 ? ctables_postcompute_label (cats, cat, var)
4026 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4027 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4028 : pivot_value_new_var_value (var, value));
4031 static struct ctables_value *
4032 ctables_value_find__ (struct ctables_table *t, const union value *value,
4033 int width, unsigned int hash)
4035 struct ctables_value *clv;
4036 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4037 hash, &t->clabels_values_map)
4038 if (value_equal (value, &clv->value, width))
4044 ctables_value_insert (struct ctables_table *t, const union value *value,
4047 unsigned int hash = value_hash (value, width, 0);
4048 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4051 clv = xmalloc (sizeof *clv);
4052 value_clone (&clv->value, value, width);
4053 hmap_insert (&t->clabels_values_map, &clv->node, hash);
4057 static struct ctables_value *
4058 ctables_value_find (struct ctables_table *t,
4059 const union value *value, int width)
4061 return ctables_value_find__ (t, value, width,
4062 value_hash (value, width, 0));
4066 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4067 size_t ix[PIVOT_N_AXES])
4069 if (a < PIVOT_N_AXES)
4071 size_t limit = MAX (t->stacks[a].n, 1);
4072 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4073 ctables_table_add_section (t, a + 1, ix);
4077 struct ctables_section *s = &t->sections[t->n_sections++];
4078 *s = (struct ctables_section) {
4080 .cells = HMAP_INITIALIZER (s->cells),
4082 for (a = 0; a < PIVOT_N_AXES; a++)
4085 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4087 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4088 for (size_t i = 0; i < nest->n; i++)
4089 hmap_init (&s->occurrences[a][i]);
4091 for (size_t i = 0; i < N_CTDTS; i++)
4092 hmap_init (&s->domains[i]);
4097 ctpo_add (double a, double b)
4103 ctpo_sub (double a, double b)
4109 ctpo_mul (double a, double b)
4115 ctpo_div (double a, double b)
4117 return b ? a / b : SYSMIS;
4121 ctpo_pow (double a, double b)
4123 int save_errno = errno;
4125 double result = pow (a, b);
4133 ctpo_neg (double a, double b UNUSED)
4138 struct ctables_pcexpr_evaluate_ctx
4140 const struct ctables_cell *cell;
4141 const struct ctables_section *section;
4142 const struct ctables_categories *cats;
4143 enum pivot_axis_type pc_a;
4146 enum fmt_type parse_format;
4149 static double ctables_pcexpr_evaluate (
4150 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4153 ctables_pcexpr_evaluate_nonterminal (
4154 const struct ctables_pcexpr_evaluate_ctx *ctx,
4155 const struct ctables_pcexpr *e, size_t n_args,
4156 double evaluate (double, double))
4158 double args[2] = { 0, 0 };
4159 for (size_t i = 0; i < n_args; i++)
4161 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4162 if (!isfinite (args[i]) || args[i] == SYSMIS)
4165 return evaluate (args[0], args[1]);
4169 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4170 const struct ctables_cell_value *pc_cv)
4172 const struct ctables_section *s = ctx->section;
4175 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4177 const struct ctables_nest *nest = s->nests[a];
4178 for (size_t i = 0; i < nest->n; i++)
4179 if (i != nest->scale_idx)
4181 const struct ctables_cell_value *cv
4182 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4183 : &ctx->cell->axes[a].cvs[i]);
4184 hash = hash_pointer (cv->category, hash);
4185 if (cv->category->type != CCT_TOTAL
4186 && cv->category->type != CCT_SUBTOTAL
4187 && cv->category->type != CCT_POSTCOMPUTE)
4188 hash = value_hash (&cv->value,
4189 var_get_width (nest->vars[i]), hash);
4193 struct ctables_cell *tc;
4194 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4196 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4198 const struct ctables_nest *nest = s->nests[a];
4199 for (size_t i = 0; i < nest->n; i++)
4200 if (i != nest->scale_idx)
4202 const struct ctables_cell_value *p_cv
4203 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4204 : &ctx->cell->axes[a].cvs[i]);
4205 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4206 if (p_cv->category != t_cv->category
4207 || (p_cv->category->type != CCT_TOTAL
4208 && p_cv->category->type != CCT_SUBTOTAL
4209 && p_cv->category->type != CCT_POSTCOMPUTE
4210 && !value_equal (&p_cv->value,
4212 var_get_width (nest->vars[i]))))
4224 const struct ctables_table *t = s->table;
4225 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4226 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4227 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4228 &specs->specs[ctx->summary_idx]);
4232 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4233 const struct ctables_pcexpr *e)
4240 case CTPO_CAT_NRANGE:
4241 case CTPO_CAT_SRANGE:
4243 struct ctables_cell_value cv = {
4244 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4246 assert (cv.category != NULL);
4248 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4249 const struct ctables_occurrence *o;
4252 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4253 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4254 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4256 cv.value = o->value;
4257 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4262 case CTPO_CAT_NUMBER:
4263 case CTPO_CAT_MISSING:
4264 case CTPO_CAT_OTHERNM:
4265 case CTPO_CAT_SUBTOTAL:
4266 case CTPO_CAT_TOTAL:
4268 struct ctables_cell_value cv = {
4269 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4270 .value = { .f = e->number },
4272 assert (cv.category != NULL);
4273 return ctables_pcexpr_evaluate_category (ctx, &cv);
4276 case CTPO_CAT_STRING:
4278 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4280 if (width > e->string.length)
4282 s = xmalloc (width);
4283 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4285 struct ctables_cell_value cv = {
4286 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4287 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4289 assert (cv.category != NULL);
4290 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4296 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4299 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4302 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4305 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4308 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4311 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4317 static const struct ctables_category *
4318 ctables_cell_postcompute (const struct ctables_section *s,
4319 const struct ctables_cell *cell,
4320 enum pivot_axis_type *pc_a_p,
4323 assert (cell->postcompute);
4324 const struct ctables_category *pc_cat = NULL;
4325 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4326 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4328 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4329 if (cv->category->type == CCT_POSTCOMPUTE)
4333 /* Multiple postcomputes cross each other. The value is
4338 pc_cat = cv->category;
4342 *pc_a_idx_p = pc_a_idx;
4346 assert (pc_cat != NULL);
4351 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4352 const struct ctables_cell *cell,
4353 const struct ctables_summary_spec *ss,
4354 struct fmt_spec *format,
4355 bool *is_ctables_format,
4358 enum pivot_axis_type pc_a = 0;
4359 size_t pc_a_idx = 0;
4360 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4361 s, cell, &pc_a, &pc_a_idx);
4365 const struct ctables_postcompute *pc = pc_cat->pc;
4368 for (size_t i = 0; i < pc->specs->n; i++)
4370 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4371 if (ss->function == ss2->function
4372 && ss->percentile == ss2->percentile)
4374 *format = ss2->format;
4375 *is_ctables_format = ss2->is_ctables_format;
4381 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4382 const struct ctables_categories *cats = s->table->categories[
4383 var_get_dict_index (var)];
4384 struct ctables_pcexpr_evaluate_ctx ctx = {
4389 .pc_a_idx = pc_a_idx,
4390 .summary_idx = summary_idx,
4391 .parse_format = pc_cat->parse_format,
4393 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4397 ctables_format (double d, const struct fmt_spec *format,
4398 const struct fmt_settings *settings)
4400 const union value v = { .f = d };
4401 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4403 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4404 produce the results we want for negative numbers, putting the negative
4405 sign in the wrong spot, before the prefix instead of after it. We can't,
4406 in fact, produce the desired results using a custom-currency
4407 specification. Instead, we postprocess the output, moving the negative
4410 NEQUAL: "-N=3" => "N=-3"
4411 PAREN: "-(3)" => "(-3)"
4412 PCTPAREN: "-(3%)" => "(-3%)"
4414 This transformation doesn't affect NEGPAREN. */
4415 char *minus_src = strchr (s, '-');
4416 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4418 char *n_equals = strstr (s, "N=");
4419 char *lparen = strchr (s, '(');
4420 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4422 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4428 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4430 for (size_t i = 0; i < t->stacks[a].n; i++)
4432 struct ctables_nest *nest = &t->stacks[a].nests[i];
4433 if (nest->n != 1 || nest->scale_idx != 0)
4436 enum ctables_vlabel vlabel
4437 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4438 if (vlabel != CTVL_NONE)
4445 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4447 struct pivot_table *pt = pivot_table_create__ (
4449 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4450 : pivot_value_new_text (N_("Custom Tables"))),
4453 pivot_table_set_caption (
4454 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4456 pivot_table_set_corner_text (
4457 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4459 bool summary_dimension = (t->summary_axis != t->slabels_axis
4460 || (!t->slabels_visible
4461 && t->summary_specs.n > 1));
4462 if (summary_dimension)
4464 struct pivot_dimension *d = pivot_dimension_create (
4465 pt, t->slabels_axis, N_("Statistics"));
4466 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4467 if (!t->slabels_visible)
4468 d->hide_all_labels = true;
4469 for (size_t i = 0; i < specs->n; i++)
4470 pivot_category_create_leaf (
4471 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4474 bool categories_dimension = t->clabels_example != NULL;
4475 if (categories_dimension)
4477 struct pivot_dimension *d = pivot_dimension_create (
4478 pt, t->label_axis[t->clabels_from_axis],
4479 t->clabels_from_axis == PIVOT_AXIS_ROW
4480 ? N_("Row Categories")
4481 : N_("Column Categories"));
4482 const struct variable *var = t->clabels_example;
4483 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4484 for (size_t i = 0; i < t->n_clabels_values; i++)
4486 const struct ctables_value *value = t->clabels_values[i];
4487 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4488 assert (cat != NULL);
4489 pivot_category_create_leaf (
4490 d->root, ctables_category_create_value_label (c, cat,
4496 pivot_table_set_look (pt, ct->look);
4497 struct pivot_dimension *d[PIVOT_N_AXES];
4498 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4500 static const char *names[] = {
4501 [PIVOT_AXIS_ROW] = N_("Rows"),
4502 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4503 [PIVOT_AXIS_LAYER] = N_("Layers"),
4505 d[a] = (t->axes[a] || a == t->summary_axis
4506 ? pivot_dimension_create (pt, a, names[a])
4511 assert (t->axes[a]);
4513 for (size_t i = 0; i < t->stacks[a].n; i++)
4515 struct ctables_nest *nest = &t->stacks[a].nests[i];
4516 struct ctables_section **sections = xnmalloc (t->n_sections,
4518 size_t n_sections = 0;
4520 size_t n_total_cells = 0;
4521 size_t max_depth = 0;
4522 for (size_t j = 0; j < t->n_sections; j++)
4523 if (t->sections[j].nests[a] == nest)
4525 struct ctables_section *s = &t->sections[j];
4526 sections[n_sections++] = s;
4527 n_total_cells += hmap_count (&s->cells);
4529 size_t depth = s->nests[a]->n;
4530 max_depth = MAX (depth, max_depth);
4533 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4535 size_t n_sorted = 0;
4537 for (size_t j = 0; j < n_sections; j++)
4539 struct ctables_section *s = sections[j];
4541 struct ctables_cell *cell;
4542 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4544 sorted[n_sorted++] = cell;
4545 assert (n_sorted <= n_total_cells);
4548 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4549 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4552 if (a == PIVOT_AXIS_ROW)
4554 size_t ids[N_CTDTS];
4555 memset (ids, 0, sizeof ids);
4556 for (size_t j = 0; j < n_sorted; j++)
4558 struct ctables_cell *cell = sorted[j];
4559 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4561 struct ctables_domain *domain = cell->domains[dt];
4562 if (!domain->sequence)
4563 domain->sequence = ++ids[dt];
4570 for (size_t j = 0; j < n_sorted; j++)
4572 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4577 struct ctables_level
4579 enum ctables_level_type
4581 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4582 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4583 CTL_SUMMARY, /* Summary functions. */
4587 enum settings_value_show vlabel; /* CTL_VAR only. */
4590 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4591 size_t n_levels = 0;
4592 for (size_t k = 0; k < nest->n; k++)
4594 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4595 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4597 if (vlabel != CTVL_NONE)
4599 levels[n_levels++] = (struct ctables_level) {
4601 .vlabel = (enum settings_value_show) vlabel,
4606 if (nest->scale_idx != k
4607 && (k != nest->n - 1 || t->label_axis[a] == a))
4609 levels[n_levels++] = (struct ctables_level) {
4610 .type = CTL_CATEGORY,
4616 if (!summary_dimension && a == t->slabels_axis)
4618 levels[n_levels++] = (struct ctables_level) {
4619 .type = CTL_SUMMARY,
4620 .var_idx = SIZE_MAX,
4624 /* Pivot categories:
4626 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4627 - category for nest->vars[0], if nest->scale_idx != 0
4628 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4629 - category for nest->vars[1], if nest->scale_idx != 1
4631 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4632 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4633 - summary function, if 'a == t->slabels_axis && a ==
4636 Additional dimensions:
4638 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4640 - If 't->label_axis[b] == a' for some 'b != a', add a category
4645 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4647 for (size_t j = 0; j < n_sorted; j++)
4649 struct ctables_cell *cell = sorted[j];
4650 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4652 size_t n_common = 0;
4655 for (; n_common < n_levels; n_common++)
4657 const struct ctables_level *level = &levels[n_common];
4658 if (level->type == CTL_CATEGORY)
4660 size_t var_idx = level->var_idx;
4661 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4662 if (prev->axes[a].cvs[var_idx].category != c)
4664 else if (c->type != CCT_SUBTOTAL
4665 && c->type != CCT_TOTAL
4666 && c->type != CCT_POSTCOMPUTE
4667 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4668 &cell->axes[a].cvs[var_idx].value,
4669 var_get_type (nest->vars[var_idx])))
4675 for (size_t k = n_common; k < n_levels; k++)
4677 const struct ctables_level *level = &levels[k];
4678 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4679 if (level->type == CTL_SUMMARY)
4681 assert (k == n_levels - 1);
4683 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4684 for (size_t m = 0; m < specs->n; m++)
4686 int leaf = pivot_category_create_leaf (
4687 parent, ctables_summary_label (&specs->specs[m],
4695 const struct variable *var = nest->vars[level->var_idx];
4696 struct pivot_value *label;
4697 if (level->type == CTL_VAR)
4699 label = pivot_value_new_variable (var);
4700 label->variable.show = level->vlabel;
4702 else if (level->type == CTL_CATEGORY)
4704 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4705 label = ctables_category_create_value_label (
4706 t->categories[var_get_dict_index (var)],
4707 cv->category, var, &cv->value);
4712 if (k == n_levels - 1)
4713 prev_leaf = pivot_category_create_leaf (parent, label);
4715 groups[k] = pivot_category_create_group__ (parent, label);
4719 cell->axes[a].leaf = prev_leaf;
4728 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4732 size_t n_total_cells = 0;
4733 for (size_t j = 0; j < t->n_sections; j++)
4734 n_total_cells += hmap_count (&t->sections[j].cells);
4736 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4737 size_t n_sorted = 0;
4738 for (size_t j = 0; j < t->n_sections; j++)
4740 const struct ctables_section *s = &t->sections[j];
4741 struct ctables_cell *cell;
4742 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4744 sorted[n_sorted++] = cell;
4746 assert (n_sorted <= n_total_cells);
4747 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4749 size_t ids[N_CTDTS];
4750 memset (ids, 0, sizeof ids);
4751 for (size_t j = 0; j < n_sorted; j++)
4753 struct ctables_cell *cell = sorted[j];
4754 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4756 struct ctables_domain *domain = cell->domains[dt];
4757 if (!domain->sequence)
4758 domain->sequence = ++ids[dt];
4765 for (size_t i = 0; i < t->n_sections; i++)
4767 struct ctables_section *s = &t->sections[i];
4769 struct ctables_cell *cell;
4770 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4775 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4776 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4777 for (size_t j = 0; j < specs->n; j++)
4780 size_t n_dindexes = 0;
4782 if (summary_dimension)
4783 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4785 if (categories_dimension)
4787 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4788 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4789 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4790 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4793 dindexes[n_dindexes++] = ctv->leaf;
4796 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4799 int leaf = cell->axes[a].leaf;
4800 if (a == t->summary_axis && !summary_dimension)
4802 dindexes[n_dindexes++] = leaf;
4805 const struct ctables_summary_spec *ss = &specs->specs[j];
4807 struct fmt_spec format = specs->specs[j].format;
4808 bool is_ctables_format = ss->is_ctables_format;
4809 double d = (cell->postcompute
4810 ? ctables_cell_calculate_postcompute (
4811 s, cell, ss, &format, &is_ctables_format, j)
4812 : ctables_summary_value (cell, &cell->summaries[j],
4815 struct pivot_value *value;
4816 if (ct->hide_threshold != 0
4817 && d < ct->hide_threshold
4818 && ctables_summary_function_is_count (ss->function))
4820 value = pivot_value_new_user_text_nocopy (
4821 xasprintf ("<%d", ct->hide_threshold));
4823 else if (d == 0 && ct->zero)
4824 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4825 else if (d == SYSMIS && ct->missing)
4826 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4827 else if (is_ctables_format)
4828 value = pivot_value_new_user_text_nocopy (
4829 ctables_format (d, &format, &ct->ctables_formats));
4832 value = pivot_value_new_number (d);
4833 value->numeric.format = format;
4835 /* XXX should text values be right-justified? */
4836 pivot_table_put (pt, dindexes, n_dindexes, value);
4841 pivot_table_submit (pt);
4845 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4847 enum pivot_axis_type label_pos = t->label_axis[a];
4851 t->clabels_from_axis = a;
4853 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4854 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4856 const struct ctables_stack *stack = &t->stacks[a];
4860 const struct ctables_nest *n0 = &stack->nests[0];
4863 assert (stack->n == 1);
4867 const struct variable *v0 = n0->vars[n0->n - 1];
4868 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4869 t->clabels_example = v0;
4871 for (size_t i = 0; i < c0->n_cats; i++)
4872 if (c0->cats[i].type == CCT_FUNCTION)
4874 msg (SE, _("%s=%s is not allowed with sorting based "
4875 "on a summary function."),
4876 subcommand_name, pos_name);
4879 if (n0->n - 1 == n0->scale_idx)
4881 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4882 "but %s is a scale variable."),
4883 subcommand_name, pos_name, var_get_name (v0));
4887 for (size_t i = 1; i < stack->n; i++)
4889 const struct ctables_nest *ni = &stack->nests[i];
4891 const struct variable *vi = ni->vars[ni->n - 1];
4892 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4894 if (ni->n - 1 == ni->scale_idx)
4896 msg (SE, _("%s=%s requires the variables to be moved to be "
4897 "categorical, but %s is a scale variable."),
4898 subcommand_name, pos_name, var_get_name (vi));
4901 if (var_get_width (v0) != var_get_width (vi))
4903 msg (SE, _("%s=%s requires the variables to be "
4904 "moved to have the same width, but %s has "
4905 "width %d and %s has width %d."),
4906 subcommand_name, pos_name,
4907 var_get_name (v0), var_get_width (v0),
4908 var_get_name (vi), var_get_width (vi));
4911 if (!val_labs_equal (var_get_value_labels (v0),
4912 var_get_value_labels (vi)))
4914 msg (SE, _("%s=%s requires the variables to be "
4915 "moved to have the same value labels, but %s "
4916 "and %s have different value labels."),
4917 subcommand_name, pos_name,
4918 var_get_name (v0), var_get_name (vi));
4921 if (!ctables_categories_equal (c0, ci))
4923 msg (SE, _("%s=%s requires the variables to be "
4924 "moved to have the same category "
4925 "specifications, but %s and %s have different "
4926 "category specifications."),
4927 subcommand_name, pos_name,
4928 var_get_name (v0), var_get_name (vi));
4937 add_sum_var (struct variable *var,
4938 struct variable ***sum_vars, size_t *n, size_t *allocated)
4940 for (size_t i = 0; i < *n; i++)
4941 if (var == (*sum_vars)[i])
4944 if (*n >= *allocated)
4945 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4946 (*sum_vars)[*n] = var;
4951 enumerate_sum_vars (const struct ctables_axis *a,
4952 struct variable ***sum_vars, size_t *n, size_t *allocated)
4960 for (size_t i = 0; i < N_CSVS; i++)
4961 for (size_t j = 0; j < a->specs[i].n; j++)
4963 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4964 if (ctables_function_is_pctsum (spec->function))
4965 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4971 for (size_t i = 0; i < 2; i++)
4972 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4978 ctables_prepare_table (struct ctables_table *t)
4980 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4983 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4985 for (size_t j = 0; j < t->stacks[a].n; j++)
4987 struct ctables_nest *nest = &t->stacks[a].nests[j];
4988 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4990 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4991 nest->n_domains[dt] = 0;
4993 for (size_t k = 0; k < nest->n; k++)
4995 if (k == nest->scale_idx)
5004 if (a != PIVOT_AXIS_LAYER)
5011 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
5012 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
5013 : a == PIVOT_AXIS_ROW)
5015 if (k == nest->n - 1
5016 || (nest->scale_idx == nest->n - 1
5017 && k == nest->n - 2))
5023 if (a == PIVOT_AXIS_COLUMN)
5028 if (a == PIVOT_AXIS_ROW)
5033 nest->domains[dt][nest->n_domains[dt]++] = k;
5040 struct ctables_nest *nest = xmalloc (sizeof *nest);
5041 *nest = (struct ctables_nest) { .n = 0 };
5042 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5044 /* There's no point in moving labels away from an axis that has no
5045 labels, so avoid dealing with the special cases around that. */
5046 t->label_axis[a] = a;
5049 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5050 for (size_t i = 0; i < stack->n; i++)
5052 struct ctables_nest *nest = &stack->nests[i];
5053 if (!nest->specs[CSV_CELL].n)
5055 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5056 specs->specs = xmalloc (sizeof *specs->specs);
5059 enum ctables_summary_function function
5060 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
5062 *specs->specs = (struct ctables_summary_spec) {
5063 .function = function,
5064 .format = ctables_summary_default_format (function, specs->var),
5067 specs->var = nest->vars[0];
5069 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5070 &nest->specs[CSV_CELL]);
5072 else if (!nest->specs[CSV_TOTAL].n)
5073 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5074 &nest->specs[CSV_CELL]);
5076 if (t->ctables->smissing_listwise)
5078 struct variable **listwise_vars = NULL;
5080 size_t allocated = 0;
5082 for (size_t j = nest->group_head; j < stack->n; j++)
5084 const struct ctables_nest *other_nest = &stack->nests[j];
5085 if (other_nest->group_head != nest->group_head)
5088 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5091 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5092 sizeof *listwise_vars);
5093 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5096 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5099 listwise_vars = xmemdup (listwise_vars,
5100 n * sizeof *listwise_vars);
5101 nest->specs[sv].listwise_vars = listwise_vars;
5102 nest->specs[sv].n_listwise_vars = n;
5107 struct ctables_summary_spec_set *merged = &t->summary_specs;
5108 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5110 for (size_t j = 0; j < stack->n; j++)
5112 const struct ctables_nest *nest = &stack->nests[j];
5114 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5115 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5120 struct merge_item min = items[0];
5121 for (size_t j = 1; j < n_left; j++)
5122 if (merge_item_compare_3way (&items[j], &min) < 0)
5125 if (merged->n >= merged->allocated)
5126 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5127 sizeof *merged->specs);
5128 merged->specs[merged->n++] = min.set->specs[min.ofs];
5130 for (size_t j = 0; j < n_left; )
5132 if (merge_item_compare_3way (&items[j], &min) == 0)
5134 struct merge_item *item = &items[j];
5135 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5136 if (++item->ofs >= item->set->n)
5138 items[j] = items[--n_left];
5148 for (size_t j = 0; j < merged->n; j++)
5149 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5151 for (size_t j = 0; j < stack->n; j++)
5153 const struct ctables_nest *nest = &stack->nests[j];
5154 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5156 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5157 for (size_t k = 0; k < specs->n; k++)
5158 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5159 specs->specs[k].axis_idx);
5165 size_t allocated_sum_vars = 0;
5166 enumerate_sum_vars (t->axes[t->summary_axis],
5167 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5169 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5170 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5174 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5175 enum pivot_axis_type a)
5177 struct ctables_stack *stack = &t->stacks[a];
5178 for (size_t i = 0; i < stack->n; i++)
5180 const struct ctables_nest *nest = &stack->nests[i];
5181 const struct variable *var = nest->vars[nest->n - 1];
5182 const union value *value = case_data (c, var);
5184 if (var_is_numeric (var) && value->f == SYSMIS)
5187 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5189 ctables_value_insert (t, value, var_get_width (var));
5194 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5196 const struct ctables_value *const *ap = a_;
5197 const struct ctables_value *const *bp = b_;
5198 const struct ctables_value *a = *ap;
5199 const struct ctables_value *b = *bp;
5200 const int *width = width_;
5201 return value_compare_3way (&a->value, &b->value, *width);
5205 ctables_sort_clabels_values (struct ctables_table *t)
5207 const struct variable *v0 = t->clabels_example;
5208 int width = var_get_width (v0);
5210 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5213 const struct val_labs *val_labs = var_get_value_labels (v0);
5214 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5215 vl = val_labs_next (val_labs, vl))
5216 if (ctables_categories_match (c0, &vl->value, v0))
5217 ctables_value_insert (t, &vl->value, width);
5220 size_t n = hmap_count (&t->clabels_values_map);
5221 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5223 struct ctables_value *clv;
5225 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5226 t->clabels_values[i++] = clv;
5227 t->n_clabels_values = n;
5230 sort (t->clabels_values, n, sizeof *t->clabels_values,
5231 compare_clabels_values_3way, &width);
5233 for (size_t i = 0; i < n; i++)
5234 t->clabels_values[i]->leaf = i;
5238 ctables_add_category_occurrences (const struct variable *var,
5239 struct hmap *occurrences,
5240 const struct ctables_categories *cats)
5242 const struct val_labs *val_labs = var_get_value_labels (var);
5244 for (size_t i = 0; i < cats->n_cats; i++)
5246 const struct ctables_category *c = &cats->cats[i];
5250 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5256 int width = var_get_width (var);
5258 value_init (&value, width);
5259 value_copy_buf_rpad (&value, width,
5260 CHAR_CAST (uint8_t *, c->string.string),
5261 c->string.length, ' ');
5262 ctables_add_occurrence (var, &value, occurrences);
5263 value_destroy (&value, width);
5268 assert (var_is_numeric (var));
5269 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5270 vl = val_labs_next (val_labs, vl))
5271 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5272 ctables_add_occurrence (var, &vl->value, occurrences);
5276 assert (var_is_alpha (var));
5277 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5278 vl = val_labs_next (val_labs, vl))
5279 if (in_string_range (&vl->value, var, c->srange))
5280 ctables_add_occurrence (var, &vl->value, occurrences);
5284 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5285 vl = val_labs_next (val_labs, vl))
5286 if (var_is_value_missing (var, &vl->value))
5287 ctables_add_occurrence (var, &vl->value, occurrences);
5291 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5292 vl = val_labs_next (val_labs, vl))
5293 ctables_add_occurrence (var, &vl->value, occurrences);
5296 case CCT_POSTCOMPUTE:
5306 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5307 vl = val_labs_next (val_labs, vl))
5308 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5309 ctables_add_occurrence (var, &vl->value, occurrences);
5312 case CCT_EXCLUDED_MISSING:
5319 ctables_section_recurse_add_empty_categories (
5320 struct ctables_section *s,
5321 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5322 enum pivot_axis_type a, size_t a_idx)
5324 if (a >= PIVOT_N_AXES)
5325 ctables_cell_insert__ (s, c, cats);
5326 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5327 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5330 const struct variable *var = s->nests[a]->vars[a_idx];
5331 const struct ctables_categories *categories = s->table->categories[
5332 var_get_dict_index (var)];
5333 int width = var_get_width (var);
5334 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5335 const struct ctables_occurrence *o;
5336 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5338 union value *value = case_data_rw (c, var);
5339 value_destroy (value, width);
5340 value_clone (value, &o->value, width);
5341 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5342 assert (cats[a][a_idx] != NULL);
5343 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5346 for (size_t i = 0; i < categories->n_cats; i++)
5348 const struct ctables_category *cat = &categories->cats[i];
5349 if (cat->type == CCT_POSTCOMPUTE)
5351 cats[a][a_idx] = cat;
5352 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5359 ctables_section_add_empty_categories (struct ctables_section *s)
5361 bool show_empty = false;
5362 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5364 for (size_t k = 0; k < s->nests[a]->n; k++)
5365 if (k != s->nests[a]->scale_idx)
5367 const struct variable *var = s->nests[a]->vars[k];
5368 const struct ctables_categories *cats = s->table->categories[
5369 var_get_dict_index (var)];
5370 if (cats->show_empty)
5373 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5379 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5380 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5381 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5386 ctables_section_clear (struct ctables_section *s)
5388 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5390 const struct ctables_nest *nest = s->nests[a];
5391 for (size_t i = 0; i < nest->n; i++)
5392 if (i != nest->scale_idx)
5394 const struct variable *var = nest->vars[i];
5395 int width = var_get_width (var);
5396 struct ctables_occurrence *o, *next;
5397 struct hmap *map = &s->occurrences[a][i];
5398 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5400 value_destroy (&o->value, width);
5401 hmap_delete (map, &o->node);
5408 struct ctables_cell *cell, *next_cell;
5409 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5411 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5413 const struct ctables_nest *nest = s->nests[a];
5414 for (size_t i = 0; i < nest->n; i++)
5415 if (i != nest->scale_idx)
5416 value_destroy (&cell->axes[a].cvs[i].value,
5417 var_get_width (nest->vars[i]));
5418 free (cell->axes[a].cvs);
5421 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5422 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5423 for (size_t i = 0; i < specs->n; i++)
5424 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5425 free (cell->summaries);
5427 hmap_delete (&s->cells, &cell->node);
5430 hmap_shrink (&s->cells);
5432 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5434 struct ctables_domain *domain, *next_domain;
5435 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5438 free (domain->sums);
5439 hmap_delete (&s->domains[dt], &domain->node);
5442 hmap_shrink (&s->domains[dt]);
5447 ctables_section_uninit (struct ctables_section *s)
5449 ctables_section_clear (s);
5451 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5453 struct ctables_nest *nest = s->nests[a];
5454 for (size_t i = 0; i < nest->n; i++)
5455 hmap_destroy (&s->occurrences[a][i]);
5456 free (s->occurrences[a]);
5459 hmap_destroy (&s->cells);
5460 for (size_t i = 0; i < N_CTDTS; i++)
5461 hmap_destroy (&s->domains[i]);
5465 ctables_table_clear (struct ctables_table *t)
5467 for (size_t i = 0; i < t->n_sections; i++)
5468 ctables_section_clear (&t->sections[i]);
5470 if (t->clabels_example)
5472 int width = var_get_width (t->clabels_example);
5473 struct ctables_value *value, *next_value;
5474 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5475 &t->clabels_values_map)
5477 value_destroy (&value->value, width);
5478 hmap_delete (&t->clabels_values_map, &value->node);
5481 hmap_shrink (&t->clabels_values_map);
5483 free (t->clabels_values);
5484 t->clabels_values = NULL;
5485 t->n_clabels_values = 0;
5490 ctables_execute (struct dataset *ds, struct casereader *input,
5493 for (size_t i = 0; i < ct->n_tables; i++)
5495 struct ctables_table *t = ct->tables[i];
5496 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5497 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5498 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5499 sizeof *t->sections);
5500 size_t ix[PIVOT_N_AXES];
5501 ctables_table_add_section (t, 0, ix);
5504 struct dictionary *dict = dataset_dict (ds);
5505 struct casegrouper *grouper
5506 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5507 ? casegrouper_create_splits (input, dict)
5508 : casegrouper_create_vars (input, NULL, 0));
5509 struct casereader *group;
5510 while (casegrouper_get_next_group (grouper, &group))
5512 /* Output SPLIT FILE variables. */
5513 struct ccase *c = casereader_peek (group, 0);
5516 output_split_file_values (ds, c);
5520 bool warn_on_invalid = true;
5521 for (c = casereader_read (group); c;
5522 case_unref (c), c = casereader_read (group))
5524 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5525 double e_weight = (ct->e_weight
5526 ? var_force_valid_weight (ct->e_weight,
5527 case_num (c, ct->e_weight),
5531 for (size_t i = 0; i < ct->n_tables; i++)
5533 struct ctables_table *t = ct->tables[i];
5535 for (size_t j = 0; j < t->n_sections; j++)
5536 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5538 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5539 if (t->label_axis[a] != a)
5540 ctables_insert_clabels_values (t, c, a);
5543 casereader_destroy (group);
5545 for (size_t i = 0; i < ct->n_tables; i++)
5547 struct ctables_table *t = ct->tables[i];
5549 if (t->clabels_example)
5550 ctables_sort_clabels_values (t);
5552 for (size_t j = 0; j < t->n_sections; j++)
5553 ctables_section_add_empty_categories (&t->sections[j]);
5555 ctables_table_output (ct, t);
5556 ctables_table_clear (t);
5559 return casegrouper_destroy (grouper);
5564 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5565 struct dictionary *);
5568 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5574 case CTPO_CAT_STRING:
5575 ss_dealloc (&e->string);
5578 case CTPO_CAT_SRANGE:
5579 for (size_t i = 0; i < 2; i++)
5580 ss_dealloc (&e->srange[i]);
5589 for (size_t i = 0; i < 2; i++)
5590 ctables_pcexpr_destroy (e->subs[i]);
5594 case CTPO_CAT_NUMBER:
5595 case CTPO_CAT_NRANGE:
5596 case CTPO_CAT_MISSING:
5597 case CTPO_CAT_OTHERNM:
5598 case CTPO_CAT_SUBTOTAL:
5599 case CTPO_CAT_TOTAL:
5603 msg_location_destroy (e->location);
5608 static struct ctables_pcexpr *
5609 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5610 struct ctables_pcexpr *sub0,
5611 struct ctables_pcexpr *sub1)
5613 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5614 *e = (struct ctables_pcexpr) {
5616 .subs = { sub0, sub1 },
5617 .location = msg_location_merged (sub0->location, sub1->location),
5622 /* How to parse an operator. */
5625 enum token_type token;
5626 enum ctables_postcompute_op op;
5629 static const struct operator *
5630 ctables_pcexpr_match_operator (struct lexer *lexer,
5631 const struct operator ops[], size_t n_ops)
5633 for (const struct operator *op = ops; op < ops + n_ops; op++)
5634 if (lex_token (lexer) == op->token)
5636 if (op->token != T_NEG_NUM)
5645 static struct ctables_pcexpr *
5646 ctables_pcexpr_parse_binary_operators__ (
5647 struct lexer *lexer, struct dictionary *dict,
5648 const struct operator ops[], size_t n_ops,
5649 parse_recursively_func *parse_next_level,
5650 const char *chain_warning, struct ctables_pcexpr *lhs)
5652 for (int op_count = 0; ; op_count++)
5654 const struct operator *op
5655 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5658 if (op_count > 1 && chain_warning)
5659 msg_at (SW, lhs->location, "%s", chain_warning);
5664 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5667 ctables_pcexpr_destroy (lhs);
5671 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5675 static struct ctables_pcexpr *
5676 ctables_pcexpr_parse_binary_operators (
5677 struct lexer *lexer, struct dictionary *dict,
5678 const struct operator ops[], size_t n_ops,
5679 parse_recursively_func *parse_next_level, const char *chain_warning)
5681 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5685 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5687 chain_warning, lhs);
5690 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5691 struct dictionary *);
5693 static struct ctables_pcexpr
5694 ctpo_cat_nrange (double low, double high)
5696 return (struct ctables_pcexpr) {
5697 .op = CTPO_CAT_NRANGE,
5698 .nrange = { low, high },
5702 static struct ctables_pcexpr
5703 ctpo_cat_srange (struct substring low, struct substring high)
5705 return (struct ctables_pcexpr) {
5706 .op = CTPO_CAT_SRANGE,
5707 .srange = { low, high },
5711 static struct ctables_pcexpr *
5712 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5714 int start_ofs = lex_ofs (lexer);
5715 struct ctables_pcexpr e;
5716 if (lex_is_number (lexer))
5718 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5719 .number = lex_number (lexer) };
5722 else if (lex_match_id (lexer, "MISSING"))
5723 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5724 else if (lex_match_id (lexer, "OTHERNM"))
5725 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5726 else if (lex_match_id (lexer, "TOTAL"))
5727 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5728 else if (lex_match_id (lexer, "SUBTOTAL"))
5730 size_t subtotal_index = 0;
5731 if (lex_match (lexer, T_LBRACK))
5733 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5735 subtotal_index = lex_integer (lexer);
5737 if (!lex_force_match (lexer, T_RBRACK))
5740 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5741 .subtotal_index = subtotal_index };
5743 else if (lex_match (lexer, T_LBRACK))
5745 if (lex_match_id (lexer, "LO"))
5747 if (!lex_force_match_id (lexer, "THRU"))
5750 if (lex_is_string (lexer))
5752 struct substring low = { .string = NULL };
5753 struct substring high = parse_substring (lexer, dict);
5754 e = ctpo_cat_srange (low, high);
5758 if (!lex_force_num (lexer))
5760 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5764 else if (lex_is_number (lexer))
5766 double number = lex_number (lexer);
5768 if (lex_match_id (lexer, "THRU"))
5770 if (lex_match_id (lexer, "HI"))
5771 e = ctpo_cat_nrange (number, DBL_MAX);
5774 if (!lex_force_num (lexer))
5776 e = ctpo_cat_nrange (number, lex_number (lexer));
5781 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5784 else if (lex_is_string (lexer))
5786 struct substring s = parse_substring (lexer, dict);
5788 if (lex_match_id (lexer, "THRU"))
5790 struct substring high;
5792 if (lex_match_id (lexer, "HI"))
5793 high = (struct substring) { .string = NULL };
5796 if (!lex_force_string (lexer))
5801 high = parse_substring (lexer, dict);
5804 e = ctpo_cat_srange (s, high);
5807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5811 lex_error (lexer, NULL);
5815 if (!lex_force_match (lexer, T_RBRACK))
5817 if (e.op == CTPO_CAT_STRING)
5818 ss_dealloc (&e.string);
5819 else if (e.op == CTPO_CAT_SRANGE)
5821 ss_dealloc (&e.srange[0]);
5822 ss_dealloc (&e.srange[1]);
5827 else if (lex_match (lexer, T_LPAREN))
5829 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5832 if (!lex_force_match (lexer, T_RPAREN))
5834 ctables_pcexpr_destroy (ep);
5841 lex_error (lexer, NULL);
5845 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5846 return xmemdup (&e, sizeof e);
5849 static struct ctables_pcexpr *
5850 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5851 struct lexer *lexer, int start_ofs)
5853 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5854 *e = (struct ctables_pcexpr) {
5857 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5862 static struct ctables_pcexpr *
5863 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5865 static const struct operator op = { T_EXP, CTPO_POW };
5867 const char *chain_warning =
5868 _("The exponentiation operator (`**') is left-associative: "
5869 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5870 "To disable this warning, insert parentheses.");
5872 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5873 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5874 ctables_pcexpr_parse_primary,
5877 /* Special case for situations like "-5**6", which must be parsed as
5880 int start_ofs = lex_ofs (lexer);
5881 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5882 *lhs = (struct ctables_pcexpr) {
5883 .op = CTPO_CONSTANT,
5884 .number = -lex_tokval (lexer),
5885 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5889 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5890 lexer, dict, &op, 1,
5891 ctables_pcexpr_parse_primary, chain_warning, lhs);
5895 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5898 /* Parses the unary minus level. */
5899 static struct ctables_pcexpr *
5900 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5902 int start_ofs = lex_ofs (lexer);
5903 if (!lex_match (lexer, T_DASH))
5904 return ctables_pcexpr_parse_exp (lexer, dict);
5906 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5910 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5913 /* Parses the multiplication and division level. */
5914 static struct ctables_pcexpr *
5915 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5917 static const struct operator ops[] =
5919 { T_ASTERISK, CTPO_MUL },
5920 { T_SLASH, CTPO_DIV },
5923 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5924 sizeof ops / sizeof *ops,
5925 ctables_pcexpr_parse_neg, NULL);
5928 /* Parses the addition and subtraction level. */
5929 static struct ctables_pcexpr *
5930 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5932 static const struct operator ops[] =
5934 { T_PLUS, CTPO_ADD },
5935 { T_DASH, CTPO_SUB },
5936 { T_NEG_NUM, CTPO_ADD },
5939 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5940 ops, sizeof ops / sizeof *ops,
5941 ctables_pcexpr_parse_mul, NULL);
5944 static struct ctables_postcompute *
5945 ctables_find_postcompute (struct ctables *ct, const char *name)
5947 struct ctables_postcompute *pc;
5948 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5949 utf8_hash_case_string (name, 0), &ct->postcomputes)
5950 if (!utf8_strcasecmp (pc->name, name))
5956 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5959 int pcompute_start = lex_ofs (lexer) - 1;
5961 if (!lex_match (lexer, T_AND))
5963 lex_error_expecting (lexer, "&");
5966 if (!lex_force_id (lexer))
5969 char *name = ss_xstrdup (lex_tokss (lexer));
5972 if (!lex_force_match (lexer, T_EQUALS)
5973 || !lex_force_match_id (lexer, "EXPR")
5974 || !lex_force_match (lexer, T_LPAREN))
5980 int expr_start = lex_ofs (lexer);
5981 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5982 int expr_end = lex_ofs (lexer) - 1;
5983 if (!expr || !lex_force_match (lexer, T_RPAREN))
5985 ctables_pcexpr_destroy (expr);
5989 int pcompute_end = lex_ofs (lexer) - 1;
5991 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5994 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5997 msg_at (SW, location, _("New definition of &%s will override the "
5998 "previous definition."),
6000 msg_at (SN, pc->location, _("This is the previous definition."));
6002 ctables_pcexpr_destroy (pc->expr);
6003 msg_location_destroy (pc->location);
6008 pc = xmalloc (sizeof *pc);
6009 *pc = (struct ctables_postcompute) { .name = name };
6010 hmap_insert (&ct->postcomputes, &pc->hmap_node,
6011 utf8_hash_case_string (pc->name, 0));
6014 pc->location = location;
6016 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
6021 ctables_parse_pproperties_format (struct lexer *lexer,
6022 struct ctables_summary_spec_set *sss)
6024 *sss = (struct ctables_summary_spec_set) { .n = 0 };
6026 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
6027 && !(lex_token (lexer) == T_ID
6028 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
6029 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
6030 lex_tokss (lexer)))))
6032 /* Parse function. */
6033 enum ctables_summary_function function;
6034 if (!parse_ctables_summary_function (lexer, &function))
6037 /* Parse percentile. */
6038 double percentile = 0;
6039 if (function == CTSF_PTILE)
6041 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
6043 percentile = lex_number (lexer);
6048 struct fmt_spec format;
6049 bool is_ctables_format;
6050 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
6053 if (sss->n >= sss->allocated)
6054 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6055 sizeof *sss->specs);
6056 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6057 .function = function,
6058 .percentile = percentile,
6060 .is_ctables_format = is_ctables_format,
6066 ctables_summary_spec_set_uninit (sss);
6071 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
6073 struct ctables_postcompute **pcs = NULL;
6075 size_t allocated_pcs = 0;
6077 while (lex_match (lexer, T_AND))
6079 if (!lex_force_id (lexer))
6081 struct ctables_postcompute *pc
6082 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6085 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6090 if (n_pcs >= allocated_pcs)
6091 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
6095 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6097 if (lex_match_id (lexer, "LABEL"))
6099 lex_match (lexer, T_EQUALS);
6100 if (!lex_force_string (lexer))
6103 for (size_t i = 0; i < n_pcs; i++)
6105 free (pcs[i]->label);
6106 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6111 else if (lex_match_id (lexer, "FORMAT"))
6113 lex_match (lexer, T_EQUALS);
6115 struct ctables_summary_spec_set sss;
6116 if (!ctables_parse_pproperties_format (lexer, &sss))
6119 for (size_t i = 0; i < n_pcs; i++)
6122 ctables_summary_spec_set_uninit (pcs[i]->specs);
6124 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6125 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6127 ctables_summary_spec_set_uninit (&sss);
6129 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6131 lex_match (lexer, T_EQUALS);
6132 bool hide_source_cats;
6133 if (!parse_bool (lexer, &hide_source_cats))
6135 for (size_t i = 0; i < n_pcs; i++)
6136 pcs[i]->hide_source_cats = hide_source_cats;
6140 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
6153 put_strftime (struct string *out, time_t now, const char *format)
6155 const struct tm *tm = localtime (&now);
6157 strftime (value, sizeof value, format, tm);
6158 ds_put_cstr (out, value);
6162 skip_prefix (struct substring *s, struct substring prefix)
6164 if (ss_starts_with (*s, prefix))
6166 ss_advance (s, prefix.length);
6174 put_table_expression (struct string *out, struct lexer *lexer,
6175 struct dictionary *dict, int expr_start, int expr_end)
6178 for (int ofs = expr_start; ofs < expr_end; ofs++)
6180 const struct token *t = lex_ofs_token (lexer, ofs);
6181 if (t->type == T_LBRACK)
6183 else if (t->type == T_RBRACK && nest > 0)
6189 else if (t->type == T_ID)
6191 const struct variable *var
6192 = dict_lookup_var (dict, t->string.string);
6193 const char *label = var ? var_get_label (var) : NULL;
6194 ds_put_cstr (out, label ? label : t->string.string);
6198 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6199 ds_put_byte (out, ' ');
6201 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6202 ds_put_cstr (out, repr);
6205 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6206 ds_put_byte (out, ' ');
6212 put_title_text (struct string *out, struct substring in, time_t now,
6213 struct lexer *lexer, struct dictionary *dict,
6214 int expr_start, int expr_end)
6218 size_t chunk = ss_find_byte (in, ')');
6219 ds_put_substring (out, ss_head (in, chunk));
6220 ss_advance (&in, chunk);
6221 if (ss_is_empty (in))
6224 if (skip_prefix (&in, ss_cstr (")DATE")))
6225 put_strftime (out, now, "%x");
6226 else if (skip_prefix (&in, ss_cstr (")TIME")))
6227 put_strftime (out, now, "%X");
6228 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6229 put_table_expression (out, lexer, dict, expr_start, expr_end);
6232 ds_put_byte (out, ')');
6233 ss_advance (&in, 1);
6239 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6241 struct casereader *input = NULL;
6243 struct measure_guesser *mg = measure_guesser_create (ds);
6246 input = proc_open (ds);
6247 measure_guesser_run (mg, input);
6248 measure_guesser_destroy (mg);
6251 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6252 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6253 enum settings_value_show tvars = settings_get_show_variables ();
6254 for (size_t i = 0; i < n_vars; i++)
6255 vlabels[i] = (enum ctables_vlabel) tvars;
6257 struct pivot_table_look *look = pivot_table_look_unshare (
6258 pivot_table_look_ref (pivot_table_look_get_default ()));
6259 look->omit_empty = false;
6261 struct ctables *ct = xmalloc (sizeof *ct);
6262 *ct = (struct ctables) {
6263 .dict = dataset_dict (ds),
6265 .ctables_formats = FMT_SETTINGS_INIT,
6267 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6270 time_t now = time (NULL);
6275 const char *dot_string;
6276 const char *comma_string;
6278 static const struct ctf ctfs[4] = {
6279 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6280 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6281 { CTEF_PAREN, "-,(,),", "-.(.)." },
6282 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6284 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6285 for (size_t i = 0; i < 4; i++)
6287 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6288 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6289 fmt_number_style_from_string (s));
6292 if (!lex_force_match (lexer, T_SLASH))
6295 while (!lex_match_id (lexer, "TABLE"))
6297 if (lex_match_id (lexer, "FORMAT"))
6299 double widths[2] = { SYSMIS, SYSMIS };
6300 double units_per_inch = 72.0;
6302 while (lex_token (lexer) != T_SLASH)
6304 if (lex_match_id (lexer, "MINCOLWIDTH"))
6306 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6309 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6311 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6314 else if (lex_match_id (lexer, "UNITS"))
6316 lex_match (lexer, T_EQUALS);
6317 if (lex_match_id (lexer, "POINTS"))
6318 units_per_inch = 72.0;
6319 else if (lex_match_id (lexer, "INCHES"))
6320 units_per_inch = 1.0;
6321 else if (lex_match_id (lexer, "CM"))
6322 units_per_inch = 2.54;
6325 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6329 else if (lex_match_id (lexer, "EMPTY"))
6334 lex_match (lexer, T_EQUALS);
6335 if (lex_match_id (lexer, "ZERO"))
6337 /* Nothing to do. */
6339 else if (lex_match_id (lexer, "BLANK"))
6340 ct->zero = xstrdup ("");
6341 else if (lex_force_string (lexer))
6343 ct->zero = ss_xstrdup (lex_tokss (lexer));
6349 else if (lex_match_id (lexer, "MISSING"))
6351 lex_match (lexer, T_EQUALS);
6352 if (!lex_force_string (lexer))
6356 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6357 ? ss_xstrdup (lex_tokss (lexer))
6363 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6364 "UNITS", "EMPTY", "MISSING");
6369 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6370 && widths[0] > widths[1])
6372 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6376 for (size_t i = 0; i < 2; i++)
6377 if (widths[i] != SYSMIS)
6379 int *wr = ct->look->width_ranges[TABLE_HORZ];
6380 wr[i] = widths[i] / units_per_inch * 96.0;
6385 else if (lex_match_id (lexer, "VLABELS"))
6387 if (!lex_force_match_id (lexer, "VARIABLES"))
6389 lex_match (lexer, T_EQUALS);
6391 struct variable **vars;
6393 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6397 if (!lex_force_match_id (lexer, "DISPLAY"))
6402 lex_match (lexer, T_EQUALS);
6404 enum ctables_vlabel vlabel;
6405 if (lex_match_id (lexer, "DEFAULT"))
6406 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6407 else if (lex_match_id (lexer, "NAME"))
6409 else if (lex_match_id (lexer, "LABEL"))
6410 vlabel = CTVL_LABEL;
6411 else if (lex_match_id (lexer, "BOTH"))
6413 else if (lex_match_id (lexer, "NONE"))
6417 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6423 for (size_t i = 0; i < n_vars; i++)
6424 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6427 else if (lex_match_id (lexer, "MRSETS"))
6429 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6431 lex_match (lexer, T_EQUALS);
6432 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6435 else if (lex_match_id (lexer, "SMISSING"))
6437 if (lex_match_id (lexer, "VARIABLE"))
6438 ct->smissing_listwise = false;
6439 else if (lex_match_id (lexer, "LISTWISE"))
6440 ct->smissing_listwise = true;
6443 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6447 else if (lex_match_id (lexer, "PCOMPUTE"))
6449 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6452 else if (lex_match_id (lexer, "PPROPERTIES"))
6454 if (!ctables_parse_pproperties (lexer, ct))
6457 else if (lex_match_id (lexer, "WEIGHT"))
6459 if (!lex_force_match_id (lexer, "VARIABLE"))
6461 lex_match (lexer, T_EQUALS);
6462 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6466 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6468 if (lex_match_id (lexer, "COUNT"))
6470 lex_match (lexer, T_EQUALS);
6471 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6474 ct->hide_threshold = lex_integer (lexer);
6477 else if (ct->hide_threshold == 0)
6478 ct->hide_threshold = 5;
6482 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6483 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6484 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6488 if (!lex_force_match (lexer, T_SLASH))
6492 size_t allocated_tables = 0;
6495 if (ct->n_tables >= allocated_tables)
6496 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6497 sizeof *ct->tables);
6499 struct ctables_category *cat = xmalloc (sizeof *cat);
6500 *cat = (struct ctables_category) {
6502 .include_missing = false,
6503 .sort_ascending = true,
6506 struct ctables_categories *c = xmalloc (sizeof *c);
6507 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6508 *c = (struct ctables_categories) {
6515 struct ctables_categories **categories = xnmalloc (n_vars,
6516 sizeof *categories);
6517 for (size_t i = 0; i < n_vars; i++)
6520 struct ctables_table *t = xmalloc (sizeof *t);
6521 *t = (struct ctables_table) {
6523 .slabels_axis = PIVOT_AXIS_COLUMN,
6524 .slabels_visible = true,
6525 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6527 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6528 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6529 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6531 .clabels_from_axis = PIVOT_AXIS_LAYER,
6532 .categories = categories,
6533 .n_categories = n_vars,
6536 ct->tables[ct->n_tables++] = t;
6538 lex_match (lexer, T_EQUALS);
6539 int expr_start = lex_ofs (lexer);
6540 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6542 if (lex_match (lexer, T_BY))
6544 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6545 ct, t, PIVOT_AXIS_COLUMN))
6548 if (lex_match (lexer, T_BY))
6550 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6551 ct, t, PIVOT_AXIS_LAYER))
6555 int expr_end = lex_ofs (lexer);
6557 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6558 && !t->axes[PIVOT_AXIS_LAYER])
6560 lex_error (lexer, _("At least one variable must be specified."));
6564 const struct ctables_axis *scales[PIVOT_N_AXES];
6565 size_t n_scales = 0;
6566 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6568 scales[a] = find_scale (t->axes[a]);
6574 msg (SE, _("Scale variables may appear only on one axis."));
6575 if (scales[PIVOT_AXIS_ROW])
6576 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6577 _("This scale variable appears on the rows axis."));
6578 if (scales[PIVOT_AXIS_COLUMN])
6579 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6580 _("This scale variable appears on the columns axis."));
6581 if (scales[PIVOT_AXIS_LAYER])
6582 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6583 _("This scale variable appears on the layer axis."));
6587 const struct ctables_axis *summaries[PIVOT_N_AXES];
6588 size_t n_summaries = 0;
6589 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6591 summaries[a] = (scales[a]
6593 : find_categorical_summary_spec (t->axes[a]));
6597 if (n_summaries > 1)
6599 msg (SE, _("Summaries may appear only on one axis."));
6600 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6603 msg_at (SN, summaries[a]->loc,
6605 ? _("This variable on the rows axis has a summary.")
6606 : a == PIVOT_AXIS_COLUMN
6607 ? _("This variable on the columns axis has a summary.")
6608 : _("This variable on the layers axis has a summary."));
6610 msg_at (SN, summaries[a]->loc,
6611 _("This is a scale variable, so it always has a "
6612 "summary even if the syntax does not explicitly "
6617 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6618 if (n_summaries ? summaries[a] : t->axes[a])
6620 t->summary_axis = a;
6624 if (lex_token (lexer) == T_ENDCMD)
6626 if (!ctables_prepare_table (t))
6630 if (!lex_force_match (lexer, T_SLASH))
6633 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6635 if (lex_match_id (lexer, "SLABELS"))
6637 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6639 if (lex_match_id (lexer, "POSITION"))
6641 lex_match (lexer, T_EQUALS);
6642 if (lex_match_id (lexer, "COLUMN"))
6643 t->slabels_axis = PIVOT_AXIS_COLUMN;
6644 else if (lex_match_id (lexer, "ROW"))
6645 t->slabels_axis = PIVOT_AXIS_ROW;
6646 else if (lex_match_id (lexer, "LAYER"))
6647 t->slabels_axis = PIVOT_AXIS_LAYER;
6650 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6654 else if (lex_match_id (lexer, "VISIBLE"))
6656 lex_match (lexer, T_EQUALS);
6657 if (!parse_bool (lexer, &t->slabels_visible))
6662 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6667 else if (lex_match_id (lexer, "CLABELS"))
6669 if (lex_match_id (lexer, "AUTO"))
6671 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6672 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6674 else if (lex_match_id (lexer, "ROWLABELS"))
6676 lex_match (lexer, T_EQUALS);
6677 if (lex_match_id (lexer, "OPPOSITE"))
6678 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6679 else if (lex_match_id (lexer, "LAYER"))
6680 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6683 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6687 else if (lex_match_id (lexer, "COLLABELS"))
6689 lex_match (lexer, T_EQUALS);
6690 if (lex_match_id (lexer, "OPPOSITE"))
6691 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6692 else if (lex_match_id (lexer, "LAYER"))
6693 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6696 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6702 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6707 else if (lex_match_id (lexer, "CRITERIA"))
6709 if (!lex_force_match_id (lexer, "CILEVEL"))
6711 lex_match (lexer, T_EQUALS);
6713 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6715 t->cilevel = lex_number (lexer);
6718 else if (lex_match_id (lexer, "CATEGORIES"))
6720 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6724 else if (lex_match_id (lexer, "TITLES"))
6729 if (lex_match_id (lexer, "CAPTION"))
6730 textp = &t->caption;
6731 else if (lex_match_id (lexer, "CORNER"))
6733 else if (lex_match_id (lexer, "TITLE"))
6737 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6740 lex_match (lexer, T_EQUALS);
6742 struct string s = DS_EMPTY_INITIALIZER;
6743 while (lex_is_string (lexer))
6745 if (!ds_is_empty (&s))
6746 ds_put_byte (&s, ' ');
6747 put_title_text (&s, lex_tokss (lexer), now,
6748 lexer, dataset_dict (ds),
6749 expr_start, expr_end);
6753 *textp = ds_steal_cstr (&s);
6755 while (lex_token (lexer) != T_SLASH
6756 && lex_token (lexer) != T_ENDCMD);
6758 else if (lex_match_id (lexer, "SIGTEST"))
6762 t->chisq = xmalloc (sizeof *t->chisq);
6763 *t->chisq = (struct ctables_chisq) {
6765 .include_mrsets = true,
6766 .all_visible = true,
6772 if (lex_match_id (lexer, "TYPE"))
6774 lex_match (lexer, T_EQUALS);
6775 if (!lex_force_match_id (lexer, "CHISQUARE"))
6778 else if (lex_match_id (lexer, "ALPHA"))
6780 lex_match (lexer, T_EQUALS);
6781 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6783 t->chisq->alpha = lex_number (lexer);
6786 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6788 lex_match (lexer, T_EQUALS);
6789 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6792 else if (lex_match_id (lexer, "CATEGORIES"))
6794 lex_match (lexer, T_EQUALS);
6795 if (lex_match_id (lexer, "ALLVISIBLE"))
6796 t->chisq->all_visible = true;
6797 else if (lex_match_id (lexer, "SUBTOTALS"))
6798 t->chisq->all_visible = false;
6801 lex_error_expecting (lexer,
6802 "ALLVISIBLE", "SUBTOTALS");
6808 lex_error_expecting (lexer, "TYPE", "ALPHA",
6809 "INCLUDEMRSETS", "CATEGORIES");
6813 while (lex_token (lexer) != T_SLASH
6814 && lex_token (lexer) != T_ENDCMD);
6816 else if (lex_match_id (lexer, "COMPARETEST"))
6820 t->pairwise = xmalloc (sizeof *t->pairwise);
6821 *t->pairwise = (struct ctables_pairwise) {
6823 .alpha = { .05, .05 },
6824 .adjust = BONFERRONI,
6825 .include_mrsets = true,
6826 .meansvariance_allcats = true,
6827 .all_visible = true,
6836 if (lex_match_id (lexer, "TYPE"))
6838 lex_match (lexer, T_EQUALS);
6839 if (lex_match_id (lexer, "PROP"))
6840 t->pairwise->type = PROP;
6841 else if (lex_match_id (lexer, "MEAN"))
6842 t->pairwise->type = MEAN;
6845 lex_error_expecting (lexer, "PROP", "MEAN");
6849 else if (lex_match_id (lexer, "ALPHA"))
6851 lex_match (lexer, T_EQUALS);
6853 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6855 double a0 = lex_number (lexer);
6858 lex_match (lexer, T_COMMA);
6859 if (lex_is_number (lexer))
6861 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6863 double a1 = lex_number (lexer);
6866 t->pairwise->alpha[0] = MIN (a0, a1);
6867 t->pairwise->alpha[1] = MAX (a0, a1);
6870 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6872 else if (lex_match_id (lexer, "ADJUST"))
6874 lex_match (lexer, T_EQUALS);
6875 if (lex_match_id (lexer, "BONFERRONI"))
6876 t->pairwise->adjust = BONFERRONI;
6877 else if (lex_match_id (lexer, "BH"))
6878 t->pairwise->adjust = BH;
6879 else if (lex_match_id (lexer, "NONE"))
6880 t->pairwise->adjust = 0;
6883 lex_error_expecting (lexer, "BONFERRONI", "BH",
6888 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6890 lex_match (lexer, T_EQUALS);
6891 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6894 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6896 lex_match (lexer, T_EQUALS);
6897 if (lex_match_id (lexer, "ALLCATS"))
6898 t->pairwise->meansvariance_allcats = true;
6899 else if (lex_match_id (lexer, "TESTEDCATS"))
6900 t->pairwise->meansvariance_allcats = false;
6903 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6907 else if (lex_match_id (lexer, "CATEGORIES"))
6909 lex_match (lexer, T_EQUALS);
6910 if (lex_match_id (lexer, "ALLVISIBLE"))
6911 t->pairwise->all_visible = true;
6912 else if (lex_match_id (lexer, "SUBTOTALS"))
6913 t->pairwise->all_visible = false;
6916 lex_error_expecting (lexer, "ALLVISIBLE",
6921 else if (lex_match_id (lexer, "MERGE"))
6923 lex_match (lexer, T_EQUALS);
6924 if (!parse_bool (lexer, &t->pairwise->merge))
6927 else if (lex_match_id (lexer, "STYLE"))
6929 lex_match (lexer, T_EQUALS);
6930 if (lex_match_id (lexer, "APA"))
6931 t->pairwise->apa_style = true;
6932 else if (lex_match_id (lexer, "SIMPLE"))
6933 t->pairwise->apa_style = false;
6936 lex_error_expecting (lexer, "APA", "SIMPLE");
6940 else if (lex_match_id (lexer, "SHOWSIG"))
6942 lex_match (lexer, T_EQUALS);
6943 if (!parse_bool (lexer, &t->pairwise->show_sig))
6948 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6949 "INCLUDEMRSETS", "MEANSVARIANCE",
6950 "CATEGORIES", "MERGE", "STYLE",
6955 while (lex_token (lexer) != T_SLASH
6956 && lex_token (lexer) != T_ENDCMD);
6960 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6961 "CRITERIA", "CATEGORIES", "TITLES",
6962 "SIGTEST", "COMPARETEST");
6966 if (!lex_match (lexer, T_SLASH))
6970 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6971 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6973 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6977 if (!ctables_prepare_table (t))
6980 while (lex_token (lexer) != T_ENDCMD);
6983 input = proc_open (ds);
6984 bool ok = ctables_execute (ds, input, ct);
6985 ok = proc_commit (ds) && ok;
6987 ctables_destroy (ct);
6988 return ok ? CMD_SUCCESS : CMD_FAILURE;
6993 ctables_destroy (ct);