1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1398 hmap_destroy (&ct->postcomputes);
1400 fmt_settings_uninit (&ct->ctables_formats);
1401 pivot_table_look_unref (ct->look);
1405 for (size_t i = 0; i < ct->n_tables; i++)
1406 ctables_table_destroy (ct->tables[i]);
1411 static struct ctables_category
1412 cct_nrange (double low, double high)
1414 return (struct ctables_category) {
1416 .nrange = { low, high }
1420 static struct ctables_category
1421 cct_srange (struct substring low, struct substring high)
1423 return (struct ctables_category) {
1425 .srange = { low, high }
1430 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1431 struct ctables_category *cat)
1434 if (lex_match (lexer, T_EQUALS))
1436 if (!lex_force_string (lexer))
1439 total_label = ss_xstrdup (lex_tokss (lexer));
1443 total_label = xstrdup (_("Subtotal"));
1445 *cat = (struct ctables_category) {
1446 .type = CCT_SUBTOTAL,
1447 .hide_subcategories = hide_subcategories,
1448 .total_label = total_label
1453 static struct substring
1454 parse_substring (struct lexer *lexer, struct dictionary *dict)
1456 struct substring s = recode_substring_pool (
1457 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1458 ss_rtrim (&s, ss_cstr (" "));
1464 ctables_table_parse_explicit_category (struct lexer *lexer,
1465 struct dictionary *dict,
1467 struct ctables_category *cat)
1469 if (lex_match_id (lexer, "OTHERNM"))
1470 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1471 else if (lex_match_id (lexer, "MISSING"))
1472 *cat = (struct ctables_category) { .type = CCT_MISSING };
1473 else if (lex_match_id (lexer, "SUBTOTAL"))
1474 return ctables_table_parse_subtotal (lexer, false, cat);
1475 else if (lex_match_id (lexer, "HSUBTOTAL"))
1476 return ctables_table_parse_subtotal (lexer, true, cat);
1477 else if (lex_match_id (lexer, "LO"))
1479 if (!lex_force_match_id (lexer, "THRU"))
1481 if (lex_is_string (lexer))
1483 struct substring sr0 = { .string = NULL };
1484 struct substring sr1 = parse_substring (lexer, dict);
1485 *cat = cct_srange (sr0, sr1);
1487 else if (lex_force_num (lexer))
1489 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1495 else if (lex_is_number (lexer))
1497 double number = lex_number (lexer);
1499 if (lex_match_id (lexer, "THRU"))
1501 if (lex_match_id (lexer, "HI"))
1502 *cat = cct_nrange (number, DBL_MAX);
1505 if (!lex_force_num (lexer))
1507 *cat = cct_nrange (number, lex_number (lexer));
1512 *cat = (struct ctables_category) {
1517 else if (lex_is_string (lexer))
1519 struct substring s = parse_substring (lexer, dict);
1520 if (lex_match_id (lexer, "THRU"))
1522 if (lex_match_id (lexer, "HI"))
1524 struct substring sr1 = { .string = NULL };
1525 *cat = cct_srange (s, sr1);
1529 if (!lex_force_string (lexer))
1534 struct substring sr1 = parse_substring (lexer, dict);
1535 *cat = cct_srange (s, sr1);
1539 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1541 else if (lex_match (lexer, T_AND))
1543 if (!lex_force_id (lexer))
1545 struct ctables_postcompute *pc = ctables_find_postcompute (
1546 ct, lex_tokcstr (lexer));
1549 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1550 msg_at (SE, loc, _("Unknown postcompute &%s."),
1551 lex_tokcstr (lexer));
1552 msg_location_destroy (loc);
1557 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1561 lex_error (lexer, NULL);
1569 parse_category_string (struct msg_location *location,
1570 struct substring s, const struct dictionary *dict,
1571 enum fmt_type format, double *n)
1574 char *error = data_in (s, dict_get_encoding (dict), format,
1575 settings_get_fmt_settings (), &v, 0, NULL);
1578 msg_at (SE, location,
1579 _("Failed to parse category specification as format %s: %s."),
1580 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression E refers to.

   Each category is tested according to the CTPO_CAT_* kind of reference:
   numbers and numeric ranges by comparing values, strings with ss_equals(),
   string ranges with nullable_substring_equal() on both ends, and MISSING,
   OTHERNM, SUBTOTAL, and TOTAL by category type alone.

   SUBTOTAL references also consult E->subtotal_index, which is compared
   against N_SUBTOTALS, with index 0 handled as its own case.  After the loop,
   an index-0 SUBTOTAL reference is singled out when N_SUBTOTALS exceeds 1.
   NOTE(review): presumably that case yields no match, because the reference
   is ambiguous -- confirm. */
1589 static struct ctables_category *
1590 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1591 const struct ctables_pcexpr *e)
1593 struct ctables_category *best = NULL;
1594 size_t n_subtotals = 0;
1595 for (size_t i = 0; i < cats->n_cats; i++)
1597 struct ctables_category *cat = &cats->cats[i];
1600 case CTPO_CAT_NUMBER:
1601 if (cat->type == CCT_NUMBER && cat->number == e->number)
1605 case CTPO_CAT_STRING:
1606 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1610 case CTPO_CAT_NRANGE:
1611 if (cat->type == CCT_NRANGE
1612 && cat->nrange[0] == e->nrange[0]
1613 && cat->nrange[1] == e->nrange[1])
1617 case CTPO_CAT_SRANGE:
1618 if (cat->type == CCT_SRANGE
1619 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1620 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1624 case CTPO_CAT_MISSING:
1625 if (cat->type == CCT_MISSING)
1629 case CTPO_CAT_OTHERNM:
1630 if (cat->type == CCT_OTHERNM)
1634 case CTPO_CAT_SUBTOTAL:
1635 if (cat->type == CCT_SUBTOTAL)
1638 if (e->subtotal_index == n_subtotals)
1640 else if (e->subtotal_index == 0)
1645 case CTPO_CAT_TOTAL:
1646 if (cat->type == CCT_TOTAL)
/* A SUBTOTAL reference without a position, in a list with several
   subtotals. */
1660 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression E refers to, taking
   PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string and string-range references are
   first converted to numbers by parse_category_string(), using DICT and
   PARSE_FORMAT, and the search is then done with a temporary expression of
   kind CTPO_CAT_NUMBER or CTPO_CAT_NRANGE that keeps E's location.  A range
   end whose string is null becomes -DBL_MAX (lower end) or DBL_MAX (upper
   end).

   Every other case searches with E unchanged. */
1665 static struct ctables_category *
1666 ctables_find_category_for_postcompute (const struct dictionary *dict,
1667 const struct ctables_categories *cats,
1668 enum fmt_type parse_format,
1669 const struct ctables_pcexpr *e)
1671 if (parse_format != FMT_F)
1673 if (e->op == CTPO_CAT_STRING)
1676 if (!parse_category_string (e->location, e->string, dict,
1677 parse_format, &number))
1680 struct ctables_pcexpr e2 = {
1681 .op = CTPO_CAT_NUMBER,
1683 .location = e->location,
1685 return ctables_find_category_for_postcompute__ (cats, &e2);
1687 else if (e->op == CTPO_CAT_SRANGE)
/* A null string marks an open end of the range. */
1690 if (!e->srange[0].string)
1691 nrange[0] = -DBL_MAX;
1692 else if (!parse_category_string (e->location, e->srange[0], dict,
1693 parse_format, &nrange[0]))
1696 if (!e->srange[1].string)
1697 nrange[1] = DBL_MAX;
1698 else if (!parse_category_string (e->location, e->srange[1], dict,
1699 parse_format, &nrange[1]))
1702 struct ctables_pcexpr e2 = {
1703 .op = CTPO_CAT_NRANGE,
1704 .nrange = { nrange[0], nrange[1] },
1705 .location = e->location,
1707 return ctables_find_category_for_postcompute__ (cats, &e2);
1710 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, which belongs to computed category
   PC_CAT, against the category list CATS, whose syntax location is
   CATS_LOCATION.

   For each kind of category reference, the referenced category is looked up
   with ctables_find_category_for_postcompute(), using PC_CAT->parse_format.
   Two problems are diagnosed: a SUBTOTAL reference without a position when
   CATS has more than one subtotal, and a reference to a category that is not
   in the list.  Each error comes with notes that point at the reference and
   suggest a fix.

   The two subexpressions in E->subs[] are checked recursively.  The
   recursion result is negated by this function, so a false result means
   failure.
   NOTE(review): what is done when PC_CAT->pc->hide_source_cats is set is not
   visible here -- confirm. */
1714 ctables_recursive_check_postcompute (struct dictionary *dict,
1715 const struct ctables_pcexpr *e,
1716 struct ctables_category *pc_cat,
1717 const struct ctables_categories *cats,
1718 const struct msg_location *cats_location)
1722 case CTPO_CAT_NUMBER:
1723 case CTPO_CAT_STRING:
1724 case CTPO_CAT_NRANGE:
1725 case CTPO_CAT_SRANGE:
1726 case CTPO_CAT_MISSING:
1727 case CTPO_CAT_OTHERNM:
1728 case CTPO_CAT_SUBTOTAL:
1729 case CTPO_CAT_TOTAL:
1731 struct ctables_category *cat = ctables_find_category_for_postcompute (
1732 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference without a position is only a problem when the list
   contains more than one subtotal, so count them. */
1735 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1737 size_t n_subtotals = 0;
1738 for (size_t i = 0; i < cats->n_cats; i++)
1739 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1740 if (n_subtotals > 1)
1742 msg_at (SE, cats_location,
1743 ngettext ("These categories include %zu instance "
1744 "of SUBTOTAL or HSUBTOTAL, so references "
1745 "from computed categories must refer to "
1746 "subtotals by position, "
1747 "e.g. SUBTOTAL[1].",
1748 "These categories include %zu instances "
1749 "of SUBTOTAL or HSUBTOTAL, so references "
1750 "from computed categories must refer to "
1751 "subtotals by position, "
1752 "e.g. SUBTOTAL[1].",
1755 msg_at (SN, e->location,
1756 _("This is the reference that lacks a position."));
/* Report a reference to a category that is absent from the list, with a
   suggested fix that depends on the kind of reference. */
1761 msg_at (SE, pc_cat->location,
1762 _("Computed category &%s references a category not included "
1763 "in the category list."),
1765 msg_at (SN, e->location, _("This is the missing category."));
1766 if (e->op == CTPO_CAT_SUBTOTAL)
1767 msg_at (SN, cats_location,
1768 _("To fix the problem, add subtotals to the "
1769 "list of categories here."));
1770 else if (e->op == CTPO_CAT_TOTAL)
1771 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1772 "CATEGORIES specification."));
1774 msg_at (SN, cats_location,
1775 _("To fix the problem, add the missing category to the "
1776 "list of categories here."));
1779 if (pc_cat->pc->hide_source_cats)
/* Check both subexpressions, skipping any that is null. */
1793 for (size_t i = 0; i < 2; i++)
1794 if (e->subs[i] && !ctables_recursive_check_postcompute (
1795 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables.  For a numeric
   variable, an error naming that variable is reported at CAT's location,
   since CAT is a category specification that applies only to string
   variables.

   Callers in this file negate the result and treat that as failure. */
1804 all_strings (struct variable **vars, size_t n_vars,
1805 const struct ctables_category *cat)
1807 for (size_t j = 0; j < n_vars; j++)
1808 if (var_is_numeric (vars[j]))
1810 msg_at (SE, cat->location,
1811 _("This category specification may be applied only to string "
1812 "variables, but this subcommand tries to apply it to "
1813 "numeric variable %s."),
1814 var_get_name (vars[j]));
/* Parses a categories specification for table T: the keyword VARIABLES, an
   optional equals sign, and a variable list, then an optional list of
   explicit categories in square brackets, then any of the settings ORDER,
   KEY, MISSING, TOTAL, LABEL, POSITION, and EMPTY, up to the next slash or
   the end of the command.

   One new struct ctables_categories, created with a reference count equal to
   the number of variables and with empty categories shown by default, is
   filled in from the syntax.  For each listed variable, the categories
   previously stored in T->categories are unreferenced.

   Explicit categories are parsed by ctables_table_parse_explicit_category()
   and afterward checked against the variables: computed categories are
   verified with ctables_recursive_check_postcompute(), string categories and
   string ranges are either converted to numbers in the variables' common
   format or required to apply only to string variables, and categories that
   need numeric variables are rejected for string variables. */
1821 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1822 struct ctables *ct, struct ctables_table *t)
1824 if (!lex_match_id (lexer, "VARIABLES"))
1826 lex_match (lexer, T_EQUALS);
1828 struct variable **vars;
1830 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all of the variables; COMMON_FORMAT
   becomes null if the types differ. */
1833 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1834 for (size_t i = 1; i < n_vars; i++)
1836 const struct fmt_spec *f = var_get_print_format (vars[i]);
1837 if (f->type != common_format->type)
1839 common_format = NULL;
1845 && (fmt_get_category (common_format->type)
1846 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object is created for all of the variables.  Drop each
   variable's reference to its old categories. */
1848 struct ctables_categories *c = xmalloc (sizeof *c);
1849 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1850 for (size_t i = 0; i < n_vars; i++)
1852 struct ctables_categories **cp
1853 = &t->categories[var_get_dict_index (vars[i])];
1854 ctables_categories_unref (*cp);
/* Explicit categories.  The token offsets of the list are recorded so that
   later messages can point at it; -1 means no list was given. */
1858 size_t allocated_cats = 0;
1859 int cats_start_ofs = -1;
1860 int cats_end_ofs = -1;
1861 if (lex_match (lexer, T_LBRACK))
1863 cats_start_ofs = lex_ofs (lexer);
/* Grow the array as needed, then parse the next category in place and
   record the tokens that it spans. */
1866 if (c->n_cats >= allocated_cats)
1867 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1869 int start_ofs = lex_ofs (lexer);
1870 struct ctables_category *cat = &c->cats[c->n_cats];
1871 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1873 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1876 lex_match (lexer, T_COMMA);
1878 while (!lex_match (lexer, T_RBRACK));
1879 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the settings parsed below. */
1882 struct ctables_category cat = {
1884 .include_missing = false,
1885 .sort_ascending = true,
1887 bool show_totals = false;
1888 char *total_label = NULL;
1889 bool totals_before = false;
/* Settings.  ORDER, KEY, and MISSING are matched only when no explicit
   categories were given. */
1890 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1892 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1894 lex_match (lexer, T_EQUALS);
1895 if (lex_match_id (lexer, "A"))
1896 cat.sort_ascending = true;
1897 else if (lex_match_id (lexer, "D"))
1898 cat.sort_ascending = false;
1901 lex_error_expecting (lexer, "A", "D");
1905 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1907 lex_match (lexer, T_EQUALS);
1908 if (lex_match_id (lexer, "VALUE"))
1909 cat.type = CCT_VALUE;
1910 else if (lex_match_id (lexer, "LABEL"))
1911 cat.type = CCT_LABEL;
/* KEY may also name a summary function, optionally followed by a variable
   and, for PTILE, a percentile between 0 and 100, in parentheses. */
1914 cat.type = CCT_FUNCTION;
1915 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1918 if (lex_match (lexer, T_LPAREN))
1920 cat.sort_var = parse_variable (lexer, dict);
1924 if (cat.sort_function == CTSF_PTILE)
1926 lex_match (lexer, T_COMMA);
1927 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1929 cat.percentile = lex_number (lexer);
1933 if (!lex_force_match (lexer, T_RPAREN))
1936 else if (ctables_function_availability (cat.sort_function)
1939 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1944 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1946 lex_match (lexer, T_EQUALS);
1947 if (lex_match_id (lexer, "INCLUDE"))
1948 cat.include_missing = true;
1949 else if (lex_match_id (lexer, "EXCLUDE"))
1950 cat.include_missing = false;
1953 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1957 else if (lex_match_id (lexer, "TOTAL"))
1959 lex_match (lexer, T_EQUALS);
1960 if (!parse_bool (lexer, &show_totals))
1963 else if (lex_match_id (lexer, "LABEL"))
1965 lex_match (lexer, T_EQUALS);
1966 if (!lex_force_string (lexer))
1969 total_label = ss_xstrdup (lex_tokss (lexer));
1972 else if (lex_match_id (lexer, "POSITION"))
1974 lex_match (lexer, T_EQUALS);
1975 if (lex_match_id (lexer, "BEFORE"))
1976 totals_before = true;
1977 else if (lex_match_id (lexer, "AFTER"))
1978 totals_before = false;
1981 lex_error_expecting (lexer, "BEFORE", "AFTER");
1985 else if (lex_match_id (lexer, "EMPTY"))
1987 lex_match (lexer, T_EQUALS);
1988 if (lex_match_id (lexer, "INCLUDE"))
1989 c->show_empty = true;
1990 else if (lex_match_id (lexer, "EXCLUDE"))
1991 c->show_empty = false;
1994 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2001 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2002 "TOTAL", "LABEL", "POSITION", "EMPTY");
2004 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append CAT, as configured by the settings, to the category list. */
2011 if (c->n_cats >= allocated_cats)
2012 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2013 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, either at index 0 or at the end.  Its
   label is the one given with LABEL, or else a translated "Total". */
2018 if (c->n_cats >= allocated_cats)
2019 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2021 struct ctables_category *totals;
2024 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2025 totals = &c->cats[0];
2028 totals = &c->cats[c->n_cats];
2031 *totals = (struct ctables_category) {
2033 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories, forward when totals come first and backward
   otherwise, assigning subtotal links. */
2037 struct ctables_category *subtotal = NULL;
2038 for (size_t i = totals_before ? 0 : c->n_cats;
2039 totals_before ? i < c->n_cats : i-- > 0;
2040 totals_before ? i++ : 0)
2042 struct ctables_category *cat = &c->cats[i];
2051 cat->subtotal = subtotal;
2054 case CCT_POSTCOMPUTE:
2065 case CCT_EXCLUDED_MISSING:
/* Validate the categories when an explicit list was given. */
2070 if (cats_start_ofs != -1)
2072 for (size_t i = 0; i < c->n_cats; i++)
2074 struct ctables_category *cat = &c->cats[i];
2077 case CCT_POSTCOMPUTE:
2078 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2079 struct msg_location *cats_location
2080 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2081 bool ok = ctables_recursive_check_postcompute (
2082 dict, cat->pc->expr, cat, c, cats_location);
2083 msg_location_destroy (cats_location);
2090 for (size_t j = 0; j < n_vars; j++)
2091 if (var_is_alpha (vars[j]))
2093 msg_at (SE, cat->location,
2094 _("This category specification may be applied "
2095 "only to numeric variables, but this "
2096 "subcommand tries to apply it to string "
2098 var_get_name (vars[j]));
/* Convert a string category into a number by parsing the string in the
   common format; the string is then released. */
2107 if (!parse_category_string (cat->location, cat->string, dict,
2108 common_format->type, &n))
2111 ss_dealloc (&cat->string);
2113 cat->type = CCT_NUMBER;
2116 else if (!all_strings (vars, n_vars, cat))
/* Likewise for each end of a string range that has a string, yielding a
   numeric range. */
2125 if (!cat->srange[0].string)
2127 else if (!parse_category_string (cat->location,
2128 cat->srange[0], dict,
2129 common_format->type, &n[0]))
2132 if (!cat->srange[1].string)
2134 else if (!parse_category_string (cat->location,
2135 cat->srange[1], dict,
2136 common_format->type, &n[1]))
2139 ss_dealloc (&cat->srange[0]);
2140 ss_dealloc (&cat->srange[1]);
2142 cat->type = CCT_NRANGE;
2143 cat->nrange[0] = n[0];
2144 cat->nrange[1] = n[1];
2146 else if (!all_strings (vars, n_vars, cat))
2157 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: uninitializes the summary specification
   set for each of the N_CSVS summary variants and frees the domain array for
   each of the N_CTDTS domain types.  NEST itself is not freed by any
   statement shown here. */
2172 ctables_nest_uninit (struct ctables_nest *nest)
2175 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2176 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2177 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2178 free (nest->domains[dt]);
/* Releases resources held by STACK: uninitializes each of its STACK->n nests
   with ctables_nest_uninit(), then frees the array of nests. */
2182 ctables_stack_uninit (struct ctables_stack *stack)
2186 for (size_t i = 0; i < stack->n; i++)
2187 ctables_nest_uninit (&stack->nests[i]);
2188 free (stack->nests);
/* Builds a stack with one nest for every pairing of a nest from S0 with a
   nest from S1.  Each new nest lists the variables of the S0 nest followed
   by those of the S1 nest.  Its scale index is taken from the S0 nest when
   that one has a scale variable, and otherwise from the S1 nest, offset by
   the number of variables in the S0 nest.  Its summary specifications are
   cloned from whichever nest is selected as SUMMARY_SRC, a choice that
   depends on which nest has a variable in its CSV_CELL specs.

   S0 and S1 are uninitialized with ctables_stack_uninit() before the new
   stack is handed back, so the caller gives up both arguments. */
2192 static struct ctables_stack
2193 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2200 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2201 for (size_t i = 0; i < s0.n; i++)
2202 for (size_t j = 0; j < s1.n; j++)
2204 const struct ctables_nest *a = &s0.nests[i];
2205 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2207 size_t allocate = a->n + b->n;
2208 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2210 for (size_t k = 0; k < a->n; k++)
2211 vars[n++] = a->vars[k];
2212 for (size_t k = 0; k < b->n; k++)
2213 vars[n++] = b->vars[k];
2214 assert (n == allocate);
2216 const struct ctables_nest *summary_src;
2217 if (!a->specs[CSV_CELL].var)
2219 else if (!b->specs[CSV_CELL].var)
2224 struct ctables_nest *new = &stack.nests[stack.n++];
2225 *new = (struct ctables_nest) {
/* An index into B's variables has to be shifted past A's variables. */
2227 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2228 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2232 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2233 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2235 ctables_stack_uninit (&s0);
2236 ctables_stack_uninit (&s1);
/* Builds a stack that holds the nests of S0 followed by the nests of S1.
   The nest structures are copied by value into a newly allocated array.
   Because the nests from S1 land S0.n positions later in the new array,
   their group_head members are increased by S0.n. */
2240 static struct ctables_stack
2241 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2243 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2244 for (size_t i = 0; i < s0.n; i++)
2245 stack.nests[stack.n++] = s0.nests[i];
2246 for (size_t i = 0; i < s1.n; i++)
2248 stack.nests[stack.n] = s1.nests[i];
2249 stack.nests[stack.n].group_head += s0.n;
2252 assert (stack.n == s0.n + s1.n);
/* Builds a stack that contains a single newly allocated nest for axis A.
   The nest's scale index is 0 when A is a scale variable and SIZE_MAX
   otherwise.  When A has CSV_CELL summary specifications or is a scale
   variable, the specification set for every summary variant is cloned from A
   and tagged with A's variable and scale flag. */
2258 static struct ctables_stack
2259 var_fts (const struct ctables_axis *a)
2261 struct variable **vars = xmalloc (sizeof *vars);
2264 struct ctables_nest *nest = xmalloc (sizeof *nest);
2265 *nest = (struct ctables_nest) {
2268 .scale_idx = a->scale ? 0 : SIZE_MAX,
2270 if (a->specs[CSV_CELL].n || a->scale)
2271 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2273 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2274 nest->specs[sv].var = a->var;
2275 nest->specs[sv].is_scale = a->scale;
2277 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts axis expression A into a stack of nests.  One path yields an
   empty stack.  The other two visible paths convert both subexpressions in
   A->subs[] recursively and combine the results with stack_fts() or with
   nest_fts().
   NOTE(review): the conditions that select each path, and the use made of
   AXIS_TYPE beyond passing it to the recursive calls, are not visible
   here -- confirm. */
2280 static struct ctables_stack
2281 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2284 return (struct ctables_stack) { .n = 0 };
2292 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2293 enumerate_fts (axis_type, a->subs[1]));
2296 /* This should consider any of the scale variables found in the result to
2297 be linked to each other listwise for SMISSING=LISTWISE. */
2298 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2299 enumerate_fts (axis_type, a->subs[1]));
/* Accumulated state for one summary function.  Which member is in use
   depends on the function, as the comment above each group says.  The
   members visible here are the moments accumulator and the case writer. */
2305 union ctables_summary
2307 /* COUNT, VALIDN, TOTALN. */
2310 /* MINIMUM, MAXIMUM, RANGE. */
2317 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2318 struct moments1 *moments;
2320 /* MEDIAN, MODE, PTILE. */
2323 struct casewriter *writer;
2328 /* XXX multiple response */
/* Initializes summary S for the function in SS->function.

   The visible initializations are: the minimum and maximum both start as
   SYSMIS; the group that ends with the unweighted percentage-of-sum
   functions gets a new moments accumulator that tracks up to the variance;
   and one group gets a sorting case writer.
   NOTE(review): the initialization of the count and ID groups, and the case
   labels that select the minimum/maximum and case writer groups, are not
   visible here -- confirm. */
2332 ctables_summary_init (union ctables_summary *s,
2333 const struct ctables_summary_spec *ss)
2335 switch (ss->function)
2339 case CTSF_ROWPCT_COUNT:
2340 case CTSF_COLPCT_COUNT:
2341 case CTSF_TABLEPCT_COUNT:
2342 case CTSF_SUBTABLEPCT_COUNT:
2343 case CTSF_LAYERPCT_COUNT:
2344 case CTSF_LAYERROWPCT_COUNT:
2345 case CTSF_LAYERCOLPCT_COUNT:
2346 case CTSF_ROWPCT_VALIDN:
2347 case CTSF_COLPCT_VALIDN:
2348 case CTSF_TABLEPCT_VALIDN:
2349 case CTSF_SUBTABLEPCT_VALIDN:
2350 case CTSF_LAYERPCT_VALIDN:
2351 case CTSF_LAYERROWPCT_VALIDN:
2352 case CTSF_LAYERCOLPCT_VALIDN:
2353 case CTSF_ROWPCT_TOTALN:
2354 case CTSF_COLPCT_TOTALN:
2355 case CTSF_TABLEPCT_TOTALN:
2356 case CTSF_SUBTABLEPCT_TOTALN:
2357 case CTSF_LAYERPCT_TOTALN:
2358 case CTSF_LAYERROWPCT_TOTALN:
2359 case CTSF_LAYERCOLPCT_TOTALN:
2366 case CTSF_UROWPCT_COUNT:
2367 case CTSF_UCOLPCT_COUNT:
2368 case CTSF_UTABLEPCT_COUNT:
2369 case CTSF_USUBTABLEPCT_COUNT:
2370 case CTSF_ULAYERPCT_COUNT:
2371 case CTSF_ULAYERROWPCT_COUNT:
2372 case CTSF_ULAYERCOLPCT_COUNT:
2373 case CTSF_UROWPCT_VALIDN:
2374 case CTSF_UCOLPCT_VALIDN:
2375 case CTSF_UTABLEPCT_VALIDN:
2376 case CTSF_USUBTABLEPCT_VALIDN:
2377 case CTSF_ULAYERPCT_VALIDN:
2378 case CTSF_ULAYERROWPCT_VALIDN:
2379 case CTSF_ULAYERCOLPCT_VALIDN:
2380 case CTSF_UROWPCT_TOTALN:
2381 case CTSF_UCOLPCT_TOTALN:
2382 case CTSF_UTABLEPCT_TOTALN:
2383 case CTSF_USUBTABLEPCT_TOTALN:
2384 case CTSF_ULAYERPCT_TOTALN:
2385 case CTSF_ULAYERROWPCT_TOTALN:
2386 case CTSF_ULAYERCOLPCT_TOTALN:
2396 case CTSF_SUBTABLE_ID:
2398 case CTSF_LAYERROW_ID:
2399 case CTSF_LAYERCOL_ID:
/* SYSMIS marks that no value has been seen yet. */
2405 s->min = s->max = SYSMIS;
2413 case CTSF_ROWPCT_SUM:
2414 case CTSF_COLPCT_SUM:
2415 case CTSF_TABLEPCT_SUM:
2416 case CTSF_SUBTABLEPCT_SUM:
2417 case CTSF_LAYERPCT_SUM:
2418 case CTSF_LAYERROWPCT_SUM:
2419 case CTSF_LAYERCOLPCT_SUM:
2424 case CTSF_UVARIANCE:
2425 case CTSF_UROWPCT_SUM:
2426 case CTSF_UCOLPCT_SUM:
2427 case CTSF_UTABLEPCT_SUM:
2428 case CTSF_USUBTABLEPCT_SUM:
2429 case CTSF_ULAYERPCT_SUM:
2430 case CTSF_ULAYERROWPCT_SUM:
2431 case CTSF_ULAYERCOLPCT_SUM:
2432 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases written to the writer have two numeric columns (width 0) and are
   sorted in ascending order on column 0. */
2442 struct caseproto *proto = caseproto_create ();
2443 proto = caseproto_add_width (proto, 0);
2444 proto = caseproto_add_width (proto, 0);
2446 struct subcase ordering;
2447 subcase_init (&ordering, 0, 0, SC_ASCEND);
2448 s->writer = sort_create_writer (&ordering, proto);
2449 subcase_uninit (&ordering);
2450 caseproto_unref (proto);
/* Releases what ctables_summary_init() allocated in S for the function in
   SS->function.  The moments accumulator is destroyed for the group that
   ends with the unweighted percentage-of-sum functions, and the case writer
   is destroyed for another group.  No release is visible for the count and
   ID groups. */
2460 ctables_summary_uninit (union ctables_summary *s,
2461 const struct ctables_summary_spec *ss)
2463 switch (ss->function)
2467 case CTSF_ROWPCT_COUNT:
2468 case CTSF_COLPCT_COUNT:
2469 case CTSF_TABLEPCT_COUNT:
2470 case CTSF_SUBTABLEPCT_COUNT:
2471 case CTSF_LAYERPCT_COUNT:
2472 case CTSF_LAYERROWPCT_COUNT:
2473 case CTSF_LAYERCOLPCT_COUNT:
2474 case CTSF_ROWPCT_VALIDN:
2475 case CTSF_COLPCT_VALIDN:
2476 case CTSF_TABLEPCT_VALIDN:
2477 case CTSF_SUBTABLEPCT_VALIDN:
2478 case CTSF_LAYERPCT_VALIDN:
2479 case CTSF_LAYERROWPCT_VALIDN:
2480 case CTSF_LAYERCOLPCT_VALIDN:
2481 case CTSF_ROWPCT_TOTALN:
2482 case CTSF_COLPCT_TOTALN:
2483 case CTSF_TABLEPCT_TOTALN:
2484 case CTSF_SUBTABLEPCT_TOTALN:
2485 case CTSF_LAYERPCT_TOTALN:
2486 case CTSF_LAYERROWPCT_TOTALN:
2487 case CTSF_LAYERCOLPCT_TOTALN:
2494 case CTSF_UROWPCT_COUNT:
2495 case CTSF_UCOLPCT_COUNT:
2496 case CTSF_UTABLEPCT_COUNT:
2497 case CTSF_USUBTABLEPCT_COUNT:
2498 case CTSF_ULAYERPCT_COUNT:
2499 case CTSF_ULAYERROWPCT_COUNT:
2500 case CTSF_ULAYERCOLPCT_COUNT:
2501 case CTSF_UROWPCT_VALIDN:
2502 case CTSF_UCOLPCT_VALIDN:
2503 case CTSF_UTABLEPCT_VALIDN:
2504 case CTSF_USUBTABLEPCT_VALIDN:
2505 case CTSF_ULAYERPCT_VALIDN:
2506 case CTSF_ULAYERROWPCT_VALIDN:
2507 case CTSF_ULAYERCOLPCT_VALIDN:
2508 case CTSF_UROWPCT_TOTALN:
2509 case CTSF_UCOLPCT_TOTALN:
2510 case CTSF_UTABLEPCT_TOTALN:
2511 case CTSF_USUBTABLEPCT_TOTALN:
2512 case CTSF_ULAYERPCT_TOTALN:
2513 case CTSF_ULAYERROWPCT_TOTALN:
2514 case CTSF_ULAYERCOLPCT_TOTALN:
2523 case CTSF_SUBTABLE_ID:
2525 case CTSF_LAYERROW_ID:
2526 case CTSF_LAYERCOL_ID:
2539 case CTSF_ROWPCT_SUM:
2540 case CTSF_COLPCT_SUM:
2541 case CTSF_TABLEPCT_SUM:
2542 case CTSF_SUBTABLEPCT_SUM:
2543 case CTSF_LAYERPCT_SUM:
2544 case CTSF_LAYERROWPCT_SUM:
2545 case CTSF_LAYERCOLPCT_SUM:
2550 case CTSF_UVARIANCE:
2551 case CTSF_UROWPCT_SUM:
2552 case CTSF_UCOLPCT_SUM:
2553 case CTSF_UTABLEPCT_SUM:
2554 case CTSF_USUBTABLEPCT_SUM:
2555 case CTSF_ULAYERPCT_SUM:
2556 case CTSF_ULAYERROWPCT_SUM:
2557 case CTSF_ULAYERCOLPCT_SUM:
2558 moments1_destroy (s->moments);
2567 casewriter_destroy (s->writer);
/* Adds one case to summary S for the function in SS->function.

   VAR and VALUE are the variable and value being summarized; only the
   numeric member of VALUE is read.  D_WEIGHT and E_WEIGHT are two weights
   for the case: the visible percentage-count groups add D_WEIGHT to the
   count, other count statements add E_WEIGHT, and the weighted moments and
   the case writer use E_WEIGHT.  The unweighted (U-prefixed) moments use a
   weight of 1.

   IS_SCALE and EXCLUDED_MISSING gate the COUNT-style functions, which count
   the case when the variable is a scale variable or the value is not an
   excluded missing value.  IS_SCALE_MISSING gates the minimum/maximum, the
   moments, and the case writer, all of which skip the case when it is set.
   Each case written to the writer holds the value in column 0 and E_WEIGHT
   in column 1.

   NOTE(review): the use made of IS_MISSING is not visible here -- confirm. */
2573 ctables_summary_add (union ctables_summary *s,
2574 const struct ctables_summary_spec *ss,
2575 const struct variable *var, const union value *value,
2576 bool is_scale, bool is_scale_missing,
2577 bool is_missing, bool excluded_missing,
2578 double d_weight, double e_weight)
2580 /* To determine whether a case is included in a given table for a particular
2581 kind of summary, consider the following charts for each variable in the
2582 table. Only if "yes" appears for every variable for the summary is the
2585 Categorical variables: VALIDN COUNT TOTALN
2586 Valid values in included categories yes yes yes
2587 Missing values in included categories --- yes yes
2588 Missing values in excluded categories --- --- yes
2589 Valid values in excluded categories --- --- ---
2591 Scale variables: VALIDN COUNT TOTALN
2592 Valid value yes yes yes
2593 Missing value --- yes yes
2595 Missing values include both user- and system-missing. (The system-missing
2596 value is always in an excluded category.)
2598 switch (ss->function)
2601 case CTSF_ROWPCT_TOTALN:
2602 case CTSF_COLPCT_TOTALN:
2603 case CTSF_TABLEPCT_TOTALN:
2604 case CTSF_SUBTABLEPCT_TOTALN:
2605 case CTSF_LAYERPCT_TOTALN:
2606 case CTSF_LAYERROWPCT_TOTALN:
2607 case CTSF_LAYERCOLPCT_TOTALN:
2608 s->count += d_weight;
2612 case CTSF_UROWPCT_TOTALN:
2613 case CTSF_UCOLPCT_TOTALN:
2614 case CTSF_UTABLEPCT_TOTALN:
2615 case CTSF_USUBTABLEPCT_TOTALN:
2616 case CTSF_ULAYERPCT_TOTALN:
2617 case CTSF_ULAYERROWPCT_TOTALN:
2618 case CTSF_ULAYERCOLPCT_TOTALN:
2623 case CTSF_ROWPCT_COUNT:
2624 case CTSF_COLPCT_COUNT:
2625 case CTSF_TABLEPCT_COUNT:
2626 case CTSF_SUBTABLEPCT_COUNT:
2627 case CTSF_LAYERPCT_COUNT:
2628 case CTSF_LAYERROWPCT_COUNT:
2629 case CTSF_LAYERCOLPCT_COUNT:
2630 if (is_scale || !excluded_missing)
2631 s->count += d_weight;
2635 case CTSF_UROWPCT_COUNT:
2636 case CTSF_UCOLPCT_COUNT:
2637 case CTSF_UTABLEPCT_COUNT:
2638 case CTSF_USUBTABLEPCT_COUNT:
2639 case CTSF_ULAYERPCT_COUNT:
2640 case CTSF_ULAYERROWPCT_COUNT:
2641 case CTSF_ULAYERCOLPCT_COUNT:
2642 if (is_scale || !excluded_missing)
2647 case CTSF_ROWPCT_VALIDN:
2648 case CTSF_COLPCT_VALIDN:
2649 case CTSF_TABLEPCT_VALIDN:
2650 case CTSF_SUBTABLEPCT_VALIDN:
2651 case CTSF_LAYERPCT_VALIDN:
2652 case CTSF_LAYERROWPCT_VALIDN:
2653 case CTSF_LAYERCOLPCT_VALIDN:
2657 s->count += d_weight;
2661 case CTSF_UROWPCT_VALIDN:
2662 case CTSF_UCOLPCT_VALIDN:
2663 case CTSF_UTABLEPCT_VALIDN:
2664 case CTSF_USUBTABLEPCT_VALIDN:
2665 case CTSF_ULAYERPCT_VALIDN:
2666 case CTSF_ULAYERROWPCT_VALIDN:
2667 case CTSF_ULAYERCOLPCT_VALIDN:
2677 case CTSF_SUBTABLE_ID:
2679 case CTSF_LAYERROW_ID:
2680 case CTSF_LAYERCOL_ID:
2687 s->count += d_weight;
2698 if (is_scale || !excluded_missing)
2699 s->count += e_weight;
2706 s->count += e_weight;
2710 s->count += e_weight;
2716 if (!is_scale_missing)
2718 assert (!var_is_alpha (var)); /* XXX? */
2719 if (s->min == SYSMIS || value->f < s->min)
2721 if (s->max == SYSMIS || value->f > s->max)
2731 case CTSF_ROWPCT_SUM:
2732 case CTSF_COLPCT_SUM:
2733 case CTSF_TABLEPCT_SUM:
2734 case CTSF_SUBTABLEPCT_SUM:
2735 case CTSF_LAYERPCT_SUM:
2736 case CTSF_LAYERROWPCT_SUM:
2737 case CTSF_LAYERCOLPCT_SUM:
2738 if (!is_scale_missing)
2739 moments1_add (s->moments, value->f, e_weight);
2746 case CTSF_UVARIANCE:
2747 case CTSF_UROWPCT_SUM:
2748 case CTSF_UCOLPCT_SUM:
2749 case CTSF_UTABLEPCT_SUM:
2750 case CTSF_USUBTABLEPCT_SUM:
2751 case CTSF_ULAYERPCT_SUM:
2752 case CTSF_ULAYERROWPCT_SUM:
2753 case CTSF_ULAYERCOLPCT_SUM:
2754 if (!is_scale_missing)
2755 moments1_add (s->moments, value->f, 1.0);
2761 d_weight = e_weight = 1.0;
2766 if (!is_scale_missing)
2768 s->ovalid += e_weight;
2770 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2771 *case_num_rw_idx (c, 0) = value->f;
2772 *case_num_rw_idx (c, 1) = e_weight;
2773 casewriter_write (s->writer, c);
/* Maps summary function FUNCTION to a domain type.  The case labels are
   grouped by the prefix of the function name.  The visible results are
   CTDT_LAYERCOL for the LAYERCOLPCT functions and LAYERCOL_ID, CTDT_LAYERROW
   for the LAYERROWPCT functions and LAYERROW_ID, and CTDT_SUBTABLE for the
   SUBTABLEPCT functions and SUBTABLE_ID.
   NOTE(review): the results for the COLPCT, LAYERPCT, ROWPCT, and TABLEPCT
   groups, and for the group that ends with CTSF_UVARIANCE, are not visible
   here -- confirm. */
2779 static enum ctables_domain_type
2780 ctables_function_domain (enum ctables_summary_function function)
2810 case CTSF_UVARIANCE:
2816 case CTSF_COLPCT_COUNT:
2817 case CTSF_COLPCT_SUM:
2818 case CTSF_COLPCT_TOTALN:
2819 case CTSF_COLPCT_VALIDN:
2820 case CTSF_UCOLPCT_COUNT:
2821 case CTSF_UCOLPCT_SUM:
2822 case CTSF_UCOLPCT_TOTALN:
2823 case CTSF_UCOLPCT_VALIDN:
2827 case CTSF_LAYERCOLPCT_COUNT:
2828 case CTSF_LAYERCOLPCT_SUM:
2829 case CTSF_LAYERCOLPCT_TOTALN:
2830 case CTSF_LAYERCOLPCT_VALIDN:
2831 case CTSF_ULAYERCOLPCT_COUNT:
2832 case CTSF_ULAYERCOLPCT_SUM:
2833 case CTSF_ULAYERCOLPCT_TOTALN:
2834 case CTSF_ULAYERCOLPCT_VALIDN:
2835 case CTSF_LAYERCOL_ID:
2836 return CTDT_LAYERCOL;
2838 case CTSF_LAYERPCT_COUNT:
2839 case CTSF_LAYERPCT_SUM:
2840 case CTSF_LAYERPCT_TOTALN:
2841 case CTSF_LAYERPCT_VALIDN:
2842 case CTSF_ULAYERPCT_COUNT:
2843 case CTSF_ULAYERPCT_SUM:
2844 case CTSF_ULAYERPCT_TOTALN:
2845 case CTSF_ULAYERPCT_VALIDN:
2849 case CTSF_LAYERROWPCT_COUNT:
2850 case CTSF_LAYERROWPCT_SUM:
2851 case CTSF_LAYERROWPCT_TOTALN:
2852 case CTSF_LAYERROWPCT_VALIDN:
2853 case CTSF_ULAYERROWPCT_COUNT:
2854 case CTSF_ULAYERROWPCT_SUM:
2855 case CTSF_ULAYERROWPCT_TOTALN:
2856 case CTSF_ULAYERROWPCT_VALIDN:
2857 case CTSF_LAYERROW_ID:
2858 return CTDT_LAYERROW;
2860 case CTSF_ROWPCT_COUNT:
2861 case CTSF_ROWPCT_SUM:
2862 case CTSF_ROWPCT_TOTALN:
2863 case CTSF_ROWPCT_VALIDN:
2864 case CTSF_UROWPCT_COUNT:
2865 case CTSF_UROWPCT_SUM:
2866 case CTSF_UROWPCT_TOTALN:
2867 case CTSF_UROWPCT_VALIDN:
2871 case CTSF_SUBTABLEPCT_COUNT:
2872 case CTSF_SUBTABLEPCT_SUM:
2873 case CTSF_SUBTABLEPCT_TOTALN:
2874 case CTSF_SUBTABLEPCT_VALIDN:
2875 case CTSF_USUBTABLEPCT_COUNT:
2876 case CTSF_USUBTABLEPCT_SUM:
2877 case CTSF_USUBTABLEPCT_TOTALN:
2878 case CTSF_USUBTABLEPCT_VALIDN:
2879 case CTSF_SUBTABLE_ID:
2880 return CTDT_SUBTABLE;
2882 case CTSF_TABLEPCT_COUNT:
2883 case CTSF_TABLEPCT_SUM:
2884 case CTSF_TABLEPCT_TOTALN:
2885 case CTSF_TABLEPCT_VALIDN:
2886 case CTSF_UTABLEPCT_COUNT:
2887 case CTSF_UTABLEPCT_SUM:
2888 case CTSF_UTABLEPCT_TOTALN:
2889 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary function FUNCTION.  The percentage-of-sum functions
   (every CTSF_*PCT_SUM, weighted and unweighted) form the final group of
   case labels.  They are kept apart from the COUNT, TOTALN, and VALIDN
   percentages, from the ID functions, and from the group that ends with
   CTSF_UVARIANCE.

   NOTE(review): the name reads as a yes/no predicate, but the declared
   result type is enum ctables_domain_type, the same as
   ctables_function_domain() -- this looks like a copy-and-paste slip and the
   type should probably be bool.  The return statements are not visible
   here, so confirm before changing it. */
2897 static enum ctables_domain_type
2898 ctables_function_is_pctsum (enum ctables_summary_function function)
2928 case CTSF_UVARIANCE:
2932 case CTSF_COLPCT_COUNT:
2933 case CTSF_COLPCT_TOTALN:
2934 case CTSF_COLPCT_VALIDN:
2935 case CTSF_UCOLPCT_COUNT:
2936 case CTSF_UCOLPCT_TOTALN:
2937 case CTSF_UCOLPCT_VALIDN:
2938 case CTSF_LAYERCOLPCT_COUNT:
2939 case CTSF_LAYERCOLPCT_TOTALN:
2940 case CTSF_LAYERCOLPCT_VALIDN:
2941 case CTSF_ULAYERCOLPCT_COUNT:
2942 case CTSF_ULAYERCOLPCT_TOTALN:
2943 case CTSF_ULAYERCOLPCT_VALIDN:
2944 case CTSF_LAYERPCT_COUNT:
2945 case CTSF_LAYERPCT_TOTALN:
2946 case CTSF_LAYERPCT_VALIDN:
2947 case CTSF_ULAYERPCT_COUNT:
2948 case CTSF_ULAYERPCT_TOTALN:
2949 case CTSF_ULAYERPCT_VALIDN:
2950 case CTSF_LAYERROWPCT_COUNT:
2951 case CTSF_LAYERROWPCT_TOTALN:
2952 case CTSF_LAYERROWPCT_VALIDN:
2953 case CTSF_ULAYERROWPCT_COUNT:
2954 case CTSF_ULAYERROWPCT_TOTALN:
2955 case CTSF_ULAYERROWPCT_VALIDN:
2956 case CTSF_ROWPCT_COUNT:
2957 case CTSF_ROWPCT_TOTALN:
2958 case CTSF_ROWPCT_VALIDN:
2959 case CTSF_UROWPCT_COUNT:
2960 case CTSF_UROWPCT_TOTALN:
2961 case CTSF_UROWPCT_VALIDN:
2962 case CTSF_SUBTABLEPCT_COUNT:
2963 case CTSF_SUBTABLEPCT_TOTALN:
2964 case CTSF_SUBTABLEPCT_VALIDN:
2965 case CTSF_USUBTABLEPCT_COUNT:
2966 case CTSF_USUBTABLEPCT_TOTALN:
2967 case CTSF_USUBTABLEPCT_VALIDN:
2968 case CTSF_TABLEPCT_COUNT:
2969 case CTSF_TABLEPCT_TOTALN:
2970 case CTSF_TABLEPCT_VALIDN:
2971 case CTSF_UTABLEPCT_COUNT:
2972 case CTSF_UTABLEPCT_TOTALN:
2973 case CTSF_UTABLEPCT_VALIDN:
2977 case CTSF_SUBTABLE_ID:
2979 case CTSF_LAYERROW_ID:
2980 case CTSF_LAYERCOL_ID:
2983 case CTSF_COLPCT_SUM:
2984 case CTSF_UCOLPCT_SUM:
2985 case CTSF_LAYERCOLPCT_SUM:
2986 case CTSF_ULAYERCOLPCT_SUM:
2987 case CTSF_LAYERPCT_SUM:
2988 case CTSF_ULAYERPCT_SUM:
2989 case CTSF_LAYERROWPCT_SUM:
2990 case CTSF_ULAYERROWPCT_SUM:
2991 case CTSF_ROWPCT_SUM:
2992 case CTSF_UROWPCT_SUM:
2993 case CTSF_SUBTABLEPCT_SUM:
2994 case CTSF_USUBTABLEPCT_SUM:
2995 case CTSF_TABLEPCT_SUM:
2996 case CTSF_UTABLEPCT_SUM:
/* Computes the value of summary S, for the function in SS->function, within
   CELL.

   ID functions yield the sequence number of the cell's domain for that
   function.  Percentage functions divide the accumulated count by the
   matching total of the domain chosen by ctables_function_domain() and
   multiply by 100: e_count, e_valid, and e_total for the weighted functions,
   and u_count, u_valid, and u_total for the unweighted ones.  The division
   happens only when the denominator is nonzero.  Percentage-of-sum functions
   divide this summary's sum (weight times mean) by the domain's e_sum or
   u_sum for SS->sum_var_idx, and yield SYSMIS when a moment is missing or
   the denominator is 0.

   Moment-based functions read the moments accumulator; SYSMIS inputs yield
   SYSMIS.  Order statistics (percentile and mode) read back the cases
   written to S->writer and store their result in S->ovalue. */
3004 ctables_summary_value (const struct ctables_cell *cell,
3005 union ctables_summary *s,
3006 const struct ctables_summary_spec *ss)
3008 switch (ss->function)
3018 case CTSF_SUBTABLE_ID:
3020 case CTSF_LAYERROW_ID:
3021 case CTSF_LAYERCOL_ID:
3022 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3024 case CTSF_ROWPCT_COUNT:
3025 case CTSF_COLPCT_COUNT:
3026 case CTSF_TABLEPCT_COUNT:
3027 case CTSF_SUBTABLEPCT_COUNT:
3028 case CTSF_LAYERPCT_COUNT:
3029 case CTSF_LAYERROWPCT_COUNT:
3030 case CTSF_LAYERCOLPCT_COUNT:
3032 enum ctables_domain_type d = ctables_function_domain (ss->function);
3033 return (cell->domains[d]->e_count
3034 ? s->count / cell->domains[d]->e_count * 100
3038 case CTSF_UROWPCT_COUNT:
3039 case CTSF_UCOLPCT_COUNT:
3040 case CTSF_UTABLEPCT_COUNT:
3041 case CTSF_USUBTABLEPCT_COUNT:
3042 case CTSF_ULAYERPCT_COUNT:
3043 case CTSF_ULAYERROWPCT_COUNT:
3044 case CTSF_ULAYERCOLPCT_COUNT:
3046 enum ctables_domain_type d = ctables_function_domain (ss->function);
3047 return (cell->domains[d]->u_count
3048 ? s->count / cell->domains[d]->u_count * 100
3052 case CTSF_ROWPCT_VALIDN:
3053 case CTSF_COLPCT_VALIDN:
3054 case CTSF_TABLEPCT_VALIDN:
3055 case CTSF_SUBTABLEPCT_VALIDN:
3056 case CTSF_LAYERPCT_VALIDN:
3057 case CTSF_LAYERROWPCT_VALIDN:
3058 case CTSF_LAYERCOLPCT_VALIDN:
3060 enum ctables_domain_type d = ctables_function_domain (ss->function);
3061 return (cell->domains[d]->e_valid
3062 ? s->count / cell->domains[d]->e_valid * 100
3066 case CTSF_UROWPCT_VALIDN:
3067 case CTSF_UCOLPCT_VALIDN:
3068 case CTSF_UTABLEPCT_VALIDN:
3069 case CTSF_USUBTABLEPCT_VALIDN:
3070 case CTSF_ULAYERPCT_VALIDN:
3071 case CTSF_ULAYERROWPCT_VALIDN:
3072 case CTSF_ULAYERCOLPCT_VALIDN:
3074 enum ctables_domain_type d = ctables_function_domain (ss->function);
3075 return (cell->domains[d]->u_valid
3076 ? s->count / cell->domains[d]->u_valid * 100
3080 case CTSF_ROWPCT_TOTALN:
3081 case CTSF_COLPCT_TOTALN:
3082 case CTSF_TABLEPCT_TOTALN:
3083 case CTSF_SUBTABLEPCT_TOTALN:
3084 case CTSF_LAYERPCT_TOTALN:
3085 case CTSF_LAYERROWPCT_TOTALN:
3086 case CTSF_LAYERCOLPCT_TOTALN:
3088 enum ctables_domain_type d = ctables_function_domain (ss->function);
3089 return (cell->domains[d]->e_total
3090 ? s->count / cell->domains[d]->e_total * 100
3094 case CTSF_UROWPCT_TOTALN:
3095 case CTSF_UCOLPCT_TOTALN:
3096 case CTSF_UTABLEPCT_TOTALN:
3097 case CTSF_USUBTABLEPCT_TOTALN:
3098 case CTSF_ULAYERPCT_TOTALN:
3099 case CTSF_ULAYERROWPCT_TOTALN:
3100 case CTSF_ULAYERCOLPCT_TOTALN:
3102 enum ctables_domain_type d = ctables_function_domain (ss->function);
3103 return (cell->domains[d]->u_total
3104 ? s->count / cell->domains[d]->u_total * 100
/* The range is defined only when both extremes were observed. */
3125 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3131 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3138 double weight, variance;
3139 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3140 return calc_semean (variance, weight);
3147 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3148 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
3154 double weight, mean;
3155 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3156 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3160 case CTSF_UVARIANCE:
3163 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3167 case CTSF_ROWPCT_SUM:
3168 case CTSF_COLPCT_SUM:
3169 case CTSF_TABLEPCT_SUM:
3170 case CTSF_SUBTABLEPCT_SUM:
3171 case CTSF_LAYERPCT_SUM:
3172 case CTSF_LAYERROWPCT_SUM:
3173 case CTSF_LAYERCOLPCT_SUM:
3175 double weight, mean;
3176 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3177 if (weight == SYSMIS || mean == SYSMIS)
3179 enum ctables_domain_type d = ctables_function_domain (ss->function);
3180 double num = weight * mean;
3181 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3182 return denom != 0 ? num / denom * 100 : SYSMIS;
3184 case CTSF_UROWPCT_SUM:
3185 case CTSF_UCOLPCT_SUM:
3186 case CTSF_UTABLEPCT_SUM:
3187 case CTSF_USUBTABLEPCT_SUM:
3188 case CTSF_ULAYERPCT_SUM:
3189 case CTSF_ULAYERROWPCT_SUM:
3190 case CTSF_ULAYERCOLPCT_SUM:
3192 double weight, mean;
3193 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3194 if (weight == SYSMIS || mean == SYSMIS)
3196 enum ctables_domain_type d = ctables_function_domain (ss->function);
3197 double num = weight * mean;
3198 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3199 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: SS->percentile for CTSF_PTILE, and 0.5 for any other function
   that reaches this code. */
3208 struct casereader *reader = casewriter_make_reader (s->writer);
3211 struct percentile *ptile = percentile_create (
3212 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3213 struct order_stats *os = &ptile->parent;
3214 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3215 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3216 statistic_destroy (&ptile->parent.parent);
/* Mode. */
3224 struct casereader *reader = casewriter_make_reader (s->writer);
3227 struct mode *mode = mode_create ();
3228 struct order_stats *os = &mode->parent;
3229 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3230 s->ovalue = mode->mode;
3231 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared, and the axis whose cell values are compared. */
3239 struct ctables_cell_sort_aux
3241 const struct ctables_nest *nest;
3242 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of
   `struct ctables_cell *'.  A_ and B_ point to array elements and AUX_ points
   to a struct ctables_cell_sort_aux.

   For each variable in AUX->nest, except the one at nest->scale_idx, this
   compares the two cells' category/value pair on axis AUX->a.  Cells that
   fall in different categories are ordered by comparing the category
   pointers.  Cells in the same category are ordered according to the
   category's type: by data value (value_compare_3way) or by value label
   (strcmp), with the result negated when the category is not
   `sort_ascending'. */
3246 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3248 const struct ctables_cell_sort_aux *aux = aux_;
3249 struct ctables_cell *const *ap = a_;
3250 struct ctables_cell *const *bp = b_;
3251 const struct ctables_cell *a = *ap;
3252 const struct ctables_cell *b = *bp;
3254 const struct ctables_nest *nest = aux->nest;
3255 for (size_t i = 0; i < nest->n; i++)
3256 if (i != nest->scale_idx)
3258 const struct variable *var = nest->vars[i];
3259 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3260 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category objects' addresses. */
3261 if (a_cv->category != b_cv->category)
3262 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category from here on, so a_cv->category describes both cells. */
3264 const union value *a_val = &a_cv->value;
3265 const union value *b_val = &b_cv->value;
3266 switch (a_cv->category->type)
3272 case CCT_POSTCOMPUTE:
3273 case CCT_EXCLUDED_MISSING:
3274 /* Must be equal. */
3282 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3290 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3292 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Label-based ordering.  NOTE(review): presumably falls back to comparing
   the values when a label is missing or the labels are equal -- confirm. */
3298 const char *a_label = var_lookup_value_label (var, a_val);
3299 const char *b_label = var_lookup_value_label (var, b_val);
3305 cmp = strcmp (a_label, b_label);
3311 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3314 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison callback for an array of `struct ctables_cell *'
   (A_ and B_ point to array elements; AUX is unused).  Compares the cells'
   `leaf' indexes axis by axis, in pivot_axis_type order, and yields 1 or -1
   from the first axis considered different.
   NOTE(review): presumably the return below is guarded by `al != bl' and the
   function returns 0 when every axis matches -- confirm. */
3326 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3327 const void *aux UNUSED)
3329 struct ctables_cell *const *ap = a_;
3330 struct ctables_cell *const *bp = b_;
3331 const struct ctables_cell *a = *ap;
3332 const struct ctables_cell *b = *bp;
3334 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3336 int al = a->axes[axis].leaf;
3337 int bl = b->axes[axis].leaf;
3339 return al > bl ? 1 : -1;
3347 For each ctables_table:
3348 For each combination of row vars:
3349 For each combination of column vars:
3350 For each combination of layer vars:
3352 Make a table of row values:
3353 Sort entries by row values
3354 Assign a 0-based index to each actual value
3355 Construct a dimension
3356 Make a table of column values
3357 Make a table of layer values
3359 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN in section S that CELL belongs to, creating
   and inserting a new one into S->domains[DOMAIN] if none matches.
   NOTE(review): presumably returns the matching or newly created domain --
   confirm.

   Two cells share a domain when, for every variable index listed in
   nest->domains[DOMAIN] on every axis, they have the same category and,
   unless that category is a total, subtotal, or postcompute, an equal
   value. */
3362 static struct ctables_domain *
3363 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3364 enum ctables_domain_type domain)
/* Hash the category pointer and, for ordinary categories only, the value of
   each variable that defines this domain type. */
3367 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3369 const struct ctables_nest *nest = s->nests[a];
3370 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3372 size_t v_idx = nest->domains[domain][i];
3373 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3374 hash = hash_pointer (cv->category, hash);
3375 if (cv->category->type != CCT_TOTAL
3376 && cv->category->type != CCT_SUBTOTAL
3377 && cv->category->type != CCT_POSTCOMPUTE)
3378 hash = value_hash (&cv->value,
3379 var_get_width (nest->vars[v_idx]), hash);
/* Search the hash bucket for a domain whose example cell agrees with CELL on
   every domain-defining variable, using the same rule as the hash above. */
3383 struct ctables_domain *d;
3384 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3386 const struct ctables_cell *df = d->example;
3387 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3389 const struct ctables_nest *nest = s->nests[a];
3390 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3392 size_t v_idx = nest->domains[domain][i];
3393 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3394 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3395 if (cv1->category != cv2->category
3396 || (cv1->category->type != CCT_TOTAL
3397 && cv1->category->type != CCT_SUBTOTAL
3398 && cv1->category->type != CCT_POSTCOMPUTE
3399 && !value_equal (&cv1->value, &cv2->value,
3400 var_get_width (nest->vars[v_idx]))))
/* New domain: CELL becomes its example, and it gets one zero-initialized
   struct ctables_sum per summed variable when the table has any. */
3409 struct ctables_sum *sums = (s->table->n_sum_vars
3410 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3413 d = xmalloc (sizeof *d);
3414 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3415 hmap_insert (&s->domains[domain], &d->node, hash);
/* Builds a substring that views the string value V of VAR (var_get_width()
   bytes starting at V->s) and strips trailing spaces from it.  The substring
   refers to V's own storage rather than a copy.
   NOTE(review): presumably the trimmed substring is returned -- confirm. */
3419 static struct substring
3420 rtrim_value (const union value *v, const struct variable *var)
3422 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3423 var_get_width (var));
3424 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, with trailing spaces removed, lies
   within the range SRANGE[0]...SRANGE[1], inclusive at both ends.  A bound
   whose `string' member is null is treated as absent, so that side of the
   range is unbounded. */
3429 in_string_range (const union value *v, const struct variable *var,
3430 const struct substring *srange)
3432 struct substring s = rtrim_value (v, var);
3433 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3434 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The categories are scanned from last to first.  Depending on the category
   type, a category matches on an exact number, an exact string (compared
   after trimming trailing spaces from V), a numeric range, a string range,
   or V being a missing value of VAR.  If the scan ends without returning,
   the result is NULL for a missing value and otherwise `othernm'.
   NOTE(review): `othernm' is presumably set when the scan meets an OTHERNM
   category, and system-missing numeric values are special-cased up front --
   confirm both. */
3437 static const struct ctables_category *
3438 ctables_categories_match (const struct ctables_categories *c,
3439 const union value *v, const struct variable *var)
3441 if (var_is_numeric (var) && v->f == SYSMIS)
3444 const struct ctables_category *othernm = NULL;
/* Counts down from n_cats - 1 to 0; `i-- > 0' is safe for unsigned size_t. */
3445 for (size_t i = c->n_cats; i-- > 0; )
3447 const struct ctables_category *cat = &c->cats[i];
3451 if (cat->number == v->f)
3456 if (ss_equals (cat->string, rtrim_value (v, var)))
/* -DBL_MAX and DBL_MAX act as "no lower bound" and "no upper bound". */
3461 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3462 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3467 if (in_string_range (v, var, cat->srange))
3472 if (var_is_value_missing (var, v))
3476 case CCT_POSTCOMPUTE:
/* This category type accepts any value, but accepts missing values only when
   `include_missing' is set. */
3491 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3494 case CCT_EXCLUDED_MISSING:
3499 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category in C, checking only the first and the last
   category: the first one wins if it is a total, otherwise the last one is
   used if it is a total.
   NOTE(review): presumably yields NULL when neither is a total -- confirm.
   NOTE(review): `c->cats[c->n_cats - 1]' assumes C has at least one category;
   verify that callers never pass an empty category list. */
3502 static const struct ctables_category *
3503 ctables_categories_total (const struct ctables_categories *c)
3505 const struct ctables_category *first = &c->cats[0];
3506 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3507 return (first->type == CCT_TOTAL ? first
3508 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S identified by the categories CATS (one per
   axis and nesting level) together with the values that case C holds for the
   corresponding variables, creating and inserting the cell if it does not
   exist yet.
   NOTE(review): presumably returns the existing or new cell -- confirm.

   A cell's identity is its category on each non-scale variable plus, unless
   that category is a total, subtotal, or postcompute, the variable's value.
   A new cell gets a copy of every variable's category and value, its
   `omit_domains' and `postcompute' flags, one summary per summary
   specification, and a domain of every type. */
3512 static struct ctables_cell *
3513 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3514 const struct ctables_category *cats[PIVOT_N_AXES][10])
3517 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identity of the requested cell. */
3518 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3520 const struct ctables_nest *nest = s->nests[a];
3521 for (size_t i = 0; i < nest->n; i++)
3522 if (i != nest->scale_idx)
3524 hash = hash_pointer (cats[a][i], hash);
3525 if (cats[a][i]->type != CCT_TOTAL
3526 && cats[a][i]->type != CCT_SUBTOTAL
3527 && cats[a][i]->type != CCT_POSTCOMPUTE)
3528 hash = value_hash (case_data (c, nest->vars[i]),
3529 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for a cell with the same categories and values. */
3535 struct ctables_cell *cell;
3536 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3538 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3540 const struct ctables_nest *nest = s->nests[a];
3541 for (size_t i = 0; i < nest->n; i++)
3542 if (i != nest->scale_idx
3543 && (cats[a][i] != cell->axes[a].cvs[i].category
3544 || (cats[a][i]->type != CCT_TOTAL
3545 && cats[a][i]->type != CCT_SUBTOTAL
3546 && cats[a][i]->type != CCT_POSTCOMPUTE
3547 && !value_equal (case_data (c, nest->vars[i]),
3548 &cell->axes[a].cvs[i].value,
3549 var_get_width (nest->vars[i])))))
/* No existing cell matched, so build a new one. */
3558 cell = xmalloc (sizeof *cell);
3561 cell->omit_domains = 0;
3562 cell->postcompute = false;
3563 //struct string name = DS_EMPTY_INITIALIZER;
3564 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3566 const struct ctables_nest *nest = s->nests[a];
3567 cell->axes[a].cvs = (nest->n
3568 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3570 for (size_t i = 0; i < nest->n; i++)
3572 const struct ctables_category *cat = cats[a][i];
3573 const struct variable *var = nest->vars[i];
3574 const union value *value = case_data (c, var);
3575 if (i != nest->scale_idx)
3577 const struct ctables_category *subtotal = cat->subtotal;
3578 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* For a total, subtotal, or postcompute category, record in `omit_domains'
   a set of domain types that depends on which axis the category is on. */
3581 if (cat->type == CCT_TOTAL
3582 || cat->type == CCT_SUBTOTAL
3583 || cat->type == CCT_POSTCOMPUTE)
3585 /* XXX these should be more encompassing I think.*/
3589 case PIVOT_AXIS_COLUMN:
3590 cell->omit_domains |= ((1u << CTDT_TABLE) |
3591 (1u << CTDT_LAYER) |
3592 (1u << CTDT_LAYERCOL) |
3593 (1u << CTDT_SUBTABLE) |
3596 case PIVOT_AXIS_ROW:
3597 cell->omit_domains |= ((1u << CTDT_TABLE) |
3598 (1u << CTDT_LAYER) |
3599 (1u << CTDT_LAYERROW) |
3600 (1u << CTDT_SUBTABLE) |
3603 case PIVOT_AXIS_LAYER:
3604 cell->omit_domains |= ((1u << CTDT_TABLE) |
3605 (1u << CTDT_LAYER));
3609 if (cat->type == CCT_POSTCOMPUTE)
3610 cell->postcompute = true;
/* The cell keeps its own clone of the value, for scale variables too. */
3613 cell->axes[a].cvs[i].category = cat;
3614 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the fragment below builds a human-readable cell name in
   `name', which is declared only in a commented-out line above, so it is
   presumably disabled debugging code -- confirm. */
3617 if (i != nest->scale_idx)
3619 if (!ds_is_empty (&name))
3620 ds_put_cstr (&name, ", ");
3621 char *value_s = data_out (value, var_get_encoding (var),
3622 var_get_print_format (var),
3623 settings_get_fmt_settings ());
3624 if (cat->type == CCT_TOTAL
3625 || cat->type == CCT_SUBTOTAL
3626 || cat->type == CCT_POSTCOMPUTE)
3627 ds_put_format (&name, "%s=total", var_get_name (var));
3629 ds_put_format (&name, "%s=%s", var_get_name (var),
3630 value_s + strspn (value_s, " "));
3636 //cell->name = ds_steal_cstr (&name);
/* Initialize one summary per specification in the summary-axis nest's spec
   set for this cell's variant, attach a domain of every type, and finally
   make the cell findable in the section's cell map. */
3638 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3639 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3640 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3641 for (size_t i = 0; i < specs->n; i++)
3642 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3643 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3644 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3645 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C for missing values in the scale data that SPECS summarizes:
   the numeric value of SPECS->var and of each of SPECS's listwise variables
   is tested with var_is_num_missing().
   NOTE(review): presumably returns false right away when SPECS is not a
   scale summary, true as soon as any tested value is missing, and false
   otherwise -- confirm. */
3650 is_scale_missing (const struct ctables_summary_spec_set *specs,
3651 const struct ccase *c)
3653 if (!specs->is_scale)
3656 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3659 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3661 const struct variable *var = specs->listwise_vars[i];
3662 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell of section S selected by CATS, creating the cell
   if needed.

   Every summary of the cell is updated through ctables_summary_add().  Then
   the cell's domains accumulate D_WEIGHT and E_WEIGHT into their totals,
   into their counts (only when !EXCLUDED_MISSING), and into their valid
   counts, and accumulate each of the table's summed variables into the
   domain's sums, skipping values that are missing for that variable.
   NOTE(review): the valid counts are presumably guarded by !IS_MISSING --
   confirm. */
3670 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3671 const struct ctables_category *cats[PIVOT_N_AXES][10],
3672 bool is_missing, bool excluded_missing,
3673 double d_weight, double e_weight)
3675 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3676 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3678 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3680 bool scale_missing = is_scale_missing (specs, c);
3681 for (size_t i = 0; i < specs->n; i++)
3682 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3683 specs->var, case_data (c, specs->var), specs->is_scale,
3684 scale_missing, is_missing, excluded_missing,
3685 d_weight, e_weight);
/* NOTE(review): the test below uses logical `&&'.  Since `1u << dt' is never
   zero, it reduces to `!cell->omit_domains', so a cell with any omitted
   domain bit skips every domain type and the individual bits set by
   ctables_cell_insert__() are never examined.  Bitwise `&' looks like the
   intent; changing it would alter results, so confirm before fixing. */
3686 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3687 if (!(cell->omit_domains && (1u << dt)))
3689 struct ctables_domain *d = cell->domains[dt];
3690 d->d_total += d_weight;
3691 d->e_total += e_weight;
3693 if (!excluded_missing)
3695 d->d_count += d_weight;
3696 d->e_count += e_weight;
3701 d->d_valid += d_weight;
3702 d->e_valid += e_weight;
/* e_sum accumulates the value scaled by E_WEIGHT; u_sum accumulates the raw
   value. */
3705 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3707 /* XXX listwise_missing??? */
3708 const struct variable *var = s->table->sum_vars[i];
3709 double addend = case_num (c, var);
3710 if (!var_is_num_missing (var, addend))
3712 struct ctables_sum *sum = &d->sums[i];
3713 sum->e_sum += addend * e_weight;
3714 sum->u_sum += addend;
/* Adds case C to the cells formed by replacing category entries in CATS with
   the matching variable's total category.

   Starting at axis START_AXIS and nesting index START_NEST, each non-scale
   variable's total category is looked up with ctables_categories_total();
   the case is added to the resulting cell and the function recurses from the
   next nesting index so that combinations of totals are covered as well.
   The other arguments are passed through to ctables_cell_add__().
   NOTE(review): presumably variables without a total category are skipped,
   cats[a][i] is set to the total before the add and restored from `save'
   afterwards -- confirm. */
3722 recurse_totals (struct ctables_section *s, const struct ccase *c,
3723 const struct ctables_category *cats[PIVOT_N_AXES][10],
3724 bool is_missing, bool excluded_missing,
3725 double d_weight, double e_weight,
3726 enum pivot_axis_type start_axis, size_t start_nest)
3728 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3730 const struct ctables_nest *nest = s->nests[a];
3731 for (size_t i = start_nest; i < nest->n; i++)
3733 if (i == nest->scale_idx)
3736 const struct variable *var = nest->vars[i];
3738 const struct ctables_category *total = ctables_categories_total (
3739 s->table->categories[var_get_dict_index (var)]);
3742 const struct ctables_category *save = cats[a][i];
3744 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3745 d_weight, e_weight);
3746 recurse_totals (s, c, cats, is_missing, excluded_missing,
3747 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells formed by replacing category entries in CATS with
   the subtotal category they belong to.

   Starting at axis START_AXIS and nesting index START_NEST, each non-scale
   entry is replaced by its category's `subtotal'; the case is added to the
   resulting cell and the function recurses from the next nesting index so
   that combinations of subtotals are covered as well.  The other arguments
   are passed through to ctables_cell_add__().
   NOTE(review): presumably entries whose category has no subtotal are
   skipped and cats[a][i] is restored from `save' afterwards -- confirm. */
3756 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3757 const struct ctables_category *cats[PIVOT_N_AXES][10],
3758 bool is_missing, bool excluded_missing,
3759 double d_weight, double e_weight,
3760 enum pivot_axis_type start_axis, size_t start_nest)
3762 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3764 const struct ctables_nest *nest = s->nests[a];
3765 for (size_t i = start_nest; i < nest->n; i++)
3767 if (i == nest->scale_idx)
3770 const struct ctables_category *save = cats[a][i];
3773 cats[a][i] = save->subtotal;
3774 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3775 d_weight, e_weight);
3776 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3777 d_weight, e_weight, a, i + 1);
/* Records that VALUE occurred for variable VAR in the hash map OCCURRENCES.
   The map is keyed on the value itself, hashed and compared at VAR's width.
   When a new entry is inserted it holds its own clone of VALUE.
   NOTE(review): presumably returns early, without inserting, when an equal
   value is already present -- confirm. */
3786 ctables_add_occurrence (const struct variable *var,
3787 const union value *value,
3788 struct hmap *occurrences)
3790 int width = var_get_width (var);
3791 unsigned int hash = value_hash (value, width, 0);
3793 struct ctables_occurrence *o;
3794 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3796 if (value_equal (value, &o->value, width))
3799 o = xmalloc (sizeof *o);
3800 value_clone (&o->value, value, width);
3801 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First, each non-scale variable on each axis is matched to a category with
   ctables_categories_match(), filling in `cats'; a file-local
   CCT_EXCLUDED_MISSING placeholder category stands in where the code sets
   `excluded_missing'.  Unless a value was excluded, every non-scale
   variable's value is then recorded in S->occurrences.  Finally the case is
   added to its own cell and, through recurse_totals() and
   recurse_subtotals(), to the cells for totals and subtotals.
   NOTE(review): the placeholder is presumably used when no category matches
   a missing value, and a case with an unmatched non-missing value is
   presumably dropped -- confirm both. */
3805 ctables_cell_insert (struct ctables_section *s,
3806 const struct ccase *c,
3807 double d_weight, double e_weight)
/* The fixed bound of 10 limits the nesting depth per axis; it must agree
   with the array parameter type of the functions this is passed to. */
3809 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3811 /* Does at least one categorical variable have a missing value in an included
3812 or excluded category? */
3813 bool is_missing = false;
3815 /* Does at least one categorical variable have a missing value in an excluded
3817 bool excluded_missing = false;
3819 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3821 const struct ctables_nest *nest = s->nests[a];
3822 for (size_t i = 0; i < nest->n; i++)
3824 if (i == nest->scale_idx)
3827 const struct variable *var = nest->vars[i];
3828 const union value *value = case_data (c, var);
3830 bool var_missing = var_is_value_missing (var, value) != 0;
3834 cats[a][i] = ctables_categories_match (
3835 s->table->categories[var_get_dict_index (var)], value, var);
3841 static const struct ctables_category cct_excluded_missing = {
3842 .type = CCT_EXCLUDED_MISSING,
3845 cats[a][i] = &cct_excluded_missing;
3846 excluded_missing = true;
3851 if (!excluded_missing)
3852 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3854 const struct ctables_nest *nest = s->nests[a];
3855 for (size_t i = 0; i < nest->n; i++)
3856 if (i != nest->scale_idx)
3858 const struct variable *var = nest->vars[i];
3859 const union value *value = case_data (c, var);
3860 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3864 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3865 d_weight, e_weight);
3867 //if (!excluded_missing)
3869 recurse_totals (s, c, cats, is_missing, excluded_missing,
3870 d_weight, e_weight, 0, 0);
3871 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3872 d_weight, e_weight, 0, 0);
/* Summary specification set that a merge item refers to;
   merge_item_compare_3way() reads the spec at set->specs[ofs]. */
3878 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and B
   refer to (the spec at index `ofs' within each item's `set').

   The sort keys, in order of precedence, are:
     1. `function', ascending by enumeration value;
     2. `percentile', descending (a larger percentile sorts first);
     3. `label', by strcmp(), with a null label treated as "". */
3883 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3885 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3886 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3887 if (as->function != bs->function)
3888 return as->function > bs->function ? 1 : -1;
3889 else if (as->percentile != bs->percentile)
3890 return as->percentile < bs->percentile ? 1 : -1;
3892 const char *as_label = as->label ? as->label : "";
3893 const char *bs_label = bs->label ? bs->label : "";
3894 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of variable VAR and passes string S to
   pivot_value_format() to receive the text.  This is done with a temporary
   pivot value, which is destroyed before returning. */
3898 ctables_category_format_number (double number, const struct variable *var,
3901 struct pivot_value *pv = pivot_value_new_var_value (
3902 var, &(union value) { .f = number });
3903 pivot_value_format (pv, NULL, s);
3904 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR and passes OUT to
   pivot_value_format() to receive the text.

   STRING is first copied into a temporary buffer of VAR's width using
   buf_copy_rpad() with ' ' as the pad character, because the value handed to
   pivot_value_new_var_value() must be exactly that wide.
   NOTE(review): the temporary buffer `s' is presumably freed before
   returning -- confirm, otherwise it leaks. */
3908 ctables_category_format_string (struct substring string,
3909 const struct variable *var, struct string *out)
3911 int width = var_get_width (var);
3912 char *s = xmalloc (width);
3913 buf_copy_rpad (s, width, string.string, string.length, ' ');
3914 struct pivot_value *pv = pivot_value_new_var_value (
3915 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3916 pivot_value_format (pv, NULL, out);
3917 pivot_value_destroy (pv);
/* Writes a textual label for category CAT of variable VAR to S.

   Depending on the category type the label is the formatted number or
   string, the two range ends joined by " THRU ", the literal "MISSING" or
   "OTHERNM", "&" followed by the postcompute's name, or the category's
   `total_label'.
   NOTE(review): ctables_postcompute_label() tests the result with `!', so
   this presumably returns false for category types that have no label of
   their own -- confirm. */
3922 ctables_category_format_label (const struct ctables_category *cat,
3923 const struct variable *var,
3929 ctables_category_format_number (cat->number, var, s);
3933 ctables_category_format_string (cat->string, var, s);
3937 ctables_category_format_number (cat->nrange[0], var, s);
3938 ds_put_format (s, " THRU ");
3939 ctables_category_format_number (cat->nrange[1], var, s);
3943 ctables_category_format_string (cat->srange[0], var, s);
3944 ds_put_format (s, " THRU ");
3945 ctables_category_format_string (cat->srange[1], var, s);
3949 ds_put_cstr (s, "MISSING");
3953 ds_put_cstr (s, "OTHERNM");
3956 case CCT_POSTCOMPUTE:
3957 ds_put_format (s, "&%s", cat->pc->name);
3962 ds_put_cstr (s, cat->total_label);
3968 case CCT_EXCLUDED_MISSING:
/* Creates the pivot value used as the label for postcompute category CAT of
   variable VAR, expanding references to other categories in CATS.

   Each occurrence of ")LABEL[N]" in the postcompute's label is replaced by
   the label of the Nth category in CATS (N is 1-based and must be a decimal
   integer in the range 1...n_cats), as produced by
   ctables_category_format_label().  A label with no such reference is used
   as is.  The last statement returns the unexpanded label.
   NOTE(review): that last statement is presumably the fallback for a
   malformed or out-of-range reference -- confirm. */
3975 static struct pivot_value *
3976 ctables_postcompute_label (const struct ctables_categories *cats,
3977 const struct ctables_category *cat,
3978 const struct variable *var)
3980 struct substring in = ss_cstr (cat->pc->label);
3981 struct substring target = ss_cstr (")LABEL[");
3983 struct string out = DS_EMPTY_INITIALIZER;
3986 size_t chunk = ss_find_substring (in, target);
/* No further reference: emit the remaining text.  When nothing was expanded
   so far, the input is copied directly instead of going through `out'. */
3987 if (chunk == SIZE_MAX)
3989 if (ds_is_empty (&out))
3990 return pivot_value_new_user_text (in.string, in.length);
3993 ds_put_substring (&out, in);
3994 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then parse the index up to `]'. */
3998 ds_put_substring (&out, ss_head (in, chunk));
3999 ss_advance (&in, chunk + target.length);
4001 struct substring idx_s;
4002 if (!ss_get_until (&in, ']', &idx_s))
/* The whole bracketed text must parse as the number: `tail' has to land
   exactly at the end of idx_s. */
4005 long int idx = strtol (idx_s.string, &tail, 10);
4006 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4009 struct ctables_category *cat2 = &cats->cats[idx - 1];
4010 if (!ctables_category_format_label (cat2, var, &out))
4016 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels VALUE of variable VAR when it falls in
   category CAT (one of the categories in CATS):

     - a postcompute category that has a label uses that label, expanded by
       ctables_postcompute_label();
     - a total or subtotal category uses its `total_label';
     - anything else, including a postcompute without a label, uses the
       variable value itself. */
4019 static struct pivot_value *
4020 ctables_category_create_value_label (const struct ctables_categories *cats,
4021 const struct ctables_category *cat,
4022 const struct variable *var,
4023 const union value *value)
4025 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4026 ? ctables_postcompute_label (cats, cat, var)
4027 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4028 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4029 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and precomputed HASH.
   NOTE(review): presumably returns the matching entry, or NULL when there is
   none -- confirm. */
4032 static struct ctables_value *
4033 ctables_value_find__ (struct ctables_table *t, const union value *value,
4034 int width, unsigned int hash)
4036 struct ctables_value *clv;
4037 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4038 hash, &t->clabels_values_map)
4039 if (value_equal (value, &clv->value, width))
/* Ensures that T->clabels_values_map has an entry for VALUE.  The map is
   first searched with ctables_value_find__(); a newly inserted entry holds
   its own clone of VALUE.
   NOTE(review): the allocation is presumably skipped when the lookup finds
   an existing entry -- confirm. */
4045 ctables_value_insert (struct ctables_table *t, const union value *value,
4048 unsigned int hash = value_hash (value, width, 0);
4049 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4052 clv = xmalloc (sizeof *clv);
4053 value_clone (&clv->value, value, width);
4054 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's map of category-label values.
   This is a convenience wrapper that computes the hash (seed 0, the same
   seed ctables_value_insert() uses) and defers to ctables_value_find__(). */
4058 static struct ctables_value *
4059 ctables_value_find (struct ctables_table *t,
4060 const union value *value, int width)
4062 return ctables_value_find__ (t, value, width,
4063 value_hash (value, width, 0));
/* Recursively appends to T->sections one section for every combination of
   nests across the axes.

   While A is a valid axis, IX[A] iterates over the nests stacked on that
   axis (an axis with an empty stack still gets one iteration) and the
   function recurses to the next axis.  Past the last axis, IX holds one
   index per axis; a new section is initialized from it with an empty cell
   map, one occurrences map per variable of each axis's nest, and one domain
   map per domain type.
   NOTE(review): T->sections must already have room for the new entry, since
   n_sections is simply incremented -- confirm the caller preallocates. */
4067 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4068 size_t ix[PIVOT_N_AXES])
4070 if (a < PIVOT_N_AXES)
4072 size_t limit = MAX (t->stacks[a].n, 1);
4073 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4074 ctables_table_add_section (t, a + 1, ix);
4078 struct ctables_section *s = &t->sections[t->n_sections++];
4079 *s = (struct ctables_section) {
4081 .cells = HMAP_INITIALIZER (s->cells),
/* A is reused here as a plain loop variable over all axes. */
4083 for (a = 0; a < PIVOT_N_AXES; a++)
4086 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4088 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4089 for (size_t i = 0; i < nest->n; i++)
4090 hmap_init (&s->occurrences[a][i]);
4092 for (size_t i = 0; i < N_CTDTS; i++)
4093 hmap_init (&s->domains[i]);
/* Arithmetic operators for postcompute expressions.  Each takes two doubles,
   even the unary one, so that all of them can be passed as the `evaluate'
   callback of ctables_pcexpr_evaluate_nonterminal(). */

/* Addition operator.  NOTE(review): presumably returns A + B -- confirm. */
4098 ctpo_add (double a, double b)
/* Subtraction operator.  NOTE(review): presumably returns A - B -- confirm. */
4104 ctpo_sub (double a, double b)
/* Multiplication operator.  NOTE(review): presumably returns A * B --
   confirm. */
4110 ctpo_mul (double a, double b)
/* Division operator: A / B, or SYSMIS when B is zero. */
4116 ctpo_div (double a, double b)
4118 return b ? a / b : SYSMIS;
/* Exponentiation operator: computes pow (A, B) with the caller's errno saved
   beforehand.
   NOTE(review): presumably errno is inspected afterwards to turn a failed
   pow() into SYSMIS and is then restored -- confirm. */
4122 ctpo_pow (double a, double b)
4124 int save_errno = errno;
4126 double result = pow (a, b);
/* Negation operator; B is unused.  NOTE(review): presumably returns -A --
   confirm. */
4134 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell. */
4139 struct ctables_pcexpr_evaluate_ctx
/* Cell being computed; supplies the category and value for every variable
   other than the postcompute one. */
4141 const struct ctables_cell *cell;
/* Section that contains `cell' and the cells that the expression refers to. */
4142 const struct ctables_section *section;
/* Categories used to resolve the category references in the expression. */
4143 const struct ctables_categories *cats;
/* Axis on which the postcompute category appears. */
4144 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute() when resolving a
   reference. */
4147 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
4150 static double ctables_pcexpr_evaluate (
4151 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS (1 or 2) operands, in context
   CTX: each operand E->subs[i] is evaluated recursively and the results are
   combined with EVALUATE.  For a unary operator the second argument passed
   to EVALUATE is 0.
   NOTE(review): presumably returns SYSMIS without calling EVALUATE as soon
   as an operand is non-finite or SYSMIS -- confirm. */
4154 ctables_pcexpr_evaluate_nonterminal (
4155 const struct ctables_pcexpr_evaluate_ctx *ctx,
4156 const struct ctables_pcexpr *e, size_t n_args,
4157 double evaluate (double, double))
4159 double args[2] = { 0, 0 };
4160 for (size_t i = 0; i < n_args; i++)
4162 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4163 if (!isfinite (args[i]) || args[i] == SYSMIS)
4166 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary index CTX->summary_idx, of the cell
   that is like CTX->cell except that the variable at position
   (CTX->pc_a, CTX->pc_a_idx) takes the category and value in PC_CV.

   The target cell is located in the section's cell map using the same
   identity rule as ctables_cell_insert__(): category pointer for every
   non-scale variable plus, for categories other than total, subtotal, and
   postcompute, the value.
   NOTE(review): the result when no such cell exists is presumably 0 or
   SYSMIS -- confirm. */
4170 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4171 const struct ctables_cell_value *pc_cv)
4173 const struct ctables_section *s = ctx->section;
/* Hash the identity of the target cell, substituting PC_CV at the
   postcompute position. */
4176 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4178 const struct ctables_nest *nest = s->nests[a];
4179 for (size_t i = 0; i < nest->n; i++)
4180 if (i != nest->scale_idx)
4182 const struct ctables_cell_value *cv
4183 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4184 : &ctx->cell->axes[a].cvs[i]);
4185 hash = hash_pointer (cv->category, hash);
4186 if (cv->category->type != CCT_TOTAL
4187 && cv->category->type != CCT_SUBTOTAL
4188 && cv->category->type != CCT_POSTCOMPUTE)
4189 hash = value_hash (&cv->value,
4190 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for the cell that matches on every variable. */
4194 struct ctables_cell *tc;
4195 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4199 const struct ctables_nest *nest = s->nests[a];
4200 for (size_t i = 0; i < nest->n; i++)
4201 if (i != nest->scale_idx)
4203 const struct ctables_cell_value *p_cv
4204 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4205 : &ctx->cell->axes[a].cvs[i]);
4206 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4207 if (p_cv->category != t_cv->category
4208 || (p_cv->category->type != CCT_TOTAL
4209 && p_cv->category->type != CCT_SUBTOTAL
4210 && p_cv->category->type != CCT_POSTCOMPUTE
4211 && !value_equal (&p_cv->value,
4213 var_get_width (nest->vars[i]))))
/* Found: report the requested summary of the target cell, using the spec
   set that matches the target cell's own summary variant. */
4225 const struct ctables_table *t = s->table;
4226 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4227 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4228 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4229 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns the result.

   Category references are resolved to a category with
   ctables_find_category_for_postcompute() and evaluated through
   ctables_pcexpr_evaluate_category().  The arithmetic operators are handled
   by ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*()
   function. */
4233 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4234 const struct ctables_pcexpr *e)
/* References that can cover many distinct values: add up the result for
   every value that occurred in the data for the postcompute variable and
   that falls into the referenced category. */
4241 case CTPO_CAT_NRANGE:
4242 case CTPO_CAT_SRANGE:
4243 case CTPO_CAT_MISSING:
4244 case CTPO_CAT_OTHERNM:
4246 struct ctables_cell_value cv = {
4247 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4249 assert (cv.category != NULL);
4251 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4252 const struct ctables_occurrence *o;
4255 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4256 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4257 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4259 cv.value = o->value;
4260 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* References evaluated as a single cell.  The value is set from E->number
   for all three; it plays no part in locating a total or subtotal cell. */
4265 case CTPO_CAT_NUMBER:
4266 case CTPO_CAT_SUBTOTAL:
4267 case CTPO_CAT_TOTAL:
4269 struct ctables_cell_value cv = {
4270 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4271 .value = { .f = e->number },
4273 assert (cv.category != NULL);
4274 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String reference: a literal shorter than the variable's width is copied
   into a space-padded buffer of that width so that it compares equal to
   stored values.
   NOTE(review): the temporary buffer is presumably freed before `retval' is
   returned -- confirm. */
4277 case CTPO_CAT_STRING:
4279 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4281 if (width > e->string.length)
4283 s = xmalloc (width);
4284 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4286 struct ctables_cell_value cv = {
4287 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4288 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4290 assert (cv.category != NULL);
4291 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary ones and one unary one (negation). */
4297 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4300 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4303 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4306 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4309 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4312 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL, which must have its
   `postcompute' flag set, by scanning the cell's category on every variable
   of every axis in section S.  The position of the postcompute variable is
   reported through PC_A_P and the index output parameter.
   NOTE(review): presumably returns the category found, or NULL when more
   than one postcompute category applies to the cell -- confirm. */
4318 static const struct ctables_category *
4319 ctables_cell_postcompute (const struct ctables_section *s,
4320 const struct ctables_cell *cell,
4321 enum pivot_axis_type *pc_a_p,
4324 assert (cell->postcompute);
4325 const struct ctables_category *pc_cat = NULL;
4326 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4327 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4329 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4330 if (cv->category->type == CCT_POSTCOMPUTE)
4334 /* Multiple postcomputes cross each other. The value is
4339 pc_cat = cv->category;
4343 *pc_a_idx_p = pc_a_idx;
4347 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS by evaluating the postcompute's expression.

   When the postcompute has its own summary specifications, the first one
   with the same function and percentile as SS supplies *FORMAT and
   *IS_CTABLES_FORMAT.
   NOTE(review): presumably returns SYSMIS when ctables_cell_postcompute()
   finds no usable category, and the format loop is presumably skipped when
   pc->specs is null -- confirm both. */
4352 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4353 const struct ctables_cell *cell,
4354 const struct ctables_summary_spec *ss,
4355 struct fmt_spec *format,
4356 bool *is_ctables_format,
4359 enum pivot_axis_type pc_a = 0;
4360 size_t pc_a_idx = 0;
4361 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4362 s, cell, &pc_a, &pc_a_idx);
4366 const struct ctables_postcompute *pc = pc_cat->pc;
4369 for (size_t i = 0; i < pc->specs->n; i++)
4371 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4372 if (ss->function == ss2->function
4373 && ss->percentile == ss2->percentile)
4375 *format = ss2->format;
4376 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4382 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4383 const struct ctables_categories *cats = s->table->categories[
4384 var_get_dict_index (var)];
4385 struct ctables_pcexpr_evaluate_ctx ctx = {
4390 .pc_a_idx = pc_a_idx,
4391 .summary_idx = summary_idx,
4392 .parse_format = pc_cat->parse_format,
4394 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS as a UTF-8 string, then
   post-processes the text for the CTABLES-specific formats as explained
   below.
   NOTE(review): presumably returns the malloc()'d string `s', which the
   caller then owns -- confirm. */
4398 ctables_format (double d, const struct fmt_spec *format,
4399 const struct fmt_settings *settings)
4401 const union value v = { .f = d };
4402 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4404 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4405 produce the results we want for negative numbers, putting the negative
4406 sign in the wrong spot, before the prefix instead of after it. We can't,
4407 in fact, produce the desired results using a custom-currency
4408 specification. Instead, we postprocess the output, moving the negative
4411 NEQUAL: "-N=3" => "N=-3"
4412 PAREN: "-(3)" => "(-3)"
4413 PCTPAREN: "-(3%)" => "(-3%)"
4415 This transformation doesn't affect NEGPAREN. */
/* Only the first '-' is considered, and one directly preceded by 'E' (an
   exponent sign) is left alone.  The destination is the character after
   "N=" when that prefix is present, otherwise the '(' position.
   NOTE(review): `minus_dst' is null when the text has neither "N=" nor '(';
   presumably the move is guarded against that -- confirm. */
4416 char *minus_src = strchr (s, '-');
4417 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4419 char *n_equals = strstr (s, "N=");
4420 char *lparen = strchr (s, '(');
4421 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4423 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest stacked on axis A of table T.  For each nest it tests
   whether the nest consists of a single variable that is the scale variable
   (n == 1 and scale_idx == 0) and whether that variable's label setting in
   T->ctables->vlabels is CTVL_NONE.
   NOTE(review): presumably returns false as soon as a nest fails either
   test and true when all nests pass -- confirm. */
4429 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4431 for (size_t i = 0; i < t->stacks[a].n; i++)
4433 struct ctables_nest *nest = &t->stacks[a].nests[i];
4434 if (nest->n != 1 || nest->scale_idx != 0)
4437 enum ctables_vlabel vlabel
4438 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4439 if (vlabel != CTVL_NONE)
4446 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4448 struct pivot_table *pt = pivot_table_create__ (
4450 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4451 : pivot_value_new_text (N_("Custom Tables"))),
4454 pivot_table_set_caption (
4455 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4457 pivot_table_set_corner_text (
4458 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4460 bool summary_dimension = (t->summary_axis != t->slabels_axis
4461 || (!t->slabels_visible
4462 && t->summary_specs.n > 1));
4463 if (summary_dimension)
4465 struct pivot_dimension *d = pivot_dimension_create (
4466 pt, t->slabels_axis, N_("Statistics"));
4467 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4468 if (!t->slabels_visible)
4469 d->hide_all_labels = true;
4470 for (size_t i = 0; i < specs->n; i++)
4471 pivot_category_create_leaf (
4472 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4475 bool categories_dimension = t->clabels_example != NULL;
4476 if (categories_dimension)
4478 struct pivot_dimension *d = pivot_dimension_create (
4479 pt, t->label_axis[t->clabels_from_axis],
4480 t->clabels_from_axis == PIVOT_AXIS_ROW
4481 ? N_("Row Categories")
4482 : N_("Column Categories"));
4483 const struct variable *var = t->clabels_example;
4484 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4485 for (size_t i = 0; i < t->n_clabels_values; i++)
4487 const struct ctables_value *value = t->clabels_values[i];
4488 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4489 assert (cat != NULL);
4490 pivot_category_create_leaf (
4491 d->root, ctables_category_create_value_label (c, cat,
4497 pivot_table_set_look (pt, ct->look);
4498 struct pivot_dimension *d[PIVOT_N_AXES];
4499 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4501 static const char *names[] = {
4502 [PIVOT_AXIS_ROW] = N_("Rows"),
4503 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4504 [PIVOT_AXIS_LAYER] = N_("Layers"),
4506 d[a] = (t->axes[a] || a == t->summary_axis
4507 ? pivot_dimension_create (pt, a, names[a])
4512 assert (t->axes[a]);
4514 for (size_t i = 0; i < t->stacks[a].n; i++)
4516 struct ctables_nest *nest = &t->stacks[a].nests[i];
4517 struct ctables_section **sections = xnmalloc (t->n_sections,
4519 size_t n_sections = 0;
4521 size_t n_total_cells = 0;
4522 size_t max_depth = 0;
4523 for (size_t j = 0; j < t->n_sections; j++)
4524 if (t->sections[j].nests[a] == nest)
4526 struct ctables_section *s = &t->sections[j];
4527 sections[n_sections++] = s;
4528 n_total_cells += hmap_count (&s->cells);
4530 size_t depth = s->nests[a]->n;
4531 max_depth = MAX (depth, max_depth);
4534 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4536 size_t n_sorted = 0;
4538 for (size_t j = 0; j < n_sections; j++)
4540 struct ctables_section *s = sections[j];
4542 struct ctables_cell *cell;
4543 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4545 sorted[n_sorted++] = cell;
4546 assert (n_sorted <= n_total_cells);
4549 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4550 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4553 if (a == PIVOT_AXIS_ROW)
4555 size_t ids[N_CTDTS];
4556 memset (ids, 0, sizeof ids);
4557 for (size_t j = 0; j < n_sorted; j++)
4559 struct ctables_cell *cell = sorted[j];
4560 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4562 struct ctables_domain *domain = cell->domains[dt];
4563 if (!domain->sequence)
4564 domain->sequence = ++ids[dt];
4571 for (size_t j = 0; j < n_sorted; j++)
4573 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4578 struct ctables_level
4580 enum ctables_level_type
4582 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4583 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4584 CTL_SUMMARY, /* Summary functions. */
4588 enum settings_value_show vlabel; /* CTL_VAR only. */
4591 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4592 size_t n_levels = 0;
4593 for (size_t k = 0; k < nest->n; k++)
4595 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4596 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4598 if (vlabel != CTVL_NONE)
4600 levels[n_levels++] = (struct ctables_level) {
4602 .vlabel = (enum settings_value_show) vlabel,
4607 if (nest->scale_idx != k
4608 && (k != nest->n - 1 || t->label_axis[a] == a))
4610 levels[n_levels++] = (struct ctables_level) {
4611 .type = CTL_CATEGORY,
4617 if (!summary_dimension && a == t->slabels_axis)
4619 levels[n_levels++] = (struct ctables_level) {
4620 .type = CTL_SUMMARY,
4621 .var_idx = SIZE_MAX,
4625 /* Pivot categories:
4627 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4628 - category for nest->vars[0], if nest->scale_idx != 0
4629 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4630 - category for nest->vars[1], if nest->scale_idx != 1
4632 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4633 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4634 - summary function, if 'a == t->slabels_axis && a ==
4637 Additional dimensions:
4639 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4641 - If 't->label_axis[b] == a' for some 'b != a', add a category
4646 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4648 for (size_t j = 0; j < n_sorted; j++)
4650 struct ctables_cell *cell = sorted[j];
4651 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4653 size_t n_common = 0;
4656 for (; n_common < n_levels; n_common++)
4658 const struct ctables_level *level = &levels[n_common];
4659 if (level->type == CTL_CATEGORY)
4661 size_t var_idx = level->var_idx;
4662 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4663 if (prev->axes[a].cvs[var_idx].category != c)
4665 else if (c->type != CCT_SUBTOTAL
4666 && c->type != CCT_TOTAL
4667 && c->type != CCT_POSTCOMPUTE
4668 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4669 &cell->axes[a].cvs[var_idx].value,
4670 var_get_type (nest->vars[var_idx])))
4676 for (size_t k = n_common; k < n_levels; k++)
4678 const struct ctables_level *level = &levels[k];
4679 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4680 if (level->type == CTL_SUMMARY)
4682 assert (k == n_levels - 1);
4684 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4685 for (size_t m = 0; m < specs->n; m++)
4687 int leaf = pivot_category_create_leaf (
4688 parent, ctables_summary_label (&specs->specs[m],
4696 const struct variable *var = nest->vars[level->var_idx];
4697 struct pivot_value *label;
4698 if (level->type == CTL_VAR)
4700 label = pivot_value_new_variable (var);
4701 label->variable.show = level->vlabel;
4703 else if (level->type == CTL_CATEGORY)
4705 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4706 label = ctables_category_create_value_label (
4707 t->categories[var_get_dict_index (var)],
4708 cv->category, var, &cv->value);
4713 if (k == n_levels - 1)
4714 prev_leaf = pivot_category_create_leaf (parent, label);
4716 groups[k] = pivot_category_create_group__ (parent, label);
4720 cell->axes[a].leaf = prev_leaf;
4729 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4733 size_t n_total_cells = 0;
4734 for (size_t j = 0; j < t->n_sections; j++)
4735 n_total_cells += hmap_count (&t->sections[j].cells);
4737 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4738 size_t n_sorted = 0;
4739 for (size_t j = 0; j < t->n_sections; j++)
4741 const struct ctables_section *s = &t->sections[j];
4742 struct ctables_cell *cell;
4743 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4745 sorted[n_sorted++] = cell;
4747 assert (n_sorted <= n_total_cells);
4748 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4750 size_t ids[N_CTDTS];
4751 memset (ids, 0, sizeof ids);
4752 for (size_t j = 0; j < n_sorted; j++)
4754 struct ctables_cell *cell = sorted[j];
4755 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4757 struct ctables_domain *domain = cell->domains[dt];
4758 if (!domain->sequence)
4759 domain->sequence = ++ids[dt];
4766 for (size_t i = 0; i < t->n_sections; i++)
4768 struct ctables_section *s = &t->sections[i];
4770 struct ctables_cell *cell;
4771 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4776 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4777 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4778 for (size_t j = 0; j < specs->n; j++)
4781 size_t n_dindexes = 0;
4783 if (summary_dimension)
4784 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4786 if (categories_dimension)
4788 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4789 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4790 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4791 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4794 dindexes[n_dindexes++] = ctv->leaf;
4797 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4800 int leaf = cell->axes[a].leaf;
4801 if (a == t->summary_axis && !summary_dimension)
4803 dindexes[n_dindexes++] = leaf;
4806 const struct ctables_summary_spec *ss = &specs->specs[j];
4808 struct fmt_spec format = specs->specs[j].format;
4809 bool is_ctables_format = ss->is_ctables_format;
4810 double d = (cell->postcompute
4811 ? ctables_cell_calculate_postcompute (
4812 s, cell, ss, &format, &is_ctables_format, j)
4813 : ctables_summary_value (cell, &cell->summaries[j],
4816 struct pivot_value *value;
4817 if (ct->hide_threshold != 0
4818 && d < ct->hide_threshold
4819 && ctables_summary_function_is_count (ss->function))
4821 value = pivot_value_new_user_text_nocopy (
4822 xasprintf ("<%d", ct->hide_threshold));
4824 else if (d == 0 && ct->zero)
4825 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4826 else if (d == SYSMIS && ct->missing)
4827 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4828 else if (is_ctables_format)
4829 value = pivot_value_new_user_text_nocopy (
4830 ctables_format (d, &format, &ct->ctables_formats));
4833 value = pivot_value_new_number (d);
4834 value->numeric.format = format;
4836 /* XXX should text values be right-justified? */
4837 pivot_table_put (pt, dindexes, n_dindexes, value);
4842 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T can be moved to the
   axis given by t->label_axis[a].  Each problem that is detected is reported
   with msg (SE, ...).  ctables_prepare_table() combines the results for the
   row and column axes with &&, so the result is used as a truth value.

   NOTE(review): this listing omits short lines (braces, bare returns, the
   return type), so the early exits and the exact values returned are not
   visible here. */
4846 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4848 enum pivot_axis_type label_pos = t->label_axis[a];
/* Remember A in the table; the output code reads t->clabels_from_axis to
   locate the nest whose last variable supplies the moved labels. */
4852 t->clabels_from_axis = a;
/* Subcommand and keyword names, used only to build the error messages. */
4854 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4855 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4857 const struct ctables_stack *stack = &t->stacks[a];
4861 const struct ctables_nest *n0 = &stack->nests[0];
4864 assert (stack->n == 1);
/* V0 is the last variable of the first nest and C0 its category
   specifications; V0 also becomes the table's example variable for the moved
   category labels. */
4868 const struct variable *v0 = n0->vars[n0->n - 1];
4869 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4870 t->clabels_example = v0;
/* A category of type CCT_FUNCTION triggers the "sorting based on a summary
   function" error. */
4872 for (size_t i = 0; i < c0->n_cats; i++)
4873 if (c0->cats[i].type == CCT_FUNCTION)
4875 msg (SE, _("%s=%s is not allowed with sorting based "
4876 "on a summary function."),
4877 subcommand_name, pos_name);
/* The last variable of the first nest is that nest's scale variable. */
4880 if (n0->n - 1 == n0->scale_idx)
4882 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4883 "but %s is a scale variable."),
4884 subcommand_name, pos_name, var_get_name (v0));
/* Compare the last variable VI of every other nest against V0: it must not be
   a scale variable and must agree with V0 in width, value labels, and
   category specifications. */
4888 for (size_t i = 1; i < stack->n; i++)
4890 const struct ctables_nest *ni = &stack->nests[i];
4892 const struct variable *vi = ni->vars[ni->n - 1];
4893 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4895 if (ni->n - 1 == ni->scale_idx)
4897 msg (SE, _("%s=%s requires the variables to be moved to be "
4898 "categorical, but %s is a scale variable."),
4899 subcommand_name, pos_name, var_get_name (vi));
4902 if (var_get_width (v0) != var_get_width (vi))
4904 msg (SE, _("%s=%s requires the variables to be "
4905 "moved to have the same width, but %s has "
4906 "width %d and %s has width %d."),
4907 subcommand_name, pos_name,
4908 var_get_name (v0), var_get_width (v0),
4909 var_get_name (vi), var_get_width (vi));
4912 if (!val_labs_equal (var_get_value_labels (v0),
4913 var_get_value_labels (vi)))
4915 msg (SE, _("%s=%s requires the variables to be "
4916 "moved to have the same value labels, but %s "
4917 "and %s have different value labels."),
4918 subcommand_name, pos_name,
4919 var_get_name (v0), var_get_name (vi));
4922 if (!ctables_categories_equal (c0, ci))
4924 msg (SE, _("%s=%s requires the variables to be "
4925 "moved to have the same category "
4926 "specifications, but %s and %s have different "
4927 "category specifications."),
4928 subcommand_name, pos_name,
4929 var_get_name (v0), var_get_name (vi));
/* Looks up VAR in the *N-element array *SUM_VARS and returns its index.  If
   VAR is not present yet, appends it first, growing the array (whose capacity
   is tracked in *ALLOCATED) as needed, and returns the index of the new
   element. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  struct variable **vars = *sum_vars;
  size_t count = *n;

  for (size_t idx = 0; idx < count; idx++)
    if (vars[idx] == var)
      return idx;

  if (count >= *allocated)
    {
      vars = x2nrealloc (vars, allocated, sizeof *vars);
      *sum_vars = vars;
    }
  vars[count] = var;
  *n = count + 1;
  return count;
}
/* Walks axis expression A.  For every summary specification in a->specs[]
   whose function satisfies ctables_function_is_pctsum(), stores in
   spec->sum_var_idx the index that add_sum_var() returns for a->var within
   *SUM_VARS (N and ALLOCATED are passed through to add_sum_var()).  Also
   recurses into both a->subs[] children.

   NOTE(review): the lines that choose between the two loops below
   (presumably a test of the node's kind, plus a null check) are not part of
   this listing. */
4952 enumerate_sum_vars (const struct ctables_axis *a,
4953 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Every summary variant, every spec within it. */
4961 for (size_t i = 0; i < N_CSVS; i++)
4962 for (size_t j = 0; j < a->specs[i].n; j++)
4964 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4965 if (ctables_function_is_pctsum (spec->function))
4966 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Both sub-expressions. */
4972 for (size_t i = 0; i < 2; i++)
4973 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution: builds the per-axis stacks of nests and
   their domain index arrays, fills in default summary specifications on the
   summary axis, optionally collects listwise-missing variables, merges all
   summary specifications into t->summary_specs, enumerates the variables
   needed by percentage-of-sum functions, and finally validates the label
   positions for the row and column axes.

   The result is the conjunction of the two ctables_check_label_position()
   calls at the end.

   NOTE(review): this listing omits short lines (braces, "else", "continue",
   short conditions), so several branch conditions are only partly visible. */
4979 ctables_prepare_table (struct ctables_table *t)
4981 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
/* Build the stack of nests for axis A from its axis expression. */
4984 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4986 for (size_t j = 0; j < t->stacks[a].n; j++)
4988 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each domain type, nest->domains[dt] collects indexes K into the nest's
   variables; it can hold at most nest->n of them. */
4989 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4991 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4992 nest->n_domains[dt] = 0;
4994 for (size_t k = 0; k < nest->n; k++)
4996 if (k == nest->scale_idx)
5005 if (a != PIVOT_AXIS_LAYER)
5012 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
5013 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
5014 : a == PIVOT_AXIS_ROW)
/* K is the last variable, or the one just before a trailing scale
   variable. */
5016 if (k == nest->n - 1
5017 || (nest->scale_idx == nest->n - 1
5018 && k == nest->n - 2))
5024 if (a == PIVOT_AXIS_COLUMN)
5029 if (a == PIVOT_AXIS_ROW)
/* Variable K was not filtered out above: it belongs to domain DT. */
5034 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Alternative path: give the axis a stack that holds a single nest with no
   variables.  NOTE(review): the condition selecting this path is on a line
   omitted from this listing. */
5041 struct ctables_nest *nest = xmalloc (sizeof *nest);
5042 *nest = (struct ctables_nest) { .n = 0 };
5043 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5045 /* There's no point in moving labels away from an axis that has no
5046 labels, so avoid dealing with the special cases around that. */
5047 t->label_axis[a] = a;
/* Default summary specifications for every nest on the summary axis. */
5050 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5051 for (size_t i = 0; i < stack->n; i++)
5053 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified: synthesize a single one, CTSF_MEAN for a
   scale specification set and CTSF_COUNT otherwise, with the default format
   for that function, and reuse it for totals. */
5054 if (!nest->specs[CSV_CELL].n)
5056 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5057 specs->specs = xmalloc (sizeof *specs->specs);
5060 enum ctables_summary_function function
5061 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
5063 *specs->specs = (struct ctables_summary_spec) {
5064 .function = function,
5065 .format = ctables_summary_default_format (function, specs->var),
5068 specs->var = nest->vars[0];
5070 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5071 &nest->specs[CSV_CELL]);
/* Cell summaries exist but total summaries do not: totals copy the cells. */
5073 else if (!nest->specs[CSV_TOTAL].n)
5074 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5075 &nest->specs[CSV_CELL]);
/* Listwise missing-value handling: gather the scale variables of the other
   nests that share this nest's group_head. */
5077 if (t->ctables->smissing_listwise)
5079 struct variable **listwise_vars = NULL;
5081 size_t allocated = 0;
5083 for (size_t j = nest->group_head; j < stack->n; j++)
5085 const struct ctables_nest *other_nest = &stack->nests[j];
5086 if (other_nest->group_head != nest->group_head)
/* Only other nests that actually have a scale variable contribute. */
5089 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5092 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5093 sizeof *listwise_vars);
5094 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Attach the list to each summary variant.  NOTE(review): the condition
   guarding the xmemdup() copy is on a line omitted from this listing. */
5097 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5100 listwise_vars = xmemdup (listwise_vars,
5101 n * sizeof *listwise_vars);
5102 nest->specs[sv].listwise_vars = listwise_vars;
5103 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specifications of all nests and variants into
   t->summary_specs.  ITEMS holds one cursor per (nest, variant) pair. */
5108 struct ctables_summary_spec_set *merged = &t->summary_specs;
5109 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5111 for (size_t j = 0; j < stack->n; j++)
5113 const struct ctables_nest *nest = &stack->nests[j];
5115 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5116 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the minimum remaining item under merge_item_compare_3way() and append
   the spec it points at to the merged set. */
5121 struct merge_item min = items[0];
5122 for (size_t j = 1; j < n_left; j++)
5123 if (merge_item_compare_3way (&items[j], &min) < 0)
5126 if (merged->n >= merged->allocated)
5127 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5128 sizeof *merged->specs);
5129 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to the minimum gets the merged index stored
   in its spec's axis_idx and is advanced; an exhausted item is replaced by
   the last remaining one. */
5131 for (size_t j = 0; j < n_left; )
5133 if (merge_item_compare_3way (&items[j], &min) == 0)
5135 struct merge_item *item = &items[j];
5136 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5137 if (++item->ofs >= item->set->n)
5139 items[j] = items[--n_left];
/* Diagnostic output of the merged specs and of each spec's axis index.
   NOTE(review): any preprocessor guard around these printf calls would be on
   lines omitted from this listing -- confirm whether this is compiled in. */
5149 for (size_t j = 0; j < merged->n; j++)
5150 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5152 for (size_t j = 0; j < stack->n; j++)
5154 const struct ctables_nest *nest = &stack->nests[j];
5155 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5157 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5158 for (size_t k = 0; k < specs->n; k++)
5159 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5160 specs->specs[k].axis_idx);
/* Collect the variables used by percentage-of-sum functions on the summary
   axis into t->sum_vars. */
5166 size_t allocated_sum_vars = 0;
5167 enumerate_sum_vars (t->axes[t->summary_axis],
5168 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
/* Both label-position checks must pass; && skips the column check if the row
   check fails. */
5170 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5171 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5175 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5176 enum pivot_axis_type a)
5178 struct ctables_stack *stack = &t->stacks[a];
5179 for (size_t i = 0; i < stack->n; i++)
5181 const struct ctables_nest *nest = &stack->nests[i];
5182 const struct variable *var = nest->vars[nest->n - 1];
5183 const union value *value = case_data (c, var);
5185 if (var_is_numeric (var) && value->f == SYSMIS)
5188 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5190 ctables_value_insert (t, value, var_get_width (var));
5195 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5197 const struct ctables_value *const *ap = a_;
5198 const struct ctables_value *const *bp = b_;
5199 const struct ctables_value *a = *ap;
5200 const struct ctables_value *b = *bp;
5201 const int *width = width_;
5202 return value_compare_3way (&a->value, &b->value, *width);
/* Builds t->clabels_values, an array of the values stored in
   t->clabels_values_map sorted with compare_clabels_values_3way(), and
   numbers them by storing each one's array position in its 'leaf' member.

   NOTE(review): this listing omits short lines; in particular any condition
   guarding the value-label loop is not visible here. */
5206 ctables_sort_clabels_values (struct ctables_table *t)
/* The example variable supplies the width and category specifications. */
5208 const struct variable *v0 = t->clabels_example;
5209 int width = var_get_width (v0);
5211 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Insert every value label of V0 whose value matches one of V0's category
   specifications. */
5214 const struct val_labs *val_labs = var_get_value_labels (v0);
5215 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5216 vl = val_labs_next (val_labs, vl))
5217 if (ctables_categories_match (c0, &vl->value, v0))
5218 ctables_value_insert (t, &vl->value, width);
/* Copy the map's members into a flat array of pointers. */
5221 size_t n = hmap_count (&t->clabels_values_map);
5222 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5224 struct ctables_value *clv;
5226 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5227 t->clabels_values[i++] = clv;
5228 t->n_clabels_values = n;
/* Sort by value; the width is handed to the comparison callback. */
5231 sort (t->clabels_values, n, sizeof *t->clabels_values,
5232 compare_clabels_values_3way, &width);
/* Each value's leaf is its position in sorted order. */
5234 for (size_t i = 0; i < n; i++)
5235 t->clabels_values[i]->leaf = i;
/* For each category specification in CATS, adds values of VAR to OCCURRENCES
   with ctables_add_occurrence().  Depending on the category, the values added
   are an explicit number, an explicit string, or those of VAR's value labels
   that satisfy the category's criterion.

   NOTE(review): most of the "case" labels that select among the code
   fragments below are on lines omitted from this listing; only
   CCT_POSTCOMPUTE and CCT_EXCLUDED_MISSING are visible. */
5239 ctables_add_category_occurrences (const struct variable *var,
5240 struct hmap *occurrences,
5241 const struct ctables_categories *cats)
5243 const struct val_labs *val_labs = var_get_value_labels (var);
5245 for (size_t i = 0; i < cats->n_cats; i++)
5247 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric category: add c->number itself. */
5251 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string category: build a temporary value of VAR's width from
   c->string, padded on the right with spaces, add it, then destroy it. */
5257 int width = var_get_width (var);
5259 value_init (&value, width);
5260 value_copy_buf_rpad (&value, width,
5261 CHAR_CAST (uint8_t *, c->string.string),
5262 c->string.length, ' ');
5263 ctables_add_occurrence (var, &value, occurrences);
5264 value_destroy (&value, width);
/* Numeric range: add each labeled value within [nrange[0], nrange[1]]. */
5269 assert (var_is_numeric (var));
5270 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5271 vl = val_labs_next (val_labs, vl))
5272 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5273 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: add each labeled value accepted by in_string_range(). */
5277 assert (var_is_alpha (var));
5278 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5279 vl = val_labs_next (val_labs, vl))
5280 if (in_string_range (&vl->value, var, c->srange))
5281 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each labeled value that is a missing value for VAR. */
5285 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5286 vl = val_labs_next (val_labs, vl))
5287 if (var_is_value_missing (var, &vl->value))
5288 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value unconditionally. */
5292 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5293 vl = val_labs_next (val_labs, vl))
5294 ctables_add_occurrence (var, &vl->value, occurrences);
5297 case CCT_POSTCOMPUTE:
/* Add each labeled value that is not missing, or every labeled value when the
   category includes missing values. */
5307 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5308 vl = val_labs_next (val_labs, vl))
5309 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5310 ctables_add_occurrence (var, &vl->value, occurrences);
5313 case CCT_EXCLUDED_MISSING:
5320 ctables_section_recurse_add_empty_categories (
5321 struct ctables_section *s,
5322 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5323 enum pivot_axis_type a, size_t a_idx)
5325 if (a >= PIVOT_N_AXES)
5326 ctables_cell_insert__ (s, c, cats);
5327 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5328 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5331 const struct variable *var = s->nests[a]->vars[a_idx];
5332 const struct ctables_categories *categories = s->table->categories[
5333 var_get_dict_index (var)];
5334 int width = var_get_width (var);
5335 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5336 const struct ctables_occurrence *o;
5337 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5339 union value *value = case_data_rw (c, var);
5340 value_destroy (value, width);
5341 value_clone (value, &o->value, width);
5342 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5343 assert (cats[a][a_idx] != NULL);
5344 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5347 for (size_t i = 0; i < categories->n_cats; i++)
5349 const struct ctables_category *cat = &categories->cats[i];
5350 if (cat->type == CCT_POSTCOMPUTE)
5352 cats[a][a_idx] = cat;
5353 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should be shown
   even though they may not occur in the data.  For each non-scale variable
   whose category specifications have show_empty set, the occurrences derived
   from those specifications are added to the section; then
   ctables_section_recurse_add_empty_categories() enumerates the combinations
   using a scratch case.

   NOTE(review): this listing omits short lines, so where show_empty is
   updated, any early exit, and the release of the scratch case are not
   visible here. */
5360 ctables_section_add_empty_categories (struct ctables_section *s)
5362 bool show_empty = false;
5363 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5365 for (size_t k = 0; k < s->nests[a]->n; k++)
/* Scale variables have no categories to enumerate. */
5366 if (k != s->nests[a]->scale_idx)
5368 const struct variable *var = s->nests[a]->vars[k];
5369 const struct ctables_categories *cats = s->table->categories[
5370 var_get_dict_index (var)];
5371 if (cats->show_empty)
5374 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension is a fixed bound of 10 variables per nest, matching
   the parameter type of the recursive helper. */
5380 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
/* Scratch case with the dictionary's layout; the helper overwrites its
   values while enumerating. */
5381 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5382 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data: the recorded occurrences for every
   non-scale variable, all cells (with their per-axis values and summaries),
   and all domains.  The hash maps themselves stay usable; they are shrunk,
   not destroyed.

   NOTE(review): this listing omits short lines, so some of the per-element
   cleanup calls are not visible here. */
5387 ctables_section_clear (struct ctables_section *s)
/* Occurrences, per axis and per non-scale variable. */
5389 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5391 const struct ctables_nest *nest = s->nests[a];
5392 for (size_t i = 0; i < nest->n; i++)
5393 if (i != nest->scale_idx)
5395 const struct variable *var = nest->vars[i];
5396 int width = var_get_width (var);
5397 struct ctables_occurrence *o, *next;
5398 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iterator is used because entries are deleted while iterating. */
5399 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5401 value_destroy (&o->value, width);
5402 hmap_delete (map, &o->node);
/* Cells. */
5409 struct ctables_cell *cell, *next_cell;
5410 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Destroy the cell's value for each non-scale variable on each axis, then the
   array that held them. */
5412 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5414 const struct ctables_nest *nest = s->nests[a];
5415 for (size_t i = 0; i < nest->n; i++)
5416 if (i != nest->scale_idx)
5417 value_destroy (&cell->axes[a].cvs[i].value,
5418 var_get_width (nest->vars[i]));
5419 free (cell->axes[a].cvs);
/* The cell's summaries follow the summary-axis nest's specification set for
   the cell's summary variant. */
5422 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5423 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5424 for (size_t i = 0; i < specs->n; i++)
5425 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5426 free (cell->summaries);
5428 hmap_delete (&s->cells, &cell->node);
5431 hmap_shrink (&s->cells);
/* Domains, per domain type. */
5433 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5435 struct ctables_domain *domain, *next_domain;
5436 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5439 free (domain->sums);
5440 hmap_delete (&s->domains[dt], &domain->node);
5443 hmap_shrink (&s->domains[dt]);
5448 ctables_section_uninit (struct ctables_section *s)
5450 ctables_section_clear (s);
5452 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5454 struct ctables_nest *nest = s->nests[a];
5455 for (size_t i = 0; i < nest->n; i++)
5456 hmap_destroy (&s->occurrences[a][i]);
5457 free (s->occurrences[a]);
5460 hmap_destroy (&s->cells);
5461 for (size_t i = 0; i < N_CTDTS; i++)
5462 hmap_destroy (&s->domains[i]);
/* Clears the accumulated data in table T: every section is cleared with
   ctables_section_clear(), and, if the table has an example variable for
   moved category labels, the collected label values and the sorted array
   built from them are discarded.

   NOTE(review): this listing omits short lines, so part of the per-value
   cleanup inside the loop is not visible here. */
5466 ctables_table_clear (struct ctables_table *t)
5468 for (size_t i = 0; i < t->n_sections; i++)
5469 ctables_section_clear (&t->sections[i]);
5471 if (t->clabels_example)
/* All collected values share the example variable's width. */
5473 int width = var_get_width (t->clabels_example);
5474 struct ctables_value *value, *next_value;
5475 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5476 &t->clabels_values_map)
5478 value_destroy (&value->value, width);
5479 hmap_delete (&t->clabels_values_map, &value->node);
5482 hmap_shrink (&t->clabels_values_map);
/* Drop the sorted array and reset it to the empty state. */
5484 free (t->clabels_values);
5485 t->clabels_values = NULL;
5486 t->n_clabels_values = 0;
/* Runs every table of the CTABLES command over the cases from INPUT, which
   belong to dataset DS.  The command state is reached through 'ct', whose
   declaration is on a line omitted from this listing.

   Cases are processed one group at a time.  Within a group, every case is
   inserted into every section of every table; after the group, each table is
   completed, output, and cleared.

   Returns the result of casegrouper_destroy(). */
5491 ctables_execute (struct dataset *ds, struct casereader *input,
/* Allocate room for the sections of each table: the product of the stack
   sizes on the three axes, each counted as at least 1.  Then populate the
   sections starting from axis 0. */
5494 for (size_t i = 0; i < ct->n_tables; i++)
5496 struct ctables_table *t = ct->tables[i];
5497 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5498 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5499 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5500 sizeof *t->sections);
5501 size_t ix[PIVOT_N_AXES];
5502 ctables_table_add_section (t, 0, ix);
/* Group by the split variables only when the dictionary's split type is
   SPLIT_SEPARATE; otherwise group on no variables at all. */
5505 struct dictionary *dict = dataset_dict (ds);
5506 struct casegrouper *grouper
5507 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5508 ? casegrouper_create_splits (input, dict)
5509 : casegrouper_create_vars (input, NULL, 0));
5510 struct casereader *group;
5511 while (casegrouper_get_next_group (grouper, &group))
5513 /* Output SPLIT FILE variables. */
/* Peek at the group's first case without consuming it. */
5514 struct ccase *c = casereader_peek (group, 0);
5517 output_split_file_values (ds, c);
5521 bool warn_on_invalid = true;
/* Read the group case by case; each case is unreferenced before the next one
   is read. */
5522 for (c = casereader_read (group); c;
5523 case_unref (c), c = casereader_read (group))
/* D_WEIGHT comes from the dictionary's weighting.  E_WEIGHT comes from the
   command's own weight variable when one is set (the alternative is on a
   line omitted from this listing). */
5525 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5526 double e_weight = (ct->e_weight
5527 ? var_force_valid_weight (ct->e_weight,
5528 case_num (c, ct->e_weight),
5532 for (size_t i = 0; i < ct->n_tables; i++)
5534 struct ctables_table *t = ct->tables[i];
5536 for (size_t j = 0; j < t->n_sections; j++)
5537 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* For axes whose category labels were moved to another axis, also record the
   label values seen in this case. */
5539 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5540 if (t->label_axis[a] != a)
5541 ctables_insert_clabels_values (t, c, a);
5544 casereader_destroy (group);
/* The group is complete: finish, output, and reset each table. */
5546 for (size_t i = 0; i < ct->n_tables; i++)
5548 struct ctables_table *t = ct->tables[i];
5550 if (t->clabels_example)
5551 ctables_sort_clabels_values (t);
5553 for (size_t j = 0; j < t->n_sections; j++)
5554 ctables_section_add_empty_categories (&t->sections[j]);
5556 ctables_table_output (ct, t);
5557 ctables_table_clear (t);
5560 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from LEXER, given dictionary DICT, and returns the parsed
   expression. */
typedef struct ctables_pcexpr *parse_recursively_func (
  struct lexer *lexer, struct dictionary *dict);
/* Releases the resources held by postcompute expression E.  String operands
   are deallocated, sub-expressions are destroyed recursively, and the
   expression's message location is destroyed.

   NOTE(review): this listing omits short lines, including several "case"
   labels and whatever follows the final call, so the full set of operators
   handled by each branch is not visible here. */
5569 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
/* A string category owns its string. */
5575 case CTPO_CAT_STRING:
5576 ss_dealloc (&e->string);
/* A string range owns both endpoint strings. */
5579 case CTPO_CAT_SRANGE:
5580 for (size_t i = 0; i < 2; i++)
5581 ss_dealloc (&e->srange[i]);
/* Operators with sub-expressions: destroy both children. */
5590 for (size_t i = 0; i < 2; i++)
5591 ctables_pcexpr_destroy (e->subs[i]);
/* These category kinds have no deallocation code of their own in the lines
   shown. */
5595 case CTPO_CAT_NUMBER:
5596 case CTPO_CAT_NRANGE:
5597 case CTPO_CAT_MISSING:
5598 case CTPO_CAT_OTHERNM:
5599 case CTPO_CAT_SUBTOTAL:
5600 case CTPO_CAT_TOTAL:
5604 msg_location_destroy (e->location);
5609 static struct ctables_pcexpr *
5610 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5611 struct ctables_pcexpr *sub0,
5612 struct ctables_pcexpr *sub1)
5614 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5615 *e = (struct ctables_pcexpr) {
5617 .subs = { sub0, sub1 },
5618 .location = msg_location_merged (sub0->location, sub1->location),
5623 /* How to parse an operator. */
5626 enum token_type token;
5627 enum ctables_postcompute_op op;
5630 static const struct operator *
5631 ctables_pcexpr_match_operator (struct lexer *lexer,
5632 const struct operator ops[], size_t n_ops)
5634 for (const struct operator *op = ops; op < ops + n_ops; op++)
5635 if (lex_token (lexer) == op->token)
5637 if (op->token != T_NEG_NUM)
5646 static struct ctables_pcexpr *
5647 ctables_pcexpr_parse_binary_operators__ (
5648 struct lexer *lexer, struct dictionary *dict,
5649 const struct operator ops[], size_t n_ops,
5650 parse_recursively_func *parse_next_level,
5651 const char *chain_warning, struct ctables_pcexpr *lhs)
5653 for (int op_count = 0; ; op_count++)
5655 const struct operator *op
5656 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5659 if (op_count > 1 && chain_warning)
5660 msg_at (SW, lhs->location, "%s", chain_warning);
5665 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5668 ctables_pcexpr_destroy (lhs);
5672 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5676 static struct ctables_pcexpr *
5677 ctables_pcexpr_parse_binary_operators (
5678 struct lexer *lexer, struct dictionary *dict,
5679 const struct operator ops[], size_t n_ops,
5680 parse_recursively_func *parse_next_level, const char *chain_warning)
5682 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5686 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5688 chain_warning, lhs);
/* Forward declaration: the primary-expression parser calls this for a
   parenthesized subexpression. */
static struct ctables_pcexpr *ctables_pcexpr_parse_add (
  struct lexer *lexer, struct dictionary *dict);
5694 static struct ctables_pcexpr
5695 ctpo_cat_nrange (double low, double high)
5697 return (struct ctables_pcexpr) {
5698 .op = CTPO_CAT_NRANGE,
5699 .nrange = { low, high },
5703 static struct ctables_pcexpr
5704 ctpo_cat_srange (struct substring low, struct substring high)
5706 return (struct ctables_pcexpr) {
5707 .op = CTPO_CAT_SRANGE,
5708 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER: a numeric constant, one
   of the keywords MISSING, OTHERNM, TOTAL, or SUBTOTAL (optionally followed
   by a bracketed index), a bracketed category reference (number, string, or
   range using LO, THRU, and HI), or a parenthesized subexpression.

   On success the expression is built in local E, given a location that spans
   the tokens consumed, and returned as a heap copy.

   NOTE(review): this listing omits short lines (braces, "else", bare
   returns, calls that advance the lexer), so the exact failure paths are not
   visible here. */
5712 static struct ctables_pcexpr *
5713 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Starting token offset, used for the expression's location at the end. */
5715 int start_ofs = lex_ofs (lexer);
5716 struct ctables_pcexpr e;
/* Numeric constant. */
5717 if (lex_is_number (lexer))
5719 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5720 .number = lex_number (lexer) };
/* Keyword categories. */
5723 else if (lex_match_id (lexer, "MISSING"))
5724 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5725 else if (lex_match_id (lexer, "OTHERNM"))
5726 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5727 else if (lex_match_id (lexer, "TOTAL"))
5728 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
/* SUBTOTAL, optionally followed by a bracketed index of at least 1; without
   the brackets the index stays 0. */
5729 else if (lex_match_id (lexer, "SUBTOTAL"))
5731 size_t subtotal_index = 0;
5732 if (lex_match (lexer, T_LBRACK))
5734 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5736 subtotal_index = lex_integer (lexer);
5738 if (!lex_force_match (lexer, T_RBRACK))
5741 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5742 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5744 else if (lex_match (lexer, T_LBRACK))
/* "LO THRU x": a range that is open at the low end.  For a string range the
   low end is a null substring; for a numeric range it is -DBL_MAX. */
5746 if (lex_match_id (lexer, "LO"))
5748 if (!lex_force_match_id (lexer, "THRU"))
5751 if (lex_is_string (lexer))
5753 struct substring low = { .string = NULL };
5754 struct substring high = parse_substring (lexer, dict);
5755 e = ctpo_cat_srange (low, high);
5759 if (!lex_force_num (lexer))
5761 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, optionally followed by "THRU HI" (high end DBL_MAX) or
   "THRU number". */
5765 else if (lex_is_number (lexer))
5767 double number = lex_number (lexer);
5769 if (lex_match_id (lexer, "THRU"))
5771 if (lex_match_id (lexer, "HI"))
5772 e = ctpo_cat_nrange (number, DBL_MAX);
5775 if (!lex_force_num (lexer))
5777 e = ctpo_cat_nrange (number, lex_number (lexer));
5782 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, optionally followed by "THRU HI" (null high end) or
   "THRU string". */
5785 else if (lex_is_string (lexer))
5787 struct substring s = parse_substring (lexer, dict);
5789 if (lex_match_id (lexer, "THRU"))
5791 struct substring high;
5793 if (lex_match_id (lexer, "HI"))
5794 high = (struct substring) { .string = NULL };
5797 if (!lex_force_string (lexer))
5802 high = parse_substring (lexer, dict);
5805 e = ctpo_cat_srange (s, high);
5808 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
/* Nothing valid follows the left bracket. */
5812 lex_error (lexer, NULL);
/* The closing bracket is required.  If it is missing, release any strings
   that the category already owns. */
5816 if (!lex_force_match (lexer, T_RBRACK))
5818 if (e.op == CTPO_CAT_STRING)
5819 ss_dealloc (&e.string);
5820 else if (e.op == CTPO_CAT_SRANGE)
5822 ss_dealloc (&e.srange[0]);
5823 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression: parse it starting at the lowest-precedence
   level and require the closing parenthesis, destroying the subexpression if
   it is missing. */
5828 else if (lex_match (lexer, T_LPAREN))
5830 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5833 if (!lex_force_match (lexer, T_RPAREN))
5835 ctables_pcexpr_destroy (ep);
/* No primary expression starts at the current token. */
5842 lex_error (lexer, NULL);
/* The location covers everything from the first token through the last token
   consumed; the result is a heap copy of E. */
5846 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5847 return xmemdup (&e, sizeof e);
/* Allocates a new expression node on the heap for operand SUB.  Its location
   spans the tokens from offset START_OFS through the token just before
   LEXER's current one.

   NOTE(review): the initializer lines that set the node's operator and
   operand, and the return statement, are omitted from this listing.  The
   callers use this to build a negation, so presumably the operator is the
   negation operator -- confirm. */
5850 static struct ctables_pcexpr *
5851 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5852 struct lexer *lexer, int start_ofs)
5854 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5855 *e = (struct ctables_pcexpr) {
5858 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this hands off to
   ctables_pcexpr_parse_binary_operators() with a one-entry operator table
   (T_EXP mapped to CTPO_POW) and ctables_pcexpr_parse_primary as the parser
   for operands.

   When the current token is T_NEG_NUM and the token after it is T_EXP, the
   code instead builds a CTPO_CONSTANT node that holds the negated token
   value, supplies it as the left-hand side to
   ctables_pcexpr_parse_binary_operators__(), and passes the resulting node
   to ctables_pcexpr_allocate_neg() together with the starting token
   offset. */
5863 static struct ctables_pcexpr *
5864 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
/* The only operator recognized at this level. */
5866 static const struct operator op = { T_EXP, CTPO_POW };
/* Translated warning text about chained exponentiation.  It is passed to the
   operator-chain parser in the special case further down. */
5868 const char *chain_warning =
5869 _("The exponentiation operator (`**') is left-associative: "
5870 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5871 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number followed by T_EXP. */
5873 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5874 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5875 ctables_pcexpr_parse_primary,
5878 /* Special case for situations like "-5**6", which must be parsed as
5881 int start_ofs = lex_ofs (lexer);
5882 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5883 *lhs = (struct ctables_pcexpr) {
5884 .op = CTPO_CONSTANT,
5885 .number = -lex_tokval (lexer),
5886 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5890 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5891 lexer, dict, &op, 1,
5892 ctables_pcexpr_parse_primary, chain_warning, lhs);
5896 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5899 /* Parses the unary minus level. */
/* If the current token is not T_DASH, parsing is delegated unchanged to
   ctables_pcexpr_parse_exp().  Otherwise the dash is consumed, the operand
   is parsed by a recursive call to this function (so that consecutive
   dashes nest), and the operand is handed to ctables_pcexpr_allocate_neg()
   along with START_OFS, the token offset recorded before the dash was
   consumed.

   NOTE(review): any check of INNER between the recursive call and the final
   return is not visible in this listing. */
5900 static struct ctables_pcexpr *
5901 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5903 int start_ofs = lex_ofs (lexer);
5904 if (!lex_match (lexer, T_DASH))
5905 return ctables_pcexpr_parse_exp (lexer, dict);
5907 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5911 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5914 /* Parses the multiplication and division level. */
/* Delegates to ctables_pcexpr_parse_binary_operators() with the operator
   table below and ctables_pcexpr_parse_neg as the parser for each operand.
   The final argument is NULL; presumably that slot carries an optional
   warning about chained operators, which this level does not use -- TODO
   confirm against the declaration of the helper. */
5915 static struct ctables_pcexpr *
5916 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
/* Token type paired with the expression operation it produces. */
5918 static const struct operator ops[] =
5920 { T_ASTERISK, CTPO_MUL },
5921 { T_SLASH, CTPO_DIV },
5924 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5925 sizeof ops / sizeof *ops,
5926 ctables_pcexpr_parse_neg, NULL);
5929 /* Parses the addition and subtraction level. */
/* This is the entry point that the other parsers in this file call for a
   complete postcompute expression.  It delegates to
   ctables_pcexpr_parse_binary_operators() with the operator table below and
   ctables_pcexpr_parse_mul as the parser for each operand. */
5930 static struct ctables_pcexpr *
5931 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
/* Token type paired with the expression operation it produces.  T_NEG_NUM
   is listed as an addition: presumably an expression such as a -5, where
   the lexer has folded the minus sign into the number, is then read as
   a + (-5).  TODO confirm how the helper consumes a T_NEG_NUM token. */
5933 static const struct operator ops[] =
5935 { T_PLUS, CTPO_ADD },
5936 { T_DASH, CTPO_SUB },
5937 { T_NEG_NUM, CTPO_ADD },
5940 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5941 ops, sizeof ops / sizeof *ops,
5942 ctables_pcexpr_parse_mul, NULL);
/* Searches the postcomputes hash map of CT for an entry named NAME.  Both
   the hash (utf8_hash_case_string) and the comparison (utf8_strcasecmp) are
   case-insensitive, so the lookup ignores case.

   NOTE(review): the return statements are not visible in this listing.
   Callers in this file test the result for null, so presumably the matching
   entry is returned on success and a null pointer otherwise. */
5945 static struct ctables_postcompute *
5946 ctables_find_postcompute (struct ctables *ct, const char *name)
5948 struct ctables_postcompute *pc;
5949 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5950 utf8_hash_case_string (name, 0), &ct->postcomputes)
5951 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   &NAME = EXPR (expression), and records the result as a postcompute.

   A postcompute whose name matches an existing one (see
   ctables_find_postcompute) is replaced after a warning; otherwise a new
   entry is allocated and inserted into the postcomputes hash map.

   NOTE(review): the return type and the return statements are not visible
   in this listing; the caller treats the result as a success flag. */
5957 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Begin the reported location one token before the current one.  The caller
   has just matched the PCOMPUTE keyword, so this presumably includes it. */
5960 int pcompute_start = lex_ofs (lexer) - 1;
/* The name must be introduced by an ampersand. */
5962 if (!lex_match (lexer, T_AND))
5964 lex_error_expecting (lexer, "&");
5967 if (!lex_force_id (lexer))
/* Heap copy of the identifier text. */
5970 char *name = ss_xstrdup (lex_tokss (lexer));
5973 if (!lex_force_match (lexer, T_EQUALS)
5974 || !lex_force_match_id (lexer, "EXPR")
5975 || !lex_force_match (lexer, T_LPAREN))
/* Parse the expression and remember the token range it occupies, which is
   used below to obtain its source text. */
5981 int expr_start = lex_ofs (lexer);
5982 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5983 int expr_end = lex_ofs (lexer) - 1;
/* On a parse failure or a missing right parenthesis, release the
   expression. */
5984 if (!expr || !lex_force_match (lexer, T_RPAREN))
5986 ctables_pcexpr_destroy (expr);
5990 int pcompute_end = lex_ofs (lexer) - 1;
5992 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5995 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and discard the
   expression and location that the earlier definition owned. */
5998 msg_at (SW, location, _("New definition of &%s will override the "
5999 "previous definition."),
6001 msg_at (SN, pc->location, _("This is the previous definition."));
6003 ctables_pcexpr_destroy (pc->expr);
6004 msg_location_destroy (pc->location);
/* First definition: create the entry, which takes ownership of NAME, and
   add it to the hash map under the case-insensitive hash of its name. */
6009 pc = xmalloc (sizeof *pc);
6010 *pc = (struct ctables_postcompute) { .name = name };
6011 hmap_insert (&ct->postcomputes, &pc->hmap_node,
6012 utf8_hash_case_string (pc->name, 0));
6015 pc->location = location;
/* The label is taken from the source text of the expression.
   NOTE(review): any condition guarding this assignment is not visible. */
6017 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES FORMAT: a sequence of summary
   functions, each followed by a format specifier, with a percentile value
   in between for PTILE.  Each parsed pair is appended to *SSS, which is
   reset to an empty set on entry.

   Parsing stops at the end of the command, at a slash, or at an identifier
   that matches LABEL or HIDESOURCECATS.

   NOTE(review): the return type and the return statements are not visible
   in this listing; the caller treats the result as a success flag.  The
   final visible statement releases *SSS and is presumably the error path. */
6022 ctables_parse_pproperties_format (struct lexer *lexer,
6023 struct ctables_summary_spec_set *sss)
6025 *sss = (struct ctables_summary_spec_set) { .n = 0 };
6027 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
6028 && !(lex_token (lexer) == T_ID
6029 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
6030 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
6031 lex_tokss (lexer)))))
6033 /* Parse function. */
6034 enum ctables_summary_function function;
6035 if (!parse_ctables_summary_function (lexer, &function))
6038 /* Parse percentile. */
/* Only CTSF_PTILE takes a percentile; it must lie in the closed range from
   0 to 100.  Other functions keep the default of 0. */
6039 double percentile = 0;
6040 if (function == CTSF_PTILE)
6042 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
6044 percentile = lex_number (lexer);
6049 struct fmt_spec format;
6050 bool is_ctables_format;
6051 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
6054 if (sss->n >= sss->allocated)
6055 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6056 sizeof *sss->specs);
6057 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6058 .function = function,
6059 .percentile = percentile,
6061 .is_ctables_format = is_ctables_format,
6067 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of postcompute
   references, each written as an ampersand followed by a name, and then any
   number of LABEL, FORMAT, and HIDESOURCECATS settings.  Every setting is
   applied to all of the postcomputes that were listed.

   NOTE(review): the return type, the return statements, and the cleanup of
   the PCS array are not visible in this listing; the caller treats the
   result as a success flag. */
6072 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
6074 struct ctables_postcompute **pcs = NULL;
6076 size_t allocated_pcs = 0;
6078 while (lex_match (lexer, T_AND))
6080 if (!lex_force_id (lexer))
6082 struct ctables_postcompute *pc
6083 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
/* Every name must refer to a postcompute that has already been defined. */
6086 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6091 if (n_pcs >= allocated_pcs)
6092 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings run until the next slash or the end of the command. */
6096 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6098 if (lex_match_id (lexer, "LABEL"))
/* The equals sign is optional here and for the settings below. */
6100 lex_match (lexer, T_EQUALS);
6101 if (!lex_force_string (lexer))
/* Replace the label of each listed postcompute with a copy of the string. */
6104 for (size_t i = 0; i < n_pcs; i++)
6106 free (pcs[i]->label);
6107 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6112 else if (lex_match_id (lexer, "FORMAT"))
6114 lex_match (lexer, T_EQUALS);
6116 struct ctables_summary_spec_set sss;
6117 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Give each listed postcompute its own clone of the parsed set, then
   release the local original. */
6120 for (size_t i = 0; i < n_pcs; i++)
6123 ctables_summary_spec_set_uninit (pcs[i]->specs);
6125 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6126 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6128 ctables_summary_spec_set_uninit (&sss);
6130 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6132 lex_match (lexer, T_EQUALS);
6133 bool hide_source_cats;
6134 if (!parse_bool (lexer, &hide_source_cats))
6136 for (size_t i = 0; i < n_pcs; i++)
6137 pcs[i]->hide_source_cats = hide_source_cats;
6141 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats the time NOW, converted to local time, according to the strftime()
   format string FORMAT and appends the result to OUT.

   NOTE(review): localtime() can return a null pointer, and strftime()
   returns 0 and leaves the buffer contents unspecified when the result does
   not fit.  Neither result is checked in the lines visible here; the
   declaration of the VALUE buffer is also not visible -- confirm against
   the full source. */
6154 put_strftime (struct string *out, time_t now, const char *format)
6156 const struct tm *tm = localtime (&now);
6158 strftime (value, sizeof value, format, tm);
6159 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past the prefix.

   NOTE(review): the return type and the return statements are not visible
   in this listing.  The caller in this file uses the result as a condition,
   so presumably it reports whether the prefix was present. */
6163 skip_prefix (struct substring *s, struct substring prefix)
6165 if (ss_starts_with (*s, prefix))
6167 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens of LEXER at offsets from
   EXPR_START up to but not including EXPR_END.  An identifier that names a
   variable in DICT with a variable label is rendered as that label; other
   tokens are rendered from their source representation with spacing
   adjusted around parentheses.

   NOTE(review): the handling of bracketed sections (the NEST counter and
   the bodies of the two bracket branches) is not visible in this listing. */
6175 put_table_expression (struct string *out, struct lexer *lexer,
6176 struct dictionary *dict, int expr_start, int expr_end)
6179 for (int ofs = expr_start; ofs < expr_end; ofs++)
6181 const struct token *t = lex_ofs_token (lexer, ofs);
6182 if (t->type == T_LBRACK)
6184 else if (t->type == T_RBRACK && nest > 0)
6190 else if (t->type == T_ID)
/* Prefer the variable label; fall back to the identifier text when the
   name is not a variable or the variable has no label. */
6192 const struct variable *var
6193 = dict_lookup_var (dict, t->string.string);
6194 const char *label = var ? var_get_label (var) : NULL;
6195 ds_put_cstr (out, label ? label : t->string.string);
/* Remaining lines, presumably the branch for all other tokens: put a space
   before the token unless it is the first token, it is a right parenthesis,
   or OUT already ends in a space. */
6199 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6200 ds_put_byte (out, ' ');
6202 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6203 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token or a left
   parenthesis. */
6206 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6207 ds_put_byte (out, ' ');
/* Appends the text IN to OUT, expanding three markers that begin with a
   right parenthesis:

   )DATE   the time NOW formatted with the strftime() conversion %x
   )TIME   the time NOW formatted with the strftime() conversion %X
   )TABLE  the table expression at token offsets EXPR_START to EXPR_END,
           as rendered by put_table_expression()

   A right parenthesis that does not start one of these markers is copied
   to OUT unchanged.

   NOTE(review): the enclosing loop and its exit statement are not visible
   in this listing. */
6213 put_title_text (struct string *out, struct substring in, time_t now,
6214 struct lexer *lexer, struct dictionary *dict,
6215 int expr_start, int expr_end)
/* Copy everything before the next right parenthesis verbatim. */
6219 size_t chunk = ss_find_byte (in, ')');
6220 ds_put_substring (out, ss_head (in, chunk));
6221 ss_advance (&in, chunk);
/* Nothing left means there was no further right parenthesis. */
6222 if (ss_is_empty (in))
6225 if (skip_prefix (&in, ss_cstr (")DATE")))
6226 put_strftime (out, now, "%x");
6227 else if (skip_prefix (&in, ss_cstr (")TIME")))
6228 put_strftime (out, now, "%X");
6229 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6230 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a marker: emit the parenthesis itself and step over it. */
6233 ds_put_byte (out, ')');
6234 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   The command consists of optional settings subcommands (FORMAT, VLABELS,
   MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS)
   followed by one or more TABLE subcommands, each of which may be followed
   by its own subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES,
   SIGTEST, COMPARETEST).  After parsing, the tables are computed with
   ctables_execute() over the active dataset.

   The visible return yields CMD_SUCCESS when both ctables_execute() and
   proc_commit() succeed and CMD_FAILURE otherwise.

   NOTE(review): this listing omits many short lines (braces, goto and
   return statements, labels), so the error paths are only partly visible. */
6240 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6242 struct casereader *input = NULL;
/* Run the measurement-level guesser over the dataset before parsing.
   NOTE(review): the condition guarding these statements is not visible. */
6244 struct measure_guesser *mg = measure_guesser_create (ds);
6247 input = proc_open (ds);
6248 measure_guesser_run (mg, input);
6249 measure_guesser_destroy (mg);
/* One variable-label display mode per dictionary variable, initialized from
   the global show-variables setting. */
6252 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6253 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6254 enum settings_value_show tvars = settings_get_show_variables ();
6255 for (size_t i = 0; i < n_vars; i++)
6256 vlabels[i] = (enum ctables_vlabel) tvars;
/* Private, modifiable copy of the default table look, with omit_empty
   turned off. */
6258 struct pivot_table_look *look = pivot_table_look_unshare (
6259 pivot_table_look_ref (pivot_table_look_get_default ()));
6260 look->omit_empty = false;
6262 struct ctables *ct = xmalloc (sizeof *ct);
6263 *ct = (struct ctables) {
6264 .dict = dataset_dict (ds),
6266 .ctables_formats = FMT_SETTINGS_INIT,
6268 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once so that every date and time substitution in titles uses the
   same instant. */
6271 time_t now = time (NULL);
/* Style strings for the CTABLES-specific formats, in one variant for each
   decimal-point convention.  The variant is selected below according to
   whether the configured decimal point is a period. */
6276 const char *dot_string;
6277 const char *comma_string;
6279 static const struct ctf ctfs[4] = {
6280 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6281 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6282 { CTEF_PAREN, "-,(,),", "-.(.)." },
6283 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6285 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6286 for (size_t i = 0; i < 4; i++)
6288 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6289 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6290 fmt_number_style_from_string (s));
6293 if (!lex_force_match (lexer, T_SLASH))
/* Settings subcommands, parsed until the first TABLE keyword is matched. */
6296 while (!lex_match_id (lexer, "TABLE"))
6298 if (lex_match_id (lexer, "FORMAT"))
/* Column widths stay SYSMIS until given.  The unit defaults to points,
   expressed as the number of units per inch. */
6300 double widths[2] = { SYSMIS, SYSMIS };
6301 double units_per_inch = 72.0;
6303 while (lex_token (lexer) != T_SLASH)
6305 if (lex_match_id (lexer, "MINCOLWIDTH"))
6307 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6310 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6312 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6315 else if (lex_match_id (lexer, "UNITS"))
6317 lex_match (lexer, T_EQUALS);
6318 if (lex_match_id (lexer, "POINTS"))
6319 units_per_inch = 72.0;
6320 else if (lex_match_id (lexer, "INCHES"))
6321 units_per_inch = 1.0;
6322 else if (lex_match_id (lexer, "CM"))
6323 units_per_inch = 2.54;
6326 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6330 else if (lex_match_id (lexer, "EMPTY"))
/* EMPTY selects the text stored in ct->zero: ZERO leaves it alone, BLANK
   stores an empty string, and a quoted string is stored as given. */
6335 lex_match (lexer, T_EQUALS);
6336 if (lex_match_id (lexer, "ZERO"))
6338 /* Nothing to do. */
6340 else if (lex_match_id (lexer, "BLANK"))
6341 ct->zero = xstrdup ("");
6342 else if (lex_force_string (lexer))
6344 ct->zero = ss_xstrdup (lex_tokss (lexer));
6350 else if (lex_match_id (lexer, "MISSING"))
6352 lex_match (lexer, T_EQUALS);
6353 if (!lex_force_string (lexer))
/* A string other than a single period is copied into ct->missing; the
   alternative for a single period is not visible in this listing. */
6357 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6358 ? ss_xstrdup (lex_tokss (lexer))
6364 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6365 "UNITS", "EMPTY", "MISSING");
/* When both widths are given, the minimum may not exceed the maximum. */
6370 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6371 && widths[0] > widths[1])
6373 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each given width from the chosen unit to inches and scale by 96
   before storing it in the horizontal width range of the look. */
6377 for (size_t i = 0; i < 2; i++)
6378 if (widths[i] != SYSMIS)
6380 int *wr = ct->look->width_ranges[TABLE_HORZ];
6381 wr[i] = widths[i] / units_per_inch * 96.0;
6386 else if (lex_match_id (lexer, "VLABELS"))
/* VLABELS VARIABLES=list DISPLAY=mode sets the label display mode for the
   listed variables. */
6388 if (!lex_force_match_id (lexer, "VARIABLES"))
6390 lex_match (lexer, T_EQUALS);
6392 struct variable **vars;
6394 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6398 if (!lex_force_match_id (lexer, "DISPLAY"))
6403 lex_match (lexer, T_EQUALS);
6405 enum ctables_vlabel vlabel;
6406 if (lex_match_id (lexer, "DEFAULT"))
6407 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6408 else if (lex_match_id (lexer, "NAME"))
6410 else if (lex_match_id (lexer, "LABEL"))
6411 vlabel = CTVL_LABEL;
6412 else if (lex_match_id (lexer, "BOTH"))
6414 else if (lex_match_id (lexer, "NONE"))
6418 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* The mode is stored under the dictionary index of each variable. */
6424 for (size_t i = 0; i < n_vars; i++)
6425 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6428 else if (lex_match_id (lexer, "MRSETS"))
6430 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6432 lex_match (lexer, T_EQUALS);
6433 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6436 else if (lex_match_id (lexer, "SMISSING"))
6438 if (lex_match_id (lexer, "VARIABLE"))
6439 ct->smissing_listwise = false;
6440 else if (lex_match_id (lexer, "LISTWISE"))
6441 ct->smissing_listwise = true;
6444 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6448 else if (lex_match_id (lexer, "PCOMPUTE"))
6450 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6453 else if (lex_match_id (lexer, "PPROPERTIES"))
6455 if (!ctables_parse_pproperties (lexer, ct))
6458 else if (lex_match_id (lexer, "WEIGHT"))
6460 if (!lex_force_match_id (lexer, "VARIABLE"))
6462 lex_match (lexer, T_EQUALS);
6463 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6467 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6469 if (lex_match_id (lexer, "COUNT"))
6471 lex_match (lexer, T_EQUALS);
6472 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6475 ct->hide_threshold = lex_integer (lexer);
/* Without COUNT, a threshold that is still zero becomes 5. */
6478 else if (ct->hide_threshold == 0)
6479 ct->hide_threshold = 5;
6483 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6484 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6485 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
/* Each settings subcommand must be followed by a slash. */
6489 if (!lex_force_match (lexer, T_SLASH))
/* Table loop: each pass parses one TABLE subcommand and the subcommands
   that belong to it. */
6493 size_t allocated_tables = 0;
6496 if (ct->n_tables >= allocated_tables)
6497 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6498 sizeof *ct->tables);
/* Default category and category set for the new table. */
6500 struct ctables_category *cat = xmalloc (sizeof *cat);
6501 *cat = (struct ctables_category) {
6503 .include_missing = false,
6504 .sort_ascending = true,
6507 struct ctables_categories *c = xmalloc (sizeof *c);
6508 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6509 *c = (struct ctables_categories) {
/* One category-set pointer per dictionary variable.  The body of the loop
   that fills the array is not visible in this listing. */
6516 struct ctables_categories **categories = xnmalloc (n_vars,
6517 sizeof *categories);
6518 for (size_t i = 0; i < n_vars; i++)
/* New table with its defaults: summary labels visible on the column axis
   and each axis carrying its own category labels. */
6521 struct ctables_table *t = xmalloc (sizeof *t);
6522 *t = (struct ctables_table) {
6524 .slabels_axis = PIVOT_AXIS_COLUMN,
6525 .slabels_visible = true,
6526 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6528 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6529 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6530 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6532 .clabels_from_axis = PIVOT_AXIS_LAYER,
6533 .categories = categories,
6534 .n_categories = n_vars,
6537 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: a row axis, optionally followed by BY and a
   column axis, optionally followed by BY and a layer axis.  The token range
   is remembered for the table-expression substitution in titles. */
6539 lex_match (lexer, T_EQUALS);
6540 int expr_start = lex_ofs (lexer);
6541 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6543 if (lex_match (lexer, T_BY))
6545 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6546 ct, t, PIVOT_AXIS_COLUMN))
6549 if (lex_match (lexer, T_BY))
6551 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6552 ct, t, PIVOT_AXIS_LAYER))
6556 int expr_end = lex_ofs (lexer);
6558 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6559 && !t->axes[PIVOT_AXIS_LAYER])
6561 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis.  When the rule below is
   violated, the error is followed by a note for each axis that has one. */
6565 const struct ctables_axis *scales[PIVOT_N_AXES];
6566 size_t n_scales = 0;
6567 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6569 scales[a] = find_scale (t->axes[a]);
6575 msg (SE, _("Scale variables may appear only on one axis."));
6576 if (scales[PIVOT_AXIS_ROW])
6577 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6578 _("This scale variable appears on the rows axis."));
6579 if (scales[PIVOT_AXIS_COLUMN])
6580 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6581 _("This scale variable appears on the columns axis."));
6582 if (scales[PIVOT_AXIS_LAYER])
6583 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6584 _("This scale variable appears on the layer axis."));
/* Find the summary on each axis.  For an axis without a scale variable this
   is a categorical variable with a summary specification; the alternative
   for an axis with a scale variable is not visible in this listing. */
6588 const struct ctables_axis *summaries[PIVOT_N_AXES];
6589 size_t n_summaries = 0;
6590 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6592 summaries[a] = (scales[a]
6594 : find_categorical_summary_spec (t->axes[a]));
6598 if (n_summaries > 1)
6600 msg (SE, _("Summaries may appear only on one axis."));
6601 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6604 msg_at (SN, summaries[a]->loc,
6606 ? _("This variable on the rows axis has a summary.")
6607 : a == PIVOT_AXIS_COLUMN
6608 ? _("This variable on the columns axis has a summary.")
6609 : _("This variable on the layers axis has a summary."));
6611 msg_at (SN, summaries[a]->loc,
6612 _("This is a scale variable, so it always has a "
6613 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary when any summaries
   exist, otherwise an axis that is not empty. */
6618 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6619 if (n_summaries ? summaries[a] : t->axes[a])
6621 t->summary_axis = a;
/* The command may end right after the table expression. */
6625 if (lex_token (lexer) == T_ENDCMD)
6627 if (!ctables_prepare_table (t))
6631 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that apply to the table just parsed, until the next TABLE
   keyword or the end of the command. */
6634 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6636 if (lex_match_id (lexer, "SLABELS"))
6638 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6640 if (lex_match_id (lexer, "POSITION"))
6642 lex_match (lexer, T_EQUALS);
6643 if (lex_match_id (lexer, "COLUMN"))
6644 t->slabels_axis = PIVOT_AXIS_COLUMN;
6645 else if (lex_match_id (lexer, "ROW"))
6646 t->slabels_axis = PIVOT_AXIS_ROW;
6647 else if (lex_match_id (lexer, "LAYER"))
6648 t->slabels_axis = PIVOT_AXIS_LAYER;
6651 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6655 else if (lex_match_id (lexer, "VISIBLE"))
6657 lex_match (lexer, T_EQUALS);
6658 if (!parse_bool (lexer, &t->slabels_visible))
6663 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6668 else if (lex_match_id (lexer, "CLABELS"))
/* CLABELS selects the axis that carries the category labels of the row or
   column variables.  AUTO restores the default of each axis carrying its
   own labels. */
6670 if (lex_match_id (lexer, "AUTO"))
6672 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6673 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6675 else if (lex_match_id (lexer, "ROWLABELS"))
6677 lex_match (lexer, T_EQUALS);
6678 if (lex_match_id (lexer, "OPPOSITE"))
6679 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6680 else if (lex_match_id (lexer, "LAYER"))
6681 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6684 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6688 else if (lex_match_id (lexer, "COLLABELS"))
6690 lex_match (lexer, T_EQUALS);
6691 if (lex_match_id (lexer, "OPPOSITE"))
6692 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6693 else if (lex_match_id (lexer, "LAYER"))
6694 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6697 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6703 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6708 else if (lex_match_id (lexer, "CRITERIA"))
6710 if (!lex_force_match_id (lexer, "CILEVEL"))
6712 lex_match (lexer, T_EQUALS);
6714 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6716 t->cilevel = lex_number (lexer);
6719 else if (lex_match_id (lexer, "CATEGORIES"))
6721 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6725 else if (lex_match_id (lexer, "TITLES"))
/* TITLES: each of CAPTION, CORNER, and TITLE takes one or more strings.
   The strings are joined with single spaces after marker expansion by
   put_title_text(), and the result is stored through TEXTP. */
6730 if (lex_match_id (lexer, "CAPTION"))
6731 textp = &t->caption;
6732 else if (lex_match_id (lexer, "CORNER"))
6734 else if (lex_match_id (lexer, "TITLE"))
6738 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6741 lex_match (lexer, T_EQUALS);
6743 struct string s = DS_EMPTY_INITIALIZER;
6744 while (lex_is_string (lexer))
6746 if (!ds_is_empty (&s))
6747 ds_put_byte (&s, ' ');
6748 put_title_text (&s, lex_tokss (lexer), now,
6749 lexer, dataset_dict (ds),
6750 expr_start, expr_end);
6754 *textp = ds_steal_cstr (&s);
6756 while (lex_token (lexer) != T_SLASH
6757 && lex_token (lexer) != T_ENDCMD);
6759 else if (lex_match_id (lexer, "SIGTEST"))
/* SIGTEST: allocate the chi-square settings with their defaults, then parse
   any overrides. */
6763 t->chisq = xmalloc (sizeof *t->chisq);
6764 *t->chisq = (struct ctables_chisq) {
6766 .include_mrsets = true,
6767 .all_visible = true,
6773 if (lex_match_id (lexer, "TYPE"))
6775 lex_match (lexer, T_EQUALS);
6776 if (!lex_force_match_id (lexer, "CHISQUARE"))
6779 else if (lex_match_id (lexer, "ALPHA"))
6781 lex_match (lexer, T_EQUALS);
6782 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6784 t->chisq->alpha = lex_number (lexer);
6787 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6789 lex_match (lexer, T_EQUALS);
6790 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6793 else if (lex_match_id (lexer, "CATEGORIES"))
6795 lex_match (lexer, T_EQUALS);
6796 if (lex_match_id (lexer, "ALLVISIBLE"))
6797 t->chisq->all_visible = true;
6798 else if (lex_match_id (lexer, "SUBTOTALS"))
6799 t->chisq->all_visible = false;
6802 lex_error_expecting (lexer,
6803 "ALLVISIBLE", "SUBTOTALS");
6809 lex_error_expecting (lexer, "TYPE", "ALPHA",
6810 "INCLUDEMRSETS", "CATEGORIES");
6814 while (lex_token (lexer) != T_SLASH
6815 && lex_token (lexer) != T_ENDCMD);
6817 else if (lex_match_id (lexer, "COMPARETEST"))
/* COMPARETEST: allocate the pairwise-comparison settings with their
   defaults, then parse any overrides. */
6821 t->pairwise = xmalloc (sizeof *t->pairwise);
6822 *t->pairwise = (struct ctables_pairwise) {
6824 .alpha = { .05, .05 },
6825 .adjust = BONFERRONI,
6826 .include_mrsets = true,
6827 .meansvariance_allcats = true,
6828 .all_visible = true,
6837 if (lex_match_id (lexer, "TYPE"))
6839 lex_match (lexer, T_EQUALS);
6840 if (lex_match_id (lexer, "PROP"))
6841 t->pairwise->type = PROP;
6842 else if (lex_match_id (lexer, "MEAN"))
6843 t->pairwise->type = MEAN;
6846 lex_error_expecting (lexer, "PROP", "MEAN");
6850 else if (lex_match_id (lexer, "ALPHA"))
/* ALPHA takes one or two values strictly between 0 and 1, optionally
   separated by a comma.  Two values are stored in ascending order; a
   single value is used for both slots. */
6852 lex_match (lexer, T_EQUALS);
6854 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6856 double a0 = lex_number (lexer);
6859 lex_match (lexer, T_COMMA);
6860 if (lex_is_number (lexer))
6862 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6864 double a1 = lex_number (lexer);
6867 t->pairwise->alpha[0] = MIN (a0, a1);
6868 t->pairwise->alpha[1] = MAX (a0, a1);
6871 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6873 else if (lex_match_id (lexer, "ADJUST"))
6875 lex_match (lexer, T_EQUALS);
6876 if (lex_match_id (lexer, "BONFERRONI"))
6877 t->pairwise->adjust = BONFERRONI;
6878 else if (lex_match_id (lexer, "BH"))
6879 t->pairwise->adjust = BH;
6880 else if (lex_match_id (lexer, "NONE"))
6881 t->pairwise->adjust = 0;
6884 lex_error_expecting (lexer, "BONFERRONI", "BH",
6889 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6891 lex_match (lexer, T_EQUALS);
6892 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6895 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6897 lex_match (lexer, T_EQUALS);
6898 if (lex_match_id (lexer, "ALLCATS"))
6899 t->pairwise->meansvariance_allcats = true;
6900 else if (lex_match_id (lexer, "TESTEDCATS"))
6901 t->pairwise->meansvariance_allcats = false;
6904 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6908 else if (lex_match_id (lexer, "CATEGORIES"))
6910 lex_match (lexer, T_EQUALS);
6911 if (lex_match_id (lexer, "ALLVISIBLE"))
6912 t->pairwise->all_visible = true;
6913 else if (lex_match_id (lexer, "SUBTOTALS"))
6914 t->pairwise->all_visible = false;
6917 lex_error_expecting (lexer, "ALLVISIBLE",
6922 else if (lex_match_id (lexer, "MERGE"))
6924 lex_match (lexer, T_EQUALS);
6925 if (!parse_bool (lexer, &t->pairwise->merge))
6928 else if (lex_match_id (lexer, "STYLE"))
6930 lex_match (lexer, T_EQUALS);
6931 if (lex_match_id (lexer, "APA"))
6932 t->pairwise->apa_style = true;
6933 else if (lex_match_id (lexer, "SIMPLE"))
6934 t->pairwise->apa_style = false;
6937 lex_error_expecting (lexer, "APA", "SIMPLE");
6941 else if (lex_match_id (lexer, "SHOWSIG"))
6943 lex_match (lexer, T_EQUALS);
6944 if (!parse_bool (lexer, &t->pairwise->show_sig))
6949 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6950 "INCLUDEMRSETS", "MEANSVARIANCE",
6951 "CATEGORIES", "MERGE", "STYLE",
6956 while (lex_token (lexer) != T_SLASH
6957 && lex_token (lexer) != T_ENDCMD);
6961 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6962 "CRITERIA", "CATEGORIES", "TITLES",
6963 "SIGTEST", "COMPARETEST");
6967 if (!lex_match (lexer, T_SLASH))
/* Category labels can be moved away from at most one of the row and column
   axes. */
6971 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6972 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6974 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6978 if (!ctables_prepare_table (t))
6981 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: read the active dataset, compute the tables, and
   commit the procedure.  Both steps must succeed for CMD_SUCCESS. */
6984 input = proc_open (ds);
6985 bool ok = ctables_execute (ds, input, ct);
6986 ok = proc_commit (ds) && ok;
6988 ctables_destroy (ct);
6989 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): presumably the shared error exit; its label and return
   statement are not visible in this listing. */
6994 ctables_destroy (ct);