1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1398 hmap_destroy (&ct->postcomputes);
1400 fmt_settings_uninit (&ct->ctables_formats);
1401 pivot_table_look_unref (ct->look);
1405 for (size_t i = 0; i < ct->n_tables; i++)
1406 ctables_table_destroy (ct->tables[i]);
1411 static struct ctables_category
1412 cct_nrange (double low, double high)
1414 return (struct ctables_category) {
1416 .nrange = { low, high }
1420 static struct ctables_category
1421 cct_srange (struct substring low, struct substring high)
1423 return (struct ctables_category) {
1425 .srange = { low, high }
1430 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1431 struct ctables_category *cat)
1434 if (lex_match (lexer, T_EQUALS))
1436 if (!lex_force_string (lexer))
1439 total_label = ss_xstrdup (lex_tokss (lexer));
1443 total_label = xstrdup (_("Subtotal"));
1445 *cat = (struct ctables_category) {
1446 .type = CCT_SUBTOTAL,
1447 .hide_subcategories = hide_subcategories,
1448 .total_label = total_label
1453 static struct substring
1454 parse_substring (struct lexer *lexer, struct dictionary *dict)
1456 struct substring s = recode_substring_pool (
1457 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1458 ss_rtrim (&s, ss_cstr (" "));
1464 ctables_table_parse_explicit_category (struct lexer *lexer,
1465 struct dictionary *dict,
1467 struct ctables_category *cat)
1469 if (lex_match_id (lexer, "OTHERNM"))
1470 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1471 else if (lex_match_id (lexer, "MISSING"))
1472 *cat = (struct ctables_category) { .type = CCT_MISSING };
1473 else if (lex_match_id (lexer, "SUBTOTAL"))
1474 return ctables_table_parse_subtotal (lexer, false, cat);
1475 else if (lex_match_id (lexer, "HSUBTOTAL"))
1476 return ctables_table_parse_subtotal (lexer, true, cat);
1477 else if (lex_match_id (lexer, "LO"))
1479 if (!lex_force_match_id (lexer, "THRU"))
1481 if (lex_is_string (lexer))
1483 struct substring sr0 = { .string = NULL };
1484 struct substring sr1 = parse_substring (lexer, dict);
1485 *cat = cct_srange (sr0, sr1);
1487 else if (lex_force_num (lexer))
1489 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1495 else if (lex_is_number (lexer))
1497 double number = lex_number (lexer);
1499 if (lex_match_id (lexer, "THRU"))
1501 if (lex_match_id (lexer, "HI"))
1502 *cat = cct_nrange (number, DBL_MAX);
1505 if (!lex_force_num (lexer))
1507 *cat = cct_nrange (number, lex_number (lexer));
1512 *cat = (struct ctables_category) {
1517 else if (lex_is_string (lexer))
1519 struct substring s = parse_substring (lexer, dict);
1520 if (lex_match_id (lexer, "THRU"))
1522 if (lex_match_id (lexer, "HI"))
1524 struct substring sr1 = { .string = NULL };
1525 *cat = cct_srange (s, sr1);
1529 if (!lex_force_string (lexer))
1534 struct substring sr1 = parse_substring (lexer, dict);
1535 *cat = cct_srange (s, sr1);
1539 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1541 else if (lex_match (lexer, T_AND))
1543 if (!lex_force_id (lexer))
1545 struct ctables_postcompute *pc = ctables_find_postcompute (
1546 ct, lex_tokcstr (lexer));
1549 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1550 msg_at (SE, loc, _("Unknown postcompute &%s."),
1551 lex_tokcstr (lexer));
1552 msg_location_destroy (loc);
1557 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1561 lex_error (lexer, NULL);
1569 parse_category_string (struct msg_location *location,
1570 struct substring s, const struct dictionary *dict,
1571 enum fmt_type format, double *n)
1574 char *error = data_in (s, dict_get_encoding (dict), format,
1575 settings_get_fmt_settings (), &v, 0, NULL);
1578 msg_at (SE, location,
1579 _("Failed to parse category specification as format %s: %s."),
1580 fmt_name (format), error);
1589 static struct ctables_category *
1590 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1591 const struct ctables_pcexpr *e)
1593 struct ctables_category *best = NULL;
1594 size_t n_subtotals = 0;
1595 for (size_t i = 0; i < cats->n_cats; i++)
1597 struct ctables_category *cat = &cats->cats[i];
1600 case CTPO_CAT_NUMBER:
1601 if (cat->type == CCT_NUMBER && cat->number == e->number)
1605 case CTPO_CAT_STRING:
1606 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1610 case CTPO_CAT_NRANGE:
1611 if (cat->type == CCT_NRANGE
1612 && cat->nrange[0] == e->nrange[0]
1613 && cat->nrange[1] == e->nrange[1])
1617 case CTPO_CAT_SRANGE:
1618 if (cat->type == CCT_SRANGE
1619 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1620 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1624 case CTPO_CAT_MISSING:
1625 if (cat->type == CCT_MISSING)
1629 case CTPO_CAT_OTHERNM:
1630 if (cat->type == CCT_OTHERNM)
1634 case CTPO_CAT_SUBTOTAL:
1635 if (cat->type == CCT_SUBTOTAL)
1638 if (e->subtotal_index == n_subtotals)
1640 else if (e->subtotal_index == 0)
1645 case CTPO_CAT_TOTAL:
1646 if (cat->type == CCT_TOTAL)
1660 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1665 static struct ctables_category *
1666 ctables_find_category_for_postcompute (const struct dictionary *dict,
1667 const struct ctables_categories *cats,
1668 enum fmt_type parse_format,
1669 const struct ctables_pcexpr *e)
1671 if (parse_format != FMT_F)
1673 if (e->op == CTPO_CAT_STRING)
1676 if (!parse_category_string (e->location, e->string, dict,
1677 parse_format, &number))
1680 struct ctables_pcexpr e2 = {
1681 .op = CTPO_CAT_NUMBER,
1683 .location = e->location,
1685 return ctables_find_category_for_postcompute__ (cats, &e2);
1687 else if (e->op == CTPO_CAT_SRANGE)
1690 if (!e->srange[0].string)
1691 nrange[0] = -DBL_MAX;
1692 else if (!parse_category_string (e->location, e->srange[0], dict,
1693 parse_format, &nrange[0]))
1696 if (!e->srange[1].string)
1697 nrange[1] = DBL_MAX;
1698 else if (!parse_category_string (e->location, e->srange[1], dict,
1699 parse_format, &nrange[1]))
1702 struct ctables_pcexpr e2 = {
1703 .op = CTPO_CAT_NRANGE,
1704 .nrange = { nrange[0], nrange[1] },
1705 .location = e->location,
1707 return ctables_find_category_for_postcompute__ (cats, &e2);
1710 return ctables_find_category_for_postcompute__ (cats, e);
1714 ctables_recursive_check_postcompute (struct dictionary *dict,
1715 const struct ctables_pcexpr *e,
1716 struct ctables_category *pc_cat,
1717 const struct ctables_categories *cats,
1718 const struct msg_location *cats_location)
1722 case CTPO_CAT_NUMBER:
1723 case CTPO_CAT_STRING:
1724 case CTPO_CAT_NRANGE:
1725 case CTPO_CAT_SRANGE:
1726 case CTPO_CAT_MISSING:
1727 case CTPO_CAT_OTHERNM:
1728 case CTPO_CAT_SUBTOTAL:
1729 case CTPO_CAT_TOTAL:
1731 struct ctables_category *cat = ctables_find_category_for_postcompute (
1732 dict, cats, pc_cat->parse_format, e);
1735 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1737 size_t n_subtotals = 0;
1738 for (size_t i = 0; i < cats->n_cats; i++)
1739 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1740 if (n_subtotals > 1)
1742 msg_at (SE, cats_location,
1743 ngettext ("These categories include %zu instance "
1744 "of SUBTOTAL or HSUBTOTAL, so references "
1745 "from computed categories must refer to "
1746 "subtotals by position, "
1747 "e.g. SUBTOTAL[1].",
1748 "These categories include %zu instances "
1749 "of SUBTOTAL or HSUBTOTAL, so references "
1750 "from computed categories must refer to "
1751 "subtotals by position, "
1752 "e.g. SUBTOTAL[1].",
1755 msg_at (SN, e->location,
1756 _("This is the reference that lacks a position."));
1761 msg_at (SE, pc_cat->location,
1762 _("Computed category &%s references a category not included "
1763 "in the category list."),
1765 msg_at (SN, e->location, _("This is the missing category."));
1766 if (e->op == CTPO_CAT_SUBTOTAL)
1767 msg_at (SN, cats_location,
1768 _("To fix the problem, add subtotals to the "
1769 "list of categories here."));
1770 else if (e->op == CTPO_CAT_TOTAL)
1771 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1772 "CATEGORIES specification."));
1774 msg_at (SN, cats_location,
1775 _("To fix the problem, add the missing category to the "
1776 "list of categories here."));
1779 if (pc_cat->pc->hide_source_cats)
1793 for (size_t i = 0; i < 2; i++)
1794 if (e->subs[i] && !ctables_recursive_check_postcompute (
1795 dict, e->subs[i], pc_cat, cats, cats_location))
1804 all_strings (struct variable **vars, size_t n_vars,
1805 const struct ctables_category *cat)
1807 for (size_t j = 0; j < n_vars; j++)
1808 if (var_is_numeric (vars[j]))
1810 msg_at (SE, cat->location,
1811 _("This category specification may be applied only to string "
1812 "variables, but this subcommand tries to apply it to "
1813 "numeric variable %s."),
1814 var_get_name (vars[j]));
1821 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1822 struct ctables *ct, struct ctables_table *t)
1824 if (!lex_match_id (lexer, "VARIABLES"))
1826 lex_match (lexer, T_EQUALS);
1828 struct variable **vars;
1830 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1833 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1834 for (size_t i = 1; i < n_vars; i++)
1836 const struct fmt_spec *f = var_get_print_format (vars[i]);
1837 if (f->type != common_format->type)
1839 common_format = NULL;
1845 && (fmt_get_category (common_format->type)
1846 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1848 struct ctables_categories *c = xmalloc (sizeof *c);
1849 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1850 for (size_t i = 0; i < n_vars; i++)
1852 struct ctables_categories **cp
1853 = &t->categories[var_get_dict_index (vars[i])];
1854 ctables_categories_unref (*cp);
1858 size_t allocated_cats = 0;
1859 int cats_start_ofs = -1;
1860 int cats_end_ofs = -1;
1861 if (lex_match (lexer, T_LBRACK))
1863 cats_start_ofs = lex_ofs (lexer);
1866 if (c->n_cats >= allocated_cats)
1867 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1869 int start_ofs = lex_ofs (lexer);
1870 struct ctables_category *cat = &c->cats[c->n_cats];
1871 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1873 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1876 lex_match (lexer, T_COMMA);
1878 while (!lex_match (lexer, T_RBRACK));
1879 cats_end_ofs = lex_ofs (lexer) - 1;
1882 struct ctables_category cat = {
1884 .include_missing = false,
1885 .sort_ascending = true,
1887 bool show_totals = false;
1888 char *total_label = NULL;
1889 bool totals_before = false;
1890 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1892 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1894 lex_match (lexer, T_EQUALS);
1895 if (lex_match_id (lexer, "A"))
1896 cat.sort_ascending = true;
1897 else if (lex_match_id (lexer, "D"))
1898 cat.sort_ascending = false;
1901 lex_error_expecting (lexer, "A", "D");
1905 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1907 lex_match (lexer, T_EQUALS);
1908 if (lex_match_id (lexer, "VALUE"))
1909 cat.type = CCT_VALUE;
1910 else if (lex_match_id (lexer, "LABEL"))
1911 cat.type = CCT_LABEL;
1914 cat.type = CCT_FUNCTION;
1915 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1918 if (lex_match (lexer, T_LPAREN))
1920 cat.sort_var = parse_variable (lexer, dict);
1924 if (cat.sort_function == CTSF_PTILE)
1926 lex_match (lexer, T_COMMA);
1927 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1929 cat.percentile = lex_number (lexer);
1933 if (!lex_force_match (lexer, T_RPAREN))
1936 else if (ctables_function_availability (cat.sort_function)
1939 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1944 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1946 lex_match (lexer, T_EQUALS);
1947 if (lex_match_id (lexer, "INCLUDE"))
1948 cat.include_missing = true;
1949 else if (lex_match_id (lexer, "EXCLUDE"))
1950 cat.include_missing = false;
1953 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1957 else if (lex_match_id (lexer, "TOTAL"))
1959 lex_match (lexer, T_EQUALS);
1960 if (!parse_bool (lexer, &show_totals))
1963 else if (lex_match_id (lexer, "LABEL"))
1965 lex_match (lexer, T_EQUALS);
1966 if (!lex_force_string (lexer))
1969 total_label = ss_xstrdup (lex_tokss (lexer));
1972 else if (lex_match_id (lexer, "POSITION"))
1974 lex_match (lexer, T_EQUALS);
1975 if (lex_match_id (lexer, "BEFORE"))
1976 totals_before = true;
1977 else if (lex_match_id (lexer, "AFTER"))
1978 totals_before = false;
1981 lex_error_expecting (lexer, "BEFORE", "AFTER");
1985 else if (lex_match_id (lexer, "EMPTY"))
1987 lex_match (lexer, T_EQUALS);
1988 if (lex_match_id (lexer, "INCLUDE"))
1989 c->show_empty = true;
1990 else if (lex_match_id (lexer, "EXCLUDE"))
1991 c->show_empty = false;
1994 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2001 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2002 "TOTAL", "LABEL", "POSITION", "EMPTY");
2004 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2011 if (c->n_cats >= allocated_cats)
2012 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2013 c->cats[c->n_cats++] = cat;
2018 if (c->n_cats >= allocated_cats)
2019 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2021 struct ctables_category *totals;
2024 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2025 totals = &c->cats[0];
2028 totals = &c->cats[c->n_cats];
2031 *totals = (struct ctables_category) {
2033 .total_label = total_label ? total_label : xstrdup (_("Total")),
2037 struct ctables_category *subtotal = NULL;
2038 for (size_t i = totals_before ? 0 : c->n_cats;
2039 totals_before ? i < c->n_cats : i-- > 0;
2040 totals_before ? i++ : 0)
2042 struct ctables_category *cat = &c->cats[i];
2051 cat->subtotal = subtotal;
2054 case CCT_POSTCOMPUTE:
2065 case CCT_EXCLUDED_MISSING:
2070 if (cats_start_ofs != -1)
2072 for (size_t i = 0; i < c->n_cats; i++)
2074 struct ctables_category *cat = &c->cats[i];
2077 case CCT_POSTCOMPUTE:
2078 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2079 struct msg_location *cats_location
2080 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2081 bool ok = ctables_recursive_check_postcompute (
2082 dict, cat->pc->expr, cat, c, cats_location);
2083 msg_location_destroy (cats_location);
2090 for (size_t j = 0; j < n_vars; j++)
2091 if (var_is_alpha (vars[j]))
2093 msg_at (SE, cat->location,
2094 _("This category specification may be applied "
2095 "only to numeric variables, but this "
2096 "subcommand tries to apply it to string "
2098 var_get_name (vars[j]));
2107 if (!parse_category_string (cat->location, cat->string, dict,
2108 common_format->type, &n))
2111 ss_dealloc (&cat->string);
2113 cat->type = CCT_NUMBER;
2116 else if (!all_strings (vars, n_vars, cat))
2125 if (!cat->srange[0].string)
2127 else if (!parse_category_string (cat->location,
2128 cat->srange[0], dict,
2129 common_format->type, &n[0]))
2132 if (!cat->srange[1].string)
2134 else if (!parse_category_string (cat->location,
2135 cat->srange[1], dict,
2136 common_format->type, &n[1]))
2139 ss_dealloc (&cat->srange[0]);
2140 ss_dealloc (&cat->srange[1]);
2142 cat->type = CCT_NRANGE;
2143 cat->nrange[0] = n[0];
2144 cat->nrange[1] = n[1];
2146 else if (!all_strings (vars, n_vars, cat))
2157 case CCT_EXCLUDED_MISSING:
2172 ctables_nest_uninit (struct ctables_nest *nest)
2175 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2176 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2177 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2178 free (nest->domains[dt]);
2182 ctables_stack_uninit (struct ctables_stack *stack)
2186 for (size_t i = 0; i < stack->n; i++)
2187 ctables_nest_uninit (&stack->nests[i]);
2188 free (stack->nests);
2192 static struct ctables_stack
2193 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2200 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2201 for (size_t i = 0; i < s0.n; i++)
2202 for (size_t j = 0; j < s1.n; j++)
2204 const struct ctables_nest *a = &s0.nests[i];
2205 const struct ctables_nest *b = &s1.nests[j];
2207 size_t allocate = a->n + b->n;
2208 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2210 for (size_t k = 0; k < a->n; k++)
2211 vars[n++] = a->vars[k];
2212 for (size_t k = 0; k < b->n; k++)
2213 vars[n++] = b->vars[k];
2214 assert (n == allocate);
2216 const struct ctables_nest *summary_src;
2217 if (!a->specs[CSV_CELL].var)
2219 else if (!b->specs[CSV_CELL].var)
2224 struct ctables_nest *new = &stack.nests[stack.n++];
2225 *new = (struct ctables_nest) {
2227 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2228 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2232 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2233 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2235 ctables_stack_uninit (&s0);
2236 ctables_stack_uninit (&s1);
2240 static struct ctables_stack
2241 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2243 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2244 for (size_t i = 0; i < s0.n; i++)
2245 stack.nests[stack.n++] = s0.nests[i];
2246 for (size_t i = 0; i < s1.n; i++)
2248 stack.nests[stack.n] = s1.nests[i];
2249 stack.nests[stack.n].group_head += s0.n;
2252 assert (stack.n == s0.n + s1.n);
2258 static struct ctables_stack
2259 var_fts (const struct ctables_axis *a)
2261 struct variable **vars = xmalloc (sizeof *vars);
2264 struct ctables_nest *nest = xmalloc (sizeof *nest);
2265 *nest = (struct ctables_nest) {
2268 .scale_idx = a->scale ? 0 : SIZE_MAX,
2270 if (a->specs[CSV_CELL].n || a->scale)
2271 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2273 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2274 nest->specs[sv].var = a->var;
2275 nest->specs[sv].is_scale = a->scale;
2277 return (struct ctables_stack) { .nests = nest, .n = 1 };
2280 static struct ctables_stack
2281 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2284 return (struct ctables_stack) { .n = 0 };
2292 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2293 enumerate_fts (axis_type, a->subs[1]));
2296 /* This should consider any of the scale variables found in the result to
2297 be linked to each other listwise for SMISSING=LISTWISE. */
2298 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2299 enumerate_fts (axis_type, a->subs[1]));
2305 union ctables_summary
2307 /* COUNT, VALIDN, TOTALN. */
2310 /* MINIMUM, MAXIMUM, RANGE. */
2317 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2318 struct moments1 *moments;
2320 /* MEDIAN, MODE, PTILE. */
2323 struct casewriter *writer;
2328 /* XXX multiple response */
2332 ctables_summary_init (union ctables_summary *s,
2333 const struct ctables_summary_spec *ss)
2335 switch (ss->function)
2339 case CTSF_ROWPCT_COUNT:
2340 case CTSF_COLPCT_COUNT:
2341 case CTSF_TABLEPCT_COUNT:
2342 case CTSF_SUBTABLEPCT_COUNT:
2343 case CTSF_LAYERPCT_COUNT:
2344 case CTSF_LAYERROWPCT_COUNT:
2345 case CTSF_LAYERCOLPCT_COUNT:
2346 case CTSF_ROWPCT_VALIDN:
2347 case CTSF_COLPCT_VALIDN:
2348 case CTSF_TABLEPCT_VALIDN:
2349 case CTSF_SUBTABLEPCT_VALIDN:
2350 case CTSF_LAYERPCT_VALIDN:
2351 case CTSF_LAYERROWPCT_VALIDN:
2352 case CTSF_LAYERCOLPCT_VALIDN:
2353 case CTSF_ROWPCT_TOTALN:
2354 case CTSF_COLPCT_TOTALN:
2355 case CTSF_TABLEPCT_TOTALN:
2356 case CTSF_SUBTABLEPCT_TOTALN:
2357 case CTSF_LAYERPCT_TOTALN:
2358 case CTSF_LAYERROWPCT_TOTALN:
2359 case CTSF_LAYERCOLPCT_TOTALN:
2366 case CTSF_UROWPCT_COUNT:
2367 case CTSF_UCOLPCT_COUNT:
2368 case CTSF_UTABLEPCT_COUNT:
2369 case CTSF_USUBTABLEPCT_COUNT:
2370 case CTSF_ULAYERPCT_COUNT:
2371 case CTSF_ULAYERROWPCT_COUNT:
2372 case CTSF_ULAYERCOLPCT_COUNT:
2373 case CTSF_UROWPCT_VALIDN:
2374 case CTSF_UCOLPCT_VALIDN:
2375 case CTSF_UTABLEPCT_VALIDN:
2376 case CTSF_USUBTABLEPCT_VALIDN:
2377 case CTSF_ULAYERPCT_VALIDN:
2378 case CTSF_ULAYERROWPCT_VALIDN:
2379 case CTSF_ULAYERCOLPCT_VALIDN:
2380 case CTSF_UROWPCT_TOTALN:
2381 case CTSF_UCOLPCT_TOTALN:
2382 case CTSF_UTABLEPCT_TOTALN:
2383 case CTSF_USUBTABLEPCT_TOTALN:
2384 case CTSF_ULAYERPCT_TOTALN:
2385 case CTSF_ULAYERROWPCT_TOTALN:
2386 case CTSF_ULAYERCOLPCT_TOTALN:
2396 case CTSF_SUBTABLE_ID:
2398 case CTSF_LAYERROW_ID:
2399 case CTSF_LAYERCOL_ID:
2405 s->min = s->max = SYSMIS;
2413 case CTSF_ROWPCT_SUM:
2414 case CTSF_COLPCT_SUM:
2415 case CTSF_TABLEPCT_SUM:
2416 case CTSF_SUBTABLEPCT_SUM:
2417 case CTSF_LAYERPCT_SUM:
2418 case CTSF_LAYERROWPCT_SUM:
2419 case CTSF_LAYERCOLPCT_SUM:
2424 case CTSF_UVARIANCE:
2425 case CTSF_UROWPCT_SUM:
2426 case CTSF_UCOLPCT_SUM:
2427 case CTSF_UTABLEPCT_SUM:
2428 case CTSF_USUBTABLEPCT_SUM:
2429 case CTSF_ULAYERPCT_SUM:
2430 case CTSF_ULAYERROWPCT_SUM:
2431 case CTSF_ULAYERCOLPCT_SUM:
2432 s->moments = moments1_create (MOMENT_VARIANCE);
2442 struct caseproto *proto = caseproto_create ();
2443 proto = caseproto_add_width (proto, 0);
2444 proto = caseproto_add_width (proto, 0);
2446 struct subcase ordering;
2447 subcase_init (&ordering, 0, 0, SC_ASCEND);
2448 s->writer = sort_create_writer (&ordering, proto);
2449 subcase_uninit (&ordering);
2450 caseproto_unref (proto);
2460 ctables_summary_uninit (union ctables_summary *s,
2461 const struct ctables_summary_spec *ss)
2463 switch (ss->function)
2467 case CTSF_ROWPCT_COUNT:
2468 case CTSF_COLPCT_COUNT:
2469 case CTSF_TABLEPCT_COUNT:
2470 case CTSF_SUBTABLEPCT_COUNT:
2471 case CTSF_LAYERPCT_COUNT:
2472 case CTSF_LAYERROWPCT_COUNT:
2473 case CTSF_LAYERCOLPCT_COUNT:
2474 case CTSF_ROWPCT_VALIDN:
2475 case CTSF_COLPCT_VALIDN:
2476 case CTSF_TABLEPCT_VALIDN:
2477 case CTSF_SUBTABLEPCT_VALIDN:
2478 case CTSF_LAYERPCT_VALIDN:
2479 case CTSF_LAYERROWPCT_VALIDN:
2480 case CTSF_LAYERCOLPCT_VALIDN:
2481 case CTSF_ROWPCT_TOTALN:
2482 case CTSF_COLPCT_TOTALN:
2483 case CTSF_TABLEPCT_TOTALN:
2484 case CTSF_SUBTABLEPCT_TOTALN:
2485 case CTSF_LAYERPCT_TOTALN:
2486 case CTSF_LAYERROWPCT_TOTALN:
2487 case CTSF_LAYERCOLPCT_TOTALN:
2494 case CTSF_UROWPCT_COUNT:
2495 case CTSF_UCOLPCT_COUNT:
2496 case CTSF_UTABLEPCT_COUNT:
2497 case CTSF_USUBTABLEPCT_COUNT:
2498 case CTSF_ULAYERPCT_COUNT:
2499 case CTSF_ULAYERROWPCT_COUNT:
2500 case CTSF_ULAYERCOLPCT_COUNT:
2501 case CTSF_UROWPCT_VALIDN:
2502 case CTSF_UCOLPCT_VALIDN:
2503 case CTSF_UTABLEPCT_VALIDN:
2504 case CTSF_USUBTABLEPCT_VALIDN:
2505 case CTSF_ULAYERPCT_VALIDN:
2506 case CTSF_ULAYERROWPCT_VALIDN:
2507 case CTSF_ULAYERCOLPCT_VALIDN:
2508 case CTSF_UROWPCT_TOTALN:
2509 case CTSF_UCOLPCT_TOTALN:
2510 case CTSF_UTABLEPCT_TOTALN:
2511 case CTSF_USUBTABLEPCT_TOTALN:
2512 case CTSF_ULAYERPCT_TOTALN:
2513 case CTSF_ULAYERROWPCT_TOTALN:
2514 case CTSF_ULAYERCOLPCT_TOTALN:
2523 case CTSF_SUBTABLE_ID:
2525 case CTSF_LAYERROW_ID:
2526 case CTSF_LAYERCOL_ID:
2539 case CTSF_ROWPCT_SUM:
2540 case CTSF_COLPCT_SUM:
2541 case CTSF_TABLEPCT_SUM:
2542 case CTSF_SUBTABLEPCT_SUM:
2543 case CTSF_LAYERPCT_SUM:
2544 case CTSF_LAYERROWPCT_SUM:
2545 case CTSF_LAYERCOLPCT_SUM:
2550 case CTSF_UVARIANCE:
2551 case CTSF_UROWPCT_SUM:
2552 case CTSF_UCOLPCT_SUM:
2553 case CTSF_UTABLEPCT_SUM:
2554 case CTSF_USUBTABLEPCT_SUM:
2555 case CTSF_ULAYERPCT_SUM:
2556 case CTSF_ULAYERROWPCT_SUM:
2557 case CTSF_ULAYERCOLPCT_SUM:
2558 moments1_destroy (s->moments);
2567 casewriter_destroy (s->writer);
2573 ctables_summary_add (union ctables_summary *s,
2574 const struct ctables_summary_spec *ss,
2575 const struct variable *var, const union value *value,
2576 bool is_scale, bool is_scale_missing,
2577 bool is_missing, bool excluded_missing,
2578 double d_weight, double e_weight)
2580 /* To determine whether a case is included in a given table for a particular
2581 kind of summary, consider the following charts for each variable in the
2582 table. Only if "yes" appears for every variable for the summary is the
2585 Categorical variables: VALIDN COUNT TOTALN
2586 Valid values in included categories yes yes yes
2587 Missing values in included categories --- yes yes
2588 Missing values in excluded categories --- --- yes
2589 Valid values in excluded categories --- --- ---
2591 Scale variables: VALIDN COUNT TOTALN
2592 Valid value yes yes yes
2593 Missing value --- yes yes
2595 Missing values include both user- and system-missing. (The system-missing
2596 value is always in an excluded category.)
2598 switch (ss->function)
2601 case CTSF_ROWPCT_TOTALN:
2602 case CTSF_COLPCT_TOTALN:
2603 case CTSF_TABLEPCT_TOTALN:
2604 case CTSF_SUBTABLEPCT_TOTALN:
2605 case CTSF_LAYERPCT_TOTALN:
2606 case CTSF_LAYERROWPCT_TOTALN:
2607 case CTSF_LAYERCOLPCT_TOTALN:
2608 s->count += d_weight;
2612 case CTSF_UROWPCT_TOTALN:
2613 case CTSF_UCOLPCT_TOTALN:
2614 case CTSF_UTABLEPCT_TOTALN:
2615 case CTSF_USUBTABLEPCT_TOTALN:
2616 case CTSF_ULAYERPCT_TOTALN:
2617 case CTSF_ULAYERROWPCT_TOTALN:
2618 case CTSF_ULAYERCOLPCT_TOTALN:
2623 case CTSF_ROWPCT_COUNT:
2624 case CTSF_COLPCT_COUNT:
2625 case CTSF_TABLEPCT_COUNT:
2626 case CTSF_SUBTABLEPCT_COUNT:
2627 case CTSF_LAYERPCT_COUNT:
2628 case CTSF_LAYERROWPCT_COUNT:
2629 case CTSF_LAYERCOLPCT_COUNT:
2630 if (is_scale || !excluded_missing)
2631 s->count += d_weight;
2635 case CTSF_UROWPCT_COUNT:
2636 case CTSF_UCOLPCT_COUNT:
2637 case CTSF_UTABLEPCT_COUNT:
2638 case CTSF_USUBTABLEPCT_COUNT:
2639 case CTSF_ULAYERPCT_COUNT:
2640 case CTSF_ULAYERROWPCT_COUNT:
2641 case CTSF_ULAYERCOLPCT_COUNT:
2642 if (is_scale || !excluded_missing)
2647 case CTSF_ROWPCT_VALIDN:
2648 case CTSF_COLPCT_VALIDN:
2649 case CTSF_TABLEPCT_VALIDN:
2650 case CTSF_SUBTABLEPCT_VALIDN:
2651 case CTSF_LAYERPCT_VALIDN:
2652 case CTSF_LAYERROWPCT_VALIDN:
2653 case CTSF_LAYERCOLPCT_VALIDN:
2657 s->count += d_weight;
2661 case CTSF_UROWPCT_VALIDN:
2662 case CTSF_UCOLPCT_VALIDN:
2663 case CTSF_UTABLEPCT_VALIDN:
2664 case CTSF_USUBTABLEPCT_VALIDN:
2665 case CTSF_ULAYERPCT_VALIDN:
2666 case CTSF_ULAYERROWPCT_VALIDN:
2667 case CTSF_ULAYERCOLPCT_VALIDN:
2677 case CTSF_SUBTABLE_ID:
2679 case CTSF_LAYERROW_ID:
2680 case CTSF_LAYERCOL_ID:
2687 s->count += d_weight;
2698 if (is_scale || !excluded_missing)
2699 s->count += e_weight;
2706 s->count += e_weight;
2710 s->count += e_weight;
2716 if (!is_scale_missing)
2718 assert (!var_is_alpha (var)); /* XXX? */
2719 if (s->min == SYSMIS || value->f < s->min)
2721 if (s->max == SYSMIS || value->f > s->max)
2731 case CTSF_ROWPCT_SUM:
2732 case CTSF_COLPCT_SUM:
2733 case CTSF_TABLEPCT_SUM:
2734 case CTSF_SUBTABLEPCT_SUM:
2735 case CTSF_LAYERPCT_SUM:
2736 case CTSF_LAYERROWPCT_SUM:
2737 case CTSF_LAYERCOLPCT_SUM:
2738 if (!is_scale_missing)
2739 moments1_add (s->moments, value->f, e_weight);
2746 case CTSF_UVARIANCE:
2747 case CTSF_UROWPCT_SUM:
2748 case CTSF_UCOLPCT_SUM:
2749 case CTSF_UTABLEPCT_SUM:
2750 case CTSF_USUBTABLEPCT_SUM:
2751 case CTSF_ULAYERPCT_SUM:
2752 case CTSF_ULAYERROWPCT_SUM:
2753 case CTSF_ULAYERCOLPCT_SUM:
2754 if (!is_scale_missing)
2755 moments1_add (s->moments, value->f, 1.0);
2761 d_weight = e_weight = 1.0;
2766 if (!is_scale_missing)
2768 s->ovalid += e_weight;
2770 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2771 *case_num_rw_idx (c, 0) = value->f;
2772 *case_num_rw_idx (c, 1) = e_weight;
2773 casewriter_write (s->writer, c);
2779 static enum ctables_domain_type
2780 ctables_function_domain (enum ctables_summary_function function)
2810 case CTSF_UVARIANCE:
2816 case CTSF_COLPCT_COUNT:
2817 case CTSF_COLPCT_SUM:
2818 case CTSF_COLPCT_TOTALN:
2819 case CTSF_COLPCT_VALIDN:
2820 case CTSF_UCOLPCT_COUNT:
2821 case CTSF_UCOLPCT_SUM:
2822 case CTSF_UCOLPCT_TOTALN:
2823 case CTSF_UCOLPCT_VALIDN:
2827 case CTSF_LAYERCOLPCT_COUNT:
2828 case CTSF_LAYERCOLPCT_SUM:
2829 case CTSF_LAYERCOLPCT_TOTALN:
2830 case CTSF_LAYERCOLPCT_VALIDN:
2831 case CTSF_ULAYERCOLPCT_COUNT:
2832 case CTSF_ULAYERCOLPCT_SUM:
2833 case CTSF_ULAYERCOLPCT_TOTALN:
2834 case CTSF_ULAYERCOLPCT_VALIDN:
2835 case CTSF_LAYERCOL_ID:
2836 return CTDT_LAYERCOL;
2838 case CTSF_LAYERPCT_COUNT:
2839 case CTSF_LAYERPCT_SUM:
2840 case CTSF_LAYERPCT_TOTALN:
2841 case CTSF_LAYERPCT_VALIDN:
2842 case CTSF_ULAYERPCT_COUNT:
2843 case CTSF_ULAYERPCT_SUM:
2844 case CTSF_ULAYERPCT_TOTALN:
2845 case CTSF_ULAYERPCT_VALIDN:
2849 case CTSF_LAYERROWPCT_COUNT:
2850 case CTSF_LAYERROWPCT_SUM:
2851 case CTSF_LAYERROWPCT_TOTALN:
2852 case CTSF_LAYERROWPCT_VALIDN:
2853 case CTSF_ULAYERROWPCT_COUNT:
2854 case CTSF_ULAYERROWPCT_SUM:
2855 case CTSF_ULAYERROWPCT_TOTALN:
2856 case CTSF_ULAYERROWPCT_VALIDN:
2857 case CTSF_LAYERROW_ID:
2858 return CTDT_LAYERROW;
2860 case CTSF_ROWPCT_COUNT:
2861 case CTSF_ROWPCT_SUM:
2862 case CTSF_ROWPCT_TOTALN:
2863 case CTSF_ROWPCT_VALIDN:
2864 case CTSF_UROWPCT_COUNT:
2865 case CTSF_UROWPCT_SUM:
2866 case CTSF_UROWPCT_TOTALN:
2867 case CTSF_UROWPCT_VALIDN:
2871 case CTSF_SUBTABLEPCT_COUNT:
2872 case CTSF_SUBTABLEPCT_SUM:
2873 case CTSF_SUBTABLEPCT_TOTALN:
2874 case CTSF_SUBTABLEPCT_VALIDN:
2875 case CTSF_USUBTABLEPCT_COUNT:
2876 case CTSF_USUBTABLEPCT_SUM:
2877 case CTSF_USUBTABLEPCT_TOTALN:
2878 case CTSF_USUBTABLEPCT_VALIDN:
2879 case CTSF_SUBTABLE_ID:
2880 return CTDT_SUBTABLE;
2882 case CTSF_TABLEPCT_COUNT:
2883 case CTSF_TABLEPCT_SUM:
2884 case CTSF_TABLEPCT_TOTALN:
2885 case CTSF_TABLEPCT_VALIDN:
2886 case CTSF_UTABLEPCT_COUNT:
2887 case CTSF_UTABLEPCT_SUM:
2888 case CTSF_UTABLEPCT_TOTALN:
2889 case CTSF_UTABLEPCT_VALIDN:
2897 static enum ctables_domain_type
2898 ctables_function_is_pctsum (enum ctables_summary_function function)
2928 case CTSF_UVARIANCE:
2932 case CTSF_COLPCT_COUNT:
2933 case CTSF_COLPCT_TOTALN:
2934 case CTSF_COLPCT_VALIDN:
2935 case CTSF_UCOLPCT_COUNT:
2936 case CTSF_UCOLPCT_TOTALN:
2937 case CTSF_UCOLPCT_VALIDN:
2938 case CTSF_LAYERCOLPCT_COUNT:
2939 case CTSF_LAYERCOLPCT_TOTALN:
2940 case CTSF_LAYERCOLPCT_VALIDN:
2941 case CTSF_ULAYERCOLPCT_COUNT:
2942 case CTSF_ULAYERCOLPCT_TOTALN:
2943 case CTSF_ULAYERCOLPCT_VALIDN:
2944 case CTSF_LAYERPCT_COUNT:
2945 case CTSF_LAYERPCT_TOTALN:
2946 case CTSF_LAYERPCT_VALIDN:
2947 case CTSF_ULAYERPCT_COUNT:
2948 case CTSF_ULAYERPCT_TOTALN:
2949 case CTSF_ULAYERPCT_VALIDN:
2950 case CTSF_LAYERROWPCT_COUNT:
2951 case CTSF_LAYERROWPCT_TOTALN:
2952 case CTSF_LAYERROWPCT_VALIDN:
2953 case CTSF_ULAYERROWPCT_COUNT:
2954 case CTSF_ULAYERROWPCT_TOTALN:
2955 case CTSF_ULAYERROWPCT_VALIDN:
2956 case CTSF_ROWPCT_COUNT:
2957 case CTSF_ROWPCT_TOTALN:
2958 case CTSF_ROWPCT_VALIDN:
2959 case CTSF_UROWPCT_COUNT:
2960 case CTSF_UROWPCT_TOTALN:
2961 case CTSF_UROWPCT_VALIDN:
2962 case CTSF_SUBTABLEPCT_COUNT:
2963 case CTSF_SUBTABLEPCT_TOTALN:
2964 case CTSF_SUBTABLEPCT_VALIDN:
2965 case CTSF_USUBTABLEPCT_COUNT:
2966 case CTSF_USUBTABLEPCT_TOTALN:
2967 case CTSF_USUBTABLEPCT_VALIDN:
2968 case CTSF_TABLEPCT_COUNT:
2969 case CTSF_TABLEPCT_TOTALN:
2970 case CTSF_TABLEPCT_VALIDN:
2971 case CTSF_UTABLEPCT_COUNT:
2972 case CTSF_UTABLEPCT_TOTALN:
2973 case CTSF_UTABLEPCT_VALIDN:
2977 case CTSF_SUBTABLE_ID:
2979 case CTSF_LAYERROW_ID:
2980 case CTSF_LAYERCOL_ID:
2983 case CTSF_COLPCT_SUM:
2984 case CTSF_UCOLPCT_SUM:
2985 case CTSF_LAYERCOLPCT_SUM:
2986 case CTSF_ULAYERCOLPCT_SUM:
2987 case CTSF_LAYERPCT_SUM:
2988 case CTSF_ULAYERPCT_SUM:
2989 case CTSF_LAYERROWPCT_SUM:
2990 case CTSF_ULAYERROWPCT_SUM:
2991 case CTSF_ROWPCT_SUM:
2992 case CTSF_UROWPCT_SUM:
2993 case CTSF_SUBTABLEPCT_SUM:
2994 case CTSF_USUBTABLEPCT_SUM:
2995 case CTSF_TABLEPCT_SUM:
2996 case CTSF_UTABLEPCT_SUM:
3004 ctables_summary_value (const struct ctables_cell *cell,
3005 union ctables_summary *s,
3006 const struct ctables_summary_spec *ss)
3008 switch (ss->function)
3018 case CTSF_SUBTABLE_ID:
3020 case CTSF_LAYERROW_ID:
3021 case CTSF_LAYERCOL_ID:
3022 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3024 case CTSF_ROWPCT_COUNT:
3025 case CTSF_COLPCT_COUNT:
3026 case CTSF_TABLEPCT_COUNT:
3027 case CTSF_SUBTABLEPCT_COUNT:
3028 case CTSF_LAYERPCT_COUNT:
3029 case CTSF_LAYERROWPCT_COUNT:
3030 case CTSF_LAYERCOLPCT_COUNT:
3032 enum ctables_domain_type d = ctables_function_domain (ss->function);
3033 return (cell->domains[d]->e_count
3034 ? s->count / cell->domains[d]->e_count * 100
3038 case CTSF_UROWPCT_COUNT:
3039 case CTSF_UCOLPCT_COUNT:
3040 case CTSF_UTABLEPCT_COUNT:
3041 case CTSF_USUBTABLEPCT_COUNT:
3042 case CTSF_ULAYERPCT_COUNT:
3043 case CTSF_ULAYERROWPCT_COUNT:
3044 case CTSF_ULAYERCOLPCT_COUNT:
3046 enum ctables_domain_type d = ctables_function_domain (ss->function);
3047 return (cell->domains[d]->u_count
3048 ? s->count / cell->domains[d]->u_count * 100
3052 case CTSF_ROWPCT_VALIDN:
3053 case CTSF_COLPCT_VALIDN:
3054 case CTSF_TABLEPCT_VALIDN:
3055 case CTSF_SUBTABLEPCT_VALIDN:
3056 case CTSF_LAYERPCT_VALIDN:
3057 case CTSF_LAYERROWPCT_VALIDN:
3058 case CTSF_LAYERCOLPCT_VALIDN:
3060 enum ctables_domain_type d = ctables_function_domain (ss->function);
3061 return (cell->domains[d]->e_valid
3062 ? s->count / cell->domains[d]->e_valid * 100
3066 case CTSF_UROWPCT_VALIDN:
3067 case CTSF_UCOLPCT_VALIDN:
3068 case CTSF_UTABLEPCT_VALIDN:
3069 case CTSF_USUBTABLEPCT_VALIDN:
3070 case CTSF_ULAYERPCT_VALIDN:
3071 case CTSF_ULAYERROWPCT_VALIDN:
3072 case CTSF_ULAYERCOLPCT_VALIDN:
3074 enum ctables_domain_type d = ctables_function_domain (ss->function);
3075 return (cell->domains[d]->u_valid
3076 ? s->count / cell->domains[d]->u_valid * 100
3080 case CTSF_ROWPCT_TOTALN:
3081 case CTSF_COLPCT_TOTALN:
3082 case CTSF_TABLEPCT_TOTALN:
3083 case CTSF_SUBTABLEPCT_TOTALN:
3084 case CTSF_LAYERPCT_TOTALN:
3085 case CTSF_LAYERROWPCT_TOTALN:
3086 case CTSF_LAYERCOLPCT_TOTALN:
3088 enum ctables_domain_type d = ctables_function_domain (ss->function);
3089 return (cell->domains[d]->e_total
3090 ? s->count / cell->domains[d]->e_total * 100
3094 case CTSF_UROWPCT_TOTALN:
3095 case CTSF_UCOLPCT_TOTALN:
3096 case CTSF_UTABLEPCT_TOTALN:
3097 case CTSF_USUBTABLEPCT_TOTALN:
3098 case CTSF_ULAYERPCT_TOTALN:
3099 case CTSF_ULAYERROWPCT_TOTALN:
3100 case CTSF_ULAYERCOLPCT_TOTALN:
3102 enum ctables_domain_type d = ctables_function_domain (ss->function);
3103 return (cell->domains[d]->u_total
3104 ? s->count / cell->domains[d]->u_total * 100
3125 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3131 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3138 double weight, variance;
3139 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3140 return calc_semean (variance, weight);
3147 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3148 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3154 double weight, mean;
3155 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3156 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3160 case CTSF_UVARIANCE:
3163 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3167 case CTSF_ROWPCT_SUM:
3168 case CTSF_COLPCT_SUM:
3169 case CTSF_TABLEPCT_SUM:
3170 case CTSF_SUBTABLEPCT_SUM:
3171 case CTSF_LAYERPCT_SUM:
3172 case CTSF_LAYERROWPCT_SUM:
3173 case CTSF_LAYERCOLPCT_SUM:
3175 double weight, mean;
3176 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3177 if (weight == SYSMIS || mean == SYSMIS)
3179 enum ctables_domain_type d = ctables_function_domain (ss->function);
3180 double num = weight * mean;
3181 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3182 return denom != 0 ? num / denom * 100 : SYSMIS;
3184 case CTSF_UROWPCT_SUM:
3185 case CTSF_UCOLPCT_SUM:
3186 case CTSF_UTABLEPCT_SUM:
3187 case CTSF_USUBTABLEPCT_SUM:
3188 case CTSF_ULAYERPCT_SUM:
3189 case CTSF_ULAYERROWPCT_SUM:
3190 case CTSF_ULAYERCOLPCT_SUM:
3192 double weight, mean;
3193 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3194 if (weight == SYSMIS || mean == SYSMIS)
3196 enum ctables_domain_type d = ctables_function_domain (ss->function);
3197 double num = weight * mean;
3198 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3199 return denom != 0 ? num / denom * 100 : SYSMIS;
3208 struct casereader *reader = casewriter_make_reader (s->writer);
3211 struct percentile *ptile = percentile_create (
3212 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3213 struct order_stats *os = &ptile->parent;
3214 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3215 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3216 statistic_destroy (&ptile->parent.parent);
3224 struct casereader *reader = casewriter_make_reader (s->writer);
3227 struct mode *mode = mode_create ();
3228 struct order_stats *os = &mode->parent;
3229 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3230 s->ovalue = mode->mode;
3231 statistic_destroy (&mode->parent.parent);
3239 struct ctables_cell_sort_aux
3241 const struct ctables_nest *nest;
3242 enum pivot_axis_type a;
3246 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3248 const struct ctables_cell_sort_aux *aux = aux_;
3249 struct ctables_cell *const *ap = a_;
3250 struct ctables_cell *const *bp = b_;
3251 const struct ctables_cell *a = *ap;
3252 const struct ctables_cell *b = *bp;
3254 const struct ctables_nest *nest = aux->nest;
3255 for (size_t i = 0; i < nest->n; i++)
3256 if (i != nest->scale_idx)
3258 const struct variable *var = nest->vars[i];
3259 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3260 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3261 if (a_cv->category != b_cv->category)
3262 return a_cv->category > b_cv->category ? 1 : -1;
3264 const union value *a_val = &a_cv->value;
3265 const union value *b_val = &b_cv->value;
3266 switch (a_cv->category->type)
3272 case CCT_POSTCOMPUTE:
3273 case CCT_EXCLUDED_MISSING:
3274 /* Must be equal. */
3282 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3290 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3292 return a_cv->category->sort_ascending ? cmp : -cmp;
3298 const char *a_label = var_lookup_value_label (var, a_val);
3299 const char *b_label = var_lookup_value_label (var, b_val);
3305 cmp = strcmp (a_label, b_label);
3311 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3314 return a_cv->category->sort_ascending ? cmp : -cmp;
3326 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3327 const void *aux UNUSED)
3329 struct ctables_cell *const *ap = a_;
3330 struct ctables_cell *const *bp = b_;
3331 const struct ctables_cell *a = *ap;
3332 const struct ctables_cell *b = *bp;
3334 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3336 int al = a->axes[axis].leaf;
3337 int bl = b->axes[axis].leaf;
3339 return al > bl ? 1 : -1;
3347 For each ctables_table:
3348 For each combination of row vars:
3349 For each combination of column vars:
3350 For each combination of layer vars:
3352 Make a table of row values:
3353 Sort entries by row values
3354 Assign a 0-based index to each actual value
3355 Construct a dimension
3356 Make a table of column values
3357 Make a table of layer values
3359 Fill the table entry using the indexes from before.
3362 static struct ctables_domain *
3363 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3364 enum ctables_domain_type domain)
3367 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3369 const struct ctables_nest *nest = s->nests[a];
3370 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3372 size_t v_idx = nest->domains[domain][i];
3373 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3374 hash = hash_pointer (cv->category, hash);
3375 if (cv->category->type != CCT_TOTAL
3376 && cv->category->type != CCT_SUBTOTAL
3377 && cv->category->type != CCT_POSTCOMPUTE)
3378 hash = value_hash (&cv->value,
3379 var_get_width (nest->vars[v_idx]), hash);
3383 struct ctables_domain *d;
3384 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3386 const struct ctables_cell *df = d->example;
3387 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3389 const struct ctables_nest *nest = s->nests[a];
3390 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3392 size_t v_idx = nest->domains[domain][i];
3393 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3394 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3395 if (cv1->category != cv2->category
3396 || (cv1->category->type != CCT_TOTAL
3397 && cv1->category->type != CCT_SUBTOTAL
3398 && cv1->category->type != CCT_POSTCOMPUTE
3399 && !value_equal (&cv1->value, &cv2->value,
3400 var_get_width (nest->vars[v_idx]))))
3409 struct ctables_sum *sums = (s->table->n_sum_vars
3410 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3413 d = xmalloc (sizeof *d);
3414 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3415 hmap_insert (&s->domains[domain], &d->node, hash);
3419 static struct substring
3420 rtrim_value (const union value *v, const struct variable *var)
3422 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3423 var_get_width (var));
3424 ss_rtrim (&s, ss_cstr (" "));
3429 in_string_range (const union value *v, const struct variable *var,
3430 const struct substring *srange)
3432 struct substring s = rtrim_value (v, var);
3433 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3434 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3437 static const struct ctables_category *
3438 ctables_categories_match (const struct ctables_categories *c,
3439 const union value *v, const struct variable *var)
3441 if (var_is_numeric (var) && v->f == SYSMIS)
3444 const struct ctables_category *othernm = NULL;
3445 for (size_t i = c->n_cats; i-- > 0; )
3447 const struct ctables_category *cat = &c->cats[i];
3451 if (cat->number == v->f)
3456 if (ss_equals (cat->string, rtrim_value (v, var)))
3461 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3462 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3467 if (in_string_range (v, var, cat->srange))
3472 if (var_is_value_missing (var, v))
3476 case CCT_POSTCOMPUTE:
3491 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3494 case CCT_EXCLUDED_MISSING:
3499 return var_is_value_missing (var, v) ? NULL : othernm;
3502 static const struct ctables_category *
3503 ctables_categories_total (const struct ctables_categories *c)
3505 const struct ctables_category *first = &c->cats[0];
3506 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3507 return (first->type == CCT_TOTAL ? first
3508 : last->type == CCT_TOTAL ? last
3512 static struct ctables_cell *
3513 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3514 const struct ctables_category *cats[PIVOT_N_AXES][10])
3517 enum ctables_summary_variant sv = CSV_CELL;
3518 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3520 const struct ctables_nest *nest = s->nests[a];
3521 for (size_t i = 0; i < nest->n; i++)
3522 if (i != nest->scale_idx)
3524 hash = hash_pointer (cats[a][i], hash);
3525 if (cats[a][i]->type != CCT_TOTAL
3526 && cats[a][i]->type != CCT_SUBTOTAL
3527 && cats[a][i]->type != CCT_POSTCOMPUTE)
3528 hash = value_hash (case_data (c, nest->vars[i]),
3529 var_get_width (nest->vars[i]), hash);
3535 struct ctables_cell *cell;
3536 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3538 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3540 const struct ctables_nest *nest = s->nests[a];
3541 for (size_t i = 0; i < nest->n; i++)
3542 if (i != nest->scale_idx
3543 && (cats[a][i] != cell->axes[a].cvs[i].category
3544 || (cats[a][i]->type != CCT_TOTAL
3545 && cats[a][i]->type != CCT_SUBTOTAL
3546 && cats[a][i]->type != CCT_POSTCOMPUTE
3547 && !value_equal (case_data (c, nest->vars[i]),
3548 &cell->axes[a].cvs[i].value,
3549 var_get_width (nest->vars[i])))))
3558 cell = xmalloc (sizeof *cell);
3561 cell->omit_domains = 0;
3562 cell->postcompute = false;
3563 //struct string name = DS_EMPTY_INITIALIZER;
3564 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3566 const struct ctables_nest *nest = s->nests[a];
3567 cell->axes[a].cvs = (nest->n
3568 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3570 for (size_t i = 0; i < nest->n; i++)
3572 const struct ctables_category *cat = cats[a][i];
3573 const struct variable *var = nest->vars[i];
3574 const union value *value = case_data (c, var);
3575 if (i != nest->scale_idx)
3577 const struct ctables_category *subtotal = cat->subtotal;
3578 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3581 if (cat->type == CCT_TOTAL
3582 || cat->type == CCT_SUBTOTAL
3583 || cat->type == CCT_POSTCOMPUTE)
3585 /* XXX these should be more encompassing I think.*/
3589 case PIVOT_AXIS_COLUMN:
3590 cell->omit_domains |= ((1u << CTDT_TABLE) |
3591 (1u << CTDT_LAYER) |
3592 (1u << CTDT_LAYERCOL) |
3593 (1u << CTDT_SUBTABLE) |
3596 case PIVOT_AXIS_ROW:
3597 cell->omit_domains |= ((1u << CTDT_TABLE) |
3598 (1u << CTDT_LAYER) |
3599 (1u << CTDT_LAYERROW) |
3600 (1u << CTDT_SUBTABLE) |
3603 case PIVOT_AXIS_LAYER:
3604 cell->omit_domains |= ((1u << CTDT_TABLE) |
3605 (1u << CTDT_LAYER));
3609 if (cat->type == CCT_POSTCOMPUTE)
3610 cell->postcompute = true;
3613 cell->axes[a].cvs[i].category = cat;
3614 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3617 if (i != nest->scale_idx)
3619 if (!ds_is_empty (&name))
3620 ds_put_cstr (&name, ", ");
3621 char *value_s = data_out (value, var_get_encoding (var),
3622 var_get_print_format (var),
3623 settings_get_fmt_settings ());
3624 if (cat->type == CCT_TOTAL
3625 || cat->type == CCT_SUBTOTAL
3626 || cat->type == CCT_POSTCOMPUTE)
3627 ds_put_format (&name, "%s=total", var_get_name (var));
3629 ds_put_format (&name, "%s=%s", var_get_name (var),
3630 value_s + strspn (value_s, " "));
3636 //cell->name = ds_steal_cstr (&name);
3638 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3639 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3640 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3641 for (size_t i = 0; i < specs->n; i++)
3642 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3643 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3644 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3645 hmap_insert (&s->cells, &cell->node, hash);
3650 is_scale_missing (const struct ctables_summary_spec_set *specs,
3651 const struct ccase *c)
3653 if (!specs->is_scale)
3656 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3659 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3661 const struct variable *var = specs->listwise_vars[i];
3662 if (var_is_num_missing (var, case_num (c, var)))
3670 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3671 const struct ctables_category *cats[PIVOT_N_AXES][10],
3672 bool is_missing, bool excluded_missing,
3673 double d_weight, double e_weight)
3675 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3676 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3678 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3680 bool scale_missing = is_scale_missing (specs, c);
3681 for (size_t i = 0; i < specs->n; i++)
3682 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3683 specs->var, case_data (c, specs->var), specs->is_scale,
3684 scale_missing, is_missing, excluded_missing,
3685 d_weight, e_weight);
3686 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3687 if (!(cell->omit_domains && (1u << dt)))
3689 struct ctables_domain *d = cell->domains[dt];
3690 d->d_total += d_weight;
3691 d->e_total += e_weight;
3693 if (!excluded_missing)
3695 d->d_count += d_weight;
3696 d->e_count += e_weight;
3701 d->d_valid += d_weight;
3702 d->e_valid += e_weight;
3705 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3707 /* XXX listwise_missing??? */
3708 const struct variable *var = s->table->sum_vars[i];
3709 double addend = case_num (c, var);
3710 if (!var_is_num_missing (var, addend))
3712 struct ctables_sum *sum = &d->sums[i];
3713 sum->e_sum += addend * e_weight;
3714 sum->u_sum += addend;
3722 recurse_totals (struct ctables_section *s, const struct ccase *c,
3723 const struct ctables_category *cats[PIVOT_N_AXES][10],
3724 bool is_missing, bool excluded_missing,
3725 double d_weight, double e_weight,
3726 enum pivot_axis_type start_axis, size_t start_nest)
3728 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3730 const struct ctables_nest *nest = s->nests[a];
3731 for (size_t i = start_nest; i < nest->n; i++)
3733 if (i == nest->scale_idx)
3736 const struct variable *var = nest->vars[i];
3738 const struct ctables_category *total = ctables_categories_total (
3739 s->table->categories[var_get_dict_index (var)]);
3742 const struct ctables_category *save = cats[a][i];
3744 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3745 d_weight, e_weight);
3746 recurse_totals (s, c, cats, is_missing, excluded_missing,
3747 d_weight, e_weight, a, i + 1);
3756 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3757 const struct ctables_category *cats[PIVOT_N_AXES][10],
3758 bool is_missing, bool excluded_missing,
3759 double d_weight, double e_weight,
3760 enum pivot_axis_type start_axis, size_t start_nest)
3762 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3764 const struct ctables_nest *nest = s->nests[a];
3765 for (size_t i = start_nest; i < nest->n; i++)
3767 if (i == nest->scale_idx)
3770 const struct ctables_category *save = cats[a][i];
3773 cats[a][i] = save->subtotal;
3774 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3775 d_weight, e_weight);
3776 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3777 d_weight, e_weight, a, i + 1);
3786 ctables_add_occurrence (const struct variable *var,
3787 const union value *value,
3788 struct hmap *occurrences)
3790 int width = var_get_width (var);
3791 unsigned int hash = value_hash (value, width, 0);
3793 struct ctables_occurrence *o;
3794 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3796 if (value_equal (value, &o->value, width))
3799 o = xmalloc (sizeof *o);
3800 value_clone (&o->value, value, width);
3801 hmap_insert (occurrences, &o->node, hash);
3805 ctables_cell_insert (struct ctables_section *s,
3806 const struct ccase *c,
3807 double d_weight, double e_weight)
3809 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3811 /* Does at least one categorical variable have a missing value in an included
3812 or excluded category? */
3813 bool is_missing = false;
3815 /* Does at least one categorical variable have a missing value in an excluded
3817 bool excluded_missing = false;
3819 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3821 const struct ctables_nest *nest = s->nests[a];
3822 for (size_t i = 0; i < nest->n; i++)
3824 if (i == nest->scale_idx)
3827 const struct variable *var = nest->vars[i];
3828 const union value *value = case_data (c, var);
3830 bool var_missing = var_is_value_missing (var, value) != 0;
3834 cats[a][i] = ctables_categories_match (
3835 s->table->categories[var_get_dict_index (var)], value, var);
3841 static const struct ctables_category cct_excluded_missing = {
3842 .type = CCT_EXCLUDED_MISSING,
3845 cats[a][i] = &cct_excluded_missing;
3846 excluded_missing = true;
3851 if (!excluded_missing)
3852 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3854 const struct ctables_nest *nest = s->nests[a];
3855 for (size_t i = 0; i < nest->n; i++)
3856 if (i != nest->scale_idx)
3858 const struct variable *var = nest->vars[i];
3859 const union value *value = case_data (c, var);
3860 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3864 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3865 d_weight, e_weight);
3867 //if (!excluded_missing)
3869 recurse_totals (s, c, cats, is_missing, excluded_missing,
3870 d_weight, e_weight, 0, 0);
3871 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3872 d_weight, e_weight, 0, 0);
3878 const struct ctables_summary_spec_set *set;
3883 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3885 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3886 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3887 if (as->function != bs->function)
3888 return as->function > bs->function ? 1 : -1;
3889 else if (as->percentile != bs->percentile)
3890 return as->percentile < bs->percentile ? 1 : -1;
3892 const char *as_label = as->label ? as->label : "";
3893 const char *bs_label = bs->label ? bs->label : "";
3894 return strcmp (as_label, bs_label);
3898 ctables_category_format_number (double number, const struct variable *var,
3901 struct pivot_value *pv = pivot_value_new_var_value (
3902 var, &(union value) { .f = number });
3903 pivot_value_format (pv, NULL, s);
3904 pivot_value_destroy (pv);
3908 ctables_category_format_string (struct substring string,
3909 const struct variable *var, struct string *out)
3911 int width = var_get_width (var);
3912 char *s = xmalloc (width);
3913 buf_copy_rpad (s, width, string.string, string.length, ' ');
3914 struct pivot_value *pv = pivot_value_new_var_value (
3915 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3916 pivot_value_format (pv, NULL, out);
3917 pivot_value_destroy (pv);
3922 ctables_category_format_label (const struct ctables_category *cat,
3923 const struct variable *var,
3929 ctables_category_format_number (cat->number, var, s);
3933 ctables_category_format_string (cat->string, var, s);
3937 ctables_category_format_number (cat->nrange[0], var, s);
3938 ds_put_format (s, " THRU ");
3939 ctables_category_format_number (cat->nrange[1], var, s);
3943 ctables_category_format_string (cat->srange[0], var, s);
3944 ds_put_format (s, " THRU ");
3945 ctables_category_format_string (cat->srange[1], var, s);
3949 ds_put_cstr (s, "MISSING");
3953 ds_put_cstr (s, "OTHERNM");
3956 case CCT_POSTCOMPUTE:
3957 ds_put_format (s, "&%s", cat->pc->name);
3962 ds_put_cstr (s, cat->total_label);
3968 case CCT_EXCLUDED_MISSING:
3975 static struct pivot_value *
3976 ctables_postcompute_label (const struct ctables_categories *cats,
3977 const struct ctables_category *cat,
3978 const struct variable *var)
3980 struct substring in = ss_cstr (cat->pc->label);
3981 struct substring target = ss_cstr (")LABEL[");
3983 struct string out = DS_EMPTY_INITIALIZER;
3986 size_t chunk = ss_find_substring (in, target);
3987 if (chunk == SIZE_MAX)
3989 if (ds_is_empty (&out))
3990 return pivot_value_new_user_text (in.string, in.length);
3993 ds_put_substring (&out, in);
3994 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3998 ds_put_substring (&out, ss_head (in, chunk));
3999 ss_advance (&in, chunk + target.length);
4001 struct substring idx_s;
4002 if (!ss_get_until (&in, ']', &idx_s))
4005 long int idx = strtol (idx_s.string, &tail, 10);
4006 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4009 struct ctables_category *cat2 = &cats->cats[idx - 1];
4010 if (!ctables_category_format_label (cat2, var, &out))
4016 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4019 static struct pivot_value *
4020 ctables_category_create_value_label (const struct ctables_categories *cats,
4021 const struct ctables_category *cat,
4022 const struct variable *var,
4023 const union value *value)
4025 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4026 ? ctables_postcompute_label (cats, cat, var)
4027 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4028 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4029 : pivot_value_new_var_value (var, value));
4032 static struct ctables_value *
4033 ctables_value_find__ (struct ctables_table *t, const union value *value,
4034 int width, unsigned int hash)
4036 struct ctables_value *clv;
4037 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4038 hash, &t->clabels_values_map)
4039 if (value_equal (value, &clv->value, width))
4045 ctables_value_insert (struct ctables_table *t, const union value *value,
4048 unsigned int hash = value_hash (value, width, 0);
4049 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4052 clv = xmalloc (sizeof *clv);
4053 value_clone (&clv->value, value, width);
4054 hmap_insert (&t->clabels_values_map, &clv->node, hash);
4058 static struct ctables_value *
4059 ctables_value_find (struct ctables_table *t,
4060 const union value *value, int width)
4062 return ctables_value_find__ (t, value, width,
4063 value_hash (value, width, 0));
4067 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4068 size_t ix[PIVOT_N_AXES])
4070 if (a < PIVOT_N_AXES)
4072 size_t limit = MAX (t->stacks[a].n, 1);
4073 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4074 ctables_table_add_section (t, a + 1, ix);
4078 struct ctables_section *s = &t->sections[t->n_sections++];
4079 *s = (struct ctables_section) {
4081 .cells = HMAP_INITIALIZER (s->cells),
4083 for (a = 0; a < PIVOT_N_AXES; a++)
4086 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4088 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4089 for (size_t i = 0; i < nest->n; i++)
4090 hmap_init (&s->occurrences[a][i]);
4092 for (size_t i = 0; i < N_CTDTS; i++)
4093 hmap_init (&s->domains[i]);
4098 ctpo_add (double a, double b)
4104 ctpo_sub (double a, double b)
4110 ctpo_mul (double a, double b)
4116 ctpo_div (double a, double b)
4118 return b ? a / b : SYSMIS;
4122 ctpo_pow (double a, double b)
4124 int save_errno = errno;
4126 double result = pow (a, b);
4134 ctpo_neg (double a, double b UNUSED)
4139 struct ctables_pcexpr_evaluate_ctx
4141 const struct ctables_cell *cell;
4142 const struct ctables_section *section;
4143 const struct ctables_categories *cats;
4144 enum pivot_axis_type pc_a;
4147 enum fmt_type parse_format;
4150 static double ctables_pcexpr_evaluate (
4151 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4154 ctables_pcexpr_evaluate_nonterminal (
4155 const struct ctables_pcexpr_evaluate_ctx *ctx,
4156 const struct ctables_pcexpr *e, size_t n_args,
4157 double evaluate (double, double))
4159 double args[2] = { 0, 0 };
4160 for (size_t i = 0; i < n_args; i++)
4162 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4163 if (!isfinite (args[i]) || args[i] == SYSMIS)
4166 return evaluate (args[0], args[1]);
4170 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4171 const struct ctables_cell_value *pc_cv)
4173 const struct ctables_section *s = ctx->section;
4176 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4178 const struct ctables_nest *nest = s->nests[a];
4179 for (size_t i = 0; i < nest->n; i++)
4180 if (i != nest->scale_idx)
4182 const struct ctables_cell_value *cv
4183 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4184 : &ctx->cell->axes[a].cvs[i]);
4185 hash = hash_pointer (cv->category, hash);
4186 if (cv->category->type != CCT_TOTAL
4187 && cv->category->type != CCT_SUBTOTAL
4188 && cv->category->type != CCT_POSTCOMPUTE)
4189 hash = value_hash (&cv->value,
4190 var_get_width (nest->vars[i]), hash);
4194 struct ctables_cell *tc;
4195 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4199 const struct ctables_nest *nest = s->nests[a];
4200 for (size_t i = 0; i < nest->n; i++)
4201 if (i != nest->scale_idx)
4203 const struct ctables_cell_value *p_cv
4204 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4205 : &ctx->cell->axes[a].cvs[i]);
4206 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4207 if (p_cv->category != t_cv->category
4208 || (p_cv->category->type != CCT_TOTAL
4209 && p_cv->category->type != CCT_SUBTOTAL
4210 && p_cv->category->type != CCT_POSTCOMPUTE
4211 && !value_equal (&p_cv->value,
4213 var_get_width (nest->vars[i]))))
4225 const struct ctables_table *t = s->table;
4226 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4227 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4228 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4229 &specs->specs[ctx->summary_idx]);
4233 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4234 const struct ctables_pcexpr *e)
4241 case CTPO_CAT_NRANGE:
4242 case CTPO_CAT_SRANGE:
4243 case CTPO_CAT_MISSING:
4244 case CTPO_CAT_OTHERNM:
4246 struct ctables_cell_value cv = {
4247 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4249 assert (cv.category != NULL);
4251 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4252 const struct ctables_occurrence *o;
4255 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4256 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4257 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4259 cv.value = o->value;
4260 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4265 case CTPO_CAT_NUMBER:
4266 case CTPO_CAT_SUBTOTAL:
4267 case CTPO_CAT_TOTAL:
4269 struct ctables_cell_value cv = {
4270 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4271 .value = { .f = e->number },
4273 assert (cv.category != NULL);
4274 return ctables_pcexpr_evaluate_category (ctx, &cv);
4277 case CTPO_CAT_STRING:
4279 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4281 if (width > e->string.length)
4283 s = xmalloc (width);
4284 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4287 const struct ctables_category *category
4288 = ctables_find_category_for_postcompute (
4289 ctx->section->table->ctables->dict,
4290 ctx->cats, ctx->parse_format, e);
4291 assert (category != NULL);
4293 struct ctables_cell_value cv = { .category = category };
4294 if (category->type == CCT_NUMBER)
4295 cv.value.f = category->number;
4296 else if (category->type == CCT_STRING)
4297 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4301 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4307 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4310 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4313 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4316 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4319 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4322 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4328 static const struct ctables_category *
4329 ctables_cell_postcompute (const struct ctables_section *s,
4330 const struct ctables_cell *cell,
4331 enum pivot_axis_type *pc_a_p,
4334 assert (cell->postcompute);
4335 const struct ctables_category *pc_cat = NULL;
4336 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4337 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4339 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4340 if (cv->category->type == CCT_POSTCOMPUTE)
4344 /* Multiple postcomputes cross each other. The value is
4349 pc_cat = cv->category;
4353 *pc_a_idx_p = pc_a_idx;
4357 assert (pc_cat != NULL);
4362 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4363 const struct ctables_cell *cell,
4364 const struct ctables_summary_spec *ss,
4365 struct fmt_spec *format,
4366 bool *is_ctables_format,
4369 enum pivot_axis_type pc_a = 0;
4370 size_t pc_a_idx = 0;
4371 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4372 s, cell, &pc_a, &pc_a_idx);
4376 const struct ctables_postcompute *pc = pc_cat->pc;
4379 for (size_t i = 0; i < pc->specs->n; i++)
4381 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4382 if (ss->function == ss2->function
4383 && ss->percentile == ss2->percentile)
4385 *format = ss2->format;
4386 *is_ctables_format = ss2->is_ctables_format;
4392 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4393 const struct ctables_categories *cats = s->table->categories[
4394 var_get_dict_index (var)];
4395 struct ctables_pcexpr_evaluate_ctx ctx = {
4400 .pc_a_idx = pc_a_idx,
4401 .summary_idx = summary_idx,
4402 .parse_format = pc_cat->parse_format,
4404 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4408 ctables_format (double d, const struct fmt_spec *format,
4409 const struct fmt_settings *settings)
4411 const union value v = { .f = d };
4412 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4414 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4415 produce the results we want for negative numbers, putting the negative
4416 sign in the wrong spot, before the prefix instead of after it. We can't,
4417 in fact, produce the desired results using a custom-currency
4418 specification. Instead, we postprocess the output, moving the negative
4421 NEQUAL: "-N=3" => "N=-3"
4422 PAREN: "-(3)" => "(-3)"
4423 PCTPAREN: "-(3%)" => "(-3%)"
4425 This transformation doesn't affect NEGPAREN. */
4426 char *minus_src = strchr (s, '-');
4427 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4429 char *n_equals = strstr (s, "N=");
4430 char *lparen = strchr (s, '(');
4431 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4433 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4439 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4441 for (size_t i = 0; i < t->stacks[a].n; i++)
4443 struct ctables_nest *nest = &t->stacks[a].nests[i];
4444 if (nest->n != 1 || nest->scale_idx != 0)
4447 enum ctables_vlabel vlabel
4448 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4449 if (vlabel != CTVL_NONE)
4456 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4458 struct pivot_table *pt = pivot_table_create__ (
4460 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4461 : pivot_value_new_text (N_("Custom Tables"))),
4464 pivot_table_set_caption (
4465 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4467 pivot_table_set_corner_text (
4468 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4470 bool summary_dimension = (t->summary_axis != t->slabels_axis
4471 || (!t->slabels_visible
4472 && t->summary_specs.n > 1));
4473 if (summary_dimension)
4475 struct pivot_dimension *d = pivot_dimension_create (
4476 pt, t->slabels_axis, N_("Statistics"));
4477 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4478 if (!t->slabels_visible)
4479 d->hide_all_labels = true;
4480 for (size_t i = 0; i < specs->n; i++)
4481 pivot_category_create_leaf (
4482 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4485 bool categories_dimension = t->clabels_example != NULL;
4486 if (categories_dimension)
4488 struct pivot_dimension *d = pivot_dimension_create (
4489 pt, t->label_axis[t->clabels_from_axis],
4490 t->clabels_from_axis == PIVOT_AXIS_ROW
4491 ? N_("Row Categories")
4492 : N_("Column Categories"));
4493 const struct variable *var = t->clabels_example;
4494 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4495 for (size_t i = 0; i < t->n_clabels_values; i++)
4497 const struct ctables_value *value = t->clabels_values[i];
4498 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4499 assert (cat != NULL);
4500 pivot_category_create_leaf (
4501 d->root, ctables_category_create_value_label (c, cat,
4507 pivot_table_set_look (pt, ct->look);
4508 struct pivot_dimension *d[PIVOT_N_AXES];
4509 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4511 static const char *names[] = {
4512 [PIVOT_AXIS_ROW] = N_("Rows"),
4513 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4514 [PIVOT_AXIS_LAYER] = N_("Layers"),
4516 d[a] = (t->axes[a] || a == t->summary_axis
4517 ? pivot_dimension_create (pt, a, names[a])
4522 assert (t->axes[a]);
4524 for (size_t i = 0; i < t->stacks[a].n; i++)
4526 struct ctables_nest *nest = &t->stacks[a].nests[i];
4527 struct ctables_section **sections = xnmalloc (t->n_sections,
4529 size_t n_sections = 0;
4531 size_t n_total_cells = 0;
4532 size_t max_depth = 0;
4533 for (size_t j = 0; j < t->n_sections; j++)
4534 if (t->sections[j].nests[a] == nest)
4536 struct ctables_section *s = &t->sections[j];
4537 sections[n_sections++] = s;
4538 n_total_cells += hmap_count (&s->cells);
4540 size_t depth = s->nests[a]->n;
4541 max_depth = MAX (depth, max_depth);
4544 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4546 size_t n_sorted = 0;
4548 for (size_t j = 0; j < n_sections; j++)
4550 struct ctables_section *s = sections[j];
4552 struct ctables_cell *cell;
4553 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4555 sorted[n_sorted++] = cell;
4556 assert (n_sorted <= n_total_cells);
4559 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4560 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4563 if (a == PIVOT_AXIS_ROW)
4565 size_t ids[N_CTDTS];
4566 memset (ids, 0, sizeof ids);
4567 for (size_t j = 0; j < n_sorted; j++)
4569 struct ctables_cell *cell = sorted[j];
4570 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4572 struct ctables_domain *domain = cell->domains[dt];
4573 if (!domain->sequence)
4574 domain->sequence = ++ids[dt];
4581 for (size_t j = 0; j < n_sorted; j++)
4583 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4588 struct ctables_level
4590 enum ctables_level_type
4592 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4593 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4594 CTL_SUMMARY, /* Summary functions. */
4598 enum settings_value_show vlabel; /* CTL_VAR only. */
4601 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4602 size_t n_levels = 0;
4603 for (size_t k = 0; k < nest->n; k++)
4605 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4606 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4608 if (vlabel != CTVL_NONE)
4610 levels[n_levels++] = (struct ctables_level) {
4612 .vlabel = (enum settings_value_show) vlabel,
4617 if (nest->scale_idx != k
4618 && (k != nest->n - 1 || t->label_axis[a] == a))
4620 levels[n_levels++] = (struct ctables_level) {
4621 .type = CTL_CATEGORY,
4627 if (!summary_dimension && a == t->slabels_axis)
4629 levels[n_levels++] = (struct ctables_level) {
4630 .type = CTL_SUMMARY,
4631 .var_idx = SIZE_MAX,
4635 /* Pivot categories:
4637 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4638 - category for nest->vars[0], if nest->scale_idx != 0
4639 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4640 - category for nest->vars[1], if nest->scale_idx != 1
4642 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4643 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4644 - summary function, if 'a == t->slabels_axis && a ==
4647 Additional dimensions:
4649 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4651 - If 't->label_axis[b] == a' for some 'b != a', add a category
4656 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4658 for (size_t j = 0; j < n_sorted; j++)
4660 struct ctables_cell *cell = sorted[j];
4661 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4663 size_t n_common = 0;
4666 for (; n_common < n_levels; n_common++)
4668 const struct ctables_level *level = &levels[n_common];
4669 if (level->type == CTL_CATEGORY)
4671 size_t var_idx = level->var_idx;
4672 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4673 if (prev->axes[a].cvs[var_idx].category != c)
4675 else if (c->type != CCT_SUBTOTAL
4676 && c->type != CCT_TOTAL
4677 && c->type != CCT_POSTCOMPUTE
4678 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4679 &cell->axes[a].cvs[var_idx].value,
4680 var_get_type (nest->vars[var_idx])))
4686 for (size_t k = n_common; k < n_levels; k++)
4688 const struct ctables_level *level = &levels[k];
4689 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4690 if (level->type == CTL_SUMMARY)
4692 assert (k == n_levels - 1);
4694 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4695 for (size_t m = 0; m < specs->n; m++)
4697 int leaf = pivot_category_create_leaf (
4698 parent, ctables_summary_label (&specs->specs[m],
4706 const struct variable *var = nest->vars[level->var_idx];
4707 struct pivot_value *label;
4708 if (level->type == CTL_VAR)
4710 label = pivot_value_new_variable (var);
4711 label->variable.show = level->vlabel;
4713 else if (level->type == CTL_CATEGORY)
4715 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4716 label = ctables_category_create_value_label (
4717 t->categories[var_get_dict_index (var)],
4718 cv->category, var, &cv->value);
4723 if (k == n_levels - 1)
4724 prev_leaf = pivot_category_create_leaf (parent, label);
4726 groups[k] = pivot_category_create_group__ (parent, label);
4730 cell->axes[a].leaf = prev_leaf;
4739 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4743 size_t n_total_cells = 0;
4744 for (size_t j = 0; j < t->n_sections; j++)
4745 n_total_cells += hmap_count (&t->sections[j].cells);
4747 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4748 size_t n_sorted = 0;
4749 for (size_t j = 0; j < t->n_sections; j++)
4751 const struct ctables_section *s = &t->sections[j];
4752 struct ctables_cell *cell;
4753 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4755 sorted[n_sorted++] = cell;
4757 assert (n_sorted <= n_total_cells);
4758 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4760 size_t ids[N_CTDTS];
4761 memset (ids, 0, sizeof ids);
4762 for (size_t j = 0; j < n_sorted; j++)
4764 struct ctables_cell *cell = sorted[j];
4765 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4767 struct ctables_domain *domain = cell->domains[dt];
4768 if (!domain->sequence)
4769 domain->sequence = ++ids[dt];
4776 for (size_t i = 0; i < t->n_sections; i++)
4778 struct ctables_section *s = &t->sections[i];
4780 struct ctables_cell *cell;
4781 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4786 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4787 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4788 for (size_t j = 0; j < specs->n; j++)
4791 size_t n_dindexes = 0;
4793 if (summary_dimension)
4794 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4796 if (categories_dimension)
4798 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4799 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4800 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4801 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4804 dindexes[n_dindexes++] = ctv->leaf;
4807 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4810 int leaf = cell->axes[a].leaf;
4811 if (a == t->summary_axis && !summary_dimension)
4813 dindexes[n_dindexes++] = leaf;
4816 const struct ctables_summary_spec *ss = &specs->specs[j];
4818 struct fmt_spec format = specs->specs[j].format;
4819 bool is_ctables_format = ss->is_ctables_format;
4820 double d = (cell->postcompute
4821 ? ctables_cell_calculate_postcompute (
4822 s, cell, ss, &format, &is_ctables_format, j)
4823 : ctables_summary_value (cell, &cell->summaries[j],
4826 struct pivot_value *value;
4827 if (ct->hide_threshold != 0
4828 && d < ct->hide_threshold
4829 && ctables_summary_function_is_count (ss->function))
4831 value = pivot_value_new_user_text_nocopy (
4832 xasprintf ("<%d", ct->hide_threshold));
4834 else if (d == 0 && ct->zero)
4835 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4836 else if (d == SYSMIS && ct->missing)
4837 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4838 else if (is_ctables_format)
4839 value = pivot_value_new_user_text_nocopy (
4840 ctables_format (d, &format, &ct->ctables_formats));
4843 value = pivot_value_new_number (d);
4844 value->numeric.format = format;
4846 /* XXX should text values be right-justified? */
4847 pivot_table_put (pt, dindexes, n_dindexes, value);
4852 pivot_table_submit (pt);
4856 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4858 enum pivot_axis_type label_pos = t->label_axis[a];
4862 t->clabels_from_axis = a;
4864 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4865 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4867 const struct ctables_stack *stack = &t->stacks[a];
4871 const struct ctables_nest *n0 = &stack->nests[0];
4874 assert (stack->n == 1);
4878 const struct variable *v0 = n0->vars[n0->n - 1];
4879 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4880 t->clabels_example = v0;
4882 for (size_t i = 0; i < c0->n_cats; i++)
4883 if (c0->cats[i].type == CCT_FUNCTION)
4885 msg (SE, _("%s=%s is not allowed with sorting based "
4886 "on a summary function."),
4887 subcommand_name, pos_name);
4890 if (n0->n - 1 == n0->scale_idx)
4892 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4893 "but %s is a scale variable."),
4894 subcommand_name, pos_name, var_get_name (v0));
4898 for (size_t i = 1; i < stack->n; i++)
4900 const struct ctables_nest *ni = &stack->nests[i];
4902 const struct variable *vi = ni->vars[ni->n - 1];
4903 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4905 if (ni->n - 1 == ni->scale_idx)
4907 msg (SE, _("%s=%s requires the variables to be moved to be "
4908 "categorical, but %s is a scale variable."),
4909 subcommand_name, pos_name, var_get_name (vi));
4912 if (var_get_width (v0) != var_get_width (vi))
4914 msg (SE, _("%s=%s requires the variables to be "
4915 "moved to have the same width, but %s has "
4916 "width %d and %s has width %d."),
4917 subcommand_name, pos_name,
4918 var_get_name (v0), var_get_width (v0),
4919 var_get_name (vi), var_get_width (vi));
4922 if (!val_labs_equal (var_get_value_labels (v0),
4923 var_get_value_labels (vi)))
4925 msg (SE, _("%s=%s requires the variables to be "
4926 "moved to have the same value labels, but %s "
4927 "and %s have different value labels."),
4928 subcommand_name, pos_name,
4929 var_get_name (v0), var_get_name (vi));
4932 if (!ctables_categories_equal (c0, ci))
4934 msg (SE, _("%s=%s requires the variables to be "
4935 "moved to have the same category "
4936 "specifications, but %s and %s have different "
4937 "category specifications."),
4938 subcommand_name, pos_name,
4939 var_get_name (v0), var_get_name (vi));
4948 add_sum_var (struct variable *var,
4949 struct variable ***sum_vars, size_t *n, size_t *allocated)
4951 for (size_t i = 0; i < *n; i++)
4952 if (var == (*sum_vars)[i])
4955 if (*n >= *allocated)
4956 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4957 (*sum_vars)[*n] = var;
4962 enumerate_sum_vars (const struct ctables_axis *a,
4963 struct variable ***sum_vars, size_t *n, size_t *allocated)
4971 for (size_t i = 0; i < N_CSVS; i++)
4972 for (size_t j = 0; j < a->specs[i].n; j++)
4974 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4975 if (ctables_function_is_pctsum (spec->function))
4976 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4982 for (size_t i = 0; i < 2; i++)
4983 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4989 ctables_prepare_table (struct ctables_table *t)
4991 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4994 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4996 for (size_t j = 0; j < t->stacks[a].n; j++)
4998 struct ctables_nest *nest = &t->stacks[a].nests[j];
4999 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5001 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
5002 nest->n_domains[dt] = 0;
5004 for (size_t k = 0; k < nest->n; k++)
5006 if (k == nest->scale_idx)
5015 if (a != PIVOT_AXIS_LAYER)
5022 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
5023 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
5024 : a == PIVOT_AXIS_ROW)
5026 if (k == nest->n - 1
5027 || (nest->scale_idx == nest->n - 1
5028 && k == nest->n - 2))
5034 if (a == PIVOT_AXIS_COLUMN)
5039 if (a == PIVOT_AXIS_ROW)
5044 nest->domains[dt][nest->n_domains[dt]++] = k;
5051 struct ctables_nest *nest = xmalloc (sizeof *nest);
5052 *nest = (struct ctables_nest) { .n = 0 };
5053 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5055 /* There's no point in moving labels away from an axis that has no
5056 labels, so avoid dealing with the special cases around that. */
5057 t->label_axis[a] = a;
5060 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5061 for (size_t i = 0; i < stack->n; i++)
5063 struct ctables_nest *nest = &stack->nests[i];
5064 if (!nest->specs[CSV_CELL].n)
5066 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5067 specs->specs = xmalloc (sizeof *specs->specs);
5070 enum ctables_summary_function function
5071 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
5073 *specs->specs = (struct ctables_summary_spec) {
5074 .function = function,
5075 .format = ctables_summary_default_format (function, specs->var),
5078 specs->var = nest->vars[0];
5080 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5081 &nest->specs[CSV_CELL]);
5083 else if (!nest->specs[CSV_TOTAL].n)
5084 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5085 &nest->specs[CSV_CELL]);
5087 if (t->ctables->smissing_listwise)
5089 struct variable **listwise_vars = NULL;
5091 size_t allocated = 0;
5093 for (size_t j = nest->group_head; j < stack->n; j++)
5095 const struct ctables_nest *other_nest = &stack->nests[j];
5096 if (other_nest->group_head != nest->group_head)
5099 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5102 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5103 sizeof *listwise_vars);
5104 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5107 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5110 listwise_vars = xmemdup (listwise_vars,
5111 n * sizeof *listwise_vars);
5112 nest->specs[sv].listwise_vars = listwise_vars;
5113 nest->specs[sv].n_listwise_vars = n;
5118 struct ctables_summary_spec_set *merged = &t->summary_specs;
5119 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5121 for (size_t j = 0; j < stack->n; j++)
5123 const struct ctables_nest *nest = &stack->nests[j];
5125 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5126 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5131 struct merge_item min = items[0];
5132 for (size_t j = 1; j < n_left; j++)
5133 if (merge_item_compare_3way (&items[j], &min) < 0)
5136 if (merged->n >= merged->allocated)
5137 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5138 sizeof *merged->specs);
5139 merged->specs[merged->n++] = min.set->specs[min.ofs];
5141 for (size_t j = 0; j < n_left; )
5143 if (merge_item_compare_3way (&items[j], &min) == 0)
5145 struct merge_item *item = &items[j];
5146 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5147 if (++item->ofs >= item->set->n)
5149 items[j] = items[--n_left];
5159 for (size_t j = 0; j < merged->n; j++)
5160 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5162 for (size_t j = 0; j < stack->n; j++)
5164 const struct ctables_nest *nest = &stack->nests[j];
5165 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5167 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5168 for (size_t k = 0; k < specs->n; k++)
5169 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5170 specs->specs[k].axis_idx);
5176 size_t allocated_sum_vars = 0;
5177 enumerate_sum_vars (t->axes[t->summary_axis],
5178 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5180 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5181 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5185 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5186 enum pivot_axis_type a)
5188 struct ctables_stack *stack = &t->stacks[a];
5189 for (size_t i = 0; i < stack->n; i++)
5191 const struct ctables_nest *nest = &stack->nests[i];
5192 const struct variable *var = nest->vars[nest->n - 1];
5193 const union value *value = case_data (c, var);
5195 if (var_is_numeric (var) && value->f == SYSMIS)
5198 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5200 ctables_value_insert (t, value, var_get_width (var));
5205 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5207 const struct ctables_value *const *ap = a_;
5208 const struct ctables_value *const *bp = b_;
5209 const struct ctables_value *a = *ap;
5210 const struct ctables_value *b = *bp;
5211 const int *width = width_;
5212 return value_compare_3way (&a->value, &b->value, *width);
5216 ctables_sort_clabels_values (struct ctables_table *t)
5218 const struct variable *v0 = t->clabels_example;
5219 int width = var_get_width (v0);
5221 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5224 const struct val_labs *val_labs = var_get_value_labels (v0);
5225 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5226 vl = val_labs_next (val_labs, vl))
5227 if (ctables_categories_match (c0, &vl->value, v0))
5228 ctables_value_insert (t, &vl->value, width);
5231 size_t n = hmap_count (&t->clabels_values_map);
5232 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5234 struct ctables_value *clv;
5236 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5237 t->clabels_values[i++] = clv;
5238 t->n_clabels_values = n;
5241 sort (t->clabels_values, n, sizeof *t->clabels_values,
5242 compare_clabels_values_3way, &width);
5244 for (size_t i = 0; i < n; i++)
5245 t->clabels_values[i]->leaf = i;
5249 ctables_add_category_occurrences (const struct variable *var,
5250 struct hmap *occurrences,
5251 const struct ctables_categories *cats)
5253 const struct val_labs *val_labs = var_get_value_labels (var);
5255 for (size_t i = 0; i < cats->n_cats; i++)
5257 const struct ctables_category *c = &cats->cats[i];
5261 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5267 int width = var_get_width (var);
5269 value_init (&value, width);
5270 value_copy_buf_rpad (&value, width,
5271 CHAR_CAST (uint8_t *, c->string.string),
5272 c->string.length, ' ');
5273 ctables_add_occurrence (var, &value, occurrences);
5274 value_destroy (&value, width);
5279 assert (var_is_numeric (var));
5280 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5281 vl = val_labs_next (val_labs, vl))
5282 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5283 ctables_add_occurrence (var, &vl->value, occurrences);
5287 assert (var_is_alpha (var));
5288 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5289 vl = val_labs_next (val_labs, vl))
5290 if (in_string_range (&vl->value, var, c->srange))
5291 ctables_add_occurrence (var, &vl->value, occurrences);
5295 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5296 vl = val_labs_next (val_labs, vl))
5297 if (var_is_value_missing (var, &vl->value))
5298 ctables_add_occurrence (var, &vl->value, occurrences);
5302 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5303 vl = val_labs_next (val_labs, vl))
5304 ctables_add_occurrence (var, &vl->value, occurrences);
5307 case CCT_POSTCOMPUTE:
5317 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5318 vl = val_labs_next (val_labs, vl))
5319 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5320 ctables_add_occurrence (var, &vl->value, occurrences);
5323 case CCT_EXCLUDED_MISSING:
5330 ctables_section_recurse_add_empty_categories (
5331 struct ctables_section *s,
5332 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5333 enum pivot_axis_type a, size_t a_idx)
5335 if (a >= PIVOT_N_AXES)
5336 ctables_cell_insert__ (s, c, cats);
5337 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5338 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5341 const struct variable *var = s->nests[a]->vars[a_idx];
5342 const struct ctables_categories *categories = s->table->categories[
5343 var_get_dict_index (var)];
5344 int width = var_get_width (var);
5345 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5346 const struct ctables_occurrence *o;
5347 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5349 union value *value = case_data_rw (c, var);
5350 value_destroy (value, width);
5351 value_clone (value, &o->value, width);
5352 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5353 assert (cats[a][a_idx] != NULL);
5354 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5357 for (size_t i = 0; i < categories->n_cats; i++)
5359 const struct ctables_category *cat = &categories->cats[i];
5360 if (cat->type == CCT_POSTCOMPUTE)
5362 cats[a][a_idx] = cat;
5363 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5370 ctables_section_add_empty_categories (struct ctables_section *s)
5372 bool show_empty = false;
5373 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5375 for (size_t k = 0; k < s->nests[a]->n; k++)
5376 if (k != s->nests[a]->scale_idx)
5378 const struct variable *var = s->nests[a]->vars[k];
5379 const struct ctables_categories *cats = s->table->categories[
5380 var_get_dict_index (var)];
5381 if (cats->show_empty)
5384 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5390 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5391 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5392 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5397 ctables_section_clear (struct ctables_section *s)
5399 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5401 const struct ctables_nest *nest = s->nests[a];
5402 for (size_t i = 0; i < nest->n; i++)
5403 if (i != nest->scale_idx)
5405 const struct variable *var = nest->vars[i];
5406 int width = var_get_width (var);
5407 struct ctables_occurrence *o, *next;
5408 struct hmap *map = &s->occurrences[a][i];
5409 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5411 value_destroy (&o->value, width);
5412 hmap_delete (map, &o->node);
5419 struct ctables_cell *cell, *next_cell;
5420 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5422 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5424 const struct ctables_nest *nest = s->nests[a];
5425 for (size_t i = 0; i < nest->n; i++)
5426 if (i != nest->scale_idx)
5427 value_destroy (&cell->axes[a].cvs[i].value,
5428 var_get_width (nest->vars[i]));
5429 free (cell->axes[a].cvs);
5432 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5433 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5434 for (size_t i = 0; i < specs->n; i++)
5435 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5436 free (cell->summaries);
5438 hmap_delete (&s->cells, &cell->node);
5441 hmap_shrink (&s->cells);
5443 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5445 struct ctables_domain *domain, *next_domain;
5446 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5449 free (domain->sums);
5450 hmap_delete (&s->domains[dt], &domain->node);
5453 hmap_shrink (&s->domains[dt]);
5458 ctables_section_uninit (struct ctables_section *s)
5460 ctables_section_clear (s);
5462 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5464 struct ctables_nest *nest = s->nests[a];
5465 for (size_t i = 0; i < nest->n; i++)
5466 hmap_destroy (&s->occurrences[a][i]);
5467 free (s->occurrences[a]);
5470 hmap_destroy (&s->cells);
5471 for (size_t i = 0; i < N_CTDTS; i++)
5472 hmap_destroy (&s->domains[i]);
5476 ctables_table_clear (struct ctables_table *t)
5478 for (size_t i = 0; i < t->n_sections; i++)
5479 ctables_section_clear (&t->sections[i]);
5481 if (t->clabels_example)
5483 int width = var_get_width (t->clabels_example);
5484 struct ctables_value *value, *next_value;
5485 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5486 &t->clabels_values_map)
5488 value_destroy (&value->value, width);
5489 hmap_delete (&t->clabels_values_map, &value->node);
5492 hmap_shrink (&t->clabels_values_map);
5494 free (t->clabels_values);
5495 t->clabels_values = NULL;
5496 t->n_clabels_values = 0;
5501 ctables_execute (struct dataset *ds, struct casereader *input,
5504 for (size_t i = 0; i < ct->n_tables; i++)
5506 struct ctables_table *t = ct->tables[i];
5507 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5508 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5509 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5510 sizeof *t->sections);
5511 size_t ix[PIVOT_N_AXES];
5512 ctables_table_add_section (t, 0, ix);
5515 struct dictionary *dict = dataset_dict (ds);
5517 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5518 struct casegrouper *grouper
5520 ? casegrouper_create_splits (input, dict)
5521 : casegrouper_create_vars (input, NULL, 0));
5522 struct casereader *group;
5523 while (casegrouper_get_next_group (grouper, &group))
5527 struct ccase *c = casereader_peek (group, 0);
5530 output_split_file_values (ds, c);
5535 bool warn_on_invalid = true;
5536 for (struct ccase *c = casereader_read (group); c;
5537 case_unref (c), c = casereader_read (group))
5539 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5540 double e_weight = (ct->e_weight
5541 ? var_force_valid_weight (ct->e_weight,
5542 case_num (c, ct->e_weight),
5546 for (size_t i = 0; i < ct->n_tables; i++)
5548 struct ctables_table *t = ct->tables[i];
5550 for (size_t j = 0; j < t->n_sections; j++)
5551 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5553 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5554 if (t->label_axis[a] != a)
5555 ctables_insert_clabels_values (t, c, a);
5558 casereader_destroy (group);
5560 for (size_t i = 0; i < ct->n_tables; i++)
5562 struct ctables_table *t = ct->tables[i];
5564 if (t->clabels_example)
5565 ctables_sort_clabels_values (t);
5567 for (size_t j = 0; j < t->n_sections; j++)
5568 ctables_section_add_empty_categories (&t->sections[j]);
5570 ctables_table_output (ct, t);
5571 ctables_table_clear (t);
5574 return casegrouper_destroy (grouper);
5579 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5580 struct dictionary *);
5583 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5589 case CTPO_CAT_STRING:
5590 ss_dealloc (&e->string);
5593 case CTPO_CAT_SRANGE:
5594 for (size_t i = 0; i < 2; i++)
5595 ss_dealloc (&e->srange[i]);
5604 for (size_t i = 0; i < 2; i++)
5605 ctables_pcexpr_destroy (e->subs[i]);
5609 case CTPO_CAT_NUMBER:
5610 case CTPO_CAT_NRANGE:
5611 case CTPO_CAT_MISSING:
5612 case CTPO_CAT_OTHERNM:
5613 case CTPO_CAT_SUBTOTAL:
5614 case CTPO_CAT_TOTAL:
5618 msg_location_destroy (e->location);
5623 static struct ctables_pcexpr *
5624 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5625 struct ctables_pcexpr *sub0,
5626 struct ctables_pcexpr *sub1)
5628 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5629 *e = (struct ctables_pcexpr) {
5631 .subs = { sub0, sub1 },
5632 .location = msg_location_merged (sub0->location, sub1->location),
5637 /* How to parse an operator. */
5640 enum token_type token;
5641 enum ctables_postcompute_op op;
5644 static const struct operator *
5645 ctables_pcexpr_match_operator (struct lexer *lexer,
5646 const struct operator ops[], size_t n_ops)
5648 for (const struct operator *op = ops; op < ops + n_ops; op++)
5649 if (lex_token (lexer) == op->token)
5651 if (op->token != T_NEG_NUM)
5660 static struct ctables_pcexpr *
5661 ctables_pcexpr_parse_binary_operators__ (
5662 struct lexer *lexer, struct dictionary *dict,
5663 const struct operator ops[], size_t n_ops,
5664 parse_recursively_func *parse_next_level,
5665 const char *chain_warning, struct ctables_pcexpr *lhs)
5667 for (int op_count = 0; ; op_count++)
5669 const struct operator *op
5670 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5673 if (op_count > 1 && chain_warning)
5674 msg_at (SW, lhs->location, "%s", chain_warning);
5679 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5682 ctables_pcexpr_destroy (lhs);
5686 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5690 static struct ctables_pcexpr *
5691 ctables_pcexpr_parse_binary_operators (
5692 struct lexer *lexer, struct dictionary *dict,
5693 const struct operator ops[], size_t n_ops,
5694 parse_recursively_func *parse_next_level, const char *chain_warning)
5696 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5700 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5702 chain_warning, lhs);
5705 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5706 struct dictionary *);
5708 static struct ctables_pcexpr
5709 ctpo_cat_nrange (double low, double high)
5711 return (struct ctables_pcexpr) {
5712 .op = CTPO_CAT_NRANGE,
5713 .nrange = { low, high },
5717 static struct ctables_pcexpr
5718 ctpo_cat_srange (struct substring low, struct substring high)
5720 return (struct ctables_pcexpr) {
5721 .op = CTPO_CAT_SRANGE,
5722 .srange = { low, high },
5726 static struct ctables_pcexpr *
5727 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5729 int start_ofs = lex_ofs (lexer);
5730 struct ctables_pcexpr e;
5731 if (lex_is_number (lexer))
5733 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5734 .number = lex_number (lexer) };
5737 else if (lex_match_id (lexer, "MISSING"))
5738 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5739 else if (lex_match_id (lexer, "OTHERNM"))
5740 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5741 else if (lex_match_id (lexer, "TOTAL"))
5742 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5743 else if (lex_match_id (lexer, "SUBTOTAL"))
5745 size_t subtotal_index = 0;
5746 if (lex_match (lexer, T_LBRACK))
5748 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5750 subtotal_index = lex_integer (lexer);
5752 if (!lex_force_match (lexer, T_RBRACK))
5755 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5756 .subtotal_index = subtotal_index };
5758 else if (lex_match (lexer, T_LBRACK))
5760 if (lex_match_id (lexer, "LO"))
5762 if (!lex_force_match_id (lexer, "THRU"))
5765 if (lex_is_string (lexer))
5767 struct substring low = { .string = NULL };
5768 struct substring high = parse_substring (lexer, dict);
5769 e = ctpo_cat_srange (low, high);
5773 if (!lex_force_num (lexer))
5775 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5779 else if (lex_is_number (lexer))
5781 double number = lex_number (lexer);
5783 if (lex_match_id (lexer, "THRU"))
5785 if (lex_match_id (lexer, "HI"))
5786 e = ctpo_cat_nrange (number, DBL_MAX);
5789 if (!lex_force_num (lexer))
5791 e = ctpo_cat_nrange (number, lex_number (lexer));
5796 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5799 else if (lex_is_string (lexer))
5801 struct substring s = parse_substring (lexer, dict);
5803 if (lex_match_id (lexer, "THRU"))
5805 struct substring high;
5807 if (lex_match_id (lexer, "HI"))
5808 high = (struct substring) { .string = NULL };
5811 if (!lex_force_string (lexer))
5816 high = parse_substring (lexer, dict);
5819 e = ctpo_cat_srange (s, high);
5822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5826 lex_error (lexer, NULL);
5830 if (!lex_force_match (lexer, T_RBRACK))
5832 if (e.op == CTPO_CAT_STRING)
5833 ss_dealloc (&e.string);
5834 else if (e.op == CTPO_CAT_SRANGE)
5836 ss_dealloc (&e.srange[0]);
5837 ss_dealloc (&e.srange[1]);
5842 else if (lex_match (lexer, T_LPAREN))
5844 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5847 if (!lex_force_match (lexer, T_RPAREN))
5849 ctables_pcexpr_destroy (ep);
5856 lex_error (lexer, NULL);
5860 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5861 return xmemdup (&e, sizeof e);
5864 static struct ctables_pcexpr *
5865 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5866 struct lexer *lexer, int start_ofs)
5868 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5869 *e = (struct ctables_pcexpr) {
5872 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5877 static struct ctables_pcexpr *
5878 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5880 static const struct operator op = { T_EXP, CTPO_POW };
5882 const char *chain_warning =
5883 _("The exponentiation operator (`**') is left-associative: "
5884 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5885 "To disable this warning, insert parentheses.");
5887 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5888 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5889 ctables_pcexpr_parse_primary,
5892 /* Special case for situations like "-5**6", which must be parsed as
5895 int start_ofs = lex_ofs (lexer);
5896 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5897 *lhs = (struct ctables_pcexpr) {
5898 .op = CTPO_CONSTANT,
5899 .number = -lex_tokval (lexer),
5900 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5904 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5905 lexer, dict, &op, 1,
5906 ctables_pcexpr_parse_primary, chain_warning, lhs);
5910 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5913 /* Parses the unary minus level. */
5914 static struct ctables_pcexpr *
5915 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5917 int start_ofs = lex_ofs (lexer);
5918 if (!lex_match (lexer, T_DASH))
5919 return ctables_pcexpr_parse_exp (lexer, dict);
5921 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5925 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5928 /* Parses the multiplication and division level. */
5929 static struct ctables_pcexpr *
5930 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5932 static const struct operator ops[] =
5934 { T_ASTERISK, CTPO_MUL },
5935 { T_SLASH, CTPO_DIV },
5938 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5939 sizeof ops / sizeof *ops,
5940 ctables_pcexpr_parse_neg, NULL);
5943 /* Parses the addition and subtraction level. */
5944 static struct ctables_pcexpr *
5945 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5947 static const struct operator ops[] =
5949 { T_PLUS, CTPO_ADD },
5950 { T_DASH, CTPO_SUB },
5951 { T_NEG_NUM, CTPO_ADD },
5954 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5955 ops, sizeof ops / sizeof *ops,
5956 ctables_pcexpr_parse_mul, NULL);
5959 static struct ctables_postcompute *
5960 ctables_find_postcompute (struct ctables *ct, const char *name)
5962 struct ctables_postcompute *pc;
5963 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5964 utf8_hash_case_string (name, 0), &ct->postcomputes)
5965 if (!utf8_strcasecmp (pc->name, name))
5971 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5974 int pcompute_start = lex_ofs (lexer) - 1;
5976 if (!lex_match (lexer, T_AND))
5978 lex_error_expecting (lexer, "&");
5981 if (!lex_force_id (lexer))
5984 char *name = ss_xstrdup (lex_tokss (lexer));
5987 if (!lex_force_match (lexer, T_EQUALS)
5988 || !lex_force_match_id (lexer, "EXPR")
5989 || !lex_force_match (lexer, T_LPAREN))
5995 int expr_start = lex_ofs (lexer);
5996 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5997 int expr_end = lex_ofs (lexer) - 1;
5998 if (!expr || !lex_force_match (lexer, T_RPAREN))
6000 ctables_pcexpr_destroy (expr);
6004 int pcompute_end = lex_ofs (lexer) - 1;
6006 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
6009 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
6012 msg_at (SW, location, _("New definition of &%s will override the "
6013 "previous definition."),
6015 msg_at (SN, pc->location, _("This is the previous definition."));
6017 ctables_pcexpr_destroy (pc->expr);
6018 msg_location_destroy (pc->location);
6023 pc = xmalloc (sizeof *pc);
6024 *pc = (struct ctables_postcompute) { .name = name };
6025 hmap_insert (&ct->postcomputes, &pc->hmap_node,
6026 utf8_hash_case_string (pc->name, 0));
6029 pc->location = location;
6031 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
6036 ctables_parse_pproperties_format (struct lexer *lexer,
6037 struct ctables_summary_spec_set *sss)
6039 *sss = (struct ctables_summary_spec_set) { .n = 0 };
6041 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
6042 && !(lex_token (lexer) == T_ID
6043 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
6044 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
6045 lex_tokss (lexer)))))
6047 /* Parse function. */
6048 enum ctables_summary_function function;
6049 if (!parse_ctables_summary_function (lexer, &function))
6052 /* Parse percentile. */
6053 double percentile = 0;
6054 if (function == CTSF_PTILE)
6056 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
6058 percentile = lex_number (lexer);
6063 struct fmt_spec format;
6064 bool is_ctables_format;
6065 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
6068 if (sss->n >= sss->allocated)
6069 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6070 sizeof *sss->specs);
6071 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6072 .function = function,
6073 .percentile = percentile,
6075 .is_ctables_format = is_ctables_format,
6081 ctables_summary_spec_set_uninit (sss);
6086 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
6088 struct ctables_postcompute **pcs = NULL;
6090 size_t allocated_pcs = 0;
6092 while (lex_match (lexer, T_AND))
6094 if (!lex_force_id (lexer))
6096 struct ctables_postcompute *pc
6097 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6100 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6105 if (n_pcs >= allocated_pcs)
6106 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
6110 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6112 if (lex_match_id (lexer, "LABEL"))
6114 lex_match (lexer, T_EQUALS);
6115 if (!lex_force_string (lexer))
6118 for (size_t i = 0; i < n_pcs; i++)
6120 free (pcs[i]->label);
6121 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6126 else if (lex_match_id (lexer, "FORMAT"))
6128 lex_match (lexer, T_EQUALS);
6130 struct ctables_summary_spec_set sss;
6131 if (!ctables_parse_pproperties_format (lexer, &sss))
6134 for (size_t i = 0; i < n_pcs; i++)
6137 ctables_summary_spec_set_uninit (pcs[i]->specs);
6139 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6140 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6142 ctables_summary_spec_set_uninit (&sss);
6144 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6146 lex_match (lexer, T_EQUALS);
6147 bool hide_source_cats;
6148 if (!parse_bool (lexer, &hide_source_cats))
6150 for (size_t i = 0; i < n_pcs; i++)
6151 pcs[i]->hide_source_cats = hide_source_cats;
6155 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
6168 put_strftime (struct string *out, time_t now, const char *format)
6170 const struct tm *tm = localtime (&now);
6172 strftime (value, sizeof value, format, tm);
6173 ds_put_cstr (out, value);
6177 skip_prefix (struct substring *s, struct substring prefix)
6179 if (ss_starts_with (*s, prefix))
6181 ss_advance (s, prefix.length);
6189 put_table_expression (struct string *out, struct lexer *lexer,
6190 struct dictionary *dict, int expr_start, int expr_end)
6193 for (int ofs = expr_start; ofs < expr_end; ofs++)
6195 const struct token *t = lex_ofs_token (lexer, ofs);
6196 if (t->type == T_LBRACK)
6198 else if (t->type == T_RBRACK && nest > 0)
6204 else if (t->type == T_ID)
6206 const struct variable *var
6207 = dict_lookup_var (dict, t->string.string);
6208 const char *label = var ? var_get_label (var) : NULL;
6209 ds_put_cstr (out, label ? label : t->string.string);
6213 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6214 ds_put_byte (out, ' ');
6216 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6217 ds_put_cstr (out, repr);
6220 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6221 ds_put_byte (out, ' ');
6227 put_title_text (struct string *out, struct substring in, time_t now,
6228 struct lexer *lexer, struct dictionary *dict,
6229 int expr_start, int expr_end)
6233 size_t chunk = ss_find_byte (in, ')');
6234 ds_put_substring (out, ss_head (in, chunk));
6235 ss_advance (&in, chunk);
6236 if (ss_is_empty (in))
6239 if (skip_prefix (&in, ss_cstr (")DATE")))
6240 put_strftime (out, now, "%x");
6241 else if (skip_prefix (&in, ss_cstr (")TIME")))
6242 put_strftime (out, now, "%X");
6243 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6244 put_table_expression (out, lexer, dict, expr_start, expr_end);
6247 ds_put_byte (out, ')');
6248 ss_advance (&in, 1);
6254 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6256 struct casereader *input = NULL;
6258 struct measure_guesser *mg = measure_guesser_create (ds);
6261 input = proc_open (ds);
6262 measure_guesser_run (mg, input);
6263 measure_guesser_destroy (mg);
6266 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6267 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6268 enum settings_value_show tvars = settings_get_show_variables ();
6269 for (size_t i = 0; i < n_vars; i++)
6270 vlabels[i] = (enum ctables_vlabel) tvars;
6272 struct pivot_table_look *look = pivot_table_look_unshare (
6273 pivot_table_look_ref (pivot_table_look_get_default ()));
6274 look->omit_empty = false;
6276 struct ctables *ct = xmalloc (sizeof *ct);
6277 *ct = (struct ctables) {
6278 .dict = dataset_dict (ds),
6280 .ctables_formats = FMT_SETTINGS_INIT,
6282 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6285 time_t now = time (NULL);
6290 const char *dot_string;
6291 const char *comma_string;
6293 static const struct ctf ctfs[4] = {
6294 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6295 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6296 { CTEF_PAREN, "-,(,),", "-.(.)." },
6297 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6299 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6300 for (size_t i = 0; i < 4; i++)
6302 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6303 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6304 fmt_number_style_from_string (s));
6307 if (!lex_force_match (lexer, T_SLASH))
6310 while (!lex_match_id (lexer, "TABLE"))
6312 if (lex_match_id (lexer, "FORMAT"))
6314 double widths[2] = { SYSMIS, SYSMIS };
6315 double units_per_inch = 72.0;
6317 while (lex_token (lexer) != T_SLASH)
6319 if (lex_match_id (lexer, "MINCOLWIDTH"))
6321 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6324 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6326 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6329 else if (lex_match_id (lexer, "UNITS"))
6331 lex_match (lexer, T_EQUALS);
6332 if (lex_match_id (lexer, "POINTS"))
6333 units_per_inch = 72.0;
6334 else if (lex_match_id (lexer, "INCHES"))
6335 units_per_inch = 1.0;
6336 else if (lex_match_id (lexer, "CM"))
6337 units_per_inch = 2.54;
6340 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6344 else if (lex_match_id (lexer, "EMPTY"))
6349 lex_match (lexer, T_EQUALS);
6350 if (lex_match_id (lexer, "ZERO"))
6352 /* Nothing to do. */
6354 else if (lex_match_id (lexer, "BLANK"))
6355 ct->zero = xstrdup ("");
6356 else if (lex_force_string (lexer))
6358 ct->zero = ss_xstrdup (lex_tokss (lexer));
6364 else if (lex_match_id (lexer, "MISSING"))
6366 lex_match (lexer, T_EQUALS);
6367 if (!lex_force_string (lexer))
6371 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6372 ? ss_xstrdup (lex_tokss (lexer))
6378 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6379 "UNITS", "EMPTY", "MISSING");
6384 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6385 && widths[0] > widths[1])
6387 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6391 for (size_t i = 0; i < 2; i++)
6392 if (widths[i] != SYSMIS)
6394 int *wr = ct->look->width_ranges[TABLE_HORZ];
6395 wr[i] = widths[i] / units_per_inch * 96.0;
6400 else if (lex_match_id (lexer, "VLABELS"))
6402 if (!lex_force_match_id (lexer, "VARIABLES"))
6404 lex_match (lexer, T_EQUALS);
6406 struct variable **vars;
6408 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6412 if (!lex_force_match_id (lexer, "DISPLAY"))
6417 lex_match (lexer, T_EQUALS);
6419 enum ctables_vlabel vlabel;
6420 if (lex_match_id (lexer, "DEFAULT"))
6421 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6422 else if (lex_match_id (lexer, "NAME"))
6424 else if (lex_match_id (lexer, "LABEL"))
6425 vlabel = CTVL_LABEL;
6426 else if (lex_match_id (lexer, "BOTH"))
6428 else if (lex_match_id (lexer, "NONE"))
6432 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6438 for (size_t i = 0; i < n_vars; i++)
6439 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6442 else if (lex_match_id (lexer, "MRSETS"))
6444 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6446 lex_match (lexer, T_EQUALS);
6447 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6450 else if (lex_match_id (lexer, "SMISSING"))
6452 if (lex_match_id (lexer, "VARIABLE"))
6453 ct->smissing_listwise = false;
6454 else if (lex_match_id (lexer, "LISTWISE"))
6455 ct->smissing_listwise = true;
6458 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6462 else if (lex_match_id (lexer, "PCOMPUTE"))
6464 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6467 else if (lex_match_id (lexer, "PPROPERTIES"))
6469 if (!ctables_parse_pproperties (lexer, ct))
6472 else if (lex_match_id (lexer, "WEIGHT"))
6474 if (!lex_force_match_id (lexer, "VARIABLE"))
6476 lex_match (lexer, T_EQUALS);
6477 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6481 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6483 if (lex_match_id (lexer, "COUNT"))
6485 lex_match (lexer, T_EQUALS);
6486 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6489 ct->hide_threshold = lex_integer (lexer);
6492 else if (ct->hide_threshold == 0)
6493 ct->hide_threshold = 5;
6497 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6498 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6499 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6503 if (!lex_force_match (lexer, T_SLASH))
6507 size_t allocated_tables = 0;
6510 if (ct->n_tables >= allocated_tables)
6511 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6512 sizeof *ct->tables);
6514 struct ctables_category *cat = xmalloc (sizeof *cat);
6515 *cat = (struct ctables_category) {
6517 .include_missing = false,
6518 .sort_ascending = true,
6521 struct ctables_categories *c = xmalloc (sizeof *c);
6522 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6523 *c = (struct ctables_categories) {
6530 struct ctables_categories **categories = xnmalloc (n_vars,
6531 sizeof *categories);
6532 for (size_t i = 0; i < n_vars; i++)
6535 struct ctables_table *t = xmalloc (sizeof *t);
6536 *t = (struct ctables_table) {
6538 .slabels_axis = PIVOT_AXIS_COLUMN,
6539 .slabels_visible = true,
6540 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6542 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6543 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6544 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6546 .clabels_from_axis = PIVOT_AXIS_LAYER,
6547 .categories = categories,
6548 .n_categories = n_vars,
6551 ct->tables[ct->n_tables++] = t;
6553 lex_match (lexer, T_EQUALS);
6554 int expr_start = lex_ofs (lexer);
6555 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6557 if (lex_match (lexer, T_BY))
6559 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6560 ct, t, PIVOT_AXIS_COLUMN))
6563 if (lex_match (lexer, T_BY))
6565 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6566 ct, t, PIVOT_AXIS_LAYER))
6570 int expr_end = lex_ofs (lexer);
6572 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6573 && !t->axes[PIVOT_AXIS_LAYER])
6575 lex_error (lexer, _("At least one variable must be specified."));
6579 const struct ctables_axis *scales[PIVOT_N_AXES];
6580 size_t n_scales = 0;
6581 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6583 scales[a] = find_scale (t->axes[a]);
6589 msg (SE, _("Scale variables may appear only on one axis."));
6590 if (scales[PIVOT_AXIS_ROW])
6591 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6592 _("This scale variable appears on the rows axis."));
6593 if (scales[PIVOT_AXIS_COLUMN])
6594 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6595 _("This scale variable appears on the columns axis."));
6596 if (scales[PIVOT_AXIS_LAYER])
6597 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6598 _("This scale variable appears on the layer axis."));
6602 const struct ctables_axis *summaries[PIVOT_N_AXES];
6603 size_t n_summaries = 0;
6604 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6606 summaries[a] = (scales[a]
6608 : find_categorical_summary_spec (t->axes[a]));
6612 if (n_summaries > 1)
6614 msg (SE, _("Summaries may appear only on one axis."));
6615 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6618 msg_at (SN, summaries[a]->loc,
6620 ? _("This variable on the rows axis has a summary.")
6621 : a == PIVOT_AXIS_COLUMN
6622 ? _("This variable on the columns axis has a summary.")
6623 : _("This variable on the layers axis has a summary."));
6625 msg_at (SN, summaries[a]->loc,
6626 _("This is a scale variable, so it always has a "
6627 "summary even if the syntax does not explicitly "
6632 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6633 if (n_summaries ? summaries[a] : t->axes[a])
6635 t->summary_axis = a;
6639 if (lex_token (lexer) == T_ENDCMD)
6641 if (!ctables_prepare_table (t))
6645 if (!lex_force_match (lexer, T_SLASH))
6648 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6650 if (lex_match_id (lexer, "SLABELS"))
6652 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6654 if (lex_match_id (lexer, "POSITION"))
6656 lex_match (lexer, T_EQUALS);
6657 if (lex_match_id (lexer, "COLUMN"))
6658 t->slabels_axis = PIVOT_AXIS_COLUMN;
6659 else if (lex_match_id (lexer, "ROW"))
6660 t->slabels_axis = PIVOT_AXIS_ROW;
6661 else if (lex_match_id (lexer, "LAYER"))
6662 t->slabels_axis = PIVOT_AXIS_LAYER;
6665 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6669 else if (lex_match_id (lexer, "VISIBLE"))
6671 lex_match (lexer, T_EQUALS);
6672 if (!parse_bool (lexer, &t->slabels_visible))
6677 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6682 else if (lex_match_id (lexer, "CLABELS"))
6684 if (lex_match_id (lexer, "AUTO"))
6686 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6687 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6689 else if (lex_match_id (lexer, "ROWLABELS"))
6691 lex_match (lexer, T_EQUALS);
6692 if (lex_match_id (lexer, "OPPOSITE"))
6693 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6694 else if (lex_match_id (lexer, "LAYER"))
6695 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6698 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6702 else if (lex_match_id (lexer, "COLLABELS"))
6704 lex_match (lexer, T_EQUALS);
6705 if (lex_match_id (lexer, "OPPOSITE"))
6706 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6707 else if (lex_match_id (lexer, "LAYER"))
6708 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6711 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6717 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6722 else if (lex_match_id (lexer, "CRITERIA"))
6724 if (!lex_force_match_id (lexer, "CILEVEL"))
6726 lex_match (lexer, T_EQUALS);
6728 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6730 t->cilevel = lex_number (lexer);
6733 else if (lex_match_id (lexer, "CATEGORIES"))
6735 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6739 else if (lex_match_id (lexer, "TITLES"))
6744 if (lex_match_id (lexer, "CAPTION"))
6745 textp = &t->caption;
6746 else if (lex_match_id (lexer, "CORNER"))
6748 else if (lex_match_id (lexer, "TITLE"))
6752 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6755 lex_match (lexer, T_EQUALS);
6757 struct string s = DS_EMPTY_INITIALIZER;
6758 while (lex_is_string (lexer))
6760 if (!ds_is_empty (&s))
6761 ds_put_byte (&s, ' ');
6762 put_title_text (&s, lex_tokss (lexer), now,
6763 lexer, dataset_dict (ds),
6764 expr_start, expr_end);
6768 *textp = ds_steal_cstr (&s);
6770 while (lex_token (lexer) != T_SLASH
6771 && lex_token (lexer) != T_ENDCMD);
6773 else if (lex_match_id (lexer, "SIGTEST"))
6777 t->chisq = xmalloc (sizeof *t->chisq);
6778 *t->chisq = (struct ctables_chisq) {
6780 .include_mrsets = true,
6781 .all_visible = true,
6787 if (lex_match_id (lexer, "TYPE"))
6789 lex_match (lexer, T_EQUALS);
6790 if (!lex_force_match_id (lexer, "CHISQUARE"))
6793 else if (lex_match_id (lexer, "ALPHA"))
6795 lex_match (lexer, T_EQUALS);
6796 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6798 t->chisq->alpha = lex_number (lexer);
6801 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6803 lex_match (lexer, T_EQUALS);
6804 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6807 else if (lex_match_id (lexer, "CATEGORIES"))
6809 lex_match (lexer, T_EQUALS);
6810 if (lex_match_id (lexer, "ALLVISIBLE"))
6811 t->chisq->all_visible = true;
6812 else if (lex_match_id (lexer, "SUBTOTALS"))
6813 t->chisq->all_visible = false;
6816 lex_error_expecting (lexer,
6817 "ALLVISIBLE", "SUBTOTALS");
6823 lex_error_expecting (lexer, "TYPE", "ALPHA",
6824 "INCLUDEMRSETS", "CATEGORIES");
6828 while (lex_token (lexer) != T_SLASH
6829 && lex_token (lexer) != T_ENDCMD);
6831 else if (lex_match_id (lexer, "COMPARETEST"))
6835 t->pairwise = xmalloc (sizeof *t->pairwise);
6836 *t->pairwise = (struct ctables_pairwise) {
6838 .alpha = { .05, .05 },
6839 .adjust = BONFERRONI,
6840 .include_mrsets = true,
6841 .meansvariance_allcats = true,
6842 .all_visible = true,
6851 if (lex_match_id (lexer, "TYPE"))
6853 lex_match (lexer, T_EQUALS);
6854 if (lex_match_id (lexer, "PROP"))
6855 t->pairwise->type = PROP;
6856 else if (lex_match_id (lexer, "MEAN"))
6857 t->pairwise->type = MEAN;
6860 lex_error_expecting (lexer, "PROP", "MEAN");
6864 else if (lex_match_id (lexer, "ALPHA"))
6866 lex_match (lexer, T_EQUALS);
6868 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6870 double a0 = lex_number (lexer);
6873 lex_match (lexer, T_COMMA);
6874 if (lex_is_number (lexer))
6876 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6878 double a1 = lex_number (lexer);
6881 t->pairwise->alpha[0] = MIN (a0, a1);
6882 t->pairwise->alpha[1] = MAX (a0, a1);
6885 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6887 else if (lex_match_id (lexer, "ADJUST"))
6889 lex_match (lexer, T_EQUALS);
6890 if (lex_match_id (lexer, "BONFERRONI"))
6891 t->pairwise->adjust = BONFERRONI;
6892 else if (lex_match_id (lexer, "BH"))
6893 t->pairwise->adjust = BH;
6894 else if (lex_match_id (lexer, "NONE"))
6895 t->pairwise->adjust = 0;
6898 lex_error_expecting (lexer, "BONFERRONI", "BH",
6903 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6905 lex_match (lexer, T_EQUALS);
6906 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6909 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6911 lex_match (lexer, T_EQUALS);
6912 if (lex_match_id (lexer, "ALLCATS"))
6913 t->pairwise->meansvariance_allcats = true;
6914 else if (lex_match_id (lexer, "TESTEDCATS"))
6915 t->pairwise->meansvariance_allcats = false;
6918 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6922 else if (lex_match_id (lexer, "CATEGORIES"))
6924 lex_match (lexer, T_EQUALS);
6925 if (lex_match_id (lexer, "ALLVISIBLE"))
6926 t->pairwise->all_visible = true;
6927 else if (lex_match_id (lexer, "SUBTOTALS"))
6928 t->pairwise->all_visible = false;
6931 lex_error_expecting (lexer, "ALLVISIBLE",
6936 else if (lex_match_id (lexer, "MERGE"))
6938 lex_match (lexer, T_EQUALS);
6939 if (!parse_bool (lexer, &t->pairwise->merge))
6942 else if (lex_match_id (lexer, "STYLE"))
6944 lex_match (lexer, T_EQUALS);
6945 if (lex_match_id (lexer, "APA"))
6946 t->pairwise->apa_style = true;
6947 else if (lex_match_id (lexer, "SIMPLE"))
6948 t->pairwise->apa_style = false;
6951 lex_error_expecting (lexer, "APA", "SIMPLE");
6955 else if (lex_match_id (lexer, "SHOWSIG"))
6957 lex_match (lexer, T_EQUALS);
6958 if (!parse_bool (lexer, &t->pairwise->show_sig))
6963 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6964 "INCLUDEMRSETS", "MEANSVARIANCE",
6965 "CATEGORIES", "MERGE", "STYLE",
6970 while (lex_token (lexer) != T_SLASH
6971 && lex_token (lexer) != T_ENDCMD);
6975 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6976 "CRITERIA", "CATEGORIES", "TITLES",
6977 "SIGTEST", "COMPARETEST");
6981 if (!lex_match (lexer, T_SLASH))
6985 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6986 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6988 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6992 if (!ctables_prepare_table (t))
6995 while (lex_token (lexer) != T_ENDCMD);
6998 input = proc_open (ds);
6999 bool ok = ctables_execute (ds, input, ct);
7000 ok = proc_commit (ds) && ok;
7002 ctables_destroy (ct);
7003 return ok ? CMD_SUCCESS : CMD_FAILURE;
7008 ctables_destroy (ct);