1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  Instances live in struct ctables's
   'postcomputes' hmap. */
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
/* Releases the resources that CAT owns.  The visible code destroys CAT's
   source location and, in type-specific cases, deallocates its string, both
   string-range endpoints, or its total label.  Which category type selects
   which cleanup is decided by case labels that are mostly not shown here. */
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be "null" (have a null
   'string' pointer).  Two null substrings are equal, a null and a nonnull
   substring are unequal, and two nonnull substrings are compared with
   ss_equals(). */
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Returns true if categories A and B are equivalent.  Categories of different
   types never compare equal; otherwise the comparison uses only the members
   that are meaningful for the shared type. */
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range endpoints may be null, so use the null-aware comparison. */
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by pointer identity, not by content. */
569 return a->pc == b->pc;
/* NOTE(review): only the label is compared on this line;
   'hide_subcategories' is not part of the visible comparison.  Confirm that
   this is intended. */
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
/* Releases one reference to C.  The visible code asserts that C's reference
   count is positive and uninitializes every category in C->cats.  The
   decrement of the count and the condition that gates this cleanup are on
   lines not shown here (presumably cleanup happens only when the count
   reaches zero -- confirm). */
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They differ if their category counts or
   'show_empty' settings differ, or if any pair of categories at the same
   index fails ctables_category_equal(). */
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
/* Element-wise comparison: order matters. */
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Makes DST a copy of SRC.  DST gets its own duplicate of SRC's label (or a
   null label if SRC has none), so the two specs do not share the string.
   Any copying of the other members happens on a line not shown here. */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  Every spec in SRC is cloned into a newly
   allocated array (null when SRC has no specs), and DST is then overwritten
   as a whole with a compound literal.  Only the 'is_scale' initializer of
   that literal is visible here. */
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
/* Avoid a zero-size allocation for an empty set. */
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
/* Releases what SET owns: each of its specs is uninitialized and the array of
   listwise variables is freed.  Freeing of the specs array itself, if any, is
   on a line not shown here. */
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
/* Parses a column width setting from LEXER.  An optional '=' is skipped, then
   either the keyword DEFAULT or a number in the closed range [0, DBL_MAX] is
   accepted; NAME is passed to the range check (presumably for its error
   message).  A parsed number is stored in *WIDTH.  What DEFAULT stores, and
   the return value, are on lines not shown here. */
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER for *B.  Any other token is
   reported as an error that lists both keywords as expected.  The
   assignments to *B and the return values are on lines not shown here. */
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table is
   generated from "ctables.inc" by defining S() so that each entry contributes
   its AVAILABILITY column at the index given by its ENUM column. */
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
/* Returns true if F is one of the three plain count functions: CTSF_COUNT,
   CTSF_ECOUNT, or CTSF_UCOUNT. */
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
/* Parses a summary function name from LEXER.  The current token must be an
   identifier.  It is looked up with ss_equals_case() in a table holding every
   NAME from "ctables.inc" plus the short aliases listed explicitly below.  On
   a match the corresponding function is stored in *F; if nothing matches, the
   error "Expecting summary function name." is reported.  Token consumption
   and return values are on lines not shown here. */
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
/* Linear search over the table of names. */
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS and, recursively, its two sub-axes.  The visible code
   uninitializes every summary spec set in AXIS->specs, destroys AXIS->subs[0]
   and AXIS->subs[1], and destroys AXIS's location.  Which of these steps
   applies to which kind of axis node is decided on lines not shown here.
   Callers in this file pass a possibly null AXIS, so a null check presumably
   exists on a line not shown -- confirm. */
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
/* Allocates a new axis node that has SUB0 and SUB1 as its sub-axes.  Its
   source location spans the tokens from offset START_OFS through the token
   just before LEXER's current one.  OP is presumably stored as the node's
   operator by an initializer on a line not shown here. */
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The FORMAT column of "ctables.inc" classifies each function, and the
   classification selects one of three results: F format with width 40, PCT
   format with width 40 and 1 decimal, or VAR's own print format.  The case
   labels that map classification to result are on lines not shown here. */
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
/* Creates and returns the label to display for summary SPEC.

   One path uses a default label: for CTSF_PTILE, the text "Percentile %.2f"
   (its argument is on a line not shown), and for every other function the
   LABEL column of "ctables.inc".

   The other path uses SPEC->label: the label is copied to the output with
   each occurrence of the literal text ")CILEVEL" replaced by CILEVEL
   formatted with "%g".  The condition choosing between the two paths is on a
   line not shown (presumably whether SPEC->label is null -- confirm). */
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
/* Copy everything up to the next ")CILEVEL" and drop it from the input. */
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
/* The returned value takes ownership of the accumulated string. */
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the placeholder and emit the confidence level in its place. */
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
/* Returns the name of summary FUNCTION, taken from the NAME column of
   "ctables.inc".  The returned string is an element of a static table. */
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
/* Adds a summary specification to every variable under AXIS, for summary
   variant SV.

   For a variable node (CTAO_VAR), FUNCTION's availability is checked first:
   a function restricted to multiple response sets, or one restricted to scale
   variables that is applied to a non-scale variable outside CSV_TOTAL, is
   reported as an error at LOC with a note at the variable's location.
   Otherwise a new spec is appended to AXIS->specs[SV], with a private copy of
   LABEL and with FORMAT, or the function's default format if FORMAT is null.

   For any other node, the spec is added recursively to both sub-axes, and the
   first failure stops the recursion.  Return statements are on lines not
   shown here. */
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append the new spec, growing the array when it is full. */
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
/* Nonterminal node: apply the same spec to both sub-axes. */
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   A variable may be followed by "[S]" to force it to be treated as scale or
   "[C]" to force it to be treated as categorical; otherwise its measurement
   level decides.  Using a string variable as scale is an error, and the
   newly created axis is destroyed in that case.  Return statements are on
   lines not shown here. */
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* Clean up if the inner expression failed or ')' is missing. */
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] overrides the variable's measurement level. */
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
/* Returns true if null-terminated string S contains at least one of the
   characters '0' through '9'.  strcspn() yields the length of the initial
   run without digits, so a digit exists exactly when that run ends before
   the terminating null. */
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The type name is first checked, ignoring case, against the CTABLES-only
   formats NEGPAREN, NEQUAL, PAREN, and PCTPAREN, which map to the CTEF_*
   types.  For one of those, the width and decimals are validated and
   *IS_CTABLES_FORMAT is set to true.  For any other name,
   *IS_CTABLES_FORMAT is set to false and the specifier is handled as an
   ordinary format that must be valid for output and compatible with numeric
   values.  The exact width condition behind the "width 2 or greater" message
   and the return statements are on lines not shown here. */
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific name: fall back to the regular format parser. */
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a summary function name, then (for PTILE only) a
   percentile in [0, 100], then an optional quoted label, then an optional
   output format.  A token is taken to be a format only if it is an
   identifier that contains a digit.  Specifications may be separated by
   commas.  Within the cell summaries, TOTALS followed by '[' starts a nested
   list; the check on CSV_TOTAL below indicates that this list is parsed with
   SV set to CSV_TOTAL and needs its own closing ']'.  On the visible error
   path the sub-axis is destroyed.  Loop structure and return statements are
   on lines not shown here. */
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
/* Optional quoted label. */
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
/* Optional format: an identifier containing a digit. */
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
/* NOTE(review): add_summary_spec() is used as a boolean elsewhere in this
   file, but its result is not checked on the lines visible here.  Confirm
   whether a failure should abort parsing. */
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
/* A TOTALS list needs a second ']' to close the outer list. */
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A variable node yields itself if it
   is scale and null otherwise; for other nodes, both sub-axes are searched
   in order.  The handling of the first branch (before the 'else if') and of
   the search result is on lines not shown here. */
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a categorical (non-scale) variable that has at least one
   cell summary specification.  A variable node yields itself if it qualifies
   and null otherwise; for other nodes, both sub-axes are searched in order.
   The handling of the first branch and of the search result is on lines not
   shown here. */
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix axis expressions joined by '>' (nesting).

   For each '>', a CTAO_NEST node is built from the left and right operands
   and two restrictions are checked.  Scale variables may not appear on both
   sides, and a categorical variable on the outer (left) side may not carry
   summaries.  A violation is reported at the nest's location with notes at
   the offending variables, and the nest node is destroyed.  If the
   right-hand operand fails to parse, the left-hand side is destroyed.
   Return statements and the update of the left-hand side are on lines not
   shown here. */
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nest may contain a scale variable. */
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
/* Categorical summaries are only allowed at the innermost level. */
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by '+' (stacking).  Each '+'
   combines the expression parsed so far with the next operand into a
   CTAO_STACK node, so the result associates to the left.  If an operand
   fails to parse, the expression built so far is destroyed.  Return
   statements are on lines not shown here. */
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   result in T->axes[A].  If the next token is BY, '/', or the end of the
   command, no expression is present; what is returned in that case is on a
   line not shown here.  Otherwise returns true if a nonnull axis was parsed
   and stored. */
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources that the visible code shows T owning: its sections,
   its per-variable category lists (by dropping a reference to each), its
   axes and stacks, its summary spec array, the map and array of moved
   category label values, and its chi-square and pairwise test settings.
   Other frees, including that of T itself, may be on lines not shown here. */
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
/* NOTE(review): only the array is freed here, rather than calling
   ctables_summary_spec_set_uninit().  Confirm that the elements do not own
   anything that needs releasing. */
1354 free (t->summary_specs.specs);
/* Destroy each value in the map; its width comes from the example
   variable. */
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources that the visible code shows CT owning: every
   postcompute in CT->postcomputes (its location, expression, and summary
   specs), the map itself, the CTABLES-specific format settings, the
   reference to the pivot table look, and every table in CT->tables.  Other
   frees, including that of CT itself, may be on lines not shown here. */
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1398 hmap_destroy (&ct->postcomputes);
1400 fmt_settings_uninit (&ct->ctables_formats);
1401 pivot_table_look_unref (ct->look);
1405 for (size_t i = 0; i < ct->n_tables; i++)
1406 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose numeric range runs from LOW to HIGH.  Members
   other than those set in the compound literal are zero-initialized.  The
   type initializer is on a line not shown here. */
1411 static struct ctables_category
1412 cct_nrange (double low, double high)
1414 return (struct ctables_category) {
1416 .nrange = { low, high }
/* Returns a category whose string range runs from LOW to HIGH.  The
   substrings are stored as given, not copied.  Members other than those set
   in the compound literal are zero-initialized.  The type initializer is on
   a line not shown here. */
1420 static struct ctables_category
1421 cct_srange (struct substring low, struct substring high)
1423 return (struct ctables_category) {
1425 .srange = { low, high }
/* Parses the rest of a SUBTOTAL or HSUBTOTAL category, after the keyword,
   and stores the result in *CAT as a CCT_SUBTOTAL category.  If '=' follows,
   a string is required and a copy of it becomes the label; otherwise the
   label is a copy of the translated text "Subtotal".  HIDE_SUBCATEGORIES is
   stored as given.  Token consumption and return values are on lines not
   shown here. */
1430 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1431 struct ctables_category *cat)
1434 if (lex_match (lexer, T_EQUALS))
1436 if (!lex_force_string (lexer))
1439 total_label = ss_xstrdup (lex_tokss (lexer));
1443 total_label = xstrdup (_("Subtotal"));
1445 *cat = (struct ctables_category) {
1446 .type = CCT_SUBTOTAL,
1447 .hide_subcategories = hide_subcategories,
1448 .total_label = total_label
/* Converts LEXER's current string token from UTF-8 to DICT's encoding and
   strips trailing spaces from the result.  The null pool argument suggests
   that the result is separately allocated and owned by the caller --
   confirm.  Token consumption and the return statement are on lines not
   shown here. */
1453 static struct substring
1454 parse_substring (struct lexer *lexer, struct dictionary *dict)
1456 struct substring s = recode_substring_pool (
1457 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1458 ss_rtrim (&s, ss_cstr (" "));
/* Parses one element of an explicit category list into *CAT.  The accepted
   forms, in the order they are tried:

   - OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, the latter hiding subcategories.
   - LO THRU followed by a string or number: a range with an open low end,
     encoded as a null low string or -DBL_MAX.
   - A number, optionally followed by THRU and either HI (high end DBL_MAX)
     or another number.
   - A string, optionally followed by THRU and either HI (null high string)
     or another string.
   - '&' followed by the name of a postcompute, which must already be known
     to CT.

   Anything else is a syntax error.  Token consumption and return values are
   on lines not shown here. */
1464 ctables_table_parse_explicit_category (struct lexer *lexer,
1465 struct dictionary *dict,
1467 struct ctables_category *cat)
1469 if (lex_match_id (lexer, "OTHERNM"))
1470 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1471 else if (lex_match_id (lexer, "MISSING"))
1472 *cat = (struct ctables_category) { .type = CCT_MISSING };
1473 else if (lex_match_id (lexer, "SUBTOTAL"))
1474 return ctables_table_parse_subtotal (lexer, false, cat);
1475 else if (lex_match_id (lexer, "HSUBTOTAL"))
1476 return ctables_table_parse_subtotal (lexer, true, cat);
1477 else if (lex_match_id (lexer, "LO"))
1479 if (!lex_force_match_id (lexer, "THRU"))
1481 if (lex_is_string (lexer))
/* A null low endpoint stands for an open-ended string range. */
1483 struct substring sr0 = { .string = NULL };
1484 struct substring sr1 = parse_substring (lexer, dict);
1485 *cat = cct_srange (sr0, sr1);
1487 else if (lex_force_num (lexer))
1489 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1495 else if (lex_is_number (lexer))
1497 double number = lex_number (lexer);
1499 if (lex_match_id (lexer, "THRU"))
1501 if (lex_match_id (lexer, "HI"))
1502 *cat = cct_nrange (number, DBL_MAX);
1505 if (!lex_force_num (lexer))
1507 *cat = cct_nrange (number, lex_number (lexer));
1512 *cat = (struct ctables_category) {
1517 else if (lex_is_string (lexer))
1519 struct substring s = parse_substring (lexer, dict);
1520 if (lex_match_id (lexer, "THRU"))
1522 if (lex_match_id (lexer, "HI"))
/* A null high endpoint stands for an open-ended string range. */
1524 struct substring sr1 = { .string = NULL };
1525 *cat = cct_srange (s, sr1);
1529 if (!lex_force_string (lexer))
1534 struct substring sr1 = parse_substring (lexer, dict);
1535 *cat = cct_srange (s, sr1);
1539 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1541 else if (lex_match (lexer, T_AND))
1543 if (!lex_force_id (lexer))
1545 struct ctables_postcompute *pc = ctables_find_postcompute (
1546 ct, lex_tokcstr (lexer));
/* Report an unknown name at a location covering the '&' and the name. */
1549 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1550 msg_at (SE, loc, _("Unknown postcompute &%s."),
1551 lex_tokcstr (lexer));
1552 msg_location_destroy (loc);
1557 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1561 lex_error (lexer, NULL);
/* Parses S, which is in DICT's encoding, as a value in input format FORMAT
   using data_in() with the current format settings.  If data_in() reports
   an error, a message naming FORMAT and the error is issued at LOCATION.
   *N presumably receives the parsed number; that assignment and the return
   values are on lines not shown here. */
1569 parse_category_string (struct msg_location *location,
1570 struct substring s, const struct dictionary *dict,
1571 enum fmt_type format, double *n)
1574 char *error = data_in (s, dict_get_encoding (dict), format,
1575 settings_get_fmt_settings (), &v, 0, NULL);
1578 msg_at (SE, location,
1579 _("Failed to parse category specification as format %s: %s."),
1580 fmt_name (format), error);
/* Searches CATS for the category that category operand E of a postcompute
   expression refers to.

   Each category is tested according to E's operator.  Numbers and numeric
   ranges must match by value, strings by ss_equals(), and string ranges by
   the null-aware endpoint comparison.  MISSING, OTHERNM, SUBTOTAL, and TOTAL
   match on category type alone.  Subtotals are counted as they are seen, and
   E->subtotal_index is compared with that count; index 0 gets special
   treatment here and in the final check, which fires when more than one
   subtotal exists.  What is recorded in 'best' and what is returned are on
   lines not shown here. */
1589 static struct ctables_category *
1590 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1591 const struct ctables_pcexpr *e)
1593 struct ctables_category *best = NULL;
1594 size_t n_subtotals = 0;
1595 for (size_t i = 0; i < cats->n_cats; i++)
1597 struct ctables_category *cat = &cats->cats[i];
1600 case CTPO_CAT_NUMBER:
1601 if (cat->type == CCT_NUMBER && cat->number == e->number)
1605 case CTPO_CAT_STRING:
1606 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1610 case CTPO_CAT_NRANGE:
1611 if (cat->type == CCT_NRANGE
1612 && cat->nrange[0] == e->nrange[0]
1613 && cat->nrange[1] == e->nrange[1])
1617 case CTPO_CAT_SRANGE:
1618 if (cat->type == CCT_SRANGE
1619 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1620 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1624 case CTPO_CAT_MISSING:
1625 if (cat->type == CCT_MISSING)
1629 case CTPO_CAT_OTHERNM:
1630 if (cat->type == CCT_OTHERNM)
1634 case CTPO_CAT_SUBTOTAL:
1635 if (cat->type == CCT_SUBTOTAL)
1638 if (e->subtotal_index == n_subtotals)
1640 else if (e->subtotal_index == 0)
1645 case CTPO_CAT_TOTAL:
1646 if (cat->type == CCT_TOTAL)
/* An unindexed SUBTOTAL reference with several subtotals present. */
1660 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Looks up the category in CATS that postcompute expression E refers to,
   first translating string references into numeric ones when the categories
   are parsed with a format other than F.

   When PARSE_FORMAT is not FMT_F:
   - a CTPO_CAT_STRING reference is parsed with parse_category_string() and
     then looked up as a CTPO_CAT_NUMBER reference;
   - a CTPO_CAT_SRANGE reference is converted to a CTPO_CAT_NRANGE reference,
     parsing each endpoint with parse_category_string(); an endpoint whose
     string is null becomes -DBL_MAX (lower) or DBL_MAX (upper).

   In every other case E is passed unchanged to
   ctables_find_category_for_postcompute__().  The translated expression
   keeps E's location. */
1665 static struct ctables_category *
1666 ctables_find_category_for_postcompute (const struct dictionary *dict,
1667 const struct ctables_categories *cats,
1668 enum fmt_type parse_format,
1669 const struct ctables_pcexpr *e)
1671 if (parse_format != FMT_F)
1673 if (e->op == CTPO_CAT_STRING)
1676 if (!parse_category_string (e->location, e->string, dict,
1677 parse_format, &number))
1680 struct ctables_pcexpr e2 = {
1681 .op = CTPO_CAT_NUMBER,
1683 .location = e->location,
1685 return ctables_find_category_for_postcompute__ (cats, &e2);
1687 else if (e->op == CTPO_CAT_SRANGE)
1690 if (!e->srange[0].string)
1691 nrange[0] = -DBL_MAX;
1692 else if (!parse_category_string (e->location, e->srange[0], dict,
1693 parse_format, &nrange[0]))
1696 if (!e->srange[1].string)
1697 nrange[1] = DBL_MAX;
1698 else if (!parse_category_string (e->location, e->srange[1], dict,
1699 parse_format, &nrange[1]))
1702 struct ctables_pcexpr e2 = {
1703 .op = CTPO_CAT_NRANGE,
1704 .nrange = { nrange[0], nrange[1] },
1705 .location = e->location,
1707 return ctables_find_category_for_postcompute__ (cats, &e2);
1710 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks the category references made by postcompute expression E, and
   recursively by its (up to two) subexpressions in e->subs[], against CATS,
   the category list that contains computed category PC_CAT.

   Each category reference is resolved with
   ctables_find_category_for_postcompute(), using pc_cat->parse_format.
   Diagnostics issued below:
   - for a SUBTOTAL reference with index 0 when CATS contains more than one
     subtotal: an error at CATS_LOCATION plus a note at the reference;
   - for a reference to a category not in the list: an error at PC_CAT's
     location, a note at the reference, and a hint whose wording depends on
     whether the reference is SUBTOTAL, TOTAL, or anything else.

   The result is used as a boolean by the recursive call below (negated to
   detect failure). */
1714 ctables_recursive_check_postcompute (struct dictionary *dict,
1715 const struct ctables_pcexpr *e,
1716 struct ctables_category *pc_cat,
1717 const struct ctables_categories *cats,
1718 const struct msg_location *cats_location)
1722 case CTPO_CAT_NUMBER:
1723 case CTPO_CAT_STRING:
1724 case CTPO_CAT_NRANGE:
1725 case CTPO_CAT_SRANGE:
1726 case CTPO_CAT_MISSING:
1727 case CTPO_CAT_OTHERNM:
1728 case CTPO_CAT_SUBTOTAL:
1729 case CTPO_CAT_TOTAL:
1731 struct ctables_category *cat = ctables_find_category_for_postcompute (
1732 dict, cats, pc_cat->parse_format, e);
1735 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so the message can say how many there are. */
1737 size_t n_subtotals = 0;
1738 for (size_t i = 0; i < cats->n_cats; i++)
1739 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1740 if (n_subtotals > 1)
1742 msg_at (SE, cats_location,
1743 ngettext ("These categories include %zu instance "
1744 "of SUBTOTAL or HSUBTOTAL, so references "
1745 "from computed categories must refer to "
1746 "subtotals by position, "
1747 "e.g. SUBTOTAL[1].",
1748 "These categories include %zu instances "
1749 "of SUBTOTAL or HSUBTOTAL, so references "
1750 "from computed categories must refer to "
1751 "subtotals by position, "
1752 "e.g. SUBTOTAL[1].",
1755 msg_at (SN, e->location,
1756 _("This is the reference that lacks a position."));
1761 msg_at (SE, pc_cat->location,
1762 _("Computed category &%s references a category not included "
1763 "in the category list."),
1765 msg_at (SN, e->location, _("This is the missing category."));
1766 if (e->op == CTPO_CAT_SUBTOTAL)
1767 msg_at (SN, cats_location,
1768 _("To fix the problem, add subtotals to the "
1769 "list of categories here."));
1770 else if (e->op == CTPO_CAT_TOTAL)
1771 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1772 "CATEGORIES specification."));
1774 msg_at (SN, cats_location,
1775 _("To fix the problem, add the missing category to the "
1776 "list of categories here."));
1779 if (pc_cat->pc->hide_source_cats)
/* Recurse into both operands; a null operand is skipped. */
1793 for (size_t i = 0; i < 2; i++)
1794 if (e->subs[i] && !ctables_recursive_check_postcompute (
1795 dict, e->subs[i], pc_cat, cats, cats_location))
/* Examines the N_VARS variables in VARS.  For a numeric variable, issues an
   SE message at CAT's location explaining that this category specification
   applies only to string variables and naming the offending variable.

   Callers test the result with `!', so it presumably reports whether all of
   the variables are strings -- TODO confirm. */
1804 all_strings (struct variable **vars, size_t n_vars,
1805 const struct ctables_category *cat)
1807 for (size_t j = 0; j < n_vars; j++)
1808 if (var_is_numeric (vars[j]))
1810 msg_at (SE, cat->location,
1811 _("This category specification may be applied only to string "
1812 "variables, but this subcommand tries to apply it to "
1813 "numeric variable %s."),
1814 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER.

   Syntax handled below, in order:
   - the VARIABLES keyword, an optional `=', and a list of variables from
     DICT (scratch variables not allowed);
   - optionally, a bracketed list of explicit categories, each parsed by
     ctables_table_parse_explicit_category() and tagged with its source
     location;
   - settings, up to the next `/' or the end of the command:
       ORDER=A|D, KEY=VALUE|LABEL|<summary function>, and
       MISSING=INCLUDE|EXCLUDE, accepted only when no explicit categories
       were given; and TOTAL, LABEL, POSITION=BEFORE|AFTER, and
       EMPTY=INCLUDE|EXCLUDE.

   A new ctables_categories is allocated with n_refs set to the number of
   variables and show_empty defaulting to true; any categories previously
   attached to each variable in t->categories[] are unreferenced.

   After parsing, when an explicit list was given, each category is
   post-processed: postcompute categories are validated with
   ctables_recursive_check_postcompute(); string and string-range categories
   are either converted to numbers / numeric ranges via
   parse_category_string() using the variables' common print format, or
   checked with all_strings(); and some categories draw an error if any
   variable is a string variable. */
1821 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1822 struct ctables *ct, struct ctables_table *t)
1824 if (!lex_match_id (lexer, "VARIABLES"))
1826 lex_match (lexer, T_EQUALS);
1828 struct variable **vars;
1830 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all of the variables share one print format type;
   common_format becomes NULL if they do not. */
1833 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1834 for (size_t i = 1; i < n_vars; i++)
1836 const struct fmt_spec *f = var_get_print_format (vars[i]);
1837 if (f->type != common_format->type)
1839 common_format = NULL;
1845 && (fmt_get_category (common_format->type)
1846 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1848 struct ctables_categories *c = xmalloc (sizeof *c);
1849 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1850 for (size_t i = 0; i < n_vars; i++)
1852 struct ctables_categories **cp
1853 = &t->categories[var_get_dict_index (vars[i])];
1854 ctables_categories_unref (*cp);
1858 size_t allocated_cats = 0;
1859 int cats_start_ofs = -1;
1860 int cats_end_ofs = -1;
/* Explicit category list.  The token offsets are remembered so that
   diagnostics can later point at the whole list. */
1861 if (lex_match (lexer, T_LBRACK))
1863 cats_start_ofs = lex_ofs (lexer);
1866 if (c->n_cats >= allocated_cats)
1867 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1869 int start_ofs = lex_ofs (lexer);
1870 struct ctables_category *cat = &c->cats[c->n_cats];
1871 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1873 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1876 lex_match (lexer, T_COMMA);
1878 while (!lex_match (lexer, T_RBRACK));
1879 cats_end_ofs = lex_ofs (lexer) - 1;
1882 struct ctables_category cat = {
1884 .include_missing = false,
1885 .sort_ascending = true,
1887 bool show_totals = false;
1888 char *total_label = NULL;
1889 bool totals_before = false;
/* Settings that follow the variable list and optional category list. */
1890 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1892 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1894 lex_match (lexer, T_EQUALS);
1895 if (lex_match_id (lexer, "A"))
1896 cat.sort_ascending = true;
1897 else if (lex_match_id (lexer, "D"))
1898 cat.sort_ascending = false;
1901 lex_error_expecting (lexer, "A", "D");
1905 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1907 lex_match (lexer, T_EQUALS);
1908 if (lex_match_id (lexer, "VALUE"))
1909 cat.type = CCT_VALUE;
1910 else if (lex_match_id (lexer, "LABEL"))
1911 cat.type = CCT_LABEL;
1914 cat.type = CCT_FUNCTION;
1915 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1918 if (lex_match (lexer, T_LPAREN))
1920 cat.sort_var = parse_variable (lexer, dict);
1924 if (cat.sort_function == CTSF_PTILE)
1926 lex_match (lexer, T_COMMA);
1927 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1929 cat.percentile = lex_number (lexer);
1933 if (!lex_force_match (lexer, T_RPAREN))
1936 else if (ctables_function_availability (cat.sort_function)
1939 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1944 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1946 lex_match (lexer, T_EQUALS);
1947 if (lex_match_id (lexer, "INCLUDE"))
1948 cat.include_missing = true;
1949 else if (lex_match_id (lexer, "EXCLUDE"))
1950 cat.include_missing = false;
1953 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1957 else if (lex_match_id (lexer, "TOTAL"))
1959 lex_match (lexer, T_EQUALS);
1960 if (!parse_bool (lexer, &show_totals))
1963 else if (lex_match_id (lexer, "LABEL"))
1965 lex_match (lexer, T_EQUALS);
1966 if (!lex_force_string (lexer))
1969 total_label = ss_xstrdup (lex_tokss (lexer));
1972 else if (lex_match_id (lexer, "POSITION"))
1974 lex_match (lexer, T_EQUALS);
1975 if (lex_match_id (lexer, "BEFORE"))
1976 totals_before = true;
1977 else if (lex_match_id (lexer, "AFTER"))
1978 totals_before = false;
1981 lex_error_expecting (lexer, "BEFORE", "AFTER");
1985 else if (lex_match_id (lexer, "EMPTY"))
1987 lex_match (lexer, T_EQUALS);
1988 if (lex_match_id (lexer, "INCLUDE"))
1989 c->show_empty = true;
1990 else if (lex_match_id (lexer, "EXCLUDE"))
1991 c->show_empty = false;
1994 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2001 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2002 "TOTAL", "LABEL", "POSITION", "EMPTY");
2004 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2011 if (c->n_cats >= allocated_cats)
2012 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2013 c->cats[c->n_cats++] = cat;
2018 if (c->n_cats >= allocated_cats)
2019 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
/* The totals category goes either at index 0 (after making room with
   insert_element()) or at the end of the array. */
2021 struct ctables_category *totals;
2024 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2025 totals = &c->cats[0];
2028 totals = &c->cats[c->n_cats];
2031 *totals = (struct ctables_category) {
2033 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come before, backward otherwise,
   recording in each category the subtotal currently in effect. */
2037 struct ctables_category *subtotal = NULL;
2038 for (size_t i = totals_before ? 0 : c->n_cats;
2039 totals_before ? i < c->n_cats : i-- > 0;
2040 totals_before ? i++ : 0)
2042 struct ctables_category *cat = &c->cats[i];
2051 cat->subtotal = subtotal;
2054 case CCT_POSTCOMPUTE:
2065 case CCT_EXCLUDED_MISSING:
/* Post-process explicit categories (only when a bracketed list was
   parsed). */
2070 if (cats_start_ofs != -1)
2072 for (size_t i = 0; i < c->n_cats; i++)
2074 struct ctables_category *cat = &c->cats[i];
2077 case CCT_POSTCOMPUTE:
2078 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2079 struct msg_location *cats_location
2080 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2081 bool ok = ctables_recursive_check_postcompute (
2082 dict, cat->pc->expr, cat, c, cats_location);
2083 msg_location_destroy (cats_location);
2090 for (size_t j = 0; j < n_vars; j++)
2091 if (var_is_alpha (vars[j]))
2093 msg_at (SE, cat->location,
2094 _("This category specification may be applied "
2095 "only to numeric variables, but this "
2096 "subcommand tries to apply it to string "
2098 var_get_name (vars[j]));
2107 if (!parse_category_string (cat->location, cat->string, dict,
2108 common_format->type, &n))
2111 ss_dealloc (&cat->string);
2113 cat->type = CCT_NUMBER;
2116 else if (!all_strings (vars, n_vars, cat))
2125 if (!cat->srange[0].string)
2127 else if (!parse_category_string (cat->location,
2128 cat->srange[0], dict,
2129 common_format->type, &n[0]))
2132 if (!cat->srange[1].string)
2134 else if (!parse_category_string (cat->location,
2135 cat->srange[1], dict,
2136 common_format->type, &n[1]))
2139 ss_dealloc (&cat->srange[0]);
2140 ss_dealloc (&cat->srange[1]);
2142 cat->type = CCT_NRANGE;
2143 cat->nrange[0] = n[0];
2144 cat->nrange[1] = n[1];
2146 else if (!all_strings (vars, n_vars, cat))
2157 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: uninitializes the summary spec set for
   each summary variant and frees each entry of nest->domains[]. */
2172 ctables_nest_uninit (struct ctables_nest *nest)
2175 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2176 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2177 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2178 free (nest->domains[dt]);
/* Uninitializes each of the stack->n nests in STACK with
   ctables_nest_uninit(), then frees the nests array itself. */
2182 ctables_stack_uninit (struct ctables_stack *stack)
2186 for (size_t i = 0; i < stack->n; i++)
2187 ctables_nest_uninit (&stack->nests[i]);
2188 free (stack->nests);
/* Builds the nesting of S0 with S1: one new nest for every pair (A, B) with
   A taken from S0 and B from S1.

   Each new nest's variable array holds A's variables followed by B's.  Its
   scale_idx is A's when A has one (not SIZE_MAX); otherwise, when B has one,
   it is B's shifted by a->n.  Its summary spec sets are cloned from
   summary_src, which is selected by testing whether A or B lacks
   specs[CSV_CELL].var.

   S0 and S1 are passed by value and both are uninitialized here, so the
   caller must not use them afterward. */
2192 static struct ctables_stack
2193 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2200 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2201 for (size_t i = 0; i < s0.n; i++)
2202 for (size_t j = 0; j < s1.n; j++)
2204 const struct ctables_nest *a = &s0.nests[i];
2205 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2207 size_t allocate = a->n + b->n;
2208 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2210 for (size_t k = 0; k < a->n; k++)
2211 vars[n++] = a->vars[k];
2212 for (size_t k = 0; k < b->n; k++)
2213 vars[n++] = b->vars[k];
2214 assert (n == allocate);
2216 const struct ctables_nest *summary_src;
2217 if (!a->specs[CSV_CELL].var)
2219 else if (!b->specs[CSV_CELL].var)
2224 struct ctables_nest *new = &stack.nests[stack.n++];
2225 *new = (struct ctables_nest) {
2227 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2228 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2232 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2233 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2235 ctables_stack_uninit (&s0);
2236 ctables_stack_uninit (&s1);
/* Builds a stack that holds the nests of S0 followed by the nests of S1.
   The nest structures are copied by assignment (their contents are not
   cloned).  Each nest taken from S1 has its group_head increased by s0.n so
   that it still refers to the right position in the combined array.
   Asserts that the result holds exactly s0.n + s1.n nests. */
2240 static struct ctables_stack
2241 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2243 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2244 for (size_t i = 0; i < s0.n; i++)
2245 stack.nests[stack.n++] = s0.nests[i];
2246 for (size_t i = 0; i < s1.n; i++)
2248 stack.nests[stack.n] = s1.nests[i];
2249 stack.nests[stack.n].group_head += s0.n;
2252 assert (stack.n == s0.n + s1.n);
/* Builds a stack containing a single newly allocated nest for axis node A.
   The nest's scale_idx is 0 when a->scale is set and SIZE_MAX otherwise.
   When A has cell summary specs (a->specs[CSV_CELL].n) or is a scale
   variable, the spec set for every summary variant is cloned from A and
   then marked with A's variable and scale flag. */
2258 static struct ctables_stack
2259 var_fts (const struct ctables_axis *a)
2261 struct variable **vars = xmalloc (sizeof *vars);
2264 struct ctables_nest *nest = xmalloc (sizeof *nest);
2265 *nest = (struct ctables_nest) {
2268 .scale_idx = a->scale ? 0 : SIZE_MAX,
2270 if (a->specs[CSV_CELL].n || a->scale)
2271 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2273 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2274 nest->specs[sv].var = a->var;
2275 nest->specs[sv].is_scale = a->scale;
2277 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis expression rooted at A into a stack of
   nests.  One path returns an empty stack; one kind of two-operand node
   combines the results for a->subs[0] and a->subs[1] with stack_fts(), and
   another combines them with nest_fts().  (Which node kinds select which
   path is not stated here -- see the conditions in the body.) */
2280 static struct ctables_stack
2281 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2284 return (struct ctables_stack) { .n = 0 };
2292 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2293 enumerate_fts (axis_type, a->subs[1]));
2296 /* This should consider any of the scale variables found in the result to
2297 be linked to each other listwise for SMISSING=LISTWISE. */
2298 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2299 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for a single summary.  Which member is meaningful
   depends on the summary function being computed; the comments on the
   members list the functions that use each one. */
2305 union ctables_summary
2307 /* COUNT, VALIDN, TOTALN. */
2310 /* MINIMUM, MAXIMUM, RANGE. */
2317 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2318 struct moments1 *moments;
2320 /* MEDIAN, MODE, PTILE. */
2323 struct casewriter *writer;
2328 /* XXX multiple response */
/* Prepares S to accumulate the summary function given by ss->function.

   Initialization visible below:
   - one group sets s->min and s->max to SYSMIS;
   - the group ending in the U*PCT_SUM functions creates a moments1
     accumulator with MOMENT_VARIANCE;
   - the group that uses s->writer creates a sorting casewriter whose cases
     have two numeric (width 0) columns, sorted ascending on column 0.
     (ctables_summary_add() stores the value in column 0 and the weight in
     column 1.) */
2332 ctables_summary_init (union ctables_summary *s,
2333 const struct ctables_summary_spec *ss)
2335 switch (ss->function)
2339 case CTSF_ROWPCT_COUNT:
2340 case CTSF_COLPCT_COUNT:
2341 case CTSF_TABLEPCT_COUNT:
2342 case CTSF_SUBTABLEPCT_COUNT:
2343 case CTSF_LAYERPCT_COUNT:
2344 case CTSF_LAYERROWPCT_COUNT:
2345 case CTSF_LAYERCOLPCT_COUNT:
2346 case CTSF_ROWPCT_VALIDN:
2347 case CTSF_COLPCT_VALIDN:
2348 case CTSF_TABLEPCT_VALIDN:
2349 case CTSF_SUBTABLEPCT_VALIDN:
2350 case CTSF_LAYERPCT_VALIDN:
2351 case CTSF_LAYERROWPCT_VALIDN:
2352 case CTSF_LAYERCOLPCT_VALIDN:
2353 case CTSF_ROWPCT_TOTALN:
2354 case CTSF_COLPCT_TOTALN:
2355 case CTSF_TABLEPCT_TOTALN:
2356 case CTSF_SUBTABLEPCT_TOTALN:
2357 case CTSF_LAYERPCT_TOTALN:
2358 case CTSF_LAYERROWPCT_TOTALN:
2359 case CTSF_LAYERCOLPCT_TOTALN:
2366 case CTSF_UROWPCT_COUNT:
2367 case CTSF_UCOLPCT_COUNT:
2368 case CTSF_UTABLEPCT_COUNT:
2369 case CTSF_USUBTABLEPCT_COUNT:
2370 case CTSF_ULAYERPCT_COUNT:
2371 case CTSF_ULAYERROWPCT_COUNT:
2372 case CTSF_ULAYERCOLPCT_COUNT:
2373 case CTSF_UROWPCT_VALIDN:
2374 case CTSF_UCOLPCT_VALIDN:
2375 case CTSF_UTABLEPCT_VALIDN:
2376 case CTSF_USUBTABLEPCT_VALIDN:
2377 case CTSF_ULAYERPCT_VALIDN:
2378 case CTSF_ULAYERROWPCT_VALIDN:
2379 case CTSF_ULAYERCOLPCT_VALIDN:
2380 case CTSF_UROWPCT_TOTALN:
2381 case CTSF_UCOLPCT_TOTALN:
2382 case CTSF_UTABLEPCT_TOTALN:
2383 case CTSF_USUBTABLEPCT_TOTALN:
2384 case CTSF_ULAYERPCT_TOTALN:
2385 case CTSF_ULAYERROWPCT_TOTALN:
2386 case CTSF_ULAYERCOLPCT_TOTALN:
2396 case CTSF_SUBTABLE_ID:
2398 case CTSF_LAYERROW_ID:
2399 case CTSF_LAYERCOL_ID:
2405 s->min = s->max = SYSMIS;
2413 case CTSF_ROWPCT_SUM:
2414 case CTSF_COLPCT_SUM:
2415 case CTSF_TABLEPCT_SUM:
2416 case CTSF_SUBTABLEPCT_SUM:
2417 case CTSF_LAYERPCT_SUM:
2418 case CTSF_LAYERROWPCT_SUM:
2419 case CTSF_LAYERCOLPCT_SUM:
2424 case CTSF_UVARIANCE:
2425 case CTSF_UROWPCT_SUM:
2426 case CTSF_UCOLPCT_SUM:
2427 case CTSF_UTABLEPCT_SUM:
2428 case CTSF_USUBTABLEPCT_SUM:
2429 case CTSF_ULAYERPCT_SUM:
2430 case CTSF_ULAYERROWPCT_SUM:
2431 case CTSF_ULAYERCOLPCT_SUM:
2432 s->moments = moments1_create (MOMENT_VARIANCE);
2442 struct caseproto *proto = caseproto_create ();
2443 proto = caseproto_add_width (proto, 0);
2444 proto = caseproto_add_width (proto, 0);
2446 struct subcase ordering;
2447 subcase_init (&ordering, 0, 0, SC_ASCEND);
2448 s->writer = sort_create_writer (&ordering, proto);
2449 subcase_uninit (&ordering);
2450 caseproto_unref (proto);
/* Releases the state that S holds for summary function ss->function.  The
   case groups mirror those in ctables_summary_init(): the group ending in
   the U*PCT_SUM functions destroys s->moments, and the group that uses a
   casewriter destroys s->writer.  No cleanup statement is visible for the
   other groups. */
2460 ctables_summary_uninit (union ctables_summary *s,
2461 const struct ctables_summary_spec *ss)
2463 switch (ss->function)
2467 case CTSF_ROWPCT_COUNT:
2468 case CTSF_COLPCT_COUNT:
2469 case CTSF_TABLEPCT_COUNT:
2470 case CTSF_SUBTABLEPCT_COUNT:
2471 case CTSF_LAYERPCT_COUNT:
2472 case CTSF_LAYERROWPCT_COUNT:
2473 case CTSF_LAYERCOLPCT_COUNT:
2474 case CTSF_ROWPCT_VALIDN:
2475 case CTSF_COLPCT_VALIDN:
2476 case CTSF_TABLEPCT_VALIDN:
2477 case CTSF_SUBTABLEPCT_VALIDN:
2478 case CTSF_LAYERPCT_VALIDN:
2479 case CTSF_LAYERROWPCT_VALIDN:
2480 case CTSF_LAYERCOLPCT_VALIDN:
2481 case CTSF_ROWPCT_TOTALN:
2482 case CTSF_COLPCT_TOTALN:
2483 case CTSF_TABLEPCT_TOTALN:
2484 case CTSF_SUBTABLEPCT_TOTALN:
2485 case CTSF_LAYERPCT_TOTALN:
2486 case CTSF_LAYERROWPCT_TOTALN:
2487 case CTSF_LAYERCOLPCT_TOTALN:
2494 case CTSF_UROWPCT_COUNT:
2495 case CTSF_UCOLPCT_COUNT:
2496 case CTSF_UTABLEPCT_COUNT:
2497 case CTSF_USUBTABLEPCT_COUNT:
2498 case CTSF_ULAYERPCT_COUNT:
2499 case CTSF_ULAYERROWPCT_COUNT:
2500 case CTSF_ULAYERCOLPCT_COUNT:
2501 case CTSF_UROWPCT_VALIDN:
2502 case CTSF_UCOLPCT_VALIDN:
2503 case CTSF_UTABLEPCT_VALIDN:
2504 case CTSF_USUBTABLEPCT_VALIDN:
2505 case CTSF_ULAYERPCT_VALIDN:
2506 case CTSF_ULAYERROWPCT_VALIDN:
2507 case CTSF_ULAYERCOLPCT_VALIDN:
2508 case CTSF_UROWPCT_TOTALN:
2509 case CTSF_UCOLPCT_TOTALN:
2510 case CTSF_UTABLEPCT_TOTALN:
2511 case CTSF_USUBTABLEPCT_TOTALN:
2512 case CTSF_ULAYERPCT_TOTALN:
2513 case CTSF_ULAYERROWPCT_TOTALN:
2514 case CTSF_ULAYERCOLPCT_TOTALN:
2523 case CTSF_SUBTABLE_ID:
2525 case CTSF_LAYERROW_ID:
2526 case CTSF_LAYERCOL_ID:
2539 case CTSF_ROWPCT_SUM:
2540 case CTSF_COLPCT_SUM:
2541 case CTSF_TABLEPCT_SUM:
2542 case CTSF_SUBTABLEPCT_SUM:
2543 case CTSF_LAYERPCT_SUM:
2544 case CTSF_LAYERROWPCT_SUM:
2545 case CTSF_LAYERCOLPCT_SUM:
2550 case CTSF_UVARIANCE:
2551 case CTSF_UROWPCT_SUM:
2552 case CTSF_UCOLPCT_SUM:
2553 case CTSF_UTABLEPCT_SUM:
2554 case CTSF_USUBTABLEPCT_SUM:
2555 case CTSF_ULAYERPCT_SUM:
2556 case CTSF_ULAYERROWPCT_SUM:
2557 case CTSF_ULAYERCOLPCT_SUM:
2558 moments1_destroy (s->moments);
2567 casewriter_destroy (s->writer);
/* Folds one observation into summary S according to ss->function.

   VALUE is the observation's value for VAR.  D_WEIGHT and E_WEIGHT are two
   weights for the observation: the count-style groups below add one or the
   other to s->count, and the moment and order-statistic groups use E_WEIGHT
   (or 1.0 for the unweighted sum group).

   Flags, as used below:
   - IS_SCALE || !EXCLUDED_MISSING guards the COUNT-style updates;
   - !IS_SCALE_MISSING guards the min/max, moments, and order-statistic
     updates.
   IS_MISSING is not referenced on any line shown here.

   For order statistics, each observation is written to s->writer as a case
   with the value in column 0 and the weight in column 1, and s->ovalid
   accumulates the weight. */
2573 ctables_summary_add (union ctables_summary *s,
2574 const struct ctables_summary_spec *ss,
2575 const struct variable *var, const union value *value,
2576 bool is_scale, bool is_scale_missing,
2577 bool is_missing, bool excluded_missing,
2578 double d_weight, double e_weight)
2580 /* To determine whether a case is included in a given table for a particular
2581 kind of summary, consider the following charts for each variable in the
2582 table. Only if "yes" appears for every variable for the summary is the
2585 Categorical variables: VALIDN COUNT TOTALN
2586 Valid values in included categories yes yes yes
2587 Missing values in included categories --- yes yes
2588 Missing values in excluded categories --- --- yes
2589 Valid values in excluded categories --- --- ---
2591 Scale variables: VALIDN COUNT TOTALN
2592 Valid value yes yes yes
2593 Missing value --- yes yes
2595 Missing values include both user- and system-missing. (The system-missing
2596 value is always in an excluded category.)
2598 switch (ss->function)
2601 case CTSF_ROWPCT_TOTALN:
2602 case CTSF_COLPCT_TOTALN:
2603 case CTSF_TABLEPCT_TOTALN:
2604 case CTSF_SUBTABLEPCT_TOTALN:
2605 case CTSF_LAYERPCT_TOTALN:
2606 case CTSF_LAYERROWPCT_TOTALN:
2607 case CTSF_LAYERCOLPCT_TOTALN:
2608 s->count += d_weight;
2612 case CTSF_UROWPCT_TOTALN:
2613 case CTSF_UCOLPCT_TOTALN:
2614 case CTSF_UTABLEPCT_TOTALN:
2615 case CTSF_USUBTABLEPCT_TOTALN:
2616 case CTSF_ULAYERPCT_TOTALN:
2617 case CTSF_ULAYERROWPCT_TOTALN:
2618 case CTSF_ULAYERCOLPCT_TOTALN:
2623 case CTSF_ROWPCT_COUNT:
2624 case CTSF_COLPCT_COUNT:
2625 case CTSF_TABLEPCT_COUNT:
2626 case CTSF_SUBTABLEPCT_COUNT:
2627 case CTSF_LAYERPCT_COUNT:
2628 case CTSF_LAYERROWPCT_COUNT:
2629 case CTSF_LAYERCOLPCT_COUNT:
2630 if (is_scale || !excluded_missing)
2631 s->count += d_weight;
2635 case CTSF_UROWPCT_COUNT:
2636 case CTSF_UCOLPCT_COUNT:
2637 case CTSF_UTABLEPCT_COUNT:
2638 case CTSF_USUBTABLEPCT_COUNT:
2639 case CTSF_ULAYERPCT_COUNT:
2640 case CTSF_ULAYERROWPCT_COUNT:
2641 case CTSF_ULAYERCOLPCT_COUNT:
2642 if (is_scale || !excluded_missing)
2647 case CTSF_ROWPCT_VALIDN:
2648 case CTSF_COLPCT_VALIDN:
2649 case CTSF_TABLEPCT_VALIDN:
2650 case CTSF_SUBTABLEPCT_VALIDN:
2651 case CTSF_LAYERPCT_VALIDN:
2652 case CTSF_LAYERROWPCT_VALIDN:
2653 case CTSF_LAYERCOLPCT_VALIDN:
2657 s->count += d_weight;
2661 case CTSF_UROWPCT_VALIDN:
2662 case CTSF_UCOLPCT_VALIDN:
2663 case CTSF_UTABLEPCT_VALIDN:
2664 case CTSF_USUBTABLEPCT_VALIDN:
2665 case CTSF_ULAYERPCT_VALIDN:
2666 case CTSF_ULAYERROWPCT_VALIDN:
2667 case CTSF_ULAYERCOLPCT_VALIDN:
2677 case CTSF_SUBTABLE_ID:
2679 case CTSF_LAYERROW_ID:
2680 case CTSF_LAYERCOL_ID:
2687 s->count += d_weight;
2698 if (is_scale || !excluded_missing)
2699 s->count += e_weight;
2706 s->count += e_weight;
2710 s->count += e_weight;
2716 if (!is_scale_missing)
2718 assert (!var_is_alpha (var)); /* XXX? */
2719 if (s->min == SYSMIS || value->f < s->min)
2721 if (s->max == SYSMIS || value->f > s->max)
2731 case CTSF_ROWPCT_SUM:
2732 case CTSF_COLPCT_SUM:
2733 case CTSF_TABLEPCT_SUM:
2734 case CTSF_SUBTABLEPCT_SUM:
2735 case CTSF_LAYERPCT_SUM:
2736 case CTSF_LAYERROWPCT_SUM:
2737 case CTSF_LAYERCOLPCT_SUM:
2738 if (!is_scale_missing)
2739 moments1_add (s->moments, value->f, e_weight);
2746 case CTSF_UVARIANCE:
2747 case CTSF_UROWPCT_SUM:
2748 case CTSF_UCOLPCT_SUM:
2749 case CTSF_UTABLEPCT_SUM:
2750 case CTSF_USUBTABLEPCT_SUM:
2751 case CTSF_ULAYERPCT_SUM:
2752 case CTSF_ULAYERROWPCT_SUM:
2753 case CTSF_ULAYERCOLPCT_SUM:
2754 if (!is_scale_missing)
2755 moments1_add (s->moments, value->f, 1.0);
/* From here on, treat the observation as having unit weight. */
2761 d_weight = e_weight = 1.0;
2766 if (!is_scale_missing)
2768 s->ovalid += e_weight;
2770 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2771 *case_num_rw_idx (c, 0) = value->f;
2772 *case_num_rw_idx (c, 1) = e_weight;
2773 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the type of domain it is evaluated over.  The
   case labels are grouped by domain: for example, the LAYERCOLPCT functions
   (weighted and U-prefixed) and LAYERCOL_ID yield CTDT_LAYERCOL, the
   LAYERROWPCT functions and LAYERROW_ID yield CTDT_LAYERROW, and the
   SUBTABLEPCT functions and SUBTABLE_ID yield CTDT_SUBTABLE.  The COLPCT,
   LAYERPCT, ROWPCT, and TABLEPCT groups follow the same pattern. */
2779 static enum ctables_domain_type
2780 ctables_function_domain (enum ctables_summary_function function)
2810 case CTSF_UVARIANCE:
2816 case CTSF_COLPCT_COUNT:
2817 case CTSF_COLPCT_SUM:
2818 case CTSF_COLPCT_TOTALN:
2819 case CTSF_COLPCT_VALIDN:
2820 case CTSF_UCOLPCT_COUNT:
2821 case CTSF_UCOLPCT_SUM:
2822 case CTSF_UCOLPCT_TOTALN:
2823 case CTSF_UCOLPCT_VALIDN:
2827 case CTSF_LAYERCOLPCT_COUNT:
2828 case CTSF_LAYERCOLPCT_SUM:
2829 case CTSF_LAYERCOLPCT_TOTALN:
2830 case CTSF_LAYERCOLPCT_VALIDN:
2831 case CTSF_ULAYERCOLPCT_COUNT:
2832 case CTSF_ULAYERCOLPCT_SUM:
2833 case CTSF_ULAYERCOLPCT_TOTALN:
2834 case CTSF_ULAYERCOLPCT_VALIDN:
2835 case CTSF_LAYERCOL_ID:
2836 return CTDT_LAYERCOL;
2838 case CTSF_LAYERPCT_COUNT:
2839 case CTSF_LAYERPCT_SUM:
2840 case CTSF_LAYERPCT_TOTALN:
2841 case CTSF_LAYERPCT_VALIDN:
2842 case CTSF_ULAYERPCT_COUNT:
2843 case CTSF_ULAYERPCT_SUM:
2844 case CTSF_ULAYERPCT_TOTALN:
2845 case CTSF_ULAYERPCT_VALIDN:
2849 case CTSF_LAYERROWPCT_COUNT:
2850 case CTSF_LAYERROWPCT_SUM:
2851 case CTSF_LAYERROWPCT_TOTALN:
2852 case CTSF_LAYERROWPCT_VALIDN:
2853 case CTSF_ULAYERROWPCT_COUNT:
2854 case CTSF_ULAYERROWPCT_SUM:
2855 case CTSF_ULAYERROWPCT_TOTALN:
2856 case CTSF_ULAYERROWPCT_VALIDN:
2857 case CTSF_LAYERROW_ID:
2858 return CTDT_LAYERROW;
2860 case CTSF_ROWPCT_COUNT:
2861 case CTSF_ROWPCT_SUM:
2862 case CTSF_ROWPCT_TOTALN:
2863 case CTSF_ROWPCT_VALIDN:
2864 case CTSF_UROWPCT_COUNT:
2865 case CTSF_UROWPCT_SUM:
2866 case CTSF_UROWPCT_TOTALN:
2867 case CTSF_UROWPCT_VALIDN:
2871 case CTSF_SUBTABLEPCT_COUNT:
2872 case CTSF_SUBTABLEPCT_SUM:
2873 case CTSF_SUBTABLEPCT_TOTALN:
2874 case CTSF_SUBTABLEPCT_VALIDN:
2875 case CTSF_USUBTABLEPCT_COUNT:
2876 case CTSF_USUBTABLEPCT_SUM:
2877 case CTSF_USUBTABLEPCT_TOTALN:
2878 case CTSF_USUBTABLEPCT_VALIDN:
2879 case CTSF_SUBTABLE_ID:
2880 return CTDT_SUBTABLE;
2882 case CTSF_TABLEPCT_COUNT:
2883 case CTSF_TABLEPCT_SUM:
2884 case CTSF_TABLEPCT_TOTALN:
2885 case CTSF_TABLEPCT_VALIDN:
2886 case CTSF_UTABLEPCT_COUNT:
2887 case CTSF_UTABLEPCT_SUM:
2888 case CTSF_UTABLEPCT_TOTALN:
2889 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION into one of two groups: the final group of
   case labels below consists exactly of the *PCT_SUM functions (weighted
   and U-prefixed), and every other function is listed before it.  By its
   name, this presumably reports whether FUNCTION is a percentage-of-sum
   function.

   NOTE(review): the declared return type is enum ctables_domain_type, which
   looks inconsistent with a predicate named is_pctsum -- confirm what the
   return statements yield and consider declaring it `bool'. */
2897 static enum ctables_domain_type
2898 ctables_function_is_pctsum (enum ctables_summary_function function)
2928 case CTSF_UVARIANCE:
2932 case CTSF_COLPCT_COUNT:
2933 case CTSF_COLPCT_TOTALN:
2934 case CTSF_COLPCT_VALIDN:
2935 case CTSF_UCOLPCT_COUNT:
2936 case CTSF_UCOLPCT_TOTALN:
2937 case CTSF_UCOLPCT_VALIDN:
2938 case CTSF_LAYERCOLPCT_COUNT:
2939 case CTSF_LAYERCOLPCT_TOTALN:
2940 case CTSF_LAYERCOLPCT_VALIDN:
2941 case CTSF_ULAYERCOLPCT_COUNT:
2942 case CTSF_ULAYERCOLPCT_TOTALN:
2943 case CTSF_ULAYERCOLPCT_VALIDN:
2944 case CTSF_LAYERPCT_COUNT:
2945 case CTSF_LAYERPCT_TOTALN:
2946 case CTSF_LAYERPCT_VALIDN:
2947 case CTSF_ULAYERPCT_COUNT:
2948 case CTSF_ULAYERPCT_TOTALN:
2949 case CTSF_ULAYERPCT_VALIDN:
2950 case CTSF_LAYERROWPCT_COUNT:
2951 case CTSF_LAYERROWPCT_TOTALN:
2952 case CTSF_LAYERROWPCT_VALIDN:
2953 case CTSF_ULAYERROWPCT_COUNT:
2954 case CTSF_ULAYERROWPCT_TOTALN:
2955 case CTSF_ULAYERROWPCT_VALIDN:
2956 case CTSF_ROWPCT_COUNT:
2957 case CTSF_ROWPCT_TOTALN:
2958 case CTSF_ROWPCT_VALIDN:
2959 case CTSF_UROWPCT_COUNT:
2960 case CTSF_UROWPCT_TOTALN:
2961 case CTSF_UROWPCT_VALIDN:
2962 case CTSF_SUBTABLEPCT_COUNT:
2963 case CTSF_SUBTABLEPCT_TOTALN:
2964 case CTSF_SUBTABLEPCT_VALIDN:
2965 case CTSF_USUBTABLEPCT_COUNT:
2966 case CTSF_USUBTABLEPCT_TOTALN:
2967 case CTSF_USUBTABLEPCT_VALIDN:
2968 case CTSF_TABLEPCT_COUNT:
2969 case CTSF_TABLEPCT_TOTALN:
2970 case CTSF_TABLEPCT_VALIDN:
2971 case CTSF_UTABLEPCT_COUNT:
2972 case CTSF_UTABLEPCT_TOTALN:
2973 case CTSF_UTABLEPCT_VALIDN:
2977 case CTSF_SUBTABLE_ID:
2979 case CTSF_LAYERROW_ID:
2980 case CTSF_LAYERCOL_ID:
2983 case CTSF_COLPCT_SUM:
2984 case CTSF_UCOLPCT_SUM:
2985 case CTSF_LAYERCOLPCT_SUM:
2986 case CTSF_ULAYERCOLPCT_SUM:
2987 case CTSF_LAYERPCT_SUM:
2988 case CTSF_ULAYERPCT_SUM:
2989 case CTSF_LAYERROWPCT_SUM:
2990 case CTSF_ULAYERROWPCT_SUM:
2991 case CTSF_ROWPCT_SUM:
2992 case CTSF_UROWPCT_SUM:
2993 case CTSF_SUBTABLEPCT_SUM:
2994 case CTSF_USUBTABLEPCT_SUM:
2995 case CTSF_TABLEPCT_SUM:
2996 case CTSF_UTABLEPCT_SUM:
/* Computes the final value of summary S, accumulated for CELL, according to
   ss->function.

   - The *_ID functions return the sequence number of CELL's domain of the
     type given by ctables_function_domain().
   - The percentage-of-N functions divide s->count by the matching domain
     total (e_count, u_count, e_valid, u_valid, e_total, or u_total) and
     multiply by 100 when that total is nonzero.
   - The moment-based functions obtain weight, mean, and variance from
     moments1_calculate(); the sum is weight * mean, and SYSMIS inputs yield
     SYSMIS.
   - The percentage-of-sum functions divide weight * mean by the domain's
     e_sum or u_sum for ss->sum_var_idx and multiply by 100, yielding SYSMIS
     when the denominator is 0.
   - The order statistics convert s->writer into a reader and compute either
     a percentile (ss->percentile for CTSF_PTILE, otherwise 0.5) using
     PC_HAVERAGE, or the mode, storing the result in s->ovalue.  Because the
     writer is turned into a reader, this step consumes it. */
3004 ctables_summary_value (const struct ctables_cell *cell,
3005 union ctables_summary *s,
3006 const struct ctables_summary_spec *ss)
3008 switch (ss->function)
3018 case CTSF_SUBTABLE_ID:
3020 case CTSF_LAYERROW_ID:
3021 case CTSF_LAYERCOL_ID:
3022 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3024 case CTSF_ROWPCT_COUNT:
3025 case CTSF_COLPCT_COUNT:
3026 case CTSF_TABLEPCT_COUNT:
3027 case CTSF_SUBTABLEPCT_COUNT:
3028 case CTSF_LAYERPCT_COUNT:
3029 case CTSF_LAYERROWPCT_COUNT:
3030 case CTSF_LAYERCOLPCT_COUNT:
3032 enum ctables_domain_type d = ctables_function_domain (ss->function);
3033 return (cell->domains[d]->e_count
3034 ? s->count / cell->domains[d]->e_count * 100
3038 case CTSF_UROWPCT_COUNT:
3039 case CTSF_UCOLPCT_COUNT:
3040 case CTSF_UTABLEPCT_COUNT:
3041 case CTSF_USUBTABLEPCT_COUNT:
3042 case CTSF_ULAYERPCT_COUNT:
3043 case CTSF_ULAYERROWPCT_COUNT:
3044 case CTSF_ULAYERCOLPCT_COUNT:
3046 enum ctables_domain_type d = ctables_function_domain (ss->function);
3047 return (cell->domains[d]->u_count
3048 ? s->count / cell->domains[d]->u_count * 100
3052 case CTSF_ROWPCT_VALIDN:
3053 case CTSF_COLPCT_VALIDN:
3054 case CTSF_TABLEPCT_VALIDN:
3055 case CTSF_SUBTABLEPCT_VALIDN:
3056 case CTSF_LAYERPCT_VALIDN:
3057 case CTSF_LAYERROWPCT_VALIDN:
3058 case CTSF_LAYERCOLPCT_VALIDN:
3060 enum ctables_domain_type d = ctables_function_domain (ss->function);
3061 return (cell->domains[d]->e_valid
3062 ? s->count / cell->domains[d]->e_valid * 100
3066 case CTSF_UROWPCT_VALIDN:
3067 case CTSF_UCOLPCT_VALIDN:
3068 case CTSF_UTABLEPCT_VALIDN:
3069 case CTSF_USUBTABLEPCT_VALIDN:
3070 case CTSF_ULAYERPCT_VALIDN:
3071 case CTSF_ULAYERROWPCT_VALIDN:
3072 case CTSF_ULAYERCOLPCT_VALIDN:
3074 enum ctables_domain_type d = ctables_function_domain (ss->function);
3075 return (cell->domains[d]->u_valid
3076 ? s->count / cell->domains[d]->u_valid * 100
3080 case CTSF_ROWPCT_TOTALN:
3081 case CTSF_COLPCT_TOTALN:
3082 case CTSF_TABLEPCT_TOTALN:
3083 case CTSF_SUBTABLEPCT_TOTALN:
3084 case CTSF_LAYERPCT_TOTALN:
3085 case CTSF_LAYERROWPCT_TOTALN:
3086 case CTSF_LAYERCOLPCT_TOTALN:
3088 enum ctables_domain_type d = ctables_function_domain (ss->function);
3089 return (cell->domains[d]->e_total
3090 ? s->count / cell->domains[d]->e_total * 100
3094 case CTSF_UROWPCT_TOTALN:
3095 case CTSF_UCOLPCT_TOTALN:
3096 case CTSF_UTABLEPCT_TOTALN:
3097 case CTSF_USUBTABLEPCT_TOTALN:
3098 case CTSF_ULAYERPCT_TOTALN:
3099 case CTSF_ULAYERROWPCT_TOTALN:
3100 case CTSF_ULAYERCOLPCT_TOTALN:
3102 enum ctables_domain_type d = ctables_function_domain (ss->function);
3103 return (cell->domains[d]->u_total
3104 ? s->count / cell->domains[d]->u_total * 100
/* The range is defined only when both extremes have been seen. */
3125 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3131 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3138 double weight, variance;
3139 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3140 return calc_semean (variance, weight);
3147 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3148 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered from the moments as total weight times mean. */
3154 double weight, mean;
3155 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3156 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3160 case CTSF_UVARIANCE:
3163 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3167 case CTSF_ROWPCT_SUM:
3168 case CTSF_COLPCT_SUM:
3169 case CTSF_TABLEPCT_SUM:
3170 case CTSF_SUBTABLEPCT_SUM:
3171 case CTSF_LAYERPCT_SUM:
3172 case CTSF_LAYERROWPCT_SUM:
3173 case CTSF_LAYERCOLPCT_SUM:
3175 double weight, mean;
3176 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3177 if (weight == SYSMIS || mean == SYSMIS)
3179 enum ctables_domain_type d = ctables_function_domain (ss->function);
3180 double num = weight * mean;
3181 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3182 return denom != 0 ? num / denom * 100 : SYSMIS;
3184 case CTSF_UROWPCT_SUM:
3185 case CTSF_UCOLPCT_SUM:
3186 case CTSF_UTABLEPCT_SUM:
3187 case CTSF_USUBTABLEPCT_SUM:
3188 case CTSF_ULAYERPCT_SUM:
3189 case CTSF_ULAYERROWPCT_SUM:
3190 case CTSF_ULAYERCOLPCT_SUM:
3192 double weight, mean;
3193 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3194 if (weight == SYSMIS || mean == SYSMIS)
3196 enum ctables_domain_type d = ctables_function_domain (ss->function);
3197 double num = weight * mean;
3198 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3199 return denom != 0 ? num / denom * 100 : SYSMIS;
3208 struct casereader *reader = casewriter_make_reader (s->writer);
3211 struct percentile *ptile = percentile_create (
3212 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3213 struct order_stats *os = &ptile->parent;
3214 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3215 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3216 statistic_destroy (&ptile->parent.parent);
3224 struct casereader *reader = casewriter_make_reader (s->writer);
3227 struct mode *mode = mode_create ();
3228 struct order_stats *os = &mode->parent;
3229 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3230 s->ovalue = mode->mode;
3231 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared, and the pivot axis whose cell values are examined. */
3239 struct ctables_cell_sort_aux
3241 const struct ctables_nest *nest;
3242 enum pivot_axis_type a;
/* Sort callback that compares two array elements of type
   "struct ctables_cell *" (A_ and B_ point at the elements) along one axis.
   AUX_ is a struct ctables_cell_sort_aux naming the nest and the axis.

   The nest's variables are visited in order, skipping the scale variable.
   For each variable the two cells' categories are compared first; cells in
   the same category are then ordered in a way that depends on the category
   type, using the data values or their value labels and honoring the
   category's sort_ascending flag. */
3246 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3248 const struct ctables_cell_sort_aux *aux = aux_;
3249 struct ctables_cell *const *ap = a_;
3250 struct ctables_cell *const *bp = b_;
3251 const struct ctables_cell *a = *ap;
3252 const struct ctables_cell *b = *bp;
3254 const struct ctables_nest *nest = aux->nest;
3255 for (size_t i = 0; i < nest->n; i++)
3256 if (i != nest->scale_idx)
3258 const struct variable *var = nest->vars[i];
3259 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3260 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the address of the category object. */
3261 if (a_cv->category != b_cv->category)
3262 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category: the order within it depends on the category type. */
3264 const union value *a_val = &a_cv->value;
3265 const union value *b_val = &b_cv->value;
3266 switch (a_cv->category->type)
3272 case CCT_POSTCOMPUTE:
3273 case CCT_EXCLUDED_MISSING:
3274 /* Must be equal. */
3282 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3290 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3292 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label text.  A plain value comparison is also available
   below; NOTE(review): the conditions choosing between the two, and any
   handling of values without labels, are not visible in this excerpt. */
3298 const char *a_label = var_lookup_value_label (var, a_val);
3299 const char *b_label = var_lookup_value_label (var, b_val);
3305 cmp = strcmp (a_label, b_label);
3311 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3314 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Sort callback that compares two array elements of type
   "struct ctables_cell *" by the leaf index stored for each pivot axis,
   examining the axes in increasing order.  AUX is unused.

   NOTE(review): the test guarding the return below and the result for cells
   whose leaves all match are not visible in this excerpt. */
3326 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3327 const void *aux UNUSED)
3329 struct ctables_cell *const *ap = a_;
3330 struct ctables_cell *const *bp = b_;
3331 const struct ctables_cell *a = *ap;
3332 const struct ctables_cell *b = *bp;
3334 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3336 int al = a->axes[axis].leaf;
3337 int bl = b->axes[axis].leaf;
3339 return al > bl ? 1 : -1;
3347 For each ctables_table:
3348 For each combination of row vars:
3349 For each combination of column vars:
3350 For each combination of layer vars:
3352 Make a table of row values:
3353 Sort entries by row values
3354 Assign a 0-based index to each actual value
3355 Construct a dimension
3356 Make a table of column values
3357 Make a table of layer values
3359 Fill the table entry using the indexes from before.
/* Looks up, in section S, the domain of type DOMAIN that CELL falls into,
   and adds a new one to S->domains[DOMAIN] when no existing domain matches.

   A domain is identified by the categories (and, for categories other than
   totals, subtotals, and postcomputes, the data values) that CELL has for
   the variables listed in each nest's domains[DOMAIN] index array.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching or newly created domain is returned. */
3362 static struct ctables_domain *
3363 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3364 enum ctables_domain_type domain)
/* Hash the identifying categories and values. */
3367 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3369 const struct ctables_nest *nest = s->nests[a];
3370 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3372 size_t v_idx = nest->domains[domain][i];
3373 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3374 hash = hash_pointer (cv->category, hash);
3375 if (cv->category->type != CCT_TOTAL
3376 && cv->category->type != CCT_SUBTOTAL
3377 && cv->category->type != CCT_POSTCOMPUTE)
3378 hash = value_hash (&cv->value,
3379 var_get_width (nest->vars[v_idx]), hash);
/* Search the hash bucket, comparing CELL against each candidate domain's
   example cell with the same rules used for hashing. */
3383 struct ctables_domain *d;
3384 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3386 const struct ctables_cell *df = d->example;
3387 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3389 const struct ctables_nest *nest = s->nests[a];
3390 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3392 size_t v_idx = nest->domains[domain][i];
3393 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3394 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3395 if (cv1->category != cv2->category
3396 || (cv1->category->type != CCT_TOTAL
3397 && cv1->category->type != CCT_SUBTOTAL
3398 && cv1->category->type != CCT_POSTCOMPUTE
3399 && !value_equal (&cv1->value, &cv2->value,
3400 var_get_width (nest->vars[v_idx]))))
/* Create a new domain with CELL as its example.  The per-sum-variable
   accumulators are zero-initialized and allocated only when the table has
   sum variables. */
3409 struct ctables_sum *sums = (s->table->n_sum_vars
3410 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3413 d = xmalloc (sizeof *d);
3414 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3415 hmap_insert (&s->domains[domain], &d->node, hash);
3419 static struct substring
3420 rtrim_value (const union value *v, const struct variable *var)
3422 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3423 var_get_width (var));
3424 ss_rtrim (&s, ss_cstr (" "));
3429 in_string_range (const union value *v, const struct variable *var,
3430 const struct substring *srange)
3432 struct substring s = rtrim_value (v, var);
3433 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3434 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   Categories are examined from the last one to the first.  The visible
   tests cover exact numbers, exact (right-trimmed) strings, numeric ranges
   (where -DBL_MAX and DBL_MAX act as open bounds), string ranges, and
   missing values.  If the loop ends without returning, the result is null
   for a missing value and OTHERNM otherwise.

   NOTE(review): most case labels, the early returns, and the place where
   OTHERNM is assigned are not visible in this excerpt. */
3437 static const struct ctables_category *
3438 ctables_categories_match (const struct ctables_categories *c,
3439 const union value *v, const struct variable *var)
/* The system-missing value gets special treatment before any category is
   consulted (the action taken is not visible here). */
3441 if (var_is_numeric (var) && v->f == SYSMIS)
3444 const struct ctables_category *othernm = NULL;
3445 for (size_t i = c->n_cats; i-- > 0; )
3447 const struct ctables_category *cat = &c->cats[i];
3451 if (cat->number == v->f)
3456 if (ss_equals (cat->string, rtrim_value (v, var)))
3461 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3462 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3467 if (in_string_range (v, var, cat->srange))
3472 if (var_is_value_missing (var, v))
3476 case CCT_POSTCOMPUTE:
/* This category accepts the value unless the value is missing and the
   category does not include missing values. */
3491 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3494 case CCT_EXCLUDED_MISSING:
3499 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the total category of C, looking only at its first and last
   categories: the first one if it has type CCT_TOTAL, otherwise the last
   one if it has that type.

   NOTE(review): the result when neither is a total is not visible in this
   excerpt (presumably null).  The code reads cats[0] and cats[n_cats - 1]
   unconditionally, so it assumes C has at least one category. */
3502 static const struct ctables_category *
3503 ctables_categories_total (const struct ctables_categories *c)
3505 const struct ctables_category *first = &c->cats[0];
3506 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3507 return (first->type == CCT_TOTAL ? first
3508 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S identified by the categories in CATS and,
   for categories other than totals, subtotals, and postcomputes, by the
   values that case C has for the nest variables.  If no such cell exists
   yet, creates one, initializes its summaries and domains, and adds it to
   S->cells.

   CATS[a][i] is the category for variable i of the nest on axis a; entries
   for scale variables are not consulted for hashing or matching.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the found or newly created cell is returned. */
3512 static struct ctables_cell *
3513 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3514 const struct ctables_category *cats[PIVOT_N_AXES][10])
3517 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3518 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3520 const struct ctables_nest *nest = s->nests[a];
3521 for (size_t i = 0; i < nest->n; i++)
3522 if (i != nest->scale_idx)
3524 hash = hash_pointer (cats[a][i], hash);
3525 if (cats[a][i]->type != CCT_TOTAL
3526 && cats[a][i]->type != CCT_SUBTOTAL
3527 && cats[a][i]->type != CCT_POSTCOMPUTE)
3528 hash = value_hash (case_data (c, nest->vars[i]),
3529 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3535 struct ctables_cell *cell;
3536 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3538 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3540 const struct ctables_nest *nest = s->nests[a];
3541 for (size_t i = 0; i < nest->n; i++)
3542 if (i != nest->scale_idx
3543 && (cats[a][i] != cell->axes[a].cvs[i].category
3544 || (cats[a][i]->type != CCT_TOTAL
3545 && cats[a][i]->type != CCT_SUBTOTAL
3546 && cats[a][i]->type != CCT_POSTCOMPUTE
3547 && !value_equal (case_data (c, nest->vars[i]),
3548 &cell->axes[a].cvs[i].value,
3549 var_get_width (nest->vars[i])))))
/* No match: build a new cell. */
3558 cell = xmalloc (sizeof *cell);
3561 cell->omit_domains = 0;
3562 cell->postcompute = false;
3563 //struct string name = DS_EMPTY_INITIALIZER;
3564 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3566 const struct ctables_nest *nest = s->nests[a];
3567 cell->axes[a].cvs = (nest->n
3568 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3570 for (size_t i = 0; i < nest->n; i++)
3572 const struct ctables_category *cat = cats[a][i];
3573 const struct variable *var = nest->vars[i];
3574 const union value *value = case_data (c, var);
3575 if (i != nest->scale_idx)
3577 const struct ctables_category *subtotal = cat->subtotal;
3578 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category sets bits in omit_domains; the
   set of bits depends on which axis the category is on. */
3581 if (cat->type == CCT_TOTAL
3582 || cat->type == CCT_SUBTOTAL
3583 || cat->type == CCT_POSTCOMPUTE)
3585 /* XXX these should be more encompassing I think.*/
3589 case PIVOT_AXIS_COLUMN:
3590 cell->omit_domains |= ((1u << CTDT_TABLE) |
3591 (1u << CTDT_LAYER) |
3592 (1u << CTDT_LAYERCOL) |
3593 (1u << CTDT_SUBTABLE) |
3596 case PIVOT_AXIS_ROW:
3597 cell->omit_domains |= ((1u << CTDT_TABLE) |
3598 (1u << CTDT_LAYER) |
3599 (1u << CTDT_LAYERROW) |
3600 (1u << CTDT_SUBTABLE) |
3603 case PIVOT_AXIS_LAYER:
3604 cell->omit_domains |= ((1u << CTDT_TABLE) |
3605 (1u << CTDT_LAYER));
3609 if (cat->type == CCT_POSTCOMPUTE)
3610 cell->postcompute = true;
/* Record the category and a private copy of the value for this variable. */
3613 cell->axes[a].cvs[i].category = cat;
3614 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a debugging name in `name',
   whose declaration is commented out above; presumably they sit inside a
   disabled preprocessor region that is not visible here. */
3617 if (i != nest->scale_idx)
3619 if (!ds_is_empty (&name))
3620 ds_put_cstr (&name, ", ");
3621 char *value_s = data_out (value, var_get_encoding (var),
3622 var_get_print_format (var),
3623 settings_get_fmt_settings ());
3624 if (cat->type == CCT_TOTAL
3625 || cat->type == CCT_SUBTOTAL
3626 || cat->type == CCT_POSTCOMPUTE)
3627 ds_put_format (&name, "%s=total", var_get_name (var));
3629 ds_put_format (&name, "%s=%s", var_get_name (var),
3630 value_s + strspn (value_s, " "));
3636 //cell->name = ds_steal_cstr (&name);
/* One summary per summary specification on the summary axis, using the
   specification set selected by the cell's summary variant. */
3638 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3639 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3640 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3641 for (size_t i = 0; i < specs->n; i++)
3642 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its domain of every type, then publish the cell. */
3643 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3644 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3645 hmap_insert (&s->cells, &cell->node, hash);
/* Tests whether case C has a missing value for the scale variable of SPECS
   or for any of its listwise variables.  Sets that are not scale
   (!SPECS->is_scale) are handled by the first test.

   NOTE(review): the return statements are not visible in this excerpt. */
3650 is_scale_missing (const struct ctables_summary_spec_set *specs,
3651 const struct ccase *c)
3653 if (!specs->is_scale)
3656 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3659 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3661 const struct variable *var = specs->listwise_vars[i];
3662 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of section S
   identified by CATS, creating the cell if necessary, and updates the
   totals of the domains that the cell belongs to.

   IS_MISSING and EXCLUDED_MISSING are forwarded to ctables_summary_add()
   and also control which domain counters are updated. */
3670 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3671 const struct ctables_category *cats[PIVOT_N_AXES][10],
3672 bool is_missing, bool excluded_missing,
3673 double d_weight, double e_weight)
3675 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3676 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3678 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
/* Feed the case to every summary of the cell. */
3680 bool scale_missing = is_scale_missing (specs, c);
3681 for (size_t i = 0; i < specs->n; i++)
3682 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3683 specs->var, case_data (c, specs->var), specs->is_scale,
3684 scale_missing, is_missing, excluded_missing,
3685 d_weight, e_weight);
/* NOTE(review): omit_domains is built elsewhere as a bitmask of
   (1u << CTDT_...) values, but the test below uses logical `&&', not
   bitwise `&'.  Since (1u << dt) is never zero, the condition reduces to
   "omit_domains == 0": a cell with any bit set contributes to no domain at
   all.  Confirm whether that is intended before changing it; a per-bit test
   would let total and subtotal cells add to domains that they share with
   ordinary cells. */
3686 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3687 if (!(cell->omit_domains && (1u << dt)))
3689 struct ctables_domain *d = cell->domains[dt];
3690 d->d_total += d_weight;
3691 d->e_total += e_weight;
3693 if (!excluded_missing)
3695 d->d_count += d_weight;
3696 d->e_count += e_weight;
3701 d->d_valid += d_weight;
3702 d->e_valid += e_weight;
/* Accumulate the table's sum variables into the domain, skipping missing
   values: e_sum is weighted by E_WEIGHT, u_sum is unweighted. */
3705 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3707 /* XXX listwise_missing??? */
3708 const struct variable *var = s->table->sum_vars[i];
3709 double addend = case_num (c, var);
3710 if (!var_is_num_missing (var, addend))
3712 struct ctables_sum *sum = &d->sums[i];
3713 sum->e_sum += addend * e_weight;
3714 sum->u_sum += addend;
/* Adds case C to the cells obtained by replacing categories in CATS with
   the total category of the corresponding variable, recursing so that
   later positions can be replaced as well.  START_AXIS and START_NEST give
   the position at which to start looking for variables; scale variables
   are skipped.

   NOTE(review): the lines that substitute the total into CATS, restore the
   saved category, and test whether a total exists are not visible in this
   excerpt.  START_NEST is used as the initial index for every axis visited
   by the outer loop, not just START_AXIS, unless hidden code resets it. */
3722 recurse_totals (struct ctables_section *s, const struct ccase *c,
3723 const struct ctables_category *cats[PIVOT_N_AXES][10],
3724 bool is_missing, bool excluded_missing,
3725 double d_weight, double e_weight,
3726 enum pivot_axis_type start_axis, size_t start_nest)
3728 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3730 const struct ctables_nest *nest = s->nests[a];
3731 for (size_t i = start_nest; i < nest->n; i++)
3733 if (i == nest->scale_idx)
3736 const struct variable *var = nest->vars[i];
3738 const struct ctables_category *total = ctables_categories_total (
3739 s->table->categories[var_get_dict_index (var)]);
3742 const struct ctables_category *save = cats[a][i];
3744 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3745 d_weight, e_weight);
3746 recurse_totals (s, c, cats, is_missing, excluded_missing,
3747 d_weight, e_weight, a, i + 1);
/* Counterpart of recurse_totals() for subtotals: adds case C to the cells
   obtained by replacing a category in CATS with that category's subtotal
   (SAVE->subtotal), recursing so that later positions can be replaced as
   well.  Scale variables are skipped.

   NOTE(review): the test for whether a category has a subtotal and the
   code that restores CATS afterward are not visible in this excerpt. */
3756 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3757 const struct ctables_category *cats[PIVOT_N_AXES][10],
3758 bool is_missing, bool excluded_missing,
3759 double d_weight, double e_weight,
3760 enum pivot_axis_type start_axis, size_t start_nest)
3762 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3764 const struct ctables_nest *nest = s->nests[a];
3765 for (size_t i = start_nest; i < nest->n; i++)
3767 if (i == nest->scale_idx)
3770 const struct ctables_category *save = cats[a][i];
3773 cats[a][i] = save->subtotal;
3774 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3775 d_weight, e_weight);
3776 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3777 d_weight, e_weight, a, i + 1);
/* Records that VALUE occurred for variable VAR in the hash map OCCURRENCES,
   which holds struct ctables_occurrence nodes keyed on the value.  A new
   node with its own copy of VALUE is inserted; the loop beforehand searches
   for an equal value that is already present.

   NOTE(review): the action taken when an equal value is found (presumably
   an early return, so that each value is stored once) is not visible in
   this excerpt. */
3786 ctables_add_occurrence (const struct variable *var,
3787 const union value *value,
3788 struct hmap *occurrences)
3790 int width = var_get_width (var);
3791 unsigned int hash = value_hash (value, width, 0);
3793 struct ctables_occurrence *o;
3794 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3796 if (value_equal (value, &o->value, width))
3799 o = xmalloc (sizeof *o);
3800 value_clone (&o->value, value, width);
3801 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Each non-scale variable in the section's nests is mapped to its category
   with ctables_categories_match().  The case is then added to the matching
   cell and, through recurse_totals() and recurse_subtotals(), to the
   related total and subtotal cells.  Unless some variable fell into the
   excluded-missing category, each variable's value is also recorded in
   S->occurrences.

   The per-axis category array below has a fixed capacity, so a nest with
   more variables than that is rejected by assertion rather than being
   allowed to overrun the array. */
3805 ctables_cell_insert (struct ctables_section *s,
3806 const struct ccase *c,
3807 double d_weight, double e_weight)
3809 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3811 /* Does at least one categorical variable have a missing value in an included
3812 or excluded category? */
3813 bool is_missing = false;
3815 /* Does at least one categorical variable have a missing value in an excluded
3817 bool excluded_missing = false;
3819 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3821 const struct ctables_nest *nest = s->nests[a];
assert (nest->n <= sizeof cats[a] / sizeof cats[a][0]);
3822 for (size_t i = 0; i < nest->n; i++)
3824 if (i == nest->scale_idx)
3827 const struct variable *var = nest->vars[i];
3828 const union value *value = case_data (c, var);
3830 bool var_missing = var_is_value_missing (var, value) != 0;
3834 cats[a][i] = ctables_categories_match (
3835 s->table->categories[var_get_dict_index (var)], value, var);
3841 static const struct ctables_category cct_excluded_missing = {
3842 .type = CCT_EXCLUDED_MISSING,
3845 cats[a][i] = &cct_excluded_missing;
3846 excluded_missing = true;
3851 if (!excluded_missing)
3852 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3854 const struct ctables_nest *nest = s->nests[a];
3855 for (size_t i = 0; i < nest->n; i++)
3856 if (i != nest->scale_idx)
3858 const struct variable *var = nest->vars[i];
3859 const union value *value = case_data (c, var);
3860 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3864 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3865 d_weight, e_weight);
3867 //if (!excluded_missing)
3869 recurse_totals (s, c, cats, is_missing, excluded_missing,
3870 d_weight, e_weight, 0, 0);
3871 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3872 d_weight, e_weight, 0, 0);
/* Summary specification set that this merge item draws from; the item
   designates one element of set->specs[]. */
3878 const struct ctables_summary_spec_set *set;
3883 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3885 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3886 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3887 if (as->function != bs->function)
3888 return as->function > bs->function ? 1 : -1;
3889 else if (as->percentile != bs->percentile)
3890 return as->percentile < bs->percentile ? 1 : -1;
3892 const char *as_label = as->label ? as->label : "";
3893 const char *bs_label = bs->label ? bs->label : "";
3894 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of variable VAR and passes the output string
   to pivot_value_format().  A temporary pivot value is created for the
   purpose and destroyed afterward.

   NOTE(review): the final parameter (the output string `s') and the return
   value are declared on lines not visible in this excerpt. */
3898 ctables_category_format_number (double number, const struct variable *var,
3901 struct pivot_value *pv = pivot_value_new_var_value (
3902 var, &(union value) { .f = number });
3903 pivot_value_format (pv, NULL, s);
3904 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR and passes OUT to
   pivot_value_format().  STRING is first copied into a temporary buffer of
   the variable's width, padded on the right with spaces, so that it has
   the shape of a value of VAR.

   NOTE(review): the release of the temporary buffer and the return value
   are on lines not visible in this excerpt. */
3908 ctables_category_format_string (struct substring string,
3909 const struct variable *var, struct string *out)
3911 int width = var_get_width (var);
3912 char *s = xmalloc (width);
3913 buf_copy_rpad (s, width, string.string, string.length, ' ');
3914 struct pivot_value *pv = pivot_value_new_var_value (
3915 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3916 pivot_value_format (pv, NULL, out);
3917 pivot_value_destroy (pv);
/* Appends a textual description of category CAT of variable VAR to the
   output string `s'.  The visible cases produce: a formatted number or
   string; "<low> THRU <high>" for numeric and string ranges; the literal
   text "MISSING" or "OTHERNM"; "&" followed by the postcompute's name; or
   the category's total label.

   NOTE(review): the switch statement, most case labels, the parameter list
   tail, and the return values are not visible in this excerpt. */
3922 ctables_category_format_label (const struct ctables_category *cat,
3923 const struct variable *var,
3929 ctables_category_format_number (cat->number, var, s);
3933 ctables_category_format_string (cat->string, var, s);
3937 ctables_category_format_number (cat->nrange[0], var, s);
3938 ds_put_format (s, " THRU ");
3939 ctables_category_format_number (cat->nrange[1], var, s);
3943 ctables_category_format_string (cat->srange[0], var, s);
3944 ds_put_format (s, " THRU ");
3945 ctables_category_format_string (cat->srange[1], var, s);
3949 ds_put_cstr (s, "MISSING");
3953 ds_put_cstr (s, "OTHERNM");
3956 case CCT_POSTCOMPUTE:
3957 ds_put_format (s, "&%s", cat->pc->name);
3962 ds_put_cstr (s, cat->total_label);
3968 case CCT_EXCLUDED_MISSING:
/* Builds the label for postcompute category CAT of variable VAR as a new
   pivot value.

   The postcompute's label text is scanned for occurrences of ")LABEL[".
   The text after each occurrence, up to the next "]", is parsed as a
   1-based index into CATS, and the label of that category, as produced by
   ctables_category_format_label(), is substituted.  Text outside such
   references is copied through unchanged.  When the label contains no
   reference at all, it is returned directly as user text.

   The final return below yields the raw label text; NOTE(review): it
   appears to be the fallback for malformed references (unterminated "[",
   non-numeric or out-of-range index, unformattable category), but the
   jumps leading to it are not visible in this excerpt. */
3975 static struct pivot_value *
3976 ctables_postcompute_label (const struct ctables_categories *cats,
3977 const struct ctables_category *cat,
3978 const struct variable *var)
3980 struct substring in = ss_cstr (cat->pc->label);
3981 struct substring target = ss_cstr (")LABEL[");
3983 struct string out = DS_EMPTY_INITIALIZER;
3986 size_t chunk = ss_find_substring (in, target);
/* No further reference: finish with whatever input remains. */
3987 if (chunk == SIZE_MAX)
3989 if (ds_is_empty (&out))
3990 return pivot_value_new_user_text (in.string, in.length);
3993 ds_put_substring (&out, in);
3994 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then step past ")LABEL[". */
3998 ds_put_substring (&out, ss_head (in, chunk));
3999 ss_advance (&in, chunk + target.length);
4001 struct substring idx_s;
4002 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be entirely numeric and within 1...n_cats. */
4005 long int idx = strtol (idx_s.string, &tail, 10);
4006 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4009 struct ctables_category *cat2 = &cats->cats[idx - 1];
4010 if (!ctables_category_format_label (cat2, var, &out))
4016 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4019 static struct pivot_value *
4020 ctables_category_create_value_label (const struct ctables_categories *cats,
4021 const struct ctables_category *cat,
4022 const struct variable *var,
4023 const union value *value)
4025 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4026 ? ctables_postcompute_label (cats, cat, var)
4027 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4028 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4029 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry whose value equals VALUE,
   which has the given WIDTH.  HASH must be the hash under which such an
   entry would have been inserted.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching entry is returned, or null if there is none. */
4032 static struct ctables_value *
4033 ctables_value_find__ (struct ctables_table *t, const union value *value,
4034 int width, unsigned int hash)
4036 struct ctables_value *clv;
4037 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4038 hash, &t->clabels_values_map)
4039 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is searched first with
   ctables_value_find__(); a new entry holding a copy of VALUE is allocated
   and inserted under the value's hash.

   NOTE(review): the remaining parameter (the value's width) and the test
   that skips insertion when the value is already present are on lines not
   visible in this excerpt. */
4045 ctables_value_insert (struct ctables_table *t, const union value *value,
4048 unsigned int hash = value_hash (value, width, 0);
4049 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4052 clv = xmalloc (sizeof *clv);
4053 value_clone (&clv->value, value, width);
4054 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T's map of category-label
   values.  This is a convenience wrapper that computes the hash and
   delegates to ctables_value_find__(), whose result it returns. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively appends one section to T for every combination of nests
   across the pivot axes.

   While A names an axis, the function iterates IX[A] over that axis's
   nests (an axis with no nests still gets a single iteration) and recurses
   on the next axis.  Once every axis has an index, it appends a section to
   T->sections and initializes its hash maps: one occurrence map per nest
   variable on each axis, and one domain map per domain type.

   NOTE(review): T->sections is assumed to have been allocated by the
   caller with room for every combination; no allocation of it is visible
   here.  Some initializer members and statements in the base case are also
   not visible in this excerpt. */
4067 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4068 size_t ix[PIVOT_N_AXES])
4070 if (a < PIVOT_N_AXES)
4072 size_t limit = MAX (t->stacks[a].n, 1);
4073 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4074 ctables_table_add_section (t, a + 1, ix);
4078 struct ctables_section *s = &t->sections[t->n_sections++];
4079 *s = (struct ctables_section) {
4081 .cells = HMAP_INITIALIZER (s->cells),
4083 for (a = 0; a < PIVOT_N_AXES; a++)
4086 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4088 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4089 for (size_t i = 0; i < nest->n; i++)
4090 hmap_init (&s->occurrences[a][i]);
4092 for (size_t i = 0; i < N_CTDTS; i++)
4093 hmap_init (&s->domains[i]);
/* Binary operator callback for postcompute expressions; passed as the
   `evaluate' argument of ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns the sum of A and B. */
4098 ctpo_add (double a, double b)
/* Binary operator callback for postcompute expressions; passed as the
   `evaluate' argument of ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A minus B. */
4104 ctpo_sub (double a, double b)
/* Binary operator callback for postcompute expressions; passed as the
   `evaluate' argument of ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns the product of A and B. */
4110 ctpo_mul (double a, double b)
/* Division callback for postcompute expressions: returns A divided by B,
   or SYSMIS when B is zero. */
4116 ctpo_div (double a, double b)
4118 return b ? a / b : SYSMIS;
/* Exponentiation callback for postcompute expressions: computes pow (A, B).
   The caller's errno is captured on entry.  NOTE(review): the remaining
   lines are not visible in this excerpt; presumably errno is used to detect
   a failed pow() and is restored before returning. */
4122 ctpo_pow (double a, double b)
4124 int save_errno = errno;
4126 double result = pow (a, b);
/* Unary operator callback for postcompute expressions; B is unused so that
   the signature matches the binary callbacks.  NOTE(review): the body is
   not visible in this excerpt; presumably it returns the negation of A. */
4134 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute expression
   for one cell.  (Some members are declared on lines not visible in this
   excerpt.) */
4139 struct ctables_pcexpr_evaluate_ctx
/* Cell whose value is being computed; it supplies the category and value
   for every variable other than the postcompute's own. */
4141 const struct ctables_cell *cell;
/* Section that contains CELL and the cells that the expression refers to. */
4142 const struct ctables_section *section;
/* Categories of the variable that the postcompute belongs to. */
4143 const struct ctables_categories *cats;
/* Axis that holds the postcompute's variable. */
4144 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute() when resolving the
   categories named in the expression. */
4147 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
4150 static double ctables_pcexpr_evaluate (
4151 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E of a postcompute expression: evaluates its
   first N_ARGS (1 or 2) subexpressions with ctables_pcexpr_evaluate() and
   returns the result of applying EVALUATE to them.  With one argument the
   second operand passed to EVALUATE is 0.

   An operand that is not finite or that equals SYSMIS cuts the evaluation
   short.  NOTE(review): the value returned in that case is not visible in
   this excerpt (presumably SYSMIS). */
4154 ctables_pcexpr_evaluate_nonterminal (
4155 const struct ctables_pcexpr_evaluate_ctx *ctx,
4156 const struct ctables_pcexpr *e, size_t n_args,
4157 double evaluate (double, double))
4159 double args[2] = { 0, 0 };
4160 for (size_t i = 0; i < n_args; i++)
4162 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4163 if (!isfinite (args[i]) || args[i] == SYSMIS)
4166 return evaluate (args[0], args[1]);
/* Returns the summary value, for the summary selected by CTX->summary_idx,
   of the cell that is the same as CTX->cell except that the postcompute
   variable (axis CTX->pc_a, index CTX->pc_a_idx) takes the category and
   value in PC_CV.

   The target cell is located in the section's cell hash map, using the
   same hashing and matching rules as cell insertion: the category pointer
   always takes part, and the data value takes part except for total,
   subtotal, and postcompute categories.

   NOTE(review): what happens when no such cell exists is not visible in
   this excerpt. */
4170 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4171 const struct ctables_cell_value *pc_cv)
4173 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity, substituting PC_CV at the postcompute
   variable's position. */
4176 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4178 const struct ctables_nest *nest = s->nests[a];
4179 for (size_t i = 0; i < nest->n; i++)
4180 if (i != nest->scale_idx)
4182 const struct ctables_cell_value *cv
4183 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4184 : &ctx->cell->axes[a].cvs[i]);
4185 hash = hash_pointer (cv->category, hash);
4186 if (cv->category->type != CCT_TOTAL
4187 && cv->category->type != CCT_SUBTOTAL
4188 && cv->category->type != CCT_POSTCOMPUTE)
4189 hash = value_hash (&cv->value,
4190 var_get_width (nest->vars[i]), hash);
/* Search the bucket for a cell that matches on every non-scale variable. */
4194 struct ctables_cell *tc;
4195 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4199 const struct ctables_nest *nest = s->nests[a];
4200 for (size_t i = 0; i < nest->n; i++)
4201 if (i != nest->scale_idx)
4203 const struct ctables_cell_value *p_cv
4204 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4205 : &ctx->cell->axes[a].cvs[i]);
4206 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4207 if (p_cv->category != t_cv->category
4208 || (p_cv->category->type != CCT_TOTAL
4209 && p_cv->category->type != CCT_SUBTOTAL
4210 && p_cv->category->type != CCT_POSTCOMPUTE
4211 && !value_equal (&p_cv->value,
4213 var_get_width (nest->vars[i]))))
/* Read the requested summary from the target cell, using the specification
   set that corresponds to that cell's summary variant. */
4225 const struct ctables_table *t = s->table;
4226 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4227 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4228 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4229 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and evaluated through
   ctables_pcexpr_evaluate_category().  Operator nodes are evaluated through
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*()
   callback.

   NOTE(review): the switch statement, several case labels, and some local
   declarations (`sum', `s') are on lines not visible in this excerpt. */
4233 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4234 const struct ctables_pcexpr *e)
/* Operands that can cover several data values: add up the results for
   every value that occurred for the postcompute variable in this section
   and that maps to the operand's category. */
4241 case CTPO_CAT_NRANGE:
4242 case CTPO_CAT_SRANGE:
4243 case CTPO_CAT_MISSING:
4244 case CTPO_CAT_OTHERNM:
4246 struct ctables_cell_value cv = {
4247 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4249 assert (cv.category != NULL);
4251 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4252 const struct ctables_occurrence *o;
4255 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4256 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4257 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4259 cv.value = o->value;
4260 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that identify a single cell: a number, a subtotal, or the
   total. */
4265 case CTPO_CAT_NUMBER:
4266 case CTPO_CAT_SUBTOTAL:
4267 case CTPO_CAT_TOTAL:
4269 struct ctables_cell_value cv = {
4270 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4271 .value = { .f = e->number },
4273 assert (cv.category != NULL);
4274 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand.  When the variable is wider than the string, a copy
   padded on the right with spaces to the variable's width is made so that
   it has the shape of a value of the variable. */
4277 case CTPO_CAT_STRING:
4279 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4281 if (width > e->string.length)
4283 s = xmalloc (width);
4284 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4287 const struct ctables_category *category
4288 = ctables_find_category_for_postcompute (
4289 ctx->section->table->ctables->dict,
4290 ctx->cats, ctx->parse_format, e);
4291 assert (category != NULL);
/* The category found may be numeric even though the operand was written as
   a string, so the value is taken from the category in that case. */
4293 struct ctables_cell_value cv = { .category = category };
4294 if (category->type == CCT_NUMBER)
4295 cv.value.f = category->number;
4296 else if (category->type == CCT_STRING)
4297 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4301 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators. */
4307 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4310 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4313 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4316 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4319 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4322 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL, which must have its
   `postcompute' flag set, by scanning the cell's values on every axis of
   section S for a category of type CCT_POSTCOMPUTE.  The position of the
   variable is reported through PC_A_P and PC_A_IDX_P.

   NOTE(review): the handling of a cell in which more than one postcompute
   applies, the null checks on the output pointers, and the return
   statements are on lines not visible in this excerpt. */
4328 static const struct ctables_category *
4329 ctables_cell_postcompute (const struct ctables_section *s,
4330 const struct ctables_cell *cell,
4331 enum pivot_axis_type *pc_a_p,
4334 assert (cell->postcompute);
4335 const struct ctables_category *pc_cat = NULL;
4336 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4337 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4339 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4340 if (cv->category->type == CCT_POSTCOMPUTE)
4344 /* Multiple postcomputes cross each other. The value is
4349 pc_cat = cv->category;
4353 *pc_a_idx_p = pc_a_idx;
4357 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS, by evaluating the postcompute's expression.

   If the postcompute has summary specifications of its own, the first one
   that matches SS in both function and percentile supplies the output
   format through *FORMAT and *IS_CTABLES_FORMAT.

   NOTE(review): the final parameter (`summary_idx'), the checks between the
   visible statements, and some members of the context initializer are on
   lines not visible in this excerpt. */
4362 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4363 const struct ctables_cell *cell,
4364 const struct ctables_summary_spec *ss,
4365 struct fmt_spec *format,
4366 bool *is_ctables_format,
4369 enum pivot_axis_type pc_a = 0;
4370 size_t pc_a_idx = 0;
4371 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4372 s, cell, &pc_a, &pc_a_idx);
4376 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a format override among the postcompute's own specifications. */
4379 for (size_t i = 0; i < pc->specs->n; i++)
4381 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4382 if (ss->function == ss2->function
4383 && ss->percentile == ss2->percentile)
4385 *format = ss2->format;
4386 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that the
   postcompute belongs to. */
4392 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4393 const struct ctables_categories *cats = s->table->categories[
4394 var_get_dict_index (var)];
4395 struct ctables_pcexpr_evaluate_ctx ctx = {
4400 .pc_a_idx = pc_a_idx,
4401 .summary_idx = summary_idx,
4402 .parse_format = pc_cat->parse_format,
4404 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS as UTF-8 text, then adjusts
   the position of a minus sign as described in the comment below.

   NOTE(review): the return statement is not visible in this excerpt;
   presumably the string `s' is returned and owned by the caller. */
4408 ctables_format (double d, const struct fmt_spec *format,
4409 const struct fmt_settings *settings)
4411 const union value v = { .f = d };
4412 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4414 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4415 produce the results we want for negative numbers, putting the negative
4416 sign in the wrong spot, before the prefix instead of after it. We can't,
4417 in fact, produce the desired results using a custom-currency
4418 specification. Instead, we postprocess the output, moving the negative
4421 NEQUAL: "-N=3" => "N=-3"
4422 PAREN: "-(3)" => "(-3)"
4423 PCTPAREN: "-(3%)" => "(-3%)"
4425 This transformation doesn't affect NEGPAREN. */
/* Only the first '-' is considered, and one that directly follows an 'E' is
   left alone (presumably because it is an exponent sign). */
4426 char *minus_src = strchr (s, '-');
4427 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination is the '=' of "N=" if that prefix is present, otherwise
   the first '('.  Moving the sign to that position shifts the characters in
   between one place toward the start of the string. */
4429 char *n_equals = strstr (s, "N=");
4430 char *lparen = strchr (s, '(');
4431 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4433 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest on axis A of table T.  Two conditions are tested for
   each nest: whether it is anything other than a single variable that is
   also the scale variable, and whether that variable's variable-label
   setting differs from CTVL_NONE.

   NOTE(review): the return statements are not visible in this excerpt;
   judging by the function's name, either condition presumably makes the
   result false and the result is true otherwise. */
4439 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4441 for (size_t i = 0; i < t->stacks[a].n; i++)
4443 struct ctables_nest *nest = &t->stacks[a].nests[i];
4444 if (nest->n != 1 || nest->scale_idx != 0)
4447 enum ctables_vlabel vlabel
4448 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4449 if (vlabel != CTVL_NONE)
4456 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4458 struct pivot_table *pt = pivot_table_create__ (
4460 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4461 : pivot_value_new_text (N_("Custom Tables"))),
4464 pivot_table_set_caption (
4465 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4467 pivot_table_set_corner_text (
4468 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4470 bool summary_dimension = (t->summary_axis != t->slabels_axis
4471 || (!t->slabels_visible
4472 && t->summary_specs.n > 1));
4473 if (summary_dimension)
4475 struct pivot_dimension *d = pivot_dimension_create (
4476 pt, t->slabels_axis, N_("Statistics"));
4477 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4478 if (!t->slabels_visible)
4479 d->hide_all_labels = true;
4480 for (size_t i = 0; i < specs->n; i++)
4481 pivot_category_create_leaf (
4482 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4485 bool categories_dimension = t->clabels_example != NULL;
4486 if (categories_dimension)
4488 struct pivot_dimension *d = pivot_dimension_create (
4489 pt, t->label_axis[t->clabels_from_axis],
4490 t->clabels_from_axis == PIVOT_AXIS_ROW
4491 ? N_("Row Categories")
4492 : N_("Column Categories"));
4493 const struct variable *var = t->clabels_example;
4494 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4495 for (size_t i = 0; i < t->n_clabels_values; i++)
4497 const struct ctables_value *value = t->clabels_values[i];
4498 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4499 assert (cat != NULL);
4500 pivot_category_create_leaf (
4501 d->root, ctables_category_create_value_label (c, cat,
4507 pivot_table_set_look (pt, ct->look);
4508 struct pivot_dimension *d[PIVOT_N_AXES];
4509 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4511 static const char *names[] = {
4512 [PIVOT_AXIS_ROW] = N_("Rows"),
4513 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4514 [PIVOT_AXIS_LAYER] = N_("Layers"),
4516 d[a] = (t->axes[a] || a == t->summary_axis
4517 ? pivot_dimension_create (pt, a, names[a])
4522 assert (t->axes[a]);
4524 for (size_t i = 0; i < t->stacks[a].n; i++)
4526 struct ctables_nest *nest = &t->stacks[a].nests[i];
4527 struct ctables_section **sections = xnmalloc (t->n_sections,
4529 size_t n_sections = 0;
4531 size_t n_total_cells = 0;
4532 size_t max_depth = 0;
4533 for (size_t j = 0; j < t->n_sections; j++)
4534 if (t->sections[j].nests[a] == nest)
4536 struct ctables_section *s = &t->sections[j];
4537 sections[n_sections++] = s;
4538 n_total_cells += hmap_count (&s->cells);
4540 size_t depth = s->nests[a]->n;
4541 max_depth = MAX (depth, max_depth);
4544 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4546 size_t n_sorted = 0;
4548 for (size_t j = 0; j < n_sections; j++)
4550 struct ctables_section *s = sections[j];
4552 struct ctables_cell *cell;
4553 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4555 sorted[n_sorted++] = cell;
4556 assert (n_sorted <= n_total_cells);
4559 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4560 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4563 if (a == PIVOT_AXIS_ROW)
4565 size_t ids[N_CTDTS];
4566 memset (ids, 0, sizeof ids);
4567 for (size_t j = 0; j < n_sorted; j++)
4569 struct ctables_cell *cell = sorted[j];
4570 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4572 struct ctables_domain *domain = cell->domains[dt];
4573 if (!domain->sequence)
4574 domain->sequence = ++ids[dt];
4581 for (size_t j = 0; j < n_sorted; j++)
4583 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4588 struct ctables_level
4590 enum ctables_level_type
4592 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4593 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4594 CTL_SUMMARY, /* Summary functions. */
4598 enum settings_value_show vlabel; /* CTL_VAR only. */
4601 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4602 size_t n_levels = 0;
4603 for (size_t k = 0; k < nest->n; k++)
4605 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4606 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4608 if (vlabel != CTVL_NONE)
4610 levels[n_levels++] = (struct ctables_level) {
4612 .vlabel = (enum settings_value_show) vlabel,
4617 if (nest->scale_idx != k
4618 && (k != nest->n - 1 || t->label_axis[a] == a))
4620 levels[n_levels++] = (struct ctables_level) {
4621 .type = CTL_CATEGORY,
4627 if (!summary_dimension && a == t->slabels_axis)
4629 levels[n_levels++] = (struct ctables_level) {
4630 .type = CTL_SUMMARY,
4631 .var_idx = SIZE_MAX,
4635 /* Pivot categories:
4637 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4638 - category for nest->vars[0], if nest->scale_idx != 0
4639 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4640 - category for nest->vars[1], if nest->scale_idx != 1
4642 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4643 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4644 - summary function, if 'a == t->slabels_axis && a ==
4647 Additional dimensions:
4649 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4651 - If 't->label_axis[b] == a' for some 'b != a', add a category
4656 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4658 for (size_t j = 0; j < n_sorted; j++)
4660 struct ctables_cell *cell = sorted[j];
4661 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4663 size_t n_common = 0;
4666 for (; n_common < n_levels; n_common++)
4668 const struct ctables_level *level = &levels[n_common];
4669 if (level->type == CTL_CATEGORY)
4671 size_t var_idx = level->var_idx;
4672 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4673 if (prev->axes[a].cvs[var_idx].category != c)
4675 else if (c->type != CCT_SUBTOTAL
4676 && c->type != CCT_TOTAL
4677 && c->type != CCT_POSTCOMPUTE
4678 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4679 &cell->axes[a].cvs[var_idx].value,
4680 var_get_type (nest->vars[var_idx])))
4686 for (size_t k = n_common; k < n_levels; k++)
4688 const struct ctables_level *level = &levels[k];
4689 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4690 if (level->type == CTL_SUMMARY)
4692 assert (k == n_levels - 1);
4694 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4695 for (size_t m = 0; m < specs->n; m++)
4697 int leaf = pivot_category_create_leaf (
4698 parent, ctables_summary_label (&specs->specs[m],
4706 const struct variable *var = nest->vars[level->var_idx];
4707 struct pivot_value *label;
4708 if (level->type == CTL_VAR)
4710 label = pivot_value_new_variable (var);
4711 label->variable.show = level->vlabel;
4713 else if (level->type == CTL_CATEGORY)
4715 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4716 label = ctables_category_create_value_label (
4717 t->categories[var_get_dict_index (var)],
4718 cv->category, var, &cv->value);
4723 if (k == n_levels - 1)
4724 prev_leaf = pivot_category_create_leaf (parent, label);
4726 groups[k] = pivot_category_create_group__ (parent, label);
4730 cell->axes[a].leaf = prev_leaf;
4739 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4743 size_t n_total_cells = 0;
4744 for (size_t j = 0; j < t->n_sections; j++)
4745 n_total_cells += hmap_count (&t->sections[j].cells);
4747 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4748 size_t n_sorted = 0;
4749 for (size_t j = 0; j < t->n_sections; j++)
4751 const struct ctables_section *s = &t->sections[j];
4752 struct ctables_cell *cell;
4753 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4755 sorted[n_sorted++] = cell;
4757 assert (n_sorted <= n_total_cells);
4758 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4760 size_t ids[N_CTDTS];
4761 memset (ids, 0, sizeof ids);
4762 for (size_t j = 0; j < n_sorted; j++)
4764 struct ctables_cell *cell = sorted[j];
4765 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4767 struct ctables_domain *domain = cell->domains[dt];
4768 if (!domain->sequence)
4769 domain->sequence = ++ids[dt];
4776 for (size_t i = 0; i < t->n_sections; i++)
4778 struct ctables_section *s = &t->sections[i];
4780 struct ctables_cell *cell;
4781 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4786 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4787 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4788 for (size_t j = 0; j < specs->n; j++)
4791 size_t n_dindexes = 0;
4793 if (summary_dimension)
4794 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4796 if (categories_dimension)
4798 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4799 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4800 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4801 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4804 dindexes[n_dindexes++] = ctv->leaf;
4807 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4810 int leaf = cell->axes[a].leaf;
4811 if (a == t->summary_axis && !summary_dimension)
4813 dindexes[n_dindexes++] = leaf;
4816 const struct ctables_summary_spec *ss = &specs->specs[j];
4818 struct fmt_spec format = specs->specs[j].format;
4819 bool is_ctables_format = ss->is_ctables_format;
4820 double d = (cell->postcompute
4821 ? ctables_cell_calculate_postcompute (
4822 s, cell, ss, &format, &is_ctables_format, j)
4823 : ctables_summary_value (cell, &cell->summaries[j],
4826 struct pivot_value *value;
4827 if (ct->hide_threshold != 0
4828 && d < ct->hide_threshold
4829 && ctables_summary_function_is_count (ss->function))
4831 value = pivot_value_new_user_text_nocopy (
4832 xasprintf ("<%d", ct->hide_threshold));
4834 else if (d == 0 && ct->zero)
4835 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4836 else if (d == SYSMIS && ct->missing)
4837 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4838 else if (is_ctables_format)
4839 value = pivot_value_new_user_text_nocopy (
4840 ctables_format (d, &format, &ct->ctables_formats));
4843 value = pivot_value_new_number (d);
4844 value->numeric.format = format;
4846 /* XXX should text values be right-justified? */
4847 pivot_table_put (pt, dindexes, n_dindexes, value);
4852 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T can be shown on
   T->label_axis[A].  Along the way it records A in T->clabels_from_axis and
   the innermost variable of the first nest on A in T->clabels_example.

   Each failed requirement is reported with msg (SE, ...), naming the
   subcommand (ROWLABELS or COLLABELS) and the position (LAYER or
   OPPOSITE). */
4856 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4858 enum pivot_axis_type label_pos = t->label_axis[a];
4862 t->clabels_from_axis = a;
/* These two names are used only in the error messages below. */
4864 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4865 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4867 const struct ctables_stack *stack = &t->stacks[a];
4871 const struct ctables_nest *n0 = &stack->nests[0];
4874 assert (stack->n == 1);
/* V0, the innermost variable of the first nest, and its categories C0 are
   the reference that every other nest is compared against. */
4878 const struct variable *v0 = n0->vars[n0->n - 1];
4879 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4880 t->clabels_example = v0;
/* Categories of type CCT_FUNCTION (sorting based on a summary function) are
   rejected. */
4882 for (size_t i = 0; i < c0->n_cats; i++)
4883 if (c0->cats[i].type == CCT_FUNCTION)
4885 msg (SE, _("%s=%s is not allowed with sorting based "
4886 "on a summary function."),
4887 subcommand_name, pos_name);
/* The innermost variable must not be the nest's scale variable. */
4890 if (n0->n - 1 == n0->scale_idx)
4892 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4893 "but %s is a scale variable."),
4894 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest's innermost variable VI must be categorical and must
   agree with V0 in width, value labels, and category specification. */
4898 for (size_t i = 1; i < stack->n; i++)
4900 const struct ctables_nest *ni = &stack->nests[i];
4902 const struct variable *vi = ni->vars[ni->n - 1];
4903 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4905 if (ni->n - 1 == ni->scale_idx)
4907 msg (SE, _("%s=%s requires the variables to be moved to be "
4908 "categorical, but %s is a scale variable."),
4909 subcommand_name, pos_name, var_get_name (vi));
4912 if (var_get_width (v0) != var_get_width (vi))
4914 msg (SE, _("%s=%s requires the variables to be "
4915 "moved to have the same width, but %s has "
4916 "width %d and %s has width %d."),
4917 subcommand_name, pos_name,
4918 var_get_name (v0), var_get_width (v0),
4919 var_get_name (vi), var_get_width (vi));
4922 if (!val_labs_equal (var_get_value_labels (v0),
4923 var_get_value_labels (vi)))
4925 msg (SE, _("%s=%s requires the variables to be "
4926 "moved to have the same value labels, but %s "
4927 "and %s have different value labels."),
4928 subcommand_name, pos_name,
4929 var_get_name (v0), var_get_name (vi));
4932 if (!ctables_categories_equal (c0, ci))
4934 msg (SE, _("%s=%s requires the variables to be "
4935 "moved to have the same category "
4936 "specifications, but %s and %s have different "
4937 "category specifications."),
4938 subcommand_name, pos_name,
4939 var_get_name (v0), var_get_name (vi));
/* Searches the *N entries of *SUM_VARS for VAR.  When VAR is not found,
   stores it at index *N, first enlarging the array with x2nrealloc() if *N
   has reached *ALLOCATED.

   NOTE(review): the return statements are not visible here.  The caller
   saves the result in a summary spec's sum_var_idx, so presumably the
   result is VAR's index in *SUM_VARS -- confirm. */
4948 add_sum_var (struct variable *var,
4949 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4951 for (size_t i = 0; i < *n; i++)
4952 if (var == (*sum_vars)[i])
/* Not found: grow if full, then append. */
4955 if (*n >= *allocated)
4956 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4957 (*sum_vars)[*n] = var;
/* Walks axis expression A, registering variables in *SUM_VARS (whose length
   is *N and capacity *ALLOCATED) through add_sum_var().

   For every summary specification in A->specs[] whose function satisfies
   ctables_function_is_pctsum(), A->var is registered and the value returned
   by add_sum_var() is saved in the spec's sum_var_idx.  Both of A's
   subexpressions are visited recursively.

   NOTE(review): the conditions that choose between the two loops are not
   visible here; presumably the first applies to variable nodes and the
   second to nodes that combine two subexpressions -- confirm. */
4962 enumerate_sum_vars (const struct ctables_axis *a,
4963 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every spec of every summary variant. */
4971 for (size_t i = 0; i < N_CSVS; i++)
4972 for (size_t j = 0; j < a->specs[i].n; j++)
4974 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4975 if (ctables_function_is_pctsum (spec->function))
4976 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both operands. */
4982 for (size_t i = 0; i < 2; i++)
4983 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  In order, it:

   - Builds T->stacks[] for each axis with enumerate_fts() and fills in each
     nest's domains[] and n_domains[] arrays.  A stack holding a single
     empty nest is also created, with T->label_axis[a] forced to the axis
     itself (NOTE(review): presumably for axes without an expression; the
     condition is not visible here).

   - Gives each nest on the summary axis a default summary specification
     (CTSF_MEAN for scale specs, CTSF_COUNT otherwise) when it has none, and
     clones the CSV_CELL specs into CSV_TOTAL when the latter is empty.

   - When T->ctables->smissing_listwise is set, collects the scale variables
     of the other nests in the same group into each spec set's
     listwise_vars.

   - Merges the summary specs of all nests into T->summary_specs, recording
     in each original spec's axis_idx its position in the merged list.

   - Enumerates sum variables with enumerate_sum_vars().

   Returns true only if ctables_check_label_position() succeeds for both the
   row and the column axis. */
4989 ctables_prepare_table (struct ctables_table *t)
4991 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4994 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4996 for (size_t j = 0; j < t->stacks[a].n; j++)
4998 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each domain type, nest->domains[dt] receives the indexes K of the
   nest's variables that take part in that domain; it is allocated with room
   for all nest->n variables. */
4999 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5001 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
5002 nest->n_domains[dt] = 0;
5004 for (size_t k = 0; k < nest->n; k++)
5006 if (k == nest->scale_idx)
5015 if (a != PIVOT_AXIS_LAYER)
5022 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
5023 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
5024 : a == PIVOT_AXIS_ROW)
/* True for the innermost variable, or for the one just outside it when the
   innermost variable is the scale variable. */
5026 if (k == nest->n - 1
5027 || (nest->scale_idx == nest->n - 1
5028 && k == nest->n - 2))
5034 if (a == PIVOT_AXIS_COLUMN)
5039 if (a == PIVOT_AXIS_ROW)
5044 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A stack consisting of one nest with no variables. */
5051 struct ctables_nest *nest = xmalloc (sizeof *nest);
5052 *nest = (struct ctables_nest) { .n = 0 };
5053 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5055 /* There's no point in moving labels away from an axis that has no
5056 labels, so avoid dealing with the special cases around that. */
5057 t->label_axis[a] = a;
/* Summary specifications live on the nests of the summary axis. */
5060 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5061 for (size_t i = 0; i < stack->n; i++)
5063 struct ctables_nest *nest = &stack->nests[i];
/* No cell specs were given: synthesize a single default one. */
5064 if (!nest->specs[CSV_CELL].n)
5066 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5067 specs->specs = xmalloc (sizeof *specs->specs);
5070 enum ctables_summary_function function
5071 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
5073 *specs->specs = (struct ctables_summary_spec) {
5074 .function = function,
5075 .format = ctables_summary_default_format (function, specs->var),
5078 specs->var = nest->vars[0];
5080 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5081 &nest->specs[CSV_CELL]);
/* Cell specs exist but total specs do not: totals reuse the cell specs. */
5083 else if (!nest->specs[CSV_TOTAL].n)
5084 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5085 &nest->specs[CSV_CELL]);
5087 if (t->ctables->smissing_listwise)
5089 struct variable **listwise_vars = NULL;
5091 size_t allocated = 0;
/* Scan the nests starting at this nest's group head; only nests with the
   same group_head are of interest. */
5093 for (size_t j = nest->group_head; j < stack->n; j++)
5095 const struct ctables_nest *other_nest = &stack->nests[j];
5096 if (other_nest->group_head != nest->group_head)
/* Another nest that has a scale variable (scale_idx < n). */
5099 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5102 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5103 sizeof *listwise_vars);
5104 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Store the collected variables in each summary variant's spec set;
   xmemdup() makes a separate copy of the array. */
5107 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5110 listwise_vars = xmemdup (listwise_vars,
5111 n * sizeof *listwise_vars);
5112 nest->specs[sv].listwise_vars = listwise_vars;
5113 nest->specs[sv].n_listwise_vars = n;
/* Merge: ITEMS holds one cursor per (nest, summary variant) spec set. */
5118 struct ctables_summary_spec_set *merged = &t->summary_specs;
5119 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5121 for (size_t j = 0; j < stack->n; j++)
5123 const struct ctables_nest *nest = &stack->nests[j];
5125 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5126 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest pending item and append its spec to MERGED. */
5131 struct merge_item min = items[0];
5132 for (size_t j = 1; j < n_left; j++)
5133 if (merge_item_compare_3way (&items[j], &min) < 0)
5136 if (merged->n >= merged->allocated)
5137 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5138 sizeof *merged->specs);
5139 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item equal to MIN maps to the merged entry just added and advances
   its cursor; an exhausted item is replaced by the last pending one. */
5141 for (size_t j = 0; j < n_left; )
5143 if (merge_item_compare_3way (&items[j], &min) == 0)
5145 struct merge_item *item = &items[j];
5146 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5147 if (++item->ofs >= item->set->n)
5149 items[j] = items[--n_left];
/* Diagnostic output of the merged list and of each spec's axis_idx. */
5159 for (size_t j = 0; j < merged->n; j++)
5160 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5162 for (size_t j = 0; j < stack->n; j++)
5164 const struct ctables_nest *nest = &stack->nests[j];
5165 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5167 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5168 for (size_t k = 0; k < specs->n; k++)
5169 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5170 specs->specs[k].axis_idx);
5176 size_t allocated_sum_vars = 0;
5177 enumerate_sum_vars (t->axes[t->summary_axis],
5178 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5180 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5181 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, takes case C's value of the nest's
   innermost variable and, if it matches one of that variable's categories,
   records it with ctables_value_insert(). */
5185 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5186 enum pivot_axis_type a)
5188 struct ctables_stack *stack = &t->stacks[a];
5189 for (size_t i = 0; i < stack->n; i++)
5191 const struct ctables_nest *nest = &stack->nests[i];
5192 const struct variable *var = nest->vars[nest->n - 1];
5193 const union value *value = case_data (c, var);
/* NOTE(review): the statement controlled by this test is not visible here;
   presumably system-missing numeric values are skipped -- confirm. */
5195 if (var_is_numeric (var) && value->f == SYSMIS)
5198 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5200 ctables_value_insert (t, value, var_get_width (var));
5205 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5207 const struct ctables_value *const *ap = a_;
5208 const struct ctables_value *const *bp = b_;
5209 const struct ctables_value *a = *ap;
5210 const struct ctables_value *b = *bp;
5211 const int *width = width_;
5212 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of pointers to the entries of
   T->clabels_values_map, sorts it with compare_clabels_values_3way(), and
   sets each entry's leaf to its index in the sorted array.

   Before that, values from the value labels of T->clabels_example that
   match its categories are inserted into the map.  NOTE(review): the
   condition guarding that step is not visible here -- confirm when it
   applies. */
5216 ctables_sort_clabels_values (struct ctables_table *t)
5218 const struct variable *v0 = t->clabels_example;
5219 int width = var_get_width (v0);
5221 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add labeled values that match V0's categories. */
5224 const struct val_labs *val_labs = var_get_value_labels (v0);
5225 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5226 vl = val_labs_next (val_labs, vl))
5227 if (ctables_categories_match (c0, &vl->value, v0))
5228 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array so that it can be sorted. */
5231 size_t n = hmap_count (&t->clabels_values_map);
5232 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5234 struct ctables_value *clv;
5236 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5237 t->clabels_values[i++] = clv;
5238 t->n_clabels_values = n;
5241 sort (t->clabels_values, n, sizeof *t->clabels_values,
5242 compare_clabels_values_3way, &width);
/* The sorted position becomes the value's leaf index. */
5244 for (size_t i = 0; i < n; i++)
5245 t->clabels_values[i]->leaf = i;
/* For each category in CATS, adds values of VAR to OCCURRENCES with
   ctables_add_occurrence().  Explicit numbers and strings are added
   directly; the other visible branches draw candidate values from VAR's
   value labels and filter them by range, missingness, or not at all.

   NOTE(review): most of the case labels that select these branches are not
   visible here, so the comments below describe only what each branch
   does. */
5249 ctables_add_category_occurrences (const struct variable *var,
5250 struct hmap *occurrences,
5251 const struct ctables_categories *cats)
5253 const struct val_labs *val_labs = var_get_value_labels (var);
5255 for (size_t i = 0; i < cats->n_cats; i++)
5257 const struct ctables_category *c = &cats->cats[i];
/* A single number: add c->number itself. */
5261 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string: copy it into a temporary value, padded on the right with
   spaces to VAR's width, add it, and destroy the temporary. */
5267 int width = var_get_width (var);
5269 value_init (&value, width);
5270 value_copy_buf_rpad (&value, width,
5271 CHAR_CAST (uint8_t *, c->string.string),
5272 c->string.length, ' ');
5273 ctables_add_occurrence (var, &value, occurrences);
5274 value_destroy (&value, width);
/* Numeric range: labeled values within [c->nrange[0], c->nrange[1]]. */
5279 assert (var_is_numeric (var));
5280 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5281 vl = val_labs_next (val_labs, vl))
5282 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5283 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
5287 assert (var_is_alpha (var));
5288 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5289 vl = val_labs_next (val_labs, vl))
5290 if (in_string_range (&vl->value, var, c->srange))
5291 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that VAR treats as missing. */
5295 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5296 vl = val_labs_next (val_labs, vl))
5297 if (var_is_value_missing (var, &vl->value))
5298 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value, unfiltered. */
5302 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5303 vl = val_labs_next (val_labs, vl))
5304 ctables_add_occurrence (var, &vl->value, occurrences);
5307 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless c->include_missing. */
5317 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5318 vl = val_labs_next (val_labs, vl))
5319 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5320 ctables_add_occurrence (var, &vl->value, occurrences);
5323 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates combinations of the recorded occurrences for the
   variables in section S, starting at variable A_IDX on axis A.

   C is a scratch case: at each level the value of the current variable is
   overwritten in C, and the matching category is stored in CATS[A][A_IDX].
   Once all axes have been handled (A >= PIVOT_N_AXES), the combination is
   passed to ctables_cell_insert__().  Postcompute categories have no data
   value, so they are enumerated separately from the occurrences. */
5330 ctables_section_recurse_add_empty_categories (
5331 struct ctables_section *s,
5332 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5333 enum pivot_axis_type a, size_t a_idx)
/* Base case: every axis has been assigned. */
5335 if (a >= PIVOT_N_AXES)
5336 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest or no more variables: move to the next axis. */
5337 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5338 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5341 const struct variable *var = s->nests[a]->vars[a_idx];
5342 const struct ctables_categories *categories = s->table->categories[
5343 var_get_dict_index (var)];
5344 int width = var_get_width (var);
5345 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5346 const struct ctables_occurrence *o;
5347 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence. */
5349 union value *value = case_data_rw (c, var);
5350 value_destroy (value, width);
5351 value_clone (value, &o->value, width);
5352 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5353 assert (cats[a][a_idx] != NULL);
5354 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are visited without changing the scratch case. */
5357 for (size_t i = 0; i < categories->n_cats; i++)
5359 const struct ctables_category *cat = &categories->cats[i];
5360 if (cat->type == CCT_POSTCOMPUTE)
5362 cats[a][a_idx] = cat;
5363 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5370 ctables_section_add_empty_categories (struct ctables_section *s)
5372 bool show_empty = false;
5373 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5375 for (size_t k = 0; k < s->nests[a]->n; k++)
5376 if (k != s->nests[a]->scale_idx)
5378 const struct variable *var = s->nests[a]->vars[k];
5379 const struct ctables_categories *cats = s->table->categories[
5380 var_get_dict_index (var)];
5381 if (cats->show_empty)
5384 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5390 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5391 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5392 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of its accumulated data: the occurrences recorded for
   each non-scale variable, all cells (with their per-axis values and
   summaries), and all domains.  The hash maps are emptied and shrunk. */
5397 ctables_section_clear (struct ctables_section *s)
/* Occurrences: one map per non-scale variable per axis. */
5399 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5401 const struct ctables_nest *nest = s->nests[a];
5402 for (size_t i = 0; i < nest->n; i++)
5403 if (i != nest->scale_idx)
5405 const struct variable *var = nest->vars[i];
5406 int width = var_get_width (var);
5407 struct ctables_occurrence *o, *next;
5408 struct hmap *map = &s->occurrences[a][i];
5409 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5411 value_destroy (&o->value, width);
5412 hmap_delete (map, &o->node);
/* Cells: destroy the stored value of every non-scale variable, then the
   summaries, then unlink the cell from the map. */
5419 struct ctables_cell *cell, *next_cell;
5420 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5422 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5424 const struct ctables_nest *nest = s->nests[a];
5425 for (size_t i = 0; i < nest->n; i++)
5426 if (i != nest->scale_idx)
5427 value_destroy (&cell->axes[a].cvs[i].value,
5428 var_get_width (nest->vars[i]));
5429 free (cell->axes[a].cvs);
/* The spec set is chosen by the cell's summary variant (cell->sv). */
5432 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5433 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5434 for (size_t i = 0; i < specs->n; i++)
5435 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5436 free (cell->summaries);
5438 hmap_delete (&s->cells, &cell->node);
5441 hmap_shrink (&s->cells);
/* Domains, one map per domain type. */
5443 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5445 struct ctables_domain *domain, *next_domain;
5446 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5449 free (domain->sums);
5450 hmap_delete (&s->domains[dt], &domain->node);
5453 hmap_shrink (&s->domains[dt]);
5458 ctables_section_uninit (struct ctables_section *s)
5460 ctables_section_clear (s);
5462 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5464 struct ctables_nest *nest = s->nests[a];
5465 for (size_t i = 0; i < nest->n; i++)
5466 hmap_destroy (&s->occurrences[a][i]);
5467 free (s->occurrences[a]);
5470 hmap_destroy (&s->cells);
5471 for (size_t i = 0; i < N_CTDTS; i++)
5472 hmap_destroy (&s->domains[i]);
/* Clears the accumulated data in table T: every section is cleared with
   ctables_section_clear() and, if T has a category-labels example variable,
   the collected label values and their sorted array are discarded. */
5476 ctables_table_clear (struct ctables_table *t)
5478 for (size_t i = 0; i < t->n_sections; i++)
5479 ctables_section_clear (&t->sections[i]);
5481 if (t->clabels_example)
/* The stored values have the example variable's width. */
5483 int width = var_get_width (t->clabels_example);
5484 struct ctables_value *value, *next_value;
5485 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5486 &t->clabels_values_map)
5488 value_destroy (&value->value, width);
5489 hmap_delete (&t->clabels_values_map, &value->node);
5492 hmap_shrink (&t->clabels_values_map);
/* Reset the sorted array so that later code sees it as empty. */
5494 free (t->clabels_values);
5495 t->clabels_values = NULL;
5496 t->n_clabels_values = 0;
/* Runs the tables over the cases in INPUT from dataset DS.

   Sections are first allocated for every table.  The input is then divided
   into groups (by split variables when the dictionary's split type is
   SPLIT_SEPARATE, otherwise as a single group).  For each group, every case
   is inserted into every section of every table, after which each table is
   completed, output with ctables_table_output(), and cleared for the next
   group.

   Returns the result of casegrouper_destroy(). */
5501 ctables_execute (struct dataset *ds, struct casereader *input,
5504 for (size_t i = 0; i < ct->n_tables; i++)
5506 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of row, column, and layer nests;
   MAX (1, ...) keeps the product nonzero when a stack is empty. */
5507 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5508 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5509 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5510 sizeof *t->sections);
5511 size_t ix[PIVOT_N_AXES];
5512 ctables_table_add_section (t, 0, ix);
5515 struct dictionary *dict = dataset_dict (ds);
5516 struct casegrouper *grouper
5517 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5518 ? casegrouper_create_splits (input, dict)
5519 : casegrouper_create_vars (input, NULL, 0));
5520 struct casereader *group;
5521 while (casegrouper_get_next_group (grouper, &group))
5523 /* Output SPLIT FILE variables. */
5524 struct ccase *c = casereader_peek (group, 0);
5527 output_split_file_values (ds, c);
5531 bool warn_on_invalid = true;
5532 for (c = casereader_read (group); c;
5533 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the dictionary's case weight.  E_WEIGHT comes from
   ct->e_weight when that variable is set. */
5535 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5536 double e_weight = (ct->e_weight
5537 ? var_force_valid_weight (ct->e_weight,
5538 case_num (c, ct->e_weight),
5542 for (size_t i = 0; i < ct->n_tables; i++)
5544 struct ctables_table *t = ct->tables[i];
5546 for (size_t j = 0; j < t->n_sections; j++)
5547 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose labels are displayed on another axis also collect the values
   that will label the categories. */
5549 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5550 if (t->label_axis[a] != a)
5551 ctables_insert_clabels_values (t, c, a);
5554 casereader_destroy (group);
/* All of the group's cases have been read: finish and output each table. */
5556 for (size_t i = 0; i < ct->n_tables; i++)
5558 struct ctables_table *t = ct->tables[i];
5560 if (t->clabels_example)
5561 ctables_sort_clabels_values (t);
5563 for (size_t j = 0; j < t->n_sections; j++)
5564 ctables_section_add_empty_categories (&t->sections[j]);
5566 ctables_table_output (ct, t);
5567 ctables_table_clear (t);
5570 return casegrouper_destroy (grouper);
/* Signature shared by the functions that parse one precedence level of a
   postcompute expression, so that a generic binary-operator parser can be
   given the function that parses its operands. */
5575 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5576 struct dictionary *);
/* Releases the resources held by postcompute expression E: the string or
   string-range bounds of string category nodes, both subexpressions of
   operator nodes (recursively), and E's message location.  The node types
   in the final group of cases hold nothing that needs releasing. */
5579 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5585 case CTPO_CAT_STRING:
5586 ss_dealloc (&e->string);
5589 case CTPO_CAT_SRANGE:
5590 for (size_t i = 0; i < 2; i++)
5591 ss_dealloc (&e->srange[i]);
/* Operator nodes: destroy both operands. */
5600 for (size_t i = 0; i < 2; i++)
5601 ctables_pcexpr_destroy (e->subs[i]);
5605 case CTPO_CAT_NUMBER:
5606 case CTPO_CAT_NRANGE:
5607 case CTPO_CAT_MISSING:
5608 case CTPO_CAT_OTHERNM:
5609 case CTPO_CAT_SUBTOTAL:
5610 case CTPO_CAT_TOTAL:
5614 msg_location_destroy (e->location);
5619 static struct ctables_pcexpr *
5620 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5621 struct ctables_pcexpr *sub0,
5622 struct ctables_pcexpr *sub1)
5624 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5625 *e = (struct ctables_pcexpr) {
5627 .subs = { sub0, sub1 },
5628 .location = msg_location_merged (sub0->location, sub1->location),
5633 /* How to parse an operator. */
5636 enum token_type token; /* Token that spells the operator. */
5637 enum ctables_postcompute_op op; /* Operation for the resulting node. */
/* Looks through the N_OPS operators in OPS for one whose token is LEXER's
   current token.

   NOTE(review): the statements that consume the token and return are not
   visible here.  T_NEG_NUM is treated differently from other tokens,
   presumably because a negative number also carries the right-hand operand
   -- confirm. */
5640 static const struct operator *
5641 ctables_pcexpr_match_operator (struct lexer *lexer,
5642 const struct operator ops[], size_t n_ops)
5644 for (const struct operator *op = ops; op < ops + n_ops; op++)
5645 if (lex_token (lexer) == op->token)
5647 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS operators in OPS,
   given the already parsed left-hand operand LHS.  Each right-hand operand
   is parsed with PARSE_NEXT_LEVEL and combined with what came before by
   ctables_pcexpr_allocate_binary(), so the result nests to the left.

   If CHAIN_WARNING is nonnull, it is issued as a warning at LHS's location
   when more than one operator was chained. */
5656 static struct ctables_pcexpr *
5657 ctables_pcexpr_parse_binary_operators__ (
5658 struct lexer *lexer, struct dictionary *dict,
5659 const struct operator ops[], size_t n_ops,
5660 parse_recursively_func *parse_next_level,
5661 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators consumed so far. */
5663 for (int op_count = 0; ; op_count++)
5665 const struct operator *op
5666 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5669 if (op_count > 1 && chain_warning)
5670 msg_at (SW, lhs->location, "%s", chain_warning);
5675 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
/* NOTE(review): presumably reached only when RHS failed to parse; the
   guarding test is not visible here. */
5678 ctables_pcexpr_destroy (lhs);
5682 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first a left-hand
   operand with PARSE_NEXT_LEVEL, then any chain of the N_OPS operators in
   OPS by way of ctables_pcexpr_parse_binary_operators__().  CHAIN_WARNING is
   passed through unchanged. */
5686 static struct ctables_pcexpr *
5687 ctables_pcexpr_parse_binary_operators (
5688 struct lexer *lexer, struct dictionary *dict,
5689 const struct operator ops[], size_t n_ops,
5690 parse_recursively_func *parse_next_level, const char *chain_warning)
5692 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5696 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5698 chain_warning, lhs);
/* Declared ahead of its definition because ctables_pcexpr_parse_primary()
   calls it to parse a parenthesized subexpression. */
5701 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5702 struct dictionary *);
5704 static struct ctables_pcexpr
5705 ctpo_cat_nrange (double low, double high)
5707 return (struct ctables_pcexpr) {
5708 .op = CTPO_CAT_NRANGE,
5709 .nrange = { low, high },
5713 static struct ctables_pcexpr
5714 ctpo_cat_srange (struct substring low, struct substring high)
5716 return (struct ctables_pcexpr) {
5717 .op = CTPO_CAT_SRANGE,
5718 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node whose location spans the tokens consumed.  The forms
   recognized here are:

   - a number, yielding a CTPO_CONSTANT node;
   - the keywords MISSING, OTHERNM, and TOTAL;
   - SUBTOTAL, optionally followed by a bracketed positive integer index;
   - a bracketed category: [LO THRU x], [n], [n THRU HI], [n THRU m],
     ['s'], ['s' THRU HI], or ['s' THRU 't'];
   - a parenthesized expression, parsed with ctables_pcexpr_parse_add().

   Anything else is reported with lex_error(). */
5722 static struct ctables_pcexpr *
5723 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the expression starts, for its location. */
5725 int start_ofs = lex_ofs (lexer);
5726 struct ctables_pcexpr e;
5727 if (lex_is_number (lexer))
5729 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5730 .number = lex_number (lexer) };
5733 else if (lex_match_id (lexer, "MISSING"))
5734 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5735 else if (lex_match_id (lexer, "OTHERNM"))
5736 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5737 else if (lex_match_id (lexer, "TOTAL"))
5738 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5739 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows. */
5741 size_t subtotal_index = 0;
5742 if (lex_match (lexer, T_LBRACK))
5744 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5746 subtotal_index = lex_integer (lexer);
5748 if (!lex_force_match (lexer, T_RBRACK))
5751 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5752 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5754 else if (lex_match (lexer, T_LBRACK))
5756 if (lex_match_id (lexer, "LO"))
5758 if (!lex_force_match_id (lexer, "THRU"))
/* An open lower bound is a null string for string ranges and -DBL_MAX for
   numeric ranges. */
5761 if (lex_is_string (lexer))
5763 struct substring low = { .string = NULL };
5764 struct substring high = parse_substring (lexer, dict);
5765 e = ctpo_cat_srange (low, high);
5769 if (!lex_force_num (lexer))
5771 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5775 else if (lex_is_number (lexer))
5777 double number = lex_number (lexer);
5779 if (lex_match_id (lexer, "THRU"))
/* An open upper bound is DBL_MAX. */
5781 if (lex_match_id (lexer, "HI"))
5782 e = ctpo_cat_nrange (number, DBL_MAX);
5785 if (!lex_force_num (lexer))
5787 e = ctpo_cat_nrange (number, lex_number (lexer));
5792 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5795 else if (lex_is_string (lexer))
5797 struct substring s = parse_substring (lexer, dict);
5799 if (lex_match_id (lexer, "THRU"))
5801 struct substring high;
/* An open upper bound is a null string. */
5803 if (lex_match_id (lexer, "HI"))
5804 high = (struct substring) { .string = NULL };
5807 if (!lex_force_string (lexer))
5812 high = parse_substring (lexer, dict);
5815 e = ctpo_cat_srange (s, high);
5818 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5822 lex_error (lexer, NULL);
/* A missing closing bracket: release any strings that E holds. */
5826 if (!lex_force_match (lexer, T_RBRACK))
5828 if (e.op == CTPO_CAT_STRING)
5829 ss_dealloc (&e.string);
5830 else if (e.op == CTPO_CAT_SRANGE)
5832 ss_dealloc (&e.srange[0]);
5833 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression. */
5838 else if (lex_match (lexer, T_LPAREN))
5840 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5843 if (!lex_force_match (lexer, T_RPAREN))
5845 ctables_pcexpr_destroy (ep);
5852 lex_error (lexer, NULL);
/* The location runs from the first token to the last one consumed; the node
   is then copied to the heap. */
5856 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5857 return xmemdup (&e, sizeof e);
/* Allocates a new postcompute expression node for operand SUB.  The node's
   location runs from token offset START_OFS in LEXER through the token
   before the current one.

   NOTE(review): the initializers that set the node's operation and operand
   are not visible here; going by the name, the node negates SUB --
   confirm. */
5860 static struct ctables_pcexpr *
5861 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5862 struct lexer *lexer, int start_ofs)
5864 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5865 *e = (struct ctables_pcexpr) {
5868 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of `**' operators over primaries,
   with a warning about left-associativity when the operator is chained.

   When the current token is a negative number immediately followed by `**',
   the number's magnitude is used as the left-hand operand of the chain and
   the whole result is wrapped by ctables_pcexpr_allocate_neg(). */
5873 static struct ctables_pcexpr *
5874 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5876 static const struct operator op = { T_EXP, CTPO_POW };
5878 const char *chain_warning =
5879 _("The exponentiation operator (`**') is left-associative: "
5880 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5881 "To disable this warning, insert parentheses.");
/* Ordinary case: not a negative number followed by `**'. */
5883 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5884 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5885 ctables_pcexpr_parse_primary,
5888 /* Special case for situations like "-5**6", which must be parsed as
5891 int start_ofs = lex_ofs (lexer);
5892 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5893 *lhs = (struct ctables_pcexpr) {
5894 .op = CTPO_CONSTANT,
5895 .number = -lex_tokval (lexer),
5896 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5900 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5901 lexer, dict, &op, 1,
5902 ctables_pcexpr_parse_primary, chain_warning, lhs);
5906 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5909 /* Parses the unary minus level.

   If the current token is not T_DASH, this just parses an
   exponentiation-level expression with ctables_pcexpr_parse_exp().
   Otherwise the dash is consumed, the operand is parsed by a recursive call
   (so that several leading dashes nest), and the operand is wrapped by
   ctables_pcexpr_allocate_neg() with a location that starts at the dash. */
5910 static struct ctables_pcexpr *
5911 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5913 int start_ofs = lex_ofs (lexer);
5914 if (!lex_match (lexer, T_DASH))
5915 return ctables_pcexpr_parse_exp (lexer, dict);
5917 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5921 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5924 /* Parses the multiplication and division level.

   T_ASTERISK maps to CTPO_MUL and T_SLASH maps to CTPO_DIV.  The table is
   handed to ctables_pcexpr_parse_binary_operators() with
   ctables_pcexpr_parse_neg as the parser for operands.  The final argument
   is NULL (presumably meaning that no chaining warning applies at this
   level). */
5925 static struct ctables_pcexpr *
5926 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5928 static const struct operator ops[] =
5930 { T_ASTERISK, CTPO_MUL },
5931 { T_SLASH, CTPO_DIV },
5934 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5935 sizeof ops / sizeof *ops,
5936 ctables_pcexpr_parse_neg, NULL);
5939 /* Parses the addition and subtraction level.

   This is the entry point used for a whole postcompute expression.  The
   operator table is handed to ctables_pcexpr_parse_binary_operators() with
   ctables_pcexpr_parse_mul as the parser for operands. */
5940 static struct ctables_pcexpr *
5941 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5943 static const struct operator ops[] =
5945 { T_PLUS, CTPO_ADD },
5946 { T_DASH, CTPO_SUB },
/* A negative-number token in operator position counts as an addition.
   NOTE(review): presumably so that input such as "a -5" adds the negative
   literal; confirm how ctables_pcexpr_parse_binary_operators() consumes
   T_NEG_NUM. */
5947 { T_NEG_NUM, CTPO_ADD },
5950 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5951 ops, sizeof ops / sizeof *ops,
5952 ctables_pcexpr_parse_mul, NULL);
/* Searches CT's `postcomputes' hash map for a postcompute named NAME.  The
   bucket is selected with utf8_hash_case_string() and candidates are
   compared with utf8_strcasecmp(), so the match ignores case.

   NOTE(review): presumably returns the matching postcompute, or a null
   pointer when there is none -- callers test the result for that. */
5955 static struct ctables_postcompute *
5956 ctables_find_postcompute (struct ctables *ct, const char *name)
5958 struct ctables_postcompute *pc;
5959 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5960 utf8_hash_case_string (name, 0), &ct->postcomputes)
5961 if (!utf8_strcasecmp (pc->name, name))
/* Parses one PCOMPUTE definition, whose syntax is
   "&NAME = EXPR (expression)", and records it in the `postcomputes' map of
   `ct' under NAME.  The expression is parsed with
   ctables_pcexpr_parse_add().

   If a postcompute called NAME already exists, a warning that points at the
   new definition and a note that points at the previous definition are
   issued, and the previous expression and location are destroyed.
   Otherwise a new postcompute is allocated and inserted into the map. */
5967 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The caller has already consumed the PCOMPUTE keyword, so the definition
   starts one token before the current one. */
5970 int pcompute_start = lex_ofs (lexer) - 1;
5972 if (!lex_match (lexer, T_AND))
5974 lex_error_expecting (lexer, "&");
5977 if (!lex_force_id (lexer))
5980 char *name = ss_xstrdup (lex_tokss (lexer));
5983 if (!lex_force_match (lexer, T_EQUALS)
5984 || !lex_force_match_id (lexer, "EXPR")
5985 || !lex_force_match (lexer, T_LPAREN))
/* Remember which tokens make up the expression so that its source text can
   be recovered further down. */
5991 int expr_start = lex_ofs (lexer);
5992 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5993 int expr_end = lex_ofs (lexer) - 1;
5994 if (!expr || !lex_force_match (lexer, T_RPAREN))
5996 ctables_pcexpr_destroy (expr);
6000 int pcompute_end = lex_ofs (lexer) - 1;
6002 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
6005 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
6008 msg_at (SW, location, _("New definition of &%s will override the "
6009 "previous definition."),
6011 msg_at (SN, pc->location, _("This is the previous definition."));
6013 ctables_pcexpr_destroy (pc->expr);
6014 msg_location_destroy (pc->location);
6019 pc = xmalloc (sizeof *pc);
6020 *pc = (struct ctables_postcompute) { .name = name };
6021 hmap_insert (&ct->postcomputes, &pc->hmap_node,
6022 utf8_hash_case_string (pc->name, 0));
6025 pc->location = location;
/* The label is initialized to the syntax representation of the expression
   tokens. */
6027 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications of a PPROPERTIES FORMAT setting into
   *SSS, which is first reset to an empty set.

   Each specification is a summary function, then a percentile if the
   function is CTSF_PTILE, then a format specifier.  Parsing stops at the end
   of the command, at a slash, or at an identifier that matches LABEL or
   HIDESOURCECATS. */
6032 ctables_parse_pproperties_format (struct lexer *lexer,
6033 struct ctables_summary_spec_set *sss)
6035 *sss = (struct ctables_summary_spec_set) { .n = 0 };
6037 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
6038 && !(lex_token (lexer) == T_ID
6039 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
6040 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
6041 lex_tokss (lexer)))))
6043 /* Parse the summary function. */
6044 enum ctables_summary_function function;
6045 if (!parse_ctables_summary_function (lexer, &function))
6048 /* Parse the percentile, which is read only for PTILE and must lie in the
   closed range from 0 to 100; it stays 0 for every other function. */
6049 double percentile = 0;
6050 if (function == CTSF_PTILE)
6052 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
6054 percentile = lex_number (lexer);
/* Parse the format specifier. */
6059 struct fmt_spec format;
6060 bool is_ctables_format;
6061 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, growing the array first when it is full. */
6064 if (sss->n >= sss->allocated)
6065 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6066 sizeof *sss->specs);
6067 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6068 .function = function,
6069 .percentile = percentile,
6071 .is_ctables_format = is_ctables_format,
/* NOTE(review): presumably the failure path, which releases whatever was
   accumulated in *SSS -- confirm. */
6077 ctables_summary_spec_set_uninit (sss);
/* Parses the PPROPERTIES subcommand: a list of "&NAME" references to
   postcomputes that already exist in CT, followed by any number of LABEL,
   FORMAT and HIDESOURCECATS settings.  Each setting is applied to every
   postcompute in the list.  Referring to an unknown name is reported as an
   error. */
6082 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
6084 struct ctables_postcompute **pcs = NULL;
6086 size_t allocated_pcs = 0;
/* Collect the postcomputes that the settings will apply to. */
6088 while (lex_match (lexer, T_AND))
6090 if (!lex_force_id (lexer))
6092 struct ctables_postcompute *pc
6093 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6096 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6101 if (n_pcs >= allocated_pcs)
6102 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Parse settings up to the next slash or the end of the command. */
6106 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6108 if (lex_match_id (lexer, "LABEL"))
6110 lex_match (lexer, T_EQUALS);
6111 if (!lex_force_string (lexer))
/* Replace the label of every listed postcompute with its own copy of the
   string token. */
6114 for (size_t i = 0; i < n_pcs; i++)
6116 free (pcs[i]->label);
6117 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6122 else if (lex_match_id (lexer, "FORMAT"))
6124 lex_match (lexer, T_EQUALS);
6126 struct ctables_summary_spec_set sss;
6127 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Give every listed postcompute a freshly allocated clone of the parsed
   set; the parsed set itself is released once the loop is done. */
6130 for (size_t i = 0; i < n_pcs; i++)
6133 ctables_summary_spec_set_uninit (pcs[i]->specs);
6135 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6136 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6138 ctables_summary_spec_set_uninit (&sss);
6140 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6142 lex_match (lexer, T_EQUALS);
6143 bool hide_source_cats;
6144 if (!parse_bool (lexer, &hide_source_cats))
6146 for (size_t i = 0; i < n_pcs; i++)
6147 pcs[i]->hide_source_cats = hide_source_cats;
6151 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time with localtime(), formats it with strftime()
   according to FORMAT into the buffer `value', and appends the result to
   OUT. */
6164 put_strftime (struct string *out, time_t now, const char *format)
6166 const struct tm *tm = localtime (&now);
6168 strftime (value, sizeof value, format, tm);
6169 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past the prefix.

   NOTE(review): callers use the result as a condition, so this presumably
   reports whether the prefix was found and skipped -- confirm. */
6173 skip_prefix (struct substring *s, struct substring prefix)
6175 if (ss_starts_with (*s, prefix))
6177 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER with offsets
   from EXPR_START up to, but not including, EXPR_END.

   Identifiers are looked up in DICT so that a variable's label can stand in
   for its name.  Left and right bracket tokens are handled separately.
   NOTE(review): presumably `nest' tracks bracket depth so that bracketed
   parts can be left out -- confirm. */
6185 put_table_expression (struct string *out, struct lexer *lexer,
6186 struct dictionary *dict, int expr_start, int expr_end)
6189 for (int ofs = expr_start; ofs < expr_end; ofs++)
6191 const struct token *t = lex_ofs_token (lexer, ofs);
6192 if (t->type == T_LBRACK)
6194 else if (t->type == T_RBRACK && nest > 0)
6200 else if (t->type == T_ID)
/* An identifier that names a variable with a label is shown as that label;
   any other identifier is copied as written. */
6202 const struct variable *var
6203 = dict_lookup_var (dict, t->string.string);
6204 const char *label = var ? var_get_label (var) : NULL;
6205 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space in front of the token unless it is the first token, it is a
   right parenthesis, or OUT already ends in a space. */
6209 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6210 ds_put_byte (out, ' ');
/* Copy the token using its syntax representation. */
6212 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6213 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token or a left
   parenthesis. */
6216 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6217 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the markers that start with a
   right parenthesis:

   - ")DATE" becomes NOW formatted with the strftime() conversion "%x".
   - ")TIME" becomes NOW formatted with the strftime() conversion "%X".
   - ")TABLE" becomes the table expression made up of the tokens of LEXER
     from EXPR_START to EXPR_END, as rendered by put_table_expression().

   A right parenthesis that does not begin one of these markers is copied
   unchanged. */
6223 put_title_text (struct string *out, struct substring in, time_t now,
6224 struct lexer *lexer, struct dictionary *dict,
6225 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis verbatim. */
6229 size_t chunk = ss_find_byte (in, ')');
6230 ds_put_substring (out, ss_head (in, chunk));
6231 ss_advance (&in, chunk);
6232 if (ss_is_empty (in))
/* IN now starts with a right parenthesis: expand a marker if there is one. */
6235 if (skip_prefix (&in, ss_cstr (")DATE")))
6236 put_strftime (out, now, "%x");
6237 else if (skip_prefix (&in, ss_cstr (")TIME")))
6238 put_strftime (out, now, "%X");
6239 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6240 put_table_expression (out, lexer, dict, expr_start, expr_end);
6243 ds_put_byte (out, ')');
6244 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   The function works in these stages:

   1. Set-up: run the measure guesser, give every variable the global
      variable-label display setting, take an unshared copy of the default
      pivot table look, and register the four CTEF_* custom formats.

   2. Global subcommands that may come before the first TABLE: FORMAT,
      VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT and
      HIDESMALLCOUNTS.

   3. One or more TABLE subcommands, each followed by its own SLABELS,
      CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST and COMPARETEST
      subcommands.

   4. Execution with ctables_execute().

   On the normal path the result is CMD_SUCCESS if ctables_execute() and
   proc_commit() both succeed and CMD_FAILURE otherwise. */
6250 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6252 struct casereader *input = NULL;
/* Let the measure guesser examine the active dataset. */
6254 struct measure_guesser *mg = measure_guesser_create (ds);
6257 input = proc_open (ds);
6258 measure_guesser_run (mg, input);
6259 measure_guesser_destroy (mg);
/* Every variable starts out with the global setting for showing variable
   names or labels. */
6262 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6263 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6264 enum settings_value_show tvars = settings_get_show_variables ();
6265 for (size_t i = 0; i < n_vars; i++)
6266 vlabels[i] = (enum ctables_vlabel) tvars;
/* Take an unshared reference to the default look before changing it. */
6268 struct pivot_table_look *look = pivot_table_look_unshare (
6269 pivot_table_look_ref (pivot_table_look_get_default ()));
6270 look->omit_empty = false;
6272 struct ctables *ct = xmalloc (sizeof *ct);
6273 *ct = (struct ctables) {
6274 .dict = dataset_dict (ds),
6276 .ctables_formats = FMT_SETTINGS_INIT,
6278 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp handed to put_title_text() when titles are parsed. */
6281 time_t now = time (NULL);
/* Style strings for a custom format: `dot_string' is used when the decimal
   point setting is a dot and `comma_string' otherwise. */
6286 const char *dot_string;
6287 const char *comma_string;
6289 static const struct ctf ctfs[4] = {
6290 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6291 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6292 { CTEF_PAREN, "-,(,),", "-.(.)." },
6293 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6295 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6296 for (size_t i = 0; i < 4; i++)
6298 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6299 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6300 fmt_number_style_from_string (s));
6303 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands: keep parsing until the first TABLE keyword. */
6306 while (!lex_match_id (lexer, "TABLE"))
6308 if (lex_match_id (lexer, "FORMAT"))
/* widths[0] is the minimum column width and widths[1] the maximum; SYSMIS
   marks a width that has not been specified. */
6310 double widths[2] = { SYSMIS, SYSMIS };
6311 double units_per_inch = 72.0;
6313 while (lex_token (lexer) != T_SLASH)
6315 if (lex_match_id (lexer, "MINCOLWIDTH"))
6317 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6320 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6322 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6325 else if (lex_match_id (lexer, "UNITS"))
6327 lex_match (lexer, T_EQUALS);
6328 if (lex_match_id (lexer, "POINTS"))
6329 units_per_inch = 72.0;
6330 else if (lex_match_id (lexer, "INCHES"))
6331 units_per_inch = 1.0;
6332 else if (lex_match_id (lexer, "CM"))
6333 units_per_inch = 2.54;
6336 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6340 else if (lex_match_id (lexer, "EMPTY"))
6345 lex_match (lexer, T_EQUALS);
6346 if (lex_match_id (lexer, "ZERO"))
6348 /* Nothing to do. */
6350 else if (lex_match_id (lexer, "BLANK"))
6351 ct->zero = xstrdup ("");
6352 else if (lex_force_string (lexer))
6354 ct->zero = ss_xstrdup (lex_tokss (lexer));
6360 else if (lex_match_id (lexer, "MISSING"))
6362 lex_match (lexer, T_EQUALS);
6363 if (!lex_force_string (lexer))
6367 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6368 ? ss_xstrdup (lex_tokss (lexer))
6374 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6375 "UNITS", "EMPTY", "MISSING");
6380 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6381 && widths[0] > widths[1])
6383 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Scale each width that was given by 96 / units_per_inch and store it in
   the look's horizontal width range. */
6387 for (size_t i = 0; i < 2; i++)
6388 if (widths[i] != SYSMIS)
6390 int *wr = ct->look->width_ranges[TABLE_HORZ];
6391 wr[i] = widths[i] / units_per_inch * 96.0;
6396 else if (lex_match_id (lexer, "VLABELS"))
6398 if (!lex_force_match_id (lexer, "VARIABLES"))
6400 lex_match (lexer, T_EQUALS);
6402 struct variable **vars;
6404 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6408 if (!lex_force_match_id (lexer, "DISPLAY"))
6413 lex_match (lexer, T_EQUALS);
6415 enum ctables_vlabel vlabel;
6416 if (lex_match_id (lexer, "DEFAULT"))
6417 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6418 else if (lex_match_id (lexer, "NAME"))
6420 else if (lex_match_id (lexer, "LABEL"))
6421 vlabel = CTVL_LABEL;
6422 else if (lex_match_id (lexer, "BOTH"))
6424 else if (lex_match_id (lexer, "NONE"))
6428 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen display mode for each listed variable, indexed by the
   variable's position in the dictionary. */
6434 for (size_t i = 0; i < n_vars; i++)
6435 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6438 else if (lex_match_id (lexer, "MRSETS"))
6440 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6442 lex_match (lexer, T_EQUALS);
6443 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6446 else if (lex_match_id (lexer, "SMISSING"))
6448 if (lex_match_id (lexer, "VARIABLE"))
6449 ct->smissing_listwise = false;
6450 else if (lex_match_id (lexer, "LISTWISE"))
6451 ct->smissing_listwise = true;
6454 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6458 else if (lex_match_id (lexer, "PCOMPUTE"))
6460 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6463 else if (lex_match_id (lexer, "PPROPERTIES"))
6465 if (!ctables_parse_pproperties (lexer, ct))
6468 else if (lex_match_id (lexer, "WEIGHT"))
6470 if (!lex_force_match_id (lexer, "VARIABLE"))
6472 lex_match (lexer, T_EQUALS);
6473 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6477 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6479 if (lex_match_id (lexer, "COUNT"))
6481 lex_match (lexer, T_EQUALS);
6482 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6485 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT the threshold becomes 5, unless a nonzero
   threshold is already in place. */
6488 else if (ct->hide_threshold == 0)
6489 ct->hide_threshold = 5;
6493 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6494 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6495 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6499 if (!lex_force_match (lexer, T_SLASH))
/* Parse the TABLE subcommands, adding one ctables_table to CT for each. */
6503 size_t allocated_tables = 0;
6506 if (ct->n_tables >= allocated_tables)
6507 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6508 sizeof *ct->tables);
6510 struct ctables_category *cat = xmalloc (sizeof *cat);
6511 *cat = (struct ctables_category) {
6513 .include_missing = false,
6514 .sort_ascending = true,
6517 struct ctables_categories *c = xmalloc (sizeof *c);
6518 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6519 *c = (struct ctables_categories) {
/* The table gets one categories slot per dictionary variable. */
6526 struct ctables_categories **categories = xnmalloc (n_vars,
6527 sizeof *categories);
6528 for (size_t i = 0; i < n_vars; i++)
6531 struct ctables_table *t = xmalloc (sizeof *t);
6532 *t = (struct ctables_table) {
6534 .slabels_axis = PIVOT_AXIS_COLUMN,
6535 .slabels_visible = true,
6536 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6538 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6539 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6540 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6542 .clabels_from_axis = PIVOT_AXIS_LAYER,
6543 .categories = categories,
6544 .n_categories = n_vars,
6547 ct->tables[ct->n_tables++] = t;
/* Axis expressions come in the order rows, columns, layers, separated by
   BY.  The token range of the whole table expression is remembered for
   put_title_text(). */
6549 lex_match (lexer, T_EQUALS);
6550 int expr_start = lex_ofs (lexer);
6551 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6553 if (lex_match (lexer, T_BY))
6555 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6556 ct, t, PIVOT_AXIS_COLUMN))
6559 if (lex_match (lexer, T_BY))
6561 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6562 ct, t, PIVOT_AXIS_LAYER))
6566 int expr_end = lex_ofs (lexer);
6568 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6569 && !t->axes[PIVOT_AXIS_LAYER])
6571 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis; the error below is for
   scale variables on more than one axis. */
6575 const struct ctables_axis *scales[PIVOT_N_AXES];
6576 size_t n_scales = 0;
6577 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6579 scales[a] = find_scale (t->axes[a]);
6585 msg (SE, _("Scale variables may appear only on one axis."));
6586 if (scales[PIVOT_AXIS_ROW])
6587 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6588 _("This scale variable appears on the rows axis."));
6589 if (scales[PIVOT_AXIS_COLUMN])
6590 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6591 _("This scale variable appears on the columns axis."));
6592 if (scales[PIVOT_AXIS_LAYER])
6593 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6594 _("This scale variable appears on the layer axis."));
/* Find what carries a summary on each axis; summaries on more than one axis
   are an error. */
6598 const struct ctables_axis *summaries[PIVOT_N_AXES];
6599 size_t n_summaries = 0;
6600 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6602 summaries[a] = (scales[a]
6604 : find_categorical_summary_spec (t->axes[a]));
6608 if (n_summaries > 1)
6610 msg (SE, _("Summaries may appear only on one axis."));
6611 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6614 msg_at (SN, summaries[a]->loc,
6616 ? _("This variable on the rows axis has a summary.")
6617 : a == PIVOT_AXIS_COLUMN
6618 ? _("This variable on the columns axis has a summary.")
6619 : _("This variable on the layers axis has a summary."));
6621 msg_at (SN, summaries[a]->loc,
6622 _("This is a scale variable, so it always has a "
6623 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary if any axis does,
   otherwise an axis that is in use at all. */
6628 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6629 if (n_summaries ? summaries[a] : t->axes[a])
6631 t->summary_axis = a;
6635 if (lex_token (lexer) == T_ENDCMD)
6637 if (!ctables_prepare_table (t))
6641 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that belong to this table, up to the next TABLE keyword or
   the end of the command. */
6644 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6646 if (lex_match_id (lexer, "SLABELS"))
6648 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6650 if (lex_match_id (lexer, "POSITION"))
6652 lex_match (lexer, T_EQUALS);
6653 if (lex_match_id (lexer, "COLUMN"))
6654 t->slabels_axis = PIVOT_AXIS_COLUMN;
6655 else if (lex_match_id (lexer, "ROW"))
6656 t->slabels_axis = PIVOT_AXIS_ROW;
6657 else if (lex_match_id (lexer, "LAYER"))
6658 t->slabels_axis = PIVOT_AXIS_LAYER;
6661 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6665 else if (lex_match_id (lexer, "VISIBLE"))
6667 lex_match (lexer, T_EQUALS);
6668 if (!parse_bool (lexer, &t->slabels_visible))
6673 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6678 else if (lex_match_id (lexer, "CLABELS"))
6680 if (lex_match_id (lexer, "AUTO"))
6682 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6683 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6685 else if (lex_match_id (lexer, "ROWLABELS"))
6687 lex_match (lexer, T_EQUALS);
6688 if (lex_match_id (lexer, "OPPOSITE"))
6689 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6690 else if (lex_match_id (lexer, "LAYER"))
6691 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6694 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6698 else if (lex_match_id (lexer, "COLLABELS"))
6700 lex_match (lexer, T_EQUALS);
6701 if (lex_match_id (lexer, "OPPOSITE"))
6702 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6703 else if (lex_match_id (lexer, "LAYER"))
6704 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6707 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6713 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6718 else if (lex_match_id (lexer, "CRITERIA"))
6720 if (!lex_force_match_id (lexer, "CILEVEL"))
6722 lex_match (lexer, T_EQUALS);
6724 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6726 t->cilevel = lex_number (lexer);
6729 else if (lex_match_id (lexer, "CATEGORIES"))
6731 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6735 else if (lex_match_id (lexer, "TITLES"))
/* Select which of the table's texts the strings that follow will set. */
6740 if (lex_match_id (lexer, "CAPTION"))
6741 textp = &t->caption;
6742 else if (lex_match_id (lexer, "CORNER"))
6744 else if (lex_match_id (lexer, "TITLE"))
6748 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6751 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are expanded by put_title_text() and joined
   with single spaces. */
6753 struct string s = DS_EMPTY_INITIALIZER;
6754 while (lex_is_string (lexer))
6756 if (!ds_is_empty (&s))
6757 ds_put_byte (&s, ' ');
6758 put_title_text (&s, lex_tokss (lexer), now,
6759 lexer, dataset_dict (ds),
6760 expr_start, expr_end);
6764 *textp = ds_steal_cstr (&s);
6766 while (lex_token (lexer) != T_SLASH
6767 && lex_token (lexer) != T_ENDCMD);
6769 else if (lex_match_id (lexer, "SIGTEST"))
/* Allocate the chi-square test settings with their defaults before parsing
   the individual options. */
6773 t->chisq = xmalloc (sizeof *t->chisq);
6774 *t->chisq = (struct ctables_chisq) {
6776 .include_mrsets = true,
6777 .all_visible = true,
6783 if (lex_match_id (lexer, "TYPE"))
6785 lex_match (lexer, T_EQUALS);
6786 if (!lex_force_match_id (lexer, "CHISQUARE"))
6789 else if (lex_match_id (lexer, "ALPHA"))
6791 lex_match (lexer, T_EQUALS);
6792 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6794 t->chisq->alpha = lex_number (lexer);
6797 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6799 lex_match (lexer, T_EQUALS);
6800 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6803 else if (lex_match_id (lexer, "CATEGORIES"))
6805 lex_match (lexer, T_EQUALS);
6806 if (lex_match_id (lexer, "ALLVISIBLE"))
6807 t->chisq->all_visible = true;
6808 else if (lex_match_id (lexer, "SUBTOTALS"))
6809 t->chisq->all_visible = false;
6812 lex_error_expecting (lexer,
6813 "ALLVISIBLE", "SUBTOTALS");
6819 lex_error_expecting (lexer, "TYPE", "ALPHA",
6820 "INCLUDEMRSETS", "CATEGORIES");
6824 while (lex_token (lexer) != T_SLASH
6825 && lex_token (lexer) != T_ENDCMD);
6827 else if (lex_match_id (lexer, "COMPARETEST"))
/* Allocate the pairwise comparison settings with their defaults before
   parsing the individual options. */
6831 t->pairwise = xmalloc (sizeof *t->pairwise);
6832 *t->pairwise = (struct ctables_pairwise) {
6834 .alpha = { .05, .05 },
6835 .adjust = BONFERRONI,
6836 .include_mrsets = true,
6837 .meansvariance_allcats = true,
6838 .all_visible = true,
6847 if (lex_match_id (lexer, "TYPE"))
6849 lex_match (lexer, T_EQUALS);
6850 if (lex_match_id (lexer, "PROP"))
6851 t->pairwise->type = PROP;
6852 else if (lex_match_id (lexer, "MEAN"))
6853 t->pairwise->type = MEAN;
6856 lex_error_expecting (lexer, "PROP", "MEAN");
6860 else if (lex_match_id (lexer, "ALPHA"))
6862 lex_match (lexer, T_EQUALS);
/* ALPHA takes one or two values strictly between 0 and 1.  With two values
   the smaller is stored first; a single value is used for both. */
6864 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6866 double a0 = lex_number (lexer);
6869 lex_match (lexer, T_COMMA);
6870 if (lex_is_number (lexer))
6872 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6874 double a1 = lex_number (lexer);
6877 t->pairwise->alpha[0] = MIN (a0, a1);
6878 t->pairwise->alpha[1] = MAX (a0, a1);
6881 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6883 else if (lex_match_id (lexer, "ADJUST"))
6885 lex_match (lexer, T_EQUALS);
6886 if (lex_match_id (lexer, "BONFERRONI"))
6887 t->pairwise->adjust = BONFERRONI;
6888 else if (lex_match_id (lexer, "BH"))
6889 t->pairwise->adjust = BH;
6890 else if (lex_match_id (lexer, "NONE"))
6891 t->pairwise->adjust = 0;
6894 lex_error_expecting (lexer, "BONFERRONI", "BH",
6899 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6901 lex_match (lexer, T_EQUALS);
6902 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6905 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6907 lex_match (lexer, T_EQUALS);
6908 if (lex_match_id (lexer, "ALLCATS"))
6909 t->pairwise->meansvariance_allcats = true;
6910 else if (lex_match_id (lexer, "TESTEDCATS"))
6911 t->pairwise->meansvariance_allcats = false;
6914 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6918 else if (lex_match_id (lexer, "CATEGORIES"))
6920 lex_match (lexer, T_EQUALS);
6921 if (lex_match_id (lexer, "ALLVISIBLE"))
6922 t->pairwise->all_visible = true;
6923 else if (lex_match_id (lexer, "SUBTOTALS"))
6924 t->pairwise->all_visible = false;
6927 lex_error_expecting (lexer, "ALLVISIBLE",
6932 else if (lex_match_id (lexer, "MERGE"))
6934 lex_match (lexer, T_EQUALS);
6935 if (!parse_bool (lexer, &t->pairwise->merge))
6938 else if (lex_match_id (lexer, "STYLE"))
6940 lex_match (lexer, T_EQUALS);
6941 if (lex_match_id (lexer, "APA"))
6942 t->pairwise->apa_style = true;
6943 else if (lex_match_id (lexer, "SIMPLE"))
6944 t->pairwise->apa_style = false;
6947 lex_error_expecting (lexer, "APA", "SIMPLE");
6951 else if (lex_match_id (lexer, "SHOWSIG"))
6953 lex_match (lexer, T_EQUALS);
6954 if (!parse_bool (lexer, &t->pairwise->show_sig))
6959 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6960 "INCLUDEMRSETS", "MEANSVARIANCE",
6961 "CATEGORIES", "MERGE", "STYLE",
6966 while (lex_token (lexer) != T_SLASH
6967 && lex_token (lexer) != T_ENDCMD);
6971 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6972 "CRITERIA", "CATEGORIES", "TITLES",
6973 "SIGTEST", "COMPARETEST");
6977 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved away from at most one of the row and column
   axes. */
6981 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6982 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6984 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6988 if (!ctables_prepare_table (t))
6991 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: run the tables over the data.  The command succeeds
   only if both execution and proc_commit() succeed. */
6994 input = proc_open (ds);
6995 bool ok = ctables_execute (ds, input, ct);
6996 ok = proc_commit (ds) && ok;
6998 ctables_destroy (ct);
6999 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): presumably the shared error exit, which also frees CT --
   confirm. */
7004 ctables_destroy (ct);