1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1399 fmt_settings_uninit (&ct->ctables_formats);
1400 pivot_table_look_unref (ct->look);
1404 for (size_t i = 0; i < ct->n_tables; i++)
1405 ctables_table_destroy (ct->tables[i]);
1410 static struct ctables_category
1411 cct_nrange (double low, double high)
1413 return (struct ctables_category) {
1415 .nrange = { low, high }
1419 static struct ctables_category
1420 cct_srange (struct substring low, struct substring high)
1422 return (struct ctables_category) {
1424 .srange = { low, high }
1429 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1430 struct ctables_category *cat)
1433 if (lex_match (lexer, T_EQUALS))
1435 if (!lex_force_string (lexer))
1438 total_label = ss_xstrdup (lex_tokss (lexer));
1442 total_label = xstrdup (_("Subtotal"));
1444 *cat = (struct ctables_category) {
1445 .type = CCT_SUBTOTAL,
1446 .hide_subcategories = hide_subcategories,
1447 .total_label = total_label
1452 static struct substring
1453 parse_substring (struct lexer *lexer, struct dictionary *dict)
1455 struct substring s = recode_substring_pool (
1456 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1457 ss_rtrim (&s, ss_cstr (" "));
1463 ctables_table_parse_explicit_category (struct lexer *lexer,
1464 struct dictionary *dict,
1466 struct ctables_category *cat)
1468 if (lex_match_id (lexer, "OTHERNM"))
1469 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1470 else if (lex_match_id (lexer, "MISSING"))
1471 *cat = (struct ctables_category) { .type = CCT_MISSING };
1472 else if (lex_match_id (lexer, "SUBTOTAL"))
1473 return ctables_table_parse_subtotal (lexer, false, cat);
1474 else if (lex_match_id (lexer, "HSUBTOTAL"))
1475 return ctables_table_parse_subtotal (lexer, true, cat);
1476 else if (lex_match_id (lexer, "LO"))
1478 if (!lex_force_match_id (lexer, "THRU"))
1480 if (lex_is_string (lexer))
1482 struct substring sr0 = { .string = NULL };
1483 struct substring sr1 = parse_substring (lexer, dict);
1484 *cat = cct_srange (sr0, sr1);
1486 else if (lex_force_num (lexer))
1488 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1494 else if (lex_is_number (lexer))
1496 double number = lex_number (lexer);
1498 if (lex_match_id (lexer, "THRU"))
1500 if (lex_match_id (lexer, "HI"))
1501 *cat = cct_nrange (number, DBL_MAX);
1504 if (!lex_force_num (lexer))
1506 *cat = cct_nrange (number, lex_number (lexer));
1511 *cat = (struct ctables_category) {
1516 else if (lex_is_string (lexer))
1518 struct substring s = parse_substring (lexer, dict);
1519 if (lex_match_id (lexer, "THRU"))
1521 if (lex_match_id (lexer, "HI"))
1523 struct substring sr1 = { .string = NULL };
1524 *cat = cct_srange (s, sr1);
1528 if (!lex_force_string (lexer))
1533 struct substring sr1 = parse_substring (lexer, dict);
1534 *cat = cct_srange (s, sr1);
1538 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1540 else if (lex_match (lexer, T_AND))
1542 if (!lex_force_id (lexer))
1544 struct ctables_postcompute *pc = ctables_find_postcompute (
1545 ct, lex_tokcstr (lexer));
1548 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1549 msg_at (SE, loc, _("Unknown postcompute &%s."),
1550 lex_tokcstr (lexer));
1551 msg_location_destroy (loc);
1556 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1560 lex_error (lexer, NULL);
1568 parse_category_string (struct msg_location *location,
1569 struct substring s, const struct dictionary *dict,
1570 enum fmt_type format, double *n)
1573 char *error = data_in (s, dict_get_encoding (dict), format,
1574 settings_get_fmt_settings (), &v, 0, NULL);
1577 msg_at (SE, location,
1578 _("Failed to parse category specification as format %s: %s."),
1579 fmt_name (format), error);
1588 static struct ctables_category *
1589 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1590 const struct ctables_pcexpr *e)
1592 struct ctables_category *best = NULL;
1593 size_t n_subtotals = 0;
1594 for (size_t i = 0; i < cats->n_cats; i++)
1596 struct ctables_category *cat = &cats->cats[i];
1599 case CTPO_CAT_NUMBER:
1600 if (cat->type == CCT_NUMBER && cat->number == e->number)
1604 case CTPO_CAT_STRING:
1605 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1609 case CTPO_CAT_NRANGE:
1610 if (cat->type == CCT_NRANGE
1611 && cat->nrange[0] == e->nrange[0]
1612 && cat->nrange[1] == e->nrange[1])
1616 case CTPO_CAT_SRANGE:
1617 if (cat->type == CCT_SRANGE
1618 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1619 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1623 case CTPO_CAT_MISSING:
1624 if (cat->type == CCT_MISSING)
1628 case CTPO_CAT_OTHERNM:
1629 if (cat->type == CCT_OTHERNM)
1633 case CTPO_CAT_SUBTOTAL:
1634 if (cat->type == CCT_SUBTOTAL)
1637 if (e->subtotal_index == n_subtotals)
1639 else if (e->subtotal_index == 0)
1644 case CTPO_CAT_TOTAL:
1645 if (cat->type == CCT_TOTAL)
1659 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1664 static struct ctables_category *
1665 ctables_find_category_for_postcompute (const struct dictionary *dict,
1666 const struct ctables_categories *cats,
1667 enum fmt_type parse_format,
1668 const struct ctables_pcexpr *e)
1670 if (parse_format != FMT_F)
1672 if (e->op == CTPO_CAT_STRING)
1675 if (!parse_category_string (e->location, e->string, dict,
1676 parse_format, &number))
1679 struct ctables_pcexpr e2 = {
1680 .op = CTPO_CAT_NUMBER,
1682 .location = e->location,
1684 return ctables_find_category_for_postcompute__ (cats, &e2);
1686 else if (e->op == CTPO_CAT_SRANGE)
1689 if (!e->srange[0].string)
1690 nrange[0] = -DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[0], dict,
1692 parse_format, &nrange[0]))
1695 if (!e->srange[1].string)
1696 nrange[1] = DBL_MAX;
1697 else if (!parse_category_string (e->location, e->srange[1], dict,
1698 parse_format, &nrange[1]))
1701 struct ctables_pcexpr e2 = {
1702 .op = CTPO_CAT_NRANGE,
1703 .nrange = { nrange[0], nrange[1] },
1704 .location = e->location,
1706 return ctables_find_category_for_postcompute__ (cats, &e2);
1709 return ctables_find_category_for_postcompute__ (cats, e);
1713 ctables_recursive_check_postcompute (struct dictionary *dict,
1714 const struct ctables_pcexpr *e,
1715 struct ctables_category *pc_cat,
1716 const struct ctables_categories *cats,
1717 const struct msg_location *cats_location)
1721 case CTPO_CAT_NUMBER:
1722 case CTPO_CAT_STRING:
1723 case CTPO_CAT_NRANGE:
1724 case CTPO_CAT_SRANGE:
1725 case CTPO_CAT_MISSING:
1726 case CTPO_CAT_OTHERNM:
1727 case CTPO_CAT_SUBTOTAL:
1728 case CTPO_CAT_TOTAL:
1730 struct ctables_category *cat = ctables_find_category_for_postcompute (
1731 dict, cats, pc_cat->parse_format, e);
1734 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1736 size_t n_subtotals = 0;
1737 for (size_t i = 0; i < cats->n_cats; i++)
1738 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1739 if (n_subtotals > 1)
1741 msg_at (SE, cats_location,
1742 ngettext ("These categories include %zu instance "
1743 "of SUBTOTAL or HSUBTOTAL, so references "
1744 "from computed categories must refer to "
1745 "subtotals by position, "
1746 "e.g. SUBTOTAL[1].",
1747 "These categories include %zu instances "
1748 "of SUBTOTAL or HSUBTOTAL, so references "
1749 "from computed categories must refer to "
1750 "subtotals by position, "
1751 "e.g. SUBTOTAL[1].",
1754 msg_at (SN, e->location,
1755 _("This is the reference that lacks a position."));
1760 msg_at (SE, pc_cat->location,
1761 _("Computed category &%s references a category not included "
1762 "in the category list."),
1764 msg_at (SN, e->location, _("This is the missing category."));
1765 if (e->op == CTPO_CAT_SUBTOTAL)
1766 msg_at (SN, cats_location,
1767 _("To fix the problem, add subtotals to the "
1768 "list of categories here."));
1769 else if (e->op == CTPO_CAT_TOTAL)
1770 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1771 "CATEGORIES specification."));
1773 msg_at (SN, cats_location,
1774 _("To fix the problem, add the missing category to the "
1775 "list of categories here."));
1778 if (pc_cat->pc->hide_source_cats)
1792 for (size_t i = 0; i < 2; i++)
1793 if (e->subs[i] && !ctables_recursive_check_postcompute (
1794 dict, e->subs[i], pc_cat, cats, cats_location))
1803 all_strings (struct variable **vars, size_t n_vars,
1804 const struct ctables_category *cat)
1806 for (size_t j = 0; j < n_vars; j++)
1807 if (var_is_numeric (vars[j]))
1809 msg_at (SE, cat->location,
1810 _("This category specification may be applied only to string "
1811 "variables, but this subcommand tries to apply it to "
1812 "numeric variable %s."),
1813 var_get_name (vars[j]));
1820 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1821 struct ctables *ct, struct ctables_table *t)
1823 if (!lex_match_id (lexer, "VARIABLES"))
1825 lex_match (lexer, T_EQUALS);
1827 struct variable **vars;
1829 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1832 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1833 for (size_t i = 1; i < n_vars; i++)
1835 const struct fmt_spec *f = var_get_print_format (vars[i]);
1836 if (f->type != common_format->type)
1838 common_format = NULL;
1844 && (fmt_get_category (common_format->type)
1845 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1847 struct ctables_categories *c = xmalloc (sizeof *c);
1848 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1849 for (size_t i = 0; i < n_vars; i++)
1851 struct ctables_categories **cp
1852 = &t->categories[var_get_dict_index (vars[i])];
1853 ctables_categories_unref (*cp);
1857 size_t allocated_cats = 0;
1858 int cats_start_ofs = -1;
1859 int cats_end_ofs = -1;
1860 if (lex_match (lexer, T_LBRACK))
1862 cats_start_ofs = lex_ofs (lexer);
1865 if (c->n_cats >= allocated_cats)
1866 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1868 int start_ofs = lex_ofs (lexer);
1869 struct ctables_category *cat = &c->cats[c->n_cats];
1870 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1872 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1875 lex_match (lexer, T_COMMA);
1877 while (!lex_match (lexer, T_RBRACK));
1878 cats_end_ofs = lex_ofs (lexer) - 1;
1881 struct ctables_category cat = {
1883 .include_missing = false,
1884 .sort_ascending = true,
1886 bool show_totals = false;
1887 char *total_label = NULL;
1888 bool totals_before = false;
1889 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1891 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1893 lex_match (lexer, T_EQUALS);
1894 if (lex_match_id (lexer, "A"))
1895 cat.sort_ascending = true;
1896 else if (lex_match_id (lexer, "D"))
1897 cat.sort_ascending = false;
1900 lex_error_expecting (lexer, "A", "D");
1904 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1906 lex_match (lexer, T_EQUALS);
1907 if (lex_match_id (lexer, "VALUE"))
1908 cat.type = CCT_VALUE;
1909 else if (lex_match_id (lexer, "LABEL"))
1910 cat.type = CCT_LABEL;
1913 cat.type = CCT_FUNCTION;
1914 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1917 if (lex_match (lexer, T_LPAREN))
1919 cat.sort_var = parse_variable (lexer, dict);
1923 if (cat.sort_function == CTSF_PTILE)
1925 lex_match (lexer, T_COMMA);
1926 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1928 cat.percentile = lex_number (lexer);
1932 if (!lex_force_match (lexer, T_RPAREN))
1935 else if (ctables_function_availability (cat.sort_function)
1938 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1943 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1945 lex_match (lexer, T_EQUALS);
1946 if (lex_match_id (lexer, "INCLUDE"))
1947 cat.include_missing = true;
1948 else if (lex_match_id (lexer, "EXCLUDE"))
1949 cat.include_missing = false;
1952 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1956 else if (lex_match_id (lexer, "TOTAL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!parse_bool (lexer, &show_totals))
1962 else if (lex_match_id (lexer, "LABEL"))
1964 lex_match (lexer, T_EQUALS);
1965 if (!lex_force_string (lexer))
1968 total_label = ss_xstrdup (lex_tokss (lexer));
1971 else if (lex_match_id (lexer, "POSITION"))
1973 lex_match (lexer, T_EQUALS);
1974 if (lex_match_id (lexer, "BEFORE"))
1975 totals_before = true;
1976 else if (lex_match_id (lexer, "AFTER"))
1977 totals_before = false;
1980 lex_error_expecting (lexer, "BEFORE", "AFTER");
1984 else if (lex_match_id (lexer, "EMPTY"))
1986 lex_match (lexer, T_EQUALS);
1987 if (lex_match_id (lexer, "INCLUDE"))
1988 c->show_empty = true;
1989 else if (lex_match_id (lexer, "EXCLUDE"))
1990 c->show_empty = false;
1993 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2000 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2001 "TOTAL", "LABEL", "POSITION", "EMPTY");
2003 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2010 if (c->n_cats >= allocated_cats)
2011 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2012 c->cats[c->n_cats++] = cat;
2017 if (c->n_cats >= allocated_cats)
2018 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2020 struct ctables_category *totals;
2023 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2024 totals = &c->cats[0];
2027 totals = &c->cats[c->n_cats];
2030 *totals = (struct ctables_category) {
2032 .total_label = total_label ? total_label : xstrdup (_("Total")),
2036 struct ctables_category *subtotal = NULL;
2037 for (size_t i = totals_before ? 0 : c->n_cats;
2038 totals_before ? i < c->n_cats : i-- > 0;
2039 totals_before ? i++ : 0)
2041 struct ctables_category *cat = &c->cats[i];
2050 cat->subtotal = subtotal;
2053 case CCT_POSTCOMPUTE:
2064 case CCT_EXCLUDED_MISSING:
2069 if (cats_start_ofs != -1)
2071 for (size_t i = 0; i < c->n_cats; i++)
2073 struct ctables_category *cat = &c->cats[i];
2076 case CCT_POSTCOMPUTE:
2077 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2078 struct msg_location *cats_location
2079 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2080 bool ok = ctables_recursive_check_postcompute (
2081 dict, cat->pc->expr, cat, c, cats_location);
2082 msg_location_destroy (cats_location);
2089 for (size_t j = 0; j < n_vars; j++)
2090 if (var_is_alpha (vars[j]))
2092 msg_at (SE, cat->location,
2093 _("This category specification may be applied "
2094 "only to numeric variables, but this "
2095 "subcommand tries to apply it to string "
2097 var_get_name (vars[j]));
2106 if (!parse_category_string (cat->location, cat->string, dict,
2107 common_format->type, &n))
2110 ss_dealloc (&cat->string);
2112 cat->type = CCT_NUMBER;
2115 else if (!all_strings (vars, n_vars, cat))
2124 if (!cat->srange[0].string)
2126 else if (!parse_category_string (cat->location,
2127 cat->srange[0], dict,
2128 common_format->type, &n[0]))
2131 if (!cat->srange[1].string)
2133 else if (!parse_category_string (cat->location,
2134 cat->srange[1], dict,
2135 common_format->type, &n[1]))
2138 ss_dealloc (&cat->srange[0]);
2139 ss_dealloc (&cat->srange[1]);
2141 cat->type = CCT_NRANGE;
2142 cat->nrange[0] = n[0];
2143 cat->nrange[1] = n[1];
2145 else if (!all_strings (vars, n_vars, cat))
2156 case CCT_EXCLUDED_MISSING:
2171 ctables_nest_uninit (struct ctables_nest *nest)
2174 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2175 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2176 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2177 free (nest->domains[dt]);
2181 ctables_stack_uninit (struct ctables_stack *stack)
2185 for (size_t i = 0; i < stack->n; i++)
2186 ctables_nest_uninit (&stack->nests[i]);
2187 free (stack->nests);
2191 static struct ctables_stack
2192 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2199 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2200 for (size_t i = 0; i < s0.n; i++)
2201 for (size_t j = 0; j < s1.n; j++)
2203 const struct ctables_nest *a = &s0.nests[i];
2204 const struct ctables_nest *b = &s1.nests[j];
2206 size_t allocate = a->n + b->n;
2207 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2209 for (size_t k = 0; k < a->n; k++)
2210 vars[n++] = a->vars[k];
2211 for (size_t k = 0; k < b->n; k++)
2212 vars[n++] = b->vars[k];
2213 assert (n == allocate);
2215 const struct ctables_nest *summary_src;
2216 if (!a->specs[CSV_CELL].var)
2218 else if (!b->specs[CSV_CELL].var)
2223 struct ctables_nest *new = &stack.nests[stack.n++];
2224 *new = (struct ctables_nest) {
2226 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2227 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2231 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2232 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2234 ctables_stack_uninit (&s0);
2235 ctables_stack_uninit (&s1);
2239 static struct ctables_stack
2240 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2242 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2243 for (size_t i = 0; i < s0.n; i++)
2244 stack.nests[stack.n++] = s0.nests[i];
2245 for (size_t i = 0; i < s1.n; i++)
2247 stack.nests[stack.n] = s1.nests[i];
2248 stack.nests[stack.n].group_head += s0.n;
2251 assert (stack.n == s0.n + s1.n);
2257 static struct ctables_stack
2258 var_fts (const struct ctables_axis *a)
2260 struct variable **vars = xmalloc (sizeof *vars);
2263 struct ctables_nest *nest = xmalloc (sizeof *nest);
2264 *nest = (struct ctables_nest) {
2267 .scale_idx = a->scale ? 0 : SIZE_MAX,
2269 if (a->specs[CSV_CELL].n || a->scale)
2270 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2272 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2273 nest->specs[sv].var = a->var;
2274 nest->specs[sv].is_scale = a->scale;
2276 return (struct ctables_stack) { .nests = nest, .n = 1 };
2279 static struct ctables_stack
2280 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2283 return (struct ctables_stack) { .n = 0 };
2291 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
2295 /* This should consider any of the scale variables found in the result to
2296 be linked to each other listwise for SMISSING=LISTWISE. */
2297 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2298 enumerate_fts (axis_type, a->subs[1]));
2304 union ctables_summary
2306 /* COUNT, VALIDN, TOTALN. */
2309 /* MINIMUM, MAXIMUM, RANGE. */
2316 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2317 struct moments1 *moments;
2319 /* MEDIAN, MODE, PTILE. */
2322 struct casewriter *writer;
2327 /* XXX multiple response */
2331 ctables_summary_init (union ctables_summary *s,
2332 const struct ctables_summary_spec *ss)
2334 switch (ss->function)
2338 case CTSF_ROWPCT_COUNT:
2339 case CTSF_COLPCT_COUNT:
2340 case CTSF_TABLEPCT_COUNT:
2341 case CTSF_SUBTABLEPCT_COUNT:
2342 case CTSF_LAYERPCT_COUNT:
2343 case CTSF_LAYERROWPCT_COUNT:
2344 case CTSF_LAYERCOLPCT_COUNT:
2345 case CTSF_ROWPCT_VALIDN:
2346 case CTSF_COLPCT_VALIDN:
2347 case CTSF_TABLEPCT_VALIDN:
2348 case CTSF_SUBTABLEPCT_VALIDN:
2349 case CTSF_LAYERPCT_VALIDN:
2350 case CTSF_LAYERROWPCT_VALIDN:
2351 case CTSF_LAYERCOLPCT_VALIDN:
2352 case CTSF_ROWPCT_TOTALN:
2353 case CTSF_COLPCT_TOTALN:
2354 case CTSF_TABLEPCT_TOTALN:
2355 case CTSF_SUBTABLEPCT_TOTALN:
2356 case CTSF_LAYERPCT_TOTALN:
2357 case CTSF_LAYERROWPCT_TOTALN:
2358 case CTSF_LAYERCOLPCT_TOTALN:
2365 case CTSF_UROWPCT_COUNT:
2366 case CTSF_UCOLPCT_COUNT:
2367 case CTSF_UTABLEPCT_COUNT:
2368 case CTSF_USUBTABLEPCT_COUNT:
2369 case CTSF_ULAYERPCT_COUNT:
2370 case CTSF_ULAYERROWPCT_COUNT:
2371 case CTSF_ULAYERCOLPCT_COUNT:
2372 case CTSF_UROWPCT_VALIDN:
2373 case CTSF_UCOLPCT_VALIDN:
2374 case CTSF_UTABLEPCT_VALIDN:
2375 case CTSF_USUBTABLEPCT_VALIDN:
2376 case CTSF_ULAYERPCT_VALIDN:
2377 case CTSF_ULAYERROWPCT_VALIDN:
2378 case CTSF_ULAYERCOLPCT_VALIDN:
2379 case CTSF_UROWPCT_TOTALN:
2380 case CTSF_UCOLPCT_TOTALN:
2381 case CTSF_UTABLEPCT_TOTALN:
2382 case CTSF_USUBTABLEPCT_TOTALN:
2383 case CTSF_ULAYERPCT_TOTALN:
2384 case CTSF_ULAYERROWPCT_TOTALN:
2385 case CTSF_ULAYERCOLPCT_TOTALN:
2395 case CTSF_SUBTABLE_ID:
2397 case CTSF_LAYERROW_ID:
2398 case CTSF_LAYERCOL_ID:
2404 s->min = s->max = SYSMIS;
2412 case CTSF_ROWPCT_SUM:
2413 case CTSF_COLPCT_SUM:
2414 case CTSF_TABLEPCT_SUM:
2415 case CTSF_SUBTABLEPCT_SUM:
2416 case CTSF_LAYERPCT_SUM:
2417 case CTSF_LAYERROWPCT_SUM:
2418 case CTSF_LAYERCOLPCT_SUM:
2423 case CTSF_UVARIANCE:
2424 case CTSF_UROWPCT_SUM:
2425 case CTSF_UCOLPCT_SUM:
2426 case CTSF_UTABLEPCT_SUM:
2427 case CTSF_USUBTABLEPCT_SUM:
2428 case CTSF_ULAYERPCT_SUM:
2429 case CTSF_ULAYERROWPCT_SUM:
2430 case CTSF_ULAYERCOLPCT_SUM:
2431 s->moments = moments1_create (MOMENT_VARIANCE);
2441 struct caseproto *proto = caseproto_create ();
2442 proto = caseproto_add_width (proto, 0);
2443 proto = caseproto_add_width (proto, 0);
2445 struct subcase ordering;
2446 subcase_init (&ordering, 0, 0, SC_ASCEND);
2447 s->writer = sort_create_writer (&ordering, proto);
2448 subcase_uninit (&ordering);
2449 caseproto_unref (proto);
2459 ctables_summary_uninit (union ctables_summary *s,
2460 const struct ctables_summary_spec *ss)
2462 switch (ss->function)
2466 case CTSF_ROWPCT_COUNT:
2467 case CTSF_COLPCT_COUNT:
2468 case CTSF_TABLEPCT_COUNT:
2469 case CTSF_SUBTABLEPCT_COUNT:
2470 case CTSF_LAYERPCT_COUNT:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERCOLPCT_COUNT:
2473 case CTSF_ROWPCT_VALIDN:
2474 case CTSF_COLPCT_VALIDN:
2475 case CTSF_TABLEPCT_VALIDN:
2476 case CTSF_SUBTABLEPCT_VALIDN:
2477 case CTSF_LAYERPCT_VALIDN:
2478 case CTSF_LAYERROWPCT_VALIDN:
2479 case CTSF_LAYERCOLPCT_VALIDN:
2480 case CTSF_ROWPCT_TOTALN:
2481 case CTSF_COLPCT_TOTALN:
2482 case CTSF_TABLEPCT_TOTALN:
2483 case CTSF_SUBTABLEPCT_TOTALN:
2484 case CTSF_LAYERPCT_TOTALN:
2485 case CTSF_LAYERROWPCT_TOTALN:
2486 case CTSF_LAYERCOLPCT_TOTALN:
2493 case CTSF_UROWPCT_COUNT:
2494 case CTSF_UCOLPCT_COUNT:
2495 case CTSF_UTABLEPCT_COUNT:
2496 case CTSF_USUBTABLEPCT_COUNT:
2497 case CTSF_ULAYERPCT_COUNT:
2498 case CTSF_ULAYERROWPCT_COUNT:
2499 case CTSF_ULAYERCOLPCT_COUNT:
2500 case CTSF_UROWPCT_VALIDN:
2501 case CTSF_UCOLPCT_VALIDN:
2502 case CTSF_UTABLEPCT_VALIDN:
2503 case CTSF_USUBTABLEPCT_VALIDN:
2504 case CTSF_ULAYERPCT_VALIDN:
2505 case CTSF_ULAYERROWPCT_VALIDN:
2506 case CTSF_ULAYERCOLPCT_VALIDN:
2507 case CTSF_UROWPCT_TOTALN:
2508 case CTSF_UCOLPCT_TOTALN:
2509 case CTSF_UTABLEPCT_TOTALN:
2510 case CTSF_USUBTABLEPCT_TOTALN:
2511 case CTSF_ULAYERPCT_TOTALN:
2512 case CTSF_ULAYERROWPCT_TOTALN:
2513 case CTSF_ULAYERCOLPCT_TOTALN:
2522 case CTSF_SUBTABLE_ID:
2524 case CTSF_LAYERROW_ID:
2525 case CTSF_LAYERCOL_ID:
2538 case CTSF_ROWPCT_SUM:
2539 case CTSF_COLPCT_SUM:
2540 case CTSF_TABLEPCT_SUM:
2541 case CTSF_SUBTABLEPCT_SUM:
2542 case CTSF_LAYERPCT_SUM:
2543 case CTSF_LAYERROWPCT_SUM:
2544 case CTSF_LAYERCOLPCT_SUM:
2549 case CTSF_UVARIANCE:
2550 case CTSF_UROWPCT_SUM:
2551 case CTSF_UCOLPCT_SUM:
2552 case CTSF_UTABLEPCT_SUM:
2553 case CTSF_USUBTABLEPCT_SUM:
2554 case CTSF_ULAYERPCT_SUM:
2555 case CTSF_ULAYERROWPCT_SUM:
2556 case CTSF_ULAYERCOLPCT_SUM:
2557 moments1_destroy (s->moments);
2566 casewriter_destroy (s->writer);
2572 ctables_summary_add (union ctables_summary *s,
2573 const struct ctables_summary_spec *ss,
2574 const struct variable *var, const union value *value,
2575 bool is_scale, bool is_scale_missing,
2576 bool is_missing, bool excluded_missing,
2577 double d_weight, double e_weight)
2579 /* To determine whether a case is included in a given table for a particular
2580 kind of summary, consider the following charts for each variable in the
2581 table. Only if "yes" appears for every variable for the summary is the
2584 Categorical variables: VALIDN COUNT TOTALN
2585 Valid values in included categories yes yes yes
2586 Missing values in included categories --- yes yes
2587 Missing values in excluded categories --- --- yes
2588 Valid values in excluded categories --- --- ---
2590 Scale variables: VALIDN COUNT TOTALN
2591 Valid value yes yes yes
2592 Missing value --- yes yes
2594 Missing values include both user- and system-missing. (The system-missing
2595 value is always in an excluded category.)
2597 switch (ss->function)
2600 case CTSF_ROWPCT_TOTALN:
2601 case CTSF_COLPCT_TOTALN:
2602 case CTSF_TABLEPCT_TOTALN:
2603 case CTSF_SUBTABLEPCT_TOTALN:
2604 case CTSF_LAYERPCT_TOTALN:
2605 case CTSF_LAYERROWPCT_TOTALN:
2606 case CTSF_LAYERCOLPCT_TOTALN:
2607 s->count += d_weight;
2611 case CTSF_UROWPCT_TOTALN:
2612 case CTSF_UCOLPCT_TOTALN:
2613 case CTSF_UTABLEPCT_TOTALN:
2614 case CTSF_USUBTABLEPCT_TOTALN:
2615 case CTSF_ULAYERPCT_TOTALN:
2616 case CTSF_ULAYERROWPCT_TOTALN:
2617 case CTSF_ULAYERCOLPCT_TOTALN:
2622 case CTSF_ROWPCT_COUNT:
2623 case CTSF_COLPCT_COUNT:
2624 case CTSF_TABLEPCT_COUNT:
2625 case CTSF_SUBTABLEPCT_COUNT:
2626 case CTSF_LAYERPCT_COUNT:
2627 case CTSF_LAYERROWPCT_COUNT:
2628 case CTSF_LAYERCOLPCT_COUNT:
2629 if (is_scale || !excluded_missing)
2630 s->count += d_weight;
2634 case CTSF_UROWPCT_COUNT:
2635 case CTSF_UCOLPCT_COUNT:
2636 case CTSF_UTABLEPCT_COUNT:
2637 case CTSF_USUBTABLEPCT_COUNT:
2638 case CTSF_ULAYERPCT_COUNT:
2639 case CTSF_ULAYERROWPCT_COUNT:
2640 case CTSF_ULAYERCOLPCT_COUNT:
2641 if (is_scale || !excluded_missing)
2646 case CTSF_ROWPCT_VALIDN:
2647 case CTSF_COLPCT_VALIDN:
2648 case CTSF_TABLEPCT_VALIDN:
2649 case CTSF_SUBTABLEPCT_VALIDN:
2650 case CTSF_LAYERPCT_VALIDN:
2651 case CTSF_LAYERROWPCT_VALIDN:
2652 case CTSF_LAYERCOLPCT_VALIDN:
2656 s->count += d_weight;
2660 case CTSF_UROWPCT_VALIDN:
2661 case CTSF_UCOLPCT_VALIDN:
2662 case CTSF_UTABLEPCT_VALIDN:
2663 case CTSF_USUBTABLEPCT_VALIDN:
2664 case CTSF_ULAYERPCT_VALIDN:
2665 case CTSF_ULAYERROWPCT_VALIDN:
2666 case CTSF_ULAYERCOLPCT_VALIDN:
2676 case CTSF_SUBTABLE_ID:
2678 case CTSF_LAYERROW_ID:
2679 case CTSF_LAYERCOL_ID:
2686 s->count += d_weight;
2697 if (is_scale || !excluded_missing)
2698 s->count += e_weight;
2705 s->count += e_weight;
2709 s->count += e_weight;
2715 if (!is_scale_missing)
2717 assert (!var_is_alpha (var)); /* XXX? */
2718 if (s->min == SYSMIS || value->f < s->min)
2720 if (s->max == SYSMIS || value->f > s->max)
2730 case CTSF_ROWPCT_SUM:
2731 case CTSF_COLPCT_SUM:
2732 case CTSF_TABLEPCT_SUM:
2733 case CTSF_SUBTABLEPCT_SUM:
2734 case CTSF_LAYERPCT_SUM:
2735 case CTSF_LAYERROWPCT_SUM:
2736 case CTSF_LAYERCOLPCT_SUM:
2737 if (!is_scale_missing)
2738 moments1_add (s->moments, value->f, e_weight);
2745 case CTSF_UVARIANCE:
2746 case CTSF_UROWPCT_SUM:
2747 case CTSF_UCOLPCT_SUM:
2748 case CTSF_UTABLEPCT_SUM:
2749 case CTSF_USUBTABLEPCT_SUM:
2750 case CTSF_ULAYERPCT_SUM:
2751 case CTSF_ULAYERROWPCT_SUM:
2752 case CTSF_ULAYERCOLPCT_SUM:
2753 if (!is_scale_missing)
2754 moments1_add (s->moments, value->f, 1.0);
2760 d_weight = e_weight = 1.0;
2765 if (!is_scale_missing)
2767 s->ovalid += e_weight;
2769 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2770 *case_num_rw_idx (c, 0) = value->f;
2771 *case_num_rw_idx (c, 1) = e_weight;
2772 casewriter_write (s->writer, c);
2778 static enum ctables_domain_type
2779 ctables_function_domain (enum ctables_summary_function function)
2809 case CTSF_UVARIANCE:
2815 case CTSF_COLPCT_COUNT:
2816 case CTSF_COLPCT_SUM:
2817 case CTSF_COLPCT_TOTALN:
2818 case CTSF_COLPCT_VALIDN:
2819 case CTSF_UCOLPCT_COUNT:
2820 case CTSF_UCOLPCT_SUM:
2821 case CTSF_UCOLPCT_TOTALN:
2822 case CTSF_UCOLPCT_VALIDN:
2826 case CTSF_LAYERCOLPCT_COUNT:
2827 case CTSF_LAYERCOLPCT_SUM:
2828 case CTSF_LAYERCOLPCT_TOTALN:
2829 case CTSF_LAYERCOLPCT_VALIDN:
2830 case CTSF_ULAYERCOLPCT_COUNT:
2831 case CTSF_ULAYERCOLPCT_SUM:
2832 case CTSF_ULAYERCOLPCT_TOTALN:
2833 case CTSF_ULAYERCOLPCT_VALIDN:
2834 case CTSF_LAYERCOL_ID:
2835 return CTDT_LAYERCOL;
2837 case CTSF_LAYERPCT_COUNT:
2838 case CTSF_LAYERPCT_SUM:
2839 case CTSF_LAYERPCT_TOTALN:
2840 case CTSF_LAYERPCT_VALIDN:
2841 case CTSF_ULAYERPCT_COUNT:
2842 case CTSF_ULAYERPCT_SUM:
2843 case CTSF_ULAYERPCT_TOTALN:
2844 case CTSF_ULAYERPCT_VALIDN:
2848 case CTSF_LAYERROWPCT_COUNT:
2849 case CTSF_LAYERROWPCT_SUM:
2850 case CTSF_LAYERROWPCT_TOTALN:
2851 case CTSF_LAYERROWPCT_VALIDN:
2852 case CTSF_ULAYERROWPCT_COUNT:
2853 case CTSF_ULAYERROWPCT_SUM:
2854 case CTSF_ULAYERROWPCT_TOTALN:
2855 case CTSF_ULAYERROWPCT_VALIDN:
2856 case CTSF_LAYERROW_ID:
2857 return CTDT_LAYERROW;
2859 case CTSF_ROWPCT_COUNT:
2860 case CTSF_ROWPCT_SUM:
2861 case CTSF_ROWPCT_TOTALN:
2862 case CTSF_ROWPCT_VALIDN:
2863 case CTSF_UROWPCT_COUNT:
2864 case CTSF_UROWPCT_SUM:
2865 case CTSF_UROWPCT_TOTALN:
2866 case CTSF_UROWPCT_VALIDN:
2870 case CTSF_SUBTABLEPCT_COUNT:
2871 case CTSF_SUBTABLEPCT_SUM:
2872 case CTSF_SUBTABLEPCT_TOTALN:
2873 case CTSF_SUBTABLEPCT_VALIDN:
2874 case CTSF_USUBTABLEPCT_COUNT:
2875 case CTSF_USUBTABLEPCT_SUM:
2876 case CTSF_USUBTABLEPCT_TOTALN:
2877 case CTSF_USUBTABLEPCT_VALIDN:
2878 case CTSF_SUBTABLE_ID:
2879 return CTDT_SUBTABLE;
2881 case CTSF_TABLEPCT_COUNT:
2882 case CTSF_TABLEPCT_SUM:
2883 case CTSF_TABLEPCT_TOTALN:
2884 case CTSF_TABLEPCT_VALIDN:
2885 case CTSF_UTABLEPCT_COUNT:
2886 case CTSF_UTABLEPCT_SUM:
2887 case CTSF_UTABLEPCT_TOTALN:
2888 case CTSF_UTABLEPCT_VALIDN:
2896 static enum ctables_domain_type
2897 ctables_function_is_pctsum (enum ctables_summary_function function)
2927 case CTSF_UVARIANCE:
2931 case CTSF_COLPCT_COUNT:
2932 case CTSF_COLPCT_TOTALN:
2933 case CTSF_COLPCT_VALIDN:
2934 case CTSF_UCOLPCT_COUNT:
2935 case CTSF_UCOLPCT_TOTALN:
2936 case CTSF_UCOLPCT_VALIDN:
2937 case CTSF_LAYERCOLPCT_COUNT:
2938 case CTSF_LAYERCOLPCT_TOTALN:
2939 case CTSF_LAYERCOLPCT_VALIDN:
2940 case CTSF_ULAYERCOLPCT_COUNT:
2941 case CTSF_ULAYERCOLPCT_TOTALN:
2942 case CTSF_ULAYERCOLPCT_VALIDN:
2943 case CTSF_LAYERPCT_COUNT:
2944 case CTSF_LAYERPCT_TOTALN:
2945 case CTSF_LAYERPCT_VALIDN:
2946 case CTSF_ULAYERPCT_COUNT:
2947 case CTSF_ULAYERPCT_TOTALN:
2948 case CTSF_ULAYERPCT_VALIDN:
2949 case CTSF_LAYERROWPCT_COUNT:
2950 case CTSF_LAYERROWPCT_TOTALN:
2951 case CTSF_LAYERROWPCT_VALIDN:
2952 case CTSF_ULAYERROWPCT_COUNT:
2953 case CTSF_ULAYERROWPCT_TOTALN:
2954 case CTSF_ULAYERROWPCT_VALIDN:
2955 case CTSF_ROWPCT_COUNT:
2956 case CTSF_ROWPCT_TOTALN:
2957 case CTSF_ROWPCT_VALIDN:
2958 case CTSF_UROWPCT_COUNT:
2959 case CTSF_UROWPCT_TOTALN:
2960 case CTSF_UROWPCT_VALIDN:
2961 case CTSF_SUBTABLEPCT_COUNT:
2962 case CTSF_SUBTABLEPCT_TOTALN:
2963 case CTSF_SUBTABLEPCT_VALIDN:
2964 case CTSF_USUBTABLEPCT_COUNT:
2965 case CTSF_USUBTABLEPCT_TOTALN:
2966 case CTSF_USUBTABLEPCT_VALIDN:
2967 case CTSF_TABLEPCT_COUNT:
2968 case CTSF_TABLEPCT_TOTALN:
2969 case CTSF_TABLEPCT_VALIDN:
2970 case CTSF_UTABLEPCT_COUNT:
2971 case CTSF_UTABLEPCT_TOTALN:
2972 case CTSF_UTABLEPCT_VALIDN:
2976 case CTSF_SUBTABLE_ID:
2978 case CTSF_LAYERROW_ID:
2979 case CTSF_LAYERCOL_ID:
2982 case CTSF_COLPCT_SUM:
2983 case CTSF_UCOLPCT_SUM:
2984 case CTSF_LAYERCOLPCT_SUM:
2985 case CTSF_ULAYERCOLPCT_SUM:
2986 case CTSF_LAYERPCT_SUM:
2987 case CTSF_ULAYERPCT_SUM:
2988 case CTSF_LAYERROWPCT_SUM:
2989 case CTSF_ULAYERROWPCT_SUM:
2990 case CTSF_ROWPCT_SUM:
2991 case CTSF_UROWPCT_SUM:
2992 case CTSF_SUBTABLEPCT_SUM:
2993 case CTSF_USUBTABLEPCT_SUM:
2994 case CTSF_TABLEPCT_SUM:
2995 case CTSF_UTABLEPCT_SUM:
3003 ctables_summary_value (const struct ctables_cell *cell,
3004 union ctables_summary *s,
3005 const struct ctables_summary_spec *ss)
3007 switch (ss->function)
3017 case CTSF_SUBTABLE_ID:
3019 case CTSF_LAYERROW_ID:
3020 case CTSF_LAYERCOL_ID:
3021 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3023 case CTSF_ROWPCT_COUNT:
3024 case CTSF_COLPCT_COUNT:
3025 case CTSF_TABLEPCT_COUNT:
3026 case CTSF_SUBTABLEPCT_COUNT:
3027 case CTSF_LAYERPCT_COUNT:
3028 case CTSF_LAYERROWPCT_COUNT:
3029 case CTSF_LAYERCOLPCT_COUNT:
3031 enum ctables_domain_type d = ctables_function_domain (ss->function);
3032 return (cell->domains[d]->e_count
3033 ? s->count / cell->domains[d]->e_count * 100
3037 case CTSF_UROWPCT_COUNT:
3038 case CTSF_UCOLPCT_COUNT:
3039 case CTSF_UTABLEPCT_COUNT:
3040 case CTSF_USUBTABLEPCT_COUNT:
3041 case CTSF_ULAYERPCT_COUNT:
3042 case CTSF_ULAYERROWPCT_COUNT:
3043 case CTSF_ULAYERCOLPCT_COUNT:
3045 enum ctables_domain_type d = ctables_function_domain (ss->function);
3046 return (cell->domains[d]->u_count
3047 ? s->count / cell->domains[d]->u_count * 100
3051 case CTSF_ROWPCT_VALIDN:
3052 case CTSF_COLPCT_VALIDN:
3053 case CTSF_TABLEPCT_VALIDN:
3054 case CTSF_SUBTABLEPCT_VALIDN:
3055 case CTSF_LAYERPCT_VALIDN:
3056 case CTSF_LAYERROWPCT_VALIDN:
3057 case CTSF_LAYERCOLPCT_VALIDN:
3059 enum ctables_domain_type d = ctables_function_domain (ss->function);
3060 return (cell->domains[d]->e_valid
3061 ? s->count / cell->domains[d]->e_valid * 100
3065 case CTSF_UROWPCT_VALIDN:
3066 case CTSF_UCOLPCT_VALIDN:
3067 case CTSF_UTABLEPCT_VALIDN:
3068 case CTSF_USUBTABLEPCT_VALIDN:
3069 case CTSF_ULAYERPCT_VALIDN:
3070 case CTSF_ULAYERROWPCT_VALIDN:
3071 case CTSF_ULAYERCOLPCT_VALIDN:
3073 enum ctables_domain_type d = ctables_function_domain (ss->function);
3074 return (cell->domains[d]->u_valid
3075 ? s->count / cell->domains[d]->u_valid * 100
3079 case CTSF_ROWPCT_TOTALN:
3080 case CTSF_COLPCT_TOTALN:
3081 case CTSF_TABLEPCT_TOTALN:
3082 case CTSF_SUBTABLEPCT_TOTALN:
3083 case CTSF_LAYERPCT_TOTALN:
3084 case CTSF_LAYERROWPCT_TOTALN:
3085 case CTSF_LAYERCOLPCT_TOTALN:
3087 enum ctables_domain_type d = ctables_function_domain (ss->function);
3088 return (cell->domains[d]->e_total
3089 ? s->count / cell->domains[d]->e_total * 100
3093 case CTSF_UROWPCT_TOTALN:
3094 case CTSF_UCOLPCT_TOTALN:
3095 case CTSF_UTABLEPCT_TOTALN:
3096 case CTSF_USUBTABLEPCT_TOTALN:
3097 case CTSF_ULAYERPCT_TOTALN:
3098 case CTSF_ULAYERROWPCT_TOTALN:
3099 case CTSF_ULAYERCOLPCT_TOTALN:
3101 enum ctables_domain_type d = ctables_function_domain (ss->function);
3102 return (cell->domains[d]->u_total
3103 ? s->count / cell->domains[d]->u_total * 100
3124 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3130 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3137 double weight, variance;
3138 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3139 return calc_semean (variance, weight);
3146 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3147 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3153 double weight, mean;
3154 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3155 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3159 case CTSF_UVARIANCE:
3162 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3166 case CTSF_ROWPCT_SUM:
3167 case CTSF_COLPCT_SUM:
3168 case CTSF_TABLEPCT_SUM:
3169 case CTSF_SUBTABLEPCT_SUM:
3170 case CTSF_LAYERPCT_SUM:
3171 case CTSF_LAYERROWPCT_SUM:
3172 case CTSF_LAYERCOLPCT_SUM:
3174 double weight, mean;
3175 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3176 if (weight == SYSMIS || mean == SYSMIS)
3178 enum ctables_domain_type d = ctables_function_domain (ss->function);
3179 double num = weight * mean;
3180 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3181 return denom != 0 ? num / denom * 100 : SYSMIS;
3183 case CTSF_UROWPCT_SUM:
3184 case CTSF_UCOLPCT_SUM:
3185 case CTSF_UTABLEPCT_SUM:
3186 case CTSF_USUBTABLEPCT_SUM:
3187 case CTSF_ULAYERPCT_SUM:
3188 case CTSF_ULAYERROWPCT_SUM:
3189 case CTSF_ULAYERCOLPCT_SUM:
3191 double weight, mean;
3192 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3193 if (weight == SYSMIS || mean == SYSMIS)
3195 enum ctables_domain_type d = ctables_function_domain (ss->function);
3196 double num = weight * mean;
3197 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3198 return denom != 0 ? num / denom * 100 : SYSMIS;
3207 struct casereader *reader = casewriter_make_reader (s->writer);
3210 struct percentile *ptile = percentile_create (
3211 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3212 struct order_stats *os = &ptile->parent;
3213 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3214 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3215 statistic_destroy (&ptile->parent.parent);
3223 struct casereader *reader = casewriter_make_reader (s->writer);
3226 struct mode *mode = mode_create ();
3227 struct order_stats *os = &mode->parent;
3228 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3229 s->ovalue = mode->mode;
3230 statistic_destroy (&mode->parent.parent);
3238 struct ctables_cell_sort_aux
3240 const struct ctables_nest *nest;
3241 enum pivot_axis_type a;
3245 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3247 const struct ctables_cell_sort_aux *aux = aux_;
3248 struct ctables_cell *const *ap = a_;
3249 struct ctables_cell *const *bp = b_;
3250 const struct ctables_cell *a = *ap;
3251 const struct ctables_cell *b = *bp;
3253 const struct ctables_nest *nest = aux->nest;
3254 for (size_t i = 0; i < nest->n; i++)
3255 if (i != nest->scale_idx)
3257 const struct variable *var = nest->vars[i];
3258 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3259 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3260 if (a_cv->category != b_cv->category)
3261 return a_cv->category > b_cv->category ? 1 : -1;
3263 const union value *a_val = &a_cv->value;
3264 const union value *b_val = &b_cv->value;
3265 switch (a_cv->category->type)
3271 case CCT_POSTCOMPUTE:
3272 case CCT_EXCLUDED_MISSING:
3273 /* Must be equal. */
3281 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3289 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3291 return a_cv->category->sort_ascending ? cmp : -cmp;
3297 const char *a_label = var_lookup_value_label (var, a_val);
3298 const char *b_label = var_lookup_value_label (var, b_val);
3304 cmp = strcmp (a_label, b_label);
3310 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3313 return a_cv->category->sort_ascending ? cmp : -cmp;
3325 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3326 const void *aux UNUSED)
3328 struct ctables_cell *const *ap = a_;
3329 struct ctables_cell *const *bp = b_;
3330 const struct ctables_cell *a = *ap;
3331 const struct ctables_cell *b = *bp;
3333 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3335 int al = a->axes[axis].leaf;
3336 int bl = b->axes[axis].leaf;
3338 return al > bl ? 1 : -1;
3346 For each ctables_table:
3347 For each combination of row vars:
3348 For each combination of column vars:
3349 For each combination of layer vars:
3351 Make a table of row values:
3352 Sort entries by row values
3353 Assign a 0-based index to each actual value
3354 Construct a dimension
3355 Make a table of column values
3356 Make a table of layer values
3358 Fill the table entry using the indexes from before.
3361 static struct ctables_domain *
3362 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3363 enum ctables_domain_type domain)
3366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3368 const struct ctables_nest *nest = s->nests[a];
3369 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3371 size_t v_idx = nest->domains[domain][i];
3372 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3373 hash = hash_pointer (cv->category, hash);
3374 if (cv->category->type != CCT_TOTAL
3375 && cv->category->type != CCT_SUBTOTAL
3376 && cv->category->type != CCT_POSTCOMPUTE)
3377 hash = value_hash (&cv->value,
3378 var_get_width (nest->vars[v_idx]), hash);
3382 struct ctables_domain *d;
3383 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3385 const struct ctables_cell *df = d->example;
3386 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3388 const struct ctables_nest *nest = s->nests[a];
3389 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3391 size_t v_idx = nest->domains[domain][i];
3392 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3393 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3394 if (cv1->category != cv2->category
3395 || (cv1->category->type != CCT_TOTAL
3396 && cv1->category->type != CCT_SUBTOTAL
3397 && cv1->category->type != CCT_POSTCOMPUTE
3398 && !value_equal (&cv1->value, &cv2->value,
3399 var_get_width (nest->vars[v_idx]))))
3408 struct ctables_sum *sums = (s->table->n_sum_vars
3409 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3412 d = xmalloc (sizeof *d);
3413 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3414 hmap_insert (&s->domains[domain], &d->node, hash);
3418 static struct substring
3419 rtrim_value (const union value *v, const struct variable *var)
3421 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3422 var_get_width (var));
3423 ss_rtrim (&s, ss_cstr (" "));
3428 in_string_range (const union value *v, const struct variable *var,
3429 const struct substring *srange)
3431 struct substring s = rtrim_value (v, var);
3432 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3433 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3436 static const struct ctables_category *
3437 ctables_categories_match (const struct ctables_categories *c,
3438 const union value *v, const struct variable *var)
3440 if (var_is_numeric (var) && v->f == SYSMIS)
3443 const struct ctables_category *othernm = NULL;
3444 for (size_t i = c->n_cats; i-- > 0; )
3446 const struct ctables_category *cat = &c->cats[i];
3450 if (cat->number == v->f)
3455 if (ss_equals (cat->string, rtrim_value (v, var)))
3460 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3461 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3466 if (in_string_range (v, var, cat->srange))
3471 if (var_is_value_missing (var, v))
3475 case CCT_POSTCOMPUTE:
3490 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3493 case CCT_EXCLUDED_MISSING:
3498 return var_is_value_missing (var, v) ? NULL : othernm;
3501 static const struct ctables_category *
3502 ctables_categories_total (const struct ctables_categories *c)
3504 const struct ctables_category *first = &c->cats[0];
3505 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3506 return (first->type == CCT_TOTAL ? first
3507 : last->type == CCT_TOTAL ? last
3511 static struct ctables_cell *
3512 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3513 const struct ctables_category *cats[PIVOT_N_AXES][10])
3516 enum ctables_summary_variant sv = CSV_CELL;
3517 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3519 const struct ctables_nest *nest = s->nests[a];
3520 for (size_t i = 0; i < nest->n; i++)
3521 if (i != nest->scale_idx)
3523 hash = hash_pointer (cats[a][i], hash);
3524 if (cats[a][i]->type != CCT_TOTAL
3525 && cats[a][i]->type != CCT_SUBTOTAL
3526 && cats[a][i]->type != CCT_POSTCOMPUTE)
3527 hash = value_hash (case_data (c, nest->vars[i]),
3528 var_get_width (nest->vars[i]), hash);
3534 struct ctables_cell *cell;
3535 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3537 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3539 const struct ctables_nest *nest = s->nests[a];
3540 for (size_t i = 0; i < nest->n; i++)
3541 if (i != nest->scale_idx
3542 && (cats[a][i] != cell->axes[a].cvs[i].category
3543 || (cats[a][i]->type != CCT_TOTAL
3544 && cats[a][i]->type != CCT_SUBTOTAL
3545 && cats[a][i]->type != CCT_POSTCOMPUTE
3546 && !value_equal (case_data (c, nest->vars[i]),
3547 &cell->axes[a].cvs[i].value,
3548 var_get_width (nest->vars[i])))))
3557 cell = xmalloc (sizeof *cell);
3560 cell->omit_domains = 0;
3561 cell->postcompute = false;
3562 //struct string name = DS_EMPTY_INITIALIZER;
3563 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3565 const struct ctables_nest *nest = s->nests[a];
3566 cell->axes[a].cvs = (nest->n
3567 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3569 for (size_t i = 0; i < nest->n; i++)
3571 const struct ctables_category *cat = cats[a][i];
3572 const struct variable *var = nest->vars[i];
3573 const union value *value = case_data (c, var);
3574 if (i != nest->scale_idx)
3576 const struct ctables_category *subtotal = cat->subtotal;
3577 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3580 if (cat->type == CCT_TOTAL
3581 || cat->type == CCT_SUBTOTAL
3582 || cat->type == CCT_POSTCOMPUTE)
3584 /* XXX these should be more encompassing I think.*/
3588 case PIVOT_AXIS_COLUMN:
3589 cell->omit_domains |= ((1u << CTDT_TABLE) |
3590 (1u << CTDT_LAYER) |
3591 (1u << CTDT_LAYERCOL) |
3592 (1u << CTDT_SUBTABLE) |
3595 case PIVOT_AXIS_ROW:
3596 cell->omit_domains |= ((1u << CTDT_TABLE) |
3597 (1u << CTDT_LAYER) |
3598 (1u << CTDT_LAYERROW) |
3599 (1u << CTDT_SUBTABLE) |
3602 case PIVOT_AXIS_LAYER:
3603 cell->omit_domains |= ((1u << CTDT_TABLE) |
3604 (1u << CTDT_LAYER));
3608 if (cat->type == CCT_POSTCOMPUTE)
3609 cell->postcompute = true;
3612 cell->axes[a].cvs[i].category = cat;
3613 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3616 if (i != nest->scale_idx)
3618 if (!ds_is_empty (&name))
3619 ds_put_cstr (&name, ", ");
3620 char *value_s = data_out (value, var_get_encoding (var),
3621 var_get_print_format (var),
3622 settings_get_fmt_settings ());
3623 if (cat->type == CCT_TOTAL
3624 || cat->type == CCT_SUBTOTAL
3625 || cat->type == CCT_POSTCOMPUTE)
3626 ds_put_format (&name, "%s=total", var_get_name (var));
3628 ds_put_format (&name, "%s=%s", var_get_name (var),
3629 value_s + strspn (value_s, " "));
3635 //cell->name = ds_steal_cstr (&name);
3637 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3638 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3639 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3640 for (size_t i = 0; i < specs->n; i++)
3641 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3642 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3643 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3644 hmap_insert (&s->cells, &cell->node, hash);
3649 is_scale_missing (const struct ctables_summary_spec_set *specs,
3650 const struct ccase *c)
3652 if (!specs->is_scale)
3655 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3658 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3660 const struct variable *var = specs->listwise_vars[i];
3661 if (var_is_num_missing (var, case_num (c, var)))
3669 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3670 const struct ctables_category *cats[PIVOT_N_AXES][10],
3671 bool is_missing, bool excluded_missing,
3672 double d_weight, double e_weight)
3674 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3675 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3677 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3679 bool scale_missing = is_scale_missing (specs, c);
3680 for (size_t i = 0; i < specs->n; i++)
3681 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3682 specs->var, case_data (c, specs->var), specs->is_scale,
3683 scale_missing, is_missing, excluded_missing,
3684 d_weight, e_weight);
3685 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3686 if (!(cell->omit_domains && (1u << dt)))
3688 struct ctables_domain *d = cell->domains[dt];
3689 d->d_total += d_weight;
3690 d->e_total += e_weight;
3692 if (!excluded_missing)
3694 d->d_count += d_weight;
3695 d->e_count += e_weight;
3700 d->d_valid += d_weight;
3701 d->e_valid += e_weight;
3704 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3706 /* XXX listwise_missing??? */
3707 const struct variable *var = s->table->sum_vars[i];
3708 double addend = case_num (c, var);
3709 if (!var_is_num_missing (var, addend))
3711 struct ctables_sum *sum = &d->sums[i];
3712 sum->e_sum += addend * e_weight;
3713 sum->u_sum += addend;
3721 recurse_totals (struct ctables_section *s, const struct ccase *c,
3722 const struct ctables_category *cats[PIVOT_N_AXES][10],
3723 bool is_missing, bool excluded_missing,
3724 double d_weight, double e_weight,
3725 enum pivot_axis_type start_axis, size_t start_nest)
3727 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3729 const struct ctables_nest *nest = s->nests[a];
3730 for (size_t i = start_nest; i < nest->n; i++)
3732 if (i == nest->scale_idx)
3735 const struct variable *var = nest->vars[i];
3737 const struct ctables_category *total = ctables_categories_total (
3738 s->table->categories[var_get_dict_index (var)]);
3741 const struct ctables_category *save = cats[a][i];
3743 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3744 d_weight, e_weight);
3745 recurse_totals (s, c, cats, is_missing, excluded_missing,
3746 d_weight, e_weight, a, i + 1);
3755 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3756 const struct ctables_category *cats[PIVOT_N_AXES][10],
3757 bool is_missing, bool excluded_missing,
3758 double d_weight, double e_weight,
3759 enum pivot_axis_type start_axis, size_t start_nest)
3761 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3763 const struct ctables_nest *nest = s->nests[a];
3764 for (size_t i = start_nest; i < nest->n; i++)
3766 if (i == nest->scale_idx)
3769 const struct ctables_category *save = cats[a][i];
3772 cats[a][i] = save->subtotal;
3773 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3774 d_weight, e_weight);
3775 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3776 d_weight, e_weight, a, i + 1);
3785 ctables_add_occurrence (const struct variable *var,
3786 const union value *value,
3787 struct hmap *occurrences)
3789 int width = var_get_width (var);
3790 unsigned int hash = value_hash (value, width, 0);
3792 struct ctables_occurrence *o;
3793 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3795 if (value_equal (value, &o->value, width))
3798 o = xmalloc (sizeof *o);
3799 value_clone (&o->value, value, width);
3800 hmap_insert (occurrences, &o->node, hash);
3804 ctables_cell_insert (struct ctables_section *s,
3805 const struct ccase *c,
3806 double d_weight, double e_weight)
3808 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3810 /* Does at least one categorical variable have a missing value in an included
3811 or excluded category? */
3812 bool is_missing = false;
3814 /* Does at least one categorical variable have a missing value in an excluded
3816 bool excluded_missing = false;
3818 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3820 const struct ctables_nest *nest = s->nests[a];
3821 for (size_t i = 0; i < nest->n; i++)
3823 if (i == nest->scale_idx)
3826 const struct variable *var = nest->vars[i];
3827 const union value *value = case_data (c, var);
3829 bool var_missing = var_is_value_missing (var, value) != 0;
3833 cats[a][i] = ctables_categories_match (
3834 s->table->categories[var_get_dict_index (var)], value, var);
3840 static const struct ctables_category cct_excluded_missing = {
3841 .type = CCT_EXCLUDED_MISSING,
3844 cats[a][i] = &cct_excluded_missing;
3845 excluded_missing = true;
3850 if (!excluded_missing)
3851 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3853 const struct ctables_nest *nest = s->nests[a];
3854 for (size_t i = 0; i < nest->n; i++)
3855 if (i != nest->scale_idx)
3857 const struct variable *var = nest->vars[i];
3858 const union value *value = case_data (c, var);
3859 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3863 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3864 d_weight, e_weight);
3866 //if (!excluded_missing)
3868 recurse_totals (s, c, cats, is_missing, excluded_missing,
3869 d_weight, e_weight, 0, 0);
3870 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3871 d_weight, e_weight, 0, 0);
3877 const struct ctables_summary_spec_set *set;
3882 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3884 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3885 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3886 if (as->function != bs->function)
3887 return as->function > bs->function ? 1 : -1;
3888 else if (as->percentile != bs->percentile)
3889 return as->percentile < bs->percentile ? 1 : -1;
3891 const char *as_label = as->label ? as->label : "";
3892 const char *bs_label = bs->label ? bs->label : "";
3893 return strcmp (as_label, bs_label);
3896 static struct pivot_value *
3897 ctables_category_create_label__ (const struct ctables_category *cat,
3898 const struct variable *var,
3899 const union value *value)
3901 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3902 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3903 : pivot_value_new_var_value (var, value));
3906 static struct pivot_value *
3907 ctables_postcompute_label (const struct ctables_categories *cats,
3908 const struct ctables_category *cat,
3909 const struct variable *var,
3910 const union value *value)
3912 struct substring in = ss_cstr (cat->pc->label);
3913 struct substring target = ss_cstr (")LABEL[");
3915 struct string out = DS_EMPTY_INITIALIZER;
3918 size_t chunk = ss_find_substring (in, target);
3919 if (chunk == SIZE_MAX)
3921 if (ds_is_empty (&out))
3922 return pivot_value_new_user_text (in.string, in.length);
3925 ds_put_substring (&out, in);
3926 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3930 ds_put_substring (&out, ss_head (in, chunk));
3931 ss_advance (&in, chunk + target.length);
3933 struct substring idx_s;
3934 if (!ss_get_until (&in, ']', &idx_s))
3937 long int idx = strtol (idx_s.string, &tail, 10);
3938 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3941 struct ctables_category *cat2 = &cats->cats[idx - 1];
3942 struct pivot_value *label2
3943 = ctables_category_create_label__ (cat2, var, value);
3944 char *label2_s = pivot_value_to_string_defaults (label2);
3945 ds_put_cstr (&out, label2_s);
3947 pivot_value_destroy (label2);
3952 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3955 static struct pivot_value *
3956 ctables_category_create_label (const struct ctables_categories *cats,
3957 const struct ctables_category *cat,
3958 const struct variable *var,
3959 const union value *value)
3961 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3962 ? ctables_postcompute_label (cats, cat, var, value)
3963 : ctables_category_create_label__ (cat, var, value));
3966 static struct ctables_value *
3967 ctables_value_find__ (struct ctables_table *t, const union value *value,
3968 int width, unsigned int hash)
3970 struct ctables_value *clv;
3971 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3972 hash, &t->clabels_values_map)
3973 if (value_equal (value, &clv->value, width))
3979 ctables_value_insert (struct ctables_table *t, const union value *value,
3982 unsigned int hash = value_hash (value, width, 0);
3983 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3986 clv = xmalloc (sizeof *clv);
3987 value_clone (&clv->value, value, width);
3988 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3992 static struct ctables_value *
3993 ctables_value_find (struct ctables_table *t,
3994 const union value *value, int width)
3996 return ctables_value_find__ (t, value, width,
3997 value_hash (value, width, 0));
4001 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4002 size_t ix[PIVOT_N_AXES])
4004 if (a < PIVOT_N_AXES)
4006 size_t limit = MAX (t->stacks[a].n, 1);
4007 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4008 ctables_table_add_section (t, a + 1, ix);
4012 struct ctables_section *s = &t->sections[t->n_sections++];
4013 *s = (struct ctables_section) {
4015 .cells = HMAP_INITIALIZER (s->cells),
4017 for (a = 0; a < PIVOT_N_AXES; a++)
4020 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4022 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4023 for (size_t i = 0; i < nest->n; i++)
4024 hmap_init (&s->occurrences[a][i]);
4026 for (size_t i = 0; i < N_CTDTS; i++)
4027 hmap_init (&s->domains[i]);
4032 ctpo_add (double a, double b)
4038 ctpo_sub (double a, double b)
4044 ctpo_mul (double a, double b)
4050 ctpo_div (double a, double b)
4052 return b ? a / b : SYSMIS;
4056 ctpo_pow (double a, double b)
4058 int save_errno = errno;
4060 double result = pow (a, b);
4068 ctpo_neg (double a, double b UNUSED)
4073 struct ctables_pcexpr_evaluate_ctx
4075 const struct ctables_cell *cell;
4076 const struct ctables_section *section;
4077 const struct ctables_categories *cats;
4078 enum pivot_axis_type pc_a;
4081 enum fmt_type parse_format;
4084 static double ctables_pcexpr_evaluate (
4085 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4088 ctables_pcexpr_evaluate_nonterminal (
4089 const struct ctables_pcexpr_evaluate_ctx *ctx,
4090 const struct ctables_pcexpr *e, size_t n_args,
4091 double evaluate (double, double))
4093 double args[2] = { 0, 0 };
4094 for (size_t i = 0; i < n_args; i++)
4096 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4097 if (!isfinite (args[i]) || args[i] == SYSMIS)
4100 return evaluate (args[0], args[1]);
4104 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4105 const struct ctables_cell_value *pc_cv)
4107 const struct ctables_section *s = ctx->section;
4110 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4112 const struct ctables_nest *nest = s->nests[a];
4113 for (size_t i = 0; i < nest->n; i++)
4114 if (i != nest->scale_idx)
4116 const struct ctables_cell_value *cv
4117 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4118 : &ctx->cell->axes[a].cvs[i]);
4119 hash = hash_pointer (cv->category, hash);
4120 if (cv->category->type != CCT_TOTAL
4121 && cv->category->type != CCT_SUBTOTAL
4122 && cv->category->type != CCT_POSTCOMPUTE)
4123 hash = value_hash (&cv->value,
4124 var_get_width (nest->vars[i]), hash);
4128 struct ctables_cell *tc;
4129 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4131 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4133 const struct ctables_nest *nest = s->nests[a];
4134 for (size_t i = 0; i < nest->n; i++)
4135 if (i != nest->scale_idx)
4137 const struct ctables_cell_value *p_cv
4138 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4139 : &ctx->cell->axes[a].cvs[i]);
4140 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4141 if (p_cv->category != t_cv->category
4142 || (p_cv->category->type != CCT_TOTAL
4143 && p_cv->category->type != CCT_SUBTOTAL
4144 && p_cv->category->type != CCT_POSTCOMPUTE
4145 && !value_equal (&p_cv->value,
4147 var_get_width (nest->vars[i]))))
4159 const struct ctables_table *t = s->table;
4160 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4161 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4162 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4163 &specs->specs[ctx->summary_idx]);
4167 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4168 const struct ctables_pcexpr *e)
4175 case CTPO_CAT_NRANGE:
4176 case CTPO_CAT_SRANGE:
4178 struct ctables_cell_value cv = {
4179 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4181 assert (cv.category != NULL);
4183 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4184 const struct ctables_occurrence *o;
4187 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4188 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4189 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4191 cv.value = o->value;
4192 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4197 case CTPO_CAT_NUMBER:
4198 case CTPO_CAT_MISSING:
4199 case CTPO_CAT_OTHERNM:
4200 case CTPO_CAT_SUBTOTAL:
4201 case CTPO_CAT_TOTAL:
4203 struct ctables_cell_value cv = {
4204 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4205 .value = { .f = e->number },
4207 assert (cv.category != NULL);
4208 return ctables_pcexpr_evaluate_category (ctx, &cv);
4211 case CTPO_CAT_STRING:
4213 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4215 if (width > e->string.length)
4217 s = xmalloc (width);
4218 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4220 struct ctables_cell_value cv = {
4221 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4222 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4224 assert (cv.category != NULL);
4225 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4231 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4234 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4237 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4240 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4243 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4246 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4252 static const struct ctables_category *
4253 ctables_cell_postcompute (const struct ctables_section *s,
4254 const struct ctables_cell *cell,
4255 enum pivot_axis_type *pc_a_p,
4258 assert (cell->postcompute);
4259 const struct ctables_category *pc_cat = NULL;
4260 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4261 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4263 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4264 if (cv->category->type == CCT_POSTCOMPUTE)
4268 /* Multiple postcomputes cross each other. The value is
4273 pc_cat = cv->category;
4277 *pc_a_idx_p = pc_a_idx;
4281 assert (pc_cat != NULL);
4286 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4287 const struct ctables_cell *cell,
4288 const struct ctables_summary_spec *ss,
4289 struct fmt_spec *format,
4290 bool *is_ctables_format,
4293 enum pivot_axis_type pc_a = 0;
4294 size_t pc_a_idx = 0;
4295 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4296 s, cell, &pc_a, &pc_a_idx);
4300 const struct ctables_postcompute *pc = pc_cat->pc;
4303 for (size_t i = 0; i < pc->specs->n; i++)
4305 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4306 if (ss->function == ss2->function
4307 && ss->percentile == ss2->percentile)
4309 *format = ss2->format;
4310 *is_ctables_format = ss2->is_ctables_format;
4316 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4317 const struct ctables_categories *cats = s->table->categories[
4318 var_get_dict_index (var)];
4319 struct ctables_pcexpr_evaluate_ctx ctx = {
4324 .pc_a_idx = pc_a_idx,
4325 .summary_idx = summary_idx,
4326 .parse_format = pc_cat->parse_format,
4328 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4332 ctables_format (double d, const struct fmt_spec *format,
4333 const struct fmt_settings *settings)
4335 const union value v = { .f = d };
4336 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4338 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4339 produce the results we want for negative numbers, putting the negative
4340 sign in the wrong spot, before the prefix instead of after it. We can't,
4341 in fact, produce the desired results using a custom-currency
4342 specification. Instead, we postprocess the output, moving the negative
4345 NEQUAL: "-N=3" => "N=-3"
4346 PAREN: "-(3)" => "(-3)"
4347 PCTPAREN: "-(3%)" => "(-3%)"
4349 This transformation doesn't affect NEGPAREN. */
4350 char *minus_src = strchr (s, '-');
4351 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4353 char *n_equals = strstr (s, "N=");
4354 char *lparen = strchr (s, '(');
4355 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4357 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4363 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4367 struct pivot_table *pt = pivot_table_create__ (
4369 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4370 : pivot_value_new_text (N_("Custom Tables"))),
4373 pivot_table_set_caption (
4374 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4376 pivot_table_set_corner_text (
4377 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4379 bool summary_dimension = (t->summary_axis != t->slabels_axis
4380 || (!t->slabels_visible
4381 && t->summary_specs.n > 1));
4382 if (summary_dimension)
4384 printf ("summary_dimension\n");
4385 struct pivot_dimension *d = pivot_dimension_create (
4386 pt, t->slabels_axis, N_("Statistics"));
4387 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4388 if (!t->slabels_visible)
4389 d->hide_all_labels = true;
4390 for (size_t i = 0; i < specs->n; i++)
4391 pivot_category_create_leaf (
4392 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4395 bool categories_dimension = t->clabels_example != NULL;
4396 if (categories_dimension)
4398 printf ("categories_dimension\n");
4399 struct pivot_dimension *d = pivot_dimension_create (
4400 pt, t->label_axis[t->clabels_from_axis],
4401 t->clabels_from_axis == PIVOT_AXIS_ROW
4402 ? N_("Row Categories")
4403 : N_("Column Categories"));
4404 const struct variable *var = t->clabels_example;
4405 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4406 for (size_t i = 0; i < t->n_clabels_values; i++)
4408 const struct ctables_value *value = t->clabels_values[i];
4409 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4410 assert (cat != NULL);
4411 pivot_category_create_leaf (d->root, ctables_category_create_label (
4412 c, cat, t->clabels_example,
4417 pivot_table_set_look (pt, ct->look);
4418 struct pivot_dimension *d[PIVOT_N_AXES];
4419 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4422 if (!t->axes[a] && a != t->summary_axis)
4424 assert (t->axes[a]);
4426 for (size_t i = 0; i < t->stacks[a].n; i++)
4428 struct ctables_nest *nest = &t->stacks[a].nests[i];
4429 struct ctables_section **sections = xnmalloc (t->n_sections,
4431 size_t n_sections = 0;
4433 size_t n_total_cells = 0;
4434 size_t max_depth = 0;
4435 for (size_t j = 0; j < t->n_sections; j++)
4436 if (t->sections[j].nests[a] == nest)
4438 struct ctables_section *s = &t->sections[j];
4439 sections[n_sections++] = s;
4440 n_total_cells += hmap_count (&s->cells);
4442 size_t depth = s->nests[a]->n;
4443 max_depth = MAX (depth, max_depth);
4446 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4448 size_t n_sorted = 0;
4450 for (size_t j = 0; j < n_sections; j++)
4452 struct ctables_section *s = sections[j];
4454 struct ctables_cell *cell;
4455 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4457 sorted[n_sorted++] = cell;
4458 assert (n_sorted <= n_total_cells);
4461 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4462 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4465 if (a == PIVOT_AXIS_ROW)
4467 size_t ids[N_CTDTS];
4468 memset (ids, 0, sizeof ids);
4469 for (size_t j = 0; j < n_sorted; j++)
4471 struct ctables_cell *cell = sorted[j];
4472 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4474 struct ctables_domain *domain = cell->domains[dt];
4475 if (!domain->sequence)
4476 domain->sequence = ++ids[dt];
4483 for (size_t j = 0; j < n_sorted; j++)
4485 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4490 struct ctables_level
4492 enum ctables_level_type
4494 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4495 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4496 CTL_SUMMARY, /* Summary functions. */
4500 enum settings_value_show vlabel; /* CTL_VAR only. */
4503 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4504 size_t n_levels = 0;
4505 for (size_t k = 0; k < nest->n; k++)
4507 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4508 if (vlabel != CTVL_NONE)
4510 levels[n_levels++] = (struct ctables_level) {
4512 .vlabel = (enum settings_value_show) vlabel,
4517 if (nest->scale_idx != k
4518 && (k != nest->n - 1 || t->label_axis[a] == a))
4520 levels[n_levels++] = (struct ctables_level) {
4521 .type = CTL_CATEGORY,
4527 if (!summary_dimension && a == t->slabels_axis)
4529 levels[n_levels++] = (struct ctables_level) {
4530 .type = CTL_SUMMARY,
4531 .var_idx = SIZE_MAX,
4538 static const char *names[] = {
4539 [PIVOT_AXIS_ROW] = N_("Rows"),
4540 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4541 [PIVOT_AXIS_LAYER] = N_("Layers"),
4543 d[a] = pivot_dimension_create (pt, a, names[a]);
4544 printf ("%s dimension\n", names[a]);
4546 /* Pivot categories:
4548 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4549 - category for nest->vars[0], if nest->scale_idx != 0
4550 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4551 - category for nest->vars[1], if nest->scale_idx != 1
4553 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4554 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4555 - summary function, if 'a == t->slabels_axis && a ==
4558 Additional dimensions:
4560 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4562 - If 't->label_axis[b] == a' for some 'b != a', add a category
4567 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4569 for (size_t j = 0; j < n_sorted; j++)
4571 struct ctables_cell *cell = sorted[j];
4572 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4574 size_t n_common = 0;
4577 for (; n_common < n_levels; n_common++)
4579 const struct ctables_level *level = &levels[n_common];
4580 if (level->type == CTL_CATEGORY)
4582 size_t var_idx = level->var_idx;
4583 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4584 if (prev->axes[a].cvs[var_idx].category != c)
4586 else if (c->type != CCT_SUBTOTAL
4587 && c->type != CCT_TOTAL
4588 && c->type != CCT_POSTCOMPUTE
4589 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4590 &cell->axes[a].cvs[var_idx].value,
4591 var_get_type (nest->vars[var_idx])))
4597 for (size_t k = n_common; k < n_levels; k++)
4599 const struct ctables_level *level = &levels[k];
4600 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4601 if (level->type == CTL_SUMMARY)
4603 assert (k == n_levels - 1);
4605 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4606 for (size_t m = 0; m < specs->n; m++)
4608 int leaf = pivot_category_create_leaf (
4609 parent, ctables_summary_label (&specs->specs[m],
4617 const struct variable *var = nest->vars[level->var_idx];
4618 struct pivot_value *label;
4619 if (level->type == CTL_VAR)
4621 label = pivot_value_new_variable (var);
4622 label->variable.show = level->vlabel;
4624 else if (level->type == CTL_CATEGORY)
4626 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4627 label = ctables_category_create_label (
4628 t->categories[var_get_dict_index (var)],
4629 cv->category, var, &cv->value);
4634 if (k == n_levels - 1)
4635 prev_leaf = pivot_category_create_leaf (parent, label);
4637 groups[k] = pivot_category_create_group__ (parent, label);
4641 cell->axes[a].leaf = prev_leaf;
4652 size_t n_total_cells = 0;
4653 for (size_t j = 0; j < t->n_sections; j++)
4654 n_total_cells += hmap_count (&t->sections[j].cells);
4656 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4657 size_t n_sorted = 0;
4658 for (size_t j = 0; j < t->n_sections; j++)
4660 const struct ctables_section *s = &t->sections[j];
4661 struct ctables_cell *cell;
4662 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4664 sorted[n_sorted++] = cell;
4666 assert (n_sorted <= n_total_cells);
4667 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4669 size_t ids[N_CTDTS];
4670 memset (ids, 0, sizeof ids);
4671 for (size_t j = 0; j < n_sorted; j++)
4673 struct ctables_cell *cell = sorted[j];
4674 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4676 struct ctables_domain *domain = cell->domains[dt];
4677 if (!domain->sequence)
4678 domain->sequence = ++ids[dt];
4685 for (size_t i = 0; i < t->n_sections; i++)
4687 struct ctables_section *s = &t->sections[i];
4689 struct ctables_cell *cell;
4690 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4695 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4696 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4697 for (size_t j = 0; j < specs->n; j++)
4700 size_t n_dindexes = 0;
4702 if (summary_dimension)
4703 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4705 if (categories_dimension)
4707 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4708 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4709 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4710 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4713 dindexes[n_dindexes++] = ctv->leaf;
4716 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4719 int leaf = cell->axes[a].leaf;
4720 if (a == t->summary_axis && !summary_dimension)
4722 dindexes[n_dindexes++] = leaf;
4725 const struct ctables_summary_spec *ss = &specs->specs[j];
4727 struct fmt_spec format = specs->specs[j].format;
4728 bool is_ctables_format = ss->is_ctables_format;
4729 double d = (cell->postcompute
4730 ? ctables_cell_calculate_postcompute (
4731 s, cell, ss, &format, &is_ctables_format, j)
4732 : ctables_summary_value (cell, &cell->summaries[j],
4735 struct pivot_value *value;
4736 if (ct->hide_threshold != 0
4737 && d < ct->hide_threshold
4738 && ctables_summary_function_is_count (ss->function))
4740 value = pivot_value_new_user_text_nocopy (
4741 xasprintf ("<%d", ct->hide_threshold));
4743 else if (d == 0 && ct->zero)
4744 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4745 else if (d == SYSMIS && ct->missing)
4746 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4747 else if (is_ctables_format)
4748 value = pivot_value_new_user_text_nocopy (
4749 ctables_format (d, &format, &ct->ctables_formats));
4752 value = pivot_value_new_number (d);
4753 value->numeric.format = format;
4755 /* XXX should text values be right-justified? */
4756 pivot_table_put (pt, dindexes, n_dindexes, value);
4761 pivot_table_submit (pt);
4765 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4767 enum pivot_axis_type label_pos = t->label_axis[a];
4771 t->clabels_from_axis = a;
4773 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4774 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4776 const struct ctables_stack *stack = &t->stacks[a];
4780 const struct ctables_nest *n0 = &stack->nests[0];
4783 assert (stack->n == 1);
4787 const struct variable *v0 = n0->vars[n0->n - 1];
4788 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4789 t->clabels_example = v0;
4791 for (size_t i = 0; i < c0->n_cats; i++)
4792 if (c0->cats[i].type == CCT_FUNCTION)
4794 msg (SE, _("%s=%s is not allowed with sorting based "
4795 "on a summary function."),
4796 subcommand_name, pos_name);
4799 if (n0->n - 1 == n0->scale_idx)
4801 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4802 "but %s is a scale variable."),
4803 subcommand_name, pos_name, var_get_name (v0));
4807 for (size_t i = 1; i < stack->n; i++)
4809 const struct ctables_nest *ni = &stack->nests[i];
4811 const struct variable *vi = ni->vars[ni->n - 1];
4812 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4814 if (ni->n - 1 == ni->scale_idx)
4816 msg (SE, _("%s=%s requires the variables to be moved to be "
4817 "categorical, but %s is a scale variable."),
4818 subcommand_name, pos_name, var_get_name (vi));
4821 if (var_get_width (v0) != var_get_width (vi))
4823 msg (SE, _("%s=%s requires the variables to be "
4824 "moved to have the same width, but %s has "
4825 "width %d and %s has width %d."),
4826 subcommand_name, pos_name,
4827 var_get_name (v0), var_get_width (v0),
4828 var_get_name (vi), var_get_width (vi));
4831 if (!val_labs_equal (var_get_value_labels (v0),
4832 var_get_value_labels (vi)))
4834 msg (SE, _("%s=%s requires the variables to be "
4835 "moved to have the same value labels, but %s "
4836 "and %s have different value labels."),
4837 subcommand_name, pos_name,
4838 var_get_name (v0), var_get_name (vi));
4841 if (!ctables_categories_equal (c0, ci))
4843 msg (SE, _("%s=%s requires the variables to be "
4844 "moved to have the same category "
4845 "specifications, but %s and %s have different "
4846 "category specifications."),
4847 subcommand_name, pos_name,
4848 var_get_name (v0), var_get_name (vi));
4857 add_sum_var (struct variable *var,
4858 struct variable ***sum_vars, size_t *n, size_t *allocated)
4860 for (size_t i = 0; i < *n; i++)
4861 if (var == (*sum_vars)[i])
4864 if (*n >= *allocated)
4865 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4866 (*sum_vars)[*n] = var;
4871 enumerate_sum_vars (const struct ctables_axis *a,
4872 struct variable ***sum_vars, size_t *n, size_t *allocated)
4880 for (size_t i = 0; i < N_CSVS; i++)
4881 for (size_t j = 0; j < a->specs[i].n; j++)
4883 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4884 if (ctables_function_is_pctsum (spec->function))
4885 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4891 for (size_t i = 0; i < 2; i++)
4892 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4898 ctables_prepare_table (struct ctables_table *t)
4900 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4903 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4905 for (size_t j = 0; j < t->stacks[a].n; j++)
4907 struct ctables_nest *nest = &t->stacks[a].nests[j];
4908 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4910 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4911 nest->n_domains[dt] = 0;
4913 for (size_t k = 0; k < nest->n; k++)
4915 if (k == nest->scale_idx)
4924 if (a != PIVOT_AXIS_LAYER)
4931 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4932 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4933 : a == PIVOT_AXIS_ROW)
4935 if (k == nest->n - 1
4936 || (nest->scale_idx == nest->n - 1
4937 && k == nest->n - 2))
4943 if (a == PIVOT_AXIS_COLUMN)
4948 if (a == PIVOT_AXIS_ROW)
4953 nest->domains[dt][nest->n_domains[dt]++] = k;
4960 struct ctables_nest *nest = xmalloc (sizeof *nest);
4961 *nest = (struct ctables_nest) { .n = 0 };
4962 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4964 /* There's no point in moving labels away from an axis that has no
4965 labels, so avoid dealing with the special cases around that. */
4966 t->label_axis[a] = a;
4969 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4970 for (size_t i = 0; i < stack->n; i++)
4972 struct ctables_nest *nest = &stack->nests[i];
4973 if (!nest->specs[CSV_CELL].n)
4975 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4976 specs->specs = xmalloc (sizeof *specs->specs);
4979 enum ctables_summary_function function
4980 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4982 *specs->specs = (struct ctables_summary_spec) {
4983 .function = function,
4984 .format = ctables_summary_default_format (function, specs->var),
4987 specs->var = nest->vars[0];
4989 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4990 &nest->specs[CSV_CELL]);
4992 else if (!nest->specs[CSV_TOTAL].n)
4993 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4994 &nest->specs[CSV_CELL]);
4996 if (t->ctables->smissing_listwise)
4998 struct variable **listwise_vars = NULL;
5000 size_t allocated = 0;
5002 for (size_t j = nest->group_head; j < stack->n; j++)
5004 const struct ctables_nest *other_nest = &stack->nests[j];
5005 if (other_nest->group_head != nest->group_head)
5008 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5011 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5012 sizeof *listwise_vars);
5013 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5016 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5019 listwise_vars = xmemdup (listwise_vars,
5020 n * sizeof *listwise_vars);
5021 nest->specs[sv].listwise_vars = listwise_vars;
5022 nest->specs[sv].n_listwise_vars = n;
5027 struct ctables_summary_spec_set *merged = &t->summary_specs;
5028 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5030 for (size_t j = 0; j < stack->n; j++)
5032 const struct ctables_nest *nest = &stack->nests[j];
5034 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5035 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5040 struct merge_item min = items[0];
5041 for (size_t j = 1; j < n_left; j++)
5042 if (merge_item_compare_3way (&items[j], &min) < 0)
5045 if (merged->n >= merged->allocated)
5046 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5047 sizeof *merged->specs);
5048 merged->specs[merged->n++] = min.set->specs[min.ofs];
5050 for (size_t j = 0; j < n_left; )
5052 if (merge_item_compare_3way (&items[j], &min) == 0)
5054 struct merge_item *item = &items[j];
5055 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5056 if (++item->ofs >= item->set->n)
5058 items[j] = items[--n_left];
5068 for (size_t j = 0; j < merged->n; j++)
5069 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5071 for (size_t j = 0; j < stack->n; j++)
5073 const struct ctables_nest *nest = &stack->nests[j];
5074 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5076 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5077 for (size_t k = 0; k < specs->n; k++)
5078 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5079 specs->specs[k].axis_idx);
5085 size_t allocated_sum_vars = 0;
5086 enumerate_sum_vars (t->axes[t->summary_axis],
5087 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5089 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5090 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5094 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5095 enum pivot_axis_type a)
5097 struct ctables_stack *stack = &t->stacks[a];
5098 for (size_t i = 0; i < stack->n; i++)
5100 const struct ctables_nest *nest = &stack->nests[i];
5101 const struct variable *var = nest->vars[nest->n - 1];
5102 const union value *value = case_data (c, var);
5104 if (var_is_numeric (var) && value->f == SYSMIS)
5107 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5109 ctables_value_insert (t, value, var_get_width (var));
5114 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5116 const struct ctables_value *const *ap = a_;
5117 const struct ctables_value *const *bp = b_;
5118 const struct ctables_value *a = *ap;
5119 const struct ctables_value *b = *bp;
5120 const int *width = width_;
5121 return value_compare_3way (&a->value, &b->value, *width);
5125 ctables_sort_clabels_values (struct ctables_table *t)
5127 const struct variable *v0 = t->clabels_example;
5128 int width = var_get_width (v0);
5130 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5133 const struct val_labs *val_labs = var_get_value_labels (v0);
5134 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5135 vl = val_labs_next (val_labs, vl))
5136 if (ctables_categories_match (c0, &vl->value, v0))
5137 ctables_value_insert (t, &vl->value, width);
5140 size_t n = hmap_count (&t->clabels_values_map);
5141 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5143 struct ctables_value *clv;
5145 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5146 t->clabels_values[i++] = clv;
5147 t->n_clabels_values = n;
5150 sort (t->clabels_values, n, sizeof *t->clabels_values,
5151 compare_clabels_values_3way, &width);
5153 for (size_t i = 0; i < n; i++)
5154 t->clabels_values[i]->leaf = i;
5158 ctables_add_category_occurrences (const struct variable *var,
5159 struct hmap *occurrences,
5160 const struct ctables_categories *cats)
5162 const struct val_labs *val_labs = var_get_value_labels (var);
5164 for (size_t i = 0; i < cats->n_cats; i++)
5166 const struct ctables_category *c = &cats->cats[i];
5170 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5176 int width = var_get_width (var);
5178 value_init (&value, width);
5179 value_copy_buf_rpad (&value, width,
5180 CHAR_CAST (uint8_t *, c->string.string),
5181 c->string.length, ' ');
5182 ctables_add_occurrence (var, &value, occurrences);
5183 value_destroy (&value, width);
5188 assert (var_is_numeric (var));
5189 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5190 vl = val_labs_next (val_labs, vl))
5191 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5192 ctables_add_occurrence (var, &vl->value, occurrences);
5196 assert (var_is_alpha (var));
5197 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5198 vl = val_labs_next (val_labs, vl))
5199 if (in_string_range (&vl->value, var, c->srange))
5200 ctables_add_occurrence (var, &vl->value, occurrences);
5204 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5205 vl = val_labs_next (val_labs, vl))
5206 if (var_is_value_missing (var, &vl->value))
5207 ctables_add_occurrence (var, &vl->value, occurrences);
5211 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5212 vl = val_labs_next (val_labs, vl))
5213 ctables_add_occurrence (var, &vl->value, occurrences);
5216 case CCT_POSTCOMPUTE:
5226 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5227 vl = val_labs_next (val_labs, vl))
5228 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5229 ctables_add_occurrence (var, &vl->value, occurrences);
5232 case CCT_EXCLUDED_MISSING:
5239 ctables_section_recurse_add_empty_categories (
5240 struct ctables_section *s,
5241 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5242 enum pivot_axis_type a, size_t a_idx)
5244 if (a >= PIVOT_N_AXES)
5245 ctables_cell_insert__ (s, c, cats);
5246 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5247 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5250 const struct variable *var = s->nests[a]->vars[a_idx];
5251 const struct ctables_categories *categories = s->table->categories[
5252 var_get_dict_index (var)];
5253 int width = var_get_width (var);
5254 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5255 const struct ctables_occurrence *o;
5256 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5258 union value *value = case_data_rw (c, var);
5259 value_destroy (value, width);
5260 value_clone (value, &o->value, width);
5261 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5262 assert (cats[a][a_idx] != NULL);
5263 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5266 for (size_t i = 0; i < categories->n_cats; i++)
5268 const struct ctables_category *cat = &categories->cats[i];
5269 if (cat->type == CCT_POSTCOMPUTE)
5271 cats[a][a_idx] = cat;
5272 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5279 ctables_section_add_empty_categories (struct ctables_section *s)
5281 bool show_empty = false;
5282 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5284 for (size_t k = 0; k < s->nests[a]->n; k++)
5285 if (k != s->nests[a]->scale_idx)
5287 const struct variable *var = s->nests[a]->vars[k];
5288 const struct ctables_categories *cats = s->table->categories[
5289 var_get_dict_index (var)];
5290 if (cats->show_empty)
5293 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5299 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5300 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5301 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5306 ctables_section_clear (struct ctables_section *s)
5308 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5310 const struct ctables_nest *nest = s->nests[a];
5311 for (size_t i = 0; i < nest->n; i++)
5312 if (i != nest->scale_idx)
5314 const struct variable *var = nest->vars[i];
5315 int width = var_get_width (var);
5316 struct ctables_occurrence *o, *next;
5317 struct hmap *map = &s->occurrences[a][i];
5318 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5320 value_destroy (&o->value, width);
5321 hmap_delete (map, &o->node);
5328 struct ctables_cell *cell, *next_cell;
5329 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5331 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5333 const struct ctables_nest *nest = s->nests[a];
5334 for (size_t i = 0; i < nest->n; i++)
5335 if (i != nest->scale_idx)
5336 value_destroy (&cell->axes[a].cvs[i].value,
5337 var_get_width (nest->vars[i]));
5338 free (cell->axes[a].cvs);
5341 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5342 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5343 for (size_t i = 0; i < specs->n; i++)
5344 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5345 free (cell->summaries);
5347 hmap_delete (&s->cells, &cell->node);
5350 hmap_shrink (&s->cells);
5352 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5354 struct ctables_domain *domain, *next_domain;
5355 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5358 free (domain->sums);
5359 hmap_delete (&s->domains[dt], &domain->node);
5362 hmap_shrink (&s->domains[dt]);
5367 ctables_section_uninit (struct ctables_section *s)
5369 ctables_section_clear (s);
5371 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5373 struct ctables_nest *nest = s->nests[a];
5374 for (size_t i = 0; i < nest->n; i++)
5375 hmap_destroy (&s->occurrences[a][i]);
5376 free (s->occurrences[a]);
5379 hmap_destroy (&s->cells);
5380 for (size_t i = 0; i < N_CTDTS; i++)
5381 hmap_destroy (&s->domains[i]);
5385 ctables_table_clear (struct ctables_table *t)
5387 for (size_t i = 0; i < t->n_sections; i++)
5388 ctables_section_clear (&t->sections[i]);
5390 if (t->clabels_example)
5392 int width = var_get_width (t->clabels_example);
5393 struct ctables_value *value, *next_value;
5394 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5395 &t->clabels_values_map)
5397 value_destroy (&value->value, width);
5398 hmap_delete (&t->clabels_values_map, &value->node);
5401 hmap_shrink (&t->clabels_values_map);
5403 free (t->clabels_values);
5404 t->clabels_values = NULL;
5405 t->n_clabels_values = 0;
5410 ctables_execute (struct dataset *ds, struct casereader *input,
5413 for (size_t i = 0; i < ct->n_tables; i++)
5415 struct ctables_table *t = ct->tables[i];
5416 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5417 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5418 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5419 sizeof *t->sections);
5420 size_t ix[PIVOT_N_AXES];
5421 ctables_table_add_section (t, 0, ix);
5424 struct dictionary *dict = dataset_dict (ds);
5425 struct casegrouper *grouper
5426 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5427 ? casegrouper_create_splits (input, dict)
5428 : casegrouper_create_vars (input, NULL, 0));
5429 struct casereader *group;
5430 while (casegrouper_get_next_group (grouper, &group))
5432 /* Output SPLIT FILE variables. */
5433 struct ccase *c = casereader_peek (group, 0);
5436 output_split_file_values (ds, c);
5440 bool warn_on_invalid = true;
5441 for (c = casereader_read (group); c;
5442 case_unref (c), c = casereader_read (group))
5444 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5445 double e_weight = (ct->e_weight
5446 ? var_force_valid_weight (ct->e_weight,
5447 case_num (c, ct->e_weight),
5451 for (size_t i = 0; i < ct->n_tables; i++)
5453 struct ctables_table *t = ct->tables[i];
5455 for (size_t j = 0; j < t->n_sections; j++)
5456 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5458 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5459 if (t->label_axis[a] != a)
5460 ctables_insert_clabels_values (t, c, a);
5463 casereader_destroy (group);
5465 for (size_t i = 0; i < ct->n_tables; i++)
5467 struct ctables_table *t = ct->tables[i];
5469 if (t->clabels_example)
5470 ctables_sort_clabels_values (t);
5472 for (size_t j = 0; j < t->n_sections; j++)
5473 ctables_section_add_empty_categories (&t->sections[j]);
5475 ctables_table_output (ct, t);
5476 ctables_table_clear (t);
5479 return casegrouper_destroy (grouper);
5484 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5485 struct dictionary *);
5488 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5494 case CTPO_CAT_STRING:
5495 ss_dealloc (&e->string);
5498 case CTPO_CAT_SRANGE:
5499 for (size_t i = 0; i < 2; i++)
5500 ss_dealloc (&e->srange[i]);
5509 for (size_t i = 0; i < 2; i++)
5510 ctables_pcexpr_destroy (e->subs[i]);
5514 case CTPO_CAT_NUMBER:
5515 case CTPO_CAT_NRANGE:
5516 case CTPO_CAT_MISSING:
5517 case CTPO_CAT_OTHERNM:
5518 case CTPO_CAT_SUBTOTAL:
5519 case CTPO_CAT_TOTAL:
5523 msg_location_destroy (e->location);
5528 static struct ctables_pcexpr *
5529 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5530 struct ctables_pcexpr *sub0,
5531 struct ctables_pcexpr *sub1)
5533 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5534 *e = (struct ctables_pcexpr) {
5536 .subs = { sub0, sub1 },
5537 .location = msg_location_merged (sub0->location, sub1->location),
5542 /* How to parse an operator. */
5545 enum token_type token;
5546 enum ctables_postcompute_op op;
5549 static const struct operator *
5550 ctables_pcexpr_match_operator (struct lexer *lexer,
5551 const struct operator ops[], size_t n_ops)
5553 for (const struct operator *op = ops; op < ops + n_ops; op++)
5554 if (lex_token (lexer) == op->token)
5556 if (op->token != T_NEG_NUM)
5565 static struct ctables_pcexpr *
5566 ctables_pcexpr_parse_binary_operators__ (
5567 struct lexer *lexer, struct dictionary *dict,
5568 const struct operator ops[], size_t n_ops,
5569 parse_recursively_func *parse_next_level,
5570 const char *chain_warning, struct ctables_pcexpr *lhs)
5572 for (int op_count = 0; ; op_count++)
5574 const struct operator *op
5575 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5578 if (op_count > 1 && chain_warning)
5579 msg_at (SW, lhs->location, "%s", chain_warning);
5584 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5587 ctables_pcexpr_destroy (lhs);
5591 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5595 static struct ctables_pcexpr *
5596 ctables_pcexpr_parse_binary_operators (
5597 struct lexer *lexer, struct dictionary *dict,
5598 const struct operator ops[], size_t n_ops,
5599 parse_recursively_func *parse_next_level, const char *chain_warning)
5601 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5605 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5607 chain_warning, lhs);
5610 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5611 struct dictionary *);
5613 static struct ctables_pcexpr
5614 ctpo_cat_nrange (double low, double high)
5616 return (struct ctables_pcexpr) {
5617 .op = CTPO_CAT_NRANGE,
5618 .nrange = { low, high },
5622 static struct ctables_pcexpr
5623 ctpo_cat_srange (struct substring low, struct substring high)
5625 return (struct ctables_pcexpr) {
5626 .op = CTPO_CAT_SRANGE,
5627 .srange = { low, high },
5631 static struct ctables_pcexpr *
5632 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5634 int start_ofs = lex_ofs (lexer);
5635 struct ctables_pcexpr e;
5636 if (lex_is_number (lexer))
5638 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5639 .number = lex_number (lexer) };
5642 else if (lex_match_id (lexer, "MISSING"))
5643 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5644 else if (lex_match_id (lexer, "OTHERNM"))
5645 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5646 else if (lex_match_id (lexer, "TOTAL"))
5647 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5648 else if (lex_match_id (lexer, "SUBTOTAL"))
5650 size_t subtotal_index = 0;
5651 if (lex_match (lexer, T_LBRACK))
5653 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5655 subtotal_index = lex_integer (lexer);
5657 if (!lex_force_match (lexer, T_RBRACK))
5660 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5661 .subtotal_index = subtotal_index };
5663 else if (lex_match (lexer, T_LBRACK))
5665 if (lex_match_id (lexer, "LO"))
5667 if (!lex_force_match_id (lexer, "THRU"))
5670 if (lex_is_string (lexer))
5672 struct substring low = { .string = NULL };
5673 struct substring high = parse_substring (lexer, dict);
5674 e = ctpo_cat_srange (low, high);
5678 if (!lex_force_num (lexer))
5680 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5684 else if (lex_is_number (lexer))
5686 double number = lex_number (lexer);
5688 if (lex_match_id (lexer, "THRU"))
5690 if (lex_match_id (lexer, "HI"))
5691 e = ctpo_cat_nrange (number, DBL_MAX);
5694 if (!lex_force_num (lexer))
5696 e = ctpo_cat_nrange (number, lex_number (lexer));
5701 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5704 else if (lex_is_string (lexer))
5706 struct substring s = parse_substring (lexer, dict);
5708 if (lex_match_id (lexer, "THRU"))
5710 struct substring high;
5712 if (lex_match_id (lexer, "HI"))
5713 high = (struct substring) { .string = NULL };
5716 if (!lex_force_string (lexer))
5721 high = parse_substring (lexer, dict);
5724 e = ctpo_cat_srange (s, high);
5727 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5731 lex_error (lexer, NULL);
5735 if (!lex_force_match (lexer, T_RBRACK))
5737 if (e.op == CTPO_CAT_STRING)
5738 ss_dealloc (&e.string);
5739 else if (e.op == CTPO_CAT_SRANGE)
5741 ss_dealloc (&e.srange[0]);
5742 ss_dealloc (&e.srange[1]);
5747 else if (lex_match (lexer, T_LPAREN))
5749 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5752 if (!lex_force_match (lexer, T_RPAREN))
5754 ctables_pcexpr_destroy (ep);
5761 lex_error (lexer, NULL);
5765 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5766 return xmemdup (&e, sizeof e);
5769 static struct ctables_pcexpr *
5770 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5771 struct lexer *lexer, int start_ofs)
5773 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5774 *e = (struct ctables_pcexpr) {
5777 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5782 static struct ctables_pcexpr *
5783 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5785 static const struct operator op = { T_EXP, CTPO_POW };
5787 const char *chain_warning =
5788 _("The exponentiation operator (`**') is left-associative: "
5789 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5790 "To disable this warning, insert parentheses.");
5792 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5793 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5794 ctables_pcexpr_parse_primary,
5797 /* Special case for situations like "-5**6", which must be parsed as
5800 int start_ofs = lex_ofs (lexer);
5801 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5802 *lhs = (struct ctables_pcexpr) {
5803 .op = CTPO_CONSTANT,
5804 .number = -lex_tokval (lexer),
5805 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5809 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5810 lexer, dict, &op, 1,
5811 ctables_pcexpr_parse_primary, chain_warning, lhs);
5815 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5818 /* Parses the unary minus level. */
5819 static struct ctables_pcexpr *
5820 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5822 int start_ofs = lex_ofs (lexer);
5823 if (!lex_match (lexer, T_DASH))
5824 return ctables_pcexpr_parse_exp (lexer, dict);
5826 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5830 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5833 /* Parses the multiplication and division level. */
5834 static struct ctables_pcexpr *
5835 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5837 static const struct operator ops[] =
5839 { T_ASTERISK, CTPO_MUL },
5840 { T_SLASH, CTPO_DIV },
5843 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5844 sizeof ops / sizeof *ops,
5845 ctables_pcexpr_parse_neg, NULL);
5848 /* Parses the addition and subtraction level. */
5849 static struct ctables_pcexpr *
5850 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5852 static const struct operator ops[] =
5854 { T_PLUS, CTPO_ADD },
5855 { T_DASH, CTPO_SUB },
5856 { T_NEG_NUM, CTPO_ADD },
5859 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5860 ops, sizeof ops / sizeof *ops,
5861 ctables_pcexpr_parse_mul, NULL);
5864 static struct ctables_postcompute *
5865 ctables_find_postcompute (struct ctables *ct, const char *name)
5867 struct ctables_postcompute *pc;
5868 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5869 utf8_hash_case_string (name, 0), &ct->postcomputes)
5870 if (!utf8_strcasecmp (pc->name, name))
5876 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5879 int pcompute_start = lex_ofs (lexer) - 1;
5881 if (!lex_match (lexer, T_AND))
5883 lex_error_expecting (lexer, "&");
5886 if (!lex_force_id (lexer))
5889 char *name = ss_xstrdup (lex_tokss (lexer));
5892 if (!lex_force_match (lexer, T_EQUALS)
5893 || !lex_force_match_id (lexer, "EXPR")
5894 || !lex_force_match (lexer, T_LPAREN))
5900 int expr_start = lex_ofs (lexer);
5901 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5902 int expr_end = lex_ofs (lexer) - 1;
5903 if (!expr || !lex_force_match (lexer, T_RPAREN))
5905 ctables_pcexpr_destroy (expr);
5909 int pcompute_end = lex_ofs (lexer) - 1;
5911 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5914 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5917 msg_at (SW, location, _("New definition of &%s will override the "
5918 "previous definition."),
5920 msg_at (SN, pc->location, _("This is the previous definition."));
5922 ctables_pcexpr_destroy (pc->expr);
5923 msg_location_destroy (pc->location);
5928 pc = xmalloc (sizeof *pc);
5929 *pc = (struct ctables_postcompute) { .name = name };
5930 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5931 utf8_hash_case_string (pc->name, 0));
5934 pc->location = location;
5936 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5941 ctables_parse_pproperties_format (struct lexer *lexer,
5942 struct ctables_summary_spec_set *sss)
5944 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5946 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5947 && !(lex_token (lexer) == T_ID
5948 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5949 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5950 lex_tokss (lexer)))))
5952 /* Parse function. */
5953 enum ctables_summary_function function;
5954 if (!parse_ctables_summary_function (lexer, &function))
5957 /* Parse percentile. */
5958 double percentile = 0;
5959 if (function == CTSF_PTILE)
5961 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5963 percentile = lex_number (lexer);
5968 struct fmt_spec format;
5969 bool is_ctables_format;
5970 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5973 if (sss->n >= sss->allocated)
5974 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5975 sizeof *sss->specs);
5976 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5977 .function = function,
5978 .percentile = percentile,
5980 .is_ctables_format = is_ctables_format,
5986 ctables_summary_spec_set_uninit (sss);
5991 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5993 struct ctables_postcompute **pcs = NULL;
5995 size_t allocated_pcs = 0;
5997 while (lex_match (lexer, T_AND))
5999 if (!lex_force_id (lexer))
6001 struct ctables_postcompute *pc
6002 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6005 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6010 if (n_pcs >= allocated_pcs)
6011 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
6015 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6017 if (lex_match_id (lexer, "LABEL"))
6019 lex_match (lexer, T_EQUALS);
6020 if (!lex_force_string (lexer))
6023 for (size_t i = 0; i < n_pcs; i++)
6025 free (pcs[i]->label);
6026 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6031 else if (lex_match_id (lexer, "FORMAT"))
6033 lex_match (lexer, T_EQUALS);
6035 struct ctables_summary_spec_set sss;
6036 if (!ctables_parse_pproperties_format (lexer, &sss))
6039 for (size_t i = 0; i < n_pcs; i++)
6042 ctables_summary_spec_set_uninit (pcs[i]->specs);
6044 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6045 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6047 ctables_summary_spec_set_uninit (&sss);
6049 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6051 lex_match (lexer, T_EQUALS);
6052 bool hide_source_cats;
6053 if (!parse_bool (lexer, &hide_source_cats))
6055 for (size_t i = 0; i < n_pcs; i++)
6056 pcs[i]->hide_source_cats = hide_source_cats;
6060 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
6073 put_strftime (struct string *out, time_t now, const char *format)
6075 const struct tm *tm = localtime (&now);
6077 strftime (value, sizeof value, format, tm);
6078 ds_put_cstr (out, value);
6082 skip_prefix (struct substring *s, struct substring prefix)
6084 if (ss_starts_with (*s, prefix))
6086 ss_advance (s, prefix.length);
6094 put_table_expression (struct string *out, struct lexer *lexer,
6095 struct dictionary *dict, int expr_start, int expr_end)
6098 for (int ofs = expr_start; ofs < expr_end; ofs++)
6100 const struct token *t = lex_ofs_token (lexer, ofs);
6101 if (t->type == T_LBRACK)
6103 else if (t->type == T_RBRACK && nest > 0)
6109 else if (t->type == T_ID)
6111 const struct variable *var
6112 = dict_lookup_var (dict, t->string.string);
6113 const char *label = var ? var_get_label (var) : NULL;
6114 ds_put_cstr (out, label ? label : t->string.string);
6118 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6119 ds_put_byte (out, ' ');
6121 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6122 ds_put_cstr (out, repr);
6125 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6126 ds_put_byte (out, ' ');
6132 put_title_text (struct string *out, struct substring in, time_t now,
6133 struct lexer *lexer, struct dictionary *dict,
6134 int expr_start, int expr_end)
6138 size_t chunk = ss_find_byte (in, ')');
6139 ds_put_substring (out, ss_head (in, chunk));
6140 ss_advance (&in, chunk);
6141 if (ss_is_empty (in))
6144 if (skip_prefix (&in, ss_cstr (")DATE")))
6145 put_strftime (out, now, "%x");
6146 else if (skip_prefix (&in, ss_cstr (")TIME")))
6147 put_strftime (out, now, "%X");
6148 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6149 put_table_expression (out, lexer, dict, expr_start, expr_end);
6152 ds_put_byte (out, ')');
6153 ss_advance (&in, 1);
6159 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6161 struct casereader *input = NULL;
6163 struct measure_guesser *mg = measure_guesser_create (ds);
6166 input = proc_open (ds);
6167 measure_guesser_run (mg, input);
6168 measure_guesser_destroy (mg);
6171 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6172 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6173 enum settings_value_show tvars = settings_get_show_variables ();
6174 for (size_t i = 0; i < n_vars; i++)
6175 vlabels[i] = (enum ctables_vlabel) tvars;
6177 struct pivot_table_look *look = pivot_table_look_unshare (
6178 pivot_table_look_ref (pivot_table_look_get_default ()));
6179 look->omit_empty = false;
6181 struct ctables *ct = xmalloc (sizeof *ct);
6182 *ct = (struct ctables) {
6183 .dict = dataset_dict (ds),
6185 .ctables_formats = FMT_SETTINGS_INIT,
6187 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6190 time_t now = time (NULL);
6195 const char *dot_string;
6196 const char *comma_string;
6198 static const struct ctf ctfs[4] = {
6199 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6200 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6201 { CTEF_PAREN, "-,(,),", "-.(.)." },
6202 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6204 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6205 for (size_t i = 0; i < 4; i++)
6207 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6208 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6209 fmt_number_style_from_string (s));
6212 if (!lex_force_match (lexer, T_SLASH))
6215 while (!lex_match_id (lexer, "TABLE"))
6217 if (lex_match_id (lexer, "FORMAT"))
6219 double widths[2] = { SYSMIS, SYSMIS };
6220 double units_per_inch = 72.0;
6222 while (lex_token (lexer) != T_SLASH)
6224 if (lex_match_id (lexer, "MINCOLWIDTH"))
6226 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6229 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6231 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6234 else if (lex_match_id (lexer, "UNITS"))
6236 lex_match (lexer, T_EQUALS);
6237 if (lex_match_id (lexer, "POINTS"))
6238 units_per_inch = 72.0;
6239 else if (lex_match_id (lexer, "INCHES"))
6240 units_per_inch = 1.0;
6241 else if (lex_match_id (lexer, "CM"))
6242 units_per_inch = 2.54;
6245 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6249 else if (lex_match_id (lexer, "EMPTY"))
6254 lex_match (lexer, T_EQUALS);
6255 if (lex_match_id (lexer, "ZERO"))
6257 /* Nothing to do. */
6259 else if (lex_match_id (lexer, "BLANK"))
6260 ct->zero = xstrdup ("");
6261 else if (lex_force_string (lexer))
6263 ct->zero = ss_xstrdup (lex_tokss (lexer));
6269 else if (lex_match_id (lexer, "MISSING"))
6271 lex_match (lexer, T_EQUALS);
6272 if (!lex_force_string (lexer))
6276 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6277 ? ss_xstrdup (lex_tokss (lexer))
6283 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6284 "UNITS", "EMPTY", "MISSING");
6289 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6290 && widths[0] > widths[1])
6292 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6296 for (size_t i = 0; i < 2; i++)
6297 if (widths[i] != SYSMIS)
6299 int *wr = ct->look->width_ranges[TABLE_HORZ];
6300 wr[i] = widths[i] / units_per_inch * 96.0;
6305 else if (lex_match_id (lexer, "VLABELS"))
6307 if (!lex_force_match_id (lexer, "VARIABLES"))
6309 lex_match (lexer, T_EQUALS);
6311 struct variable **vars;
6313 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6317 if (!lex_force_match_id (lexer, "DISPLAY"))
6322 lex_match (lexer, T_EQUALS);
6324 enum ctables_vlabel vlabel;
6325 if (lex_match_id (lexer, "DEFAULT"))
6326 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6327 else if (lex_match_id (lexer, "NAME"))
6329 else if (lex_match_id (lexer, "LABEL"))
6330 vlabel = CTVL_LABEL;
6331 else if (lex_match_id (lexer, "BOTH"))
6333 else if (lex_match_id (lexer, "NONE"))
6337 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6343 for (size_t i = 0; i < n_vars; i++)
6344 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6347 else if (lex_match_id (lexer, "MRSETS"))
6349 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6351 lex_match (lexer, T_EQUALS);
6352 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6355 else if (lex_match_id (lexer, "SMISSING"))
6357 if (lex_match_id (lexer, "VARIABLE"))
6358 ct->smissing_listwise = false;
6359 else if (lex_match_id (lexer, "LISTWISE"))
6360 ct->smissing_listwise = true;
6363 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6367 else if (lex_match_id (lexer, "PCOMPUTE"))
6369 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6372 else if (lex_match_id (lexer, "PPROPERTIES"))
6374 if (!ctables_parse_pproperties (lexer, ct))
6377 else if (lex_match_id (lexer, "WEIGHT"))
6379 if (!lex_force_match_id (lexer, "VARIABLE"))
6381 lex_match (lexer, T_EQUALS);
6382 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6386 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6388 if (lex_match_id (lexer, "COUNT"))
6390 lex_match (lexer, T_EQUALS);
6391 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6394 ct->hide_threshold = lex_integer (lexer);
6397 else if (ct->hide_threshold == 0)
6398 ct->hide_threshold = 5;
6402 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6403 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6404 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6408 if (!lex_force_match (lexer, T_SLASH))
6412 size_t allocated_tables = 0;
6415 if (ct->n_tables >= allocated_tables)
6416 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6417 sizeof *ct->tables);
6419 struct ctables_category *cat = xmalloc (sizeof *cat);
6420 *cat = (struct ctables_category) {
6422 .include_missing = false,
6423 .sort_ascending = true,
6426 struct ctables_categories *c = xmalloc (sizeof *c);
6427 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6428 *c = (struct ctables_categories) {
6435 struct ctables_categories **categories = xnmalloc (n_vars,
6436 sizeof *categories);
6437 for (size_t i = 0; i < n_vars; i++)
6440 struct ctables_table *t = xmalloc (sizeof *t);
6441 *t = (struct ctables_table) {
6443 .slabels_axis = PIVOT_AXIS_COLUMN,
6444 .slabels_visible = true,
6445 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6447 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6448 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6449 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6451 .clabels_from_axis = PIVOT_AXIS_LAYER,
6452 .categories = categories,
6453 .n_categories = n_vars,
6456 ct->tables[ct->n_tables++] = t;
6458 lex_match (lexer, T_EQUALS);
6459 int expr_start = lex_ofs (lexer);
6460 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6462 if (lex_match (lexer, T_BY))
6464 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6465 ct, t, PIVOT_AXIS_COLUMN))
6468 if (lex_match (lexer, T_BY))
6470 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6471 ct, t, PIVOT_AXIS_LAYER))
6475 int expr_end = lex_ofs (lexer);
6477 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6478 && !t->axes[PIVOT_AXIS_LAYER])
6480 lex_error (lexer, _("At least one variable must be specified."));
6484 const struct ctables_axis *scales[PIVOT_N_AXES];
6485 size_t n_scales = 0;
6486 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6488 scales[a] = find_scale (t->axes[a]);
6494 msg (SE, _("Scale variables may appear only on one axis."));
6495 if (scales[PIVOT_AXIS_ROW])
6496 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6497 _("This scale variable appears on the rows axis."));
6498 if (scales[PIVOT_AXIS_COLUMN])
6499 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6500 _("This scale variable appears on the columns axis."));
6501 if (scales[PIVOT_AXIS_LAYER])
6502 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6503 _("This scale variable appears on the layer axis."));
6507 const struct ctables_axis *summaries[PIVOT_N_AXES];
6508 size_t n_summaries = 0;
6509 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6511 summaries[a] = (scales[a]
6513 : find_categorical_summary_spec (t->axes[a]));
6517 if (n_summaries > 1)
6519 msg (SE, _("Summaries may appear only on one axis."));
6520 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6523 msg_at (SN, summaries[a]->loc,
6525 ? _("This variable on the rows axis has a summary.")
6526 : a == PIVOT_AXIS_COLUMN
6527 ? _("This variable on the columns axis has a summary.")
6528 : _("This variable on the layers axis has a summary."));
6530 msg_at (SN, summaries[a]->loc,
6531 _("This is a scale variable, so it always has a "
6532 "summary even if the syntax does not explicitly "
6537 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6538 if (n_summaries ? summaries[a] : t->axes[a])
6540 t->summary_axis = a;
6544 if (lex_token (lexer) == T_ENDCMD)
6546 if (!ctables_prepare_table (t))
6550 if (!lex_force_match (lexer, T_SLASH))
6553 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6555 if (lex_match_id (lexer, "SLABELS"))
6557 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6559 if (lex_match_id (lexer, "POSITION"))
6561 lex_match (lexer, T_EQUALS);
6562 if (lex_match_id (lexer, "COLUMN"))
6563 t->slabels_axis = PIVOT_AXIS_COLUMN;
6564 else if (lex_match_id (lexer, "ROW"))
6565 t->slabels_axis = PIVOT_AXIS_ROW;
6566 else if (lex_match_id (lexer, "LAYER"))
6567 t->slabels_axis = PIVOT_AXIS_LAYER;
6570 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6574 else if (lex_match_id (lexer, "VISIBLE"))
6576 lex_match (lexer, T_EQUALS);
6577 if (!parse_bool (lexer, &t->slabels_visible))
6582 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6587 else if (lex_match_id (lexer, "CLABELS"))
6589 if (lex_match_id (lexer, "AUTO"))
6591 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6592 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6594 else if (lex_match_id (lexer, "ROWLABELS"))
6596 lex_match (lexer, T_EQUALS);
6597 if (lex_match_id (lexer, "OPPOSITE"))
6598 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6599 else if (lex_match_id (lexer, "LAYER"))
6600 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6603 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6607 else if (lex_match_id (lexer, "COLLABELS"))
6609 lex_match (lexer, T_EQUALS);
6610 if (lex_match_id (lexer, "OPPOSITE"))
6611 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6612 else if (lex_match_id (lexer, "LAYER"))
6613 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6616 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6622 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6627 else if (lex_match_id (lexer, "CRITERIA"))
6629 if (!lex_force_match_id (lexer, "CILEVEL"))
6631 lex_match (lexer, T_EQUALS);
6633 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6635 t->cilevel = lex_number (lexer);
6638 else if (lex_match_id (lexer, "CATEGORIES"))
6640 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6644 else if (lex_match_id (lexer, "TITLES"))
6649 if (lex_match_id (lexer, "CAPTION"))
6650 textp = &t->caption;
6651 else if (lex_match_id (lexer, "CORNER"))
6653 else if (lex_match_id (lexer, "TITLE"))
6657 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6660 lex_match (lexer, T_EQUALS);
6662 struct string s = DS_EMPTY_INITIALIZER;
6663 while (lex_is_string (lexer))
6665 if (!ds_is_empty (&s))
6666 ds_put_byte (&s, ' ');
6667 put_title_text (&s, lex_tokss (lexer), now,
6668 lexer, dataset_dict (ds),
6669 expr_start, expr_end);
6673 *textp = ds_steal_cstr (&s);
6675 while (lex_token (lexer) != T_SLASH
6676 && lex_token (lexer) != T_ENDCMD);
6678 else if (lex_match_id (lexer, "SIGTEST"))
6682 t->chisq = xmalloc (sizeof *t->chisq);
6683 *t->chisq = (struct ctables_chisq) {
6685 .include_mrsets = true,
6686 .all_visible = true,
6692 if (lex_match_id (lexer, "TYPE"))
6694 lex_match (lexer, T_EQUALS);
6695 if (!lex_force_match_id (lexer, "CHISQUARE"))
6698 else if (lex_match_id (lexer, "ALPHA"))
6700 lex_match (lexer, T_EQUALS);
6701 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6703 t->chisq->alpha = lex_number (lexer);
6706 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6708 lex_match (lexer, T_EQUALS);
6709 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6712 else if (lex_match_id (lexer, "CATEGORIES"))
6714 lex_match (lexer, T_EQUALS);
6715 if (lex_match_id (lexer, "ALLVISIBLE"))
6716 t->chisq->all_visible = true;
6717 else if (lex_match_id (lexer, "SUBTOTALS"))
6718 t->chisq->all_visible = false;
6721 lex_error_expecting (lexer,
6722 "ALLVISIBLE", "SUBTOTALS");
6728 lex_error_expecting (lexer, "TYPE", "ALPHA",
6729 "INCLUDEMRSETS", "CATEGORIES");
6733 while (lex_token (lexer) != T_SLASH
6734 && lex_token (lexer) != T_ENDCMD);
6736 else if (lex_match_id (lexer, "COMPARETEST"))
6740 t->pairwise = xmalloc (sizeof *t->pairwise);
6741 *t->pairwise = (struct ctables_pairwise) {
6743 .alpha = { .05, .05 },
6744 .adjust = BONFERRONI,
6745 .include_mrsets = true,
6746 .meansvariance_allcats = true,
6747 .all_visible = true,
6756 if (lex_match_id (lexer, "TYPE"))
6758 lex_match (lexer, T_EQUALS);
6759 if (lex_match_id (lexer, "PROP"))
6760 t->pairwise->type = PROP;
6761 else if (lex_match_id (lexer, "MEAN"))
6762 t->pairwise->type = MEAN;
6765 lex_error_expecting (lexer, "PROP", "MEAN");
6769 else if (lex_match_id (lexer, "ALPHA"))
6771 lex_match (lexer, T_EQUALS);
6773 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6775 double a0 = lex_number (lexer);
6778 lex_match (lexer, T_COMMA);
6779 if (lex_is_number (lexer))
6781 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6783 double a1 = lex_number (lexer);
6786 t->pairwise->alpha[0] = MIN (a0, a1);
6787 t->pairwise->alpha[1] = MAX (a0, a1);
6790 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6792 else if (lex_match_id (lexer, "ADJUST"))
6794 lex_match (lexer, T_EQUALS);
6795 if (lex_match_id (lexer, "BONFERRONI"))
6796 t->pairwise->adjust = BONFERRONI;
6797 else if (lex_match_id (lexer, "BH"))
6798 t->pairwise->adjust = BH;
6799 else if (lex_match_id (lexer, "NONE"))
6800 t->pairwise->adjust = 0;
6803 lex_error_expecting (lexer, "BONFERRONI", "BH",
6808 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6810 lex_match (lexer, T_EQUALS);
6811 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6814 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6816 lex_match (lexer, T_EQUALS);
6817 if (lex_match_id (lexer, "ALLCATS"))
6818 t->pairwise->meansvariance_allcats = true;
6819 else if (lex_match_id (lexer, "TESTEDCATS"))
6820 t->pairwise->meansvariance_allcats = false;
6823 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6827 else if (lex_match_id (lexer, "CATEGORIES"))
6829 lex_match (lexer, T_EQUALS);
6830 if (lex_match_id (lexer, "ALLVISIBLE"))
6831 t->pairwise->all_visible = true;
6832 else if (lex_match_id (lexer, "SUBTOTALS"))
6833 t->pairwise->all_visible = false;
6836 lex_error_expecting (lexer, "ALLVISIBLE",
6841 else if (lex_match_id (lexer, "MERGE"))
6843 lex_match (lexer, T_EQUALS);
6844 if (!parse_bool (lexer, &t->pairwise->merge))
6847 else if (lex_match_id (lexer, "STYLE"))
6849 lex_match (lexer, T_EQUALS);
6850 if (lex_match_id (lexer, "APA"))
6851 t->pairwise->apa_style = true;
6852 else if (lex_match_id (lexer, "SIMPLE"))
6853 t->pairwise->apa_style = false;
6856 lex_error_expecting (lexer, "APA", "SIMPLE");
6860 else if (lex_match_id (lexer, "SHOWSIG"))
6862 lex_match (lexer, T_EQUALS);
6863 if (!parse_bool (lexer, &t->pairwise->show_sig))
6868 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6869 "INCLUDEMRSETS", "MEANSVARIANCE",
6870 "CATEGORIES", "MERGE", "STYLE",
6875 while (lex_token (lexer) != T_SLASH
6876 && lex_token (lexer) != T_ENDCMD);
6880 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6881 "CRITERIA", "CATEGORIES", "TITLES",
6882 "SIGTEST", "COMPARETEST");
6886 if (!lex_match (lexer, T_SLASH))
6890 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6891 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6893 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6897 if (!ctables_prepare_table (t))
6900 while (lex_token (lexer) != T_ENDCMD);
6903 input = proc_open (ds);
6904 bool ok = ctables_execute (ds, input, ct);
6905 ok = proc_commit (ds) && ok;
6907 ctables_destroy (ct);
6908 return ok ? CMD_SUCCESS : CMD_FAILURE;
6913 ctables_destroy (ct);