1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from another. */
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical variables.) */
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels are moved.
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'. */
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
/* Frees the memory that CAT refers to, without freeing CAT itself in any
   visible line.  The location is destroyed first; the remaining frees sit
   inside a switch whose case labels are only partly shown in this listing. */
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
/* String category: release its text. */
512 ss_dealloc (&cat->string);
/* String range: release both endpoints. */
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
/* Total/subtotal label (see the 'total_label' member comment). */
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  If A is null the result is whether B is null too; if only B is
   null the result is false; otherwise the result is that of ss_equals(). */
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are compared first, and then a
   per-type comparison is returned.  NOTE(review): the case labels for most
   of the returns below, and the statement guarded by the type check, are not
   shown in this listing. */
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
/* Single number: exact floating-point equality. */
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
/* Numeric range: both endpoints must match exactly. */
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range: endpoints may be null, so use the null-tolerant compare. */
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by pointer identity. */
569 return a->pc == b->pc;
/* NOTE(review): strcmp() requires both labels to be non-null -- confirm
   that every category reaching this line has a label. */
573 return !strcmp (a->total_label, b->total_label);
/* Missing-value and sort settings must all agree. */
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
/* Presumably drops one reference to C (the name and the 'n_refs' assertion
   suggest reference counting; the decrement itself is not shown in this
   listing).  The visible loop uninitializes every category in C. */
592 ctables_categories_unref (struct ctables_categories *c)
/* A caller must still hold a reference at this point. */
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B: first their category counts and
   'show_empty' settings, then each pair of categories at the same index
   using ctables_category_equal().  The statements that return the result
   are not shown in this listing. */
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies summary specification SRC into DST.  The visible line sets DST's
   label from xstrdup_if_nonnull() on SRC's label, so DST presumably gets its
   own copy (or null).  How the other members are copied is not shown in this
   listing. */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is
   allocated (null when SRC is empty) and each spec is cloned into it, then
   *DST is overwritten with a fresh struct.  Only the 'is_scale' initializer
   of that struct is shown in this listing. */
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
/* Uninitializes each of the SET->n summary specs in SET and frees the
   'listwise_vars' array.  Any further cleanup is not shown in this
   listing. */
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
/* Parses a column width setting from LEXER: an optional '=', then either
   the keyword DEFAULT or a number in the closed range [0, DBL_MAX], which is
   stored in *WIDTH.  NAME is passed to lex_force_num_range_closed(),
   presumably to label the value in its error message.  What DEFAULT stores
   and what the function returns are not shown in this listing. */
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* The '=' is optional, so the result of the match is ignored. */
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER.  If neither matches, an error
   naming the two expected keywords is reported.  The assignments to *B and
   the return values are not shown in this listing. */
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table
   is generated by including "ctables.inc" with S() defined to emit one
   designated initializer per function, indexed by its enumerator. */
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
/* Returns true if F is one of the three count functions CTSF_COUNT,
   CTSF_ECOUNT, or CTSF_UCOUNT, and false otherwise. */
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
/* Parses a summary function name from LEXER and stores the matching
   function in *F.  The name table is generated from "ctables.inc" via S()
   and then extended with seven percentage names whose ".COUNT" suffix may be
   omitted.  The current token must be an identifier; it is compared with
   each table name using ss_equals_case().  If nothing matches, the error
   "Expecting summary function name." is reported.  The return statements
   are not shown in this listing. */
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
/* Linear search over every table entry, including the aliases above. */
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible lines uninitialize all N_CSVS summary spec
   sets, recursively destroy both sub-axes, and destroy the axis's source
   location.  NOTE(review): the selection between the spec-set cleanup and
   the recursion (presumably on axis->op) and any null check are not shown in
   this listing. */
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
/* Allocates a new axis node with SUB0 and SUB1 as its two sub-axes.  Its
   location is obtained from lex_ofs_location() for offsets START_OFS through
   one less than LEXER's current offset.  The initializer for OP and the
   return statement are not shown in this listing. */
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   Each function's format class comes from a table generated from
   "ctables.inc".  The result is F with width 40, PCT with width 40 and 1
   decimal, or VAR's own print format.  The case labels that select among
   these are not shown in this listing. */
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
/* Builds the pivot-table label for summary SPEC.  The first visible return
   uses a default: "Percentile %.2f" for CTSF_PTILE, otherwise the function's
   label from the table generated from "ctables.inc".  The remaining lines
   copy SPEC->label into OUT, replacing the literal ")CILEVEL" with CILEVEL
   formatted using "%g".  NOTE(review): the conditions and loop around these
   lines (presumably a test for a null SPEC->label and a loop over every
   occurrence) are not shown in this listing. */
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next ")CILEVEL" and skip past it. */
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Replace the placeholder itself with the confidence level. */
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
/* Returns the name of summary FUNCTION, looked up in a static table
   generated from the NAME column of "ctables.inc". */
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
/* Adds a summary specification to AXIS for variant SV.

   If AXIS is a single variable (CTAO_VAR), the function's availability is
   checked first; the visible diagnostics cover multiple-response-set-only
   functions and scale-only functions applied to a non-scale variable outside
   totals.  The spec is then appended to AXIS->specs[SV], growing the array
   as needed.  LABEL is duplicated when non-null, and FORMAT defaults to
   ctables_summary_default_format() when null.

   Otherwise the function calls itself on both sub-axes.  The return
   statements and some case labels are not shown in this listing. */
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* The scale-only complaint is raised only outside the CSV_TOTAL variant. */
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
/* Grow the array when it is full. */
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
/* Non-variable axis: apply the same spec to both sub-axes. */
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable.

   For a variable, a new CTAO_VAR axis is allocated.  Its 'scale' flag is
   true after "[S]", false after "[C]", and otherwise follows whether the
   variable's measurement level is MEASURE_SCALE.  A string variable marked
   as scale is reported as an error and the axis is destroyed.  The return
   statements are not shown in this listing. */
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* A failed parse or a missing ')' discards whatever was parsed. */
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] overrides the variable's measurement level. */
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the characters '0'...'9'.
   strcspn() yields the length of the leading run without digits, so the
   indexed character is the terminating null exactly when there is none. */
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary specification into FORMAT.

   The type names NEGPAREN, NEQUAL, PAREN and PCTPAREN (compared without
   regard to case) map to the corresponding CTEF_* format.  Those formats are
   then validated with the two visible errors about width and decimals, and
   *IS_CTABLES_FORMAT is set to true.

   Any other name sets *IS_CTABLES_FORMAT to false and is parsed as an
   ordinary format that must be valid for output and for numeric values.
   Some conditions and return statements are not shown in this listing. */
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
/* The decimals must be strictly smaller than the width. */
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a function name, a percentile in [0, 100] for
   CTSF_PTILE, an optional string label, and an optional format.  Each one is
   added to the axis with add_summary_spec().  A comma between
   specifications is optional.  The keyword TOTALS followed by '[' is
   accepted only while SV is CSV_CELL; in the CSV_TOTAL variant a second ']'
   is required.  The visible ctables_axis_destroy() call discards the axis,
   presumably on the error path.  The loop, the variant switch and the
   return statements are not shown in this listing. */
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
/* Optional label: taken only if the current token is a string. */
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
/* Optional format: an identifier containing a digit is treated as one. */
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A CTAO_VAR axis yields itself if its
   'scale' flag is set, and null otherwise.  For other axes both sub-axes are
   searched in order.  The first branch of the if chain and the final returns
   are not shown in this listing. */
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a non-scale variable that has at least one CSV_CELL
   summary specification.  A CTAO_VAR axis yields itself if it qualifies, and
   null otherwise.  For other axes both sub-axes are searched in order.  The
   first branch of the if chain and the final returns are not shown in this
   listing. */
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix expressions joined by '>' into CTAO_NEST
   nodes.  Two nestings are rejected after building the node, which is then
   destroyed: one where both sides contain a scale variable, and one where
   the outer side contains a categorical variable that has its own summary.
   The null checks, the update of LHS and the return statements are not shown
   in this listing. */
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1244 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1245 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1247 const struct ctables_axis *outer_scale = find_scale (lhs);
1248 const struct ctables_axis *inner_scale = find_scale (rhs);
1249 if (outer_scale && inner_scale)
1251 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1252 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1253 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1254 ctables_axis_destroy (nest);
1258 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1261 msg_at (SE, nest->loc,
1262 _("Summaries may only be requested for categorical variables "
1263 "at the innermost nesting level."));
1264 msg_at (SN, outer_sum->loc,
1265 _("This outer categorical variable has a summary."));
1266 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by '+'.  Each additional
   operand is combined with the expression so far into a new CTAO_STACK
   node.  Every node's location starts at the offset recorded on entry.  The
   null checks and the return statement are not shown in this listing. */
1276 static struct ctables_axis *
1277 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1279 int start_ofs = lex_ofs (ctx->lexer);
1280 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1284 while (lex_match (ctx->lexer, T_PLUS))
1286 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1290 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1291 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores it in T->axes[A].
   A current token of BY, '/' or end of command is tested first; the
   statement guarded by that test is not shown in this listing (presumably an
   early return that leaves the axis empty).  Otherwise the result is whether
   the parsed axis is non-null. */
1298 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1299 struct ctables *ct, struct ctables_table *t,
1300 enum pivot_axis_type a)
1302 if (lex_token (lexer) == T_BY
1303 || lex_token (lexer) == T_SLASH
1304 || lex_token (lexer) == T_ENDCMD)
1307 struct ctables_axis_parse_ctx ctx = {
1313 t->axes[a] = ctables_axis_parse_stack (&ctx);
1314 return t->axes[a] != NULL;
1318 ctables_chisq_destroy (struct ctables_chisq *chisq)
1324 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Frees the resources held by table T.  The visible lines release, in
   order: its sections, its per-variable categories, its axes and stacks, the
   summary spec array, the moved-category-label values, and the chi-square
   and pairwise test settings.  Further frees, including that of T itself,
   are not shown in this listing. */
1330 ctables_table_destroy (struct ctables_table *t)
1335 for (size_t i = 0; i < t->n_sections; i++)
1336 ctables_section_uninit (&t->sections[i]);
1339 for (size_t i = 0; i < t->n_categories; i++)
1340 ctables_categories_unref (t->categories[i]);
1341 free (t->categories);
1343 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1345 ctables_axis_destroy (t->axes[a]);
1346 ctables_stack_uninit (&t->stacks[a]);
1348 free (t->summary_specs.specs);
/* Empty the value map before destroying it; each value's width is taken
   from the example variable. */
1350 struct ctables_value *ctv, *next_ctv;
1351 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1352 &t->clabels_values_map)
1354 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1355 hmap_delete (&t->clabels_values_map, &ctv->node);
1358 hmap_destroy (&t->clabels_values_map);
1359 free (t->clabels_values);
1365 ctables_chisq_destroy (t->chisq);
1366 ctables_pairwise_destroy (t->pairwise);
/* Frees the resources held by CT.  The visible lines release every
   postcompute (location, expression, summary specs) and remove it from
   CT->postcomputes, then release the CTABLES format settings, the table
   look, and every table.  Further frees, including that of CT itself, are
   not shown in this listing. */
1371 ctables_destroy (struct ctables *ct)
1376 struct ctables_postcompute *pc, *next_pc;
1377 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1381 msg_location_destroy (pc->location);
1382 ctables_pcexpr_destroy (pc->expr);
1386 ctables_summary_spec_set_uninit (pc->specs);
1389 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1393 fmt_settings_uninit (&ct->ctables_formats);
1394 pivot_table_look_unref (ct->look);
1398 for (size_t i = 0; i < ct->n_tables; i++)
1399 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose numeric range is [LOW, HIGH].  The 'type'
   initializer is not shown in this listing (presumably CCT_NRANGE). */
1404 static struct ctables_category
1405 cct_nrange (double low, double high)
1407 return (struct ctables_category) {
1409 .nrange = { low, high }
/* Returns a category whose string range runs from LOW to HIGH.  The
   substrings are stored as passed, not copied.  The 'type' initializer is
   not shown in this listing (presumably CCT_SRANGE). */
1413 static struct ctables_category
1414 cct_srange (struct substring low, struct substring high)
1416 return (struct ctables_category) {
1418 .srange = { low, high }
/* Parses the optional "= 'label'" that follows SUBTOTAL or HSUBTOTAL and
   stores a CCT_SUBTOTAL category in *CAT.  The label is the given string, or
   the translated default "Subtotal" on the other visible path.
   HIDE_SUBCATEGORIES is copied into the category unchanged.  The token
   advance and the return statements are not shown in this listing. */
1423 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1424 struct ctables_category *cat)
1427 if (lex_match (lexer, T_EQUALS))
1429 if (!lex_force_string (lexer))
1432 total_label = ss_xstrdup (lex_tokss (lexer));
1436 total_label = xstrdup (_("Subtotal"));
1438 *cat = (struct ctables_category) {
1439 .type = CCT_SUBTOTAL,
1440 .hide_subcategories = hide_subcategories,
1441 .total_label = total_label
/* Recodes the lexer's current token with recode_substring_pool(), given
   DICT's encoding and "UTF-8", then trims trailing spaces.  Presumably this
   converts from UTF-8 to the dictionary encoding, since the category members
   are documented as "in dictionary encoding" -- confirm the argument order
   against recode_substring_pool().  The token advance and the return
   statement are not shown in this listing. */
1446 static struct substring
1447 parse_substring (struct lexer *lexer, struct dictionary *dict)
1449 struct substring s = recode_substring_pool (
1450 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1451 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category into *CAT.  The visible branches accept:

   - OTHERNM and MISSING;
   - SUBTOTAL and HSUBTOTAL (delegated to ctables_table_parse_subtotal());
   - LO THRU followed by a string or a number;
   - a number, optionally followed by THRU HI or THRU and another number;
   - a string, optionally followed by THRU HI or THRU and another string;
   - '&' followed by the name of a postcompute, looked up with
     ctables_find_postcompute().

   An open end of a numeric range is -DBL_MAX or DBL_MAX; an open end of a
   string range is a substring with a null 'string'.  Anything else is an
   error.  NOTE(review): one parameter line (presumably the 'ct' used below),
   the token advances, the cleanup on error and the return statements are not
   shown in this listing. */
1457 ctables_table_parse_explicit_category (struct lexer *lexer,
1458 struct dictionary *dict,
1460 struct ctables_category *cat)
1462 if (lex_match_id (lexer, "OTHERNM"))
1463 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1464 else if (lex_match_id (lexer, "MISSING"))
1465 *cat = (struct ctables_category) { .type = CCT_MISSING };
1466 else if (lex_match_id (lexer, "SUBTOTAL"))
1467 return ctables_table_parse_subtotal (lexer, false, cat);
1468 else if (lex_match_id (lexer, "HSUBTOTAL"))
1469 return ctables_table_parse_subtotal (lexer, true, cat);
1470 else if (lex_match_id (lexer, "LO"))
1472 if (!lex_force_match_id (lexer, "THRU"))
1474 if (lex_is_string (lexer))
/* A null lower bound stands for "LO". */
1476 struct substring sr0 = { .string = NULL };
1477 struct substring sr1 = parse_substring (lexer, dict);
1478 *cat = cct_srange (sr0, sr1);
1480 else if (lex_force_num (lexer))
1482 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1488 else if (lex_is_number (lexer))
1490 double number = lex_number (lexer);
1492 if (lex_match_id (lexer, "THRU"))
1494 if (lex_match_id (lexer, "HI"))
1495 *cat = cct_nrange (number, DBL_MAX);
1498 if (!lex_force_num (lexer))
1500 *cat = cct_nrange (number, lex_number (lexer));
1505 *cat = (struct ctables_category) {
1510 else if (lex_is_string (lexer))
1512 struct substring s = parse_substring (lexer, dict);
1513 if (lex_match_id (lexer, "THRU"))
1515 if (lex_match_id (lexer, "HI"))
/* A null upper bound stands for "HI". */
1517 struct substring sr1 = { .string = NULL };
1518 *cat = cct_srange (s, sr1);
1522 if (!lex_force_string (lexer))
1527 struct substring sr1 = parse_substring (lexer, dict);
1528 *cat = cct_srange (s, sr1);
1532 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1534 else if (lex_match (lexer, T_AND))
1536 if (!lex_force_id (lexer))
1538 struct ctables_postcompute *pc = ctables_find_postcompute (
1539 ct, lex_tokcstr (lexer));
1542 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1543 msg_at (SE, loc, _("Unknown postcompute &%s."),
1544 lex_tokcstr (lexer));
1545 msg_location_destroy (loc);
1550 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1554 lex_error (lexer, NULL);
/* Parses category string S as a value of the given FORMAT using data_in(),
   with DICT's encoding and the current format settings.  If data_in()
   returns an error message, it is reported at LOCATION together with the
   format's name.  The declaration of 'v', the store into *N and the return
   statements are not shown in this listing. */
1562 parse_category_string (struct msg_location *location,
1563 struct substring s, const struct dictionary *dict,
1564 enum fmt_type format, double *n)
1567 char *error = data_in (s, dict_get_encoding (dict), format,
1568 settings_get_fmt_settings (), &v, 0, NULL);
1571 msg_at (SE, location,
1572 _("Failed to parse category specification as format %s: %s."),
1573 fmt_name (format), error);
/* Searches CATS for the category that postcompute operand E refers to.
   Each category is tested against E's operand kind:

   - numbers and numeric ranges by exact equality;
   - strings with ss_equals();
   - string ranges with the null-tolerant compare;
   - MISSING, OTHERNM and TOTAL by category type alone;
   - subtotals by E->subtotal_index, where index 0 is handled separately.

   After the loop, an index-0 subtotal reference is tested against there
   being more than one subtotal.  NOTE(review): the statements guarded by
   these tests (presumably assignments to 'best', the subtotal counter update
   and the returns) are not shown in this listing. */
1582 static struct ctables_category *
1583 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1584 const struct ctables_pcexpr *e)
1586 struct ctables_category *best = NULL;
1587 size_t n_subtotals = 0;
1588 for (size_t i = 0; i < cats->n_cats; i++)
1590 struct ctables_category *cat = &cats->cats[i];
1593 case CTPO_CAT_NUMBER:
1594 if (cat->type == CCT_NUMBER && cat->number == e->number)
1598 case CTPO_CAT_STRING:
1599 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1603 case CTPO_CAT_NRANGE:
1604 if (cat->type == CCT_NRANGE
1605 && cat->nrange[0] == e->nrange[0]
1606 && cat->nrange[1] == e->nrange[1])
1610 case CTPO_CAT_SRANGE:
1611 if (cat->type == CCT_SRANGE
1612 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1613 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1617 case CTPO_CAT_MISSING:
1618 if (cat->type == CCT_MISSING)
1622 case CTPO_CAT_OTHERNM:
1623 if (cat->type == CCT_OTHERNM)
1627 case CTPO_CAT_SUBTOTAL:
1628 if (cat->type == CCT_SUBTOTAL)
1631 if (e->subtotal_index == n_subtotals)
1633 else if (e->subtotal_index == 0)
1638 case CTPO_CAT_TOTAL:
1639 if (cat->type == CCT_TOTAL)
1653 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute leaf expression E refers to.

   When PARSE_FORMAT is not FMT_F, a string reference or string-range
   reference in E is first converted into a numeric one by parsing its
   string(s) with parse_category_string() in PARSE_FORMAT, using DICT for the
   encoding, and the lookup is done on the converted expression.  A range
   endpoint with a null string is open-ended and becomes -DBL_MAX (lower) or
   DBL_MAX (upper).  Every other case looks up E unchanged.

   NOTE(review): the statements executed when parsing fails are not visible
   in this excerpt; presumably they return NULL. */
1658 static struct ctables_category *
1659 ctables_find_category_for_postcompute (const struct dictionary *dict,
1660 const struct ctables_categories *cats,
1661 enum fmt_type parse_format,
1662 const struct ctables_pcexpr *e)
1664 if (parse_format != FMT_F)
1666 if (e->op == CTPO_CAT_STRING)
1669 if (!parse_category_string (e->location, e->string, dict,
1670 parse_format, &number))
/* Look up the parsed value as if the user had written a number. */
1673 struct ctables_pcexpr e2 = {
1674 .op = CTPO_CAT_NUMBER,
1676 .location = e->location,
1678 return ctables_find_category_for_postcompute__ (cats, &e2);
1680 else if (e->op == CTPO_CAT_SRANGE)
1683 if (!e->srange[0].string)
1684 nrange[0] = -DBL_MAX;
1685 else if (!parse_category_string (e->location, e->srange[0], dict,
1686 parse_format, &nrange[0]))
1689 if (!e->srange[1].string)
1690 nrange[1] = DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[1], dict,
1692 parse_format, &nrange[1]))
/* Look up the parsed endpoints as a numeric range. */
1695 struct ctables_pcexpr e2 = {
1696 .op = CTPO_CAT_NRANGE,
1697 .nrange = { nrange[0], nrange[1] },
1698 .location = e->location,
1700 return ctables_find_category_for_postcompute__ (cats, &e2);
1703 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that postcompute expression E, which belongs to computed category
   PC_CAT, only refers to categories that are present in CATS.
   CATS_LOCATION is the syntax location of the category list and is used in
   diagnostics; DICT is passed through to the category lookup.

   For a category-reference leaf, the category is looked up with
   ctables_find_category_for_postcompute() using PC_CAT->parse_format.  Two
   diagnostics are visible here: one for an unpositioned SUBTOTAL reference
   when CATS holds more than one subtotal, and one for a reference to a
   category that is not in the list, followed by a note suggesting the fix.
   The function then visits both sub-expressions of E recursively.

   NOTE(review): the conditions guarding the diagnostics, the action taken
   when PC_CAT->pc->hide_source_cats is set, and the return statements are
   not visible in this excerpt; the recursive call is negated, which
   indicates that a false result means the check failed. */
1707 ctables_recursive_check_postcompute (struct dictionary *dict,
1708 const struct ctables_pcexpr *e,
1709 struct ctables_category *pc_cat,
1710 const struct ctables_categories *cats,
1711 const struct msg_location *cats_location)
1715 case CTPO_CAT_NUMBER:
1716 case CTPO_CAT_STRING:
1717 case CTPO_CAT_NRANGE:
1718 case CTPO_CAT_SRANGE:
1719 case CTPO_CAT_MISSING:
1720 case CTPO_CAT_OTHERNM:
1721 case CTPO_CAT_SUBTOTAL:
1722 case CTPO_CAT_TOTAL:
1724 struct ctables_category *cat = ctables_find_category_for_postcompute (
1725 dict, cats, pc_cat->parse_format, e);
/* An unpositioned SUBTOTAL reference: count the subtotals in the list so
   that an ambiguous reference gets its own, more specific, message. */
1728 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1730 size_t n_subtotals = 0;
1731 for (size_t i = 0; i < cats->n_cats; i++)
1732 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1733 if (n_subtotals > 1)
1735 msg_at (SE, cats_location,
1736 ngettext ("These categories include %zu instance "
1737 "of SUBTOTAL or HSUBTOTAL, so references "
1738 "from computed categories must refer to "
1739 "subtotals by position, "
1740 "e.g. SUBTOTAL[1].",
1741 "These categories include %zu instances "
1742 "of SUBTOTAL or HSUBTOTAL, so references "
1743 "from computed categories must refer to "
1744 "subtotals by position, "
1745 "e.g. SUBTOTAL[1].",
1748 msg_at (SN, e->location,
1749 _("This is the reference that lacks a position."));
/* General case: the referenced category is missing from the list.  The
   follow-up note depends on what kind of category was referenced. */
1754 msg_at (SE, pc_cat->location,
1755 _("Computed category &%s references a category not included "
1756 "in the category list."),
1758 msg_at (SN, e->location, _("This is the missing category."));
1759 if (e->op == CTPO_CAT_SUBTOTAL)
1760 msg_at (SN, cats_location,
1761 _("To fix the problem, add subtotals to the "
1762 "list of categories here."));
1763 else if (e->op == CTPO_CAT_TOTAL)
1764 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1765 "CATEGORIES specification."));
1767 msg_at (SN, cats_location,
1768 _("To fix the problem, add the missing category to the "
1769 "list of categories here."));
1772 if (pc_cat->pc->hide_source_cats)
/* Check both operands; a null operand is skipped. */
1786 for (size_t i = 0; i < 2; i++)
1787 if (e->subs[i] && !ctables_recursive_check_postcompute (
1788 dict, e->subs[i], pc_cat, cats, cats_location))
/* Examines the N_VARS variables in VARS and, for a numeric one, reports at
   CAT's location that CAT's category specification may be applied only to
   string variables, naming the offending variable.

   NOTE(review): the return statements are not visible in this excerpt;
   callers negate the result, so presumably it is true only when every
   variable is a string variable. */
1797 all_strings (struct variable **vars, size_t n_vars,
1798 const struct ctables_category *cat)
1800 for (size_t j = 0; j < n_vars; j++)
1801 if (var_is_numeric (vars[j]))
1803 msg_at (SE, cat->location,
1804 _("This category specification may be applied only to string "
1805 "variables, but this subcommand tries to apply it to "
1806 "numeric variable %s."),
1807 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER: the VARIABLES
   keyword and a variable list from DICT, an optional bracketed list of
   explicit categories, and then settings up to the next slash or end of
   command.  CT is passed to the explicit-category parser.

   The visible steps are:

   - Determine the print format type shared by all the listed variables, or
     none if they differ.

   - Allocate a new category set with one reference per variable and drop
     each variable's previous set in T->categories.

   - Parse explicit categories between brackets, recording the syntax
     location of each one and of the list as a whole.

   - Parse ORDER, KEY and MISSING (matched only while no categories have
     been added) and TOTAL, LABEL, POSITION and EMPTY.

   - Append the implicit category and, when requested, a totals category
     placed before or after the others, then link categories to subtotals.

   - For an explicit list, validate each category against the variables'
     types: check postcompute references, reject numeric specifications for
     string variables, and convert string categories and string ranges to
     numbers when strings are to be parsed in the common format.

   NOTE(review): many statements, including all error returns and several
   conditions, are not visible in this excerpt, so this summary covers only
   what can be seen. */
1814 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1815 struct ctables *ct, struct ctables_table *t)
1817 if (!lex_match_id (lexer, "VARIABLES"))
1819 lex_match (lexer, T_EQUALS);
1821 struct variable **vars;
1823 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type common to all the variables, if any. */
1826 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1827 for (size_t i = 1; i < n_vars; i++)
1829 const struct fmt_spec *f = var_get_print_format (vars[i]);
1830 if (f->type != common_format->type)
1832 common_format = NULL;
1838 && (fmt_get_category (common_format->type)
1839 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* Create the new category set, shared by all the listed variables, and
   release whatever set each variable had before. */
1841 struct ctables_categories *c = xmalloc (sizeof *c);
1842 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1843 for (size_t i = 0; i < n_vars; i++)
1845 struct ctables_categories **cp
1846 = &t->categories[var_get_dict_index (vars[i])];
1847 ctables_categories_unref (*cp);
/* Explicit category list.  The token offsets stay at -1 when there is no
   bracketed list. */
1851 size_t allocated_cats = 0;
1852 int cats_start_ofs = -1;
1853 int cats_end_ofs = -1;
1854 if (lex_match (lexer, T_LBRACK))
1856 cats_start_ofs = lex_ofs (lexer);
1859 if (c->n_cats >= allocated_cats)
1860 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1862 int start_ofs = lex_ofs (lexer);
1863 struct ctables_category *cat = &c->cats[c->n_cats];
1864 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1866 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1869 lex_match (lexer, T_COMMA);
1871 while (!lex_match (lexer, T_RBRACK));
1872 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category, which the settings below adjust. */
1875 struct ctables_category cat = {
1877 .include_missing = false,
1878 .sort_ascending = true,
1880 bool show_totals = false;
1881 char *total_label = NULL;
1882 bool totals_before = false;
1883 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1885 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1887 lex_match (lexer, T_EQUALS);
1888 if (lex_match_id (lexer, "A"))
1889 cat.sort_ascending = true;
1890 else if (lex_match_id (lexer, "D"))
1891 cat.sort_ascending = false;
1894 lex_error_expecting (lexer, "A", "D");
1898 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1900 lex_match (lexer, T_EQUALS);
1901 if (lex_match_id (lexer, "VALUE"))
1902 cat.type = CCT_VALUE;
1903 else if (lex_match_id (lexer, "LABEL"))
1904 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
1907 cat.type = CCT_FUNCTION;
1908 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1911 if (lex_match (lexer, T_LPAREN))
1913 cat.sort_var = parse_variable (lexer, dict);
1917 if (cat.sort_function == CTSF_PTILE)
1919 lex_match (lexer, T_COMMA);
1920 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1922 cat.percentile = lex_number (lexer);
1926 if (!lex_force_match (lexer, T_RPAREN))
1929 else if (ctables_function_availability (cat.sort_function)
1932 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1937 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1939 lex_match (lexer, T_EQUALS);
1940 if (lex_match_id (lexer, "INCLUDE"))
1941 cat.include_missing = true;
1942 else if (lex_match_id (lexer, "EXCLUDE"))
1943 cat.include_missing = false;
1946 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1950 else if (lex_match_id (lexer, "TOTAL"))
1952 lex_match (lexer, T_EQUALS);
1953 if (!parse_bool (lexer, &show_totals))
1956 else if (lex_match_id (lexer, "LABEL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!lex_force_string (lexer))
1962 total_label = ss_xstrdup (lex_tokss (lexer));
1965 else if (lex_match_id (lexer, "POSITION"))
1967 lex_match (lexer, T_EQUALS);
1968 if (lex_match_id (lexer, "BEFORE"))
1969 totals_before = true;
1970 else if (lex_match_id (lexer, "AFTER"))
1971 totals_before = false;
1974 lex_error_expecting (lexer, "BEFORE", "AFTER");
1978 else if (lex_match_id (lexer, "EMPTY"))
1980 lex_match (lexer, T_EQUALS);
1981 if (lex_match_id (lexer, "INCLUDE"))
1982 c->show_empty = true;
1983 else if (lex_match_id (lexer, "EXCLUDE"))
1984 c->show_empty = false;
1987 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two "expecting" messages: the longer list includes the keywords that
   are matched only while no categories have been added. */
1994 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1995 "TOTAL", "LABEL", "POSITION", "EMPTY");
1997 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category. */
2004 if (c->n_cats >= allocated_cats)
2005 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2006 c->cats[c->n_cats++] = cat;
/* Make room for the totals category and place it first or last. */
2011 if (c->n_cats >= allocated_cats)
2012 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2014 struct ctables_category *totals;
2017 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2018 totals = &c->cats[0];
2021 totals = &c->cats[c->n_cats];
2024 *totals = (struct ctables_category) {
2026 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, attaching the current subtotal to categories. */
2030 struct ctables_category *subtotal = NULL;
2031 for (size_t i = totals_before ? 0 : c->n_cats;
2032 totals_before ? i < c->n_cats : i-- > 0;
2033 totals_before ? i++ : 0)
2035 struct ctables_category *cat = &c->cats[i];
2044 cat->subtotal = subtotal;
2047 case CCT_POSTCOMPUTE:
2058 case CCT_EXCLUDED_MISSING:
/* Validation pass, only when an explicit category list was given. */
2063 if (cats_start_ofs != -1)
2065 for (size_t i = 0; i < c->n_cats; i++)
2067 struct ctables_category *cat = &c->cats[i];
2070 case CCT_POSTCOMPUTE:
2071 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2072 struct msg_location *cats_location
2073 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2074 bool ok = ctables_recursive_check_postcompute (
2075 dict, cat->pc->expr, cat, c, cats_location);
2076 msg_location_destroy (cats_location);
2083 for (size_t j = 0; j < n_vars; j++)
2084 if (var_is_alpha (vars[j]))
2086 msg_at (SE, cat->location,
2087 _("This category specification may be applied "
2088 "only to numeric variables, but this "
2089 "subcommand tries to apply it to string "
2091 var_get_name (vars[j]));
/* A string category that parses in the common format is replaced by the
   equivalent numeric category. */
2100 if (!parse_category_string (cat->location, cat->string, dict,
2101 common_format->type, &n))
2104 ss_dealloc (&cat->string);
2106 cat->type = CCT_NUMBER;
2109 else if (!all_strings (vars, n_vars, cat))
/* Likewise for string ranges; a null endpoint string is not parsed. */
2118 if (!cat->srange[0].string)
2120 else if (!parse_category_string (cat->location,
2121 cat->srange[0], dict,
2122 common_format->type, &n[0]))
2125 if (!cat->srange[1].string)
2127 else if (!parse_category_string (cat->location,
2128 cat->srange[1], dict,
2129 common_format->type, &n[1]))
2132 ss_dealloc (&cat->srange[0]);
2133 ss_dealloc (&cat->srange[1]);
2135 cat->type = CCT_NRANGE;
2136 cat->nrange[0] = n[0];
2137 cat->nrange[1] = n[1];
2139 else if (!all_strings (vars, n_vars, cat))
2150 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: uninitializes the summary specification
   set for every summary variant and frees the domain array for every domain
   type.  NEST itself is not freed by the visible lines.

   NOTE(review): one or more statements are missing from this excerpt, so
   other members may be released as well. */
2165 ctables_nest_uninit (struct ctables_nest *nest)
2168 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2169 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2170 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2171 free (nest->domains[dt]);
/* Uninitializes each of the STACK->n nests in STACK and then frees the array
   that holds them.  STACK itself is not freed by the visible lines.

   NOTE(review): a statement before the loop is missing from this excerpt;
   presumably it guards against a null STACK. */
2175 ctables_stack_uninit (struct ctables_stack *stack)
2179 for (size_t i = 0; i < stack->n; i++)
2180 ctables_nest_uninit (&stack->nests[i]);
2181 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: for every pair of a nest A from S0
   and a nest B from S1, produces a new nest whose variables are A's followed
   by B's.  The result therefore has room for S0.n * S1.n nests.

   Each new nest takes its summary specifications, cloned for every summary
   variant, from whichever input was selected as SUMMARY_SRC, a choice based
   on which of A and B has a variable in its CSV_CELL specs.  Its scale
   index is A's if A has one, otherwise B's shifted by A's variable count.

   S0 and S1 are uninitialized before returning, so the caller must not use
   them afterward.

   NOTE(review): the allocation passes S1.n * sizeof as a single argument to
   xnmalloc(), so that product is computed outside xnmalloc()'s own overflow
   check.  The early-return lines at the top of the function are not visible
   in this excerpt. */
2185 static struct ctables_stack
2186 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2193 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2194 for (size_t i = 0; i < s0.n; i++)
2195 for (size_t j = 0; j < s1.n; j++)
2197 const struct ctables_nest *a = &s0.nests[i];
2198 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2200 size_t allocate = a->n + b->n;
2201 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2203 for (size_t k = 0; k < a->n; k++)
2204 vars[n++] = a->vars[k];
2205 for (size_t k = 0; k < b->n; k++)
2206 vars[n++] = b->vars[k];
2207 assert (n == allocate);
/* Decide which input supplies the summary specifications. */
2209 const struct ctables_nest *summary_src;
2210 if (!a->specs[CSV_CELL].var)
2212 else if (!b->specs[CSV_CELL].var)
2217 struct ctables_nest *new = &stack.nests[stack.n++];
2218 *new = (struct ctables_nest) {
2220 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2221 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2225 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2226 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2228 ctables_stack_uninit (&s0);
2229 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by stacking: the result holds S0's nests
   followed by S1's, copied by value into a new array sized for S0.n + S1.n
   nests.  The group_head of each nest that came from S1 is increased by S0.n
   to account for its new position.

   NOTE(review): the lines that dispose of the input arrays and return the
   result are not visible in this excerpt. */
2233 static struct ctables_stack
2234 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2236 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2237 for (size_t i = 0; i < s0.n; i++)
2238 stack.nests[stack.n++] = s0.nests[i];
2239 for (size_t i = 0; i < s1.n; i++)
2241 stack.nests[stack.n] = s1.nests[i];
2242 stack.nests[stack.n].group_head += s0.n;
2245 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing exactly one newly allocated nest for the
   variable axis A.  The nest's scale index is 0 when A is a scale variable
   and SIZE_MAX otherwise.

   When A has cell summary specifications or is a scale variable, A's
   specifications are cloned into the nest for every summary variant, and
   each cloned set is tagged with A's variable and scale flag.

   NOTE(review): the line that fills the one-element VARS array and the
   other nest initializers are not visible in this excerpt. */
2251 static struct ctables_stack
2252 var_fts (const struct ctables_axis *a)
2254 struct variable **vars = xmalloc (sizeof *vars);
2257 struct ctables_nest *nest = xmalloc (sizeof *nest);
2258 *nest = (struct ctables_nest) {
2261 .scale_idx = a->scale ? 0 : SIZE_MAX,
2263 if (a->specs[CSV_CELL].n || a->scale)
2264 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2266 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2267 nest->specs[sv].var = a->var;
2268 nest->specs[sv].is_scale = a->scale;
2270 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively expands axis expression A into a stack of nests.  One branch
   yields an empty stack, one combines the expansions of A's two
   sub-expressions with stack_fts(), and one combines them with nest_fts().
   AXIS_TYPE is passed unchanged to the recursive calls.

   NOTE(review): the conditions and case labels that select each branch, and
   the branch for a plain variable, are not visible in this excerpt. */
2273 static struct ctables_stack
2274 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2277 return (struct ctables_stack) { .n = 0 };
2285 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2286 enumerate_fts (axis_type, a->subs[1]));
2289 /* This should consider any of the scale variables found in the result to
2290 be linked to each other listwise for SMISSING=LISTWISE. */
2291 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for a single summary statistic.  The member in use
   depends on the summary function, as the per-member comments below
   indicate: the moments accumulator serves the mean-like functions and the
   casewriter serves the order statistics.

   NOTE(review): several member declarations are not visible in this
   excerpt. */
2298 union ctables_summary
2300 /* COUNT, VALIDN, TOTALN. */
2303 /* MINIMUM, MAXIMUM, RANGE. */
2310 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2311 struct moments1 *moments;
2313 /* MEDIAN, MODE, PTILE. */
2316 struct casewriter *writer;
2321 /* XXX multiple response */
/* Initializes summary accumulator S for the function in SS->function.

   The visible initializations are: the minimum and maximum are both set to
   SYSMIS; the moments-based functions get a new moments accumulator that
   tracks up to the variance; and the order-statistic functions get a sorting
   casewriter over two numeric columns, sorted ascending on column 0.

   NOTE(review): some case labels and the initialization for the count-based
   groups are not visible in this excerpt. */
2325 ctables_summary_init (union ctables_summary *s,
2326 const struct ctables_summary_spec *ss)
2328 switch (ss->function)
/* Weighted percentage counts. */
2332 case CTSF_ROWPCT_COUNT:
2333 case CTSF_COLPCT_COUNT:
2334 case CTSF_TABLEPCT_COUNT:
2335 case CTSF_SUBTABLEPCT_COUNT:
2336 case CTSF_LAYERPCT_COUNT:
2337 case CTSF_LAYERROWPCT_COUNT:
2338 case CTSF_LAYERCOLPCT_COUNT:
2339 case CTSF_ROWPCT_VALIDN:
2340 case CTSF_COLPCT_VALIDN:
2341 case CTSF_TABLEPCT_VALIDN:
2342 case CTSF_SUBTABLEPCT_VALIDN:
2343 case CTSF_LAYERPCT_VALIDN:
2344 case CTSF_LAYERROWPCT_VALIDN:
2345 case CTSF_LAYERCOLPCT_VALIDN:
2346 case CTSF_ROWPCT_TOTALN:
2347 case CTSF_COLPCT_TOTALN:
2348 case CTSF_TABLEPCT_TOTALN:
2349 case CTSF_SUBTABLEPCT_TOTALN:
2350 case CTSF_LAYERPCT_TOTALN:
2351 case CTSF_LAYERROWPCT_TOTALN:
2352 case CTSF_LAYERCOLPCT_TOTALN:
/* Unweighted (U-prefixed) percentage counts. */
2359 case CTSF_UROWPCT_COUNT:
2360 case CTSF_UCOLPCT_COUNT:
2361 case CTSF_UTABLEPCT_COUNT:
2362 case CTSF_USUBTABLEPCT_COUNT:
2363 case CTSF_ULAYERPCT_COUNT:
2364 case CTSF_ULAYERROWPCT_COUNT:
2365 case CTSF_ULAYERCOLPCT_COUNT:
2366 case CTSF_UROWPCT_VALIDN:
2367 case CTSF_UCOLPCT_VALIDN:
2368 case CTSF_UTABLEPCT_VALIDN:
2369 case CTSF_USUBTABLEPCT_VALIDN:
2370 case CTSF_ULAYERPCT_VALIDN:
2371 case CTSF_ULAYERROWPCT_VALIDN:
2372 case CTSF_ULAYERCOLPCT_VALIDN:
2373 case CTSF_UROWPCT_TOTALN:
2374 case CTSF_UCOLPCT_TOTALN:
2375 case CTSF_UTABLEPCT_TOTALN:
2376 case CTSF_USUBTABLEPCT_TOTALN:
2377 case CTSF_ULAYERPCT_TOTALN:
2378 case CTSF_ULAYERROWPCT_TOTALN:
2379 case CTSF_ULAYERCOLPCT_TOTALN:
/* Domain identifier functions. */
2389 case CTSF_SUBTABLE_ID:
2391 case CTSF_LAYERROW_ID:
2392 case CTSF_LAYERCOL_ID:
/* SYSMIS marks "no value seen yet" for the minimum and maximum. */
2398 s->min = s->max = SYSMIS;
/* Functions computed from moments. */
2406 case CTSF_ROWPCT_SUM:
2407 case CTSF_COLPCT_SUM:
2408 case CTSF_TABLEPCT_SUM:
2409 case CTSF_SUBTABLEPCT_SUM:
2410 case CTSF_LAYERPCT_SUM:
2411 case CTSF_LAYERROWPCT_SUM:
2412 case CTSF_LAYERCOLPCT_SUM:
2417 case CTSF_UVARIANCE:
2418 case CTSF_UROWPCT_SUM:
2419 case CTSF_UCOLPCT_SUM:
2420 case CTSF_UTABLEPCT_SUM:
2421 case CTSF_USUBTABLEPCT_SUM:
2422 case CTSF_ULAYERPCT_SUM:
2423 case CTSF_ULAYERROWPCT_SUM:
2424 case CTSF_ULAYERCOLPCT_SUM:
2425 s->moments = moments1_create (MOMENT_VARIANCE);
/* Order statistics: a two-column numeric case layout, with the writer
   sorting on column 0.  ctables_summary_add() stores the value in column 0
   and its weight in column 1. */
2435 struct caseproto *proto = caseproto_create ();
2436 proto = caseproto_add_width (proto, 0);
2437 proto = caseproto_add_width (proto, 0);
2439 struct subcase ordering;
2440 subcase_init (&ordering, 0, 0, SC_ASCEND);
2441 s->writer = sort_create_writer (&ordering, proto);
2442 subcase_uninit (&ordering);
2443 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the function
   in SS->function: the moments accumulator for the moments-based functions
   and the casewriter for the order-statistic functions.

   NOTE(review): some case labels are not visible in this excerpt; the
   count-based and identifier groups show no cleanup statement here. */
2453 ctables_summary_uninit (union ctables_summary *s,
2454 const struct ctables_summary_spec *ss)
2456 switch (ss->function)
/* Weighted percentage counts. */
2460 case CTSF_ROWPCT_COUNT:
2461 case CTSF_COLPCT_COUNT:
2462 case CTSF_TABLEPCT_COUNT:
2463 case CTSF_SUBTABLEPCT_COUNT:
2464 case CTSF_LAYERPCT_COUNT:
2465 case CTSF_LAYERROWPCT_COUNT:
2466 case CTSF_LAYERCOLPCT_COUNT:
2467 case CTSF_ROWPCT_VALIDN:
2468 case CTSF_COLPCT_VALIDN:
2469 case CTSF_TABLEPCT_VALIDN:
2470 case CTSF_SUBTABLEPCT_VALIDN:
2471 case CTSF_LAYERPCT_VALIDN:
2472 case CTSF_LAYERROWPCT_VALIDN:
2473 case CTSF_LAYERCOLPCT_VALIDN:
2474 case CTSF_ROWPCT_TOTALN:
2475 case CTSF_COLPCT_TOTALN:
2476 case CTSF_TABLEPCT_TOTALN:
2477 case CTSF_SUBTABLEPCT_TOTALN:
2478 case CTSF_LAYERPCT_TOTALN:
2479 case CTSF_LAYERROWPCT_TOTALN:
2480 case CTSF_LAYERCOLPCT_TOTALN:
/* Unweighted (U-prefixed) percentage counts. */
2487 case CTSF_UROWPCT_COUNT:
2488 case CTSF_UCOLPCT_COUNT:
2489 case CTSF_UTABLEPCT_COUNT:
2490 case CTSF_USUBTABLEPCT_COUNT:
2491 case CTSF_ULAYERPCT_COUNT:
2492 case CTSF_ULAYERROWPCT_COUNT:
2493 case CTSF_ULAYERCOLPCT_COUNT:
2494 case CTSF_UROWPCT_VALIDN:
2495 case CTSF_UCOLPCT_VALIDN:
2496 case CTSF_UTABLEPCT_VALIDN:
2497 case CTSF_USUBTABLEPCT_VALIDN:
2498 case CTSF_ULAYERPCT_VALIDN:
2499 case CTSF_ULAYERROWPCT_VALIDN:
2500 case CTSF_ULAYERCOLPCT_VALIDN:
2501 case CTSF_UROWPCT_TOTALN:
2502 case CTSF_UCOLPCT_TOTALN:
2503 case CTSF_UTABLEPCT_TOTALN:
2504 case CTSF_USUBTABLEPCT_TOTALN:
2505 case CTSF_ULAYERPCT_TOTALN:
2506 case CTSF_ULAYERROWPCT_TOTALN:
2507 case CTSF_ULAYERCOLPCT_TOTALN:
/* Domain identifier functions. */
2516 case CTSF_SUBTABLE_ID:
2518 case CTSF_LAYERROW_ID:
2519 case CTSF_LAYERCOL_ID:
/* Functions computed from moments. */
2532 case CTSF_ROWPCT_SUM:
2533 case CTSF_COLPCT_SUM:
2534 case CTSF_TABLEPCT_SUM:
2535 case CTSF_SUBTABLEPCT_SUM:
2536 case CTSF_LAYERPCT_SUM:
2537 case CTSF_LAYERROWPCT_SUM:
2538 case CTSF_LAYERCOLPCT_SUM:
2543 case CTSF_UVARIANCE:
2544 case CTSF_UROWPCT_SUM:
2545 case CTSF_UCOLPCT_SUM:
2546 case CTSF_UTABLEPCT_SUM:
2547 case CTSF_USUBTABLEPCT_SUM:
2548 case CTSF_ULAYERPCT_SUM:
2549 case CTSF_ULAYERROWPCT_SUM:
2550 case CTSF_ULAYERCOLPCT_SUM:
2551 moments1_destroy (s->moments);
/* Order statistics. */
2560 casewriter_destroy (s->writer);
/* Folds one observation into summary accumulator S for the function in
   SS->function.

   VAR and VALUE are the summarized variable and its value in the current
   case.  IS_SCALE says whether VAR is a scale variable and IS_SCALE_MISSING
   whether its value is to be skipped by the scale statistics.
   EXCLUDED_MISSING says whether the case has a missing value in an excluded
   category.  D_WEIGHT and E_WEIGHT are two case weights; which one is added
   depends on the function.

   Visible behavior by group:

   - The weighted percentage TOTALN functions add D_WEIGHT to the count
     unconditionally.

   - The weighted percentage COUNT functions add D_WEIGHT only when
     IS_SCALE || !EXCLUDED_MISSING.

   - The minimum and maximum are updated from VALUE->f when
     !IS_SCALE_MISSING; SYSMIS means no value has been seen yet.

   - The weighted moments functions add VALUE->f with weight E_WEIGHT, and
     the unweighted ones with weight 1.0, in both cases only when
     !IS_SCALE_MISSING.

   - The order statistics, when !IS_SCALE_MISSING, add E_WEIGHT to the valid
     total and write a case holding VALUE->f and E_WEIGHT to the sorting
     writer.  One path first resets both weights to 1.0.

   NOTE(review): many case labels and conditions are not visible in this
   excerpt (including what IS_MISSING gates), and the explanatory block
   comment inside the body has lost its terminator line here, so no further
   comments are added inside the body. */
2566 ctables_summary_add (union ctables_summary *s,
2567 const struct ctables_summary_spec *ss,
2568 const struct variable *var, const union value *value,
2569 bool is_scale, bool is_scale_missing,
2570 bool is_missing, bool excluded_missing,
2571 double d_weight, double e_weight)
2573 /* To determine whether a case is included in a given table for a particular
2574 kind of summary, consider the following charts for each variable in the
2575 table. Only if "yes" appears for every variable for the summary is the
2578 Categorical variables: VALIDN COUNT TOTALN
2579 Valid values in included categories yes yes yes
2580 Missing values in included categories --- yes yes
2581 Missing values in excluded categories --- --- yes
2582 Valid values in excluded categories --- --- ---
2584 Scale variables: VALIDN COUNT TOTALN
2585 Valid value yes yes yes
2586 Missing value --- yes yes
2588 Missing values include both user- and system-missing. (The system-missing
2589 value is always in an excluded category.)
2591 switch (ss->function)
2594 case CTSF_ROWPCT_TOTALN:
2595 case CTSF_COLPCT_TOTALN:
2596 case CTSF_TABLEPCT_TOTALN:
2597 case CTSF_SUBTABLEPCT_TOTALN:
2598 case CTSF_LAYERPCT_TOTALN:
2599 case CTSF_LAYERROWPCT_TOTALN:
2600 case CTSF_LAYERCOLPCT_TOTALN:
2601 s->count += d_weight;
2605 case CTSF_UROWPCT_TOTALN:
2606 case CTSF_UCOLPCT_TOTALN:
2607 case CTSF_UTABLEPCT_TOTALN:
2608 case CTSF_USUBTABLEPCT_TOTALN:
2609 case CTSF_ULAYERPCT_TOTALN:
2610 case CTSF_ULAYERROWPCT_TOTALN:
2611 case CTSF_ULAYERCOLPCT_TOTALN:
2616 case CTSF_ROWPCT_COUNT:
2617 case CTSF_COLPCT_COUNT:
2618 case CTSF_TABLEPCT_COUNT:
2619 case CTSF_SUBTABLEPCT_COUNT:
2620 case CTSF_LAYERPCT_COUNT:
2621 case CTSF_LAYERROWPCT_COUNT:
2622 case CTSF_LAYERCOLPCT_COUNT:
2623 if (is_scale || !excluded_missing)
2624 s->count += d_weight;
2628 case CTSF_UROWPCT_COUNT:
2629 case CTSF_UCOLPCT_COUNT:
2630 case CTSF_UTABLEPCT_COUNT:
2631 case CTSF_USUBTABLEPCT_COUNT:
2632 case CTSF_ULAYERPCT_COUNT:
2633 case CTSF_ULAYERROWPCT_COUNT:
2634 case CTSF_ULAYERCOLPCT_COUNT:
2635 if (is_scale || !excluded_missing)
2640 case CTSF_ROWPCT_VALIDN:
2641 case CTSF_COLPCT_VALIDN:
2642 case CTSF_TABLEPCT_VALIDN:
2643 case CTSF_SUBTABLEPCT_VALIDN:
2644 case CTSF_LAYERPCT_VALIDN:
2645 case CTSF_LAYERROWPCT_VALIDN:
2646 case CTSF_LAYERCOLPCT_VALIDN:
2650 s->count += d_weight;
2654 case CTSF_UROWPCT_VALIDN:
2655 case CTSF_UCOLPCT_VALIDN:
2656 case CTSF_UTABLEPCT_VALIDN:
2657 case CTSF_USUBTABLEPCT_VALIDN:
2658 case CTSF_ULAYERPCT_VALIDN:
2659 case CTSF_ULAYERROWPCT_VALIDN:
2660 case CTSF_ULAYERCOLPCT_VALIDN:
2670 case CTSF_SUBTABLE_ID:
2672 case CTSF_LAYERROW_ID:
2673 case CTSF_LAYERCOL_ID:
2678 s->count += d_weight;
2687 if (is_scale || !excluded_missing)
2688 s->count += e_weight;
2695 s->count += e_weight;
2699 s->count += e_weight;
2705 if (!is_scale_missing)
2707 assert (!var_is_alpha (var)); /* XXX? */
2708 if (s->min == SYSMIS || value->f < s->min)
2710 if (s->max == SYSMIS || value->f > s->max)
2720 case CTSF_ROWPCT_SUM:
2721 case CTSF_COLPCT_SUM:
2722 case CTSF_TABLEPCT_SUM:
2723 case CTSF_SUBTABLEPCT_SUM:
2724 case CTSF_LAYERPCT_SUM:
2725 case CTSF_LAYERROWPCT_SUM:
2726 case CTSF_LAYERCOLPCT_SUM:
2727 if (!is_scale_missing)
2728 moments1_add (s->moments, value->f, e_weight);
2735 case CTSF_UVARIANCE:
2736 case CTSF_UROWPCT_SUM:
2737 case CTSF_UCOLPCT_SUM:
2738 case CTSF_UTABLEPCT_SUM:
2739 case CTSF_USUBTABLEPCT_SUM:
2740 case CTSF_ULAYERPCT_SUM:
2741 case CTSF_ULAYERROWPCT_SUM:
2742 case CTSF_ULAYERCOLPCT_SUM:
2743 if (!is_scale_missing)
2744 moments1_add (s->moments, value->f, 1.0);
2750 d_weight = e_weight = 1.0;
2755 if (!is_scale_missing)
2757 s->ovalid += e_weight;
2759 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2760 *case_num_rw_idx (c, 0) = value->f;
2761 *case_num_rw_idx (c, 1) = e_weight;
2762 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the domain type over which it is computed.  Three
   results are visible: the LAYERCOL percentages and identifier map to
   CTDT_LAYERCOL, the LAYERROW ones to CTDT_LAYERROW, and the SUBTABLE ones
   to CTDT_SUBTABLE.

   NOTE(review): the return statements for the remaining groups (column,
   layer, row, table and the non-percentage functions) are not visible in
   this excerpt; presumably each maps to the similarly named domain type. */
2768 static enum ctables_domain_type
2769 ctables_function_domain (enum ctables_summary_function function)
2799 case CTSF_UVARIANCE:
/* Column percentages. */
2805 case CTSF_COLPCT_COUNT:
2806 case CTSF_COLPCT_SUM:
2807 case CTSF_COLPCT_TOTALN:
2808 case CTSF_COLPCT_VALIDN:
2809 case CTSF_UCOLPCT_COUNT:
2810 case CTSF_UCOLPCT_SUM:
2811 case CTSF_UCOLPCT_TOTALN:
2812 case CTSF_UCOLPCT_VALIDN:
/* Layer-column percentages and identifier. */
2816 case CTSF_LAYERCOLPCT_COUNT:
2817 case CTSF_LAYERCOLPCT_SUM:
2818 case CTSF_LAYERCOLPCT_TOTALN:
2819 case CTSF_LAYERCOLPCT_VALIDN:
2820 case CTSF_ULAYERCOLPCT_COUNT:
2821 case CTSF_ULAYERCOLPCT_SUM:
2822 case CTSF_ULAYERCOLPCT_TOTALN:
2823 case CTSF_ULAYERCOLPCT_VALIDN:
2824 case CTSF_LAYERCOL_ID:
2825 return CTDT_LAYERCOL;
/* Layer percentages. */
2827 case CTSF_LAYERPCT_COUNT:
2828 case CTSF_LAYERPCT_SUM:
2829 case CTSF_LAYERPCT_TOTALN:
2830 case CTSF_LAYERPCT_VALIDN:
2831 case CTSF_ULAYERPCT_COUNT:
2832 case CTSF_ULAYERPCT_SUM:
2833 case CTSF_ULAYERPCT_TOTALN:
2834 case CTSF_ULAYERPCT_VALIDN:
/* Layer-row percentages and identifier. */
2838 case CTSF_LAYERROWPCT_COUNT:
2839 case CTSF_LAYERROWPCT_SUM:
2840 case CTSF_LAYERROWPCT_TOTALN:
2841 case CTSF_LAYERROWPCT_VALIDN:
2842 case CTSF_ULAYERROWPCT_COUNT:
2843 case CTSF_ULAYERROWPCT_SUM:
2844 case CTSF_ULAYERROWPCT_TOTALN:
2845 case CTSF_ULAYERROWPCT_VALIDN:
2846 case CTSF_LAYERROW_ID:
2847 return CTDT_LAYERROW;
/* Row percentages. */
2849 case CTSF_ROWPCT_COUNT:
2850 case CTSF_ROWPCT_SUM:
2851 case CTSF_ROWPCT_TOTALN:
2852 case CTSF_ROWPCT_VALIDN:
2853 case CTSF_UROWPCT_COUNT:
2854 case CTSF_UROWPCT_SUM:
2855 case CTSF_UROWPCT_TOTALN:
2856 case CTSF_UROWPCT_VALIDN:
/* Subtable percentages and identifier. */
2860 case CTSF_SUBTABLEPCT_COUNT:
2861 case CTSF_SUBTABLEPCT_SUM:
2862 case CTSF_SUBTABLEPCT_TOTALN:
2863 case CTSF_SUBTABLEPCT_VALIDN:
2864 case CTSF_USUBTABLEPCT_COUNT:
2865 case CTSF_USUBTABLEPCT_SUM:
2866 case CTSF_USUBTABLEPCT_TOTALN:
2867 case CTSF_USUBTABLEPCT_VALIDN:
2868 case CTSF_SUBTABLE_ID:
2869 return CTDT_SUBTABLE;
/* Table percentages. */
2871 case CTSF_TABLEPCT_COUNT:
2872 case CTSF_TABLEPCT_SUM:
2873 case CTSF_TABLEPCT_TOTALN:
2874 case CTSF_TABLEPCT_VALIDN:
2875 case CTSF_UTABLEPCT_COUNT:
2876 case CTSF_UTABLEPCT_SUM:
2877 case CTSF_UTABLEPCT_TOTALN:
2878 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION into two groups: the percentage functions
   whose names end in _SUM (the last group of case labels below) and all
   others.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the _SUM percentage group yields true and every other function
   false.  The declared return type is enum ctables_domain_type although the
   name reads as a yes/no predicate; confirm whether bool was intended. */
2886 static enum ctables_domain_type
2887 ctables_function_is_pctsum (enum ctables_summary_function function)
2917 case CTSF_UVARIANCE:
/* Percentages of counts, not of sums. */
2921 case CTSF_COLPCT_COUNT:
2922 case CTSF_COLPCT_TOTALN:
2923 case CTSF_COLPCT_VALIDN:
2924 case CTSF_UCOLPCT_COUNT:
2925 case CTSF_UCOLPCT_TOTALN:
2926 case CTSF_UCOLPCT_VALIDN:
2927 case CTSF_LAYERCOLPCT_COUNT:
2928 case CTSF_LAYERCOLPCT_TOTALN:
2929 case CTSF_LAYERCOLPCT_VALIDN:
2930 case CTSF_ULAYERCOLPCT_COUNT:
2931 case CTSF_ULAYERCOLPCT_TOTALN:
2932 case CTSF_ULAYERCOLPCT_VALIDN:
2933 case CTSF_LAYERPCT_COUNT:
2934 case CTSF_LAYERPCT_TOTALN:
2935 case CTSF_LAYERPCT_VALIDN:
2936 case CTSF_ULAYERPCT_COUNT:
2937 case CTSF_ULAYERPCT_TOTALN:
2938 case CTSF_ULAYERPCT_VALIDN:
2939 case CTSF_LAYERROWPCT_COUNT:
2940 case CTSF_LAYERROWPCT_TOTALN:
2941 case CTSF_LAYERROWPCT_VALIDN:
2942 case CTSF_ULAYERROWPCT_COUNT:
2943 case CTSF_ULAYERROWPCT_TOTALN:
2944 case CTSF_ULAYERROWPCT_VALIDN:
2945 case CTSF_ROWPCT_COUNT:
2946 case CTSF_ROWPCT_TOTALN:
2947 case CTSF_ROWPCT_VALIDN:
2948 case CTSF_UROWPCT_COUNT:
2949 case CTSF_UROWPCT_TOTALN:
2950 case CTSF_UROWPCT_VALIDN:
2951 case CTSF_SUBTABLEPCT_COUNT:
2952 case CTSF_SUBTABLEPCT_TOTALN:
2953 case CTSF_SUBTABLEPCT_VALIDN:
2954 case CTSF_USUBTABLEPCT_COUNT:
2955 case CTSF_USUBTABLEPCT_TOTALN:
2956 case CTSF_USUBTABLEPCT_VALIDN:
2957 case CTSF_TABLEPCT_COUNT:
2958 case CTSF_TABLEPCT_TOTALN:
2959 case CTSF_TABLEPCT_VALIDN:
2960 case CTSF_UTABLEPCT_COUNT:
2961 case CTSF_UTABLEPCT_TOTALN:
2962 case CTSF_UTABLEPCT_VALIDN:
/* Domain identifier functions. */
2966 case CTSF_SUBTABLE_ID:
2968 case CTSF_LAYERROW_ID:
2969 case CTSF_LAYERCOL_ID:
/* Percentages of sums. */
2972 case CTSF_COLPCT_SUM:
2973 case CTSF_UCOLPCT_SUM:
2974 case CTSF_LAYERCOLPCT_SUM:
2975 case CTSF_ULAYERCOLPCT_SUM:
2976 case CTSF_LAYERPCT_SUM:
2977 case CTSF_ULAYERPCT_SUM:
2978 case CTSF_LAYERROWPCT_SUM:
2979 case CTSF_ULAYERROWPCT_SUM:
2980 case CTSF_ROWPCT_SUM:
2981 case CTSF_UROWPCT_SUM:
2982 case CTSF_SUBTABLEPCT_SUM:
2983 case CTSF_USUBTABLEPCT_SUM:
2984 case CTSF_TABLEPCT_SUM:
2985 case CTSF_UTABLEPCT_SUM:
/* Computes the final value of the summary in S for CELL, according to
   SS->function.

   Visible behavior by group:

   - Identifier functions return the sequence number of CELL's domain for
     that function.

   - Percentage functions divide S->count by the matching total of CELL's
     domain (e_count, u_count, e_valid, u_valid, e_total or u_total) and
     multiply by 100, guarding against a zero total.

   - The range is maximum minus minimum, or SYSMIS if either is SYSMIS.

   - Moments-based functions call moments1_calculate() and derive the mean,
     standard error, standard deviation (the square root of the variance),
     sum (weight times mean) or variance, yielding SYSMIS when an input is
     SYSMIS.

   - Percentage-of-sum functions divide weight times mean by the domain's
     e_sum or u_sum for SS->sum_var_idx, times 100, yielding SYSMIS when the
     denominator is zero.

   - Order statistics convert the casewriter into a reader, run it through a
     percentile or mode statistic and store the result in S->ovalue.  The
     percentile is SS->percentile for CTSF_PTILE and 0.5 otherwise.

   NOTE(review): many case labels, guards and return statements are not
   visible in this excerpt, including the value used when a percentage's
   denominator is zero. */
2993 ctables_summary_value (const struct ctables_cell *cell,
2994 union ctables_summary *s,
2995 const struct ctables_summary_spec *ss)
2997 switch (ss->function)
3007 case CTSF_SUBTABLE_ID:
3009 case CTSF_LAYERROW_ID:
3010 case CTSF_LAYERCOL_ID:
3011 return cell->domains[ctables_function_domain (ss->function)]->sequence;
/* Weighted count as a percentage of the domain's weighted count. */
3013 case CTSF_ROWPCT_COUNT:
3014 case CTSF_COLPCT_COUNT:
3015 case CTSF_TABLEPCT_COUNT:
3016 case CTSF_SUBTABLEPCT_COUNT:
3017 case CTSF_LAYERPCT_COUNT:
3018 case CTSF_LAYERROWPCT_COUNT:
3019 case CTSF_LAYERCOLPCT_COUNT:
3021 enum ctables_domain_type d = ctables_function_domain (ss->function);
3022 return (cell->domains[d]->e_count
3023 ? s->count / cell->domains[d]->e_count * 100
/* Unweighted count as a percentage of the domain's unweighted count. */
3027 case CTSF_UROWPCT_COUNT:
3028 case CTSF_UCOLPCT_COUNT:
3029 case CTSF_UTABLEPCT_COUNT:
3030 case CTSF_USUBTABLEPCT_COUNT:
3031 case CTSF_ULAYERPCT_COUNT:
3032 case CTSF_ULAYERROWPCT_COUNT:
3033 case CTSF_ULAYERCOLPCT_COUNT:
3035 enum ctables_domain_type d = ctables_function_domain (ss->function);
3036 return (cell->domains[d]->u_count
3037 ? s->count / cell->domains[d]->u_count * 100
/* Same pattern against the domain's valid totals. */
3041 case CTSF_ROWPCT_VALIDN:
3042 case CTSF_COLPCT_VALIDN:
3043 case CTSF_TABLEPCT_VALIDN:
3044 case CTSF_SUBTABLEPCT_VALIDN:
3045 case CTSF_LAYERPCT_VALIDN:
3046 case CTSF_LAYERROWPCT_VALIDN:
3047 case CTSF_LAYERCOLPCT_VALIDN:
3049 enum ctables_domain_type d = ctables_function_domain (ss->function);
3050 return (cell->domains[d]->e_valid
3051 ? s->count / cell->domains[d]->e_valid * 100
3055 case CTSF_UROWPCT_VALIDN:
3056 case CTSF_UCOLPCT_VALIDN:
3057 case CTSF_UTABLEPCT_VALIDN:
3058 case CTSF_USUBTABLEPCT_VALIDN:
3059 case CTSF_ULAYERPCT_VALIDN:
3060 case CTSF_ULAYERROWPCT_VALIDN:
3061 case CTSF_ULAYERCOLPCT_VALIDN:
3063 enum ctables_domain_type d = ctables_function_domain (ss->function);
3064 return (cell->domains[d]->u_valid
3065 ? s->count / cell->domains[d]->u_valid * 100
/* Same pattern against the domain's overall totals. */
3069 case CTSF_ROWPCT_TOTALN:
3070 case CTSF_COLPCT_TOTALN:
3071 case CTSF_TABLEPCT_TOTALN:
3072 case CTSF_SUBTABLEPCT_TOTALN:
3073 case CTSF_LAYERPCT_TOTALN:
3074 case CTSF_LAYERROWPCT_TOTALN:
3075 case CTSF_LAYERCOLPCT_TOTALN:
3077 enum ctables_domain_type d = ctables_function_domain (ss->function);
3078 return (cell->domains[d]->e_total
3079 ? s->count / cell->domains[d]->e_total * 100
3083 case CTSF_UROWPCT_TOTALN:
3084 case CTSF_UCOLPCT_TOTALN:
3085 case CTSF_UTABLEPCT_TOTALN:
3086 case CTSF_USUBTABLEPCT_TOTALN:
3087 case CTSF_ULAYERPCT_TOTALN:
3088 case CTSF_ULAYERROWPCT_TOTALN:
3089 case CTSF_ULAYERCOLPCT_TOTALN:
3091 enum ctables_domain_type d = ctables_function_domain (ss->function);
3092 return (cell->domains[d]->u_total
3093 ? s->count / cell->domains[d]->u_total * 100
/* Range: defined only when both extremes were observed. */
3114 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3120 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3127 double weight, variance;
3128 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3129 return calc_semean (variance, weight);
3136 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3137 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Sum is recovered as total weight times mean. */
3143 double weight, mean;
3144 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3145 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3149 case CTSF_UVARIANCE:
3152 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Weighted sum as a percentage of the domain's weighted sum. */
3156 case CTSF_ROWPCT_SUM:
3157 case CTSF_COLPCT_SUM:
3158 case CTSF_TABLEPCT_SUM:
3159 case CTSF_SUBTABLEPCT_SUM:
3160 case CTSF_LAYERPCT_SUM:
3161 case CTSF_LAYERROWPCT_SUM:
3162 case CTSF_LAYERCOLPCT_SUM:
3164 double weight, mean;
3165 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3166 if (weight == SYSMIS || mean == SYSMIS)
3168 enum ctables_domain_type d = ctables_function_domain (ss->function);
3169 double num = weight * mean;
3170 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3171 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Unweighted sum as a percentage of the domain's unweighted sum. */
3173 case CTSF_UROWPCT_SUM:
3174 case CTSF_UCOLPCT_SUM:
3175 case CTSF_UTABLEPCT_SUM:
3176 case CTSF_USUBTABLEPCT_SUM:
3177 case CTSF_ULAYERPCT_SUM:
3178 case CTSF_ULAYERROWPCT_SUM:
3179 case CTSF_ULAYERCOLPCT_SUM:
3181 double weight, mean;
3182 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3183 if (weight == SYSMIS || mean == SYSMIS)
3185 enum ctables_domain_type d = ctables_function_domain (ss->function);
3186 double num = weight * mean;
3187 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3188 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile (including the median): the reader is passed to the
   order-statistics accumulator, and the result is saved in S->ovalue. */
3197 struct casereader *reader = casewriter_make_reader (s->writer);
3200 struct percentile *ptile = percentile_create (
3201 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3202 struct order_stats *os = &ptile->parent;
3203 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3204 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3205 statistic_destroy (&ptile->parent.parent);
/* Mode: same procedure with a mode statistic. */
3213 struct casereader *reader = casewriter_make_reader (s->writer);
3216 struct mode *mode = mode_create ();
3217 struct order_stats *os = &mode->parent;
3218 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3219 s->ovalue = mode->mode;
3220 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): NEST supplies the
   variables to compare on and A selects which axis's cell values are read
   from each cell. */
3228 struct ctables_cell_sort_aux
3230 const struct ctables_nest *nest;
3231 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   pointer to a struct ctables_cell, and AUX_ points to a struct
   ctables_cell_sort_aux naming the nest and the axis to compare on.

   The cells are compared variable by variable in nest order, skipping the
   nest's scale variable.  Cells whose values belong to different categories
   are ordered by category address.  Within one category the comparison
   depends on the category type: some types compare the values directly,
   and one compares value labels with strcmp() and also compares the values.
   For two of the visible comparisons the result is negated when the
   category does not sort ascending.

   NOTE(review): several case labels and the conditions around the label
   comparison are not visible in this excerpt. */
3235 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3237 const struct ctables_cell_sort_aux *aux = aux_;
3238 struct ctables_cell *const *ap = a_;
3239 struct ctables_cell *const *bp = b_;
3240 const struct ctables_cell *a = *ap;
3241 const struct ctables_cell *b = *bp;
3243 const struct ctables_nest *nest = aux->nest;
3244 for (size_t i = 0; i < nest->n; i++)
3245 if (i != nest->scale_idx)
3247 const struct variable *var = nest->vars[i];
3248 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3249 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category objects' addresses. */
3250 if (a_cv->category != b_cv->category)
3251 return a_cv->category > b_cv->category ? 1 : -1;
3253 const union value *a_val = &a_cv->value;
3254 const union value *b_val = &b_cv->value;
3255 switch (a_cv->category->type)
3261 case CCT_POSTCOMPUTE:
3262 case CCT_EXCLUDED_MISSING:
3263 /* Must be equal. */
3271 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3279 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3281 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Sorting by label. */
3287 const char *a_label = var_lookup_value_label (var, a_val);
3288 const char *b_label = var_lookup_value_label (var, b_val);
3294 cmp = strcmp (a_label, b_label);
3300 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3303 return a_cv->category->sort_ascending ? cmp : -cmp;
3315 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3316 const void *aux UNUSED)
3318 struct ctables_cell *const *ap = a_;
3319 struct ctables_cell *const *bp = b_;
3320 const struct ctables_cell *a = *ap;
3321 const struct ctables_cell *b = *bp;
3323 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3325 int al = a->axes[axis].leaf;
3326 int bl = b->axes[axis].leaf;
3328 return al > bl ? 1 : -1;
3336 For each ctables_table:
3337 For each combination of row vars:
3338 For each combination of column vars:
3339 For each combination of layer vars:
3341 Make a table of row values:
3342 Sort entries by row values
3343 Assign a 0-based index to each actual value
3344 Construct a dimension
3345 Make a table of column values
3346 Make a table of layer values
3348 Fill the table entry using the indexes from before.
3351 static struct ctables_domain *
3352 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3353 enum ctables_domain_type domain)
3356 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3358 const struct ctables_nest *nest = s->nests[a];
3359 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3361 size_t v_idx = nest->domains[domain][i];
3362 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3363 hash = hash_pointer (cv->category, hash);
3364 if (cv->category->type != CCT_TOTAL
3365 && cv->category->type != CCT_SUBTOTAL
3366 && cv->category->type != CCT_POSTCOMPUTE)
3367 hash = value_hash (&cv->value,
3368 var_get_width (nest->vars[v_idx]), hash);
3372 struct ctables_domain *d;
3373 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3375 const struct ctables_cell *df = d->example;
3376 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3378 const struct ctables_nest *nest = s->nests[a];
3379 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3381 size_t v_idx = nest->domains[domain][i];
3382 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3383 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3384 if (cv1->category != cv2->category
3385 || (cv1->category->type != CCT_TOTAL
3386 && cv1->category->type != CCT_SUBTOTAL
3387 && cv1->category->type != CCT_POSTCOMPUTE
3388 && !value_equal (&cv1->value, &cv2->value,
3389 var_get_width (nest->vars[v_idx]))))
3398 struct ctables_sum *sums = (s->table->n_sum_vars
3399 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3402 d = xmalloc (sizeof *d);
3403 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3404 hmap_insert (&s->domains[domain], &d->node, hash);
3408 static struct substring
3409 rtrim_value (const union value *v, const struct variable *var)
3411 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3412 var_get_width (var));
3413 ss_rtrim (&s, ss_cstr (" "));
3418 in_string_range (const union value *v, const struct variable *var,
3419 const struct substring *srange)
3421 struct substring s = rtrim_value (v, var);
3422 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3423 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3426 static const struct ctables_category *
3427 ctables_categories_match (const struct ctables_categories *c,
3428 const union value *v, const struct variable *var)
3430 if (var_is_numeric (var) && v->f == SYSMIS)
3433 const struct ctables_category *othernm = NULL;
3434 for (size_t i = c->n_cats; i-- > 0; )
3436 const struct ctables_category *cat = &c->cats[i];
3440 if (cat->number == v->f)
3445 if (ss_equals (cat->string, rtrim_value (v, var)))
3450 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3451 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3456 if (in_string_range (v, var, cat->srange))
3461 if (var_is_value_missing (var, v))
3465 case CCT_POSTCOMPUTE:
3480 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3483 case CCT_EXCLUDED_MISSING:
3488 return var_is_value_missing (var, v) ? NULL : othernm;
3491 static const struct ctables_category *
3492 ctables_categories_total (const struct ctables_categories *c)
3494 const struct ctables_category *first = &c->cats[0];
3495 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3496 return (first->type == CCT_TOTAL ? first
3497 : last->type == CCT_TOTAL ? last
3501 static struct ctables_cell *
3502 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3503 const struct ctables_category *cats[PIVOT_N_AXES][10])
3506 enum ctables_summary_variant sv = CSV_CELL;
3507 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3509 const struct ctables_nest *nest = s->nests[a];
3510 for (size_t i = 0; i < nest->n; i++)
3511 if (i != nest->scale_idx)
3513 hash = hash_pointer (cats[a][i], hash);
3514 if (cats[a][i]->type != CCT_TOTAL
3515 && cats[a][i]->type != CCT_SUBTOTAL
3516 && cats[a][i]->type != CCT_POSTCOMPUTE)
3517 hash = value_hash (case_data (c, nest->vars[i]),
3518 var_get_width (nest->vars[i]), hash);
3524 struct ctables_cell *cell;
3525 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3527 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3529 const struct ctables_nest *nest = s->nests[a];
3530 for (size_t i = 0; i < nest->n; i++)
3531 if (i != nest->scale_idx
3532 && (cats[a][i] != cell->axes[a].cvs[i].category
3533 || (cats[a][i]->type != CCT_TOTAL
3534 && cats[a][i]->type != CCT_SUBTOTAL
3535 && cats[a][i]->type != CCT_POSTCOMPUTE
3536 && !value_equal (case_data (c, nest->vars[i]),
3537 &cell->axes[a].cvs[i].value,
3538 var_get_width (nest->vars[i])))))
3547 cell = xmalloc (sizeof *cell);
3550 cell->omit_domains = 0;
3551 cell->postcompute = false;
3552 //struct string name = DS_EMPTY_INITIALIZER;
3553 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3555 const struct ctables_nest *nest = s->nests[a];
3556 cell->axes[a].cvs = (nest->n
3557 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3559 for (size_t i = 0; i < nest->n; i++)
3561 const struct ctables_category *cat = cats[a][i];
3562 const struct variable *var = nest->vars[i];
3563 const union value *value = case_data (c, var);
3564 if (i != nest->scale_idx)
3566 const struct ctables_category *subtotal = cat->subtotal;
3567 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3570 if (cat->type == CCT_TOTAL
3571 || cat->type == CCT_SUBTOTAL
3572 || cat->type == CCT_POSTCOMPUTE)
3574 /* XXX these should be more encompassing I think.*/
3578 case PIVOT_AXIS_COLUMN:
3579 cell->omit_domains |= ((1u << CTDT_TABLE) |
3580 (1u << CTDT_LAYER) |
3581 (1u << CTDT_LAYERCOL) |
3582 (1u << CTDT_SUBTABLE) |
3585 case PIVOT_AXIS_ROW:
3586 cell->omit_domains |= ((1u << CTDT_TABLE) |
3587 (1u << CTDT_LAYER) |
3588 (1u << CTDT_LAYERROW) |
3589 (1u << CTDT_SUBTABLE) |
3592 case PIVOT_AXIS_LAYER:
3593 cell->omit_domains |= ((1u << CTDT_TABLE) |
3594 (1u << CTDT_LAYER));
3598 if (cat->type == CCT_POSTCOMPUTE)
3599 cell->postcompute = true;
3602 cell->axes[a].cvs[i].category = cat;
3603 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3606 if (i != nest->scale_idx)
3608 if (!ds_is_empty (&name))
3609 ds_put_cstr (&name, ", ");
3610 char *value_s = data_out (value, var_get_encoding (var),
3611 var_get_print_format (var),
3612 settings_get_fmt_settings ());
3613 if (cat->type == CCT_TOTAL
3614 || cat->type == CCT_SUBTOTAL
3615 || cat->type == CCT_POSTCOMPUTE)
3616 ds_put_format (&name, "%s=total", var_get_name (var));
3618 ds_put_format (&name, "%s=%s", var_get_name (var),
3619 value_s + strspn (value_s, " "));
3625 //cell->name = ds_steal_cstr (&name);
3627 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3628 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3629 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3630 for (size_t i = 0; i < specs->n; i++)
3631 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3632 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3633 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3634 hmap_insert (&s->cells, &cell->node, hash);
3639 is_scale_missing (const struct ctables_summary_spec_set *specs,
3640 const struct ccase *c)
3642 if (!specs->is_scale)
3645 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3648 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3650 const struct variable *var = specs->listwise_vars[i];
3651 if (var_is_num_missing (var, case_num (c, var)))
3659 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3660 const struct ctables_category *cats[PIVOT_N_AXES][10],
3661 bool is_missing, bool excluded_missing,
3662 double d_weight, double e_weight)
3664 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3665 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3667 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3669 bool scale_missing = is_scale_missing (specs, c);
3670 for (size_t i = 0; i < specs->n; i++)
3671 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3672 specs->var, case_data (c, specs->var), specs->is_scale,
3673 scale_missing, is_missing, excluded_missing,
3674 d_weight, e_weight);
3675 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3676 if (!(cell->omit_domains && (1u << dt)))
3678 struct ctables_domain *d = cell->domains[dt];
3679 d->d_total += d_weight;
3680 d->e_total += e_weight;
3682 if (!excluded_missing)
3684 d->d_count += d_weight;
3685 d->e_count += e_weight;
3690 d->d_valid += d_weight;
3691 d->e_valid += e_weight;
3694 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3696 /* XXX listwise_missing??? */
3697 const struct variable *var = s->table->sum_vars[i];
3698 double addend = case_num (c, var);
3699 if (!var_is_num_missing (var, addend))
3701 struct ctables_sum *sum = &d->sums[i];
3702 sum->e_sum += addend * e_weight;
3703 sum->u_sum += addend;
3711 recurse_totals (struct ctables_section *s, const struct ccase *c,
3712 const struct ctables_category *cats[PIVOT_N_AXES][10],
3713 bool is_missing, bool excluded_missing,
3714 double d_weight, double e_weight,
3715 enum pivot_axis_type start_axis, size_t start_nest)
3717 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3719 const struct ctables_nest *nest = s->nests[a];
3720 for (size_t i = start_nest; i < nest->n; i++)
3722 if (i == nest->scale_idx)
3725 const struct variable *var = nest->vars[i];
3727 const struct ctables_category *total = ctables_categories_total (
3728 s->table->categories[var_get_dict_index (var)]);
3731 const struct ctables_category *save = cats[a][i];
3733 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3734 d_weight, e_weight);
3735 recurse_totals (s, c, cats, is_missing, excluded_missing,
3736 d_weight, e_weight, a, i + 1);
3745 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3746 const struct ctables_category *cats[PIVOT_N_AXES][10],
3747 bool is_missing, bool excluded_missing,
3748 double d_weight, double e_weight,
3749 enum pivot_axis_type start_axis, size_t start_nest)
3751 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3753 const struct ctables_nest *nest = s->nests[a];
3754 for (size_t i = start_nest; i < nest->n; i++)
3756 if (i == nest->scale_idx)
3759 const struct ctables_category *save = cats[a][i];
3762 cats[a][i] = save->subtotal;
3763 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3764 d_weight, e_weight);
3765 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3766 d_weight, e_weight, a, i + 1);
3775 ctables_add_occurrence (const struct variable *var,
3776 const union value *value,
3777 struct hmap *occurrences)
3779 int width = var_get_width (var);
3780 unsigned int hash = value_hash (value, width, 0);
3782 struct ctables_occurrence *o;
3783 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3785 if (value_equal (value, &o->value, width))
3788 o = xmalloc (sizeof *o);
3789 value_clone (&o->value, value, width);
3790 hmap_insert (occurrences, &o->node, hash);
3794 ctables_cell_insert (struct ctables_section *s,
3795 const struct ccase *c,
3796 double d_weight, double e_weight)
3798 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3800 /* Does at least one categorical variable have a missing value in an included
3801 or excluded category? */
3802 bool is_missing = false;
3804 /* Does at least one categorical variable have a missing value in an excluded
3806 bool excluded_missing = false;
3808 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3810 const struct ctables_nest *nest = s->nests[a];
3811 for (size_t i = 0; i < nest->n; i++)
3813 if (i == nest->scale_idx)
3816 const struct variable *var = nest->vars[i];
3817 const union value *value = case_data (c, var);
3819 bool var_missing = var_is_value_missing (var, value) != 0;
3823 cats[a][i] = ctables_categories_match (
3824 s->table->categories[var_get_dict_index (var)], value, var);
3830 static const struct ctables_category cct_excluded_missing = {
3831 .type = CCT_EXCLUDED_MISSING,
3834 cats[a][i] = &cct_excluded_missing;
3835 excluded_missing = true;
3840 if (!excluded_missing)
3841 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3843 const struct ctables_nest *nest = s->nests[a];
3844 for (size_t i = 0; i < nest->n; i++)
3845 if (i != nest->scale_idx)
3847 const struct variable *var = nest->vars[i];
3848 const union value *value = case_data (c, var);
3849 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3853 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3854 d_weight, e_weight);
3856 //if (!excluded_missing)
3858 recurse_totals (s, c, cats, is_missing, excluded_missing,
3859 d_weight, e_weight, 0, 0);
3860 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3861 d_weight, e_weight, 0, 0);
3867 const struct ctables_summary_spec_set *set;
3872 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3874 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3875 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3876 if (as->function != bs->function)
3877 return as->function > bs->function ? 1 : -1;
3878 else if (as->percentile != bs->percentile)
3879 return as->percentile < bs->percentile ? 1 : -1;
3881 const char *as_label = as->label ? as->label : "";
3882 const char *bs_label = bs->label ? bs->label : "";
3883 return strcmp (as_label, bs_label);
3886 static struct pivot_value *
3887 ctables_category_create_label__ (const struct ctables_category *cat,
3888 const struct variable *var,
3889 const union value *value)
3891 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3892 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3893 : pivot_value_new_var_value (var, value));
3896 static struct pivot_value *
3897 ctables_postcompute_label (const struct ctables_categories *cats,
3898 const struct ctables_category *cat,
3899 const struct variable *var,
3900 const union value *value)
3902 struct substring in = ss_cstr (cat->pc->label);
3903 struct substring target = ss_cstr (")LABEL[");
3905 struct string out = DS_EMPTY_INITIALIZER;
3908 size_t chunk = ss_find_substring (in, target);
3909 if (chunk == SIZE_MAX)
3911 if (ds_is_empty (&out))
3912 return pivot_value_new_user_text (in.string, in.length);
3915 ds_put_substring (&out, in);
3916 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3920 ds_put_substring (&out, ss_head (in, chunk));
3921 ss_advance (&in, chunk + target.length);
3923 struct substring idx_s;
3924 if (!ss_get_until (&in, ']', &idx_s))
3927 long int idx = strtol (idx_s.string, &tail, 10);
3928 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3931 struct ctables_category *cat2 = &cats->cats[idx - 1];
3932 struct pivot_value *label2
3933 = ctables_category_create_label__ (cat2, var, value);
3934 char *label2_s = pivot_value_to_string_defaults (label2);
3935 ds_put_cstr (&out, label2_s);
3937 pivot_value_destroy (label2);
3942 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3945 static struct pivot_value *
3946 ctables_category_create_label (const struct ctables_categories *cats,
3947 const struct ctables_category *cat,
3948 const struct variable *var,
3949 const union value *value)
3951 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3952 ? ctables_postcompute_label (cats, cat, var, value)
3953 : ctables_category_create_label__ (cat, var, value));
3956 static struct ctables_value *
3957 ctables_value_find__ (struct ctables_table *t, const union value *value,
3958 int width, unsigned int hash)
3960 struct ctables_value *clv;
3961 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3962 hash, &t->clabels_values_map)
3963 if (value_equal (value, &clv->value, width))
3969 ctables_value_insert (struct ctables_table *t, const union value *value,
3972 unsigned int hash = value_hash (value, width, 0);
3973 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3976 clv = xmalloc (sizeof *clv);
3977 value_clone (&clv->value, value, width);
3978 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T's 'clabels_values_map' that equals VALUE, which
   has the given WIDTH, or a null pointer if there is none.  This computes
   the hash of VALUE and delegates to ctables_value_find__(). */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3991 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3992 size_t ix[PIVOT_N_AXES])
3994 if (a < PIVOT_N_AXES)
3996 size_t limit = MAX (t->stacks[a].n, 1);
3997 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3998 ctables_table_add_section (t, a + 1, ix);
4002 struct ctables_section *s = &t->sections[t->n_sections++];
4003 *s = (struct ctables_section) {
4005 .cells = HMAP_INITIALIZER (s->cells),
4007 for (a = 0; a < PIVOT_N_AXES; a++)
4010 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4012 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4013 for (size_t i = 0; i < nest->n; i++)
4014 hmap_init (&s->occurrences[a][i]);
4016 for (size_t i = 0; i < N_CTDTS; i++)
4017 hmap_init (&s->domains[i]);
/* Postcompute binary operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute binary operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute binary operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
4040 ctpo_div (double a, double b)
4042 return b ? a / b : SYSMIS;
4046 ctpo_pow (double a, double b)
4048 int save_errno = errno;
4050 double result = pow (a, b);
4058 ctpo_neg (double a, double b UNUSED)
4063 struct ctables_pcexpr_evaluate_ctx
4065 const struct ctables_cell *cell;
4066 const struct ctables_section *section;
4067 const struct ctables_categories *cats;
4068 enum pivot_axis_type pc_a;
4071 enum fmt_type parse_format;
4074 static double ctables_pcexpr_evaluate (
4075 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4078 ctables_pcexpr_evaluate_nonterminal (
4079 const struct ctables_pcexpr_evaluate_ctx *ctx,
4080 const struct ctables_pcexpr *e, size_t n_args,
4081 double evaluate (double, double))
4083 double args[2] = { 0, 0 };
4084 for (size_t i = 0; i < n_args; i++)
4086 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4087 if (!isfinite (args[i]) || args[i] == SYSMIS)
4090 return evaluate (args[0], args[1]);
4094 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4095 const struct ctables_cell_value *pc_cv)
4097 const struct ctables_section *s = ctx->section;
4100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4102 const struct ctables_nest *nest = s->nests[a];
4103 for (size_t i = 0; i < nest->n; i++)
4104 if (i != nest->scale_idx)
4106 const struct ctables_cell_value *cv
4107 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4108 : &ctx->cell->axes[a].cvs[i]);
4109 hash = hash_pointer (cv->category, hash);
4110 if (cv->category->type != CCT_TOTAL
4111 && cv->category->type != CCT_SUBTOTAL
4112 && cv->category->type != CCT_POSTCOMPUTE)
4113 hash = value_hash (&cv->value,
4114 var_get_width (nest->vars[i]), hash);
4118 struct ctables_cell *tc;
4119 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4121 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4123 const struct ctables_nest *nest = s->nests[a];
4124 for (size_t i = 0; i < nest->n; i++)
4125 if (i != nest->scale_idx)
4127 const struct ctables_cell_value *p_cv
4128 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4129 : &ctx->cell->axes[a].cvs[i]);
4130 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4131 if (p_cv->category != t_cv->category
4132 || (p_cv->category->type != CCT_TOTAL
4133 && p_cv->category->type != CCT_SUBTOTAL
4134 && p_cv->category->type != CCT_POSTCOMPUTE
4135 && !value_equal (&p_cv->value,
4137 var_get_width (nest->vars[i]))))
4149 const struct ctables_table *t = s->table;
4150 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4151 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4152 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4153 &specs->specs[ctx->summary_idx]);
4157 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4158 const struct ctables_pcexpr *e)
4165 case CTPO_CAT_NRANGE:
4166 case CTPO_CAT_SRANGE:
4168 struct ctables_cell_value cv = {
4169 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4171 assert (cv.category != NULL);
4173 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4174 const struct ctables_occurrence *o;
4177 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4178 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4179 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4181 cv.value = o->value;
4182 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4187 case CTPO_CAT_NUMBER:
4188 case CTPO_CAT_MISSING:
4189 case CTPO_CAT_OTHERNM:
4190 case CTPO_CAT_SUBTOTAL:
4191 case CTPO_CAT_TOTAL:
4193 struct ctables_cell_value cv = {
4194 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4195 .value = { .f = e->number },
4197 assert (cv.category != NULL);
4198 return ctables_pcexpr_evaluate_category (ctx, &cv);
4201 case CTPO_CAT_STRING:
4203 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4205 if (width > e->string.length)
4207 s = xmalloc (width);
4208 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4210 struct ctables_cell_value cv = {
4211 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4212 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4214 assert (cv.category != NULL);
4215 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4221 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4224 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4227 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4230 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4233 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4236 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4242 static const struct ctables_category *
4243 ctables_cell_postcompute (const struct ctables_section *s,
4244 const struct ctables_cell *cell,
4245 enum pivot_axis_type *pc_a_p,
4248 assert (cell->postcompute);
4249 const struct ctables_category *pc_cat = NULL;
4250 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4251 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4253 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4254 if (cv->category->type == CCT_POSTCOMPUTE)
4258 /* Multiple postcomputes cross each other. The value is
4263 pc_cat = cv->category;
4267 *pc_a_idx_p = pc_a_idx;
4271 assert (pc_cat != NULL);
4276 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4277 const struct ctables_cell *cell,
4278 const struct ctables_summary_spec *ss,
4279 struct fmt_spec *format,
4280 bool *is_ctables_format,
4283 enum pivot_axis_type pc_a = 0;
4284 size_t pc_a_idx = 0;
4285 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4286 s, cell, &pc_a, &pc_a_idx);
4290 const struct ctables_postcompute *pc = pc_cat->pc;
4293 for (size_t i = 0; i < pc->specs->n; i++)
4295 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4296 if (ss->function == ss2->function
4297 && ss->percentile == ss2->percentile)
4299 *format = ss2->format;
4300 *is_ctables_format = ss2->is_ctables_format;
4306 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4307 const struct ctables_categories *cats = s->table->categories[
4308 var_get_dict_index (var)];
4309 struct ctables_pcexpr_evaluate_ctx ctx = {
4314 .pc_a_idx = pc_a_idx,
4315 .summary_idx = summary_idx,
4316 .parse_format = pc_cat->parse_format,
4318 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4322 ctables_format (double d, const struct fmt_spec *format,
4323 const struct fmt_settings *settings)
4325 const union value v = { .f = d };
4326 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4328 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4329 produce the results we want for negative numbers, putting the negative
4330 sign in the wrong spot, before the prefix instead of after it. We can't,
4331 in fact, produce the desired results using a custom-currency
4332 specification. Instead, we postprocess the output, moving the negative
4335 NEQUAL: "-N=3" => "N=-3"
4336 PAREN: "-(3)" => "(-3)"
4337 PCTPAREN: "-(3%)" => "(-3%)"
4339 This transformation doesn't affect NEGPAREN. */
4340 char *minus_src = strchr (s, '-');
4341 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4343 char *n_equals = strstr (s, "N=");
4344 char *lparen = strchr (s, '(');
4345 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4347 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4353 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4355 struct pivot_table *pt = pivot_table_create__ (
4357 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4358 : pivot_value_new_text (N_("Custom Tables"))),
4361 pivot_table_set_caption (
4362 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4364 pivot_table_set_corner_text (
4365 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4367 bool summary_dimension = (t->summary_axis != t->slabels_axis
4368 || (!t->slabels_visible
4369 && t->summary_specs.n > 1));
4370 if (summary_dimension)
4372 struct pivot_dimension *d = pivot_dimension_create (
4373 pt, t->slabels_axis, N_("Statistics"));
4374 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4375 if (!t->slabels_visible)
4376 d->hide_all_labels = true;
4377 for (size_t i = 0; i < specs->n; i++)
4378 pivot_category_create_leaf (
4379 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4382 bool categories_dimension = t->clabels_example != NULL;
4383 if (categories_dimension)
4385 struct pivot_dimension *d = pivot_dimension_create (
4386 pt, t->label_axis[t->clabels_from_axis],
4387 t->clabels_from_axis == PIVOT_AXIS_ROW
4388 ? N_("Row Categories")
4389 : N_("Column Categories"));
4390 const struct variable *var = t->clabels_example;
4391 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4392 for (size_t i = 0; i < t->n_clabels_values; i++)
4394 const struct ctables_value *value = t->clabels_values[i];
4395 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4396 assert (cat != NULL);
4397 pivot_category_create_leaf (d->root, ctables_category_create_label (
4398 c, cat, t->clabels_example,
4403 pivot_table_set_look (pt, ct->look);
4404 struct pivot_dimension *d[PIVOT_N_AXES];
4405 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4407 static const char *names[] = {
4408 [PIVOT_AXIS_ROW] = N_("Rows"),
4409 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4410 [PIVOT_AXIS_LAYER] = N_("Layers"),
4412 d[a] = (t->axes[a] || a == t->summary_axis
4413 ? pivot_dimension_create (pt, a, names[a])
4418 assert (t->axes[a]);
4420 for (size_t i = 0; i < t->stacks[a].n; i++)
4422 struct ctables_nest *nest = &t->stacks[a].nests[i];
4423 struct ctables_section **sections = xnmalloc (t->n_sections,
4425 size_t n_sections = 0;
4427 size_t n_total_cells = 0;
4428 size_t max_depth = 0;
4429 for (size_t j = 0; j < t->n_sections; j++)
4430 if (t->sections[j].nests[a] == nest)
4432 struct ctables_section *s = &t->sections[j];
4433 sections[n_sections++] = s;
4434 n_total_cells += hmap_count (&s->cells);
4436 size_t depth = s->nests[a]->n;
4437 max_depth = MAX (depth, max_depth);
4440 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4442 size_t n_sorted = 0;
4444 for (size_t j = 0; j < n_sections; j++)
4446 struct ctables_section *s = sections[j];
4448 struct ctables_cell *cell;
4449 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4451 sorted[n_sorted++] = cell;
4452 assert (n_sorted <= n_total_cells);
4455 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4456 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4459 if (a == PIVOT_AXIS_ROW)
4461 size_t ids[N_CTDTS];
4462 memset (ids, 0, sizeof ids);
4463 for (size_t j = 0; j < n_sorted; j++)
4465 struct ctables_cell *cell = sorted[j];
4466 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4468 struct ctables_domain *domain = cell->domains[dt];
4469 if (!domain->sequence)
4470 domain->sequence = ++ids[dt];
4477 for (size_t j = 0; j < n_sorted; j++)
4479 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4484 struct ctables_level
4486 enum ctables_level_type
4488 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4489 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4490 CTL_SUMMARY, /* Summary functions. */
4494 enum settings_value_show vlabel; /* CTL_VAR only. */
4497 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4498 size_t n_levels = 0;
4499 for (size_t k = 0; k < nest->n; k++)
4501 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4502 if (vlabel != CTVL_NONE)
4504 levels[n_levels++] = (struct ctables_level) {
4506 .vlabel = (enum settings_value_show) vlabel,
4511 if (nest->scale_idx != k
4512 && (k != nest->n - 1 || t->label_axis[a] == a))
4514 levels[n_levels++] = (struct ctables_level) {
4515 .type = CTL_CATEGORY,
4521 if (!summary_dimension && a == t->slabels_axis)
4523 levels[n_levels++] = (struct ctables_level) {
4524 .type = CTL_SUMMARY,
4525 .var_idx = SIZE_MAX,
4529 /* Pivot categories:
4531 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4532 - category for nest->vars[0], if nest->scale_idx != 0
4533 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4534 - category for nest->vars[1], if nest->scale_idx != 1
4536 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4537 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4538 - summary function, if 'a == t->slabels_axis && a ==
4541 Additional dimensions:
4543 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4545 - If 't->label_axis[b] == a' for some 'b != a', add a category
4550 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4552 for (size_t j = 0; j < n_sorted; j++)
4554 struct ctables_cell *cell = sorted[j];
4555 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4557 size_t n_common = 0;
4560 for (; n_common < n_levels; n_common++)
4562 const struct ctables_level *level = &levels[n_common];
4563 if (level->type == CTL_CATEGORY)
4565 size_t var_idx = level->var_idx;
4566 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4567 if (prev->axes[a].cvs[var_idx].category != c)
4569 else if (c->type != CCT_SUBTOTAL
4570 && c->type != CCT_TOTAL
4571 && c->type != CCT_POSTCOMPUTE
4572 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4573 &cell->axes[a].cvs[var_idx].value,
4574 var_get_type (nest->vars[var_idx])))
4580 for (size_t k = n_common; k < n_levels; k++)
4582 const struct ctables_level *level = &levels[k];
4583 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4584 if (level->type == CTL_SUMMARY)
4586 assert (k == n_levels - 1);
4588 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4589 for (size_t m = 0; m < specs->n; m++)
4591 int leaf = pivot_category_create_leaf (
4592 parent, ctables_summary_label (&specs->specs[m],
4600 const struct variable *var = nest->vars[level->var_idx];
4601 struct pivot_value *label;
4602 if (level->type == CTL_VAR)
4604 label = pivot_value_new_variable (var);
4605 label->variable.show = level->vlabel;
4607 else if (level->type == CTL_CATEGORY)
4609 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4610 label = ctables_category_create_label (
4611 t->categories[var_get_dict_index (var)],
4612 cv->category, var, &cv->value);
4617 if (k == n_levels - 1)
4618 prev_leaf = pivot_category_create_leaf (parent, label);
4620 groups[k] = pivot_category_create_group__ (parent, label);
4624 cell->axes[a].leaf = prev_leaf;
4634 size_t n_total_cells = 0;
4635 for (size_t j = 0; j < t->n_sections; j++)
4636 n_total_cells += hmap_count (&t->sections[j].cells);
4638 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4639 size_t n_sorted = 0;
4640 for (size_t j = 0; j < t->n_sections; j++)
4642 const struct ctables_section *s = &t->sections[j];
4643 struct ctables_cell *cell;
4644 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4646 sorted[n_sorted++] = cell;
4648 assert (n_sorted <= n_total_cells);
4649 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4651 size_t ids[N_CTDTS];
4652 memset (ids, 0, sizeof ids);
4653 for (size_t j = 0; j < n_sorted; j++)
4655 struct ctables_cell *cell = sorted[j];
4656 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4658 struct ctables_domain *domain = cell->domains[dt];
4659 if (!domain->sequence)
4660 domain->sequence = ++ids[dt];
4667 for (size_t i = 0; i < t->n_sections; i++)
4669 struct ctables_section *s = &t->sections[i];
4671 struct ctables_cell *cell;
4672 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4677 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4678 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4679 for (size_t j = 0; j < specs->n; j++)
4682 size_t n_dindexes = 0;
4684 if (summary_dimension)
4685 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4687 if (categories_dimension)
4689 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4690 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4691 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4692 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4695 dindexes[n_dindexes++] = ctv->leaf;
4698 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4701 int leaf = cell->axes[a].leaf;
4702 if (a == t->summary_axis && !summary_dimension)
4704 dindexes[n_dindexes++] = leaf;
4707 const struct ctables_summary_spec *ss = &specs->specs[j];
4709 struct fmt_spec format = specs->specs[j].format;
4710 bool is_ctables_format = ss->is_ctables_format;
4711 double d = (cell->postcompute
4712 ? ctables_cell_calculate_postcompute (
4713 s, cell, ss, &format, &is_ctables_format, j)
4714 : ctables_summary_value (cell, &cell->summaries[j],
4717 struct pivot_value *value;
4718 if (ct->hide_threshold != 0
4719 && d < ct->hide_threshold
4720 && ctables_summary_function_is_count (ss->function))
4722 value = pivot_value_new_user_text_nocopy (
4723 xasprintf ("<%d", ct->hide_threshold));
4725 else if (d == 0 && ct->zero)
4726 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4727 else if (d == SYSMIS && ct->missing)
4728 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4729 else if (is_ctables_format)
4730 value = pivot_value_new_user_text_nocopy (
4731 ctables_format (d, &format, &ct->ctables_formats));
4734 value = pivot_value_new_number (d);
4735 value->numeric.format = format;
4737 /* XXX should text values be right-justified? */
4738 pivot_table_put (pt, dindexes, n_dindexes, value);
4743 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be moved to the
   axis selected by T->label_axis[A].

   Records A in T->clabels_from_axis and the innermost variable of the first
   nest on the axis in T->clabels_example.  The visible checks report, with an
   SE message that names the subcommand (ROWLABELS or COLLABELS) and the
   position (LAYER or OPPOSITE):

   - a CCT_FUNCTION category on the first nest's innermost variable,
   - an innermost variable that is its nest's scale variable,
   - stacked nests whose innermost variables differ from the first nest's in
     width, value labels, or category specifications.

   NOTE(review): short lines (braces, early exits, return statements) are
   missing from this extract.  ctables_prepare_table() combines the results
   for the row and column axes with &&, so this presumably returns false after
   reporting an error and true otherwise -- confirm against the full source. */
4747 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4749 enum pivot_axis_type label_pos = t->label_axis[a];
4753 t->clabels_from_axis = a;
/* Names substituted into the error messages below. */
4755 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4756 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4758 const struct ctables_stack *stack = &t->stacks[a];
4762 const struct ctables_nest *n0 = &stack->nests[0];
4765 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference against which the
   other nests are compared. */
4769 const struct variable *v0 = n0->vars[n0->n - 1];
4770 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4771 t->clabels_example = v0;
4773 for (size_t i = 0; i < c0->n_cats; i++)
4774 if (c0->cats[i].type == CCT_FUNCTION)
4776 msg (SE, _("%s=%s is not allowed with sorting based "
4777 "on a summary function."),
4778 subcommand_name, pos_name);
/* The innermost variable of the first nest is its scale variable. */
4781 if (n0->n - 1 == n0->scale_idx)
4783 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4784 "but %s is a scale variable."),
4785 subcommand_name, pos_name, var_get_name (v0));
/* Compare the innermost variable of every other nest with V0. */
4789 for (size_t i = 1; i < stack->n; i++)
4791 const struct ctables_nest *ni = &stack->nests[i];
4793 const struct variable *vi = ni->vars[ni->n - 1];
4794 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4796 if (ni->n - 1 == ni->scale_idx)
4798 msg (SE, _("%s=%s requires the variables to be moved to be "
4799 "categorical, but %s is a scale variable."),
4800 subcommand_name, pos_name, var_get_name (vi));
4803 if (var_get_width (v0) != var_get_width (vi))
4805 msg (SE, _("%s=%s requires the variables to be "
4806 "moved to have the same width, but %s has "
4807 "width %d and %s has width %d."),
4808 subcommand_name, pos_name,
4809 var_get_name (v0), var_get_width (v0),
4810 var_get_name (vi), var_get_width (vi));
4813 if (!val_labs_equal (var_get_value_labels (v0),
4814 var_get_value_labels (vi)))
4816 msg (SE, _("%s=%s requires the variables to be "
4817 "moved to have the same value labels, but %s "
4818 "and %s have different value labels."),
4819 subcommand_name, pos_name,
4820 var_get_name (v0), var_get_name (vi));
4823 if (!ctables_categories_equal (c0, ci))
4825 msg (SE, _("%s=%s requires the variables to be "
4826 "moved to have the same category "
4827 "specifications, but %s and %s have different "
4828 "category specifications."),
4829 subcommand_name, pos_name,
4830 var_get_name (v0), var_get_name (vi));
/* Returns the index of VAR within the array *SUM_VARS, appending VAR to the
   array first if it is not already there.

   *SUM_VARS contains *N variables and has room for *ALLOCATED.  When the
   array is full it is enlarged with x2nrealloc(), which updates *ALLOCATED;
   *N is incremented whenever VAR is appended.  The search is linear, so an
   existing variable keeps the index it was given when first added. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  /* Reuse the existing slot if VAR was added before. */
  for (size_t i = 0; i < *n; i++)
    if (var == (*sum_vars)[i])
      return i;

  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  (*sum_vars)[*n] = var;

  /* The new element's index is the old count. */
  return (*n)++;
}
/* Walks axis expression A and collects the variables summed by its summary
   specifications.

   For every specification in A->specs[] whose function satisfies
   ctables_function_is_pctsum(), stores in the specification's sum_var_idx the
   index that add_sum_var() reports for A->var within *SUM_VARS (which holds
   *N variables in space for *ALLOCATED).  Recurses into both elements of
   A->subs.

   NOTE(review): the lines that choose between the per-variable loop and the
   recursion (and any guard on A) are missing from this extract -- confirm
   against the full source. */
4853 enumerate_sum_vars (const struct ctables_axis *a,
4854 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every summary specification of every summary variant. */
4862 for (size_t i = 0; i < N_CSVS; i++)
4863 for (size_t j = 0; j < a->specs[i].n; j++)
4865 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4866 if (ctables_function_is_pctsum (spec->function))
4867 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both subexpressions. */
4873 for (size_t i = 0; i < 2; i++)
4874 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The visible steps are:

   - For each axis, build T->stacks[A] with enumerate_fts() and fill in each
     nest's per-domain-type lists of variable indexes (nest->domains[] and
     nest->n_domains[]).
   - On the summary axis, give each nest that has no CSV_CELL summary
     specifications a single default one (CTSF_MEAN when the set is scale,
     otherwise CTSF_COUNT) and clone the CSV_CELL set into CSV_TOTAL; a nest
     that has CSV_CELL specifications but no CSV_TOTAL ones also gets a clone.
   - When T->ctables->smissing_listwise is set, collect the scale variables of
     the other nests with the same group_head into each summary variant's
     listwise_vars.
   - Merge the specification sets of all summary-axis nests into
     T->summary_specs, storing each specification's position in the merged
     list in its axis_idx.
   - Collect the summed variables of the summary axis into T->sum_vars.

   Returns true only if ctables_check_label_position() succeeds for both the
   row and the column axis.

   NOTE(review): many short lines (braces, else, break/continue, case labels,
   some declarations) are missing from this extract, so the exact nesting of
   the statements below must be confirmed against the full source. */
4880 ctables_prepare_table (struct ctables_table *t)
4882 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4885 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4887 for (size_t j = 0; j < t->stacks[a].n; j++)
4889 struct ctables_nest *nest = &t->stacks[a].nests[j];
4890 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for up to nest->n variable indexes for this domain type. */
4892 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4893 nest->n_domains[dt] = 0;
4895 for (size_t k = 0; k < nest->n; k++)
/* NOTE(review): the statements controlled by the conditions below are
   missing from this extract; only the conditions remain. */
4897 if (k == nest->scale_idx)
4906 if (a != PIVOT_AXIS_LAYER)
4913 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4914 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4915 : a == PIVOT_AXIS_ROW)
4917 if (k == nest->n - 1
4918 || (nest->scale_idx == nest->n - 1
4919 && k == nest->n - 2))
4925 if (a == PIVOT_AXIS_COLUMN)
4930 if (a == PIVOT_AXIS_ROW)
/* Variable K takes part in domain type DT. */
4935 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Give the axis a stack that contains one empty nest.
   NOTE(review): the condition that selects this branch is missing from the
   extract -- confirm. */
4942 struct ctables_nest *nest = xmalloc (sizeof *nest);
4943 *nest = (struct ctables_nest) { .n = 0 };
4944 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4946 /* There's no point in moving labels away from an axis that has no
4947 labels, so avoid dealing with the special cases around that. */
4948 t->label_axis[a] = a;
4951 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4952 for (size_t i = 0; i < stack->n; i++)
4954 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified: supply a default one. */
4955 if (!nest->specs[CSV_CELL].n)
4957 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4958 specs->specs = xmalloc (sizeof *specs->specs);
4961 enum ctables_summary_function function
4962 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4964 *specs->specs = (struct ctables_summary_spec) {
4965 .function = function,
4966 .format = ctables_summary_default_format (function, specs->var),
4969 specs->var = nest->vars[0];
4971 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4972 &nest->specs[CSV_CELL]);
/* Cell summaries exist but total summaries do not: reuse the cell ones. */
4974 else if (!nest->specs[CSV_TOTAL].n)
4975 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4976 &nest->specs[CSV_CELL]);
4978 if (t->ctables->smissing_listwise)
4980 struct variable **listwise_vars = NULL;
4982 size_t allocated = 0;
/* Gather the scale variables of the other nests in this nest's group. */
4984 for (size_t j = nest->group_head; j < stack->n; j++)
4986 const struct ctables_nest *other_nest = &stack->nests[j];
4987 if (other_nest->group_head != nest->group_head)
4990 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4993 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4994 sizeof *listwise_vars);
4995 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4998 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5001 listwise_vars = xmemdup (listwise_vars,
5002 n * sizeof *listwise_vars);
5003 nest->specs[sv].listwise_vars = listwise_vars;
5004 nest->specs[sv].n_listwise_vars = n;
/* Merge the specification sets of all nests and summary variants into
   T->summary_specs.  Each merge_item tracks one set and an offset into it. */
5009 struct ctables_summary_spec_set *merged = &t->summary_specs;
5010 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5012 for (size_t j = 0; j < stack->n; j++)
5014 const struct ctables_nest *nest = &stack->nests[j];
5016 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5017 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Pick the item that merge_item_compare_3way() orders first ... */
5022 struct merge_item min = items[0];
5023 for (size_t j = 1; j < n_left; j++)
5024 if (merge_item_compare_3way (&items[j], &min) < 0)
/* ... append its current specification to the merged set ... */
5027 if (merged->n >= merged->allocated)
5028 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5029 sizeof *merged->specs);
5030 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* ... and advance every item that compares equal to it, recording the merged
   position in the specification's axis_idx.  An item whose set is used up is
   replaced by the last remaining item. */
5032 for (size_t j = 0; j < n_left; )
5034 if (merge_item_compare_3way (&items[j], &min) == 0)
5036 struct merge_item *item = &items[j];
5037 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5038 if (++item->ofs >= item->set->n)
5040 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below dump the merged specifications;
   presumably they are debugging output that is compiled out in the full
   source -- confirm. */
5050 for (size_t j = 0; j < merged->n; j++)
5051 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5053 for (size_t j = 0; j < stack->n; j++)
5055 const struct ctables_nest *nest = &stack->nests[j];
5056 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5058 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5059 for (size_t k = 0; k < specs->n; k++)
5060 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5061 specs->specs[k].axis_idx);
/* Collect the variables summed on the summary axis into T->sum_vars. */
5067 size_t allocated_sum_vars = 0;
5068 enumerate_sum_vars (t->axes[t->summary_axis],
5069 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5071 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5072 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest stacked on axis A of table T, looks up in case C the value of
   the nest's innermost variable and, if it matches one of that variable's
   categories, records it with ctables_value_insert().

   NOTE(review): the statement controlled by the system-missing test and the
   tail of the ctables_categories_match() call are missing from this extract;
   presumably system-missing numeric values are skipped -- confirm. */
5076 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5077 enum pivot_axis_type a)
5079 struct ctables_stack *stack = &t->stacks[a];
5080 for (size_t i = 0; i < stack->n; i++)
5082 const struct ctables_nest *nest = &stack->nests[i];
5083 const struct variable *var = nest->vars[nest->n - 1];
5084 const union value *value = case_data (c, var);
5086 if (var_is_numeric (var) && value->f == SYSMIS)
5089 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5091 ctables_value_insert (t, value, var_get_width (var));
5096 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5098 const struct ctables_value *const *ap = a_;
5099 const struct ctables_value *const *bp = b_;
5100 const struct ctables_value *a = *ap;
5101 const struct ctables_value *b = *bp;
5102 const int *width = width_;
5103 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of the entries of
   T->clabels_values_map sorted by value, and numbers the entries' leaf
   members 0, 1, 2, ... in sorted order.

   Before building the array, every value of T->clabels_example that has a
   value label and matches one of the variable's categories is inserted into
   the map.

   NOTE(review): the condition (if any) that guards the value-label loop is
   missing from this extract -- confirm against the full source. */
5107 ctables_sort_clabels_values (struct ctables_table *t)
5109 const struct variable *v0 = t->clabels_example;
5110 int width = var_get_width (v0);
5112 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add the labeled values that fall into one of V0's categories. */
5115 const struct val_labs *val_labs = var_get_value_labels (v0);
5116 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5117 vl = val_labs_next (val_labs, vl))
5118 if (ctables_categories_match (c0, &vl->value, v0))
5119 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array ... */
5122 size_t n = hmap_count (&t->clabels_values_map);
5123 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5125 struct ctables_value *clv;
5127 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5128 t->clabels_values[i++] = clv;
5129 t->n_clabels_values = n;
/* ... sort it by value, and assign leaf indexes in sorted order. */
5132 sort (t->clabels_values, n, sizeof *t->clabels_values,
5133 compare_clabels_values_3way, &width);
5135 for (size_t i = 0; i < n; i++)
5136 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, with ctables_add_occurrence(), values of VAR implied
   by the category specifications in CATS.

   Explicit numbers and strings are added directly.  For the other kinds of
   category visible here, the values added are those that have value labels
   on VAR and satisfy the category (inside a numeric or string range,
   user-missing, or any labeled value).

   NOTE(review): the switch header and most case labels are missing from this
   extract, so the category type that selects each group of statements below
   must be confirmed against the full source. */
5140 ctables_add_category_occurrences (const struct variable *var,
5141 struct hmap *occurrences,
5142 const struct ctables_categories *cats)
5144 const struct val_labs *val_labs = var_get_value_labels (var);
5146 for (size_t i = 0; i < cats->n_cats; i++)
5148 const struct ctables_category *c = &cats->cats[i];
/* A single numeric value. */
5152 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string value, padded with spaces to VAR's width. */
5158 int width = var_get_width (var);
5160 value_init (&value, width);
5161 value_copy_buf_rpad (&value, width,
5162 CHAR_CAST (uint8_t *, c->string.string),
5163 c->string.length, ' ');
5164 ctables_add_occurrence (var, &value, occurrences);
5165 value_destroy (&value, width);
/* Labeled values within the numeric range [nrange[0], nrange[1]]. */
5170 assert (var_is_numeric (var));
5171 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5172 vl = val_labs_next (val_labs, vl))
5173 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5174 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values within the string range. */
5178 assert (var_is_alpha (var));
5179 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5180 vl = val_labs_next (val_labs, vl))
5181 if (in_string_range (&vl->value, var, c->srange))
5182 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing for VAR. */
5186 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5187 vl = val_labs_next (val_labs, vl))
5188 if (var_is_value_missing (var, &vl->value))
5189 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5193 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5194 vl = val_labs_next (val_labs, vl))
5195 ctables_add_occurrence (var, &vl->value, occurrences);
5198 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   them. */
5208 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5209 vl = val_labs_next (val_labs, vl))
5210 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5211 ctables_add_occurrence (var, &vl->value, occurrences);
5214 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates combinations of category occurrences for section S
   and inserts a cell for each one.

   A and A_IDX identify the variable currently being enumerated: variable
   A_IDX of S->nests[A].  For each occurrence recorded for that variable, its
   value is written into case C, the matching category is stored in
   CATS[A][A_IDX], and the recursion continues with the next variable.  Each
   CCT_POSTCOMPUTE category of the variable is also tried.  When the nest for
   an axis is absent or exhausted the recursion moves on to the next axis, and
   once A reaches PIVOT_N_AXES the combination is passed to
   ctables_cell_insert__().

   C is modified in place. */
5221 ctables_section_recurse_add_empty_categories (
5222 struct ctables_section *s,
5223 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5224 enum pivot_axis_type a, size_t a_idx)
/* All axes are done: insert the cell for this combination. */
5226 if (a >= PIVOT_N_AXES)
5227 ctables_cell_insert__ (s, c, cats);
/* No (more) variables on this axis: go on to the next axis. */
5228 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5229 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5232 const struct variable *var = s->nests[a]->vars[a_idx];
5233 const struct ctables_categories *categories = s->table->categories[
5234 var_get_dict_index (var)];
5235 int width = var_get_width (var);
5236 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5237 const struct ctables_occurrence *o;
5238 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in C with this occurrence's value. */
5240 union value *value = case_data_rw (c, var);
5241 value_destroy (value, width);
5242 value_clone (value, &o->value, width);
5243 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5244 assert (cats[a][a_idx] != NULL);
5245 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated as well. */
5248 for (size_t i = 0; i < categories->n_cats; i++)
5250 const struct ctables_category *cat = &categories->cats[i];
5251 if (cat->type == CCT_POSTCOMPUTE)
5253 cats[a][a_idx] = cat;
5254 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should be shown.

   For every non-scale variable in S's nests, examines the show_empty flag of
   the variable's categories and calls ctables_add_category_occurrences() to
   record occurrences in S->occurrences.  Then creates a case from the
   dictionary's prototype and enumerates the combinations with
   ctables_section_recurse_add_empty_categories().

   NOTE(review): the lines that use SHOW_EMPTY, and the release of the
   temporary case, are missing from this extract -- confirm against the full
   source. */
5261 ctables_section_add_empty_categories (struct ctables_section *s)
5263 bool show_empty = false;
5264 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5266 for (size_t k = 0; k < s->nests[a]->n; k++)
5267 if (k != s->nests[a]->scale_idx)
5269 const struct variable *var = s->nests[a]->vars[k];
5270 const struct ctables_categories *cats = s->table->categories[
5271 var_get_dict_index (var)];
5272 if (cats->show_empty)
5275 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5281 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5282 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5283 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S without destroying its hash maps: removes every
   occurrence of every non-scale variable, every cell (destroying its
   per-axis values and its summaries), and every domain, then shrinks the
   cell and domain maps.

   NOTE(review): the lines that free the occurrence, cell, and domain
   structures themselves are not visible in this extract -- confirm against
   the full source. */
5288 ctables_section_clear (struct ctables_section *s)
/* Occurrences. */
5290 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5292 const struct ctables_nest *nest = s->nests[a];
5293 for (size_t i = 0; i < nest->n; i++)
5294 if (i != nest->scale_idx)
5296 const struct variable *var = nest->vars[i];
5297 int width = var_get_width (var);
5298 struct ctables_occurrence *o, *next;
5299 struct hmap *map = &s->occurrences[a][i];
5300 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5302 value_destroy (&o->value, width);
5303 hmap_delete (map, &o->node);
/* Cells. */
5310 struct ctables_cell *cell, *next_cell;
5311 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5313 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5315 const struct ctables_nest *nest = s->nests[a];
5316 for (size_t i = 0; i < nest->n; i++)
5317 if (i != nest->scale_idx)
5318 value_destroy (&cell->axes[a].cvs[i].value,
5319 var_get_width (nest->vars[i]));
5320 free (cell->axes[a].cvs);
/* The cell's summaries follow the specification set chosen by its summary
   variant on the summary axis. */
5323 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5324 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5325 for (size_t i = 0; i < specs->n; i++)
5326 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5327 free (cell->summaries);
5329 hmap_delete (&s->cells, &cell->node);
5332 hmap_shrink (&s->cells);
/* Domains. */
5334 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5336 struct ctables_domain *domain, *next_domain;
5337 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5340 free (domain->sums);
5341 hmap_delete (&s->domains[dt], &domain->node);
5344 hmap_shrink (&s->domains[dt]);
5349 ctables_section_uninit (struct ctables_section *s)
5351 ctables_section_clear (s);
5353 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5355 struct ctables_nest *nest = s->nests[a];
5356 for (size_t i = 0; i < nest->n; i++)
5357 hmap_destroy (&s->occurrences[a][i]);
5358 free (s->occurrences[a]);
5361 hmap_destroy (&s->cells);
5362 for (size_t i = 0; i < N_CTDTS; i++)
5363 hmap_destroy (&s->domains[i]);
/* Empties table T: clears each of its sections with ctables_section_clear()
   and, if T->clabels_example is set, removes every entry from
   T->clabels_values_map (destroying its value), shrinks the map, and frees
   and resets the T->clabels_values array.

   NOTE(review): the line that frees each removed map entry is not visible in
   this extract -- confirm against the full source. */
5367 ctables_table_clear (struct ctables_table *t)
5369 for (size_t i = 0; i < t->n_sections; i++)
5370 ctables_section_clear (&t->sections[i]);
5372 if (t->clabels_example)
/* All of the collected values share the example variable's width. */
5374 int width = var_get_width (t->clabels_example);
5375 struct ctables_value *value, *next_value;
5376 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5377 &t->clabels_values_map)
5379 value_destroy (&value->value, width);
5380 hmap_delete (&t->clabels_values_map, &value->node);
5383 hmap_shrink (&t->clabels_values_map);
5385 free (t->clabels_values);
5386 t->clabels_values = NULL;
5387 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases read from INPUT, using dataset DS.

   Allocates each table's array of sections (one per combination of row,
   column, and layer nests, treating an empty stack as one) and fills it with
   ctables_table_add_section().  INPUT is then divided into groups with a
   casegrouper: by the SPLIT FILE variables when the dictionary's split type
   is SPLIT_SEPARATE, otherwise with no grouping variables.  For each group:

   - the first case is peeked at to output the SPLIT FILE values,
   - every case is inserted into every section of every table, with its
     dictionary weight and effective weight, and its category-label values
     are collected for each axis whose labels are moved to another axis,
   - each table has its category-label values sorted (if it has any), its
     empty categories added, and is then output and cleared.

   Returns the result of casegrouper_destroy().

   NOTE(review): the second line of the parameter list (which declares CT)
   and several short statements are missing from this extract. */
5392 ctables_execute (struct dataset *ds, struct casereader *input,
5395 for (size_t i = 0; i < ct->n_tables; i++)
5397 struct ctables_table *t = ct->tables[i];
5398 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5399 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5400 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5401 sizeof *t->sections);
5402 size_t ix[PIVOT_N_AXES];
5403 ctables_table_add_section (t, 0, ix);
5406 struct dictionary *dict = dataset_dict (ds);
5407 struct casegrouper *grouper
5408 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5409 ? casegrouper_create_splits (input, dict)
5410 : casegrouper_create_vars (input, NULL, 0));
5411 struct casereader *group;
5412 while (casegrouper_get_next_group (grouper, &group))
5414 /* Output SPLIT FILE variables. */
5415 struct ccase *c = casereader_peek (group, 0);
5418 output_split_file_values (ds, c);
/* Accumulate every case of the group into every table. */
5422 bool warn_on_invalid = true;
5423 for (c = casereader_read (group); c;
5424 case_unref (c), c = casereader_read (group))
5426 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5427 double e_weight = (ct->e_weight
5428 ? var_force_valid_weight (ct->e_weight,
5429 case_num (c, ct->e_weight),
5433 for (size_t i = 0; i < ct->n_tables; i++)
5435 struct ctables_table *t = ct->tables[i];
5437 for (size_t j = 0; j < t->n_sections; j++)
5438 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect category-label values for axes whose labels are moved. */
5440 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5441 if (t->label_axis[a] != a)
5442 ctables_insert_clabels_values (t, c, a);
5445 casereader_destroy (group);
/* Finish, output, and reset each table for this group. */
5447 for (size_t i = 0; i < ct->n_tables; i++)
5449 struct ctables_table *t = ct->tables[i];
5451 if (t->clabels_example)
5452 ctables_sort_clabels_values (t);
5454 for (size_t j = 0; j < t->n_sections; j++)
5455 ctables_section_add_empty_categories (&t->sections[j]);
5457 ctables_table_output (ct, t);
5458 ctables_table_clear (t);
5461 return casegrouper_destroy (grouper);
/* Type of a function that parses part of a postcompute expression from a
   lexer, given a dictionary, and returns the resulting expression node.  The
   binary-operator parsers take a pointer to such a function to parse their
   operands. */
5466 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5467 struct dictionary *);
/* Destroys postcompute expression E.

   The visible cases release the string owned by a CTPO_CAT_STRING node, both
   strings of a CTPO_CAT_SRANGE node, and (recursively) both subexpressions of
   nodes that have them; the remaining category node types own nothing extra.
   E's location is then destroyed.

   NOTE(review): the guard on E, the switch header, the case labels for the
   operator nodes, and the release of E itself are missing from this extract.
   ctables_parse_pcompute() may pass a null pointer here, so a null E is
   presumably accepted -- confirm against the full source. */
5470 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5476 case CTPO_CAT_STRING:
5477 ss_dealloc (&e->string);
5480 case CTPO_CAT_SRANGE:
5481 for (size_t i = 0; i < 2; i++)
5482 ss_dealloc (&e->srange[i]);
/* Nodes with subexpressions: destroy both operands. */
5491 for (size_t i = 0; i < 2; i++)
5492 ctables_pcexpr_destroy (e->subs[i]);
/* Nothing extra to release for these node types. */
5496 case CTPO_CAT_NUMBER:
5497 case CTPO_CAT_NRANGE:
5498 case CTPO_CAT_MISSING:
5499 case CTPO_CAT_OTHERNM:
5500 case CTPO_CAT_SUBTOTAL:
5501 case CTPO_CAT_TOTAL:
5505 msg_location_destroy (e->location);
5510 static struct ctables_pcexpr *
5511 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5512 struct ctables_pcexpr *sub0,
5513 struct ctables_pcexpr *sub1)
5515 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5516 *e = (struct ctables_pcexpr) {
5518 .subs = { sub0, sub1 },
5519 .location = msg_location_merged (sub0->location, sub1->location),
5524 /* How to parse an operator. */
5527 enum token_type token;
5528 enum ctables_postcompute_op op;
/* Searches the N_OPS operators in OPS for one whose token equals the
   lexer's current token.

   T_NEG_NUM is treated differently from every other operator token.

   NOTE(review): the statement controlled by the T_NEG_NUM test and both
   return statements are missing from this extract.  Presumably the matched
   token is consumed unless it is T_NEG_NUM, and the matching operator (or
   null if none matches) is returned -- confirm against the full source. */
5531 static const struct operator *
5532 ctables_pcexpr_match_operator (struct lexer *lexer,
5533 const struct operator ops[], size_t n_ops)
5535 for (const struct operator *op = ops; op < ops + n_ops; op++)
5536 if (lex_token (lexer) == op->token)
5538 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators at one precedence level, given that the
   first operand LHS has already been parsed.

   Repeatedly matches one of the N_OPS operators in OPS, parses the next
   operand with PARSE_NEXT_LEVEL, and combines it with the expression so far
   using ctables_pcexpr_allocate_binary(), so that the chain associates to the
   left.  If CHAIN_WARNING is nonnull and more than one operator was parsed by
   the time the chain ends, CHAIN_WARNING is issued as a warning at the
   expression's location.  LHS is destroyed on the visible error path.

   NOTE(review): the tests on OP and RHS and the return statements are missing
   from this extract.  Presumably the accumulated expression is returned when
   no further operator matches, and null when an operand fails to parse --
   confirm against the full source. */
5547 static struct ctables_pcexpr *
5548 ctables_pcexpr_parse_binary_operators__ (
5549 struct lexer *lexer, struct dictionary *dict,
5550 const struct operator ops[], size_t n_ops,
5551 parse_recursively_func *parse_next_level,
5552 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators parsed so far. */
5554 for (int op_count = 0; ; op_count++)
5556 const struct operator *op
5557 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5560 if (op_count > 1 && chain_warning)
5561 msg_at (SW, lhs->location, "%s", chain_warning);
5566 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5569 ctables_pcexpr_destroy (lhs);
5573 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5577 static struct ctables_pcexpr *
5578 ctables_pcexpr_parse_binary_operators (
5579 struct lexer *lexer, struct dictionary *dict,
5580 const struct operator ops[], size_t n_ops,
5581 parse_recursively_func *parse_next_level, const char *chain_warning)
5583 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5587 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5589 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this to parse a
   parenthesized subexpression before the function itself is defined. */
5592 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5593 struct dictionary *);
5595 static struct ctables_pcexpr
5596 ctpo_cat_nrange (double low, double high)
5598 return (struct ctables_pcexpr) {
5599 .op = CTPO_CAT_NRANGE,
5600 .nrange = { low, high },
5604 static struct ctables_pcexpr
5605 ctpo_cat_srange (struct substring low, struct substring high)
5607 return (struct ctables_pcexpr) {
5608 .op = CTPO_CAT_SRANGE,
5609 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER.  The visible forms
   are:

   - a number (CTPO_CONSTANT),
   - MISSING, OTHERNM, or TOTAL,
   - SUBTOTAL, optionally followed by a bracketed integer index of at least 1
     (the index stays 0 when no brackets follow),
   - a bracketed category: "[LO THRU x]", "[x]", "[x THRU HI]", or
     "[x THRU y]", where the values are all numbers or all strings,
   - a parenthesized subexpression, parsed with ctables_pcexpr_parse_add().

   For every form but the last, the node is built in a local variable, given
   the source location that runs from the first token of the primary to the
   last token consumed, and returned as a heap copy.  Strings already parsed
   for a category are released if the closing bracket is missing.

   NOTE(review): the early returns on the error paths, the calls that consume
   number tokens, and the return for the parenthesized form are missing from
   this extract -- confirm against the full source. */
5613 static struct ctables_pcexpr *
5614 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5616 int start_ofs = lex_ofs (lexer);
5617 struct ctables_pcexpr e;
5618 if (lex_is_number (lexer))
5620 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5621 .number = lex_number (lexer) };
5624 else if (lex_match_id (lexer, "MISSING"))
5625 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5626 else if (lex_match_id (lexer, "OTHERNM"))
5627 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5628 else if (lex_match_id (lexer, "TOTAL"))
5629 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5630 else if (lex_match_id (lexer, "SUBTOTAL"))
5632 size_t subtotal_index = 0;
5633 if (lex_match (lexer, T_LBRACK))
5635 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5637 subtotal_index = lex_integer (lexer);
5639 if (!lex_force_match (lexer, T_RBRACK))
5642 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5643 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5645 else if (lex_match (lexer, T_LBRACK))
5647 if (lex_match_id (lexer, "LO"))
5649 if (!lex_force_match_id (lexer, "THRU"))
/* An open lower bound is a null substring for strings and -DBL_MAX for
   numbers. */
5652 if (lex_is_string (lexer))
5654 struct substring low = { .string = NULL };
5655 struct substring high = parse_substring (lexer, dict);
5656 e = ctpo_cat_srange (low, high);
5660 if (!lex_force_num (lexer))
5662 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5666 else if (lex_is_number (lexer))
5668 double number = lex_number (lexer);
5670 if (lex_match_id (lexer, "THRU"))
/* An open upper bound is DBL_MAX. */
5672 if (lex_match_id (lexer, "HI"))
5673 e = ctpo_cat_nrange (number, DBL_MAX);
5676 if (!lex_force_num (lexer))
5678 e = ctpo_cat_nrange (number, lex_number (lexer));
5683 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5686 else if (lex_is_string (lexer))
5688 struct substring s = parse_substring (lexer, dict);
5690 if (lex_match_id (lexer, "THRU"))
5692 struct substring high;
/* An open upper bound is a null substring. */
5694 if (lex_match_id (lexer, "HI"))
5695 high = (struct substring) { .string = NULL };
5698 if (!lex_force_string (lexer))
5703 high = parse_substring (lexer, dict);
5706 e = ctpo_cat_srange (s, high);
5709 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5713 lex_error (lexer, NULL);
/* Without the closing bracket, release any strings parsed so far. */
5717 if (!lex_force_match (lexer, T_RBRACK))
5719 if (e.op == CTPO_CAT_STRING)
5720 ss_dealloc (&e.string);
5721 else if (e.op == CTPO_CAT_SRANGE)
5723 ss_dealloc (&e.srange[0]);
5724 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression. */
5729 else if (lex_match (lexer, T_LPAREN))
5731 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5734 if (!lex_force_match (lexer, T_RPAREN))
5736 ctables_pcexpr_destroy (ep);
5743 lex_error (lexer, NULL);
/* Attach the source location and return a heap copy of the node. */
5747 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5748 return xmemdup (&e, sizeof e);
/* Allocates a new postcompute expression node whose source location runs
   from token offset START_OFS to the token just before LEXER's current one.

   NOTE(review): the designators for the node's operation and subexpression,
   and the return statement, are missing from this extract.  Judging from the
   name and the callers, the node presumably negates SUB and is returned --
   confirm against the full source. */
5751 static struct ctables_pcexpr *
5752 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5753 struct lexer *lexer, int start_ofs)
5755 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5756 *e = (struct ctables_pcexpr) {
5759 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5764 static struct ctables_pcexpr *
5765 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5767 static const struct operator op = { T_EXP, CTPO_POW };
5769 const char *chain_warning =
5770 _("The exponentiation operator (`**') is left-associative: "
5771 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5772 "To disable this warning, insert parentheses.");
5774 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5775 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5776 ctables_pcexpr_parse_primary,
5779 /* Special case for situations like "-5**6", which must be parsed as
5782 int start_ofs = lex_ofs (lexer);
5783 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5784 *lhs = (struct ctables_pcexpr) {
5785 .op = CTPO_CONSTANT,
5786 .number = -lex_tokval (lexer),
5787 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5791 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5792 lexer, dict, &op, 1,
5793 ctables_pcexpr_parse_primary, chain_warning, lhs);
5797 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5800 /* Parses the unary minus level. */
5801 static struct ctables_pcexpr *
5802 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5804 int start_ofs = lex_ofs (lexer);
5805 if (!lex_match (lexer, T_DASH))
5806 return ctables_pcexpr_parse_exp (lexer, dict);
5808 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5812 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5815 /* Parses the multiplication and division level.

   Delegates to ctables_pcexpr_parse_binary_operators() with a two-entry
   operator table (T_ASTERISK maps to CTPO_MUL, T_SLASH to CTPO_DIV) and
   ctables_pcexpr_parse_neg as the parser for each operand.  The final
   argument is NULL; presumably that slot is the optional chain warning
   used by the exponentiation level (confirm against the helper). */
5816 static struct ctables_pcexpr *
5817 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5819 static const struct operator ops[] =
5821 { T_ASTERISK, CTPO_MUL },
5822 { T_SLASH, CTPO_DIV },
5825 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5826 sizeof ops / sizeof *ops,
5827 ctables_pcexpr_parse_neg, NULL);
5830 /* Parses the addition and subtraction level.

   This is the entry point used for a whole postcompute expression (see the
   call in ctables_parse_pcompute()).  It delegates to
   ctables_pcexpr_parse_binary_operators() with ctables_pcexpr_parse_mul as
   the operand parser and NULL as the final argument. */
5831 static struct ctables_pcexpr *
5832 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5834 static const struct operator ops[] =
5836 { T_PLUS, CTPO_ADD },
5837 { T_DASH, CTPO_SUB },
/* A negative-number token is listed as an addition operator, presumably so
   that input lexed as an operand followed by a negative number (such as
   "x -5") is treated as x + (-5).  Confirm how the helper handles the
   token itself. */
5838 { T_NEG_NUM, CTPO_ADD },
5841 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5842 ops, sizeof ops / sizeof *ops,
5843 ctables_pcexpr_parse_mul, NULL);
/* Searches CT's postcompute map for the entry whose name matches NAME.

   Candidates are taken from the hash bucket for
   utf8_hash_case_string (NAME, 0) and compared with utf8_strcasecmp(), so
   the lookup does not depend on letter case.

   NOTE(review): the statements that return the match and the not-found
   result are not visible in this copy; callers in this file appear to
   treat a null result as "not found". */
5846 static struct ctables_postcompute *
5847 ctables_find_postcompute (struct ctables *ct, const char *name)
5849 struct ctables_postcompute *pc;
5850 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5851 utf8_hash_case_string (name, 0), &ct->postcomputes)
5852 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form

       &NAME = EXPR (expression)

   and records the result in CT's postcompute map.  The expression is
   parsed with ctables_pcexpr_parse_add().  If a postcompute called NAME
   already exists, a warning is issued at the new definition, a note points
   at the previous one, and the old expression and location are destroyed;
   otherwise a new entry is allocated and inserted under the case-folded
   hash of its name.

   NOTE(review): the return-type line, the third parameter (the body uses
   `ct'), braces, early returns, the frees of NAME on error paths and the
   assignment of the parsed expression to the entry are not visible in this
   copy; confirm them against upstream. */
5858 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The caller matches the PCOMPUTE keyword before calling this function, so
   the token just before the current one is the start of the subcommand. */
5861 int pcompute_start = lex_ofs (lexer) - 1;
5863 if (!lex_match (lexer, T_AND))
5865 lex_error_expecting (lexer, "&");
5868 if (!lex_force_id (lexer))
/* Private copy of the identifier; it becomes the name of a newly created
   entry below. */
5871 char *name = ss_xstrdup (lex_tokss (lexer));
5874 if (!lex_force_match (lexer, T_EQUALS)
5875 || !lex_force_match_id (lexer, "EXPR")
5876 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets of the first and last tokens of the expression, used below
   to capture the expression's source text. */
5882 int expr_start = lex_ofs (lexer);
5883 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5884 int expr_end = lex_ofs (lexer) - 1;
5885 if (!expr || !lex_force_match (lexer, T_RPAREN))
5887 ctables_pcexpr_destroy (expr);
/* Offset of the closing parenthesis that was just matched. */
5891 int pcompute_end = lex_ofs (lexer) - 1;
5893 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5896 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then drop what the old definition owned. */
5899 msg_at (SW, location, _("New definition of &%s will override the "
5900 "previous definition."),
5902 msg_at (SN, pc->location, _("This is the previous definition."));
5904 ctables_pcexpr_destroy (pc->expr);
5905 msg_location_destroy (pc->location);
/* First definition: create the entry and register it in the map. */
5910 pc = xmalloc (sizeof *pc);
5911 *pc = (struct ctables_postcompute) { .name = name };
5912 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5913 utf8_hash_case_string (pc->name, 0));
5916 pc->location = location;
/* The label is the source text of the expression.  NOTE(review): any
   condition guarding this assignment is not visible here. */
5918 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES ... FORMAT into SSS.

   SSS is first reset to an empty set.  The loop then reads one summary
   specification per iteration (a summary function, a percentile if the
   function is CTSF_PTILE, and a format specifier) and appends it to
   SSS->specs, growing the array with x2nrealloc() as needed.  Parsing stops
   at the end of the command, at a slash, or at an identifier matching
   LABEL or HIDESOURCECATS, which begin the next PPROPERTIES setting.

   NOTE(review): the return-type line, braces, the error jumps and the
   return statements are not visible in this copy.  The trailing
   ctables_summary_spec_set_uninit() call presumably belongs to the error
   path; confirm. */
5923 ctables_parse_pproperties_format (struct lexer *lexer,
5924 struct ctables_summary_spec_set *sss)
5926 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5928 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5929 && !(lex_token (lexer) == T_ID
5930 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5931 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5932 lex_tokss (lexer)))))
5934 /* Parse function. */
5935 enum ctables_summary_function function;
5936 if (!parse_ctables_summary_function (lexer, &function))
5939 /* Parse percentile. */
/* Stays 0 unless the function is CTSF_PTILE, in which case the next token
   must be a number in the closed range 0 to 100. */
5940 double percentile = 0;
5941 if (function == CTSF_PTILE)
5943 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5945 percentile = lex_number (lexer);
/* Parse format. */
5950 struct fmt_spec format;
5951 bool is_ctables_format;
5952 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, enlarging the array when it is full. */
5955 if (sss->n >= sss->allocated)
5956 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5957 sizeof *sss->specs);
5958 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5959 .function = function,
5960 .percentile = percentile,
5962 .is_ctables_format = is_ctables_format,
5968 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.

   The syntax handled here is a list of `&NAME' references to existing
   postcomputes, followed by any number of LABEL, FORMAT and HIDESOURCECATS
   settings.  Each setting is applied to every postcompute in the list:

   - LABEL takes a string, which replaces each postcompute's label.
   - FORMAT takes summary specifications (see
     ctables_parse_pproperties_format()); each postcompute receives its own
     clone of the parsed set.
   - HIDESOURCECATS takes a boolean.

   Referring to a name that ctables_find_postcompute() does not know
   reports "Unknown computed category".

   NOTE(review): the return-type line, braces, the declaration of `n_pcs',
   the line that stores each PC into PCS, error jumps, the final cleanup of
   PCS and the return statements are not visible in this copy. */
5973 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5975 struct ctables_postcompute **pcs = NULL;
5977 size_t allocated_pcs = 0;
5979 while (lex_match (lexer, T_AND))
5981 if (!lex_force_id (lexer))
5983 struct ctables_postcompute *pc
5984 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5987 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5992 if (n_pcs >= allocated_pcs)
5993 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings, each applied to all of PCS. */
5997 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5999 if (lex_match_id (lexer, "LABEL"))
6001 lex_match (lexer, T_EQUALS);
6002 if (!lex_force_string (lexer))
6005 for (size_t i = 0; i < n_pcs; i++)
/* Each postcompute owns its own copy of the label string. */
6007 free (pcs[i]->label);
6008 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6013 else if (lex_match_id (lexer, "FORMAT"))
6015 lex_match (lexer, T_EQUALS);
6017 struct ctables_summary_spec_set sss;
6018 if (!ctables_parse_pproperties_format (lexer, &sss))
6021 for (size_t i = 0; i < n_pcs; i++)
/* NOTE(review): the condition selecting between reusing an existing spec
   set (uninit) and allocating a new one is not visible here; presumably it
   tests whether pcs[i]->specs is already set. */
6024 ctables_summary_spec_set_uninit (pcs[i]->specs);
6026 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6027 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
/* Every postcompute got a clone, so the parsed original is released. */
6029 ctables_summary_spec_set_uninit (&sss);
6031 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6033 lex_match (lexer, T_EQUALS);
6034 bool hide_source_cats;
6035 if (!parse_bool (lexer, &hide_source_cats))
6037 for (size_t i = 0; i < n_pcs; i++)
6038 pcs[i]->hide_source_cats = hide_source_cats;
6042 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time with localtime()
   and formatted by strftime() according to FORMAT.

   NOTE(review): in the visible lines neither the result of localtime()
   (which may be null) nor that of strftime() (0 when the output does not
   fit, leaving the buffer contents unspecified) is checked before VALUE is
   appended.  The declaration of VALUE is not visible in this copy. */
6055 put_strftime (struct string *out, time_t now, const char *format)
6057 const struct tm *tm = localtime (&now);
6059 strftime (value, sizeof value, format, tm);
6060 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this copy.
   put_title_text() uses the result as a condition, so the function
   presumably returns true when the prefix was skipped and false
   otherwise. */
6064 skip_prefix (struct substring *s, struct substring prefix)
6066 if (ss_starts_with (*s, prefix))
6068 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START (inclusive) up to EXPR_END (exclusive).  put_title_text()
   calls this to expand ")TABLE".

   Identifier tokens are looked up in DICT: when the identifier names a
   variable that has a variable label, the label is written in place of the
   identifier.  Other tokens are written using their source representation,
   with a space inserted before and after, except that no space precedes a
   right parenthesis or the first token, and none follows a left
   parenthesis or the last token.

   NOTE(review): the handling of bracketed sections (the `nest' counter and
   the bodies of the T_LBRACK / T_RBRACK branches) is not visible in this
   copy, nor is the release of REPR; confirm against upstream. */
6076 put_table_expression (struct string *out, struct lexer *lexer,
6077 struct dictionary *dict, int expr_start, int expr_end)
6080 for (int ofs = expr_start; ofs < expr_end; ofs++)
6082 const struct token *t = lex_ofs_token (lexer, ofs);
6083 if (t->type == T_LBRACK)
6085 else if (t->type == T_RBRACK && nest > 0)
6091 else if (t->type == T_ID)
6093 const struct variable *var
6094 = dict_lookup_var (dict, t->string.string);
/* Fall back to the identifier itself when it is not a variable or the
   variable has no label. */
6095 const char *label = var ? var_get_label (var) : NULL;
6096 ds_put_cstr (out, label ? label : t->string.string);
/* Any other token: separate it from what precedes, avoiding a doubled
   space and a space before ")". */
6100 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6101 ds_put_byte (out, ' ');
6103 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6104 ds_put_cstr (out, repr);
6107 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6108 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding the title keywords that start with a right
   parenthesis:

   - ")DATE" becomes NOW formatted with strftime's "%x".
   - ")TIME" becomes NOW formatted with strftime's "%X".
   - ")TABLE" becomes the table expression given by the token range
     EXPR_START to EXPR_END of LEXER (see put_table_expression()).

   Text up to each ")" is copied verbatim, and a ")" that does not start one
   of the keywords above is copied as an ordinary character.

   NOTE(review): the enclosing loop and its exit are not visible in this
   copy; the visible lines are consistent with looping until IN is empty. */
6114 put_title_text (struct string *out, struct substring in, time_t now,
6115 struct lexer *lexer, struct dictionary *dict,
6116 int expr_start, int expr_end)
/* Copy everything before the next ")" and drop it from IN. */
6120 size_t chunk = ss_find_byte (in, ')');
6121 ds_put_substring (out, ss_head (in, chunk));
6122 ss_advance (&in, chunk);
6123 if (ss_is_empty (in))
6126 if (skip_prefix (&in, ss_cstr (")DATE")))
6127 put_strftime (out, now, "%x");
6128 else if (skip_prefix (&in, ss_cstr (")TIME")))
6129 put_strftime (out, now, "%X");
6130 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6131 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ")" itself and continue after it. */
6134 ds_put_byte (out, ')');
6135 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   Visible flow:

   1. Set up defaults: per-variable label display taken from the global
      "show variables" setting, a private copy of the default table look,
      and the CTABLES-specific number formats.

   2. Parse the subcommands that may precede the first TABLE: FORMAT,
      VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT and
      HIDESMALLCOUNTS.

   3. For each TABLE: parse the row, column and layer axis expressions,
      check where scale variables and summaries appear, then parse the
      per-table subcommands SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES,
      SIGTEST and COMPARETEST.

   4. Run ctables_execute() on the active dataset and commit the procedure.

   Returns CMD_SUCCESS only if both ctables_execute() and proc_commit()
   succeed, otherwise CMD_FAILURE.

   NOTE(review): this copy of the function is missing its short lines
   (return type, braces, `goto'/`return' statements, labels, many short
   assignments and conditions).  The comments below describe only what the
   visible lines establish; confirm the rest against upstream. */
6141 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6143 struct casereader *input = NULL;
/* Measurement-level guessing: the guesser is run over the data and then
   destroyed.  NOTE(review): the condition under which this pass over the
   data happens is not visible here. */
6145 struct measure_guesser *mg = measure_guesser_create (ds);
6148 input = proc_open (ds);
6149 measure_guesser_run (mg, input);
6150 measure_guesser_destroy (mg);
/* One label-display setting per dictionary variable, initialized from the
   global "show variables" setting. */
6153 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6154 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6155 enum settings_value_show tvars = settings_get_show_variables ();
6156 for (size_t i = 0; i < n_vars; i++)
6157 vlabels[i] = (enum ctables_vlabel) tvars;
/* Private, modifiable copy of the default look, with omit_empty turned
   off. */
6159 struct pivot_table_look *look = pivot_table_look_unshare (
6160 pivot_table_look_ref (pivot_table_look_get_default ()));
6161 look->omit_empty = false;
6163 struct ctables *ct = xmalloc (sizeof *ct);
6164 *ct = (struct ctables) {
6165 .dict = dataset_dict (ds),
6167 .ctables_formats = FMT_SETTINGS_INIT,
6169 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp used when titles expand ")DATE" and ")TIME". */
6172 time_t now = time (NULL);
/* Number styles for the four CTABLES-specific formats.  Each has two
   spellings; the one in the dot_string field is used when the decimal
   point setting is '.', the other one otherwise. */
6177 const char *dot_string;
6178 const char *comma_string;
6180 static const struct ctf ctfs[4] = {
6181 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6182 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6183 { CTEF_PAREN, "-,(,),", "-.(.)." },
6184 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6186 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6187 for (size_t i = 0; i < 4; i++)
6189 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6190 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6191 fmt_number_style_from_string (s));
6194 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that precede the first TABLE. */
6197 while (!lex_match_id (lexer, "TABLE"))
6199 if (lex_match_id (lexer, "FORMAT"))
/* Column width limits stay SYSMIS until given; UNITS defaults to points
   (72 per inch). */
6201 double widths[2] = { SYSMIS, SYSMIS };
6202 double units_per_inch = 72.0;
6204 while (lex_token (lexer) != T_SLASH)
6206 if (lex_match_id (lexer, "MINCOLWIDTH"))
6208 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6211 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6213 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6216 else if (lex_match_id (lexer, "UNITS"))
6218 lex_match (lexer, T_EQUALS);
6219 if (lex_match_id (lexer, "POINTS"))
6220 units_per_inch = 72.0;
6221 else if (lex_match_id (lexer, "INCHES"))
6222 units_per_inch = 1.0;
6223 else if (lex_match_id (lexer, "CM"))
6224 units_per_inch = 2.54;
6227 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
/* EMPTY: text shown for empty cells, stored in ct->zero.  ZERO keeps the
   default, BLANK stores an empty string, and a string is stored as is. */
6231 else if (lex_match_id (lexer, "EMPTY"))
6236 lex_match (lexer, T_EQUALS);
6237 if (lex_match_id (lexer, "ZERO"))
6239 /* Nothing to do. */
6241 else if (lex_match_id (lexer, "BLANK"))
6242 ct->zero = xstrdup ("");
6243 else if (lex_force_string (lexer))
6245 ct->zero = ss_xstrdup (lex_tokss (lexer));
/* MISSING: a string other than "." is copied into ct->missing.
   NOTE(review): the value used for "." is not visible here. */
6251 else if (lex_match_id (lexer, "MISSING"))
6253 lex_match (lexer, T_EQUALS);
6254 if (!lex_force_string (lexer))
6258 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6259 ? ss_xstrdup (lex_tokss (lexer))
6265 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6266 "UNITS", "EMPTY", "MISSING");
6271 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6272 && widths[0] > widths[1])
6274 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was given from the chosen unit to inches and
   then to 96ths of an inch, and store it in the look's horizontal width
   range. */
6278 for (size_t i = 0; i < 2; i++)
6279 if (widths[i] != SYSMIS)
6281 int *wr = ct->look->width_ranges[TABLE_HORZ];
6282 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS VARIABLES=... DISPLAY=...: label display for the listed
   variables. */
6287 else if (lex_match_id (lexer, "VLABELS"))
6289 if (!lex_force_match_id (lexer, "VARIABLES"))
6291 lex_match (lexer, T_EQUALS);
6293 struct variable **vars;
6295 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6299 if (!lex_force_match_id (lexer, "DISPLAY"))
6304 lex_match (lexer, T_EQUALS);
6306 enum ctables_vlabel vlabel;
6307 if (lex_match_id (lexer, "DEFAULT"))
6308 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6309 else if (lex_match_id (lexer, "NAME"))
6311 else if (lex_match_id (lexer, "LABEL"))
6312 vlabel = CTVL_LABEL;
6313 else if (lex_match_id (lexer, "BOTH"))
6315 else if (lex_match_id (lexer, "NONE"))
6319 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* The per-variable array is indexed by dictionary index. */
6325 for (size_t i = 0; i < n_vars; i++)
6326 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6329 else if (lex_match_id (lexer, "MRSETS"))
6331 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6333 lex_match (lexer, T_EQUALS);
6334 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6337 else if (lex_match_id (lexer, "SMISSING"))
6339 if (lex_match_id (lexer, "VARIABLE"))
6340 ct->smissing_listwise = false;
6341 else if (lex_match_id (lexer, "LISTWISE"))
6342 ct->smissing_listwise = true;
6345 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6349 else if (lex_match_id (lexer, "PCOMPUTE"))
6351 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6354 else if (lex_match_id (lexer, "PPROPERTIES"))
6356 if (!ctables_parse_pproperties (lexer, ct))
6359 else if (lex_match_id (lexer, "WEIGHT"))
6361 if (!lex_force_match_id (lexer, "VARIABLE"))
6363 lex_match (lexer, T_EQUALS);
6364 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS: with COUNT, the threshold is read from the syntax;
   without it, a threshold that is still 0 becomes 5. */
6368 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6370 if (lex_match_id (lexer, "COUNT"))
6372 lex_match (lexer, T_EQUALS);
6373 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6376 ct->hide_threshold = lex_integer (lexer);
6379 else if (ct->hide_threshold == 0)
6380 ct->hide_threshold = 5;
6384 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6385 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6386 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6390 if (!lex_force_match (lexer, T_SLASH))
/* One iteration of the following loop per TABLE subcommand. */
6394 size_t allocated_tables = 0;
6397 if (ct->n_tables >= allocated_tables)
6398 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6399 sizeof *ct->tables);
/* Default category specification and per-variable categories array for
   the new table.  NOTE(review): the initializer members that tie these
   together are mostly not visible here. */
6401 struct ctables_category *cat = xmalloc (sizeof *cat);
6402 *cat = (struct ctables_category) {
6404 .include_missing = false,
6405 .sort_ascending = true,
6408 struct ctables_categories *c = xmalloc (sizeof *c);
6409 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6410 *c = (struct ctables_categories) {
6417 struct ctables_categories **categories = xnmalloc (n_vars,
6418 sizeof *categories);
6419 for (size_t i = 0; i < n_vars; i++)
/* The table itself: summary labels visible on the column axis, and each
   axis's category labels on that same axis. */
6422 struct ctables_table *t = xmalloc (sizeof *t);
6423 *t = (struct ctables_table) {
6425 .slabels_axis = PIVOT_AXIS_COLUMN,
6426 .slabels_visible = true,
6427 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6429 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6430 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6431 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6433 .clabels_from_axis = PIVOT_AXIS_LAYER,
6434 .categories = categories,
6435 .n_categories = n_vars,
6438 ct->tables[ct->n_tables++] = t;
/* Axis expressions: rows [BY columns [BY layers]].  EXPR_START and
   EXPR_END delimit the tokens of the whole expression so that a title can
   later expand ")TABLE". */
6440 lex_match (lexer, T_EQUALS);
6441 int expr_start = lex_ofs (lexer);
6442 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6444 if (lex_match (lexer, T_BY))
6446 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6447 ct, t, PIVOT_AXIS_COLUMN))
6450 if (lex_match (lexer, T_BY))
6452 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6453 ct, t, PIVOT_AXIS_LAYER))
6457 int expr_end = lex_ofs (lexer);
6459 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6460 && !t->axes[PIVOT_AXIS_LAYER])
6462 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables: find one per axis and report an error, with a note for
   each axis involved, when the restriction below is violated.
   NOTE(review): the counting and the test on N_SCALES are not visible
   here. */
6466 const struct ctables_axis *scales[PIVOT_N_AXES];
6467 size_t n_scales = 0;
6468 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6470 scales[a] = find_scale (t->axes[a]);
6476 msg (SE, _("Scale variables may appear only on one axis."));
6477 if (scales[PIVOT_AXIS_ROW])
6478 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6479 _("This scale variable appears on the rows axis."));
6480 if (scales[PIVOT_AXIS_COLUMN])
6481 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6482 _("This scale variable appears on the columns axis."));
6483 if (scales[PIVOT_AXIS_LAYER])
6484 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6485 _("This scale variable appears on the layer axis."));
/* Summaries: an axis carries a summary if it has a scale variable or a
   categorical variable with a summary specification.  More than one such
   axis is an error. */
6489 const struct ctables_axis *summaries[PIVOT_N_AXES];
6490 size_t n_summaries = 0;
6491 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6493 summaries[a] = (scales[a]
6495 : find_categorical_summary_spec (t->axes[a]));
6499 if (n_summaries > 1)
6501 msg (SE, _("Summaries may appear only on one axis."));
6502 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6505 msg_at (SN, summaries[a]->loc,
6507 ? _("This variable on the rows axis has a summary.")
6508 : a == PIVOT_AXIS_COLUMN
6509 ? _("This variable on the columns axis has a summary.")
6510 : _("This variable on the layers axis has a summary."));
6512 msg_at (SN, summaries[a]->loc,
6513 _("This is a scale variable, so it always has a "
6514 "summary even if the syntax does not explicitly "
/* Summary axis: the axis that has a summary if there is one, otherwise an
   axis that has any variables at all.  NOTE(review): the loop exit is not
   visible here. */
6519 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6520 if (n_summaries ? summaries[a] : t->axes[a])
6522 t->summary_axis = a;
6526 if (lex_token (lexer) == T_ENDCMD)
6528 if (!ctables_prepare_table (t))
6532 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6535 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6537 if (lex_match_id (lexer, "SLABELS"))
6539 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6541 if (lex_match_id (lexer, "POSITION"))
6543 lex_match (lexer, T_EQUALS);
6544 if (lex_match_id (lexer, "COLUMN"))
6545 t->slabels_axis = PIVOT_AXIS_COLUMN;
6546 else if (lex_match_id (lexer, "ROW"))
6547 t->slabels_axis = PIVOT_AXIS_ROW;
6548 else if (lex_match_id (lexer, "LAYER"))
6549 t->slabels_axis = PIVOT_AXIS_LAYER;
6552 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6556 else if (lex_match_id (lexer, "VISIBLE"))
6558 lex_match (lexer, T_EQUALS);
6559 if (!parse_bool (lexer, &t->slabels_visible))
6564 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS: AUTO restores the default placement; ROWLABELS or COLLABELS
   moves that axis's category labels to the opposite axis or to the layer
   axis. */
6569 else if (lex_match_id (lexer, "CLABELS"))
6571 if (lex_match_id (lexer, "AUTO"))
6573 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6574 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6576 else if (lex_match_id (lexer, "ROWLABELS"))
6578 lex_match (lexer, T_EQUALS);
6579 if (lex_match_id (lexer, "OPPOSITE"))
6580 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6581 else if (lex_match_id (lexer, "LAYER"))
6582 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6585 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6589 else if (lex_match_id (lexer, "COLLABELS"))
6591 lex_match (lexer, T_EQUALS);
6592 if (lex_match_id (lexer, "OPPOSITE"))
6593 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6594 else if (lex_match_id (lexer, "LAYER"))
6595 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6598 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6604 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6609 else if (lex_match_id (lexer, "CRITERIA"))
6611 if (!lex_force_match_id (lexer, "CILEVEL"))
6613 lex_match (lexer, T_EQUALS);
6615 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6617 t->cilevel = lex_number (lexer);
6620 else if (lex_match_id (lexer, "CATEGORIES"))
6622 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: each of CAPTION, CORNER and TITLE takes one or more strings,
   which are joined with single spaces after keyword expansion by
   put_title_text(). */
6626 else if (lex_match_id (lexer, "TITLES"))
6631 if (lex_match_id (lexer, "CAPTION"))
6632 textp = &t->caption;
6633 else if (lex_match_id (lexer, "CORNER"))
6635 else if (lex_match_id (lexer, "TITLE"))
6639 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6642 lex_match (lexer, T_EQUALS);
6644 struct string s = DS_EMPTY_INITIALIZER;
6645 while (lex_is_string (lexer))
6647 if (!ds_is_empty (&s))
6648 ds_put_byte (&s, ' ');
6649 put_title_text (&s, lex_tokss (lexer), now,
6650 lexer, dataset_dict (ds),
6651 expr_start, expr_end);
/* The accumulated string becomes the selected text. */
6655 *textp = ds_steal_cstr (&s);
6657 while (lex_token (lexer) != T_SLASH
6658 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: settings for the chi-square test, stored in t->chisq. */
6660 else if (lex_match_id (lexer, "SIGTEST"))
6664 t->chisq = xmalloc (sizeof *t->chisq);
6665 *t->chisq = (struct ctables_chisq) {
6667 .include_mrsets = true,
6668 .all_visible = true,
6674 if (lex_match_id (lexer, "TYPE"))
6676 lex_match (lexer, T_EQUALS);
6677 if (!lex_force_match_id (lexer, "CHISQUARE"))
6680 else if (lex_match_id (lexer, "ALPHA"))
6682 lex_match (lexer, T_EQUALS);
6683 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6685 t->chisq->alpha = lex_number (lexer);
6688 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6690 lex_match (lexer, T_EQUALS);
6691 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6694 else if (lex_match_id (lexer, "CATEGORIES"))
6696 lex_match (lexer, T_EQUALS);
6697 if (lex_match_id (lexer, "ALLVISIBLE"))
6698 t->chisq->all_visible = true;
6699 else if (lex_match_id (lexer, "SUBTOTALS"))
6700 t->chisq->all_visible = false;
6703 lex_error_expecting (lexer,
6704 "ALLVISIBLE", "SUBTOTALS");
6710 lex_error_expecting (lexer, "TYPE", "ALPHA",
6711 "INCLUDEMRSETS", "CATEGORIES");
6715 while (lex_token (lexer) != T_SLASH
6716 && lex_token (lexer) != T_ENDCMD);
/* COMPARETEST: settings for pairwise comparisons, stored in t->pairwise. */
6718 else if (lex_match_id (lexer, "COMPARETEST"))
6722 t->pairwise = xmalloc (sizeof *t->pairwise);
6723 *t->pairwise = (struct ctables_pairwise) {
6725 .alpha = { .05, .05 },
6726 .adjust = BONFERRONI,
6727 .include_mrsets = true,
6728 .meansvariance_allcats = true,
6729 .all_visible = true,
6738 if (lex_match_id (lexer, "TYPE"))
6740 lex_match (lexer, T_EQUALS);
6741 if (lex_match_id (lexer, "PROP"))
6742 t->pairwise->type = PROP;
6743 else if (lex_match_id (lexer, "MEAN"))
6744 t->pairwise->type = MEAN;
6747 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two values strictly between 0 and 1.  Two values are
   stored smaller first; a single value is stored in both slots. */
6751 else if (lex_match_id (lexer, "ALPHA"))
6753 lex_match (lexer, T_EQUALS);
6755 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6757 double a0 = lex_number (lexer);
6760 lex_match (lexer, T_COMMA);
6761 if (lex_is_number (lexer))
6763 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6765 double a1 = lex_number (lexer);
6768 t->pairwise->alpha[0] = MIN (a0, a1);
6769 t->pairwise->alpha[1] = MAX (a0, a1);
6772 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6774 else if (lex_match_id (lexer, "ADJUST"))
6776 lex_match (lexer, T_EQUALS);
6777 if (lex_match_id (lexer, "BONFERRONI"))
6778 t->pairwise->adjust = BONFERRONI;
6779 else if (lex_match_id (lexer, "BH"))
6780 t->pairwise->adjust = BH;
6781 else if (lex_match_id (lexer, "NONE"))
6782 t->pairwise->adjust = 0;
6785 lex_error_expecting (lexer, "BONFERRONI", "BH",
6790 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6792 lex_match (lexer, T_EQUALS);
6793 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6796 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6798 lex_match (lexer, T_EQUALS);
6799 if (lex_match_id (lexer, "ALLCATS"))
6800 t->pairwise->meansvariance_allcats = true;
6801 else if (lex_match_id (lexer, "TESTEDCATS"))
6802 t->pairwise->meansvariance_allcats = false;
6805 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6809 else if (lex_match_id (lexer, "CATEGORIES"))
6811 lex_match (lexer, T_EQUALS);
6812 if (lex_match_id (lexer, "ALLVISIBLE"))
6813 t->pairwise->all_visible = true;
6814 else if (lex_match_id (lexer, "SUBTOTALS"))
6815 t->pairwise->all_visible = false;
6818 lex_error_expecting (lexer, "ALLVISIBLE",
6823 else if (lex_match_id (lexer, "MERGE"))
6825 lex_match (lexer, T_EQUALS);
6826 if (!parse_bool (lexer, &t->pairwise->merge))
6829 else if (lex_match_id (lexer, "STYLE"))
6831 lex_match (lexer, T_EQUALS);
6832 if (lex_match_id (lexer, "APA"))
6833 t->pairwise->apa_style = true;
6834 else if (lex_match_id (lexer, "SIMPLE"))
6835 t->pairwise->apa_style = false;
6838 lex_error_expecting (lexer, "APA", "SIMPLE");
6842 else if (lex_match_id (lexer, "SHOWSIG"))
6844 lex_match (lexer, T_EQUALS);
6845 if (!parse_bool (lexer, &t->pairwise->show_sig))
6850 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6851 "INCLUDEMRSETS", "MEANSVARIANCE",
6852 "CATEGORIES", "MERGE", "STYLE",
6857 while (lex_token (lexer) != T_SLASH
6858 && lex_token (lexer) != T_ENDCMD);
6862 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6863 "CRITERIA", "CATEGORIES", "TITLES",
6864 "SIGTEST", "COMPARETEST");
6868 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved away from their own axis for rows or for
   columns, but not for both. */
6872 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6873 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6875 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6879 if (!ctables_prepare_table (t))
6882 while (lex_token (lexer) != T_ENDCMD);
/* Execute over the active dataset.  The command succeeds only if both the
   execution and the commit succeed.  NOTE(review): the condition guarding
   this proc_open() call is not visible here. */
6885 input = proc_open (ds);
6886 bool ok = ctables_execute (ds, input, ct);
6887 ok = proc_commit (ds) && ok;
6889 ctables_destroy (ct);
6890 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Presumably the shared error exit (its label and return statement are not
   visible): release everything owned by CT. */
6895 ctables_destroy (ct);