1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  Instances live in struct ctables's
   'postcomputes' hmap and are looked up with ctables_find_postcompute(). */
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr; /* Released with ctables_pcexpr_destroy(). */
216 struct ctables_summary_spec_set *specs; /* Released with ctables_summary_spec_set_uninit(). */
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings are equal, a null substring never equals a
   non-null one, and two non-null substrings are compared with ss_equals(). */
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table is
   generated from the AVAILABILITY column of ctables.inc, indexed by the
   function's enum value. */
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
/* Returns true if F is one of the plain count functions: CTSF_COUNT,
   CTSF_ECOUNT, or CTSF_UCOUNT. */
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
/* Allocates a new axis node whose children are SUB0 and SUB1.  The node's
   source location spans from token offset START_OFS up to the token just
   before LEXER's current one. */
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The choice is driven by the FORMAT column of ctables.inc; the results
   visible here are F with width 40, PCT with width 40 and 1 decimal, and
   VAR's own print format. */
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
/* Returns the name of summary FUNCTION, taken from the NAME column of
   ctables.inc.  The returned string is a statically allocated literal. */
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit.  strcspn()
   yields the length of the leading digit-free span, so landing on the
   terminating null means no digit was found. */
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1399 fmt_settings_uninit (&ct->ctables_formats);
1400 pivot_table_look_unref (ct->look);
1404 for (size_t i = 0; i < ct->n_tables; i++)
1405 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose numeric range is LOW through HIGH.  Callers pass
   -DBL_MAX or DBL_MAX for an end left open with LO or HI. */
1410 static struct ctables_category
1411 cct_nrange (double low, double high)
1413 return (struct ctables_category) {
1415 .nrange = { low, high }
/* Returns a category whose string range is LOW through HIGH.  Callers pass a
   substring with a null 'string' pointer for an end left open with LO or
   HI.  The substrings are stored as-is, not copied. */
1419 static struct ctables_category
1420 cct_srange (struct substring low, struct substring high)
1422 return (struct ctables_category) {
1424 .srange = { low, high }
1429 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1430 struct ctables_category *cat)
1433 if (lex_match (lexer, T_EQUALS))
1435 if (!lex_force_string (lexer))
1438 total_label = ss_xstrdup (lex_tokss (lexer));
1442 total_label = xstrdup (_("Subtotal"));
1444 *cat = (struct ctables_category) {
1445 .type = CCT_SUBTOTAL,
1446 .hide_subcategories = hide_subcategories,
1447 .total_label = total_label
/* Recodes LEXER's current token string between "UTF-8" and DICT's encoding
   (presumably from UTF-8 into the dictionary encoding -- confirm against
   recode_substring_pool()'s argument order), then strips trailing spaces
   from the result. */
1452 static struct substring
1453 parse_substring (struct lexer *lexer, struct dictionary *dict)
1455 struct substring s = recode_substring_pool (
1456 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1457 ss_rtrim (&s, ss_cstr (" "));
1463 ctables_table_parse_explicit_category (struct lexer *lexer,
1464 struct dictionary *dict,
1466 struct ctables_category *cat)
1468 if (lex_match_id (lexer, "OTHERNM"))
1469 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1470 else if (lex_match_id (lexer, "MISSING"))
1471 *cat = (struct ctables_category) { .type = CCT_MISSING };
1472 else if (lex_match_id (lexer, "SUBTOTAL"))
1473 return ctables_table_parse_subtotal (lexer, false, cat);
1474 else if (lex_match_id (lexer, "HSUBTOTAL"))
1475 return ctables_table_parse_subtotal (lexer, true, cat);
1476 else if (lex_match_id (lexer, "LO"))
1478 if (!lex_force_match_id (lexer, "THRU"))
1480 if (lex_is_string (lexer))
1482 struct substring sr0 = { .string = NULL };
1483 struct substring sr1 = parse_substring (lexer, dict);
1484 *cat = cct_srange (sr0, sr1);
1486 else if (lex_force_num (lexer))
1488 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1494 else if (lex_is_number (lexer))
1496 double number = lex_number (lexer);
1498 if (lex_match_id (lexer, "THRU"))
1500 if (lex_match_id (lexer, "HI"))
1501 *cat = cct_nrange (number, DBL_MAX);
1504 if (!lex_force_num (lexer))
1506 *cat = cct_nrange (number, lex_number (lexer));
1511 *cat = (struct ctables_category) {
1516 else if (lex_is_string (lexer))
1518 struct substring s = parse_substring (lexer, dict);
1519 if (lex_match_id (lexer, "THRU"))
1521 if (lex_match_id (lexer, "HI"))
1523 struct substring sr1 = { .string = NULL };
1524 *cat = cct_srange (s, sr1);
1528 if (!lex_force_string (lexer))
1533 struct substring sr1 = parse_substring (lexer, dict);
1534 *cat = cct_srange (s, sr1);
1538 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1540 else if (lex_match (lexer, T_AND))
1542 if (!lex_force_id (lexer))
1544 struct ctables_postcompute *pc = ctables_find_postcompute (
1545 ct, lex_tokcstr (lexer));
1548 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1549 msg_at (SE, loc, _("Unknown postcompute &%s."),
1550 lex_tokcstr (lexer));
1551 msg_location_destroy (loc);
1556 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1560 lex_error (lexer, NULL);
1568 parse_category_string (struct msg_location *location,
1569 struct substring s, const struct dictionary *dict,
1570 enum fmt_type format, double *n)
1573 char *error = data_in (s, dict_get_encoding (dict), format,
1574 settings_get_fmt_settings (), &v, 0, NULL);
1577 msg_at (SE, location,
1578 _("Failed to parse category specification as format %s: %s."),
1579 fmt_name (format), error);
/* Walks the categories in CATS looking for the one that postcompute operand E
   refers to.  Each visible branch pairs an operand kind (CTPO_CAT_*) with the
   category type it can match (CCT_*) and, where the kind carries data,
   compares that data as well: the number, the string, both ends of a numeric
   range, or both ends of a string range (either end may be null).

   Subtotals are matched by position through e->subtotal_index, with
   N_SUBTOTALS tracking how many have been seen.  An index of 0 is handled
   separately, and the final test singles out an index-0 reference when more
   than one subtotal exists.

   NOTE(review): the statements that record the match in BEST, advance
   N_SUBTOTALS, and return are not visible here -- confirm the exact result
   for the ambiguous-subtotal case. */
1588 static struct ctables_category *
1589 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1590 const struct ctables_pcexpr *e)
1592 struct ctables_category *best = NULL;
1593 size_t n_subtotals = 0;
1594 for (size_t i = 0; i < cats->n_cats; i++)
1596 struct ctables_category *cat = &cats->cats[i];
1599 case CTPO_CAT_NUMBER:
1600 if (cat->type == CCT_NUMBER && cat->number == e->number)
1604 case CTPO_CAT_STRING:
1605 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1609 case CTPO_CAT_NRANGE:
1610 if (cat->type == CCT_NRANGE
1611 && cat->nrange[0] == e->nrange[0]
1612 && cat->nrange[1] == e->nrange[1])
1616 case CTPO_CAT_SRANGE:
1617 if (cat->type == CCT_SRANGE
1618 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1619 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1623 case CTPO_CAT_MISSING:
1624 if (cat->type == CCT_MISSING)
1628 case CTPO_CAT_OTHERNM:
1629 if (cat->type == CCT_OTHERNM)
1633 case CTPO_CAT_SUBTOTAL:
1634 if (cat->type == CCT_SUBTOTAL)
1637 if (e->subtotal_index == n_subtotals)
1639 else if (e->subtotal_index == 0)
1644 case CTPO_CAT_TOTAL:
1645 if (cat->type == CCT_TOTAL)
/* An unpositioned SUBTOTAL reference combined with several subtotals. */
1659 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute operand E refers to, first
   converting string operands to numeric ones when PARSE_FORMAT is anything
   other than FMT_F:

   - A CTPO_CAT_STRING operand is parsed with parse_category_string() and
     looked up as a CTPO_CAT_NUMBER operand.

   - A CTPO_CAT_SRANGE operand is looked up as a CTPO_CAT_NRANGE operand.  An
     endpoint whose string is null becomes -DBL_MAX (lower) or DBL_MAX
     (upper); any other endpoint is parsed with parse_category_string().

   The converted operand keeps E's location.  Every other case passes E
   through unchanged.  DICT is used only for parsing.

   NOTE(review): what is returned when parsing fails is not visible here. */
1664 static struct ctables_category *
1665 ctables_find_category_for_postcompute (const struct dictionary *dict,
1666 const struct ctables_categories *cats,
1667 enum fmt_type parse_format,
1668 const struct ctables_pcexpr *e)
1670 if (parse_format != FMT_F)
1672 if (e->op == CTPO_CAT_STRING)
1675 if (!parse_category_string (e->location, e->string, dict,
1676 parse_format, &number))
1679 struct ctables_pcexpr e2 = {
1680 .op = CTPO_CAT_NUMBER,
1682 .location = e->location,
1684 return ctables_find_category_for_postcompute__ (cats, &e2);
1686 else if (e->op == CTPO_CAT_SRANGE)
/* A null endpoint string stands for an open end of the range. */
1689 if (!e->srange[0].string)
1690 nrange[0] = -DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[0], dict,
1692 parse_format, &nrange[0]))
1695 if (!e->srange[1].string)
1696 nrange[1] = DBL_MAX;
1697 else if (!parse_category_string (e->location, e->srange[1], dict,
1698 parse_format, &nrange[1]))
1701 struct ctables_pcexpr e2 = {
1702 .op = CTPO_CAT_NRANGE,
1703 .nrange = { nrange[0], nrange[1] },
1704 .location = e->location,
1706 return ctables_find_category_for_postcompute__ (cats, &e2);
1709 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, used by computed category PC_CAT, against
   the category list CATS.

   For operands that name a category (the CTPO_CAT_* cases), the category is
   looked up with ctables_find_category_for_postcompute() using PC_CAT's
   parse format.  The visible diagnostics cover two situations:

   - An unpositioned SUBTOTAL reference when CATS holds more than one
     subtotal: an error at CATS_LOCATION plus a note at the reference.

   - A reference to a category absent from CATS: an error at PC_CAT's
     location, a note at the reference, and a hint on how to fix it that
     depends on the operand kind.

   For the remaining operators the function recurses into both of E's
   subexpressions, skipping null ones.

   NOTE(review): the return type and the return statements are not visible
   here; the negated recursive call suggests a boolean success value --
   confirm. */
1713 ctables_recursive_check_postcompute (struct dictionary *dict,
1714 const struct ctables_pcexpr *e,
1715 struct ctables_category *pc_cat,
1716 const struct ctables_categories *cats,
1717 const struct msg_location *cats_location)
1721 case CTPO_CAT_NUMBER:
1722 case CTPO_CAT_STRING:
1723 case CTPO_CAT_NRANGE:
1724 case CTPO_CAT_SRANGE:
1725 case CTPO_CAT_MISSING:
1726 case CTPO_CAT_OTHERNM:
1727 case CTPO_CAT_SUBTOTAL:
1728 case CTPO_CAT_TOTAL:
1730 struct ctables_category *cat = ctables_find_category_for_postcompute (
1731 dict, cats, pc_cat->parse_format, e);
/* Count the subtotals so that an ambiguous reference gets its own message. */
1734 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1736 size_t n_subtotals = 0;
1737 for (size_t i = 0; i < cats->n_cats; i++)
1738 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1739 if (n_subtotals > 1)
1741 msg_at (SE, cats_location,
1742 ngettext ("These categories include %zu instance "
1743 "of SUBTOTAL or HSUBTOTAL, so references "
1744 "from computed categories must refer to "
1745 "subtotals by position, "
1746 "e.g. SUBTOTAL[1].",
1747 "These categories include %zu instances "
1748 "of SUBTOTAL or HSUBTOTAL, so references "
1749 "from computed categories must refer to "
1750 "subtotals by position, "
1751 "e.g. SUBTOTAL[1].",
1754 msg_at (SN, e->location,
1755 _("This is the reference that lacks a position."));
1760 msg_at (SE, pc_cat->location,
1761 _("Computed category &%s references a category not included "
1762 "in the category list."),
1764 msg_at (SN, e->location, _("This is the missing category."));
1765 if (e->op == CTPO_CAT_SUBTOTAL)
1766 msg_at (SN, cats_location,
1767 _("To fix the problem, add subtotals to the "
1768 "list of categories here."));
/* Unlike the other two hints, this one is issued with msg() and so carries no
   source location. */
1769 else if (e->op == CTPO_CAT_TOTAL)
1770 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1771 "CATEGORIES specification."));
1773 msg_at (SN, cats_location,
1774 _("To fix the problem, add the missing category to the "
1775 "list of categories here."));
1778 if (pc_cat->pc->hide_source_cats)
/* Non-leaf operators: check each non-null subexpression. */
1792 for (size_t i = 0; i < 2; i++)
1793 if (e->subs[i] && !ctables_recursive_check_postcompute (
1794 dict, e->subs[i], pc_cat, cats, cats_location))
/* Examines the N_VARS variables in VARS and, on finding a numeric one, issues
   an SE message at CAT's location that names the variable and explains that
   the category specification applies only to string variables.

   NOTE(review): the return type and return statements are not visible here;
   callers test the negated result, so it presumably reports whether every
   variable is a string -- confirm. */
1803 all_strings (struct variable **vars, size_t n_vars,
1804 const struct ctables_category *cat)
1806 for (size_t j = 0; j < n_vars; j++)
1807 if (var_is_numeric (vars[j]))
1809 msg_at (SE, cat->location,
1810 _("This category specification may be applied only to string "
1811 "variables, but this subcommand tries to apply it to "
1812 "numeric variable %s."),
1813 var_get_name (vars[j]));
/* Parses a categories specification for table T from LEXER: the VARIABLES
   keyword and a variable list from DICT, then an optional explicit category
   list in square brackets, then keyword options (ORDER, KEY, MISSING, TOTAL,
   LABEL, POSITION, EMPTY) up to the next slash or end of command.  CT is
   passed on to the explicit-category parser.

   The result is a single ctables_categories object whose reference count is
   initialized to the number of variables listed.

   NOTE(review): many short lines (braces, error exits, the return type) are
   not visible in this excerpt, so the comments below describe only what the
   visible statements establish. */
1820 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1821 struct ctables *ct, struct ctables_table *t)
/* VARIABLES is required; the equals sign after it is optional. */
1823 if (!lex_match_id (lexer, "VARIABLES"))
1825 lex_match (lexer, T_EQUALS);
1827 struct variable **vars;
1829 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find out whether every variable has the same print format type as the
   first.  A mismatch clears common_format. */
1832 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1833 for (size_t i = 1; i < n_vars; i++)
1835 const struct fmt_spec *f = var_get_print_format (vars[i]);
1836 if (f->type != common_format->type)
1838 common_format = NULL;
1844 && (fmt_get_category (common_format->type)
1845 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object for all the listed variables.  Each variable's
   previous entry in t->categories is released first. */
1847 struct ctables_categories *c = xmalloc (sizeof *c);
1848 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1849 for (size_t i = 0; i < n_vars; i++)
1851 struct ctables_categories **cp
1852 = &t->categories[var_get_dict_index (vars[i])];
1853 ctables_categories_unref (*cp);
/* Explicit category list: "[" category [","] ... "]".  The token offsets of
   the list are remembered so later diagnostics can point at it. */
1857 size_t allocated_cats = 0;
1858 int cats_start_ofs = -1;
1859 int cats_end_ofs = -1;
1860 if (lex_match (lexer, T_LBRACK))
1862 cats_start_ofs = lex_ofs (lexer);
1865 if (c->n_cats >= allocated_cats)
1866 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1868 int start_ofs = lex_ofs (lexer);
1869 struct ctables_category *cat = &c->cats[c->n_cats];
1870 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1872 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1875 lex_match (lexer, T_COMMA);
1877 while (!lex_match (lexer, T_RBRACK));
1878 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category and for totals. */
1881 struct ctables_category cat = {
1883 .include_missing = false,
1884 .sort_ascending = true,
1886 bool show_totals = false;
1887 char *total_label = NULL;
1888 bool totals_before = false;
/* Keyword options.  ORDER, KEY, and MISSING are accepted only when no
   explicit categories were parsed (c->n_cats is zero). */
1889 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1891 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1893 lex_match (lexer, T_EQUALS);
1894 if (lex_match_id (lexer, "A"))
1895 cat.sort_ascending = true;
1896 else if (lex_match_id (lexer, "D"))
1897 cat.sort_ascending = false;
1900 lex_error_expecting (lexer, "A", "D");
1904 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1906 lex_match (lexer, T_EQUALS);
1907 if (lex_match_id (lexer, "VALUE"))
1908 cat.type = CCT_VALUE;
1909 else if (lex_match_id (lexer, "LABEL"))
1910 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
1913 cat.type = CCT_FUNCTION;
1914 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1917 if (lex_match (lexer, T_LPAREN))
1919 cat.sort_var = parse_variable (lexer, dict);
1923 if (cat.sort_function == CTSF_PTILE)
1925 lex_match (lexer, T_COMMA);
1926 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1928 cat.percentile = lex_number (lexer);
1932 if (!lex_force_match (lexer, T_RPAREN))
1935 else if (ctables_function_availability (cat.sort_function)
/* The forced match's result is deliberately unused (marked UNUSED). */
1938 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1943 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1945 lex_match (lexer, T_EQUALS);
1946 if (lex_match_id (lexer, "INCLUDE"))
1947 cat.include_missing = true;
1948 else if (lex_match_id (lexer, "EXCLUDE"))
1949 cat.include_missing = false;
1952 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1956 else if (lex_match_id (lexer, "TOTAL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!parse_bool (lexer, &show_totals))
1962 else if (lex_match_id (lexer, "LABEL"))
1964 lex_match (lexer, T_EQUALS);
1965 if (!lex_force_string (lexer))
1968 total_label = ss_xstrdup (lex_tokss (lexer));
1971 else if (lex_match_id (lexer, "POSITION"))
1973 lex_match (lexer, T_EQUALS);
1974 if (lex_match_id (lexer, "BEFORE"))
1975 totals_before = true;
1976 else if (lex_match_id (lexer, "AFTER"))
1977 totals_before = false;
1980 lex_error_expecting (lexer, "BEFORE", "AFTER");
1984 else if (lex_match_id (lexer, "EMPTY"))
1986 lex_match (lexer, T_EQUALS);
1987 if (lex_match_id (lexer, "INCLUDE"))
1988 c->show_empty = true;
1989 else if (lex_match_id (lexer, "EXCLUDE"))
1990 c->show_empty = false;
1993 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expecting" message: the full keyword set and the set
   without ORDER, KEY, and MISSING. */
2000 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2001 "TOTAL", "LABEL", "POSITION", "EMPTY");
2003 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category CAT to the list, growing the array as
   needed. */
2010 if (c->n_cats >= allocated_cats)
2011 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2012 c->cats[c->n_cats++] = cat;
/* Reserve room for a totals category, placed either at index 0 (after
   shifting the existing elements) or at the end of the list.  Its label is
   the LABEL string if one was given, otherwise the translated "Total". */
2017 if (c->n_cats >= allocated_cats)
2018 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2020 struct ctables_category *totals;
2023 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2024 totals = &c->cats[0];
2027 totals = &c->cats[c->n_cats];
2030 *totals = (struct ctables_category) {
2032 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link categories to a subtotal.  The list is walked forward when totals
   come before their categories and backward when they come after. */
2036 struct ctables_category *subtotal = NULL;
2037 for (size_t i = totals_before ? 0 : c->n_cats;
2038 totals_before ? i < c->n_cats : i-- > 0;
2039 totals_before ? i++ : 0)
2041 struct ctables_category *cat = &c->cats[i];
2050 cat->subtotal = subtotal;
2053 case CCT_POSTCOMPUTE:
2064 case CCT_EXCLUDED_MISSING:
/* Checks that apply only when an explicit category list was parsed. */
2069 if (cats_start_ofs != -1)
2071 for (size_t i = 0; i < c->n_cats; i++)
2073 struct ctables_category *cat = &c->cats[i];
2076 case CCT_POSTCOMPUTE:
2077 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2078 struct msg_location *cats_location
2079 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2080 bool ok = ctables_recursive_check_postcompute (
2081 dict, cat->pc->expr, cat, c, cats_location);
2082 msg_location_destroy (cats_location);
2089 for (size_t j = 0; j < n_vars; j++)
2090 if (var_is_alpha (vars[j]))
2092 msg_at (SE, cat->location,
2093 _("This category specification may be applied "
2094 "only to numeric variables, but this "
2095 "subcommand tries to apply it to string "
2097 var_get_name (vars[j]));
/* A string category that parses in the common format is converted in place
   to a numeric category and its string storage is released. */
2106 if (!parse_category_string (cat->location, cat->string, dict,
2107 common_format->type, &n))
2110 ss_dealloc (&cat->string);
2112 cat->type = CCT_NUMBER;
2115 else if (!all_strings (vars, n_vars, cat))
/* Likewise for a string range, endpoint by endpoint.  An endpoint with a
   null string is handled separately from one that must be parsed. */
2124 if (!cat->srange[0].string)
2126 else if (!parse_category_string (cat->location,
2127 cat->srange[0], dict,
2128 common_format->type, &n[0]))
2131 if (!cat->srange[1].string)
2133 else if (!parse_category_string (cat->location,
2134 cat->srange[1], dict,
2135 common_format->type, &n[1]))
2138 ss_dealloc (&cat->srange[0]);
2139 ss_dealloc (&cat->srange[1]);
2141 cat->type = CCT_NRANGE;
2142 cat->nrange[0] = n[0];
2143 cat->nrange[1] = n[1];
2145 else if (!all_strings (vars, n_vars, cat))
2156 case CCT_EXCLUDED_MISSING:
/* Releases the resources visible as owned by NEST: the summary spec set for
   each of the N_CSVS summary variants and the domain array for each of the
   N_CTDTS domain types.  NEST itself is not freed by the visible lines. */
2171 ctables_nest_uninit (struct ctables_nest *nest)
2174 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2175 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2176 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2177 free (nest->domains[dt]);
/* Uninitializes each of STACK's nests with ctables_nest_uninit() and then
   frees the array that holds them.  STACK itself is not freed by the visible
   lines. */
2181 ctables_stack_uninit (struct ctables_stack *stack)
2185 for (size_t i = 0; i < stack->n; i++)
2186 ctables_nest_uninit (&stack->nests[i]);
2187 free (stack->nests);
/* Builds a stack holding one nest for every pairing of a nest from S0 with a
   nest from S1.  Each new nest lists the variables of the S0 nest followed
   by those of the S1 nest, and its summary specs are cloned from whichever
   side SUMMARY_SRC selects.  The scale index is taken from the S0 nest when
   it has one; otherwise the S1 nest's index is shifted past the S0
   variables.

   Both S0 and S1 are uninitialized before returning, so the caller must not
   use them afterward.

   NOTE(review): the second xnmalloc() argument is the product
   s1.n * sizeof *stack.nests, computed by the caller; presumably xnmalloc()
   cannot overflow-check that inner multiplication -- confirm whether that
   matters for realistic sizes. */
2191 static struct ctables_stack
2192 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2199 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2200 for (size_t i = 0; i < s0.n; i++)
2201 for (size_t j = 0; j < s1.n; j++)
2203 const struct ctables_nest *a = &s0.nests[i];
2204 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2206 size_t allocate = a->n + b->n;
2207 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2209 for (size_t k = 0; k < a->n; k++)
2210 vars[n++] = a->vars[k];
2211 for (size_t k = 0; k < b->n; k++)
2212 vars[n++] = b->vars[k];
2213 assert (n == allocate);
/* Choose which operand supplies the summary specs, based on which side has a
   cell summary variable. */
2215 const struct ctables_nest *summary_src;
2216 if (!a->specs[CSV_CELL].var)
2218 else if (!b->specs[CSV_CELL].var)
2223 struct ctables_nest *new = &stack.nests[stack.n++];
2224 *new = (struct ctables_nest) {
2226 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2227 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2231 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2232 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2234 ctables_stack_uninit (&s0);
2235 ctables_stack_uninit (&s1);
/* Builds a stack holding the nests of S0 followed by the nests of S1.  The
   nest structures are copied by value, and each nest that came from S1 has
   its group_head increased by s0.n to account for its new position.

   NOTE(review): the lines that dispose of S0's and S1's arrays and return
   the result are not visible here. */
2239 static struct ctables_stack
2240 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2242 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2243 for (size_t i = 0; i < s0.n; i++)
2244 stack.nests[stack.n++] = s0.nests[i];
2245 for (size_t i = 0; i < s1.n; i++)
2247 stack.nests[stack.n] = s1.nests[i];
2248 stack.nests[stack.n].group_head += s0.n;
2251 assert (stack.n == s0.n + s1.n);
/* Builds a one-nest stack for axis node A.  The nest's scale index is 0 when
   A is a scale variable and SIZE_MAX otherwise.  If A has cell summary specs
   or is a scale variable, each summary variant's spec set is cloned from A
   and tagged with A's variable and scale flag. */
2257 static struct ctables_stack
2258 var_fts (const struct ctables_axis *a)
2260 struct variable **vars = xmalloc (sizeof *vars);
2263 struct ctables_nest *nest = xmalloc (sizeof *nest);
2264 *nest = (struct ctables_nest) {
2267 .scale_idx = a->scale ? 0 : SIZE_MAX,
2269 if (a->specs[CSV_CELL].n || a->scale)
2270 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2272 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2273 nest->specs[sv].var = a->var;
2274 nest->specs[sv].is_scale = a->scale;
2276 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts axis tree A into a stack of nests.  One visible
   branch yields an empty stack; the two others combine the stacks of A's two
   subtrees, one with stack_fts() and one with nest_fts().  AXIS_TYPE is
   passed through unchanged to the recursive calls.

   NOTE(review): the conditions selecting each branch are not visible
   here. */
2279 static struct ctables_stack
2280 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2283 return (struct ctables_stack) { .n = 0 };
2291 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
2295 /* This should consider any of the scale variables found in the result to
2296 be linked to each other listwise for SMISSING=LISTWISE. */
2297 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2298 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary.  The members are grouped by the summary
   functions named in the comments below; ctables_summary_init() sets up the
   group that matches the summary spec's function. */
2304 union ctables_summary
2306 /* COUNT, VALIDN, TOTALN. */
2309 /* MINIMUM, MAXIMUM, RANGE. */
2316 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2317 struct moments1 *moments;
2319 /* MEDIAN, MODE, PTILE. */
2322 struct casewriter *writer;
2327 /* XXX multiple response */
/* Prepares S for accumulating the summary function in SS.  The visible
   initializers are:

   - min and max set to SYSMIS;

   - a moments1 accumulator created with MOMENT_VARIANCE, for the *PCT_SUM
     functions, their U* counterparts, and the other labels in that group;

   - a sorting case writer over a two-column case prototype, ordered
     ascending on column 0.

   NOTE(review): the initializers for the count-style and ID-style groups are
   on lines not visible in this excerpt. */
2331 ctables_summary_init (union ctables_summary *s,
2332 const struct ctables_summary_spec *ss)
2334 switch (ss->function)
2338 case CTSF_ROWPCT_COUNT:
2339 case CTSF_COLPCT_COUNT:
2340 case CTSF_TABLEPCT_COUNT:
2341 case CTSF_SUBTABLEPCT_COUNT:
2342 case CTSF_LAYERPCT_COUNT:
2343 case CTSF_LAYERROWPCT_COUNT:
2344 case CTSF_LAYERCOLPCT_COUNT:
2345 case CTSF_ROWPCT_VALIDN:
2346 case CTSF_COLPCT_VALIDN:
2347 case CTSF_TABLEPCT_VALIDN:
2348 case CTSF_SUBTABLEPCT_VALIDN:
2349 case CTSF_LAYERPCT_VALIDN:
2350 case CTSF_LAYERROWPCT_VALIDN:
2351 case CTSF_LAYERCOLPCT_VALIDN:
2352 case CTSF_ROWPCT_TOTALN:
2353 case CTSF_COLPCT_TOTALN:
2354 case CTSF_TABLEPCT_TOTALN:
2355 case CTSF_SUBTABLEPCT_TOTALN:
2356 case CTSF_LAYERPCT_TOTALN:
2357 case CTSF_LAYERROWPCT_TOTALN:
2358 case CTSF_LAYERCOLPCT_TOTALN:
2365 case CTSF_UROWPCT_COUNT:
2366 case CTSF_UCOLPCT_COUNT:
2367 case CTSF_UTABLEPCT_COUNT:
2368 case CTSF_USUBTABLEPCT_COUNT:
2369 case CTSF_ULAYERPCT_COUNT:
2370 case CTSF_ULAYERROWPCT_COUNT:
2371 case CTSF_ULAYERCOLPCT_COUNT:
2372 case CTSF_UROWPCT_VALIDN:
2373 case CTSF_UCOLPCT_VALIDN:
2374 case CTSF_UTABLEPCT_VALIDN:
2375 case CTSF_USUBTABLEPCT_VALIDN:
2376 case CTSF_ULAYERPCT_VALIDN:
2377 case CTSF_ULAYERROWPCT_VALIDN:
2378 case CTSF_ULAYERCOLPCT_VALIDN:
2379 case CTSF_UROWPCT_TOTALN:
2380 case CTSF_UCOLPCT_TOTALN:
2381 case CTSF_UTABLEPCT_TOTALN:
2382 case CTSF_USUBTABLEPCT_TOTALN:
2383 case CTSF_ULAYERPCT_TOTALN:
2384 case CTSF_ULAYERROWPCT_TOTALN:
2385 case CTSF_ULAYERCOLPCT_TOTALN:
2395 case CTSF_SUBTABLE_ID:
2397 case CTSF_LAYERROW_ID:
2398 case CTSF_LAYERCOL_ID:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum;
   ctables_summary_add() tests for it before comparing. */
2404 s->min = s->max = SYSMIS;
2412 case CTSF_ROWPCT_SUM:
2413 case CTSF_COLPCT_SUM:
2414 case CTSF_TABLEPCT_SUM:
2415 case CTSF_SUBTABLEPCT_SUM:
2416 case CTSF_LAYERPCT_SUM:
2417 case CTSF_LAYERROWPCT_SUM:
2418 case CTSF_LAYERCOLPCT_SUM:
2423 case CTSF_UVARIANCE:
2424 case CTSF_UROWPCT_SUM:
2425 case CTSF_UCOLPCT_SUM:
2426 case CTSF_UTABLEPCT_SUM:
2427 case CTSF_USUBTABLEPCT_SUM:
2428 case CTSF_ULAYERPCT_SUM:
2429 case CTSF_ULAYERROWPCT_SUM:
2430 case CTSF_ULAYERCOLPCT_SUM:
2431 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 columns; ctables_summary_add() writes the value to column 0 and
   the weight to column 1.  The local reference to the prototype and the
   ordering are released once the writer has been created. */
2441 struct caseproto *proto = caseproto_create ();
2442 proto = caseproto_add_width (proto, 0);
2443 proto = caseproto_add_width (proto, 0);
2445 struct subcase ordering;
2446 subcase_init (&ordering, 0, 0, SC_ASCEND);
2447 s->writer = sort_create_writer (&ordering, proto);
2448 subcase_uninit (&ordering);
2449 caseproto_unref (proto);
/* Releases whatever S holds for the summary function in SS.  The visible
   cleanup is moments1_destroy() for the moments-based group and
   casewriter_destroy() for the writer-based group; no cleanup statement is
   visible for the count-style and ID-style groups in this excerpt. */
2459 ctables_summary_uninit (union ctables_summary *s,
2460 const struct ctables_summary_spec *ss)
2462 switch (ss->function)
2466 case CTSF_ROWPCT_COUNT:
2467 case CTSF_COLPCT_COUNT:
2468 case CTSF_TABLEPCT_COUNT:
2469 case CTSF_SUBTABLEPCT_COUNT:
2470 case CTSF_LAYERPCT_COUNT:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERCOLPCT_COUNT:
2473 case CTSF_ROWPCT_VALIDN:
2474 case CTSF_COLPCT_VALIDN:
2475 case CTSF_TABLEPCT_VALIDN:
2476 case CTSF_SUBTABLEPCT_VALIDN:
2477 case CTSF_LAYERPCT_VALIDN:
2478 case CTSF_LAYERROWPCT_VALIDN:
2479 case CTSF_LAYERCOLPCT_VALIDN:
2480 case CTSF_ROWPCT_TOTALN:
2481 case CTSF_COLPCT_TOTALN:
2482 case CTSF_TABLEPCT_TOTALN:
2483 case CTSF_SUBTABLEPCT_TOTALN:
2484 case CTSF_LAYERPCT_TOTALN:
2485 case CTSF_LAYERROWPCT_TOTALN:
2486 case CTSF_LAYERCOLPCT_TOTALN:
2493 case CTSF_UROWPCT_COUNT:
2494 case CTSF_UCOLPCT_COUNT:
2495 case CTSF_UTABLEPCT_COUNT:
2496 case CTSF_USUBTABLEPCT_COUNT:
2497 case CTSF_ULAYERPCT_COUNT:
2498 case CTSF_ULAYERROWPCT_COUNT:
2499 case CTSF_ULAYERCOLPCT_COUNT:
2500 case CTSF_UROWPCT_VALIDN:
2501 case CTSF_UCOLPCT_VALIDN:
2502 case CTSF_UTABLEPCT_VALIDN:
2503 case CTSF_USUBTABLEPCT_VALIDN:
2504 case CTSF_ULAYERPCT_VALIDN:
2505 case CTSF_ULAYERROWPCT_VALIDN:
2506 case CTSF_ULAYERCOLPCT_VALIDN:
2507 case CTSF_UROWPCT_TOTALN:
2508 case CTSF_UCOLPCT_TOTALN:
2509 case CTSF_UTABLEPCT_TOTALN:
2510 case CTSF_USUBTABLEPCT_TOTALN:
2511 case CTSF_ULAYERPCT_TOTALN:
2512 case CTSF_ULAYERROWPCT_TOTALN:
2513 case CTSF_ULAYERCOLPCT_TOTALN:
2522 case CTSF_SUBTABLE_ID:
2524 case CTSF_LAYERROW_ID:
2525 case CTSF_LAYERCOL_ID:
2538 case CTSF_ROWPCT_SUM:
2539 case CTSF_COLPCT_SUM:
2540 case CTSF_TABLEPCT_SUM:
2541 case CTSF_SUBTABLEPCT_SUM:
2542 case CTSF_LAYERPCT_SUM:
2543 case CTSF_LAYERROWPCT_SUM:
2544 case CTSF_LAYERCOLPCT_SUM:
2549 case CTSF_UVARIANCE:
2550 case CTSF_UROWPCT_SUM:
2551 case CTSF_UCOLPCT_SUM:
2552 case CTSF_UTABLEPCT_SUM:
2553 case CTSF_USUBTABLEPCT_SUM:
2554 case CTSF_ULAYERPCT_SUM:
2555 case CTSF_ULAYERROWPCT_SUM:
2556 case CTSF_ULAYERCOLPCT_SUM:
2557 moments1_destroy (s->moments);
2566 casewriter_destroy (s->writer);
/* Folds one observation into accumulator S for the summary function in SS.
   VAR and VALUE are the variable and its value for this case; D_WEIGHT and
   E_WEIGHT are two case weights.  The flags describe the case:
   IS_SCALE (VAR is a scale variable), IS_SCALE_MISSING, IS_MISSING, and
   EXCLUDED_MISSING.

   What the visible branches do:

   - Weighted *PCT_TOTALN functions add D_WEIGHT to the count
     unconditionally.

   - Weighted *PCT_COUNT functions add D_WEIGHT when IS_SCALE is set or
     EXCLUDED_MISSING is clear; the unweighted U*PCT_COUNT functions test
     the same condition.

   - The minimum/maximum branch updates min and max from value->f when
     IS_SCALE_MISSING is clear, treating SYSMIS as "not yet set".

   - The moments branches add value->f when IS_SCALE_MISSING is clear, with
     weight E_WEIGHT for the weighted functions and 1.0 for the U*
     functions.

   - The case-writer branch, when IS_SCALE_MISSING is clear, adds E_WEIGHT to
     ovalid and writes a case holding value->f and E_WEIGHT.

   NOTE(review): several case labels and increments sit on lines not visible
   in this excerpt, so this list is not exhaustive. */
2572 ctables_summary_add (union ctables_summary *s,
2573 const struct ctables_summary_spec *ss,
2574 const struct variable *var, const union value *value,
2575 bool is_scale, bool is_scale_missing,
2576 bool is_missing, bool excluded_missing,
2577 double d_weight, double e_weight)
2579 /* To determine whether a case is included in a given table for a particular
2580 kind of summary, consider the following charts for each variable in the
2581 table. Only if "yes" appears for every variable for the summary is the
2584 Categorical variables: VALIDN COUNT TOTALN
2585 Valid values in included categories yes yes yes
2586 Missing values in included categories --- yes yes
2587 Missing values in excluded categories --- --- yes
2588 Valid values in excluded categories --- --- ---
2590 Scale variables: VALIDN COUNT TOTALN
2591 Valid value yes yes yes
2592 Missing value --- yes yes
2594 Missing values include both user- and system-missing. (The system-missing
2595 value is always in an excluded category.)
2597 switch (ss->function)
2600 case CTSF_ROWPCT_TOTALN:
2601 case CTSF_COLPCT_TOTALN:
2602 case CTSF_TABLEPCT_TOTALN:
2603 case CTSF_SUBTABLEPCT_TOTALN:
2604 case CTSF_LAYERPCT_TOTALN:
2605 case CTSF_LAYERROWPCT_TOTALN:
2606 case CTSF_LAYERCOLPCT_TOTALN:
2607 s->count += d_weight;
2611 case CTSF_UROWPCT_TOTALN:
2612 case CTSF_UCOLPCT_TOTALN:
2613 case CTSF_UTABLEPCT_TOTALN:
2614 case CTSF_USUBTABLEPCT_TOTALN:
2615 case CTSF_ULAYERPCT_TOTALN:
2616 case CTSF_ULAYERROWPCT_TOTALN:
2617 case CTSF_ULAYERCOLPCT_TOTALN:
2622 case CTSF_ROWPCT_COUNT:
2623 case CTSF_COLPCT_COUNT:
2624 case CTSF_TABLEPCT_COUNT:
2625 case CTSF_SUBTABLEPCT_COUNT:
2626 case CTSF_LAYERPCT_COUNT:
2627 case CTSF_LAYERROWPCT_COUNT:
2628 case CTSF_LAYERCOLPCT_COUNT:
2629 if (is_scale || !excluded_missing)
2630 s->count += d_weight;
2634 case CTSF_UROWPCT_COUNT:
2635 case CTSF_UCOLPCT_COUNT:
2636 case CTSF_UTABLEPCT_COUNT:
2637 case CTSF_USUBTABLEPCT_COUNT:
2638 case CTSF_ULAYERPCT_COUNT:
2639 case CTSF_ULAYERROWPCT_COUNT:
2640 case CTSF_ULAYERCOLPCT_COUNT:
2641 if (is_scale || !excluded_missing)
2646 case CTSF_ROWPCT_VALIDN:
2647 case CTSF_COLPCT_VALIDN:
2648 case CTSF_TABLEPCT_VALIDN:
2649 case CTSF_SUBTABLEPCT_VALIDN:
2650 case CTSF_LAYERPCT_VALIDN:
2651 case CTSF_LAYERROWPCT_VALIDN:
2652 case CTSF_LAYERCOLPCT_VALIDN:
2656 s->count += d_weight;
2660 case CTSF_UROWPCT_VALIDN:
2661 case CTSF_UCOLPCT_VALIDN:
2662 case CTSF_UTABLEPCT_VALIDN:
2663 case CTSF_USUBTABLEPCT_VALIDN:
2664 case CTSF_ULAYERPCT_VALIDN:
2665 case CTSF_ULAYERROWPCT_VALIDN:
2666 case CTSF_ULAYERCOLPCT_VALIDN:
2676 case CTSF_SUBTABLE_ID:
2678 case CTSF_LAYERROW_ID:
2679 case CTSF_LAYERCOL_ID:
2686 s->count += d_weight;
2697 if (is_scale || !excluded_missing)
2698 s->count += e_weight;
2705 s->count += e_weight;
2709 s->count += e_weight;
2715 if (!is_scale_missing)
2717 assert (!var_is_alpha (var)); /* XXX? */
2718 if (s->min == SYSMIS || value->f < s->min)
2720 if (s->max == SYSMIS || value->f > s->max)
2730 case CTSF_ROWPCT_SUM:
2731 case CTSF_COLPCT_SUM:
2732 case CTSF_TABLEPCT_SUM:
2733 case CTSF_SUBTABLEPCT_SUM:
2734 case CTSF_LAYERPCT_SUM:
2735 case CTSF_LAYERROWPCT_SUM:
2736 case CTSF_LAYERCOLPCT_SUM:
2737 if (!is_scale_missing)
2738 moments1_add (s->moments, value->f, e_weight);
2745 case CTSF_UVARIANCE:
2746 case CTSF_UROWPCT_SUM:
2747 case CTSF_UCOLPCT_SUM:
2748 case CTSF_UTABLEPCT_SUM:
2749 case CTSF_USUBTABLEPCT_SUM:
2750 case CTSF_ULAYERPCT_SUM:
2751 case CTSF_ULAYERROWPCT_SUM:
2752 case CTSF_ULAYERCOLPCT_SUM:
/* Unweighted variants: every case contributes with weight 1.0. */
2753 if (!is_scale_missing)
2754 moments1_add (s->moments, value->f, 1.0);
2760 d_weight = e_weight = 1.0;
2765 if (!is_scale_missing)
2767 s->ovalid += e_weight;
/* Column 0 receives the value and column 1 the weight, matching the
   two-column prototype set up in ctables_summary_init(). */
2769 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2770 *case_num_rw_idx (c, 0) = value->f;
2771 *case_num_rw_idx (c, 1) = e_weight;
2772 casewriter_write (s->writer, c);
/* Maps summary function FUNCTION to the domain type over which it is
   computed.  The visible returns give CTDT_LAYERCOL for the LAYERCOLPCT_*
   functions and LAYERCOL_ID, CTDT_LAYERROW for the LAYERROWPCT_* functions
   and LAYERROW_ID, and CTDT_SUBTABLE for the SUBTABLEPCT_* functions and
   SUBTABLE_ID.

   NOTE(review): the return statements for the other groups (column, layer,
   row, table, and the first group ending in UVARIANCE) are on lines not
   visible in this excerpt. */
2778 static enum ctables_domain_type
2779 ctables_function_domain (enum ctables_summary_function function)
2809 case CTSF_UVARIANCE:
2815 case CTSF_COLPCT_COUNT:
2816 case CTSF_COLPCT_SUM:
2817 case CTSF_COLPCT_TOTALN:
2818 case CTSF_COLPCT_VALIDN:
2819 case CTSF_UCOLPCT_COUNT:
2820 case CTSF_UCOLPCT_SUM:
2821 case CTSF_UCOLPCT_TOTALN:
2822 case CTSF_UCOLPCT_VALIDN:
2826 case CTSF_LAYERCOLPCT_COUNT:
2827 case CTSF_LAYERCOLPCT_SUM:
2828 case CTSF_LAYERCOLPCT_TOTALN:
2829 case CTSF_LAYERCOLPCT_VALIDN:
2830 case CTSF_ULAYERCOLPCT_COUNT:
2831 case CTSF_ULAYERCOLPCT_SUM:
2832 case CTSF_ULAYERCOLPCT_TOTALN:
2833 case CTSF_ULAYERCOLPCT_VALIDN:
2834 case CTSF_LAYERCOL_ID:
2835 return CTDT_LAYERCOL;
2837 case CTSF_LAYERPCT_COUNT:
2838 case CTSF_LAYERPCT_SUM:
2839 case CTSF_LAYERPCT_TOTALN:
2840 case CTSF_LAYERPCT_VALIDN:
2841 case CTSF_ULAYERPCT_COUNT:
2842 case CTSF_ULAYERPCT_SUM:
2843 case CTSF_ULAYERPCT_TOTALN:
2844 case CTSF_ULAYERPCT_VALIDN:
2848 case CTSF_LAYERROWPCT_COUNT:
2849 case CTSF_LAYERROWPCT_SUM:
2850 case CTSF_LAYERROWPCT_TOTALN:
2851 case CTSF_LAYERROWPCT_VALIDN:
2852 case CTSF_ULAYERROWPCT_COUNT:
2853 case CTSF_ULAYERROWPCT_SUM:
2854 case CTSF_ULAYERROWPCT_TOTALN:
2855 case CTSF_ULAYERROWPCT_VALIDN:
2856 case CTSF_LAYERROW_ID:
2857 return CTDT_LAYERROW;
2859 case CTSF_ROWPCT_COUNT:
2860 case CTSF_ROWPCT_SUM:
2861 case CTSF_ROWPCT_TOTALN:
2862 case CTSF_ROWPCT_VALIDN:
2863 case CTSF_UROWPCT_COUNT:
2864 case CTSF_UROWPCT_SUM:
2865 case CTSF_UROWPCT_TOTALN:
2866 case CTSF_UROWPCT_VALIDN:
2870 case CTSF_SUBTABLEPCT_COUNT:
2871 case CTSF_SUBTABLEPCT_SUM:
2872 case CTSF_SUBTABLEPCT_TOTALN:
2873 case CTSF_SUBTABLEPCT_VALIDN:
2874 case CTSF_USUBTABLEPCT_COUNT:
2875 case CTSF_USUBTABLEPCT_SUM:
2876 case CTSF_USUBTABLEPCT_TOTALN:
2877 case CTSF_USUBTABLEPCT_VALIDN:
2878 case CTSF_SUBTABLE_ID:
2879 return CTDT_SUBTABLE;
2881 case CTSF_TABLEPCT_COUNT:
2882 case CTSF_TABLEPCT_SUM:
2883 case CTSF_TABLEPCT_TOTALN:
2884 case CTSF_TABLEPCT_VALIDN:
2885 case CTSF_UTABLEPCT_COUNT:
2886 case CTSF_UTABLEPCT_SUM:
2887 case CTSF_UTABLEPCT_TOTALN:
2888 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary function FUNCTION.  The case labels fall into two
   families: the *PCT_COUNT, *PCT_TOTALN, *PCT_VALIDN, and *_ID functions
   (together with a first group ending in UVARIANCE), and then the *PCT_SUM
   functions listed last.

   NOTE(review): the name reads as a yes/no predicate, but the declared
   return type is enum ctables_domain_type.  The return statements are not
   visible in this excerpt; if they yield true/false, the return type should
   presumably be bool -- confirm against the full function. */
2896 static enum ctables_domain_type
2897 ctables_function_is_pctsum (enum ctables_summary_function function)
2927 case CTSF_UVARIANCE:
2931 case CTSF_COLPCT_COUNT:
2932 case CTSF_COLPCT_TOTALN:
2933 case CTSF_COLPCT_VALIDN:
2934 case CTSF_UCOLPCT_COUNT:
2935 case CTSF_UCOLPCT_TOTALN:
2936 case CTSF_UCOLPCT_VALIDN:
2937 case CTSF_LAYERCOLPCT_COUNT:
2938 case CTSF_LAYERCOLPCT_TOTALN:
2939 case CTSF_LAYERCOLPCT_VALIDN:
2940 case CTSF_ULAYERCOLPCT_COUNT:
2941 case CTSF_ULAYERCOLPCT_TOTALN:
2942 case CTSF_ULAYERCOLPCT_VALIDN:
2943 case CTSF_LAYERPCT_COUNT:
2944 case CTSF_LAYERPCT_TOTALN:
2945 case CTSF_LAYERPCT_VALIDN:
2946 case CTSF_ULAYERPCT_COUNT:
2947 case CTSF_ULAYERPCT_TOTALN:
2948 case CTSF_ULAYERPCT_VALIDN:
2949 case CTSF_LAYERROWPCT_COUNT:
2950 case CTSF_LAYERROWPCT_TOTALN:
2951 case CTSF_LAYERROWPCT_VALIDN:
2952 case CTSF_ULAYERROWPCT_COUNT:
2953 case CTSF_ULAYERROWPCT_TOTALN:
2954 case CTSF_ULAYERROWPCT_VALIDN:
2955 case CTSF_ROWPCT_COUNT:
2956 case CTSF_ROWPCT_TOTALN:
2957 case CTSF_ROWPCT_VALIDN:
2958 case CTSF_UROWPCT_COUNT:
2959 case CTSF_UROWPCT_TOTALN:
2960 case CTSF_UROWPCT_VALIDN:
2961 case CTSF_SUBTABLEPCT_COUNT:
2962 case CTSF_SUBTABLEPCT_TOTALN:
2963 case CTSF_SUBTABLEPCT_VALIDN:
2964 case CTSF_USUBTABLEPCT_COUNT:
2965 case CTSF_USUBTABLEPCT_TOTALN:
2966 case CTSF_USUBTABLEPCT_VALIDN:
2967 case CTSF_TABLEPCT_COUNT:
2968 case CTSF_TABLEPCT_TOTALN:
2969 case CTSF_TABLEPCT_VALIDN:
2970 case CTSF_UTABLEPCT_COUNT:
2971 case CTSF_UTABLEPCT_TOTALN:
2972 case CTSF_UTABLEPCT_VALIDN:
2976 case CTSF_SUBTABLE_ID:
2978 case CTSF_LAYERROW_ID:
2979 case CTSF_LAYERCOL_ID:
/* The percentage-of-sum functions, weighted and unweighted. */
2982 case CTSF_COLPCT_SUM:
2983 case CTSF_UCOLPCT_SUM:
2984 case CTSF_LAYERCOLPCT_SUM:
2985 case CTSF_ULAYERCOLPCT_SUM:
2986 case CTSF_LAYERPCT_SUM:
2987 case CTSF_ULAYERPCT_SUM:
2988 case CTSF_LAYERROWPCT_SUM:
2989 case CTSF_ULAYERROWPCT_SUM:
2990 case CTSF_ROWPCT_SUM:
2991 case CTSF_UROWPCT_SUM:
2992 case CTSF_SUBTABLEPCT_SUM:
2993 case CTSF_USUBTABLEPCT_SUM:
2994 case CTSF_TABLEPCT_SUM:
2995 case CTSF_UTABLEPCT_SUM:
/* Computes the final value of the summary in SS from accumulator S, using
   CELL for domain-level denominators.  The visible branches:

   - The *_ID functions return the sequence number of the cell's domain for
     that function.

   - Percentage functions divide s->count by the matching domain total
     (e_count, u_count, e_valid, u_valid, e_total, or u_total) and multiply
     by 100, but only when that total is nonzero.

   - The range is max - min unless either is SYSMIS, in which case it is
     SYSMIS.

   - Moments-based results come from moments1_calculate(); the sum is
     weight * mean, the standard deviation is the square root of the
     variance, and the standard error is calc_semean (variance, weight).
     SYSMIS inputs propagate as SYSMIS.

   - Percentage-of-sum functions divide weight * mean by the domain's e_sum
     or u_sum for ss->sum_var_idx, times 100, yielding SYSMIS when the
     denominator is 0.

   - Percentiles and the mode read back the cases written by
     ctables_summary_add() and cache the result in s->ovalue.  PTILE uses
     ss->percentile; the other function(s) sharing that branch use 0.5.

   NOTE(review): the fallback operands of the percentage ternaries, and the
   guards around the reader-based branches, are on lines not visible in this
   excerpt. */
3003 ctables_summary_value (const struct ctables_cell *cell,
3004 union ctables_summary *s,
3005 const struct ctables_summary_spec *ss)
3007 switch (ss->function)
3017 case CTSF_SUBTABLE_ID:
3019 case CTSF_LAYERROW_ID:
3020 case CTSF_LAYERCOL_ID:
3021 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3023 case CTSF_ROWPCT_COUNT:
3024 case CTSF_COLPCT_COUNT:
3025 case CTSF_TABLEPCT_COUNT:
3026 case CTSF_SUBTABLEPCT_COUNT:
3027 case CTSF_LAYERPCT_COUNT:
3028 case CTSF_LAYERROWPCT_COUNT:
3029 case CTSF_LAYERCOLPCT_COUNT:
3031 enum ctables_domain_type d = ctables_function_domain (ss->function);
3032 return (cell->domains[d]->e_count
3033 ? s->count / cell->domains[d]->e_count * 100
3037 case CTSF_UROWPCT_COUNT:
3038 case CTSF_UCOLPCT_COUNT:
3039 case CTSF_UTABLEPCT_COUNT:
3040 case CTSF_USUBTABLEPCT_COUNT:
3041 case CTSF_ULAYERPCT_COUNT:
3042 case CTSF_ULAYERROWPCT_COUNT:
3043 case CTSF_ULAYERCOLPCT_COUNT:
3045 enum ctables_domain_type d = ctables_function_domain (ss->function);
3046 return (cell->domains[d]->u_count
3047 ? s->count / cell->domains[d]->u_count * 100
3051 case CTSF_ROWPCT_VALIDN:
3052 case CTSF_COLPCT_VALIDN:
3053 case CTSF_TABLEPCT_VALIDN:
3054 case CTSF_SUBTABLEPCT_VALIDN:
3055 case CTSF_LAYERPCT_VALIDN:
3056 case CTSF_LAYERROWPCT_VALIDN:
3057 case CTSF_LAYERCOLPCT_VALIDN:
3059 enum ctables_domain_type d = ctables_function_domain (ss->function);
3060 return (cell->domains[d]->e_valid
3061 ? s->count / cell->domains[d]->e_valid * 100
3065 case CTSF_UROWPCT_VALIDN:
3066 case CTSF_UCOLPCT_VALIDN:
3067 case CTSF_UTABLEPCT_VALIDN:
3068 case CTSF_USUBTABLEPCT_VALIDN:
3069 case CTSF_ULAYERPCT_VALIDN:
3070 case CTSF_ULAYERROWPCT_VALIDN:
3071 case CTSF_ULAYERCOLPCT_VALIDN:
3073 enum ctables_domain_type d = ctables_function_domain (ss->function);
3074 return (cell->domains[d]->u_valid
3075 ? s->count / cell->domains[d]->u_valid * 100
3079 case CTSF_ROWPCT_TOTALN:
3080 case CTSF_COLPCT_TOTALN:
3081 case CTSF_TABLEPCT_TOTALN:
3082 case CTSF_SUBTABLEPCT_TOTALN:
3083 case CTSF_LAYERPCT_TOTALN:
3084 case CTSF_LAYERROWPCT_TOTALN:
3085 case CTSF_LAYERCOLPCT_TOTALN:
3087 enum ctables_domain_type d = ctables_function_domain (ss->function);
3088 return (cell->domains[d]->e_total
3089 ? s->count / cell->domains[d]->e_total * 100
3093 case CTSF_UROWPCT_TOTALN:
3094 case CTSF_UCOLPCT_TOTALN:
3095 case CTSF_UTABLEPCT_TOTALN:
3096 case CTSF_USUBTABLEPCT_TOTALN:
3097 case CTSF_ULAYERPCT_TOTALN:
3098 case CTSF_ULAYERROWPCT_TOTALN:
3099 case CTSF_ULAYERCOLPCT_TOTALN:
3101 enum ctables_domain_type d = ctables_function_domain (ss->function);
3102 return (cell->domains[d]->u_total
3103 ? s->count / cell->domains[d]->u_total * 100
3124 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3130 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3137 double weight, variance;
3138 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3139 return calc_semean (variance, weight);
3146 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3147 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
3153 double weight, mean;
3154 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3155 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3159 case CTSF_UVARIANCE:
3162 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3166 case CTSF_ROWPCT_SUM:
3167 case CTSF_COLPCT_SUM:
3168 case CTSF_TABLEPCT_SUM:
3169 case CTSF_SUBTABLEPCT_SUM:
3170 case CTSF_LAYERPCT_SUM:
3171 case CTSF_LAYERROWPCT_SUM:
3172 case CTSF_LAYERCOLPCT_SUM:
3174 double weight, mean;
3175 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3176 if (weight == SYSMIS || mean == SYSMIS)
3178 enum ctables_domain_type d = ctables_function_domain (ss->function);
3179 double num = weight * mean;
3180 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3181 return denom != 0 ? num / denom * 100 : SYSMIS;
3183 case CTSF_UROWPCT_SUM:
3184 case CTSF_UCOLPCT_SUM:
3185 case CTSF_UTABLEPCT_SUM:
3186 case CTSF_USUBTABLEPCT_SUM:
3187 case CTSF_ULAYERPCT_SUM:
3188 case CTSF_ULAYERROWPCT_SUM:
3189 case CTSF_ULAYERCOLPCT_SUM:
3191 double weight, mean;
3192 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3193 if (weight == SYSMIS || mean == SYSMIS)
3195 enum ctables_domain_type d = ctables_function_domain (ss->function);
3196 double num = weight * mean;
3197 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3198 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile branch: turn the writer into a reader and run the order
   statistic over it.  The result is stored in s->ovalue. */
3207 struct casereader *reader = casewriter_make_reader (s->writer);
3210 struct percentile *ptile = percentile_create (
3211 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3212 struct order_stats *os = &ptile->parent;
3213 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3214 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3215 statistic_destroy (&ptile->parent.parent);
/* Mode branch: same reader-based approach with a mode statistic. */
3223 struct casereader *reader = casewriter_make_reader (s->writer);
3226 struct mode *mode = mode_create ();
3227 struct order_stats *os = &mode->parent;
3228 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3229 s->ovalue = mode->mode;
3230 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): NEST supplies the
   variables to compare, and A selects which axis of each cell holds the
   values being compared. */
3238 struct ctables_cell_sort_aux
3240 const struct ctables_nest *nest;
3241 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting along one axis.

   A_ and B_ each point to an array element of type `struct ctables_cell *'.
   AUX_ is a `struct ctables_cell_sort_aux' naming the nest and axis to use.

   The variables of the nest are visited in order, skipping the one at
   nest->scale_idx; the first variable on which the cells differ decides the
   result.

   NOTE(review): this listing omits several lines of the function (most
   `case' labels and short statements), so which category types take which
   branch below cannot be told from here. */
3245 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3247 const struct ctables_cell_sort_aux *aux = aux_;
3248 struct ctables_cell *const *ap = a_;
3249 struct ctables_cell *const *bp = b_;
3250 const struct ctables_cell *a = *ap;
3251 const struct ctables_cell *b = *bp;
3253 const struct ctables_nest *nest = aux->nest;
3254 for (size_t i = 0; i < nest->n; i++)
3255 if (i != nest->scale_idx)
3257 const struct variable *var = nest->vars[i];
3258 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3259 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the addresses of the category objects. */
3260 if (a_cv->category != b_cv->category)
3261 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category from here on: the values (or their labels) break ties,
   depending on the category type. */
3263 const union value *a_val = &a_cv->value;
3264 const union value *b_val = &b_cv->value;
3265 switch (a_cv->category->type)
3271 case CCT_POSTCOMPUTE:
3272 case CCT_EXCLUDED_MISSING:
3273 /* Must be equal. */
/* Comparison by value. */
3281 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Comparison by value, reversed when the category sorts descending. */
3289 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3291 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Comparison by value label text; a by-value comparison also appears below
   (presumably the fallback when a label is missing -- the guarding
   conditions are not present in this listing). */
3297 const char *a_label = var_lookup_value_label (var, a_val);
3298 const char *b_label = var_lookup_value_label (var, b_val);
3304 cmp = strcmp (a_label, b_label);
3310 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3313 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their per-axis leaf indexes.

   A_ and B_ each point to an array element of type `struct ctables_cell *'.
   AUX is unused.  Axes are examined in enumeration order, and the leaf
   index on an axis decides the result with the smaller leaf sorting first.

   NOTE(review): the test guarding the `return' below and the final return
   are not present in this listing; presumably the return is taken only when
   the two leaves differ and the function yields 0 otherwise. */
3325 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3326 const void *aux UNUSED)
3328 struct ctables_cell *const *ap = a_;
3329 struct ctables_cell *const *bp = b_;
3330 const struct ctables_cell *a = *ap;
3331 const struct ctables_cell *b = *bp;
3333 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3335 int al = a->axes[axis].leaf;
3336 int bl = b->axes[axis].leaf;
3338 return al > bl ? 1 : -1;
3346 For each ctables_table:
3347 For each combination of row vars:
3348 For each combination of column vars:
3349 For each combination of layer vars:
3351 Make a table of row values:
3352 Sort entries by row values
3353 Assign a 0-based index to each actual value
3354 Construct a dimension
3355 Make a table of column values
3356 Make a table of layer values
3358 Fill the table entry using the indexes from before.
/* Looks up, and creates if necessary, the domain of type DOMAIN in section
   S that CELL belongs to.

   A domain is identified by the categories (and, for ordinary categories,
   the values) of the variables listed in nest->domains[DOMAIN] on each
   axis.  A newly created domain records CELL as its example member and
   gets a zeroed array of s->table->n_sum_vars sums.

   NOTE(review): the return statements are not present in this listing;
   presumably the matching or newly created domain is returned. */
3361 static struct ctables_domain *
3362 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3363 enum ctables_domain_type domain)
/* Hash the identifying categories and values. */
3366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3368 const struct ctables_nest *nest = s->nests[a];
3369 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3371 size_t v_idx = nest->domains[domain][i];
3372 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3373 hash = hash_pointer (cv->category, hash);
/* Totals, subtotals, and postcomputes are identified by category alone, so
   their values stay out of the hash. */
3374 if (cv->category->type != CCT_TOTAL
3375 && cv->category->type != CCT_SUBTOTAL
3376 && cv->category->type != CCT_POSTCOMPUTE)
3377 hash = value_hash (&cv->value,
3378 var_get_width (nest->vars[v_idx]), hash);
/* Search the existing domains with the same hash, comparing CELL against
   each domain's example cell on the same variables. */
3382 struct ctables_domain *d;
3383 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3385 const struct ctables_cell *df = d->example;
3386 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3388 const struct ctables_nest *nest = s->nests[a];
3389 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3391 size_t v_idx = nest->domains[domain][i];
3392 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3393 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
/* Mismatch test: different category, or same ordinary category with a
   different value. */
3394 if (cv1->category != cv2->category
3395 || (cv1->category->type != CCT_TOTAL
3396 && cv1->category->type != CCT_SUBTOTAL
3397 && cv1->category->type != CCT_POSTCOMPUTE
3398 && !value_equal (&cv1->value, &cv2->value,
3399 var_get_width (nest->vars[v_idx]))))
/* Create a new domain.  The sums array is allocated only when the table
   has sum variables. */
3408 struct ctables_sum *sums = (s->table->n_sum_vars
3409 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3412 d = xmalloc (sizeof *d);
3413 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3414 hmap_insert (&s->domains[domain], &d->node, hash);
/* Builds a substring over the string value V of variable VAR, spanning
   VAR's full width, and trims trailing spaces from it.  The substring
   aliases V's storage; nothing is copied.

   NOTE(review): the return statement is not present in this listing;
   presumably the trimmed substring is returned. */
3418 static struct substring
3419 rtrim_value (const union value *v, const struct variable *var)
3421 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3422 var_get_width (var));
3423 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, with trailing spaces removed, lies
   within the inclusive range SRANGE[0]...SRANGE[1].  A bound whose `string'
   member is null is treated as open on that side. */
3428 in_string_range (const union value *v, const struct variable *var,
3429 const struct substring *srange)
3431 struct substring s = rtrim_value (v, var);
3432 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3433 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   Categories are scanned from the last one back to the first.  Depending on
   the category type, a category matches by exact number, by exact string
   (ignoring trailing spaces in V), by numeric range (with -DBL_MAX and
   DBL_MAX acting as open bounds), by string range, or by V being missing.

   If the scan ends without a direct match, the result is a null pointer
   when V is missing and `othernm' otherwise; `othernm' is itself null
   unless the scan set it.

   NOTE(review): most `case' labels and the statements under each test are
   not present in this listing, including the result for a numeric
   system-missing V and where `othernm' is assigned. */
3436 static const struct ctables_category *
3437 ctables_categories_match (const struct ctables_categories *c,
3438 const union value *v, const struct variable *var)
/* Numeric system-missing is handled up front. */
3440 if (var_is_numeric (var) && v->f == SYSMIS)
3443 const struct ctables_category *othernm = NULL;
/* i-- > 0 counts down from n_cats - 1 without underflowing at zero. */
3444 for (size_t i = c->n_cats; i-- > 0; )
3446 const struct ctables_category *cat = &c->cats[i];
3450 if (cat->number == v->f)
3455 if (ss_equals (cat->string, rtrim_value (v, var)))
3460 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3461 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3466 if (in_string_range (v, var, cat->srange))
3471 if (var_is_value_missing (var, v))
3475 case CCT_POSTCOMPUTE:
/* This category accepts V unless V is missing and the category does not
   include missing values. */
3490 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3493 case CCT_EXCLUDED_MISSING:
3498 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C, which is looked for only at the
   first and the last position of the category list; the first position
   wins if both are totals.

   NOTE(review): the final alternative of the conditional is not present in
   this listing; presumably it is a null pointer.  c->cats[0] and
   c->cats[c->n_cats - 1] are read unconditionally, so this assumes
   c->n_cats >= 1 -- confirm that callers guarantee it. */
3501 static const struct ctables_category *
3502 ctables_categories_total (const struct ctables_categories *c)
3504 const struct ctables_category *first = &c->cats[0];
3505 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3506 return (first->type == CCT_TOTAL ? first
3507 : last->type == CCT_TOTAL ? last
/* Finds, and creates if necessary, the cell in section S for case C whose
   per-axis, per-variable categories are given in CATS.

   A cell is identified by the category of every non-scale variable in each
   axis's nest, plus the variable's value in C for ordinary categories
   (totals, subtotals, and postcomputes are identified by category alone).

   A newly created cell gets a copy of each variable's category and value,
   a mask of domains it must not contribute to, freshly initialized
   summaries, and its domain for every domain type.

   NOTE(review): several lines are not present in this listing, among them
   the declaration of `hash', the assignments to `sv' and `cell->sv', the
   `switch' that selects among the PIVOT_AXIS_* cases, and the return
   statements.  The statements that use `name' refer to a variable whose
   declaration is commented out; presumably they sit inside a disabled
   preprocessor block -- confirm. */
3511 static struct ctables_cell *
3512 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3513 const struct ctables_category *cats[PIVOT_N_AXES][10])
3516 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3517 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3519 const struct ctables_nest *nest = s->nests[a];
3520 for (size_t i = 0; i < nest->n; i++)
3521 if (i != nest->scale_idx)
3523 hash = hash_pointer (cats[a][i], hash);
3524 if (cats[a][i]->type != CCT_TOTAL
3525 && cats[a][i]->type != CCT_SUBTOTAL
3526 && cats[a][i]->type != CCT_POSTCOMPUTE)
3527 hash = value_hash (case_data (c, nest->vars[i]),
3528 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3534 struct ctables_cell *cell;
3535 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3537 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3539 const struct ctables_nest *nest = s->nests[a];
3540 for (size_t i = 0; i < nest->n; i++)
3541 if (i != nest->scale_idx
3542 && (cats[a][i] != cell->axes[a].cvs[i].category
3543 || (cats[a][i]->type != CCT_TOTAL
3544 && cats[a][i]->type != CCT_SUBTOTAL
3545 && cats[a][i]->type != CCT_POSTCOMPUTE
3546 && !value_equal (case_data (c, nest->vars[i]),
3547 &cell->axes[a].cvs[i].value,
3548 var_get_width (nest->vars[i])))))
/* No existing cell: build a new one. */
3557 cell = xmalloc (sizeof *cell);
3560 cell->omit_domains = 0;
3561 cell->postcompute = false;
3562 //struct string name = DS_EMPTY_INITIALIZER;
3563 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3565 const struct ctables_nest *nest = s->nests[a];
3566 cell->axes[a].cvs = (nest->n
3567 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3569 for (size_t i = 0; i < nest->n; i++)
3571 const struct ctables_category *cat = cats[a][i];
3572 const struct variable *var = nest->vars[i];
3573 const union value *value = case_data (c, var);
3574 if (i != nest->scale_idx)
3576 const struct ctables_category *subtotal = cat->subtotal;
3577 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category on an axis adds that axis's
   set of domain bits to the mask of domains this cell is kept out of. */
3580 if (cat->type == CCT_TOTAL
3581 || cat->type == CCT_SUBTOTAL
3582 || cat->type == CCT_POSTCOMPUTE)
3584 /* XXX these should be more encompassing I think.*/
3588 case PIVOT_AXIS_COLUMN:
3589 cell->omit_domains |= ((1u << CTDT_TABLE) |
3590 (1u << CTDT_LAYER) |
3591 (1u << CTDT_LAYERCOL) |
3592 (1u << CTDT_SUBTABLE) |
3595 case PIVOT_AXIS_ROW:
3596 cell->omit_domains |= ((1u << CTDT_TABLE) |
3597 (1u << CTDT_LAYER) |
3598 (1u << CTDT_LAYERROW) |
3599 (1u << CTDT_SUBTABLE) |
3602 case PIVOT_AXIS_LAYER:
3603 cell->omit_domains |= ((1u << CTDT_TABLE) |
3604 (1u << CTDT_LAYER));
3608 if (cat->type == CCT_POSTCOMPUTE)
3609 cell->postcompute = true;
/* The cell keeps its own copy of the value. */
3612 cell->axes[a].cvs[i].category = cat;
3613 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3616 if (i != nest->scale_idx)
3618 if (!ds_is_empty (&name))
3619 ds_put_cstr (&name, ", ");
3620 char *value_s = data_out (value, var_get_encoding (var),
3621 var_get_print_format (var),
3622 settings_get_fmt_settings ());
3623 if (cat->type == CCT_TOTAL
3624 || cat->type == CCT_SUBTOTAL
3625 || cat->type == CCT_POSTCOMPUTE)
3626 ds_put_format (&name, "%s=total", var_get_name (var));
3628 ds_put_format (&name, "%s=%s", var_get_name (var),
3629 value_s + strspn (value_s, " "));
3635 //cell->name = ds_steal_cstr (&name);
/* One summary per summary specification of the summary axis's nest. */
3637 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3638 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
/* xnmalloc checks the count-times-size multiplication for overflow, as the
   allocation of `cvs' above already does. */
3639 cell->summaries = xnmalloc (specs->n, sizeof *cell->summaries);
3640 for (size_t i = 0; i < specs->n; i++)
3641 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3642 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3643 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3644 hmap_insert (&s->cells, &cell->node, hash);
/* Decides whether case C counts as missing for the scale variable of
   SPECS.  The checks shown are, in order: whether SPECS is for a scale
   variable at all, whether the value of specs->var in C is missing, and
   whether the value of any of the listwise variables in C is missing.

   NOTE(review): the return statements are not present in this listing;
   presumably the first check yields false and either of the other two
   yields true -- confirm. */
3649 is_scale_missing (const struct ctables_summary_spec_set *specs,
3650 const struct ccase *c)
3652 if (!specs->is_scale)
3655 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3658 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3660 const struct variable *var = specs->listwise_vars[i];
3661 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell of section S selected by CATS, creating the cell
   if necessary.

   Every summary of the cell is updated, and then every domain the cell
   contributes to has its totals, counts, valid counts, and per-variable
   sums updated.  D_WEIGHT and E_WEIGHT are two weights for the case, added
   to the d_* and e_* accumulators respectively.  IS_MISSING and
   EXCLUDED_MISSING are forwarded to the summaries; EXCLUDED_MISSING also
   keeps the case out of the domain counts.

   NOTE(review): some lines are not present in this listing, among them the
   condition that guards the `valid' accumulators and the sums loop;
   presumably it is `if (!is_missing)' -- confirm. */
3669 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3670 const struct ctables_category *cats[PIVOT_N_AXES][10],
3671 bool is_missing, bool excluded_missing,
3672 double d_weight, double e_weight)
3674 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3675 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3677 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3679 bool scale_missing = is_scale_missing (specs, c);
3680 for (size_t i = 0; i < specs->n; i++)
3681 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3682 specs->var, case_data (c, specs->var), specs->is_scale,
3683 scale_missing, is_missing, excluded_missing,
3684 d_weight, e_weight);
3685 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bit mask with one bit per domain type, so it is tested
   with bitwise AND.  This used to be a logical `&&'; since (1u << dt) is
   always nonzero, any cell with a nonzero mask was kept out of every
   domain, not just the flagged ones.
   NOTE(review): confirm percentage outputs for total, subtotal, and
   postcompute cells against expected results after this change. */
3686 if (!(cell->omit_domains & (1u << dt)))
3688 struct ctables_domain *d = cell->domains[dt];
3689 d->d_total += d_weight;
3690 d->e_total += e_weight;
3692 if (!excluded_missing)
3694 d->d_count += d_weight;
3695 d->e_count += e_weight;
3700 d->d_valid += d_weight;
3701 d->e_valid += e_weight;
/* Accumulate each sum variable: weighted by E_WEIGHT into e_sum and
   unweighted into u_sum.  Missing values of the sum variable are skipped. */
3704 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3706 /* XXX listwise_missing??? */
3707 const struct variable *var = s->table->sum_vars[i];
3708 double addend = case_num (c, var);
3709 if (!var_is_num_missing (var, addend))
3711 struct ctables_sum *sum = &d->sums[i];
3712 sum->e_sum += addend * e_weight;
3713 sum->u_sum += addend;
/* Adds case C to the total cells it contributes to.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable's total category (if any) is looked up; the case is added with
   that category in CATS, and the function recurses from the next position
   so that combinations of totals across variables are covered as well.

   NOTE(review): the lines that test `total', store it into CATS, restore
   `save' afterwards, and reset START_NEST between axes are not present in
   this listing -- the description of those steps is presumed. */
3721 recurse_totals (struct ctables_section *s, const struct ccase *c,
3722 const struct ctables_category *cats[PIVOT_N_AXES][10],
3723 bool is_missing, bool excluded_missing,
3724 double d_weight, double e_weight,
3725 enum pivot_axis_type start_axis, size_t start_nest)
3727 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3729 const struct ctables_nest *nest = s->nests[a];
3730 for (size_t i = start_nest; i < nest->n; i++)
3732 if (i == nest->scale_idx)
3735 const struct variable *var = nest->vars[i];
3737 const struct ctables_category *total = ctables_categories_total (
3738 s->table->categories[var_get_dict_index (var)]);
/* Remember the ordinary category for this position. */
3741 const struct ctables_category *save = cats[a][i];
3743 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3744 d_weight, e_weight);
3745 recurse_totals (s, c, cats, is_missing, excluded_missing,
3746 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells it contributes to.

   Starting at axis START_AXIS and nest position START_NEST, the category
   at each non-scale position is replaced in CATS by its subtotal category,
   the case is added, and the function recurses from the next position so
   that combinations of subtotals across variables are covered as well.

   NOTE(review): the test for whether the category has a subtotal, the
   restoration of `save', and the reset of START_NEST between axes are not
   present in this listing -- presumed. */
3755 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3756 const struct ctables_category *cats[PIVOT_N_AXES][10],
3757 bool is_missing, bool excluded_missing,
3758 double d_weight, double e_weight,
3759 enum pivot_axis_type start_axis, size_t start_nest)
3761 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3763 const struct ctables_nest *nest = s->nests[a];
3764 for (size_t i = start_nest; i < nest->n; i++)
3766 if (i == nest->scale_idx)
3769 const struct ctables_category *save = cats[a][i];
3772 cats[a][i] = save->subtotal;
3773 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3774 d_weight, e_weight);
3775 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3776 d_weight, e_weight, a, i + 1);
/* Records that VALUE of variable VAR was observed, in the hash map
   OCCURRENCES.  The map is keyed on the value itself (hashed and compared
   at VAR's width) and stores a private copy of each value.

   NOTE(review): the statement under the equality test is not present in
   this listing; presumably the function returns early when the value is
   already recorded. */
3785 ctables_add_occurrence (const struct variable *var,
3786 const union value *value,
3787 struct hmap *occurrences)
3789 int width = var_get_width (var);
3790 unsigned int hash = value_hash (value, width, 0);
3792 struct ctables_occurrence *o;
3793 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3795 if (value_equal (value, &o->value, width))
/* Not seen before: store a copy. */
3798 o = xmalloc (sizeof *o);
3799 value_clone (&o->value, value, width);
3800 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Steps, in order:

   1. For every non-scale variable in each axis's nest, find the category
      the case's value falls into.  When this fails, a static placeholder
      category of type CCT_EXCLUDED_MISSING is used and the case is flagged
      as excluded-missing (the exact failure conditions are on lines not
      present in this listing).

   2. Unless the case is excluded-missing, record each variable's value as
      an occurrence for its axis and nest position.

   3. Add the case to its own cell, then to the total and subtotal cells it
      contributes to.

   NOTE(review): CATS has room for 10 variables per axis and no check that
   nest->n fits is visible here; a deeper nest would write past the array.
   Confirm that the parser enforces the limit, or size the array
   dynamically (that needs the matching parameter type changed in every
   function CATS is passed to). */
3804 ctables_cell_insert (struct ctables_section *s,
3805 const struct ccase *c,
3806 double d_weight, double e_weight)
3808 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3810 /* Does at least one categorical variable have a missing value in an included
3811 or excluded category? */
3812 bool is_missing = false;
3814 /* Does at least one categorical variable have a missing value in an excluded
3816 bool excluded_missing = false;
3818 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3820 const struct ctables_nest *nest = s->nests[a];
3821 for (size_t i = 0; i < nest->n; i++)
3823 if (i == nest->scale_idx)
3826 const struct variable *var = nest->vars[i];
3827 const union value *value = case_data (c, var);
3829 bool var_missing = var_is_value_missing (var, value) != 0;
3833 cats[a][i] = ctables_categories_match (
3834 s->table->categories[var_get_dict_index (var)], value, var);
3840 static const struct ctables_category cct_excluded_missing = {
3841 .type = CCT_EXCLUDED_MISSING,
3844 cats[a][i] = &cct_excluded_missing;
3845 excluded_missing = true;
3850 if (!excluded_missing)
3851 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3853 const struct ctables_nest *nest = s->nests[a];
3854 for (size_t i = 0; i < nest->n; i++)
3855 if (i != nest->scale_idx)
3857 const struct variable *var = nest->vars[i];
3858 const union value *value = case_data (c, var);
3859 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3863 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3864 d_weight, e_weight);
3866 //if (!excluded_missing)
3868 recurse_totals (s, c, cats, is_missing, excluded_missing,
3869 d_weight, e_weight, 0, 0);
3870 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3871 d_weight, e_weight, 0, 0);
/* Summary spec set that a merge item refers to; merge_item_compare_3way()
   reads the spec at set->specs[ofs].
   NOTE(review): the enclosing struct header and its other members are not
   present in this listing. */
3877 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer
   to.  The keys are, in order: the summary function (ascending by
   enumeration value), the percentile (the larger percentile sorts first),
   and the label text, with a missing label treated as the empty string. */
3882 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3884 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3885 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3886 if (as->function != bs->function)
3887 return as->function > bs->function ? 1 : -1;
3888 else if (as->percentile != bs->percentile)
3889 return as->percentile < bs->percentile ? 1 : -1;
3891 const char *as_label = as->label ? as->label : "";
3892 const char *bs_label = bs->label ? bs->label : "";
3893 return strcmp (as_label, bs_label);
/* Creates the label for category CAT of variable VAR with value VALUE.
   Totals and subtotals use the category's own label text; every other
   category type is labeled from the variable's value.  The caller owns the
   returned pivot value. */
3896 static struct pivot_value *
3897 ctables_category_create_label__ (const struct ctables_category *cat,
3898 const struct variable *var,
3899 const union value *value)
3901 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3902 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3903 : pivot_value_new_var_value (var, value));
/* Creates the label for postcompute category CAT, expanding each
   `)LABEL[n]' reference in the postcompute's label text to the label of
   the Nth (1-based) category in CATS.

   A label without any reference is used as is.  A reference that has no
   closing bracket, is not a plain decimal number, or is outside
   1...cats->n_cats is handled on an error path that falls back to the
   unexpanded label text.

   NOTE(review): the loop header, the jumps to the error path, the
   declaration of `tail', and any cleanup of `out' and `label2_s' are not
   present in this listing; confirm that the error path releases `out'. */
3906 static struct pivot_value *
3907 ctables_postcompute_label (const struct ctables_categories *cats,
3908 const struct ctables_category *cat,
3909 const struct variable *var,
3910 const union value *value)
3912 struct substring in = ss_cstr (cat->pc->label);
3913 struct substring target = ss_cstr (")LABEL[");
3915 struct string out = DS_EMPTY_INITIALIZER;
3918 size_t chunk = ss_find_substring (in, target);
/* No further reference: finish up. */
3919 if (chunk == SIZE_MAX)
/* Nothing was expanded, so the remaining input is the whole label. */
3921 if (ds_is_empty (&out))
3922 return pivot_value_new_user_text (in.string, in.length);
3925 ds_put_substring (&out, in);
3926 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then step past `)LABEL['. */
3930 ds_put_substring (&out, ss_head (in, chunk));
3931 ss_advance (&in, chunk + target.length);
/* Extract and validate the index up to the closing bracket. */
3933 struct substring idx_s;
3934 if (!ss_get_until (&in, ']', &idx_s))
3937 long int idx = strtol (idx_s.string, &tail, 10);
3938 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Substitute the referenced category's label, rendered as a string. */
3941 struct ctables_category *cat2 = &cats->cats[idx - 1];
3942 struct pivot_value *label2
3943 = ctables_category_create_label__ (cat2, var, value);
3944 char *label2_s = pivot_value_to_string_defaults (label2);
3945 ds_put_cstr (&out, label2_s);
3947 pivot_value_destroy (label2);
/* Error fallback: the label text without expansion. */
3952 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the label for category CAT of variable VAR with value VALUE.
   A postcompute category that has its own label text goes through
   ctables_postcompute_label(), with CATS supplying the categories that
   `)LABEL[n]' references resolve against; everything else goes through
   ctables_category_create_label__(). */
3955 static struct pivot_value *
3956 ctables_category_create_label (const struct ctables_categories *cats,
3957 const struct ctables_category *cat,
3958 const struct variable *var,
3959 const union value *value)
3961 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3962 ? ctables_postcompute_label (cats, cat, var, value)
3963 : ctables_category_create_label__ (cat, var, value));
/* Searches T's map of category-label values for VALUE, compared at WIDTH.
   HASH must be VALUE's hash as computed by the callers in this file
   (value_hash with WIDTH and basis 0).

   NOTE(review): the return statements are not present in this listing;
   presumably the matching entry is returned, or a null pointer if there is
   none. */
3966 static struct ctables_value *
3967 ctables_value_find__ (struct ctables_table *t, const union value *value,
3968 int width, unsigned int hash)
3970 struct ctables_value *clv;
3971 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3972 hash, &t->clabels_values_map)
3973 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T's map of category-label values, storing a private copy.
   The map is searched first.

   NOTE(review): the rest of the parameter list and the test on the search
   result are not present in this listing; presumably there is a WIDTH
   parameter and an existing entry is left untouched. */
3979 ctables_value_insert (struct ctables_table *t, const union value *value,
3982 unsigned int hash = value_hash (value, width, 0);
3983 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3986 clv = xmalloc (sizeof *clv);
3987 value_clone (&clv->value, value, width);
3988 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Searches T's map of category-label values for VALUE, compared at WIDTH,
   computing the hash on the caller's behalf.  Returns whatever
   ctables_value_find__() returns. */
3992 static struct ctables_value *
3993 ctables_value_find (struct ctables_table *t,
3994 const union value *value, int width)
3996 return ctables_value_find__ (t, value, width,
3997 value_hash (value, width, 0));
/* Recursively creates one section of table T for every combination of one
   nest per axis.

   While A is a valid axis, IX[A] is stepped over the nests stacked on that
   axis (an axis with an empty stack still gets one pass) and the function
   recurses to the next axis.  Once A runs past the last axis, IX[] selects
   a complete combination and a section is appended to t->sections with
   empty cell, occurrence, and domain hash maps.

   NOTE(review): no bounds check on t->sections is visible here; the caller
   presumably sizes the array for all combinations.  The lines that fill in
   the section's `table' and `nests' members, and whatever happens for an
   axis with no nests, are not present in this listing. */
4001 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4002 size_t ix[PIVOT_N_AXES])
4004 if (a < PIVOT_N_AXES)
4006 size_t limit = MAX (t->stacks[a].n, 1);
4007 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4008 ctables_table_add_section (t, a + 1, ix);
/* All axes chosen: claim the next slot and initialize it. */
4012 struct ctables_section *s = &t->sections[t->n_sections++];
4013 *s = (struct ctables_section) {
4015 .cells = HMAP_INITIALIZER (s->cells),
4017 for (a = 0; a < PIVOT_N_AXES; a++)
4020 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable position in the nest. */
4022 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4023 for (size_t i = 0; i < nest->n; i++)
4024 hmap_init (&s->occurrences[a][i]);
4026 for (size_t i = 0; i < N_CTDTS; i++)
4027 hmap_init (&s->domains[i]);
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() with two operands.
   NOTE(review): the body is not present in this listing; presumably it
   returns A + B. */
4032 ctpo_add (double a, double b)
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() with two operands.
   NOTE(review): the body is not present in this listing; presumably it
   returns A - B. */
4038 ctpo_sub (double a, double b)
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() with two operands.
   NOTE(review): the body is not present in this listing; presumably it
   returns A * B. */
4044 ctpo_mul (double a, double b)
/* Operator callback for division: returns A / B, or SYSMIS when B is
   zero. */
4050 ctpo_div (double a, double b)
4052 return b ? a / b : SYSMIS;
/* Operator callback for exponentiation: computes pow (A, B).  The caller's
   errno is captured before the call.
   NOTE(review): the lines after the call are not present in this listing;
   presumably errno is inspected to turn a math error into SYSMIS and is
   then restored from save_errno -- confirm. */
4056 ctpo_pow (double a, double b)
4058 int save_errno = errno;
4060 double result = pow (a, b);
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() with one operand; B is unused and
   exists only so the signature matches the other callbacks.
   NOTE(review): the body is not present in this listing; presumably it
   returns -A. */
4068 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute expression.
   NOTE(review): the `pc_a_idx' and `summary_idx' members used by those
   functions are not present in this listing. */
4073 struct ctables_pcexpr_evaluate_ctx
/* Cell whose value is being computed. */
4075 const struct ctables_cell *cell;
/* Section that contains the cell. */
4076 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
4077 const struct ctables_categories *cats;
/* Axis on which the postcompute variable sits. */
4078 enum pivot_axis_type pc_a;
/* Format handed to ctables_find_category_for_postcompute(). */
4081 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function before its definition appears. */
4084 static double ctables_pcexpr_evaluate (
4085 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS (1 or 2) operands, by
   evaluating each operand and passing the results to EVALUATE.  With one
   operand, the second argument passed to EVALUATE is 0.

   NOTE(review): the statement under the operand check is not present in
   this listing; presumably a non-finite or SYSMIS operand makes the whole
   node SYSMIS without calling EVALUATE. */
4088 ctables_pcexpr_evaluate_nonterminal (
4089 const struct ctables_pcexpr_evaluate_ctx *ctx,
4090 const struct ctables_pcexpr *e, size_t n_args,
4091 double evaluate (double, double))
4093 double args[2] = { 0, 0 };
4094 for (size_t i = 0; i < n_args; i++)
4096 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4097 if (!isfinite (args[i]) || args[i] == SYSMIS)
4100 return evaluate (args[0], args[1]);
/* Returns the summary value, for the summary at ctx->summary_idx, of the
   cell that sits where ctx->cell does except that the postcompute
   variable's position (axis ctx->pc_a, index ctx->pc_a_idx) holds PC_CV
   instead of the postcompute category.

   The target cell is found through the section's cell hash map, using the
   same identity rule as cell insertion: the category at every non-scale
   position, plus the value for ordinary categories.

   NOTE(review): the declaration of `hash' and the statements that handle a
   mismatch or a missing target cell are not present in this listing. */
4104 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4105 const struct ctables_cell_value *pc_cv)
4107 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity. */
4110 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4112 const struct ctables_nest *nest = s->nests[a];
4113 for (size_t i = 0; i < nest->n; i++)
4114 if (i != nest->scale_idx)
/* Substitute PC_CV at the postcompute position. */
4116 const struct ctables_cell_value *cv
4117 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4118 : &ctx->cell->axes[a].cvs[i]);
4119 hash = hash_pointer (cv->category, hash);
4120 if (cv->category->type != CCT_TOTAL
4121 && cv->category->type != CCT_SUBTOTAL
4122 && cv->category->type != CCT_POSTCOMPUTE)
4123 hash = value_hash (&cv->value,
4124 var_get_width (nest->vars[i]), hash);
/* Find the cell with that identity. */
4128 struct ctables_cell *tc;
4129 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4131 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4133 const struct ctables_nest *nest = s->nests[a];
4134 for (size_t i = 0; i < nest->n; i++)
4135 if (i != nest->scale_idx)
4137 const struct ctables_cell_value *p_cv
4138 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4139 : &ctx->cell->axes[a].cvs[i]);
4140 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4141 if (p_cv->category != t_cv->category
4142 || (p_cv->category->type != CCT_TOTAL
4143 && p_cv->category->type != CCT_SUBTOTAL
4144 && p_cv->category->type != CCT_POSTCOMPUTE
4145 && !value_equal (&p_cv->value,
4147 var_get_width (nest->vars[i]))))
/* Found: compute the requested summary of the target cell. */
4159 const struct ctables_table *t = s->table;
4160 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4161 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4162 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4163 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression node E in context CTX and returns its
   numeric value.

   Category references are resolved to a category with
   ctables_find_category_for_postcompute(), which is asserted to succeed,
   and evaluated with ctables_pcexpr_evaluate_category().  Operator nodes
   are delegated to ctables_pcexpr_evaluate_nonterminal() with the matching
   ctpo_* callback.

   NOTE(review): the `switch' header, several `case' labels (including
   those for constants and for each operator), and some declarations and
   cleanup statements are not present in this listing. */
4167 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4168 const struct ctables_pcexpr *e)
/* Range references: add up the value over every observed occurrence of the
   postcompute variable that falls into the referenced category. */
4175 case CTPO_CAT_NRANGE:
4176 case CTPO_CAT_SRANGE:
4178 struct ctables_cell_value cv = {
4179 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4181 assert (cv.category != NULL);
4183 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4184 const struct ctables_occurrence *o;
4187 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4188 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4189 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4191 cv.value = o->value;
4192 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Single-category references: evaluate directly, with the node's number
   as the value. */
4197 case CTPO_CAT_NUMBER:
4198 case CTPO_CAT_MISSING:
4199 case CTPO_CAT_OTHERNM:
4200 case CTPO_CAT_SUBTOTAL:
4201 case CTPO_CAT_TOTAL:
4203 struct ctables_cell_value cv = {
4204 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4205 .value = { .f = e->number },
4207 assert (cv.category != NULL);
4208 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String references: a string shorter than the variable's width is copied
   into a temporary buffer padded with spaces to the full width. */
4211 case CTPO_CAT_STRING:
4213 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4215 if (width > e->string.length)
4217 s = xmalloc (width);
4218 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4220 struct ctables_cell_value cv = {
4221 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4222 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4224 assert (cv.category != NULL);
4225 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: two operands each, except negation, which takes one. */
4231 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4234 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4237 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4240 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4243 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4246 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Locates the postcompute category of CELL, which must be a postcompute
   cell, within section S.  Every position of every axis's nest is scanned
   for a category of type CCT_POSTCOMPUTE; the index within the axis is
   stored through the output parameter for it, and the function asserts
   that a postcompute category was found.

   NOTE(review): the last parameter, the store through PC_A_P, the handling
   of a cell where postcomputes cross on several variables, and the return
   statement are not present in this listing; presumably the category found
   is returned. */
4252 static const struct ctables_category *
4253 ctables_cell_postcompute (const struct ctables_section *s,
4254 const struct ctables_cell *cell,
4255 enum pivot_axis_type *pc_a_p,
4258 assert (cell->postcompute);
4259 const struct ctables_category *pc_cat = NULL;
4260 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4261 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4263 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4264 if (cv->category->type == CCT_POSTCOMPUTE)
4268 /* Multiple postcomputes cross each other. The value is
4273 pc_cat = cv->category;
4277 *pc_a_idx_p = pc_a_idx;
4281 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   spec SS.

   The postcompute category and its position are located first.  The
   postcompute's own summary specs are then searched for one with the same
   function and percentile as SS; on a match, *FORMAT and
   *IS_CTABLES_FORMAT are overwritten from it.  Finally the postcompute's
   expression is evaluated against the categories of the variable that
   carries it, and that value is returned.

   NOTE(review): the last parameter (presumably `size_t summary_idx'), the
   conditions around the format override loop, and part of the context
   initializer are not present in this listing. */
4286 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4287 const struct ctables_cell *cell,
4288 const struct ctables_summary_spec *ss,
4289 struct fmt_spec *format,
4290 bool *is_ctables_format,
4293 enum pivot_axis_type pc_a = 0;
4294 size_t pc_a_idx = 0;
4295 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4296 s, cell, &pc_a, &pc_a_idx);
4300 const struct ctables_postcompute *pc = pc_cat->pc;
/* Let the postcompute override the output format for this summary. */
4303 for (size_t i = 0; i < pc->specs->n; i++)
4305 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4306 if (ss->function == ss2->function
4307 && ss->percentile == ss2->percentile)
4309 *format = ss2->format;
4310 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression. */
4316 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4317 const struct ctables_categories *cats = s->table->categories[
4318 var_get_dict_index (var)];
4319 struct ctables_pcexpr_evaluate_ctx ctx = {
4324 .pc_a_idx = pc_a_idx,
4325 .summary_idx = summary_idx,
4326 .parse_format = pc_cat->parse_format,
4328 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a UTF-8 string,
   then repositions the minus sign of a negative number as described in the
   comment below.

   NOTE(review): the return statement and the guard in front of
   move_element() are not present in this listing; presumably the string is
   returned for the caller to free, and the move is skipped when the output
   contains neither "N=" nor an opening parenthesis. */
4332 ctables_format (double d, const struct fmt_spec *format,
4333 const struct fmt_settings *settings)
4335 const union value v = { .f = d };
4336 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4338 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4339 produce the results we want for negative numbers, putting the negative
4340 sign in the wrong spot, before the prefix instead of after it. We can't,
4341 in fact, produce the desired results using a custom-currency
4342 specification. Instead, we postprocess the output, moving the negative
4345 NEQUAL: "-N=3" => "N=-3"
4346 PAREN: "-(3)" => "(-3)"
4347 PCTPAREN: "-(3%)" => "(-3%)"
4349 This transformation doesn't affect NEGPAREN. */
4350 char *minus_src = strchr (s, '-');
/* A minus sign directly after `E' belongs to an exponent and stays put. */
4351 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4353 char *n_equals = strstr (s, "N=");
4354 char *lparen = strchr (s, '(');
/* Destination: the `=' of "N=" if present, otherwise the parenthesis. */
4355 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4357 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest stacked on axis A of table T, checking that the nest
   consists of exactly one variable, that this variable is the scale
   variable, and that its variable-label setting is CTVL_NONE.

   NOTE(review): the return statements are not present in this listing;
   presumably the function returns false at the first nest that fails a
   check and true when every nest passes. */
4363 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4365 for (size_t i = 0; i < t->stacks[a].n; i++)
4367 struct ctables_nest *nest = &t->stacks[a].nests[i];
4368 if (nest->n != 1 || nest->scale_idx != 0)
4371 enum ctables_vlabel vlabel
4372 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4373 if (vlabel != CTVL_NONE)
4380 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4382 struct pivot_table *pt = pivot_table_create__ (
4384 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4385 : pivot_value_new_text (N_("Custom Tables"))),
4388 pivot_table_set_caption (
4389 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4391 pivot_table_set_corner_text (
4392 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4394 bool summary_dimension = (t->summary_axis != t->slabels_axis
4395 || (!t->slabels_visible
4396 && t->summary_specs.n > 1));
4397 if (summary_dimension)
4399 struct pivot_dimension *d = pivot_dimension_create (
4400 pt, t->slabels_axis, N_("Statistics"));
4401 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4402 if (!t->slabels_visible)
4403 d->hide_all_labels = true;
4404 for (size_t i = 0; i < specs->n; i++)
4405 pivot_category_create_leaf (
4406 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4409 bool categories_dimension = t->clabels_example != NULL;
4410 if (categories_dimension)
4412 struct pivot_dimension *d = pivot_dimension_create (
4413 pt, t->label_axis[t->clabels_from_axis],
4414 t->clabels_from_axis == PIVOT_AXIS_ROW
4415 ? N_("Row Categories")
4416 : N_("Column Categories"));
4417 const struct variable *var = t->clabels_example;
4418 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4419 for (size_t i = 0; i < t->n_clabels_values; i++)
4421 const struct ctables_value *value = t->clabels_values[i];
4422 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4423 assert (cat != NULL);
4424 pivot_category_create_leaf (d->root, ctables_category_create_label (
4425 c, cat, t->clabels_example,
4430 pivot_table_set_look (pt, ct->look);
4431 struct pivot_dimension *d[PIVOT_N_AXES];
4432 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4434 static const char *names[] = {
4435 [PIVOT_AXIS_ROW] = N_("Rows"),
4436 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4437 [PIVOT_AXIS_LAYER] = N_("Layers"),
4439 d[a] = (t->axes[a] || a == t->summary_axis
4440 ? pivot_dimension_create (pt, a, names[a])
4445 assert (t->axes[a]);
4447 for (size_t i = 0; i < t->stacks[a].n; i++)
4449 struct ctables_nest *nest = &t->stacks[a].nests[i];
4450 struct ctables_section **sections = xnmalloc (t->n_sections,
4452 size_t n_sections = 0;
4454 size_t n_total_cells = 0;
4455 size_t max_depth = 0;
4456 for (size_t j = 0; j < t->n_sections; j++)
4457 if (t->sections[j].nests[a] == nest)
4459 struct ctables_section *s = &t->sections[j];
4460 sections[n_sections++] = s;
4461 n_total_cells += hmap_count (&s->cells);
4463 size_t depth = s->nests[a]->n;
4464 max_depth = MAX (depth, max_depth);
4467 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4469 size_t n_sorted = 0;
4471 for (size_t j = 0; j < n_sections; j++)
4473 struct ctables_section *s = sections[j];
4475 struct ctables_cell *cell;
4476 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4478 sorted[n_sorted++] = cell;
4479 assert (n_sorted <= n_total_cells);
4482 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4483 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4486 if (a == PIVOT_AXIS_ROW)
4488 size_t ids[N_CTDTS];
4489 memset (ids, 0, sizeof ids);
4490 for (size_t j = 0; j < n_sorted; j++)
4492 struct ctables_cell *cell = sorted[j];
4493 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4495 struct ctables_domain *domain = cell->domains[dt];
4496 if (!domain->sequence)
4497 domain->sequence = ++ids[dt];
4504 for (size_t j = 0; j < n_sorted; j++)
4506 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4511 struct ctables_level
4513 enum ctables_level_type
4515 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4516 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4517 CTL_SUMMARY, /* Summary functions. */
4521 enum settings_value_show vlabel; /* CTL_VAR only. */
4524 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4525 size_t n_levels = 0;
4526 for (size_t k = 0; k < nest->n; k++)
4528 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4529 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4531 if (vlabel != CTVL_NONE)
4533 levels[n_levels++] = (struct ctables_level) {
4535 .vlabel = (enum settings_value_show) vlabel,
4540 if (nest->scale_idx != k
4541 && (k != nest->n - 1 || t->label_axis[a] == a))
4543 levels[n_levels++] = (struct ctables_level) {
4544 .type = CTL_CATEGORY,
4550 if (!summary_dimension && a == t->slabels_axis)
4552 levels[n_levels++] = (struct ctables_level) {
4553 .type = CTL_SUMMARY,
4554 .var_idx = SIZE_MAX,
4558 /* Pivot categories:
4560 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4561 - category for nest->vars[0], if nest->scale_idx != 0
4562 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4563 - category for nest->vars[1], if nest->scale_idx != 1
4565 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4566 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4567 - summary function, if 'a == t->slabels_axis && a ==
4570 Additional dimensions:
4572 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4574 - If 't->label_axis[b] == a' for some 'b != a', add a category
4579 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4581 for (size_t j = 0; j < n_sorted; j++)
4583 struct ctables_cell *cell = sorted[j];
4584 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4586 size_t n_common = 0;
4589 for (; n_common < n_levels; n_common++)
4591 const struct ctables_level *level = &levels[n_common];
4592 if (level->type == CTL_CATEGORY)
4594 size_t var_idx = level->var_idx;
4595 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4596 if (prev->axes[a].cvs[var_idx].category != c)
4598 else if (c->type != CCT_SUBTOTAL
4599 && c->type != CCT_TOTAL
4600 && c->type != CCT_POSTCOMPUTE
4601 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4602 &cell->axes[a].cvs[var_idx].value,
4603 var_get_type (nest->vars[var_idx])))
4609 for (size_t k = n_common; k < n_levels; k++)
4611 const struct ctables_level *level = &levels[k];
4612 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4613 if (level->type == CTL_SUMMARY)
4615 assert (k == n_levels - 1);
4617 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4618 for (size_t m = 0; m < specs->n; m++)
4620 int leaf = pivot_category_create_leaf (
4621 parent, ctables_summary_label (&specs->specs[m],
4629 const struct variable *var = nest->vars[level->var_idx];
4630 struct pivot_value *label;
4631 if (level->type == CTL_VAR)
4633 label = pivot_value_new_variable (var);
4634 label->variable.show = level->vlabel;
4636 else if (level->type == CTL_CATEGORY)
4638 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4639 label = ctables_category_create_label (
4640 t->categories[var_get_dict_index (var)],
4641 cv->category, var, &cv->value);
4646 if (k == n_levels - 1)
4647 prev_leaf = pivot_category_create_leaf (parent, label);
4649 groups[k] = pivot_category_create_group__ (parent, label);
4653 cell->axes[a].leaf = prev_leaf;
4662 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4666 size_t n_total_cells = 0;
4667 for (size_t j = 0; j < t->n_sections; j++)
4668 n_total_cells += hmap_count (&t->sections[j].cells);
4670 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4671 size_t n_sorted = 0;
4672 for (size_t j = 0; j < t->n_sections; j++)
4674 const struct ctables_section *s = &t->sections[j];
4675 struct ctables_cell *cell;
4676 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4678 sorted[n_sorted++] = cell;
4680 assert (n_sorted <= n_total_cells);
4681 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4683 size_t ids[N_CTDTS];
4684 memset (ids, 0, sizeof ids);
4685 for (size_t j = 0; j < n_sorted; j++)
4687 struct ctables_cell *cell = sorted[j];
4688 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4690 struct ctables_domain *domain = cell->domains[dt];
4691 if (!domain->sequence)
4692 domain->sequence = ++ids[dt];
4699 for (size_t i = 0; i < t->n_sections; i++)
4701 struct ctables_section *s = &t->sections[i];
4703 struct ctables_cell *cell;
4704 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4709 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4710 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4711 for (size_t j = 0; j < specs->n; j++)
4714 size_t n_dindexes = 0;
4716 if (summary_dimension)
4717 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4719 if (categories_dimension)
4721 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4722 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4723 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4724 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4727 dindexes[n_dindexes++] = ctv->leaf;
4730 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4733 int leaf = cell->axes[a].leaf;
4734 if (a == t->summary_axis && !summary_dimension)
4736 dindexes[n_dindexes++] = leaf;
4739 const struct ctables_summary_spec *ss = &specs->specs[j];
4741 struct fmt_spec format = specs->specs[j].format;
4742 bool is_ctables_format = ss->is_ctables_format;
4743 double d = (cell->postcompute
4744 ? ctables_cell_calculate_postcompute (
4745 s, cell, ss, &format, &is_ctables_format, j)
4746 : ctables_summary_value (cell, &cell->summaries[j],
4749 struct pivot_value *value;
4750 if (ct->hide_threshold != 0
4751 && d < ct->hide_threshold
4752 && ctables_summary_function_is_count (ss->function))
4754 value = pivot_value_new_user_text_nocopy (
4755 xasprintf ("<%d", ct->hide_threshold));
4757 else if (d == 0 && ct->zero)
4758 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4759 else if (d == SYSMIS && ct->missing)
4760 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4761 else if (is_ctables_format)
4762 value = pivot_value_new_user_text_nocopy (
4763 ctables_format (d, &format, &ct->ctables_formats));
4766 value = pivot_value_new_number (d);
4767 value->numeric.format = format;
4769 /* XXX should text values be right-justified? */
4770 pivot_table_put (pt, dindexes, n_dindexes, value);
4775 pivot_table_submit (pt);
/* Validates a request to move the category labels of axis A of table T to
   another axis, as recorded in t->label_axis[a].

   Records A in t->clabels_from_axis and the innermost variable of the first
   nest on the axis in t->clabels_example.  Reports an error with msg (SE, ...)
   when the categories of that variable include a CCT_FUNCTION entry, when the
   innermost variable of a nest is the scale variable of that nest, or when the
   innermost variables of the nests differ in width, value labels, or category
   specifications.

   NOTE(review): the return statements and the early exits are not visible in
   this excerpt.  The caller combines two calls with &&, so the result is used
   as a boolean; presumably true means the position is acceptable.  Confirm
   against the full file. */
4779 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4781 enum pivot_axis_type label_pos = t->label_axis[a];
4785 t->clabels_from_axis = a;
/* These two names are only used to build the error messages below. */
4787 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4788 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4790 const struct ctables_stack *stack = &t->stacks[a];
4794 const struct ctables_nest *n0 = &stack->nests[0];
4797 assert (stack->n == 1);
/* v0 is the innermost (last) variable of the first nest.  Its categories c0
   are the reference that every other nest is compared against. */
4801 const struct variable *v0 = n0->vars[n0->n - 1];
4802 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4803 t->clabels_example = v0;
4805 for (size_t i = 0; i < c0->n_cats; i++)
4806 if (c0->cats[i].type == CCT_FUNCTION)
4808 msg (SE, _("%s=%s is not allowed with sorting based "
4809 "on a summary function."),
4810 subcommand_name, pos_name);
/* The innermost variable of the first nest must not be its scale variable. */
4813 if (n0->n - 1 == n0->scale_idx)
4815 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4816 "but %s is a scale variable."),
4817 subcommand_name, pos_name, var_get_name (v0));
/* Compare the innermost variable of each remaining nest against v0. */
4821 for (size_t i = 1; i < stack->n; i++)
4823 const struct ctables_nest *ni = &stack->nests[i];
4825 const struct variable *vi = ni->vars[ni->n - 1];
4826 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4828 if (ni->n - 1 == ni->scale_idx)
4830 msg (SE, _("%s=%s requires the variables to be moved to be "
4831 "categorical, but %s is a scale variable."),
4832 subcommand_name, pos_name, var_get_name (vi));
4835 if (var_get_width (v0) != var_get_width (vi))
4837 msg (SE, _("%s=%s requires the variables to be "
4838 "moved to have the same width, but %s has "
4839 "width %d and %s has width %d."),
4840 subcommand_name, pos_name,
4841 var_get_name (v0), var_get_width (v0),
4842 var_get_name (vi), var_get_width (vi));
4845 if (!val_labs_equal (var_get_value_labels (v0),
4846 var_get_value_labels (vi)))
4848 msg (SE, _("%s=%s requires the variables to be "
4849 "moved to have the same value labels, but %s "
4850 "and %s have different value labels."),
4851 subcommand_name, pos_name,
4852 var_get_name (v0), var_get_name (vi));
4855 if (!ctables_categories_equal (c0, ci))
4857 msg (SE, _("%s=%s requires the variables to be "
4858 "moved to have the same category "
4859 "specifications, but %s and %s have different "
4860 "category specifications."),
4861 subcommand_name, pos_name,
4862 var_get_name (v0), var_get_name (vi));
/* Searches the array *SUM_VARS, which holds *N elements, for VAR.  When VAR
   is not yet stored, the array is grown with x2nrealloc if *N has reached
   *ALLOCATED and VAR is written at index *N.

   NOTE(review): the return statements and the increment of *N are not visible
   in this excerpt.  enumerate_sum_vars stores the result in sum_var_idx, so
   the function presumably returns the index of VAR in the array.  Confirm. */
4871 add_sum_var (struct variable *var,
4872 struct variable ***sum_vars, size_t *n, size_t *allocated)
4874 for (size_t i = 0; i < *n; i++)
4875 if (var == (*sum_vars)[i])
4878 if (*n >= *allocated)
4879 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4880 (*sum_vars)[*n] = var;
/* Walks the axis tree rooted at A.  For every summary spec in a->specs[]
   whose function satisfies ctables_function_is_pctsum, a->var is registered
   through add_sum_var and the result is stored in spec->sum_var_idx.  The
   two children a->subs[0] and a->subs[1] are visited recursively.

   NOTE(review): the test that selects between the spec loop and the
   recursion is not visible in this excerpt. */
4885 enumerate_sum_vars (const struct ctables_axis *a,
4886 struct variable ***sum_vars, size_t *n, size_t *allocated)
4894 for (size_t i = 0; i < N_CSVS; i++)
4895 for (size_t j = 0; j < a->specs[i].n; j++)
4897 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4898 if (ctables_function_is_pctsum (spec->function))
4899 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4905 for (size_t i = 0; i < 2; i++)
4906 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution:

   - Builds t->stacks[a] for each axis with enumerate_fts and fills the
     per-nest domain index arrays nest->domains[dt].
   - Gives every nest on the summary axis a default cell summary spec when it
     has none, and clones the cell specs into the total specs when the latter
     are empty.
   - When t->ctables->smissing_listwise is set, collects scale variables of
     the other nests in the same group into per-spec-set listwise_vars.
   - Merges the spec sets of all nests on the summary axis into
     t->summary_specs, recording each spec position in axis_idx.
   - Enumerates the sum variables of the summary axis into t->sum_vars.

   Returns the conjunction of ctables_check_label_position for the row and
   column axes. */
4912 ctables_prepare_table (struct ctables_table *t)
4914 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4917 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4919 for (size_t j = 0; j < t->stacks[a].n; j++)
4921 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each domain type, domains[dt] receives the indexes K of the variables
   in this nest that pass the tests below.  It is sized for all nest->n
   variables, which is the most that can be appended. */
4922 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4924 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4925 nest->n_domains[dt] = 0;
4927 for (size_t k = 0; k < nest->n; k++)
4929 if (k == nest->scale_idx)
4938 if (a != PIVOT_AXIS_LAYER)
4945 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4946 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4947 : a == PIVOT_AXIS_ROW)
4949 if (k == nest->n - 1
4950 || (nest->scale_idx == nest->n - 1
4951 && k == nest->n - 2))
4957 if (a == PIVOT_AXIS_COLUMN)
4962 if (a == PIVOT_AXIS_ROW)
4967 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Installs a stack that consists of one empty nest (n == 0) for this axis.
   NOTE(review): the condition selecting this branch is not visible here. */
4974 struct ctables_nest *nest = xmalloc (sizeof *nest);
4975 *nest = (struct ctables_nest) { .n = 0 };
4976 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4978 /* There's no point in moving labels away from an axis that has no
4979 labels, so avoid dealing with the special cases around that. */
4980 t->label_axis[a] = a;
4983 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4984 for (size_t i = 0; i < stack->n; i++)
4986 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary was specified for this nest: create a single default spec,
   CTSF_MEAN when specs->is_scale is set and CTSF_COUNT otherwise, and use a
   clone of it for totals as well. */
4987 if (!nest->specs[CSV_CELL].n)
4989 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4990 specs->specs = xmalloc (sizeof *specs->specs);
4993 enum ctables_summary_function function
4994 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4996 *specs->specs = (struct ctables_summary_spec) {
4997 .function = function,
4998 .format = ctables_summary_default_format (function, specs->var),
5001 specs->var = nest->vars[0];
5003 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5004 &nest->specs[CSV_CELL]);
/* Cell summaries exist but total summaries do not: totals reuse a clone of
   the cell summaries. */
5006 else if (!nest->specs[CSV_TOTAL].n)
5007 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5008 &nest->specs[CSV_CELL]);
/* Listwise handling: gather the scale variables of the other nests that share
   this nest's group_head and attach the list to the spec sets of this nest. */
5010 if (t->ctables->smissing_listwise)
5012 struct variable **listwise_vars = NULL;
5014 size_t allocated = 0;
5016 for (size_t j = nest->group_head; j < stack->n; j++)
5018 const struct ctables_nest *other_nest = &stack->nests[j];
5019 if (other_nest->group_head != nest->group_head)
5022 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5025 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5026 sizeof *listwise_vars);
5027 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5030 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5033 listwise_vars = xmemdup (listwise_vars,
5034 n * sizeof *listwise_vars);
5035 nest->specs[sv].listwise_vars = listwise_vars;
5036 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into t->summary_specs.  Each
   round picks the smallest pending item according to merge_item_compare_3way,
   appends its current spec to the merged set, and then advances every item
   that compares equal to it, storing the merged position in axis_idx.  An
   item whose set is exhausted is replaced by the last pending item. */
5041 struct ctables_summary_spec_set *merged = &t->summary_specs;
5042 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5044 for (size_t j = 0; j < stack->n; j++)
5046 const struct ctables_nest *nest = &stack->nests[j];
5048 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5049 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5054 struct merge_item min = items[0];
5055 for (size_t j = 1; j < n_left; j++)
5056 if (merge_item_compare_3way (&items[j], &min) < 0)
5059 if (merged->n >= merged->allocated)
5060 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5061 sizeof *merged->specs);
5062 merged->specs[merged->n++] = min.set->specs[min.ofs];
5064 for (size_t j = 0; j < n_left; )
5066 if (merge_item_compare_3way (&items[j], &min) == 0)
5068 struct merge_item *item = &items[j];
5069 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5070 if (++item->ofs >= item->set->n)
5072 items[j] = items[--n_left];
/* Diagnostic dump of the merged specs and of each nest's specs with their
   axis_idx.  NOTE(review): presumably disabled by preprocessor lines that
   are not visible in this excerpt; confirm. */
5082 for (size_t j = 0; j < merged->n; j++)
5083 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5085 for (size_t j = 0; j < stack->n; j++)
5087 const struct ctables_nest *nest = &stack->nests[j];
5088 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5090 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5091 for (size_t k = 0; k < specs->n; k++)
5092 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5093 specs->specs[k].axis_idx);
5099 size_t allocated_sum_vars = 0;
5100 enumerate_sum_vars (t->axes[t->summary_axis],
5101 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5103 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5104 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, reads from case C the value of the
   innermost variable of the nest and, when ctables_categories_match accepts
   it for that variable's categories, records it with ctables_value_insert.

   NOTE(review): the statement controlled by the numeric SYSMIS test is not
   visible in this excerpt; presumably system-missing values are skipped. */
5108 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5109 enum pivot_axis_type a)
5111 struct ctables_stack *stack = &t->stacks[a];
5112 for (size_t i = 0; i < stack->n; i++)
5114 const struct ctables_nest *nest = &stack->nests[i];
5115 const struct variable *var = nest->vars[nest->n - 1];
5116 const union value *value = case_data (c, var);
5118 if (var_is_numeric (var) && value->f == SYSMIS)
5121 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5123 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback passed to sort by ctables_sort_clabels_values.  A_ and
   B_ each point to a pointer to struct ctables_value, and WIDTH_ points to
   the int width of the values.  Returns the result of value_compare_3way on
   the two values at that width. */
5128 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5130 const struct ctables_value *const *ap = a_;
5131 const struct ctables_value *const *bp = b_;
5132 const struct ctables_value *a = *ap;
5133 const struct ctables_value *b = *bp;
5134 const int *width = width_;
5135 return value_compare_3way (&a->value, &b->value, *width);
/* Builds t->clabels_values from t->clabels_values_map: an array holding a
   pointer to every entry of the map, sorted by value at the width of
   t->clabels_example.  Each entry then gets its position in the sorted array
   as its leaf.

   Before that, the labeled values of t->clabels_example that match its
   categories are inserted into the map.  NOTE(review): the condition that
   guards this insertion is not visible in this excerpt. */
5139 ctables_sort_clabels_values (struct ctables_table *t)
5141 const struct variable *v0 = t->clabels_example;
5142 int width = var_get_width (v0);
5144 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5147 const struct val_labs *val_labs = var_get_value_labels (v0);
5148 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5149 vl = val_labs_next (val_labs, vl))
5150 if (ctables_categories_match (c0, &vl->value, v0))
5151 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array so that it can be sorted. */
5154 size_t n = hmap_count (&t->clabels_values_map);
5155 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5157 struct ctables_value *clv;
5159 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5160 t->clabels_values[i++] = clv;
5161 t->n_clabels_values = n;
5164 sort (t->clabels_values, n, sizeof *t->clabels_values,
5165 compare_clabels_values_3way, &width);
/* The leaf of each value is its rank in sorted order. */
5167 for (size_t i = 0; i < n; i++)
5168 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, through ctables_add_occurrence, values of VAR that the
   categories in CATS refer to.  Explicit number and string categories
   contribute their own value; the other visible cases contribute labeled
   values of VAR that satisfy the category.

   NOTE(review): most case labels of the switch on the category type are not
   visible in this excerpt, so the comments below describe each visible
   fragment by what it does rather than by the case it belongs to. */
5172 ctables_add_category_occurrences (const struct variable *var,
5173 struct hmap *occurrences,
5174 const struct ctables_categories *cats)
5176 const struct val_labs *val_labs = var_get_value_labels (var);
5178 for (size_t i = 0; i < cats->n_cats; i++)
5180 const struct ctables_category *c = &cats->cats[i];
/* A single numeric value, c->number. */
5184 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string value: c->string copied into a temporary value that is
   padded with spaces to the width of VAR, then destroyed after use. */
5190 int width = var_get_width (var);
5192 value_init (&value, width);
5193 value_copy_buf_rpad (&value, width,
5194 CHAR_CAST (uint8_t *, c->string.string),
5195 c->string.length, ' ');
5196 ctables_add_occurrence (var, &value, occurrences);
5197 value_destroy (&value, width);
/* Labeled numeric values within the closed range nrange[0] to nrange[1]. */
5202 assert (var_is_numeric (var));
5203 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5204 vl = val_labs_next (val_labs, vl))
5205 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5206 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled string values accepted by in_string_range for c->srange. */
5210 assert (var_is_alpha (var));
5211 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5212 vl = val_labs_next (val_labs, vl))
5213 if (in_string_range (&vl->value, var, c->srange))
5214 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5218 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5219 vl = val_labs_next (val_labs, vl))
5220 if (var_is_value_missing (var, &vl->value))
5221 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value, without filtering. */
5225 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5226 vl = val_labs_next (val_labs, vl))
5227 ctables_add_occurrence (var, &vl->value, occurrences);
5230 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless c->include_missing. */
5240 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5241 vl = val_labs_next (val_labs, vl))
5242 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5243 ctables_add_occurrence (var, &vl->value, occurrences);
5246 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates combinations of category values for section S so
   that a cell exists for each of them.

   A is the axis being processed and A_IDX the index of the variable within
   the nest of S on that axis.  C is a scratch case whose variable values are
   overwritten while enumerating, and CATS records the category chosen for
   each (axis, variable) position.

   - Once A has passed the last axis, the combination held in C and CATS is
     inserted with ctables_cell_insert__.
   - When S has no nest on axis A, or A_IDX is past the last variable of the
     nest, enumeration continues with the first variable of the next axis.
   - Otherwise each recorded occurrence of the variable is tried in turn,
     followed by each CCT_POSTCOMPUTE category of the variable. */
5253 ctables_section_recurse_add_empty_categories (
5254 struct ctables_section *s,
5255 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5256 enum pivot_axis_type a, size_t a_idx)
5258 if (a >= PIVOT_N_AXES)
5259 ctables_cell_insert__ (s, c, cats);
5260 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5261 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5264 const struct variable *var = s->nests[a]->vars[a_idx];
5265 const struct ctables_categories *categories = s->table->categories[
5266 var_get_dict_index (var)];
5267 int width = var_get_width (var);
5268 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5269 const struct ctables_occurrence *o;
5270 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence, look up
   its category, and recurse to the next variable on the same axis. */
5272 union value *value = case_data_rw (c, var);
5273 value_destroy (value, width);
5274 value_clone (value, &o->value, width);
5275 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5276 assert (cats[a][a_idx] != NULL);
5277 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated by category rather than by value. */
5280 for (size_t i = 0; i < categories->n_cats; i++)
5282 const struct ctables_category *cat = &categories->cats[i];
5283 if (cat->type == CCT_POSTCOMPUTE)
5285 cats[a][a_idx] = cat;
5286 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should be shown.

   For every non-scale variable of every nest of S whose categories have
   show_empty set, the category occurrences are added with
   ctables_add_category_occurrences.  A scratch case built from the prototype
   of the dictionary is then passed to
   ctables_section_recurse_add_empty_categories, starting at axis 0,
   variable 0.

   NOTE(review): the uses of the show_empty local and the release of the
   scratch case are not visible in this excerpt. */
5293 ctables_section_add_empty_categories (struct ctables_section *s)
5295 bool show_empty = false;
5296 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5298 for (size_t k = 0; k < s->nests[a]->n; k++)
5299 if (k != s->nests[a]->scale_idx)
5301 const struct variable *var = s->nests[a]->vars[k];
5302 const struct ctables_categories *cats = s->table->categories[
5303 var_get_dict_index (var)];
5304 if (cats->show_empty)
5307 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension of cats is a fixed bound of 10 variables per axis,
   matching the parameter type of the recursive helper. */
5313 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5314 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5315 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S:

   - every occurrence of every non-scale variable is removed from its map,
     after its value is destroyed;
   - every cell is removed from s->cells, after its per-axis values, its
     summaries, and the arrays holding them are released;
   - every domain is removed from s->domains[], after its sums are freed.

   The cell and domain maps are shrunk afterward.

   NOTE(review): calls that free the occurrence, cell, and domain nodes
   themselves are not visible in this excerpt. */
5320 ctables_section_clear (struct ctables_section *s)
5322 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5324 const struct ctables_nest *nest = s->nests[a];
5325 for (size_t i = 0; i < nest->n; i++)
5326 if (i != nest->scale_idx)
5328 const struct variable *var = nest->vars[i];
5329 int width = var_get_width (var);
5330 struct ctables_occurrence *o, *next;
5331 struct hmap *map = &s->occurrences[a][i];
5332 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5334 value_destroy (&o->value, width);
5335 hmap_delete (map, &o->node);
5342 struct ctables_cell *cell, *next_cell;
5343 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5345 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5347 const struct ctables_nest *nest = s->nests[a];
5348 for (size_t i = 0; i < nest->n; i++)
5349 if (i != nest->scale_idx)
5350 value_destroy (&cell->axes[a].cvs[i].value,
5351 var_get_width (nest->vars[i]));
5352 free (cell->axes[a].cvs);
/* The summaries of a cell follow the spec set selected by cell->sv in the
   nest of S on the summary axis. */
5355 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5356 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5357 for (size_t i = 0; i < specs->n; i++)
5358 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5359 free (cell->summaries);
5361 hmap_delete (&s->cells, &cell->node);
5364 hmap_shrink (&s->cells);
5366 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5368 struct ctables_domain *domain, *next_domain;
5369 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5372 free (domain->sums);
5373 hmap_delete (&s->domains[dt], &domain->node);
5376 hmap_shrink (&s->domains[dt]);
/* Releases the resources of section S.  The contents are emptied first with
   ctables_section_clear.  Then the occurrence maps of every axis are
   destroyed and the arrays that hold them are freed, followed by the cell
   map and the domain maps. */
5381 ctables_section_uninit (struct ctables_section *s)
5383 ctables_section_clear (s);
5385 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5387 struct ctables_nest *nest = s->nests[a];
5388 for (size_t i = 0; i < nest->n; i++)
5389 hmap_destroy (&s->occurrences[a][i]);
5390 free (s->occurrences[a]);
5393 hmap_destroy (&s->cells);
5394 for (size_t i = 0; i < N_CTDTS; i++)
5395 hmap_destroy (&s->domains[i]);
/* Empties table T.  Every section is cleared with ctables_section_clear.
   When T has a clabels_example variable, the values collected in
   t->clabels_values_map are destroyed at that variable's width and removed
   from the map, the map is shrunk, and the t->clabels_values array is freed
   and reset to NULL with a count of zero. */
5399 ctables_table_clear (struct ctables_table *t)
5401 for (size_t i = 0; i < t->n_sections; i++)
5402 ctables_section_clear (&t->sections[i]);
5404 if (t->clabels_example)
5406 int width = var_get_width (t->clabels_example);
5407 struct ctables_value *value, *next_value;
5408 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5409 &t->clabels_values_map)
5411 value_destroy (&value->value, width);
5412 hmap_delete (&t->clabels_values_map, &value->node);
5415 hmap_shrink (&t->clabels_values_map);
5417 free (t->clabels_values);
5418 t->clabels_values = NULL;
5419 t->n_clabels_values = 0;
/* Runs all tables of CT over the cases read from INPUT for dataset DS.

   Sections are allocated and added for each table first.  The cases are then
   read in groups from a casegrouper: split-file groups when the split type
   of the dictionary is SPLIT_SEPARATE, otherwise a grouper created with no
   grouping variables.  Every case is inserted into every section of every
   table, and category-label values are collected for each axis whose labels
   are moved elsewhere (t->label_axis[a] != a).  After reading, each table
   has its category-label values sorted when applicable, gets empty
   categories added per section, is output, and is cleared.

   Returns the result of casegrouper_destroy. */
5424 ctables_execute (struct dataset *ds, struct casereader *input,
5427 for (size_t i = 0; i < ct->n_tables; i++)
5429 struct ctables_table *t = ct->tables[i];
/* Capacity is the product of the stack sizes of the three axes, each counted
   as at least 1. */
5430 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5431 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5432 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5433 sizeof *t->sections);
5434 size_t ix[PIVOT_N_AXES];
5435 ctables_table_add_section (t, 0, ix);
5438 struct dictionary *dict = dataset_dict (ds);
5439 struct casegrouper *grouper
5440 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5441 ? casegrouper_create_splits (input, dict)
5442 : casegrouper_create_vars (input, NULL, 0));
5443 struct casereader *group;
5444 while (casegrouper_get_next_group (grouper, &group))
5446 /* Output SPLIT FILE variables. */
5447 struct ccase *c = casereader_peek (group, 0);
5450 output_split_file_values (ds, c);
5454 bool warn_on_invalid = true;
5455 for (c = casereader_read (group); c;
5456 case_unref (c), c = casereader_read (group))
/* d_weight is the case weight from the dictionary.  e_weight is derived from
   the variable ct->e_weight when that is set; the alternative is not visible
   in this excerpt. */
5458 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5459 double e_weight = (ct->e_weight
5460 ? var_force_valid_weight (ct->e_weight,
5461 case_num (c, ct->e_weight),
5465 for (size_t i = 0; i < ct->n_tables; i++)
5467 struct ctables_table *t = ct->tables[i];
5469 for (size_t j = 0; j < t->n_sections; j++)
5470 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5472 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5473 if (t->label_axis[a] != a)
5474 ctables_insert_clabels_values (t, c, a);
5477 casereader_destroy (group);
5479 for (size_t i = 0; i < ct->n_tables; i++)
5481 struct ctables_table *t = ct->tables[i];
5483 if (t->clabels_example)
5484 ctables_sort_clabels_values (t);
5486 for (size_t j = 0; j < t->n_sections; j++)
5487 ctables_section_add_empty_categories (&t->sections[j]);
5489 ctables_table_output (ct, t);
5490 ctables_table_clear (t);
5493 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer and returns the parsed expression.  The binary
   operator parsers take a function of this type to parse their operands. */
5498 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5499 struct dictionary *);
/* Releases postcompute expression E.  A CTPO_CAT_STRING node frees its
   string and a CTPO_CAT_SRANGE node frees both range endpoints.  Nodes with
   subexpressions destroy e->subs[0] and e->subs[1] recursively.  The listed
   category node types that follow have no payload handling visible here.
   The source location of E is destroyed at the end.

   NOTE(review): the case labels for the nodes with subexpressions, any null
   check on E, and the final free of E are not visible in this excerpt. */
5502 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5508 case CTPO_CAT_STRING:
5509 ss_dealloc (&e->string);
5512 case CTPO_CAT_SRANGE:
5513 for (size_t i = 0; i < 2; i++)
5514 ss_dealloc (&e->srange[i]);
5523 for (size_t i = 0; i < 2; i++)
5524 ctables_pcexpr_destroy (e->subs[i]);
5528 case CTPO_CAT_NUMBER:
5529 case CTPO_CAT_NRANGE:
5530 case CTPO_CAT_MISSING:
5531 case CTPO_CAT_OTHERNM:
5532 case CTPO_CAT_SUBTOTAL:
5533 case CTPO_CAT_TOTAL:
5537 msg_location_destroy (e->location);
/* Allocates a new expression node whose operands are SUB0 and SUB1.  The
   location of the node is the merge of the locations of the two operands.

   NOTE(review): the initializer for the operator field and the return
   statement are not visible in this excerpt; presumably the node is tagged
   with OP and returned. */
5542 static struct ctables_pcexpr *
5543 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5544 struct ctables_pcexpr *sub0,
5545 struct ctables_pcexpr *sub1)
5547 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5548 *e = (struct ctables_pcexpr) {
5550 .subs = { sub0, sub1 },
5551 .location = msg_location_merged (sub0->location, sub1->location),
5556 /* How to parse an operator. */
/* The lexer token that introduces the operator. */
5559 enum token_type token;
/* The postcompute operation built when the token is matched. */
5560 enum ctables_postcompute_op op;
/* Looks through the N_OPS entries of OPS for one whose token equals the
   current token of LEXER.

   NOTE(review): the statements controlled by the two tests and the return
   statements are not visible in this excerpt.  The T_NEG_NUM test suggests
   that a negative-number token is handled differently from the other
   operator tokens, presumably by not consuming it; confirm. */
5563 static const struct operator *
5564 ctables_pcexpr_match_operator (struct lexer *lexer,
5565 const struct operator ops[], size_t n_ops)
5567 for (const struct operator *op = ops; op < ops + n_ops; op++)
5568 if (lex_token (lexer) == op->token)
5570 if (op->token != T_NEG_NUM)
/* Parses the rest of a chain of binary operators at one precedence level,
   given the already parsed left operand LHS.

   Each iteration tries to match one of the N_OPS operators in OPS, parses
   the right operand with PARSE_NEXT_LEVEL, and replaces LHS with a new
   binary node combining the two, so the chain associates to the left.
   op_count counts the operators parsed so far; where it is checked, a value
   greater than 1 together with a nonnull CHAIN_WARNING issues that warning
   at the location of LHS.  LHS is destroyed on the path that follows parsing
   of the right operand.

   NOTE(review): the tests for a missing operator and for a failed operand,
   and the return statements, are not visible in this excerpt. */
5579 static struct ctables_pcexpr *
5580 ctables_pcexpr_parse_binary_operators__ (
5581 struct lexer *lexer, struct dictionary *dict,
5582 const struct operator ops[], size_t n_ops,
5583 parse_recursively_func *parse_next_level,
5584 const char *chain_warning, struct ctables_pcexpr *lhs)
5586 for (int op_count = 0; ; op_count++)
5588 const struct operator *op
5589 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5592 if (op_count > 1 && chain_warning)
5593 msg_at (SW, lhs->location, "%s", chain_warning);
5598 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5601 ctables_pcexpr_destroy (lhs);
5605 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses a chain of binary operators at one precedence level.  The first
   operand is parsed with PARSE_NEXT_LEVEL and handed, together with the
   operator table and CHAIN_WARNING, to
   ctables_pcexpr_parse_binary_operators__, whose result is returned.

   NOTE(review): the handling of a failed first operand is not visible in
   this excerpt. */
5609 static struct ctables_pcexpr *
5610 ctables_pcexpr_parse_binary_operators (
5611 struct lexer *lexer, struct dictionary *dict,
5612 const struct operator ops[], size_t n_ops,
5613 parse_recursively_func *parse_next_level, const char *chain_warning)
5615 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5619 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5621 chain_warning, lhs);
/* Forward declaration.  The primary parser calls back into the addition
   level to parse a parenthesized subexpression. */
5624 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5625 struct dictionary *);
/* Returns, by value, an expression node of type CTPO_CAT_NRANGE whose
   numeric range runs from LOW to HIGH.  Other members are left to the
   default of the compound literal. */
5627 static struct ctables_pcexpr
5628 ctpo_cat_nrange (double low, double high)
5630 return (struct ctables_pcexpr) {
5631 .op = CTPO_CAT_NRANGE,
5632 .nrange = { low, high },
/* Returns, by value, an expression node of type CTPO_CAT_SRANGE whose string
   range runs from LOW to HIGH.  The substrings are stored as given, not
   copied.  Callers in this file pass a substring with a null string pointer
   for an open end of the range. */
5636 static struct ctables_pcexpr
5637 ctpo_cat_srange (struct substring low, struct substring high)
5639 return (struct ctables_pcexpr) {
5640 .op = CTPO_CAT_SRANGE,
5641 .srange = { low, high },
/* Parses a primary of a postcompute expression from LEXER:

   - a number, giving a CTPO_CONSTANT node;
   - MISSING, OTHERNM, or TOTAL;
   - SUBTOTAL, optionally followed by a bracketed integer index of at least 1
     (index 0 is used when no bracket follows);
   - a bracketed category reference: a number, a string, or a range written
     with THRU, where LO and HI stand for open ends;
   - a parenthesized subexpression, parsed at the addition level.

   For the forms built in the local node E, the location of E is set to the
   tokens consumed since entry and a heap copy of E is returned.

   NOTE(review): the error returns and the return for the parenthesized form
   are not visible in this excerpt. */
5645 static struct ctables_pcexpr *
5646 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5648 int start_ofs = lex_ofs (lexer);
5649 struct ctables_pcexpr e;
5650 if (lex_is_number (lexer))
5652 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5653 .number = lex_number (lexer) };
5656 else if (lex_match_id (lexer, "MISSING"))
5657 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5658 else if (lex_match_id (lexer, "OTHERNM"))
5659 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5660 else if (lex_match_id (lexer, "TOTAL"))
5661 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5662 else if (lex_match_id (lexer, "SUBTOTAL"))
5664 size_t subtotal_index = 0;
5665 if (lex_match (lexer, T_LBRACK))
5667 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5669 subtotal_index = lex_integer (lexer);
5671 if (!lex_force_match (lexer, T_RBRACK))
5674 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5675 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5677 else if (lex_match (lexer, T_LBRACK))
/* LO THRU x: a range with an open lower end.  For strings the low end is a
   substring with a null pointer; for numbers it is -DBL_MAX. */
5679 if (lex_match_id (lexer, "LO"))
5681 if (!lex_force_match_id (lexer, "THRU"))
5684 if (lex_is_string (lexer))
5686 struct substring low = { .string = NULL };
5687 struct substring high = parse_substring (lexer, dict);
5688 e = ctpo_cat_srange (low, high);
5692 if (!lex_force_num (lexer))
5694 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, optionally followed by THRU and either HI (upper end DBL_MAX) or
   another number. */
5698 else if (lex_is_number (lexer))
5700 double number = lex_number (lexer);
5702 if (lex_match_id (lexer, "THRU"))
5704 if (lex_match_id (lexer, "HI"))
5705 e = ctpo_cat_nrange (number, DBL_MAX);
5708 if (!lex_force_num (lexer))
5710 e = ctpo_cat_nrange (number, lex_number (lexer));
5715 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, optionally followed by THRU and either HI (null upper end) or
   another string. */
5718 else if (lex_is_string (lexer))
5720 struct substring s = parse_substring (lexer, dict);
5722 if (lex_match_id (lexer, "THRU"))
5724 struct substring high;
5726 if (lex_match_id (lexer, "HI"))
5727 high = (struct substring) { .string = NULL };
5730 if (!lex_force_string (lexer))
5735 high = parse_substring (lexer, dict);
5738 e = ctpo_cat_srange (s, high);
5741 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5745 lex_error (lexer, NULL);
/* The closing bracket is missing: release any strings the node owns. */
5749 if (!lex_force_match (lexer, T_RBRACK))
5751 if (e.op == CTPO_CAT_STRING)
5752 ss_dealloc (&e.string);
5753 else if (e.op == CTPO_CAT_SRANGE)
5755 ss_dealloc (&e.srange[0]);
5756 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression; it is destroyed again when the closing
   parenthesis is missing. */
5761 else if (lex_match (lexer, T_LPAREN))
5763 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5766 if (!lex_force_match (lexer, T_RPAREN))
5768 ctables_pcexpr_destroy (ep);
5775 lex_error (lexer, NULL);
/* The location covers the tokens from entry up to the last one consumed. */
5779 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5780 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for SUB.  The location of the node runs
   from token offset START_OFS to the token before the current one in LEXER.

   NOTE(review): the initializers for the operator and the operand and the
   return statement are not visible in this excerpt; the name suggests a
   negation node wrapping SUB. */
5783 static struct ctables_pcexpr *
5784 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5785 struct lexer *lexer, int start_ofs)
5787 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5788 *e = (struct ctables_pcexpr) {
5791 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.

   Unless the current token is T_NEG_NUM and the next one is T_EXP, this is
   an ordinary chain of T_EXP operators (CTPO_POW) over primaries, with a
   warning about left associativity for chains.

   Otherwise a CTPO_CONSTANT node holding the negated token value, that is
   the magnitude of the negative number, becomes the left operand of the
   chain, and the result of the chain is wrapped with
   ctables_pcexpr_allocate_neg.

   NOTE(review): the consumption of the number token and the check for a
   failed chain are not visible in this excerpt. */
5796 static struct ctables_pcexpr *
5797 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5799 static const struct operator op = { T_EXP, CTPO_POW };
5801 const char *chain_warning =
5802 _("The exponentiation operator (`**') is left-associative: "
5803 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5804 "To disable this warning, insert parentheses.");
5806 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5807 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5808 ctables_pcexpr_parse_primary,
5811 /* Special case for situations like "-5**6", which must be parsed as
5814 int start_ofs = lex_ofs (lexer);
5815 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5816 *lhs = (struct ctables_pcexpr) {
5817 .op = CTPO_CONSTANT,
5818 .number = -lex_tokval (lexer),
5819 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5823 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5824 lexer, dict, &op, 1,
5825 ctables_pcexpr_parse_primary, chain_warning, lhs);
5829 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5832 /* Parses the unary minus level. */
/* Without a leading T_DASH this defers to the exponentiation level.  With
   one, the operand is parsed by a recursive call, so repeated minus signs
   nest, and the result is wrapped with ctables_pcexpr_allocate_neg using the
   token offset recorded on entry.

   NOTE(review): the check for a failed operand is not visible in this
   excerpt. */
5833 static struct ctables_pcexpr *
5834 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5836 int start_ofs = lex_ofs (lexer);
5837 if (!lex_match (lexer, T_DASH))
5838 return ctables_pcexpr_parse_exp (lexer, dict);
5840 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5844 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5847 /* Parses the multiplication and division level. */
/* T_ASTERISK maps to CTPO_MUL and T_SLASH to CTPO_DIV.  Operands are parsed
   by ctables_pcexpr_parse_neg, so unary minus binds more tightly than these
   operators.  The last argument to the generic binary-operator parser is
   NULL here. */
5848 static struct ctables_pcexpr *
5849 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5851 static const struct operator ops[] =
5853 { T_ASTERISK, CTPO_MUL },
5854 { T_SLASH, CTPO_DIV },
5857 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5858 sizeof ops / sizeof *ops,
5859 ctables_pcexpr_parse_neg, NULL);
5862 /* Parses the addition and subtraction level. */
/* This is the entry point used for a whole postcompute expression (see the
   callers that parse a parenthesized subexpression and the EXPR(...) body).
   T_PLUS maps to CTPO_ADD and T_DASH to CTPO_SUB; operands are parsed by
   ctables_pcexpr_parse_mul.
   NOTE(review): T_NEG_NUM is also listed as CTPO_ADD, presumably so that
   input lexed as "operand, negative number" is treated as adding that
   negative number -- confirm in ctables_pcexpr_parse_binary_operators(). */
5863 static struct ctables_pcexpr *
5864 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5866 static const struct operator ops[] =
5868 { T_PLUS, CTPO_ADD },
5869 { T_DASH, CTPO_SUB },
5870 { T_NEG_NUM, CTPO_ADD },
5873 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5874 ops, sizeof ops / sizeof *ops,
5875 ctables_pcexpr_parse_mul, NULL);
/* Looks in CT->postcomputes for an entry named NAME.  Candidates are taken
   from the hash bucket selected by utf8_hash_case_string() and compared with
   utf8_strcasecmp(), so the lookup ignores case.
   NOTE(review): the return statements are not among the lines visible in
   this excerpt; callers test the result against null. */
5878 static struct ctables_postcompute *
5879 ctables_find_postcompute (struct ctables *ct, const char *name)
5881 struct ctables_postcompute *pc;
5882 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5883 utf8_hash_case_string (name, 0), &ct->postcomputes)
5884 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   "&name = EXPR(expression)", and records the result in the postcompute
   table that ctables_find_postcompute() searches.

   If a postcompute with the same name already exists, a warning is issued at
   the new definition, a note points at the previous one, and the previous
   expression and location are destroyed before the entry is updated.
   Otherwise a new entry is allocated and inserted under the case-insensitive
   hash of its name.

   cmd_ctables() consumes the PCOMPUTE keyword just before calling this
   function.
   NOTE(review): the failure paths and the return statements are on lines not
   visible in this excerpt. */
5890 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Offset of the token before the current one, i.e. the PCOMPUTE keyword. */
5893 int pcompute_start = lex_ofs (lexer) - 1;
5895 if (!lex_match (lexer, T_AND))
5897 lex_error_expecting (lexer, "&");
5900 if (!lex_force_id (lexer))
5903 char *name = ss_xstrdup (lex_tokss (lexer));
5906 if (!lex_force_match (lexer, T_EQUALS)
5907 || !lex_force_match_id (lexer, "EXPR")
5908 || !lex_force_match (lexer, T_LPAREN))
/* expr_start..expr_end bracket the expression's tokens, excluding the
   surrounding parentheses; they are used for the label below. */
5914 int expr_start = lex_ofs (lexer);
5915 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5916 int expr_end = lex_ofs (lexer) - 1;
5917 if (!expr || !lex_force_match (lexer, T_RPAREN))
5919 ctables_pcexpr_destroy (expr);
5923 int pcompute_end = lex_ofs (lexer) - 1;
5925 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5928 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5931 msg_at (SW, location, _("New definition of &%s will override the "
5932 "previous definition."),
5934 msg_at (SN, pc->location, _("This is the previous definition."));
5936 ctables_pcexpr_destroy (pc->expr);
5937 msg_location_destroy (pc->location);
5942 pc = xmalloc (sizeof *pc);
5943 *pc = (struct ctables_postcompute) { .name = name };
5944 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5945 utf8_hash_case_string (pc->name, 0));
5948 pc->location = location;
/* The label is set to the syntax text of the expression itself. */
5950 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary specifications that follows FORMAT on the
   PPROPERTIES subcommand, storing them in *SSS, which is first reset to an
   empty set.

   Each specification consists of a summary function, then a number in the
   closed range 0 to 100 if the function is CTSF_PTILE, then a format
   specifier.  The list ends at the end of the command, at a slash, or at an
   identifier that matches LABEL or HIDESOURCECATS.
   NOTE(review): the failure paths and the return statements are on lines not
   visible in this excerpt; the final ctables_summary_spec_set_uninit() call
   is presumably the cleanup on failure. */
5955 ctables_parse_pproperties_format (struct lexer *lexer,
5956 struct ctables_summary_spec_set *sss)
5958 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5960 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5961 && !(lex_token (lexer) == T_ID
5962 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5963 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5964 lex_tokss (lexer)))))
5966 /* Parse function. */
5967 enum ctables_summary_function function;
5968 if (!parse_ctables_summary_function (lexer, &function))
5971 /* Parse percentile. */
5972 double percentile = 0;
5973 if (function == CTSF_PTILE)
5975 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5977 percentile = lex_number (lexer);
5982 struct fmt_spec format;
5983 bool is_ctables_format;
5984 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
5987 if (sss->n >= sss->allocated)
5988 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5989 sizeof *sss->specs);
5990 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5991 .function = function,
5992 .percentile = percentile,
5994 .is_ctables_format = is_ctables_format,
6000 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of "&name" references
   to postcomputes already defined in CT, followed by any number of LABEL,
   FORMAT, and HIDESOURCECATS settings.  Each setting is applied to every
   postcompute in the list.

   An "&name" that ctables_find_postcompute() does not find is reported as an
   unknown computed category.
   NOTE(review): the failure paths, the return statements, and the release of
   the PCS array are on lines not visible in this excerpt. */
6005 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
6007 struct ctables_postcompute **pcs = NULL;
6009 size_t allocated_pcs = 0;
/* Collect the referenced postcomputes. */
6011 while (lex_match (lexer, T_AND))
6013 if (!lex_force_id (lexer))
6015 struct ctables_postcompute *pc
6016 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6019 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6024 if (n_pcs >= allocated_pcs)
6025 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply settings until the next slash or the end of the command. */
6029 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6031 if (lex_match_id (lexer, "LABEL"))
6033 lex_match (lexer, T_EQUALS);
6034 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
6037 for (size_t i = 0; i < n_pcs; i++)
6039 free (pcs[i]->label);
6040 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6045 else if (lex_match_id (lexer, "FORMAT"))
6047 lex_match (lexer, T_EQUALS);
6049 struct ctables_summary_spec_set sss;
6050 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute receives a clone of SSS.  NOTE(review): the lines that
   choose between reusing an existing spec set and allocating a new one are
   not visible in this excerpt. */
6053 for (size_t i = 0; i < n_pcs; i++)
6056 ctables_summary_spec_set_uninit (pcs[i]->specs);
6058 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6059 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
/* The parsed original is no longer needed once the clones exist. */
6061 ctables_summary_spec_set_uninit (&sss);
6063 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6065 lex_match (lexer, T_EQUALS);
6066 bool hide_source_cats;
6067 if (!parse_bool (lexer, &hide_source_cats))
6069 for (size_t i = 0; i < n_pcs; i++)
6070 pcs[i]->hide_source_cats = hide_source_cats;
6074 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to broken-down local time with localtime(), formats it with
   strftime() according to FORMAT into the buffer VALUE, and appends the
   result to OUT.
   NOTE(review): neither the localtime() result nor the strftime() return
   value is checked in the lines visible here; if strftime() returns 0 the
   buffer contents are unspecified.  The declaration of VALUE is not visible
   in this excerpt. */
6087 put_strftime (struct string *out, time_t now, const char *format)
6089 const struct tm *tm = localtime (&now);
6091 strftime (value, sizeof value, format, tm);
6092 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it by PREFIX.length bytes.
   NOTE(review): the return statements are not among the lines visible in
   this excerpt; put_title_text() uses the result as a condition. */
6096 skip_prefix (struct substring *s, struct substring prefix)
6098 if (ss_starts_with (*s, prefix))
6100 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens of LEXER from offset EXPR_START
   up to, but not including, EXPR_END.

   For an identifier token, the name is looked up in DICT; if it names a
   variable that has a label, the label is emitted, otherwise the identifier
   text itself.  Other tokens are emitted using their syntax representation,
   with a space added before and after except around parentheses and at the
   ends of the range.
   NOTE(review): the handling of square brackets (the T_LBRACK and T_RBRACK
   branches and the NEST counter) is on lines not visible in this excerpt. */
6108 put_table_expression (struct string *out, struct lexer *lexer,
6109 struct dictionary *dict, int expr_start, int expr_end)
6112 for (int ofs = expr_start; ofs < expr_end; ofs++)
6114 const struct token *t = lex_ofs_token (lexer, ofs);
6115 if (t->type == T_LBRACK)
6117 else if (t->type == T_RBRACK && nest > 0)
6123 else if (t->type == T_ID)
6125 const struct variable *var
6126 = dict_lookup_var (dict, t->string.string);
6127 const char *label = var ? var_get_label (var) : NULL;
6128 ds_put_cstr (out, label ? label : t->string.string);
/* Leading space: not for the first token, not before a right parenthesis,
   and not when OUT already ends in a space. */
6132 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6133 ds_put_byte (out, ' ');
6135 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6136 ds_put_cstr (out, repr);
/* Trailing space: not after the last token and not after a left
   parenthesis. */
6139 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6140 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the keywords that begin with a
   right parenthesis:

     )DATE   the date from NOW, formatted by strftime() with "%x"
     )TIME   the time from NOW, formatted by strftime() with "%X"
     )TABLE  the table expression, rendered by put_table_expression() from
             tokens EXPR_START up to EXPR_END of LEXER

   Text up to each right parenthesis is copied unchanged.  A right
   parenthesis that does not start one of these keywords is copied literally
   (NOTE(review): presumably the else branch; the line that says so is not
   visible in this excerpt, nor is the enclosing loop construct). */
6146 put_title_text (struct string *out, struct substring in, time_t now,
6147 struct lexer *lexer, struct dictionary *dict,
6148 int expr_start, int expr_end)
/* Copy everything before the next right parenthesis. */
6152 size_t chunk = ss_find_byte (in, ')');
6153 ds_put_substring (out, ss_head (in, chunk));
6154 ss_advance (&in, chunk);
6155 if (ss_is_empty (in))
6158 if (skip_prefix (&in, ss_cstr (")DATE")))
6159 put_strftime (out, now, "%x");
6160 else if (skip_prefix (&in, ss_cstr (")TIME")))
6161 put_strftime (out, now, "%X");
6162 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6163 put_table_expression (out, lexer, dict, expr_start, expr_end);
6166 ds_put_byte (out, ')');
6167 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   Steps visible in this excerpt, in order:

   - A measure guesser is created for DS, run over the active file, and
     destroyed.
   - Per-variable label display modes, a private copy of the default pivot
     table look, and the CTABLES-specific number formats are set up.
   - Global subcommands (FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE,
     PPROPERTIES, WEIGHT, HIDESMALLCOUNTS) are parsed until TABLE is matched.
   - For each TABLE, the axis expressions and then the per-table subcommands
     (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST)
     are parsed, and the table is passed to ctables_prepare_table().
   - The active file is opened with proc_open(), processed by
     ctables_execute(), and committed with proc_commit().

   On that final path the result is CMD_SUCCESS if both ctables_execute() and
   proc_commit() succeeded and CMD_FAILURE otherwise.
   NOTE(review): the error exits taken when parsing fails are on lines not
   visible in this excerpt. */
6173 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6175 struct casereader *input = NULL;
/* NOTE(review): any condition guarding the next four lines is not visible
   in this excerpt. */
6177 struct measure_guesser *mg = measure_guesser_create (ds);
6180 input = proc_open (ds);
6181 measure_guesser_run (mg, input);
6182 measure_guesser_destroy (mg);
/* Every variable's label display mode starts at the value returned by
   settings_get_show_variables(). */
6185 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6186 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6187 enum settings_value_show tvars = settings_get_show_variables ();
6188 for (size_t i = 0; i < n_vars; i++)
6189 vlabels[i] = (enum ctables_vlabel) tvars;
/* Take an unshared copy of the default look so it can be modified, and turn
   its omit_empty flag off. */
6191 struct pivot_table_look *look = pivot_table_look_unshare (
6192 pivot_table_look_ref (pivot_table_look_get_default ()));
6193 look->omit_empty = false;
6195 struct ctables *ct = xmalloc (sizeof *ct);
6196 *ct = (struct ctables) {
6197 .dict = dataset_dict (ds),
6199 .ctables_formats = FMT_SETTINGS_INIT,
6201 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp handed to put_title_text() for the TITLES subcommand. */
6204 time_t now = time (NULL);
6209 const char *dot_string;
6210 const char *comma_string;
/* Style strings for the four CTABLES-specific formats.  The dot_string
   variant is used when the decimal setting is '.', the comma_string variant
   otherwise. */
6212 static const struct ctf ctfs[4] = {
6213 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6214 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6215 { CTEF_PAREN, "-,(,),", "-.(.)." },
6216 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6218 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6219 for (size_t i = 0; i < 4; i++)
6221 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6222 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6223 fmt_number_style_from_string (s));
6226 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands: keep parsing until TABLE is matched. */
6229 while (!lex_match_id (lexer, "TABLE"))
6231 if (lex_match_id (lexer, "FORMAT"))
6233 double widths[2] = { SYSMIS, SYSMIS };
6234 double units_per_inch = 72.0;
/* FORMAT settings run up to the next slash. */
6236 while (lex_token (lexer) != T_SLASH)
6238 if (lex_match_id (lexer, "MINCOLWIDTH"))
6240 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6243 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6245 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6248 else if (lex_match_id (lexer, "UNITS"))
6250 lex_match (lexer, T_EQUALS);
6251 if (lex_match_id (lexer, "POINTS"))
6252 units_per_inch = 72.0;
6253 else if (lex_match_id (lexer, "INCHES"))
6254 units_per_inch = 1.0;
6255 else if (lex_match_id (lexer, "CM"))
6256 units_per_inch = 2.54;
6259 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6263 else if (lex_match_id (lexer, "EMPTY"))
6268 lex_match (lexer, T_EQUALS);
6269 if (lex_match_id (lexer, "ZERO"))
6271 /* Nothing to do. */
6273 else if (lex_match_id (lexer, "BLANK"))
6274 ct->zero = xstrdup ("");
6275 else if (lex_force_string (lexer))
6277 ct->zero = ss_xstrdup (lex_tokss (lexer));
6283 else if (lex_match_id (lexer, "MISSING"))
6285 lex_match (lexer, T_EQUALS);
6286 if (!lex_force_string (lexer))
/* A string other than "." is copied; the alternative for "." is on a line
   not visible in this excerpt. */
6290 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6291 ? ss_xstrdup (lex_tokss (lexer))
6297 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6298 "UNITS", "EMPTY", "MISSING");
/* When both widths were given, the minimum must not exceed the maximum. */
6303 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6304 && widths[0] > widths[1])
6306 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Each width that was given is converted from the chosen unit to units of
   1/96 inch and stored in the look's horizontal width range. */
6310 for (size_t i = 0; i < 2; i++)
6311 if (widths[i] != SYSMIS)
6313 int *wr = ct->look->width_ranges[TABLE_HORZ];
6314 wr[i] = widths[i] / units_per_inch * 96.0;
6319 else if (lex_match_id (lexer, "VLABELS"))
6321 if (!lex_force_match_id (lexer, "VARIABLES"))
6323 lex_match (lexer, T_EQUALS);
6325 struct variable **vars;
6327 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6331 if (!lex_force_match_id (lexer, "DISPLAY"))
6336 lex_match (lexer, T_EQUALS);
6338 enum ctables_vlabel vlabel;
6339 if (lex_match_id (lexer, "DEFAULT"))
6340 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6341 else if (lex_match_id (lexer, "NAME"))
6343 else if (lex_match_id (lexer, "LABEL"))
6344 vlabel = CTVL_LABEL;
6345 else if (lex_match_id (lexer, "BOTH"))
6347 else if (lex_match_id (lexer, "NONE"))
6351 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen mode for each listed variable, indexed by the
   variable's dictionary index. */
6357 for (size_t i = 0; i < n_vars; i++)
6358 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6361 else if (lex_match_id (lexer, "MRSETS"))
6363 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6365 lex_match (lexer, T_EQUALS);
6366 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6369 else if (lex_match_id (lexer, "SMISSING"))
6371 if (lex_match_id (lexer, "VARIABLE"))
6372 ct->smissing_listwise = false;
6373 else if (lex_match_id (lexer, "LISTWISE"))
6374 ct->smissing_listwise = true;
6377 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6381 else if (lex_match_id (lexer, "PCOMPUTE"))
6383 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6386 else if (lex_match_id (lexer, "PPROPERTIES"))
6388 if (!ctables_parse_pproperties (lexer, ct))
6391 else if (lex_match_id (lexer, "WEIGHT"))
6393 if (!lex_force_match_id (lexer, "VARIABLE"))
6395 lex_match (lexer, T_EQUALS);
6396 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6400 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6402 if (lex_match_id (lexer, "COUNT"))
6404 lex_match (lexer, T_EQUALS);
6405 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6408 ct->hide_threshold = lex_integer (lexer);
/* Without COUNT, a threshold of 5 is used unless one is already set. */
6411 else if (ct->hide_threshold == 0)
6412 ct->hide_threshold = 5;
6416 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6417 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6418 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6422 if (!lex_force_match (lexer, T_SLASH))
/* What follows is repeated once per TABLE; the loop is closed by the
   "while" on T_ENDCMD just before the data is read. */
6426 size_t allocated_tables = 0;
6429 if (ct->n_tables >= allocated_tables)
6430 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6431 sizeof *ct->tables);
/* Build this table's initial category, category list, per-variable
   category array, and the table object itself. */
6433 struct ctables_category *cat = xmalloc (sizeof *cat);
6434 *cat = (struct ctables_category) {
6436 .include_missing = false,
6437 .sort_ascending = true,
6440 struct ctables_categories *c = xmalloc (sizeof *c);
6441 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6442 *c = (struct ctables_categories) {
6449 struct ctables_categories **categories = xnmalloc (n_vars,
6450 sizeof *categories);
6451 for (size_t i = 0; i < n_vars; i++)
6454 struct ctables_table *t = xmalloc (sizeof *t);
6455 *t = (struct ctables_table) {
6457 .slabels_axis = PIVOT_AXIS_COLUMN,
6458 .slabels_visible = true,
6459 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6461 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6462 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6463 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6465 .clabels_from_axis = PIVOT_AXIS_LAYER,
6466 .categories = categories,
6467 .n_categories = n_vars,
6470 ct->tables[ct->n_tables++] = t;
6472 lex_match (lexer, T_EQUALS);
/* expr_start and expr_end delimit the table expression's tokens; they are
   passed to put_title_text() so that )TABLE can be expanded in titles.  The
   expression is a row axis, optionally followed by BY and a column axis,
   optionally followed by BY and a layer axis. */
6473 int expr_start = lex_ofs (lexer);
6474 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6476 if (lex_match (lexer, T_BY))
6478 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6479 ct, t, PIVOT_AXIS_COLUMN))
6482 if (lex_match (lexer, T_BY))
6484 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6485 ct, t, PIVOT_AXIS_LAYER))
6489 int expr_end = lex_ofs (lexer);
6491 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6492 && !t->axes[PIVOT_AXIS_LAYER])
6494 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis; having them on more than
   one axis is reported as an error with a note per offending axis. */
6498 const struct ctables_axis *scales[PIVOT_N_AXES];
6499 size_t n_scales = 0;
6500 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6502 scales[a] = find_scale (t->axes[a]);
6508 msg (SE, _("Scale variables may appear only on one axis."));
6509 if (scales[PIVOT_AXIS_ROW])
6510 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6511 _("This scale variable appears on the rows axis."));
6512 if (scales[PIVOT_AXIS_COLUMN])
6513 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6514 _("This scale variable appears on the columns axis."));
6515 if (scales[PIVOT_AXIS_LAYER])
6516 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6517 _("This scale variable appears on the layer axis."));
/* Likewise, summaries may appear on only one axis. */
6521 const struct ctables_axis *summaries[PIVOT_N_AXES];
6522 size_t n_summaries = 0;
6523 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6525 summaries[a] = (scales[a]
6527 : find_categorical_summary_spec (t->axes[a]));
6531 if (n_summaries > 1)
6533 msg (SE, _("Summaries may appear only on one axis."));
6534 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6537 msg_at (SN, summaries[a]->loc,
6539 ? _("This variable on the rows axis has a summary.")
6540 : a == PIVOT_AXIS_COLUMN
6541 ? _("This variable on the columns axis has a summary.")
6542 : _("This variable on the layers axis has a summary."));
6544 msg_at (SN, summaries[a]->loc,
6545 _("This is a scale variable, so it always has a "
6546 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary when any axis has
   one, otherwise an axis that is present at all. */
6551 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6552 if (n_summaries ? summaries[a] : t->axes[a])
6554 t->summary_axis = a;
6558 if (lex_token (lexer) == T_ENDCMD)
6560 if (!ctables_prepare_table (t))
6564 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6567 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6569 if (lex_match_id (lexer, "SLABELS"))
6571 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6573 if (lex_match_id (lexer, "POSITION"))
6575 lex_match (lexer, T_EQUALS);
6576 if (lex_match_id (lexer, "COLUMN"))
6577 t->slabels_axis = PIVOT_AXIS_COLUMN;
6578 else if (lex_match_id (lexer, "ROW"))
6579 t->slabels_axis = PIVOT_AXIS_ROW;
6580 else if (lex_match_id (lexer, "LAYER"))
6581 t->slabels_axis = PIVOT_AXIS_LAYER;
6584 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6588 else if (lex_match_id (lexer, "VISIBLE"))
6590 lex_match (lexer, T_EQUALS);
6591 if (!parse_bool (lexer, &t->slabels_visible))
6596 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6601 else if (lex_match_id (lexer, "CLABELS"))
6603 if (lex_match_id (lexer, "AUTO"))
6605 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6606 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6608 else if (lex_match_id (lexer, "ROWLABELS"))
6610 lex_match (lexer, T_EQUALS);
6611 if (lex_match_id (lexer, "OPPOSITE"))
6612 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6613 else if (lex_match_id (lexer, "LAYER"))
6614 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6617 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6621 else if (lex_match_id (lexer, "COLLABELS"))
6623 lex_match (lexer, T_EQUALS);
6624 if (lex_match_id (lexer, "OPPOSITE"))
6625 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6626 else if (lex_match_id (lexer, "LAYER"))
6627 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6630 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6636 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6641 else if (lex_match_id (lexer, "CRITERIA"))
6643 if (!lex_force_match_id (lexer, "CILEVEL"))
6645 lex_match (lexer, T_EQUALS);
6647 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6649 t->cilevel = lex_number (lexer);
6652 else if (lex_match_id (lexer, "CATEGORIES"))
6654 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6658 else if (lex_match_id (lexer, "TITLES"))
6663 if (lex_match_id (lexer, "CAPTION"))
6664 textp = &t->caption;
6665 else if (lex_match_id (lexer, "CORNER"))
6667 else if (lex_match_id (lexer, "TITLE"))
6671 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6674 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces, each one
   expanded by put_title_text(), and the result is stored through TEXTP. */
6676 struct string s = DS_EMPTY_INITIALIZER;
6677 while (lex_is_string (lexer))
6679 if (!ds_is_empty (&s))
6680 ds_put_byte (&s, ' ');
6681 put_title_text (&s, lex_tokss (lexer), now,
6682 lexer, dataset_dict (ds),
6683 expr_start, expr_end);
6687 *textp = ds_steal_cstr (&s);
6689 while (lex_token (lexer) != T_SLASH
6690 && lex_token (lexer) != T_ENDCMD);
6692 else if (lex_match_id (lexer, "SIGTEST"))
/* Allocate the chi-square settings with their initial values, then parse
   overrides until the next slash or the end of the command. */
6696 t->chisq = xmalloc (sizeof *t->chisq);
6697 *t->chisq = (struct ctables_chisq) {
6699 .include_mrsets = true,
6700 .all_visible = true,
6706 if (lex_match_id (lexer, "TYPE"))
6708 lex_match (lexer, T_EQUALS);
6709 if (!lex_force_match_id (lexer, "CHISQUARE"))
6712 else if (lex_match_id (lexer, "ALPHA"))
6714 lex_match (lexer, T_EQUALS);
6715 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6717 t->chisq->alpha = lex_number (lexer);
6720 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6722 lex_match (lexer, T_EQUALS);
6723 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6726 else if (lex_match_id (lexer, "CATEGORIES"))
6728 lex_match (lexer, T_EQUALS);
6729 if (lex_match_id (lexer, "ALLVISIBLE"))
6730 t->chisq->all_visible = true;
6731 else if (lex_match_id (lexer, "SUBTOTALS"))
6732 t->chisq->all_visible = false;
6735 lex_error_expecting (lexer,
6736 "ALLVISIBLE", "SUBTOTALS");
6742 lex_error_expecting (lexer, "TYPE", "ALPHA",
6743 "INCLUDEMRSETS", "CATEGORIES");
6747 while (lex_token (lexer) != T_SLASH
6748 && lex_token (lexer) != T_ENDCMD);
6750 else if (lex_match_id (lexer, "COMPARETEST"))
/* Allocate the pairwise comparison settings with their initial values, then
   parse overrides until the next slash or the end of the command. */
6754 t->pairwise = xmalloc (sizeof *t->pairwise);
6755 *t->pairwise = (struct ctables_pairwise) {
6757 .alpha = { .05, .05 },
6758 .adjust = BONFERRONI,
6759 .include_mrsets = true,
6760 .meansvariance_allcats = true,
6761 .all_visible = true,
6770 if (lex_match_id (lexer, "TYPE"))
6772 lex_match (lexer, T_EQUALS);
6773 if (lex_match_id (lexer, "PROP"))
6774 t->pairwise->type = PROP;
6775 else if (lex_match_id (lexer, "MEAN"))
6776 t->pairwise->type = MEAN;
6779 lex_error_expecting (lexer, "PROP", "MEAN");
6783 else if (lex_match_id (lexer, "ALPHA"))
6785 lex_match (lexer, T_EQUALS);
6787 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6789 double a0 = lex_number (lexer);
6792 lex_match (lexer, T_COMMA);
6793 if (lex_is_number (lexer))
6795 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6797 double a1 = lex_number (lexer);
/* Two values: store them in ascending order. */
6800 t->pairwise->alpha[0] = MIN (a0, a1);
6801 t->pairwise->alpha[1] = MAX (a0, a1);
/* One value: use it for both entries. */
6804 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6806 else if (lex_match_id (lexer, "ADJUST"))
6808 lex_match (lexer, T_EQUALS);
6809 if (lex_match_id (lexer, "BONFERRONI"))
6810 t->pairwise->adjust = BONFERRONI;
6811 else if (lex_match_id (lexer, "BH"))
6812 t->pairwise->adjust = BH;
6813 else if (lex_match_id (lexer, "NONE"))
6814 t->pairwise->adjust = 0;
6817 lex_error_expecting (lexer, "BONFERRONI", "BH",
6822 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6824 lex_match (lexer, T_EQUALS);
6825 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6828 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6830 lex_match (lexer, T_EQUALS);
6831 if (lex_match_id (lexer, "ALLCATS"))
6832 t->pairwise->meansvariance_allcats = true;
6833 else if (lex_match_id (lexer, "TESTEDCATS"))
6834 t->pairwise->meansvariance_allcats = false;
6837 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6841 else if (lex_match_id (lexer, "CATEGORIES"))
6843 lex_match (lexer, T_EQUALS);
6844 if (lex_match_id (lexer, "ALLVISIBLE"))
6845 t->pairwise->all_visible = true;
6846 else if (lex_match_id (lexer, "SUBTOTALS"))
6847 t->pairwise->all_visible = false;
6850 lex_error_expecting (lexer, "ALLVISIBLE",
6855 else if (lex_match_id (lexer, "MERGE"))
6857 lex_match (lexer, T_EQUALS);
6858 if (!parse_bool (lexer, &t->pairwise->merge))
6861 else if (lex_match_id (lexer, "STYLE"))
6863 lex_match (lexer, T_EQUALS);
6864 if (lex_match_id (lexer, "APA"))
6865 t->pairwise->apa_style = true;
6866 else if (lex_match_id (lexer, "SIMPLE"))
6867 t->pairwise->apa_style = false;
6870 lex_error_expecting (lexer, "APA", "SIMPLE");
6874 else if (lex_match_id (lexer, "SHOWSIG"))
6876 lex_match (lexer, T_EQUALS);
6877 if (!parse_bool (lexer, &t->pairwise->show_sig))
6882 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6883 "INCLUDEMRSETS", "MEANSVARIANCE",
6884 "CATEGORIES", "MERGE", "STYLE",
6889 while (lex_token (lexer) != T_SLASH
6890 && lex_token (lexer) != T_ENDCMD);
6894 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6895 "CRITERIA", "CATEGORIES", "TITLES",
6896 "SIGTEST", "COMPARETEST");
6900 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved away from the row axis or from the column
   axis, but not from both. */
6904 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6905 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6907 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6911 if (!ctables_prepare_table (t))
6914 while (lex_token (lexer) != T_ENDCMD);
/* Read the data, build the tables, and commit the procedure.  proc_commit()
   is called even when ctables_execute() fails; both must succeed for the
   command to succeed. */
6917 input = proc_open (ds);
6918 bool ok = ctables_execute (ds, input, ct);
6919 ok = proc_commit (ds) && ok;
6921 ctables_destroy (ct);
6922 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): presumably the cleanup on the error exit; the label and the
   return statement around it are not visible in this excerpt. */
6927 ctables_destroy (ct);