1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition, kept in struct ctables's 'postcomputes'
   hmap. */
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr; /* Released with ctables_pcexpr_destroy(). */
216 struct ctables_summary_spec_set *specs; /* Released with ctables_summary_spec_set_uninit(). */
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
/* Releases the resources owned by CAT: its source location plus, on the
   visible paths, its string, both string-range endpoints, and its total
   label.  Which category type selects which release is decided by case
   labels that are mostly not visible in this extract. */
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings are equal; a null and a non-null substring
   are unequal; two non-null substrings are compared with ss_equals(). */
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are checked first (the result of a
   mismatch is on a line not visible here; presumably unequal).  After that
   the comparison uses the members relevant to the type: the number, the
   string, both numeric range bounds, both string range bounds (null-aware),
   the postcompute pointer, the total label, or the missing/sort settings. */
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* A null endpoint string only matches another null endpoint. */
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by content. */
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
/* Presumably drops one reference to C (the count update itself is on lines
   not visible here).  Asserts that C has at least one reference, and on the
   visible cleanup path uninitializes every category in C. */
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
/* Compares category sets A and B: they must have the same number of
   categories and the same 'show_empty' setting, and then each pair of
   categories at the same index is compared with ctables_category_equal().
   The values returned on mismatch and on success are on lines not visible
   here. */
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies SRC into DST.  The visible line gives DST its own duplicate of
   SRC's label (via xstrdup_if_nonnull(), so a null label is not passed to
   strdup); any copy of the remaining members happens on lines not visible
   here. */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is
   allocated (or NULL is used when SRC has none), each spec is cloned with
   ctables_summary_spec_clone(), and *DST is then assigned a fresh struct
   whose visible initializer copies 'is_scale' from SRC. */
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
/* Releases the contents of SET: every summary spec is uninitialized and the
   'listwise_vars' array is freed.  SET itself is not freed on the visible
   lines. */
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
/* Parses a column width setting called NAME from LEXER.  An optional '=' is
   skipped; then either the keyword DEFAULT or a number in the closed range
   [0, DBL_MAX] is accepted.  A number is stored into *WIDTH; what DEFAULT
   stores is on a line not visible here. */
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER for *B (the assignments to *B are
   on lines not visible here).  Any other token produces an error saying
   that YES or NO was expected. */
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F, looked up in a
   table generated from the AVAILABILITY column of "ctables.inc" and indexed
   by the function's enum value. */
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
/* Returns true if F is one of the plain count functions: CTSF_COUNT,
   CTSF_ECOUNT, or CTSF_UCOUNT. */
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
/* Parses a summary function name from LEXER and stores the matching
   function in *F.  The current token must be an identifier; it is compared
   without regard to case against every NAME from "ctables.inc" plus the
   percentage aliases listed below.  If nothing matches, an error is
   reported. */
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
/* Lookup table pairing each function with its syntax name. */
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
/* Linear search; the first matching entry wins. */
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  On the visible paths this uninitializes all N_CSVS
   summary spec sets, recursively destroys both sub-axes, and destroys the
   axis's source location.  Which of these applies to which kind of axis is
   selected on lines not visible here. */
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
/* Allocates a new axis node with sub-axes SUB0 and SUB1.  Its source
   location spans from token offset START_OFS up to the token just before
   LEXER's current one.  OP is presumably stored as the node's operator on
   a line not visible here. */
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The FORMAT column of "ctables.inc" selects among the visible choices:
   F40.0, PCT40.1, or VAR's own print format.  The case labels that select
   each choice are on lines not visible here. */
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
/* Builds the pivot-table label for summary SPEC.

   One visible path produces a default label: "Percentile %.2f" for
   CTSF_PTILE, otherwise the LABEL column of "ctables.inc" for the function.
   (Presumably this is the path taken when SPEC has no label of its own; the
   test is on a line not visible here.)

   The other path copies SPEC->label into a new string, writing CILEVEL
   formatted with "%g" wherever the text ")CILEVEL" occurs. */
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
/* Copy everything before the next ")CILEVEL" (or the whole remainder). */
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
/* The result takes ownership of the accumulated text. */
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the marker and substitute the confidence level. */
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
/* Returns the syntax name of summary FUNCTION, taken from the NAME column
   of "ctables.inc". */
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
/* Adds a summary specification to every variable leaf under AXIS, in the
   spec set for variant SV.

   For a CTAO_VAR axis, FUNCTION's availability is checked first: an error
   located at LOC, with a note at the axis's own location, is reported when
   a function restricted to multiple response sets or to scale variables is
   applied elsewhere.  The scale check is waived when SV is CSV_TOTAL.  The
   new spec copies LABEL and uses *FORMAT if given, else the default format
   for FUNCTION and the axis variable.

   For any other axis, the spec is added recursively to both sub-axes; a
   failing sub-axis is detected, with the resulting return value on a line
   not visible here. */
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Grow the spec array when it is full, then append the new spec. */
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   A variable may be followed by "[S]" to force scale treatment or "[C]" to
   force categorical treatment; otherwise it is scale exactly when its
   measurement level is MEASURE_SCALE.  A string variable treated as scale
   is rejected with an error and the new axis is destroyed. */
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* On a parse failure or missing ')', discard whatever was parsed. */
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S]/[C] marker. */
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit.  strcspn()
   yields the length of the leading run with no digits, so the character at
   that index is the terminating null exactly when S has no digit. */
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The type name is first read abstractly.  The names NEGPAREN, NEQUAL,
   PAREN, and PCTPAREN (matched without regard to case) map to the
   corresponding CTEF_* type; these are subject to the width and decimals
   checks reported below and set *IS_CTABLES_FORMAT to true.

   Any other name sets *IS_CTABLES_FORMAT to false and succeeds only if
   parse_format_specifier() accepts it and the result passes
   fmt_check_output() and is compatible with numeric values. */
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
/* Decimals need room within the width. */
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a summary function name, then for PTILE a
   percentile in the closed range [0, 100], then an optional quoted label,
   then an optional output format (recognized as an identifier containing a
   digit).  Each one is attached to the axis with add_summary_spec().
   Specifications may be separated by commas.  While still in the CSV_CELL
   variant, the keyword TOTALS followed by '[' is accepted (presumably
   switching to the CSV_TOTAL variant on a line not visible here).  On the
   visible error path the parsed axis is destroyed. */
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
/* Without a '[' there are no summary specifications to parse. */
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
/* A format is told apart from the next function name by its digit. */
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
/* NOTE(review): the result of add_summary_spec() is not used on the visible
   lines -- confirm that a failure there is meant to be non-fatal here. */
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
/* A TOTALS list has its own ']' in addition to the outer one. */
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A CTAO_VAR axis yields itself if it
   is marked scale and NULL otherwise; any other axis is searched through
   its two sub-axes in order.  Handling of a null AXIS and the final result
   are on lines not visible here. */
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a non-scale variable that has at least one CSV_CELL
   summary specification.  A CTAO_VAR axis yields itself if it qualifies and
   NULL otherwise; any other axis is searched through its two sub-axes in
   order.  Handling of a null AXIS and the final result are on lines not
   visible here. */
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: postfix expressions joined by '>'.

   Each '>' builds a CTAO_NEST node over the left and right operands.  Two
   combinations are rejected, with the new node destroyed: nesting when both
   sides contain a scale variable, and a summary requested on a categorical
   variable found in the left-hand (outer) operand.  If the right operand
   fails to parse, the left operand is destroyed. */
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
/* Parses a stacking expression: nesting expressions joined by '+'.  Each
   '+' wraps the expression so far and the new right operand in a
   CTAO_STACK node, so the result associates to the left.  If a right
   operand fails to parse, the expression built so far is destroyed. */
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores it in T->axes[A].
   If the next token is BY, '/', or end of command, the axis is treated
   specially on a line not visible here (presumably as an empty axis, which
   is not an error).  Otherwise the result is true exactly when a stack
   expression parsed successfully. */
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T.  The visible lines release its sections, its
   per-variable category sets (by dropping references), the axis expression
   and stack for every pivot axis, the summary spec array, the moved
   category-label values and their map, and the chi-square and pairwise
   test settings. */
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
/* NOTE(review): only the array is freed here, not each spec through
   ctables_summary_spec_set_uninit() -- confirm that the specs in
   'summary_specs' own nothing that would leak. */
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
/* Values in the map have the width of the example variable. */
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
/* Destroys CT.  The visible lines release every postcompute (its location,
   expression, and summary spec set) while removing it from the
   'postcomputes' map, then the custom format settings, the reference to
   the pivot table look, and each table. */
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1399 fmt_settings_uninit (&ct->ctables_formats);
1400 pivot_table_look_unref (ct->look);
1404 for (size_t i = 0; i < ct->n_tables; i++)
1405 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose numeric range runs from LOW to HIGH.  The type
   member is set on a line not visible here (presumably to CCT_NRANGE). */
1410 static struct ctables_category
1411 cct_nrange (double low, double high)
1413 return (struct ctables_category) {
1415 .nrange = { low, high }
/* Returns a category whose string range runs from LOW to HIGH.  The
   substrings are stored as given, without copying.  The type member is set
   on a line not visible here (presumably to CCT_SRANGE). */
1419 static struct ctables_category
1420 cct_srange (struct substring low, struct substring high)
1422 return (struct ctables_category) {
1424 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category into *CAT.  If
   '=' follows, a string is required and becomes the label; otherwise the
   label is a copy of the translated text "Subtotal".  *CAT becomes a
   CCT_SUBTOTAL category that owns the label and records
   HIDE_SUBCATEGORIES. */
1429 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1430 struct ctables_category *cat)
1433 if (lex_match (lexer, T_EQUALS))
1435 if (!lex_force_string (lexer))
1438 total_label = ss_xstrdup (lex_tokss (lexer));
1442 total_label = xstrdup (_("Subtotal"));
1444 *cat = (struct ctables_category) {
1445 .type = CCT_SUBTOTAL,
1446 .hide_subcategories = hide_subcategories,
1447 .total_label = total_label
/* Returns the text of LEXER's current token, recoded with
   recode_substring_pool() using DICT's encoding and "UTF-8" (presumably
   from UTF-8 into the dictionary encoding -- confirm against that
   function's argument order), with trailing spaces removed.  No pool is
   passed to the recoding call. */
1452 static struct substring
1453 parse_substring (struct lexer *lexer, struct dictionary *dict)
1455 struct substring s = recode_substring_pool (
1456 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1457 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification into *CAT.  The visible
   branches accept:

   - OTHERNM and MISSING.
   - SUBTOTAL and HSUBTOTAL, with an optional label.
   - LO THRU followed by a string or a number.
   - A number, optionally followed by THRU HI or THRU and another number.
   - A string, optionally followed by THRU HI or THRU and another string.
   - '&' followed by the name of a previously defined postcompute.

   An open end of a string range is a substring with a null pointer; an
   open end of a numeric range is -DBL_MAX or DBL_MAX.  Anything else is a
   syntax error. */
1463 ctables_table_parse_explicit_category (struct lexer *lexer,
1464 struct dictionary *dict,
1466 struct ctables_category *cat)
1468 if (lex_match_id (lexer, "OTHERNM"))
1469 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1470 else if (lex_match_id (lexer, "MISSING"))
1471 *cat = (struct ctables_category) { .type = CCT_MISSING };
1472 else if (lex_match_id (lexer, "SUBTOTAL"))
1473 return ctables_table_parse_subtotal (lexer, false, cat);
1474 else if (lex_match_id (lexer, "HSUBTOTAL"))
1475 return ctables_table_parse_subtotal (lexer, true, cat);
1476 else if (lex_match_id (lexer, "LO"))
1478 if (!lex_force_match_id (lexer, "THRU"))
1480 if (lex_is_string (lexer))
/* A null lower endpoint marks the range as open at the low end. */
1482 struct substring sr0 = { .string = NULL };
1483 struct substring sr1 = parse_substring (lexer, dict);
1484 *cat = cct_srange (sr0, sr1);
1486 else if (lex_force_num (lexer))
1488 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1494 else if (lex_is_number (lexer))
1496 double number = lex_number (lexer);
1498 if (lex_match_id (lexer, "THRU"))
1500 if (lex_match_id (lexer, "HI"))
1501 *cat = cct_nrange (number, DBL_MAX);
1504 if (!lex_force_num (lexer))
1506 *cat = cct_nrange (number, lex_number (lexer));
1511 *cat = (struct ctables_category) {
1516 else if (lex_is_string (lexer))
1518 struct substring s = parse_substring (lexer, dict);
1519 if (lex_match_id (lexer, "THRU"))
1521 if (lex_match_id (lexer, "HI"))
/* A null upper endpoint marks the range as open at the high end. */
1523 struct substring sr1 = { .string = NULL };
1524 *cat = cct_srange (s, sr1);
1528 if (!lex_force_string (lexer))
1533 struct substring sr1 = parse_substring (lexer, dict);
1534 *cat = cct_srange (s, sr1);
1538 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1540 else if (lex_match (lexer, T_AND))
1542 if (!lex_force_id (lexer))
1544 struct ctables_postcompute *pc = ctables_find_postcompute (
1545 ct, lex_tokcstr (lexer));
/* The error location spans the '&' and the name that follows it. */
1548 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1549 msg_at (SE, loc, _("Unknown postcompute &%s."),
1550 lex_tokcstr (lexer));
1551 msg_location_destroy (loc);
1556 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1560 lex_error (lexer, NULL);
/* Parses S as input format FORMAT, interpreting it in DICT's encoding, by
   calling data_in() with the global format settings.  On failure, an error
   located at LOCATION reports the format name and the parser's message.
   The store into *N and the return value are on lines not visible here. */
1568 parse_category_string (struct msg_location *location,
1569 struct substring s, const struct dictionary *dict,
1570 enum fmt_type format, double *n)
1573 char *error = data_in (s, dict_get_encoding (dict), format,
1574 settings_get_fmt_settings (), &v, 0, NULL);
1577 msg_at (SE, location,
1578 _("Failed to parse category specification as format %s: %s."),
1579 fmt_name (format), error);
/* Searches CATS for the category that postcompute operand E refers to.

   Every category is examined; one matches when its type corresponds to
   E's operator and the associated number, string, numeric range, or string
   range is equal.  MISSING, OTHERNM, and TOTAL match on type alone.
   Subtotals are counted as they are seen so that E->subtotal_index can
   pick a particular one.  After the loop, an operand with subtotal index 0
   gets special handling when CATS holds more than one subtotal (presumably
   rejected as ambiguous; the outcome is on a line not visible here). */
1588 static struct ctables_category *
1589 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1590 const struct ctables_pcexpr *e)
1592 struct ctables_category *best = NULL;
1593 size_t n_subtotals = 0;
1594 for (size_t i = 0; i < cats->n_cats; i++)
1596 struct ctables_category *cat = &cats->cats[i];
1599 case CTPO_CAT_NUMBER:
1600 if (cat->type == CCT_NUMBER && cat->number == e->number)
1604 case CTPO_CAT_STRING:
1605 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1609 case CTPO_CAT_NRANGE:
1610 if (cat->type == CCT_NRANGE
1611 && cat->nrange[0] == e->nrange[0]
1612 && cat->nrange[1] == e->nrange[1])
1616 case CTPO_CAT_SRANGE:
1617 if (cat->type == CCT_SRANGE
1618 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1619 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1623 case CTPO_CAT_MISSING:
1624 if (cat->type == CCT_MISSING)
1628 case CTPO_CAT_OTHERNM:
1629 if (cat->type == CCT_OTHERNM)
1633 case CTPO_CAT_SUBTOTAL:
1634 if (cat->type == CCT_SUBTOTAL)
1637 if (e->subtotal_index == n_subtotals)
1639 else if (e->subtotal_index == 0)
1644 case CTPO_CAT_TOTAL:
1645 if (cat->type == CCT_TOTAL)
1659 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute reference E designates, taking
   PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, a string reference is first converted to a
   number with parse_category_string() and looked up as a CTPO_CAT_NUMBER
   reference, and a string-range reference is converted endpoint by endpoint
   and looked up as CTPO_CAT_NRANGE.  A range endpoint whose string is null
   becomes -DBL_MAX (lower) or DBL_MAX (upper).  The temporary reference keeps
   E's location.  In all other cases E is looked up unchanged. */
1664 static struct ctables_category *
1665 ctables_find_category_for_postcompute (const struct dictionary *dict,
1666 const struct ctables_categories *cats,
1667 enum fmt_type parse_format,
1668 const struct ctables_pcexpr *e)
1670 if (parse_format != FMT_F)
1672 if (e->op == CTPO_CAT_STRING)
1675 if (!parse_category_string (e->location, e->string, dict,
1676 parse_format, &number))
1679 struct ctables_pcexpr e2 = {
1680 .op = CTPO_CAT_NUMBER,
1682 .location = e->location,
1684 return ctables_find_category_for_postcompute__ (cats, &e2);
1686 else if (e->op == CTPO_CAT_SRANGE)
/* A null endpoint string stands for an open end of the range. */
1689 if (!e->srange[0].string)
1690 nrange[0] = -DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[0], dict,
1692 parse_format, &nrange[0]))
1695 if (!e->srange[1].string)
1696 nrange[1] = DBL_MAX;
1697 else if (!parse_category_string (e->location, e->srange[1], dict,
1698 parse_format, &nrange[1]))
1701 struct ctables_pcexpr e2 = {
1702 .op = CTPO_CAT_NRANGE,
1703 .nrange = { nrange[0], nrange[1] },
1704 .location = e->location,
1706 return ctables_find_category_for_postcompute__ (cats, &e2);
/* No conversion applies: look up E as given. */
1709 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks the category references in postcompute expression E, used by
   computed category PC_CAT, against the category list CATS.

   For a CTPO_CAT_* reference, the category is looked up with
   ctables_find_category_for_postcompute() using PC_CAT's parse format.  The
   diagnostics visible here cover two situations: a SUBTOTAL reference without
   a position while CATS holds more than one subtotal, and a reference to a
   category that is not in the list.  Each adds notes that point at E's
   location and at CATS_LOCATION (or, for TOTAL, suggest TOTAL=YES).

   For the remaining expression kinds, both sub-expressions are checked
   recursively; a false result from a sub-expression is detected by the
   caller-side negation in the loop at the end. */
1713 ctables_recursive_check_postcompute (struct dictionary *dict,
1714 const struct ctables_pcexpr *e,
1715 struct ctables_category *pc_cat,
1716 const struct ctables_categories *cats,
1717 const struct msg_location *cats_location)
1721 case CTPO_CAT_NUMBER:
1722 case CTPO_CAT_STRING:
1723 case CTPO_CAT_NRANGE:
1724 case CTPO_CAT_SRANGE:
1725 case CTPO_CAT_MISSING:
1726 case CTPO_CAT_OTHERNM:
1727 case CTPO_CAT_SUBTOTAL:
1728 case CTPO_CAT_TOTAL:
1730 struct ctables_category *cat = ctables_find_category_for_postcompute (
1731 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference with no position: count the subtotals in CATS to
   decide which diagnostic applies. */
1734 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1736 size_t n_subtotals = 0;
1737 for (size_t i = 0; i < cats->n_cats; i++)
1738 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1739 if (n_subtotals > 1)
1741 msg_at (SE, cats_location,
1742 ngettext ("These categories include %zu instance "
1743 "of SUBTOTAL or HSUBTOTAL, so references "
1744 "from computed categories must refer to "
1745 "subtotals by position, "
1746 "e.g. SUBTOTAL[1].",
1747 "These categories include %zu instances "
1748 "of SUBTOTAL or HSUBTOTAL, so references "
1749 "from computed categories must refer to "
1750 "subtotals by position, "
1751 "e.g. SUBTOTAL[1].",
1754 msg_at (SN, e->location,
1755 _("This is the reference that lacks a position."));
/* The referenced category is not in the list: report it, then add a hint
   chosen by the kind of reference. */
1760 msg_at (SE, pc_cat->location,
1761 _("Computed category &%s references a category not included "
1762 "in the category list."),
1764 msg_at (SN, e->location, _("This is the missing category."));
1765 if (e->op == CTPO_CAT_SUBTOTAL)
1766 msg_at (SN, cats_location,
1767 _("To fix the problem, add subtotals to the "
1768 "list of categories here."));
1769 else if (e->op == CTPO_CAT_TOTAL)
1770 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1771 "CATEGORIES specification."));
1773 msg_at (SN, cats_location,
1774 _("To fix the problem, add the missing category to the "
1775 "list of categories here."));
1778 if (pc_cat->pc->hide_source_cats)
/* Non-leaf expression: check each operand that is present. */
1792 for (size_t i = 0; i < 2; i++)
1793 if (e->subs[i] && !ctables_recursive_check_postcompute (
1794 dict, e->subs[i], pc_cat, cats, cats_location))
/* Scans the N_VARS variables in VARS for a numeric one.  When one is found,
   an error naming that variable is reported at CAT's location, explaining
   that the category specification applies only to string variables.  Callers
   negate the result to detect this failure. */
1803 all_strings (struct variable **vars, size_t n_vars,
1804 const struct ctables_category *cat)
1806 for (size_t j = 0; j < n_vars; j++)
1807 if (var_is_numeric (vars[j]))
1809 msg_at (SE, cat->location,
1810 _("This category specification may be applied only to string "
1811 "variables, but this subcommand tries to apply it to "
1812 "numeric variable %s."),
1813 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER: the VARIABLES
   keyword and a variable list, an optional bracketed list of explicit
   categories, and then keyword settings up to the next slash or the end of
   the command.

   The keywords ORDER, KEY, and MISSING are matched only while no explicit
   categories have been collected; TOTAL, LABEL, POSITION, and EMPTY are
   matched regardless.  A totals category is placed either at the front or at
   the end of the list depending on POSITION, and is labeled with the LABEL
   string or, failing that, the translated word "Total".

   When an explicit list was given, each category is then checked against the
   variables: postcompute references are validated with
   ctables_recursive_check_postcompute(), and string categories and string
   ranges are either converted to numbers using the variables' common print
   format or required to apply only to string variables. */
1820 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1821 struct ctables *ct, struct ctables_table *t)
1823 if (!lex_match_id (lexer, "VARIABLES"))
1825 lex_match (lexer, T_EQUALS);
1827 struct variable **vars;
1829 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find out whether every variable has the same print format type;
   common_format is cleared on the first mismatch. */
1832 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1833 for (size_t i = 1; i < n_vars; i++)
1835 const struct fmt_spec *f = var_get_print_format (vars[i]);
1836 if (f->type != common_format->type)
1838 common_format = NULL;
1844 && (fmt_get_category (common_format->type)
1845 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object is created with a reference count equal to the
   number of variables; each variable's previous categories are released. */
1847 struct ctables_categories *c = xmalloc (sizeof *c);
1848 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1849 for (size_t i = 0; i < n_vars; i++)
1851 struct ctables_categories **cp
1852 = &t->categories[var_get_dict_index (vars[i])];
1853 ctables_categories_unref (*cp);
/* Optional explicit category list in square brackets.  The token offsets of
   the list are remembered for use in later diagnostics. */
1857 size_t allocated_cats = 0;
1858 int cats_start_ofs = -1;
1859 int cats_end_ofs = -1;
1860 if (lex_match (lexer, T_LBRACK))
1862 cats_start_ofs = lex_ofs (lexer);
1865 if (c->n_cats >= allocated_cats)
1866 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1868 int start_ofs = lex_ofs (lexer);
1869 struct ctables_category *cat = &c->cats[c->n_cats];
1870 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1872 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1875 lex_match (lexer, T_COMMA);
1877 while (!lex_match (lexer, T_RBRACK));
1878 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category and for totals, adjusted by the keyword
   settings parsed in the loop that follows. */
1881 struct ctables_category cat = {
1883 .include_missing = false,
1884 .sort_ascending = true,
1886 bool show_totals = false;
1887 char *total_label = NULL;
1888 bool totals_before = false;
1889 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1891 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1893 lex_match (lexer, T_EQUALS);
1894 if (lex_match_id (lexer, "A"))
1895 cat.sort_ascending = true;
1896 else if (lex_match_id (lexer, "D"))
1897 cat.sort_ascending = false;
1900 lex_error_expecting (lexer, "A", "D");
1904 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1906 lex_match (lexer, T_EQUALS);
1907 if (lex_match_id (lexer, "VALUE"))
1908 cat.type = CCT_VALUE;
1909 else if (lex_match_id (lexer, "LABEL"))
1910 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
1913 cat.type = CCT_FUNCTION;
1914 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1917 if (lex_match (lexer, T_LPAREN))
1919 cat.sort_var = parse_variable (lexer, dict);
1923 if (cat.sort_function == CTSF_PTILE)
1925 lex_match (lexer, T_COMMA);
1926 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1928 cat.percentile = lex_number (lexer);
1932 if (!lex_force_match (lexer, T_RPAREN))
1935 else if (ctables_function_availability (cat.sort_function)
1938 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1943 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1945 lex_match (lexer, T_EQUALS);
1946 if (lex_match_id (lexer, "INCLUDE"))
1947 cat.include_missing = true;
1948 else if (lex_match_id (lexer, "EXCLUDE"))
1949 cat.include_missing = false;
1952 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1956 else if (lex_match_id (lexer, "TOTAL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!parse_bool (lexer, &show_totals))
1962 else if (lex_match_id (lexer, "LABEL"))
1964 lex_match (lexer, T_EQUALS);
1965 if (!lex_force_string (lexer))
1968 total_label = ss_xstrdup (lex_tokss (lexer));
1971 else if (lex_match_id (lexer, "POSITION"))
1973 lex_match (lexer, T_EQUALS);
1974 if (lex_match_id (lexer, "BEFORE"))
1975 totals_before = true;
1976 else if (lex_match_id (lexer, "AFTER"))
1977 totals_before = false;
1980 lex_error_expecting (lexer, "BEFORE", "AFTER");
1984 else if (lex_match_id (lexer, "EMPTY"))
1986 lex_match (lexer, T_EQUALS);
1987 if (lex_match_id (lexer, "INCLUDE"))
1988 c->show_empty = true;
1989 else if (lex_match_id (lexer, "EXCLUDE"))
1990 c->show_empty = false;
1993 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two alternative "expecting" messages: the full keyword set, or only the
   keywords that are matched regardless of explicit categories. */
2000 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2001 "TOTAL", "LABEL", "POSITION", "EMPTY");
2003 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the settings above. */
2010 if (c->n_cats >= allocated_cats)
2011 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2012 c->cats[c->n_cats++] = cat;
/* Make room for a totals category and place it at index 0 or at the end. */
2017 if (c->n_cats >= allocated_cats)
2018 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2020 struct ctables_category *totals;
2023 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2024 totals = &c->cats[0];
2027 totals = &c->cats[c->n_cats];
2030 *totals = (struct ctables_category) {
2032 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward otherwise,
   recording in each category the subtotal currently in effect. */
2036 struct ctables_category *subtotal = NULL;
2037 for (size_t i = totals_before ? 0 : c->n_cats;
2038 totals_before ? i < c->n_cats : i-- > 0;
2039 totals_before ? i++ : 0)
2041 struct ctables_category *cat = &c->cats[i];
2050 cat->subtotal = subtotal;
2053 case CCT_POSTCOMPUTE:
2064 case CCT_EXCLUDED_MISSING:
/* An explicit list was given: validate each category against the variables. */
2069 if (cats_start_ofs != -1)
2071 for (size_t i = 0; i < c->n_cats; i++)
2073 struct ctables_category *cat = &c->cats[i];
2076 case CCT_POSTCOMPUTE:
2077 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2078 struct msg_location *cats_location
2079 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2080 bool ok = ctables_recursive_check_postcompute (
2081 dict, cat->pc->expr, cat, c, cats_location);
2082 msg_location_destroy (cats_location);
2089 for (size_t j = 0; j < n_vars; j++)
2090 if (var_is_alpha (vars[j]))
2092 msg_at (SE, cat->location,
2093 _("This category specification may be applied "
2094 "only to numeric variables, but this "
2095 "subcommand tries to apply it to string "
2097 var_get_name (vars[j]));
/* String category: either convert it to a number in the common format, or
   require that all the variables are strings. */
2106 if (!parse_category_string (cat->location, cat->string, dict,
2107 common_format->type, &n))
2110 ss_dealloc (&cat->string);
2112 cat->type = CCT_NUMBER;
2115 else if (!all_strings (vars, n_vars, cat))
/* String range: the same treatment, endpoint by endpoint; an endpoint with a
   null string is handled without parsing. */
2124 if (!cat->srange[0].string)
2126 else if (!parse_category_string (cat->location,
2127 cat->srange[0], dict,
2128 common_format->type, &n[0]))
2131 if (!cat->srange[1].string)
2133 else if (!parse_category_string (cat->location,
2134 cat->srange[1], dict,
2135 common_format->type, &n[1]))
2138 ss_dealloc (&cat->srange[0]);
2139 ss_dealloc (&cat->srange[1]);
2141 cat->type = CCT_NRANGE;
2142 cat->nrange[0] = n[0];
2143 cat->nrange[1] = n[1];
2145 else if (!all_strings (vars, n_vars, cat))
2156 case CCT_EXCLUDED_MISSING:
/* Releases resources held by NEST: the summary spec set of every summary
   variant is uninitialized and the domain array of every domain type is
   freed. */
2171 ctables_nest_uninit (struct ctables_nest *nest)
2174 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2175 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2176 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2177 free (nest->domains[dt]);
/* Uninitializes each of the STACK->n nests in STACK with
   ctables_nest_uninit(), then frees the nest array itself. */
2181 ctables_stack_uninit (struct ctables_stack *stack)
2185 for (size_t i = 0; i < stack->n; i++)
2186 ctables_nest_uninit (&stack->nests[i]);
2187 free (stack->nests);
/* Builds a stack that holds one nest for every pair made of a nest from S0
   and a nest from S1.  Each new nest's variable list is the S0 nest's
   variables followed by the S1 nest's.  Its scale index is the S0 nest's when
   that is set, otherwise the S1 nest's shifted by the number of S0 variables.
   Summary specs for every variant are cloned from one of the two source
   nests, chosen by which of them has a variable on its CSV_CELL specs.

   S0 and S1 are uninitialized before returning, so the caller must not use
   them afterward. */
2191 static struct ctables_stack
2192 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2199 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2200 for (size_t i = 0; i < s0.n; i++)
2201 for (size_t j = 0; j < s1.n; j++)
2203 const struct ctables_nest *a = &s0.nests[i];
2204 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists into a fresh array. */
2206 size_t allocate = a->n + b->n;
2207 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2209 for (size_t k = 0; k < a->n; k++)
2210 vars[n++] = a->vars[k];
2211 for (size_t k = 0; k < b->n; k++)
2212 vars[n++] = b->vars[k];
2213 assert (n == allocate);
2215 const struct ctables_nest *summary_src;
2216 if (!a->specs[CSV_CELL].var)
2218 else if (!b->specs[CSV_CELL].var)
2223 struct ctables_nest *new = &stack.nests[stack.n++];
2224 *new = (struct ctables_nest) {
2226 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2227 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2231 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2232 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
/* The inputs are consumed. */
2234 ctables_stack_uninit (&s0);
2235 ctables_stack_uninit (&s1);
/* Concatenates the nests of S0 and S1 into a newly allocated stack, S0's
   first.  The nest structures are copied by value.  Because the nests from S1
   land after S0's, their group_head is shifted up by s0.n. */
2239 static struct ctables_stack
2240 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2242 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2243 for (size_t i = 0; i < s0.n; i++)
2244 stack.nests[stack.n++] = s0.nests[i];
2245 for (size_t i = 0; i < s1.n; i++)
2247 stack.nests[stack.n] = s1.nests[i];
2248 stack.nests[stack.n].group_head += s0.n;
2251 assert (stack.n == s0.n + s1.n);
/* Returns a stack holding a single newly allocated nest built from axis A.
   The nest's scale index is 0 when A is a scale axis and SIZE_MAX otherwise.
   When A has CSV_CELL summary specs or is a scale axis, the spec set of every
   summary variant is cloned from A and tagged with A's variable and scale
   flag. */
2257 static struct ctables_stack
2258 var_fts (const struct ctables_axis *a)
2260 struct variable **vars = xmalloc (sizeof *vars);
2263 struct ctables_nest *nest = xmalloc (sizeof *nest);
2264 *nest = (struct ctables_nest) {
2267 .scale_idx = a->scale ? 0 : SIZE_MAX,
2269 if (a->specs[CSV_CELL].n || a->scale)
2270 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2272 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2273 nest->specs[sv].var = a->var;
2274 nest->specs[sv].is_scale = a->scale;
2276 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively turns axis expression A into a stack of nests.  One path
   yields an empty stack (n == 0).  The two binary paths enumerate both
   operands, passing AXIS_TYPE through, and combine the results with
   stack_fts() or with nest_fts() respectively. */
2279 static struct ctables_stack
2280 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2283 return (struct ctables_stack) { .n = 0 };
2291 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
2295 /* This should consider any of the scale variables found in the result to
2296 be linked to each other listwise for SMISSING=LISTWISE. */
2297 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2298 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary.  The member in use depends on the
   summary function, as the per-member comments indicate: a moments1
   accumulator for the moment-based functions and a case writer for the
   order statistics. */
2304 union ctables_summary
2306 /* COUNT, VALIDN, TOTALN. */
2309 /* MINIMUM, MAXIMUM, RANGE. */
2316 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2317 struct moments1 *moments;
2319 /* MEDIAN, MODE, PTILE. */
2322 struct casewriter *writer;
2327 /* XXX multiple response */
/* Prepares S to accumulate the summary function named in SS.

   The initializations visible here are: min and max both set to SYSMIS; a
   moments1 accumulator created at MOMENT_VARIANCE for the group of functions
   ending with the U*PCT_SUM cases; and, for the writer-based functions, a
   sort writer over a prototype with two numeric (width 0) columns, ordered
   ascending on column 0. */
2331 ctables_summary_init (union ctables_summary *s,
2332 const struct ctables_summary_spec *ss)
2334 switch (ss->function)
2338 case CTSF_ROWPCT_COUNT:
2339 case CTSF_COLPCT_COUNT:
2340 case CTSF_TABLEPCT_COUNT:
2341 case CTSF_SUBTABLEPCT_COUNT:
2342 case CTSF_LAYERPCT_COUNT:
2343 case CTSF_LAYERROWPCT_COUNT:
2344 case CTSF_LAYERCOLPCT_COUNT:
2345 case CTSF_ROWPCT_VALIDN:
2346 case CTSF_COLPCT_VALIDN:
2347 case CTSF_TABLEPCT_VALIDN:
2348 case CTSF_SUBTABLEPCT_VALIDN:
2349 case CTSF_LAYERPCT_VALIDN:
2350 case CTSF_LAYERROWPCT_VALIDN:
2351 case CTSF_LAYERCOLPCT_VALIDN:
2352 case CTSF_ROWPCT_TOTALN:
2353 case CTSF_COLPCT_TOTALN:
2354 case CTSF_TABLEPCT_TOTALN:
2355 case CTSF_SUBTABLEPCT_TOTALN:
2356 case CTSF_LAYERPCT_TOTALN:
2357 case CTSF_LAYERROWPCT_TOTALN:
2358 case CTSF_LAYERCOLPCT_TOTALN:
2365 case CTSF_UROWPCT_COUNT:
2366 case CTSF_UCOLPCT_COUNT:
2367 case CTSF_UTABLEPCT_COUNT:
2368 case CTSF_USUBTABLEPCT_COUNT:
2369 case CTSF_ULAYERPCT_COUNT:
2370 case CTSF_ULAYERROWPCT_COUNT:
2371 case CTSF_ULAYERCOLPCT_COUNT:
2372 case CTSF_UROWPCT_VALIDN:
2373 case CTSF_UCOLPCT_VALIDN:
2374 case CTSF_UTABLEPCT_VALIDN:
2375 case CTSF_USUBTABLEPCT_VALIDN:
2376 case CTSF_ULAYERPCT_VALIDN:
2377 case CTSF_ULAYERROWPCT_VALIDN:
2378 case CTSF_ULAYERCOLPCT_VALIDN:
2379 case CTSF_UROWPCT_TOTALN:
2380 case CTSF_UCOLPCT_TOTALN:
2381 case CTSF_UTABLEPCT_TOTALN:
2382 case CTSF_USUBTABLEPCT_TOTALN:
2383 case CTSF_ULAYERPCT_TOTALN:
2384 case CTSF_ULAYERROWPCT_TOTALN:
2385 case CTSF_ULAYERCOLPCT_TOTALN:
2395 case CTSF_SUBTABLE_ID:
2397 case CTSF_LAYERROW_ID:
2398 case CTSF_LAYERCOL_ID:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum. */
2404 s->min = s->max = SYSMIS;
2412 case CTSF_ROWPCT_SUM:
2413 case CTSF_COLPCT_SUM:
2414 case CTSF_TABLEPCT_SUM:
2415 case CTSF_SUBTABLEPCT_SUM:
2416 case CTSF_LAYERPCT_SUM:
2417 case CTSF_LAYERROWPCT_SUM:
2418 case CTSF_LAYERCOLPCT_SUM:
2423 case CTSF_UVARIANCE:
2424 case CTSF_UROWPCT_SUM:
2425 case CTSF_UCOLPCT_SUM:
2426 case CTSF_UTABLEPCT_SUM:
2427 case CTSF_USUBTABLEPCT_SUM:
2428 case CTSF_ULAYERPCT_SUM:
2429 case CTSF_ULAYERROWPCT_SUM:
2430 case CTSF_ULAYERCOLPCT_SUM:
2431 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric columns; ctables_summary_add() stores the value in column 0
   and its weight in column 1.  The writer sorts ascending on column 0. */
2441 struct caseproto *proto = caseproto_create ();
2442 proto = caseproto_add_width (proto, 0);
2443 proto = caseproto_add_width (proto, 0);
2445 struct subcase ordering;
2446 subcase_init (&ordering, 0, 0, SC_ASCEND);
2447 s->writer = sort_create_writer (&ordering, proto);
2448 subcase_uninit (&ordering);
2449 caseproto_unref (proto);
/* Releases whatever S holds for the summary function named in SS.  The
   releases visible here are moments1_destroy() for the group of functions
   ending with the U*PCT_SUM cases, and casewriter_destroy() for the
   writer-based functions. */
2459 ctables_summary_uninit (union ctables_summary *s,
2460 const struct ctables_summary_spec *ss)
2462 switch (ss->function)
2466 case CTSF_ROWPCT_COUNT:
2467 case CTSF_COLPCT_COUNT:
2468 case CTSF_TABLEPCT_COUNT:
2469 case CTSF_SUBTABLEPCT_COUNT:
2470 case CTSF_LAYERPCT_COUNT:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERCOLPCT_COUNT:
2473 case CTSF_ROWPCT_VALIDN:
2474 case CTSF_COLPCT_VALIDN:
2475 case CTSF_TABLEPCT_VALIDN:
2476 case CTSF_SUBTABLEPCT_VALIDN:
2477 case CTSF_LAYERPCT_VALIDN:
2478 case CTSF_LAYERROWPCT_VALIDN:
2479 case CTSF_LAYERCOLPCT_VALIDN:
2480 case CTSF_ROWPCT_TOTALN:
2481 case CTSF_COLPCT_TOTALN:
2482 case CTSF_TABLEPCT_TOTALN:
2483 case CTSF_SUBTABLEPCT_TOTALN:
2484 case CTSF_LAYERPCT_TOTALN:
2485 case CTSF_LAYERROWPCT_TOTALN:
2486 case CTSF_LAYERCOLPCT_TOTALN:
2493 case CTSF_UROWPCT_COUNT:
2494 case CTSF_UCOLPCT_COUNT:
2495 case CTSF_UTABLEPCT_COUNT:
2496 case CTSF_USUBTABLEPCT_COUNT:
2497 case CTSF_ULAYERPCT_COUNT:
2498 case CTSF_ULAYERROWPCT_COUNT:
2499 case CTSF_ULAYERCOLPCT_COUNT:
2500 case CTSF_UROWPCT_VALIDN:
2501 case CTSF_UCOLPCT_VALIDN:
2502 case CTSF_UTABLEPCT_VALIDN:
2503 case CTSF_USUBTABLEPCT_VALIDN:
2504 case CTSF_ULAYERPCT_VALIDN:
2505 case CTSF_ULAYERROWPCT_VALIDN:
2506 case CTSF_ULAYERCOLPCT_VALIDN:
2507 case CTSF_UROWPCT_TOTALN:
2508 case CTSF_UCOLPCT_TOTALN:
2509 case CTSF_UTABLEPCT_TOTALN:
2510 case CTSF_USUBTABLEPCT_TOTALN:
2511 case CTSF_ULAYERPCT_TOTALN:
2512 case CTSF_ULAYERROWPCT_TOTALN:
2513 case CTSF_ULAYERCOLPCT_TOTALN:
2522 case CTSF_SUBTABLE_ID:
2524 case CTSF_LAYERROW_ID:
2525 case CTSF_LAYERCOL_ID:
2538 case CTSF_ROWPCT_SUM:
2539 case CTSF_COLPCT_SUM:
2540 case CTSF_TABLEPCT_SUM:
2541 case CTSF_SUBTABLEPCT_SUM:
2542 case CTSF_LAYERPCT_SUM:
2543 case CTSF_LAYERROWPCT_SUM:
2544 case CTSF_LAYERCOLPCT_SUM:
2549 case CTSF_UVARIANCE:
2550 case CTSF_UROWPCT_SUM:
2551 case CTSF_UCOLPCT_SUM:
2552 case CTSF_UTABLEPCT_SUM:
2553 case CTSF_USUBTABLEPCT_SUM:
2554 case CTSF_ULAYERPCT_SUM:
2555 case CTSF_ULAYERROWPCT_SUM:
2556 case CTSF_ULAYERCOLPCT_SUM:
/* Counterpart of the moments1_create() call in ctables_summary_init(). */
2557 moments1_destroy (s->moments);
/* Counterpart of the sort_create_writer() call in ctables_summary_init(). */
2566 casewriter_destroy (s->writer);
/* Adds one observation to summary S according to the function in SS.

   VALUE is the observation of VAR.  D_WEIGHT and E_WEIGHT are two weights
   for the case; which one a function uses is visible per case below.
   Count-type functions add a weight to s->count, in some groups only when
   IS_SCALE is set or EXCLUDED_MISSING is clear.  Minimum/maximum tracking,
   the moment-based functions, and the writer-based functions act only when
   IS_SCALE_MISSING is clear.  The unweighted moment-based group passes 1.0 as
   the weight instead of E_WEIGHT.

   NOTE(review): IS_MISSING is not referenced on any line shown here; confirm
   where it is used. */
2572 ctables_summary_add (union ctables_summary *s,
2573 const struct ctables_summary_spec *ss,
2574 const struct variable *var, const union value *value,
2575 bool is_scale, bool is_scale_missing,
2576 bool is_missing, bool excluded_missing,
2577 double d_weight, double e_weight)
2579 /* To determine whether a case is included in a given table for a particular
2580 kind of summary, consider the following charts for each variable in the
2581 table. Only if "yes" appears for every variable for the summary is the
2584 Categorical variables: VALIDN COUNT TOTALN
2585 Valid values in included categories yes yes yes
2586 Missing values in included categories --- yes yes
2587 Missing values in excluded categories --- --- yes
2588 Valid values in excluded categories --- --- ---
2590 Scale variables: VALIDN COUNT TOTALN
2591 Valid value yes yes yes
2592 Missing value --- yes yes
2594 Missing values include both user- and system-missing. (The system-missing
2595 value is always in an excluded category.)
2597 switch (ss->function)
2600 case CTSF_ROWPCT_TOTALN:
2601 case CTSF_COLPCT_TOTALN:
2602 case CTSF_TABLEPCT_TOTALN:
2603 case CTSF_SUBTABLEPCT_TOTALN:
2604 case CTSF_LAYERPCT_TOTALN:
2605 case CTSF_LAYERROWPCT_TOTALN:
2606 case CTSF_LAYERCOLPCT_TOTALN:
2607 s->count += d_weight;
2611 case CTSF_UROWPCT_TOTALN:
2612 case CTSF_UCOLPCT_TOTALN:
2613 case CTSF_UTABLEPCT_TOTALN:
2614 case CTSF_USUBTABLEPCT_TOTALN:
2615 case CTSF_ULAYERPCT_TOTALN:
2616 case CTSF_ULAYERROWPCT_TOTALN:
2617 case CTSF_ULAYERCOLPCT_TOTALN:
2622 case CTSF_ROWPCT_COUNT:
2623 case CTSF_COLPCT_COUNT:
2624 case CTSF_TABLEPCT_COUNT:
2625 case CTSF_SUBTABLEPCT_COUNT:
2626 case CTSF_LAYERPCT_COUNT:
2627 case CTSF_LAYERROWPCT_COUNT:
2628 case CTSF_LAYERCOLPCT_COUNT:
2629 if (is_scale || !excluded_missing)
2630 s->count += d_weight;
2634 case CTSF_UROWPCT_COUNT:
2635 case CTSF_UCOLPCT_COUNT:
2636 case CTSF_UTABLEPCT_COUNT:
2637 case CTSF_USUBTABLEPCT_COUNT:
2638 case CTSF_ULAYERPCT_COUNT:
2639 case CTSF_ULAYERROWPCT_COUNT:
2640 case CTSF_ULAYERCOLPCT_COUNT:
2641 if (is_scale || !excluded_missing)
2646 case CTSF_ROWPCT_VALIDN:
2647 case CTSF_COLPCT_VALIDN:
2648 case CTSF_TABLEPCT_VALIDN:
2649 case CTSF_SUBTABLEPCT_VALIDN:
2650 case CTSF_LAYERPCT_VALIDN:
2651 case CTSF_LAYERROWPCT_VALIDN:
2652 case CTSF_LAYERCOLPCT_VALIDN:
2656 s->count += d_weight;
2660 case CTSF_UROWPCT_VALIDN:
2661 case CTSF_UCOLPCT_VALIDN:
2662 case CTSF_UTABLEPCT_VALIDN:
2663 case CTSF_USUBTABLEPCT_VALIDN:
2664 case CTSF_ULAYERPCT_VALIDN:
2665 case CTSF_ULAYERROWPCT_VALIDN:
2666 case CTSF_ULAYERCOLPCT_VALIDN:
2676 case CTSF_SUBTABLE_ID:
2678 case CTSF_LAYERROW_ID:
2679 case CTSF_LAYERCOL_ID:
2686 s->count += d_weight;
2697 if (is_scale || !excluded_missing)
2698 s->count += e_weight;
2705 s->count += e_weight;
2709 s->count += e_weight;
2715 if (!is_scale_missing)
2717 assert (!var_is_alpha (var)); /* XXX? */
/* SYSMIS in min or max means no value has been recorded yet. */
2718 if (s->min == SYSMIS || value->f < s->min)
2720 if (s->max == SYSMIS || value->f > s->max)
2730 case CTSF_ROWPCT_SUM:
2731 case CTSF_COLPCT_SUM:
2732 case CTSF_TABLEPCT_SUM:
2733 case CTSF_SUBTABLEPCT_SUM:
2734 case CTSF_LAYERPCT_SUM:
2735 case CTSF_LAYERROWPCT_SUM:
2736 case CTSF_LAYERCOLPCT_SUM:
2737 if (!is_scale_missing)
2738 moments1_add (s->moments, value->f, e_weight);
2745 case CTSF_UVARIANCE:
2746 case CTSF_UROWPCT_SUM:
2747 case CTSF_UCOLPCT_SUM:
2748 case CTSF_UTABLEPCT_SUM:
2749 case CTSF_USUBTABLEPCT_SUM:
2750 case CTSF_ULAYERPCT_SUM:
2751 case CTSF_ULAYERROWPCT_SUM:
2752 case CTSF_ULAYERCOLPCT_SUM:
/* Unweighted variants: every observation counts with weight 1. */
2753 if (!is_scale_missing)
2754 moments1_add (s->moments, value->f, 1.0);
/* Both weights are reset to 1.0 ahead of the writer-based accumulation
   below; presumably this path falls through into it -- TODO confirm. */
2760 d_weight = e_weight = 1.0;
2765 if (!is_scale_missing)
2767 s->ovalid += e_weight;
/* Store the value in column 0 and its weight in column 1 of a new case and
   hand it to the sort writer. */
2769 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2770 *case_num_rw_idx (c, 0) = value->f;
2771 *case_num_rw_idx (c, 1) = e_weight;
2772 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to a domain type.  The case labels are grouped by
   the row/column/layer/subtable/table prefix of the function name, with the
   weighted and unweighted (U-prefixed) forms and the matching *_ID function
   sharing a group.  The results visible here are CTDT_LAYERCOL,
   CTDT_LAYERROW, and CTDT_SUBTABLE for the correspondingly named groups. */
2778 static enum ctables_domain_type
2779 ctables_function_domain (enum ctables_summary_function function)
2809 case CTSF_UVARIANCE:
2815 case CTSF_COLPCT_COUNT:
2816 case CTSF_COLPCT_SUM:
2817 case CTSF_COLPCT_TOTALN:
2818 case CTSF_COLPCT_VALIDN:
2819 case CTSF_UCOLPCT_COUNT:
2820 case CTSF_UCOLPCT_SUM:
2821 case CTSF_UCOLPCT_TOTALN:
2822 case CTSF_UCOLPCT_VALIDN:
2826 case CTSF_LAYERCOLPCT_COUNT:
2827 case CTSF_LAYERCOLPCT_SUM:
2828 case CTSF_LAYERCOLPCT_TOTALN:
2829 case CTSF_LAYERCOLPCT_VALIDN:
2830 case CTSF_ULAYERCOLPCT_COUNT:
2831 case CTSF_ULAYERCOLPCT_SUM:
2832 case CTSF_ULAYERCOLPCT_TOTALN:
2833 case CTSF_ULAYERCOLPCT_VALIDN:
2834 case CTSF_LAYERCOL_ID:
2835 return CTDT_LAYERCOL;
2837 case CTSF_LAYERPCT_COUNT:
2838 case CTSF_LAYERPCT_SUM:
2839 case CTSF_LAYERPCT_TOTALN:
2840 case CTSF_LAYERPCT_VALIDN:
2841 case CTSF_ULAYERPCT_COUNT:
2842 case CTSF_ULAYERPCT_SUM:
2843 case CTSF_ULAYERPCT_TOTALN:
2844 case CTSF_ULAYERPCT_VALIDN:
2848 case CTSF_LAYERROWPCT_COUNT:
2849 case CTSF_LAYERROWPCT_SUM:
2850 case CTSF_LAYERROWPCT_TOTALN:
2851 case CTSF_LAYERROWPCT_VALIDN:
2852 case CTSF_ULAYERROWPCT_COUNT:
2853 case CTSF_ULAYERROWPCT_SUM:
2854 case CTSF_ULAYERROWPCT_TOTALN:
2855 case CTSF_ULAYERROWPCT_VALIDN:
2856 case CTSF_LAYERROW_ID:
2857 return CTDT_LAYERROW;
2859 case CTSF_ROWPCT_COUNT:
2860 case CTSF_ROWPCT_SUM:
2861 case CTSF_ROWPCT_TOTALN:
2862 case CTSF_ROWPCT_VALIDN:
2863 case CTSF_UROWPCT_COUNT:
2864 case CTSF_UROWPCT_SUM:
2865 case CTSF_UROWPCT_TOTALN:
2866 case CTSF_UROWPCT_VALIDN:
2870 case CTSF_SUBTABLEPCT_COUNT:
2871 case CTSF_SUBTABLEPCT_SUM:
2872 case CTSF_SUBTABLEPCT_TOTALN:
2873 case CTSF_SUBTABLEPCT_VALIDN:
2874 case CTSF_USUBTABLEPCT_COUNT:
2875 case CTSF_USUBTABLEPCT_SUM:
2876 case CTSF_USUBTABLEPCT_TOTALN:
2877 case CTSF_USUBTABLEPCT_VALIDN:
2878 case CTSF_SUBTABLE_ID:
2879 return CTDT_SUBTABLE;
2881 case CTSF_TABLEPCT_COUNT:
2882 case CTSF_TABLEPCT_SUM:
2883 case CTSF_TABLEPCT_TOTALN:
2884 case CTSF_TABLEPCT_VALIDN:
2885 case CTSF_UTABLEPCT_COUNT:
2886 case CTSF_UTABLEPCT_SUM:
2887 case CTSF_UTABLEPCT_TOTALN:
2888 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION.  The case labels fall into separate groups:
   the count, VALIDN, and TOTALN percentage functions; the *_ID functions; and
   finally the *PCT_SUM functions (weighted and unweighted), which form the
   last group.

   NOTE(review): the declared return type is enum ctables_domain_type,
   although the name reads as a yes/no predicate.  The return statements are
   not shown here; confirm whether the type should be bool. */
2896 static enum ctables_domain_type
2897 ctables_function_is_pctsum (enum ctables_summary_function function)
2927 case CTSF_UVARIANCE:
2931 case CTSF_COLPCT_COUNT:
2932 case CTSF_COLPCT_TOTALN:
2933 case CTSF_COLPCT_VALIDN:
2934 case CTSF_UCOLPCT_COUNT:
2935 case CTSF_UCOLPCT_TOTALN:
2936 case CTSF_UCOLPCT_VALIDN:
2937 case CTSF_LAYERCOLPCT_COUNT:
2938 case CTSF_LAYERCOLPCT_TOTALN:
2939 case CTSF_LAYERCOLPCT_VALIDN:
2940 case CTSF_ULAYERCOLPCT_COUNT:
2941 case CTSF_ULAYERCOLPCT_TOTALN:
2942 case CTSF_ULAYERCOLPCT_VALIDN:
2943 case CTSF_LAYERPCT_COUNT:
2944 case CTSF_LAYERPCT_TOTALN:
2945 case CTSF_LAYERPCT_VALIDN:
2946 case CTSF_ULAYERPCT_COUNT:
2947 case CTSF_ULAYERPCT_TOTALN:
2948 case CTSF_ULAYERPCT_VALIDN:
2949 case CTSF_LAYERROWPCT_COUNT:
2950 case CTSF_LAYERROWPCT_TOTALN:
2951 case CTSF_LAYERROWPCT_VALIDN:
2952 case CTSF_ULAYERROWPCT_COUNT:
2953 case CTSF_ULAYERROWPCT_TOTALN:
2954 case CTSF_ULAYERROWPCT_VALIDN:
2955 case CTSF_ROWPCT_COUNT:
2956 case CTSF_ROWPCT_TOTALN:
2957 case CTSF_ROWPCT_VALIDN:
2958 case CTSF_UROWPCT_COUNT:
2959 case CTSF_UROWPCT_TOTALN:
2960 case CTSF_UROWPCT_VALIDN:
2961 case CTSF_SUBTABLEPCT_COUNT:
2962 case CTSF_SUBTABLEPCT_TOTALN:
2963 case CTSF_SUBTABLEPCT_VALIDN:
2964 case CTSF_USUBTABLEPCT_COUNT:
2965 case CTSF_USUBTABLEPCT_TOTALN:
2966 case CTSF_USUBTABLEPCT_VALIDN:
2967 case CTSF_TABLEPCT_COUNT:
2968 case CTSF_TABLEPCT_TOTALN:
2969 case CTSF_TABLEPCT_VALIDN:
2970 case CTSF_UTABLEPCT_COUNT:
2971 case CTSF_UTABLEPCT_TOTALN:
2972 case CTSF_UTABLEPCT_VALIDN:
2976 case CTSF_SUBTABLE_ID:
2978 case CTSF_LAYERROW_ID:
2979 case CTSF_LAYERCOL_ID:
/* The percentage-of-sum functions form their own group. */
2982 case CTSF_COLPCT_SUM:
2983 case CTSF_UCOLPCT_SUM:
2984 case CTSF_LAYERCOLPCT_SUM:
2985 case CTSF_ULAYERCOLPCT_SUM:
2986 case CTSF_LAYERPCT_SUM:
2987 case CTSF_ULAYERPCT_SUM:
2988 case CTSF_LAYERROWPCT_SUM:
2989 case CTSF_ULAYERROWPCT_SUM:
2990 case CTSF_ROWPCT_SUM:
2991 case CTSF_UROWPCT_SUM:
2992 case CTSF_SUBTABLEPCT_SUM:
2993 case CTSF_USUBTABLEPCT_SUM:
2994 case CTSF_TABLEPCT_SUM:
2995 case CTSF_UTABLEPCT_SUM:
/* Computes the final value of summary S, accumulated under spec SS, for
   CELL.

   The *_ID functions yield the sequence number of the cell's domain for the
   function.  Percentage functions divide s->count (or, for *PCT_SUM, the
   weight times the mean from the moments) by the matching total in the
   cell's domain and multiply by 100; a zero denominator takes the alternate
   branch instead of dividing.  Moment-based results come from
   moments1_calculate().  The writer-based functions turn the writer into a
   reader and run it through a percentile or mode order statistic, caching
   the result in s->ovalue. */
3003 ctables_summary_value (const struct ctables_cell *cell,
3004 union ctables_summary *s,
3005 const struct ctables_summary_spec *ss)
3007 switch (ss->function)
3017 case CTSF_SUBTABLE_ID:
3019 case CTSF_LAYERROW_ID:
3020 case CTSF_LAYERCOL_ID:
3021 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3023 case CTSF_ROWPCT_COUNT:
3024 case CTSF_COLPCT_COUNT:
3025 case CTSF_TABLEPCT_COUNT:
3026 case CTSF_SUBTABLEPCT_COUNT:
3027 case CTSF_LAYERPCT_COUNT:
3028 case CTSF_LAYERROWPCT_COUNT:
3029 case CTSF_LAYERCOLPCT_COUNT:
3031 enum ctables_domain_type d = ctables_function_domain (ss->function);
3032 return (cell->domains[d]->e_count
3033 ? s->count / cell->domains[d]->e_count * 100
3037 case CTSF_UROWPCT_COUNT:
3038 case CTSF_UCOLPCT_COUNT:
3039 case CTSF_UTABLEPCT_COUNT:
3040 case CTSF_USUBTABLEPCT_COUNT:
3041 case CTSF_ULAYERPCT_COUNT:
3042 case CTSF_ULAYERROWPCT_COUNT:
3043 case CTSF_ULAYERCOLPCT_COUNT:
3045 enum ctables_domain_type d = ctables_function_domain (ss->function);
3046 return (cell->domains[d]->u_count
3047 ? s->count / cell->domains[d]->u_count * 100
3051 case CTSF_ROWPCT_VALIDN:
3052 case CTSF_COLPCT_VALIDN:
3053 case CTSF_TABLEPCT_VALIDN:
3054 case CTSF_SUBTABLEPCT_VALIDN:
3055 case CTSF_LAYERPCT_VALIDN:
3056 case CTSF_LAYERROWPCT_VALIDN:
3057 case CTSF_LAYERCOLPCT_VALIDN:
3059 enum ctables_domain_type d = ctables_function_domain (ss->function);
3060 return (cell->domains[d]->e_valid
3061 ? s->count / cell->domains[d]->e_valid * 100
3065 case CTSF_UROWPCT_VALIDN:
3066 case CTSF_UCOLPCT_VALIDN:
3067 case CTSF_UTABLEPCT_VALIDN:
3068 case CTSF_USUBTABLEPCT_VALIDN:
3069 case CTSF_ULAYERPCT_VALIDN:
3070 case CTSF_ULAYERROWPCT_VALIDN:
3071 case CTSF_ULAYERCOLPCT_VALIDN:
3073 enum ctables_domain_type d = ctables_function_domain (ss->function);
3074 return (cell->domains[d]->u_valid
3075 ? s->count / cell->domains[d]->u_valid * 100
3079 case CTSF_ROWPCT_TOTALN:
3080 case CTSF_COLPCT_TOTALN:
3081 case CTSF_TABLEPCT_TOTALN:
3082 case CTSF_SUBTABLEPCT_TOTALN:
3083 case CTSF_LAYERPCT_TOTALN:
3084 case CTSF_LAYERROWPCT_TOTALN:
3085 case CTSF_LAYERCOLPCT_TOTALN:
3087 enum ctables_domain_type d = ctables_function_domain (ss->function);
3088 return (cell->domains[d]->e_total
3089 ? s->count / cell->domains[d]->e_total * 100
3093 case CTSF_UROWPCT_TOTALN:
3094 case CTSF_UCOLPCT_TOTALN:
3095 case CTSF_UTABLEPCT_TOTALN:
3096 case CTSF_USUBTABLEPCT_TOTALN:
3097 case CTSF_ULAYERPCT_TOTALN:
3098 case CTSF_ULAYERROWPCT_TOTALN:
3099 case CTSF_ULAYERCOLPCT_TOTALN:
3101 enum ctables_domain_type d = ctables_function_domain (ss->function);
3102 return (cell->domains[d]->u_total
3103 ? s->count / cell->domains[d]->u_total * 100
/* The range is defined only when both extremes were recorded. */
3124 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3130 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3137 double weight, variance;
3138 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3139 return calc_semean (variance, weight);
3146 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3147 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
3153 double weight, mean;
3154 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3155 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3159 case CTSF_UVARIANCE:
3162 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3166 case CTSF_ROWPCT_SUM:
3167 case CTSF_COLPCT_SUM:
3168 case CTSF_TABLEPCT_SUM:
3169 case CTSF_SUBTABLEPCT_SUM:
3170 case CTSF_LAYERPCT_SUM:
3171 case CTSF_LAYERROWPCT_SUM:
3172 case CTSF_LAYERCOLPCT_SUM:
/* This cell's sum as a percentage of the domain's e_sum for the same sum
   variable; SYSMIS when the domain sum is zero. */
3174 double weight, mean;
3175 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3176 if (weight == SYSMIS || mean == SYSMIS)
3178 enum ctables_domain_type d = ctables_function_domain (ss->function);
3179 double num = weight * mean;
3180 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3181 return denom != 0 ? num / denom * 100 : SYSMIS;
3183 case CTSF_UROWPCT_SUM:
3184 case CTSF_UCOLPCT_SUM:
3185 case CTSF_UTABLEPCT_SUM:
3186 case CTSF_USUBTABLEPCT_SUM:
3187 case CTSF_ULAYERPCT_SUM:
3188 case CTSF_ULAYERROWPCT_SUM:
3189 case CTSF_ULAYERCOLPCT_SUM:
/* Same computation against the domain's u_sum. */
3191 double weight, mean;
3192 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3193 if (weight == SYSMIS || mean == SYSMIS)
3195 enum ctables_domain_type d = ctables_function_domain (ss->function);
3196 double num = weight * mean;
3197 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3198 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile-type result: PTILE uses the percentile from SS, anything else
   on this path uses 0.5.  The reader supplies values from column 0 and
   weights from column 1. */
3207 struct casereader *reader = casewriter_make_reader (s->writer);
3210 struct percentile *ptile = percentile_create (
3211 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3212 struct order_stats *os = &ptile->parent;
3213 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3214 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3215 statistic_destroy (&ptile->parent.parent);
/* Mode: the same reader-based accumulation with a mode statistic. */
3223 struct casereader *reader = casewriter_make_reader (s->writer);
3226 struct mode *mode = mode_create ();
3227 struct order_stats *os = &mode->parent;
3228 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3229 s->ovalue = mode->mode;
3230 statistic_destroy (&mode->parent.parent);
/* Auxiliary data handed to ctables_cell_compare_3way(): the nest whose
   variables drive the comparison and the pivot axis whose cell values are
   compared. */
3238 struct ctables_cell_sort_aux
3240 const struct ctables_nest *nest;
3241 enum pivot_axis_type a;
/* Three-way comparison callback over an array of cell pointers.  A_ and B_
   each point to a struct ctables_cell pointer; AUX_ is a struct
   ctables_cell_sort_aux naming the nest and the axis to compare on.

   The variables of the nest are visited in order, skipping the one at
   nest->scale_idx.  Cells whose categories differ for a variable are ordered
   by comparing the category pointers themselves.  Cells with the same
   category are ordered according to the category type, by data value or by
   value label, and the result is negated when the category is not
   sort_ascending.

   NOTE(review): the return type, most case labels of the switch and the
   final return are not visible in this excerpt. */
3245 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3247 const struct ctables_cell_sort_aux *aux = aux_;
3248 struct ctables_cell *const *ap = a_;
3249 struct ctables_cell *const *bp = b_;
3250 const struct ctables_cell *a = *ap;
3251 const struct ctables_cell *b = *bp;
3253 const struct ctables_nest *nest = aux->nest;
3254 for (size_t i = 0; i < nest->n; i++)
3255 if (i != nest->scale_idx)
3257 const struct variable *var = nest->vars[i];
3258 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3259 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: the category pointers decide the order. */
3260 if (a_cv->category != b_cv->category)
3261 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category from here on, so the type of either one can be used. */
3263 const union value *a_val = &a_cv->value;
3264 const union value *b_val = &b_cv->value;
3265 switch (a_cv->category->type)
3271 case CCT_POSTCOMPUTE:
3272 case CCT_EXCLUDED_MISSING:
3273 /* Must be equal. */
3281 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3289 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3291 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Label-based ordering.  NOTE(review): the handling of values without a
   label is not visible here; the value comparison below is presumably the
   fallback -- confirm. */
3297 const char *a_label = var_lookup_value_label (var, a_val);
3298 const char *b_label = var_lookup_value_label (var, b_val);
3304 cmp = strcmp (a_label, b_label);
3310 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3313 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison callback over cell pointers (A_ and B_ each point to
   a struct ctables_cell pointer) that orders cells by their per-axis leaf
   indexes, visiting the axes in enum order.  AUX is unused.

   NOTE(review): the test guarding the return inside the loop and the
   fall-through return are not visible in this excerpt; presumably the first
   axis whose leaf indexes differ decides the result -- confirm. */
3325 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3326 const void *aux UNUSED)
3328 struct ctables_cell *const *ap = a_;
3329 struct ctables_cell *const *bp = b_;
3330 const struct ctables_cell *a = *ap;
3331 const struct ctables_cell *b = *bp;
3333 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3335 int al = a->axes[axis].leaf;
3336 int bl = b->axes[axis].leaf;
3338 return al > bl ? 1 : -1;
3346 For each ctables_table:
3347 For each combination of row vars:
3348 For each combination of column vars:
3349 For each combination of layer vars:
3351 Make a table of row values:
3352 Sort entries by row values
3353 Assign a 0-based index to each actual value
3354 Construct a dimension
3355 Make a table of column values
3356 Make a table of layer values
3358 Fill the table entry using the indexes from before.
/* Looks up, in section S, the domain of type DOMAIN that CELL belongs to and
   creates one when needed.

   A domain is keyed by the category and, for categories other than totals,
   subtotals and postcomputes, the data value of each variable listed in
   nest->domains[DOMAIN] on every axis.  The key is hashed, the matching
   bucket of s->domains[DOMAIN] is searched by comparing against the example
   cell of each existing domain, and a new domain is allocated with CELL as
   its example and a zeroed array of per-sum-variable accumulators.

   NOTE(review): the initialization of hash, the control flow that leaves the
   search loop on a match and the return statements are not visible in this
   excerpt; presumably the found or newly inserted domain is returned --
   confirm. */
3361 static struct ctables_domain *
3362 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3363 enum ctables_domain_type domain)
/* Pass 1: hash the key of CELL for this domain type. */
3366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3368 const struct ctables_nest *nest = s->nests[a];
3369 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3371 size_t v_idx = nest->domains[domain][i];
3372 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3373 hash = hash_pointer (cv->category, hash);
/* Total, subtotal and postcompute categories are keyed by category alone. */
3374 if (cv->category->type != CCT_TOTAL
3375 && cv->category->type != CCT_SUBTOTAL
3376 && cv->category->type != CCT_POSTCOMPUTE)
3377 hash = value_hash (&cv->value,
3378 var_get_width (nest->vars[v_idx]), hash);
/* Pass 2: compare CELL with the example cell of each candidate domain. */
3382 struct ctables_domain *d;
3383 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3385 const struct ctables_cell *df = d->example;
3386 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3388 const struct ctables_nest *nest = s->nests[a];
3389 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3391 size_t v_idx = nest->domains[domain][i];
3392 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3393 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3394 if (cv1->category != cv2->category
3395 || (cv1->category->type != CCT_TOTAL
3396 && cv1->category->type != CCT_SUBTOTAL
3397 && cv1->category->type != CCT_POSTCOMPUTE
3398 && !value_equal (&cv1->value, &cv2->value,
3399 var_get_width (nest->vars[v_idx]))))
/* New domain: sums are only allocated when the table has sum variables. */
3408 struct ctables_sum *sums = (s->table->n_sum_vars
3409 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3412 d = xmalloc (sizeof *d);
3413 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3414 hmap_insert (&s->domains[domain], &d->node, hash);
/* Builds a substring over the string data of V, whose length is the width
   of VAR, and trims trailing spaces from it.  The substring refers to the
   bytes of V rather than to a copy.

   NOTE(review): the return statement is not visible in this excerpt;
   presumably the trimmed substring is returned -- confirm. */
3418 static struct substring
3419 rtrim_value (const union value *v, const struct variable *var)
3421 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3422 var_get_width (var));
3423 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces removed,
   lies between SRANGE[0] and SRANGE[1] inclusive according to ss_compare().
   A bound whose string pointer is null is treated as open on that side. */
3428 in_string_range (const union value *v, const struct variable *var,
3429 const struct substring *srange)
3431 struct substring s = rtrim_value (v, var);
3432 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3433 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   Categories are scanned from the last one back to the first.  The visible
   tests compare V against a category number, a category string (after
   trimming trailing spaces from V), a numeric range in which -DBL_MAX and
   DBL_MAX mark open ends, a string range, and whether V is missing for VAR.
   When the scan ends without a result, NULL is returned if V is missing for
   VAR and the saved othernm category otherwise.

   NOTE(review): the switch on the category type, most case labels, the
   returns inside the loop, the result for a numeric system-missing value and
   the place where othernm gets assigned are not visible in this excerpt. */
3436 static const struct ctables_category *
3437 ctables_categories_match (const struct ctables_categories *c,
3438 const union value *v, const struct variable *var)
3440 if (var_is_numeric (var) && v->f == SYSMIS)
3443 const struct ctables_category *othernm = NULL;
/* Counts down so that later categories are tried before earlier ones. */
3444 for (size_t i = c->n_cats; i-- > 0; )
3446 const struct ctables_category *cat = &c->cats[i];
3450 if (cat->number == v->f)
3455 if (ss_equals (cat->string, rtrim_value (v, var)))
3460 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3461 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3466 if (in_string_range (v, var, cat->srange))
3471 if (var_is_value_missing (var, v))
3475 case CCT_POSTCOMPUTE:
/* This category accepts V unless V is missing and the category does not
   include missing values. */
3490 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3493 case CCT_EXCLUDED_MISSING:
3498 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the total category of C when it sits at either end of the
   category list: the first category if its type is CCT_TOTAL, else the last
   category if its type is CCT_TOTAL.

   NOTE(review): the final alternative of the conditional is not visible in
   this excerpt; presumably NULL -- confirm.  The code reads c->cats[0] and
   c->cats[c->n_cats - 1] unconditionally, so it appears to rely on C having
   at least one category -- confirm against callers. */
3501 static const struct ctables_category *
3502 ctables_categories_total (const struct ctables_categories *c)
3504 const struct ctables_category *first = &c->cats[0];
3505 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3506 return (first->type == CCT_TOTAL ? first
3507 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S that case C belongs to under the per-variable
   categories CATS, creating and initializing the cell when needed.

   The cell key is, for every non-scale variable on every axis, the category
   pointer plus (except for total, subtotal and postcompute categories) the
   data value taken from C.  A new cell receives a clone of the value of each
   nest variable, its omit_domains mask and postcompute flag, one initialized
   summary per summary spec, and a domain of every domain type; it is then
   inserted into s->cells.

   NOTE(review): the initialization of hash, any update of sv, the exit from
   the lookup loop on a match, the switch header on the axis and the return
   statements are not visible in this excerpt. */
3511 static struct ctables_cell *
3512 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3513 const struct ctables_category *cats[PIVOT_N_AXES][10])
/* Pass 1: hash the key. */
3516 enum ctables_summary_variant sv = CSV_CELL;
3517 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3519 const struct ctables_nest *nest = s->nests[a];
3520 for (size_t i = 0; i < nest->n; i++)
3521 if (i != nest->scale_idx)
3523 hash = hash_pointer (cats[a][i], hash);
3524 if (cats[a][i]->type != CCT_TOTAL
3525 && cats[a][i]->type != CCT_SUBTOTAL
3526 && cats[a][i]->type != CCT_POSTCOMPUTE)
3527 hash = value_hash (case_data (c, nest->vars[i]),
3528 var_get_width (nest->vars[i]), hash);
/* Pass 2: look for an existing cell with the same key. */
3534 struct ctables_cell *cell;
3535 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3537 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3539 const struct ctables_nest *nest = s->nests[a];
3540 for (size_t i = 0; i < nest->n; i++)
3541 if (i != nest->scale_idx
3542 && (cats[a][i] != cell->axes[a].cvs[i].category
3543 || (cats[a][i]->type != CCT_TOTAL
3544 && cats[a][i]->type != CCT_SUBTOTAL
3545 && cats[a][i]->type != CCT_POSTCOMPUTE
3546 && !value_equal (case_data (c, nest->vars[i]),
3547 &cell->axes[a].cvs[i].value,
3548 var_get_width (nest->vars[i])))))
/* No match: build a new cell. */
3557 cell = xmalloc (sizeof *cell);
3560 cell->omit_domains = 0;
3561 cell->postcompute = false;
3562 //struct string name = DS_EMPTY_INITIALIZER;
3563 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3565 const struct ctables_nest *nest = s->nests[a];
3566 cell->axes[a].cvs = (nest->n
3567 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3569 for (size_t i = 0; i < nest->n; i++)
3571 const struct ctables_category *cat = cats[a][i];
3572 const struct variable *var = nest->vars[i];
3573 const union value *value = case_data (c, var);
3574 if (i != nest->scale_idx)
3576 const struct ctables_category *subtotal = cat->subtotal;
/* NOTE(review): the statement guarded by this hide test is not visible. */
3577 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total-like categories mark domain types in the omit_domains bit mask,
   with a different set of bits depending on the axis. */
3580 if (cat->type == CCT_TOTAL
3581 || cat->type == CCT_SUBTOTAL
3582 || cat->type == CCT_POSTCOMPUTE)
3584 /* XXX these should be more encompassing I think.*/
3588 case PIVOT_AXIS_COLUMN:
3589 cell->omit_domains |= ((1u << CTDT_TABLE) |
3590 (1u << CTDT_LAYER) |
3591 (1u << CTDT_LAYERCOL) |
3592 (1u << CTDT_SUBTABLE) |
3595 case PIVOT_AXIS_ROW:
3596 cell->omit_domains |= ((1u << CTDT_TABLE) |
3597 (1u << CTDT_LAYER) |
3598 (1u << CTDT_LAYERROW) |
3599 (1u << CTDT_SUBTABLE) |
3602 case PIVOT_AXIS_LAYER:
3603 cell->omit_domains |= ((1u << CTDT_TABLE) |
3604 (1u << CTDT_LAYER));
3608 if (cat->type == CCT_POSTCOMPUTE)
3609 cell->postcompute = true;
/* The cell keeps its own clone of the value, not a pointer into C. */
3612 cell->axes[a].cvs[i].category = cat;
3613 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below use name, whose declaration is
   commented out above; presumably this stretch is disabled in the full file
   (for example by a preprocessor conditional) -- confirm. */
3616 if (i != nest->scale_idx)
3618 if (!ds_is_empty (&name))
3619 ds_put_cstr (&name, ", ");
3620 char *value_s = data_out (value, var_get_encoding (var),
3621 var_get_print_format (var),
3622 settings_get_fmt_settings ());
3623 if (cat->type == CCT_TOTAL
3624 || cat->type == CCT_SUBTOTAL
3625 || cat->type == CCT_POSTCOMPUTE)
3626 ds_put_format (&name, "%s=total", var_get_name (var));
3628 ds_put_format (&name, "%s=%s", var_get_name (var),
3629 value_s + strspn (value_s, " "));
3635 //cell->name = ds_steal_cstr (&name);
/* One summary per spec of the summary-axis nest, for the variant of this
   cell. */
3637 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3638 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3639 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3640 for (size_t i = 0; i < specs->n; i++)
3641 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to a domain of every type, then publish it. */
3642 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3643 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3644 hmap_insert (&s->cells, &cell->node, hash);
/* Decides whether case C counts as missing for the scale variable of SPECS.
   The visible checks are, in order: SPECS not being a scale set, the value
   of specs->var in C being missing, and the value of any of the listwise
   variables of SPECS in C being missing.

   NOTE(review): the return type and every return statement are not visible
   in this excerpt; presumably the first check yields false and the other two
   yield true -- confirm. */
3649 is_scale_missing (const struct ctables_summary_spec_set *specs,
3650 const struct ccase *c)
3652 if (!specs->is_scale)
3655 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3658 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3660 const struct variable *var = specs->listwise_vars[i];
3661 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of section S
   selected by CATS, creating the cell if necessary.

   Every summary of the cell is updated through ctables_summary_add().  Then,
   for each domain type whose bit is not set in the omit_domains mask of the
   cell, the totals of the corresponding domain are updated, the counts are
   updated unless EXCLUDED_MISSING, and the valid counts and the
   per-sum-variable sums are updated as well.

   NOTE(review): some lines of this function are not visible in this excerpt,
   including the condition that guards the valid counts and sums (presumably
   a test on IS_MISSING) -- confirm. */
3669 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3670 const struct ctables_category *cats[PIVOT_N_AXES][10],
3671 bool is_missing, bool excluded_missing,
3672 double d_weight, double e_weight)
3674 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3675 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3677 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3679 bool scale_missing = is_scale_missing (specs, c);
3680 for (size_t i = 0; i < specs->n; i++)
3681 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3682 specs->var, case_data (c, specs->var), specs->is_scale,
3683 scale_missing, is_missing, excluded_missing,
3684 d_weight, e_weight);
3685 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bit mask with one bit per domain type, so the bit for
   DT has to be tested with bitwise AND.  A logical AND here would be true
   whenever any bit at all is set and would drop the case from every
   domain. */
3686 if (!(cell->omit_domains & (1u << dt)))
3688 struct ctables_domain *d = cell->domains[dt];
3689 d->d_total += d_weight;
3690 d->e_total += e_weight;
3692 if (!excluded_missing)
3694 d->d_count += d_weight;
3695 d->e_count += e_weight;
3700 d->d_valid += d_weight;
3701 d->e_valid += e_weight;
3704 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3706 /* XXX listwise_missing??? */
3707 const struct variable *var = s->table->sum_vars[i];
3708 double addend = case_num (c, var);
/* Missing values of a sum variable do not contribute to its sums. */
3709 if (!var_is_num_missing (var, addend))
3711 struct ctables_sum *sum = &d->sums[i];
3712 sum->e_sum += addend * e_weight;
3713 sum->u_sum += addend;
/* Adds case C to the cells obtained by substituting total categories for
   the categories in CATS.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable is visited, its total category is looked up with
   ctables_categories_total(), the current entry of CATS is saved, the case is
   added with ctables_cell_add__(), and the function recurses from the next
   nest position on the same axis so that combinations of totals are covered
   too.

   NOTE(review): the test on the looked-up total, the assignment of it into
   CATS, the restore of the saved entry and any reset of START_NEST between
   axes are not visible in this excerpt. */
3721 recurse_totals (struct ctables_section *s, const struct ccase *c,
3722 const struct ctables_category *cats[PIVOT_N_AXES][10],
3723 bool is_missing, bool excluded_missing,
3724 double d_weight, double e_weight,
3725 enum pivot_axis_type start_axis, size_t start_nest)
3727 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3729 const struct ctables_nest *nest = s->nests[a];
3730 for (size_t i = start_nest; i < nest->n; i++)
3732 if (i == nest->scale_idx)
3735 const struct variable *var = nest->vars[i];
3737 const struct ctables_category *total = ctables_categories_total (
3738 s->table->categories[var_get_dict_index (var)]);
3741 const struct ctables_category *save = cats[a][i];
3743 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3744 d_weight, e_weight);
3745 recurse_totals (s, c, cats, is_missing, excluded_missing,
3746 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells obtained by substituting subtotal categories for
   the categories in CATS.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable is visited, the current entry of CATS is saved and replaced by
   its subtotal category, the case is added with ctables_cell_add__(), and
   the function recurses from the next nest position on the same axis.

   NOTE(review): the test for a category without a subtotal, the restore of
   the saved entry and any reset of START_NEST between axes are not visible
   in this excerpt. */
3755 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3756 const struct ctables_category *cats[PIVOT_N_AXES][10],
3757 bool is_missing, bool excluded_missing,
3758 double d_weight, double e_weight,
3759 enum pivot_axis_type start_axis, size_t start_nest)
3761 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3763 const struct ctables_nest *nest = s->nests[a];
3764 for (size_t i = start_nest; i < nest->n; i++)
3766 if (i == nest->scale_idx)
3769 const struct ctables_category *save = cats[a][i];
3772 cats[a][i] = save->subtotal;
3773 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3774 d_weight, e_weight);
3775 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3776 d_weight, e_weight, a, i + 1);
/* Records VALUE of variable VAR in the hash map OCCURRENCES.  The map is
   keyed by the hash of the value at the width of VAR; the bucket is searched
   for an equal value and a new entry holding a clone of VALUE is inserted.

   NOTE(review): the statement executed when an equal value is found is not
   visible in this excerpt; presumably the function returns early so that
   each distinct value is stored once -- confirm. */
3785 ctables_add_occurrence (const struct variable *var,
3786 const union value *value,
3787 struct hmap *occurrences)
3789 int width = var_get_width (var);
3790 unsigned int hash = value_hash (value, width, 0);
3792 struct ctables_occurrence *o;
3793 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3795 if (value_equal (value, &o->value, width))
3798 o = xmalloc (sizeof *o);
3799 value_clone (&o->value, value, width);
3800 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Step 1: for every non-scale variable on every axis, the category of the
   value in C is determined with ctables_categories_match() and stored in a
   local cats table.  A static placeholder category of type
   CCT_EXCLUDED_MISSING is stored instead, and excluded_missing is set, in a
   case whose condition is not visible in this excerpt.

   Step 2: unless excluded_missing is set, the value of every non-scale
   variable is recorded in s->occurrences with ctables_add_occurrence().

   Step 3: the case is added to its own cell with ctables_cell_add__() and
   then to the total and subtotal cells through recurse_totals() and
   recurse_subtotals(), both started at axis 0 and nest position 0.

   NOTE(review): the local cats table has a fixed second dimension of 10 and
   is flagged XXX in the code; nothing visible here checks nest->n against
   that bound.  The updates of is_missing and the use of var_missing are not
   visible in this excerpt. */
3804 ctables_cell_insert (struct ctables_section *s,
3805 const struct ccase *c,
3806 double d_weight, double e_weight)
3808 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3810 /* Does at least one categorical variable have a missing value in an included
3811 or excluded category? */
3812 bool is_missing = false;
3814 /* Does at least one categorical variable have a missing value in an excluded
3816 bool excluded_missing = false;
3818 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3820 const struct ctables_nest *nest = s->nests[a];
3821 for (size_t i = 0; i < nest->n; i++)
3823 if (i == nest->scale_idx)
3826 const struct variable *var = nest->vars[i];
3827 const union value *value = case_data (c, var);
3829 bool var_missing = var_is_value_missing (var, value) != 0;
3833 cats[a][i] = ctables_categories_match (
3834 s->table->categories[var_get_dict_index (var)], value, var);
3840 static const struct ctables_category cct_excluded_missing = {
3841 .type = CCT_EXCLUDED_MISSING,
3844 cats[a][i] = &cct_excluded_missing;
3845 excluded_missing = true;
3850 if (!excluded_missing)
3851 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3853 const struct ctables_nest *nest = s->nests[a];
3854 for (size_t i = 0; i < nest->n; i++)
3855 if (i != nest->scale_idx)
3857 const struct variable *var = nest->vars[i];
3858 const union value *value = case_data (c, var);
3859 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3863 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3864 d_weight, e_weight);
3866 //if (!excluded_missing)
3868 recurse_totals (s, c, cats, is_missing, excluded_missing,
3869 d_weight, e_weight, 0, 0);
3870 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3871 d_weight, e_weight, 0, 0);
/* Summary spec set this item refers to; merge_item_compare_3way() reads the
   spec at set->specs[ofs].  NOTE(review): the struct header and the ofs
   member are not visible in this excerpt. */
3877 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer
   to.  Specs are ordered first by function in ascending enum order, then by
   percentile with the larger percentile sorting first, and finally by label
   using strcmp(), where a null label compares as the empty string.

   NOTE(review): the return type is not visible in this excerpt. */
3882 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3884 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3885 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3886 if (as->function != bs->function)
3887 return as->function > bs->function ? 1 : -1;
3888 else if (as->percentile != bs->percentile)
3889 return as->percentile < bs->percentile ? 1 : -1;
3891 const char *as_label = as->label ? as->label : "";
3892 const char *bs_label = bs->label ? bs->label : "";
3893 return strcmp (as_label, bs_label);
/* Creates the pivot value used as the label of category CAT.  Total and
   subtotal categories are labeled with the user text in cat->total_label;
   every other category is labeled with VALUE of variable VAR. */
3896 static struct pivot_value *
3897 ctables_category_create_label__ (const struct ctables_category *cat,
3898 const struct variable *var,
3899 const union value *value)
3901 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3902 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3903 : pivot_value_new_var_value (var, value));
/* Creates the label for postcompute category CAT from cat->pc->label,
   expanding references of the form )LABEL[n] into the label of the n-th
   category (counting from 1) of CATS.

   The label text is scanned for the marker )LABEL[.  Text before each marker
   is copied to the output, the digits up to the closing bracket are parsed
   as a base-10 index, and the label of that category, built with
   ctables_category_create_label__() for VAR and VALUE, is appended.  When no
   marker is left, the remaining text is appended and the result is returned;
   a label without any marker is returned without going through the output
   buffer.

   NOTE(review): the loop header, the declaration of tail, the branches taken
   when the closing bracket is missing or the index is out of range, and the
   release of label2_s and of the output buffer are not visible in this
   excerpt; the last return presumably serves as the fallback for malformed
   references -- confirm. */
3906 static struct pivot_value *
3907 ctables_postcompute_label (const struct ctables_categories *cats,
3908 const struct ctables_category *cat,
3909 const struct variable *var,
3910 const union value *value)
3912 struct substring in = ss_cstr (cat->pc->label);
3913 struct substring target = ss_cstr (")LABEL[");
3915 struct string out = DS_EMPTY_INITIALIZER;
3918 size_t chunk = ss_find_substring (in, target);
/* No further marker: finish up. */
3919 if (chunk == SIZE_MAX)
3921 if (ds_is_empty (&out))
3922 return pivot_value_new_user_text (in.string, in.length);
3925 ds_put_substring (&out, in);
3926 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker, then step past the marker itself. */
3930 ds_put_substring (&out, ss_head (in, chunk));
3931 ss_advance (&in, chunk + target.length);
3933 struct substring idx_s;
3934 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a whole number in 1..n_cats that fills idx_s. */
3937 long int idx = strtol (idx_s.string, &tail, 10);
3938 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3941 struct ctables_category *cat2 = &cats->cats[idx - 1];
3942 struct pivot_value *label2
3943 = ctables_category_create_label__ (cat2, var, value);
3944 char *label2_s = pivot_value_to_string_defaults (label2);
3945 ds_put_cstr (&out, label2_s);
3947 pivot_value_destroy (label2);
3952 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value used as the label of category CAT of variable VAR
   with data value VALUE.  A postcompute category that has its own label goes
   through ctables_postcompute_label(), which needs CATS to resolve label
   references; every other category goes through
   ctables_category_create_label__(). */
3955 static struct pivot_value *
3956 ctables_category_create_label (const struct ctables_categories *cats,
3957 const struct ctables_category *cat,
3958 const struct variable *var,
3959 const union value *value)
3961 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3962 ? ctables_postcompute_label (cats, cat, var, value)
3963 : ctables_category_create_label__ (cat, var, value));
/* Searches t->clabels_values_map for an entry whose value equals VALUE at
   the given WIDTH.  HASH must be the hash under which such an entry would
   have been inserted; the callers visible here pass
   value_hash (value, width, 0).

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching entry is returned, or NULL when there is none --
   confirm. */
3966 static struct ctables_value *
3967 ctables_value_find__ (struct ctables_table *t, const union value *value,
3968 int width, unsigned int hash)
3970 struct ctables_value *clv;
3971 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3972 hash, &t->clabels_values_map)
3973 if (value_equal (value, &clv->value, width))
/* Adds VALUE to t->clabels_values_map.  The map is first searched with
   ctables_value_find__(); a new entry holding a clone of VALUE is allocated
   and inserted under the hash of the value.

   NOTE(review): the remaining parameter (the width), the test on the search
   result and the return type are not visible in this excerpt; presumably
   nothing is inserted when the value is already present -- confirm. */
3979 ctables_value_insert (struct ctables_table *t, const union value *value,
3982 unsigned int hash = value_hash (value, width, 0);
3983 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3986 clv = xmalloc (sizeof *clv);
3987 value_clone (&clv->value, value, width);
3988 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper around ctables_value_find__() that computes the hash
   of VALUE at WIDTH itself and returns whatever that function returns. */
3992 static struct ctables_value *
3993 ctables_value_find (struct ctables_table *t,
3994 const union value *value, int width)
3996 return ctables_value_find__ (t, value, width,
3997 value_hash (value, width, 0));
/* Recursively enumerates one nest index per axis and appends a section to T
   for each combination.

   While A is a valid axis, IX[A] runs from 0 up to the number of nests in
   the stack of that axis, or up to 1 when the stack is empty, and the
   function recurses with the next axis.  Once every axis has an index, the
   next free slot of t->sections is claimed and initialized: its cells map,
   one occurrences map per nest variable on each axis, and one domains map
   per domain type.

   NOTE(review): the remaining initializer fields of the section and the
   assignment of s->nests are not visible in this excerpt.  t->sections is
   written at index n_sections without a visible bounds check, so it
   presumably has been sized by the caller -- confirm. */
4001 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4002 size_t ix[PIVOT_N_AXES])
4004 if (a < PIVOT_N_AXES)
4006 size_t limit = MAX (t->stacks[a].n, 1);
4007 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4008 ctables_table_add_section (t, a + 1, ix);
4012 struct ctables_section *s = &t->sections[t->n_sections++];
4013 *s = (struct ctables_section) {
4015 .cells = HMAP_INITIALIZER (s->cells),
4017 for (a = 0; a < PIVOT_N_AXES; a++)
4020 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4022 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4023 for (size_t i = 0; i < nest->n; i++)
4024 hmap_init (&s->occurrences[a][i]);
4026 for (size_t i = 0; i < N_CTDTS; i++)
4027 hmap_init (&s->domains[i]);
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() with two
   arguments.  NOTE(review): the body is not visible in this excerpt;
   presumably returns A + B -- confirm. */
4032 ctpo_add (double a, double b)
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() with two
   arguments.  NOTE(review): the body is not visible in this excerpt;
   presumably returns A - B -- confirm. */
4038 ctpo_sub (double a, double b)
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() with two
   arguments.  NOTE(review): the body is not visible in this excerpt;
   presumably returns A * B -- confirm. */
4044 ctpo_mul (double a, double b)
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() with two
   arguments.  Returns A / B, or SYSMIS when B is zero. */
4050 ctpo_div (double a, double b)
4052 return b ? a / b : SYSMIS;
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() with two
   arguments.  Saves the current errno and computes pow (A, B).

   NOTE(review): the rest of the body is not visible in this excerpt;
   presumably errno is inspected to detect a failed pow() and then restored
   from save_errno -- confirm. */
4056 ctpo_pow (double a, double b)
4058 int save_errno = errno;
4060 double result = pow (a, b);
/* Operator callback handed to ctables_pcexpr_evaluate_nonterminal() with one
   argument; B is unused and only keeps the two-argument signature.
   NOTE(review): the body is not visible in this excerpt; presumably returns
   the negation of A -- confirm. */
4068 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute expression.
   NOTE(review): the pc_a_idx and summary_idx members used by those functions
   are not visible in this excerpt. */
4073 struct ctables_pcexpr_evaluate_ctx
/* Cell whose postcomputed value is being calculated. */
4075 const struct ctables_cell *cell;
/* Section that contains the cell and the cells it refers to. */
4076 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
4077 const struct ctables_categories *cats;
/* Axis on which the postcompute category sits. */
4078 enum pivot_axis_type pc_a;
/* Passed on to ctables_find_category_for_postcompute(). */
4081 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() below calls
   this function before its definition appears. */
4084 static double ctables_pcexpr_evaluate (
4085 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E of a postcompute expression.  The first N_ARGS
   subexpressions of E are evaluated in order, each result is checked for
   being non-finite or SYSMIS, and EVALUATE is then applied to the results.
   N_ARGS must be at most 2; an unused second argument is passed as 0.

   NOTE(review): the statement executed when a subexpression fails the check
   is not visible in this excerpt; presumably SYSMIS is returned at once --
   confirm.  The return type is not visible either. */
4088 ctables_pcexpr_evaluate_nonterminal (
4089 const struct ctables_pcexpr_evaluate_ctx *ctx,
4090 const struct ctables_pcexpr *e, size_t n_args,
4091 double evaluate (double, double))
4093 double args[2] = { 0, 0 };
4094 for (size_t i = 0; i < n_args; i++)
4096 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4097 if (!isfinite (args[i]) || args[i] == SYSMIS)
4100 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that has the same coordinates as
   ctx->cell except that the variable at (ctx->pc_a, ctx->pc_a_idx) takes the
   category and value given by PC_CV.

   The cell key is hashed the same way as when cells are inserted: category
   pointer plus, except for total, subtotal and postcompute categories, the
   data value of every non-scale variable on every axis.  The matching cell
   is then searched for in s->cells and the summary at ctx->summary_idx is
   computed for it with ctables_summary_value().

   NOTE(review): the initialization of hash, the exit from the search loop
   on a match and the result when no cell matches are not visible in this
   excerpt. */
4104 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4105 const struct ctables_cell_value *pc_cv)
4107 const struct ctables_section *s = ctx->section;
4110 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4112 const struct ctables_nest *nest = s->nests[a];
4113 for (size_t i = 0; i < nest->n; i++)
4114 if (i != nest->scale_idx)
/* Substitute PC_CV at the position of the postcompute variable. */
4116 const struct ctables_cell_value *cv
4117 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4118 : &ctx->cell->axes[a].cvs[i]);
4119 hash = hash_pointer (cv->category, hash);
4120 if (cv->category->type != CCT_TOTAL
4121 && cv->category->type != CCT_SUBTOTAL
4122 && cv->category->type != CCT_POSTCOMPUTE)
4123 hash = value_hash (&cv->value,
4124 var_get_width (nest->vars[i]), hash);
4128 struct ctables_cell *tc;
4129 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4131 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4133 const struct ctables_nest *nest = s->nests[a];
4134 for (size_t i = 0; i < nest->n; i++)
4135 if (i != nest->scale_idx)
4137 const struct ctables_cell_value *p_cv
4138 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4139 : &ctx->cell->axes[a].cvs[i]);
4140 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4141 if (p_cv->category != t_cv->category
4142 || (p_cv->category->type != CCT_TOTAL
4143 && p_cv->category->type != CCT_SUBTOTAL
4144 && p_cv->category->type != CCT_POSTCOMPUTE
4145 && !value_equal (&p_cv->value,
4147 var_get_width (nest->vars[i]))))
/* Evaluate the requested summary on the cell that was found. */
4159 const struct ctables_table *t = s->table;
4160 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4161 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4162 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4163 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Range operands look up their category and add up the value of
   ctables_pcexpr_evaluate_category() over every occurrence recorded for the
   postcompute variable that matches that category.  Single-category operands
   evaluate one cell, identified by the category plus the number or the
   string stored in E; a string shorter than the variable width is first
   padded with spaces to that width in a temporary buffer.  Operator nodes
   are delegated to ctables_pcexpr_evaluate_nonterminal() with the matching
   ctpo_ callback.

   NOTE(review): the switch header, several case labels, the declaration and
   return of sum, and the release of the temporary string buffer are not
   visible in this excerpt. */
4167 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4168 const struct ctables_pcexpr *e)
4175 case CTPO_CAT_NRANGE:
4176 case CTPO_CAT_SRANGE:
4178 struct ctables_cell_value cv = {
4179 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4181 assert (cv.category != NULL);
4183 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4184 const struct ctables_occurrence *o;
4187 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
/* Only occurrences that fall into the range category contribute. */
4188 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4189 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4191 cv.value = o->value;
4192 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4197 case CTPO_CAT_NUMBER:
4198 case CTPO_CAT_MISSING:
4199 case CTPO_CAT_OTHERNM:
4200 case CTPO_CAT_SUBTOTAL:
4201 case CTPO_CAT_TOTAL:
4203 struct ctables_cell_value cv = {
4204 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4205 .value = { .f = e->number },
4207 assert (cv.category != NULL);
4208 return ctables_pcexpr_evaluate_category (ctx, &cv);
4211 case CTPO_CAT_STRING:
4213 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
/* Pad the string to the variable width so that it compares equal to stored
   values. */
4215 if (width > e->string.length)
4217 s = xmalloc (width);
4218 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4220 struct ctables_cell_value cv = {
4221 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4222 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4224 assert (cv.category != NULL);
4225 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Binary operators take two subexpressions, negation takes one. */
4231 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4234 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4237 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4240 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4243 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4246 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL in section S.  CELL
   must have its postcompute flag set.

   Every variable of every axis is inspected and the category of type
   CCT_POSTCOMPUTE is remembered in pc_cat; the nest position of it is stored
   through PC_A_IDX_P.  The function asserts that a postcompute category was
   found.

   NOTE(review): the remaining parameter, the handling of several
   postcomputes crossing each other, the store through PC_A_P and the return
   statement are not visible in this excerpt; presumably pc_cat is returned
   -- confirm. */
4252 static const struct ctables_category *
4253 ctables_cell_postcompute (const struct ctables_section *s,
4254 const struct ctables_cell *cell,
4255 enum pivot_axis_type *pc_a_p,
4258 assert (cell->postcompute);
4259 const struct ctables_category *pc_cat = NULL;
4260 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4261 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4263 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4264 if (cv->category->type == CCT_POSTCOMPUTE)
4268 /* Multiple postcomputes cross each other. The value is
4273 pc_cat = cv->category;
4277 *pc_a_idx_p = pc_a_idx;
4281 assert (pc_cat != NULL);
/* Computes the value of summary SS for postcompute cell CELL in section S.

   The postcompute category of the cell and its position are obtained from
   ctables_cell_postcompute().  When the postcompute has a summary spec with
   the same function and percentile as SS, the format of that spec and its
   is_ctables_format flag are stored in *FORMAT and *IS_CTABLES_FORMAT.  The
   postcompute expression is then evaluated in a context built from the cell,
   the categories of the postcompute variable and the summary index, and the
   result is returned.

   NOTE(review): the summary_idx parameter, the test that guards the loop
   over pc->specs, the exit from that loop after a match and part of the
   context initializer are not visible in this excerpt. */
4286 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4287 const struct ctables_cell *cell,
4288 const struct ctables_summary_spec *ss,
4289 struct fmt_spec *format,
4290 bool *is_ctables_format,
4293 enum pivot_axis_type pc_a = 0;
4294 size_t pc_a_idx = 0;
4295 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4296 s, cell, &pc_a, &pc_a_idx);
4300 const struct ctables_postcompute *pc = pc_cat->pc;
/* A format override applies only to a spec with the same function and
   percentile. */
4303 for (size_t i = 0; i < pc->specs->n; i++)
4305 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4306 if (ss->function == ss2->function
4307 && ss->percentile == ss2->percentile)
4309 *format = ss2->format;
4310 *is_ctables_format = ss2->is_ctables_format;
4316 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4317 const struct ctables_categories *cats = s->table->categories[
4318 var_get_dict_index (var)];
4319 struct ctables_pcexpr_evaluate_ctx ctx = {
4324 .pc_a_idx = pc_a_idx,
4325 .summary_idx = summary_idx,
4326 .parse_format = pc_cat->parse_format,
4328 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a UTF-8 string and
   then fixes up the position of a leading minus sign as described in the
   comment inside the function.

   NOTE(review): the return type and the return statement are not visible in
   this excerpt; presumably the string obtained from data_out_stretchy() is
   returned and owned by the caller -- confirm. */
4332 ctables_format (double d, const struct fmt_spec *format,
4333 const struct fmt_settings *settings)
4335 const union value v = { .f = d };
4336 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4338 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4339 produce the results we want for negative numbers, putting the negative
4340 sign in the wrong spot, before the prefix instead of after it. We can't,
4341 in fact, produce the desired results using a custom-currency
4342 specification. Instead, we postprocess the output, moving the negative
4345 NEQUAL: "-N=3" => "N=-3"
4346 PAREN: "-(3)" => "(-3)"
4347 PCTPAREN: "-(3%)" => "(-3%)"
4349 This transformation doesn't affect NEGPAREN. */
4350 char *minus_src = strchr (s, '-');
/* A minus sign directly after the letter E is left alone, presumably because
   it belongs to an exponent -- confirm. */
4351 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4353 char *n_equals = strstr (s, "N=");
4354 char *lparen = strchr (s, '(');
/* Destination: the equals sign of the N= prefix if present, else the left
   parenthesis. */
4355 char *minus_dst = n_equals ? n_equals + 1 : lparen;
/* Treats S as an array of single bytes and moves the minus sign to the
   destination index.  NOTE(review): the guard for a null minus_dst is not
   visible in this excerpt. */
4357 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4363 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4365 struct pivot_table *pt = pivot_table_create__ (
4367 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4368 : pivot_value_new_text (N_("Custom Tables"))),
4371 pivot_table_set_caption (
4372 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4374 pivot_table_set_corner_text (
4375 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4377 bool summary_dimension = (t->summary_axis != t->slabels_axis
4378 || (!t->slabels_visible
4379 && t->summary_specs.n > 1));
4380 if (summary_dimension)
4382 struct pivot_dimension *d = pivot_dimension_create (
4383 pt, t->slabels_axis, N_("Statistics"));
4384 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4385 if (!t->slabels_visible)
4386 d->hide_all_labels = true;
4387 for (size_t i = 0; i < specs->n; i++)
4388 pivot_category_create_leaf (
4389 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4392 bool categories_dimension = t->clabels_example != NULL;
4393 if (categories_dimension)
4395 struct pivot_dimension *d = pivot_dimension_create (
4396 pt, t->label_axis[t->clabels_from_axis],
4397 t->clabels_from_axis == PIVOT_AXIS_ROW
4398 ? N_("Row Categories")
4399 : N_("Column Categories"));
4400 const struct variable *var = t->clabels_example;
4401 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4402 for (size_t i = 0; i < t->n_clabels_values; i++)
4404 const struct ctables_value *value = t->clabels_values[i];
4405 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4406 assert (cat != NULL);
4407 pivot_category_create_leaf (d->root, ctables_category_create_label (
4408 c, cat, t->clabels_example,
4413 pivot_table_set_look (pt, ct->look);
4414 struct pivot_dimension *d[PIVOT_N_AXES];
4415 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4417 static const char *names[] = {
4418 [PIVOT_AXIS_ROW] = N_("Rows"),
4419 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4420 [PIVOT_AXIS_LAYER] = N_("Layers"),
4422 d[a] = (t->axes[a] || a == t->summary_axis
4423 ? pivot_dimension_create (pt, a, names[a])
4428 assert (t->axes[a]);
4430 for (size_t i = 0; i < t->stacks[a].n; i++)
4432 struct ctables_nest *nest = &t->stacks[a].nests[i];
4433 struct ctables_section **sections = xnmalloc (t->n_sections,
4435 size_t n_sections = 0;
4437 size_t n_total_cells = 0;
4438 size_t max_depth = 0;
4439 for (size_t j = 0; j < t->n_sections; j++)
4440 if (t->sections[j].nests[a] == nest)
4442 struct ctables_section *s = &t->sections[j];
4443 sections[n_sections++] = s;
4444 n_total_cells += hmap_count (&s->cells);
4446 size_t depth = s->nests[a]->n;
4447 max_depth = MAX (depth, max_depth);
4450 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4452 size_t n_sorted = 0;
4454 for (size_t j = 0; j < n_sections; j++)
4456 struct ctables_section *s = sections[j];
4458 struct ctables_cell *cell;
4459 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4461 sorted[n_sorted++] = cell;
4462 assert (n_sorted <= n_total_cells);
4465 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4466 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4469 if (a == PIVOT_AXIS_ROW)
4471 size_t ids[N_CTDTS];
4472 memset (ids, 0, sizeof ids);
4473 for (size_t j = 0; j < n_sorted; j++)
4475 struct ctables_cell *cell = sorted[j];
4476 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4478 struct ctables_domain *domain = cell->domains[dt];
4479 if (!domain->sequence)
4480 domain->sequence = ++ids[dt];
4487 for (size_t j = 0; j < n_sorted; j++)
4489 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4494 struct ctables_level
4496 enum ctables_level_type
4498 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4499 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4500 CTL_SUMMARY, /* Summary functions. */
4504 enum settings_value_show vlabel; /* CTL_VAR only. */
4507 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4508 size_t n_levels = 0;
4509 for (size_t k = 0; k < nest->n; k++)
4511 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4512 if (vlabel != CTVL_NONE)
4514 levels[n_levels++] = (struct ctables_level) {
4516 .vlabel = (enum settings_value_show) vlabel,
4521 if (nest->scale_idx != k
4522 && (k != nest->n - 1 || t->label_axis[a] == a))
4524 levels[n_levels++] = (struct ctables_level) {
4525 .type = CTL_CATEGORY,
4531 if (!summary_dimension && a == t->slabels_axis)
4533 levels[n_levels++] = (struct ctables_level) {
4534 .type = CTL_SUMMARY,
4535 .var_idx = SIZE_MAX,
4539 /* Pivot categories:
4541 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4542 - category for nest->vars[0], if nest->scale_idx != 0
4543 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4544 - category for nest->vars[1], if nest->scale_idx != 1
4546 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4547 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4548 - summary function, if 'a == t->slabels_axis && a ==
4551 Additional dimensions:
4553 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4555 - If 't->label_axis[b] == a' for some 'b != a', add a category
4560 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4562 for (size_t j = 0; j < n_sorted; j++)
4564 struct ctables_cell *cell = sorted[j];
4565 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4567 size_t n_common = 0;
4570 for (; n_common < n_levels; n_common++)
4572 const struct ctables_level *level = &levels[n_common];
4573 if (level->type == CTL_CATEGORY)
4575 size_t var_idx = level->var_idx;
4576 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4577 if (prev->axes[a].cvs[var_idx].category != c)
4579 else if (c->type != CCT_SUBTOTAL
4580 && c->type != CCT_TOTAL
4581 && c->type != CCT_POSTCOMPUTE
4582 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4583 &cell->axes[a].cvs[var_idx].value,
4584 var_get_type (nest->vars[var_idx])))
4590 for (size_t k = n_common; k < n_levels; k++)
4592 const struct ctables_level *level = &levels[k];
4593 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4594 if (level->type == CTL_SUMMARY)
4596 assert (k == n_levels - 1);
4598 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4599 for (size_t m = 0; m < specs->n; m++)
4601 int leaf = pivot_category_create_leaf (
4602 parent, ctables_summary_label (&specs->specs[m],
4610 const struct variable *var = nest->vars[level->var_idx];
4611 struct pivot_value *label;
4612 if (level->type == CTL_VAR)
4614 label = pivot_value_new_variable (var);
4615 label->variable.show = level->vlabel;
4617 else if (level->type == CTL_CATEGORY)
4619 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4620 label = ctables_category_create_label (
4621 t->categories[var_get_dict_index (var)],
4622 cv->category, var, &cv->value);
4627 if (k == n_levels - 1)
4628 prev_leaf = pivot_category_create_leaf (parent, label);
4630 groups[k] = pivot_category_create_group__ (parent, label);
4634 cell->axes[a].leaf = prev_leaf;
4644 size_t n_total_cells = 0;
4645 for (size_t j = 0; j < t->n_sections; j++)
4646 n_total_cells += hmap_count (&t->sections[j].cells);
4648 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4649 size_t n_sorted = 0;
4650 for (size_t j = 0; j < t->n_sections; j++)
4652 const struct ctables_section *s = &t->sections[j];
4653 struct ctables_cell *cell;
4654 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4656 sorted[n_sorted++] = cell;
4658 assert (n_sorted <= n_total_cells);
4659 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4661 size_t ids[N_CTDTS];
4662 memset (ids, 0, sizeof ids);
4663 for (size_t j = 0; j < n_sorted; j++)
4665 struct ctables_cell *cell = sorted[j];
4666 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4668 struct ctables_domain *domain = cell->domains[dt];
4669 if (!domain->sequence)
4670 domain->sequence = ++ids[dt];
4677 for (size_t i = 0; i < t->n_sections; i++)
4679 struct ctables_section *s = &t->sections[i];
4681 struct ctables_cell *cell;
4682 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4687 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4688 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4689 for (size_t j = 0; j < specs->n; j++)
4692 size_t n_dindexes = 0;
4694 if (summary_dimension)
4695 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4697 if (categories_dimension)
4699 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4700 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4701 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4702 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4705 dindexes[n_dindexes++] = ctv->leaf;
4708 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4711 int leaf = cell->axes[a].leaf;
4712 if (a == t->summary_axis && !summary_dimension)
4714 dindexes[n_dindexes++] = leaf;
4717 const struct ctables_summary_spec *ss = &specs->specs[j];
4719 struct fmt_spec format = specs->specs[j].format;
4720 bool is_ctables_format = ss->is_ctables_format;
4721 double d = (cell->postcompute
4722 ? ctables_cell_calculate_postcompute (
4723 s, cell, ss, &format, &is_ctables_format, j)
4724 : ctables_summary_value (cell, &cell->summaries[j],
4727 struct pivot_value *value;
4728 if (ct->hide_threshold != 0
4729 && d < ct->hide_threshold
4730 && ctables_summary_function_is_count (ss->function))
4732 value = pivot_value_new_user_text_nocopy (
4733 xasprintf ("<%d", ct->hide_threshold));
4735 else if (d == 0 && ct->zero)
4736 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4737 else if (d == SYSMIS && ct->missing)
4738 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4739 else if (is_ctables_format)
4740 value = pivot_value_new_user_text_nocopy (
4741 ctables_format (d, &format, &ct->ctables_formats));
4744 value = pivot_value_new_number (d);
4745 value->numeric.format = format;
4747 /* XXX should text values be right-justified? */
4748 pivot_table_put (pt, dindexes, n_dindexes, value);
4753 pivot_table_submit (pt);
/* Validates the request to move category labels away from axis A of table T,
   as given by T->label_axis[A].  Records A in T->clabels_from_axis and the
   innermost variable of the first nest in T->clabels_example.  Each problem
   found is reported with msg (SE, ...).  ctables_prepare_table() uses the
   result as a truth value. */
4757 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4759 enum pivot_axis_type label_pos = t->label_axis[a];
4763 t->clabels_from_axis = a;
/* These two names appear only in the diagnostics below. */
4765 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4766 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4768 const struct ctables_stack *stack = &t->stacks[a];
4772 const struct ctables_nest *n0 = &stack->nests[0];
4775 assert (stack->n == 1);
/* The variable whose labels would move is the last one in the nest. */
4779 const struct variable *v0 = n0->vars[n0->n - 1];
4780 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4781 t->clabels_example = v0;
/* A CCT_FUNCTION category (sorting by a summary function) is rejected. */
4783 for (size_t i = 0; i < c0->n_cats; i++)
4784 if (c0->cats[i].type == CCT_FUNCTION)
4786 msg (SE, _("%s=%s is not allowed with sorting based "
4787 "on a summary function."),
4788 subcommand_name, pos_name);
/* The last variable must not be the nest's scale variable. */
4791 if (n0->n - 1 == n0->scale_idx)
4793 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4794 "but %s is a scale variable."),
4795 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest in the stack must end in a variable compatible with V0:
   categorical, same width, same value labels, same category specs. */
4799 for (size_t i = 1; i < stack->n; i++)
4801 const struct ctables_nest *ni = &stack->nests[i];
4803 const struct variable *vi = ni->vars[ni->n - 1];
4804 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4806 if (ni->n - 1 == ni->scale_idx)
4808 msg (SE, _("%s=%s requires the variables to be moved to be "
4809 "categorical, but %s is a scale variable."),
4810 subcommand_name, pos_name, var_get_name (vi));
4813 if (var_get_width (v0) != var_get_width (vi))
4815 msg (SE, _("%s=%s requires the variables to be "
4816 "moved to have the same width, but %s has "
4817 "width %d and %s has width %d."),
4818 subcommand_name, pos_name,
4819 var_get_name (v0), var_get_width (v0),
4820 var_get_name (vi), var_get_width (vi));
4823 if (!val_labs_equal (var_get_value_labels (v0),
4824 var_get_value_labels (vi)))
4826 msg (SE, _("%s=%s requires the variables to be "
4827 "moved to have the same value labels, but %s "
4828 "and %s have different value labels."),
4829 subcommand_name, pos_name,
4830 var_get_name (v0), var_get_name (vi));
4833 if (!ctables_categories_equal (c0, ci))
4835 msg (SE, _("%s=%s requires the variables to be "
4836 "moved to have the same category "
4837 "specifications, but %s and %s have different "
4838 "category specifications."),
4839 subcommand_name, pos_name,
4840 var_get_name (v0), var_get_name (vi));
/* Returns the index of VAR within the array *SUM_VARS, which holds *N
   entries in space for *ALLOCATED.  If VAR is not yet in the array, appends
   it (growing the array with x2nrealloc() when it is full) and increments
   *N. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  /* Look for an existing entry first so that each variable is listed once. */
  size_t idx = 0;
  while (idx < *n && (*sum_vars)[idx] != var)
    idx++;
  if (idx < *n)
    return idx;

  /* Not found: IDX now equals *N, the slot for the new entry. */
  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  (*sum_vars)[idx] = var;
  *n = idx + 1;
  return idx;
}
/* Walks axis expression A.  For every summary specification whose function
   satisfies ctables_function_is_pctsum(), registers A->var in *SUM_VARS
   through add_sum_var() and stores the returned index in the specification's
   sum_var_idx.  Where it recurses, both entries of A->subs[] are visited with
   the same accumulator arguments. */
4863 enumerate_sum_vars (const struct ctables_axis *a,
4864 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Examine the specifications of every summary variant. */
4872 for (size_t i = 0; i < N_CSVS; i++)
4873 for (size_t j = 0; j < a->specs[i].n; j++)
4875 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4876 if (ctables_function_is_pctsum (spec->function))
4877 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into the two subexpressions. */
4883 for (size_t i = 0; i < 2; i++)
4884 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution:

   - Enumerates the stack of nests on each axis with enumerate_fts() and
     fills in each nest's per-domain-type lists of variable indexes.

   - Gives nests on the summary axis that have no CSV_CELL specifications a
     single default one (CTSF_MEAN if the set is scale, otherwise
     CTSF_COUNT), and clones CSV_CELL into CSV_TOTAL where CSV_TOTAL is
     empty.

   - When T->ctables->smissing_listwise is set, records other nests' scale
     variables as listwise variables.

   - Merges the summary specifications of all nests into T->summary_specs,
     assigning each specification its axis_idx.

   - Collects the sum variables into T->sum_vars.

   The result is the conjunction of ctables_check_label_position() for the
   row and column axes. */
4890 ctables_prepare_table (struct ctables_table *t)
4892 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4895 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4897 for (size_t j = 0; j < t->stacks[a].n; j++)
4899 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each domain type, build the list of indexes into nest->vars[] that
   belong to that domain.  At most nest->n indexes can be stored. */
4900 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4902 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4903 nest->n_domains[dt] = 0;
4905 for (size_t k = 0; k < nest->n; k++)
4907 if (k == nest->scale_idx)
4916 if (a != PIVOT_AXIS_LAYER)
4923 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4924 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4925 : a == PIVOT_AXIS_ROW)
4927 if (k == nest->n - 1
4928 || (nest->scale_idx == nest->n - 1
4929 && k == nest->n - 2))
4935 if (a == PIVOT_AXIS_COLUMN)
4940 if (a == PIVOT_AXIS_ROW)
4945 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Stack consisting of a single nest that has no variables. */
4952 struct ctables_nest *nest = xmalloc (sizeof *nest);
4953 *nest = (struct ctables_nest) { .n = 0 };
4954 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4956 /* There's no point in moving labels away from an axis that has no
4957 labels, so avoid dealing with the special cases around that. */
4958 t->label_axis[a] = a;
4961 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4962 for (size_t i = 0; i < stack->n; i++)
4964 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified: supply one default specification. */
4965 if (!nest->specs[CSV_CELL].n)
4967 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4968 specs->specs = xmalloc (sizeof *specs->specs);
4971 enum ctables_summary_function function
4972 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4974 *specs->specs = (struct ctables_summary_spec) {
4975 .function = function,
4976 .format = ctables_summary_default_format (function, specs->var),
4979 specs->var = nest->vars[0];
4981 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4982 &nest->specs[CSV_CELL]);
4984 else if (!nest->specs[CSV_TOTAL].n)
4985 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4986 &nest->specs[CSV_CELL]);
/* Listwise handling of missing scale values: gather the scale variables of
   the other nests that share this nest's group_head. */
4988 if (t->ctables->smissing_listwise)
4990 struct variable **listwise_vars = NULL;
4992 size_t allocated = 0;
4994 for (size_t j = nest->group_head; j < stack->n; j++)
4996 const struct ctables_nest *other_nest = &stack->nests[j];
4997 if (other_nest->group_head != nest->group_head)
5000 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5003 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5004 sizeof *listwise_vars);
5005 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5008 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5011 listwise_vars = xmemdup (listwise_vars,
5012 n * sizeof *listwise_vars);
5013 nest->specs[sv].listwise_vars = listwise_vars;
5014 nest->specs[sv].n_listwise_vars = n;
/* Merge the specification sets of all nests and variants into
   T->summary_specs.  Each merge_item tracks a position (ofs) within one
   set. */
5019 struct ctables_summary_spec_set *merged = &t->summary_specs;
5020 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5022 for (size_t j = 0; j < stack->n; j++)
5024 const struct ctables_nest *nest = &stack->nests[j];
5026 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5027 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest current item according to merge_item_compare_3way()
   and append its specification to MERGED. */
5032 struct merge_item min = items[0];
5033 for (size_t j = 1; j < n_left; j++)
5034 if (merge_item_compare_3way (&items[j], &min) < 0)
5037 if (merged->n >= merged->allocated)
5038 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5039 sizeof *merged->specs);
5040 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to MIN maps to the entry just appended.
   Advance it, and drop it (by moving the last item into its slot) once its
   set is exhausted. */
5042 for (size_t j = 0; j < n_left; )
5044 if (merge_item_compare_3way (&items[j], &min) == 0)
5046 struct merge_item *item = &items[j];
5047 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5048 if (++item->ofs >= item->set->n)
5050 items[j] = items[--n_left];
/* NOTE(review): the printf calls below look like debugging output; confirm
   whether they are compiled in. */
5060 for (size_t j = 0; j < merged->n; j++)
5061 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5063 for (size_t j = 0; j < stack->n; j++)
5065 const struct ctables_nest *nest = &stack->nests[j];
5066 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5068 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5069 for (size_t k = 0; k < specs->n; k++)
5070 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5071 specs->specs[k].axis_idx);
/* Collect the variables used by the summary axis's sum functions. */
5077 size_t allocated_sum_vars = 0;
5078 enumerate_sum_vars (t->axes[t->summary_axis],
5079 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5081 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5082 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest stacked on axis A of table T, takes the value in case C of
   the nest's last variable and, if it matches one of that variable's
   categories, inserts it into T with ctables_value_insert(). */
5086 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5087 enum pivot_axis_type a)
5089 struct ctables_stack *stack = &t->stacks[a];
5090 for (size_t i = 0; i < stack->n; i++)
5092 const struct ctables_nest *nest = &stack->nests[i];
5093 const struct variable *var = nest->vars[nest->n - 1];
5094 const union value *value = case_data (c, var);
/* System-missing numeric values are treated specially.
   NOTE(review): presumably they are skipped; confirm. */
5096 if (var_is_numeric (var) && value->f == SYSMIS)
5099 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5101 ctables_value_insert (t, value, var_get_width (var));
5106 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5108 const struct ctables_value *const *ap = a_;
5109 const struct ctables_value *const *bp = b_;
5110 const struct ctables_value *a = *ap;
5111 const struct ctables_value *b = *bp;
5112 const int *width = width_;
5113 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of the values in
   T->clabels_values_map, sorts it by value, and numbers the values by
   storing each one's array position in its 'leaf' member.  The width and
   value labels come from T->clabels_example. */
5117 ctables_sort_clabels_values (struct ctables_table *t)
5119 const struct variable *v0 = t->clabels_example;
5120 int width = var_get_width (v0);
5122 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Also insert each labeled value of V0 that matches one of its categories,
   whether or not it occurred in the data. */
5125 const struct val_labs *val_labs = var_get_value_labels (v0);
5126 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5127 vl = val_labs_next (val_labs, vl))
5128 if (ctables_categories_match (c0, &vl->value, v0))
5129 ctables_value_insert (t, &vl->value, width);
/* Copy the map's contents into an array so that it can be sorted. */
5132 size_t n = hmap_count (&t->clabels_values_map);
5133 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5135 struct ctables_value *clv;
5137 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5138 t->clabels_values[i++] = clv;
5139 t->n_clabels_values = n;
5142 sort (t->clabels_values, n, sizeof *t->clabels_values,
5143 compare_clabels_values_3way, &width);
/* After sorting, a value's leaf index is its position in the array. */
5145 for (size_t i = 0; i < n; i++)
5146 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, via ctables_add_occurrence(), values of VAR selected
   by the categories in CATS.  A category is handled according to its type:
   an explicit number or string contributes that value directly, while the
   remaining branches scan VAR's value labels and add the labeled values
   that satisfy the branch's test. */
5150 ctables_add_category_occurrences (const struct variable *var,
5151 struct hmap *occurrences,
5152 const struct ctables_categories *cats)
5154 const struct val_labs *val_labs = var_get_value_labels (var);
5156 for (size_t i = 0; i < cats->n_cats; i++)
5158 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric category. */
5162 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string category: build a temporary value, padded on the right
   with spaces to the variable's width. */
5168 int width = var_get_width (var);
5170 value_init (&value, width);
5171 value_copy_buf_rpad (&value, width,
5172 CHAR_CAST (uint8_t *, c->string.string),
5173 c->string.length, ' ');
5174 ctables_add_occurrence (var, &value, occurrences);
5175 value_destroy (&value, width);
/* Numeric range: labeled values within [nrange[0], nrange[1]]. */
5180 assert (var_is_numeric (var));
5181 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5182 vl = val_labs_next (val_labs, vl))
5183 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5184 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
5188 assert (var_is_alpha (var));
5189 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5190 vl = val_labs_next (val_labs, vl))
5191 if (in_string_range (&vl->value, var, c->srange))
5192 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing for VAR. */
5196 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5197 vl = val_labs_next (val_labs, vl))
5198 if (var_is_value_missing (var, &vl->value))
5199 ctables_add_occurrence (var, &vl->value, occurrences);
/* All labeled values. */
5203 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5204 vl = val_labs_next (val_labs, vl))
5205 ctables_add_occurrence (var, &vl->value, occurrences);
5208 case CCT_POSTCOMPUTE:
/* Labeled values, excluding missing ones unless the category includes
   missing values. */
5218 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5219 vl = val_labs_next (val_labs, vl))
5220 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5221 ctables_add_occurrence (var, &vl->value, occurrences);
5224 case CCT_EXCLUDED_MISSING:
/* Recursive helper that inserts cells into section S for combinations of
   the values recorded in S->occurrences.  A is the axis being processed and
   A_IDX the index of the variable within that axis's nest.  CATS collects
   the category chosen for each variable so far, and C is a scratch case
   whose data is overwritten with each combination.  Once all axes are
   processed, the cell is inserted with ctables_cell_insert__(). */
5231 ctables_section_recurse_add_empty_categories (
5232 struct ctables_section *s,
5233 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5234 enum pivot_axis_type a, size_t a_idx)
/* Every axis has been handled: insert the cell. */
5236 if (a >= PIVOT_N_AXES)
5237 ctables_cell_insert__ (s, c, cats);
/* Nothing (more) on this axis: move on to the next axis. */
5238 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5239 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5242 const struct variable *var = s->nests[a]->vars[a_idx];
5243 const struct ctables_categories *categories = s->table->categories[
5244 var_get_dict_index (var)];
5245 int width = var_get_width (var);
5246 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5247 const struct ctables_occurrence *o;
/* Try each recorded value of VAR in turn by writing it into C. */
5248 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5250 union value *value = case_data_rw (c, var);
5251 value_destroy (value, width);
5252 value_clone (value, &o->value, width);
5253 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5254 assert (cats[a][a_idx] != NULL);
5255 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not driven by data values, so they are added
   separately. */
5258 for (size_t i = 0; i < categories->n_cats; i++)
5260 const struct ctables_category *cat = &categories->cats[i];
5261 if (cat->type == CCT_POSTCOMPUTE)
5263 cats[a][a_idx] = cat;
5264 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for empty categories.  For each non-scale
   variable in S's nests whose categories have show_empty set, records the
   category values in S->occurrences; then creates a blank case and lets
   ctables_section_recurse_add_empty_categories() insert the cells. */
5271 ctables_section_add_empty_categories (struct ctables_section *s)
5273 bool show_empty = false;
5274 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5276 for (size_t k = 0; k < s->nests[a]->n; k++)
5277 if (k != s->nests[a]->scale_idx)
5279 const struct variable *var = s->nests[a]->vars[k];
5280 const struct ctables_categories *cats = s->table->categories[
5281 var_get_dict_index (var)];
5282 if (cats->show_empty)
5285 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension of CATS is a fixed 10, matching the recursive
   helper's parameter type. */
5291 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5292 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5293 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S without destroying its hash maps: removes every
   recorded occurrence, every cell (destroying the cell's values and
   summaries), and every domain, then shrinks the cell and domain maps. */
5298 ctables_section_clear (struct ctables_section *s)
/* Occurrences, kept per axis and per non-scale variable. */
5300 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5302 const struct ctables_nest *nest = s->nests[a];
5303 for (size_t i = 0; i < nest->n; i++)
5304 if (i != nest->scale_idx)
5306 const struct variable *var = nest->vars[i];
5307 int width = var_get_width (var);
5308 struct ctables_occurrence *o, *next;
5309 struct hmap *map = &s->occurrences[a][i];
5310 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5312 value_destroy (&o->value, width);
5313 hmap_delete (map, &o->node);
/* Cells. */
5320 struct ctables_cell *cell, *next_cell;
5321 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5323 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5325 const struct ctables_nest *nest = s->nests[a];
5326 for (size_t i = 0; i < nest->n; i++)
5327 if (i != nest->scale_idx)
5328 value_destroy (&cell->axes[a].cvs[i].value,
5329 var_get_width (nest->vars[i]));
5330 free (cell->axes[a].cvs);
/* The cell's summaries follow the specifications of the summary axis's
   nest for the cell's summary variant. */
5333 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5334 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5335 for (size_t i = 0; i < specs->n; i++)
5336 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5337 free (cell->summaries);
5339 hmap_delete (&s->cells, &cell->node);
5342 hmap_shrink (&s->cells);
/* Domains, one map per domain type. */
5344 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5346 struct ctables_domain *domain, *next_domain;
5347 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5350 free (domain->sums);
5351 hmap_delete (&s->domains[dt], &domain->node);
5354 hmap_shrink (&s->domains[dt]);
5359 ctables_section_uninit (struct ctables_section *s)
5361 ctables_section_clear (s);
5363 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5365 struct ctables_nest *nest = s->nests[a];
5366 for (size_t i = 0; i < nest->n; i++)
5367 hmap_destroy (&s->occurrences[a][i]);
5368 free (s->occurrences[a]);
5371 hmap_destroy (&s->cells);
5372 for (size_t i = 0; i < N_CTDTS; i++)
5373 hmap_destroy (&s->domains[i]);
/* Empties table T: clears each of its sections and, if category labels were
   moved (T->clabels_example is set), removes all values from
   T->clabels_values_map and frees the T->clabels_values array. */
5377 ctables_table_clear (struct ctables_table *t)
5379 for (size_t i = 0; i < t->n_sections; i++)
5380 ctables_section_clear (&t->sections[i]);
5382 if (t->clabels_example)
/* The values are destroyed using the example variable's width. */
5384 int width = var_get_width (t->clabels_example);
5385 struct ctables_value *value, *next_value;
5386 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5387 &t->clabels_values_map)
5389 value_destroy (&value->value, width);
5390 hmap_delete (&t->clabels_values_map, &value->node);
5393 hmap_shrink (&t->clabels_values_map);
/* Reset the array so that it is not left dangling. */
5395 free (t->clabels_values);
5396 t->clabels_values = NULL;
5397 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases in INPUT, which belong to dataset DS.
   Sections are allocated for each table, and then the input is processed one
   group at a time.  Every case in a group is inserted into every section of
   every table, and afterward each table is output and cleared.  Returns the
   result of casegrouper_destroy(). */
5402 ctables_execute (struct dataset *ds, struct casereader *input,
5405 for (size_t i = 0; i < ct->n_tables; i++)
5407 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of row, column, and layer nests,
   counting an axis without nests as one. */
5408 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5409 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5410 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5411 sizeof *t->sections);
5412 size_t ix[PIVOT_N_AXES];
5413 ctables_table_add_section (t, 0, ix);
/* Group by the SPLIT FILE variables only when the split type is
   SPLIT_SEPARATE; otherwise group on no variables. */
5416 struct dictionary *dict = dataset_dict (ds);
5417 struct casegrouper *grouper
5418 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5419 ? casegrouper_create_splits (input, dict)
5420 : casegrouper_create_vars (input, NULL, 0));
5421 struct casereader *group;
5422 while (casegrouper_get_next_group (grouper, &group))
5424 /* Output SPLIT FILE variables. */
5425 struct ccase *c = casereader_peek (group, 0);
5428 output_split_file_values (ds, c);
/* Accumulate every case of the group into every table. */
5432 bool warn_on_invalid = true;
5433 for (c = casereader_read (group); c;
5434 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the dictionary's case weight.  E_WEIGHT is derived from
   CT->e_weight when that is set. */
5436 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5437 double e_weight = (ct->e_weight
5438 ? var_force_valid_weight (ct->e_weight,
5439 case_num (c, ct->e_weight),
5443 for (size_t i = 0; i < ct->n_tables; i++)
5445 struct ctables_table *t = ct->tables[i];
5447 for (size_t j = 0; j < t->n_sections; j++)
5448 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose category labels are shown on another axis also record the
   values of the moved variables. */
5450 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5451 if (t->label_axis[a] != a)
5452 ctables_insert_clabels_values (t, c, a);
5455 casereader_destroy (group);
/* Finish, output, and reset each table for this group. */
5457 for (size_t i = 0; i < ct->n_tables; i++)
5459 struct ctables_table *t = ct->tables[i];
5461 if (t->clabels_example)
5462 ctables_sort_clabels_values (t);
5464 for (size_t j = 0; j < t->n_sections; j++)
5465 ctables_section_add_empty_categories (&t->sections[j]);
5467 ctables_table_output (ct, t);
5468 ctables_table_clear (t);
5471 return casegrouper_destroy (grouper);
/* Type of a function that parses one level of a postcompute expression from
   a lexer, given a dictionary, and returns the parsed expression.  The
   binary-operator parsers take a function of this type to parse their
   operands. */
5476 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5477 struct dictionary *);
/* Releases the resources owned by postcompute expression E: the string of a
   string category, both strings of a string range, both subexpressions
   (recursively) of an operator node, and E's location.
   NOTE(review): callers pass a possibly-null E, and E itself is presumably
   freed here as well; confirm both. */
5480 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5486 case CTPO_CAT_STRING:
5487 ss_dealloc (&e->string);
5490 case CTPO_CAT_SRANGE:
5491 for (size_t i = 0; i < 2; i++)
5492 ss_dealloc (&e->srange[i]);
/* Operator nodes own their subexpressions. */
5501 for (size_t i = 0; i < 2; i++)
5502 ctables_pcexpr_destroy (e->subs[i]);
/* No type-specific cleanup is visible for the following operations. */
5506 case CTPO_CAT_NUMBER:
5507 case CTPO_CAT_NRANGE:
5508 case CTPO_CAT_MISSING:
5509 case CTPO_CAT_OTHERNM:
5510 case CTPO_CAT_SUBTOTAL:
5511 case CTPO_CAT_TOTAL:
5515 msg_location_destroy (e->location);
5520 static struct ctables_pcexpr *
5521 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5522 struct ctables_pcexpr *sub0,
5523 struct ctables_pcexpr *sub1)
5525 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5526 *e = (struct ctables_pcexpr) {
5528 .subs = { sub0, sub1 },
5529 .location = msg_location_merged (sub0->location, sub1->location),
5534 /* How to parse an operator: pairs a lexer token with the postcompute
   operation that it denotes. */
5537 enum token_type token; /* Token that represents the operator. */
5538 enum ctables_postcompute_op op; /* Operation for the new binary node. */
/* Looks in the N_OPS operators in OPS for one whose token equals LEXER's
   current token.  T_NEG_NUM is treated differently from the other tokens
   once a match is found.
   NOTE(review): presumably every token except T_NEG_NUM is consumed, the
   matching operator is returned, and NULL is returned when none matches;
   confirm. */
5541 static const struct operator *
5542 ctables_pcexpr_match_operator (struct lexer *lexer,
5543 const struct operator ops[], size_t n_ops)
5545 for (const struct operator *op = ops; op < ops + n_ops; op++)
5546 if (lex_token (lexer) == op->token)
5548 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS operators in OPS,
   given that the first operand LHS has already been parsed.  Each further
   operand is parsed with PARSE_NEXT_LEVEL, and the operands are combined
   left to right into binary nodes.  If CHAIN_WARNING is nonnull, it is
   issued as a warning at LHS's location when more than one operator has
   been parsed.  LHS is destroyed on the failure path. */
5557 static struct ctables_pcexpr *
5558 ctables_pcexpr_parse_binary_operators__ (
5559 struct lexer *lexer, struct dictionary *dict,
5560 const struct operator ops[], size_t n_ops,
5561 parse_recursively_func *parse_next_level,
5562 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators parsed so far. */
5564 for (int op_count = 0; ; op_count++)
5566 const struct operator *op
5567 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5570 if (op_count > 1 && chain_warning)
5571 msg_at (SW, lhs->location, "%s", chain_warning);
5576 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5579 ctables_pcexpr_destroy (lhs);
/* The combined node becomes the left operand of the next operator. */
5583 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: parses the first operand
   with PARSE_NEXT_LEVEL and then hands the rest of the chain to
   ctables_pcexpr_parse_binary_operators__().  OPS, N_OPS, and CHAIN_WARNING
   are passed through unchanged. */
5587 static struct ctables_pcexpr *
5588 ctables_pcexpr_parse_binary_operators (
5589 struct lexer *lexer, struct dictionary *dict,
5590 const struct operator ops[], size_t n_ops,
5591 parse_recursively_func *parse_next_level, const char *chain_warning)
5593 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5597 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5599 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this function
   for parenthesized subexpressions before its definition appears. */
5602 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5603 struct dictionary *);
5605 static struct ctables_pcexpr
5606 ctpo_cat_nrange (double low, double high)
5608 return (struct ctables_pcexpr) {
5609 .op = CTPO_CAT_NRANGE,
5610 .nrange = { low, high },
5614 static struct ctables_pcexpr
5615 ctpo_cat_srange (struct substring low, struct substring high)
5617 return (struct ctables_pcexpr) {
5618 .op = CTPO_CAT_SRANGE,
5619 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER:

   - a numeric constant;
   - the keyword MISSING, OTHERNM, or TOTAL;
   - SUBTOTAL, optionally followed by a bracketed 1-based index;
   - a bracketed category: a number, a string, or a range written with THRU
     (whose low end may be LO and whose high end may be HI);
   - a parenthesized subexpression.

   The expression is built in the local E and returned as a heap copy whose
   location spans the tokens that were consumed. */
5623 static struct ctables_pcexpr *
5624 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5626 int start_ofs = lex_ofs (lexer);
5627 struct ctables_pcexpr e;
5628 if (lex_is_number (lexer))
5630 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5631 .number = lex_number (lexer) };
5634 else if (lex_match_id (lexer, "MISSING"))
5635 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5636 else if (lex_match_id (lexer, "OTHERNM"))
5637 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5638 else if (lex_match_id (lexer, "TOTAL"))
5639 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5640 else if (lex_match_id (lexer, "SUBTOTAL"))
/* Index 0 stands for a SUBTOTAL written without a bracketed index. */
5642 size_t subtotal_index = 0;
5643 if (lex_match (lexer, T_LBRACK))
5645 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5647 subtotal_index = lex_integer (lexer);
5649 if (!lex_force_match (lexer, T_RBRACK))
5652 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5653 .subtotal_index = subtotal_index };
5655 else if (lex_match (lexer, T_LBRACK))
/* [LO THRU x]: an open-ended low bound is a null string for string ranges
   and -DBL_MAX for numeric ranges. */
5657 if (lex_match_id (lexer, "LO"))
5659 if (!lex_force_match_id (lexer, "THRU"))
5662 if (lex_is_string (lexer))
5664 struct substring low = { .string = NULL };
5665 struct substring high = parse_substring (lexer, dict);
5666 e = ctpo_cat_srange (low, high);
5670 if (!lex_force_num (lexer))
5672 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* [number] or [number THRU (HI | number)]. */
5676 else if (lex_is_number (lexer))
5678 double number = lex_number (lexer);
5680 if (lex_match_id (lexer, "THRU"))
5682 if (lex_match_id (lexer, "HI"))
5683 e = ctpo_cat_nrange (number, DBL_MAX);
5686 if (!lex_force_num (lexer))
5688 e = ctpo_cat_nrange (number, lex_number (lexer));
5693 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* [string] or [string THRU (HI | string)]. */
5696 else if (lex_is_string (lexer))
5698 struct substring s = parse_substring (lexer, dict);
5700 if (lex_match_id (lexer, "THRU"))
5702 struct substring high;
5704 if (lex_match_id (lexer, "HI"))
5705 high = (struct substring) { .string = NULL };
5708 if (!lex_force_string (lexer))
5713 high = parse_substring (lexer, dict);
5716 e = ctpo_cat_srange (s, high);
5719 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5723 lex_error (lexer, NULL);
/* If the closing bracket is missing, release the strings parsed so far. */
5727 if (!lex_force_match (lexer, T_RBRACK))
5729 if (e.op == CTPO_CAT_STRING)
5730 ss_dealloc (&e.string);
5731 else if (e.op == CTPO_CAT_SRANGE)
5733 ss_dealloc (&e.srange[0]);
5734 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression, parsed from the lowest-precedence level. */
5739 else if (lex_match (lexer, T_LPAREN))
5741 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5744 if (!lex_force_match (lexer, T_RPAREN))
5746 ctables_pcexpr_destroy (ep);
5753 lex_error (lexer, NULL);
/* The location covers START_OFS through the last token consumed. */
5757 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5758 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for the negation of SUB.  Its location
   runs from token offset START_OFS in LEXER through the token just before
   the lexer's current position.
   NOTE(review): the initializers that set the operation and attach SUB are
   not visible here; confirm. */
5761 static struct ctables_pcexpr *
5762 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5763 struct lexer *lexer, int start_ofs)
5765 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5766 *e = (struct ctables_pcexpr) {
5769 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Operands are parsed with
   ctables_pcexpr_parse_primary() and T_EXP maps to CTPO_POW.  A chain of
   exponentiation operators draws a warning about left-associativity.

   When the current token is a negative number directly followed by the
   exponentiation operator, the number's negation becomes the left operand, a
   constant whose location covers just that token.  The resulting chain is
   then wrapped in a negation node that starts at the same token offset. */
5774 static struct ctables_pcexpr *
5775 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5777 static const struct operator op = { T_EXP, CTPO_POW };
5779 const char *chain_warning =
5780 _("The exponentiation operator (`**') is left-associative: "
5781 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5782 "To disable this warning, insert parentheses.");
5784 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5785 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5786 ctables_pcexpr_parse_primary,
5789 /* Special case for situations like "-5**6", which must be parsed as
5792 int start_ofs = lex_ofs (lexer);
5793 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5794 *lhs = (struct ctables_pcexpr) {
5795 .op = CTPO_CONSTANT,
5796 .number = -lex_tokval (lexer),
5797 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5801 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5802 lexer, dict, &op, 1,
5803 ctables_pcexpr_parse_primary, chain_warning, lhs);
5807 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5810 /* Parses the unary minus level.  Without a leading T_DASH, this defers to
   ctables_pcexpr_parse_exp().  Otherwise it parses the operand recursively,
   so that repeated minus signs nest, and wraps it in a negation node whose
   location starts at the minus sign. */
5811 static struct ctables_pcexpr *
5812 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5814 int start_ofs = lex_ofs (lexer);
5815 if (!lex_match (lexer, T_DASH))
5816 return ctables_pcexpr_parse_exp (lexer, dict);
5818 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5822 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5825 /* Parses the multiplication and division level. */
/* Delegates to ctables_pcexpr_parse_binary_operators() with a two-entry
   operator table and ctables_pcexpr_parse_neg as the operand parser, so
   unary minus is parsed below this level.  NULL is passed as the last
   argument. */
5826 static struct ctables_pcexpr *
5827 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
/* Token type paired with the expression operator it produces. */
5829 static const struct operator ops[] =
5831 { T_ASTERISK, CTPO_MUL },
5832 { T_SLASH, CTPO_DIV },
5835 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5836 sizeof ops / sizeof *ops,
5837 ctables_pcexpr_parse_neg, NULL);
5840 /* Parses the addition and subtraction level. */
/* Delegates to ctables_pcexpr_parse_binary_operators() with a three-entry
   operator table and ctables_pcexpr_parse_mul as the operand parser.  NULL
   is passed as the last argument.  ctables_parse_pcompute() calls this
   function to parse a whole expression. */
5841 static struct ctables_pcexpr *
5842 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
/* Token type paired with the expression operator it produces.
   NOTE(review): T_NEG_NUM maps to CTPO_ADD, presumably so that a negative
   numeric token following an operand is treated as adding a negative
   number -- confirm against the operator parser. */
5844 static const struct operator ops[] =
5846 { T_PLUS, CTPO_ADD },
5847 { T_DASH, CTPO_SUB },
5848 { T_NEG_NUM, CTPO_ADD },
5851 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5852 ops, sizeof ops / sizeof *ops,
5853 ctables_pcexpr_parse_mul, NULL);
/* Searches the postcomputes hash map of CT for an entry named NAME.

   Candidates are the entries whose hash equals
   utf8_hash_case_string (NAME, 0); a candidate matches when
   utf8_strcasecmp() reports its name equal to NAME.

   NOTE(review): the return statements are elided in this listing.
   Callers in this file test the result for null, so presumably the
   matching entry is returned, or NULL when there is none. */
5856 static struct ctables_postcompute *
5857 ctables_find_postcompute (struct ctables *ct, const char *name)
5859 struct ctables_postcompute *pc;
5860 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5861 utf8_hash_case_string (name, 0), &ct->postcomputes)
5862 if (!utf8_strcasecmp (pc->name, name))
/* Parses one postcompute definition of the form

       &name = EXPR (expression)

   and records it in the postcomputes hash map that
   ctables_find_postcompute() searches.

   Visible steps: require T_AND and an identifier, copy the identifier as
   the name, require T_EQUALS, the EXPR keyword and T_LPAREN, parse the
   expression with ctables_pcexpr_parse_add(), and require T_RPAREN.  An
   existing entry with the same name has its expression and location
   destroyed after a warning and a note are issued; otherwise a new entry is
   allocated and inserted.  Finally the location and the label are stored,
   the label being the source text of the expression tokens.

   NOTE(review): the return type line, the rest of the parameter list, the
   branch structure and the error exits are elided in this listing, so
   return values and the cleanup on failure are not described. */
5868 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Offset of the token before the current one.  cmd_ctables() matches the
   PCOMPUTE keyword right before calling this function, so this is
   presumably the offset of that keyword. */
5871 int pcompute_start = lex_ofs (lexer) - 1;
5873 if (!lex_match (lexer, T_AND))
5875 lex_error_expecting (lexer, "&");
5878 if (!lex_force_id (lexer))
5881 char *name = ss_xstrdup (lex_tokss (lexer));
5884 if (!lex_force_match (lexer, T_EQUALS)
5885 || !lex_force_match_id (lexer, "EXPR")
5886 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets of the first and last tokens of the expression, used later
   to build the label. */
5892 int expr_start = lex_ofs (lexer);
5893 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5894 int expr_end = lex_ofs (lexer) - 1;
5895 if (!expr || !lex_force_match (lexer, T_RPAREN))
5897 ctables_pcexpr_destroy (expr);
5901 int pcompute_end = lex_ofs (lexer) - 1;
5903 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5906 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn at the new location, point at the old one, then drop
   the expression and location of the old definition. */
5909 msg_at (SW, location, _("New definition of &%s will override the "
5910 "previous definition."),
5912 msg_at (SN, pc->location, _("This is the previous definition."));
5914 ctables_pcexpr_destroy (pc->expr);
5915 msg_location_destroy (pc->location);
/* First definition: create the entry and hash it by its name. */
5920 pc = xmalloc (sizeof *pc);
5921 *pc = (struct ctables_postcompute) { .name = name };
5922 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5923 utf8_hash_case_string (pc->name, 0));
5926 pc->location = location;
5928 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary specifications that follows FORMAT in
   PPROPERTIES and stores them in SSS.

   SSS is first reset to an empty set.  Entries are then read until the
   current token is T_ENDCMD, T_SLASH, or an identifier matching LABEL or
   HIDESOURCECATS.  Each entry consists of a summary function, a number
   between 0 and 100 when the function is CTSF_PTILE, and a format
   specifier; the entry is appended to SSS->specs, which grows through
   x2nrealloc() when it is full.

   NOTE(review): the return type, the exits taken when a parse step fails
   and the remaining fields of the appended entry are elided in this
   listing.  The final call to ctables_summary_spec_set_uninit() presumably
   belongs to the failure path -- confirm against the full source. */
5933 ctables_parse_pproperties_format (struct lexer *lexer,
5934 struct ctables_summary_spec_set *sss)
5936 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Stop at the end of the subcommand or at the next PPROPERTIES keyword. */
5938 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5939 && !(lex_token (lexer) == T_ID
5940 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5941 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5942 lex_tokss (lexer)))))
5944 /* Parse function. */
5945 enum ctables_summary_function function;
5946 if (!parse_ctables_summary_function (lexer, &function))
5949 /* Parse percentile. */
5950 double percentile = 0;
5951 if (function == CTSF_PTILE)
5953 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5955 percentile = lex_number (lexer);
/* Parse the format specifier; IS_CTABLES_FORMAT is filled in by the
   helper and copied into the entry below. */
5960 struct fmt_spec format;
5961 bool is_ctables_format;
5962 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the entry, growing the array first when it is full. */
5965 if (sss->n >= sss->allocated)
5966 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5967 sizeof *sss->specs);
5968 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5969 .function = function,
5970 .percentile = percentile,
5972 .is_ctables_format = is_ctables_format,
5978 ctables_summary_spec_set_uninit (sss);
/* Parses the PPROPERTIES subcommand, which applies properties to
   postcomputes that were already defined.

   First a run of &name references is read; each name is looked up with
   ctables_find_postcompute() and an error is reported for an unknown one.
   Then settings are read until T_SLASH or T_ENDCMD:

   - LABEL: requires a string, which replaces the label of every listed
     postcompute.

   - FORMAT: parses a summary specification set with
     ctables_parse_pproperties_format() and clones it into every listed
     postcompute, then releases the parsed set.

   - HIDESOURCECATS: parses a boolean and stores it in every listed
     postcompute.

   Anything else is reported with lex_error_expecting().

   NOTE(review): the return type, the declaration of n_pcs, the statement
   that appends to PCS, the token advances and the error exits are elided
   in this listing, so return values and cleanup are not described. */
5983 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5985 struct ctables_postcompute **pcs = NULL;
5987 size_t allocated_pcs = 0;
5989 while (lex_match (lexer, T_AND))
5991 if (!lex_force_id (lexer))
5993 struct ctables_postcompute *pc
5994 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5997 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6002 if (n_pcs >= allocated_pcs)
6003 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
6007 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6009 if (lex_match_id (lexer, "LABEL"))
/* The equals sign is matched with lex_match(), whose result is ignored,
   so it is optional here and in the branches below. */
6011 lex_match (lexer, T_EQUALS);
6012 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the new label; the old label is
   freed first. */
6015 for (size_t i = 0; i < n_pcs; i++)
6017 free (pcs[i]->label);
6018 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6023 else if (lex_match_id (lexer, "FORMAT"))
6025 lex_match (lexer, T_EQUALS);
6027 struct ctables_summary_spec_set sss;
6028 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own clone of the parsed set.
   NOTE(review): the condition guarding the uninit call on the old specs
   is elided in this listing. */
6031 for (size_t i = 0; i < n_pcs; i++)
6034 ctables_summary_spec_set_uninit (pcs[i]->specs);
6036 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6037 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6039 ctables_summary_spec_set_uninit (&sss);
6041 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6043 lex_match (lexer, T_EQUALS);
6044 bool hide_source_cats;
6045 if (!parse_bool (lexer, &hide_source_cats))
6047 for (size_t i = 0; i < n_pcs; i++)
6048 pcs[i]->hide_source_cats = hide_source_cats;
6052 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time with localtime()
   and formatted by strftime() according to FORMAT.

   NOTE(review): the declaration of the buffer VALUE is elided in this
   listing.  In the visible lines neither the result of localtime() nor the
   return value of strftime() is checked before VALUE is appended. */
6065 put_strftime (struct string *out, time_t now, const char *format)
6067 const struct tm *tm = localtime (&now);
6069 strftime (value, sizeof value, format, tm);
6070 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it by PREFIX.length bytes.

   NOTE(review): the return type and return statements are elided in this
   listing.  put_title_text() uses the result as a condition, so presumably
   it reports whether the prefix was found and skipped. */
6074 skip_prefix (struct substring *s, struct substring prefix)
6076 if (ss_starts_with (*s, prefix))
6078 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START (inclusive) up to EXPR_END (exclusive).

   An identifier token is looked up in DICT; when it names a variable that
   has a label, the label is appended, otherwise the identifier text is.
   Other tokens are appended using their source representation, with a
   space added before and after according to the conditions shown below.

   NOTE(review): the declaration of NEST and the statements executed for
   T_LBRACK and T_RBRACK are elided in this listing.  Presumably NEST
   counts bracket depth and bracketed tokens are left out of the output --
   confirm against the full source. */
6086 put_table_expression (struct string *out, struct lexer *lexer,
6087 struct dictionary *dict, int expr_start, int expr_end)
6090 for (int ofs = expr_start; ofs < expr_end; ofs++)
6092 const struct token *t = lex_ofs_token (lexer, ofs);
6093 if (t->type == T_LBRACK)
6095 else if (t->type == T_RBRACK && nest > 0)
6101 else if (t->type == T_ID)
6103 const struct variable *var
6104 = dict_lookup_var (dict, t->string.string);
6105 const char *label = var ? var_get_label (var) : NULL;
6106 ds_put_cstr (out, label ? label : t->string.string);
/* Space before the token, except for the first token, before a right
   parenthesis, or when OUT already ends in a space. */
6110 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6111 ds_put_byte (out, ' ');
6113 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6114 ds_put_cstr (out, repr);
/* Space after the token, except after the last token or after a left
   parenthesis. */
6117 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6118 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the keywords that start with
   a right parenthesis:

   - )DATE is replaced by NOW formatted with the %x conversion of
     strftime().

   - )TIME is replaced by NOW formatted with the %X conversion.

   - )TABLE is replaced by the output of put_table_expression() for the
     tokens from EXPR_START to EXPR_END.

   Text up to each right parenthesis is copied unchanged, and a right
   parenthesis that does not start one of these keywords is copied as is.

   NOTE(review): the enclosing loop and the statement executed when IN
   becomes empty are elided in this listing; presumably the loop ends
   there. */
6124 put_title_text (struct string *out, struct substring in, time_t now,
6125 struct lexer *lexer, struct dictionary *dict,
6126 int expr_start, int expr_end)
/* Copy everything before the next right parenthesis. */
6130 size_t chunk = ss_find_byte (in, ')');
6131 ds_put_substring (out, ss_head (in, chunk));
6132 ss_advance (&in, chunk);
6133 if (ss_is_empty (in))
6136 if (skip_prefix (&in, ss_cstr (")DATE")))
6137 put_strftime (out, now, "%x");
6138 else if (skip_prefix (&in, ss_cstr (")TIME")))
6139 put_strftime (out, now, "%X");
6140 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6141 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the parenthesis itself and move past it. */
6144 ds_put_byte (out, ')');
6145 ss_advance (&in, 1);
6151 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6153 struct casereader *input = NULL;
6155 struct measure_guesser *mg = measure_guesser_create (ds);
6158 input = proc_open (ds);
6159 measure_guesser_run (mg, input);
6160 measure_guesser_destroy (mg);
6163 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6164 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6165 enum settings_value_show tvars = settings_get_show_variables ();
6166 for (size_t i = 0; i < n_vars; i++)
6167 vlabels[i] = (enum ctables_vlabel) tvars;
6169 struct pivot_table_look *look = pivot_table_look_unshare (
6170 pivot_table_look_ref (pivot_table_look_get_default ()));
6171 look->omit_empty = false;
6173 struct ctables *ct = xmalloc (sizeof *ct);
6174 *ct = (struct ctables) {
6175 .dict = dataset_dict (ds),
6177 .ctables_formats = FMT_SETTINGS_INIT,
6179 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6182 time_t now = time (NULL);
6187 const char *dot_string;
6188 const char *comma_string;
6190 static const struct ctf ctfs[4] = {
6191 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6192 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6193 { CTEF_PAREN, "-,(,),", "-.(.)." },
6194 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6196 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6197 for (size_t i = 0; i < 4; i++)
6199 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6200 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6201 fmt_number_style_from_string (s));
6204 if (!lex_force_match (lexer, T_SLASH))
6207 while (!lex_match_id (lexer, "TABLE"))
6209 if (lex_match_id (lexer, "FORMAT"))
6211 double widths[2] = { SYSMIS, SYSMIS };
6212 double units_per_inch = 72.0;
6214 while (lex_token (lexer) != T_SLASH)
6216 if (lex_match_id (lexer, "MINCOLWIDTH"))
6218 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6221 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6223 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6226 else if (lex_match_id (lexer, "UNITS"))
6228 lex_match (lexer, T_EQUALS);
6229 if (lex_match_id (lexer, "POINTS"))
6230 units_per_inch = 72.0;
6231 else if (lex_match_id (lexer, "INCHES"))
6232 units_per_inch = 1.0;
6233 else if (lex_match_id (lexer, "CM"))
6234 units_per_inch = 2.54;
6237 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6241 else if (lex_match_id (lexer, "EMPTY"))
6246 lex_match (lexer, T_EQUALS);
6247 if (lex_match_id (lexer, "ZERO"))
6249 /* Nothing to do. */
6251 else if (lex_match_id (lexer, "BLANK"))
6252 ct->zero = xstrdup ("");
6253 else if (lex_force_string (lexer))
6255 ct->zero = ss_xstrdup (lex_tokss (lexer));
6261 else if (lex_match_id (lexer, "MISSING"))
6263 lex_match (lexer, T_EQUALS);
6264 if (!lex_force_string (lexer))
6268 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6269 ? ss_xstrdup (lex_tokss (lexer))
6275 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6276 "UNITS", "EMPTY", "MISSING");
6281 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6282 && widths[0] > widths[1])
6284 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6288 for (size_t i = 0; i < 2; i++)
6289 if (widths[i] != SYSMIS)
6291 int *wr = ct->look->width_ranges[TABLE_HORZ];
6292 wr[i] = widths[i] / units_per_inch * 96.0;
6297 else if (lex_match_id (lexer, "VLABELS"))
6299 if (!lex_force_match_id (lexer, "VARIABLES"))
6301 lex_match (lexer, T_EQUALS);
6303 struct variable **vars;
6305 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6309 if (!lex_force_match_id (lexer, "DISPLAY"))
6314 lex_match (lexer, T_EQUALS);
6316 enum ctables_vlabel vlabel;
6317 if (lex_match_id (lexer, "DEFAULT"))
6318 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6319 else if (lex_match_id (lexer, "NAME"))
6321 else if (lex_match_id (lexer, "LABEL"))
6322 vlabel = CTVL_LABEL;
6323 else if (lex_match_id (lexer, "BOTH"))
6325 else if (lex_match_id (lexer, "NONE"))
6329 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6335 for (size_t i = 0; i < n_vars; i++)
6336 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6339 else if (lex_match_id (lexer, "MRSETS"))
6341 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6343 lex_match (lexer, T_EQUALS);
6344 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6347 else if (lex_match_id (lexer, "SMISSING"))
6349 if (lex_match_id (lexer, "VARIABLE"))
6350 ct->smissing_listwise = false;
6351 else if (lex_match_id (lexer, "LISTWISE"))
6352 ct->smissing_listwise = true;
6355 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6359 else if (lex_match_id (lexer, "PCOMPUTE"))
6361 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6364 else if (lex_match_id (lexer, "PPROPERTIES"))
6366 if (!ctables_parse_pproperties (lexer, ct))
6369 else if (lex_match_id (lexer, "WEIGHT"))
6371 if (!lex_force_match_id (lexer, "VARIABLE"))
6373 lex_match (lexer, T_EQUALS);
6374 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6378 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6380 if (lex_match_id (lexer, "COUNT"))
6382 lex_match (lexer, T_EQUALS);
6383 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6386 ct->hide_threshold = lex_integer (lexer);
6389 else if (ct->hide_threshold == 0)
6390 ct->hide_threshold = 5;
6394 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6395 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6396 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6400 if (!lex_force_match (lexer, T_SLASH))
6404 size_t allocated_tables = 0;
6407 if (ct->n_tables >= allocated_tables)
6408 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6409 sizeof *ct->tables);
6411 struct ctables_category *cat = xmalloc (sizeof *cat);
6412 *cat = (struct ctables_category) {
6414 .include_missing = false,
6415 .sort_ascending = true,
6418 struct ctables_categories *c = xmalloc (sizeof *c);
6419 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6420 *c = (struct ctables_categories) {
6427 struct ctables_categories **categories = xnmalloc (n_vars,
6428 sizeof *categories);
6429 for (size_t i = 0; i < n_vars; i++)
6432 struct ctables_table *t = xmalloc (sizeof *t);
6433 *t = (struct ctables_table) {
6435 .slabels_axis = PIVOT_AXIS_COLUMN,
6436 .slabels_visible = true,
6437 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6439 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6440 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6441 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6443 .clabels_from_axis = PIVOT_AXIS_LAYER,
6444 .categories = categories,
6445 .n_categories = n_vars,
6448 ct->tables[ct->n_tables++] = t;
6450 lex_match (lexer, T_EQUALS);
6451 int expr_start = lex_ofs (lexer);
6452 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6454 if (lex_match (lexer, T_BY))
6456 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6457 ct, t, PIVOT_AXIS_COLUMN))
6460 if (lex_match (lexer, T_BY))
6462 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6463 ct, t, PIVOT_AXIS_LAYER))
6467 int expr_end = lex_ofs (lexer);
6469 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6470 && !t->axes[PIVOT_AXIS_LAYER])
6472 lex_error (lexer, _("At least one variable must be specified."));
6476 const struct ctables_axis *scales[PIVOT_N_AXES];
6477 size_t n_scales = 0;
6478 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6480 scales[a] = find_scale (t->axes[a]);
6486 msg (SE, _("Scale variables may appear only on one axis."));
6487 if (scales[PIVOT_AXIS_ROW])
6488 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6489 _("This scale variable appears on the rows axis."));
6490 if (scales[PIVOT_AXIS_COLUMN])
6491 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6492 _("This scale variable appears on the columns axis."));
6493 if (scales[PIVOT_AXIS_LAYER])
6494 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6495 _("This scale variable appears on the layer axis."));
6499 const struct ctables_axis *summaries[PIVOT_N_AXES];
6500 size_t n_summaries = 0;
6501 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6503 summaries[a] = (scales[a]
6505 : find_categorical_summary_spec (t->axes[a]));
6509 if (n_summaries > 1)
6511 msg (SE, _("Summaries may appear only on one axis."));
6512 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6515 msg_at (SN, summaries[a]->loc,
6517 ? _("This variable on the rows axis has a summary.")
6518 : a == PIVOT_AXIS_COLUMN
6519 ? _("This variable on the columns axis has a summary.")
6520 : _("This variable on the layers axis has a summary."));
6522 msg_at (SN, summaries[a]->loc,
6523 _("This is a scale variable, so it always has a "
6524 "summary even if the syntax does not explicitly "
6529 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6530 if (n_summaries ? summaries[a] : t->axes[a])
6532 t->summary_axis = a;
6536 if (lex_token (lexer) == T_ENDCMD)
6538 if (!ctables_prepare_table (t))
6542 if (!lex_force_match (lexer, T_SLASH))
6545 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6547 if (lex_match_id (lexer, "SLABELS"))
6549 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6551 if (lex_match_id (lexer, "POSITION"))
6553 lex_match (lexer, T_EQUALS);
6554 if (lex_match_id (lexer, "COLUMN"))
6555 t->slabels_axis = PIVOT_AXIS_COLUMN;
6556 else if (lex_match_id (lexer, "ROW"))
6557 t->slabels_axis = PIVOT_AXIS_ROW;
6558 else if (lex_match_id (lexer, "LAYER"))
6559 t->slabels_axis = PIVOT_AXIS_LAYER;
6562 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6566 else if (lex_match_id (lexer, "VISIBLE"))
6568 lex_match (lexer, T_EQUALS);
6569 if (!parse_bool (lexer, &t->slabels_visible))
6574 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6579 else if (lex_match_id (lexer, "CLABELS"))
6581 if (lex_match_id (lexer, "AUTO"))
6583 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6584 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6586 else if (lex_match_id (lexer, "ROWLABELS"))
6588 lex_match (lexer, T_EQUALS);
6589 if (lex_match_id (lexer, "OPPOSITE"))
6590 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6591 else if (lex_match_id (lexer, "LAYER"))
6592 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6595 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6599 else if (lex_match_id (lexer, "COLLABELS"))
6601 lex_match (lexer, T_EQUALS);
6602 if (lex_match_id (lexer, "OPPOSITE"))
6603 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6604 else if (lex_match_id (lexer, "LAYER"))
6605 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6608 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6614 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6619 else if (lex_match_id (lexer, "CRITERIA"))
6621 if (!lex_force_match_id (lexer, "CILEVEL"))
6623 lex_match (lexer, T_EQUALS);
6625 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6627 t->cilevel = lex_number (lexer);
6630 else if (lex_match_id (lexer, "CATEGORIES"))
6632 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6636 else if (lex_match_id (lexer, "TITLES"))
6641 if (lex_match_id (lexer, "CAPTION"))
6642 textp = &t->caption;
6643 else if (lex_match_id (lexer, "CORNER"))
6645 else if (lex_match_id (lexer, "TITLE"))
6649 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6652 lex_match (lexer, T_EQUALS);
6654 struct string s = DS_EMPTY_INITIALIZER;
6655 while (lex_is_string (lexer))
6657 if (!ds_is_empty (&s))
6658 ds_put_byte (&s, ' ');
6659 put_title_text (&s, lex_tokss (lexer), now,
6660 lexer, dataset_dict (ds),
6661 expr_start, expr_end);
6665 *textp = ds_steal_cstr (&s);
6667 while (lex_token (lexer) != T_SLASH
6668 && lex_token (lexer) != T_ENDCMD);
6670 else if (lex_match_id (lexer, "SIGTEST"))
6674 t->chisq = xmalloc (sizeof *t->chisq);
6675 *t->chisq = (struct ctables_chisq) {
6677 .include_mrsets = true,
6678 .all_visible = true,
6684 if (lex_match_id (lexer, "TYPE"))
6686 lex_match (lexer, T_EQUALS);
6687 if (!lex_force_match_id (lexer, "CHISQUARE"))
6690 else if (lex_match_id (lexer, "ALPHA"))
6692 lex_match (lexer, T_EQUALS);
6693 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6695 t->chisq->alpha = lex_number (lexer);
6698 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6700 lex_match (lexer, T_EQUALS);
6701 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6704 else if (lex_match_id (lexer, "CATEGORIES"))
6706 lex_match (lexer, T_EQUALS);
6707 if (lex_match_id (lexer, "ALLVISIBLE"))
6708 t->chisq->all_visible = true;
6709 else if (lex_match_id (lexer, "SUBTOTALS"))
6710 t->chisq->all_visible = false;
6713 lex_error_expecting (lexer,
6714 "ALLVISIBLE", "SUBTOTALS");
6720 lex_error_expecting (lexer, "TYPE", "ALPHA",
6721 "INCLUDEMRSETS", "CATEGORIES");
6725 while (lex_token (lexer) != T_SLASH
6726 && lex_token (lexer) != T_ENDCMD);
6728 else if (lex_match_id (lexer, "COMPARETEST"))
6732 t->pairwise = xmalloc (sizeof *t->pairwise);
6733 *t->pairwise = (struct ctables_pairwise) {
6735 .alpha = { .05, .05 },
6736 .adjust = BONFERRONI,
6737 .include_mrsets = true,
6738 .meansvariance_allcats = true,
6739 .all_visible = true,
6748 if (lex_match_id (lexer, "TYPE"))
6750 lex_match (lexer, T_EQUALS);
6751 if (lex_match_id (lexer, "PROP"))
6752 t->pairwise->type = PROP;
6753 else if (lex_match_id (lexer, "MEAN"))
6754 t->pairwise->type = MEAN;
6757 lex_error_expecting (lexer, "PROP", "MEAN");
6761 else if (lex_match_id (lexer, "ALPHA"))
6763 lex_match (lexer, T_EQUALS);
6765 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6767 double a0 = lex_number (lexer);
6770 lex_match (lexer, T_COMMA);
6771 if (lex_is_number (lexer))
6773 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6775 double a1 = lex_number (lexer);
6778 t->pairwise->alpha[0] = MIN (a0, a1);
6779 t->pairwise->alpha[1] = MAX (a0, a1);
6782 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6784 else if (lex_match_id (lexer, "ADJUST"))
6786 lex_match (lexer, T_EQUALS);
6787 if (lex_match_id (lexer, "BONFERRONI"))
6788 t->pairwise->adjust = BONFERRONI;
6789 else if (lex_match_id (lexer, "BH"))
6790 t->pairwise->adjust = BH;
6791 else if (lex_match_id (lexer, "NONE"))
6792 t->pairwise->adjust = 0;
6795 lex_error_expecting (lexer, "BONFERRONI", "BH",
6800 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6802 lex_match (lexer, T_EQUALS);
6803 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6806 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6808 lex_match (lexer, T_EQUALS);
6809 if (lex_match_id (lexer, "ALLCATS"))
6810 t->pairwise->meansvariance_allcats = true;
6811 else if (lex_match_id (lexer, "TESTEDCATS"))
6812 t->pairwise->meansvariance_allcats = false;
6815 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6819 else if (lex_match_id (lexer, "CATEGORIES"))
6821 lex_match (lexer, T_EQUALS);
6822 if (lex_match_id (lexer, "ALLVISIBLE"))
6823 t->pairwise->all_visible = true;
6824 else if (lex_match_id (lexer, "SUBTOTALS"))
6825 t->pairwise->all_visible = false;
6828 lex_error_expecting (lexer, "ALLVISIBLE",
6833 else if (lex_match_id (lexer, "MERGE"))
6835 lex_match (lexer, T_EQUALS);
6836 if (!parse_bool (lexer, &t->pairwise->merge))
6839 else if (lex_match_id (lexer, "STYLE"))
6841 lex_match (lexer, T_EQUALS);
6842 if (lex_match_id (lexer, "APA"))
6843 t->pairwise->apa_style = true;
6844 else if (lex_match_id (lexer, "SIMPLE"))
6845 t->pairwise->apa_style = false;
6848 lex_error_expecting (lexer, "APA", "SIMPLE");
6852 else if (lex_match_id (lexer, "SHOWSIG"))
6854 lex_match (lexer, T_EQUALS);
6855 if (!parse_bool (lexer, &t->pairwise->show_sig))
6860 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6861 "INCLUDEMRSETS", "MEANSVARIANCE",
6862 "CATEGORIES", "MERGE", "STYLE",
6867 while (lex_token (lexer) != T_SLASH
6868 && lex_token (lexer) != T_ENDCMD);
6872 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6873 "CRITERIA", "CATEGORIES", "TITLES",
6874 "SIGTEST", "COMPARETEST");
6878 if (!lex_match (lexer, T_SLASH))
6882 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6883 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6885 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6889 if (!ctables_prepare_table (t))
6892 while (lex_token (lexer) != T_ENDCMD);
6895 input = proc_open (ds);
6896 bool ok = ctables_execute (ds, input, ct);
6897 ok = proc_commit (ds) && ok;
6899 ctables_destroy (ct);
6900 return ok ? CMD_SUCCESS : CMD_FAILURE;
6905 ctables_destroy (ct);