1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 enum ctables_summary_function
75 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
76 #include "ctables.inc"
81 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
83 #include "ctables.inc"
87 static bool ctables_summary_function_is_count (enum ctables_summary_function);
89 enum ctables_domain_type
91 /* Within a section, where stacked variables divide one section from
93 CTDT_TABLE, /* All layers of a whole section. */
94 CTDT_LAYER, /* One layer within a section. */
95 CTDT_LAYERROW, /* Row in one layer within a section. */
96 CTDT_LAYERCOL, /* Column in one layer within a section. */
98 /* Within a subtable, where a subtable pairs an innermost row variable with
99 an innermost column variable within a single layer. */
100 CTDT_SUBTABLE, /* Whole subtable. */
101 CTDT_ROW, /* Row within a subtable. */
102 CTDT_COL, /* Column within a subtable. */
106 struct ctables_domain
108 struct hmap_node node;
110 const struct ctables_cell *example;
113 double d_valid; /* Dictionary weight. */
116 double e_valid; /* Effective weight */
119 double u_valid; /* Unweighted. */
122 struct ctables_sum *sums;
131 enum ctables_summary_variant
140 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
141 all the axes (except the scalar variable, if any). */
142 struct hmap_node node;
144 /* The domains that contain this cell. */
145 uint32_t omit_domains;
146 struct ctables_domain *domains[N_CTDTS];
151 enum ctables_summary_variant sv;
153 struct ctables_cell_axis
155 struct ctables_cell_value
157 const struct ctables_category *category;
165 union ctables_summary *summaries;
172 const struct dictionary *dict;
173 struct pivot_table_look *look;
175 /* CTABLES has a number of extra formats that we implement via custom
176 currency specifications on an alternate fmt_settings. */
177 #define CTEF_NEGPAREN FMT_CCA
178 #define CTEF_NEQUAL FMT_CCB
179 #define CTEF_PAREN FMT_CCC
180 #define CTEF_PCTPAREN FMT_CCD
181 struct fmt_settings ctables_formats;
183 /* If this is NULL, zeros are displayed using the normal print format.
184 Otherwise, this string is displayed. */
187 /* If this is NULL, missing values are displayed using the normal print
188 format. Otherwise, this string is displayed. */
191 /* Indexed by variable dictionary index. */
192 enum ctables_vlabel *vlabels;
194 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
196 bool mrsets_count_duplicates; /* MRSETS. */
197 bool smissing_listwise; /* SMISSING. */
198 struct variable *e_weight; /* WEIGHT. */
199 int hide_threshold; /* HIDESMALLCOUNTS. */
201 struct ctables_table **tables;
205 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
208 struct ctables_postcompute
210 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
211 char *name; /* Name, without leading &. */
213 struct msg_location *location; /* Location of definition. */
214 struct ctables_pcexpr *expr;
216 struct ctables_summary_spec_set *specs;
217 bool hide_source_cats;
220 struct ctables_pcexpr
230 enum ctables_postcompute_op
233 CTPO_CONSTANT, /* 5 */
234 CTPO_CAT_NUMBER, /* [5] */
235 CTPO_CAT_STRING, /* ["STRING"] */
236 CTPO_CAT_NRANGE, /* [LO THRU 5] */
237 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
238 CTPO_CAT_MISSING, /* MISSING */
239 CTPO_CAT_OTHERNM, /* OTHERNM */
240 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
241 CTPO_CAT_TOTAL, /* TOTAL */
255 /* CTPO_CAT_NUMBER. */
258 /* CTPO_CAT_STRING, in dictionary encoding. */
259 struct substring string;
261 /* CTPO_CAT_NRANGE. */
264 /* CTPO_CAT_SRANGE. */
265 struct substring srange[2];
267 /* CTPO_CAT_SUBTOTAL. */
268 size_t subtotal_index;
270 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
271 One element: CTPO_NEG. */
272 struct ctables_pcexpr *subs[2];
275 /* Source location. */
276 struct msg_location *location;
279 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
280 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
281 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
282 struct ctables_pcexpr *sub1);
284 struct ctables_summary_spec_set
286 struct ctables_summary_spec *specs;
290 /* The variable to which the summary specs are applied. */
291 struct variable *var;
293 /* Whether the variable to which the summary specs are applied is a scale
294 variable for the purpose of summarization.
296 (VALIDN and TOTALN act differently for summarizing scale and categorical
300 /* If any of these optional additional scale variables are missing, then
301 treat 'var' as if it's missing too. This is for implementing
302 SMISSING=LISTWISE. */
303 struct variable **listwise_vars;
304 size_t n_listwise_vars;
307 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
308 const struct ctables_summary_spec_set *);
309 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
311 /* A nested sequence of variables, e.g. a > b > c. */
314 struct variable **vars;
317 size_t *domains[N_CTDTS];
318 size_t n_domains[N_CTDTS];
321 struct ctables_summary_spec_set specs[N_CSVS];
324 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
327 struct ctables_nest *nests;
331 static void ctables_stack_uninit (struct ctables_stack *);
335 struct hmap_node node;
340 struct ctables_occurrence
342 struct hmap_node node;
346 struct ctables_section
349 struct ctables_table *table;
350 struct ctables_nest *nests[PIVOT_N_AXES];
353 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
354 struct hmap cells; /* Contains "struct ctables_cell"s. */
355 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
358 static void ctables_section_uninit (struct ctables_section *);
362 struct ctables *ctables;
363 struct ctables_axis *axes[PIVOT_N_AXES];
364 struct ctables_stack stacks[PIVOT_N_AXES];
365 struct ctables_section *sections;
367 enum pivot_axis_type summary_axis;
368 struct ctables_summary_spec_set summary_specs;
369 struct variable **sum_vars;
372 enum pivot_axis_type slabels_axis;
373 bool slabels_visible;
375 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
377 Most commonly, label_axis[a] == a, and in particular we always have
378 label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
380 If ROWLABELS or COLLABELS is specified, then one of
381 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
382 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
384 If any category labels are moved, then 'clabels_example' is one of the
385 variables being moved (and it is otherwise NULL). All of the variables
386 being moved have the same width, value labels, and categories, so this
387 example variable can be used to find those out.
389 The remaining members in this group are relevant only if category labels
392 'clabels_values_map' holds a "struct ctables_value" for all the values
393 that appear in all of the variables in the moved categories. It is
394 accumulated as the data is read. Once the data is fully read, its
395 sorted values are put into 'clabels_values' and 'n_clabels_values'.
397 enum pivot_axis_type label_axis[PIVOT_N_AXES];
398 enum pivot_axis_type clabels_from_axis;
399 const struct variable *clabels_example;
400 struct hmap clabels_values_map;
401 struct ctables_value **clabels_values;
402 size_t n_clabels_values;
404 /* Indexed by variable dictionary index. */
405 struct ctables_categories **categories;
414 struct ctables_chisq *chisq;
415 struct ctables_pairwise *pairwise;
418 struct ctables_categories
421 struct ctables_category *cats;
426 struct ctables_category
428 enum ctables_category_type
430 /* Explicit category lists. */
433 CCT_NRANGE, /* Numerical range. */
434 CCT_SRANGE, /* String range. */
439 /* Totals and subtotals. */
443 /* Implicit category lists. */
448 /* For contributing to TOTALN. */
449 CCT_EXCLUDED_MISSING,
453 struct ctables_category *subtotal;
459 double number; /* CCT_NUMBER. */
460 struct substring string; /* CCT_STRING, in dictionary encoding. */
461 double nrange[2]; /* CCT_NRANGE. */
462 struct substring srange[2]; /* CCT_SRANGE. */
466 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
467 bool hide_subcategories; /* CCT_SUBTOTAL. */
470 /* CCT_POSTCOMPUTE. */
473 const struct ctables_postcompute *pc;
474 enum fmt_type parse_format;
477 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
480 bool include_missing;
484 enum ctables_summary_function sort_function;
485 struct variable *sort_var;
490 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
491 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
492 struct msg_location *location;
496 ctables_category_uninit (struct ctables_category *cat)
501 msg_location_destroy (cat->location);
508 case CCT_POSTCOMPUTE:
512 ss_dealloc (&cat->string);
516 ss_dealloc (&cat->srange[0]);
517 ss_dealloc (&cat->srange[1]);
522 free (cat->total_label);
530 case CCT_EXCLUDED_MISSING:
536 nullable_substring_equal (const struct substring *a,
537 const struct substring *b)
539 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
543 ctables_category_equal (const struct ctables_category *a,
544 const struct ctables_category *b)
546 if (a->type != b->type)
552 return a->number == b->number;
555 return ss_equals (a->string, b->string);
558 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
561 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
562 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
568 case CCT_POSTCOMPUTE:
569 return a->pc == b->pc;
573 return !strcmp (a->total_label, b->total_label);
578 return (a->include_missing == b->include_missing
579 && a->sort_ascending == b->sort_ascending
580 && a->sort_function == b->sort_function
581 && a->sort_var == b->sort_var
582 && a->percentile == b->percentile);
584 case CCT_EXCLUDED_MISSING:
592 ctables_categories_unref (struct ctables_categories *c)
597 assert (c->n_refs > 0);
601 for (size_t i = 0; i < c->n_cats; i++)
602 ctables_category_uninit (&c->cats[i]);
608 ctables_categories_equal (const struct ctables_categories *a,
609 const struct ctables_categories *b)
611 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
614 for (size_t i = 0; i < a->n_cats; i++)
615 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
621 /* Chi-square test (SIGTEST). */
629 /* Pairwise comparison test (COMPARETEST). */
630 struct ctables_pairwise
632 enum { PROP, MEAN } type;
635 bool meansvariance_allcats;
637 enum { BONFERRONI = 1, BH } adjust;
661 struct variable *var;
663 struct ctables_summary_spec_set specs[N_CSVS];
667 struct ctables_axis *subs[2];
670 struct msg_location *loc;
673 static void ctables_axis_destroy (struct ctables_axis *);
682 enum ctables_function_availability
684 CTFA_ALL, /* Any variables. */
685 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
686 //CTFA_MRSETS, /* Only multiple-response sets */
689 struct ctables_summary_spec
691 enum ctables_summary_function function;
692 double percentile; /* CTSF_PTILE only. */
695 struct fmt_spec format;
696 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
703 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
704 const struct ctables_summary_spec *src)
707 dst->label = xstrdup_if_nonnull (src->label);
711 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
718 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
719 const struct ctables_summary_spec_set *src)
721 struct ctables_summary_spec *specs
722 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
723 for (size_t i = 0; i < src->n; i++)
724 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
726 *dst = (struct ctables_summary_spec_set) {
731 .is_scale = src->is_scale,
736 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
738 for (size_t i = 0; i < set->n; i++)
739 ctables_summary_spec_uninit (&set->specs[i]);
740 free (set->listwise_vars);
745 parse_col_width (struct lexer *lexer, const char *name, double *width)
747 lex_match (lexer, T_EQUALS);
748 if (lex_match_id (lexer, "DEFAULT"))
750 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
752 *width = lex_number (lexer);
762 parse_bool (struct lexer *lexer, bool *b)
764 if (lex_match_id (lexer, "NO"))
766 else if (lex_match_id (lexer, "YES"))
770 lex_error_expecting (lexer, "YES", "NO");
776 static enum ctables_function_availability
777 ctables_function_availability (enum ctables_summary_function f)
779 static enum ctables_function_availability availability[] = {
780 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
781 #include "ctables.inc"
785 return availability[f];
789 ctables_summary_function_is_count (enum ctables_summary_function f)
791 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
795 parse_ctables_summary_function (struct lexer *lexer,
796 enum ctables_summary_function *f)
800 enum ctables_summary_function function;
801 struct substring name;
803 static struct pair names[] = {
804 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
805 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
806 #include "ctables.inc"
807 /* The .COUNT suffix may be omitted. */
808 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
809 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
810 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
811 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
812 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
813 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
814 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
818 if (!lex_force_id (lexer))
821 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
822 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
824 *f = names[i].function;
829 lex_error (lexer, _("Expecting summary function name."));
834 ctables_axis_destroy (struct ctables_axis *axis)
842 for (size_t i = 0; i < N_CSVS; i++)
843 ctables_summary_spec_set_uninit (&axis->specs[i]);
848 ctables_axis_destroy (axis->subs[0]);
849 ctables_axis_destroy (axis->subs[1]);
852 msg_location_destroy (axis->loc);
856 static struct ctables_axis *
857 ctables_axis_new_nonterminal (enum ctables_axis_op op,
858 struct ctables_axis *sub0,
859 struct ctables_axis *sub1,
860 struct lexer *lexer, int start_ofs)
862 struct ctables_axis *axis = xmalloc (sizeof *axis);
863 *axis = (struct ctables_axis) {
865 .subs = { sub0, sub1 },
866 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
871 struct ctables_axis_parse_ctx
874 struct dictionary *dict;
876 struct ctables_table *t;
879 static struct fmt_spec
880 ctables_summary_default_format (enum ctables_summary_function function,
881 const struct variable *var)
883 static const enum ctables_format default_formats[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
885 #include "ctables.inc"
888 switch (default_formats[function])
891 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
894 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
897 return *var_get_print_format (var);
904 static struct pivot_value *
905 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
909 static const char *default_labels[] = {
910 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
911 #include "ctables.inc"
915 return (spec->function == CTSF_PTILE
916 ? pivot_value_new_text_format (N_("Percentile %.2f"),
918 : pivot_value_new_text (default_labels[spec->function]));
922 struct substring in = ss_cstr (spec->label);
923 struct substring target = ss_cstr (")CILEVEL");
925 struct string out = DS_EMPTY_INITIALIZER;
928 size_t chunk = ss_find_substring (in, target);
929 ds_put_substring (&out, ss_head (in, chunk));
930 ss_advance (&in, chunk);
932 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
934 ss_advance (&in, target.length);
935 ds_put_format (&out, "%g", cilevel);
941 ctables_summary_function_name (enum ctables_summary_function function)
943 static const char *names[] = {
944 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
945 #include "ctables.inc"
948 return names[function];
952 add_summary_spec (struct ctables_axis *axis,
953 enum ctables_summary_function function, double percentile,
954 const char *label, const struct fmt_spec *format,
955 bool is_ctables_format, const struct msg_location *loc,
956 enum ctables_summary_variant sv)
958 if (axis->op == CTAO_VAR)
960 const char *function_name = ctables_summary_function_name (function);
961 const char *var_name = var_get_name (axis->var);
962 switch (ctables_function_availability (function))
966 msg_at (SE, loc, _("Summary function %s applies only to multiple "
967 "response sets."), function_name);
968 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
974 if (!axis->scale && sv != CSV_TOTAL)
977 _("Summary function %s applies only to scale variables."),
979 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
989 struct ctables_summary_spec_set *set = &axis->specs[sv];
990 if (set->n >= set->allocated)
991 set->specs = x2nrealloc (set->specs, &set->allocated,
994 struct ctables_summary_spec *dst = &set->specs[set->n++];
995 *dst = (struct ctables_summary_spec) {
996 .function = function,
997 .percentile = percentile,
998 .label = xstrdup_if_nonnull (label),
999 .format = (format ? *format
1000 : ctables_summary_default_format (function, axis->var)),
1001 .is_ctables_format = is_ctables_format,
1007 for (size_t i = 0; i < 2; i++)
1008 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1009 format, is_ctables_format, loc, sv))
1015 static struct ctables_axis *ctables_axis_parse_stack (
1016 struct ctables_axis_parse_ctx *);
1019 static struct ctables_axis *
1020 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1022 if (lex_match (ctx->lexer, T_LPAREN))
1024 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1025 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1027 ctables_axis_destroy (sub);
1033 if (!lex_force_id (ctx->lexer))
1036 int start_ofs = lex_ofs (ctx->lexer);
1037 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1041 struct ctables_axis *axis = xmalloc (sizeof *axis);
1042 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1044 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1045 : lex_match_phrase (ctx->lexer, "[C]") ? false
1046 : var_get_measure (var) == MEASURE_SCALE);
1047 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1048 lex_ofs (ctx->lexer) - 1);
1049 if (axis->scale && var_is_alpha (var))
1051 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1053 var_get_name (var));
1054 ctables_axis_destroy (axis);
1062 has_digit (const char *s)
1064 return s[strcspn (s, "0123456789")] != '\0';
1068 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1069 bool *is_ctables_format)
1071 char type[FMT_TYPE_LEN_MAX + 1];
1072 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1075 if (!strcasecmp (type, "NEGPAREN"))
1076 format->type = CTEF_NEGPAREN;
1077 else if (!strcasecmp (type, "NEQUAL"))
1078 format->type = CTEF_NEQUAL;
1079 else if (!strcasecmp (type, "PAREN"))
1080 format->type = CTEF_PAREN;
1081 else if (!strcasecmp (type, "PCTPAREN"))
1082 format->type = CTEF_PCTPAREN;
1085 *is_ctables_format = false;
1086 return (parse_format_specifier (lexer, format)
1087 && fmt_check_output (format)
1088 && fmt_check_type_compat (format, VAL_NUMERIC));
1094 lex_next_error (lexer, -1, -1,
1095 _("Output format %s requires width 2 or greater."), type);
1098 else if (format->d > format->w - 1)
1100 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1101 "greater than decimals."), type);
1106 *is_ctables_format = true;
1111 static struct ctables_axis *
1112 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1114 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1115 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1118 enum ctables_summary_variant sv = CSV_CELL;
1121 int start_ofs = lex_ofs (ctx->lexer);
1123 /* Parse function. */
1124 enum ctables_summary_function function;
1125 if (!parse_ctables_summary_function (ctx->lexer, &function))
1128 /* Parse percentile. */
1129 double percentile = 0;
1130 if (function == CTSF_PTILE)
1132 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1134 percentile = lex_number (ctx->lexer);
1135 lex_get (ctx->lexer);
1140 if (lex_is_string (ctx->lexer))
1142 label = ss_xstrdup (lex_tokss (ctx->lexer));
1143 lex_get (ctx->lexer);
1147 struct fmt_spec format;
1148 const struct fmt_spec *formatp;
1149 bool is_ctables_format = false;
1150 if (lex_token (ctx->lexer) == T_ID
1151 && has_digit (lex_tokcstr (ctx->lexer)))
1153 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1154 &is_ctables_format))
1164 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1165 lex_ofs (ctx->lexer) - 1);
1166 add_summary_spec (sub, function, percentile, label, formatp,
1167 is_ctables_format, loc, sv);
1169 msg_location_destroy (loc);
1171 lex_match (ctx->lexer, T_COMMA);
1172 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1174 if (!lex_force_match (ctx->lexer, T_LBRACK))
1178 else if (lex_match (ctx->lexer, T_RBRACK))
1180 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1187 ctables_axis_destroy (sub);
1191 static const struct ctables_axis *
1192 find_scale (const struct ctables_axis *axis)
1196 else if (axis->op == CTAO_VAR)
1197 return axis->scale ? axis : NULL;
1200 for (size_t i = 0; i < 2; i++)
1202 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1210 static const struct ctables_axis *
1211 find_categorical_summary_spec (const struct ctables_axis *axis)
1215 else if (axis->op == CTAO_VAR)
1216 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1219 for (size_t i = 0; i < 2; i++)
1221 const struct ctables_axis *sum
1222 = find_categorical_summary_spec (axis->subs[i]);
1230 static struct ctables_axis *
1231 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1233 int start_ofs = lex_ofs (ctx->lexer);
1234 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1238 while (lex_match (ctx->lexer, T_GT))
1240 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1243 ctables_axis_destroy (lhs);
1247 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1248 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1250 const struct ctables_axis *outer_scale = find_scale (lhs);
1251 const struct ctables_axis *inner_scale = find_scale (rhs);
1252 if (outer_scale && inner_scale)
1254 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1255 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1256 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1257 ctables_axis_destroy (nest);
1261 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1264 msg_at (SE, nest->loc,
1265 _("Summaries may only be requested for categorical variables "
1266 "at the innermost nesting level."));
1267 msg_at (SN, outer_sum->loc,
1268 _("This outer categorical variable has a summary."));
1269 ctables_axis_destroy (nest);
1279 static struct ctables_axis *
1280 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1282 int start_ofs = lex_ofs (ctx->lexer);
1283 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1287 while (lex_match (ctx->lexer, T_PLUS))
1289 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1292 ctables_axis_destroy (lhs);
1296 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1297 ctx->lexer, start_ofs);
1304 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1305 struct ctables *ct, struct ctables_table *t,
1306 enum pivot_axis_type a)
1308 if (lex_token (lexer) == T_BY
1309 || lex_token (lexer) == T_SLASH
1310 || lex_token (lexer) == T_ENDCMD)
1313 struct ctables_axis_parse_ctx ctx = {
1319 t->axes[a] = ctables_axis_parse_stack (&ctx);
1320 return t->axes[a] != NULL;
1324 ctables_chisq_destroy (struct ctables_chisq *chisq)
1330 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1336 ctables_table_destroy (struct ctables_table *t)
1341 for (size_t i = 0; i < t->n_sections; i++)
1342 ctables_section_uninit (&t->sections[i]);
1345 for (size_t i = 0; i < t->n_categories; i++)
1346 ctables_categories_unref (t->categories[i]);
1347 free (t->categories);
1349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1351 ctables_axis_destroy (t->axes[a]);
1352 ctables_stack_uninit (&t->stacks[a]);
1354 free (t->summary_specs.specs);
1356 struct ctables_value *ctv, *next_ctv;
1357 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1358 &t->clabels_values_map)
1360 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1361 hmap_delete (&t->clabels_values_map, &ctv->node);
1364 hmap_destroy (&t->clabels_values_map);
1365 free (t->clabels_values);
1371 ctables_chisq_destroy (t->chisq);
1372 ctables_pairwise_destroy (t->pairwise);
1377 ctables_destroy (struct ctables *ct)
1382 struct ctables_postcompute *pc, *next_pc;
1383 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1387 msg_location_destroy (pc->location);
1388 ctables_pcexpr_destroy (pc->expr);
1392 ctables_summary_spec_set_uninit (pc->specs);
1395 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1399 fmt_settings_uninit (&ct->ctables_formats);
1400 pivot_table_look_unref (ct->look);
1404 for (size_t i = 0; i < ct->n_tables; i++)
1405 ctables_table_destroy (ct->tables[i]);
1410 static struct ctables_category
1411 cct_nrange (double low, double high)
1413 return (struct ctables_category) {
1415 .nrange = { low, high }
1419 static struct ctables_category
1420 cct_srange (struct substring low, struct substring high)
1422 return (struct ctables_category) {
1424 .srange = { low, high }
1429 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1430 struct ctables_category *cat)
1433 if (lex_match (lexer, T_EQUALS))
1435 if (!lex_force_string (lexer))
1438 total_label = ss_xstrdup (lex_tokss (lexer));
1442 total_label = xstrdup (_("Subtotal"));
1444 *cat = (struct ctables_category) {
1445 .type = CCT_SUBTOTAL,
1446 .hide_subcategories = hide_subcategories,
1447 .total_label = total_label
1452 static struct substring
1453 parse_substring (struct lexer *lexer, struct dictionary *dict)
1455 struct substring s = recode_substring_pool (
1456 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1457 ss_rtrim (&s, ss_cstr (" "));
1463 ctables_table_parse_explicit_category (struct lexer *lexer,
1464 struct dictionary *dict,
1466 struct ctables_category *cat)
1468 if (lex_match_id (lexer, "OTHERNM"))
1469 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1470 else if (lex_match_id (lexer, "MISSING"))
1471 *cat = (struct ctables_category) { .type = CCT_MISSING };
1472 else if (lex_match_id (lexer, "SUBTOTAL"))
1473 return ctables_table_parse_subtotal (lexer, false, cat);
1474 else if (lex_match_id (lexer, "HSUBTOTAL"))
1475 return ctables_table_parse_subtotal (lexer, true, cat);
1476 else if (lex_match_id (lexer, "LO"))
1478 if (!lex_force_match_id (lexer, "THRU"))
1480 if (lex_is_string (lexer))
1482 struct substring sr0 = { .string = NULL };
1483 struct substring sr1 = parse_substring (lexer, dict);
1484 *cat = cct_srange (sr0, sr1);
1486 else if (lex_force_num (lexer))
1488 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1494 else if (lex_is_number (lexer))
1496 double number = lex_number (lexer);
1498 if (lex_match_id (lexer, "THRU"))
1500 if (lex_match_id (lexer, "HI"))
1501 *cat = cct_nrange (number, DBL_MAX);
1504 if (!lex_force_num (lexer))
1506 *cat = cct_nrange (number, lex_number (lexer));
1511 *cat = (struct ctables_category) {
1516 else if (lex_is_string (lexer))
1518 struct substring s = parse_substring (lexer, dict);
1519 if (lex_match_id (lexer, "THRU"))
1521 if (lex_match_id (lexer, "HI"))
1523 struct substring sr1 = { .string = NULL };
1524 *cat = cct_srange (s, sr1);
1528 if (!lex_force_string (lexer))
1533 struct substring sr1 = parse_substring (lexer, dict);
1534 *cat = cct_srange (s, sr1);
1538 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1540 else if (lex_match (lexer, T_AND))
1542 if (!lex_force_id (lexer))
1544 struct ctables_postcompute *pc = ctables_find_postcompute (
1545 ct, lex_tokcstr (lexer));
1548 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1549 msg_at (SE, loc, _("Unknown postcompute &%s."),
1550 lex_tokcstr (lexer));
1551 msg_location_destroy (loc);
1556 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1560 lex_error (lexer, NULL);
1568 parse_category_string (struct msg_location *location,
1569 struct substring s, const struct dictionary *dict,
1570 enum fmt_type format, double *n)
1573 char *error = data_in (s, dict_get_encoding (dict), format,
1574 settings_get_fmt_settings (), &v, 0, NULL);
1577 msg_at (SE, location,
1578 _("Failed to parse category specification as format %s: %s."),
1579 fmt_name (format), error);
1588 static struct ctables_category *
1589 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1590 const struct ctables_pcexpr *e)
1592 struct ctables_category *best = NULL;
1593 size_t n_subtotals = 0;
1594 for (size_t i = 0; i < cats->n_cats; i++)
1596 struct ctables_category *cat = &cats->cats[i];
1599 case CTPO_CAT_NUMBER:
1600 if (cat->type == CCT_NUMBER && cat->number == e->number)
1604 case CTPO_CAT_STRING:
1605 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1609 case CTPO_CAT_NRANGE:
1610 if (cat->type == CCT_NRANGE
1611 && cat->nrange[0] == e->nrange[0]
1612 && cat->nrange[1] == e->nrange[1])
1616 case CTPO_CAT_SRANGE:
1617 if (cat->type == CCT_SRANGE
1618 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1619 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1623 case CTPO_CAT_MISSING:
1624 if (cat->type == CCT_MISSING)
1628 case CTPO_CAT_OTHERNM:
1629 if (cat->type == CCT_OTHERNM)
1633 case CTPO_CAT_SUBTOTAL:
1634 if (cat->type == CCT_SUBTOTAL)
1637 if (e->subtotal_index == n_subtotals)
1639 else if (e->subtotal_index == 0)
1644 case CTPO_CAT_TOTAL:
1645 if (cat->type == CCT_TOTAL)
1659 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1664 static struct ctables_category *
1665 ctables_find_category_for_postcompute (const struct dictionary *dict,
1666 const struct ctables_categories *cats,
1667 enum fmt_type parse_format,
1668 const struct ctables_pcexpr *e)
1670 if (parse_format != FMT_F)
1672 if (e->op == CTPO_CAT_STRING)
1675 if (!parse_category_string (e->location, e->string, dict,
1676 parse_format, &number))
1679 struct ctables_pcexpr e2 = {
1680 .op = CTPO_CAT_NUMBER,
1682 .location = e->location,
1684 return ctables_find_category_for_postcompute__ (cats, &e2);
1686 else if (e->op == CTPO_CAT_SRANGE)
1689 if (!e->srange[0].string)
1690 nrange[0] = -DBL_MAX;
1691 else if (!parse_category_string (e->location, e->srange[0], dict,
1692 parse_format, &nrange[0]))
1695 if (!e->srange[1].string)
1696 nrange[1] = DBL_MAX;
1697 else if (!parse_category_string (e->location, e->srange[1], dict,
1698 parse_format, &nrange[1]))
1701 struct ctables_pcexpr e2 = {
1702 .op = CTPO_CAT_NRANGE,
1703 .nrange = { nrange[0], nrange[1] },
1704 .location = e->location,
1706 return ctables_find_category_for_postcompute__ (cats, &e2);
1709 return ctables_find_category_for_postcompute__ (cats, e);
1713 ctables_recursive_check_postcompute (struct dictionary *dict,
1714 const struct ctables_pcexpr *e,
1715 struct ctables_category *pc_cat,
1716 const struct ctables_categories *cats,
1717 const struct msg_location *cats_location)
1721 case CTPO_CAT_NUMBER:
1722 case CTPO_CAT_STRING:
1723 case CTPO_CAT_NRANGE:
1724 case CTPO_CAT_SRANGE:
1725 case CTPO_CAT_MISSING:
1726 case CTPO_CAT_OTHERNM:
1727 case CTPO_CAT_SUBTOTAL:
1728 case CTPO_CAT_TOTAL:
1730 struct ctables_category *cat = ctables_find_category_for_postcompute (
1731 dict, cats, pc_cat->parse_format, e);
1734 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1736 size_t n_subtotals = 0;
1737 for (size_t i = 0; i < cats->n_cats; i++)
1738 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1739 if (n_subtotals > 1)
1741 msg_at (SE, cats_location,
1742 ngettext ("These categories include %zu instance "
1743 "of SUBTOTAL or HSUBTOTAL, so references "
1744 "from computed categories must refer to "
1745 "subtotals by position, "
1746 "e.g. SUBTOTAL[1].",
1747 "These categories include %zu instances "
1748 "of SUBTOTAL or HSUBTOTAL, so references "
1749 "from computed categories must refer to "
1750 "subtotals by position, "
1751 "e.g. SUBTOTAL[1].",
1754 msg_at (SN, e->location,
1755 _("This is the reference that lacks a position."));
1760 msg_at (SE, pc_cat->location,
1761 _("Computed category &%s references a category not included "
1762 "in the category list."),
1764 msg_at (SN, e->location, _("This is the missing category."));
1765 if (e->op == CTPO_CAT_SUBTOTAL)
1766 msg_at (SN, cats_location,
1767 _("To fix the problem, add subtotals to the "
1768 "list of categories here."));
1769 else if (e->op == CTPO_CAT_TOTAL)
1770 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1771 "CATEGORIES specification."));
1773 msg_at (SN, cats_location,
1774 _("To fix the problem, add the missing category to the "
1775 "list of categories here."));
1778 if (pc_cat->pc->hide_source_cats)
1792 for (size_t i = 0; i < 2; i++)
1793 if (e->subs[i] && !ctables_recursive_check_postcompute (
1794 dict, e->subs[i], pc_cat, cats, cats_location))
1803 all_strings (struct variable **vars, size_t n_vars,
1804 const struct ctables_category *cat)
1806 for (size_t j = 0; j < n_vars; j++)
1807 if (var_is_numeric (vars[j]))
1809 msg_at (SE, cat->location,
1810 _("This category specification may be applied only to string "
1811 "variables, but this subcommand tries to apply it to "
1812 "numeric variable %s."),
1813 var_get_name (vars[j]));
1820 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1821 struct ctables *ct, struct ctables_table *t)
1823 if (!lex_match_id (lexer, "VARIABLES"))
1825 lex_match (lexer, T_EQUALS);
1827 struct variable **vars;
1829 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1832 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1833 for (size_t i = 1; i < n_vars; i++)
1835 const struct fmt_spec *f = var_get_print_format (vars[i]);
1836 if (f->type != common_format->type)
1838 common_format = NULL;
1844 && (fmt_get_category (common_format->type)
1845 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1847 struct ctables_categories *c = xmalloc (sizeof *c);
1848 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1849 for (size_t i = 0; i < n_vars; i++)
1851 struct ctables_categories **cp
1852 = &t->categories[var_get_dict_index (vars[i])];
1853 ctables_categories_unref (*cp);
1857 size_t allocated_cats = 0;
1858 int cats_start_ofs = -1;
1859 int cats_end_ofs = -1;
1860 if (lex_match (lexer, T_LBRACK))
1862 cats_start_ofs = lex_ofs (lexer);
1865 if (c->n_cats >= allocated_cats)
1866 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1868 int start_ofs = lex_ofs (lexer);
1869 struct ctables_category *cat = &c->cats[c->n_cats];
1870 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1872 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1875 lex_match (lexer, T_COMMA);
1877 while (!lex_match (lexer, T_RBRACK));
1878 cats_end_ofs = lex_ofs (lexer) - 1;
1881 struct ctables_category cat = {
1883 .include_missing = false,
1884 .sort_ascending = true,
1886 bool show_totals = false;
1887 char *total_label = NULL;
1888 bool totals_before = false;
1889 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1891 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1893 lex_match (lexer, T_EQUALS);
1894 if (lex_match_id (lexer, "A"))
1895 cat.sort_ascending = true;
1896 else if (lex_match_id (lexer, "D"))
1897 cat.sort_ascending = false;
1900 lex_error_expecting (lexer, "A", "D");
1904 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1906 lex_match (lexer, T_EQUALS);
1907 if (lex_match_id (lexer, "VALUE"))
1908 cat.type = CCT_VALUE;
1909 else if (lex_match_id (lexer, "LABEL"))
1910 cat.type = CCT_LABEL;
1913 cat.type = CCT_FUNCTION;
1914 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1917 if (lex_match (lexer, T_LPAREN))
1919 cat.sort_var = parse_variable (lexer, dict);
1923 if (cat.sort_function == CTSF_PTILE)
1925 lex_match (lexer, T_COMMA);
1926 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1928 cat.percentile = lex_number (lexer);
1932 if (!lex_force_match (lexer, T_RPAREN))
1935 else if (ctables_function_availability (cat.sort_function)
1938 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1943 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1945 lex_match (lexer, T_EQUALS);
1946 if (lex_match_id (lexer, "INCLUDE"))
1947 cat.include_missing = true;
1948 else if (lex_match_id (lexer, "EXCLUDE"))
1949 cat.include_missing = false;
1952 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1956 else if (lex_match_id (lexer, "TOTAL"))
1958 lex_match (lexer, T_EQUALS);
1959 if (!parse_bool (lexer, &show_totals))
1962 else if (lex_match_id (lexer, "LABEL"))
1964 lex_match (lexer, T_EQUALS);
1965 if (!lex_force_string (lexer))
1968 total_label = ss_xstrdup (lex_tokss (lexer));
1971 else if (lex_match_id (lexer, "POSITION"))
1973 lex_match (lexer, T_EQUALS);
1974 if (lex_match_id (lexer, "BEFORE"))
1975 totals_before = true;
1976 else if (lex_match_id (lexer, "AFTER"))
1977 totals_before = false;
1980 lex_error_expecting (lexer, "BEFORE", "AFTER");
1984 else if (lex_match_id (lexer, "EMPTY"))
1986 lex_match (lexer, T_EQUALS);
1987 if (lex_match_id (lexer, "INCLUDE"))
1988 c->show_empty = true;
1989 else if (lex_match_id (lexer, "EXCLUDE"))
1990 c->show_empty = false;
1993 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2000 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2001 "TOTAL", "LABEL", "POSITION", "EMPTY");
2003 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2010 if (c->n_cats >= allocated_cats)
2011 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2012 c->cats[c->n_cats++] = cat;
2017 if (c->n_cats >= allocated_cats)
2018 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2020 struct ctables_category *totals;
2023 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2024 totals = &c->cats[0];
2027 totals = &c->cats[c->n_cats];
2030 *totals = (struct ctables_category) {
2032 .total_label = total_label ? total_label : xstrdup (_("Total")),
2036 struct ctables_category *subtotal = NULL;
2037 for (size_t i = totals_before ? 0 : c->n_cats;
2038 totals_before ? i < c->n_cats : i-- > 0;
2039 totals_before ? i++ : 0)
2041 struct ctables_category *cat = &c->cats[i];
2050 cat->subtotal = subtotal;
2053 case CCT_POSTCOMPUTE:
2064 case CCT_EXCLUDED_MISSING:
2069 if (cats_start_ofs != -1)
2071 for (size_t i = 0; i < c->n_cats; i++)
2073 struct ctables_category *cat = &c->cats[i];
2076 case CCT_POSTCOMPUTE:
2077 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2078 struct msg_location *cats_location
2079 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2080 bool ok = ctables_recursive_check_postcompute (
2081 dict, cat->pc->expr, cat, c, cats_location);
2082 msg_location_destroy (cats_location);
2089 for (size_t j = 0; j < n_vars; j++)
2090 if (var_is_alpha (vars[j]))
2092 msg_at (SE, cat->location,
2093 _("This category specification may be applied "
2094 "only to numeric variables, but this "
2095 "subcommand tries to apply it to string "
2097 var_get_name (vars[j]));
2106 if (!parse_category_string (cat->location, cat->string, dict,
2107 common_format->type, &n))
2110 ss_dealloc (&cat->string);
2112 cat->type = CCT_NUMBER;
2115 else if (!all_strings (vars, n_vars, cat))
2124 if (!cat->srange[0].string)
2126 else if (!parse_category_string (cat->location,
2127 cat->srange[0], dict,
2128 common_format->type, &n[0]))
2131 if (!cat->srange[1].string)
2133 else if (!parse_category_string (cat->location,
2134 cat->srange[1], dict,
2135 common_format->type, &n[1]))
2138 ss_dealloc (&cat->srange[0]);
2139 ss_dealloc (&cat->srange[1]);
2141 cat->type = CCT_NRANGE;
2142 cat->nrange[0] = n[0];
2143 cat->nrange[1] = n[1];
2145 else if (!all_strings (vars, n_vars, cat))
2156 case CCT_EXCLUDED_MISSING:
2171 ctables_nest_uninit (struct ctables_nest *nest)
2174 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2175 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2176 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2177 free (nest->domains[dt]);
2181 ctables_stack_uninit (struct ctables_stack *stack)
2185 for (size_t i = 0; i < stack->n; i++)
2186 ctables_nest_uninit (&stack->nests[i]);
2187 free (stack->nests);
2191 static struct ctables_stack
2192 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2199 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2200 for (size_t i = 0; i < s0.n; i++)
2201 for (size_t j = 0; j < s1.n; j++)
2203 const struct ctables_nest *a = &s0.nests[i];
2204 const struct ctables_nest *b = &s1.nests[j];
2206 size_t allocate = a->n + b->n;
2207 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2209 for (size_t k = 0; k < a->n; k++)
2210 vars[n++] = a->vars[k];
2211 for (size_t k = 0; k < b->n; k++)
2212 vars[n++] = b->vars[k];
2213 assert (n == allocate);
2215 const struct ctables_nest *summary_src;
2216 if (!a->specs[CSV_CELL].var)
2218 else if (!b->specs[CSV_CELL].var)
2223 struct ctables_nest *new = &stack.nests[stack.n++];
2224 *new = (struct ctables_nest) {
2226 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2227 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2231 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2232 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2234 ctables_stack_uninit (&s0);
2235 ctables_stack_uninit (&s1);
2239 static struct ctables_stack
2240 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2242 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2243 for (size_t i = 0; i < s0.n; i++)
2244 stack.nests[stack.n++] = s0.nests[i];
2245 for (size_t i = 0; i < s1.n; i++)
2247 stack.nests[stack.n] = s1.nests[i];
2248 stack.nests[stack.n].group_head += s0.n;
2251 assert (stack.n == s0.n + s1.n);
2257 static struct ctables_stack
2258 var_fts (const struct ctables_axis *a)
2260 struct variable **vars = xmalloc (sizeof *vars);
2263 struct ctables_nest *nest = xmalloc (sizeof *nest);
2264 *nest = (struct ctables_nest) {
2267 .scale_idx = a->scale ? 0 : SIZE_MAX,
2269 if (a->specs[CSV_CELL].n || a->scale)
2270 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2272 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2273 nest->specs[sv].var = a->var;
2274 nest->specs[sv].is_scale = a->scale;
2276 return (struct ctables_stack) { .nests = nest, .n = 1 };
2279 static struct ctables_stack
2280 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2283 return (struct ctables_stack) { .n = 0 };
2291 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2292 enumerate_fts (axis_type, a->subs[1]));
2295 /* This should consider any of the scale variables found in the result to
2296 be linked to each other listwise for SMISSING=LISTWISE. */
2297 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2298 enumerate_fts (axis_type, a->subs[1]));
2304 union ctables_summary
2306 /* COUNT, VALIDN, TOTALN. */
2309 /* MINIMUM, MAXIMUM, RANGE. */
2316 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2317 struct moments1 *moments;
2319 /* MEDIAN, MODE, PTILE. */
2322 struct casewriter *writer;
2327 /* XXX multiple response */
2331 ctables_summary_init (union ctables_summary *s,
2332 const struct ctables_summary_spec *ss)
2334 switch (ss->function)
2338 case CTSF_ROWPCT_COUNT:
2339 case CTSF_COLPCT_COUNT:
2340 case CTSF_TABLEPCT_COUNT:
2341 case CTSF_SUBTABLEPCT_COUNT:
2342 case CTSF_LAYERPCT_COUNT:
2343 case CTSF_LAYERROWPCT_COUNT:
2344 case CTSF_LAYERCOLPCT_COUNT:
2345 case CTSF_ROWPCT_VALIDN:
2346 case CTSF_COLPCT_VALIDN:
2347 case CTSF_TABLEPCT_VALIDN:
2348 case CTSF_SUBTABLEPCT_VALIDN:
2349 case CTSF_LAYERPCT_VALIDN:
2350 case CTSF_LAYERROWPCT_VALIDN:
2351 case CTSF_LAYERCOLPCT_VALIDN:
2352 case CTSF_ROWPCT_TOTALN:
2353 case CTSF_COLPCT_TOTALN:
2354 case CTSF_TABLEPCT_TOTALN:
2355 case CTSF_SUBTABLEPCT_TOTALN:
2356 case CTSF_LAYERPCT_TOTALN:
2357 case CTSF_LAYERROWPCT_TOTALN:
2358 case CTSF_LAYERCOLPCT_TOTALN:
2365 case CTSF_UROWPCT_COUNT:
2366 case CTSF_UCOLPCT_COUNT:
2367 case CTSF_UTABLEPCT_COUNT:
2368 case CTSF_USUBTABLEPCT_COUNT:
2369 case CTSF_ULAYERPCT_COUNT:
2370 case CTSF_ULAYERROWPCT_COUNT:
2371 case CTSF_ULAYERCOLPCT_COUNT:
2372 case CTSF_UROWPCT_VALIDN:
2373 case CTSF_UCOLPCT_VALIDN:
2374 case CTSF_UTABLEPCT_VALIDN:
2375 case CTSF_USUBTABLEPCT_VALIDN:
2376 case CTSF_ULAYERPCT_VALIDN:
2377 case CTSF_ULAYERROWPCT_VALIDN:
2378 case CTSF_ULAYERCOLPCT_VALIDN:
2379 case CTSF_UROWPCT_TOTALN:
2380 case CTSF_UCOLPCT_TOTALN:
2381 case CTSF_UTABLEPCT_TOTALN:
2382 case CTSF_USUBTABLEPCT_TOTALN:
2383 case CTSF_ULAYERPCT_TOTALN:
2384 case CTSF_ULAYERROWPCT_TOTALN:
2385 case CTSF_ULAYERCOLPCT_TOTALN:
2395 case CTSF_SUBTABLE_ID:
2397 case CTSF_LAYERROW_ID:
2398 case CTSF_LAYERCOL_ID:
2404 s->min = s->max = SYSMIS;
2412 case CTSF_ROWPCT_SUM:
2413 case CTSF_COLPCT_SUM:
2414 case CTSF_TABLEPCT_SUM:
2415 case CTSF_SUBTABLEPCT_SUM:
2416 case CTSF_LAYERPCT_SUM:
2417 case CTSF_LAYERROWPCT_SUM:
2418 case CTSF_LAYERCOLPCT_SUM:
2423 case CTSF_UVARIANCE:
2424 case CTSF_UROWPCT_SUM:
2425 case CTSF_UCOLPCT_SUM:
2426 case CTSF_UTABLEPCT_SUM:
2427 case CTSF_USUBTABLEPCT_SUM:
2428 case CTSF_ULAYERPCT_SUM:
2429 case CTSF_ULAYERROWPCT_SUM:
2430 case CTSF_ULAYERCOLPCT_SUM:
2431 s->moments = moments1_create (MOMENT_VARIANCE);
2441 struct caseproto *proto = caseproto_create ();
2442 proto = caseproto_add_width (proto, 0);
2443 proto = caseproto_add_width (proto, 0);
2445 struct subcase ordering;
2446 subcase_init (&ordering, 0, 0, SC_ASCEND);
2447 s->writer = sort_create_writer (&ordering, proto);
2448 subcase_uninit (&ordering);
2449 caseproto_unref (proto);
2459 ctables_summary_uninit (union ctables_summary *s,
2460 const struct ctables_summary_spec *ss)
2462 switch (ss->function)
2466 case CTSF_ROWPCT_COUNT:
2467 case CTSF_COLPCT_COUNT:
2468 case CTSF_TABLEPCT_COUNT:
2469 case CTSF_SUBTABLEPCT_COUNT:
2470 case CTSF_LAYERPCT_COUNT:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERCOLPCT_COUNT:
2473 case CTSF_ROWPCT_VALIDN:
2474 case CTSF_COLPCT_VALIDN:
2475 case CTSF_TABLEPCT_VALIDN:
2476 case CTSF_SUBTABLEPCT_VALIDN:
2477 case CTSF_LAYERPCT_VALIDN:
2478 case CTSF_LAYERROWPCT_VALIDN:
2479 case CTSF_LAYERCOLPCT_VALIDN:
2480 case CTSF_ROWPCT_TOTALN:
2481 case CTSF_COLPCT_TOTALN:
2482 case CTSF_TABLEPCT_TOTALN:
2483 case CTSF_SUBTABLEPCT_TOTALN:
2484 case CTSF_LAYERPCT_TOTALN:
2485 case CTSF_LAYERROWPCT_TOTALN:
2486 case CTSF_LAYERCOLPCT_TOTALN:
2493 case CTSF_UROWPCT_COUNT:
2494 case CTSF_UCOLPCT_COUNT:
2495 case CTSF_UTABLEPCT_COUNT:
2496 case CTSF_USUBTABLEPCT_COUNT:
2497 case CTSF_ULAYERPCT_COUNT:
2498 case CTSF_ULAYERROWPCT_COUNT:
2499 case CTSF_ULAYERCOLPCT_COUNT:
2500 case CTSF_UROWPCT_VALIDN:
2501 case CTSF_UCOLPCT_VALIDN:
2502 case CTSF_UTABLEPCT_VALIDN:
2503 case CTSF_USUBTABLEPCT_VALIDN:
2504 case CTSF_ULAYERPCT_VALIDN:
2505 case CTSF_ULAYERROWPCT_VALIDN:
2506 case CTSF_ULAYERCOLPCT_VALIDN:
2507 case CTSF_UROWPCT_TOTALN:
2508 case CTSF_UCOLPCT_TOTALN:
2509 case CTSF_UTABLEPCT_TOTALN:
2510 case CTSF_USUBTABLEPCT_TOTALN:
2511 case CTSF_ULAYERPCT_TOTALN:
2512 case CTSF_ULAYERROWPCT_TOTALN:
2513 case CTSF_ULAYERCOLPCT_TOTALN:
2522 case CTSF_SUBTABLE_ID:
2524 case CTSF_LAYERROW_ID:
2525 case CTSF_LAYERCOL_ID:
2538 case CTSF_ROWPCT_SUM:
2539 case CTSF_COLPCT_SUM:
2540 case CTSF_TABLEPCT_SUM:
2541 case CTSF_SUBTABLEPCT_SUM:
2542 case CTSF_LAYERPCT_SUM:
2543 case CTSF_LAYERROWPCT_SUM:
2544 case CTSF_LAYERCOLPCT_SUM:
2549 case CTSF_UVARIANCE:
2550 case CTSF_UROWPCT_SUM:
2551 case CTSF_UCOLPCT_SUM:
2552 case CTSF_UTABLEPCT_SUM:
2553 case CTSF_USUBTABLEPCT_SUM:
2554 case CTSF_ULAYERPCT_SUM:
2555 case CTSF_ULAYERROWPCT_SUM:
2556 case CTSF_ULAYERCOLPCT_SUM:
2557 moments1_destroy (s->moments);
2566 casewriter_destroy (s->writer);
2572 ctables_summary_add (union ctables_summary *s,
2573 const struct ctables_summary_spec *ss,
2574 const struct variable *var, const union value *value,
2575 bool is_scale, bool is_scale_missing,
2576 bool is_missing, bool excluded_missing,
2577 double d_weight, double e_weight)
2579 /* To determine whether a case is included in a given table for a particular
2580 kind of summary, consider the following charts for each variable in the
2581 table. Only if "yes" appears for every variable for the summary is the
2584 Categorical variables: VALIDN COUNT TOTALN
2585 Valid values in included categories yes yes yes
2586 Missing values in included categories --- yes yes
2587 Missing values in excluded categories --- --- yes
2588 Valid values in excluded categories --- --- ---
2590 Scale variables: VALIDN COUNT TOTALN
2591 Valid value yes yes yes
2592 Missing value --- yes yes
2594 Missing values include both user- and system-missing. (The system-missing
2595 value is always in an excluded category.)
2597 switch (ss->function)
2600 case CTSF_ROWPCT_TOTALN:
2601 case CTSF_COLPCT_TOTALN:
2602 case CTSF_TABLEPCT_TOTALN:
2603 case CTSF_SUBTABLEPCT_TOTALN:
2604 case CTSF_LAYERPCT_TOTALN:
2605 case CTSF_LAYERROWPCT_TOTALN:
2606 case CTSF_LAYERCOLPCT_TOTALN:
2607 s->count += d_weight;
2611 case CTSF_UROWPCT_TOTALN:
2612 case CTSF_UCOLPCT_TOTALN:
2613 case CTSF_UTABLEPCT_TOTALN:
2614 case CTSF_USUBTABLEPCT_TOTALN:
2615 case CTSF_ULAYERPCT_TOTALN:
2616 case CTSF_ULAYERROWPCT_TOTALN:
2617 case CTSF_ULAYERCOLPCT_TOTALN:
2622 case CTSF_ROWPCT_COUNT:
2623 case CTSF_COLPCT_COUNT:
2624 case CTSF_TABLEPCT_COUNT:
2625 case CTSF_SUBTABLEPCT_COUNT:
2626 case CTSF_LAYERPCT_COUNT:
2627 case CTSF_LAYERROWPCT_COUNT:
2628 case CTSF_LAYERCOLPCT_COUNT:
2629 if (is_scale || !excluded_missing)
2630 s->count += d_weight;
2634 case CTSF_UROWPCT_COUNT:
2635 case CTSF_UCOLPCT_COUNT:
2636 case CTSF_UTABLEPCT_COUNT:
2637 case CTSF_USUBTABLEPCT_COUNT:
2638 case CTSF_ULAYERPCT_COUNT:
2639 case CTSF_ULAYERROWPCT_COUNT:
2640 case CTSF_ULAYERCOLPCT_COUNT:
2641 if (is_scale || !excluded_missing)
2646 case CTSF_ROWPCT_VALIDN:
2647 case CTSF_COLPCT_VALIDN:
2648 case CTSF_TABLEPCT_VALIDN:
2649 case CTSF_SUBTABLEPCT_VALIDN:
2650 case CTSF_LAYERPCT_VALIDN:
2651 case CTSF_LAYERROWPCT_VALIDN:
2652 case CTSF_LAYERCOLPCT_VALIDN:
2656 s->count += d_weight;
2660 case CTSF_UROWPCT_VALIDN:
2661 case CTSF_UCOLPCT_VALIDN:
2662 case CTSF_UTABLEPCT_VALIDN:
2663 case CTSF_USUBTABLEPCT_VALIDN:
2664 case CTSF_ULAYERPCT_VALIDN:
2665 case CTSF_ULAYERROWPCT_VALIDN:
2666 case CTSF_ULAYERCOLPCT_VALIDN:
2676 case CTSF_SUBTABLE_ID:
2678 case CTSF_LAYERROW_ID:
2679 case CTSF_LAYERCOL_ID:
2684 s->count += d_weight;
2693 if (is_scale || !excluded_missing)
2694 s->count += e_weight;
2701 s->count += e_weight;
2705 s->count += e_weight;
2711 if (!is_scale_missing)
2713 assert (!var_is_alpha (var)); /* XXX? */
2714 if (s->min == SYSMIS || value->f < s->min)
2716 if (s->max == SYSMIS || value->f > s->max)
2726 case CTSF_ROWPCT_SUM:
2727 case CTSF_COLPCT_SUM:
2728 case CTSF_TABLEPCT_SUM:
2729 case CTSF_SUBTABLEPCT_SUM:
2730 case CTSF_LAYERPCT_SUM:
2731 case CTSF_LAYERROWPCT_SUM:
2732 case CTSF_LAYERCOLPCT_SUM:
2733 if (!is_scale_missing)
2734 moments1_add (s->moments, value->f, e_weight);
2741 case CTSF_UVARIANCE:
2742 case CTSF_UROWPCT_SUM:
2743 case CTSF_UCOLPCT_SUM:
2744 case CTSF_UTABLEPCT_SUM:
2745 case CTSF_USUBTABLEPCT_SUM:
2746 case CTSF_ULAYERPCT_SUM:
2747 case CTSF_ULAYERROWPCT_SUM:
2748 case CTSF_ULAYERCOLPCT_SUM:
2749 if (!is_scale_missing)
2750 moments1_add (s->moments, value->f, 1.0);
2756 d_weight = e_weight = 1.0;
2761 if (!is_scale_missing)
2763 s->ovalid += e_weight;
2765 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2766 *case_num_rw_idx (c, 0) = value->f;
2767 *case_num_rw_idx (c, 1) = e_weight;
2768 casewriter_write (s->writer, c);
2774 static enum ctables_domain_type
2775 ctables_function_domain (enum ctables_summary_function function)
2805 case CTSF_UVARIANCE:
2811 case CTSF_COLPCT_COUNT:
2812 case CTSF_COLPCT_SUM:
2813 case CTSF_COLPCT_TOTALN:
2814 case CTSF_COLPCT_VALIDN:
2815 case CTSF_UCOLPCT_COUNT:
2816 case CTSF_UCOLPCT_SUM:
2817 case CTSF_UCOLPCT_TOTALN:
2818 case CTSF_UCOLPCT_VALIDN:
2822 case CTSF_LAYERCOLPCT_COUNT:
2823 case CTSF_LAYERCOLPCT_SUM:
2824 case CTSF_LAYERCOLPCT_TOTALN:
2825 case CTSF_LAYERCOLPCT_VALIDN:
2826 case CTSF_ULAYERCOLPCT_COUNT:
2827 case CTSF_ULAYERCOLPCT_SUM:
2828 case CTSF_ULAYERCOLPCT_TOTALN:
2829 case CTSF_ULAYERCOLPCT_VALIDN:
2830 case CTSF_LAYERCOL_ID:
2831 return CTDT_LAYERCOL;
2833 case CTSF_LAYERPCT_COUNT:
2834 case CTSF_LAYERPCT_SUM:
2835 case CTSF_LAYERPCT_TOTALN:
2836 case CTSF_LAYERPCT_VALIDN:
2837 case CTSF_ULAYERPCT_COUNT:
2838 case CTSF_ULAYERPCT_SUM:
2839 case CTSF_ULAYERPCT_TOTALN:
2840 case CTSF_ULAYERPCT_VALIDN:
2844 case CTSF_LAYERROWPCT_COUNT:
2845 case CTSF_LAYERROWPCT_SUM:
2846 case CTSF_LAYERROWPCT_TOTALN:
2847 case CTSF_LAYERROWPCT_VALIDN:
2848 case CTSF_ULAYERROWPCT_COUNT:
2849 case CTSF_ULAYERROWPCT_SUM:
2850 case CTSF_ULAYERROWPCT_TOTALN:
2851 case CTSF_ULAYERROWPCT_VALIDN:
2852 case CTSF_LAYERROW_ID:
2853 return CTDT_LAYERROW;
2855 case CTSF_ROWPCT_COUNT:
2856 case CTSF_ROWPCT_SUM:
2857 case CTSF_ROWPCT_TOTALN:
2858 case CTSF_ROWPCT_VALIDN:
2859 case CTSF_UROWPCT_COUNT:
2860 case CTSF_UROWPCT_SUM:
2861 case CTSF_UROWPCT_TOTALN:
2862 case CTSF_UROWPCT_VALIDN:
2866 case CTSF_SUBTABLEPCT_COUNT:
2867 case CTSF_SUBTABLEPCT_SUM:
2868 case CTSF_SUBTABLEPCT_TOTALN:
2869 case CTSF_SUBTABLEPCT_VALIDN:
2870 case CTSF_USUBTABLEPCT_COUNT:
2871 case CTSF_USUBTABLEPCT_SUM:
2872 case CTSF_USUBTABLEPCT_TOTALN:
2873 case CTSF_USUBTABLEPCT_VALIDN:
2874 case CTSF_SUBTABLE_ID:
2875 return CTDT_SUBTABLE;
2877 case CTSF_TABLEPCT_COUNT:
2878 case CTSF_TABLEPCT_SUM:
2879 case CTSF_TABLEPCT_TOTALN:
2880 case CTSF_TABLEPCT_VALIDN:
2881 case CTSF_UTABLEPCT_COUNT:
2882 case CTSF_UTABLEPCT_SUM:
2883 case CTSF_UTABLEPCT_TOTALN:
2884 case CTSF_UTABLEPCT_VALIDN:
2892 static enum ctables_domain_type
2893 ctables_function_is_pctsum (enum ctables_summary_function function)
2923 case CTSF_UVARIANCE:
2927 case CTSF_COLPCT_COUNT:
2928 case CTSF_COLPCT_TOTALN:
2929 case CTSF_COLPCT_VALIDN:
2930 case CTSF_UCOLPCT_COUNT:
2931 case CTSF_UCOLPCT_TOTALN:
2932 case CTSF_UCOLPCT_VALIDN:
2933 case CTSF_LAYERCOLPCT_COUNT:
2934 case CTSF_LAYERCOLPCT_TOTALN:
2935 case CTSF_LAYERCOLPCT_VALIDN:
2936 case CTSF_ULAYERCOLPCT_COUNT:
2937 case CTSF_ULAYERCOLPCT_TOTALN:
2938 case CTSF_ULAYERCOLPCT_VALIDN:
2939 case CTSF_LAYERPCT_COUNT:
2940 case CTSF_LAYERPCT_TOTALN:
2941 case CTSF_LAYERPCT_VALIDN:
2942 case CTSF_ULAYERPCT_COUNT:
2943 case CTSF_ULAYERPCT_TOTALN:
2944 case CTSF_ULAYERPCT_VALIDN:
2945 case CTSF_LAYERROWPCT_COUNT:
2946 case CTSF_LAYERROWPCT_TOTALN:
2947 case CTSF_LAYERROWPCT_VALIDN:
2948 case CTSF_ULAYERROWPCT_COUNT:
2949 case CTSF_ULAYERROWPCT_TOTALN:
2950 case CTSF_ULAYERROWPCT_VALIDN:
2951 case CTSF_ROWPCT_COUNT:
2952 case CTSF_ROWPCT_TOTALN:
2953 case CTSF_ROWPCT_VALIDN:
2954 case CTSF_UROWPCT_COUNT:
2955 case CTSF_UROWPCT_TOTALN:
2956 case CTSF_UROWPCT_VALIDN:
2957 case CTSF_SUBTABLEPCT_COUNT:
2958 case CTSF_SUBTABLEPCT_TOTALN:
2959 case CTSF_SUBTABLEPCT_VALIDN:
2960 case CTSF_USUBTABLEPCT_COUNT:
2961 case CTSF_USUBTABLEPCT_TOTALN:
2962 case CTSF_USUBTABLEPCT_VALIDN:
2963 case CTSF_TABLEPCT_COUNT:
2964 case CTSF_TABLEPCT_TOTALN:
2965 case CTSF_TABLEPCT_VALIDN:
2966 case CTSF_UTABLEPCT_COUNT:
2967 case CTSF_UTABLEPCT_TOTALN:
2968 case CTSF_UTABLEPCT_VALIDN:
2972 case CTSF_SUBTABLE_ID:
2974 case CTSF_LAYERROW_ID:
2975 case CTSF_LAYERCOL_ID:
2978 case CTSF_COLPCT_SUM:
2979 case CTSF_UCOLPCT_SUM:
2980 case CTSF_LAYERCOLPCT_SUM:
2981 case CTSF_ULAYERCOLPCT_SUM:
2982 case CTSF_LAYERPCT_SUM:
2983 case CTSF_ULAYERPCT_SUM:
2984 case CTSF_LAYERROWPCT_SUM:
2985 case CTSF_ULAYERROWPCT_SUM:
2986 case CTSF_ROWPCT_SUM:
2987 case CTSF_UROWPCT_SUM:
2988 case CTSF_SUBTABLEPCT_SUM:
2989 case CTSF_USUBTABLEPCT_SUM:
2990 case CTSF_TABLEPCT_SUM:
2991 case CTSF_UTABLEPCT_SUM:
2999 ctables_summary_value (const struct ctables_cell *cell,
3000 union ctables_summary *s,
3001 const struct ctables_summary_spec *ss)
3003 switch (ss->function)
3013 case CTSF_SUBTABLE_ID:
3015 case CTSF_LAYERROW_ID:
3016 case CTSF_LAYERCOL_ID:
3017 return cell->domains[ctables_function_domain (ss->function)]->sequence;
3019 case CTSF_ROWPCT_COUNT:
3020 case CTSF_COLPCT_COUNT:
3021 case CTSF_TABLEPCT_COUNT:
3022 case CTSF_SUBTABLEPCT_COUNT:
3023 case CTSF_LAYERPCT_COUNT:
3024 case CTSF_LAYERROWPCT_COUNT:
3025 case CTSF_LAYERCOLPCT_COUNT:
3027 enum ctables_domain_type d = ctables_function_domain (ss->function);
3028 return (cell->domains[d]->e_count
3029 ? s->count / cell->domains[d]->e_count * 100
3033 case CTSF_UROWPCT_COUNT:
3034 case CTSF_UCOLPCT_COUNT:
3035 case CTSF_UTABLEPCT_COUNT:
3036 case CTSF_USUBTABLEPCT_COUNT:
3037 case CTSF_ULAYERPCT_COUNT:
3038 case CTSF_ULAYERROWPCT_COUNT:
3039 case CTSF_ULAYERCOLPCT_COUNT:
3041 enum ctables_domain_type d = ctables_function_domain (ss->function);
3042 return (cell->domains[d]->u_count
3043 ? s->count / cell->domains[d]->u_count * 100
3047 case CTSF_ROWPCT_VALIDN:
3048 case CTSF_COLPCT_VALIDN:
3049 case CTSF_TABLEPCT_VALIDN:
3050 case CTSF_SUBTABLEPCT_VALIDN:
3051 case CTSF_LAYERPCT_VALIDN:
3052 case CTSF_LAYERROWPCT_VALIDN:
3053 case CTSF_LAYERCOLPCT_VALIDN:
3055 enum ctables_domain_type d = ctables_function_domain (ss->function);
3056 return (cell->domains[d]->e_valid
3057 ? s->count / cell->domains[d]->e_valid * 100
3061 case CTSF_UROWPCT_VALIDN:
3062 case CTSF_UCOLPCT_VALIDN:
3063 case CTSF_UTABLEPCT_VALIDN:
3064 case CTSF_USUBTABLEPCT_VALIDN:
3065 case CTSF_ULAYERPCT_VALIDN:
3066 case CTSF_ULAYERROWPCT_VALIDN:
3067 case CTSF_ULAYERCOLPCT_VALIDN:
3069 enum ctables_domain_type d = ctables_function_domain (ss->function);
3070 return (cell->domains[d]->u_valid
3071 ? s->count / cell->domains[d]->u_valid * 100
3075 case CTSF_ROWPCT_TOTALN:
3076 case CTSF_COLPCT_TOTALN:
3077 case CTSF_TABLEPCT_TOTALN:
3078 case CTSF_SUBTABLEPCT_TOTALN:
3079 case CTSF_LAYERPCT_TOTALN:
3080 case CTSF_LAYERROWPCT_TOTALN:
3081 case CTSF_LAYERCOLPCT_TOTALN:
3083 enum ctables_domain_type d = ctables_function_domain (ss->function);
3084 return (cell->domains[d]->e_total
3085 ? s->count / cell->domains[d]->e_total * 100
3089 case CTSF_UROWPCT_TOTALN:
3090 case CTSF_UCOLPCT_TOTALN:
3091 case CTSF_UTABLEPCT_TOTALN:
3092 case CTSF_USUBTABLEPCT_TOTALN:
3093 case CTSF_ULAYERPCT_TOTALN:
3094 case CTSF_ULAYERROWPCT_TOTALN:
3095 case CTSF_ULAYERCOLPCT_TOTALN:
3097 enum ctables_domain_type d = ctables_function_domain (ss->function);
3098 return (cell->domains[d]->u_total
3099 ? s->count / cell->domains[d]->u_total * 100
3120 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3126 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3133 double weight, variance;
3134 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3135 return calc_semean (variance, weight);
3142 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3143 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3149 double weight, mean;
3150 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3151 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3155 case CTSF_UVARIANCE:
3158 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3162 case CTSF_ROWPCT_SUM:
3163 case CTSF_COLPCT_SUM:
3164 case CTSF_TABLEPCT_SUM:
3165 case CTSF_SUBTABLEPCT_SUM:
3166 case CTSF_LAYERPCT_SUM:
3167 case CTSF_LAYERROWPCT_SUM:
3168 case CTSF_LAYERCOLPCT_SUM:
3170 double weight, mean;
3171 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3172 if (weight == SYSMIS || mean == SYSMIS)
3174 enum ctables_domain_type d = ctables_function_domain (ss->function);
3175 double num = weight * mean;
3176 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3177 return denom != 0 ? num / denom * 100 : SYSMIS;
3179 case CTSF_UROWPCT_SUM:
3180 case CTSF_UCOLPCT_SUM:
3181 case CTSF_UTABLEPCT_SUM:
3182 case CTSF_USUBTABLEPCT_SUM:
3183 case CTSF_ULAYERPCT_SUM:
3184 case CTSF_ULAYERROWPCT_SUM:
3185 case CTSF_ULAYERCOLPCT_SUM:
3187 double weight, mean;
3188 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3189 if (weight == SYSMIS || mean == SYSMIS)
3191 enum ctables_domain_type d = ctables_function_domain (ss->function);
3192 double num = weight * mean;
3193 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3194 return denom != 0 ? num / denom * 100 : SYSMIS;
3203 struct casereader *reader = casewriter_make_reader (s->writer);
3206 struct percentile *ptile = percentile_create (
3207 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3208 struct order_stats *os = &ptile->parent;
3209 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3210 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3211 statistic_destroy (&ptile->parent.parent);
3219 struct casereader *reader = casewriter_make_reader (s->writer);
3222 struct mode *mode = mode_create ();
3223 struct order_stats *os = &mode->parent;
3224 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3225 s->ovalue = mode->mode;
3226 statistic_destroy (&mode->parent.parent);
3234 struct ctables_cell_sort_aux
3236 const struct ctables_nest *nest;
3237 enum pivot_axis_type a;
3241 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3243 const struct ctables_cell_sort_aux *aux = aux_;
3244 struct ctables_cell *const *ap = a_;
3245 struct ctables_cell *const *bp = b_;
3246 const struct ctables_cell *a = *ap;
3247 const struct ctables_cell *b = *bp;
3249 const struct ctables_nest *nest = aux->nest;
3250 for (size_t i = 0; i < nest->n; i++)
3251 if (i != nest->scale_idx)
3253 const struct variable *var = nest->vars[i];
3254 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3255 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3256 if (a_cv->category != b_cv->category)
3257 return a_cv->category > b_cv->category ? 1 : -1;
3259 const union value *a_val = &a_cv->value;
3260 const union value *b_val = &b_cv->value;
3261 switch (a_cv->category->type)
3267 case CCT_POSTCOMPUTE:
3268 case CCT_EXCLUDED_MISSING:
3269 /* Must be equal. */
3277 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3285 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3287 return a_cv->category->sort_ascending ? cmp : -cmp;
3293 const char *a_label = var_lookup_value_label (var, a_val);
3294 const char *b_label = var_lookup_value_label (var, b_val);
3300 cmp = strcmp (a_label, b_label);
3306 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3309 return a_cv->category->sort_ascending ? cmp : -cmp;
3321 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3322 const void *aux UNUSED)
3324 struct ctables_cell *const *ap = a_;
3325 struct ctables_cell *const *bp = b_;
3326 const struct ctables_cell *a = *ap;
3327 const struct ctables_cell *b = *bp;
3329 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3331 int al = a->axes[axis].leaf;
3332 int bl = b->axes[axis].leaf;
3334 return al > bl ? 1 : -1;
3342 For each ctables_table:
3343 For each combination of row vars:
3344 For each combination of column vars:
3345 For each combination of layer vars:
3347 Make a table of row values:
3348 Sort entries by row values
3349 Assign a 0-based index to each actual value
3350 Construct a dimension
3351 Make a table of column values
3352 Make a table of layer values
3354 Fill the table entry using the indexes from before.
3357 static struct ctables_domain *
3358 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3359 enum ctables_domain_type domain)
3362 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3364 const struct ctables_nest *nest = s->nests[a];
3365 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3367 size_t v_idx = nest->domains[domain][i];
3368 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3369 hash = hash_pointer (cv->category, hash);
3370 if (cv->category->type != CCT_TOTAL
3371 && cv->category->type != CCT_SUBTOTAL
3372 && cv->category->type != CCT_POSTCOMPUTE)
3373 hash = value_hash (&cv->value,
3374 var_get_width (nest->vars[v_idx]), hash);
3378 struct ctables_domain *d;
3379 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3381 const struct ctables_cell *df = d->example;
3382 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3384 const struct ctables_nest *nest = s->nests[a];
3385 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3387 size_t v_idx = nest->domains[domain][i];
3388 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3389 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3390 if (cv1->category != cv2->category
3391 || (cv1->category->type != CCT_TOTAL
3392 && cv1->category->type != CCT_SUBTOTAL
3393 && cv1->category->type != CCT_POSTCOMPUTE
3394 && !value_equal (&cv1->value, &cv2->value,
3395 var_get_width (nest->vars[v_idx]))))
3404 struct ctables_sum *sums = (s->table->n_sum_vars
3405 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3408 d = xmalloc (sizeof *d);
3409 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3410 hmap_insert (&s->domains[domain], &d->node, hash);
3414 static struct substring
3415 rtrim_value (const union value *v, const struct variable *var)
3417 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3418 var_get_width (var));
3419 ss_rtrim (&s, ss_cstr (" "));
3424 in_string_range (const union value *v, const struct variable *var,
3425 const struct substring *srange)
3427 struct substring s = rtrim_value (v, var);
3428 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3429 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3432 static const struct ctables_category *
3433 ctables_categories_match (const struct ctables_categories *c,
3434 const union value *v, const struct variable *var)
3436 if (var_is_numeric (var) && v->f == SYSMIS)
3439 const struct ctables_category *othernm = NULL;
3440 for (size_t i = c->n_cats; i-- > 0; )
3442 const struct ctables_category *cat = &c->cats[i];
3446 if (cat->number == v->f)
3451 if (ss_equals (cat->string, rtrim_value (v, var)))
3456 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3457 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3462 if (in_string_range (v, var, cat->srange))
3467 if (var_is_value_missing (var, v))
3471 case CCT_POSTCOMPUTE:
3486 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3489 case CCT_EXCLUDED_MISSING:
3494 return var_is_value_missing (var, v) ? NULL : othernm;
3497 static const struct ctables_category *
3498 ctables_categories_total (const struct ctables_categories *c)
3500 const struct ctables_category *first = &c->cats[0];
3501 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3502 return (first->type == CCT_TOTAL ? first
3503 : last->type == CCT_TOTAL ? last
3507 static struct ctables_cell *
3508 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3509 const struct ctables_category *cats[PIVOT_N_AXES][10])
3512 enum ctables_summary_variant sv = CSV_CELL;
3513 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3515 const struct ctables_nest *nest = s->nests[a];
3516 for (size_t i = 0; i < nest->n; i++)
3517 if (i != nest->scale_idx)
3519 hash = hash_pointer (cats[a][i], hash);
3520 if (cats[a][i]->type != CCT_TOTAL
3521 && cats[a][i]->type != CCT_SUBTOTAL
3522 && cats[a][i]->type != CCT_POSTCOMPUTE)
3523 hash = value_hash (case_data (c, nest->vars[i]),
3524 var_get_width (nest->vars[i]), hash);
3530 struct ctables_cell *cell;
3531 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3533 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3535 const struct ctables_nest *nest = s->nests[a];
3536 for (size_t i = 0; i < nest->n; i++)
3537 if (i != nest->scale_idx
3538 && (cats[a][i] != cell->axes[a].cvs[i].category
3539 || (cats[a][i]->type != CCT_TOTAL
3540 && cats[a][i]->type != CCT_SUBTOTAL
3541 && cats[a][i]->type != CCT_POSTCOMPUTE
3542 && !value_equal (case_data (c, nest->vars[i]),
3543 &cell->axes[a].cvs[i].value,
3544 var_get_width (nest->vars[i])))))
3553 cell = xmalloc (sizeof *cell);
3556 cell->omit_domains = 0;
3557 cell->postcompute = false;
3558 //struct string name = DS_EMPTY_INITIALIZER;
3559 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3561 const struct ctables_nest *nest = s->nests[a];
3562 cell->axes[a].cvs = (nest->n
3563 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3565 for (size_t i = 0; i < nest->n; i++)
3567 const struct ctables_category *cat = cats[a][i];
3568 const struct variable *var = nest->vars[i];
3569 const union value *value = case_data (c, var);
3570 if (i != nest->scale_idx)
3572 const struct ctables_category *subtotal = cat->subtotal;
3573 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3576 if (cat->type == CCT_TOTAL
3577 || cat->type == CCT_SUBTOTAL
3578 || cat->type == CCT_POSTCOMPUTE)
3580 /* XXX these should be more encompassing I think.*/
3584 case PIVOT_AXIS_COLUMN:
3585 cell->omit_domains |= ((1u << CTDT_TABLE) |
3586 (1u << CTDT_LAYER) |
3587 (1u << CTDT_LAYERCOL) |
3588 (1u << CTDT_SUBTABLE) |
3591 case PIVOT_AXIS_ROW:
3592 cell->omit_domains |= ((1u << CTDT_TABLE) |
3593 (1u << CTDT_LAYER) |
3594 (1u << CTDT_LAYERROW) |
3595 (1u << CTDT_SUBTABLE) |
3598 case PIVOT_AXIS_LAYER:
3599 cell->omit_domains |= ((1u << CTDT_TABLE) |
3600 (1u << CTDT_LAYER));
3604 if (cat->type == CCT_POSTCOMPUTE)
3605 cell->postcompute = true;
3608 cell->axes[a].cvs[i].category = cat;
3609 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3612 if (i != nest->scale_idx)
3614 if (!ds_is_empty (&name))
3615 ds_put_cstr (&name, ", ");
3616 char *value_s = data_out (value, var_get_encoding (var),
3617 var_get_print_format (var),
3618 settings_get_fmt_settings ());
3619 if (cat->type == CCT_TOTAL
3620 || cat->type == CCT_SUBTOTAL
3621 || cat->type == CCT_POSTCOMPUTE)
3622 ds_put_format (&name, "%s=total", var_get_name (var));
3624 ds_put_format (&name, "%s=%s", var_get_name (var),
3625 value_s + strspn (value_s, " "));
3631 //cell->name = ds_steal_cstr (&name);
3633 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3634 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3635 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3636 for (size_t i = 0; i < specs->n; i++)
3637 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3638 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3639 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3640 hmap_insert (&s->cells, &cell->node, hash);
3645 is_scale_missing (const struct ctables_summary_spec_set *specs,
3646 const struct ccase *c)
3648 if (!specs->is_scale)
3651 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3654 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3656 const struct variable *var = specs->listwise_vars[i];
3657 if (var_is_num_missing (var, case_num (c, var)))
3665 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3666 const struct ctables_category *cats[PIVOT_N_AXES][10],
3667 bool is_missing, bool excluded_missing,
3668 double d_weight, double e_weight)
3670 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3671 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3673 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3675 bool scale_missing = is_scale_missing (specs, c);
3676 for (size_t i = 0; i < specs->n; i++)
3677 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3678 specs->var, case_data (c, specs->var), specs->is_scale,
3679 scale_missing, is_missing, excluded_missing,
3680 d_weight, e_weight);
3681 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3682 if (!(cell->omit_domains && (1u << dt)))
3684 struct ctables_domain *d = cell->domains[dt];
3685 d->d_total += d_weight;
3686 d->e_total += e_weight;
3688 if (!excluded_missing)
3690 d->d_count += d_weight;
3691 d->e_count += e_weight;
3696 d->d_valid += d_weight;
3697 d->e_valid += e_weight;
3700 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3702 /* XXX listwise_missing??? */
3703 const struct variable *var = s->table->sum_vars[i];
3704 double addend = case_num (c, var);
3705 if (!var_is_num_missing (var, addend))
3707 struct ctables_sum *sum = &d->sums[i];
3708 sum->e_sum += addend * e_weight;
3709 sum->u_sum += addend;
3717 recurse_totals (struct ctables_section *s, const struct ccase *c,
3718 const struct ctables_category *cats[PIVOT_N_AXES][10],
3719 bool is_missing, bool excluded_missing,
3720 double d_weight, double e_weight,
3721 enum pivot_axis_type start_axis, size_t start_nest)
3723 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3725 const struct ctables_nest *nest = s->nests[a];
3726 for (size_t i = start_nest; i < nest->n; i++)
3728 if (i == nest->scale_idx)
3731 const struct variable *var = nest->vars[i];
3733 const struct ctables_category *total = ctables_categories_total (
3734 s->table->categories[var_get_dict_index (var)]);
3737 const struct ctables_category *save = cats[a][i];
3739 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3740 d_weight, e_weight);
3741 recurse_totals (s, c, cats, is_missing, excluded_missing,
3742 d_weight, e_weight, a, i + 1);
3751 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3752 const struct ctables_category *cats[PIVOT_N_AXES][10],
3753 bool is_missing, bool excluded_missing,
3754 double d_weight, double e_weight,
3755 enum pivot_axis_type start_axis, size_t start_nest)
3757 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3759 const struct ctables_nest *nest = s->nests[a];
3760 for (size_t i = start_nest; i < nest->n; i++)
3762 if (i == nest->scale_idx)
3765 const struct ctables_category *save = cats[a][i];
3768 cats[a][i] = save->subtotal;
3769 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3770 d_weight, e_weight);
3771 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3772 d_weight, e_weight, a, i + 1);
3781 ctables_add_occurrence (const struct variable *var,
3782 const union value *value,
3783 struct hmap *occurrences)
3785 int width = var_get_width (var);
3786 unsigned int hash = value_hash (value, width, 0);
3788 struct ctables_occurrence *o;
3789 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3791 if (value_equal (value, &o->value, width))
3794 o = xmalloc (sizeof *o);
3795 value_clone (&o->value, value, width);
3796 hmap_insert (occurrences, &o->node, hash);
3800 ctables_cell_insert (struct ctables_section *s,
3801 const struct ccase *c,
3802 double d_weight, double e_weight)
3804 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3806 /* Does at least one categorical variable have a missing value in an included
3807 or excluded category? */
3808 bool is_missing = false;
3810 /* Does at least one categorical variable have a missing value in an excluded
3812 bool excluded_missing = false;
3814 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3816 const struct ctables_nest *nest = s->nests[a];
3817 for (size_t i = 0; i < nest->n; i++)
3819 if (i == nest->scale_idx)
3822 const struct variable *var = nest->vars[i];
3823 const union value *value = case_data (c, var);
3825 bool var_missing = var_is_value_missing (var, value) != 0;
3829 cats[a][i] = ctables_categories_match (
3830 s->table->categories[var_get_dict_index (var)], value, var);
3836 static const struct ctables_category cct_excluded_missing = {
3837 .type = CCT_EXCLUDED_MISSING,
3840 cats[a][i] = &cct_excluded_missing;
3841 excluded_missing = true;
3846 if (!excluded_missing)
3847 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3849 const struct ctables_nest *nest = s->nests[a];
3850 for (size_t i = 0; i < nest->n; i++)
3851 if (i != nest->scale_idx)
3853 const struct variable *var = nest->vars[i];
3854 const union value *value = case_data (c, var);
3855 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3859 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3860 d_weight, e_weight);
3862 //if (!excluded_missing)
3864 recurse_totals (s, c, cats, is_missing, excluded_missing,
3865 d_weight, e_weight, 0, 0);
3866 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3867 d_weight, e_weight, 0, 0);
3873 const struct ctables_summary_spec_set *set;
3878 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3880 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3881 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3882 if (as->function != bs->function)
3883 return as->function > bs->function ? 1 : -1;
3884 else if (as->percentile != bs->percentile)
3885 return as->percentile < bs->percentile ? 1 : -1;
3887 const char *as_label = as->label ? as->label : "";
3888 const char *bs_label = bs->label ? bs->label : "";
3889 return strcmp (as_label, bs_label);
3892 static struct pivot_value *
3893 ctables_category_create_label__ (const struct ctables_category *cat,
3894 const struct variable *var,
3895 const union value *value)
3897 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3898 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3899 : pivot_value_new_var_value (var, value));
3902 static struct pivot_value *
3903 ctables_postcompute_label (const struct ctables_categories *cats,
3904 const struct ctables_category *cat,
3905 const struct variable *var,
3906 const union value *value)
3908 struct substring in = ss_cstr (cat->pc->label);
3909 struct substring target = ss_cstr (")LABEL[");
3911 struct string out = DS_EMPTY_INITIALIZER;
3914 size_t chunk = ss_find_substring (in, target);
3915 if (chunk == SIZE_MAX)
3917 if (ds_is_empty (&out))
3918 return pivot_value_new_user_text (in.string, in.length);
3921 ds_put_substring (&out, in);
3922 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3926 ds_put_substring (&out, ss_head (in, chunk));
3927 ss_advance (&in, chunk + target.length);
3929 struct substring idx_s;
3930 if (!ss_get_until (&in, ']', &idx_s))
3933 long int idx = strtol (idx_s.string, &tail, 10);
3934 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3937 struct ctables_category *cat2 = &cats->cats[idx - 1];
3938 struct pivot_value *label2
3939 = ctables_category_create_label__ (cat2, var, value);
3940 char *label2_s = pivot_value_to_string_defaults (label2);
3941 ds_put_cstr (&out, label2_s);
3943 pivot_value_destroy (label2);
3948 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
3951 static struct pivot_value *
3952 ctables_category_create_label (const struct ctables_categories *cats,
3953 const struct ctables_category *cat,
3954 const struct variable *var,
3955 const union value *value)
3957 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3958 ? ctables_postcompute_label (cats, cat, var, value)
3959 : ctables_category_create_label__ (cat, var, value));
3962 static struct ctables_value *
3963 ctables_value_find__ (struct ctables_table *t, const union value *value,
3964 int width, unsigned int hash)
3966 struct ctables_value *clv;
3967 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3968 hash, &t->clabels_values_map)
3969 if (value_equal (value, &clv->value, width))
3975 ctables_value_insert (struct ctables_table *t, const union value *value,
3978 unsigned int hash = value_hash (value, width, 0);
3979 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3982 clv = xmalloc (sizeof *clv);
3983 value_clone (&clv->value, value, width);
3984 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3988 static struct ctables_value *
3989 ctables_value_find (struct ctables_table *t,
3990 const union value *value, int width)
3992 return ctables_value_find__ (t, value, width,
3993 value_hash (value, width, 0));
3997 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3998 size_t ix[PIVOT_N_AXES])
4000 if (a < PIVOT_N_AXES)
4002 size_t limit = MAX (t->stacks[a].n, 1);
4003 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4004 ctables_table_add_section (t, a + 1, ix);
4008 struct ctables_section *s = &t->sections[t->n_sections++];
4009 *s = (struct ctables_section) {
4011 .cells = HMAP_INITIALIZER (s->cells),
4013 for (a = 0; a < PIVOT_N_AXES; a++)
4016 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4018 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4019 for (size_t i = 0; i < nest->n; i++)
4020 hmap_init (&s->occurrences[a][i]);
4022 for (size_t i = 0; i < N_CTDTS; i++)
4023 hmap_init (&s->domains[i]);
4028 ctpo_add (double a, double b)
4034 ctpo_sub (double a, double b)
4040 ctpo_mul (double a, double b)
4046 ctpo_div (double a, double b)
4048 return b ? a / b : SYSMIS;
4052 ctpo_pow (double a, double b)
4054 int save_errno = errno;
4056 double result = pow (a, b);
4064 ctpo_neg (double a, double b UNUSED)
4069 struct ctables_pcexpr_evaluate_ctx
4071 const struct ctables_cell *cell;
4072 const struct ctables_section *section;
4073 const struct ctables_categories *cats;
4074 enum pivot_axis_type pc_a;
4077 enum fmt_type parse_format;
4080 static double ctables_pcexpr_evaluate (
4081 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4084 ctables_pcexpr_evaluate_nonterminal (
4085 const struct ctables_pcexpr_evaluate_ctx *ctx,
4086 const struct ctables_pcexpr *e, size_t n_args,
4087 double evaluate (double, double))
4089 double args[2] = { 0, 0 };
4090 for (size_t i = 0; i < n_args; i++)
4092 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4093 if (!isfinite (args[i]) || args[i] == SYSMIS)
4096 return evaluate (args[0], args[1]);
4100 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4101 const struct ctables_cell_value *pc_cv)
4103 const struct ctables_section *s = ctx->section;
4106 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4108 const struct ctables_nest *nest = s->nests[a];
4109 for (size_t i = 0; i < nest->n; i++)
4110 if (i != nest->scale_idx)
4112 const struct ctables_cell_value *cv
4113 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4114 : &ctx->cell->axes[a].cvs[i]);
4115 hash = hash_pointer (cv->category, hash);
4116 if (cv->category->type != CCT_TOTAL
4117 && cv->category->type != CCT_SUBTOTAL
4118 && cv->category->type != CCT_POSTCOMPUTE)
4119 hash = value_hash (&cv->value,
4120 var_get_width (nest->vars[i]), hash);
4124 struct ctables_cell *tc;
4125 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4127 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4129 const struct ctables_nest *nest = s->nests[a];
4130 for (size_t i = 0; i < nest->n; i++)
4131 if (i != nest->scale_idx)
4133 const struct ctables_cell_value *p_cv
4134 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4135 : &ctx->cell->axes[a].cvs[i]);
4136 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4137 if (p_cv->category != t_cv->category
4138 || (p_cv->category->type != CCT_TOTAL
4139 && p_cv->category->type != CCT_SUBTOTAL
4140 && p_cv->category->type != CCT_POSTCOMPUTE
4141 && !value_equal (&p_cv->value,
4143 var_get_width (nest->vars[i]))))
4155 const struct ctables_table *t = s->table;
4156 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4157 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4158 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4159 &specs->specs[ctx->summary_idx]);
4163 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4164 const struct ctables_pcexpr *e)
4171 case CTPO_CAT_NRANGE:
4172 case CTPO_CAT_SRANGE:
4174 struct ctables_cell_value cv = {
4175 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4177 assert (cv.category != NULL);
4179 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4180 const struct ctables_occurrence *o;
4183 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4184 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4185 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4187 cv.value = o->value;
4188 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4193 case CTPO_CAT_NUMBER:
4194 case CTPO_CAT_MISSING:
4195 case CTPO_CAT_OTHERNM:
4196 case CTPO_CAT_SUBTOTAL:
4197 case CTPO_CAT_TOTAL:
4199 struct ctables_cell_value cv = {
4200 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4201 .value = { .f = e->number },
4203 assert (cv.category != NULL);
4204 return ctables_pcexpr_evaluate_category (ctx, &cv);
4207 case CTPO_CAT_STRING:
4209 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4211 if (width > e->string.length)
4213 s = xmalloc (width);
4214 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4216 struct ctables_cell_value cv = {
4217 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4218 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4220 assert (cv.category != NULL);
4221 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4227 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4230 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4233 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4236 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4239 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4242 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4248 static const struct ctables_category *
4249 ctables_cell_postcompute (const struct ctables_section *s,
4250 const struct ctables_cell *cell,
4251 enum pivot_axis_type *pc_a_p,
4254 assert (cell->postcompute);
4255 const struct ctables_category *pc_cat = NULL;
4256 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4257 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4259 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4260 if (cv->category->type == CCT_POSTCOMPUTE)
4264 /* Multiple postcomputes cross each other. The value is
4269 pc_cat = cv->category;
4273 *pc_a_idx_p = pc_a_idx;
4277 assert (pc_cat != NULL);
4282 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4283 const struct ctables_cell *cell,
4284 const struct ctables_summary_spec *ss,
4285 struct fmt_spec *format,
4286 bool *is_ctables_format,
4289 enum pivot_axis_type pc_a = 0;
4290 size_t pc_a_idx = 0;
4291 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4292 s, cell, &pc_a, &pc_a_idx);
4296 const struct ctables_postcompute *pc = pc_cat->pc;
4299 for (size_t i = 0; i < pc->specs->n; i++)
4301 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4302 if (ss->function == ss2->function
4303 && ss->percentile == ss2->percentile)
4305 *format = ss2->format;
4306 *is_ctables_format = ss2->is_ctables_format;
4312 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4313 const struct ctables_categories *cats = s->table->categories[
4314 var_get_dict_index (var)];
4315 struct ctables_pcexpr_evaluate_ctx ctx = {
4320 .pc_a_idx = pc_a_idx,
4321 .summary_idx = summary_idx,
4322 .parse_format = pc_cat->parse_format,
4324 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4328 ctables_format (double d, const struct fmt_spec *format,
4329 const struct fmt_settings *settings)
4331 const union value v = { .f = d };
4332 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4334 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4335 produce the results we want for negative numbers, putting the negative
4336 sign in the wrong spot, before the prefix instead of after it. We can't,
4337 in fact, produce the desired results using a custom-currency
4338 specification. Instead, we postprocess the output, moving the negative
4341 NEQUAL: "-N=3" => "N=-3"
4342 PAREN: "-(3)" => "(-3)"
4343 PCTPAREN: "-(3%)" => "(-3%)"
4345 This transformation doesn't affect NEGPAREN. */
4346 char *minus_src = strchr (s, '-');
4347 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4349 char *n_equals = strstr (s, "N=");
4350 char *lparen = strchr (s, '(');
4351 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4353 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4359 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4361 struct pivot_table *pt = pivot_table_create__ (
4363 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4364 : pivot_value_new_text (N_("Custom Tables"))),
4367 pivot_table_set_caption (
4368 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4370 pivot_table_set_corner_text (
4371 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4373 bool summary_dimension = (t->summary_axis != t->slabels_axis
4374 || (!t->slabels_visible
4375 && t->summary_specs.n > 1));
4376 if (summary_dimension)
4378 struct pivot_dimension *d = pivot_dimension_create (
4379 pt, t->slabels_axis, N_("Statistics"));
4380 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4381 if (!t->slabels_visible)
4382 d->hide_all_labels = true;
4383 for (size_t i = 0; i < specs->n; i++)
4384 pivot_category_create_leaf (
4385 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4388 bool categories_dimension = t->clabels_example != NULL;
4389 if (categories_dimension)
4391 struct pivot_dimension *d = pivot_dimension_create (
4392 pt, t->label_axis[t->clabels_from_axis],
4393 t->clabels_from_axis == PIVOT_AXIS_ROW
4394 ? N_("Row Categories")
4395 : N_("Column Categories"));
4396 const struct variable *var = t->clabels_example;
4397 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4398 for (size_t i = 0; i < t->n_clabels_values; i++)
4400 const struct ctables_value *value = t->clabels_values[i];
4401 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4402 assert (cat != NULL);
4403 pivot_category_create_leaf (d->root, ctables_category_create_label (
4404 c, cat, t->clabels_example,
4409 pivot_table_set_look (pt, ct->look);
4410 struct pivot_dimension *d[PIVOT_N_AXES];
4411 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4413 static const char *names[] = {
4414 [PIVOT_AXIS_ROW] = N_("Rows"),
4415 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4416 [PIVOT_AXIS_LAYER] = N_("Layers"),
4418 d[a] = (t->axes[a] || a == t->summary_axis
4419 ? pivot_dimension_create (pt, a, names[a])
4424 assert (t->axes[a]);
4426 for (size_t i = 0; i < t->stacks[a].n; i++)
4428 struct ctables_nest *nest = &t->stacks[a].nests[i];
4429 struct ctables_section **sections = xnmalloc (t->n_sections,
4431 size_t n_sections = 0;
4433 size_t n_total_cells = 0;
4434 size_t max_depth = 0;
4435 for (size_t j = 0; j < t->n_sections; j++)
4436 if (t->sections[j].nests[a] == nest)
4438 struct ctables_section *s = &t->sections[j];
4439 sections[n_sections++] = s;
4440 n_total_cells += hmap_count (&s->cells);
4442 size_t depth = s->nests[a]->n;
4443 max_depth = MAX (depth, max_depth);
4446 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4448 size_t n_sorted = 0;
4450 for (size_t j = 0; j < n_sections; j++)
4452 struct ctables_section *s = sections[j];
4454 struct ctables_cell *cell;
4455 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4457 sorted[n_sorted++] = cell;
4458 assert (n_sorted <= n_total_cells);
4461 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4462 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4465 if (a == PIVOT_AXIS_ROW)
4467 size_t ids[N_CTDTS];
4468 memset (ids, 0, sizeof ids);
4469 for (size_t j = 0; j < n_sorted; j++)
4471 struct ctables_cell *cell = sorted[j];
4472 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4474 struct ctables_domain *domain = cell->domains[dt];
4475 if (!domain->sequence)
4476 domain->sequence = ++ids[dt];
4483 for (size_t j = 0; j < n_sorted; j++)
4485 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4490 struct ctables_level
4492 enum ctables_level_type
4494 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4495 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4496 CTL_SUMMARY, /* Summary functions. */
4500 enum settings_value_show vlabel; /* CTL_VAR only. */
4503 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4504 size_t n_levels = 0;
4505 for (size_t k = 0; k < nest->n; k++)
4507 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4508 if (vlabel != CTVL_NONE)
4510 levels[n_levels++] = (struct ctables_level) {
4512 .vlabel = (enum settings_value_show) vlabel,
4517 if (nest->scale_idx != k
4518 && (k != nest->n - 1 || t->label_axis[a] == a))
4520 levels[n_levels++] = (struct ctables_level) {
4521 .type = CTL_CATEGORY,
4527 if (!summary_dimension && a == t->slabels_axis)
4529 levels[n_levels++] = (struct ctables_level) {
4530 .type = CTL_SUMMARY,
4531 .var_idx = SIZE_MAX,
4535 /* Pivot categories:
4537 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4538 - category for nest->vars[0], if nest->scale_idx != 0
4539 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4540 - category for nest->vars[1], if nest->scale_idx != 1
4542 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4543 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4544 - summary function, if 'a == t->slabels_axis && a ==
4547 Additional dimensions:
4549 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4551 - If 't->label_axis[b] == a' for some 'b != a', add a category
4556 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4558 for (size_t j = 0; j < n_sorted; j++)
4560 struct ctables_cell *cell = sorted[j];
4561 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4563 size_t n_common = 0;
4566 for (; n_common < n_levels; n_common++)
4568 const struct ctables_level *level = &levels[n_common];
4569 if (level->type == CTL_CATEGORY)
4571 size_t var_idx = level->var_idx;
4572 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4573 if (prev->axes[a].cvs[var_idx].category != c)
4575 else if (c->type != CCT_SUBTOTAL
4576 && c->type != CCT_TOTAL
4577 && c->type != CCT_POSTCOMPUTE
4578 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4579 &cell->axes[a].cvs[var_idx].value,
4580 var_get_type (nest->vars[var_idx])))
4586 for (size_t k = n_common; k < n_levels; k++)
4588 const struct ctables_level *level = &levels[k];
4589 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4590 if (level->type == CTL_SUMMARY)
4592 assert (k == n_levels - 1);
4594 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4595 for (size_t m = 0; m < specs->n; m++)
4597 int leaf = pivot_category_create_leaf (
4598 parent, ctables_summary_label (&specs->specs[m],
4606 const struct variable *var = nest->vars[level->var_idx];
4607 struct pivot_value *label;
4608 if (level->type == CTL_VAR)
4610 label = pivot_value_new_variable (var);
4611 label->variable.show = level->vlabel;
4613 else if (level->type == CTL_CATEGORY)
4615 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4616 label = ctables_category_create_label (
4617 t->categories[var_get_dict_index (var)],
4618 cv->category, var, &cv->value);
4623 if (k == n_levels - 1)
4624 prev_leaf = pivot_category_create_leaf (parent, label);
4626 groups[k] = pivot_category_create_group__ (parent, label);
4630 cell->axes[a].leaf = prev_leaf;
4640 size_t n_total_cells = 0;
4641 for (size_t j = 0; j < t->n_sections; j++)
4642 n_total_cells += hmap_count (&t->sections[j].cells);
4644 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4645 size_t n_sorted = 0;
4646 for (size_t j = 0; j < t->n_sections; j++)
4648 const struct ctables_section *s = &t->sections[j];
4649 struct ctables_cell *cell;
4650 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4652 sorted[n_sorted++] = cell;
4654 assert (n_sorted <= n_total_cells);
4655 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4657 size_t ids[N_CTDTS];
4658 memset (ids, 0, sizeof ids);
4659 for (size_t j = 0; j < n_sorted; j++)
4661 struct ctables_cell *cell = sorted[j];
4662 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4664 struct ctables_domain *domain = cell->domains[dt];
4665 if (!domain->sequence)
4666 domain->sequence = ++ids[dt];
4673 for (size_t i = 0; i < t->n_sections; i++)
4675 struct ctables_section *s = &t->sections[i];
4677 struct ctables_cell *cell;
4678 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4683 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4684 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4685 for (size_t j = 0; j < specs->n; j++)
4688 size_t n_dindexes = 0;
4690 if (summary_dimension)
4691 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4693 if (categories_dimension)
4695 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4696 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4697 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4698 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4701 dindexes[n_dindexes++] = ctv->leaf;
4704 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4707 int leaf = cell->axes[a].leaf;
4708 if (a == t->summary_axis && !summary_dimension)
4710 dindexes[n_dindexes++] = leaf;
4713 const struct ctables_summary_spec *ss = &specs->specs[j];
4715 struct fmt_spec format = specs->specs[j].format;
4716 bool is_ctables_format = ss->is_ctables_format;
4717 double d = (cell->postcompute
4718 ? ctables_cell_calculate_postcompute (
4719 s, cell, ss, &format, &is_ctables_format, j)
4720 : ctables_summary_value (cell, &cell->summaries[j],
4723 struct pivot_value *value;
4724 if (ct->hide_threshold != 0
4725 && d < ct->hide_threshold
4726 && ctables_summary_function_is_count (ss->function))
4728 value = pivot_value_new_user_text_nocopy (
4729 xasprintf ("<%d", ct->hide_threshold));
4731 else if (d == 0 && ct->zero)
4732 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4733 else if (d == SYSMIS && ct->missing)
4734 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4735 else if (is_ctables_format)
4736 value = pivot_value_new_user_text_nocopy (
4737 ctables_format (d, &format, &ct->ctables_formats));
4740 value = pivot_value_new_number (d);
4741 value->numeric.format = format;
4743 /* XXX should text values be right-justified? */
4744 pivot_table_put (pt, dindexes, n_dindexes, value);
4749 pivot_table_submit (pt);
4753 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4755 enum pivot_axis_type label_pos = t->label_axis[a];
4759 t->clabels_from_axis = a;
4761 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4762 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4764 const struct ctables_stack *stack = &t->stacks[a];
4768 const struct ctables_nest *n0 = &stack->nests[0];
4771 assert (stack->n == 1);
4775 const struct variable *v0 = n0->vars[n0->n - 1];
4776 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4777 t->clabels_example = v0;
4779 for (size_t i = 0; i < c0->n_cats; i++)
4780 if (c0->cats[i].type == CCT_FUNCTION)
4782 msg (SE, _("%s=%s is not allowed with sorting based "
4783 "on a summary function."),
4784 subcommand_name, pos_name);
4787 if (n0->n - 1 == n0->scale_idx)
4789 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4790 "but %s is a scale variable."),
4791 subcommand_name, pos_name, var_get_name (v0));
4795 for (size_t i = 1; i < stack->n; i++)
4797 const struct ctables_nest *ni = &stack->nests[i];
4799 const struct variable *vi = ni->vars[ni->n - 1];
4800 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4802 if (ni->n - 1 == ni->scale_idx)
4804 msg (SE, _("%s=%s requires the variables to be moved to be "
4805 "categorical, but %s is a scale variable."),
4806 subcommand_name, pos_name, var_get_name (vi));
4809 if (var_get_width (v0) != var_get_width (vi))
4811 msg (SE, _("%s=%s requires the variables to be "
4812 "moved to have the same width, but %s has "
4813 "width %d and %s has width %d."),
4814 subcommand_name, pos_name,
4815 var_get_name (v0), var_get_width (v0),
4816 var_get_name (vi), var_get_width (vi));
4819 if (!val_labs_equal (var_get_value_labels (v0),
4820 var_get_value_labels (vi)))
4822 msg (SE, _("%s=%s requires the variables to be "
4823 "moved to have the same value labels, but %s "
4824 "and %s have different value labels."),
4825 subcommand_name, pos_name,
4826 var_get_name (v0), var_get_name (vi));
4829 if (!ctables_categories_equal (c0, ci))
4831 msg (SE, _("%s=%s requires the variables to be "
4832 "moved to have the same category "
4833 "specifications, but %s and %s have different "
4834 "category specifications."),
4835 subcommand_name, pos_name,
4836 var_get_name (v0), var_get_name (vi));
4845 add_sum_var (struct variable *var,
4846 struct variable ***sum_vars, size_t *n, size_t *allocated)
4848 for (size_t i = 0; i < *n; i++)
4849 if (var == (*sum_vars)[i])
4852 if (*n >= *allocated)
4853 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4854 (*sum_vars)[*n] = var;
4859 enumerate_sum_vars (const struct ctables_axis *a,
4860 struct variable ***sum_vars, size_t *n, size_t *allocated)
4868 for (size_t i = 0; i < N_CSVS; i++)
4869 for (size_t j = 0; j < a->specs[i].n; j++)
4871 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4872 if (ctables_function_is_pctsum (spec->function))
4873 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4879 for (size_t i = 0; i < 2; i++)
4880 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4886 ctables_prepare_table (struct ctables_table *t)
4888 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4891 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4893 for (size_t j = 0; j < t->stacks[a].n; j++)
4895 struct ctables_nest *nest = &t->stacks[a].nests[j];
4896 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4898 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4899 nest->n_domains[dt] = 0;
4901 for (size_t k = 0; k < nest->n; k++)
4903 if (k == nest->scale_idx)
4912 if (a != PIVOT_AXIS_LAYER)
4919 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4920 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4921 : a == PIVOT_AXIS_ROW)
4923 if (k == nest->n - 1
4924 || (nest->scale_idx == nest->n - 1
4925 && k == nest->n - 2))
4931 if (a == PIVOT_AXIS_COLUMN)
4936 if (a == PIVOT_AXIS_ROW)
4941 nest->domains[dt][nest->n_domains[dt]++] = k;
4948 struct ctables_nest *nest = xmalloc (sizeof *nest);
4949 *nest = (struct ctables_nest) { .n = 0 };
4950 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4952 /* There's no point in moving labels away from an axis that has no
4953 labels, so avoid dealing with the special cases around that. */
4954 t->label_axis[a] = a;
4957 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4958 for (size_t i = 0; i < stack->n; i++)
4960 struct ctables_nest *nest = &stack->nests[i];
4961 if (!nest->specs[CSV_CELL].n)
4963 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4964 specs->specs = xmalloc (sizeof *specs->specs);
4967 enum ctables_summary_function function
4968 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4970 *specs->specs = (struct ctables_summary_spec) {
4971 .function = function,
4972 .format = ctables_summary_default_format (function, specs->var),
4975 specs->var = nest->vars[0];
4977 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4978 &nest->specs[CSV_CELL]);
4980 else if (!nest->specs[CSV_TOTAL].n)
4981 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4982 &nest->specs[CSV_CELL]);
4984 if (t->ctables->smissing_listwise)
4986 struct variable **listwise_vars = NULL;
4988 size_t allocated = 0;
4990 for (size_t j = nest->group_head; j < stack->n; j++)
4992 const struct ctables_nest *other_nest = &stack->nests[j];
4993 if (other_nest->group_head != nest->group_head)
4996 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4999 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5000 sizeof *listwise_vars);
5001 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5004 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5007 listwise_vars = xmemdup (listwise_vars,
5008 n * sizeof *listwise_vars);
5009 nest->specs[sv].listwise_vars = listwise_vars;
5010 nest->specs[sv].n_listwise_vars = n;
5015 struct ctables_summary_spec_set *merged = &t->summary_specs;
5016 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5018 for (size_t j = 0; j < stack->n; j++)
5020 const struct ctables_nest *nest = &stack->nests[j];
5022 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5023 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5028 struct merge_item min = items[0];
5029 for (size_t j = 1; j < n_left; j++)
5030 if (merge_item_compare_3way (&items[j], &min) < 0)
5033 if (merged->n >= merged->allocated)
5034 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5035 sizeof *merged->specs);
5036 merged->specs[merged->n++] = min.set->specs[min.ofs];
5038 for (size_t j = 0; j < n_left; )
5040 if (merge_item_compare_3way (&items[j], &min) == 0)
5042 struct merge_item *item = &items[j];
5043 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5044 if (++item->ofs >= item->set->n)
5046 items[j] = items[--n_left];
5056 for (size_t j = 0; j < merged->n; j++)
5057 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5059 for (size_t j = 0; j < stack->n; j++)
5061 const struct ctables_nest *nest = &stack->nests[j];
5062 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5064 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5065 for (size_t k = 0; k < specs->n; k++)
5066 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5067 specs->specs[k].axis_idx);
5073 size_t allocated_sum_vars = 0;
5074 enumerate_sum_vars (t->axes[t->summary_axis],
5075 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5077 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5078 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5082 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5083 enum pivot_axis_type a)
5085 struct ctables_stack *stack = &t->stacks[a];
5086 for (size_t i = 0; i < stack->n; i++)
5088 const struct ctables_nest *nest = &stack->nests[i];
5089 const struct variable *var = nest->vars[nest->n - 1];
5090 const union value *value = case_data (c, var);
5092 if (var_is_numeric (var) && value->f == SYSMIS)
5095 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5097 ctables_value_insert (t, value, var_get_width (var));
5102 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5104 const struct ctables_value *const *ap = a_;
5105 const struct ctables_value *const *bp = b_;
5106 const struct ctables_value *a = *ap;
5107 const struct ctables_value *b = *bp;
5108 const int *width = width_;
5109 return value_compare_3way (&a->value, &b->value, *width);
5113 ctables_sort_clabels_values (struct ctables_table *t)
5115 const struct variable *v0 = t->clabels_example;
5116 int width = var_get_width (v0);
5118 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5121 const struct val_labs *val_labs = var_get_value_labels (v0);
5122 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5123 vl = val_labs_next (val_labs, vl))
5124 if (ctables_categories_match (c0, &vl->value, v0))
5125 ctables_value_insert (t, &vl->value, width);
5128 size_t n = hmap_count (&t->clabels_values_map);
5129 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5131 struct ctables_value *clv;
5133 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5134 t->clabels_values[i++] = clv;
5135 t->n_clabels_values = n;
5138 sort (t->clabels_values, n, sizeof *t->clabels_values,
5139 compare_clabels_values_3way, &width);
5141 for (size_t i = 0; i < n; i++)
5142 t->clabels_values[i]->leaf = i;
5146 ctables_add_category_occurrences (const struct variable *var,
5147 struct hmap *occurrences,
5148 const struct ctables_categories *cats)
5150 const struct val_labs *val_labs = var_get_value_labels (var);
5152 for (size_t i = 0; i < cats->n_cats; i++)
5154 const struct ctables_category *c = &cats->cats[i];
5158 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5164 int width = var_get_width (var);
5166 value_init (&value, width);
5167 value_copy_buf_rpad (&value, width,
5168 CHAR_CAST (uint8_t *, c->string.string),
5169 c->string.length, ' ');
5170 ctables_add_occurrence (var, &value, occurrences);
5171 value_destroy (&value, width);
5176 assert (var_is_numeric (var));
5177 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5178 vl = val_labs_next (val_labs, vl))
5179 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5180 ctables_add_occurrence (var, &vl->value, occurrences);
5184 assert (var_is_alpha (var));
5185 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5186 vl = val_labs_next (val_labs, vl))
5187 if (in_string_range (&vl->value, var, c->srange))
5188 ctables_add_occurrence (var, &vl->value, occurrences);
5192 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5193 vl = val_labs_next (val_labs, vl))
5194 if (var_is_value_missing (var, &vl->value))
5195 ctables_add_occurrence (var, &vl->value, occurrences);
5199 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5200 vl = val_labs_next (val_labs, vl))
5201 ctables_add_occurrence (var, &vl->value, occurrences);
5204 case CCT_POSTCOMPUTE:
5214 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5215 vl = val_labs_next (val_labs, vl))
5216 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5217 ctables_add_occurrence (var, &vl->value, occurrences);
5220 case CCT_EXCLUDED_MISSING:
5227 ctables_section_recurse_add_empty_categories (
5228 struct ctables_section *s,
5229 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5230 enum pivot_axis_type a, size_t a_idx)
5232 if (a >= PIVOT_N_AXES)
5233 ctables_cell_insert__ (s, c, cats);
5234 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5235 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5238 const struct variable *var = s->nests[a]->vars[a_idx];
5239 const struct ctables_categories *categories = s->table->categories[
5240 var_get_dict_index (var)];
5241 int width = var_get_width (var);
5242 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5243 const struct ctables_occurrence *o;
5244 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5246 union value *value = case_data_rw (c, var);
5247 value_destroy (value, width);
5248 value_clone (value, &o->value, width);
5249 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5250 assert (cats[a][a_idx] != NULL);
5251 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5254 for (size_t i = 0; i < categories->n_cats; i++)
5256 const struct ctables_category *cat = &categories->cats[i];
5257 if (cat->type == CCT_POSTCOMPUTE)
5259 cats[a][a_idx] = cat;
5260 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5267 ctables_section_add_empty_categories (struct ctables_section *s)
5269 bool show_empty = false;
5270 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5272 for (size_t k = 0; k < s->nests[a]->n; k++)
5273 if (k != s->nests[a]->scale_idx)
5275 const struct variable *var = s->nests[a]->vars[k];
5276 const struct ctables_categories *cats = s->table->categories[
5277 var_get_dict_index (var)];
5278 if (cats->show_empty)
5281 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5287 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5288 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5289 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5294 ctables_section_clear (struct ctables_section *s)
5296 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5298 const struct ctables_nest *nest = s->nests[a];
5299 for (size_t i = 0; i < nest->n; i++)
5300 if (i != nest->scale_idx)
5302 const struct variable *var = nest->vars[i];
5303 int width = var_get_width (var);
5304 struct ctables_occurrence *o, *next;
5305 struct hmap *map = &s->occurrences[a][i];
5306 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5308 value_destroy (&o->value, width);
5309 hmap_delete (map, &o->node);
5316 struct ctables_cell *cell, *next_cell;
5317 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5319 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5321 const struct ctables_nest *nest = s->nests[a];
5322 for (size_t i = 0; i < nest->n; i++)
5323 if (i != nest->scale_idx)
5324 value_destroy (&cell->axes[a].cvs[i].value,
5325 var_get_width (nest->vars[i]));
5326 free (cell->axes[a].cvs);
5329 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5330 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5331 for (size_t i = 0; i < specs->n; i++)
5332 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5333 free (cell->summaries);
5335 hmap_delete (&s->cells, &cell->node);
5338 hmap_shrink (&s->cells);
5340 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5342 struct ctables_domain *domain, *next_domain;
5343 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5346 free (domain->sums);
5347 hmap_delete (&s->domains[dt], &domain->node);
5350 hmap_shrink (&s->domains[dt]);
5355 ctables_section_uninit (struct ctables_section *s)
5357 ctables_section_clear (s);
5359 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5361 struct ctables_nest *nest = s->nests[a];
5362 for (size_t i = 0; i < nest->n; i++)
5363 hmap_destroy (&s->occurrences[a][i]);
5364 free (s->occurrences[a]);
5367 hmap_destroy (&s->cells);
5368 for (size_t i = 0; i < N_CTDTS; i++)
5369 hmap_destroy (&s->domains[i]);
5373 ctables_table_clear (struct ctables_table *t)
5375 for (size_t i = 0; i < t->n_sections; i++)
5376 ctables_section_clear (&t->sections[i]);
5378 if (t->clabels_example)
5380 int width = var_get_width (t->clabels_example);
5381 struct ctables_value *value, *next_value;
5382 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5383 &t->clabels_values_map)
5385 value_destroy (&value->value, width);
5386 hmap_delete (&t->clabels_values_map, &value->node);
5389 hmap_shrink (&t->clabels_values_map);
5391 free (t->clabels_values);
5392 t->clabels_values = NULL;
5393 t->n_clabels_values = 0;
5398 ctables_execute (struct dataset *ds, struct casereader *input,
5401 for (size_t i = 0; i < ct->n_tables; i++)
5403 struct ctables_table *t = ct->tables[i];
5404 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5405 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5406 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5407 sizeof *t->sections);
5408 size_t ix[PIVOT_N_AXES];
5409 ctables_table_add_section (t, 0, ix);
5412 struct dictionary *dict = dataset_dict (ds);
5413 struct casegrouper *grouper
5414 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5415 ? casegrouper_create_splits (input, dict)
5416 : casegrouper_create_vars (input, NULL, 0));
5417 struct casereader *group;
5418 while (casegrouper_get_next_group (grouper, &group))
5420 /* Output SPLIT FILE variables. */
5421 struct ccase *c = casereader_peek (group, 0);
5424 output_split_file_values (ds, c);
5428 bool warn_on_invalid = true;
5429 for (c = casereader_read (group); c;
5430 case_unref (c), c = casereader_read (group))
5432 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5433 double e_weight = (ct->e_weight
5434 ? var_force_valid_weight (ct->e_weight,
5435 case_num (c, ct->e_weight),
5439 for (size_t i = 0; i < ct->n_tables; i++)
5441 struct ctables_table *t = ct->tables[i];
5443 for (size_t j = 0; j < t->n_sections; j++)
5444 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5446 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5447 if (t->label_axis[a] != a)
5448 ctables_insert_clabels_values (t, c, a);
5451 casereader_destroy (group);
5453 for (size_t i = 0; i < ct->n_tables; i++)
5455 struct ctables_table *t = ct->tables[i];
5457 if (t->clabels_example)
5458 ctables_sort_clabels_values (t);
5460 for (size_t j = 0; j < t->n_sections; j++)
5461 ctables_section_add_empty_categories (&t->sections[j]);
5463 ctables_table_output (ct, t);
5464 ctables_table_clear (t);
5467 return casegrouper_destroy (grouper);
5472 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5473 struct dictionary *);
5476 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5482 case CTPO_CAT_STRING:
5483 ss_dealloc (&e->string);
5486 case CTPO_CAT_SRANGE:
5487 for (size_t i = 0; i < 2; i++)
5488 ss_dealloc (&e->srange[i]);
5497 for (size_t i = 0; i < 2; i++)
5498 ctables_pcexpr_destroy (e->subs[i]);
5502 case CTPO_CAT_NUMBER:
5503 case CTPO_CAT_NRANGE:
5504 case CTPO_CAT_MISSING:
5505 case CTPO_CAT_OTHERNM:
5506 case CTPO_CAT_SUBTOTAL:
5507 case CTPO_CAT_TOTAL:
5511 msg_location_destroy (e->location);
5516 static struct ctables_pcexpr *
5517 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5518 struct ctables_pcexpr *sub0,
5519 struct ctables_pcexpr *sub1)
5521 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5522 *e = (struct ctables_pcexpr) {
5524 .subs = { sub0, sub1 },
5525 .location = msg_location_merged (sub0->location, sub1->location),
5530 /* How to parse an operator. */
5533 enum token_type token;
5534 enum ctables_postcompute_op op;
5537 static const struct operator *
5538 ctables_pcexpr_match_operator (struct lexer *lexer,
5539 const struct operator ops[], size_t n_ops)
5541 for (const struct operator *op = ops; op < ops + n_ops; op++)
5542 if (lex_token (lexer) == op->token)
5544 if (op->token != T_NEG_NUM)
5553 static struct ctables_pcexpr *
5554 ctables_pcexpr_parse_binary_operators__ (
5555 struct lexer *lexer, struct dictionary *dict,
5556 const struct operator ops[], size_t n_ops,
5557 parse_recursively_func *parse_next_level,
5558 const char *chain_warning, struct ctables_pcexpr *lhs)
5560 for (int op_count = 0; ; op_count++)
5562 const struct operator *op
5563 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5566 if (op_count > 1 && chain_warning)
5567 msg_at (SW, lhs->location, "%s", chain_warning);
5572 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5575 ctables_pcexpr_destroy (lhs);
5579 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5583 static struct ctables_pcexpr *
5584 ctables_pcexpr_parse_binary_operators (
5585 struct lexer *lexer, struct dictionary *dict,
5586 const struct operator ops[], size_t n_ops,
5587 parse_recursively_func *parse_next_level, const char *chain_warning)
5589 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5593 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5595 chain_warning, lhs);
5598 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5599 struct dictionary *);
5601 static struct ctables_pcexpr
5602 ctpo_cat_nrange (double low, double high)
5604 return (struct ctables_pcexpr) {
5605 .op = CTPO_CAT_NRANGE,
5606 .nrange = { low, high },
5610 static struct ctables_pcexpr
5611 ctpo_cat_srange (struct substring low, struct substring high)
5613 return (struct ctables_pcexpr) {
5614 .op = CTPO_CAT_SRANGE,
5615 .srange = { low, high },
5619 static struct ctables_pcexpr *
5620 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5622 int start_ofs = lex_ofs (lexer);
5623 struct ctables_pcexpr e;
5624 if (lex_is_number (lexer))
5626 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5627 .number = lex_number (lexer) };
5630 else if (lex_match_id (lexer, "MISSING"))
5631 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5632 else if (lex_match_id (lexer, "OTHERNM"))
5633 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5634 else if (lex_match_id (lexer, "TOTAL"))
5635 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5636 else if (lex_match_id (lexer, "SUBTOTAL"))
5638 size_t subtotal_index = 0;
5639 if (lex_match (lexer, T_LBRACK))
5641 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5643 subtotal_index = lex_integer (lexer);
5645 if (!lex_force_match (lexer, T_RBRACK))
5648 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5649 .subtotal_index = subtotal_index };
5651 else if (lex_match (lexer, T_LBRACK))
5653 if (lex_match_id (lexer, "LO"))
5655 if (!lex_force_match_id (lexer, "THRU"))
5658 if (lex_is_string (lexer))
5660 struct substring low = { .string = NULL };
5661 struct substring high = parse_substring (lexer, dict);
5662 e = ctpo_cat_srange (low, high);
5666 if (!lex_force_num (lexer))
5668 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5672 else if (lex_is_number (lexer))
5674 double number = lex_number (lexer);
5676 if (lex_match_id (lexer, "THRU"))
5678 if (lex_match_id (lexer, "HI"))
5679 e = ctpo_cat_nrange (number, DBL_MAX);
5682 if (!lex_force_num (lexer))
5684 e = ctpo_cat_nrange (number, lex_number (lexer));
5689 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5692 else if (lex_is_string (lexer))
5694 struct substring s = parse_substring (lexer, dict);
5696 if (lex_match_id (lexer, "THRU"))
5698 struct substring high;
5700 if (lex_match_id (lexer, "HI"))
5701 high = (struct substring) { .string = NULL };
5704 if (!lex_force_string (lexer))
5709 high = parse_substring (lexer, dict);
5712 e = ctpo_cat_srange (s, high);
5715 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5719 lex_error (lexer, NULL);
5723 if (!lex_force_match (lexer, T_RBRACK))
5725 if (e.op == CTPO_CAT_STRING)
5726 ss_dealloc (&e.string);
5727 else if (e.op == CTPO_CAT_SRANGE)
5729 ss_dealloc (&e.srange[0]);
5730 ss_dealloc (&e.srange[1]);
5735 else if (lex_match (lexer, T_LPAREN))
5737 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5740 if (!lex_force_match (lexer, T_RPAREN))
5742 ctables_pcexpr_destroy (ep);
5749 lex_error (lexer, NULL);
5753 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5754 return xmemdup (&e, sizeof e);
5757 static struct ctables_pcexpr *
5758 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5759 struct lexer *lexer, int start_ofs)
5761 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5762 *e = (struct ctables_pcexpr) {
5765 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5770 static struct ctables_pcexpr *
5771 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5773 static const struct operator op = { T_EXP, CTPO_POW };
5775 const char *chain_warning =
5776 _("The exponentiation operator (`**') is left-associative: "
5777 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5778 "To disable this warning, insert parentheses.");
5780 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5781 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5782 ctables_pcexpr_parse_primary,
5785 /* Special case for situations like "-5**6", which must be parsed as
5788 int start_ofs = lex_ofs (lexer);
5789 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5790 *lhs = (struct ctables_pcexpr) {
5791 .op = CTPO_CONSTANT,
5792 .number = -lex_tokval (lexer),
5793 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5797 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5798 lexer, dict, &op, 1,
5799 ctables_pcexpr_parse_primary, chain_warning, lhs);
5803 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5806 /* Parses the unary minus level. */
5807 static struct ctables_pcexpr *
5808 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5810 int start_ofs = lex_ofs (lexer);
5811 if (!lex_match (lexer, T_DASH))
5812 return ctables_pcexpr_parse_exp (lexer, dict);
5814 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5818 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5821 /* Parses the multiplication and division level. */
5822 static struct ctables_pcexpr *
5823 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5825 static const struct operator ops[] =
5827 { T_ASTERISK, CTPO_MUL },
5828 { T_SLASH, CTPO_DIV },
5831 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5832 sizeof ops / sizeof *ops,
5833 ctables_pcexpr_parse_neg, NULL);
5836 /* Parses the addition and subtraction level. */
5837 static struct ctables_pcexpr *
5838 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5840 static const struct operator ops[] =
5842 { T_PLUS, CTPO_ADD },
5843 { T_DASH, CTPO_SUB },
5844 { T_NEG_NUM, CTPO_ADD },
5847 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5848 ops, sizeof ops / sizeof *ops,
5849 ctables_pcexpr_parse_mul, NULL);
5852 static struct ctables_postcompute *
5853 ctables_find_postcompute (struct ctables *ct, const char *name)
5855 struct ctables_postcompute *pc;
5856 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5857 utf8_hash_case_string (name, 0), &ct->postcomputes)
5858 if (!utf8_strcasecmp (pc->name, name))
5864 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5867 int pcompute_start = lex_ofs (lexer) - 1;
5869 if (!lex_match (lexer, T_AND))
5871 lex_error_expecting (lexer, "&");
5874 if (!lex_force_id (lexer))
5877 char *name = ss_xstrdup (lex_tokss (lexer));
5880 if (!lex_force_match (lexer, T_EQUALS)
5881 || !lex_force_match_id (lexer, "EXPR")
5882 || !lex_force_match (lexer, T_LPAREN))
5888 int expr_start = lex_ofs (lexer);
5889 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5890 int expr_end = lex_ofs (lexer) - 1;
5891 if (!expr || !lex_force_match (lexer, T_RPAREN))
5893 ctables_pcexpr_destroy (expr);
5897 int pcompute_end = lex_ofs (lexer) - 1;
5899 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5902 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5905 msg_at (SW, location, _("New definition of &%s will override the "
5906 "previous definition."),
5908 msg_at (SN, pc->location, _("This is the previous definition."));
5910 ctables_pcexpr_destroy (pc->expr);
5911 msg_location_destroy (pc->location);
5916 pc = xmalloc (sizeof *pc);
5917 *pc = (struct ctables_postcompute) { .name = name };
5918 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5919 utf8_hash_case_string (pc->name, 0));
5922 pc->location = location;
5924 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5929 ctables_parse_pproperties_format (struct lexer *lexer,
5930 struct ctables_summary_spec_set *sss)
5932 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5934 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5935 && !(lex_token (lexer) == T_ID
5936 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5937 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5938 lex_tokss (lexer)))))
5940 /* Parse function. */
5941 enum ctables_summary_function function;
5942 if (!parse_ctables_summary_function (lexer, &function))
5945 /* Parse percentile. */
5946 double percentile = 0;
5947 if (function == CTSF_PTILE)
5949 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5951 percentile = lex_number (lexer);
5956 struct fmt_spec format;
5957 bool is_ctables_format;
5958 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5961 if (sss->n >= sss->allocated)
5962 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5963 sizeof *sss->specs);
5964 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5965 .function = function,
5966 .percentile = percentile,
5968 .is_ctables_format = is_ctables_format,
5974 ctables_summary_spec_set_uninit (sss);
5979 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5981 struct ctables_postcompute **pcs = NULL;
5983 size_t allocated_pcs = 0;
5985 while (lex_match (lexer, T_AND))
5987 if (!lex_force_id (lexer))
5989 struct ctables_postcompute *pc
5990 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5993 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5998 if (n_pcs >= allocated_pcs)
5999 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
6003 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6005 if (lex_match_id (lexer, "LABEL"))
6007 lex_match (lexer, T_EQUALS);
6008 if (!lex_force_string (lexer))
6011 for (size_t i = 0; i < n_pcs; i++)
6013 free (pcs[i]->label);
6014 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6019 else if (lex_match_id (lexer, "FORMAT"))
6021 lex_match (lexer, T_EQUALS);
6023 struct ctables_summary_spec_set sss;
6024 if (!ctables_parse_pproperties_format (lexer, &sss))
6027 for (size_t i = 0; i < n_pcs; i++)
6030 ctables_summary_spec_set_uninit (pcs[i]->specs);
6032 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6033 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6035 ctables_summary_spec_set_uninit (&sss);
6037 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6039 lex_match (lexer, T_EQUALS);
6040 bool hide_source_cats;
6041 if (!parse_bool (lexer, &hide_source_cats))
6043 for (size_t i = 0; i < n_pcs; i++)
6044 pcs[i]->hide_source_cats = hide_source_cats;
6048 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
6061 put_strftime (struct string *out, time_t now, const char *format)
6063 const struct tm *tm = localtime (&now);
6065 strftime (value, sizeof value, format, tm);
6066 ds_put_cstr (out, value);
6070 skip_prefix (struct substring *s, struct substring prefix)
6072 if (ss_starts_with (*s, prefix))
6074 ss_advance (s, prefix.length);
6082 put_table_expression (struct string *out, struct lexer *lexer,
6083 struct dictionary *dict, int expr_start, int expr_end)
6086 for (int ofs = expr_start; ofs < expr_end; ofs++)
6088 const struct token *t = lex_ofs_token (lexer, ofs);
6089 if (t->type == T_LBRACK)
6091 else if (t->type == T_RBRACK && nest > 0)
6097 else if (t->type == T_ID)
6099 const struct variable *var
6100 = dict_lookup_var (dict, t->string.string);
6101 const char *label = var ? var_get_label (var) : NULL;
6102 ds_put_cstr (out, label ? label : t->string.string);
6106 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6107 ds_put_byte (out, ' ');
6109 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6110 ds_put_cstr (out, repr);
6113 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6114 ds_put_byte (out, ' ');
6120 put_title_text (struct string *out, struct substring in, time_t now,
6121 struct lexer *lexer, struct dictionary *dict,
6122 int expr_start, int expr_end)
6126 size_t chunk = ss_find_byte (in, ')');
6127 ds_put_substring (out, ss_head (in, chunk));
6128 ss_advance (&in, chunk);
6129 if (ss_is_empty (in))
6132 if (skip_prefix (&in, ss_cstr (")DATE")))
6133 put_strftime (out, now, "%x");
6134 else if (skip_prefix (&in, ss_cstr (")TIME")))
6135 put_strftime (out, now, "%X");
6136 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6137 put_table_expression (out, lexer, dict, expr_start, expr_end);
6140 ds_put_byte (out, ')');
6141 ss_advance (&in, 1);
6147 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6149 struct casereader *input = NULL;
6151 struct measure_guesser *mg = measure_guesser_create (ds);
6154 input = proc_open (ds);
6155 measure_guesser_run (mg, input);
6156 measure_guesser_destroy (mg);
6159 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6160 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6161 enum settings_value_show tvars = settings_get_show_variables ();
6162 for (size_t i = 0; i < n_vars; i++)
6163 vlabels[i] = (enum ctables_vlabel) tvars;
6165 struct pivot_table_look *look = pivot_table_look_unshare (
6166 pivot_table_look_ref (pivot_table_look_get_default ()));
6167 look->omit_empty = false;
6169 struct ctables *ct = xmalloc (sizeof *ct);
6170 *ct = (struct ctables) {
6171 .dict = dataset_dict (ds),
6173 .ctables_formats = FMT_SETTINGS_INIT,
6175 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6178 time_t now = time (NULL);
6183 const char *dot_string;
6184 const char *comma_string;
6186 static const struct ctf ctfs[4] = {
6187 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6188 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6189 { CTEF_PAREN, "-,(,),", "-.(.)." },
6190 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6192 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6193 for (size_t i = 0; i < 4; i++)
6195 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6196 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6197 fmt_number_style_from_string (s));
6200 if (!lex_force_match (lexer, T_SLASH))
6203 while (!lex_match_id (lexer, "TABLE"))
6205 if (lex_match_id (lexer, "FORMAT"))
6207 double widths[2] = { SYSMIS, SYSMIS };
6208 double units_per_inch = 72.0;
6210 while (lex_token (lexer) != T_SLASH)
6212 if (lex_match_id (lexer, "MINCOLWIDTH"))
6214 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6217 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6219 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6222 else if (lex_match_id (lexer, "UNITS"))
6224 lex_match (lexer, T_EQUALS);
6225 if (lex_match_id (lexer, "POINTS"))
6226 units_per_inch = 72.0;
6227 else if (lex_match_id (lexer, "INCHES"))
6228 units_per_inch = 1.0;
6229 else if (lex_match_id (lexer, "CM"))
6230 units_per_inch = 2.54;
6233 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6237 else if (lex_match_id (lexer, "EMPTY"))
6242 lex_match (lexer, T_EQUALS);
6243 if (lex_match_id (lexer, "ZERO"))
6245 /* Nothing to do. */
6247 else if (lex_match_id (lexer, "BLANK"))
6248 ct->zero = xstrdup ("");
6249 else if (lex_force_string (lexer))
6251 ct->zero = ss_xstrdup (lex_tokss (lexer));
6257 else if (lex_match_id (lexer, "MISSING"))
6259 lex_match (lexer, T_EQUALS);
6260 if (!lex_force_string (lexer))
6264 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6265 ? ss_xstrdup (lex_tokss (lexer))
6271 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6272 "UNITS", "EMPTY", "MISSING");
6277 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6278 && widths[0] > widths[1])
6280 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6284 for (size_t i = 0; i < 2; i++)
6285 if (widths[i] != SYSMIS)
6287 int *wr = ct->look->width_ranges[TABLE_HORZ];
6288 wr[i] = widths[i] / units_per_inch * 96.0;
6293 else if (lex_match_id (lexer, "VLABELS"))
6295 if (!lex_force_match_id (lexer, "VARIABLES"))
6297 lex_match (lexer, T_EQUALS);
6299 struct variable **vars;
6301 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6305 if (!lex_force_match_id (lexer, "DISPLAY"))
6310 lex_match (lexer, T_EQUALS);
6312 enum ctables_vlabel vlabel;
6313 if (lex_match_id (lexer, "DEFAULT"))
6314 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6315 else if (lex_match_id (lexer, "NAME"))
6317 else if (lex_match_id (lexer, "LABEL"))
6318 vlabel = CTVL_LABEL;
6319 else if (lex_match_id (lexer, "BOTH"))
6321 else if (lex_match_id (lexer, "NONE"))
6325 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6331 for (size_t i = 0; i < n_vars; i++)
6332 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6335 else if (lex_match_id (lexer, "MRSETS"))
6337 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6339 lex_match (lexer, T_EQUALS);
6340 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6343 else if (lex_match_id (lexer, "SMISSING"))
6345 if (lex_match_id (lexer, "VARIABLE"))
6346 ct->smissing_listwise = false;
6347 else if (lex_match_id (lexer, "LISTWISE"))
6348 ct->smissing_listwise = true;
6351 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6355 else if (lex_match_id (lexer, "PCOMPUTE"))
6357 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6360 else if (lex_match_id (lexer, "PPROPERTIES"))
6362 if (!ctables_parse_pproperties (lexer, ct))
6365 else if (lex_match_id (lexer, "WEIGHT"))
6367 if (!lex_force_match_id (lexer, "VARIABLE"))
6369 lex_match (lexer, T_EQUALS);
6370 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6374 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6376 if (lex_match_id (lexer, "COUNT"))
6378 lex_match (lexer, T_EQUALS);
6379 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6382 ct->hide_threshold = lex_integer (lexer);
6385 else if (ct->hide_threshold == 0)
6386 ct->hide_threshold = 5;
6390 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6391 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6392 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6396 if (!lex_force_match (lexer, T_SLASH))
6400 size_t allocated_tables = 0;
6403 if (ct->n_tables >= allocated_tables)
6404 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6405 sizeof *ct->tables);
6407 struct ctables_category *cat = xmalloc (sizeof *cat);
6408 *cat = (struct ctables_category) {
6410 .include_missing = false,
6411 .sort_ascending = true,
6414 struct ctables_categories *c = xmalloc (sizeof *c);
6415 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6416 *c = (struct ctables_categories) {
6423 struct ctables_categories **categories = xnmalloc (n_vars,
6424 sizeof *categories);
6425 for (size_t i = 0; i < n_vars; i++)
6428 struct ctables_table *t = xmalloc (sizeof *t);
6429 *t = (struct ctables_table) {
6431 .slabels_axis = PIVOT_AXIS_COLUMN,
6432 .slabels_visible = true,
6433 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6435 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6436 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6437 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6439 .clabels_from_axis = PIVOT_AXIS_LAYER,
6440 .categories = categories,
6441 .n_categories = n_vars,
6444 ct->tables[ct->n_tables++] = t;
6446 lex_match (lexer, T_EQUALS);
6447 int expr_start = lex_ofs (lexer);
6448 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6450 if (lex_match (lexer, T_BY))
6452 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6453 ct, t, PIVOT_AXIS_COLUMN))
6456 if (lex_match (lexer, T_BY))
6458 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6459 ct, t, PIVOT_AXIS_LAYER))
6463 int expr_end = lex_ofs (lexer);
6465 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6466 && !t->axes[PIVOT_AXIS_LAYER])
6468 lex_error (lexer, _("At least one variable must be specified."));
6472 const struct ctables_axis *scales[PIVOT_N_AXES];
6473 size_t n_scales = 0;
6474 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6476 scales[a] = find_scale (t->axes[a]);
6482 msg (SE, _("Scale variables may appear only on one axis."));
6483 if (scales[PIVOT_AXIS_ROW])
6484 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6485 _("This scale variable appears on the rows axis."));
6486 if (scales[PIVOT_AXIS_COLUMN])
6487 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6488 _("This scale variable appears on the columns axis."));
6489 if (scales[PIVOT_AXIS_LAYER])
6490 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6491 _("This scale variable appears on the layer axis."));
6495 const struct ctables_axis *summaries[PIVOT_N_AXES];
6496 size_t n_summaries = 0;
6497 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6499 summaries[a] = (scales[a]
6501 : find_categorical_summary_spec (t->axes[a]));
6505 if (n_summaries > 1)
6507 msg (SE, _("Summaries may appear only on one axis."));
6508 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6511 msg_at (SN, summaries[a]->loc,
6513 ? _("This variable on the rows axis has a summary.")
6514 : a == PIVOT_AXIS_COLUMN
6515 ? _("This variable on the columns axis has a summary.")
6516 : _("This variable on the layers axis has a summary."));
6518 msg_at (SN, summaries[a]->loc,
6519 _("This is a scale variable, so it always has a "
6520 "summary even if the syntax does not explicitly "
6525 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6526 if (n_summaries ? summaries[a] : t->axes[a])
6528 t->summary_axis = a;
6532 if (lex_token (lexer) == T_ENDCMD)
6534 if (!ctables_prepare_table (t))
6538 if (!lex_force_match (lexer, T_SLASH))
6541 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6543 if (lex_match_id (lexer, "SLABELS"))
6545 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6547 if (lex_match_id (lexer, "POSITION"))
6549 lex_match (lexer, T_EQUALS);
6550 if (lex_match_id (lexer, "COLUMN"))
6551 t->slabels_axis = PIVOT_AXIS_COLUMN;
6552 else if (lex_match_id (lexer, "ROW"))
6553 t->slabels_axis = PIVOT_AXIS_ROW;
6554 else if (lex_match_id (lexer, "LAYER"))
6555 t->slabels_axis = PIVOT_AXIS_LAYER;
6558 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6562 else if (lex_match_id (lexer, "VISIBLE"))
6564 lex_match (lexer, T_EQUALS);
6565 if (!parse_bool (lexer, &t->slabels_visible))
6570 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6575 else if (lex_match_id (lexer, "CLABELS"))
6577 if (lex_match_id (lexer, "AUTO"))
6579 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6580 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6582 else if (lex_match_id (lexer, "ROWLABELS"))
6584 lex_match (lexer, T_EQUALS);
6585 if (lex_match_id (lexer, "OPPOSITE"))
6586 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6587 else if (lex_match_id (lexer, "LAYER"))
6588 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6591 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6595 else if (lex_match_id (lexer, "COLLABELS"))
6597 lex_match (lexer, T_EQUALS);
6598 if (lex_match_id (lexer, "OPPOSITE"))
6599 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6600 else if (lex_match_id (lexer, "LAYER"))
6601 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6604 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6610 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6615 else if (lex_match_id (lexer, "CRITERIA"))
6617 if (!lex_force_match_id (lexer, "CILEVEL"))
6619 lex_match (lexer, T_EQUALS);
6621 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6623 t->cilevel = lex_number (lexer);
6626 else if (lex_match_id (lexer, "CATEGORIES"))
6628 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6632 else if (lex_match_id (lexer, "TITLES"))
6637 if (lex_match_id (lexer, "CAPTION"))
6638 textp = &t->caption;
6639 else if (lex_match_id (lexer, "CORNER"))
6641 else if (lex_match_id (lexer, "TITLE"))
6645 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6648 lex_match (lexer, T_EQUALS);
6650 struct string s = DS_EMPTY_INITIALIZER;
6651 while (lex_is_string (lexer))
6653 if (!ds_is_empty (&s))
6654 ds_put_byte (&s, ' ');
6655 put_title_text (&s, lex_tokss (lexer), now,
6656 lexer, dataset_dict (ds),
6657 expr_start, expr_end);
6661 *textp = ds_steal_cstr (&s);
6663 while (lex_token (lexer) != T_SLASH
6664 && lex_token (lexer) != T_ENDCMD);
6666 else if (lex_match_id (lexer, "SIGTEST"))
6670 t->chisq = xmalloc (sizeof *t->chisq);
6671 *t->chisq = (struct ctables_chisq) {
6673 .include_mrsets = true,
6674 .all_visible = true,
6680 if (lex_match_id (lexer, "TYPE"))
6682 lex_match (lexer, T_EQUALS);
6683 if (!lex_force_match_id (lexer, "CHISQUARE"))
6686 else if (lex_match_id (lexer, "ALPHA"))
6688 lex_match (lexer, T_EQUALS);
6689 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6691 t->chisq->alpha = lex_number (lexer);
6694 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6696 lex_match (lexer, T_EQUALS);
6697 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6700 else if (lex_match_id (lexer, "CATEGORIES"))
6702 lex_match (lexer, T_EQUALS);
6703 if (lex_match_id (lexer, "ALLVISIBLE"))
6704 t->chisq->all_visible = true;
6705 else if (lex_match_id (lexer, "SUBTOTALS"))
6706 t->chisq->all_visible = false;
6709 lex_error_expecting (lexer,
6710 "ALLVISIBLE", "SUBTOTALS");
6716 lex_error_expecting (lexer, "TYPE", "ALPHA",
6717 "INCLUDEMRSETS", "CATEGORIES");
6721 while (lex_token (lexer) != T_SLASH
6722 && lex_token (lexer) != T_ENDCMD);
6724 else if (lex_match_id (lexer, "COMPARETEST"))
6728 t->pairwise = xmalloc (sizeof *t->pairwise);
6729 *t->pairwise = (struct ctables_pairwise) {
6731 .alpha = { .05, .05 },
6732 .adjust = BONFERRONI,
6733 .include_mrsets = true,
6734 .meansvariance_allcats = true,
6735 .all_visible = true,
6744 if (lex_match_id (lexer, "TYPE"))
6746 lex_match (lexer, T_EQUALS);
6747 if (lex_match_id (lexer, "PROP"))
6748 t->pairwise->type = PROP;
6749 else if (lex_match_id (lexer, "MEAN"))
6750 t->pairwise->type = MEAN;
6753 lex_error_expecting (lexer, "PROP", "MEAN");
6757 else if (lex_match_id (lexer, "ALPHA"))
6759 lex_match (lexer, T_EQUALS);
6761 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6763 double a0 = lex_number (lexer);
6766 lex_match (lexer, T_COMMA);
6767 if (lex_is_number (lexer))
6769 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6771 double a1 = lex_number (lexer);
6774 t->pairwise->alpha[0] = MIN (a0, a1);
6775 t->pairwise->alpha[1] = MAX (a0, a1);
6778 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6780 else if (lex_match_id (lexer, "ADJUST"))
6782 lex_match (lexer, T_EQUALS);
6783 if (lex_match_id (lexer, "BONFERRONI"))
6784 t->pairwise->adjust = BONFERRONI;
6785 else if (lex_match_id (lexer, "BH"))
6786 t->pairwise->adjust = BH;
6787 else if (lex_match_id (lexer, "NONE"))
6788 t->pairwise->adjust = 0;
6791 lex_error_expecting (lexer, "BONFERRONI", "BH",
6796 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6798 lex_match (lexer, T_EQUALS);
6799 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6802 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6804 lex_match (lexer, T_EQUALS);
6805 if (lex_match_id (lexer, "ALLCATS"))
6806 t->pairwise->meansvariance_allcats = true;
6807 else if (lex_match_id (lexer, "TESTEDCATS"))
6808 t->pairwise->meansvariance_allcats = false;
6811 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6815 else if (lex_match_id (lexer, "CATEGORIES"))
6817 lex_match (lexer, T_EQUALS);
6818 if (lex_match_id (lexer, "ALLVISIBLE"))
6819 t->pairwise->all_visible = true;
6820 else if (lex_match_id (lexer, "SUBTOTALS"))
6821 t->pairwise->all_visible = false;
6824 lex_error_expecting (lexer, "ALLVISIBLE",
6829 else if (lex_match_id (lexer, "MERGE"))
6831 lex_match (lexer, T_EQUALS);
6832 if (!parse_bool (lexer, &t->pairwise->merge))
6835 else if (lex_match_id (lexer, "STYLE"))
6837 lex_match (lexer, T_EQUALS);
6838 if (lex_match_id (lexer, "APA"))
6839 t->pairwise->apa_style = true;
6840 else if (lex_match_id (lexer, "SIMPLE"))
6841 t->pairwise->apa_style = false;
6844 lex_error_expecting (lexer, "APA", "SIMPLE");
6848 else if (lex_match_id (lexer, "SHOWSIG"))
6850 lex_match (lexer, T_EQUALS);
6851 if (!parse_bool (lexer, &t->pairwise->show_sig))
6856 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6857 "INCLUDEMRSETS", "MEANSVARIANCE",
6858 "CATEGORIES", "MERGE", "STYLE",
6863 while (lex_token (lexer) != T_SLASH
6864 && lex_token (lexer) != T_ENDCMD);
6868 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6869 "CRITERIA", "CATEGORIES", "TITLES",
6870 "SIGTEST", "COMPARETEST");
6874 if (!lex_match (lexer, T_SLASH))
6878 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6879 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6881 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6885 if (!ctables_prepare_table (t))
6888 while (lex_token (lexer) != T_ENDCMD);
6891 input = proc_open (ds);
6892 bool ok = ctables_execute (ds, input, ct);
6893 ok = proc_commit (ds) && ok;
6895 ctables_destroy (ct);
6896 return ok ? CMD_SUCCESS : CMD_FAILURE;
6901 ctables_destroy (ct);