1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/mrset.h"
25 #include "language/command.h"
26 #include "language/lexer/format-parser.h"
27 #include "language/lexer/lexer.h"
28 #include "language/lexer/variable-parser.h"
29 #include "libpspp/array.h"
30 #include "libpspp/assertion.h"
31 #include "libpspp/hash-functions.h"
32 #include "libpspp/hmap.h"
33 #include "libpspp/message.h"
34 #include "libpspp/string-array.h"
35 #include "math/moments.h"
36 #include "output/pivot-table.h"
38 #include "gl/minmax.h"
39 #include "gl/xalloc.h"
42 #define _(msgid) gettext (msgid)
43 #define N_(msgid) (msgid)
47 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
48 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
49 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
50 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
54 - unweighted summaries (U*)
55 - lower confidence limits (*.LCL)
56 - upper confidence limits (*.UCL)
57 - standard error (*.SE)
60 /* All variables. */ \
61 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
62 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
63 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
64 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
65 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
66 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
67 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
68 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
69 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
70 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
71 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
72 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
73 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
85 /* Scale variables, totals, and subtotals. */ \
86 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
87 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
88 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
89 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
90 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
91 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
92 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
93 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
94 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
95 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
96 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
97 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
98 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
99 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
100 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
101 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
103 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
104 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
105 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
106 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
107 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
108 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
110 /* Multiple response sets. */ \
111 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
112 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
113 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
114 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
115 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
116 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
117 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
118 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
119 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
120 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
121 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
122 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
123 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
/* The syntax name must be COUNT.RESPONSES to agree with the enumerator and the label; RESPONSES.COUNT is already taken by CTSF_LAYERCOLPCT_RESPONSES_COUNT. */ \
132 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
134 enum ctables_summary_function
136 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
142 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
143 N_CTSF_FUNCTIONS = SUMMARIES
147 struct ctables_subtable
149 /* In struct ctables's 'subtables' hmap. Indexed by all the values in all
150 the axes except the innermost row and column variable and the scalar
151 variable, if any. (If the scalar variable is the innermost row or
152 column variable, then the second-to-innermost variable is also omitted
154 struct hmap_node node;
156 const struct ctables_freq *example;
164 /* In struct ctables's 'layers' hmap. Indexed by all the values in the
165 layer axis, except the scalar variable, if any. */
166 struct hmap_node node;
174 /* In struct ctables's 'ft' hmap. Indexed by all the values in all the
175 axes (except the scalar variable, if any). */
176 struct hmap_node node;
178 /* The subtable that contains this cell. */
179 struct ctables_subtable *subtable;
181 /* The layer that contains this cell. */
191 union ctables_summary *summaries;
196 struct pivot_table_look *look;
198 /* If this is NULL, zeros are displayed using the normal print format.
199 Otherwise, this string is displayed. */
202 /* If this is NULL, missing values are displayed using the normal print
203 format. Otherwise, this string is displayed. */
206 /* Indexed by variable dictionary index. */
207 enum ctables_vlabel *vlabels;
209 bool mrsets_count_duplicates; /* MRSETS. */
210 bool smissing_listwise; /* SMISSING. */
211 struct variable *base_weight; /* WEIGHT. */
212 int hide_threshold; /* HIDESMALLCOUNTS. */
214 struct ctables_table **tables;
218 struct ctables_postcompute
220 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
221 const char *name; /* Name, without leading &. */
223 struct ctables_postcompute_expr *expr;
226 bool hide_source_cats;
229 struct ctables_postcompute_expr
231 enum ctables_postcompute_op
239 /* XXX SUBTOTAL and HSUBTOTAL */
252 /* CTPO_CAT_NUMBER, CTPO_NUMBER. */
257 XXX what about string ranges? */
260 /* CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW. */
261 struct ctables_postcompute_expr *subs[2];
265 enum ctables_label_position
274 struct variable **vars;
279 struct ctables_summary_spec *summaries;
281 struct variable *summary_var;
286 struct var_array *vas;
292 struct ctables_axis *axes[PIVOT_N_AXES];
293 struct var_array2 vaas[PIVOT_N_AXES];
294 enum pivot_axis_type summary_axis;
296 struct hmap subtables;
298 enum pivot_axis_type slabels_position;
299 bool slabels_visible;
301 enum ctables_label_position row_labels;
302 enum ctables_label_position col_labels;
304 /* Indexed by variable dictionary index. */
305 struct ctables_categories **categories;
314 struct ctables_chisq *chisq;
315 struct ctables_pairwise *pairwise;
324 struct variable *var;
325 const struct mrset *mrset;
/* Returns the print format to use for VAR: for a multiple response set, the
   print format of the set's first member variable; otherwise, the print
   format of the variable itself. */
329 static const struct fmt_spec *
330 ctables_var_get_print_format (const struct ctables_var *var)
332 return (var->is_mrset
333 ? var_get_print_format (var->mrset->vars[0])
334 : var_get_print_format (var->var));
338 ctables_var_name (const struct ctables_var *var)
340 return var->is_mrset ? var->mrset->name : var_get_name (var->var);
343 struct ctables_categories
347 /* Explicit categories. */
348 struct ctables_cat_value *values;
351 /* Implicit categories. */
353 bool include_missing;
354 enum { CTCS_VALUE, CTCS_LABEL, CTCS_FUNCTION } key;
355 enum ctables_summary_function sort_func;
356 struct variable *sort_func_var;
364 /* Empty categories. */
368 struct ctables_cat_value
370 enum ctables_cat_value_type
384 double number; /* CCVT_NUMBER. */
385 char *string; /* CCVT_STRING. */
386 double range[2]; /* CCVT_RANGE. */
387 char *subtotal_label; /* CCVT_SUBTOTAL, CCVT_HSUBTOTAL. */
392 ctables_cat_value_uninit (struct ctables_cat_value *cv)
411 free (cv->subtotal_label);
416 ctables_categories_unref (struct ctables_categories *c)
421 assert (c->n_refs > 0);
425 for (size_t i = 0; i < c->n_values; i++)
426 ctables_cat_value_uninit (&c->values[i]);
428 free (c->total_label);
432 /* Chi-square test (SIGTEST). */
440 /* Pairwise comparison test (COMPARETEST). */
441 struct ctables_pairwise
443 enum { PROP, MEAN } type;
446 bool meansvariance_allcats;
448 enum { BONFERRONI = 1, BH } adjust;
472 struct ctables_var var;
474 struct ctables_summary_spec *summaries;
476 size_t allocated_summaries;
480 struct ctables_axis *subs[2];
483 struct msg_location *loc;
486 static void ctables_axis_destroy (struct ctables_axis *);
495 enum ctables_function_availability
497 CTFA_ALL, /* Any variables. */
498 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
499 CTFA_MRSETS, /* Only multiple-response sets */
502 struct ctables_summary_spec
504 enum ctables_summary_function function;
505 double percentile; /* CTSF_PTILE only. */
507 struct fmt_spec format; /* XXX extra CTABLES formats */
511 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
518 parse_col_width (struct lexer *lexer, const char *name, double *width)
520 lex_match (lexer, T_EQUALS);
521 if (lex_match_id (lexer, "DEFAULT"))
523 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
525 *width = lex_number (lexer);
535 parse_bool (struct lexer *lexer, bool *b)
537 if (lex_match_id (lexer, "NO"))
539 else if (lex_match_id (lexer, "YES"))
543 lex_error_expecting (lexer, "YES", "NO");
/* Returns the class of variables to which summary function F may be applied
   (one of the CTFA_* values), looked up in a static table indexed by
   function.  NOTE(review): the table is presumably filled in by expanding
   SUMMARIES with the S() definition below -- confirm. */
549 static enum ctables_function_availability
550 ctables_function_availability (enum ctables_summary_function f)
552 static enum ctables_function_availability availability[] = {
553 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
558 return availability[f];
562 parse_ctables_summary_function (struct lexer *lexer,
563 enum ctables_summary_function *f)
567 enum ctables_summary_function function;
568 struct substring name;
570 static struct pair names[] = {
571 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
572 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
575 /* The .COUNT suffix may be omitted. */
576 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
577 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
578 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
579 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
580 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
581 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
582 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
586 if (!lex_force_id (lexer))
589 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
590 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
592 *f = names[i].function;
597 lex_error (lexer, _("Expecting summary function name."));
602 ctables_axis_destroy (struct ctables_axis *axis)
610 for (size_t i = 0; i < axis->n_summaries; i++)
611 ctables_summary_spec_uninit (&axis->summaries[i]);
612 free (axis->summaries);
617 ctables_axis_destroy (axis->subs[0]);
618 ctables_axis_destroy (axis->subs[1]);
621 msg_location_destroy (axis->loc);
/* Allocates a new nonterminal axis node for operator OP whose two operands
   are SUB0 and SUB1.  The node's source location covers the tokens from
   START_OFS through the token just before LEXER's current one.
   NOTE(review): presumably returns the new node and takes ownership of SUB0
   and SUB1 -- confirm against ctables_axis_destroy. */
625 static struct ctables_axis *
626 ctables_axis_new_nonterminal (enum ctables_axis_op op,
627 struct ctables_axis *sub0,
628 struct ctables_axis *sub1,
629 struct lexer *lexer, int start_ofs)
631 struct ctables_axis *axis = xmalloc (sizeof *axis);
632 *axis = (struct ctables_axis) {
634 .subs = { sub0, sub1 },
635 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
640 struct ctables_axis_parse_ctx
643 struct dictionary *dict;
645 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The choice depends on the function's format class from the S() table:
   one class yields F with width 40 and no decimals, another yields PCT with
   width 40 and 1 decimal, and the remaining one yields VAR's own print
   format.  NOTE(review): presumably these correspond to CTF_COUNT,
   CTF_PERCENT, and CTF_GENERAL respectively -- confirm against the case
   labels. */
648 static struct fmt_spec
649 ctables_summary_default_format (enum ctables_summary_function function,
650 const struct ctables_var *var)
652 static const enum ctables_format default_formats[] = {
653 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
657 switch (default_formats[function])
660 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
663 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
666 return *ctables_var_get_print_format (var);
674 ctables_summary_default_label (enum ctables_summary_function function,
677 static const char *default_labels[] = {
678 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
683 return (function == CTSF_PTILE
684 ? xasprintf (_("Percentile %.2f"), percentile)
685 : xstrdup (gettext (default_labels[function])));
689 ctables_summary_function_name (enum ctables_summary_function function)
691 static const char *names[] = {
692 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
696 return names[function];
700 add_summary_spec (struct ctables_axis *axis,
701 enum ctables_summary_function function, double percentile,
702 const char *label, const struct fmt_spec *format,
703 const struct msg_location *loc)
705 if (axis->op == CTAO_VAR)
707 if (axis->n_summaries >= axis->allocated_summaries)
708 axis->summaries = x2nrealloc (axis->summaries,
709 &axis->allocated_summaries,
710 sizeof *axis->summaries);
712 const char *function_name = ctables_summary_function_name (function);
713 const char *var_name = ctables_var_name (&axis->var);
714 switch (ctables_function_availability (function))
717 if (!axis->var.is_mrset)
719 msg_at (SE, loc, _("Summary function %s applies only to multiple "
720 "response sets."), function_name);
721 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
731 _("Summary function %s applies only to scale variables."),
733 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
743 struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++];
744 *dst = (struct ctables_summary_spec) {
745 .function = function,
746 .percentile = percentile,
747 .label = xstrdup (label),
748 .format = (format ? *format
749 : ctables_summary_default_format (function, &axis->var)),
755 for (size_t i = 0; i < 2; i++)
756 if (!add_summary_spec (axis->subs[i], function, percentile, label,
763 static struct ctables_axis *ctables_axis_parse_stack (
764 struct ctables_axis_parse_ctx *);
767 ctables_var_parse (struct lexer *lexer, struct dictionary *dict,
768 struct ctables_var *var)
770 if (ss_starts_with (lex_tokss (lexer), ss_cstr ("$")))
772 *var = (struct ctables_var) {
774 .mrset = dict_lookup_mrset (dict, lex_tokcstr (lexer))
778 lex_error (lexer, _("'%s' does not name a multiple-response set "
779 "in the active file dictionary."),
780 lex_tokcstr (lexer));
788 *var = (struct ctables_var) {
790 .var = parse_variable (lexer, dict),
792 return var->var != NULL;
/* Parses a primary axis expression from CTX's lexer: either a parenthesized
   stack expression, or a single variable or multiple response set name
   optionally followed by "[S]" or "[C]".

   For a variable, a new CTAO_VAR node is allocated.  Its 'scale' flag is
   false for a multiple response set; otherwise "[S]" forces it true, "[C]"
   forces it false, and without either marker it follows the variable's
   measurement level.  The node's location spans the tokens consumed.

   NOTE(review): presumably returns the parsed node, or NULL on a syntax
   error -- confirm against the return statements. */
796 static struct ctables_axis *
797 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
799 if (lex_match (ctx->lexer, T_LPAREN))
801 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* A failed sub-parse or a missing ')' discards whatever was parsed. */
802 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
804 ctables_axis_destroy (sub);
810 if (!lex_force_id (ctx->lexer))
813 int start_ofs = lex_ofs (ctx->lexer);
814 struct ctables_var var;
815 if (!ctables_var_parse (ctx->lexer, ctx->dict, &var))
818 struct ctables_axis *axis = xmalloc (sizeof *axis);
819 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
821 /* XXX should figure out default measures by reading data */
822 axis->scale = (var.is_mrset ? false
823 : lex_match_phrase (ctx->lexer, "[S]") ? true
824 : lex_match_phrase (ctx->lexer, "[C]") ? false
825 : var_get_measure (var.var) == MEASURE_SCALE);
826 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
827 lex_ofs (ctx->lexer) - 1);
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications, e.g. VAR [MEAN 'Average' F8.2, COUNT].

   Each specification consists of a summary function name, a percentile in
   the range 0...100 if the function is PTILE, an optional quoted label (a
   default label is generated otherwise), and an optional output format,
   which must be a valid numeric output format.  Specifications may be
   separated by commas and the list ends at ']'.  Each one is attached to
   the parsed primary via add_summary_spec().

   If no '[' follows the primary, nothing more is consumed.
   NOTE(review): presumably returns the primary node on success and NULL
   (after destroying it) on error -- confirm against the return statements. */
831 static struct ctables_axis *
832 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
834 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
835 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
840 int start_ofs = lex_ofs (ctx->lexer);
842 /* Parse function. */
843 enum ctables_summary_function function;
844 if (!parse_ctables_summary_function (ctx->lexer, &function))
847 /* Parse percentile. */
848 double percentile = 0;
849 if (function == CTSF_PTILE)
851 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
853 percentile = lex_number (ctx->lexer);
854 lex_get (ctx->lexer);
/* Parse label: an explicit string if present, else the function's default. */
859 if (lex_is_string (ctx->lexer))
861 label = ss_xstrdup (lex_tokss (ctx->lexer));
862 lex_get (ctx->lexer);
865 label = ctables_summary_default_label (function, percentile);
/* Parse format: only attempted when the next token is an identifier. */
868 struct fmt_spec format;
869 const struct fmt_spec *formatp;
870 if (lex_token (ctx->lexer) == T_ID)
872 if (!parse_format_specifier (ctx->lexer, &format)
873 || !fmt_check_output (&format)
874 || !fmt_check_type_compat (&format, VAL_NUMERIC))
884 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
885 lex_ofs (ctx->lexer) - 1);
/* NOTE(review): add_summary_spec() returns a value that is tested elsewhere
   in this file, but its result is not used on this line -- confirm that a
   failure here is meant to be non-fatal. */
886 add_summary_spec (sub, function, percentile, label, formatp, loc);
888 msg_location_destroy (loc);
890 lex_match (ctx->lexer, T_COMMA);
892 while (!lex_match (ctx->lexer, T_RBRACK));
897 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node is examined directly (a scale variable is asserted not to be a
   multiple response set); for a nonterminal node, both operands are searched
   recursively in order.  NOTE(review): presumably returns the first scale
   variable node found, or NULL if there is none -- callers test the result
   for null and read its 'loc'. */
901 static const struct ctables_axis *
902 find_scale (const struct ctables_axis *axis)
906 else if (axis->op == CTAO_VAR)
910 assert (!axis->var.is_mrset);
918 for (size_t i = 0; i < 2; i++)
920 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one summary specification.  For a variable
   node, returns AXIS itself if it qualifies and NULL otherwise; for a
   nonterminal node, searches both operands recursively in order.
   NOTE(review): presumably returns the first match from the operands, or
   NULL if neither contains one. */
928 static const struct ctables_axis *
929 find_categorical_summary_spec (const struct ctables_axis *axis)
933 else if (axis->op == CTAO_VAR)
934 return !axis->scale && axis->n_summaries ? axis : NULL;
937 for (size_t i = 0; i < 2; i++)
939 const struct ctables_axis *sum
940 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions joined by '>' (nesting) and
   combines each pair into a CTAO_NEST node.

   Two restrictions are enforced for every nesting, each reported at the
   nest's location with notes pointing at the offending operands, after
   which the nest node is destroyed:

   - The outer and inner operands may not both contain a scale variable.

   - The outer operand may not contain a categorical variable that has
     summaries, since those are allowed only at the innermost level.

   NOTE(review): presumably returns the resulting tree, or NULL on error --
   confirm against the return statements. */
948 static struct ctables_axis *
949 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
951 int start_ofs = lex_ofs (ctx->lexer);
952 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
956 while (lex_match (ctx->lexer, T_GT))
958 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
962 struct ctables_axis *nest = ctables_axis_new_nonterminal (
963 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
965 const struct ctables_axis *outer_scale = find_scale (lhs);
966 const struct ctables_axis *inner_scale = find_scale (rhs);
967 if (outer_scale && inner_scale)
969 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
970 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
971 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
972 ctables_axis_destroy (nest);
976 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
979 msg_at (SE, nest->loc,
980 _("Summaries may only be requested for categorical variables "
981 "at the innermost nesting level."));
982 msg_at (SN, outer_sum->loc,
983 _("This outer categorical variable has a summary."));
984 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by '+' (stacking).  Each
   additional operand is combined with everything parsed so far into a new
   CTAO_STACK node, so the resulting tree is left-associative, and every
   stack node's location starts at the first token of the whole expression.
   NOTE(review): presumably returns the resulting tree, or NULL on error --
   confirm against the return statements. */
994 static struct ctables_axis *
995 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
997 int start_ofs = lex_ofs (ctx->lexer);
998 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1002 while (lex_match (ctx->lexer, T_PLUS))
1004 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1008 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1009 ctx->lexer, start_ofs);
1016 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1017 struct ctables *ct, struct ctables_table *t,
1018 enum pivot_axis_type a)
1020 if (lex_token (lexer) == T_BY
1021 || lex_token (lexer) == T_SLASH
1022 || lex_token (lexer) == T_ENDCMD)
1025 struct ctables_axis_parse_ctx ctx = {
1031 t->axes[a] = ctables_axis_parse_stack (&ctx);
1032 return t->axes[a] != NULL;
1036 ctables_chisq_destroy (struct ctables_chisq *chisq)
1042 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1048 ctables_table_destroy (struct ctables_table *t)
1053 for (size_t i = 0; i < t->n_categories; i++)
1054 ctables_categories_unref (t->categories[i]);
1055 free (t->categories);
1057 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1058 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1059 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1063 ctables_chisq_destroy (t->chisq);
1064 ctables_pairwise_destroy (t->pairwise);
1069 ctables_destroy (struct ctables *ct)
1074 pivot_table_look_unref (ct->look);
1078 for (size_t i = 0; i < ct->n_tables; i++)
1079 ctables_table_destroy (ct->tables[i]);
/* Returns a category value whose 'range' member runs from LOW to HIGH.
   Callers pass -DBL_MAX or DBL_MAX for an open-ended bound.
   NOTE(review): the value's type is presumably CCVT_RANGE -- confirm. */
1084 static struct ctables_cat_value
1085 ccvt_range (double low, double high)
1087 return (struct ctables_cat_value) {
1089 .range = { low, high }
1094 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1095 struct ctables_table *t)
1097 if (!lex_match_id (lexer, "VARIABLES"))
1099 lex_match (lexer, T_EQUALS);
1101 struct variable **vars;
1103 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1106 struct ctables_categories *c = xmalloc (sizeof *c);
1107 *c = (struct ctables_categories) { .n_refs = n_vars };
1108 for (size_t i = 0; i < n_vars; i++)
1110 struct ctables_categories **cp
1111 = &t->categories[var_get_dict_index (vars[i])];
1112 ctables_categories_unref (*cp);
1117 if (lex_match (lexer, T_LBRACK))
1119 size_t allocated_values = 0;
1122 if (c->n_values >= allocated_values)
1123 c->values = x2nrealloc (c->values, &allocated_values,
1126 struct ctables_cat_value *v = &c->values[c->n_values];
1127 if (lex_match_id (lexer, "OTHERNM"))
1128 v->type = CCVT_OTHERNM;
1129 else if (lex_match_id (lexer, "MISSING"))
1130 v->type = CCVT_MISSING;
1131 else if (lex_match_id (lexer, "SUBTOTAL"))
1132 *v = (struct ctables_cat_value)
1133 { .type = CCVT_SUBTOTAL, .subtotal_label = NULL };
1134 else if (lex_match_id (lexer, "HSUBTOTAL"))
1135 *v = (struct ctables_cat_value)
1136 { .type = CCVT_HSUBTOTAL, .subtotal_label = NULL };
1137 else if (lex_match_id (lexer, "LO"))
/* "LO THRU <number>": both THRU and the numeric upper bound are required.
   Each lex_force_*() call is negated so that this check fails only when
   one of them is missing, as with the other lex_force_num() test in this
   function. */
1139 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
1141 *v = ccvt_range (-DBL_MAX, lex_number (lexer));
1144 else if (lex_is_number (lexer))
1146 double number = lex_number (lexer);
1148 if (lex_match_id (lexer, "THRU"))
1150 v->type = CCVT_RANGE;
1151 v->range[0] = number;
1152 if (lex_match_id (lexer, "HI"))
1153 *v = ccvt_range (number, DBL_MAX);
1156 if (!lex_force_num (lexer))
1158 *v = ccvt_range (number, lex_number (lexer));
1163 *v = (struct ctables_cat_value) {
1164 .type = CCVT_NUMBER,
1168 else if (lex_is_string (lexer))
1170 *v = (struct ctables_cat_value) {
1171 .type = CCVT_STRING,
1172 .string = ss_xstrdup (lex_tokss (lexer)),
1178 lex_error (lexer, NULL);
1182 if ((v->type == CCVT_SUBTOTAL || v->type == CCVT_HSUBTOTAL)
1183 && lex_match (lexer, T_EQUALS))
1185 if (!lex_force_string (lexer))
1188 v->subtotal_label = ss_xstrdup (lex_tokss (lexer));
1193 lex_match (lexer, T_COMMA);
1195 while (!lex_match (lexer, T_RBRACK));
1198 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1200 if (!c->n_values && lex_match_id (lexer, "ORDER"))
1202 lex_match (lexer, T_EQUALS);
1203 if (lex_match_id (lexer, "A"))
1204 c->sort_ascending = true;
1205 else if (lex_match_id (lexer, "D"))
1206 c->sort_ascending = false;
1209 lex_error_expecting (lexer, "A", "D");
1213 else if (!c->n_values && lex_match_id (lexer, "KEY"))
1215 lex_match (lexer, T_EQUALS);
1216 if (lex_match_id (lexer, "VALUE"))
1217 c->key = CTCS_VALUE;
1218 else if (lex_match_id (lexer, "LABEL"))
1219 c->key = CTCS_LABEL;
1222 c->key = CTCS_FUNCTION;
1223 if (!parse_ctables_summary_function (lexer, &c->sort_func))
1226 if (lex_match (lexer, T_LPAREN))
1228 c->sort_func_var = parse_variable (lexer, dict);
1229 if (!c->sort_func_var)
1232 if (c->sort_func == CTSF_PTILE)
1234 lex_match (lexer, T_COMMA);
1235 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1237 c->percentile = lex_number (lexer);
1241 if (!lex_force_match (lexer, T_RPAREN))
1244 else if (ctables_function_availability (c->sort_func)
1247 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1252 else if (!c->n_values && lex_match_id (lexer, "MISSING"))
1254 lex_match (lexer, T_EQUALS);
1255 if (lex_match_id (lexer, "INCLUDE"))
1256 c->include_missing = true;
1257 else if (lex_match_id (lexer, "EXCLUDE"))
1258 c->include_missing = false;
1261 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1265 else if (lex_match_id (lexer, "TOTAL"))
1267 lex_match (lexer, T_EQUALS);
1268 if (!parse_bool (lexer, &c->show_totals))
1271 else if (lex_match_id (lexer, "LABEL"))
1273 lex_match (lexer, T_EQUALS);
1274 if (!lex_force_string (lexer))
1276 free (c->total_label);
1277 c->total_label = ss_xstrdup (lex_tokss (lexer));
1280 else if (lex_match_id (lexer, "POSITION"))
1282 lex_match (lexer, T_EQUALS);
1283 if (lex_match_id (lexer, "BEFORE"))
1284 c->totals_before = true;
1285 else if (lex_match_id (lexer, "AFTER"))
1286 c->totals_before = false;
1289 lex_error_expecting (lexer, "BEFORE", "AFTER");
1293 else if (lex_match_id (lexer, "EMPTY"))
1295 lex_match (lexer, T_EQUALS);
1296 if (lex_match_id (lexer, "INCLUDE"))
1297 c->show_empty = true;
1298 else if (lex_match_id (lexer, "EXCLUDE"))
1299 c->show_empty = false;
1302 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1309 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1310 "TOTAL", "LABEL", "POSITION", "EMPTY");
1312 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
1320 var_array_uninit (struct var_array *va)
1327 var_array2_uninit (struct var_array2 *vaa)
1331 for (size_t i = 0; i < vaa->n; i++)
1332 var_array_uninit (&vaa->vas[i]);
/* Returns the nesting of VA0 and VA1: one var_array for every pair (A, B)
   with A taken from VA0 and B from VA1, ordered with VA0's index varying
   slowest.

   Each result lists A's variables followed by B's.  Its scale index is A's
   if A has one, otherwise B's shifted by the number of variables in A.  Its
   summaries, summary count, and summary variable are copied from whichever
   of A or B is selected as 'summary_src'.  NOTE(review): presumably that is
   the one that has a summary variable -- confirm.

   VA0 and VA1 are passed to var_array2_uninit() before returning.
   NOTE(review): presumably this releases their storage, so callers should
   not use them afterward -- confirm. */
1337 static struct var_array2
1338 nest_fts (struct var_array2 va0, struct var_array2 va1)
1345 struct var_array2 vaa = { .vas = xnmalloc (va0.n, va1.n * sizeof *vaa.vas) };
1346 for (size_t i = 0; i < va0.n; i++)
1347 for (size_t j = 0; j < va1.n; j++)
1349 const struct var_array *a = &va0.vas[i];
1350 const struct var_array *b = &va1.vas[j];
1352 size_t allocate = a->n + b->n;
1353 struct variable **vars = xnmalloc (allocate, sizeof *vars);
1354 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
/* Concatenate A's variables and then B's. */
1356 for (size_t k = 0; k < a->n; k++)
1357 vars[n++] = a->vars[k];
1358 for (size_t k = 0; k < b->n; k++)
1359 vars[n++] = b->vars[k];
1360 assert (n == allocate);
1362 const struct var_array *summary_src;
1363 if (!a->summary_var)
1365 else if (!b->summary_var)
1369 vaa.vas[vaa.n++] = (struct var_array) {
1371 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
1372 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
1375 .summaries = summary_src->summaries,
1376 .n_summaries = summary_src->n_summaries,
1377 .summary_var = summary_src->summary_var,
1380 var_array2_uninit (&va0);
1381 var_array2_uninit (&va1);
/* Returns the stacking of VA0 and VA1: a newly allocated array holding all of
   VA0's var_arrays followed by all of VA1's.  The elements are copied by
   structure assignment, so the result shares whatever the elements point
   to with the inputs. */
1385 static struct var_array2
1386 stack_fts (struct var_array2 va0, struct var_array2 va1)
1388 struct var_array2 vaa = { .vas = xnmalloc (va0.n + va1.n, sizeof *vaa.vas) };
1389 for (size_t i = 0; i < va0.n; i++)
1390 vaa.vas[vaa.n++] = va0.vas[i];
1391 for (size_t i = 0; i < va1.n; i++)
1392 vaa.vas[vaa.n++] = va1.vas[i];
1393 assert (vaa.n == va0.n + va1.n);
/* Expands axis expression A into the list of variable combinations that it
   denotes.

   - NOTE(review): an empty list is presumably returned when A is null --
     confirm against the guarding condition.

   - A variable node (asserted not to be a multiple response set) yields a
     single var_array.  Its scale index is 0 if the variable is scale and
     SIZE_MAX otherwise.  If the node has summaries or is scale, its
     summaries are attached and the variable becomes the summary variable.

   - The remaining branches recurse into both operands and combine the
     results with stack_fts() or nest_fts().  NOTE(review): presumably for
     CTAO_STACK and CTAO_NEST nodes respectively -- confirm against the case
     labels. */
1399 static struct var_array2
1400 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
1403 return (struct var_array2) { .n = 0 };
1408 assert (!a->var.is_mrset);
1410 struct variable **vars = xmalloc (sizeof *vars);
1413 struct var_array *va = xmalloc (sizeof *va);
1414 *va = (struct var_array) {
1417 .scale_idx = a->scale ? 0 : SIZE_MAX,
1419 if (a->n_summaries || a->scale)
1421 va->summaries = a->summaries;
1422 va->n_summaries = a->n_summaries;
1423 va->summary_var = a->var.var;
1425 return (struct var_array2) { .vas = va, .n = 1 };
1428 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
1429 enumerate_fts (axis_type, a->subs[1]));
1432 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
1433 enumerate_fts (axis_type, a->subs[1]));
1439 union ctables_summary
1441 /* COUNT, VALIDN, TOTALN. */
1448 /* MINIMUM, MAXIMUM, RANGE. */
1455 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
1456 struct moments1 *moments;
1458 /* XXX percentiles, median, mode, multiple response */
/* Initializes accumulator S for the summary function SS->function.  The
   case labels are grouped by the kind of state the function needs. */
1462 ctables_summary_init (union ctables_summary *s,
1463 const struct ctables_summary_spec *ss)
1465 switch (ss->function)
/* Count-based summaries and the percentages derived from them. */
1469 case CTSF_ROWPCT_COUNT:
1470 case CTSF_COLPCT_COUNT:
1471 case CTSF_TABLEPCT_COUNT:
1472 case CTSF_SUBTABLEPCT_COUNT:
1473 case CTSF_LAYERPCT_COUNT:
1474 case CTSF_LAYERROWPCT_COUNT:
1475 case CTSF_LAYERCOLPCT_COUNT:
1476 case CTSF_ROWPCT_VALIDN:
1477 case CTSF_COLPCT_VALIDN:
1478 case CTSF_TABLEPCT_VALIDN:
1479 case CTSF_SUBTABLEPCT_VALIDN:
1480 case CTSF_LAYERPCT_VALIDN:
1481 case CTSF_LAYERROWPCT_VALIDN:
1482 case CTSF_LAYERCOLPCT_VALIDN:
1483 case CTSF_ROWPCT_TOTALN:
1484 case CTSF_COLPCT_TOTALN:
1485 case CTSF_TABLEPCT_TOTALN:
1486 case CTSF_SUBTABLEPCT_TOTALN:
1487 case CTSF_LAYERPCT_TOTALN:
1488 case CTSF_LAYERROWPCT_TOTALN:
1489 case CTSF_LAYERCOLPCT_TOTALN:
/* Both running weights start at zero. */
1494 s->missing = s->valid = 0;
/* No value has been seen yet, so both extremes start as SYSMIS. */
1500 s->min = s->max = SYSMIS;
/* Sum-based summaries and the percentages derived from them. */
1508 case CTSF_ROWPCT_SUM:
1509 case CTSF_COLPCT_SUM:
1510 case CTSF_TABLEPCT_SUM:
1511 case CTSF_SUBTABLEPCT_SUM:
1512 case CTSF_LAYERPCT_SUM:
1513 case CTSF_LAYERROWPCT_SUM:
1514 case CTSF_LAYERCOLPCT_SUM:
/* These accumulate through a moments1 object created for MOMENT_VARIANCE. */
1515 s->moments = moments1_create (MOMENT_VARIANCE);
/* Multiple-response summaries; their handling is not visible in this
   excerpt. */
1524 case CTSF_RESPONSES:
1525 case CTSF_ROWPCT_RESPONSES:
1526 case CTSF_COLPCT_RESPONSES:
1527 case CTSF_TABLEPCT_RESPONSES:
1528 case CTSF_SUBTABLEPCT_RESPONSES:
1529 case CTSF_LAYERPCT_RESPONSES:
1530 case CTSF_LAYERROWPCT_RESPONSES:
1531 case CTSF_LAYERCOLPCT_RESPONSES:
1532 case CTSF_ROWPCT_RESPONSES_COUNT:
1533 case CTSF_COLPCT_RESPONSES_COUNT:
1534 case CTSF_TABLEPCT_RESPONSES_COUNT:
1535 case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
1536 case CTSF_LAYERPCT_RESPONSES_COUNT:
1537 case CTSF_LAYERROWPCT_RESPONSES_COUNT:
1538 case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
1539 case CTSF_ROWPCT_COUNT_RESPONSES:
1540 case CTSF_COLPCT_COUNT_RESPONSES:
1541 case CTSF_TABLEPCT_COUNT_RESPONSES:
1542 case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
1543 case CTSF_LAYERPCT_COUNT_RESPONSES:
1544 case CTSF_LAYERROWPCT_COUNT_RESPONSES:
1545 case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
/* Releases whatever accumulator S holds for the summary function
   SS->function.  Of the visible cases, only the sum-based group owns a
   resource (the moments1 object) that needs destroying. */
1551 ctables_summary_uninit (union ctables_summary *s,
1552 const struct ctables_summary_spec *ss)
1554 switch (ss->function)
/* Count-based summaries and the percentages derived from them. */
1558 case CTSF_ROWPCT_COUNT:
1559 case CTSF_COLPCT_COUNT:
1560 case CTSF_TABLEPCT_COUNT:
1561 case CTSF_SUBTABLEPCT_COUNT:
1562 case CTSF_LAYERPCT_COUNT:
1563 case CTSF_LAYERROWPCT_COUNT:
1564 case CTSF_LAYERCOLPCT_COUNT:
1565 case CTSF_ROWPCT_VALIDN:
1566 case CTSF_COLPCT_VALIDN:
1567 case CTSF_TABLEPCT_VALIDN:
1568 case CTSF_SUBTABLEPCT_VALIDN:
1569 case CTSF_LAYERPCT_VALIDN:
1570 case CTSF_LAYERROWPCT_VALIDN:
1571 case CTSF_LAYERCOLPCT_VALIDN:
1572 case CTSF_ROWPCT_TOTALN:
1573 case CTSF_COLPCT_TOTALN:
1574 case CTSF_TABLEPCT_TOTALN:
1575 case CTSF_SUBTABLEPCT_TOTALN:
1576 case CTSF_LAYERPCT_TOTALN:
1577 case CTSF_LAYERROWPCT_TOTALN:
1578 case CTSF_LAYERCOLPCT_TOTALN:
/* Sum-based summaries and the percentages derived from them. */
1595 case CTSF_ROWPCT_SUM:
1596 case CTSF_COLPCT_SUM:
1597 case CTSF_TABLEPCT_SUM:
1598 case CTSF_SUBTABLEPCT_SUM:
1599 case CTSF_LAYERPCT_SUM:
1600 case CTSF_LAYERROWPCT_SUM:
1601 case CTSF_LAYERCOLPCT_SUM:
/* Counterpart of the moments1_create() call in ctables_summary_init(). */
1602 moments1_destroy (s->moments);
/* Multiple-response summaries; their handling is not visible in this
   excerpt. */
1611 case CTSF_RESPONSES:
1612 case CTSF_ROWPCT_RESPONSES:
1613 case CTSF_COLPCT_RESPONSES:
1614 case CTSF_TABLEPCT_RESPONSES:
1615 case CTSF_SUBTABLEPCT_RESPONSES:
1616 case CTSF_LAYERPCT_RESPONSES:
1617 case CTSF_LAYERROWPCT_RESPONSES:
1618 case CTSF_LAYERCOLPCT_RESPONSES:
1619 case CTSF_ROWPCT_RESPONSES_COUNT:
1620 case CTSF_COLPCT_RESPONSES_COUNT:
1621 case CTSF_TABLEPCT_RESPONSES_COUNT:
1622 case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
1623 case CTSF_LAYERPCT_RESPONSES_COUNT:
1624 case CTSF_LAYERROWPCT_RESPONSES_COUNT:
1625 case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
1626 case CTSF_ROWPCT_COUNT_RESPONSES:
1627 case CTSF_COLPCT_COUNT_RESPONSES:
1628 case CTSF_TABLEPCT_COUNT_RESPONSES:
1629 case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
1630 case CTSF_LAYERPCT_COUNT_RESPONSES:
1631 case CTSF_LAYERROWPCT_COUNT_RESPONSES:
1632 case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
/* Folds one observation into accumulator S for the summary function
   SS->function.  VALUE is the observation's value of VAR; the weight used
   below comes from a parameter whose declaration is not visible in this
   excerpt. */
1638 ctables_summary_add (union ctables_summary *s,
1639 const struct ctables_summary_spec *ss,
1640 const struct variable *var, const union value *value,
1643 switch (ss->function)
/* Count-based summaries and the percentages derived from them. */
1647 case CTSF_ROWPCT_COUNT:
1648 case CTSF_COLPCT_COUNT:
1649 case CTSF_TABLEPCT_COUNT:
1650 case CTSF_SUBTABLEPCT_COUNT:
1651 case CTSF_LAYERPCT_COUNT:
1652 case CTSF_LAYERROWPCT_COUNT:
1653 case CTSF_LAYERCOLPCT_COUNT:
1654 case CTSF_ROWPCT_VALIDN:
1655 case CTSF_COLPCT_VALIDN:
1656 case CTSF_TABLEPCT_VALIDN:
1657 case CTSF_SUBTABLEPCT_VALIDN:
1658 case CTSF_LAYERPCT_VALIDN:
1659 case CTSF_LAYERROWPCT_VALIDN:
1660 case CTSF_LAYERCOLPCT_VALIDN:
1661 case CTSF_ROWPCT_TOTALN:
1662 case CTSF_COLPCT_TOTALN:
1663 case CTSF_TABLEPCT_TOTALN:
1664 case CTSF_SUBTABLEPCT_TOTALN:
1665 case CTSF_LAYERPCT_TOTALN:
1666 case CTSF_LAYERROWPCT_TOTALN:
1667 case CTSF_LAYERCOLPCT_TOTALN:
/* A value that is missing for VAR adds its weight to the missing total. */
1672 if (var_is_value_missing (var, value))
1673 s->missing += weight;
/* Minimum/maximum tracking ignores missing values. */
1681 if (!var_is_value_missing (var, value))
/* The comparisons below read value->f, so VAR must be numeric. */
1683 assert (!var_is_alpha (var)); /* XXX? */
/* SYSMIS means "nothing seen yet", so the first value always replaces it. */
1684 if (s->min == SYSMIS || value->f < s->min)
1686 if (s->max == SYSMIS || value->f > s->max)
/* Sum-based summaries and the percentages derived from them. */
1696 case CTSF_ROWPCT_SUM:
1697 case CTSF_COLPCT_SUM:
1698 case CTSF_TABLEPCT_SUM:
1699 case CTSF_SUBTABLEPCT_SUM:
1700 case CTSF_LAYERPCT_SUM:
1701 case CTSF_LAYERROWPCT_SUM:
1702 case CTSF_LAYERCOLPCT_SUM:
/* Feed the weighted numeric value into the moments accumulator. */
1703 moments1_add (s->moments, value->f, weight);
/* Multiple-response summaries; their handling is not visible in this
   excerpt. */
1712 case CTSF_RESPONSES:
1713 case CTSF_ROWPCT_RESPONSES:
1714 case CTSF_COLPCT_RESPONSES:
1715 case CTSF_TABLEPCT_RESPONSES:
1716 case CTSF_SUBTABLEPCT_RESPONSES:
1717 case CTSF_LAYERPCT_RESPONSES:
1718 case CTSF_LAYERROWPCT_RESPONSES:
1719 case CTSF_LAYERCOLPCT_RESPONSES:
1720 case CTSF_ROWPCT_RESPONSES_COUNT:
1721 case CTSF_COLPCT_RESPONSES_COUNT:
1722 case CTSF_TABLEPCT_RESPONSES_COUNT:
1723 case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
1724 case CTSF_LAYERPCT_RESPONSES_COUNT:
1725 case CTSF_LAYERROWPCT_RESPONSES_COUNT:
1726 case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
1727 case CTSF_ROWPCT_COUNT_RESPONSES:
1728 case CTSF_COLPCT_COUNT_RESPONSES:
1729 case CTSF_TABLEPCT_COUNT_RESPONSES:
1730 case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
1731 case CTSF_LAYERPCT_COUNT_RESPONSES:
1732 case CTSF_LAYERROWPCT_COUNT_RESPONSES:
1733 case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
/* Computes the final number for summary function SS->function from
   accumulator S, which belongs to frequency-table entry F.  Several
   branches yield SYSMIS when the result is undefined. */
1739 ctables_summary_value (const struct ctables_freq *f,
1740 union ctables_summary *s,
1741 const struct ctables_summary_spec *ss)
1743 switch (ss->function)
1749 case CTSF_SUBTABLEPCT_COUNT:
/* This entry's valid weight as a percentage of its subtable's total; SYSMIS
   when the subtable total is zero. */
1750 return f->subtable->valid ? s->valid / f->subtable->valid * 100 : SYSMIS;
/* Remaining count-based percentages; their handling is not visible in this
   excerpt. */
1752 case CTSF_ROWPCT_COUNT:
1753 case CTSF_COLPCT_COUNT:
1754 case CTSF_TABLEPCT_COUNT:
1755 case CTSF_LAYERPCT_COUNT:
1756 case CTSF_LAYERROWPCT_COUNT:
1757 case CTSF_LAYERCOLPCT_COUNT:
1758 case CTSF_ROWPCT_VALIDN:
1759 case CTSF_COLPCT_VALIDN:
1760 case CTSF_TABLEPCT_VALIDN:
1761 case CTSF_SUBTABLEPCT_VALIDN:
1762 case CTSF_LAYERPCT_VALIDN:
1763 case CTSF_LAYERROWPCT_VALIDN:
1764 case CTSF_LAYERCOLPCT_VALIDN:
1765 case CTSF_ROWPCT_TOTALN:
1766 case CTSF_COLPCT_TOTALN:
1767 case CTSF_TABLEPCT_TOTALN:
1768 case CTSF_SUBTABLEPCT_TOTALN:
1769 case CTSF_LAYERPCT_TOTALN:
1770 case CTSF_LAYERROWPCT_TOTALN:
1771 case CTSF_LAYERCOLPCT_TOTALN:
/* Total weight: valid plus missing. */
1776 return s->valid + s->missing;
/* Range is defined only once both extremes have been set. */
1789 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
/* Mean, straight from the moments accumulator. */
1794 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
/* Standard error of the mean, from the variance and total weight. */
1800 double weight, variance;
1801 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
1802 return calc_semean (variance, weight);
/* Standard deviation: square root of the variance when it is defined. */
1808 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
1809 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Sum, reconstructed as total weight times mean. */
1814 double weight, mean;
1815 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
1816 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
/* Variance, straight from the moments accumulator. */
1822 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Sum-based percentages; their handling is not visible in this excerpt. */
1826 case CTSF_ROWPCT_SUM:
1827 case CTSF_COLPCT_SUM:
1828 case CTSF_TABLEPCT_SUM:
1829 case CTSF_SUBTABLEPCT_SUM:
1830 case CTSF_LAYERPCT_SUM:
1831 case CTSF_LAYERROWPCT_SUM:
1832 case CTSF_LAYERCOLPCT_SUM:
/* Multiple-response summaries; their handling is not visible in this
   excerpt. */
1841 case CTSF_RESPONSES:
1842 case CTSF_ROWPCT_RESPONSES:
1843 case CTSF_COLPCT_RESPONSES:
1844 case CTSF_TABLEPCT_RESPONSES:
1845 case CTSF_SUBTABLEPCT_RESPONSES:
1846 case CTSF_LAYERPCT_RESPONSES:
1847 case CTSF_LAYERROWPCT_RESPONSES:
1848 case CTSF_LAYERCOLPCT_RESPONSES:
1849 case CTSF_ROWPCT_RESPONSES_COUNT:
1850 case CTSF_COLPCT_RESPONSES_COUNT:
1851 case CTSF_TABLEPCT_RESPONSES_COUNT:
1852 case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
1853 case CTSF_LAYERPCT_RESPONSES_COUNT:
1854 case CTSF_LAYERROWPCT_RESPONSES_COUNT:
1855 case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
1856 case CTSF_ROWPCT_COUNT_RESPONSES:
1857 case CTSF_COLPCT_COUNT_RESPONSES:
1858 case CTSF_TABLEPCT_COUNT_RESPONSES:
1859 case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
1860 case CTSF_LAYERPCT_COUNT_RESPONSES:
1861 case CTSF_LAYERROWPCT_COUNT_RESPONSES:
1862 case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
/* Auxiliary data handed to ctables_freq_compare_3way(): the table whose
   entries are being sorted and the axis to sort them along. */
1869 struct ctables_freq_sort_aux
1871 const struct ctables_table *t;
1872 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of ctables_freq
   pointers.  A_ and B_ point to the array elements; AUX_ is a struct
   ctables_freq_sort_aux naming the table and axis.  Entries are ordered
   first by their vaa_idx on that axis, then by their values for each
   variable on it other than the one at scale_idx. */
1876 ctables_freq_compare_3way (const void *a_, const void *b_, const void *aux_)
1878 const struct ctables_freq_sort_aux *aux = aux_;
1879 struct ctables_freq *const *ap = a_;
1880 struct ctables_freq *const *bp = b_;
1881 const struct ctables_freq *a = *ap;
1882 const struct ctables_freq *b = *bp;
/* Primary key: which var_array of the axis each entry belongs to. */
1884 size_t a_idx = a->axes[aux->a].vaa_idx;
1885 size_t b_idx = b->axes[aux->a].vaa_idx;
1887 return a_idx < b_idx ? -1 : 1;
/* Secondary key: the entries' values, variable by variable. */
1889 const struct var_array *va = &aux->t->vaas[aux->a].vas[a_idx];
1890 for (size_t i = 0; i < va->n; i++)
1891 if (i != va->scale_idx)
1893 int cmp = value_compare_3way (&a->axes[aux->a].values[i],
1894 &b->axes[aux->a].values[i],
1895 var_get_width (va->vars[i]));
1905 For each ctables_table:
1906 For each combination of row vars:
1907 For each combination of column vars:
1908 For each combination of layer vars:
1910 Make a table of row values:
1911 Sort entries by row values
1912 Assign a 0-based index to each actual value
1913 Construct a dimension
1914 Make a table of column values
1915 Make a table of layer values
1917 Fill the table entry using the indexes from before.
/* Looks up the subtable in T that entry F belongs to, creating it when no
   match is found.  Two entries share a subtable when, on every axis, they
   have the same vaa_idx and equal values for every variable other than
   those at scale_idx and subtable_idx.  A newly created subtable records F
   as its example entry. */
1920 static struct ctables_subtable *
1921 ctables_subtable_insert (struct ctables_table *t, struct ctables_freq *f)
/* Hash exactly the fields that define subtable identity. */
1924 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1926 size_t idx = f->axes[a].vaa_idx;
1927 const struct var_array *va = &t->vaas[a].vas[idx];
1928 hash = hash_int (idx, hash);
1929 for (size_t i = 0; i < va->n; i++)
1930 if (i != va->scale_idx && i != va->subtable_idx)
1931 hash = value_hash (&f->axes[a].values[i],
1932 var_get_width (va->vars[i]), hash);
/* Probe the hash bucket, comparing F against each candidate's example. */
1935 struct ctables_subtable *st;
1936 HMAP_FOR_EACH_WITH_HASH (st, struct ctables_subtable, node, hash, &t->subtables)
1938 const struct ctables_freq *stf = st->example;
1939 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1941 size_t idx = f->axes[a].vaa_idx;
1942 if (idx != stf->axes[a].vaa_idx)
1945 const struct var_array *va = &t->vaas[a].vas[idx];
1946 for (size_t i = 0; i < va->n; i++)
1947 if (i != va->scale_idx && i != va->subtable_idx
1948 && !value_equal (&stf->axes[a].values[i],
1949 &f->axes[a].values[i],
1950 var_get_width (va->vars[i])))
/* No match: start a new subtable with F as its example. */
1959 st = xmalloc (sizeof *st);
1960 *st = (struct ctables_subtable) { .example = f };
1961 hmap_insert (&t->subtables, &st->node, hash);
/* Tabulates case C into table T for the combination of var_arrays selected
   by IR (rows), IC (columns) and IL (layers).  Finds the frequency-table
   entry whose values match C, creating and initializing one if none exists,
   then adds C to each of the entry's summaries and to its subtable's valid
   weight. */
1966 ctables_freqtab_insert (struct ctables_table *t,
1967 const struct ccase *c,
1968 size_t ir, size_t ic, size_t il,
1971 size_t ix[PIVOT_N_AXES] = {
1972 [PIVOT_AXIS_ROW] = ir,
1973 [PIVOT_AXIS_COLUMN] = ic,
1974 [PIVOT_AXIS_LAYER] = il,
/* The var_array on the summary axis supplies the summary specifications. */
1976 const struct var_array *ss = &t->vaas[t->summary_axis].vas[ix[t->summary_axis]];
/* Hash the var_array indexes and the case's values for every variable other
   than the one at scale_idx. */
1979 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1981 const struct var_array *va = &t->vaas[a].vas[ix[a]];
1982 hash = hash_int (ix[a], hash);
1983 for (size_t i = 0; i < va->n; i++)
1984 if (i != va->scale_idx)
1985 hash = value_hash (case_data (c, va->vars[i]),
1986 var_get_width (va->vars[i]), hash);
/* Look for an existing entry with the same indexes and values. */
1989 struct ctables_freq *f;
1990 HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &t->ft)
1992 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1994 const struct var_array *va = &t->vaas[a].vas[ix[a]];
1995 if (f->axes[a].vaa_idx != ix[a])
1997 for (size_t i = 0; i < va->n; i++)
1998 if (i != va->scale_idx
1999 && !value_equal (case_data (c, va->vars[i]),
2000 &f->axes[a].values[i],
2001 var_get_width (va->vars[i])))
/* Not found: create an entry holding a copy of the case's values on each
   axis. */
2010 f = xmalloc (sizeof *f);
2011 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2013 const struct var_array *va = &t->vaas[a].vas[ix[a]];
2014 f->axes[a].vaa_idx = ix[a];
2015 f->axes[a].values = (va->n
2016 ? xnmalloc (va->n, sizeof *f->axes[a].values)
2018 for (size_t i = 0; i < va->n; i++)
2019 value_clone (&f->axes[a].values[i], case_data (c, va->vars[i]),
2020 var_get_width (va->vars[i]));
/* One accumulator per summary specification. */
2022 f->summaries = xmalloc (ss->n_summaries * sizeof *f->summaries);
2023 for (size_t i = 0; i < ss->n_summaries; i++)
2024 ctables_summary_init (&f->summaries[i], &ss->summaries[i]);
2025 f->subtable = ctables_subtable_insert (t, f);
2026 hmap_insert (&t->ft, &f->node, hash);
/* Accumulate this case into every summary of the entry. */
2029 for (size_t i = 0; i < ss->n_summaries; i++)
2030 ctables_summary_add (&f->summaries[i], &ss->summaries[i], ss->summary_var,
2031 case_data (c, ss->summary_var), weight);
/* The subtable total is the denominator for subtable percentages. */
2032 f->subtable->valid += weight;
/* Runs the tables in CT against the active dataset DS.  In order, it
   expands each table's axes into var_array2s, gives every summary-axis
   var_array at least one summary, tabulates every case into every table,
   and then builds and submits one pivot table per CTABLES table.  The value
   returned is that of proc_commit(). */
2036 ctables_execute (struct dataset *ds, struct ctables *ct)
/* Step 1: expand the axes of each table. */
2038 for (size_t i = 0; i < ct->n_tables; i++)
2040 struct ctables_table *t = ct->tables[i];
2041 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2044 t->vaas[a] = enumerate_fts (a, t->axes[a]);
2045 for (size_t j = 0; j < t->vaas[a].n; j++)
2047 struct var_array *va = &t->vaas[a].vas[j];
2048 va->subtable_idx = (
2049 a == PIVOT_AXIS_LAYER ? SIZE_MAX
2050 : va->n == 0 ? SIZE_MAX
2051 : va->scale_idx != va->n - 1 ? va->n - 1
2052 : va->n == 1 ? SIZE_MAX
/* Alternative path: the axis gets a single var_array with no variables. */
2058 struct var_array *va = xmalloc (sizeof *va);
2059 *va = (struct var_array) { .n = 0 };
2060 t->vaas[a] = (struct var_array2) { .vas = va, .n = 1 };
/* Step 2: a summary-axis var_array without explicit summaries gets one
   default: CTSF_MEAN when it has a summary variable, CTSF_COUNT otherwise. */
2063 for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++)
2065 struct var_array *va = &t->vaas[t->summary_axis].vas[i];
2066 if (!va->n_summaries)
2068 va->summaries = xmalloc (sizeof *va->summaries);
2069 va->n_summaries = 1;
2071 enum ctables_summary_function function
2072 = va->summary_var ? CTSF_MEAN : CTSF_COUNT;
2073 struct ctables_var var = { .is_mrset = false, .var = va->summary_var };
2075 *va->summaries = (struct ctables_summary_spec) {
2076 .function = function,
2077 .format = ctables_summary_default_format (function, &var),
2078 .label = ctables_summary_default_label (function, 0),
/* Without a summary variable, fall back to the var_array's first variable. */
2080 if (!va->summary_var)
2081 va->summary_var = va->vars[0];
/* Step 3: read the cases and tabulate each one into every table, once per
   combination of row, column and layer var_arrays. */
2086 struct casereader *input = casereader_create_filter_weight (proc_open (ds),
2089 bool warn_on_invalid = true;
2090 double total_weight = 0;
2091 for (struct ccase *c = casereader_read (input); c;
2092 case_unref (c), c = casereader_read (input))
2094 double weight = dict_get_case_weight (dataset_dict (ds), c,
2096 total_weight += weight;
2098 for (size_t i = 0; i < ct->n_tables; i++)
2100 struct ctables_table *t = ct->tables[i];
2102 for (size_t ir = 0; ir < t->vaas[PIVOT_AXIS_ROW].n; ir++)
2103 for (size_t ic = 0; ic < t->vaas[PIVOT_AXIS_COLUMN].n; ic++)
2104 for (size_t il = 0; il < t->vaas[PIVOT_AXIS_LAYER].n; il++)
2105 ctables_freqtab_insert (t, c, ir, ic, il, weight);
2108 casereader_destroy (input);
/* Step 4: produce one pivot table per CTABLES table. */
2110 for (size_t i = 0; i < ct->n_tables; i++)
2112 struct ctables_table *t = ct->tables[i];
2114 struct pivot_table *pt = pivot_table_create__ (
2116 ? pivot_value_new_user_text (t->title, SIZE_MAX)
2117 : pivot_value_new_text (N_("Custom Tables"))),
2120 pivot_table_set_caption (
2121 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
/* The corner text must go to the corner, not the caption: using
   pivot_table_set_caption() here overwrote the caption set just above and
   left the corner empty. */
2123 pivot_table_set_corner_text (
2124 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
2126 pivot_table_set_look (pt, ct->look);
/* Create a dimension for each axis that has variables or carries the
   summaries. */
2127 struct pivot_dimension *d[PIVOT_N_AXES];
2128 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2130 static const char *names[] = {
2131 [PIVOT_AXIS_ROW] = N_("Rows"),
2132 [PIVOT_AXIS_COLUMN] = N_("Columns"),
2133 [PIVOT_AXIS_LAYER] = N_("Layers"),
2135 d[a] = (t->axes[a] || a == t->summary_axis
2136 ? pivot_dimension_create (pt, a, names[a])
2141 assert (t->axes[a]);
/* Collect all frequency entries and sort them along this axis so that equal
   leading values end up adjacent. */
2143 struct ctables_freq **sorted = xnmalloc (t->ft.count, sizeof *sorted);
2145 struct ctables_freq *f;
2147 HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft)
2149 assert (n == t->ft.count);
2151 struct ctables_freq_sort_aux aux = { .t = t, .a = a };
2152 sort (sorted, n, sizeof *sorted, ctables_freq_compare_3way, &aux);
/* The deepest nesting on this axis sizes the GROUPS array. */
2154 size_t max_depth = 0;
2155 for (size_t j = 0; j < t->vaas[a].n; j++)
2156 if (t->vaas[a].vas[j].n > max_depth)
2157 max_depth = t->vaas[a].vas[j].n;
2159 struct pivot_category **groups = xnmalloc (max_depth, sizeof *groups);
2160 struct pivot_category *top = NULL;
/* Walk the sorted entries, creating categories only where an entry differs
   from its predecessor. */
2162 for (size_t j = 0; j < n; j++)
2164 struct ctables_freq *f = sorted[j];
2165 const struct var_array *va = &t->vaas[a].vas[f->axes[a].vaa_idx];
/* N_COMMON counts the leading variables whose values equal the previous
   entry's. */
2167 size_t n_common = 0;
2168 bool new_subtable = false;
2171 struct ctables_freq *prev = sorted[j - 1];
2172 if (prev->axes[a].vaa_idx == f->axes[a].vaa_idx)
2174 for (; n_common < va->n; n_common++)
2175 if (n_common != va->scale_idx
2176 && !value_equal (&prev->axes[a].values[n_common],
2177 &f->axes[a].values[n_common],
/* value_equal() takes the value's width.  Passing var_get_type() compared
   only the first byte of string values, so distinct categories sharing a
   first character were treated as equal.  This now matches the sort
   comparator, which uses var_get_width(). */
2178 var_get_width (va->vars[n_common])))
2182 new_subtable = true;
2185 new_subtable = true;
/* The first variable's label becomes a group unless its display setting is
   CTVL_NONE. */
2189 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (va->vars[0])];
2191 if (vlabel != CTVL_NONE)
2192 top = pivot_category_create_group__ (
2193 top, pivot_value_new_variable (va->vars[0]));
/* All values equal the previous entry's: reuse its leaf. */
2195 if (n_common == va->n)
2197 f->axes[a].leaf = prev_leaf;
/* Otherwise create categories for the levels that differ. */
2201 for (size_t k = n_common; k < va->n; k++)
2203 struct pivot_category *parent = k > 0 ? groups[k - 1] : top;
2205 struct pivot_value *label
2206 = (k != va->scale_idx
2207 ? pivot_value_new_var_value (va->vars[k],
2208 &f->axes[a].values[k])
/* On the summary axis, each summary gets its own leaf under a group for
   this value. */
2212 if (a == t->summary_axis)
2215 parent = pivot_category_create_group__ (parent, label);
2216 for (size_t m = 0; m < va->n_summaries; m++)
2218 int leaf = pivot_category_create_leaf (
2219 parent, pivot_value_new_text (va->summaries[m].label));
2226 /* This assertion is true as long as the summary axis
2227 is the axis where the summaries are displayed. */
2230 prev_leaf = pivot_category_create_leaf (parent, label);
/* A level that is not the last becomes a group, optionally followed by a
   group labeling the next variable. */
2236 parent = pivot_category_create_group__ (parent, label);
2238 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (va->vars[k + 1])];
2239 if (vlabel != CTVL_NONE)
2240 parent = pivot_category_create_group__ (
2241 parent, pivot_value_new_variable (va->vars[k + 1]));
2245 f->axes[a].leaf = prev_leaf;
/* Fill the pivot table: one cell per frequency entry per summary. */
2250 struct ctables_freq *f;
2251 HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft)
2253 const struct var_array *ss = &t->vaas[t->summary_axis].vas[f->axes[t->summary_axis].vaa_idx];
2254 for (size_t j = 0; j < ss->n_summaries; j++)
2257 size_t n_dindexes = 0;
2259 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2262 int leaf = f->axes[a].leaf;
2263 if (a == t->summary_axis)
2265 dindexes[n_dindexes++] = leaf;
2268 double d = ctables_summary_value (f, &f->summaries[j], &ss->summaries[j]);
2269 struct pivot_value *value = pivot_value_new_number (d);
2270 value->numeric.format = ss->summaries[j].format;
2271 pivot_table_put (pt, dindexes, n_dindexes, value);
2275 pivot_table_submit (pt);
/* NOTE(review): the teardown below works on t->fts / t->n_fts and ft->data,
   which nothing visible in this function populates (tabulation uses t->ft).
   Confirm whether this is still live code. */
2279 for (size_t i = 0; i < ct->n_tables; i++)
2281 struct ctables_table *t = ct->tables[i];
2283 for (size_t j = 0; j < t->n_fts; j++)
2285 struct ctables_freqtab *ft = t->fts[j];
2286 struct ctables_freq *f, *next;
2287 HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data)
2289 hmap_delete (&ft->data, &f->node);
2290 for (size_t k = 0; k < ft->n_summaries; k++)
2291 ctables_summary_uninit (&f->summaries[k], &ft->summaries[k]);
2292 free (f->summaries);
2293 for (size_t k = 0; k < ft->vars.n; k++)
2295 const struct variable *var = ft->vars.vars[k];
2296 value_destroy (&f->values[k], var_get_width (var));
2300 hmap_destroy (&ft->data);
2301 var_array_uninit (&ft->vars);
2308 return proc_commit (ds);
/* Parses and executes the CTABLES command.  Global subcommands (FORMAT,
   VLABELS, MRSETS, SMISSING, WEIGHT, HIDESMALLCOUNTS) are accepted until the
   first TABLE; each TABLE is then followed by its own subcommands (SLABELS,
   CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST).  Once the
   whole command is parsed, the tables are run through ctables_execute(),
   whose result decides between CMD_SUCCESS and CMD_FAILURE. */
2312 cmd_ctables (struct lexer *lexer, struct dataset *ds)
/* Every variable starts with the global variable-label display setting. */
2314 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
2315 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
2316 enum settings_value_show tvars = settings_get_show_variables ();
2317 for (size_t i = 0; i < n_vars; i++)
2318 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on a private copy of the default look so it can be modified. */
2320 struct ctables *ct = xmalloc (sizeof *ct);
2321 *ct = (struct ctables) {
2322 .look = pivot_table_look_unshare (pivot_table_look_ref (
2323 pivot_table_look_get_default ())),
2325 .hide_threshold = 5,
2327 ct->look->omit_empty = false;
2329 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands, up to the first TABLE. */
2332 while (!lex_match_id (lexer, "TABLE"))
2334 if (lex_match_id (lexer, "FORMAT"))
/* Column widths stay SYSMIS until specified; the default unit is points. */
2336 double widths[2] = { SYSMIS, SYSMIS };
2337 double units_per_inch = 72.0;
2339 while (lex_token (lexer) != T_SLASH)
2341 if (lex_match_id (lexer, "MINCOLWIDTH"))
2343 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
2346 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
2348 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
2351 else if (lex_match_id (lexer, "UNITS"))
2353 lex_match (lexer, T_EQUALS);
2354 if (lex_match_id (lexer, "POINTS"))
2355 units_per_inch = 72.0;
2356 else if (lex_match_id (lexer, "INCHES"))
2357 units_per_inch = 1.0;
2358 else if (lex_match_id (lexer, "CM"))
2359 units_per_inch = 2.54;
2362 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
2366 else if (lex_match_id (lexer, "EMPTY"))
2371 lex_match (lexer, T_EQUALS);
2372 if (lex_match_id (lexer, "ZERO"))
2374 /* Nothing to do. */
2376 else if (lex_match_id (lexer, "BLANK"))
2377 ct->zero = xstrdup ("");
2378 else if (lex_force_string (lexer))
2380 ct->zero = ss_xstrdup (lex_tokss (lexer));
2386 else if (lex_match_id (lexer, "MISSING"))
2388 lex_match (lexer, T_EQUALS);
2389 if (!lex_force_string (lexer))
/* The string "." is special-cased; any other string is stored as given. */
2393 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
2394 ? ss_xstrdup (lex_tokss (lexer))
2400 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
2401 "UNITS", "EMPTY", "MISSING");
2406 if (widths[0] != SYSMIS && widths[1] != SYSMIS
2407 && widths[0] > widths[1])
2409 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each specified width from the chosen unit to inches, then scale
   by 96 for the look's width range. */
2413 for (size_t i = 0; i < 2; i++)
2414 if (widths[i] != SYSMIS)
2416 int *wr = ct->look->width_ranges[TABLE_HORZ];
2417 wr[i] = widths[i] / units_per_inch * 96.0;
2422 else if (lex_match_id (lexer, "VLABELS"))
2424 if (!lex_force_match_id (lexer, "VARIABLES"))
2426 lex_match (lexer, T_EQUALS);
2428 struct variable **vars;
2430 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
2434 if (!lex_force_match_id (lexer, "DISPLAY"))
2439 lex_match (lexer, T_EQUALS);
2441 enum ctables_vlabel vlabel;
2442 if (lex_match_id (lexer, "DEFAULT"))
2443 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
2444 else if (lex_match_id (lexer, "NAME"))
2446 else if (lex_match_id (lexer, "LABEL"))
2447 vlabel = CTVL_LABEL;
2448 else if (lex_match_id (lexer, "BOTH"))
2450 else if (lex_match_id (lexer, "NONE"))
2454 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Apply the chosen display mode to every listed variable. */
2460 for (size_t i = 0; i < n_vars; i++)
2461 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
2464 else if (lex_match_id (lexer, "MRSETS"))
2466 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
2468 lex_match (lexer, T_EQUALS);
2469 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
2472 else if (lex_match_id (lexer, "SMISSING"))
2474 if (lex_match_id (lexer, "VARIABLE"))
2475 ct->smissing_listwise = false;
2476 else if (lex_match_id (lexer, "LISTWISE"))
2477 ct->smissing_listwise = true;
2480 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
2485 else if (lex_match_id (lexer, "WEIGHT"))
2487 if (!lex_force_match_id (lexer, "VARIABLE"))
2489 lex_match (lexer, T_EQUALS);
2490 ct->base_weight = parse_variable (lexer, dataset_dict (ds));
2491 if (!ct->base_weight)
2494 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
2496 if (!lex_force_match_id (lexer, "COUNT"))
2498 lex_match (lexer, T_EQUALS);
2499 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT", 2, INT_MAX))
2501 ct->hide_threshold = lex_integer (lexer);
2506 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
2507 "SMISSING", "PCOMPUTE", "PPROPERTIES",
2508 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
2512 if (!lex_force_match (lexer, T_SLASH))
/* One iteration per TABLE subcommand. */
2516 size_t allocated_tables = 0;
2519 if (ct->n_tables >= allocated_tables)
2520 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
2521 sizeof *ct->tables);
/* Defaults for a new table; the categories array has one slot per variable
   in the dictionary. */
2523 struct ctables_table *t = xmalloc (sizeof *t);
2524 *t = (struct ctables_table) {
2525 .ft = HMAP_INITIALIZER (t->ft),
2526 .subtables = HMAP_INITIALIZER (t->subtables),
2527 .slabels_position = PIVOT_AXIS_COLUMN,
2528 .slabels_visible = true,
2529 .row_labels = CTLP_NORMAL,
2530 .col_labels = CTLP_NORMAL,
2531 .categories = xcalloc (dict_get_n_vars (dataset_dict (ds)),
2532 sizeof *t->categories),
2533 .n_categories = dict_get_n_vars (dataset_dict (ds)),
2536 ct->tables[ct->n_tables++] = t;
/* Axis expressions: rows, then optionally BY columns, then BY layers. */
2538 lex_match (lexer, T_EQUALS);
2539 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
2541 if (lex_match (lexer, T_BY))
2543 if (!ctables_axis_parse (lexer, dataset_dict (ds),
2544 ct, t, PIVOT_AXIS_COLUMN))
2547 if (lex_match (lexer, T_BY))
2549 if (!ctables_axis_parse (lexer, dataset_dict (ds),
2550 ct, t, PIVOT_AXIS_LAYER))
2555 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
2556 && !t->axes[PIVOT_AXIS_LAYER])
2558 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables are allowed on one axis only; report each offender. */
2562 const struct ctables_axis *scales[PIVOT_N_AXES];
2563 size_t n_scales = 0;
2564 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2566 scales[a] = find_scale (t->axes[a]);
2572 msg (SE, _("Scale variables may appear only on one axis."));
2573 if (scales[PIVOT_AXIS_ROW])
2574 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
2575 _("This scale variable appears on the rows axis."));
2576 if (scales[PIVOT_AXIS_COLUMN])
2577 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
2578 _("This scale variable appears on the columns axis."));
2579 if (scales[PIVOT_AXIS_LAYER])
2580 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
2581 _("This scale variable appears on the layer axis."));
/* Likewise, summaries are allowed on one axis only. */
2585 const struct ctables_axis *summaries[PIVOT_N_AXES];
2586 size_t n_summaries = 0;
2587 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2589 summaries[a] = (scales[a]
2591 : find_categorical_summary_spec (t->axes[a]));
2595 if (n_summaries > 1)
2597 msg (SE, _("Summaries may appear only on one axis."));
2598 if (summaries[PIVOT_AXIS_ROW])
2599 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
2600 _("This variable on the rows axis has a summary."));
2601 if (summaries[PIVOT_AXIS_COLUMN])
2602 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
2603 _("This variable on the columns axis has a summary."));
2604 if (summaries[PIVOT_AXIS_LAYER])
2605 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
2606 _("This variable on the layers axis has a summary."));
/* The summary axis is one that has a summary; when no axis has one, it is
   one that has variables. */
2609 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2610 if (n_summaries ? summaries[a] : t->axes[a])
2612 t->summary_axis = a;
2616 if (lex_token (lexer) == T_ENDCMD)
2618 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
2621 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
2623 if (lex_match_id (lexer, "SLABELS"))
2625 while (lex_token (lexer) != T_SLASH)
2627 if (lex_match_id (lexer, "POSITION"))
2629 lex_match (lexer, T_EQUALS);
2630 if (lex_match_id (lexer, "COLUMN"))
2631 t->slabels_position = PIVOT_AXIS_COLUMN;
2632 else if (lex_match_id (lexer, "ROW"))
2633 t->slabels_position = PIVOT_AXIS_ROW;
2634 else if (lex_match_id (lexer, "LAYER"))
2635 t->slabels_position = PIVOT_AXIS_LAYER;
2638 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
2642 else if (lex_match_id (lexer, "VISIBLE"))
2644 lex_match (lexer, T_EQUALS);
2645 if (!parse_bool (lexer, &t->slabels_visible))
2650 lex_error_expecting (lexer, "POSITION", "VISIBLE");
2655 else if (lex_match_id (lexer, "CLABELS"))
2657 while (lex_token (lexer) != T_SLASH)
2659 if (lex_match_id (lexer, "AUTO"))
2660 t->row_labels = t->col_labels = CTLP_NORMAL;
2661 else if (lex_match_id (lexer, "ROWLABELS"))
2663 lex_match (lexer, T_EQUALS);
2664 if (lex_match_id (lexer, "OPPOSITE"))
2665 t->row_labels = CTLP_OPPOSITE;
2666 else if (lex_match_id (lexer, "LAYER"))
2667 t->row_labels = CTLP_LAYER;
2670 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
2674 else if (lex_match_id (lexer, "COLLABELS"))
2676 lex_match (lexer, T_EQUALS);
2677 if (lex_match_id (lexer, "OPPOSITE"))
2678 t->col_labels = CTLP_OPPOSITE;
2679 else if (lex_match_id (lexer, "LAYER"))
2680 t->col_labels = CTLP_LAYER;
2683 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
2689 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
2695 else if (lex_match_id (lexer, "CRITERIA"))
2697 if (!lex_force_match_id (lexer, "CILEVEL"))
2699 lex_match (lexer, T_EQUALS);
2701 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
2703 t->cilevel = lex_number (lexer);
2706 else if (lex_match_id (lexer, "CATEGORIES"))
2708 if (!ctables_table_parse_categories (lexer, dataset_dict (ds), t))
2711 else if (lex_match_id (lexer, "TITLES"))
2716 if (lex_match_id (lexer, "CAPTION"))
2717 textp = &t->caption;
2718 else if (lex_match_id (lexer, "CORNER"))
2720 else if (lex_match_id (lexer, "TITLE"))
2724 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
2727 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces. */
2729 struct string s = DS_EMPTY_INITIALIZER;
2730 while (lex_is_string (lexer))
2732 if (!ds_is_empty (&s))
2733 ds_put_byte (&s, ' ');
2734 ds_put_substring (&s, lex_tokss (lexer));
2738 *textp = ds_steal_cstr (&s);
2740 while (lex_token (lexer) != T_SLASH
2741 && lex_token (lexer) != T_ENDCMD);
2743 else if (lex_match_id (lexer, "SIGTEST"))
/* Visible defaults for the chi-square test settings. */
2747 t->chisq = xmalloc (sizeof *t->chisq);
2748 *t->chisq = (struct ctables_chisq) {
2750 .include_mrsets = true,
2751 .all_visible = true,
2757 if (lex_match_id (lexer, "TYPE"))
2759 lex_match (lexer, T_EQUALS);
2760 if (!lex_force_match_id (lexer, "CHISQUARE"))
2763 else if (lex_match_id (lexer, "ALPHA"))
2765 lex_match (lexer, T_EQUALS);
2766 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
2768 t->chisq->alpha = lex_number (lexer);
2771 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
2773 lex_match (lexer, T_EQUALS);
/* Take the failure path only when parse_bool() fails, as at every other
   parse_bool() call in this function.  The test was previously inverted, so
   a valid setting took the failure path and an invalid one was accepted. */
2774 if (!parse_bool (lexer, &t->chisq->include_mrsets))
2777 else if (lex_match_id (lexer, "CATEGORIES"))
2779 lex_match (lexer, T_EQUALS);
2780 if (lex_match_id (lexer, "ALLVISIBLE"))
2781 t->chisq->all_visible = true;
2782 else if (lex_match_id (lexer, "SUBTOTALS"))
2783 t->chisq->all_visible = false;
2786 lex_error_expecting (lexer,
2787 "ALLVISIBLE", "SUBTOTALS");
2793 lex_error_expecting (lexer, "TYPE", "ALPHA",
2794 "INCLUDEMRSETS", "CATEGORIES");
2798 while (lex_token (lexer) != T_SLASH
2799 && lex_token (lexer) != T_ENDCMD);
2801 else if (lex_match_id (lexer, "COMPARETEST"))
/* Visible defaults for the pairwise comparison settings. */
2805 t->pairwise = xmalloc (sizeof *t->pairwise);
2806 *t->pairwise = (struct ctables_pairwise) {
2808 .alpha = { .05, .05 },
2809 .adjust = BONFERRONI,
2810 .include_mrsets = true,
2811 .meansvariance_allcats = true,
2812 .all_visible = true,
2821 if (lex_match_id (lexer, "TYPE"))
2823 lex_match (lexer, T_EQUALS);
2824 if (lex_match_id (lexer, "PROP"))
2825 t->pairwise->type = PROP;
2826 else if (lex_match_id (lexer, "MEAN"))
2827 t->pairwise->type = MEAN;
2830 lex_error_expecting (lexer, "PROP", "MEAN");
2834 else if (lex_match_id (lexer, "ALPHA"))
2836 lex_match (lexer, T_EQUALS);
2838 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
2840 double a0 = lex_number (lexer);
/* An optional second alpha may follow; with two values, the smaller is
   stored first. */
2843 lex_match (lexer, T_COMMA);
2844 if (lex_is_number (lexer))
2846 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
2848 double a1 = lex_number (lexer);
2851 t->pairwise->alpha[0] = MIN (a0, a1);
2852 t->pairwise->alpha[1] = MAX (a0, a1);
2855 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
2857 else if (lex_match_id (lexer, "ADJUST"))
2859 lex_match (lexer, T_EQUALS);
2860 if (lex_match_id (lexer, "BONFERRONI"))
2861 t->pairwise->adjust = BONFERRONI;
2862 else if (lex_match_id (lexer, "BH"))
2863 t->pairwise->adjust = BH;
2864 else if (lex_match_id (lexer, "NONE"))
/* NOTE(review): NONE is stored as a literal 0 rather than a named constant;
   confirm that 0 is distinct from BONFERRONI and BH. */
2865 t->pairwise->adjust = 0;
2868 lex_error_expecting (lexer, "BONFERRONI", "BH",
2873 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
2875 lex_match (lexer, T_EQUALS);
2876 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
2879 else if (lex_match_id (lexer, "MEANSVARIANCE"))
2881 lex_match (lexer, T_EQUALS);
2882 if (lex_match_id (lexer, "ALLCATS"))
2883 t->pairwise->meansvariance_allcats = true;
2884 else if (lex_match_id (lexer, "TESTEDCATS"))
2885 t->pairwise->meansvariance_allcats = false;
2888 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
2892 else if (lex_match_id (lexer, "CATEGORIES"))
2894 lex_match (lexer, T_EQUALS);
2895 if (lex_match_id (lexer, "ALLVISIBLE"))
2896 t->pairwise->all_visible = true;
2897 else if (lex_match_id (lexer, "SUBTOTALS"))
2898 t->pairwise->all_visible = false;
2901 lex_error_expecting (lexer, "ALLVISIBLE",
2906 else if (lex_match_id (lexer, "MERGE"))
2908 lex_match (lexer, T_EQUALS);
2909 if (!parse_bool (lexer, &t->pairwise->merge))
2912 else if (lex_match_id (lexer, "STYLE"))
2914 lex_match (lexer, T_EQUALS);
2915 if (lex_match_id (lexer, "APA"))
2916 t->pairwise->apa_style = true;
2917 else if (lex_match_id (lexer, "SIMPLE"))
2918 t->pairwise->apa_style = false;
2921 lex_error_expecting (lexer, "APA", "SIMPLE");
2925 else if (lex_match_id (lexer, "SHOWSIG"))
2927 lex_match (lexer, T_EQUALS);
2928 if (!parse_bool (lexer, &t->pairwise->show_sig))
2933 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
2934 "INCLUDEMRSETS", "MEANSVARIANCE",
2935 "CATEGORIES", "MERGE", "STYLE",
2940 while (lex_token (lexer) != T_SLASH
2941 && lex_token (lexer) != T_ENDCMD);
2945 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
2946 "CRITERIA", "CATEGORIES", "TITLES",
2947 "SIGTEST", "COMPARETEST");
/* Category labels can be moved for rows or for columns, not both. */
2952 if (t->row_labels != CTLP_NORMAL && t->col_labels != CTLP_NORMAL)
2954 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
2959 while (lex_token (lexer) != T_ENDCMD);
/* Parsing succeeded: run the tables, then free the parsed command. */
2961 bool ok = ctables_execute (ds, ct);
2962 ctables_destroy (ct);
2963 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Second cleanup call, separate from the one on the success path above
   (the statements between them are not visible in this excerpt). */
2966 ctables_destroy (ct);