1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
58 enum ctables_weighting
66 /* CTABLES table areas. */
68 enum ctables_area_type
70 /* Within a section, where stacked variables divide one section from
73 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
74 parse_ctables_summary_function() parses correctly. */
75 CTAT_TABLE, /* All layers of a whole section. */
76 CTAT_LAYERROW, /* Row in one layer within a section. */
77 CTAT_LAYERCOL, /* Column in one layer within a section. */
78 CTAT_LAYER, /* One layer within a section. */
80 /* Within a subtable, where a subtable pairs an innermost row variable with
81 an innermost column variable within a single layer. */
82 CTAT_SUBTABLE, /* Whole subtable. */
83 CTAT_ROW, /* Row within a subtable. */
84 CTAT_COL, /* Column within a subtable. */
88 static const char *ctables_area_type_name[N_CTATS] = {
89 [CTAT_TABLE] = "TABLE",
90 [CTAT_LAYER] = "LAYER",
91 [CTAT_LAYERROW] = "LAYERROW",
92 [CTAT_LAYERCOL] = "LAYERCOL",
93 [CTAT_SUBTABLE] = "SUBTABLE",
100 struct hmap_node node;
102 const struct ctables_cell *example;
105 double count[N_CTWS];
106 double valid[N_CTWS];
107 double total[N_CTWS];
108 struct ctables_sum *sums;
116 /* CTABLES summary functions. */
118 enum ctables_function_type
120 /* A function that operates on data in a single cell. It operates on
121 effective weights. It does not have an unweighted version. */
124 /* A function that operates on data in a single cell. The function
125 operates on effective weights and has a U-prefixed unweighted
129 /* A function that operates on data in a single cell. It operates on
130 dictionary weights, and has U-prefixed unweighted version and an
131 E-prefixed effective weight version. */
134 /* A function that operates on an area of cells. It operates on effective
135 weights and has a U-prefixed unweighted version. */
146 enum ctables_function_availability
148 CTFA_ALL, /* Any variables. */
149 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
150 //CTFA_MRSETS, /* Only multiple-response sets */
153 enum ctables_summary_function
155 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
156 #include "ctables.inc"
161 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
163 #include "ctables.inc"
167 struct ctables_function_info
169 struct substring basename;
170 enum ctables_function_type type;
171 enum ctables_format format;
172 enum ctables_function_availability availability;
174 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
175 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
176 bool is_area; /* Needs an area prefix. */
178 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
179 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
181 .basename = SS_LITERAL_INITIALIZER (NAME), \
184 .availability = AVAILABILITY, \
185 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
186 .e_prefix = (TYPE) == CTFT_UECELL, \
187 .is_area = (TYPE) == CTFT_AREA \
189 #include "ctables.inc"
193 static struct fmt_spec
194 ctables_summary_default_format (enum ctables_summary_function function,
195 const struct variable *var)
197 static const enum ctables_format default_formats[] = {
198 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
199 #include "ctables.inc"
202 switch (default_formats[function])
205 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
208 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
211 return *var_get_print_format (var);
218 static enum ctables_function_availability
219 ctables_function_availability (enum ctables_summary_function f)
221 static enum ctables_function_availability availability[] = {
222 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
223 #include "ctables.inc"
227 return availability[f];
231 parse_ctables_summary_function (struct lexer *lexer,
232 enum ctables_summary_function *function,
233 enum ctables_weighting *weighting,
234 enum ctables_area_type *area)
236 if (!lex_force_id (lexer))
239 struct substring name = lex_tokss (lexer);
240 if (ss_ends_with_case (name, ss_cstr (".LCL"))
241 || ss_ends_with_case (name, ss_cstr (".UCL"))
242 || ss_ends_with_case (name, ss_cstr (".SE")))
244 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
245 "is not yet implemented."));
249 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
250 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
252 bool has_area = false;
254 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
255 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
260 if (ss_equals_case (name, ss_cstr ("PCT")))
262 /* Special case where .COUNT suffix is omitted. */
263 *function = CTSF_areaPCT_COUNT;
264 *weighting = CTW_EFFECTIVE;
271 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
273 const struct ctables_function_info *cfi = &ctables_function_info[f];
274 if (ss_equals_case (cfi->basename, name))
277 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
280 *weighting = (e ? CTW_EFFECTIVE
282 : cfi->e_prefix ? CTW_DICTIONARY
289 lex_error (lexer, _("Expecting summary function name."));
294 ctables_summary_function_name (enum ctables_summary_function function,
295 enum ctables_weighting weighting,
296 enum ctables_area_type area,
297 char *buffer, size_t bufsize)
299 const struct ctables_function_info *cfi = &ctables_function_info[function];
300 snprintf (buffer, bufsize, "%s%s%s",
301 (weighting == CTW_UNWEIGHTED ? "U"
302 : weighting == CTW_DICTIONARY ? ""
303 : cfi->e_prefix ? "E"
305 cfi->is_area ? ctables_area_type_name[area] : "",
306 cfi->basename.string);
311 ctables_summary_function_label__ (enum ctables_summary_function function,
312 enum ctables_weighting weighting,
313 enum ctables_area_type area)
315 bool w = weighting != CTW_UNWEIGHTED;
316 bool d = weighting == CTW_DICTIONARY;
317 enum ctables_area_type a = area;
321 return (d ? N_("Count")
322 : w ? N_("Adjusted Count")
323 : N_("Unweighted Count"));
325 case CTSF_areaPCT_COUNT:
328 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
329 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
330 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
331 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
332 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
333 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
334 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
338 case CTSF_areaPCT_VALIDN:
341 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
342 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
343 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
344 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
345 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
346 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
347 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
351 case CTSF_areaPCT_TOTALN:
354 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
355 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
356 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
357 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
358 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
359 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
360 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
364 case CTSF_MAXIMUM: return N_("Maximum");
365 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
366 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
367 case CTSF_MINIMUM: return N_("Minimum");
368 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
369 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
370 case CTSF_PTILE: NOT_REACHED ();
371 case CTSF_RANGE: return N_("Range");
372 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
373 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
374 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
375 case CTSF_TOTALN: return (d ? N_("Total N")
376 : w ? N_("Adjusted Total N")
377 : N_("Unweighted Total N"));
378 case CTSF_VALIDN: return (d ? N_("Valid N")
379 : w ? N_("Adjusted Valid N")
380 : N_("Unweighted Valid N"));
381 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
382 case CTSF_areaPCT_SUM:
385 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
386 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
387 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
388 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
389 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
390 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
391 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
398 /* Don't bother translating these: they are for developers only. */
399 case CTAT_TABLE: return "Table ID";
400 case CTAT_LAYER: return "Layer ID";
401 case CTAT_LAYERROW: return "Layer Row ID";
402 case CTAT_LAYERCOL: return "Layer Column ID";
403 case CTAT_SUBTABLE: return "Subtable ID";
404 case CTAT_ROW: return "Row ID";
405 case CTAT_COL: return "Column ID";
413 static struct pivot_value *
414 ctables_summary_function_label (enum ctables_summary_function function,
415 enum ctables_weighting weighting,
416 enum ctables_area_type area,
419 if (function == CTSF_PTILE)
421 char *s = (weighting != CTW_UNWEIGHTED
422 ? xasprintf (_("Percentile %.2f"), percentile)
423 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
424 return pivot_value_new_user_text_nocopy (s);
427 return pivot_value_new_text (ctables_summary_function_label__ (
428 function, weighting, area));
431 /* CTABLES summaries. */
433 struct ctables_summary_spec
435 /* The calculation to be performed.
437 'function' is the function to calculate. 'weighted' specifies whether
438 to use weighted or unweighted data (for functions that do not support a
439 choice, it must be true). 'calc_area' is the area over which the
440 calculation takes place (for functions that target only an individual
441 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
442 percentile between 0 and 100 (for other functions it must be 0). */
443 enum ctables_summary_function function;
444 enum ctables_weighting weighting;
445 enum ctables_area_type calc_area;
446 double percentile; /* CTSF_PTILE only. */
448 /* How to display the result of the calculation.
450 'label' is a user-specified label, NULL if the user didn't specify
453 'user_area' is usually the same as 'calc_area', but when category labels
454 are rotated from one axis to another it swaps rows and columns.
456 'format' is the format for displaying the output. If
457 'is_ctables_format' is true, then 'format.type' is one of the special
458 CTEF_* formats instead of the standard ones. */
460 enum ctables_area_type user_area;
461 struct fmt_spec format;
462 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
469 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
470 const struct ctables_summary_spec *src)
473 dst->label = xstrdup_if_nonnull (src->label);
477 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
483 /* Collections of summary functions. */
485 struct ctables_summary_spec_set
487 struct ctables_summary_spec *specs;
491 /* The variable to which the summary specs are applied. */
492 struct variable *var;
494 /* Whether the variable to which the summary specs are applied is a scale
495 variable for the purpose of summarization.
497 (VALIDN and TOTALN act differently for summarizing scale and categorical
501 /* If any of these optional additional scale variables are missing, then
502 treat 'var' as if it's missing too. This is for implementing
503 SMISSING=LISTWISE. */
504 struct variable **listwise_vars;
505 size_t n_listwise_vars;
509 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
510 const struct ctables_summary_spec_set *src)
512 struct ctables_summary_spec *specs
513 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
514 for (size_t i = 0; i < src->n; i++)
515 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
517 *dst = (struct ctables_summary_spec_set) {
522 .is_scale = src->is_scale,
527 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
529 for (size_t i = 0; i < set->n; i++)
530 ctables_summary_spec_uninit (&set->specs[i]);
531 free (set->listwise_vars);
535 /* CTABLES postcompute expressions. */
537 struct ctables_pcexpr
547 enum ctables_pcexpr_op
550 CTPO_CONSTANT, /* 5 */
551 CTPO_CAT_NUMBER, /* [5] */
552 CTPO_CAT_STRING, /* ["STRING"] */
553 CTPO_CAT_NRANGE, /* [LO THRU 5] */
554 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
555 CTPO_CAT_MISSING, /* MISSING */
556 CTPO_CAT_OTHERNM, /* OTHERNM */
557 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
558 CTPO_CAT_TOTAL, /* TOTAL */
572 /* CTPO_CAT_NUMBER. */
575 /* CTPO_CAT_STRING, in dictionary encoding. */
576 struct substring string;
578 /* CTPO_CAT_NRANGE. */
581 /* CTPO_CAT_SRANGE. */
582 struct substring srange[2];
584 /* CTPO_CAT_SUBTOTAL. */
585 size_t subtotal_index;
587 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
588 One element: CTPO_NEG. */
589 struct ctables_pcexpr *subs[2];
592 /* Source location. */
593 struct msg_location *location;
596 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
597 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
598 struct ctables_pcexpr *sub1);
600 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
601 struct dictionary *);
604 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
610 case CTPO_CAT_STRING:
611 ss_dealloc (&e->string);
614 case CTPO_CAT_SRANGE:
615 for (size_t i = 0; i < 2; i++)
616 ss_dealloc (&e->srange[i]);
625 for (size_t i = 0; i < 2; i++)
626 ctables_pcexpr_destroy (e->subs[i]);
630 case CTPO_CAT_NUMBER:
631 case CTPO_CAT_NRANGE:
632 case CTPO_CAT_MISSING:
633 case CTPO_CAT_OTHERNM:
634 case CTPO_CAT_SUBTOTAL:
639 msg_location_destroy (e->location);
644 static struct ctables_pcexpr *
645 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
646 struct ctables_pcexpr *sub0,
647 struct ctables_pcexpr *sub1)
649 struct ctables_pcexpr *e = xmalloc (sizeof *e);
650 *e = (struct ctables_pcexpr) {
652 .subs = { sub0, sub1 },
653 .location = msg_location_merged (sub0->location, sub1->location),
658 /* How to parse an operator. */
661 enum token_type token;
662 enum ctables_pcexpr_op op;
665 static const struct operator *
666 ctables_pcexpr_match_operator (struct lexer *lexer,
667 const struct operator ops[], size_t n_ops)
669 for (const struct operator *op = ops; op < ops + n_ops; op++)
670 if (lex_token (lexer) == op->token)
672 if (op->token != T_NEG_NUM)
681 static struct ctables_pcexpr *
682 ctables_pcexpr_parse_binary_operators__ (
683 struct lexer *lexer, struct dictionary *dict,
684 const struct operator ops[], size_t n_ops,
685 parse_recursively_func *parse_next_level,
686 const char *chain_warning, struct ctables_pcexpr *lhs)
688 for (int op_count = 0; ; op_count++)
690 const struct operator *op
691 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
694 if (op_count > 1 && chain_warning)
695 msg_at (SW, lhs->location, "%s", chain_warning);
700 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
703 ctables_pcexpr_destroy (lhs);
707 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
711 static struct ctables_pcexpr *
712 ctables_pcexpr_parse_binary_operators (
713 struct lexer *lexer, struct dictionary *dict,
714 const struct operator ops[], size_t n_ops,
715 parse_recursively_func *parse_next_level, const char *chain_warning)
717 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
721 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
726 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
727 struct dictionary *);
729 static struct ctables_pcexpr
730 ctpo_cat_nrange (double low, double high)
732 return (struct ctables_pcexpr) {
733 .op = CTPO_CAT_NRANGE,
734 .nrange = { low, high },
738 static struct ctables_pcexpr
739 ctpo_cat_srange (struct substring low, struct substring high)
741 return (struct ctables_pcexpr) {
742 .op = CTPO_CAT_SRANGE,
743 .srange = { low, high },
747 static struct substring
748 parse_substring (struct lexer *lexer, struct dictionary *dict)
750 struct substring s = recode_substring_pool (
751 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
752 ss_rtrim (&s, ss_cstr (" "));
757 static struct ctables_pcexpr *
758 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
760 int start_ofs = lex_ofs (lexer);
761 struct ctables_pcexpr e;
762 if (lex_is_number (lexer))
764 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
765 .number = lex_number (lexer) };
768 else if (lex_match_id (lexer, "MISSING"))
769 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
770 else if (lex_match_id (lexer, "OTHERNM"))
771 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
772 else if (lex_match_id (lexer, "TOTAL"))
773 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
774 else if (lex_match_id (lexer, "SUBTOTAL"))
776 size_t subtotal_index = 0;
777 if (lex_match (lexer, T_LBRACK))
779 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
781 subtotal_index = lex_integer (lexer);
783 if (!lex_force_match (lexer, T_RBRACK))
786 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
787 .subtotal_index = subtotal_index };
789 else if (lex_match (lexer, T_LBRACK))
791 if (lex_match_id (lexer, "LO"))
793 if (!lex_force_match_id (lexer, "THRU"))
796 if (lex_is_string (lexer))
798 struct substring low = { .string = NULL };
799 struct substring high = parse_substring (lexer, dict);
800 e = ctpo_cat_srange (low, high);
804 if (!lex_force_num (lexer))
806 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
810 else if (lex_is_number (lexer))
812 double number = lex_number (lexer);
814 if (lex_match_id (lexer, "THRU"))
816 if (lex_match_id (lexer, "HI"))
817 e = ctpo_cat_nrange (number, DBL_MAX);
820 if (!lex_force_num (lexer))
822 e = ctpo_cat_nrange (number, lex_number (lexer));
827 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
830 else if (lex_is_string (lexer))
832 struct substring s = parse_substring (lexer, dict);
834 if (lex_match_id (lexer, "THRU"))
836 struct substring high;
838 if (lex_match_id (lexer, "HI"))
839 high = (struct substring) { .string = NULL };
842 if (!lex_force_string (lexer))
847 high = parse_substring (lexer, dict);
850 e = ctpo_cat_srange (s, high);
853 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
857 lex_error (lexer, NULL);
861 if (!lex_force_match (lexer, T_RBRACK))
863 if (e.op == CTPO_CAT_STRING)
864 ss_dealloc (&e.string);
865 else if (e.op == CTPO_CAT_SRANGE)
867 ss_dealloc (&e.srange[0]);
868 ss_dealloc (&e.srange[1]);
873 else if (lex_match (lexer, T_LPAREN))
875 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
878 if (!lex_force_match (lexer, T_RPAREN))
880 ctables_pcexpr_destroy (ep);
887 lex_error (lexer, NULL);
891 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
892 return xmemdup (&e, sizeof e);
895 static struct ctables_pcexpr *
896 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
897 struct lexer *lexer, int start_ofs)
899 struct ctables_pcexpr *e = xmalloc (sizeof *e);
900 *e = (struct ctables_pcexpr) {
903 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
908 static struct ctables_pcexpr *
909 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
911 static const struct operator op = { T_EXP, CTPO_POW };
913 const char *chain_warning =
914 _("The exponentiation operator (`**') is left-associative: "
915 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
916 "To disable this warning, insert parentheses.");
918 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
919 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
920 ctables_pcexpr_parse_primary,
923 /* Special case for situations like "-5**6", which must be parsed as
926 int start_ofs = lex_ofs (lexer);
927 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
928 *lhs = (struct ctables_pcexpr) {
930 .number = -lex_tokval (lexer),
931 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
935 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
937 ctables_pcexpr_parse_primary, chain_warning, lhs);
941 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
944 /* Parses the unary minus level. */
945 static struct ctables_pcexpr *
946 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
948 int start_ofs = lex_ofs (lexer);
949 if (!lex_match (lexer, T_DASH))
950 return ctables_pcexpr_parse_exp (lexer, dict);
952 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
956 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
959 /* Parses the multiplication and division level. */
960 static struct ctables_pcexpr *
961 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
963 static const struct operator ops[] =
965 { T_ASTERISK, CTPO_MUL },
966 { T_SLASH, CTPO_DIV },
969 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
970 sizeof ops / sizeof *ops,
971 ctables_pcexpr_parse_neg, NULL);
974 /* Parses the addition and subtraction level. */
975 static struct ctables_pcexpr *
976 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
978 static const struct operator ops[] =
980 { T_PLUS, CTPO_ADD },
981 { T_DASH, CTPO_SUB },
982 { T_NEG_NUM, CTPO_ADD },
985 return ctables_pcexpr_parse_binary_operators (lexer, dict,
986 ops, sizeof ops / sizeof *ops,
987 ctables_pcexpr_parse_mul, NULL);
990 /* CTABLES axis expressions. */
992 /* CTABLES has a number of extra formats that we implement via custom
993 currency specifications on an alternate fmt_settings. */
994 #define CTEF_NEGPAREN FMT_CCA
995 #define CTEF_NEQUAL FMT_CCB
996 #define CTEF_PAREN FMT_CCC
997 #define CTEF_PCTPAREN FMT_CCD
999 enum ctables_summary_variant
1008 enum ctables_axis_op
1024 struct variable *var;
1026 struct ctables_summary_spec_set specs[N_CSVS];
1030 struct ctables_axis *subs[2];
1033 struct msg_location *loc;
1037 ctables_axis_destroy (struct ctables_axis *axis)
1045 for (size_t i = 0; i < N_CSVS; i++)
1046 ctables_summary_spec_set_uninit (&axis->specs[i]);
1051 ctables_axis_destroy (axis->subs[0]);
1052 ctables_axis_destroy (axis->subs[1]);
1055 msg_location_destroy (axis->loc);
1059 static struct ctables_axis *
1060 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1061 struct ctables_axis *sub0,
1062 struct ctables_axis *sub1,
1063 struct lexer *lexer, int start_ofs)
1065 struct ctables_axis *axis = xmalloc (sizeof *axis);
1066 *axis = (struct ctables_axis) {
1068 .subs = { sub0, sub1 },
1069 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1074 struct ctables_axis_parse_ctx
1076 struct lexer *lexer;
1077 struct dictionary *dict;
1080 static struct pivot_value *
1081 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1084 return ctables_summary_function_label (spec->function, spec->weighting,
1085 spec->user_area, spec->percentile);
1088 struct substring in = ss_cstr (spec->label);
1089 struct substring target = ss_cstr (")CILEVEL");
1091 struct string out = DS_EMPTY_INITIALIZER;
1094 size_t chunk = ss_find_substring (in, target);
1095 ds_put_substring (&out, ss_head (in, chunk));
1096 ss_advance (&in, chunk);
1098 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1100 ss_advance (&in, target.length);
1101 ds_put_format (&out, "%g", cilevel);
1107 add_summary_spec (struct ctables_axis *axis,
1108 enum ctables_summary_function function,
1109 enum ctables_weighting weighting,
1110 enum ctables_area_type area, double percentile,
1111 const char *label, const struct fmt_spec *format,
1112 bool is_ctables_format, const struct msg_location *loc,
1113 enum ctables_summary_variant sv)
1115 if (axis->op == CTAO_VAR)
1117 char function_name[128];
1118 ctables_summary_function_name (function, weighting, area,
1119 function_name, sizeof function_name);
1120 const char *var_name = var_get_name (axis->var);
1121 switch (ctables_function_availability (function))
1125 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1126 "response sets."), function_name);
1127 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1133 if (!axis->scale && sv != CSV_TOTAL)
1136 _("Summary function %s applies only to scale variables."),
1138 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1148 struct ctables_summary_spec_set *set = &axis->specs[sv];
1149 if (set->n >= set->allocated)
1150 set->specs = x2nrealloc (set->specs, &set->allocated,
1151 sizeof *set->specs);
1153 struct ctables_summary_spec *dst = &set->specs[set->n++];
1154 *dst = (struct ctables_summary_spec) {
1155 .function = function,
1156 .weighting = weighting,
1159 .percentile = percentile,
1160 .label = xstrdup_if_nonnull (label),
1161 .format = (format ? *format
1162 : ctables_summary_default_format (function, axis->var)),
1163 .is_ctables_format = is_ctables_format,
1169 for (size_t i = 0; i < 2; i++)
1170 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1171 percentile, label, format, is_ctables_format,
1178 static struct ctables_axis *ctables_axis_parse_stack (
1179 struct ctables_axis_parse_ctx *);
1181 static struct ctables_axis *
1182 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1184 if (lex_match (ctx->lexer, T_LPAREN))
1186 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1187 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1189 ctables_axis_destroy (sub);
1195 if (!lex_force_id (ctx->lexer))
1198 if (lex_tokcstr (ctx->lexer)[0] == '$')
1200 lex_error (ctx->lexer,
1201 _("Multiple response set support not implemented."));
1205 int start_ofs = lex_ofs (ctx->lexer);
1206 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1210 struct ctables_axis *axis = xmalloc (sizeof *axis);
1211 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1213 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1214 : lex_match_phrase (ctx->lexer, "[C]") ? false
1215 : var_get_measure (var) == MEASURE_SCALE);
1216 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1217 lex_ofs (ctx->lexer) - 1);
1218 if (axis->scale && var_is_alpha (var))
1220 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1222 var_get_name (var));
1223 ctables_axis_destroy (axis);
/* Returns true if null-terminated string S contains at least one ASCII
   decimal digit, false otherwise (including when S is empty). */
static bool
has_digit (const char *s)
{
  /* strcspn() yields the length of the leading run that has no digits, so
     the byte at that offset is either the first digit or the terminator. */
  return s[strcspn (s, "0123456789")] != '\0';
}
1237 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1238 bool *is_ctables_format)
1240 char type[FMT_TYPE_LEN_MAX + 1];
1241 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1244 if (!strcasecmp (type, "NEGPAREN"))
1245 format->type = CTEF_NEGPAREN;
1246 else if (!strcasecmp (type, "NEQUAL"))
1247 format->type = CTEF_NEQUAL;
1248 else if (!strcasecmp (type, "PAREN"))
1249 format->type = CTEF_PAREN;
1250 else if (!strcasecmp (type, "PCTPAREN"))
1251 format->type = CTEF_PCTPAREN;
1254 *is_ctables_format = false;
1255 return (parse_format_specifier (lexer, format)
1256 && fmt_check_output (format)
1257 && fmt_check_type_compat (format, VAL_NUMERIC));
1263 lex_next_error (lexer, -1, -1,
1264 _("Output format %s requires width 2 or greater."), type);
1267 else if (format->d > format->w - 1)
1269 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1270 "greater than decimals."), type);
1275 *is_ctables_format = true;
1280 static struct ctables_axis *
1281 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1283 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1284 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1287 enum ctables_summary_variant sv = CSV_CELL;
1290 int start_ofs = lex_ofs (ctx->lexer);
1292 /* Parse function. */
1293 enum ctables_summary_function function;
1294 enum ctables_weighting weighting;
1295 enum ctables_area_type area;
1296 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1300 /* Parse percentile. */
1301 double percentile = 0;
1302 if (function == CTSF_PTILE)
1304 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1306 percentile = lex_number (ctx->lexer);
1307 lex_get (ctx->lexer);
1312 if (lex_is_string (ctx->lexer))
1314 label = ss_xstrdup (lex_tokss (ctx->lexer));
1315 lex_get (ctx->lexer);
1319 struct fmt_spec format;
1320 const struct fmt_spec *formatp;
1321 bool is_ctables_format = false;
1322 if (lex_token (ctx->lexer) == T_ID
1323 && has_digit (lex_tokcstr (ctx->lexer)))
1325 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1326 &is_ctables_format))
1336 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1337 lex_ofs (ctx->lexer) - 1);
1338 add_summary_spec (sub, function, weighting, area, percentile, label,
1339 formatp, is_ctables_format, loc, sv);
1341 msg_location_destroy (loc);
1343 lex_match (ctx->lexer, T_COMMA);
1344 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1346 if (!lex_force_match (ctx->lexer, T_LBRACK))
1350 else if (lex_match (ctx->lexer, T_RBRACK))
1352 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1359 ctables_axis_destroy (sub);
1363 static const struct ctables_axis *
1364 find_scale (const struct ctables_axis *axis)
1368 else if (axis->op == CTAO_VAR)
1369 return axis->scale ? axis : NULL;
1372 for (size_t i = 0; i < 2; i++)
1374 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1382 static const struct ctables_axis *
1383 find_categorical_summary_spec (const struct ctables_axis *axis)
1387 else if (axis->op == CTAO_VAR)
1388 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1391 for (size_t i = 0; i < 2; i++)
1393 const struct ctables_axis *sum
1394 = find_categorical_summary_spec (axis->subs[i]);
1402 static struct ctables_axis *
1403 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1405 int start_ofs = lex_ofs (ctx->lexer);
1406 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1410 while (lex_match (ctx->lexer, T_GT))
1412 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1415 ctables_axis_destroy (lhs);
1419 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1420 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1422 const struct ctables_axis *outer_scale = find_scale (lhs);
1423 const struct ctables_axis *inner_scale = find_scale (rhs);
1424 if (outer_scale && inner_scale)
1426 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1427 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1428 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1429 ctables_axis_destroy (nest);
1433 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1436 msg_at (SE, nest->loc,
1437 _("Summaries may only be requested for categorical variables "
1438 "at the innermost nesting level."));
1439 msg_at (SN, outer_sum->loc,
1440 _("This outer categorical variable has a summary."));
1441 ctables_axis_destroy (nest);
1451 static struct ctables_axis *
1452 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1454 int start_ofs = lex_ofs (ctx->lexer);
1455 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1459 while (lex_match (ctx->lexer, T_PLUS))
1461 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1464 ctables_axis_destroy (lhs);
1468 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1469 ctx->lexer, start_ofs);
1476 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1477 struct ctables_axis **axisp)
1480 if (lex_token (lexer) == T_BY
1481 || lex_token (lexer) == T_SLASH
1482 || lex_token (lexer) == T_ENDCMD)
1485 struct ctables_axis_parse_ctx ctx = {
1489 *axisp = ctables_axis_parse_stack (&ctx);
1493 /* CTABLES categories. */
/* A reference-counted list of categories.  The parser gives one reference to
   each variable the list is applied to. */
struct ctables_categories
  {
    size_t n_refs;                  /* Reference count. */
    struct ctables_category *cats;  /* The categories... */
    size_t n_cats;                  /* ...and how many there are. */
    bool show_empty;                /* Set by EMPTY=INCLUDE or EXCLUDE. */
  };
/* One element of the 'cats' array in struct ctables_categories.  The category
   type selects which of the type-specific members below are meaningful, as
   the per-member comments say.
   NOTE(review): several short lines of this declaration (braces and short
   members) are not visible in this listing. */
1503 struct ctables_category
1505 enum ctables_category_type
1507 /* Explicit category lists. */
1510 CCT_NRANGE, /* Numerical range. */
1511 CCT_SRANGE, /* String range. */
1516 /* Totals and subtotals. */
1520 /* Implicit category lists. */
1525 /* For contributing to TOTALN. */
1526 CCT_EXCLUDED_MISSING,
/* Assigned by ctables_table_parse_categories() after totals are added. */
1530 struct ctables_category *subtotal;
1536 double number; /* CCT_NUMBER. */
1537 struct substring string; /* CCT_STRING, in dictionary encoding. */
1538 double nrange[2]; /* CCT_NRANGE. */
1539 struct substring srange[2]; /* CCT_SRANGE. */
1543 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1544 bool hide_subcategories; /* CCT_SUBTOTAL. */
1547 /* CCT_POSTCOMPUTE. */
1550 const struct ctables_postcompute *pc;
1551 enum fmt_type parse_format;
1554 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1557 bool include_missing;
1558 bool sort_ascending;
1561 enum ctables_summary_function sort_function;
1562 enum ctables_weighting weighting;
1563 enum ctables_area_type area;
1564 struct variable *sort_var;
1569 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1570 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1571 struct msg_location *location;
1575 ctables_category_uninit (struct ctables_category *cat)
1580 msg_location_destroy (cat->location);
1587 case CCT_POSTCOMPUTE:
1591 ss_dealloc (&cat->string);
1595 ss_dealloc (&cat->srange[0]);
1596 ss_dealloc (&cat->srange[1]);
1601 free (cat->total_label);
1609 case CCT_EXCLUDED_MISSING:
1615 nullable_substring_equal (const struct substring *a,
1616 const struct substring *b)
1618 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
1622 ctables_category_equal (const struct ctables_category *a,
1623 const struct ctables_category *b)
1625 if (a->type != b->type)
1631 return a->number == b->number;
1634 return ss_equals (a->string, b->string);
1637 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1640 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1641 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1647 case CCT_POSTCOMPUTE:
1648 return a->pc == b->pc;
1652 return !strcmp (a->total_label, b->total_label);
1657 return (a->include_missing == b->include_missing
1658 && a->sort_ascending == b->sort_ascending
1659 && a->sort_function == b->sort_function
1660 && a->sort_var == b->sort_var
1661 && a->percentile == b->percentile);
1663 case CCT_EXCLUDED_MISSING:
1671 ctables_categories_unref (struct ctables_categories *c)
1676 assert (c->n_refs > 0);
1680 for (size_t i = 0; i < c->n_cats; i++)
1681 ctables_category_uninit (&c->cats[i]);
1687 ctables_categories_equal (const struct ctables_categories *a,
1688 const struct ctables_categories *b)
1690 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1693 for (size_t i = 0; i < a->n_cats; i++)
1694 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
1700 /* CTABLES variable nesting and stacking. */
1702 /* A nested sequence of variables, e.g. a > b > c. */
1705 struct variable **vars;
1709 size_t *areas[N_CTATS];
1710 size_t n_areas[N_CTATS];
1713 struct ctables_summary_spec_set specs[N_CSVS];
1716 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
struct ctables_stack
  {
    struct ctables_nest *nests; /* Array of 'n' nests, owned by the stack. */
    size_t n;
  };
1724 ctables_nest_uninit (struct ctables_nest *nest)
1727 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
1728 ctables_summary_spec_set_uninit (&nest->specs[sv]);
1729 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
1730 free (nest->areas[at]);
1734 ctables_stack_uninit (struct ctables_stack *stack)
1738 for (size_t i = 0; i < stack->n; i++)
1739 ctables_nest_uninit (&stack->nests[i]);
1740 free (stack->nests);
1744 static struct ctables_stack
1745 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
1752 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
1753 for (size_t i = 0; i < s0.n; i++)
1754 for (size_t j = 0; j < s1.n; j++)
1756 const struct ctables_nest *a = &s0.nests[i];
1757 const struct ctables_nest *b = &s1.nests[j];
1759 size_t allocate = a->n + b->n;
1760 struct variable **vars = xnmalloc (allocate, sizeof *vars);
1762 for (size_t k = 0; k < a->n; k++)
1763 vars[n++] = a->vars[k];
1764 for (size_t k = 0; k < b->n; k++)
1765 vars[n++] = b->vars[k];
1766 assert (n == allocate);
1768 const struct ctables_nest *summary_src;
1769 if (!a->specs[CSV_CELL].var)
1771 else if (!b->specs[CSV_CELL].var)
1776 struct ctables_nest *new = &stack.nests[stack.n++];
1777 *new = (struct ctables_nest) {
1779 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
1780 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
1782 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
1783 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
1787 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
1788 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
1790 ctables_stack_uninit (&s0);
1791 ctables_stack_uninit (&s1);
1795 static struct ctables_stack
1796 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
1798 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
1799 for (size_t i = 0; i < s0.n; i++)
1800 stack.nests[stack.n++] = s0.nests[i];
1801 for (size_t i = 0; i < s1.n; i++)
1803 stack.nests[stack.n] = s1.nests[i];
1804 stack.nests[stack.n].group_head += s0.n;
1807 assert (stack.n == s0.n + s1.n);
1813 static struct ctables_stack
1814 var_fts (const struct ctables_axis *a)
1816 struct variable **vars = xmalloc (sizeof *vars);
1819 bool is_summary = a->specs[CSV_CELL].n || a->scale;
1820 struct ctables_nest *nest = xmalloc (sizeof *nest);
1821 *nest = (struct ctables_nest) {
1824 .scale_idx = a->scale ? 0 : SIZE_MAX,
1825 .summary_idx = is_summary ? 0 : SIZE_MAX,
1828 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
1830 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
1831 nest->specs[sv].var = a->var;
1832 nest->specs[sv].is_scale = a->scale;
1834 return (struct ctables_stack) { .nests = nest, .n = 1 };
1837 static struct ctables_stack
1838 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
1841 return (struct ctables_stack) { .n = 0 };
1849 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
1850 enumerate_fts (axis_type, a->subs[1]));
1853 /* This should consider any of the scale variables found in the result to
1854 be linked to each other listwise for SMISSING=LISTWISE. */
1855 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
1856 enumerate_fts (axis_type, a->subs[1]));
/* Each CTVL_* value equals the SETTINGS_VALUE_SHOW_* value of the same
   meaning.
   NOTE(review): these are presumably the members of enum ctables_vlabel (the
   element type of the 'vlabels' array); the declaration line is not visible
   in this listing.  Confirm. */
1864 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
1865 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
1866 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
1867 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
/* Members of the structure kept in a section's 'cells' hmap.
   NOTE(review): presumably struct ctables_cell; its declaration line, braces,
   and short members are not visible in this listing. */
1872 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
1873 all the axes (except the scalar variable, if any). */
1874 struct hmap_node node;
1876 /* The areas that contain this cell. */
1877 uint32_t omit_areas;
1878 struct ctables_area *areas[N_CTATS];
1883 enum ctables_summary_variant sv;
1885 struct ctables_cell_axis
1887 struct ctables_cell_value
1889 const struct ctables_category *category;
/* NOTE(review): presumably one summary per summary specification; confirm
   against the code that allocates this array. */
1897 union ctables_summary *summaries;
/* A named computed category.  Explicit category lists refer to one as &NAME,
   and ctables_destroy() frees the location, expression, and specification
   set visible below.
   NOTE(review): some short members are not visible in this listing. */
1900 struct ctables_postcompute
1902 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
1903 char *name; /* Name, without leading &. */
1905 struct msg_location *location; /* Location of definition. */
1906 struct ctables_pcexpr *expr;
1908 struct ctables_summary_spec_set *specs;
/* When set, ctables_recursive_check_postcompute() acts on each category the
   expression refers to; the statement it executes is not visible here. */
1909 bool hide_source_cats;
/* Members of the top-level CTABLES state that ctables_destroy() tears down.
   NOTE(review): presumably struct ctables; its declaration line and some
   short members are not visible in this listing. */
1914 const struct dictionary *dict;
1915 struct pivot_table_look *look;
1917 /* For CTEF_* formats. */
1918 struct fmt_settings ctables_formats;
1920 /* If this is NULL, zeros are displayed using the normal print format.
1921 Otherwise, this string is displayed. */
1924 /* If this is NULL, missing values are displayed using the normal print
1925 format. Otherwise, this string is displayed. */
1928 /* Indexed by variable dictionary index. */
1929 enum ctables_vlabel *vlabels;
1931 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
1933 bool mrsets_count_duplicates; /* MRSETS. */
1934 bool smissing_listwise; /* SMISSING. */
1935 struct variable *e_weight; /* WEIGHT. */
1936 int hide_threshold; /* HIDESMALLCOUNTS. */
1938 struct ctables_table **tables;
1942 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* An element of a table's 'clabels_values_map' hmap.  ctables_table_destroy()
   also destroys a 'value' member, which is not visible in this listing. */
1945 struct ctables_value
1947 struct hmap_node node;
/* An element of one of a section's 'occurrences' hmaps.
   NOTE(review): the remaining members are not visible in this listing. */
1952 struct ctables_occurrence
1954 struct hmap_node node;
/* One section of a table, with one nest per pivot axis.  It owns the hmaps of
   cells and areas declared below.
   NOTE(review): some short members are not visible in this listing. */
1958 struct ctables_section
1961 struct ctables_table *table;
1962 struct ctables_nest *nests[PIVOT_N_AXES];
1965 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
1966 struct hmap cells; /* Contains "struct ctables_cell"s. */
1967 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
1970 static void ctables_section_uninit (struct ctables_section *);
/* One table of a CTABLES command: its parsed axes, the stacks of nests those
   axes expand to, its sections, and per-variable category settings.
   ctables_table_destroy() releases it.
   NOTE(review): some short members are not visible in this listing. */
1972 struct ctables_table
1974 struct ctables *ctables;
1975 struct ctables_axis *axes[PIVOT_N_AXES];
1976 struct ctables_stack stacks[PIVOT_N_AXES];
1977 struct ctables_section *sections;
1979 enum pivot_axis_type summary_axis;
1980 struct ctables_summary_spec_set summary_specs;
1981 struct variable **sum_vars;
1984 enum pivot_axis_type slabels_axis;
1985 bool slabels_visible;
1987 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
1989 Most commonly, label_axis[a] == a, and in particular we always have
1990 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
1992 If ROWLABELS or COLLABELS is specified, then one of
1993 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
1994 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
1996 If any category labels are moved, then 'clabels_example' is one of the
1997 variables being moved (and it is otherwise NULL). All of the variables
1998 being moved have the same width, value labels, and categories, so this
1999 example variable can be used to find those out.
2001 The remaining members in this group are relevant only if category labels
2004 'clabels_values_map' holds a "struct ctables_value" for all the values
2005 that appear in all of the variables in the moved categories. It is
2006 accumulated as the data is read. Once the data is fully read, its
2007 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2009 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2010 enum pivot_axis_type clabels_from_axis;
2011 enum pivot_axis_type clabels_to_axis;
2012 const struct variable *clabels_example;
2013 struct hmap clabels_values_map;
2014 struct ctables_value **clabels_values;
2015 size_t n_clabels_values;
2017 /* Indexed by variable dictionary index. */
2018 struct ctables_categories **categories;
2019 size_t n_categories;
2027 struct ctables_chisq *chisq;
2028 struct ctables_pairwise *pairwise;
2031 /* Chi-square test (SIGTEST). */
/* Settings for a table's chi-square test.  A table points to one through its
   'chisq' member.
   NOTE(review): the other members are not visible in this listing. */
2032 struct ctables_chisq
2035 bool include_mrsets;
2039 /* Pairwise comparison test (COMPARETEST). */
/* Settings for a table's pairwise comparison test.  A table points to one
   through its 'pairwise' member.
   NOTE(review): some members are not visible in this listing. */
2040 struct ctables_pairwise
/* Which statistic is compared.  NOTE(review): presumably proportions versus
   means; confirm against the COMPARETEST parser. */
2042 enum { PROP, MEAN } type;
2044 bool include_mrsets;
2045 bool meansvariance_allcats;
/* Multiple-comparison adjustment.  No enumerator has the value 0. */
2047 enum { BONFERRONI = 1, BH } adjust;
2056 parse_col_width (struct lexer *lexer, const char *name, double *width)
2058 lex_match (lexer, T_EQUALS);
2059 if (lex_match_id (lexer, "DEFAULT"))
2061 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
2063 *width = lex_number (lexer);
/* Parses YES or NO from LEXER and stores the matching value in *B.  Returns
   true on success.  Otherwise reports what was expected and returns false
   without touching *B. */
static bool
parse_bool (struct lexer *lexer, bool *b)
{
  if (lex_match_id (lexer, "NO"))
    {
      *b = false;
      return true;
    }
  if (lex_match_id (lexer, "YES"))
    {
      *b = true;
      return true;
    }

  lex_error_expecting (lexer, "YES", "NO");
  return false;
}
/* Frees CHISQ.  A null pointer is fine. */
static void
ctables_chisq_destroy (struct ctables_chisq *chisq)
{
  free (chisq);
}
/* Frees PAIRWISE.  A null pointer is fine. */
static void
ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
{
  free (pairwise);
}
/* Destroys table T: its sections, the references it holds on per-variable
   category lists, its axes and stacks, its summary specification array, the
   moved-category-label values, and its significance test settings.
   NOTE(review): several short statements (additional free() calls and the
   null check, if any) are not visible in this listing. */
2100 ctables_table_destroy (struct ctables_table *t)
2105 for (size_t i = 0; i < t->n_sections; i++)
2106 ctables_section_uninit (&t->sections[i]);
/* Category lists are shared between variables, so only drop references. */
2109 for (size_t i = 0; i < t->n_categories; i++)
2110 ctables_categories_unref (t->categories[i]);
2111 free (t->categories);
2113 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2115 ctables_axis_destroy (t->axes[a]);
2116 ctables_stack_uninit (&t->stacks[a]);
2118 free (t->summary_specs.specs);
/* Every value in the map has the width of 'clabels_example'. */
2120 struct ctables_value *ctv, *next_ctv;
2121 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
2122 &t->clabels_values_map)
2124 value_destroy (&ctv->value, var_get_width (t->clabels_example));
2125 hmap_delete (&t->clabels_values_map, &ctv->node);
2128 hmap_destroy (&t->clabels_values_map);
2129 free (t->clabels_values);
2135 ctables_chisq_destroy (t->chisq);
2136 ctables_pairwise_destroy (t->pairwise);
/* Destroys CT: every postcompute in its 'postcomputes' hmap, its format
   settings, its reference to the pivot table look, and each of its tables.
   NOTE(review): several short statements (free() calls and the null check,
   if any) are not visible in this listing. */
2141 ctables_destroy (struct ctables *ct)
2146 struct ctables_postcompute *pc, *next_pc;
2147 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
2151 msg_location_destroy (pc->location);
2152 ctables_pcexpr_destroy (pc->expr);
2156 ctables_summary_spec_set_uninit (pc->specs);
2159 hmap_delete (&ct->postcomputes, &pc->hmap_node);
2162 hmap_destroy (&ct->postcomputes);
2164 fmt_settings_uninit (&ct->ctables_formats);
2165 pivot_table_look_unref (ct->look);
2169 for (size_t i = 0; i < ct->n_tables; i++)
2170 ctables_table_destroy (ct->tables[i]);
2175 static struct ctables_category
2176 cct_nrange (double low, double high)
2178 return (struct ctables_category) {
2180 .nrange = { low, high }
2184 static struct ctables_category
2185 cct_srange (struct substring low, struct substring high)
2187 return (struct ctables_category) {
2189 .srange = { low, high }
2194 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
2195 struct ctables_category *cat)
2198 if (lex_match (lexer, T_EQUALS))
2200 if (!lex_force_string (lexer))
2203 total_label = ss_xstrdup (lex_tokss (lexer));
2207 total_label = xstrdup (_("Subtotal"));
2209 *cat = (struct ctables_category) {
2210 .type = CCT_SUBTOTAL,
2211 .hide_subcategories = hide_subcategories,
2212 .total_label = total_label
2218 ctables_table_parse_explicit_category (struct lexer *lexer,
2219 struct dictionary *dict,
2221 struct ctables_category *cat)
2223 if (lex_match_id (lexer, "OTHERNM"))
2224 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
2225 else if (lex_match_id (lexer, "MISSING"))
2226 *cat = (struct ctables_category) { .type = CCT_MISSING };
2227 else if (lex_match_id (lexer, "SUBTOTAL"))
2228 return ctables_table_parse_subtotal (lexer, false, cat);
2229 else if (lex_match_id (lexer, "HSUBTOTAL"))
2230 return ctables_table_parse_subtotal (lexer, true, cat);
2231 else if (lex_match_id (lexer, "LO"))
2233 if (!lex_force_match_id (lexer, "THRU"))
2235 if (lex_is_string (lexer))
2237 struct substring sr0 = { .string = NULL };
2238 struct substring sr1 = parse_substring (lexer, dict);
2239 *cat = cct_srange (sr0, sr1);
2241 else if (lex_force_num (lexer))
2243 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
2249 else if (lex_is_number (lexer))
2251 double number = lex_number (lexer);
2253 if (lex_match_id (lexer, "THRU"))
2255 if (lex_match_id (lexer, "HI"))
2256 *cat = cct_nrange (number, DBL_MAX);
2259 if (!lex_force_num (lexer))
2261 *cat = cct_nrange (number, lex_number (lexer));
2266 *cat = (struct ctables_category) {
2271 else if (lex_is_string (lexer))
2273 struct substring s = parse_substring (lexer, dict);
2274 if (lex_match_id (lexer, "THRU"))
2276 if (lex_match_id (lexer, "HI"))
2278 struct substring sr1 = { .string = NULL };
2279 *cat = cct_srange (s, sr1);
2283 if (!lex_force_string (lexer))
2288 struct substring sr1 = parse_substring (lexer, dict);
2289 *cat = cct_srange (s, sr1);
2293 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
2295 else if (lex_match (lexer, T_AND))
2297 if (!lex_force_id (lexer))
2299 struct ctables_postcompute *pc = ctables_find_postcompute (
2300 ct, lex_tokcstr (lexer));
2303 struct msg_location *loc = lex_get_location (lexer, -1, 0);
2304 msg_at (SE, loc, _("Unknown postcompute &%s."),
2305 lex_tokcstr (lexer));
2306 msg_location_destroy (loc);
2311 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
2315 lex_error (lexer, NULL);
2323 parse_category_string (struct msg_location *location,
2324 struct substring s, const struct dictionary *dict,
2325 enum fmt_type format, double *n)
2328 char *error = data_in (s, dict_get_encoding (dict), format,
2329 settings_get_fmt_settings (), &v, 0, NULL);
2332 msg_at (SE, location,
2333 _("Failed to parse category specification as format %s: %s."),
2334 fmt_name (format), error);
2343 static struct ctables_category *
2344 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
2345 const struct ctables_pcexpr *e)
2347 struct ctables_category *best = NULL;
2348 size_t n_subtotals = 0;
2349 for (size_t i = 0; i < cats->n_cats; i++)
2351 struct ctables_category *cat = &cats->cats[i];
2354 case CTPO_CAT_NUMBER:
2355 if (cat->type == CCT_NUMBER && cat->number == e->number)
2359 case CTPO_CAT_STRING:
2360 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
2364 case CTPO_CAT_NRANGE:
2365 if (cat->type == CCT_NRANGE
2366 && cat->nrange[0] == e->nrange[0]
2367 && cat->nrange[1] == e->nrange[1])
2371 case CTPO_CAT_SRANGE:
2372 if (cat->type == CCT_SRANGE
2373 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
2374 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
2378 case CTPO_CAT_MISSING:
2379 if (cat->type == CCT_MISSING)
2383 case CTPO_CAT_OTHERNM:
2384 if (cat->type == CCT_OTHERNM)
2388 case CTPO_CAT_SUBTOTAL:
2389 if (cat->type == CCT_SUBTOTAL)
2392 if (e->subtotal_index == n_subtotals)
2394 else if (e->subtotal_index == 0)
2399 case CTPO_CAT_TOTAL:
2400 if (cat->type == CCT_TOTAL)
2414 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
2419 static struct ctables_category *
2420 ctables_find_category_for_postcompute (const struct dictionary *dict,
2421 const struct ctables_categories *cats,
2422 enum fmt_type parse_format,
2423 const struct ctables_pcexpr *e)
2425 if (parse_format != FMT_F)
2427 if (e->op == CTPO_CAT_STRING)
2430 if (!parse_category_string (e->location, e->string, dict,
2431 parse_format, &number))
2434 struct ctables_pcexpr e2 = {
2435 .op = CTPO_CAT_NUMBER,
2437 .location = e->location,
2439 return ctables_find_category_for_postcompute__ (cats, &e2);
2441 else if (e->op == CTPO_CAT_SRANGE)
2444 if (!e->srange[0].string)
2445 nrange[0] = -DBL_MAX;
2446 else if (!parse_category_string (e->location, e->srange[0], dict,
2447 parse_format, &nrange[0]))
2450 if (!e->srange[1].string)
2451 nrange[1] = DBL_MAX;
2452 else if (!parse_category_string (e->location, e->srange[1], dict,
2453 parse_format, &nrange[1]))
2456 struct ctables_pcexpr e2 = {
2457 .op = CTPO_CAT_NRANGE,
2458 .nrange = { nrange[0], nrange[1] },
2459 .location = e->location,
2461 return ctables_find_category_for_postcompute__ (cats, &e2);
2464 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that every category that postcompute expression E refers to is in
   category list CATS.  PC_CAT is the CCT_POSTCOMPUTE category whose
   expression is being checked, and CATS_LOCATION is the syntax of the
   category list, used in diagnostics.

   For a category operand, the category is looked up using PC_CAT's parse
   format.  A failed lookup is reported with a hint on how to fix it, and a
   SUBTOTAL reference without a position gets its own message when CATS has
   more than one subtotal.  For the remaining operators, both subexpressions
   are checked recursively.
   NOTE(review): several short lines (case labels, returns, and the statement
   executed when 'hide_source_cats' is set) are not visible in this
   listing. */
2468 ctables_recursive_check_postcompute (struct dictionary *dict,
2469 const struct ctables_pcexpr *e,
2470 struct ctables_category *pc_cat,
2471 const struct ctables_categories *cats,
2472 const struct msg_location *cats_location)
2476 case CTPO_CAT_NUMBER:
2477 case CTPO_CAT_STRING:
2478 case CTPO_CAT_NRANGE:
2479 case CTPO_CAT_SRANGE:
2480 case CTPO_CAT_MISSING:
2481 case CTPO_CAT_OTHERNM:
2482 case CTPO_CAT_SUBTOTAL:
2483 case CTPO_CAT_TOTAL:
2485 struct ctables_category *cat = ctables_find_category_for_postcompute (
2486 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference without a position is ambiguous when the list has
   more than one subtotal, so count them to pick the right diagnostic. */
2489 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2491 size_t n_subtotals = 0;
2492 for (size_t i = 0; i < cats->n_cats; i++)
2493 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2494 if (n_subtotals > 1)
2496 msg_at (SE, cats_location,
2497 ngettext ("These categories include %zu instance "
2498 "of SUBTOTAL or HSUBTOTAL, so references "
2499 "from computed categories must refer to "
2500 "subtotals by position, "
2501 "e.g. SUBTOTAL[1].",
2502 "These categories include %zu instances "
2503 "of SUBTOTAL or HSUBTOTAL, so references "
2504 "from computed categories must refer to "
2505 "subtotals by position, "
2506 "e.g. SUBTOTAL[1].",
2509 msg_at (SN, e->location,
2510 _("This is the reference that lacks a position."));
2515 msg_at (SE, pc_cat->location,
2516 _("Computed category &%s references a category not included "
2517 "in the category list."),
2519 msg_at (SN, e->location, _("This is the missing category."));
2520 if (e->op == CTPO_CAT_SUBTOTAL)
2521 msg_at (SN, cats_location,
2522 _("To fix the problem, add subtotals to the "
2523 "list of categories here."));
/* This hint carries no location, unlike the other two. */
2524 else if (e->op == CTPO_CAT_TOTAL)
2525 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2526 "CATEGORIES specification."));
2528 msg_at (SN, cats_location,
2529 _("To fix the problem, add the missing category to the "
2530 "list of categories here."));
2533 if (pc_cat->pc->hide_source_cats)
/* Check both subexpressions, skipping a null one. */
2547 for (size_t i = 0; i < 2; i++)
2548 if (e->subs[i] && !ctables_recursive_check_postcompute (
2549 dict, e->subs[i], pc_cat, cats, cats_location))
2558 all_strings (struct variable **vars, size_t n_vars,
2559 const struct ctables_category *cat)
2561 for (size_t j = 0; j < n_vars; j++)
2562 if (var_is_numeric (vars[j]))
2564 msg_at (SE, cat->location,
2565 _("This category specification may be applied only to string "
2566 "variables, but this subcommand tries to apply it to "
2567 "numeric variable %s."),
2568 var_get_name (vars[j]));
/* Parses the part of a categories specification that starts at VARIABLES and
   installs the resulting category list in table T for every listed variable.

   The steps visible in this listing are:

   1. Parse the variable list and work out whether all of the variables share
      one print format type ('common_format').

   2. Allocate a category list with one reference per variable and install
      it, dropping each variable's reference to its previous list.

   3. Parse an optional bracketed explicit category list, recording each
      category's source location.

   4. Parse the settings ORDER, KEY, MISSING, TOTAL, LABEL, POSITION, and
      EMPTY.  The first three are accepted only when no explicit categories
      were given.

   5. Append the implicit category and the total category (before or after
      the others, according to POSITION), then walk the list to attach
      subtotals.

   6. For an explicit list, validate each category against the variables'
      types.  Postcomputes are checked recursively, and string categories are
      converted to numbers using 'common_format' where the visible code calls
      parse_category_string().

   NOTE(review): many short lines (braces, returns, gotos, short case labels)
   are not visible in this listing, so the error paths and the return type
   cannot be confirmed from here. */
2575 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2576 struct ctables *ct, struct ctables_table *t)
2578 if (!lex_match_id (lexer, "VARIABLES"))
2580 lex_match (lexer, T_EQUALS);
2582 struct variable **vars;
2584 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* 'common_format' becomes NULL as soon as two variables disagree on print
   format type. */
2587 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2588 for (size_t i = 1; i < n_vars; i++)
2590 const struct fmt_spec *f = var_get_print_format (vars[i]);
2591 if (f->type != common_format->type)
2593 common_format = NULL;
2599 && (fmt_get_category (common_format->type)
2600 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One reference per variable that will point to the new list. */
2602 struct ctables_categories *c = xmalloc (sizeof *c);
2603 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2604 for (size_t i = 0; i < n_vars; i++)
2606 struct ctables_categories **cp
2607 = &t->categories[var_get_dict_index (vars[i])];
2608 ctables_categories_unref (*cp);
/* The offsets stay at -1 unless an explicit category list is present. */
2612 size_t allocated_cats = 0;
2613 int cats_start_ofs = -1;
2614 int cats_end_ofs = -1;
2615 if (lex_match (lexer, T_LBRACK))
2617 cats_start_ofs = lex_ofs (lexer);
2620 if (c->n_cats >= allocated_cats)
2621 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2623 int start_ofs = lex_ofs (lexer);
2624 struct ctables_category *cat = &c->cats[c->n_cats];
2625 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2627 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2630 lex_match (lexer, T_COMMA);
2632 while (!lex_match (lexer, T_RBRACK));
2633 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category: missing values excluded, ascending
   order. */
2636 struct ctables_category cat = {
2638 .include_missing = false,
2639 .sort_ascending = true,
2641 bool show_totals = false;
2642 char *total_label = NULL;
2643 bool totals_before = false;
2644 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2646 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2648 lex_match (lexer, T_EQUALS);
2649 if (lex_match_id (lexer, "A"))
2650 cat.sort_ascending = true;
2651 else if (lex_match_id (lexer, "D"))
2652 cat.sort_ascending = false;
2655 lex_error_expecting (lexer, "A", "D");
2659 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2661 int start_ofs = lex_ofs (lexer) - 1;
2662 lex_match (lexer, T_EQUALS);
2663 if (lex_match_id (lexer, "VALUE"))
2664 cat.type = CCT_VALUE;
2665 else if (lex_match_id (lexer, "LABEL"))
2666 cat.type = CCT_LABEL;
2669 cat.type = CCT_FUNCTION;
2670 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
2671 &cat.weighting, &cat.area))
2674 if (lex_match (lexer, T_LPAREN))
2676 cat.sort_var = parse_variable (lexer, dict);
2680 if (cat.sort_function == CTSF_PTILE)
2682 lex_match (lexer, T_COMMA);
2683 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2685 cat.percentile = lex_number (lexer);
2689 if (!lex_force_match (lexer, T_RPAREN))
2692 else if (ctables_function_availability (cat.sort_function)
2695 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
/* Sorting by a summary function is parsed but then reported as
   unimplemented. */
2699 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
2700 _("Data-dependent sorting is not implemented."));
2704 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2706 lex_match (lexer, T_EQUALS);
2707 if (lex_match_id (lexer, "INCLUDE"))
2708 cat.include_missing = true;
2709 else if (lex_match_id (lexer, "EXCLUDE"))
2710 cat.include_missing = false;
2713 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2717 else if (lex_match_id (lexer, "TOTAL"))
2719 lex_match (lexer, T_EQUALS);
2720 if (!parse_bool (lexer, &show_totals))
2723 else if (lex_match_id (lexer, "LABEL"))
2725 lex_match (lexer, T_EQUALS);
2726 if (!lex_force_string (lexer))
2729 total_label = ss_xstrdup (lex_tokss (lexer));
2732 else if (lex_match_id (lexer, "POSITION"))
2734 lex_match (lexer, T_EQUALS);
2735 if (lex_match_id (lexer, "BEFORE"))
2736 totals_before = true;
2737 else if (lex_match_id (lexer, "AFTER"))
2738 totals_before = false;
2741 lex_error_expecting (lexer, "BEFORE", "AFTER");
2745 else if (lex_match_id (lexer, "EMPTY"))
2747 lex_match (lexer, T_EQUALS);
2748 if (lex_match_id (lexer, "INCLUDE"))
2749 c->show_empty = true;
2750 else if (lex_match_id (lexer, "EXCLUDE"))
2751 c->show_empty = false;
2754 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2761 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2762 "TOTAL", "LABEL", "POSITION", "EMPTY");
2764 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2771 if (c->n_cats >= allocated_cats)
2772 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2773 c->cats[c->n_cats++] = cat;
2778 if (c->n_cats >= allocated_cats)
2779 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2781 struct ctables_category *totals;
2784 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2785 totals = &c->cats[0];
2788 totals = &c->cats[c->n_cats];
2791 *totals = (struct ctables_category) {
2793 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first, backward otherwise. */
2797 struct ctables_category *subtotal = NULL;
2798 for (size_t i = totals_before ? 0 : c->n_cats;
2799 totals_before ? i < c->n_cats : i-- > 0;
2800 totals_before ? i++ : 0)
2802 struct ctables_category *cat = &c->cats[i];
2811 cat->subtotal = subtotal;
2814 case CCT_POSTCOMPUTE:
2825 case CCT_EXCLUDED_MISSING:
/* Only an explicit category list needs checking against the variables. */
2830 if (cats_start_ofs != -1)
2832 for (size_t i = 0; i < c->n_cats; i++)
2834 struct ctables_category *cat = &c->cats[i];
2837 case CCT_POSTCOMPUTE:
2838 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2839 struct msg_location *cats_location
2840 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2841 bool ok = ctables_recursive_check_postcompute (
2842 dict, cat->pc->expr, cat, c, cats_location);
2843 msg_location_destroy (cats_location);
2850 for (size_t j = 0; j < n_vars; j++)
2851 if (var_is_alpha (vars[j]))
2853 msg_at (SE, cat->location,
2854 _("This category specification may be applied "
2855 "only to numeric variables, but this "
2856 "subcommand tries to apply it to string "
2858 var_get_name (vars[j]));
/* A string category that parses in the common format is turned into a
   numeric category. */
2867 if (!parse_category_string (cat->location, cat->string, dict,
2868 common_format->type, &n))
2871 ss_dealloc (&cat->string);
2873 cat->type = CCT_NUMBER;
2876 else if (!all_strings (vars, n_vars, cat))
2885 if (!cat->srange[0].string)
2887 else if (!parse_category_string (cat->location,
2888 cat->srange[0], dict,
2889 common_format->type, &n[0]))
2892 if (!cat->srange[1].string)
2894 else if (!parse_category_string (cat->location,
2895 cat->srange[1], dict,
2896 common_format->type, &n[1]))
2899 ss_dealloc (&cat->srange[0]);
2900 ss_dealloc (&cat->srange[1]);
2902 cat->type = CCT_NRANGE;
2903 cat->nrange[0] = n[0];
2904 cat->nrange[1] = n[1];
2906 else if (!all_strings (vars, n_vars, cat))
2917 case CCT_EXCLUDED_MISSING:
/* Accumulator for one summary statistic.  Which member is in use depends on
   the summary function, as the comments on the member groups say.
   NOTE(review): some short members are not visible in this listing. */
2931 union ctables_summary
2933 /* COUNT, VALIDN, TOTALN. */
2936 /* MINIMUM, MAXIMUM, RANGE. */
2943 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2944 struct moments1 *moments;
2946 /* MEDIAN, MODE, PTILE. */
2949 struct casewriter *writer;
/* Initializes summary S for the function in summary specification SS.  In
   the branches visible here, minimum and maximum start as SYSMIS, moment
   functions get a moments1 accumulator (mean only, or up to variance), and
   the remaining visible branch creates a sorting casewriter with two numeric
   columns, ordered ascending by the first.
   NOTE(review): most case labels are not visible in this listing. */
2956 ctables_summary_init (union ctables_summary *s,
2957 const struct ctables_summary_spec *ss)
2959 switch (ss->function)
2962 case CTSF_areaPCT_COUNT:
2963 case CTSF_areaPCT_VALIDN:
2964 case CTSF_areaPCT_TOTALN:
2977 s->min = s->max = SYSMIS;
2982 case CTSF_areaPCT_SUM:
2983 s->moments = moments1_create (MOMENT_MEAN);
2989 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 (numeric) columns.  ctables_summary_add() writes the value to
   column 0 and the weight to column 1. */
2996 struct caseproto *proto = caseproto_create ();
2997 proto = caseproto_add_width (proto, 0);
2998 proto = caseproto_add_width (proto, 0);
3000 struct subcase ordering;
3001 subcase_init (&ordering, 0, 0, SC_ASCEND);
3002 s->writer = sort_create_writer (&ordering, proto);
3003 subcase_uninit (&ordering);
3004 caseproto_unref (proto);
/* Releases what ctables_summary_init() allocated in S for the function in
   SS.  The visible branches destroy the moments accumulator or the
   casewriter.
   NOTE(review): most case labels are not visible in this listing. */
3014 ctables_summary_uninit (union ctables_summary *s,
3015 const struct ctables_summary_spec *ss)
3017 switch (ss->function)
3020 case CTSF_areaPCT_COUNT:
3021 case CTSF_areaPCT_VALIDN:
3022 case CTSF_areaPCT_TOTALN:
3041 case CTSF_areaPCT_SUM:
3042 moments1_destroy (s->moments);
3048 casewriter_destroy (s->writer);
/* Adds VALUE, with the weight supplied by the caller, to summary S for the
   function SS->function.  IS_MISSING and IS_INCLUDED decide whether the case
   counts for a given function, following the chart in the comment below.
   For the order-statistic functions the (value, weight) pair is written to
   S->writer as a two-column case. */
3054 ctables_summary_add (union ctables_summary *s,
3055 const struct ctables_summary_spec *ss,
3056 const union value *value,
3057 bool is_missing, bool is_included,
3060 /* To determine whether a case is included in a given table for a particular
3061 kind of summary, consider the following charts for the variable being
3062 summarized. Only if "yes" appears is the case counted.
3064 Categorical variables: VALIDN other TOTALN
3065 Valid values in included categories yes yes yes
3066 Missing values in included categories --- yes yes
3067 Missing values in excluded categories --- --- yes
3068 Valid values in excluded categories --- --- ---
3070 Scale variables: VALIDN other TOTALN
3071 Valid value yes yes yes
3072 Missing value --- yes yes
3074 Missing values include both user- and system-missing. (The system-missing
3075 value is always in an excluded category.)
3077 One way to interpret the above table is that scale variables are like
3078 categorical variables in which all values are in included categories.
3080 switch (ss->function)
3083 case CTSF_areaPCT_TOTALN:
3088 case CTSF_areaPCT_COUNT:
3094 case CTSF_areaPCT_VALIDN:
3112 if (s->min == SYSMIS || value->f < s->min)
3114 if (s->max == SYSMIS || value->f > s->max)
3125 moments1_add (s->moments, value->f, weight);
3128 case CTSF_areaPCT_SUM:
3130 moments1_add (s->moments, value->f, weight);
3138 s->ovalid += weight;
3140 struct ccase *c = case_create (casewriter_get_proto (s->writer));
3141 *case_num_rw_idx (c, 0) = value->f;
3142 *case_num_rw_idx (c, 1) = weight;
3143 casewriter_write (s->writer, c);
/* Computes the final value of summary S, for function SS->function, in CELL.
   The percentage functions divide by a denominator taken from the area
   CELL->areas[SS->calc_area] and yield SYSMIS when that denominator is
   zero. */
3150 ctables_summary_value (const struct ctables_cell *cell,
3151 union ctables_summary *s,
3152 const struct ctables_summary_spec *ss)
3154 switch (ss->function)
3160 return cell->areas[ss->calc_area]->sequence;
3162 case CTSF_areaPCT_COUNT:
3164 const struct ctables_area *a = cell->areas[ss->calc_area];
3165 double a_count = a->count[ss->weighting];
3166 return a_count ? s->count / a_count * 100 : SYSMIS;
3169 case CTSF_areaPCT_VALIDN:
3171 const struct ctables_area *a = cell->areas[ss->calc_area];
3172 double a_valid = a->valid[ss->weighting];
3173 return a_valid ? s->count / a_valid * 100 : SYSMIS;
3176 case CTSF_areaPCT_TOTALN:
3178 const struct ctables_area *a = cell->areas[ss->calc_area];
3179 double a_total = a->total[ss->weighting];
3180 return a_total ? s->count / a_total * 100 : SYSMIS;
/* The range is defined only if both extremes were observed. */
3195 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3200 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3206 double weight, variance;
3207 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3208 return calc_semean (variance, weight);
3214 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3215 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
3220 double weight, mean;
3221 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3222 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3228 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Percentage of a sum: this cell's weight * mean, divided by the area's
   accumulated sum for the variable at index SS->sum_var_idx. */
3232 case CTSF_areaPCT_SUM:
3234 double weight, mean;
3235 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3236 if (weight == SYSMIS || mean == SYSMIS)
3239 const struct ctables_area *a = cell->areas[ss->calc_area];
3240 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
3241 double denom = sum->sum[ss->weighting];
3242 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Read back the (value, weight) cases collected in S->writer to compute a
   percentile.  SS->percentile is used for CTSF_PTILE; any other function
   reaching this code uses 0.5. */
3249 struct casereader *reader = casewriter_make_reader (s->writer);
3252 struct percentile *ptile = percentile_create (
3253 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3254 struct order_stats *os = &ptile->parent;
3255 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3256 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3257 statistic_destroy (&ptile->parent.parent);
/* Same read-back approach, using a mode statistic. */
3264 struct casereader *reader = casewriter_make_reader (s->writer);
3267 struct mode *mode = mode_create ();
3268 struct order_stats *os = &mode->parent;
3269 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3270 s->ovalue = mode->mode;
3271 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis A whose cell values are read. */
3279 struct ctables_cell_sort_aux
3281 const struct ctables_nest *nest;
3282 enum pivot_axis_type a;
/* Three-way comparison callback for cells.  A_ and B_ each point to a
   `struct ctables_cell *'; AUX_ points to a struct ctables_cell_sort_aux
   naming the nest and axis to compare on.

   For each variable in the nest other than the scale variable, cells are
   ordered first by category (by pointer) and then, depending on the
   category's type, by value or by value label, with the direction given by
   the category's sort_ascending flag. */
3286 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3288 const struct ctables_cell_sort_aux *aux = aux_;
3289 struct ctables_cell *const *ap = a_;
3290 struct ctables_cell *const *bp = b_;
3291 const struct ctables_cell *a = *ap;
3292 const struct ctables_cell *b = *bp;
3294 const struct ctables_nest *nest = aux->nest;
3295 for (size_t i = 0; i < nest->n; i++)
3296 if (i != nest->scale_idx)
3298 const struct variable *var = nest->vars[i];
3299 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3300 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by category address. */
3301 if (a_cv->category != b_cv->category)
3302 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category: order within it according to its type. */
3304 const union value *a_val = &a_cv->value;
3305 const union value *b_val = &b_cv->value;
3306 switch (a_cv->category->type)
3312 case CCT_POSTCOMPUTE:
3313 case CCT_EXCLUDED_MISSING:
3314 /* Must be equal. */
3322 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3330 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3332 return a_cv->category->sort_ascending ? cmp : -cmp;
3338 const char *a_label = var_lookup_value_label (var, a_val);
3339 const char *b_label = var_lookup_value_label (var, b_val);
3345 cmp = strcmp (a_label, b_label);
3351 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3354 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison callback that orders cells by their per-axis leaf
   indexes, examining the axes in increasing order.  A_ and B_ each point to
   a `struct ctables_cell *'; AUX is unused. */
3366 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3367 const void *aux UNUSED)
3369 struct ctables_cell *const *ap = a_;
3370 struct ctables_cell *const *bp = b_;
3371 const struct ctables_cell *a = *ap;
3372 const struct ctables_cell *b = *bp;
3374 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3376 int al = a->axes[axis].leaf;
3377 int bl = b->axes[axis].leaf;
3379 return al > bl ? 1 : -1;
/* Looks up, in S->areas[AREA], the area of type AREA that CELL belongs to,
   and creates and inserts a new one, with CELL as its example, if no
   existing area matches.

   An area is identified by the categories (and, except for totals,
   subtotals and postcomputes, the values) of the variables listed in each
   nest's areas[AREA] index array.

   NOTE(review): the return statements are not visible here; presumably the
   matching or new area is returned. */
3384 static struct ctables_area *
3385 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3386 enum ctables_area_type area)
/* Hash the identifying categories and values. */
3389 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3391 const struct ctables_nest *nest = s->nests[a];
3392 for (size_t i = 0; i < nest->n_areas[area]; i++)
3394 size_t v_idx = nest->areas[area][i];
3395 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3396 hash = hash_pointer (cv->category, hash);
3397 if (cv->category->type != CCT_TOTAL
3398 && cv->category->type != CCT_SUBTOTAL
3399 && cv->category->type != CCT_POSTCOMPUTE)
3400 hash = value_hash (&cv->value,
3401 var_get_width (nest->vars[v_idx]), hash);
/* Compare CELL against the example cell of each area with the same hash. */
3405 struct ctables_area *a;
3406 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3408 const struct ctables_cell *df = a->example;
3409 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3411 const struct ctables_nest *nest = s->nests[a];
3412 for (size_t i = 0; i < nest->n_areas[area]; i++)
3414 size_t v_idx = nest->areas[area][i];
3415 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3416 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3417 if (cv1->category != cv2->category
3418 || (cv1->category->type != CCT_TOTAL
3419 && cv1->category->type != CCT_SUBTOTAL
3420 && cv1->category->type != CCT_POSTCOMPUTE
3421 && !value_equal (&cv1->value, &cv2->value,
3422 var_get_width (nest->vars[v_idx]))))
/* Create a new area, with a zeroed sums array when the table has any
   summed variables. */
3431 struct ctables_sum *sums = (s->table->n_sum_vars
3432 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3435 a = xmalloc (sizeof *a);
3436 *a = (struct ctables_area) { .example = cell, .sums = sums };
3437 hmap_insert (&s->areas[area], &a->node, hash);
/* Builds a substring over V's string data, VAR's width bytes long, and trims
   trailing spaces from it.  The substring refers to V's own storage; no copy
   is made here. */
3441 static struct substring
3442 rtrim_value (const union value *v, const struct variable *var)
3444 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3445 var_get_width (var));
3446 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether V's string value, with trailing spaces removed, lies between
   SRANGE[0] and SRANGE[1] inclusive.  A bound whose `string' member is null
   is treated as unbounded on that side. */
3451 in_string_range (const union value *v, const struct variable *var,
3452 const struct substring *srange)
3454 struct substring s = rtrim_value (v, var);
3455 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3456 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is handled up front.  Otherwise the
   categories are scanned from last to first and tested according to their
   type: exact number, exact right-trimmed string, numeric range (where
   -DBL_MAX and DBL_MAX act as open ends), string range, or missing value.
   If the scan produces no direct match, the result is NULL for a missing
   value and OTHERNM otherwise.

   NOTE(review): OTHERNM starts as NULL; where it is assigned is not visible
   here. */
3459 static const struct ctables_category *
3460 ctables_categories_match (const struct ctables_categories *c,
3461 const union value *v, const struct variable *var)
3463 if (var_is_numeric (var) && v->f == SYSMIS)
3466 const struct ctables_category *othernm = NULL;
3467 for (size_t i = c->n_cats; i-- > 0; )
3469 const struct ctables_category *cat = &c->cats[i];
3473 if (cat->number == v->f)
3478 if (ss_equals (cat->string, rtrim_value (v, var)))
3483 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3484 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3489 if (in_string_range (v, var, cat->srange))
3494 if (var_is_value_missing (var, v))
3498 case CCT_POSTCOMPUTE:
3513 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3516 case CCT_EXCLUDED_MISSING:
3521 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's total category if it is either the first or the last category
   in C; only those two positions are examined.

   NOTE(review): indexing cats[n_cats - 1] assumes C has at least one
   category -- confirm against callers. */
3524 static const struct ctables_category *
3525 ctables_categories_total (const struct ctables_categories *c)
3527 const struct ctables_category *first = &c->cats[0];
3528 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3529 return (first->type == CCT_TOTAL ? first
3530 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S identified by CATS and case C.

   A cell is identified, for every non-scale variable in each axis's nest, by
   the category in CATS and, unless that category is a total, subtotal or
   postcompute, by C's value of the variable.  If no such cell exists in
   S->cells, a new one is allocated, its summaries and areas are set up, and
   it is inserted into S->cells.

   NOTE(review): the return statements are not visible here; presumably the
   existing or new cell is returned. */
3534 static struct ctables_cell *
3535 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3536 const struct ctables_category **cats[PIVOT_N_AXES])
/* Hash the identifying categories and values. */
3539 enum ctables_summary_variant sv = CSV_CELL;
3540 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3542 const struct ctables_nest *nest = s->nests[a];
3543 for (size_t i = 0; i < nest->n; i++)
3544 if (i != nest->scale_idx)
3546 hash = hash_pointer (cats[a][i], hash);
3547 if (cats[a][i]->type != CCT_TOTAL
3548 && cats[a][i]->type != CCT_SUBTOTAL
3549 && cats[a][i]->type != CCT_POSTCOMPUTE)
3550 hash = value_hash (case_data (c, nest->vars[i]),
3551 var_get_width (nest->vars[i]), hash);
/* Search for an existing cell with the same categories and values. */
3557 struct ctables_cell *cell;
3558 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3560 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3562 const struct ctables_nest *nest = s->nests[a];
3563 for (size_t i = 0; i < nest->n; i++)
3564 if (i != nest->scale_idx
3565 && (cats[a][i] != cell->axes[a].cvs[i].category
3566 || (cats[a][i]->type != CCT_TOTAL
3567 && cats[a][i]->type != CCT_SUBTOTAL
3568 && cats[a][i]->type != CCT_POSTCOMPUTE
3569 && !value_equal (case_data (c, nest->vars[i]),
3570 &cell->axes[a].cvs[i].value,
3571 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3580 cell = xmalloc (sizeof *cell);
3583 cell->omit_areas = 0;
3584 cell->postcompute = false;
3585 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3587 const struct ctables_nest *nest = s->nests[a];
3588 cell->axes[a].cvs = (nest->n
3589 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3591 for (size_t i = 0; i < nest->n; i++)
3593 const struct ctables_category *cat = cats[a][i];
3594 const struct variable *var = nest->vars[i];
3595 const union value *value = case_data (c, var);
3596 if (i != nest->scale_idx)
3598 const struct ctables_category *subtotal = cat->subtotal;
3599 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal or postcompute category sets bits in omit_areas; which
   area bits are set depends on the axis it appears on. */
3602 if (cat->type == CCT_TOTAL
3603 || cat->type == CCT_SUBTOTAL
3604 || cat->type == CCT_POSTCOMPUTE)
3608 case PIVOT_AXIS_COLUMN:
3609 cell->omit_areas |= ((1u << CTAT_TABLE) |
3610 (1u << CTAT_LAYER) |
3611 (1u << CTAT_LAYERCOL) |
3612 (1u << CTAT_SUBTABLE) |
3615 case PIVOT_AXIS_ROW:
3616 cell->omit_areas |= ((1u << CTAT_TABLE) |
3617 (1u << CTAT_LAYER) |
3618 (1u << CTAT_LAYERROW) |
3619 (1u << CTAT_SUBTABLE) |
3622 case PIVOT_AXIS_LAYER:
3623 cell->omit_areas |= ((1u << CTAT_TABLE) |
3624 (1u << CTAT_LAYER));
3628 if (cat->type == CCT_POSTCOMPUTE)
3629 cell->postcompute = true;
/* Record the category and a private copy of the value for this position. */
3632 cell->axes[a].cvs[i].category = cat;
3633 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* One summary accumulator per summary spec on the table's summary axis. */
3637 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3638 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3639 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3640 for (size_t i = 0; i < specs->n; i++)
3641 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its area of every type. */
3642 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3643 cell->areas[at] = ctables_area_insert (s, cell, at);
3644 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C's values of the variables in SPECS->listwise_vars for
   numeric missing values.

   NOTE(review): the return statements are not visible here; presumably the
   result is true if any of those values is missing. */
3649 is_listwise_missing (const struct ctables_summary_spec_set *specs,
3650 const struct ccase *c)
3652 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3654 const struct variable *var = specs->listwise_vars[i];
3655 if (var_is_num_missing (var, case_num (c, var)))
/* Iterates over all N_CTWS weightings of DST and SRC.

   NOTE(review): the loop body is not visible here; presumably it adds
   SRC[wt] to DST[wt] for each weighting. */
3663 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3665 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C, with per-weighting weights WEIGHT, to the cell of section S
   selected by CATS (see ctables_cell_insert__()).  Every summary of the cell
   is updated, and then the totals, counts, valid counts and per-variable
   sums of the cell's areas are updated. */
3670 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3671 const struct ctables_category **cats[PIVOT_N_AXES],
3672 bool is_included, double weight[N_CTWS])
3674 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3675 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3677 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3678 const union value *value = case_data (c, specs->var);
3679 bool is_missing = var_is_value_missing (specs->var, value);
/* For a scale summary variable, a missing value in any listwise variable
   also counts as missing. */
3680 bool is_scale_missing
3681 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3683 for (size_t i = 0; i < specs->n; i++)
3684 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3685 is_scale_missing, is_included,
3686 weight[specs->specs[i].weighting]);
/* NOTE(review): `&&' is a logical AND and (1u << at) is never zero, so this
   condition only tests whether omit_areas is nonzero, regardless of AT.
   omit_areas is built from (1u << CTAT_...) bits in ctables_cell_insert__(),
   which may suggest `&' was meant.  Switching operators would change which
   areas accumulate totals, so confirm the intent before changing it. */
3687 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3688 if (!(cell->omit_areas && (1u << at)))
3690 struct ctables_area *a = cell->areas[at];
3692 add_weight (a->total, weight);
3694 add_weight (a->count, weight);
3697 add_weight (a->valid, weight);
/* Accumulate weighted sums of the table's summed variables, skipping
   values that are missing for their variable. */
3699 if (!is_scale_missing)
3700 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3702 const struct variable *var = s->table->sum_vars[i];
3703 double addend = case_num (c, var);
3704 if (!var_is_num_missing (var, addend))
3705 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3706 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the cells that involve total categories.  Starting from
   position (START_AXIS, START_NEST), each non-scale variable's categories
   are checked for a total with ctables_categories_total(); the case is then
   added with ctables_cell_add__() and the function recurses from the next
   position, so combinations of totals are reached as well.

   NOTE(review): the lines that substitute the total into CATS and restore
   the saved category afterward are not visible here. */
3713 recurse_totals (struct ctables_section *s, const struct ccase *c,
3714 const struct ctables_category **cats[PIVOT_N_AXES],
3715 bool is_included, double weight[N_CTWS],
3716 enum pivot_axis_type start_axis, size_t start_nest)
3718 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3720 const struct ctables_nest *nest = s->nests[a];
3721 for (size_t i = start_nest; i < nest->n; i++)
3723 if (i == nest->scale_idx)
3726 const struct variable *var = nest->vars[i];
3728 const struct ctables_category *total = ctables_categories_total (
3729 s->table->categories[var_get_dict_index (var)]);
3732 const struct ctables_category *save = cats[a][i];
3734 ctables_cell_add__ (s, c, cats, is_included, weight);
3735 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the cells that involve subtotal categories.  Starting from
   position (START_AXIS, START_NEST), each non-scale position's category in
   CATS is replaced by its `subtotal' member, the case is added with
   ctables_cell_add__(), and the function recurses from the next position.

   NOTE(review): the check that a subtotal exists and the restoration of the
   saved category are not visible here. */
3744 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3745 const struct ctables_category **cats[PIVOT_N_AXES],
3746 bool is_included, double weight[N_CTWS],
3747 enum pivot_axis_type start_axis, size_t start_nest)
3749 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3751 const struct ctables_nest *nest = s->nests[a];
3752 for (size_t i = start_nest; i < nest->n; i++)
3754 if (i == nest->scale_idx)
3757 const struct ctables_category *save = cats[a][i];
3760 cats[a][i] = save->subtotal;
3761 ctables_cell_add__ (s, c, cats, is_included, weight);
3762 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Records that VALUE of variable VAR occurred, in the hash map OCCURRENCES.
   The map is first searched for an equal value; otherwise a new
   ctables_occurrence holding a copy of VALUE is inserted.

   NOTE(review): the statement executed when an equal value is found is not
   visible here; presumably the function returns without inserting. */
3771 ctables_add_occurrence (const struct variable *var,
3772 const union value *value,
3773 struct hmap *occurrences)
3775 int width = var_get_width (var);
3776 unsigned int hash = value_hash (value, width, 0);
3778 struct ctables_occurrence *o;
3779 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3781 if (value_equal (value, &o->value, width))
3784 o = xmalloc (sizeof *o);
3785 value_clone (&o->value, value, width);
3786 hmap_insert (occurrences, &o->node, hash);
/* Tabulates case C, with per-weighting weights WEIGHT, into section S.

   First, each non-scale variable's value is matched to a category with
   ctables_categories_match().  Then the value of each non-scale variable is
   recorded as an occurrence.  Finally the case is added to its own cell and
   to the related total and subtotal cells.

   NOTE(review): the conditions that lead to the cct_excluded_missing
   placeholder and to IS_INCLUDED becoming false are only partly visible
   here. */
3790 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3791 double weight[N_CTWS])
/* Per-axis category arrays, sized by the number of variables in each nest. */
3793 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3794 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3795 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3796 const struct ctables_category **cats[PIVOT_N_AXES] =
3798 [PIVOT_AXIS_LAYER] = layer_cats,
3799 [PIVOT_AXIS_ROW] = row_cats,
3800 [PIVOT_AXIS_COLUMN] = column_cats,
3803 bool is_included = true;
3805 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3807 const struct ctables_nest *nest = s->nests[a];
3808 for (size_t i = 0; i < nest->n; i++)
3809 if (i != nest->scale_idx)
3811 const struct variable *var = nest->vars[i];
3812 const union value *value = case_data (c, var);
3814 cats[a][i] = ctables_categories_match (
3815 s->table->categories[var_get_dict_index (var)], value, var);
3818 if (i != nest->summary_idx)
3821 if (!var_is_value_missing (var, value))
3824 static const struct ctables_category cct_excluded_missing = {
3825 .type = CCT_EXCLUDED_MISSING,
3828 cats[a][i] = &cct_excluded_missing;
3829 is_included = false;
/* Remember every value seen for each non-scale variable. */
3835 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3837 const struct ctables_nest *nest = s->nests[a];
3838 for (size_t i = 0; i < nest->n; i++)
3839 if (i != nest->scale_idx)
3841 const struct variable *var = nest->vars[i];
3842 const union value *value = case_data (c, var);
3843 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3847 ctables_cell_add__ (s, c, cats, is_included, weight);
3848 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3849 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* Summary spec set that a merge item refers to; merge_item_compare_3way()
   indexes SET->specs with the item's `ofs' member.  NOTE(review): the
   enclosing struct declaration and its other members are not visible here. */
3854 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that A and B refer to
   (SET->specs[OFS] of each).  The keys, in order of precedence, are the
   function, the weighting, the calculation area, the percentile, and
   finally the label, with a null label treated as an empty string.

   Note that the percentile comparison is reversed relative to the other
   keys: the spec with the smaller percentile compares as greater. */
3859 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3861 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3862 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3863 if (as->function != bs->function)
3864 return as->function > bs->function ? 1 : -1;
3865 else if (as->weighting != bs->weighting)
3866 return as->weighting > bs->weighting ? 1 : -1;
3867 else if (as->calc_area != bs->calc_area)
3868 return as->calc_area > bs->calc_area ? 1 : -1;
3869 else if (as->percentile != bs->percentile)
3870 return as->percentile < bs->percentile ? 1 : -1;
3872 const char *as_label = as->label ? as->label : "";
3873 const char *bs_label = bs->label ? bs->label : "";
3874 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of variable VAR and passes the string S to
   pivot_value_format() to receive the text.  A temporary pivot_value is
   created for the purpose and destroyed afterward. */
3878 ctables_category_format_number (double number, const struct variable *var,
3881 struct pivot_value *pv = pivot_value_new_var_value (
3882 var, &(union value) { .f = number });
3883 pivot_value_format (pv, NULL, s);
3884 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR and passes OUT to
   pivot_value_format() to receive the text.  STRING is first copied into a
   buffer of VAR's width, padded on the right with spaces, so that it has the
   layout of a value of VAR.

   NOTE(review): the release of the xmalloc'd buffer S is not visible here;
   confirm that it is freed after the pivot_value is destroyed. */
3888 ctables_category_format_string (struct substring string,
3889 const struct variable *var, struct string *out)
3891 int width = var_get_width (var);
3892 char *s = xmalloc (width);
3893 buf_copy_rpad (s, width, string.string, string.length, ' ');
3894 struct pivot_value *pv = pivot_value_new_var_value (
3895 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3896 pivot_value_format (pv, NULL, out);
3897 pivot_value_destroy (pv);
/* Appends to S a textual label for category CAT of variable VAR.  Numbers
   and strings are formatted as values of VAR, ranges as "<low> THRU <high>",
   and other types use fixed text, the postcompute's name prefixed by "&", or
   the category's total label.

   NOTE(review): the case labels and return values are mostly not visible
   here; ctables_postcompute_label() treats a false result as failure. */
3902 ctables_category_format_label (const struct ctables_category *cat,
3903 const struct variable *var,
3909 ctables_category_format_number (cat->number, var, s);
3913 ctables_category_format_string (cat->string, var, s);
3917 ctables_category_format_number (cat->nrange[0], var, s);
3918 ds_put_format (s, " THRU ");
3919 ctables_category_format_number (cat->nrange[1], var, s);
3923 ctables_category_format_string (cat->srange[0], var, s);
3924 ds_put_format (s, " THRU ");
3925 ctables_category_format_string (cat->srange[1], var, s);
3929 ds_put_cstr (s, "MISSING");
3933 ds_put_cstr (s, "OTHERNM");
3936 case CCT_POSTCOMPUTE:
3937 ds_put_format (s, "&%s", cat->pc->name);
3942 ds_put_cstr (s, cat->total_label);
3948 case CCT_EXCLUDED_MISSING:
/* Builds the label for postcompute category CAT as a new pivot_value.

   CAT->pc->label is copied, with each occurrence of ")LABEL[N]" replaced by
   the formatted label of the Nth category in CATS, counting from 1.  When
   the label contains no such reference it is used as is.

   NOTE(review): the error paths are not visible here; presumably a malformed
   or out-of-range reference leads to the final statement, which returns the
   label unmodified. */
3955 static struct pivot_value *
3956 ctables_postcompute_label (const struct ctables_categories *cats,
3957 const struct ctables_category *cat,
3958 const struct variable *var)
3960 struct substring in = ss_cstr (cat->pc->label);
3961 struct substring target = ss_cstr (")LABEL[");
3963 struct string out = DS_EMPTY_INITIALIZER;
3966 size_t chunk = ss_find_substring (in, target);
/* No more references: emit what remains. */
3967 if (chunk == SIZE_MAX)
3969 if (ds_is_empty (&out))
3970 return pivot_value_new_user_text (in.string, in.length);
3973 ds_put_substring (&out, in);
3974 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then skip over ")LABEL[". */
3978 ds_put_substring (&out, ss_head (in, chunk));
3979 ss_advance (&in, chunk + target.length);
3981 struct substring idx_s;
3982 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a decimal integer in 1...n_cats that fills the whole
   bracketed text. */
3985 long int idx = strtol (idx_s.string, &tail, 10);
3986 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3989 struct ctables_category *cat2 = &cats->cats[idx - 1];
3990 if (!ctables_category_format_label (cat2, var, &out))
3996 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot_value used to label category CAT of variable VAR:

   - For a postcompute that has a label, the result of
     ctables_postcompute_label().

   - For a total or subtotal, CAT->total_label as user text.

   - Otherwise, VALUE formatted as a value of VAR. */
3999 static struct pivot_value *
4000 ctables_category_create_value_label (const struct ctables_categories *cats,
4001 const struct ctables_category *cat,
4002 const struct variable *var,
4003 const union value *value)
4005 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4006 ? ctables_postcompute_label (cats, cat, var)
4007 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4008 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4009 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and precomputed HASH.

   NOTE(review): the return statements are not visible here; presumably the
   matching entry is returned, or NULL if there is none. */
4012 static struct ctables_value *
4013 ctables_value_find__ (struct ctables_table *t, const union value *value,
4014 int width, unsigned int hash)
4016 struct ctables_value *clv;
4017 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4018 hash, &t->clabels_values_map)
4019 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is searched first with
   ctables_value_find__(); a new entry holding a copy of VALUE is allocated
   and inserted.

   NOTE(review): the test on the lookup result is not visible here;
   presumably insertion happens only when the value is not already
   present. */
4025 ctables_value_insert (struct ctables_table *t, const union value *value,
4028 unsigned int hash = value_hash (value, width, 0);
4029 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4032 clv = xmalloc (sizeof *clv);
4033 value_clone (&clv->value, value, width);
4034 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper around ctables_value_find__() that computes the hash
   of VALUE (of the given WIDTH) itself. */
4038 static struct ctables_value *
4039 ctables_value_find (struct ctables_table *t,
4040 const union value *value, int width)
4042 return ctables_value_find__ (t, value, width,
4043 value_hash (value, width, 0));
/* Recursively creates the sections of table T.

   While A is a valid axis, every nest index on that axis is tried in IX[A]
   (at least one iteration, even if the axis has no nests) and the function
   recurses to the next axis.  Once all axes have an index, a new section is
   appended to T->sections and its cell map, per-variable occurrence maps and
   area maps are initialized. */
4047 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4048 size_t ix[PIVOT_N_AXES])
4050 if (a < PIVOT_N_AXES)
4052 size_t limit = MAX (t->stacks[a].n, 1);
4053 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4054 ctables_table_add_section (t, a + 1, ix);
4058 struct ctables_section *s = &t->sections[t->n_sections++];
4059 *s = (struct ctables_section) {
4061 .cells = HMAP_INITIALIZER (s->cells),
4063 for (a = 0; a < PIVOT_N_AXES; a++)
4066 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable in the nest. */
4068 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4069 for (size_t i = 0; i < nest->n; i++)
4070 hmap_init (&s->occurrences[a][i]);
4072 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4073 hmap_init (&s->areas[at]);
/* Arithmetic helpers for postcompute expressions.  They share a
   (double, double) parameter list so that each can be passed as the
   EVALUATE callback of ctables_pcexpr_evaluate_nonterminal().  Most of
   their bodies are not visible here. */
4078 ctpo_add (double a, double b)
4084 ctpo_sub (double a, double b)
4090 ctpo_mul (double a, double b)
/* Division by zero yields SYSMIS instead of being performed. */
4096 ctpo_div (double a, double b)
4098 return b ? a / b : SYSMIS;
/* NOTE(review): errno is saved before calling pow(), presumably so that an
   error reported through errno can be detected and errno restored -- the
   rest of the body is not visible. */
4102 ctpo_pow (double a, double b)
4104 int save_errno = errno;
4106 double result = pow (a, b);
/* Unary operator: B is unused. */
4114 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression: the cell being computed,
   its section, the categories of the variable that carries the postcompute,
   the axis of that variable, and the format used when looking up categories
   for the expression.

   NOTE(review): other members (e.g. pc_a_idx and summary_idx, which are set
   in ctables_cell_calculate_postcompute()) are not visible here. */
4119 struct ctables_pcexpr_evaluate_ctx
4121 const struct ctables_cell *cell;
4122 const struct ctables_section *section;
4123 const struct ctables_categories *cats;
4124 enum pivot_axis_type pc_a;
4127 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   before its definition appears. */
4130 static double ctables_pcexpr_evaluate (
4131 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates the first N_ARGS (1 or 2) subexpressions of E in context CTX and
   applies EVALUATE to the results.  An argument slot that is not used stays
   0.  Each operand is checked for being nonfinite or SYSMIS before EVALUATE
   is called.

   NOTE(review): what happens when that check fails is not visible here;
   presumably SYSMIS is returned. */
4134 ctables_pcexpr_evaluate_nonterminal (
4135 const struct ctables_pcexpr_evaluate_ctx *ctx,
4136 const struct ctables_pcexpr *e, size_t n_args,
4137 double evaluate (double, double))
4139 double args[2] = { 0, 0 };
4140 for (size_t i = 0; i < n_args; i++)
4142 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4143 if (!isfinite (args[i]) || args[i] == SYSMIS)
4146 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that is like CTX->cell except that,
   at position (CTX->pc_a, CTX->pc_a_idx), it has the category and value in
   PC_CV.  The cell is looked up in the section's cell map and its summary
   number CTX->summary_idx is evaluated with ctables_summary_value().

   NOTE(review): the handling of the case where no such cell exists is not
   visible here. */
4150 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4151 const struct ctables_cell_value *pc_cv)
4153 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity the same way ctables_cell_insert__()
   does, substituting PC_CV at the postcompute's position. */
4156 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4158 const struct ctables_nest *nest = s->nests[a];
4159 for (size_t i = 0; i < nest->n; i++)
4160 if (i != nest->scale_idx)
4162 const struct ctables_cell_value *cv
4163 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4164 : &ctx->cell->axes[a].cvs[i]);
4165 hash = hash_pointer (cv->category, hash);
4166 if (cv->category->type != CCT_TOTAL
4167 && cv->category->type != CCT_SUBTOTAL
4168 && cv->category->type != CCT_POSTCOMPUTE)
4169 hash = value_hash (&cv->value,
4170 var_get_width (nest->vars[i]), hash);
/* Find the cell with that identity among those with the same hash. */
4174 struct ctables_cell *tc;
4175 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4177 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4179 const struct ctables_nest *nest = s->nests[a];
4180 for (size_t i = 0; i < nest->n; i++)
4181 if (i != nest->scale_idx)
4183 const struct ctables_cell_value *p_cv
4184 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4185 : &ctx->cell->axes[a].cvs[i]);
4186 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4187 if (p_cv->category != t_cv->category
4188 || (p_cv->category->type != CCT_TOTAL
4189 && p_cv->category->type != CCT_SUBTOTAL
4190 && p_cv->category->type != CCT_POSTCOMPUTE
4191 && !value_equal (&p_cv->value,
4193 var_get_width (nest->vars[i]))))
/* Evaluate the requested summary in the cell that was found. */
4205 const struct ctables_table *t = s->table;
4206 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4207 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4208 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4209 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and then to a cell value with
   ctables_pcexpr_evaluate_category().  Arithmetic operators are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*() helper.

   NOTE(review): several case labels and return statements are not visible
   here. */
4213 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4214 const struct ctables_pcexpr *e)
/* Operands that can cover many values: add up the cell value for every
   value that occurred for the postcompute's variable and that matches the
   operand's category. */
4221 case CTPO_CAT_NRANGE:
4222 case CTPO_CAT_SRANGE:
4223 case CTPO_CAT_MISSING:
4224 case CTPO_CAT_OTHERNM:
4226 struct ctables_cell_value cv = {
4227 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4229 assert (cv.category != NULL);
4231 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4232 const struct ctables_occurrence *o;
4235 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4236 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4237 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4239 cv.value = o->value;
4240 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands evaluated with a single lookup; the value is taken from
   E->number. */
4245 case CTPO_CAT_NUMBER:
4246 case CTPO_CAT_SUBTOTAL:
4247 case CTPO_CAT_TOTAL:
4249 struct ctables_cell_value cv = {
4250 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4251 .value = { .f = e->number },
4253 assert (cv.category != NULL);
4254 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: if the variable is wider than the string, compare using
   a copy padded with spaces to the variable's width. */
4257 case CTPO_CAT_STRING:
4259 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4261 if (width > e->string.length)
4263 s = xmalloc (width);
4264 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4267 const struct ctables_category *category
4268 = ctables_find_category_for_postcompute (
4269 ctx->section->table->ctables->dict,
4270 ctx->cats, ctx->parse_format, e);
4271 assert (category != NULL);
/* The category found may be numeric or string; set the value to match. */
4273 struct ctables_cell_value cv = { .category = category };
4274 if (category->type == CCT_NUMBER)
4275 cv.value.f = category->number;
4276 else if (category->type == CCT_STRING)
4277 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4281 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4287 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4290 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4293 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4296 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4299 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4302 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Locates the postcompute category that applies to CELL, which must have its
   `postcompute' flag set.  Every position of every axis of CELL in section S
   is scanned for a category of type CCT_POSTCOMPUTE; the category found is
   kept in PC_CAT and its index is stored through PC_A_IDX_P.  PC_CAT is
   asserted to be non-null at the end.

   NOTE(review): the store through PC_A_P, the handling of several crossing
   postcomputes, and the return statements are not visible here. */
4308 static const struct ctables_category *
4309 ctables_cell_postcompute (const struct ctables_section *s,
4310 const struct ctables_cell *cell,
4311 enum pivot_axis_type *pc_a_p,
4314 assert (cell->postcompute);
4315 const struct ctables_category *pc_cat = NULL;
4316 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4317 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4319 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4320 if (cv->category->type == CCT_POSTCOMPUTE)
4324 /* Multiple postcomputes cross each other. The value is
4329 pc_cat = cv->category;
4333 *pc_a_idx_p = pc_a_idx;
4337 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary spec
   SS by evaluating the postcompute's expression.

   The postcompute's own summary specs are searched for one that agrees with
   SS in function, weighting, calculation area and percentile; a match
   supplies *FORMAT and *IS_CTABLES_FORMAT.

   NOTE(review): guards around these steps (for example, whether the
   postcompute has any specs at all) are not visible here. */
4342 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4343 const struct ctables_cell *cell,
4344 const struct ctables_summary_spec *ss,
4345 struct fmt_spec *format,
4346 bool *is_ctables_format,
4349 enum pivot_axis_type pc_a = 0;
4350 size_t pc_a_idx = 0;
4351 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4352 s, cell, &pc_a, &pc_a_idx);
4356 const struct ctables_postcompute *pc = pc_cat->pc;
4359 for (size_t i = 0; i < pc->specs->n; i++)
4361 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4362 if (ss->function == ss2->function
4363 && ss->weighting == ss2->weighting
4364 && ss->calc_area == ss2->calc_area
4365 && ss->percentile == ss2->percentile)
4367 *format = ss2->format;
4368 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4374 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4375 const struct ctables_categories *cats = s->table->categories[
4376 var_get_dict_index (var)];
4377 struct ctables_pcexpr_evaluate_ctx ctx = {
4382 .pc_a_idx = pc_a_idx,
4383 .summary_idx = summary_idx,
4384 .parse_format = pc_cat->parse_format,
4386 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS as a UTF-8 string, then moves a
   minus sign as described in the comment below.

   NOTE(review): the return statement is not visible here; presumably the
   string from data_out_stretchy() is returned, in which case the caller
   would own it -- confirm. */
4390 ctables_format (double d, const struct fmt_spec *format,
4391 const struct fmt_settings *settings)
4393 const union value v = { .f = d };
4394 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4396 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4397 produce the results we want for negative numbers, putting the negative
4398 sign in the wrong spot, before the prefix instead of after it. We can't,
4399 in fact, produce the desired results using a custom-currency
4400 specification. Instead, we postprocess the output, moving the negative
4403 NEQUAL: "-N=3" => "N=-3"
4404 PAREN: "-(3)" => "(-3)"
4405 PCTPAREN: "-(3%)" => "(-3%)"
4407 This transformation doesn't affect NEGPAREN. */
4408 char *minus_src = strchr (s, '-');
/* A '-' that directly follows 'E' is left where it is. */
4409 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination is just past "N=" if present, otherwise at the first
   '('. */
4411 char *n_equals = strstr (s, "N=");
4412 char *lparen = strchr (s, '(');
4413 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4415 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest stacked on axis A of table T.  Two conditions are
   tested per nest: whether the nest is anything other than a single variable
   at the scale index (scale_idx 0), and whether that variable's variable
   label setting differs from CTVL_NONE.

   NOTE(review): the return statements are not visible here; presumably the
   result is false as soon as either condition holds and true otherwise. */
4421 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4423 for (size_t i = 0; i < t->stacks[a].n; i++)
4425 struct ctables_nest *nest = &t->stacks[a].nests[i];
4426 if (nest->n != 1 || nest->scale_idx != 0)
4429 enum ctables_vlabel vlabel
4430 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4431 if (vlabel != CTVL_NONE)
/* Builds a pivot table for table T of CT and submits it for output with
   pivot_table_submit().

   The steps visible in this listing are:

   - Create the pivot table, titled with T's title or with the default
     "Custom Tables", and set caption and corner text from T.

   - Create a "Statistics" dimension on t->slabels_axis if the summary
     functions need a dimension of their own, with one leaf per entry in
     t->summary_specs.

   - Create a "Row Categories" or "Column Categories" dimension if
     t->clabels_example is nonnull, with one leaf per entry in
     t->clabels_values.

   - For each axis, create a "Rows", "Columns", or "Layers" dimension.  Then,
     for each nest in the axis's stack, collect the sections that use the
     nest, sort their cells with ctables_cell_compare_3way, describe the
     nesting as an array of levels (variable label, category, summary
     function), and create pivot categories for each cell from the first
     level at which it differs from the previous cell.  The resulting leaf
     index is recorded in cell->axes[a].leaf.

   - Sort all of T's cells with ctables_cell_compare_leaf_3way and give
     every area that does not yet have one a sequence number, counted
     separately for each area type.

   - For each cell and each of its summary specifications, assemble the
     dimension indexes, compute the value (through
     ctables_cell_calculate_postcompute() for postcompute cells, otherwise
     through ctables_summary_value()), and store it with pivot_table_put().
     The value is shown as "<N" text when a hide threshold applies to a
     CTSF_COUNT value, as ct->zero or ct->missing text when those are set
     and apply, as text from ctables_format() for CTABLES-specific formats,
     and otherwise as a number with the specification's format.

   NOTE(review): short lines (braces, brief statements, some conditions) are
   not visible in this listing, so details such as the conditions under
   which the caption and corner text are set cannot be confirmed here. */
4438 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4440 struct pivot_table *pt = pivot_table_create__ (
4442 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4443 : pivot_value_new_text (N_("Custom Tables"))),
4446 pivot_table_set_caption (
4447 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4449 pivot_table_set_corner_text (
4450 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* The summary functions get a dimension of their own if they are labeled on
   a different axis from the summary axis, or if their labels are hidden and
   there is more than one summary specification. */
4452 bool summary_dimension = (t->summary_axis != t->slabels_axis
4453 || (!t->slabels_visible
4454 && t->summary_specs.n > 1));
4455 if (summary_dimension)
4457 struct pivot_dimension *d = pivot_dimension_create (
4458 pt, t->slabels_axis, N_("Statistics"));
4459 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4460 if (!t->slabels_visible)
4461 d->hide_all_labels = true;
4462 for (size_t i = 0; i < specs->n; i++)
4463 pivot_category_create_leaf (
4464 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A categories dimension is created exactly when t->clabels_example is
   set. */
4467 bool categories_dimension = t->clabels_example != NULL;
4468 if (categories_dimension)
4470 struct pivot_dimension *d = pivot_dimension_create (
4471 pt, t->label_axis[t->clabels_from_axis],
4472 t->clabels_from_axis == PIVOT_AXIS_ROW
4473 ? N_("Row Categories")
4474 : N_("Column Categories"));
4475 const struct variable *var = t->clabels_example;
4476 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4477 for (size_t i = 0; i < t->n_clabels_values; i++)
4479 const struct ctables_value *value = t->clabels_values[i];
4480 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4481 assert (cat != NULL);
4482 pivot_category_create_leaf (
4483 d->root, ctables_category_create_value_label (c, cat,
4489 pivot_table_set_look (pt, ct->look);
/* One dimension per axis, created only for an axis that has variables or
   that is the summary axis. */
4490 struct pivot_dimension *d[PIVOT_N_AXES];
4491 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4493 static const char *names[] = {
4494 [PIVOT_AXIS_ROW] = N_("Rows"),
4495 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4496 [PIVOT_AXIS_LAYER] = N_("Layers"),
4498 d[a] = (t->axes[a] || a == t->summary_axis
4499 ? pivot_dimension_create (pt, a, names[a])
4504 assert (t->axes[a]);
4506 for (size_t i = 0; i < t->stacks[a].n; i++)
4508 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections whose nest on this axis is NEST, counting their cells
   and tracking the greatest nest depth among them. */
4509 struct ctables_section **sections = xnmalloc (t->n_sections,
4511 size_t n_sections = 0;
4513 size_t n_total_cells = 0;
4514 size_t max_depth = 0;
4515 for (size_t j = 0; j < t->n_sections; j++)
4516 if (t->sections[j].nests[a] == nest)
4518 struct ctables_section *s = &t->sections[j];
4519 sections[n_sections++] = s;
4520 n_total_cells += hmap_count (&s->cells);
4522 size_t depth = s->nests[a]->n;
4523 max_depth = MAX (depth, max_depth);
/* Gather the cells of those sections into SORTED and sort them. */
4526 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4528 size_t n_sorted = 0;
4530 for (size_t j = 0; j < n_sections; j++)
4532 struct ctables_section *s = sections[j];
4534 struct ctables_cell *cell;
4535 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4537 sorted[n_sorted++] = cell;
4538 assert (n_sorted <= n_total_cells);
4541 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4542 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4544 struct ctables_level
4546 enum ctables_level_type
4548 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4549 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4550 CTL_SUMMARY, /* Summary functions. */
4554 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Room for at most a variable-label level and a category level per variable,
   plus one summary level. */
4557 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4558 size_t n_levels = 0;
4559 for (size_t k = 0; k < nest->n; k++)
4561 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4562 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4564 if (vlabel != CTVL_NONE)
4566 levels[n_levels++] = (struct ctables_level) {
4568 .vlabel = (enum settings_value_show) vlabel,
4573 if (nest->scale_idx != k
4574 && (k != nest->n - 1 || t->label_axis[a] == a))
4576 levels[n_levels++] = (struct ctables_level) {
4577 .type = CTL_CATEGORY,
4583 if (!summary_dimension && a == t->slabels_axis)
4585 levels[n_levels++] = (struct ctables_level) {
4586 .type = CTL_SUMMARY,
4587 .var_idx = SIZE_MAX,
4591 /* Pivot categories:
4593 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4594 - category for nest->vars[0], if nest->scale_idx != 0
4595 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4596 - category for nest->vars[1], if nest->scale_idx != 1
4598 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4599 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4600 - summary function, if 'a == t->slabels_axis && a ==
4603 Additional dimensions:
4605 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4607 - If 't->label_axis[b] == a' for some 'b != a', add a category
4612 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4614 for (size_t j = 0; j < n_sorted; j++)
4616 struct ctables_cell *cell = sorted[j];
4617 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4619 size_t n_common = 0;
4622 for (; n_common < n_levels; n_common++)
4624 const struct ctables_level *level = &levels[n_common];
4625 if (level->type == CTL_CATEGORY)
4627 size_t var_idx = level->var_idx;
4628 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4629 if (prev->axes[a].cvs[var_idx].category != c)
4631 else if (c->type != CCT_SUBTOTAL
4632 && c->type != CCT_TOTAL
4633 && c->type != CCT_POSTCOMPUTE
4634 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4635 &cell->axes[a].cvs[var_idx].value,
4636 var_get_type (nest->vars[var_idx])))
4642 for (size_t k = n_common; k < n_levels; k++)
4644 const struct ctables_level *level = &levels[k];
4645 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4646 if (level->type == CTL_SUMMARY)
4648 assert (k == n_levels - 1);
4650 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4651 for (size_t m = 0; m < specs->n; m++)
4653 int leaf = pivot_category_create_leaf (
4654 parent, ctables_summary_label (&specs->specs[m],
4662 const struct variable *var = nest->vars[level->var_idx];
4663 struct pivot_value *label;
4664 if (level->type == CTL_VAR)
4666 label = pivot_value_new_variable (var);
4667 label->variable.show = level->vlabel;
4669 else if (level->type == CTL_CATEGORY)
4671 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4672 label = ctables_category_create_value_label (
4673 t->categories[var_get_dict_index (var)],
4674 cv->category, var, &cv->value);
4679 if (k == n_levels - 1)
4680 prev_leaf = pivot_category_create_leaf (parent, label);
4682 groups[k] = pivot_category_create_group__ (parent, label);
4686 cell->axes[a].leaf = prev_leaf;
4695 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4699 size_t n_total_cells = 0;
4700 for (size_t j = 0; j < t->n_sections; j++)
4701 n_total_cells += hmap_count (&t->sections[j].cells);
4703 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4704 size_t n_sorted = 0;
4705 for (size_t j = 0; j < t->n_sections; j++)
4707 const struct ctables_section *s = &t->sections[j];
4708 struct ctables_cell *cell;
4709 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4711 sorted[n_sorted++] = cell;
4713 assert (n_sorted <= n_total_cells);
4714 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4716 size_t ids[N_CTATS];
4717 memset (ids, 0, sizeof ids);
4718 for (size_t j = 0; j < n_sorted; j++)
4720 struct ctables_cell *cell = sorted[j];
4721 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4723 struct ctables_area *area = cell->areas[at];
4724 if (!area->sequence)
4725 area->sequence = ++ids[at];
4732 for (size_t i = 0; i < t->n_sections; i++)
4734 struct ctables_section *s = &t->sections[i];
4736 struct ctables_cell *cell;
4737 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4742 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4743 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4744 for (size_t j = 0; j < specs->n; j++)
4747 size_t n_dindexes = 0;
4749 if (summary_dimension)
4750 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4752 if (categories_dimension)
4754 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4755 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4756 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4757 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4760 dindexes[n_dindexes++] = ctv->leaf;
4763 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4766 int leaf = cell->axes[a].leaf;
4767 if (a == t->summary_axis && !summary_dimension)
4769 dindexes[n_dindexes++] = leaf;
4772 const struct ctables_summary_spec *ss = &specs->specs[j];
4774 struct fmt_spec format = specs->specs[j].format;
4775 bool is_ctables_format = ss->is_ctables_format;
4776 double d = (cell->postcompute
4777 ? ctables_cell_calculate_postcompute (
4778 s, cell, ss, &format, &is_ctables_format, j)
4779 : ctables_summary_value (cell, &cell->summaries[j],
4782 struct pivot_value *value;
4783 if (ct->hide_threshold != 0
4784 && d < ct->hide_threshold
4785 && ss->function == CTSF_COUNT)
4787 value = pivot_value_new_user_text_nocopy (
4788 xasprintf ("<%d", ct->hide_threshold));
4790 else if (d == 0 && ct->zero)
4791 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4792 else if (d == SYSMIS && ct->missing)
4793 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4794 else if (is_ctables_format)
4795 value = pivot_value_new_user_text_nocopy (
4796 ctables_format (d, &format, &ct->ctables_formats));
4799 value = pivot_value_new_number (d);
4800 value->numeric.format = format;
4802 /* XXX should text values be right-justified? */
4803 pivot_table_put (pt, dindexes, n_dindexes, value);
4808 pivot_table_submit (pt);
/* Checks the category-label position t->label_axis[a] requested for axis A
   of table T, and records the last variable of the first nest in A's stack
   as t->clabels_example.

   The visible code reports an error with msg (SE, ...), naming the
   subcommand (ROWLABELS or COLLABELS) and position (LAYER or OPPOSITE), if:

   - the categories of that first variable include a CCT_FUNCTION category
     (sorting based on a summary function);

   - the last variable of any nest in the stack is the nest's scale
     variable; or

   - the last variable of any later nest differs from that of the first nest
     in width, value labels, or category specifications.

   NOTE(review): the return statements and some early exits are not visible
   in this listing.  The caller combines the results for the row and column
   axes with `&&', so presumably this returns true if the position is
   acceptable and false after reporting an error -- confirm. */
4812 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4814 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only in error messages. */
4818 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4819 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4821 const struct ctables_stack *stack = &t->stacks[a];
4825 const struct ctables_nest *n0 = &stack->nests[0];
4828 assert (stack->n == 1);
/* V0 and C0, from the first nest, are the reference that every other nest's
   last variable is compared against below. */
4832 const struct variable *v0 = n0->vars[n0->n - 1];
4833 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4834 t->clabels_example = v0;
4836 for (size_t i = 0; i < c0->n_cats; i++)
4837 if (c0->cats[i].type == CCT_FUNCTION)
4839 msg (SE, _("%s=%s is not allowed with sorting based "
4840 "on a summary function."),
4841 subcommand_name, pos_name);
4844 if (n0->n - 1 == n0->scale_idx)
4846 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4847 "but %s is a scale variable."),
4848 subcommand_name, pos_name, var_get_name (v0));
/* Compare the last variable of each remaining nest against V0. */
4852 for (size_t i = 1; i < stack->n; i++)
4854 const struct ctables_nest *ni = &stack->nests[i];
4856 const struct variable *vi = ni->vars[ni->n - 1];
4857 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4859 if (ni->n - 1 == ni->scale_idx)
4861 msg (SE, _("%s=%s requires the variables to be moved to be "
4862 "categorical, but %s is a scale variable."),
4863 subcommand_name, pos_name, var_get_name (vi));
4866 if (var_get_width (v0) != var_get_width (vi))
4868 msg (SE, _("%s=%s requires the variables to be "
4869 "moved to have the same width, but %s has "
4870 "width %d and %s has width %d."),
4871 subcommand_name, pos_name,
4872 var_get_name (v0), var_get_width (v0),
4873 var_get_name (vi), var_get_width (vi));
4876 if (!val_labs_equal (var_get_value_labels (v0),
4877 var_get_value_labels (vi)))
4879 msg (SE, _("%s=%s requires the variables to be "
4880 "moved to have the same value labels, but %s "
4881 "and %s have different value labels."),
4882 subcommand_name, pos_name,
4883 var_get_name (v0), var_get_name (vi));
4886 if (!ctables_categories_equal (c0, ci))
4888 msg (SE, _("%s=%s requires the variables to be "
4889 "moved to have the same category "
4890 "specifications, but %s and %s have different "
4891 "category specifications."),
4892 subcommand_name, pos_name,
4893 var_get_name (v0), var_get_name (vi));
/* Looks for VAR among the *N elements of *SUM_VARS.  If it is not found,
   stores VAR at index *N, first growing the array with x2nrealloc() (which
   updates *ALLOCATED) if it is full.

   NOTE(review): the return statements and any update of *N are not visible
   in this listing.  The caller stores the result in a summary
   specification's sum_var_idx, so presumably this returns VAR's index in
   *SUM_VARS and increments *N when it appends -- confirm. */
4902 add_sum_var (struct variable *var,
4903 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4905 for (size_t i = 0; i < *n; i++)
4906 if (var == (*sum_vars)[i])
4909 if (*n >= *allocated)
4910 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4911 (*sum_vars)[*n] = var;
/* Maps AREA to an area type.  The only results visible in this listing are
   CTAT_LAYERCOL and CTAT_LAYERROW.

   NOTE(review): the control flow of this function (presumably a switch on
   AREA) and its remaining return statements are not visible here, so which
   input yields which output, and whether an earlier return makes these two
   unreachable, cannot be determined from this listing -- check the complete
   source before relying on it. */
4915 static enum ctables_area_type
4916 rotate_area (enum ctables_area_type area)
4927 return CTAT_LAYERCOL;
4930 return CTAT_LAYERROW;
/* Walks axis tree A.  For every summary specification in a->specs[] whose
   function is CTSF_areaPCT_SUM, registers a->var with add_sum_var() and
   stores the result in the specification's sum_var_idx.  Also recurses into
   both elements of a->subs[].

   *SUM_VARS, *N, and *ALLOCATED are passed through to add_sum_var().

   NOTE(review): the code that chooses between these two actions (presumably
   a test for a null A and a switch on the kind of axis node) is not visible
   in this listing -- confirm. */
4943 enumerate_sum_vars (const struct ctables_axis *a,
4944 struct variable ***sum_vars, size_t *n, size_t *allocated)
4952 for (size_t i = 0; i < N_CSVS; i++)
4953 for (size_t j = 0; j < a->specs[i].n; j++)
4955 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4956 if (spec->function == CTSF_areaPCT_SUM)
4957 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4963 for (size_t i = 0; i < 2; i++)
4964 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The steps visible in this listing are:

   - For each axis, compute the axis's stack of nests with enumerate_fts()
     and, for each nest and each area type, fill in nest->areas[at] with
     indexes of the nest's non-scale variables and then adjust the count
     nest->n_areas[at].  In a branch whose condition is not visible here, the
     axis instead gets a single nest with scale_idx and summary_idx set to
     SIZE_MAX, and t->label_axis[a] is reset to A.

   - For each nest on the summary axis: supply a default summary
     specification if the nest has no CSV_CELL specifications (CTSF_MEAN
     with CTW_EFFECTIVE weighting if the set is marked is_scale, otherwise
     CTSF_COUNT with CTW_DICTIONARY weighting); clone the CSV_CELL
     specifications into CSV_TOTAL when the latter are empty; pass calc_area
     through rotate_area() when row labels are on the column axis or column
     labels are on the row axis; and, if t->ctables->smissing_listwise is
     set, collect the scale variables of the other nests that share this
     nest's group_head as its listwise variables.

   - Merge the summary specifications of all the nests on the summary axis
     into t->summary_specs, repeatedly taking the minimum remaining item
     according to merge_item_compare_3way() and recording each
     specification's index in the merged set as its axis_idx.

   - Enumerate the summary axis's sum variables into t->sum_vars.

   Returns true only if ctables_check_label_position() accepts both the row
   axis and the column axis.

   NOTE(review): short lines (braces, `else', brief statements and
   conditions) are not visible in this listing, so the exact conditions on
   several of these steps cannot be confirmed here. */
4970 ctables_prepare_table (struct ctables_table *t)
4972 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4975 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4977 for (size_t j = 0; j < t->stacks[a].n; j++)
4979 struct ctables_nest *nest = &t->stacks[a].nests[j];
4980 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4982 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4983 nest->n_areas[at] = 0;
/* ATA is the axis that a row- or column-oriented area type follows, and ATB
   is the other of the two. */
4985 enum pivot_axis_type ata, atb;
4986 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4988 ata = PIVOT_AXIS_ROW;
4989 atb = PIVOT_AXIS_COLUMN;
4991 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4993 ata = PIVOT_AXIS_COLUMN;
4994 atb = PIVOT_AXIS_ROW;
4997 if (at == CTAT_LAYER
4998 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
4999 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5000 ? a == atb && t->label_axis[a] != a
/* K is unsigned, so decrementing it past zero wraps around to a value that
   fails `k < nest->n' and ends this descending loop. */
5003 for (size_t k = nest->n - 1; k < nest->n; k--)
5004 if (k != nest->scale_idx)
5006 nest->areas[at][nest->n_areas[at]++] = k;
5012 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5013 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5014 : at == CTAT_TABLE ? true
5018 for (size_t k = 0; k < nest->n; k++)
5019 if (k != nest->scale_idx)
5020 nest->areas[at][nest->n_areas[at]++] = k;
5026 #define L PIVOT_AXIS_LAYER
5027 n_drop = (t->clabels_from_axis == L ? a != L
5028 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5029 : t->clabels_from_axis == a ? 2
5036 n_drop = a == ata && t->label_axis[ata] == atb;
5041 n_drop = (a == ata ? t->label_axis[ata] == atb
5043 : t->clabels_from_axis == atb ? -1
5044 : t->clabels_to_axis != atb ? 1
5056 size_t n = nest->n_areas[at];
5059 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5060 nest->n_areas[at]--;
/* Drop up to N_DROP entries from the end, stopping at zero. */
5065 for (int i = 0; i < n_drop; i++)
5066 if (nest->n_areas[at] > 0)
5067 nest->n_areas[at]--;
5074 struct ctables_nest *nest = xmalloc (sizeof *nest);
5075 *nest = (struct ctables_nest) {
5077 .scale_idx = SIZE_MAX,
5078 .summary_idx = SIZE_MAX
5080 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5082 /* There's no point in moving labels away from an axis that has no
5083 labels, so avoid dealing with the special cases around that. */
5084 t->label_axis[a] = a;
5087 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5088 for (size_t i = 0; i < stack->n; i++)
5090 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary specifications were given: supply a default one. */
5091 if (!nest->specs[CSV_CELL].n)
5093 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5094 ss->specs = xmalloc (sizeof *ss->specs);
5097 enum ctables_summary_function function
5098 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5102 nest->summary_idx = nest->n - 1;
5103 ss->var = nest->vars[nest->summary_idx];
5105 *ss->specs = (struct ctables_summary_spec) {
5106 .function = function,
5107 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5108 .format = ctables_summary_default_format (function, ss->var),
5111 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5112 &nest->specs[CSV_CELL]);
5114 else if (!nest->specs[CSV_TOTAL].n)
5115 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5116 &nest->specs[CSV_CELL]);
5118 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5119 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5121 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5122 for (size_t i = 0; i < nest->specs[sv].n; i++)
5124 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5125 const struct ctables_function_info *cfi =
5126 &ctables_function_info[ss->function];
5128 ss->calc_area = rotate_area (ss->calc_area);
5132 if (t->ctables->smissing_listwise)
5134 struct variable **listwise_vars = NULL;
5136 size_t allocated = 0;
5138 for (size_t j = nest->group_head; j < stack->n; j++)
5140 const struct ctables_nest *other_nest = &stack->nests[j];
5141 if (other_nest->group_head != nest->group_head)
5144 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5147 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5148 sizeof *listwise_vars);
5149 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5152 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5155 listwise_vars = xmemdup (listwise_vars,
5156 n * sizeof *listwise_vars);
5157 nest->specs[sv].listwise_vars = listwise_vars;
5158 nest->specs[sv].n_listwise_vars = n;
/* Merge the nests' summary specification sets into one. */
5163 struct ctables_summary_spec_set *merged = &t->summary_specs;
5164 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5166 for (size_t j = 0; j < stack->n; j++)
5168 const struct ctables_nest *nest = &stack->nests[j];
5170 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5171 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the minimum of the remaining items and append its current
   specification to MERGED. */
5176 struct merge_item min = items[0];
5177 for (size_t j = 1; j < n_left; j++)
5178 if (merge_item_compare_3way (&items[j], &min) < 0)
5181 if (merged->n >= merged->allocated)
5182 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5183 sizeof *merged->specs);
5184 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item that compares equal to MIN maps to the entry just appended and
   advances; an item whose set is exhausted is replaced by the last
   remaining item. */
5186 for (size_t j = 0; j < n_left; )
5188 if (merge_item_compare_3way (&items[j], &min) == 0)
5190 struct merge_item *item = &items[j];
5191 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5192 if (++item->ofs >= item->set->n)
5194 items[j] = items[--n_left];
5203 size_t allocated_sum_vars = 0;
5204 enumerate_sum_vars (t->axes[t->summary_axis],
5205 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5207 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5208 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack for axis A, takes the value in case C of the
   nest's last variable and, if that value matches one of the variable's
   categories, inserts it into T's set of category-label values with
   ctables_value_insert().

   NOTE(review): the statement controlled by the SYSMIS test is not visible
   in this listing; presumably system-missing numeric values are skipped --
   confirm. */
5212 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5213 enum pivot_axis_type a)
5215 struct ctables_stack *stack = &t->stacks[a];
5216 for (size_t i = 0; i < stack->n; i++)
5218 const struct ctables_nest *nest = &stack->nests[i];
5219 const struct variable *var = nest->vars[nest->n - 1];
5220 const union value *value = case_data (c, var);
5222 if (var_is_numeric (var) && value->f == SYSMIS)
5225 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5227 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback, passed to sort() by ctables_sort_clabels_values().
   A_ and B_ each point to a `struct ctables_value *' array element, and
   WIDTH_ points to an int that holds the width of the values.  Returns the
   result of value_compare_3way() on the two values at that width. */
5232 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5234 const struct ctables_value *const *ap = a_;
5235 const struct ctables_value *const *bp = b_;
5236 const struct ctables_value *a = *ap;
5237 const struct ctables_value *b = *bp;
5238 const int *width = width_;
5239 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T's sorted array of category-label values.

   First inserts into t->clabels_values_map the labeled values of
   t->clabels_example that match one of that variable's categories.  Then
   copies a pointer to every entry in the map into a new array
   t->clabels_values, sorts the array with compare_clabels_values_3way() at
   the variable's width, and sets each entry's leaf to its position in the
   sorted array.

   NOTE(review): the condition, if any, that guards the insertion of labeled
   values is not visible in this listing. */
5243 ctables_sort_clabels_values (struct ctables_table *t)
5245 const struct variable *v0 = t->clabels_example;
5246 int width = var_get_width (v0);
5248 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5251 const struct val_labs *val_labs = var_get_value_labels (v0);
5252 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5253 vl = val_labs_next (val_labs, vl))
5254 if (ctables_categories_match (c0, &vl->value, v0))
5255 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array so that it can be sorted. */
5258 size_t n = hmap_count (&t->clabels_values_map);
5259 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5261 struct ctables_value *clv;
5263 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5264 t->clabels_values[i++] = clv;
5265 t->n_clabels_values = n;
5268 sort (t->clabels_values, n, sizeof *t->clabels_values,
5269 compare_clabels_values_3way, &width);
/* The sorted position becomes the value's leaf index. */
5271 for (size_t i = 0; i < n; i++)
5272 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, with ctables_add_occurrence(), values of VAR selected
   by the categories in CATS.  Depending on the category, the visible code
   adds:

   - the category's number;

   - the category's string, padded on the right with spaces to VAR's width;

   - each labeled value of VAR within the category's numeric range nrange[]
     (VAR must be numeric);

   - each labeled value of VAR within the category's string range srange
     (VAR must be a string variable);

   - each labeled value of VAR that is missing;

   - every labeled value of VAR; or

   - each labeled value of VAR that is not missing, or every labeled value
     if the category has include_missing set.

   NOTE(review): the switch statement and most of its case labels are not
   visible in this listing, so which category type selects which of these
   actions, and what is done for CCT_POSTCOMPUTE and CCT_EXCLUDED_MISSING,
   cannot be confirmed here. */
5276 ctables_add_category_occurrences (const struct variable *var,
5277 struct hmap *occurrences,
5278 const struct ctables_categories *cats)
5280 const struct val_labs *val_labs = var_get_value_labels (var);
5282 for (size_t i = 0; i < cats->n_cats; i++)
5284 const struct ctables_category *c = &cats->cats[i];
5288 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5294 int width = var_get_width (var);
/* Build a temporary value of VAR's width from the category's string. */
5296 value_init (&value, width);
5297 value_copy_buf_rpad (&value, width,
5298 CHAR_CAST (uint8_t *, c->string.string),
5299 c->string.length, ' ');
5300 ctables_add_occurrence (var, &value, occurrences);
5301 value_destroy (&value, width);
5306 assert (var_is_numeric (var));
5307 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5308 vl = val_labs_next (val_labs, vl))
5309 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5310 ctables_add_occurrence (var, &vl->value, occurrences);
5314 assert (var_is_alpha (var));
5315 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5316 vl = val_labs_next (val_labs, vl))
5317 if (in_string_range (&vl->value, var, c->srange))
5318 ctables_add_occurrence (var, &vl->value, occurrences);
5322 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5323 vl = val_labs_next (val_labs, vl))
5324 if (var_is_value_missing (var, &vl->value))
5325 ctables_add_occurrence (var, &vl->value, occurrences);
5329 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5330 vl = val_labs_next (val_labs, vl))
5331 ctables_add_occurrence (var, &vl->value, occurrences);
5334 case CCT_POSTCOMPUTE:
5344 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5345 vl = val_labs_next (val_labs, vl))
5346 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5347 ctables_add_occurrence (var, &vl->value, occurrences);
5350 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().  Position
   (A, A_IDX) identifies the variable currently being enumerated: the
   variable at index A_IDX in section S's nest for axis A.

   - If A is past the last axis, every variable has been assigned, and a
     cell is inserted into S with ctables_cell_insert__() for case C and
     categories CATS.

   - If S has no nest on axis A, or A_IDX is past the end of the nest,
     enumeration continues with the first variable of the next axis.

   - Otherwise, for each occurrence recorded in s->occurrences[a][a_idx],
     the variable's value in C is replaced by the occurrence's value, the
     matching category is stored in cats[a][a_idx], and enumeration
     continues with the next variable in the nest.  Afterward, the same is
     done for each of the variable's CCT_POSTCOMPUTE categories, without
     changing C.

   C is modified in place, so the caller must supply a case that may be
   written to. */
5357 ctables_section_recurse_add_empty_categories (
5358 struct ctables_section *s,
5359 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5360 enum pivot_axis_type a, size_t a_idx)
5362 if (a >= PIVOT_N_AXES)
5363 ctables_cell_insert__ (s, c, cats);
5364 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5365 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5368 const struct variable *var = s->nests[a]->vars[a_idx];
5369 const struct ctables_categories *categories = s->table->categories[
5370 var_get_dict_index (var)];
5371 int width = var_get_width (var);
5372 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5373 const struct ctables_occurrence *o;
5374 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Overwrite VAR's value in C with this occurrence's value. */
5376 union value *value = case_data_rw (c, var);
5377 value_destroy (value, width);
5378 value_clone (value, &o->value, width);
5379 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5380 assert (cats[a][a_idx] != NULL);
5381 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5384 for (size_t i = 0; i < categories->n_cats; i++)
5386 const struct ctables_category *cat = &categories->cats[i];
5387 if (cat->type == CCT_POSTCOMPUTE)
5389 cats[a][a_idx] = cat;
5390 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for combinations of categories that should be
   shown even though they may not appear in the data.

   For each non-scale variable in S's nests whose categories have show_empty
   set, ctables_add_category_occurrences() adds the values implied by the
   variable's categories to the variable's occurrences.  Then a case is
   created from the dictionary's prototype and
   ctables_section_recurse_add_empty_categories() enumerates the
   combinations of occurrences, starting from the first variable of the
   first axis.

   NOTE(review): the uses of SHOW_EMPTY after its initialization, and the
   release of the case, are not visible in this listing; presumably the
   enumeration is skipped when no variable has show_empty set -- confirm. */
5397 ctables_section_add_empty_categories (struct ctables_section *s)
5399 bool show_empty = false;
5400 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5402 for (size_t k = 0; k < s->nests[a]->n; k++)
5403 if (k != s->nests[a]->scale_idx)
5405 const struct variable *var = s->nests[a]->vars[k];
5406 const struct ctables_categories *cats = s->table->categories[
5407 var_get_dict_index (var)];
5408 if (cats->show_empty)
5411 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Scratch arrays, one element per variable in each axis's nest, that the
   recursion fills in with the category chosen for each variable. */
5417 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5418 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5419 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5420 const struct ctables_category **cats[PIVOT_N_AXES] =
5422 [PIVOT_AXIS_LAYER] = layer_cats,
5423 [PIVOT_AXIS_ROW] = row_cats,
5424 [PIVOT_AXIS_COLUMN] = column_cats,
5426 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5427 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of per-execution data while leaving its hash maps in a
   usable state:

   - For each non-scale variable in each axis's nest, destroys the value of
     every occurrence and removes the occurrence from its map.

   - For each cell, destroys the values of the non-scale variables on each
     axis, frees the per-axis value arrays, uninitializes and frees the
     cell's summaries, and removes the cell from s->cells, which is then
     shrunk.

   - For each area type, removes every area from s->areas[at], which is
     then shrunk.

   NOTE(review): the statements that free the occurrence, cell, and area
   structures themselves are not visible in this listing -- confirm. */
5432 ctables_section_clear (struct ctables_section *s)
5434 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5436 const struct ctables_nest *nest = s->nests[a];
5437 for (size_t i = 0; i < nest->n; i++)
5438 if (i != nest->scale_idx)
5440 const struct variable *var = nest->vars[i];
5441 int width = var_get_width (var);
5442 struct ctables_occurrence *o, *next;
5443 struct hmap *map = &s->occurrences[a][i];
5444 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5446 value_destroy (&o->value, width);
5447 hmap_delete (map, &o->node);
5454 struct ctables_cell *cell, *next_cell;
5455 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5457 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5459 const struct ctables_nest *nest = s->nests[a];
5460 for (size_t i = 0; i < nest->n; i++)
5461 if (i != nest->scale_idx)
5462 value_destroy (&cell->axes[a].cvs[i].value,
5463 var_get_width (nest->vars[i]));
5464 free (cell->axes[a].cvs);
/* The cell's summaries follow the specification set for the cell's summary
   variant on the summary axis. */
5467 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5468 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5469 for (size_t i = 0; i < specs->n; i++)
5470 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5471 free (cell->summaries);
5473 hmap_delete (&s->cells, &cell->node);
5476 hmap_shrink (&s->cells);
5478 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5480 struct ctables_area *area, *next_area;
5481 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5485 hmap_delete (&s->areas[at], &area->node);
5488 hmap_shrink (&s->areas[at]);
/* Releases the resources held by section S: clears it with
   ctables_section_clear(), then destroys the occurrence map of every
   variable in each axis's nest, frees the per-axis arrays of occurrence
   maps, and destroys the cell map and the area map for each area type.
   Does not free S itself. */
5493 ctables_section_uninit (struct ctables_section *s)
5495 ctables_section_clear (s);
5497 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5499 struct ctables_nest *nest = s->nests[a];
5500 for (size_t i = 0; i < nest->n; i++)
5501 hmap_destroy (&s->occurrences[a][i]);
5502 free (s->occurrences[a]);
5505 hmap_destroy (&s->cells);
5506 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5507 hmap_destroy (&s->areas[at]);
/* Clears per-execution data from table T: clears every section with
   ctables_section_clear() and, if T has a category-label example variable,
   destroys and removes every value in t->clabels_values_map, shrinks the
   map, and frees and resets the t->clabels_values array.

   NOTE(review): the statement that frees each `struct ctables_value' is not
   visible in this listing -- confirm. */
5511 ctables_table_clear (struct ctables_table *t)
5513 for (size_t i = 0; i < t->n_sections; i++)
5514 ctables_section_clear (&t->sections[i]);
5516 if (t->clabels_example)
5518 int width = var_get_width (t->clabels_example);
5519 struct ctables_value *value, *next_value;
5520 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5521 &t->clabels_values_map)
5523 value_destroy (&value->value, width);
5524 hmap_delete (&t->clabels_values_map, &value->node);
5527 hmap_shrink (&t->clabels_values_map);
5529 free (t->clabels_values);
5530 t->clabels_values = NULL;
5531 t->n_clabels_values = 0;
/* Reads the cases in INPUT and produces output for every table in the
   CTABLES command (referred to as `ct' in the body; its parameter
   declaration is not visible in this listing).

   First allocates each table's array of sections, sized as the product of
   the numbers of nests on the three axes (each taken as at least 1), and
   populates it with ctables_table_add_section().

   The cases are then divided into groups with a casegrouper.  For each
   case in a group, the dictionary weight and the effective weight are
   computed, the case is inserted into every section of every table, and
   category-label values are collected for each axis whose labels have been
   moved to another axis.  After each group, every table sorts its
   category-label values (if any), adds empty categories to its sections, is
   output with ctables_table_output(), and is cleared for the next group.

   Returns the result of casegrouper_destroy().

   NOTE(review): the condition that selects between
   casegrouper_create_splits() and casegrouper_create_vars() is not visible
   in this listing; presumably it is SPLITTING -- confirm. */
5536 ctables_execute (struct dataset *ds, struct casereader *input,
5539 for (size_t i = 0; i < ct->n_tables; i++)
5541 struct ctables_table *t = ct->tables[i];
5542 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5543 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5544 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5545 sizeof *t->sections);
5546 size_t ix[PIVOT_N_AXES];
5547 ctables_table_add_section (t, 0, ix);
5550 struct dictionary *dict = dataset_dict (ds);
5552 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5553 struct casegrouper *grouper
5555 ? casegrouper_create_splits (input, dict)
5556 : casegrouper_create_vars (input, NULL, 0));
5557 struct casereader *group;
5558 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to output the split file values. */
5562 struct ccase *c = casereader_peek (group, 0);
5565 output_split_file_values (ds, c);
5570 bool warn_on_invalid = true;
5571 for (struct ccase *c = casereader_read (group); c;
5572 case_unref (c), c = casereader_read (group))
5574 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5575 double e_weight = (ct->e_weight
5576 ? var_force_valid_weight (ct->e_weight,
5577 case_num (c, ct->e_weight),
/* One weight per weighting type; unweighted counts each case as 1. */
5581 [CTW_DICTIONARY] = d_weight,
5582 [CTW_EFFECTIVE] = e_weight,
5583 [CTW_UNWEIGHTED] = 1.0,
5586 for (size_t i = 0; i < ct->n_tables; i++)
5588 struct ctables_table *t = ct->tables[i];
5590 for (size_t j = 0; j < t->n_sections; j++)
5591 ctables_cell_insert (&t->sections[j], c, weight);
5593 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5594 if (t->label_axis[a] != a)
5595 ctables_insert_clabels_values (t, c, a);
5598 casereader_destroy (group);
/* The group has been read in full: finish and output each table. */
5600 for (size_t i = 0; i < ct->n_tables; i++)
5602 struct ctables_table *t = ct->tables[i];
5604 if (t->clabels_example)
5605 ctables_sort_clabels_values (t);
5607 for (size_t j = 0; j < t->n_sections; j++)
5608 ctables_section_add_empty_categories (&t->sections[j]);
5610 ctables_table_output (ct, t);
5611 ctables_table_clear (t);
5614 return casegrouper_destroy (grouper);
/* Searches CT's postcomputes for one named NAME.  Both the hash
   (utf8_hash_case_string()) and the comparison (utf8_strcasecmp()) are
   case-insensitive.

   NOTE(review): the return statements are not visible in this listing.
   Callers test the result for null, so presumably this returns the matching
   postcompute, or a null pointer if there is none -- confirm. */
5617 static struct ctables_postcompute *
5618 ctables_find_postcompute (struct ctables *ct, const char *name)
5620 struct ctables_postcompute *pc;
5621 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5622 utf8_hash_case_string (name, 0), &ct->postcomputes)
5623 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form

       &NAME = EXPR (expression)

   from LEXER, with the expression parsed by ctables_pcexpr_parse_add()
   against DICT.

   If a postcompute named NAME already exists, warns that the new definition
   overrides the previous one (pointing out the previous definition's
   location) and destroys the old expression and location.  Otherwise, a
   new postcompute is allocated and inserted into the table of postcomputes
   under the case-insensitive hash of its name.  Either way, the
   postcompute's location is set to the source location of the definition,
   and its label to the source text of the expression.

   NOTE(review): the remaining parameters, the return statements, and the
   cleanup on most error paths are not visible in this listing. */
5629 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The definition starts one token back from the current token. */
5632 int pcompute_start = lex_ofs (lexer) - 1;
5634 if (!lex_match (lexer, T_AND))
5636 lex_error_expecting (lexer, "&");
5639 if (!lex_force_id (lexer))
5642 char *name = ss_xstrdup (lex_tokss (lexer));
5645 if (!lex_force_match (lexer, T_EQUALS)
5646 || !lex_force_match_id (lexer, "EXPR")
5647 || !lex_force_match (lexer, T_LPAREN))
/* Record the token offsets that bracket the expression, for use as the
   default label below. */
5653 int expr_start = lex_ofs (lexer);
5654 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5655 int expr_end = lex_ofs (lexer) - 1;
5656 if (!expr || !lex_force_match (lexer, T_RPAREN))
5658 ctables_pcexpr_destroy (expr);
5662 int pcompute_end = lex_ofs (lexer) - 1;
5664 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5667 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5670 msg_at (SW, location, _("New definition of &%s will override the "
5671 "previous definition."),
5673 msg_at (SN, pc->location, _("This is the previous definition."));
5675 ctables_pcexpr_destroy (pc->expr);
5676 msg_location_destroy (pc->location);
5681 pc = xmalloc (sizeof *pc);
5682 *pc = (struct ctables_postcompute) { .name = name };
5683 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5684 utf8_hash_case_string (pc->name, 0));
5687 pc->location = location;
5689 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a sequence of summary function and format pairs from LEXER into
   *SSS, which is first reset to an empty set.

   Parsing continues until the end of the command, a slash, or an identifier
   that matches LABEL or HIDESOURCECATS.  Each item consists of a summary
   function (parsed by parse_ctables_summary_function()), a percentile
   number in the range 0 to 100 if the function is CTSF_PTILE, and a format
   (parsed by parse_ctables_format_specifier()).  Each item is appended to
   *SSS.

   NOTE(review): the return statements and error-path labels are not visible
   in this listing.  The caller tests the result with `!', so presumably
   this returns true on success and, on failure, releases *SSS with
   ctables_summary_spec_set_uninit() and returns false -- confirm. */
5694 ctables_parse_pproperties_format (struct lexer *lexer,
5695 struct ctables_summary_spec_set *sss)
5697 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5699 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5700 && !(lex_token (lexer) == T_ID
5701 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5702 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5703 lex_tokss (lexer)))))
5705 /* Parse function. */
5706 enum ctables_summary_function function;
5707 enum ctables_weighting weighting;
5708 enum ctables_area_type area;
5709 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5712 /* Parse percentile. */
5713 double percentile = 0;
5714 if (function == CTSF_PTILE)
5716 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5718 percentile = lex_number (lexer);
5723 struct fmt_spec format;
5724 bool is_ctables_format;
5725 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, growing the array if it is full. */
5728 if (sss->n >= sss->allocated)
5729 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5730 sizeof *sss->specs);
5731 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5732 .function = function,
5733 .weighting = weighting,
5736 .percentile = percentile,
5738 .is_ctables_format = is_ctables_format,
5744 ctables_summary_spec_set_uninit (sss);
/* Parses a PPROPERTIES subcommand: a list of `&NAME' references to
   postcomputes already defined in CT, followed by any number of LABEL,
   FORMAT, and HIDESOURCECATS settings.  Each setting is applied to every
   postcompute in the list.

   NOTE(review): the declaration of `n_pcs', the statement that stores each
   postcompute into `pcs', the failure returns, and the release of `pcs' are
   on lines not visible in this extract. */
5749 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Dynamically grown array of the postcomputes named on this subcommand. */
5751 struct ctables_postcompute **pcs = NULL;
5753 size_t allocated_pcs = 0;
/* Each listed postcompute is written as `&' followed by an identifier. */
5755 while (lex_match (lexer, T_AND))
5757 if (!lex_force_id (lexer))
5759 struct ctables_postcompute *pc
5760 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
/* Error issued for a name with no matching postcompute (the guarding
   condition is not visible here). */
5763 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5768 if (n_pcs >= allocated_pcs)
5769 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings continue until a `/' or the end of the command. */
5773 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5775 if (lex_match_id (lexer, "LABEL"))
/* The `=' is optional. */
5777 lex_match (lexer, T_EQUALS);
5778 if (!lex_force_string (lexer))
/* Replace each postcompute's label with its own copy of the string. */
5781 for (size_t i = 0; i < n_pcs; i++)
5783 free (pcs[i]->label);
5784 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5789 else if (lex_match_id (lexer, "FORMAT"))
5791 lex_match (lexer, T_EQUALS);
/* Parse once into a temporary set, then clone it into every postcompute. */
5793 struct ctables_summary_spec_set sss;
5794 if (!ctables_parse_pproperties_format (lexer, &sss))
5797 for (size_t i = 0; i < n_pcs; i++)
/* NOTE(review): the conditions selecting between the next two statements
   are not visible; presumably an existing set is emptied and a missing one
   is allocated -- confirm. */
5800 ctables_summary_spec_set_uninit (pcs[i]->specs);
5802 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5803 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
/* The temporary set is released once it has been cloned. */
5805 ctables_summary_spec_set_uninit (&sss);
5807 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5809 lex_match (lexer, T_EQUALS);
5810 bool hide_source_cats;
5811 if (!parse_bool (lexer, &hide_source_cats))
5813 for (size_t i = 0; i < n_pcs; i++)
5814 pcs[i]->hide_source_cats = hide_source_cats;
/* Reported for any other keyword. */
5818 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to broken-down local time, formats it with strftime()
   according to FORMAT, and appends the result to OUT.

   NOTE(review): the declaration of the `value' buffer is not visible in
   this extract.  Neither the localtime() result (which may be a null
   pointer) nor the strftime() result is checked here; when strftime()
   returns 0 because the output did not fit, the C standard leaves the
   buffer contents indeterminate, so appending it would be unsafe.  Worth
   confirming against the full source. */
5831 put_strftime (struct string *out, time_t now, const char *format)
5833 const struct tm *tm = localtime (&now);
5835 strftime (value, sizeof value, format, tm);
5836 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this extract.
   Callers in this file use the result as a condition, so it presumably
   reports whether the prefix was present and skipped -- confirm. */
5840 skip_prefix (struct substring *s, struct substring prefix)
5842 if (ss_starts_with (*s, prefix))
5844 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens at offsets EXPR_START
   (inclusive) through EXPR_END (exclusive) in LEXER.  An identifier that
   names a variable in DICT with a variable label is replaced by that label;
   any other identifier is copied as written.

   NOTE(review): the declaration and updates of `nest', the bodies of the two
   bracket branches, the `else' that presumably introduces the final branch,
   and the release of `repr' are on lines not visible in this extract. */
5852 put_table_expression (struct string *out, struct lexer *lexer,
5853 struct dictionary *dict, int expr_start, int expr_end)
5856 for (int ofs = expr_start; ofs < expr_end; ofs++)
5858 const struct token *t = lex_ofs_token (lexer, ofs);
/* Square brackets get special treatment, tracked through `nest'. */
5859 if (t->type == T_LBRACK)
5861 else if (t->type == T_RBRACK && nest > 0)
5867 else if (t->type == T_ID)
/* Use the variable label when the identifier names a variable that has
   one; otherwise fall back to the identifier text. */
5869 const struct variable *var
5870 = dict_lookup_var (dict, t->string.string);
5871 const char *label = var ? var_get_label (var) : NULL;
5872 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space before the token unless it is the first token, it is a right
   parenthesis, or OUT already ends in a space. */
5876 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5877 ds_put_byte (out, ' ');
/* Copy the token's own source representation. */
5879 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5880 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token in the range or
   a left parenthesis. */
5883 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5884 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding the keywords that begin with a right
   parenthesis: `)DATE' and `)TIME' are replaced by NOW formatted with the
   strftime() conversions "%x" and "%X" respectively, and `)TABLE' by the
   table expression spanning token offsets EXPR_START to EXPR_END in LEXER.
   Any other `)' is copied through unchanged.

   NOTE(review): the enclosing loop header, the statement executed when IN
   becomes empty, and the `else' before the final two statements are on
   lines not visible in this extract. */
5890 put_title_text (struct string *out, struct substring in, time_t now,
5891 struct lexer *lexer, struct dictionary *dict,
5892 int expr_start, int expr_end)
/* Copy everything up to the next `)' verbatim and step past it. */
5896 size_t chunk = ss_find_byte (in, ')');
5897 ds_put_substring (out, ss_head (in, chunk));
5898 ss_advance (&in, chunk);
/* Nothing left means that there was no further `)' in the input. */
5899 if (ss_is_empty (in))
/* IN now begins with `)'; try each keyword in turn. */
5902 if (skip_prefix (&in, ss_cstr (")DATE")))
5903 put_strftime (out, now, "%x");
5904 else if (skip_prefix (&in, ss_cstr (")TIME")))
5905 put_strftime (out, now, "%X");
5906 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5907 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the parenthesis itself and move one byte forward. */
5910 ds_put_byte (out, ')');
5911 ss_advance (&in, 1);
5917 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5919 struct casereader *input = NULL;
5921 struct measure_guesser *mg = measure_guesser_create (ds);
5924 input = proc_open (ds);
5925 measure_guesser_run (mg, input);
5926 measure_guesser_destroy (mg);
5929 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5930 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5931 enum settings_value_show tvars = settings_get_show_variables ();
5932 for (size_t i = 0; i < n_vars; i++)
5933 vlabels[i] = (enum ctables_vlabel) tvars;
5935 struct pivot_table_look *look = pivot_table_look_unshare (
5936 pivot_table_look_ref (pivot_table_look_get_default ()));
5937 look->omit_empty = false;
5939 struct ctables *ct = xmalloc (sizeof *ct);
5940 *ct = (struct ctables) {
5941 .dict = dataset_dict (ds),
5943 .ctables_formats = FMT_SETTINGS_INIT,
5945 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5948 time_t now = time (NULL);
5953 const char *dot_string;
5954 const char *comma_string;
5956 static const struct ctf ctfs[4] = {
5957 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5958 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5959 { CTEF_PAREN, "-,(,),", "-.(.)." },
5960 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5962 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5963 for (size_t i = 0; i < 4; i++)
5965 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5966 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5967 fmt_number_style_from_string (s));
5970 if (!lex_force_match (lexer, T_SLASH))
5973 while (!lex_match_id (lexer, "TABLE"))
5975 if (lex_match_id (lexer, "FORMAT"))
5977 double widths[2] = { SYSMIS, SYSMIS };
5978 double units_per_inch = 72.0;
5980 while (lex_token (lexer) != T_SLASH)
5982 if (lex_match_id (lexer, "MINCOLWIDTH"))
5984 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5987 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5989 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5992 else if (lex_match_id (lexer, "UNITS"))
5994 lex_match (lexer, T_EQUALS);
5995 if (lex_match_id (lexer, "POINTS"))
5996 units_per_inch = 72.0;
5997 else if (lex_match_id (lexer, "INCHES"))
5998 units_per_inch = 1.0;
5999 else if (lex_match_id (lexer, "CM"))
6000 units_per_inch = 2.54;
6003 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6007 else if (lex_match_id (lexer, "EMPTY"))
6012 lex_match (lexer, T_EQUALS);
6013 if (lex_match_id (lexer, "ZERO"))
6015 /* Nothing to do. */
6017 else if (lex_match_id (lexer, "BLANK"))
6018 ct->zero = xstrdup ("");
6019 else if (lex_force_string (lexer))
6021 ct->zero = ss_xstrdup (lex_tokss (lexer));
6027 else if (lex_match_id (lexer, "MISSING"))
6029 lex_match (lexer, T_EQUALS);
6030 if (!lex_force_string (lexer))
6034 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6035 ? ss_xstrdup (lex_tokss (lexer))
6041 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6042 "UNITS", "EMPTY", "MISSING");
6047 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6048 && widths[0] > widths[1])
6050 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6054 for (size_t i = 0; i < 2; i++)
6055 if (widths[i] != SYSMIS)
6057 int *wr = ct->look->width_ranges[TABLE_HORZ];
6058 wr[i] = widths[i] / units_per_inch * 96.0;
6063 else if (lex_match_id (lexer, "VLABELS"))
6065 if (!lex_force_match_id (lexer, "VARIABLES"))
6067 lex_match (lexer, T_EQUALS);
6069 struct variable **vars;
6071 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6075 if (!lex_force_match_id (lexer, "DISPLAY"))
6080 lex_match (lexer, T_EQUALS);
6082 enum ctables_vlabel vlabel;
6083 if (lex_match_id (lexer, "DEFAULT"))
6084 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6085 else if (lex_match_id (lexer, "NAME"))
6087 else if (lex_match_id (lexer, "LABEL"))
6088 vlabel = CTVL_LABEL;
6089 else if (lex_match_id (lexer, "BOTH"))
6091 else if (lex_match_id (lexer, "NONE"))
6095 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6101 for (size_t i = 0; i < n_vars; i++)
6102 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6105 else if (lex_match_id (lexer, "MRSETS"))
6107 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6109 lex_match (lexer, T_EQUALS);
6110 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6113 else if (lex_match_id (lexer, "SMISSING"))
6115 if (lex_match_id (lexer, "VARIABLE"))
6116 ct->smissing_listwise = false;
6117 else if (lex_match_id (lexer, "LISTWISE"))
6118 ct->smissing_listwise = true;
6121 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6125 else if (lex_match_id (lexer, "PCOMPUTE"))
6127 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6130 else if (lex_match_id (lexer, "PPROPERTIES"))
6132 if (!ctables_parse_pproperties (lexer, ct))
6135 else if (lex_match_id (lexer, "WEIGHT"))
6137 if (!lex_force_match_id (lexer, "VARIABLE"))
6139 lex_match (lexer, T_EQUALS);
6140 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6144 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6146 if (lex_match_id (lexer, "COUNT"))
6148 lex_match (lexer, T_EQUALS);
6149 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6152 ct->hide_threshold = lex_integer (lexer);
6155 else if (ct->hide_threshold == 0)
6156 ct->hide_threshold = 5;
6160 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6161 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6162 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6166 if (!lex_force_match (lexer, T_SLASH))
6170 size_t allocated_tables = 0;
6173 if (ct->n_tables >= allocated_tables)
6174 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6175 sizeof *ct->tables);
6177 struct ctables_category *cat = xmalloc (sizeof *cat);
6178 *cat = (struct ctables_category) {
6180 .include_missing = false,
6181 .sort_ascending = true,
6184 struct ctables_categories *c = xmalloc (sizeof *c);
6185 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6186 *c = (struct ctables_categories) {
6193 struct ctables_categories **categories = xnmalloc (n_vars,
6194 sizeof *categories);
6195 for (size_t i = 0; i < n_vars; i++)
6198 struct ctables_table *t = xmalloc (sizeof *t);
6199 *t = (struct ctables_table) {
6201 .slabels_axis = PIVOT_AXIS_COLUMN,
6202 .slabels_visible = true,
6203 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6205 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6206 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6207 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6209 .clabels_from_axis = PIVOT_AXIS_LAYER,
6210 .clabels_to_axis = PIVOT_AXIS_LAYER,
6211 .categories = categories,
6212 .n_categories = n_vars,
6215 ct->tables[ct->n_tables++] = t;
6217 lex_match (lexer, T_EQUALS);
6218 int expr_start = lex_ofs (lexer);
6219 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6220 &t->axes[PIVOT_AXIS_ROW]))
6222 if (lex_match (lexer, T_BY))
6224 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6225 &t->axes[PIVOT_AXIS_COLUMN]))
6228 if (lex_match (lexer, T_BY))
6230 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6231 &t->axes[PIVOT_AXIS_LAYER]))
6235 int expr_end = lex_ofs (lexer);
6237 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6238 && !t->axes[PIVOT_AXIS_LAYER])
6240 lex_error (lexer, _("At least one variable must be specified."));
6244 const struct ctables_axis *scales[PIVOT_N_AXES];
6245 size_t n_scales = 0;
6246 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6248 scales[a] = find_scale (t->axes[a]);
6254 msg (SE, _("Scale variables may appear only on one axis."));
6255 if (scales[PIVOT_AXIS_ROW])
6256 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6257 _("This scale variable appears on the rows axis."));
6258 if (scales[PIVOT_AXIS_COLUMN])
6259 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6260 _("This scale variable appears on the columns axis."));
6261 if (scales[PIVOT_AXIS_LAYER])
6262 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6263 _("This scale variable appears on the layer axis."));
6267 const struct ctables_axis *summaries[PIVOT_N_AXES];
6268 size_t n_summaries = 0;
6269 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6271 summaries[a] = (scales[a]
6273 : find_categorical_summary_spec (t->axes[a]));
6277 if (n_summaries > 1)
6279 msg (SE, _("Summaries may appear only on one axis."));
6280 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6283 msg_at (SN, summaries[a]->loc,
6285 ? _("This variable on the rows axis has a summary.")
6286 : a == PIVOT_AXIS_COLUMN
6287 ? _("This variable on the columns axis has a summary.")
6288 : _("This variable on the layers axis has a summary."));
6290 msg_at (SN, summaries[a]->loc,
6291 _("This is a scale variable, so it always has a "
6292 "summary even if the syntax does not explicitly "
6297 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6298 if (n_summaries ? summaries[a] : t->axes[a])
6300 t->summary_axis = a;
6304 if (lex_token (lexer) == T_ENDCMD)
6306 if (!ctables_prepare_table (t))
6310 if (!lex_force_match (lexer, T_SLASH))
6313 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6315 if (lex_match_id (lexer, "SLABELS"))
6317 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6319 if (lex_match_id (lexer, "POSITION"))
6321 lex_match (lexer, T_EQUALS);
6322 if (lex_match_id (lexer, "COLUMN"))
6323 t->slabels_axis = PIVOT_AXIS_COLUMN;
6324 else if (lex_match_id (lexer, "ROW"))
6325 t->slabels_axis = PIVOT_AXIS_ROW;
6326 else if (lex_match_id (lexer, "LAYER"))
6327 t->slabels_axis = PIVOT_AXIS_LAYER;
6330 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6334 else if (lex_match_id (lexer, "VISIBLE"))
6336 lex_match (lexer, T_EQUALS);
6337 if (!parse_bool (lexer, &t->slabels_visible))
6342 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6347 else if (lex_match_id (lexer, "CLABELS"))
6349 if (lex_match_id (lexer, "AUTO"))
6351 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6352 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6354 else if (lex_match_id (lexer, "ROWLABELS"))
6356 lex_match (lexer, T_EQUALS);
6357 if (lex_match_id (lexer, "OPPOSITE"))
6358 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6359 else if (lex_match_id (lexer, "LAYER"))
6360 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6363 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6367 else if (lex_match_id (lexer, "COLLABELS"))
6369 lex_match (lexer, T_EQUALS);
6370 if (lex_match_id (lexer, "OPPOSITE"))
6371 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6372 else if (lex_match_id (lexer, "LAYER"))
6373 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6376 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6382 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6387 else if (lex_match_id (lexer, "CRITERIA"))
6389 if (!lex_force_match_id (lexer, "CILEVEL"))
6391 lex_match (lexer, T_EQUALS);
6393 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6395 t->cilevel = lex_number (lexer);
6398 else if (lex_match_id (lexer, "CATEGORIES"))
6400 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6404 else if (lex_match_id (lexer, "TITLES"))
6409 if (lex_match_id (lexer, "CAPTION"))
6410 textp = &t->caption;
6411 else if (lex_match_id (lexer, "CORNER"))
6413 else if (lex_match_id (lexer, "TITLE"))
6417 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6420 lex_match (lexer, T_EQUALS);
6422 struct string s = DS_EMPTY_INITIALIZER;
6423 while (lex_is_string (lexer))
6425 if (!ds_is_empty (&s))
6426 ds_put_byte (&s, ' ');
6427 put_title_text (&s, lex_tokss (lexer), now,
6428 lexer, dataset_dict (ds),
6429 expr_start, expr_end);
6433 *textp = ds_steal_cstr (&s);
6435 while (lex_token (lexer) != T_SLASH
6436 && lex_token (lexer) != T_ENDCMD);
6438 else if (lex_match_id (lexer, "SIGTEST"))
6440 int start_ofs = lex_ofs (lexer) - 1;
6443 t->chisq = xmalloc (sizeof *t->chisq);
6444 *t->chisq = (struct ctables_chisq) {
6446 .include_mrsets = true,
6447 .all_visible = true,
6453 if (lex_match_id (lexer, "TYPE"))
6455 lex_match (lexer, T_EQUALS);
6456 if (!lex_force_match_id (lexer, "CHISQUARE"))
6459 else if (lex_match_id (lexer, "ALPHA"))
6461 lex_match (lexer, T_EQUALS);
6462 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6464 t->chisq->alpha = lex_number (lexer);
6467 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6469 lex_match (lexer, T_EQUALS);
6470 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6473 else if (lex_match_id (lexer, "CATEGORIES"))
6475 lex_match (lexer, T_EQUALS);
6476 if (lex_match_id (lexer, "ALLVISIBLE"))
6477 t->chisq->all_visible = true;
6478 else if (lex_match_id (lexer, "SUBTOTALS"))
6479 t->chisq->all_visible = false;
6482 lex_error_expecting (lexer,
6483 "ALLVISIBLE", "SUBTOTALS");
6489 lex_error_expecting (lexer, "TYPE", "ALPHA",
6490 "INCLUDEMRSETS", "CATEGORIES");
6494 while (lex_token (lexer) != T_SLASH
6495 && lex_token (lexer) != T_ENDCMD);
6497 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6498 _("Support for SIGTEST not yet implemented."));
6501 else if (lex_match_id (lexer, "COMPARETEST"))
6503 int start_ofs = lex_ofs (lexer);
6506 t->pairwise = xmalloc (sizeof *t->pairwise);
6507 *t->pairwise = (struct ctables_pairwise) {
6509 .alpha = { .05, .05 },
6510 .adjust = BONFERRONI,
6511 .include_mrsets = true,
6512 .meansvariance_allcats = true,
6513 .all_visible = true,
6522 if (lex_match_id (lexer, "TYPE"))
6524 lex_match (lexer, T_EQUALS);
6525 if (lex_match_id (lexer, "PROP"))
6526 t->pairwise->type = PROP;
6527 else if (lex_match_id (lexer, "MEAN"))
6528 t->pairwise->type = MEAN;
6531 lex_error_expecting (lexer, "PROP", "MEAN");
6535 else if (lex_match_id (lexer, "ALPHA"))
6537 lex_match (lexer, T_EQUALS);
6539 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6541 double a0 = lex_number (lexer);
6544 lex_match (lexer, T_COMMA);
6545 if (lex_is_number (lexer))
6547 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6549 double a1 = lex_number (lexer);
6552 t->pairwise->alpha[0] = MIN (a0, a1);
6553 t->pairwise->alpha[1] = MAX (a0, a1);
6556 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6558 else if (lex_match_id (lexer, "ADJUST"))
6560 lex_match (lexer, T_EQUALS);
6561 if (lex_match_id (lexer, "BONFERRONI"))
6562 t->pairwise->adjust = BONFERRONI;
6563 else if (lex_match_id (lexer, "BH"))
6564 t->pairwise->adjust = BH;
6565 else if (lex_match_id (lexer, "NONE"))
6566 t->pairwise->adjust = 0;
6569 lex_error_expecting (lexer, "BONFERRONI", "BH",
6574 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6576 lex_match (lexer, T_EQUALS);
6577 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6580 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6582 lex_match (lexer, T_EQUALS);
6583 if (lex_match_id (lexer, "ALLCATS"))
6584 t->pairwise->meansvariance_allcats = true;
6585 else if (lex_match_id (lexer, "TESTEDCATS"))
6586 t->pairwise->meansvariance_allcats = false;
6589 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6593 else if (lex_match_id (lexer, "CATEGORIES"))
6595 lex_match (lexer, T_EQUALS);
6596 if (lex_match_id (lexer, "ALLVISIBLE"))
6597 t->pairwise->all_visible = true;
6598 else if (lex_match_id (lexer, "SUBTOTALS"))
6599 t->pairwise->all_visible = false;
6602 lex_error_expecting (lexer, "ALLVISIBLE",
6607 else if (lex_match_id (lexer, "MERGE"))
6609 lex_match (lexer, T_EQUALS);
6610 if (!parse_bool (lexer, &t->pairwise->merge))
6613 else if (lex_match_id (lexer, "STYLE"))
6615 lex_match (lexer, T_EQUALS);
6616 if (lex_match_id (lexer, "APA"))
6617 t->pairwise->apa_style = true;
6618 else if (lex_match_id (lexer, "SIMPLE"))
6619 t->pairwise->apa_style = false;
6622 lex_error_expecting (lexer, "APA", "SIMPLE");
6626 else if (lex_match_id (lexer, "SHOWSIG"))
6628 lex_match (lexer, T_EQUALS);
6629 if (!parse_bool (lexer, &t->pairwise->show_sig))
6634 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6635 "INCLUDEMRSETS", "MEANSVARIANCE",
6636 "CATEGORIES", "MERGE", "STYLE",
6641 while (lex_token (lexer) != T_SLASH
6642 && lex_token (lexer) != T_ENDCMD);
6644 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6645 _("Support for COMPARETEST not yet implemented."));
6650 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6651 "CRITERIA", "CATEGORIES", "TITLES",
6652 "SIGTEST", "COMPARETEST");
6656 if (!lex_match (lexer, T_SLASH))
6660 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6662 t->clabels_from_axis = PIVOT_AXIS_ROW;
6663 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6665 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6669 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6670 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6671 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6673 if (!ctables_prepare_table (t))
6676 while (lex_token (lexer) != T_ENDCMD);
6679 input = proc_open (ds);
6680 bool ok = ctables_execute (ds, input, ct);
6681 ok = proc_commit (ds) && ok;
6683 ctables_destroy (ct);
6684 return ok ? CMD_SUCCESS : CMD_FAILURE;
6689 ctables_destroy (ct);