1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
58 enum ctables_weighting
66 /* CTABLES table areas. */
/* The region of the output over which an area summary function (such as
   a percentage) is calculated.  Section-level areas come first, followed
   by subtable-level areas. */
68 enum ctables_area_type
70 /* Within a section, where stacked variables divide one section from
73 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
74 parse_ctables_summary_function() parses correctly: it tries the area
   names as prefixes in enumeration order, and LAYER is itself a prefix of
   LAYERROW and LAYERCOL. */
75 CTAT_TABLE, /* All layers of a whole section. */
76 CTAT_LAYERROW, /* Row in one layer within a section. */
77 CTAT_LAYERCOL, /* Column in one layer within a section. */
78 CTAT_LAYER, /* One layer within a section. */
80 /* Within a subtable, where a subtable pairs an innermost row variable with
81 an innermost column variable within a single layer. */
82 CTAT_SUBTABLE, /* Whole subtable. */
83 CTAT_ROW, /* Row within a subtable. */
84 CTAT_COL, /* Column within a subtable. */
/* Keyword for each area type, indexed by enum ctables_area_type.  These
   are matched as name prefixes by parse_ctables_summary_function() and
   emitted by ctables_summary_function_name(). */
88 static const char *ctables_area_type_name[N_CTATS] = {
89 [CTAT_TABLE] = "TABLE",
90 [CTAT_LAYER] = "LAYER",
91 [CTAT_LAYERROW] = "LAYERROW",
92 [CTAT_LAYERCOL] = "LAYERCOL",
93 [CTAT_SUBTABLE] = "SUBTABLE",
100 struct hmap_node node;
102 const struct ctables_cell *example;
105 double count[N_CTWS];
106 double valid[N_CTWS];
107 double total[N_CTWS];
108 struct ctables_sum *sums;
116 /* CTABLES summary functions. */
/* Classifies summary functions by whether they work on a single cell or on
   an area of cells, and by which weighting prefixes (U, E) they accept.
   The prefix flags in ctables_function_info[] are derived from this. */
118 enum ctables_function_type
120 /* A function that operates on data in a single cell. It operates on
121 effective weights. It does not have an unweighted version. */
124 /* A function that operates on data in a single cell. The function
125 operates on effective weights and has a U-prefixed unweighted
129 /* A function that operates on data in a single cell. It operates on
130 dictionary weights, and has a U-prefixed unweighted version and an
131 E-prefixed effective weight version. */
134 /* A function that operates on an area of cells. It operates on effective
135 weights and has a U-prefixed unweighted version. */
/* Which kinds of variables a summary function may be applied to.  The
   class of each function comes from ctables.inc and is checked in
   add_summary_spec(). */
146 enum ctables_function_availability
148 CTFA_ALL, /* Any variables. */
149 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
150 //CTFA_MRSETS, /* Only multiple-response sets */
/* All summary functions.  The enumerators are produced by expanding S()
   over the entries of ctables.inc, the same list from which the lookup
   tables elsewhere in this file are generated. */
153 enum ctables_summary_function
155 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
156 #include "ctables.inc"
161 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
163 #include "ctables.inc"
/* Static properties of one summary function: its base name (without any
   weighting or area prefix), its type, default format class, availability,
   and the prefixes that it accepts. */
167 struct ctables_function_info
169 struct substring basename;
170 enum ctables_function_type type;
171 enum ctables_format format;
172 enum ctables_function_availability availability;
174 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
175 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
176 bool is_area; /* Needs an area prefix. */
/* Properties of every summary function, indexed by enum
   ctables_summary_function.  Each entry is generated from ctables.inc; the
   prefix and area flags are derived from the function type. */
178 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
179 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
181 .basename = SS_LITERAL_INITIALIZER (NAME), \
184 .availability = AVAILABILITY, \
185 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
186 .e_prefix = (TYPE) == CTFT_UECELL, \
187 .is_area = (TYPE) == CTFT_AREA \
189 #include "ctables.inc"
/* Returns the default display format for FUNCTION.  The format class that
   ctables.inc records for FUNCTION selects among FMT_F with width 40,
   FMT_PCT with width 40 and 1 decimal, and the print format of VAR. */
193 static struct fmt_spec
194 ctables_summary_default_format (enum ctables_summary_function function,
195 const struct variable *var)
197 static const enum ctables_format default_formats[] = {
198 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
199 #include "ctables.inc"
202 switch (default_formats[function])
205 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
208 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
211 return *var_get_print_format (var);
/* Returns the availability class that ctables.inc records for summary
   function F, by indexing a table generated from that file. */
218 static enum ctables_function_availability
219 ctables_function_availability (enum ctables_summary_function f)
221 static enum ctables_function_availability availability[] = {
222 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
223 #include "ctables.inc"
227 return availability[f];
/* Parses a summary function name from the current identifier token in
   LEXER, storing results in *FUNCTION, *WEIGHTING, and *AREA.

   Names ending in .LCL, .UCL, or .SE are reported as not yet implemented.
   Otherwise an optional U (unweighted) or E (effective) prefix is removed,
   then an optional area prefix from ctables_area_type_name[], and the
   remainder is compared without regard to case against the base names in
   ctables_function_info[].  A bare areaPCT is accepted as shorthand for
   areaPCT.COUNT with effective weighting.  A candidate is checked against
   the prefixes it accepts and against whether it needs an area prefix.

   If no function is recognized, an error asking for a summary function
   name is reported.  The caller in ctables_axis_parse_postfix() tests the
   result for failure. */
231 parse_ctables_summary_function (struct lexer *lexer,
232 enum ctables_summary_function *function,
233 enum ctables_weighting *weighting,
234 enum ctables_area_type *area)
236 if (!lex_force_id (lexer))
239 struct substring name = lex_tokss (lexer);
240 if (ss_ends_with_case (name, ss_cstr (".LCL"))
241 || ss_ends_with_case (name, ss_cstr (".UCL"))
242 || ss_ends_with_case (name, ss_cstr (".SE")))
244 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
245 "is not yet implemented."));
/* The U and E prefixes are mutually exclusive: E is only looked for when
   no U prefix was found. */
249 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
250 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
252 bool has_area = false;
254 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
255 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
260 if (ss_equals_case (name, ss_cstr ("PCT")))
262 /* Special case where .COUNT suffix is omitted. */
263 *function = CTSF_areaPCT_COUNT;
264 *weighting = CTW_EFFECTIVE;
271 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
273 const struct ctables_function_info *cfi = &ctables_function_info[f];
274 if (ss_equals_case (cfi->basename, name))
277 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
280 *weighting = (e ? CTW_EFFECTIVE
282 : cfi->e_prefix ? CTW_DICTIONARY
289 lex_error (lexer, _("Expecting summary function name."));
/* Writes into BUFFER (of size BUFSIZE, using snprintf) the syntax keyword
   for FUNCTION as qualified by WEIGHTING and AREA: a weighting prefix,
   then the area name if FUNCTION is an area function, then the base name.
   The weighting prefix is U for CTW_UNWEIGHTED and empty for
   CTW_DICTIONARY; for other weightings it is E when FUNCTION accepts an E
   prefix. */
294 ctables_summary_function_name (enum ctables_summary_function function,
295 enum ctables_weighting weighting,
296 enum ctables_area_type area,
297 char *buffer, size_t bufsize)
299 const struct ctables_function_info *cfi = &ctables_function_info[function];
300 snprintf (buffer, bufsize, "%s%s%s",
301 (weighting == CTW_UNWEIGHTED ? "U"
302 : weighting == CTW_DICTIONARY ? ""
303 : cfi->e_prefix ? "E"
305 cfi->is_area ? ctables_area_type_name[area] : "",
306 cfi->basename.string);
/* Returns the default label string for FUNCTION given WEIGHTING and AREA.
   Most strings are only marked with N_(), which in this file expands to
   the bare string, so no translation happens here.  Area functions choose
   their label by AREA; the other functions distinguish weighted from
   unweighted variants where the labels differ, and counts additionally
   distinguish dictionary weighting from adjusted (effective) weighting.

   CTSF_PTILE must not be passed here: its label embeds the percentile and
   is built by ctables_summary_function_label() instead. */
311 ctables_summary_function_label__ (enum ctables_summary_function function,
312 enum ctables_weighting weighting,
313 enum ctables_area_type area)
315 bool w = weighting != CTW_UNWEIGHTED;
316 bool d = weighting == CTW_DICTIONARY;
317 enum ctables_area_type a = area;
321 return (d ? N_("Count")
322 : w ? N_("Adjusted Count")
323 : N_("Unweighted Count"));
325 case CTSF_areaPCT_COUNT:
328 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
329 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
330 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
331 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
332 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
333 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
334 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
338 case CTSF_areaPCT_VALIDN:
341 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
342 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
343 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
344 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
345 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
346 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
347 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
351 case CTSF_areaPCT_TOTALN:
354 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
355 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
356 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
357 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
358 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
359 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
360 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
364 case CTSF_MAXIMUM: return N_("Maximum");
365 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
366 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
367 case CTSF_MINIMUM: return N_("Minimum");
368 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
369 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
370 case CTSF_PTILE: NOT_REACHED ();
371 case CTSF_RANGE: return N_("Range");
372 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
373 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
374 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
375 case CTSF_TOTALN: return (d ? N_("Total N")
376 : w ? N_("Adjusted Total N")
377 : N_("Unweighted Total N"));
378 case CTSF_VALIDN: return (d ? N_("Valid N")
379 : w ? N_("Adjusted Valid N")
380 : N_("Unweighted Valid N"));
381 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
382 case CTSF_areaPCT_SUM:
385 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
386 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
387 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
388 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
389 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
390 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
391 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
398 /* Don't bother translating these: they are for developers only. */
399 case CTAT_TABLE: return "Table ID";
400 case CTAT_LAYER: return "Layer ID";
401 case CTAT_LAYERROW: return "Layer Row ID";
402 case CTAT_LAYERCOL: return "Layer Column ID";
403 case CTAT_SUBTABLE: return "Subtable ID";
404 case CTAT_ROW: return "Row ID";
405 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the default label for FUNCTION.  For
   CTSF_PTILE the label is formatted from the percentile with two decimals,
   with an Unweighted prefix when WEIGHTING is CTW_UNWEIGHTED, and the
   translated format string is used.  For every other function the label
   wraps the string from ctables_summary_function_label__(). */
413 static struct pivot_value *
414 ctables_summary_function_label (enum ctables_summary_function function,
415 enum ctables_weighting weighting,
416 enum ctables_area_type area,
419 if (function == CTSF_PTILE)
421 char *s = (weighting != CTW_UNWEIGHTED
422 ? xasprintf (_("Percentile %.2f"), percentile)
423 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
424 return pivot_value_new_user_text_nocopy (s);
427 return pivot_value_new_text (ctables_summary_function_label__ (
428 function, weighting, area));
431 /* CTABLES summaries. */
/* One summary to compute and display: the calculation to perform and how
   to present its result. */
433 struct ctables_summary_spec
435 /* The calculation to be performed.
437 'function' is the function to calculate. 'weighting' specifies which
438 weights to use; see enum ctables_weighting (NOTE(review): this comment
439 formerly described a boolean 'weighted'; confirm which value is required
   for functions that offer no choice). 'calc_area' is the area over which the
440 calculation takes place (for functions that target only an individual
441 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
442 percentile between 0 and 100 (for other functions it must be 0). */
443 enum ctables_summary_function function;
444 enum ctables_weighting weighting;
445 enum ctables_area_type calc_area;
446 double percentile; /* CTSF_PTILE only. */
448 /* How to display the result of the calculation.
450 'label' is a user-specified label, NULL if the user didn't specify
453 'user_area' is usually the same as 'calc_area', but when category labels
454 are rotated from one axis to another it swaps rows and columns.
456 'format' is the format for displaying the output. If
457 'is_ctables_format' is true, then 'format.type' is one of the special
458 CTEF_* formats instead of the standard ones. */
460 enum ctables_area_type user_area;
461 struct fmt_spec format;
462 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Makes DST a clone of SRC.  The label is duplicated when it is nonnull,
   so that DST does not share the string with SRC. */
469 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
470 const struct ctables_summary_spec *src)
473 dst->label = xstrdup_if_nonnull (src->label);
477 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
483 /* Collections of summary functions. */
/* An array of summary specs, grown on demand by add_summary_spec(),
   together with the variable that the specs summarize. */
485 struct ctables_summary_spec_set
487 struct ctables_summary_spec *specs;
491 /* The variable to which the summary specs are applied. */
492 struct variable *var;
494 /* Whether the variable to which the summary specs are applied is a scale
495 variable for the purpose of summarization.
497 (VALIDN and TOTALN act differently for summarizing scale and categorical
501 /* If any of these optional additional scale variables are missing, then
502 treat 'var' as if it's missing too. This is for implementing
503 SMISSING=LISTWISE. */
504 struct variable **listwise_vars;
505 size_t n_listwise_vars;
/* Makes DST a copy of SRC.  Every spec is cloned with
   ctables_summary_spec_clone() into a newly allocated array; when SRC
   holds no specs, no array is allocated and NULL is used instead. */
509 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
510 const struct ctables_summary_spec_set *src)
512 struct ctables_summary_spec *specs
513 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
514 for (size_t i = 0; i < src->n; i++)
515 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
517 *dst = (struct ctables_summary_spec_set) {
522 .is_scale = src->is_scale,
/* Releases resources held by SET: each spec is uninitialized with
   ctables_summary_spec_uninit() and the listwise variable array is
   freed. */
527 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
529 for (size_t i = 0; i < set->n; i++)
530 ctables_summary_spec_uninit (&set->specs[i]);
531 free (set->listwise_vars);
/* Examines the listwise variables of SPECS in case C, testing the numeric
   value of each with var_is_num_missing().  Presumably reports true as
   soon as one of them is missing and false otherwise -- TODO confirm. */
536 is_listwise_missing (const struct ctables_summary_spec_set *specs,
537 const struct ccase *c)
539 for (size_t i = 0; i < specs->n_listwise_vars; i++)
541 const struct variable *var = specs->listwise_vars[i];
542 if (var_is_num_missing (var, case_num (c, var)))
549 /* CTABLES postcompute expressions. */
/* A node in a postcompute expression tree.  The operation selects which
   of the payload members below is meaningful, as the comment on each
   member states.  The location records where the node came from in the
   syntax and is used when issuing diagnostics about the expression. */
551 struct ctables_pcexpr
561 enum ctables_pcexpr_op
564 CTPO_CONSTANT, /* 5 */
565 CTPO_CAT_NUMBER, /* [5] */
566 CTPO_CAT_STRING, /* ["STRING"] */
567 CTPO_CAT_NRANGE, /* [LO THRU 5] */
568 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
569 CTPO_CAT_MISSING, /* MISSING */
570 CTPO_CAT_OTHERNM, /* OTHERNM */
571 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
572 CTPO_CAT_TOTAL, /* TOTAL */
586 /* CTPO_CAT_NUMBER. */
589 /* CTPO_CAT_STRING, in dictionary encoding. */
590 struct substring string;
592 /* CTPO_CAT_NRANGE. */
595 /* CTPO_CAT_SRANGE. */
596 struct substring srange[2];
598 /* CTPO_CAT_SUBTOTAL. */
599 size_t subtotal_index;
601 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
602 One element: CTPO_NEG. */
603 struct ctables_pcexpr *subs[2];
606 /* Source location. */
607 struct msg_location *location;
611 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
614 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
615 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
616 struct ctables_pcexpr *sub1);
618 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
619 struct dictionary *);
/* Frees expression E along with what it owns: the string payload of
   CTPO_CAT_STRING and CTPO_CAT_SRANGE nodes, the subexpressions in subs[]
   for nodes that have them (destroyed recursively), and the source
   location.  The remaining category operands own no extra storage. */
622 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
628 case CTPO_CAT_STRING:
629 ss_dealloc (&e->string);
632 case CTPO_CAT_SRANGE:
633 for (size_t i = 0; i < 2; i++)
634 ss_dealloc (&e->srange[i]);
643 for (size_t i = 0; i < 2; i++)
644 ctables_pcexpr_destroy (e->subs[i]);
648 case CTPO_CAT_NUMBER:
649 case CTPO_CAT_NRANGE:
650 case CTPO_CAT_MISSING:
651 case CTPO_CAT_OTHERNM:
652 case CTPO_CAT_SUBTOTAL:
657 msg_location_destroy (e->location);
/* Returns a newly allocated expression node for operator OP with operands
   SUB0 and SUB1.  Its location is the merge of the two operand locations,
   which are read through SUB0 and SUB1, so both must be nonnull. */
662 static struct ctables_pcexpr *
663 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
664 struct ctables_pcexpr *sub0,
665 struct ctables_pcexpr *sub1)
667 struct ctables_pcexpr *e = xmalloc (sizeof *e);
668 *e = (struct ctables_pcexpr) {
670 .subs = { sub0, sub1 },
671 .location = msg_location_merged (sub0->location, sub1->location),
676 /* How to parse an operator. */
679 enum token_type token;
680 enum ctables_pcexpr_op op;
/* Looks for an entry among the N_OPS elements of OPS whose token type
   matches the current token in LEXER.  Entries for T_NEG_NUM are handled
   differently from the rest (presumably the token is left in place because
   it also carries the operand value -- TODO confirm). */
683 static const struct operator *
684 ctables_pcexpr_match_operator (struct lexer *lexer,
685 const struct operator ops[], size_t n_ops)
687 for (const struct operator *op = ops; op < ops + n_ops; op++)
688 if (lex_token (lexer) == op->token)
690 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators from OPS that follows the already
   parsed operand LHS.  Each right operand is parsed with PARSE_NEXT_LEVEL
   and folded into LHS, so the resulting tree is left-associative.  If
   CHAIN_WARNING is nonnull, it is issued as a warning at the location of
   LHS when the operator count exceeds 1.  LHS is destroyed if a right
   operand fails to parse. */
699 static struct ctables_pcexpr *
700 ctables_pcexpr_parse_binary_operators__ (
701 struct lexer *lexer, struct dictionary *dict,
702 const struct operator ops[], size_t n_ops,
703 parse_recursively_func *parse_next_level,
704 const char *chain_warning, struct ctables_pcexpr *lhs)
706 for (int op_count = 0; ; op_count++)
708 const struct operator *op
709 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
712 if (op_count > 1 && chain_warning)
713 msg_at (SW, lhs->location, "%s", chain_warning);
718 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
721 ctables_pcexpr_destroy (lhs);
725 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one operand with PARSE_NEXT_LEVEL, then any chain of binary
   operators from OPS that follows it, by handing the operand to
   ctables_pcexpr_parse_binary_operators__(). */
729 static struct ctables_pcexpr *
730 ctables_pcexpr_parse_binary_operators (
731 struct lexer *lexer, struct dictionary *dict,
732 const struct operator ops[], size_t n_ops,
733 parse_recursively_func *parse_next_level, const char *chain_warning)
735 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
739 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
744 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
745 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression for the numeric range
   from LOW to HIGH. */
747 static struct ctables_pcexpr
748 ctpo_cat_nrange (double low, double high)
750 return (struct ctables_pcexpr) {
751 .op = CTPO_CAT_NRANGE,
752 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression for the string range
   from LOW to HIGH.  The substrings are stored as given, not copied. */
756 static struct ctables_pcexpr
757 ctpo_cat_srange (struct substring low, struct substring high)
759 return (struct ctables_pcexpr) {
760 .op = CTPO_CAT_SRANGE,
761 .srange = { low, high },
/* Recodes the current string token of LEXER between UTF-8 and the
   encoding of DICT and removes trailing spaces from the result.
   Presumably the result is in the dictionary encoding, which is how
   struct ctables_pcexpr documents its category strings -- confirm the
   argument order of recode_substring_pool(). */
765 static struct substring
766 parse_substring (struct lexer *lexer, struct dictionary *dict)
768 struct substring s = recode_substring_pool (
769 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
770 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression and returns it as a newly
   allocated node whose location covers the tokens consumed.  Accepted
   forms, in the order tried:

   - A number, yielding CTPO_CONSTANT.
   - MISSING, OTHERNM, or TOTAL.
   - SUBTOTAL, optionally followed by a bracketed integer of at least 1;
     without the brackets the subtotal index is 0.
   - A bracketed category: a number, a string, or a range written with
     THRU, where LO may stand for the lower bound and HI for the upper
     bound.  Open-ended numeric ranges use -DBL_MAX or DBL_MAX as the
     bound, and open-ended string ranges use a null string.
   - A parenthesized subexpression, parsed at the addition level.

   Anything else is reported with lex_error().  If the closing bracket of
   a category is missing, strings already parsed for it are freed. */
775 static struct ctables_pcexpr *
776 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
778 int start_ofs = lex_ofs (lexer);
779 struct ctables_pcexpr e;
780 if (lex_is_number (lexer))
782 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
783 .number = lex_number (lexer) };
786 else if (lex_match_id (lexer, "MISSING"))
787 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
788 else if (lex_match_id (lexer, "OTHERNM"))
789 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
790 else if (lex_match_id (lexer, "TOTAL"))
791 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
792 else if (lex_match_id (lexer, "SUBTOTAL"))
794 size_t subtotal_index = 0;
795 if (lex_match (lexer, T_LBRACK))
797 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
799 subtotal_index = lex_integer (lexer);
801 if (!lex_force_match (lexer, T_RBRACK))
804 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
805 .subtotal_index = subtotal_index };
807 else if (lex_match (lexer, T_LBRACK))
809 if (lex_match_id (lexer, "LO"))
811 if (!lex_force_match_id (lexer, "THRU"))
814 if (lex_is_string (lexer))
816 struct substring low = { .string = NULL };
817 struct substring high = parse_substring (lexer, dict);
818 e = ctpo_cat_srange (low, high);
822 if (!lex_force_num (lexer))
824 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
828 else if (lex_is_number (lexer))
830 double number = lex_number (lexer);
832 if (lex_match_id (lexer, "THRU"))
834 if (lex_match_id (lexer, "HI"))
835 e = ctpo_cat_nrange (number, DBL_MAX);
838 if (!lex_force_num (lexer))
840 e = ctpo_cat_nrange (number, lex_number (lexer));
845 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
848 else if (lex_is_string (lexer))
850 struct substring s = parse_substring (lexer, dict);
852 if (lex_match_id (lexer, "THRU"))
854 struct substring high;
856 if (lex_match_id (lexer, "HI"))
857 high = (struct substring) { .string = NULL };
860 if (!lex_force_string (lexer))
865 high = parse_substring (lexer, dict);
868 e = ctpo_cat_srange (s, high);
871 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
875 lex_error (lexer, NULL);
879 if (!lex_force_match (lexer, T_RBRACK))
881 if (e.op == CTPO_CAT_STRING)
882 ss_dealloc (&e.string);
883 else if (e.op == CTPO_CAT_SRANGE)
885 ss_dealloc (&e.srange[0]);
886 ss_dealloc (&e.srange[1]);
891 else if (lex_match (lexer, T_LPAREN))
893 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
896 if (!lex_force_match (lexer, T_RPAREN))
898 ctables_pcexpr_destroy (ep);
905 lex_error (lexer, NULL);
909 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
910 return xmemdup (&e, sizeof e);
/* Returns a newly allocated unary node (presumably CTPO_NEG, going by the
   name) with operand SUB.  Its location runs from token offset START_OFS
   up to the token before the current one in LEXER. */
913 static struct ctables_pcexpr *
914 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
915 struct lexer *lexer, int start_ofs)
917 struct ctables_pcexpr *e = xmalloc (sizeof *e);
918 *e = (struct ctables_pcexpr) {
921 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  In the ordinary case this is a chain
   of T_EXP operators over primaries, with a warning about left
   associativity available for chained use.

   When the current token is a negative number directly followed by T_EXP,
   the sign is split off so that exponentiation binds more tightly than the
   minus: the left operand is built from the token value with its sign
   flipped, the operator chain is parsed from there, and the whole result
   is wrapped by ctables_pcexpr_allocate_neg(). */
926 static struct ctables_pcexpr *
927 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
929 static const struct operator op = { T_EXP, CTPO_POW };
931 const char *chain_warning =
932 _("The exponentiation operator (`**') is left-associative: "
933 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
934 "To disable this warning, insert parentheses.");
936 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
937 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
938 ctables_pcexpr_parse_primary,
941 /* Special case for situations like "-5**6", which must be parsed as
944 int start_ofs = lex_ofs (lexer);
945 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
946 *lhs = (struct ctables_pcexpr) {
948 .number = -lex_tokval (lexer),
949 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
953 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
955 ctables_pcexpr_parse_primary, chain_warning, lhs);
959 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
962 /* Parses the unary minus level.  If the current token is T_DASH, it is
   consumed and the operand (parsed recursively, so that repeated minus
   signs nest) is wrapped by ctables_pcexpr_allocate_neg().  Otherwise
   parsing falls through to the exponentiation level. */
963 static struct ctables_pcexpr *
964 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
966 int start_ofs = lex_ofs (lexer);
967 if (!lex_match (lexer, T_DASH))
968 return ctables_pcexpr_parse_exp (lexer, dict);
970 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
974 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
977 /* Parses the multiplication and division level.  Operands are parsed at
   the unary minus level, and no chaining warning is requested. */
978 static struct ctables_pcexpr *
979 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
981 static const struct operator ops[] =
983 { T_ASTERISK, CTPO_MUL },
984 { T_SLASH, CTPO_DIV },
987 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
988 sizeof ops / sizeof *ops,
989 ctables_pcexpr_parse_neg, NULL);
992 /* Parses the addition and subtraction level.  Operands are parsed at the
   multiplication level.  T_NEG_NUM is listed as an addition operator, so
   that a negative number token directly after an operand is handled as
   adding that negative number (NOTE(review): this relies on
   ctables_pcexpr_match_operator() treating T_NEG_NUM specially;
   confirm). */
993 static struct ctables_pcexpr *
994 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
996 static const struct operator ops[] =
998 { T_PLUS, CTPO_ADD },
999 { T_DASH, CTPO_SUB },
1000 { T_NEG_NUM, CTPO_ADD },
1003 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1004 ops, sizeof ops / sizeof *ops,
1005 ctables_pcexpr_parse_mul, NULL);
1008 /* CTABLES axis expressions. */
1010 /* CTABLES has a number of extra formats that we implement via custom
1011 currency specifications on an alternate fmt_settings. */
1012 #define CTEF_NEGPAREN FMT_CCA
1013 #define CTEF_NEQUAL FMT_CCB
1014 #define CTEF_PAREN FMT_CCC
1015 #define CTEF_PCTPAREN FMT_CCD
1017 enum ctables_summary_variant
1026 enum ctables_axis_op
1042 struct variable *var;
1044 struct ctables_summary_spec_set specs[N_CSVS];
1048 struct ctables_axis *subs[2];
1051 struct msg_location *loc;
/* Frees AXIS and the tree below it.  The summary spec sets are
   uninitialized for every variant, both subaxes are destroyed recursively,
   and the location is released; which of the first two applies presumably
   depends on the kind of node -- TODO confirm. */
1055 ctables_axis_destroy (struct ctables_axis *axis)
1063 for (size_t i = 0; i < N_CSVS; i++)
1064 ctables_summary_spec_set_uninit (&axis->specs[i]);
1069 ctables_axis_destroy (axis->subs[0]);
1070 ctables_axis_destroy (axis->subs[1]);
1073 msg_location_destroy (axis->loc);
/* Returns a newly allocated axis node of kind OP that combines SUB0 and
   SUB1.  Its location runs from token offset START_OFS up to the token
   before the current one in LEXER. */
1077 static struct ctables_axis *
1078 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1079 struct ctables_axis *sub0,
1080 struct ctables_axis *sub1,
1081 struct lexer *lexer, int start_ofs)
1083 struct ctables_axis *axis = xmalloc (sizeof *axis);
1084 *axis = (struct ctables_axis) {
1086 .subs = { sub0, sub1 },
1087 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State passed through the axis expression parsing functions: the lexer
   that supplies tokens and the dictionary in which variable names are
   looked up. */
1092 struct ctables_axis_parse_ctx
1094 struct lexer *lexer;
1095 struct dictionary *dict;
/* Returns a new pivot_value label for SPEC.  The default label comes from
   ctables_summary_function_label(), using the user-visible area and the
   percentile of SPEC.  A user-specified label is copied piece by piece
   with each occurrence of )CILEVEL replaced by CILEVEL formatted with
   %g. */
1098 static struct pivot_value *
1099 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1102 return ctables_summary_function_label (spec->function, spec->weighting,
1103 spec->user_area, spec->percentile);
1106 struct substring in = ss_cstr (spec->label);
1107 struct substring target = ss_cstr (")CILEVEL");
1109 struct string out = DS_EMPTY_INITIALIZER;
1112 size_t chunk = ss_find_substring (in, target);
1113 ds_put_substring (&out, ss_head (in, chunk));
1114 ss_advance (&in, chunk);
1116 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1118 ss_advance (&in, target.length);
1119 ds_put_format (&out, "%g", cilevel);
/* Adds a summary specification built from the arguments to the SV variant
   of AXIS.

   If AXIS is a variable (CTAO_VAR), FUNCTION is first checked against its
   availability class: a function that applies only to scale variables is
   rejected with an error at LOC, plus a note at the location of AXIS, when
   the variable is not scale and SV is not CSV_TOTAL.  The spec array is
   then grown if it is full and the new spec is appended.  The spec takes
   its own copy of LABEL and uses the default format for FUNCTION when
   FORMAT is null.

   For any other kind of axis the request is forwarded to both subaxes,
   and a false result from either is treated as failure. */
1125 add_summary_spec (struct ctables_axis *axis,
1126 enum ctables_summary_function function,
1127 enum ctables_weighting weighting,
1128 enum ctables_area_type area, double percentile,
1129 const char *label, const struct fmt_spec *format,
1130 bool is_ctables_format, const struct msg_location *loc,
1131 enum ctables_summary_variant sv)
1133 if (axis->op == CTAO_VAR)
1135 char function_name[128];
1136 ctables_summary_function_name (function, weighting, area,
1137 function_name, sizeof function_name);
1138 const char *var_name = var_get_name (axis->var);
1139 switch (ctables_function_availability (function))
1143 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1144 "response sets."), function_name);
1145 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1151 if (!axis->scale && sv != CSV_TOTAL)
1154 _("Summary function %s applies only to scale variables."),
1156 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1166 struct ctables_summary_spec_set *set = &axis->specs[sv];
1167 if (set->n >= set->allocated)
1168 set->specs = x2nrealloc (set->specs, &set->allocated,
1169 sizeof *set->specs);
1171 struct ctables_summary_spec *dst = &set->specs[set->n++];
1172 *dst = (struct ctables_summary_spec) {
1173 .function = function,
1174 .weighting = weighting,
1177 .percentile = percentile,
1178 .label = xstrdup_if_nonnull (label),
1179 .format = (format ? *format
1180 : ctables_summary_default_format (function, axis->var)),
1181 .is_ctables_format = is_ctables_format,
1187 for (size_t i = 0; i < 2; i++)
1188 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1189 percentile, label, format, is_ctables_format,
1196 static struct ctables_axis *ctables_axis_parse_stack (
1197 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack
   expression or a single variable.  A name that begins with $ is rejected
   because multiple response sets are not implemented.  A variable may be
   followed by [S] or [C] to force it to be treated as scale or as
   categorical; otherwise its measurement level decides.  Using a string
   variable as scale is an error, and the new axis is destroyed in that
   case. */
1199 static struct ctables_axis *
1200 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1202 if (lex_match (ctx->lexer, T_LPAREN))
1204 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1205 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1207 ctables_axis_destroy (sub);
1213 if (!lex_force_id (ctx->lexer))
1216 if (lex_tokcstr (ctx->lexer)[0] == '$')
1218 lex_error (ctx->lexer,
1219 _("Multiple response set support not implemented."));
1223 int start_ofs = lex_ofs (ctx->lexer);
1224 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1228 struct ctables_axis *axis = xmalloc (sizeof *axis);
1229 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1231 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1232 : lex_match_phrase (ctx->lexer, "[C]") ? false
1233 : var_get_measure (var) == MEASURE_SCALE);
1234 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1235 lex_ofs (ctx->lexer) - 1);
1236 if (axis->scale && var_is_alpha (var))
1238 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1240 var_get_name (var));
1241 ctables_axis_destroy (axis);
/* Tests whether S contains at least one of the ASCII digits 0 through 9.
   strcspn() yields the length of the leading run that has no digit, so the
   character at that index is the terminator only when S has no digit at
   all. */
1249 has_digit (const char *s)
1251 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary from LEXER into *FORMAT and sets
   *IS_CTABLES_FORMAT to say whether it is one of the CTABLES-only formats.
   The type names NEGPAREN, NEQUAL, PAREN, and PCTPAREN, compared without
   regard to case, map to the corresponding CTEF_* format.  Any other type
   is parsed as a standard format, which must pass the output format check
   and be compatible with numeric values.  For the CTABLES-only formats,
   errors are reported for a width that is too small (the message asks for
   2 or greater) and for a width that is not greater than the number of
   decimals. */
1255 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1256 bool *is_ctables_format)
1258 char type[FMT_TYPE_LEN_MAX + 1];
1259 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1262 if (!strcasecmp (type, "NEGPAREN"))
1263 format->type = CTEF_NEGPAREN;
1264 else if (!strcasecmp (type, "NEQUAL"))
1265 format->type = CTEF_NEQUAL;
1266 else if (!strcasecmp (type, "PAREN"))
1267 format->type = CTEF_PAREN;
1268 else if (!strcasecmp (type, "PCTPAREN"))
1269 format->type = CTEF_PCTPAREN;
1272 *is_ctables_format = false;
1273 return (parse_format_specifier (lexer, format)
1274 && fmt_check_output (format)
1275 && fmt_check_type_compat (format, VAL_NUMERIC));
1281 lex_next_error (lexer, -1, -1,
1282 _("Output format %s requires width 2 or greater."), type);
1285 else if (format->d > format->w - 1)
1287 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1288 "greater than decimals."), type);
1293 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed
   list of summary specifications.  Each specification is a summary
   function name, then a percentile between 0 and 100 for PTILE only, then
   an optional string label, then an optional format, which is recognized
   as an identifier that contains a digit.  Each one is added to the axis
   with add_summary_spec().  A comma may follow a specification.  Among the
   cell summaries, TOTALS followed by a left bracket introduces the
   specifications for totals (presumably switching the variant to
   CSV_TOTAL), and these need a closing bracket of their own.

   The axis parsed so far is destroyed on the failure path. */
1298 static struct ctables_axis *
1299 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1301 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1302 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1305 enum ctables_summary_variant sv = CSV_CELL;
1308 int start_ofs = lex_ofs (ctx->lexer);
1310 /* Parse function. */
1311 enum ctables_summary_function function;
1312 enum ctables_weighting weighting;
1313 enum ctables_area_type area;
1314 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1318 /* Parse percentile. */
1319 double percentile = 0;
1320 if (function == CTSF_PTILE)
1322 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1324 percentile = lex_number (ctx->lexer);
1325 lex_get (ctx->lexer);
1330 if (lex_is_string (ctx->lexer))
1332 label = ss_xstrdup (lex_tokss (ctx->lexer));
1333 lex_get (ctx->lexer);
1337 struct fmt_spec format;
1338 const struct fmt_spec *formatp;
1339 bool is_ctables_format = false;
1340 if (lex_token (ctx->lexer) == T_ID
1341 && has_digit (lex_tokcstr (ctx->lexer)))
1343 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1344 &is_ctables_format))
1354 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1355 lex_ofs (ctx->lexer) - 1);
1356 add_summary_spec (sub, function, weighting, area, percentile, label,
1357 formatp, is_ctables_format, loc, sv);
1359 msg_location_destroy (loc);
1361 lex_match (ctx->lexer, T_COMMA);
1362 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1364 if (!lex_force_match (ctx->lexer, T_LBRACK))
1368 else if (lex_match (ctx->lexer, T_RBRACK))
1370 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1377 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A CTAO_VAR
   node yields itself if its 'scale' flag is set and NULL otherwise; for other
   nodes, both entries of 'subs' are searched recursively.

   NOTE(review): a null AXIS is presumably handled by the first branch, which
   is not visible here -- confirm. */
1381 static const struct ctables_axis *
1382 find_scale (const struct ctables_axis *axis)
1386 else if (axis->op == CTAO_VAR)
1387 return axis->scale ? axis : NULL;
1390 for (size_t i = 0; i < 2; i++)
1392 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical variable that has
   summary specifications.  A CTAO_VAR node qualifies when it is not a scale
   variable and its CSV_CELL specification set is nonempty; for other nodes,
   both entries of 'subs' are searched recursively. */
1400 static const struct ctables_axis *
1401 find_categorical_summary_spec (const struct ctables_axis *axis)
1405 else if (axis->op == CTAO_VAR)
1406 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1409 for (size_t i = 0; i < 2; i++)
1411 const struct ctables_axis *sum
1412 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions separated by '>' (T_GT),
   combining each pair into a CTAO_NEST node.

   Two constraints are checked for each nesting, and an error plus explanatory
   notes are issued if either is violated:

   - The left and right operands must not both contain a scale variable.

   - The left (outer) operand must not contain a categorical variable that has
     a summary specification. */
1420 static struct ctables_axis *
1421 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1423 int start_ofs = lex_ofs (ctx->lexer);
1424 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1428 while (lex_match (ctx->lexer, T_GT))
1430 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1433 ctables_axis_destroy (lhs);
1437 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1438 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1440 const struct ctables_axis *outer_scale = find_scale (lhs);
1441 const struct ctables_axis *inner_scale = find_scale (rhs);
1442 if (outer_scale && inner_scale)
1444 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1445 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1446 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1447 ctables_axis_destroy (nest);
1451 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1454 msg_at (SE, nest->loc,
1455 _("Summaries may only be requested for categorical variables "
1456 "at the innermost nesting level."));
1457 msg_at (SN, outer_sum->loc,
1458 _("This outer categorical variable has a summary."));
1459 ctables_axis_destroy (nest);
/* Parses one or more nest expressions separated by '+' (T_PLUS), folding them
   left to right into CTAO_STACK nodes.  If a right-hand operand cannot be
   parsed, the expression accumulated so far is destroyed. */
1469 static struct ctables_axis *
1470 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1472 int start_ofs = lex_ofs (ctx->lexer);
1473 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1477 while (lex_match (ctx->lexer, T_PLUS))
1479 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1482 ctables_axis_destroy (lhs);
1486 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1487 ctx->lexer, start_ofs);
/* Top-level entry point for parsing one axis expression from LEXER.  The
   current token is first tested against BY, '/', and end-of-command; in the
   remaining case a parsing context is set up and the result of
   ctables_axis_parse_stack() is stored in *AXISP.

   NOTE(review): presumably BY, '/', or end-of-command means an empty axis
   that leaves *AXISP null and still counts as success -- confirm. */
1494 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1495 struct ctables_axis **axisp)
1498 if (lex_token (lexer) == T_BY
1499 || lex_token (lexer) == T_SLASH
1500 || lex_token (lexer) == T_ENDCMD)
1503 struct ctables_axis_parse_ctx ctx = {
1507 *axisp = ctables_axis_parse_stack (&ctx);
1511 /* CTABLES categories. */
/* A list of categories for one or more variables.  Other code in this file
   reads its 'n_refs', 'n_cats', and 'show_empty' members in addition to the
   'cats' array. */
1513 struct ctables_categories
1516 struct ctables_category *cats;
/* A single entry in a struct ctables_categories.  The category type selects
   which of the type-specific members below are meaningful; the comment on
   each member names the types it applies to. */
1521 struct ctables_category
1523 enum ctables_category_type
1525 /* Explicit category lists. */
1528 CCT_NRANGE, /* Numerical range. */
1529 CCT_SRANGE, /* String range. */
1534 /* Totals and subtotals. */
1538 /* Implicit category lists. */
1543 /* For contributing to TOTALN. */
1544 CCT_EXCLUDED_MISSING,
1548 struct ctables_category *subtotal;
1554 double number; /* CCT_NUMBER. */
1555 struct substring string; /* CCT_STRING, in dictionary encoding. */
1556 double nrange[2]; /* CCT_NRANGE. */
1557 struct substring srange[2]; /* CCT_SRANGE. */
1561 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1562 bool hide_subcategories; /* CCT_SUBTOTAL. */
1565 /* CCT_POSTCOMPUTE. */
1568 const struct ctables_postcompute *pc;
1569 enum fmt_type parse_format;
1572 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1575 bool include_missing;
1576 bool sort_ascending;
1579 enum ctables_summary_function sort_function;
1580 enum ctables_weighting weighting;
1581 enum ctables_area_type area;
1582 struct variable *sort_var;
1587 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1588 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1589 struct msg_location *location;
/* Releases the resources that CAT owns: its source location and, depending on
   its type, its string, its string range endpoints, or its total label.  CAT
   itself is not freed here; callers pass elements of a 'cats' array. */
1593 ctables_category_uninit (struct ctables_category *cat)
1598 msg_location_destroy (cat->location);
1605 case CCT_POSTCOMPUTE:
1609 ss_dealloc (&cat->string);
1613 ss_dealloc (&cat->srange[0]);
1614 ss_dealloc (&cat->srange[1]);
1619 free (cat->total_label);
1627 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be "null", that is, have a
   null 'string' pointer.  Two null substrings are equal, a null substring
   never equals a non-null one, and two non-null substrings are compared with
   ss_equals(). */
1633 nullable_substring_equal (const struct substring *a,
1634 const struct substring *b)
1636 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are never
   equal.  Otherwise the comparison depends on the type: numbers, strings,
   numeric ranges, and string ranges compare by value (string range endpoints
   may be null), postcomputes compare by identity of 'pc', total labels
   compare with strcmp(), and the sorting-related categories compare their
   'include_missing', 'sort_ascending', 'sort_function', 'sort_var', and
   'percentile' members.

   NOTE(review): the 'weighting' and 'area' members are not part of the
   comparison for the sorting-related categories -- confirm that this is
   intentional. */
1640 ctables_category_equal (const struct ctables_category *a,
1641 const struct ctables_category *b)
1643 if (a->type != b->type)
1649 return a->number == b->number;
1652 return ss_equals (a->string, b->string);
1655 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1658 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1659 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1665 case CCT_POSTCOMPUTE:
1666 return a->pc == b->pc;
1670 return !strcmp (a->total_label, b->total_label);
1675 return (a->include_missing == b->include_missing
1676 && a->sort_ascending == b->sort_ascending
1677 && a->sort_function == b->sort_function
1678 && a->sort_var == b->sort_var
1679 && a->percentile == b->percentile);
1681 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry.
   Tear-down uninitializes every category in C's 'cats' array.

   NOTE(review): presumably a null C is tolerated and tear-down happens only
   when the count reaches zero -- confirm. */
1689 ctables_categories_unref (struct ctables_categories *c)
1694 assert (c->n_refs > 0);
1698 for (size_t i = 0; i < c->n_cats; i++)
1699 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They are unequal if they differ in the
   number of categories or in 'show_empty'; otherwise corresponding categories
   are compared pairwise, in order, with ctables_category_equal(). */
1705 ctables_categories_equal (const struct ctables_categories *a,
1706 const struct ctables_categories *b)
1708 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1711 for (size_t i = 0; i < a->n_cats; i++)
1712 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns a category, by value, whose numeric range is LOW to HIGH.  Callers
   in this file pass -DBL_MAX or DBL_MAX for an open-ended bound. */
1718 static struct ctables_category
1719 cct_nrange (double low, double high)
1721 return (struct ctables_category) {
1723 .nrange = { low, high }
/* Returns a category, by value, whose string range is LOW to HIGH.  Callers
   in this file pass a substring with a null 'string' pointer for an
   open-ended bound.  The substrings are stored as given, not copied. */
1727 static struct ctables_category
1728 cct_srange (struct substring low, struct substring high)
1730 return (struct ctables_category) {
1732 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category, after the keyword
   itself.  If '=' follows, a string is required and becomes the label;
   otherwise the label defaults to the translated string "Subtotal".  *CAT is
   then set to a CCT_SUBTOTAL category that owns the label and carries
   HIDE_SUBCATEGORIES. */
1737 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1738 struct ctables_category *cat)
1741 if (lex_match (lexer, T_EQUALS))
1743 if (!lex_force_string (lexer))
1746 total_label = ss_xstrdup (lex_tokss (lexer));
1750 total_label = xstrdup (_("Subtotal"));
1752 *cat = (struct ctables_category) {
1753 .type = CCT_SUBTOTAL,
1754 .hide_subcategories = hide_subcategories,
1755 .total_label = total_label
/* Parses one entry of an explicit category list into *CAT.  The accepted
   forms, in the order tested, are:

     OTHERNM
     MISSING
     SUBTOTAL and HSUBTOTAL (see ctables_table_parse_subtotal())
     LO THRU followed by a string or a number
     a number, optionally followed by THRU and either HI or a number
     a string, optionally followed by THRU and either HI or a string
     & followed by the name of a postcompute known to 'ct'

   Open-ended numeric bounds are represented as -DBL_MAX or DBL_MAX and
   open-ended string bounds as a substring with a null 'string' pointer.  An
   unknown postcompute name is reported as an error, and any other token
   yields a generic syntax error. */
1761 ctables_table_parse_explicit_category (struct lexer *lexer,
1762 struct dictionary *dict,
1764 struct ctables_category *cat)
1766 if (lex_match_id (lexer, "OTHERNM"))
1767 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1768 else if (lex_match_id (lexer, "MISSING"))
1769 *cat = (struct ctables_category) { .type = CCT_MISSING };
1770 else if (lex_match_id (lexer, "SUBTOTAL"))
1771 return ctables_table_parse_subtotal (lexer, false, cat);
1772 else if (lex_match_id (lexer, "HSUBTOTAL"))
1773 return ctables_table_parse_subtotal (lexer, true, cat);
1774 else if (lex_match_id (lexer, "LO"))
1776 if (!lex_force_match_id (lexer, "THRU"))
1778 if (lex_is_string (lexer))
1780 struct substring sr0 = { .string = NULL };
1781 struct substring sr1 = parse_substring (lexer, dict);
1782 *cat = cct_srange (sr0, sr1);
1784 else if (lex_force_num (lexer))
1786 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1792 else if (lex_is_number (lexer))
1794 double number = lex_number (lexer);
1796 if (lex_match_id (lexer, "THRU"))
1798 if (lex_match_id (lexer, "HI"))
1799 *cat = cct_nrange (number, DBL_MAX);
1802 if (!lex_force_num (lexer))
1804 *cat = cct_nrange (number, lex_number (lexer));
1809 *cat = (struct ctables_category) {
1814 else if (lex_is_string (lexer))
1816 struct substring s = parse_substring (lexer, dict);
1817 if (lex_match_id (lexer, "THRU"))
1819 if (lex_match_id (lexer, "HI"))
1821 struct substring sr1 = { .string = NULL };
1822 *cat = cct_srange (s, sr1);
1826 if (!lex_force_string (lexer))
1831 struct substring sr1 = parse_substring (lexer, dict);
1832 *cat = cct_srange (s, sr1);
1836 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1838 else if (lex_match (lexer, T_AND))
1840 if (!lex_force_id (lexer))
1842 struct ctables_postcompute *pc = ctables_find_postcompute (
1843 ct, lex_tokcstr (lexer));
1846 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1847 msg_at (SE, loc, _("Unknown postcompute &%s."),
1848 lex_tokcstr (lexer));
1849 msg_location_destroy (loc);
1854 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1858 lex_error (lexer, NULL);
/* Attempts to interpret string S, in DICT's encoding, as a value in input
   format FORMAT by passing it to data_in().  If data_in() reports an error,
   an error message that names FORMAT and quotes data_in()'s explanation is
   issued at LOCATION.

   NOTE(review): presumably the parsed number is stored in *N and the return
   value indicates success -- confirm. */
1866 parse_category_string (struct msg_location *location,
1867 struct substring s, const struct dictionary *dict,
1868 enum fmt_type format, double *n)
1871 char *error = data_in (s, dict_get_encoding (dict), format,
1872 settings_get_fmt_settings (), &v, 0, NULL);
1875 msg_at (SE, location,
1876 _("Failed to parse category specification as format %s: %s."),
1877 fmt_name (format), error);
/* Scans CATS for the category that postcompute expression E refers to.  The
   kind of match depends on E's operator: number, string, numeric range, and
   string range references must match a category of the corresponding type
   with equal contents (string range endpoints may be null), whereas MISSING,
   OTHERNM, and TOTAL references match on category type alone.

   For subtotal references, subtotal categories are counted as they are
   encountered, and 'subtotal_index' is compared against that count; an index
   of 0 gets separate handling, and after the scan a check is made for an
   index of 0 combined with more than one subtotal.

   NOTE(review): presumably that final check rejects an unindexed subtotal
   reference as ambiguous by returning a null pointer -- confirm. */
1886 static struct ctables_category *
1887 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1888 const struct ctables_pcexpr *e)
1890 struct ctables_category *best = NULL;
1891 size_t n_subtotals = 0;
1892 for (size_t i = 0; i < cats->n_cats; i++)
1894 struct ctables_category *cat = &cats->cats[i];
1897 case CTPO_CAT_NUMBER:
1898 if (cat->type == CCT_NUMBER && cat->number == e->number)
1902 case CTPO_CAT_STRING:
1903 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1907 case CTPO_CAT_NRANGE:
1908 if (cat->type == CCT_NRANGE
1909 && cat->nrange[0] == e->nrange[0]
1910 && cat->nrange[1] == e->nrange[1])
1914 case CTPO_CAT_SRANGE:
1915 if (cat->type == CCT_SRANGE
1916 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1917 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1921 case CTPO_CAT_MISSING:
1922 if (cat->type == CCT_MISSING)
1926 case CTPO_CAT_OTHERNM:
1927 if (cat->type == CCT_OTHERNM)
1931 case CTPO_CAT_SUBTOTAL:
1932 if (cat->type == CCT_SUBTOTAL)
1935 if (e->subtotal_index == n_subtotals)
1937 else if (e->subtotal_index == 0)
1942 case CTPO_CAT_TOTAL:
1943 if (cat->type == CCT_TOTAL)
1957 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression E refers to, taking
   PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references are first converted to
   numeric ones: a CTPO_CAT_STRING reference is parsed with
   parse_category_string() and looked up as a CTPO_CAT_NUMBER, and a
   CTPO_CAT_SRANGE reference has each endpoint parsed the same way (a null
   endpoint becomes -DBL_MAX or DBL_MAX) and is looked up as a
   CTPO_CAT_NRANGE.  Every other case is looked up as is. */
1962 static struct ctables_category *
1963 ctables_find_category_for_postcompute (const struct dictionary *dict,
1964 const struct ctables_categories *cats,
1965 enum fmt_type parse_format,
1966 const struct ctables_pcexpr *e)
1968 if (parse_format != FMT_F)
1970 if (e->op == CTPO_CAT_STRING)
1973 if (!parse_category_string (e->location, e->string, dict,
1974 parse_format, &number))
1977 struct ctables_pcexpr e2 = {
1978 .op = CTPO_CAT_NUMBER,
1980 .location = e->location,
1982 return ctables_find_category_for_postcompute__ (cats, &e2);
1984 else if (e->op == CTPO_CAT_SRANGE)
1987 if (!e->srange[0].string)
1988 nrange[0] = -DBL_MAX;
1989 else if (!parse_category_string (e->location, e->srange[0], dict,
1990 parse_format, &nrange[0]))
1993 if (!e->srange[1].string)
1994 nrange[1] = DBL_MAX;
1995 else if (!parse_category_string (e->location, e->srange[1], dict,
1996 parse_format, &nrange[1]))
1999 struct ctables_pcexpr e2 = {
2000 .op = CTPO_CAT_NRANGE,
2001 .nrange = { nrange[0], nrange[1] },
2002 .location = e->location,
2004 return ctables_find_category_for_postcompute__ (cats, &e2);
2007 return ctables_find_category_for_postcompute__ (cats, e);
2010 /* CTABLES variable nesting and stacking. */
2012 /* A nested sequence of variables, e.g. a > b > c. */
/* The nested variables.  nest_fts() fills this array by copying the left
   operand's variables followed by the right operand's. */
2015 struct variable **vars;
/* One array per area type; ctables_nest_uninit() frees each of them.
   'n_areas' presumably holds the matching element counts. */
2019 size_t *areas[N_CTATS];
2020 size_t n_areas[N_CTATS];
/* Summary specifications, one set per summary variant. */
2023 struct ctables_summary_spec_set specs[N_CSVS];
2026 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2027 struct ctables_stack
/* Array of nests; other code in this file iterates over its first 'n'
   elements. */
2029 struct ctables_nest *nests;
/* Releases resources owned by NEST: the summary specification set for every
   summary variant and the 'areas' array for every area type.  NEST itself is
   not freed here; callers pass elements of a stack's 'nests' array. */
2034 ctables_nest_uninit (struct ctables_nest *nest)
2037 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2038 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2039 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2040 free (nest->areas[at]);
/* Releases STACK's contents: uninitializes each of its nests and then frees
   the 'nests' array.  STACK itself is not freed. */
2044 ctables_stack_uninit (struct ctables_stack *stack)
2048 for (size_t i = 0; i < stack->n; i++)
2049 ctables_nest_uninit (&stack->nests[i]);
2050 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: the result has one nest for every
   pairing of a nest 'a' from S0 with a nest 'b' from S1, in row-major order
   (S0 outer, S1 inner).  Each new nest holds a's variables followed by b's.
   Its scale and summary indexes come from 'a' if set there (not SIZE_MAX),
   and otherwise from 'b' shifted by the number of variables in 'a'.  The
   summary specifications are cloned from whichever operand 'summary_src'
   selects, based on which operand's CSV_CELL specification set lacks a
   variable.

   S0 and S1 are consumed: both are uninitialized before returning.

   NOTE(review): the product 's1.n * sizeof *stack.nests' is computed before
   xnmalloc() can apply its overflow check -- consider passing the element
   counts and size so the check covers the whole product. */
2054 static struct ctables_stack
2055 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2062 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2063 for (size_t i = 0; i < s0.n; i++)
2064 for (size_t j = 0; j < s1.n; j++)
2066 const struct ctables_nest *a = &s0.nests[i];
2067 const struct ctables_nest *b = &s1.nests[j];
2069 size_t allocate = a->n + b->n;
2070 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2072 for (size_t k = 0; k < a->n; k++)
2073 vars[n++] = a->vars[k];
2074 for (size_t k = 0; k < b->n; k++)
2075 vars[n++] = b->vars[k];
2076 assert (n == allocate);
2078 const struct ctables_nest *summary_src;
2079 if (!a->specs[CSV_CELL].var)
2081 else if (!b->specs[CSV_CELL].var)
2086 struct ctables_nest *new = &stack.nests[stack.n++];
2087 *new = (struct ctables_nest) {
2089 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2090 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2092 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2093 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2097 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2098 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2100 ctables_stack_uninit (&s0);
2101 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by stacking: the result holds S0's nests followed
   by S1's.  The nests are moved by structure assignment rather than cloned.
   Each nest taken from S1 has its 'group_head' increased by the number of
   nests in S0, which keeps it consistent with the nest's new position. */
2105 static struct ctables_stack
2106 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2108 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2109 for (size_t i = 0; i < s0.n; i++)
2110 stack.nests[stack.n++] = s0.nests[i];
2111 for (size_t i = 0; i < s1.n; i++)
2113 stack.nests[stack.n] = s1.nests[i];
2114 stack.nests[stack.n].group_head += s0.n;
2117 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single nest built from axis node A, with room
   allocated for one variable.  The nest's scale index is 0 if A is a scale
   variable and SIZE_MAX otherwise.  Its summary index is 0 if A is a scale
   variable or has CSV_CELL summary specifications, and SIZE_MAX otherwise.
   A's summary specifications are cloned for every summary variant and tagged
   with A's variable and scale flag. */
2123 static struct ctables_stack
2124 var_fts (const struct ctables_axis *a)
2126 struct variable **vars = xmalloc (sizeof *vars);
2129 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2130 struct ctables_nest *nest = xmalloc (sizeof *nest);
2131 *nest = (struct ctables_nest) {
2134 .scale_idx = a->scale ? 0 : SIZE_MAX,
2135 .summary_idx = is_summary ? 0 : SIZE_MAX,
2138 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2140 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2141 nest->specs[sv].var = a->var;
2142 nest->specs[sv].is_scale = a->scale;
2144 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis tree rooted at A into a stack of nests.  One
   case yields an empty stack, and the two binary cases combine the stacks of
   A's two subexpressions with stack_fts() or nest_fts().

   NOTE(review): presumably a null A yields the empty stack, CTAO_VAR uses
   var_fts(), CTAO_STACK uses stack_fts(), and CTAO_NEST uses nest_fts() --
   confirm against the case labels. */
2147 static struct ctables_stack
2148 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2151 return (struct ctables_stack) { .n = 0 };
2159 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2160 enumerate_fts (axis_type, a->subs[1]));
2163 /* This should consider any of the scale variables found in the result to
2164 be linked to each other listwise for SMISSING=LISTWISE. */
2165 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2166 enumerate_fts (axis_type, a->subs[1]));
2173 /* CTABLES summary calculation. */
/* Accumulator for one summary function in one cell.  Which members are in
   use depends on the summary function, as the comments on the members
   indicate; ctables_summary_init(), ctables_summary_add(),
   ctables_summary_value(), and ctables_summary_uninit() all dispatch on the
   function in the same way. */
2175 union ctables_summary
2177 /* COUNT, VALIDN, TOTALN. */
2180 /* MINIMUM, MAXIMUM, RANGE. */
2187 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2188 struct moments1 *moments;
2190 /* MEDIAN, MODE, PTILE. */
2193 struct casewriter *writer;
/* Prepares S to accumulate data for the summary function in SS.  Depending
   on the function, this sets the minimum and maximum to SYSMIS, creates a
   moments accumulator that tracks up to the mean or up to the variance, or
   creates a sorting case writer for two numeric values per case, ordered
   ascending on the first. */
2200 ctables_summary_init (union ctables_summary *s,
2201 const struct ctables_summary_spec *ss)
2203 switch (ss->function)
2206 case CTSF_areaPCT_COUNT:
2207 case CTSF_areaPCT_VALIDN:
2208 case CTSF_areaPCT_TOTALN:
2221 s->min = s->max = SYSMIS;
2226 case CTSF_areaPCT_SUM:
2227 s->moments = moments1_create (MOMENT_MEAN);
2233 s->moments = moments1_create (MOMENT_VARIANCE);
2240 struct caseproto *proto = caseproto_create ();
2241 proto = caseproto_add_width (proto, 0);
2242 proto = caseproto_add_width (proto, 0);
2244 struct subcase ordering;
2245 subcase_init (&ordering, 0, 0, SC_ASCEND);
2246 s->writer = sort_create_writer (&ordering, proto);
2247 subcase_uninit (&ordering);
2248 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function in SS: the moments accumulator or the case writer, for the
   functions that use one. */
2258 ctables_summary_uninit (union ctables_summary *s,
2259 const struct ctables_summary_spec *ss)
2261 switch (ss->function)
2264 case CTSF_areaPCT_COUNT:
2265 case CTSF_areaPCT_VALIDN:
2266 case CTSF_areaPCT_TOTALN:
2285 case CTSF_areaPCT_SUM:
2286 moments1_destroy (s->moments);
2292 casewriter_destroy (s->writer);
/* Adds one observation, VALUE, to accumulator S for the summary function in
   SS.  IS_MISSING and IS_INCLUDED describe the observation with respect to
   the variable being summarized; the chart inside the function explains which
   combinations are counted for which kind of summary.  Depending on the
   function, the observation updates the running minimum and maximum, is added
   to the moments accumulator with its weight, or is written to the case
   writer as a (value, weight) pair for later order statistics. */
2298 ctables_summary_add (union ctables_summary *s,
2299 const struct ctables_summary_spec *ss,
2300 const union value *value,
2301 bool is_missing, bool is_included,
2304 /* To determine whether a case is included in a given table for a particular
2305 kind of summary, consider the following charts for the variable being
2306 summarized. Only if "yes" appears is the case counted.
2308 Categorical variables: VALIDN other TOTALN
2309 Valid values in included categories yes yes yes
2310 Missing values in included categories --- yes yes
2311 Missing values in excluded categories --- --- yes
2312 Valid values in excluded categories --- --- ---
2314 Scale variables: VALIDN other TOTALN
2315 Valid value yes yes yes
2316 Missing value --- yes yes
2318 Missing values include both user- and system-missing. (The system-missing
2319 value is always in an excluded category.)
2321 One way to interpret the above table is that scale variables are like
2322 categorical variables in which all values are in included categories.
2324 switch (ss->function)
2327 case CTSF_areaPCT_TOTALN:
2332 case CTSF_areaPCT_COUNT:
2338 case CTSF_areaPCT_VALIDN:
2356 if (s->min == SYSMIS || value->f < s->min)
2358 if (s->max == SYSMIS || value->f > s->max)
2369 moments1_add (s->moments, value->f, weight);
2372 case CTSF_areaPCT_SUM:
2374 moments1_add (s->moments, value->f, weight);
2382 s->ovalid += weight;
2384 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2385 *case_num_rw_idx (c, 0) = value->f;
2386 *case_num_rw_idx (c, 1) = weight;
2387 casewriter_write (s->writer, c);
/* Computes the final value of the summary function in SS from accumulator S.
   AREAS supplies, for each area type, the area that contains the cell; SS's
   'calc_area' selects which one provides denominators.

   Percentages divide by the selected area's count, valid count, total count,
   or sum, and yield SYSMIS when that denominator is zero.  The range is
   SYSMIS unless both minimum and maximum are known.  Moment-based results
   propagate SYSMIS from the underlying moments.  Order statistics turn the
   case writer into a reader, which changes S.

   NOTE(review): the percentile is parsed on a 0...100 scale, yet it is passed
   to percentile_create() as the alternative to the literal 0.5 used for the
   other case -- confirm which scale percentile_create() expects. */
2394 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2395 union ctables_summary *s,
2396 const struct ctables_summary_spec *ss)
2398 switch (ss->function)
2404 return areas[ss->calc_area]->sequence;
2406 case CTSF_areaPCT_COUNT:
2408 const struct ctables_area *a = areas[ss->calc_area];
2409 double a_count = a->count[ss->weighting];
2410 return a_count ? s->count / a_count * 100 : SYSMIS;
2413 case CTSF_areaPCT_VALIDN:
2415 const struct ctables_area *a = areas[ss->calc_area];
2416 double a_valid = a->valid[ss->weighting];
2417 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2420 case CTSF_areaPCT_TOTALN:
2422 const struct ctables_area *a = areas[ss->calc_area];
2423 double a_total = a->total[ss->weighting];
2424 return a_total ? s->count / a_total * 100 : SYSMIS;
2439 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2444 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2450 double weight, variance;
2451 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2452 return calc_semean (variance, weight);
2458 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2459 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2464 double weight, mean;
2465 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2466 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2472 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2476 case CTSF_areaPCT_SUM:
2478 double weight, mean;
2479 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2480 if (weight == SYSMIS || mean == SYSMIS)
2483 const struct ctables_area *a = areas[ss->calc_area];
2484 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2485 double denom = sum->sum[ss->weighting];
2486 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2493 struct casereader *reader = casewriter_make_reader (s->writer);
2496 struct percentile *ptile = percentile_create (
2497 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2498 struct order_stats *os = &ptile->parent;
2499 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2500 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2501 statistic_destroy (&ptile->parent.parent);
2508 struct casereader *reader = casewriter_make_reader (s->writer);
2511 struct mode *mode = mode_create ();
2512 struct order_stats *os = &mode->parent;
2513 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2514 s->ovalue = mode->mode;
2515 statistic_destroy (&mode->parent.parent);
/* Each constant is defined as the corresponding SETTINGS_VALUE_SHOW_* value.
   Note that CTVL_NONE is the "default" setting and CTVL_NAME is the "value"
   setting. */
2525 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2526 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2527 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2528 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
/* Members of one table cell.  A cell is stored in its section's 'cells'
   hmap, records the area of each type that contains it, and carries an array
   of summary accumulators. */
2533 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2534 all the axes (except the scalar variable, if any). */
2535 struct hmap_node node;
2537 /* The areas that contain this cell. */
2538 uint32_t omit_areas;
2539 struct ctables_area *areas[N_CTATS];
2544 enum ctables_summary_variant sv;
2546 struct ctables_cell_axis
2548 struct ctables_cell_value
2550 const struct ctables_category *category;
2558 union ctables_summary *summaries;
/* A named computed category, referenced in category lists as "&name".
   Postcomputes are kept in struct ctables's 'postcomputes' hmap;
   ctables_destroy() releases the location, expression, and summary
   specifications of each one. */
2561 struct ctables_postcompute
2563 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
2564 char *name; /* Name, without leading &. */
2566 struct msg_location *location; /* Location of definition. */
2567 struct ctables_pcexpr *expr;
2569 struct ctables_summary_spec_set *specs;
2570 bool hide_source_cats;
/* Settings that apply to a whole CTABLES command, followed by the array of
   tables that it defines.  ctables_destroy() releases the postcomputes, the
   CTEF_* format settings, the table look, and each table. */
2575 const struct dictionary *dict;
2576 struct pivot_table_look *look;
2578 /* For CTEF_* formats. */
2579 struct fmt_settings ctables_formats;
2581 /* If this is NULL, zeros are displayed using the normal print format.
2582 Otherwise, this string is displayed. */
2585 /* If this is NULL, missing values are displayed using the normal print
2586 format. Otherwise, this string is displayed. */
2589 /* Indexed by variable dictionary index. */
2590 enum ctables_vlabel *vlabels;
2592 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
2594 bool mrsets_count_duplicates; /* MRSETS. */
2595 bool smissing_listwise; /* SMISSING. */
2596 struct variable *e_weight; /* WEIGHT. */
2597 int hide_threshold; /* HIDESMALLCOUNTS. */
2599 struct ctables_table **tables;
/* An element of struct ctables_table's 'clabels_values_map' hmap.
   ctables_table_destroy() destroys each element's 'value' using the width of
   the table's 'clabels_example' variable. */
2603 struct ctables_value
2605 struct hmap_node node;
/* An element of the hmaps in struct ctables_section's 'occurrences' array. */
2610 struct ctables_occurrence
2612 struct hmap_node node;
/* One section of a table.  A section refers back to its table, has one nest
   per pivot axis, and owns the hmaps of occurrences, cells, and areas
   described on the members below. */
2616 struct ctables_section
2619 struct ctables_table *table;
2620 struct ctables_nest *nests[PIVOT_N_AXES];
2623 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2624 struct hmap cells; /* Contains "struct ctables_cell"s. */
2625 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2628 static void ctables_section_uninit (struct ctables_section *);
/* One table within a CTABLES command.  It refers back to the enclosing
   struct ctables and holds, per pivot axis, the parsed axis expression and
   the stack of nests derived from it, plus the table's sections, summary
   specifications, category-label placement, per-variable categories, and
   optional significance test settings.  ctables_table_destroy() releases
   these. */
2630 struct ctables_table
2632 struct ctables *ctables;
2633 struct ctables_axis *axes[PIVOT_N_AXES];
2634 struct ctables_stack stacks[PIVOT_N_AXES];
2635 struct ctables_section *sections;
2637 enum pivot_axis_type summary_axis;
2638 struct ctables_summary_spec_set summary_specs;
2639 struct variable **sum_vars;
2642 enum pivot_axis_type slabels_axis;
2643 bool slabels_visible;
2645 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2647 Most commonly, label_axis[a] == a, and in particular we always have
2648 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2650 If ROWLABELS or COLLABELS is specified, then one of
2651 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2652 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2654 If any category labels are moved, then 'clabels_example' is one of the
2655 variables being moved (and it is otherwise NULL). All of the variables
2656 being moved have the same width, value labels, and categories, so this
2657 example variable can be used to find those out.
2659 The remaining members in this group are relevant only if category labels
2662 'clabels_values_map' holds a "struct ctables_value" for all the values
2663 that appear in all of the variables in the moved categories. It is
2664 accumulated as the data is read. Once the data is fully read, its
2665 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2667 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2668 enum pivot_axis_type clabels_from_axis;
2669 enum pivot_axis_type clabels_to_axis;
2670 const struct variable *clabels_example;
2671 struct hmap clabels_values_map;
2672 struct ctables_value **clabels_values;
2673 size_t n_clabels_values;
2675 /* Indexed by variable dictionary index. */
2676 struct ctables_categories **categories;
2677 size_t n_categories;
2685 struct ctables_chisq *chisq;
2686 struct ctables_pairwise *pairwise;
2689 /* Chi-square test (SIGTEST). */
2690 struct ctables_chisq
/* NOTE(review): presumably selects whether multiple response sets take part
   in the test -- confirm against the SIGTEST parser. */
2693 bool include_mrsets;
2697 /* Pairwise comparison test (COMPARETEST). */
2698 struct ctables_pairwise
/* Kind of comparison: PROP or MEAN. */
2700 enum { PROP, MEAN } type;
2702 bool include_mrsets;
2703 bool meansvariance_allcats;
/* Adjustment method.  The enumeration starts at 1, so a zero-initialized
   structure holds neither BONFERRONI nor BH. */
2705 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting for the subcommand keyword NAME: an optional
   '=', then either DEFAULT or a number in the closed range 0...DBL_MAX.  A
   number is stored in *WIDTH.  NAME is used in the range error message.

   NOTE(review): what DEFAULT stores in *WIDTH is not visible here --
   confirm. */
2714 parse_col_width (struct lexer *lexer, const char *name, double *width)
2716 lex_match (lexer, T_EQUALS);
2717 if (lex_match_id (lexer, "DEFAULT"))
2719 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
2721 *width = lex_number (lexer);
/* Parses a NO or YES keyword from LEXER.  Any other token produces an error
   saying that YES or NO was expected.

   NOTE(review): presumably NO stores false and YES stores true in *B, and the
   return value indicates success -- confirm. */
2731 parse_bool (struct lexer *lexer, bool *b)
2733 if (lex_match_id (lexer, "NO"))
2735 else if (lex_match_id (lexer, "YES"))
2739 lex_error_expecting (lexer, "YES", "NO");
/* Destroys CHISQ.  ctables_table_destroy() calls this with the table's
   'chisq' member. */
2746 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  ctables_table_destroy() calls this with the table's
   'pairwise' member. */
2752 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T and what it owns: every section, every per-variable
   category list (by dropping a reference) and the array that holds them, the
   axis expression and nest stack for each pivot axis, the table-level summary
   specification array, every entry in the moved-category-label value map
   along with the map and its sorted array, and the significance test
   settings.

   Values in the category label map are destroyed using the width of
   'clabels_example', so that variable must be set whenever the map is
   nonempty. */
2758 ctables_table_destroy (struct ctables_table *t)
2763 for (size_t i = 0; i < t->n_sections; i++)
2764 ctables_section_uninit (&t->sections[i]);
2767 for (size_t i = 0; i < t->n_categories; i++)
2768 ctables_categories_unref (t->categories[i]);
2769 free (t->categories);
2771 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2773 ctables_axis_destroy (t->axes[a]);
2774 ctables_stack_uninit (&t->stacks[a]);
2776 free (t->summary_specs.specs);
2778 struct ctables_value *ctv, *next_ctv;
2779 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
2780 &t->clabels_values_map)
2782 value_destroy (&ctv->value, var_get_width (t->clabels_example));
2783 hmap_delete (&t->clabels_values_map, &ctv->node);
2786 hmap_destroy (&t->clabels_values_map);
2787 free (t->clabels_values);
2793 ctables_chisq_destroy (t->chisq);
2794 ctables_pairwise_destroy (t->pairwise);
/* Destroys CT and what it owns.  Every postcompute is removed from the
   'postcomputes' hmap after its location, expression, and summary
   specifications are released; then the hmap itself, the CTEF_* format
   settings, and the reference to the table look are released; finally each
   table is destroyed with ctables_table_destroy(). */
2799 ctables_destroy (struct ctables *ct)
2804 struct ctables_postcompute *pc, *next_pc;
2805 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
2809 msg_location_destroy (pc->location);
2810 ctables_pcexpr_destroy (pc->expr);
2814 ctables_summary_spec_set_uninit (pc->specs);
2817 hmap_delete (&ct->postcomputes, &pc->hmap_node);
2820 hmap_destroy (&ct->postcomputes);
2822 fmt_settings_uninit (&ct->ctables_formats);
2823 pivot_table_look_unref (ct->look);
2827 for (size_t i = 0; i < ct->n_tables; i++)
2828 ctables_table_destroy (ct->tables[i]);
/* Checks that every category that postcompute expression E refers to appears
   in category list CATS, recursing into E's subexpressions.  PC_CAT is the
   CCT_POSTCOMPUTE category whose expression is being checked; its
   'parse_format' controls how string references are interpreted.
   CATS_LOCATION is the source location of the category list, for
   diagnostics.

   For a category reference that cannot be resolved, one of two errors is
   issued:

   - If the reference is a subtotal without a position and CATS contains more
     than one subtotal, the error explains that subtotals must be referenced
     by position.

   - Otherwise the error says that the computed category references a
     category that is not in the list, followed by a note suggesting how to
     fix it: add subtotals, add TOTAL=YES, or add the missing category. */
2834 ctables_recursive_check_postcompute (struct dictionary *dict,
2835 const struct ctables_pcexpr *e,
2836 struct ctables_category *pc_cat,
2837 const struct ctables_categories *cats,
2838 const struct msg_location *cats_location)
2842 case CTPO_CAT_NUMBER:
2843 case CTPO_CAT_STRING:
2844 case CTPO_CAT_NRANGE:
2845 case CTPO_CAT_SRANGE:
2846 case CTPO_CAT_MISSING:
2847 case CTPO_CAT_OTHERNM:
2848 case CTPO_CAT_SUBTOTAL:
2849 case CTPO_CAT_TOTAL:
2851 struct ctables_category *cat = ctables_find_category_for_postcompute (
2852 dict, cats, pc_cat->parse_format, e);
2855 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2857 size_t n_subtotals = 0;
2858 for (size_t i = 0; i < cats->n_cats; i++)
2859 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2860 if (n_subtotals > 1)
2862 msg_at (SE, cats_location,
2863 ngettext ("These categories include %zu instance "
2864 "of SUBTOTAL or HSUBTOTAL, so references "
2865 "from computed categories must refer to "
2866 "subtotals by position, "
2867 "e.g. SUBTOTAL[1].",
2868 "These categories include %zu instances "
2869 "of SUBTOTAL or HSUBTOTAL, so references "
2870 "from computed categories must refer to "
2871 "subtotals by position, "
2872 "e.g. SUBTOTAL[1].",
2875 msg_at (SN, e->location,
2876 _("This is the reference that lacks a position."));
2881 msg_at (SE, pc_cat->location,
2882 _("Computed category &%s references a category not included "
2883 "in the category list."),
2885 msg_at (SN, e->location, _("This is the missing category."));
2886 if (e->op == CTPO_CAT_SUBTOTAL)
2887 msg_at (SN, cats_location,
2888 _("To fix the problem, add subtotals to the "
2889 "list of categories here."));
2890 else if (e->op == CTPO_CAT_TOTAL)
2891 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2892 "CATEGORIES specification."));
2894 msg_at (SN, cats_location,
2895 _("To fix the problem, add the missing category to the "
2896 "list of categories here."));
2899 if (pc_cat->pc->hide_source_cats)
2913 for (size_t i = 0; i < 2; i++)
2914 if (e->subs[i] && !ctables_recursive_check_postcompute (
2915 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that all N_VARS variables in VARS are string variables, as category
   CAT requires.  For a numeric variable, an error that names the variable is
   issued at CAT's source location. */
2924 all_strings (struct variable **vars, size_t n_vars,
2925 const struct ctables_category *cat)
2927 for (size_t j = 0; j < n_vars; j++)
2928 if (var_is_numeric (vars[j]))
2930 msg_at (SE, cat->location,
2931 _("This category specification may be applied only to string "
2932 "variables, but this subcommand tries to apply it to "
2933 "numeric variable %s."),
2934 var_get_name (vars[j]));
2941 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2942 struct ctables *ct, struct ctables_table *t)
2944 if (!lex_match_id (lexer, "VARIABLES"))
2946 lex_match (lexer, T_EQUALS);
2948 struct variable **vars;
2950 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
2953 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2954 for (size_t i = 1; i < n_vars; i++)
2956 const struct fmt_spec *f = var_get_print_format (vars[i]);
2957 if (f->type != common_format->type)
2959 common_format = NULL;
2965 && (fmt_get_category (common_format->type)
2966 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
2968 struct ctables_categories *c = xmalloc (sizeof *c);
2969 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2970 for (size_t i = 0; i < n_vars; i++)
2972 struct ctables_categories **cp
2973 = &t->categories[var_get_dict_index (vars[i])];
2974 ctables_categories_unref (*cp);
2978 size_t allocated_cats = 0;
2979 int cats_start_ofs = -1;
2980 int cats_end_ofs = -1;
2981 if (lex_match (lexer, T_LBRACK))
2983 cats_start_ofs = lex_ofs (lexer);
2986 if (c->n_cats >= allocated_cats)
2987 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2989 int start_ofs = lex_ofs (lexer);
2990 struct ctables_category *cat = &c->cats[c->n_cats];
2991 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2993 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2996 lex_match (lexer, T_COMMA);
2998 while (!lex_match (lexer, T_RBRACK));
2999 cats_end_ofs = lex_ofs (lexer) - 1;
3002 struct ctables_category cat = {
3004 .include_missing = false,
3005 .sort_ascending = true,
3007 bool show_totals = false;
3008 char *total_label = NULL;
3009 bool totals_before = false;
3010 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
3012 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
3014 lex_match (lexer, T_EQUALS);
3015 if (lex_match_id (lexer, "A"))
3016 cat.sort_ascending = true;
3017 else if (lex_match_id (lexer, "D"))
3018 cat.sort_ascending = false;
3021 lex_error_expecting (lexer, "A", "D");
3025 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
3027 int start_ofs = lex_ofs (lexer) - 1;
3028 lex_match (lexer, T_EQUALS);
3029 if (lex_match_id (lexer, "VALUE"))
3030 cat.type = CCT_VALUE;
3031 else if (lex_match_id (lexer, "LABEL"))
3032 cat.type = CCT_LABEL;
3035 cat.type = CCT_FUNCTION;
3036 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
3037 &cat.weighting, &cat.area))
3040 if (lex_match (lexer, T_LPAREN))
3042 cat.sort_var = parse_variable (lexer, dict);
3046 if (cat.sort_function == CTSF_PTILE)
3048 lex_match (lexer, T_COMMA);
3049 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
3051 cat.percentile = lex_number (lexer);
3055 if (!lex_force_match (lexer, T_RPAREN))
3058 else if (ctables_function_availability (cat.sort_function)
3061 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
3065 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
3066 _("Data-dependent sorting is not implemented."));
3070 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
3072 lex_match (lexer, T_EQUALS);
3073 if (lex_match_id (lexer, "INCLUDE"))
3074 cat.include_missing = true;
3075 else if (lex_match_id (lexer, "EXCLUDE"))
3076 cat.include_missing = false;
3079 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
3083 else if (lex_match_id (lexer, "TOTAL"))
3085 lex_match (lexer, T_EQUALS);
3086 if (!parse_bool (lexer, &show_totals))
3089 else if (lex_match_id (lexer, "LABEL"))
3091 lex_match (lexer, T_EQUALS);
3092 if (!lex_force_string (lexer))
3095 total_label = ss_xstrdup (lex_tokss (lexer));
3098 else if (lex_match_id (lexer, "POSITION"))
3100 lex_match (lexer, T_EQUALS);
3101 if (lex_match_id (lexer, "BEFORE"))
3102 totals_before = true;
3103 else if (lex_match_id (lexer, "AFTER"))
3104 totals_before = false;
3107 lex_error_expecting (lexer, "BEFORE", "AFTER");
3111 else if (lex_match_id (lexer, "EMPTY"))
3113 lex_match (lexer, T_EQUALS);
3114 if (lex_match_id (lexer, "INCLUDE"))
3115 c->show_empty = true;
3116 else if (lex_match_id (lexer, "EXCLUDE"))
3117 c->show_empty = false;
3120 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
3127 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
3128 "TOTAL", "LABEL", "POSITION", "EMPTY");
3130 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
3137 if (c->n_cats >= allocated_cats)
3138 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3139 c->cats[c->n_cats++] = cat;
3144 if (c->n_cats >= allocated_cats)
3145 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3147 struct ctables_category *totals;
3150 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
3151 totals = &c->cats[0];
3154 totals = &c->cats[c->n_cats];
3157 *totals = (struct ctables_category) {
3159 .total_label = total_label ? total_label : xstrdup (_("Total")),
3163 struct ctables_category *subtotal = NULL;
3164 for (size_t i = totals_before ? 0 : c->n_cats;
3165 totals_before ? i < c->n_cats : i-- > 0;
3166 totals_before ? i++ : 0)
3168 struct ctables_category *cat = &c->cats[i];
3177 cat->subtotal = subtotal;
3180 case CCT_POSTCOMPUTE:
3191 case CCT_EXCLUDED_MISSING:
3196 if (cats_start_ofs != -1)
3198 for (size_t i = 0; i < c->n_cats; i++)
3200 struct ctables_category *cat = &c->cats[i];
3203 case CCT_POSTCOMPUTE:
3204 cat->parse_format = parse_strings ? common_format->type : FMT_F;
3205 struct msg_location *cats_location
3206 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
3207 bool ok = ctables_recursive_check_postcompute (
3208 dict, cat->pc->expr, cat, c, cats_location);
3209 msg_location_destroy (cats_location);
3216 for (size_t j = 0; j < n_vars; j++)
3217 if (var_is_alpha (vars[j]))
3219 msg_at (SE, cat->location,
3220 _("This category specification may be applied "
3221 "only to numeric variables, but this "
3222 "subcommand tries to apply it to string "
3224 var_get_name (vars[j]));
3233 if (!parse_category_string (cat->location, cat->string, dict,
3234 common_format->type, &n))
3237 ss_dealloc (&cat->string);
3239 cat->type = CCT_NUMBER;
3242 else if (!all_strings (vars, n_vars, cat))
3251 if (!cat->srange[0].string)
3253 else if (!parse_category_string (cat->location,
3254 cat->srange[0], dict,
3255 common_format->type, &n[0]))
3258 if (!cat->srange[1].string)
3260 else if (!parse_category_string (cat->location,
3261 cat->srange[1], dict,
3262 common_format->type, &n[1]))
3265 ss_dealloc (&cat->srange[0]);
3266 ss_dealloc (&cat->srange[1]);
3268 cat->type = CCT_NRANGE;
3269 cat->nrange[0] = n[0];
3270 cat->nrange[1] = n[1];
3272 else if (!all_strings (vars, n_vars, cat))
3283 case CCT_EXCLUDED_MISSING:
/* Auxiliary data for ctables_cell_compare_3way(): NEST supplies the variables
   to compare and A selects which axis of each cell is examined. */
3297 struct ctables_cell_sort_aux
3299 const struct ctables_nest *nest;
3300 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of `struct ctables_cell
   *'.  A_ and B_ point to array elements and AUX_ is a `struct
   ctables_cell_sort_aux'.

   The cells are compared along axis AUX_->a, one nest variable at a time,
   skipping the nest's scale variable.  Cells whose categories differ are
   ordered by comparing the category pointers themselves; otherwise the
   category type selects how the values are compared. */
3304 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3306 const struct ctables_cell_sort_aux *aux = aux_;
3307 struct ctables_cell *const *ap = a_;
3308 struct ctables_cell *const *bp = b_;
3309 const struct ctables_cell *a = *ap;
3310 const struct ctables_cell *b = *bp;
3312 const struct ctables_nest *nest = aux->nest;
3313 for (size_t i = 0; i < nest->n; i++)
3314 if (i != nest->scale_idx)
3316 const struct variable *var = nest->vars[i];
3317 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3318 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category objects' addresses. */
3319 if (a_cv->category != b_cv->category)
3320 return a_cv->category > b_cv->category ? 1 : -1;
3322 const union value *a_val = &a_cv->value;
3323 const union value *b_val = &b_cv->value;
3324 switch (a_cv->category->type)
3330 case CCT_POSTCOMPUTE:
3331 case CCT_EXCLUDED_MISSING:
3332 /* Must be equal. */
/* Comparisons by value use the variable's width. */
3340 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3348 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* A descending sort is the ascending result negated. */
3350 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Comparison by value label: labels are compared with strcmp(), and
   value_compare_3way() is also used on this path. */
3356 const char *a_label = var_lookup_value_label (var, a_val);
3357 const char *b_label = var_lookup_value_label (var, b_val);
3363 cmp = strcmp (a_label, b_label);
3369 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3372 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison callback for an array of `struct ctables_cell *'.  A_
   and B_ point to array elements; AUX is unused.

   Compares the cells' `leaf' indexes axis by axis, in increasing axis
   number. */
3384 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3385 const void *aux UNUSED)
3387 struct ctables_cell *const *ap = a_;
3388 struct ctables_cell *const *bp = b_;
3389 const struct ctables_cell *a = *ap;
3390 const struct ctables_cell *b = *bp;
3392 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3394 int al = a->axes[axis].leaf;
3395 int bl = b->axes[axis].leaf;
3397 return al > bl ? 1 : -1;
/* Looks up the area of type AREA in section S that CELL falls into.  Existing
   areas in S->areas[AREA] are compared against CELL; a new area is allocated
   with CELL as its example and inserted into the same hash map.

   An area is identified, on each axis, by the categories (and, except for
   total, subtotal, and postcompute categories, the values) of the variables
   listed in that axis's nest->areas[AREA]. */
3402 static struct ctables_area *
3403 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3404 enum ctables_area_type area)
/* Hash the identifying categories and values. */
3407 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3409 const struct ctables_nest *nest = s->nests[a];
3410 for (size_t i = 0; i < nest->n_areas[area]; i++)
3412 size_t v_idx = nest->areas[area][i];
3413 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3414 hash = hash_pointer (cv->category, hash);
3415 if (cv->category->type != CCT_TOTAL
3416 && cv->category->type != CCT_SUBTOTAL
3417 && cv->category->type != CCT_POSTCOMPUTE)
3418 hash = value_hash (&cv->value,
3419 var_get_width (nest->vars[v_idx]), hash);
/* Search the hash bucket, comparing each candidate's example cell with CELL
   using the same rule as the hash.  Note that the axis loop variable `a'
   below shadows the area pointer `a' inside the comparison loops. */
3423 struct ctables_area *a;
3424 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3426 const struct ctables_cell *df = a->example;
3427 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3429 const struct ctables_nest *nest = s->nests[a];
3430 for (size_t i = 0; i < nest->n_areas[area]; i++)
3432 size_t v_idx = nest->areas[area][i];
3433 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3434 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3435 if (cv1->category != cv2->category
3436 || (cv1->category->type != CCT_TOTAL
3437 && cv1->category->type != CCT_SUBTOTAL
3438 && cv1->category->type != CCT_POSTCOMPUTE
3439 && !value_equal (&cv1->value, &cv2->value,
3440 var_get_width (nest->vars[v_idx]))))
/* New area: a zeroed per-sum-variable array is allocated only when the table
   has sum variables. */
3449 struct ctables_sum *sums = (s->table->n_sum_vars
3450 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3453 a = xmalloc (sizeof *a);
3454 *a = (struct ctables_area) { .example = cell, .sums = sums };
3455 hmap_insert (&s->areas[area], &a->node, hash);
/* Makes a substring that covers the string data in V, using VAR's width as
   its length, and trims trailing spaces from it.  The substring refers to V's
   own storage rather than to a copy. */
3459 static struct substring
3460 rtrim_value (const union value *v, const struct variable *var)
3462 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3463 var_get_width (var));
3464 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether V, a value of VAR with trailing spaces trimmed, lies between
   SRANGE[0] and SRANGE[1], inclusive.  A bound whose `string' member is null
   places no limit on that side. */
3469 in_string_range (const union value *v, const struct variable *var,
3470 const struct substring *srange)
3472 struct substring s = rtrim_value (v, var);
3473 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3474 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   The categories are scanned from last to first.  If the scan ends without
   returning, the result is null for a value that is missing in VAR and
   OTHERNM otherwise (OTHERNM starts out null). */
3477 static const struct ctables_category *
3478 ctables_categories_match (const struct ctables_categories *c,
3479 const union value *v, const struct variable *var)
/* The system-missing value is handled before any category is examined. */
3481 if (var_is_numeric (var) && v->f == SYSMIS)
3484 const struct ctables_category *othernm = NULL;
3485 for (size_t i = c->n_cats; i-- > 0; )
3487 const struct ctables_category *cat = &c->cats[i];
/* Single number. */
3491 if (cat->number == v->f)
/* Single string, compared with V's trailing spaces removed. */
3496 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range.  -DBL_MAX and DBL_MAX as bounds mean "no lower limit" and
   "no upper limit". */
3501 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3502 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3507 if (in_string_range (v, var, cat->srange))
3512 if (var_is_value_missing (var, v))
3516 case CCT_POSTCOMPUTE:
/* This category accepts missing values only if it includes missing values. */
3531 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3534 case CCT_EXCLUDED_MISSING:
3539 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category in C.  Only the first and the last category
   are examined, with the first taking precedence.  C->cats[0] is read
   unconditionally, so C is expected to have at least one category. */
3542 static const struct ctables_category *
3543 ctables_categories_total (const struct ctables_categories *c)
3545 const struct ctables_category *first = &c->cats[0];
3546 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3547 return (first->type == CCT_TOTAL ? first
3548 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S for case C classified into categories CATS
   (one array of category pointers per axis, indexed like the nest's
   variables).  Existing cells in S->cells are compared against CATS and the
   case's values; a new cell is allocated, initialized, and inserted into
   S->cells under the same hash.

   A cell is identified, for every nest variable other than the scale
   variable, by its category and, except for total, subtotal, and postcompute
   categories, by the case's value for that variable. */
3552 static struct ctables_cell *
3553 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3554 const struct ctables_category **cats[PIVOT_N_AXES])
3557 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3558 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3560 const struct ctables_nest *nest = s->nests[a];
3561 for (size_t i = 0; i < nest->n; i++)
3562 if (i != nest->scale_idx)
3564 hash = hash_pointer (cats[a][i], hash);
3565 if (cats[a][i]->type != CCT_TOTAL
3566 && cats[a][i]->type != CCT_SUBTOTAL
3567 && cats[a][i]->type != CCT_POSTCOMPUTE)
3568 hash = value_hash (case_data (c, nest->vars[i]),
3569 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket using the same identification rule. */
3575 struct ctables_cell *cell;
3576 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3578 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3580 const struct ctables_nest *nest = s->nests[a];
3581 for (size_t i = 0; i < nest->n; i++)
3582 if (i != nest->scale_idx
3583 && (cats[a][i] != cell->axes[a].cvs[i].category
3584 || (cats[a][i]->type != CCT_TOTAL
3585 && cats[a][i]->type != CCT_SUBTOTAL
3586 && cats[a][i]->type != CCT_POSTCOMPUTE
3587 && !value_equal (case_data (c, nest->vars[i]),
3588 &cell->axes[a].cvs[i].value,
3589 var_get_width (nest->vars[i])))))
/* Create a new cell. */
3598 cell = xmalloc (sizeof *cell);
3601 cell->omit_areas = 0;
3602 cell->postcompute = false;
3603 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3605 const struct ctables_nest *nest = s->nests[a];
/* A nest without variables gets no cell-value array. */
3606 cell->axes[a].cvs = (nest->n
3607 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3609 for (size_t i = 0; i < nest->n; i++)
3611 const struct ctables_category *cat = cats[a][i];
3612 const struct variable *var = nest->vars[i];
3613 const union value *value = case_data (c, var);
3614 if (i != nest->scale_idx)
3616 const struct ctables_category *subtotal = cat->subtotal;
3617 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* For total, subtotal, and postcompute categories, area-type bits are ORed
   into omit_areas; the set of bits differs per axis case below. */
3620 if (cat->type == CCT_TOTAL
3621 || cat->type == CCT_SUBTOTAL
3622 || cat->type == CCT_POSTCOMPUTE)
3626 case PIVOT_AXIS_COLUMN:
3627 cell->omit_areas |= ((1u << CTAT_TABLE) |
3628 (1u << CTAT_LAYER) |
3629 (1u << CTAT_LAYERCOL) |
3630 (1u << CTAT_SUBTABLE) |
3633 case PIVOT_AXIS_ROW:
3634 cell->omit_areas |= ((1u << CTAT_TABLE) |
3635 (1u << CTAT_LAYER) |
3636 (1u << CTAT_LAYERROW) |
3637 (1u << CTAT_SUBTABLE) |
3640 case PIVOT_AXIS_LAYER:
3641 cell->omit_areas |= ((1u << CTAT_TABLE) |
3642 (1u << CTAT_LAYER));
3646 if (cat->type == CCT_POSTCOMPUTE)
3647 cell->postcompute = true;
/* Record the category and a copy of the case's value for every nest
   variable. */
3650 cell->axes[a].cvs[i].category = cat;
3651 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* One summary per summary specification for the cell's summary variant.
   xnmalloc() takes the count and element size separately, matching the cvs
   allocation above, instead of multiplying them here without an overflow
   check. */
3655 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3656 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3657 cell->summaries = xnmalloc (specs->n, sizeof *cell->summaries);
3658 for (size_t i = 0; i < specs->n; i++)
3659 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its area of every type, then publish it. */
3660 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3661 cell->areas[at] = ctables_area_insert (s, cell, at);
3662 hmap_insert (&s->cells, &cell->node, hash);
/* Combines the N_CTWS weights in SRC into DST, one weighting type at a time
   (presumably by adding each SRC element to the matching DST element, as the
   name suggests). */
3667 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3669 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Accumulates case C, with weights WEIGHT (one per weighting type), into the
   cell of section S selected by categories CATS, obtaining the cell with
   ctables_cell_insert__().  IS_INCLUDED is passed through to
   ctables_summary_add().

   Every summary of the cell is updated, and then the totals, counts, valid
   weights, and per-sum-variable sums of the cell's areas. */
3674 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3675 const struct ctables_category **cats[PIVOT_N_AXES],
3676 bool is_included, double weight[N_CTWS])
3678 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3679 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3681 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3682 const union value *value = case_data (c, specs->var);
3683 bool is_missing = var_is_value_missing (specs->var, value);
/* For a scale summary variable, listwise missingness also counts as
   missing. */
3684 bool is_scale_missing
3685 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
/* Each summary receives the weight for its own weighting type. */
3687 for (size_t i = 0; i < specs->n; i++)
3688 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3689 is_scale_missing, is_included,
3690 weight[specs->specs[i].weighting]);
/* NOTE(review): `&&' makes this test equivalent to `!cell->omit_areas',
   because `1u << at' is never zero: a cell with any bit set in omit_areas is
   left out of every area type, and the individual bits are not consulted.
   If a per-area mask test (`&') was intended, the bit assignments made in
   ctables_cell_insert__() should be re-validated against expected output
   before changing the operator. */
3691 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3692 if (!(cell->omit_areas && (1u << at)))
3694 struct ctables_area *a = cell->areas[at];
3696 add_weight (a->total, weight);
3698 add_weight (a->count, weight);
3701 add_weight (a->valid, weight);
/* Sum variables contribute only when the summary value is not scale-missing
   and the sum variable's own value is not missing. */
3703 if (!is_scale_missing)
3704 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3706 const struct variable *var = s->table->sum_vars[i];
3707 double addend = case_num (c, var);
3708 if (!var_is_num_missing (var, addend))
3709 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3710 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the total cells of section S.  Starting at axis START_AXIS
   and nest index START_NEST, each non-scale variable's total category is
   looked up with ctables_categories_total(); the case is then added with
   ctables_cell_add__() and the function recurses from the next nest index on
   the same axis.

   CATS is used as scratch space: the original entry is kept in SAVE while the
   substituted category is in use (presumably so it can be restored
   afterward).  IS_INCLUDED and WEIGHT are passed through unchanged. */
3717 recurse_totals (struct ctables_section *s, const struct ccase *c,
3718 const struct ctables_category **cats[PIVOT_N_AXES],
3719 bool is_included, double weight[N_CTWS],
3720 enum pivot_axis_type start_axis, size_t start_nest)
3722 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3724 const struct ctables_nest *nest = s->nests[a];
3725 for (size_t i = start_nest; i < nest->n; i++)
3727 if (i == nest->scale_idx)
3730 const struct variable *var = nest->vars[i];
3732 const struct ctables_category *total = ctables_categories_total (
3733 s->table->categories[var_get_dict_index (var)]);
3736 const struct ctables_category *save = cats[a][i];
3738 ctables_cell_add__ (s, c, cats, is_included, weight);
3739 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the subtotal cells of section S.  Starting at axis
   START_AXIS and nest index START_NEST, each non-scale position in CATS is
   replaced by its category's `subtotal'; the case is then added with
   ctables_cell_add__() and the function recurses from the next nest index on
   the same axis.

   CATS is used as scratch space: the original entry is kept in SAVE
   (presumably so it can be restored afterward).  IS_INCLUDED and WEIGHT are
   passed through unchanged. */
3748 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3749 const struct ctables_category **cats[PIVOT_N_AXES],
3750 bool is_included, double weight[N_CTWS],
3751 enum pivot_axis_type start_axis, size_t start_nest)
3753 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3755 const struct ctables_nest *nest = s->nests[a];
3756 for (size_t i = start_nest; i < nest->n; i++)
3758 if (i == nest->scale_idx)
3761 const struct ctables_category *save = cats[a][i];
3764 cats[a][i] = save->subtotal;
3765 ctables_cell_add__ (s, c, cats, is_included, weight);
3766 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Records VALUE, a value of VAR, in hash map OCCURRENCES.  The map is keyed
   by the value hashed at VAR's width; an existing entry with an equal value
   is searched for, and a new entry holding a copy of VALUE is inserted. */
3775 ctables_add_occurrence (const struct variable *var,
3776 const union value *value,
3777 struct hmap *occurrences)
3779 int width = var_get_width (var);
3780 unsigned int hash = value_hash (value, width, 0);
3782 struct ctables_occurrence *o;
3783 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3785 if (value_equal (value, &o->value, width))
3788 o = xmalloc (sizeof *o);
3789 value_clone (&o->value, value, width);
3790 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights WEIGHT (one per weighting type), to section S.

   Each non-scale nest variable's value is classified into a category with
   ctables_categories_match().  The values are then recorded as occurrences,
   and the case is added to its own cell and to the related total and subtotal
   cells. */
3794 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3795 double weight[N_CTWS])
/* Per-axis scratch arrays of matched categories.  A nest can have no
   variables (ctables_cell_insert__() allows for nest->n == 0), and a
   variable-length array must have a size greater than zero, so each array
   gets at least one element. */
3797 const struct ctables_category *layer_cats[
MAX (s->nests[PIVOT_AXIS_LAYER]->n, 1)];
3798 const struct ctables_category *row_cats[
MAX (s->nests[PIVOT_AXIS_ROW]->n, 1)];
3799 const struct ctables_category *column_cats[
MAX (s->nests[PIVOT_AXIS_COLUMN]->n, 1)];
3800 const struct ctables_category **cats[PIVOT_N_AXES] =
3802 [PIVOT_AXIS_LAYER] = layer_cats,
3803 [PIVOT_AXIS_ROW] = row_cats,
3804 [PIVOT_AXIS_COLUMN] = column_cats,
3807 bool is_included = true;
/* Classify the case's value for every non-scale variable. */
3809 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3811 const struct ctables_nest *nest = s->nests[a];
3812 for (size_t i = 0; i < nest->n; i++)
3813 if (i != nest->scale_idx)
3815 const struct variable *var = nest->vars[i];
3816 const union value *value = case_data (c, var);
3818 cats[a][i] = ctables_categories_match (
3819 s->table->categories[var_get_dict_index (var)], value, var);
/* The checks below involve the summary variable and missing values; on this
   path the case is assigned the shared CCT_EXCLUDED_MISSING category and
   marked as not included. */
3822 if (i != nest->summary_idx)
3825 if (!var_is_value_missing (var, value))
3828 static const struct ctables_category cct_excluded_missing = {
3829 .type = CCT_EXCLUDED_MISSING,
3832 cats[a][i] = &cct_excluded_missing;
3833 is_included = false;
/* Record each value as having occurred at its axis and nest position. */
3839 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3841 const struct ctables_nest *nest = s->nests[a];
3842 for (size_t i = 0; i < nest->n; i++)
3843 if (i != nest->scale_idx)
3845 const struct variable *var = nest->vars[i];
3846 const union value *value = case_data (c, var);
3847 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its cell, then to total and subtotal cells. */
3851 ctables_cell_add__ (s, c, cats, is_included, weight);
3852 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3853 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* Summary specification set that the item refers into;
   merge_item_compare_3way() reads set->specs[ofs]. */
3858 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and B
   refer to (set->specs[ofs]).  Keys, in order: function, weighting, calc_area,
   percentile, and finally label via strcmp(), with a null label treated as an
   empty string.

   Function, weighting, and calc_area order ascending.  Percentile orders the
   other way around: the item with the smaller percentile compares as
   greater. */
3863 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3865 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3866 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3867 if (as->function != bs->function)
3868 return as->function > bs->function ? 1 : -1;
3869 else if (as->weighting != bs->weighting)
3870 return as->weighting > bs->weighting ? 1 : -1;
3871 else if (as->calc_area != bs->calc_area)
3872 return as->calc_area > bs->calc_area ? 1 : -1;
3873 else if (as->percentile != bs->percentile)
3874 return as->percentile < bs->percentile ? 1 : -1;
3876 const char *as_label = as->label ? as->label : "";
3877 const char *bs_label = bs->label ? bs->label : "";
3878 return strcmp (as_label, bs_label);
/* Formats NUMBER as a value of VAR: a temporary pivot value is created for
   it, passed to pivot_value_format() with S as the output string, and then
   destroyed. */
3882 ctables_category_format_number (double number, const struct variable *var,
3885 struct pivot_value *pv = pivot_value_new_var_value (
3886 var, &(union value) { .f = number });
3887 pivot_value_format (pv, NULL, s);
3888 pivot_value_destroy (pv);
/* Formats STRING as a value of VAR.  STRING is first copied into a buffer of
   VAR's width, padded on the right with spaces; a temporary pivot value for
   that buffer is then passed to pivot_value_format() with OUT as the output
   string, and destroyed. */
3892 ctables_category_format_string (struct substring string,
3893 const struct variable *var, struct string *out)
3895 int width = var_get_width (var);
3896 char *s = xmalloc (width);
3897 buf_copy_rpad (s, width, string.string, string.length, ' ');
3898 struct pivot_value *pv = pivot_value_new_var_value (
3899 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
3900 pivot_value_format (pv, NULL, out);
3901 pivot_value_destroy (pv);
/* Writes a textual label for category CAT of variable VAR to S.  The form
   depends on the category's type: a formatted number or string, a range
   written as "<low> THRU <high>", the literal text "MISSING" or "OTHERNM",
   "&" followed by the postcompute's name, or the category's total label. */
3906 ctables_category_format_label (const struct ctables_category *cat,
3907 const struct variable *var,
3913 ctables_category_format_number (cat->number, var, s);
3917 ctables_category_format_string (cat->string, var, s);
3921 ctables_category_format_number (cat->nrange[0], var, s);
3922 ds_put_format (s, " THRU ");
3923 ctables_category_format_number (cat->nrange[1], var, s);
3927 ctables_category_format_string (cat->srange[0], var, s);
3928 ds_put_format (s, " THRU ");
3929 ctables_category_format_string (cat->srange[1], var, s);
3933 ds_put_cstr (s, "MISSING");
3937 ds_put_cstr (s, "OTHERNM");
3940 case CCT_POSTCOMPUTE:
3941 ds_put_format (s, "&%s", cat->pc->name);
3946 ds_put_cstr (s, cat->total_label);
3952 case CCT_EXCLUDED_MISSING:
/* Creates the label for postcompute category CAT of variable VAR by expanding
   each ")LABEL[n]" in CAT->pc->label into the label of the Nth (1-based)
   category in CATS.

   A label without any ")LABEL[" is returned as-is.  The final return uses the
   unexpanded label text (presumably the fallback when a reference cannot be
   expanded). */
3959 static struct pivot_value *
3960 ctables_postcompute_label (const struct ctables_categories *cats,
3961 const struct ctables_category *cat,
3962 const struct variable *var)
3964 struct substring in = ss_cstr (cat->pc->label);
3965 struct substring target = ss_cstr (")LABEL[");
3967 struct string out = DS_EMPTY_INITIALIZER;
3970 size_t chunk = ss_find_substring (in, target);
/* No further reference: emit what remains.  When nothing was expanded, the
   input is returned without going through OUT. */
3971 if (chunk == SIZE_MAX)
3973 if (ds_is_empty (&out))
3974 return pivot_value_new_user_text (in.string, in.length);
3977 ds_put_substring (&out, in);
3978 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference and skip past ")LABEL[". */
3982 ds_put_substring (&out, ss_head (in, chunk));
3983 ss_advance (&in, chunk + target.length);
/* Parse the index up to "]".  It must be a whole decimal number in the range
   1...CATS->n_cats. */
3985 struct substring idx_s;
3986 if (!ss_get_until (&in, ']', &idx_s))
3989 long int idx = strtol (idx_s.string, &tail, 10);
3990 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3993 struct ctables_category *cat2 = &cats->cats[idx - 1];
3994 if (!ctables_category_format_label (cat2, var, &out))
4000 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels VALUE of variable VAR in category CAT
   (one of CATS):

   - For a postcompute category that has a label, the expanded postcompute
     label.

   - For a total or subtotal category, its total label as user text.

   - Otherwise, VALUE itself as a value of VAR. */
4003 static struct pivot_value *
4004 ctables_category_create_value_label (const struct ctables_categories *cats,
4005 const struct ctables_category *cat,
4006 const struct variable *var,
4007 const union value *value)
4009 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4010 ? ctables_postcompute_label (cats, cat, var)
4011 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4012 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4013 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry whose value equals VALUE at
   width WIDTH.  HASH must be VALUE's hash as computed by the callers,
   value_hash (VALUE, WIDTH, 0). */
4016 static struct ctables_value *
4017 ctables_value_find__ (struct ctables_table *t, const union value *value,
4018 int width, unsigned int hash)
4020 struct ctables_value *clv;
4021 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4022 hash, &t->clabels_values_map)
4023 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  An existing entry is looked up first
   with ctables_value_find__(); a new entry holding a copy of VALUE is
   allocated and inserted under VALUE's hash. */
4029 ctables_value_insert (struct ctables_table *t, const union value *value,
4032 unsigned int hash = value_hash (value, width, 0);
4033 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4036 clv = xmalloc (sizeof *clv);
4037 value_clone (&clv->value, value, width);
4038 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the result of looking up VALUE, of width WIDTH, in
   T->clabels_values_map.  This is ctables_value_find__() with the hash
   computed here. */
4042 static struct ctables_value *
4043 ctables_value_find (struct ctables_table *t,
4044 const union value *value, int width)
4046 return ctables_value_find__ (t, value, width,
4047 value_hash (value, width, 0));
/* Recursively adds sections to T, one for each combination of nest indexes
   across the axes.  A is the axis to iterate at this level and IX holds the
   nest index chosen so far for each axis.

   While A is a valid axis, IX[A] runs over that axis's stack, with an empty
   stack treated as having one entry, and the function recurses on the next
   axis.  Once every axis has an index, the next slot in T->sections is
   initialized, including an occurrences hash map for each variable of each
   axis's nest and an areas hash map for each area type. */
4051 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4052 size_t ix[PIVOT_N_AXES])
4054 if (a < PIVOT_N_AXES)
4056 size_t limit = MAX (t->stacks[a].n, 1);
4057 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4058 ctables_table_add_section (t, a + 1, ix);
4062 struct ctables_section *s = &t->sections[t->n_sections++];
4063 *s = (struct ctables_section) {
4065 .cells = HMAP_INITIALIZER (s->cells),
4067 for (a = 0; a < PIVOT_N_AXES; a++)
4070 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4072 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4073 for (size_t i = 0; i < nest->n; i++)
4074 hmap_init (&s->occurrences[a][i]);
4076 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4077 hmap_init (&s->areas[at]);
/* Two-operand evaluator that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal(); presumably returns A + B. */
4082 ctpo_add (double a, double b)
/* Two-operand evaluator that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal(); presumably returns A - B. */
4088 ctpo_sub (double a, double b)
/* Two-operand evaluator that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal(); presumably returns A * B. */
4094 ctpo_mul (double a, double b)
/* Returns A divided by B, or SYSMIS if B is zero. */
4100 ctpo_div (double a, double b)
4102 return b ? a / b : SYSMIS;
/* Computes A raised to the power B with pow().  The caller's errno is saved
   before the call (presumably so that it can be restored after pow()'s error
   status has been examined). */
4106 ctpo_pow (double a, double b)
4108 int save_errno = errno;
4110 double result = pow (a, b);
/* One-operand evaluator (presumably returns -A).  B is unused; the parameter
   exists so that the function has the two-argument evaluator signature that
   ctables_pcexpr_evaluate_nonterminal() accepts. */
4118 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.  It is set up by
   ctables_cell_calculate_postcompute() and read by the ctables_pcexpr_*()
   functions. */
4123 struct ctables_pcexpr_evaluate_ctx
/* Cell whose category values are used at every position other than the
   postcompute's own. */
4125 const struct ctables_cell *cell;
/* Section whose cells and occurrences are searched. */
4126 const struct ctables_section *section;
/* Categories used to resolve category references in the expression. */
4127 const struct ctables_categories *cats;
/* Axis on which the postcompute category sits. */
4128 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute(). */
4131 enum fmt_type parse_format;
/* Declared ahead of its definition because ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
4134 static double ctables_pcexpr_evaluate (
4135 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E in context CTX.  The first N_ARGS (at most 2)
   subexpressions of E are evaluated in order and the results are passed to
   EVALUATE; an argument that is not supplied is passed as 0.

   An argument that is not finite or that equals SYSMIS is detected as soon as
   it is computed, before EVALUATE is called. */
4138 ctables_pcexpr_evaluate_nonterminal (
4139 const struct ctables_pcexpr_evaluate_ctx *ctx,
4140 const struct ctables_pcexpr *e, size_t n_args,
4141 double evaluate (double, double))
4143 double args[2] = { 0, 0 };
4144 for (size_t i = 0; i < n_args; i++)
4146 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4147 if (!isfinite (args[i]) || args[i] == SYSMIS)
4150 return evaluate (args[0], args[1]);
/* Evaluates one category reference within a postcompute expression.

   The cell to read is the one whose category values match CTX->cell at every
   position except the postcompute's own (axis CTX->pc_a, index
   CTX->pc_a_idx), where PC_CV is used instead.  That cell is looked up in the
   section's cell hash map, and its summary at index CTX->summary_idx is
   returned via ctables_summary_value(). */
4154 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4155 const struct ctables_cell_value *pc_cv)
4157 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity in the same way that
   ctables_cell_insert__() hashes cells. */
4160 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4162 const struct ctables_nest *nest = s->nests[a];
4163 for (size_t i = 0; i < nest->n; i++)
4164 if (i != nest->scale_idx)
4166 const struct ctables_cell_value *cv
4167 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4168 : &ctx->cell->axes[a].cvs[i]);
4169 hash = hash_pointer (cv->category, hash);
4170 if (cv->category->type != CCT_TOTAL
4171 && cv->category->type != CCT_SUBTOTAL
4172 && cv->category->type != CCT_POSTCOMPUTE)
4173 hash = value_hash (&cv->value,
4174 var_get_width (nest->vars[i]), hash);
/* Search the bucket for the cell with those categories and values. */
4178 struct ctables_cell *tc;
4179 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4181 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4183 const struct ctables_nest *nest = s->nests[a];
4184 for (size_t i = 0; i < nest->n; i++)
4185 if (i != nest->scale_idx)
4187 const struct ctables_cell_value *p_cv
4188 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4189 : &ctx->cell->axes[a].cvs[i]);
4190 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4191 if (p_cv->category != t_cv->category
4192 || (p_cv->category->type != CCT_TOTAL
4193 && p_cv->category->type != CCT_SUBTOTAL
4194 && p_cv->category->type != CCT_POSTCOMPUTE
4195 && !value_equal (&p_cv->value,
4197 var_get_width (nest->vars[i]))))
/* Read the requested summary from the cell that was found. */
4209 const struct ctables_table *t = s->table;
4210 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4211 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4212 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
4213 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category references are resolved to a category with
   ctables_find_category_for_postcompute(), which is asserted to succeed, and
   then evaluated with ctables_pcexpr_evaluate_category().  Operator nodes are
   evaluated with ctables_pcexpr_evaluate_nonterminal(). */
4217 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4218 const struct ctables_pcexpr *e)
/* References that can cover many values: every value that occurred at the
   postcompute's position and that matches the referenced category is
   evaluated, and the results are accumulated into SUM. */
4225 case CTPO_CAT_NRANGE:
4226 case CTPO_CAT_SRANGE:
4227 case CTPO_CAT_MISSING:
4228 case CTPO_CAT_OTHERNM:
4230 struct ctables_cell_value cv = {
4231 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4233 assert (cv.category != NULL);
4235 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4236 const struct ctables_occurrence *o;
4239 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4240 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4241 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4243 cv.value = o->value;
4244 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* References to a single cell, with the expression's number as the value. */
4249 case CTPO_CAT_NUMBER:
4250 case CTPO_CAT_SUBTOTAL:
4251 case CTPO_CAT_TOTAL:
4253 struct ctables_cell_value cv = {
4254 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4255 .value = { .f = e->number },
4257 assert (cv.category != NULL);
4258 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String reference.  When the variable is wider than the string, a copy
   padded with spaces to the variable's width is used as the value.  The
   category found can be numeric or string, and the value is set
   accordingly. */
4261 case CTPO_CAT_STRING:
4263 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4265 if (width > e->string.length)
4267 s = xmalloc (width);
4268 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4271 const struct ctables_category *category
4272 = ctables_find_category_for_postcompute (
4273 ctx->section->table->ctables->dict,
4274 ctx->cats, ctx->parse_format, e);
4275 assert (category != NULL);
4277 struct ctables_cell_value cv = { .category = category };
4278 if (category->type == CCT_NUMBER)
4279 cv.value.f = category->number;
4280 else if (category->type == CCT_STRING)
4281 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4285 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five with two operands and one (ctpo_neg) with one. */
4291 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4294 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4297 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4300 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4303 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4306 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category among the category values of CELL in section
   S by scanning every nest position on every axis.  CELL must have its
   `postcompute' flag set, and a postcompute category must be found; both are
   asserted.

   The nest index of the category is stored in *PC_A_IDX_P (and presumably its
   axis in *PC_A_P). */
4312 static const struct ctables_category *
4313 ctables_cell_postcompute (const struct ctables_section *s,
4314 const struct ctables_cell *cell,
4315 enum pivot_axis_type *pc_a_p,
4318 assert (cell->postcompute);
4319 const struct ctables_category *pc_cat = NULL;
4320 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4321 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4323 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4324 if (cv->category->type == CCT_POSTCOMPUTE)
4328 /* Multiple postcomputes cross each other. The value is
4333 pc_cat = cv->category;
4337 *pc_a_idx_p = pc_a_idx;
4341 assert (pc_cat != NULL);
/* Calculates the value of postcompute cell CELL in section S for summary
   specification SS, by evaluating the expression of the postcompute category
   that ctables_cell_postcompute() finds in CELL.

   The postcompute's own summary specifications are searched for one that
   matches SS in function, weighting, calc_area, and percentile; for a match,
   its format and is_ctables_format are stored in *FORMAT and
   *IS_CTABLES_FORMAT. */
4346 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4347 const struct ctables_cell *cell,
4348 const struct ctables_summary_spec *ss,
4349 struct fmt_spec *format,
4350 bool *is_ctables_format,
4353 enum pivot_axis_type pc_a = 0;
4354 size_t pc_a_idx = 0;
4355 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4356 s, cell, &pc_a, &pc_a_idx);
4360 const struct ctables_postcompute *pc = pc_cat->pc;
4363 for (size_t i = 0; i < pc->specs->n; i++)
4365 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4366 if (ss->function == ss2->function
4367 && ss->weighting == ss2->weighting
4368 && ss->calc_area == ss2->calc_area
4369 && ss->percentile == ss2->percentile)
4371 *format = ss2->format;
4372 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable at the
   postcompute's position. */
4378 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4379 const struct ctables_categories *cats = s->table->categories[
4380 var_get_dict_index (var)];
4381 struct ctables_pcexpr_evaluate_ctx ctx = {
4386 .pc_a_idx = pc_a_idx,
4387 .summary_idx = summary_idx,
4388 .parse_format = pc_cat->parse_format,
4390 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a UTF-8 string
   obtained from data_out_stretchy(), then adjusts the position of a minus
   sign as described in the comment below. */
4394 ctables_format (double d, const struct fmt_spec *format,
4395 const struct fmt_settings *settings)
4397 const union value v = { .f = d };
4398 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4400 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4401 produce the results we want for negative numbers, putting the negative
4402 sign in the wrong spot, before the prefix instead of after it. We can't,
4403 in fact, produce the desired results using a custom-currency
4404 specification. Instead, we postprocess the output, moving the negative
4407 NEQUAL: "-N=3" => "N=-3"
4408 PAREN: "-(3)" => "(-3)"
4409 PCTPAREN: "-(3%)" => "(-3%)"
4411 This transformation doesn't affect NEGPAREN. */
4412 char *minus_src = strchr (s, '-');
/* Only the first '-' is considered, and not one that directly follows 'E'
   (presumably an exponent's sign). */
4413 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4415 char *n_equals = strstr (s, "N=");
4416 char *lparen = strchr (s, '(');
/* The destination is the '=' of "N=" if present, otherwise the first '('. */
4417 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4419 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  Two conditions are tested for
   each nest: whether it is anything other than a single variable that is also
   its scale variable (index 0), and whether that variable's label setting in
   T->ctables->vlabels is anything other than CTVL_NONE. */
4425 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4427 for (size_t i = 0; i < t->stacks[a].n; i++)
4429 struct ctables_nest *nest = &t->stacks[a].nests[i];
4430 if (nest->n != 1 || nest->scale_idx != 0)
4433 enum ctables_vlabel vlabel
4434 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4435 if (vlabel != CTVL_NONE)
/* Builds a pivot table from the cells accumulated for table T, which belongs
   to CTABLES command CT, and submits it with pivot_table_submit().

   Stages, in order:

   - Create the pivot table, then set its caption, corner text, and look.

   - If a separate summary dimension is needed, create a "Statistics"
     dimension with one leaf per entry in t->summary_specs.

   - If t->clabels_example is set, create a "Row Categories" or "Column
     Categories" dimension with one leaf per entry in t->clabels_values.

   - For each axis, create a "Rows", "Columns", or "Layers" dimension when
     the axis is in use or is the summary axis.  Then, for each nest in that
     axis's stack, gather the sections that use the nest, sort their cells,
     and create pivot categories level by level, storing the resulting leaf
     index in cell->axes[a].leaf.

   - Sort all cells with ctables_cell_compare_leaf_3way and give each area
     that does not yet have one a sequence number, counted separately per
     area type.

   - For each cell and each of its summary specs, assemble the dimension
     indexes, compute the value (through the postcompute path when
     cell->postcompute is set, otherwise ctables_summary_value()), wrap it
     in a pivot value, and store it with pivot_table_put().  The pivot value
     is text for counts below ct->hide_threshold, for zero when ct->zero is
     set, for SYSMIS when ct->missing is set, and for CTABLES-specific
     formats; otherwise it is a number carrying the chosen format. */
4442 ctables_table_output (struct ctables *ct, struct ctables_table *t)
/* NOTE(review): the condition that selects between the user title and the
   default "Custom Tables" title is on a line not visible here; presumably
   it tests t->title -- confirm. */
4444 struct pivot_table *pt = pivot_table_create__ (
4446 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4447 : pivot_value_new_text (N_("Custom Tables"))),
4450 pivot_table_set_caption (
4451 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4453 pivot_table_set_corner_text (
4454 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate summary dimension is used when the summary labels are placed
   on a different axis than the summaries themselves, or when the labels are
   hidden but there is more than one summary spec. */
4456 bool summary_dimension = (t->summary_axis != t->slabels_axis
4457 || (!t->slabels_visible
4458 && t->summary_specs.n > 1));
4459 if (summary_dimension)
4461 struct pivot_dimension *d = pivot_dimension_create (
4462 pt, t->slabels_axis, N_("Statistics"));
4463 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4464 if (!t->slabels_visible)
4465 d->hide_all_labels = true;
4466 for (size_t i = 0; i < specs->n; i++)
4467 pivot_category_create_leaf (
4468 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A categories dimension exists only when category labels were moved off
   their own axis, which is recorded by a non-null t->clabels_example. */
4471 bool categories_dimension = t->clabels_example != NULL;
4472 if (categories_dimension)
4474 struct pivot_dimension *d = pivot_dimension_create (
4475 pt, t->label_axis[t->clabels_from_axis],
4476 t->clabels_from_axis == PIVOT_AXIS_ROW
4477 ? N_("Row Categories")
4478 : N_("Column Categories"));
4479 const struct variable *var = t->clabels_example;
4480 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4481 for (size_t i = 0; i < t->n_clabels_values; i++)
4483 const struct ctables_value *value = t->clabels_values[i];
4484 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4485 assert (cat != NULL);
4486 pivot_category_create_leaf (
4487 d->root, ctables_category_create_value_label (c, cat,
4493 pivot_table_set_look (pt, ct->look);
4494 struct pivot_dimension *d[PIVOT_N_AXES];
4495 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4497 static const char *names[] = {
4498 [PIVOT_AXIS_ROW] = N_("Rows"),
4499 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4500 [PIVOT_AXIS_LAYER] = N_("Layers"),
4502 d[a] = (t->axes[a] || a == t->summary_axis
4503 ? pivot_dimension_create (pt, a, names[a])
4508 assert (t->axes[a]);
4510 for (size_t i = 0; i < t->stacks[a].n; i++)
4512 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections whose nest on axis A is this nest, totalling their
   cell counts and tracking the largest nest depth seen. */
4513 struct ctables_section **sections = xnmalloc (t->n_sections,
4515 size_t n_sections = 0;
4517 size_t n_total_cells = 0;
4518 size_t max_depth = 0;
4519 for (size_t j = 0; j < t->n_sections; j++)
4520 if (t->sections[j].nests[a] == nest)
4522 struct ctables_section *s = &t->sections[j];
4523 sections[n_sections++] = s;
4524 n_total_cells += hmap_count (&s->cells);
4526 size_t depth = s->nests[a]->n;
4527 max_depth = MAX (depth, max_depth);
/* Gather cells from the collected sections into one array and sort it with
   ctables_cell_compare_3way.

   NOTE(review): any per-cell filter between the loop header and the
   assignment below is not visible here. */
4530 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4532 size_t n_sorted = 0;
4534 for (size_t j = 0; j < n_sections; j++)
4536 struct ctables_section *s = sections[j];
4538 struct ctables_cell *cell;
4539 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4541 sorted[n_sorted++] = cell;
4542 assert (n_sorted <= n_total_cells);
4545 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4546 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* One ctables_level describes one layer of pivot categories to be created
   for this nest. */
4548 struct ctables_level
4550 enum ctables_level_type
4552 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4553 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4554 CTL_SUMMARY, /* Summary functions. */
4558 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most two levels per nested variable (label and category) plus one
   summary level, hence the 1 + 2 * max_depth capacity. */
4561 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4562 size_t n_levels = 0;
4563 for (size_t k = 0; k < nest->n; k++)
4565 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4566 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4568 if (vlabel != CTVL_NONE)
4570 levels[n_levels++] = (struct ctables_level) {
4572 .vlabel = (enum settings_value_show) vlabel,
/* A category level is added for every non-scale variable, except that the
   innermost variable gets one only when its labels stay on this axis. */
4577 if (nest->scale_idx != k
4578 && (k != nest->n - 1 || t->label_axis[a] == a))
4580 levels[n_levels++] = (struct ctables_level) {
4581 .type = CTL_CATEGORY,
/* Without a separate summary dimension, the summaries become the innermost
   level on the axis that carries the summary labels. */
4587 if (!summary_dimension && a == t->slabels_axis)
4589 levels[n_levels++] = (struct ctables_level) {
4590 .type = CTL_SUMMARY,
4591 .var_idx = SIZE_MAX,
4595 /* Pivot categories:
4597 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4598 - category for nest->vars[0], if nest->scale_idx != 0
4599 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4600 - category for nest->vars[1], if nest->scale_idx != 1
4602 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4603 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4604 - summary function, if 'a == t->slabels_axis && a ==
4607 Additional dimensions:
4609 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4611 - If 't->label_axis[b] == a' for some 'b != a', add a category
4616 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4618 for (size_t j = 0; j < n_sorted; j++)
4620 struct ctables_cell *cell = sorted[j];
4621 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4623 size_t n_common = 0;
4626 for (; n_common < n_levels; n_common++)
4628 const struct ctables_level *level = &levels[n_common];
4629 if (level->type == CTL_CATEGORY)
4631 size_t var_idx = level->var_idx;
4632 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4633 if (prev->axes[a].cvs[var_idx].category != c)
4635 else if (c->type != CCT_SUBTOTAL
4636 && c->type != CCT_TOTAL
4637 && c->type != CCT_POSTCOMPUTE
4638 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4639 &cell->axes[a].cvs[var_idx].value,
4640 var_get_type (nest->vars[var_idx])))
4646 for (size_t k = n_common; k < n_levels; k++)
4648 const struct ctables_level *level = &levels[k];
4649 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4650 if (level->type == CTL_SUMMARY)
4652 assert (k == n_levels - 1);
4654 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4655 for (size_t m = 0; m < specs->n; m++)
4657 int leaf = pivot_category_create_leaf (
4658 parent, ctables_summary_label (&specs->specs[m],
4666 const struct variable *var = nest->vars[level->var_idx];
4667 struct pivot_value *label;
4668 if (level->type == CTL_VAR)
4670 label = pivot_value_new_variable (var);
4671 label->variable.show = level->vlabel;
4673 else if (level->type == CTL_CATEGORY)
4675 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4676 label = ctables_category_create_value_label (
4677 t->categories[var_get_dict_index (var)],
4678 cv->category, var, &cv->value);
4683 if (k == n_levels - 1)
4684 prev_leaf = pivot_category_create_leaf (parent, label);
4686 groups[k] = pivot_category_create_group__ (parent, label);
4690 cell->axes[a].leaf = prev_leaf;
4699 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4703 size_t n_total_cells = 0;
4704 for (size_t j = 0; j < t->n_sections; j++)
4705 n_total_cells += hmap_count (&t->sections[j].cells);
4707 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4708 size_t n_sorted = 0;
4709 for (size_t j = 0; j < t->n_sections; j++)
4711 const struct ctables_section *s = &t->sections[j];
4712 struct ctables_cell *cell;
4713 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4715 sorted[n_sorted++] = cell;
4717 assert (n_sorted <= n_total_cells);
4718 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4720 size_t ids[N_CTATS];
4721 memset (ids, 0, sizeof ids);
4722 for (size_t j = 0; j < n_sorted; j++)
4724 struct ctables_cell *cell = sorted[j];
4725 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4727 struct ctables_area *area = cell->areas[at];
4728 if (!area->sequence)
4729 area->sequence = ++ids[at];
4736 for (size_t i = 0; i < t->n_sections; i++)
4738 struct ctables_section *s = &t->sections[i];
4740 struct ctables_cell *cell;
4741 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4746 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4747 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4748 for (size_t j = 0; j < specs->n; j++)
4751 size_t n_dindexes = 0;
4753 if (summary_dimension)
4754 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4756 if (categories_dimension)
4758 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4759 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4760 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4761 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4764 dindexes[n_dindexes++] = ctv->leaf;
4767 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4770 int leaf = cell->axes[a].leaf;
4771 if (a == t->summary_axis && !summary_dimension)
4773 dindexes[n_dindexes++] = leaf;
4776 const struct ctables_summary_spec *ss = &specs->specs[j];
4778 struct fmt_spec format = specs->specs[j].format;
4779 bool is_ctables_format = ss->is_ctables_format;
4780 double d = (cell->postcompute
4781 ? ctables_cell_calculate_postcompute (
4782 s, cell, ss, &format, &is_ctables_format, j)
4783 : ctables_summary_value (cell->areas,
4784 &cell->summaries[j], ss));
4786 struct pivot_value *value;
4787 if (ct->hide_threshold != 0
4788 && d < ct->hide_threshold
4789 && ss->function == CTSF_COUNT)
4791 value = pivot_value_new_user_text_nocopy (
4792 xasprintf ("<%d", ct->hide_threshold));
4794 else if (d == 0 && ct->zero)
4795 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4796 else if (d == SYSMIS && ct->missing)
4797 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4798 else if (is_ctables_format)
4799 value = pivot_value_new_user_text_nocopy (
4800 ctables_format (d, &format, &ct->ctables_formats));
4803 value = pivot_value_new_number (d);
4804 value->numeric.format = format;
4806 /* XXX should text values be right-justified? */
4807 pivot_table_put (pt, dindexes, n_dindexes, value);
4812 pivot_table_submit (pt);
/* Validates a request to move the category labels of axis A in table T to
   the position given by t->label_axis[a].

   The innermost variable of the first nest on the axis (v0) is recorded in
   t->clabels_example and used as the reference.  Errors are reported with
   msg (SE, ...) when:

   - v0's categories include a CCT_FUNCTION entry (sorting based on a
     summary function);
   - the innermost variable of any nest is that nest's scale variable;
   - another nest's innermost variable differs from v0 in width, in value
     labels, or in category specifications.

   The error text names the subcommand as ROWLABELS or COLLABELS and the
   position as LAYER or OPPOSITE.

   NOTE(review): the early exits and return statements are not visible here.
   The caller combines two calls with &&, so the function presumably returns
   true on success and false after reporting an error -- confirm. */
4816 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4818 enum pivot_axis_type label_pos = t->label_axis[a];
4822 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4823 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4825 const struct ctables_stack *stack = &t->stacks[a];
4829 const struct ctables_nest *n0 = &stack->nests[0];
4832 assert (stack->n == 1);
/* v0 is the innermost variable of the first nest, and c0 is its category
   specification. */
4836 const struct variable *v0 = n0->vars[n0->n - 1];
4837 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4838 t->clabels_example = v0;
4840 for (size_t i = 0; i < c0->n_cats; i++)
4841 if (c0->cats[i].type == CCT_FUNCTION)
4843 msg (SE, _("%s=%s is not allowed with sorting based "
4844 "on a summary function."),
4845 subcommand_name, pos_name);
/* The innermost variable must not be the scale variable of its nest. */
4848 if (n0->n - 1 == n0->scale_idx)
4850 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4851 "but %s is a scale variable."),
4852 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest's innermost variable must be compatible with v0. */
4856 for (size_t i = 1; i < stack->n; i++)
4858 const struct ctables_nest *ni = &stack->nests[i];
4860 const struct variable *vi = ni->vars[ni->n - 1];
4861 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4863 if (ni->n - 1 == ni->scale_idx)
4865 msg (SE, _("%s=%s requires the variables to be moved to be "
4866 "categorical, but %s is a scale variable."),
4867 subcommand_name, pos_name, var_get_name (vi));
4870 if (var_get_width (v0) != var_get_width (vi))
4872 msg (SE, _("%s=%s requires the variables to be "
4873 "moved to have the same width, but %s has "
4874 "width %d and %s has width %d."),
4875 subcommand_name, pos_name,
4876 var_get_name (v0), var_get_width (v0),
4877 var_get_name (vi), var_get_width (vi));
4880 if (!val_labs_equal (var_get_value_labels (v0),
4881 var_get_value_labels (vi)))
4883 msg (SE, _("%s=%s requires the variables to be "
4884 "moved to have the same value labels, but %s "
4885 "and %s have different value labels."),
4886 subcommand_name, pos_name,
4887 var_get_name (v0), var_get_name (vi));
4890 if (!ctables_categories_equal (c0, ci))
4892 msg (SE, _("%s=%s requires the variables to be "
4893 "moved to have the same category "
4894 "specifications, but %s and %s have different "
4895 "category specifications."),
4896 subcommand_name, pos_name,
4897 var_get_name (v0), var_get_name (vi));
/* Registers VAR in the growable array *SUM_VARS, which currently holds *N
   entries with room for *ALLOCATED.  The array is searched linearly first;
   when VAR is not found, the array is enlarged with x2nrealloc() if it is
   full and VAR is stored at index *N.

   NOTE(review): the return statements and the increment of *N are not
   visible here.  The caller stores the result in sum_var_idx, so the
   function presumably returns the index of VAR within *SUM_VARS, whether
   found or newly added -- confirm. */
4906 add_sum_var (struct variable *var,
4907 struct variable ***sum_vars, size_t *n, size_t *allocated)
4909 for (size_t i = 0; i < *n; i++)
4910 if (var == (*sum_vars)[i])
4913 if (*n >= *allocated)
4914 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4915 (*sum_vars)[*n] = var;
/* Maps AREA to another area type.

   NOTE(review): only two of the results, CTAT_LAYERCOL and CTAT_LAYERROW,
   are visible here; the case labels that select them, and all other cases,
   are not.  The name and the caller, which applies this when row and column
   labels are moved to the opposite axis, suggest that row-oriented and
   column-oriented areas are exchanged -- confirm against the full
   switch. */
4919 static enum ctables_area_type
4920 rotate_area (enum ctables_area_type area)
4931 return CTAT_LAYERCOL;
4934 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A and records the variables needed by
   CTSF_areaPCT_SUM summary specs in *SUM_VARS (see add_sum_var() for the
   meaning of N and ALLOCATED).

   For each summary spec in a->specs whose function is CTSF_areaPCT_SUM,
   a->var is registered and the value returned by add_sum_var() is stored in
   the spec's sum_var_idx.  The function also recurses into both elements of
   a->subs.

   NOTE(review): the tests that choose between the spec loop and the
   recursion (presumably a switch on the axis operator, plus a null check
   on A) are not visible here -- confirm. */
4947 enumerate_sum_vars (const struct ctables_axis *a,
4948 struct variable ***sum_vars, size_t *n, size_t *allocated)
4956 for (size_t i = 0; i < N_CSVS; i++)
4957 for (size_t j = 0; j < a->specs[i].n; j++)
4959 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4960 if (spec->function == CTSF_areaPCT_SUM)
4961 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4967 for (size_t i = 0; i < 2; i++)
4968 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Derives the working data structures of table T from its parsed axes.

   Stages, in order:

   - For each axis, build the stack of nests with enumerate_fts() and, for
     every nest and area type, fill nest->areas[at] with the indexes of the
     variables that define that area.  An axis without variables instead
     gets a single empty nest, and its label_axis entry is reset to the axis
     itself.

   - For each nest on the summary axis: when no cell summary specs were
     given, install one default spec (CTSF_MEAN for a scale set, CTSF_COUNT
     otherwise) and clone it for totals; when only the total specs are
     missing, clone the cell specs.  When row labels are moved to columns or
     column labels to rows, each spec's calc_area is passed through
     rotate_area().  When t->ctables->smissing_listwise is set, the scale
     variables of the other nests in the same group are collected as
     listwise variables.

   - Merge the summary specs of all nests on the summary axis into
     t->summary_specs by repeatedly taking the minimum according to
     merge_item_compare_3way, storing each spec's position in the merged
     list in its axis_idx.

   - Collect the sum variables with enumerate_sum_vars().

   Returns true only if ctables_check_label_position() succeeds for both
   the row axis and the column axis. */
4974 ctables_prepare_table (struct ctables_table *t)
4976 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4979 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4981 for (size_t j = 0; j < t->stacks[a].n; j++)
4983 struct ctables_nest *nest = &t->stacks[a].nests[j];
4984 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4986 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4987 nest->n_areas[at] = 0;
/* For row-oriented and column-oriented area types, ata is the axis the
   area is named after and atb is the other one of row and column. */
4989 enum pivot_axis_type ata, atb;
4990 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4992 ata = PIVOT_AXIS_ROW;
4993 atb = PIVOT_AXIS_COLUMN;
4995 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4997 ata = PIVOT_AXIS_COLUMN;
4998 atb = PIVOT_AXIS_ROW;
5001 if (at == CTAT_LAYER
5002 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5003 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5004 ? a == atb && t->label_axis[a] != a
/* Counts k down from nest->n - 1.  Because k is an unsigned size_t, the
   k < nest->n test ends the loop once k wraps around below zero. */
5007 for (size_t k = nest->n - 1; k < nest->n; k--)
5008 if (k != nest->scale_idx)
5010 nest->areas[at][nest->n_areas[at]++] = k;
5016 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5017 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5018 : at == CTAT_TABLE ? true
/* Record every non-scale variable of the nest, outermost first. */
5022 for (size_t k = 0; k < nest->n; k++)
5023 if (k != nest->scale_idx)
5024 nest->areas[at][nest->n_areas[at]++] = k;
/* n_drop is how many trailing entries to remove from the list just built.
   NOTE(review): its declaration and the switch that picks between the
   assignments below are not visible here. */
5030 #define L PIVOT_AXIS_LAYER
5031 n_drop = (t->clabels_from_axis == L ? a != L
5032 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5033 : t->clabels_from_axis == a ? 2
5040 n_drop = a == ata && t->label_axis[ata] == atb;
5045 n_drop = (a == ata ? t->label_axis[ata] == atb
5047 : t->clabels_from_axis == atb ? -1
5048 : t->clabels_to_axis != atb ? 1
5060 size_t n = nest->n_areas[at];
5063 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5064 nest->n_areas[at]--;
5069 for (int i = 0; i < n_drop; i++)
5070 if (nest->n_areas[at] > 0)
5071 nest->n_areas[at]--;
/* Axis without variables: give it one nest with no scale variable and no
   summary variable (both indexes set to SIZE_MAX). */
5078 struct ctables_nest *nest = xmalloc (sizeof *nest);
5079 *nest = (struct ctables_nest) {
5081 .scale_idx = SIZE_MAX,
5082 .summary_idx = SIZE_MAX
5084 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5086 /* There's no point in moving labels away from an axis that has no
5087 labels, so avoid dealing with the special cases around that. */
5088 t->label_axis[a] = a;
5091 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5092 for (size_t i = 0; i < stack->n; i++)
5094 struct ctables_nest *nest = &stack->nests[i];
/* No cell summary specs were given for this nest: install a default. */
5095 if (!nest->specs[CSV_CELL].n)
5097 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5098 ss->specs = xmalloc (sizeof *ss->specs);
5101 enum ctables_summary_function function
5102 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5106 nest->summary_idx = nest->n - 1;
5107 ss->var = nest->vars[nest->summary_idx];
5109 *ss->specs = (struct ctables_summary_spec) {
5110 .function = function,
5111 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5112 .format = ctables_summary_default_format (function, ss->var),
5115 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5116 &nest->specs[CSV_CELL]);
5118 else if (!nest->specs[CSV_TOTAL].n)
5119 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5120 &nest->specs[CSV_CELL]);
/* Row labels shown in columns, or column labels shown in rows: adjust the
   area each summary is computed over. */
5122 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5123 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5125 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5126 for (size_t i = 0; i < nest->specs[sv].n; i++)
5128 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5129 const struct ctables_function_info *cfi =
5130 &ctables_function_info[ss->function];
5132 ss->calc_area = rotate_area (ss->calc_area);
5136 if (t->ctables->smissing_listwise)
5138 struct variable **listwise_vars = NULL;
5140 size_t allocated = 0;
/* Scan the nests from this nest's group head onward that share the same
   group_head, collecting the scale variable of each other nest that has
   one (scale_idx < n). */
5142 for (size_t j = nest->group_head; j < stack->n; j++)
5144 const struct ctables_nest *other_nest = &stack->nests[j];
5145 if (other_nest->group_head != nest->group_head)
5148 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5151 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5152 sizeof *listwise_vars);
5153 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* NOTE(review): the condition guarding the xmemdup() below is not visible
   here; presumably it gives each later variant its own copy so that the
   sets do not share one array -- confirm. */
5156 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5159 listwise_vars = xmemdup (listwise_vars,
5160 n * sizeof *listwise_vars);
5161 nest->specs[sv].listwise_vars = listwise_vars;
5162 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into t->summary_specs. */
5167 struct ctables_summary_spec_set *merged = &t->summary_specs;
5168 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5170 for (size_t j = 0; j < stack->n; j++)
5172 const struct ctables_nest *nest = &stack->nests[j];
5174 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5175 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest pending item, append its spec to the merged list, then
   advance every item that compares equal to it, removing items whose sets
   are exhausted. */
5180 struct merge_item min = items[0];
5181 for (size_t j = 1; j < n_left; j++)
5182 if (merge_item_compare_3way (&items[j], &min) < 0)
5185 if (merged->n >= merged->allocated)
5186 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5187 sizeof *merged->specs);
5188 merged->specs[merged->n++] = min.set->specs[min.ofs];
5190 for (size_t j = 0; j < n_left; )
5192 if (merge_item_compare_3way (&items[j], &min) == 0)
5194 struct merge_item *item = &items[j];
5195 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5196 if (++item->ofs >= item->set->n)
5198 items[j] = items[--n_left];
5207 size_t allocated_sum_vars = 0;
5208 enumerate_sum_vars (t->axes[t->summary_axis],
5209 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5211 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5212 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, records the value of the innermost variable of every nest on
   axis A of table T, so that it can later become a leaf in the moved
   category-label dimension.  A value is passed to ctables_value_insert()
   based on a ctables_categories_match() test against the variable's
   category specification.

   NOTE(review): the statement that follows the numeric SYSMIS test is not
   visible here; presumably system-missing values are skipped -- confirm. */
5216 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5217 enum pivot_axis_type a)
5219 struct ctables_stack *stack = &t->stacks[a];
5220 for (size_t i = 0; i < stack->n; i++)
5222 const struct ctables_nest *nest = &stack->nests[i];
5223 const struct variable *var = nest->vars[nest->n - 1];
5224 const union value *value = case_data (c, var);
5226 if (var_is_numeric (var) && value->f == SYSMIS)
5229 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5231 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort().  A_ and B_ each point to a pointer to a
   struct ctables_value, and WIDTH_ points to an int holding the width of
   the values.  Returns the result of value_compare_3way() on the two
   values. */
5236 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5238 const struct ctables_value *const *ap = a_;
5239 const struct ctables_value *const *bp = b_;
5240 const struct ctables_value *a = *ap;
5241 const struct ctables_value *b = *bp;
5242 const int *width = width_;
5243 return value_compare_3way (&a->value, &b->value, *width);
/* Turns the set of moved category-label values collected for table T into
   the sorted array t->clabels_values and numbers the entries.

   Steps: add the labeled values of t->clabels_example that match its
   category specification; copy all entries of t->clabels_values_map into a
   newly allocated array; sort the array by value; store each entry's
   array index in its leaf member.

   NOTE(review): the condition under which the labeled values are added,
   and any condition on the sort, are not visible here; presumably the
   former depends on the show_empty setting of the categories -- confirm. */
5247 ctables_sort_clabels_values (struct ctables_table *t)
5249 const struct variable *v0 = t->clabels_example;
5250 int width = var_get_width (v0);
5252 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5255 const struct val_labs *val_labs = var_get_value_labels (v0);
5256 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5257 vl = val_labs_next (val_labs, vl))
5258 if (ctables_categories_match (c0, &vl->value, v0))
5259 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array with one slot per stored value. */
5262 size_t n = hmap_count (&t->clabels_values_map);
5263 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5265 struct ctables_value *clv;
5267 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5268 t->clabels_values[i++] = clv;
5269 t->n_clabels_values = n;
5272 sort (t->clabels_values, n, sizeof *t->clabels_values,
5273 compare_clabels_values_3way, &width);
/* The position in the sorted array is the leaf index used when cell values
   are placed in the pivot table. */
5275 for (size_t i = 0; i < n; i++)
5276 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the category specification
   CATS names explicitly or selects from VAR's value labels, so that they
   count as occurring even if no case contains them.

   Each category in CATS is handled according to its type.  The branches
   visible below add, respectively: a single number; a single string padded
   with spaces to VAR's width; labeled values inside a numeric range;
   labeled values inside a string range; labeled values that are missing
   values of VAR; all labeled values; and labeled values that are not
   missing, or all of them when c->include_missing is set.

   NOTE(review): most case labels of the switch are not visible here, so
   the mapping from category type to branch is inferred from the branch
   bodies alone.  CCT_POSTCOMPUTE and CCT_EXCLUDED_MISSING are the only
   labels shown, and their bodies are not visible. */
5280 ctables_add_category_occurrences (const struct variable *var,
5281 struct hmap *occurrences,
5282 const struct ctables_categories *cats)
5284 const struct val_labs *val_labs = var_get_value_labels (var);
5286 for (size_t i = 0; i < cats->n_cats; i++)
5288 const struct ctables_category *c = &cats->cats[i];
5292 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5298 int width = var_get_width (var);
5300 value_init (&value, width);
5301 value_copy_buf_rpad (&value, width,
5302 CHAR_CAST (uint8_t *, c->string.string),
5303 c->string.length, ' ');
5304 ctables_add_occurrence (var, &value, occurrences);
5305 value_destroy (&value, width);
/* Numeric range: both bounds in c->nrange are inclusive. */
5310 assert (var_is_numeric (var));
5311 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5312 vl = val_labs_next (val_labs, vl))
5313 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5314 ctables_add_occurrence (var, &vl->value, occurrences);
5318 assert (var_is_alpha (var));
5319 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5320 vl = val_labs_next (val_labs, vl))
5321 if (in_string_range (&vl->value, var, c->srange))
5322 ctables_add_occurrence (var, &vl->value, occurrences);
5326 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5327 vl = val_labs_next (val_labs, vl))
5328 if (var_is_value_missing (var, &vl->value))
5329 ctables_add_occurrence (var, &vl->value, occurrences);
5333 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5334 vl = val_labs_next (val_labs, vl))
5335 ctables_add_occurrence (var, &vl->value, occurrences);
5338 case CCT_POSTCOMPUTE:
5348 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5349 vl = val_labs_next (val_labs, vl))
5350 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5351 ctables_add_occurrence (var, &vl->value, occurrences);
5354 case CCT_EXCLUDED_MISSING:
/* Recursive helper that inserts a cell into section S for every
   combination of occurring values across all nested variables on all axes.

   The recursion walks the variables in order: A is the current axis and
   A_IDX the index of the current variable in that axis's nest.  CATS holds
   the category chosen so far for each variable, and C is a scratch case
   whose values are overwritten as the recursion proceeds.

   - Past the last axis, the combination is complete and is inserted with
     ctables_cell_insert__().
   - When the axis has no nest or A_IDX is past its last variable, the
     recursion moves on to the first variable of the next axis.
   - Otherwise each value in s->occurrences[a][a_idx] is copied into C and
     matched to its category before recursing to the next variable; after
     that, each CCT_POSTCOMPUTE category of the variable is tried as
     well.

   NOTE(review): any extra test applied to the current variable, such as
   skipping the scale variable, is not visible here. */
5361 ctables_section_recurse_add_empty_categories (
5362 struct ctables_section *s,
5363 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5364 enum pivot_axis_type a, size_t a_idx)
5366 if (a >= PIVOT_N_AXES)
5367 ctables_cell_insert__ (s, c, cats);
5368 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5369 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5372 const struct variable *var = s->nests[a]->vars[a_idx];
5373 const struct ctables_categories *categories = s->table->categories[
5374 var_get_dict_index (var)];
5375 int width = var_get_width (var);
5376 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5377 const struct ctables_occurrence *o;
5378 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence; the
   old value is destroyed first so that its storage is released. */
5380 union value *value = case_data_rw (c, var);
5381 value_destroy (value, width);
5382 value_clone (value, &o->value, width);
5383 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5384 assert (cats[a][a_idx] != NULL);
5385 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories have no data value of their own, so they are
   enumerated from the category list instead of from the occurrences. */
5388 for (size_t i = 0; i < categories->n_cats; i++)
5390 const struct ctables_category *cat = &categories->cats[i];
5391 if (cat->type == CCT_POSTCOMPUTE)
5393 cats[a][a_idx] = cat;
5394 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that did not occur in
   the data.

   First, for every non-scale variable on every axis, the variable's
   category specification is consulted: its show_empty flag is tested and
   ctables_add_category_occurrences() extends the variable's occurrence
   set.  Then a scratch case is created from the dictionary prototype and
   ctables_section_recurse_add_empty_categories() enumerates the
   combinations.

   NOTE(review): how the show_empty flag gates the calls below, and the
   release of the scratch case, are on lines not visible here. */
5401 ctables_section_add_empty_categories (struct ctables_section *s)
5403 bool show_empty = false;
5404 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5406 for (size_t k = 0; k < s->nests[a]->n; k++)
5407 if (k != s->nests[a]->scale_idx)
5409 const struct variable *var = s->nests[a]->vars[k];
5410 const struct ctables_categories *cats = s->table->categories[
5411 var_get_dict_index (var)];
5412 if (cats->show_empty)
5415 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Per-axis scratch arrays, one slot per nested variable, that the
   recursion fills in with the category chosen for each variable. */
5421 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5422 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5423 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5424 const struct ctables_category **cats[PIVOT_N_AXES] =
5426 [PIVOT_AXIS_LAYER] = layer_cats,
5427 [PIVOT_AXIS_ROW] = row_cats,
5428 [PIVOT_AXIS_COLUMN] = column_cats,
5430 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5431 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data while leaving the section usable:
   the occurrence sets of every non-scale variable, all cells together with
   their per-variable values and summaries, and all areas are removed from
   their hash maps, and the cell and area maps are shrunk afterward.

   NOTE(review): the free() calls for the occurrence, cell, and area
   structures themselves are presumably on lines not visible here. */
5436 ctables_section_clear (struct ctables_section *s)
5438 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5440 const struct ctables_nest *nest = s->nests[a];
5441 for (size_t i = 0; i < nest->n; i++)
5442 if (i != nest->scale_idx)
5444 const struct variable *var = nest->vars[i];
5445 int width = var_get_width (var);
5446 struct ctables_occurrence *o, *next;
5447 struct hmap *map = &s->occurrences[a][i];
5448 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5450 value_destroy (&o->value, width);
5451 hmap_delete (map, &o->node);
5458 struct ctables_cell *cell, *next_cell;
5459 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Release the values stored for each non-scale variable, then the array
   that holds them. */
5461 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5463 const struct ctables_nest *nest = s->nests[a];
5464 for (size_t i = 0; i < nest->n; i++)
5465 if (i != nest->scale_idx)
5466 value_destroy (&cell->axes[a].cvs[i].value,
5467 var_get_width (nest->vars[i]));
5468 free (cell->axes[a].cvs);
/* The summaries of a cell follow the spec set selected by cell->sv in the
   nest on the summary axis. */
5471 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5472 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5473 for (size_t i = 0; i < specs->n; i++)
5474 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5475 free (cell->summaries);
5477 hmap_delete (&s->cells, &cell->node);
5480 hmap_shrink (&s->cells);
5482 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5484 struct ctables_area *area, *next_area;
5485 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5489 hmap_delete (&s->areas[at], &area->node);
5492 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S.  The contents are cleared first
   with ctables_section_clear(); then the per-variable occurrence maps and
   the arrays that hold them are destroyed for each axis, followed by the
   cell map and the per-area-type maps. */
5497 ctables_section_uninit (struct ctables_section *s)
5499 ctables_section_clear (s);
5501 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5503 struct ctables_nest *nest = s->nests[a];
5504 for (size_t i = 0; i < nest->n; i++)
5505 hmap_destroy (&s->occurrences[a][i]);
5506 free (s->occurrences[a]);
5509 hmap_destroy (&s->cells);
5510 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5511 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated in table T so that it can be reused:
   clears every section and, when category labels were moved
   (t->clabels_example is set), empties t->clabels_values_map and frees and
   resets the t->clabels_values array.

   NOTE(review): the free() of each ctables_value is presumably on a line
   not visible here. */
5515 ctables_table_clear (struct ctables_table *t)
5517 for (size_t i = 0; i < t->n_sections; i++)
5518 ctables_section_clear (&t->sections[i]);
5520 if (t->clabels_example)
5522 int width = var_get_width (t->clabels_example);
5523 struct ctables_value *value, *next_value;
5524 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5525 &t->clabels_values_map)
5527 value_destroy (&value->value, width);
5528 hmap_delete (&t->clabels_values_map, &value->node);
5531 hmap_shrink (&t->clabels_values_map);
5533 free (t->clabels_values);
5534 t->clabels_values = NULL;
5535 t->n_clabels_values = 0;
/* Runs the CTABLES procedure over the cases in INPUT, which belong to
   dataset DS.

   Stages, in order:

   - Allocate and set up the sections of every table.
   - Divide the input into groups, one per split-file group or one overall
     (see the grouper creation below).
   - For each group, read every case, compute its dictionary, effective,
     and unweighted weights, insert it into every section of every table,
     and record moved category-label values for each axis whose labels are
     shown on another axis.
   - After each group, for every table: sort the moved category labels,
     add empty categories, output the table, and clear it for the next
     group.

   Returns the result of casegrouper_destroy().

   NOTE(review): the third parameter (the ct used throughout) is declared
   on a line not visible here. */
5540 ctables_execute (struct dataset *ds, struct casereader *input,
5543 for (size_t i = 0; i < ct->n_tables; i++)
5545 struct ctables_table *t = ct->tables[i];
/* One section per combination of nests across the three axes; an axis
   with an empty stack still counts as one. */
5546 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5547 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5548 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5549 sizeof *t->sections);
5550 size_t ix[PIVOT_N_AXES];
5551 ctables_table_add_section (t, 0, ix);
5554 struct dictionary *dict = dataset_dict (ds);
/* NOTE(review): the condition choosing between the two grouper
   constructors is not visible here; presumably it is the splitting flag,
   so that separate split-file groups are tabulated one by one and any
   other setting treats the whole input as one group -- confirm. */
5556 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5557 struct casegrouper *grouper
5559 ? casegrouper_create_splits (input, dict)
5560 : casegrouper_create_vars (input, NULL, 0));
5561 struct casereader *group;
5562 while (casegrouper_get_next_group (grouper, &group))
5566 struct ccase *c = casereader_peek (group, 0);
5569 output_split_file_values (ds, c);
5574 bool warn_on_invalid = true;
5575 for (struct ccase *c = casereader_read (group); c;
5576 case_unref (c), c = casereader_read (group))
/* d_weight comes from the dictionary weighting variable; e_weight comes
   from ct->e_weight when that variable is set. */
5578 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5579 double e_weight = (ct->e_weight
5580 ? var_force_valid_weight (ct->e_weight,
5581 case_num (c, ct->e_weight),
5585 [CTW_DICTIONARY] = d_weight,
5586 [CTW_EFFECTIVE] = e_weight,
5587 [CTW_UNWEIGHTED] = 1.0,
5590 for (size_t i = 0; i < ct->n_tables; i++)
5592 struct ctables_table *t = ct->tables[i];
5594 for (size_t j = 0; j < t->n_sections; j++)
5595 ctables_cell_insert (&t->sections[j], c, weight);
5597 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5598 if (t->label_axis[a] != a)
5599 ctables_insert_clabels_values (t, c, a);
5602 casereader_destroy (group);
5604 for (size_t i = 0; i < ct->n_tables; i++)
5606 struct ctables_table *t = ct->tables[i];
5608 if (t->clabels_example)
5609 ctables_sort_clabels_values (t);
5611 for (size_t j = 0; j < t->n_sections; j++)
5612 ctables_section_add_empty_categories (&t->sections[j]);
5614 ctables_table_output (ct, t);
5615 ctables_table_clear (t);
5618 return casegrouper_destroy (grouper);
/* Looks up the postcompute named NAME in ct->postcomputes.  Both the hash
   (utf8_hash_case_string) and the comparison (utf8_strcasecmp) ignore
   case.

   NOTE(review): the return statements are not visible here; presumably the
   matching postcompute is returned, or a null pointer when there is none
   -- confirm. */
5621 static struct ctables_postcompute *
5622 ctables_find_postcompute (struct ctables *ct, const char *name)
5624 struct ctables_postcompute *pc;
5625 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5626 utf8_hash_case_string (name, 0), &ct->postcomputes)
5627 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form

       &name = EXPR (expression)

   from LEXER and stores it in ct->postcomputes.  If a postcompute with
   the same name already exists, a warning is issued at the new
   definition's location, a note points at the previous one, and the old
   expression and location are destroyed; otherwise a new entry is
   allocated and inserted.  The entry's location is set to the span of the
   definition, and its label to the source text of the expression.

   NOTE(review): the third parameter (the ct used below), the failure
   returns, and the assignment of the new expression are on lines not
   visible here; the function presumably returns false on a syntax error
   and true on success -- confirm. */
5633 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The definition is taken to start one token before the current one. */
5636 int pcompute_start = lex_ofs (lexer) - 1;
5638 if (!lex_match (lexer, T_AND))
5640 lex_error_expecting (lexer, "&");
5643 if (!lex_force_id (lexer))
5646 char *name = ss_xstrdup (lex_tokss (lexer));
5649 if (!lex_force_match (lexer, T_EQUALS)
5650 || !lex_force_match_id (lexer, "EXPR")
5651 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression so that its source text can
   serve as the default label. */
5657 int expr_start = lex_ofs (lexer);
5658 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5659 int expr_end = lex_ofs (lexer) - 1;
5660 if (!expr || !lex_force_match (lexer, T_RPAREN))
5662 ctables_pcexpr_destroy (expr);
5666 int pcompute_end = lex_ofs (lexer) - 1;
5668 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5671 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5674 msg_at (SW, location, _("New definition of &%s will override the "
5675 "previous definition."),
5677 msg_at (SN, pc->location, _("This is the previous definition."));
5679 ctables_pcexpr_destroy (pc->expr);
5680 msg_location_destroy (pc->location);
5685 pc = xmalloc (sizeof *pc);
5686 *pc = (struct ctables_postcompute) { .name = name };
5687 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5688 utf8_hash_case_string (pc->name, 0));
5691 pc->location = location;
5693 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary functions with formats from LEXER into SSS,
   which is reset to empty first.

   Parsing continues until the end of the command, a slash, or one of the
   identifiers LABEL and HIDESOURCECATS.  Each item consists of a summary
   function, a percentile value from 0 to 100 when the function is
   CTSF_PTILE, and a format specifier.  Each parsed item is appended to
   sss->specs, which grows as needed.

   NOTE(review): the failure exits and return statements are not visible
   here; presumably a parse error leads to the final
   ctables_summary_spec_set_uninit() call and a false result, and success
   returns true -- confirm. */
5698 ctables_parse_pproperties_format (struct lexer *lexer,
5699 struct ctables_summary_spec_set *sss)
5701 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5703 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5704 && !(lex_token (lexer) == T_ID
5705 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5706 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5707 lex_tokss (lexer)))))
5709 /* Parse function. */
5710 enum ctables_summary_function function;
5711 enum ctables_weighting weighting;
5712 enum ctables_area_type area;
5713 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5716 /* Parse percentile. */
5717 double percentile = 0;
5718 if (function == CTSF_PTILE)
5720 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5722 percentile = lex_number (lexer);
5727 struct fmt_spec format;
5728 bool is_ctables_format;
5729 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the parsed item, doubling the array when it is full. */
5732 if (sss->n >= sss->allocated)
5733 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5734 sizeof *sss->specs);
5735 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5736 .function = function,
5737 .weighting = weighting,
5740 .percentile = percentile,
5742 .is_ctables_format = is_ctables_format,
5748 ctables_summary_spec_set_uninit (sss);
/* Parses the PPROPERTIES subcommand from LEXER.

   The syntax is a list of "&NAME" references to postcomputes already defined
   in CT, followed by any number of LABEL, FORMAT, and HIDESOURCECATS
   settings.  Each setting is applied to every postcompute in the list.

   NOTE(review): the return statements are not visible here.  The caller
   negates the result to detect failure, so presumably false is returned on
   error -- confirm. */
5753 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* PCS collects pointers to the postcomputes named on this subcommand. */
5755 struct ctables_postcompute **pcs = NULL;
5757 size_t allocated_pcs = 0;
5759 while (lex_match (lexer, T_AND))
5761 if (!lex_force_id (lexer))
5763 struct ctables_postcompute *pc
5764 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
/* Referring to a postcompute that has not been defined is an error. */
5767 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5772 if (n_pcs >= allocated_pcs)
5773 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply each setting to all of the collected postcomputes. */
5777 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5779 if (lex_match_id (lexer, "LABEL"))
5781 lex_match (lexer, T_EQUALS);
5782 if (!lex_force_string (lexer))
/* Replace any existing label with a copy of the string token. */
5785 for (size_t i = 0; i < n_pcs; i++)
5787 free (pcs[i]->label);
5788 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5793 else if (lex_match_id (lexer, "FORMAT"))
5795 lex_match (lexer, T_EQUALS);
5797 struct ctables_summary_spec_set sss;
5798 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own clone of the parsed set, replacing its
   previous specifications; the parsed original is released afterward. */
5801 for (size_t i = 0; i < n_pcs; i++)
5804 ctables_summary_spec_set_uninit (pcs[i]->specs);
5806 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5807 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5809 ctables_summary_spec_set_uninit (&sss);
5811 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5813 lex_match (lexer, T_EQUALS);
5814 bool hide_source_cats;
5815 if (!parse_bool (lexer, &hide_source_cats))
5817 for (size_t i = 0; i < n_pcs; i++)
5818 pcs[i]->hide_source_cats = hide_source_cats;
5822 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time with localtime(), formats it with strftime()
   according to the template FORMAT, and appends the result to OUT.

   NOTE(review): no check of the localtime() result or of the strftime()
   return value is visible here -- confirm that a null result or an
   over-long expansion cannot occur, or is handled elsewhere. */
5835 put_strftime (struct string *out, time_t now, const char *format)
5837 const struct tm *tm = localtime (&now);
5839 strftime (value, sizeof value, format, tm);
5840 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible here.  Callers use
   the result as a condition, so presumably it reports whether PREFIX was
   found and skipped -- confirm. */
5844 skip_prefix (struct substring *s, struct substring prefix)
5846 if (ss_starts_with (*s, prefix))
5848 ss_advance (s, prefix.length);
/* Appends to OUT a textual rendering of the tokens in LEXER at offsets
   EXPR_START up to, but not including, EXPR_END.

   An identifier token that names a variable in DICT with a variable label
   is rendered as that label; any other identifier is rendered as the token
   text.  The remaining visible code appends the source representation of a
   token with spaces around it.

   NOTE(review): square brackets are tested together with a nesting counter,
   but the statements that update the counter and that handle tokens inside
   brackets are not visible here -- confirm how bracketed text is treated. */
5856 put_table_expression (struct string *out, struct lexer *lexer,
5857 struct dictionary *dict, int expr_start, int expr_end)
5860 for (int ofs = expr_start; ofs < expr_end; ofs++)
5862 const struct token *t = lex_ofs_token (lexer, ofs);
5863 if (t->type == T_LBRACK)
5865 else if (t->type == T_RBRACK && nest > 0)
5871 else if (t->type == T_ID)
/* Prefer the variable label, when there is one, over the identifier. */
5873 const struct variable *var
5874 = dict_lookup_var (dict, t->string.string);
5875 const char *label = var ? var_get_label (var) : NULL;
5876 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space before the token, except at the start of the expression,
   before ")", or when OUT already ends in a space. */
5880 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5881 ds_put_byte (out, ' ');
5883 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5884 ds_put_cstr (out, repr);
/* Put a space after the token, except after the last token or after "(". */
5887 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5888 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding these keywords:

     )DATE   NOW formatted with the strftime() template "%x".
     )TIME   NOW formatted with the strftime() template "%X".
     )TABLE  the table expression, rendered by put_table_expression() from
             the tokens of LEXER between EXPR_START and EXPR_END, with DICT
             used to look up variables.

   A ")" that does not start one of these keywords is copied literally.

   NOTE(review): the enclosing loop and its exit are not visible here;
   presumably the steps below repeat until IN is exhausted -- confirm. */
5894 put_title_text (struct string *out, struct substring in, time_t now,
5895 struct lexer *lexer, struct dictionary *dict,
5896 int expr_start, int expr_end)
/* Copy everything before the next ")" verbatim. */
5900 size_t chunk = ss_find_byte (in, ')');
5901 ds_put_substring (out, ss_head (in, chunk));
5902 ss_advance (&in, chunk);
/* Nothing left means that there was no further ")". */
5903 if (ss_is_empty (in))
5906 if (skip_prefix (&in, ss_cstr (")DATE")))
5907 put_strftime (out, now, "%x");
5908 else if (skip_prefix (&in, ss_cstr (")TIME")))
5909 put_strftime (out, now, "%X");
5910 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5911 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ")" itself and move past it. */
5914 ds_put_byte (out, ')');
5915 ss_advance (&in, 1);
/* Parses and executes the CTABLES command, reading syntax from LEXER and
   data from DS.

   The visible code proceeds in these phases:

     1. Set up defaults: per-variable label display, a private copy of the
        default pivot table look, the struct ctables itself, and the
        CTABLES-specific number formats.

     2. Parse the global subcommands FORMAT, VLABELS, MRSETS, SMISSING,
        PCOMPUTE, PPROPERTIES, WEIGHT, and HIDESMALLCOUNTS, up to the first
        TABLE subcommand.

     3. For each TABLE subcommand, parse the table expression, check where
        scale variables and summaries appear, and parse the per-table
        subcommands SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST,
        and COMPARETEST.

     4. Run ctables_execute() over the data and commit the procedure.

   Returns CMD_SUCCESS if both execution and proc_commit() succeed, otherwise
   CMD_FAILURE.

   NOTE(review): many short statements (jumps to the error path, braces,
   some declarations) are not visible here, so the comments below describe
   only what the visible lines establish. */
5921 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5923 struct casereader *input = NULL;
/* Measurement-level guessing reads the dataset through INPUT.
   NOTE(review): any condition guarding proc_open() here is not visible. */
5925 struct measure_guesser *mg = measure_guesser_create (ds);
5928 input = proc_open (ds);
5929 measure_guesser_run (mg, input);
5930 measure_guesser_destroy (mg);
/* Every variable starts with the label display taken from the global
   show-variables setting. */
5933 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5934 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5935 enum settings_value_show tvars = settings_get_show_variables ();
5936 for (size_t i = 0; i < n_vars; i++)
5937 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default look with omit_empty off. */
5939 struct pivot_table_look *look = pivot_table_look_unshare (
5940 pivot_table_look_ref (pivot_table_look_get_default ()));
5941 look->omit_empty = false;
5943 struct ctables *ct = xmalloc (sizeof *ct);
5944 *ct = (struct ctables) {
5945 .dict = dataset_dict (ds),
5947 .ctables_formats = FMT_SETTINGS_INIT,
5949 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp handed to put_title_text() when titles are parsed. */
5952 time_t now = time (NULL);
/* Number styles for the CTABLES-specific formats.  Each has two variants;
   the dot_string variant is used when the decimal point setting is ".",
   otherwise the comma_string variant. */
5957 const char *dot_string;
5958 const char *comma_string;
5960 static const struct ctf ctfs[4] = {
5961 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5962 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5963 { CTEF_PAREN, "-,(,),", "-.(.)." },
5964 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5966 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5967 for (size_t i = 0; i < 4; i++)
5969 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5970 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5971 fmt_number_style_from_string (s));
/* Global subcommands, each introduced by "/", up to the first TABLE. */
5974 if (!lex_force_match (lexer, T_SLASH))
5977 while (!lex_match_id (lexer, "TABLE"))
/* FORMAT: column width limits, their units, and the text shown for empty
   and missing cells. */
5979 if (lex_match_id (lexer, "FORMAT"))
5981 double widths[2] = { SYSMIS, SYSMIS };
5982 double units_per_inch = 72.0;
5984 while (lex_token (lexer) != T_SLASH)
5986 if (lex_match_id (lexer, "MINCOLWIDTH"))
5988 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5991 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5993 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5996 else if (lex_match_id (lexer, "UNITS"))
5998 lex_match (lexer, T_EQUALS);
5999 if (lex_match_id (lexer, "POINTS"))
6000 units_per_inch = 72.0;
6001 else if (lex_match_id (lexer, "INCHES"))
6002 units_per_inch = 1.0;
6003 else if (lex_match_id (lexer, "CM"))
6004 units_per_inch = 2.54;
6007 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6011 else if (lex_match_id (lexer, "EMPTY"))
6016 lex_match (lexer, T_EQUALS);
6017 if (lex_match_id (lexer, "ZERO"))
6019 /* Nothing to do. */
6021 else if (lex_match_id (lexer, "BLANK"))
6022 ct->zero = xstrdup ("");
6023 else if (lex_force_string (lexer))
6025 ct->zero = ss_xstrdup (lex_tokss (lexer));
6031 else if (lex_match_id (lexer, "MISSING"))
6033 lex_match (lexer, T_EQUALS);
6034 if (!lex_force_string (lexer))
/* A string other than "." is copied; the value used for "." is not visible
   here. */
6038 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6039 ? ss_xstrdup (lex_tokss (lexer))
6045 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6046 "UNITS", "EMPTY", "MISSING");
6051 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6052 && widths[0] > widths[1])
6054 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was given from the selected unit to inches and
   then to units of 1/96 inch. */
6058 for (size_t i = 0; i < 2; i++)
6059 if (widths[i] != SYSMIS)
6061 int *wr = ct->look->width_ranges[TABLE_HORZ];
6062 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS VARIABLES=... DISPLAY=...: label display for the listed
   variables. */
6067 else if (lex_match_id (lexer, "VLABELS"))
6069 if (!lex_force_match_id (lexer, "VARIABLES"))
6071 lex_match (lexer, T_EQUALS);
6073 struct variable **vars;
6075 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6079 if (!lex_force_match_id (lexer, "DISPLAY"))
6084 lex_match (lexer, T_EQUALS);
6086 enum ctables_vlabel vlabel;
6087 if (lex_match_id (lexer, "DEFAULT"))
6088 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6089 else if (lex_match_id (lexer, "NAME"))
6091 else if (lex_match_id (lexer, "LABEL"))
6092 vlabel = CTVL_LABEL;
6093 else if (lex_match_id (lexer, "BOTH"))
6095 else if (lex_match_id (lexer, "NONE"))
6099 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* The setting is stored at each variable's dictionary index. */
6105 for (size_t i = 0; i < n_vars; i++)
6106 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
/* MRSETS COUNTDUPLICATES=boolean. */
6109 else if (lex_match_id (lexer, "MRSETS"))
6111 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6113 lex_match (lexer, T_EQUALS);
6114 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
/* SMISSING VARIABLE or LISTWISE. */
6117 else if (lex_match_id (lexer, "SMISSING"))
6119 if (lex_match_id (lexer, "VARIABLE"))
6120 ct->smissing_listwise = false;
6121 else if (lex_match_id (lexer, "LISTWISE"))
6122 ct->smissing_listwise = true;
6125 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
/* PCOMPUTE and PPROPERTIES are handled by their own parsers. */
6129 else if (lex_match_id (lexer, "PCOMPUTE"))
6131 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6134 else if (lex_match_id (lexer, "PPROPERTIES"))
6136 if (!ctables_parse_pproperties (lexer, ct))
/* WEIGHT VARIABLE=variable. */
6139 else if (lex_match_id (lexer, "WEIGHT"))
6141 if (!lex_force_match_id (lexer, "VARIABLE"))
6143 lex_match (lexer, T_EQUALS);
6144 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS: with COUNT, the given threshold is used; without it, the
   threshold becomes 5 unless it is already nonzero. */
6148 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6150 if (lex_match_id (lexer, "COUNT"))
6152 lex_match (lexer, T_EQUALS);
6153 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6156 ct->hide_threshold = lex_integer (lexer);
6159 else if (ct->hide_threshold == 0)
6160 ct->hide_threshold = 5;
6164 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6165 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6166 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6170 if (!lex_force_match (lexer, T_SLASH))
/* Parse each TABLE subcommand together with the subcommands that follow
   it.  The array of tables grows as needed. */
6174 size_t allocated_tables = 0;
6177 if (ct->n_tables >= allocated_tables)
6178 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6179 sizeof *ct->tables);
/* Allocate the category and categories objects for the new table, plus an
   array with one categories pointer per dictionary variable. */
6181 struct ctables_category *cat = xmalloc (sizeof *cat);
6182 *cat = (struct ctables_category) {
6184 .include_missing = false,
6185 .sort_ascending = true,
6188 struct ctables_categories *c = xmalloc (sizeof *c);
6189 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6190 *c = (struct ctables_categories) {
6197 struct ctables_categories **categories = xnmalloc (n_vars,
6198 sizeof *categories);
6199 for (size_t i = 0; i < n_vars; i++)
/* New table with default settings: summary labels visible on the column
   axis and each axis carrying its own category labels. */
6202 struct ctables_table *t = xmalloc (sizeof *t);
6203 *t = (struct ctables_table) {
6205 .slabels_axis = PIVOT_AXIS_COLUMN,
6206 .slabels_visible = true,
6207 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6209 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6210 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6211 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6213 .clabels_from_axis = PIVOT_AXIS_LAYER,
6214 .clabels_to_axis = PIVOT_AXIS_LAYER,
6215 .categories = categories,
6216 .n_categories = n_vars,
6219 ct->tables[ct->n_tables++] = t;
/* Table expression: rows, optionally followed by BY columns and BY layers.
   EXPR_START and EXPR_END delimit its tokens for later use in titles. */
6221 lex_match (lexer, T_EQUALS);
6222 int expr_start = lex_ofs (lexer);
6223 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6224 &t->axes[PIVOT_AXIS_ROW]))
6226 if (lex_match (lexer, T_BY))
6228 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6229 &t->axes[PIVOT_AXIS_COLUMN]))
6232 if (lex_match (lexer, T_BY))
6234 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6235 &t->axes[PIVOT_AXIS_LAYER]))
6239 int expr_end = lex_ofs (lexer);
6241 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6242 && !t->axes[PIVOT_AXIS_LAYER])
6244 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis.  Having them on more than
   one axis is reported as an error, with a note for each axis involved. */
6248 const struct ctables_axis *scales[PIVOT_N_AXES];
6249 size_t n_scales = 0;
6250 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6252 scales[a] = find_scale (t->axes[a]);
6258 msg (SE, _("Scale variables may appear only on one axis."));
6259 if (scales[PIVOT_AXIS_ROW])
6260 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6261 _("This scale variable appears on the rows axis."));
6262 if (scales[PIVOT_AXIS_COLUMN])
6263 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6264 _("This scale variable appears on the columns axis."));
6265 if (scales[PIVOT_AXIS_LAYER])
6266 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6267 _("This scale variable appears on the layer axis."));
/* Likewise, summaries may appear on only one axis. */
6271 const struct ctables_axis *summaries[PIVOT_N_AXES];
6272 size_t n_summaries = 0;
6273 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6275 summaries[a] = (scales[a]
6277 : find_categorical_summary_spec (t->axes[a]));
6281 if (n_summaries > 1)
6283 msg (SE, _("Summaries may appear only on one axis."));
6284 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6287 msg_at (SN, summaries[a]->loc,
6289 ? _("This variable on the rows axis has a summary.")
6290 : a == PIVOT_AXIS_COLUMN
6291 ? _("This variable on the columns axis has a summary.")
6292 : _("This variable on the layers axis has a summary."));
6294 msg_at (SN, summaries[a]->loc,
6295 _("This is a scale variable, so it always has a "
6296 "summary even if the syntax does not explicitly "
/* Pick the summary axis: an axis with a summary when any axis has one,
   otherwise a nonempty axis.
   NOTE(review): the loop exit is not visible; presumably the first
   qualifying axis wins -- confirm. */
6301 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6302 if (n_summaries ? summaries[a] : t->axes[a])
6304 t->summary_axis = a;
/* A table with no further subcommands is prepared right away. */
6308 if (lex_token (lexer) == T_ENDCMD)
6310 if (!ctables_prepare_table (t))
6314 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6317 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
/* SLABELS: position and visibility of summary labels. */
6319 if (lex_match_id (lexer, "SLABELS"))
6321 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6323 if (lex_match_id (lexer, "POSITION"))
6325 lex_match (lexer, T_EQUALS);
6326 if (lex_match_id (lexer, "COLUMN"))
6327 t->slabels_axis = PIVOT_AXIS_COLUMN;
6328 else if (lex_match_id (lexer, "ROW"))
6329 t->slabels_axis = PIVOT_AXIS_ROW;
6330 else if (lex_match_id (lexer, "LAYER"))
6331 t->slabels_axis = PIVOT_AXIS_LAYER;
6334 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6338 else if (lex_match_id (lexer, "VISIBLE"))
6340 lex_match (lexer, T_EQUALS);
6341 if (!parse_bool (lexer, &t->slabels_visible))
6346 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS: which axis displays the category labels of the row and column
   variables. */
6351 else if (lex_match_id (lexer, "CLABELS"))
6353 if (lex_match_id (lexer, "AUTO"))
6355 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6356 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6358 else if (lex_match_id (lexer, "ROWLABELS"))
6360 lex_match (lexer, T_EQUALS);
6361 if (lex_match_id (lexer, "OPPOSITE"))
6362 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6363 else if (lex_match_id (lexer, "LAYER"))
6364 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6367 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6371 else if (lex_match_id (lexer, "COLLABELS"))
6373 lex_match (lexer, T_EQUALS);
6374 if (lex_match_id (lexer, "OPPOSITE"))
6375 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6376 else if (lex_match_id (lexer, "LAYER"))
6377 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6380 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6386 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
/* CRITERIA CILEVEL=number, checked against the half-open range from 0 to
   100. */
6391 else if (lex_match_id (lexer, "CRITERIA"))
6393 if (!lex_force_match_id (lexer, "CILEVEL"))
6395 lex_match (lexer, T_EQUALS);
6397 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6399 t->cilevel = lex_number (lexer);
6402 else if (lex_match_id (lexer, "CATEGORIES"))
6404 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: each of CAPTION, CORNER, and TITLE takes one or more strings,
   which are expanded by put_title_text() and joined with spaces. */
6408 else if (lex_match_id (lexer, "TITLES"))
6413 if (lex_match_id (lexer, "CAPTION"))
6414 textp = &t->caption;
6415 else if (lex_match_id (lexer, "CORNER"))
6417 else if (lex_match_id (lexer, "TITLE"))
6421 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6424 lex_match (lexer, T_EQUALS);
6426 struct string s = DS_EMPTY_INITIALIZER;
6427 while (lex_is_string (lexer))
6429 if (!ds_is_empty (&s))
6430 ds_put_byte (&s, ' ');
6431 put_title_text (&s, lex_tokss (lexer), now,
6432 lexer, dataset_dict (ds),
6433 expr_start, expr_end);
6437 *textp = ds_steal_cstr (&s);
6439 while (lex_token (lexer) != T_SLASH
6440 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: the settings are parsed into T->chisq, after which the whole
   subcommand is reported as not yet implemented.  START_OFS is one token
   back so that the error range starts at the SIGTEST keyword. */
6442 else if (lex_match_id (lexer, "SIGTEST"))
6444 int start_ofs = lex_ofs (lexer) - 1;
6447 t->chisq = xmalloc (sizeof *t->chisq);
6448 *t->chisq = (struct ctables_chisq) {
6450 .include_mrsets = true,
6451 .all_visible = true,
6457 if (lex_match_id (lexer, "TYPE"))
6459 lex_match (lexer, T_EQUALS);
6460 if (!lex_force_match_id (lexer, "CHISQUARE"))
6463 else if (lex_match_id (lexer, "ALPHA"))
6465 lex_match (lexer, T_EQUALS);
6466 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6468 t->chisq->alpha = lex_number (lexer);
6471 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6473 lex_match (lexer, T_EQUALS);
6474 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6477 else if (lex_match_id (lexer, "CATEGORIES"))
6479 lex_match (lexer, T_EQUALS);
6480 if (lex_match_id (lexer, "ALLVISIBLE"))
6481 t->chisq->all_visible = true;
6482 else if (lex_match_id (lexer, "SUBTOTALS"))
6483 t->chisq->all_visible = false;
6486 lex_error_expecting (lexer,
6487 "ALLVISIBLE", "SUBTOTALS");
6493 lex_error_expecting (lexer, "TYPE", "ALPHA",
6494 "INCLUDEMRSETS", "CATEGORIES");
6498 while (lex_token (lexer) != T_SLASH
6499 && lex_token (lexer) != T_ENDCMD);
6501 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6502 _("Support for SIGTEST not yet implemented."));
/* COMPARETEST: the settings are parsed into T->pairwise, after which the
   whole subcommand is reported as not yet implemented.
   NOTE(review): START_OFS here is lex_ofs (lexer), whereas SIGTEST uses
   lex_ofs (lexer) - 1, so this error range appears to start after the
   COMPARETEST keyword rather than at it -- confirm whether that is
   intended. */
6505 else if (lex_match_id (lexer, "COMPARETEST"))
6507 int start_ofs = lex_ofs (lexer);
6510 t->pairwise = xmalloc (sizeof *t->pairwise);
6511 *t->pairwise = (struct ctables_pairwise) {
6513 .alpha = { .05, .05 },
6514 .adjust = BONFERRONI,
6515 .include_mrsets = true,
6516 .meansvariance_allcats = true,
6517 .all_visible = true,
6526 if (lex_match_id (lexer, "TYPE"))
6528 lex_match (lexer, T_EQUALS);
6529 if (lex_match_id (lexer, "PROP"))
6530 t->pairwise->type = PROP;
6531 else if (lex_match_id (lexer, "MEAN"))
6532 t->pairwise->type = MEAN;
6535 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two values in the open range from 0 to 1.  Two values
   are stored in ascending order; a single value is used for both. */
6539 else if (lex_match_id (lexer, "ALPHA"))
6541 lex_match (lexer, T_EQUALS);
6543 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6545 double a0 = lex_number (lexer);
6548 lex_match (lexer, T_COMMA);
6549 if (lex_is_number (lexer))
6551 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6553 double a1 = lex_number (lexer);
6556 t->pairwise->alpha[0] = MIN (a0, a1);
6557 t->pairwise->alpha[1] = MAX (a0, a1);
6560 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6562 else if (lex_match_id (lexer, "ADJUST"))
6564 lex_match (lexer, T_EQUALS);
6565 if (lex_match_id (lexer, "BONFERRONI"))
6566 t->pairwise->adjust = BONFERRONI;
6567 else if (lex_match_id (lexer, "BH"))
6568 t->pairwise->adjust = BH;
6569 else if (lex_match_id (lexer, "NONE"))
6570 t->pairwise->adjust = 0;
6573 lex_error_expecting (lexer, "BONFERRONI", "BH",
6578 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6580 lex_match (lexer, T_EQUALS);
6581 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6584 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6586 lex_match (lexer, T_EQUALS);
6587 if (lex_match_id (lexer, "ALLCATS"))
6588 t->pairwise->meansvariance_allcats = true;
6589 else if (lex_match_id (lexer, "TESTEDCATS"))
6590 t->pairwise->meansvariance_allcats = false;
6593 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6597 else if (lex_match_id (lexer, "CATEGORIES"))
6599 lex_match (lexer, T_EQUALS);
6600 if (lex_match_id (lexer, "ALLVISIBLE"))
6601 t->pairwise->all_visible = true;
6602 else if (lex_match_id (lexer, "SUBTOTALS"))
6603 t->pairwise->all_visible = false;
6606 lex_error_expecting (lexer, "ALLVISIBLE",
6611 else if (lex_match_id (lexer, "MERGE"))
6613 lex_match (lexer, T_EQUALS);
6614 if (!parse_bool (lexer, &t->pairwise->merge))
6617 else if (lex_match_id (lexer, "STYLE"))
6619 lex_match (lexer, T_EQUALS);
6620 if (lex_match_id (lexer, "APA"))
6621 t->pairwise->apa_style = true;
6622 else if (lex_match_id (lexer, "SIMPLE"))
6623 t->pairwise->apa_style = false;
6626 lex_error_expecting (lexer, "APA", "SIMPLE");
6630 else if (lex_match_id (lexer, "SHOWSIG"))
6632 lex_match (lexer, T_EQUALS);
6633 if (!parse_bool (lexer, &t->pairwise->show_sig))
6638 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6639 "INCLUDEMRSETS", "MEANSVARIANCE",
6640 "CATEGORIES", "MERGE", "STYLE",
6645 while (lex_token (lexer) != T_SLASH
6646 && lex_token (lexer) != T_ENDCMD);
6648 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6649 _("Support for COMPARETEST not yet implemented."));
6654 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6655 "CRITERIA", "CATEGORIES", "TITLES",
6656 "SIGTEST", "COMPARETEST");
6660 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved away from the row axis or from the column
   axis, but not from both.  The axis they move from and the axis they move
   to are recorded in the table. */
6664 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6666 t->clabels_from_axis = PIVOT_AXIS_ROW;
6667 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6669 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6673 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6674 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6675 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6677 if (!ctables_prepare_table (t))
6680 while (lex_token (lexer) != T_ENDCMD);
/* Run the tables over the data.  The command succeeds only if both
   execution and proc_commit() succeed.
   NOTE(review): any condition guarding this proc_open() is not visible
   here -- confirm that the dataset is not opened twice when the measure
   guesser already opened it. */
6683 input = proc_open (ds);
6684 bool ok = ctables_execute (ds, input, ct);
6685 ok = proc_commit (ds) && ok;
6687 ctables_destroy (ct);
6688 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): presumably the start of the error path, which releases CT;
   the label and the final return are not visible here. */
6693 ctables_destroy (ct);