1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/commands/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Message helpers: _() passes MSGID to gettext(); N_() expands to its
   argument unchanged. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES. */
/* parse_ctables_summary_function() reports one of these through its
   'weighting' argument; the name and label helpers branch on the value. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas. */
/* The enumerators index ctables_area_type_name[], and
   parse_ctables_summary_function() tries them in ascending order when it
   looks for an area prefix, which is why the ordering note below matters. */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly. */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
/* Syntax keyword for each area type, indexed by enum ctables_area_type.
   parse_ctables_summary_function() matches these case-insensitively as a
   prefix of the function name, and ctables_summary_function_name() writes
   them back out. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area. */
/* NOTE(review): the "struct ctables_area" header line is not visible in this
   listing; the name is taken from the comment on 'sum_var_idx' in struct
   ctables_summary_spec.  The three weight arrays have N_CTWS elements each,
   presumably one per enum ctables_weighting value -- confirm. */
104 struct hmap_node node;
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions. */
/* Classifies a summary function by which name prefixes it accepts.  The
   ctables_function_info[] initializer derives 'u_prefix', 'e_prefix' and
   'is_area' from these values (CTFT_UCELL, CTFT_UECELL and CTFT_AREA are the
   ones it tests).  The enumerator lines themselves are not visible in this
   listing, only their comments. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
/* Default output format classes.  ctables_summary_default_format() maps each
   summary function to one of these and then to a concrete struct fmt_spec.
   (The enum header line is not visible in this listing; the type name
   "enum ctables_format" comes from its uses elsewhere in the file.) */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
/* Which kinds of variables a summary function may be applied to.
   add_summary_spec() switches on this to reject, for example, a scale-only
   function on a categorical variable.  The multiple-response-set value is
   commented out here. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
/* The summary functions themselves, generated from ctables.inc.  The first
   expansion of S() turns each entry into one enumerator (its ENUM column);
   the second expansion turns each entry into "+1", which yields a count of
   the entries (used as N_CTSF_FUNCTIONS elsewhere in the file -- the line
   that names the count is not visible in this listing). */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
/* Static properties of one summary function: its base name (without any
   weighting or area prefix), its classification, and the prefix rules that
   parse_ctables_summary_function() enforces. */
176 struct ctables_function_info
178 struct substring basename;
179 enum ctables_function_type type;
180 enum ctables_format format;
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
/* Table of function properties, with one element per ctables.inc entry.
   The prefix flags are computed from TYPE: 'U' is accepted for CTFT_UCELL,
   CTFT_UECELL and CTFT_AREA; 'E' only for CTFT_UECELL; an area prefix is
   required exactly for CTFT_AREA. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
220 return var_get_print_format (var);
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
/* Parses a summary function name from LEXER's current identifier token and
   stores the results through FUNCTION, WEIGHTING and AREA.

   The name is taken apart from left to right: an optional 'U' or 'E'
   weighting prefix (either case), then an optional area keyword from
   ctables_area_type_name[], then a base name that is looked up
   case-insensitively in ctables_function_info[].  A prefix is rejected when
   the matched function does not accept it, and an area keyword must be
   present exactly when the function is an area function.

   Names ending in .LCL, .UCL or .SE are rejected up front as unimplemented.

   NOTE(review): the return type line and the return statements are not
   visible in this listing. */
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
/* 'E' is only tried when 'U' did not match, so at most one prefix is
   stripped from NAME. */
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
/* NOTE(review): the visible lines store CTW_EFFECTIVE here without looking
   at 'u'; confirm how a 'U'-prefixed shorthand such as UROWPCT is meant to
   be weighted. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Syntax error expecting summary function name."));
/* Formats the syntax name of summary FUNCTION into BUFFER, writing at most
   BUFSIZE bytes with snprintf().  The name is built from three pieces: a
   weighting prefix ("U" for CTW_UNWEIGHTED, nothing for CTW_DICTIONARY, "E"
   for the remaining weighting when the function accepts an 'E' prefix), the
   area keyword for AREA when the function is an area function, and the
   function's base name.

   NOTE(review): the return type line is not visible in this listing. */
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
/* Returns the untranslated display label for summary FUNCTION with the given
   WEIGHTING and, for area functions, AREA.  The strings are only marked with
   N_() here; ctables_summary_function_label() is the caller that turns the
   result into a pivot_value.

   Local shorthands: 'w' is true for any weighting other than
   CTW_UNWEIGHTED, 'd' is true for CTW_DICTIONARY, 'a' is AREA.  Functions
   with three labels (count, total N, valid N) distinguish dictionary,
   other weighted ("Adjusted ..."), and unweighted; the rest only
   distinguish weighted from unweighted, and MAXIMUM, MINIMUM and RANGE have
   a single label.

   CTSF_PTILE must not reach this function (NOT_REACHED) because its label
   embeds the percentile value.

   NOTE(review): the return type line, the switch headers and several case
   labels are not visible in this listing. */
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries. */
/* One requested summary: what to compute and how to show it.  Instances are
   created by add_summary_spec() and copied by ctables_summary_spec_clone(). */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighting' specifies which
445 weights to use (effective, dictionary, or none; see enum
446 ctables_weighting). 'calc_area' is the area over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0). */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
/* Initializes DST as a copy of SRC.  The visible line gives DST its own
   duplicate of SRC's label through xstrdup_if_nonnull(), so the two specs do
   not share that string.
   NOTE(review): the return type and the remaining body lines are not visible
   in this listing. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
/* Releases what S owns.  ctables_summary_spec_set_uninit() calls this on
   every element of a set.
   NOTE(review): the body is not visible in this listing; presumably it frees
   the label duplicated by ctables_summary_spec_clone() -- confirm. */
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions. */
/* A growable array of summary specs plus the variable they apply to.
   add_summary_spec() appends to 'specs' (using the 'n' and 'allocated'
   members, whose declarations are not visible in this listing), and
   is_listwise_missing() walks 'listwise_vars'. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs;
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.  A fresh array of SRC->n specs is
   allocated (NULL when SRC is empty) and each element is cloned with
   ctables_summary_spec_clone(); 'is_scale' is carried over.
   NOTE(review): the other initializer members and the return type are not
   visible in this listing, so whether 'listwise_vars' is copied cannot be
   told from here. */
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
/* Releases what SET owns: every spec is passed to
   ctables_summary_spec_uninit(), then the 'listwise_vars' array is freed.
   NOTE(review): any further statements (for example freeing 'specs' itself)
   are not visible in this listing. */
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
/* Tests case C against each of SPECS's listwise variables, checking whether
   the variable's numeric value in C is missing.  This supports
   SMISSING=LISTWISE (see 'listwise_vars' in struct
   ctables_summary_spec_set).
   NOTE(review): the return type and return statements are not visible in
   this listing. */
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions. */
/* A named postcompute definition: its expression tree, where it was defined,
   and optional summary specs that apply to it. */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
564 struct ctables_pcexpr *expr;
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
/* One node of a postcompute expression tree.  'op' (the nested enum) selects
   which of the payload members below is meaningful; the comment on each
   member names the operators that use it.  Operator nodes point at their
   operands through 'subs', and ctables_pcexpr_destroy() frees the tree
   recursively.
   NOTE(review): the arithmetic enumerators (CTPO_ADD and so on) and the
   'number' and 'nrange' members are used elsewhere in the file, but their
   declaration lines are not visible in this listing. */
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE. */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
/* Forward declarations for the postcompute parser, followed by the function
   type shared by every precedence level: each level takes the lexer and the
   dictionary and returns a newly allocated expression (the callers in this
   file treat a null return as failure). */
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
/* Frees expression E and what it owns, chosen by E->op: the string of a
   CTPO_CAT_STRING, both strings of a CTPO_CAT_SRANGE, and (recursively) both
   'subs' of an operator node.  The number, numeric-range, MISSING, OTHERNM
   and SUBTOTAL categories own nothing extra.  The source location is
   destroyed in every case.
   NOTE(review): the return type, the switch header, the operator case labels
   and the final statements are not visible in this listing. */
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator. */
/* Pairs a lexer token with the expression operator it stands for.  (The
   "struct operator" header line is not visible in this listing; the name
   comes from the parser functions that take arrays of it.) */
697 enum token_type token;
698 enum ctables_pcexpr_op op;
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
/* Forward declaration: the primary-expression parser recurses into the
   lowest-precedence level for parenthesized subexpressions. */
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression and returns it as a newly
   allocated node.  The visible branches accept:

     - a number (CTPO_CONSTANT);
     - the keywords MISSING, OTHERNM, TOTAL, and SUBTOTAL, the last one
       optionally followed by a bracketed integer index of at least 1;
     - a bracketed category: a number, a string, or a range written with
       THRU, where LO and HI stand for open ends (represented as -DBL_MAX /
       DBL_MAX for numbers and as a substring with a null 'string' for
       strings);
     - a parenthesized subexpression, parsed by ctables_pcexpr_parse_add().

   For every form except the parenthesized one, the node is built in the
   local 'e', stamped with the source location of the tokens consumed, and
   duplicated onto the heap.  If the closing bracket of a category is
   missing, any strings already stored in 'e' are released.

   NOTE(review): the error-return statements and several token-consuming
   statements are not visible in this listing. */
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
894 _("Syntax error expecting number or string or range."));
898 if (!lex_force_match (lexer, T_RBRACK))
900 if (e.op == CTPO_CAT_STRING)
901 ss_dealloc (&e.string);
902 else if (e.op == CTPO_CAT_SRANGE)
904 ss_dealloc (&e.srange[0]);
905 ss_dealloc (&e.srange[1]);
910 else if (lex_match (lexer, T_LPAREN))
912 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
915 if (!lex_force_match (lexer, T_RPAREN))
917 ctables_pcexpr_destroy (ep);
924 lex_error (lexer, _("Syntax error in postcompute expression."));
928 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
929 return xmemdup (&e, sizeof e);
932 static struct ctables_pcexpr *
933 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
934 struct lexer *lexer, int start_ofs)
936 struct ctables_pcexpr *e = xmalloc (sizeof *e);
937 *e = (struct ctables_pcexpr) {
940 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
945 static struct ctables_pcexpr *
946 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
948 static const struct operator op = { T_EXP, CTPO_POW };
950 const char *chain_warning =
951 _("The exponentiation operator (`**') is left-associative: "
952 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
953 "To disable this warning, insert parentheses.");
955 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
956 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
957 ctables_pcexpr_parse_primary,
960 /* Special case for situations like "-5**6", which must be parsed as
963 int start_ofs = lex_ofs (lexer);
964 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
965 *lhs = (struct ctables_pcexpr) {
967 .number = -lex_tokval (lexer),
968 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
972 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
974 ctables_pcexpr_parse_primary, chain_warning, lhs);
978 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
981 /* Parses the unary minus level. */
982 static struct ctables_pcexpr *
983 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
985 int start_ofs = lex_ofs (lexer);
986 if (!lex_match (lexer, T_DASH))
987 return ctables_pcexpr_parse_exp (lexer, dict);
989 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
993 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
996 /* Parses the multiplication and division level. */
997 static struct ctables_pcexpr *
998 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
1000 static const struct operator ops[] =
1002 { T_ASTERISK, CTPO_MUL },
1003 { T_SLASH, CTPO_DIV },
1006 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1007 sizeof ops / sizeof *ops,
1008 ctables_pcexpr_parse_neg, NULL);
1011 /* Parses the addition and subtraction level. */
1012 static struct ctables_pcexpr *
1013 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1015 static const struct operator ops[] =
1017 { T_PLUS, CTPO_ADD },
1018 { T_DASH, CTPO_SUB },
1019 { T_NEG_NUM, CTPO_ADD },
1022 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1023 ops, sizeof ops / sizeof *ops,
1024 ctables_pcexpr_parse_mul, NULL);
1027 /* CTABLES axis expressions. */
1029 /* CTABLES has a number of extra formats that we implement via custom
1030 currency specifications on an alternate fmt_settings. */
/* Each CTEF_* name reuses one of the custom-currency format types (CCA to
   CCD).  A fmt_spec carrying one of these is flagged through
   'is_ctables_format' so that it is not mistaken for the ordinary format;
   parse_ctables_format_specifier() produces them from the syntax keywords
   NEGPAREN, NEQUAL, PAREN and PCTPAREN. */
1031 #define CTEF_NEGPAREN FMT_CCA
1032 #define CTEF_NEQUAL FMT_CCB
1033 #define CTEF_PAREN FMT_CCC
1034 #define CTEF_PCTPAREN FMT_CCD
/* Selects one of an axis variable's summary spec sets ('specs[N_CSVS]' in
   the axis structure).  The visible code in this file uses the values
   CSV_CELL and CSV_TOTAL; the enumerator lines themselves are not visible in
   this listing. */
1036 enum ctables_summary_variant
/* Kind of axis expression node.  The visible code in this file tests for
   CTAO_VAR (a variable leaf); the enumerator lines themselves are not
   visible in this listing. */
1045 enum ctables_axis_op
/* Members of an axis expression node (the "struct ctables_axis" header and
   the 'op' and 'scale' members are used elsewhere in the file, but their
   lines are not visible in this listing).  A CTAO_VAR leaf uses 'var' and
   'specs'; other nodes use 'subs'.  'loc' is the node's source location and
   is destroyed with the node. */
1061 struct variable *var;
1063 struct ctables_summary_spec_set specs[N_CSVS];
1067 struct ctables_axis *subs[2];
1070 struct msg_location *loc;
/* Destroys AXIS and what it owns.  The visible statements uninitialize all
   N_CSVS summary spec sets, recursively destroy both subexpressions, and
   destroy the source location.
   NOTE(review): the return type, the switch on the node kind that selects
   between the first two actions, and the final statements are not visible
   in this listing. */
1074 ctables_axis_destroy (struct ctables_axis *axis)
1082 for (size_t i = 0; i < N_CSVS; i++)
1083 ctables_summary_spec_set_uninit (&axis->specs[i]);
1088 ctables_axis_destroy (axis->subs[0]);
1089 ctables_axis_destroy (axis->subs[1]);
1092 msg_location_destroy (axis->loc);
1096 static struct ctables_axis *
1097 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1098 struct ctables_axis *sub0,
1099 struct ctables_axis *sub1,
1100 struct lexer *lexer, int start_ofs)
1102 struct ctables_axis *axis = xmalloc (sizeof *axis);
1103 *axis = (struct ctables_axis) {
1105 .subs = { sub0, sub1 },
1106 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State passed through the axis expression parser: the token source and the
   dictionary in which variable names are looked up. */
1111 struct ctables_axis_parse_ctx
1113 struct lexer *lexer;
1114 struct dictionary *dict;
/* Returns a new pivot_value with the label to display for SPEC.

   One visible path returns the built-in label from
   ctables_summary_function_label().  The other builds the text from SPEC's
   own label: the label is copied piece by piece into 'out', and each
   occurrence of the placeholder ")CILEVEL" is replaced by CILEVEL formatted
   with "%g".

   NOTE(review): the condition choosing between the two paths (presumably
   whether SPEC->label is null) and the loop header and exit test are not
   visible in this listing. */
1117 static struct pivot_value *
1118 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1121 return ctables_summary_function_label (spec->function, spec->weighting,
1122 spec->user_area, spec->percentile);
1125 struct substring in = ss_cstr (spec->label);
1126 struct substring target = ss_cstr (")CILEVEL");
1128 struct string out = DS_EMPTY_INITIALIZER;
1131 size_t chunk = ss_find_substring (in, target);
1132 ds_put_substring (&out, ss_head (in, chunk));
1133 ss_advance (&in, chunk);
1135 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1137 ss_advance (&in, target.length);
1138 ds_put_format (&out, "%g", cilevel);
/* Adds a summary spec to AXIS for summary variant SV, described by FUNCTION,
   WEIGHTING, AREA, PERCENTILE, LABEL and FORMAT.

   On a CTAO_VAR leaf, the function's availability is checked against the
   variable first; a scale-only function on a non-scale variable is reported
   at LOC unless SV is CSV_TOTAL.  The spec is then appended to the leaf's
   set for SV, growing the array as needed.  LABEL is duplicated when
   nonnull, and a null FORMAT selects the function's default format for the
   variable.

   On any other node, the call is repeated for both children.

   NOTE(review): the return type, the case labels of the availability switch
   and the return statements are not visible in this listing. */
1144 add_summary_spec (struct ctables_axis *axis,
1145 enum ctables_summary_function function,
1146 enum ctables_weighting weighting,
1147 enum ctables_area_type area, double percentile,
1148 const char *label, const struct fmt_spec *format,
1149 bool is_ctables_format, const struct msg_location *loc,
1150 enum ctables_summary_variant sv)
1152 if (axis->op == CTAO_VAR)
1154 char function_name[128];
1155 ctables_summary_function_name (function, weighting, area,
1156 function_name, sizeof function_name);
1157 const char *var_name = var_get_name (axis->var);
1158 switch (ctables_function_availability (function))
1162 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1163 "response sets."), function_name);
1164 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1170 if (!axis->scale && sv != CSV_TOTAL)
1173 _("Summary function %s applies only to scale variables."),
1175 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1185 struct ctables_summary_spec_set *set = &axis->specs[sv];
1186 if (set->n >= set->allocated)
1187 set->specs = x2nrealloc (set->specs, &set->allocated,
1188 sizeof *set->specs);
1190 struct ctables_summary_spec *dst = &set->specs[set->n++];
1191 *dst = (struct ctables_summary_spec) {
1192 .function = function,
1193 .weighting = weighting,
1196 .percentile = percentile,
1197 .label = xstrdup_if_nonnull (label),
1198 .format = (format ? *format
1199 : ctables_summary_default_format (function, axis->var)),
1200 .is_ctables_format = is_ctables_format,
1206 for (size_t i = 0; i < 2; i++)
1207 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1208 percentile, label, format, is_ctables_format,
/* Forward declaration: the primary axis parser recurses into this for a
   parenthesized subexpression. */
1215 static struct ctables_axis *ctables_axis_parse_stack (
1216 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized subexpression or
   a single variable name, optionally followed by "[S]" or "[C]" to force the
   variable to be treated as scale or categorical.  Without such a marker,
   the variable counts as scale exactly when its measurement level is
   MEASURE_SCALE.

   Names starting with '$' (multiple response sets) are rejected as
   unimplemented, and a string variable that ends up marked as scale is
   reported as an error and the new node destroyed.

   NOTE(review): the return statements are not visible in this listing. */
1218 static struct ctables_axis *
1219 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1221 if (lex_match (ctx->lexer, T_LPAREN))
1223 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1224 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1226 ctables_axis_destroy (sub);
1232 if (!lex_force_id (ctx->lexer))
1235 if (lex_tokcstr (ctx->lexer)[0] == '$')
1237 lex_error (ctx->lexer,
1238 _("Multiple response set support not implemented."));
1242 int start_ofs = lex_ofs (ctx->lexer);
1243 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1247 struct ctables_axis *axis = xmalloc (sizeof *axis);
1248 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1250 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1251 : lex_match_phrase (ctx->lexer, "[C]") ? false
1252 : var_get_measure (var) == MEASURE_SCALE);
1253 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1254 lex_ofs (ctx->lexer) - 1);
1255 if (axis->scale && var_is_alpha (var))
1257 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1259 var_get_name (var));
1260 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false
   otherwise (including when S is empty).  S must be a null-terminated
   string.

   strcspn() yields the length of the leading run of non-digit bytes, so the
   byte at that index is either the first digit or the terminating null. */
static bool
has_digit (const char *s)
{
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format for a summary function into FORMAT and sets
   *IS_CTABLES_FORMAT to say which kind it is.

   The type name is first matched (case-insensitively) against the four
   CTABLES-only formats NEGPAREN, NEQUAL, PAREN and PCTPAREN, which are
   stored as the corresponding CTEF_* types and get their own width and
   decimals checks.  Any other name is parsed as an ordinary format
   specifier, which must be valid for output and compatible with numeric
   data.

   NOTE(review): the return type, the return statements and the condition
   guarding the "width 2 or greater" message are not visible in this
   listing. */
1274 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1275 bool *is_ctables_format)
1277 char type[FMT_TYPE_LEN_MAX + 1];
1278 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1281 if (!strcasecmp (type, "NEGPAREN"))
1282 format->type = CTEF_NEGPAREN;
1283 else if (!strcasecmp (type, "NEQUAL"))
1284 format->type = CTEF_NEQUAL;
1285 else if (!strcasecmp (type, "PAREN"))
1286 format->type = CTEF_PAREN;
1287 else if (!strcasecmp (type, "PCTPAREN"))
1288 format->type = CTEF_PCTPAREN;
1291 *is_ctables_format = false;
1292 if (!parse_format_specifier (lexer, format))
1295 char *error = fmt_check_output__ (*format);
1297 error = fmt_check_type_compat__ (*format, NULL, VAL_NUMERIC);
1300 lex_next_error (lexer, -1, -1, "%s", error);
1311 lex_next_error (lexer, -1, -1,
1312 _("Output format %s requires width 2 or greater."), type);
1315 else if (format->d > format->w - 1)
1317 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1318 "greater than decimals."), type);
1323 *is_ctables_format = true;
/* Parses a primary axis expression followed by an optional bracketed list of
   summary specifications, and attaches each specification to the parsed
   axis with add_summary_spec().

   Each specification is a summary function name, then (for PTILE only) a
   percentile between 0 and 100, then an optional string label, then an
   optional format.  A format is recognized as an identifier token that
   contains a digit.  Specifications may be separated by commas.  Inside the
   list, TOTALS followed by '[' starts a nested list (the variant changes
   from CSV_CELL; a CSV_TOTAL list needs a second ']' to close).

   NOTE(review): the loop header, the error exits, the assignment to
   'formatp' and the return statements are not visible in this listing. */
1328 static struct ctables_axis *
1329 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1331 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1332 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1335 enum ctables_summary_variant sv = CSV_CELL;
1338 int start_ofs = lex_ofs (ctx->lexer);
1340 /* Parse function. */
1341 enum ctables_summary_function function;
1342 enum ctables_weighting weighting;
1343 enum ctables_area_type area;
1344 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1348 /* Parse percentile. */
1349 double percentile = 0;
1350 if (function == CTSF_PTILE)
1352 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1354 percentile = lex_number (ctx->lexer);
1355 lex_get (ctx->lexer);
1360 if (lex_is_string (ctx->lexer))
1362 label = ss_xstrdup (lex_tokss (ctx->lexer));
1363 lex_get (ctx->lexer);
1367 struct fmt_spec format;
1368 const struct fmt_spec *formatp;
1369 bool is_ctables_format = false;
1370 if (lex_token (ctx->lexer) == T_ID
1371 && has_digit (lex_tokcstr (ctx->lexer)))
1373 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1374 &is_ctables_format))
1384 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1385 lex_ofs (ctx->lexer) - 1);
1386 add_summary_spec (sub, function, weighting, area, percentile, label,
1387 formatp, is_ctables_format, loc, sv);
1389 msg_location_destroy (loc);
1391 lex_match (ctx->lexer, T_COMMA);
1392 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1394 if (!lex_force_match (ctx->lexer, T_LBRACK))
1398 else if (lex_match (ctx->lexer, T_RBRACK))
1400 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1407 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a variable node (CTAO_VAR) whose
   'scale' flag is set.

   A variable node yields itself if it is scale and NULL otherwise.  For any
   other kind of node, both children, subs[0] and subs[1], are searched
   recursively. */
1411 static const struct ctables_axis *
1412 find_scale (const struct ctables_axis *axis)
1416 else if (axis->op == CTAO_VAR)
1417 return axis->scale ? axis : NULL;
1420 for (size_t i = 0; i < 2; i++)
1422 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a variable node (CTAO_VAR) that
   is not scale and that has at least one cell summary specification
   (specs[CSV_CELL].n is nonzero).

   A variable node yields itself if it qualifies and NULL otherwise.  For any
   other kind of node, both children, subs[0] and subs[1], are searched
   recursively. */
1430 static const struct ctables_axis *
1431 find_categorical_summary_spec (const struct ctables_axis *axis)
1435 else if (axis->op == CTAO_VAR)
1436 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1439 for (size_t i = 0; i < 2; i++)
1441 const struct ctables_axis *sum
1442 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions separated by '>' (T_GT),
   combining each pair into a CTAO_NEST node.

   Two nestings are rejected with an error message:

   - Both sides contain a scale variable ("Cannot nest scale variables.").

   - The left-hand (outer) side contains a categorical variable that has
     summary specifications; summaries are only allowed at the innermost
     nesting level.

   In both cases the newly created nest node is destroyed. */
1450 static struct ctables_axis *
1451 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1453 int start_ofs = lex_ofs (ctx->lexer);
1454 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1458 while (lex_match (ctx->lexer, T_GT))
1460 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* Destroys what has been parsed so far (NOTE(review): presumably only when
   RHS failed to parse -- confirm). */
1463 ctables_axis_destroy (lhs);
1467 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1468 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1470 const struct ctables_axis *outer_scale = find_scale (lhs);
1471 const struct ctables_axis *inner_scale = find_scale (rhs);
1472 if (outer_scale && inner_scale)
1474 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1475 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1476 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1477 ctables_axis_destroy (nest);
1481 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1484 msg_at (SE, nest->loc,
1485 _("Summaries may only be requested for categorical variables "
1486 "at the innermost nesting level."));
1487 msg_at (SN, outer_sum->loc,
1488 _("This outer categorical variable has a summary."));
1489 ctables_axis_destroy (nest);
/* Parses one or more nest expressions separated by '+' (T_PLUS).  Each
   additional operand is folded into the running result as a CTAO_STACK node
   whose left child is everything parsed so far, so the resulting tree is
   left-associative. */
1499 static struct ctables_axis *
1500 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1502 int start_ofs = lex_ofs (ctx->lexer);
1503 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1507 while (lex_match (ctx->lexer, T_PLUS))
1509 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* Destroys what has been parsed so far (NOTE(review): presumably only when
   RHS failed to parse -- confirm). */
1512 ctables_axis_destroy (lhs);
1516 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1517 ctx->lexer, start_ofs);
/* Entry point for parsing one axis expression from LEXER, resolving variable
   names against DICT, and storing the resulting tree in *AXISP.

   If the next token is BY, '/', or the end of the command, that case is
   handled separately (NOTE(review): presumably as an empty axis -- confirm).
   Otherwise the expression is parsed starting at the lowest-precedence
   level, ctables_axis_parse_stack(). */
1524 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1525 struct ctables_axis **axisp)
1528 if (lex_token (lexer) == T_BY
1529 || lex_token (lexer) == T_SLASH
1530 || lex_token (lexer) == T_ENDCMD)
1533 struct ctables_axis_parse_ctx ctx = {
1537 *axisp = ctables_axis_parse_stack (&ctx);
1541 /* CTABLES categories. */
/* A list of categories.  ctables_categories_unref() asserts that 'n_refs' is
   positive and walks 'n_cats' elements of 'cats'. */
1543 struct ctables_categories
/* Array of categories. */
1546 struct ctables_category *cats;
/* A single category within a "struct ctables_categories".

   The category's type determines which of the type-specific members are
   meaningful; each member's comment names the type or types it belongs
   to. */
1550 struct ctables_category
1552 enum ctables_category_type
1554 /* Explicit category lists. */
1557 CCT_NRANGE, /* Numerical range. */
1558 CCT_SRANGE, /* String range. */
1563 /* Totals and subtotals. */
1567 /* Implicit category lists. */
1572 /* For contributing to TOTALN. */
1573 CCT_EXCLUDED_MISSING,
1577 struct ctables_category *subtotal;
1583 double number; /* CCT_NUMBER. */
1584 struct substring string; /* CCT_STRING, in dictionary encoding. */
1585 double nrange[2]; /* CCT_NRANGE. */
1586 struct substring srange[2]; /* CCT_SRANGE. */
1590 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1591 bool hide_subcategories; /* CCT_SUBTOTAL. */
1594 /* CCT_POSTCOMPUTE. */
1597 const struct ctables_postcompute *pc;
1598 enum fmt_type parse_format;
1601 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1604 bool include_missing;
1605 bool sort_ascending;
1608 enum ctables_summary_function sort_function;
1609 enum ctables_weighting weighting;
1610 enum ctables_area_type area;
1611 struct variable *sort_var;
1616 /* Source location (sometimes NULL). */
1617 struct msg_location *location;
/* Releases the resources held by CAT: its source location and the
   type-specific payload, which is the string, both string-range bounds, or
   the total label. */
1621 ctables_category_uninit (struct ctables_category *cat)
1626 msg_location_destroy (cat->location);
1633 case CCT_POSTCOMPUTE:
1637 ss_dealloc (&cat->string);
1641 ss_dealloc (&cat->srange[0]);
1642 ss_dealloc (&cat->srange[1]);
1647 free (cat->total_label);
1655 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, treating a null 'string' pointer as a value
   of its own: two null substrings are equal, a null and a non-null
   substring are unequal, and two non-null substrings are compared with
   ss_equals().

   Elsewhere in this file a null 'string' is used for the open end of a
   string range (LO THRU x, x THRU HI). */
1661 nullable_substring_equal (const struct substring *a,
1662 const struct substring *b)
1664 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are never
   equal; otherwise only the members relevant to the shared type are
   compared (number, string, numeric range, string range, postcompute
   pointer, total label, or the sorting parameters).

   NOTE(review): the sorting-parameter comparison below does not look at
   'weighting' or 'area' -- confirm that this is intended. */
1668 ctables_category_equal (const struct ctables_category *a,
1669 const struct ctables_category *b)
1671 if (a->type != b->type)
1677 return a->number == b->number;
1680 return ss_equals (a->string, b->string);
1683 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String-range bounds may be null (open-ended), hence the nullable
   comparison. */
1686 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1687 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1693 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by content. */
1694 return a->pc == b->pc;
1698 return !strcmp (a->total_label, b->total_label);
1703 return (a->include_missing == b->include_missing
1704 && a->sort_ascending == b->sort_ascending
1705 && a->sort_function == b->sort_function
1706 && a->sort_var == b->sort_var
1707 && a->percentile == b->percentile);
1709 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry.
   Each of C's categories is uninitialized with ctables_category_uninit()
   (NOTE(review): presumably only once the last reference is gone --
   confirm). */
1717 ctables_categories_unref (struct ctables_categories *c)
1722 assert (c->n_refs > 0);
1726 for (size_t i = 0; i < c->n_cats; i++)
1727 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B: they must have the same number of
   categories, and each pair of categories at the same index must compare
   equal according to ctables_category_equal().  Order is significant. */
1733 ctables_categories_equal (const struct ctables_categories *a,
1734 const struct ctables_categories *b)
1736 if (a->n_cats != b->n_cats)
1739 for (size_t i = 0; i < a->n_cats; i++)
1740 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns, by value, a category whose numeric range runs from LOW to HIGH.
   Callers in this file pass -DBL_MAX or DBL_MAX for an open-ended bound.
   Members that are not named in the initializer are zero-initialized. */
1746 static struct ctables_category
1747 cct_nrange (double low, double high)
1749 return (struct ctables_category) {
1751 .nrange = { low, high }
/* Returns, by value, a category whose string range runs from LOW to HIGH.
   Callers in this file pass a substring with a null 'string' for an
   open-ended bound.  The substrings are stored as given, not copied. */
1755 static struct ctables_category
1756 cct_srange (struct substring low, struct substring high)
1758 return (struct ctables_category) {
1760 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category (the keyword
   itself has already been consumed by the caller) and stores the result in
   *CAT as a CCT_SUBTOTAL category.

   An optional "= 'label'" supplies the label; otherwise the translated
   string "Subtotal" is used.  Either way the label is a fresh allocation
   stored in the category.  HIDE_SUBCATEGORIES is copied into the category
   (the caller passes true for HSUBTOTAL). */
1765 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1766 struct ctables_category *cat)
1769 if (lex_match (lexer, T_EQUALS))
1771 if (!lex_force_string (lexer))
1774 total_label = ss_xstrdup (lex_tokss (lexer));
1778 total_label = xstrdup (_("Subtotal"));
1780 *cat = (struct ctables_category) {
1781 .type = CCT_SUBTOTAL,
1782 .hide_subcategories = hide_subcategories,
1783 .total_label = total_label
/* Parses one entry of an explicit category list from LEXER into *CAT.

   Recognized forms:

     OTHERNM
     MISSING
     SUBTOTAL[='label']   HSUBTOTAL[='label']
     LO THRU 'string'     LO THRU number
     number               number THRU HI        number THRU number
     'string'             'string' THRU HI      'string' THRU 'string'
     &postcompute

   Open-ended numeric bounds are stored as -DBL_MAX or DBL_MAX; open-ended
   string bounds are stored as a substring whose 'string' is null.  Anything
   else is reported as a syntax error. */
1789 ctables_table_parse_explicit_category (struct lexer *lexer,
1790 struct dictionary *dict,
1792 struct ctables_category *cat)
1794 if (lex_match_id (lexer, "OTHERNM"))
1795 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1796 else if (lex_match_id (lexer, "MISSING"))
1797 *cat = (struct ctables_category) { .type = CCT_MISSING };
1798 else if (lex_match_id (lexer, "SUBTOTAL"))
1799 return ctables_table_parse_subtotal (lexer, false, cat);
1800 else if (lex_match_id (lexer, "HSUBTOTAL"))
1801 return ctables_table_parse_subtotal (lexer, true, cat);
1802 else if (lex_match_id (lexer, "LO"))
1804 if (!lex_force_match_id (lexer, "THRU"))
1806 if (lex_is_string (lexer))
/* Null lower bound: the string range is open at the low end. */
1808 struct substring sr0 = { .string = NULL };
1809 struct substring sr1 = parse_substring (lexer, dict);
1810 *cat = cct_srange (sr0, sr1);
1812 else if (lex_force_num (lexer))
1814 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1820 else if (lex_is_number (lexer))
1822 double number = lex_number (lexer);
1824 if (lex_match_id (lexer, "THRU"))
1826 if (lex_match_id (lexer, "HI"))
1827 *cat = cct_nrange (number, DBL_MAX);
1830 if (!lex_force_num (lexer))
1832 *cat = cct_nrange (number, lex_number (lexer));
1837 *cat = (struct ctables_category) {
1842 else if (lex_is_string (lexer))
1844 struct substring s = parse_substring (lexer, dict);
1845 if (lex_match_id (lexer, "THRU"))
1847 if (lex_match_id (lexer, "HI"))
/* Null upper bound: the string range is open at the high end. */
1849 struct substring sr1 = { .string = NULL };
1850 *cat = cct_srange (s, sr1);
1854 if (!lex_force_string (lexer))
1859 struct substring sr1 = parse_substring (lexer, dict);
1860 *cat = cct_srange (s, sr1);
1864 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* '&' introduces a reference to a postcompute, looked up by name. */
1866 else if (lex_match (lexer, T_AND))
1868 if (!lex_force_id (lexer))
1870 struct ctables_postcompute *pc = ctables_find_postcompute (
1871 ct, lex_tokcstr (lexer));
1874 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1875 msg_at (SE, loc, _("Unknown postcompute &%s."),
1876 lex_tokcstr (lexer));
1877 msg_location_destroy (loc);
1882 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1886 lex_error (lexer, _("Syntax error expecting category specification."));
/* Interprets the text S as data in input format FORMAT, using DICT's
   encoding and the current format settings, by way of data_in().

   If data_in() reports an error, a message that includes the format name
   and the error text is issued at LOCATION.  N is the output parameter
   (NOTE(review): presumably it receives the parsed numeric value on
   success -- confirm). */
1894 parse_category_string (struct msg_location *location,
1895 struct substring s, const struct dictionary *dict,
1896 enum fmt_type format, double *n)
1899 char *error = data_in (s, dict_get_encoding (dict), format,
1900 settings_get_fmt_settings (), &v, 0, NULL);
1903 msg_at (SE, location,
1904 _("Failed to parse category specification as format %s: %s."),
1905 fmt_name (format), error);
/* Searches CATS for the category that the postcompute expression leaf E
   refers to.  What counts as a match depends on E->op:

   - CTPO_CAT_NUMBER, CTPO_CAT_STRING, CTPO_CAT_NRANGE, CTPO_CAT_SRANGE: a
     category of the corresponding type with equal value or bounds (string
     range bounds may be null).

   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: any category of the
     corresponding type.

   - CTPO_CAT_SUBTOTAL: subtotals are counted as they are encountered.  A
     nonzero E->subtotal_index selects a subtotal by position; an index of
     zero is a reference without a position, which gets special treatment
     after the loop when CATS contains more than one subtotal. */
1914 static struct ctables_category *
1915 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1916 const struct ctables_pcexpr *e)
1918 struct ctables_category *best = NULL;
1919 size_t n_subtotals = 0;
1920 for (size_t i = 0; i < cats->n_cats; i++)
1922 struct ctables_category *cat = &cats->cats[i];
1925 case CTPO_CAT_NUMBER:
1926 if (cat->type == CCT_NUMBER && cat->number == e->number)
1930 case CTPO_CAT_STRING:
1931 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1935 case CTPO_CAT_NRANGE:
1936 if (cat->type == CCT_NRANGE
1937 && cat->nrange[0] == e->nrange[0]
1938 && cat->nrange[1] == e->nrange[1])
1942 case CTPO_CAT_SRANGE:
1943 if (cat->type == CCT_SRANGE
1944 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1945 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1949 case CTPO_CAT_MISSING:
1950 if (cat->type == CCT_MISSING)
1954 case CTPO_CAT_OTHERNM:
1955 if (cat->type == CCT_OTHERNM)
1959 case CTPO_CAT_SUBTOTAL:
1960 if (cat->type == CCT_SUBTOTAL)
1963 if (e->subtotal_index == n_subtotals)
1965 else if (e->subtotal_index == 0)
1970 case CTPO_CAT_TOTAL:
1971 if (cat->type == CCT_TOTAL)
/* A positionless subtotal reference combined with multiple subtotals
   (NOTE(review): presumably ambiguous, so no category is returned --
   confirm). */
1985 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression leaf E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string operands in E are first converted
   to numbers with parse_category_string():

   - A CTPO_CAT_STRING leaf is looked up as a CTPO_CAT_NUMBER.

   - A CTPO_CAT_SRANGE leaf is looked up as a CTPO_CAT_NRANGE, with a null
     lower bound becoming -DBL_MAX and a null upper bound becoming DBL_MAX.

   Every other case is looked up with E unchanged. */
1990 static struct ctables_category *
1991 ctables_find_category_for_postcompute (const struct dictionary *dict,
1992 const struct ctables_categories *cats,
1993 enum fmt_type parse_format,
1994 const struct ctables_pcexpr *e)
1996 if (parse_format != FMT_F)
1998 if (e->op == CTPO_CAT_STRING)
2001 if (!parse_category_string (e->location, e->string, dict,
2002 parse_format, &number))
/* Temporary numeric stand-in for E; it borrows E's location. */
2005 struct ctables_pcexpr e2 = {
2006 .op = CTPO_CAT_NUMBER,
2008 .location = e->location,
2010 return ctables_find_category_for_postcompute__ (cats, &e2);
2012 else if (e->op == CTPO_CAT_SRANGE)
2015 if (!e->srange[0].string)
2016 nrange[0] = -DBL_MAX;
2017 else if (!parse_category_string (e->location, e->srange[0], dict,
2018 parse_format, &nrange[0]))
2021 if (!e->srange[1].string)
2022 nrange[1] = DBL_MAX;
2023 else if (!parse_category_string (e->location, e->srange[1], dict,
2024 parse_format, &nrange[1]))
/* Temporary numeric-range stand-in for E; it borrows E's location. */
2027 struct ctables_pcexpr e2 = {
2028 .op = CTPO_CAT_NRANGE,
2029 .nrange = { nrange[0], nrange[1] },
2030 .location = e->location,
2032 return ctables_find_category_for_postcompute__ (cats, &e2);
2035 return ctables_find_category_for_postcompute__ (cats, e);
/* Builds a substring view of the string value V, whose width is taken from
   VAR, and strips trailing spaces from the view.  The substring is a view
   onto V's own bytes (created with ss_buffer()), not a copy. */
2038 static struct substring
2039 rtrim_value (const union value *v, const struct variable *var)
2041 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2042 var_get_width (var));
2043 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces removed,
   lies within the inclusive range SRANGE[0]...SRANGE[1].  A bound whose
   'string' is null is open, that is, it imposes no limit on that side. */
2048 in_string_range (const union value *v, const struct variable *var,
2049 const struct substring *srange)
2051 struct substring s = rtrim_value (v, var);
2052 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2053 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is handled before the search.  Categories
   are then examined from the last to the first.  Numeric range bounds of
   -DBL_MAX and DBL_MAX are treated as open.  If no category matches
   directly, the OTHERNM category recorded during the search (if any) is
   used for non-missing values, and NULL is returned for missing values. */
2056 static const struct ctables_category *
2057 ctables_categories_match (const struct ctables_categories *c,
2058 const union value *v, const struct variable *var)
2060 if (var_is_numeric (var) && v->f == SYSMIS)
2063 const struct ctables_category *othernm = NULL;
/* Counts down from n_cats - 1 to 0 without underflowing the unsigned
   index. */
2064 for (size_t i = c->n_cats; i-- > 0; )
2066 const struct ctables_category *cat = &c->cats[i];
2070 if (cat->number == v->f)
2075 if (ss_equals (cat->string, rtrim_value (v, var)))
2080 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2081 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2086 if (in_string_range (v, var, cat->srange))
2091 if (var_is_value_missing (var, v))
2095 case CCT_POSTCOMPUTE:
2110 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2113 case CCT_EXCLUDED_MISSING:
2118 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's total category if it has one.  Only the first and the last
   categories are examined, with the first taking precedence.

   NOTE(review): this indexes cats[n_cats - 1], so it assumes that C has at
   least one category -- confirm that callers guarantee this. */
2121 static const struct ctables_category *
2122 ctables_categories_total (const struct ctables_categories *c)
2124 const struct ctables_category *first = &c->cats[0];
2125 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2126 return (first->type == CCT_TOTAL ? first
2127 : last->type == CCT_TOTAL ? last
/* Appends to S the text for NUMBER formatted as a value of VAR.  The text is
   produced by wrapping the number in a temporary pivot value, formatting
   that, and destroying it again. */
2132 ctables_category_format_number (double number, const struct variable *var,
2135 struct pivot_value *pv = pivot_value_new_var_value (
2136 var, &(union value) { .f = number });
2137 pivot_value_format (pv, NULL, s);
2138 pivot_value_destroy (pv);
/* Appends to OUT the text for STRING formatted as a value of VAR.

   STRING is first copied into a temporary buffer of exactly VAR's width,
   padded on the right with spaces (buf_copy_rpad()), because a string value
   of a variable occupies the variable's full width.  The buffer is then
   wrapped in a temporary pivot value, which is formatted and destroyed.

   NOTE(review): the temporary buffer S must be freed before returning --
   confirm that it is. */
2142 ctables_category_format_string (struct substring string,
2143 const struct variable *var, struct string *out)
2145 int width = var_get_width (var);
2146 char *s = xmalloc (width);
2147 buf_copy_rpad (s, width, string.string, string.length, ' ');
2148 struct pivot_value *pv = pivot_value_new_var_value (
2149 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2150 pivot_value_format (pv, NULL, out);
2151 pivot_value_destroy (pv);
/* Appends to S a textual label for category CAT of variable VAR, chosen by
   the category's type:

   - a number or string: the value formatted as a value of VAR;
   - a range: both bounds formatted as values of VAR, joined by " THRU ";
   - the missing and other-nonmissing categories: the keywords "MISSING" and
     "OTHERNM";
   - a postcompute: "&" followed by the postcompute's name;
   - a total or subtotal: the category's total label.

   NOTE(review): the range cases format both bounds unconditionally; confirm
   how open-ended bounds (null strings, -DBL_MAX/DBL_MAX) should appear. */
2156 ctables_category_format_label (const struct ctables_category *cat,
2157 const struct variable *var,
2163 ctables_category_format_number (cat->number, var, s);
2167 ctables_category_format_string (cat->string, var, s);
2171 ctables_category_format_number (cat->nrange[0], var, s);
2172 ds_put_format (s, " THRU ");
2173 ctables_category_format_number (cat->nrange[1], var, s);
2177 ctables_category_format_string (cat->srange[0], var, s);
2178 ds_put_format (s, " THRU ");
2179 ctables_category_format_string (cat->srange[1], var, s);
2183 ds_put_cstr (s, "MISSING");
2187 ds_put_cstr (s, "OTHERNM");
2190 case CCT_POSTCOMPUTE:
2191 ds_put_format (s, "&%s", cat->pc->name);
2196 ds_put_cstr (s, cat->total_label);
2202 case CCT_EXCLUDED_MISSING:
/* Checks that every category referenced by postcompute expression E, which
   belongs to the computed category PC_CAT, can be found in CATS, recursing
   into both operands of non-leaf expressions.

   For a leaf that refers to a category, the category is looked up with
   ctables_find_category_for_postcompute(), using PC_CAT's parse format.
   Two kinds of failure are diagnosed:

   - A SUBTOTAL reference without a position when CATS contains more than
     one subtotal, which is ambiguous.

   - A reference to a category that is not in the list at all, followed by a
     note suggesting a fix that depends on the kind of reference.

   CATS_LOCATION is the source location of the category list, used in the
   diagnostics. */
2210 ctables_recursive_check_postcompute (struct dictionary *dict,
2211 const struct ctables_pcexpr *e,
2212 struct ctables_category *pc_cat,
2213 const struct ctables_categories *cats,
2214 const struct msg_location *cats_location)
2218 case CTPO_CAT_NUMBER:
2219 case CTPO_CAT_STRING:
2220 case CTPO_CAT_NRANGE:
2221 case CTPO_CAT_SRANGE:
2222 case CTPO_CAT_MISSING:
2223 case CTPO_CAT_OTHERNM:
2224 case CTPO_CAT_SUBTOTAL:
2225 case CTPO_CAT_TOTAL:
2227 struct ctables_category *cat = ctables_find_category_for_postcompute (
2228 dict, cats, pc_cat->parse_format, e);
/* A positionless SUBTOTAL reference is only unambiguous when the list has
   at most one subtotal, so count them. */
2231 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2233 size_t n_subtotals = 0;
2234 for (size_t i = 0; i < cats->n_cats; i++)
2235 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2236 if (n_subtotals > 1)
2238 msg_at (SE, cats_location,
2239 ngettext ("These categories include %zu instance "
2240 "of SUBTOTAL or HSUBTOTAL, so references "
2241 "from computed categories must refer to "
2242 "subtotals by position, "
2243 "e.g. SUBTOTAL[1].",
2244 "These categories include %zu instances "
2245 "of SUBTOTAL or HSUBTOTAL, so references "
2246 "from computed categories must refer to "
2247 "subtotals by position, "
2248 "e.g. SUBTOTAL[1].",
2251 msg_at (SN, e->location,
2252 _("This is the reference that lacks a position."));
2257 msg_at (SE, pc_cat->location,
2258 _("Computed category &%s references a category not included "
2259 "in the category list."),
2261 msg_at (SN, e->location, _("This is the missing category."));
2262 if (e->op == CTPO_CAT_SUBTOTAL)
2263 msg_at (SN, cats_location,
2264 _("To fix the problem, add subtotals to the "
2265 "list of categories here."));
2266 else if (e->op == CTPO_CAT_TOTAL)
/* This hint is issued with msg(), without a source location, unlike the
   other hints here. */
2267 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2268 "CATEGORIES specification."));
2270 msg_at (SN, cats_location,
2271 _("To fix the problem, add the missing category to the "
2272 "list of categories here."));
2275 if (pc_cat->pc->hide_source_cats)
/* Non-leaf expression: check each operand that is present. */
2289 for (size_t i = 0; i < 2; i++)
2290 if (e->subs[i] && !ctables_recursive_check_postcompute (
2291 dict, e->subs[i], pc_cat, cats, cats_location))
/* Builds the label for computed category CAT from its postcompute's label
   text, expanding each occurrence of ")LABEL[N]" into the label of the Nth
   category in CATS (N is 1-based), formatted for variable VAR by
   ctables_category_format_label().

   If the label contains no ")LABEL[" at all, it is used as is.  If a
   reference is malformed (no closing ']', an index that is not a whole
   number in the range 1...n_cats, or a category that cannot be formatted),
   the fallback at the end of the function returns the unexpanded label
   text. */
2299 static struct pivot_value *
2300 ctables_postcompute_label (const struct ctables_categories *cats,
2301 const struct ctables_category *cat,
2302 const struct variable *var)
2304 struct substring in = ss_cstr (cat->pc->label);
2305 struct substring target = ss_cstr (")LABEL[");
2307 struct string out = DS_EMPTY_INITIALIZER;
2310 size_t chunk = ss_find_substring (in, target);
2311 if (chunk == SIZE_MAX)
/* No (further) reference.  If nothing has been accumulated, avoid the
   intermediate string and use the remaining input directly. */
2313 if (ds_is_empty (&out))
2314 return pivot_value_new_user_text (in.string, in.length);
2317 ds_put_substring (&out, in);
2318 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the literal text before the reference, then skip past ")LABEL[". */
2322 ds_put_substring (&out, ss_head (in, chunk));
2323 ss_advance (&in, chunk + target.length);
2325 struct substring idx_s;
2326 if (!ss_get_until (&in, ']', &idx_s))
/* IDX_S is a view into the null-terminated label, so strtol() stops at the
   ']' at the latest.  Requiring TAIL to be the end of IDX_S rejects
   trailing garbage and an empty index. */
2329 long int idx = strtol (idx_s.string, &tail, 10);
2330 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2333 struct ctables_category *cat2 = &cats->cats[idx - 1];
2334 if (!ctables_category_format_label (cat2, var, &out))
2340 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value used to label category CAT, which is one of CATS,
   for value VALUE of variable VAR:

   - A postcompute that has a label gets that label with its ")LABEL[N]"
     references expanded.

   - A total or subtotal gets its total label as user text.

   - Anything else, including a postcompute without a label, gets the
     variable value itself. */
2343 static struct pivot_value *
2344 ctables_category_create_value_label (const struct ctables_categories *cats,
2345 const struct ctables_category *cat,
2346 const struct variable *var,
2347 const union value *value)
2349 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2350 ? ctables_postcompute_label (cats, cat, var)
2351 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2352 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2353 : pivot_value_new_var_value (var, value));
2356 /* CTABLES variable nesting and stacking. */
2358 /* A nested sequence of variables, e.g. a > b > c. */
/* The nested variables, outermost first (nest_fts() appends the inner
   nest's variables after the outer nest's). */
2361 struct variable **vars;
/* Per-area-type arrays, indexed by enum ctables_area_type, and their
   element counts.  The arrays are freed by ctables_nest_uninit(). */
2365 size_t *areas[N_CTATS];
2366 size_t n_areas[N_CTATS];
/* Summary specifications, one set per enum ctables_summary_variant. */
2369 struct ctables_summary_spec_set specs[N_CSVS];
2372 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2373 struct ctables_stack
/* Array of nests; the functions below pair it with the count 'n'. */
2375 struct ctables_nest *nests;
/* Releases the resources held by NEST: the summary specification set for
   every summary variant and the area array for every area type.  NEST
   itself is not freed by the lines shown here. */
2380 ctables_nest_uninit (struct ctables_nest *nest)
2383 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2384 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2385 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2386 free (nest->areas[at]);
/* Releases the resources held by STACK: every nest is uninitialized and then
   the array of nests is freed.  STACK itself is not freed. */
2390 ctables_stack_uninit (struct ctables_stack *stack)
2394 for (size_t i = 0; i < stack->n; i++)
2395 ctables_nest_uninit (&stack->nests[i]);
2396 free (stack->nests);
/* Computes the nesting S0 > S1: the result has one nest for each pair (a, b)
   with a taken from S0 and b taken from S1, whose variables are a's
   variables followed by b's.

   Indexes that refer to b's variables (scale_idx, summary_idx) are shifted
   by a->n so that they remain valid in the combined variable array; indexes
   from a take precedence when both are set.  The summary specifications are
   cloned from whichever of a and b was selected as the summary source.

   Both S0 and S1 are consumed: they are uninitialized before returning.

   NOTE(review): in the allocation below, the product
   s1.n * sizeof *stack.nests is computed before xnmalloc() can check it for
   overflow; xnmalloc (s0.n * s1.n, sizeof *stack.nests) would have the same
   limitation for the other product -- confirm whether this matters here. */
2400 static struct ctables_stack
2401 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2408 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2409 for (size_t i = 0; i < s0.n; i++)
2410 for (size_t j = 0; j < s1.n; j++)
2412 const struct ctables_nest *a = &s0.nests[i];
2413 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2415 size_t allocate = a->n + b->n;
2416 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2418 for (size_t k = 0; k < a->n; k++)
2419 vars[n++] = a->vars[k];
2420 for (size_t k = 0; k < b->n; k++)
2421 vars[n++] = b->vars[k];
2422 assert (n == allocate);
/* Choose which operand supplies the summary specifications, based on which
   of them has a summary variable. */
2424 const struct ctables_nest *summary_src;
2425 if (!a->specs[CSV_CELL].var)
2427 else if (!b->specs[CSV_CELL].var)
2432 struct ctables_nest *new = &stack.nests[stack.n++];
2433 *new = (struct ctables_nest) {
2435 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2436 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2438 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2439 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2443 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2444 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2446 ctables_stack_uninit (&s0);
2447 ctables_stack_uninit (&s1);
/* Computes the stacking S0 + S1: the result holds S0's nests followed by
   S1's.

   The nest structures are moved into the new array by plain assignment, so
   the pointers they contain are transferred rather than duplicated.
   Because S1's nests end up S0.n positions later than they were, their
   'group_head' members are shifted by S0.n. */
2451 static struct ctables_stack
2452 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2454 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2455 for (size_t i = 0; i < s0.n; i++)
2456 stack.nests[stack.n++] = s0.nests[i];
2457 for (size_t i = 0; i < s1.n; i++)
2459 stack.nests[stack.n] = s1.nests[i];
2460 stack.nests[stack.n].group_head += s0.n;
2463 assert (stack.n == s0.n + s1.n);
/* Builds the stack for a single variable axis node A: one nest that contains
   one variable.

   The nest's scale_idx is 0 if A is a scale variable and SIZE_MAX
   otherwise.  Its summary_idx is 0 if A is a scale variable or has cell
   summary specifications, and SIZE_MAX otherwise.  A's summary
   specification sets are cloned into the nest and tagged with A's variable
   and scale flag. */
2469 static struct ctables_stack
2470 var_fts (const struct ctables_axis *a)
2472 struct variable **vars = xmalloc (sizeof *vars);
2475 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2476 struct ctables_nest *nest = xmalloc (sizeof *nest);
2477 *nest = (struct ctables_nest) {
2480 .scale_idx = a->scale ? 0 : SIZE_MAX,
2481 .summary_idx = is_summary ? 0 : SIZE_MAX,
2484 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2486 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2487 nest->specs[sv].var = a->var;
2488 nest->specs[sv].is_scale = a->scale;
2490 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis expression tree A into a flat stack of nests by
   recursively converting both operands of each non-leaf node and combining
   the results with stack_fts() or nest_fts().  One path returns an empty
   stack (NOTE(review): presumably for a null A -- confirm). */
2493 static struct ctables_stack
2494 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2497 return (struct ctables_stack) { .n = 0 };
2505 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2506 enumerate_fts (axis_type, a->subs[1]));
2509 /* This should consider any of the scale variables found in the result to
2510 be linked to each other listwise for SMISSING=LISTWISE. */
2511 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2512 enumerate_fts (axis_type, a->subs[1]));
2518 /* CTABLES summary calculation. */
/* Accumulator for one summary in one cell.  Which member is in use depends
   on the summary function, as the per-group comments below indicate. */
2520 union ctables_summary
2522 /* COUNT, VALIDN, TOTALN. */
2525 /* MINIMUM, MAXIMUM, RANGE. */
2532 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2533 struct moments1 *moments;
2535 /* MEDIAN, MODE, PTILE. */
2538 struct casewriter *writer;
/* Initializes accumulator S for the summary function given by SS.

   Minimum and maximum tracking starts out at SYSMIS.  Moment-based
   functions get a moments1 accumulator, computed up to the mean or up to
   the variance depending on the function.  Order-statistic functions get a
   sorting casewriter whose cases have two numeric fields, sorted in
   ascending order on the first; ctables_summary_add() stores the value in
   field 0 and the weight in field 1. */
2545 ctables_summary_init (union ctables_summary *s,
2546 const struct ctables_summary_spec *ss)
2548 switch (ss->function)
2551 case CTSF_areaPCT_COUNT:
2552 case CTSF_areaPCT_VALIDN:
2553 case CTSF_areaPCT_TOTALN:
2566 s->min = s->max = SYSMIS;
2571 case CTSF_areaPCT_SUM:
2572 s->moments = moments1_create (MOMENT_MEAN);
2578 s->moments = moments1_create (MOMENT_VARIANCE);
2585 struct caseproto *proto = caseproto_create ();
2586 proto = caseproto_add_width (proto, 0);
2587 proto = caseproto_add_width (proto, 0);
2589 struct subcase ordering;
2590 subcase_init (&ordering, 0, 0, SC_ASCEND);
2591 s->writer = sort_create_writer (&ordering, proto);
/* The local ordering and prototype are released right after the writer has
   been created. */
2592 subcase_uninit (&ordering);
2593 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function given by SS: the moments accumulator for moment-based functions,
   or the casewriter for order-statistic functions. */
2603 ctables_summary_uninit (union ctables_summary *s,
2604 const struct ctables_summary_spec *ss)
2606 switch (ss->function)
2609 case CTSF_areaPCT_COUNT:
2610 case CTSF_areaPCT_VALIDN:
2611 case CTSF_areaPCT_TOTALN:
2630 case CTSF_areaPCT_SUM:
2631 moments1_destroy (s->moments);
2637 casewriter_destroy (s->writer);
/* Adds one observation, VALUE, to accumulator S for the summary function
   given by SS.  IS_MISSING and IS_INCLUDED describe the observation with
   respect to the variable being summarized; the comment inside the function
   explains which observations each kind of summary counts. */
2643 ctables_summary_add (union ctables_summary *s,
2644 const struct ctables_summary_spec *ss,
2645 const union value *value,
2646 bool is_missing, bool is_included,
2649 /* To determine whether a case is included in a given table for a particular
2650 kind of summary, consider the following charts for the variable being
2651 summarized. Only if "yes" appears is the case counted.
2653 Categorical variables: VALIDN other TOTALN
2654 Valid values in included categories yes yes yes
2655 Missing values in included categories --- yes yes
2656 Missing values in excluded categories --- --- yes
2657 Valid values in excluded categories --- --- ---
2659 Scale variables: VALIDN other TOTALN
2660 Valid value yes yes yes
2661 Missing value --- yes yes
2663 Missing values include both user- and system-missing. (The system-missing
2664 value is always in an excluded category.)
2666 One way to interpret the above table is that scale variables are like
2667 categorical variables in which all values are in included categories.
2669 switch (ss->function)
2672 case CTSF_areaPCT_TOTALN:
2677 case CTSF_areaPCT_COUNT:
2683 case CTSF_areaPCT_VALIDN:
2701 if (s->min == SYSMIS || value->f < s->min)
2703 if (s->max == SYSMIS || value->f > s->max)
2714 moments1_add (s->moments, value->f, weight);
2717 case CTSF_areaPCT_SUM:
2719 moments1_add (s->moments, value->f, weight);
2727 s->ovalid += weight;
2729 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2730 *case_num_rw_idx (c, 0) = value->f;
2731 *case_num_rw_idx (c, 1) = weight;
2732 casewriter_write (s->writer, c);
/* Computes the final value of the summary accumulated in S for the summary
   function given by SS.  AREAS holds the areas that contain the cell,
   indexed by area type; percentage functions use AREAS[SS->calc_area] as
   their base.

   Results that cannot be computed (a zero denominator, or missing moments)
   are reported as SYSMIS. */
2739 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2740 union ctables_summary *s,
2741 const struct ctables_summary_spec *ss)
2743 switch (ss->function)
2749 return areas[ss->calc_area]->sequence;
2751 case CTSF_areaPCT_COUNT:
2753 const struct ctables_area *a = areas[ss->calc_area];
2754 double a_count = a->count[ss->weighting];
2755 return a_count ? s->count / a_count * 100 : SYSMIS;
2758 case CTSF_areaPCT_VALIDN:
2760 const struct ctables_area *a = areas[ss->calc_area];
2761 double a_valid = a->valid[ss->weighting];
2762 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2765 case CTSF_areaPCT_TOTALN:
2767 const struct ctables_area *a = areas[ss->calc_area];
2768 double a_total = a->total[ss->weighting];
2769 return a_total ? s->count / a_total * 100 : SYSMIS;
2784 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2789 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2795 double weight, variance;
2796 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2797 return calc_semean (variance, weight);
2803 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2804 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered from the accumulated moments as total weight times
   mean. */
2809 double weight, mean;
2810 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2811 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2817 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2821 case CTSF_areaPCT_SUM:
2823 double weight, mean;
2824 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2825 if (weight == SYSMIS || mean == SYSMIS)
/* Percentage of the area's sum for the summed variable. */
2828 const struct ctables_area *a = areas[ss->calc_area];
2829 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2830 double denom = sum->sum[ss->weighting];
2831 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Order statistics: the collected cases are read back in sorted order and
   the result is stored in s->ovalue.  Functions other than CTSF_PTILE that
   reach this code use the 0.5 quantile. */
2838 struct casereader *reader = casewriter_make_reader (s->writer);
2841 struct percentile *ptile = percentile_create (
2842 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2843 struct order_stats *os = &ptile->parent;
2844 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2845 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2846 statistic_destroy (&ptile->parent.parent);
2853 struct casereader *reader = casewriter_make_reader (s->writer);
2856 struct mode *mode = mode_create ();
2857 struct order_stats *os = &mode->parent;
2858 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2859 s->ovalue = mode->mode;
2860 statistic_destroy (&mode->parent.parent);
2868 /* CTABLES occurrences. */
/* One distinct value that has been observed for a variable, kept in a hash
   map by ctables_add_occurrence(). */
2870 struct ctables_occurrence
/* Node in the hash map of occurrences. */
2872 struct hmap_node node;
/* Records VALUE, a value of variable VAR, in the hash map OCCURRENCES.

   The map is keyed on the value's hash, computed for VAR's width.  An
   existing entry with an equal value is searched for first; the insertion
   below creates a new entry that owns a clone of VALUE (NOTE(review):
   presumably only when no equal entry was found -- confirm). */
2877 ctables_add_occurrence (const struct variable *var,
2878 const union value *value,
2879 struct hmap *occurrences)
2881 int width = var_get_width (var);
2882 unsigned int hash = value_hash (value, width, 0);
2884 struct ctables_occurrence *o;
2885 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2887 if (value_equal (value, &o->value, width))
2890 o = xmalloc (sizeof *o);
2891 value_clone (&o->value, value, width);
2892 hmap_insert (occurrences, &o->node, hash);
/* These enumerators are defined to have the same numeric values as the
   corresponding SETTINGS_VALUE_SHOW_* constants. */
2897 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2898 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2899 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2900 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
2905 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2906 all the axes (except the scalar variable, if any). */
2907 struct hmap_node node;
/* The section that this cell belongs to. */
2908 struct ctables_section *section;
2910 /* The areas that contain this cell. */
2911 uint32_t omit_areas;
2912 struct ctables_area *areas[N_CTATS];
/* Which summary variant applies to this cell. */
2917 enum ctables_summary_variant sv;
/* Per-axis data: ctables_cell_compare_3way() reads axes[a].cvs[i], the
   category and value of the cell for variable i of the nest on axis a. */
2919 struct ctables_cell_axis
2921 struct ctables_cell_value
2923 const struct ctables_category *category;
/* The cell's summary accumulators. */
2931 union ctables_summary *summaries;
/* One section of a table: a choice of one nest per pivot axis, together with
   the cells, areas and value occurrences gathered for that combination. */
2934 struct ctables_section
/* The table that this section belongs to. */
2937 struct ctables_table *table;
/* The nest in use on each axis. */
2938 struct ctables_nest *nests[PIVOT_N_AXES];
2941 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2942 struct hmap cells; /* Contains "struct ctables_cell"s. */
2943 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
/* Forward declaration; the definition is not in this part of the file. */
2946 static void ctables_section_uninit (struct ctables_section *);
/* One table to be produced by the command: its parsed axis expressions, the
   stacks and sections derived from them, and the settings for summary and
   category label placement. */
2948 struct ctables_table
2950 struct ctables *ctables;
2951 struct ctables_axis *axes[PIVOT_N_AXES];
2952 struct ctables_stack stacks[PIVOT_N_AXES];
2953 struct ctables_section *sections;
2955 enum pivot_axis_type summary_axis;
2956 struct ctables_summary_spec_set summary_specs;
2957 struct variable **sum_vars;
2960 enum pivot_axis_type slabels_axis;
2961 bool slabels_visible;
2963 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2965 Most commonly, label_axis[a] == a, and in particular we always have
2966 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2968 If ROWLABELS or COLLABELS is specified, then one of
2969 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2970 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2972 If any category labels are moved, then 'clabels_example' is one of the
2973 variables being moved (and it is otherwise NULL). All of the variables
2974 being moved have the same width, value labels, and categories, so this
2975 example variable can be used to find those out.
2977 The remaining members in this group are relevant only if category labels
2980 'clabels_values_map' holds a "struct ctables_value" for all the values
2981 that appear in all of the variables in the moved categories. It is
2982 accumulated as the data is read. Once the data is fully read, its
2983 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2985 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2986 enum pivot_axis_type clabels_from_axis;
2987 enum pivot_axis_type clabels_to_axis;
2988 int clabels_start_ofs, clabels_end_ofs;
2989 const struct variable *clabels_example;
2990 struct hmap clabels_values_map;
2991 struct ctables_value **clabels_values;
2992 size_t n_clabels_values;
2994 /* Indexed by variable dictionary index. */
2995 struct ctables_categories **categories;
2996 size_t n_categories;
3005 struct ctables_chisq *chisq;
3006 struct ctables_pairwise *pairwise;
/* Auxiliary data handed to ctables_cell_compare_3way() through its 'aux_'
   argument: the nest whose variables are compared and the axis whose cell
   values are read. */
3009 struct ctables_cell_sort_aux
3011 const struct ctables_nest *nest;
3012 enum pivot_axis_type a;
/* Three-way comparison of two cells.  A_ and B_ each point to a
   "struct ctables_cell *"; AUX_ points to a struct ctables_cell_sort_aux
   naming the nest and the axis to compare on.

   The nest's variables are visited in order, skipping the scale variable.
   Cells whose categories differ for a variable are ordered by category
   pointer.  Cells in the same category are ordered according to the
   category's type; several of the switch's case labels are not shown in this
   listing. */
3016 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3018 const struct ctables_cell_sort_aux *aux = aux_;
3019 struct ctables_cell *const *ap = a_;
3020 struct ctables_cell *const *bp = b_;
3021 const struct ctables_cell *a = *ap;
3022 const struct ctables_cell *b = *bp;
3024 const struct ctables_nest *nest = aux->nest;
3025 for (size_t i = 0; i < nest->n; i++)
3026 if (i != nest->scale_idx)
3028 const struct variable *var = nest->vars[i];
3029 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3030 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the categories' addresses. */
3031 if (a_cv->category != b_cv->category)
3032 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category: the order within it depends on the category type. */
3034 const union value *a_val = &a_cv->value;
3035 const union value *b_val = &b_cv->value;
3036 switch (a_cv->category->type)
3042 case CCT_POSTCOMPUTE:
3043 case CCT_EXCLUDED_MISSING:
3044 /* Must be equal. */
3052 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3060 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Honor the category's requested sort direction. */
3062 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering that involves the values' labels: labels are compared with
   strcmp(), and a comparison by value also appears below. */
3068 const char *a_label = var_lookup_value_label (var, a_val);
3069 const char *b_label = var_lookup_value_label (var, b_val);
3075 cmp = strcmp (a_label, b_label);
3081 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3084 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Finds the area of type AREA that CELL belongs to within CELL's section.

   An area is keyed, for every axis, by the category of each variable listed
   in nest->areas[AREA] and, unless that category is a total, subtotal, or
   postcompute, by the variable's value in the cell.  The section's
   areas[AREA] hash map is searched for an area whose example cell has the
   same key.  The code at the end allocates a new area with CELL as its
   example and inserts it into the map. */
3095 static struct ctables_area *
3096 ctables_area_insert (struct ctables_cell *cell, enum ctables_area_type area)
3098 struct ctables_section *s = cell->section;
/* Compute the hash of the area key. */
3100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3102 const struct ctables_nest *nest = s->nests[a];
3103 for (size_t i = 0; i < nest->n_areas[area]; i++)
3105 size_t v_idx = nest->areas[area][i];
3106 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3107 hash = hash_pointer (cv->category, hash);
3108 if (cv->category->type != CCT_TOTAL
3109 && cv->category->type != CCT_SUBTOTAL
3110 && cv->category->type != CCT_POSTCOMPUTE)
3111 hash = value_hash (&cv->value,
3112 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area with the same key.  Note that the loop variable
   'a' in the inner 'for' shadows this area pointer 'a'. */
3116 struct ctables_area *a;
3117 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3119 const struct ctables_cell *df = a->example;
3120 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3122 const struct ctables_nest *nest = s->nests[a];
3123 for (size_t i = 0; i < nest->n_areas[area]; i++)
3125 size_t v_idx = nest->areas[area][i];
3126 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3127 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3128 if (cv1->category != cv2->category
3129 || (cv1->category->type != CCT_TOTAL
3130 && cv1->category->type != CCT_SUBTOTAL
3131 && cv1->category->type != CCT_POSTCOMPUTE
3132 && !value_equal (&cv1->value, &cv2->value,
3133 var_get_width (nest->vars[v_idx]))))
/* Zero-filled per-sum-variable accumulators, allocated when the table has
   sum variables. */
3142 struct ctables_sum *sums = (s->table->n_sum_vars
3143 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
/* Create the new area; members other than 'example' and 'sums' start out
   zeroed by the compound literal. */
3146 a = xmalloc (sizeof *a);
3147 *a = (struct ctables_area) { .example = cell, .sums = sums };
3148 hmap_insert (&s->areas[area], &a->node, hash);
/* Finds the cell in section S that is keyed by CATS and, for ordinary
   categories, by the values that case C holds for each non-scale nest
   variable.

   The code after the lookup allocates a new cell, copies the categories and
   clones the values into it, initializes one summary per summary
   specification, attaches the cell to an area of every area type, and inserts
   it into S->cells. */
3152 static struct ctables_cell *
3153 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3154 const struct ctables_category **cats[PIVOT_N_AXES])
3157 enum ctables_summary_variant sv = CSV_CELL;
/* Hash each non-scale variable's category pointer and, unless the category
   is a total, subtotal, or postcompute, the case's value for the variable. */
3158 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3160 const struct ctables_nest *nest = s->nests[a];
3161 for (size_t i = 0; i < nest->n; i++)
3162 if (i != nest->scale_idx)
3164 hash = hash_pointer (cats[a][i], hash);
3165 if (cats[a][i]->type != CCT_TOTAL
3166 && cats[a][i]->type != CCT_SUBTOTAL
3167 && cats[a][i]->type != CCT_POSTCOMPUTE)
3168 hash = value_hash (case_data (c, nest->vars[i]),
3169 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for a cell with identical categories and values. */
3175 struct ctables_cell *cell;
3176 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3178 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3180 const struct ctables_nest *nest = s->nests[a];
3181 for (size_t i = 0; i < nest->n; i++)
3182 if (i != nest->scale_idx
3183 && (cats[a][i] != cell->axes[a].cvs[i].category
3184 || (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE
3187 && !value_equal (case_data (c, nest->vars[i]),
3188 &cell->axes[a].cvs[i].value,
3189 var_get_width (nest->vars[i])))))
/* Build a new cell. */
3198 cell = xmalloc (sizeof *cell);
3202 cell->omit_areas = 0;
3203 cell->postcompute = false;
3204 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3206 const struct ctables_nest *nest = s->nests[a];
/* An axis whose nest is empty gets no cell-value array. */
3207 cell->axes[a].cvs = (nest->n
3208 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3210 for (size_t i = 0; i < nest->n; i++)
3212 const struct ctables_category *cat = cats[a][i];
3213 const struct variable *var = nest->vars[i];
3214 const union value *value = case_data (c, var);
3215 if (i != nest->scale_idx)
3217 const struct ctables_category *subtotal = cat->subtotal;
3218 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* For a total, subtotal, or postcompute category, set bits in
   'omit_areas' (one bit per area type) chosen by the axis the variable is
   on. */
3221 if (cat->type == CCT_TOTAL
3222 || cat->type == CCT_SUBTOTAL
3223 || cat->type == CCT_POSTCOMPUTE)
3227 case PIVOT_AXIS_COLUMN:
3228 cell->omit_areas |= ((1u << CTAT_TABLE) |
3229 (1u << CTAT_LAYER) |
3230 (1u << CTAT_LAYERCOL) |
3231 (1u << CTAT_SUBTABLE) |
3234 case PIVOT_AXIS_ROW:
3235 cell->omit_areas |= ((1u << CTAT_TABLE) |
3236 (1u << CTAT_LAYER) |
3237 (1u << CTAT_LAYERROW) |
3238 (1u << CTAT_SUBTABLE) |
3241 case PIVOT_AXIS_LAYER:
3242 cell->omit_areas |= ((1u << CTAT_TABLE) |
3243 (1u << CTAT_LAYER));
3247 if (cat->type == CCT_POSTCOMPUTE)
3248 cell->postcompute = true;
/* Record the category and a private copy of the value. */
3251 cell->axes[a].cvs[i].category = cat;
3252 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* One summary per specification in the set selected by the cell's summary
   variant, taken from the nest on the table's summary axis. */
3256 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3257 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3258 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3259 for (size_t i = 0; i < specs->n; i++)
3260 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3261 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3262 cell->areas[at] = ctables_area_insert (cell, at);
3263 hmap_insert (&s->cells, &cell->node, hash);
/* Helper over two arrays of N_CTWS weights, one element per weighting type.
   The loop body is not shown in this listing; presumably it adds SRC[wt] to
   DST[wt] -- confirm against the full file. */
3268 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3270 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C to the cell of section S identified by CATS, obtaining the cell
   from ctables_cell_insert__().

   Every summary of the cell is updated with the value of the summary
   variable, using the weight in WEIGHT that matches the summary's weighting.
   Then, for each area type that the cell does not omit, the area's totals,
   counts, valid counts, and per-sum-variable sums are updated.

   IS_INCLUDED is forwarded to ctables_summary_add().  WEIGHT holds one weight
   per weighting type. */
3275 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3276 const struct ctables_category **cats[PIVOT_N_AXES],
3277 bool is_included, double weight[N_CTWS])
3279 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3280 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3282 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3283 const union value *value = case_data (c, specs->var);
3284 bool is_missing = var_is_value_missing (specs->var, value);
/* A scale summary variable also counts as missing when the case is listwise
   missing for the specification set. */
3285 bool is_scale_missing
3286 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3288 for (size_t i = 0; i < specs->n; i++)
3289 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3290 is_scale_missing, is_included,
3291 weight[specs->specs[i].weighting]);
/* 'omit_areas' is a bit mask with one bit per area type (it is built with
   "|= 1u << CTAT_..."), so the bit for this area type must be tested with
   bitwise AND.  A logical AND here would be true whenever any bit at all is
   set and would skip every area of such a cell. */
3292 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3293 if (!(cell->omit_areas & (1u << at)))
3295 struct ctables_area *a = cell->areas[at];
3297 add_weight (a->total, weight);
3299 add_weight (a->count, weight);
3302 add_weight (a->valid, weight);
/* Accumulate weighted sums of each non-missing sum variable, once per
   weighting type. */
3304 if (!is_scale_missing)
3305 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3307 const struct variable *var = s->table->sum_vars[i];
3308 double addend = case_num (c, var);
3309 if (!var_is_num_missing (var, addend))
3310 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3311 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to cells in which variables' categories are replaced by their
   total categories.

   Starting at axis START_AXIS and nest index START_NEST, each non-scale
   variable's total category is looked up in the table's categories for that
   variable.  The current entry of CATS is saved, the case is added with
   ctables_cell_add__(), and the function recurses for the variables after
   this one so that combinations of totals are covered as well.  The lines that
   substitute the total into CATS and restore the saved entry are not shown in
   this listing. */
3318 recurse_totals (struct ctables_section *s, const struct ccase *c,
3319 const struct ctables_category **cats[PIVOT_N_AXES],
3320 bool is_included, double weight[N_CTWS],
3321 enum pivot_axis_type start_axis, size_t start_nest)
3323 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3325 const struct ctables_nest *nest = s->nests[a];
3326 for (size_t i = start_nest; i < nest->n; i++)
3328 if (i == nest->scale_idx)
3331 const struct variable *var = nest->vars[i];
3333 const struct ctables_category *total = ctables_categories_total (
3334 s->table->categories[var_get_dict_index (var)]);
3337 const struct ctables_category *save = cats[a][i];
3339 ctables_cell_add__ (s, c, cats, is_included, weight);
3340 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to cells in which variables' categories are replaced by the
   subtotal categories they belong to.

   Starting at axis START_AXIS and nest index START_NEST, each non-scale
   variable's entry in CATS is saved and replaced by that category's
   'subtotal' member.  The case is then added with ctables_cell_add__(), and
   the function recurses for the variables after this one.  Any guard on
   categories without a subtotal, and the restoration of the saved entry, are
   not shown in this listing. */
3349 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3350 const struct ctables_category **cats[PIVOT_N_AXES],
3351 bool is_included, double weight[N_CTWS],
3352 enum pivot_axis_type start_axis, size_t start_nest)
3354 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3356 const struct ctables_nest *nest = s->nests[a];
3357 for (size_t i = start_nest; i < nest->n; i++)
3359 if (i == nest->scale_idx)
3362 const struct ctables_category *save = cats[a][i];
3365 cats[a][i] = save->subtotal;
3366 ctables_cell_add__ (s, c, cats, is_included, weight);
3367 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C, with the per-weighting weights in WEIGHT, to section S.

   First the category of every non-scale variable on every axis is determined
   with ctables_categories_match().  Then each such variable's value is
   recorded with ctables_add_occurrence().  Finally the case is added to its
   own cell and, through recurse_totals() and recurse_subtotals(), to the
   corresponding total and subtotal cells. */
3376 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3377 double weight[N_CTWS])
/* NOTE(review): these are variable-length arrays sized by each nest's 'n'.
   ctables_cell_insert__() explicitly handles nest->n == 0, and a zero-length
   VLA is undefined behavior in ISO C -- confirm that n > 0 here or that a
   compiler extension is being relied upon. */
3379 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3380 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3381 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3382 const struct ctables_category **cats[PIVOT_N_AXES] =
3384 [PIVOT_AXIS_LAYER] = layer_cats,
3385 [PIVOT_AXIS_ROW] = row_cats,
3386 [PIVOT_AXIS_COLUMN] = column_cats,
3389 bool is_included = true;
/* Pass 1: find the category for each non-scale variable. */
3391 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3393 const struct ctables_nest *nest = s->nests[a];
3394 for (size_t i = 0; i < nest->n; i++)
3395 if (i != nest->scale_idx)
3397 const struct variable *var = nest->vars[i];
3398 const union value *value = case_data (c, var);
3400 cats[a][i] = ctables_categories_match (
3401 s->table->categories[var_get_dict_index (var)], value, var);
3404 if (i != nest->summary_idx)
3407 if (!var_is_value_missing (var, value))
/* Shared placeholder category; when it is used, the case is flagged as not
   included. */
3410 static const struct ctables_category cct_excluded_missing = {
3411 .type = CCT_EXCLUDED_MISSING,
3414 cats[a][i] = &cct_excluded_missing;
3415 is_included = false;
/* Pass 2: record the value of each non-scale variable as an occurrence. */
3421 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3423 const struct ctables_nest *nest = s->nests[a];
3424 for (size_t i = 0; i < nest->n; i++)
3425 if (i != nest->scale_idx)
3427 const struct variable *var = nest->vars[i];
3428 const union value *value = case_data (c, var);
3429 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its own cell, then to total and subtotal cells. */
3433 ctables_cell_add__ (s, c, cats, is_included, weight);
3434 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3435 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* An entry in a table's 'clabels_values_map'.  Code elsewhere in this file
   also reads and writes 'value' and 'leaf' members that are not shown in this
   listing. */
3438 struct ctables_value
/* Node in the table's 'clabels_values_map' hash map. */
3440 struct hmap_node node;
/* Searches T's 'clabels_values_map' for an entry whose value equals VALUE
   when compared at WIDTH.  HASH must be VALUE's hash as used at insertion
   (callers in this file compute it with value_hash (value, width, 0)). */
3445 static struct ctables_value *
3446 ctables_value_find__ (const struct ctables_table *t, const union value *value,
3447 int width, unsigned int hash)
3449 struct ctables_value *clv;
3450 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3451 hash, &t->clabels_values_map)
3452 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T's 'clabels_values_map'.  The value is hashed and looked up
   first; the new entry holds its own clone of VALUE.  The guard that skips
   insertion when the lookup succeeds is not shown in this listing. */
3458 ctables_value_insert (struct ctables_table *t, const union value *value,
3461 unsigned int hash = value_hash (value, width, 0);
3462 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3465 clv = xmalloc (sizeof *clv);
3466 value_clone (&clv->value, value, width);
3467 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up the ctables_value for CELL's moved category label.

   The lookup key is CELL's value for the last (innermost) variable of the
   nest on the table's 'clabels_from_axis'.  The table's 'clabels_example'
   is tested first; the early exit taken when it is null is not shown in this
   listing.  The function asserts that the lookup succeeds. */
3471 static const struct ctables_value *
3472 ctables_value_find (const struct ctables_cell *cell)
3474 const struct ctables_section *s = cell->section;
3475 const struct ctables_table *t = s->table;
3476 if (!t->clabels_example)
3479 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3480 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3481 const union value *value
3482 = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3483 int width = var_get_width (var);
3484 const struct ctables_value *ctv = ctables_value_find__ (
3485 t, value, width, value_hash (value, width, 0));
3486 assert (ctv != NULL);
/* Three-way comparison for sorting an array of "struct ctables_value *".
   A_ and B_ point to array elements; WIDTH_ points to an int holding the
   width at which the two values are compared. */
3491 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3493 const struct ctables_value *const *ap = a_;
3494 const struct ctables_value *const *bp = b_;
3495 const struct ctables_value *a = *ap;
3496 const struct ctables_value *b = *bp;
3497 const int *width = width_;
3498 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T's sorted 'clabels_values' array from 'clabels_values_map'.

   If empty categories are to be shown for the example variable, every
   labeled value of that variable that matches one of its categories is first
   added to the map.  The map's entries are then copied into a newly
   allocated array, sorted by value, and each entry's 'leaf' is set to its
   index in the sorted array. */
3502 ctables_sort_clabels_values (struct ctables_table *t)
3504 const struct variable *v0 = t->clabels_example;
3505 int width = var_get_width (v0);
3507 size_t i0 = var_get_dict_index (v0);
3508 struct ctables_categories *c0 = t->categories[i0];
3509 if (t->show_empty[i0])
3511 const struct val_labs *val_labs = var_get_value_labels (v0);
3512 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3513 vl = val_labs_next (val_labs, vl))
3514 if (ctables_categories_match (c0, &vl->value, v0))
3515 ctables_value_insert (t, &vl->value, width);
/* Collect pointers to all of the map's entries. */
3518 size_t n = hmap_count (&t->clabels_values_map);
3519 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3521 struct ctables_value *clv;
3523 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3524 t->clabels_values[i++] = clv;
3525 t->n_clabels_values = n;
/* Sort by value and number the entries in sorted order. */
3528 sort (t->clabels_values, n, sizeof *t->clabels_values,
3529 compare_ctables_values_3way, &width);
3531 for (size_t i = 0; i < n; i++)
3532 t->clabels_values[i]->leaf = i;
/* Members of the top-level CTABLES state structure (its opening line is not
   shown in this listing).  ctables_destroy() releases the look, the format
   settings, the postcomputes, and every table. */
3537 const struct dictionary *dict;
3538 struct pivot_table_look *look;
3540 /* For CTEF_* formats. */
3541 struct fmt_settings ctables_formats;
3543 /* If this is NULL, zeros are displayed using the normal print format.
3544 Otherwise, this string is displayed. */
3547 /* If this is NULL, missing values are displayed using the normal print
3548 format. Otherwise, this string is displayed. */
3551 /* Indexed by variable dictionary index. */
3552 enum ctables_vlabel *vlabels;
3554 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
/* Settings; each trailing comment names the subcommand that sets the field. */
3556 bool mrsets_count_duplicates; /* MRSETS. */
3557 bool smissing_listwise; /* SMISSING. */
3558 struct variable *e_weight; /* WEIGHT. */
3559 int hide_threshold; /* HIDESMALLCOUNTS. */
/* The tables; ctables_destroy() iterates over 'n_tables' of them. */
3561 struct ctables_table **tables;
/* Binary-operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   The body is not shown in this listing; presumably it returns A + B. */
3566 ctpo_add (double a, double b)
/* Binary-operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   The body is not shown in this listing; presumably it returns A - B. */
3572 ctpo_sub (double a, double b)
/* Binary-operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   The body is not shown in this listing; presumably it returns A * B. */
3578 ctpo_mul (double a, double b)
/* Binary-operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   Returns A / B, or SYSMIS when B is zero. */
3584 ctpo_div (double a, double b)
3586 return b ? a / b : SYSMIS;
/* Binary-operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   Computes pow (A, B).  errno is saved before the call; how it is consulted
   and restored afterward is not shown in this listing. */
3590 ctpo_pow (double a, double b)
3592 int save_errno = errno;
3594 double result = pow (a, b);
/* Unary-operator callback passed to ctables_pcexpr_evaluate_nonterminal()
   with one argument; B is unused.  The body is not shown in this listing;
   presumably it returns -A. */
3602 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell.  It is filled
   in by ctables_cell_calculate_postcompute(), which also sets 'pc_a_idx' and
   'summary_idx' members that are not shown in this listing. */
3607 struct ctables_pcexpr_evaluate_ctx
3609 const struct ctables_cell *cell;
3610 const struct ctables_section *section;
3611 const struct ctables_categories *cats;
/* Axis of the postcompute variable; paired with 'pc_a_idx', its index in that
   axis's nest. */
3612 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute(). */
3615 enum fmt_type parse_format;
/* Forward declaration, needed because ctables_pcexpr_evaluate_nonterminal()
   calls ctables_pcexpr_evaluate() on each subexpression before the latter is
   defined. */
3618 static double ctables_pcexpr_evaluate (
3619 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E of a postcompute expression in context CTX.

   The first N_ARGS (at most 2) subexpressions of E are evaluated in order
   and each result is checked for being non-finite or SYSMIS.  The results are
   then passed to EVALUATE; an argument beyond N_ARGS is passed as 0. */
3622 ctables_pcexpr_evaluate_nonterminal (
3623 const struct ctables_pcexpr_evaluate_ctx *ctx,
3624 const struct ctables_pcexpr *e, size_t n_args,
3625 double evaluate (double, double))
3627 double args[2] = { 0, 0 };
3628 for (size_t i = 0; i < n_args; i++)
3630 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3631 if (!isfinite (args[i]) || args[i] == SYSMIS)
3634 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that a postcompute operand refers to.

   The target cell has the same categories and values as CTX->cell except at
   the postcompute variable's position (CTX->pc_a, CTX->pc_a_idx), where PC_CV
   is substituted.  The section's cells are searched by hash for that cell,
   and the summary at CTX->summary_idx is returned from it.  What happens
   when no such cell exists is not shown in this listing. */
3638 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3639 const struct ctables_cell_value *pc_cv)
3641 const struct ctables_section *s = ctx->section;
/* Hash the target cell's key the same way ctables_cell_insert__() does. */
3644 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3646 const struct ctables_nest *nest = s->nests[a];
3647 for (size_t i = 0; i < nest->n; i++)
3648 if (i != nest->scale_idx)
3650 const struct ctables_cell_value *cv
3651 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3652 : &ctx->cell->axes[a].cvs[i]);
3653 hash = hash_pointer (cv->category, hash);
3654 if (cv->category->type != CCT_TOTAL
3655 && cv->category->type != CCT_SUBTOTAL
3656 && cv->category->type != CCT_POSTCOMPUTE)
3657 hash = value_hash (&cv->value,
3658 var_get_width (nest->vars[i]), hash);
/* Find the cell whose categories and values all match the target key. */
3662 struct ctables_cell *tc;
3663 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3665 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3667 const struct ctables_nest *nest = s->nests[a];
3668 for (size_t i = 0; i < nest->n; i++)
3669 if (i != nest->scale_idx)
3671 const struct ctables_cell_value *p_cv
3672 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3673 : &ctx->cell->axes[a].cvs[i]);
3674 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3675 if (p_cv->category != t_cv->category
3676 || (p_cv->category->type != CCT_TOTAL
3677 && p_cv->category->type != CCT_SUBTOTAL
3678 && p_cv->category->type != CCT_POSTCOMPUTE
3679 && !value_equal (&p_cv->value,
3681 var_get_width (nest->vars[i]))))
/* Report the requested summary of the cell that was found. */
3693 const struct ctables_table *t = s->table;
3694 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3695 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3696 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3697 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns the result.

   Category operands are resolved to a category of CTX->cats with
   ctables_find_category_for_postcompute() and evaluated with
   ctables_pcexpr_evaluate_category().  Arithmetic operators are evaluated by
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*()
   callback. */
3701 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3702 const struct ctables_pcexpr *e)
/* Operands whose category can match several values: the cell value is summed
   over every recorded occurrence of the postcompute variable whose value
   matches the category. */
3709 case CTPO_CAT_NRANGE:
3710 case CTPO_CAT_SRANGE:
3711 case CTPO_CAT_MISSING:
3712 case CTPO_CAT_OTHERNM:
3714 struct ctables_cell_value cv = {
3715 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3717 assert (cv.category != NULL);
3719 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3720 const struct ctables_occurrence *o;
3723 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3724 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3725 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3727 cv.value = o->value;
3728 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that name a single cell: the numeric value comes from the
   expression node itself. */
3733 case CTPO_CAT_NUMBER:
3734 case CTPO_CAT_SUBTOTAL:
3735 case CTPO_CAT_TOTAL:
3737 struct ctables_cell_value cv = {
3738 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3739 .value = { .f = e->number },
3741 assert (cv.category != NULL);
3742 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: when the variable is wider than the string, a copy padded
   with spaces to the variable's width is used as the value.  The matching
   category may be numeric or string. */
3745 case CTPO_CAT_STRING:
3747 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3749 if (width > e->string.length)
3751 s = xmalloc (width);
3752 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3755 const struct ctables_category *category
3756 = ctables_find_category_for_postcompute (
3757 ctx->section->table->ctables->dict,
3758 ctx->cats, ctx->parse_format, e);
3759 assert (category != NULL);
3761 struct ctables_cell_value cv = { .category = category };
3762 if (category->type == CCT_NUMBER)
3763 cv.value.f = category->number;
3764 else if (category->type == CCT_STRING)
3765 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3769 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Arithmetic operators: binary ones take two operands, negation takes one. */
3775 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3778 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3781 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3784 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3787 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3790 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL, which must have its
   'postcompute' flag set.

   Every cell value on every axis of section S is scanned for a category of
   type CCT_POSTCOMPUTE.  The category found is kept in 'pc_cat', and its
   nest index is stored through the output pointer *PC_A_IDX_P.  The function
   asserts that a postcompute category was found.  The handling of cells with
   more than one postcompute category is not shown in this listing. */
3796 static const struct ctables_category *
3797 ctables_cell_postcompute (const struct ctables_section *s,
3798 const struct ctables_cell *cell,
3799 enum pivot_axis_type *pc_a_p,
3802 assert (cell->postcompute);
3803 const struct ctables_category *pc_cat = NULL;
3804 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3805 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3807 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3808 if (cv->category->type == CCT_POSTCOMPUTE)
3812 /* Multiple postcomputes cross each other. The value is
3817 pc_cat = cv->category;
3821 *pc_a_idx_p = pc_a_idx;
3825 assert (pc_cat != NULL);
// Computes the value of postcompute cell CELL in section S for summary
// specification SS.
//
// The postcompute category and its position are found with
// ctables_cell_postcompute().  If the postcompute has its own summary
// specifications, one that matches SS in function, weighting, calculation
// area, and percentile supplies *FORMAT and *IS_CTABLES_FORMAT.  The
// postcompute's expression is then evaluated with ctables_pcexpr_evaluate()
// and that result is returned.
3830 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3831 const struct ctables_cell *cell,
3832 const struct ctables_summary_spec *ss,
3833 struct fmt_spec *format,
3834 bool *is_ctables_format,
3837 enum pivot_axis_type pc_a = 0;
3838 size_t pc_a_idx = 0;
3839 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3840 s, cell, &pc_a, &pc_a_idx);
3844 const struct ctables_postcompute *pc = pc_cat->pc;
// Look for a summary specification of the postcompute that matches SS.
3847 for (size_t i = 0; i < pc->specs->n; i++)
3849 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3850 if (ss->function == ss2->function
3851 && ss->weighting == ss2->weighting
3852 && ss->calc_area == ss2->calc_area
3853 && ss->percentile == ss2->percentile)
3855 *format = ss2->format;
3856 *is_ctables_format = ss2->is_ctables_format;
// Evaluate the expression against the categories of the postcompute variable.
3862 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3863 const struct ctables_categories *cats = s->table->categories[
3864 var_get_dict_index (var)];
3865 struct ctables_pcexpr_evaluate_ctx ctx = {
3870 .pc_a_idx = pc_a_idx,
3871 .summary_idx = summary_idx,
3872 .parse_format = pc_cat->parse_format,
3874 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3877 /* Chi-square test (SIGTEST). */
3878 struct ctables_chisq
/* Flag with the same name as the one in struct ctables_pairwise; presumably
   selects whether multiple-response sets take part in the test -- confirm
   against the parser. */
3881 bool include_mrsets;
3885 /* Pairwise comparison test (COMPARETEST). */
3886 struct ctables_pairwise
/* Kind of comparison: PROP or MEAN. */
3888 enum { PROP, MEAN } type;
3890 bool include_mrsets;
3891 bool meansvariance_allcats;
/* Adjustment selection.  The enumerators start at 1, so 0 is not one of the
   named values. */
3893 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column-width setting: an optional '=' followed by either the
   keyword DEFAULT or a number in the closed range [0, DBL_MAX].  A number is
   stored in *WIDTH.  NAME is passed to lex_force_num_range_closed() to
   identify the setting.  What DEFAULT stores is not shown in this listing. */
3902 parse_col_width (struct lexer *lexer, const char *name, double *width)
3904 lex_match (lexer, T_EQUALS);
3905 if (lex_match_id (lexer, "DEFAULT"))
3907 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3909 *width = lex_number (lexer);
/* Parses the keyword NO or YES for *B.  Any other token is reported with an
   error that lists the two expected keywords.  The assignments to *B are not
   shown in this listing. */
3919 parse_bool (struct lexer *lexer, bool *b)
3921 if (lex_match_id (lexer, "NO"))
3923 else if (lex_match_id (lexer, "YES"))
3927 lex_error_expecting (lexer, "YES", "NO");
/* Destroys CHISQ.  Called from ctables_table_destroy() on the table's
   'chisq' member.  The body is not shown in this listing. */
3934 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  Called from ctables_table_destroy() on the table's
   'pairwise' member.  The body is not shown in this listing. */
3940 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, its per-variable
   categories and 'show_empty' array, its axes and stacks, its summary
   specifications, the category-label value map and sorted array, and its
   chi-square and pairwise test settings. */
3946 ctables_table_destroy (struct ctables_table *t)
3951 for (size_t i = 0; i < t->n_sections; i++)
3952 ctables_section_uninit (&t->sections[i]);
/* Each variable's categories object is reference counted. */
3955 for (size_t i = 0; i < t->n_categories; i++)
3956 ctables_categories_unref (t->categories[i]);
3957 free (t->categories);
3958 free (t->show_empty);
3960 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3962 ctables_axis_destroy (t->axes[a]);
3963 ctables_stack_uninit (&t->stacks[a]);
3965 free (t->summary_specs.specs);
/* Destroy each stored value, using the width of the example variable, and
   remove it from the map before destroying the map itself. */
3967 struct ctables_value *ctv, *next_ctv;
3968 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3969 &t->clabels_values_map)
3971 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3972 hmap_delete (&t->clabels_values_map, &ctv->node);
3975 hmap_destroy (&t->clabels_values_map);
3976 free (t->clabels_values);
3982 ctables_chisq_destroy (t->chisq);
3983 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: every postcompute (its location,
   expression, and summary specifications), the postcomputes map, the CTABLES
   format settings, the reference to the pivot table look, and every table. */
3988 ctables_destroy (struct ctables *ct)
3993 struct ctables_postcompute *pc, *next_pc;
3994 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3998 msg_location_destroy (pc->location);
3999 ctables_pcexpr_destroy (pc->expr);
4003 ctables_summary_spec_set_uninit (pc->specs);
4006 hmap_delete (&ct->postcomputes, &pc->hmap_node);
4009 hmap_destroy (&ct->postcomputes);
4011 fmt_settings_uninit (&ct->ctables_formats);
4012 pivot_table_look_unref (ct->look);
4016 for (size_t i = 0; i < ct->n_tables; i++)
4017 ctables_table_destroy (ct->tables[i]);
/* Checks that all N_VARS variables in VARS are string variables.  For a
   numeric variable, an error is reported at CAT's location, naming the
   variable.  The return statements are not shown in this listing; callers
   treat a false result as failure. */
4023 all_strings (struct variable **vars, size_t n_vars,
4024 const struct ctables_category *cat)
4026 for (size_t j = 0; j < n_vars; j++)
4027 if (var_is_numeric (vars[j]))
4029 msg_at (SE, cat->location,
4030 _("This category specification may be applied only to string "
4031 "variables, but this subcommand tries to apply it to "
4032 "numeric variable %s."),
4033 var_get_name (vars[j]));
/* Parses category settings for table T from LEXER.

   The syntax handled here is: VARIABLES=<variable list>, an optional
   bracketed list of explicit categories, and then any of the settings ORDER,
   KEY, MISSING, TOTAL, LABEL, POSITION, and EMPTY, up to the next slash or
   the end of the command.  ORDER, KEY, and MISSING are accepted only when no
   explicit categories were given.

   The parsed categories are validated against the types of the listed
   variables and then installed in T->categories for each of them.  DICT is
   used to parse variable names and category strings; CT is passed to the
   explicit-category parser. */
4040 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4041 struct ctables *ct, struct ctables_table *t)
4043 if (!lex_force_match_id (lexer, "VARIABLES"))
4045 lex_match (lexer, T_EQUALS);
4047 struct variable **vars;
4049 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all of the variables share one print format type, and
   whether that type is a date, time, or date-component format. */
4052 struct fmt_spec common_format = var_get_print_format (vars[0]);
4053 bool has_common_format = true;
4054 for (size_t i = 1; i < n_vars; i++)
4056 struct fmt_spec f = var_get_print_format (vars[i]);
4057 if (f.type != common_format.type)
4059 has_common_format = false;
4064 = (has_common_format
4065 && (fmt_get_category (common_format.type)
4066 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* New categories object, holding one reference of its own. */
4068 struct ctables_categories *c = xmalloc (sizeof *c);
4069 *c = (struct ctables_categories) { .n_refs = 1 };
4071 bool set_categories = false;
4073 size_t allocated_cats = 0;
4074 int cats_start_ofs = -1;
4075 int cats_end_ofs = -1;
/* Optional explicit category list in square brackets.  Each category records
   the source location of its own tokens. */
4076 if (lex_match (lexer, T_LBRACK))
4078 set_categories = true;
4079 cats_start_ofs = lex_ofs (lexer);
4082 if (c->n_cats >= allocated_cats)
4083 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4085 int start_ofs = lex_ofs (lexer);
4086 struct ctables_category *cat = &c->cats[c->n_cats];
4087 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4089 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4092 lex_match (lexer, T_COMMA);
4094 while (!lex_match (lexer, T_RBRACK));
4095 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category, which ORDER, KEY, and MISSING modify. */
4098 struct ctables_category cat = {
4100 .include_missing = false,
4101 .sort_ascending = true,
4103 bool show_totals = false;
4104 char *total_label = NULL;
4105 bool totals_before = false;
4106 int key_start_ofs = 0;
4107 int key_end_ofs = 0;
/* Keyword settings, until the next subcommand or the end of the command. */
4108 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4110 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4112 set_categories = true;
4113 lex_match (lexer, T_EQUALS);
4114 if (lex_match_id (lexer, "A"))
4115 cat.sort_ascending = true;
4116 else if (lex_match_id (lexer, "D"))
4117 cat.sort_ascending = false;
4120 lex_error_expecting (lexer, "A", "D");
4124 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4126 set_categories = true;
4127 key_start_ofs = lex_ofs (lexer) - 1;
4128 lex_match (lexer, T_EQUALS);
4129 if (lex_match_id (lexer, "VALUE"))
4130 cat.type = CCT_VALUE;
4131 else if (lex_match_id (lexer, "LABEL"))
4132 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in [0, 100]. */
4135 cat.type = CCT_FUNCTION;
4136 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4137 &cat.weighting, &cat.area))
4140 if (lex_match (lexer, T_LPAREN))
4142 cat.sort_var = parse_variable (lexer, dict);
4146 if (cat.sort_function == CTSF_PTILE)
4148 lex_match (lexer, T_COMMA);
4149 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4151 cat.percentile = lex_number (lexer);
4155 if (!lex_force_match (lexer, T_RPAREN))
4158 else if (ctables_function_availability (cat.sort_function)
/* The left parenthesis was already found to be absent, so this call is made
   for its effect; the result is deliberately unused. */
4161 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4165 key_end_ofs = lex_ofs (lexer) - 1;
/* Sorting by a summary function is parsed but then rejected. */
4167 if (cat.type == CCT_FUNCTION)
4169 lex_ofs_error (lexer, key_start_ofs, key_end_ofs,
4170 _("Data-dependent sorting is not implemented."));
4174 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4176 set_categories = true;
4177 lex_match (lexer, T_EQUALS);
4178 if (lex_match_id (lexer, "INCLUDE"))
4179 cat.include_missing = true;
4180 else if (lex_match_id (lexer, "EXCLUDE"))
4181 cat.include_missing = false;
4184 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4188 else if (lex_match_id (lexer, "TOTAL"))
4190 set_categories = true;
4191 lex_match (lexer, T_EQUALS);
4192 if (!parse_bool (lexer, &show_totals))
4195 else if (lex_match_id (lexer, "LABEL"))
4197 lex_match (lexer, T_EQUALS);
4198 if (!lex_force_string (lexer))
4201 total_label = ss_xstrdup (lex_tokss (lexer));
4204 else if (lex_match_id (lexer, "POSITION"))
4206 lex_match (lexer, T_EQUALS);
4207 if (lex_match_id (lexer, "BEFORE"))
4208 totals_before = true;
4209 else if (lex_match_id (lexer, "AFTER"))
4210 totals_before = false;
4213 lex_error_expecting (lexer, "BEFORE", "AFTER");
/* EMPTY applies directly to the table's per-variable 'show_empty' flags
   rather than to the categories object. */
4217 else if (lex_match_id (lexer, "EMPTY"))
4219 lex_match (lexer, T_EQUALS);
4222 if (lex_match_id (lexer, "INCLUDE"))
4224 else if (lex_match_id (lexer, "EXCLUDE"))
4228 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4232 for (size_t i = 0; i < n_vars; i++)
4233 t->show_empty[var_get_dict_index (vars[i])] = show_empty;
/* Unknown keyword: two alternative lists of expected keywords, the second
   without ORDER, KEY, and MISSING. */
4238 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4239 "TOTAL", "LABEL", "POSITION", "EMPTY");
4241 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category, located at the KEY setting's tokens. */
4249 cat.location = lex_ofs_location (lexer, key_start_ofs, key_end_ofs);
4251 if (c->n_cats >= allocated_cats)
4252 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4253 c->cats[c->n_cats++] = cat;
/* Add a totals category, either at the front or at the end of the list.  Its
   label defaults to the translated string "Total". */
4258 if (c->n_cats >= allocated_cats)
4259 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4261 struct ctables_category *totals;
4264 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4265 totals = &c->cats[0];
4268 totals = &c->cats[c->n_cats];
4271 *totals = (struct ctables_category) {
4273 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link categories to their subtotal.  The list is walked forward when totals
   come before their categories and backward otherwise. */
4277 struct ctables_category *subtotal = NULL;
4278 for (size_t i = totals_before ? 0 : c->n_cats;
4279 totals_before ? i < c->n_cats : i-- > 0;
4280 totals_before ? i++ : 0)
4282 struct ctables_category *cat = &c->cats[i];
4291 cat->subtotal = subtotal;
4294 case CCT_POSTCOMPUTE:
4305 case CCT_EXCLUDED_MISSING:
/* Validate explicit categories against the variables' types.  This runs only
   when a bracketed category list was given. */
4310 if (cats_start_ofs != -1)
4312 for (size_t i = 0; i < c->n_cats; i++)
4314 struct ctables_category *cat = &c->cats[i];
/* Postcomputes: choose the format used to parse strings in the expression,
   then check the expression against the category list. */
4317 case CCT_POSTCOMPUTE:
4318 cat->parse_format = parse_strings ? common_format.type : FMT_F;
4319 struct msg_location *cats_location
4320 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4321 bool ok = ctables_recursive_check_postcompute (
4322 dict, cat->pc->expr, cat, c, cats_location);
4323 msg_location_destroy (cats_location);
/* Numeric-only category kinds: reject string variables. */
4330 for (size_t j = 0; j < n_vars; j++)
4331 if (var_is_alpha (vars[j]))
4333 msg_at (SE, cat->location,
4334 _("This category specification may be applied "
4335 "only to numeric variables, but this "
4336 "subcommand tries to apply it to string "
4338 var_get_name (vars[j]));
/* String category: either converted into a number category by parsing the
   string in the common format, or required to apply to string variables. */
4347 if (!parse_category_string (cat->location, cat->string, dict,
4348 common_format.type, &n))
4351 ss_dealloc (&cat->string);
4353 cat->type = CCT_NUMBER;
4356 else if (!all_strings (vars, n_vars, cat))
/* String range: handled like a string category, with each bound that is
   present parsed separately. */
4365 if (!cat->srange[0].string)
4367 else if (!parse_category_string (cat->location,
4368 cat->srange[0], dict,
4369 common_format.type, &n[0]))
4372 if (!cat->srange[1].string)
4374 else if (!parse_category_string (cat->location,
4375 cat->srange[1], dict,
4376 common_format.type, &n[1]))
4379 ss_dealloc (&cat->srange[0]);
4380 ss_dealloc (&cat->srange[1]);
4382 cat->type = CCT_NRANGE;
4383 cat->nrange[0] = n[0];
4384 cat->nrange[1] = n[1];
4386 else if (!all_strings (vars, n_vars, cat))
4397 case CCT_EXCLUDED_MISSING:
/* Install: release each variable's previous categories object. */
4404 for (size_t i = 0; i < n_vars; i++)
4406 struct ctables_categories **cp
4407 = &t->categories[var_get_dict_index (vars[i])];
4408 ctables_categories_unref (*cp);
/* Drop this function's own reference to the new categories object. */
4413 ctables_categories_unref (c);
4418 ctables_categories_unref (c);
/* The set of summary specs that this merge item draws from.
   merge_item_compare_3way() reads element `ofs' of SET's `specs' array. */
4426 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B currently
   refer to, that is, element `ofs' of each item's set.  The keys, in order of
   precedence, are function, weighting, calc_area, percentile, and label. */
4431 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4433 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4434 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4435 if (as->function != bs->function)
4436 return as->function > bs->function ? 1 : -1;
4437 else if (as->weighting != bs->weighting)
4438 return as->weighting > bs->weighting ? 1 : -1;
4439 else if (as->calc_area != bs->calc_area)
4440 return as->calc_area > bs->calc_area ? 1 : -1;
/* Unlike the other keys, percentile is compared in reverse: the spec with the
   smaller percentile compares as greater. */
4441 else if (as->percentile != bs->percentile)
4442 return as->percentile < bs->percentile ? 1 : -1;
/* A null label compares as the empty string. */
4444 const char *as_label = as->label ? as->label : "";
4445 const char *bs_label = bs->label ? bs->label : "";
4446 return strcmp (as_label, bs_label);
/* Recursively chooses one nest index per axis, storing it in IX, and sets up
   sections in T->sections.  A is the axis whose index is chosen at this level
   of the recursion; ctables_execute() starts the recursion with A == 0. */
4450 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4451 size_t ix[PIVOT_N_AXES])
4453 if (a < PIVOT_N_AXES)
/* An axis whose stack is empty is still visited once, with index 0. */
4455 size_t limit = MAX (t->stacks[a].n, 1);
4456 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4457 ctables_table_add_section (t, a + 1, ix);
/* NOTE(review): the remainder presumably runs only once every axis has an
   index in IX (A == PIVOT_N_AXES) -- confirm.  It claims the next free slot in
   T->sections and initializes its hash maps. */
4461 struct ctables_section *s = &t->sections[t->n_sections++];
4462 *s = (struct ctables_section) {
4464 .cells = HMAP_INITIALIZER (s->cells),
4466 for (a = 0; a < PIVOT_N_AXES; a++)
/* One occurrence map per variable in the nest selected for this axis. */
4469 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4471 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4472 for (size_t i = 0; i < nest->n; i++)
4473 hmap_init (&s->occurrences[a][i]);
/* One area map per area type. */
4475 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4476 hmap_init (&s->areas[at]);
/* Formats D according to FORMAT and SETTINGS, as UTF-8, using
   data_out_stretchy(), then relocates a minus sign within the resulting string
   as the comment below describes.

   NOTE(review): the string built here is presumably what the caller receives
   and must free; ctables_table_output() hands the result of this function to
   pivot_value_new_user_text_nocopy().  Confirm. */
4481 ctables_format (double d, struct fmt_spec format,
4482 const struct fmt_settings *settings)
4484 const union value v = { .f = d };
4485 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4487 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4488 produce the results we want for negative numbers, putting the negative
4489 sign in the wrong spot, before the prefix instead of after it. We can't,
4490 in fact, produce the desired results using a custom-currency
4491 specification. Instead, we postprocess the output, moving the negative
sign into place:
4494 NEQUAL: "-N=3" => "N=-3"
4495 PAREN: "-(3)" => "(-3)"
4496 PCTPAREN: "-(3%)" => "(-3%)"
4498 This transformation doesn't affect NEGPAREN. */
/* Only the first '-' in S is considered, and it is left alone if it directly
   follows an 'E'. */
4499 char *minus_src = strchr (s, '-');
4500 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination for the minus sign is the position of the '=' in "N=", if S
   contains "N=", and otherwise the position of the first '('. */
4502 char *n_equals = strstr (s, "N=");
4503 char *lparen = strchr (s, '(');
4504 char *minus_dst = n_equals ? n_equals + 1 : lparen;
/* NOTE(review): MINUS_DST is null if S contains neither "N=" nor '('; confirm
   that the call below is guarded against that.  Treating S as an array of
   MINUS_DST - S + 1 one-byte elements, the call presumably moves the element
   at index MINUS_SRC - S to index MINUS_DST - S. */
4506 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  For each nest it tests whether
   the nest has exactly one variable at index 0 that is the scale variable, and
   whether the CTVL_* variable label setting for that variable is CTVL_NONE.

   NOTE(review): ctables_table_output() assigns the result to a dimension's
   hide_all_labels, so this presumably yields true only if every nest passes
   both tests -- confirm against the return statements. */
4512 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4514 for (size_t i = 0; i < t->stacks[a].n; i++)
4516 struct ctables_nest *nest = &t->stacks[a].nests[i];
4517 if (nest->n != 1 || nest->scale_idx != 0)
/* The label setting is looked up by the variable's dictionary index. */
4520 enum ctables_vlabel vlabel
4521 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4522 if (vlabel != CTVL_NONE)
/* Returns -1 if A < B, 0 if A == B, and 1 if A > B. */
4529 compare_ints_3way (int a, int b)
4531 return a < b ? -1 : a > b;
/* Comparison callback passed to sort() by ctables_table_output().  A_ and B_
   each point to a pointer to a struct ctables_cell; AUX is unused.  The cells
   are compared by their leaf indexes on each axis in turn, and then by the
   leaf index of the ctables_value that ctables_value_find() reports for each
   cell. */
4535 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
4536 const void *aux UNUSED)
4538 struct ctables_cell *const *ap = a_;
4539 struct ctables_cell *const *bp = b_;
4540 const struct ctables_cell *a = *ap;
4541 const struct ctables_cell *b = *bp;
/* First key: the per-axis leaf indexes. */
4549 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
4551 int cmp = compare_ints_3way (a->axes[axis].leaf, b->axes[axis].leaf);
/* Second key: the leaf indexes of the cells' ctables_values. */
4556 const struct ctables_value *a_ctv = ctables_value_find (a);
4557 const struct ctables_value *b_ctv = ctables_value_find (b);
4560 int cmp = compare_ints_3way (a_ctv->leaf, b_ctv->leaf);
/* NOTE(review): presumably reached only when a lookup returned null, in which
   case both lookups must have -- confirm. */
4565 assert (!a_ctv && !b_ctv);
/* Builds a pivot table for table T of CTABLES command CT and submits it with
   pivot_table_submit().

   The steps, in order, are:

   - Create the pivot table, whose title is either T->title as user text or
     the default text "Custom Tables", and set its caption, corner text, and
     look.

   - Create a "Statistics" dimension on T->slabels_axis when the summary
     statistics need a dimension of their own, and a "Row Categories" or
     "Column Categories" dimension when T->clabels_example is nonnull.

   - For each axis, nest by nest, sort the cells of the sections that use that
     nest, build pivot categories for them (variable labels, categories, and
     possibly summary functions), and record each cell's leaf index in
     cell->axes[a].leaf.

   - Sort all cells with ctables_cell_compare_leaf_3way() and number the areas
     of each type in that order (area->sequence), starting from 1.

   - For every cell and every summary spec that applies to it, compute the
     value, choose how to present it, and store it with pivot_table_put().
     A CTSF_COUNT value below CT->hide_threshold (if nonzero) is shown as
     "<N"; zero and system-missing values are replaced by CT->zero and
     CT->missing if those are set; a value with a CTABLES-specific format goes
     through ctables_format(); anything else becomes a number with the spec's
     format. */
4570 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4572 struct pivot_table *pt = pivot_table_create__ (
4574 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4575 : pivot_value_new_text (N_("Custom Tables"))),
4578 pivot_table_set_caption (
4579 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4581 pivot_table_set_corner_text (
4582 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* The summary statistics get a dimension of their own if T->summary_axis
   differs from T->slabels_axis, or if their labels are hidden and there is
   more than one summary spec. */
4584 bool summary_dimension = (t->summary_axis != t->slabels_axis
4585 || (!t->slabels_visible
4586 && t->summary_specs.n > 1));
4587 if (summary_dimension)
4589 struct pivot_dimension *d = pivot_dimension_create (
4590 pt, t->slabels_axis, N_("Statistics"));
4591 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4592 if (!t->slabels_visible)
4593 d->hide_all_labels = true;
/* One leaf per merged summary spec. */
4594 for (size_t i = 0; i < specs->n; i++)
4595 pivot_category_create_leaf (
4596 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A nonnull T->clabels_example calls for a separate categories dimension, with
   one leaf per value in T->clabels_values.  Each value must match one of the
   example variable's categories. */
4599 bool categories_dimension = t->clabels_example != NULL;
4600 if (categories_dimension)
4602 struct pivot_dimension *d = pivot_dimension_create (
4603 pt, t->label_axis[t->clabels_from_axis],
4604 t->clabels_from_axis == PIVOT_AXIS_ROW
4605 ? N_("Row Categories")
4606 : N_("Column Categories"));
4607 const struct variable *var = t->clabels_example;
4608 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4609 for (size_t i = 0; i < t->n_clabels_values; i++)
4611 const struct ctables_value *value = t->clabels_values[i];
4612 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4613 assert (cat != NULL);
4614 pivot_category_create_leaf (
4615 d->root, ctables_category_create_value_label (c, cat,
4621 pivot_table_set_look (pt, ct->look);
/* A dimension is created for axis A if T->axes[A] is nonnull or if A is the
   summary axis. */
4622 struct pivot_dimension *d[PIVOT_N_AXES];
4623 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4625 static const char *names[] = {
4626 [PIVOT_AXIS_ROW] = N_("Rows"),
4627 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4628 [PIVOT_AXIS_LAYER] = N_("Layers"),
4630 d[a] = (t->axes[a] || a == t->summary_axis
4631 ? pivot_dimension_create (pt, a, names[a])
4636 assert (t->axes[a]);
4638 for (size_t i = 0; i < t->stacks[a].n; i++)
/* Collect the sections whose nest on axis A is this one, totaling their cells
   and tracking the largest nest depth. */
4640 struct ctables_nest *nest = &t->stacks[a].nests[i];
4641 struct ctables_section **sections = xnmalloc (t->n_sections,
4643 size_t n_sections = 0;
4645 size_t n_total_cells = 0;
4646 size_t max_depth = 0;
4647 for (size_t j = 0; j < t->n_sections; j++)
4648 if (t->sections[j].nests[a] == nest)
4650 struct ctables_section *s = &t->sections[j];
4651 sections[n_sections++] = s;
4652 n_total_cells += hmap_count (&s->cells);
4654 size_t depth = s->nests[a]->n;
4655 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it. */
4658 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4660 size_t n_sorted = 0;
4662 for (size_t j = 0; j < n_sections; j++)
4664 struct ctables_section *s = sections[j];
4666 struct ctables_cell *cell;
4667 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4669 sorted[n_sorted++] = cell;
4670 assert (n_sorted <= n_total_cells);
4673 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4674 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* A level is one layer of pivot categories to build for this nest.  Level K's
   parent is level K - 1, or the dimension's root for level 0. */
4676 struct ctables_level
4678 enum ctables_level_type
4680 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4681 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4682 CTL_SUMMARY, /* Summary functions. */
4686 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Room for at most two levels per variable (its label and its categories) plus
   one level for summary functions. */
4689 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4690 size_t n_levels = 0;
4691 for (size_t k = 0; k < nest->n; k++)
4693 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4694 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4696 if (vlabel != CTVL_NONE)
4698 levels[n_levels++] = (struct ctables_level) {
4700 .vlabel = (enum settings_value_show) vlabel,
/* A variable gets a category level unless it is the scale variable or it is
   the last variable in the nest and this axis's labels were moved elsewhere. */
4705 if (nest->scale_idx != k
4706 && (k != nest->n - 1 || t->label_axis[a] == a))
4708 levels[n_levels++] = (struct ctables_level) {
4709 .type = CTL_CATEGORY,
/* Without a separate summary dimension, summary functions form the last level
   on the summary labels axis. */
4715 if (!summary_dimension && a == t->slabels_axis)
4717 levels[n_levels++] = (struct ctables_level) {
4718 .type = CTL_SUMMARY,
4719 .var_idx = SIZE_MAX,
4723 /* Pivot categories:
4725 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4726 - category for nest->vars[0], if nest->scale_idx != 0
4727 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4728 - category for nest->vars[1], if nest->scale_idx != 1
4730 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4731 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4732 - summary function, if 'a == t->slabels_axis && a ==
4735 Additional dimensions:
4737 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4739 - If 't->label_axis[b] == a' for some 'b != a', add a category
4744 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4746 for (size_t j = 0; j < n_sorted; j++)
4748 struct ctables_cell *cell = sorted[j];
4749 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4751 size_t n_common = 0;
4754 for (; n_common < n_levels; n_common++)
4756 const struct ctables_level *level = &levels[n_common];
4757 if (level->type == CTL_CATEGORY)
4759 size_t var_idx = level->var_idx;
4760 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4761 if (prev->axes[a].cvs[var_idx].category != c)
4763 else if (c->type != CCT_SUBTOTAL
4764 && c->type != CCT_TOTAL
4765 && c->type != CCT_POSTCOMPUTE
4766 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4767 &cell->axes[a].cvs[var_idx].value,
4768 var_get_type (nest->vars[var_idx])))
4774 for (size_t k = n_common; k < n_levels; k++)
4776 const struct ctables_level *level = &levels[k];
4777 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4778 if (level->type == CTL_SUMMARY)
4780 assert (k == n_levels - 1);
4782 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4783 for (size_t m = 0; m < specs->n; m++)
4785 int leaf = pivot_category_create_leaf (
4786 parent, ctables_summary_label (&specs->specs[m],
4794 const struct variable *var = nest->vars[level->var_idx];
4795 struct pivot_value *label;
4796 if (level->type == CTL_VAR)
4798 label = pivot_value_new_variable (var);
4799 label->variable.show = level->vlabel;
4801 else if (level->type == CTL_CATEGORY)
4803 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4804 label = ctables_category_create_value_label (
4805 t->categories[var_get_dict_index (var)],
4806 cv->category, var, &cv->value);
4811 if (k == n_levels - 1)
4812 prev_leaf = pivot_category_create_leaf (parent, label);
4814 groups[k] = pivot_category_create_group__ (parent, label);
4818 cell->axes[a].leaf = prev_leaf;
4827 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4831 size_t n_total_cells = 0;
4832 for (size_t j = 0; j < t->n_sections; j++)
4833 n_total_cells += hmap_count (&t->sections[j].cells);
4835 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4836 size_t n_sorted = 0;
4837 for (size_t j = 0; j < t->n_sections; j++)
4839 const struct ctables_section *s = &t->sections[j];
4840 struct ctables_cell *cell;
4841 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4843 sorted[n_sorted++] = cell;
4845 assert (n_sorted <= n_total_cells);
4846 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4848 size_t ids[N_CTATS];
4849 memset (ids, 0, sizeof ids);
4850 for (size_t j = 0; j < n_sorted; j++)
4852 struct ctables_cell *cell = sorted[j];
4853 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4855 struct ctables_area *area = cell->areas[at];
4856 if (!area->sequence)
4857 area->sequence = ++ids[at];
4864 for (size_t i = 0; i < t->n_sections; i++)
4866 struct ctables_section *s = &t->sections[i];
4868 struct ctables_cell *cell;
4869 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4874 const struct ctables_value *ctv = ctables_value_find (cell);
4875 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4876 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4877 for (size_t j = 0; j < specs->n; j++)
4880 size_t n_dindexes = 0;
4882 if (summary_dimension)
4883 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4886 dindexes[n_dindexes++] = ctv->leaf;
4888 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4891 int leaf = cell->axes[a].leaf;
4892 if (a == t->summary_axis && !summary_dimension)
4893 leaf += specs->specs[j].axis_idx;
4894 dindexes[n_dindexes++] = leaf;
4897 const struct ctables_summary_spec *ss = &specs->specs[j];
4899 struct fmt_spec format = specs->specs[j].format;
4900 bool is_ctables_format = ss->is_ctables_format;
4901 double d = (cell->postcompute
4902 ? ctables_cell_calculate_postcompute (
4903 s, cell, ss, &format, &is_ctables_format, j)
4904 : ctables_summary_value (cell->areas,
4905 &cell->summaries[j], ss));
4907 struct pivot_value *value;
4908 if (ct->hide_threshold != 0
4909 && d < ct->hide_threshold
4910 && ss->function == CTSF_COUNT)
4912 value = pivot_value_new_user_text_nocopy (
4913 xasprintf ("<%d", ct->hide_threshold));
4915 else if (d == 0 && ct->zero)
4916 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4917 else if (d == SYSMIS && ct->missing)
4918 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4919 else if (is_ctables_format)
4920 value = pivot_value_new_user_text_nocopy (
4921 ctables_format (d, format, &ct->ctables_formats));
4924 value = pivot_value_new_number (d);
4925 value->numeric.format = format;
4927 /* XXX should text values be right-justified? */
4928 pivot_table_put (pt, dindexes, n_dindexes, value);
4933 pivot_table_submit (pt);
/* Checks whether the category labels for axis A of T may be moved to
   T->label_axis[A].  Problems are reported as errors, each followed by a note
   that points at the syntax that moved the labels (token offsets
   T->clabels_start_ofs through T->clabels_end_ofs in LEXER).

   As a side effect, T->clabels_example is set to the last variable in the
   first nest of axis A's stack.

   ctables_prepare_table() combines the results for the row and column axes
   with && and returns that, so the result is presumably true on success. */
4937 ctables_check_label_position (struct ctables_table *t, struct lexer *lexer,
4938 enum pivot_axis_type a)
4940 enum pivot_axis_type label_pos = t->label_axis[a];
4944 const struct ctables_stack *stack = &t->stacks[a];
4948 const struct ctables_nest *n0 = &stack->nests[0];
4951 assert (stack->n == 1);
/* V0 and C0 are the reference variable and its categories, against which the
   other nests are checked below. */
4955 const struct variable *v0 = n0->vars[n0->n - 1];
4956 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4957 t->clabels_example = v0;
/* A CCT_FUNCTION category (sorting by a summary function) is incompatible with
   moved category labels. */
4959 for (size_t i = 0; i < c0->n_cats; i++)
4960 if (c0->cats[i].type == CCT_FUNCTION)
4962 msg (SE, _("Category labels may not be moved to another axis when "
4963 "sorting by a summary function."));
4964 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4965 _("This syntax moves category labels to another axis."));
4966 msg_at (SN, c0->cats[i].location,
4967 _("This syntax requests sorting by a summary function."));
/* The last variable in every nest must be categorical, not scale.

   NOTE(review): the test below compares N0->n - 1, not NI->n - 1, against
   NI->scale_idx, although VI is NI's last variable.  These differ when nests
   have different depths.  Confirm whether NI->n - 1 was intended. */
4971 for (size_t i = 0; i < stack->n; i++)
4973 const struct ctables_nest *ni = &stack->nests[i];
4975 const struct variable *vi = ni->vars[ni->n - 1];
4976 if (n0->n - 1 == ni->scale_idx)
4978 msg (SE, _("To move category labels from one axis to another, "
4979 "the variables whose labels are to be moved must be "
4980 "categorical, but %s is scale."), var_get_name (vi));
4981 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4982 _("This syntax moves category labels to another axis."));
/* The last variable of every other nest must agree with V0 in width, value
   labels, and category specifications. */
4987 for (size_t i = 1; i < stack->n; i++)
4989 const struct ctables_nest *ni = &stack->nests[i];
4991 const struct variable *vi = ni->vars[ni->n - 1];
4992 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4994 if (var_get_width (v0) != var_get_width (vi))
4996 msg (SE, _("To move category labels from one axis to another, "
4997 "the variables whose labels are to be moved must all "
4998 "have the same width, but %s has width %d and %s has "
5000 var_get_name (v0), var_get_width (v0),
5001 var_get_name (vi), var_get_width (vi));
5002 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5003 _("This syntax moves category labels to another axis."));
5006 if (!val_labs_equal (var_get_value_labels (v0),
5007 var_get_value_labels (vi)))
5009 msg (SE, _("To move category labels from one axis to another, "
5010 "the variables whose labels are to be moved must all "
5011 "have the same value labels, but %s and %s have "
5012 "different value labels."),
5013 var_get_name (v0), var_get_name (vi));
5014 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5015 _("This syntax moves category labels to another axis."));
5018 if (!ctables_categories_equal (c0, ci))
5020 msg (SE, _("To move category labels from one axis to another, "
5021 "the variables whose labels are to be moved must all "
5022 "have the same category specifications, but %s and %s "
5023 "have different category specifications."),
5024 var_get_name (v0), var_get_name (vi));
5025 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5026 _("This syntax moves category labels to another axis."));
/* Looks for VAR among the *N variables in *SUM_VARS.  If it is not there,
   stores it at index *N, first growing the array (whose capacity is
   *ALLOCATED) with x2nrealloc() if it is full.

   NOTE(review): enumerate_sum_vars() stores the result in a summary spec's
   sum_var_idx, so the result is presumably VAR's index in *SUM_VARS, with *N
   incremented after an append -- confirm. */
5035 add_sum_var (struct variable *var,
5036 struct variable ***sum_vars, size_t *n, size_t *allocated)
5038 for (size_t i = 0; i < *n; i++)
5039 if (var == (*sum_vars)[i])
5042 if (*n >= *allocated)
5043 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
5044 (*sum_vars)[*n] = var;
/* Maps AREA to another area type.  ctables_prepare_table() applies this to a
   summary spec's calc_area when row labels have been moved to the column axis
   or column labels to the row axis.

   NOTE(review): presumably this exchanges the row-oriented and column-oriented
   area types; CTAT_LAYERCOL and CTAT_LAYERROW are among the results.
   Confirm the full mapping. */
5048 static enum ctables_area_type
5049 rotate_area (enum ctables_area_type area)
5060 return CTAT_LAYERCOL;
5063 return CTAT_LAYERROW;
/* Walks the axis expression rooted at A, collecting in *SUM_VARS (which has *N
   entries and capacity *ALLOCATED) the variables that CTSF_areaPCT_SUM summary
   functions apply to.

   For a node with summary specs, every spec in every summary variant whose
   function is CTSF_areaPCT_SUM has its sum_var_idx set to the result of
   add_sum_var() for A->var.  For a node with children, both entries of
   A->subs[] are visited recursively.

   NOTE(review): which of the two treatments a node receives presumably depends
   on the node's type -- confirm. */
5076 enumerate_sum_vars (const struct ctables_axis *a,
5077 struct variable ***sum_vars, size_t *n, size_t *allocated)
5085 for (size_t i = 0; i < N_CSVS; i++)
5086 for (size_t j = 0; j < a->specs[i].n; j++)
5088 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5089 if (spec->function == CTSF_areaPCT_SUM)
5090 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
5096 for (size_t i = 0; i < 2; i++)
5097 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares T for execution:

   - Builds each axis's stack of nests with enumerate_fts() and fills in each
     nest's per-area-type lists of variable indexes (nest->areas[]).

   - On the summary axis, supplies a default summary spec for nests that have
     none and makes sure the CSV_TOTAL specs are populated.

   - Adjusts summary specs for moved row/column labels and for listwise
     missing-value handling.

   - Merges the summary specs of all nests into T->summary_specs, recording
     each spec's position there in its axis_idx.

   - Collects the variables used by CTSF_areaPCT_SUM functions into
     T->sum_vars.

   The result is the conjunction of ctables_check_label_position() for the row
   and column axes; LEXER is passed through to those checks for their
   diagnostics. */
5103 ctables_prepare_table (struct ctables_table *t, struct lexer *lexer)
5105 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5108 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5110 for (size_t j = 0; j < t->stacks[a].n; j++)
5112 struct ctables_nest *nest = &t->stacks[a].nests[j];
5113 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* areas[at] holds indexes into the nest's variables, so it needs at most
   nest->n entries. */
5115 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5116 nest->n_areas[at] = 0;
/* For the row-oriented area types, ATA is the row axis and ATB the column
   axis; for the others it is the other way around. */
5118 enum pivot_axis_type ata, atb;
5119 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5121 ata = PIVOT_AXIS_ROW;
5122 atb = PIVOT_AXIS_COLUMN;
5124 else /* at == CTAT_COL || at == CTAT_LAYERCOL */
5126 ata = PIVOT_AXIS_COLUMN;
5127 atb = PIVOT_AXIS_ROW;
5130 if (at == CTAT_LAYER
5131 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5132 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5133 ? a == atb && t->label_axis[a] != a
/* Scan K downward from the nest's last variable for a non-scale variable.  K
   is unsigned, so decrementing it past zero wraps around and ends the loop. */
5136 for (size_t k = nest->n - 1; k < nest->n; k--)
5137 if (k != nest->scale_idx)
5139 nest->areas[at][nest->n_areas[at]++] = k;
5145 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5146 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5147 : at == CTAT_TABLE ? true
/* Append the indexes of all of the nest's non-scale variables, in order. */
5151 for (size_t k = 0; k < nest->n; k++)
5152 if (k != nest->scale_idx)
5153 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP, computed below, controls how the list just built is trimmed.
   NOTE(review): a negative value appears to select the "remove the
   second-to-last entry" treatment further down and a positive value the number
   of trailing entries to drop -- confirm. */
5159 #define L PIVOT_AXIS_LAYER
5160 n_drop = (t->clabels_from_axis == L ? a != L
5161 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5162 : t->clabels_from_axis == a ? 2
5169 n_drop = a == ata && t->label_axis[ata] == atb;
5174 n_drop = (a == ata ? t->label_axis[ata] == atb
5176 : t->clabels_from_axis == atb ? -1
5177 : t->clabels_to_axis != atb ? 1
/* Remove the second-to-last entry by overwriting it with the last one. */
5189 size_t n = nest->n_areas[at];
5192 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5193 nest->n_areas[at]--;
/* Drop up to N_DROP entries from the end of the list. */
5198 for (int i = 0; i < n_drop; i++)
5199 if (nest->n_areas[at] > 0)
5200 nest->n_areas[at]--;
/* Give the axis a stack that consists of a single nest with no scale variable
   and no summary variable (both indexes are SIZE_MAX). */
5207 struct ctables_nest *nest = xmalloc (sizeof *nest);
5208 *nest = (struct ctables_nest) {
5210 .scale_idx = SIZE_MAX,
5211 .summary_idx = SIZE_MAX
5213 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5215 /* There's no point in moving labels away from an axis that has no
5216 labels, so avoid dealing with the special cases around that. */
5217 t->label_axis[a] = a;
5220 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5221 for (size_t i = 0; i < stack->n; i++)
5223 struct ctables_nest *nest = &stack->nests[i];
/* A nest without cell summary specs gets one default spec: CTSF_MEAN if the
   spec set is scale, otherwise CTSF_COUNT, applied to the nest's last
   variable.  The total specs are then cloned from the cell specs. */
5224 if (!nest->specs[CSV_CELL].n)
5226 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5227 ss->specs = xmalloc (sizeof *ss->specs);
5230 enum ctables_summary_function function
5231 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5235 nest->summary_idx = nest->n - 1;
5236 ss->var = nest->vars[nest->summary_idx];
5238 *ss->specs = (struct ctables_summary_spec) {
5239 .function = function,
5240 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5241 .format = ctables_summary_default_format (function, ss->var),
5244 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5245 &nest->specs[CSV_CELL]);
/* A nest with cell specs but no total specs reuses the cell specs for totals. */
5247 else if (!nest->specs[CSV_TOTAL].n)
5248 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5249 &nest->specs[CSV_CELL]);
/* If row labels were moved to the column axis, or column labels to the row
   axis, summary specs have their calc_area passed through rotate_area(). */
5251 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5252 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5254 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5255 for (size_t i = 0; i < nest->specs[sv].n; i++)
5257 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5258 const struct ctables_function_info *cfi =
5259 &ctables_function_info[ss->function];
5261 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise handling of scale missing values, collect the scale variables
   of the other nests that share this nest's group_head, and attach the list to
   each of this nest's summary variants. */
5265 if (t->ctables->smissing_listwise)
5267 struct variable **listwise_vars = NULL;
5269 size_t allocated = 0;
5271 for (size_t j = nest->group_head; j < stack->n; j++)
5273 const struct ctables_nest *other_nest = &stack->nests[j];
5274 if (other_nest->group_head != nest->group_head)
5277 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5280 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5281 sizeof *listwise_vars);
5282 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5285 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5288 listwise_vars = xmemdup (listwise_vars,
5289 n * sizeof *listwise_vars);
5290 nest->specs[sv].listwise_vars = listwise_vars;
5291 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary spec sets of all nests on the summary axis into
   T->summary_specs.  Each merge item tracks a position (`ofs') in one set.
   Each pass compares the items' current specs with merge_item_compare_3way()
   against a candidate minimum, appends the chosen spec to MERGED, and then
   advances every item whose current spec compares equal to it, recording the
   merged index in that spec's axis_idx.  An item that reaches the end of its
   set is removed by moving the last item into its slot. */
5296 struct ctables_summary_spec_set *merged = &t->summary_specs;
5297 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5299 for (size_t j = 0; j < stack->n; j++)
5301 const struct ctables_nest *nest = &stack->nests[j];
5303 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5304 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5309 struct merge_item min = items[0];
5310 for (size_t j = 1; j < n_left; j++)
5311 if (merge_item_compare_3way (&items[j], &min) < 0)
5314 if (merged->n >= merged->allocated)
5315 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5316 sizeof *merged->specs);
5317 merged->specs[merged->n++] = min.set->specs[min.ofs];
5319 for (size_t j = 0; j < n_left; )
5321 if (merge_item_compare_3way (&items[j], &min) == 0)
5323 struct merge_item *item = &items[j];
5324 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5325 if (item->ofs >= item->set->n)
5327 items[j] = items[--n_left];
/* Collect the variables that CTSF_areaPCT_SUM functions on the summary axis
   refer to. */
5336 size_t allocated_sum_vars = 0;
5337 enumerate_sum_vars (t->axes[t->summary_axis],
5338 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5340 return (ctables_check_label_position (t, lexer, PIVOT_AXIS_ROW)
5341 && ctables_check_label_position (t, lexer, PIVOT_AXIS_COLUMN));
/* For case C, looks at the last variable of every nest in T's stack for axis
   A.  The case's value for that variable is tested for being a system-missing
   number and for matching one of the variable's categories, and values that
   qualify are recorded with ctables_value_insert(), using the variable's
   width.

   ctables_execute() calls this for each axis whose labels have been moved
   (T->label_axis[A] != A). */
5345 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5346 enum pivot_axis_type a)
5348 struct ctables_stack *stack = &t->stacks[a];
5349 for (size_t i = 0; i < stack->n; i++)
5351 const struct ctables_nest *nest = &stack->nests[i];
5352 const struct variable *var = nest->vars[nest->n - 1];
5353 const union value *value = case_data (c, var);
/* NOTE(review): system-missing numeric values are presumably skipped --
   confirm. */
5355 if (var_is_numeric (var) && value->f == SYSMIS)
5358 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5360 ctables_value_insert (t, value, var_get_width (var));
/* Adds to OCCURRENCES, with ctables_add_occurrence(), values of VAR selected by
   the categories in CATS.  Depending on the kind of category, the values come
   from the category itself (a single number or string) or from VAR's value
   labels, filtered by the category's criteria.

   NOTE(review): the comments below name the kind of category each group of
   statements handles; these are inferred from the fields that each group
   uses -- confirm against the case labels. */
5365 ctables_add_category_occurrences (const struct variable *var,
5366 struct hmap *occurrences,
5367 const struct ctables_categories *cats)
5369 const struct val_labs *val_labs = var_get_value_labels (var);
5371 for (size_t i = 0; i < cats->n_cats; i++)
5373 const struct ctables_category *c = &cats->cats[i];
/* A single number: add it as is. */
5377 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string: copy it into a value of VAR's width, padding on the right
   with spaces, add that, and destroy the temporary value. */
5383 int width = var_get_width (var);
5385 value_init (&value, width);
5386 value_copy_buf_rpad (&value, width,
5387 CHAR_CAST (uint8_t *, c->string.string),
5388 c->string.length, ' ');
5389 ctables_add_occurrence (var, &value, occurrences);
5390 value_destroy (&value, width);
/* A numeric range: add each labeled value within [nrange[0], nrange[1]]. */
5395 assert (var_is_numeric (var));
5396 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5397 vl = val_labs_next (val_labs, vl))
5398 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5399 ctables_add_occurrence (var, &vl->value, occurrences);
/* A string range: add each labeled value that in_string_range() accepts. */
5403 assert (var_is_alpha (var));
5404 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5405 vl = val_labs_next (val_labs, vl))
5406 if (in_string_range (&vl->value, var, c->srange))
5407 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each labeled value that is missing for VAR. */
5411 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5412 vl = val_labs_next (val_labs, vl))
5413 if (var_is_value_missing (var, &vl->value))
5414 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value. */
5418 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5419 vl = val_labs_next (val_labs, vl))
5420 ctables_add_occurrence (var, &vl->value, occurrences);
5423 case CCT_POSTCOMPUTE:
/* Add each labeled value, except that missing values are added only if the
   category includes missing values. */
5433 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5434 vl = val_labs_next (val_labs, vl))
5435 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5436 ctables_add_occurrence (var, &vl->value, occurrences);
5439 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Steps through the variables of S's nests, one axis at a time: A is the
   current axis and A_IDX the index of the current variable within that axis's
   nest.  For each variable it tries the recorded occurrences and certain
   categories in turn, storing the choice in CATS[A][A_IDX], and for
   occurrences also storing the value in scratch case C, before recursing to the
   next variable.  When A has passed the last axis, a cell is inserted into S
   with ctables_cell_insert__().

   NOTE(review): ADD is passed down unchanged through occurrences and as true
   below a postcompute category or a shown-empty subtotal category.  It
   presumably determines whether the final insertion takes place -- confirm. */
5446 ctables_section_recurse_add_empty_categories (
5447 struct ctables_section *s,
5448 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5449 enum pivot_axis_type a, size_t a_idx, bool add)
5451 if (a >= PIVOT_N_AXES)
5454 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or no more variables in it: move on to the next
   axis. */
5456 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5457 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0, add);
5460 const struct variable *var = s->nests[a]->vars[a_idx];
5461 size_t idx = var_get_dict_index (var);
5462 bool show_empty = s->table->show_empty[idx];
5466 const struct ctables_categories *categories = s->table->categories[idx];
5467 int width = var_get_width (var);
5468 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5469 const struct ctables_occurrence *o;
/* Try each value recorded for this variable: put it into C in place of the
   previous value and look up the category that it belongs to, which must
   exist. */
5470 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5472 union value *value = case_data_rw (c, var);
5473 value_destroy (value, width);
5474 value_clone (value, &o->value, width);
5475 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5476 assert (cats[a][a_idx] != NULL);
5477 ctables_section_recurse_add_empty_categories (s, cats, c,
/* Also try each postcompute category and, if empty categories are shown for
   this variable, each subtotal category. */
5481 for (size_t i = 0; i < categories->n_cats; i++)
5483 const struct ctables_category *cat = &categories->cats[i];
5484 if (cat->type == CCT_POSTCOMPUTE
5485 || (show_empty && cat->type == CCT_SUBTOTAL))
5487 cats[a][a_idx] = cat;
5488 ctables_section_recurse_add_empty_categories (s, cats, c,
5489 a, a_idx + 1, true);
/* Adds cells to section S for category combinations that the data did not
   produce.

   First, for every non-scale variable in S's nests for which the table's
   show_empty flag is set, the variable's occurrence map is extended with the
   values called for by its categories.  Then
   ctables_section_recurse_add_empty_categories() walks the combinations,
   using per-axis arrays of category pointers and a scratch case created from
   the dictionary's prototype. */
5496 ctables_section_add_empty_categories (struct ctables_section *s)
5498 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5500 for (size_t k = 0; k < s->nests[a]->n; k++)
5501 if (k != s->nests[a]->scale_idx)
5503 const struct variable *var = s->nests[a]->vars[k];
5504 size_t idx = var_get_dict_index (var);
5505 const struct ctables_categories *cats = s->table->categories[idx];
5506 if (s->table->show_empty[idx])
5507 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* One variable-length array of category pointers per axis, sized by the number
   of variables in that axis's nest. */
5510 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5511 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5512 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5513 const struct ctables_category **cats[PIVOT_N_AXES] =
5515 [PIVOT_AXIS_LAYER] = layer_cats,
5516 [PIVOT_AXIS_ROW] = row_cats,
5517 [PIVOT_AXIS_COLUMN] = column_cats,
/* The recursion starts at axis 0, variable 0, with ADD false. */
5519 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5520 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0, false);
/* Empties section S of the data that it accumulated: the occurrences recorded
   for each non-scale variable, the cells, and the areas of every type.  The
   hash maps themselves remain usable afterward; the cell and area maps are
   shrunk with hmap_shrink().  ctables_table_clear() calls this for every
   section of a table, and ctables_section_uninit() calls it before destroying
   the maps. */
5525 ctables_section_clear (struct ctables_section *s)
5527 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5529 const struct ctables_nest *nest = s->nests[a];
5530 for (size_t i = 0; i < nest->n; i++)
5531 if (i != nest->scale_idx)
5533 const struct variable *var = nest->vars[i];
5534 int width = var_get_width (var);
5535 struct ctables_occurrence *o, *next;
5536 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iteration macro is used because entries are deleted during
   iteration. */
5537 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5539 value_destroy (&o->value, width);
5540 hmap_delete (map, &o->node);
5547 struct ctables_cell *cell, *next_cell;
5548 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Destroy the cell's values for the non-scale variables on each axis, then the
   array that held them. */
5550 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5552 const struct ctables_nest *nest = s->nests[a];
5553 for (size_t i = 0; i < nest->n; i++)
5554 if (i != nest->scale_idx)
5555 value_destroy (&cell->axes[a].cvs[i].value,
5556 var_get_width (nest->vars[i]));
5557 free (cell->axes[a].cvs);
/* Uninitialize the cell's summaries, one per spec in the summary-axis nest's
   spec set for the cell's summary variant. */
5560 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5561 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5562 for (size_t i = 0; i < specs->n; i++)
5563 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5564 free (cell->summaries);
5566 hmap_delete (&s->cells, &cell->node);
5569 hmap_shrink (&s->cells);
5571 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5573 struct ctables_area *area, *next_area;
5574 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5578 hmap_delete (&s->areas[at], &area->node);
5581 hmap_shrink (&s->areas[at]);
/* Releases section S's storage: clears its contents with
   ctables_section_clear(), then destroys every occurrence map and frees the
   per-axis arrays that hold them, and finally destroys the cell map and the
   area maps. */
5586 ctables_section_uninit (struct ctables_section *s)
5588 ctables_section_clear (s);
5590 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5592 struct ctables_nest *nest = s->nests[a];
5593 for (size_t i = 0; i < nest->n; i++)
5594 hmap_destroy (&s->occurrences[a][i]);
5595 free (s->occurrences[a]);
5598 hmap_destroy (&s->cells);
5599 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5600 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated in table T: clears each of its sections and,
   if category labels were moved (T->clabels_example is nonnull), empties the
   map of category label values.  The array T->clabels_values is freed and
   reset to empty.  ctables_execute() calls this after outputting each table
   for a group of cases. */
5604 ctables_table_clear (struct ctables_table *t)
5606 for (size_t i = 0; i < t->n_sections; i++)
5607 ctables_section_clear (&t->sections[i]);
5609 if (t->clabels_example)
/* The values in the map have the width of the example variable. */
5611 int width = var_get_width (t->clabels_example);
5612 struct ctables_value *value, *next_value;
5613 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5614 &t->clabels_values_map)
5616 value_destroy (&value->value, width);
5617 hmap_delete (&t->clabels_values_map, &value->node);
5620 hmap_shrink (&t->clabels_values_map);
5622 free (t->clabels_values);
5623 t->clabels_values = NULL;
5624 t->n_clabels_values = 0;
/* Runs the CTABLES command on the cases from INPUT, which belong to dataset
   DS.

   First, every table gets its array of sections, sized as the product over the
   three axes of the number of nests in the axis's stack (counting an empty
   stack as 1) and filled in by ctables_table_add_section().

   The cases are then processed in groups obtained from a casegrouper.  Within
   a group, each case is inserted into every section of every table along with
   its weights, and the values for moved category labels are recorded.  After
   the group's cases have been read, each table has its category label values
   sorted (if it has any), its empty categories added, its output produced, and
   its accumulated data cleared.

   Returns the result of casegrouper_destroy(). */
5629 ctables_execute (struct dataset *ds, struct casereader *input,
5632 for (size_t i = 0; i < ct->n_tables; i++)
5634 struct ctables_table *t = ct->tables[i];
5635 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5636 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5637 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5638 sizeof *t->sections);
5639 size_t ix[PIVOT_N_AXES];
5640 ctables_table_add_section (t, 0, ix);
5643 struct dictionary *dict = dataset_dict (ds);
/* Group the cases by split variables, or with an empty variable list.
   NOTE(review): the choice presumably depends on SPLITTING -- confirm. */
5645 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5646 struct casegrouper *grouper
5648 ? casegrouper_create_splits (input, dict)
5649 : casegrouper_create_vars (input, NULL, 0));
5650 struct casereader *group;
5651 while (casegrouper_get_next_group (grouper, &group))
5654 output_split_file_values_peek (ds, group);
5656 bool warn_on_invalid = true;
5657 for (struct ccase *c = casereader_read (group); c;
5658 case_unref (c), c = casereader_read (group))
/* Compute the case's weight under each form of weighting; an unweighted case
   counts as 1. */
5660 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5661 double e_weight = (ct->e_weight
5662 ? var_force_valid_weight (ct->e_weight,
5663 case_num (c, ct->e_weight),
5667 [CTW_DICTIONARY] = d_weight,
5668 [CTW_EFFECTIVE] = e_weight,
5669 [CTW_UNWEIGHTED] = 1.0,
5672 for (size_t i = 0; i < ct->n_tables; i++)
5674 struct ctables_table *t = ct->tables[i];
5676 for (size_t j = 0; j < t->n_sections; j++)
5677 ctables_cell_insert (&t->sections[j], c, weight);
/* Record category label values for each axis whose labels have been moved. */
5679 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5680 if (t->label_axis[a] != a)
5681 ctables_insert_clabels_values (t, c, a);
5684 casereader_destroy (group);
/* Output each table for this group, then reset it for the next group. */
5686 for (size_t i = 0; i < ct->n_tables; i++)
5688 struct ctables_table *t = ct->tables[i];
5690 if (t->clabels_example)
5691 ctables_sort_clabels_values (t);
5693 for (size_t j = 0; j < t->n_sections; j++)
5694 ctables_section_add_empty_categories (&t->sections[j]);
5696 ctables_table_output (ct, t);
5697 ctables_table_clear (t);
5700 return casegrouper_destroy (grouper);
/* Searches CT's `postcomputes' map for an entry named NAME.  Both the hash
   lookup (utf8_hash_case_string()) and the name comparison
   (utf8_strcasecmp()) are case-insensitive.

   NOTE(review): the return statements are not shown in this excerpt;
   presumably the matching entry is returned, or NULL if there is none. */
5703 static struct ctables_postcompute *
5704 ctables_find_postcompute (struct ctables *ct, const char *name)
5706 struct ctables_postcompute *pc;
5707 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5708 utf8_hash_case_string (name, 0), &ct->postcomputes)
5709 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form `&name=EXPR(...)',
   from LEXER and records the resulting postcompute in CT (a parameter whose
   declaration line is not shown in this excerpt).  If a postcompute with the
   same name already exists, a warning is issued and its expression and
   location are replaced.

   NOTE(review): the failure-path statements and the return statements are
   not shown in this excerpt. */
5715 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The offset is backed up by one token, presumably so that the location
   covers the already-consumed PCOMPUTE keyword -- confirm against caller. */
5718 int pcompute_start = lex_ofs (lexer) - 1;
5720 if (!lex_match (lexer, T_AND))
5722 lex_error_expecting (lexer, "&");
5725 if (!lex_force_id (lexer))
/* NAME is a fresh copy of the identifier token. */
5728 char *name = ss_xstrdup (lex_tokss (lexer));
5731 if (!lex_force_match_phrase (lexer, "=EXPR("))
/* Remember the token range of the expression so that its source text can be
   used as the label below. */
5737 int expr_start = lex_ofs (lexer);
5738 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5739 int expr_end = lex_ofs (lexer) - 1;
5740 if (!expr || !lex_force_match (lexer, T_RPAREN))
5742 ctables_pcexpr_destroy (expr);
5746 int pcompute_end = lex_ofs (lexer) - 1;
5748 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5751 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn at the new location, point at the previous one, then
   release the old expression and location. */
5754 msg_at (SW, location, _("New definition of &%s will override the "
5755 "previous definition."),
5757 msg_at (SN, pc->location, _("This is the previous definition."));
5759 ctables_pcexpr_destroy (pc->expr);
5760 msg_location_destroy (pc->location);
/* First definition: create a new entry that takes NAME and insert it into
   the map under the case-insensitive hash of its name. */
5765 pc = xmalloc (sizeof *pc);
5766 *pc = (struct ctables_postcompute) { .name = name };
5767 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5768 utf8_hash_case_string (pc->name, 0));
5771 pc->location = location;
/* The label is set to the source text of the expression tokens. */
5773 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications that follow PPROPERTIES ... FORMAT from
   LEXER into *SSS, which is first reset to an empty set.  Each specification
   is a summary function, a percentile value when the function is CTSF_PTILE,
   and a format specifier.  Parsing stops at the end of the command, at a
   slash, or at a LABEL or HIDESOURCECATS keyword.

   NOTE(review): the return statements are not shown in this excerpt. */
5778 ctables_parse_pproperties_format (struct lexer *lexer,
5779 struct ctables_summary_spec_set *sss)
5781 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Keep parsing specifications until a token that cannot start one. */
5783 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5784 && !(lex_token (lexer) == T_ID
5785 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5786 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5787 lex_tokss (lexer)))))
5789 /* Parse function. */
5790 enum ctables_summary_function function;
5791 enum ctables_weighting weighting;
5792 enum ctables_area_type area;
5793 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5796 /* Parse percentile. */
5797 double percentile = 0;
5798 if (function == CTSF_PTILE)
/* The percentile must be a number in the closed range 0 to 100. */
5800 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5802 percentile = lex_number (lexer);
5807 struct fmt_spec format;
5808 bool is_ctables_format;
5809 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when full, then append the new specification. */
5812 if (sss->n >= sss->allocated)
5813 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5814 sizeof *sss->specs);
5815 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5816 .function = function,
5817 .weighting = weighting,
5820 .percentile = percentile,
5822 .is_ctables_format = is_ctables_format,
/* Releases whatever was accumulated in *SSS.  NOTE(review): presumably this
   is the failure path; the label and return lines are not shown. */
5828 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand from LEXER.  The syntax is a
   list of `&name' references to postcomputes already defined in CT, followed
   by LABEL, FORMAT, and HIDESOURCECATS settings.  Each setting is applied to
   every postcompute in the list.

   NOTE(review): the failure-path statements, the cleanup of PCS, and the
   return statements are not shown in this excerpt. */
5833 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named in this subcommand. */
5835 struct ctables_postcompute **pcs = NULL;
5837 size_t allocated_pcs = 0;
5839 while (lex_match (lexer, T_AND))
5841 if (!lex_force_id (lexer))
5843 struct ctables_postcompute *pc
5844 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
/* Reported for a name with no matching postcompute (the condition line is
   not shown in this excerpt). */
5847 lex_error (lexer, _("Unknown computed category &%s."),
5848 lex_tokcstr (lexer));
5853 if (n_pcs >= allocated_pcs)
5854 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Parse settings until the end of this subcommand. */
5858 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5860 if (lex_match_id (lexer, "LABEL"))
5862 lex_match (lexer, T_EQUALS);
5863 if (!lex_force_string (lexer))
/* Give every listed postcompute its own copy of the label string, replacing
   any previous label. */
5866 for (size_t i = 0; i < n_pcs; i++)
5868 free (pcs[i]->label);
5869 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5874 else if (lex_match_id (lexer, "FORMAT"))
5876 lex_match (lexer, T_EQUALS);
5878 struct ctables_summary_spec_set sss;
5879 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each listed postcompute receives its own clone of the parsed set, and the
   temporary set is released afterward. */
5882 for (size_t i = 0; i < n_pcs; i++)
5885 ctables_summary_spec_set_uninit (pcs[i]->specs);
5887 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5888 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5890 ctables_summary_spec_set_uninit (&sss);
5892 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5894 lex_match (lexer, T_EQUALS);
5895 bool hide_source_cats;
5896 if (!parse_bool (lexer, &hide_source_cats))
5898 for (size_t i = 0; i < n_pcs; i++)
5899 pcs[i]->hide_source_cats = hide_source_cats;
5903 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats the time NOW, converted to local time, according to the strftime()
   format string FORMAT and appends the result to OUT.

   NOTE(review): no NULL check on the result of localtime() and no check of
   strftime()'s return value is visible before the buffer is appended; the
   declaration of `value' is also not shown in this excerpt -- confirm. */
5916 put_strftime (struct string *out, time_t now, const char *format)
5918 const struct tm *tm = localtime (&now);
5920 strftime (value, sizeof value, format, tm);
5921 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past the prefix.

   NOTE(review): the return statements are not shown in this excerpt; callers
   use the result as a condition, so presumably it reports whether the prefix
   was found and skipped. */
5925 skip_prefix (struct substring *s, struct substring prefix)
5927 if (ss_starts_with (*s, prefix))
5929 ss_advance (s, prefix.length);
/* Appends to OUT a textual rendering of the tokens in LEXER from offset
   EXPR_START up to, but not including, EXPR_END.  An identifier that names a
   variable in DICT with a variable label is rendered as that label.  Other
   tokens are rendered using their source representation, with spaces added
   between them.

   NOTE(review): the handling of bracketed spans (the `nest' counter) is on
   lines not shown in this excerpt. */
5937 put_table_expression (struct string *out, struct lexer *lexer,
5938 struct dictionary *dict, int expr_start, int expr_end)
5941 for (int ofs = expr_start; ofs < expr_end; ofs++)
5943 const struct token *t = lex_ofs_token (lexer, ofs);
5944 if (t->type == T_LBRACK)
5946 else if (t->type == T_RBRACK && nest > 0)
5952 else if (t->type == T_ID)
/* Prefer the variable's label; fall back to the identifier text when it is
   not a variable or the variable has no label. */
5954 const struct variable *var
5955 = dict_lookup_var (dict, t->string.string);
5956 const char *label = var ? var_get_label (var) : NULL;
5957 ds_put_cstr (out, label ? label : t->string.string);
/* Add a leading space, except at the very start, before a right parenthesis,
   or when OUT already ends in a space. */
5961 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5962 ds_put_byte (out, ' ');
5964 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5965 ds_put_cstr (out, repr);
/* Add a trailing space, except after the last token or after a left
   parenthesis. */
5968 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5969 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding the substitutions `)DATE', `)TIME', and
   `)TABLE'.  `)DATE' and `)TIME' become NOW formatted with strftime() "%x"
   and "%X" respectively.  `)TABLE' becomes the rendering of the table
   expression given by LEXER, DICT, EXPR_START, and EXPR_END.  Any other `)'
   is copied literally. */
5975 put_title_text (struct string *out, struct substring in, time_t now,
5976 struct lexer *lexer, struct dictionary *dict,
5977 int expr_start, int expr_end)
/* Copy everything up to the next `)' verbatim. */
5981 size_t chunk = ss_find_byte (in, ')');
5982 ds_put_substring (out, ss_head (in, chunk));
5983 ss_advance (&in, chunk);
/* Nothing left means there was no further `)'.  (The statement taken in that
   case is not shown in this excerpt.) */
5984 if (ss_is_empty (in))
5987 if (skip_prefix (&in, ss_cstr (")DATE")))
5988 put_strftime (out, now, "%x");
5989 else if (skip_prefix (&in, ss_cstr (")TIME")))
5990 put_strftime (out, now, "%X");
5991 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5992 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a recognized substitution: emit the `)' itself and move past it. */
5995 ds_put_byte (out, ')');
5996 ss_advance (&in, 1);
/* Entry point for the CTABLES command.

   Parses the subcommands that may precede the first TABLE (FORMAT, VLABELS,
   MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS).  Then
   parses each TABLE together with its SLABELS, CLABELS, CRITERIA,
   CATEGORIES, TITLES, SIGTEST, and COMPARETEST subcommands.  Finally runs
   ctables_execute() on the active dataset.

   On the execution path, returns CMD_SUCCESS if both ctables_execute() and
   proc_commit() succeed and CMD_FAILURE otherwise.

   NOTE(review): many `goto error', `else', `do', and brace lines are not
   shown in this excerpt. */
6002 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6004 struct casereader *input = NULL;
/* Run the measure guesser over the opened dataset.  (The condition guarding
   these lines is not shown in this excerpt.) */
6006 struct measure_guesser *mg = measure_guesser_create (ds);
6009 input = proc_open (ds);
6010 measure_guesser_run (mg, input);
6011 measure_guesser_destroy (mg);
/* One label-display setting per dictionary variable, each initialized from
   the global show-variables setting. */
6014 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6015 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6016 enum settings_value_show tvars = settings_get_show_variables ();
6017 for (size_t i = 0; i < n_vars; i++)
6018 vlabels[i] = (enum ctables_vlabel) tvars;
/* Take an unshared copy of the default table look, since FORMAT may modify
   its column heading width range below. */
6020 struct pivot_table_look *look = pivot_table_look_unshare (
6021 pivot_table_look_ref (pivot_table_look_get_default ()));
6023 struct ctables *ct = xmalloc (sizeof *ct);
6024 *ct = (struct ctables) {
6025 .dict = dataset_dict (ds),
6027 .ctables_formats = FMT_SETTINGS_INIT,
6029 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp passed to put_title_text() for TITLES text. */
6032 time_t now = time (NULL);
/* Number-style strings for the four CTABLES-specific formats, in two
   variants.  The variant used depends on whether the decimal point setting
   is '.'. */
6037 const char *dot_string;
6038 const char *comma_string;
6040 static const struct ctf ctfs[4] = {
6041 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6042 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6043 { CTEF_PAREN, "-,(,),", "-.(.)." },
6044 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6046 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6047 for (size_t i = 0; i < 4; i++)
6049 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6050 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6051 fmt_number_style_from_string (s));
6054 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that come before the first TABLE subcommand. */
6057 while (!lex_match_id (lexer, "TABLE"))
/* FORMAT: column width limits, their units, and the strings shown for empty
   and missing cells. */
6059 if (lex_match_id (lexer, "FORMAT"))
6061 double widths[2] = { SYSMIS, SYSMIS };
6062 double units_per_inch = 72.0;
6064 int start_ofs = lex_ofs (lexer);
6065 while (lex_token (lexer) != T_SLASH)
6067 if (lex_match_id (lexer, "MINCOLWIDTH"))
6069 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6072 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6074 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6077 else if (lex_match_id (lexer, "UNITS"))
6079 lex_match (lexer, T_EQUALS);
6080 if (lex_match_id (lexer, "POINTS"))
6081 units_per_inch = 72.0;
6082 else if (lex_match_id (lexer, "INCHES"))
6083 units_per_inch = 1.0;
6084 else if (lex_match_id (lexer, "CM"))
6085 units_per_inch = 2.54;
6088 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6092 else if (lex_match_id (lexer, "EMPTY"))
6097 lex_match (lexer, T_EQUALS);
6098 if (lex_match_id (lexer, "ZERO"))
6100 /* Nothing to do. */
6102 else if (lex_match_id (lexer, "BLANK"))
6103 ct->zero = xstrdup ("");
6104 else if (lex_force_string (lexer))
6106 ct->zero = ss_xstrdup (lex_tokss (lexer));
6112 else if (lex_match_id (lexer, "MISSING"))
6114 lex_match (lexer, T_EQUALS);
6115 if (!lex_force_string (lexer))
/* A string other than "." is copied.  The value used for "." is on a line
   not shown in this excerpt. */
6119 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6120 ? ss_xstrdup (lex_tokss (lexer))
6126 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6127 "UNITS", "EMPTY", "MISSING");
/* When both widths were given, the minimum may not exceed the maximum. */
6132 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6133 && widths[0] > widths[1])
6135 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6136 _("MINCOLWIDTH must not be greater than "
/* Convert each specified width from the chosen unit to inches and then scale
   it by 96 before storing it in the look. */
6141 for (size_t i = 0; i < 2; i++)
6142 if (widths[i] != SYSMIS)
6144 int *wr = ct->look->col_heading_width_range;
6145 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS: choose how the listed variables are labeled. */
6150 else if (lex_match_id (lexer, "VLABELS"))
6152 if (!lex_force_match_id (lexer, "VARIABLES"))
6154 lex_match (lexer, T_EQUALS);
6156 struct variable **vars;
6158 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6162 if (!lex_force_match_id (lexer, "DISPLAY"))
6167 lex_match (lexer, T_EQUALS);
6169 enum ctables_vlabel vlabel;
6170 if (lex_match_id (lexer, "DEFAULT"))
6171 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6172 else if (lex_match_id (lexer, "NAME"))
6174 else if (lex_match_id (lexer, "LABEL"))
6175 vlabel = CTVL_LABEL;
6176 else if (lex_match_id (lexer, "BOTH"))
6178 else if (lex_match_id (lexer, "NONE"))
6182 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* The vlabels array is indexed by each variable's dictionary index. */
6188 for (size_t i = 0; i < n_vars; i++)
6189 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
/* MRSETS COUNTDUPLICATES: a boolean stored in CT. */
6192 else if (lex_match_id (lexer, "MRSETS"))
6194 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6196 lex_match (lexer, T_EQUALS);
6197 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
/* SMISSING: VARIABLE clears the listwise flag and LISTWISE sets it. */
6200 else if (lex_match_id (lexer, "SMISSING"))
6202 if (lex_match_id (lexer, "VARIABLE"))
6203 ct->smissing_listwise = false;
6204 else if (lex_match_id (lexer, "LISTWISE"))
6205 ct->smissing_listwise = true;
6208 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6212 else if (lex_match_id (lexer, "PCOMPUTE"))
6214 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6217 else if (lex_match_id (lexer, "PPROPERTIES"))
6219 if (!ctables_parse_pproperties (lexer, ct))
/* WEIGHT VARIABLE: the effective base weight variable. */
6222 else if (lex_match_id (lexer, "WEIGHT"))
6224 if (!lex_force_match_id (lexer, "VARIABLE"))
6226 lex_match (lexer, T_EQUALS);
6227 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS: COUNT gives an explicit threshold.  Without COUNT, the
   threshold becomes 5 unless one is already set. */
6231 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6233 if (lex_match_id (lexer, "COUNT"))
6235 lex_match (lexer, T_EQUALS);
6236 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6239 ct->hide_threshold = lex_integer (lexer);
6242 else if (ct->hide_threshold == 0)
6243 ct->hide_threshold = 5;
/* Unknown subcommand.  If it is one that belongs after TABLE, add a note
   saying so. */
6247 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6248 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6249 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6250 if (lex_match_id (lexer, "SLABELS")
6251 || lex_match_id (lexer, "CLABELS")
6252 || lex_match_id (lexer, "CRITERIA")
6253 || lex_match_id (lexer, "CATEGORIES")
6254 || lex_match_id (lexer, "TITLES")
6255 || lex_match_id (lexer, "SIGTEST")
6256 || lex_match_id (lexer, "COMPARETEST"))
6257 lex_next_msg (lexer, SN, -1, -1,
6258 _("TABLE must appear before this subcommand."));
6262 if (!lex_force_match (lexer, T_SLASH))
/* Parse each TABLE subcommand, appending a new table to CT's growable
   array. */
6266 size_t allocated_tables = 0;
6269 if (ct->n_tables >= allocated_tables)
6270 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6271 sizeof *ct->tables);
/* Build a default category and categories object for the new table, with a
   categories pointer and a show_empty flag for each dictionary variable. */
6273 struct ctables_category *cat = xmalloc (sizeof *cat);
6274 *cat = (struct ctables_category) {
6276 .include_missing = false,
6277 .sort_ascending = true,
6280 struct ctables_categories *c = xmalloc (sizeof *c);
6281 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6282 *c = (struct ctables_categories) {
6288 struct ctables_categories **categories = xnmalloc (n_vars,
6289 sizeof *categories);
6290 for (size_t i = 0; i < n_vars; i++)
/* show_empty starts out true for every variable. */
6293 bool *show_empty = xmalloc (n_vars);
6294 memset (show_empty, true, n_vars);
/* Table defaults: summary labels are visible on the column axis, and every
   axis is its own label axis. */
6296 struct ctables_table *t = xmalloc (sizeof *t);
6297 *t = (struct ctables_table) {
6299 .slabels_axis = PIVOT_AXIS_COLUMN,
6300 .slabels_visible = true,
6301 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6303 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6304 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6305 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6307 .clabels_from_axis = PIVOT_AXIS_LAYER,
6308 .clabels_to_axis = PIVOT_AXIS_LAYER,
6309 .categories = categories,
6310 .n_categories = n_vars,
6311 .show_empty = show_empty,
6314 ct->tables[ct->n_tables++] = t;
/* Table expression: a row axis, then optionally BY a column axis, then
   optionally BY a layer axis.  The token range is remembered for )TABLE
   substitution in TITLES. */
6316 lex_match (lexer, T_EQUALS);
6317 int expr_start = lex_ofs (lexer);
6318 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6319 &t->axes[PIVOT_AXIS_ROW]))
6321 if (lex_match (lexer, T_BY))
6323 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6324 &t->axes[PIVOT_AXIS_COLUMN]))
6327 if (lex_match (lexer, T_BY))
6329 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6330 &t->axes[PIVOT_AXIS_LAYER]))
6334 int expr_end = lex_ofs (lexer);
6336 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6337 && !t->axes[PIVOT_AXIS_LAYER])
6339 lex_error (lexer, _("At least one variable must be specified."));
/* Look for a scale variable on each axis.  The error below is reported when
   more than one axis has one.  (The counting and the condition are on lines
   not shown in this excerpt.) */
6343 const struct ctables_axis *scales[PIVOT_N_AXES];
6344 size_t n_scales = 0;
6345 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6347 scales[a] = find_scale (t->axes[a]);
6353 msg (SE, _("Scale variables may appear only on one axis."));
6354 if (scales[PIVOT_AXIS_ROW])
6355 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6356 _("This scale variable appears on the rows axis."));
6357 if (scales[PIVOT_AXIS_COLUMN])
6358 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6359 _("This scale variable appears on the columns axis."));
6360 if (scales[PIVOT_AXIS_LAYER])
6361 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6362 _("This scale variable appears on the layer axis."));
/* Look for a summary on each axis.  An axis with a scale variable is handled
   by the branch on the line not shown; otherwise the axis is searched for a
   categorical summary specification. */
6366 const struct ctables_axis *summaries[PIVOT_N_AXES];
6367 size_t n_summaries = 0;
6368 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6370 summaries[a] = (scales[a]
6372 : find_categorical_summary_spec (t->axes[a]));
6376 if (n_summaries > 1)
6378 msg (SE, _("Summaries may appear only on one axis."));
6379 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6382 msg_at (SN, summaries[a]->loc,
6384 ? _("This variable on the rows axis has a summary.")
6385 : a == PIVOT_AXIS_COLUMN
6386 ? _("This variable on the columns axis has a summary.")
6387 : _("This variable on the layers axis has a summary."));
6389 msg_at (SN, summaries[a]->loc,
6390 _("This is a scale variable, so it always has a "
6391 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary or, when no axis has
   one, an axis that has any variables at all. */
6396 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6397 if (n_summaries ? summaries[a] : t->axes[a])
6399 t->summary_axis = a;
/* If the command ends right after the table expression, prepare the table
   now. */
6403 if (lex_token (lexer) == T_ENDCMD)
6405 if (!ctables_prepare_table (t, lexer))
6409 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that apply to the table just parsed, up to the next TABLE or
   the end of the command. */
6412 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
/* SLABELS: the position and visibility of summary labels. */
6414 if (lex_match_id (lexer, "SLABELS"))
6416 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6418 if (lex_match_id (lexer, "POSITION"))
6420 lex_match (lexer, T_EQUALS);
6421 if (lex_match_id (lexer, "COLUMN"))
6422 t->slabels_axis = PIVOT_AXIS_COLUMN;
6423 else if (lex_match_id (lexer, "ROW"))
6424 t->slabels_axis = PIVOT_AXIS_ROW;
6425 else if (lex_match_id (lexer, "LAYER"))
6426 t->slabels_axis = PIVOT_AXIS_LAYER;
6429 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6433 else if (lex_match_id (lexer, "VISIBLE"))
6435 lex_match (lexer, T_EQUALS);
6436 if (!parse_bool (lexer, &t->slabels_visible))
6441 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS: the axis on which the row or column category labels are shown. */
6446 else if (lex_match_id (lexer, "CLABELS"))
6448 int start_ofs = lex_ofs (lexer) - 1;
6449 if (lex_match_id (lexer, "AUTO"))
6451 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6452 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6454 else if (lex_match_id (lexer, "ROWLABELS"))
6456 lex_match (lexer, T_EQUALS);
6457 if (lex_match_id (lexer, "OPPOSITE"))
6458 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6459 else if (lex_match_id (lexer, "LAYER"))
6460 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6463 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6467 else if (lex_match_id (lexer, "COLLABELS"))
6469 lex_match (lexer, T_EQUALS);
6470 if (lex_match_id (lexer, "OPPOSITE"))
6471 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6472 else if (lex_match_id (lexer, "LAYER"))
6473 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6476 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6482 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6486 int end_ofs = lex_ofs (lexer) - 1;
/* Row labels and column labels may not both be moved.  When they are, the
   error points at the earlier CLABELS specification, whose token range was
   saved in T, and at this one. */
6488 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6489 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6491 msg (SE, _("ROWLABELS and COLLABELS may not both be "
6494 lex_ofs_msg (lexer, SN, t->clabels_start_ofs,
6496 _("This is the first specification."));
6497 lex_ofs_msg (lexer, SN, start_ofs, end_ofs,
6498 _("This is the second specification."));
6502 t->clabels_start_ofs = start_ofs;
6503 t->clabels_end_ofs = end_ofs;
/* CRITERIA CILEVEL: a number validated by lex_force_num_range_halfopen()
   with bounds 0 and 100. */
6505 else if (lex_match_id (lexer, "CRITERIA"))
6507 if (!lex_force_match_id (lexer, "CILEVEL"))
6509 lex_match (lexer, T_EQUALS);
6511 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6513 t->cilevel = lex_number (lexer);
6516 else if (lex_match_id (lexer, "CATEGORIES"))
6518 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: caption, corner, and title text.  Consecutive strings are joined
   with newlines after substitution by put_title_text().
   NOTE(review): the keywords matched below are plural, while the error text
   lists singular forms -- confirm that this is intended. */
6522 else if (lex_match_id (lexer, "TITLES"))
6527 if (lex_match_id (lexer, "CAPTIONS"))
6528 textp = &t->caption;
6529 else if (lex_match_id (lexer, "CORNERS"))
6531 else if (lex_match_id (lexer, "TITLES"))
6535 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6538 lex_match (lexer, T_EQUALS);
6540 struct string s = DS_EMPTY_INITIALIZER;
6541 while (lex_is_string (lexer))
6543 if (!ds_is_empty (&s))
6544 ds_put_byte (&s, '\n');
6545 put_title_text (&s, lex_tokss (lexer), now,
6546 lexer, dataset_dict (ds),
6547 expr_start, expr_end);
6551 *textp = ds_steal_cstr (&s);
6553 while (lex_token (lexer) != T_SLASH
6554 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: the settings are parsed into T's chisq structure, after which the
   subcommand is reported as not yet implemented. */
6556 else if (lex_match_id (lexer, "SIGTEST"))
6558 int start_ofs = lex_ofs (lexer) - 1;
6561 t->chisq = xmalloc (sizeof *t->chisq);
6562 *t->chisq = (struct ctables_chisq) {
6564 .include_mrsets = true,
6565 .all_visible = true,
6571 if (lex_match_id (lexer, "TYPE"))
6573 lex_match (lexer, T_EQUALS);
6574 if (!lex_force_match_id (lexer, "CHISQUARE"))
6577 else if (lex_match_id (lexer, "ALPHA"))
6579 lex_match (lexer, T_EQUALS);
6580 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6582 t->chisq->alpha = lex_number (lexer);
6585 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6587 lex_match (lexer, T_EQUALS);
6588 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6591 else if (lex_match_id (lexer, "CATEGORIES"))
6593 lex_match (lexer, T_EQUALS);
6594 if (lex_match_id (lexer, "ALLVISIBLE"))
6595 t->chisq->all_visible = true;
6596 else if (lex_match_id (lexer, "SUBTOTALS"))
6597 t->chisq->all_visible = false;
6600 lex_error_expecting (lexer,
6601 "ALLVISIBLE", "SUBTOTALS");
6607 lex_error_expecting (lexer, "TYPE", "ALPHA",
6608 "INCLUDEMRSETS", "CATEGORIES");
6612 while (lex_token (lexer) != T_SLASH
6613 && lex_token (lexer) != T_ENDCMD);
6615 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6616 _("Support for SIGTEST not yet implemented."));
/* COMPARETEST: the settings are parsed into T's pairwise structure, after
   which the subcommand is reported as not yet implemented. */
6619 else if (lex_match_id (lexer, "COMPARETEST"))
6621 int start_ofs = lex_ofs (lexer) - 1;
6624 t->pairwise = xmalloc (sizeof *t->pairwise);
6625 *t->pairwise = (struct ctables_pairwise) {
6627 .alpha = { .05, .05 },
6628 .adjust = BONFERRONI,
6629 .include_mrsets = true,
6630 .meansvariance_allcats = true,
6631 .all_visible = true,
6640 if (lex_match_id (lexer, "TYPE"))
6642 lex_match (lexer, T_EQUALS);
6643 if (lex_match_id (lexer, "PROP"))
6644 t->pairwise->type = PROP;
6645 else if (lex_match_id (lexer, "MEAN"))
6646 t->pairwise->type = MEAN;
6649 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two numbers.  Two values are stored in ascending order.
   A single value is used for both. */
6653 else if (lex_match_id (lexer, "ALPHA"))
6655 lex_match (lexer, T_EQUALS);
6657 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6659 double a0 = lex_number (lexer);
6662 lex_match (lexer, T_COMMA);
6663 if (lex_is_number (lexer))
6665 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6667 double a1 = lex_number (lexer);
6670 t->pairwise->alpha[0] = MIN (a0, a1);
6671 t->pairwise->alpha[1] = MAX (a0, a1);
6674 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6676 else if (lex_match_id (lexer, "ADJUST"))
6678 lex_match (lexer, T_EQUALS);
6679 if (lex_match_id (lexer, "BONFERRONI"))
6680 t->pairwise->adjust = BONFERRONI;
6681 else if (lex_match_id (lexer, "BH"))
6682 t->pairwise->adjust = BH;
6683 else if (lex_match_id (lexer, "NONE"))
6684 t->pairwise->adjust = 0;
6687 lex_error_expecting (lexer, "BONFERRONI", "BH",
6692 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6694 lex_match (lexer, T_EQUALS);
6695 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6698 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6700 lex_match (lexer, T_EQUALS);
6701 if (lex_match_id (lexer, "ALLCATS"))
6702 t->pairwise->meansvariance_allcats = true;
6703 else if (lex_match_id (lexer, "TESTEDCATS"))
6704 t->pairwise->meansvariance_allcats = false;
6707 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6711 else if (lex_match_id (lexer, "CATEGORIES"))
6713 lex_match (lexer, T_EQUALS);
6714 if (lex_match_id (lexer, "ALLVISIBLE"))
6715 t->pairwise->all_visible = true;
6716 else if (lex_match_id (lexer, "SUBTOTALS"))
6717 t->pairwise->all_visible = false;
6720 lex_error_expecting (lexer, "ALLVISIBLE",
6725 else if (lex_match_id (lexer, "MERGE"))
6727 lex_match (lexer, T_EQUALS);
6728 if (!parse_bool (lexer, &t->pairwise->merge))
6731 else if (lex_match_id (lexer, "STYLE"))
6733 lex_match (lexer, T_EQUALS);
6734 if (lex_match_id (lexer, "APA"))
6735 t->pairwise->apa_style = true;
6736 else if (lex_match_id (lexer, "SIMPLE"))
6737 t->pairwise->apa_style = false;
6740 lex_error_expecting (lexer, "APA", "SIMPLE");
6744 else if (lex_match_id (lexer, "SHOWSIG"))
6746 lex_match (lexer, T_EQUALS);
6747 if (!parse_bool (lexer, &t->pairwise->show_sig))
6752 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6753 "INCLUDEMRSETS", "MEANSVARIANCE",
6754 "CATEGORIES", "MERGE", "STYLE",
6759 while (lex_token (lexer) != T_SLASH
6760 && lex_token (lexer) != T_ENDCMD);
6762 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6763 _("Support for COMPARETEST not yet implemented."));
/* Unknown subcommand.  If it is one that belongs before TABLE, add a note
   saying so. */
6768 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6769 "CRITERIA", "CATEGORIES", "TITLES",
6770 "SIGTEST", "COMPARETEST");
6771 if (lex_match_id (lexer, "FORMAT")
6772 || lex_match_id (lexer, "VLABELS")
6773 || lex_match_id (lexer, "MRSETS")
6774 || lex_match_id (lexer, "SMISSING")
6775 || lex_match_id (lexer, "PCOMPUTE")
6776 || lex_match_id (lexer, "PPROPERTIES")
6777 || lex_match_id (lexer, "WEIGHT")
6778 || lex_match_id (lexer, "HIDESMALLCOUNTS"))
6779 lex_next_msg (lexer, SN, -1, -1,
6780 _("This subcommand must appear before TABLE."));
6784 if (!lex_match (lexer, T_SLASH))
/* Derive which axis the category labels are moved from, if either the row or
   the column labels were moved, and which axis they are moved to. */
6788 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6789 t->clabels_from_axis = PIVOT_AXIS_ROW;
6790 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6791 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6792 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6794 if (!ctables_prepare_table (t, lexer))
6797 while (lex_token (lexer) != T_ENDCMD);
/* Open the active dataset, run the tables, and commit the procedure.  The
   command succeeds only if both steps succeed.  (The condition guarding this
   proc_open() call, if any, is not shown in this excerpt.) */
6800 input = proc_open (ds);
6801 bool ok = ctables_execute (ds, input, ct);
6802 ok = proc_commit (ds) && ok;
6804 ctables_destroy (ct);
6805 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Cleanup on a separate path.  NOTE(review): presumably this is the `error'
   label path; the label and the return statement are not shown. */
6810 ctables_destroy (ct);