1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES. */
/* parse_ctables_summary_function() chooses one of these according to the
   optional U or E prefix on a summary function name. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas. */
/* Each value identifies the kind of region that an area summary function
   refers to.  ctables_area_type_name[] holds the syntax keyword for each
   value. */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly. */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
/* Syntax keywords for the area types, indexed by enum ctables_area_type.
   parse_ctables_summary_function() tries these as prefixes of a summary
   function name, and ctables_summary_function_name() writes them back
   out. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area. */
/* The three weight arrays below each have N_CTWS elements -- presumably one
   per enum ctables_weighting value; confirm against the code that fills
   them. */
104 struct hmap_node node;
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions. */
/* A function's type decides which name prefixes it accepts; see how the
   u_prefix, e_prefix, and is_area members of ctables_function_info[] are
   computed from TYPE. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
/* Default output format classes.  ctables_summary_default_format() turns a
   function's class into a concrete struct fmt_spec. */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
/* The kinds of variables to which a summary function may be applied.
   add_summary_spec() consults this when a function is attached to a
   variable.  The multiple-response-set value is commented out. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
/* One enumerator per summary function.  The list lives in ctables.inc,
   which is included here with S() defined to expand to just its ENUM
   argument. */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
/* Includes ctables.inc with S() expanding to "+1", so that the expansion
   sums to the number of entries -- presumably the value given to
   N_CTSF_FUNCTIONS (the line that names it is not visible here). */
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
/* Static properties of one summary function, filled in from its S() entry
   in ctables.inc. */
176 struct ctables_function_info
/* Name that parse_ctables_summary_function() compares against after it has
   removed any U/E prefix and area-type prefix. */
178 struct substring basename;
179 enum ctables_function_type type;
180 enum ctables_format format;
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
/* Table of summary function properties, built by including ctables.inc with
   S() expanding to an initializer.  The prefix flags are derived from TYPE:
   U is accepted for CTFT_UCELL, CTFT_UECELL, and CTFT_AREA; E only for
   CTFT_UECELL; and an area prefix is required exactly for CTFT_AREA. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
/* Returns the default output format for FUNCTION.  The function's format
   class, looked up in a local table built from ctables.inc, selects among
   an F format of width 40, a PCT format of width 40 with 1 decimal, and
   VAR's own print format.  VAR is only dereferenced in the last case. */
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
220 return *var_get_print_format (var);
/* Returns the availability class of summary function F, looked up in a
   local table built from the AVAILABILITY column of ctables.inc. */
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
/* Parses a summary function name from the current token of LEXER, which
   must be an identifier, and reports what it found through FUNCTION,
   WEIGHTING, and AREA.

   A name is an optional U or E prefix, then an optional area-type name from
   ctables_area_type_name[], then a base name from ctables_function_info[].
   Names ending in .LCL, .UCL, or .SE are diagnosed as not yet implemented.
   A name that matches nothing gets the error "Syntax error expecting
   summary function name." */
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
/* Consume an optional weighting prefix from NAME: U in either case, or, only
   when there was no U, E in either case. */
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
/* Try each area-type name in enum order as a prefix of what is left. */
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
/* Look up the remaining text among the known base names. */
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
/* The prefixes actually present must be ones this function accepts, and an
   area prefix must be present exactly when the function is an area
   function. */
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Syntax error expecting summary function name."));
/* Writes the syntax name of FUNCTION into BUFFER, using snprintf() with
   BUFSIZE as the limit.  The name is built from three pieces: a weighting
   prefix ("U" for CTW_UNWEIGHTED, nothing for CTW_DICTIONARY, and "E" for
   the remaining weighting when the function accepts an E prefix), the name
   of AREA if the function is an area function, and the function's base
   name. */
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
/* Returns the default English label for FUNCTION with the given WEIGHTING
   and AREA.  The strings are only marked with N_() here; the caller
   ctables_summary_function_label() passes the result to
   pivot_value_new_text().

   Most functions have a plain label and an "Unweighted" one.  The counting
   functions distinguish three cases: dictionary weighting gets the plain
   label, any other weighted case gets "Adjusted", and no weighting gets
   "Unweighted".  Maximum, Minimum, and Range have a single label.  The
   area functions have one label per area type.

   CTSF_PTILE must not be passed in: its label needs the percentile value,
   which ctables_summary_function_label() formats itself. */
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
/* W: any weighting other than "unweighted".  D: dictionary weighting. */
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the default label for FUNCTION with the
   given WEIGHTING and AREA.

   For CTSF_PTILE the label is formatted here, with the percentile shown to
   two decimals and an "Unweighted" variant for CTW_UNWEIGHTED; the
   formatted string is handed to pivot_value_new_user_text_nocopy().  Every
   other function's label comes from ctables_summary_function_label__(). */
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries. */
/* One requested summary: what to compute and how to present it.
   add_summary_spec() creates these and ctables_summary_spec_clone() copies
   them. */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighted' specifies whether
445 to use weighted or unweighted data (for functions that do not support a
446 choice, it must be true). 'calc_area' is the area over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0). */
/* NOTE(review): the comment above describes a boolean 'weighted', but the
   member declared below is the enum 'weighting'. */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
/* Makes DST a copy of SRC.  The label is duplicated with
   xstrdup_if_nonnull(), so DST gets its own string, or NULL if SRC has no
   label. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
/* Releases what S owns.  NOTE(review): the body is not visible here;
   presumably it frees the label that ctables_summary_spec_clone() and
   add_summary_spec() duplicate -- confirm. */
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions. */
/* 'specs' is a growable array: add_summary_spec() appends to it, doubling
   it through x2nrealloc() when 'n' reaches 'allocated'. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs;
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.  Each spec is cloned individually into
   a freshly allocated array (NULL when SRC has no specs), and 'is_scale' is
   carried over.  The remaining initializer lines are not visible here, so
   check them to see which other members are copied. */
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
/* Releases the contents of SET: uninitializes each of its specs and frees
   the 'listwise_vars' array.  The visible lines do not free SET itself. */
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
/* Checks case C against the listwise variables in SPECS: each one's numeric
   value in C is tested with var_is_num_missing().  The values are read with
   case_num(), so the listwise variables are treated as numeric. */
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions. */
/* A named postcompute definition: its expression tree plus the summary
   specs and display option attached to it. */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
564 struct ctables_pcexpr *expr;
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
/* A node in a postcompute expression tree.  The node's operation, one of
   enum ctables_pcexpr_op, determines which of the data members applies, as
   the comment on each member says.  Nodes are created by the
   ctables_pcexpr_parse_*() functions and freed by
   ctables_pcexpr_destroy(). */
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE. */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
/* Forward declarations for functions defined later in the file, and the
   signature shared by the expression-level parsers, which lets
   ctables_pcexpr_parse_binary_operators() take the next precedence level
   as a function pointer. */
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
/* Releases the data that expression node E owns, according to its
   operation: the string of a string category, both bound strings of a
   string range, and, for operator nodes, both entries of 'subs'
   recursively.  The category operations listed last own nothing extra.
   E's source location is destroyed in every case. */
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
/* Allocates a new expression node with SUB0 and SUB1 as its two
   subexpressions.  The node's location is the merger of the two operands'
   locations, so both SUB0 and SUB1 must be non-null. */
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator. */
/* Pairs a lexer token with the expression operation it stands for. */
697 enum token_type token;
698 enum ctables_pcexpr_op op;
/* Searches the N_OPS entries of OPS for one whose token is LEXER's current
   token.  T_NEG_NUM is treated differently from the other tokens on a
   match; NOTE(review): the statement guarded by that test is not visible
   here -- presumably the token is consumed only when it is not a negative
   number, so the number can still be parsed as an operand. */
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators at one precedence level,
   given an already-parsed left operand LHS.

   Each pass looks for one of the N_OPS operators in OPS, parses the right
   operand with PARSE_NEXT_LEVEL, and folds it into LHS with
   ctables_pcexpr_allocate_binary(), so chains associate to the left.  LHS
   is destroyed on the path where the right operand is handled as a
   failure.  If CHAIN_WARNING is nonnull, it is issued as a warning at LHS's
   location when the chain turns out to hold more than one operator. */
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first a left operand
   via PARSE_NEXT_LEVEL, then the operator chain via
   ctables_pcexpr_parse_binary_operators__(). */
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
/* Declared ahead of its definition because ctables_pcexpr_parse_primary()
   calls it for parenthesized subexpressions. */
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression for the numeric range
   from LOW to HIGH.  Callers pass -DBL_MAX or DBL_MAX for an open end. */
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression for the string range from
   LOW to HIGH.  The substrings are stored as given, not copied.  Callers
   pass a substring with a null 'string' for an open end. */
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
/* Converts LEXER's current string token from UTF-8 into DICT's encoding and
   strips trailing spaces from the result.  The visible lines do not advance
   the lexer. */
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
/* Parses the primary level of a postcompute expression, one of:

   - a numeric constant;
   - MISSING, OTHERNM, or TOTAL;
   - SUBTOTAL, optionally followed by a bracketed integer index of at least
     1 (the index stays 0 when no bracket follows);
   - a bracketed category: a number, a string, or a range written with LO,
     THRU, and HI;
   - a parenthesized subexpression, parsed by ctables_pcexpr_parse_add().

   For every form except the parenthesized one, the result is a heap copy
   of the local expression E with its source location filled in. */
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
/* LO with a string upper bound: the low end is a null substring. */
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
/* HI: the high end is a null substring. */
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
893 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings already recoded
   into E. */
897 if (!lex_force_match (lexer, T_RBRACK))
899 if (e.op == CTPO_CAT_STRING)
900 ss_dealloc (&e.string);
901 else if (e.op == CTPO_CAT_SRANGE)
903 ss_dealloc (&e.srange[0]);
904 ss_dealloc (&e.srange[1]);
909 else if (lex_match (lexer, T_LPAREN))
911 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
914 if (!lex_force_match (lexer, T_RPAREN))
916 ctables_pcexpr_destroy (ep);
923 lex_error (lexer, NULL);
/* The location spans from the first token of the primary to the last token
   consumed. */
927 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
928 return xmemdup (&e, sizeof e);
/* Allocates a new expression node around SUB (by its name, a negation).
   The node's location runs from token offset START_OFS to the token just
   before LEXER's current one. */
931 static struct ctables_pcexpr *
932 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
933 struct lexer *lexer, int start_ofs)
935 struct ctables_pcexpr *e = xmalloc (sizeof *e);
936 *e = (struct ctables_pcexpr) {
939 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Operands are primaries.  A chain of
   more than one `**' draws a warning, because the operator is
   left-associative.

   A negative-number token immediately followed by `**' gets special
   handling: the positive magnitude becomes the left operand of the
   exponentiation chain and the whole chain is then wrapped by
   ctables_pcexpr_allocate_neg(). */
944 static struct ctables_pcexpr *
945 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
947 static const struct operator op = { T_EXP, CTPO_POW };
949 const char *chain_warning =
950 _("The exponentiation operator (`**') is left-associative: "
951 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
952 "To disable this warning, insert parentheses.");
954 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
955 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
956 ctables_pcexpr_parse_primary,
959 /* Special case for situations like "-5**6", which must be parsed as
962 int start_ofs = lex_ofs (lexer);
963 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
964 *lhs = (struct ctables_pcexpr) {
966 .number = -lex_tokval (lexer),
967 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
971 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
973 ctables_pcexpr_parse_primary, chain_warning, lhs);
977 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
980 /* Parses the unary minus level. */
/* Without a leading dash this defers to ctables_pcexpr_parse_exp().  With
   one, it recurses on itself, so repeated dashes nest, and wraps the result
   with ctables_pcexpr_allocate_neg(). */
981 static struct ctables_pcexpr *
982 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
984 int start_ofs = lex_ofs (lexer);
985 if (!lex_match (lexer, T_DASH))
986 return ctables_pcexpr_parse_exp (lexer, dict);
988 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
992 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
995 /* Parses the multiplication and division level. */
/* Operands come from the unary minus level.  No chain warning is given. */
996 static struct ctables_pcexpr *
997 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
999 static const struct operator ops[] =
1001 { T_ASTERISK, CTPO_MUL },
1002 { T_SLASH, CTPO_DIV },
1005 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1006 sizeof ops / sizeof *ops,
1007 ctables_pcexpr_parse_neg, NULL);
1010 /* Parses the addition and subtraction level. */
/* Operands come from the multiplication level.  A negative-number token is
   listed as an addition operator, so a negative number that follows an
   operand is added to it. */
1011 static struct ctables_pcexpr *
1012 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1014 static const struct operator ops[] =
1016 { T_PLUS, CTPO_ADD },
1017 { T_DASH, CTPO_SUB },
1018 { T_NEG_NUM, CTPO_ADD },
1021 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1022 ops, sizeof ops / sizeof *ops,
1023 ctables_pcexpr_parse_mul, NULL);
1026 /* CTABLES axis expressions. */
1028 /* CTABLES has a number of extra formats that we implement via custom
1029 currency specifications on an alternate fmt_settings. */
/* parse_ctables_format_specifier() maps the format names NEGPAREN, NEQUAL,
   PAREN, and PCTPAREN onto these. */
1030 #define CTEF_NEGPAREN FMT_CCA
1031 #define CTEF_NEQUAL FMT_CCB
1032 #define CTEF_PAREN FMT_CCC
1033 #define CTEF_PCTPAREN FMT_CCD
/* Selects one of the summary spec sets kept per axis variable.  The members
   are not visible here; elsewhere in this file CSV_CELL and CSV_TOTAL are
   used as values and N_CSVS sizes the per-axis 'specs' array. */
1035 enum ctables_summary_variant
/* Kind of axis expression node.  The members are not visible here;
   elsewhere in this file CTAO_VAR marks a leaf that names a variable. */
1044 enum ctables_axis_op
/* Members of an axis expression node.  A CTAO_VAR leaf uses 'var' and
   'specs' (one summary spec set per summary variant); a nonterminal node
   uses 'subs'.  'loc' is the node's source location. */
1060 struct variable *var;
1062 struct ctables_summary_spec_set specs[N_CSVS];
1066 struct ctables_axis *subs[2];
1069 struct msg_location *loc;
/* Releases what AXIS owns: its summary spec sets, its two subaxes
   (recursively), and its source location.  NOTE(review): the lines that
   choose between the spec-set and subaxis cleanup are not visible here;
   presumably they switch on the node's operation. */
1073 ctables_axis_destroy (struct ctables_axis *axis)
1081 for (size_t i = 0; i < N_CSVS; i++)
1082 ctables_summary_spec_set_uninit (&axis->specs[i]);
1087 ctables_axis_destroy (axis->subs[0]);
1088 ctables_axis_destroy (axis->subs[1]);
1091 msg_location_destroy (axis->loc);
/* Allocates a new axis node that joins SUB0 and SUB1.  The node's location
   runs from token offset START_OFS to the token just before LEXER's current
   one. */
1095 static struct ctables_axis *
1096 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1097 struct ctables_axis *sub0,
1098 struct ctables_axis *sub1,
1099 struct lexer *lexer, int start_ofs)
1101 struct ctables_axis *axis = xmalloc (sizeof *axis);
1102 *axis = (struct ctables_axis) {
1104 .subs = { sub0, sub1 },
1105 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State passed through the axis expression parsers: the lexer to read from
   and the dictionary in which variable names are looked up. */
1110 struct ctables_axis_parse_ctx
1112 struct lexer *lexer;
1113 struct dictionary *dict;
/* Returns a new pivot_value containing the label for SPEC.

   One path returns the default label from
   ctables_summary_function_label().  The other works from SPEC's own label
   text: each occurrence of ")CILEVEL" is replaced by CILEVEL formatted with
   "%g", and everything else is copied through unchanged. */
1116 static struct pivot_value *
1117 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1120 return ctables_summary_function_label (spec->function, spec->weighting,
1121 spec->user_area, spec->percentile);
1124 struct substring in = ss_cstr (spec->label);
1125 struct substring target = ss_cstr (")CILEVEL");
1127 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text before the next occurrence of TARGET, then step past it. */
1130 size_t chunk = ss_find_substring (in, target);
1131 ds_put_substring (&out, ss_head (in, chunk));
1132 ss_advance (&in, chunk);
1134 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1136 ss_advance (&in, target.length);
1137 ds_put_format (&out, "%g", cilevel);
/* Attaches a summary specification to every variable under AXIS.

   For a CTAO_VAR leaf, the function's availability is checked against the
   variable first: a scale-only function on a non-scale variable is
   diagnosed unless SV is CSV_TOTAL.  The spec is then appended to the
   leaf's spec set for SV, growing the array as needed.  LABEL is
   duplicated, so the caller keeps ownership of its copy.  When FORMAT is
   null, the function's default format for the variable is used.

   For any other node, the call is repeated on both subaxes.

   LOC is the location of the specification in the syntax and is used for
   error messages. */
1143 add_summary_spec (struct ctables_axis *axis,
1144 enum ctables_summary_function function,
1145 enum ctables_weighting weighting,
1146 enum ctables_area_type area, double percentile,
1147 const char *label, const struct fmt_spec *format,
1148 bool is_ctables_format, const struct msg_location *loc,
1149 enum ctables_summary_variant sv)
1151 if (axis->op == CTAO_VAR)
/* Spell out the function name for use in error messages. */
1153 char function_name[128];
1154 ctables_summary_function_name (function, weighting, area,
1155 function_name, sizeof function_name);
1156 const char *var_name = var_get_name (axis->var);
1157 switch (ctables_function_availability (function))
1161 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1162 "response sets."), function_name);
1163 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1169 if (!axis->scale && sv != CSV_TOTAL)
1172 _("Summary function %s applies only to scale variables."),
1174 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1184 struct ctables_summary_spec_set *set = &axis->specs[sv];
1185 if (set->n >= set->allocated)
1186 set->specs = x2nrealloc (set->specs, &set->allocated,
1187 sizeof *set->specs);
1189 struct ctables_summary_spec *dst = &set->specs[set->n++];
1190 *dst = (struct ctables_summary_spec) {
1191 .function = function,
1192 .weighting = weighting,
1195 .percentile = percentile,
1196 .label = xstrdup_if_nonnull (label),
1197 .format = (format ? *format
1198 : ctables_summary_default_format (function, axis->var)),
1199 .is_ctables_format = is_ctables_format,
1205 for (size_t i = 0; i < 2; i++)
1206 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1207 percentile, label, format, is_ctables_format,
/* Declared ahead of its definition because ctables_axis_parse_primary()
   calls it for parenthesized subexpressions. */
1214 static struct ctables_axis *ctables_axis_parse_stack (
1215 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: a parenthesized axis expression, or a
   variable name optionally followed by [S] or [C].

   [S] makes the variable scale and [C] makes it categorical; with neither,
   the variable's measurement level decides.  A string variable used as
   scale is an error.  Names starting with '$' are rejected because
   multiple response sets are not implemented. */
1217 static struct ctables_axis *
1218 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1220 if (lex_match (ctx->lexer, T_LPAREN))
1222 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1223 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1225 ctables_axis_destroy (sub);
1231 if (!lex_force_id (ctx->lexer))
1234 if (lex_tokcstr (ctx->lexer)[0] == '$')
1236 lex_error (ctx->lexer,
1237 _("Multiple response set support not implemented."));
1241 int start_ofs = lex_ofs (ctx->lexer);
1242 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1246 struct ctables_axis *axis = xmalloc (sizeof *axis);
1247 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1249 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1250 : lex_match_phrase (ctx->lexer, "[C]") ? false
1251 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S] or [C] suffix. */
1252 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1253 lex_ofs (ctx->lexer) - 1);
1254 if (axis->scale && var_is_alpha (var))
1256 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1258 var_get_name (var));
1259 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false
   otherwise (including when S is the empty string).  S must be a
   null-terminated string. */
static bool
has_digit (const char *s)
{
  /* strcspn() gives the offset of the first digit in S, or of the
     terminating null byte if S has no digit. */
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format specifier from LEXER into FORMAT.

   The four CTABLES-only format names NEGPAREN, NEQUAL, PAREN, and PCTPAREN
   (matched without regard to case) are mapped to the CTEF_* types.  For
   them the width must be at least 2 and the decimals must be no more than
   the width minus 1; when that holds, *IS_CTABLES_FORMAT is set to true.

   Any other name is parsed as an ordinary format, which must be valid for
   output and for numeric values; *IS_CTABLES_FORMAT is then set to
   false. */
1273 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1274 bool *is_ctables_format)
1276 char type[FMT_TYPE_LEN_MAX + 1];
1277 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1280 if (!strcasecmp (type, "NEGPAREN"))
1281 format->type = CTEF_NEGPAREN;
1282 else if (!strcasecmp (type, "NEQUAL"))
1283 format->type = CTEF_NEQUAL;
1284 else if (!strcasecmp (type, "PAREN"))
1285 format->type = CTEF_PAREN;
1286 else if (!strcasecmp (type, "PCTPAREN"))
1287 format->type = CTEF_PCTPAREN;
1290 *is_ctables_format = false;
1291 return (parse_format_specifier (lexer, format)
1292 && fmt_check_output (format)
1293 && fmt_check_type_compat (format, VAL_NUMERIC));
1299 lex_next_error (lexer, -1, -1,
1300 _("Output format %s requires width 2 or greater."), type);
1303 else if (format->d > format->w - 1)
1305 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1306 "greater than decimals."), type);
1311 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a summary function name, then a percentile from 0
   to 100 if the function is PTILE, then an optional string label, then an
   optional format (recognized as an identifier that contains a digit).
   Each one is attached to the variables under the primary with
   add_summary_spec().  A comma between specifications is optional.  While
   in the cell list, TOTALS followed by '[' opens a nested list; by the
   closing test on SV below, that nested list is read with SV equal to
   CSV_TOTAL and needs its own ']'.

   The primary is destroyed on the error path. */
1316 static struct ctables_axis *
1317 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1319 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1320 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1323 enum ctables_summary_variant sv = CSV_CELL;
1326 int start_ofs = lex_ofs (ctx->lexer);
1328 /* Parse function. */
1329 enum ctables_summary_function function;
1330 enum ctables_weighting weighting;
1331 enum ctables_area_type area;
1332 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1336 /* Parse percentile. */
1337 double percentile = 0;
1338 if (function == CTSF_PTILE)
1340 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1342 percentile = lex_number (ctx->lexer);
1343 lex_get (ctx->lexer);
1348 if (lex_is_string (ctx->lexer))
1350 label = ss_xstrdup (lex_tokss (ctx->lexer));
1351 lex_get (ctx->lexer);
1355 struct fmt_spec format;
1356 const struct fmt_spec *formatp;
1357 bool is_ctables_format = false;
1358 if (lex_token (ctx->lexer) == T_ID
1359 && has_digit (lex_tokcstr (ctx->lexer)))
1361 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1362 &is_ctables_format))
/* The location handed to add_summary_spec() covers the whole specification
   and is destroyed right after the call. */
1372 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1373 lex_ofs (ctx->lexer) - 1);
1374 add_summary_spec (sub, function, weighting, area, percentile, label,
1375 formatp, is_ctables_format, loc, sv);
1377 msg_location_destroy (loc);
1379 lex_match (ctx->lexer, T_COMMA);
1380 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1382 if (!lex_force_match (ctx->lexer, T_LBRACK))
1386 else if (lex_match (ctx->lexer, T_RBRACK))
1388 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1395 ctables_axis_destroy (sub);
1399 static const struct ctables_axis *
1400 find_scale (const struct ctables_axis *axis)
1404 else if (axis->op == CTAO_VAR)
1405 return axis->scale ? axis : NULL;
1408 for (size_t i = 0; i < 2; i++)
1410 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1418 static const struct ctables_axis *
1419 find_categorical_summary_spec (const struct ctables_axis *axis)
1423 else if (axis->op == CTAO_VAR)
1424 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1427 for (size_t i = 0; i < 2; i++)
1429 const struct ctables_axis *sum
1430 = find_categorical_summary_spec (axis->subs[i]);
1438 static struct ctables_axis *
1439 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1441 int start_ofs = lex_ofs (ctx->lexer);
1442 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1446 while (lex_match (ctx->lexer, T_GT))
1448 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1451 ctables_axis_destroy (lhs);
1455 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1456 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1458 const struct ctables_axis *outer_scale = find_scale (lhs);
1459 const struct ctables_axis *inner_scale = find_scale (rhs);
1460 if (outer_scale && inner_scale)
1462 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1463 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1464 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1465 ctables_axis_destroy (nest);
1469 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1472 msg_at (SE, nest->loc,
1473 _("Summaries may only be requested for categorical variables "
1474 "at the innermost nesting level."));
1475 msg_at (SN, outer_sum->loc,
1476 _("This outer categorical variable has a summary."));
1477 ctables_axis_destroy (nest);
1487 static struct ctables_axis *
1488 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1490 int start_ofs = lex_ofs (ctx->lexer);
1491 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1495 while (lex_match (ctx->lexer, T_PLUS))
1497 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1500 ctables_axis_destroy (lhs);
1504 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1505 ctx->lexer, start_ofs);
1512 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1513 struct ctables_axis **axisp)
1516 if (lex_token (lexer) == T_BY
1517 || lex_token (lexer) == T_SLASH
1518 || lex_token (lexer) == T_ENDCMD)
1521 struct ctables_axis_parse_ctx ctx = {
1525 *axisp = ctables_axis_parse_stack (&ctx);
1529 /* CTABLES categories. */
1531 struct ctables_categories
1534 struct ctables_category *cats;
1539 struct ctables_category
1541 enum ctables_category_type
1543 /* Explicit category lists. */
1546 CCT_NRANGE, /* Numerical range. */
1547 CCT_SRANGE, /* String range. */
1552 /* Totals and subtotals. */
1556 /* Implicit category lists. */
1561 /* For contributing to TOTALN. */
1562 CCT_EXCLUDED_MISSING,
1566 struct ctables_category *subtotal;
1572 double number; /* CCT_NUMBER. */
1573 struct substring string; /* CCT_STRING, in dictionary encoding. */
1574 double nrange[2]; /* CCT_NRANGE. */
1575 struct substring srange[2]; /* CCT_SRANGE. */
1579 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1580 bool hide_subcategories; /* CCT_SUBTOTAL. */
1583 /* CCT_POSTCOMPUTE. */
1586 const struct ctables_postcompute *pc;
1587 enum fmt_type parse_format;
1590 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1593 bool include_missing;
1594 bool sort_ascending;
1597 enum ctables_summary_function sort_function;
1598 enum ctables_weighting weighting;
1599 enum ctables_area_type area;
1600 struct variable *sort_var;
1605 /* Source location (sometimes NULL). */
1606 struct msg_location *location;
1610 ctables_category_uninit (struct ctables_category *cat)
1615 msg_location_destroy (cat->location);
1622 case CCT_POSTCOMPUTE:
1626 ss_dealloc (&cat->string);
1630 ss_dealloc (&cat->srange[0]);
1631 ss_dealloc (&cat->srange[1]);
1636 free (cat->total_label);
1644 case CCT_EXCLUDED_MISSING:
1650 nullable_substring_equal (const struct substring *a,
1651 const struct substring *b)
1653 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
1657 ctables_category_equal (const struct ctables_category *a,
1658 const struct ctables_category *b)
1660 if (a->type != b->type)
1666 return a->number == b->number;
1669 return ss_equals (a->string, b->string);
1672 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1675 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1676 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1682 case CCT_POSTCOMPUTE:
1683 return a->pc == b->pc;
1687 return !strcmp (a->total_label, b->total_label);
1692 return (a->include_missing == b->include_missing
1693 && a->sort_ascending == b->sort_ascending
1694 && a->sort_function == b->sort_function
1695 && a->sort_var == b->sort_var
1696 && a->percentile == b->percentile);
1698 case CCT_EXCLUDED_MISSING:
1706 ctables_categories_unref (struct ctables_categories *c)
1711 assert (c->n_refs > 0);
1715 for (size_t i = 0; i < c->n_cats; i++)
1716 ctables_category_uninit (&c->cats[i]);
1722 ctables_categories_equal (const struct ctables_categories *a,
1723 const struct ctables_categories *b)
1725 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1728 for (size_t i = 0; i < a->n_cats; i++)
1729 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
1735 static struct ctables_category
1736 cct_nrange (double low, double high)
1738 return (struct ctables_category) {
1740 .nrange = { low, high }
1744 static struct ctables_category
1745 cct_srange (struct substring low, struct substring high)
1747 return (struct ctables_category) {
1749 .srange = { low, high }
1754 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1755 struct ctables_category *cat)
1758 if (lex_match (lexer, T_EQUALS))
1760 if (!lex_force_string (lexer))
1763 total_label = ss_xstrdup (lex_tokss (lexer));
1767 total_label = xstrdup (_("Subtotal"));
1769 *cat = (struct ctables_category) {
1770 .type = CCT_SUBTOTAL,
1771 .hide_subcategories = hide_subcategories,
1772 .total_label = total_label
1778 ctables_table_parse_explicit_category (struct lexer *lexer,
1779 struct dictionary *dict,
1781 struct ctables_category *cat)
1783 if (lex_match_id (lexer, "OTHERNM"))
1784 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1785 else if (lex_match_id (lexer, "MISSING"))
1786 *cat = (struct ctables_category) { .type = CCT_MISSING };
1787 else if (lex_match_id (lexer, "SUBTOTAL"))
1788 return ctables_table_parse_subtotal (lexer, false, cat);
1789 else if (lex_match_id (lexer, "HSUBTOTAL"))
1790 return ctables_table_parse_subtotal (lexer, true, cat);
1791 else if (lex_match_id (lexer, "LO"))
1793 if (!lex_force_match_id (lexer, "THRU"))
1795 if (lex_is_string (lexer))
1797 struct substring sr0 = { .string = NULL };
1798 struct substring sr1 = parse_substring (lexer, dict);
1799 *cat = cct_srange (sr0, sr1);
1801 else if (lex_force_num (lexer))
1803 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1809 else if (lex_is_number (lexer))
1811 double number = lex_number (lexer);
1813 if (lex_match_id (lexer, "THRU"))
1815 if (lex_match_id (lexer, "HI"))
1816 *cat = cct_nrange (number, DBL_MAX);
1819 if (!lex_force_num (lexer))
1821 *cat = cct_nrange (number, lex_number (lexer));
1826 *cat = (struct ctables_category) {
1831 else if (lex_is_string (lexer))
1833 struct substring s = parse_substring (lexer, dict);
1834 if (lex_match_id (lexer, "THRU"))
1836 if (lex_match_id (lexer, "HI"))
1838 struct substring sr1 = { .string = NULL };
1839 *cat = cct_srange (s, sr1);
1843 if (!lex_force_string (lexer))
1848 struct substring sr1 = parse_substring (lexer, dict);
1849 *cat = cct_srange (s, sr1);
1853 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1855 else if (lex_match (lexer, T_AND))
1857 if (!lex_force_id (lexer))
1859 struct ctables_postcompute *pc = ctables_find_postcompute (
1860 ct, lex_tokcstr (lexer));
1863 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1864 msg_at (SE, loc, _("Unknown postcompute &%s."),
1865 lex_tokcstr (lexer));
1866 msg_location_destroy (loc);
1871 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1875 lex_error (lexer, NULL);
1883 parse_category_string (struct msg_location *location,
1884 struct substring s, const struct dictionary *dict,
1885 enum fmt_type format, double *n)
1888 char *error = data_in (s, dict_get_encoding (dict), format,
1889 settings_get_fmt_settings (), &v, 0, NULL);
1892 msg_at (SE, location,
1893 _("Failed to parse category specification as format %s: %s."),
1894 fmt_name (format), error);
1903 static struct ctables_category *
1904 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1905 const struct ctables_pcexpr *e)
1907 struct ctables_category *best = NULL;
1908 size_t n_subtotals = 0;
1909 for (size_t i = 0; i < cats->n_cats; i++)
1911 struct ctables_category *cat = &cats->cats[i];
1914 case CTPO_CAT_NUMBER:
1915 if (cat->type == CCT_NUMBER && cat->number == e->number)
1919 case CTPO_CAT_STRING:
1920 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1924 case CTPO_CAT_NRANGE:
1925 if (cat->type == CCT_NRANGE
1926 && cat->nrange[0] == e->nrange[0]
1927 && cat->nrange[1] == e->nrange[1])
1931 case CTPO_CAT_SRANGE:
1932 if (cat->type == CCT_SRANGE
1933 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1934 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1938 case CTPO_CAT_MISSING:
1939 if (cat->type == CCT_MISSING)
1943 case CTPO_CAT_OTHERNM:
1944 if (cat->type == CCT_OTHERNM)
1948 case CTPO_CAT_SUBTOTAL:
1949 if (cat->type == CCT_SUBTOTAL)
1952 if (e->subtotal_index == n_subtotals)
1954 else if (e->subtotal_index == 0)
1959 case CTPO_CAT_TOTAL:
1960 if (cat->type == CCT_TOTAL)
1974 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1979 static struct ctables_category *
1980 ctables_find_category_for_postcompute (const struct dictionary *dict,
1981 const struct ctables_categories *cats,
1982 enum fmt_type parse_format,
1983 const struct ctables_pcexpr *e)
1985 if (parse_format != FMT_F)
1987 if (e->op == CTPO_CAT_STRING)
1990 if (!parse_category_string (e->location, e->string, dict,
1991 parse_format, &number))
1994 struct ctables_pcexpr e2 = {
1995 .op = CTPO_CAT_NUMBER,
1997 .location = e->location,
1999 return ctables_find_category_for_postcompute__ (cats, &e2);
2001 else if (e->op == CTPO_CAT_SRANGE)
2004 if (!e->srange[0].string)
2005 nrange[0] = -DBL_MAX;
2006 else if (!parse_category_string (e->location, e->srange[0], dict,
2007 parse_format, &nrange[0]))
2010 if (!e->srange[1].string)
2011 nrange[1] = DBL_MAX;
2012 else if (!parse_category_string (e->location, e->srange[1], dict,
2013 parse_format, &nrange[1]))
2016 struct ctables_pcexpr e2 = {
2017 .op = CTPO_CAT_NRANGE,
2018 .nrange = { nrange[0], nrange[1] },
2019 .location = e->location,
2021 return ctables_find_category_for_postcompute__ (cats, &e2);
2024 return ctables_find_category_for_postcompute__ (cats, e);
2027 static struct substring
2028 rtrim_value (const union value *v, const struct variable *var)
2030 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2031 var_get_width (var));
2032 ss_rtrim (&s, ss_cstr (" "));
2037 in_string_range (const union value *v, const struct variable *var,
2038 const struct substring *srange)
2040 struct substring s = rtrim_value (v, var);
2041 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2042 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
2045 static const struct ctables_category *
2046 ctables_categories_match (const struct ctables_categories *c,
2047 const union value *v, const struct variable *var)
2049 if (var_is_numeric (var) && v->f == SYSMIS)
2052 const struct ctables_category *othernm = NULL;
2053 for (size_t i = c->n_cats; i-- > 0; )
2055 const struct ctables_category *cat = &c->cats[i];
2059 if (cat->number == v->f)
2064 if (ss_equals (cat->string, rtrim_value (v, var)))
2069 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2070 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2075 if (in_string_range (v, var, cat->srange))
2080 if (var_is_value_missing (var, v))
2084 case CCT_POSTCOMPUTE:
2099 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2102 case CCT_EXCLUDED_MISSING:
2107 return var_is_value_missing (var, v) ? NULL : othernm;
2110 static const struct ctables_category *
2111 ctables_categories_total (const struct ctables_categories *c)
2113 const struct ctables_category *first = &c->cats[0];
2114 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2115 return (first->type == CCT_TOTAL ? first
2116 : last->type == CCT_TOTAL ? last
2121 ctables_category_format_number (double number, const struct variable *var,
2124 struct pivot_value *pv = pivot_value_new_var_value (
2125 var, &(union value) { .f = number });
2126 pivot_value_format (pv, NULL, s);
2127 pivot_value_destroy (pv);
2131 ctables_category_format_string (struct substring string,
2132 const struct variable *var, struct string *out)
2134 int width = var_get_width (var);
2135 char *s = xmalloc (width);
2136 buf_copy_rpad (s, width, string.string, string.length, ' ');
2137 struct pivot_value *pv = pivot_value_new_var_value (
2138 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2139 pivot_value_format (pv, NULL, out);
2140 pivot_value_destroy (pv);
2145 ctables_category_format_label (const struct ctables_category *cat,
2146 const struct variable *var,
2152 ctables_category_format_number (cat->number, var, s);
2156 ctables_category_format_string (cat->string, var, s);
2160 ctables_category_format_number (cat->nrange[0], var, s);
2161 ds_put_format (s, " THRU ");
2162 ctables_category_format_number (cat->nrange[1], var, s);
2166 ctables_category_format_string (cat->srange[0], var, s);
2167 ds_put_format (s, " THRU ");
2168 ctables_category_format_string (cat->srange[1], var, s);
2172 ds_put_cstr (s, "MISSING");
2176 ds_put_cstr (s, "OTHERNM");
2179 case CCT_POSTCOMPUTE:
2180 ds_put_format (s, "&%s", cat->pc->name);
2185 ds_put_cstr (s, cat->total_label);
2191 case CCT_EXCLUDED_MISSING:
2199 ctables_recursive_check_postcompute (struct dictionary *dict,
2200 const struct ctables_pcexpr *e,
2201 struct ctables_category *pc_cat,
2202 const struct ctables_categories *cats,
2203 const struct msg_location *cats_location)
2207 case CTPO_CAT_NUMBER:
2208 case CTPO_CAT_STRING:
2209 case CTPO_CAT_NRANGE:
2210 case CTPO_CAT_SRANGE:
2211 case CTPO_CAT_MISSING:
2212 case CTPO_CAT_OTHERNM:
2213 case CTPO_CAT_SUBTOTAL:
2214 case CTPO_CAT_TOTAL:
2216 struct ctables_category *cat = ctables_find_category_for_postcompute (
2217 dict, cats, pc_cat->parse_format, e);
2220 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2222 size_t n_subtotals = 0;
2223 for (size_t i = 0; i < cats->n_cats; i++)
2224 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2225 if (n_subtotals > 1)
2227 msg_at (SE, cats_location,
2228 ngettext ("These categories include %zu instance "
2229 "of SUBTOTAL or HSUBTOTAL, so references "
2230 "from computed categories must refer to "
2231 "subtotals by position, "
2232 "e.g. SUBTOTAL[1].",
2233 "These categories include %zu instances "
2234 "of SUBTOTAL or HSUBTOTAL, so references "
2235 "from computed categories must refer to "
2236 "subtotals by position, "
2237 "e.g. SUBTOTAL[1].",
2240 msg_at (SN, e->location,
2241 _("This is the reference that lacks a position."));
2246 msg_at (SE, pc_cat->location,
2247 _("Computed category &%s references a category not included "
2248 "in the category list."),
2250 msg_at (SN, e->location, _("This is the missing category."));
2251 if (e->op == CTPO_CAT_SUBTOTAL)
2252 msg_at (SN, cats_location,
2253 _("To fix the problem, add subtotals to the "
2254 "list of categories here."));
2255 else if (e->op == CTPO_CAT_TOTAL)
2256 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2257 "CATEGORIES specification."));
2259 msg_at (SN, cats_location,
2260 _("To fix the problem, add the missing category to the "
2261 "list of categories here."));
2264 if (pc_cat->pc->hide_source_cats)
2278 for (size_t i = 0; i < 2; i++)
2279 if (e->subs[i] && !ctables_recursive_check_postcompute (
2280 dict, e->subs[i], pc_cat, cats, cats_location))
2288 static struct pivot_value *
2289 ctables_postcompute_label (const struct ctables_categories *cats,
2290 const struct ctables_category *cat,
2291 const struct variable *var)
2293 struct substring in = ss_cstr (cat->pc->label);
2294 struct substring target = ss_cstr (")LABEL[");
2296 struct string out = DS_EMPTY_INITIALIZER;
2299 size_t chunk = ss_find_substring (in, target);
2300 if (chunk == SIZE_MAX)
2302 if (ds_is_empty (&out))
2303 return pivot_value_new_user_text (in.string, in.length);
2306 ds_put_substring (&out, in);
2307 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
2311 ds_put_substring (&out, ss_head (in, chunk));
2312 ss_advance (&in, chunk + target.length);
2314 struct substring idx_s;
2315 if (!ss_get_until (&in, ']', &idx_s))
2318 long int idx = strtol (idx_s.string, &tail, 10);
2319 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2322 struct ctables_category *cat2 = &cats->cats[idx - 1];
2323 if (!ctables_category_format_label (cat2, var, &out))
2329 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
2332 static struct pivot_value *
2333 ctables_category_create_value_label (const struct ctables_categories *cats,
2334 const struct ctables_category *cat,
2335 const struct variable *var,
2336 const union value *value)
2338 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2339 ? ctables_postcompute_label (cats, cat, var)
2340 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2341 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2342 : pivot_value_new_var_value (var, value));
2345 /* CTABLES variable nesting and stacking. */
2347 /* A nested sequence of variables, e.g. a > b > c. */
2350 struct variable **vars;
2354 size_t *areas[N_CTATS];
2355 size_t n_areas[N_CTATS];
2358 struct ctables_summary_spec_set specs[N_CSVS];
2361 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2362 struct ctables_stack
2364 struct ctables_nest *nests;
2369 ctables_nest_uninit (struct ctables_nest *nest)
2372 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2373 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2374 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2375 free (nest->areas[at]);
2379 ctables_stack_uninit (struct ctables_stack *stack)
2383 for (size_t i = 0; i < stack->n; i++)
2384 ctables_nest_uninit (&stack->nests[i]);
2385 free (stack->nests);
2389 static struct ctables_stack
2390 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2397 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2398 for (size_t i = 0; i < s0.n; i++)
2399 for (size_t j = 0; j < s1.n; j++)
2401 const struct ctables_nest *a = &s0.nests[i];
2402 const struct ctables_nest *b = &s1.nests[j];
2404 size_t allocate = a->n + b->n;
2405 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2407 for (size_t k = 0; k < a->n; k++)
2408 vars[n++] = a->vars[k];
2409 for (size_t k = 0; k < b->n; k++)
2410 vars[n++] = b->vars[k];
2411 assert (n == allocate);
2413 const struct ctables_nest *summary_src;
2414 if (!a->specs[CSV_CELL].var)
2416 else if (!b->specs[CSV_CELL].var)
2421 struct ctables_nest *new = &stack.nests[stack.n++];
2422 *new = (struct ctables_nest) {
2424 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2425 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2427 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2428 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2432 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2433 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2435 ctables_stack_uninit (&s0);
2436 ctables_stack_uninit (&s1);
2440 static struct ctables_stack
2441 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2443 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2444 for (size_t i = 0; i < s0.n; i++)
2445 stack.nests[stack.n++] = s0.nests[i];
2446 for (size_t i = 0; i < s1.n; i++)
2448 stack.nests[stack.n] = s1.nests[i];
2449 stack.nests[stack.n].group_head += s0.n;
2452 assert (stack.n == s0.n + s1.n);
2458 static struct ctables_stack
2459 var_fts (const struct ctables_axis *a)
2461 struct variable **vars = xmalloc (sizeof *vars);
2464 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2465 struct ctables_nest *nest = xmalloc (sizeof *nest);
2466 *nest = (struct ctables_nest) {
2469 .scale_idx = a->scale ? 0 : SIZE_MAX,
2470 .summary_idx = is_summary ? 0 : SIZE_MAX,
2473 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2475 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2476 nest->specs[sv].var = a->var;
2477 nest->specs[sv].is_scale = a->scale;
2479 return (struct ctables_stack) { .nests = nest, .n = 1 };
2482 static struct ctables_stack
2483 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2486 return (struct ctables_stack) { .n = 0 };
2494 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2495 enumerate_fts (axis_type, a->subs[1]));
2498 /* This should consider any of the scale variables found in the result to
2499 be linked to each other listwise for SMISSING=LISTWISE. */
2500 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2501 enumerate_fts (axis_type, a->subs[1]));
2507 /* CTABLES summary calculation. */
2509 union ctables_summary
2511 /* COUNT, VALIDN, TOTALN. */
2514 /* MINIMUM, MAXIMUM, RANGE. */
2521 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2522 struct moments1 *moments;
2524 /* MEDIAN, MODE, PTILE. */
2527 struct casewriter *writer;
2534 ctables_summary_init (union ctables_summary *s,
2535 const struct ctables_summary_spec *ss)
2537 switch (ss->function)
2540 case CTSF_areaPCT_COUNT:
2541 case CTSF_areaPCT_VALIDN:
2542 case CTSF_areaPCT_TOTALN:
2555 s->min = s->max = SYSMIS;
2560 case CTSF_areaPCT_SUM:
2561 s->moments = moments1_create (MOMENT_MEAN);
2567 s->moments = moments1_create (MOMENT_VARIANCE);
2574 struct caseproto *proto = caseproto_create ();
2575 proto = caseproto_add_width (proto, 0);
2576 proto = caseproto_add_width (proto, 0);
2578 struct subcase ordering;
2579 subcase_init (&ordering, 0, 0, SC_ASCEND);
2580 s->writer = sort_create_writer (&ordering, proto);
2581 subcase_uninit (&ordering);
2582 caseproto_unref (proto);
2592 ctables_summary_uninit (union ctables_summary *s,
2593 const struct ctables_summary_spec *ss)
2595 switch (ss->function)
2598 case CTSF_areaPCT_COUNT:
2599 case CTSF_areaPCT_VALIDN:
2600 case CTSF_areaPCT_TOTALN:
2619 case CTSF_areaPCT_SUM:
2620 moments1_destroy (s->moments);
2626 casewriter_destroy (s->writer);
2632 ctables_summary_add (union ctables_summary *s,
2633 const struct ctables_summary_spec *ss,
2634 const union value *value,
2635 bool is_missing, bool is_included,
2638 /* To determine whether a case is included in a given table for a particular
2639 kind of summary, consider the following charts for the variable being
2640 summarized. Only if "yes" appears is the case counted.
2642 Categorical variables: VALIDN other TOTALN
2643 Valid values in included categories yes yes yes
2644 Missing values in included categories --- yes yes
2645 Missing values in excluded categories --- --- yes
2646 Valid values in excluded categories --- --- ---
2648 Scale variables: VALIDN other TOTALN
2649 Valid value yes yes yes
2650 Missing value --- yes yes
2652 Missing values include both user- and system-missing. (The system-missing
2653 value is always in an excluded category.)
2655 One way to interpret the above table is that scale variables are like
2656 categorical variables in which all values are in included categories.
2658 switch (ss->function)
2661 case CTSF_areaPCT_TOTALN:
2666 case CTSF_areaPCT_COUNT:
2672 case CTSF_areaPCT_VALIDN:
2690 if (s->min == SYSMIS || value->f < s->min)
2692 if (s->max == SYSMIS || value->f > s->max)
2703 moments1_add (s->moments, value->f, weight);
2706 case CTSF_areaPCT_SUM:
2708 moments1_add (s->moments, value->f, weight);
2716 s->ovalid += weight;
2718 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2719 *case_num_rw_idx (c, 0) = value->f;
2720 *case_num_rw_idx (c, 1) = weight;
2721 casewriter_write (s->writer, c);
2728 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2729 union ctables_summary *s,
2730 const struct ctables_summary_spec *ss)
2732 switch (ss->function)
2738 return areas[ss->calc_area]->sequence;
2740 case CTSF_areaPCT_COUNT:
2742 const struct ctables_area *a = areas[ss->calc_area];
2743 double a_count = a->count[ss->weighting];
2744 return a_count ? s->count / a_count * 100 : SYSMIS;
2747 case CTSF_areaPCT_VALIDN:
2749 const struct ctables_area *a = areas[ss->calc_area];
2750 double a_valid = a->valid[ss->weighting];
2751 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2754 case CTSF_areaPCT_TOTALN:
2756 const struct ctables_area *a = areas[ss->calc_area];
2757 double a_total = a->total[ss->weighting];
2758 return a_total ? s->count / a_total * 100 : SYSMIS;
2773 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2778 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2784 double weight, variance;
2785 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2786 return calc_semean (variance, weight);
2792 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2793 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2798 double weight, mean;
2799 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2800 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2806 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2810 case CTSF_areaPCT_SUM:
2812 double weight, mean;
2813 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2814 if (weight == SYSMIS || mean == SYSMIS)
2817 const struct ctables_area *a = areas[ss->calc_area];
2818 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2819 double denom = sum->sum[ss->weighting];
2820 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2827 struct casereader *reader = casewriter_make_reader (s->writer);
2830 struct percentile *ptile = percentile_create (
2831 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2832 struct order_stats *os = &ptile->parent;
2833 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2834 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2835 statistic_destroy (&ptile->parent.parent);
2842 struct casereader *reader = casewriter_make_reader (s->writer);
2845 struct mode *mode = mode_create ();
2846 struct order_stats *os = &mode->parent;
2847 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2848 s->ovalue = mode->mode;
2849 statistic_destroy (&mode->parent.parent);
2857 /* CTABLES occurrences. */
2859 struct ctables_occurrence
2861 struct hmap_node node;
2866 ctables_add_occurrence (const struct variable *var,
2867 const union value *value,
2868 struct hmap *occurrences)
2870 int width = var_get_width (var);
2871 unsigned int hash = value_hash (value, width, 0);
2873 struct ctables_occurrence *o;
2874 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2876 if (value_equal (value, &o->value, width))
2879 o = xmalloc (sizeof *o);
2880 value_clone (&o->value, value, width);
2881 hmap_insert (occurrences, &o->node, hash);
2886 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2887 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2888 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2889 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
2894 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2895 all the axes (except the scale variable, if any). */
2896 struct hmap_node node;
2897 struct ctables_section *section;
2899 /* The areas that contain this cell. */
2900 uint32_t omit_areas;
2901 struct ctables_area *areas[N_CTATS];
2906 enum ctables_summary_variant sv;
2908 struct ctables_cell_axis
2910 struct ctables_cell_value
2912 const struct ctables_category *category;
2920 union ctables_summary *summaries;
2923 struct ctables_section
2926 struct ctables_table *table;
2927 struct ctables_nest *nests[PIVOT_N_AXES];
2930 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2931 struct hmap cells; /* Contains "struct ctables_cell"s. */
2932 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2935 static void ctables_section_uninit (struct ctables_section *);
2937 struct ctables_table
2939 struct ctables *ctables;
2940 struct ctables_axis *axes[PIVOT_N_AXES];
2941 struct ctables_stack stacks[PIVOT_N_AXES];
2942 struct ctables_section *sections;
2944 enum pivot_axis_type summary_axis;
2945 struct ctables_summary_spec_set summary_specs;
2946 struct variable **sum_vars;
2949 enum pivot_axis_type slabels_axis;
2950 bool slabels_visible;
2952 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2954 Most commonly, label_axis[a] == a, and in particular we always have
2955 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2957 If ROWLABELS or COLLABELS is specified, then one of
2958 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2959 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2961 If any category labels are moved, then 'clabels_example' is one of the
2962 variables being moved (and it is otherwise NULL). All of the variables
2963 being moved have the same width, value labels, and categories, so this
2964 example variable can be used to find those out.
2966 The remaining members in this group are relevant only if category labels
2969 'clabels_values_map' holds a "struct ctables_value" for all the values
2970 that appear in all of the variables in the moved categories. It is
2971 accumulated as the data is read. Once the data is fully read, its
2972 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2974 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2975 enum pivot_axis_type clabels_from_axis;
2976 enum pivot_axis_type clabels_to_axis;
2977 int clabels_start_ofs, clabels_end_ofs;
2978 const struct variable *clabels_example;
2979 struct hmap clabels_values_map;
2980 struct ctables_value **clabels_values;
2981 size_t n_clabels_values;
2983 /* Indexed by variable dictionary index. */
2984 struct ctables_categories **categories;
2985 size_t n_categories;
2993 struct ctables_chisq *chisq;
2994 struct ctables_pairwise *pairwise;
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   drive the comparison, and the axis whose cell values are compared. */
2997 struct ctables_cell_sort_aux
2999 const struct ctables_nest *nest;
3000 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   "struct ctables_cell *"; AUX_ points to a struct ctables_cell_sort_aux.

   The cells are compared on axis AUX->a, one nest variable at a time and
   skipping the nest's scale variable.  Cells whose categories differ are
   ordered by the category pointers themselves.  Cells in the same category
   are ordered according to the category's type: by value, or by value label
   with the value as a fallback, honoring the category's 'sort_ascending'
   flag where applicable. */
3004 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3006 const struct ctables_cell_sort_aux *aux = aux_;
3007 struct ctables_cell *const *ap = a_;
3008 struct ctables_cell *const *bp = b_;
3009 const struct ctables_cell *a = *ap;
3010 const struct ctables_cell *b = *bp;
3012 const struct ctables_nest *nest = aux->nest;
3013 for (size_t i = 0; i < nest->n; i++)
3014 if (i != nest->scale_idx)
3016 const struct variable *var = nest->vars[i];
3017 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3018 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by category address. */
3019 if (a_cv->category != b_cv->category)
3020 return a_cv->category > b_cv->category ? 1 : -1;
3022 const union value *a_val = &a_cv->value;
3023 const union value *b_val = &b_cv->value;
3024 switch (a_cv->category->type)
3030 case CCT_POSTCOMPUTE:
3031 case CCT_EXCLUDED_MISSING:
3032 /* Must be equal. */
3040 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3048 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3050 return a_cv->category->sort_ascending ? cmp : -cmp;
3056 const char *a_label = var_lookup_value_label (var, a_val);
3057 const char *b_label = var_lookup_value_label (var, b_val);
3063 cmp = strcmp (a_label, b_label);
3069 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3072 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Returns the area of type AREA that CELL belongs to within CELL's section,
   creating it if it does not exist yet.

   An area is identified by the categories of the variables listed in each
   nest's 'areas[AREA]' index array, plus their values unless the category is
   a total, subtotal, or postcompute.  A newly created area remembers CELL as
   its 'example' and gets a zeroed array of sums, one per sum variable of the
   table. */
3083 static struct ctables_area *
3084 ctables_area_insert (struct ctables_cell *cell, enum ctables_area_type area)
3086 struct ctables_section *s = cell->section;
/* Hash the identifying categories and values. */
3088 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3090 const struct ctables_nest *nest = s->nests[a];
3091 for (size_t i = 0; i < nest->n_areas[area]; i++)
3093 size_t v_idx = nest->areas[area][i];
3094 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3095 hash = hash_pointer (cv->category, hash);
3096 if (cv->category->type != CCT_TOTAL
3097 && cv->category->type != CCT_SUBTOTAL
3098 && cv->category->type != CCT_POSTCOMPUTE)
3099 hash = value_hash (&cv->value,
3100 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area whose example cell matches CELL on the same
   categories and values. */
3104 struct ctables_area *a;
3105 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3107 const struct ctables_cell *df = a->example;
3108 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3110 const struct ctables_nest *nest = s->nests[a];
3111 for (size_t i = 0; i < nest->n_areas[area]; i++)
3113 size_t v_idx = nest->areas[area][i];
3114 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3115 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3116 if (cv1->category != cv2->category
3117 || (cv1->category->type != CCT_TOTAL
3118 && cv1->category->type != CCT_SUBTOTAL
3119 && cv1->category->type != CCT_POSTCOMPUTE
3120 && !value_equal (&cv1->value, &cv2->value,
3121 var_get_width (nest->vars[v_idx]))))
/* No match: create a new area with CELL as its example. */
3130 struct ctables_sum *sums = (s->table->n_sum_vars
3131 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3134 a = xmalloc (sizeof *a);
3135 *a = (struct ctables_area) { .example = cell, .sums = sums };
3136 hmap_insert (&s->areas[area], &a->node, hash);
/* Returns the cell in section S identified by the categories in CATS and the
   values that case C has for the nest variables, creating the cell if it
   does not exist yet.

   Cells are keyed on every non-scale variable of every axis: the category
   pointer always takes part, and the variable's value takes part unless the
   category is a total, subtotal, or postcompute.

   A new cell copies the category and a clone of the value for every nest
   position, records in 'omit_areas' a bit for each area type associated
   with the axis of any total, subtotal, or postcompute category, flags
   postcompute cells, initializes one summary per summary specification, and
   is attached to one area of each type. */
3140 static struct ctables_cell *
3141 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3142 const struct ctables_category **cats[PIVOT_N_AXES])
3145 enum ctables_summary_variant sv = CSV_CELL;
3146 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3148 const struct ctables_nest *nest = s->nests[a];
3149 for (size_t i = 0; i < nest->n; i++)
3150 if (i != nest->scale_idx)
3152 hash = hash_pointer (cats[a][i], hash);
3153 if (cats[a][i]->type != CCT_TOTAL
3154 && cats[a][i]->type != CCT_SUBTOTAL
3155 && cats[a][i]->type != CCT_POSTCOMPUTE)
3156 hash = value_hash (case_data (c, nest->vars[i]),
3157 var_get_width (nest->vars[i]), hash);
/* Search for an existing cell with the same key. */
3163 struct ctables_cell *cell;
3164 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3166 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3168 const struct ctables_nest *nest = s->nests[a];
3169 for (size_t i = 0; i < nest->n; i++)
3170 if (i != nest->scale_idx
3171 && (cats[a][i] != cell->axes[a].cvs[i].category
3172 || (cats[a][i]->type != CCT_TOTAL
3173 && cats[a][i]->type != CCT_SUBTOTAL
3174 && cats[a][i]->type != CCT_POSTCOMPUTE
3175 && !value_equal (case_data (c, nest->vars[i]),
3176 &cell->axes[a].cvs[i].value,
3177 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3186 cell = xmalloc (sizeof *cell);
3190 cell->omit_areas = 0;
3191 cell->postcompute = false;
3192 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3194 const struct ctables_nest *nest = s->nests[a];
3195 cell->axes[a].cvs = (nest->n
3196 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3198 for (size_t i = 0; i < nest->n; i++)
3200 const struct ctables_category *cat = cats[a][i];
3201 const struct variable *var = nest->vars[i];
3202 const union value *value = case_data (c, var);
3203 if (i != nest->scale_idx)
3205 const struct ctables_category *subtotal = cat->subtotal;
3206 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3209 if (cat->type == CCT_TOTAL
3210 || cat->type == CCT_SUBTOTAL
3211 || cat->type == CCT_POSTCOMPUTE)
/* 'omit_areas' is a bit mask with one bit (1u << CTAT_*) per area type. */
3215 case PIVOT_AXIS_COLUMN:
3216 cell->omit_areas |= ((1u << CTAT_TABLE) |
3217 (1u << CTAT_LAYER) |
3218 (1u << CTAT_LAYERCOL) |
3219 (1u << CTAT_SUBTABLE) |
3222 case PIVOT_AXIS_ROW:
3223 cell->omit_areas |= ((1u << CTAT_TABLE) |
3224 (1u << CTAT_LAYER) |
3225 (1u << CTAT_LAYERROW) |
3226 (1u << CTAT_SUBTABLE) |
3229 case PIVOT_AXIS_LAYER:
3230 cell->omit_areas |= ((1u << CTAT_TABLE) |
3231 (1u << CTAT_LAYER));
3235 if (cat->type == CCT_POSTCOMPUTE)
3236 cell->postcompute = true;
/* Stored for every position, including the scale variable's. */
3239 cell->axes[a].cvs[i].category = cat;
3240 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3244 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3245 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3246 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3247 for (size_t i = 0; i < specs->n; i++)
3248 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3249 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3250 cell->areas[at] = ctables_area_insert (cell, at);
3251 hmap_insert (&s->cells, &cell->node, hash);
3256 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3258 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3263 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3264 const struct ctables_category **cats[PIVOT_N_AXES],
3265 bool is_included, double weight[N_CTWS])
3267 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3268 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3270 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3271 const union value *value = case_data (c, specs->var);
3272 bool is_missing = var_is_value_missing (specs->var, value);
3273 bool is_scale_missing
3274 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3276 for (size_t i = 0; i < specs->n; i++)
3277 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3278 is_scale_missing, is_included,
3279 weight[specs->specs[i].weighting]);
3280 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3281 if (!(cell->omit_areas && (1u << at)))
3283 struct ctables_area *a = cell->areas[at];
3285 add_weight (a->total, weight);
3287 add_weight (a->count, weight);
3290 add_weight (a->valid, weight);
3292 if (!is_scale_missing)
3293 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3295 const struct variable *var = s->table->sum_vars[i];
3296 double addend = case_num (c, var);
3297 if (!var_is_num_missing (var, addend))
3298 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3299 a->sums[i].sum[wt] += addend * weight[wt];
3306 recurse_totals (struct ctables_section *s, const struct ccase *c,
3307 const struct ctables_category **cats[PIVOT_N_AXES],
3308 bool is_included, double weight[N_CTWS],
3309 enum pivot_axis_type start_axis, size_t start_nest)
3311 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3313 const struct ctables_nest *nest = s->nests[a];
3314 for (size_t i = start_nest; i < nest->n; i++)
3316 if (i == nest->scale_idx)
3319 const struct variable *var = nest->vars[i];
3321 const struct ctables_category *total = ctables_categories_total (
3322 s->table->categories[var_get_dict_index (var)]);
3325 const struct ctables_category *save = cats[a][i];
3327 ctables_cell_add__ (s, c, cats, is_included, weight);
3328 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3337 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3338 const struct ctables_category **cats[PIVOT_N_AXES],
3339 bool is_included, double weight[N_CTWS],
3340 enum pivot_axis_type start_axis, size_t start_nest)
3342 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3344 const struct ctables_nest *nest = s->nests[a];
3345 for (size_t i = start_nest; i < nest->n; i++)
3347 if (i == nest->scale_idx)
3350 const struct ctables_category *save = cats[a][i];
3353 cats[a][i] = save->subtotal;
3354 ctables_cell_add__ (s, c, cats, is_included, weight);
3355 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C, with the given WEIGHT under each kind of weighting, to
   section S.

   First the case's category is determined for every non-scale variable on
   every axis with ctables_categories_match().  A special hidden
   "excluded missing" category can stand in when there is no match, in which
   case the case is marked as not included.  Then the values are recorded as
   occurrences, and the case is added to its own cell and to the cells for
   all applicable totals and subtotals.

   NOTE(review): the slot cats[a][nest->scale_idx] is never assigned here,
   yet the arrays are passed on to ctables_cell_insert__(), which copies
   every slot into the cell.  Confirm that no consumer dereferences the
   scale slot's category.

   NOTE(review): the three arrays are variable-length arrays sized by
   nest->n.  If a nest can have n == 0 (ctables_cell_insert__() guards
   against that), a zero-length VLA is undefined behavior in ISO C. */
3364 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3365 double weight[N_CTWS])
3367 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3368 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3369 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3370 const struct ctables_category **cats[PIVOT_N_AXES] =
3372 [PIVOT_AXIS_LAYER] = layer_cats,
3373 [PIVOT_AXIS_ROW] = row_cats,
3374 [PIVOT_AXIS_COLUMN] = column_cats,
3377 bool is_included = true;
/* Classify the case on every non-scale variable. */
3379 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3381 const struct ctables_nest *nest = s->nests[a];
3382 for (size_t i = 0; i < nest->n; i++)
3383 if (i != nest->scale_idx)
3385 const struct variable *var = nest->vars[i];
3386 const union value *value = case_data (c, var);
3388 cats[a][i] = ctables_categories_match (
3389 s->table->categories[var_get_dict_index (var)], value, var);
3392 if (i != nest->summary_idx)
3395 if (!var_is_value_missing (var, value))
/* Shared placeholder category for values that are missing and excluded. */
3398 static const struct ctables_category cct_excluded_missing = {
3399 .type = CCT_EXCLUDED_MISSING,
3402 cats[a][i] = &cct_excluded_missing;
3403 is_included = false;
/* Record which values occurred for each non-scale variable. */
3409 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3411 const struct ctables_nest *nest = s->nests[a];
3412 for (size_t i = 0; i < nest->n; i++)
3413 if (i != nest->scale_idx)
3415 const struct variable *var = nest->vars[i];
3416 const union value *value = case_data (c, var);
3417 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its own cell, then to total and subtotal cells. */
3421 ctables_cell_add__ (s, c, cats, is_included, weight);
3422 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3423 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* A distinct value of the variables whose category labels are moved, kept
   in a table's 'clabels_values_map'.  Code in this file also uses a 'value'
   member (the value itself) and a 'leaf' member (its index once sorted). */
3426 struct ctables_value
3428 struct hmap_node node;
3433 static struct ctables_value *
3434 ctables_value_find__ (const struct ctables_table *t, const union value *value,
3435 int width, unsigned int hash)
3437 struct ctables_value *clv;
3438 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3439 hash, &t->clabels_values_map)
3440 if (value_equal (value, &clv->value, width))
3446 ctables_value_insert (struct ctables_table *t, const union value *value,
3449 unsigned int hash = value_hash (value, width, 0);
3450 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3453 clv = xmalloc (sizeof *clv);
3454 value_clone (&clv->value, value, width);
3455 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3459 static const struct ctables_value *
3460 ctables_value_find (const struct ctables_cell *cell)
3462 const struct ctables_section *s = cell->section;
3463 const struct ctables_table *t = s->table;
3464 if (!t->clabels_example)
3467 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3468 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3469 const union value *value
3470 = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3471 int width = var_get_width (var);
3472 const struct ctables_value *ctv = ctables_value_find__ (
3473 t, value, width, value_hash (value, width, 0));
3474 assert (ctv != NULL);
3479 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3481 const struct ctables_value *const *ap = a_;
3482 const struct ctables_value *const *bp = b_;
3483 const struct ctables_value *a = *ap;
3484 const struct ctables_value *b = *bp;
3485 const int *width = width_;
3486 return value_compare_3way (&a->value, &b->value, *width);
/* Finishes T's set of moved category-label values once the data has been
   read.

   Values of the example variable that have value labels and match one of
   its categories can be added to 'clabels_values_map'.  Then all entries of
   the map are collected into the 'clabels_values' array, sorted by value,
   and each entry's 'leaf' is set to its position in the sorted array. */
3490 ctables_sort_clabels_values (struct ctables_table *t)
3492 const struct variable *v0 = t->clabels_example;
3493 int width = var_get_width (v0);
3495 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3498 const struct val_labs *val_labs = var_get_value_labels (v0);
3499 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3500 vl = val_labs_next (val_labs, vl))
3501 if (ctables_categories_match (c0, &vl->value, v0))
3502 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array. */
3505 size_t n = hmap_count (&t->clabels_values_map);
3506 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3508 struct ctables_value *clv;
3510 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3511 t->clabels_values[i++] = clv;
3512 t->n_clabels_values = n;
3515 sort (t->clabels_values, n, sizeof *t->clabels_values,
3516 compare_ctables_values_3way, &width);
/* Number the values in sorted order. */
3518 for (size_t i = 0; i < n; i++)
3519 t->clabels_values[i]->leaf = i;
/* Command-wide CTABLES state.  NOTE(review): these appear to be the members
   of "struct ctables", the type that ctables_destroy() releases; the struct
   header line is not visible here. */
3524 const struct dictionary *dict;
3525 struct pivot_table_look *look;
3527 /* For CTEF_* formats. */
3528 struct fmt_settings ctables_formats;
3530 /* If this is NULL, zeros are displayed using the normal print format.
3531 Otherwise, this string is displayed. */
3534 /* If this is NULL, missing values are displayed using the normal print
3535 format. Otherwise, this string is displayed. */
3538 /* Indexed by variable dictionary index. */
3539 enum ctables_vlabel *vlabels;
3541 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3543 bool mrsets_count_duplicates; /* MRSETS. */
3544 bool smissing_listwise; /* SMISSING. */
3545 struct variable *e_weight; /* WEIGHT. */
3546 int hide_threshold; /* HIDESMALLCOUNTS. */
3548 struct ctables_table **tables;
/* Arithmetic operators for postcompute expressions.  Each takes two double
   operands so that ctables_pcexpr_evaluate_nonterminal() can call any of
   them through one function-pointer type; ctpo_neg() ignores its second
   operand. */
3553 ctpo_add (double a, double b)
3559 ctpo_sub (double a, double b)
3565 ctpo_mul (double a, double b)
3571 ctpo_div (double a, double b)
/* Division by zero yields the system-missing value. */
3573 return b ? a / b : SYSMIS;
3577 ctpo_pow (double a, double b)
/* The caller's errno is saved around the call to pow(). */
3579 int save_errno = errno;
3581 double result = pow (a, b);
3589 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression: the cell being computed
   and its section, the categories of the variable that carries the
   postcompute, the axis of that variable (its index within the nest and the
   summary index are further members used by the evaluation code), and the
   format used for parsing string operands. */
3594 struct ctables_pcexpr_evaluate_ctx
3596 const struct ctables_cell *cell;
3597 const struct ctables_section *section;
3598 const struct ctables_categories *cats;
3599 enum pivot_axis_type pc_a;
3602 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() and
   ctables_pcexpr_evaluate() call each other. */
3605 static double ctables_pcexpr_evaluate (
3606 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3609 ctables_pcexpr_evaluate_nonterminal (
3610 const struct ctables_pcexpr_evaluate_ctx *ctx,
3611 const struct ctables_pcexpr *e, size_t n_args,
3612 double evaluate (double, double))
3614 double args[2] = { 0, 0 };
3615 for (size_t i = 0; i < n_args; i++)
3617 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3618 if (!isfinite (args[i]) || args[i] == SYSMIS)
3621 return evaluate (args[0], args[1]);
/* Returns the summary value, for the summary at index CTX->summary_idx, of
   the cell that agrees with CTX->cell on every non-scale variable except
   the one at (CTX->pc_a, CTX->pc_a_idx), for which the category and value
   in PC_CV are used instead.

   The lookup uses the same key as cell insertion: category pointers, plus
   values unless the category is a total, subtotal, or postcompute.
   NOTE(review): the result when no such cell exists is not visible in this
   excerpt. */
3625 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3626 const struct ctables_cell_value *pc_cv)
3628 const struct ctables_section *s = ctx->section;
/* Hash the key of the target cell. */
3631 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3633 const struct ctables_nest *nest = s->nests[a];
3634 for (size_t i = 0; i < nest->n; i++)
3635 if (i != nest->scale_idx)
3637 const struct ctables_cell_value *cv
3638 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3639 : &ctx->cell->axes[a].cvs[i]);
3640 hash = hash_pointer (cv->category, hash);
3641 if (cv->category->type != CCT_TOTAL
3642 && cv->category->type != CCT_SUBTOTAL
3643 && cv->category->type != CCT_POSTCOMPUTE)
3644 hash = value_hash (&cv->value,
3645 var_get_width (nest->vars[i]), hash);
/* Find the cell with that key. */
3649 struct ctables_cell *tc;
3650 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3652 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3654 const struct ctables_nest *nest = s->nests[a];
3655 for (size_t i = 0; i < nest->n; i++)
3656 if (i != nest->scale_idx)
3658 const struct ctables_cell_value *p_cv
3659 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3660 : &ctx->cell->axes[a].cvs[i]);
3661 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3662 if (p_cv->category != t_cv->category
3663 || (p_cv->category->type != CCT_TOTAL
3664 && p_cv->category->type != CCT_SUBTOTAL
3665 && p_cv->category->type != CCT_POSTCOMPUTE
3666 && !value_equal (&p_cv->value,
3668 var_get_width (nest->vars[i]))))
/* Found: report the requested summary of that cell. */
3680 const struct ctables_table *t = s->table;
3681 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3682 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3683 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3684 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and evaluated with
   ctables_pcexpr_evaluate_category().  Range, missing, and "other
   nonmissing" operands add up the results for every observed occurrence of
   the variable that matches the category.  String operands are padded with
   spaces to the variable's width when they are shorter.  Operator nodes are
   evaluated with ctables_pcexpr_evaluate_nonterminal() and the matching
   ctpo_*() function. */
3688 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3689 const struct ctables_pcexpr *e)
3696 case CTPO_CAT_NRANGE:
3697 case CTPO_CAT_SRANGE:
3698 case CTPO_CAT_MISSING:
3699 case CTPO_CAT_OTHERNM:
3701 struct ctables_cell_value cv = {
3702 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3704 assert (cv.category != NULL);
/* Sum over every observed value that falls in the category. */
3706 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3707 const struct ctables_occurrence *o;
3710 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3711 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3712 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3714 cv.value = o->value;
3715 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3720 case CTPO_CAT_NUMBER:
3721 case CTPO_CAT_SUBTOTAL:
3722 case CTPO_CAT_TOTAL:
3724 struct ctables_cell_value cv = {
3725 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3726 .value = { .f = e->number },
3728 assert (cv.category != NULL);
3729 return ctables_pcexpr_evaluate_category (ctx, &cv);
3732 case CTPO_CAT_STRING:
3734 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
/* Pad a short string operand with spaces to the variable's width. */
3736 if (width > e->string.length)
3738 s = xmalloc (width);
3739 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3742 const struct ctables_category *category
3743 = ctables_find_category_for_postcompute (
3744 ctx->section->table->ctables->dict,
3745 ctx->cats, ctx->parse_format, e);
3746 assert (category != NULL);
/* The resolved category may be numeric even though the operand was written
   as a string. */
3748 struct ctables_cell_value cv = { .category = category };
3749 if (category->type == CCT_NUMBER)
3750 cv.value.f = category->number;
3751 else if (category->type == CCT_STRING)
3752 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3756 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3762 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3765 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3768 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3771 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3774 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3777 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3783 static const struct ctables_category *
3784 ctables_cell_postcompute (const struct ctables_section *s,
3785 const struct ctables_cell *cell,
3786 enum pivot_axis_type *pc_a_p,
3789 assert (cell->postcompute);
3790 const struct ctables_category *pc_cat = NULL;
3791 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3792 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3794 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3795 if (cv->category->type == CCT_POSTCOMPUTE)
3799 /* Multiple postcomputes cross each other. The value is
3804 pc_cat = cv->category;
3808 *pc_a_idx_p = pc_a_idx;
3812 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS.

   The postcompute category is located with ctables_cell_postcompute().  If
   the postcompute has its own summary specifications and one of them has
   the same function, weighting, area, and percentile as SS, that
   specification's format overrides *FORMAT and *IS_CTABLES_FORMAT.  The
   value itself comes from evaluating the postcompute's expression against
   the categories of the variable that carries the postcompute. */
3817 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3818 const struct ctables_cell *cell,
3819 const struct ctables_summary_spec *ss,
3820 struct fmt_spec *format,
3821 bool *is_ctables_format,
3824 enum pivot_axis_type pc_a = 0;
3825 size_t pc_a_idx = 0;
3826 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3827 s, cell, &pc_a, &pc_a_idx);
3831 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a format override among the postcompute's own specs. */
3834 for (size_t i = 0; i < pc->specs->n; i++)
3836 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3837 if (ss->function == ss2->function
3838 && ss->weighting == ss2->weighting
3839 && ss->calc_area == ss2->calc_area
3840 && ss->percentile == ss2->percentile)
3842 *format = ss2->format;
3843 *is_ctables_format = ss2->is_ctables_format;
3849 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3850 const struct ctables_categories *cats = s->table->categories[
3851 var_get_dict_index (var)];
3852 struct ctables_pcexpr_evaluate_ctx ctx = {
3857 .pc_a_idx = pc_a_idx,
3858 .summary_idx = summary_idx,
3859 .parse_format = pc_cat->parse_format,
3861 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3864 /* Chi-square test (SIGTEST). */
/* Settings for the test; a table points to one through its 'chisq' member. */
3865 struct ctables_chisq
3868 bool include_mrsets;
3872 /* Pairwise comparison test (COMPARETEST). */
/* Settings for the test; a table points to one through its 'pairwise'
   member. */
3873 struct ctables_pairwise
3875 enum { PROP, MEAN } type;
3877 bool include_mrsets;
3878 bool meansvariance_allcats;
3880 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting called NAME: an optional equals sign
   followed by either the keyword DEFAULT or a number in the closed range
   [0, DBL_MAX], which is stored in *WIDTH. */
3889 parse_col_width (struct lexer *lexer, const char *name, double *width)
3891 lex_match (lexer, T_EQUALS);
3892 if (lex_match_id (lexer, "DEFAULT"))
3894 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3896 *width = lex_number (lexer);
/* Parses the keyword NO or YES for *B.  Any other token is reported as a
   syntax error that names the two expected keywords. */
3906 parse_bool (struct lexer *lexer, bool *b)
3908 if (lex_match_id (lexer, "NO"))
3910 else if (lex_match_id (lexer, "YES"))
3914 lex_error_expecting (lexer, "YES", "NO");
/* Destroys CHISQ.  Called by ctables_table_destroy() on a table's 'chisq'
   member. */
3921 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  Called by ctables_table_destroy() on a table's
   'pairwise' member. */
3927 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases everything owned by table T: its sections, its references to
   per-variable categories, its axes and stacks, its summary specifications,
   the moved-category-label values, and its significance-test settings. */
3933 ctables_table_destroy (struct ctables_table *t)
3938 for (size_t i = 0; i < t->n_sections; i++)
3939 ctables_section_uninit (&t->sections[i]);
/* Categories are reference-counted because several variables can share one
   set. */
3942 for (size_t i = 0; i < t->n_categories; i++)
3943 ctables_categories_unref (t->categories[i]);
3944 free (t->categories);
3946 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3948 ctables_axis_destroy (t->axes[a]);
3949 ctables_stack_uninit (&t->stacks[a]);
3951 free (t->summary_specs.specs);
/* The values in the map were cloned with the example variable's width, so
   that width is used to destroy them. */
3953 struct ctables_value *ctv, *next_ctv;
3954 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3955 &t->clabels_values_map)
3957 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3958 hmap_delete (&t->clabels_values_map, &ctv->node);
3961 hmap_destroy (&t->clabels_values_map);
3962 free (t->clabels_values);
3968 ctables_chisq_destroy (t->chisq);
3969 ctables_pairwise_destroy (t->pairwise);
/* Releases everything owned by CT: all of its postcomputes (their
   locations, expressions, and summary specifications), the CTABLES-specific
   format settings, its reference to the pivot table look, and every
   table. */
3974 ctables_destroy (struct ctables *ct)
3979 struct ctables_postcompute *pc, *next_pc;
3980 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3984 msg_location_destroy (pc->location);
3985 ctables_pcexpr_destroy (pc->expr);
3989 ctables_summary_spec_set_uninit (pc->specs);
3992 hmap_delete (&ct->postcomputes, &pc->hmap_node);
3995 hmap_destroy (&ct->postcomputes);
3997 fmt_settings_uninit (&ct->ctables_formats);
3998 pivot_table_look_unref (ct->look);
4002 for (size_t i = 0; i < ct->n_tables; i++)
4003 ctables_table_destroy (ct->tables[i]);
4009 all_strings (struct variable **vars, size_t n_vars,
4010 const struct ctables_category *cat)
4012 for (size_t j = 0; j < n_vars; j++)
4013 if (var_is_numeric (vars[j]))
4015 msg_at (SE, cat->location,
4016 _("This category specification may be applied only to string "
4017 "variables, but this subcommand tries to apply it to "
4018 "numeric variable %s."),
4019 var_get_name (vars[j]));
/* Parses a CATEGORIES subcommand for table T, starting at the VARIABLES
   keyword, and installs the resulting categories for each listed variable.

   The subcommand names one or more variables, followed either by an
   explicit bracketed list of categories or by implicit-category settings
   (ORDER, KEY, MISSING), and then by TOTAL, LABEL, POSITION, and EMPTY
   settings.  After parsing, a total category is added if requested,
   subtotals are linked to the categories they cover, and explicit
   categories are checked against the variables' types.  String categories
   are converted to numbers when all the variables share a date or time
   print format. */
4026 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4027 struct ctables *ct, struct ctables_table *t)
4029 if (!lex_force_match_id (lexer, "VARIABLES"))
4031 lex_match (lexer, T_EQUALS);
4033 struct variable **vars;
4035 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables, if there is
   one. */
4038 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
4039 for (size_t i = 1; i < n_vars; i++)
4041 const struct fmt_spec *f = var_get_print_format (vars[i]);
4042 if (f->type != common_format->type)
4044 common_format = NULL;
4050 && (fmt_get_category (common_format->type)
4051 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object is shared by all the variables, hence the initial
   reference count of n_vars.  Each variable drops its reference to whatever
   categories it had before. */
4053 struct ctables_categories *c = xmalloc (sizeof *c);
4054 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
4055 for (size_t i = 0; i < n_vars; i++)
4057 struct ctables_categories **cp
4058 = &t->categories[var_get_dict_index (vars[i])];
4059 ctables_categories_unref (*cp);
4063 size_t allocated_cats = 0;
4064 int cats_start_ofs = -1;
4065 int cats_end_ofs = -1;
/* Explicit categories: a bracketed list. */
4066 if (lex_match (lexer, T_LBRACK))
4068 cats_start_ofs = lex_ofs (lexer);
4071 if (c->n_cats >= allocated_cats)
4072 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4074 int start_ofs = lex_ofs (lexer);
4075 struct ctables_category *cat = &c->cats[c->n_cats];
4076 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4078 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4081 lex_match (lexer, T_COMMA);
4083 while (!lex_match (lexer, T_RBRACK));
4084 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for implicit categories. */
4087 struct ctables_category cat = {
4089 .include_missing = false,
4090 .sort_ascending = true,
4092 bool show_totals = false;
4093 char *total_label = NULL;
4094 bool totals_before = false;
4095 int key_start_ofs = 0;
4096 int key_end_ofs = 0;
/* Settings up to the end of the subcommand.  ORDER, KEY, and MISSING are
   accepted only when no explicit categories were given. */
4097 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4099 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4101 lex_match (lexer, T_EQUALS);
4102 if (lex_match_id (lexer, "A"))
4103 cat.sort_ascending = true;
4104 else if (lex_match_id (lexer, "D"))
4105 cat.sort_ascending = false;
4108 lex_error_expecting (lexer, "A", "D");
4112 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4114 key_start_ofs = lex_ofs (lexer) - 1;
4115 lex_match (lexer, T_EQUALS);
4116 if (lex_match_id (lexer, "VALUE"))
4117 cat.type = CCT_VALUE;
4118 else if (lex_match_id (lexer, "LABEL"))
4119 cat.type = CCT_LABEL;
4122 cat.type = CCT_FUNCTION;
4123 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4124 &cat.weighting, &cat.area))
4127 if (lex_match (lexer, T_LPAREN))
4129 cat.sort_var = parse_variable (lexer, dict);
4133 if (cat.sort_function == CTSF_PTILE)
4135 lex_match (lexer, T_COMMA);
4136 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4138 cat.percentile = lex_number (lexer);
4142 if (!lex_force_match (lexer, T_RPAREN))
4145 else if (ctables_function_availability (cat.sort_function)
4148 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4152 key_end_ofs = lex_ofs (lexer) - 1;
/* Sorting by a summary function is parsed but then rejected. */
4154 if (cat.type == CCT_FUNCTION)
4156 lex_ofs_error (lexer, key_start_ofs, key_end_ofs,
4157 _("Data-dependent sorting is not implemented."));
4161 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4163 lex_match (lexer, T_EQUALS);
4164 if (lex_match_id (lexer, "INCLUDE"))
4165 cat.include_missing = true;
4166 else if (lex_match_id (lexer, "EXCLUDE"))
4167 cat.include_missing = false;
4170 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4174 else if (lex_match_id (lexer, "TOTAL"))
4176 lex_match (lexer, T_EQUALS);
4177 if (!parse_bool (lexer, &show_totals))
4180 else if (lex_match_id (lexer, "LABEL"))
4182 lex_match (lexer, T_EQUALS);
4183 if (!lex_force_string (lexer))
4186 total_label = ss_xstrdup (lex_tokss (lexer));
4189 else if (lex_match_id (lexer, "POSITION"))
4191 lex_match (lexer, T_EQUALS);
4192 if (lex_match_id (lexer, "BEFORE"))
4193 totals_before = true;
4194 else if (lex_match_id (lexer, "AFTER"))
4195 totals_before = false;
4198 lex_error_expecting (lexer, "BEFORE", "AFTER");
4202 else if (lex_match_id (lexer, "EMPTY"))
4204 lex_match (lexer, T_EQUALS);
4205 if (lex_match_id (lexer, "INCLUDE"))
4206 c->show_empty = true;
4207 else if (lex_match_id (lexer, "EXCLUDE"))
4208 c->show_empty = false;
4211 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4218 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4219 "TOTAL", "LABEL", "POSITION", "EMPTY");
4221 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* With no explicit categories, the implicit category becomes the only
   one. */
4229 cat.location = lex_ofs_location (lexer, key_start_ofs, key_end_ofs);
4231 if (c->n_cats >= allocated_cats)
4232 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4233 c->cats[c->n_cats++] = cat;
/* Add the total category, before or after the others. */
4238 if (c->n_cats >= allocated_cats)
4239 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4241 struct ctables_category *totals;
4244 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4245 totals = &c->cats[0];
4248 totals = &c->cats[c->n_cats];
4251 *totals = (struct ctables_category) {
4253 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link subtotals.  The categories are walked forward when totals come
   first and backward otherwise. */
4257 struct ctables_category *subtotal = NULL;
4258 for (size_t i = totals_before ? 0 : c->n_cats;
4259 totals_before ? i < c->n_cats : i-- > 0;
4260 totals_before ? i++ : 0)
4262 struct ctables_category *cat = &c->cats[i];
4271 cat->subtotal = subtotal;
4274 case CCT_POSTCOMPUTE:
4285 case CCT_EXCLUDED_MISSING:
/* Validate explicit categories against the variables. */
4290 if (cats_start_ofs != -1)
4292 for (size_t i = 0; i < c->n_cats; i++)
4294 struct ctables_category *cat = &c->cats[i];
4297 case CCT_POSTCOMPUTE:
4298 cat->parse_format = parse_strings ? common_format->type : FMT_F;
4299 struct msg_location *cats_location
4300 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4301 bool ok = ctables_recursive_check_postcompute (
4302 dict, cat->pc->expr, cat, c, cats_location);
4303 msg_location_destroy (cats_location);
4310 for (size_t j = 0; j < n_vars; j++)
4311 if (var_is_alpha (vars[j]))
4313 msg_at (SE, cat->location,
4314 _("This category specification may be applied "
4315 "only to numeric variables, but this "
4316 "subcommand tries to apply it to string "
4318 var_get_name (vars[j]));
/* A string category is converted into a number category by parsing it in
   the common format. */
4327 if (!parse_category_string (cat->location, cat->string, dict,
4328 common_format->type, &n))
4331 ss_dealloc (&cat->string);
4333 cat->type = CCT_NUMBER;
4336 else if (!all_strings (vars, n_vars, cat))
/* Likewise, a string range is converted into a numeric range. */
4345 if (!cat->srange[0].string)
4347 else if (!parse_category_string (cat->location,
4348 cat->srange[0], dict,
4349 common_format->type, &n[0]))
4352 if (!cat->srange[1].string)
4354 else if (!parse_category_string (cat->location,
4355 cat->srange[1], dict,
4356 common_format->type, &n[1]))
4359 ss_dealloc (&cat->srange[0]);
4360 ss_dealloc (&cat->srange[1]);
4362 cat->type = CCT_NRANGE;
4363 cat->nrange[0] = n[0];
4364 cat->nrange[1] = n[1];
4366 else if (!all_strings (vars, n_vars, cat))
4377 case CCT_EXCLUDED_MISSING:
/* The set of summary specifications that a merge item draws from;
   merge_item_compare_3way() indexes its 'specs' with the item's 'ofs'.
   NOTE(review): the enclosing struct's header is not visible here. */
4394 const struct ctables_summary_spec_set *set;
4399 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4401 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4402 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4403 if (as->function != bs->function)
4404 return as->function > bs->function ? 1 : -1;
4405 else if (as->weighting != bs->weighting)
4406 return as->weighting > bs->weighting ? 1 : -1;
4407 else if (as->calc_area != bs->calc_area)
4408 return as->calc_area > bs->calc_area ? 1 : -1;
4409 else if (as->percentile != bs->percentile)
4410 return as->percentile < bs->percentile ? 1 : -1;
4412 const char *as_label = as->label ? as->label : "";
4413 const char *bs_label = bs->label ? bs->label : "";
4414 return strcmp (as_label, bs_label);
/* Recursively walks the pivot axes starting at axis A, using IX[] as the
   per-axis nest index, and initializes one new entry of T->sections for each
   combination of indexes.  The caller in this file starts the recursion with
   A == 0. */
4418 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4419 size_t ix[PIVOT_N_AXES])
4421 if (a < PIVOT_N_AXES)
/* An axis whose stack is empty still gets exactly one iteration. */
4423 size_t limit = MAX (t->stacks[a].n, 1);
4424 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4425 ctables_table_add_section (t, a + 1, ix);
/* All axes have an index: claim the next section slot and initialize it. */
4429 struct ctables_section *s = &t->sections[t->n_sections++];
4430 *s = (struct ctables_section) {
4432 .cells = HMAP_INITIALIZER (s->cells),
/* One occurrence map per variable in the nest selected on each axis. */
4434 for (a = 0; a < PIVOT_N_AXES; a++)
4437 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4439 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4440 for (size_t i = 0; i < nest->n; i++)
4441 hmap_init (&s->occurrences[a][i]);
/* One area map per area type. */
4443 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4444 hmap_init (&s->areas[at]);
4449 ctables_format (double d, const struct fmt_spec *format,
4450 const struct fmt_settings *settings)
4452 const union value v = { .f = d };
4453 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4455 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4456 produce the results we want for negative numbers, putting the negative
4457 sign in the wrong spot, before the prefix instead of after it. We can't,
4458 in fact, produce the desired results using a custom-currency
4459 specification. Instead, we postprocess the output, moving the negative
4462 NEQUAL: "-N=3" => "N=-3"
4463 PAREN: "-(3)" => "(-3)"
4464 PCTPAREN: "-(3%)" => "(-3%)"
4466 This transformation doesn't affect NEGPAREN. */
4467 char *minus_src = strchr (s, '-');
4468 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4470 char *n_equals = strstr (s, "N=");
4471 char *lparen = strchr (s, '(');
4472 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4474 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4480 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4482 for (size_t i = 0; i < t->stacks[a].n; i++)
4484 struct ctables_nest *nest = &t->stacks[a].nests[i];
4485 if (nest->n != 1 || nest->scale_idx != 0)
4488 enum ctables_vlabel vlabel
4489 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4490 if (vlabel != CTVL_NONE)
/* Three-way comparison of A and B: returns -1 if A < B, 1 if A > B, and 0 if
   they are equal. */
static int
compare_ints_3way (int a, int b)
{
  if (a < b)
    return -1;
  return a > b ? 1 : 0;
}
/* Comparison callback over array elements that are pointers to struct
   ctables_cell; ctables_table_output() passes it to sort().  AUX is unused.
   The visible comparisons order cells by their leaf index on each pivot axis
   and then by the leaf of their ctables_value, when they have one. */
4503 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
4504 const void *aux UNUSED)
/* A_ and B_ point at array elements, each of which is itself a cell
   pointer. */
4506 struct ctables_cell *const *ap = a_;
4507 struct ctables_cell *const *bp = b_;
4508 const struct ctables_cell *a = *ap;
4509 const struct ctables_cell *b = *bp;
/* Compare the leaf indexes assigned on each pivot axis, in axis order. */
4517 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
4519 int cmp = compare_ints_3way (a->axes[axis].leaf, b->axes[axis].leaf);
/* Then compare the leaves of the values looked up for each cell. */
4524 const struct ctables_value *a_ctv = ctables_value_find (a);
4525 const struct ctables_value *b_ctv = ctables_value_find (b);
4528 int cmp = compare_ints_3way (a_ctv->leaf, b_ctv->leaf);
/* Either both cells have such a value or neither does. */
4533 assert (!a_ctv && !b_ctv);
/* Builds a pivot table from the cells accumulated in T's sections and
   submits it for output.  CT supplies the table-wide settings read here: the
   look, the per-variable VLABELS settings, the hide threshold, the
   replacement text for zero and missing values, and the CTABLES-specific
   format settings. */
4538 ctables_table_output (struct ctables *ct, struct ctables_table *t)
/* The title is either T->title, as user text, or the default text. */
4540 struct pivot_table *pt = pivot_table_create__ (
4542 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4543 : pivot_value_new_text (N_("Custom Tables"))),
4546 pivot_table_set_caption (
4547 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4549 pivot_table_set_corner_text (
4550 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary statistics get a dimension of their own when the summary labels
   are on a different axis from the summaries, or when the labels are hidden
   and there is more than one summary specification. */
4552 bool summary_dimension = (t->summary_axis != t->slabels_axis
4553 || (!t->slabels_visible
4554 && t->summary_specs.n > 1));
4555 if (summary_dimension)
4557 struct pivot_dimension *d = pivot_dimension_create (
4558 pt, t->slabels_axis, N_("Statistics"));
4559 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4560 if (!t->slabels_visible)
4561 d->hide_all_labels = true;
/* One leaf per merged summary specification. */
4562 for (size_t i = 0; i < specs->n; i++)
4563 pivot_category_create_leaf (
4564 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* When T->clabels_example is set, add a dimension with one leaf per
   collected category value, labeled using that example variable. */
4567 bool categories_dimension = t->clabels_example != NULL;
4568 if (categories_dimension)
4570 struct pivot_dimension *d = pivot_dimension_create (
4571 pt, t->label_axis[t->clabels_from_axis],
4572 t->clabels_from_axis == PIVOT_AXIS_ROW
4573 ? N_("Row Categories")
4574 : N_("Column Categories"));
4575 const struct variable *var = t->clabels_example;
4576 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4577 for (size_t i = 0; i < t->n_clabels_values; i++)
4579 const struct ctables_value *value = t->clabels_values[i];
4580 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4581 assert (cat != NULL);
4582 pivot_category_create_leaf (
4583 d->root, ctables_category_create_value_label (c, cat,
4589 pivot_table_set_look (pt, ct->look);
/* Per-axis dimensions: an axis gets one if it has an axis expression or if
   it is the summary axis. */
4590 struct pivot_dimension *d[PIVOT_N_AXES];
4591 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4593 static const char *names[] = {
4594 [PIVOT_AXIS_ROW] = N_("Rows"),
4595 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4596 [PIVOT_AXIS_LAYER] = N_("Layers"),
4598 d[a] = (t->axes[a] || a == t->summary_axis
4599 ? pivot_dimension_create (pt, a, names[a])
4604 assert (t->axes[a]);
/* Handle each nest on this axis in turn. */
4606 for (size_t i = 0; i < t->stacks[a].n; i++)
4608 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, counting their cells
   and tracking the deepest nest among them. */
4609 struct ctables_section **sections = xnmalloc (t->n_sections,
4611 size_t n_sections = 0;
4613 size_t n_total_cells = 0;
4614 size_t max_depth = 0;
4615 for (size_t j = 0; j < t->n_sections; j++)
4616 if (t->sections[j].nests[a] == nest)
4618 struct ctables_section *s = &t->sections[j];
4619 sections[n_sections++] = s;
4620 n_total_cells += hmap_count (&s->cells);
4622 size_t depth = s->nests[a]->n;
4623 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it with
   ctables_cell_compare_3way(). */
4626 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4628 size_t n_sorted = 0;
4630 for (size_t j = 0; j < n_sections; j++)
4632 struct ctables_section *s = sections[j];
4634 struct ctables_cell *cell;
4635 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4637 sorted[n_sorted++] = cell;
4638 assert (n_sorted <= n_total_cells);
4641 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4642 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* A level is one tier of the pivot category tree built for this nest. */
4644 struct ctables_level
4646 enum ctables_level_type
4648 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4649 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4650 CTL_SUMMARY, /* Summary functions. */
4654 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Up to two levels per variable plus one for summaries. */
4657 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4658 size_t n_levels = 0;
4659 for (size_t k = 0; k < nest->n; k++)
4661 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4662 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4664 if (vlabel != CTVL_NONE)
4666 levels[n_levels++] = (struct ctables_level) {
4668 .vlabel = (enum settings_value_show) vlabel,
/* A category level is added for every non-scale variable, except for the
   last variable when its labels have been moved to another axis. */
4673 if (nest->scale_idx != k
4674 && (k != nest->n - 1 || t->label_axis[a] == a))
4676 levels[n_levels++] = (struct ctables_level) {
4677 .type = CTL_CATEGORY,
/* Summaries become the innermost level here only when they have no
   dimension of their own and their labels are on this axis. */
4683 if (!summary_dimension && a == t->slabels_axis)
4685 levels[n_levels++] = (struct ctables_level) {
4686 .type = CTL_SUMMARY,
4687 .var_idx = SIZE_MAX,
4691 /* Pivot categories:
4693 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4694 - category for nest->vars[0], if nest->scale_idx != 0
4695 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4696 - category for nest->vars[1], if nest->scale_idx != 1
4698 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4699 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4700 - summary function, if 'a == t->slabels_axis && a ==
4703 Additional dimensions:
4705 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4707 - If 't->label_axis[b] == a' for some 'b != a', add a category
/* groups[k] holds the pivot group most recently created at level K. */
4712 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4714 for (size_t j = 0; j < n_sorted; j++)
4716 struct ctables_cell *cell = sorted[j];
4717 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
/* Count the leading levels on which this cell agrees with the previous
   cell; only category levels are compared. */
4719 size_t n_common = 0;
4722 for (; n_common < n_levels; n_common++)
4724 const struct ctables_level *level = &levels[n_common];
4725 if (level->type == CTL_CATEGORY)
4727 size_t var_idx = level->var_idx;
4728 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4729 if (prev->axes[a].cvs[var_idx].category != c)
4731 else if (c->type != CCT_SUBTOTAL
4732 && c->type != CCT_TOTAL
4733 && c->type != CCT_POSTCOMPUTE
4734 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4735 &cell->axes[a].cvs[var_idx].value,
4736 var_get_type (nest->vars[var_idx])))
/* Create new pivot categories for the levels past the common prefix. */
4742 for (size_t k = n_common; k < n_levels; k++)
4744 const struct ctables_level *level = &levels[k];
4745 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4746 if (level->type == CTL_SUMMARY)
4748 assert (k == n_levels - 1);
4750 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4751 for (size_t m = 0; m < specs->n; m++)
4753 int leaf = pivot_category_create_leaf (
4754 parent, ctables_summary_label (&specs->specs[m],
4762 const struct variable *var = nest->vars[level->var_idx];
4763 struct pivot_value *label;
4764 if (level->type == CTL_VAR)
4766 label = pivot_value_new_variable (var);
4767 label->variable.show = level->vlabel;
4769 else if (level->type == CTL_CATEGORY)
4771 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4772 label = ctables_category_create_value_label (
4773 t->categories[var_get_dict_index (var)],
4774 cv->category, var, &cv->value);
/* The last level yields a leaf; any earlier level yields a group. */
4779 if (k == n_levels - 1)
4780 prev_leaf = pivot_category_create_leaf (parent, label);
4782 groups[k] = pivot_category_create_group__ (parent, label);
/* Record the leaf index that represents this cell on axis A. */
4786 cell->axes[a].leaf = prev_leaf;
4795 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
/* Sort all of the table's cells by leaf position and give each area a
   sequence number in the order in which it is first encountered. */
4799 size_t n_total_cells = 0;
4800 for (size_t j = 0; j < t->n_sections; j++)
4801 n_total_cells += hmap_count (&t->sections[j].cells);
4803 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4804 size_t n_sorted = 0;
4805 for (size_t j = 0; j < t->n_sections; j++)
4807 const struct ctables_section *s = &t->sections[j];
4808 struct ctables_cell *cell;
4809 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4811 sorted[n_sorted++] = cell;
4813 assert (n_sorted <= n_total_cells);
4814 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
/* ids[] holds the last sequence number handed out per area type; a
   sequence of 0 marks an area that has not been numbered yet. */
4816 size_t ids[N_CTATS];
4817 memset (ids, 0, sizeof ids);
4818 for (size_t j = 0; j < n_sorted; j++)
4820 struct ctables_cell *cell = sorted[j];
4821 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4823 struct ctables_area *area = cell->areas[at];
4824 if (!area->sequence)
4825 area->sequence = ++ids[at];
/* Finally, put one value into the pivot table per cell and summary
   specification. */
4832 for (size_t i = 0; i < t->n_sections; i++)
4834 struct ctables_section *s = &t->sections[i];
4836 struct ctables_cell *cell;
4837 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4842 const struct ctables_value *ctv = ctables_value_find (cell);
4843 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4844 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4845 for (size_t j = 0; j < specs->n; j++)
/* Assemble the value's index in each dimension, in the order in which the
   dimensions were created above. */
4848 size_t n_dindexes = 0;
4850 if (summary_dimension)
4851 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4854 dindexes[n_dindexes++] = ctv->leaf;
4856 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4859 int leaf = cell->axes[a].leaf;
4860 if (a == t->summary_axis && !summary_dimension)
4862 dindexes[n_dindexes++] = leaf;
4865 const struct ctables_summary_spec *ss = &specs->specs[j];
/* FORMAT and IS_CTABLES_FORMAT start from the spec's settings; they are
   passed by address to the postcompute calculation. */
4867 struct fmt_spec format = specs->specs[j].format;
4868 bool is_ctables_format = ss->is_ctables_format;
4869 double d = (cell->postcompute
4870 ? ctables_cell_calculate_postcompute (
4871 s, cell, ss, &format, &is_ctables_format, j)
4872 : ctables_summary_value (cell->areas,
4873 &cell->summaries[j], ss));
/* Choose the displayed form: "<threshold" for small counts, the ZERO or
   MISSING replacement text, a CTABLES-formatted string, or a plain number. */
4875 struct pivot_value *value;
4876 if (ct->hide_threshold != 0
4877 && d < ct->hide_threshold
4878 && ss->function == CTSF_COUNT)
4880 value = pivot_value_new_user_text_nocopy (
4881 xasprintf ("<%d", ct->hide_threshold));
4883 else if (d == 0 && ct->zero)
4884 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4885 else if (d == SYSMIS && ct->missing)
4886 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4887 else if (is_ctables_format)
4888 value = pivot_value_new_user_text_nocopy (
4889 ctables_format (d, &format, &ct->ctables_formats));
4892 value = pivot_value_new_number (d);
4893 value->numeric.format = format;
4895 /* XXX should text values be right-justified? */
4896 pivot_table_put (pt, dindexes, n_dindexes, value);
4901 pivot_table_submit (pt);
4905 ctables_check_label_position (struct ctables_table *t, struct lexer *lexer,
4906 enum pivot_axis_type a)
4908 enum pivot_axis_type label_pos = t->label_axis[a];
4912 const struct ctables_stack *stack = &t->stacks[a];
4916 const struct ctables_nest *n0 = &stack->nests[0];
4919 assert (stack->n == 1);
4923 const struct variable *v0 = n0->vars[n0->n - 1];
4924 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4925 t->clabels_example = v0;
4927 for (size_t i = 0; i < c0->n_cats; i++)
4928 if (c0->cats[i].type == CCT_FUNCTION)
4930 msg (SE, _("Category labels may not be moved to another axis when "
4931 "sorting by a summary function."));
4932 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4933 _("This syntax moves category labels to another axis."));
4934 msg_at (SN, c0->cats[i].location,
4935 _("This syntax requests sorting by a summary function."));
4939 for (size_t i = 0; i < stack->n; i++)
4941 const struct ctables_nest *ni = &stack->nests[i];
4943 const struct variable *vi = ni->vars[ni->n - 1];
4944 if (n0->n - 1 == ni->scale_idx)
4946 msg (SE, _("To move category labels from one axis to another, "
4947 "the variables whose labels are to be moved must be "
4948 "categorical, but %s is scale."), var_get_name (vi));
4949 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4950 _("This syntax moves category labels to another axis."));
4955 for (size_t i = 1; i < stack->n; i++)
4957 const struct ctables_nest *ni = &stack->nests[i];
4959 const struct variable *vi = ni->vars[ni->n - 1];
4960 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4962 if (var_get_width (v0) != var_get_width (vi))
4964 msg (SE, _("To move category labels from one axis to another, "
4965 "the variables whose labels are to be moved must all "
4966 "have the same width, but %s has width %d and %s has "
4968 var_get_name (v0), var_get_width (v0),
4969 var_get_name (vi), var_get_width (vi));
4970 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4971 _("This syntax moves category labels to another axis."));
4974 if (!val_labs_equal (var_get_value_labels (v0),
4975 var_get_value_labels (vi)))
4977 msg (SE, _("To move category labels from one axis to another, "
4978 "the variables whose labels are to be moved must all "
4979 "have the same value labels, but %s and %s have "
4980 "different value labels."),
4981 var_get_name (v0), var_get_name (vi));
4982 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4983 _("This syntax moves category labels to another axis."));
4986 if (!ctables_categories_equal (c0, ci))
4988 msg (SE, _("To move category labels from one axis to another, "
4989 "the variables whose labels are to be moved must all "
4990 "have the same category specifications, but %s and %s "
4991 "have different category specifications."),
4992 var_get_name (v0), var_get_name (vi));
4993 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4994 _("This syntax moves category labels to another axis."));
/* Looks up VAR in the array *SUM_VARS, which has *N elements and room for
   *ALLOCATED.  If VAR is already present, returns its index.  Otherwise
   appends VAR, growing the array with x2nrealloc() when it is full,
   increments *N, and returns the index of the new element. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  size_t idx = 0;
  while (idx < *n && (*sum_vars)[idx] != var)
    idx++;
  if (idx < *n)
    return idx;

  /* Not found, so IDX == *N here: append at the end. */
  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  (*sum_vars)[idx] = var;
  *n = idx + 1;
  return idx;
}
/* Maps AREA to an area type and returns it.  ctables_prepare_table()
   assigns the result back to a summary specification's calc_area.

   NOTE(review): most of the body is not visible here.  The two visible
   results are CTAT_LAYERCOL and CTAT_LAYERROW, which suggests that
   row-oriented and column-oriented area types are exchanged -- confirm
   against the full body. */
5016 static enum ctables_area_type
5017 rotate_area (enum ctables_area_type area)
5028 return CTAT_LAYERCOL;
5031 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A.  For each summary specification attached
   to A whose function is CTSF_areaPCT_SUM, registers A's variable in the
   array *SUM_VARS via add_sum_var() and stores the resulting index in the
   specification's sum_var_idx.  N and ALLOCATED are the array's element
   count and capacity, as for add_sum_var(). */
5044 enumerate_sum_vars (const struct ctables_axis *a,
5045 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every specification in every summary variant of this axis. */
5053 for (size_t i = 0; i < N_CSVS; i++)
5054 for (size_t j = 0; j < a->specs[i].n; j++)
5056 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5057 if (spec->function == CTSF_areaPCT_SUM)
5058 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both sub-axes. */
5064 for (size_t i = 0; i < 2; i++)
5065 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution after parsing.  The visible steps are:
   expanding each axis expression into a stack of nests and computing which
   variables define each area type; supplying default summary specifications
   where the user gave none; collecting listwise-missing variables; merging
   the nests' summary specifications into T->summary_specs; and enumerating
   the variables used by CTSF_areaPCT_SUM summaries.

   Returns the combined result of ctables_check_label_position() for the row
   and column axes; LEXER is passed through for its diagnostics. */
5071 ctables_prepare_table (struct ctables_table *t, struct lexer *lexer)
5073 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
/* Expand the axis expression into its stack of nests. */
5076 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5078 for (size_t j = 0; j < t->stacks[a].n; j++)
5080 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each area type, build the list of variable indexes within the nest
   that define the area.  The list can hold at most nest->n entries. */
5081 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5083 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5084 nest->n_areas[at] = 0;
/* For row- and column-oriented area types, ATA is the area's own axis and
   ATB is the other of the two. */
5086 enum pivot_axis_type ata, atb;
5087 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5089 ata = PIVOT_AXIS_ROW;
5090 atb = PIVOT_AXIS_COLUMN;
5092 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
5094 ata = PIVOT_AXIS_COLUMN;
5095 atb = PIVOT_AXIS_ROW;
5098 if (at == CTAT_LAYER
5099 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5100 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5101 ? a == atb && t->label_axis[a] != a
/* K is unsigned, so this loop runs from nest->n - 1 down to 0 and stops
   when decrementing past 0 wraps around to a value >= nest->n. */
5104 for (size_t k = nest->n - 1; k < nest->n; k--)
5105 if (k != nest->scale_idx)
5107 nest->areas[at][nest->n_areas[at]++] = k;
5113 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5114 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5115 : at == CTAT_TABLE ? true
/* Add every variable in the nest except the scale variable. */
5119 for (size_t k = 0; k < nest->n; k++)
5120 if (k != nest->scale_idx)
5121 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP is how many trailing entries to remove from the list; a negative
   value is handled separately below. */
5127 #define L PIVOT_AXIS_LAYER
5128 n_drop = (t->clabels_from_axis == L ? a != L
5129 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5130 : t->clabels_from_axis == a ? 2
5137 n_drop = a == ata && t->label_axis[ata] == atb;
5142 n_drop = (a == ata ? t->label_axis[ata] == atb
5144 : t->clabels_from_axis == atb ? -1
5145 : t->clabels_to_axis != atb ? 1
/* Remove the second-to-last entry by moving the last one over it. */
5157 size_t n = nest->n_areas[at];
5160 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5161 nest->n_areas[at]--;
/* Drop up to N_DROP entries from the end, never going below zero. */
5166 for (int i = 0; i < n_drop; i++)
5167 if (nest->n_areas[at] > 0)
5168 nest->n_areas[at]--;
/* Give the axis a stack containing a single placeholder nest. */
5175 struct ctables_nest *nest = xmalloc (sizeof *nest);
5176 *nest = (struct ctables_nest) {
5178 .scale_idx = SIZE_MAX,
5179 .summary_idx = SIZE_MAX
5181 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5183 /* There's no point in moving labels away from an axis that has no
5184 labels, so avoid dealing with the special cases around that. */
5185 t->label_axis[a] = a;
/* Fill in summary specifications for each nest on the summary axis. */
5188 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5189 for (size_t i = 0; i < stack->n; i++)
5191 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were given: default to a single specification, the
   mean for a scale summary and the count otherwise. */
5192 if (!nest->specs[CSV_CELL].n)
5194 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5195 ss->specs = xmalloc (sizeof *ss->specs);
5198 enum ctables_summary_function function
5199 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* The default summary applies to the last variable in the nest. */
5203 nest->summary_idx = nest->n - 1;
5204 ss->var = nest->vars[nest->summary_idx];
5206 *ss->specs = (struct ctables_summary_spec) {
5207 .function = function,
5208 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5209 .format = ctables_summary_default_format (function, ss->var),
5212 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5213 &nest->specs[CSV_CELL]);
/* Cell summaries were given but total summaries were not: reuse the cell
   summaries for totals. */
5215 else if (!nest->specs[CSV_TOTAL].n)
5216 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5217 &nest->specs[CSV_CELL]);
/* When row labels are on the column axis or vice versa, pass each
   specification's calc_area through rotate_area(). */
5219 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5220 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5222 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5223 for (size_t i = 0; i < nest->specs[sv].n; i++)
5225 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5226 const struct ctables_function_info *cfi =
5227 &ctables_function_info[ss->function];
5229 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise scale-missing handling, collect the scale variables of the
   other nests that share this nest's group head. */
5233 if (t->ctables->smissing_listwise)
5235 struct variable **listwise_vars = NULL;
5237 size_t allocated = 0;
5239 for (size_t j = nest->group_head; j < stack->n; j++)
5241 const struct ctables_nest *other_nest = &stack->nests[j];
5242 if (other_nest->group_head != nest->group_head)
5245 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5248 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5249 sizeof *listwise_vars);
5250 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Store the list in each summary variant; xmemdup() makes a copy. */
5253 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5256 listwise_vars = xmemdup (listwise_vars,
5257 n * sizeof *listwise_vars);
5258 nest->specs[sv].listwise_vars = listwise_vars;
5259 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specification sets of all the nests into
   T->summary_specs, starting with one merge item per nest and variant. */
5264 struct ctables_summary_spec_set *merged = &t->summary_specs;
5265 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5267 for (size_t j = 0; j < stack->n; j++)
5269 const struct ctables_nest *nest = &stack->nests[j];
5271 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5272 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest current item according to merge_item_compare_3way()
   and append its specification to the merged set. */
5277 struct merge_item min = items[0];
5278 for (size_t j = 1; j < n_left; j++)
5279 if (merge_item_compare_3way (&items[j], &min) < 0)
5282 if (merged->n >= merged->allocated)
5283 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5284 sizeof *merged->specs);
5285 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item equal to the minimum maps to the merged entry just added and
   advances; an item that runs out of specifications is replaced by the
   last remaining item. */
5287 for (size_t j = 0; j < n_left; )
5289 if (merge_item_compare_3way (&items[j], &min) == 0)
5291 struct merge_item *item = &items[j];
5292 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5293 if (item->ofs >= item->set->n)
5295 items[j] = items[--n_left];
/* Enumerate the variables needed by CTSF_areaPCT_SUM summaries. */
5304 size_t allocated_sum_vars = 0;
5305 enumerate_sum_vars (t->axes[t->summary_axis],
5306 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5308 return (ctables_check_label_position (t, lexer, PIVOT_AXIS_ROW)
5309 && ctables_check_label_position (t, lexer, PIVOT_AXIS_COLUMN));
5313 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5314 enum pivot_axis_type a)
5316 struct ctables_stack *stack = &t->stacks[a];
5317 for (size_t i = 0; i < stack->n; i++)
5319 const struct ctables_nest *nest = &stack->nests[i];
5320 const struct variable *var = nest->vars[nest->n - 1];
5321 const union value *value = case_data (c, var);
5323 if (var_is_numeric (var) && value->f == SYSMIS)
5326 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5328 ctables_value_insert (t, value, var_get_width (var));
/* For each category in CATS, records in OCCURRENCES, via
   ctables_add_occurrence(), values of VAR that the category refers to.
   Explicit numbers and strings are added directly.  The other kinds of
   category visible here take their candidate values from VAR's value
   labels. */
5333 ctables_add_category_occurrences (const struct variable *var,
5334 struct hmap *occurrences,
5335 const struct ctables_categories *cats)
5337 const struct val_labs *val_labs = var_get_value_labels (var);
5339 for (size_t i = 0; i < cats->n_cats; i++)
5341 const struct ctables_category *c = &cats->cats[i];
/* Explicit number: add the number itself. */
5345 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string: pad it with spaces to VAR's width before adding it. */
5351 int width = var_get_width (var);
5353 value_init (&value, width);
5354 value_copy_buf_rpad (&value, width,
5355 CHAR_CAST (uint8_t *, c->string.string),
5356 c->string.length, ' ');
5357 ctables_add_occurrence (var, &value, occurrences);
5358 value_destroy (&value, width);
/* Numeric range: add labeled values within [nrange[0], nrange[1]]. */
5363 assert (var_is_numeric (var));
5364 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5365 vl = val_labs_next (val_labs, vl))
5366 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5367 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: add labeled values accepted by in_string_range(). */
5371 assert (var_is_alpha (var));
5372 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5373 vl = val_labs_next (val_labs, vl))
5374 if (in_string_range (&vl->value, var, c->srange))
5375 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add the labeled values that are missing values for VAR. */
5379 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5380 vl = val_labs_next (val_labs, vl))
5381 if (var_is_value_missing (var, &vl->value))
5382 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value. */
5386 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5387 vl = val_labs_next (val_labs, vl))
5388 ctables_add_occurrence (var, &vl->value, occurrences);
5391 case CCT_POSTCOMPUTE:
/* Add labeled values, skipping missing ones unless the category includes
   missing values. */
5401 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5402 vl = val_labs_next (val_labs, vl))
5403 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5404 ctables_add_occurrence (var, &vl->value, occurrences);
5407 case CCT_EXCLUDED_MISSING:
5414 ctables_section_recurse_add_empty_categories (
5415 struct ctables_section *s,
5416 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5417 enum pivot_axis_type a, size_t a_idx)
5419 if (a >= PIVOT_N_AXES)
5420 ctables_cell_insert__ (s, c, cats);
5421 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5422 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5425 const struct variable *var = s->nests[a]->vars[a_idx];
5426 const struct ctables_categories *categories = s->table->categories[
5427 var_get_dict_index (var)];
5428 int width = var_get_width (var);
5429 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5430 const struct ctables_occurrence *o;
5431 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5433 union value *value = case_data_rw (c, var);
5434 value_destroy (value, width);
5435 value_clone (value, &o->value, width);
5436 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5437 assert (cats[a][a_idx] != NULL);
5438 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5441 for (size_t i = 0; i < categories->n_cats; i++)
5443 const struct ctables_category *cat = &categories->cats[i];
5444 if (cat->type == CCT_POSTCOMPUTE)
5446 cats[a][a_idx] = cat;
5447 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Fills the occurrence maps of section S from the category specifications
   of its non-scale variables, then walks the resulting combinations with
   ctables_section_recurse_add_empty_categories() using a scratch case. */
5454 ctables_section_add_empty_categories (struct ctables_section *s)
/* NOTE(review): the statements that set and test SHOW_EMPTY are not visible
   here; presumably it records whether any variable's categories have
   show_empty set -- confirm against the full function. */
5456 bool show_empty = false;
5457 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5459 for (size_t k = 0; k < s->nests[a]->n; k++)
5460 if (k != s->nests[a]->scale_idx)
5462 const struct variable *var = s->nests[a]->vars[k];
5463 const struct ctables_categories *cats = s->table->categories[
5464 var_get_dict_index (var)];
5465 if (cats->show_empty)
5468 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* One array of category pointers per axis, sized by that axis's nest depth,
   for the recursion to fill in. */
5474 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5475 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5476 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5477 const struct ctables_category **cats[PIVOT_N_AXES] =
5479 [PIVOT_AXIS_LAYER] = layer_cats,
5480 [PIVOT_AXIS_ROW] = row_cats,
5481 [PIVOT_AXIS_COLUMN] = column_cats,
/* Scratch case created from the dictionary's prototype; the recursion
   overwrites its values. */
5483 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5484 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties the occurrence, cell, and area maps of section S, destroying the
   values and summaries that their elements hold, and shrinks the cell and
   area maps. */
5489 ctables_section_clear (struct ctables_section *s)
/* Occurrences: destroy each recorded value of every non-scale variable and
   remove it from its map. */
5491 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5493 const struct ctables_nest *nest = s->nests[a];
5494 for (size_t i = 0; i < nest->n; i++)
5495 if (i != nest->scale_idx)
5497 const struct variable *var = nest->vars[i];
5498 int width = var_get_width (var);
5499 struct ctables_occurrence *o, *next;
5500 struct hmap *map = &s->occurrences[a][i];
5501 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5503 value_destroy (&o->value, width);
5504 hmap_delete (map, &o->node);
/* Cells: destroy the per-axis values, then the summaries. */
5511 struct ctables_cell *cell, *next_cell;
5512 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5514 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5516 const struct ctables_nest *nest = s->nests[a];
5517 for (size_t i = 0; i < nest->n; i++)
5518 if (i != nest->scale_idx)
5519 value_destroy (&cell->axes[a].cvs[i].value,
5520 var_get_width (nest->vars[i]));
5521 free (cell->axes[a].cvs);
/* The specification set that applies is the one for the cell's summary
   variant in the nest on the summary axis. */
5524 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5525 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5526 for (size_t i = 0; i < specs->n; i++)
5527 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5528 free (cell->summaries);
5530 hmap_delete (&s->cells, &cell->node);
5533 hmap_shrink (&s->cells);
/* Areas: remove every area from the map for each area type. */
5535 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5537 struct ctables_area *area, *next_area;
5538 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5542 hmap_delete (&s->areas[at], &area->node);
5545 hmap_shrink (&s->areas[at]);
5550 ctables_section_uninit (struct ctables_section *s)
5552 ctables_section_clear (s);
5554 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5556 struct ctables_nest *nest = s->nests[a];
5557 for (size_t i = 0; i < nest->n; i++)
5558 hmap_destroy (&s->occurrences[a][i]);
5559 free (s->occurrences[a]);
5562 hmap_destroy (&s->cells);
5563 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5564 hmap_destroy (&s->areas[at]);
/* Clears the accumulated contents of table T: every section is cleared with
   ctables_section_clear() and, when T->clabels_example is set, the collected
   category-label values are discarded as well. */
5568 ctables_table_clear (struct ctables_table *t)
5570 for (size_t i = 0; i < t->n_sections; i++)
5571 ctables_section_clear (&t->sections[i]);
5573 if (t->clabels_example)
/* All of the collected values have the example variable's width. */
5575 int width = var_get_width (t->clabels_example);
5576 struct ctables_value *value, *next_value;
5577 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5578 &t->clabels_values_map)
5580 value_destroy (&value->value, width);
5581 hmap_delete (&t->clabels_values_map, &value->node);
5584 hmap_shrink (&t->clabels_values_map);
/* Discard the array of values and reset its count. */
5586 free (t->clabels_values);
5587 t->clabels_values = NULL;
5588 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases read from INPUT, using the
   dictionary of dataset DS.  Sections are allocated for every table, the
   cases are read group by group, each case is inserted into every section
   of every table, and each table is then output and cleared.  Returns the
   result of casegrouper_destroy(). */
5593 ctables_execute (struct dataset *ds, struct casereader *input,
/* Allocate one section per combination of nests across the three axes,
   counting an empty stack as one, and initialize them. */
5596 for (size_t i = 0; i < ct->n_tables; i++)
5598 struct ctables_table *t = ct->tables[i];
5599 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5600 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5601 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5602 sizeof *t->sections);
5603 size_t ix[PIVOT_N_AXES];
5604 ctables_table_add_section (t, 0, ix);
5607 struct dictionary *dict = dataset_dict (ds);
/* Group by the split variables only when the dictionary's split type is
   SPLIT_SEPARATE; the alternative groups on no variables at all. */
5609 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5610 struct casegrouper *grouper
5612 ? casegrouper_create_splits (input, dict)
5613 : casegrouper_create_vars (input, NULL, 0));
5614 struct casereader *group;
5615 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to output the split file values. */
5619 struct ccase *c = casereader_peek (group, 0);
5622 output_split_file_values (ds, c);
5627 bool warn_on_invalid = true;
5628 for (struct ccase *c = casereader_read (group); c;
5629 case_unref (c), c = casereader_read (group))
/* Compute the case's weight under each form of weighting. */
5631 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5632 double e_weight = (ct->e_weight
5633 ? var_force_valid_weight (ct->e_weight,
5634 case_num (c, ct->e_weight),
5638 [CTW_DICTIONARY] = d_weight,
5639 [CTW_EFFECTIVE] = e_weight,
5640 [CTW_UNWEIGHTED] = 1.0,
5643 for (size_t i = 0; i < ct->n_tables; i++)
5645 struct ctables_table *t = ct->tables[i];
5647 for (size_t j = 0; j < t->n_sections; j++)
5648 ctables_cell_insert (&t->sections[j], c, weight);
/* Collect category-label values for each axis whose labels are displayed
   on a different axis. */
5650 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5651 if (t->label_axis[a] != a)
5652 ctables_insert_clabels_values (t, c, a);
5655 casereader_destroy (group);
/* With the group's reader destroyed, finish each table: sort the collected
   category-label values, add empty categories, output, and clear. */
5657 for (size_t i = 0; i < ct->n_tables; i++)
5659 struct ctables_table *t = ct->tables[i];
5661 if (t->clabels_example)
5662 ctables_sort_clabels_values (t);
5664 for (size_t j = 0; j < t->n_sections; j++)
5665 ctables_section_add_empty_categories (&t->sections[j]);
5667 ctables_table_output (ct, t);
5668 ctables_table_clear (t);
5671 return casegrouper_destroy (grouper);
/* Looks up NAME among CT's postcomputes.  Iterates ct->postcomputes with
   HMAP_FOR_EACH_WITH_HASH using utf8_hash_case_string (NAME, 0) as the hash,
   and treats a candidate as a match when utf8_strcasecmp() reports its name
   equal to NAME.

   NOTE(review): the return statements are not part of this listing. */
5674 static struct ctables_postcompute *
5675 ctables_find_postcompute (struct ctables *ct, const char *name)
5677 struct ctables_postcompute *pc;
5678 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5679 utf8_hash_case_string (name, 0), &ct->postcomputes)
5680 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form
   `&NAME = EXPR (expression)`, and records it in ct->postcomputes.  The
   caller (cmd_ctables) has already consumed the PCOMPUTE keyword.

   When NAME is already defined, a warning is issued at the new location, a
   note points at the previous definition, and the previous expression and
   location are destroyed.

   NOTE(review): failure-path returns, the condition separating the
   "redefinition" and "first definition" statements, and some assignments are
   on lines omitted from this listing. */
5686 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Offset of the token before the current one; used as the start of LOCATION
   below. */
5689 int pcompute_start = lex_ofs (lexer) - 1;
/* The name must be introduced by `&` (token T_AND). */
5691 if (!lex_match (lexer, T_AND))
5693 lex_error_expecting (lexer, "&");
5696 if (!lex_force_id (lexer))
/* Heap copy of the identifier; it becomes .name of a newly created entry
   below.  NOTE(review): where NAME is freed on failure or redefinition is
   not visible here. */
5699 char *name = ss_xstrdup (lex_tokss (lexer));
5702 if (!lex_force_match (lexer, T_EQUALS)
5703 || !lex_force_match_id (lexer, "EXPR")
5704 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token offsets around the expression so that its source text
   can be retrieved for pc->label further down. */
5710 int expr_start = lex_ofs (lexer);
5711 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5712 int expr_end = lex_ofs (lexer) - 1;
5713 if (!expr || !lex_force_match (lexer, T_RPAREN))
5715 ctables_pcexpr_destroy (expr);
5719 int pcompute_end = lex_ofs (lexer) - 1;
5721 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5724 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and release the
   expression and location that are about to be replaced. */
5727 msg_at (SW, location, _("New definition of &%s will override the "
5728 "previous definition."),
5730 msg_at (SN, pc->location, _("This is the previous definition."));
5732 ctables_pcexpr_destroy (pc->expr);
5733 msg_location_destroy (pc->location);
/* First definition: allocate an entry whose only nonzero member is NAME and
   register it under the case-insensitive hash of that name. */
5738 pc = xmalloc (sizeof *pc);
5739 *pc = (struct ctables_postcompute) { .name = name };
5740 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5741 utf8_hash_case_string (pc->name, 0));
/* NOTE(review): lines between the next two assignments are omitted, so
   whether the label is assigned unconditionally cannot be confirmed here. */
5744 pc->location = location;
5746 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the FORMAT setting of PPROPERTIES into *SSS.  Each entry is a
   summary function, followed by a number in [0,100] when the function is
   CTSF_PTILE, followed by a format specifier.  Parsing stops at end of
   command, at `/`, or at an identifier matching LABEL or HIDESOURCECATS.

   *SSS is reset to an empty set on entry.  The final
   ctables_summary_spec_set_uninit (SSS) call is presumably the error path --
   confirm, since the surrounding control flow and return statements are on
   lines omitted from this listing. */
5751 ctables_parse_pproperties_format (struct lexer *lexer,
5752 struct ctables_summary_spec_set *sss)
5754 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5756 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5757 && !(lex_token (lexer) == T_ID
5758 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5759 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5760 lex_tokss (lexer)))))
5762 /* Parse function. */
5763 enum ctables_summary_function function;
5764 enum ctables_weighting weighting;
5765 enum ctables_area_type area;
5766 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5769 /* Parse percentile. */
5770 double percentile = 0;
5771 if (function == CTSF_PTILE)
5773 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5775 percentile = lex_number (lexer);
/* Format specifier for this summary; both outputs are filled in by
   parse_ctables_format_specifier(). */
5780 struct fmt_spec format;
5781 bool is_ctables_format;
5782 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the spec array when it is full (x2nrealloc updates sss->allocated),
   then append the new spec. */
5785 if (sss->n >= sss->allocated)
5786 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5787 sizeof *sss->specs);
5788 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5789 .function = function,
5790 .weighting = weighting,
5793 .percentile = percentile,
5795 .is_ctables_format = is_ctables_format,
5801 ctables_summary_spec_set_uninit (sss);
/* Parses a PPROPERTIES subcommand: a list of `&NAME` references to
   postcomputes already known to CT, followed by any of the LABEL, FORMAT,
   and HIDESOURCECATS settings.  Each setting is applied to every listed
   postcompute.  A name that is not found is reported with lex_error().

   NOTE(review): the declaration of n_pcs, the statements that append to PCS,
   the cleanup of PCS, and the return statements are on lines omitted from
   this listing. */
5806 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5808 struct ctables_postcompute **pcs = NULL;
5810 size_t allocated_pcs = 0;
/* Collect the referenced postcomputes, one per leading `&`. */
5812 while (lex_match (lexer, T_AND))
5814 if (!lex_force_id (lexer))
5816 struct ctables_postcompute *pc
5817 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5820 lex_error (lexer, _("Unknown computed category &%s."),
5821 lex_tokcstr (lexer));
5826 if (n_pcs >= allocated_pcs)
5827 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply settings until `/` or end of command. */
5831 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5833 if (lex_match_id (lexer, "LABEL"))
5835 lex_match (lexer, T_EQUALS);
5836 if (!lex_force_string (lexer))
/* Replace each label with its own heap copy of the string token. */
5839 for (size_t i = 0; i < n_pcs; i++)
5841 free (pcs[i]->label);
5842 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5847 else if (lex_match_id (lexer, "FORMAT"))
5849 lex_match (lexer, T_EQUALS);
5851 struct ctables_summary_spec_set sss;
5852 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Give each postcompute its own clone of the parsed spec set; the local
   copy is released after the loop.
   NOTE(review): the visible lines show the old specs being uninit'ed; any
   guarding condition or free() of the old pointer is on omitted lines. */
5855 for (size_t i = 0; i < n_pcs; i++)
5858 ctables_summary_spec_set_uninit (pcs[i]->specs);
5860 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5861 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5863 ctables_summary_spec_set_uninit (&sss);
5865 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5867 lex_match (lexer, T_EQUALS);
5868 bool hide_source_cats;
5869 if (!parse_bool (lexer, &hide_source_cats))
5871 for (size_t i = 0; i < n_pcs; i++)
5872 pcs[i]->hide_source_cats = hide_source_cats;
/* None of the three keywords matched. */
5876 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the local time corresponding to NOW, formatted by
   strftime() according to FORMAT.  The text is staged in `value`, whose
   declaration is on a line omitted from this listing.

   NOTE(review): neither the localtime() result nor the strftime() return
   value is checked in the visible lines. */
5889 put_strftime (struct string *out, time_t now, const char *format)
5891 const struct tm *tm = localtime (&now);
5893 strftime (value, sizeof value, format, tm);
5894 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are on lines omitted from this
   listing; the caller put_title_text() uses the result as a condition
   meaning "the prefix was present and has been skipped". */
5898 skip_prefix (struct substring *s, struct substring prefix)
5900 if (ss_starts_with (*s, prefix))
5902 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of LEXER's tokens at offsets
   [EXPR_START, EXPR_END).

   An identifier that names a variable in DICT with a label is replaced by
   that label; any other identifier is emitted as written.  Remaining tokens
   are emitted via lex_ofs_representation(), preceded by a space unless the
   token is the first one, is `)`, or OUT already ends in a space, and
   followed by a space unless the token is the last one or is `(`.

   NOTE(review): the declaration of `nest`, the bodies of the two bracket
   branches, and possibly further branches are on lines omitted from this
   listing, so how bracketed content is treated cannot be confirmed here.
   `repr` is presumably freed on an omitted line -- confirm. */
5910 put_table_expression (struct string *out, struct lexer *lexer,
5911 struct dictionary *dict, int expr_start, int expr_end)
5914 for (int ofs = expr_start; ofs < expr_end; ofs++)
5916 const struct token *t = lex_ofs_token (lexer, ofs);
5917 if (t->type == T_LBRACK)
5919 else if (t->type == T_RBRACK && nest > 0)
5925 else if (t->type == T_ID)
/* Prefer the variable label when the identifier resolves to a labeled
   variable. */
5927 const struct variable *var
5928 = dict_lookup_var (dict, t->string.string);
5929 const char *label = var ? var_get_label (var) : NULL;
5930 ds_put_cstr (out, label ? label : t->string.string);
/* Any other token: emit its source representation with the spacing rules
   described above. */
5934 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5935 ds_put_byte (out, ' ');
5937 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5938 ds_put_cstr (out, repr);
5941 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5942 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the keywords )DATE, )TIME, and
   )TABLE.  Text up to each `)` is copied verbatim.  )DATE and )TIME are
   replaced by NOW formatted with "%x" and "%X" respectively, )TABLE by the
   table expression between token offsets EXPR_START and EXPR_END, and any
   other `)` is copied literally.

   NOTE(review): the enclosing loop and its exit are on lines omitted from
   this listing. */
5948 put_title_text (struct string *out, struct substring in, time_t now,
5949 struct lexer *lexer, struct dictionary *dict,
5950 int expr_start, int expr_end)
/* Copy everything before the next `)` and stop looking once IN is used
   up. */
5954 size_t chunk = ss_find_byte (in, ')');
5955 ds_put_substring (out, ss_head (in, chunk));
5956 ss_advance (&in, chunk);
5957 if (ss_is_empty (in))
/* IN now starts with `)`: try each keyword in turn. */
5960 if (skip_prefix (&in, ss_cstr (")DATE")))
5961 put_strftime (out, now, "%x");
5962 else if (skip_prefix (&in, ss_cstr (")TIME")))
5963 put_strftime (out, now, "%X");
5964 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5965 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: keep the `)` itself and move past it. */
5968 ds_put_byte (out, ')');
5969 ss_advance (&in, 1);
5975 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5977 struct casereader *input = NULL;
5979 struct measure_guesser *mg = measure_guesser_create (ds);
5982 input = proc_open (ds);
5983 measure_guesser_run (mg, input);
5984 measure_guesser_destroy (mg);
5987 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5988 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5989 enum settings_value_show tvars = settings_get_show_variables ();
5990 for (size_t i = 0; i < n_vars; i++)
5991 vlabels[i] = (enum ctables_vlabel) tvars;
5993 struct pivot_table_look *look = pivot_table_look_unshare (
5994 pivot_table_look_ref (pivot_table_look_get_default ()));
5995 look->omit_empty = false;
5997 struct ctables *ct = xmalloc (sizeof *ct);
5998 *ct = (struct ctables) {
5999 .dict = dataset_dict (ds),
6001 .ctables_formats = FMT_SETTINGS_INIT,
6003 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6006 time_t now = time (NULL);
6011 const char *dot_string;
6012 const char *comma_string;
6014 static const struct ctf ctfs[4] = {
6015 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6016 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6017 { CTEF_PAREN, "-,(,),", "-.(.)." },
6018 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6020 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6021 for (size_t i = 0; i < 4; i++)
6023 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6024 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6025 fmt_number_style_from_string (s));
6028 if (!lex_force_match (lexer, T_SLASH))
6031 while (!lex_match_id (lexer, "TABLE"))
6033 if (lex_match_id (lexer, "FORMAT"))
6035 double widths[2] = { SYSMIS, SYSMIS };
6036 double units_per_inch = 72.0;
6038 int start_ofs = lex_ofs (lexer);
6039 while (lex_token (lexer) != T_SLASH)
6041 if (lex_match_id (lexer, "MINCOLWIDTH"))
6043 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6046 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6048 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6051 else if (lex_match_id (lexer, "UNITS"))
6053 lex_match (lexer, T_EQUALS);
6054 if (lex_match_id (lexer, "POINTS"))
6055 units_per_inch = 72.0;
6056 else if (lex_match_id (lexer, "INCHES"))
6057 units_per_inch = 1.0;
6058 else if (lex_match_id (lexer, "CM"))
6059 units_per_inch = 2.54;
6062 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6066 else if (lex_match_id (lexer, "EMPTY"))
6071 lex_match (lexer, T_EQUALS);
6072 if (lex_match_id (lexer, "ZERO"))
6074 /* Nothing to do. */
6076 else if (lex_match_id (lexer, "BLANK"))
6077 ct->zero = xstrdup ("");
6078 else if (lex_force_string (lexer))
6080 ct->zero = ss_xstrdup (lex_tokss (lexer));
6086 else if (lex_match_id (lexer, "MISSING"))
6088 lex_match (lexer, T_EQUALS);
6089 if (!lex_force_string (lexer))
6093 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6094 ? ss_xstrdup (lex_tokss (lexer))
6100 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6101 "UNITS", "EMPTY", "MISSING");
6106 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6107 && widths[0] > widths[1])
6109 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6110 _("MINCOLWIDTH must not be greater than "
6115 for (size_t i = 0; i < 2; i++)
6116 if (widths[i] != SYSMIS)
6118 int *wr = ct->look->width_ranges[TABLE_HORZ];
6119 wr[i] = widths[i] / units_per_inch * 96.0;
6124 else if (lex_match_id (lexer, "VLABELS"))
6126 if (!lex_force_match_id (lexer, "VARIABLES"))
6128 lex_match (lexer, T_EQUALS);
6130 struct variable **vars;
6132 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6136 if (!lex_force_match_id (lexer, "DISPLAY"))
6141 lex_match (lexer, T_EQUALS);
6143 enum ctables_vlabel vlabel;
6144 if (lex_match_id (lexer, "DEFAULT"))
6145 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6146 else if (lex_match_id (lexer, "NAME"))
6148 else if (lex_match_id (lexer, "LABEL"))
6149 vlabel = CTVL_LABEL;
6150 else if (lex_match_id (lexer, "BOTH"))
6152 else if (lex_match_id (lexer, "NONE"))
6156 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6162 for (size_t i = 0; i < n_vars; i++)
6163 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6166 else if (lex_match_id (lexer, "MRSETS"))
6168 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6170 lex_match (lexer, T_EQUALS);
6171 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6174 else if (lex_match_id (lexer, "SMISSING"))
6176 if (lex_match_id (lexer, "VARIABLE"))
6177 ct->smissing_listwise = false;
6178 else if (lex_match_id (lexer, "LISTWISE"))
6179 ct->smissing_listwise = true;
6182 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6186 else if (lex_match_id (lexer, "PCOMPUTE"))
6188 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6191 else if (lex_match_id (lexer, "PPROPERTIES"))
6193 if (!ctables_parse_pproperties (lexer, ct))
6196 else if (lex_match_id (lexer, "WEIGHT"))
6198 if (!lex_force_match_id (lexer, "VARIABLE"))
6200 lex_match (lexer, T_EQUALS);
6201 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6205 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6207 if (lex_match_id (lexer, "COUNT"))
6209 lex_match (lexer, T_EQUALS);
6210 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6213 ct->hide_threshold = lex_integer (lexer);
6216 else if (ct->hide_threshold == 0)
6217 ct->hide_threshold = 5;
6221 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6222 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6223 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6224 if (lex_match_id (lexer, "SLABELS")
6225 || lex_match_id (lexer, "CLABELS")
6226 || lex_match_id (lexer, "CRITERIA")
6227 || lex_match_id (lexer, "CATEGORIES")
6228 || lex_match_id (lexer, "TITLES")
6229 || lex_match_id (lexer, "SIGTEST")
6230 || lex_match_id (lexer, "COMPARETEST"))
6231 lex_next_msg (lexer, SN, -1, -1,
6232 _("TABLE must appear before this subcommand."));
6236 if (!lex_force_match (lexer, T_SLASH))
6240 size_t allocated_tables = 0;
6243 if (ct->n_tables >= allocated_tables)
6244 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6245 sizeof *ct->tables);
6247 struct ctables_category *cat = xmalloc (sizeof *cat);
6248 *cat = (struct ctables_category) {
6250 .include_missing = false,
6251 .sort_ascending = true,
6254 struct ctables_categories *c = xmalloc (sizeof *c);
6255 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6256 *c = (struct ctables_categories) {
6263 struct ctables_categories **categories = xnmalloc (n_vars,
6264 sizeof *categories);
6265 for (size_t i = 0; i < n_vars; i++)
6268 struct ctables_table *t = xmalloc (sizeof *t);
6269 *t = (struct ctables_table) {
6271 .slabels_axis = PIVOT_AXIS_COLUMN,
6272 .slabels_visible = true,
6273 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6275 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6276 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6277 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6279 .clabels_from_axis = PIVOT_AXIS_LAYER,
6280 .clabels_to_axis = PIVOT_AXIS_LAYER,
6281 .categories = categories,
6282 .n_categories = n_vars,
6285 ct->tables[ct->n_tables++] = t;
6287 lex_match (lexer, T_EQUALS);
6288 int expr_start = lex_ofs (lexer);
6289 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6290 &t->axes[PIVOT_AXIS_ROW]))
6292 if (lex_match (lexer, T_BY))
6294 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6295 &t->axes[PIVOT_AXIS_COLUMN]))
6298 if (lex_match (lexer, T_BY))
6300 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6301 &t->axes[PIVOT_AXIS_LAYER]))
6305 int expr_end = lex_ofs (lexer);
6307 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6308 && !t->axes[PIVOT_AXIS_LAYER])
6310 lex_error (lexer, _("At least one variable must be specified."));
6314 const struct ctables_axis *scales[PIVOT_N_AXES];
6315 size_t n_scales = 0;
6316 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6318 scales[a] = find_scale (t->axes[a]);
6324 msg (SE, _("Scale variables may appear only on one axis."));
6325 if (scales[PIVOT_AXIS_ROW])
6326 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6327 _("This scale variable appears on the rows axis."));
6328 if (scales[PIVOT_AXIS_COLUMN])
6329 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6330 _("This scale variable appears on the columns axis."));
6331 if (scales[PIVOT_AXIS_LAYER])
6332 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6333 _("This scale variable appears on the layer axis."));
6337 const struct ctables_axis *summaries[PIVOT_N_AXES];
6338 size_t n_summaries = 0;
6339 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6341 summaries[a] = (scales[a]
6343 : find_categorical_summary_spec (t->axes[a]));
6347 if (n_summaries > 1)
6349 msg (SE, _("Summaries may appear only on one axis."));
6350 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6353 msg_at (SN, summaries[a]->loc,
6355 ? _("This variable on the rows axis has a summary.")
6356 : a == PIVOT_AXIS_COLUMN
6357 ? _("This variable on the columns axis has a summary.")
6358 : _("This variable on the layers axis has a summary."));
6360 msg_at (SN, summaries[a]->loc,
6361 _("This is a scale variable, so it always has a "
6362 "summary even if the syntax does not explicitly "
6367 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6368 if (n_summaries ? summaries[a] : t->axes[a])
6370 t->summary_axis = a;
6374 if (lex_token (lexer) == T_ENDCMD)
6376 if (!ctables_prepare_table (t, lexer))
6380 if (!lex_force_match (lexer, T_SLASH))
6383 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6385 if (lex_match_id (lexer, "SLABELS"))
6387 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6389 if (lex_match_id (lexer, "POSITION"))
6391 lex_match (lexer, T_EQUALS);
6392 if (lex_match_id (lexer, "COLUMN"))
6393 t->slabels_axis = PIVOT_AXIS_COLUMN;
6394 else if (lex_match_id (lexer, "ROW"))
6395 t->slabels_axis = PIVOT_AXIS_ROW;
6396 else if (lex_match_id (lexer, "LAYER"))
6397 t->slabels_axis = PIVOT_AXIS_LAYER;
6400 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6404 else if (lex_match_id (lexer, "VISIBLE"))
6406 lex_match (lexer, T_EQUALS);
6407 if (!parse_bool (lexer, &t->slabels_visible))
6412 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6417 else if (lex_match_id (lexer, "CLABELS"))
6419 int start_ofs = lex_ofs (lexer) - 1;
6420 if (lex_match_id (lexer, "AUTO"))
6422 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6423 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6425 else if (lex_match_id (lexer, "ROWLABELS"))
6427 lex_match (lexer, T_EQUALS);
6428 if (lex_match_id (lexer, "OPPOSITE"))
6429 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6430 else if (lex_match_id (lexer, "LAYER"))
6431 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6434 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6438 else if (lex_match_id (lexer, "COLLABELS"))
6440 lex_match (lexer, T_EQUALS);
6441 if (lex_match_id (lexer, "OPPOSITE"))
6442 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6443 else if (lex_match_id (lexer, "LAYER"))
6444 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6447 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6453 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6457 int end_ofs = lex_ofs (lexer) - 1;
6459 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6460 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6462 msg (SE, _("ROWLABELS and COLLABELS may not both be "
6465 lex_ofs_msg (lexer, SN, t->clabels_start_ofs,
6467 _("This is the first specification."));
6468 lex_ofs_msg (lexer, SN, start_ofs, end_ofs,
6469 _("This is the second specification."));
6473 t->clabels_start_ofs = start_ofs;
6474 t->clabels_end_ofs = end_ofs;
6476 else if (lex_match_id (lexer, "CRITERIA"))
6478 if (!lex_force_match_id (lexer, "CILEVEL"))
6480 lex_match (lexer, T_EQUALS);
6482 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6484 t->cilevel = lex_number (lexer);
6487 else if (lex_match_id (lexer, "CATEGORIES"))
6489 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6493 else if (lex_match_id (lexer, "TITLES"))
6498 if (lex_match_id (lexer, "CAPTIONS"))
6499 textp = &t->caption;
6500 else if (lex_match_id (lexer, "CORNERS"))
6502 else if (lex_match_id (lexer, "TITLES"))
6506 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6509 lex_match (lexer, T_EQUALS);
6511 struct string s = DS_EMPTY_INITIALIZER;
6512 while (lex_is_string (lexer))
6514 if (!ds_is_empty (&s))
6515 ds_put_byte (&s, ' ');
6516 put_title_text (&s, lex_tokss (lexer), now,
6517 lexer, dataset_dict (ds),
6518 expr_start, expr_end);
6522 *textp = ds_steal_cstr (&s);
6524 while (lex_token (lexer) != T_SLASH
6525 && lex_token (lexer) != T_ENDCMD);
6527 else if (lex_match_id (lexer, "SIGTEST"))
6529 int start_ofs = lex_ofs (lexer) - 1;
6532 t->chisq = xmalloc (sizeof *t->chisq);
6533 *t->chisq = (struct ctables_chisq) {
6535 .include_mrsets = true,
6536 .all_visible = true,
6542 if (lex_match_id (lexer, "TYPE"))
6544 lex_match (lexer, T_EQUALS);
6545 if (!lex_force_match_id (lexer, "CHISQUARE"))
6548 else if (lex_match_id (lexer, "ALPHA"))
6550 lex_match (lexer, T_EQUALS);
6551 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6553 t->chisq->alpha = lex_number (lexer);
6556 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6558 lex_match (lexer, T_EQUALS);
6559 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6562 else if (lex_match_id (lexer, "CATEGORIES"))
6564 lex_match (lexer, T_EQUALS);
6565 if (lex_match_id (lexer, "ALLVISIBLE"))
6566 t->chisq->all_visible = true;
6567 else if (lex_match_id (lexer, "SUBTOTALS"))
6568 t->chisq->all_visible = false;
6571 lex_error_expecting (lexer,
6572 "ALLVISIBLE", "SUBTOTALS");
6578 lex_error_expecting (lexer, "TYPE", "ALPHA",
6579 "INCLUDEMRSETS", "CATEGORIES");
6583 while (lex_token (lexer) != T_SLASH
6584 && lex_token (lexer) != T_ENDCMD);
6586 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6587 _("Support for SIGTEST not yet implemented."));
6590 else if (lex_match_id (lexer, "COMPARETEST"))
6592 int start_ofs = lex_ofs (lexer) - 1;
6595 t->pairwise = xmalloc (sizeof *t->pairwise);
6596 *t->pairwise = (struct ctables_pairwise) {
6598 .alpha = { .05, .05 },
6599 .adjust = BONFERRONI,
6600 .include_mrsets = true,
6601 .meansvariance_allcats = true,
6602 .all_visible = true,
6611 if (lex_match_id (lexer, "TYPE"))
6613 lex_match (lexer, T_EQUALS);
6614 if (lex_match_id (lexer, "PROP"))
6615 t->pairwise->type = PROP;
6616 else if (lex_match_id (lexer, "MEAN"))
6617 t->pairwise->type = MEAN;
6620 lex_error_expecting (lexer, "PROP", "MEAN");
6624 else if (lex_match_id (lexer, "ALPHA"))
6626 lex_match (lexer, T_EQUALS);
6628 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6630 double a0 = lex_number (lexer);
6633 lex_match (lexer, T_COMMA);
6634 if (lex_is_number (lexer))
6636 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6638 double a1 = lex_number (lexer);
6641 t->pairwise->alpha[0] = MIN (a0, a1);
6642 t->pairwise->alpha[1] = MAX (a0, a1);
6645 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6647 else if (lex_match_id (lexer, "ADJUST"))
6649 lex_match (lexer, T_EQUALS);
6650 if (lex_match_id (lexer, "BONFERRONI"))
6651 t->pairwise->adjust = BONFERRONI;
6652 else if (lex_match_id (lexer, "BH"))
6653 t->pairwise->adjust = BH;
6654 else if (lex_match_id (lexer, "NONE"))
6655 t->pairwise->adjust = 0;
6658 lex_error_expecting (lexer, "BONFERRONI", "BH",
6663 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6665 lex_match (lexer, T_EQUALS);
6666 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6669 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6671 lex_match (lexer, T_EQUALS);
6672 if (lex_match_id (lexer, "ALLCATS"))
6673 t->pairwise->meansvariance_allcats = true;
6674 else if (lex_match_id (lexer, "TESTEDCATS"))
6675 t->pairwise->meansvariance_allcats = false;
6678 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6682 else if (lex_match_id (lexer, "CATEGORIES"))
6684 lex_match (lexer, T_EQUALS);
6685 if (lex_match_id (lexer, "ALLVISIBLE"))
6686 t->pairwise->all_visible = true;
6687 else if (lex_match_id (lexer, "SUBTOTALS"))
6688 t->pairwise->all_visible = false;
6691 lex_error_expecting (lexer, "ALLVISIBLE",
6696 else if (lex_match_id (lexer, "MERGE"))
6698 lex_match (lexer, T_EQUALS);
6699 if (!parse_bool (lexer, &t->pairwise->merge))
6702 else if (lex_match_id (lexer, "STYLE"))
6704 lex_match (lexer, T_EQUALS);
6705 if (lex_match_id (lexer, "APA"))
6706 t->pairwise->apa_style = true;
6707 else if (lex_match_id (lexer, "SIMPLE"))
6708 t->pairwise->apa_style = false;
6711 lex_error_expecting (lexer, "APA", "SIMPLE");
6715 else if (lex_match_id (lexer, "SHOWSIG"))
6717 lex_match (lexer, T_EQUALS);
6718 if (!parse_bool (lexer, &t->pairwise->show_sig))
6723 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6724 "INCLUDEMRSETS", "MEANSVARIANCE",
6725 "CATEGORIES", "MERGE", "STYLE",
6730 while (lex_token (lexer) != T_SLASH
6731 && lex_token (lexer) != T_ENDCMD);
6733 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6734 _("Support for COMPARETEST not yet implemented."));
6739 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6740 "CRITERIA", "CATEGORIES", "TITLES",
6741 "SIGTEST", "COMPARETEST");
6742 if (lex_match_id (lexer, "FORMAT")
6743 || lex_match_id (lexer, "VLABELS")
6744 || lex_match_id (lexer, "MRSETS")
6745 || lex_match_id (lexer, "SMISSING")
6746 || lex_match_id (lexer, "PCOMPUTE")
6747 || lex_match_id (lexer, "PPROPERTIES")
6748 || lex_match_id (lexer, "WEIGHT")
6749 || lex_match_id (lexer, "HIDESMALLCOUNTS"))
6750 lex_next_msg (lexer, SN, -1, -1,
6751 _("This subcommand must appear before TABLE."));
6755 if (!lex_match (lexer, T_SLASH))
6759 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6760 t->clabels_from_axis = PIVOT_AXIS_ROW;
6761 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6762 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6763 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6765 if (!ctables_prepare_table (t, lexer))
6768 while (lex_token (lexer) != T_ENDCMD);
6771 input = proc_open (ds);
6772 bool ok = ctables_execute (ds, input, ct);
6773 ok = proc_commit (ds) && ok;
6775 ctables_destroy (ct);
6776 return ok ? CMD_SUCCESS : CMD_FAILURE;
6781 ctables_destroy (ct);