1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Message translation helpers: _() passes MSGID to gettext(), and N_()
   expands to MSGID unchanged. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES. */
/* parse_ctables_summary_function() chooses among these according to the
   optional `U' or `E' prefix on a summary function name. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas. */
/* Each value has a name in ctables_area_type_name[], which
   parse_ctables_summary_function() matches as a prefix of a summary
   function name. */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly. */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
/* Syntax names of the area types, indexed by enum ctables_area_type.
   parse_ctables_summary_function() matches these case-insensitively at the
   start of a function name, and ctables_summary_function_name() emits them
   for area functions. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area. */
/* NOTE(review): the count, valid and total arrays are sized by N_CTWS,
   presumably one element per enum ctables_weighting value -- confirm against
   the definition of N_CTWS. */
104 struct hmap_node node;
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions. */
/* The function type decides which prefixes a function name accepts: the
   ctables_function_info[] table derives its u_prefix, e_prefix and is_area
   flags from it. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has a U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
/* Default output format classes for summary functions.
   ctables_summary_default_format() turns the class assigned to a function
   into a concrete format specification. */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
/* The kinds of variables to which a summary function may be applied.
   add_summary_spec() checks this when a function is attached to a
   variable. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
/* The summary functions. The enumerators are generated by expanding S()
   over the entries in ctables.inc, so that they appear in the same order as
   the rows of every other table built from that file. */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
/* Expanding S() to `+1' once per ctables.inc entry adds up to the number of
   entries; presumably this is how N_CTSF_FUNCTIONS is defined (the enclosing
   declaration is not visible here -- confirm). */
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
/* Static properties of one summary function: one row of the
   ctables_function_info[] table. */
176 struct ctables_function_info
178 struct substring basename;
179 enum ctables_function_type type;
180 enum ctables_format format;
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
/* Properties of every summary function, indexed by enum
   ctables_summary_function and generated by expanding S() over ctables.inc.

   The prefix flags follow from the function type: CTFT_UCELL, CTFT_UECELL
   and CTFT_AREA functions accept a `U' prefix, only CTFT_UECELL functions
   accept an `E' prefix, and only CTFT_AREA functions take an area prefix. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
/* Returns the default output format for summary FUNCTION.

   The format class that ctables.inc assigns to FUNCTION selects the result:
   an F format of width 40, a PCT format of width 40 with 1 decimal, or the
   print format of VAR. VAR is only consulted in the last case. */
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
220 return *var_get_print_format (var);
/* Returns the availability class that ctables.inc assigns to summary
   function F, looked up in a table generated by expanding S() over that
   file. */
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
/* Parses the name of a summary function from the identifier at the current
   token of LEXER, storing the results in *FUNCTION, *WEIGHTING and *AREA.

   A name is made up of, in order: an optional `U' or `E' weighting prefix,
   an optional area prefix taken from ctables_area_type_name[], and the base
   name of a function from ctables_function_info[]. All comparisons ignore
   case. A bare area prefix followed by `PCT' is accepted as shorthand for
   the area's PCT.COUNT function.

   Names ending in .LCL, .UCL or .SE are rejected as not yet implemented, and
   a name that matches no function is reported as a syntax error.

   NOTE(review): the `PCT' shorthand stores CTW_EFFECTIVE in *WEIGHTING even
   though a `U' or `E' prefix may already have been stripped from the name;
   confirm whether e.g. UROWPCT is meant to be accepted as a weighted
   percentage. */
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
/* Strip the weighting prefix; `E' is only considered if `U' did not match. */
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
/* Area names are tried in enum order, which is why CTAT_LAYER must follow
   CTAT_LAYERROW and CTAT_LAYERCOL: otherwise "LAYER" would match first and
   leave "ROW..." or "COL..." behind. */
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
/* The base name matched, but the prefixes must also be ones that this
   function accepts, and an area prefix must be present exactly when the
   function needs one. */
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Syntax error expecting summary function name."));
/* Formats the syntax name of FUNCTION, as qualified by WEIGHTING and AREA,
   into BUFFER, writing at most BUFSIZE bytes with snprintf().

   The name is built from a weighting prefix ("U" for CTW_UNWEIGHTED, nothing
   for CTW_DICTIONARY, and "E" for effective weighting of a function that
   accepts an `E' prefix), the name of AREA if FUNCTION is an area function,
   and the function's base name. */
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
/* Returns the default label for FUNCTION as qualified by WEIGHTING and AREA.

   Labels are marked with N_() only, so the returned string is the
   untranslated message ID (the ID labels are deliberately left unmarked).
   Unweighted variants get an "Unweighted" prefix, and the count-like
   functions use "Adjusted" for effective (non-dictionary) weighting.

   CTSF_PTILE must not be passed in, because its label depends on the
   percentile; ctables_summary_function_label() handles it. */
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
/* W: any weighting is in effect. D: specifically dictionary weighting. */
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the default label for FUNCTION as
   qualified by WEIGHTING and AREA.

   CTSF_PTILE is labeled with a translated, freshly formatted string that
   includes the percentile with two decimals. Every other function uses the
   fixed label from ctables_summary_function_label__(). */
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
/* The pivot value takes over the string allocated by xasprintf(). */
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries. */
/* One summary function applied to one variable: what to calculate and how
   to present the result. add_summary_spec() creates these. */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighting' specifies which
445 form of weighting to apply to the data. (NOTE(review): this comment
446 formerly described a boolean 'weighted' that "must be true" for functions
that do not support a choice; the member is now an enum ctables_weighting,
so confirm which value such functions require.) 'calc_area' is the area
over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0). */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
one.
460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
/* Initializes DST from SRC. The label is duplicated with
   xstrdup_if_nonnull(), so DST gets its own copy of the string, or NULL if
   SRC has no label. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
/* Releases the resources held by S.
   NOTE(review): the body is not visible here; presumably it frees S's label,
   which is the member that ctables_summary_spec_clone() duplicates --
   confirm. */
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions. */
/* 'specs' is a growable array: add_summary_spec() enlarges it with
   x2nrealloc() when the number of specs in use reaches the allocated
   capacity. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs;
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.

   DST gets a newly allocated array of specs, each one cloned from SRC with
   ctables_summary_spec_clone(); no array is allocated if SRC has no specs.
   The remaining members come from the compound literal assigned to *DST, so
   any member that the literal does not name starts out zeroed. */
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
/* Releases the resources held by SET: uninitializes each of its specs with
   ctables_summary_spec_uninit() and frees the array of listwise
   variables. */
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
/* Tests the numeric value in case C of each listwise variable in SPECS with
   var_is_num_missing().

   NOTE(review): the return statements are not visible here; presumably the
   result is true as soon as one listwise variable is missing in C and false
   otherwise, which is what SMISSING=LISTWISE needs -- confirm. */
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions. */
/* A named postcompute: the expression to evaluate, where it was defined,
   and optional display settings. */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
564 struct ctables_pcexpr *expr;
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
/* A node in a postcompute expression tree. 'op' (an enum
   ctables_pcexpr_op) says what kind of node this is and therefore which of
   the operand members below is meaningful. */
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE. */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
/* Forward declarations for functions defined later in the file. */
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
/* Type of the function that parses one precedence level of a postcompute
   expression. The binary-operator parser calls one of these to obtain each
   operand. */
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
/* Destroys expression E together with everything it owns.

   What needs freeing depends on E's operation: the string of a
   CTPO_CAT_STRING, both endpoint strings of a CTPO_CAT_SRANGE, and,
   recursively, the sub-expressions of operator nodes. The remaining
   category operations listed below own no extra storage. E's source
   location is destroyed in every case. */
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
/* Allocates and returns a new expression node with operands SUB0 and SUB1.
   The pointers are stored as given, so the new node refers to the same
   sub-expression objects. Its source location is the merger of the
   operands' locations, so neither operand may be null. */
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator. */
/* Pairs the lexer token that spells an operator with the expression
   operation that it produces. */
697 enum token_type token;
698 enum ctables_pcexpr_op op;
/* Looks for an entry among the N_OPS operators in OPS whose token is the
   current token in LEXER.

   T_NEG_NUM is singled out by the test below.
   NOTE(review): the guarded statement is not visible here; presumably every
   other token is consumed while a negative number is left in place to be
   parsed as the right-hand operand (ctables_pcexpr_parse_add() maps
   T_NEG_NUM to CTPO_ADD) -- confirm. */
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS operators in OPS,
   starting from left-hand operand LHS, which has already been parsed. Each
   right-hand operand is parsed with PARSE_NEXT_LEVEL and combined with the
   expression so far into a new binary node, so chains associate to the
   left.

   If CHAIN_WARNING is nonnull, it is issued as a warning when the chain
   turns out to contain more than one operator.

   LHS is destroyed if a right-hand operand fails to parse. */
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators consumed so far. */
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first an operand, using
   PARSE_NEXT_LEVEL, and then any chain of the N_OPS operators in OPS that
   follows it, by way of ctables_pcexpr_parse_binary_operators__().
   CHAIN_WARNING is passed along unchanged. */
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
/* Forward declaration: ctables_pcexpr_parse_primary() calls this to parse a
   parenthesized sub-expression. */
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
/* Recodes the text of the current token in LEXER from UTF-8 into the
   encoding of DICT and trims trailing spaces from the result. A null pool
   is passed to recode_substring_pool(), and the callers release the result
   with ss_dealloc(). */
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated node.

   The accepted forms are:

   - A number, yielding CTPO_CONSTANT.
   - The keywords MISSING, OTHERNM and TOTAL.
   - SUBTOTAL, optionally followed by a positive index in square brackets.
   - A bracketed category: a number, a string, or a range built with THRU,
     whose ends may be LO and HI. Strings are recoded into DICT's encoding.
   - A parenthesized expression, which is parsed recursively.

   The node is assembled in local variable E and only copied to the heap at
   the very end, so the error paths need only free the strings that E may
   own. */
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
/* Index 0 stands for SUBTOTAL written without an explicit index. */
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
/* A null string marks the open low end of a string range. */
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
893 lex_error (lexer, NULL);
/* The closing bracket is missing: free any strings that E owns before
   giving up. */
897 if (!lex_force_match (lexer, T_RBRACK))
899 if (e.op == CTPO_CAT_STRING)
900 ss_dealloc (&e.string);
901 else if (e.op == CTPO_CAT_SRANGE)
903 ss_dealloc (&e.srange[0]);
904 ss_dealloc (&e.srange[1]);
909 else if (lex_match (lexer, T_LPAREN))
911 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
914 if (!lex_force_match (lexer, T_RPAREN))
916 ctables_pcexpr_destroy (ep);
923 lex_error (lexer, NULL);
/* Record the span of tokens consumed, then move E to the heap. */
927 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
928 return xmemdup (&e, sizeof e);
/* Allocates and returns a new expression node whose source location runs
   from token offset START_OFS to the token just before LEXER's current one.
   The call sites use it to wrap SUB in a negation. */
931 static struct ctables_pcexpr *
932 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
933 struct lexer *lexer, int start_ofs)
935 struct ctables_pcexpr *e = xmalloc (sizeof *e);
936 *e = (struct ctables_pcexpr) {
939 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   Normally this is an ordinary chain of `**' operators over primaries, with
   a warning for chains because `**' is left-associative here.

   A negative number token immediately followed by `**' is handled
   separately: the negated token value becomes the left operand of the
   exponentiation chain, and the finished chain is then wrapped with
   ctables_pcexpr_allocate_neg(). */
944 static struct ctables_pcexpr *
945 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
947 static const struct operator op = { T_EXP, CTPO_POW };
949 const char *chain_warning =
950 _("The exponentiation operator (`**') is left-associative: "
951 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
952 "To disable this warning, insert parentheses.");
954 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
955 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
956 ctables_pcexpr_parse_primary,
959 /* Special case for situations like "-5**6", which must be parsed as
962 int start_ofs = lex_ofs (lexer);
963 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
964 *lhs = (struct ctables_pcexpr) {
966 .number = -lex_tokval (lexer),
967 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
971 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
973 ctables_pcexpr_parse_primary, chain_warning, lhs);
977 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
980 /* Parses the unary minus level. */
/* Without a leading `-' this defers to the exponentiation level. With one,
   it parses the operand by calling itself, so that repeated minus signs
   nest, and wraps the result with ctables_pcexpr_allocate_neg(). */
981 static struct ctables_pcexpr *
982 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
984 int start_ofs = lex_ofs (lexer);
985 if (!lex_match (lexer, T_DASH))
986 return ctables_pcexpr_parse_exp (lexer, dict);
988 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
992 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
995 /* Parses the multiplication and division level. */
996 static struct ctables_pcexpr *
997 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
999 static const struct operator ops[] =
1001 { T_ASTERISK, CTPO_MUL },
1002 { T_SLASH, CTPO_DIV },
1005 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1006 sizeof ops / sizeof *ops,
1007 ctables_pcexpr_parse_neg, NULL);
1010 /* Parses the addition and subtraction level. */
1011 static struct ctables_pcexpr *
1012 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1014 static const struct operator ops[] =
1016 { T_PLUS, CTPO_ADD },
1017 { T_DASH, CTPO_SUB },
1018 { T_NEG_NUM, CTPO_ADD },
1021 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1022 ops, sizeof ops / sizeof *ops,
1023 ctables_pcexpr_parse_mul, NULL);
1026 /* CTABLES axis expressions. */
1028 /* CTABLES has a number of extra formats that we implement via custom
1029 currency specifications on an alternate fmt_settings. */
/* parse_ctables_format_specifier() maps the format names NEGPAREN, NEQUAL,
   PAREN and PCTPAREN to these. */
1030 #define CTEF_NEGPAREN FMT_CCA
1031 #define CTEF_NEQUAL FMT_CCB
1032 #define CTEF_PAREN FMT_CCC
1033 #define CTEF_PCTPAREN FMT_CCD
/* Which set of summary specs is meant. The code in this file uses CSV_CELL
   for the ordinary specs and CSV_TOTAL for those given inside TOTALS[...];
   N_CSVS sizes the per-variant array in an axis. */
1035 enum ctables_summary_variant
/* The kind of an axis expression node. The code in this file tests for
   CTAO_VAR, a leaf that names a variable. */
1044 enum ctables_axis_op
/* Members of an axis expression node: the variable and per-variant summary
   specs of a leaf, the two sub-axes of a nonterminal, and the source
   location. */
1060 struct variable *var;
1062 struct ctables_summary_spec_set specs[N_CSVS];
1066 struct ctables_axis *subs[2];
1069 struct msg_location *loc;
/* Destroys AXIS and what it owns.

   The visible statements uninitialize each of the N_CSVS summary spec sets,
   recursively destroy both sub-axes, and destroy the source location. Lines
   not shown here decide which of these steps apply to which kind of
   axis. */
1073 ctables_axis_destroy (struct ctables_axis *axis)
1081 for (size_t i = 0; i < N_CSVS; i++)
1082 ctables_summary_spec_set_uninit (&axis->specs[i]);
1087 ctables_axis_destroy (axis->subs[0]);
1088 ctables_axis_destroy (axis->subs[1]);
1091 msg_location_destroy (axis->loc);
/* Allocates and returns a new axis node with sub-axes SUB0 and SUB1. Its
   source location runs from token offset START_OFS to the token just before
   LEXER's current one. */
1095 static struct ctables_axis *
1096 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1097 struct ctables_axis *sub0,
1098 struct ctables_axis *sub1,
1099 struct lexer *lexer, int start_ofs)
1101 struct ctables_axis *axis = xmalloc (sizeof *axis);
1102 *axis = (struct ctables_axis) {
1104 .subs = { sub0, sub1 },
1105 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State shared by the axis expression parsing functions: the lexer to read
   tokens from and the dictionary in which to look up variables. */
1110 struct ctables_axis_parse_ctx
1112 struct lexer *lexer;
1113 struct dictionary *dict;
/* Returns a new pivot_value to use as the label for SPEC.

   One path returns the default label for SPEC's function, from
   ctables_summary_function_label(). The other starts from SPEC's own label
   and replaces every occurrence of ")CILEVEL" in it by CILEVEL formatted
   with "%g".

   NOTE(review): the test that chooses between the two paths is not visible
   here; presumably the default applies when SPEC has no user-specified
   label -- confirm. */
1116 static struct pivot_value *
1117 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1120 return ctables_summary_function_label (spec->function, spec->weighting,
1121 spec->user_area, spec->percentile);
1124 struct substring in = ss_cstr (spec->label);
1125 struct substring target = ss_cstr (")CILEVEL");
1127 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next occurrence of TARGET, then step over
   it in the input. */
1130 size_t chunk = ss_find_substring (in, target);
1131 ds_put_substring (&out, ss_head (in, chunk));
1132 ss_advance (&in, chunk);
1134 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1136 ss_advance (&in, target.length);
1137 ds_put_format (&out, "%g", cilevel);
/* Adds a summary specification to every variable under AXIS.

   When AXIS is a single variable (CTAO_VAR), FUNCTION's availability is
   checked against it first. A scale-only function applied to a variable
   that is not marked scale is an error, unless the spec is being added to
   the totals (SV is CSV_TOTAL). The error is reported at LOC, with a note
   at the variable's own location. The new spec is then appended to the
   variable's spec set for SV, which grows as needed. LABEL is copied if it
   is nonnull, and a null FORMAT selects FUNCTION's default format for the
   variable.

   For any other kind of axis, the function is applied recursively to both
   sub-axes. */
1143 add_summary_spec (struct ctables_axis *axis,
1144 enum ctables_summary_function function,
1145 enum ctables_weighting weighting,
1146 enum ctables_area_type area, double percentile,
1147 const char *label, const struct fmt_spec *format,
1148 bool is_ctables_format, const struct msg_location *loc,
1149 enum ctables_summary_variant sv)
1151 if (axis->op == CTAO_VAR)
/* The function's full syntax name is only needed for error messages. */
1153 char function_name[128];
1154 ctables_summary_function_name (function, weighting, area,
1155 function_name, sizeof function_name);
1156 const char *var_name = var_get_name (axis->var);
1157 switch (ctables_function_availability (function))
1161 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1162 "response sets."), function_name);
1163 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1169 if (!axis->scale && sv != CSV_TOTAL)
1172 _("Summary function %s applies only to scale variables."),
1174 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1184 struct ctables_summary_spec_set *set = &axis->specs[sv];
1185 if (set->n >= set->allocated)
1186 set->specs = x2nrealloc (set->specs, &set->allocated,
1187 sizeof *set->specs);
1189 struct ctables_summary_spec *dst = &set->specs[set->n++];
1190 *dst = (struct ctables_summary_spec) {
1191 .function = function,
1192 .weighting = weighting,
1195 .percentile = percentile,
1196 .label = xstrdup_if_nonnull (label),
1197 .format = (format ? *format
1198 : ctables_summary_default_format (function, axis->var)),
1199 .is_ctables_format = is_ctables_format,
1205 for (size_t i = 0; i < 2; i++)
1206 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1207 percentile, label, format, is_ctables_format,
/* Forward declaration: ctables_axis_parse_primary() calls this to parse a
   parenthesized axis expression. */
1214 static struct ctables_axis *ctables_axis_parse_stack (
1215 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression
   or a single variable.

   Names that begin with `$' are rejected, because multiple response sets
   are not implemented. A variable may be followed by `[S]' or `[C]' to
   force it to be treated as scale or categorical; otherwise its measurement
   level decides. Treating a string variable as scale is an error. */
1217 static struct ctables_axis *
1218 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1220 if (lex_match (ctx->lexer, T_LPAREN))
1222 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1223 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1225 ctables_axis_destroy (sub);
1231 if (!lex_force_id (ctx->lexer))
1234 if (lex_tokcstr (ctx->lexer)[0] == '$')
1236 lex_error (ctx->lexer,
1237 _("Multiple response set support not implemented."));
1241 int start_ofs = lex_ofs (ctx->lexer);
1242 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1246 struct ctables_axis *axis = xmalloc (sizeof *axis);
1247 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] overrides the variable's measurement level. */
1249 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1250 : lex_match_phrase (ctx->lexer, "[C]") ? false
1251 : var_get_measure (var) == MEASURE_SCALE);
1252 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1253 lex_ofs (ctx->lexer) - 1);
1254 if (axis->scale && var_is_alpha (var))
1256 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1258 var_get_name (var));
1259 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false
   otherwise (including when S is the empty string). S must be a
   null-terminated string.

   ctables_axis_parse_postfix() uses this to decide whether an identifier
   should be parsed as an output format. */
static bool
has_digit (const char *s)
{
  /* strcspn() yields the length of the leading run that contains no digit,
     so the byte at that offset is either the first digit or the
     terminator. */
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format specification from LEXER into *FORMAT.

   The type names NEGPAREN, NEQUAL, PAREN and PCTPAREN (in any case) select
   the corresponding CTABLES-specific CTEF_* format and set
   *IS_CTABLES_FORMAT to true. As the error messages below say, these
   formats need a width of at least 2 and a width greater than the number of
   decimals.

   Any other type is parsed as a standard format with
   parse_format_specifier(), checked with fmt_check_output__() and
   fmt_check_type_compat__() for numeric values, and sets *IS_CTABLES_FORMAT
   to false. */
1273 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1274 bool *is_ctables_format)
1276 char type[FMT_TYPE_LEN_MAX + 1];
1277 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1280 if (!strcasecmp (type, "NEGPAREN"))
1281 format->type = CTEF_NEGPAREN;
1282 else if (!strcasecmp (type, "NEQUAL"))
1283 format->type = CTEF_NEQUAL;
1284 else if (!strcasecmp (type, "PAREN"))
1285 format->type = CTEF_PAREN;
1286 else if (!strcasecmp (type, "PCTPAREN"))
1287 format->type = CTEF_PCTPAREN;
1290 *is_ctables_format = false;
1291 if (!parse_format_specifier (lexer, format))
1294 char *error = fmt_check_output__ (format);
1296 error = fmt_check_type_compat__ (format, NULL, VAL_NUMERIC);
1299 lex_next_error (lexer, -1, -1, "%s", error);
1310 lex_next_error (lexer, -1, -1,
1311 _("Output format %s requires width 2 or greater."), type);
1314 else if (format->d > format->w - 1)
1316 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1317 "greater than decimals."), type);
1322 *is_ctables_format = true;
/* Parses a primary axis expression that may be followed by a list of
   summary specifications in square brackets.

   Each specification is made up of a summary function name, a percentile
   between 0 and 100 if the function is PTILE, an optional quoted label, and
   an optional output format. The format is recognized as an identifier that
   contains a digit. Each specification is attached to the variables under
   the axis with add_summary_spec(), and specifications may be separated by
   commas.

   Specifications start out as cell summaries (CSV_CELL). A nested
   `TOTALS[' ... `]' group holds the specifications for totals
   (CSV_TOTAL). */
1327 static struct ctables_axis *
1328 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1330 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1331 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1334 enum ctables_summary_variant sv = CSV_CELL;
1337 int start_ofs = lex_ofs (ctx->lexer);
1339 /* Parse function. */
1340 enum ctables_summary_function function;
1341 enum ctables_weighting weighting;
1342 enum ctables_area_type area;
1343 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1347 /* Parse percentile. */
1348 double percentile = 0;
1349 if (function == CTSF_PTILE)
1351 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1353 percentile = lex_number (ctx->lexer);
1354 lex_get (ctx->lexer);
1359 if (lex_is_string (ctx->lexer))
1361 label = ss_xstrdup (lex_tokss (ctx->lexer));
1362 lex_get (ctx->lexer);
1366 struct fmt_spec format;
1367 const struct fmt_spec *formatp;
1368 bool is_ctables_format = false;
1369 if (lex_token (ctx->lexer) == T_ID
1370 && has_digit (lex_tokcstr (ctx->lexer)))
1372 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1373 &is_ctables_format))
/* LOC covers the tokens of this one specification and is only needed for
   the call to add_summary_spec(). */
1383 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1384 lex_ofs (ctx->lexer) - 1);
1385 add_summary_spec (sub, function, weighting, area, percentile, label,
1386 formatp, is_ctables_format, loc, sv);
1388 msg_location_destroy (loc);
1390 lex_match (ctx->lexer, T_COMMA);
1391 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1393 if (!lex_force_match (ctx->lexer, T_LBRACK))
1397 else if (lex_match (ctx->lexer, T_RBRACK))
/* Inside TOTALS[...], the first `]' closes the totals group, and a second
   one is required to close the outer list. */
1399 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1406 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a variable node that is marked as
   a scale variable.  A CTAO_VAR node yields itself if its 'scale' flag is set
   and NULL otherwise; for other nodes both sub-axes are searched recursively.

   NOTE(review): the first branch of the if chain and the handling of the
   recursive results are on lines not shown in this listing. */
1410 static const struct ctables_axis *
1411 find_scale (const struct ctables_axis *axis)
1415 else if (axis->op == CTAO_VAR)
1416 return axis->scale ? axis : NULL;
1419 for (size_t i = 0; i < 2; i++)
1421 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale variable node that
   has at least one CSV_CELL summary specification.  A CTAO_VAR node yields
   itself if it qualifies and NULL otherwise; for other nodes both sub-axes
   are searched recursively.

   NOTE(review): the first branch of the if chain and the handling of the
   recursive results are on lines not shown in this listing. */
1429 static const struct ctables_axis *
1430 find_categorical_summary_spec (const struct ctables_axis *axis)
1434 else if (axis->op == CTAO_VAR)
1435 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1438 for (size_t i = 0; i < 2; i++)
1440 const struct ctables_axis *sum
1441 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix expressions joined by '>' into a nesting.

   Each '>' combines what has been parsed so far (LHS) with the next postfix
   expression (RHS) in a new CTAO_NEST node.  Two restrictions are diagnosed,
   and the new node is destroyed in either case:

   - Both sides contain a scale variable.

   - The outer (left) side contains a categorical variable that has summary
     specifications.

   NOTE(review): returns and some conditions are on lines not shown in this
   listing. */
1449 static struct ctables_axis *
1450 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1452 int start_ofs = lex_ofs (ctx->lexer);
1453 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1457 while (lex_match (ctx->lexer, T_GT))
1459 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1462 ctables_axis_destroy (lhs);
1466 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1467 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nesting may contain a scale variable. */
1469 const struct ctables_axis *outer_scale = find_scale (lhs);
1470 const struct ctables_axis *inner_scale = find_scale (rhs);
1471 if (outer_scale && inner_scale)
1473 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1474 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1475 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1476 ctables_axis_destroy (nest);
/* Categorical summaries are only allowed at the innermost level. */
1480 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1483 msg_at (SE, nest->loc,
1484 _("Summaries may only be requested for categorical variables "
1485 "at the innermost nesting level."));
1486 msg_at (SN, outer_sum->loc,
1487 _("This outer categorical variable has a summary."));
1488 ctables_axis_destroy (nest);
/* Parses one or more nestings joined by '+' into a stack.  Each '+' wraps
   the stack parsed so far (LHS) and the next nesting (RHS) in a new
   CTAO_STACK node, which becomes the new LHS.  LHS is destroyed on the path
   shown after parsing RHS (presumably when RHS failed to parse; the condition
   is on a line not shown in this listing). */
1498 static struct ctables_axis *
1499 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1501 int start_ofs = lex_ofs (ctx->lexer);
1502 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1506 while (lex_match (ctx->lexer, T_PLUS))
1508 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1511 ctables_axis_destroy (lhs);
1515 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1516 ctx->lexer, start_ofs);
/* Entry point for parsing one axis expression from LEXER, using DICT, into
   *AXISP.  The tokens BY, '/', and end-of-command are checked first
   (presumably meaning that the axis is empty; the action taken is on a line
   not shown in this listing).  Otherwise a parse context is set up and the
   result of ctables_axis_parse_stack() is stored in *AXISP. */
1523 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1524 struct ctables_axis **axisp)
1527 if (lex_token (lexer) == T_BY
1528 || lex_token (lexer) == T_SLASH
1529 || lex_token (lexer) == T_ENDCMD)
1532 struct ctables_axis_parse_ctx ctx = {
1536 *axisp = ctables_axis_parse_stack (&ctx);
1540 /* CTABLES categories. */
/* A list of categories.  'cats' points to the array of categories.  Other
   members referenced elsewhere in this file ('n_cats', 'n_refs',
   'show_empty') are declared on lines not shown in this listing. */
1542 struct ctables_categories
1545 struct ctables_category *cats;
/* One category within a struct ctables_categories.  The category type
   selects which of the type-specific members below are meaningful, as noted
   in the per-member comments.  Several enumerators and members are on lines
   not shown in this listing. */
1550 struct ctables_category
1552 enum ctables_category_type
1554 /* Explicit category lists. */
1557 CCT_NRANGE, /* Numerical range. */
1558 CCT_SRANGE, /* String range. */
1563 /* Totals and subtotals. */
1567 /* Implicit category lists. */
1572 /* For contributing to TOTALN. */
1573 CCT_EXCLUDED_MISSING,
1577 struct ctables_category *subtotal;
1583 double number; /* CCT_NUMBER. */
1584 struct substring string; /* CCT_STRING, in dictionary encoding. */
/* Range bounds.  The parser uses -DBL_MAX and DBL_MAX for open-ended
   numeric bounds and a null 'string' pointer for open-ended string
   bounds. */
1585 double nrange[2]; /* CCT_NRANGE. */
1586 struct substring srange[2]; /* CCT_SRANGE. */
1590 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1591 bool hide_subcategories; /* CCT_SUBTOTAL. */
1594 /* CCT_POSTCOMPUTE. */
1597 const struct ctables_postcompute *pc;
1598 enum fmt_type parse_format;
1601 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1604 bool include_missing;
1605 bool sort_ascending;
1608 enum ctables_summary_function sort_function;
1609 enum ctables_weighting weighting;
1610 enum ctables_area_type area;
1611 struct variable *sort_var;
1616 /* Source location (sometimes NULL). */
1617 struct msg_location *location;
/* Releases the resources held by CAT: its source location and, depending on
   the category type, its string, its two string range bounds, or its total
   label.  Most case labels are on lines not shown in this listing, so the
   mapping from type to released member is inferred from the member
   comments in struct ctables_category. */
1621 ctables_category_uninit (struct ctables_category *cat)
1626 msg_location_destroy (cat->location);
1633 case CCT_POSTCOMPUTE:
1637 ss_dealloc (&cat->string);
1641 ss_dealloc (&cat->srange[0]);
1642 ss_dealloc (&cat->srange[1]);
1647 free (cat->total_label);
1655 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings are equal, a null and a non-null substring
   are unequal, and two non-null substrings are compared with ss_equals(). */
1661 nullable_substring_equal (const struct substring *a,
1662 const struct substring *b)
1664 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are handled
   first (presumably as unequal; the result is on a line not shown here).
   Otherwise the comparison depends on the type: numbers and numeric ranges
   compare by value, strings with ss_equals(), string ranges with
   nullable_substring_equal() so that open-ended bounds match each other,
   postcomputes by pointer identity, and totals by label text.

   NOTE(review): the last comparison below covers the sort-related members
   but not 'weighting' or 'area'; confirm that this is intended. */
1668 ctables_category_equal (const struct ctables_category *a,
1669 const struct ctables_category *b)
1671 if (a->type != b->type)
1677 return a->number == b->number;
1680 return ss_equals (a->string, b->string);
1683 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1686 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1687 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1693 case CCT_POSTCOMPUTE:
1694 return a->pc == b->pc;
1698 return !strcmp (a->total_label, b->total_label);
1703 return (a->include_missing == b->include_missing
1704 && a->sort_ascending == b->sort_ascending
1705 && a->sort_function == b->sort_function
1706 && a->sort_var == b->sort_var
1707 && a->percentile == b->percentile);
1709 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C, whose reference count must be positive.  The
   cleanup shown uninitializes every category in C->cats.

   NOTE(review): the reference count update, any null check, and the frees
   are on lines not shown in this listing. */
1717 ctables_categories_unref (struct ctables_categories *c)
1722 assert (c->n_refs > 0);
1726 for (size_t i = 0; i < c->n_cats; i++)
1727 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must agree in the number of
   categories and in 'show_empty', and then corresponding categories are
   compared pairwise, in order, with ctables_category_equal().  The results
   returned are on lines not shown in this listing. */
1733 ctables_categories_equal (const struct ctables_categories *a,
1734 const struct ctables_categories *b)
1736 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1739 for (size_t i = 0; i < a->n_cats; i++)
1740 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns, by value, a category whose numeric range is LOW...HIGH.  Members
   not named in the initializer are zero-initialized by the compound literal;
   the '.type' initializer is on a line not shown in this listing. */
1746 static struct ctables_category
1747 cct_nrange (double low, double high)
1749 return (struct ctables_category) {
1751 .nrange = { low, high }
/* Returns, by value, a category whose string range is LOW...HIGH.  The
   substrings are stored as given, not copied.  Members not named in the
   initializer are zero-initialized by the compound literal; the '.type'
   initializer is on a line not shown in this listing. */
1755 static struct ctables_category
1756 cct_srange (struct substring low, struct substring high)
1758 return (struct ctables_category) {
1760 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category after its
   keyword and stores a CCT_SUBTOTAL category in *CAT.  An optional '='
   followed by a string supplies the label, which is duplicated; otherwise a
   translated default "Subtotal" label is duplicated.  HIDE_SUBCATEGORIES is
   copied into the category. */
1765 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1766 struct ctables_category *cat)
1769 if (lex_match (lexer, T_EQUALS))
1771 if (!lex_force_string (lexer))
1774 total_label = ss_xstrdup (lex_tokss (lexer));
1778 total_label = xstrdup (_("Subtotal"));
1780 *cat = (struct ctables_category) {
1781 .type = CCT_SUBTOTAL,
1782 .hide_subcategories = hide_subcategories,
1783 .total_label = total_label
/* Parses one entry of an explicit category list from LEXER into *CAT.

   Recognized forms, in the order tried:

   - OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal().
   - LO THRU <string or number>.
   - <number>, optionally followed by THRU HI or THRU <number>.
   - <string>, optionally followed by THRU HI or THRU <string>.
   - '&' followed by the name of a postcompute, which must already be known.

   Anything else is reported with lex_error().  Open-ended numeric bounds are
   represented by -DBL_MAX and DBL_MAX, and open-ended string bounds by a
   substring with a null 'string' pointer.

   NOTE(review): one parameter line ('ct', used below) and the return
   statements are on lines not shown in this listing. */
1789 ctables_table_parse_explicit_category (struct lexer *lexer,
1790 struct dictionary *dict,
1792 struct ctables_category *cat)
1794 if (lex_match_id (lexer, "OTHERNM"))
1795 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1796 else if (lex_match_id (lexer, "MISSING"))
1797 *cat = (struct ctables_category) { .type = CCT_MISSING };
1798 else if (lex_match_id (lexer, "SUBTOTAL"))
1799 return ctables_table_parse_subtotal (lexer, false, cat);
1800 else if (lex_match_id (lexer, "HSUBTOTAL"))
1801 return ctables_table_parse_subtotal (lexer, true, cat);
/* LO THRU x: a range with an open lower bound. */
1802 else if (lex_match_id (lexer, "LO"))
1804 if (!lex_force_match_id (lexer, "THRU"))
1806 if (lex_is_string (lexer))
1808 struct substring sr0 = { .string = NULL };
1809 struct substring sr1 = parse_substring (lexer, dict);
1810 *cat = cct_srange (sr0, sr1);
1812 else if (lex_force_num (lexer))
1814 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
/* A number, possibly the start of a numeric range. */
1820 else if (lex_is_number (lexer))
1822 double number = lex_number (lexer);
1824 if (lex_match_id (lexer, "THRU"))
1826 if (lex_match_id (lexer, "HI"))
1827 *cat = cct_nrange (number, DBL_MAX);
1830 if (!lex_force_num (lexer))
1832 *cat = cct_nrange (number, lex_number (lexer));
1837 *cat = (struct ctables_category) {
/* A string, possibly the start of a string range. */
1842 else if (lex_is_string (lexer))
1844 struct substring s = parse_substring (lexer, dict);
1845 if (lex_match_id (lexer, "THRU"))
1847 if (lex_match_id (lexer, "HI"))
1849 struct substring sr1 = { .string = NULL };
1850 *cat = cct_srange (s, sr1);
1854 if (!lex_force_string (lexer))
1859 struct substring sr1 = parse_substring (lexer, dict);
1860 *cat = cct_srange (s, sr1);
1864 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* &name: a reference to a postcompute looked up by name. */
1866 else if (lex_match (lexer, T_AND))
1868 if (!lex_force_id (lexer))
1870 struct ctables_postcompute *pc = ctables_find_postcompute (
1871 ct, lex_tokcstr (lexer));
1874 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1875 msg_at (SE, loc, _("Unknown postcompute &%s."),
1876 lex_tokcstr (lexer));
1877 msg_location_destroy (loc);
1882 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1886 lex_error (lexer, NULL);
/* Converts the text S, interpreted in DICT's encoding, to a number according
   to input format FORMAT by calling data_in().  If data_in() reports an
   error, it is issued at LOCATION together with the format's name.

   NOTE(review): how the parsed value reaches *N and what is returned are on
   lines not shown in this listing. */
1894 parse_category_string (struct msg_location *location,
1895 struct substring s, const struct dictionary *dict,
1896 enum fmt_type format, double *n)
1899 char *error = data_in (s, dict_get_encoding (dict), format,
1900 settings_get_fmt_settings (), &v, 0, NULL);
1903 msg_at (SE, location,
1904 _("Failed to parse category specification as format %s: %s."),
1905 fmt_name (format), error);
/* Scans CATS for the category that the postcompute expression node E refers
   to.  The match criterion depends on E's operation: equal number, equal
   string, identical numeric or string range bounds, or simply a category of
   the MISSING, OTHERNM, SUBTOTAL, or TOTAL type.

   Subtotals are counted as they are encountered so that a reference with a
   nonzero 'subtotal_index' can select one by position; index 0 stands for a
   reference without a position, which is checked after the loop when CATS
   holds more than one subtotal.

   NOTE(review): the statements that record the match in 'best' and the
   return values are on lines not shown in this listing. */
1914 static struct ctables_category *
1915 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1916 const struct ctables_pcexpr *e)
1918 struct ctables_category *best = NULL;
1919 size_t n_subtotals = 0;
1920 for (size_t i = 0; i < cats->n_cats; i++)
1922 struct ctables_category *cat = &cats->cats[i];
1925 case CTPO_CAT_NUMBER:
1926 if (cat->type == CCT_NUMBER && cat->number == e->number)
1930 case CTPO_CAT_STRING:
1931 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1935 case CTPO_CAT_NRANGE:
1936 if (cat->type == CCT_NRANGE
1937 && cat->nrange[0] == e->nrange[0]
1938 && cat->nrange[1] == e->nrange[1])
1942 case CTPO_CAT_SRANGE:
1943 if (cat->type == CCT_SRANGE
1944 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1945 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1949 case CTPO_CAT_MISSING:
1950 if (cat->type == CCT_MISSING)
1954 case CTPO_CAT_OTHERNM:
1955 if (cat->type == CCT_OTHERNM)
1959 case CTPO_CAT_SUBTOTAL:
1960 if (cat->type == CCT_SUBTOTAL)
1963 if (e->subtotal_index == n_subtotals)
1965 else if (e->subtotal_index == 0)
1970 case CTPO_CAT_TOTAL:
1971 if (cat->type == CCT_TOTAL)
/* A reference without a position is a special case when there are several
   subtotals. */
1985 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string and string-range references are
   first converted to numbers with parse_category_string() and then looked up
   as CTPO_CAT_NUMBER or CTPO_CAT_NRANGE references.  An open-ended string
   bound (null 'string' pointer) becomes -DBL_MAX or DBL_MAX.  All other
   references are passed to ctables_find_category_for_postcompute__()
   unchanged.

   NOTE(review): the results returned when string conversion fails are on
   lines not shown in this listing. */
1990 static struct ctables_category *
1991 ctables_find_category_for_postcompute (const struct dictionary *dict,
1992 const struct ctables_categories *cats,
1993 enum fmt_type parse_format,
1994 const struct ctables_pcexpr *e)
1996 if (parse_format != FMT_F)
/* String reference: look it up as a number instead. */
1998 if (e->op == CTPO_CAT_STRING)
2001 if (!parse_category_string (e->location, e->string, dict,
2002 parse_format, &number))
2005 struct ctables_pcexpr e2 = {
2006 .op = CTPO_CAT_NUMBER,
2008 .location = e->location,
2010 return ctables_find_category_for_postcompute__ (cats, &e2);
/* String range reference: look it up as a numeric range instead. */
2012 else if (e->op == CTPO_CAT_SRANGE)
2015 if (!e->srange[0].string)
2016 nrange[0] = -DBL_MAX;
2017 else if (!parse_category_string (e->location, e->srange[0], dict,
2018 parse_format, &nrange[0]))
2021 if (!e->srange[1].string)
2022 nrange[1] = DBL_MAX;
2023 else if (!parse_category_string (e->location, e->srange[1], dict,
2024 parse_format, &nrange[1]))
2027 struct ctables_pcexpr e2 = {
2028 .op = CTPO_CAT_NRANGE,
2029 .nrange = { nrange[0], nrange[1] },
2030 .location = e->location,
2032 return ctables_find_category_for_postcompute__ (cats, &e2);
2035 return ctables_find_category_for_postcompute__ (cats, e);
/* Builds a substring over the string data of value V, whose length is VAR's
   width, and strips trailing spaces from it.  The substring refers to V's own
   storage; nothing is copied. */
2038 static struct substring
2039 rtrim_value (const union value *v, const struct variable *var)
2041 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2042 var_get_width (var));
2043 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces removed,
   lies within the inclusive range SRANGE[0]...SRANGE[1] according to
   ss_compare().  A bound with a null 'string' pointer is open-ended and
   always satisfied. */
2048 in_string_range (const union value *v, const struct variable *var,
2049 const struct substring *srange)
2051 struct substring s = rtrim_value (v, var);
2052 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2053 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   A system-missing numeric value is handled before anything else (the
   result is on a line not shown here).  The categories are then examined
   from last to first, so that a later category takes precedence over an
   earlier one that would also match.  Numeric range bounds equal to -DBL_MAX
   or DBL_MAX are treated as open-ended.

   If no category matches, a missing value yields NULL and any other value
   yields 'othernm' (presumably the OTHERNM category if the list has one; its
   assignment is on a line not shown in this listing). */
2056 static const struct ctables_category *
2057 ctables_categories_match (const struct ctables_categories *c,
2058 const union value *v, const struct variable *var)
2060 if (var_is_numeric (var) && v->f == SYSMIS)
2063 const struct ctables_category *othernm = NULL;
2064 for (size_t i = c->n_cats; i-- > 0; )
2066 const struct ctables_category *cat = &c->cats[i];
2070 if (cat->number == v->f)
2075 if (ss_equals (cat->string, rtrim_value (v, var)))
2080 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2081 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2086 if (in_string_range (v, var, cat->srange))
2091 if (var_is_value_missing (var, v))
2095 case CCT_POSTCOMPUTE:
/* Implicit categories accept missing values only with include_missing. */
2110 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2113 case CCT_EXCLUDED_MISSING:
2118 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the total category of C: the first category if it has type
   CCT_TOTAL, else the last category if it has type CCT_TOTAL.  The final
   alternative is on a line not shown in this listing.

   NOTE(review): reads cats[0] and cats[n_cats - 1] unconditionally, so this
   assumes that C has at least one category; confirm against callers. */
2121 static const struct ctables_category *
2122 ctables_categories_total (const struct ctables_categories *c)
2124 const struct ctables_category *first = &c->cats[0];
2125 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2126 return (first->type == CCT_TOTAL ? first
2127 : last->type == CCT_TOTAL ? last
/* Formats NUMBER as a value of VAR and appends the text to S.  A temporary
   pivot value is created from a numeric union value, formatted with
   pivot_value_format(), and then destroyed.  (The parameter line declaring S
   is not shown in this listing.) */
2132 ctables_category_format_number (double number, const struct variable *var,
2135 struct pivot_value *pv = pivot_value_new_var_value (
2136 var, &(union value) { .f = number });
2137 pivot_value_format (pv, NULL, s);
2138 pivot_value_destroy (pv);
/* Formats STRING as a value of VAR and appends the text to OUT.  STRING is
   first copied into a newly allocated buffer of VAR's width, padded on the
   right with spaces, because a string union value points to exactly that
   many bytes.  A temporary pivot value built on the buffer is formatted with
   pivot_value_format() and then destroyed.

   NOTE(review): the release of the temporary buffer is not visible in this
   listing (short lines are not shown); confirm that it is freed. */
2142 ctables_category_format_string (struct substring string,
2143 const struct variable *var, struct string *out)
2145 int width = var_get_width (var);
2146 char *s = xmalloc (width);
2147 buf_copy_rpad (s, width, string.string, string.length, ' ');
2148 struct pivot_value *pv = pivot_value_new_var_value (
2149 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2150 pivot_value_format (pv, NULL, out);
2151 pivot_value_destroy (pv);
/* Appends a textual description of category CAT, for variable VAR, to S.
   Numbers and strings are formatted as values of VAR, ranges as
   "<low> THRU <high>", the MISSING and OTHERNM categories by keyword, a
   postcompute as '&' followed by its name, and totals by their label.

   NOTE(review): the case labels, the handling of the remaining types, and
   the return values are on lines not shown in this listing. */
2156 ctables_category_format_label (const struct ctables_category *cat,
2157 const struct variable *var,
2163 ctables_category_format_number (cat->number, var, s);
2167 ctables_category_format_string (cat->string, var, s);
2171 ctables_category_format_number (cat->nrange[0], var, s);
2172 ds_put_format (s, " THRU ");
2173 ctables_category_format_number (cat->nrange[1], var, s);
2177 ctables_category_format_string (cat->srange[0], var, s);
2178 ds_put_format (s, " THRU ");
2179 ctables_category_format_string (cat->srange[1], var, s);
2183 ds_put_cstr (s, "MISSING");
2187 ds_put_cstr (s, "OTHERNM");
2190 case CCT_POSTCOMPUTE:
2191 ds_put_format (s, "&%s", cat->pc->name);
2196 ds_put_cstr (s, cat->total_label);
2202 case CCT_EXCLUDED_MISSING:
/* Recursively checks that every category reference in postcompute
   expression E, used by computed category PC_CAT, can be resolved within
   CATS.

   For a category-reference node, the referenced category is looked up with
   ctables_find_category_for_postcompute().  Two kinds of failure are
   diagnosed:

   - A SUBTOTAL reference without a position when CATS contains more than one
     subtotal.

   - A reference to a category that CATS does not contain.  An additional
     note suggests the fix, depending on the kind of reference.

   For other nodes, both sub-expressions are checked recursively.
   CATS_LOCATION is the source location used for messages about CATS.

   NOTE(review): the conditions selecting between these paths and the return
   values are on lines not shown in this listing. */
2210 ctables_recursive_check_postcompute (struct dictionary *dict,
2211 const struct ctables_pcexpr *e,
2212 struct ctables_category *pc_cat,
2213 const struct ctables_categories *cats,
2214 const struct msg_location *cats_location)
2218 case CTPO_CAT_NUMBER:
2219 case CTPO_CAT_STRING:
2220 case CTPO_CAT_NRANGE:
2221 case CTPO_CAT_SRANGE:
2222 case CTPO_CAT_MISSING:
2223 case CTPO_CAT_OTHERNM:
2224 case CTPO_CAT_SUBTOTAL:
2225 case CTPO_CAT_TOTAL:
2227 struct ctables_category *cat = ctables_find_category_for_postcompute (
2228 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference without a position is ambiguous when there is
   more than one subtotal. */
2231 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2233 size_t n_subtotals = 0;
2234 for (size_t i = 0; i < cats->n_cats; i++)
2235 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2236 if (n_subtotals > 1)
2238 msg_at (SE, cats_location,
2239 ngettext ("These categories include %zu instance "
2240 "of SUBTOTAL or HSUBTOTAL, so references "
2241 "from computed categories must refer to "
2242 "subtotals by position, "
2243 "e.g. SUBTOTAL[1].",
2244 "These categories include %zu instances "
2245 "of SUBTOTAL or HSUBTOTAL, so references "
2246 "from computed categories must refer to "
2247 "subtotals by position, "
2248 "e.g. SUBTOTAL[1].",
2251 msg_at (SN, e->location,
2252 _("This is the reference that lacks a position."));
/* The referenced category is not in the list. */
2257 msg_at (SE, pc_cat->location,
2258 _("Computed category &%s references a category not included "
2259 "in the category list."),
2261 msg_at (SN, e->location, _("This is the missing category."));
2262 if (e->op == CTPO_CAT_SUBTOTAL)
2263 msg_at (SN, cats_location,
2264 _("To fix the problem, add subtotals to the "
2265 "list of categories here."));
2266 else if (e->op == CTPO_CAT_TOTAL)
2267 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2268 "CATEGORIES specification."));
2270 msg_at (SN, cats_location,
2271 _("To fix the problem, add the missing category to the "
2272 "list of categories here."));
2275 if (pc_cat->pc->hide_source_cats)
/* Operator nodes: check both operands; a null operand is skipped. */
2289 for (size_t i = 0; i < 2; i++)
2290 if (e->subs[i] && !ctables_recursive_check_postcompute (
2291 dict, e->subs[i], pc_cat, cats, cats_location))
/* Builds the label for postcompute category CAT by expanding every
   ")LABEL[<n>]" construct in the postcompute's label text, where <n> is the
   1-based index of a category in CATS whose own label (formatted for VAR) is
   substituted.

   If the text contains no such construct, it is returned unchanged as user
   text.  If a construct is malformed (no closing ']', an index that is not a
   plain number, or an index outside 1...n_cats), the final statement
   presumably serves as the fallback, returning the unexpanded label text;
   the jumps to it are on lines not shown in this listing. */
2299 static struct pivot_value *
2300 ctables_postcompute_label (const struct ctables_categories *cats,
2301 const struct ctables_category *cat,
2302 const struct variable *var)
2304 struct substring in = ss_cstr (cat->pc->label);
2305 struct substring target = ss_cstr (")LABEL[");
2307 struct string out = DS_EMPTY_INITIALIZER;
2310 size_t chunk = ss_find_substring (in, target);
/* No further constructs: emit what remains and finish. */
2311 if (chunk == SIZE_MAX)
2313 if (ds_is_empty (&out))
2314 return pivot_value_new_user_text (in.string, in.length);
2317 ds_put_substring (&out, in);
2318 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the construct, then skip past ")LABEL[". */
2322 ds_put_substring (&out, ss_head (in, chunk));
2323 ss_advance (&in, chunk + target.length);
/* Parse the index up to ']'; the whole of it must be a number in range. */
2325 struct substring idx_s;
2326 if (!ss_get_until (&in, ']', &idx_s))
2329 long int idx = strtol (idx_s.string, &tail, 10);
2330 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2333 struct ctables_category *cat2 = &cats->cats[idx - 1];
2334 if (!ctables_category_format_label (cat2, var, &out))
2340 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value used to label VALUE of VAR in category CAT:

   - For a postcompute category that has a label, the expanded postcompute
     label from ctables_postcompute_label().

   - For a total or subtotal, the category's total label as user text.

   - Otherwise, the variable value itself. */
2343 static struct pivot_value *
2344 ctables_category_create_value_label (const struct ctables_categories *cats,
2345 const struct ctables_category *cat,
2346 const struct variable *var,
2347 const union value *value)
2349 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2350 ? ctables_postcompute_label (cats, cat, var)
2351 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2352 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2353 : pivot_value_new_var_value (var, value));
2356 /* CTABLES variable nesting and stacking. */
2358 /* A nested sequence of variables, e.g. a > b > c. */
/* 'vars' is the array of nested variables.  'areas' and 'n_areas' hold, per
   area type, an array of size_t and its length.  'specs' holds one set of
   summary specifications per summary variant.  The struct header and other
   members used elsewhere in this file ('n', 'scale_idx', 'summary_idx',
   'group_head') are on lines not shown in this listing. */
2361 struct variable **vars;
2365 size_t *areas[N_CTATS];
2366 size_t n_areas[N_CTATS];
2369 struct ctables_summary_spec_set specs[N_CSVS];
2372 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
/* 'nests' is the array of nestings; its length ('n', used elsewhere in this
   file) is declared on a line not shown in this listing. */
2373 struct ctables_stack
2375 struct ctables_nest *nests;
/* Releases the resources held by NEST: the summary specification set of each
   summary variant and the area index array of each area type.  (Any further
   cleanup is on lines not shown in this listing.) */
2380 ctables_nest_uninit (struct ctables_nest *nest)
2383 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2384 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2385 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2386 free (nest->areas[at]);
/* Releases the resources held by STACK: every nest is uninitialized and the
   nest array is freed.  (Any null check is on a line not shown in this
   listing.) */
2390 ctables_stack_uninit (struct ctables_stack *stack)
2394 for (size_t i = 0; i < stack->n; i++)
2395 ctables_nest_uninit (&stack->nests[i]);
2396 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: for every pair of a nest A from S0
   and a nest B from S1, the result contains a nest whose variables are A's
   followed by B's.  The result therefore has room for s0.n * s1.n nests.

   In each new nest, 'scale_idx' and 'summary_idx' are taken from A if A has
   one, otherwise from B shifted by the number of variables in A.  Summary
   specifications are cloned from whichever of A and B is chosen as
   'summary_src' (the assignments are on lines not shown in this listing).

   S0 and S1 are passed by value and are uninitialized before returning, so
   the caller must not use them afterward. */
2400 static struct ctables_stack
2401 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2408 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2409 for (size_t i = 0; i < s0.n; i++)
2410 for (size_t j = 0; j < s1.n; j++)
2412 const struct ctables_nest *a = &s0.nests[i];
2413 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists of A and B. */
2415 size_t allocate = a->n + b->n;
2416 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2418 for (size_t k = 0; k < a->n; k++)
2419 vars[n++] = a->vars[k];
2420 for (size_t k = 0; k < b->n; k++)
2421 vars[n++] = b->vars[k];
2422 assert (n == allocate);
/* Choose which side supplies the summary specifications. */
2424 const struct ctables_nest *summary_src;
2425 if (!a->specs[CSV_CELL].var)
2427 else if (!b->specs[CSV_CELL].var)
2432 struct ctables_nest *new = &stack.nests[stack.n++];
2433 *new = (struct ctables_nest) {
2435 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2436 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2438 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2439 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2443 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2444 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2446 ctables_stack_uninit (&s0);
2447 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by concatenation: the result holds the nests of
   S0 followed by the nests of S1.  The nest structures are copied shallowly,
   and 'group_head' of each nest that came from S1 is shifted by the number
   of nests in S0 so that it stays valid as an index into the combined
   array.  (What happens to the input arrays is on lines not shown in this
   listing.) */
2451 static struct ctables_stack
2452 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2454 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2455 for (size_t i = 0; i < s0.n; i++)
2456 stack.nests[stack.n++] = s0.nests[i];
2457 for (size_t i = 0; i < s1.n; i++)
2459 stack.nests[stack.n] = s1.nests[i];
2460 stack.nests[stack.n].group_head += s0.n;
2463 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single nest for variable axis A.  The nest's
   'scale_idx' is 0 if A is a scale variable and SIZE_MAX otherwise, and its
   'summary_idx' is 0 if A is a scale variable or has CSV_CELL summary
   specifications and SIZE_MAX otherwise.  The summary specification sets
   are cloned from A and tagged with A's variable and scale flag. */
2469 static struct ctables_stack
2470 var_fts (const struct ctables_axis *a)
2472 struct variable **vars = xmalloc (sizeof *vars);
2475 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2476 struct ctables_nest *nest = xmalloc (sizeof *nest);
2477 *nest = (struct ctables_nest) {
2480 .scale_idx = a->scale ? 0 : SIZE_MAX,
2481 .summary_idx = is_summary ? 0 : SIZE_MAX,
2484 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2486 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2487 nest->specs[sv].var = a->var;
2488 nest->specs[sv].is_scale = a->scale;
2490 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  An empty stack is
   returned on the first path (presumably for a null A; the condition is on a
   line not shown here).  The two calls below combine the stacks of the two
   sub-axes with stack_fts() and nest_fts() respectively; the case labels
   that select between them, and the variable case, are on lines not shown
   in this listing. */
2493 static struct ctables_stack
2494 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2497 return (struct ctables_stack) { .n = 0 };
2505 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2506 enumerate_fts (axis_type, a->subs[1]));
2509 /* This should consider any of the scale variables found in the result to
2510 be linked to each other listwise for SMISSING=LISTWISE. */
2511 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2512 enumerate_fts (axis_type, a->subs[1]));
2518 /* CTABLES summary calculation. */
/* Accumulator for one summary function in one cell.  Which member is in use
   depends on the summary function, as noted in the comments below.  Members
   used elsewhere in this file ('count', 'min', 'max', 'ovalid', 'ovalue')
   are declared on lines not shown in this listing. */
2520 union ctables_summary
2522 /* COUNT, VALIDN, TOTALN. */
2525 /* MINIMUM, MAXIMUM, RANGE. */
2532 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2533 struct moments1 *moments;
2535 /* MEDIAN, MODE, PTILE. */
2538 struct casewriter *writer;
/* Initializes accumulator S for the summary function in SS.

   Depending on the function, the minimum and maximum are both set to SYSMIS,
   a moments accumulator is created up to the mean or up to the variance, or
   a sorting case writer is created.  The writer's cases have two numeric
   (width 0) columns and are sorted in ascending order of the first one.

   NOTE(review): most case labels, and the initialization for the counting
   functions, are on lines not shown in this listing. */
2545 ctables_summary_init (union ctables_summary *s,
2546 const struct ctables_summary_spec *ss)
2548 switch (ss->function)
2551 case CTSF_areaPCT_COUNT:
2552 case CTSF_areaPCT_VALIDN:
2553 case CTSF_areaPCT_TOTALN:
2566 s->min = s->max = SYSMIS;
2571 case CTSF_areaPCT_SUM:
2572 s->moments = moments1_create (MOMENT_MEAN);
2578 s->moments = moments1_create (MOMENT_VARIANCE);
2585 struct caseproto *proto = caseproto_create ();
2586 proto = caseproto_add_width (proto, 0);
2587 proto = caseproto_add_width (proto, 0);
2589 struct subcase ordering;
2590 subcase_init (&ordering, 0, 0, SC_ASCEND);
2591 s->writer = sort_create_writer (&ordering, proto);
2592 subcase_uninit (&ordering);
2593 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function in SS: the moments accumulator or the case writer.  (Most case
   labels are on lines not shown in this listing.) */
2603 ctables_summary_uninit (union ctables_summary *s,
2604 const struct ctables_summary_spec *ss)
2606 switch (ss->function)
2609 case CTSF_areaPCT_COUNT:
2610 case CTSF_areaPCT_VALIDN:
2611 case CTSF_areaPCT_TOTALN:
2630 case CTSF_areaPCT_SUM:
2631 moments1_destroy (s->moments);
2637 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function in SS.
   VALUE is the value being summarized, and IS_MISSING and IS_INCLUDED
   describe it in the terms used by the inclusion chart in the body.

   In the statements shown, the minimum and maximum are updated when VALUE
   lies beyond them (or they are still SYSMIS), VALUE is added with its
   weight to the moments accumulator, or, for the order statistics, the
   weight is added to 'ovalid' and a case holding VALUE in column 0 and the
   weight in column 1 is written to the sorting writer.

   NOTE(review): the weight parameter, the conditions that apply the
   inclusion chart, and most case labels are on lines not shown in this
   listing. */
2643 ctables_summary_add (union ctables_summary *s,
2644 const struct ctables_summary_spec *ss,
2645 const union value *value,
2646 bool is_missing, bool is_included,
2649 /* To determine whether a case is included in a given table for a particular
2650 kind of summary, consider the following charts for the variable being
2651 summarized. Only if "yes" appears is the case counted.
2653 Categorical variables: VALIDN other TOTALN
2654 Valid values in included categories yes yes yes
2655 Missing values in included categories --- yes yes
2656 Missing values in excluded categories --- --- yes
2657 Valid values in excluded categories --- --- ---
2659 Scale variables: VALIDN other TOTALN
2660 Valid value yes yes yes
2661 Missing value --- yes yes
2663 Missing values include both user- and system-missing. (The system-missing
2664 value is always in an excluded category.)
2666 One way to interpret the above table is that scale variables are like
2667 categorical variables in which all values are in included categories.
2669 switch (ss->function)
2672 case CTSF_areaPCT_TOTALN:
2677 case CTSF_areaPCT_COUNT:
2683 case CTSF_areaPCT_VALIDN:
2701 if (s->min == SYSMIS || value->f < s->min)
2703 if (s->max == SYSMIS || value->f > s->max)
2714 moments1_add (s->moments, value->f, weight);
2717 case CTSF_areaPCT_SUM:
2719 moments1_add (s->moments, value->f, weight);
2727 s->ovalid += weight;
2729 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2730 *case_num_rw_idx (c, 0) = value->f;
2731 *case_num_rw_idx (c, 1) = weight;
2732 casewriter_write (s->writer, c);
/* Computes the final value of the summary accumulated in S for the summary
   function in SS.  AREAS holds the areas containing the cell, indexed by
   area type; the percentage functions use the area selected by
   SS->calc_area as their denominator.

   SYSMIS is returned whenever a result is undefined: a zero denominator for
   a percentage, or a SYSMIS input for the range, standard deviation, or sum.

   NOTE(review): most case labels and some return statements are on lines
   not shown in this listing. */
2739 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2740 union ctables_summary *s,
2741 const struct ctables_summary_spec *ss)
2743 switch (ss->function)
2749 return areas[ss->calc_area]->sequence;
/* Percentages: the cell's count relative to the area's count, valid
   count, or total count under the same weighting. */
2751 case CTSF_areaPCT_COUNT:
2753 const struct ctables_area *a = areas[ss->calc_area];
2754 double a_count = a->count[ss->weighting];
2755 return a_count ? s->count / a_count * 100 : SYSMIS;
2758 case CTSF_areaPCT_VALIDN:
2760 const struct ctables_area *a = areas[ss->calc_area];
2761 double a_valid = a->valid[ss->weighting];
2762 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2765 case CTSF_areaPCT_TOTALN:
2767 const struct ctables_area *a = areas[ss->calc_area];
2768 double a_total = a->total[ss->weighting];
2769 return a_total ? s->count / a_total * 100 : SYSMIS;
2784 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
/* Statistics derived from the moments accumulator. */
2789 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2795 double weight, variance;
2796 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2797 return calc_semean (variance, weight);
2803 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2804 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2809 double weight, mean;
2810 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2811 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2817 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Percentage of the area's sum for the summed variable. */
2821 case CTSF_areaPCT_SUM:
2823 double weight, mean;
2824 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2825 if (weight == SYSMIS || mean == SYSMIS)
2828 const struct ctables_area *a = areas[ss->calc_area];
2829 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2830 double denom = sum->sum[ss->weighting];
2831 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Percentile: the requested percentile for CTSF_PTILE, otherwise 0.5,
   computed from the sorted cases with the PC_HAVERAGE algorithm and
   stored in 'ovalue'. */
2838 struct casereader *reader = casewriter_make_reader (s->writer);
2841 struct percentile *ptile = percentile_create (
2842 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2843 struct order_stats *os = &ptile->parent;
2844 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2845 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2846 statistic_destroy (&ptile->parent.parent);
/* Mode, computed from the sorted cases and stored in 'ovalue'. */
2853 struct casereader *reader = casewriter_make_reader (s->writer);
2856 struct mode *mode = mode_create ();
2857 struct order_stats *os = &mode->parent;
2858 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2859 s->ovalue = mode->mode;
2860 statistic_destroy (&mode->parent.parent);
2868 /* CTABLES occurrences. */
/* A value that occurred in the data, kept in a hash map.  'node' links it
   into the map; the value itself ('value', used by
   ctables_add_occurrence()) is declared on a line not shown in this
   listing. */
2870 struct ctables_occurrence
2872 struct hmap_node node;
/* Records that VALUE of variable VAR occurred, in hash map OCCURRENCES.  The
   entries with the same hash are searched for an equal value first
   (presumably returning early on a match; that statement is on a line not
   shown in this listing).  Otherwise a new entry holding a clone of VALUE is
   inserted. */
2877 ctables_add_occurrence (const struct variable *var,
2878 const union value *value,
2879 struct hmap *occurrences)
2881 int width = var_get_width (var);
2882 unsigned int hash = value_hash (value, width, 0);
2884 struct ctables_occurrence *o;
2885 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2887 if (value_equal (value, &o->value, width))
2890 o = xmalloc (sizeof *o);
2891 value_clone (&o->value, value, width);
2892 hmap_insert (occurrences, &o->node, hash);
/* Enumerators that mirror the SETTINGS_VALUE_SHOW_* constants one for one,
   so a CTVL_* value can stand in for the corresponding settings value.  (The
   enum header is on a line not shown in this listing.) */
2897 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2898 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2899 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2900 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
/* Members of the structure that represents one cell of a table (presumably
   struct ctables_cell; its header is on a line not shown in this
   listing). */
2905 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2906 all the axes (except the scalar variable, if any). */
2907 struct hmap_node node;
2908 struct ctables_section *section;
2910 /* The areas that contain this cell. */
2911 uint32_t omit_areas;
2912 struct ctables_area *areas[N_CTATS];
/* Which summary variant applies to this cell. */
2917 enum ctables_summary_variant sv;
/* Per axis, the category and value of each variable in the axis's nest
   (see ctables_cell_compare_3way(), which reads axes[a].cvs[i]). */
2919 struct ctables_cell_axis
2921 struct ctables_cell_value
2923 const struct ctables_category *category;
/* One accumulator per summary specification -- TODO confirm the indexing
   against the code that allocates this array. */
2931 union ctables_summary *summaries;
/* One section of a table: the combination of one nest per pivot axis,
   together with the cells, areas, and value occurrences collected for that
   combination.  'table' points back to the owning table. */
2934 struct ctables_section
2937 struct ctables_table *table;
2938 struct ctables_nest *nests[PIVOT_N_AXES];
2941 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2942 struct hmap cells; /* Contains "struct ctables_cell"s. */
2943 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2946 static void ctables_section_uninit (struct ctables_section *);
/* One table specification within a CTABLES command.  'ctables' points back
   to the owning command.  'axes' holds the parsed axis expression for each
   pivot axis, 'stacks' the corresponding stacks of nests, and 'sections' the
   array of sections.  (Some members are on lines not shown in this
   listing.) */
2948 struct ctables_table
2950 struct ctables *ctables;
2951 struct ctables_axis *axes[PIVOT_N_AXES];
2952 struct ctables_stack stacks[PIVOT_N_AXES];
2953 struct ctables_section *sections;
2955 enum pivot_axis_type summary_axis;
2956 struct ctables_summary_spec_set summary_specs;
2957 struct variable **sum_vars;
2960 enum pivot_axis_type slabels_axis;
2961 bool slabels_visible;
2963 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2965 Most commonly, label_axis[a] == a, and in particular we always have
2966 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2968 If ROWLABELS or COLLABELS is specified, then one of
2969 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2970 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2972 If any category labels are moved, then 'clabels_example' is one of the
2973 variables being moved (and it is otherwise NULL). All of the variables
2974 being moved have the same width, value labels, and categories, so this
2975 example variable can be used to find those out.
2977 The remaining members in this group are relevant only if category labels
2980 'clabels_values_map' holds a "struct ctables_value" for all the values
2981 that appear in all of the variables in the moved categories. It is
2982 accumulated as the data is read. Once the data is fully read, its
2983 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2985 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2986 enum pivot_axis_type clabels_from_axis;
2987 enum pivot_axis_type clabels_to_axis;
2988 int clabels_start_ofs, clabels_end_ofs;
2989 const struct variable *clabels_example;
2990 struct hmap clabels_values_map;
2991 struct ctables_value **clabels_values;
2992 size_t n_clabels_values;
2994 /* Indexed by variable dictionary index. */
2995 struct ctables_categories **categories;
2996 size_t n_categories;
3004 struct ctables_chisq *chisq;
3005 struct ctables_pairwise *pairwise;
3008 struct ctables_cell_sort_aux
3010 const struct ctables_nest *nest;
3011 enum pivot_axis_type a;
3015 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3017 const struct ctables_cell_sort_aux *aux = aux_;
3018 struct ctables_cell *const *ap = a_;
3019 struct ctables_cell *const *bp = b_;
3020 const struct ctables_cell *a = *ap;
3021 const struct ctables_cell *b = *bp;
3023 const struct ctables_nest *nest = aux->nest;
3024 for (size_t i = 0; i < nest->n; i++)
3025 if (i != nest->scale_idx)
3027 const struct variable *var = nest->vars[i];
3028 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3029 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3030 if (a_cv->category != b_cv->category)
3031 return a_cv->category > b_cv->category ? 1 : -1;
3033 const union value *a_val = &a_cv->value;
3034 const union value *b_val = &b_cv->value;
3035 switch (a_cv->category->type)
3041 case CCT_POSTCOMPUTE:
3042 case CCT_EXCLUDED_MISSING:
3043 /* Must be equal. */
3051 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3059 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3061 return a_cv->category->sort_ascending ? cmp : -cmp;
3067 const char *a_label = var_lookup_value_label (var, a_val);
3068 const char *b_label = var_lookup_value_label (var, b_val);
3074 cmp = strcmp (a_label, b_label);
3080 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3083 return a_cv->category->sort_ascending ? cmp : -cmp;
3094 static struct ctables_area *
3095 ctables_area_insert (struct ctables_cell *cell, enum ctables_area_type area)
3097 struct ctables_section *s = cell->section;
3099 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3101 const struct ctables_nest *nest = s->nests[a];
3102 for (size_t i = 0; i < nest->n_areas[area]; i++)
3104 size_t v_idx = nest->areas[area][i];
3105 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3106 hash = hash_pointer (cv->category, hash);
3107 if (cv->category->type != CCT_TOTAL
3108 && cv->category->type != CCT_SUBTOTAL
3109 && cv->category->type != CCT_POSTCOMPUTE)
3110 hash = value_hash (&cv->value,
3111 var_get_width (nest->vars[v_idx]), hash);
3115 struct ctables_area *a;
3116 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3118 const struct ctables_cell *df = a->example;
3119 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3121 const struct ctables_nest *nest = s->nests[a];
3122 for (size_t i = 0; i < nest->n_areas[area]; i++)
3124 size_t v_idx = nest->areas[area][i];
3125 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3126 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3127 if (cv1->category != cv2->category
3128 || (cv1->category->type != CCT_TOTAL
3129 && cv1->category->type != CCT_SUBTOTAL
3130 && cv1->category->type != CCT_POSTCOMPUTE
3131 && !value_equal (&cv1->value, &cv2->value,
3132 var_get_width (nest->vars[v_idx]))))
3141 struct ctables_sum *sums = (s->table->n_sum_vars
3142 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3145 a = xmalloc (sizeof *a);
3146 *a = (struct ctables_area) { .example = cell, .sums = sums };
3147 hmap_insert (&s->areas[area], &a->node, hash);
3151 static struct ctables_cell *
3152 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3153 const struct ctables_category **cats[PIVOT_N_AXES])
3156 enum ctables_summary_variant sv = CSV_CELL;
3157 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3159 const struct ctables_nest *nest = s->nests[a];
3160 for (size_t i = 0; i < nest->n; i++)
3161 if (i != nest->scale_idx)
3163 hash = hash_pointer (cats[a][i], hash);
3164 if (cats[a][i]->type != CCT_TOTAL
3165 && cats[a][i]->type != CCT_SUBTOTAL
3166 && cats[a][i]->type != CCT_POSTCOMPUTE)
3167 hash = value_hash (case_data (c, nest->vars[i]),
3168 var_get_width (nest->vars[i]), hash);
3174 struct ctables_cell *cell;
3175 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3177 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3179 const struct ctables_nest *nest = s->nests[a];
3180 for (size_t i = 0; i < nest->n; i++)
3181 if (i != nest->scale_idx
3182 && (cats[a][i] != cell->axes[a].cvs[i].category
3183 || (cats[a][i]->type != CCT_TOTAL
3184 && cats[a][i]->type != CCT_SUBTOTAL
3185 && cats[a][i]->type != CCT_POSTCOMPUTE
3186 && !value_equal (case_data (c, nest->vars[i]),
3187 &cell->axes[a].cvs[i].value,
3188 var_get_width (nest->vars[i])))))
3197 cell = xmalloc (sizeof *cell);
3201 cell->omit_areas = 0;
3202 cell->postcompute = false;
3203 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3205 const struct ctables_nest *nest = s->nests[a];
3206 cell->axes[a].cvs = (nest->n
3207 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3209 for (size_t i = 0; i < nest->n; i++)
3211 const struct ctables_category *cat = cats[a][i];
3212 const struct variable *var = nest->vars[i];
3213 const union value *value = case_data (c, var);
3214 if (i != nest->scale_idx)
3216 const struct ctables_category *subtotal = cat->subtotal;
3217 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3220 if (cat->type == CCT_TOTAL
3221 || cat->type == CCT_SUBTOTAL
3222 || cat->type == CCT_POSTCOMPUTE)
3226 case PIVOT_AXIS_COLUMN:
3227 cell->omit_areas |= ((1u << CTAT_TABLE) |
3228 (1u << CTAT_LAYER) |
3229 (1u << CTAT_LAYERCOL) |
3230 (1u << CTAT_SUBTABLE) |
3233 case PIVOT_AXIS_ROW:
3234 cell->omit_areas |= ((1u << CTAT_TABLE) |
3235 (1u << CTAT_LAYER) |
3236 (1u << CTAT_LAYERROW) |
3237 (1u << CTAT_SUBTABLE) |
3240 case PIVOT_AXIS_LAYER:
3241 cell->omit_areas |= ((1u << CTAT_TABLE) |
3242 (1u << CTAT_LAYER));
3246 if (cat->type == CCT_POSTCOMPUTE)
3247 cell->postcompute = true;
3250 cell->axes[a].cvs[i].category = cat;
3251 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3255 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3256 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3257 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3258 for (size_t i = 0; i < specs->n; i++)
3259 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3260 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3261 cell->areas[at] = ctables_area_insert (cell, at);
3262 hmap_insert (&s->cells, &cell->node, hash);
3267 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3269 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3274 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3275 const struct ctables_category **cats[PIVOT_N_AXES],
3276 bool is_included, double weight[N_CTWS])
3278 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3279 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3281 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3282 const union value *value = case_data (c, specs->var);
3283 bool is_missing = var_is_value_missing (specs->var, value);
3284 bool is_scale_missing
3285 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3287 for (size_t i = 0; i < specs->n; i++)
3288 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3289 is_scale_missing, is_included,
3290 weight[specs->specs[i].weighting]);
3291 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3292 if (!(cell->omit_areas && (1u << at)))
3294 struct ctables_area *a = cell->areas[at];
3296 add_weight (a->total, weight);
3298 add_weight (a->count, weight);
3301 add_weight (a->valid, weight);
3303 if (!is_scale_missing)
3304 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3306 const struct variable *var = s->table->sum_vars[i];
3307 double addend = case_num (c, var);
3308 if (!var_is_num_missing (var, addend))
3309 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3310 a->sums[i].sum[wt] += addend * weight[wt];
3317 recurse_totals (struct ctables_section *s, const struct ccase *c,
3318 const struct ctables_category **cats[PIVOT_N_AXES],
3319 bool is_included, double weight[N_CTWS],
3320 enum pivot_axis_type start_axis, size_t start_nest)
3322 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3324 const struct ctables_nest *nest = s->nests[a];
3325 for (size_t i = start_nest; i < nest->n; i++)
3327 if (i == nest->scale_idx)
3330 const struct variable *var = nest->vars[i];
3332 const struct ctables_category *total = ctables_categories_total (
3333 s->table->categories[var_get_dict_index (var)]);
3336 const struct ctables_category *save = cats[a][i];
3338 ctables_cell_add__ (s, c, cats, is_included, weight);
3339 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3348 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3349 const struct ctables_category **cats[PIVOT_N_AXES],
3350 bool is_included, double weight[N_CTWS],
3351 enum pivot_axis_type start_axis, size_t start_nest)
3353 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3355 const struct ctables_nest *nest = s->nests[a];
3356 for (size_t i = start_nest; i < nest->n; i++)
3358 if (i == nest->scale_idx)
3361 const struct ctables_category *save = cats[a][i];
3364 cats[a][i] = save->subtotal;
3365 ctables_cell_add__ (s, c, cats, is_included, weight);
3366 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3375 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3376 double weight[N_CTWS])
3378 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3379 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3380 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3381 const struct ctables_category **cats[PIVOT_N_AXES] =
3383 [PIVOT_AXIS_LAYER] = layer_cats,
3384 [PIVOT_AXIS_ROW] = row_cats,
3385 [PIVOT_AXIS_COLUMN] = column_cats,
3388 bool is_included = true;
3390 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3392 const struct ctables_nest *nest = s->nests[a];
3393 for (size_t i = 0; i < nest->n; i++)
3394 if (i != nest->scale_idx)
3396 const struct variable *var = nest->vars[i];
3397 const union value *value = case_data (c, var);
3399 cats[a][i] = ctables_categories_match (
3400 s->table->categories[var_get_dict_index (var)], value, var);
3403 if (i != nest->summary_idx)
3406 if (!var_is_value_missing (var, value))
3409 static const struct ctables_category cct_excluded_missing = {
3410 .type = CCT_EXCLUDED_MISSING,
3413 cats[a][i] = &cct_excluded_missing;
3414 is_included = false;
3420 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3422 const struct ctables_nest *nest = s->nests[a];
3423 for (size_t i = 0; i < nest->n; i++)
3424 if (i != nest->scale_idx)
3426 const struct variable *var = nest->vars[i];
3427 const union value *value = case_data (c, var);
3428 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3432 ctables_cell_add__ (s, c, cats, is_included, weight);
3433 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3434 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
3437 struct ctables_value
3439 struct hmap_node node;
3444 static struct ctables_value *
3445 ctables_value_find__ (const struct ctables_table *t, const union value *value,
3446 int width, unsigned int hash)
3448 struct ctables_value *clv;
3449 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3450 hash, &t->clabels_values_map)
3451 if (value_equal (value, &clv->value, width))
3457 ctables_value_insert (struct ctables_table *t, const union value *value,
3460 unsigned int hash = value_hash (value, width, 0);
3461 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3464 clv = xmalloc (sizeof *clv);
3465 value_clone (&clv->value, value, width);
3466 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3470 static const struct ctables_value *
3471 ctables_value_find (const struct ctables_cell *cell)
3473 const struct ctables_section *s = cell->section;
3474 const struct ctables_table *t = s->table;
3475 if (!t->clabels_example)
3478 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3479 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3480 const union value *value
3481 = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3482 int width = var_get_width (var);
3483 const struct ctables_value *ctv = ctables_value_find__ (
3484 t, value, width, value_hash (value, width, 0));
3485 assert (ctv != NULL);
3490 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3492 const struct ctables_value *const *ap = a_;
3493 const struct ctables_value *const *bp = b_;
3494 const struct ctables_value *a = *ap;
3495 const struct ctables_value *b = *bp;
3496 const int *width = width_;
3497 return value_compare_3way (&a->value, &b->value, *width);
3501 ctables_sort_clabels_values (struct ctables_table *t)
3503 const struct variable *v0 = t->clabels_example;
3504 int width = var_get_width (v0);
3506 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3509 const struct val_labs *val_labs = var_get_value_labels (v0);
3510 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3511 vl = val_labs_next (val_labs, vl))
3512 if (ctables_categories_match (c0, &vl->value, v0))
3513 ctables_value_insert (t, &vl->value, width);
3516 size_t n = hmap_count (&t->clabels_values_map);
3517 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3519 struct ctables_value *clv;
3521 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3522 t->clabels_values[i++] = clv;
3523 t->n_clabels_values = n;
3526 sort (t->clabels_values, n, sizeof *t->clabels_values,
3527 compare_ctables_values_3way, &width);
3529 for (size_t i = 0; i < n; i++)
3530 t->clabels_values[i]->leaf = i;
3535 const struct dictionary *dict;
3536 struct pivot_table_look *look;
3538 /* For CTEF_* formats. */
3539 struct fmt_settings ctables_formats;
3541 /* If this is NULL, zeros are displayed using the normal print format.
3542 Otherwise, this string is displayed. */
3545 /* If this is NULL, missing values are displayed using the normal print
3546 format. Otherwise, this string is displayed. */
3549 /* Indexed by variable dictionary index. */
3550 enum ctables_vlabel *vlabels;
3552 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3554 bool mrsets_count_duplicates; /* MRSETS. */
3555 bool smissing_listwise; /* SMISSING. */
3556 struct variable *e_weight; /* WEIGHT. */
3557 int hide_threshold; /* HIDESMALLCOUNTS. */
3559 struct ctables_table **tables;
/* Postcompute binary operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  return a + b;
}
/* Postcompute binary operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  return a - b;
}
/* Postcompute binary operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  return a * b;
}
3582 ctpo_div (double a, double b)
3584 return b ? a / b : SYSMIS;
3588 ctpo_pow (double a, double b)
3590 int save_errno = errno;
3592 double result = pow (a, b);
3600 ctpo_neg (double a, double b UNUSED)
3605 struct ctables_pcexpr_evaluate_ctx
3607 const struct ctables_cell *cell;
3608 const struct ctables_section *section;
3609 const struct ctables_categories *cats;
3610 enum pivot_axis_type pc_a;
3613 enum fmt_type parse_format;
3616 static double ctables_pcexpr_evaluate (
3617 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3620 ctables_pcexpr_evaluate_nonterminal (
3621 const struct ctables_pcexpr_evaluate_ctx *ctx,
3622 const struct ctables_pcexpr *e, size_t n_args,
3623 double evaluate (double, double))
3625 double args[2] = { 0, 0 };
3626 for (size_t i = 0; i < n_args; i++)
3628 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3629 if (!isfinite (args[i]) || args[i] == SYSMIS)
3632 return evaluate (args[0], args[1]);
3636 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3637 const struct ctables_cell_value *pc_cv)
3639 const struct ctables_section *s = ctx->section;
3642 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3644 const struct ctables_nest *nest = s->nests[a];
3645 for (size_t i = 0; i < nest->n; i++)
3646 if (i != nest->scale_idx)
3648 const struct ctables_cell_value *cv
3649 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3650 : &ctx->cell->axes[a].cvs[i]);
3651 hash = hash_pointer (cv->category, hash);
3652 if (cv->category->type != CCT_TOTAL
3653 && cv->category->type != CCT_SUBTOTAL
3654 && cv->category->type != CCT_POSTCOMPUTE)
3655 hash = value_hash (&cv->value,
3656 var_get_width (nest->vars[i]), hash);
3660 struct ctables_cell *tc;
3661 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3663 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3665 const struct ctables_nest *nest = s->nests[a];
3666 for (size_t i = 0; i < nest->n; i++)
3667 if (i != nest->scale_idx)
3669 const struct ctables_cell_value *p_cv
3670 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3671 : &ctx->cell->axes[a].cvs[i]);
3672 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3673 if (p_cv->category != t_cv->category
3674 || (p_cv->category->type != CCT_TOTAL
3675 && p_cv->category->type != CCT_SUBTOTAL
3676 && p_cv->category->type != CCT_POSTCOMPUTE
3677 && !value_equal (&p_cv->value,
3679 var_get_width (nest->vars[i]))))
3691 const struct ctables_table *t = s->table;
3692 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3693 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3694 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3695 &specs->specs[ctx->summary_idx]);
3699 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3700 const struct ctables_pcexpr *e)
3707 case CTPO_CAT_NRANGE:
3708 case CTPO_CAT_SRANGE:
3709 case CTPO_CAT_MISSING:
3710 case CTPO_CAT_OTHERNM:
3712 struct ctables_cell_value cv = {
3713 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3715 assert (cv.category != NULL);
3717 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3718 const struct ctables_occurrence *o;
3721 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3722 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3723 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3725 cv.value = o->value;
3726 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3731 case CTPO_CAT_NUMBER:
3732 case CTPO_CAT_SUBTOTAL:
3733 case CTPO_CAT_TOTAL:
3735 struct ctables_cell_value cv = {
3736 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3737 .value = { .f = e->number },
3739 assert (cv.category != NULL);
3740 return ctables_pcexpr_evaluate_category (ctx, &cv);
3743 case CTPO_CAT_STRING:
3745 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3747 if (width > e->string.length)
3749 s = xmalloc (width);
3750 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3753 const struct ctables_category *category
3754 = ctables_find_category_for_postcompute (
3755 ctx->section->table->ctables->dict,
3756 ctx->cats, ctx->parse_format, e);
3757 assert (category != NULL);
3759 struct ctables_cell_value cv = { .category = category };
3760 if (category->type == CCT_NUMBER)
3761 cv.value.f = category->number;
3762 else if (category->type == CCT_STRING)
3763 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3767 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3773 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3776 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3779 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3782 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3785 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3788 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3794 static const struct ctables_category *
3795 ctables_cell_postcompute (const struct ctables_section *s,
3796 const struct ctables_cell *cell,
3797 enum pivot_axis_type *pc_a_p,
3800 assert (cell->postcompute);
3801 const struct ctables_category *pc_cat = NULL;
3802 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3803 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3805 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3806 if (cv->category->type == CCT_POSTCOMPUTE)
3810 /* Multiple postcomputes cross each other. The value is undefined. */
3815 pc_cat = cv->category;
3819 *pc_a_idx_p = pc_a_idx;
3823 assert (pc_cat != NULL);
3828 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3829 const struct ctables_cell *cell,
3830 const struct ctables_summary_spec *ss,
3831 struct fmt_spec *format,
3832 bool *is_ctables_format,
3835 enum pivot_axis_type pc_a = 0;
3836 size_t pc_a_idx = 0;
3837 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3838 s, cell, &pc_a, &pc_a_idx);
3842 const struct ctables_postcompute *pc = pc_cat->pc;
3845 for (size_t i = 0; i < pc->specs->n; i++)
3847 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3848 if (ss->function == ss2->function
3849 && ss->weighting == ss2->weighting
3850 && ss->calc_area == ss2->calc_area
3851 && ss->percentile == ss2->percentile)
3853 *format = ss2->format;
3854 *is_ctables_format = ss2->is_ctables_format;
3860 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3861 const struct ctables_categories *cats = s->table->categories[
3862 var_get_dict_index (var)];
3863 struct ctables_pcexpr_evaluate_ctx ctx = {
3868 .pc_a_idx = pc_a_idx,
3869 .summary_idx = summary_idx,
3870 .parse_format = pc_cat->parse_format,
3872 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3875 /* Chi-square test (SIGTEST). */
3876 struct ctables_chisq
3879 bool include_mrsets;
3883 /* Pairwise comparison test (COMPARETEST). */
3884 struct ctables_pairwise
3886 enum { PROP, MEAN } type;
3888 bool include_mrsets;
3889 bool meansvariance_allcats;
3891 enum { BONFERRONI = 1, BH } adjust;
3900 parse_col_width (struct lexer *lexer, const char *name, double *width)
3902 lex_match (lexer, T_EQUALS);
3903 if (lex_match_id (lexer, "DEFAULT"))
3905 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3907 *width = lex_number (lexer);
/* Parses YES or NO.  On success, stores true (for YES) or false (for NO) in
   *B and returns true.  Otherwise, reports an error naming the expected
   keywords and returns false, leaving *B untouched. */
static bool
parse_bool (struct lexer *lexer, bool *b)
{
  if (lex_match_id (lexer, "NO"))
    *b = false;
  else if (lex_match_id (lexer, "YES"))
    *b = true;
  else
    {
      lex_error_expecting (lexer, "YES", "NO");
      return false;
    }
  return true;
}
/* Frees CHISQ. */
static void
ctables_chisq_destroy (struct ctables_chisq *chisq)
{
  free (chisq);
}
/* Frees PAIRWISE. */
static void
ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
{
  free (pairwise);
}
3944 ctables_table_destroy (struct ctables_table *t)
3949 for (size_t i = 0; i < t->n_sections; i++)
3950 ctables_section_uninit (&t->sections[i]);
3953 for (size_t i = 0; i < t->n_categories; i++)
3954 ctables_categories_unref (t->categories[i]);
3955 free (t->categories);
3957 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3959 ctables_axis_destroy (t->axes[a]);
3960 ctables_stack_uninit (&t->stacks[a]);
3962 free (t->summary_specs.specs);
3964 struct ctables_value *ctv, *next_ctv;
3965 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3966 &t->clabels_values_map)
3968 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3969 hmap_delete (&t->clabels_values_map, &ctv->node);
3972 hmap_destroy (&t->clabels_values_map);
3973 free (t->clabels_values);
3979 ctables_chisq_destroy (t->chisq);
3980 ctables_pairwise_destroy (t->pairwise);
3985 ctables_destroy (struct ctables *ct)
3990 struct ctables_postcompute *pc, *next_pc;
3991 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3995 msg_location_destroy (pc->location);
3996 ctables_pcexpr_destroy (pc->expr);
4000 ctables_summary_spec_set_uninit (pc->specs);
4003 hmap_delete (&ct->postcomputes, &pc->hmap_node);
4006 hmap_destroy (&ct->postcomputes);
4008 fmt_settings_uninit (&ct->ctables_formats);
4009 pivot_table_look_unref (ct->look);
4013 for (size_t i = 0; i < ct->n_tables; i++)
4014 ctables_table_destroy (ct->tables[i]);
4020 all_strings (struct variable **vars, size_t n_vars,
4021 const struct ctables_category *cat)
4023 for (size_t j = 0; j < n_vars; j++)
4024 if (var_is_numeric (vars[j]))
4026 msg_at (SE, cat->location,
4027 _("This category specification may be applied only to string "
4028 "variables, but this subcommand tries to apply it to "
4029 "numeric variable %s."),
4030 var_get_name (vars[j]));
4037 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4038 struct ctables *ct, struct ctables_table *t)
4040 if (!lex_force_match_id (lexer, "VARIABLES"))
4042 lex_match (lexer, T_EQUALS);
4044 struct variable **vars;
4046 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
4049 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
4050 for (size_t i = 1; i < n_vars; i++)
4052 const struct fmt_spec *f = var_get_print_format (vars[i]);
4053 if (f->type != common_format->type)
4055 common_format = NULL;
4061 && (fmt_get_category (common_format->type)
4062 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
4064 struct ctables_categories *c = xmalloc (sizeof *c);
4065 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
4066 for (size_t i = 0; i < n_vars; i++)
4068 struct ctables_categories **cp
4069 = &t->categories[var_get_dict_index (vars[i])];
4070 ctables_categories_unref (*cp);
4074 size_t allocated_cats = 0;
4075 int cats_start_ofs = -1;
4076 int cats_end_ofs = -1;
4077 if (lex_match (lexer, T_LBRACK))
4079 cats_start_ofs = lex_ofs (lexer);
4082 if (c->n_cats >= allocated_cats)
4083 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4085 int start_ofs = lex_ofs (lexer);
4086 struct ctables_category *cat = &c->cats[c->n_cats];
4087 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4089 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4092 lex_match (lexer, T_COMMA);
4094 while (!lex_match (lexer, T_RBRACK));
4095 cats_end_ofs = lex_ofs (lexer) - 1;
4098 struct ctables_category cat = {
4100 .include_missing = false,
4101 .sort_ascending = true,
4103 bool show_totals = false;
4104 char *total_label = NULL;
4105 bool totals_before = false;
4106 int key_start_ofs = 0;
4107 int key_end_ofs = 0;
4108 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4110 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4112 lex_match (lexer, T_EQUALS);
4113 if (lex_match_id (lexer, "A"))
4114 cat.sort_ascending = true;
4115 else if (lex_match_id (lexer, "D"))
4116 cat.sort_ascending = false;
4119 lex_error_expecting (lexer, "A", "D");
4123 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4125 key_start_ofs = lex_ofs (lexer) - 1;
4126 lex_match (lexer, T_EQUALS);
4127 if (lex_match_id (lexer, "VALUE"))
4128 cat.type = CCT_VALUE;
4129 else if (lex_match_id (lexer, "LABEL"))
4130 cat.type = CCT_LABEL;
4133 cat.type = CCT_FUNCTION;
4134 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4135 &cat.weighting, &cat.area))
4138 if (lex_match (lexer, T_LPAREN))
4140 cat.sort_var = parse_variable (lexer, dict);
4144 if (cat.sort_function == CTSF_PTILE)
4146 lex_match (lexer, T_COMMA);
4147 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4149 cat.percentile = lex_number (lexer);
4153 if (!lex_force_match (lexer, T_RPAREN))
4156 else if (ctables_function_availability (cat.sort_function)
4159 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4163 key_end_ofs = lex_ofs (lexer) - 1;
4165 if (cat.type == CCT_FUNCTION)
4167 lex_ofs_error (lexer, key_start_ofs, key_end_ofs,
4168 _("Data-dependent sorting is not implemented."));
4172 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4174 lex_match (lexer, T_EQUALS);
4175 if (lex_match_id (lexer, "INCLUDE"))
4176 cat.include_missing = true;
4177 else if (lex_match_id (lexer, "EXCLUDE"))
4178 cat.include_missing = false;
4181 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4185 else if (lex_match_id (lexer, "TOTAL"))
4187 lex_match (lexer, T_EQUALS);
4188 if (!parse_bool (lexer, &show_totals))
4191 else if (lex_match_id (lexer, "LABEL"))
4193 lex_match (lexer, T_EQUALS);
4194 if (!lex_force_string (lexer))
4197 total_label = ss_xstrdup (lex_tokss (lexer));
4200 else if (lex_match_id (lexer, "POSITION"))
4202 lex_match (lexer, T_EQUALS);
4203 if (lex_match_id (lexer, "BEFORE"))
4204 totals_before = true;
4205 else if (lex_match_id (lexer, "AFTER"))
4206 totals_before = false;
4209 lex_error_expecting (lexer, "BEFORE", "AFTER");
4213 else if (lex_match_id (lexer, "EMPTY"))
4215 lex_match (lexer, T_EQUALS);
4216 if (lex_match_id (lexer, "INCLUDE"))
4217 c->show_empty = true;
4218 else if (lex_match_id (lexer, "EXCLUDE"))
4219 c->show_empty = false;
4222 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4229 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4230 "TOTAL", "LABEL", "POSITION", "EMPTY");
4232 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
4240 cat.location = lex_ofs_location (lexer, key_start_ofs, key_end_ofs);
4242 if (c->n_cats >= allocated_cats)
4243 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4244 c->cats[c->n_cats++] = cat;
4249 if (c->n_cats >= allocated_cats)
4250 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4252 struct ctables_category *totals;
4255 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4256 totals = &c->cats[0];
4259 totals = &c->cats[c->n_cats];
4262 *totals = (struct ctables_category) {
4264 .total_label = total_label ? total_label : xstrdup (_("Total")),
4268 struct ctables_category *subtotal = NULL;
4269 for (size_t i = totals_before ? 0 : c->n_cats;
4270 totals_before ? i < c->n_cats : i-- > 0;
4271 totals_before ? i++ : 0)
4273 struct ctables_category *cat = &c->cats[i];
4282 cat->subtotal = subtotal;
4285 case CCT_POSTCOMPUTE:
4296 case CCT_EXCLUDED_MISSING:
4301 if (cats_start_ofs != -1)
4303 for (size_t i = 0; i < c->n_cats; i++)
4305 struct ctables_category *cat = &c->cats[i];
4308 case CCT_POSTCOMPUTE:
4309 cat->parse_format = parse_strings ? common_format->type : FMT_F;
4310 struct msg_location *cats_location
4311 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4312 bool ok = ctables_recursive_check_postcompute (
4313 dict, cat->pc->expr, cat, c, cats_location);
4314 msg_location_destroy (cats_location);
4321 for (size_t j = 0; j < n_vars; j++)
4322 if (var_is_alpha (vars[j]))
4324 msg_at (SE, cat->location,
4325 _("This category specification may be applied "
4326 "only to numeric variables, but this "
4327 "subcommand tries to apply it to string "
4329 var_get_name (vars[j]));
4338 if (!parse_category_string (cat->location, cat->string, dict,
4339 common_format->type, &n))
4342 ss_dealloc (&cat->string);
4344 cat->type = CCT_NUMBER;
4347 else if (!all_strings (vars, n_vars, cat))
4356 if (!cat->srange[0].string)
4358 else if (!parse_category_string (cat->location,
4359 cat->srange[0], dict,
4360 common_format->type, &n[0]))
4363 if (!cat->srange[1].string)
4365 else if (!parse_category_string (cat->location,
4366 cat->srange[1], dict,
4367 common_format->type, &n[1]))
4370 ss_dealloc (&cat->srange[0]);
4371 ss_dealloc (&cat->srange[1]);
4373 cat->type = CCT_NRANGE;
4374 cat->nrange[0] = n[0];
4375 cat->nrange[1] = n[1];
4377 else if (!all_strings (vars, n_vars, cat))
4388 case CCT_EXCLUDED_MISSING:
4405 const struct ctables_summary_spec_set *set;
4410 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4412 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4413 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4414 if (as->function != bs->function)
4415 return as->function > bs->function ? 1 : -1;
4416 else if (as->weighting != bs->weighting)
4417 return as->weighting > bs->weighting ? 1 : -1;
4418 else if (as->calc_area != bs->calc_area)
4419 return as->calc_area > bs->calc_area ? 1 : -1;
4420 else if (as->percentile != bs->percentile)
4421 return as->percentile < bs->percentile ? 1 : -1;
4423 const char *as_label = as->label ? as->label : "";
4424 const char *bs_label = bs->label ? bs->label : "";
4425 return strcmp (as_label, bs_label);
/* Recursively enumerates one nest index per pivot axis into IX, starting at
   axis A and letting each axis range over MAX (number of nests in its stack,
   1) values.  Once A reaches PIVOT_N_AXES, takes the next unused element of
   T->sections and initializes it with an empty cell map, one occurrence map
   per variable in each axis's selected nest, and one empty map per area type.

   Nothing here grows T->sections, so the caller must already have allocated
   room for every combination of indexes. */
4429 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4430 size_t ix[PIVOT_N_AXES])
4432 if (a < PIVOT_N_AXES)
4434 size_t limit = MAX (t->stacks[a].n, 1);
4435 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4436 ctables_table_add_section (t, a + 1, ix);
/* All axes have an index now: claim and fill in a new section. */
4440 struct ctables_section *s = &t->sections[t->n_sections++];
4441 *s = (struct ctables_section) {
4443 .cells = HMAP_INITIALIZER (s->cells),
/* 'a' is reused as a plain loop counter here; the recursion is finished. */
4445 for (a = 0; a < PIVOT_N_AXES; a++)
4448 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4450 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4451 for (size_t i = 0; i < nest->n; i++)
4452 hmap_init (&s->occurrences[a][i]);
4454 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4455 hmap_init (&s->areas[at]);
4460 ctables_format (double d, const struct fmt_spec *format,
4461 const struct fmt_settings *settings)
4463 const union value v = { .f = d };
4464 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4466 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4467 produce the results we want for negative numbers, putting the negative
4468 sign in the wrong spot, before the prefix instead of after it. We can't,
4469 in fact, produce the desired results using a custom-currency
4470 specification. Instead, we postprocess the output, moving the negative
4473 NEQUAL: "-N=3" => "N=-3"
4474 PAREN: "-(3)" => "(-3)"
4475 PCTPAREN: "-(3%)" => "(-3%)"
4477 This transformation doesn't affect NEGPAREN. */
4478 char *minus_src = strchr (s, '-');
4479 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4481 char *n_equals = strstr (s, "N=");
4482 char *lparen = strchr (s, '(');
4483 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4485 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4491 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4493 for (size_t i = 0; i < t->stacks[a].n; i++)
4495 struct ctables_nest *nest = &t->stacks[a].nests[i];
4496 if (nest->n != 1 || nest->scale_idx != 0)
4499 enum ctables_vlabel vlabel
4500 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4501 if (vlabel != CTVL_NONE)
/* Returns -1 if A is less than B, 1 if A is greater than B, and 0 if they are
   equal. */
static int
compare_ints_3way (int a, int b)
{
  if (a < b)
    return -1;
  return a > b ? 1 : 0;
}
/* Comparison function for sorting an array of 'struct ctables_cell *'.  A_
   and B_ each point to one array element; AUX is unused.

   Compares the two cells' leaf indexes axis by axis with compare_ints_3way(),
   and then the leaf indexes of the values that ctables_value_find() yields for
   the two cells.

   NOTE(review): the statements that act on each comparison result are not
   visible in this listing; presumably the first nonzero result is returned
   and equal cells yield 0 -- confirm. */
4514 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
4515 const void *aux UNUSED)
4517 struct ctables_cell *const *ap = a_;
4518 struct ctables_cell *const *bp = b_;
4519 const struct ctables_cell *a = *ap;
4520 const struct ctables_cell *b = *bp;
/* Primary keys: the leaf index on each pivot axis, in axis order. */
4528 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
4530 int cmp = compare_ints_3way (a->axes[axis].leaf, b->axes[axis].leaf);
/* Secondary key: the leaf of each cell's ctables_value, if it has one. */
4535 const struct ctables_value *a_ctv = ctables_value_find (a);
4536 const struct ctables_value *b_ctv = ctables_value_find (b);
4539 int cmp = compare_ints_3way (a_ctv->leaf, b_ctv->leaf);
/* Sanity check that neither cell has a ctables_value on this path. */
4544 assert (!a_ctv && !b_ctv);
/* Builds a pivot table from the cells accumulated in T's sections and submits
   it for output.  CT supplies settings that apply to the whole CTABLES
   command: the table look, variable-label display, the hide threshold, the
   substitute text for zero and missing values, and the formatting settings
   passed to ctables_format().

   The work proceeds in stages: create the table with its title, caption, and
   corner text; optionally add a "Statistics" dimension and a moved
   category-labels dimension; build one dimension per axis by sorting the
   cells and creating pivot categories level by level; number the areas; and
   finally emit one value per cell and summary specification. */
4549 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4551 struct pivot_table *pt = pivot_table_create__ (
4553 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4554 : pivot_value_new_text (N_("Custom Tables"))),
4557 pivot_table_set_caption (
4558 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4560 pivot_table_set_corner_text (
4561 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary labels get a dimension of their own when they are placed on a
   different axis from the summaries, or when they are hidden although there
   is more than one summary specification. */
4563 bool summary_dimension = (t->summary_axis != t->slabels_axis
4564 || (!t->slabels_visible
4565 && t->summary_specs.n > 1));
4566 if (summary_dimension)
4568 struct pivot_dimension *d = pivot_dimension_create (
4569 pt, t->slabels_axis, N_("Statistics"));
4570 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4571 if (!t->slabels_visible)
4572 d->hide_all_labels = true;
4573 for (size_t i = 0; i < specs->n; i++)
4574 pivot_category_create_leaf (
4575 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* When T->clabels_example is set, category labels get their own dimension
   with one leaf per entry in T->clabels_values. */
4578 bool categories_dimension = t->clabels_example != NULL;
4579 if (categories_dimension)
4581 struct pivot_dimension *d = pivot_dimension_create (
4582 pt, t->label_axis[t->clabels_from_axis],
4583 t->clabels_from_axis == PIVOT_AXIS_ROW
4584 ? N_("Row Categories")
4585 : N_("Column Categories"));
4586 const struct variable *var = t->clabels_example;
4587 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4588 for (size_t i = 0; i < t->n_clabels_values; i++)
4590 const struct ctables_value *value = t->clabels_values[i];
4591 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4592 assert (cat != NULL);
4593 pivot_category_create_leaf (
4594 d->root, ctables_category_create_value_label (c, cat,
4600 pivot_table_set_look (pt, ct->look);
/* One dimension per axis that has an axis expression or that carries the
   summaries. */
4601 struct pivot_dimension *d[PIVOT_N_AXES];
4602 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4604 static const char *names[] = {
4605 [PIVOT_AXIS_ROW] = N_("Rows"),
4606 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4607 [PIVOT_AXIS_LAYER] = N_("Layers"),
4609 d[a] = (t->axes[a] || a == t->summary_axis
4610 ? pivot_dimension_create (pt, a, names[a])
4615 assert (t->axes[a]);
4617 for (size_t i = 0; i < t->stacks[a].n; i++)
4619 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Gather the sections that use this nest on axis A, counting their cells and
   tracking the deepest nest among them. */
4620 struct ctables_section **sections = xnmalloc (t->n_sections,
4622 size_t n_sections = 0;
4624 size_t n_total_cells = 0;
4625 size_t max_depth = 0;
4626 for (size_t j = 0; j < t->n_sections; j++)
4627 if (t->sections[j].nests[a] == nest)
4629 struct ctables_section *s = &t->sections[j];
4630 sections[n_sections++] = s;
4631 n_total_cells += hmap_count (&s->cells);
4633 size_t depth = s->nests[a]->n;
4634 max_depth = MAX (depth, max_depth);
/* Collect cells from those sections and sort them with
   ctables_cell_compare_3way(). */
4637 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4639 size_t n_sorted = 0;
4641 for (size_t j = 0; j < n_sections; j++)
4643 struct ctables_section *s = sections[j];
4645 struct ctables_cell *cell;
4646 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4648 sorted[n_sorted++] = cell;
4649 assert (n_sorted <= n_total_cells);
4652 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4653 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* A "level" is one layer of pivot categories within this nest.  At most two
   levels per variable plus one for summaries are needed, hence the
   1 + 2 * max_depth allocation below. */
4655 struct ctables_level
4657 enum ctables_level_type
4659 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4660 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4661 CTL_SUMMARY, /* Summary functions. */
4665 enum settings_value_show vlabel; /* CTL_VAR only. */
4668 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4669 size_t n_levels = 0;
4670 for (size_t k = 0; k < nest->n; k++)
4672 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4673 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4675 if (vlabel != CTVL_NONE)
4677 levels[n_levels++] = (struct ctables_level) {
4679 .vlabel = (enum settings_value_show) vlabel,
4684 if (nest->scale_idx != k
4685 && (k != nest->n - 1 || t->label_axis[a] == a))
4687 levels[n_levels++] = (struct ctables_level) {
4688 .type = CTL_CATEGORY,
4694 if (!summary_dimension && a == t->slabels_axis)
4696 levels[n_levels++] = (struct ctables_level) {
4697 .type = CTL_SUMMARY,
4698 .var_idx = SIZE_MAX,
4702 /* Pivot categories:
4704 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4705 - category for nest->vars[0], if nest->scale_idx != 0
4706 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4707 - category for nest->vars[1], if nest->scale_idx != 1
4709 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4710 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4711 - summary function, if 'a == t->slabels_axis && a ==
4714 Additional dimensions:
4716 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4718 - If 't->label_axis[b] == a' for some 'b != a', add a category
4723 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4725 for (size_t j = 0; j < n_sorted; j++)
4727 struct ctables_cell *cell = sorted[j];
4728 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4730 size_t n_common = 0;
4733 for (; n_common < n_levels; n_common++)
4735 const struct ctables_level *level = &levels[n_common];
4736 if (level->type == CTL_CATEGORY)
4738 size_t var_idx = level->var_idx;
4739 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4740 if (prev->axes[a].cvs[var_idx].category != c)
4742 else if (c->type != CCT_SUBTOTAL
4743 && c->type != CCT_TOTAL
4744 && c->type != CCT_POSTCOMPUTE
4745 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4746 &cell->axes[a].cvs[var_idx].value,
4747 var_get_type (nest->vars[var_idx])))
4753 for (size_t k = n_common; k < n_levels; k++)
4755 const struct ctables_level *level = &levels[k];
4756 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4757 if (level->type == CTL_SUMMARY)
4759 assert (k == n_levels - 1);
4761 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4762 for (size_t m = 0; m < specs->n; m++)
4764 int leaf = pivot_category_create_leaf (
4765 parent, ctables_summary_label (&specs->specs[m],
4773 const struct variable *var = nest->vars[level->var_idx];
4774 struct pivot_value *label;
4775 if (level->type == CTL_VAR)
4777 label = pivot_value_new_variable (var);
4778 label->variable.show = level->vlabel;
4780 else if (level->type == CTL_CATEGORY)
4782 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4783 label = ctables_category_create_value_label (
4784 t->categories[var_get_dict_index (var)],
4785 cv->category, var, &cv->value);
4790 if (k == n_levels - 1)
4791 prev_leaf = pivot_category_create_leaf (parent, label);
4793 groups[k] = pivot_category_create_group__ (parent, label);
4797 cell->axes[a].leaf = prev_leaf;
4806 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4810 size_t n_total_cells = 0;
4811 for (size_t j = 0; j < t->n_sections; j++)
4812 n_total_cells += hmap_count (&t->sections[j].cells);
4814 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4815 size_t n_sorted = 0;
4816 for (size_t j = 0; j < t->n_sections; j++)
4818 const struct ctables_section *s = &t->sections[j];
4819 struct ctables_cell *cell;
4820 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4822 sorted[n_sorted++] = cell;
4824 assert (n_sorted <= n_total_cells);
4825 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4827 size_t ids[N_CTATS];
4828 memset (ids, 0, sizeof ids);
4829 for (size_t j = 0; j < n_sorted; j++)
4831 struct ctables_cell *cell = sorted[j];
4832 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4834 struct ctables_area *area = cell->areas[at];
4835 if (!area->sequence)
4836 area->sequence = ++ids[at];
4843 for (size_t i = 0; i < t->n_sections; i++)
4845 struct ctables_section *s = &t->sections[i];
4847 struct ctables_cell *cell;
4848 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4853 const struct ctables_value *ctv = ctables_value_find (cell);
4854 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4855 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4856 for (size_t j = 0; j < specs->n; j++)
4859 size_t n_dindexes = 0;
4861 if (summary_dimension)
4862 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4865 dindexes[n_dindexes++] = ctv->leaf;
4867 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4870 int leaf = cell->axes[a].leaf;
4871 if (a == t->summary_axis && !summary_dimension)
4873 dindexes[n_dindexes++] = leaf;
4876 const struct ctables_summary_spec *ss = &specs->specs[j];
4878 struct fmt_spec format = specs->specs[j].format;
4879 bool is_ctables_format = ss->is_ctables_format;
4880 double d = (cell->postcompute
4881 ? ctables_cell_calculate_postcompute (
4882 s, cell, ss, &format, &is_ctables_format, j)
4883 : ctables_summary_value (cell->areas,
4884 &cell->summaries[j], ss));
4886 struct pivot_value *value;
4887 if (ct->hide_threshold != 0
4888 && d < ct->hide_threshold
4889 && ss->function == CTSF_COUNT)
4891 value = pivot_value_new_user_text_nocopy (
4892 xasprintf ("<%d", ct->hide_threshold));
4894 else if (d == 0 && ct->zero)
4895 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4896 else if (d == SYSMIS && ct->missing)
4897 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4898 else if (is_ctables_format)
4899 value = pivot_value_new_user_text_nocopy (
4900 ctables_format (d, &format, &ct->ctables_formats));
4903 value = pivot_value_new_number (d);
4904 value->numeric.format = format;
4906 /* XXX should text values be right-justified? */
4907 pivot_table_put (pt, dindexes, n_dindexes, value);
4912 pivot_table_submit (pt);
4916 ctables_check_label_position (struct ctables_table *t, struct lexer *lexer,
4917 enum pivot_axis_type a)
4919 enum pivot_axis_type label_pos = t->label_axis[a];
4923 const struct ctables_stack *stack = &t->stacks[a];
4927 const struct ctables_nest *n0 = &stack->nests[0];
4930 assert (stack->n == 1);
4934 const struct variable *v0 = n0->vars[n0->n - 1];
4935 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4936 t->clabels_example = v0;
4938 for (size_t i = 0; i < c0->n_cats; i++)
4939 if (c0->cats[i].type == CCT_FUNCTION)
4941 msg (SE, _("Category labels may not be moved to another axis when "
4942 "sorting by a summary function."));
4943 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4944 _("This syntax moves category labels to another axis."));
4945 msg_at (SN, c0->cats[i].location,
4946 _("This syntax requests sorting by a summary function."));
4950 for (size_t i = 0; i < stack->n; i++)
4952 const struct ctables_nest *ni = &stack->nests[i];
4954 const struct variable *vi = ni->vars[ni->n - 1];
4955 if (n0->n - 1 == ni->scale_idx)
4957 msg (SE, _("To move category labels from one axis to another, "
4958 "the variables whose labels are to be moved must be "
4959 "categorical, but %s is scale."), var_get_name (vi));
4960 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4961 _("This syntax moves category labels to another axis."));
4966 for (size_t i = 1; i < stack->n; i++)
4968 const struct ctables_nest *ni = &stack->nests[i];
4970 const struct variable *vi = ni->vars[ni->n - 1];
4971 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4973 if (var_get_width (v0) != var_get_width (vi))
4975 msg (SE, _("To move category labels from one axis to another, "
4976 "the variables whose labels are to be moved must all "
4977 "have the same width, but %s has width %d and %s has "
4979 var_get_name (v0), var_get_width (v0),
4980 var_get_name (vi), var_get_width (vi));
4981 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4982 _("This syntax moves category labels to another axis."));
4985 if (!val_labs_equal (var_get_value_labels (v0),
4986 var_get_value_labels (vi)))
4988 msg (SE, _("To move category labels from one axis to another, "
4989 "the variables whose labels are to be moved must all "
4990 "have the same value labels, but %s and %s have "
4991 "different value labels."),
4992 var_get_name (v0), var_get_name (vi));
4993 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4994 _("This syntax moves category labels to another axis."));
4997 if (!ctables_categories_equal (c0, ci))
4999 msg (SE, _("To move category labels from one axis to another, "
5000 "the variables whose labels are to be moved must all "
5001 "have the same category specifications, but %s and %s "
5002 "have different category specifications."),
5003 var_get_name (v0), var_get_name (vi));
5004 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5005 _("This syntax moves category labels to another axis."));
/* Looks up VAR in the array *SUM_VARS, which holds *N elements with room for
   *ALLOCATED.  If VAR is already present, returns its index.  Otherwise
   appends VAR, growing the array with x2nrealloc() if it is full, and returns
   the index of the new element. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  struct variable **list = *sum_vars;
  size_t count = *n;

  for (size_t pos = 0; pos < count; pos++)
    if (list[pos] == var)
      return pos;

  if (count >= *allocated)
    {
      list = x2nrealloc (list, allocated, sizeof *list);
      *sum_vars = list;
    }
  list[count] = var;
  *n = count + 1;
  return count;
}
/* Maps area type AREA to another area type and returns it.

   NOTE(review): only the results CTAT_LAYERCOL and CTAT_LAYERROW are visible
   in this listing.  Presumably this exchanges row-oriented and
   column-oriented area types and leaves the others unchanged -- confirm
   against the full switch statement. */
5027 static enum ctables_area_type
5028 rotate_area (enum ctables_area_type area)
5039 return CTAT_LAYERCOL;
5042 return CTAT_LAYERROW;
/* Walks the axis expression rooted at A and, for every summary specification
   whose function is CTSF_areaPCT_SUM, registers the axis's variable in the
   array *SUM_VARS (with *N elements and room for *ALLOCATED) through
   add_sum_var(), storing the resulting index in the spec's sum_var_idx.
   Recurses into both of A's sub-axes.

   NOTE(review): the conditions that choose between the leaf case and the
   recursive case are not visible in this listing. */
5055 enumerate_sum_vars (const struct ctables_axis *a,
5056 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Leaf: scan every summary variant's specs on this axis. */
5064 for (size_t i = 0; i < N_CSVS; i++)
5065 for (size_t j = 0; j < a->specs[i].n; j++)
5067 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5068 if (spec->function == CTSF_areaPCT_SUM)
5069 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Interior node: visit both operands. */
5075 for (size_t i = 0; i < 2; i++)
5076 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  In order, this:

     - Enumerates the nests on each axis with enumerate_fts() and computes,
       for each nest and area type, the list of variable indexes that define
       that area.

     - Gives an axis a single empty nest in the branch that sets
       T->label_axis[a] back to the axis itself.

     - Supplies a default summary specification for any nest on the summary
       axis that has none, and clones cell specs as total specs where no
       total specs exist.

     - Merges the summary specs of all the nests on the summary axis into
       T->summary_specs.

     - Collects into T->sum_vars the variables that CTSF_areaPCT_SUM
       summaries refer to.

   The return value is the conjunction of ctables_check_label_position() for
   the row and column axes; LEXER is used only by those checks. */
5082 ctables_prepare_table (struct ctables_table *t, struct lexer *lexer)
5084 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5087 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5089 for (size_t j = 0; j < t->stacks[a].n; j++)
5091 struct ctables_nest *nest = &t->stacks[a].nests[j];
5092 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Each area list can hold at most one index per variable in the nest. */
5094 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5095 nest->n_areas[at] = 0;
/* For row- and column-oriented areas, 'ata' is the area's own axis and 'atb'
   is the opposite one. */
5097 enum pivot_axis_type ata, atb;
5098 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5100 ata = PIVOT_AXIS_ROW;
5101 atb = PIVOT_AXIS_COLUMN;
5103 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
5105 ata = PIVOT_AXIS_COLUMN;
5106 atb = PIVOT_AXIS_ROW;
5109 if (at == CTAT_LAYER
5110 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5111 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5112 ? a == atb && t->label_axis[a] != a
/* k is unsigned, so this descending loop ends when k wraps around past zero
   and 'k < nest->n' becomes false. */
5115 for (size_t k = nest->n - 1; k < nest->n; k--)
5116 if (k != nest->scale_idx)
5118 nest->areas[at][nest->n_areas[at]++] = k;
5124 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5125 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5126 : at == CTAT_TABLE ? true
/* Start from every non-scale variable in the nest; entries are then trimmed
   from the end according to n_drop below. */
5130 for (size_t k = 0; k < nest->n; k++)
5131 if (k != nest->scale_idx)
5132 nest->areas[at][nest->n_areas[at]++] = k;
5138 #define L PIVOT_AXIS_LAYER
5139 n_drop = (t->clabels_from_axis == L ? a != L
5140 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5141 : t->clabels_from_axis == a ? 2
5148 n_drop = a == ata && t->label_axis[ata] == atb;
5153 n_drop = (a == ata ? t->label_axis[ata] == atb
5155 : t->clabels_from_axis == atb ? -1
5156 : t->clabels_to_axis != atb ? 1
/* This path removes the second-to-last entry by overwriting it with the
   last.  NOTE(review): the condition selecting it is not visible here;
   presumably it is the negative n_drop case -- confirm. */
5168 size_t n = nest->n_areas[at];
5171 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5172 nest->n_areas[at]--;
/* Drop up to n_drop entries from the end, never going below zero. */
5177 for (int i = 0; i < n_drop; i++)
5178 if (nest->n_areas[at] > 0)
5179 nest->n_areas[at]--;
5186 struct ctables_nest *nest = xmalloc (sizeof *nest);
5187 *nest = (struct ctables_nest) {
5189 .scale_idx = SIZE_MAX,
5190 .summary_idx = SIZE_MAX
5192 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5194 /* There's no point in moving labels away from an axis that has no
5195 labels, so avoid dealing with the special cases around that. */
5196 t->label_axis[a] = a;
5199 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5200 for (size_t i = 0; i < stack->n; i++)
5202 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified for this nest: supply one default, the
   mean for a scale summary set or the count otherwise, and use a clone of it
   for totals as well. */
5203 if (!nest->specs[CSV_CELL].n)
5205 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5206 ss->specs = xmalloc (sizeof *ss->specs);
5209 enum ctables_summary_function function
5210 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5214 nest->summary_idx = nest->n - 1;
5215 ss->var = nest->vars[nest->summary_idx];
5217 *ss->specs = (struct ctables_summary_spec) {
5218 .function = function,
5219 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5220 .format = ctables_summary_default_format (function, ss->var),
5223 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5224 &nest->specs[CSV_CELL]);
5226 else if (!nest->specs[CSV_TOTAL].n)
5227 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5228 &nest->specs[CSV_CELL]);
/* When row labels move to the column axis or column labels move to the row
   axis, each summary's calculation area is passed through rotate_area(). */
5230 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5231 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5233 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5234 for (size_t i = 0; i < nest->specs[sv].n; i++)
5236 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5237 const struct ctables_function_info *cfi =
5238 &ctables_function_info[ss->function];
5240 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise scale-missing handling, gather the scale variables of the
   other nests that share this nest's group_head, and attach the list to each
   of this nest's summary variants. */
5244 if (t->ctables->smissing_listwise)
5246 struct variable **listwise_vars = NULL;
5248 size_t allocated = 0;
5250 for (size_t j = nest->group_head; j < stack->n; j++)
5252 const struct ctables_nest *other_nest = &stack->nests[j];
5253 if (other_nest->group_head != nest->group_head)
5256 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5259 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5260 sizeof *listwise_vars);
5261 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5264 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5267 listwise_vars = xmemdup (listwise_vars,
5268 n * sizeof *listwise_vars);
5269 nest->specs[sv].listwise_vars = listwise_vars;
5270 nest->specs[sv].n_listwise_vars = n;
/* Merge the specs of every nest and summary variant into 'merged'.  Each
   merge item walks one spec set.  The code below picks the minimum item under
   merge_item_compare_3way(), appends its spec to 'merged', and then advances
   every item that compares equal to it, recording the merged index in that
   spec's axis_idx.  An item that runs out of specs is replaced by the last
   remaining item. */
5275 struct ctables_summary_spec_set *merged = &t->summary_specs;
5276 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5278 for (size_t j = 0; j < stack->n; j++)
5280 const struct ctables_nest *nest = &stack->nests[j];
5282 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5283 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5288 struct merge_item min = items[0];
5289 for (size_t j = 1; j < n_left; j++)
5290 if (merge_item_compare_3way (&items[j], &min) < 0)
5293 if (merged->n >= merged->allocated)
5294 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5295 sizeof *merged->specs);
5296 merged->specs[merged->n++] = min.set->specs[min.ofs];
5298 for (size_t j = 0; j < n_left; )
5300 if (merge_item_compare_3way (&items[j], &min) == 0)
5302 struct merge_item *item = &items[j];
5303 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5304 if (item->ofs >= item->set->n)
5306 items[j] = items[--n_left];
5315 size_t allocated_sum_vars = 0;
5316 enumerate_sum_vars (t->axes[t->summary_axis],
5317 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5319 return (ctables_check_label_position (t, lexer, PIVOT_AXIS_ROW)
5320 && ctables_check_label_position (t, lexer, PIVOT_AXIS_COLUMN));
5324 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5325 enum pivot_axis_type a)
5327 struct ctables_stack *stack = &t->stacks[a];
5328 for (size_t i = 0; i < stack->n; i++)
5330 const struct ctables_nest *nest = &stack->nests[i];
5331 const struct variable *var = nest->vars[nest->n - 1];
5332 const union value *value = case_data (c, var);
5334 if (var_is_numeric (var) && value->f == SYSMIS)
5337 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5339 ctables_value_insert (t, value, var_get_width (var));
/* Adds to OCCURRENCES, through ctables_add_occurrence(), the values of VAR
   that the category specifications in CATS call for, so that categories can
   be represented even when no case in the data has them.

   Explicit number and string categories contribute their own value.  The
   other visible branches draw on VAR's value labels: those inside a numeric
   or string range, those that are missing values, all of them, or those that
   are non-missing unless the category includes missing values.

   NOTE(review): most 'case' labels of the switch on the category type are not
   visible in this listing, so the mapping of branches to category types below
   is inferred from the fields each branch reads -- confirm. */
5344 ctables_add_category_occurrences (const struct variable *var,
5345 struct hmap *occurrences,
5346 const struct ctables_categories *cats)
5348 const struct val_labs *val_labs = var_get_value_labels (var);
5350 for (size_t i = 0; i < cats->n_cats; i++)
5352 const struct ctables_category *c = &cats->cats[i];
/* A single numeric value, passed as a temporary 'union value'. */
5356 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string value: copy it into a value of VAR's width, padded on the
   right with spaces, then release the temporary. */
5362 int width = var_get_width (var);
5364 value_init (&value, width);
5365 value_copy_buf_rpad (&value, width,
5366 CHAR_CAST (uint8_t *, c->string.string),
5367 c->string.length, ' ');
5368 ctables_add_occurrence (var, &value, occurrences);
5369 value_destroy (&value, width);
/* Numeric range: every labeled value within [nrange[0], nrange[1]]. */
5374 assert (var_is_numeric (var));
5375 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5376 vl = val_labs_next (val_labs, vl))
5377 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5378 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: every labeled value that in_string_range() accepts. */
5382 assert (var_is_alpha (var));
5383 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5384 vl = val_labs_next (val_labs, vl))
5385 if (in_string_range (&vl->value, var, c->srange))
5386 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value that is a missing value for VAR. */
5390 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5391 vl = val_labs_next (val_labs, vl))
5392 if (var_is_value_missing (var, &vl->value))
5393 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value, unconditionally. */
5397 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5398 vl = val_labs_next (val_labs, vl))
5399 ctables_add_occurrence (var, &vl->value, occurrences);
5402 case CCT_POSTCOMPUTE:
/* Every labeled value, skipping missing ones unless the category includes
   missing values. */
5412 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5413 vl = val_labs_next (val_labs, vl))
5414 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5415 ctables_add_occurrence (var, &vl->value, occurrences);
5418 case CCT_EXCLUDED_MISSING:
/* Recursively visits every variable position in section S, axis by axis
   starting from axis A and index A_IDX within that axis's nest, and inserts a
   cell for every combination of recorded occurrences.

   At each position, every value in the position's occurrence map is written
   into scratch case C, the category it matches is stored in
   CATS[A][A_IDX], and the recursion continues with the next position.  The
   same is then done for each CCT_POSTCOMPUTE category of the variable, which
   is stored in CATS directly without changing C.  When all axes have been
   visited, the cell is inserted with ctables_cell_insert__(). */
5425 ctables_section_recurse_add_empty_categories (
5426 struct ctables_section *s,
5427 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5428 enum pivot_axis_type a, size_t a_idx)
5430 if (a >= PIVOT_N_AXES)
5431 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest, or all of its variables are done: next axis. */
5432 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5433 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5436 const struct variable *var = s->nests[a]->vars[a_idx];
5437 const struct ctables_categories *categories = s->table->categories[
5438 var_get_dict_index (var)];
5439 int width = var_get_width (var);
5440 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5441 const struct ctables_occurrence *o;
5442 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence's value. */
5444 union value *value = case_data_rw (c, var);
5445 value_destroy (value, width);
5446 value_clone (value, &o->value, width);
5447 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5448 assert (cats[a][a_idx] != NULL);
5449 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5452 for (size_t i = 0; i < categories->n_cats; i++)
5454 const struct ctables_category *cat = &categories->cats[i];
5455 if (cat->type == CCT_POSTCOMPUTE)
5457 cats[a][a_idx] = cat;
5458 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for categories that did not occur in the data.

   For each non-scale variable in S's nests whose categories have show_empty
   set, registers the values those categories call for in the variable's
   occurrence map.  Then creates a scratch case from the dictionary's
   prototype and uses ctables_section_recurse_add_empty_categories() to insert
   a cell for every combination of occurrences.

   NOTE(review): the statements that set and test 'show_empty', and any
   release of the scratch case, are not visible in this listing. */
5465 ctables_section_add_empty_categories (struct ctables_section *s)
5467 bool show_empty = false;
5468 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5470 for (size_t k = 0; k < s->nests[a]->n; k++)
5471 if (k != s->nests[a]->scale_idx)
5473 const struct variable *var = s->nests[a]->vars[k];
5474 const struct ctables_categories *cats = s->table->categories[
5475 var_get_dict_index (var)];
5476 if (cats->show_empty)
5479 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Per-axis scratch arrays, one slot per variable in the axis's nest, that the
   recursion fills with the category chosen at each position.  These are
   variable-length arrays sized by the nests. */
5485 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5486 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5487 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5488 const struct ctables_category **cats[PIVOT_N_AXES] =
5490 [PIVOT_AXIS_LAYER] = layer_cats,
5491 [PIVOT_AXIS_ROW] = row_cats,
5492 [PIVOT_AXIS_COLUMN] = column_cats,
5494 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5495 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S so that it can accumulate data again: removes every
   recorded occurrence for each non-scale variable, every cell (destroying its
   per-axis values and its summaries), and every area, and shrinks the cell
   and area maps.  The maps themselves remain initialized.

   NOTE(review): the statements that free the removed nodes themselves are not
   visible in this listing. */
5500 ctables_section_clear (struct ctables_section *s)
5502 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5504 const struct ctables_nest *nest = s->nests[a];
5505 for (size_t i = 0; i < nest->n; i++)
5506 if (i != nest->scale_idx)
5508 const struct variable *var = nest->vars[i];
5509 int width = var_get_width (var);
5510 struct ctables_occurrence *o, *next;
5511 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iterator permits deleting the current node while iterating. */
5512 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5514 value_destroy (&o->value, width);
5515 hmap_delete (map, &o->node);
5522 struct ctables_cell *cell, *next_cell;
5523 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5525 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5527 const struct ctables_nest *nest = s->nests[a];
5528 for (size_t i = 0; i < nest->n; i++)
5529 if (i != nest->scale_idx)
5530 value_destroy (&cell->axes[a].cvs[i].value,
5531 var_get_width (nest->vars[i]));
5532 free (cell->axes[a].cvs);
/* The cell's summaries follow the spec set, on the summary axis's nest, for
   the cell's summary variant. */
5535 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5536 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5537 for (size_t i = 0; i < specs->n; i++)
5538 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5539 free (cell->summaries);
5541 hmap_delete (&s->cells, &cell->node);
5544 hmap_shrink (&s->cells);
5546 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5548 struct ctables_area *area, *next_area;
5549 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5553 hmap_delete (&s->areas[at], &area->node);
5556 hmap_shrink (&s->areas[at]);
5561 ctables_section_uninit (struct ctables_section *s)
5563 ctables_section_clear (s);
5565 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5567 struct ctables_nest *nest = s->nests[a];
5568 for (size_t i = 0; i < nest->n; i++)
5569 hmap_destroy (&s->occurrences[a][i]);
5570 free (s->occurrences[a]);
5573 hmap_destroy (&s->cells);
5574 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5575 hmap_destroy (&s->areas[at]);
5579 ctables_table_clear (struct ctables_table *t)
5581 for (size_t i = 0; i < t->n_sections; i++)
5582 ctables_section_clear (&t->sections[i]);
5584 if (t->clabels_example)
5586 int width = var_get_width (t->clabels_example);
5587 struct ctables_value *value, *next_value;
5588 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5589 &t->clabels_values_map)
5591 value_destroy (&value->value, width);
5592 hmap_delete (&t->clabels_values_map, &value->node);
5595 hmap_shrink (&t->clabels_values_map);
5597 free (t->clabels_values);
5598 t->clabels_values = NULL;
5599 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases from INPUT, using the dictionary of
   dataset DS.

   First allocates and initializes the sections of every table.  Then reads
   INPUT group by group: one group per combination of split-variable values
   when the dictionary's split type is SPLIT_SEPARATE, otherwise a single
   group holding every case.  Each case is inserted into every section of
   every table with its dictionary, effective, and unit weights.  After each
   group, every table is completed with empty categories, output, and cleared
   for the next group.

   Returns the result of casegrouper_destroy(). */
5604 ctables_execute (struct dataset *ds, struct casereader *input,
5607 for (size_t i = 0; i < ct->n_tables; i++)
5609 struct ctables_table *t = ct->tables[i];
/* One section per combination of row, column, and layer nests, counting an
   axis without nests as one. */
5610 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5611 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5612 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5613 sizeof *t->sections);
5614 size_t ix[PIVOT_N_AXES];
5615 ctables_table_add_section (t, 0, ix);
5618 struct dictionary *dict = dataset_dict (ds);
5620 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5621 struct casegrouper *grouper
5623 ? casegrouper_create_splits (input, dict)
5624 : casegrouper_create_vars (input, NULL, 0));
5625 struct casereader *group;
5626 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case to report the split-file values. */
5630 struct ccase *c = casereader_peek (group, 0);
5633 output_split_file_values (ds, c);
5638 bool warn_on_invalid = true;
5639 for (struct ccase *c = casereader_read (group); c;
5640 case_unref (c), c = casereader_read (group))
/* Compute this case's weight under each of the three weighting schemes,
   indexed by enum ctables_weighting. */
5642 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5643 double e_weight = (ct->e_weight
5644 ? var_force_valid_weight (ct->e_weight,
5645 case_num (c, ct->e_weight),
5649 [CTW_DICTIONARY] = d_weight,
5650 [CTW_EFFECTIVE] = e_weight,
5651 [CTW_UNWEIGHTED] = 1.0,
5654 for (size_t i = 0; i < ct->n_tables; i++)
5656 struct ctables_table *t = ct->tables[i];
5658 for (size_t j = 0; j < t->n_sections; j++)
5659 ctables_cell_insert (&t->sections[j], c, weight);
/* For an axis whose category labels are displayed on another axis, also
   collect the label values seen in this case. */
5661 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5662 if (t->label_axis[a] != a)
5663 ctables_insert_clabels_values (t, c, a);
5666 casereader_destroy (group);
/* The group is complete: finish, output, and reset every table. */
5668 for (size_t i = 0; i < ct->n_tables; i++)
5670 struct ctables_table *t = ct->tables[i];
5672 if (t->clabels_example)
5673 ctables_sort_clabels_values (t);
5675 for (size_t j = 0; j < t->n_sections; j++)
5676 ctables_section_add_empty_categories (&t->sections[j]);
5678 ctables_table_output (ct, t);
5679 ctables_table_clear (t);
5682 return casegrouper_destroy (grouper);
/* Looks in CT's postcomputes hash map for an entry named NAME.  Candidates
   are the entries whose hash equals utf8_hash_case_string (NAME, 0); each is
   compared against NAME with utf8_strcasecmp(), so the match ignores case.
   NOTE(review): presumably returns the matching entry, or NULL if there is
   none -- confirm against the return statements. */
5685 static struct ctables_postcompute *
5686 ctables_find_postcompute (struct ctables *ct, const char *name)
5688 struct ctables_postcompute *pc;
5689 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5690 utf8_hash_case_string (name, 0), &ct->postcomputes)
5691 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   "&NAME=EXPR(expression)", and records it in CT's postcomputes map under
   NAME.  If a postcompute with that name already exists, a warning is issued
   that points at both definitions, and the old expression and location are
   destroyed.  DICT is passed to the expression parser. */
5697 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The token just before the current one starts the reported location; the
   caller has already consumed the PCOMPUTE keyword. */
5700 int pcompute_start = lex_ofs (lexer) - 1;
5702 if (!lex_match (lexer, T_AND))
5704 lex_error_expecting (lexer, "&");
5707 if (!lex_force_id (lexer))
/* NAME is a heap copy of the identifier token. */
5710 char *name = ss_xstrdup (lex_tokss (lexer));
5713 if (!lex_force_match_phrase (lexer, "=EXPR("))
/* Remember the token range of the expression itself so that its source text
   can be recovered below. */
5719 int expr_start = lex_ofs (lexer);
5720 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5721 int expr_end = lex_ofs (lexer) - 1;
5722 if (!expr || !lex_force_match (lexer, T_RPAREN))
5724 ctables_pcexpr_destroy (expr);
5728 int pcompute_end = lex_ofs (lexer) - 1;
5730 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5733 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn at the new location and note the previous one, then
   release what the previous definition held. */
5736 msg_at (SW, location, _("New definition of &%s will override the "
5737 "previous definition."),
5739 msg_at (SN, pc->location, _("This is the previous definition."));
5741 ctables_pcexpr_destroy (pc->expr);
5742 msg_location_destroy (pc->location);
/* First definition: create a postcompute, with every field other than the
   name zeroed, and insert it under the case-insensitive hash of its name. */
5747 pc = xmalloc (sizeof *pc);
5748 *pc = (struct ctables_postcompute) { .name = name };
5749 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5750 utf8_hash_case_string (pc->name, 0));
5753 pc->location = location;
/* Source text of the expression, from tokens EXPR_START through EXPR_END. */
5755 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications that follow FORMAT in a PPROPERTIES
   subcommand and stores them in *SSS, which is first reset to an empty set.
   Each specification is a summary function, a percentile value when the
   function is CTSF_PTILE, and a format specifier. */
5760 ctables_parse_pproperties_format (struct lexer *lexer,
5761 struct ctables_summary_spec_set *sss)
5763 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Keep parsing specifications until the end of the command, a slash, or an
   identifier that matches LABEL or HIDESOURCECATS. */
5765 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5766 && !(lex_token (lexer) == T_ID
5767 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5768 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5769 lex_tokss (lexer)))))
5771 /* Parse function. */
5772 enum ctables_summary_function function;
5773 enum ctables_weighting weighting;
5774 enum ctables_area_type area;
5775 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5778 /* Parse percentile. */
/* Stays 0 for every function other than CTSF_PTILE; for CTSF_PTILE the
   number must lie in the closed range 0 to 100. */
5779 double percentile = 0;
5780 if (function == CTSF_PTILE)
5782 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5784 percentile = lex_number (lexer);
5789 struct fmt_spec format;
5790 bool is_ctables_format;
5791 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the specs array when it is full, then append the new specification. */
5794 if (sss->n >= sss->allocated)
5795 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5796 sizeof *sss->specs);
5797 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5798 .function = function,
5799 .weighting = weighting,
5802 .percentile = percentile,
5804 .is_ctables_format = is_ctables_format,
/* NOTE(review): presumably the failure path, releasing whatever was
   accumulated in *SSS -- confirm against the surrounding control flow. */
5810 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.  It first reads a list of
   "&NAME" references to postcomputes already defined in CT, then reads LABEL,
   FORMAT, and HIDESOURCECATS settings until a slash or the end of the
   command.  Each setting is applied to every listed postcompute. */
5815 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named in the list. */
5817 struct ctables_postcompute **pcs = NULL;
5819 size_t allocated_pcs = 0;
5821 while (lex_match (lexer, T_AND))
5823 if (!lex_force_id (lexer))
5825 struct ctables_postcompute *pc
5826 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5829 lex_error (lexer, _("Unknown computed category &%s."),
5830 lex_tokcstr (lexer));
5835 if (n_pcs >= allocated_pcs)
5836 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5840 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5842 if (lex_match_id (lexer, "LABEL"))
5844 lex_match (lexer, T_EQUALS);
5845 if (!lex_force_string (lexer))
/* Replace each postcompute's label with its own copy of the string token. */
5848 for (size_t i = 0; i < n_pcs; i++)
5850 free (pcs[i]->label);
5851 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5856 else if (lex_match_id (lexer, "FORMAT"))
5858 lex_match (lexer, T_EQUALS);
/* Parse the specifications once into SSS, give every postcompute its own
   clone, and then release SSS. */
5860 struct ctables_summary_spec_set sss;
5861 if (!ctables_parse_pproperties_format (lexer, &sss))
5864 for (size_t i = 0; i < n_pcs; i++)
5867 ctables_summary_spec_set_uninit (pcs[i]->specs);
5869 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5870 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5872 ctables_summary_spec_set_uninit (&sss);
5874 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5876 lex_match (lexer, T_EQUALS);
5877 bool hide_source_cats;
5878 if (!parse_bool (lexer, &hide_source_cats))
5880 for (size_t i = 0; i < n_pcs; i++)
5881 pcs[i]->hide_source_cats = hide_source_cats;
5885 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime() according to
   FORMAT, and appends the result to OUT.
   NOTE(review): in the lines shown here, the result of localtime() is not
   checked for NULL and the return value of strftime() is ignored.  If
   strftime() returns 0, the buffer contents are unspecified -- confirm
   whether that can happen for the formats callers pass. */
5898 put_strftime (struct string *out, time_t now, const char *format)
5900 const struct tm *tm = localtime (&now);
5902 strftime (value, sizeof value, format, tm);
5903 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.
   NOTE(review): callers use the result as a condition, so this presumably
   returns true when the prefix was present and skipped, false otherwise --
   confirm against the return statements. */
5907 skip_prefix (struct substring *s, struct substring prefix)
5909 if (ss_starts_with (*s, prefix))
5911 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of LEXER's tokens at offsets EXPR_START
   (inclusive) through EXPR_END (exclusive).  An identifier that names a
   variable in DICT with a variable label is rendered as that label; any
   other identifier is rendered as written.  Square brackets are tracked
   with a nesting counter. */
5919 put_table_expression (struct string *out, struct lexer *lexer,
5920 struct dictionary *dict, int expr_start, int expr_end)
5923 for (int ofs = expr_start; ofs < expr_end; ofs++)
5925 const struct token *t = lex_ofs_token (lexer, ofs);
5926 if (t->type == T_LBRACK)
/* A right bracket only counts while a left bracket is still open. */
5928 else if (t->type == T_RBRACK && nest > 0)
5934 else if (t->type == T_ID)
5936 const struct variable *var
5937 = dict_lookup_var (dict, t->string.string);
/* Fall back to the identifier text when there is no such variable or the
   variable has no label. */
5938 const char *label = var ? var_get_label (var) : NULL;
5939 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space before the token unless it is the first token, it is a right
   parenthesis, or OUT already ends in a space. */
5943 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5944 ds_put_byte (out, ' ');
5946 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5947 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token or a left
   parenthesis. */
5950 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5951 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding these keywords:

     )DATE   NOW formatted with the strftime() format "%x".
     )TIME   NOW formatted with the strftime() format "%X".
     )TABLE  The table expression formed by LEXER's tokens EXPR_START
             through EXPR_END, with DICT supplying variable labels (see
             put_table_expression()).

   A ')' that does not begin one of these keywords is copied literally. */
5957 put_title_text (struct string *out, struct substring in, time_t now,
5958 struct lexer *lexer, struct dictionary *dict,
5959 int expr_start, int expr_end)
/* Copy everything before the next ')' verbatim and drop it from IN. */
5963 size_t chunk = ss_find_byte (in, ')');
5964 ds_put_substring (out, ss_head (in, chunk));
5965 ss_advance (&in, chunk);
/* Nothing left means that no further ')' remains to be examined. */
5966 if (ss_is_empty (in))
5969 if (skip_prefix (&in, ss_cstr (")DATE")))
5970 put_strftime (out, now, "%x");
5971 else if (skip_prefix (&in, ss_cstr (")TIME")))
5972 put_strftime (out, now, "%X");
5973 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5974 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ')' itself and step past it. */
5977 ds_put_byte (out, ')');
5978 ss_advance (&in, 1);
5984 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5986 struct casereader *input = NULL;
5988 struct measure_guesser *mg = measure_guesser_create (ds);
5991 input = proc_open (ds);
5992 measure_guesser_run (mg, input);
5993 measure_guesser_destroy (mg);
5996 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5997 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5998 enum settings_value_show tvars = settings_get_show_variables ();
5999 for (size_t i = 0; i < n_vars; i++)
6000 vlabels[i] = (enum ctables_vlabel) tvars;
6002 struct pivot_table_look *look = pivot_table_look_unshare (
6003 pivot_table_look_ref (pivot_table_look_get_default ()));
6004 look->omit_empty = false;
6006 struct ctables *ct = xmalloc (sizeof *ct);
6007 *ct = (struct ctables) {
6008 .dict = dataset_dict (ds),
6010 .ctables_formats = FMT_SETTINGS_INIT,
6012 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6015 time_t now = time (NULL);
6020 const char *dot_string;
6021 const char *comma_string;
6023 static const struct ctf ctfs[4] = {
6024 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6025 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6026 { CTEF_PAREN, "-,(,),", "-.(.)." },
6027 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6029 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6030 for (size_t i = 0; i < 4; i++)
6032 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6033 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6034 fmt_number_style_from_string (s));
6037 if (!lex_force_match (lexer, T_SLASH))
6040 while (!lex_match_id (lexer, "TABLE"))
6042 if (lex_match_id (lexer, "FORMAT"))
6044 double widths[2] = { SYSMIS, SYSMIS };
6045 double units_per_inch = 72.0;
6047 int start_ofs = lex_ofs (lexer);
6048 while (lex_token (lexer) != T_SLASH)
6050 if (lex_match_id (lexer, "MINCOLWIDTH"))
6052 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6055 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6057 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6060 else if (lex_match_id (lexer, "UNITS"))
6062 lex_match (lexer, T_EQUALS);
6063 if (lex_match_id (lexer, "POINTS"))
6064 units_per_inch = 72.0;
6065 else if (lex_match_id (lexer, "INCHES"))
6066 units_per_inch = 1.0;
6067 else if (lex_match_id (lexer, "CM"))
6068 units_per_inch = 2.54;
6071 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6075 else if (lex_match_id (lexer, "EMPTY"))
6080 lex_match (lexer, T_EQUALS);
6081 if (lex_match_id (lexer, "ZERO"))
6083 /* Nothing to do. */
6085 else if (lex_match_id (lexer, "BLANK"))
6086 ct->zero = xstrdup ("");
6087 else if (lex_force_string (lexer))
6089 ct->zero = ss_xstrdup (lex_tokss (lexer));
6095 else if (lex_match_id (lexer, "MISSING"))
6097 lex_match (lexer, T_EQUALS);
6098 if (!lex_force_string (lexer))
6102 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6103 ? ss_xstrdup (lex_tokss (lexer))
6109 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6110 "UNITS", "EMPTY", "MISSING");
6115 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6116 && widths[0] > widths[1])
6118 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6119 _("MINCOLWIDTH must not be greater than "
6124 for (size_t i = 0; i < 2; i++)
6125 if (widths[i] != SYSMIS)
6127 int *wr = ct->look->width_ranges[TABLE_HORZ];
6128 wr[i] = widths[i] / units_per_inch * 96.0;
6133 else if (lex_match_id (lexer, "VLABELS"))
6135 if (!lex_force_match_id (lexer, "VARIABLES"))
6137 lex_match (lexer, T_EQUALS);
6139 struct variable **vars;
6141 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6145 if (!lex_force_match_id (lexer, "DISPLAY"))
6150 lex_match (lexer, T_EQUALS);
6152 enum ctables_vlabel vlabel;
6153 if (lex_match_id (lexer, "DEFAULT"))
6154 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6155 else if (lex_match_id (lexer, "NAME"))
6157 else if (lex_match_id (lexer, "LABEL"))
6158 vlabel = CTVL_LABEL;
6159 else if (lex_match_id (lexer, "BOTH"))
6161 else if (lex_match_id (lexer, "NONE"))
6165 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6171 for (size_t i = 0; i < n_vars; i++)
6172 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6175 else if (lex_match_id (lexer, "MRSETS"))
6177 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6179 lex_match (lexer, T_EQUALS);
6180 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6183 else if (lex_match_id (lexer, "SMISSING"))
6185 if (lex_match_id (lexer, "VARIABLE"))
6186 ct->smissing_listwise = false;
6187 else if (lex_match_id (lexer, "LISTWISE"))
6188 ct->smissing_listwise = true;
6191 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6195 else if (lex_match_id (lexer, "PCOMPUTE"))
6197 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6200 else if (lex_match_id (lexer, "PPROPERTIES"))
6202 if (!ctables_parse_pproperties (lexer, ct))
6205 else if (lex_match_id (lexer, "WEIGHT"))
6207 if (!lex_force_match_id (lexer, "VARIABLE"))
6209 lex_match (lexer, T_EQUALS);
6210 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6214 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6216 if (lex_match_id (lexer, "COUNT"))
6218 lex_match (lexer, T_EQUALS);
6219 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6222 ct->hide_threshold = lex_integer (lexer);
6225 else if (ct->hide_threshold == 0)
6226 ct->hide_threshold = 5;
6230 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6231 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6232 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6233 if (lex_match_id (lexer, "SLABELS")
6234 || lex_match_id (lexer, "CLABELS")
6235 || lex_match_id (lexer, "CRITERIA")
6236 || lex_match_id (lexer, "CATEGORIES")
6237 || lex_match_id (lexer, "TITLES")
6238 || lex_match_id (lexer, "SIGTEST")
6239 || lex_match_id (lexer, "COMPARETEST"))
6240 lex_next_msg (lexer, SN, -1, -1,
6241 _("TABLE must appear before this subcommand."));
6245 if (!lex_force_match (lexer, T_SLASH))
6249 size_t allocated_tables = 0;
6252 if (ct->n_tables >= allocated_tables)
6253 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6254 sizeof *ct->tables);
6256 struct ctables_category *cat = xmalloc (sizeof *cat);
6257 *cat = (struct ctables_category) {
6259 .include_missing = false,
6260 .sort_ascending = true,
6263 struct ctables_categories *c = xmalloc (sizeof *c);
6264 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6265 *c = (struct ctables_categories) {
6272 struct ctables_categories **categories = xnmalloc (n_vars,
6273 sizeof *categories);
6274 for (size_t i = 0; i < n_vars; i++)
6277 struct ctables_table *t = xmalloc (sizeof *t);
6278 *t = (struct ctables_table) {
6280 .slabels_axis = PIVOT_AXIS_COLUMN,
6281 .slabels_visible = true,
6282 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6284 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6285 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6286 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6288 .clabels_from_axis = PIVOT_AXIS_LAYER,
6289 .clabels_to_axis = PIVOT_AXIS_LAYER,
6290 .categories = categories,
6291 .n_categories = n_vars,
6294 ct->tables[ct->n_tables++] = t;
6296 lex_match (lexer, T_EQUALS);
6297 int expr_start = lex_ofs (lexer);
6298 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6299 &t->axes[PIVOT_AXIS_ROW]))
6301 if (lex_match (lexer, T_BY))
6303 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6304 &t->axes[PIVOT_AXIS_COLUMN]))
6307 if (lex_match (lexer, T_BY))
6309 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6310 &t->axes[PIVOT_AXIS_LAYER]))
6314 int expr_end = lex_ofs (lexer);
6316 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6317 && !t->axes[PIVOT_AXIS_LAYER])
6319 lex_error (lexer, _("At least one variable must be specified."));
6323 const struct ctables_axis *scales[PIVOT_N_AXES];
6324 size_t n_scales = 0;
6325 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6327 scales[a] = find_scale (t->axes[a]);
6333 msg (SE, _("Scale variables may appear only on one axis."));
6334 if (scales[PIVOT_AXIS_ROW])
6335 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6336 _("This scale variable appears on the rows axis."));
6337 if (scales[PIVOT_AXIS_COLUMN])
6338 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6339 _("This scale variable appears on the columns axis."));
6340 if (scales[PIVOT_AXIS_LAYER])
6341 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6342 _("This scale variable appears on the layer axis."));
6346 const struct ctables_axis *summaries[PIVOT_N_AXES];
6347 size_t n_summaries = 0;
6348 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6350 summaries[a] = (scales[a]
6352 : find_categorical_summary_spec (t->axes[a]));
6356 if (n_summaries > 1)
6358 msg (SE, _("Summaries may appear only on one axis."));
6359 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6362 msg_at (SN, summaries[a]->loc,
6364 ? _("This variable on the rows axis has a summary.")
6365 : a == PIVOT_AXIS_COLUMN
6366 ? _("This variable on the columns axis has a summary.")
6367 : _("This variable on the layers axis has a summary."));
6369 msg_at (SN, summaries[a]->loc,
6370 _("This is a scale variable, so it always has a "
6371 "summary even if the syntax does not explicitly "
6376 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6377 if (n_summaries ? summaries[a] : t->axes[a])
6379 t->summary_axis = a;
6383 if (lex_token (lexer) == T_ENDCMD)
6385 if (!ctables_prepare_table (t, lexer))
6389 if (!lex_force_match (lexer, T_SLASH))
6392 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6394 if (lex_match_id (lexer, "SLABELS"))
6396 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6398 if (lex_match_id (lexer, "POSITION"))
6400 lex_match (lexer, T_EQUALS);
6401 if (lex_match_id (lexer, "COLUMN"))
6402 t->slabels_axis = PIVOT_AXIS_COLUMN;
6403 else if (lex_match_id (lexer, "ROW"))
6404 t->slabels_axis = PIVOT_AXIS_ROW;
6405 else if (lex_match_id (lexer, "LAYER"))
6406 t->slabels_axis = PIVOT_AXIS_LAYER;
6409 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6413 else if (lex_match_id (lexer, "VISIBLE"))
6415 lex_match (lexer, T_EQUALS);
6416 if (!parse_bool (lexer, &t->slabels_visible))
6421 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6426 else if (lex_match_id (lexer, "CLABELS"))
6428 int start_ofs = lex_ofs (lexer) - 1;
6429 if (lex_match_id (lexer, "AUTO"))
6431 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6432 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6434 else if (lex_match_id (lexer, "ROWLABELS"))
6436 lex_match (lexer, T_EQUALS);
6437 if (lex_match_id (lexer, "OPPOSITE"))
6438 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6439 else if (lex_match_id (lexer, "LAYER"))
6440 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6443 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6447 else if (lex_match_id (lexer, "COLLABELS"))
6449 lex_match (lexer, T_EQUALS);
6450 if (lex_match_id (lexer, "OPPOSITE"))
6451 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6452 else if (lex_match_id (lexer, "LAYER"))
6453 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6456 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6462 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6466 int end_ofs = lex_ofs (lexer) - 1;
6468 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6469 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6471 msg (SE, _("ROWLABELS and COLLABELS may not both be "
6474 lex_ofs_msg (lexer, SN, t->clabels_start_ofs,
6476 _("This is the first specification."));
6477 lex_ofs_msg (lexer, SN, start_ofs, end_ofs,
6478 _("This is the second specification."));
6482 t->clabels_start_ofs = start_ofs;
6483 t->clabels_end_ofs = end_ofs;
6485 else if (lex_match_id (lexer, "CRITERIA"))
6487 if (!lex_force_match_id (lexer, "CILEVEL"))
6489 lex_match (lexer, T_EQUALS);
6491 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6493 t->cilevel = lex_number (lexer);
6496 else if (lex_match_id (lexer, "CATEGORIES"))
6498 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6502 else if (lex_match_id (lexer, "TITLES"))
6507 if (lex_match_id (lexer, "CAPTIONS"))
6508 textp = &t->caption;
6509 else if (lex_match_id (lexer, "CORNERS"))
6511 else if (lex_match_id (lexer, "TITLES"))
6515 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6518 lex_match (lexer, T_EQUALS);
6520 struct string s = DS_EMPTY_INITIALIZER;
6521 while (lex_is_string (lexer))
6523 if (!ds_is_empty (&s))
6524 ds_put_byte (&s, ' ');
6525 put_title_text (&s, lex_tokss (lexer), now,
6526 lexer, dataset_dict (ds),
6527 expr_start, expr_end);
6531 *textp = ds_steal_cstr (&s);
6533 while (lex_token (lexer) != T_SLASH
6534 && lex_token (lexer) != T_ENDCMD);
6536 else if (lex_match_id (lexer, "SIGTEST"))
6538 int start_ofs = lex_ofs (lexer) - 1;
6541 t->chisq = xmalloc (sizeof *t->chisq);
6542 *t->chisq = (struct ctables_chisq) {
6544 .include_mrsets = true,
6545 .all_visible = true,
6551 if (lex_match_id (lexer, "TYPE"))
6553 lex_match (lexer, T_EQUALS);
6554 if (!lex_force_match_id (lexer, "CHISQUARE"))
6557 else if (lex_match_id (lexer, "ALPHA"))
6559 lex_match (lexer, T_EQUALS);
6560 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6562 t->chisq->alpha = lex_number (lexer);
6565 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6567 lex_match (lexer, T_EQUALS);
6568 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6571 else if (lex_match_id (lexer, "CATEGORIES"))
6573 lex_match (lexer, T_EQUALS);
6574 if (lex_match_id (lexer, "ALLVISIBLE"))
6575 t->chisq->all_visible = true;
6576 else if (lex_match_id (lexer, "SUBTOTALS"))
6577 t->chisq->all_visible = false;
6580 lex_error_expecting (lexer,
6581 "ALLVISIBLE", "SUBTOTALS");
6587 lex_error_expecting (lexer, "TYPE", "ALPHA",
6588 "INCLUDEMRSETS", "CATEGORIES");
6592 while (lex_token (lexer) != T_SLASH
6593 && lex_token (lexer) != T_ENDCMD);
6595 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6596 _("Support for SIGTEST not yet implemented."));
6599 else if (lex_match_id (lexer, "COMPARETEST"))
6601 int start_ofs = lex_ofs (lexer) - 1;
6604 t->pairwise = xmalloc (sizeof *t->pairwise);
6605 *t->pairwise = (struct ctables_pairwise) {
6607 .alpha = { .05, .05 },
6608 .adjust = BONFERRONI,
6609 .include_mrsets = true,
6610 .meansvariance_allcats = true,
6611 .all_visible = true,
6620 if (lex_match_id (lexer, "TYPE"))
6622 lex_match (lexer, T_EQUALS);
6623 if (lex_match_id (lexer, "PROP"))
6624 t->pairwise->type = PROP;
6625 else if (lex_match_id (lexer, "MEAN"))
6626 t->pairwise->type = MEAN;
6629 lex_error_expecting (lexer, "PROP", "MEAN");
6633 else if (lex_match_id (lexer, "ALPHA"))
6635 lex_match (lexer, T_EQUALS);
6637 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6639 double a0 = lex_number (lexer);
6642 lex_match (lexer, T_COMMA);
6643 if (lex_is_number (lexer))
6645 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6647 double a1 = lex_number (lexer);
6650 t->pairwise->alpha[0] = MIN (a0, a1);
6651 t->pairwise->alpha[1] = MAX (a0, a1);
6654 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6656 else if (lex_match_id (lexer, "ADJUST"))
6658 lex_match (lexer, T_EQUALS);
6659 if (lex_match_id (lexer, "BONFERRONI"))
6660 t->pairwise->adjust = BONFERRONI;
6661 else if (lex_match_id (lexer, "BH"))
6662 t->pairwise->adjust = BH;
6663 else if (lex_match_id (lexer, "NONE"))
6664 t->pairwise->adjust = 0;
6667 lex_error_expecting (lexer, "BONFERRONI", "BH",
6672 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6674 lex_match (lexer, T_EQUALS);
6675 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6678 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6680 lex_match (lexer, T_EQUALS);
6681 if (lex_match_id (lexer, "ALLCATS"))
6682 t->pairwise->meansvariance_allcats = true;
6683 else if (lex_match_id (lexer, "TESTEDCATS"))
6684 t->pairwise->meansvariance_allcats = false;
6687 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6691 else if (lex_match_id (lexer, "CATEGORIES"))
6693 lex_match (lexer, T_EQUALS);
6694 if (lex_match_id (lexer, "ALLVISIBLE"))
6695 t->pairwise->all_visible = true;
6696 else if (lex_match_id (lexer, "SUBTOTALS"))
6697 t->pairwise->all_visible = false;
6700 lex_error_expecting (lexer, "ALLVISIBLE",
6705 else if (lex_match_id (lexer, "MERGE"))
6707 lex_match (lexer, T_EQUALS);
6708 if (!parse_bool (lexer, &t->pairwise->merge))
6711 else if (lex_match_id (lexer, "STYLE"))
6713 lex_match (lexer, T_EQUALS);
6714 if (lex_match_id (lexer, "APA"))
6715 t->pairwise->apa_style = true;
6716 else if (lex_match_id (lexer, "SIMPLE"))
6717 t->pairwise->apa_style = false;
6720 lex_error_expecting (lexer, "APA", "SIMPLE");
6724 else if (lex_match_id (lexer, "SHOWSIG"))
6726 lex_match (lexer, T_EQUALS);
6727 if (!parse_bool (lexer, &t->pairwise->show_sig))
6732 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6733 "INCLUDEMRSETS", "MEANSVARIANCE",
6734 "CATEGORIES", "MERGE", "STYLE",
6739 while (lex_token (lexer) != T_SLASH
6740 && lex_token (lexer) != T_ENDCMD);
6742 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6743 _("Support for COMPARETEST not yet implemented."));
6748 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6749 "CRITERIA", "CATEGORIES", "TITLES",
6750 "SIGTEST", "COMPARETEST");
6751 if (lex_match_id (lexer, "FORMAT")
6752 || lex_match_id (lexer, "VLABELS")
6753 || lex_match_id (lexer, "MRSETS")
6754 || lex_match_id (lexer, "SMISSING")
6755 || lex_match_id (lexer, "PCOMPUTE")
6756 || lex_match_id (lexer, "PPROPERTIES")
6757 || lex_match_id (lexer, "WEIGHT")
6758 || lex_match_id (lexer, "HIDESMALLCOUNTS"))
6759 lex_next_msg (lexer, SN, -1, -1,
6760 _("This subcommand must appear before TABLE."));
6764 if (!lex_match (lexer, T_SLASH))
6768 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6769 t->clabels_from_axis = PIVOT_AXIS_ROW;
6770 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6771 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6772 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6774 if (!ctables_prepare_table (t, lexer))
6777 while (lex_token (lexer) != T_ENDCMD);
6780 input = proc_open (ds);
6781 bool ok = ctables_execute (ds, input, ct);
6782 ok = proc_commit (ds) && ok;
6784 ctables_destroy (ct);
6785 return ok ? CMD_SUCCESS : CMD_FAILURE;
6790 ctables_destroy (ct);