1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/commands/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Translation helpers.  _() looks MSGID up with gettext() at the point of
   use.  N_() expands to MSGID unchanged; presumably it only marks a string
   for extraction so that it can be translated later. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES.

   Several arrays in this file are sized by N_CTWS, which is presumably the
   number of enumerators declared here. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas.

   An area type names the group of cells over which an area-based summary
   function (one whose name takes an area prefix such as TABLE or ROW)
   operates. */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
another.

76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly: that function tries
the area names against the start of the function name in enumeration
order, and "LAYER" is itself a prefix of "LAYERROW" and "LAYERCOL". */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
/* Syntax keyword for each area type, indexed by enum ctables_area_type.
   parse_ctables_summary_function() matches these against summary function
   names and ctables_summary_function_name() uses them to rebuild a name. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area.

   An area is a group of cells (see enum ctables_area_type) over which the
   area-based summary functions named below are computed. */
104 struct hmap_node node; /* Hash map node that links this area into an hmap. */
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN.  Each array has N_CTWS elements, presumably one per
   enum ctables_weighting value. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions. */
/* How a summary function is classified: whether it works on a single cell or
   on an area of cells, and which weighting variants of it exist.  The
   u_prefix, e_prefix, and is_area flags in ctables_function_info are derived
   from this classification. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
/* Default output format classes for summary functions.
   ctables_summary_default_format() turns each class into a concrete
   struct fmt_spec. */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
/* The kinds of variables to which a summary function may be applied. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
/* (CTFA_MRSETS is disabled; elsewhere this file reports that multiple
   response set support is not implemented.) */
/* All of the summary functions.  The enumerators are generated by expanding
   each S() entry in ctables.inc to its ENUM argument. */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
/* Expanding each S() entry to "+1" instead sums to the number of entries in
   ctables.inc, that is, the number of summary functions. */
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
/* Static properties of one summary function, taken from its S() entry in
   ctables.inc. */
176 struct ctables_function_info
178 struct substring basename; /* Name without weighting or area prefix. */
179 enum ctables_function_type type;
180 enum ctables_format format; /* Default output format class. */
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
/* Properties of every summary function, looked up elsewhere in this file by
   enum ctables_summary_function value.  The three prefix flags are computed
   from each entry's TYPE: U is accepted by CTFT_UCELL, CTFT_UECELL, and
   CTFT_AREA functions, E only by CTFT_UECELL functions, and only CTFT_AREA
   functions take an area prefix. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
/* Returns the default output format for FUNCTION.  The function's format
   class comes from the FORMAT column of ctables.inc; depending on the class
   the result is F40.0, PCT40.1, or VAR's own print format (compare the
   CTF_COUNT, CTF_PERCENT, and CTF_GENERAL enumerators).  VAR is consulted
   only for the last of these. */
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
/* Format class of each function, indexed by enum ctables_summary_function. */
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
220 return *var_get_print_format (var);
/* Returns the kinds of variables to which summary function F may be applied,
   as given by the AVAILABILITY column of F's entry in ctables.inc. */
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
/* Indexed by enum ctables_summary_function. */
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
/* Parses a summary function name from the identifier at LEXER's current
   token.  A name consists of an optional weighting prefix (U or E), an
   optional area prefix (one of ctables_area_type_name[]), and a base name
   from ctables_function_info[].  The results go to *FUNCTION and *WEIGHTING
   (and presumably *AREA).  Unrecognized names are reported as syntax errors
   through LEXER. */
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
/* The confidence limit and standard error variants are recognized only so
   that they can be rejected with a specific message. */
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
/* Strip an optional weighting prefix, in either case.  E is tried only if U
   did not match. */
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
/* Try each area name in enumeration order against what remains of NAME. */
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
/* NOTE(review): the weighting is set to CTW_EFFECTIVE here whether or not a
   U prefix was matched above -- confirm that this is intended. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
/* Otherwise look up what remains of NAME among the base names. */
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
/* Detects a prefix that this function does not accept, or an area prefix
   that is present when it should not be or absent when it is required. */
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Syntax error expecting summary function name."));
/* Formats the syntax name of FUNCTION into BUFFER, using snprintf() with
   BUFSIZE as the limit.  The name is built from three parts: a weighting
   prefix, the name of AREA if FUNCTION takes an area prefix, and FUNCTION's
   base name.  The weighting prefix is "U" for CTW_UNWEIGHTED, empty for
   CTW_DICTIONARY, and otherwise "E" if FUNCTION accepts an E prefix. */
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
/* Returns the default label for FUNCTION as used with WEIGHTING and, for the
   area-based functions, AREA.  The translatable labels are only marked with
   N_() here, not translated.  CTSF_PTILE must not be passed in (its case is
   NOT_REACHED) because its label depends on the percentile; see
   ctables_summary_function_label(). */
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
/* W: some weight is applied (effective or dictionary).
   D: specifically the dictionary weight.
   A: shorthand for AREA. */
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
/* Maximum, minimum, and range have a single label regardless of weighting. */
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the default label for FUNCTION as used
   with WEIGHTING and AREA.  For CTSF_PTILE the label is built here, with the
   percentile formatted to two decimal places.  Every other function takes
   its label from ctables_summary_function_label__(). */
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
/* The "nocopy" constructor is handed the string allocated just above. */
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries. */
/* One summary function as requested for a variable, together with how its
   result is to be displayed. */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighting' specifies which
445 kind of weights the calculation uses (see enum ctables_weighting).
446 'calc_area' is the area over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0). */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
one.

460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
/* Clones SRC into DST.  DST receives its own duplicate of SRC's label, or
   NULL if SRC has no label, so that the two specs can be uninitialized
   independently. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions. */
/* A set of summary specs, all applied to the same variable. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs; /* Array of the specs in the set. */
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
variables.)

508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
/* Initializes DST as a clone of SRC.  Every spec in SRC is cloned into a
   newly allocated array; an empty SRC gets a null array instead of a
   zero-length allocation. */
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
/* Releases what SET owns: each of its specs is uninitialized and the array
   of listwise variables is freed.  SET itself is not freed here. */
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
/* Tests each of the listwise variables in SPECS to see whether its numeric
   value in case C is missing.  Presumably the result is true if any of them
   is, which is what SMISSING=LISTWISE needs (see the 'listwise_vars' member
   of struct ctables_summary_spec_set). */
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions. */
/* A named postcompute: an expression plus properties that affect how it is
   displayed. */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
564 struct ctables_pcexpr *expr; /* Root of the expression tree. */
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
/* One node in a postcompute expression tree.  The node's operation decides
   which of the operation-specific members below is meaningful. */
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE.  The parser stores a substring whose 'string' is NULL
   for a LO or HI bound. */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL.  The parser stores 0 when SUBTOTAL is written
   without a bracketed index. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
/* Type of a function that parses one precedence level of a postcompute
   expression.  The binary operator parsers call such a function to parse
   each operand. */
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
/* Destroys expression node E together with what it owns: the string data of
   string categories and string ranges, its subexpressions (recursively), and
   its source location.  The operations listed in the final group of cases
   visibly release nothing of their own. */
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
/* Returns a newly allocated expression node that applies OP to SUB0 and
   SUB1.  The new node's location is the merger of its operands' locations.
   Both operands must be nonnull because their locations are read here. */
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator: the lexer token that denotes it and the
   expression operation that it stands for. */
697 enum token_type token; /* Token to look for. */
698 enum ctables_pcexpr_op op; /* Operation for the resulting node. */
/* Looks through the N_OPS operators in OPS for one whose token is LEXER's
   current token.  T_NEG_NUM is treated differently from the other tokens:
   presumably an ordinary operator token is consumed, whereas a negative
   number is left in place so that it can be parsed as the right operand. */
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS operators in OPS,
   given the already-parsed left operand LHS.  Each right operand is parsed
   with PARSE_NEXT_LEVEL and combined with the expression so far, so the
   chain associates to the left.  If CHAIN_WARNING is nonnull, it is issued
   as a warning at the expression's location when OP_COUNT exceeds 1 at the
   end of the chain.  LHS is destroyed on one path following the parse of a
   right operand, presumably when that parse fails. */
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
/* Fold the new operand into the expression parsed so far. */
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first a left operand
   using PARSE_NEXT_LEVEL, then any chain of operators from OPS, which is
   delegated to ctables_pcexpr_parse_binary_operators__(). */
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE node for the numeric range from LOW
   to HIGH.  Members not initialized here, including the location, are left
   zeroed by the compound literal. */
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE node for the string range from LOW
   to HIGH.  The substrings are stored as given, not copied.  Members not
   initialized here, including the location, are left zeroed by the compound
   literal. */
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
/* Takes the string at LEXER's current token, recodes it between UTF-8 and
   DICT's encoding (string categories are kept in dictionary encoding), and
   trims trailing spaces from the result.  The callers in this file release
   the result with ss_dealloc(). */
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression: a numeric constant, one of the
   keywords MISSING, OTHERNM, TOTAL, or SUBTOTAL (optionally followed by a
   bracketed index), a bracketed category (number, string, or range using
   LO, THRU, and HI), or a parenthesized subexpression.  On success the node
   is built in a local and returned as a heap copy whose location spans the
   tokens consumed. */
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
/* An index of 0 means that SUBTOTAL was written without [N]. */
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
/* [LO THRU x]: an open-ended range, string or numeric depending on x. */
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
/* A null string stands for the open LO bound. */
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* [n], [n THRU HI], or [n THRU m]. */
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* ["s"], ["s" THRU HI], or ["s" THRU "t"]. */
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
/* A null string stands for the open HI bound. */
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
894 _("Syntax error expecting number or string or range."));
/* If the closing bracket is missing, release any strings already parsed
   into E. */
898 if (!lex_force_match (lexer, T_RBRACK))
900 if (e.op == CTPO_CAT_STRING)
901 ss_dealloc (&e.string);
902 else if (e.op == CTPO_CAT_SRANGE)
904 ss_dealloc (&e.srange[0]);
905 ss_dealloc (&e.srange[1]);
910 else if (lex_match (lexer, T_LPAREN))
912 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
915 if (!lex_force_match (lexer, T_RPAREN))
917 ctables_pcexpr_destroy (ep);
924 lex_error (lexer, _("Syntax error in postcompute expression."));
928 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
929 return xmemdup (&e, sizeof e);
/* Returns a newly allocated negation node (going by the function's name)
   with operand SUB.  Its location runs from token offset START_OFS in LEXER
   through the token just before LEXER's current one. */
932 static struct ctables_pcexpr *
933 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
934 struct lexer *lexer, int start_ofs)
936 struct ctables_pcexpr *e = xmalloc (sizeof *e);
937 *e = (struct ctables_pcexpr) {
940 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Ordinarily this is a chain of `**'
   operators over primary expressions, handled by
   ctables_pcexpr_parse_binary_operators() with a warning about chained
   exponentiation.  When a negative number token is immediately followed by
   `**', the number's absolute value becomes the left operand of the chain
   and the result of the whole chain is wrapped in a negation node. */
945 static struct ctables_pcexpr *
946 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
948 static const struct operator op = { T_EXP, CTPO_POW };
950 const char *chain_warning =
951 _("The exponentiation operator (`**') is left-associative: "
952 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
953 "To disable this warning, insert parentheses.");
955 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
956 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
957 ctables_pcexpr_parse_primary,
960 /* Special case for situations like "-5**6", which must be parsed as
963 int start_ofs = lex_ofs (lexer);
964 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
965 *lhs = (struct ctables_pcexpr) {
967 .number = -lex_tokval (lexer),
968 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
972 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
974 ctables_pcexpr_parse_primary, chain_warning, lhs);
978 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
981 /* Parses the unary minus level.  If the current token is not T_DASH,
   parsing goes straight to the exponentiation level.  Otherwise the dash is
   consumed, the operand is parsed by recursion (so several dashes may be
   stacked), and the operand is wrapped in a negation node whose location
   starts at the dash. */
982 static struct ctables_pcexpr *
983 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
985 int start_ofs = lex_ofs (lexer);
986 if (!lex_match (lexer, T_DASH))
987 return ctables_pcexpr_parse_exp (lexer, dict);
989 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
993 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
996 /* Parses the multiplication and division level: a chain of `*' and `/'
   operators whose operands are parsed at the unary minus level.  No chain
   warning is requested. */
997 static struct ctables_pcexpr *
998 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
1000 static const struct operator ops[] =
1002 { T_ASTERISK, CTPO_MUL },
1003 { T_SLASH, CTPO_DIV },
1006 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1007 sizeof ops / sizeof *ops,
1008 ctables_pcexpr_parse_neg, NULL);
1011 /* Parses the addition and subtraction level: a chain of `+' and `-'
   operators whose operands are parsed at the multiplication level.  This is
   the lowest-precedence level, and parenthesized subexpressions re-enter
   the parser here.

   A negative number token (T_NEG_NUM) in operator position is treated as
   addition; presumably this makes input such as `a -5' parse as a + (-5). */
1012 static struct ctables_pcexpr *
1013 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1015 static const struct operator ops[] =
1017 { T_PLUS, CTPO_ADD },
1018 { T_DASH, CTPO_SUB },
1019 { T_NEG_NUM, CTPO_ADD },
1022 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1023 ops, sizeof ops / sizeof *ops,
1024 ctables_pcexpr_parse_mul, NULL);
1027 /* CTABLES axis expressions. */
1029 /* CTABLES has a number of extra formats that we implement via custom
1030 currency specifications on an alternate fmt_settings.

   Each CTEF_* name below reuses one of the custom currency format types
   (FMT_CCA through FMT_CCD).  parse_ctables_format_specifier() selects them
   for the format names NEGPAREN, NEQUAL, PAREN, and PCTPAREN. */
1031 #define CTEF_NEGPAREN FMT_CCA
1032 #define CTEF_NEQUAL FMT_CCB
1033 #define CTEF_PAREN FMT_CCC
1034 #define CTEF_PCTPAREN FMT_CCD
1036 enum ctables_summary_variant
1045 enum ctables_axis_op
/* For a variable node (CTAO_VAR): the variable itself. */
1061 struct variable *var;
/* Summary specs attached to the variable, one set per summary variant
   (for example CSV_CELL and CSV_TOTAL). */
1063 struct ctables_summary_spec_set specs[N_CSVS];
/* For a nonterminal node: its two operands. */
1067 struct ctables_axis *subs[2];
/* Location in the syntax, used when reporting messages about this node. */
1070 struct msg_location *loc;
/* Destroys AXIS and what it owns.  Depending on the kind of node this means
   its summary spec sets or, recursively, its two subaxes; the node's source
   location is destroyed as well. */
1074 ctables_axis_destroy (struct ctables_axis *axis)
1082 for (size_t i = 0; i < N_CSVS; i++)
1083 ctables_summary_spec_set_uninit (&axis->specs[i]);
1088 ctables_axis_destroy (axis->subs[0]);
1089 ctables_axis_destroy (axis->subs[1]);
1092 msg_location_destroy (axis->loc);
/* Returns a newly allocated axis node of type OP whose operands are SUB0 and
   SUB1.  Its location runs from token offset START_OFS in LEXER through the
   token just before LEXER's current one. */
1096 static struct ctables_axis *
1097 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1098 struct ctables_axis *sub0,
1099 struct ctables_axis *sub1,
1100 struct lexer *lexer, int start_ofs)
1102 struct ctables_axis *axis = xmalloc (sizeof *axis);
1103 *axis = (struct ctables_axis) {
1105 .subs = { sub0, sub1 },
1106 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State shared by the axis expression parsing functions. */
1111 struct ctables_axis_parse_ctx
1113 struct lexer *lexer; /* Source of tokens. */
1114 struct dictionary *dict; /* Dictionary in which variables are looked up. */
/* Returns a new pivot_value holding the label to display for SPEC.  One
   path returns the default label for SPEC's function (presumably taken when
   SPEC has no user-specified label).  The other copies SPEC's own label to
   the output, replacing each occurrence of ")CILEVEL" with CILEVEL
   formatted using "%g". */
1117 static struct pivot_value *
1118 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1121 return ctables_summary_function_label (spec->function, spec->weighting,
1122 spec->user_area, spec->percentile);
1125 struct substring in = ss_cstr (spec->label);
1126 struct substring target = ss_cstr (")CILEVEL");
1128 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next TARGET (or all that remains of IN). */
1131 size_t chunk = ss_find_substring (in, target);
1132 ds_put_substring (&out, ss_head (in, chunk));
1133 ss_advance (&in, chunk);
1135 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip over TARGET and substitute the confidence level for it. */
1137 ss_advance (&in, target.length);
1138 ds_put_format (&out, "%g", cilevel);
/* Adds a summary spec built from FUNCTION, WEIGHTING, AREA, PERCENTILE,
   LABEL, and FORMAT to summary variant SV of the variable nodes under AXIS.
   A variable node gets the spec appended to its own set, after a check that
   FUNCTION is available for that variable.  Any other node passes the
   request on to both of its subaxes.  LABEL is duplicated if nonnull.  A
   null FORMAT selects the function's default format.  LOC is where errors
   about the function are reported. */
1144 add_summary_spec (struct ctables_axis *axis,
1145 enum ctables_summary_function function,
1146 enum ctables_weighting weighting,
1147 enum ctables_area_type area, double percentile,
1148 const char *label, const struct fmt_spec *format,
1149 bool is_ctables_format, const struct msg_location *loc,
1150 enum ctables_summary_variant sv)
1152 if (axis->op == CTAO_VAR)
/* The function's full syntax name is needed only for error messages. */
1154 char function_name[128];
1155 ctables_summary_function_name (function, weighting, area,
1156 function_name, sizeof function_name);
1157 const char *var_name = var_get_name (axis->var);
1158 switch (ctables_function_availability (function))
1162 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1163 "response sets."), function_name);
1164 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* Non-scale variables are rejected here, except in the totals variant. */
1170 if (!axis->scale && sv != CSV_TOTAL)
1173 _("Summary function %s applies only to scale variables."),
1175 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append to the set for SV, growing the array when it is full. */
1185 struct ctables_summary_spec_set *set = &axis->specs[sv];
1186 if (set->n >= set->allocated)
1187 set->specs = x2nrealloc (set->specs, &set->allocated,
1188 sizeof *set->specs);
1190 struct ctables_summary_spec *dst = &set->specs[set->n++];
1191 *dst = (struct ctables_summary_spec) {
1192 .function = function,
1193 .weighting = weighting,
1196 .percentile = percentile,
1197 .label = xstrdup_if_nonnull (label),
1198 .format = (format ? *format
1199 : ctables_summary_default_format (function, axis->var)),
1200 .is_ctables_format = is_ctables_format,
/* Not a variable node: apply the same request to both subaxes. */
1206 for (size_t i = 0; i < 2; i++)
1207 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1208 percentile, label, format, is_ctables_format,
1215 static struct ctables_axis *ctables_axis_parse_stack (
1216 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression
   or a variable name optionally followed by [S] or [C].  [S] makes the
   variable scale and [C] makes it categorical; otherwise its measurement
   level decides.  Names that start with '$' (multiple response sets) and
   string variables used as scale are rejected with an error. */
1218 static struct ctables_axis *
1219 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1221 if (lex_match (ctx->lexer, T_LPAREN))
1223 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1224 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1226 ctables_axis_destroy (sub);
1232 if (!lex_force_id (ctx->lexer))
1235 if (lex_tokcstr (ctx->lexer)[0] == '$')
1237 lex_error (ctx->lexer,
1238 _("Multiple response set support not implemented."));
1242 int start_ofs = lex_ofs (ctx->lexer);
1243 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1247 struct ctables_axis *axis = xmalloc (sizeof *axis);
1248 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] overrides the variable's measurement level. */
1250 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1251 : lex_match_phrase (ctx->lexer, "[C]") ? false
1252 : var_get_measure (var) == MEASURE_SCALE);
1253 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1254 lex_ofs (ctx->lexer) - 1);
1255 if (axis->scale && var_is_alpha (var))
1257 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1259 var_get_name (var));
1260 ctables_axis_destroy (axis);
/* Tests whether S contains at least one ASCII decimal digit.  strcspn()
   yields the index of the first digit, or of the null terminator if there is
   none. */
1268 has_digit (const char *s)
1270 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.  The
   CTABLES-specific type names NEGPAREN, NEQUAL, PAREN, and PCTPAREN are
   mapped to the CTEF_* format types and reported with *IS_CTABLES_FORMAT
   set to true.  Any other type name is parsed as a standard format, checked
   as a numeric output format, and reported with *IS_CTABLES_FORMAT set to
   false.  Errors are reported at the specifier's token. */
1274 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1275 bool *is_ctables_format)
1277 char type[FMT_TYPE_LEN_MAX + 1];
1278 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1281 if (!strcasecmp (type, "NEGPAREN"))
1282 format->type = CTEF_NEGPAREN;
1283 else if (!strcasecmp (type, "NEQUAL"))
1284 format->type = CTEF_NEQUAL;
1285 else if (!strcasecmp (type, "PAREN"))
1286 format->type = CTEF_PAREN;
1287 else if (!strcasecmp (type, "PCTPAREN"))
1288 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific name: parse and validate as a standard format. */
1291 *is_ctables_format = false;
1292 if (!parse_format_specifier (lexer, format))
1295 char *error = fmt_check_output__ (format);
1297 error = fmt_check_type_compat__ (format, NULL, VAL_NUMERIC);
1300 lex_next_error (lexer, -1, -1, "%s", error);
/* Width and decimals checks for the CTABLES-specific formats. */
1311 lex_next_error (lexer, -1, -1,
1312 _("Output format %s requires width 2 or greater."), type);
1315 else if (format->d > format->w - 1)
1317 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1318 "greater than decimals."), type);
1323 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.  Each specification is a summary function
   name, then a percentile for PTILE, then an optional quoted label, then an
   optional output format.  Each one is added to the axis with
   add_summary_spec().  A nested TOTALS[...] list switches from the CSV_CELL
   variant to CSV_TOTAL.  The subexpression is destroyed on one path,
   presumably when an error occurs. */
1328 static struct ctables_axis *
1329 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1331 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1332 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1335 enum ctables_summary_variant sv = CSV_CELL;
1338 int start_ofs = lex_ofs (ctx->lexer);
1340 /* Parse function. */
1341 enum ctables_summary_function function;
1342 enum ctables_weighting weighting;
1343 enum ctables_area_type area;
1344 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1348 /* Parse percentile. */
1349 double percentile = 0;
1350 if (function == CTSF_PTILE)
1352 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1354 percentile = lex_number (ctx->lexer);
1355 lex_get (ctx->lexer);
/* Optional user-specified label, given as a string token. */
1360 if (lex_is_string (ctx->lexer))
1362 label = ss_xstrdup (lex_tokss (ctx->lexer));
1363 lex_get (ctx->lexer);
/* Optional output format.  Only an identifier that contains a digit is
   tried as a format specifier. */
1367 struct fmt_spec format;
1368 const struct fmt_spec *formatp;
1369 bool is_ctables_format = false;
1370 if (lex_token (ctx->lexer) == T_ID
1371 && has_digit (lex_tokcstr (ctx->lexer)))
1373 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1374 &is_ctables_format))
1384 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1385 lex_ofs (ctx->lexer) - 1);
1386 add_summary_spec (sub, function, weighting, area, percentile, label,
1387 formatp, is_ctables_format, loc, sv);
1389 msg_location_destroy (loc);
/* The comma between specifications is optional. */
1391 lex_match (ctx->lexer, T_COMMA);
1392 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1394 if (!lex_force_match (ctx->lexer, T_LBRACK))
1398 else if (lex_match (ctx->lexer, T_RBRACK))
/* Within TOTALS[...], a second right bracket must close the outer list. */
1400 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1407 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.

   A CTAO_VAR node yields itself when its 'scale' flag is set and NULL
   otherwise.  For other nodes, both entries of 'subs' are searched
   recursively. */
1411 static const struct ctables_axis *
1412 find_scale (const struct ctables_axis *axis)
1416 else if (axis->op == CTAO_VAR)
1417 return axis->scale ? axis : NULL;
1420 for (size_t i = 0; i < 2; i++)
1422 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical variable that has
   summary specifications.

   A CTAO_VAR node yields itself when it is not scale and its CSV_CELL spec
   set is nonempty, and NULL otherwise.  For other nodes, both entries of
   'subs' are searched recursively. */
1430 static const struct ctables_axis *
1431 find_categorical_summary_spec (const struct ctables_axis *axis)
1435 else if (axis->op == CTAO_VAR)
1436 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1439 for (size_t i = 0; i < 2; i++)
1441 const struct ctables_axis *sum
1442 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix expressions joined by '>' into CTAO_NEST
   nodes.

   Two constraints are checked on each new nest node, and the node is
   destroyed when one fails:

   - The left and right sides must not both contain a scale variable.

   - The left (outer) side must not contain a categorical variable that has
     summary specifications. */
1450 static struct ctables_axis *
1451 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1453 int start_ofs = lex_ofs (ctx->lexer);
1454 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1458 while (lex_match (ctx->lexer, T_GT))
1460 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1463 ctables_axis_destroy (lhs);
1467 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1468 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1470 const struct ctables_axis *outer_scale = find_scale (lhs);
1471 const struct ctables_axis *inner_scale = find_scale (rhs);
1472 if (outer_scale && inner_scale)
1474 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1475 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1476 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1477 ctables_axis_destroy (nest);
1481 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1484 msg_at (SE, nest->loc,
1485 _("Summaries may only be requested for categorical variables "
1486 "at the innermost nesting level."));
1487 msg_at (SN, outer_sum->loc,
1488 _("This outer categorical variable has a summary."));
1489 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by '+'.

   Each '+' wraps the expression parsed so far and the next nest expression
   in a new CTAO_STACK node, so the result leans to the left.  The visible
   cleanup after parsing a right-hand side destroys LHS. */
1499 static struct ctables_axis *
1500 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1502 int start_ofs = lex_ofs (ctx->lexer);
1503 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1507 while (lex_match (ctx->lexer, T_PLUS))
1509 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1512 ctables_axis_destroy (lhs);
1516 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1517 ctx->lexer, start_ofs);
/* Entry point for parsing one axis expression from LEXER, storing the result
   in *AXISP.

   A current token of BY, '/', or end of command takes an early branch whose
   body is outside this excerpt (presumably an empty axis -- confirm).
   Otherwise a parse context is set up and the expression is parsed with
   ctables_axis_parse_stack(). */
1524 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1525 struct ctables_axis **axisp)
1528 if (lex_token (lexer) == T_BY
1529 || lex_token (lexer) == T_SLASH
1530 || lex_token (lexer) == T_ENDCMD)
1533 struct ctables_axis_parse_ctx ctx = {
1537 *axisp = ctables_axis_parse_stack (&ctx);
1541 /* CTABLES categories. */
/* A list of categories.  'cats' is the array of categories.  Code elsewhere
   in this file also uses members named 'n_refs' and 'n_cats', whose
   declarations are outside this excerpt. */
1543 struct ctables_categories
1546 struct ctables_category *cats;
/* One category within a struct ctables_categories.

   The category type enumeration is declared inline.  Each remaining member
   is meaningful only for the category types named in the comment next to
   it. */
1550 struct ctables_category
1552 enum ctables_category_type
1554 /* Explicit category lists. */
1557 CCT_NRANGE, /* Numerical range. */
1558 CCT_SRANGE, /* String range. */
1563 /* Totals and subtotals. */
1567 /* Implicit category lists. */
1572 /* For contributing to TOTALN. */
1573 CCT_EXCLUDED_MISSING,
1577 struct ctables_category *subtotal;
1583 double number; /* CCT_NUMBER. */
1584 struct substring string; /* CCT_STRING, in dictionary encoding. */
1585 double nrange[2]; /* CCT_NRANGE. */
1586 struct substring srange[2]; /* CCT_SRANGE. */
1590 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1591 bool hide_subcategories; /* CCT_SUBTOTAL. */
1594 /* CCT_POSTCOMPUTE. */
1597 const struct ctables_postcompute *pc;
1598 enum fmt_type parse_format;
1601 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1604 bool include_missing;
1605 bool sort_ascending;
1608 enum ctables_summary_function sort_function;
1609 enum ctables_weighting weighting;
1610 enum ctables_area_type area;
1611 struct variable *sort_var;
1616 /* Source location (sometimes NULL). */
1617 struct msg_location *location;
/* Releases the resources owned by CAT.

   The source location is always destroyed.  Depending on the category type,
   the string, both string-range endpoints, or the total label are freed as
   well.  Most of the 'case' labels that select these actions are outside
   this excerpt. */
1621 ctables_category_uninit (struct ctables_category *cat)
1626 msg_location_destroy (cat->location);
1633 case CCT_POSTCOMPUTE:
1637 ss_dealloc (&cat->string);
1641 ss_dealloc (&cat->srange[0]);
1642 ss_dealloc (&cat->srange[1]);
1647 free (cat->total_label);
1655 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, treating a null 'string' pointer as a distinct
   value.  Two null substrings are equal, a null and a non-null one are
   unequal, and two non-null ones are compared with ss_equals(). */
1661 nullable_substring_equal (const struct substring *a,
1662 const struct substring *b)
1664 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B for equality.

   Categories of different types are never equal.  Otherwise only the members
   relevant to the shared type are compared: the number, the string, the
   numeric range endpoints, the string range endpoints (a null endpoint only
   equals another null endpoint), the postcompute pointer, the total label,
   or the missing/sort settings.  Most of the 'case' labels are outside this
   excerpt.

   NOTE(review): the last visible comparison does not include 'weighting' or
   'area' -- confirm that this is intended. */
1668 ctables_category_equal (const struct ctables_category *a,
1669 const struct ctables_category *b)
1671 if (a->type != b->type)
1677 return a->number == b->number;
1680 return ss_equals (a->string, b->string);
1683 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1686 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1687 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1693 case CCT_POSTCOMPUTE:
1694 return a->pc == b->pc;
1698 return !strcmp (a->total_label, b->total_label);
1703 return (a->include_missing == b->include_missing
1704 && a->sort_ascending == b->sort_ascending
1705 && a->sort_function == b->sort_function
1706 && a->sort_var == b->sort_var
1707 && a->percentile == b->percentile);
1709 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry.
   The visible teardown uninitializes every category in C.  The statements
   that adjust the count and free memory are outside this excerpt. */
1717 ctables_categories_unref (struct ctables_categories *c)
1722 assert (c->n_refs > 0);
1726 for (size_t i = 0; i < c->n_cats; i++)
1727 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must have the same number of
   categories, and the categories at each position are compared pairwise with
   ctables_category_equal(). */
1733 ctables_categories_equal (const struct ctables_categories *a,
1734 const struct ctables_categories *b)
1736 if (a->n_cats != b->n_cats)
1739 for (size_t i = 0; i < a->n_cats; i++)
1740 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Builds, by value, a category whose numeric range runs from LOW to HIGH.
   The initializer line that sets the type is outside this excerpt
   (presumably CCT_NRANGE -- confirm). */
1746 static struct ctables_category
1747 cct_nrange (double low, double high)
1749 return (struct ctables_category) {
1751 .nrange = { low, high }
/* Builds, by value, a category whose string range runs from LOW to HIGH.
   The substrings are stored as given, without copying.  The initializer line
   that sets the type is outside this excerpt (presumably CCT_SRANGE --
   confirm). */
1755 static struct ctables_category
1756 cct_srange (struct substring low, struct substring high)
1758 return (struct ctables_category) {
1760 .srange = { low, high }
/* Parses what follows a SUBTOTAL or HSUBTOTAL keyword and stores a
   CCT_SUBTOTAL category in *CAT.

   If '=' follows, a string token is required and a copy of it becomes the
   label.  Otherwise the label is a copy of the translated default
   "Subtotal".  HIDE_SUBCATEGORIES is stored in the category unchanged. */
1765 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1766 struct ctables_category *cat)
1769 if (lex_match (lexer, T_EQUALS))
1771 if (!lex_force_string (lexer))
1774 total_label = ss_xstrdup (lex_tokss (lexer));
1778 total_label = xstrdup (_("Subtotal"));
1780 *cat = (struct ctables_category) {
1781 .type = CCT_SUBTOTAL,
1782 .hide_subcategories = hide_subcategories,
1783 .total_label = total_label
/* Parses one entry of an explicit category list from LEXER into *CAT.

   Accepted forms, tried in this order:

   - OTHERNM and MISSING keywords.

   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal().

   - LO THRU followed by a string or a number.

   - A number, optionally followed by THRU and either HI or a number.

   - A string, optionally followed by THRU and either HI or a string.

   - '&' followed by the name of a postcompute, which must already exist.

   Anything else is a syntax error.  An open-ended range is represented by
   -DBL_MAX or DBL_MAX for numbers and by a null substring for strings. */
1789 ctables_table_parse_explicit_category (struct lexer *lexer,
1790 struct dictionary *dict,
1792 struct ctables_category *cat)
1794 if (lex_match_id (lexer, "OTHERNM"))
1795 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1796 else if (lex_match_id (lexer, "MISSING"))
1797 *cat = (struct ctables_category) { .type = CCT_MISSING };
1798 else if (lex_match_id (lexer, "SUBTOTAL"))
1799 return ctables_table_parse_subtotal (lexer, false, cat);
1800 else if (lex_match_id (lexer, "HSUBTOTAL"))
1801 return ctables_table_parse_subtotal (lexer, true, cat);
1802 else if (lex_match_id (lexer, "LO"))
1804 if (!lex_force_match_id (lexer, "THRU"))
1806 if (lex_is_string (lexer))
1808 struct substring sr0 = { .string = NULL };
1809 struct substring sr1 = parse_substring (lexer, dict);
1810 *cat = cct_srange (sr0, sr1);
1812 else if (lex_force_num (lexer))
1814 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1820 else if (lex_is_number (lexer))
1822 double number = lex_number (lexer);
1824 if (lex_match_id (lexer, "THRU"))
1826 if (lex_match_id (lexer, "HI"))
1827 *cat = cct_nrange (number, DBL_MAX);
1830 if (!lex_force_num (lexer))
1832 *cat = cct_nrange (number, lex_number (lexer));
1837 *cat = (struct ctables_category) {
1842 else if (lex_is_string (lexer))
1844 struct substring s = parse_substring (lexer, dict);
1845 if (lex_match_id (lexer, "THRU"))
1847 if (lex_match_id (lexer, "HI"))
1849 struct substring sr1 = { .string = NULL };
1850 *cat = cct_srange (s, sr1);
1854 if (!lex_force_string (lexer))
1859 struct substring sr1 = parse_substring (lexer, dict);
1860 *cat = cct_srange (s, sr1);
1864 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1866 else if (lex_match (lexer, T_AND))
1868 if (!lex_force_id (lexer))
1870 struct ctables_postcompute *pc = ctables_find_postcompute (
1871 ct, lex_tokcstr (lexer));
/* The error location covers the '&' token as well as the name. */
1874 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1875 msg_at (SE, loc, _("Unknown postcompute &%s."),
1876 lex_tokcstr (lexer));
1877 msg_location_destroy (loc);
1882 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1886 lex_error (lexer, _("Syntax error expecting category specification."));
/* Converts category string S to a number by parsing it with data_in() as
   input format FORMAT, using the encoding of DICT and the current format
   settings.  The result is delivered through N.  A parse failure is reported
   at LOCATION, naming the format and the underlying error. */
1894 parse_category_string (struct msg_location *location,
1895 struct substring s, const struct dictionary *dict,
1896 enum fmt_type format, double *n)
1899 char *error = data_in (s, dict_get_encoding (dict), format,
1900 settings_get_fmt_settings (), &v, 0, NULL);
1903 msg_at (SE, location,
1904 _("Failed to parse category specification as format %s: %s."),
1905 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.

   Each category is tested according to the kind of reference in E (E->op):
   equal number, equal string, equal numeric range, equal string range (a
   null endpoint only matches a null endpoint), or a MISSING, OTHERNM,
   SUBTOTAL, or TOTAL category.

   SUBTOTAL categories are counted while scanning.  A reference with a
   nonzero 'subtotal_index' selects by that count.  A reference with index 0
   is handled specially when more than one subtotal exists (the action is
   outside this excerpt; presumably treated as ambiguous -- confirm). */
1914 static struct ctables_category *
1915 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1916 const struct ctables_pcexpr *e)
1918 struct ctables_category *best = NULL;
1919 size_t n_subtotals = 0;
1920 for (size_t i = 0; i < cats->n_cats; i++)
1922 struct ctables_category *cat = &cats->cats[i];
1925 case CTPO_CAT_NUMBER:
1926 if (cat->type == CCT_NUMBER && cat->number == e->number)
1930 case CTPO_CAT_STRING:
1931 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1935 case CTPO_CAT_NRANGE:
1936 if (cat->type == CCT_NRANGE
1937 && cat->nrange[0] == e->nrange[0]
1938 && cat->nrange[1] == e->nrange[1])
1942 case CTPO_CAT_SRANGE:
1943 if (cat->type == CCT_SRANGE
1944 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1945 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1949 case CTPO_CAT_MISSING:
1950 if (cat->type == CCT_MISSING)
1954 case CTPO_CAT_OTHERNM:
1955 if (cat->type == CCT_OTHERNM)
1959 case CTPO_CAT_SUBTOTAL:
1960 if (cat->type == CCT_SUBTOTAL)
1963 if (e->subtotal_index == n_subtotals)
1965 else if (e->subtotal_index == 0)
1970 case CTPO_CAT_TOTAL:
1971 if (cat->type == CCT_TOTAL)
1985 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references are converted to numbers
   first.  A CTPO_CAT_STRING reference becomes a CTPO_CAT_NUMBER reference,
   and a CTPO_CAT_SRANGE reference becomes a CTPO_CAT_NRANGE reference whose
   null endpoints map to -DBL_MAX and DBL_MAX.  The converted node keeps E's
   location.  All other references are looked up unchanged. */
1990 static struct ctables_category *
1991 ctables_find_category_for_postcompute (const struct dictionary *dict,
1992 const struct ctables_categories *cats,
1993 enum fmt_type parse_format,
1994 const struct ctables_pcexpr *e)
1996 if (parse_format != FMT_F)
1998 if (e->op == CTPO_CAT_STRING)
2001 if (!parse_category_string (e->location, e->string, dict,
2002 parse_format, &number))
2005 struct ctables_pcexpr e2 = {
2006 .op = CTPO_CAT_NUMBER,
2008 .location = e->location,
2010 return ctables_find_category_for_postcompute__ (cats, &e2);
2012 else if (e->op == CTPO_CAT_SRANGE)
2015 if (!e->srange[0].string)
2016 nrange[0] = -DBL_MAX;
2017 else if (!parse_category_string (e->location, e->srange[0], dict,
2018 parse_format, &nrange[0]))
2021 if (!e->srange[1].string)
2022 nrange[1] = DBL_MAX;
2023 else if (!parse_category_string (e->location, e->srange[1], dict,
2024 parse_format, &nrange[1]))
2027 struct ctables_pcexpr e2 = {
2028 .op = CTPO_CAT_NRANGE,
2029 .nrange = { nrange[0], nrange[1] },
2030 .location = e->location,
2032 return ctables_find_category_for_postcompute__ (cats, &e2);
2035 return ctables_find_category_for_postcompute__ (cats, e);
/* Wraps the string data of V, whose length is the width of VAR, in a
   substring and trims trailing spaces from it.  The substring refers to V's
   own storage; nothing is copied. */
2038 static struct substring
2039 rtrim_value (const union value *v, const struct variable *var)
2041 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2042 var_get_width (var));
2043 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces removed,
   lies within the inclusive range SRANGE[0] to SRANGE[1].  An endpoint whose
   'string' pointer is null leaves that side unbounded. */
2048 in_string_range (const union value *v, const struct variable *var,
2049 const struct substring *srange)
2051 struct substring s = rtrim_value (v, var);
2052 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2053 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   A system-missing numeric value takes an early branch whose body is outside
   this excerpt.  Otherwise the categories are scanned from the last to the
   first.  The visible tests cover an equal number, an equal right-trimmed
   string, a numeric range (an endpoint of -DBL_MAX or DBL_MAX is unbounded),
   a string range, and a missing value.  Implicit categories accept V when
   they include missing values or V is not missing.

   When the scan finds nothing, the result is NULL for a missing value and
   otherwise OTHERNM, which is initialized to NULL (the statement that sets
   it during the scan is outside this excerpt). */
2056 static const struct ctables_category *
2057 ctables_categories_match (const struct ctables_categories *c,
2058 const union value *v, const struct variable *var)
2060 if (var_is_numeric (var) && v->f == SYSMIS)
2063 const struct ctables_category *othernm = NULL;
2064 for (size_t i = c->n_cats; i-- > 0; )
2066 const struct ctables_category *cat = &c->cats[i];
2070 if (cat->number == v->f)
2075 if (ss_equals (cat->string, rtrim_value (v, var)))
2080 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2081 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2086 if (in_string_range (v, var, cat->srange))
2091 if (var_is_value_missing (var, v))
2095 case CCT_POSTCOMPUTE:
2110 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2113 case CCT_EXCLUDED_MISSING:
2118 return var_is_value_missing (var, v) ? NULL : othernm;
/* Finds the CCT_TOTAL category of C.  Only the first and the last categories
   are examined, the first taking precedence.  The fallback when neither is
   a total is outside this excerpt.

   NOTE(review): the code indexes cats[0] and cats[n_cats - 1], so it
   presumably relies on C being nonempty -- confirm against callers. */
2121 static const struct ctables_category *
2122 ctables_categories_total (const struct ctables_categories *c)
2124 const struct ctables_category *first = &c->cats[0];
2125 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2126 return (first->type == CCT_TOTAL ? first
2127 : last->type == CCT_TOTAL ? last
/* Appends to S the text for NUMBER formatted as a value of VAR.  A temporary
   pivot value is created for the formatting and destroyed afterward. */
2132 ctables_category_format_number (double number, const struct variable *var,
2135 struct pivot_value *pv = pivot_value_new_var_value (
2136 var, &(union value) { .f = number });
2137 pivot_value_format (pv, NULL, s);
2138 pivot_value_destroy (pv);
/* Appends to OUT the text for STRING formatted as a value of VAR.

   STRING is first copied into a temporary buffer exactly as wide as VAR and
   padded on the right with spaces.  A temporary pivot value built from that
   buffer is formatted and then destroyed.

   NOTE(review): the release of the xmalloc'd buffer is not visible in this
   excerpt -- confirm that it is freed after use. */
2142 ctables_category_format_string (struct substring string,
2143 const struct variable *var, struct string *out)
2145 int width = var_get_width (var);
2146 char *s = xmalloc (width);
2147 buf_copy_rpad (s, width, string.string, string.length, ' ');
2148 struct pivot_value *pv = pivot_value_new_var_value (
2149 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2150 pivot_value_format (pv, NULL, out);
2151 pivot_value_destroy (pv);
/* Appends to S a textual label for category CAT of variable VAR.

   Numbers and strings are formatted as values of VAR.  The two endpoints of
   a range are joined by " THRU ".  MISSING and OTHERNM appear as those
   keywords, a postcompute as '&' followed by its name, and totals and
   subtotals as their label.  Most of the 'case' labels, and the handling of
   the remaining category types, are outside this excerpt. */
2156 ctables_category_format_label (const struct ctables_category *cat,
2157 const struct variable *var,
2163 ctables_category_format_number (cat->number, var, s);
2167 ctables_category_format_string (cat->string, var, s);
2171 ctables_category_format_number (cat->nrange[0], var, s);
2172 ds_put_format (s, " THRU ");
2173 ctables_category_format_number (cat->nrange[1], var, s);
2177 ctables_category_format_string (cat->srange[0], var, s);
2178 ds_put_format (s, " THRU ");
2179 ctables_category_format_string (cat->srange[1], var, s);
2183 ds_put_cstr (s, "MISSING");
2187 ds_put_cstr (s, "OTHERNM");
2190 case CCT_POSTCOMPUTE:
2191 ds_put_format (s, "&%s", cat->pc->name);
2196 ds_put_cstr (s, cat->total_label);
2202 case CCT_EXCLUDED_MISSING:
/* Checks that every category referenced by postcompute expression E, which
   belongs to computed category PC_CAT, is present in CATS.  CATS_LOCATION is
   the source location of the category list and is used in diagnostics.

   For a category reference node, the category is looked up using PC_CAT's
   parse format.  Two diagnostics are visible:

   - A SUBTOTAL reference without a position when CATS holds more than one
     subtotal.

   - A reference to a category that is missing from the list, followed by a
     note on how to fix it that depends on the kind of reference.

   Other nodes are checked by recursing into both entries of 'subs' that are
   non-null. */
2210 ctables_recursive_check_postcompute (struct dictionary *dict,
2211 const struct ctables_pcexpr *e,
2212 struct ctables_category *pc_cat,
2213 const struct ctables_categories *cats,
2214 const struct msg_location *cats_location)
2218 case CTPO_CAT_NUMBER:
2219 case CTPO_CAT_STRING:
2220 case CTPO_CAT_NRANGE:
2221 case CTPO_CAT_SRANGE:
2222 case CTPO_CAT_MISSING:
2223 case CTPO_CAT_OTHERNM:
2224 case CTPO_CAT_SUBTOTAL:
2225 case CTPO_CAT_TOTAL:
2227 struct ctables_category *cat = ctables_find_category_for_postcompute (
2228 dict, cats, pc_cat->parse_format, e);
2231 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2233 size_t n_subtotals = 0;
2234 for (size_t i = 0; i < cats->n_cats; i++)
2235 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2236 if (n_subtotals > 1)
2238 msg_at (SE, cats_location,
2239 ngettext ("These categories include %zu instance "
2240 "of SUBTOTAL or HSUBTOTAL, so references "
2241 "from computed categories must refer to "
2242 "subtotals by position, "
2243 "e.g. SUBTOTAL[1].",
2244 "These categories include %zu instances "
2245 "of SUBTOTAL or HSUBTOTAL, so references "
2246 "from computed categories must refer to "
2247 "subtotals by position, "
2248 "e.g. SUBTOTAL[1].",
2251 msg_at (SN, e->location,
2252 _("This is the reference that lacks a position."));
2257 msg_at (SE, pc_cat->location,
2258 _("Computed category &%s references a category not included "
2259 "in the category list."),
2261 msg_at (SN, e->location, _("This is the missing category."));
2262 if (e->op == CTPO_CAT_SUBTOTAL)
2263 msg_at (SN, cats_location,
2264 _("To fix the problem, add subtotals to the "
2265 "list of categories here."));
2266 else if (e->op == CTPO_CAT_TOTAL)
/* Unlike the sibling notes, this one is issued with msg() and carries no
   source location. */
2267 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2268 "CATEGORIES specification."));
2270 msg_at (SN, cats_location,
2271 _("To fix the problem, add the missing category to the "
2272 "list of categories here."));
2275 if (pc_cat->pc->hide_source_cats)
2289 for (size_t i = 0; i < 2; i++)
2290 if (e->subs[i] && !ctables_recursive_check_postcompute (
2291 dict, e->subs[i], pc_cat, cats, cats_location))
/* Builds the display label for postcompute category CAT as a new pivot
   value.

   Every occurrence of ")LABEL[n]" in the postcompute's label is replaced by
   the formatted label of the n-th category (counting from 1) in CATS, using
   VAR for value formatting.  A label without any placeholder is used as is.

   The final statement returns the unexpanded label.  It is presumably
   reached when a placeholder is malformed, out of range, or cannot be
   formatted (the jumps are outside this excerpt -- confirm). */
2299 static struct pivot_value *
2300 ctables_postcompute_label (const struct ctables_categories *cats,
2301 const struct ctables_category *cat,
2302 const struct variable *var)
2304 struct substring in = ss_cstr (cat->pc->label);
2305 struct substring target = ss_cstr (")LABEL[");
2307 struct string out = DS_EMPTY_INITIALIZER;
2310 size_t chunk = ss_find_substring (in, target);
2311 if (chunk == SIZE_MAX)
/* No further placeholder.  If nothing was expanded, the rest of the input is
   used directly. */
2313 if (ds_is_empty (&out))
2314 return pivot_value_new_user_text (in.string, in.length);
2317 ds_put_substring (&out, in);
2318 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the placeholder, then skip past ")LABEL[". */
2322 ds_put_substring (&out, ss_head (in, chunk));
2323 ss_advance (&in, chunk + target.length);
2325 struct substring idx_s;
2326 if (!ss_get_until (&in, ']', &idx_s))
/* The index must consist entirely of a decimal number in the range 1 to
   n_cats. */
2329 long int idx = strtol (idx_s.string, &tail, 10);
2330 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2333 struct ctables_category *cat2 = &cats->cats[idx - 1];
2334 if (!ctables_category_format_label (cat2, var, &out))
2340 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels category CAT.

   A postcompute category that has a label gets the label built by
   ctables_postcompute_label().  A total or subtotal gets its total label as
   user text.  Every other category gets VALUE presented as a value of
   VAR. */
2343 static struct pivot_value *
2344 ctables_category_create_value_label (const struct ctables_categories *cats,
2345 const struct ctables_category *cat,
2346 const struct variable *var,
2347 const union value *value)
2349 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2350 ? ctables_postcompute_label (cats, cat, var)
2351 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2352 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2353 : pivot_value_new_var_value (var, value));
2356 /* CTABLES variable nesting and stacking. */
2358 /* A nested sequence of variables, e.g. a > b > c. */
/* Visible members: 'vars' is the array of nested variables; 'areas' and
   'n_areas' hold one index array and its length per area type; 'specs' holds
   one summary spec set per summary variant.  The struct tag line and other
   members (for example 'n', 'scale_idx', and 'summary_idx', which code in
   this file uses) are outside this excerpt. */
2361 struct variable **vars;
2365 size_t *areas[N_CTATS];
2366 size_t n_areas[N_CTATS];
2369 struct ctables_summary_spec_set specs[N_CSVS];
2372 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
/* 'nests' is the array of nestings.  Its length is kept in a member 'n' that
   code in this file uses but whose declaration is outside this excerpt. */
2373 struct ctables_stack
2375 struct ctables_nest *nests;
/* Releases resources owned by NEST: the summary spec set of every summary
   variant and the area index array of every area type.  Any further
   releases are outside this excerpt. */
2380 ctables_nest_uninit (struct ctables_nest *nest)
2383 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2384 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2385 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2386 free (nest->areas[at]);
/* Releases resources owned by STACK: every nest is uninitialized and then
   the array of nests itself is freed. */
2390 ctables_stack_uninit (struct ctables_stack *stack)
2394 for (size_t i = 0; i < stack->n; i++)
2395 ctables_nest_uninit (&stack->nests[i]);
2396 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0.

   The result has one nest for every pair (a from S0, b from S1).  Its
   variables are a's followed by b's.  Its scale and summary indexes are
   taken from a when a has them, and otherwise from b, shifted by the number
   of variables in a.  Its summary specs are cloned from whichever nest was
   chosen as the summary source, judged by whether the CSV_CELL spec set has
   a variable.

   Both S0 and S1 are uninitialized here, so the caller gives them up.

   NOTE(review): the allocation passes 's1.n * sizeof *stack.nests' as the
   element size, and that product is computed before xnmalloc can check for
   overflow. */
2400 static struct ctables_stack
2401 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2408 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2409 for (size_t i = 0; i < s0.n; i++)
2410 for (size_t j = 0; j < s1.n; j++)
2412 const struct ctables_nest *a = &s0.nests[i];
2413 const struct ctables_nest *b = &s1.nests[j];
2415 size_t allocate = a->n + b->n;
2416 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2418 for (size_t k = 0; k < a->n; k++)
2419 vars[n++] = a->vars[k];
2420 for (size_t k = 0; k < b->n; k++)
2421 vars[n++] = b->vars[k];
2422 assert (n == allocate);
2424 const struct ctables_nest *summary_src;
2425 if (!a->specs[CSV_CELL].var)
2427 else if (!b->specs[CSV_CELL].var)
2432 struct ctables_nest *new = &stack.nests[stack.n++];
2433 *new = (struct ctables_nest) {
2435 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2436 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2438 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2439 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2443 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2444 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2446 ctables_stack_uninit (&s0);
2447 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S1 after S0.

   The nests of S0 and then those of S1 are copied, as structs, into a new
   array.  The 'group_head' of every nest that comes from S1 is shifted by
   the number of nests in S0, so that it still refers to the same nest at its
   new position. */
2451 static struct ctables_stack
2452 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2454 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2455 for (size_t i = 0; i < s0.n; i++)
2456 stack.nests[stack.n++] = s0.nests[i];
2457 for (size_t i = 0; i < s1.n; i++)
2459 stack.nests[stack.n] = s1.nests[i];
2460 stack.nests[stack.n].group_head += s0.n;
2463 assert (stack.n == s0.n + s1.n);
/* Builds the one-nest stack for the single variable axis node A.

   The nest's scale index is 0 when A is a scale variable and SIZE_MAX
   otherwise.  Its summary index is 0 when A has CSV_CELL summary specs or is
   scale, and SIZE_MAX otherwise.  The spec set of every summary variant is
   cloned from A and tagged with A's variable and scale flag. */
2469 static struct ctables_stack
2470 var_fts (const struct ctables_axis *a)
2472 struct variable **vars = xmalloc (sizeof *vars);
2475 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2476 struct ctables_nest *nest = xmalloc (sizeof *nest);
2477 *nest = (struct ctables_nest) {
2480 .scale_idx = a->scale ? 0 : SIZE_MAX,
2481 .summary_idx = is_summary ? 0 : SIZE_MAX,
2484 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2486 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2487 nest->specs[sv].var = a->var;
2488 nest->specs[sv].is_scale = a->scale;
2490 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Expands the axis tree A into a stack of nests.

   The visible cases are: an empty stack (the condition is outside this
   excerpt), stack_fts() applied to the expansions of both subtrees, and
   nest_fts() applied to the expansions of both subtrees.  Which node kinds
   select the two combined cases is outside this excerpt (presumably
   CTAO_STACK and CTAO_NEST -- confirm). */
2493 static struct ctables_stack
2494 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2497 return (struct ctables_stack) { .n = 0 };
2505 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2506 enumerate_fts (axis_type, a->subs[1]));
2509 /* This should consider any of the scale variables found in the result to
2510 be linked to each other listwise for SMISSING=LISTWISE. */
2511 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2512 enumerate_fts (axis_type, a->subs[1]));
2518 /* CTABLES summary calculation. */
/* Running state for one summary statistic.  Which member is in use depends
   on the summary function, as grouped by the comments below.  Several
   members (for example 'count', 'min', 'max', 'ovalid', and 'ovalue', which
   code in this file uses) are declared outside this excerpt. */
2520 union ctables_summary
2522 /* COUNT, VALIDN, TOTALN. */
2525 /* MINIMUM, MAXIMUM, RANGE. */
2532 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2533 struct moments1 *moments;
2535 /* MEDIAN, MODE, PTILE. */
2538 struct casewriter *writer;
/* Initializes summary state S for the summary function in SS.

   Visible initializations: minimum and maximum start as SYSMIS; moments are
   created up to the mean or up to the variance, depending on the function;
   order statistics get a sorting case writer.  Most 'case' labels are
   outside this excerpt. */
2545 ctables_summary_init (union ctables_summary *s,
2546 const struct ctables_summary_spec *ss)
2548 switch (ss->function)
2551 case CTSF_areaPCT_COUNT:
2552 case CTSF_areaPCT_VALIDN:
2553 case CTSF_areaPCT_TOTALN:
2566 s->min = s->max = SYSMIS;
2571 case CTSF_areaPCT_SUM:
2572 s->moments = moments1_create (MOMENT_MEAN);
2578 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases have two numeric (width 0) columns and are sorted in ascending order
   on column 0.  ctables_summary_add() stores the value in column 0 and the
   weight in column 1. */
2585 struct caseproto *proto = caseproto_create ();
2586 proto = caseproto_add_width (proto, 0);
2587 proto = caseproto_add_width (proto, 0);
2589 struct subcase ordering;
2590 subcase_init (&ordering, 0, 0, SC_ASCEND);
2591 s->writer = sort_create_writer (&ordering, proto);
2592 subcase_uninit (&ordering);
2593 caseproto_unref (proto);
/* Releases what ctables_summary_init() allocated in S for the summary
   function in SS.  The visible releases destroy the moments or the case
   writer.  Most 'case' labels are outside this excerpt. */
2603 ctables_summary_uninit (union ctables_summary *s,
2604 const struct ctables_summary_spec *ss)
2606 switch (ss->function)
2609 case CTSF_areaPCT_COUNT:
2610 case CTSF_areaPCT_VALIDN:
2611 case CTSF_areaPCT_TOTALN:
2630 case CTSF_areaPCT_SUM:
2631 moments1_destroy (s->moments);
2637 casewriter_destroy (s->writer);
/* Accumulates one observation VALUE into summary state S for the summary
   function in SS.  IS_MISSING and IS_INCLUDED describe the observation, and
   the inclusion rules are spelled out in the long comment inside the
   function.  Visible updates: the minimum and maximum are replaced when
   unset (SYSMIS) or exceeded; VALUE and its weight are added to the moments;
   for order statistics the weight is added to 'ovalid' and a two-column case
   holding VALUE and the weight is written to the sorting case writer. */
2643 ctables_summary_add (union ctables_summary *s,
2644 const struct ctables_summary_spec *ss,
2645 const union value *value,
2646 bool is_missing, bool is_included,
2649 /* To determine whether a case is included in a given table for a particular
2650 kind of summary, consider the following charts for the variable being
2651 summarized. Only if "yes" appears is the case counted.
2653 Categorical variables: VALIDN other TOTALN
2654 Valid values in included categories yes yes yes
2655 Missing values in included categories --- yes yes
2656 Missing values in excluded categories --- --- yes
2657 Valid values in excluded categories --- --- ---
2659 Scale variables: VALIDN other TOTALN
2660 Valid value yes yes yes
2661 Missing value --- yes yes
2663 Missing values include both user- and system-missing. (The system-missing
2664 value is always in an excluded category.)
2666 One way to interpret the above table is that scale variables are like
2667 categorical variables in which all values are in included categories.
2669 switch (ss->function)
2672 case CTSF_areaPCT_TOTALN:
2677 case CTSF_areaPCT_COUNT:
2683 case CTSF_areaPCT_VALIDN:
2701 if (s->min == SYSMIS || value->f < s->min)
2703 if (s->max == SYSMIS || value->f > s->max)
2714 moments1_add (s->moments, value->f, weight);
2717 case CTSF_areaPCT_SUM:
2719 moments1_add (s->moments, value->f, weight);
2727 s->ovalid += weight;
2729 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2730 *case_num_rw_idx (c, 0) = value->f;
2731 *case_num_rw_idx (c, 1) = weight;
2732 casewriter_write (s->writer, c);
/* Computes the final value of summary S for the summary function in SS.
   AREAS gives, per area type, the area that contains the cell being
   summarized; SS->calc_area selects the one used.

   Visible computations: percentages are the count divided by the area's
   count, valid, or total figure for the spec's weighting, times 100, or
   SYSMIS when that figure is zero.  The range is SYSMIS unless both extremes
   are set.  The sum is weight times mean.  Percent-of-sum divides weight
   times mean by the area's sum for the spec's sum variable.  Percentiles
   (0.5 unless the function is CTSF_PTILE) and the mode are derived by
   reading back the sorted cases.  Most 'case' labels are outside this
   excerpt. */
2739 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2740 union ctables_summary *s,
2741 const struct ctables_summary_spec *ss)
2743 switch (ss->function)
2749 return areas[ss->calc_area]->sequence;
2751 case CTSF_areaPCT_COUNT:
2753 const struct ctables_area *a = areas[ss->calc_area];
2754 double a_count = a->count[ss->weighting];
2755 return a_count ? s->count / a_count * 100 : SYSMIS;
2758 case CTSF_areaPCT_VALIDN:
2760 const struct ctables_area *a = areas[ss->calc_area];
2761 double a_valid = a->valid[ss->weighting];
2762 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2765 case CTSF_areaPCT_TOTALN:
2767 const struct ctables_area *a = areas[ss->calc_area];
2768 double a_total = a->total[ss->weighting];
2769 return a_total ? s->count / a_total * 100 : SYSMIS;
2784 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2789 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2795 double weight, variance;
2796 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2797 return calc_semean (variance, weight);
2803 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2804 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2809 double weight, mean;
2810 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2811 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2817 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2821 case CTSF_areaPCT_SUM:
2823 double weight, mean;
2824 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2825 if (weight == SYSMIS || mean == SYSMIS)
2828 const struct ctables_area *a = areas[ss->calc_area];
2829 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2830 double denom = sum->sum[ss->weighting];
2831 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Order statistics: the writer is converted into a reader and the result is
   stored in S->ovalue. */
2838 struct casereader *reader = casewriter_make_reader (s->writer);
2841 struct percentile *ptile = percentile_create (
2842 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2843 struct order_stats *os = &ptile->parent;
2844 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2845 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2846 statistic_destroy (&ptile->parent.parent);
2853 struct casereader *reader = casewriter_make_reader (s->writer);
2856 struct mode *mode = mode_create ();
2857 struct order_stats *os = &mode->parent;
2858 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2859 s->ovalue = mode->mode;
2860 statistic_destroy (&mode->parent.parent);
2868 /* CTABLES occurrences. */
/* A value that has occurred for some variable, kept as an element of a hash
   map.  'node' links it into the map.  The value is kept in a member
   'value' that ctables_add_occurrence() uses but whose declaration is
   outside this excerpt. */
2870 struct ctables_occurrence
2872 struct hmap_node node;
/* Records that VALUE has occurred for VAR in the hash map OCCURRENCES.

   The map is probed with the hash of VALUE at VAR's width.  An entry with an
   equal value ends the search (presumably by returning early -- the
   statement is outside this excerpt).  Otherwise a new entry holding a clone
   of VALUE is inserted. */
2877 ctables_add_occurrence (const struct variable *var,
2878 const union value *value,
2879 struct hmap *occurrences)
2881 int width = var_get_width (var);
2882 unsigned int hash = value_hash (value, width, 0);
2884 struct ctables_occurrence *o;
2885 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2887 if (value_equal (value, &o->value, width))
2890 o = xmalloc (sizeof *o);
2891 value_clone (&o->value, value, width);
2892 hmap_insert (occurrences, &o->node, hash);
/* Enumerators defined as aliases of the SETTINGS_VALUE_SHOW_* constants, so
   the two sets of values can be used interchangeably.  The enum tag line is
   outside this excerpt. */
2897 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2898 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2899 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2900 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
/* Members of one table cell (the struct tag line is outside this excerpt).
   A cell belongs to a section, records the areas that contain it, carries a
   summary variant, holds per-axis arrays of (category, value) pairs, and
   owns an array of summaries. */
2905 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2906 all the axes (except the scalar variable, if any). */
2907 struct hmap_node node;
2908 struct ctables_section *section;
2910 /* The areas that contain this cell. */
2911 uint32_t omit_areas;
2912 struct ctables_area *areas[N_CTATS];
2917 enum ctables_summary_variant sv;
2919 struct ctables_cell_axis
2921 struct ctables_cell_value
2923 const struct ctables_category *category;
2931 union ctables_summary *summaries;
/* One section of a table.  It points back to its table, holds one nest
   pointer and one occurrences map pointer per pivot axis, and owns hash maps
   of its cells and, per area type, of its areas. */
2934 struct ctables_section
2937 struct ctables_table *table;
2938 struct ctables_nest *nests[PIVOT_N_AXES];
2941 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2942 struct hmap cells; /* Contains "struct ctables_cell"s. */
2943 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2946 static void ctables_section_uninit (struct ctables_section *);
2948 struct ctables_table
2950 struct ctables *ctables;
2951 struct ctables_axis *axes[PIVOT_N_AXES];
2952 struct ctables_stack stacks[PIVOT_N_AXES];
2953 struct ctables_section *sections;
2955 enum pivot_axis_type summary_axis;
2956 struct ctables_summary_spec_set summary_specs;
2957 struct variable **sum_vars;
2960 enum pivot_axis_type slabels_axis;
2961 bool slabels_visible;
2963 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2965 Most commonly, label_axis[a] == a, and in particular we always have
2966 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2968 If ROWLABELS or COLLABELS is specified, then one of
2969 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2970 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2972 If any category labels are moved, then 'clabels_example' is one of the
2973 variables being moved (and it is otherwise NULL). All of the variables
2974 being moved have the same width, value labels, and categories, so this
2975 example variable can be used to find those out.
2977 The remaining members in this group are relevant only if category labels
2980 'clabels_values_map' holds a "struct ctables_value" for all the values
2981 that appear in all of the variables in the moved categories. It is
2982 accumulated as the data is read. Once the data is fully read, its
2983 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2985 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2986 enum pivot_axis_type clabels_from_axis;
2987 enum pivot_axis_type clabels_to_axis;
2988 int clabels_start_ofs, clabels_end_ofs;
2989 const struct variable *clabels_example;
2990 struct hmap clabels_values_map;
2991 struct ctables_value **clabels_values;
2992 size_t n_clabels_values;
2994 /* Indexed by variable dictionary index. */
2995 struct ctables_categories **categories;
2996 size_t n_categories;
3005 struct ctables_chisq *chisq;
3006 struct ctables_pairwise *pairwise;
3009 struct ctables_cell_sort_aux
3011 const struct ctables_nest *nest;
3012 enum pivot_axis_type a;
3016 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3018 const struct ctables_cell_sort_aux *aux = aux_;
3019 struct ctables_cell *const *ap = a_;
3020 struct ctables_cell *const *bp = b_;
3021 const struct ctables_cell *a = *ap;
3022 const struct ctables_cell *b = *bp;
3024 const struct ctables_nest *nest = aux->nest;
3025 for (size_t i = 0; i < nest->n; i++)
3026 if (i != nest->scale_idx)
3028 const struct variable *var = nest->vars[i];
3029 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3030 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3031 if (a_cv->category != b_cv->category)
3032 return a_cv->category > b_cv->category ? 1 : -1;
3034 const union value *a_val = &a_cv->value;
3035 const union value *b_val = &b_cv->value;
3036 switch (a_cv->category->type)
3042 case CCT_POSTCOMPUTE:
3043 case CCT_EXCLUDED_MISSING:
3044 /* Must be equal. */
3052 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3060 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3062 return a_cv->category->sort_ascending ? cmp : -cmp;
3068 const char *a_label = var_lookup_value_label (var, a_val);
3069 const char *b_label = var_lookup_value_label (var, b_val);
3075 cmp = strcmp (a_label, b_label);
3081 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3084 return a_cv->category->sort_ascending ? cmp : -cmp;
3095 static struct ctables_area *
3096 ctables_area_insert (struct ctables_cell *cell, enum ctables_area_type area)
3098 struct ctables_section *s = cell->section;
3100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3102 const struct ctables_nest *nest = s->nests[a];
3103 for (size_t i = 0; i < nest->n_areas[area]; i++)
3105 size_t v_idx = nest->areas[area][i];
3106 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3107 hash = hash_pointer (cv->category, hash);
3108 if (cv->category->type != CCT_TOTAL
3109 && cv->category->type != CCT_SUBTOTAL
3110 && cv->category->type != CCT_POSTCOMPUTE)
3111 hash = value_hash (&cv->value,
3112 var_get_width (nest->vars[v_idx]), hash);
3116 struct ctables_area *a;
3117 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3119 const struct ctables_cell *df = a->example;
3120 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3122 const struct ctables_nest *nest = s->nests[a];
3123 for (size_t i = 0; i < nest->n_areas[area]; i++)
3125 size_t v_idx = nest->areas[area][i];
3126 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3127 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3128 if (cv1->category != cv2->category
3129 || (cv1->category->type != CCT_TOTAL
3130 && cv1->category->type != CCT_SUBTOTAL
3131 && cv1->category->type != CCT_POSTCOMPUTE
3132 && !value_equal (&cv1->value, &cv2->value,
3133 var_get_width (nest->vars[v_idx]))))
3142 struct ctables_sum *sums = (s->table->n_sum_vars
3143 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3146 a = xmalloc (sizeof *a);
3147 *a = (struct ctables_area) { .example = cell, .sums = sums };
3148 hmap_insert (&s->areas[area], &a->node, hash);
3152 static struct ctables_cell *
3153 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3154 const struct ctables_category **cats[PIVOT_N_AXES])
3157 enum ctables_summary_variant sv = CSV_CELL;
3158 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3160 const struct ctables_nest *nest = s->nests[a];
3161 for (size_t i = 0; i < nest->n; i++)
3162 if (i != nest->scale_idx)
3164 hash = hash_pointer (cats[a][i], hash);
3165 if (cats[a][i]->type != CCT_TOTAL
3166 && cats[a][i]->type != CCT_SUBTOTAL
3167 && cats[a][i]->type != CCT_POSTCOMPUTE)
3168 hash = value_hash (case_data (c, nest->vars[i]),
3169 var_get_width (nest->vars[i]), hash);
3175 struct ctables_cell *cell;
3176 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3178 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3180 const struct ctables_nest *nest = s->nests[a];
3181 for (size_t i = 0; i < nest->n; i++)
3182 if (i != nest->scale_idx
3183 && (cats[a][i] != cell->axes[a].cvs[i].category
3184 || (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE
3187 && !value_equal (case_data (c, nest->vars[i]),
3188 &cell->axes[a].cvs[i].value,
3189 var_get_width (nest->vars[i])))))
3198 cell = xmalloc (sizeof *cell);
3202 cell->omit_areas = 0;
3203 cell->postcompute = false;
3204 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3206 const struct ctables_nest *nest = s->nests[a];
3207 cell->axes[a].cvs = (nest->n
3208 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3210 for (size_t i = 0; i < nest->n; i++)
3212 const struct ctables_category *cat = cats[a][i];
3213 const struct variable *var = nest->vars[i];
3214 const union value *value = case_data (c, var);
3215 if (i != nest->scale_idx)
3217 const struct ctables_category *subtotal = cat->subtotal;
3218 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3221 if (cat->type == CCT_TOTAL
3222 || cat->type == CCT_SUBTOTAL
3223 || cat->type == CCT_POSTCOMPUTE)
3227 case PIVOT_AXIS_COLUMN:
3228 cell->omit_areas |= ((1u << CTAT_TABLE) |
3229 (1u << CTAT_LAYER) |
3230 (1u << CTAT_LAYERCOL) |
3231 (1u << CTAT_SUBTABLE) |
3234 case PIVOT_AXIS_ROW:
3235 cell->omit_areas |= ((1u << CTAT_TABLE) |
3236 (1u << CTAT_LAYER) |
3237 (1u << CTAT_LAYERROW) |
3238 (1u << CTAT_SUBTABLE) |
3241 case PIVOT_AXIS_LAYER:
3242 cell->omit_areas |= ((1u << CTAT_TABLE) |
3243 (1u << CTAT_LAYER));
3247 if (cat->type == CCT_POSTCOMPUTE)
3248 cell->postcompute = true;
3251 cell->axes[a].cvs[i].category = cat;
3252 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3256 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3257 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3258 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3259 for (size_t i = 0; i < specs->n; i++)
3260 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3261 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3262 cell->areas[at] = ctables_area_insert (cell, at);
3263 hmap_insert (&s->cells, &cell->node, hash);
3268 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3270 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3275 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3276 const struct ctables_category **cats[PIVOT_N_AXES],
3277 bool is_included, double weight[N_CTWS])
3279 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3280 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3282 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3283 const union value *value = case_data (c, specs->var);
3284 bool is_missing = var_is_value_missing (specs->var, value);
3285 bool is_scale_missing
3286 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3288 for (size_t i = 0; i < specs->n; i++)
3289 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3290 is_scale_missing, is_included,
3291 weight[specs->specs[i].weighting]);
3292 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3293 if (!(cell->omit_areas && (1u << at)))
3295 struct ctables_area *a = cell->areas[at];
3297 add_weight (a->total, weight);
3299 add_weight (a->count, weight);
3302 add_weight (a->valid, weight);
3304 if (!is_scale_missing)
3305 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3307 const struct variable *var = s->table->sum_vars[i];
3308 double addend = case_num (c, var);
3309 if (!var_is_num_missing (var, addend))
3310 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3311 a->sums[i].sum[wt] += addend * weight[wt];
3318 recurse_totals (struct ctables_section *s, const struct ccase *c,
3319 const struct ctables_category **cats[PIVOT_N_AXES],
3320 bool is_included, double weight[N_CTWS],
3321 enum pivot_axis_type start_axis, size_t start_nest)
3323 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3325 const struct ctables_nest *nest = s->nests[a];
3326 for (size_t i = start_nest; i < nest->n; i++)
3328 if (i == nest->scale_idx)
3331 const struct variable *var = nest->vars[i];
3333 const struct ctables_category *total = ctables_categories_total (
3334 s->table->categories[var_get_dict_index (var)]);
3337 const struct ctables_category *save = cats[a][i];
3339 ctables_cell_add__ (s, c, cats, is_included, weight);
3340 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3349 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3350 const struct ctables_category **cats[PIVOT_N_AXES],
3351 bool is_included, double weight[N_CTWS],
3352 enum pivot_axis_type start_axis, size_t start_nest)
3354 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3356 const struct ctables_nest *nest = s->nests[a];
3357 for (size_t i = start_nest; i < nest->n; i++)
3359 if (i == nest->scale_idx)
3362 const struct ctables_category *save = cats[a][i];
3365 cats[a][i] = save->subtotal;
3366 ctables_cell_add__ (s, c, cats, is_included, weight);
3367 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3376 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3377 double weight[N_CTWS])
3379 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3380 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3381 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3382 const struct ctables_category **cats[PIVOT_N_AXES] =
3384 [PIVOT_AXIS_LAYER] = layer_cats,
3385 [PIVOT_AXIS_ROW] = row_cats,
3386 [PIVOT_AXIS_COLUMN] = column_cats,
3389 bool is_included = true;
3391 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3393 const struct ctables_nest *nest = s->nests[a];
3394 for (size_t i = 0; i < nest->n; i++)
3395 if (i != nest->scale_idx)
3397 const struct variable *var = nest->vars[i];
3398 const union value *value = case_data (c, var);
3400 cats[a][i] = ctables_categories_match (
3401 s->table->categories[var_get_dict_index (var)], value, var);
3404 if (i != nest->summary_idx)
3407 if (!var_is_value_missing (var, value))
3410 static const struct ctables_category cct_excluded_missing = {
3411 .type = CCT_EXCLUDED_MISSING,
3414 cats[a][i] = &cct_excluded_missing;
3415 is_included = false;
3421 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3423 const struct ctables_nest *nest = s->nests[a];
3424 for (size_t i = 0; i < nest->n; i++)
3425 if (i != nest->scale_idx)
3427 const struct variable *var = nest->vars[i];
3428 const union value *value = case_data (c, var);
3429 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3433 ctables_cell_add__ (s, c, cats, is_included, weight);
3434 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3435 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
3438 struct ctables_value
3440 struct hmap_node node;
3445 static struct ctables_value *
3446 ctables_value_find__ (const struct ctables_table *t, const union value *value,
3447 int width, unsigned int hash)
3449 struct ctables_value *clv;
3450 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3451 hash, &t->clabels_values_map)
3452 if (value_equal (value, &clv->value, width))
3458 ctables_value_insert (struct ctables_table *t, const union value *value,
3461 unsigned int hash = value_hash (value, width, 0);
3462 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3465 clv = xmalloc (sizeof *clv);
3466 value_clone (&clv->value, value, width);
3467 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3471 static const struct ctables_value *
3472 ctables_value_find (const struct ctables_cell *cell)
3474 const struct ctables_section *s = cell->section;
3475 const struct ctables_table *t = s->table;
3476 if (!t->clabels_example)
3479 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3480 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3481 const union value *value
3482 = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3483 int width = var_get_width (var);
3484 const struct ctables_value *ctv = ctables_value_find__ (
3485 t, value, width, value_hash (value, width, 0));
3486 assert (ctv != NULL);
3491 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3493 const struct ctables_value *const *ap = a_;
3494 const struct ctables_value *const *bp = b_;
3495 const struct ctables_value *a = *ap;
3496 const struct ctables_value *b = *bp;
3497 const int *width = width_;
3498 return value_compare_3way (&a->value, &b->value, *width);
3502 ctables_sort_clabels_values (struct ctables_table *t)
3504 const struct variable *v0 = t->clabels_example;
3505 int width = var_get_width (v0);
3507 size_t i0 = var_get_dict_index (v0);
3508 struct ctables_categories *c0 = t->categories[i0];
3509 if (t->show_empty[i0])
3511 const struct val_labs *val_labs = var_get_value_labels (v0);
3512 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3513 vl = val_labs_next (val_labs, vl))
3514 if (ctables_categories_match (c0, &vl->value, v0))
3515 ctables_value_insert (t, &vl->value, width);
3518 size_t n = hmap_count (&t->clabels_values_map);
3519 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3521 struct ctables_value *clv;
3523 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3524 t->clabels_values[i++] = clv;
3525 t->n_clabels_values = n;
3528 sort (t->clabels_values, n, sizeof *t->clabels_values,
3529 compare_ctables_values_3way, &width);
3531 for (size_t i = 0; i < n; i++)
3532 t->clabels_values[i]->leaf = i;
3537 const struct dictionary *dict;
3538 struct pivot_table_look *look;
3540 /* For CTEF_* formats. */
3541 struct fmt_settings ctables_formats;
3543 /* If this is NULL, zeros are displayed using the normal print format.
3544 Otherwise, this string is displayed. */
3547 /* If this is NULL, missing values are displayed using the normal print
3548 format. Otherwise, this string is displayed. */
3551 /* Indexed by variable dictionary index. */
3552 enum ctables_vlabel *vlabels;
3554 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3556 bool mrsets_count_duplicates; /* MRSETS. */
3557 bool smissing_listwise; /* SMISSING. */
3558 struct variable *e_weight; /* WEIGHT. */
3559 int hide_threshold; /* HIDESMALLCOUNTS. */
3561 struct ctables_table **tables;
/* Postcompute binary operator: returns A plus B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute binary operator: returns A minus B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute binary operator: returns A times B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
3584 ctpo_div (double a, double b)
3586 return b ? a / b : SYSMIS;
3590 ctpo_pow (double a, double b)
3592 int save_errno = errno;
3594 double result = pow (a, b);
3602 ctpo_neg (double a, double b UNUSED)
3607 struct ctables_pcexpr_evaluate_ctx
3609 const struct ctables_cell *cell;
3610 const struct ctables_section *section;
3611 const struct ctables_categories *cats;
3612 enum pivot_axis_type pc_a;
3615 enum fmt_type parse_format;
3618 static double ctables_pcexpr_evaluate (
3619 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3622 ctables_pcexpr_evaluate_nonterminal (
3623 const struct ctables_pcexpr_evaluate_ctx *ctx,
3624 const struct ctables_pcexpr *e, size_t n_args,
3625 double evaluate (double, double))
3627 double args[2] = { 0, 0 };
3628 for (size_t i = 0; i < n_args; i++)
3630 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3631 if (!isfinite (args[i]) || args[i] == SYSMIS)
3634 return evaluate (args[0], args[1]);
3638 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3639 const struct ctables_cell_value *pc_cv)
3641 const struct ctables_section *s = ctx->section;
3644 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3646 const struct ctables_nest *nest = s->nests[a];
3647 for (size_t i = 0; i < nest->n; i++)
3648 if (i != nest->scale_idx)
3650 const struct ctables_cell_value *cv
3651 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3652 : &ctx->cell->axes[a].cvs[i]);
3653 hash = hash_pointer (cv->category, hash);
3654 if (cv->category->type != CCT_TOTAL
3655 && cv->category->type != CCT_SUBTOTAL
3656 && cv->category->type != CCT_POSTCOMPUTE)
3657 hash = value_hash (&cv->value,
3658 var_get_width (nest->vars[i]), hash);
3662 struct ctables_cell *tc;
3663 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3665 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3667 const struct ctables_nest *nest = s->nests[a];
3668 for (size_t i = 0; i < nest->n; i++)
3669 if (i != nest->scale_idx)
3671 const struct ctables_cell_value *p_cv
3672 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3673 : &ctx->cell->axes[a].cvs[i]);
3674 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3675 if (p_cv->category != t_cv->category
3676 || (p_cv->category->type != CCT_TOTAL
3677 && p_cv->category->type != CCT_SUBTOTAL
3678 && p_cv->category->type != CCT_POSTCOMPUTE
3679 && !value_equal (&p_cv->value,
3681 var_get_width (nest->vars[i]))))
3693 const struct ctables_table *t = s->table;
3694 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3695 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3696 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3697 &specs->specs[ctx->summary_idx]);
3701 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3702 const struct ctables_pcexpr *e)
3709 case CTPO_CAT_NRANGE:
3710 case CTPO_CAT_SRANGE:
3711 case CTPO_CAT_MISSING:
3712 case CTPO_CAT_OTHERNM:
3714 struct ctables_cell_value cv = {
3715 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3717 assert (cv.category != NULL);
3719 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3720 const struct ctables_occurrence *o;
3723 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3724 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3725 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3727 cv.value = o->value;
3728 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3733 case CTPO_CAT_NUMBER:
3734 case CTPO_CAT_SUBTOTAL:
3735 case CTPO_CAT_TOTAL:
3737 struct ctables_cell_value cv = {
3738 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3739 .value = { .f = e->number },
3741 assert (cv.category != NULL);
3742 return ctables_pcexpr_evaluate_category (ctx, &cv);
3745 case CTPO_CAT_STRING:
3747 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3749 if (width > e->string.length)
3751 s = xmalloc (width);
3752 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3755 const struct ctables_category *category
3756 = ctables_find_category_for_postcompute (
3757 ctx->section->table->ctables->dict,
3758 ctx->cats, ctx->parse_format, e);
3759 assert (category != NULL);
3761 struct ctables_cell_value cv = { .category = category };
3762 if (category->type == CCT_NUMBER)
3763 cv.value.f = category->number;
3764 else if (category->type == CCT_STRING)
3765 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3769 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3775 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3778 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3781 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3784 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3787 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3790 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3796 static const struct ctables_category *
3797 ctables_cell_postcompute (const struct ctables_section *s,
3798 const struct ctables_cell *cell,
3799 enum pivot_axis_type *pc_a_p,
3802 assert (cell->postcompute);
3803 const struct ctables_category *pc_cat = NULL;
3804 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3805 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3807 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3808 if (cv->category->type == CCT_POSTCOMPUTE)
3812 /* Multiple postcomputes cross each other. The value is
3817 pc_cat = cv->category;
3821 *pc_a_idx_p = pc_a_idx;
3825 assert (pc_cat != NULL);
3830 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3831 const struct ctables_cell *cell,
3832 const struct ctables_summary_spec *ss,
3833 struct fmt_spec *format,
3834 bool *is_ctables_format,
3837 enum pivot_axis_type pc_a = 0;
3838 size_t pc_a_idx = 0;
3839 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3840 s, cell, &pc_a, &pc_a_idx);
3844 const struct ctables_postcompute *pc = pc_cat->pc;
3847 for (size_t i = 0; i < pc->specs->n; i++)
3849 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3850 if (ss->function == ss2->function
3851 && ss->weighting == ss2->weighting
3852 && ss->calc_area == ss2->calc_area
3853 && ss->percentile == ss2->percentile)
3855 *format = ss2->format;
3856 *is_ctables_format = ss2->is_ctables_format;
3862 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3863 const struct ctables_categories *cats = s->table->categories[
3864 var_get_dict_index (var)];
3865 struct ctables_pcexpr_evaluate_ctx ctx = {
3870 .pc_a_idx = pc_a_idx,
3871 .summary_idx = summary_idx,
3872 .parse_format = pc_cat->parse_format,
3874 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3877 /* Chi-square test (SIGTEST). */
3878 struct ctables_chisq
3881 bool include_mrsets;
3885 /* Pairwise comparison test (COMPARETEST). */
3886 struct ctables_pairwise
3888 enum { PROP, MEAN } type;
3890 bool include_mrsets;
3891 bool meansvariance_allcats;
3893 enum { BONFERRONI = 1, BH } adjust;
3902 parse_col_width (struct lexer *lexer, const char *name, double *width)
3904 lex_match (lexer, T_EQUALS);
3905 if (lex_match_id (lexer, "DEFAULT"))
3907 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3909 *width = lex_number (lexer);
/* Parses NO or YES from LEXER, storing false or true, respectively, in *B.
   Returns true if successful.  If neither keyword is present, reports an
   error, leaves *B unchanged, and returns false. */
static bool
parse_bool (struct lexer *lexer, bool *b)
{
  const bool is_no = lex_match_id (lexer, "NO");
  if (!is_no && !lex_match_id (lexer, "YES"))
    {
      lex_error_expecting (lexer, "YES", "NO");
      return false;
    }

  *b = !is_no;
  return true;
}
3934 ctables_chisq_destroy (struct ctables_chisq *chisq)
3940 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
3946 ctables_table_destroy (struct ctables_table *t)
3951 for (size_t i = 0; i < t->n_sections; i++)
3952 ctables_section_uninit (&t->sections[i]);
3955 for (size_t i = 0; i < t->n_categories; i++)
3956 ctables_categories_unref (t->categories[i]);
3957 free (t->categories);
3958 free (t->show_empty);
3960 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3962 ctables_axis_destroy (t->axes[a]);
3963 ctables_stack_uninit (&t->stacks[a]);
3965 free (t->summary_specs.specs);
3967 struct ctables_value *ctv, *next_ctv;
3968 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3969 &t->clabels_values_map)
3971 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3972 hmap_delete (&t->clabels_values_map, &ctv->node);
3975 hmap_destroy (&t->clabels_values_map);
3976 free (t->clabels_values);
3982 ctables_chisq_destroy (t->chisq);
3983 ctables_pairwise_destroy (t->pairwise);
3988 ctables_destroy (struct ctables *ct)
3993 struct ctables_postcompute *pc, *next_pc;
3994 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3998 msg_location_destroy (pc->location);
3999 ctables_pcexpr_destroy (pc->expr);
4003 ctables_summary_spec_set_uninit (pc->specs);
4006 hmap_delete (&ct->postcomputes, &pc->hmap_node);
4009 hmap_destroy (&ct->postcomputes);
4011 fmt_settings_uninit (&ct->ctables_formats);
4012 pivot_table_look_unref (ct->look);
4016 for (size_t i = 0; i < ct->n_tables; i++)
4017 ctables_table_destroy (ct->tables[i]);
4023 all_strings (struct variable **vars, size_t n_vars,
4024 const struct ctables_category *cat)
4026 for (size_t j = 0; j < n_vars; j++)
4027 if (var_is_numeric (vars[j]))
4029 msg_at (SE, cat->location,
4030 _("This category specification may be applied only to string "
4031 "variables, but this subcommand tries to apply it to "
4032 "numeric variable %s."),
4033 var_get_name (vars[j]));
4040 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4041 struct ctables *ct, struct ctables_table *t)
4043 if (!lex_force_match_id (lexer, "VARIABLES"))
4045 lex_match (lexer, T_EQUALS);
4047 struct variable **vars;
4049 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
4052 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
4053 for (size_t i = 1; i < n_vars; i++)
4055 const struct fmt_spec *f = var_get_print_format (vars[i]);
4056 if (f->type != common_format->type)
4058 common_format = NULL;
4064 && (fmt_get_category (common_format->type)
4065 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
4067 struct ctables_categories *c = xmalloc (sizeof *c);
4068 *c = (struct ctables_categories) { .n_refs = 1 };
4070 bool set_categories = false;
4072 size_t allocated_cats = 0;
4073 int cats_start_ofs = -1;
4074 int cats_end_ofs = -1;
4075 if (lex_match (lexer, T_LBRACK))
4077 set_categories = true;
4078 cats_start_ofs = lex_ofs (lexer);
4081 if (c->n_cats >= allocated_cats)
4082 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4084 int start_ofs = lex_ofs (lexer);
4085 struct ctables_category *cat = &c->cats[c->n_cats];
4086 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4088 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4091 lex_match (lexer, T_COMMA);
4093 while (!lex_match (lexer, T_RBRACK));
4094 cats_end_ofs = lex_ofs (lexer) - 1;
4097 struct ctables_category cat = {
4099 .include_missing = false,
4100 .sort_ascending = true,
4102 bool show_totals = false;
4103 char *total_label = NULL;
4104 bool totals_before = false;
4105 int key_start_ofs = 0;
4106 int key_end_ofs = 0;
4107 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4109 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4111 set_categories = true;
4112 lex_match (lexer, T_EQUALS);
4113 if (lex_match_id (lexer, "A"))
4114 cat.sort_ascending = true;
4115 else if (lex_match_id (lexer, "D"))
4116 cat.sort_ascending = false;
4119 lex_error_expecting (lexer, "A", "D");
4123 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4125 set_categories = true;
4126 key_start_ofs = lex_ofs (lexer) - 1;
4127 lex_match (lexer, T_EQUALS);
4128 if (lex_match_id (lexer, "VALUE"))
4129 cat.type = CCT_VALUE;
4130 else if (lex_match_id (lexer, "LABEL"))
4131 cat.type = CCT_LABEL;
4134 cat.type = CCT_FUNCTION;
4135 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4136 &cat.weighting, &cat.area))
4139 if (lex_match (lexer, T_LPAREN))
4141 cat.sort_var = parse_variable (lexer, dict);
4145 if (cat.sort_function == CTSF_PTILE)
4147 lex_match (lexer, T_COMMA);
4148 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4150 cat.percentile = lex_number (lexer);
4154 if (!lex_force_match (lexer, T_RPAREN))
4157 else if (ctables_function_availability (cat.sort_function)
4160 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4164 key_end_ofs = lex_ofs (lexer) - 1;
4166 if (cat.type == CCT_FUNCTION)
4168 lex_ofs_error (lexer, key_start_ofs, key_end_ofs,
4169 _("Data-dependent sorting is not implemented."));
4173 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4175 set_categories = true;
4176 lex_match (lexer, T_EQUALS);
4177 if (lex_match_id (lexer, "INCLUDE"))
4178 cat.include_missing = true;
4179 else if (lex_match_id (lexer, "EXCLUDE"))
4180 cat.include_missing = false;
4183 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4187 else if (lex_match_id (lexer, "TOTAL"))
4189 set_categories = true;
4190 lex_match (lexer, T_EQUALS);
4191 if (!parse_bool (lexer, &show_totals))
4194 else if (lex_match_id (lexer, "LABEL"))
4196 lex_match (lexer, T_EQUALS);
4197 if (!lex_force_string (lexer))
4200 total_label = ss_xstrdup (lex_tokss (lexer));
4203 else if (lex_match_id (lexer, "POSITION"))
4205 lex_match (lexer, T_EQUALS);
4206 if (lex_match_id (lexer, "BEFORE"))
4207 totals_before = true;
4208 else if (lex_match_id (lexer, "AFTER"))
4209 totals_before = false;
4212 lex_error_expecting (lexer, "BEFORE", "AFTER");
4216 else if (lex_match_id (lexer, "EMPTY"))
4218 lex_match (lexer, T_EQUALS);
4221 if (lex_match_id (lexer, "INCLUDE"))
4223 else if (lex_match_id (lexer, "EXCLUDE"))
4227 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4231 for (size_t i = 0; i < n_vars; i++)
4232 t->show_empty[var_get_dict_index (vars[i])] = show_empty;
4237 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4238 "TOTAL", "LABEL", "POSITION", "EMPTY");
4240 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
4248 cat.location = lex_ofs_location (lexer, key_start_ofs, key_end_ofs);
4250 if (c->n_cats >= allocated_cats)
4251 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4252 c->cats[c->n_cats++] = cat;
4257 if (c->n_cats >= allocated_cats)
4258 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4260 struct ctables_category *totals;
4263 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4264 totals = &c->cats[0];
4267 totals = &c->cats[c->n_cats];
4270 *totals = (struct ctables_category) {
4272 .total_label = total_label ? total_label : xstrdup (_("Total")),
4276 struct ctables_category *subtotal = NULL;
4277 for (size_t i = totals_before ? 0 : c->n_cats;
4278 totals_before ? i < c->n_cats : i-- > 0;
4279 totals_before ? i++ : 0)
4281 struct ctables_category *cat = &c->cats[i];
4290 cat->subtotal = subtotal;
4293 case CCT_POSTCOMPUTE:
4304 case CCT_EXCLUDED_MISSING:
4309 if (cats_start_ofs != -1)
4311 for (size_t i = 0; i < c->n_cats; i++)
4313 struct ctables_category *cat = &c->cats[i];
4316 case CCT_POSTCOMPUTE:
4317 cat->parse_format = parse_strings ? common_format->type : FMT_F;
4318 struct msg_location *cats_location
4319 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4320 bool ok = ctables_recursive_check_postcompute (
4321 dict, cat->pc->expr, cat, c, cats_location);
4322 msg_location_destroy (cats_location);
4329 for (size_t j = 0; j < n_vars; j++)
4330 if (var_is_alpha (vars[j]))
4332 msg_at (SE, cat->location,
4333 _("This category specification may be applied "
4334 "only to numeric variables, but this "
4335 "subcommand tries to apply it to string "
4337 var_get_name (vars[j]));
4346 if (!parse_category_string (cat->location, cat->string, dict,
4347 common_format->type, &n))
4350 ss_dealloc (&cat->string);
4352 cat->type = CCT_NUMBER;
4355 else if (!all_strings (vars, n_vars, cat))
4364 if (!cat->srange[0].string)
4366 else if (!parse_category_string (cat->location,
4367 cat->srange[0], dict,
4368 common_format->type, &n[0]))
4371 if (!cat->srange[1].string)
4373 else if (!parse_category_string (cat->location,
4374 cat->srange[1], dict,
4375 common_format->type, &n[1]))
4378 ss_dealloc (&cat->srange[0]);
4379 ss_dealloc (&cat->srange[1]);
4381 cat->type = CCT_NRANGE;
4382 cat->nrange[0] = n[0];
4383 cat->nrange[1] = n[1];
4385 else if (!all_strings (vars, n_vars, cat))
4396 case CCT_EXCLUDED_MISSING:
4403 for (size_t i = 0; i < n_vars; i++)
4405 struct ctables_categories **cp
4406 = &t->categories[var_get_dict_index (vars[i])];
4407 ctables_categories_unref (*cp);
4412 ctables_categories_unref (c);
4417 ctables_categories_unref (c);
/* Set of summary specs this item refers to.  The comparison function that
   follows reads the item's current spec as set->specs[ofs]. */
4425 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that A and B currently refer to
   (set->specs[ofs] for each).  The keys are examined in this order:
   function, weighting, calc_area, percentile, and finally label, where a
   null label compares as the empty string.  The result is negative, zero,
   or positive, in the manner of strcmp(). */
4430 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4432 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4433 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4434 if (as->function != bs->function)
4435 return as->function > bs->function ? 1 : -1;
4436 else if (as->weighting != bs->weighting)
4437 return as->weighting > bs->weighting ? 1 : -1;
4438 else if (as->calc_area != bs->calc_area)
4439 return as->calc_area > bs->calc_area ? 1 : -1;
/* Unlike the keys above, the sense is reversed here: the spec with the
   smaller percentile compares as the greater one. */
4440 else if (as->percentile != bs->percentile)
4441 return as->percentile < bs->percentile ? 1 : -1;
/* Everything else is equal, so fall back to comparing labels. */
4443 const char *as_label = as->label ? as->label : "";
4444 const char *bs_label = bs->label ? bs->label : "";
4445 return strcmp (as_label, bs_label);
/* Recursively chooses one nest index per axis, storing the choices in IX,
   and appends one section to T for every combination.  A is the axis being
   enumerated at this level of the recursion; ctables_execute() starts the
   recursion with A == 0.  IX is scratch space that the recursion fills in.

   T->sections must already have room for every combination. */
4449 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4450 size_t ix[PIVOT_N_AXES])
4452 if (a < PIVOT_N_AXES)
/* An axis whose stack is empty still gets a single iteration, so that the
   recursion proceeds to the remaining axes. */
4454 size_t limit = MAX (t->stacks[a].n, 1);
4455 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4456 ctables_table_add_section (t, a + 1, ix);
/* Reached once IX holds an index for every axis: take the next unused slot
   in T->sections and initialize it. */
4460 struct ctables_section *s = &t->sections[t->n_sections++];
4461 *s = (struct ctables_section) {
4463 .cells = HMAP_INITIALIZER (s->cells),
/* A is reused as a plain loop counter from here on. */
4465 for (a = 0; a < PIVOT_N_AXES; a++)
4468 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable in the nest. */
4470 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4471 for (size_t i = 0; i < nest->n; i++)
4472 hmap_init (&s->occurrences[a][i]);
4474 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4475 hmap_init (&s->areas[at]);
/* Formats D according to FORMAT and SETTINGS by calling data_out_stretchy()
   with "UTF-8" as its encoding argument, then edits the resulting string in
   place to relocate a minus sign as explained in the comment in the body.

   NOTE(review): the return statement is not visible in this excerpt.  The
   caller hands the result to pivot_value_new_user_text_nocopy(), so
   presumably the string is returned and ownership passes to the caller --
   confirm. */
4480 ctables_format (double d, const struct fmt_spec *format,
4481 const struct fmt_settings *settings)
4483 const union value v = { .f = d };
4484 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4486 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4487 produce the results we want for negative numbers, putting the negative
4488 sign in the wrong spot, before the prefix instead of after it. We can't,
4489 in fact, produce the desired results using a custom-currency
4490 specification. Instead, we postprocess the output, moving the negative
4493 NEQUAL: "-N=3" => "N=-3"
4494 PAREN: "-(3)" => "(-3)"
4495 PCTPAREN: "-(3%)" => "(-3%)"
4497 This transformation doesn't affect NEGPAREN. */
4498 char *minus_src = strchr (s, '-');
/* Only the first '-' in the string is considered, and it is left alone when
   the character just before it is 'E' (presumably the sign of an exponent
   -- confirm). */
4499 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination is the '=' of "N=" when that prefix is present, otherwise
   the first '('.  NOTE(review): 'lparen' is null when the string has neither;
   any guard against a null 'minus_dst' is not visible in this excerpt. */
4501 char *n_equals = strstr (s, "N=");
4502 char *lparen = strchr (s, '(');
4503 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4505 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  For each nest it tests
   whether the nest is anything other than a single variable that is also the
   nest's scale variable (n != 1 || scale_idx != 0), and whether that
   variable's entry in t->ctables->vlabels differs from CTVL_NONE.

   NOTE(review): the return statements are not visible in this excerpt.  The
   caller stores the result in a dimension's 'hide_all_labels', so presumably
   this returns true only when neither test fires for any nest -- confirm. */
4511 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4513 for (size_t i = 0; i < t->stacks[a].n; i++)
4515 struct ctables_nest *nest = &t->stacks[a].nests[i];
4516 if (nest->n != 1 || nest->scale_idx != 0)
4519 enum ctables_vlabel vlabel
4520 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4521 if (vlabel != CTVL_NONE)
/* Returns -1 if A < B, 1 if A > B, and 0 if they are equal. */
4528 compare_ints_3way (int a, int b)
/* 'a > b' evaluates to 1 or 0, which supplies the two remaining results. */
4530 return a < b ? -1 : a > b;
/* Comparison callback, passed to sort() by ctables_table_output().  A_ and
   B_ each point to a 'struct ctables_cell *'.  AUX is unused.

   Cells are compared by axes[axis].leaf for each axis in increasing axis
   order, and then by the 'leaf' member of the ctables_value that
   ctables_value_find() yields for each cell.

   NOTE(review): the conditions and return statements between the visible
   lines are not shown in this excerpt; presumably the first nonzero
   comparison result is returned -- confirm. */
4534 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
4535 const void *aux UNUSED)
4537 struct ctables_cell *const *ap = a_;
4538 struct ctables_cell *const *bp = b_;
4539 const struct ctables_cell *a = *ap;
4540 const struct ctables_cell *b = *bp;
4548 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
4550 int cmp = compare_ints_3way (a->axes[axis].leaf, b->axes[axis].leaf);
/* The per-axis leaves did not decide the order, so consult the values found
   by ctables_value_find(). */
4555 const struct ctables_value *a_ctv = ctables_value_find (a);
4556 const struct ctables_value *b_ctv = ctables_value_find (b);
4559 int cmp = compare_ints_3way (a_ctv->leaf, b_ctv->leaf);
/* Either both cells have such a value or neither does. */
4564 assert (!a_ctv && !b_ctv);
/* Builds a pivot table from the cells accumulated in T's sections and
   submits it with pivot_table_submit().

   The steps visible in this function are:

   - Create the pivot table, titled with t->title or else with the default
     "Custom Tables", and set its caption and corner text from t->caption
     and t->corner.

   - When 'summary_dimension' is true, add a "Statistics" dimension on
     t->slabels_axis with one leaf per spec in t->summary_specs.

   - When t->clabels_example is nonnull, add a "Row Categories" or "Column
     Categories" dimension with one leaf per entry in t->clabels_values.

   - For each axis, create a "Rows", "Columns", or "Layers" dimension and,
     for each nest in the axis's stack: gather the cells of the sections that
     use that nest, sort them with ctables_cell_compare_3way, decide which
     label levels the nest has, and create pivot categories level by level,
     starting from the first level at which a cell differs from the cell
     sorted just before it.  The resulting leaf index is stored in
     cell->axes[a].leaf.

   - Sort all of the table's cells with ctables_cell_compare_leaf_3way and,
     walking them in that order, give each area whose 'sequence' is still
     zero the next number for its area type.

   - For every cell and every summary spec that applies to it, compute the
     value (through ctables_cell_calculate_postcompute() for postcompute
     cells, otherwise ctables_summary_value()) and store it with
     pivot_table_put().  Text is substituted for the number in three cases:
     "<N" for a CTSF_COUNT value below a nonzero ct->hide_threshold,
     ct->zero for a zero value, and ct->missing for SYSMIS, the latter two
     only when the corresponding string is set.  Specs flagged
     'is_ctables_format' are rendered to text with ctables_format().

   CT supplies the table look, the per-variable vlabels settings, and the
   hide_threshold, zero, missing, and ctables_formats settings. */
4569 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4571 struct pivot_table *pt = pivot_table_create__ (
4573 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4574 : pivot_value_new_text (N_("Custom Tables"))),
4577 pivot_table_set_caption (
4578 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4580 pivot_table_set_corner_text (
4581 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate dimension for the summary statistics is used when the summary
   labels live on a different axis from the summaries themselves, or when
   the labels are invisible and there is more than one summary spec. */
4583 bool summary_dimension = (t->summary_axis != t->slabels_axis
4584 || (!t->slabels_visible
4585 && t->summary_specs.n > 1));
4586 if (summary_dimension)
4588 struct pivot_dimension *d = pivot_dimension_create (
4589 pt, t->slabels_axis, N_("Statistics"));
4590 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4591 if (!t->slabels_visible)
4592 d->hide_all_labels = true;
4593 for (size_t i = 0; i < specs->n; i++)
4594 pivot_category_create_leaf (
4595 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* t->clabels_example is nonnull when category labels have been moved to
   another axis; they then get a dimension of their own, placed on the axis
   given by t->label_axis[t->clabels_from_axis]. */
4598 bool categories_dimension = t->clabels_example != NULL;
4599 if (categories_dimension)
4601 struct pivot_dimension *d = pivot_dimension_create (
4602 pt, t->label_axis[t->clabels_from_axis],
4603 t->clabels_from_axis == PIVOT_AXIS_ROW
4604 ? N_("Row Categories")
4605 : N_("Column Categories"));
4606 const struct variable *var = t->clabels_example;
4607 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
/* One leaf per collected value, labeled according to the category that the
   value matches. */
4608 for (size_t i = 0; i < t->n_clabels_values; i++)
4610 const struct ctables_value *value = t->clabels_values[i];
4611 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4612 assert (cat != NULL);
4613 pivot_category_create_leaf (
4614 d->root, ctables_category_create_value_label (c, cat,
4620 pivot_table_set_look (pt, ct->look);
/* The per-axis dimensions. */
4621 struct pivot_dimension *d[PIVOT_N_AXES];
4622 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4624 static const char *names[] = {
4625 [PIVOT_AXIS_ROW] = N_("Rows"),
4626 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4627 [PIVOT_AXIS_LAYER] = N_("Layers"),
/* A dimension is created for an axis that has an axis expression or that
   carries the summaries. */
4629 d[a] = (t->axes[a] || a == t->summary_axis
4630 ? pivot_dimension_create (pt, a, names[a])
4635 assert (t->axes[a]);
4637 for (size_t i = 0; i < t->stacks[a].n; i++)
4639 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, counting their cells
   and tracking the deepest nest among them. */
4640 struct ctables_section **sections = xnmalloc (t->n_sections,
4642 size_t n_sections = 0;
4644 size_t n_total_cells = 0;
4645 size_t max_depth = 0;
4646 for (size_t j = 0; j < t->n_sections; j++)
4647 if (t->sections[j].nests[a] == nest)
4649 struct ctables_section *s = &t->sections[j];
4650 sections[n_sections++] = s;
4651 n_total_cells += hmap_count (&s->cells);
4653 size_t depth = s->nests[a]->n;
4654 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it.  Lines that
   may filter which cells are added are not visible in this excerpt. */
4657 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4659 size_t n_sorted = 0;
4661 for (size_t j = 0; j < n_sections; j++)
4663 struct ctables_section *s = sections[j];
4665 struct ctables_cell *cell;
4666 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4668 sorted[n_sorted++] = cell;
4669 assert (n_sorted <= n_total_cells);
4672 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4673 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* A level is one layer of labels within the dimension.  The array below
   lists this nest's levels from outermost to innermost. */
4675 struct ctables_level
4677 enum ctables_level_type
4679 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4680 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4681 CTL_SUMMARY, /* Summary functions. */
4685 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Room for up to two levels per variable plus one summary level. */
4688 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4689 size_t n_levels = 0;
4690 for (size_t k = 0; k < nest->n; k++)
4692 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4693 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4695 if (vlabel != CTVL_NONE)
4697 levels[n_levels++] = (struct ctables_level) {
4699 .vlabel = (enum settings_value_show) vlabel,
/* A category level is added for every variable except the scale variable;
   for the innermost variable it is added only if the category labels stay
   on this axis. */
4704 if (nest->scale_idx != k
4705 && (k != nest->n - 1 || t->label_axis[a] == a))
4707 levels[n_levels++] = (struct ctables_level) {
4708 .type = CTL_CATEGORY,
/* Without a separate summary dimension, the summary functions form the
   innermost level on the summary-labels axis. */
4714 if (!summary_dimension && a == t->slabels_axis)
4716 levels[n_levels++] = (struct ctables_level) {
4717 .type = CTL_SUMMARY,
4718 .var_idx = SIZE_MAX,
4722 /* Pivot categories:
4724 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4725 - category for nest->vars[0], if nest->scale_idx != 0
4726 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4727 - category for nest->vars[1], if nest->scale_idx != 1
4729 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4730 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4731 - summary function, if 'a == t->slabels_axis && a ==
4734 Additional dimensions:
4736 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4738 - If 't->label_axis[b] == a' for some 'b != a', add a category
4743 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4745 for (size_t j = 0; j < n_sorted; j++)
4747 struct ctables_cell *cell = sorted[j];
4748 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4750 size_t n_common = 0;
4753 for (; n_common < n_levels; n_common++)
4755 const struct ctables_level *level = &levels[n_common];
4756 if (level->type == CTL_CATEGORY)
4758 size_t var_idx = level->var_idx;
4759 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4760 if (prev->axes[a].cvs[var_idx].category != c)
4762 else if (c->type != CCT_SUBTOTAL
4763 && c->type != CCT_TOTAL
4764 && c->type != CCT_POSTCOMPUTE
4765 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4766 &cell->axes[a].cvs[var_idx].value,
4767 var_get_type (nest->vars[var_idx])))
4773 for (size_t k = n_common; k < n_levels; k++)
4775 const struct ctables_level *level = &levels[k];
4776 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4777 if (level->type == CTL_SUMMARY)
4779 assert (k == n_levels - 1);
4781 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4782 for (size_t m = 0; m < specs->n; m++)
4784 int leaf = pivot_category_create_leaf (
4785 parent, ctables_summary_label (&specs->specs[m],
4793 const struct variable *var = nest->vars[level->var_idx];
4794 struct pivot_value *label;
4795 if (level->type == CTL_VAR)
4797 label = pivot_value_new_variable (var);
4798 label->variable.show = level->vlabel;
4800 else if (level->type == CTL_CATEGORY)
4802 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4803 label = ctables_category_create_value_label (
4804 t->categories[var_get_dict_index (var)],
4805 cv->category, var, &cv->value);
4810 if (k == n_levels - 1)
4811 prev_leaf = pivot_category_create_leaf (parent, label);
4813 groups[k] = pivot_category_create_group__ (parent, label);
4817 cell->axes[a].leaf = prev_leaf;
4826 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4830 size_t n_total_cells = 0;
4831 for (size_t j = 0; j < t->n_sections; j++)
4832 n_total_cells += hmap_count (&t->sections[j].cells);
4834 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4835 size_t n_sorted = 0;
4836 for (size_t j = 0; j < t->n_sections; j++)
4838 const struct ctables_section *s = &t->sections[j];
4839 struct ctables_cell *cell;
4840 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4842 sorted[n_sorted++] = cell;
4844 assert (n_sorted <= n_total_cells);
4845 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4847 size_t ids[N_CTATS];
4848 memset (ids, 0, sizeof ids);
4849 for (size_t j = 0; j < n_sorted; j++)
4851 struct ctables_cell *cell = sorted[j];
4852 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4854 struct ctables_area *area = cell->areas[at];
4855 if (!area->sequence)
4856 area->sequence = ++ids[at];
4863 for (size_t i = 0; i < t->n_sections; i++)
4865 struct ctables_section *s = &t->sections[i];
4867 struct ctables_cell *cell;
4868 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4873 const struct ctables_value *ctv = ctables_value_find (cell);
4874 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4875 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4876 for (size_t j = 0; j < specs->n; j++)
4879 size_t n_dindexes = 0;
4881 if (summary_dimension)
4882 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4885 dindexes[n_dindexes++] = ctv->leaf;
4887 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4890 int leaf = cell->axes[a].leaf;
4891 if (a == t->summary_axis && !summary_dimension)
4892 leaf += specs->specs[j].axis_idx;
4893 dindexes[n_dindexes++] = leaf;
4896 const struct ctables_summary_spec *ss = &specs->specs[j];
4898 struct fmt_spec format = specs->specs[j].format;
4899 bool is_ctables_format = ss->is_ctables_format;
4900 double d = (cell->postcompute
4901 ? ctables_cell_calculate_postcompute (
4902 s, cell, ss, &format, &is_ctables_format, j)
4903 : ctables_summary_value (cell->areas,
4904 &cell->summaries[j], ss));
4906 struct pivot_value *value;
4907 if (ct->hide_threshold != 0
4908 && d < ct->hide_threshold
4909 && ss->function == CTSF_COUNT)
4911 value = pivot_value_new_user_text_nocopy (
4912 xasprintf ("<%d", ct->hide_threshold));
4914 else if (d == 0 && ct->zero)
4915 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4916 else if (d == SYSMIS && ct->missing)
4917 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4918 else if (is_ctables_format)
4919 value = pivot_value_new_user_text_nocopy (
4920 ctables_format (d, &format, &ct->ctables_formats));
4923 value = pivot_value_new_number (d);
4924 value->numeric.format = format;
4926 /* XXX should text values be right-justified? */
4927 pivot_table_put (pt, dindexes, n_dindexes, value);
4932 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in T can be placed on the
   axis given by t->label_axis[A], reporting each problem found through
   msg(), lex_ofs_msg() (using LEXER and the token range
   t->clabels_start_ofs..t->clabels_end_ofs), and msg_at().

   As a side effect, sets t->clabels_example to the innermost variable of
   the first nest in the axis's stack.

   NOTE(review): the early exits and return statements are not visible in
   this excerpt.  ctables_prepare_table() combines two calls with '&&', so
   presumably the result is true when the placement is acceptable and false
   after an error has been reported -- confirm. */
4936 ctables_check_label_position (struct ctables_table *t, struct lexer *lexer,
4937 enum pivot_axis_type a)
4939 enum pivot_axis_type label_pos = t->label_axis[a];
4943 const struct ctables_stack *stack = &t->stacks[a];
4947 const struct ctables_nest *n0 = &stack->nests[0];
4950 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference against which
   the other nests are compared below. */
4954 const struct variable *v0 = n0->vars[n0->n - 1];
4955 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4956 t->clabels_example = v0;
/* A CCT_FUNCTION category (sorting by a summary function) is reported as
   incompatible with moving the labels. */
4958 for (size_t i = 0; i < c0->n_cats; i++)
4959 if (c0->cats[i].type == CCT_FUNCTION)
4961 msg (SE, _("Category labels may not be moved to another axis when "
4962 "sorting by a summary function."));
4963 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4964 _("This syntax moves category labels to another axis."));
4965 msg_at (SN, c0->cats[i].location,
4966 _("This syntax requests sorting by a summary function."));
/* Scale variables are rejected. */
4970 for (size_t i = 0; i < stack->n; i++)
4972 const struct ctables_nest *ni = &stack->nests[i];
4974 const struct variable *vi = ni->vars[ni->n - 1];
/* NOTE(review): this tests the first nest's last index (n0->n - 1) against
   this nest's scale_idx, whereas the variable named in the message is this
   nest's last variable.  Possibly 'ni->n - 1' was intended; the two differ
   only when nests have different depths -- confirm. */
4975 if (n0->n - 1 == ni->scale_idx)
4977 msg (SE, _("To move category labels from one axis to another, "
4978 "the variables whose labels are to be moved must be "
4979 "categorical, but %s is scale."), var_get_name (vi));
4980 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4981 _("This syntax moves category labels to another axis."));
/* The innermost variable of every other nest is compared with V0 for
   width, value labels, and category specifications, in that order. */
4986 for (size_t i = 1; i < stack->n; i++)
4988 const struct ctables_nest *ni = &stack->nests[i];
4990 const struct variable *vi = ni->vars[ni->n - 1];
4991 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4993 if (var_get_width (v0) != var_get_width (vi))
4995 msg (SE, _("To move category labels from one axis to another, "
4996 "the variables whose labels are to be moved must all "
4997 "have the same width, but %s has width %d and %s has "
4999 var_get_name (v0), var_get_width (v0),
5000 var_get_name (vi), var_get_width (vi));
5001 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5002 _("This syntax moves category labels to another axis."));
5005 if (!val_labs_equal (var_get_value_labels (v0),
5006 var_get_value_labels (vi)))
5008 msg (SE, _("To move category labels from one axis to another, "
5009 "the variables whose labels are to be moved must all "
5010 "have the same value labels, but %s and %s have "
5011 "different value labels."),
5012 var_get_name (v0), var_get_name (vi));
5013 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5014 _("This syntax moves category labels to another axis."));
5017 if (!ctables_categories_equal (c0, ci))
5019 msg (SE, _("To move category labels from one axis to another, "
5020 "the variables whose labels are to be moved must all "
5021 "have the same category specifications, but %s and %s "
5022 "have different category specifications."),
5023 var_get_name (v0), var_get_name (vi));
5024 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5025 _("This syntax moves category labels to another axis."));
/* Searches the *N entries of *SUM_VARS for VAR.  If VAR is not handled by
   that search, it is stored at index *N, after growing the array with
   x2nrealloc() when *N has reached *ALLOCATED.

   NOTE(review): the return statements, and the update of *N, are not
   visible in this excerpt.  The caller stores the result in
   spec->sum_var_idx, so presumably the return value is VAR's index in
   *SUM_VARS -- confirm. */
5034 add_sum_var (struct variable *var,
5035 struct variable ***sum_vars, size_t *n, size_t *allocated)
5037 for (size_t i = 0; i < *n; i++)
5038 if (var == (*sum_vars)[i])
5041 if (*n >= *allocated)
5042 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
5043 (*sum_vars)[*n] = var;
/* Maps AREA to another area type.  ctables_prepare_table() applies this to
   a summary spec's calc_area when row labels are placed on the column axis
   or vice versa.

   NOTE(review): only two of the return statements are visible in this
   excerpt, and none of the case labels.  Presumably row-based and
   column-based area types are exchanged -- confirm. */
5047 static enum ctables_area_type
5048 rotate_area (enum ctables_area_type area)
5059 return CTAT_LAYERCOL;
5062 return CTAT_LAYERROW;
/* Walks the axis expression tree rooted at A.  For every summary spec in
   a->specs[] whose function is CTSF_areaPCT_SUM, registers a->var through
   add_sum_var() and stores the result in the spec's sum_var_idx.  Also
   recurses into a->subs[0] and a->subs[1].

   SUM_VARS, N, and ALLOCATED are passed through to add_sum_var().

   NOTE(review): the test that selects between the two visible loops (for
   instance on the kind of node A is) is not visible in this excerpt. */
5075 enumerate_sum_vars (const struct ctables_axis *a,
5076 struct variable ***sum_vars, size_t *n, size_t *allocated)
5084 for (size_t i = 0; i < N_CSVS; i++)
5085 for (size_t j = 0; j < a->specs[i].n; j++)
5087 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5088 if (spec->function == CTSF_areaPCT_SUM)
5089 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
5095 for (size_t i = 0; i < 2; i++)
5096 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares T for execution.  The steps visible in this function are:

   - For each axis, either build t->stacks[a] with enumerate_fts() and fill
     in each nest's areas[] index lists for every area type, or install a
     stack holding a single nest and reset t->label_axis[a] to the axis
     itself.

   - For each nest on the summary axis: supply a default summary spec when
     the nest has none for CSV_CELL (CTSF_MEAN for a scale set, otherwise
     CTSF_COUNT); copy the CSV_CELL specs to CSV_TOTAL when the latter is
     empty; pass calc_area through rotate_area() when row labels are on the
     column axis or vice versa; and, when smissing_listwise is set, record
     the scale variables of related nests as listwise variables.

   - Merge the summary specs of all those nests into t->summary_specs
     without duplicates, storing each spec's position in the merged list in
     its axis_idx.

   - Collect sum variables with enumerate_sum_vars().

   The result is that of ctables_check_label_position() for the row axis
   and then, if that succeeds, the column axis.  LEXER is used only for
   those checks. */
5102 ctables_prepare_table (struct ctables_table *t, struct lexer *lexer)
5104 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5107 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5109 for (size_t j = 0; j < t->stacks[a].n; j++)
5111 struct ctables_nest *nest = &t->stacks[a].nests[j];
5112 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* areas[at] lists indexes into the nest's variables; it can hold at most
   nest->n of them. */
5114 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5115 nest->n_areas[at] = 0;
/* ATA is the axis that the area type is named for, and ATB the other one
   of rows and columns. */
5117 enum pivot_axis_type ata, atb;
5118 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5120 ata = PIVOT_AXIS_ROW;
5121 atb = PIVOT_AXIS_COLUMN;
5123 else /* at == CTAT_COL || at == CTAT_LAYERCOL */
5125 ata = PIVOT_AXIS_COLUMN;
5126 atb = PIVOT_AXIS_ROW;
5129 if (at == CTAT_LAYER
5130 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5131 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5132 ? a == atb && t->label_axis[a] != a
/* Counts K down from nest->n - 1.  K is unsigned, so decrementing it past
   zero wraps around to a value that fails 'k < nest->n' and ends the
   loop. */
5135 for (size_t k = nest->n - 1; k < nest->n; k--)
5136 if (k != nest->scale_idx)
5138 nest->areas[at][nest->n_areas[at]++] = k;
5144 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5145 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5146 : at == CTAT_TABLE ? true
/* Add every variable in the nest other than the scale variable. */
5150 for (size_t k = 0; k < nest->n; k++)
5151 if (k != nest->scale_idx)
5152 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP is computed below from the area type and from where the category
   labels have been moved.  NOTE(review): its declaration and the branch
   structure around these assignments are not visible in this excerpt. */
5158 #define L PIVOT_AXIS_LAYER
5159 n_drop = (t->clabels_from_axis == L ? a != L
5160 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5161 : t->clabels_from_axis == a ? 2
5168 n_drop = a == ata && t->label_axis[ata] == atb;
5173 n_drop = (a == ata ? t->label_axis[ata] == atb
5175 : t->clabels_from_axis == atb ? -1
5176 : t->clabels_to_axis != atb ? 1
/* This removes the second-to-last entry by overwriting it with the last
   one and shortening the list. */
5188 size_t n = nest->n_areas[at];
5191 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5192 nest->n_areas[at]--;
/* Drop up to N_DROP entries from the end of the list, stopping at empty. */
5197 for (int i = 0; i < n_drop; i++)
5198 if (nest->n_areas[at] > 0)
5199 nest->n_areas[at]--;
/* The alternative to enumerate_fts(): a stack with one nest whose
   scale_idx and summary_idx are both SIZE_MAX. */
5206 struct ctables_nest *nest = xmalloc (sizeof *nest);
5207 *nest = (struct ctables_nest) {
5209 .scale_idx = SIZE_MAX,
5210 .summary_idx = SIZE_MAX
5212 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5214 /* There's no point in moving labels away from an axis that has no
5215 labels, so avoid dealing with the special cases around that. */
5216 t->label_axis[a] = a;
/* Per-nest processing of the summary specs on the summary axis. */
5219 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5220 for (size_t i = 0; i < stack->n; i++)
5222 struct ctables_nest *nest = &stack->nests[i];
5223 if (!nest->specs[CSV_CELL].n)
/* No summary function was specified for this nest, so supply a default
   based on whether the spec set is marked as scale. */
5225 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5226 ss->specs = xmalloc (sizeof *ss->specs);
5229 enum ctables_summary_function function
5230 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* The default summarizes the nest's innermost variable. */
5234 nest->summary_idx = nest->n - 1;
5235 ss->var = nest->vars[nest->summary_idx];
5237 *ss->specs = (struct ctables_summary_spec) {
5238 .function = function,
5239 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5240 .format = ctables_summary_default_format (function, ss->var),
5243 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5244 &nest->specs[CSV_CELL]);
/* Totals use the same specs as ordinary cells unless they were given their
   own. */
5246 else if (!nest->specs[CSV_TOTAL].n)
5247 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5248 &nest->specs[CSV_CELL]);
5250 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5251 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5253 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5254 for (size_t i = 0; i < nest->specs[sv].n; i++)
5256 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5257 const struct ctables_function_info *cfi =
5258 &ctables_function_info[ss->function];
5260 ss->calc_area = rotate_area (ss->calc_area);
5264 if (t->ctables->smissing_listwise)
5266 struct variable **listwise_vars = NULL;
5268 size_t allocated = 0;
/* Collect the scale variables of the other nests that share this nest's
   group_head, scanning forward from the group head. */
5270 for (size_t j = nest->group_head; j < stack->n; j++)
5272 const struct ctables_nest *other_nest = &stack->nests[j];
5273 if (other_nest->group_head != nest->group_head)
5276 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5279 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5280 sizeof *listwise_vars);
5281 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Attach the list to each summary variant.  NOTE(review): the condition
   that governs the xmemdup() call is not visible in this excerpt;
   presumably it gives later variants their own copy -- confirm. */
5284 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5287 listwise_vars = xmemdup (listwise_vars,
5288 n * sizeof *listwise_vars);
5289 nest->specs[sv].listwise_vars = listwise_vars;
5290 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specs of every nest and variant into one list.  Each
   merge_item walks one spec set, starting at the set's first spec. */
5295 struct ctables_summary_spec_set *merged = &t->summary_specs;
5296 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5298 for (size_t j = 0; j < stack->n; j++)
5300 const struct ctables_nest *nest = &stack->nests[j];
5302 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5303 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the item whose current spec compares lowest... */
5308 struct merge_item min = items[0];
5309 for (size_t j = 1; j < n_left; j++)
5310 if (merge_item_compare_3way (&items[j], &min) < 0)
/* ...append that spec to the merged list... */
5313 if (merged->n >= merged->allocated)
5314 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5315 sizeof *merged->specs);
5316 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* ...and advance every item whose current spec compares equal to it,
   pointing that spec at the new merged entry.  An item that has run out of
   specs is replaced by the last remaining item. */
5318 for (size_t j = 0; j < n_left; )
5320 if (merge_item_compare_3way (&items[j], &min) == 0)
5322 struct merge_item *item = &items[j];
5323 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5324 if (item->ofs >= item->set->n)
5326 items[j] = items[--n_left];
5335 size_t allocated_sum_vars = 0;
5336 enumerate_sum_vars (t->axes[t->summary_axis],
5337 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5339 return (ctables_check_label_position (t, lexer, PIVOT_AXIS_ROW)
5340 && ctables_check_label_position (t, lexer, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack for axis A, looks up the value in case C of the
   nest's innermost variable and, depending on the result of
   ctables_categories_match() for that variable's categories, records the
   value in T with ctables_value_insert().

   NOTE(review): the statement executed for a system-missing numeric value
   is not visible in this excerpt; presumably such values are skipped --
   confirm. */
5344 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5345 enum pivot_axis_type a)
5347 struct ctables_stack *stack = &t->stacks[a];
5348 for (size_t i = 0; i < stack->n; i++)
5350 const struct ctables_nest *nest = &stack->nests[i];
5351 const struct variable *var = nest->vars[nest->n - 1];
5352 const union value *value = case_data (c, var);
5354 if (var_is_numeric (var) && value->f == SYSMIS)
5357 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5359 ctables_value_insert (t, value, var_get_width (var));
/* Adds to OCCURRENCES, through ctables_add_occurrence(), values of VAR that
   are implied by the category specifications in CATS: values named
   explicitly by a category, and values taken from VAR's value labels that
   satisfy a category's criterion.

   NOTE(review): most of the 'case' labels of the switch on the category
   type are not visible in this excerpt, so the comments below describe
   each group of statements by what it does rather than by its label. */
5364 ctables_add_category_occurrences (const struct variable *var,
5365 struct hmap *occurrences,
5366 const struct ctables_categories *cats)
5368 const struct val_labs *val_labs = var_get_value_labels (var);
5370 for (size_t i = 0; i < cats->n_cats; i++)
5372 const struct ctables_category *c = &cats->cats[i];
/* An explicit number: add that one numeric value. */
5376 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* An explicit string: build a temporary value of VAR's width from the
   category's string (value_copy_buf_rpad() is given ' ' as its final
   argument, presumably the pad character), add it, and release it. */
5382 int width = var_get_width (var);
5384 value_init (&value, width);
5385 value_copy_buf_rpad (&value, width,
5386 CHAR_CAST (uint8_t *, c->string.string),
5387 c->string.length, ' ');
5388 ctables_add_occurrence (var, &value, occurrences);
5389 value_destroy (&value, width);
/* A numeric range: add each labeled value lying within
   [nrange[0], nrange[1]]. */
5394 assert (var_is_numeric (var));
5395 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5396 vl = val_labs_next (val_labs, vl))
5397 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5398 ctables_add_occurrence (var, &vl->value, occurrences);
/* A string range: add each labeled value accepted by in_string_range(). */
5402 assert (var_is_alpha (var));
5403 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5404 vl = val_labs_next (val_labs, vl))
5405 if (in_string_range (&vl->value, var, c->srange))
5406 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each labeled value that is missing for VAR. */
5410 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5411 vl = val_labs_next (val_labs, vl))
5412 if (var_is_value_missing (var, &vl->value))
5413 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value. */
5417 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5418 vl = val_labs_next (val_labs, vl))
5419 ctables_add_occurrence (var, &vl->value, occurrences);
5422 case CCT_POSTCOMPUTE:
/* Add each labeled value, leaving out missing values unless the category
   has include_missing set. */
5432 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5433 vl = val_labs_next (val_labs, vl))
5434 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5435 ctables_add_occurrence (var, &vl->value, occurrences);
5438 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Enumerates combinations of the occurrences recorded for the variables in
   S's nests, proceeding axis by axis (A) and, within an axis, variable by
   variable (A_IDX).  For each candidate it writes the occurrence's value
   into the scratch case C and the matching category into CATS[A][A_IDX].
   Once every axis has been processed, ctables_cell_insert__() is called
   for the combination.

   Categories of type CCT_POSTCOMPUTE are enumerated as well; for those,
   only CATS is updated. */
5445 ctables_section_recurse_add_empty_categories (
5446 struct ctables_section *s,
5447 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5448 enum pivot_axis_type a, size_t a_idx)
/* Every axis has been processed, so the combination is complete. */
5450 if (a >= PIVOT_N_AXES)
5451 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest or no more variables: move on to the next axis. */
5452 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5453 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5456 const struct variable *var = s->nests[a]->vars[a_idx];
5457 const struct ctables_categories *categories = s->table->categories[
5458 var_get_dict_index (var)];
5459 int width = var_get_width (var);
5460 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5461 const struct ctables_occurrence *o;
5462 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the case's value of VAR with this occurrence's value, then
   continue with the next variable on the same axis. */
5464 union value *value = case_data_rw (c, var);
5465 value_destroy (value, width);
5466 value_clone (value, &o->value, width);
5467 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5468 assert (cats[a][a_idx] != NULL);
5469 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are tried too, leaving the case as it is. */
5472 for (size_t i = 0; i < categories->n_cats; i++)
5474 const struct ctables_category *cat = &categories->cats[i];
5475 if (cat->type == CCT_POSTCOMPUTE)
5477 cats[a][a_idx] = cat;
5478 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to S for category combinations that may not have occurred in
   the data.  For each variable in S's nests other than a nest's scale
   variable, consults s->table->show_empty[] and adds the values implied by
   the variable's category specifications to the matching occurrence map
   with ctables_add_category_occurrences().  It then creates a scratch case
   from the dictionary's prototype and lets
   ctables_section_recurse_add_empty_categories() insert the cells.

   NOTE(review): the statements that set and test 'show_empty', any null
   check on s->nests[a], and the release of the scratch case are not
   visible in this excerpt. */
5485 ctables_section_add_empty_categories (struct ctables_section *s)
5487 bool show_empty = false;
5488 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5490 for (size_t k = 0; k < s->nests[a]->n; k++)
5491 if (k != s->nests[a]->scale_idx)
5493 const struct variable *var = s->nests[a]->vars[k];
5494 size_t idx = var_get_dict_index (var);
5495 const struct ctables_categories *cats = s->table->categories[idx];
5496 if (s->table->show_empty[idx])
5499 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Per-axis arrays that receive the category chosen for each variable
   during the recursion; each is sized by its nest's variable count. */
5505 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5506 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5507 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5508 const struct ctables_category **cats[PIVOT_N_AXES] =
5510 [PIVOT_AXIS_LAYER] = layer_cats,
5511 [PIVOT_AXIS_ROW] = row_cats,
5512 [PIVOT_AXIS_COLUMN] = column_cats,
/* The recursion overwrites this case's values as it goes. */
5514 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5515 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties S of accumulated data: removes every entry from the occurrence
   maps of its non-scale variables, every cell, and every area, destroying
   the values and summaries they hold.  The hash maps themselves remain
   initialized; the cell and area maps are shrunk afterward.

   NOTE(review): the statements that free each occurrence, cell, and area
   structure after it is unlinked are not visible in this excerpt. */
5520 ctables_section_clear (struct ctables_section *s)
5522 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5524 const struct ctables_nest *nest = s->nests[a];
5525 for (size_t i = 0; i < nest->n; i++)
5526 if (i != nest->scale_idx)
5528 const struct variable *var = nest->vars[i];
5529 int width = var_get_width (var);
5530 struct ctables_occurrence *o, *next;
5531 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iteration form is used because entries are deleted while
   iterating. */
5532 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5534 value_destroy (&o->value, width);
5535 hmap_delete (map, &o->node);
5542 struct ctables_cell *cell, *next_cell;
5543 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Release the cell's per-axis values; as above, the scale variable is
   skipped. */
5545 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5547 const struct ctables_nest *nest = s->nests[a];
5548 for (size_t i = 0; i < nest->n; i++)
5549 if (i != nest->scale_idx)
5550 value_destroy (&cell->axes[a].cvs[i].value,
5551 var_get_width (nest->vars[i]));
5552 free (cell->axes[a].cvs);
/* The cell's summaries correspond one-to-one to the specs of the summary
   axis's nest for the cell's summary variant. */
5555 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5556 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5557 for (size_t i = 0; i < specs->n; i++)
5558 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5559 free (cell->summaries);
5561 hmap_delete (&s->cells, &cell->node);
5564 hmap_shrink (&s->cells);
5566 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5568 struct ctables_area *area, *next_area;
5569 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5573 hmap_delete (&s->areas[at], &area->node);
5576 hmap_shrink (&s->areas[at]);
/* Releases everything held by S: first empties it with
   ctables_section_clear(), then destroys each per-variable occurrence map
   and frees the per-axis arrays that hold them, and finally destroys the
   cell map and the area maps.  S itself is not freed by the visible
   code. */
5581 ctables_section_uninit (struct ctables_section *s)
5583 ctables_section_clear (s);
5585 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5587 struct ctables_nest *nest = s->nests[a];
5588 for (size_t i = 0; i < nest->n; i++)
5589 hmap_destroy (&s->occurrences[a][i]);
5590 free (s->occurrences[a]);
5593 hmap_destroy (&s->cells);
5594 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5595 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated in T: clears each of its sections with
   ctables_section_clear() and, if T has a clabels example variable, empties
   the map of collected category-label values and frees the clabels_values
   array.  ctables_execute() calls this after outputting each group.

   NOTE(review): the statement that frees each ctables_value after it is
   unlinked is not visible in this excerpt. */
5599 ctables_table_clear (struct ctables_table *t)
5601 for (size_t i = 0; i < t->n_sections; i++)
5602 ctables_section_clear (&t->sections[i]);
5604 if (t->clabels_example)
/* The values in the map are destroyed using the example variable's
   width. */
5606 int width = var_get_width (t->clabels_example);
5607 struct ctables_value *value, *next_value;
5608 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5609 &t->clabels_values_map)
5611 value_destroy (&value->value, width);
5612 hmap_delete (&t->clabels_values_map, &value->node);
5615 hmap_shrink (&t->clabels_values_map);
5617 free (t->clabels_values);
5618 t->clabels_values = NULL;
5619 t->n_clabels_values = 0;
/* Runs the tables over the cases in INPUT, using DS's dictionary.

   First, sections are allocated and created for every table.  The input is
   then divided into groups: by split-file variables when the dictionary's
   split type is SPLIT_SEPARATE, otherwise with casegrouper_create_vars()
   given no variables.  Within a group, every case is added to every section
   of every table, along with any category-label values for axes whose
   labels have been moved.  After a group has been read, each table is
   completed with its empty categories, output, and cleared.

   Returns the result of casegrouper_destroy().

   NOTE(review): the third parameter is not visible in this excerpt;
   presumably it is the 'struct ctables *ct' that the body uses. */
5624 ctables_execute (struct dataset *ds, struct casereader *input,
5627 for (size_t i = 0; i < ct->n_tables; i++)
5629 struct ctables_table *t = ct->tables[i];
/* One section per combination of a row, a column, and a layer nest, with
   an empty stack counting as one. */
5630 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5631 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5632 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5633 sizeof *t->sections);
5634 size_t ix[PIVOT_N_AXES];
5635 ctables_table_add_section (t, 0, ix);
5638 struct dictionary *dict = dataset_dict (ds);
5640 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5641 struct casegrouper *grouper
5643 ? casegrouper_create_splits (input, dict)
5644 : casegrouper_create_vars (input, NULL, 0));
5645 struct casereader *group;
5646 while (casegrouper_get_next_group (grouper, &group))
5649 output_split_file_values_peek (ds, group);
5651 bool warn_on_invalid = true;
5652 for (struct ccase *c = casereader_read (group); c;
5653 case_unref (c), c = casereader_read (group))
/* Compute the case's weight under each weighting scheme, indexed by
   enum ctables_weighting. */
5655 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5656 double e_weight = (ct->e_weight
5657 ? var_force_valid_weight (ct->e_weight,
5658 case_num (c, ct->e_weight),
5662 [CTW_DICTIONARY] = d_weight,
5663 [CTW_EFFECTIVE] = e_weight,
5664 [CTW_UNWEIGHTED] = 1.0,
5667 for (size_t i = 0; i < ct->n_tables; i++)
5669 struct ctables_table *t = ct->tables[i];
5671 for (size_t j = 0; j < t->n_sections; j++)
5672 ctables_cell_insert (&t->sections[j], c, weight);
/* Axes whose category labels are displayed on a different axis also
   collect the label values seen in the data. */
5674 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5675 if (t->label_axis[a] != a)
5676 ctables_insert_clabels_values (t, c, a);
5679 casereader_destroy (group);
/* The whole group has been read: finish, output, and reset each table. */
5681 for (size_t i = 0; i < ct->n_tables; i++)
5683 struct ctables_table *t = ct->tables[i];
5685 if (t->clabels_example)
5686 ctables_sort_clabels_values (t);
5688 for (size_t j = 0; j < t->n_sections; j++)
5689 ctables_section_add_empty_categories (&t->sections[j]);
5691 ctables_table_output (ct, t);
5692 ctables_table_clear (t);
5695 return casegrouper_destroy (grouper);
/* Searches CT's postcomputes map for an entry whose name equals NAME,
   ignoring case.  The bucket is selected with utf8_hash_case_string() and
   candidates are compared with utf8_strcasecmp().

   NOTE(review): presumably returns the matching postcompute, or NULL when
   there is none; confirm against the return statements. */
5698 static struct ctables_postcompute *
5699 ctables_find_postcompute (struct ctables *ct, const char *name)
5701 struct ctables_postcompute *pc;
5702 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5703 utf8_hash_case_string (name, 0), &ct->postcomputes)
5704 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form

       &NAME=EXPR(expression)

   from LEXER, and records the result in CT's postcomputes map.  The
   expression is parsed by ctables_pcexpr_parse_add() with DICT.  If a
   postcompute called NAME already exists, a warning is issued at the new
   definition, a note points at the previous one, and the previous
   expression and location are destroyed.  Otherwise a new postcompute is
   allocated and inserted under the case-insensitive hash of its name. */
5710 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The token just before the current one starts the subcommand; remember it
   so that messages can cite the whole definition. */
5713 int pcompute_start = lex_ofs (lexer) - 1;
5715 if (!lex_match (lexer, T_AND))
5717 lex_error_expecting (lexer, "&");
5720 if (!lex_force_id (lexer))
5723 char *name = ss_xstrdup (lex_tokss (lexer));
5726 if (!lex_force_match_phrase (lexer, "=EXPR("))
/* expr_start..expr_end bracket the expression's tokens, excluding the
   closing parenthesis. */
5732 int expr_start = lex_ofs (lexer);
5733 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5734 int expr_end = lex_ofs (lexer) - 1;
5735 if (!expr || !lex_force_match (lexer, T_RPAREN))
5737 ctables_pcexpr_destroy (expr);
5741 int pcompute_end = lex_ofs (lexer) - 1;
5743 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5746 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5749 msg_at (SW, location, _("New definition of &%s will override the "
5750 "previous definition."),
5752 msg_at (SN, pc->location, _("This is the previous definition."));
5754 ctables_pcexpr_destroy (pc->expr);
5755 msg_location_destroy (pc->location);
5760 pc = xmalloc (sizeof *pc);
5761 *pc = (struct ctables_postcompute) { .name = name };
5762 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5763 utf8_hash_case_string (pc->name, 0));
5766 pc->location = location;
/* The label is the source text of the expression itself.
   NOTE(review): confirm whether this assignment is conditional. */
5768 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications that follow PPROPERTIES ... FORMAT from
   LEXER into SSS, which is first reset to an empty set.

   Parsing continues until the end of the command, a slash, or an identifier
   matching LABEL or HIDESOURCECATS.  Each specification is a summary
   function, a percentile in the closed range 0 to 100 when the function is
   CTSF_PTILE, and a format specifier.  Parsed specifications are appended to
   SSS->specs, which grows with x2nrealloc().

   NOTE(review): the trailing ctables_summary_spec_set_uninit() call is
   presumably the failure path; confirm. */
5773 ctables_parse_pproperties_format (struct lexer *lexer,
5774 struct ctables_summary_spec_set *sss)
5776 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5778 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5779 && !(lex_token (lexer) == T_ID
5780 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5781 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5782 lex_tokss (lexer)))))
5784 /* Parse function. */
5785 enum ctables_summary_function function;
5786 enum ctables_weighting weighting;
5787 enum ctables_area_type area;
5788 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5791 /* Parse percentile. */
5792 double percentile = 0;
5793 if (function == CTSF_PTILE)
5795 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5797 percentile = lex_number (lexer);
5802 struct fmt_spec format;
5803 bool is_ctables_format;
5804 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, enlarging the array first if it is full. */
5807 if (sss->n >= sss->allocated)
5808 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5809 sizeof *sss->specs);
5810 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5811 .function = function,
5812 .weighting = weighting,
5815 .percentile = percentile,
5817 .is_ctables_format = is_ctables_format,
5823 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand from LEXER.

   The subcommand starts with one or more &NAME references, each of which
   must name a postcompute already defined in CT; an unknown name is reported
   with "Unknown computed category".  The settings that follow, up to a slash
   or the end of the command, apply to every postcompute listed:

   - LABEL: replaces the label with the given string.
   - FORMAT: replaces the summary specifications with a clone of the parsed
     set.
   - HIDESOURCECATS: sets the hide_source_cats flag from a boolean. */
5828 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5830 struct ctables_postcompute **pcs = NULL;
5832 size_t allocated_pcs = 0;
/* Collect the postcomputes named by the leading &NAME list. */
5834 while (lex_match (lexer, T_AND))
5836 if (!lex_force_id (lexer))
5838 struct ctables_postcompute *pc
5839 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5842 lex_error (lexer, _("Unknown computed category &%s."),
5843 lex_tokcstr (lexer));
5848 if (n_pcs >= allocated_pcs)
5849 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply each setting to all of the collected postcomputes. */
5853 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5855 if (lex_match_id (lexer, "LABEL"))
5857 lex_match (lexer, T_EQUALS);
5858 if (!lex_force_string (lexer))
5861 for (size_t i = 0; i < n_pcs; i++)
5863 free (pcs[i]->label);
5864 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5869 else if (lex_match_id (lexer, "FORMAT"))
5871 lex_match (lexer, T_EQUALS);
5873 struct ctables_summary_spec_set sss;
5874 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute receives its own clone; the parsed original is released
   after the loop. */
5877 for (size_t i = 0; i < n_pcs; i++)
5880 ctables_summary_spec_set_uninit (pcs[i]->specs);
5882 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5883 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5885 ctables_summary_spec_set_uninit (&sss);
5887 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5889 lex_match (lexer, T_EQUALS);
5890 bool hide_source_cats;
5891 if (!parse_bool (lexer, &hide_source_cats))
5893 for (size_t i = 0; i < n_pcs; i++)
5894 pcs[i]->hide_source_cats = hide_source_cats;
5898 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime() according to
   FORMAT, and appends the result to OUT.

   NOTE(review): the visible lines do not check localtime() for a null result
   or strftime() for a zero return, so `value' might be used without a valid
   string in it; confirm whether that needs handling. */
5911 put_strftime (struct string *out, time_t now, const char *format)
5913 const struct tm *tm = localtime (&now);
5915 strftime (value, sizeof value, format, tm);
5916 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): presumably returns true when the prefix was skipped and
   false otherwise (callers use the result as a condition); confirm against
   the return statements. */
5920 skip_prefix (struct substring *s, struct substring prefix)
5922 if (ss_starts_with (*s, prefix))
5924 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens in LEXER from offset
   EXPR_START up to, but not including, EXPR_END.

   An identifier that names a variable in DICT with a variable label is
   written as that label; any other identifier is written as its own text.
   Other tokens are written using their syntax representation, with a space
   before each one except at the start, before a right parenthesis, or when
   OUT already ends in a space, and a space after each one except for the
   last token or a left parenthesis.

   NOTE(review): `nest' tracks square-bracket depth; presumably tokens inside
   brackets are left out of the output.  Confirm. */
5932 put_table_expression (struct string *out, struct lexer *lexer,
5933 struct dictionary *dict, int expr_start, int expr_end)
5936 for (int ofs = expr_start; ofs < expr_end; ofs++)
5938 const struct token *t = lex_ofs_token (lexer, ofs);
5939 if (t->type == T_LBRACK)
5941 else if (t->type == T_RBRACK && nest > 0)
5947 else if (t->type == T_ID)
5949 const struct variable *var
5950 = dict_lookup_var (dict, t->string.string);
5951 const char *label = var ? var_get_label (var) : NULL;
5952 ds_put_cstr (out, label ? label : t->string.string);
5956 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5957 ds_put_byte (out, ' ');
5959 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5960 ds_put_cstr (out, repr);
5963 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5964 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding these escapes along the way:

   - ")DATE" becomes NOW formatted with strftime's "%x".
   - ")TIME" becomes NOW formatted with strftime's "%X".
   - ")TABLE" becomes the table expression given by LEXER, DICT, EXPR_START,
     and EXPR_END, as rendered by put_table_expression().

   Text is copied in chunks up to each ')'.  A ')' that does not begin one of
   the escapes above is copied literally. */
5970 put_title_text (struct string *out, struct substring in, time_t now,
5971 struct lexer *lexer, struct dictionary *dict,
5972 int expr_start, int expr_end)
/* Copy everything before the next ')' verbatim. */
5976 size_t chunk = ss_find_byte (in, ')');
5977 ds_put_substring (out, ss_head (in, chunk));
5978 ss_advance (&in, chunk);
5979 if (ss_is_empty (in))
5982 if (skip_prefix (&in, ss_cstr (")DATE")))
5983 put_strftime (out, now, "%x");
5984 else if (skip_prefix (&in, ss_cstr (")TIME")))
5985 put_strftime (out, now, "%X");
5986 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5987 put_table_expression (out, lexer, dict, expr_start, expr_end);
5990 ds_put_byte (out, ')');
5991 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   The syntax has two phases.  Before the first TABLE, the global
   subcommands FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES,
   WEIGHT, and HIDESMALLCOUNTS are accepted.  Each TABLE subcommand may then
   be followed by the per-table subcommands SLABELS, CLABELS, CRITERIA,
   CATEGORIES, TITLES, SIGTEST, and COMPARETEST.  A subcommand given in the
   wrong phase draws a note explaining where it belongs.

   After parsing, the active dataset is opened with proc_open() and passed to
   ctables_execute().  Returns CMD_SUCCESS if both that and proc_commit()
   succeed, and CMD_FAILURE otherwise. */
5997 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5999 struct casereader *input = NULL;
/* Run the measure guesser over the dataset's cases. */
6001 struct measure_guesser *mg = measure_guesser_create (ds);
6004 input = proc_open (ds);
6005 measure_guesser_run (mg, input);
6006 measure_guesser_destroy (mg);
/* Every variable's label display starts out as the current
   settings_get_show_variables() value; VLABELS can override it below. */
6009 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6010 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6011 enum settings_value_show tvars = settings_get_show_variables ();
6012 for (size_t i = 0; i < n_vars; i++)
6013 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on a private copy of the default look, with omit_empty turned off. */
6015 struct pivot_table_look *look = pivot_table_look_unshare (
6016 pivot_table_look_ref (pivot_table_look_get_default ()));
6017 look->omit_empty = false;
6019 struct ctables *ct = xmalloc (sizeof *ct);
6020 *ct = (struct ctables) {
6021 .dict = dataset_dict (ds),
6023 .ctables_formats = FMT_SETTINGS_INIT,
6025 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once so that all title substitutions use the same time. */
6028 time_t now = time (NULL);
6033 const char *dot_string;
6034 const char *comma_string;
/* Number-style strings for the four CTABLES-specific formats.  The first
   string of each pair is used when the decimal point is '.', the second
   otherwise. */
6036 static const struct ctf ctfs[4] = {
6037 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6038 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6039 { CTEF_PAREN, "-,(,),", "-.(.)." },
6040 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6042 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6043 for (size_t i = 0; i < 4; i++)
6045 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6046 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6047 fmt_number_style_from_string (s));
6050 if (!lex_force_match (lexer, T_SLASH))
/* Phase 1: global subcommands, up to the first TABLE. */
6053 while (!lex_match_id (lexer, "TABLE"))
6055 if (lex_match_id (lexer, "FORMAT"))
/* Column widths stay SYSMIS unless specified.  units_per_inch records the
   unit in which they were given; the default is points. */
6057 double widths[2] = { SYSMIS, SYSMIS };
6058 double units_per_inch = 72.0;
6060 int start_ofs = lex_ofs (lexer);
6061 while (lex_token (lexer) != T_SLASH)
6063 if (lex_match_id (lexer, "MINCOLWIDTH"))
6065 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6068 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6070 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6073 else if (lex_match_id (lexer, "UNITS"))
6075 lex_match (lexer, T_EQUALS);
6076 if (lex_match_id (lexer, "POINTS"))
6077 units_per_inch = 72.0;
6078 else if (lex_match_id (lexer, "INCHES"))
6079 units_per_inch = 1.0;
6080 else if (lex_match_id (lexer, "CM"))
6081 units_per_inch = 2.54;
6084 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6088 else if (lex_match_id (lexer, "EMPTY"))
6093 lex_match (lexer, T_EQUALS);
6094 if (lex_match_id (lexer, "ZERO"))
6096 /* Nothing to do. */
6098 else if (lex_match_id (lexer, "BLANK"))
6099 ct->zero = xstrdup ("");
6100 else if (lex_force_string (lexer))
6102 ct->zero = ss_xstrdup (lex_tokss (lexer));
6108 else if (lex_match_id (lexer, "MISSING"))
6110 lex_match (lexer, T_EQUALS);
6111 if (!lex_force_string (lexer))
/* A string other than "." is stored as given; "." selects the alternative on
   the following line. */
6115 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6116 ? ss_xstrdup (lex_tokss (lexer))
6122 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6123 "UNITS", "EMPTY", "MISSING");
6128 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6129 && widths[0] > widths[1])
6131 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6132 _("MINCOLWIDTH must not be greater than "
/* Convert each specified width to inches, then scale by 96 per inch, for the
   look's horizontal width range. */
6137 for (size_t i = 0; i < 2; i++)
6138 if (widths[i] != SYSMIS)
6140 int *wr = ct->look->width_ranges[TABLE_HORZ];
6141 wr[i] = widths[i] / units_per_inch * 96.0;
6146 else if (lex_match_id (lexer, "VLABELS"))
6148 if (!lex_force_match_id (lexer, "VARIABLES"))
6150 lex_match (lexer, T_EQUALS);
6152 struct variable **vars;
6154 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6158 if (!lex_force_match_id (lexer, "DISPLAY"))
6163 lex_match (lexer, T_EQUALS);
6165 enum ctables_vlabel vlabel;
6166 if (lex_match_id (lexer, "DEFAULT"))
6167 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6168 else if (lex_match_id (lexer, "NAME"))
6170 else if (lex_match_id (lexer, "LABEL"))
6171 vlabel = CTVL_LABEL;
6172 else if (lex_match_id (lexer, "BOTH"))
6174 else if (lex_match_id (lexer, "NONE"))
6178 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen display for each listed variable, indexed by its position
   in the dictionary. */
6184 for (size_t i = 0; i < n_vars; i++)
6185 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6188 else if (lex_match_id (lexer, "MRSETS"))
6190 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6192 lex_match (lexer, T_EQUALS);
6193 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6196 else if (lex_match_id (lexer, "SMISSING"))
6198 if (lex_match_id (lexer, "VARIABLE"))
6199 ct->smissing_listwise = false;
6200 else if (lex_match_id (lexer, "LISTWISE"))
6201 ct->smissing_listwise = true;
6204 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6208 else if (lex_match_id (lexer, "PCOMPUTE"))
6210 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6213 else if (lex_match_id (lexer, "PPROPERTIES"))
6215 if (!ctables_parse_pproperties (lexer, ct))
6218 else if (lex_match_id (lexer, "WEIGHT"))
6220 if (!lex_force_match_id (lexer, "VARIABLE"))
6222 lex_match (lexer, T_EQUALS);
6223 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6227 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6229 if (lex_match_id (lexer, "COUNT"))
6231 lex_match (lexer, T_EQUALS);
6232 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6235 ct->hide_threshold = lex_integer (lexer);
/* Without COUNT, a threshold that is still 0 becomes 5. */
6238 else if (ct->hide_threshold == 0)
6239 ct->hide_threshold = 5;
6243 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6244 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6245 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
/* If the unexpected token is a per-table subcommand, add a note explaining
   that it belongs after TABLE. */
6246 if (lex_match_id (lexer, "SLABELS")
6247 || lex_match_id (lexer, "CLABELS")
6248 || lex_match_id (lexer, "CRITERIA")
6249 || lex_match_id (lexer, "CATEGORIES")
6250 || lex_match_id (lexer, "TITLES")
6251 || lex_match_id (lexer, "SIGTEST")
6252 || lex_match_id (lexer, "COMPARETEST"))
6253 lex_next_msg (lexer, SN, -1, -1,
6254 _("TABLE must appear before this subcommand."));
6258 if (!lex_force_match (lexer, T_SLASH))
/* Phase 2: one iteration per TABLE subcommand. */
6262 size_t allocated_tables = 0;
6265 if (ct->n_tables >= allocated_tables)
6266 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6267 sizeof *ct->tables);
/* Build the table's defaults: a default category specification, a categories
   array with one slot per dictionary variable, and show_empty set to true for
   every variable. */
6269 struct ctables_category *cat = xmalloc (sizeof *cat);
6270 *cat = (struct ctables_category) {
6272 .include_missing = false,
6273 .sort_ascending = true,
6276 struct ctables_categories *c = xmalloc (sizeof *c);
6277 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6278 *c = (struct ctables_categories) {
6284 struct ctables_categories **categories = xnmalloc (n_vars,
6285 sizeof *categories);
6286 for (size_t i = 0; i < n_vars; i++)
6289 bool *show_empty = xmalloc (n_vars);
6290 memset (show_empty, true, n_vars);
6292 struct ctables_table *t = xmalloc (sizeof *t);
6293 *t = (struct ctables_table) {
6295 .slabels_axis = PIVOT_AXIS_COLUMN,
6296 .slabels_visible = true,
6297 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6299 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6300 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6301 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6303 .clabels_from_axis = PIVOT_AXIS_LAYER,
6304 .clabels_to_axis = PIVOT_AXIS_LAYER,
6305 .categories = categories,
6306 .n_categories = n_vars,
6307 .show_empty = show_empty,
6310 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: a row axis, optionally followed by BY and a
   column axis, optionally followed by BY and a layer axis.  The token range
   expr_start..expr_end is kept for the )TABLE title substitution. */
6312 lex_match (lexer, T_EQUALS);
6313 int expr_start = lex_ofs (lexer);
6314 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6315 &t->axes[PIVOT_AXIS_ROW]))
6317 if (lex_match (lexer, T_BY))
6319 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6320 &t->axes[PIVOT_AXIS_COLUMN]))
6323 if (lex_match (lexer, T_BY))
6325 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6326 &t->axes[PIVOT_AXIS_LAYER]))
6330 int expr_end = lex_ofs (lexer);
6332 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6333 && !t->axes[PIVOT_AXIS_LAYER])
6335 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis.  Scale variables on more
   than one axis are an error, reported with a note for each axis involved. */
6339 const struct ctables_axis *scales[PIVOT_N_AXES];
6340 size_t n_scales = 0;
6341 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6343 scales[a] = find_scale (t->axes[a]);
6349 msg (SE, _("Scale variables may appear only on one axis."));
6350 if (scales[PIVOT_AXIS_ROW])
6351 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6352 _("This scale variable appears on the rows axis."));
6353 if (scales[PIVOT_AXIS_COLUMN])
6354 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6355 _("This scale variable appears on the columns axis."));
6356 if (scales[PIVOT_AXIS_LAYER])
6357 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6358 _("This scale variable appears on the layer axis."));
/* Likewise for summaries: an axis has one if it has a scale variable or a
   categorical variable with a summary specification, and summaries on more
   than one axis are an error. */
6362 const struct ctables_axis *summaries[PIVOT_N_AXES];
6363 size_t n_summaries = 0;
6364 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6366 summaries[a] = (scales[a]
6368 : find_categorical_summary_spec (t->axes[a]));
6372 if (n_summaries > 1)
6374 msg (SE, _("Summaries may appear only on one axis."));
6375 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6378 msg_at (SN, summaries[a]->loc,
6380 ? _("This variable on the rows axis has a summary.")
6381 : a == PIVOT_AXIS_COLUMN
6382 ? _("This variable on the columns axis has a summary.")
6383 : _("This variable on the layers axis has a summary."));
6385 msg_at (SN, summaries[a]->loc,
6386 _("This is a scale variable, so it always has a "
6387 "summary even if the syntax does not explicitly "
/* Choose the summary axis: one that carries a summary if any axis does,
   otherwise one that has any variables at all. */
6392 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6393 if (n_summaries ? summaries[a] : t->axes[a])
6395 t->summary_axis = a;
6399 if (lex_token (lexer) == T_ENDCMD)
6401 if (!ctables_prepare_table (t, lexer))
6405 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6408 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6410 if (lex_match_id (lexer, "SLABELS"))
6412 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6414 if (lex_match_id (lexer, "POSITION"))
6416 lex_match (lexer, T_EQUALS);
6417 if (lex_match_id (lexer, "COLUMN"))
6418 t->slabels_axis = PIVOT_AXIS_COLUMN;
6419 else if (lex_match_id (lexer, "ROW"))
6420 t->slabels_axis = PIVOT_AXIS_ROW;
6421 else if (lex_match_id (lexer, "LAYER"))
6422 t->slabels_axis = PIVOT_AXIS_LAYER;
6425 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6429 else if (lex_match_id (lexer, "VISIBLE"))
6431 lex_match (lexer, T_EQUALS);
6432 if (!parse_bool (lexer, &t->slabels_visible))
6437 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6442 else if (lex_match_id (lexer, "CLABELS"))
6444 int start_ofs = lex_ofs (lexer) - 1;
6445 if (lex_match_id (lexer, "AUTO"))
6447 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6448 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6450 else if (lex_match_id (lexer, "ROWLABELS"))
6452 lex_match (lexer, T_EQUALS);
6453 if (lex_match_id (lexer, "OPPOSITE"))
6454 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6455 else if (lex_match_id (lexer, "LAYER"))
6456 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6459 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6463 else if (lex_match_id (lexer, "COLLABELS"))
6465 lex_match (lexer, T_EQUALS);
6466 if (lex_match_id (lexer, "OPPOSITE"))
6467 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6468 else if (lex_match_id (lexer, "LAYER"))
6469 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6472 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6478 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6482 int end_ofs = lex_ofs (lexer) - 1;
/* Row and column category labels cannot both be moved off their own axes.
   The earlier CLABELS token range is cited as the first specification. */
6484 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6485 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6487 msg (SE, _("ROWLABELS and COLLABELS may not both be "
6490 lex_ofs_msg (lexer, SN, t->clabels_start_ofs,
6492 _("This is the first specification."));
6493 lex_ofs_msg (lexer, SN, start_ofs, end_ofs,
6494 _("This is the second specification."));
6498 t->clabels_start_ofs = start_ofs;
6499 t->clabels_end_ofs = end_ofs;
6501 else if (lex_match_id (lexer, "CRITERIA"))
6503 if (!lex_force_match_id (lexer, "CILEVEL"))
6505 lex_match (lexer, T_EQUALS);
6507 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6509 t->cilevel = lex_number (lexer);
6512 else if (lex_match_id (lexer, "CATEGORIES"))
6514 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6518 else if (lex_match_id (lexer, "TITLES"))
/* NOTE(review): the keywords are matched in plural form below while the error
   message lists singular forms; confirm that this is intended. */
6523 if (lex_match_id (lexer, "CAPTIONS"))
6524 textp = &t->caption;
6525 else if (lex_match_id (lexer, "CORNERS"))
6527 else if (lex_match_id (lexer, "TITLES"))
6531 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6534 lex_match (lexer, T_EQUALS);
/* Consecutive strings are joined with newlines, each one passing through
   put_title_text() for )DATE, )TIME, and )TABLE substitution. */
6536 struct string s = DS_EMPTY_INITIALIZER;
6537 while (lex_is_string (lexer))
6539 if (!ds_is_empty (&s))
6540 ds_put_byte (&s, '\n');
6541 put_title_text (&s, lex_tokss (lexer), now,
6542 lexer, dataset_dict (ds),
6543 expr_start, expr_end);
6547 *textp = ds_steal_cstr (&s);
6549 while (lex_token (lexer) != T_SLASH
6550 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST is parsed in full, but then reported as not yet implemented. */
6552 else if (lex_match_id (lexer, "SIGTEST"))
6554 int start_ofs = lex_ofs (lexer) - 1;
6557 t->chisq = xmalloc (sizeof *t->chisq);
6558 *t->chisq = (struct ctables_chisq) {
6560 .include_mrsets = true,
6561 .all_visible = true,
6567 if (lex_match_id (lexer, "TYPE"))
6569 lex_match (lexer, T_EQUALS);
6570 if (!lex_force_match_id (lexer, "CHISQUARE"))
6573 else if (lex_match_id (lexer, "ALPHA"))
6575 lex_match (lexer, T_EQUALS);
6576 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6578 t->chisq->alpha = lex_number (lexer);
6581 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6583 lex_match (lexer, T_EQUALS);
6584 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6587 else if (lex_match_id (lexer, "CATEGORIES"))
6589 lex_match (lexer, T_EQUALS);
6590 if (lex_match_id (lexer, "ALLVISIBLE"))
6591 t->chisq->all_visible = true;
6592 else if (lex_match_id (lexer, "SUBTOTALS"))
6593 t->chisq->all_visible = false;
6596 lex_error_expecting (lexer,
6597 "ALLVISIBLE", "SUBTOTALS");
6603 lex_error_expecting (lexer, "TYPE", "ALPHA",
6604 "INCLUDEMRSETS", "CATEGORIES");
6608 while (lex_token (lexer) != T_SLASH
6609 && lex_token (lexer) != T_ENDCMD);
6611 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6612 _("Support for SIGTEST not yet implemented."));
/* COMPARETEST is likewise parsed in full and then reported as not yet
   implemented. */
6615 else if (lex_match_id (lexer, "COMPARETEST"))
6617 int start_ofs = lex_ofs (lexer) - 1;
6620 t->pairwise = xmalloc (sizeof *t->pairwise);
6621 *t->pairwise = (struct ctables_pairwise) {
6623 .alpha = { .05, .05 },
6624 .adjust = BONFERRONI,
6625 .include_mrsets = true,
6626 .meansvariance_allcats = true,
6627 .all_visible = true,
6636 if (lex_match_id (lexer, "TYPE"))
6638 lex_match (lexer, T_EQUALS);
6639 if (lex_match_id (lexer, "PROP"))
6640 t->pairwise->type = PROP;
6641 else if (lex_match_id (lexer, "MEAN"))
6642 t->pairwise->type = MEAN;
6645 lex_error_expecting (lexer, "PROP", "MEAN");
6649 else if (lex_match_id (lexer, "ALPHA"))
6651 lex_match (lexer, T_EQUALS);
/* ALPHA takes one or two values strictly between 0 and 1.  Two values are
   stored in ascending order; a single value is stored in both slots. */
6653 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6655 double a0 = lex_number (lexer);
6658 lex_match (lexer, T_COMMA);
6659 if (lex_is_number (lexer))
6661 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6663 double a1 = lex_number (lexer);
6666 t->pairwise->alpha[0] = MIN (a0, a1);
6667 t->pairwise->alpha[1] = MAX (a0, a1);
6670 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6672 else if (lex_match_id (lexer, "ADJUST"))
6674 lex_match (lexer, T_EQUALS);
6675 if (lex_match_id (lexer, "BONFERRONI"))
6676 t->pairwise->adjust = BONFERRONI;
6677 else if (lex_match_id (lexer, "BH"))
6678 t->pairwise->adjust = BH;
6679 else if (lex_match_id (lexer, "NONE"))
6680 t->pairwise->adjust = 0;
6683 lex_error_expecting (lexer, "BONFERRONI", "BH",
6688 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6690 lex_match (lexer, T_EQUALS);
6691 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6694 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6696 lex_match (lexer, T_EQUALS);
6697 if (lex_match_id (lexer, "ALLCATS"))
6698 t->pairwise->meansvariance_allcats = true;
6699 else if (lex_match_id (lexer, "TESTEDCATS"))
6700 t->pairwise->meansvariance_allcats = false;
6703 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6707 else if (lex_match_id (lexer, "CATEGORIES"))
6709 lex_match (lexer, T_EQUALS);
6710 if (lex_match_id (lexer, "ALLVISIBLE"))
6711 t->pairwise->all_visible = true;
6712 else if (lex_match_id (lexer, "SUBTOTALS"))
6713 t->pairwise->all_visible = false;
6716 lex_error_expecting (lexer, "ALLVISIBLE",
6721 else if (lex_match_id (lexer, "MERGE"))
6723 lex_match (lexer, T_EQUALS);
6724 if (!parse_bool (lexer, &t->pairwise->merge))
6727 else if (lex_match_id (lexer, "STYLE"))
6729 lex_match (lexer, T_EQUALS);
6730 if (lex_match_id (lexer, "APA"))
6731 t->pairwise->apa_style = true;
6732 else if (lex_match_id (lexer, "SIMPLE"))
6733 t->pairwise->apa_style = false;
6736 lex_error_expecting (lexer, "APA", "SIMPLE");
6740 else if (lex_match_id (lexer, "SHOWSIG"))
6742 lex_match (lexer, T_EQUALS);
6743 if (!parse_bool (lexer, &t->pairwise->show_sig))
6748 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6749 "INCLUDEMRSETS", "MEANSVARIANCE",
6750 "CATEGORIES", "MERGE", "STYLE",
6755 while (lex_token (lexer) != T_SLASH
6756 && lex_token (lexer) != T_ENDCMD);
6758 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6759 _("Support for COMPARETEST not yet implemented."));
6764 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6765 "CRITERIA", "CATEGORIES", "TITLES",
6766 "SIGTEST", "COMPARETEST");
/* If the unexpected token is a global subcommand, add a note explaining that
   it belongs before TABLE. */
6767 if (lex_match_id (lexer, "FORMAT")
6768 || lex_match_id (lexer, "VLABELS")
6769 || lex_match_id (lexer, "MRSETS")
6770 || lex_match_id (lexer, "SMISSING")
6771 || lex_match_id (lexer, "PCOMPUTE")
6772 || lex_match_id (lexer, "PPROPERTIES")
6773 || lex_match_id (lexer, "WEIGHT")
6774 || lex_match_id (lexer, "HIDESMALLCOUNTS"))
6775 lex_next_msg (lexer, SN, -1, -1,
6776 _("This subcommand must appear before TABLE."));
6780 if (!lex_match (lexer, T_SLASH))
/* Work out which axis, if any, has had its category labels moved, and where
   they were moved to. */
6784 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6785 t->clabels_from_axis = PIVOT_AXIS_ROW;
6786 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6787 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6788 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6790 if (!ctables_prepare_table (t, lexer))
6793 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: read the data and produce the tables.  proc_commit()
   is called even if execution failed. */
6796 input = proc_open (ds);
6797 bool ok = ctables_execute (ds, input, ct);
6798 ok = proc_commit (ds) && ok;
6800 ctables_destroy (ct);
6801 return ok ? CMD_SUCCESS : CMD_FAILURE;
6806 ctables_destroy (ct);