1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Translation helpers: _() expands to a gettext() call on its argument, while
   N_() expands to its argument unchanged. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES.

   parse_ctables_summary_function() reports one of these values for each
   summary function name it recognizes.
   NOTE(review): the braces of this enum are not visible in this listing. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas.

   Each enumerator names a region of the table over which an area-based
   summary function is computed.  The same values index
   ctables_area_type_name[].
   NOTE(review): part of the first inner comment and the end of the enum are
   not visible in this listing. */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly. */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
/* Maps each area type to its keyword.  parse_ctables_summary_function()
   matches these keywords as a prefix of a summary function name, and
   ctables_summary_function_name() emits them for area functions.
   NOTE(review): the entries for CTAT_ROW and CTAT_COL and the closing brace
   are not visible in this listing. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area.

   Holds count/valid/total accumulators in arrays of N_CTWS elements
   (presumably one slot per enum ctables_weighting value -- confirm) and a
   'sums' array used for CTSF_areaPCT_SUM.
   NOTE(review): the struct tag line and the member that follows the
   "Sequence number" comment are not visible in this listing; other comments
   in this file refer to the type as ctables_area. */
104 struct hmap_node node;
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions.

   enum ctables_function_type classifies a summary function by the weighting
   prefixes it accepts.  The ctables_function_info[] initializer derives its
   u_prefix, e_prefix and is_area flags from this type, referring to
   CTFT_UCELL, CTFT_UECELL and CTFT_AREA.
   NOTE(review): the enumerator lines themselves, and the end of one of the
   comments below, are not visible in this listing. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
/* Default display format classes for summary functions.
   NOTE(review): the enum header line is not visible in this listing; the type
   is used elsewhere in this file as enum ctables_format. */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
/* Which kinds of variables a summary function may be applied to.
   add_summary_spec() switches on this value before accepting a function for
   a variable. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
/* The summary functions.  The list lives in "ctables.inc", which is included
   several times in this file with different definitions of the S() macro.
   Here S() expands to just the enumerator name followed by a comma. */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
/* Here S() expands to "+1" for every entry of ctables.inc.
   NOTE(review): the enclosing lines are not visible in this listing;
   presumably the resulting sum defines N_CTSF_FUNCTIONS -- confirm. */
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
/* Static description of one summary function: its name without any weighting
   or area prefix, its classification, its default format class, the variables
   it applies to, and which name prefixes the parser accepts for it. */
176 struct ctables_function_info
178 struct substring basename;
179 enum ctables_function_type type;
180 enum ctables_format format;
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
/* Table of summary function descriptions, generated from ctables.inc.
   The visible initializers set u_prefix for the UCELL, UECELL and AREA types,
   e_prefix for UECELL only, and is_area for AREA only.
   NOTE(review): some lines of the S() macro body (for example the index
   designator and the type/format initializers) are not visible in this
   listing. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
/* Returns the default output format for summary FUNCTION applied to VAR.

   The format class for each function comes from the FORMAT column of
   ctables.inc.  The three visible results are F with width 40, PCT with width
   40 and 1 decimal, and VAR's own print format.
   NOTE(review): the case labels selecting among these are not visible in this
   listing; presumably CTF_COUNT, CTF_PERCENT and CTF_GENERAL in that order. */
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
220 return *var_get_print_format (var);
/* Returns the availability class of summary function F, looked up in a local
   table built from the AVAILABILITY column of ctables.inc. */
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
/* Parses a summary function name from LEXER's current token and stores the
   result in *FUNCTION, *WEIGHTING and *AREA.

   The token must be an identifier.  Names ending in .LCL, .UCL or .SE are
   rejected as not yet implemented.  Otherwise the name is decomposed, ignoring
   case, into an optional U or E prefix, an optional area keyword from
   ctables_area_type_name[], and a base name looked up in
   ctables_function_info[].  A bare "PCT" base name is treated as
   CTSF_areaPCT_COUNT with effective weighting.  A prefix that the matched
   function does not accept, or an area keyword that does not agree with the
   function's is_area flag, is not accepted.

   NOTE(review): the return type, the return statements and several
   assignments are not visible in this listing.  The caller tests the result
   with '!', so presumably this returns false after reporting an error and
   true on success -- confirm against the full file. */
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
/* A leading U is consumed first; E is tried only when no U was consumed. */
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
/* Area keywords are tried in enum order, which is why CTAT_LAYER must come
   after CTAT_LAYERROW and CTAT_LAYERCOL: "LAYER" is a prefix of both. */
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Expecting summary function name."));
/* Formats the syntax name of FUNCTION with the given WEIGHTING and AREA into
   BUFFER, writing at most BUFSIZE bytes via snprintf().

   The name is the concatenation of a weighting prefix, the area keyword (only
   for functions whose is_area flag is set) and the function's base name.  The
   weighting prefix is "U" for CTW_UNWEIGHTED, empty for CTW_DICTIONARY, and
   otherwise "E" when the function accepts an E prefix.
   NOTE(review): the final alternative of the prefix expression and the return
   type are not visible in this listing. */
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
/* Returns the untranslated default label for FUNCTION with the given
   WEIGHTING and AREA.  Most labels are marked for translation with N_(); the
   "... ID" labels are deliberately plain strings.

   For most functions an "Unweighted" variant is returned when WEIGHTING is
   CTW_UNWEIGHTED.  The count, TOTALN and VALIDN labels additionally
   distinguish dictionary weighting (plain label) from effective weighting
   ("Adjusted" label).  Maximum, Minimum and Range have a single label.
   CTSF_PTILE must not reach this function: its label needs the percentile
   value and is built by ctables_summary_function_label().

   NOTE(review): the return type, the outer and inner switch statements, and
   some case labels (for the first return and for the ID labels) are not
   visible in this listing. */
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
/* w: any weighting is in effect; d: specifically dictionary weighting. */
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries.

   A ctables_summary_spec describes one summary statistic: what to calculate
   and how to display it.
   NOTE(review): the declaration of the 'label' member and part of the
   comment describing it are not visible in this listing. */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighting' specifies which
445 form of weighting the calculation uses (the member is an enum
446 ctables_weighting, not a boolean). 'calc_area' is the area over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0). */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
/* Initializes DST as a copy of SRC.  The label string is duplicated (or left
   NULL) so that DST owns its own copy.
   NOTE(review): the return type and any statement preceding the label copy
   are not visible in this listing; presumably the other members are copied
   by value first -- confirm. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
/* Releases the resources owned by S.
   NOTE(review): the body is not visible in this listing; presumably it frees
   the label string, the only member ctables_summary_spec_clone() visibly
   duplicates -- confirm. */
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions.

   A ctables_summary_spec_set is a growable array of summary specs together
   with the variable they apply to and the optional listwise variables.
   NOTE(review): the 'n', 'allocated' and 'is_scale' members that other code
   in this file uses, and the end of one comment below, are not visible in
   this listing. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs;
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is allocated
   (NULL when SRC is empty) and each spec is cloned individually.
   NOTE(review): most of the initializer for *DST is not visible in this
   listing, so how 'var' and the listwise variables are carried over cannot
   be confirmed from here. */
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
/* Releases the resources owned by SET: each contained spec is uninitialized
   and the listwise variable array is freed.
   NOTE(review): some lines are not visible in this listing; presumably the
   'specs' array itself is freed as well -- confirm. */
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
/* Checks the listwise variables of SPECS against case C, testing whether each
   variable's numeric value in C is missing.
   NOTE(review): the return type and return statements are not visible in this
   listing; presumably true as soon as one value is missing, false otherwise. */
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions.

   A ctables_postcompute is a named expression over categories.  It records
   where it was defined, its parsed expression tree, and an optional set of
   summary specs. */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
564 struct ctables_pcexpr *expr;
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
/* A node in a postcompute expression tree.  'op' selects which of the
   following members is meaningful; leaf nodes are constants or category
   references and interior nodes refer to one or two subexpressions.
   NOTE(review): the arithmetic operator enumerators (CTPO_ADD, CTPO_SUB,
   CTPO_MUL, CTPO_DIV, CTPO_POW, CTPO_NEG), the 'op', 'number' and 'nrange'
   members, and any union wrapper are not visible in this listing. */
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE. */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
/* Forward declarations for the postcompute code, plus the function type
   shared by every level of the recursive-descent expression parser: each
   level takes the lexer and dictionary and returns an expression node.
   NOTE(review): the second parameter line of ctables_find_postcompute() is
   not visible in this listing. */
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
/* Destroys expression E together with what it owns: string payloads for
   string and string-range categories, both subexpressions for operator
   nodes, and the source location.
   NOTE(review): the switch header, several case labels and the final free are
   not visible in this listing, nor is any NULL check on E.  The recursive
   call on both 'subs' slots suggests a NULL argument is tolerated
   (a negation node has only one subexpression) -- confirm. */
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
/* Allocates a new binary expression node with subexpressions SUB0 and SUB1.
   The node's location is the merger of the two operands' locations, so both
   operands must be nonnull.
   NOTE(review): the '.op' initializer and the return statement are not
   visible in this listing. */
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator: pairs a lexer token type with the expression
   node type that the token produces.
   NOTE(review): the struct tag line is not visible in this listing; the type
   is used below as struct operator. */
697 enum token_type token;
698 enum ctables_pcexpr_op op;
/* Searches the N_OPS operators in OPS for one whose token type equals LEXER's
   current token.  T_NEG_NUM is treated differently from the other tokens.
   NOTE(review): the statements after the token checks are not visible in
   this listing.  Presumably a matched token is consumed unless it is
   T_NEG_NUM (which also carries the operand), and the result is the matching
   entry or NULL -- confirm. */
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators that follows the already parsed operand
   LHS, folding each "operator operand" pair into a new binary node whose left
   side is everything parsed so far (so the chain is left-associative).

   OPS and N_OPS list the operators accepted at this precedence level, and
   PARSE_NEXT_LEVEL parses each right-hand operand.  If CHAIN_WARNING is
   nonnull, it is reported as a warning at LHS's location when more than one
   operator was consumed.  LHS is destroyed on the visible failure path.

   NOTE(review): the conditions guarding the warning and the destroy call, and
   the return statements, are not visible in this listing.  Presumably the
   warning is issued once no further operator matches, and the function
   returns the accumulated expression on success and NULL on failure. */
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first an operand via
   PARSE_NEXT_LEVEL, then any chain of the operators in OPS via
   ctables_pcexpr_parse_binary_operators__().
   NOTE(review): the handling of a failed first operand and the tail of the
   final call are not visible in this listing. */
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
/* Forward declaration: ctables_pcexpr_parse_primary() calls this to parse a
   parenthesized subexpression. */
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
/* Returns, by value, a numeric range category node covering LOW through
   HIGH.  Callers pass -DBL_MAX or DBL_MAX for an open-ended bound. */
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
/* Returns, by value, a string range category node covering LOW through HIGH.
   Callers pass a substring with a NULL 'string' member for an open-ended
   bound.  The node stores the two substrings as given, without copying. */
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
/* Recodes LEXER's current string token from UTF-8 into DICT's encoding and
   trims trailing spaces from the result.
   NOTE(review): the remaining statements are not visible in this listing;
   presumably the token is consumed and the recoded substring is returned for
   the caller to free with ss_dealloc() -- confirm. */
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression and returns it as a newly
   allocated node whose location spans the tokens consumed.

   The visible alternatives are:
     - a number (CTPO_CONSTANT);
     - the keywords MISSING, OTHERNM and TOTAL;
     - SUBTOTAL, optionally followed by a bracketed index of at least 1
       (the index stays 0 when no brackets follow);
     - a bracketed category: a number, a string, or a range written with
       THRU, where LO and HI stand for open-ended bounds;
     - a parenthesized subexpression, parsed by ctables_pcexpr_parse_add().

   String payloads already parsed are released if the closing bracket is
   missing, and a parenthesized subexpression is destroyed if the closing
   parenthesis is missing.

   NOTE(review): the failure returns, several lex_get() calls and some else
   branches are not visible in this listing; presumably every failure path
   returns NULL after reporting an error. */
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
/* A NULL 'string' member marks the open lower bound of a string range. */
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
893 lex_error (lexer, NULL);
897 if (!lex_force_match (lexer, T_RBRACK))
899 if (e.op == CTPO_CAT_STRING)
900 ss_dealloc (&e.string);
901 else if (e.op == CTPO_CAT_SRANGE)
903 ss_dealloc (&e.srange[0]);
904 ss_dealloc (&e.srange[1]);
909 else if (lex_match (lexer, T_LPAREN))
911 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
914 if (!lex_force_match (lexer, T_RPAREN))
916 ctables_pcexpr_destroy (ep);
923 lex_error (lexer, NULL);
/* The location ends at the last token consumed, one before the current one. */
927 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
928 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for SUB.  Its location runs from token
   offset START_OFS through the last token LEXER has consumed.
   NOTE(review): the '.op' and '.subs' initializers and the return statement
   are not visible in this listing; judging by the name and the callers, the
   node is a CTPO_NEG wrapping SUB -- confirm. */
931 static struct ctables_pcexpr *
932 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
933 struct lexer *lexer, int start_ofs)
935 struct ctables_pcexpr *e = xmalloc (sizeof *e);
936 *e = (struct ctables_pcexpr) {
939 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.

   In the ordinary case this is a chain of T_EXP operators over primaries,
   with a warning for chains such as a**b**c because the operator is
   left-associative.  When the current token is a negative number immediately
   followed by T_EXP, the number is instead turned into a positive constant,
   the exponentiation chain is parsed with that constant as its left operand,
   and the result is passed to ctables_pcexpr_allocate_neg(), so that the
   negation applies to the whole power.

   NOTE(review): the end of the "Special case" comment below, the '.op'
   initializer, the token-consuming call and the failure check are not
   visible in this listing. */
944 static struct ctables_pcexpr *
945 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
947 static const struct operator op = { T_EXP, CTPO_POW };
949 const char *chain_warning =
950 _("The exponentiation operator (`**') is left-associative: "
951 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
952 "To disable this warning, insert parentheses.");
954 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
955 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
956 ctables_pcexpr_parse_primary,
959 /* Special case for situations like "-5**6", which must be parsed as
962 int start_ofs = lex_ofs (lexer);
963 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
964 *lhs = (struct ctables_pcexpr) {
966 .number = -lex_tokval (lexer),
967 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
971 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
973 ctables_pcexpr_parse_primary, chain_warning, lhs);
977 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
980 /* Parses the unary minus level.

   Without a leading T_DASH this simply defers to the exponentiation level.
   Otherwise it recurses to parse the operand, so repeated minus signs nest,
   and builds a node from it with ctables_pcexpr_allocate_neg().
   NOTE(review): the check for a failed operand parse is not visible in this
   listing. */
981 static struct ctables_pcexpr *
982 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
984 int start_ofs = lex_ofs (lexer);
985 if (!lex_match (lexer, T_DASH))
986 return ctables_pcexpr_parse_exp (lexer, dict);
988 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
992 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
995 /* Parses the multiplication and division level: a chain of T_ASTERISK and
   T_SLASH operators whose operands come from the unary minus level.  No
   chaining warning is issued at this level. */
996 static struct ctables_pcexpr *
997 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
999 static const struct operator ops[] =
1001 { T_ASTERISK, CTPO_MUL },
1002 { T_SLASH, CTPO_DIV },
1005 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1006 sizeof ops / sizeof *ops,
1007 ctables_pcexpr_parse_neg, NULL);
1010 /* Parses the addition and subtraction level: a chain of T_PLUS and T_DASH
   operators whose operands come from the multiplication level.

   T_NEG_NUM is also listed, mapped to CTPO_ADD.  NOTE(review): presumably
   this handles input such as "a -5", which the lexer delivers as a single
   negative-number token, by adding the negative operand -- confirm. */
1011 static struct ctables_pcexpr *
1012 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1014 static const struct operator ops[] =
1016 { T_PLUS, CTPO_ADD },
1017 { T_DASH, CTPO_SUB },
1018 { T_NEG_NUM, CTPO_ADD },
1021 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1022 ops, sizeof ops / sizeof *ops,
1023 ctables_pcexpr_parse_mul, NULL);
1026 /* CTABLES axis expressions. */
1028 /* CTABLES has a number of extra formats that we implement via custom
1029 currency specifications on an alternate fmt_settings.
   parse_ctables_format_specifier() maps the NEGPAREN, NEQUAL, PAREN and
   PCTPAREN format names onto these types. */
1030 #define CTEF_NEGPAREN FMT_CCA
1031 #define CTEF_NEQUAL FMT_CCB
1032 #define CTEF_PAREN FMT_CCC
1033 #define CTEF_PCTPAREN FMT_CCD
/* Types for axis expressions.
   NOTE(review): only fragments are visible in this listing.  The enumerators
   of both enums, the struct ctables_axis header and its 'op' and 'scale'
   members are not shown.  Other code in this file uses CSV_CELL, CSV_TOTAL
   and N_CSVS for the summary variant and CTAO_VAR for the axis operation. */
1035 enum ctables_summary_variant
1044 enum ctables_axis_op
/* Members of an axis node: the variable and one summary spec set per summary
   variant for a variable node, two subexpressions for a nonterminal node, and
   the node's source location. */
1060 struct variable *var;
1062 struct ctables_summary_spec_set specs[N_CSVS];
1066 struct ctables_axis *subs[2];
1069 struct msg_location *loc;
/* Destroys AXIS.  The visible code releases every summary spec set, destroys
   both subexpressions recursively, and destroys the source location.
   NOTE(review): the switch on the node type, any NULL check and the final
   free are not visible in this listing.  Callers in this file pass a
   possibly-NULL pointer, so presumably NULL is accepted -- confirm. */
1073 ctables_axis_destroy (struct ctables_axis *axis)
1081 for (size_t i = 0; i < N_CSVS; i++)
1082 ctables_summary_spec_set_uninit (&axis->specs[i]);
1087 ctables_axis_destroy (axis->subs[0]);
1088 ctables_axis_destroy (axis->subs[1]);
1091 msg_location_destroy (axis->loc);
/* Allocates a new nonterminal axis node with subexpressions SUB0 and SUB1.
   Its location runs from token offset START_OFS through the last token
   LEXER has consumed.
   NOTE(review): the '.op' initializer and the return statement are not
   visible in this listing. */
1095 static struct ctables_axis *
1096 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1097 struct ctables_axis *sub0,
1098 struct ctables_axis *sub1,
1099 struct lexer *lexer, int start_ofs)
1101 struct ctables_axis *axis = xmalloc (sizeof *axis);
1102 *axis = (struct ctables_axis) {
1104 .subs = { sub0, sub1 },
1105 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State shared by the axis expression parsing functions: the lexer to read
   tokens from and the dictionary used to look up variables. */
1110 struct ctables_axis_parse_ctx
1112 struct lexer *lexer;
1113 struct dictionary *dict;
/* Returns a new pivot_value holding the label to display for SPEC.

   One path returns the default label for the spec's function.  The other
   copies the user's label, replacing each occurrence of the placeholder
   ")CILEVEL" by CILEVEL formatted with "%g".

   NOTE(review): the condition choosing between the two paths and the loop
   structure are not visible in this listing; presumably the default label is
   used when SPEC->label is NULL and the copy ends when no further placeholder
   is found. */
1116 static struct pivot_value *
1117 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1120 return ctables_summary_function_label (spec->function, spec->weighting,
1121 spec->user_area, spec->percentile);
1124 struct substring in = ss_cstr (spec->label);
1125 struct substring target = ss_cstr (")CILEVEL");
1127 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text before the next placeholder, then step past that text. */
1130 size_t chunk = ss_find_substring (in, target);
1131 ds_put_substring (&out, ss_head (in, chunk));
1132 ss_advance (&in, chunk);
1134 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1136 ss_advance (&in, target.length);
1137 ds_put_format (&out, "%g", cilevel);
/* Adds a summary spec to AXIS for variant SV, built from FUNCTION, WEIGHTING,
   AREA, PERCENTILE, LABEL, FORMAT and IS_CTABLES_FORMAT.

   For a variable node (CTAO_VAR) the function's availability is checked
   first.  A scale-only function on a variable that is not scale is reported
   as an error at LOC with a note at the axis location, unless SV is
   CSV_TOTAL.  The spec is then appended to the node's set for SV, growing the
   array as needed.  LABEL is copied, so the caller keeps ownership of its
   string.  When FORMAT is NULL, the function's default format for the
   variable is used.

   For any other node the spec is added to both subexpressions recursively.

   NOTE(review): the return type, the return statements, the case labels and
   the 'calc_area' and 'user_area' initializers are not visible in this
   listing.  The recursive call is tested with '!', so presumably this
   returns false on failure and true on success. */
1143 add_summary_spec (struct ctables_axis *axis,
1144 enum ctables_summary_function function,
1145 enum ctables_weighting weighting,
1146 enum ctables_area_type area, double percentile,
1147 const char *label, const struct fmt_spec *format,
1148 bool is_ctables_format, const struct msg_location *loc,
1149 enum ctables_summary_variant sv)
1151 if (axis->op == CTAO_VAR)
/* The function's syntax name is formatted up front for use in messages. */
1153 char function_name[128];
1154 ctables_summary_function_name (function, weighting, area,
1155 function_name, sizeof function_name);
1156 const char *var_name = var_get_name (axis->var);
1157 switch (ctables_function_availability (function))
1161 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1162 "response sets."), function_name);
1163 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1169 if (!axis->scale && sv != CSV_TOTAL)
1172 _("Summary function %s applies only to scale variables."),
1174 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1184 struct ctables_summary_spec_set *set = &axis->specs[sv];
1185 if (set->n >= set->allocated)
1186 set->specs = x2nrealloc (set->specs, &set->allocated,
1187 sizeof *set->specs);
1189 struct ctables_summary_spec *dst = &set->specs[set->n++];
1190 *dst = (struct ctables_summary_spec) {
1191 .function = function,
1192 .weighting = weighting,
1195 .percentile = percentile,
1196 .label = xstrdup_if_nonnull (label),
1197 .format = (format ? *format
1198 : ctables_summary_default_format (function, axis->var)),
1199 .is_ctables_format = is_ctables_format,
1205 for (size_t i = 0; i < 2; i++)
1206 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1207 percentile, label, format, is_ctables_format,
/* Forward declaration: ctables_axis_parse_primary() calls this to parse a
   parenthesized axis expression. */
1214 static struct ctables_axis *ctables_axis_parse_stack (
1215 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression or
   a single variable name, optionally followed by [S] or [C].

   An identifier starting with '$' is rejected because multiple response sets
   are not implemented.  For a variable, [S] forces scale treatment and [C]
   forces categorical treatment; without either, the variable counts as scale
   when its measurement level is MEASURE_SCALE.  Treating a string variable as
   scale is an error, and the new node is destroyed in that case.

   NOTE(review): the return statements and the check for a failed variable
   parse are not visible in this listing; presumably NULL is returned on every
   failure path. */
1217 static struct ctables_axis *
1218 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1220 if (lex_match (ctx->lexer, T_LPAREN))
1222 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1223 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1225 ctables_axis_destroy (sub);
1231 if (!lex_force_id (ctx->lexer))
1234 if (lex_tokcstr (ctx->lexer)[0] == '$')
1236 lex_error (ctx->lexer,
1237 _("Multiple response set support not implemented."));
1241 int start_ofs = lex_ofs (ctx->lexer);
1242 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1246 struct ctables_axis *axis = xmalloc (sizeof *axis);
1247 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1249 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1250 : lex_match_phrase (ctx->lexer, "[C]") ? false
1251 : var_get_measure (var) == MEASURE_SCALE);
1252 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1253 lex_ofs (ctx->lexer) - 1);
1254 if (axis->scale && var_is_alpha (var))
1256 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1258 var_get_name (var));
1259 ctables_axis_destroy (axis);
/* Reports whether the null-terminated string S contains at least one decimal
   digit.  strcspn() gives the length of the leading run without digits, so
   the byte at that index is the first digit, or the terminating null byte
   when S has no digit. */
1267 has_digit (const char *s)
1269 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT and sets
   *IS_CTABLES_FORMAT to say whether it is one of the CTABLES-specific
   formats.

   The type name is compared, ignoring case, with NEGPAREN, NEQUAL, PAREN and
   PCTPAREN, which map to the corresponding CTEF_* types.  Any other name is
   handled as an ordinary format, which must also be a valid output format
   compatible with numeric values.  For the CTABLES-specific formats, two
   problems are reported: a width below 2, and decimals that are not smaller
   than the width.

   NOTE(review): the return type, several return statements and the condition
   guarding the width message are not visible in this listing.  The caller
   tests the result with '!', so presumably false means failure. */
1273 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1274 bool *is_ctables_format)
1276 char type[FMT_TYPE_LEN_MAX + 1];
1277 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1280 if (!strcasecmp (type, "NEGPAREN"))
1281 format->type = CTEF_NEGPAREN;
1282 else if (!strcasecmp (type, "NEQUAL"))
1283 format->type = CTEF_NEQUAL;
1284 else if (!strcasecmp (type, "PAREN"))
1285 format->type = CTEF_PAREN;
1286 else if (!strcasecmp (type, "PCTPAREN"))
1287 format->type = CTEF_PCTPAREN;
1290 *is_ctables_format = false;
1291 return (parse_format_specifier (lexer, format)
1292 && fmt_check_output (format)
1293 && fmt_check_type_compat (format, VAL_NUMERIC));
1299 lex_next_error (lexer, -1, -1,
1300 _("Output format %s requires width 2 or greater."), type);
1303 else if (format->d > format->w - 1)
1305 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1306 "greater than decimals."), type);
1311 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list of
   summary specifications.

   Each specification is a summary function name, then a percentile in the
   range 0 to 100 for PTILE only, then an optional string label, then an
   optional format (recognized as an identifier containing a digit).  Each one
   is added to the axis with add_summary_spec().  Specifications may be
   separated by commas.  A TOTALS keyword followed by an opening bracket is
   accepted while parsing cell specifications, and while parsing totals a
   closing bracket must be followed by a second closing bracket.  The axis
   parsed so far is destroyed on the visible failure path.

   NOTE(review): the loop structure, the return statements, the declaration of
   'label', the assignments to 'formatp' and the switch of 'sv' to CSV_TOTAL
   are not visible in this listing.  Whether the result of add_summary_spec()
   is checked, and whether 'label' is freed after the call, cannot be
   confirmed from here. */
1316 static struct ctables_axis *
1317 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1319 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1320 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1323 enum ctables_summary_variant sv = CSV_CELL;
1326 int start_ofs = lex_ofs (ctx->lexer);
1328 /* Parse function. */
1329 enum ctables_summary_function function;
1330 enum ctables_weighting weighting;
1331 enum ctables_area_type area;
1332 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1336 /* Parse percentile. */
1337 double percentile = 0;
1338 if (function == CTSF_PTILE)
1340 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1342 percentile = lex_number (ctx->lexer);
1343 lex_get (ctx->lexer);
1348 if (lex_is_string (ctx->lexer))
1350 label = ss_xstrdup (lex_tokss (ctx->lexer));
1351 lex_get (ctx->lexer);
1355 struct fmt_spec format;
1356 const struct fmt_spec *formatp;
1357 bool is_ctables_format = false;
1358 if (lex_token (ctx->lexer) == T_ID
1359 && has_digit (lex_tokcstr (ctx->lexer)))
1361 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1362 &is_ctables_format))
/* The location of the whole specification is used for error messages. */
1372 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1373 lex_ofs (ctx->lexer) - 1);
1374 add_summary_spec (sub, function, weighting, area, percentile, label,
1375 formatp, is_ctables_format, loc, sv);
1377 msg_location_destroy (loc);
1379 lex_match (ctx->lexer, T_COMMA);
1380 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1382 if (!lex_force_match (ctx->lexer, T_LBRACK))
1386 else if (lex_match (ctx->lexer, T_RBRACK))
1388 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1395 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A CTAO_VAR node yields itself when its
   'scale' flag is set and NULL otherwise; any other node is searched through
   both of its subs[]. */
1399 static const struct ctables_axis *
1400 find_scale (const struct ctables_axis *axis)
1404 else if (axis->op == CTAO_VAR)
1405 return axis->scale ? axis : NULL;
1408 for (size_t i = 0; i < 2; i++)
1410 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a categorical variable that has summaries.  A CTAO_VAR
   node yields itself when it is not scale and has at least one CSV_CELL
   summary specification, and NULL otherwise; any other node is searched
   through both of its subs[]. */
1418 static const struct ctables_axis *
1419 find_categorical_summary_spec (const struct ctables_axis *axis)
1423 else if (axis->op == CTAO_VAR)
1424 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1427 for (size_t i = 0; i < 2; i++)
1429 const struct ctables_axis *sum
1430 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix axis expressions joined by '>' (T_GT),
   combining each pair into a CTAO_NEST node.

   Two diagnostics are visible here, each followed by destruction of the new
   node: nesting is rejected when both operands contain a scale variable, and
   when the outer (left) operand contains a categorical variable that has
   summaries. */
1438 static struct ctables_axis *
1439 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1441 int start_ofs = lex_ofs (ctx->lexer);
1442 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1446 while (lex_match (ctx->lexer, T_GT))
1448 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1451 ctables_axis_destroy (lhs);
1455 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1456 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nesting may contribute a scale variable. */
1458 const struct ctables_axis *outer_scale = find_scale (lhs);
1459 const struct ctables_axis *inner_scale = find_scale (rhs);
1460 if (outer_scale && inner_scale)
1462 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1463 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1464 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1465 ctables_axis_destroy (nest);
/* Summaries on a categorical variable in the outer operand are diagnosed. */
1469 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1472 msg_at (SE, nest->loc,
1473 _("Summaries may only be requested for categorical variables "
1474 "at the innermost nesting level."));
1475 msg_at (SN, outer_sum->loc,
1476 _("This outer categorical variable has a summary."));
1477 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by '+' (T_PLUS).  Each new
   right-hand operand is combined with everything parsed so far into a
   CTAO_STACK node, so the resulting tree grows to the left. */
1487 static struct ctables_axis *
1488 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1490 int start_ofs = lex_ofs (ctx->lexer);
1491 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1495 while (lex_match (ctx->lexer, T_PLUS))
1497 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1500 ctables_axis_destroy (lhs);
1504 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1505 ctx->lexer, start_ofs);
/* Entry point for parsing one axis expression from LEXER; the result of
   ctables_axis_parse_stack() is stored in *AXISP.

   The tokens BY, '/', and end-of-command are tested before any parsing is
   attempted (presumably these denote an empty axis -- TODO confirm). */
1512 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1513 struct ctables_axis **axisp)
1516 if (lex_token (lexer) == T_BY
1517 || lex_token (lexer) == T_SLASH
1518 || lex_token (lexer) == T_ENDCMD)
1521 struct ctables_axis_parse_ctx ctx = {
1525 *axisp = ctables_axis_parse_stack (&ctx);
1529 /* CTABLES categories. */
/* A list of categories.  Code elsewhere in this file also refers to members
   named n_refs, n_cats, and show_empty. */
1531 struct ctables_categories
/* The categories themselves; loops over this array run up to n_cats. */
1534 struct ctables_category *cats;
/* A single category within a struct ctables_categories.  The category type
   determines which of the remaining members are meaningful, as noted on each
   member below. */
1539 struct ctables_category
1541 enum ctables_category_type
1543 /* Explicit category lists. */
1546 CCT_NRANGE, /* Numerical range. */
1547 CCT_SRANGE, /* String range. */
1552 /* Totals and subtotals. */
1556 /* Implicit category lists. */
1561 /* For contributing to TOTALN. */
1562 CCT_EXCLUDED_MISSING,
1566 struct ctables_category *subtotal;
1572 double number; /* CCT_NUMBER. */
1573 struct substring string; /* CCT_STRING, in dictionary encoding. */
1574 double nrange[2]; /* CCT_NRANGE. */
1575 struct substring srange[2]; /* CCT_SRANGE. */
1579 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1580 bool hide_subcategories; /* CCT_SUBTOTAL. */
1583 /* CCT_POSTCOMPUTE. */
1586 const struct ctables_postcompute *pc;
1587 enum fmt_type parse_format;
1590 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1593 bool include_missing;
1594 bool sort_ascending;
1597 enum ctables_summary_function sort_function;
1598 enum ctables_weighting weighting;
1599 enum ctables_area_type area;
1600 struct variable *sort_var;
1605 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1606 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1607 struct msg_location *location;
/* Releases the resources held by CAT: its source location and, depending on
   the category type, its string, its two string-range endpoints, or its total
   label.  CAT itself is not freed by the statements visible here. */
1611 ctables_category_uninit (struct ctables_category *cat)
1616 msg_location_destroy (cat->location);
1623 case CCT_POSTCOMPUTE:
1627 ss_dealloc (&cat->string);
1631 ss_dealloc (&cat->srange[0]);
1632 ss_dealloc (&cat->srange[1]);
1637 free (cat->total_label);
1645 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be "null" (that is, have a
   null 'string' pointer).  Two null substrings are equal, a null and a
   non-null substring are unequal, and two non-null substrings are compared
   with ss_equals(). */
1651 nullable_substring_equal (const struct substring *a,
1652 const struct substring *b)
1654 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are never
   equal; otherwise the comparison looks only at the members relevant to the
   shared type (number, string, numeric range, string range, postcompute
   pointer, total label, or sort settings). */
1658 ctables_category_equal (const struct ctables_category *a,
1659 const struct ctables_category *b)
1661 if (a->type != b->type)
1667 return a->number == b->number;
1670 return ss_equals (a->string, b->string);
1673 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range endpoints may be null, meaning unbounded. */
1676 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1677 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
/* Postcomputes are compared by identity, not by content. */
1683 case CCT_POSTCOMPUTE:
1684 return a->pc == b->pc;
1688 return !strcmp (a->total_label, b->total_label);
/* NOTE(review): the 'weighting' and 'area' members are not part of this
   comparison -- confirm that this is intentional. */
1693 return (a->include_missing == b->include_missing
1694 && a->sort_ascending == b->sort_ascending
1695 && a->sort_function == b->sort_function
1696 && a->sort_var == b->sort_var
1697 && a->percentile == b->percentile);
1699 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C, whose reference count must be positive on entry.
   The visible cleanup uninitializes every category in C (presumably only once
   the last reference is gone -- TODO confirm). */
1707 ctables_categories_unref (struct ctables_categories *c)
1712 assert (c->n_refs > 0);
1716 for (size_t i = 0; i < c->n_cats; i++)
1717 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B: they must agree in the number of
   categories and in 'show_empty', and their categories are then compared
   pairwise, in order, with ctables_category_equal(). */
1723 ctables_categories_equal (const struct ctables_categories *a,
1724 const struct ctables_categories *b)
1726 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1729 for (size_t i = 0; i < a->n_cats; i++)
1730 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns a category, by value, whose numeric range endpoints are LOW and
   HIGH. */
1736 static struct ctables_category
1737 cct_nrange (double low, double high)
1739 return (struct ctables_category) {
1741 .nrange = { low, high }
/* Returns a category, by value, whose string range endpoints are LOW and
   HIGH.  The substrings are stored as given, not copied. */
1745 static struct ctables_category
1746 cct_srange (struct substring low, struct substring high)
1748 return (struct ctables_category) {
1750 .srange = { low, high }
/* Parses the optional "= 'label'" that may follow SUBTOTAL or HSUBTOTAL and
   initializes *CAT as a CCT_SUBTOTAL category.  Without an explicit label the
   translated string "Subtotal" is used.  HIDE_SUBCATEGORIES is stored in the
   new category unchanged. */
1755 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1756 struct ctables_category *cat)
1759 if (lex_match (lexer, T_EQUALS))
1761 if (!lex_force_string (lexer))
1764 total_label = ss_xstrdup (lex_tokss (lexer));
1768 total_label = xstrdup (_("Subtotal"));
1770 *cat = (struct ctables_category) {
1771 .type = CCT_SUBTOTAL,
1772 .hide_subcategories = hide_subcategories,
1773 .total_label = total_label
/* Parses one explicit category from LEXER into *CAT.  The forms recognized
   here are:

     OTHERNM
     MISSING
     SUBTOTAL or HSUBTOTAL (see ctables_table_parse_subtotal())
     LO THRU <string or number>
     <number> [THRU (HI | <number>)]
     <string> [THRU (HI | <string>)]
     &<postcompute name>

   Open-ended numeric ranges use -DBL_MAX and DBL_MAX as endpoints; open-ended
   string ranges use a substring with a null 'string' pointer.  Anything else
   is reported with lex_error(). */
1779 ctables_table_parse_explicit_category (struct lexer *lexer,
1780 struct dictionary *dict,
1782 struct ctables_category *cat)
1784 if (lex_match_id (lexer, "OTHERNM"))
1785 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1786 else if (lex_match_id (lexer, "MISSING"))
1787 *cat = (struct ctables_category) { .type = CCT_MISSING };
1788 else if (lex_match_id (lexer, "SUBTOTAL"))
1789 return ctables_table_parse_subtotal (lexer, false, cat);
1790 else if (lex_match_id (lexer, "HSUBTOTAL"))
1791 return ctables_table_parse_subtotal (lexer, true, cat);
1792 else if (lex_match_id (lexer, "LO"))
1794 if (!lex_force_match_id (lexer, "THRU"))
1796 if (lex_is_string (lexer))
1798 struct substring sr0 = { .string = NULL };
1799 struct substring sr1 = parse_substring (lexer, dict);
1800 *cat = cct_srange (sr0, sr1);
1802 else if (lex_force_num (lexer))
1804 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1810 else if (lex_is_number (lexer))
1812 double number = lex_number (lexer);
1814 if (lex_match_id (lexer, "THRU"))
1816 if (lex_match_id (lexer, "HI"))
1817 *cat = cct_nrange (number, DBL_MAX);
1820 if (!lex_force_num (lexer))
1822 *cat = cct_nrange (number, lex_number (lexer));
1827 *cat = (struct ctables_category) {
1832 else if (lex_is_string (lexer))
1834 struct substring s = parse_substring (lexer, dict);
1835 if (lex_match_id (lexer, "THRU"))
1837 if (lex_match_id (lexer, "HI"))
1839 struct substring sr1 = { .string = NULL };
1840 *cat = cct_srange (s, sr1);
1844 if (!lex_force_string (lexer))
1849 struct substring sr1 = parse_substring (lexer, dict);
1850 *cat = cct_srange (s, sr1);
1854 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* '&' followed by an identifier names a postcompute, which must be found by
   ctables_find_postcompute(). */
1856 else if (lex_match (lexer, T_AND))
1858 if (!lex_force_id (lexer))
1860 struct ctables_postcompute *pc = ctables_find_postcompute (
1861 ct, lex_tokcstr (lexer));
1864 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1865 msg_at (SE, loc, _("Unknown postcompute &%s."),
1866 lex_tokcstr (lexer));
1867 msg_location_destroy (loc);
1872 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1876 lex_error (lexer, NULL);
/* Parses S as data in FORMAT with data_in(), using DICT's encoding and the
   current format settings.  If data_in() reports an error, a message that
   names FORMAT and includes the error text is issued at LOCATION.  N is the
   output parameter for the parsed number (presumably written only on success
   -- TODO confirm). */
1884 parse_category_string (struct msg_location *location,
1885 struct substring s, const struct dictionary *dict,
1886 enum fmt_type format, double *n)
1889 char *error = data_in (s, dict_get_encoding (dict), format,
1890 settings_get_fmt_settings (), &v, 0, NULL);
1893 msg_at (SE, location,
1894 _("Failed to parse category specification as format %s: %s."),
1895 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The kind of reference (E's operation, one of the CTPO_CAT_* values
   below) selects the category type to look for and which members must match:
   the number, the string, both numeric range endpoints, or both string range
   endpoints (where a null endpoint matches only a null endpoint).

   For CTPO_CAT_SUBTOTAL, the subtotals seen are counted in N_SUBTOTALS and
   compared with E's subtotal_index; an index of 0 gets special handling when
   CATS holds more than one subtotal. */
1904 static struct ctables_category *
1905 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1906 const struct ctables_pcexpr *e)
1908 struct ctables_category *best = NULL;
1909 size_t n_subtotals = 0;
1910 for (size_t i = 0; i < cats->n_cats; i++)
1912 struct ctables_category *cat = &cats->cats[i];
1915 case CTPO_CAT_NUMBER:
1916 if (cat->type == CCT_NUMBER && cat->number == e->number)
1920 case CTPO_CAT_STRING:
1921 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1925 case CTPO_CAT_NRANGE:
1926 if (cat->type == CCT_NRANGE
1927 && cat->nrange[0] == e->nrange[0]
1928 && cat->nrange[1] == e->nrange[1])
1932 case CTPO_CAT_SRANGE:
1933 if (cat->type == CCT_SRANGE
1934 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1935 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1939 case CTPO_CAT_MISSING:
1940 if (cat->type == CCT_MISSING)
1944 case CTPO_CAT_OTHERNM:
1945 if (cat->type == CCT_OTHERNM)
1949 case CTPO_CAT_SUBTOTAL:
1950 if (cat->type == CCT_SUBTOTAL)
1953 if (e->subtotal_index == n_subtotals)
1955 else if (e->subtotal_index == 0)
1960 case CTPO_CAT_TOTAL:
1961 if (cat->type == CCT_TOTAL)
/* A reference to "the" subtotal without a position, when there are several
   subtotals to choose from. */
1975 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to.

   When PARSE_FORMAT is something other than FMT_F, string references are
   first converted to numbers with parse_category_string(): a CTPO_CAT_STRING
   becomes a CTPO_CAT_NUMBER and a CTPO_CAT_SRANGE becomes a CTPO_CAT_NRANGE,
   with null range endpoints mapped to -DBL_MAX and DBL_MAX.  The converted
   node keeps E's location.  In all other cases E is looked up as is. */
1980 static struct ctables_category *
1981 ctables_find_category_for_postcompute (const struct dictionary *dict,
1982 const struct ctables_categories *cats,
1983 enum fmt_type parse_format,
1984 const struct ctables_pcexpr *e)
1986 if (parse_format != FMT_F)
1988 if (e->op == CTPO_CAT_STRING)
1991 if (!parse_category_string (e->location, e->string, dict,
1992 parse_format, &number))
1995 struct ctables_pcexpr e2 = {
1996 .op = CTPO_CAT_NUMBER,
1998 .location = e->location,
2000 return ctables_find_category_for_postcompute__ (cats, &e2);
2002 else if (e->op == CTPO_CAT_SRANGE)
/* A null endpoint means that the range is open on that side. */
2005 if (!e->srange[0].string)
2006 nrange[0] = -DBL_MAX;
2007 else if (!parse_category_string (e->location, e->srange[0], dict,
2008 parse_format, &nrange[0]))
2011 if (!e->srange[1].string)
2012 nrange[1] = DBL_MAX;
2013 else if (!parse_category_string (e->location, e->srange[1], dict,
2014 parse_format, &nrange[1]))
2017 struct ctables_pcexpr e2 = {
2018 .op = CTPO_CAT_NRANGE,
2019 .nrange = { nrange[0], nrange[1] },
2020 .location = e->location,
2022 return ctables_find_category_for_postcompute__ (cats, &e2);
2025 return ctables_find_category_for_postcompute__ (cats, e);
/* Builds a substring over the string data of V, taking its length from the
   width of VAR, and trims trailing spaces from it.  The substring refers to
   V's own bytes (via ss_buffer()) rather than to a copy. */
2028 static struct substring
2029 rtrim_value (const union value *v, const struct variable *var)
2031 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2032 var_get_width (var));
2033 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of variable VAR, with trailing spaces
   removed, lies within the range SRANGE[0] through SRANGE[1], both inclusive.
   An endpoint with a null 'string' pointer imposes no bound on that side. */
2038 in_string_range (const union value *v, const struct variable *var,
2039 const struct substring *srange)
2041 struct substring s = rtrim_value (v, var);
2042 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2043 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   The system-missing value of a numeric variable is tested for up front.  The
   categories are then examined from last to first, each according to its
   type: equal number, equal string (ignoring trailing spaces in V), numeric
   range, string range, or missing value.  For numeric ranges an endpoint of
   -DBL_MAX or DBL_MAX is treated as unbounded.  When the loop finds nothing,
   the result is OTHERNM unless V is missing in VAR, in which case it is
   NULL. */
2046 static const struct ctables_category *
2047 ctables_categories_match (const struct ctables_categories *c,
2048 const union value *v, const struct variable *var)
2050 if (var_is_numeric (var) && v->f == SYSMIS)
2053 const struct ctables_category *othernm = NULL;
/* Scan from the last category to the first. */
2054 for (size_t i = c->n_cats; i-- > 0; )
2056 const struct ctables_category *cat = &c->cats[i];
2060 if (cat->number == v->f)
2065 if (ss_equals (cat->string, rtrim_value (v, var)))
2070 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2071 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2076 if (in_string_range (v, var, cat->srange))
2081 if (var_is_value_missing (var, v))
2085 case CCT_POSTCOMPUTE:
2100 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2103 case CCT_EXCLUDED_MISSING:
2108 return var_is_value_missing (var, v) ? NULL : othernm;
/* Looks for a CCT_TOTAL category at either end of C's category list, checking
   the first category and then the last one.
   NOTE(review): this indexes cats[n_cats - 1], so it appears to assume that C
   has at least one category -- confirm against callers. */
2111 static const struct ctables_category *
2112 ctables_categories_total (const struct ctables_categories *c)
2114 const struct ctables_category *first = &c->cats[0];
2115 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2116 return (first->type == CCT_TOTAL ? first
2117 : last->type == CCT_TOTAL ? last
/* Formats NUMBER as if it were a value of VAR, by wrapping it in a temporary
   pivot value and passing S to pivot_value_format() as the output string.  The
   temporary pivot value is destroyed before returning. */
2122 ctables_category_format_number (double number, const struct variable *var,
2125 struct pivot_value *pv = pivot_value_new_var_value (
2126 var, &(union value) { .f = number });
2127 pivot_value_format (pv, NULL, s);
2128 pivot_value_destroy (pv);
/* Formats STRING as if it were a value of VAR, passing OUT to
   pivot_value_format() as the output string.  STRING is first copied into a
   buffer of VAR's width, padded on the right with spaces, because the
   temporary pivot value is built from a buffer of exactly that width. */
2132 ctables_category_format_string (struct substring string,
2133 const struct variable *var, struct string *out)
2135 int width = var_get_width (var);
2136 char *s = xmalloc (width);
2137 buf_copy_rpad (s, width, string.string, string.length, ' ');
2138 struct pivot_value *pv = pivot_value_new_var_value (
2139 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2140 pivot_value_format (pv, NULL, out);
2141 pivot_value_destroy (pv);
/* Produces a textual label for category CAT of variable VAR in S.  Depending
   on the category type the label is the formatted number or string, the two
   range endpoints separated by " THRU ", the keyword MISSING or OTHERNM, '&'
   followed by the postcompute's name, or the category's total label. */
2146 ctables_category_format_label (const struct ctables_category *cat,
2147 const struct variable *var,
2153 ctables_category_format_number (cat->number, var, s);
2157 ctables_category_format_string (cat->string, var, s);
2161 ctables_category_format_number (cat->nrange[0], var, s);
2162 ds_put_format (s, " THRU ");
2163 ctables_category_format_number (cat->nrange[1], var, s);
2167 ctables_category_format_string (cat->srange[0], var, s);
2168 ds_put_format (s, " THRU ");
2169 ctables_category_format_string (cat->srange[1], var, s);
2173 ds_put_cstr (s, "MISSING");
2177 ds_put_cstr (s, "OTHERNM");
2180 case CCT_POSTCOMPUTE:
2181 ds_put_format (s, "&%s", cat->pc->name);
2186 ds_put_cstr (s, cat->total_label);
2192 case CCT_EXCLUDED_MISSING:
/* Checks the category references in postcompute expression E, which belongs
   to computed category PC_CAT, against the category list CATS.
   CATS_LOCATION is where CATS was specified; it is used for diagnostics.

   Each CTPO_CAT_* node is looked up with
   ctables_find_category_for_postcompute().  Two problems are diagnosed here:
   a subtotal reference without a position when CATS contains more than one
   subtotal, and a reference to a category that CATS does not contain.  Each
   gets an error plus notes that point at the offending reference and suggest
   a fix.  The two subexpressions of E, where present, are checked
   recursively. */
2200 ctables_recursive_check_postcompute (struct dictionary *dict,
2201 const struct ctables_pcexpr *e,
2202 struct ctables_category *pc_cat,
2203 const struct ctables_categories *cats,
2204 const struct msg_location *cats_location)
2208 case CTPO_CAT_NUMBER:
2209 case CTPO_CAT_STRING:
2210 case CTPO_CAT_NRANGE:
2211 case CTPO_CAT_SRANGE:
2212 case CTPO_CAT_MISSING:
2213 case CTPO_CAT_OTHERNM:
2214 case CTPO_CAT_SUBTOTAL:
2215 case CTPO_CAT_TOTAL:
2217 struct ctables_category *cat = ctables_find_category_for_postcompute (
2218 dict, cats, pc_cat->parse_format, e);
/* A subtotal reference with index 0 carries no position; count the subtotals
   in CATS to find out whether that is ambiguous. */
2221 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2223 size_t n_subtotals = 0;
2224 for (size_t i = 0; i < cats->n_cats; i++)
2225 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2226 if (n_subtotals > 1)
2228 msg_at (SE, cats_location,
2229 ngettext ("These categories include %zu instance "
2230 "of SUBTOTAL or HSUBTOTAL, so references "
2231 "from computed categories must refer to "
2232 "subtotals by position, "
2233 "e.g. SUBTOTAL[1].",
2234 "These categories include %zu instances "
2235 "of SUBTOTAL or HSUBTOTAL, so references "
2236 "from computed categories must refer to "
2237 "subtotals by position, "
2238 "e.g. SUBTOTAL[1].",
2241 msg_at (SN, e->location,
2242 _("This is the reference that lacks a position."));
/* Diagnostics for a reference to a category that is not in CATS. */
2247 msg_at (SE, pc_cat->location,
2248 _("Computed category &%s references a category not included "
2249 "in the category list."),
2251 msg_at (SN, e->location, _("This is the missing category."));
2252 if (e->op == CTPO_CAT_SUBTOTAL)
2253 msg_at (SN, cats_location,
2254 _("To fix the problem, add subtotals to the "
2255 "list of categories here."));
2256 else if (e->op == CTPO_CAT_TOTAL)
2257 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2258 "CATEGORIES specification."));
2260 msg_at (SN, cats_location,
2261 _("To fix the problem, add the missing category to the "
2262 "list of categories here."));
2265 if (pc_cat->pc->hide_source_cats)
/* Check both operands, skipping any that is null. */
2279 for (size_t i = 0; i < 2; i++)
2280 if (e->subs[i] && !ctables_recursive_check_postcompute (
2281 dict, e->subs[i], pc_cat, cats, cats_location))
/* Creates the label for postcompute category CAT from its postcompute's
   'label' string, expanding each occurrence of ")LABEL[n]" into the label of
   the Nth category in CATS (N counts from 1), as formatted by
   ctables_category_format_label() for VAR.

   If the label contains no ")LABEL[" at all, it is used as is.  The final
   statement visible here returns the unexpanded label verbatim (presumably
   the fallback for a malformed or out-of-range index -- TODO confirm). */
2289 static struct pivot_value *
2290 ctables_postcompute_label (const struct ctables_categories *cats,
2291 const struct ctables_category *cat,
2292 const struct variable *var)
2294 struct substring in = ss_cstr (cat->pc->label);
2295 struct substring target = ss_cstr (")LABEL[");
2297 struct string out = DS_EMPTY_INITIALIZER;
2300 size_t chunk = ss_find_substring (in, target);
/* No further ")LABEL[": emit whatever input remains. */
2301 if (chunk == SIZE_MAX)
2303 if (ds_is_empty (&out))
2304 return pivot_value_new_user_text (in.string, in.length);
2307 ds_put_substring (&out, in);
2308 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before ")LABEL[" and then skip over the marker itself. */
2312 ds_put_substring (&out, ss_head (in, chunk));
2313 ss_advance (&in, chunk + target.length);
/* The index is the decimal number that runs up to the closing ']'. */
2315 struct substring idx_s;
2316 if (!ss_get_until (&in, ']', &idx_s))
2319 long int idx = strtol (idx_s.string, &tail, 10);
2320 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2323 struct ctables_category *cat2 = &cats->cats[idx - 1];
2324 if (!ctables_category_format_label (cat2, var, &out))
2330 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns the pivot value used to label category CAT:

   - For a postcompute category whose postcompute has a label, the expanded
     label from ctables_postcompute_label().

   - For a total or subtotal category, its total label as user text.

   - Otherwise, VALUE presented as a value of VAR. */
2333 static struct pivot_value *
2334 ctables_category_create_value_label (const struct ctables_categories *cats,
2335 const struct ctables_category *cat,
2336 const struct variable *var,
2337 const union value *value)
2339 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2340 ? ctables_postcompute_label (cats, cat, var)
2341 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2342 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2343 : pivot_value_new_var_value (var, value));
2346 /* CTABLES variable nesting and stacking. */
2348 /* A nested sequence of variables, e.g. a > b > c. */
/* The nested variables. */
2351 struct variable **vars;
/* One array, and its element count, per area type. */
2355 size_t *areas[N_CTATS];
2356 size_t n_areas[N_CTATS];
/* One set of summary specifications per summary variant. */
2359 struct ctables_summary_spec_set specs[N_CSVS];
2362 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2363 struct ctables_stack
/* The nestings; code elsewhere in this file keeps their count in member 'n'. */
2365 struct ctables_nest *nests;
/* Releases the resources held by NEST: the summary specification set for
   every summary variant and the 'areas' array for every area type. */
2370 ctables_nest_uninit (struct ctables_nest *nest)
2373 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2374 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2375 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2376 free (nest->areas[at]);
/* Releases the resources held by STACK: every nest in it is uninitialized and
   then the array of nests is freed. */
2380 ctables_stack_uninit (struct ctables_stack *stack)
2384 for (size_t i = 0; i < stack->n; i++)
2385 ctables_nest_uninit (&stack->nests[i]);
2386 free (stack->nests);
/* Nests stack S1 inside stack S0: the result holds one nest for every pair of
   a nest A from S0 and a nest B from S1, whose variables are A's followed by
   B's.

   The scale and summary indexes of the new nest come from A if A has one, and
   otherwise from B shifted by the number of variables in A.  The summary
   specifications are cloned from whichever of the two nests is chosen as
   'summary_src'.  Both S0 and S1 are uninitialized before returning. */
2390 static struct ctables_stack
2391 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2398 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2399 for (size_t i = 0; i < s0.n; i++)
2400 for (size_t j = 0; j < s1.n; j++)
2402 const struct ctables_nest *a = &s0.nests[i];
2403 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists of A and B. */
2405 size_t allocate = a->n + b->n;
2406 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2408 for (size_t k = 0; k < a->n; k++)
2409 vars[n++] = a->vars[k];
2410 for (size_t k = 0; k < b->n; k++)
2411 vars[n++] = b->vars[k];
2412 assert (n == allocate);
2414 const struct ctables_nest *summary_src;
2415 if (!a->specs[CSV_CELL].var)
2417 else if (!b->specs[CSV_CELL].var)
2422 struct ctables_nest *new = &stack.nests[stack.n++];
2423 *new = (struct ctables_nest) {
2425 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2426 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2428 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2429 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2433 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2434 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2436 ctables_stack_uninit (&s0);
2437 ctables_stack_uninit (&s1);
/* Concatenates stacks S0 and S1 into a new stack that holds S0's nests
   followed by S1's.  The nests are copied by plain structure assignment.  The
   'group_head' of each nest taken from S1 is increased by the number of nests
   in S0. */
2441 static struct ctables_stack
2442 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2444 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2445 for (size_t i = 0; i < s0.n; i++)
2446 stack.nests[stack.n++] = s0.nests[i];
2447 for (size_t i = 0; i < s1.n; i++)
2449 stack.nests[stack.n] = s1.nests[i];
2450 stack.nests[stack.n].group_head += s0.n;
2453 assert (stack.n == s0.n + s1.n);
/* Builds a stack that contains a single nest for variable axis node A.  The
   nest's scale index is 0 if A is scale and SIZE_MAX otherwise; its summary
   index is 0 if A has CSV_CELL summary specifications or is scale, and
   SIZE_MAX otherwise.  A's summary specification sets are cloned into the
   nest and tagged with A's variable and scale flag. */
2459 static struct ctables_stack
2460 var_fts (const struct ctables_axis *a)
2462 struct variable **vars = xmalloc (sizeof *vars);
2465 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2466 struct ctables_nest *nest = xmalloc (sizeof *nest);
2467 *nest = (struct ctables_nest) {
2470 .scale_idx = a->scale ? 0 : SIZE_MAX,
2471 .summary_idx = is_summary ? 0 : SIZE_MAX,
2474 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2476 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2477 nest->specs[sv].var = a->var;
2478 nest->specs[sv].is_scale = a->scale;
2480 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts axis expression A into a stack of nests by recursion on its two
   subexpressions: one kind of node combines them with stack_fts() and another
   with nest_fts().  One path returns a stack with no nests.  AXIS_TYPE is
   passed down to the recursive calls unchanged. */
2483 static struct ctables_stack
2484 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2487 return (struct ctables_stack) { .n = 0 };
2495 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2496 enumerate_fts (axis_type, a->subs[1]));
2499 /* This should consider any of the scale variables found in the result to
2500 be linked to each other listwise for SMISSING=LISTWISE. */
2501 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2502 enumerate_fts (axis_type, a->subs[1]));
2508 /* CTABLES summary calculation. */
/* The running state for one summary of one cell.  Which member is in use
   depends on the summary function, as grouped by the comments below. */
2510 union ctables_summary
2512 /* COUNT, VALIDN, TOTALN. */
2515 /* MINIMUM, MAXIMUM, RANGE. */
2522 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2523 struct moments1 *moments;
2525 /* MEDIAN, MODE, PTILE. */
2528 struct casewriter *writer;
/* Initializes summary state S for the summary function in SS.  Minimum and
   maximum start out as SYSMIS.  The moment-based functions create a moments1
   accumulator, computing up to the mean or up to the variance depending on
   the function.  The order-statistic functions create a sorting casewriter
   for cases that consist of two numeric values. */
2535 ctables_summary_init (union ctables_summary *s,
2536 const struct ctables_summary_spec *ss)
2538 switch (ss->function)
2541 case CTSF_areaPCT_COUNT:
2542 case CTSF_areaPCT_VALIDN:
2543 case CTSF_areaPCT_TOTALN:
2556 s->min = s->max = SYSMIS;
2561 case CTSF_areaPCT_SUM:
2562 s->moments = moments1_create (MOMENT_MEAN);
2568 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases have two numeric values (ctables_summary_add() writes the value and
   then its weight) and are sorted in ascending order of the first. */
2575 struct caseproto *proto = caseproto_create ();
2576 proto = caseproto_add_width (proto, 0);
2577 proto = caseproto_add_width (proto, 0);
2579 struct subcase ordering;
2580 subcase_init (&ordering, 0, 0, SC_ASCEND);
2581 s->writer = sort_create_writer (&ordering, proto);
2582 subcase_uninit (&ordering);
2583 caseproto_unref (proto);
/* Releases the resources held by summary state S, according to the summary
   function in SS: the moments accumulator for the moment-based functions, or
   the casewriter for the order-statistic functions. */
2593 ctables_summary_uninit (union ctables_summary *s,
2594 const struct ctables_summary_spec *ss)
2596 switch (ss->function)
2599 case CTSF_areaPCT_COUNT:
2600 case CTSF_areaPCT_VALIDN:
2601 case CTSF_areaPCT_TOTALN:
2620 case CTSF_areaPCT_SUM:
2621 moments1_destroy (s->moments);
2627 casewriter_destroy (s->writer);
/* Accumulates one observation, VALUE, into summary state S for the summary
   function in SS.  IS_MISSING and IS_INCLUDED classify the observation as
   described in the chart inside the function.  The running minimum and
   maximum are updated from VALUE; the moment-based functions add VALUE with
   its weight to the moments accumulator; the order-statistic functions add
   the weight to 'ovalid' and write a (value, weight) case to the
   casewriter. */
2633 ctables_summary_add (union ctables_summary *s,
2634 const struct ctables_summary_spec *ss,
2635 const union value *value,
2636 bool is_missing, bool is_included,
2639 /* To determine whether a case is included in a given table for a particular
2640 kind of summary, consider the following charts for the variable being
2641 summarized. Only if "yes" appears is the case counted.
2643 Categorical variables: VALIDN other TOTALN
2644 Valid values in included categories yes yes yes
2645 Missing values in included categories --- yes yes
2646 Missing values in excluded categories --- --- yes
2647 Valid values in excluded categories --- --- ---
2649 Scale variables: VALIDN other TOTALN
2650 Valid value yes yes yes
2651 Missing value --- yes yes
2653 Missing values include both user- and system-missing. (The system-missing
2654 value is always in an excluded category.)
2656 One way to interpret the above table is that scale variables are like
2657 categorical variables in which all values are in included categories.
2659 switch (ss->function)
2662 case CTSF_areaPCT_TOTALN:
2667 case CTSF_areaPCT_COUNT:
2673 case CTSF_areaPCT_VALIDN:
2691 if (s->min == SYSMIS || value->f < s->min)
2693 if (s->max == SYSMIS || value->f > s->max)
2704 moments1_add (s->moments, value->f, weight);
2707 case CTSF_areaPCT_SUM:
2709 moments1_add (s->moments, value->f, weight);
2717 s->ovalid += weight;
2719 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2720 *case_num_rw_idx (c, 0) = value->f;
2721 *case_num_rw_idx (c, 1) = weight;
2722 casewriter_write (s->writer, c);
// Computes the final value of summary S for the summary function in SS.
// AREAS holds the areas that contain the cell, indexed by area type; the
// area selected by SS's 'calc_area' supplies the denominators for the
// percentage functions.  A percentage whose denominator is zero is SYSMIS.
2729 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2730 union ctables_summary *s,
2731 const struct ctables_summary_spec *ss)
2733 switch (ss->function)
2739 return areas[ss->calc_area]->sequence;
// Percentages: the cell's count as a share of the area's count, valid
// count, or total count under the requested weighting.
2741 case CTSF_areaPCT_COUNT:
2743 const struct ctables_area *a = areas[ss->calc_area];
2744 double a_count = a->count[ss->weighting];
2745 return a_count ? s->count / a_count * 100 : SYSMIS;
2748 case CTSF_areaPCT_VALIDN:
2750 const struct ctables_area *a = areas[ss->calc_area];
2751 double a_valid = a->valid[ss->weighting];
2752 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2755 case CTSF_areaPCT_TOTALN:
2757 const struct ctables_area *a = areas[ss->calc_area];
2758 double a_total = a->total[ss->weighting];
2759 return a_total ? s->count / a_total * 100 : SYSMIS;
// Range: SYSMIS unless both extremes have been set.
2774 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2779 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2785 double weight, variance;
2786 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2787 return calc_semean (variance, weight);
2793 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2794 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
// The sum is recovered as total weight times mean.
2799 double weight, mean;
2800 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2801 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2807 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
// Percentage of sum: this cell's sum as a share of the area's sum for the
// same summed variable and weighting.
2811 case CTSF_areaPCT_SUM:
2813 double weight, mean;
2814 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2815 if (weight == SYSMIS || mean == SYSMIS)
2818 const struct ctables_area *a = areas[ss->calc_area];
2819 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2820 double denom = sum->sum[ss->weighting];
2821 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
// Percentile, with 0.5 used for anything other than CTSF_PTILE: the
// writer is turned into a reader and the result is kept in s->ovalue.
2828 struct casereader *reader = casewriter_make_reader (s->writer);
2831 struct percentile *ptile = percentile_create (
2832 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2833 struct order_stats *os = &ptile->parent;
2834 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2835 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2836 statistic_destroy (&ptile->parent.parent);
// Mode: computed the same way and likewise kept in s->ovalue.
2843 struct casereader *reader = casewriter_make_reader (s->writer);
2846 struct mode *mode = mode_create ();
2847 struct order_stats *os = &mode->parent;
2848 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2849 s->ovalue = mode->mode;
2850 statistic_destroy (&mode->parent.parent);
2858 /* CTABLES occurrences. */
/* A value that has been observed for a variable, stored in a hash map of
   occurrences (see ctables_add_occurrence()). */
2860 struct ctables_occurrence
/* Node in the hash map of occurrences. */
2862 struct hmap_node node;
/* Records VALUE, a value of VAR, in the hash map OCCURRENCES.  The map is
   first searched for an entry that equals VALUE at VAR's width (presumably
   returning early when one is found -- TODO confirm); otherwise a new entry
   that holds a clone of VALUE is inserted. */
2867 ctables_add_occurrence (const struct variable *var,
2868 const union value *value,
2869 struct hmap *occurrences)
2871 int width = var_get_width (var);
2872 unsigned int hash = value_hash (value, width, 0);
2874 struct ctables_occurrence *o;
2875 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2877 if (value_equal (value, &o->value, width))
2880 o = xmalloc (sizeof *o);
2881 value_clone (&o->value, value, width);
2882 hmap_insert (occurrences, &o->node, hash);
/* Each CTVL_* constant is defined to equal the SETTINGS_VALUE_SHOW_* constant
   on its right, so a value of one kind has the same numeric value as its
   counterpart of the other. */
2887 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2888 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2889 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2890 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
2895 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2896 all the axes (except the scalar variable, if any). */
2897 struct hmap_node node;
/* The section that this cell belongs to. */
2898 struct ctables_section *section;
2900 /* The areas that contain this cell. */
2901 uint32_t omit_areas;
2902 struct ctables_area *areas[N_CTATS];
2907 enum ctables_summary_variant sv;
/* Per-axis data.  Each axis holds cell values ('cvs'), and each cell value
   refers to the category that it falls into. */
2909 struct ctables_cell_axis
2911 struct ctables_cell_value
2913 const struct ctables_category *category;
/* Summary state for this cell. */
2921 union ctables_summary *summaries;
/* One section of a table: a combination of one nest per pivot axis, together
   with the cells and areas accumulated for it. */
2924 struct ctables_section
/* The table that this section is part of. */
2927 struct ctables_table *table;
/* The nest on each axis. */
2928 struct ctables_nest *nests[PIVOT_N_AXES];
2931 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2932 struct hmap cells; /* Contains "struct ctables_cell"s. */
2933 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2936 static void ctables_section_uninit (struct ctables_section *);
/* One table to be produced by a CTABLES command. */
2938 struct ctables_table
/* The struct ctables that this table refers back to. */
2940 struct ctables *ctables;
/* Per pivot axis: the parsed axis expression and a stack of nests. */
2941 struct ctables_axis *axes[PIVOT_N_AXES];
2942 struct ctables_stack stacks[PIVOT_N_AXES];
2943 struct ctables_section *sections;
2945 enum pivot_axis_type summary_axis;
2946 struct ctables_summary_spec_set summary_specs;
2947 struct variable **sum_vars;
2950 enum pivot_axis_type slabels_axis;
2951 bool slabels_visible;
2953 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2955 Most commonly, label_axis[a] == a, and in particular we always have
2956 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2958 If ROWLABELS or COLLABELS is specified, then one of
2959 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2960 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2962 If any category labels are moved, then 'clabels_example' is one of the
2963 variables being moved (and it is otherwise NULL). All of the variables
2964 being moved have the same width, value labels, and categories, so this
2965 example variable can be used to find those out.
2967 The remaining members in this group are relevant only if category labels
2970 'clabels_values_map' holds a "struct ctables_value" for all the values
2971 that appear in all of the variables in the moved categories. It is
2972 accumulated as the data is read. Once the data is fully read, its
2973 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2975 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2976 enum pivot_axis_type clabels_from_axis;
2977 enum pivot_axis_type clabels_to_axis;
2978 const struct variable *clabels_example;
2979 struct hmap clabels_values_map;
2980 struct ctables_value **clabels_values;
2981 size_t n_clabels_values;
2983 /* Indexed by variable dictionary index. */
2984 struct ctables_categories **categories;
2985 size_t n_categories;
2993 struct ctables_chisq *chisq;
2994 struct ctables_pairwise *pairwise;
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared, and the axis whose per-cell values are read. */
2997 struct ctables_cell_sort_aux
2999 const struct ctables_nest *nest;
3000 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   "struct ctables_cell *"; AUX_ points to a struct ctables_cell_sort_aux.

   The cells are compared along axis AUX->a, one variable of AUX->nest at a
   time and skipping the nest's scale variable.  Cells that fall in different
   categories are ordered by category address; within a category the ordering
   depends on the category's type, by value or by value label, and honors the
   category's 'sort_ascending' flag where the code below consults it. */
3004 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3006 const struct ctables_cell_sort_aux *aux = aux_;
3007 struct ctables_cell *const *ap = a_;
3008 struct ctables_cell *const *bp = b_;
3009 const struct ctables_cell *a = *ap;
3010 const struct ctables_cell *b = *bp;
3012 const struct ctables_nest *nest = aux->nest;
3013 for (size_t i = 0; i < nest->n; i++)
3014 if (i != nest->scale_idx)
3016 const struct variable *var = nest->vars[i];
3017 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3018 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the address of the category. */
3019 if (a_cv->category != b_cv->category)
3020 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category from here on, so only the values can differ. */
3022 const union value *a_val = &a_cv->value;
3023 const union value *b_val = &b_cv->value;
3024 switch (a_cv->category->type)
3030 case CCT_POSTCOMPUTE:
3031 case CCT_EXCLUDED_MISSING:
3032 /* Must be equal. */
3040 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3048 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3050 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label; value_compare_3way() is also used in this branch
   (the conditions selecting between the two are on lines not shown here). */
3056 const char *a_label = var_lookup_value_label (var, a_val);
3057 const char *b_label = var_lookup_value_label (var, b_val);
3063 cmp = strcmp (a_label, b_label);
3069 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3072 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Looks up, in CELL's section, the area of type AREA that CELL falls into,
   and creates and inserts a new one when it has to.

   An area is identified by, for every axis and every variable index listed in
   that axis's nest->areas[AREA], the cell's category pointer plus, unless the
   category is a total, subtotal, or postcompute, the cell's value.  The same
   rule drives both the hash and the comparison against each candidate area's
   'example' cell.

   A newly created area records CELL as its example and, when the table has
   sum variables, gets one zeroed ctables_sum per sum variable. */
3083 static struct ctables_area *
3084 ctables_area_insert (struct ctables_cell *cell, enum ctables_area_type area)
3086 struct ctables_section *s = cell->section;
/* Hash the key. */
3088 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3090 const struct ctables_nest *nest = s->nests[axis];
3091 for (size_t i = 0; i < nest->n_areas[area]; i++)
3093 size_t v_idx = nest->areas[area][i];
3094 struct ctables_cell_value *cv = &cell->axes[axis].cvs[v_idx];
3095 hash = hash_pointer (cv->category, hash);
3096 if (cv->category->type != CCT_TOTAL
3097 && cv->category->type != CCT_SUBTOTAL
3098 && cv->category->type != CCT_POSTCOMPUTE)
3099 hash = value_hash (&cv->value,
3100 var_get_width (nest->vars[v_idx]), hash);
/* Compare CELL against the example cell of each area in the hash bucket.  The
   axis index is named 'axis' so that it does not shadow the area iterator
   'a'. */
3104 struct ctables_area *a;
3105 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3107 const struct ctables_cell *df = a->example;
3108 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3110 const struct ctables_nest *nest = s->nests[axis];
3111 for (size_t i = 0; i < nest->n_areas[area]; i++)
3113 size_t v_idx = nest->areas[area][i];
3114 struct ctables_cell_value *cv1 = &df->axes[axis].cvs[v_idx];
3115 struct ctables_cell_value *cv2 = &cell->axes[axis].cvs[v_idx];
3116 if (cv1->category != cv2->category
3117 || (cv1->category->type != CCT_TOTAL
3118 && cv1->category->type != CCT_SUBTOTAL
3119 && cv1->category->type != CCT_POSTCOMPUTE
3120 && !value_equal (&cv1->value, &cv2->value,
3121 var_get_width (nest->vars[v_idx]))))
/* Create a new area.  xcalloc() zeroes the array and checks the
   count-times-size multiplication for overflow. */
3130 struct ctables_sum *sums = (s->table->n_sum_vars
3131 ? xcalloc (s->table->n_sum_vars, sizeof *sums)
3134 a = xmalloc (sizeof *a);
3135 *a = (struct ctables_area) { .example = cell, .sums = sums };
3136 hmap_insert (&s->areas[area], &a->node, hash);
/* Finds or creates the cell of section S that case C belongs to, given the
   per-axis category assignments in CATS.

   The lookup key is, for every non-scale variable on every axis, the category
   pointer plus, unless the category is a total, subtotal, or postcompute, the
   case's value for that variable.

   A new cell gets a clone of the case's value for every variable in each
   nest, 'omit_areas' bits for any total, subtotal, or postcompute category it
   contains (which bits depends on the axis), the 'postcompute' flag if any
   category is a postcompute, one initialized summary per summary
   specification of the summary-axis nest, and an area of every area type. */
3140 static struct ctables_cell *
3141 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3142 const struct ctables_category **cats[PIVOT_N_AXES])
3145 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the key. */
3146 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3148 const struct ctables_nest *nest = s->nests[a];
3149 for (size_t i = 0; i < nest->n; i++)
3150 if (i != nest->scale_idx)
3152 hash = hash_pointer (cats[a][i], hash);
3153 if (cats[a][i]->type != CCT_TOTAL
3154 && cats[a][i]->type != CCT_SUBTOTAL
3155 && cats[a][i]->type != CCT_POSTCOMPUTE)
3156 hash = value_hash (case_data (c, nest->vars[i]),
3157 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same key. */
3163 struct ctables_cell *cell;
3164 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3166 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3168 const struct ctables_nest *nest = s->nests[a];
3169 for (size_t i = 0; i < nest->n; i++)
3170 if (i != nest->scale_idx
3171 && (cats[a][i] != cell->axes[a].cvs[i].category
3172 || (cats[a][i]->type != CCT_TOTAL
3173 && cats[a][i]->type != CCT_SUBTOTAL
3174 && cats[a][i]->type != CCT_POSTCOMPUTE
3175 && !value_equal (case_data (c, nest->vars[i]),
3176 &cell->axes[a].cvs[i].value,
3177 var_get_width (nest->vars[i])))))
/* Build a new cell. */
3186 cell = xmalloc (sizeof *cell);
3190 cell->omit_areas = 0;
3191 cell->postcompute = false;
3192 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3194 const struct ctables_nest *nest = s->nests[a];
3195 cell->axes[a].cvs = (nest->n
3196 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3198 for (size_t i = 0; i < nest->n; i++)
3200 const struct ctables_category *cat = cats[a][i];
3201 const struct variable *var = nest->vars[i];
3202 const union value *value = case_data (c, var);
3203 if (i != nest->scale_idx)
3205 const struct ctables_category *subtotal = cat->subtotal;
3206 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category marks area types in
   'omit_areas'; the set depends on the axis the category is on. */
3209 if (cat->type == CCT_TOTAL
3210 || cat->type == CCT_SUBTOTAL
3211 || cat->type == CCT_POSTCOMPUTE)
3215 case PIVOT_AXIS_COLUMN:
3216 cell->omit_areas |= ((1u << CTAT_TABLE) |
3217 (1u << CTAT_LAYER) |
3218 (1u << CTAT_LAYERCOL) |
3219 (1u << CTAT_SUBTABLE) |
3222 case PIVOT_AXIS_ROW:
3223 cell->omit_areas |= ((1u << CTAT_TABLE) |
3224 (1u << CTAT_LAYER) |
3225 (1u << CTAT_LAYERROW) |
3226 (1u << CTAT_SUBTABLE) |
3229 case PIVOT_AXIS_LAYER:
3230 cell->omit_areas |= ((1u << CTAT_TABLE) |
3231 (1u << CTAT_LAYER));
3235 if (cat->type == CCT_POSTCOMPUTE)
3236 cell->postcompute = true;
3239 cell->axes[a].cvs[i].category = cat;
3240 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* Summaries come from the nest on the table's summary axis.  xnmalloc()
   checks the count-times-size multiplication for overflow, matching the
   allocation of 'cvs' above. */
3244 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3245 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3246 cell->summaries = xnmalloc (specs->n, sizeof *cell->summaries);
3247 for (size_t i = 0; i < specs->n; i++)
3248 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3249 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3250 cell->areas[at] = ctables_area_insert (cell, at);
3251 hmap_insert (&s->cells, &cell->node, hash);
/* Visits every weighting WT in DST and SRC, which each have one element per
   enum ctables_weighting value.  The loop body is on a line not shown in this
   listing; presumably it adds SRC[WT] to DST[WT] (confirm). */
3256 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3258 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C, with per-weighting weights WEIGHT, to the cell of section S
   selected by C's values and CATS (ctables_cell_insert__() creates the cell
   if necessary).

   Every summary of the cell is updated with the case's value of the summary
   variable, using the weight that the summary specification asks for.  Then
   the cell's areas accumulate WEIGHT and, when the summary value is not
   missing for scale purposes, the weighted value of each of the table's sum
   variables that is not missing.

   IS_INCLUDED is forwarded to ctables_summary_add(). */
3263 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3264 const struct ctables_category **cats[PIVOT_N_AXES],
3265 bool is_included, double weight[N_CTWS])
3267 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3268 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3270 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3271 const union value *value = case_data (c, specs->var);
3272 bool is_missing = var_is_value_missing (specs->var, value);
3273 bool is_scale_missing
3274 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3276 for (size_t i = 0; i < specs->n; i++)
3277 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3278 is_scale_missing, is_included,
3279 weight[specs->specs[i].weighting]);
3280 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* NOTE(review): '&&' is a logical AND and '1u << at' is nonzero for every
   area type, so this test is equivalent to '!cell->omit_areas': a cell with
   any omit bit set is skipped for every area type, and the individual bits
   assigned in ctables_cell_insert__() are never distinguished.  A bitwise '&'
   may have been intended, but switching changes which areas total, subtotal,
   and postcompute cells feed, so confirm against expected output before
   changing it. */
3281 if (!(cell->omit_areas && (1u << at)))
3283 struct ctables_area *a = cell->areas[at];
3285 add_weight (a->total, weight);
3287 add_weight (a->count, weight);
3290 add_weight (a->valid, weight);
3292 if (!is_scale_missing)
3293 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3295 const struct variable *var = s->table->sum_vars[i];
3296 double addend = case_num (c, var);
3297 if (!var_is_num_missing (var, addend))
3298 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3299 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to cells that involve total categories.

   Walks the non-scale variables starting at axis START_AXIS, nest index
   START_NEST.  For each one it looks up the total category of the variable's
   categories, saves the current entry of CATS, adds the case with
   ctables_cell_add__(), and recurses from the following variable so that
   combinations with later variables are visited too.  The lines that
   substitute the total into CATS and restore the saved entry are not shown in
   this listing (presumably mirroring recurse_subtotals(); confirm). */
3306 recurse_totals (struct ctables_section *s, const struct ccase *c,
3307 const struct ctables_category **cats[PIVOT_N_AXES],
3308 bool is_included, double weight[N_CTWS],
3309 enum pivot_axis_type start_axis, size_t start_nest)
3311 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3313 const struct ctables_nest *nest = s->nests[a];
3314 for (size_t i = start_nest; i < nest->n; i++)
3316 if (i == nest->scale_idx)
3319 const struct variable *var = nest->vars[i];
3321 const struct ctables_category *total = ctables_categories_total (
3322 s->table->categories[var_get_dict_index (var)]);
3325 const struct ctables_category *save = cats[a][i];
3327 ctables_cell_add__ (s, c, cats, is_included, weight);
3328 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to cells that involve subtotal categories.

   Walks the non-scale variables starting at axis START_AXIS, nest index
   START_NEST.  For each one it saves the current entry of CATS, replaces it by
   that category's 'subtotal', adds the case with ctables_cell_add__(), and
   recurses from the following variable.  The guard on the subtotal and the
   restore of the saved entry are on lines not shown in this listing. */
3337 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3338 const struct ctables_category **cats[PIVOT_N_AXES],
3339 bool is_included, double weight[N_CTWS],
3340 enum pivot_axis_type start_axis, size_t start_nest)
3342 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3344 const struct ctables_nest *nest = s->nests[a];
3345 for (size_t i = start_nest; i < nest->n; i++)
3347 if (i == nest->scale_idx)
3350 const struct ctables_category *save = cats[a][i];
3353 cats[a][i] = save->subtotal;
3354 ctables_cell_add__ (s, c, cats, is_included, weight);
3355 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Tallies case C, with per-weighting weights WEIGHT, in section S.

   First the case's value of every non-scale variable on every axis is matched
   against that variable's categories.  A value that falls back to the
   built-in "excluded missing" category clears the 'is_included' flag that is
   passed on to the cell-adding functions.  Next an occurrence of each value
   is recorded in S->occurrences.  Finally the case is added to its ordinary
   cell and, through recurse_totals() and recurse_subtotals(), to the cells for
   totals and subtotals. */
3364 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3365 double weight[N_CTWS])
/* A nest may have no variables at all (ctables_cell_insert__() handles
   nest->n == 0 explicitly), and a variable-length array must have a size
   greater than zero, so each bound is clamped to at least 1. */
3367 const struct ctables_category *layer_cats[MAX (s->nests[PIVOT_AXIS_LAYER]->n, 1)];
3368 const struct ctables_category *row_cats[MAX (s->nests[PIVOT_AXIS_ROW]->n, 1)];
3369 const struct ctables_category *column_cats[MAX (s->nests[PIVOT_AXIS_COLUMN]->n, 1)];
3370 const struct ctables_category **cats[PIVOT_N_AXES] =
3372 [PIVOT_AXIS_LAYER] = layer_cats,
3373 [PIVOT_AXIS_ROW] = row_cats,
3374 [PIVOT_AXIS_COLUMN] = column_cats,
3377 bool is_included = true;
/* Classify the case's value for each non-scale variable. */
3379 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3381 const struct ctables_nest *nest = s->nests[a];
3382 for (size_t i = 0; i < nest->n; i++)
3383 if (i != nest->scale_idx)
3385 const struct variable *var = nest->vars[i];
3386 const union value *value = case_data (c, var);
3388 cats[a][i] = ctables_categories_match (
3389 s->table->categories[var_get_dict_index (var)], value, var);
/* Fallback for a value with no usable category: it is assigned the static
   CCT_EXCLUDED_MISSING category and the case is flagged as not included.  The
   conditions that lead here, and what happens when they fail, are partly on
   lines not shown in this listing. */
3392 if (i != nest->summary_idx)
3395 if (!var_is_value_missing (var, value))
3398 static const struct ctables_category cct_excluded_missing = {
3399 .type = CCT_EXCLUDED_MISSING,
3402 cats[a][i] = &cct_excluded_missing;
3403 is_included = false;
/* Record that each value occurred. */
3409 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3411 const struct ctables_nest *nest = s->nests[a];
3412 for (size_t i = 0; i < nest->n; i++)
3413 if (i != nest->scale_idx)
3415 const struct variable *var = nest->vars[i];
3416 const union value *value = case_data (c, var);
3417 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Tally the ordinary cell, then the total and subtotal cells. */
3421 ctables_cell_add__ (s, c, cats, is_included, weight);
3422 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3423 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* One distinct value stored in a table's 'clabels_values_map' (see struct
   ctables_table).  Besides the hash node, the code below uses members 'value'
   and 'leaf', whose declarations are on lines not shown in this listing. */
3426 struct ctables_value
3428 struct hmap_node node;
/* Searches T->clabels_values_map, among the entries with hash HASH, for one
   whose value equals VALUE when both are treated as having width WIDTH.  The
   return statements are on lines not shown in this listing; presumably the
   match is returned, or NULL if there is none (confirm). */
3433 static struct ctables_value *
3434 ctables_value_find__ (const struct ctables_table *t, const union value *value,
3435 int width, unsigned int hash)
3437 struct ctables_value *clv;
3438 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3439 hash, &t->clabels_values_map)
3440 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map: hashes it, looks it up with
   ctables_value_find__(), and allocates an entry holding a clone of VALUE and
   inserts it under that hash.  The test that limits the allocation to values
   not already present is on a line not shown in this listing (confirm). */
3446 ctables_value_insert (struct ctables_table *t, const union value *value,
3449 unsigned int hash = value_hash (value, width, 0);
3450 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3453 clv = xmalloc (sizeof *clv);
3454 value_clone (&clv->value, value, width);
3455 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up the ctables_value that corresponds to CELL in its table's
   'clabels_values_map'.

   The value looked up is CELL's value for the last variable of the nest on
   the table's 'clabels_from_axis'.  The entry is asserted to exist.  When the
   table has no 'clabels_example', the function takes an early path whose
   result is on a line not shown in this listing. */
3459 static const struct ctables_value *
3460 ctables_value_find (const struct ctables_cell *cell)
3462 const struct ctables_section *s = cell->section;
3463 const struct ctables_table *t = s->table;
3464 if (!t->clabels_example)
3467 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3468 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3469 const union value *value
3470 = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3471 int width = var_get_width (var);
3472 const struct ctables_value *ctv = ctables_value_find__ (
3473 t, value, width, value_hash (value, width, 0));
3474 assert (ctv != NULL);
/* Sort comparison callback.  A_ and B_ each point to a
   "struct ctables_value *"; WIDTH_ points to the int width to compare at.
   Returns the result of value_compare_3way() on the two values. */
3479 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3481 const struct ctables_value *const *ap = a_;
3482 const struct ctables_value *const *bp = b_;
3483 const struct ctables_value *a = *ap;
3484 const struct ctables_value *b = *bp;
3485 const int *width = width_;
3486 return value_compare_3way (&a->value, &b->value, *width);
/* Turns T->clabels_values_map into the sorted array T->clabels_values.

   Every labeled value of T->clabels_example that matches one of the
   variable's categories is inserted into the map (any condition guarding this
   step is on a line not shown in this listing).  Then all map entries are
   collected into a newly allocated array, sorted by value, and each entry's
   'leaf' is set to its position in the sorted array. */
3490 ctables_sort_clabels_values (struct ctables_table *t)
3492 const struct variable *v0 = t->clabels_example;
3493 int width = var_get_width (v0);
3495 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3498 const struct val_labs *val_labs = var_get_value_labels (v0);
3499 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3500 vl = val_labs_next (val_labs, vl))
3501 if (ctables_categories_match (c0, &vl->value, v0))
3502 ctables_value_insert (t, &vl->value, width);
/* Gather the map's entries into an array. */
3505 size_t n = hmap_count (&t->clabels_values_map);
3506 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3508 struct ctables_value *clv;
3510 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3511 t->clabels_values[i++] = clv;
3512 t->n_clabels_values = n;
/* Sort by value, then number the entries in sorted order. */
3515 sort (t->clabels_values, n, sizeof *t->clabels_values,
3516 compare_ctables_values_3way, &width);
3518 for (size_t i = 0; i < n; i++)
3519 t->clabels_values[i]->leaf = i;
/* Members of struct ctables, the command-level state that each
   ctables_table points back to and that ctables_destroy() releases.  (The
   "struct ctables" line itself and a few short members are not shown in this
   listing.) */
3524 const struct dictionary *dict;
3525 struct pivot_table_look *look;
3527 /* For CTEF_* formats. */
3528 struct fmt_settings ctables_formats;
3530 /* If this is NULL, zeros are displayed using the normal print format.
3531 Otherwise, this string is displayed. */
3534 /* If this is NULL, missing values are displayed using the normal print
3535 format. Otherwise, this string is displayed. */
3538 /* Indexed by variable dictionary index. */
3539 enum ctables_vlabel *vlabels;
3541 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3543 bool mrsets_count_duplicates; /* MRSETS. */
3544 bool smissing_listwise; /* SMISSING. */
3545 struct variable *e_weight; /* WEIGHT. */
3546 int hide_threshold; /* HIDESMALLCOUNTS. */
/* The command's tables; ctables_destroy() destroys 'n_tables' of them. */
3548 struct ctables_table **tables;
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal().  The body is not shown in this
   listing; presumably it returns A + B (confirm). */
3553 ctpo_add (double a, double b)
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal().  The body is not shown in this
   listing; presumably it returns A - B (confirm). */
3559 ctpo_sub (double a, double b)
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal().  The body is not shown in this
   listing; presumably it returns A * B (confirm). */
3565 ctpo_mul (double a, double b)
/* Division operator callback for ctables_pcexpr_evaluate_nonterminal().
   Returns A / B, or SYSMIS when B is zero. */
3571 ctpo_div (double a, double b)
3573 return b ? a / b : SYSMIS;
/* Exponentiation operator callback for
   ctables_pcexpr_evaluate_nonterminal().  Saves the caller's errno before
   calling pow (A, B).  How errno is inspected and restored afterward, and the
   value returned on error, are on lines not shown in this listing. */
3577 ctpo_pow (double a, double b)
3579 int save_errno = errno;
3581 double result = pow (a, b);
/* Unary operator callback for ctables_pcexpr_evaluate_nonterminal(), which
   calls it with a single evaluated operand in A; B is unused.  The body is
   not shown in this listing; presumably it returns -A (confirm). */
3589 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell: the cell and
   its section, the categories of the variable that carries the postcompute,
   the axis (and, in 'pc_a_idx', the nest index) of that variable, the index
   of the summary being computed, and the format used when looking up
   categories.  'pc_a_idx' and 'summary_idx' are declared on lines not shown
   in this listing. */
3594 struct ctables_pcexpr_evaluate_ctx
3596 const struct ctables_cell *cell;
3597 const struct ctables_section *section;
3598 const struct ctables_categories *cats;
3599 enum pivot_axis_type pc_a;
3602 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this on
   each subexpression before ctables_pcexpr_evaluate() itself is defined. */
3605 static double ctables_pcexpr_evaluate (
3606 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E: evaluates its first N_ARGS subexpressions (the
   local array has room for two) and returns EVALUATE applied to the results.
   An unused second operand is passed as 0.

   Each operand is checked for being non-finite or SYSMIS; the action taken in
   that case is on a line not shown in this listing (presumably an early
   return of SYSMIS; confirm). */
3609 ctables_pcexpr_evaluate_nonterminal (
3610 const struct ctables_pcexpr_evaluate_ctx *ctx,
3611 const struct ctables_pcexpr *e, size_t n_args,
3612 double evaluate (double, double))
3614 double args[2] = { 0, 0 };
3615 for (size_t i = 0; i < n_args; i++)
3617 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3618 if (!isfinite (args[i]) || args[i] == SYSMIS)
3621 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that is like CTX->cell except that,
   at the postcompute's position (axis CTX->pc_a, index CTX->pc_a_idx), it has
   the category and value given by PC_CV.

   The target cell is found in the section's 'cells' map with the same key
   rule that is used when cells are inserted: category pointer plus, for
   categories other than total, subtotal, and postcompute, the value.  What
   happens when no such cell exists is on lines not shown in this listing. */
3625 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3626 const struct ctables_cell_value *pc_cv)
3628 const struct ctables_section *s = ctx->section;
/* Hash the key of the cell to look for. */
3631 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3633 const struct ctables_nest *nest = s->nests[a];
3634 for (size_t i = 0; i < nest->n; i++)
3635 if (i != nest->scale_idx)
3637 const struct ctables_cell_value *cv
3638 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3639 : &ctx->cell->axes[a].cvs[i]);
3640 hash = hash_pointer (cv->category, hash);
3641 if (cv->category->type != CCT_TOTAL
3642 && cv->category->type != CCT_SUBTOTAL
3643 && cv->category->type != CCT_POSTCOMPUTE)
3644 hash = value_hash (&cv->value,
3645 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for a cell with the same key. */
3649 struct ctables_cell *tc;
3650 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3652 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3654 const struct ctables_nest *nest = s->nests[a];
3655 for (size_t i = 0; i < nest->n; i++)
3656 if (i != nest->scale_idx)
3658 const struct ctables_cell_value *p_cv
3659 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3660 : &ctx->cell->axes[a].cvs[i]);
3661 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3662 if (p_cv->category != t_cv->category
3663 || (p_cv->category->type != CCT_TOTAL
3664 && p_cv->category->type != CCT_SUBTOTAL
3665 && p_cv->category->type != CCT_POSTCOMPUTE
3666 && !value_equal (&p_cv->value,
3668 var_get_width (nest->vars[i]))))
/* Report the requested summary of the cell that was found. */
3680 const struct ctables_table *t = s->table;
3681 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3682 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3683 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3684 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category operands are resolved with
   ctables_find_category_for_postcompute() and evaluated with
   ctables_pcexpr_evaluate_category(); operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*() callback.
   The switch statement and several case labels are on lines not shown in this
   listing. */
3688 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3689 const struct ctables_pcexpr *e)
/* Operands that can cover several values: the category is evaluated for
   every value recorded in the section's occurrences at the postcompute's
   position that matches it, and the results are accumulated. */
3696 case CTPO_CAT_NRANGE:
3697 case CTPO_CAT_SRANGE:
3698 case CTPO_CAT_MISSING:
3699 case CTPO_CAT_OTHERNM:
3701 struct ctables_cell_value cv = {
3702 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3704 assert (cv.category != NULL);
3706 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3707 const struct ctables_occurrence *o;
3710 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3711 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3712 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3714 cv.value = o->value;
3715 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that name a single cell; the value is taken from E->number. */
3720 case CTPO_CAT_NUMBER:
3721 case CTPO_CAT_SUBTOTAL:
3722 case CTPO_CAT_TOTAL:
3724 struct ctables_cell_value cv = {
3725 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3726 .value = { .f = e->number },
3728 assert (cv.category != NULL);
3729 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: when the variable is wider than the string, a copy padded
   on the right with spaces to the variable's width is used. */
3732 case CTPO_CAT_STRING:
3734 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3736 if (width > e->string.length)
3738 s = xmalloc (width);
3739 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3742 const struct ctables_category *category
3743 = ctables_find_category_for_postcompute (
3744 ctx->section->table->ctables->dict,
3745 ctx->cats, ctx->parse_format, e);
3746 assert (category != NULL);
/* The category found may be numeric or string. */
3748 struct ctables_cell_value cv = { .category = category };
3749 if (category->type == CCT_NUMBER)
3750 cv.value.f = category->number;
3751 else if (category->type == CCT_STRING)
3752 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3756 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary ones and one unary one. */
3762 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3765 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3768 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3771 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3774 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3777 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category of CELL in section S.  CELL must have its
   'postcompute' flag set, and a postcompute category is asserted to be found.

   Scans every axis and nest position of CELL for a category of type
   CCT_POSTCOMPUTE, records the category, and stores the position through the
   output parameters.  The handling of a cell with more than one postcompute,
   and the return statements, are on lines not shown in this listing. */
3783 static const struct ctables_category *
3784 ctables_cell_postcompute (const struct ctables_section *s,
3785 const struct ctables_cell *cell,
3786 enum pivot_axis_type *pc_a_p,
3789 assert (cell->postcompute);
3790 const struct ctables_category *pc_cat = NULL;
3791 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3792 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3794 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3795 if (cv->category->type == CCT_POSTCOMPUTE)
3799 /* Multiple postcomputes cross each other. The value is
3804 pc_cat = cv->category;
3808 *pc_a_idx_p = pc_a_idx;
3812 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS by evaluating the postcompute's expression.

   If the postcompute has a summary specification that matches SS in function,
   weighting, calculation area, and percentile, its format and
   is_ctables_format flag are stored in *FORMAT and *IS_CTABLES_FORMAT.  The
   final parameter (the summary index, on a line not shown in this listing) is
   passed through to the evaluation context. */
3817 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3818 const struct ctables_cell *cell,
3819 const struct ctables_summary_spec *ss,
3820 struct fmt_spec *format,
3821 bool *is_ctables_format,
3824 enum pivot_axis_type pc_a = 0;
3825 size_t pc_a_idx = 0;
3826 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3827 s, cell, &pc_a, &pc_a_idx);
3831 const struct ctables_postcompute *pc = pc_cat->pc;
/* Let a matching summary specification of the postcompute set the format. */
3834 for (size_t i = 0; i < pc->specs->n; i++)
3836 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3837 if (ss->function == ss2->function
3838 && ss->weighting == ss2->weighting
3839 && ss->calc_area == ss2->calc_area
3840 && ss->percentile == ss2->percentile)
3842 *format = ss2->format;
3843 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that carries
   the postcompute. */
3849 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3850 const struct ctables_categories *cats = s->table->categories[
3851 var_get_dict_index (var)];
3852 struct ctables_pcexpr_evaluate_ctx ctx = {
3857 .pc_a_idx = pc_a_idx,
3858 .summary_idx = summary_idx,
3859 .parse_format = pc_cat->parse_format,
3861 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3864 /* Chi-square test (SIGTEST).  A table refers to one through its 'chisq'
   member, which ctables_table_destroy() hands to ctables_chisq_destroy(). */
3865 struct ctables_chisq
3868 bool include_mrsets;
3872 /* Pairwise comparison test (COMPARETEST).  A table refers to one through
   its 'pairwise' member, which ctables_table_destroy() hands to
   ctables_pairwise_destroy(). */
3873 struct ctables_pairwise
3875 enum { PROP, MEAN } type;
3877 bool include_mrsets;
3878 bool meansvariance_allcats;
3880 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting from LEXER: an optional "=" followed by
   either the keyword DEFAULT or a number in the closed range [0, DBL_MAX],
   which is stored in *WIDTH.  NAME is passed to the lexer's range check.
   What is stored for DEFAULT, and the return values, are on lines not shown
   in this listing. */
3889 parse_col_width (struct lexer *lexer, const char *name, double *width)
3891 lex_match (lexer, T_EQUALS);
3892 if (lex_match_id (lexer, "DEFAULT"))
3894 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3896 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER, reporting an error that names
   both keywords if neither is present.  The assignments to *B and the return
   values are on lines not shown in this listing. */
3906 parse_bool (struct lexer *lexer, bool *b)
3908 if (lex_match_id (lexer, "NO"))
3910 else if (lex_match_id (lexer, "YES"))
3914 lex_error_expecting (lexer, "YES", "NO");
/* Called by ctables_table_destroy() on a table's 'chisq' member.  The body is
   not shown in this listing. */
3921 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Called by ctables_table_destroy() on a table's 'pairwise' member.  The body
   is not shown in this listing. */
3927 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases what table T refers to: its sections, its references to
   per-variable categories, its axes and nest stacks, its summary
   specifications, the moved-category-label values, and its chi-square and
   pairwise test settings.  Further frees, including any of T itself, are on
   lines not shown in this listing. */
3933 ctables_table_destroy (struct ctables_table *t)
3938 for (size_t i = 0; i < t->n_sections; i++)
3939 ctables_section_uninit (&t->sections[i]);
/* Each element holds a reference to a categories object. */
3942 for (size_t i = 0; i < t->n_categories; i++)
3943 ctables_categories_unref (t->categories[i]);
3944 free (t->categories);
3946 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3948 ctables_axis_destroy (t->axes[a]);
3949 ctables_stack_uninit (&t->stacks[a]);
3951 free (t->summary_specs.specs);
/* Every value in the map has the width of 'clabels_example'. */
3953 struct ctables_value *ctv, *next_ctv;
3954 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3955 &t->clabels_values_map)
3957 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3958 hmap_delete (&t->clabels_values_map, &ctv->node);
3961 hmap_destroy (&t->clabels_values_map);
3962 free (t->clabels_values);
3968 ctables_chisq_destroy (t->chisq);
3969 ctables_pairwise_destroy (t->pairwise);
/* Releases what CT refers to: every postcompute in its 'postcomputes' map
   (location, expression, and summary specifications), the map itself, its
   format settings, its reference to the pivot table look, and each of its
   tables.  Further frees are on lines not shown in this listing. */
3974 ctables_destroy (struct ctables *ct)
3979 struct ctables_postcompute *pc, *next_pc;
3980 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3984 msg_location_destroy (pc->location);
3985 ctables_pcexpr_destroy (pc->expr);
3989 ctables_summary_spec_set_uninit (pc->specs);
3992 hmap_delete (&ct->postcomputes, &pc->hmap_node);
3995 hmap_destroy (&ct->postcomputes);
3997 fmt_settings_uninit (&ct->ctables_formats);
3998 pivot_table_look_unref (ct->look);
4002 for (size_t i = 0; i < ct->n_tables; i++)
4003 ctables_table_destroy (ct->tables[i]);
/* Checks that all N_VARS variables in VARS are string variables.  For a
   numeric variable, reports an error at CAT's location that names the
   variable.  The return values are on lines not shown in this listing. */
4009 all_strings (struct variable **vars, size_t n_vars,
4010 const struct ctables_category *cat)
4012 for (size_t j = 0; j < n_vars; j++)
4013 if (var_is_numeric (vars[j]))
4015 msg_at (SE, cat->location,
4016 _("This category specification may be applied only to string "
4017 "variables, but this subcommand tries to apply it to "
4018 "numeric variable %s."),
4019 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER.

   The syntax parsed is: the keyword VARIABLES, an optional "=", and a variable
   list; then an optional explicit list of categories in square brackets; then
   settings up to the next "/" or the end of the command.  ORDER, KEY, and
   MISSING are accepted only when no explicit categories were given; TOTAL,
   LABEL, POSITION, and EMPTY are always accepted.

   A single new ctables_categories object is created with one reference per
   listed variable and installed in T->categories for each of them, releasing
   whatever reference was there before.  Afterward, explicit categories are
   checked against the variables' types, and string categories are converted
   to numeric ones where the code below parses them using the variables'
   common print format.

   Error returns and the final return value are on lines not shown in this
   listing. */
4026 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4027 struct ctables *ct, struct ctables_table *t)
4029 if (!lex_force_match_id (lexer, "VARIABLES"))
4031 lex_match (lexer, T_EQUALS);
4033 struct variable **vars;
4035 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type that all of the variables share, if any. */
4038 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
4039 for (size_t i = 1; i < n_vars; i++)
4041 const struct fmt_spec *f = var_get_print_format (vars[i]);
4042 if (f->type != common_format->type)
4044 common_format = NULL;
4050 && (fmt_get_category (common_format->type)
4051 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* Create the shared categories object and install it for every variable. */
4053 struct ctables_categories *c = xmalloc (sizeof *c);
4054 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
4055 for (size_t i = 0; i < n_vars; i++)
4057 struct ctables_categories **cp
4058 = &t->categories[var_get_dict_index (vars[i])];
4059 ctables_categories_unref (*cp);
/* Explicit categories, in square brackets. */
4063 size_t allocated_cats = 0;
4064 int cats_start_ofs = -1;
4065 int cats_end_ofs = -1;
4066 if (lex_match (lexer, T_LBRACK))
4068 cats_start_ofs = lex_ofs (lexer);
4071 if (c->n_cats >= allocated_cats)
4072 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4074 int start_ofs = lex_ofs (lexer);
4075 struct ctables_category *cat = &c->cats[c->n_cats];
4076 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4078 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4081 lex_match (lexer, T_COMMA);
4083 while (!lex_match (lexer, T_RBRACK));
4084 cats_end_ofs = lex_ofs (lexer) - 1;
/* Settings for implicit categories and totals. */
4087 struct ctables_category cat = {
4089 .include_missing = false,
4090 .sort_ascending = true,
4092 bool show_totals = false;
4093 char *total_label = NULL;
4094 bool totals_before = false;
4095 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4097 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4099 lex_match (lexer, T_EQUALS);
4100 if (lex_match_id (lexer, "A"))
4101 cat.sort_ascending = true;
4102 else if (lex_match_id (lexer, "D"))
4103 cat.sort_ascending = false;
4106 lex_error_expecting (lexer, "A", "D");
4110 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4112 int start_ofs = lex_ofs (lexer) - 1;
4113 lex_match (lexer, T_EQUALS);
4114 if (lex_match_id (lexer, "VALUE"))
4115 cat.type = CCT_VALUE;
4116 else if (lex_match_id (lexer, "LABEL"))
4117 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, which is parsed and then reported
   as not implemented. */
4120 cat.type = CCT_FUNCTION;
4121 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4122 &cat.weighting, &cat.area))
4125 if (lex_match (lexer, T_LPAREN))
4127 cat.sort_var = parse_variable (lexer, dict);
4131 if (cat.sort_function == CTSF_PTILE)
4133 lex_match (lexer, T_COMMA);
4134 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4136 cat.percentile = lex_number (lexer);
4140 if (!lex_force_match (lexer, T_RPAREN))
4143 else if (ctables_function_availability (cat.sort_function)
4146 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4150 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
4151 _("Data-dependent sorting is not implemented."));
4155 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4157 lex_match (lexer, T_EQUALS);
4158 if (lex_match_id (lexer, "INCLUDE"))
4159 cat.include_missing = true;
4160 else if (lex_match_id (lexer, "EXCLUDE"))
4161 cat.include_missing = false;
4164 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4168 else if (lex_match_id (lexer, "TOTAL"))
4170 lex_match (lexer, T_EQUALS);
4171 if (!parse_bool (lexer, &show_totals))
4174 else if (lex_match_id (lexer, "LABEL"))
4176 lex_match (lexer, T_EQUALS);
4177 if (!lex_force_string (lexer))
4180 total_label = ss_xstrdup (lex_tokss (lexer));
4183 else if (lex_match_id (lexer, "POSITION"))
4185 lex_match (lexer, T_EQUALS);
4186 if (lex_match_id (lexer, "BEFORE"))
4187 totals_before = true;
4188 else if (lex_match_id (lexer, "AFTER"))
4189 totals_before = false;
4192 lex_error_expecting (lexer, "BEFORE", "AFTER");
4196 else if (lex_match_id (lexer, "EMPTY"))
4198 lex_match (lexer, T_EQUALS);
4199 if (lex_match_id (lexer, "INCLUDE"))
4200 c->show_empty = true;
4201 else if (lex_match_id (lexer, "EXCLUDE"))
4202 c->show_empty = false;
4205 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unknown keyword: two alternative error messages, one of which also lists
   ORDER, KEY, and MISSING. */
4212 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4213 "TOTAL", "LABEL", "POSITION", "EMPTY");
4215 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category. */
4222 if (c->n_cats >= allocated_cats)
4223 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4224 c->cats[c->n_cats++] = cat;
/* Add the totals category, at the beginning or the end of the array. */
4229 if (c->n_cats >= allocated_cats)
4230 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4232 struct ctables_category *totals;
4235 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4236 totals = &c->cats[0];
4239 totals = &c->cats[c->n_cats];
4242 *totals = (struct ctables_category) {
4244 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Assign each category its subtotal.  The categories are walked forward when
   totals come before and backward otherwise. */
4248 struct ctables_category *subtotal = NULL;
4249 for (size_t i = totals_before ? 0 : c->n_cats;
4250 totals_before ? i < c->n_cats : i-- > 0;
4251 totals_before ? i++ : 0)
4253 struct ctables_category *cat = &c->cats[i];
4262 cat->subtotal = subtotal;
4265 case CCT_POSTCOMPUTE:
4276 case CCT_EXCLUDED_MISSING:
/* With an explicit category list, validate each category against the
   variables. */
4281 if (cats_start_ofs != -1)
4283 for (size_t i = 0; i < c->n_cats; i++)
4285 struct ctables_category *cat = &c->cats[i];
4288 case CCT_POSTCOMPUTE:
4289 cat->parse_format = parse_strings ? common_format->type : FMT_F;
4290 struct msg_location *cats_location
4291 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4292 bool ok = ctables_recursive_check_postcompute (
4293 dict, cat->pc->expr, cat, c, cats_location);
4294 msg_location_destroy (cats_location);
/* A numeric-only category applied to a string variable is an error. */
4301 for (size_t j = 0; j < n_vars; j++)
4302 if (var_is_alpha (vars[j]))
4304 msg_at (SE, cat->location,
4305 _("This category specification may be applied "
4306 "only to numeric variables, but this "
4307 "subcommand tries to apply it to string "
4309 var_get_name (vars[j]));
/* String category: either parsed into a number, which turns it into a
   CCT_NUMBER category, or required to apply to string variables only. */
4318 if (!parse_category_string (cat->location, cat->string, dict,
4319 common_format->type, &n))
4322 ss_dealloc (&cat->string);
4324 cat->type = CCT_NUMBER;
4327 else if (!all_strings (vars, n_vars, cat))
/* String range: either both bounds are resolved to numbers, which turns it
   into a CCT_NRANGE category, or it is required to apply to string variables
   only. */
4336 if (!cat->srange[0].string)
4338 else if (!parse_category_string (cat->location,
4339 cat->srange[0], dict,
4340 common_format->type, &n[0]))
4343 if (!cat->srange[1].string)
4345 else if (!parse_category_string (cat->location,
4346 cat->srange[1], dict,
4347 common_format->type, &n[1]))
4350 ss_dealloc (&cat->srange[0]);
4351 ss_dealloc (&cat->srange[1]);
4353 cat->type = CCT_NRANGE;
4354 cat->nrange[0] = n[0];
4355 cat->nrange[1] = n[1];
4357 else if (!all_strings (vars, n_vars, cat))
4368 case CCT_EXCLUDED_MISSING:
/* Summary specification set that a merge item refers to;
   merge_item_compare_3way() reads element 'ofs' of its 'specs' array.  (The
   "struct merge_item" line and the 'ofs' member are not shown in this
   listing.) */
4385 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and B
   currently point at (element 'ofs' of each item's set).

   The keys are compared in this order: function, weighting, calculation area,
   percentile, and finally label, where a null label is treated as an empty
   string.

   NOTE(review): function, weighting, and calc_area order the larger value
   last, but the percentile key orders the larger value first.  Confirm that
   the reversed direction is intended. */
4390 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4392 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4393 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4394 if (as->function != bs->function)
4395 return as->function > bs->function ? 1 : -1;
4396 else if (as->weighting != bs->weighting)
4397 return as->weighting > bs->weighting ? 1 : -1;
4398 else if (as->calc_area != bs->calc_area)
4399 return as->calc_area > bs->calc_area ? 1 : -1;
4400 else if (as->percentile != bs->percentile)
4401 return as->percentile < bs->percentile ? 1 : -1;
4403 const char *as_label = as->label ? as->label : "";
4404 const char *bs_label = bs->label ? bs->label : "";
4405 return strcmp (as_label, bs_label);
/* Recursively enumerates one nest index per pivot axis and appends one
   section to T for each combination.

   A is the axis currently being enumerated and IX holds the nest index
   chosen so far for each axis.  While A is a valid axis, every index from 0
   up to MAX (t->stacks[a].n, 1) is tried and the function recurses on the
   next axis.  The remaining statements take the next free slot in
   t->sections and initialize its hash maps; the branch that separates the
   two cases is on lines not visible here (presumably an else).

   The caller must have allocated t->sections with enough room; see the
   allocation in ctables_execute. */
4409 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4410 size_t ix[PIVOT_N_AXES])
4412 if (a < PIVOT_N_AXES)
/* An axis with no nests still gets a single pass. */
4414 size_t limit = MAX (t->stacks[a].n, 1);
4415 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4416 ctables_table_add_section (t, a + 1, ix);
4420 struct ctables_section *s = &t->sections[t->n_sections++];
4421 *s = (struct ctables_section) {
4423 .cells = HMAP_INITIALIZER (s->cells),
4425 for (a = 0; a < PIVOT_N_AXES; a++)
4428 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable in the nest. */
4430 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4431 for (size_t i = 0; i < nest->n; i++)
4432 hmap_init (&s->occurrences[a][i]);
4434 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4435 hmap_init (&s->areas[at]);
/* Formats D as a UTF-8 string according to FORMAT and SETTINGS using
   data_out_stretchy, then repositions a leading minus sign as described in
   the comment below.

   The return statement is on a line not visible here; the caller in
   ctables_table_output passes the result to
   pivot_value_new_user_text_nocopy, so presumably the formatted string S is
   returned and ownership passes to the caller. */
4440 ctables_format (double d, const struct fmt_spec *format,
4441 const struct fmt_settings *settings)
4443 const union value v = { .f = d };
4444 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4446 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4447 produce the results we want for negative numbers, putting the negative
4448 sign in the wrong spot, before the prefix instead of after it. We can't,
4449 in fact, produce the desired results using a custom-currency
4450 specification. Instead, we postprocess the output, moving the negative
4453 NEQUAL: "-N=3" => "N=-3"
4454 PAREN: "-(3)" => "(-3)"
4455 PCTPAREN: "-(3%)" => "(-3%)"
4457 This transformation doesn't affect NEGPAREN. */
4458 char *minus_src = strchr (s, '-');
/* A minus that directly follows E is skipped (presumably an exponent sign
   rather than the sign of the number). */
4459 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4461 char *n_equals = strstr (s, "N=");
4462 char *lparen = strchr (s, '(');
/* Destination is the = of N= when present, otherwise the left parenthesis. */
4463 char *minus_dst = n_equals ? n_equals + 1 : lparen;
/* NOTE(review): minus_dst is null when the string has neither N= nor a left
   parenthesis.  Original line 4464 is not visible here; confirm that it
   guards this call.  The call treats S as minus_dst - s + 1 one-byte
   elements and moves the byte at index minus_src - s to index
   minus_dst - s. */
4465 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest on axis A of table T.  The visible tests check, for
   each nest, whether it holds exactly one variable that is also its scale
   variable (n == 1 and scale_idx == 0) and whether the variable-label
   setting for that variable is CTVL_NONE.

   The statements executed when a test fails, and the final return, are on
   lines not visible here.  Presumably the function returns false as soon as
   a nest fails either test and true otherwise; the result is stored in
   hide_all_labels of the axis dimension by ctables_table_output. */
4471 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4473 for (size_t i = 0; i < t->stacks[a].n; i++)
4475 struct ctables_nest *nest = &t->stacks[a].nests[i];
4476 if (nest->n != 1 || nest->scale_idx != 0)
/* The vlabels array is indexed by the dictionary index of the variable. */
4479 enum ctables_vlabel vlabel
4480 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4481 if (vlabel != CTVL_NONE)
/* Three-way integer comparison: yields -1 if A < B, 1 if A > B, and 0 if
   they are equal.  Written with comparisons rather than subtraction, so it
   cannot overflow. */
4488 compare_ints_3way (int a, int b)
4490 return a < b ? -1 : a > b;
/* Sort callback that orders cells by their leaf indexes.

   A_ and B_ each point to an array element of type struct ctables_cell *.
   AUX is unused.  The leaf index on each pivot axis is compared in axis
   order, and then the leaf of the ctables_value found for each cell by
   ctables_value_find.  The assertion shows that when one cell has no such
   value the other must not have one either.

   The tests on each cmp result and the return statements are on lines not
   visible here; presumably the first nonzero comparison is returned and 0
   is returned when everything is equal. */
4494 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
4495 const void *aux UNUSED)
4497 struct ctables_cell *const *ap = a_;
4498 struct ctables_cell *const *bp = b_;
4499 const struct ctables_cell *a = *ap;
4500 const struct ctables_cell *b = *bp;
4508 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
4510 int cmp = compare_ints_3way (a->axes[axis].leaf, b->axes[axis].leaf);
4515 const struct ctables_value *a_ctv = ctables_value_find (a);
4516 const struct ctables_value *b_ctv = ctables_value_find (b);
4519 int cmp = compare_ints_3way (a_ctv->leaf, b_ctv->leaf);
4524 assert (!a_ctv && !b_ctv);
/* Builds a pivot table from the accumulated cells of table T (owned by CT)
   and submits it for output.

   Visible steps, in order:

   - Create the pivot table, using the title of T when available and the
     default title Custom Tables otherwise, and set caption and corner text
     (the conditions guarding these calls are on lines not visible here).

   - If summary labels need their own dimension, create a Statistics
     dimension with one leaf per entry in t->summary_specs.

   - If category labels were moved off their axis (t->clabels_example is
     set), create a Row Categories or Column Categories dimension with one
     leaf per value in t->clabels_values.

   - For each pivot axis, create a Rows, Columns, or Layers dimension when
     the axis is in use or is the summary axis.  Then, for each nest on the
     axis, gather the cells of the sections that use the nest, sort them
     with ctables_cell_compare_3way, build the list of label levels
     (variable label, category, summary), and create pivot groups and leaves
     for each run of cells, recording the resulting leaf index in
     cell->axes[a].leaf.

   - Sort all cells by leaf index and hand out increasing sequence numbers
     to the areas of each cell, per area type, the first time an area is
     seen.

   - For every cell and every summary spec, compute the value (postcompute
     or summary), substitute the hide-threshold, zero, or missing text when
     those apply, otherwise format it, and put it into the pivot table.

   - Submit the pivot table.

   Several allocations made here (sections, sorted, levels, groups) have no
   visible free calls; those are presumably on lines not visible here. */
4529 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4531 struct pivot_table *pt = pivot_table_create__ (
4533 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4534 : pivot_value_new_text (N_("Custom Tables"))),
4537 pivot_table_set_caption (
4538 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4540 pivot_table_set_corner_text (
4541 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate summary dimension is used when summary labels sit on a
   different axis than the summaries themselves, or when the labels are
   hidden and there is more than one summary spec. */
4543 bool summary_dimension = (t->summary_axis != t->slabels_axis
4544 || (!t->slabels_visible
4545 && t->summary_specs.n > 1));
4546 if (summary_dimension)
4548 struct pivot_dimension *d = pivot_dimension_create (
4549 pt, t->slabels_axis, N_("Statistics"));
4550 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4551 if (!t->slabels_visible)
4552 d->hide_all_labels = true;
4553 for (size_t i = 0; i < specs->n; i++)
4554 pivot_category_create_leaf (
4555 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A category dimension exists only when category labels were moved to
   another axis, which is what a non-null clabels_example records. */
4558 bool categories_dimension = t->clabels_example != NULL;
4559 if (categories_dimension)
4561 struct pivot_dimension *d = pivot_dimension_create (
4562 pt, t->label_axis[t->clabels_from_axis],
4563 t->clabels_from_axis == PIVOT_AXIS_ROW
4564 ? N_("Row Categories")
4565 : N_("Column Categories"));
4566 const struct variable *var = t->clabels_example;
4567 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4568 for (size_t i = 0; i < t->n_clabels_values; i++)
4570 const struct ctables_value *value = t->clabels_values[i];
4571 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4572 assert (cat != NULL);
4573 pivot_category_create_leaf (
4574 d->root, ctables_category_create_value_label (c, cat,
4580 pivot_table_set_look (pt, ct->look);
/* One dimension per pivot axis that is in use or carries the summaries. */
4581 struct pivot_dimension *d[PIVOT_N_AXES];
4582 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4584 static const char *names[] = {
4585 [PIVOT_AXIS_ROW] = N_("Rows"),
4586 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4587 [PIVOT_AXIS_LAYER] = N_("Layers"),
4589 d[a] = (t->axes[a] || a == t->summary_axis
4590 ? pivot_dimension_create (pt, a, names[a])
4595 assert (t->axes[a]);
4597 for (size_t i = 0; i < t->stacks[a].n; i++)
4599 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis a, counting their cells
   and tracking the deepest nest seen. */
4600 struct ctables_section **sections = xnmalloc (t->n_sections,
4602 size_t n_sections = 0;
4604 size_t n_total_cells = 0;
4605 size_t max_depth = 0;
4606 for (size_t j = 0; j < t->n_sections; j++)
4607 if (t->sections[j].nests[a] == nest)
4609 struct ctables_section *s = &t->sections[j];
4610 sections[n_sections++] = s;
4611 n_total_cells += hmap_count (&s->cells);
4613 size_t depth = s->nests[a]->n;
4614 max_depth = MAX (depth, max_depth);
/* Flatten the cells of those sections into one array and sort it. */
4617 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4619 size_t n_sorted = 0;
4621 for (size_t j = 0; j < n_sections; j++)
4623 struct ctables_section *s = sections[j];
4625 struct ctables_cell *cell;
4626 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4628 sorted[n_sorted++] = cell;
4629 assert (n_sorted <= n_total_cells);
4632 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4633 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4635 struct ctables_level
4637 enum ctables_level_type
4639 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4640 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4641 CTL_SUMMARY, /* Summary functions. */
4645 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Room for up to two levels per nest variable (label and category) plus one
   summary level. */
4648 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4649 size_t n_levels = 0;
4650 for (size_t k = 0; k < nest->n; k++)
4652 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4653 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4655 if (vlabel != CTVL_NONE)
4657 levels[n_levels++] = (struct ctables_level) {
4659 .vlabel = (enum settings_value_show) vlabel,
4664 if (nest->scale_idx != k
4665 && (k != nest->n - 1 || t->label_axis[a] == a))
4667 levels[n_levels++] = (struct ctables_level) {
4668 .type = CTL_CATEGORY,
4674 if (!summary_dimension && a == t->slabels_axis)
4676 levels[n_levels++] = (struct ctables_level) {
4677 .type = CTL_SUMMARY,
4678 .var_idx = SIZE_MAX,
4682 /* Pivot categories:
4684 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4685 - category for nest->vars[0], if nest->scale_idx != 0
4686 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4687 - category for nest->vars[1], if nest->scale_idx != 1
4689 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4690 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4691 - summary function, if 'a == t->slabels_axis && a ==
4694 Additional dimensions:
4696 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4698 - If 't->label_axis[b] == a' for some 'b != a', add a category
4703 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4705 for (size_t j = 0; j < n_sorted; j++)
4707 struct ctables_cell *cell = sorted[j];
4708 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4710 size_t n_common = 0;
4713 for (; n_common < n_levels; n_common++)
4715 const struct ctables_level *level = &levels[n_common];
4716 if (level->type == CTL_CATEGORY)
4718 size_t var_idx = level->var_idx;
4719 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4720 if (prev->axes[a].cvs[var_idx].category != c)
4722 else if (c->type != CCT_SUBTOTAL
4723 && c->type != CCT_TOTAL
4724 && c->type != CCT_POSTCOMPUTE
4725 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4726 &cell->axes[a].cvs[var_idx].value,
4727 var_get_type (nest->vars[var_idx])))
4733 for (size_t k = n_common; k < n_levels; k++)
4735 const struct ctables_level *level = &levels[k];
4736 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4737 if (level->type == CTL_SUMMARY)
4739 assert (k == n_levels - 1);
4741 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4742 for (size_t m = 0; m < specs->n; m++)
4744 int leaf = pivot_category_create_leaf (
4745 parent, ctables_summary_label (&specs->specs[m],
4753 const struct variable *var = nest->vars[level->var_idx];
4754 struct pivot_value *label;
4755 if (level->type == CTL_VAR)
4757 label = pivot_value_new_variable (var);
4758 label->variable.show = level->vlabel;
4760 else if (level->type == CTL_CATEGORY)
4762 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4763 label = ctables_category_create_value_label (
4764 t->categories[var_get_dict_index (var)],
4765 cv->category, var, &cv->value);
4770 if (k == n_levels - 1)
4771 prev_leaf = pivot_category_create_leaf (parent, label);
4773 groups[k] = pivot_category_create_group__ (parent, label);
4777 cell->axes[a].leaf = prev_leaf;
4786 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4790 size_t n_total_cells = 0;
4791 for (size_t j = 0; j < t->n_sections; j++)
4792 n_total_cells += hmap_count (&t->sections[j].cells);
4794 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4795 size_t n_sorted = 0;
4796 for (size_t j = 0; j < t->n_sections; j++)
4798 const struct ctables_section *s = &t->sections[j];
4799 struct ctables_cell *cell;
4800 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4802 sorted[n_sorted++] = cell;
4804 assert (n_sorted <= n_total_cells);
4805 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4807 size_t ids[N_CTATS];
4808 memset (ids, 0, sizeof ids);
4809 for (size_t j = 0; j < n_sorted; j++)
4811 struct ctables_cell *cell = sorted[j];
4812 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4814 struct ctables_area *area = cell->areas[at];
4815 if (!area->sequence)
4816 area->sequence = ++ids[at];
4823 for (size_t i = 0; i < t->n_sections; i++)
4825 struct ctables_section *s = &t->sections[i];
4827 struct ctables_cell *cell;
4828 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4833 const struct ctables_value *ctv = ctables_value_find (cell);
4834 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4835 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4836 for (size_t j = 0; j < specs->n; j++)
4839 size_t n_dindexes = 0;
4841 if (summary_dimension)
4842 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4845 dindexes[n_dindexes++] = ctv->leaf;
4847 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4850 int leaf = cell->axes[a].leaf;
4851 if (a == t->summary_axis && !summary_dimension)
4853 dindexes[n_dindexes++] = leaf;
4856 const struct ctables_summary_spec *ss = &specs->specs[j];
4858 struct fmt_spec format = specs->specs[j].format;
4859 bool is_ctables_format = ss->is_ctables_format;
4860 double d = (cell->postcompute
4861 ? ctables_cell_calculate_postcompute (
4862 s, cell, ss, &format, &is_ctables_format, j)
4863 : ctables_summary_value (cell->areas,
4864 &cell->summaries[j], ss));
4866 struct pivot_value *value;
4867 if (ct->hide_threshold != 0
4868 && d < ct->hide_threshold
4869 && ss->function == CTSF_COUNT)
4871 value = pivot_value_new_user_text_nocopy (
4872 xasprintf ("<%d", ct->hide_threshold));
4874 else if (d == 0 && ct->zero)
4875 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4876 else if (d == SYSMIS && ct->missing)
4877 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4878 else if (is_ctables_format)
4879 value = pivot_value_new_user_text_nocopy (
4880 ctables_format (d, &format, &ct->ctables_formats));
4883 value = pivot_value_new_number (d);
4884 value->numeric.format = format;
4886 /* XXX should text values be right-justified? */
4887 pivot_table_put (pt, dindexes, n_dindexes, value);
4892 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be moved to
   the axis given by t->label_axis[a], reporting an error with msg for each
   violated requirement.

   The visible checks are:

   - no category of the variable in question has type CCT_FUNCTION (sorting
     based on a summary function);
   - the innermost variable of each nest on the axis is not the scale
     variable of that nest;
   - for every nest after the first, its innermost variable has the same
     width, the same value labels, and the same category specifications as
     the innermost variable of the first nest.

   As a side effect, t->clabels_example is set to the innermost variable of
   the first nest.

   The return statements and some guarding conditions are on lines not
   visible here.  ctables_prepare_table combines two calls with &&, so the
   result is presumably true when the position is acceptable. */
4896 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4898 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only in the error messages below. */
4902 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4903 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4905 const struct ctables_stack *stack = &t->stacks[a];
4909 const struct ctables_nest *n0 = &stack->nests[0];
4912 assert (stack->n == 1);
/* v0 is the innermost variable of the first nest; the other nests are
   compared against it. */
4916 const struct variable *v0 = n0->vars[n0->n - 1];
4917 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4918 t->clabels_example = v0;
4920 for (size_t i = 0; i < c0->n_cats; i++)
4921 if (c0->cats[i].type == CCT_FUNCTION)
4923 msg (SE, _("%s=%s is not allowed with sorting based "
4924 "on a summary function."),
4925 subcommand_name, pos_name);
4928 if (n0->n - 1 == n0->scale_idx)
4930 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4931 "but %s is a scale variable."),
4932 subcommand_name, pos_name, var_get_name (v0));
/* Nest 0 supplied v0 and c0, so the comparison starts at nest 1. */
4936 for (size_t i = 1; i < stack->n; i++)
4938 const struct ctables_nest *ni = &stack->nests[i];
4940 const struct variable *vi = ni->vars[ni->n - 1];
4941 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4943 if (ni->n - 1 == ni->scale_idx)
4945 msg (SE, _("%s=%s requires the variables to be moved to be "
4946 "categorical, but %s is a scale variable."),
4947 subcommand_name, pos_name, var_get_name (vi));
4950 if (var_get_width (v0) != var_get_width (vi))
4952 msg (SE, _("%s=%s requires the variables to be "
4953 "moved to have the same width, but %s has "
4954 "width %d and %s has width %d."),
4955 subcommand_name, pos_name,
4956 var_get_name (v0), var_get_width (v0),
4957 var_get_name (vi), var_get_width (vi));
4960 if (!val_labs_equal (var_get_value_labels (v0),
4961 var_get_value_labels (vi)))
4963 msg (SE, _("%s=%s requires the variables to be "
4964 "moved to have the same value labels, but %s "
4965 "and %s have different value labels."),
4966 subcommand_name, pos_name,
4967 var_get_name (v0), var_get_name (vi));
4970 if (!ctables_categories_equal (c0, ci))
4972 msg (SE, _("%s=%s requires the variables to be "
4973 "moved to have the same category "
4974 "specifications, but %s and %s have different "
4975 "category specifications."),
4976 subcommand_name, pos_name,
4977 var_get_name (v0), var_get_name (vi));
/* Ensures that VAR is present in the growable array *SUM_VARS, which
   currently holds *N entries with room for *ALLOCATED.

   The array is first searched linearly for VAR.  If it is not handled by
   that search, the array is grown with x2nrealloc when full and VAR is
   stored at index *N.

   The statement taken on a match, the increment of *N, and the return
   statement are on lines not visible here.  The caller stores the result in
   sum_var_idx, so presumably the index of VAR in the array is returned. */
4986 add_sum_var (struct variable *var,
4987 struct variable ***sum_vars, size_t *n, size_t *allocated)
4989 for (size_t i = 0; i < *n; i++)
4990 if (var == (*sum_vars)[i])
4993 if (*n >= *allocated)
4994 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4995 (*sum_vars)[*n] = var;
/* Maps AREA to another area type.  Only two of the results are visible
   here (CTAT_LAYERCOL and CTAT_LAYERROW); the switch and its case labels
   are on lines not visible.

   ctables_prepare_table applies this to the calc_area of summary specs
   when row labels are moved to the column axis or column labels to the row
   axis, so presumably it exchanges the row and column flavors of an area
   type -- TODO confirm against the full switch. */
4999 static enum ctables_area_type
5000 rotate_area (enum ctables_area_type area)
5011 return CTAT_LAYERCOL;
5014 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A and registers, in the growable array
   *SUM_VARS (*N used, *ALLOCATED capacity), the variable of every summary
   spec whose function is CTSF_areaPCT_SUM.  The index returned by
   add_sum_var is stored in the sum_var_idx member of the spec.

   Both children in a->subs are visited recursively.  The test that
   distinguishes a variable node from a node with children, and any null
   check on A, are on lines not visible here. */
5027 enumerate_sum_vars (const struct ctables_axis *a,
5028 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Every summary variant and every spec within it is examined. */
5036 for (size_t i = 0; i < N_CSVS; i++)
5037 for (size_t j = 0; j < a->specs[i].n; j++)
5039 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5040 if (spec->function == CTSF_areaPCT_SUM)
5041 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
5047 for (size_t i = 0; i < 2; i++)
5048 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution after parsing.

   Visible steps, in order:

   - For each pivot axis, expand the axis tree into a stack of nests with
     enumerate_fts and, for every nest and area type, build the list
     nest->areas[at] of indexes of the non-scale variables that define that
     area.  An axis that does not go through that path instead gets a single
     empty nest, and its label_axis is reset to the axis itself.

   - For each nest on the summary axis, supply a default summary spec when
     none was given (CTSF_MEAN for a scale set, CTSF_COUNT otherwise) and
     make sure the CSV_TOTAL spec set exists by cloning CSV_CELL.

   - When row labels are moved to the column axis or column labels to the
     row axis, rotate the calc_area of the specs (the condition that uses
     the function info is on a line not visible here).

   - With listwise scale-missing handling, collect the scale variables of
     the other nests in the same group and attach them to each spec set.

   - Merge the spec sets of all nests into t->summary_specs and record in
     each spec the index (axis_idx) of its merged counterpart.

   - Collect the sum variables with enumerate_sum_vars.

   Returns whether the label positions for both the row and the column axis
   pass ctables_check_label_position. */
5054 ctables_prepare_table (struct ctables_table *t)
5056 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5059 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5061 for (size_t j = 0; j < t->stacks[a].n; j++)
5063 struct ctables_nest *nest = &t->stacks[a].nests[j];
5064 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* At most nest->n variable indexes can be stored per area type. */
5066 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5067 nest->n_areas[at] = 0;
5069 enum pivot_axis_type ata, atb;
5070 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5072 ata = PIVOT_AXIS_ROW;
5073 atb = PIVOT_AXIS_COLUMN;
5075 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
5077 ata = PIVOT_AXIS_COLUMN;
5078 atb = PIVOT_AXIS_ROW;
5081 if (at == CTAT_LAYER
5082 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5083 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5084 ? a == atb && t->label_axis[a] != a
/* Counts down from the last variable; k is unsigned, so the loop ends when
   the decrement past zero wraps to a value that is not below nest->n. */
5087 for (size_t k = nest->n - 1; k < nest->n; k--)
5088 if (k != nest->scale_idx)
5090 nest->areas[at][nest->n_areas[at]++] = k;
5096 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5097 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5098 : at == CTAT_TABLE ? true
5102 for (size_t k = 0; k < nest->n; k++)
5103 if (k != nest->scale_idx)
5104 nest->areas[at][nest->n_areas[at]++] = k;
5110 #define L PIVOT_AXIS_LAYER
5111 n_drop = (t->clabels_from_axis == L ? a != L
5112 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5113 : t->clabels_from_axis == a ? 2
5120 n_drop = a == ata && t->label_axis[ata] == atb;
5125 n_drop = (a == ata ? t->label_axis[ata] == atb
5127 : t->clabels_from_axis == atb ? -1
5128 : t->clabels_to_axis != atb ? 1
5140 size_t n = nest->n_areas[at];
5143 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5144 nest->n_areas[at]--;
/* Drop up to n_drop trailing entries, never going below zero entries. */
5149 for (int i = 0; i < n_drop; i++)
5150 if (nest->n_areas[at] > 0)
5151 nest->n_areas[at]--;
5158 struct ctables_nest *nest = xmalloc (sizeof *nest);
5159 *nest = (struct ctables_nest) {
5161 .scale_idx = SIZE_MAX,
5162 .summary_idx = SIZE_MAX
5164 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5166 /* There's no point in moving labels away from an axis that has no
5167 labels, so avoid dealing with the special cases around that. */
5168 t->label_axis[a] = a;
5171 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5172 for (size_t i = 0; i < stack->n; i++)
5174 struct ctables_nest *nest = &stack->nests[i];
/* No summary functions were specified for this nest: synthesize one. */
5175 if (!nest->specs[CSV_CELL].n)
5177 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5178 ss->specs = xmalloc (sizeof *ss->specs);
5181 enum ctables_summary_function function
5182 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5186 nest->summary_idx = nest->n - 1;
5187 ss->var = nest->vars[nest->summary_idx];
5189 *ss->specs = (struct ctables_summary_spec) {
5190 .function = function,
5191 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5192 .format = ctables_summary_default_format (function, ss->var),
5195 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5196 &nest->specs[CSV_CELL]);
5198 else if (!nest->specs[CSV_TOTAL].n)
5199 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5200 &nest->specs[CSV_CELL]);
5202 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5203 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5205 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5206 for (size_t i = 0; i < nest->specs[sv].n; i++)
5208 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5209 const struct ctables_function_info *cfi =
5210 &ctables_function_info[ss->function];
5212 ss->calc_area = rotate_area (ss->calc_area);
5216 if (t->ctables->smissing_listwise)
5218 struct variable **listwise_vars = NULL;
5220 size_t allocated = 0;
/* Scan the nests that share this group head, starting at the head. */
5222 for (size_t j = nest->group_head; j < stack->n; j++)
5224 const struct ctables_nest *other_nest = &stack->nests[j];
5225 if (other_nest->group_head != nest->group_head)
/* scale_idx below n means the other nest has a scale variable. */
5228 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5231 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5232 sizeof *listwise_vars);
5233 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5236 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5239 listwise_vars = xmemdup (listwise_vars,
5240 n * sizeof *listwise_vars);
5241 nest->specs[sv].listwise_vars = listwise_vars;
5242 nest->specs[sv].n_listwise_vars = n;
/* Merge the per-nest spec sets into t->summary_specs.  Each merge item
   tracks a position (ofs) within one spec set. */
5247 struct ctables_summary_spec_set *merged = &t->summary_specs;
5248 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5250 for (size_t j = 0; j < stack->n; j++)
5252 const struct ctables_nest *nest = &stack->nests[j];
5254 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5255 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Pick the smallest current spec among the remaining items. */
5260 struct merge_item min = items[0];
5261 for (size_t j = 1; j < n_left; j++)
5262 if (merge_item_compare_3way (&items[j], &min) < 0)
5265 if (merged->n >= merged->allocated)
5266 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5267 sizeof *merged->specs);
5268 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Advance every item whose current spec equals the one just merged, and
   retire items that have run out of specs. */
5270 for (size_t j = 0; j < n_left; )
5272 if (merge_item_compare_3way (&items[j], &min) == 0)
5274 struct merge_item *item = &items[j];
5275 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5276 if (item->ofs >= item->set->n)
5278 items[j] = items[--n_left];
5287 size_t allocated_sum_vars = 0;
5288 enumerate_sum_vars (t->axes[t->summary_axis],
5289 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5291 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5292 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, records the value of the innermost variable of every nest on
   axis A of table T as a category-label value.

   For each nest the value of its last variable is read from C.  A numeric
   system-missing value is tested for first (the statement taken is on a
   line not visible here; presumably the nest is skipped).  The value is
   then inserted with ctables_value_insert, guarded by a call to
   ctables_categories_match whose remaining arguments are on a line not
   visible here. */
5296 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5297 enum pivot_axis_type a)
5299 struct ctables_stack *stack = &t->stacks[a];
5300 for (size_t i = 0; i < stack->n; i++)
5302 const struct ctables_nest *nest = &stack->nests[i];
5303 const struct variable *var = nest->vars[nest->n - 1];
5304 const union value *value = case_data (c, var);
5306 if (var_is_numeric (var) && value->f == SYSMIS)
5309 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5311 ctables_value_insert (t, value, var_get_width (var));
/* Adds to OCCURRENCES the values of VAR that the category specifications in
   CATS name, so that they are represented even if no case has them.

   Each category in CATS is handled according to its type.  Most of the case
   labels of the switch are on lines not visible here, so the mapping below
   is inferred from the bodies -- TODO confirm:

   - a numeric category adds c->number;
   - a string category adds c->string, padded with spaces on the right to
     the width of VAR;
   - a numeric range adds each labeled value within c->nrange (inclusive);
   - a string range adds each labeled value for which in_string_range holds;
   - one category type adds each labeled value that is missing for VAR;
   - one adds every labeled value;
   - one adds each labeled value that is either not missing or allowed by
     c->include_missing.

   Apart from the two explicit cases, values come only from the value labels
   of VAR. */
5316 ctables_add_category_occurrences (const struct variable *var,
5317 struct hmap *occurrences,
5318 const struct ctables_categories *cats)
5320 const struct val_labs *val_labs = var_get_value_labels (var);
5322 for (size_t i = 0; i < cats->n_cats; i++)
5324 const struct ctables_category *c = &cats->cats[i];
5328 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5334 int width = var_get_width (var);
/* Build a temporary value of the width of VAR holding the category string,
   and release it again after the occurrence has been added. */
5336 value_init (&value, width);
5337 value_copy_buf_rpad (&value, width,
5338 CHAR_CAST (uint8_t *, c->string.string),
5339 c->string.length, ' ');
5340 ctables_add_occurrence (var, &value, occurrences);
5341 value_destroy (&value, width);
5346 assert (var_is_numeric (var));
5347 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5348 vl = val_labs_next (val_labs, vl))
5349 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5350 ctables_add_occurrence (var, &vl->value, occurrences);
5354 assert (var_is_alpha (var));
5355 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5356 vl = val_labs_next (val_labs, vl))
5357 if (in_string_range (&vl->value, var, c->srange))
5358 ctables_add_occurrence (var, &vl->value, occurrences);
5362 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5363 vl = val_labs_next (val_labs, vl))
5364 if (var_is_value_missing (var, &vl->value))
5365 ctables_add_occurrence (var, &vl->value, occurrences);
5369 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5370 vl = val_labs_next (val_labs, vl))
5371 ctables_add_occurrence (var, &vl->value, occurrences);
5374 case CCT_POSTCOMPUTE:
5384 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5385 vl = val_labs_next (val_labs, vl))
5386 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5387 ctables_add_occurrence (var, &vl->value, occurrences);
5390 case CCT_EXCLUDED_MISSING:
/* Recursive helper that inserts a cell into section S for every combination
   of recorded occurrences across all nest variables on all axes.

   A and A_IDX identify the variable currently being enumerated: variable
   A_IDX of the nest on axis A.  CATS[axis][idx] receives the category
   matched for each variable and C is a scratch case whose data is
   overwritten with the value being enumerated.

   - When A is past the last axis, the combination is complete and the cell
     is inserted with ctables_cell_insert__.
   - When the axis has no nest or all its variables are done, recursion
     moves to the first variable of the next axis.
   - Otherwise, every occurrence recorded for the variable is tried in turn,
     followed by every category of type CCT_POSTCOMPUTE for the variable. */
5397 ctables_section_recurse_add_empty_categories (
5398 struct ctables_section *s,
5399 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5400 enum pivot_axis_type a, size_t a_idx)
5402 if (a >= PIVOT_N_AXES)
5403 ctables_cell_insert__ (s, c, cats);
5404 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5405 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5408 const struct variable *var = s->nests[a]->vars[a_idx];
5409 const struct ctables_categories *categories = s->table->categories[
5410 var_get_dict_index (var)];
5411 int width = var_get_width (var);
5412 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5413 const struct ctables_occurrence *o;
5414 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence. */
5416 union value *value = case_data_rw (c, var);
5417 value_destroy (value, width);
5418 value_clone (value, &o->value, width);
5419 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5420 assert (cats[a][a_idx] != NULL);
5421 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated as well; the case data is left as
   it is for them. */
5424 for (size_t i = 0; i < categories->n_cats; i++)
5426 const struct ctables_category *cat = &categories->cats[i];
5427 if (cat->type == CCT_POSTCOMPUTE)
5429 cats[a][a_idx] = cat;
5430 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for categories that are to be shown even though
   no case fell into them.

   For every non-scale variable of every nest, the show_empty setting of its
   categories is tested and ctables_add_category_occurrences is called.  The
   statements between those two lines, including whatever sets show_empty
   and any early exit when nothing needs to be shown, are on lines not
   visible here.

   A scratch case built from the dictionary prototype is then passed to the
   recursive helper, which inserts a cell for every combination of
   occurrences.  The release of the scratch case is presumably on a line
   not visible here. */
5437 ctables_section_add_empty_categories (struct ctables_section *s)
5439 bool show_empty = false;
5440 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5442 for (size_t k = 0; k < s->nests[a]->n; k++)
5443 if (k != s->nests[a]->scale_idx)
5445 const struct variable *var = s->nests[a]->vars[k];
5446 const struct ctables_categories *cats = s->table->categories[
5447 var_get_dict_index (var)];
5448 if (cats->show_empty)
5451 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Per-axis scratch arrays, sized by the number of variables in each nest,
   that the recursive helper fills with the matched categories. */
5457 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5458 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5459 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5460 const struct ctables_category **cats[PIVOT_N_AXES] =
5462 [PIVOT_AXIS_LAYER] = layer_cats,
5463 [PIVOT_AXIS_ROW] = row_cats,
5464 [PIVOT_AXIS_COLUMN] = column_cats,
5466 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5467 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of all accumulated data while keeping the section
   itself usable.

   Three kinds of content are removed:

   - the occurrences recorded for every non-scale variable on every axis;
   - every cell, including its per-axis category values and its summaries;
   - every area, for each area type.

   The cell and area hash maps are shrunk afterward.  The free calls for the
   occurrence, cell, and area objects themselves are presumably on lines not
   visible here. */
5472 ctables_section_clear (struct ctables_section *s)
5474 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5476 const struct ctables_nest *nest = s->nests[a];
5477 for (size_t i = 0; i < nest->n; i++)
5478 if (i != nest->scale_idx)
5480 const struct variable *var = nest->vars[i];
5481 int width = var_get_width (var);
5482 struct ctables_occurrence *o, *next;
5483 struct hmap *map = &s->occurrences[a][i];
5484 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5486 value_destroy (&o->value, width);
5487 hmap_delete (map, &o->node);
5494 struct ctables_cell *cell, *next_cell;
5495 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5497 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5499 const struct ctables_nest *nest = s->nests[a];
5500 for (size_t i = 0; i < nest->n; i++)
5501 if (i != nest->scale_idx)
5502 value_destroy (&cell->axes[a].cvs[i].value,
5503 var_get_width (nest->vars[i]));
5504 free (cell->axes[a].cvs);
/* The spec set that applies to this cell is selected by its summary
   variant, cell->sv. */
5507 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5508 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5509 for (size_t i = 0; i < specs->n; i++)
5510 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5511 free (cell->summaries);
5513 hmap_delete (&s->cells, &cell->node);
5516 hmap_shrink (&s->cells);
5518 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5520 struct ctables_area *area, *next_area;
5521 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5525 hmap_delete (&s->areas[at], &area->node);
5528 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S.  The contents are first emptied
   with ctables_section_clear; then the per-variable occurrence maps and the
   arrays that hold them are destroyed and freed for every axis, followed by
   the cell map and the per-area-type maps.  S itself is not freed by the
   visible code. */
5533 ctables_section_uninit (struct ctables_section *s)
5535 ctables_section_clear (s);
5537 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5539 struct ctables_nest *nest = s->nests[a];
5540 for (size_t i = 0; i < nest->n; i++)
5541 hmap_destroy (&s->occurrences[a][i]);
5542 free (s->occurrences[a]);
5545 hmap_destroy (&s->cells);
5546 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5547 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated in table T so that it can be filled again:
   every section is cleared, and, when category labels were moved
   (t->clabels_example is set), the values in t->clabels_values_map are
   destroyed and removed, the map is shrunk, and the t->clabels_values array
   is freed and reset to empty.  The free of each value object is presumably
   on a line not visible here. */
5551 ctables_table_clear (struct ctables_table *t)
5553 for (size_t i = 0; i < t->n_sections; i++)
5554 ctables_section_clear (&t->sections[i]);
5556 if (t->clabels_example)
/* All stored values share the width of the example variable. */
5558 int width = var_get_width (t->clabels_example);
5559 struct ctables_value *value, *next_value;
5560 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5561 &t->clabels_values_map)
5563 value_destroy (&value->value, width);
5564 hmap_delete (&t->clabels_values_map, &value->node);
5567 hmap_shrink (&t->clabels_values_map);
5569 free (t->clabels_values);
5570 t->clabels_values = NULL;
5571 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases from INPUT for dataset DS.  (The
   rest of the parameter list is on a line not visible here.)

   Visible steps, in order:

   - For each table, allocate the sections array, sized as the product of
     the nest counts of the three axes (each at least 1), and fill it with
     ctables_table_add_section.

   - Group the input: by split variables when the dictionary split type is
     SPLIT_SEPARATE, otherwise as a grouper over no variables.

   - For each group: read every case, compute its dictionary weight and
     effective weight, insert it into every section of every table, and
     record category-label values for axes whose labels were moved.  Then,
     for each table, sort the category-label values if any, add empty
     categories to each section, output the table, and clear it for the next
     group.

   Returns the result of casegrouper_destroy. */
5576 ctables_execute (struct dataset *ds, struct casereader *input,
5579 for (size_t i = 0; i < ct->n_tables; i++)
5581 struct ctables_table *t = ct->tables[i];
5582 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5583 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5584 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5585 sizeof *t->sections);
5586 size_t ix[PIVOT_N_AXES];
5587 ctables_table_add_section (t, 0, ix);
5590 struct dictionary *dict = dataset_dict (ds);
5592 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5593 struct casegrouper *grouper
5595 ? casegrouper_create_splits (input, dict)
5596 : casegrouper_create_vars (input, NULL, 0));
5597 struct casereader *group;
5598 while (casegrouper_get_next_group (grouper, &group))
/* The first case of the group is peeked at, without consuming it, to report
   the split-file values; the guarding conditions are on lines not visible
   here. */
5602 struct ccase *c = casereader_peek (group, 0);
5605 output_split_file_values (ds, c);
5610 bool warn_on_invalid = true;
5611 for (struct ccase *c = casereader_read (group); c;
5612 case_unref (c), c = casereader_read (group))
5614 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5615 double e_weight = (ct->e_weight
5616 ? var_force_valid_weight (ct->e_weight,
5617 case_num (c, ct->e_weight),
/* Weights indexed by enum ctables_weighting. */
5621 [CTW_DICTIONARY] = d_weight,
5622 [CTW_EFFECTIVE] = e_weight,
5623 [CTW_UNWEIGHTED] = 1.0,
5626 for (size_t i = 0; i < ct->n_tables; i++)
5628 struct ctables_table *t = ct->tables[i];
5630 for (size_t j = 0; j < t->n_sections; j++)
5631 ctables_cell_insert (&t->sections[j], c, weight);
/* Only axes whose category labels were moved elsewhere need their values
   recorded. */
5633 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5634 if (t->label_axis[a] != a)
5635 ctables_insert_clabels_values (t, c, a);
5638 casereader_destroy (group);
5640 for (size_t i = 0; i < ct->n_tables; i++)
5642 struct ctables_table *t = ct->tables[i];
5644 if (t->clabels_example)
5645 ctables_sort_clabels_values (t);
5647 for (size_t j = 0; j < t->n_sections; j++)
5648 ctables_section_add_empty_categories (&t->sections[j]);
5650 ctables_table_output (ct, t);
5651 ctables_table_clear (t);
5654 return casegrouper_destroy (grouper);
/* Looks up the postcompute called NAME in CT->postcomputes.  The hash is
   computed with utf8_hash_case_string and candidates are compared with
   utf8_strcasecmp, so the lookup ignores case.

   The return statements are on lines not visible here; presumably the
   matching postcompute is returned, or a null pointer when none matches. */
5657 static struct ctables_postcompute *
5658 ctables_find_postcompute (struct ctables *ct, const char *name)
5660 struct ctables_postcompute *pc;
5661 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5662 utf8_hash_case_string (name, 0), &ct->postcomputes)
5663 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition from LEXER.  (The rest of the parameter
   list is on a line not visible here.)

   The visible token sequence is: an ampersand (T_AND), an identifier that
   becomes the postcompute name, an equals sign, the identifier EXPR, a left
   parenthesis, an expression parsed by ctables_pcexpr_parse_add, and a
   right parenthesis.

   If a postcompute with the same name already exists, a warning and a note
   pointing at the previous definition are issued and the old expression and
   location are destroyed; otherwise a new postcompute is allocated and
   inserted into ct->postcomputes under the case-insensitive hash of its
   name.  In both cases the new location is stored, and the label is set to
   the source text of the expression.

   The error-path statements, the assignment of the new expression, the
   handling of NAME when an existing entry is reused, and the return
   statements are on lines not visible here. */
5669 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Start one token back from the current one, presumably so that the
   location includes the keyword the caller already consumed. */
5672 int pcompute_start = lex_ofs (lexer) - 1;
5674 if (!lex_match (lexer, T_AND))
5676 lex_error_expecting (lexer, "&");
5679 if (!lex_force_id (lexer))
5682 char *name = ss_xstrdup (lex_tokss (lexer));
5685 if (!lex_force_match (lexer, T_EQUALS)
5686 || !lex_force_match_id (lexer, "EXPR")
5687 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets bracketing the expression, used for the label below. */
5693 int expr_start = lex_ofs (lexer);
5694 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5695 int expr_end = lex_ofs (lexer) - 1;
5696 if (!expr || !lex_force_match (lexer, T_RPAREN))
5698 ctables_pcexpr_destroy (expr);
5702 int pcompute_end = lex_ofs (lexer) - 1;
5704 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5707 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5710 msg_at (SW, location, _("New definition of &%s will override the "
5711 "previous definition."),
5713 msg_at (SN, pc->location, _("This is the previous definition."));
5715 ctables_pcexpr_destroy (pc->expr);
5716 msg_location_destroy (pc->location);
5721 pc = xmalloc (sizeof *pc);
5722 *pc = (struct ctables_postcompute) { .name = name };
5723 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5724 utf8_hash_case_string (pc->name, 0));
5727 pc->location = location;
5729 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a sequence of summary specifications from LEXER into *SSS, which is
   reset to an empty set first.

   Each specification is a summary function, followed by a percentile in the
   closed range 0 to 100 when the function is CTSF_PTILE, followed by a format
   specifier.  Parsing stops at the end of the command, at a slash, or at an
   identifier that matches LABEL or HIDESOURCECATS.

   The final visible statement uninitializes *SSS; presumably that is the
   cleanup for the failure path -- TODO confirm. */
5734 ctables_parse_pproperties_format (struct lexer *lexer,
5735 struct ctables_summary_spec_set *sss)
5737 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5739 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5740 && !(lex_token (lexer) == T_ID
5741 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5742 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5743 lex_tokss (lexer)))))
5745 /* Parse function. */
5746 enum ctables_summary_function function;
5747 enum ctables_weighting weighting;
5748 enum ctables_area_type area;
5749 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5752 /* Parse percentile. */
5753 double percentile = 0;
5754 if (function == CTSF_PTILE)
5756 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5758 percentile = lex_number (lexer);
/* Parse format. */
5763 struct fmt_spec format;
5764 bool is_ctables_format;
5765 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array of specifications if it is full, then append the new one. */
5768 if (sss->n >= sss->allocated)
5769 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5770 sizeof *sss->specs);
5771 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5772 .function = function,
5773 .weighting = weighting,
5776 .percentile = percentile,
5778 .is_ctables_format = is_ctables_format,
5784 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand from LEXER.

   The body begins with a list of postcompute references, each written as an
   ampersand followed by an identifier that must name a postcompute already
   present in CT.  It continues, up to the next slash or the end of the
   command, with LABEL, FORMAT, and HIDESOURCECATS settings.  Every setting is
   applied to each of the listed postcomputes. */
5789 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named in this subcommand. */
5791 struct ctables_postcompute **pcs = NULL;
5793 size_t allocated_pcs = 0;
5795 while (lex_match (lexer, T_AND))
5797 if (!lex_force_id (lexer))
5799 struct ctables_postcompute *pc
5800 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5803 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5808 if (n_pcs >= allocated_pcs)
5809 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5813 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* LABEL: replace the label of each postcompute with a copy of the string. */
5815 if (lex_match_id (lexer, "LABEL"))
5817 lex_match (lexer, T_EQUALS);
5818 if (!lex_force_string (lexer))
5821 for (size_t i = 0; i < n_pcs; i++)
5823 free (pcs[i]->label);
5824 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
/* FORMAT: parse the specifications once into a temporary set, give each
   postcompute its own clone, then release the temporary. */
5829 else if (lex_match_id (lexer, "FORMAT"))
5831 lex_match (lexer, T_EQUALS);
5833 struct ctables_summary_spec_set sss;
5834 if (!ctables_parse_pproperties_format (lexer, &sss))
5837 for (size_t i = 0; i < n_pcs; i++)
5840 ctables_summary_spec_set_uninit (pcs[i]->specs);
5842 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5843 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5845 ctables_summary_spec_set_uninit (&sss);
/* HIDESOURCECATS: a boolean stored in each postcompute. */
5847 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5849 lex_match (lexer, T_EQUALS);
5850 bool hide_source_cats;
5851 if (!parse_bool (lexer, &hide_source_cats))
5853 for (size_t i = 0; i < n_pcs; i++)
5854 pcs[i]->hide_source_cats = hide_source_cats;
5858 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time and formatted
   according to the strftime() template FORMAT.

   Nothing is appended if NOW cannot be converted to broken-down local time
   (localtime() returns a null pointer) or if strftime() returns zero.  A zero
   return means either that the formatted text is empty or that it did not fit
   in the buffer; in the latter case the C standard leaves the buffer contents
   indeterminate, so they must not be read. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (tm == NULL)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
/* If *S starts with PREFIX, advances *S past it by the length of PREFIX.

   NOTE(review): the return statements are not visible here; the caller tests
   the result as a condition, so presumably it reports whether the prefix was
   present and skipped -- confirm. */
5880 skip_prefix (struct substring *s, struct substring prefix)
5882 if (ss_starts_with (*s, prefix))
5884 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START (inclusive) through EXPR_END (exclusive).

   An identifier token is looked up in DICT as a variable name; if the
   variable exists and has a label, the label is appended, otherwise the
   identifier text itself.  Other tokens are appended using their syntax
   representation, with spaces inserted around them except after a left
   parenthesis and before a right parenthesis.  Square brackets are tracked
   through a nesting counter.

   NOTE(review): the statements that update and consult the nesting counter
   are not visible here; presumably bracketed material is omitted from the
   output -- confirm. */
5892 put_table_expression (struct string *out, struct lexer *lexer,
5893 struct dictionary *dict, int expr_start, int expr_end)
5896 for (int ofs = expr_start; ofs < expr_end; ofs++)
5898 const struct token *t = lex_ofs_token (lexer, ofs);
5899 if (t->type == T_LBRACK)
5901 else if (t->type == T_RBRACK && nest > 0)
5907 else if (t->type == T_ID)
5909 const struct variable *var
5910 = dict_lookup_var (dict, t->string.string);
5911 const char *label = var ? var_get_label (var) : NULL;
5912 ds_put_cstr (out, label ? label : t->string.string);
/* Leading space, unless this is the first token, the token is a right
   parenthesis, or the output already ends in a space. */
5916 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5917 ds_put_byte (out, ' ');
5919 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5920 ds_put_cstr (out, repr);
/* Trailing space, unless this is the last token or a left parenthesis. */
5923 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5924 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding keywords that begin with a
   right parenthesis:

   - )DATE is replaced by NOW formatted with the strftime() template %x.
   - )TIME is replaced by NOW formatted with the strftime() template %X.
   - )TABLE is replaced by the rendering of the table expression, that is,
     the tokens of LEXER from EXPR_START to EXPR_END, using DICT.

   Any other right parenthesis is copied through unchanged.  Text between
   right parentheses is copied verbatim. */
5930 put_title_text (struct string *out, struct substring in, time_t now,
5931 struct lexer *lexer, struct dictionary *dict,
5932 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis, then drop it from IN. */
5936 size_t chunk = ss_find_byte (in, ')');
5937 ds_put_substring (out, ss_head (in, chunk));
5938 ss_advance (&in, chunk);
5939 if (ss_is_empty (in))
5942 if (skip_prefix (&in, ss_cstr (")DATE")))
5943 put_strftime (out, now, "%x");
5944 else if (skip_prefix (&in, ss_cstr (")TIME")))
5945 put_strftime (out, now, "%X");
5946 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5947 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a recognized keyword: emit the parenthesis literally and move on. */
5950 ds_put_byte (out, ')');
5951 ss_advance (&in, 1);
/* Parses and executes the CTABLES command read from LEXER against dataset DS.

   Parsing has two phases.  First, the subcommands that may precede the first
   TABLE subcommand (FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES,
   WEIGHT, HIDESMALLCOUNTS) are parsed into a struct ctables.  Then each TABLE
   subcommand, together with the per-table subcommands that follow it
   (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST), is
   parsed into a struct ctables_table appended to ct->tables.

   After parsing, the tables are computed by ctables_execute().  The result is
   CMD_SUCCESS if both ctables_execute() and proc_commit() succeed and
   CMD_FAILURE otherwise.  The struct ctables is destroyed before returning. */
5957 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5959 struct casereader *input = NULL;
/* Run the measurement-level guesser over the active file. */
5961 struct measure_guesser *mg = measure_guesser_create (ds);
5964 input = proc_open (ds);
5965 measure_guesser_run (mg, input);
5966 measure_guesser_destroy (mg);
/* Per-variable label display mode, indexed by dictionary index and
   initialized from the global show-variables setting. */
5969 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5970 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5971 enum settings_value_show tvars = settings_get_show_variables ();
5972 for (size_t i = 0; i < n_vars; i++)
5973 vlabels[i] = (enum ctables_vlabel) tvars;
/* Private, modifiable copy of the default table look. */
5975 struct pivot_table_look *look = pivot_table_look_unshare (
5976 pivot_table_look_ref (pivot_table_look_get_default ()));
5977 look->omit_empty = false;
5979 struct ctables *ct = xmalloc (sizeof *ct);
5980 *ct = (struct ctables) {
5981 .dict = dataset_dict (ds),
5983 .ctables_formats = FMT_SETTINGS_INIT,
5985 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Timestamp used when expanding )DATE and )TIME in titles. */
5988 time_t now = time (NULL);
5993 const char *dot_string;
5994 const char *comma_string;
/* Number styles for the four CTABLES-specific formats.  The first string is
   used when the decimal point setting is a period, the second otherwise. */
5996 static const struct ctf ctfs[4] = {
5997 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5998 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5999 { CTEF_PAREN, "-,(,),", "-.(.)." },
6000 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6002 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6003 for (size_t i = 0; i < 4; i++)
6005 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6006 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6007 fmt_number_style_from_string (s));
6010 if (!lex_force_match (lexer, T_SLASH))
/* Phase 1: subcommands that precede the first TABLE subcommand. */
6013 while (!lex_match_id (lexer, "TABLE"))
6015 if (lex_match_id (lexer, "FORMAT"))
/* widths[0] is the minimum and widths[1] the maximum column width; SYSMIS
   means that the width was not specified. */
6017 double widths[2] = { SYSMIS, SYSMIS };
6018 double units_per_inch = 72.0;
6020 while (lex_token (lexer) != T_SLASH)
6022 if (lex_match_id (lexer, "MINCOLWIDTH"))
6024 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6027 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6029 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6032 else if (lex_match_id (lexer, "UNITS"))
6034 lex_match (lexer, T_EQUALS);
6035 if (lex_match_id (lexer, "POINTS"))
6036 units_per_inch = 72.0;
6037 else if (lex_match_id (lexer, "INCHES"))
6038 units_per_inch = 1.0;
6039 else if (lex_match_id (lexer, "CM"))
6040 units_per_inch = 2.54;
6043 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6047 else if (lex_match_id (lexer, "EMPTY"))
6052 lex_match (lexer, T_EQUALS);
6053 if (lex_match_id (lexer, "ZERO"))
6055 /* Nothing to do. */
6057 else if (lex_match_id (lexer, "BLANK"))
6058 ct->zero = xstrdup ("");
6059 else if (lex_force_string (lexer))
6061 ct->zero = ss_xstrdup (lex_tokss (lexer));
6067 else if (lex_match_id (lexer, "MISSING"))
6069 lex_match (lexer, T_EQUALS);
6070 if (!lex_force_string (lexer))
/* A string consisting of a single period is treated specially; any other
   string is copied. */
6074 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6075 ? ss_xstrdup (lex_tokss (lexer))
6081 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6082 "UNITS", "EMPTY", "MISSING");
6087 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6088 && widths[0] > widths[1])
6090 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each specified width from the chosen units to inches and then
   scale by 96 before storing it in the horizontal width range of the look. */
6094 for (size_t i = 0; i < 2; i++)
6095 if (widths[i] != SYSMIS)
6097 int *wr = ct->look->width_ranges[TABLE_HORZ];
6098 wr[i] = widths[i] / units_per_inch * 96.0;
6103 else if (lex_match_id (lexer, "VLABELS"))
6105 if (!lex_force_match_id (lexer, "VARIABLES"))
6107 lex_match (lexer, T_EQUALS);
6109 struct variable **vars;
6111 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6115 if (!lex_force_match_id (lexer, "DISPLAY"))
6120 lex_match (lexer, T_EQUALS);
6122 enum ctables_vlabel vlabel;
6123 if (lex_match_id (lexer, "DEFAULT"))
6124 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6125 else if (lex_match_id (lexer, "NAME"))
6127 else if (lex_match_id (lexer, "LABEL"))
6128 vlabel = CTVL_LABEL;
6129 else if (lex_match_id (lexer, "BOTH"))
6131 else if (lex_match_id (lexer, "NONE"))
6135 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Apply the chosen display mode to every listed variable. */
6141 for (size_t i = 0; i < n_vars; i++)
6142 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6145 else if (lex_match_id (lexer, "MRSETS"))
6147 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6149 lex_match (lexer, T_EQUALS);
6150 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6153 else if (lex_match_id (lexer, "SMISSING"))
6155 if (lex_match_id (lexer, "VARIABLE"))
6156 ct->smissing_listwise = false;
6157 else if (lex_match_id (lexer, "LISTWISE"))
6158 ct->smissing_listwise = true;
6161 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6165 else if (lex_match_id (lexer, "PCOMPUTE"))
6167 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6170 else if (lex_match_id (lexer, "PPROPERTIES"))
6172 if (!ctables_parse_pproperties (lexer, ct))
6175 else if (lex_match_id (lexer, "WEIGHT"))
6177 if (!lex_force_match_id (lexer, "VARIABLE"))
6179 lex_match (lexer, T_EQUALS);
6180 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6184 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6186 if (lex_match_id (lexer, "COUNT"))
6188 lex_match (lexer, T_EQUALS);
6189 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6192 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT, a threshold that is still zero becomes 5. */
6195 else if (ct->hide_threshold == 0)
6196 ct->hide_threshold = 5;
6200 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6201 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6202 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6206 if (!lex_force_match (lexer, T_SLASH))
/* Phase 2: one iteration per TABLE subcommand. */
6210 size_t allocated_tables = 0;
6213 if (ct->n_tables >= allocated_tables)
6214 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6215 sizeof *ct->tables);
/* Default category specification and the per-variable categories array
   (one slot per dictionary variable) for the new table. */
6217 struct ctables_category *cat = xmalloc (sizeof *cat);
6218 *cat = (struct ctables_category) {
6220 .include_missing = false,
6221 .sort_ascending = true,
6224 struct ctables_categories *c = xmalloc (sizeof *c);
6225 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6226 *c = (struct ctables_categories) {
6233 struct ctables_categories **categories = xnmalloc (n_vars,
6234 sizeof *categories);
6235 for (size_t i = 0; i < n_vars; i++)
/* New table with default settings: summary labels on the column axis and
   visible, and each axis carrying its own category labels. */
6238 struct ctables_table *t = xmalloc (sizeof *t);
6239 *t = (struct ctables_table) {
6241 .slabels_axis = PIVOT_AXIS_COLUMN,
6242 .slabels_visible = true,
6243 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6245 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6246 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6247 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6249 .clabels_from_axis = PIVOT_AXIS_LAYER,
6250 .clabels_to_axis = PIVOT_AXIS_LAYER,
6251 .categories = categories,
6252 .n_categories = n_vars,
6255 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: a row axis, optionally followed by BY and a
   column axis, optionally followed by BY and a layer axis.  The token range
   of the expression is remembered for )TABLE expansion in titles. */
6257 lex_match (lexer, T_EQUALS);
6258 int expr_start = lex_ofs (lexer);
6259 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6260 &t->axes[PIVOT_AXIS_ROW]))
6262 if (lex_match (lexer, T_BY))
6264 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6265 &t->axes[PIVOT_AXIS_COLUMN]))
6268 if (lex_match (lexer, T_BY))
6270 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6271 &t->axes[PIVOT_AXIS_LAYER]))
6275 int expr_end = lex_ofs (lexer);
6277 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6278 && !t->axes[PIVOT_AXIS_LAYER])
6280 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis, and report an error with a
   note for each offending axis when scale variables are not confined to a
   single axis. */
6284 const struct ctables_axis *scales[PIVOT_N_AXES];
6285 size_t n_scales = 0;
6286 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6288 scales[a] = find_scale (t->axes[a]);
6294 msg (SE, _("Scale variables may appear only on one axis."));
6295 if (scales[PIVOT_AXIS_ROW])
6296 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6297 _("This scale variable appears on the rows axis."));
6298 if (scales[PIVOT_AXIS_COLUMN])
6299 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6300 _("This scale variable appears on the columns axis."));
6301 if (scales[PIVOT_AXIS_LAYER])
6302 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6303 _("This scale variable appears on the layer axis."));
/* Likewise for summaries: at most one axis may carry them. */
6307 const struct ctables_axis *summaries[PIVOT_N_AXES];
6308 size_t n_summaries = 0;
6309 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6311 summaries[a] = (scales[a]
6313 : find_categorical_summary_spec (t->axes[a]));
6317 if (n_summaries > 1)
6319 msg (SE, _("Summaries may appear only on one axis."));
6320 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6323 msg_at (SN, summaries[a]->loc,
6325 ? _("This variable on the rows axis has a summary.")
6326 : a == PIVOT_AXIS_COLUMN
6327 ? _("This variable on the columns axis has a summary.")
6328 : _("This variable on the layers axis has a summary."));
6330 msg_at (SN, summaries[a]->loc,
6331 _("This is a scale variable, so it always has a "
6332 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary when any axis has one,
   otherwise an axis that is nonempty. */
6337 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6338 if (n_summaries ? summaries[a] : t->axes[a])
6340 t->summary_axis = a;
6344 if (lex_token (lexer) == T_ENDCMD)
6346 if (!ctables_prepare_table (t))
6350 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6353 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6355 if (lex_match_id (lexer, "SLABELS"))
6357 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6359 if (lex_match_id (lexer, "POSITION"))
6361 lex_match (lexer, T_EQUALS);
6362 if (lex_match_id (lexer, "COLUMN"))
6363 t->slabels_axis = PIVOT_AXIS_COLUMN;
6364 else if (lex_match_id (lexer, "ROW"))
6365 t->slabels_axis = PIVOT_AXIS_ROW;
6366 else if (lex_match_id (lexer, "LAYER"))
6367 t->slabels_axis = PIVOT_AXIS_LAYER;
6370 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6374 else if (lex_match_id (lexer, "VISIBLE"))
6376 lex_match (lexer, T_EQUALS);
6377 if (!parse_bool (lexer, &t->slabels_visible))
6382 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6387 else if (lex_match_id (lexer, "CLABELS"))
/* AUTO restores the default of each axis carrying its own category labels;
   ROWLABELS and COLLABELS move the labels of that axis to the opposite axis
   or to the layer axis. */
6389 if (lex_match_id (lexer, "AUTO"))
6391 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6392 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6394 else if (lex_match_id (lexer, "ROWLABELS"))
6396 lex_match (lexer, T_EQUALS);
6397 if (lex_match_id (lexer, "OPPOSITE"))
6398 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6399 else if (lex_match_id (lexer, "LAYER"))
6400 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6403 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6407 else if (lex_match_id (lexer, "COLLABELS"))
6409 lex_match (lexer, T_EQUALS);
6410 if (lex_match_id (lexer, "OPPOSITE"))
6411 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6412 else if (lex_match_id (lexer, "LAYER"))
6413 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6416 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6422 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6427 else if (lex_match_id (lexer, "CRITERIA"))
6429 if (!lex_force_match_id (lexer, "CILEVEL"))
6431 lex_match (lexer, T_EQUALS);
6433 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6435 t->cilevel = lex_number (lexer);
6438 else if (lex_match_id (lexer, "CATEGORIES"))
6440 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6444 else if (lex_match_id (lexer, "TITLES"))
/* Each of CAPTION, CORNER, and TITLE selects a destination string in the
   table; the string tokens that follow are expanded with put_title_text()
   and joined with single spaces. */
6449 if (lex_match_id (lexer, "CAPTION"))
6450 textp = &t->caption;
6451 else if (lex_match_id (lexer, "CORNER"))
6453 else if (lex_match_id (lexer, "TITLE"))
6457 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6460 lex_match (lexer, T_EQUALS);
6462 struct string s = DS_EMPTY_INITIALIZER;
6463 while (lex_is_string (lexer))
6465 if (!ds_is_empty (&s))
6466 ds_put_byte (&s, ' ');
6467 put_title_text (&s, lex_tokss (lexer), now,
6468 lexer, dataset_dict (ds),
6469 expr_start, expr_end);
6473 *textp = ds_steal_cstr (&s);
6475 while (lex_token (lexer) != T_SLASH
6476 && lex_token (lexer) != T_ENDCMD);
6478 else if (lex_match_id (lexer, "SIGTEST"))
/* The syntax of SIGTEST is parsed into t->chisq, but the subcommand is then
   rejected with an error because it is not implemented.  The start offset
   points one token back, at the SIGTEST keyword just consumed. */
6480 int start_ofs = lex_ofs (lexer) - 1;
6483 t->chisq = xmalloc (sizeof *t->chisq);
6484 *t->chisq = (struct ctables_chisq) {
6486 .include_mrsets = true,
6487 .all_visible = true,
6493 if (lex_match_id (lexer, "TYPE"))
6495 lex_match (lexer, T_EQUALS);
6496 if (!lex_force_match_id (lexer, "CHISQUARE"))
6499 else if (lex_match_id (lexer, "ALPHA"))
6501 lex_match (lexer, T_EQUALS);
6502 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6504 t->chisq->alpha = lex_number (lexer);
6507 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6509 lex_match (lexer, T_EQUALS);
6510 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6513 else if (lex_match_id (lexer, "CATEGORIES"))
6515 lex_match (lexer, T_EQUALS);
6516 if (lex_match_id (lexer, "ALLVISIBLE"))
6517 t->chisq->all_visible = true;
6518 else if (lex_match_id (lexer, "SUBTOTALS"))
6519 t->chisq->all_visible = false;
6522 lex_error_expecting (lexer,
6523 "ALLVISIBLE", "SUBTOTALS");
6529 lex_error_expecting (lexer, "TYPE", "ALPHA",
6530 "INCLUDEMRSETS", "CATEGORIES");
6534 while (lex_token (lexer) != T_SLASH
6535 && lex_token (lexer) != T_ENDCMD);
6537 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6538 _("Support for SIGTEST not yet implemented."));
6541 else if (lex_match_id (lexer, "COMPARETEST"))
/* As with SIGTEST, the syntax is parsed into t->pairwise and then rejected
   as not implemented.
   NOTE(review): unlike SIGTEST, this start offset is taken without the - 1,
   so the reported error range presumably begins after the COMPARETEST
   keyword instead of at it -- confirm whether the difference is intended. */
6543 int start_ofs = lex_ofs (lexer);
6546 t->pairwise = xmalloc (sizeof *t->pairwise);
6547 *t->pairwise = (struct ctables_pairwise) {
6549 .alpha = { .05, .05 },
6550 .adjust = BONFERRONI,
6551 .include_mrsets = true,
6552 .meansvariance_allcats = true,
6553 .all_visible = true,
6562 if (lex_match_id (lexer, "TYPE"))
6564 lex_match (lexer, T_EQUALS);
6565 if (lex_match_id (lexer, "PROP"))
6566 t->pairwise->type = PROP;
6567 else if (lex_match_id (lexer, "MEAN"))
6568 t->pairwise->type = MEAN;
6571 lex_error_expecting (lexer, "PROP", "MEAN");
6575 else if (lex_match_id (lexer, "ALPHA"))
/* One or two alpha values, each in the open range 0 to 1.  Two values are
   stored in ascending order; a single value is stored in both slots. */
6577 lex_match (lexer, T_EQUALS);
6579 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6581 double a0 = lex_number (lexer);
6584 lex_match (lexer, T_COMMA);
6585 if (lex_is_number (lexer))
6587 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6589 double a1 = lex_number (lexer);
6592 t->pairwise->alpha[0] = MIN (a0, a1);
6593 t->pairwise->alpha[1] = MAX (a0, a1);
6596 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6598 else if (lex_match_id (lexer, "ADJUST"))
6600 lex_match (lexer, T_EQUALS);
6601 if (lex_match_id (lexer, "BONFERRONI"))
6602 t->pairwise->adjust = BONFERRONI;
6603 else if (lex_match_id (lexer, "BH"))
6604 t->pairwise->adjust = BH;
6605 else if (lex_match_id (lexer, "NONE"))
6606 t->pairwise->adjust = 0;
6609 lex_error_expecting (lexer, "BONFERRONI", "BH",
6614 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6616 lex_match (lexer, T_EQUALS);
6617 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6620 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6622 lex_match (lexer, T_EQUALS);
6623 if (lex_match_id (lexer, "ALLCATS"))
6624 t->pairwise->meansvariance_allcats = true;
6625 else if (lex_match_id (lexer, "TESTEDCATS"))
6626 t->pairwise->meansvariance_allcats = false;
6629 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6633 else if (lex_match_id (lexer, "CATEGORIES"))
6635 lex_match (lexer, T_EQUALS);
6636 if (lex_match_id (lexer, "ALLVISIBLE"))
6637 t->pairwise->all_visible = true;
6638 else if (lex_match_id (lexer, "SUBTOTALS"))
6639 t->pairwise->all_visible = false;
6642 lex_error_expecting (lexer, "ALLVISIBLE",
6647 else if (lex_match_id (lexer, "MERGE"))
6649 lex_match (lexer, T_EQUALS);
6650 if (!parse_bool (lexer, &t->pairwise->merge))
6653 else if (lex_match_id (lexer, "STYLE"))
6655 lex_match (lexer, T_EQUALS);
6656 if (lex_match_id (lexer, "APA"))
6657 t->pairwise->apa_style = true;
6658 else if (lex_match_id (lexer, "SIMPLE"))
6659 t->pairwise->apa_style = false;
6662 lex_error_expecting (lexer, "APA", "SIMPLE");
6666 else if (lex_match_id (lexer, "SHOWSIG"))
6668 lex_match (lexer, T_EQUALS);
6669 if (!parse_bool (lexer, &t->pairwise->show_sig))
6674 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6675 "INCLUDEMRSETS", "MEANSVARIANCE",
6676 "CATEGORIES", "MERGE", "STYLE",
6681 while (lex_token (lexer) != T_SLASH
6682 && lex_token (lexer) != T_ENDCMD);
6684 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6685 _("Support for COMPARETEST not yet implemented."));
6690 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6691 "CRITERIA", "CATEGORIES", "TITLES",
6692 "SIGTEST", "COMPARETEST");
6696 if (!lex_match (lexer, T_SLASH))
/* Work out which axis, if any, has its category labels moved, and where they
   go.  Moving both row and column labels at once is an error. */
6700 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6702 t->clabels_from_axis = PIVOT_AXIS_ROW;
6703 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6705 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6709 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6710 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6711 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6713 if (!ctables_prepare_table (t))
6716 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: read the data, build the tables, and commit. */
6719 input = proc_open (ds);
6720 bool ok = ctables_execute (ds, input, ct);
6721 ok = proc_commit (ds) && ok;
6723 ctables_destroy (ct);
6724 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Cleanup after the success return above; presumably reached on the error
   path -- TODO confirm. */
6729 ctables_destroy (ct);