1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas. */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly. */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area. */
104 struct hmap_node node;
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
176 struct ctables_function_info
178 struct substring basename;
179 enum ctables_function_type type;
180 enum ctables_format format;
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
220 return *var_get_print_format (var);
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Expecting summary function name."));
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries. */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighted' specifies whether
445 to use weighted or unweighted data (for functions that do not support a
446 choice, it must be true). 'calc_area' is the area over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0). */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs;
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions. */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
564 struct ctables_pcexpr *expr;
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE. */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator. */
697 enum token_type token;
698 enum ctables_pcexpr_op op;
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
893 lex_error (lexer, NULL);
897 if (!lex_force_match (lexer, T_RBRACK))
899 if (e.op == CTPO_CAT_STRING)
900 ss_dealloc (&e.string);
901 else if (e.op == CTPO_CAT_SRANGE)
903 ss_dealloc (&e.srange[0]);
904 ss_dealloc (&e.srange[1]);
909 else if (lex_match (lexer, T_LPAREN))
911 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
914 if (!lex_force_match (lexer, T_RPAREN))
916 ctables_pcexpr_destroy (ep);
923 lex_error (lexer, NULL);
927 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
928 return xmemdup (&e, sizeof e);
931 static struct ctables_pcexpr *
932 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
933 struct lexer *lexer, int start_ofs)
935 struct ctables_pcexpr *e = xmalloc (sizeof *e);
936 *e = (struct ctables_pcexpr) {
939 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
944 static struct ctables_pcexpr *
945 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
947 static const struct operator op = { T_EXP, CTPO_POW };
949 const char *chain_warning =
950 _("The exponentiation operator (`**') is left-associative: "
951 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
952 "To disable this warning, insert parentheses.");
954 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
955 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
956 ctables_pcexpr_parse_primary,
959 /* Special case for situations like "-5**6", which must be parsed as
962 int start_ofs = lex_ofs (lexer);
963 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
964 *lhs = (struct ctables_pcexpr) {
966 .number = -lex_tokval (lexer),
967 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
971 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
973 ctables_pcexpr_parse_primary, chain_warning, lhs);
977 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
980 /* Parses the unary minus level. */
981 static struct ctables_pcexpr *
982 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
984 int start_ofs = lex_ofs (lexer);
985 if (!lex_match (lexer, T_DASH))
986 return ctables_pcexpr_parse_exp (lexer, dict);
988 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
992 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
995 /* Parses the multiplication and division level. */
996 static struct ctables_pcexpr *
997 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
999 static const struct operator ops[] =
1001 { T_ASTERISK, CTPO_MUL },
1002 { T_SLASH, CTPO_DIV },
1005 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1006 sizeof ops / sizeof *ops,
1007 ctables_pcexpr_parse_neg, NULL);
1010 /* Parses the addition and subtraction level. */
1011 static struct ctables_pcexpr *
1012 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1014 static const struct operator ops[] =
1016 { T_PLUS, CTPO_ADD },
1017 { T_DASH, CTPO_SUB },
1018 { T_NEG_NUM, CTPO_ADD },
1021 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1022 ops, sizeof ops / sizeof *ops,
1023 ctables_pcexpr_parse_mul, NULL);
1026 /* CTABLES axis expressions. */
1028 /* CTABLES has a number of extra formats that we implement via custom
1029 currency specifications on an alternate fmt_settings. */
1030 #define CTEF_NEGPAREN FMT_CCA
1031 #define CTEF_NEQUAL FMT_CCB
1032 #define CTEF_PAREN FMT_CCC
1033 #define CTEF_PCTPAREN FMT_CCD
1035 enum ctables_summary_variant
1044 enum ctables_axis_op
1060 struct variable *var;
1062 struct ctables_summary_spec_set specs[N_CSVS];
1066 struct ctables_axis *subs[2];
1069 struct msg_location *loc;
1073 ctables_axis_destroy (struct ctables_axis *axis)
1081 for (size_t i = 0; i < N_CSVS; i++)
1082 ctables_summary_spec_set_uninit (&axis->specs[i]);
1087 ctables_axis_destroy (axis->subs[0]);
1088 ctables_axis_destroy (axis->subs[1]);
1091 msg_location_destroy (axis->loc);
1095 static struct ctables_axis *
1096 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1097 struct ctables_axis *sub0,
1098 struct ctables_axis *sub1,
1099 struct lexer *lexer, int start_ofs)
1101 struct ctables_axis *axis = xmalloc (sizeof *axis);
1102 *axis = (struct ctables_axis) {
1104 .subs = { sub0, sub1 },
1105 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1110 struct ctables_axis_parse_ctx
1112 struct lexer *lexer;
1113 struct dictionary *dict;
1116 static struct pivot_value *
1117 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1120 return ctables_summary_function_label (spec->function, spec->weighting,
1121 spec->user_area, spec->percentile);
1124 struct substring in = ss_cstr (spec->label);
1125 struct substring target = ss_cstr (")CILEVEL");
1127 struct string out = DS_EMPTY_INITIALIZER;
1130 size_t chunk = ss_find_substring (in, target);
1131 ds_put_substring (&out, ss_head (in, chunk));
1132 ss_advance (&in, chunk);
1134 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1136 ss_advance (&in, target.length);
1137 ds_put_format (&out, "%g", cilevel);
1143 add_summary_spec (struct ctables_axis *axis,
1144 enum ctables_summary_function function,
1145 enum ctables_weighting weighting,
1146 enum ctables_area_type area, double percentile,
1147 const char *label, const struct fmt_spec *format,
1148 bool is_ctables_format, const struct msg_location *loc,
1149 enum ctables_summary_variant sv)
1151 if (axis->op == CTAO_VAR)
1153 char function_name[128];
1154 ctables_summary_function_name (function, weighting, area,
1155 function_name, sizeof function_name);
1156 const char *var_name = var_get_name (axis->var);
1157 switch (ctables_function_availability (function))
1161 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1162 "response sets."), function_name);
1163 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1169 if (!axis->scale && sv != CSV_TOTAL)
1172 _("Summary function %s applies only to scale variables."),
1174 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1184 struct ctables_summary_spec_set *set = &axis->specs[sv];
1185 if (set->n >= set->allocated)
1186 set->specs = x2nrealloc (set->specs, &set->allocated,
1187 sizeof *set->specs);
1189 struct ctables_summary_spec *dst = &set->specs[set->n++];
1190 *dst = (struct ctables_summary_spec) {
1191 .function = function,
1192 .weighting = weighting,
1195 .percentile = percentile,
1196 .label = xstrdup_if_nonnull (label),
1197 .format = (format ? *format
1198 : ctables_summary_default_format (function, axis->var)),
1199 .is_ctables_format = is_ctables_format,
1205 for (size_t i = 0; i < 2; i++)
1206 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1207 percentile, label, format, is_ctables_format,
1214 static struct ctables_axis *ctables_axis_parse_stack (
1215 struct ctables_axis_parse_ctx *);
1217 static struct ctables_axis *
1218 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1220 if (lex_match (ctx->lexer, T_LPAREN))
1222 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1223 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1225 ctables_axis_destroy (sub);
1231 if (!lex_force_id (ctx->lexer))
1234 if (lex_tokcstr (ctx->lexer)[0] == '$')
1236 lex_error (ctx->lexer,
1237 _("Multiple response set support not implemented."));
1241 int start_ofs = lex_ofs (ctx->lexer);
1242 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1246 struct ctables_axis *axis = xmalloc (sizeof *axis);
1247 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1249 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1250 : lex_match_phrase (ctx->lexer, "[C]") ? false
1251 : var_get_measure (var) == MEASURE_SCALE);
1252 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1253 lex_ofs (ctx->lexer) - 1);
1254 if (axis->scale && var_is_alpha (var))
1256 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1258 var_get_name (var));
1259 ctables_axis_destroy (axis);
1267 has_digit (const char *s)
1269 return s[strcspn (s, "0123456789")] != '\0';
1273 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1274 bool *is_ctables_format)
1276 char type[FMT_TYPE_LEN_MAX + 1];
1277 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1280 if (!strcasecmp (type, "NEGPAREN"))
1281 format->type = CTEF_NEGPAREN;
1282 else if (!strcasecmp (type, "NEQUAL"))
1283 format->type = CTEF_NEQUAL;
1284 else if (!strcasecmp (type, "PAREN"))
1285 format->type = CTEF_PAREN;
1286 else if (!strcasecmp (type, "PCTPAREN"))
1287 format->type = CTEF_PCTPAREN;
1290 *is_ctables_format = false;
1291 return (parse_format_specifier (lexer, format)
1292 && fmt_check_output (format)
1293 && fmt_check_type_compat (format, VAL_NUMERIC));
1299 lex_next_error (lexer, -1, -1,
1300 _("Output format %s requires width 2 or greater."), type);
1303 else if (format->d > format->w - 1)
1305 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1306 "greater than decimals."), type);
1311 *is_ctables_format = true;
1316 static struct ctables_axis *
1317 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1319 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1320 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1323 enum ctables_summary_variant sv = CSV_CELL;
1326 int start_ofs = lex_ofs (ctx->lexer);
1328 /* Parse function. */
1329 enum ctables_summary_function function;
1330 enum ctables_weighting weighting;
1331 enum ctables_area_type area;
1332 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1336 /* Parse percentile. */
1337 double percentile = 0;
1338 if (function == CTSF_PTILE)
1340 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1342 percentile = lex_number (ctx->lexer);
1343 lex_get (ctx->lexer);
1348 if (lex_is_string (ctx->lexer))
1350 label = ss_xstrdup (lex_tokss (ctx->lexer));
1351 lex_get (ctx->lexer);
1355 struct fmt_spec format;
1356 const struct fmt_spec *formatp;
1357 bool is_ctables_format = false;
1358 if (lex_token (ctx->lexer) == T_ID
1359 && has_digit (lex_tokcstr (ctx->lexer)))
1361 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1362 &is_ctables_format))
1372 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1373 lex_ofs (ctx->lexer) - 1);
1374 add_summary_spec (sub, function, weighting, area, percentile, label,
1375 formatp, is_ctables_format, loc, sv);
1377 msg_location_destroy (loc);
1379 lex_match (ctx->lexer, T_COMMA);
1380 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1382 if (!lex_force_match (ctx->lexer, T_LBRACK))
1386 else if (lex_match (ctx->lexer, T_RBRACK))
1388 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1395 ctables_axis_destroy (sub);
1399 static const struct ctables_axis *
1400 find_scale (const struct ctables_axis *axis)
1404 else if (axis->op == CTAO_VAR)
1405 return axis->scale ? axis : NULL;
1408 for (size_t i = 0; i < 2; i++)
1410 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1418 static const struct ctables_axis *
1419 find_categorical_summary_spec (const struct ctables_axis *axis)
1423 else if (axis->op == CTAO_VAR)
1424 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1427 for (size_t i = 0; i < 2; i++)
1429 const struct ctables_axis *sum
1430 = find_categorical_summary_spec (axis->subs[i]);
1438 static struct ctables_axis *
1439 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1441 int start_ofs = lex_ofs (ctx->lexer);
1442 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1446 while (lex_match (ctx->lexer, T_GT))
1448 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1451 ctables_axis_destroy (lhs);
1455 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1456 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1458 const struct ctables_axis *outer_scale = find_scale (lhs);
1459 const struct ctables_axis *inner_scale = find_scale (rhs);
1460 if (outer_scale && inner_scale)
1462 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1463 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1464 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1465 ctables_axis_destroy (nest);
1469 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1472 msg_at (SE, nest->loc,
1473 _("Summaries may only be requested for categorical variables "
1474 "at the innermost nesting level."));
1475 msg_at (SN, outer_sum->loc,
1476 _("This outer categorical variable has a summary."));
1477 ctables_axis_destroy (nest);
1487 static struct ctables_axis *
1488 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1490 int start_ofs = lex_ofs (ctx->lexer);
1491 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1495 while (lex_match (ctx->lexer, T_PLUS))
1497 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1500 ctables_axis_destroy (lhs);
1504 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1505 ctx->lexer, start_ofs);
1512 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1513 struct ctables_axis **axisp)
1516 if (lex_token (lexer) == T_BY
1517 || lex_token (lexer) == T_SLASH
1518 || lex_token (lexer) == T_ENDCMD)
1521 struct ctables_axis_parse_ctx ctx = {
1525 *axisp = ctables_axis_parse_stack (&ctx);
1529 /* CTABLES categories. */
1531 struct ctables_categories
1534 struct ctables_category *cats;
1539 struct ctables_category
1541 enum ctables_category_type
1543 /* Explicit category lists. */
1546 CCT_NRANGE, /* Numerical range. */
1547 CCT_SRANGE, /* String range. */
1552 /* Totals and subtotals. */
1556 /* Implicit category lists. */
1561 /* For contributing to TOTALN. */
1562 CCT_EXCLUDED_MISSING,
1566 struct ctables_category *subtotal;
1572 double number; /* CCT_NUMBER. */
1573 struct substring string; /* CCT_STRING, in dictionary encoding. */
1574 double nrange[2]; /* CCT_NRANGE. */
1575 struct substring srange[2]; /* CCT_SRANGE. */
1579 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1580 bool hide_subcategories; /* CCT_SUBTOTAL. */
1583 /* CCT_POSTCOMPUTE. */
1586 const struct ctables_postcompute *pc;
1587 enum fmt_type parse_format;
1590 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1593 bool include_missing;
1594 bool sort_ascending;
1597 enum ctables_summary_function sort_function;
1598 enum ctables_weighting weighting;
1599 enum ctables_area_type area;
1600 struct variable *sort_var;
1605 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1606 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1607 struct msg_location *location;
1611 ctables_category_uninit (struct ctables_category *cat)
1616 msg_location_destroy (cat->location);
1623 case CCT_POSTCOMPUTE:
1627 ss_dealloc (&cat->string);
1631 ss_dealloc (&cat->srange[0]);
1632 ss_dealloc (&cat->srange[1]);
1637 free (cat->total_label);
1645 case CCT_EXCLUDED_MISSING:
1651 nullable_substring_equal (const struct substring *a,
1652 const struct substring *b)
1654 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
1658 ctables_category_equal (const struct ctables_category *a,
1659 const struct ctables_category *b)
1661 if (a->type != b->type)
1667 return a->number == b->number;
1670 return ss_equals (a->string, b->string);
1673 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1676 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1677 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1683 case CCT_POSTCOMPUTE:
1684 return a->pc == b->pc;
1688 return !strcmp (a->total_label, b->total_label);
1693 return (a->include_missing == b->include_missing
1694 && a->sort_ascending == b->sort_ascending
1695 && a->sort_function == b->sort_function
1696 && a->sort_var == b->sort_var
1697 && a->percentile == b->percentile);
1699 case CCT_EXCLUDED_MISSING:
1707 ctables_categories_unref (struct ctables_categories *c)
1712 assert (c->n_refs > 0);
1716 for (size_t i = 0; i < c->n_cats; i++)
1717 ctables_category_uninit (&c->cats[i]);
1723 ctables_categories_equal (const struct ctables_categories *a,
1724 const struct ctables_categories *b)
1726 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1729 for (size_t i = 0; i < a->n_cats; i++)
1730 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
1736 static struct ctables_category
1737 cct_nrange (double low, double high)
1739 return (struct ctables_category) {
1741 .nrange = { low, high }
1745 static struct ctables_category
1746 cct_srange (struct substring low, struct substring high)
1748 return (struct ctables_category) {
1750 .srange = { low, high }
1755 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1756 struct ctables_category *cat)
1759 if (lex_match (lexer, T_EQUALS))
1761 if (!lex_force_string (lexer))
1764 total_label = ss_xstrdup (lex_tokss (lexer));
1768 total_label = xstrdup (_("Subtotal"));
1770 *cat = (struct ctables_category) {
1771 .type = CCT_SUBTOTAL,
1772 .hide_subcategories = hide_subcategories,
1773 .total_label = total_label
1779 ctables_table_parse_explicit_category (struct lexer *lexer,
1780 struct dictionary *dict,
1782 struct ctables_category *cat)
1784 if (lex_match_id (lexer, "OTHERNM"))
1785 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1786 else if (lex_match_id (lexer, "MISSING"))
1787 *cat = (struct ctables_category) { .type = CCT_MISSING };
1788 else if (lex_match_id (lexer, "SUBTOTAL"))
1789 return ctables_table_parse_subtotal (lexer, false, cat);
1790 else if (lex_match_id (lexer, "HSUBTOTAL"))
1791 return ctables_table_parse_subtotal (lexer, true, cat);
1792 else if (lex_match_id (lexer, "LO"))
1794 if (!lex_force_match_id (lexer, "THRU"))
1796 if (lex_is_string (lexer))
1798 struct substring sr0 = { .string = NULL };
1799 struct substring sr1 = parse_substring (lexer, dict);
1800 *cat = cct_srange (sr0, sr1);
1802 else if (lex_force_num (lexer))
1804 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1810 else if (lex_is_number (lexer))
1812 double number = lex_number (lexer);
1814 if (lex_match_id (lexer, "THRU"))
1816 if (lex_match_id (lexer, "HI"))
1817 *cat = cct_nrange (number, DBL_MAX);
1820 if (!lex_force_num (lexer))
1822 *cat = cct_nrange (number, lex_number (lexer));
1827 *cat = (struct ctables_category) {
1832 else if (lex_is_string (lexer))
1834 struct substring s = parse_substring (lexer, dict);
1835 if (lex_match_id (lexer, "THRU"))
1837 if (lex_match_id (lexer, "HI"))
1839 struct substring sr1 = { .string = NULL };
1840 *cat = cct_srange (s, sr1);
1844 if (!lex_force_string (lexer))
1849 struct substring sr1 = parse_substring (lexer, dict);
1850 *cat = cct_srange (s, sr1);
1854 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1856 else if (lex_match (lexer, T_AND))
1858 if (!lex_force_id (lexer))
1860 struct ctables_postcompute *pc = ctables_find_postcompute (
1861 ct, lex_tokcstr (lexer));
1864 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1865 msg_at (SE, loc, _("Unknown postcompute &%s."),
1866 lex_tokcstr (lexer));
1867 msg_location_destroy (loc);
1872 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1876 lex_error (lexer, NULL);
1884 parse_category_string (struct msg_location *location,
1885 struct substring s, const struct dictionary *dict,
1886 enum fmt_type format, double *n)
1889 char *error = data_in (s, dict_get_encoding (dict), format,
1890 settings_get_fmt_settings (), &v, 0, NULL);
1893 msg_at (SE, location,
1894 _("Failed to parse category specification as format %s: %s."),
1895 fmt_name (format), error);
1904 static struct ctables_category *
1905 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1906 const struct ctables_pcexpr *e)
1908 struct ctables_category *best = NULL;
1909 size_t n_subtotals = 0;
1910 for (size_t i = 0; i < cats->n_cats; i++)
1912 struct ctables_category *cat = &cats->cats[i];
1915 case CTPO_CAT_NUMBER:
1916 if (cat->type == CCT_NUMBER && cat->number == e->number)
1920 case CTPO_CAT_STRING:
1921 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1925 case CTPO_CAT_NRANGE:
1926 if (cat->type == CCT_NRANGE
1927 && cat->nrange[0] == e->nrange[0]
1928 && cat->nrange[1] == e->nrange[1])
1932 case CTPO_CAT_SRANGE:
1933 if (cat->type == CCT_SRANGE
1934 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1935 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1939 case CTPO_CAT_MISSING:
1940 if (cat->type == CCT_MISSING)
1944 case CTPO_CAT_OTHERNM:
1945 if (cat->type == CCT_OTHERNM)
1949 case CTPO_CAT_SUBTOTAL:
1950 if (cat->type == CCT_SUBTOTAL)
1953 if (e->subtotal_index == n_subtotals)
1955 else if (e->subtotal_index == 0)
1960 case CTPO_CAT_TOTAL:
1961 if (cat->type == CCT_TOTAL)
1975 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1980 static struct ctables_category *
1981 ctables_find_category_for_postcompute (const struct dictionary *dict,
1982 const struct ctables_categories *cats,
1983 enum fmt_type parse_format,
1984 const struct ctables_pcexpr *e)
1986 if (parse_format != FMT_F)
1988 if (e->op == CTPO_CAT_STRING)
1991 if (!parse_category_string (e->location, e->string, dict,
1992 parse_format, &number))
1995 struct ctables_pcexpr e2 = {
1996 .op = CTPO_CAT_NUMBER,
1998 .location = e->location,
2000 return ctables_find_category_for_postcompute__ (cats, &e2);
2002 else if (e->op == CTPO_CAT_SRANGE)
2005 if (!e->srange[0].string)
2006 nrange[0] = -DBL_MAX;
2007 else if (!parse_category_string (e->location, e->srange[0], dict,
2008 parse_format, &nrange[0]))
2011 if (!e->srange[1].string)
2012 nrange[1] = DBL_MAX;
2013 else if (!parse_category_string (e->location, e->srange[1], dict,
2014 parse_format, &nrange[1]))
2017 struct ctables_pcexpr e2 = {
2018 .op = CTPO_CAT_NRANGE,
2019 .nrange = { nrange[0], nrange[1] },
2020 .location = e->location,
2022 return ctables_find_category_for_postcompute__ (cats, &e2);
2025 return ctables_find_category_for_postcompute__ (cats, e);
2028 static struct substring
2029 rtrim_value (const union value *v, const struct variable *var)
2031 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2032 var_get_width (var));
2033 ss_rtrim (&s, ss_cstr (" "));
2038 in_string_range (const union value *v, const struct variable *var,
2039 const struct substring *srange)
2041 struct substring s = rtrim_value (v, var);
2042 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2043 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
2046 static const struct ctables_category *
2047 ctables_categories_match (const struct ctables_categories *c,
2048 const union value *v, const struct variable *var)
2050 if (var_is_numeric (var) && v->f == SYSMIS)
2053 const struct ctables_category *othernm = NULL;
2054 for (size_t i = c->n_cats; i-- > 0; )
2056 const struct ctables_category *cat = &c->cats[i];
2060 if (cat->number == v->f)
2065 if (ss_equals (cat->string, rtrim_value (v, var)))
2070 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2071 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2076 if (in_string_range (v, var, cat->srange))
2081 if (var_is_value_missing (var, v))
2085 case CCT_POSTCOMPUTE:
2100 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2103 case CCT_EXCLUDED_MISSING:
2108 return var_is_value_missing (var, v) ? NULL : othernm;
2111 static const struct ctables_category *
2112 ctables_categories_total (const struct ctables_categories *c)
2114 const struct ctables_category *first = &c->cats[0];
2115 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2116 return (first->type == CCT_TOTAL ? first
2117 : last->type == CCT_TOTAL ? last
2122 ctables_category_format_number (double number, const struct variable *var,
2125 struct pivot_value *pv = pivot_value_new_var_value (
2126 var, &(union value) { .f = number });
2127 pivot_value_format (pv, NULL, s);
2128 pivot_value_destroy (pv);
2132 ctables_category_format_string (struct substring string,
2133 const struct variable *var, struct string *out)
2135 int width = var_get_width (var);
2136 char *s = xmalloc (width);
2137 buf_copy_rpad (s, width, string.string, string.length, ' ');
2138 struct pivot_value *pv = pivot_value_new_var_value (
2139 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2140 pivot_value_format (pv, NULL, out);
2141 pivot_value_destroy (pv);
2146 ctables_category_format_label (const struct ctables_category *cat,
2147 const struct variable *var,
2153 ctables_category_format_number (cat->number, var, s);
2157 ctables_category_format_string (cat->string, var, s);
2161 ctables_category_format_number (cat->nrange[0], var, s);
2162 ds_put_format (s, " THRU ");
2163 ctables_category_format_number (cat->nrange[1], var, s);
2167 ctables_category_format_string (cat->srange[0], var, s);
2168 ds_put_format (s, " THRU ");
2169 ctables_category_format_string (cat->srange[1], var, s);
2173 ds_put_cstr (s, "MISSING");
2177 ds_put_cstr (s, "OTHERNM");
2180 case CCT_POSTCOMPUTE:
2181 ds_put_format (s, "&%s", cat->pc->name);
2186 ds_put_cstr (s, cat->total_label);
2192 case CCT_EXCLUDED_MISSING:
2200 ctables_recursive_check_postcompute (struct dictionary *dict,
2201 const struct ctables_pcexpr *e,
2202 struct ctables_category *pc_cat,
2203 const struct ctables_categories *cats,
2204 const struct msg_location *cats_location)
2208 case CTPO_CAT_NUMBER:
2209 case CTPO_CAT_STRING:
2210 case CTPO_CAT_NRANGE:
2211 case CTPO_CAT_SRANGE:
2212 case CTPO_CAT_MISSING:
2213 case CTPO_CAT_OTHERNM:
2214 case CTPO_CAT_SUBTOTAL:
2215 case CTPO_CAT_TOTAL:
2217 struct ctables_category *cat = ctables_find_category_for_postcompute (
2218 dict, cats, pc_cat->parse_format, e);
2221 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2223 size_t n_subtotals = 0;
2224 for (size_t i = 0; i < cats->n_cats; i++)
2225 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2226 if (n_subtotals > 1)
2228 msg_at (SE, cats_location,
2229 ngettext ("These categories include %zu instance "
2230 "of SUBTOTAL or HSUBTOTAL, so references "
2231 "from computed categories must refer to "
2232 "subtotals by position, "
2233 "e.g. SUBTOTAL[1].",
2234 "These categories include %zu instances "
2235 "of SUBTOTAL or HSUBTOTAL, so references "
2236 "from computed categories must refer to "
2237 "subtotals by position, "
2238 "e.g. SUBTOTAL[1].",
2241 msg_at (SN, e->location,
2242 _("This is the reference that lacks a position."));
2247 msg_at (SE, pc_cat->location,
2248 _("Computed category &%s references a category not included "
2249 "in the category list."),
2251 msg_at (SN, e->location, _("This is the missing category."));
2252 if (e->op == CTPO_CAT_SUBTOTAL)
2253 msg_at (SN, cats_location,
2254 _("To fix the problem, add subtotals to the "
2255 "list of categories here."));
2256 else if (e->op == CTPO_CAT_TOTAL)
2257 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2258 "CATEGORIES specification."));
2260 msg_at (SN, cats_location,
2261 _("To fix the problem, add the missing category to the "
2262 "list of categories here."));
2265 if (pc_cat->pc->hide_source_cats)
2279 for (size_t i = 0; i < 2; i++)
2280 if (e->subs[i] && !ctables_recursive_check_postcompute (
2281 dict, e->subs[i], pc_cat, cats, cats_location))
2289 static struct pivot_value *
2290 ctables_postcompute_label (const struct ctables_categories *cats,
2291 const struct ctables_category *cat,
2292 const struct variable *var)
2294 struct substring in = ss_cstr (cat->pc->label);
2295 struct substring target = ss_cstr (")LABEL[");
2297 struct string out = DS_EMPTY_INITIALIZER;
2300 size_t chunk = ss_find_substring (in, target);
2301 if (chunk == SIZE_MAX)
2303 if (ds_is_empty (&out))
2304 return pivot_value_new_user_text (in.string, in.length);
2307 ds_put_substring (&out, in);
2308 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
2312 ds_put_substring (&out, ss_head (in, chunk));
2313 ss_advance (&in, chunk + target.length);
2315 struct substring idx_s;
2316 if (!ss_get_until (&in, ']', &idx_s))
2319 long int idx = strtol (idx_s.string, &tail, 10);
2320 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2323 struct ctables_category *cat2 = &cats->cats[idx - 1];
2324 if (!ctables_category_format_label (cat2, var, &out))
2330 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
2333 static struct pivot_value *
2334 ctables_category_create_value_label (const struct ctables_categories *cats,
2335 const struct ctables_category *cat,
2336 const struct variable *var,
2337 const union value *value)
2339 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2340 ? ctables_postcompute_label (cats, cat, var)
2341 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2342 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2343 : pivot_value_new_var_value (var, value));
2346 /* CTABLES variable nesting and stacking. */
2348 /* A nested sequence of variables, e.g. a > b > c. */
2351 struct variable **vars;
2355 size_t *areas[N_CTATS];
2356 size_t n_areas[N_CTATS];
2359 struct ctables_summary_spec_set specs[N_CSVS];
2362 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2363 struct ctables_stack
2365 struct ctables_nest *nests;
2370 ctables_nest_uninit (struct ctables_nest *nest)
2373 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2374 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2375 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2376 free (nest->areas[at]);
2380 ctables_stack_uninit (struct ctables_stack *stack)
2384 for (size_t i = 0; i < stack->n; i++)
2385 ctables_nest_uninit (&stack->nests[i]);
2386 free (stack->nests);
2390 static struct ctables_stack
2391 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2398 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2399 for (size_t i = 0; i < s0.n; i++)
2400 for (size_t j = 0; j < s1.n; j++)
2402 const struct ctables_nest *a = &s0.nests[i];
2403 const struct ctables_nest *b = &s1.nests[j];
2405 size_t allocate = a->n + b->n;
2406 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2408 for (size_t k = 0; k < a->n; k++)
2409 vars[n++] = a->vars[k];
2410 for (size_t k = 0; k < b->n; k++)
2411 vars[n++] = b->vars[k];
2412 assert (n == allocate);
2414 const struct ctables_nest *summary_src;
2415 if (!a->specs[CSV_CELL].var)
2417 else if (!b->specs[CSV_CELL].var)
2422 struct ctables_nest *new = &stack.nests[stack.n++];
2423 *new = (struct ctables_nest) {
2425 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2426 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2428 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2429 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2433 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2434 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2436 ctables_stack_uninit (&s0);
2437 ctables_stack_uninit (&s1);
2441 static struct ctables_stack
2442 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2444 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2445 for (size_t i = 0; i < s0.n; i++)
2446 stack.nests[stack.n++] = s0.nests[i];
2447 for (size_t i = 0; i < s1.n; i++)
2449 stack.nests[stack.n] = s1.nests[i];
2450 stack.nests[stack.n].group_head += s0.n;
2453 assert (stack.n == s0.n + s1.n);
2459 static struct ctables_stack
2460 var_fts (const struct ctables_axis *a)
2462 struct variable **vars = xmalloc (sizeof *vars);
2465 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2466 struct ctables_nest *nest = xmalloc (sizeof *nest);
2467 *nest = (struct ctables_nest) {
2470 .scale_idx = a->scale ? 0 : SIZE_MAX,
2471 .summary_idx = is_summary ? 0 : SIZE_MAX,
2474 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2476 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2477 nest->specs[sv].var = a->var;
2478 nest->specs[sv].is_scale = a->scale;
2480 return (struct ctables_stack) { .nests = nest, .n = 1 };
2483 static struct ctables_stack
2484 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2487 return (struct ctables_stack) { .n = 0 };
2495 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2496 enumerate_fts (axis_type, a->subs[1]));
2499 /* This should consider any of the scale variables found in the result to
2500 be linked to each other listwise for SMISSING=LISTWISE. */
2501 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2502 enumerate_fts (axis_type, a->subs[1]));
2508 /* CTABLES summary calculation. */
2510 union ctables_summary
2512 /* COUNT, VALIDN, TOTALN. */
2515 /* MINIMUM, MAXIMUM, RANGE. */
2522 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2523 struct moments1 *moments;
2525 /* MEDIAN, MODE, PTILE. */
2528 struct casewriter *writer;
2535 ctables_summary_init (union ctables_summary *s,
2536 const struct ctables_summary_spec *ss)
2538 switch (ss->function)
2541 case CTSF_areaPCT_COUNT:
2542 case CTSF_areaPCT_VALIDN:
2543 case CTSF_areaPCT_TOTALN:
2556 s->min = s->max = SYSMIS;
2561 case CTSF_areaPCT_SUM:
2562 s->moments = moments1_create (MOMENT_MEAN);
2568 s->moments = moments1_create (MOMENT_VARIANCE);
2575 struct caseproto *proto = caseproto_create ();
2576 proto = caseproto_add_width (proto, 0);
2577 proto = caseproto_add_width (proto, 0);
2579 struct subcase ordering;
2580 subcase_init (&ordering, 0, 0, SC_ASCEND);
2581 s->writer = sort_create_writer (&ordering, proto);
2582 subcase_uninit (&ordering);
2583 caseproto_unref (proto);
2593 ctables_summary_uninit (union ctables_summary *s,
2594 const struct ctables_summary_spec *ss)
2596 switch (ss->function)
2599 case CTSF_areaPCT_COUNT:
2600 case CTSF_areaPCT_VALIDN:
2601 case CTSF_areaPCT_TOTALN:
2620 case CTSF_areaPCT_SUM:
2621 moments1_destroy (s->moments);
2627 casewriter_destroy (s->writer);
2633 ctables_summary_add (union ctables_summary *s,
2634 const struct ctables_summary_spec *ss,
2635 const union value *value,
2636 bool is_missing, bool is_included,
2639 /* To determine whether a case is included in a given table for a particular
2640 kind of summary, consider the following charts for the variable being
2641 summarized. Only if "yes" appears is the case counted.
2643 Categorical variables: VALIDN other TOTALN
2644 Valid values in included categories yes yes yes
2645 Missing values in included categories --- yes yes
2646 Missing values in excluded categories --- --- yes
2647 Valid values in excluded categories --- --- ---
2649 Scale variables: VALIDN other TOTALN
2650 Valid value yes yes yes
2651 Missing value --- yes yes
2653 Missing values include both user- and system-missing. (The system-missing
2654 value is always in an excluded category.)
2656 One way to interpret the above table is that scale variables are like
2657 categorical variables in which all values are in included categories.
2659 switch (ss->function)
2662 case CTSF_areaPCT_TOTALN:
2667 case CTSF_areaPCT_COUNT:
2673 case CTSF_areaPCT_VALIDN:
2691 if (s->min == SYSMIS || value->f < s->min)
2693 if (s->max == SYSMIS || value->f > s->max)
2704 moments1_add (s->moments, value->f, weight);
2707 case CTSF_areaPCT_SUM:
2709 moments1_add (s->moments, value->f, weight);
2717 s->ovalid += weight;
2719 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2720 *case_num_rw_idx (c, 0) = value->f;
2721 *case_num_rw_idx (c, 1) = weight;
2722 casewriter_write (s->writer, c);
2729 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2730 union ctables_summary *s,
2731 const struct ctables_summary_spec *ss)
2733 switch (ss->function)
2739 return areas[ss->calc_area]->sequence;
2741 case CTSF_areaPCT_COUNT:
2743 const struct ctables_area *a = areas[ss->calc_area];
2744 double a_count = a->count[ss->weighting];
2745 return a_count ? s->count / a_count * 100 : SYSMIS;
2748 case CTSF_areaPCT_VALIDN:
2750 const struct ctables_area *a = areas[ss->calc_area];
2751 double a_valid = a->valid[ss->weighting];
2752 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2755 case CTSF_areaPCT_TOTALN:
2757 const struct ctables_area *a = areas[ss->calc_area];
2758 double a_total = a->total[ss->weighting];
2759 return a_total ? s->count / a_total * 100 : SYSMIS;
2774 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2779 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2785 double weight, variance;
2786 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2787 return calc_semean (variance, weight);
2793 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2794 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2799 double weight, mean;
2800 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2801 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2807 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2811 case CTSF_areaPCT_SUM:
2813 double weight, mean;
2814 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2815 if (weight == SYSMIS || mean == SYSMIS)
2818 const struct ctables_area *a = areas[ss->calc_area];
2819 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2820 double denom = sum->sum[ss->weighting];
2821 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2828 struct casereader *reader = casewriter_make_reader (s->writer);
2831 struct percentile *ptile = percentile_create (
2832 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2833 struct order_stats *os = &ptile->parent;
2834 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2835 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2836 statistic_destroy (&ptile->parent.parent);
2843 struct casereader *reader = casewriter_make_reader (s->writer);
2846 struct mode *mode = mode_create ();
2847 struct order_stats *os = &mode->parent;
2848 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2849 s->ovalue = mode->mode;
2850 statistic_destroy (&mode->parent.parent);
2858 /* CTABLES occurrences. */
2860 struct ctables_occurrence
2862 struct hmap_node node;
2867 ctables_add_occurrence (const struct variable *var,
2868 const union value *value,
2869 struct hmap *occurrences)
2871 int width = var_get_width (var);
2872 unsigned int hash = value_hash (value, width, 0);
2874 struct ctables_occurrence *o;
2875 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2877 if (value_equal (value, &o->value, width))
2880 o = xmalloc (sizeof *o);
2881 value_clone (&o->value, value, width);
2882 hmap_insert (occurrences, &o->node, hash);
2887 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2888 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2889 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2890 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
2895 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2896 all the axes (except the scalar variable, if any). */
2897 struct hmap_node node;
2899 /* The areas that contain this cell. */
2900 uint32_t omit_areas;
2901 struct ctables_area *areas[N_CTATS];
2906 enum ctables_summary_variant sv;
2908 struct ctables_cell_axis
2910 struct ctables_cell_value
2912 const struct ctables_category *category;
2920 union ctables_summary *summaries;
2923 struct ctables_section
2926 struct ctables_table *table;
2927 struct ctables_nest *nests[PIVOT_N_AXES];
2930 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2931 struct hmap cells; /* Contains "struct ctables_cell"s. */
2932 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2935 static void ctables_section_uninit (struct ctables_section *);
2937 struct ctables_table
2939 struct ctables *ctables;
2940 struct ctables_axis *axes[PIVOT_N_AXES];
2941 struct ctables_stack stacks[PIVOT_N_AXES];
2942 struct ctables_section *sections;
2944 enum pivot_axis_type summary_axis;
2945 struct ctables_summary_spec_set summary_specs;
2946 struct variable **sum_vars;
2949 enum pivot_axis_type slabels_axis;
2950 bool slabels_visible;
2952 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2954 Most commonly, label_axis[a] == a, and in particular we always have
2955 label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2957 If ROWLABELS or COLLABELS is specified, then one of
2958 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2959 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2961 If any category labels are moved, then 'clabels_example' is one of the
2962 variables being moved (and it is otherwise NULL). All of the variables
2963 being moved have the same width, value labels, and categories, so this
2964 example variable can be used to find those out.
2966 The remaining members in this group are relevant only if category labels
2969 'clabels_values_map' holds a "struct ctables_value" for all the values
2970 that appear in all of the variables in the moved categories. It is
2971 accumulated as the data is read. Once the data is fully read, its
2972 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2974 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2975 enum pivot_axis_type clabels_from_axis;
2976 enum pivot_axis_type clabels_to_axis;
2977 const struct variable *clabels_example;
2978 struct hmap clabels_values_map;
2979 struct ctables_value **clabels_values;
2980 size_t n_clabels_values;
2982 /* Indexed by variable dictionary index. */
2983 struct ctables_categories **categories;
2984 size_t n_categories;
2992 struct ctables_chisq *chisq;
2993 struct ctables_pairwise *pairwise;
2996 struct ctables_cell_sort_aux
2998 const struct ctables_nest *nest;
2999 enum pivot_axis_type a;
3003 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3005 const struct ctables_cell_sort_aux *aux = aux_;
3006 struct ctables_cell *const *ap = a_;
3007 struct ctables_cell *const *bp = b_;
3008 const struct ctables_cell *a = *ap;
3009 const struct ctables_cell *b = *bp;
3011 const struct ctables_nest *nest = aux->nest;
3012 for (size_t i = 0; i < nest->n; i++)
3013 if (i != nest->scale_idx)
3015 const struct variable *var = nest->vars[i];
3016 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3017 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3018 if (a_cv->category != b_cv->category)
3019 return a_cv->category > b_cv->category ? 1 : -1;
3021 const union value *a_val = &a_cv->value;
3022 const union value *b_val = &b_cv->value;
3023 switch (a_cv->category->type)
3029 case CCT_POSTCOMPUTE:
3030 case CCT_EXCLUDED_MISSING:
3031 /* Must be equal. */
3039 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3047 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3049 return a_cv->category->sort_ascending ? cmp : -cmp;
3055 const char *a_label = var_lookup_value_label (var, a_val);
3056 const char *b_label = var_lookup_value_label (var, b_val);
3062 cmp = strcmp (a_label, b_label);
3068 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3071 return a_cv->category->sort_ascending ? cmp : -cmp;
3083 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3084 const void *aux UNUSED)
3086 struct ctables_cell *const *ap = a_;
3087 struct ctables_cell *const *bp = b_;
3088 const struct ctables_cell *a = *ap;
3089 const struct ctables_cell *b = *bp;
3091 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3093 int al = a->axes[axis].leaf;
3094 int bl = b->axes[axis].leaf;
3096 return al > bl ? 1 : -1;
3101 static struct ctables_area *
3102 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3103 enum ctables_area_type area)
3106 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3108 const struct ctables_nest *nest = s->nests[a];
3109 for (size_t i = 0; i < nest->n_areas[area]; i++)
3111 size_t v_idx = nest->areas[area][i];
3112 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3113 hash = hash_pointer (cv->category, hash);
3114 if (cv->category->type != CCT_TOTAL
3115 && cv->category->type != CCT_SUBTOTAL
3116 && cv->category->type != CCT_POSTCOMPUTE)
3117 hash = value_hash (&cv->value,
3118 var_get_width (nest->vars[v_idx]), hash);
3122 struct ctables_area *a;
3123 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3125 const struct ctables_cell *df = a->example;
3126 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3128 const struct ctables_nest *nest = s->nests[a];
3129 for (size_t i = 0; i < nest->n_areas[area]; i++)
3131 size_t v_idx = nest->areas[area][i];
3132 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3133 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3134 if (cv1->category != cv2->category
3135 || (cv1->category->type != CCT_TOTAL
3136 && cv1->category->type != CCT_SUBTOTAL
3137 && cv1->category->type != CCT_POSTCOMPUTE
3138 && !value_equal (&cv1->value, &cv2->value,
3139 var_get_width (nest->vars[v_idx]))))
3148 struct ctables_sum *sums = (s->table->n_sum_vars
3149 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3152 a = xmalloc (sizeof *a);
3153 *a = (struct ctables_area) { .example = cell, .sums = sums };
3154 hmap_insert (&s->areas[area], &a->node, hash);
3158 static struct ctables_cell *
3159 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3160 const struct ctables_category **cats[PIVOT_N_AXES])
3163 enum ctables_summary_variant sv = CSV_CELL;
3164 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3166 const struct ctables_nest *nest = s->nests[a];
3167 for (size_t i = 0; i < nest->n; i++)
3168 if (i != nest->scale_idx)
3170 hash = hash_pointer (cats[a][i], hash);
3171 if (cats[a][i]->type != CCT_TOTAL
3172 && cats[a][i]->type != CCT_SUBTOTAL
3173 && cats[a][i]->type != CCT_POSTCOMPUTE)
3174 hash = value_hash (case_data (c, nest->vars[i]),
3175 var_get_width (nest->vars[i]), hash);
3181 struct ctables_cell *cell;
3182 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3184 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3186 const struct ctables_nest *nest = s->nests[a];
3187 for (size_t i = 0; i < nest->n; i++)
3188 if (i != nest->scale_idx
3189 && (cats[a][i] != cell->axes[a].cvs[i].category
3190 || (cats[a][i]->type != CCT_TOTAL
3191 && cats[a][i]->type != CCT_SUBTOTAL
3192 && cats[a][i]->type != CCT_POSTCOMPUTE
3193 && !value_equal (case_data (c, nest->vars[i]),
3194 &cell->axes[a].cvs[i].value,
3195 var_get_width (nest->vars[i])))))
3204 cell = xmalloc (sizeof *cell);
3207 cell->omit_areas = 0;
3208 cell->postcompute = false;
3209 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3211 const struct ctables_nest *nest = s->nests[a];
3212 cell->axes[a].cvs = (nest->n
3213 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3215 for (size_t i = 0; i < nest->n; i++)
3217 const struct ctables_category *cat = cats[a][i];
3218 const struct variable *var = nest->vars[i];
3219 const union value *value = case_data (c, var);
3220 if (i != nest->scale_idx)
3222 const struct ctables_category *subtotal = cat->subtotal;
3223 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3226 if (cat->type == CCT_TOTAL
3227 || cat->type == CCT_SUBTOTAL
3228 || cat->type == CCT_POSTCOMPUTE)
3232 case PIVOT_AXIS_COLUMN:
3233 cell->omit_areas |= ((1u << CTAT_TABLE) |
3234 (1u << CTAT_LAYER) |
3235 (1u << CTAT_LAYERCOL) |
3236 (1u << CTAT_SUBTABLE) |
3239 case PIVOT_AXIS_ROW:
3240 cell->omit_areas |= ((1u << CTAT_TABLE) |
3241 (1u << CTAT_LAYER) |
3242 (1u << CTAT_LAYERROW) |
3243 (1u << CTAT_SUBTABLE) |
3246 case PIVOT_AXIS_LAYER:
3247 cell->omit_areas |= ((1u << CTAT_TABLE) |
3248 (1u << CTAT_LAYER));
3252 if (cat->type == CCT_POSTCOMPUTE)
3253 cell->postcompute = true;
3256 cell->axes[a].cvs[i].category = cat;
3257 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3261 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3262 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3263 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3264 for (size_t i = 0; i < specs->n; i++)
3265 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3266 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3267 cell->areas[at] = ctables_area_insert (s, cell, at);
3268 hmap_insert (&s->cells, &cell->node, hash);
3273 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3275 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3280 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3281 const struct ctables_category **cats[PIVOT_N_AXES],
3282 bool is_included, double weight[N_CTWS])
3284 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3285 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3287 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3288 const union value *value = case_data (c, specs->var);
3289 bool is_missing = var_is_value_missing (specs->var, value);
3290 bool is_scale_missing
3291 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3293 for (size_t i = 0; i < specs->n; i++)
3294 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3295 is_scale_missing, is_included,
3296 weight[specs->specs[i].weighting]);
3297 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3298 if (!(cell->omit_areas && (1u << at)))
3300 struct ctables_area *a = cell->areas[at];
3302 add_weight (a->total, weight);
3304 add_weight (a->count, weight);
3307 add_weight (a->valid, weight);
3309 if (!is_scale_missing)
3310 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3312 const struct variable *var = s->table->sum_vars[i];
3313 double addend = case_num (c, var);
3314 if (!var_is_num_missing (var, addend))
3315 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3316 a->sums[i].sum[wt] += addend * weight[wt];
3323 recurse_totals (struct ctables_section *s, const struct ccase *c,
3324 const struct ctables_category **cats[PIVOT_N_AXES],
3325 bool is_included, double weight[N_CTWS],
3326 enum pivot_axis_type start_axis, size_t start_nest)
3328 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3330 const struct ctables_nest *nest = s->nests[a];
3331 for (size_t i = start_nest; i < nest->n; i++)
3333 if (i == nest->scale_idx)
3336 const struct variable *var = nest->vars[i];
3338 const struct ctables_category *total = ctables_categories_total (
3339 s->table->categories[var_get_dict_index (var)]);
3342 const struct ctables_category *save = cats[a][i];
3344 ctables_cell_add__ (s, c, cats, is_included, weight);
3345 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
3354 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3355 const struct ctables_category **cats[PIVOT_N_AXES],
3356 bool is_included, double weight[N_CTWS],
3357 enum pivot_axis_type start_axis, size_t start_nest)
3359 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3361 const struct ctables_nest *nest = s->nests[a];
3362 for (size_t i = start_nest; i < nest->n; i++)
3364 if (i == nest->scale_idx)
3367 const struct ctables_category *save = cats[a][i];
3370 cats[a][i] = save->subtotal;
3371 ctables_cell_add__ (s, c, cats, is_included, weight);
3372 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
3381 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3382 double weight[N_CTWS])
3384 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3385 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3386 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3387 const struct ctables_category **cats[PIVOT_N_AXES] =
3389 [PIVOT_AXIS_LAYER] = layer_cats,
3390 [PIVOT_AXIS_ROW] = row_cats,
3391 [PIVOT_AXIS_COLUMN] = column_cats,
3394 bool is_included = true;
3396 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3398 const struct ctables_nest *nest = s->nests[a];
3399 for (size_t i = 0; i < nest->n; i++)
3400 if (i != nest->scale_idx)
3402 const struct variable *var = nest->vars[i];
3403 const union value *value = case_data (c, var);
3405 cats[a][i] = ctables_categories_match (
3406 s->table->categories[var_get_dict_index (var)], value, var);
3409 if (i != nest->summary_idx)
3412 if (!var_is_value_missing (var, value))
3415 static const struct ctables_category cct_excluded_missing = {
3416 .type = CCT_EXCLUDED_MISSING,
3419 cats[a][i] = &cct_excluded_missing;
3420 is_included = false;
3426 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3428 const struct ctables_nest *nest = s->nests[a];
3429 for (size_t i = 0; i < nest->n; i++)
3430 if (i != nest->scale_idx)
3432 const struct variable *var = nest->vars[i];
3433 const union value *value = case_data (c, var);
3434 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3438 ctables_cell_add__ (s, c, cats, is_included, weight);
3439 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3440 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
3443 struct ctables_value
3445 struct hmap_node node;
3450 static struct ctables_value *
3451 ctables_value_find__ (struct ctables_table *t, const union value *value,
3452 int width, unsigned int hash)
3454 struct ctables_value *clv;
3455 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3456 hash, &t->clabels_values_map)
3457 if (value_equal (value, &clv->value, width))
3463 ctables_value_insert (struct ctables_table *t, const union value *value,
3466 unsigned int hash = value_hash (value, width, 0);
3467 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3470 clv = xmalloc (sizeof *clv);
3471 value_clone (&clv->value, value, width);
3472 hmap_insert (&t->clabels_values_map, &clv->node, hash);
3476 static struct ctables_value *
3477 ctables_value_find (struct ctables_table *t,
3478 const union value *value, int width)
3480 return ctables_value_find__ (t, value, width,
3481 value_hash (value, width, 0));
3485 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3487 const struct ctables_value *const *ap = a_;
3488 const struct ctables_value *const *bp = b_;
3489 const struct ctables_value *a = *ap;
3490 const struct ctables_value *b = *bp;
3491 const int *width = width_;
3492 return value_compare_3way (&a->value, &b->value, *width);
3496 ctables_sort_clabels_values (struct ctables_table *t)
3498 const struct variable *v0 = t->clabels_example;
3499 int width = var_get_width (v0);
3501 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3504 const struct val_labs *val_labs = var_get_value_labels (v0);
3505 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3506 vl = val_labs_next (val_labs, vl))
3507 if (ctables_categories_match (c0, &vl->value, v0))
3508 ctables_value_insert (t, &vl->value, width);
3511 size_t n = hmap_count (&t->clabels_values_map);
3512 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3514 struct ctables_value *clv;
3516 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3517 t->clabels_values[i++] = clv;
3518 t->n_clabels_values = n;
3521 sort (t->clabels_values, n, sizeof *t->clabels_values,
3522 compare_ctables_values_3way, &width);
3524 for (size_t i = 0; i < n; i++)
3525 t->clabels_values[i]->leaf = i;
3530 const struct dictionary *dict;
3531 struct pivot_table_look *look;
3533 /* For CTEF_* formats. */
3534 struct fmt_settings ctables_formats;
3536 /* If this is NULL, zeros are displayed using the normal print format.
3537 Otherwise, this string is displayed. */
3540 /* If this is NULL, missing values are displayed using the normal print
3541 format. Otherwise, this string is displayed. */
3544 /* Indexed by variable dictionary index. */
3545 enum ctables_vlabel *vlabels;
3547 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3549 bool mrsets_count_duplicates; /* MRSETS. */
3550 bool smissing_listwise; /* SMISSING. */
3551 struct variable *e_weight; /* WEIGHT. */
3552 int hide_threshold; /* HIDESMALLCOUNTS. */
3554 struct ctables_table **tables;
3559 ctpo_add (double a, double b)
3565 ctpo_sub (double a, double b)
3571 ctpo_mul (double a, double b)
3577 ctpo_div (double a, double b)
3579 return b ? a / b : SYSMIS;
3583 ctpo_pow (double a, double b)
3585 int save_errno = errno;
3587 double result = pow (a, b);
3595 ctpo_neg (double a, double b UNUSED)
3600 struct ctables_pcexpr_evaluate_ctx
3602 const struct ctables_cell *cell;
3603 const struct ctables_section *section;
3604 const struct ctables_categories *cats;
3605 enum pivot_axis_type pc_a;
3608 enum fmt_type parse_format;
3611 static double ctables_pcexpr_evaluate (
3612 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3615 ctables_pcexpr_evaluate_nonterminal (
3616 const struct ctables_pcexpr_evaluate_ctx *ctx,
3617 const struct ctables_pcexpr *e, size_t n_args,
3618 double evaluate (double, double))
3620 double args[2] = { 0, 0 };
3621 for (size_t i = 0; i < n_args; i++)
3623 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3624 if (!isfinite (args[i]) || args[i] == SYSMIS)
3627 return evaluate (args[0], args[1]);
3631 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3632 const struct ctables_cell_value *pc_cv)
3634 const struct ctables_section *s = ctx->section;
3637 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3639 const struct ctables_nest *nest = s->nests[a];
3640 for (size_t i = 0; i < nest->n; i++)
3641 if (i != nest->scale_idx)
3643 const struct ctables_cell_value *cv
3644 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3645 : &ctx->cell->axes[a].cvs[i]);
3646 hash = hash_pointer (cv->category, hash);
3647 if (cv->category->type != CCT_TOTAL
3648 && cv->category->type != CCT_SUBTOTAL
3649 && cv->category->type != CCT_POSTCOMPUTE)
3650 hash = value_hash (&cv->value,
3651 var_get_width (nest->vars[i]), hash);
3655 struct ctables_cell *tc;
3656 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3658 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3660 const struct ctables_nest *nest = s->nests[a];
3661 for (size_t i = 0; i < nest->n; i++)
3662 if (i != nest->scale_idx)
3664 const struct ctables_cell_value *p_cv
3665 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3666 : &ctx->cell->axes[a].cvs[i]);
3667 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3668 if (p_cv->category != t_cv->category
3669 || (p_cv->category->type != CCT_TOTAL
3670 && p_cv->category->type != CCT_SUBTOTAL
3671 && p_cv->category->type != CCT_POSTCOMPUTE
3672 && !value_equal (&p_cv->value,
3674 var_get_width (nest->vars[i]))))
3686 const struct ctables_table *t = s->table;
3687 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3688 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3689 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3690 &specs->specs[ctx->summary_idx]);
3694 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3695 const struct ctables_pcexpr *e)
3702 case CTPO_CAT_NRANGE:
3703 case CTPO_CAT_SRANGE:
3704 case CTPO_CAT_MISSING:
3705 case CTPO_CAT_OTHERNM:
3707 struct ctables_cell_value cv = {
3708 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3710 assert (cv.category != NULL);
3712 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3713 const struct ctables_occurrence *o;
3716 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3717 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3718 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3720 cv.value = o->value;
3721 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3726 case CTPO_CAT_NUMBER:
3727 case CTPO_CAT_SUBTOTAL:
3728 case CTPO_CAT_TOTAL:
3730 struct ctables_cell_value cv = {
3731 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3732 .value = { .f = e->number },
3734 assert (cv.category != NULL);
3735 return ctables_pcexpr_evaluate_category (ctx, &cv);
3738 case CTPO_CAT_STRING:
3740 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3742 if (width > e->string.length)
3744 s = xmalloc (width);
3745 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3748 const struct ctables_category *category
3749 = ctables_find_category_for_postcompute (
3750 ctx->section->table->ctables->dict,
3751 ctx->cats, ctx->parse_format, e);
3752 assert (category != NULL);
3754 struct ctables_cell_value cv = { .category = category };
3755 if (category->type == CCT_NUMBER)
3756 cv.value.f = category->number;
3757 else if (category->type == CCT_STRING)
3758 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3762 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
3768 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3771 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3774 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3777 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3780 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3783 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3789 static const struct ctables_category *
3790 ctables_cell_postcompute (const struct ctables_section *s,
3791 const struct ctables_cell *cell,
3792 enum pivot_axis_type *pc_a_p,
3795 assert (cell->postcompute);
3796 const struct ctables_category *pc_cat = NULL;
3797 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3798 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3800 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3801 if (cv->category->type == CCT_POSTCOMPUTE)
3805 /* Multiple postcomputes cross each other. The value is
3810 pc_cat = cv->category;
3814 *pc_a_idx_p = pc_a_idx;
3818 assert (pc_cat != NULL);
3823 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3824 const struct ctables_cell *cell,
3825 const struct ctables_summary_spec *ss,
3826 struct fmt_spec *format,
3827 bool *is_ctables_format,
3830 enum pivot_axis_type pc_a = 0;
3831 size_t pc_a_idx = 0;
3832 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3833 s, cell, &pc_a, &pc_a_idx);
3837 const struct ctables_postcompute *pc = pc_cat->pc;
3840 for (size_t i = 0; i < pc->specs->n; i++)
3842 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3843 if (ss->function == ss2->function
3844 && ss->weighting == ss2->weighting
3845 && ss->calc_area == ss2->calc_area
3846 && ss->percentile == ss2->percentile)
3848 *format = ss2->format;
3849 *is_ctables_format = ss2->is_ctables_format;
3855 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3856 const struct ctables_categories *cats = s->table->categories[
3857 var_get_dict_index (var)];
3858 struct ctables_pcexpr_evaluate_ctx ctx = {
3863 .pc_a_idx = pc_a_idx,
3864 .summary_idx = summary_idx,
3865 .parse_format = pc_cat->parse_format,
3867 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3870 /* Chi-square test (SIGTEST). */
3871 struct ctables_chisq
3874 bool include_mrsets;
3878 /* Pairwise comparison test (COMPARETEST). */
3879 struct ctables_pairwise
3881 enum { PROP, MEAN } type;
3883 bool include_mrsets;
3884 bool meansvariance_allcats;
3886 enum { BONFERRONI = 1, BH } adjust;
3895 parse_col_width (struct lexer *lexer, const char *name, double *width)
3897 lex_match (lexer, T_EQUALS);
3898 if (lex_match_id (lexer, "DEFAULT"))
3900 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3902 *width = lex_number (lexer);
3912 parse_bool (struct lexer *lexer, bool *b)
3914 if (lex_match_id (lexer, "NO"))
3916 else if (lex_match_id (lexer, "YES"))
3920 lex_error_expecting (lexer, "YES", "NO");
3927 ctables_chisq_destroy (struct ctables_chisq *chisq)
3933 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
3939 ctables_table_destroy (struct ctables_table *t)
3944 for (size_t i = 0; i < t->n_sections; i++)
3945 ctables_section_uninit (&t->sections[i]);
3948 for (size_t i = 0; i < t->n_categories; i++)
3949 ctables_categories_unref (t->categories[i]);
3950 free (t->categories);
3952 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3954 ctables_axis_destroy (t->axes[a]);
3955 ctables_stack_uninit (&t->stacks[a]);
3957 free (t->summary_specs.specs);
3959 struct ctables_value *ctv, *next_ctv;
3960 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3961 &t->clabels_values_map)
3963 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3964 hmap_delete (&t->clabels_values_map, &ctv->node);
3967 hmap_destroy (&t->clabels_values_map);
3968 free (t->clabels_values);
3974 ctables_chisq_destroy (t->chisq);
3975 ctables_pairwise_destroy (t->pairwise);
3980 ctables_destroy (struct ctables *ct)
3985 struct ctables_postcompute *pc, *next_pc;
3986 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3990 msg_location_destroy (pc->location);
3991 ctables_pcexpr_destroy (pc->expr);
3995 ctables_summary_spec_set_uninit (pc->specs);
3998 hmap_delete (&ct->postcomputes, &pc->hmap_node);
4001 hmap_destroy (&ct->postcomputes);
4003 fmt_settings_uninit (&ct->ctables_formats);
4004 pivot_table_look_unref (ct->look);
4008 for (size_t i = 0; i < ct->n_tables; i++)
4009 ctables_table_destroy (ct->tables[i]);
4015 all_strings (struct variable **vars, size_t n_vars,
4016 const struct ctables_category *cat)
4018 for (size_t j = 0; j < n_vars; j++)
4019 if (var_is_numeric (vars[j]))
4021 msg_at (SE, cat->location,
4022 _("This category specification may be applied only to string "
4023 "variables, but this subcommand tries to apply it to "
4024 "numeric variable %s."),
4025 var_get_name (vars[j]));
4032 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4033 struct ctables *ct, struct ctables_table *t)
4035 if (!lex_force_match_id (lexer, "VARIABLES"))
4037 lex_match (lexer, T_EQUALS);
4039 struct variable **vars;
4041 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
4044 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
4045 for (size_t i = 1; i < n_vars; i++)
4047 const struct fmt_spec *f = var_get_print_format (vars[i]);
4048 if (f->type != common_format->type)
4050 common_format = NULL;
4056 && (fmt_get_category (common_format->type)
4057 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
4059 struct ctables_categories *c = xmalloc (sizeof *c);
4060 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
4061 for (size_t i = 0; i < n_vars; i++)
4063 struct ctables_categories **cp
4064 = &t->categories[var_get_dict_index (vars[i])];
4065 ctables_categories_unref (*cp);
4069 size_t allocated_cats = 0;
4070 int cats_start_ofs = -1;
4071 int cats_end_ofs = -1;
4072 if (lex_match (lexer, T_LBRACK))
4074 cats_start_ofs = lex_ofs (lexer);
4077 if (c->n_cats >= allocated_cats)
4078 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4080 int start_ofs = lex_ofs (lexer);
4081 struct ctables_category *cat = &c->cats[c->n_cats];
4082 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4084 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4087 lex_match (lexer, T_COMMA);
4089 while (!lex_match (lexer, T_RBRACK));
4090 cats_end_ofs = lex_ofs (lexer) - 1;
4093 struct ctables_category cat = {
4095 .include_missing = false,
4096 .sort_ascending = true,
4098 bool show_totals = false;
4099 char *total_label = NULL;
4100 bool totals_before = false;
4101 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4103 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4105 lex_match (lexer, T_EQUALS);
4106 if (lex_match_id (lexer, "A"))
4107 cat.sort_ascending = true;
4108 else if (lex_match_id (lexer, "D"))
4109 cat.sort_ascending = false;
4112 lex_error_expecting (lexer, "A", "D");
4116 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4118 int start_ofs = lex_ofs (lexer) - 1;
4119 lex_match (lexer, T_EQUALS);
4120 if (lex_match_id (lexer, "VALUE"))
4121 cat.type = CCT_VALUE;
4122 else if (lex_match_id (lexer, "LABEL"))
4123 cat.type = CCT_LABEL;
4126 cat.type = CCT_FUNCTION;
4127 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4128 &cat.weighting, &cat.area))
4131 if (lex_match (lexer, T_LPAREN))
4133 cat.sort_var = parse_variable (lexer, dict);
4137 if (cat.sort_function == CTSF_PTILE)
4139 lex_match (lexer, T_COMMA);
4140 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4142 cat.percentile = lex_number (lexer);
4146 if (!lex_force_match (lexer, T_RPAREN))
4149 else if (ctables_function_availability (cat.sort_function)
4152 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4156 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
4157 _("Data-dependent sorting is not implemented."));
4161 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4163 lex_match (lexer, T_EQUALS);
4164 if (lex_match_id (lexer, "INCLUDE"))
4165 cat.include_missing = true;
4166 else if (lex_match_id (lexer, "EXCLUDE"))
4167 cat.include_missing = false;
4170 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4174 else if (lex_match_id (lexer, "TOTAL"))
4176 lex_match (lexer, T_EQUALS);
4177 if (!parse_bool (lexer, &show_totals))
4180 else if (lex_match_id (lexer, "LABEL"))
4182 lex_match (lexer, T_EQUALS);
4183 if (!lex_force_string (lexer))
4186 total_label = ss_xstrdup (lex_tokss (lexer));
4189 else if (lex_match_id (lexer, "POSITION"))
4191 lex_match (lexer, T_EQUALS);
4192 if (lex_match_id (lexer, "BEFORE"))
4193 totals_before = true;
4194 else if (lex_match_id (lexer, "AFTER"))
4195 totals_before = false;
4198 lex_error_expecting (lexer, "BEFORE", "AFTER");
4202 else if (lex_match_id (lexer, "EMPTY"))
4204 lex_match (lexer, T_EQUALS);
4205 if (lex_match_id (lexer, "INCLUDE"))
4206 c->show_empty = true;
4207 else if (lex_match_id (lexer, "EXCLUDE"))
4208 c->show_empty = false;
4211 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4218 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4219 "TOTAL", "LABEL", "POSITION", "EMPTY");
4221 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
4228 if (c->n_cats >= allocated_cats)
4229 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4230 c->cats[c->n_cats++] = cat;
4235 if (c->n_cats >= allocated_cats)
4236 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4238 struct ctables_category *totals;
4241 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4242 totals = &c->cats[0];
4245 totals = &c->cats[c->n_cats];
4248 *totals = (struct ctables_category) {
4250 .total_label = total_label ? total_label : xstrdup (_("Total")),
4254 struct ctables_category *subtotal = NULL;
4255 for (size_t i = totals_before ? 0 : c->n_cats;
4256 totals_before ? i < c->n_cats : i-- > 0;
4257 totals_before ? i++ : 0)
4259 struct ctables_category *cat = &c->cats[i];
4268 cat->subtotal = subtotal;
4271 case CCT_POSTCOMPUTE:
4282 case CCT_EXCLUDED_MISSING:
4287 if (cats_start_ofs != -1)
4289 for (size_t i = 0; i < c->n_cats; i++)
4291 struct ctables_category *cat = &c->cats[i];
4294 case CCT_POSTCOMPUTE:
4295 cat->parse_format = parse_strings ? common_format->type : FMT_F;
4296 struct msg_location *cats_location
4297 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4298 bool ok = ctables_recursive_check_postcompute (
4299 dict, cat->pc->expr, cat, c, cats_location);
4300 msg_location_destroy (cats_location);
4307 for (size_t j = 0; j < n_vars; j++)
4308 if (var_is_alpha (vars[j]))
4310 msg_at (SE, cat->location,
4311 _("This category specification may be applied "
4312 "only to numeric variables, but this "
4313 "subcommand tries to apply it to string "
4315 var_get_name (vars[j]));
4324 if (!parse_category_string (cat->location, cat->string, dict,
4325 common_format->type, &n))
4328 ss_dealloc (&cat->string);
4330 cat->type = CCT_NUMBER;
4333 else if (!all_strings (vars, n_vars, cat))
4342 if (!cat->srange[0].string)
4344 else if (!parse_category_string (cat->location,
4345 cat->srange[0], dict,
4346 common_format->type, &n[0]))
4349 if (!cat->srange[1].string)
4351 else if (!parse_category_string (cat->location,
4352 cat->srange[1], dict,
4353 common_format->type, &n[1]))
4356 ss_dealloc (&cat->srange[0]);
4357 ss_dealloc (&cat->srange[1]);
4359 cat->type = CCT_NRANGE;
4360 cat->nrange[0] = n[0];
4361 cat->nrange[1] = n[1];
4363 else if (!all_strings (vars, n_vars, cat))
4374 case CCT_EXCLUDED_MISSING:
4391 const struct ctables_summary_spec_set *set;
4396 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4398 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4399 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4400 if (as->function != bs->function)
4401 return as->function > bs->function ? 1 : -1;
4402 else if (as->weighting != bs->weighting)
4403 return as->weighting > bs->weighting ? 1 : -1;
4404 else if (as->calc_area != bs->calc_area)
4405 return as->calc_area > bs->calc_area ? 1 : -1;
4406 else if (as->percentile != bs->percentile)
4407 return as->percentile < bs->percentile ? 1 : -1;
4409 const char *as_label = as->label ? as->label : "";
4410 const char *bs_label = bs->label ? bs->label : "";
4411 return strcmp (as_label, bs_label);
4415 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4416 size_t ix[PIVOT_N_AXES])
4418 if (a < PIVOT_N_AXES)
4420 size_t limit = MAX (t->stacks[a].n, 1);
4421 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4422 ctables_table_add_section (t, a + 1, ix);
4426 struct ctables_section *s = &t->sections[t->n_sections++];
4427 *s = (struct ctables_section) {
4429 .cells = HMAP_INITIALIZER (s->cells),
4431 for (a = 0; a < PIVOT_N_AXES; a++)
4434 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4436 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4437 for (size_t i = 0; i < nest->n; i++)
4438 hmap_init (&s->occurrences[a][i]);
4440 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4441 hmap_init (&s->areas[at]);
4446 ctables_format (double d, const struct fmt_spec *format,
4447 const struct fmt_settings *settings)
4449 const union value v = { .f = d };
4450 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4452 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4453 produce the results we want for negative numbers, putting the negative
4454 sign in the wrong spot, before the prefix instead of after it. We can't,
4455 in fact, produce the desired results using a custom-currency
4456 specification. Instead, we postprocess the output, moving the negative
4459 NEQUAL: "-N=3" => "N=-3"
4460 PAREN: "-(3)" => "(-3)"
4461 PCTPAREN: "-(3%)" => "(-3%)"
4463 This transformation doesn't affect NEGPAREN. */
4464 char *minus_src = strchr (s, '-');
4465 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4467 char *n_equals = strstr (s, "N=");
4468 char *lparen = strchr (s, '(');
4469 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4471 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
4477 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4479 for (size_t i = 0; i < t->stacks[a].n; i++)
4481 struct ctables_nest *nest = &t->stacks[a].nests[i];
4482 if (nest->n != 1 || nest->scale_idx != 0)
4485 enum ctables_vlabel vlabel
4486 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4487 if (vlabel != CTVL_NONE)
4494 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4496 struct pivot_table *pt = pivot_table_create__ (
4498 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4499 : pivot_value_new_text (N_("Custom Tables"))),
4502 pivot_table_set_caption (
4503 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4505 pivot_table_set_corner_text (
4506 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4508 bool summary_dimension = (t->summary_axis != t->slabels_axis
4509 || (!t->slabels_visible
4510 && t->summary_specs.n > 1));
4511 if (summary_dimension)
4513 struct pivot_dimension *d = pivot_dimension_create (
4514 pt, t->slabels_axis, N_("Statistics"));
4515 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4516 if (!t->slabels_visible)
4517 d->hide_all_labels = true;
4518 for (size_t i = 0; i < specs->n; i++)
4519 pivot_category_create_leaf (
4520 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4523 bool categories_dimension = t->clabels_example != NULL;
4524 if (categories_dimension)
4526 struct pivot_dimension *d = pivot_dimension_create (
4527 pt, t->label_axis[t->clabels_from_axis],
4528 t->clabels_from_axis == PIVOT_AXIS_ROW
4529 ? N_("Row Categories")
4530 : N_("Column Categories"));
4531 const struct variable *var = t->clabels_example;
4532 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4533 for (size_t i = 0; i < t->n_clabels_values; i++)
4535 const struct ctables_value *value = t->clabels_values[i];
4536 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4537 assert (cat != NULL);
4538 pivot_category_create_leaf (
4539 d->root, ctables_category_create_value_label (c, cat,
4545 pivot_table_set_look (pt, ct->look);
4546 struct pivot_dimension *d[PIVOT_N_AXES];
4547 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4549 static const char *names[] = {
4550 [PIVOT_AXIS_ROW] = N_("Rows"),
4551 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4552 [PIVOT_AXIS_LAYER] = N_("Layers"),
4554 d[a] = (t->axes[a] || a == t->summary_axis
4555 ? pivot_dimension_create (pt, a, names[a])
4560 assert (t->axes[a]);
4562 for (size_t i = 0; i < t->stacks[a].n; i++)
4564 struct ctables_nest *nest = &t->stacks[a].nests[i];
4565 struct ctables_section **sections = xnmalloc (t->n_sections,
4567 size_t n_sections = 0;
4569 size_t n_total_cells = 0;
4570 size_t max_depth = 0;
4571 for (size_t j = 0; j < t->n_sections; j++)
4572 if (t->sections[j].nests[a] == nest)
4574 struct ctables_section *s = &t->sections[j];
4575 sections[n_sections++] = s;
4576 n_total_cells += hmap_count (&s->cells);
4578 size_t depth = s->nests[a]->n;
4579 max_depth = MAX (depth, max_depth);
4582 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4584 size_t n_sorted = 0;
4586 for (size_t j = 0; j < n_sections; j++)
4588 struct ctables_section *s = sections[j];
4590 struct ctables_cell *cell;
4591 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4593 sorted[n_sorted++] = cell;
4594 assert (n_sorted <= n_total_cells);
4597 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4598 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4600 struct ctables_level
4602 enum ctables_level_type
4604 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4605 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4606 CTL_SUMMARY, /* Summary functions. */
4610 enum settings_value_show vlabel; /* CTL_VAR only. */
4613 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4614 size_t n_levels = 0;
4615 for (size_t k = 0; k < nest->n; k++)
4617 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4618 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4620 if (vlabel != CTVL_NONE)
4622 levels[n_levels++] = (struct ctables_level) {
4624 .vlabel = (enum settings_value_show) vlabel,
4629 if (nest->scale_idx != k
4630 && (k != nest->n - 1 || t->label_axis[a] == a))
4632 levels[n_levels++] = (struct ctables_level) {
4633 .type = CTL_CATEGORY,
4639 if (!summary_dimension && a == t->slabels_axis)
4641 levels[n_levels++] = (struct ctables_level) {
4642 .type = CTL_SUMMARY,
4643 .var_idx = SIZE_MAX,
4647 /* Pivot categories:
4649 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4650 - category for nest->vars[0], if nest->scale_idx != 0
4651 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4652 - category for nest->vars[1], if nest->scale_idx != 1
4654 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4655 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4656 - summary function, if 'a == t->slabels_axis && a ==
4659 Additional dimensions:
4661 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4663 - If 't->label_axis[b] == a' for some 'b != a', add a category
4668 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4670 for (size_t j = 0; j < n_sorted; j++)
4672 struct ctables_cell *cell = sorted[j];
4673 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4675 size_t n_common = 0;
4678 for (; n_common < n_levels; n_common++)
4680 const struct ctables_level *level = &levels[n_common];
4681 if (level->type == CTL_CATEGORY)
4683 size_t var_idx = level->var_idx;
4684 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4685 if (prev->axes[a].cvs[var_idx].category != c)
4687 else if (c->type != CCT_SUBTOTAL
4688 && c->type != CCT_TOTAL
4689 && c->type != CCT_POSTCOMPUTE
4690 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4691 &cell->axes[a].cvs[var_idx].value,
4692 var_get_type (nest->vars[var_idx])))
4698 for (size_t k = n_common; k < n_levels; k++)
4700 const struct ctables_level *level = &levels[k];
4701 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4702 if (level->type == CTL_SUMMARY)
4704 assert (k == n_levels - 1);
4706 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4707 for (size_t m = 0; m < specs->n; m++)
4709 int leaf = pivot_category_create_leaf (
4710 parent, ctables_summary_label (&specs->specs[m],
4718 const struct variable *var = nest->vars[level->var_idx];
4719 struct pivot_value *label;
4720 if (level->type == CTL_VAR)
4722 label = pivot_value_new_variable (var);
4723 label->variable.show = level->vlabel;
4725 else if (level->type == CTL_CATEGORY)
4727 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4728 label = ctables_category_create_value_label (
4729 t->categories[var_get_dict_index (var)],
4730 cv->category, var, &cv->value);
4735 if (k == n_levels - 1)
4736 prev_leaf = pivot_category_create_leaf (parent, label);
4738 groups[k] = pivot_category_create_group__ (parent, label);
4742 cell->axes[a].leaf = prev_leaf;
4751 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4755 size_t n_total_cells = 0;
4756 for (size_t j = 0; j < t->n_sections; j++)
4757 n_total_cells += hmap_count (&t->sections[j].cells);
4759 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4760 size_t n_sorted = 0;
4761 for (size_t j = 0; j < t->n_sections; j++)
4763 const struct ctables_section *s = &t->sections[j];
4764 struct ctables_cell *cell;
4765 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4767 sorted[n_sorted++] = cell;
4769 assert (n_sorted <= n_total_cells);
4770 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4772 size_t ids[N_CTATS];
4773 memset (ids, 0, sizeof ids);
4774 for (size_t j = 0; j < n_sorted; j++)
4776 struct ctables_cell *cell = sorted[j];
4777 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4779 struct ctables_area *area = cell->areas[at];
4780 if (!area->sequence)
4781 area->sequence = ++ids[at];
4788 for (size_t i = 0; i < t->n_sections; i++)
4790 struct ctables_section *s = &t->sections[i];
4792 struct ctables_cell *cell;
4793 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4798 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4799 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4800 for (size_t j = 0; j < specs->n; j++)
4803 size_t n_dindexes = 0;
4805 if (summary_dimension)
4806 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4808 if (categories_dimension)
4810 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4811 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4812 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4813 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4816 dindexes[n_dindexes++] = ctv->leaf;
4819 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4822 int leaf = cell->axes[a].leaf;
4823 if (a == t->summary_axis && !summary_dimension)
4825 dindexes[n_dindexes++] = leaf;
4828 const struct ctables_summary_spec *ss = &specs->specs[j];
4830 struct fmt_spec format = specs->specs[j].format;
4831 bool is_ctables_format = ss->is_ctables_format;
4832 double d = (cell->postcompute
4833 ? ctables_cell_calculate_postcompute (
4834 s, cell, ss, &format, &is_ctables_format, j)
4835 : ctables_summary_value (cell->areas,
4836 &cell->summaries[j], ss));
4838 struct pivot_value *value;
4839 if (ct->hide_threshold != 0
4840 && d < ct->hide_threshold
4841 && ss->function == CTSF_COUNT)
4843 value = pivot_value_new_user_text_nocopy (
4844 xasprintf ("<%d", ct->hide_threshold));
4846 else if (d == 0 && ct->zero)
4847 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4848 else if (d == SYSMIS && ct->missing)
4849 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4850 else if (is_ctables_format)
4851 value = pivot_value_new_user_text_nocopy (
4852 ctables_format (d, &format, &ct->ctables_formats));
4855 value = pivot_value_new_number (d);
4856 value->numeric.format = format;
4858 /* XXX should text values be right-justified? */
4859 pivot_table_put (pt, dindexes, n_dindexes, value);
4864 pivot_table_submit (pt);
4868 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4870 enum pivot_axis_type label_pos = t->label_axis[a];
4874 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4875 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4877 const struct ctables_stack *stack = &t->stacks[a];
4881 const struct ctables_nest *n0 = &stack->nests[0];
4884 assert (stack->n == 1);
4888 const struct variable *v0 = n0->vars[n0->n - 1];
4889 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4890 t->clabels_example = v0;
4892 for (size_t i = 0; i < c0->n_cats; i++)
4893 if (c0->cats[i].type == CCT_FUNCTION)
4895 msg (SE, _("%s=%s is not allowed with sorting based "
4896 "on a summary function."),
4897 subcommand_name, pos_name);
4900 if (n0->n - 1 == n0->scale_idx)
4902 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4903 "but %s is a scale variable."),
4904 subcommand_name, pos_name, var_get_name (v0));
4908 for (size_t i = 1; i < stack->n; i++)
4910 const struct ctables_nest *ni = &stack->nests[i];
4912 const struct variable *vi = ni->vars[ni->n - 1];
4913 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4915 if (ni->n - 1 == ni->scale_idx)
4917 msg (SE, _("%s=%s requires the variables to be moved to be "
4918 "categorical, but %s is a scale variable."),
4919 subcommand_name, pos_name, var_get_name (vi));
4922 if (var_get_width (v0) != var_get_width (vi))
4924 msg (SE, _("%s=%s requires the variables to be "
4925 "moved to have the same width, but %s has "
4926 "width %d and %s has width %d."),
4927 subcommand_name, pos_name,
4928 var_get_name (v0), var_get_width (v0),
4929 var_get_name (vi), var_get_width (vi));
4932 if (!val_labs_equal (var_get_value_labels (v0),
4933 var_get_value_labels (vi)))
4935 msg (SE, _("%s=%s requires the variables to be "
4936 "moved to have the same value labels, but %s "
4937 "and %s have different value labels."),
4938 subcommand_name, pos_name,
4939 var_get_name (v0), var_get_name (vi));
4942 if (!ctables_categories_equal (c0, ci))
4944 msg (SE, _("%s=%s requires the variables to be "
4945 "moved to have the same category "
4946 "specifications, but %s and %s have different "
4947 "category specifications."),
4948 subcommand_name, pos_name,
4949 var_get_name (v0), var_get_name (vi));
4958 add_sum_var (struct variable *var,
4959 struct variable ***sum_vars, size_t *n, size_t *allocated)
4961 for (size_t i = 0; i < *n; i++)
4962 if (var == (*sum_vars)[i])
4965 if (*n >= *allocated)
4966 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4967 (*sum_vars)[*n] = var;
4971 static enum ctables_area_type
4972 rotate_area (enum ctables_area_type area)
4983 return CTAT_LAYERCOL;
4986 return CTAT_LAYERROW;
4999 enumerate_sum_vars (const struct ctables_axis *a,
5000 struct variable ***sum_vars, size_t *n, size_t *allocated)
5008 for (size_t i = 0; i < N_CSVS; i++)
5009 for (size_t j = 0; j < a->specs[i].n; j++)
5011 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5012 if (spec->function == CTSF_areaPCT_SUM)
5013 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
5019 for (size_t i = 0; i < 2; i++)
5020 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
5026 ctables_prepare_table (struct ctables_table *t)
5028 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5031 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5033 for (size_t j = 0; j < t->stacks[a].n; j++)
5035 struct ctables_nest *nest = &t->stacks[a].nests[j];
5036 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5038 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5039 nest->n_areas[at] = 0;
5041 enum pivot_axis_type ata, atb;
5042 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5044 ata = PIVOT_AXIS_ROW;
5045 atb = PIVOT_AXIS_COLUMN;
5047 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
5049 ata = PIVOT_AXIS_COLUMN;
5050 atb = PIVOT_AXIS_ROW;
5053 if (at == CTAT_LAYER
5054 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5055 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5056 ? a == atb && t->label_axis[a] != a
5059 for (size_t k = nest->n - 1; k < nest->n; k--)
5060 if (k != nest->scale_idx)
5062 nest->areas[at][nest->n_areas[at]++] = k;
5068 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5069 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5070 : at == CTAT_TABLE ? true
5074 for (size_t k = 0; k < nest->n; k++)
5075 if (k != nest->scale_idx)
5076 nest->areas[at][nest->n_areas[at]++] = k;
5082 #define L PIVOT_AXIS_LAYER
5083 n_drop = (t->clabels_from_axis == L ? a != L
5084 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5085 : t->clabels_from_axis == a ? 2
5092 n_drop = a == ata && t->label_axis[ata] == atb;
5097 n_drop = (a == ata ? t->label_axis[ata] == atb
5099 : t->clabels_from_axis == atb ? -1
5100 : t->clabels_to_axis != atb ? 1
5112 size_t n = nest->n_areas[at];
5115 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5116 nest->n_areas[at]--;
5121 for (int i = 0; i < n_drop; i++)
5122 if (nest->n_areas[at] > 0)
5123 nest->n_areas[at]--;
5130 struct ctables_nest *nest = xmalloc (sizeof *nest);
5131 *nest = (struct ctables_nest) {
5133 .scale_idx = SIZE_MAX,
5134 .summary_idx = SIZE_MAX
5136 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5138 /* There's no point in moving labels away from an axis that has no
5139 labels, so avoid dealing with the special cases around that. */
5140 t->label_axis[a] = a;
5143 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5144 for (size_t i = 0; i < stack->n; i++)
5146 struct ctables_nest *nest = &stack->nests[i];
5147 if (!nest->specs[CSV_CELL].n)
5149 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5150 ss->specs = xmalloc (sizeof *ss->specs);
5153 enum ctables_summary_function function
5154 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5158 nest->summary_idx = nest->n - 1;
5159 ss->var = nest->vars[nest->summary_idx];
5161 *ss->specs = (struct ctables_summary_spec) {
5162 .function = function,
5163 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5164 .format = ctables_summary_default_format (function, ss->var),
5167 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5168 &nest->specs[CSV_CELL]);
5170 else if (!nest->specs[CSV_TOTAL].n)
5171 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5172 &nest->specs[CSV_CELL]);
5174 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5175 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5177 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5178 for (size_t i = 0; i < nest->specs[sv].n; i++)
5180 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5181 const struct ctables_function_info *cfi =
5182 &ctables_function_info[ss->function];
5184 ss->calc_area = rotate_area (ss->calc_area);
5188 if (t->ctables->smissing_listwise)
5190 struct variable **listwise_vars = NULL;
5192 size_t allocated = 0;
5194 for (size_t j = nest->group_head; j < stack->n; j++)
5196 const struct ctables_nest *other_nest = &stack->nests[j];
5197 if (other_nest->group_head != nest->group_head)
5200 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5203 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5204 sizeof *listwise_vars);
5205 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5208 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5211 listwise_vars = xmemdup (listwise_vars,
5212 n * sizeof *listwise_vars);
5213 nest->specs[sv].listwise_vars = listwise_vars;
5214 nest->specs[sv].n_listwise_vars = n;
5219 struct ctables_summary_spec_set *merged = &t->summary_specs;
5220 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5222 for (size_t j = 0; j < stack->n; j++)
5224 const struct ctables_nest *nest = &stack->nests[j];
5226 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5227 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5232 struct merge_item min = items[0];
5233 for (size_t j = 1; j < n_left; j++)
5234 if (merge_item_compare_3way (&items[j], &min) < 0)
5237 if (merged->n >= merged->allocated)
5238 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5239 sizeof *merged->specs);
5240 merged->specs[merged->n++] = min.set->specs[min.ofs];
5242 for (size_t j = 0; j < n_left; )
5244 if (merge_item_compare_3way (&items[j], &min) == 0)
5246 struct merge_item *item = &items[j];
5247 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5248 if (item->ofs >= item->set->n)
5250 items[j] = items[--n_left];
5259 size_t allocated_sum_vars = 0;
5260 enumerate_sum_vars (t->axes[t->summary_axis],
5261 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5263 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5264 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5268 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5269 enum pivot_axis_type a)
5271 struct ctables_stack *stack = &t->stacks[a];
5272 for (size_t i = 0; i < stack->n; i++)
5274 const struct ctables_nest *nest = &stack->nests[i];
5275 const struct variable *var = nest->vars[nest->n - 1];
5276 const union value *value = case_data (c, var);
5278 if (var_is_numeric (var) && value->f == SYSMIS)
5281 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5283 ctables_value_insert (t, value, var_get_width (var));
5288 ctables_add_category_occurrences (const struct variable *var,
5289 struct hmap *occurrences,
5290 const struct ctables_categories *cats)
5292 const struct val_labs *val_labs = var_get_value_labels (var);
5294 for (size_t i = 0; i < cats->n_cats; i++)
5296 const struct ctables_category *c = &cats->cats[i];
5300 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5306 int width = var_get_width (var);
5308 value_init (&value, width);
5309 value_copy_buf_rpad (&value, width,
5310 CHAR_CAST (uint8_t *, c->string.string),
5311 c->string.length, ' ');
5312 ctables_add_occurrence (var, &value, occurrences);
5313 value_destroy (&value, width);
5318 assert (var_is_numeric (var));
5319 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5320 vl = val_labs_next (val_labs, vl))
5321 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5322 ctables_add_occurrence (var, &vl->value, occurrences);
5326 assert (var_is_alpha (var));
5327 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5328 vl = val_labs_next (val_labs, vl))
5329 if (in_string_range (&vl->value, var, c->srange))
5330 ctables_add_occurrence (var, &vl->value, occurrences);
5334 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5335 vl = val_labs_next (val_labs, vl))
5336 if (var_is_value_missing (var, &vl->value))
5337 ctables_add_occurrence (var, &vl->value, occurrences);
5341 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5342 vl = val_labs_next (val_labs, vl))
5343 ctables_add_occurrence (var, &vl->value, occurrences);
5346 case CCT_POSTCOMPUTE:
5356 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5357 vl = val_labs_next (val_labs, vl))
5358 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5359 ctables_add_occurrence (var, &vl->value, occurrences);
5362 case CCT_EXCLUDED_MISSING:
5369 ctables_section_recurse_add_empty_categories (
5370 struct ctables_section *s,
5371 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5372 enum pivot_axis_type a, size_t a_idx)
5374 if (a >= PIVOT_N_AXES)
5375 ctables_cell_insert__ (s, c, cats);
5376 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5377 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5380 const struct variable *var = s->nests[a]->vars[a_idx];
5381 const struct ctables_categories *categories = s->table->categories[
5382 var_get_dict_index (var)];
5383 int width = var_get_width (var);
5384 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5385 const struct ctables_occurrence *o;
5386 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5388 union value *value = case_data_rw (c, var);
5389 value_destroy (value, width);
5390 value_clone (value, &o->value, width);
5391 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5392 assert (cats[a][a_idx] != NULL);
5393 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5396 for (size_t i = 0; i < categories->n_cats; i++)
5398 const struct ctables_category *cat = &categories->cats[i];
5399 if (cat->type == CCT_POSTCOMPUTE)
5401 cats[a][a_idx] = cat;
5402 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5409 ctables_section_add_empty_categories (struct ctables_section *s)
5411 bool show_empty = false;
5412 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5414 for (size_t k = 0; k < s->nests[a]->n; k++)
5415 if (k != s->nests[a]->scale_idx)
5417 const struct variable *var = s->nests[a]->vars[k];
5418 const struct ctables_categories *cats = s->table->categories[
5419 var_get_dict_index (var)];
5420 if (cats->show_empty)
5423 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5429 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5430 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5431 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5432 const struct ctables_category **cats[PIVOT_N_AXES] =
5434 [PIVOT_AXIS_LAYER] = layer_cats,
5435 [PIVOT_AXIS_ROW] = row_cats,
5436 [PIVOT_AXIS_COLUMN] = column_cats,
5438 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5439 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5444 ctables_section_clear (struct ctables_section *s)
5446 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5448 const struct ctables_nest *nest = s->nests[a];
5449 for (size_t i = 0; i < nest->n; i++)
5450 if (i != nest->scale_idx)
5452 const struct variable *var = nest->vars[i];
5453 int width = var_get_width (var);
5454 struct ctables_occurrence *o, *next;
5455 struct hmap *map = &s->occurrences[a][i];
5456 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5458 value_destroy (&o->value, width);
5459 hmap_delete (map, &o->node);
5466 struct ctables_cell *cell, *next_cell;
5467 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5469 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5471 const struct ctables_nest *nest = s->nests[a];
5472 for (size_t i = 0; i < nest->n; i++)
5473 if (i != nest->scale_idx)
5474 value_destroy (&cell->axes[a].cvs[i].value,
5475 var_get_width (nest->vars[i]));
5476 free (cell->axes[a].cvs);
5479 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5480 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5481 for (size_t i = 0; i < specs->n; i++)
5482 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5483 free (cell->summaries);
5485 hmap_delete (&s->cells, &cell->node);
5488 hmap_shrink (&s->cells);
5490 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5492 struct ctables_area *area, *next_area;
5493 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5497 hmap_delete (&s->areas[at], &area->node);
5500 hmap_shrink (&s->areas[at]);
5505 ctables_section_uninit (struct ctables_section *s)
5507 ctables_section_clear (s);
5509 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5511 struct ctables_nest *nest = s->nests[a];
5512 for (size_t i = 0; i < nest->n; i++)
5513 hmap_destroy (&s->occurrences[a][i]);
5514 free (s->occurrences[a]);
5517 hmap_destroy (&s->cells);
5518 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5519 hmap_destroy (&s->areas[at]);
5523 ctables_table_clear (struct ctables_table *t)
5525 for (size_t i = 0; i < t->n_sections; i++)
5526 ctables_section_clear (&t->sections[i]);
5528 if (t->clabels_example)
5530 int width = var_get_width (t->clabels_example);
5531 struct ctables_value *value, *next_value;
5532 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5533 &t->clabels_values_map)
5535 value_destroy (&value->value, width);
5536 hmap_delete (&t->clabels_values_map, &value->node);
5539 hmap_shrink (&t->clabels_values_map);
5541 free (t->clabels_values);
5542 t->clabels_values = NULL;
5543 t->n_clabels_values = 0;
5548 ctables_execute (struct dataset *ds, struct casereader *input,
5551 for (size_t i = 0; i < ct->n_tables; i++)
5553 struct ctables_table *t = ct->tables[i];
5554 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5555 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5556 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5557 sizeof *t->sections);
5558 size_t ix[PIVOT_N_AXES];
5559 ctables_table_add_section (t, 0, ix);
5562 struct dictionary *dict = dataset_dict (ds);
5564 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5565 struct casegrouper *grouper
5567 ? casegrouper_create_splits (input, dict)
5568 : casegrouper_create_vars (input, NULL, 0));
5569 struct casereader *group;
5570 while (casegrouper_get_next_group (grouper, &group))
5574 struct ccase *c = casereader_peek (group, 0);
5577 output_split_file_values (ds, c);
5582 bool warn_on_invalid = true;
5583 for (struct ccase *c = casereader_read (group); c;
5584 case_unref (c), c = casereader_read (group))
5586 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5587 double e_weight = (ct->e_weight
5588 ? var_force_valid_weight (ct->e_weight,
5589 case_num (c, ct->e_weight),
5593 [CTW_DICTIONARY] = d_weight,
5594 [CTW_EFFECTIVE] = e_weight,
5595 [CTW_UNWEIGHTED] = 1.0,
5598 for (size_t i = 0; i < ct->n_tables; i++)
5600 struct ctables_table *t = ct->tables[i];
5602 for (size_t j = 0; j < t->n_sections; j++)
5603 ctables_cell_insert (&t->sections[j], c, weight);
5605 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5606 if (t->label_axis[a] != a)
5607 ctables_insert_clabels_values (t, c, a);
5610 casereader_destroy (group);
5612 for (size_t i = 0; i < ct->n_tables; i++)
5614 struct ctables_table *t = ct->tables[i];
5616 if (t->clabels_example)
5617 ctables_sort_clabels_values (t);
5619 for (size_t j = 0; j < t->n_sections; j++)
5620 ctables_section_add_empty_categories (&t->sections[j]);
5622 ctables_table_output (ct, t);
5623 ctables_table_clear (t);
5626 return casegrouper_destroy (grouper);
5629 static struct ctables_postcompute *
5630 ctables_find_postcompute (struct ctables *ct, const char *name)
5632 struct ctables_postcompute *pc;
5633 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5634 utf8_hash_case_string (name, 0), &ct->postcomputes)
5635 if (!utf8_strcasecmp (pc->name, name))
5641 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5644 int pcompute_start = lex_ofs (lexer) - 1;
5646 if (!lex_match (lexer, T_AND))
5648 lex_error_expecting (lexer, "&");
5651 if (!lex_force_id (lexer))
5654 char *name = ss_xstrdup (lex_tokss (lexer));
5657 if (!lex_force_match (lexer, T_EQUALS)
5658 || !lex_force_match_id (lexer, "EXPR")
5659 || !lex_force_match (lexer, T_LPAREN))
5665 int expr_start = lex_ofs (lexer);
5666 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5667 int expr_end = lex_ofs (lexer) - 1;
5668 if (!expr || !lex_force_match (lexer, T_RPAREN))
5670 ctables_pcexpr_destroy (expr);
5674 int pcompute_end = lex_ofs (lexer) - 1;
5676 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5679 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5682 msg_at (SW, location, _("New definition of &%s will override the "
5683 "previous definition."),
5685 msg_at (SN, pc->location, _("This is the previous definition."));
5687 ctables_pcexpr_destroy (pc->expr);
5688 msg_location_destroy (pc->location);
5693 pc = xmalloc (sizeof *pc);
5694 *pc = (struct ctables_postcompute) { .name = name };
5695 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5696 utf8_hash_case_string (pc->name, 0));
5699 pc->location = location;
5701 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5706 ctables_parse_pproperties_format (struct lexer *lexer,
5707 struct ctables_summary_spec_set *sss)
5709 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5711 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5712 && !(lex_token (lexer) == T_ID
5713 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5714 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5715 lex_tokss (lexer)))))
5717 /* Parse function. */
5718 enum ctables_summary_function function;
5719 enum ctables_weighting weighting;
5720 enum ctables_area_type area;
5721 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5724 /* Parse percentile. */
5725 double percentile = 0;
5726 if (function == CTSF_PTILE)
5728 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5730 percentile = lex_number (lexer);
5735 struct fmt_spec format;
5736 bool is_ctables_format;
5737 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5740 if (sss->n >= sss->allocated)
5741 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5742 sizeof *sss->specs);
5743 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5744 .function = function,
5745 .weighting = weighting,
5748 .percentile = percentile,
5750 .is_ctables_format = is_ctables_format,
5756 ctables_summary_spec_set_uninit (sss);
5761 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5763 struct ctables_postcompute **pcs = NULL;
5765 size_t allocated_pcs = 0;
5767 while (lex_match (lexer, T_AND))
5769 if (!lex_force_id (lexer))
5771 struct ctables_postcompute *pc
5772 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5775 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5780 if (n_pcs >= allocated_pcs)
5781 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5785 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5787 if (lex_match_id (lexer, "LABEL"))
5789 lex_match (lexer, T_EQUALS);
5790 if (!lex_force_string (lexer))
5793 for (size_t i = 0; i < n_pcs; i++)
5795 free (pcs[i]->label);
5796 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5801 else if (lex_match_id (lexer, "FORMAT"))
5803 lex_match (lexer, T_EQUALS);
5805 struct ctables_summary_spec_set sss;
5806 if (!ctables_parse_pproperties_format (lexer, &sss))
5809 for (size_t i = 0; i < n_pcs; i++)
5812 ctables_summary_spec_set_uninit (pcs[i]->specs);
5814 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5815 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5817 ctables_summary_spec_set_uninit (&sss);
5819 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5821 lex_match (lexer, T_EQUALS);
5822 bool hide_source_cats;
5823 if (!parse_bool (lexer, &hide_source_cats))
5825 for (size_t i = 0; i < n_pcs; i++)
5826 pcs[i]->hide_source_cats = hide_source_cats;
5830 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
5843 put_strftime (struct string *out, time_t now, const char *format)
5845 const struct tm *tm = localtime (&now);
5847 strftime (value, sizeof value, format, tm);
5848 ds_put_cstr (out, value);
5852 skip_prefix (struct substring *s, struct substring prefix)
5854 if (ss_starts_with (*s, prefix))
5856 ss_advance (s, prefix.length);
5864 put_table_expression (struct string *out, struct lexer *lexer,
5865 struct dictionary *dict, int expr_start, int expr_end)
5868 for (int ofs = expr_start; ofs < expr_end; ofs++)
5870 const struct token *t = lex_ofs_token (lexer, ofs);
5871 if (t->type == T_LBRACK)
5873 else if (t->type == T_RBRACK && nest > 0)
5879 else if (t->type == T_ID)
5881 const struct variable *var
5882 = dict_lookup_var (dict, t->string.string);
5883 const char *label = var ? var_get_label (var) : NULL;
5884 ds_put_cstr (out, label ? label : t->string.string);
5888 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5889 ds_put_byte (out, ' ');
5891 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5892 ds_put_cstr (out, repr);
5895 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5896 ds_put_byte (out, ' ');
5902 put_title_text (struct string *out, struct substring in, time_t now,
5903 struct lexer *lexer, struct dictionary *dict,
5904 int expr_start, int expr_end)
5908 size_t chunk = ss_find_byte (in, ')');
5909 ds_put_substring (out, ss_head (in, chunk));
5910 ss_advance (&in, chunk);
5911 if (ss_is_empty (in))
5914 if (skip_prefix (&in, ss_cstr (")DATE")))
5915 put_strftime (out, now, "%x");
5916 else if (skip_prefix (&in, ss_cstr (")TIME")))
5917 put_strftime (out, now, "%X");
5918 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5919 put_table_expression (out, lexer, dict, expr_start, expr_end);
5922 ds_put_byte (out, ')');
5923 ss_advance (&in, 1);
5929 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5931 struct casereader *input = NULL;
5933 struct measure_guesser *mg = measure_guesser_create (ds);
5936 input = proc_open (ds);
5937 measure_guesser_run (mg, input);
5938 measure_guesser_destroy (mg);
5941 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5942 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5943 enum settings_value_show tvars = settings_get_show_variables ();
5944 for (size_t i = 0; i < n_vars; i++)
5945 vlabels[i] = (enum ctables_vlabel) tvars;
5947 struct pivot_table_look *look = pivot_table_look_unshare (
5948 pivot_table_look_ref (pivot_table_look_get_default ()));
5949 look->omit_empty = false;
5951 struct ctables *ct = xmalloc (sizeof *ct);
5952 *ct = (struct ctables) {
5953 .dict = dataset_dict (ds),
5955 .ctables_formats = FMT_SETTINGS_INIT,
5957 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5960 time_t now = time (NULL);
5965 const char *dot_string;
5966 const char *comma_string;
5968 static const struct ctf ctfs[4] = {
5969 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5970 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5971 { CTEF_PAREN, "-,(,),", "-.(.)." },
5972 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5974 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5975 for (size_t i = 0; i < 4; i++)
5977 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5978 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5979 fmt_number_style_from_string (s));
5982 if (!lex_force_match (lexer, T_SLASH))
5985 while (!lex_match_id (lexer, "TABLE"))
5987 if (lex_match_id (lexer, "FORMAT"))
5989 double widths[2] = { SYSMIS, SYSMIS };
5990 double units_per_inch = 72.0;
5992 while (lex_token (lexer) != T_SLASH)
5994 if (lex_match_id (lexer, "MINCOLWIDTH"))
5996 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5999 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6001 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6004 else if (lex_match_id (lexer, "UNITS"))
6006 lex_match (lexer, T_EQUALS);
6007 if (lex_match_id (lexer, "POINTS"))
6008 units_per_inch = 72.0;
6009 else if (lex_match_id (lexer, "INCHES"))
6010 units_per_inch = 1.0;
6011 else if (lex_match_id (lexer, "CM"))
6012 units_per_inch = 2.54;
6015 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6019 else if (lex_match_id (lexer, "EMPTY"))
6024 lex_match (lexer, T_EQUALS);
6025 if (lex_match_id (lexer, "ZERO"))
6027 /* Nothing to do. */
6029 else if (lex_match_id (lexer, "BLANK"))
6030 ct->zero = xstrdup ("");
6031 else if (lex_force_string (lexer))
6033 ct->zero = ss_xstrdup (lex_tokss (lexer));
6039 else if (lex_match_id (lexer, "MISSING"))
6041 lex_match (lexer, T_EQUALS);
6042 if (!lex_force_string (lexer))
6046 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6047 ? ss_xstrdup (lex_tokss (lexer))
6053 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6054 "UNITS", "EMPTY", "MISSING");
6059 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6060 && widths[0] > widths[1])
6062 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6066 for (size_t i = 0; i < 2; i++)
6067 if (widths[i] != SYSMIS)
6069 int *wr = ct->look->width_ranges[TABLE_HORZ];
6070 wr[i] = widths[i] / units_per_inch * 96.0;
6075 else if (lex_match_id (lexer, "VLABELS"))
6077 if (!lex_force_match_id (lexer, "VARIABLES"))
6079 lex_match (lexer, T_EQUALS);
6081 struct variable **vars;
6083 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6087 if (!lex_force_match_id (lexer, "DISPLAY"))
6092 lex_match (lexer, T_EQUALS);
6094 enum ctables_vlabel vlabel;
6095 if (lex_match_id (lexer, "DEFAULT"))
6096 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6097 else if (lex_match_id (lexer, "NAME"))
6099 else if (lex_match_id (lexer, "LABEL"))
6100 vlabel = CTVL_LABEL;
6101 else if (lex_match_id (lexer, "BOTH"))
6103 else if (lex_match_id (lexer, "NONE"))
6107 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6113 for (size_t i = 0; i < n_vars; i++)
6114 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6117 else if (lex_match_id (lexer, "MRSETS"))
6119 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6121 lex_match (lexer, T_EQUALS);
6122 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6125 else if (lex_match_id (lexer, "SMISSING"))
6127 if (lex_match_id (lexer, "VARIABLE"))
6128 ct->smissing_listwise = false;
6129 else if (lex_match_id (lexer, "LISTWISE"))
6130 ct->smissing_listwise = true;
6133 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6137 else if (lex_match_id (lexer, "PCOMPUTE"))
6139 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6142 else if (lex_match_id (lexer, "PPROPERTIES"))
6144 if (!ctables_parse_pproperties (lexer, ct))
6147 else if (lex_match_id (lexer, "WEIGHT"))
6149 if (!lex_force_match_id (lexer, "VARIABLE"))
6151 lex_match (lexer, T_EQUALS);
6152 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6156 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6158 if (lex_match_id (lexer, "COUNT"))
6160 lex_match (lexer, T_EQUALS);
6161 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6164 ct->hide_threshold = lex_integer (lexer);
6167 else if (ct->hide_threshold == 0)
6168 ct->hide_threshold = 5;
6172 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6173 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6174 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6178 if (!lex_force_match (lexer, T_SLASH))
6182 size_t allocated_tables = 0;
6185 if (ct->n_tables >= allocated_tables)
6186 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6187 sizeof *ct->tables);
6189 struct ctables_category *cat = xmalloc (sizeof *cat);
6190 *cat = (struct ctables_category) {
6192 .include_missing = false,
6193 .sort_ascending = true,
6196 struct ctables_categories *c = xmalloc (sizeof *c);
6197 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6198 *c = (struct ctables_categories) {
6205 struct ctables_categories **categories = xnmalloc (n_vars,
6206 sizeof *categories);
6207 for (size_t i = 0; i < n_vars; i++)
6210 struct ctables_table *t = xmalloc (sizeof *t);
6211 *t = (struct ctables_table) {
6213 .slabels_axis = PIVOT_AXIS_COLUMN,
6214 .slabels_visible = true,
6215 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6217 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6218 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6219 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6221 .clabels_from_axis = PIVOT_AXIS_LAYER,
6222 .clabels_to_axis = PIVOT_AXIS_LAYER,
6223 .categories = categories,
6224 .n_categories = n_vars,
6227 ct->tables[ct->n_tables++] = t;
6229 lex_match (lexer, T_EQUALS);
6230 int expr_start = lex_ofs (lexer);
6231 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6232 &t->axes[PIVOT_AXIS_ROW]))
6234 if (lex_match (lexer, T_BY))
6236 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6237 &t->axes[PIVOT_AXIS_COLUMN]))
6240 if (lex_match (lexer, T_BY))
6242 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6243 &t->axes[PIVOT_AXIS_LAYER]))
6247 int expr_end = lex_ofs (lexer);
6249 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6250 && !t->axes[PIVOT_AXIS_LAYER])
6252 lex_error (lexer, _("At least one variable must be specified."));
6256 const struct ctables_axis *scales[PIVOT_N_AXES];
6257 size_t n_scales = 0;
6258 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6260 scales[a] = find_scale (t->axes[a]);
6266 msg (SE, _("Scale variables may appear only on one axis."));
6267 if (scales[PIVOT_AXIS_ROW])
6268 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6269 _("This scale variable appears on the rows axis."));
6270 if (scales[PIVOT_AXIS_COLUMN])
6271 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6272 _("This scale variable appears on the columns axis."));
6273 if (scales[PIVOT_AXIS_LAYER])
6274 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6275 _("This scale variable appears on the layer axis."));
6279 const struct ctables_axis *summaries[PIVOT_N_AXES];
6280 size_t n_summaries = 0;
6281 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6283 summaries[a] = (scales[a]
6285 : find_categorical_summary_spec (t->axes[a]));
6289 if (n_summaries > 1)
6291 msg (SE, _("Summaries may appear only on one axis."));
6292 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6295 msg_at (SN, summaries[a]->loc,
6297 ? _("This variable on the rows axis has a summary.")
6298 : a == PIVOT_AXIS_COLUMN
6299 ? _("This variable on the columns axis has a summary.")
6300 : _("This variable on the layers axis has a summary."));
6302 msg_at (SN, summaries[a]->loc,
6303 _("This is a scale variable, so it always has a "
6304 "summary even if the syntax does not explicitly "
6309 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6310 if (n_summaries ? summaries[a] : t->axes[a])
6312 t->summary_axis = a;
6316 if (lex_token (lexer) == T_ENDCMD)
6318 if (!ctables_prepare_table (t))
6322 if (!lex_force_match (lexer, T_SLASH))
6325 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6327 if (lex_match_id (lexer, "SLABELS"))
6329 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6331 if (lex_match_id (lexer, "POSITION"))
6333 lex_match (lexer, T_EQUALS);
6334 if (lex_match_id (lexer, "COLUMN"))
6335 t->slabels_axis = PIVOT_AXIS_COLUMN;
6336 else if (lex_match_id (lexer, "ROW"))
6337 t->slabels_axis = PIVOT_AXIS_ROW;
6338 else if (lex_match_id (lexer, "LAYER"))
6339 t->slabels_axis = PIVOT_AXIS_LAYER;
6342 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6346 else if (lex_match_id (lexer, "VISIBLE"))
6348 lex_match (lexer, T_EQUALS);
6349 if (!parse_bool (lexer, &t->slabels_visible))
6354 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6359 else if (lex_match_id (lexer, "CLABELS"))
6361 if (lex_match_id (lexer, "AUTO"))
6363 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6364 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6366 else if (lex_match_id (lexer, "ROWLABELS"))
6368 lex_match (lexer, T_EQUALS);
6369 if (lex_match_id (lexer, "OPPOSITE"))
6370 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6371 else if (lex_match_id (lexer, "LAYER"))
6372 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6375 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6379 else if (lex_match_id (lexer, "COLLABELS"))
6381 lex_match (lexer, T_EQUALS);
6382 if (lex_match_id (lexer, "OPPOSITE"))
6383 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6384 else if (lex_match_id (lexer, "LAYER"))
6385 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6388 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6394 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6399 else if (lex_match_id (lexer, "CRITERIA"))
6401 if (!lex_force_match_id (lexer, "CILEVEL"))
6403 lex_match (lexer, T_EQUALS);
6405 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6407 t->cilevel = lex_number (lexer);
6410 else if (lex_match_id (lexer, "CATEGORIES"))
6412 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6416 else if (lex_match_id (lexer, "TITLES"))
6421 if (lex_match_id (lexer, "CAPTION"))
6422 textp = &t->caption;
6423 else if (lex_match_id (lexer, "CORNER"))
6425 else if (lex_match_id (lexer, "TITLE"))
6429 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6432 lex_match (lexer, T_EQUALS);
6434 struct string s = DS_EMPTY_INITIALIZER;
6435 while (lex_is_string (lexer))
6437 if (!ds_is_empty (&s))
6438 ds_put_byte (&s, ' ');
6439 put_title_text (&s, lex_tokss (lexer), now,
6440 lexer, dataset_dict (ds),
6441 expr_start, expr_end);
6445 *textp = ds_steal_cstr (&s);
6447 while (lex_token (lexer) != T_SLASH
6448 && lex_token (lexer) != T_ENDCMD);
6450 else if (lex_match_id (lexer, "SIGTEST"))
6452 int start_ofs = lex_ofs (lexer) - 1;
6455 t->chisq = xmalloc (sizeof *t->chisq);
6456 *t->chisq = (struct ctables_chisq) {
6458 .include_mrsets = true,
6459 .all_visible = true,
6465 if (lex_match_id (lexer, "TYPE"))
6467 lex_match (lexer, T_EQUALS);
6468 if (!lex_force_match_id (lexer, "CHISQUARE"))
6471 else if (lex_match_id (lexer, "ALPHA"))
6473 lex_match (lexer, T_EQUALS);
6474 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6476 t->chisq->alpha = lex_number (lexer);
6479 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6481 lex_match (lexer, T_EQUALS);
6482 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6485 else if (lex_match_id (lexer, "CATEGORIES"))
6487 lex_match (lexer, T_EQUALS);
6488 if (lex_match_id (lexer, "ALLVISIBLE"))
6489 t->chisq->all_visible = true;
6490 else if (lex_match_id (lexer, "SUBTOTALS"))
6491 t->chisq->all_visible = false;
6494 lex_error_expecting (lexer,
6495 "ALLVISIBLE", "SUBTOTALS");
6501 lex_error_expecting (lexer, "TYPE", "ALPHA",
6502 "INCLUDEMRSETS", "CATEGORIES");
6506 while (lex_token (lexer) != T_SLASH
6507 && lex_token (lexer) != T_ENDCMD);
6509 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6510 _("Support for SIGTEST not yet implemented."));
6513 else if (lex_match_id (lexer, "COMPARETEST"))
6515 int start_ofs = lex_ofs (lexer);
6518 t->pairwise = xmalloc (sizeof *t->pairwise);
6519 *t->pairwise = (struct ctables_pairwise) {
6521 .alpha = { .05, .05 },
6522 .adjust = BONFERRONI,
6523 .include_mrsets = true,
6524 .meansvariance_allcats = true,
6525 .all_visible = true,
6534 if (lex_match_id (lexer, "TYPE"))
6536 lex_match (lexer, T_EQUALS);
6537 if (lex_match_id (lexer, "PROP"))
6538 t->pairwise->type = PROP;
6539 else if (lex_match_id (lexer, "MEAN"))
6540 t->pairwise->type = MEAN;
6543 lex_error_expecting (lexer, "PROP", "MEAN");
6547 else if (lex_match_id (lexer, "ALPHA"))
6549 lex_match (lexer, T_EQUALS);
6551 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6553 double a0 = lex_number (lexer);
6556 lex_match (lexer, T_COMMA);
6557 if (lex_is_number (lexer))
6559 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6561 double a1 = lex_number (lexer);
6564 t->pairwise->alpha[0] = MIN (a0, a1);
6565 t->pairwise->alpha[1] = MAX (a0, a1);
6568 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6570 else if (lex_match_id (lexer, "ADJUST"))
6572 lex_match (lexer, T_EQUALS);
6573 if (lex_match_id (lexer, "BONFERRONI"))
6574 t->pairwise->adjust = BONFERRONI;
6575 else if (lex_match_id (lexer, "BH"))
6576 t->pairwise->adjust = BH;
6577 else if (lex_match_id (lexer, "NONE"))
6578 t->pairwise->adjust = 0;
6581 lex_error_expecting (lexer, "BONFERRONI", "BH",
6586 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6588 lex_match (lexer, T_EQUALS);
6589 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6592 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6594 lex_match (lexer, T_EQUALS);
6595 if (lex_match_id (lexer, "ALLCATS"))
6596 t->pairwise->meansvariance_allcats = true;
6597 else if (lex_match_id (lexer, "TESTEDCATS"))
6598 t->pairwise->meansvariance_allcats = false;
6601 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6605 else if (lex_match_id (lexer, "CATEGORIES"))
6607 lex_match (lexer, T_EQUALS);
6608 if (lex_match_id (lexer, "ALLVISIBLE"))
6609 t->pairwise->all_visible = true;
6610 else if (lex_match_id (lexer, "SUBTOTALS"))
6611 t->pairwise->all_visible = false;
6614 lex_error_expecting (lexer, "ALLVISIBLE",
6619 else if (lex_match_id (lexer, "MERGE"))
6621 lex_match (lexer, T_EQUALS);
6622 if (!parse_bool (lexer, &t->pairwise->merge))
6625 else if (lex_match_id (lexer, "STYLE"))
6627 lex_match (lexer, T_EQUALS);
6628 if (lex_match_id (lexer, "APA"))
6629 t->pairwise->apa_style = true;
6630 else if (lex_match_id (lexer, "SIMPLE"))
6631 t->pairwise->apa_style = false;
6634 lex_error_expecting (lexer, "APA", "SIMPLE");
6638 else if (lex_match_id (lexer, "SHOWSIG"))
6640 lex_match (lexer, T_EQUALS);
6641 if (!parse_bool (lexer, &t->pairwise->show_sig))
6646 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6647 "INCLUDEMRSETS", "MEANSVARIANCE",
6648 "CATEGORIES", "MERGE", "STYLE",
6653 while (lex_token (lexer) != T_SLASH
6654 && lex_token (lexer) != T_ENDCMD);
6656 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6657 _("Support for COMPARETEST not yet implemented."));
6662 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6663 "CRITERIA", "CATEGORIES", "TITLES",
6664 "SIGTEST", "COMPARETEST");
6668 if (!lex_match (lexer, T_SLASH))
6672 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6674 t->clabels_from_axis = PIVOT_AXIS_ROW;
6675 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6677 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6681 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6682 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6683 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6685 if (!ctables_prepare_table (t))
6688 while (lex_token (lexer) != T_ENDCMD);
6691 input = proc_open (ds);
6692 bool ok = ctables_execute (ds, input, ct);
6693 ok = proc_commit (ds) && ok;
6695 ctables_destroy (ct);
6696 return ok ? CMD_SUCCESS : CMD_FAILURE;
6701 ctables_destroy (ct);