1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 /* The three forms of weighting supported by CTABLES. */
/* Arrays elsewhere in this file are dimensioned by N_CTWS (for example the
   count[], valid[], and total[] members that hold per-area weights),
   presumably so that they can be indexed by these values -- confirm against
   the definition of N_CTWS. */
61 enum ctables_weighting
63 CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). */
64 CTW_DICTIONARY, /* Dictionary weight. */
65 CTW_UNWEIGHTED /* No weight. */
69 /* CTABLES table areas. */
/* Each value has a spelling in ctables_area_type_name[], and N_CTATS bounds
   loops over the values (see parse_ctables_summary_function()). */
71 enum ctables_area_type
73 /* Within a section, where stacked variables divide one section from
76 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
77 parse_ctables_summary_function() parses correctly. */
/* Why the order matters: the parser tries the area names in enumeration
   order against the start of the function name, and "LAYER" is itself a
   prefix of "LAYERROW" and "LAYERCOL", so the longer names must be tried
   first. */
78 CTAT_TABLE, /* All layers of a whole section. */
79 CTAT_LAYERROW, /* Row in one layer within a section. */
80 CTAT_LAYERCOL, /* Column in one layer within a section. */
81 CTAT_LAYER, /* One layer within a section. */
83 /* Within a subtable, where a subtable pairs an innermost row variable with
84 an innermost column variable within a single layer. */
85 CTAT_SUBTABLE, /* Whole subtable. */
86 CTAT_ROW, /* Row within a subtable. */
87 CTAT_COL, /* Column within a subtable. */
/* Syntax spelling of each area type, indexed by enum ctables_area_type.

   parse_ctables_summary_function() matches these strings against the start
   of a summary function name, and ctables_summary_function_name() emits them
   when it reconstructs the name of an area function. */
91 static const char *ctables_area_type_name[N_CTATS] = {
92 [CTAT_TABLE] = "TABLE",
93 [CTAT_LAYER] = "LAYER",
94 [CTAT_LAYERROW] = "LAYERROW",
95 [CTAT_LAYERCOL] = "LAYERCOL",
96 [CTAT_SUBTABLE] = "SUBTABLE",
101 /* Summary statistics for an area. */
/* 'node' is a hash map node, so areas are kept in an hmap.
   NOTE(review): 'example' is presumably a representative cell that belongs to
   the area -- confirm against the code that fills it in. */
104 struct hmap_node node;
105 const struct ctables_cell *example;
107 /* Sequence number used for CTSF_ID. */
110 /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and
111 CTSF_areaPCT_TOTALN. */
/* Each array has N_CTWS elements, i.e. one slot per form of weighting. */
112 double count[N_CTWS];
113 double valid[N_CTWS];
114 double total[N_CTWS];
116 /* Sums for CTSF_areaPCT_SUM. */
/* 'sum_var_idx' in struct ctables_summary_spec is documented as an offset
   into this array. */
117 struct ctables_sum *sums;
125 /* CTABLES summary functions. */
/* The function type decides which name prefixes a summary function accepts.
   The ctables_function_info[] initializer derives its flags from the type:
   a U prefix is accepted for CTFT_UCELL, CTFT_UECELL, and CTFT_AREA; an E
   prefix only for CTFT_UECELL; and an area prefix is required for
   CTFT_AREA. */
127 enum ctables_function_type
129 /* A function that operates on data in a single cell. It operates on
130 effective weights. It does not have an unweighted version. */
133 /* A function that operates on data in a single cell. The function
134 operates on effective weights and has a U-prefixed unweighted
138 /* A function that operates on data in a single cell. It operates on
139 dictionary weights, and has U-prefixed unweighted version and an
140 E-prefixed effective weight version. */
143 /* A function that operates on an area of cells. It operates on effective
144 weights and has a U-prefixed unweighted version. */
/* Default output format classes for summary functions.
   ctables_summary_default_format() turns one of these into a concrete
   struct fmt_spec. */
150 CTF_COUNT, /* F40.0. */
151 CTF_PERCENT, /* PCT40.1. */
152 CTF_GENERAL /* Variable's print format. */
/* Which variables a summary function may be applied to.  add_summary_spec()
   switches on this value (obtained from ctables_function_availability())
   before accepting a summary specification for a variable. */
155 enum ctables_function_availability
157 CTFA_ALL, /* Any variables. */
158 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
159 //CTFA_MRSETS, /* Only multiple-response sets */
/* The summary functions.  The enumerators are not written out here: S() is
   defined to expand to its ENUM argument followed by a comma, so including
   ctables.inc produces one enumerator per S() entry in that file. */
162 enum ctables_summary_function
164 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
165 #include "ctables.inc"
/* Here every S() entry expands to "+1", so including ctables.inc yields a sum
   with one term per entry.  NOTE(review): presumably this sum defines
   N_CTSF_FUNCTIONS, which dimensions ctables_function_info[] -- confirm. */
170 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
172 #include "ctables.inc"
/* Static properties of one summary function, filled in from its S() entry in
   ctables.inc (see the ctables_function_info[] table). */
176 struct ctables_function_info
/* Name with the weighting and area prefixes removed: the parser compares
   it against what is left of the token after stripping those prefixes. */
178 struct substring basename;
179 enum ctables_function_type type;
180 enum ctables_format format;
181 enum ctables_function_availability availability;
183 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
184 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
185 bool is_area; /* Needs an area prefix. */
/* Properties of every summary function, built from ctables.inc and looked up
   by enum ctables_summary_function value (as in
   ctables_summary_function_name()).  The three prefix flags are computed
   from each entry's TYPE rather than listed separately. */
187 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
188 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
190 .basename = SS_LITERAL_INITIALIZER (NAME), \
193 .availability = AVAILABILITY, \
194 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
195 .e_prefix = (TYPE) == CTFT_UECELL, \
196 .is_area = (TYPE) == CTFT_AREA \
198 #include "ctables.inc"
/* Returns the default output format for summary FUNCTION applied to VAR.

   The format class of FUNCTION comes from a local table built from
   ctables.inc.  Depending on that class the result is F with width 40 and no
   decimals, PCT with width 40 and 1 decimal, or a copy of VAR's print
   format. */
202 static struct fmt_spec
203 ctables_summary_default_format (enum ctables_summary_function function,
204 const struct variable *var)
206 static const enum ctables_format default_formats[] = {
207 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
208 #include "ctables.inc"
211 switch (default_formats[function])
214 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
217 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
/* VAR is dereferenced only on this path. */
220 return *var_get_print_format (var);
/* Returns the availability class of summary function F, looked up in a local
   table that is built from the AVAILABILITY column of ctables.inc. */
227 static enum ctables_function_availability
228 ctables_function_availability (enum ctables_summary_function f)
230 static enum ctables_function_availability availability[] = {
231 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
232 #include "ctables.inc"
236 return availability[f];
/* Parses a summary function name from the current token of LEXER and stores
   what it denotes in *FUNCTION, *WEIGHTING, and *AREA.

   A name consists of an optional weighting prefix (U or E), an optional area
   name (one of ctables_area_type_name[]), and a base name that must equal the
   'basename' of an entry in ctables_function_info[].  All comparisons ignore
   case.  Names ending in .LCL, .UCL, or .SE are rejected with a "not yet
   implemented" error, and any other unrecognized name is reported as a
   syntax error.

   NOTE(review): the return statements are not visible here; presumably the
   function reports success or failure to the caller -- confirm. */
240 parse_ctables_summary_function (struct lexer *lexer,
241 enum ctables_summary_function *function,
242 enum ctables_weighting *weighting,
243 enum ctables_area_type *area)
245 if (!lex_force_id (lexer))
248 struct substring name = lex_tokss (lexer);
249 if (ss_ends_with_case (name, ss_cstr (".LCL"))
250 || ss_ends_with_case (name, ss_cstr (".UCL"))
251 || ss_ends_with_case (name, ss_cstr (".SE")))
253 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
254 "is not yet implemented."));
/* Strip an optional weighting prefix.  E is only considered when U did not
   match, so at most one of 'u' and 'e' is set. */
258 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
259 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
261 bool has_area = false;
/* Try each area name, in enumeration order, against the start of what is
   left of the name. */
263 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
264 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
269 if (ss_equals_case (name, ss_cstr ("PCT")))
271 /* Special case where .COUNT suffix is omitted. */
272 *function = CTSF_areaPCT_COUNT;
273 *weighting = CTW_EFFECTIVE;
280 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
282 const struct ctables_function_info *cfi = &ctables_function_info[f];
283 if (ss_equals_case (cfi->basename, name))
/* The base name matched; now the prefixes must suit this function: U and E
   only where accepted, and an area prefix exactly when the function is an
   area function. */
286 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
289 *weighting = (e ? CTW_EFFECTIVE
291 : cfi->e_prefix ? CTW_DICTIONARY
298 lex_error (lexer, _("Syntax error expecting summary function name."));
/* Writes the syntax name of FUNCTION, as qualified by WEIGHTING and AREA,
   into BUFFER, which has room for BUFSIZE bytes.  snprintf() does the
   formatting, so the output is truncated if it does not fit.

   The name is built from three parts: a weighting prefix ("U" for
   CTW_UNWEIGHTED, nothing for CTW_DICTIONARY, and "E" for effective
   weighting when the function accepts an E prefix), the name of AREA if the
   function is an area function, and the function's base name.

   The base name substring is printed with %s, so it must be null-terminated;
   the table initializes it from a string literal. */
303 ctables_summary_function_name (enum ctables_summary_function function,
304 enum ctables_weighting weighting,
305 enum ctables_area_type area,
306 char *buffer, size_t bufsize)
308 const struct ctables_function_info *cfi = &ctables_function_info[function];
309 snprintf (buffer, bufsize, "%s%s%s",
310 (weighting == CTW_UNWEIGHTED ? "U"
311 : weighting == CTW_DICTIONARY ? ""
312 : cfi->e_prefix ? "E"
314 cfi->is_area ? ctables_area_type_name[area] : "",
315 cfi->basename.string);
/* Returns the default label for FUNCTION as qualified by WEIGHTING and AREA.

   Most labels are marked with N_() only, so this function returns the
   untranslated text and leaves translation to its caller.  The ID labels are
   deliberately left unmarked.  AREA only matters for the area functions.

   CTSF_PTILE must not be passed in, because its label depends on the
   percentile; ctables_summary_function_label() handles that case itself. */
320 ctables_summary_function_label__ (enum ctables_summary_function function,
321 enum ctables_weighting weighting,
322 enum ctables_area_type area)
/* 'w' is true for either kind of weighting, 'd' only for dictionary
   weighting, so "!w" means unweighted and "w && !d" means effective
   weighting. */
324 bool w = weighting != CTW_UNWEIGHTED;
325 bool d = weighting == CTW_DICTIONARY;
326 enum ctables_area_type a = area;
330 return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count");
332 case CTSF_areaPCT_COUNT:
335 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
336 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
337 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
338 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
339 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
340 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
341 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
345 case CTSF_areaPCT_VALIDN:
348 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
349 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
350 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
351 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
352 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
353 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
354 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
358 case CTSF_areaPCT_TOTALN:
361 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
362 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
363 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
364 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
365 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
366 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
367 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
/* MAXIMUM, MINIMUM, and RANGE have a single label regardless of weighting. */
371 case CTSF_MAXIMUM: return N_("Maximum");
372 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
373 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
374 case CTSF_MINIMUM: return N_("Minimum");
375 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
376 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
377 case CTSF_PTILE: NOT_REACHED ();
378 case CTSF_RANGE: return N_("Range");
379 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
380 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
381 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
382 case CTSF_TOTALN: return (d ? N_("Total N")
383 : w ? N_("Adjusted Total N")
384 : N_("Unweighted Total N"));
385 case CTSF_VALIDN: return (d ? N_("Valid N")
386 : w ? N_("Adjusted Valid N")
387 : N_("Unweighted Valid N"));
388 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
389 case CTSF_areaPCT_SUM:
392 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
393 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
394 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
395 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
396 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
397 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
398 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
405 /* Don't bother translating these: they are for developers only. */
406 case CTAT_TABLE: return "Table ID";
407 case CTAT_LAYER: return "Layer ID";
408 case CTAT_LAYERROW: return "Layer Row ID";
409 case CTAT_LAYERCOL: return "Layer Column ID";
410 case CTAT_SUBTABLE: return "Subtable ID";
411 case CTAT_ROW: return "Row ID";
412 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the default label for FUNCTION as
   qualified by WEIGHTING and AREA.

   CTSF_PTILE is handled here because its label embeds the percentile,
   formatted with two decimals through a translated template; the resulting
   string is handed over to the pivot_value without copying.  Every other
   function gets the fixed text from ctables_summary_function_label__(). */
420 static struct pivot_value *
421 ctables_summary_function_label (enum ctables_summary_function function,
422 enum ctables_weighting weighting,
423 enum ctables_area_type area,
426 if (function == CTSF_PTILE)
428 char *s = (weighting != CTW_UNWEIGHTED
429 ? xasprintf (_("Percentile %.2f"), percentile)
430 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
431 return pivot_value_new_user_text_nocopy (s);
434 return pivot_value_new_text (ctables_summary_function_label__ (
435 function, weighting, area));
438 /* CTABLES summaries. */
/* One summary function specification attached to a variable.
   add_summary_spec() creates these and ctables_summary_spec_clone() and
   ctables_summary_spec_uninit() copy and release them. */
440 struct ctables_summary_spec
442 /* The calculation to be performed.
444 'function' is the function to calculate. 'weighting' specifies which
445 form of weighting to use: effective, dictionary, or none (see enum
446 ctables_weighting). 'calc_area' is the area over which the
447 calculation takes place (for functions that target only an individual
448 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
449 percentile between 0 and 100 (for other functions it must be 0).
NOTE(review): the value that 'weighting' must take for functions that do
not support a choice of weighting is not stated here -- confirm. */
450 enum ctables_summary_function function;
451 enum ctables_weighting weighting;
452 enum ctables_area_type calc_area;
453 double percentile; /* CTSF_PTILE only. */
455 /* How to display the result of the calculation.
457 'label' is a user-specified label, NULL if the user didn't specify
460 'user_area' is usually the same as 'calc_area', but when category labels
461 are rotated from one axis to another it swaps rows and columns.
463 'format' is the format for displaying the output. If
464 'is_ctables_format' is true, then 'format.type' is one of the special
465 CTEF_* formats instead of the standard ones. */
467 enum ctables_area_type user_area;
468 struct fmt_spec format;
469 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
471 size_t axis_idx; /* Leaf index if summary dimension in use. */
472 size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */
/* Clones SRC into DST.  DST gets its own duplicate of SRC's label string, or
   a null label if SRC has none, so the two specs can be released
   independently. */
476 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
477 const struct ctables_summary_spec *src)
480 dst->label = xstrdup_if_nonnull (src->label);
/* Releases the resources owned by S without freeing S itself.
   NOTE(review): presumably this frees the label that
   ctables_summary_spec_clone() and add_summary_spec() duplicate --
   confirm. */
484 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
490 /* Collections of summary functions. */
/* 'specs' is a growable array: add_summary_spec() appends to it and enlarges
   it with x2nrealloc() using the set's element count and allocated
   capacity. */
492 struct ctables_summary_spec_set
494 struct ctables_summary_spec *specs;
498 /* The variable to which the summary specs are applied. */
499 struct variable *var;
501 /* Whether the variable to which the summary specs are applied is a scale
502 variable for the purpose of summarization.
504 (VALIDN and TOTALN act differently for summarizing scale and categorical
508 /* If any of these optional additional scale variables are missing, then
509 treat 'var' as if it's missing too. This is for implementing
510 SMISSING=LISTWISE. */
/* ctables_summary_spec_set_uninit() frees this array but not the variables
   it points to. */
511 struct variable **listwise_vars;
512 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.  The array of specs is newly allocated
   (or null when SRC has no specs) and each spec is cloned individually, so
   DST does not share spec storage with SRC.

   NOTE(review): no initializer for 'listwise_vars' or 'n_listwise_vars' is
   visible in the compound literal below, which would leave them zeroed in
   DST -- confirm that clones are never expected to carry listwise
   variables. */
516 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
517 const struct ctables_summary_spec_set *src)
519 struct ctables_summary_spec *specs
520 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
521 for (size_t i = 0; i < src->n; i++)
522 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
524 *dst = (struct ctables_summary_spec_set) {
529 .is_scale = src->is_scale,
/* Releases the resources owned by SET: every spec in it is uninitialized and
   the 'listwise_vars' array is freed.  SET itself is not freed by the
   statements shown here. */
534 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
536 for (size_t i = 0; i < set->n; i++)
537 ctables_summary_spec_uninit (&set->specs[i]);
538 free (set->listwise_vars);
/* Tests the listwise variables of SPECS against case C: each variable's
   numeric value in C is checked with var_is_num_missing().

   NOTE(review): the return statements are not visible here; presumably the
   result is true as soon as one listwise variable is missing and false
   otherwise -- confirm. */
543 is_listwise_missing (const struct ctables_summary_spec_set *specs,
544 const struct ccase *c)
546 for (size_t i = 0; i < specs->n_listwise_vars; i++)
548 const struct variable *var = specs->listwise_vars[i];
549 if (var_is_num_missing (var, case_num (c, var)))
556 /* CTABLES postcompute expressions. */
/* A named postcompute definition.  Definitions live in a hash map inside
   struct ctables and are looked up through ctables_find_postcompute(). */
558 struct ctables_postcompute
560 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
561 char *name; /* Name, without leading &. */
563 struct msg_location *location; /* Location of definition. */
/* Root of the parsed expression tree. */
564 struct ctables_pcexpr *expr;
566 struct ctables_summary_spec_set *specs;
567 bool hide_source_cats;
/* A node in a postcompute expression tree.

   The operation code says which of the payload members is meaningful, as the
   comment on each member indicates.  ctables_pcexpr_destroy() relies on this
   to free string payloads and subexpressions. */
570 struct ctables_pcexpr
580 enum ctables_pcexpr_op
583 CTPO_CONSTANT, /* 5 */
584 CTPO_CAT_NUMBER, /* [5] */
585 CTPO_CAT_STRING, /* ["STRING"] */
586 CTPO_CAT_NRANGE, /* [LO THRU 5] */
587 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
588 CTPO_CAT_MISSING, /* MISSING */
589 CTPO_CAT_OTHERNM, /* OTHERNM */
590 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
591 CTPO_CAT_TOTAL, /* TOTAL */
605 /* CTPO_CAT_NUMBER. */
608 /* CTPO_CAT_STRING, in dictionary encoding. */
609 struct substring string;
611 /* CTPO_CAT_NRANGE. */
614 /* CTPO_CAT_SRANGE. */
/* The parser stores a substring with a null 'string' pointer for an open
   end (LO or HI). */
615 struct substring srange[2];
617 /* CTPO_CAT_SUBTOTAL. */
/* The parser leaves this at 0 when SUBTOTAL is written without an index;
   an explicit index is at least 1. */
618 size_t subtotal_index;
620 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
621 One element: CTPO_NEG. */
622 struct ctables_pcexpr *subs[2];
625 /* Source location. */
626 struct msg_location *location;
/* Forward declarations for functions that are used before their
   definitions. */
629 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
632 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
633 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
634 struct ctables_pcexpr *sub1);
/* Type of a function that parses one precedence level of a postcompute
   expression.  The binary-operator helpers take a pointer to such a function
   to parse each operand. */
636 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
637 struct dictionary *);
/* Destroys expression node E together with what it owns: the string payload
   of a CTPO_CAT_STRING node, both range strings of a CTPO_CAT_SRANGE node,
   the subexpressions of operator nodes (recursively), and the node's source
   location.

   NOTE(review): the loop over 'subs' always visits both slots, although a
   CTPO_NEG node is documented as having only one element, and callers pass
   pointers that may be null.  This therefore depends on a null E being
   accepted; the check is not visible here -- confirm. */
640 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
646 case CTPO_CAT_STRING:
647 ss_dealloc (&e->string);
650 case CTPO_CAT_SRANGE:
651 for (size_t i = 0; i < 2; i++)
652 ss_dealloc (&e->srange[i]);
661 for (size_t i = 0; i < 2; i++)
662 ctables_pcexpr_destroy (e->subs[i]);
/* The following node kinds own nothing beyond the location. */
666 case CTPO_CAT_NUMBER:
667 case CTPO_CAT_NRANGE:
668 case CTPO_CAT_MISSING:
669 case CTPO_CAT_OTHERNM:
670 case CTPO_CAT_SUBTOTAL:
675 msg_location_destroy (e->location);
/* Allocates and returns a new expression node for binary operator OP with
   operands SUB0 and SUB1.  The node points at the operands directly rather
   than copying them, and its location is the merge of the two operands'
   locations.  Both operands must be nonnull because their locations are
   read here. */
680 static struct ctables_pcexpr *
681 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
682 struct ctables_pcexpr *sub0,
683 struct ctables_pcexpr *sub1)
685 struct ctables_pcexpr *e = xmalloc (sizeof *e);
686 *e = (struct ctables_pcexpr) {
688 .subs = { sub0, sub1 },
689 .location = msg_location_merged (sub0->location, sub1->location),
694 /* How to parse an operator. */
/* Pairs a lexer token type with the expression operation it stands for;
   the precedence-level parsers keep small tables of these. */
697 enum token_type token;
698 enum ctables_pcexpr_op op;
/* Searches the N_OPS operators in OPS for one whose token type equals the
   current token of LEXER.

   T_NEG_NUM is handled differently from the other tokens.
   NOTE(review): presumably a negative-number token is left unconsumed so that
   it can afterward be parsed as the right operand; this would fit the
   addition table, which maps T_NEG_NUM to CTPO_ADD -- confirm. */
701 static const struct operator *
702 ctables_pcexpr_match_operator (struct lexer *lexer,
703 const struct operator ops[], size_t n_ops)
705 for (const struct operator *op = ops; op < ops + n_ops; op++)
706 if (lex_token (lexer) == op->token)
708 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators whose first operand, LHS,
   has already been parsed.

   Each iteration looks for one of the N_OPS operators in OPS, parses the
   right operand with PARSE_NEXT_LEVEL, and combines it with everything
   parsed so far into a new node that becomes the next left operand, so the
   resulting tree is left-associative.

   If CHAIN_WARNING is nonnull and more than one operator was parsed in the
   chain, CHAIN_WARNING is issued as a warning at the expression's location
   when the chain ends.  When a right operand fails to parse, the expression
   built so far is destroyed.

   NOTE(review): the return statements are not visible here -- confirm what
   is returned on success and on failure. */
717 static struct ctables_pcexpr *
718 ctables_pcexpr_parse_binary_operators__ (
719 struct lexer *lexer, struct dictionary *dict,
720 const struct operator ops[], size_t n_ops,
721 parse_recursively_func *parse_next_level,
722 const char *chain_warning, struct ctables_pcexpr *lhs)
/* 'op_count' is the number of operators parsed so far. */
724 for (int op_count = 0; ; op_count++)
726 const struct operator *op
727 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
730 if (op_count > 1 && chain_warning)
731 msg_at (SW, lhs->location, "%s", chain_warning);
736 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
739 ctables_pcexpr_destroy (lhs);
743 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: a first operand parsed
   with PARSE_NEXT_LEVEL, followed by any chain of the N_OPS operators in
   OPS, which ctables_pcexpr_parse_binary_operators__() handles.
   CHAIN_WARNING is passed through to that function and may be null. */
747 static struct ctables_pcexpr *
748 ctables_pcexpr_parse_binary_operators (
749 struct lexer *lexer, struct dictionary *dict,
750 const struct operator ops[], size_t n_ops,
751 parse_recursively_func *parse_next_level, const char *chain_warning)
753 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
757 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
/* Declared ahead of its definition because ctables_pcexpr_parse_primary()
   calls it to parse a parenthesized subexpression. */
762 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
763 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE node for the numeric range from LOW
   to HIGH.  The remaining members, including the location, are left zeroed
   for the caller to fill in. */
765 static struct ctables_pcexpr
766 ctpo_cat_nrange (double low, double high)
768 return (struct ctables_pcexpr) {
769 .op = CTPO_CAT_NRANGE,
770 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE node for the string range from LOW to
   HIGH.  The substrings are stored as given, not copied.  Callers in this
   file pass a substring with a null 'string' pointer for an open end (LO or
   HI).  The remaining members are left zeroed. */
774 static struct ctables_pcexpr
775 ctpo_cat_srange (struct substring low, struct substring high)
777 return (struct ctables_pcexpr) {
778 .op = CTPO_CAT_SRANGE,
779 .srange = { low, high },
/* Converts the string in LEXER's current token from UTF-8 to the encoding of
   DICT and removes trailing spaces from the result.  The recoded text is
   allocated without a pool (the pool argument is null), and callers in this
   file release it with ss_dealloc(). */
783 static struct substring
784 parse_substring (struct lexer *lexer, struct dictionary *dict)
786 struct substring s = recode_substring_pool (
787 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
788 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression and returns it as a newly
   allocated node.  The forms recognized are:

     - a number, which yields a CTPO_CONSTANT node;
     - the keywords MISSING, OTHERNM, and TOTAL;
     - SUBTOTAL, optionally followed by a bracketed index of at least 1;
     - a bracketed category: a number, a string, or a range written with
       THRU whose ends may be LO and HI;
     - a parenthesized subexpression.

   Anything else is reported as a syntax error.  String categories are
   converted to the encoding of DICT.  The location of the new node covers
   the tokens from where parsing started through the last token consumed.

   NOTE(review): the failure returns are not visible here -- confirm what is
   returned when parsing fails. */
793 static struct ctables_pcexpr *
794 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
796 int start_ofs = lex_ofs (lexer);
/* The node is assembled in a local and copied to the heap at the end. */
797 struct ctables_pcexpr e;
798 if (lex_is_number (lexer))
800 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
801 .number = lex_number (lexer) };
804 else if (lex_match_id (lexer, "MISSING"))
805 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
806 else if (lex_match_id (lexer, "OTHERNM"))
807 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
808 else if (lex_match_id (lexer, "TOTAL"))
809 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
810 else if (lex_match_id (lexer, "SUBTOTAL"))
/* 0 records that no index was given. */
812 size_t subtotal_index = 0;
813 if (lex_match (lexer, T_LBRACK))
815 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
817 subtotal_index = lex_integer (lexer);
819 if (!lex_force_match (lexer, T_RBRACK))
822 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
823 .subtotal_index = subtotal_index };
825 else if (lex_match (lexer, T_LBRACK))
827 if (lex_match_id (lexer, "LO"))
829 if (!lex_force_match_id (lexer, "THRU"))
832 if (lex_is_string (lexer))
/* An open low end is represented by a null string pointer. */
834 struct substring low = { .string = NULL };
835 struct substring high = parse_substring (lexer, dict);
836 e = ctpo_cat_srange (low, high);
840 if (!lex_force_num (lexer))
/* An open low end of a numeric range is represented by -DBL_MAX. */
842 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
846 else if (lex_is_number (lexer))
848 double number = lex_number (lexer);
850 if (lex_match_id (lexer, "THRU"))
852 if (lex_match_id (lexer, "HI"))
853 e = ctpo_cat_nrange (number, DBL_MAX);
856 if (!lex_force_num (lexer))
858 e = ctpo_cat_nrange (number, lex_number (lexer));
863 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
866 else if (lex_is_string (lexer))
868 struct substring s = parse_substring (lexer, dict);
870 if (lex_match_id (lexer, "THRU"))
872 struct substring high;
874 if (lex_match_id (lexer, "HI"))
875 high = (struct substring) { .string = NULL };
878 if (!lex_force_string (lexer))
883 high = parse_substring (lexer, dict);
886 e = ctpo_cat_srange (s, high);
889 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
894 _("Syntax error expecting number or string or range."));
/* If the closing bracket is missing, release any strings already stored in
   the node. */
898 if (!lex_force_match (lexer, T_RBRACK))
900 if (e.op == CTPO_CAT_STRING)
901 ss_dealloc (&e.string);
902 else if (e.op == CTPO_CAT_SRANGE)
904 ss_dealloc (&e.srange[0]);
905 ss_dealloc (&e.srange[1]);
910 else if (lex_match (lexer, T_LPAREN))
912 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
915 if (!lex_force_match (lexer, T_RPAREN))
917 ctables_pcexpr_destroy (ep);
924 lex_error (lexer, _("Syntax error in postcompute expression."));
928 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
929 return xmemdup (&e, sizeof e);
/* Allocates and returns a new expression node built around SUB for unary
   negation.  The node's location runs from token offset START_OFS in LEXER
   through the token before the lexer's current one. */
932 static struct ctables_pcexpr *
933 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
934 struct lexer *lexer, int start_ofs)
936 struct ctables_pcexpr *e = xmalloc (sizeof *e);
937 *e = (struct ctables_pcexpr) {
940 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   Unless the current token is a negative number that is immediately followed
   by the exponentiation operator, this parses a chain of exponentiation
   operators over primaries, supplying a warning text about
   left-associativity for the case where operators are chained.

   In the special case, a constant node is built from the negated value of
   the number token, the operator chain is parsed with that node as its left
   operand, and the result is wrapped in a negation node, so that the minus
   sign applies to the whole power rather than to its base. */
945 static struct ctables_pcexpr *
946 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
948 static const struct operator op = { T_EXP, CTPO_POW };
950 const char *chain_warning =
951 _("The exponentiation operator (`**') is left-associative: "
952 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
953 "To disable this warning, insert parentheses.");
955 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
956 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
957 ctables_pcexpr_parse_primary,
960 /* Special case for situations like "-5**6", which must be parsed as
963 int start_ofs = lex_ofs (lexer);
964 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
965 *lhs = (struct ctables_pcexpr) {
967 .number = -lex_tokval (lexer),
968 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
972 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
974 ctables_pcexpr_parse_primary, chain_warning, lhs);
978 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
981 /* Parses the unary minus level. */
/* Without a leading minus sign this defers to the exponentiation level.
   With one, the operand is parsed by a recursive call, so repeated minus
   signs nest, and the result is wrapped in a negation node whose location
   starts at the minus sign. */
982 static struct ctables_pcexpr *
983 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
985 int start_ofs = lex_ofs (lexer);
986 if (!lex_match (lexer, T_DASH))
987 return ctables_pcexpr_parse_exp (lexer, dict);
989 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
993 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
996 /* Parses the multiplication and division level. */
/* Operands are parsed at the unary minus level, and no chain warning is
   requested. */
997 static struct ctables_pcexpr *
998 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
1000 static const struct operator ops[] =
1002 { T_ASTERISK, CTPO_MUL },
1003 { T_SLASH, CTPO_DIV },
1006 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1007 sizeof ops / sizeof *ops,
1008 ctables_pcexpr_parse_neg, NULL);
1011 /* Parses the addition and subtraction level. */
/* Operands are parsed at the multiplication level, and no chain warning is
   requested.  A negative-number token counts as an addition operator here,
   presumably so that input such as "a -5", where the lexer yields a single
   negative number, is treated as a sum whose right operand is that number --
   confirm against ctables_pcexpr_match_operator(). */
1012 static struct ctables_pcexpr *
1013 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1015 static const struct operator ops[] =
1017 { T_PLUS, CTPO_ADD },
1018 { T_DASH, CTPO_SUB },
1019 { T_NEG_NUM, CTPO_ADD },
1022 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1023 ops, sizeof ops / sizeof *ops,
1024 ctables_pcexpr_parse_mul, NULL);
1027 /* CTABLES axis expressions. */
1029 /* CTABLES has a number of extra formats that we implement via custom
1030 currency specifications on an alternate fmt_settings. */
1031 #define CTEF_NEGPAREN FMT_CCA
1032 #define CTEF_NEQUAL FMT_CCB
1033 #define CTEF_PAREN FMT_CCC
1034 #define CTEF_PCTPAREN FMT_CCD
/* Distinguishes the sets of summary specifications kept for each variable.
   Code in this file uses CSV_CELL for the ordinary specifications and
   CSV_TOTAL for those given inside TOTALS[...], and N_CSVS dimensions the
   per-variable array of sets. */
1036 enum ctables_summary_variant
/* Kind of node in an axis expression tree.  CTAO_VAR marks a leaf that names
   a variable; the other kinds are nonterminals that have two
   subexpressions. */
1045 enum ctables_axis_op
/* Members of an axis expression node.  A leaf (CTAO_VAR) uses 'var' and
   'specs'; a nonterminal uses 'subs'.  Every node has a source location. */
1061 struct variable *var;
/* One set of summary specifications per summary variant. */
1063 struct ctables_summary_spec_set specs[N_CSVS];
1067 struct ctables_axis *subs[2];
1070 struct msg_location *loc;
/* Destroys AXIS together with what it owns: the summary specification sets,
   the two subexpressions (recursively), and the source location.

   NOTE(review): callers in this file pass pointers that may be null (for
   example after a failed parse), so a null AXIS must be accepted; the check
   is not visible here -- confirm. */
1074 ctables_axis_destroy (struct ctables_axis *axis)
1082 for (size_t i = 0; i < N_CSVS; i++)
1083 ctables_summary_spec_set_uninit (&axis->specs[i]);
1088 ctables_axis_destroy (axis->subs[0]);
1089 ctables_axis_destroy (axis->subs[1]);
1092 msg_location_destroy (axis->loc);
/* Allocates and returns a new nonterminal axis node with subexpressions SUB0
   and SUB1, which the node points at directly.  The node's location runs
   from token offset START_OFS in LEXER through the token before the lexer's
   current one. */
1096 static struct ctables_axis *
1097 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1098 struct ctables_axis *sub0,
1099 struct ctables_axis *sub1,
1100 struct lexer *lexer, int start_ofs)
1102 struct ctables_axis *axis = xmalloc (sizeof *axis);
1103 *axis = (struct ctables_axis) {
1105 .subs = { sub0, sub1 },
1106 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State shared by the axis expression parsing functions: the lexer that
   supplies tokens and the dictionary in which variable names are looked
   up. */
1111 struct ctables_axis_parse_ctx
1113 struct lexer *lexer;
1114 struct dictionary *dict;
/* Returns a new pivot_value to label the summary described by SPEC.

   The default label comes from ctables_summary_function_label(), using the
   spec's 'user_area'.  A label supplied by the user is copied instead, with
   every occurrence of ")CILEVEL" replaced by CILEVEL formatted with %g.

   NOTE(review): the condition that selects between the two cases is not
   visible here; presumably the default is used when the spec has no user
   label -- confirm. */
1117 static struct pivot_value *
1118 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1121 return ctables_summary_function_label (spec->function, spec->weighting,
1122 spec->user_area, spec->percentile);
1125 struct substring in = ss_cstr (spec->label);
1126 struct substring target = ss_cstr (")CILEVEL");
1128 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next occurrence of the target, or all of
   the remaining text if the target does not occur again. */
1131 size_t chunk = ss_find_substring (in, target);
1132 ds_put_substring (&out, ss_head (in, chunk));
1133 ss_advance (&in, chunk);
1135 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the target itself and emit the confidence level in its place. */
1137 ss_advance (&in, target.length);
1138 ds_put_format (&out, "%g", cilevel);
/* Adds a summary specification to every variable within AXIS.

   For a variable leaf, the function's availability is checked first: a
   function restricted to scale variables is rejected, with an error at LOC
   and a note at the variable's location, when the variable is not scale and
   SV is not CSV_TOTAL.  Otherwise a new spec is appended to the leaf's set
   for SV.  The spec gets its own copy of LABEL (which may be null) and a copy
   of FORMAT or, when FORMAT is null, the function's default format for the
   variable.

   For a nonterminal, the function recurses into both subexpressions.

   NOTE(review): the return statements are not visible here.  The recursive
   call is tested with "!", so a false result presumably reports failure --
   confirm. */
1144 add_summary_spec (struct ctables_axis *axis,
1145 enum ctables_summary_function function,
1146 enum ctables_weighting weighting,
1147 enum ctables_area_type area, double percentile,
1148 const char *label, const struct fmt_spec *format,
1149 bool is_ctables_format, const struct msg_location *loc,
1150 enum ctables_summary_variant sv)
1152 if (axis->op == CTAO_VAR)
/* The function's name is only needed for error messages. */
1154 char function_name[128];
1155 ctables_summary_function_name (function, weighting, area,
1156 function_name, sizeof function_name);
1157 const char *var_name = var_get_name (axis->var);
1158 switch (ctables_function_availability (function))
1162 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1163 "response sets."), function_name);
1164 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1170 if (!axis->scale && sv != CSV_TOTAL)
1173 _("Summary function %s applies only to scale variables."),
1175 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append a new spec, enlarging the array first if it is full. */
1185 struct ctables_summary_spec_set *set = &axis->specs[sv];
1186 if (set->n >= set->allocated)
1187 set->specs = x2nrealloc (set->specs, &set->allocated,
1188 sizeof *set->specs);
1190 struct ctables_summary_spec *dst = &set->specs[set->n++];
1191 *dst = (struct ctables_summary_spec) {
1192 .function = function,
1193 .weighting = weighting,
1196 .percentile = percentile,
1197 .label = xstrdup_if_nonnull (label),
1198 .format = (format ? *format
1199 : ctables_summary_default_format (function, axis->var)),
1200 .is_ctables_format = is_ctables_format,
1206 for (size_t i = 0; i < 2; i++)
1207 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1208 percentile, label, format, is_ctables_format,
/* Declared ahead of its definition because ctables_axis_parse_primary()
   calls it to parse a parenthesized axis expression. */
1215 static struct ctables_axis *ctables_axis_parse_stack (
1216 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression
   or a variable name that may be followed by [S] or [C].

   A name beginning with '$' is rejected because multiple response sets are
   not implemented.  For a variable, [S] marks it as scale and [C] as
   categorical; without either, it is scale exactly when its measurement
   level is MEASURE_SCALE.  Treating a string variable as scale is an error.
   The location of the new node covers the variable name and any marker.

   NOTE(review): the return statements are not visible here -- confirm what
   is returned on success and on failure. */
1218 static struct ctables_axis *
1219 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1221 if (lex_match (ctx->lexer, T_LPAREN))
1223 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* 'sub' may be null here, when the subexpression itself failed to parse. */
1224 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1226 ctables_axis_destroy (sub);
1232 if (!lex_force_id (ctx->lexer))
1235 if (lex_tokcstr (ctx->lexer)[0] == '$')
1237 lex_error (ctx->lexer,
1238 _("Multiple response set support not implemented."));
1242 int start_ofs = lex_ofs (ctx->lexer);
1243 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1247 struct ctables_axis *axis = xmalloc (sizeof *axis);
1248 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1250 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1251 : lex_match_phrase (ctx->lexer, "[C]") ? false
1252 : var_get_measure (var) == MEASURE_SCALE);
1253 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1254 lex_ofs (ctx->lexer) - 1);
1255 if (axis->scale && var_is_alpha (var))
1257 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1259 var_get_name (var));
1260 ctables_axis_destroy (axis);
/* Tests whether the null-terminated string S contains an ASCII decimal
   digit.  strcspn() yields the length of the leading part of S that contains
   no digit, so the character at that index is either the first digit or, if
   there is none, the terminating null. */
1268 has_digit (const char *s)
1270 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT and sets
   *IS_CTABLES_FORMAT to say whether it is one of the CTABLES-specific
   formats.

   The type name is compared, ignoring case, with NEGPAREN, NEQUAL, PAREN, and
   PCTPAREN, which select the corresponding CTEF_* format.  Any other
   specifier is parsed as an ordinary format and must pass the checks for
   output formats and for compatibility with numeric values; a failed check
   is reported at the specifier's token.

   The messages about a width of at least 2 and a width greater than the
   number of decimals belong to the CTABLES-specific formats.

   NOTE(review): some conditions and all return statements are not visible
   here -- confirm the exact control flow and the meaning of the result. */
1274 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1275 bool *is_ctables_format)
1277 char type[FMT_TYPE_LEN_MAX + 1];
1278 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1281 if (!strcasecmp (type, "NEGPAREN"))
1282 format->type = CTEF_NEGPAREN;
1283 else if (!strcasecmp (type, "NEQUAL"))
1284 format->type = CTEF_NEQUAL;
1285 else if (!strcasecmp (type, "PAREN"))
1286 format->type = CTEF_PAREN;
1287 else if (!strcasecmp (type, "PCTPAREN"))
1288 format->type = CTEF_PCTPAREN;
1291 *is_ctables_format = false;
1292 if (!parse_format_specifier (lexer, format))
1295 char *error = fmt_check_output__ (format);
1297 error = fmt_check_type_compat__ (format, NULL, VAL_NUMERIC);
1300 lex_next_error (lexer, -1, -1, "%s", error);
1311 lex_next_error (lexer, -1, -1,
1312 _("Output format %s requires width 2 or greater."), type);
/* Equivalent to requiring the width to exceed the number of decimals. */
1315 else if (format->d > format->w - 1)
1317 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1318 "greater than decimals."), type);
1323 *is_ctables_format = true;
/* Parses a primary axis expression that may be followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function name, a percentile
   between 0 and 100 if the function is PTILE, an optional label string, and
   an optional output format.  A token is taken to be a format only if it is
   an identifier that contains a digit.  Each specification is added to the
   variables of the expression with add_summary_spec().  Specifications may
   be separated by commas.

   While ordinary specifications are being parsed, the keyword TOTALS followed
   by an opening bracket introduces a nested list; the variant CSV_TOTAL
   identifies the specifications in that list.

   NOTE(review): the loop structure and the return statements are not visible
   here -- confirm what is returned on success and on failure. */
1328 static struct ctables_axis *
1329 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1331 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1332 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1335 enum ctables_summary_variant sv = CSV_CELL;
1338 int start_ofs = lex_ofs (ctx->lexer);
1340 /* Parse function. */
1341 enum ctables_summary_function function;
1342 enum ctables_weighting weighting;
1343 enum ctables_area_type area;
1344 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1348 /* Parse percentile. */
1349 double percentile = 0;
1350 if (function == CTSF_PTILE)
1352 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1354 percentile = lex_number (ctx->lexer);
1355 lex_get (ctx->lexer);
1360 if (lex_is_string (ctx->lexer))
1362 label = ss_xstrdup (lex_tokss (ctx->lexer));
1363 lex_get (ctx->lexer);
1367 struct fmt_spec format;
1368 const struct fmt_spec *formatp;
1369 bool is_ctables_format = false;
1370 if (lex_token (ctx->lexer) == T_ID
1371 && has_digit (lex_tokcstr (ctx->lexer)))
1373 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1374 &is_ctables_format))
/* The location spans the whole specification and is only needed while it is
   being added. */
1384 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1385 lex_ofs (ctx->lexer) - 1);
1386 add_summary_spec (sub, function, weighting, area, percentile, label,
1387 formatp, is_ctables_format, loc, sv);
1389 msg_location_destroy (loc);
1391 lex_match (ctx->lexer, T_COMMA);
1392 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1394 if (!lex_force_match (ctx->lexer, T_LBRACK))
1398 else if (lex_match (ctx->lexer, T_RBRACK))
/* After a TOTALS list, a second closing bracket is required to close the
   outer list as well. */
1400 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1407 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A CTAO_VAR
   node yields itself if its 'scale' flag is set and NULL otherwise.  For
   other nodes, both elements of 'subs' are searched recursively, in order.
   NOTE(review): the check that precedes the CTAO_VAR test and the handling
   of each recursive result are not visible here. */
1411 static const struct ctables_axis *
1412 find_scale (const struct ctables_axis *axis)
1416 else if (axis->op == CTAO_VAR)
1417 return axis->scale ? axis : NULL;
1420 for (size_t i = 0; i < 2; i++)
1422 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A CTAO_VAR
   node yields itself if it qualifies and NULL otherwise.  For other nodes,
   both elements of 'subs' are searched recursively, in order.
   NOTE(review): the check that precedes the CTAO_VAR test and the handling
   of each recursive result are not visible here. */
1430 static const struct ctables_axis *
1431 find_categorical_summary_spec (const struct ctables_axis *axis)
1435 else if (axis->op == CTAO_VAR)
1436 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1439 for (size_t i = 0; i < 2; i++)
1441 const struct ctables_axis *sum
1442 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions separated by '>' (T_GT) and
   combines them into CTAO_NEST nodes with ctables_axis_new_nonterminal().

   Two restrictions are checked for each nesting.  A violation is reported
   with msg_at() and the new nest node is destroyed:

   - The outer and inner operands may not both contain a scale variable.

   - A categorical variable on the outer side may not have summaries. */
1450 static struct ctables_axis *
1451 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1453 int start_ofs = lex_ofs (ctx->lexer);
1454 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1458 while (lex_match (ctx->lexer, T_GT))
1460 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1463 ctables_axis_destroy (lhs);
1467 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1468 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nesting may contain a scale variable. */
1470 const struct ctables_axis *outer_scale = find_scale (lhs);
1471 const struct ctables_axis *inner_scale = find_scale (rhs);
1472 if (outer_scale && inner_scale)
1474 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1475 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1476 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1477 ctables_axis_destroy (nest);
/* Only the outer operand is searched for a categorical variable with
   summaries. */
1481 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1484 msg_at (SE, nest->loc,
1485 _("Summaries may only be requested for categorical variables "
1486 "at the innermost nesting level."));
1487 msg_at (SN, outer_sum->loc,
1488 _("This outer categorical variable has a summary."));
1489 ctables_axis_destroy (nest);
/* Parses one or more nest expressions separated by '+' (T_PLUS).  Each new
   operand is combined with the result so far into a CTAO_STACK node, which
   makes the result left-associative.  START_OFS, taken before the first
   operand, is passed to every new node.
   NOTE(review): LHS is destroyed after a right-hand operand is parsed,
   presumably only when that operand failed to parse; the condition is not
   visible here. */
1499 static struct ctables_axis *
1500 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1502 int start_ofs = lex_ofs (ctx->lexer);
1503 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1507 while (lex_match (ctx->lexer, T_PLUS))
1509 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1512 ctables_axis_destroy (lhs);
1516 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1517 ctx->lexer, start_ofs);
/* Parses an axis expression from LEXER, resolving variables in DICT, and
   stores the result of ctables_axis_parse_stack() in *AXISP.

   The current token is first compared against BY, '/', and end of command.
   NOTE(review): presumably any of these means the axis is empty; the body of
   that branch is not visible here. */
1524 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1525 struct ctables_axis **axisp)
1528 if (lex_token (lexer) == T_BY
1529 || lex_token (lexer) == T_SLASH
1530 || lex_token (lexer) == T_ENDCMD)
1533 struct ctables_axis_parse_ctx ctx = {
1537 *axisp = ctables_axis_parse_stack (&ctx);
1541 /* CTABLES categories. */
/* A list of categories.  Code elsewhere in this file also reads members
   named 'n_refs', 'n_cats', and 'show_empty' from this structure. */
1543 struct ctables_categories
/* Array of categories; other code iterates over 'n_cats' elements. */
1546 struct ctables_category *cats;
/* One entry in a struct ctables_categories.  The category type selects which
   of the type-specific members below is meaningful, as noted on each
   member. */
1551 struct ctables_category
1553 enum ctables_category_type
1555 /* Explicit category lists. */
1558 CCT_NRANGE, /* Numerical range. */
1559 CCT_SRANGE, /* String range. */
1564 /* Totals and subtotals. */
1568 /* Implicit category lists. */
1573 /* For contributing to TOTALN. */
1574 CCT_EXCLUDED_MISSING,
1578 struct ctables_category *subtotal;
1584 double number; /* CCT_NUMBER. */
1585 struct substring string; /* CCT_STRING, in dictionary encoding. */
1586 double nrange[2]; /* CCT_NRANGE: low, high (-DBL_MAX, DBL_MAX if open). */
1587 struct substring srange[2]; /* CCT_SRANGE: low, high (null string if open). */
1591 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1592 bool hide_subcategories; /* CCT_SUBTOTAL. */
1595 /* CCT_POSTCOMPUTE. */
1598 const struct ctables_postcompute *pc;
1599 enum fmt_type parse_format;
1602 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1605 bool include_missing;
1606 bool sort_ascending;
1609 enum ctables_summary_function sort_function;
1610 enum ctables_weighting weighting;
1611 enum ctables_area_type area;
1612 struct variable *sort_var;
1617 /* Source location (sometimes NULL). */
1618 struct msg_location *location;
/* Releases the resources owned by CAT: its source location and, in the
   type-specific cases visible below, its string, both string range
   endpoints, or its total label.  CAT itself is not freed by any visible
   statement. */
1622 ctables_category_uninit (struct ctables_category *cat)
1627 msg_location_destroy (cat->location);
1634 case CCT_POSTCOMPUTE:
1638 ss_dealloc (&cat->string);
1642 ss_dealloc (&cat->srange[0]);
1643 ss_dealloc (&cat->srange[1]);
1648 free (cat->total_label);
1656 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be "null", that is, have
   a null 'string' pointer.  Two null substrings are equal, a null and a
   non-null substring are unequal, and two non-null substrings are compared
   with ss_equals(). */
1662 nullable_substring_equal (const struct substring *a,
1663 const struct substring *b)
1665 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are compared first; after that,
   only the members relevant to the type are compared: the number, the
   string, both numeric range bounds, both string range endpoints (null
   endpoints allowed), the postcompute pointer, the total label text, or the
   sorting parameters.
   NOTE(review): the sorting comparison does not include the 'weighting' or
   'area' members; confirm that this is intentional. */
1669 ctables_category_equal (const struct ctables_category *a,
1670 const struct ctables_category *b)
1672 if (a->type != b->type)
1678 return a->number == b->number;
1681 return ss_equals (a->string, b->string);
1684 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1687 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1688 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1694 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by content. */
1695 return a->pc == b->pc;
1699 return !strcmp (a->total_label, b->total_label);
1704 return (a->include_missing == b->include_missing
1705 && a->sort_ascending == b->sort_ascending
1706 && a->sort_function == b->sort_function
1707 && a->sort_var == b->sort_var
1708 && a->percentile == b->percentile);
1710 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C.  The reference count must be positive on
   entry.  Every category in C is uninitialized with
   ctables_category_uninit().
   NOTE(review): presumably that happens only when the last reference is
   dropped; the decrement and its test are not visible here. */
1718 ctables_categories_unref (struct ctables_categories *c)
1723 assert (c->n_refs > 0);
1727 for (size_t i = 0; i < c->n_cats; i++)
1728 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must have the same number of
   categories and the same 'show_empty' setting, and corresponding categories
   are compared pairwise, in order, with ctables_category_equal(). */
1734 ctables_categories_equal (const struct ctables_categories *a,
1735 const struct ctables_categories *b)
1737 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1740 for (size_t i = 0; i < a->n_cats; i++)
1741 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns a category, by value, whose numeric range is LOW through HIGH.
   Members not named in the initializer are zero-initialized. */
1747 static struct ctables_category
1748 cct_nrange (double low, double high)
1750 return (struct ctables_category) {
1752 .nrange = { low, high }
/* Returns a category, by value, whose string range is LOW through HIGH.  The
   substrings are stored as given, without copying their contents.  Members
   not named in the initializer are zero-initialized. */
1756 static struct ctables_category
1757 cct_srange (struct substring low, struct substring high)
1759 return (struct ctables_category) {
1761 .srange = { low, high }
/* Parses what follows a SUBTOTAL or HSUBTOTAL keyword and stores a
   CCT_SUBTOTAL category in *CAT.

   If '=' follows, a string token is required and a copy of it becomes the
   label.  Otherwise the label is a copy of the translated text "Subtotal".
   HIDE_SUBCATEGORIES is stored in the category unchanged. */
1766 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1767 struct ctables_category *cat)
1770 if (lex_match (lexer, T_EQUALS))
1772 if (!lex_force_string (lexer))
1775 total_label = ss_xstrdup (lex_tokss (lexer));
1779 total_label = xstrdup (_("Subtotal"));
1781 *cat = (struct ctables_category) {
1782 .type = CCT_SUBTOTAL,
1783 .hide_subcategories = hide_subcategories,
1784 .total_label = total_label
/* Parses one explicit category specification from LEXER into *CAT.  The
   forms recognized, in the order tried, are:

     OTHERNM
     MISSING
     SUBTOTAL or HSUBTOTAL     (see ctables_table_parse_subtotal())
     LO THRU string
     LO THRU number
     number [THRU HI | THRU number]
     string [THRU HI | THRU string]
     &name                     (a postcompute looked up by name)

   An open-ended numeric range uses -DBL_MAX or DBL_MAX as its bound.  An
   open-ended string range uses a substring whose 'string' is null.

   An unknown postcompute name, or a token that matches none of the forms
   above, is reported as an error. */
1790 ctables_table_parse_explicit_category (struct lexer *lexer,
1791 struct dictionary *dict,
1793 struct ctables_category *cat)
1795 if (lex_match_id (lexer, "OTHERNM"))
1796 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1797 else if (lex_match_id (lexer, "MISSING"))
1798 *cat = (struct ctables_category) { .type = CCT_MISSING };
1799 else if (lex_match_id (lexer, "SUBTOTAL"))
1800 return ctables_table_parse_subtotal (lexer, false, cat);
1801 else if (lex_match_id (lexer, "HSUBTOTAL"))
1802 return ctables_table_parse_subtotal (lexer, true, cat);
1803 else if (lex_match_id (lexer, "LO"))
1805 if (!lex_force_match_id (lexer, "THRU"))
1807 if (lex_is_string (lexer))
/* String range with no lower endpoint. */
1809 struct substring sr0 = { .string = NULL };
1810 struct substring sr1 = parse_substring (lexer, dict);
1811 *cat = cct_srange (sr0, sr1);
1813 else if (lex_force_num (lexer))
1815 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1821 else if (lex_is_number (lexer))
1823 double number = lex_number (lexer);
1825 if (lex_match_id (lexer, "THRU"))
1827 if (lex_match_id (lexer, "HI"))
1828 *cat = cct_nrange (number, DBL_MAX);
1831 if (!lex_force_num (lexer))
1833 *cat = cct_nrange (number, lex_number (lexer));
1838 *cat = (struct ctables_category) {
1843 else if (lex_is_string (lexer))
1845 struct substring s = parse_substring (lexer, dict);
1846 if (lex_match_id (lexer, "THRU"))
1848 if (lex_match_id (lexer, "HI"))
/* String range with no upper endpoint. */
1850 struct substring sr1 = { .string = NULL };
1851 *cat = cct_srange (s, sr1);
1855 if (!lex_force_string (lexer))
1860 struct substring sr1 = parse_substring (lexer, dict);
1861 *cat = cct_srange (s, sr1);
1865 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1867 else if (lex_match (lexer, T_AND))
1869 if (!lex_force_id (lexer))
1871 struct ctables_postcompute *pc = ctables_find_postcompute (
1872 ct, lex_tokcstr (lexer));
/* The error location covers the '&' token and the name that follows it. */
1875 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1876 msg_at (SE, loc, _("Unknown postcompute &%s."),
1877 lex_tokcstr (lexer));
1878 msg_location_destroy (loc);
1883 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1887 lex_error (lexer, _("Syntax error expecting category specification."));
/* Parses S, interpreted in DICT's encoding, as data in FORMAT using
   data_in() with the current format settings.  If data_in() reports an
   error, a message naming FORMAT and giving the error text is issued at
   LOCATION.
   NOTE(review): presumably the parsed number is stored in *N on success; the
   assignment and the return statements are not visible here. */
1895 parse_category_string (struct msg_location *location,
1896 struct substring s, const struct dictionary *dict,
1897 enum fmt_type format, double *n)
1900 char *error = data_in (s, dict_get_encoding (dict), format,
1901 settings_get_fmt_settings (), &v, 0, NULL);
1904 msg_at (SE, location,
1905 _("Failed to parse category specification as format %s: %s."),
1906 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The kind of reference, E->op, determines which category type is
   considered and what must match:

     CTPO_CAT_NUMBER     a CCT_NUMBER category with the same number
     CTPO_CAT_STRING     a CCT_STRING category with an equal string
     CTPO_CAT_NRANGE     a CCT_NRANGE category with both bounds equal
     CTPO_CAT_SRANGE     a CCT_SRANGE category with both endpoints equal
     CTPO_CAT_MISSING    a CCT_MISSING category
     CTPO_CAT_OTHERNM    a CCT_OTHERNM category
     CTPO_CAT_SUBTOTAL   a CCT_SUBTOTAL category, selected by E->subtotal_index
     CTPO_CAT_TOTAL      a CCT_TOTAL category

   The final test singles out a subtotal reference with index 0 when CATS
   holds more than one subtotal.
   NOTE(review): presumably that case yields no match; the statement it
   guards and the final return are not visible here. */
1915 static struct ctables_category *
1916 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1917 const struct ctables_pcexpr *e)
1919 struct ctables_category *best = NULL;
1920 size_t n_subtotals = 0;
1921 for (size_t i = 0; i < cats->n_cats; i++)
1923 struct ctables_category *cat = &cats->cats[i];
1926 case CTPO_CAT_NUMBER:
1927 if (cat->type == CCT_NUMBER && cat->number == e->number)
1931 case CTPO_CAT_STRING:
1932 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1936 case CTPO_CAT_NRANGE:
1937 if (cat->type == CCT_NRANGE
1938 && cat->nrange[0] == e->nrange[0]
1939 && cat->nrange[1] == e->nrange[1])
1943 case CTPO_CAT_SRANGE:
1944 if (cat->type == CCT_SRANGE
1945 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1946 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1950 case CTPO_CAT_MISSING:
1951 if (cat->type == CCT_MISSING)
1955 case CTPO_CAT_OTHERNM:
1956 if (cat->type == CCT_OTHERNM)
1960 case CTPO_CAT_SUBTOTAL:
1961 if (cat->type == CCT_SUBTOTAL)
1964 if (e->subtotal_index == n_subtotals)
1966 else if (e->subtotal_index == 0)
1971 case CTPO_CAT_TOTAL:
1972 if (cat->type == CCT_TOTAL)
1986 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to.

   When PARSE_FORMAT is not FMT_F, string and string-range references are
   first converted to numbers with parse_category_string():

   - A CTPO_CAT_STRING reference is looked up as CTPO_CAT_NUMBER.

   - A CTPO_CAT_SRANGE reference is looked up as CTPO_CAT_NRANGE.  A null low
     endpoint becomes -DBL_MAX and a null high endpoint becomes DBL_MAX.

   Each converted reference keeps E's location.  In every other case E is
   looked up unchanged. */
1991 static struct ctables_category *
1992 ctables_find_category_for_postcompute (const struct dictionary *dict,
1993 const struct ctables_categories *cats,
1994 enum fmt_type parse_format,
1995 const struct ctables_pcexpr *e)
1997 if (parse_format != FMT_F)
1999 if (e->op == CTPO_CAT_STRING)
2002 if (!parse_category_string (e->location, e->string, dict,
2003 parse_format, &number))
2006 struct ctables_pcexpr e2 = {
2007 .op = CTPO_CAT_NUMBER,
2009 .location = e->location,
2011 return ctables_find_category_for_postcompute__ (cats, &e2);
2013 else if (e->op == CTPO_CAT_SRANGE)
2016 if (!e->srange[0].string)
2017 nrange[0] = -DBL_MAX;
2018 else if (!parse_category_string (e->location, e->srange[0], dict,
2019 parse_format, &nrange[0]))
2022 if (!e->srange[1].string)
2023 nrange[1] = DBL_MAX;
2024 else if (!parse_category_string (e->location, e->srange[1], dict,
2025 parse_format, &nrange[1]))
2028 struct ctables_pcexpr e2 = {
2029 .op = CTPO_CAT_NRANGE,
2030 .nrange = { nrange[0], nrange[1] },
2031 .location = e->location,
2033 return ctables_find_category_for_postcompute__ (cats, &e2);
2036 return ctables_find_category_for_postcompute__ (cats, e);
/* Builds a substring over the string contents of V, taking VAR's width as
   the length, and removes trailing spaces from it.  The substring refers to
   V's own storage; no copy is made by the visible statements. */
2039 static struct substring
2040 rtrim_value (const union value *v, const struct variable *var)
2042 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2043 var_get_width (var));
2044 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether V, a string value of VAR with trailing spaces removed, lies
   within the range SRANGE[0] through SRANGE[1].  Both endpoints are
   inclusive.  An endpoint whose 'string' is null places no limit on that
   side. */
2049 in_string_range (const union value *v, const struct variable *var,
2050 const struct substring *srange)
2052 struct substring s = rtrim_value (v, var);
2053 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2054 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   A system-missing numeric value is tested for first.  After that the
   categories are scanned from the last one to the first.  The visible tests
   compare V against a category's number, its string (after trailing spaces
   are trimmed from V), its numeric range (where -DBL_MAX and DBL_MAX are
   open ends), or its string range, or check whether V is missing in VAR.

   If the scan finishes without returning, the result is NULL for a missing
   value and OTHERNM otherwise.
   NOTE(review): presumably OTHERNM is set when a CCT_OTHERNM category is
   seen; the assignment is not visible here. */
2057 static const struct ctables_category *
2058 ctables_categories_match (const struct ctables_categories *c,
2059 const union value *v, const struct variable *var)
2061 if (var_is_numeric (var) && v->f == SYSMIS)
2064 const struct ctables_category *othernm = NULL;
/* Iterates I from c->n_cats - 1 down to 0 without unsigned underflow. */
2065 for (size_t i = c->n_cats; i-- > 0; )
2067 const struct ctables_category *cat = &c->cats[i];
2071 if (cat->number == v->f)
2076 if (ss_equals (cat->string, rtrim_value (v, var)))
2081 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2082 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2087 if (in_string_range (v, var, cat->srange))
2092 if (var_is_value_missing (var, v))
2096 case CCT_POSTCOMPUTE:
2111 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2114 case CCT_EXCLUDED_MISSING:
2119 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's first category if it is a CCT_TOTAL, or else its last category
   if that is a CCT_TOTAL.  Only those two positions are examined.
   NOTE(review): the result when neither is a total is not visible here.
   Both element addresses are formed without checking c->n_cats, so this
   assumes at least one category; confirm against callers. */
2122 static const struct ctables_category *
2123 ctables_categories_total (const struct ctables_categories *c)
2125 const struct ctables_category *first = &c->cats[0];
2126 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2127 return (first->type == CCT_TOTAL ? first
2128 : last->type == CCT_TOTAL ? last
/* Formats NUMBER as a value of VAR.  A temporary pivot_value is created for
   the number, formatted into S with pivot_value_format(), and destroyed. */
2133 ctables_category_format_number (double number, const struct variable *var,
2136 struct pivot_value *pv = pivot_value_new_var_value (
2137 var, &(union value) { .f = number });
2138 pivot_value_format (pv, NULL, s);
2139 pivot_value_destroy (pv);
/* Formats STRING as a value of VAR into OUT.  STRING is first copied into a
   temporary buffer exactly as wide as VAR and padded on the right with
   spaces.  A temporary pivot_value is then created from the buffer,
   formatted with pivot_value_format(), and destroyed.
   NOTE(review): the release of the temporary buffer S is not visible here;
   confirm that it is freed. */
2143 ctables_category_format_string (struct substring string,
2144 const struct variable *var, struct string *out)
2146 int width = var_get_width (var);
2147 char *s = xmalloc (width);
2148 buf_copy_rpad (s, width, string.string, string.length, ' ');
2149 struct pivot_value *pv = pivot_value_new_var_value (
2150 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2151 pivot_value_format (pv, NULL, out);
2152 pivot_value_destroy (pv);
/* Appends a textual label for CAT, a category of VAR, to S.  The visible
   cases produce:

   - the formatted number or string;

   - the two formatted bounds of a numeric or string range, separated by
     " THRU ";

   - the literal text "MISSING" or "OTHERNM";

   - "&" followed by the postcompute's name;

   - the total label.

   NOTE(review): for string ranges both endpoints are formatted
   unconditionally, although a null endpoint marks an open range elsewhere
   in this file; confirm how open-ended ranges are meant to be handled
   here. */
2157 ctables_category_format_label (const struct ctables_category *cat,
2158 const struct variable *var,
2164 ctables_category_format_number (cat->number, var, s);
2168 ctables_category_format_string (cat->string, var, s);
2172 ctables_category_format_number (cat->nrange[0], var, s);
2173 ds_put_format (s, " THRU ");
2174 ctables_category_format_number (cat->nrange[1], var, s);
2178 ctables_category_format_string (cat->srange[0], var, s);
2179 ds_put_format (s, " THRU ");
2180 ctables_category_format_string (cat->srange[1], var, s);
2184 ds_put_cstr (s, "MISSING");
2188 ds_put_cstr (s, "OTHERNM");
2191 case CCT_POSTCOMPUTE:
2192 ds_put_format (s, "&%s", cat->pc->name);
2197 ds_put_cstr (s, cat->total_label);
2203 case CCT_EXCLUDED_MISSING:
/* Checks that every category referenced by postcompute expression E, and
   recursively by its subexpressions, can be found in CATS.  PC_CAT is the
   postcompute category that E belongs to, and CATS_LOCATION is the source
   location of the category list, used in diagnostics.

   For a category reference, the category is looked up with
   ctables_find_category_for_postcompute().  Two kinds of failure are
   diagnosed:

   - A subtotal reference without a position (index 0) when CATS holds more
     than one subtotal, which is ambiguous.

   - A reference to a category that is not in the list.  A follow-up note
     suggests a fix that depends on the kind of reference.

   For other expression nodes, both elements of E->subs are checked
   recursively, skipping null ones.

   NOTE(review): the TOTAL hint is issued with msg(), without a location,
   whereas the sibling hints use msg_at(); confirm that this is intended. */
2211 ctables_recursive_check_postcompute (struct dictionary *dict,
2212 const struct ctables_pcexpr *e,
2213 struct ctables_category *pc_cat,
2214 const struct ctables_categories *cats,
2215 const struct msg_location *cats_location)
2219 case CTPO_CAT_NUMBER:
2220 case CTPO_CAT_STRING:
2221 case CTPO_CAT_NRANGE:
2222 case CTPO_CAT_SRANGE:
2223 case CTPO_CAT_MISSING:
2224 case CTPO_CAT_OTHERNM:
2225 case CTPO_CAT_SUBTOTAL:
2226 case CTPO_CAT_TOTAL:
2228 struct ctables_category *cat = ctables_find_category_for_postcompute (
2229 dict, cats, pc_cat->parse_format, e);
2232 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals to tell an ambiguous reference from a missing one. */
2234 size_t n_subtotals = 0;
2235 for (size_t i = 0; i < cats->n_cats; i++)
2236 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2237 if (n_subtotals > 1)
2239 msg_at (SE, cats_location,
2240 ngettext ("These categories include %zu instance "
2241 "of SUBTOTAL or HSUBTOTAL, so references "
2242 "from computed categories must refer to "
2243 "subtotals by position, "
2244 "e.g. SUBTOTAL[1].",
2245 "These categories include %zu instances "
2246 "of SUBTOTAL or HSUBTOTAL, so references "
2247 "from computed categories must refer to "
2248 "subtotals by position, "
2249 "e.g. SUBTOTAL[1].",
2252 msg_at (SN, e->location,
2253 _("This is the reference that lacks a position."));
2258 msg_at (SE, pc_cat->location,
2259 _("Computed category &%s references a category not included "
2260 "in the category list."),
2262 msg_at (SN, e->location, _("This is the missing category."));
2263 if (e->op == CTPO_CAT_SUBTOTAL)
2264 msg_at (SN, cats_location,
2265 _("To fix the problem, add subtotals to the "
2266 "list of categories here."));
2267 else if (e->op == CTPO_CAT_TOTAL)
2268 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2269 "CATEGORIES specification."));
2271 msg_at (SN, cats_location,
2272 _("To fix the problem, add the missing category to the "
2273 "list of categories here."));
2276 if (pc_cat->pc->hide_source_cats)
2290 for (size_t i = 0; i < 2; i++)
2291 if (e->subs[i] && !ctables_recursive_check_postcompute (
2292 dict, e->subs[i], pc_cat, cats, cats_location))
/* Builds the label for postcompute category CAT from its postcompute's label
   text.  Each occurrence of ")LABEL[n]" is replaced by the label of the Nth
   category in CATS, counting from 1, as formatted for VAR by
   ctables_category_format_label().

   If the text contains no ")LABEL[" at all, it is used as is.  The last
   visible statement returns the unexpanded label text; presumably that is
   the fallback for a malformed reference (TODO confirm). */
2300 static struct pivot_value *
2301 ctables_postcompute_label (const struct ctables_categories *cats,
2302 const struct ctables_category *cat,
2303 const struct variable *var)
2305 struct substring in = ss_cstr (cat->pc->label);
2306 struct substring target = ss_cstr (")LABEL[");
2308 struct string out = DS_EMPTY_INITIALIZER;
2311 size_t chunk = ss_find_substring (in, target);
2312 if (chunk == SIZE_MAX)
/* No more references.  If nothing was expanded, the remaining input is the
   whole label, so it is used directly. */
2314 if (ds_is_empty (&out))
2315 return pivot_value_new_user_text (in.string, in.length);
2318 ds_put_substring (&out, in);
2319 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then skip past ")LABEL[". */
2323 ds_put_substring (&out, ss_head (in, chunk));
2324 ss_advance (&in, chunk + target.length);
2326 struct substring idx_s;
2327 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a number from 1 to n_cats that fills the whole of
   IDX_S, so TAIL must point at the end of IDX_S. */
2330 long int idx = strtol (idx_s.string, &tail, 10);
2331 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2334 struct ctables_category *cat2 = &cats->cats[idx - 1];
2335 if (!ctables_category_format_label (cat2, var, &out))
2341 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns the pivot value that labels category CAT:

   - For a postcompute whose postcompute has a label, the expanded label from
     ctables_postcompute_label().

   - For a total or subtotal, the category's total label as user text.

   - Otherwise, including a postcompute without a label, VALUE presented as a
     value of VAR. */
2344 static struct pivot_value *
2345 ctables_category_create_value_label (const struct ctables_categories *cats,
2346 const struct ctables_category *cat,
2347 const struct variable *var,
2348 const union value *value)
2350 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2351 ? ctables_postcompute_label (cats, cat, var)
2352 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2353 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2354 : pivot_value_new_var_value (var, value));
2357 /* CTABLES variable nesting and stacking. */
2359 /* A nested sequence of variables, e.g. a > b > c. */
/* The variables in the nest.  Code in this file iterates over 'n' of
   them. */
2362 struct variable **vars;
/* One array per area type, with its element count in the matching entry of
   'n_areas'.  ctables_nest_uninit() frees each array. */
2366 size_t *areas[N_CTATS];
2367 size_t n_areas[N_CTATS];
/* One set of summary specifications per summary variant. */
2370 struct ctables_summary_spec_set specs[N_CSVS];
2373 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2374 struct ctables_stack
/* Array of nests.  Code in this file iterates over 'n' of them. */
2376 struct ctables_nest *nests;
/* Releases what NEST owns in the statements visible here: the summary
   specification set for every summary variant and the area array for every
   area type.  NEST itself is not freed by any visible statement. */
2381 ctables_nest_uninit (struct ctables_nest *nest)
2384 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2385 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2386 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2387 free (nest->areas[at]);
/* Uninitializes every nest in STACK and then frees the array of nests.
   STACK itself is not freed by any visible statement. */
2391 ctables_stack_uninit (struct ctables_stack *stack)
2395 for (size_t i = 0; i < stack->n; i++)
2396 ctables_nest_uninit (&stack->nests[i]);
2397 free (stack->nests);
/* Returns the result of nesting every nest in S1 inside every nest in S0.
   There is one new nest for each pair (A from S0, B from S1), ordered with
   S0 as the outer loop.  Each new nest holds A's variables followed by B's.

   The scale and summary indexes come from A when A has them.  Otherwise they
   come from B, offset by the number of variables in A.  The summary
   specifications are cloned from SUMMARY_SRC, which is chosen according to
   which of A and B lacks a CSV_CELL summary variable.

   Both S0 and S1 are uninitialized before returning, so ownership of both
   arguments passes to this function.

   NOTE(review): the size argument s1.n * sizeof *stack.nests is multiplied
   before xnmalloc() is called, so any overflow checking that xnmalloc() does
   cannot cover that product.  Passing the element size on its own would let
   it be checked; confirm against gnulib's xalloc documentation. */
2401 static struct ctables_stack
2402 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2409 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2410 for (size_t i = 0; i < s0.n; i++)
2411 for (size_t j = 0; j < s1.n; j++)
2413 const struct ctables_nest *a = &s0.nests[i];
2414 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate A's and B's variables. */
2416 size_t allocate = a->n + b->n;
2417 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2419 for (size_t k = 0; k < a->n; k++)
2420 vars[n++] = a->vars[k];
2421 for (size_t k = 0; k < b->n; k++)
2422 vars[n++] = b->vars[k];
2423 assert (n == allocate);
2425 const struct ctables_nest *summary_src;
2426 if (!a->specs[CSV_CELL].var)
2428 else if (!b->specs[CSV_CELL].var)
2433 struct ctables_nest *new = &stack.nests[stack.n++];
2434 *new = (struct ctables_nest) {
/* Indexes taken from B shift by a->n because B's variables follow A's. */
2436 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2437 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2439 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2440 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2444 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2445 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2447 ctables_stack_uninit (&s0);
2448 ctables_stack_uninit (&s1);
/* Returns a stack holding the nests of S0 followed by the nests of S1.  The
   nests are copied by structure assignment into a new array.  Each nest
   taken from S1 has its 'group_head' increased by S0's length, to account
   for its new position in the combined array. */
2452 static struct ctables_stack
2453 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2455 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2456 for (size_t i = 0; i < s0.n; i++)
2457 stack.nests[stack.n++] = s0.nests[i];
2458 for (size_t i = 0; i < s1.n; i++)
2460 stack.nests[stack.n] = s1.nests[i];
2461 stack.nests[stack.n].group_head += s0.n;
2464 assert (stack.n == s0.n + s1.n);
/* Returns a stack with a single nest for the variable axis A.

   The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise.  Its summary index is 0 if A has CSV_CELL summary
   specifications or is a scale variable, and SIZE_MAX otherwise.  The
   summary specification sets are cloned from A for every summary variant
   and tagged with A's variable and scale flag. */
2470 static struct ctables_stack
2471 var_fts (const struct ctables_axis *a)
2473 struct variable **vars = xmalloc (sizeof *vars);
2476 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2477 struct ctables_nest *nest = xmalloc (sizeof *nest);
2478 *nest = (struct ctables_nest) {
2481 .scale_idx = a->scale ? 0 : SIZE_MAX,
2482 .summary_idx = is_summary ? 0 : SIZE_MAX,
2485 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2487 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2488 nest->specs[sv].var = a->var;
2489 nest->specs[sv].is_scale = a->scale;
2491 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  The visible cases are:

   - an empty stack (presumably for a null A; TODO confirm);

   - stack_fts() applied to the results for both operands;

   - nest_fts() applied to the results for both operands.

   AXIS_TYPE is passed unchanged to each recursive call. */
2494 static struct ctables_stack
2495 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2498 return (struct ctables_stack) { .n = 0 };
2506 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2507 enumerate_fts (axis_type, a->subs[1]));
2510 /* This should consider any of the scale variables found in the result to
2511 be linked to each other listwise for SMISSING=LISTWISE. */
2512 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2513 enumerate_fts (axis_type, a->subs[1]));
2519 /* CTABLES summary calculation. */
/* Accumulated state for one summary function.  The summary function being
   computed determines which group of members is in use, as the member
   comments indicate. */
2521 union ctables_summary
2523 /* COUNT, VALIDN, TOTALN. */
2526 /* MINIMUM, MAXIMUM, RANGE. */
2533 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2534 struct moments1 *moments;
2536 /* MEDIAN, MODE, PTILE. */
2539 struct casewriter *writer;
/* Initializes summary state S for the summary function in SS.  The visible
   cases set the minimum and maximum to SYSMIS, create a moments accumulator
   up to the mean or up to the variance, or create a sorting case writer. */
2546 ctables_summary_init (union ctables_summary *s,
2547 const struct ctables_summary_spec *ss)
2549 switch (ss->function)
2552 case CTSF_areaPCT_COUNT:
2553 case CTSF_areaPCT_VALIDN:
2554 case CTSF_areaPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the minimum and maximum. */
2567 s->min = s->max = SYSMIS;
2572 case CTSF_areaPCT_SUM:
2573 s->moments = moments1_create (MOMENT_MEAN);
2579 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases have two width-0 columns.  ctables_summary_add() stores the value
   in column 0 and the weight in column 1.  The writer sorts on column 0 in
   ascending order. */
2586 struct caseproto *proto = caseproto_create ();
2587 proto = caseproto_add_width (proto, 0);
2588 proto = caseproto_add_width (proto, 0);
2590 struct subcase ordering;
2591 subcase_init (&ordering, 0, 0, SC_ASCEND);
2592 s->writer = sort_create_writer (&ordering, proto);
2593 subcase_uninit (&ordering);
2594 caseproto_unref (proto);
/* Releases the state in S that was set up for the summary function in SS.
   The visible cases destroy the moments accumulator or the case writer. */
2604 ctables_summary_uninit (union ctables_summary *s,
2605 const struct ctables_summary_spec *ss)
2607 switch (ss->function)
2610 case CTSF_areaPCT_COUNT:
2611 case CTSF_areaPCT_VALIDN:
2612 case CTSF_areaPCT_TOTALN:
2631 case CTSF_areaPCT_SUM:
2632 moments1_destroy (s->moments);
2638 casewriter_destroy (s->writer);
/* Adds one observation, VALUE, to summary state S for the summary function
   in SS.  IS_MISSING and IS_INCLUDED describe the observation; the comment
   inside the function explains which summaries count which observations.

   The visible updates track the minimum and maximum (SYSMIS means that no
   value has been seen yet), add the value and its weight to the moments
   accumulator, or add the weight to 'ovalid' and write a (value, weight)
   case to the sorting writer. */
2644 ctables_summary_add (union ctables_summary *s,
2645 const struct ctables_summary_spec *ss,
2646 const union value *value,
2647 bool is_missing, bool is_included,
2650 /* To determine whether a case is included in a given table for a particular
2651 kind of summary, consider the following charts for the variable being
2652 summarized. Only if "yes" appears is the case counted.
2654 Categorical variables: VALIDN other TOTALN
2655 Valid values in included categories yes yes yes
2656 Missing values in included categories --- yes yes
2657 Missing values in excluded categories --- --- yes
2658 Valid values in excluded categories --- --- ---
2660 Scale variables: VALIDN other TOTALN
2661 Valid value yes yes yes
2662 Missing value --- yes yes
2664 Missing values include both user- and system-missing. (The system-missing
2665 value is always in an excluded category.)
2667 One way to interpret the above table is that scale variables are like
2668 categorical variables in which all values are in included categories.
2670 switch (ss->function)
2673 case CTSF_areaPCT_TOTALN:
2678 case CTSF_areaPCT_COUNT:
2684 case CTSF_areaPCT_VALIDN:
2702 if (s->min == SYSMIS || value->f < s->min)
2704 if (s->max == SYSMIS || value->f > s->max)
2715 moments1_add (s->moments, value->f, weight);
2718 case CTSF_areaPCT_SUM:
2720 moments1_add (s->moments, value->f, weight);
2728 s->ovalid += weight;
2730 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2731 *case_num_rw_idx (c, 0) = value->f;
2732 *case_num_rw_idx (c, 1) = weight;
2733 casewriter_write (s->writer, c);
// Computes the final value of the summary in S for the summary function in
// SS.  AREAS holds the areas that contain the cell, indexed by area type.
//
// The percentage summaries divide by a total taken from the area selected by
// SS->calc_area and yield SYSMIS when that total is zero.  The moment-based
// summaries yield SYSMIS when the inputs they need are SYSMIS.
2740 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2741 union ctables_summary *s,
2742 const struct ctables_summary_spec *ss)
2744 switch (ss->function)
2750 return areas[ss->calc_area]->sequence;
2752 case CTSF_areaPCT_COUNT:
2754 const struct ctables_area *a = areas[ss->calc_area];
2755 double a_count = a->count[ss->weighting];
2756 return a_count ? s->count / a_count * 100 : SYSMIS;
2759 case CTSF_areaPCT_VALIDN:
2761 const struct ctables_area *a = areas[ss->calc_area];
2762 double a_valid = a->valid[ss->weighting];
2763 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2766 case CTSF_areaPCT_TOTALN:
2768 const struct ctables_area *a = areas[ss->calc_area];
2769 double a_total = a->total[ss->weighting];
2770 return a_total ? s->count / a_total * 100 : SYSMIS;
// The range is defined only if both extremes were observed.
2785 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2790 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2796 double weight, variance;
2797 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2798 return calc_semean (variance, weight);
// Standard deviation: the square root of the variance.
2804 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2805 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
// The sum is recovered as total weight times mean.
2810 double weight, mean;
2811 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2812 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2818 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2822 case CTSF_areaPCT_SUM:
2824 double weight, mean;
2825 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2826 if (weight == SYSMIS || mean == SYSMIS)
// This cell's sum as a percentage of the area's sum for the same variable.
2829 const struct ctables_area *a = areas[ss->calc_area];
2830 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2831 double denom = sum->sum[ss->weighting];
2832 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2839 struct casereader *reader = casewriter_make_reader (s->writer);
// NOTE(review): PTILE arguments are parsed in the range 0 to 100 elsewhere
// in this file, while the other function handled here passes 0.5.  Confirm
// which scale percentile_create() expects and whether ss->percentile is
// rescaled before it gets here.
2842 struct percentile *ptile = percentile_create (
2843 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2844 struct order_stats *os = &ptile->parent;
2845 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2846 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2847 statistic_destroy (&ptile->parent.parent);
2854 struct casereader *reader = casewriter_make_reader (s->writer);
2857 struct mode *mode = mode_create ();
2858 struct order_stats *os = &mode->parent;
2859 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2860 s->ovalue = mode->mode;
2861 statistic_destroy (&mode->parent.parent);
2869 /* CTABLES occurrences. */
/* One distinct value that occurs in the data for a variable.  Instances are
   kept in an hmap; see ctables_add_occurrence(). */
2871 struct ctables_occurrence
/* Element in the hmap of occurrences, hashed on the value. */
2873 struct hmap_node node;
/* Records that VALUE occurs for VAR in the hash table OCCURRENCES.  Entries
   with the same hash are compared with value_equal() using VAR's width.
   Otherwise a new entry holding a clone of VALUE is allocated and inserted.
   NOTE(review): presumably a match returns early so that each value is
   stored only once; the statement guarded by the comparison is not visible
   here. */
2878 ctables_add_occurrence (const struct variable *var,
2879 const union value *value,
2880 struct hmap *occurrences)
2882 int width = var_get_width (var);
2883 unsigned int hash = value_hash (value, width, 0);
2885 struct ctables_occurrence *o;
2886 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2888 if (value_equal (value, &o->value, width))
2891 o = xmalloc (sizeof *o);
2892 value_clone (&o->value, value, width);
2893 hmap_insert (occurrences, &o->node, hash);
/* Each constant is defined as the SETTINGS_VALUE_SHOW_* constant on its
   right, so the two sets of values are interchangeable. */
2898 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2899 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2900 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2901 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
/* Members of a table cell. */
2906 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2907 all the axes (except the scalar variable, if any). */
2908 struct hmap_node node;
/* The section whose 'cells' hmap holds this cell. */
2909 struct ctables_section *section;
2911 /* The areas that contain this cell. */
2912 uint32_t omit_areas;
2913 struct ctables_area *areas[N_CTATS];
2918 enum ctables_summary_variant sv;
/* Per-axis cell data.  ctables_cell_compare_3way() reads it as
   axes[a].cvs[i], one ctables_cell_value per variable in the nest. */
2920 struct ctables_cell_axis
2922 struct ctables_cell_value
2924 const struct ctables_category *category;
/* Array of summary state for this cell. */
2932 union ctables_summary *summaries;
/* A section of a table.  It has one nest per pivot axis and owns hash tables
   of occurrences, cells, and areas. */
2935 struct ctables_section
/* The table this section belongs to. */
2938 struct ctables_table *table;
2939 struct ctables_nest *nests[PIVOT_N_AXES];
2942 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2943 struct hmap cells; /* Contains "struct ctables_cell"s. */
2944 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
/* Forward declaration, so that the function can be used before it is
   defined. */
2947 static void ctables_section_uninit (struct ctables_section *);
/* One table in a CTABLES command.  It holds the parsed axis expressions and
   their enumerated stacks, the sections, and the settings for summaries,
   summary labels, and category labels, plus per-variable categories. */
2949 struct ctables_table
/* The command that contains this table. */
2951 struct ctables *ctables;
/* Parsed axis expressions and the stacks of nests derived from them, one per
   pivot axis. */
2952 struct ctables_axis *axes[PIVOT_N_AXES];
2953 struct ctables_stack stacks[PIVOT_N_AXES];
2954 struct ctables_section *sections;
2956 enum pivot_axis_type summary_axis;
2957 struct ctables_summary_spec_set summary_specs;
2958 struct variable **sum_vars;
2961 enum pivot_axis_type slabels_axis;
2962 bool slabels_visible;
2964 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2966 Most commonly, label_axis[a] == a, and in particular we always have
2967 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2969 If ROWLABELS or COLLABELS is specified, then one of
2970 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2971 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2973 If any category labels are moved, then 'clabels_example' is one of the
2974 variables being moved (and it is otherwise NULL). All of the variables
2975 being moved have the same width, value labels, and categories, so this
2976 example variable can be used to find those out.
2978 The remaining members in this group are relevant only if category labels
2981 'clabels_values_map' holds a "struct ctables_value" for all the values
2982 that appear in all of the variables in the moved categories. It is
2983 accumulated as the data is read. Once the data is fully read, its
2984 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2986 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2987 enum pivot_axis_type clabels_from_axis;
2988 enum pivot_axis_type clabels_to_axis;
2989 int clabels_start_ofs, clabels_end_ofs;
2990 const struct variable *clabels_example;
2991 struct hmap clabels_values_map;
2992 struct ctables_value **clabels_values;
2993 size_t n_clabels_values;
2995 /* Indexed by variable dictionary index. */
2996 struct ctables_categories **categories;
2997 size_t n_categories;
3005 struct ctables_chisq *chisq;
3006 struct ctables_pairwise *pairwise;
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis whose cell values are read. */
3009 struct ctables_cell_sort_aux
3011 const struct ctables_nest *nest;
3012 enum pivot_axis_type a;
/* Three-way comparison callback for ordering cells.  A_ and B_ each point
   to a "struct ctables_cell *"; AUX_ is a struct ctables_cell_sort_aux that
   names the nest and the axis to compare on.

   The nest's variables are visited in order, skipping the scale variable.
   The first variable on which the two cells differ decides the result. */
3016 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3018 const struct ctables_cell_sort_aux *aux = aux_;
3019 struct ctables_cell *const *ap = a_;
3020 struct ctables_cell *const *bp = b_;
3021 const struct ctables_cell *a = *ap;
3022 const struct ctables_cell *b = *bp;
3024 const struct ctables_nest *nest = aux->nest;
3025 for (size_t i = 0; i < nest->n; i++)
3026 if (i != nest->scale_idx)
3028 const struct variable *var = nest->vars[i];
3029 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3030 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Cells in different categories are ordered by comparing the category
   pointers themselves. */
3031 if (a_cv->category != b_cv->category)
3032 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category: how the values are compared depends on the category
   type. */
3034 const union value *a_val = &a_cv->value;
3035 const union value *b_val = &b_cv->value;
3036 switch (a_cv->category->type)
3042 case CCT_POSTCOMPUTE:
3043 case CCT_EXCLUDED_MISSING:
3044 /* Must be equal. */
3052 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3060 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* A descending sort simply negates the comparison result. */
3062 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Comparison by value label; value_compare_3way() on the raw values is
   also used in this branch. */
3068 const char *a_label = var_lookup_value_label (var, a_val);
3069 const char *b_label = var_lookup_value_label (var, b_val);
3075 cmp = strcmp (a_label, b_label);
3081 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3084 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Looks up the area of type AREA that CELL belongs to within its section,
   adding a new area when no existing one matches.

   An area is identified by the variables listed in nest->areas[AREA] on
   every axis: for each of them the key includes the cell's category and,
   unless the category is a total, subtotal, or postcompute, its value. */
3095 static struct ctables_area *
3096 ctables_area_insert (struct ctables_cell *cell, enum ctables_area_type area)
3098 struct ctables_section *s = cell->section;
/* Hash the identifying categories and values. */
3100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3102 const struct ctables_nest *nest = s->nests[a];
3103 for (size_t i = 0; i < nest->n_areas[area]; i++)
3105 size_t v_idx = nest->areas[area][i];
3106 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3107 hash = hash_pointer (cv->category, hash);
3108 if (cv->category->type != CCT_TOTAL
3109 && cv->category->type != CCT_SUBTOTAL
3110 && cv->category->type != CCT_POSTCOMPUTE)
3111 hash = value_hash (&cv->value,
3112 var_get_width (nest->vars[v_idx]), hash);
/* Search the bucket: an area matches when its example cell agrees with
   CELL on every identifying category and value.  Note that the axis loop
   variable below shadows the area pointer 'a' inside the loop. */
3116 struct ctables_area *a;
3117 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3119 const struct ctables_cell *df = a->example;
3120 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3122 const struct ctables_nest *nest = s->nests[a];
3123 for (size_t i = 0; i < nest->n_areas[area]; i++)
3125 size_t v_idx = nest->areas[area][i];
3126 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3127 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3128 if (cv1->category != cv2->category
3129 || (cv1->category->type != CCT_TOTAL
3130 && cv1->category->type != CCT_SUBTOTAL
3131 && cv1->category->type != CCT_POSTCOMPUTE
3132 && !value_equal (&cv1->value, &cv2->value,
3133 var_get_width (nest->vars[v_idx]))))
/* No match: create a new area with CELL as its example and a zeroed sums
   array when the table has sum variables. */
3142 struct ctables_sum *sums = (s->table->n_sum_vars
3143 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3146 a = xmalloc (sizeof *a);
3147 *a = (struct ctables_area) { .example = cell, .sums = sums };
3148 hmap_insert (&s->areas[area], &a->node, hash);
/* Finds the cell in section S that case C falls into given the per-axis
   categories CATS, creating and registering the cell when it does not exist
   yet.

   A cell is identified, for every non-scale variable on every axis, by its
   category and, unless the category is a total, subtotal, or postcompute,
   by the case's value for that variable. */
3152 static struct ctables_cell *
3153 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3154 const struct ctables_category **cats[PIVOT_N_AXES])
3157 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3158 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3160 const struct ctables_nest *nest = s->nests[a];
3161 for (size_t i = 0; i < nest->n; i++)
3162 if (i != nest->scale_idx)
3164 hash = hash_pointer (cats[a][i], hash);
3165 if (cats[a][i]->type != CCT_TOTAL
3166 && cats[a][i]->type != CCT_SUBTOTAL
3167 && cats[a][i]->type != CCT_POSTCOMPUTE)
3168 hash = value_hash (case_data (c, nest->vars[i]),
3169 var_get_width (nest->vars[i]), hash);
/* Search the bucket for a cell with the same categories and values. */
3175 struct ctables_cell *cell;
3176 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3178 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3180 const struct ctables_nest *nest = s->nests[a];
3181 for (size_t i = 0; i < nest->n; i++)
3182 if (i != nest->scale_idx
3183 && (cats[a][i] != cell->axes[a].cvs[i].category
3184 || (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE
3187 && !value_equal (case_data (c, nest->vars[i]),
3188 &cell->axes[a].cvs[i].value,
3189 var_get_width (nest->vars[i])))))
/* No existing cell matched: build a new one. */
3198 cell = xmalloc (sizeof *cell);
3202 cell->omit_areas = 0;
3203 cell->postcompute = false;
3204 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3206 const struct ctables_nest *nest = s->nests[a];
/* An empty nest gets no cell-value array at all. */
3207 cell->axes[a].cvs = (nest->n
3208 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3210 for (size_t i = 0; i < nest->n; i++)
3212 const struct ctables_category *cat = cats[a][i];
3213 const struct variable *var = nest->vars[i];
3214 const union value *value = case_data (c, var);
3215 if (i != nest->scale_idx)
3217 const struct ctables_category *subtotal = cat->subtotal;
3218 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category sets bits in 'omit_areas', a
   bitmask of area types chosen according to the axis the category is on. */
3221 if (cat->type == CCT_TOTAL
3222 || cat->type == CCT_SUBTOTAL
3223 || cat->type == CCT_POSTCOMPUTE)
3227 case PIVOT_AXIS_COLUMN:
3228 cell->omit_areas |= ((1u << CTAT_TABLE) |
3229 (1u << CTAT_LAYER) |
3230 (1u << CTAT_LAYERCOL) |
3231 (1u << CTAT_SUBTABLE) |
3234 case PIVOT_AXIS_ROW:
3235 cell->omit_areas |= ((1u << CTAT_TABLE) |
3236 (1u << CTAT_LAYER) |
3237 (1u << CTAT_LAYERROW) |
3238 (1u << CTAT_SUBTABLE) |
3241 case PIVOT_AXIS_LAYER:
3242 cell->omit_areas |= ((1u << CTAT_TABLE) |
3243 (1u << CTAT_LAYER));
3247 if (cat->type == CCT_POSTCOMPUTE)
3248 cell->postcompute = true;
/* Record the category and a private copy of the case's value. */
3251 cell->axes[a].cvs[i].category = cat;
3252 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* Initialize one summary per summary spec, attach the area of each type,
   and register the new cell in the section. */
3256 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3257 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3258 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3259 for (size_t i = 0; i < specs->n; i++)
3260 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3261 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3262 cell->areas[at] = ctables_area_insert (cell, at);
3263 hmap_insert (&s->cells, &cell->node, hash);
/* Combines the N_CTWS weights in SRC into DST, one entry per weighting
   type.  NOTE(review): the loop body is not among the lines visible here;
   presumably it adds src[wt] to dst[wt] -- confirm. */
3268 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3270 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C to the cell selected by CATS in section S, creating the cell
   if necessary.  WEIGHT holds the case's weight for each weighting type and
   IS_INCLUDED says whether the case counts as included.

   Every summary of the cell is updated with the summary variable's value.
   Then each area the cell belongs to accumulates the weights and the
   table's sum variables, except for the area types flagged in the cell's
   'omit_areas' bitmask. */
3275 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3276 const struct ctables_category **cats[PIVOT_N_AXES],
3277 bool is_included, double weight[N_CTWS])
3279 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3280 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3282 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3283 const union value *value = case_data (c, specs->var);
3284 bool is_missing = var_is_value_missing (specs->var, value);
/* For scale summaries a listwise-missing case also counts as missing. */
3285 bool is_scale_missing
3286 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3288 for (size_t i = 0; i < specs->n; i++)
3289 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3290 is_scale_missing, is_included,
3291 weight[specs->specs[i].weighting]);
/* 'omit_areas' is a bitmask with one bit per area type (it is built with
   "1u << CTAT_*" in ctables_cell_insert__()), so a single area has to be
   tested with bitwise '&'.  Logical '&&' would treat the always-nonzero
   "1u << at" as true and skip every area as soon as any bit was set. */
3292 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3293 if (!(cell->omit_areas & (1u << at)))
3295 struct ctables_area *a = cell->areas[at];
3297 add_weight (a->total, weight);
3299 add_weight (a->count, weight);
3302 add_weight (a->valid, weight);
/* Sum variables are accumulated per weighting type, skipping addends that
   are missing for their own variable. */
3304 if (!is_scale_missing)
3305 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3307 const struct variable *var = s->table->sum_vars[i];
3308 double addend = case_num (c, var);
3309 if (!var_is_num_missing (var, addend))
3310 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3311 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the cells obtained by substituting total categories into
   CATS.  Starting at axis START_AXIS and nest position START_NEST, each
   non-scale variable's total category is looked up in the table's
   categories; the case is added for that combination and the function
   recurses on the positions after it, so that combinations of several
   totals are covered too.

   NOTE(review): the statements that store the total into CATS and restore
   the saved category afterward are not among the lines visible here. */
3318 recurse_totals (struct ctables_section *s, const struct ccase *c,
3319 const struct ctables_category **cats[PIVOT_N_AXES],
3320 bool is_included, double weight[N_CTWS],
3321 enum pivot_axis_type start_axis, size_t start_nest)
3323 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3325 const struct ctables_nest *nest = s->nests[a];
3326 for (size_t i = start_nest; i < nest->n; i++)
3328 if (i == nest->scale_idx)
3331 const struct variable *var = nest->vars[i];
3333 const struct ctables_category *total = ctables_categories_total (
3334 s->table->categories[var_get_dict_index (var)]);
/* Remember the current category for this position. */
3337 const struct ctables_category *save = cats[a][i];
3339 ctables_cell_add__ (s, c, cats, is_included, weight);
3340 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Counterpart of recurse_totals() for subtotals.  Starting at axis
   START_AXIS and nest position START_NEST, each non-scale position's
   category is replaced by that category's 'subtotal'; the case is added for
   the resulting combination and the function recurses on the positions
   after it.

   NOTE(review): the guard on whether a subtotal exists and the statement
   restoring the saved category are not among the lines visible here. */
3349 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3350 const struct ctables_category **cats[PIVOT_N_AXES],
3351 bool is_included, double weight[N_CTWS],
3352 enum pivot_axis_type start_axis, size_t start_nest)
3354 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3356 const struct ctables_nest *nest = s->nests[a];
3357 for (size_t i = start_nest; i < nest->n; i++)
3359 if (i == nest->scale_idx)
3362 const struct ctables_category *save = cats[a][i];
3365 cats[a][i] = save->subtotal;
3366 ctables_cell_add__ (s, c, cats, is_included, weight);
3367 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C, with per-weighting-type weights WEIGHT, to section S.

   The category of every non-scale variable on every axis is determined
   first.  Then the value of each such variable is recorded as an
   occurrence, and the case is added to its own cell as well as to the cells
   for the applicable totals and subtotals. */
3376 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3377 double weight[N_CTWS])
/* A nest may be empty (n == 0), but a variable-length array must have a
   size greater than zero, so reserve at least one element per axis. */
3379 const struct ctables_category *layer_cats[MAX (s->nests[PIVOT_AXIS_LAYER]->n, 1)];
3380 const struct ctables_category *row_cats[MAX (s->nests[PIVOT_AXIS_ROW]->n, 1)];
3381 const struct ctables_category *column_cats[MAX (s->nests[PIVOT_AXIS_COLUMN]->n, 1)];
3382 const struct ctables_category **cats[PIVOT_N_AXES] =
3384 [PIVOT_AXIS_LAYER] = layer_cats,
3385 [PIVOT_AXIS_ROW] = row_cats,
3386 [PIVOT_AXIS_COLUMN] = column_cats,
3389 bool is_included = true;
/* Match each non-scale variable's value against the table's categories for
   that variable. */
3391 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3393 const struct ctables_nest *nest = s->nests[a];
3394 for (size_t i = 0; i < nest->n; i++)
3395 if (i != nest->scale_idx)
3397 const struct variable *var = nest->vars[i];
3398 const union value *value = case_data (c, var);
3400 cats[a][i] = ctables_categories_match (
3401 s->table->categories[var_get_dict_index (var)], value, var);
3404 if (i != nest->summary_idx)
3407 if (!var_is_value_missing (var, value))
/* A shared placeholder category stands in for the value, and the case is
   flagged as not included. */
3410 static const struct ctables_category cct_excluded_missing = {
3411 .type = CCT_EXCLUDED_MISSING,
3414 cats[a][i] = &cct_excluded_missing;
3415 is_included = false;
/* Record the value of every non-scale variable as an occurrence. */
3421 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3423 const struct ctables_nest *nest = s->nests[a];
3424 for (size_t i = 0; i < nest->n; i++)
3425 if (i != nest->scale_idx)
3427 const struct variable *var = nest->vars[i];
3428 const union value *value = case_data (c, var);
3429 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its own cell, then to the total and subtotal cells. */
3433 ctables_cell_add__ (s, c, cats, is_included, weight);
3434 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3435 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* A value kept in a table's 'clabels_values_map'. */
3438 struct ctables_value
3440 struct hmap_node node;
/* Searches T's 'clabels_values_map' for an entry equal to VALUE, which has
   the given WIDTH.  HASH must be VALUE's hash as computed by the callers,
   value_hash (VALUE, WIDTH, 0). */
3445 static struct ctables_value *
3446 ctables_value_find__ (const struct ctables_table *t, const union value *value,
3447 int width, unsigned int hash)
3449 struct ctables_value *clv;
3450 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3451 hash, &t->clabels_values_map)
3452 if (value_equal (value, &clv->value, width))
/* Adds VALUE, of the given width, to T's 'clabels_values_map'.  The map is
   searched for an equal value first; a new entry holds its own clone of
   VALUE. */
3458 ctables_value_insert (struct ctables_table *t, const union value *value,
3461 unsigned int hash = value_hash (value, width, 0);
3462 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3465 clv = xmalloc (sizeof *clv);
3466 value_clone (&clv->value, value, width);
3467 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Finds the "struct ctables_value" for CELL's moved category label.  The
   value looked up is the one CELL holds for the last variable of the nest
   on the table's 'clabels_from_axis'.  The lookup asserts that an entry
   exists.  The 'clabels_example' check handles tables whose category labels
   are not moved. */
3471 static const struct ctables_value *
3472 ctables_value_find (const struct ctables_cell *cell)
3474 const struct ctables_section *s = cell->section;
3475 const struct ctables_table *t = s->table;
3476 if (!t->clabels_example)
3479 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3480 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3481 const union value *value
3482 = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3483 int width = var_get_width (var);
3484 const struct ctables_value *ctv = ctables_value_find__ (
3485 t, value, width, value_hash (value, width, 0));
3486 assert (ctv != NULL);
/* Three-way comparison callback for sorting an array of
   "struct ctables_value *".  A_ and B_ point to array elements and WIDTH_
   points to the int width shared by all the values. */
3491 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3493 const struct ctables_value *const *ap = a_;
3494 const struct ctables_value *const *bp = b_;
3495 const struct ctables_value *a = *ap;
3496 const struct ctables_value *b = *bp;
3497 const int *width = width_;
3498 return value_compare_3way (&a->value, &b->value, *width);
/* Turns the values gathered in T's 'clabels_values_map' into the sorted
   array 'clabels_values' (with 'n_clabels_values' elements) and numbers
   each value's 'leaf' with its position in that array. */
3502 ctables_sort_clabels_values (struct ctables_table *t)
3504 const struct variable *v0 = t->clabels_example;
3505 int width = var_get_width (v0);
3507 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Values that carry a value label and match the example variable's
   categories are inserted into the map by this loop. */
3510 const struct val_labs *val_labs = var_get_value_labels (v0);
3511 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3512 vl = val_labs_next (val_labs, vl))
3513 if (ctables_categories_match (c0, &vl->value, v0))
3514 ctables_value_insert (t, &vl->value, width);
/* Copy the map's entries into an array. */
3517 size_t n = hmap_count (&t->clabels_values_map);
3518 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3520 struct ctables_value *clv;
3522 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3523 t->clabels_values[i++] = clv;
3524 t->n_clabels_values = n;
/* Sort by value, passing the width as auxiliary data, then number the
   entries in sorted order. */
3527 sort (t->clabels_values, n, sizeof *t->clabels_values,
3528 compare_ctables_values_3way, &width);
3530 for (size_t i = 0; i < n; i++)
3531 t->clabels_values[i]->leaf = i;
/* Command-wide CTABLES state.  Each table points back to it through its
   'ctables' member, and ctables_destroy() releases it. */
3536 const struct dictionary *dict;
3537 struct pivot_table_look *look;
3539 /* For CTEF_* formats. */
3540 struct fmt_settings ctables_formats;
3542 /* If this is NULL, zeros are displayed using the normal print format.
3543 Otherwise, this string is displayed. */
3546 /* If this is NULL, missing values are displayed using the normal print
3547 format. Otherwise, this string is displayed. */
3550 /* Indexed by variable dictionary index. */
3551 enum ctables_vlabel *vlabels;
3553 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3555 bool mrsets_count_duplicates; /* MRSETS. */
3556 bool smissing_listwise; /* SMISSING. */
3557 struct variable *e_weight; /* WEIGHT. */
3558 int hide_threshold; /* HIDESMALLCOUNTS. */
/* The tables in this command; ctables_destroy() destroys each of them. */
3560 struct ctables_table **tables;
/* Binary-operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() for addition. */
3565 ctpo_add (double a, double b)
/* Binary-operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() for subtraction. */
3571 ctpo_sub (double a, double b)
/* Binary-operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() for multiplication. */
3577 ctpo_mul (double a, double b)
/* Division callback for ctables_pcexpr_evaluate_nonterminal(): returns
   A / B, or SYSMIS when B is zero. */
3583 ctpo_div (double a, double b)
3585 return b ? a / b : SYSMIS;
/* Exponentiation callback for ctables_pcexpr_evaluate_nonterminal():
   computes pow (A, B).  errno is saved before the call.  NOTE(review): how
   errno is checked and restored afterward is not among the lines visible
   here. */
3589 ctpo_pow (double a, double b)
3591 int save_errno = errno;
3593 double result = pow (a, b);
/* Negation callback for ctables_pcexpr_evaluate_nonterminal(), which calls
   it with a single evaluated argument; B is unused. */
3601 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression: the cell being computed
   and its section, the categories of the postcompute's variable, the axis
   the postcompute category is on, and the format type used when looking up
   categories for the expression. */
3606 struct ctables_pcexpr_evaluate_ctx
3608 const struct ctables_cell *cell;
3609 const struct ctables_section *section;
3610 const struct ctables_categories *cats;
3611 enum pivot_axis_type pc_a;
3614 enum fmt_type parse_format;
3617 static double ctables_pcexpr_evaluate (
3618 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates the first N_ARGS (1 or 2) subexpressions of E and applies
   EVALUATE to the results.  An unused second argument is passed as 0.  Each
   argument is checked for being non-finite or SYSMIS before EVALUATE is
   called. */
3621 ctables_pcexpr_evaluate_nonterminal (
3622 const struct ctables_pcexpr_evaluate_ctx *ctx,
3623 const struct ctables_pcexpr *e, size_t n_args,
3624 double evaluate (double, double))
3626 double args[2] = { 0, 0 };
3627 for (size_t i = 0; i < n_args; i++)
3629 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3630 if (!isfinite (args[i]) || args[i] == SYSMIS)
3633 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that has the same categories and
   values as the context's cell, except that the position holding the
   postcompute (axis ctx->pc_a, index ctx->pc_a_idx) is replaced by PC_CV.
   The summary is the one at ctx->summary_idx. */
3637 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3638 const struct ctables_cell_value *pc_cv)
3640 const struct ctables_section *s = ctx->section;
/* Hash the substituted cell key the same way ctables_cell_insert__()
   hashes cells. */
3643 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3645 const struct ctables_nest *nest = s->nests[a];
3646 for (size_t i = 0; i < nest->n; i++)
3647 if (i != nest->scale_idx)
3649 const struct ctables_cell_value *cv
3650 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3651 : &ctx->cell->axes[a].cvs[i]);
3652 hash = hash_pointer (cv->category, hash);
3653 if (cv->category->type != CCT_TOTAL
3654 && cv->category->type != CCT_SUBTOTAL
3655 && cv->category->type != CCT_POSTCOMPUTE)
3656 hash = value_hash (&cv->value,
3657 var_get_width (nest->vars[i]), hash);
/* Look for the cell with that key. */
3661 struct ctables_cell *tc;
3662 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3664 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3666 const struct ctables_nest *nest = s->nests[a];
3667 for (size_t i = 0; i < nest->n; i++)
3668 if (i != nest->scale_idx)
3670 const struct ctables_cell_value *p_cv
3671 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3672 : &ctx->cell->axes[a].cvs[i]);
3673 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3674 if (p_cv->category != t_cv->category
3675 || (p_cv->category->type != CCT_TOTAL
3676 && p_cv->category->type != CCT_SUBTOTAL
3677 && p_cv->category->type != CCT_POSTCOMPUTE
3678 && !value_equal (&p_cv->value,
3680 var_get_width (nest->vars[i]))))
/* Read the requested summary from the cell that was found. */
3692 const struct ctables_table *t = s->table;
3693 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3694 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3695 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
3696 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.
   Category references are resolved to cell summary values; operators are
   evaluated through ctables_pcexpr_evaluate_nonterminal(). */
3700 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3701 const struct ctables_pcexpr *e)
/* References that can cover many values: sum the category value over
   every occurrence of the postcompute's variable that matches the
   referenced category. */
3708 case CTPO_CAT_NRANGE:
3709 case CTPO_CAT_SRANGE:
3710 case CTPO_CAT_MISSING:
3711 case CTPO_CAT_OTHERNM:
3713 struct ctables_cell_value cv = {
3714 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
3716 assert (cv.category != NULL);
3718 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3719 const struct ctables_occurrence *o;
3722 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3723 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3724 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3726 cv.value = o->value;
3727 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* References to a single cell: a number, a subtotal, or the total. */
3732 case CTPO_CAT_NUMBER:
3733 case CTPO_CAT_SUBTOTAL:
3734 case CTPO_CAT_TOTAL:
3736 struct ctables_cell_value cv = {
3737 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
3738 .value = { .f = e->number },
3740 assert (cv.category != NULL);
3741 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String reference: when the string is shorter than the variable's width
   it is copied into a buffer padded with spaces to that width. */
3744 case CTPO_CAT_STRING:
3746 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
3748 if (width > e->string.length)
3750 s = xmalloc (width);
3751 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
3754 const struct ctables_category *category
3755 = ctables_find_category_for_postcompute (
3756 ctx->section->table->ctables->dict,
3757 ctx->cats, ctx->parse_format, e);
3758 assert (category != NULL);
/* The matched category may be numeric or string; fill in the value
   accordingly. */
3760 struct ctables_cell_value cv = { .category = category };
3761 if (category->type == CCT_NUMBER)
3762 cv.value.f = category->number;
3763 else if (category->type == CCT_STRING)
3764 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
3768 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary ones and one unary one. */
3774 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3777 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3780 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3783 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3786 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3789 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category among CELL's values, scanning every axis
   and every nest position of section S.  CELL must be a postcompute cell,
   and a postcompute category must be found (both are asserted).  The
   position's index is stored in *PC_A_IDX_P; presumably the axis is stored
   in *PC_A_P on a line not visible here -- confirm. */
3795 static const struct ctables_category *
3796 ctables_cell_postcompute (const struct ctables_section *s,
3797 const struct ctables_cell *cell,
3798 enum pivot_axis_type *pc_a_p,
3801 assert (cell->postcompute);
3802 const struct ctables_category *pc_cat = NULL;
3803 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
3804 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3806 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3807 if (cv->category->type == CCT_POSTCOMPUTE)
3811 /* Multiple postcomputes cross each other. The value is
3816 pc_cat = cv->category;
3820 *pc_a_idx_p = pc_a_idx;
3824 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   spec SS by evaluating the postcompute's expression.

   The postcompute's own summary specs are searched for one with the same
   function, weighting, area, and percentile as SS; a match supplies
   *FORMAT and *IS_CTABLES_FORMAT. */
3829 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3830 const struct ctables_cell *cell,
3831 const struct ctables_summary_spec *ss,
3832 struct fmt_spec *format,
3833 bool *is_ctables_format,
/* Locate the postcompute category and its position within the cell. */
3836 enum pivot_axis_type pc_a = 0;
3837 size_t pc_a_idx = 0;
3838 const struct ctables_category *pc_cat = ctables_cell_postcompute (
3839 s, cell, &pc_a, &pc_a_idx);
3843 const struct ctables_postcompute *pc = pc_cat->pc;
3846 for (size_t i = 0; i < pc->specs->n; i++)
3848 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
3849 if (ss->function == ss2->function
3850 && ss->weighting == ss2->weighting
3851 && ss->calc_area == ss2->calc_area
3852 && ss->percentile == ss2->percentile)
3854 *format = ss2->format;
3855 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable at the
   postcompute's position. */
3861 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3862 const struct ctables_categories *cats = s->table->categories[
3863 var_get_dict_index (var)];
3864 struct ctables_pcexpr_evaluate_ctx ctx = {
3869 .pc_a_idx = pc_a_idx,
3870 .summary_idx = summary_idx,
3871 .parse_format = pc_cat->parse_format,
3873 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3876 /* Chi-square test (SIGTEST). */
/* A table holds one of these through its 'chisq' member;
   ctables_chisq_destroy() releases it. */
3877 struct ctables_chisq
3880 bool include_mrsets;
3884 /* Pairwise comparison test (COMPARETEST). */
/* A table holds one of these through its 'pairwise' member;
   ctables_pairwise_destroy() releases it. */
3885 struct ctables_pairwise
/* The test type is either PROP or MEAN. */
3887 enum { PROP, MEAN } type;
3889 bool include_mrsets;
3890 bool meansvariance_allcats;
/* Adjustment method; the named values start at 1, so 0 is neither of
   them. */
3892 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting called NAME: an optional equals sign
   followed by either the keyword DEFAULT or a number in the closed range
   [0, DBL_MAX].  A number is stored in *WIDTH. */
3901 parse_col_width (struct lexer *lexer, const char *name, double *width)
3903 lex_match (lexer, T_EQUALS);
3904 if (lex_match_id (lexer, "DEFAULT"))
3906 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3908 *width = lex_number (lexer);
/* Parses the keyword NO or YES for the setting *B.  Anything else is
   reported as a syntax error that expects YES or NO. */
3918 parse_bool (struct lexer *lexer, bool *b)
3920 if (lex_match_id (lexer, "NO"))
3922 else if (lex_match_id (lexer, "YES"))
3926 lex_error_expecting (lexer, "YES", "NO");
/* Releases CHISQ; called by ctables_table_destroy() on a table's 'chisq'
   member. */
3933 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Releases PAIRWISE; called by ctables_table_destroy() on a table's
   'pairwise' member. */
3939 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, categories, axes
   and stacks, summary specs, category-label values, and significance-test
   settings. */
3945 ctables_table_destroy (struct ctables_table *t)
3950 for (size_t i = 0; i < t->n_sections; i++)
3951 ctables_section_uninit (&t->sections[i]);
/* Categories are reference counted, so each slot only drops its own
   reference. */
3954 for (size_t i = 0; i < t->n_categories; i++)
3955 ctables_categories_unref (t->categories[i]);
3956 free (t->categories);
3958 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3960 ctables_axis_destroy (t->axes[a]);
3961 ctables_stack_uninit (&t->stacks[a]);
3963 free (t->summary_specs.specs);
/* The values in the map were cloned with the example variable's width, so
   that width is used to destroy them. */
3965 struct ctables_value *ctv, *next_ctv;
3966 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3967 &t->clabels_values_map)
3969 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3970 hmap_delete (&t->clabels_values_map, &ctv->node);
3973 hmap_destroy (&t->clabels_values_map);
3974 free (t->clabels_values);
3980 ctables_chisq_destroy (t->chisq);
3981 ctables_pairwise_destroy (t->pairwise);
/* Releases the command-wide state CT: every postcompute in its hash map,
   the CTABLES format settings, the reference to the table look, and every
   table. */
3986 ctables_destroy (struct ctables *ct)
/* The "safe" iteration allows entries to be deleted while iterating. */
3991 struct ctables_postcompute *pc, *next_pc;
3992 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3996 msg_location_destroy (pc->location);
3997 ctables_pcexpr_destroy (pc->expr);
4001 ctables_summary_spec_set_uninit (pc->specs);
4004 hmap_delete (&ct->postcomputes, &pc->hmap_node);
4007 hmap_destroy (&ct->postcomputes);
4009 fmt_settings_uninit (&ct->ctables_formats);
4010 pivot_table_look_unref (ct->look);
4014 for (size_t i = 0; i < ct->n_tables; i++)
4015 ctables_table_destroy (ct->tables[i]);
/* Checks that each of the N_VARS variables in VARS is a string variable.
   For a numeric variable, an error naming the variable is reported at
   CAT's location. */
4021 all_strings (struct variable **vars, size_t n_vars,
4022 const struct ctables_category *cat)
4024 for (size_t j = 0; j < n_vars; j++)
4025 if (var_is_numeric (vars[j]))
4027 msg_at (SE, cat->location,
4028 _("This category specification may be applied only to string "
4029 "variables, but this subcommand tries to apply it to "
4030 "numeric variable %s."),
4031 var_get_name (vars[j]));
/* Parses the body of a categories specification for table T, starting at
   the VARIABLES keyword: a variable list, then either an explicit category
   list in square brackets or implicit-category settings (ORDER, KEY,
   MISSING), and then the TOTAL, LABEL, POSITION, and EMPTY settings.

   The resulting categories object is shared by all the listed variables
   and replaces whatever categories they had in T. */
4038 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
4039 struct ctables *ct, struct ctables_table *t)
4041 if (!lex_force_match_id (lexer, "VARIABLES"))
4043 lex_match (lexer, T_EQUALS);
4045 struct variable **vars;
4047 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables, if there is
   one.  A shared date, time, or date-component format is used further down
   when string categories are parsed. */
4050 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
4051 for (size_t i = 1; i < n_vars; i++)
4053 const struct fmt_spec *f = var_get_print_format (vars[i]);
4054 if (f->type != common_format->type)
4056 common_format = NULL;
4062 && (fmt_get_category (common_format->type)
4063 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object with a reference per variable; each variable's
   previous categories lose a reference. */
4065 struct ctables_categories *c = xmalloc (sizeof *c);
4066 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
4067 for (size_t i = 0; i < n_vars; i++)
4069 struct ctables_categories **cp
4070 = &t->categories[var_get_dict_index (vars[i])];
4071 ctables_categories_unref (*cp);
/* Explicit category list in square brackets.  The token offsets of the
   list are remembered for error reporting. */
4075 size_t allocated_cats = 0;
4076 int cats_start_ofs = -1;
4077 int cats_end_ofs = -1;
4078 if (lex_match (lexer, T_LBRACK))
4080 cats_start_ofs = lex_ofs (lexer);
4083 if (c->n_cats >= allocated_cats)
4084 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4086 int start_ofs = lex_ofs (lexer);
4087 struct ctables_category *cat = &c->cats[c->n_cats];
4088 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
4090 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4093 lex_match (lexer, T_COMMA);
4095 while (!lex_match (lexer, T_RBRACK));
4096 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for an implicit category, adjusted by ORDER, KEY, and
   MISSING below. */
4099 struct ctables_category cat = {
4101 .include_missing = false,
4102 .sort_ascending = true,
4104 bool show_totals = false;
4105 char *total_label = NULL;
4106 bool totals_before = false;
4107 int key_start_ofs = 0;
4108 int key_end_ofs = 0;
/* Settings up to the next slash or the end of the command.  ORDER, KEY,
   and MISSING are accepted only when no explicit categories were given. */
4109 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4111 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
4113 lex_match (lexer, T_EQUALS);
4114 if (lex_match_id (lexer, "A"))
4115 cat.sort_ascending = true;
4116 else if (lex_match_id (lexer, "D"))
4117 cat.sort_ascending = false;
4120 lex_error_expecting (lexer, "A", "D");
4124 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
4126 key_start_ofs = lex_ofs (lexer) - 1;
4127 lex_match (lexer, T_EQUALS);
4128 if (lex_match_id (lexer, "VALUE"))
4129 cat.type = CCT_VALUE;
4130 else if (lex_match_id (lexer, "LABEL"))
4131 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable (plus a percentile for PTILE). */
4134 cat.type = CCT_FUNCTION;
4135 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
4136 &cat.weighting, &cat.area))
4139 if (lex_match (lexer, T_LPAREN))
4141 cat.sort_var = parse_variable (lexer, dict);
4145 if (cat.sort_function == CTSF_PTILE)
4147 lex_match (lexer, T_COMMA);
4148 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4150 cat.percentile = lex_number (lexer);
4154 if (!lex_force_match (lexer, T_RPAREN))
4157 else if (ctables_function_availability (cat.sort_function)
4160 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
4164 key_end_ofs = lex_ofs (lexer) - 1;
/* Sorting by a summary function parses but is reported as
   unimplemented. */
4166 if (cat.type == CCT_FUNCTION)
4168 lex_ofs_error (lexer, key_start_ofs, key_end_ofs,
4169 _("Data-dependent sorting is not implemented."));
4173 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
4175 lex_match (lexer, T_EQUALS);
4176 if (lex_match_id (lexer, "INCLUDE"))
4177 cat.include_missing = true;
4178 else if (lex_match_id (lexer, "EXCLUDE"))
4179 cat.include_missing = false;
4182 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
4186 else if (lex_match_id (lexer, "TOTAL"))
4188 lex_match (lexer, T_EQUALS);
4189 if (!parse_bool (lexer, &show_totals))
4192 else if (lex_match_id (lexer, "LABEL"))
4194 lex_match (lexer, T_EQUALS);
4195 if (!lex_force_string (lexer))
4198 total_label = ss_xstrdup (lex_tokss (lexer));
4201 else if (lex_match_id (lexer, "POSITION"))
4203 lex_match (lexer, T_EQUALS);
4204 if (lex_match_id (lexer, "BEFORE"))
4205 totals_before = true;
4206 else if (lex_match_id (lexer, "AFTER"))
4207 totals_before = false;
4210 lex_error_expecting (lexer, "BEFORE", "AFTER");
4214 else if (lex_match_id (lexer, "EMPTY"))
4216 lex_match (lexer, T_EQUALS);
4217 if (lex_match_id (lexer, "INCLUDE"))
4218 c->show_empty = true;
4219 else if (lex_match_id (lexer, "EXCLUDE"))
4220 c->show_empty = false;
4223 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unknown keyword: the list of expected keywords comes in two versions,
   with and without ORDER, KEY, and MISSING. */
4230 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
4231 "TOTAL", "LABEL", "POSITION", "EMPTY");
4233 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category, located at the KEY specification. */
4241 cat.location = lex_ofs_location (lexer, key_start_ofs, key_end_ofs);
4243 if (c->n_cats >= allocated_cats)
4244 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4245 c->cats[c->n_cats++] = cat;
/* Add the totals category, either at the front or at the end.  Its label
   defaults to the translated string "Total". */
4250 if (c->n_cats >= allocated_cats)
4251 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
4253 struct ctables_category *totals;
4256 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
4257 totals = &c->cats[0];
4260 totals = &c->cats[c->n_cats];
4263 *totals = (struct ctables_category) {
4265 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link categories to their subtotal, walking forward when totals come
   first and backward otherwise. */
4269 struct ctables_category *subtotal = NULL;
4270 for (size_t i = totals_before ? 0 : c->n_cats;
4271 totals_before ? i < c->n_cats : i-- > 0;
4272 totals_before ? i++ : 0)
4274 struct ctables_category *cat = &c->cats[i];
4283 cat->subtotal = subtotal;
4286 case CCT_POSTCOMPUTE:
4297 case CCT_EXCLUDED_MISSING:
/* With an explicit category list, check each category against the
   variables' types. */
4302 if (cats_start_ofs != -1)
4304 for (size_t i = 0; i < c->n_cats; i++)
4306 struct ctables_category *cat = &c->cats[i];
4309 case CCT_POSTCOMPUTE:
4310 cat->parse_format = parse_strings ? common_format->type : FMT_F;
4311 struct msg_location *cats_location
4312 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
4313 bool ok = ctables_recursive_check_postcompute (
4314 dict, cat->pc->expr, cat, c, cats_location);
4315 msg_location_destroy (cats_location);
/* Categories that apply only to numeric variables are rejected for string
   variables. */
4322 for (size_t j = 0; j < n_vars; j++)
4323 if (var_is_alpha (vars[j]))
4325 msg_at (SE, cat->location,
4326 _("This category specification may be applied "
4327 "only to numeric variables, but this "
4328 "subcommand tries to apply it to string "
4330 var_get_name (vars[j]));
/* A string category is parsed with the common format and becomes a
   number category; in the other branch all the variables must be
   strings. */
4339 if (!parse_category_string (cat->location, cat->string, dict,
4340 common_format->type, &n))
4343 ss_dealloc (&cat->string);
4345 cat->type = CCT_NUMBER;
4348 else if (!all_strings (vars, n_vars, cat))
/* Likewise for a string range, whose two ends are parsed separately and
   which becomes a numeric range. */
4357 if (!cat->srange[0].string)
4359 else if (!parse_category_string (cat->location,
4360 cat->srange[0], dict,
4361 common_format->type, &n[0]))
4364 if (!cat->srange[1].string)
4366 else if (!parse_category_string (cat->location,
4367 cat->srange[1], dict,
4368 common_format->type, &n[1]))
4371 ss_dealloc (&cat->srange[0]);
4372 ss_dealloc (&cat->srange[1]);
4374 cat->type = CCT_NRANGE;
4375 cat->nrange[0] = n[0];
4376 cat->nrange[1] = n[1];
4378 else if (!all_strings (vars, n_vars, cat))
4389 case CCT_EXCLUDED_MISSING:
/* The summary spec set that a merge item refers to;
   merge_item_compare_3way() indexes its 'specs' with the item's 'ofs'. */
4406 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B
   currently refer to.  The keys, in order, are function, weighting,
   calculation area, percentile, and label (a null label compares as the
   empty string).

   The percentile key compares in the opposite direction from the other
   numeric keys: the item with the smaller percentile sorts later. */
4411 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4413 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4414 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4415 if (as->function != bs->function)
4416 return as->function > bs->function ? 1 : -1;
4417 else if (as->weighting != bs->weighting)
4418 return as->weighting > bs->weighting ? 1 : -1;
4419 else if (as->calc_area != bs->calc_area)
4420 return as->calc_area > bs->calc_area ? 1 : -1;
4421 else if (as->percentile != bs->percentile)
4422 return as->percentile < bs->percentile ? 1 : -1;
4424 const char *as_label = as->label ? as->label : "";
4425 const char *bs_label = bs->label ? bs->label : "";
4426 return strcmp (as_label, bs_label);
/* Recursive helper that adds sections to table T, one for each combination
   of nest indexes across the axes.

   While A is a valid axis, iterates IX[A] from 0 up to (but not including)
   MAX (t->stacks[a].n, 1) and recurses on the next axis.  The remaining code,
   presumably reached once A equals PIVOT_N_AXES (the branch structure is not
   visible here), claims the next slot in t->sections and initializes it: the
   cells map, one occurrences map per nest variable on each axis, and one map
   per area type.

   The caller must already have allocated enough room in t->sections;
   ctables_execute() allocates the product of MAX (1, stacks[].n) over the
   three axes. */
4430 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4431 size_t ix[PIVOT_N_AXES])
4433 if (a < PIVOT_N_AXES)
/* An axis with an empty stack still gets one iteration. */
4435 size_t limit = MAX (t->stacks[a].n, 1);
4436 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4437 ctables_table_add_section (t, a + 1, ix);
/* Claim the next unused section in T. */
4441 struct ctables_section *s = &t->sections[t->n_sections++];
4442 *s = (struct ctables_section) {
4444 .cells = HMAP_INITIALIZER (s->cells),
4446 for (a = 0; a < PIVOT_N_AXES; a++)
/* The nest on axis A selected by the indexes chosen during recursion. */
4449 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrences map for each variable in the nest. */
4451 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4452 for (size_t i = 0; i < nest->n; i++)
4453 hmap_init (&s->occurrences[a][i]);
4455 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4456 hmap_init (&s->areas[at]);
/* Formats number D according to FORMAT and SETTINGS, producing UTF-8 text
   with data_out_stretchy(), and then postprocesses the text to reposition
   a minus sign as described in the comment below.

   The callers in this file hand the result to
   pivot_value_new_user_text_nocopy(), so the string is presumably
   heap-allocated and owned by the caller (the return statement is not
   visible here). */
4461 ctables_format (double d, const struct fmt_spec *format,
4462 const struct fmt_settings *settings)
4464 const union value v = { .f = d };
4465 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4467 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4468 produce the results we want for negative numbers, putting the negative
4469 sign in the wrong spot, before the prefix instead of after it. We can't,
4470 in fact, produce the desired results using a custom-currency
4471 specification. Instead, we postprocess the output, moving the negative
4474 NEQUAL: "-N=3" => "N=-3"
4475 PAREN: "-(3)" => "(-3)"
4476 PCTPAREN: "-(3%)" => "(-3%)"
4478 This transformation doesn't affect NEGPAREN. */
/* Only consider the first '-' in the text, and leave it alone if it directly
   follows an 'E' (presumably the sign of an exponent). */
4479 char *minus_src = strchr (s, '-');
4480 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The destination is the '=' of "N=" if present, otherwise the first '('
   (which may be null if there is none). */
4482 char *n_equals = strstr (s, "N=");
4483 char *lparen = strchr (s, '(');
4484 char *minus_dst = n_equals ? n_equals + 1 : lparen;
/* Presumably shifts the one-byte minus sign from its current offset to the
   destination's offset, sliding the bytes in between -- confirm against
   move_element()'s documentation. */
4486 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A.  Two conditions are tested
   for each nest: whether it is anything other than a single variable at
   scale index 0, and whether that variable's variable-label setting differs
   from CTVL_NONE.

   NOTE(review): the return statements are not visible here.  Judging by the
   name and by ctables_table_output(), which stores the result in
   hide_all_labels, this presumably returns true only when neither condition
   holds for any nest -- confirm. */
4492 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4494 for (size_t i = 0; i < t->stacks[a].n; i++)
4496 struct ctables_nest *nest = &t->stacks[a].nests[i];
4497 if (nest->n != 1 || nest->scale_idx != 0)
/* The variable-label setting is looked up by the variable's dictionary
   index. */
4500 enum ctables_vlabel vlabel
4501 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4502 if (vlabel != CTVL_NONE)
/* Three-way comparison of integers A and B.

   Returns -1 if A < B, 0 if A == B, and 1 if A > B.  The comparison never
   subtracts one operand from the other, so it cannot overflow even for
   INT_MIN and INT_MAX. */
static int
compare_ints_3way (int a, int b)
{
  return a < b ? -1 : a > b;
}
/* Comparison callback for sort(), used by ctables_table_output() to order
   an array of "struct ctables_cell *".  A_ and B_ point to array elements;
   AUX is unused.

   Cells are compared by their leaf index on each axis in turn, using
   compare_ints_3way().  The leaf indexes of the values found by
   ctables_value_find() for each cell are compared as well; the conditions
   guarding that comparison are not visible here, but the assertion shows
   that otherwise both lookups are expected to yield null. */
4515 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
4516 const void *aux UNUSED)
/* The array elements are pointers to cells, so dereference once. */
4518 struct ctables_cell *const *ap = a_;
4519 struct ctables_cell *const *bp = b_;
4520 const struct ctables_cell *a = *ap;
4521 const struct ctables_cell *b = *bp;
4529 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
4531 int cmp = compare_ints_3way (a->axes[axis].leaf, b->axes[axis].leaf);
4536 const struct ctables_value *a_ctv = ctables_value_find (a);
4537 const struct ctables_value *b_ctv = ctables_value_find (b);
4540 int cmp = compare_ints_3way (a_ctv->leaf, b_ctv->leaf);
4545 assert (!a_ctv && !b_ctv);
/* Renders table T of CT as a pivot table and submits it for output.

   The steps visible in this function are:

   - Create the pivot table, titled with T's title or with the default
     "Custom Tables", and set its caption and corner text.

   - Optionally create a "Statistics" dimension with one leaf per summary
     spec, and a "Row Categories" or "Column Categories" dimension with one
     leaf per value in t->clabels_values.

   - For each axis, gather and sort the cells of the sections that use each
     nest, work out which levels (variable label, category, summary) apply to
     the nest, and build pivot groups and leaves from them, recording the
     resulting leaf index in each cell.

   - Sort all cells by leaf and give each area that does not yet have one a
     sequence number, counting separately for each area type.

   - For each summary spec of each cell, compute the value (through the
     postcompute path for postcompute cells, otherwise with
     ctables_summary_value()) and put it into the table.  A CTSF_COUNT value
     below a nonzero ct->hide_threshold is shown as "<" followed by the
     threshold; a zero value is replaced by ct->zero and a SYSMIS value by
     ct->missing when those strings are set; values that use a CTABLES
     format are rendered to text by ctables_format(); anything else becomes a
     number with the spec's format. */
4550 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4552 struct pivot_table *pt = pivot_table_create__ (
4554 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4555 : pivot_value_new_text (N_("Custom Tables"))),
4558 pivot_table_set_caption (
4559 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4561 pivot_table_set_corner_text (
4562 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate dimension for summary functions is used when the summary
   labels are on a different axis from the summaries themselves, or when
   they are hidden and there is more than one summary spec. */
4564 bool summary_dimension = (t->summary_axis != t->slabels_axis
4565 || (!t->slabels_visible
4566 && t->summary_specs.n > 1));
4567 if (summary_dimension)
4569 struct pivot_dimension *d = pivot_dimension_create (
4570 pt, t->slabels_axis, N_("Statistics"));
4571 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4572 if (!t->slabels_visible)
4573 d->hide_all_labels = true;
4574 for (size_t i = 0; i < specs->n; i++)
4575 pivot_category_create_leaf (
4576 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A separate dimension for category labels is used when
   t->clabels_example is set, which ctables_check_label_position() does
   when category labels are moved to another axis. */
4579 bool categories_dimension = t->clabels_example != NULL;
4580 if (categories_dimension)
4582 struct pivot_dimension *d = pivot_dimension_create (
4583 pt, t->label_axis[t->clabels_from_axis],
4584 t->clabels_from_axis == PIVOT_AXIS_ROW
4585 ? N_("Row Categories")
4586 : N_("Column Categories"));
4587 const struct variable *var = t->clabels_example;
4588 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4589 for (size_t i = 0; i < t->n_clabels_values; i++)
4591 const struct ctables_value *value = t->clabels_values[i];
4592 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4593 assert (cat != NULL);
4594 pivot_category_create_leaf (
4595 d->root, ctables_category_create_value_label (c, cat,
4601 pivot_table_set_look (pt, ct->look);
/* Create and populate the dimension for each axis that has variables or
   that carries the summaries. */
4602 struct pivot_dimension *d[PIVOT_N_AXES];
4603 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4605 static const char *names[] = {
4606 [PIVOT_AXIS_ROW] = N_("Rows"),
4607 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4608 [PIVOT_AXIS_LAYER] = N_("Layers"),
4610 d[a] = (t->axes[a] || a == t->summary_axis
4611 ? pivot_dimension_create (pt, a, names[a])
4616 assert (t->axes[a]);
4618 for (size_t i = 0; i < t->stacks[a].n; i++)
4620 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections whose nest on this axis is NEST, counting their
   cells and tracking the deepest nest among them. */
4621 struct ctables_section **sections = xnmalloc (t->n_sections,
4623 size_t n_sections = 0;
4625 size_t n_total_cells = 0;
4626 size_t max_depth = 0;
4627 for (size_t j = 0; j < t->n_sections; j++)
4628 if (t->sections[j].nests[a] == nest)
4630 struct ctables_section *s = &t->sections[j];
4631 sections[n_sections++] = s;
4632 n_total_cells += hmap_count (&s->cells);
4634 size_t depth = s->nests[a]->n;
4635 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array (any filtering between
   the loop header and the assignment is not visible here) and sort it with
   ctables_cell_compare_3way(). */
4638 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4640 size_t n_sorted = 0;
4642 for (size_t j = 0; j < n_sections; j++)
4644 struct ctables_section *s = sections[j];
4646 struct ctables_cell *cell;
4647 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4649 sorted[n_sorted++] = cell;
4650 assert (n_sorted <= n_total_cells);
4653 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4654 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4656 struct ctables_level
4658 enum ctables_level_type
4660 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4661 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4662 CTL_SUMMARY, /* Summary functions. */
4666 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Each nest variable contributes at most one CTL_VAR and one CTL_CATEGORY
   level, and there is at most one CTL_SUMMARY level, hence the allocation
   of 1 + 2 * max_depth levels. */
4669 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4670 size_t n_levels = 0;
4671 for (size_t k = 0; k < nest->n; k++)
4673 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4674 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4676 if (vlabel != CTVL_NONE)
4678 levels[n_levels++] = (struct ctables_level) {
4680 .vlabel = (enum settings_value_show) vlabel,
4685 if (nest->scale_idx != k
4686 && (k != nest->n - 1 || t->label_axis[a] == a))
4688 levels[n_levels++] = (struct ctables_level) {
4689 .type = CTL_CATEGORY,
4695 if (!summary_dimension && a == t->slabels_axis)
4697 levels[n_levels++] = (struct ctables_level) {
4698 .type = CTL_SUMMARY,
4699 .var_idx = SIZE_MAX,
4703 /* Pivot categories:
4705 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4706 - category for nest->vars[0], if nest->scale_idx != 0
4707 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4708 - category for nest->vars[1], if nest->scale_idx != 1
4710 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4711 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4712 - summary function, if 'a == t->slabels_axis && a ==
4715 Additional dimensions:
4717 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4719 - If 't->label_axis[b] == a' for some 'b != a', add a category
4724 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4726 for (size_t j = 0; j < n_sorted; j++)
4728 struct ctables_cell *cell = sorted[j];
4729 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4731 size_t n_common = 0;
4734 for (; n_common < n_levels; n_common++)
4736 const struct ctables_level *level = &levels[n_common];
4737 if (level->type == CTL_CATEGORY)
4739 size_t var_idx = level->var_idx;
4740 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4741 if (prev->axes[a].cvs[var_idx].category != c)
4743 else if (c->type != CCT_SUBTOTAL
4744 && c->type != CCT_TOTAL
4745 && c->type != CCT_POSTCOMPUTE
4746 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4747 &cell->axes[a].cvs[var_idx].value,
4748 var_get_type (nest->vars[var_idx])))
4754 for (size_t k = n_common; k < n_levels; k++)
4756 const struct ctables_level *level = &levels[k];
4757 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4758 if (level->type == CTL_SUMMARY)
4760 assert (k == n_levels - 1);
4762 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4763 for (size_t m = 0; m < specs->n; m++)
4765 int leaf = pivot_category_create_leaf (
4766 parent, ctables_summary_label (&specs->specs[m],
4774 const struct variable *var = nest->vars[level->var_idx];
4775 struct pivot_value *label;
4776 if (level->type == CTL_VAR)
4778 label = pivot_value_new_variable (var);
4779 label->variable.show = level->vlabel;
4781 else if (level->type == CTL_CATEGORY)
4783 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4784 label = ctables_category_create_value_label (
4785 t->categories[var_get_dict_index (var)],
4786 cv->category, var, &cv->value);
4791 if (k == n_levels - 1)
4792 prev_leaf = pivot_category_create_leaf (parent, label);
4794 groups[k] = pivot_category_create_group__ (parent, label);
4798 cell->axes[a].leaf = prev_leaf;
4807 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4811 size_t n_total_cells = 0;
4812 for (size_t j = 0; j < t->n_sections; j++)
4813 n_total_cells += hmap_count (&t->sections[j].cells);
4815 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4816 size_t n_sorted = 0;
4817 for (size_t j = 0; j < t->n_sections; j++)
4819 const struct ctables_section *s = &t->sections[j];
4820 struct ctables_cell *cell;
4821 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4823 sorted[n_sorted++] = cell;
4825 assert (n_sorted <= n_total_cells);
4826 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4828 size_t ids[N_CTATS];
4829 memset (ids, 0, sizeof ids);
4830 for (size_t j = 0; j < n_sorted; j++)
4832 struct ctables_cell *cell = sorted[j];
4833 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4835 struct ctables_area *area = cell->areas[at];
4836 if (!area->sequence)
4837 area->sequence = ++ids[at];
4844 for (size_t i = 0; i < t->n_sections; i++)
4846 struct ctables_section *s = &t->sections[i];
4848 struct ctables_cell *cell;
4849 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4854 const struct ctables_value *ctv = ctables_value_find (cell);
4855 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4856 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4857 for (size_t j = 0; j < specs->n; j++)
4860 size_t n_dindexes = 0;
4862 if (summary_dimension)
4863 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4866 dindexes[n_dindexes++] = ctv->leaf;
4868 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4871 int leaf = cell->axes[a].leaf;
4872 if (a == t->summary_axis && !summary_dimension)
4874 dindexes[n_dindexes++] = leaf;
4877 const struct ctables_summary_spec *ss = &specs->specs[j];
4879 struct fmt_spec format = specs->specs[j].format;
4880 bool is_ctables_format = ss->is_ctables_format;
4881 double d = (cell->postcompute
4882 ? ctables_cell_calculate_postcompute (
4883 s, cell, ss, &format, &is_ctables_format, j)
4884 : ctables_summary_value (cell->areas,
4885 &cell->summaries[j], ss));
4887 struct pivot_value *value;
4888 if (ct->hide_threshold != 0
4889 && d < ct->hide_threshold
4890 && ss->function == CTSF_COUNT)
4892 value = pivot_value_new_user_text_nocopy (
4893 xasprintf ("<%d", ct->hide_threshold));
4895 else if (d == 0 && ct->zero)
4896 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4897 else if (d == SYSMIS && ct->missing)
4898 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4899 else if (is_ctables_format)
4900 value = pivot_value_new_user_text_nocopy (
4901 ctables_format (d, &format, &ct->ctables_formats));
4904 value = pivot_value_new_number (d);
4905 value->numeric.format = format;
4907 /* XXX should text values be right-justified? */
4908 pivot_table_put (pt, dindexes, n_dindexes, value);
4913 pivot_table_submit (pt);
/* Checks whether the category labels for axis A of table T may be placed
   on the axis given by t->label_axis[a], using LEXER to locate the relevant
   syntax in messages.

   As a side effect, records the last variable of the first nest on axis A
   as t->clabels_example.

   Each failure is reported as an error (SE) followed by a note (SN) that
   points at the syntax that moves the labels.  The failures visible here
   are:

   - A category of that variable has type CCT_FUNCTION, that is, it sorts by
     a summary function.

   - A variable whose labels are to be moved is a scale variable.

   - The last variables of the nests differ in width, in value labels, or in
     category specifications.

   ctables_prepare_table() uses the result as a boolean; the return
   statements themselves are not visible here. */
4917 ctables_check_label_position (struct ctables_table *t, struct lexer *lexer,
4918 enum pivot_axis_type a)
4920 enum pivot_axis_type label_pos = t->label_axis[a];
4924 const struct ctables_stack *stack = &t->stacks[a];
4928 const struct ctables_nest *n0 = &stack->nests[0];
4931 assert (stack->n == 1);
/* The first nest's last variable serves as the reference that the other
   nests' last variables are compared against. */
4935 const struct variable *v0 = n0->vars[n0->n - 1];
4936 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4937 t->clabels_example = v0;
4939 for (size_t i = 0; i < c0->n_cats; i++)
4940 if (c0->cats[i].type == CCT_FUNCTION)
4942 msg (SE, _("Category labels may not be moved to another axis when "
4943 "sorting by a summary function."));
4944 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4945 _("This syntax moves category labels to another axis."));
4946 msg_at (SN, c0->cats[i].location,
4947 _("This syntax requests sorting by a summary function."));
4951 for (size_t i = 0; i < stack->n; i++)
4953 const struct ctables_nest *ni = &stack->nests[i];
4955 const struct variable *vi = ni->vars[ni->n - 1];
/* NOTE(review): this compares the index of the FIRST nest's last variable
   (n0->n - 1) with this nest's scale_idx, although the message below names
   this nest's last variable (vi).  "ni->n - 1" may have been intended;
   confirm before relying on this check for nests of differing depth. */
4956 if (n0->n - 1 == ni->scale_idx)
4958 msg (SE, _("To move category labels from one axis to another, "
4959 "the variables whose labels are to be moved must be "
4960 "categorical, but %s is scale."), var_get_name (vi));
4961 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4962 _("This syntax moves category labels to another axis."));
/* Compare every later nest's last variable against the first nest's. */
4967 for (size_t i = 1; i < stack->n; i++)
4969 const struct ctables_nest *ni = &stack->nests[i];
4971 const struct variable *vi = ni->vars[ni->n - 1];
4972 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4974 if (var_get_width (v0) != var_get_width (vi))
4976 msg (SE, _("To move category labels from one axis to another, "
4977 "the variables whose labels are to be moved must all "
4978 "have the same width, but %s has width %d and %s has "
4980 var_get_name (v0), var_get_width (v0),
4981 var_get_name (vi), var_get_width (vi));
4982 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4983 _("This syntax moves category labels to another axis."));
4986 if (!val_labs_equal (var_get_value_labels (v0),
4987 var_get_value_labels (vi)))
4989 msg (SE, _("To move category labels from one axis to another, "
4990 "the variables whose labels are to be moved must all "
4991 "have the same value labels, but %s and %s have "
4992 "different value labels."),
4993 var_get_name (v0), var_get_name (vi));
4994 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
4995 _("This syntax moves category labels to another axis."));
4998 if (!ctables_categories_equal (c0, ci))
5000 msg (SE, _("To move category labels from one axis to another, "
5001 "the variables whose labels are to be moved must all "
5002 "have the same category specifications, but %s and %s "
5003 "have different category specifications."),
5004 var_get_name (v0), var_get_name (vi));
5005 lex_ofs_msg (lexer, SN, t->clabels_start_ofs, t->clabels_end_ofs,
5006 _("This syntax moves category labels to another axis."));
/* Searches the *N variables in *SUM_VARS for VAR.  The visible code also
   appends VAR at index *N, first growing the array with x2nrealloc() when *N
   has reached *ALLOCATED.

   NOTE(review): the return statements are not visible here.
   enumerate_sum_vars() stores the result in spec->sum_var_idx, so this
   presumably returns VAR's index in *SUM_VARS, whether it was already
   present or newly added -- confirm. */
5015 add_sum_var (struct variable *var,
5016 struct variable ***sum_vars, size_t *n, size_t *allocated)
5018 for (size_t i = 0; i < *n; i++)
5019 if (var == (*sum_vars)[i])
/* Make room, if necessary, then append. */
5022 if (*n >= *allocated)
5023 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
5024 (*sum_vars)[*n] = var;
/* Maps AREA to another area type.  ctables_prepare_table() applies this to
   each summary spec's calc_area when row category labels are shown on the
   column axis or vice versa.

   NOTE(review): only two of the return statements are visible here, and
   the cases they belong to are not; presumably the row-oriented and
   column-oriented area types are exchanged -- confirm. */
5028 static enum ctables_area_type
5029 rotate_area (enum ctables_area_type area)
5040 return CTAT_LAYERCOL;
5043 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A to collect the variables needed for
   CTSF_areaPCT_SUM summaries into *SUM_VARS (which has *N entries and room
   for *ALLOCATED).

   For every summary spec in A->specs[] whose function is CTSF_areaPCT_SUM,
   registers A->var with add_sum_var() and stores the result in the spec's
   sum_var_idx.  Also recurses into both of A's sub-axes.  Which kind of
   axis node takes which of these two paths is decided on lines not visible
   here. */
5056 enumerate_sum_vars (const struct ctables_axis *a,
5057 struct variable ***sum_vars, size_t *n, size_t *allocated)
5065 for (size_t i = 0; i < N_CSVS; i++)
5066 for (size_t j = 0; j < a->specs[i].n; j++)
5068 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5069 if (spec->function == CTSF_areaPCT_SUM)
5070 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
5076 for (size_t i = 0; i < 2; i++)
5077 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  LEXER is used only to locate syntax in
   error messages issued by ctables_check_label_position().

   The steps visible in this function are:

   - For each axis, enumerate its stack of nests with enumerate_fts() and,
     for each nest and area type, build the list of variable indexes in
     nest->areas[], leaving out the scale variable.  An axis that gets a
     single empty nest instead has its label_axis reset to itself.

   - For each nest on the summary axis, supply a default summary spec when
     none was specified (mean for scale, count otherwise), and make the
     CSV_TOTAL specs a clone of the CSV_CELL specs when no total specs exist.

   - When row category labels are shown on the column axis or vice versa,
     pass each spec's calc_area through rotate_area().

   - When t->ctables->smissing_listwise is set, collect the scale variables
     of the other nests in the same group as listwise variables.

   - Merge the summary specs of all nests into t->summary_specs, recording
     each spec's position in the merged list as its axis_idx.

   - Enumerate the variables needed for sums with enumerate_sum_vars().

   Returns true only if ctables_check_label_position() succeeds for both the
   row axis and the column axis. */
5083 ctables_prepare_table (struct ctables_table *t, struct lexer *lexer)
5085 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5088 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5090 for (size_t j = 0; j < t->stacks[a].n; j++)
5092 struct ctables_nest *nest = &t->stacks[a].nests[j];
5093 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for every variable in the nest, which is the most that the loops
   below can add. */
5095 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5096 nest->n_areas[at] = 0;
/* For row- and column-oriented area types, ATA is the axis the area type
   is named after and ATB is the other of the row and column axes. */
5098 enum pivot_axis_type ata, atb;
5099 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5101 ata = PIVOT_AXIS_ROW;
5102 atb = PIVOT_AXIS_COLUMN;
5104 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
5106 ata = PIVOT_AXIS_COLUMN;
5107 atb = PIVOT_AXIS_ROW;
5110 if (at == CTAT_LAYER
5111 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5112 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5113 ? a == atb && t->label_axis[a] != a
/* K is unsigned, so decrementing it past zero wraps around to a value that
   is not less than nest->n, which ends the loop: K runs from nest->n - 1
   down to 0. */
5116 for (size_t k = nest->n - 1; k < nest->n; k--)
5117 if (k != nest->scale_idx)
5119 nest->areas[at][nest->n_areas[at]++] = k;
5125 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5126 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5127 : at == CTAT_TABLE ? true
/* Add every variable in the nest except the scale variable. */
5131 for (size_t k = 0; k < nest->n; k++)
5132 if (k != nest->scale_idx)
5133 nest->areas[at][nest->n_areas[at]++] = k;
5139 #define L PIVOT_AXIS_LAYER
5140 n_drop = (t->clabels_from_axis == L ? a != L
5141 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5142 : t->clabels_from_axis == a ? 2
5149 n_drop = a == ata && t->label_axis[ata] == atb;
5154 n_drop = (a == ata ? t->label_axis[ata] == atb
5156 : t->clabels_from_axis == atb ? -1
5157 : t->clabels_to_axis != atb ? 1
5169 size_t n = nest->n_areas[at];
/* Overwrite the second-to-last entry with the last one and shorten the
   list by one. */
5172 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5173 nest->n_areas[at]--;
/* Drop up to N_DROP entries from the end of the list. */
5178 for (int i = 0; i < n_drop; i++)
5179 if (nest->n_areas[at] > 0)
5180 nest->n_areas[at]--;
/* Give the axis a stack that consists of a single nest with no scale or
   summary variable. */
5187 struct ctables_nest *nest = xmalloc (sizeof *nest);
5188 *nest = (struct ctables_nest) {
5190 .scale_idx = SIZE_MAX,
5191 .summary_idx = SIZE_MAX
5193 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5195 /* There's no point in moving labels away from an axis that has no
5196 labels, so avoid dealing with the special cases around that. */
5197 t->label_axis[a] = a;
5200 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5201 for (size_t i = 0; i < stack->n; i++)
5203 struct ctables_nest *nest = &stack->nests[i];
/* No summary specs were given for this nest: supply a single default. */
5204 if (!nest->specs[CSV_CELL].n)
5206 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5207 ss->specs = xmalloc (sizeof *ss->specs);
/* The default summary is the mean for a scale variable and the count
   otherwise. */
5210 enum ctables_summary_function function
5211 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5215 nest->summary_idx = nest->n - 1;
5216 ss->var = nest->vars[nest->summary_idx];
5218 *ss->specs = (struct ctables_summary_spec) {
5219 .function = function,
5220 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5221 .format = ctables_summary_default_format (function, ss->var),
5224 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5225 &nest->specs[CSV_CELL]);
/* Cell specs exist but total specs do not: totals use the cell specs. */
5227 else if (!nest->specs[CSV_TOTAL].n)
5228 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5229 &nest->specs[CSV_CELL]);
5231 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5232 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5234 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5235 for (size_t i = 0; i < nest->specs[sv].n; i++)
5237 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5238 const struct ctables_function_info *cfi =
5239 &ctables_function_info[ss->function];
5241 ss->calc_area = rotate_area (ss->calc_area);
5245 if (t->ctables->smissing_listwise)
5247 struct variable **listwise_vars = NULL;
5249 size_t allocated = 0;
/* Scan the nests starting at this nest's group head; the test below
   identifies nests that belong to a different group. */
5251 for (size_t j = nest->group_head; j < stack->n; j++)
5253 const struct ctables_nest *other_nest = &stack->nests[j];
5254 if (other_nest->group_head != nest->group_head)
/* scale_idx < n means that the other nest has a scale variable. */
5257 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5260 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5261 sizeof *listwise_vars);
5262 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5265 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5268 listwise_vars = xmemdup (listwise_vars,
5269 n * sizeof *listwise_vars);
5270 nest->specs[sv].listwise_vars = listwise_vars;
5271 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specs of every nest and variant into t->summary_specs.
   Each merge item tracks a position (ofs) within one spec set. */
5276 struct ctables_summary_spec_set *merged = &t->summary_specs;
5277 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5279 for (size_t j = 0; j < stack->n; j++)
5281 const struct ctables_nest *nest = &stack->nests[j];
5283 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5284 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the item whose current spec compares lowest and append that spec
   to the merged set. */
5289 struct merge_item min = items[0];
5290 for (size_t j = 1; j < n_left; j++)
5291 if (merge_item_compare_3way (&items[j], &min) < 0)
5294 if (merged->n >= merged->allocated)
5295 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5296 sizeof *merged->specs);
5297 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item whose current spec equals the minimum maps to the merged
   spec just added and advances to its next spec.  An item that runs out
   of specs is replaced by the last remaining item. */
5299 for (size_t j = 0; j < n_left; )
5301 if (merge_item_compare_3way (&items[j], &min) == 0)
5303 struct merge_item *item = &items[j];
5304 item->set->specs[item->ofs++].axis_idx = merged->n - 1;
5305 if (item->ofs >= item->set->n)
5307 items[j] = items[--n_left];
5316 size_t allocated_sum_vars = 0;
5317 enumerate_sum_vars (t->axes[t->summary_axis],
5318 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5320 return (ctables_check_label_position (t, lexer, PIVOT_AXIS_ROW)
5321 && ctables_check_label_position (t, lexer, PIVOT_AXIS_COLUMN));
/* For each nest in T's stack for axis A, looks at the value in case C of
   the nest's last variable and, if that value matches one of the variable's
   categories, records it with ctables_value_insert().

   ctables_execute() calls this for each axis whose category labels are
   shown on a different axis. */
5325 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5326 enum pivot_axis_type a)
5328 struct ctables_stack *stack = &t->stacks[a];
5329 for (size_t i = 0; i < stack->n; i++)
5331 const struct ctables_nest *nest = &stack->nests[i];
5332 const struct variable *var = nest->vars[nest->n - 1];
5333 const union value *value = case_data (c, var);
/* System-missing numeric values are singled out here; the action taken is
   on a line not visible here (presumably the value is skipped). */
5335 if (var_is_numeric (var) && value->f == SYSMIS)
5338 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5340 ctables_value_insert (t, value, var_get_width (var));
/* Adds to OCCURRENCES the values of VAR that the category specifications
   in CATS call for, using ctables_add_occurrence().

   Most of the case labels that select among the actions below are not
   visible here, so the category type named in each comment is inferred
   from the fields that the action reads.  Wherever values are drawn from
   VAR's value labels, only values that have a value label are added. */
5345 ctables_add_category_occurrences (const struct variable *var,
5346 struct hmap *occurrences,
5347 const struct ctables_categories *cats)
5349 const struct val_labs *val_labs = var_get_value_labels (var);
5351 for (size_t i = 0; i < cats->n_cats; i++)
5353 const struct ctables_category *c = &cats->cats[i];
/* Presumably a single-number category: add the number itself. */
5357 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Presumably a single-string category: build a value of VAR's width from
   the string, padded on the right with spaces, and add it. */
5363 int width = var_get_width (var);
5365 value_init (&value, width);
5366 value_copy_buf_rpad (&value, width,
5367 CHAR_CAST (uint8_t *, c->string.string),
5368 c->string.length, ' ');
5369 ctables_add_occurrence (var, &value, occurrences);
5370 value_destroy (&value, width);
/* Numeric range: add each labeled value that lies within
   [nrange[0], nrange[1]]. */
5375 assert (var_is_numeric (var));
5376 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5377 vl = val_labs_next (val_labs, vl))
5378 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5379 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: add each labeled value that in_string_range() accepts. */
5383 assert (var_is_alpha (var));
5384 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5385 vl = val_labs_next (val_labs, vl))
5386 if (in_string_range (&vl->value, var, c->srange))
5387 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each labeled value that is missing for VAR. */
5391 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5392 vl = val_labs_next (val_labs, vl))
5393 if (var_is_value_missing (var, &vl->value))
5394 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value. */
5398 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5399 vl = val_labs_next (val_labs, vl))
5400 ctables_add_occurrence (var, &vl->value, occurrences);
5403 case CCT_POSTCOMPUTE:
/* Add each labeled value, leaving out missing values unless the category
   includes them. */
5413 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5414 vl = val_labs_next (val_labs, vl))
5415 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5416 ctables_add_occurrence (var, &vl->value, occurrences);
5419 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories() that inserts
   a cell into section S for each combination of recorded occurrences (and
   postcompute categories) across S's variables.

   A is the axis and A_IDX the index within that axis's nest of the variable
   to vary at this level of recursion.  C is a scratch case whose values are
   overwritten as the recursion proceeds, and CATS[axis][idx] receives the
   category chosen for each variable.

   - Once A has run past the last axis, the combination is complete and is
     inserted with ctables_cell_insert__().

   - When the axis has no nest or A_IDX is past the end of the nest, the
     recursion moves on to the start of the next axis.

   - Otherwise, each occurrence recorded for the variable is stored into C,
     its category is looked up, and the recursion continues with the next
     variable in the nest; the same is then done for each CCT_POSTCOMPUTE
     category of the variable. */
5426 ctables_section_recurse_add_empty_categories (
5427 struct ctables_section *s,
5428 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5429 enum pivot_axis_type a, size_t a_idx)
5431 if (a >= PIVOT_N_AXES)
5432 ctables_cell_insert__ (s, c, cats);
5433 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5434 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5437 const struct variable *var = s->nests[a]->vars[a_idx];
5438 const struct ctables_categories *categories = s->table->categories[
5439 var_get_dict_index (var)];
5440 int width = var_get_width (var);
5441 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5442 const struct ctables_occurrence *o;
5443 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence's value. */
5445 union value *value = case_data_rw (c, var);
5446 value_destroy (value, width);
5447 value_clone (value, &o->value, width);
5448 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5449 assert (cats[a][a_idx] != NULL);
5450 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are selected directly, without storing a value
   into the scratch case. */
5453 for (size_t i = 0; i < categories->n_cats; i++)
5455 const struct ctables_category *cat = &categories->cats[i];
5456 if (cat->type == CCT_POSTCOMPUTE)
5458 cats[a][a_idx] = cat;
5459 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that may not have
   occurred in the data.

   For each non-scale variable on each axis, tests the show_empty setting of
   the variable's categories and adds the categories' values to the
   variable's occurrences map with ctables_add_category_occurrences().  It
   then creates a scratch case from the dictionary's prototype and lets
   ctables_section_recurse_add_empty_categories() insert a cell for each
   combination of occurrences.

   NOTE(review): the lines that set and test SHOW_EMPTY are not visible
   here; presumably the second part is skipped when no variable shows empty
   categories -- confirm.  The three category arrays are variable-length
   arrays sized by each nest's variable count. */
5466 ctables_section_add_empty_categories (struct ctables_section *s)
5468 bool show_empty = false;
5469 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5471 for (size_t k = 0; k < s->nests[a]->n; k++)
5472 if (k != s->nests[a]->scale_idx)
5474 const struct variable *var = s->nests[a]->vars[k];
5475 const struct ctables_categories *cats = s->table->categories[
5476 var_get_dict_index (var)];
5477 if (cats->show_empty)
5480 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Scratch storage for the category chosen for each variable on each axis
   during the recursion. */
5486 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5487 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5488 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5489 const struct ctables_category **cats[PIVOT_N_AXES] =
5491 [PIVOT_AXIS_LAYER] = layer_cats,
5492 [PIVOT_AXIS_ROW] = row_cats,
5493 [PIVOT_AXIS_COLUMN] = column_cats,
5495 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5496 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of its accumulated data while keeping the section
   itself usable: removes every occurrence from the per-variable occurrence
   maps, every cell from s->cells, and every area from the per-area-type
   maps, and then shrinks the cell and area maps.

   Values are destroyed only for non-scale variables.  Any free() calls for
   the occurrence, cell, and area structures themselves are on lines not
   visible here. */
5501 ctables_section_clear (struct ctables_section *s)
5503 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5505 const struct ctables_nest *nest = s->nests[a];
5506 for (size_t i = 0; i < nest->n; i++)
5507 if (i != nest->scale_idx)
5509 const struct variable *var = nest->vars[i];
5510 int width = var_get_width (var);
5511 struct ctables_occurrence *o, *next;
5512 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE iteration variant is used because nodes are deleted from the
   map during the loop. */
5513 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5515 value_destroy (&o->value, width);
5516 hmap_delete (map, &o->node);
5523 struct ctables_cell *cell, *next_cell;
5524 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
/* Release the per-axis values that the cell holds. */
5526 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5528 const struct ctables_nest *nest = s->nests[a];
5529 for (size_t i = 0; i < nest->n; i++)
5530 if (i != nest->scale_idx)
5531 value_destroy (&cell->axes[a].cvs[i].value,
5532 var_get_width (nest->vars[i]));
5533 free (cell->axes[a].cvs);
/* Release the cell's summaries, one for each spec in the spec set selected
   by the cell's summary variant. */
5536 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5537 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5538 for (size_t i = 0; i < specs->n; i++)
5539 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5540 free (cell->summaries);
5542 hmap_delete (&s->cells, &cell->node);
5545 hmap_shrink (&s->cells);
5547 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5549 struct ctables_area *area, *next_area;
5550 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5554 hmap_delete (&s->areas[at], &area->node);
5557 hmap_shrink (&s->areas[at]);
/* Releases everything that section S owns: first empties it with
   ctables_section_clear(), then destroys the per-variable occurrence maps
   and frees their arrays, and finally destroys the cell map and the
   per-area-type maps.  S itself is not freed here. */
5562 ctables_section_uninit (struct ctables_section *s)
5564 ctables_section_clear (s);
5566 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5568 struct ctables_nest *nest = s->nests[a];
5569 for (size_t i = 0; i < nest->n; i++)
5570 hmap_destroy (&s->occurrences[a][i]);
5571 free (s->occurrences[a]);
5574 hmap_destroy (&s->cells);
5575 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5576 hmap_destroy (&s->areas[at]);
/* Empties table T of the data accumulated while executing: clears every
   section with ctables_section_clear() and, when category labels were
   moved to another axis (t->clabels_example is set), removes every value
   from t->clabels_values_map, shrinks the map, and frees and resets the
   t->clabels_values array.

   ctables_execute() calls this after outputting each table. */
5580 ctables_table_clear (struct ctables_table *t)
5582 for (size_t i = 0; i < t->n_sections; i++)
5583 ctables_section_clear (&t->sections[i]);
5585 if (t->clabels_example)
/* All of the recorded values have the width of the example variable. */
5587 int width = var_get_width (t->clabels_example);
5588 struct ctables_value *value, *next_value;
5589 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5590 &t->clabels_values_map)
5592 value_destroy (&value->value, width);
5593 hmap_delete (&t->clabels_values_map, &value->node);
5596 hmap_shrink (&t->clabels_values_map);
5598 free (t->clabels_values);
5599 t->clabels_values = NULL;
5600 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases in INPUT, which belong to dataset
   DS.

   First allocates and initializes the sections of every table.  The cases
   are then read in groups supplied by a casegrouper, which is created from
   the dictionary's split variables or with no grouping variables
   (presumably depending on SPLITTING; the condition is not visible here).
   For each case, the dictionary weight and the effective weight are
   computed and the case is inserted into every section of every table;
   values for category labels that are shown on another axis are recorded
   as well.  Each table is then completed with its empty categories, output
   with ctables_table_output(), and cleared.

   Returns the result of casegrouper_destroy(). */
5605 ctables_execute (struct dataset *ds, struct casereader *input,
5608 for (size_t i = 0; i < ct->n_tables; i++)
5610 struct ctables_table *t = ct->tables[i];
/* One section for each combination of nests across the three axes, with
   an axis that has an empty stack counting as one. */
5611 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5612 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5613 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5614 sizeof *t->sections);
5615 size_t ix[PIVOT_N_AXES];
5616 ctables_table_add_section (t, 0, ix);
5619 struct dictionary *dict = dataset_dict (ds);
5621 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5622 struct casegrouper *grouper
5624 ? casegrouper_create_splits (input, dict)
5625 : casegrouper_create_vars (input, NULL, 0));
5626 struct casereader *group;
5627 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the group's first case in order to report the split file
   values. */
5631 struct ccase *c = casereader_peek (group, 0);
5634 output_split_file_values (ds, c);
5639 bool warn_on_invalid = true;
5640 for (struct ccase *c = casereader_read (group); c;
5641 case_unref (c), c = casereader_read (group))
/* The case's weight under each form of weighting, indexed by enum
   ctables_weighting. */
5643 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5644 double e_weight = (ct->e_weight
5645 ? var_force_valid_weight (ct->e_weight,
5646 case_num (c, ct->e_weight),
5650 [CTW_DICTIONARY] = d_weight,
5651 [CTW_EFFECTIVE] = e_weight,
5652 [CTW_UNWEIGHTED] = 1.0,
5655 for (size_t i = 0; i < ct->n_tables; i++)
5657 struct ctables_table *t = ct->tables[i];
5659 for (size_t j = 0; j < t->n_sections; j++)
5660 ctables_cell_insert (&t->sections[j], c, weight);
/* Record the values of the variables whose category labels are shown on
   a different axis. */
5662 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5663 if (t->label_axis[a] != a)
5664 ctables_insert_clabels_values (t, c, a);
5667 casereader_destroy (group);
/* Complete, output, and reset each table. */
5669 for (size_t i = 0; i < ct->n_tables; i++)
5671 struct ctables_table *t = ct->tables[i];
5673 if (t->clabels_example)
5674 ctables_sort_clabels_values (t);
5676 for (size_t j = 0; j < t->n_sections; j++)
5677 ctables_section_add_empty_categories (&t->sections[j]);
5679 ctables_table_output (ct, t);
5680 ctables_table_clear (t);
5683 return casegrouper_destroy (grouper);
/* Searches CT's `postcomputes' hash map for a postcompute called NAME.
   Both the hash (utf8_hash_case_string) and the name comparison
   (utf8_strcasecmp) are case-insensitive.

   NOTE(review): the return statements are not visible in this excerpt.
   Presumably the matching entry is returned, or NULL when there is none;
   callers in this file use the result that way. */
5686 static struct ctables_postcompute *
5687 ctables_find_postcompute (struct ctables *ct, const char *name)
5689 struct ctables_postcompute *pc;
/* Visit only entries stored under NAME's hash, then confirm by comparing
   the names themselves. */
5690 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5691 utf8_hash_case_string (name, 0), &ct->postcomputes)
5692 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form `&NAME=EXPR(expression)'
   from LEXER, using DICT to parse the expression, and stores it in the
   `postcomputes' hash map of the ctables object `ct'.

   If a postcompute with the same name already exists, a warning is issued
   that points at both definitions, and the old expression and location are
   destroyed before the new ones are stored.

   NOTE(review): the end of the parameter list, the return statements, and
   the cleanup on the failure paths are not visible in this excerpt. */
5698 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5701 int pcompute_start = lex_ofs (lexer) - 1;
/* pcompute_start is one token before the current one; presumably that is
   the subcommand keyword the caller has already consumed, so that the
   recorded location covers it too.  Next come `&' and the name. */
5703 if (!lex_match (lexer, T_AND))
5705 lex_error_expecting (lexer, "&");
5708 if (!lex_force_id (lexer))
5711 char *name = ss_xstrdup (lex_tokss (lexer));
5714 if (!lex_force_match_phrase (lexer, "=EXPR("))
/* Remember the token offsets that bracket the expression; its source text
   becomes the postcompute's label below. */
5720 int expr_start = lex_ofs (lexer);
5721 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5722 int expr_end = lex_ofs (lexer) - 1;
5723 if (!expr || !lex_force_match (lexer, T_RPAREN))
5725 ctables_pcexpr_destroy (expr);
5729 int pcompute_end = lex_ofs (lexer) - 1;
5731 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5734 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn at the new location, note the previous one, and
   release the old expression and location.
   NOTE(review): the test that guards this branch is not visible;
   presumably it checks that `pc' was found. */
5737 msg_at (SW, location, _("New definition of &%s will override the "
5738 "previous definition."),
5740 msg_at (SN, pc->location, _("This is the previous definition."));
5742 ctables_pcexpr_destroy (pc->expr);
5743 msg_location_destroy (pc->location);
/* New name (presumably the `else' branch): create an entry that takes
   ownership of NAME and insert it under the case-insensitive hash of its
   name. */
5748 pc = xmalloc (sizeof *pc);
5749 *pc = (struct ctables_postcompute) { .name = name };
5750 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5751 utf8_hash_case_string (pc->name, 0));
/* Record where the definition appeared and use the expression's source
   text (tokens expr_start through expr_end) as the label. */
5754 pc->location = location;
5756 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a sequence of summary specifications from LEXER into *SSS, which
   is first reset to an empty set.  Each specification is a summary
   function, a percentile value when the function is CTSF_PTILE, and a
   format specifier.

   Parsing stops at the end of the command, at a `/', or at an identifier
   that matches LABEL or HIDESOURCECATS.

   NOTE(review): the return statements and the error label are not visible
   in this excerpt. */
5761 ctables_parse_pproperties_format (struct lexer *lexer,
5762 struct ctables_summary_spec_set *sss)
5764 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Continue until the command ends, a `/' appears, or the next token is a
   LABEL or HIDESOURCECATS keyword. */
5766 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5767 && !(lex_token (lexer) == T_ID
5768 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5769 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5770 lex_tokss (lexer)))))
5772 /* Parse function. */
5773 enum ctables_summary_function function;
5774 enum ctables_weighting weighting;
5775 enum ctables_area_type area;
5776 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5779 /* Parse percentile. */
5780 double percentile = 0;
5781 if (function == CTSF_PTILE)
5783 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5785 percentile = lex_number (lexer);
/* Parse the format specifier; the parser also sets is_ctables_format. */
5790 struct fmt_spec format;
5791 bool is_ctables_format;
5792 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
5795 if (sss->n >= sss->allocated)
5796 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5797 sizeof *sss->specs);
5798 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5799 .function = function,
5800 .weighting = weighting,
5803 .percentile = percentile,
5805 .is_ctables_format = is_ctables_format,
/* Release whatever was accumulated in *SSS.
   NOTE(review): presumably this is the failure path reached from the
   checks above; the label and the returns are not visible. */
5811 ctables_summary_spec_set_uninit (sss);
/* Parses a list of `&NAME' references to postcomputes in CT, followed by
   any number of LABEL, FORMAT, and HIDESOURCECATS settings, from LEXER.
   Every setting is applied to all of the listed postcomputes.  Naming a
   postcompute that is not defined is reported as an error.

   NOTE(review): the declaration of `n_pcs', the statement that appends to
   `pcs', the return statements, and the cleanup on failure are not visible
   in this excerpt. */
5816 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5818 struct ctables_postcompute **pcs = NULL;
5820 size_t allocated_pcs = 0;
/* Collect the postcomputes named by the leading `&NAME' items. */
5822 while (lex_match (lexer, T_AND))
5824 if (!lex_force_id (lexer))
5826 struct ctables_postcompute *pc
5827 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5830 lex_error (lexer, _("Unknown computed category &%s."),
5831 lex_tokcstr (lexer));
5836 if (n_pcs >= allocated_pcs)
5837 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Parse settings until a `/' or the end of the command.  In each setting
   the `=' after the keyword is optional: the result of matching it is
   ignored. */
5841 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5843 if (lex_match_id (lexer, "LABEL"))
5845 lex_match (lexer, T_EQUALS);
5846 if (!lex_force_string (lexer))
/* Replace each postcompute's label with its own copy of the string. */
5849 for (size_t i = 0; i < n_pcs; i++)
5851 free (pcs[i]->label);
5852 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5857 else if (lex_match_id (lexer, "FORMAT"))
5859 lex_match (lexer, T_EQUALS);
5861 struct ctables_summary_spec_set sss;
5862 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets a freshly allocated clone of the parsed set; the
   parsed original is released afterward. */
5865 for (size_t i = 0; i < n_pcs; i++)
5868 ctables_summary_spec_set_uninit (pcs[i]->specs);
5870 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5871 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5873 ctables_summary_spec_set_uninit (&sss);
5875 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5877 lex_match (lexer, T_EQUALS);
5878 bool hide_source_cats;
5879 if (!parse_bool (lexer, &hide_source_cats))
5881 for (size_t i = 0; i < n_pcs; i++)
5882 pcs[i]->hide_source_cats = hide_source_cats;
5886 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time with localtime()
   and formatted by strftime() according to FORMAT.

   NOTE(review): in the visible lines the result of localtime() is passed
   to strftime() without a null check, and strftime()'s return value is
   ignored.  The declaration of the `value' buffer is not visible in this
   excerpt. */
5899 put_strftime (struct string *out, time_t now, const char *format)
5901 const struct tm *tm = localtime (&now);
5903 strftime (value, sizeof value, format, tm);
5904 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this excerpt.
   Callers in this file use the result as a condition, so presumably it is
   true exactly when the prefix was present and skipped. */
5908 skip_prefix (struct substring *s, struct substring prefix)
5910 if (ss_starts_with (*s, prefix))
5912 ss_advance (s, prefix.length);
/* Appends to OUT a textual rendering of the tokens in LEXER at offsets
   EXPR_START (inclusive) through EXPR_END (exclusive).  An identifier that
   names a variable in DICT which has a variable label is rendered as that
   label; other identifiers are rendered as written.

   NOTE(review): the declaration of `nest' and the bodies of the bracket
   branches are not visible in this excerpt.  Presumably `nest' tracks
   `[...]' nesting; how tokens inside brackets are treated cannot be
   confirmed from here. */
5920 put_table_expression (struct string *out, struct lexer *lexer,
5921 struct dictionary *dict, int expr_start, int expr_end)
5924 for (int ofs = expr_start; ofs < expr_end; ofs++)
5926 const struct token *t = lex_ofs_token (lexer, ofs);
5927 if (t->type == T_LBRACK)
5929 else if (t->type == T_RBRACK && nest > 0)
5935 else if (t->type == T_ID)
/* Prefer the variable's label; fall back to the identifier text when
   there is no such variable or it has no label. */
5937 const struct variable *var
5938 = dict_lookup_var (dict, t->string.string);
5939 const char *label = var ? var_get_label (var) : NULL;
5940 ds_put_cstr (out, label ? label : t->string.string);
/* Any other token is emitted as its source text.  A space goes before it
   unless it is the first token, it is `)', or OUT already ends in a
   space.  A space goes after it unless it is the last token or it is
   `('. */
5944 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5945 ds_put_byte (out, ' ');
5947 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5948 ds_put_cstr (out, repr);
5951 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5952 ds_put_byte (out, ' ');
/* Appends the text IN to OUT, expanding these keywords where they appear:

     )DATE   NOW formatted with strftime() "%x".
     )TIME   NOW formatted with strftime() "%X".
     )TABLE  the table expression, that is, the tokens of LEXER from
             EXPR_START to EXPR_END as rendered by put_table_expression()
             using DICT.

   A `)' that does not begin one of these keywords is copied as is.

   NOTE(review): the enclosing loop and the statement that leaves it are
   not visible in this excerpt. */
5958 put_title_text (struct string *out, struct substring in, time_t now,
5959 struct lexer *lexer, struct dictionary *dict,
5960 int expr_start, int expr_end)
/* Copy everything before the next `)' unchanged and step past it in IN.
   Presumably processing stops when nothing remains. */
5964 size_t chunk = ss_find_byte (in, ')');
5965 ds_put_substring (out, ss_head (in, chunk));
5966 ss_advance (&in, chunk);
5967 if (ss_is_empty (in))
/* IN now begins with `)': expand it if it starts a known keyword. */
5970 if (skip_prefix (&in, ss_cstr (")DATE")))
5971 put_strftime (out, now, "%x");
5972 else if (skip_prefix (&in, ss_cstr (")TIME")))
5973 put_strftime (out, now, "%X");
5974 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5975 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* No keyword matched (presumably the final `else'): emit the `)' itself
   and continue after it. */
5978 ds_put_byte (out, ')');
5979 ss_advance (&in, 1);
5985 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5987 struct casereader *input = NULL;
5989 struct measure_guesser *mg = measure_guesser_create (ds);
5992 input = proc_open (ds);
5993 measure_guesser_run (mg, input);
5994 measure_guesser_destroy (mg);
5997 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5998 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5999 enum settings_value_show tvars = settings_get_show_variables ();
6000 for (size_t i = 0; i < n_vars; i++)
6001 vlabels[i] = (enum ctables_vlabel) tvars;
6003 struct pivot_table_look *look = pivot_table_look_unshare (
6004 pivot_table_look_ref (pivot_table_look_get_default ()));
6005 look->omit_empty = false;
6007 struct ctables *ct = xmalloc (sizeof *ct);
6008 *ct = (struct ctables) {
6009 .dict = dataset_dict (ds),
6011 .ctables_formats = FMT_SETTINGS_INIT,
6013 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6016 time_t now = time (NULL);
6021 const char *dot_string;
6022 const char *comma_string;
6024 static const struct ctf ctfs[4] = {
6025 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6026 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6027 { CTEF_PAREN, "-,(,),", "-.(.)." },
6028 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6030 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6031 for (size_t i = 0; i < 4; i++)
6033 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6034 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6035 fmt_number_style_from_string (s));
6038 if (!lex_force_match (lexer, T_SLASH))
6041 while (!lex_match_id (lexer, "TABLE"))
6043 if (lex_match_id (lexer, "FORMAT"))
6045 double widths[2] = { SYSMIS, SYSMIS };
6046 double units_per_inch = 72.0;
6048 int start_ofs = lex_ofs (lexer);
6049 while (lex_token (lexer) != T_SLASH)
6051 if (lex_match_id (lexer, "MINCOLWIDTH"))
6053 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6056 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6058 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6061 else if (lex_match_id (lexer, "UNITS"))
6063 lex_match (lexer, T_EQUALS);
6064 if (lex_match_id (lexer, "POINTS"))
6065 units_per_inch = 72.0;
6066 else if (lex_match_id (lexer, "INCHES"))
6067 units_per_inch = 1.0;
6068 else if (lex_match_id (lexer, "CM"))
6069 units_per_inch = 2.54;
6072 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6076 else if (lex_match_id (lexer, "EMPTY"))
6081 lex_match (lexer, T_EQUALS);
6082 if (lex_match_id (lexer, "ZERO"))
6084 /* Nothing to do. */
6086 else if (lex_match_id (lexer, "BLANK"))
6087 ct->zero = xstrdup ("");
6088 else if (lex_force_string (lexer))
6090 ct->zero = ss_xstrdup (lex_tokss (lexer));
6096 else if (lex_match_id (lexer, "MISSING"))
6098 lex_match (lexer, T_EQUALS);
6099 if (!lex_force_string (lexer))
6103 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6104 ? ss_xstrdup (lex_tokss (lexer))
6110 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6111 "UNITS", "EMPTY", "MISSING");
6116 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6117 && widths[0] > widths[1])
6119 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6120 _("MINCOLWIDTH must not be greater than "
6125 for (size_t i = 0; i < 2; i++)
6126 if (widths[i] != SYSMIS)
6128 int *wr = ct->look->width_ranges[TABLE_HORZ];
6129 wr[i] = widths[i] / units_per_inch * 96.0;
6134 else if (lex_match_id (lexer, "VLABELS"))
6136 if (!lex_force_match_id (lexer, "VARIABLES"))
6138 lex_match (lexer, T_EQUALS);
6140 struct variable **vars;
6142 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6146 if (!lex_force_match_id (lexer, "DISPLAY"))
6151 lex_match (lexer, T_EQUALS);
6153 enum ctables_vlabel vlabel;
6154 if (lex_match_id (lexer, "DEFAULT"))
6155 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6156 else if (lex_match_id (lexer, "NAME"))
6158 else if (lex_match_id (lexer, "LABEL"))
6159 vlabel = CTVL_LABEL;
6160 else if (lex_match_id (lexer, "BOTH"))
6162 else if (lex_match_id (lexer, "NONE"))
6166 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6172 for (size_t i = 0; i < n_vars; i++)
6173 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6176 else if (lex_match_id (lexer, "MRSETS"))
6178 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6180 lex_match (lexer, T_EQUALS);
6181 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6184 else if (lex_match_id (lexer, "SMISSING"))
6186 if (lex_match_id (lexer, "VARIABLE"))
6187 ct->smissing_listwise = false;
6188 else if (lex_match_id (lexer, "LISTWISE"))
6189 ct->smissing_listwise = true;
6192 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6196 else if (lex_match_id (lexer, "PCOMPUTE"))
6198 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6201 else if (lex_match_id (lexer, "PPROPERTIES"))
6203 if (!ctables_parse_pproperties (lexer, ct))
6206 else if (lex_match_id (lexer, "WEIGHT"))
6208 if (!lex_force_match_id (lexer, "VARIABLE"))
6210 lex_match (lexer, T_EQUALS);
6211 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6215 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6217 if (lex_match_id (lexer, "COUNT"))
6219 lex_match (lexer, T_EQUALS);
6220 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6223 ct->hide_threshold = lex_integer (lexer);
6226 else if (ct->hide_threshold == 0)
6227 ct->hide_threshold = 5;
6231 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6232 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6233 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6234 if (lex_match_id (lexer, "SLABELS")
6235 || lex_match_id (lexer, "CLABELS")
6236 || lex_match_id (lexer, "CRITERIA")
6237 || lex_match_id (lexer, "CATEGORIES")
6238 || lex_match_id (lexer, "TITLES")
6239 || lex_match_id (lexer, "SIGTEST")
6240 || lex_match_id (lexer, "COMPARETEST"))
6241 lex_next_msg (lexer, SN, -1, -1,
6242 _("TABLE must appear before this subcommand."));
6246 if (!lex_force_match (lexer, T_SLASH))
6250 size_t allocated_tables = 0;
6253 if (ct->n_tables >= allocated_tables)
6254 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6255 sizeof *ct->tables);
6257 struct ctables_category *cat = xmalloc (sizeof *cat);
6258 *cat = (struct ctables_category) {
6260 .include_missing = false,
6261 .sort_ascending = true,
6264 struct ctables_categories *c = xmalloc (sizeof *c);
6265 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6266 *c = (struct ctables_categories) {
6273 struct ctables_categories **categories = xnmalloc (n_vars,
6274 sizeof *categories);
6275 for (size_t i = 0; i < n_vars; i++)
6278 struct ctables_table *t = xmalloc (sizeof *t);
6279 *t = (struct ctables_table) {
6281 .slabels_axis = PIVOT_AXIS_COLUMN,
6282 .slabels_visible = true,
6283 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6285 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6286 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6287 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6289 .clabels_from_axis = PIVOT_AXIS_LAYER,
6290 .clabels_to_axis = PIVOT_AXIS_LAYER,
6291 .categories = categories,
6292 .n_categories = n_vars,
6295 ct->tables[ct->n_tables++] = t;
6297 lex_match (lexer, T_EQUALS);
6298 int expr_start = lex_ofs (lexer);
6299 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6300 &t->axes[PIVOT_AXIS_ROW]))
6302 if (lex_match (lexer, T_BY))
6304 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6305 &t->axes[PIVOT_AXIS_COLUMN]))
6308 if (lex_match (lexer, T_BY))
6310 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6311 &t->axes[PIVOT_AXIS_LAYER]))
6315 int expr_end = lex_ofs (lexer);
6317 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6318 && !t->axes[PIVOT_AXIS_LAYER])
6320 lex_error (lexer, _("At least one variable must be specified."));
6324 const struct ctables_axis *scales[PIVOT_N_AXES];
6325 size_t n_scales = 0;
6326 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6328 scales[a] = find_scale (t->axes[a]);
6334 msg (SE, _("Scale variables may appear only on one axis."));
6335 if (scales[PIVOT_AXIS_ROW])
6336 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6337 _("This scale variable appears on the rows axis."));
6338 if (scales[PIVOT_AXIS_COLUMN])
6339 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6340 _("This scale variable appears on the columns axis."));
6341 if (scales[PIVOT_AXIS_LAYER])
6342 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6343 _("This scale variable appears on the layer axis."));
6347 const struct ctables_axis *summaries[PIVOT_N_AXES];
6348 size_t n_summaries = 0;
6349 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6351 summaries[a] = (scales[a]
6353 : find_categorical_summary_spec (t->axes[a]));
6357 if (n_summaries > 1)
6359 msg (SE, _("Summaries may appear only on one axis."));
6360 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6363 msg_at (SN, summaries[a]->loc,
6365 ? _("This variable on the rows axis has a summary.")
6366 : a == PIVOT_AXIS_COLUMN
6367 ? _("This variable on the columns axis has a summary.")
6368 : _("This variable on the layers axis has a summary."));
6370 msg_at (SN, summaries[a]->loc,
6371 _("This is a scale variable, so it always has a "
6372 "summary even if the syntax does not explicitly "
6377 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6378 if (n_summaries ? summaries[a] : t->axes[a])
6380 t->summary_axis = a;
6384 if (lex_token (lexer) == T_ENDCMD)
6386 if (!ctables_prepare_table (t, lexer))
6390 if (!lex_force_match (lexer, T_SLASH))
6393 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6395 if (lex_match_id (lexer, "SLABELS"))
6397 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6399 if (lex_match_id (lexer, "POSITION"))
6401 lex_match (lexer, T_EQUALS);
6402 if (lex_match_id (lexer, "COLUMN"))
6403 t->slabels_axis = PIVOT_AXIS_COLUMN;
6404 else if (lex_match_id (lexer, "ROW"))
6405 t->slabels_axis = PIVOT_AXIS_ROW;
6406 else if (lex_match_id (lexer, "LAYER"))
6407 t->slabels_axis = PIVOT_AXIS_LAYER;
6410 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6414 else if (lex_match_id (lexer, "VISIBLE"))
6416 lex_match (lexer, T_EQUALS);
6417 if (!parse_bool (lexer, &t->slabels_visible))
6422 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6427 else if (lex_match_id (lexer, "CLABELS"))
6429 int start_ofs = lex_ofs (lexer) - 1;
6430 if (lex_match_id (lexer, "AUTO"))
6432 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6433 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6435 else if (lex_match_id (lexer, "ROWLABELS"))
6437 lex_match (lexer, T_EQUALS);
6438 if (lex_match_id (lexer, "OPPOSITE"))
6439 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6440 else if (lex_match_id (lexer, "LAYER"))
6441 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6444 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6448 else if (lex_match_id (lexer, "COLLABELS"))
6450 lex_match (lexer, T_EQUALS);
6451 if (lex_match_id (lexer, "OPPOSITE"))
6452 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6453 else if (lex_match_id (lexer, "LAYER"))
6454 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6457 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6463 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6467 int end_ofs = lex_ofs (lexer) - 1;
6469 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6470 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6472 msg (SE, _("ROWLABELS and COLLABELS may not both be "
6475 lex_ofs_msg (lexer, SN, t->clabels_start_ofs,
6477 _("This is the first specification."));
6478 lex_ofs_msg (lexer, SN, start_ofs, end_ofs,
6479 _("This is the second specification."));
6483 t->clabels_start_ofs = start_ofs;
6484 t->clabels_end_ofs = end_ofs;
6486 else if (lex_match_id (lexer, "CRITERIA"))
6488 if (!lex_force_match_id (lexer, "CILEVEL"))
6490 lex_match (lexer, T_EQUALS);
6492 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6494 t->cilevel = lex_number (lexer);
6497 else if (lex_match_id (lexer, "CATEGORIES"))
6499 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6503 else if (lex_match_id (lexer, "TITLES"))
6508 if (lex_match_id (lexer, "CAPTIONS"))
6509 textp = &t->caption;
6510 else if (lex_match_id (lexer, "CORNERS"))
6512 else if (lex_match_id (lexer, "TITLES"))
6516 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6519 lex_match (lexer, T_EQUALS);
6521 struct string s = DS_EMPTY_INITIALIZER;
6522 while (lex_is_string (lexer))
6524 if (!ds_is_empty (&s))
6525 ds_put_byte (&s, ' ');
6526 put_title_text (&s, lex_tokss (lexer), now,
6527 lexer, dataset_dict (ds),
6528 expr_start, expr_end);
6532 *textp = ds_steal_cstr (&s);
6534 while (lex_token (lexer) != T_SLASH
6535 && lex_token (lexer) != T_ENDCMD);
6537 else if (lex_match_id (lexer, "SIGTEST"))
6539 int start_ofs = lex_ofs (lexer) - 1;
6542 t->chisq = xmalloc (sizeof *t->chisq);
6543 *t->chisq = (struct ctables_chisq) {
6545 .include_mrsets = true,
6546 .all_visible = true,
6552 if (lex_match_id (lexer, "TYPE"))
6554 lex_match (lexer, T_EQUALS);
6555 if (!lex_force_match_id (lexer, "CHISQUARE"))
6558 else if (lex_match_id (lexer, "ALPHA"))
6560 lex_match (lexer, T_EQUALS);
6561 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6563 t->chisq->alpha = lex_number (lexer);
6566 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6568 lex_match (lexer, T_EQUALS);
6569 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6572 else if (lex_match_id (lexer, "CATEGORIES"))
6574 lex_match (lexer, T_EQUALS);
6575 if (lex_match_id (lexer, "ALLVISIBLE"))
6576 t->chisq->all_visible = true;
6577 else if (lex_match_id (lexer, "SUBTOTALS"))
6578 t->chisq->all_visible = false;
6581 lex_error_expecting (lexer,
6582 "ALLVISIBLE", "SUBTOTALS");
6588 lex_error_expecting (lexer, "TYPE", "ALPHA",
6589 "INCLUDEMRSETS", "CATEGORIES");
6593 while (lex_token (lexer) != T_SLASH
6594 && lex_token (lexer) != T_ENDCMD);
6596 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6597 _("Support for SIGTEST not yet implemented."));
6600 else if (lex_match_id (lexer, "COMPARETEST"))
6602 int start_ofs = lex_ofs (lexer) - 1;
6605 t->pairwise = xmalloc (sizeof *t->pairwise);
6606 *t->pairwise = (struct ctables_pairwise) {
6608 .alpha = { .05, .05 },
6609 .adjust = BONFERRONI,
6610 .include_mrsets = true,
6611 .meansvariance_allcats = true,
6612 .all_visible = true,
6621 if (lex_match_id (lexer, "TYPE"))
6623 lex_match (lexer, T_EQUALS);
6624 if (lex_match_id (lexer, "PROP"))
6625 t->pairwise->type = PROP;
6626 else if (lex_match_id (lexer, "MEAN"))
6627 t->pairwise->type = MEAN;
6630 lex_error_expecting (lexer, "PROP", "MEAN");
6634 else if (lex_match_id (lexer, "ALPHA"))
6636 lex_match (lexer, T_EQUALS);
6638 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6640 double a0 = lex_number (lexer);
6643 lex_match (lexer, T_COMMA);
6644 if (lex_is_number (lexer))
6646 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6648 double a1 = lex_number (lexer);
6651 t->pairwise->alpha[0] = MIN (a0, a1);
6652 t->pairwise->alpha[1] = MAX (a0, a1);
6655 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6657 else if (lex_match_id (lexer, "ADJUST"))
6659 lex_match (lexer, T_EQUALS);
6660 if (lex_match_id (lexer, "BONFERRONI"))
6661 t->pairwise->adjust = BONFERRONI;
6662 else if (lex_match_id (lexer, "BH"))
6663 t->pairwise->adjust = BH;
6664 else if (lex_match_id (lexer, "NONE"))
6665 t->pairwise->adjust = 0;
6668 lex_error_expecting (lexer, "BONFERRONI", "BH",
6673 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6675 lex_match (lexer, T_EQUALS);
6676 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6679 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6681 lex_match (lexer, T_EQUALS);
6682 if (lex_match_id (lexer, "ALLCATS"))
6683 t->pairwise->meansvariance_allcats = true;
6684 else if (lex_match_id (lexer, "TESTEDCATS"))
6685 t->pairwise->meansvariance_allcats = false;
6688 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6692 else if (lex_match_id (lexer, "CATEGORIES"))
6694 lex_match (lexer, T_EQUALS);
6695 if (lex_match_id (lexer, "ALLVISIBLE"))
6696 t->pairwise->all_visible = true;
6697 else if (lex_match_id (lexer, "SUBTOTALS"))
6698 t->pairwise->all_visible = false;
6701 lex_error_expecting (lexer, "ALLVISIBLE",
6706 else if (lex_match_id (lexer, "MERGE"))
6708 lex_match (lexer, T_EQUALS);
6709 if (!parse_bool (lexer, &t->pairwise->merge))
6712 else if (lex_match_id (lexer, "STYLE"))
6714 lex_match (lexer, T_EQUALS);
6715 if (lex_match_id (lexer, "APA"))
6716 t->pairwise->apa_style = true;
6717 else if (lex_match_id (lexer, "SIMPLE"))
6718 t->pairwise->apa_style = false;
6721 lex_error_expecting (lexer, "APA", "SIMPLE");
6725 else if (lex_match_id (lexer, "SHOWSIG"))
6727 lex_match (lexer, T_EQUALS);
6728 if (!parse_bool (lexer, &t->pairwise->show_sig))
6733 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6734 "INCLUDEMRSETS", "MEANSVARIANCE",
6735 "CATEGORIES", "MERGE", "STYLE",
6740 while (lex_token (lexer) != T_SLASH
6741 && lex_token (lexer) != T_ENDCMD);
6743 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6744 _("Support for COMPARETEST not yet implemented."));
6749 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6750 "CRITERIA", "CATEGORIES", "TITLES",
6751 "SIGTEST", "COMPARETEST");
6752 if (lex_match_id (lexer, "FORMAT")
6753 || lex_match_id (lexer, "VLABELS")
6754 || lex_match_id (lexer, "MRSETS")
6755 || lex_match_id (lexer, "SMISSING")
6756 || lex_match_id (lexer, "PCOMPUTE")
6757 || lex_match_id (lexer, "PPROPERTIES")
6758 || lex_match_id (lexer, "WEIGHT")
6759 || lex_match_id (lexer, "HIDESMALLCOUNTS"))
6760 lex_next_msg (lexer, SN, -1, -1,
6761 _("This subcommand must appear before TABLE."));
6765 if (!lex_match (lexer, T_SLASH))
6769 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6770 t->clabels_from_axis = PIVOT_AXIS_ROW;
6771 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6772 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6773 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6775 if (!ctables_prepare_table (t, lexer))
6778 while (lex_token (lexer) != T_ENDCMD);
6781 input = proc_open (ds);
6782 bool ok = ctables_execute (ds, input, ct);
6783 ok = proc_commit (ds) && ok;
6785 ctables_destroy (ct);
6786 return ok ? CMD_SUCCESS : CMD_FAILURE;
6791 ctables_destroy (ct);