1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
58 enum ctables_weighting
66 /* CTABLES table areas. */
68 enum ctables_area_type
70 /* Within a section, where stacked variables divide one section from
73 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
74 parse_ctables_summary_function() parses correctly. */
75 CTAT_TABLE, /* All layers of a whole section. */
76 CTAT_LAYERROW, /* Row in one layer within a section. */
77 CTAT_LAYERCOL, /* Column in one layer within a section. */
78 CTAT_LAYER, /* One layer within a section. */
80 /* Within a subtable, where a subtable pairs an innermost row variable with
81 an innermost column variable within a single layer. */
82 CTAT_SUBTABLE, /* Whole subtable. */
83 CTAT_ROW, /* Row within a subtable. */
84 CTAT_COL, /* Column within a subtable. */
88 static const char *ctables_area_type_name[N_CTATS] = {
89 [CTAT_TABLE] = "TABLE",
90 [CTAT_LAYER] = "LAYER",
91 [CTAT_LAYERROW] = "LAYERROW",
92 [CTAT_LAYERCOL] = "LAYERCOL",
93 [CTAT_SUBTABLE] = "SUBTABLE",
100 struct hmap_node node;
102 const struct ctables_cell *example;
105 double count[N_CTWS];
106 double valid[N_CTWS];
107 double total[N_CTWS];
108 struct ctables_sum *sums;
116 /* CTABLES summary functions. */
118 enum ctables_function_type
120 /* A function that operates on data in a single cell. It operates on
121 effective weights. It does not have an unweighted version. */
124 /* A function that operates on data in a single cell. The function
125 operates on effective weights and has a U-prefixed unweighted
129 /* A function that operates on data in a single cell. It operates on
130 dictionary weights, and has a U-prefixed unweighted version and an
131 E-prefixed effective weight version. */
134 /* A function that operates on an area of cells. It operates on effective
135 weights and has a U-prefixed unweighted version. */
146 enum ctables_function_availability
148 CTFA_ALL, /* Any variables. */
149 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
150 //CTFA_MRSETS, /* Only multiple-response sets */
153 enum ctables_summary_function
155 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
156 #include "ctables.inc"
161 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
163 #include "ctables.inc"
167 struct ctables_function_info
169 struct substring basename;
170 enum ctables_function_type type;
171 enum ctables_format format;
172 enum ctables_function_availability availability;
174 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
175 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
176 bool is_area; /* Needs an area prefix. */
178 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
179 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
181 .basename = SS_LITERAL_INITIALIZER (NAME), \
184 .availability = AVAILABILITY, \
185 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
186 .e_prefix = (TYPE) == CTFT_UECELL, \
187 .is_area = (TYPE) == CTFT_AREA \
189 #include "ctables.inc"
/* Returns the default output format for summary FUNCTION.  The function's
   default format class (the FORMAT column of ctables.inc) is looked up in a
   static table; depending on that class the result is F40.0, PCT40.1, or the
   print format of VAR. */
193 static struct fmt_spec
194 ctables_summary_default_format (enum ctables_summary_function function,
195 const struct variable *var)
/* Table of default format classes, indexed by summary function. */
197 static const enum ctables_format default_formats[] = {
198 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
199 #include "ctables.inc"
202 switch (default_formats[function])
/* Plain numeric format: width 40, no decimals. */
205 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
/* Percentage format: width 40, one decimal. */
208 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
/* Use whatever print format VAR already has. */
211 return *var_get_print_format (var);
/* Returns the availability class of summary function F (the AVAILABILITY
   column of ctables.inc), using a static table indexed by function. */
218 static enum ctables_function_availability
219 ctables_function_availability (enum ctables_summary_function f)
221 static enum ctables_function_availability availability[] = {
222 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
223 #include "ctables.inc"
227 return availability[f];
/* Parses a summary function name from the current identifier token in LEXER.
   The name may carry a U or E weighting prefix and an area-type prefix
   (TABLE, LAYER, ...).  On a match, the function is stored in *FUNCTION and
   its weighting in *WEIGHTING; AREA receives the area type
   (NOTE(review): the store into *AREA is not among the lines visible here --
   confirm).  Names ending in .LCL, .UCL, or .SE are rejected as not yet
   implemented, and an unrecognized name is reported as an error. */
231 parse_ctables_summary_function (struct lexer *lexer,
232 enum ctables_summary_function *function,
233 enum ctables_weighting *weighting,
234 enum ctables_area_type *area)
236 if (!lex_force_id (lexer))
239 struct substring name = lex_tokss (lexer);
240 if (ss_ends_with_case (name, ss_cstr (".LCL"))
241 || ss_ends_with_case (name, ss_cstr (".UCL"))
242 || ss_ends_with_case (name, ss_cstr (".SE")))
244 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
245 "is not yet implemented."));
/* Strip an optional leading U (unweighted) or, failing that, E (effective)
   from NAME, in either letter case. */
249 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
250 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
252 bool has_area = false;
/* Try each area type name, in enum order, as a case-insensitive prefix of
   what remains of NAME. */
254 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
255 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
260 if (ss_equals_case (name, ss_cstr ("PCT")))
262 /* Special case where .COUNT suffix is omitted. */
263 *function = CTSF_areaPCT_COUNT;
264 *weighting = CTW_EFFECTIVE;
/* Look up what is left of NAME among the known function base names. */
271 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
273 const struct ctables_function_info *cfi = &ctables_function_info[f];
274 if (ss_equals_case (cfi->basename, name))
/* Detect a U or E prefix this function does not accept, or an area prefix
   that is present when not wanted or missing when required. */
277 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
280 *weighting = (e ? CTW_EFFECTIVE
282 : cfi->e_prefix ? CTW_DICTIONARY
289 lex_error (lexer, _("Expecting summary function name."));
/* Formats the syntax name of FUNCTION into BUFFER, which has room for BUFSIZE
   bytes.  The name is a weighting prefix ("U" for CTW_UNWEIGHTED, nothing for
   CTW_DICTIONARY, and otherwise "E" if the function accepts an E prefix),
   followed by the name of AREA if the function is an area function, followed
   by the function's base name. */
294 ctables_summary_function_name (enum ctables_summary_function function,
295 enum ctables_weighting weighting,
296 enum ctables_area_type area,
297 char *buffer, size_t bufsize)
299 const struct ctables_function_info *cfi = &ctables_function_info[function];
300 snprintf (buffer, bufsize, "%s%s%s",
301 (weighting == CTW_UNWEIGHTED ? "U"
302 : weighting == CTW_DICTIONARY ? ""
303 : cfi->e_prefix ? "E"
305 cfi->is_area ? ctables_area_type_name[area] : "",
306 cfi->basename.string);
/* Returns the default label for FUNCTION given WEIGHTING and AREA.  Labels
   are wrapped in N_(), which only marks them for translation; translating is
   left to the caller.  CTSF_PTILE must not be passed in, because its label
   needs the percentile value. */
311 ctables_summary_function_label__ (enum ctables_summary_function function,
312 enum ctables_weighting weighting,
313 enum ctables_area_type area)
/* W: any weighting other than unweighted.  D: dictionary weighting.
   A: shorthand for AREA. */
315 bool w = weighting != CTW_UNWEIGHTED;
316 bool d = weighting == CTW_DICTIONARY;
317 enum ctables_area_type a = area;
/* Counts distinguish all three weightings. */
321 return (d ? N_("Count")
322 : w ? N_("Adjusted Count")
323 : N_("Unweighted Count"));
/* Area percentages: one label per area type, with an "Unweighted" variant. */
325 case CTSF_areaPCT_COUNT:
328 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
329 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
330 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
331 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
332 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
333 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
334 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
338 case CTSF_areaPCT_VALIDN:
341 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
342 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
343 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
344 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
345 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
346 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
347 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
351 case CTSF_areaPCT_TOTALN:
354 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
355 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
356 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
357 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
358 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
359 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
360 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
/* Single-cell functions.  Maximum, Minimum, and Range have one label
   regardless of weighting. */
364 case CTSF_MAXIMUM: return N_("Maximum");
365 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
366 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
367 case CTSF_MINIMUM: return N_("Minimum");
368 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
369 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
370 case CTSF_PTILE: NOT_REACHED ();
371 case CTSF_RANGE: return N_("Range");
372 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
373 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
374 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
375 case CTSF_TOTALN: return (d ? N_("Total N")
376 : w ? N_("Adjusted Total N")
377 : N_("Unweighted Total N"));
378 case CTSF_VALIDN: return (d ? N_("Valid N")
379 : w ? N_("Adjusted Valid N")
380 : N_("Unweighted Valid N"));
381 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
382 case CTSF_areaPCT_SUM:
385 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
386 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
387 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
388 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
389 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
390 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
391 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
398 /* Don't bother translating these: they are for developers only. */
399 case CTAT_TABLE: return "Table ID";
400 case CTAT_LAYER: return "Layer ID";
401 case CTAT_LAYERROW: return "Layer Row ID";
402 case CTAT_LAYERCOL: return "Layer Column ID";
403 case CTAT_SUBTABLE: return "Subtable ID";
404 case CTAT_ROW: return "Row ID";
405 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value holding the default label for FUNCTION with the
   given WEIGHTING and AREA.  CTSF_PTILE is handled here because its label
   embeds the percentile value; every other function uses the fixed label
   from ctables_summary_function_label__(). */
413 static struct pivot_value *
414 ctables_summary_function_label (enum ctables_summary_function function,
415 enum ctables_weighting weighting,
416 enum ctables_area_type area,
419 if (function == CTSF_PTILE)
/* The formatted string is handed to the pivot_value without copying. */
421 char *s = (weighting != CTW_UNWEIGHTED
422 ? xasprintf (_("Percentile %.2f"), percentile)
423 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
424 return pivot_value_new_user_text_nocopy (s);
427 return pivot_value_new_text (ctables_summary_function_label__ (
428 function, weighting, area));
431 /* CTABLES summaries. */
433 struct ctables_summary_spec
435 /* The calculation to be performed.
437 'function' is the function to calculate. 'weighting' specifies whether
438 to use weighted or unweighted data (for functions that do not support a
439 choice, it must be true). 'calc_area' is the area over which the
440 calculation takes place (for functions that target only an individual
441 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
442 percentile between 0 and 100 (for other functions it must be 0). */
443 enum ctables_summary_function function;
444 enum ctables_weighting weighting;
445 enum ctables_area_type calc_area;
446 double percentile; /* CTSF_PTILE only. */
448 /* How to display the result of the calculation.
450 'label' is a user-specified label, NULL if the user didn't specify
453 'user_area' is usually the same as 'calc_area', but when category labels
454 are rotated from one axis to another it swaps rows and columns.
456 'format' is the format for displaying the output. If
457 'is_ctables_format' is true, then 'format.type' is one of the special
458 CTEF_* formats instead of the standard ones. */
460 enum ctables_area_type user_area;
461 struct fmt_spec format;
462 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Clones SRC into DST.  The label is duplicated (when non-null) so that DST
   does not share SRC's label string. */
469 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
470 const struct ctables_summary_spec *src)
473 dst->label = xstrdup_if_nonnull (src->label);
477 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
483 /* Collections of summary functions. */
485 struct ctables_summary_spec_set
487 struct ctables_summary_spec *specs;
491 /* The variable to which the summary specs are applied. */
492 struct variable *var;
494 /* Whether the variable to which the summary specs are applied is a scale
495 variable for the purpose of summarization.
497 (VALIDN and TOTALN act differently for summarizing scale and categorical
501 /* If any of these optional additional scale variables are missing, then
502 treat 'var' as if it's missing too. This is for implementing
503 SMISSING=LISTWISE. */
504 struct variable **listwise_vars;
505 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.  The array of summary specs is newly
   allocated (or NULL when SRC has none) and each spec is cloned
   individually. */
509 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
510 const struct ctables_summary_spec_set *src)
512 struct ctables_summary_spec *specs
513 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
/* Clone element by element rather than copying the array wholesale. */
514 for (size_t i = 0; i < src->n; i++)
515 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
517 *dst = (struct ctables_summary_spec_set) {
522 .is_scale = src->is_scale,
/* Releases the contents of SET: uninitializes each of its summary specs and
   frees its array of listwise variables. */
527 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
529 for (size_t i = 0; i < set->n; i++)
530 ctables_summary_spec_uninit (&set->specs[i]);
531 free (set->listwise_vars);
/* Checks case C against each of the listwise variables in SPECS, testing
   whether the numeric value of any of them is missing.  This supports
   SMISSING=LISTWISE, as described on the 'listwise_vars' member. */
536 is_listwise_missing (const struct ctables_summary_spec_set *specs,
537 const struct ccase *c)
539 for (size_t i = 0; i < specs->n_listwise_vars; i++)
541 const struct variable *var = specs->listwise_vars[i];
542 if (var_is_num_missing (var, case_num (c, var)))
549 /* CTABLES postcompute expressions. */
551 struct ctables_postcompute
553 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
554 char *name; /* Name, without leading &. */
556 struct msg_location *location; /* Location of definition. */
557 struct ctables_pcexpr *expr;
559 struct ctables_summary_spec_set *specs;
560 bool hide_source_cats;
563 struct ctables_pcexpr
573 enum ctables_pcexpr_op
576 CTPO_CONSTANT, /* 5 */
577 CTPO_CAT_NUMBER, /* [5] */
578 CTPO_CAT_STRING, /* ["STRING"] */
579 CTPO_CAT_NRANGE, /* [LO THRU 5] */
580 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
581 CTPO_CAT_MISSING, /* MISSING */
582 CTPO_CAT_OTHERNM, /* OTHERNM */
583 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
584 CTPO_CAT_TOTAL, /* TOTAL */
598 /* CTPO_CAT_NUMBER. */
601 /* CTPO_CAT_STRING, in dictionary encoding. */
602 struct substring string;
604 /* CTPO_CAT_NRANGE. */
607 /* CTPO_CAT_SRANGE. */
608 struct substring srange[2];
610 /* CTPO_CAT_SUBTOTAL. */
611 size_t subtotal_index;
613 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
614 One element: CTPO_NEG. */
615 struct ctables_pcexpr *subs[2];
618 /* Source location. */
619 struct msg_location *location;
623 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
626 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
627 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
628 struct ctables_pcexpr *sub1);
630 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
631 struct dictionary *);
/* Destroys postcompute expression E: releases the resources owned by the
   node according to its operator, then destroys its source location. */
634 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
/* String categories own their substring. */
640 case CTPO_CAT_STRING:
641 ss_dealloc (&e->string);
/* String ranges own both bounds. */
644 case CTPO_CAT_SRANGE:
645 for (size_t i = 0; i < 2; i++)
646 ss_dealloc (&e->srange[i]);
/* Operator nodes: destroy the subexpressions recursively. */
655 for (size_t i = 0; i < 2; i++)
656 ctables_pcexpr_destroy (e->subs[i]);
/* These node types have no ss_dealloc or recursive destroy call here. */
660 case CTPO_CAT_NUMBER:
661 case CTPO_CAT_NRANGE:
662 case CTPO_CAT_MISSING:
663 case CTPO_CAT_OTHERNM:
664 case CTPO_CAT_SUBTOTAL:
669 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP with operands SUB0
   and SUB1.  The new node's location is the merger of the two operands'
   locations, so both operands must be non-null. */
674 static struct ctables_pcexpr *
675 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
676 struct ctables_pcexpr *sub0,
677 struct ctables_pcexpr *sub1)
679 struct ctables_pcexpr *e = xmalloc (sizeof *e);
680 *e = (struct ctables_pcexpr) {
682 .subs = { sub0, sub1 },
683 .location = msg_location_merged (sub0->location, sub1->location),
688 /* How to parse an operator. */
691 enum token_type token;
692 enum ctables_pcexpr_op op;
/* Searches the N_OPS entries of OPS for an operator whose token type equals
   the current token in LEXER.  A T_NEG_NUM entry is singled out for different
   handling than the other tokens. */
695 static const struct operator *
696 ctables_pcexpr_match_operator (struct lexer *lexer,
697 const struct operator ops[], size_t n_ops)
699 for (const struct operator *op = ops; op < ops + n_ops; op++)
700 if (lex_token (lexer) == op->token)
702 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS entries of OPS,
   starting from the already-parsed left operand LHS.  Each right operand is
   parsed with PARSE_NEXT_LEVEL and folded into LHS, so the chain is built
   left-associatively.  If CHAIN_WARNING is non-null, it is issued as a
   warning when the chain contains more than one operator.  If a right
   operand fails to parse, LHS is destroyed. */
711 static struct ctables_pcexpr *
712 ctables_pcexpr_parse_binary_operators__ (
713 struct lexer *lexer, struct dictionary *dict,
714 const struct operator ops[], size_t n_ops,
715 parse_recursively_func *parse_next_level,
716 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators consumed so far. */
718 for (int op_count = 0; ; op_count++)
720 const struct operator *op
721 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
724 if (op_count > 1 && chain_warning)
725 msg_at (SW, lhs->location, "%s", chain_warning);
730 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
733 ctables_pcexpr_destroy (lhs);
/* The combined node becomes the left operand for the next operator. */
737 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: parses the first operand
   with PARSE_NEXT_LEVEL, then hands it to
   ctables_pcexpr_parse_binary_operators__() to consume any operators from
   OPS that follow. */
741 static struct ctables_pcexpr *
742 ctables_pcexpr_parse_binary_operators (
743 struct lexer *lexer, struct dictionary *dict,
744 const struct operator ops[], size_t n_ops,
745 parse_recursively_func *parse_next_level, const char *chain_warning)
747 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
751 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
756 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
757 struct dictionary *);
/* Returns (by value) a CTPO_CAT_NRANGE node for the numeric category range
   LOW through HIGH. */
759 static struct ctables_pcexpr
760 ctpo_cat_nrange (double low, double high)
762 return (struct ctables_pcexpr) {
763 .op = CTPO_CAT_NRANGE,
764 .nrange = { low, high },
/* Returns (by value) a CTPO_CAT_SRANGE node for the string category range
   LOW through HIGH.  The substrings are stored as passed, not copied. */
768 static struct ctables_pcexpr
769 ctpo_cat_srange (struct substring low, struct substring high)
771 return (struct ctables_pcexpr) {
772 .op = CTPO_CAT_SRANGE,
773 .srange = { low, high },
/* Recodes the current token's string between UTF-8 and DICT's encoding
   (presumably from UTF-8 into the dictionary encoding -- confirm against
   recode_substring_pool()'s argument order), then trims trailing spaces. */
777 static struct substring
778 parse_substring (struct lexer *lexer, struct dictionary *dict)
780 struct substring s = recode_substring_pool (
781 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
782 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression:

   - a numeric constant;
   - MISSING, OTHERNM, or TOTAL;
   - SUBTOTAL, optionally followed by [N] with N at least 1;
   - a bracketed category: [number], [string], or a THRU range whose lower
     bound may be LO and whose upper bound may be HI;
   - a parenthesized subexpression.

   When a node is built here, its source location is set and a copy made with
   xmemdup() is returned. */
787 static struct ctables_pcexpr *
788 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
790 int start_ofs = lex_ofs (lexer);
/* The node is assembled in this local and copied out at the end. */
791 struct ctables_pcexpr e;
792 if (lex_is_number (lexer))
794 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
795 .number = lex_number (lexer) };
798 else if (lex_match_id (lexer, "MISSING"))
799 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
800 else if (lex_match_id (lexer, "OTHERNM"))
801 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
802 else if (lex_match_id (lexer, "TOTAL"))
803 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
804 else if (lex_match_id (lexer, "SUBTOTAL"))
/* Index 0 means that no [N] was given. */
806 size_t subtotal_index = 0;
807 if (lex_match (lexer, T_LBRACK))
809 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
811 subtotal_index = lex_integer (lexer);
813 if (!lex_force_match (lexer, T_RBRACK))
816 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
817 .subtotal_index = subtotal_index };
819 else if (lex_match (lexer, T_LBRACK))
821 if (lex_match_id (lexer, "LO"))
823 if (!lex_force_match_id (lexer, "THRU"))
826 if (lex_is_string (lexer))
/* An open string bound is represented by a null .string. */
828 struct substring low = { .string = NULL };
829 struct substring high = parse_substring (lexer, dict);
830 e = ctpo_cat_srange (low, high);
834 if (!lex_force_num (lexer))
/* An open numeric lower bound is represented by -DBL_MAX. */
836 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
840 else if (lex_is_number (lexer))
842 double number = lex_number (lexer);
844 if (lex_match_id (lexer, "THRU"))
846 if (lex_match_id (lexer, "HI"))
847 e = ctpo_cat_nrange (number, DBL_MAX);
850 if (!lex_force_num (lexer))
852 e = ctpo_cat_nrange (number, lex_number (lexer));
857 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
860 else if (lex_is_string (lexer))
862 struct substring s = parse_substring (lexer, dict);
864 if (lex_match_id (lexer, "THRU"))
866 struct substring high;
868 if (lex_match_id (lexer, "HI"))
869 high = (struct substring) { .string = NULL };
872 if (!lex_force_string (lexer))
877 high = parse_substring (lexer, dict);
880 e = ctpo_cat_srange (s, high);
883 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
887 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings already parsed
   into E. */
891 if (!lex_force_match (lexer, T_RBRACK))
893 if (e.op == CTPO_CAT_STRING)
894 ss_dealloc (&e.string);
895 else if (e.op == CTPO_CAT_SRANGE)
897 ss_dealloc (&e.srange[0]);
898 ss_dealloc (&e.srange[1]);
903 else if (lex_match (lexer, T_LPAREN))
/* Parenthesized subexpression: restart at the lowest precedence level. */
905 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
908 if (!lex_force_match (lexer, T_RPAREN))
910 ctables_pcexpr_destroy (ep);
917 lex_error (lexer, NULL);
/* The location spans from the first token up to the last token consumed. */
921 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
922 return xmemdup (&e, sizeof e);
/* Allocates a new expression node with SUB as its operand.  The node's
   location runs from token offset START_OFS in LEXER up to the token just
   before the lexer's current position. */
925 static struct ctables_pcexpr *
926 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
927 struct lexer *lexer, int start_ofs)
929 struct ctables_pcexpr *e = xmalloc (sizeof *e);
930 *e = (struct ctables_pcexpr) {
933 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Chained `**' operators draw a warning
   that the operator is left-associative.  A negative-number token directly
   followed by `**' gets special treatment: the number's absolute value
   becomes the left operand of the exponentiation, and the whole result is
   then wrapped by ctables_pcexpr_allocate_neg(). */
938 static struct ctables_pcexpr *
939 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
941 static const struct operator op = { T_EXP, CTPO_POW };
943 const char *chain_warning =
944 _("The exponentiation operator (`**') is left-associative: "
945 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
946 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number followed by `**'. */
948 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
949 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
950 ctables_pcexpr_parse_primary,
953 /* Special case for situations like "-5**6", which must be parsed as
956 int start_ofs = lex_ofs (lexer);
957 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
958 *lhs = (struct ctables_pcexpr) {
/* Negating the negative token value yields the positive operand. */
960 .number = -lex_tokval (lexer),
961 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
965 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
967 ctables_pcexpr_parse_primary, chain_warning, lhs);
971 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
974 /* Parses the unary minus level. */
975 static struct ctables_pcexpr *
976 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
978 int start_ofs = lex_ofs (lexer);
/* Without a leading dash, defer to the exponentiation level. */
979 if (!lex_match (lexer, T_DASH))
980 return ctables_pcexpr_parse_exp (lexer, dict);
/* Recurse so that repeated dashes nest. */
982 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
986 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
989 /* Parses the multiplication and division level. */
990 static struct ctables_pcexpr *
991 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
/* Operators at this level; operands come from the unary minus level.  No
   chain warning is used here. */
993 static const struct operator ops[] =
995 { T_ASTERISK, CTPO_MUL },
996 { T_SLASH, CTPO_DIV },
999 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1000 sizeof ops / sizeof *ops,
1001 ctables_pcexpr_parse_neg, NULL);
1004 /* Parses the addition and subtraction level. */
1005 static struct ctables_pcexpr *
1006 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
/* Operators at this level; operands come from the multiplication level.
   A negative-number token is mapped to CTPO_ADD (presumably so that "a -5"
   is read as adding -5 -- confirm against
   ctables_pcexpr_match_operator()). */
1008 static const struct operator ops[] =
1010 { T_PLUS, CTPO_ADD },
1011 { T_DASH, CTPO_SUB },
1012 { T_NEG_NUM, CTPO_ADD },
1015 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1016 ops, sizeof ops / sizeof *ops,
1017 ctables_pcexpr_parse_mul, NULL);
1020 /* CTABLES axis expressions. */
1022 /* CTABLES has a number of extra formats that we implement via custom
1023 currency specifications on an alternate fmt_settings. */
1024 #define CTEF_NEGPAREN FMT_CCA
1025 #define CTEF_NEQUAL FMT_CCB
1026 #define CTEF_PAREN FMT_CCC
1027 #define CTEF_PCTPAREN FMT_CCD
1029 enum ctables_summary_variant
1038 enum ctables_axis_op
1054 struct variable *var;
1056 struct ctables_summary_spec_set specs[N_CSVS];
1060 struct ctables_axis *subs[2];
1063 struct msg_location *loc;
/* Destroys AXIS.  The visible cleanup uninitializes all N_CSVS summary spec
   sets, recursively destroys both sub-axes, and destroys the axis's source
   location. */
1067 ctables_axis_destroy (struct ctables_axis *axis)
1075 for (size_t i = 0; i < N_CSVS; i++)
1076 ctables_summary_spec_set_uninit (&axis->specs[i]);
1081 ctables_axis_destroy (axis->subs[0]);
1082 ctables_axis_destroy (axis->subs[1]);
1085 msg_location_destroy (axis->loc);
/* Allocates a new axis node for operator OP with sub-axes SUB0 and SUB1.
   Its location runs from token offset START_OFS in LEXER up to the token
   just before the lexer's current position. */
1089 static struct ctables_axis *
1090 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1091 struct ctables_axis *sub0,
1092 struct ctables_axis *sub1,
1093 struct lexer *lexer, int start_ofs)
1095 struct ctables_axis *axis = xmalloc (sizeof *axis);
1096 *axis = (struct ctables_axis) {
1098 .subs = { sub0, sub1 },
1099 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1104 struct ctables_axis_parse_ctx
1106 struct lexer *lexer;
1107 struct dictionary *dict;
/* Returns a new pivot_value with the label to display for SPEC.  One path
   returns the default label for SPEC's function, weighting, user area, and
   percentile.  The other builds the text from SPEC's user-specified label,
   replacing each occurrence of ")CILEVEL" with CILEVEL formatted with
   "%g". */
1110 static struct pivot_value *
1111 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1114 return ctables_summary_function_label (spec->function, spec->weighting,
1115 spec->user_area, spec->percentile);
/* IN is the part of the label not yet copied; OUT accumulates the result. */
1118 struct substring in = ss_cstr (spec->label);
1119 struct substring target = ss_cstr (")CILEVEL");
1121 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text before the next ")CILEVEL" and step past it. */
1124 size_t chunk = ss_find_substring (in, target);
1125 ds_put_substring (&out, ss_head (in, chunk));
1126 ss_advance (&in, chunk);
/* The finished string is handed to the pivot_value without copying. */
1128 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the placeholder itself and substitute the confidence level. */
1130 ss_advance (&in, target.length);
1131 ds_put_format (&out, "%g", cilevel);
/* Adds a summary specification to AXIS.

   For a CTAO_VAR axis, FUNCTION's availability is first checked against the
   axis variable, with errors reported at LOC and explanatory notes at the
   axis's own location.  A new spec is then appended to the axis's spec set
   for variant SV, growing the array as needed.  LABEL is duplicated, so the
   caller keeps ownership of it.  If FORMAT is null, the default format for
   FUNCTION and the axis variable is used.

   The loop at the end applies the same specification to both sub-axes of
   AXIS. */
1137 add_summary_spec (struct ctables_axis *axis,
1138 enum ctables_summary_function function,
1139 enum ctables_weighting weighting,
1140 enum ctables_area_type area, double percentile,
1141 const char *label, const struct fmt_spec *format,
1142 bool is_ctables_format, const struct msg_location *loc,
1143 enum ctables_summary_variant sv)
1145 if (axis->op == CTAO_VAR)
/* The function's syntax name is needed only for error messages. */
1147 char function_name[128];
1148 ctables_summary_function_name (function, weighting, area,
1149 function_name, sizeof function_name);
1150 const char *var_name = var_get_name (axis->var);
1151 switch (ctables_function_availability (function))
1155 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1156 "response sets."), function_name);
1157 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* The scale-only restriction is not applied to the CSV_TOTAL variant. */
1163 if (!axis->scale && sv != CSV_TOTAL)
1166 _("Summary function %s applies only to scale variables."),
1168 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Make room for one more spec if the array is full. */
1178 struct ctables_summary_spec_set *set = &axis->specs[sv];
1179 if (set->n >= set->allocated)
1180 set->specs = x2nrealloc (set->specs, &set->allocated,
1181 sizeof *set->specs);
1183 struct ctables_summary_spec *dst = &set->specs[set->n++];
1184 *dst = (struct ctables_summary_spec) {
1185 .function = function,
1186 .weighting = weighting,
1189 .percentile = percentile,
1190 .label = xstrdup_if_nonnull (label),
1191 .format = (format ? *format
1192 : ctables_summary_default_format (function, axis->var)),
1193 .is_ctables_format = is_ctables_format,
1199 for (size_t i = 0; i < 2; i++)
1200 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1201 percentile, label, format, is_ctables_format,
1208 static struct ctables_axis *ctables_axis_parse_stack (
1209 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name, optionally followed by [S] or [C] to force the
   variable to be treated as scale or categorical.  Names beginning with `$'
   (multiple response sets) are rejected as not implemented, as is using a
   string variable as a scale variable. */
1211 static struct ctables_axis *
1212 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1214 if (lex_match (ctx->lexer, T_LPAREN))
1216 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1217 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1219 ctables_axis_destroy (sub);
1225 if (!lex_force_id (ctx->lexer))
1228 if (lex_tokcstr (ctx->lexer)[0] == '$')
1230 lex_error (ctx->lexer,
1231 _("Multiple response set support not implemented."));
1235 int start_ofs = lex_ofs (ctx->lexer);
1236 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1240 struct ctables_axis *axis = xmalloc (sizeof *axis);
1241 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] overrides the variable's measurement level. */
1243 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1244 : lex_match_phrase (ctx->lexer, "[C]") ? false
1245 : var_get_measure (var) == MEASURE_SCALE);
1246 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1247 lex_ofs (ctx->lexer) - 1);
1248 if (axis->scale && var_is_alpha (var))
1250 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1252 var_get_name (var));
1253 ctables_axis_destroy (axis);
/* Tests whether the null-terminated string S contains at least one decimal
   digit.  strcspn() gives the length of the leading digit-free run; if the
   character at that position is not the terminator, S contains a digit. */
1261 has_digit (const char *s)
1263 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The CTABLES-specific type names NEGPAREN, NEQUAL, PAREN, and PCTPAREN are
   mapped onto the corresponding CTEF_* formats, with *IS_CTABLES_FORMAT set
   to true; for these, errors are reported for a width that is too small or
   not greater than the number of decimals.  Any other type name is parsed
   as a standard format specifier and checked as a numeric output format,
   with *IS_CTABLES_FORMAT set to false. */
1267 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1268 bool *is_ctables_format)
1270 char type[FMT_TYPE_LEN_MAX + 1];
1271 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
/* Type names are compared case-insensitively. */
1274 if (!strcasecmp (type, "NEGPAREN"))
1275 format->type = CTEF_NEGPAREN;
1276 else if (!strcasecmp (type, "NEQUAL"))
1277 format->type = CTEF_NEQUAL;
1278 else if (!strcasecmp (type, "PAREN"))
1279 format->type = CTEF_PAREN;
1280 else if (!strcasecmp (type, "PCTPAREN"))
1281 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific type: use the standard format parser and checks. */
1284 *is_ctables_format = false;
1285 return (parse_format_specifier (lexer, format)
1286 && fmt_check_output (format)
1287 && fmt_check_type_compat (format, VAL_NUMERIC));
1293 lex_next_error (lexer, -1, -1,
1294 _("Output format %s requires width 2 or greater."), type);
1297 else if (format->d > format->w - 1)
1299 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1300 "greater than decimals."), type);
1305 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.  Each specification is a summary function, a
   percentile (for PTILE only), an optional quoted label, and an optional
   format.  Specifications may be separated by commas.  A nested
   "TOTALS [ ... ]" group switches the summary variant from CSV_CELL to
   CSV_TOTAL. */
1310 static struct ctables_axis *
1311 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1313 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1314 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1317 enum ctables_summary_variant sv = CSV_CELL;
1320 int start_ofs = lex_ofs (ctx->lexer);
1322 /* Parse function. */
1323 enum ctables_summary_function function;
1324 enum ctables_weighting weighting;
1325 enum ctables_area_type area;
1326 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1330 /* Parse percentile. */
1331 double percentile = 0;
1332 if (function == CTSF_PTILE)
1334 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1336 percentile = lex_number (ctx->lexer);
1337 lex_get (ctx->lexer);
/* An optional string token supplies a user-specified label. */
1342 if (lex_is_string (ctx->lexer))
1344 label = ss_xstrdup (lex_tokss (ctx->lexer));
1345 lex_get (ctx->lexer);
/* An identifier that contains a digit is taken to be a format specifier. */
1349 struct fmt_spec format;
1350 const struct fmt_spec *formatp;
1351 bool is_ctables_format = false;
1352 if (lex_token (ctx->lexer) == T_ID
1353 && has_digit (lex_tokcstr (ctx->lexer)))
1355 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1356 &is_ctables_format))
/* LOC covers the tokens of this one summary specification. */
1366 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1367 lex_ofs (ctx->lexer) - 1);
1368 add_summary_spec (sub, function, weighting, area, percentile, label,
1369 formatp, is_ctables_format, loc, sv);
1371 msg_location_destroy (loc);
/* The comma between specifications is optional. */
1373 lex_match (ctx->lexer, T_COMMA);
1374 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1376 if (!lex_force_match (ctx->lexer, T_LBRACK))
1380 else if (lex_match (ctx->lexer, T_RBRACK))
/* Inside TOTALS [...], a second closing bracket is required to close the
   outer list as well. */
1382 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1389 ctables_axis_destroy (sub);
/* Looks for a scale variable within AXIS.  A CTAO_VAR node is itself the
   answer when its 'scale' flag is set and yields NULL otherwise; any other
   kind of node is searched recursively through both of its
   subexpressions. */
1393 static const struct ctables_axis *
1394 find_scale (const struct ctables_axis *axis)
1398 else if (axis->op == CTAO_VAR)
1399 return axis->scale ? axis : NULL;
1402 for (size_t i = 0; i < 2; i++)
1404 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Looks within AXIS for a categorical variable that has summary
   specifications.  A CTAO_VAR node is itself the answer when it is not a
   scale variable and has at least one CSV_CELL summary specification, and
   yields NULL otherwise; any other kind of node is searched recursively
   through both of its subexpressions. */
1412 static const struct ctables_axis *
1413 find_categorical_summary_spec (const struct ctables_axis *axis)
1417 else if (axis->op == CTAO_VAR)
1418 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1421 for (size_t i = 0; i < 2; i++)
1423 const struct ctables_axis *sum
1424 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions joined by '>' (T_GT), combining
   each pair into a CTAO_NEST node whose location starts at the first token of
   the whole expression.

   Two restrictions are diagnosed, and in both cases the new nest node is
   destroyed: the outer and inner operands may not both contain a scale
   variable, and, per the messages below, an outer categorical variable may
   not carry summaries. */
1432 static struct ctables_axis *
1433 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1435 int start_ofs = lex_ofs (ctx->lexer);
1436 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1440 while (lex_match (ctx->lexer, T_GT))
1442 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1445 ctables_axis_destroy (lhs);
1449 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1450 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1452 const struct ctables_axis *outer_scale = find_scale (lhs);
1453 const struct ctables_axis *inner_scale = find_scale (rhs);
1454 if (outer_scale && inner_scale)
1456 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1457 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1458 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1459 ctables_axis_destroy (nest);
1463 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1466 msg_at (SE, nest->loc,
1467 _("Summaries may only be requested for categorical variables "
1468 "at the innermost nesting level."));
1469 msg_at (SN, outer_sum->loc,
1470 _("This outer categorical variable has a summary."));
1471 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by '+' (T_PLUS), folding them
   left to right into CTAO_STACK nodes.  Every node's location starts at the
   first token of the whole expression. */
1481 static struct ctables_axis *
1482 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1484 int start_ofs = lex_ofs (ctx->lexer);
1485 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1489 while (lex_match (ctx->lexer, T_PLUS))
1491 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1494 ctables_axis_destroy (lhs);
1498 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1499 ctx->lexer, start_ofs);
/* Parses an axis expression from LEXER, storing the result of
   ctables_axis_parse_stack() in *AXISP.

   The case where the next token is BY, '/', or the end of the command is
   checked for first.
   NOTE(review): presumably that case means the axis is empty and nothing is
   parsed -- confirm. */
1506 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1507 struct ctables_axis **axisp)
1510 if (lex_token (lexer) == T_BY
1511 || lex_token (lexer) == T_SLASH
1512 || lex_token (lexer) == T_ENDCMD)
1515 struct ctables_axis_parse_ctx ctx = {
1519 *axisp = ctables_axis_parse_stack (&ctx);
1523 /* CTABLES categories. */
/* A list of categories for a variable. */
1525 struct ctables_categories
/* Array of categories; other code iterates it with the 'n_cats' count. */
1528 struct ctables_category *cats;
/* One category within a category list.  The category's type determines which
   of the members below are meaningful, as noted on each member. */
1533 struct ctables_category
1535 enum ctables_category_type
1537 /* Explicit category lists. */
1540 CCT_NRANGE, /* Numerical range. */
1541 CCT_SRANGE, /* String range. */
1546 /* Totals and subtotals. */
1550 /* Implicit category lists. */
1555 /* For contributing to TOTALN. */
1556 CCT_EXCLUDED_MISSING,
1560 struct ctables_category *subtotal;
1566 double number; /* CCT_NUMBER. */
1567 struct substring string; /* CCT_STRING, in dictionary encoding. */
1568 double nrange[2]; /* CCT_NRANGE. */
1569 struct substring srange[2]; /* CCT_SRANGE. */
1573 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1574 bool hide_subcategories; /* CCT_SUBTOTAL. */
1577 /* CCT_POSTCOMPUTE. */
1580 const struct ctables_postcompute *pc;
1581 enum fmt_type parse_format;
1584 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1587 bool include_missing;
1588 bool sort_ascending;
1591 enum ctables_summary_function sort_function;
1592 enum ctables_weighting weighting;
1593 enum ctables_area_type area;
1594 struct variable *sort_var;
1599 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1600 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1601 struct msg_location *location;
/* Releases the resources that CAT owns: its source location and, depending
   on the category's type, its string, both endpoints of its string range, or
   its total label.  CAT itself is not freed here. */
1605 ctables_category_uninit (struct ctables_category *cat)
1610 msg_location_destroy (cat->location);
1617 case CCT_POSTCOMPUTE:
1621 ss_dealloc (&cat->string);
1625 ss_dealloc (&cat->srange[0]);
1626 ss_dealloc (&cat->srange[1]);
1631 free (cat->total_label);
1639 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be "null", that is, have
   a null 'string' pointer.  Two null substrings are equal, a null substring
   never equals a non-null one, and two non-null substrings are compared with
   ss_equals(). */
1645 nullable_substring_equal (const struct substring *a,
1646 const struct substring *b)
1648 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Their types are compared first, and then
   only the members relevant to that type are compared: numbers and numeric
   ranges by value, strings and string ranges by content (string range
   endpoints may be null), postcomputes by pointer identity, total labels with
   strcmp(), and the sort-related settings member by member. */
1652 ctables_category_equal (const struct ctables_category *a,
1653 const struct ctables_category *b)
1655 if (a->type != b->type)
1661 return a->number == b->number;
1664 return ss_equals (a->string, b->string);
1667 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1670 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1671 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1677 case CCT_POSTCOMPUTE:
1678 return a->pc == b->pc;
1682 return !strcmp (a->total_label, b->total_label);
1687 return (a->include_missing == b->include_missing
1688 && a->sort_ascending == b->sort_ascending
1689 && a->sort_function == b->sort_function
1690 && a->sort_var == b->sort_var
1691 && a->percentile == b->percentile);
1693 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C, whose reference count must be positive.  Tearing C
   down uninitializes each of its categories.
   NOTE(review): presumably the teardown happens only when the count reaches
   zero -- confirm. */
1701 ctables_categories_unref (struct ctables_categories *c)
1706 assert (c->n_refs > 0);
1710 for (size_t i = 0; i < c->n_cats; i++)
1711 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  The number of categories and the
   'show_empty' setting are compared first, and then the categories are
   compared pairwise, in order, with ctables_category_equal(). */
1717 ctables_categories_equal (const struct ctables_categories *a,
1718 const struct ctables_categories *b)
1720 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1723 for (size_t i = 0; i < a->n_cats; i++)
1724 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns a category whose numeric range runs from LOW to HIGH.  Members
   that are not explicitly initialized are zeroed by the compound literal. */
1730 static struct ctables_category
1731 cct_nrange (double low, double high)
1733 return (struct ctables_category) {
1735 .nrange = { low, high }
/* Returns a category whose string range runs from LOW to HIGH.  The
   substrings are stored as given, without copying.  Members that are not
   explicitly initialized are zeroed by the compound literal. */
1739 static struct ctables_category
1740 cct_srange (struct substring low, struct substring high)
1742 return (struct ctables_category) {
1744 .srange = { low, high }
/* Parses the remainder of a subtotal category into *CAT.  If '=' follows,
   then a string is required and a copy of it becomes the subtotal's label;
   otherwise the label is a copy of the translated string "Subtotal".  The
   result has type CCT_SUBTOTAL and records HIDE_SUBCATEGORIES. */
1749 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1750 struct ctables_category *cat)
1753 if (lex_match (lexer, T_EQUALS))
1755 if (!lex_force_string (lexer))
1758 total_label = ss_xstrdup (lex_tokss (lexer));
1762 total_label = xstrdup (_("Subtotal"));
1764 *cat = (struct ctables_category) {
1765 .type = CCT_SUBTOTAL,
1766 .hide_subcategories = hide_subcategories,
1767 .total_label = total_label
/* Parses one explicit category from LEXER into *CAT.  The forms recognized
   are:

     OTHERNM
     MISSING
     SUBTOTAL and HSUBTOTAL (handled by ctables_table_parse_subtotal(), with
       subcategories hidden for HSUBTOTAL)
     LO THRU followed by a string or a number
     a number, optionally followed by THRU and then HI or a number
     a string, optionally followed by THRU and then HI or a string
     '&' followed by the name of a postcompute

   An open-ended numeric range uses -DBL_MAX or DBL_MAX as the missing bound,
   and an open-ended string range uses a null substring.  Naming a postcompute
   that ctables_find_postcompute() does not find is an error, and so is any
   other syntax. */
1773 ctables_table_parse_explicit_category (struct lexer *lexer,
1774 struct dictionary *dict,
1776 struct ctables_category *cat)
1778 if (lex_match_id (lexer, "OTHERNM"))
1779 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1780 else if (lex_match_id (lexer, "MISSING"))
1781 *cat = (struct ctables_category) { .type = CCT_MISSING };
1782 else if (lex_match_id (lexer, "SUBTOTAL"))
1783 return ctables_table_parse_subtotal (lexer, false, cat);
1784 else if (lex_match_id (lexer, "HSUBTOTAL"))
1785 return ctables_table_parse_subtotal (lexer, true, cat);
1786 else if (lex_match_id (lexer, "LO"))
1788 if (!lex_force_match_id (lexer, "THRU"))
1790 if (lex_is_string (lexer))
1792 struct substring sr0 = { .string = NULL };
1793 struct substring sr1 = parse_substring (lexer, dict);
1794 *cat = cct_srange (sr0, sr1);
1796 else if (lex_force_num (lexer))
1798 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1804 else if (lex_is_number (lexer))
1806 double number = lex_number (lexer);
1808 if (lex_match_id (lexer, "THRU"))
1810 if (lex_match_id (lexer, "HI"))
1811 *cat = cct_nrange (number, DBL_MAX);
1814 if (!lex_force_num (lexer))
1816 *cat = cct_nrange (number, lex_number (lexer));
1821 *cat = (struct ctables_category) {
1826 else if (lex_is_string (lexer))
1828 struct substring s = parse_substring (lexer, dict);
1829 if (lex_match_id (lexer, "THRU"))
1831 if (lex_match_id (lexer, "HI"))
1833 struct substring sr1 = { .string = NULL };
1834 *cat = cct_srange (s, sr1);
1838 if (!lex_force_string (lexer))
1843 struct substring sr1 = parse_substring (lexer, dict);
1844 *cat = cct_srange (s, sr1);
1848 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1850 else if (lex_match (lexer, T_AND))
1852 if (!lex_force_id (lexer))
1854 struct ctables_postcompute *pc = ctables_find_postcompute (
1855 ct, lex_tokcstr (lexer));
1858 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1859 msg_at (SE, loc, _("Unknown postcompute &%s."),
1860 lex_tokcstr (lexer));
1861 msg_location_destroy (loc);
1866 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1870 lex_error (lexer, NULL);
/* Converts the category string S with data_in(), treating it as text in
   DICT's encoding and in input format FORMAT.  If data_in() reports an error,
   a message that includes the format's name and the error is issued at
   LOCATION.
   NOTE(review): presumably the parsed number is stored in *N on success --
   confirm. */
1878 parse_category_string (struct msg_location *location,
1879 struct substring s, const struct dictionary *dict,
1880 enum fmt_type format, double *n)
1883 char *error = data_in (s, dict_get_encoding (dict), format,
1884 settings_get_fmt_settings (), &v, 0, NULL);
1887 msg_at (SE, location,
1888 _("Failed to parse category specification as format %s: %s."),
1889 fmt_name (format), error);
/* Scans CATS for the category that postcompute expression operand E refers
   to.  How a category matches depends on E's operation: number, string, and
   range operands need a category of the corresponding type with equal
   contents (string range endpoints may be null), whereas MISSING, OTHERNM,
   SUBTOTAL, and TOTAL operands match on category type.  For subtotals,
   E->subtotal_index is compared against a running count of the subtotals
   seen so far.

   A subtotal reference whose index is 0 receives special treatment when CATS
   contains more than one subtotal.
   NOTE(review): presumably such a reference is rejected as ambiguous --
   confirm. */
1898 static struct ctables_category *
1899 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1900 const struct ctables_pcexpr *e)
1902 struct ctables_category *best = NULL;
1903 size_t n_subtotals = 0;
1904 for (size_t i = 0; i < cats->n_cats; i++)
1906 struct ctables_category *cat = &cats->cats[i];
1909 case CTPO_CAT_NUMBER:
1910 if (cat->type == CCT_NUMBER && cat->number == e->number)
1914 case CTPO_CAT_STRING:
1915 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1919 case CTPO_CAT_NRANGE:
1920 if (cat->type == CCT_NRANGE
1921 && cat->nrange[0] == e->nrange[0]
1922 && cat->nrange[1] == e->nrange[1])
1926 case CTPO_CAT_SRANGE:
1927 if (cat->type == CCT_SRANGE
1928 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1929 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1933 case CTPO_CAT_MISSING:
1934 if (cat->type == CCT_MISSING)
1938 case CTPO_CAT_OTHERNM:
1939 if (cat->type == CCT_OTHERNM)
1943 case CTPO_CAT_SUBTOTAL:
1944 if (cat->type == CCT_SUBTOTAL)
1947 if (e->subtotal_index == n_subtotals)
1949 else if (e->subtotal_index == 0)
1954 case CTPO_CAT_TOTAL:
1955 if (cat->type == CCT_TOTAL)
1969 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression operand E refers
   to.

   When PARSE_FORMAT is something other than FMT_F, string operands are first
   converted to numbers by parsing them in PARSE_FORMAT: a string operand
   becomes a number operand, and a string range becomes a numeric range in
   which a null lower or upper endpoint becomes -DBL_MAX or DBL_MAX,
   respectively.  The converted operand keeps E's location.  Any other
   operand is looked up unchanged. */
1974 static struct ctables_category *
1975 ctables_find_category_for_postcompute (const struct dictionary *dict,
1976 const struct ctables_categories *cats,
1977 enum fmt_type parse_format,
1978 const struct ctables_pcexpr *e)
1980 if (parse_format != FMT_F)
1982 if (e->op == CTPO_CAT_STRING)
1985 if (!parse_category_string (e->location, e->string, dict,
1986 parse_format, &number))
1989 struct ctables_pcexpr e2 = {
1990 .op = CTPO_CAT_NUMBER,
1992 .location = e->location,
1994 return ctables_find_category_for_postcompute__ (cats, &e2);
1996 else if (e->op == CTPO_CAT_SRANGE)
1999 if (!e->srange[0].string)
2000 nrange[0] = -DBL_MAX;
2001 else if (!parse_category_string (e->location, e->srange[0], dict,
2002 parse_format, &nrange[0]))
2005 if (!e->srange[1].string)
2006 nrange[1] = DBL_MAX;
2007 else if (!parse_category_string (e->location, e->srange[1], dict,
2008 parse_format, &nrange[1]))
2011 struct ctables_pcexpr e2 = {
2012 .op = CTPO_CAT_NRANGE,
2013 .nrange = { nrange[0], nrange[1] },
2014 .location = e->location,
2016 return ctables_find_category_for_postcompute__ (cats, &e2);
2019 return ctables_find_category_for_postcompute__ (cats, e);
/* Forms a substring over the bytes of string value V, using VAR's width as
   the length, and removes trailing spaces from it.  The substring refers to
   V's own storage rather than to a copy. */
2022 static struct substring
2023 rtrim_value (const union value *v, const struct variable *var)
2025 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2026 var_get_width (var));
2027 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether V, a value of string variable VAR, lies within the string
   range SRANGE[0]...SRANGE[1], inclusive, after trailing spaces are trimmed
   from V.  A null endpoint leaves that side of the range unbounded. */
2032 in_string_range (const union value *v, const struct variable *var,
2033 const struct substring *srange)
2035 struct substring s = rtrim_value (v, var);
2036 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2037 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The system-missing value of a numeric variable is checked for before
   anything else.  Otherwise the categories are examined from the last one to
   the first: numbers and strings match by equality (strings with trailing
   spaces trimmed from V), numeric and string ranges match inclusively with
   -DBL_MAX, DBL_MAX, or a null endpoint meaning unbounded, and some category
   types test whether V is a missing value for VAR.

   If nothing matches directly, the result is NULL when V is missing and
   otherwise 'othernm', which is NULL unless the scan set it. */
2040 static const struct ctables_category *
2041 ctables_categories_match (const struct ctables_categories *c,
2042 const union value *v, const struct variable *var)
2044 if (var_is_numeric (var) && v->f == SYSMIS)
2047 const struct ctables_category *othernm = NULL;
2048 for (size_t i = c->n_cats; i-- > 0; )
2050 const struct ctables_category *cat = &c->cats[i];
2054 if (cat->number == v->f)
2059 if (ss_equals (cat->string, rtrim_value (v, var)))
2064 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2065 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2070 if (in_string_range (v, var, cat->srange))
2075 if (var_is_value_missing (var, v))
2079 case CCT_POSTCOMPUTE:
2094 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2097 case CCT_EXCLUDED_MISSING:
2102 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's total category, which is looked for only in the first and last
   positions of the category list, with the first position taking precedence.
   NOTE(review): both positions are indexed unconditionally, so this appears
   to assume that C has at least one category -- confirm against callers. */
2105 static const struct ctables_category *
2106 ctables_categories_total (const struct ctables_categories *c)
2108 const struct ctables_category *first = &c->cats[0];
2109 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2110 return (first->type == CCT_TOTAL ? first
2111 : last->type == CCT_TOTAL ? last
/* Formats NUMBER as though it were a value of VAR and passes the string 's'
   to pivot_value_format() to receive the text.  A temporary pivot value is
   created for the purpose and destroyed afterward. */
2116 ctables_category_format_number (double number, const struct variable *var,
2119 struct pivot_value *pv = pivot_value_new_var_value (
2120 var, &(union value) { .f = number });
2121 pivot_value_format (pv, NULL, s);
2122 pivot_value_destroy (pv);
/* Formats STRING as though it were a value of string variable VAR and passes
   OUT to pivot_value_format() to receive the text.  STRING is first copied
   into a temporary buffer of VAR's width, padded on the right with spaces,
   because a temporary pivot value is built from a buffer of exactly that
   width. */
2126 ctables_category_format_string (struct substring string,
2127 const struct variable *var, struct string *out)
2129 int width = var_get_width (var);
2130 char *s = xmalloc (width);
2131 buf_copy_rpad (s, width, string.string, string.length, ' ');
2132 struct pivot_value *pv = pivot_value_new_var_value (
2133 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2134 pivot_value_format (pv, NULL, out);
2135 pivot_value_destroy (pv);
/* Writes a textual description of category CAT to the string 's', formatting
   any values as values of VAR.  Numbers and strings are formatted as single
   values, ranges as their two endpoints separated by " THRU ", and other
   categories as the literal text "MISSING" or "OTHERNM", '&' followed by the
   postcompute's name, or the total label. */
2140 ctables_category_format_label (const struct ctables_category *cat,
2141 const struct variable *var,
2147 ctables_category_format_number (cat->number, var, s);
2151 ctables_category_format_string (cat->string, var, s);
2155 ctables_category_format_number (cat->nrange[0], var, s);
2156 ds_put_format (s, " THRU ");
2157 ctables_category_format_number (cat->nrange[1], var, s);
2161 ctables_category_format_string (cat->srange[0], var, s);
2162 ds_put_format (s, " THRU ");
2163 ctables_category_format_string (cat->srange[1], var, s);
2167 ds_put_cstr (s, "MISSING");
2171 ds_put_cstr (s, "OTHERNM");
2174 case CCT_POSTCOMPUTE:
2175 ds_put_format (s, "&%s", cat->pc->name);
2180 ds_put_cstr (s, cat->total_label);
2186 case CCT_EXCLUDED_MISSING:
/* Checks that every category operand within postcompute expression E, which
   belongs to computed category PC_CAT, refers to a category in CATS.
   CATS_LOCATION is the location of the category list, for use in messages.

   Each category operand is looked up with
   ctables_find_category_for_postcompute(), using PC_CAT's parse format.  Two
   kinds of failure have their own diagnostics: a subtotal reference that
   lacks a position when CATS contains more than one subtotal, and a
   reference to a category that the list does not contain, for which a note
   suggests how to fix it.  Expressions with subexpressions are checked
   recursively.

   NOTE(review): the TOTAL=YES suggestion is issued with msg(), without a
   location, unlike the neighboring notes that use msg_at() -- confirm that
   this is intended. */
2194 ctables_recursive_check_postcompute (struct dictionary *dict,
2195 const struct ctables_pcexpr *e,
2196 struct ctables_category *pc_cat,
2197 const struct ctables_categories *cats,
2198 const struct msg_location *cats_location)
2202 case CTPO_CAT_NUMBER:
2203 case CTPO_CAT_STRING:
2204 case CTPO_CAT_NRANGE:
2205 case CTPO_CAT_SRANGE:
2206 case CTPO_CAT_MISSING:
2207 case CTPO_CAT_OTHERNM:
2208 case CTPO_CAT_SUBTOTAL:
2209 case CTPO_CAT_TOTAL:
2211 struct ctables_category *cat = ctables_find_category_for_postcompute (
2212 dict, cats, pc_cat->parse_format, e);
2215 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2217 size_t n_subtotals = 0;
2218 for (size_t i = 0; i < cats->n_cats; i++)
2219 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2220 if (n_subtotals > 1)
2222 msg_at (SE, cats_location,
2223 ngettext ("These categories include %zu instance "
2224 "of SUBTOTAL or HSUBTOTAL, so references "
2225 "from computed categories must refer to "
2226 "subtotals by position, "
2227 "e.g. SUBTOTAL[1].",
2228 "These categories include %zu instances "
2229 "of SUBTOTAL or HSUBTOTAL, so references "
2230 "from computed categories must refer to "
2231 "subtotals by position, "
2232 "e.g. SUBTOTAL[1].",
2235 msg_at (SN, e->location,
2236 _("This is the reference that lacks a position."));
2241 msg_at (SE, pc_cat->location,
2242 _("Computed category &%s references a category not included "
2243 "in the category list."),
2245 msg_at (SN, e->location, _("This is the missing category."));
2246 if (e->op == CTPO_CAT_SUBTOTAL)
2247 msg_at (SN, cats_location,
2248 _("To fix the problem, add subtotals to the "
2249 "list of categories here."));
2250 else if (e->op == CTPO_CAT_TOTAL)
2251 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2252 "CATEGORIES specification."));
2254 msg_at (SN, cats_location,
2255 _("To fix the problem, add the missing category to the "
2256 "list of categories here."));
2259 if (pc_cat->pc->hide_source_cats)
2273 for (size_t i = 0; i < 2; i++)
2274 if (e->subs[i] && !ctables_recursive_check_postcompute (
2275 dict, e->subs[i], pc_cat, cats, cats_location))
/* Returns a new pivot value that holds the label for postcompute category
   CAT, with each ")LABEL[n]" in the postcompute's label replaced by the label
   of the n'th category in CATS, counting from 1, formatted for VAR.

   A label that contains no ")LABEL[" at all is returned as is.  The index
   must be terminated by ']', consist entirely of a decimal number, and lie
   between 1 and the number of categories.  The final statement returns the
   postcompute's label without any substitution.
   NOTE(review): presumably that is the fallback for a malformed or
   out-of-range reference -- confirm. */
2283 static struct pivot_value *
2284 ctables_postcompute_label (const struct ctables_categories *cats,
2285 const struct ctables_category *cat,
2286 const struct variable *var)
2288 struct substring in = ss_cstr (cat->pc->label);
2289 struct substring target = ss_cstr (")LABEL[");
2291 struct string out = DS_EMPTY_INITIALIZER;
2294 size_t chunk = ss_find_substring (in, target);
2295 if (chunk == SIZE_MAX)
/* No more references.  If there were none at all, nothing has been
   accumulated in 'out', so the remaining input is the whole label. */
2297 if (ds_is_empty (&out))
2298 return pivot_value_new_user_text (in.string, in.length);
2301 ds_put_substring (&out, in);
2302 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text that precedes the reference, then skip past ")LABEL[". */
2306 ds_put_substring (&out, ss_head (in, chunk));
2307 ss_advance (&in, chunk + target.length);
2309 struct substring idx_s;
2310 if (!ss_get_until (&in, ']', &idx_s))
2313 long int idx = strtol (idx_s.string, &tail, 10);
2314 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2317 struct ctables_category *cat2 = &cats->cats[idx - 1];
2318 if (!ctables_category_format_label (cat2, var, &out))
2324 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Returns a new pivot value to label category CAT, which is in category list
   CATS and applies to VAR.  A postcompute that has a label uses that label,
   expanded by ctables_postcompute_label().  A total or subtotal uses its
   total label as user text.  Anything else, including a postcompute without
   a label, is labeled as VALUE of variable VAR. */
2327 static struct pivot_value *
2328 ctables_category_create_value_label (const struct ctables_categories *cats,
2329 const struct ctables_category *cat,
2330 const struct variable *var,
2331 const union value *value)
2333 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2334 ? ctables_postcompute_label (cats, cat, var)
2335 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2336 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2337 : pivot_value_new_var_value (var, value));
2340 /* CTABLES variable nesting and stacking. */
2342 /* A nested sequence of variables, e.g. a > b > c. */
2345 struct variable **vars;
/* For each area type, an array of indexes into 'vars' and the number of
   elements in that array. */
2349 size_t *areas[N_CTATS];
2350 size_t n_areas[N_CTATS];
/* Summary specifications, one set per summary variant. */
2353 struct ctables_summary_spec_set specs[N_CSVS];
2356 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2357 struct ctables_stack
/* Array of nestings; other code iterates it with the 'n' count. */
2359 struct ctables_nest *nests;
/* Releases the resources that NEST owns, including its summary specification
   set for each summary variant and its index array for each area type.  NEST
   itself is not freed here. */
2364 ctables_nest_uninit (struct ctables_nest *nest)
2367 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2368 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2369 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2370 free (nest->areas[at]);
/* Uninitializes every nesting in STACK and then frees the array that held
   them. */
2374 ctables_stack_uninit (struct ctables_stack *stack)
2378 for (size_t i = 0; i < stack->n; i++)
2379 ctables_nest_uninit (&stack->nests[i]);
2380 free (stack->nests);
/* Returns the stack that results from nesting every nesting in S1 within
   every nesting in S0.  The result has one nesting for each pair (a, b) with
   'a' drawn from S0 and 'b' from S1; its variables are those of 'a' followed
   by those of 'b'.  The scale and summary indexes come from 'a' if it has
   them (that is, if they are not SIZE_MAX), and otherwise from 'b', offset by
   the number of variables in 'a'.  The summary specifications are cloned from
   one of the pair, chosen according to which has a CSV_CELL summary variable.

   S0 and S1 are uninitialized before returning, so the caller gives up
   ownership of both.

   NOTE(review): 's1.n * sizeof *stack.nests' is multiplied before xnmalloc()
   can check the product for overflow -- consider whether that matters. */
2384 static struct ctables_stack
2385 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2392 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2393 for (size_t i = 0; i < s0.n; i++)
2394 for (size_t j = 0; j < s1.n; j++)
2396 const struct ctables_nest *a = &s0.nests[i];
2397 const struct ctables_nest *b = &s1.nests[j];
2399 size_t allocate = a->n + b->n;
2400 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2402 for (size_t k = 0; k < a->n; k++)
2403 vars[n++] = a->vars[k];
2404 for (size_t k = 0; k < b->n; k++)
2405 vars[n++] = b->vars[k];
2406 assert (n == allocate);
2408 const struct ctables_nest *summary_src;
2409 if (!a->specs[CSV_CELL].var)
2411 else if (!b->specs[CSV_CELL].var)
2416 struct ctables_nest *new = &stack.nests[stack.n++];
2417 *new = (struct ctables_nest) {
2419 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2420 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2422 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2423 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2427 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2428 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2430 ctables_stack_uninit (&s0);
2431 ctables_stack_uninit (&s1);
/* Returns the stack that consists of the nestings in S0 followed by those in
   S1.  The nestings are moved by structure assignment, not cloned.  Each
   nesting taken from S1 has its 'group_head' increased by the number of
   nestings in S0, which keeps it correct relative to the nesting's new
   position in the combined array. */
2435 static struct ctables_stack
2436 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2438 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2439 for (size_t i = 0; i < s0.n; i++)
2440 stack.nests[stack.n++] = s0.nests[i];
2441 for (size_t i = 0; i < s1.n; i++)
2443 stack.nests[stack.n] = s1.nests[i];
2444 stack.nests[stack.n].group_head += s0.n;
2447 assert (stack.n == s0.n + s1.n);
/* Returns a stack that holds a single nesting built from variable axis node
   A.  The nesting's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise; its summary index is 0 if A is a scale variable or has any
   CSV_CELL summary specifications and SIZE_MAX otherwise.  A's summary
   specifications are cloned for every summary variant, and each clone is
   tagged with A's variable and scale flag. */
2453 static struct ctables_stack
2454 var_fts (const struct ctables_axis *a)
2456 struct variable **vars = xmalloc (sizeof *vars);
2459 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2460 struct ctables_nest *nest = xmalloc (sizeof *nest);
2461 *nest = (struct ctables_nest) {
2464 .scale_idx = a->scale ? 0 : SIZE_MAX,
2465 .summary_idx = is_summary ? 0 : SIZE_MAX,
2468 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2470 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2471 nest->specs[sv].var = a->var;
2472 nest->specs[sv].is_scale = a->scale;
2474 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Expands axis expression A into a stack of nestings by recursing into its
   subexpressions and combining their results with stack_fts() or nest_fts().
   One path yields an empty stack.
   NOTE(review): presumably the empty stack is for a null A and a variable
   node is handled by var_fts() -- confirm. */
2477 static struct ctables_stack
2478 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2481 return (struct ctables_stack) { .n = 0 };
2489 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2490 enumerate_fts (axis_type, a->subs[1]));
2493 /* This should consider any of the scale variables found in the result to
2494 be linked to each other listwise for SMISSING=LISTWISE. */
2495 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2496 enumerate_fts (axis_type, a->subs[1]));
2502 /* CTABLES summary calculation. */
/* The running state for one summary function.  Which member is in use
   depends on the function, as noted on each group of members. */
2504 union ctables_summary
2506 /* COUNT, VALIDN, TOTALN. */
2509 /* MINIMUM, MAXIMUM, RANGE. */
2516 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2517 struct moments1 *moments;
2519 /* MEDIAN, MODE, PTILE. */
2522 struct casewriter *writer;
/* Initializes S to accumulate the summary function given by SS->function.
   The minimum and maximum start out as SYSMIS.  Moment-based functions get a
   moments accumulator that tracks either the mean only or the variance as
   well.  Functions that need the data in sorted order get a sorting
   casewriter whose cases have two numeric columns and are ordered by
   ascending column 0; ctables_summary_add() stores the value in column 0 and
   the weight in column 1. */
2529 ctables_summary_init (union ctables_summary *s,
2530 const struct ctables_summary_spec *ss)
2532 switch (ss->function)
2535 case CTSF_areaPCT_COUNT:
2536 case CTSF_areaPCT_VALIDN:
2537 case CTSF_areaPCT_TOTALN:
2550 s->min = s->max = SYSMIS;
2555 case CTSF_areaPCT_SUM:
2556 s->moments = moments1_create (MOMENT_MEAN);
2562 s->moments = moments1_create (MOMENT_VARIANCE);
2569 struct caseproto *proto = caseproto_create ();
2570 proto = caseproto_add_width (proto, 0);
2571 proto = caseproto_add_width (proto, 0);
2573 struct subcase ordering;
2574 subcase_init (&ordering, 0, 0, SC_ASCEND);
2575 s->writer = sort_create_writer (&ordering, proto);
2576 subcase_uninit (&ordering);
2577 caseproto_unref (proto);
/* Releases whatever S holds for the summary function given by SS->function:
   the moments accumulator for moment-based functions, or the casewriter for
   functions that sort their data. */
2587 ctables_summary_uninit (union ctables_summary *s,
2588 const struct ctables_summary_spec *ss)
2590 switch (ss->function)
2593 case CTSF_areaPCT_COUNT:
2594 case CTSF_areaPCT_VALIDN:
2595 case CTSF_areaPCT_TOTALN:
2614 case CTSF_areaPCT_SUM:
2615 moments1_destroy (s->moments);
2621 casewriter_destroy (s->writer);
/* Accumulates VALUE, with its weight, into summary S for the function given
   by SS->function.  IS_MISSING and IS_INCLUDED describe the value as the
   chart in the comment below uses those terms.  Minimum and maximum track the
   extreme values seen so far, moment-based functions feed the moments
   accumulator, and functions that sort their data add the weight to 'ovalid'
   and write a case with the value in column 0 and the weight in column 1. */
2627 ctables_summary_add (union ctables_summary *s,
2628 const struct ctables_summary_spec *ss,
2629 const union value *value,
2630 bool is_missing, bool is_included,
2633 /* To determine whether a case is included in a given table for a particular
2634 kind of summary, consider the following charts for the variable being
2635 summarized. Only if "yes" appears is the case counted.
2637 Categorical variables: VALIDN other TOTALN
2638 Valid values in included categories yes yes yes
2639 Missing values in included categories --- yes yes
2640 Missing values in excluded categories --- --- yes
2641 Valid values in excluded categories --- --- ---
2643 Scale variables: VALIDN other TOTALN
2644 Valid value yes yes yes
2645 Missing value --- yes yes
2647 Missing values include both user- and system-missing. (The system-missing
2648 value is always in an excluded category.)
2650 One way to interpret the above table is that scale variables are like
2651 categorical variables in which all values are in included categories.
2653 switch (ss->function)
2656 case CTSF_areaPCT_TOTALN:
2661 case CTSF_areaPCT_COUNT:
2667 case CTSF_areaPCT_VALIDN:
2685 if (s->min == SYSMIS || value->f < s->min)
2687 if (s->max == SYSMIS || value->f > s->max)
2698 moments1_add (s->moments, value->f, weight);
2701 case CTSF_areaPCT_SUM:
2703 moments1_add (s->moments, value->f, weight);
2711 s->ovalid += weight;
2713 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2714 *case_num_rw_idx (c, 0) = value->f;
2715 *case_num_rw_idx (c, 1) = weight;
2716 casewriter_write (s->writer, c);
/* Computes the final value of summary S for the function given by
   SS->function.  AREAS supplies the area totals for functions that need
   them; the area used is the one indexed by SS->calc_area.

   Percentages divide by the area's count, valid count, total count, or sum
   for SS->weighting, and yield SYSMIS when that denominator is zero.  The
   range, the standard deviation, and the sum yield SYSMIS when the
   quantities they are computed from are SYSMIS.  Functions that sort their
   data turn the casewriter into a reader and feed it to an order statistic: a
   percentile that uses SS->percentile for CTSF_PTILE and 0.5 otherwise, or a
   mode.  The result is stored in S->ovalue. */
2723 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2724 union ctables_summary *s,
2725 const struct ctables_summary_spec *ss)
2727 switch (ss->function)
2733 return areas[ss->calc_area]->sequence;
2735 case CTSF_areaPCT_COUNT:
2737 const struct ctables_area *a = areas[ss->calc_area];
2738 double a_count = a->count[ss->weighting];
2739 return a_count ? s->count / a_count * 100 : SYSMIS;
2742 case CTSF_areaPCT_VALIDN:
2744 const struct ctables_area *a = areas[ss->calc_area];
2745 double a_valid = a->valid[ss->weighting];
2746 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2749 case CTSF_areaPCT_TOTALN:
2751 const struct ctables_area *a = areas[ss->calc_area];
2752 double a_total = a->total[ss->weighting];
2753 return a_total ? s->count / a_total * 100 : SYSMIS;
2768 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2773 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2779 double weight, variance;
2780 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2781 return calc_semean (variance, weight);
2787 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2788 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as the total weight times the mean. */
2793 double weight, mean;
2794 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2795 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2801 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2805 case CTSF_areaPCT_SUM:
2807 double weight, mean;
2808 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2809 if (weight == SYSMIS || mean == SYSMIS)
2812 const struct ctables_area *a = areas[ss->calc_area];
2813 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2814 double denom = sum->sum[ss->weighting];
2815 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2822 struct casereader *reader = casewriter_make_reader (s->writer);
2825 struct percentile *ptile = percentile_create (
2826 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2827 struct order_stats *os = &ptile->parent;
2828 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2829 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2830 statistic_destroy (&ptile->parent.parent);
2837 struct casereader *reader = casewriter_make_reader (s->writer);
2840 struct mode *mode = mode_create ();
2841 struct order_stats *os = &mode->parent;
2842 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2843 s->ovalue = mode->mode;
2844 statistic_destroy (&mode->parent.parent);
2852 /* CTABLES occurrences. */
/* A value that has been recorded as occurring, kept in a hash map keyed on
   the value (see ctables_add_occurrence()). */
2854 struct ctables_occurrence
2856 struct hmap_node node;
/* Records in OCCURRENCES that VALUE, a value of VAR, occurs.  The entries
   that share VALUE's hash are searched for an equal value first, and a new
   entry that holds a clone of VALUE is inserted.
   NOTE(review): presumably the insertion is skipped when an equal value is
   already present, so that each distinct value is stored once -- confirm. */
2861 ctables_add_occurrence (const struct variable *var,
2862 const union value *value,
2863 struct hmap *occurrences)
2865 int width = var_get_width (var);
2866 unsigned int hash = value_hash (value, width, 0);
2868 struct ctables_occurrence *o;
2869 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2871 if (value_equal (value, &o->value, width))
2874 o = xmalloc (sizeof *o);
2875 value_clone (&o->value, value, width);
2876 hmap_insert (occurrences, &o->node, hash);
/* Each enumerator takes its value from a SETTINGS_VALUE_SHOW_* constant. */
2881 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2882 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2883 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2884 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
2889 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2890 all the axes (except the scalar variable, if any). */
2891 struct hmap_node node;
2893 /* The areas that contain this cell. */
2894 uint32_t omit_areas;
2895 struct ctables_area *areas[N_CTATS];
2900 enum ctables_summary_variant sv;
2902 struct ctables_cell_axis
/* The cell comparison code indexes these in parallel with the variables in
   the axis's nest. */
2904 struct ctables_cell_value
2906 const struct ctables_category *category;
2914 union ctables_summary *summaries;
/* A section of a table.  It points back to its table and to one nest per
   pivot axis, and it owns hash maps of its cells and of its areas for each
   area type. */
2917 struct ctables_section
2920 struct ctables_table *table;
2921 struct ctables_nest *nests[PIVOT_N_AXES];
2924 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2925 struct hmap cells; /* Contains "struct ctables_cell"s. */
2926 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
/* Forward declaration, so that code ahead of the definition can call it. */
2929 static void ctables_section_uninit (struct ctables_section *);
/* One table: its axis expressions and the stacks of nestings for each pivot
   axis, its sections, and its settings for summaries, summary labels,
   category labels, and per-variable categories. */
2931 struct ctables_table
2933 struct ctables *ctables;
2934 struct ctables_axis *axes[PIVOT_N_AXES];
2935 struct ctables_stack stacks[PIVOT_N_AXES];
2936 struct ctables_section *sections;
2938 enum pivot_axis_type summary_axis;
2939 struct ctables_summary_spec_set summary_specs;
2940 struct variable **sum_vars;
2943 enum pivot_axis_type slabels_axis;
2944 bool slabels_visible;
2946 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2948 Most commonly, label_axis[a] == a, and in particular we always have
2949 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2951 If ROWLABELS or COLLABELS is specified, then one of
2952 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2953 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2955 If any category labels are moved, then 'clabels_example' is one of the
2956 variables being moved (and it is otherwise NULL). All of the variables
2957 being moved have the same width, value labels, and categories, so this
2958 example variable can be used to find those out.
2960 The remaining members in this group are relevant only if category labels
2963 'clabels_values_map' holds a "struct ctables_value" for all the values
2964 that appear in all of the variables in the moved categories. It is
2965 accumulated as the data is read. Once the data is fully read, its
2966 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2968 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2969 enum pivot_axis_type clabels_from_axis;
2970 enum pivot_axis_type clabels_to_axis;
2971 const struct variable *clabels_example;
2972 struct hmap clabels_values_map;
2973 struct ctables_value **clabels_values;
2974 size_t n_clabels_values;
2976 /* Indexed by variable dictionary index. */
2977 struct ctables_categories **categories;
2978 size_t n_categories;
2986 struct ctables_chisq *chisq;
2987 struct ctables_pairwise *pairwise;
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   order the cells and the pivot axis whose cell values are compared. */
2990 struct ctables_cell_sort_aux
2992 const struct ctables_nest *nest;
2993 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   pointer to a "struct ctables_cell", and AUX_ points to a "struct
   ctables_cell_sort_aux" that names the nest and axis to sort along.

   The nest's variables are considered in order, skipping the scale variable.
   For each one, cells in different categories are ordered by the addresses of
   their categories.  Cells in the same category are ordered according to the
   category's type: by value, or by value label with strcmp() and by value in
   another branch, with the result negated if the category does not sort in
   ascending order. */
2997 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2999 const struct ctables_cell_sort_aux *aux = aux_;
3000 struct ctables_cell *const *ap = a_;
3001 struct ctables_cell *const *bp = b_;
3002 const struct ctables_cell *a = *ap;
3003 const struct ctables_cell *b = *bp;
3005 const struct ctables_nest *nest = aux->nest;
3006 for (size_t i = 0; i < nest->n; i++)
3007 if (i != nest->scale_idx)
3009 const struct variable *var = nest->vars[i];
3010 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3011 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3012 if (a_cv->category != b_cv->category)
3013 return a_cv->category > b_cv->category ? 1 : -1;
3015 const union value *a_val = &a_cv->value;
3016 const union value *b_val = &b_cv->value;
3017 switch (a_cv->category->type)
3023 case CCT_POSTCOMPUTE:
3024 case CCT_EXCLUDED_MISSING:
3025 /* Must be equal. */
3033 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3041 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3043 return a_cv->category->sort_ascending ? cmp : -cmp;
3049 const char *a_label = var_lookup_value_label (var, a_val);
3050 const char *b_label = var_lookup_value_label (var, b_val);
3056 cmp = strcmp (a_label, b_label);
3062 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3065 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their per-axis 'leaf' indexes.  A_
   and B_ each point to a 'struct ctables_cell *'; AUX is unused.  The axes
   are considered in increasing 'enum pivot_axis_type' order. */
3077 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3078 const void *aux UNUSED)
3080 struct ctables_cell *const *ap = a_;
3081 struct ctables_cell *const *bp = b_;
3082 const struct ctables_cell *a = *ap;
3083 const struct ctables_cell *b = *bp;
3085 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3087 int al = a->axes[axis].leaf;
3088 int bl = b->axes[axis].leaf;
3090 return al > bl ? 1 : -1;
/* Finds or creates the area of type AREA in section S that CELL belongs to.

   The key is built from the variables listed in each nest's areas[AREA]:
   the cell's category pointer for each such variable, plus its data value
   unless the category is a total, subtotal, or postcompute.  Existing
   entries of S->areas[AREA] with the same hash are compared field by field
   against CELL.

   When a new area is created it records CELL as its 'example', gets a
   zeroed 'sums' array if the table has any sum variables, and is inserted
   into S->areas[AREA].

   NOTE(review): the statements that return an existing match or the new
   area are not visible in this excerpt. */
3095 static struct ctables_area *
3096 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3097 enum ctables_area_type area)
3100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3102 const struct ctables_nest *nest = s->nests[a];
3103 for (size_t i = 0; i < nest->n_areas[area]; i++)
3105 size_t v_idx = nest->areas[area][i];
3106 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3107 hash = hash_pointer (cv->category, hash);
3108 if (cv->category->type != CCT_TOTAL
3109 && cv->category->type != CCT_SUBTOTAL
3110 && cv->category->type != CCT_POSTCOMPUTE)
3111 hash = value_hash (&cv->value,
3112 var_get_width (nest->vars[v_idx]), hash);
3116 struct ctables_area *a;
3117 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
/* Within this loop the axis counter is also named 'a' and shadows the area
   pointer declared above. */
3119 const struct ctables_cell *df = a->example;
3120 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3122 const struct ctables_nest *nest = s->nests[a];
3123 for (size_t i = 0; i < nest->n_areas[area]; i++)
3125 size_t v_idx = nest->areas[area][i];
3126 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3127 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3128 if (cv1->category != cv2->category
3129 || (cv1->category->type != CCT_TOTAL
3130 && cv1->category->type != CCT_SUBTOTAL
3131 && cv1->category->type != CCT_POSTCOMPUTE
3132 && !value_equal (&cv1->value, &cv2->value,
3133 var_get_width (nest->vars[v_idx]))))
3142 struct ctables_sum *sums = (s->table->n_sum_vars
3143 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3146 a = xmalloc (sizeof *a);
3147 *a = (struct ctables_area) { .example = cell, .sums = sums };
3148 hmap_insert (&s->areas[area], &a->node, hash);
/* Finds or creates the cell in section S identified by the categories in
   CATS together with the data values in case C.

   For every non-scale variable on every axis, both the hash and the
   equality test use the category pointer and, unless the category is a
   total, subtotal, or postcompute, the variable's value in C.

   A newly created cell gets:

   - A clone of each variable's value and a pointer to its category.

   - An 'omit_areas' bit mask, one bit per area type.  A total, subtotal, or
     postcompute category sets the bits for the areas listed under the axis
     it is on.

   - 'postcompute' set to true if any category is a postcompute.

   - Summaries initialized from the spec set of the table's summary axis,
     selected by the cell's 'sv'.

   - One area per area type, obtained from ctables_area_insert().

   NOTE(review): the return statements, the assignment of cell->sv, and the
   statements controlled by the 'hide' test are not visible in this
   excerpt. */
3152 static struct ctables_cell *
3153 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3154 const struct ctables_category **cats[PIVOT_N_AXES])
3157 enum ctables_summary_variant sv = CSV_CELL;
3158 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3160 const struct ctables_nest *nest = s->nests[a];
3161 for (size_t i = 0; i < nest->n; i++)
3162 if (i != nest->scale_idx)
3164 hash = hash_pointer (cats[a][i], hash);
3165 if (cats[a][i]->type != CCT_TOTAL
3166 && cats[a][i]->type != CCT_SUBTOTAL
3167 && cats[a][i]->type != CCT_POSTCOMPUTE)
3168 hash = value_hash (case_data (c, nest->vars[i]),
3169 var_get_width (nest->vars[i]), hash);
3175 struct ctables_cell *cell;
3176 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3178 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3180 const struct ctables_nest *nest = s->nests[a];
3181 for (size_t i = 0; i < nest->n; i++)
3182 if (i != nest->scale_idx
3183 && (cats[a][i] != cell->axes[a].cvs[i].category
3184 || (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE
3187 && !value_equal (case_data (c, nest->vars[i]),
3188 &cell->axes[a].cvs[i].value,
3189 var_get_width (nest->vars[i])))))
/* No existing cell matched: build a new one. */
3198 cell = xmalloc (sizeof *cell);
3201 cell->omit_areas = 0;
3202 cell->postcompute = false;
3203 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3205 const struct ctables_nest *nest = s->nests[a];
3206 cell->axes[a].cvs = (nest->n
3207 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3209 for (size_t i = 0; i < nest->n; i++)
3211 const struct ctables_category *cat = cats[a][i];
3212 const struct variable *var = nest->vars[i];
3213 const union value *value = case_data (c, var);
3214 if (i != nest->scale_idx)
3216 const struct ctables_category *subtotal = cat->subtotal;
3217 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3220 if (cat->type == CCT_TOTAL
3221 || cat->type == CCT_SUBTOTAL
3222 || cat->type == CCT_POSTCOMPUTE)
3226 case PIVOT_AXIS_COLUMN:
3227 cell->omit_areas |= ((1u << CTAT_TABLE) |
3228 (1u << CTAT_LAYER) |
3229 (1u << CTAT_LAYERCOL) |
3230 (1u << CTAT_SUBTABLE) |
3233 case PIVOT_AXIS_ROW:
3234 cell->omit_areas |= ((1u << CTAT_TABLE) |
3235 (1u << CTAT_LAYER) |
3236 (1u << CTAT_LAYERROW) |
3237 (1u << CTAT_SUBTABLE) |
3240 case PIVOT_AXIS_LAYER:
3241 cell->omit_areas |= ((1u << CTAT_TABLE) |
3242 (1u << CTAT_LAYER));
3246 if (cat->type == CCT_POSTCOMPUTE)
3247 cell->postcompute = true;
3250 cell->axes[a].cvs[i].category = cat;
3251 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3255 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3256 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3257 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3258 for (size_t i = 0; i < specs->n; i++)
3259 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3260 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3261 cell->areas[at] = ctables_area_insert (s, cell, at);
3262 hmap_insert (&s->cells, &cell->node, hash);
/* Visits one element of DST and SRC per weighting type (N_CTWS in all).
   NOTE(review): the loop body is not visible in this excerpt; from the name
   and the callers it presumably adds SRC[wt] into DST[wt] -- confirm. */
3267 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3269 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C to the cell of section S selected by CATS, creating the cell
   if necessary, and to the areas that the cell belongs to.  WEIGHT holds
   the case weight for each weighting type; IS_INCLUDED is passed through to
   the summaries.

   Each summary of the cell is updated with the value of the summary
   variable ('specs->var').  The value counts as missing for scale purposes
   if it is missing for that variable, or if the spec set is a scale set and
   the case is listwise missing.

   Then, for each area type that the cell does not omit, the area's 'total',
   'count', and 'valid' weights are updated, and, unless the value is
   missing for scale purposes, each sum variable's non-missing value times
   the weight is added into the area's sums.

   NOTE(review): the guards on the 'count' and 'valid' updates are on lines
   not visible in this excerpt. */
3274 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3275 const struct ctables_category **cats[PIVOT_N_AXES],
3276 bool is_included, double weight[N_CTWS])
3278 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3279 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3281 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3282 const union value *value = case_data (c, specs->var);
3283 bool is_missing = var_is_value_missing (specs->var, value);
3284 bool is_scale_missing
3285 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3287 for (size_t i = 0; i < specs->n; i++)
3288 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3289 is_scale_missing, is_included,
3290 weight[specs->specs[i].weighting]);
3291 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* 'omit_areas' is a bit mask with one bit per area type, so the bit for AT
   must be tested with bitwise '&'.  Logical '&&' would be true whenever any
   bit at all is set, because (1u << at) is never zero, and so would skip
   every area for a cell that omits only some of them. */
3292 if (!(cell->omit_areas & (1u << at)))
3294 struct ctables_area *a = cell->areas[at];
3296 add_weight (a->total, weight);
3298 add_weight (a->count, weight);
3301 add_weight (a->valid, weight);
3303 if (!is_scale_missing)
3304 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3306 const struct variable *var = s->table->sum_vars[i];
3307 double addend = case_num (c, var);
3308 if (!var_is_num_missing (var, addend))
3309 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3310 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the cells that involve total categories.

   Starting at position START_NEST on axis START_AXIS, each non-scale
   variable's total category is looked up with ctables_categories_total().
   The case is added with ctables_cell_add__(), and the function recurses
   from the position after the current one, so that combinations of totals
   on several variables are covered as well.

   NOTE(review): the lines that test 'total', store it into CATS, restore
   'save' afterward, and reset the starting index between axes are not
   visible in this excerpt. */
3317 recurse_totals (struct ctables_section *s, const struct ccase *c,
3318 const struct ctables_category **cats[PIVOT_N_AXES],
3319 bool is_included, double weight[N_CTWS],
3320 enum pivot_axis_type start_axis, size_t start_nest)
3322 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3324 const struct ctables_nest *nest = s->nests[a];
3325 for (size_t i = start_nest; i < nest->n; i++)
3327 if (i == nest->scale_idx)
3330 const struct variable *var = nest->vars[i];
3332 const struct ctables_category *total = ctables_categories_total (
3333 s->table->categories[var_get_dict_index (var)]);
3336 const struct ctables_category *save = cats[a][i];
3338 ctables_cell_add__ (s, c, cats, is_included, weight);
3339 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the cells that involve subtotal categories.

   Starting at position START_NEST on axis START_AXIS, the current category
   of each non-scale variable is replaced in CATS by its 'subtotal'.  The
   case is then added with ctables_cell_add__(), and the function recurses
   from the position after the current one.

   NOTE(review): the test for whether a subtotal exists and the statement
   that restores 'save' are not visible in this excerpt. */
3348 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3349 const struct ctables_category **cats[PIVOT_N_AXES],
3350 bool is_included, double weight[N_CTWS],
3351 enum pivot_axis_type start_axis, size_t start_nest)
3353 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3355 const struct ctables_nest *nest = s->nests[a];
3356 for (size_t i = start_nest; i < nest->n; i++)
3358 if (i == nest->scale_idx)
3361 const struct ctables_category *save = cats[a][i];
3364 cats[a][i] = save->subtotal;
3365 ctables_cell_add__ (s, c, cats, is_included, weight);
3366 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C, with the per-weighting weights in WEIGHT, to section S.

   First, for each non-scale variable on each axis, the category matching
   the case's value is looked up with ctables_categories_match() and stored
   in a per-axis array sized by that axis's nest.  The visible fallback
   substitutes a static CCT_EXCLUDED_MISSING category and clears
   'is_included'; it is preceded by tests on 'summary_idx' and on whether
   the value is missing.  NOTE(review): the statements those tests control
   are not visible here -- presumably the case is dropped; confirm.

   Then each value is recorded with ctables_add_occurrence(), the case is
   added to its own cell, and recurse_totals() and recurse_subtotals() add
   it to the total and subtotal cells. */
3375 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3376 double weight[N_CTWS])
3378 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3379 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3380 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3381 const struct ctables_category **cats[PIVOT_N_AXES] =
3383 [PIVOT_AXIS_LAYER] = layer_cats,
3384 [PIVOT_AXIS_ROW] = row_cats,
3385 [PIVOT_AXIS_COLUMN] = column_cats,
3388 bool is_included = true;
3390 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3392 const struct ctables_nest *nest = s->nests[a];
3393 for (size_t i = 0; i < nest->n; i++)
3394 if (i != nest->scale_idx)
3396 const struct variable *var = nest->vars[i];
3397 const union value *value = case_data (c, var);
3399 cats[a][i] = ctables_categories_match (
3400 s->table->categories[var_get_dict_index (var)], value, var);
3403 if (i != nest->summary_idx)
3406 if (!var_is_value_missing (var, value))
3409 static const struct ctables_category cct_excluded_missing = {
3410 .type = CCT_EXCLUDED_MISSING,
3413 cats[a][i] = &cct_excluded_missing;
3414 is_included = false;
3420 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3422 const struct ctables_nest *nest = s->nests[a];
3423 for (size_t i = 0; i < nest->n; i++)
3424 if (i != nest->scale_idx)
3426 const struct variable *var = nest->vars[i];
3427 const union value *value = case_data (c, var);
3428 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3432 ctables_cell_add__ (s, c, cats, is_included, weight);
3433 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3434 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* An entry in a table's 'clabels_values_map' hash map.  Other code in this
   file also reads and writes 'value' and 'leaf' members that are not
   visible in this excerpt. */
3437 struct ctables_value
3439 struct hmap_node node;
/* Searches T->clabels_values_map, among the entries whose hash is HASH, for
   one whose value equals VALUE, comparing with the given WIDTH.
   NOTE(review): the return statements are not visible in this excerpt;
   presumably the match is returned, or NULL if there is none. */
3444 static struct ctables_value *
3445 ctables_value_find__ (struct ctables_table *t, const union value *value,
3446 int width, unsigned int hash)
3448 struct ctables_value *clv;
3449 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3450 hash, &t->clabels_values_map)
3451 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is first searched with
   ctables_value_find__(); a new entry holds a clone of VALUE and is
   inserted under the hash of VALUE.  NOTE(review): the test that skips
   insertion when the value is already present is not visible in this
   excerpt. */
3457 ctables_value_insert (struct ctables_table *t, const union value *value,
3460 unsigned int hash = value_hash (value, width, 0);
3461 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3464 clv = xmalloc (sizeof *clv);
3465 value_clone (&clv->value, value, width);
3466 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's map of category label values.
   This is a convenience wrapper that computes the hash of VALUE and returns
   whatever ctables_value_find__() returns. */
3470 static struct ctables_value *
3471 ctables_value_find (struct ctables_table *t,
3472 const union value *value, int width)
3474 return ctables_value_find__ (t, value, width,
3475 value_hash (value, width, 0));
/* Three-way comparison of two 'struct ctables_value's by their values.  A_
   and B_ each point to a 'struct ctables_value *'; WIDTH_ points to the
   'int' width used to compare the values. */
3479 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
3481 const struct ctables_value *const *ap = a_;
3482 const struct ctables_value *const *bp = b_;
3483 const struct ctables_value *a = *ap;
3484 const struct ctables_value *b = *bp;
3485 const int *width = width_;
3486 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, the sorted array form of T->clabels_values_map.

   The example variable T->clabels_example supplies the width and the
   categories.  Each of its labeled values that matches one of those
   categories is inserted into the map (NOTE(review): a condition guarding
   this step may be on a line not visible here).  Then every map entry is
   copied into a newly allocated array, the array is sorted by value, and
   each entry's 'leaf' is set to its index in the sorted array. */
3490 ctables_sort_clabels_values (struct ctables_table *t)
3492 const struct variable *v0 = t->clabels_example;
3493 int width = var_get_width (v0);
3495 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3498 const struct val_labs *val_labs = var_get_value_labels (v0);
3499 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
3500 vl = val_labs_next (val_labs, vl))
3501 if (ctables_categories_match (c0, &vl->value, v0))
3502 ctables_value_insert (t, &vl->value, width);
3505 size_t n = hmap_count (&t->clabels_values_map);
3506 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
3508 struct ctables_value *clv;
3510 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
3511 t->clabels_values[i++] = clv;
3512 t->n_clabels_values = n;
3515 sort (t->clabels_values, n, sizeof *t->clabels_values,
3516 compare_ctables_values_3way, &width);
3518 for (size_t i = 0; i < n; i++)
3519 t->clabels_values[i]->leaf = i;
/* Command-wide CTABLES state.  NOTE(review): the struct tag line is not
   visible in this excerpt; ctables_destroy() accesses 'postcomputes',
   'ctables_formats', 'look', and 'tables' through a 'struct ctables *', so
   these are presumably the members of 'struct ctables'. */
3524 const struct dictionary *dict;
3525 struct pivot_table_look *look;
3527 /* For CTEF_* formats. */
3528 struct fmt_settings ctables_formats;
3530 /* If this is NULL, zeros are displayed using the normal print format.
3531 Otherwise, this string is displayed. */
3534 /* If this is NULL, missing values are displayed using the normal print
3535 format. Otherwise, this string is displayed. */
3538 /* Indexed by variable dictionary index. */
3539 enum ctables_vlabel *vlabels;
3541 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3543 bool mrsets_count_duplicates; /* MRSETS. */
3544 bool smissing_listwise; /* SMISSING. */
3545 struct variable *e_weight; /* WEIGHT. */
3546 int hide_threshold; /* HIDESMALLCOUNTS. */
/* The tables in this command; ctables_destroy() walks 'n_tables' of them. */
3548 struct ctables_table **tables;
3552 /* Chi-square test (SIGTEST). */
3553 struct ctables_chisq
/* NOTE(review): presumably whether multiple response sets take part in the
   test -- confirm against the SIGTEST parser. */
3556 bool include_mrsets;
3560 /* Pairwise comparison test (COMPARETEST). */
3561 struct ctables_pairwise
/* Kind of comparison: PROP or MEAN. */
3563 enum { PROP, MEAN } type;
3565 bool include_mrsets;
3566 bool meansvariance_allcats;
/* Adjustment method.  The enumerators start at 1, so 0 is neither
   BONFERRONI nor BH. */
3568 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting for the keyword NAME: an optional equals
   sign followed by either DEFAULT or a number in the closed range
   [0, DBL_MAX], which is stored in *WIDTH.  NOTE(review): what DEFAULT
   stores, and the return statements, are on lines not visible in this
   excerpt. */
3577 parse_col_width (struct lexer *lexer, const char *name, double *width)
3579 lex_match (lexer, T_EQUALS);
3580 if (lex_match_id (lexer, "DEFAULT"))
3582 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3584 *width = lex_number (lexer);
/* Parses NO or YES from LEXER.  If neither keyword is present, reports an
   error saying that YES or NO was expected.  NOTE(review): the assignments
   to *B and the return statements are not visible in this excerpt;
   presumably *B becomes false for NO and true for YES. */
3594 parse_bool (struct lexer *lexer, bool *b)
3596 if (lex_match_id (lexer, "NO"))
3598 else if (lex_match_id (lexer, "YES"))
3602 lex_error_expecting (lexer, "YES", "NO");
/* Destructor for CHISQ, called from ctables_table_destroy().
   NOTE(review): the body is not visible in this excerpt. */
3609 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE, called from ctables_table_destroy().
   NOTE(review): the body is not visible in this excerpt. */
3615 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: its sections, its per-variable
   category references and the array holding them, each axis and stack, the
   summary specs array, every entry of 'clabels_values_map' (whose values
   are destroyed using the width of 'clabels_example') along with the map
   and the sorted 'clabels_values' array, and the chi-square and pairwise
   test settings.  NOTE(review): further frees, including that of T itself,
   may be on lines not visible in this excerpt. */
3621 ctables_table_destroy (struct ctables_table *t)
3626 for (size_t i = 0; i < t->n_sections; i++)
3627 ctables_section_uninit (&t->sections[i]);
3630 for (size_t i = 0; i < t->n_categories; i++)
3631 ctables_categories_unref (t->categories[i]);
3632 free (t->categories);
3634 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3636 ctables_axis_destroy (t->axes[a]);
3637 ctables_stack_uninit (&t->stacks[a]);
3639 free (t->summary_specs.specs);
3641 struct ctables_value *ctv, *next_ctv;
3642 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3643 &t->clabels_values_map)
3645 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3646 hmap_delete (&t->clabels_values_map, &ctv->node);
3649 hmap_destroy (&t->clabels_values_map);
3650 free (t->clabels_values);
3656 ctables_chisq_destroy (t->chisq);
3657 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in
   CT->postcomputes (its location, expression, and summary specs) and the
   map itself, the CTABLES format settings, the reference to the pivot table
   look, and each of the CT->n_tables tables.  NOTE(review): further frees
   may be on lines not visible in this excerpt. */
3662 ctables_destroy (struct ctables *ct)
3667 struct ctables_postcompute *pc, *next_pc;
3668 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3672 msg_location_destroy (pc->location);
3673 ctables_pcexpr_destroy (pc->expr);
3677 ctables_summary_spec_set_uninit (pc->specs);
3680 hmap_delete (&ct->postcomputes, &pc->hmap_node);
3683 hmap_destroy (&ct->postcomputes);
3685 fmt_settings_uninit (&ct->ctables_formats);
3686 pivot_table_look_unref (ct->look);
3690 for (size_t i = 0; i < ct->n_tables; i++)
3691 ctables_table_destroy (ct->tables[i]);
/* Checks that none of the N_VARS variables in VARS is numeric.  For a
   numeric variable, an error is reported at the location of category CAT,
   naming the variable.  NOTE(review): the return statements are not visible
   in this excerpt; the callers treat a false result as failure. */
3697 all_strings (struct variable **vars, size_t n_vars,
3698 const struct ctables_category *cat)
3700 for (size_t j = 0; j < n_vars; j++)
3701 if (var_is_numeric (vars[j]))
3703 msg_at (SE, cat->location,
3704 _("This category specification may be applied only to string "
3705 "variables, but this subcommand tries to apply it to "
3706 "numeric variable %s."),
3707 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER and installs it
   as the categories of each named variable.

   The syntax begins with VARIABLES, an optional equals sign, and a variable
   list.  A new 'struct ctables_categories' with one reference per variable
   replaces whatever each variable had in T->categories.

   An optional bracketed list of explicit categories follows, each parsed by
   ctables_table_parse_explicit_category() and tagged with its source
   location.  Then come keyword settings, up to a slash or the end of the
   command:

   - ORDER (A or D), KEY (VALUE, LABEL, or a summary function), and MISSING
     (INCLUDE or EXCLUDE), accepted only when there are no explicit
     categories.  KEY with a summary function ends in the error
     "Data-dependent sorting is not implemented."

   - TOTAL (YES or NO), LABEL (a string), POSITION (BEFORE or AFTER), and
     EMPTY (INCLUDE or EXCLUDE).

   Afterward the implicit category built from those settings can be
   appended, and a totals category is placed first or last according to
   POSITION, labeled with the LABEL string or else "Total".  Each category
   is then linked to a subtotal by walking the list in the direction
   POSITION implies.

   Finally, when explicit categories were given, each one is checked against
   the variables: postcompute expressions are verified, string categories
   and ranges are converted to numbers using the variables' common print
   format where the visible conditions call for it, and categories that
   require string or numeric variables are rejected for the other kind.

   NOTE(review): many guards, 'goto'/return statements, and case labels are
   on lines not visible in this excerpt, so the exact conditions for the
   steps above should be confirmed against the full source. */
3714 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
3715 struct ctables *ct, struct ctables_table *t)
3717 if (!lex_match_id (lexer, "VARIABLES"))
3719 lex_match (lexer, T_EQUALS);
3721 struct variable **vars;
3723 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* 'common_format' stays non-null only while every variable has the same
   print format type as the first one. */
3726 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
3727 for (size_t i = 1; i < n_vars; i++)
3729 const struct fmt_spec *f = var_get_print_format (vars[i]);
3730 if (f->type != common_format->type)
3732 common_format = NULL;
3738 && (fmt_get_category (common_format->type)
3739 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
3741 struct ctables_categories *c = xmalloc (sizeof *c);
3742 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
3743 for (size_t i = 0; i < n_vars; i++)
3745 struct ctables_categories **cp
3746 = &t->categories[var_get_dict_index (vars[i])];
3747 ctables_categories_unref (*cp);
3751 size_t allocated_cats = 0;
3752 int cats_start_ofs = -1;
3753 int cats_end_ofs = -1;
3754 if (lex_match (lexer, T_LBRACK))
3756 cats_start_ofs = lex_ofs (lexer);
3759 if (c->n_cats >= allocated_cats)
3760 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3762 int start_ofs = lex_ofs (lexer);
3763 struct ctables_category *cat = &c->cats[c->n_cats];
3764 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
3766 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
3769 lex_match (lexer, T_COMMA);
3771 while (!lex_match (lexer, T_RBRACK));
3772 cats_end_ofs = lex_ofs (lexer) - 1;
3775 struct ctables_category cat = {
3777 .include_missing = false,
3778 .sort_ascending = true,
3780 bool show_totals = false;
3781 char *total_label = NULL;
3782 bool totals_before = false;
3783 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
3785 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
3787 lex_match (lexer, T_EQUALS);
3788 if (lex_match_id (lexer, "A"))
3789 cat.sort_ascending = true;
3790 else if (lex_match_id (lexer, "D"))
3791 cat.sort_ascending = false;
3794 lex_error_expecting (lexer, "A", "D");
3798 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
3800 int start_ofs = lex_ofs (lexer) - 1;
3801 lex_match (lexer, T_EQUALS);
3802 if (lex_match_id (lexer, "VALUE"))
3803 cat.type = CCT_VALUE;
3804 else if (lex_match_id (lexer, "LABEL"))
3805 cat.type = CCT_LABEL;
3808 cat.type = CCT_FUNCTION;
3809 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
3810 &cat.weighting, &cat.area))
3813 if (lex_match (lexer, T_LPAREN))
3815 cat.sort_var = parse_variable (lexer, dict);
3819 if (cat.sort_function == CTSF_PTILE)
3821 lex_match (lexer, T_COMMA);
3822 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
3824 cat.percentile = lex_number (lexer);
3828 if (!lex_force_match (lexer, T_RPAREN))
3831 else if (ctables_function_availability (cat.sort_function)
3834 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
3838 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
3839 _("Data-dependent sorting is not implemented."));
3843 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
3845 lex_match (lexer, T_EQUALS);
3846 if (lex_match_id (lexer, "INCLUDE"))
3847 cat.include_missing = true;
3848 else if (lex_match_id (lexer, "EXCLUDE"))
3849 cat.include_missing = false;
3852 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
3856 else if (lex_match_id (lexer, "TOTAL"))
3858 lex_match (lexer, T_EQUALS);
3859 if (!parse_bool (lexer, &show_totals))
3862 else if (lex_match_id (lexer, "LABEL"))
3864 lex_match (lexer, T_EQUALS);
3865 if (!lex_force_string (lexer))
3868 total_label = ss_xstrdup (lex_tokss (lexer));
3871 else if (lex_match_id (lexer, "POSITION"))
3873 lex_match (lexer, T_EQUALS);
3874 if (lex_match_id (lexer, "BEFORE"))
3875 totals_before = true;
3876 else if (lex_match_id (lexer, "AFTER"))
3877 totals_before = false;
3880 lex_error_expecting (lexer, "BEFORE", "AFTER");
3884 else if (lex_match_id (lexer, "EMPTY"))
3886 lex_match (lexer, T_EQUALS);
3887 if (lex_match_id (lexer, "INCLUDE"))
3888 c->show_empty = true;
3889 else if (lex_match_id (lexer, "EXCLUDE"))
3890 c->show_empty = false;
3893 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
3900 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
3901 "TOTAL", "LABEL", "POSITION", "EMPTY");
3903 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
3910 if (c->n_cats >= allocated_cats)
3911 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3912 c->cats[c->n_cats++] = cat;
3917 if (c->n_cats >= allocated_cats)
3918 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3920 struct ctables_category *totals;
3923 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
3924 totals = &c->cats[0];
3927 totals = &c->cats[c->n_cats];
3930 *totals = (struct ctables_category) {
3932 .total_label = total_label ? total_label : xstrdup (_("Total")),
3936 struct ctables_category *subtotal = NULL;
/* Walks the categories forward when totals come first, and backward when
   they come last. */
3937 for (size_t i = totals_before ? 0 : c->n_cats;
3938 totals_before ? i < c->n_cats : i-- > 0;
3939 totals_before ? i++ : 0)
3941 struct ctables_category *cat = &c->cats[i];
3950 cat->subtotal = subtotal;
3953 case CCT_POSTCOMPUTE:
3964 case CCT_EXCLUDED_MISSING:
3969 if (cats_start_ofs != -1)
3971 for (size_t i = 0; i < c->n_cats; i++)
3973 struct ctables_category *cat = &c->cats[i];
3976 case CCT_POSTCOMPUTE:
3977 cat->parse_format = parse_strings ? common_format->type : FMT_F;
3978 struct msg_location *cats_location
3979 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
3980 bool ok = ctables_recursive_check_postcompute (
3981 dict, cat->pc->expr, cat, c, cats_location);
3982 msg_location_destroy (cats_location);
3989 for (size_t j = 0; j < n_vars; j++)
3990 if (var_is_alpha (vars[j]))
3992 msg_at (SE, cat->location,
3993 _("This category specification may be applied "
3994 "only to numeric variables, but this "
3995 "subcommand tries to apply it to string "
3997 var_get_name (vars[j]));
4006 if (!parse_category_string (cat->location, cat->string, dict,
4007 common_format->type, &n))
4010 ss_dealloc (&cat->string);
4012 cat->type = CCT_NUMBER;
4015 else if (!all_strings (vars, n_vars, cat))
4024 if (!cat->srange[0].string)
4026 else if (!parse_category_string (cat->location,
4027 cat->srange[0], dict,
4028 common_format->type, &n[0]))
4031 if (!cat->srange[1].string)
4033 else if (!parse_category_string (cat->location,
4034 cat->srange[1], dict,
4035 common_format->type, &n[1]))
4038 ss_dealloc (&cat->srange[0]);
4039 ss_dealloc (&cat->srange[1]);
4041 cat->type = CCT_NRANGE;
4042 cat->nrange[0] = n[0];
4043 cat->nrange[1] = n[1];
4045 else if (!all_strings (vars, n_vars, cat))
4056 case CCT_EXCLUDED_MISSING:
/* The summary spec set that a merge item refers to.
   merge_item_compare_3way() indexes set->specs with the item's 'ofs'
   member, which is not visible in this excerpt. */
4073 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B
   currently point at, that is, set->specs[ofs] for each.

   The keys, in order of precedence, are: function, weighting, calculation
   area, percentile, and finally label, where a null label compares as the
   empty string.  The percentile key is compared in the opposite sense from
   the others: the spec with the smaller percentile sorts later. */
4078 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4080 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4081 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4082 if (as->function != bs->function)
4083 return as->function > bs->function ? 1 : -1;
4084 else if (as->weighting != bs->weighting)
4085 return as->weighting > bs->weighting ? 1 : -1;
4086 else if (as->calc_area != bs->calc_area)
4087 return as->calc_area > bs->calc_area ? 1 : -1;
4088 else if (as->percentile != bs->percentile)
4089 return as->percentile < bs->percentile ? 1 : -1;
4091 const char *as_label = as->label ? as->label : "";
4092 const char *bs_label = bs->label ? bs->label : "";
4093 return strcmp (as_label, bs_label);
/* Recursively creates the sections of table T, one per combination of nest
   indexes across the axes.

   While A is a valid axis, IX[A] runs over the nests in T->stacks[A] (a
   single iteration if the stack is empty), recursing on axis A + 1 each
   time.  Once A reaches PIVOT_N_AXES, IX holds one nest index per axis.
   The next slot in T->sections is then initialized: for each axis, an array
   of occurrence maps with one map per variable in the selected nest, and
   one area map per area type. */
4097 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4098 size_t ix[PIVOT_N_AXES])
4100 if (a < PIVOT_N_AXES)
4102 size_t limit = MAX (t->stacks[a].n, 1);
4103 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4104 ctables_table_add_section (t, a + 1, ix);
4108 struct ctables_section *s = &t->sections[t->n_sections++];
4109 *s = (struct ctables_section) {
4111 .cells = HMAP_INITIALIZER (s->cells),
4113 for (a = 0; a < PIVOT_N_AXES; a++)
4116 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4118 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4119 for (size_t i = 0; i < nest->n; i++)
4120 hmap_init (&s->occurrences[a][i]);
4122 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4123 hmap_init (&s->areas[at]);
/* Postcompute binary operator passed to
   ctables_pcexpr_evaluate_nonterminal().  NOTE(review): the body is not
   visible in this excerpt; presumably returns A + B. */
4128 ctpo_add (double a, double b)
/* Postcompute binary operator passed to
   ctables_pcexpr_evaluate_nonterminal().  NOTE(review): the body is not
   visible in this excerpt; presumably returns A - B. */
4134 ctpo_sub (double a, double b)
/* Postcompute binary operator passed to
   ctables_pcexpr_evaluate_nonterminal().  NOTE(review): the body is not
   visible in this excerpt; presumably returns A * B. */
4140 ctpo_mul (double a, double b)
/* Postcompute division: returns A / B, or SYSMIS if B is zero. */
4146 ctpo_div (double a, double b)
4148 return b ? a / b : SYSMIS;
/* Postcompute exponentiation: computes pow (A, B).  The caller's 'errno' is
   saved on entry.  NOTE(review): the lines that inspect 'errno' after the
   call, restore it, and return are not visible in this excerpt. */
4152 ctpo_pow (double a, double b)
4154 int save_errno = errno;
4156 double result = pow (a, b);
/* Postcompute unary operator; B is unused.  It takes two arguments so that
   it has the same signature as the binary operators.  NOTE(review): the
   body is not visible in this excerpt; presumably returns -A. */
4164 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression: the cell being computed
   and its section, the categories of the variable that carries the
   postcompute, the axis of that variable, and the format used to parse
   category strings.  The evaluation code also reads 'pc_a_idx' and
   'summary_idx' members that are not visible in this excerpt. */
4169 struct ctables_pcexpr_evaluate_ctx
4171 const struct ctables_cell *cell;
4172 const struct ctables_section *section;
4173 const struct ctables_categories *cats;
4174 enum pivot_axis_type pc_a;
4177 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function before its definition appears. */
4180 static double ctables_pcexpr_evaluate (
4181 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS (at most 2) operands.  Each
   operand E->subs[i] is evaluated in context CTX, and the results are
   passed to EVALUATE; an operand slot beyond N_ARGS is passed as 0.  An
   operand that is not finite or equals SYSMIS is tested for specially.
   NOTE(review): the statement that test controls is not visible in this
   excerpt; presumably it returns SYSMIS without calling EVALUATE. */
4184 ctables_pcexpr_evaluate_nonterminal (
4185 const struct ctables_pcexpr_evaluate_ctx *ctx,
4186 const struct ctables_pcexpr *e, size_t n_args,
4187 double evaluate (double, double))
4189 double args[2] = { 0, 0 };
4190 for (size_t i = 0; i < n_args; i++)
4192 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4193 if (!isfinite (args[i]) || args[i] == SYSMIS)
4196 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that is the same as CTX->cell
   except that, at the postcompute's position (axis CTX->pc_a, index
   CTX->pc_a_idx), it has the category and value in PC_CV.

   The target cell is located in the section's cell map, using the same
   hashing and equality rules as cell insertion: category pointers, plus
   values except for total, subtotal, and postcompute categories, over all
   non-scale variables.  Its summary number CTX->summary_idx is then
   evaluated with ctables_summary_value().

   NOTE(review): what is returned when no such cell exists is on lines not
   visible in this excerpt. */
4200 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4201 const struct ctables_cell_value *pc_cv)
4203 const struct ctables_section *s = ctx->section;
4206 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4208 const struct ctables_nest *nest = s->nests[a];
4209 for (size_t i = 0; i < nest->n; i++)
4210 if (i != nest->scale_idx)
4212 const struct ctables_cell_value *cv
4213 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4214 : &ctx->cell->axes[a].cvs[i]);
4215 hash = hash_pointer (cv->category, hash);
4216 if (cv->category->type != CCT_TOTAL
4217 && cv->category->type != CCT_SUBTOTAL
4218 && cv->category->type != CCT_POSTCOMPUTE)
4219 hash = value_hash (&cv->value,
4220 var_get_width (nest->vars[i]), hash);
4224 struct ctables_cell *tc;
4225 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4227 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4229 const struct ctables_nest *nest = s->nests[a];
4230 for (size_t i = 0; i < nest->n; i++)
4231 if (i != nest->scale_idx)
4233 const struct ctables_cell_value *p_cv
4234 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4235 : &ctx->cell->axes[a].cvs[i]);
4236 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4237 if (p_cv->category != t_cv->category
4238 || (p_cv->category->type != CCT_TOTAL
4239 && p_cv->category->type != CCT_SUBTOTAL
4240 && p_cv->category->type != CCT_POSTCOMPUTE
4241 && !value_equal (&p_cv->value,
4243 var_get_width (nest->vars[i]))))
4255 const struct ctables_table *t = s->table;
4256 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4257 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4258 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
4259 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   - Category operands that can cover several values (numeric range, string
     range, missing, other non-missing): the matching category is found,
     and the summary values of every value recorded in the section's
     occurrences that maps to that category are added up.

   - Number, subtotal, and total operands: the matching category is found
     and evaluated directly, with E->number as the value.

   - String operands: when the variable is wider than the string, the
     string is first copied into a buffer padded with spaces to the
     variable's width.  The value used depends on whether the category that
     was found is a number or a string.

   - Arithmetic operators: delegated to
     ctables_pcexpr_evaluate_nonterminal() with the corresponding ctpo_*
     function.  All take two operands except negation, which takes one.

   The category lookups are asserted to succeed. */
4263 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4264 const struct ctables_pcexpr *e)
4271 case CTPO_CAT_NRANGE:
4272 case CTPO_CAT_SRANGE:
4273 case CTPO_CAT_MISSING:
4274 case CTPO_CAT_OTHERNM:
4276 struct ctables_cell_value cv = {
4277 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4279 assert (cv.category != NULL);
4281 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4282 const struct ctables_occurrence *o;
4285 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4286 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4287 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4289 cv.value = o->value;
4290 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4295 case CTPO_CAT_NUMBER:
4296 case CTPO_CAT_SUBTOTAL:
4297 case CTPO_CAT_TOTAL:
4299 struct ctables_cell_value cv = {
4300 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4301 .value = { .f = e->number },
4303 assert (cv.category != NULL);
4304 return ctables_pcexpr_evaluate_category (ctx, &cv);
4307 case CTPO_CAT_STRING:
4309 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4311 if (width > e->string.length)
4313 s = xmalloc (width);
4314 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4317 const struct ctables_category *category
4318 = ctables_find_category_for_postcompute (
4319 ctx->section->table->ctables->dict,
4320 ctx->cats, ctx->parse_format, e);
4321 assert (category != NULL);
4323 struct ctables_cell_value cv = { .category = category };
4324 if (category->type == CCT_NUMBER)
4325 cv.value.f = category->number;
4326 else if (category->type == CCT_STRING)
4327 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4331 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4337 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4340 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4343 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4346 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4349 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4352 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL, which must have its
   'postcompute' flag set, by scanning the cell's values on every axis of
   section S for a category of type CCT_POSTCOMPUTE.  The axis and the index
   within the nest where it was found are reported through PC_A_P and
   PC_A_IDX_P.  The case where postcomputes on different variables cross is
   given special handling.  NOTE(review): that handling, and the return
   statements, are on lines not visible in this excerpt. */
4358 static const struct ctables_category *
4359 ctables_cell_postcompute (const struct ctables_section *s,
4360 const struct ctables_cell *cell,
4361 enum pivot_axis_type *pc_a_p,
4364 assert (cell->postcompute);
4365 const struct ctables_category *pc_cat = NULL;
4366 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4367 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4369 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4370 if (cv->category->type == CCT_POSTCOMPUTE)
4374 /* Multiple postcomputes cross each other. The value is
4379 pc_cat = cv->category;
4383 *pc_a_idx_p = pc_a_idx;
4387 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   spec SS.

   The postcompute category and its position are found with
   ctables_cell_postcompute().  The postcompute's own summary specs are
   searched for one with the same function, weighting, calculation area,
   and percentile as SS; a match overrides *FORMAT and *IS_CTABLES_FORMAT.
   The postcompute's expression is then evaluated, in a context built from
   the categories of the variable at the postcompute's position, and the
   result is returned.

   NOTE(review): the final parameter and several guards, such as for a
   missing category or missing specs, are on lines not visible in this
   excerpt. */
4392 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4393 const struct ctables_cell *cell,
4394 const struct ctables_summary_spec *ss,
4395 struct fmt_spec *format,
4396 bool *is_ctables_format,
4399 enum pivot_axis_type pc_a = 0;
4400 size_t pc_a_idx = 0;
4401 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4402 s, cell, &pc_a, &pc_a_idx);
4406 const struct ctables_postcompute *pc = pc_cat->pc;
4409 for (size_t i = 0; i < pc->specs->n; i++)
4411 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4412 if (ss->function == ss2->function
4413 && ss->weighting == ss2->weighting
4414 && ss->calc_area == ss2->calc_area
4415 && ss->percentile == ss2->percentile)
4417 *format = ss2->format;
4418 *is_ctables_format = ss2->is_ctables_format;
4424 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4425 const struct ctables_categories *cats = s->table->categories[
4426 var_get_dict_index (var)];
4427 struct ctables_pcexpr_evaluate_ctx ctx = {
4432 .pc_a_idx = pc_a_idx,
4433 .summary_idx = summary_idx,
4434 .parse_format = pc_cat->parse_format,
4436 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS by calling data_out_stretchy()
   with UTF-8 as the encoding, then post-processes the resulting string to
   relocate a minus sign as explained in the comment below.

   NOTE(review): the return type and the return statement are not visible in
   this listing.  Presumably the string S is returned and the caller owns it
   (the caller passes the result to a "nocopy" constructor); confirm. */
4440 ctables_format (double d, const struct fmt_spec *format,
4441 const struct fmt_settings *settings)
4443 const union value v = { .f = d };
4444 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4446 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4447 produce the results we want for negative numbers, putting the negative
4448 sign in the wrong spot, before the prefix instead of after it. We can't,
4449 in fact, produce the desired results using a custom-currency
4450 specification. Instead, we postprocess the output, moving the negative
4453 NEQUAL: "-N=3" => "N=-3"
4454 PAREN: "-(3)" => "(-3)"
4455 PCTPAREN: "-(3%)" => "(-3%)"
4457 This transformation doesn't affect NEGPAREN. */
4458 char *minus_src = strchr (s, '-');
/* Only the first minus sign is considered, and only when it is not directly
   preceded by 'E' (presumably the sign of an exponent; TODO confirm). */
4459 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4461 char *n_equals = strstr (s, "N=");
4462 char *lparen = strchr (s, '(');
/* Destination is the position of the '=' in "N=" when that prefix exists,
   otherwise the position of the '('.  NOTE(review): a null check on
   minus_dst may sit on a line not shown here. */
4463 char *minus_dst = n_equals ? n_equals + 1 : lparen;
/* Treat the first minus_dst - s + 1 bytes as an array of one-byte elements
   and move the minus from its current index to the destination index. */
4465 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in the stack for axis A of table T.  For each nest it
   tests whether the nest consists of exactly one variable that is also the
   scale variable (n == 1 and scale_idx == 0), and whether the vlabel setting
   of that variable is CTVL_NONE.

   NOTE(review): the return type, the statements executed when a test fails
   and the final return are not visible in this listing.  Judging by the name
   and by the caller, which assigns the result to hide_all_labels, presumably
   this returns false at the first nest that fails either test and true
   otherwise; confirm against the complete source. */
4471 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4473 for (size_t i = 0; i < t->stacks[a].n; i++)
4475 struct ctables_nest *nest = &t->stacks[a].nests[i];
4476 if (nest->n != 1 || nest->scale_idx != 0)
4479 enum ctables_vlabel vlabel
4480 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4481 if (vlabel != CTVL_NONE)
/* Builds a pivot table from the accumulated cells of table T (which belongs
   to the CTABLES command state CT) and submits it.

   The work proceeds in these visible phases:

   1. Create the pivot table and set its title, caption, corner text and
      look.
   2. Optionally create a "Statistics" dimension (summary_dimension) and a
      row or column categories dimension (categories_dimension).
   3. For each axis, create its dimension and, nest by nest, sort the cells
      and build the tree of groups and leaves, recording in each cell the
      leaf it maps to on that axis.
   4. Assign sequence numbers to the areas of the cells, in leaf order.
   5. For every cell and summary specification, compute the value and put it
      into the pivot table at the indexes assembled from the dimensions.
   6. Submit the pivot table.

   NOTE(review): many lines of this function are not visible in this listing
   (for example the conditions guarding the title, caption and corner text,
   most braces, and the cleanup of temporary arrays), so the comments below
   describe only what the visible lines establish. */
4488 ctables_table_output (struct ctables *ct, struct ctables_table *t)
/* The title is the user text from T in one branch of a conditional whose
   test is not shown here, and the default text otherwise. */
4490 struct pivot_table *pt = pivot_table_create__ (
4492 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4493 : pivot_value_new_text (N_("Custom Tables"))),
4496 pivot_table_set_caption (
4497 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4499 pivot_table_set_corner_text (
4500 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate Statistics dimension is used when the summary labels are on a
   different axis from the summaries, or when the labels are not visible and
   there is more than one summary specification. */
4502 bool summary_dimension = (t->summary_axis != t->slabels_axis
4503 || (!t->slabels_visible
4504 && t->summary_specs.n > 1));
4505 if (summary_dimension)
4507 struct pivot_dimension *d = pivot_dimension_create (
4508 pt, t->slabels_axis, N_("Statistics"));
4509 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4510 if (!t->slabels_visible)
4511 d->hide_all_labels = true;
/* One leaf per merged summary specification. */
4512 for (size_t i = 0; i < specs->n; i++)
4513 pivot_category_create_leaf (
4514 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A separate categories dimension is used when clabels_example is set; it is
   assigned in ctables_check_label_position(). */
4517 bool categories_dimension = t->clabels_example != NULL;
4518 if (categories_dimension)
4520 struct pivot_dimension *d = pivot_dimension_create (
4521 pt, t->label_axis[t->clabels_from_axis],
4522 t->clabels_from_axis == PIVOT_AXIS_ROW
4523 ? N_("Row Categories")
4524 : N_("Column Categories"));
4525 const struct variable *var = t->clabels_example;
4526 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
/* One leaf per collected category-label value; each value must match a
   category of the example variable. */
4527 for (size_t i = 0; i < t->n_clabels_values; i++)
4529 const struct ctables_value *value = t->clabels_values[i];
4530 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4531 assert (cat != NULL);
4532 pivot_category_create_leaf (
4533 d->root, ctables_category_create_value_label (c, cat,
4539 pivot_table_set_look (pt, ct->look);
/* Create a dimension for each axis that has variables or that carries the
   summaries. */
4540 struct pivot_dimension *d[PIVOT_N_AXES];
4541 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4543 static const char *names[] = {
4544 [PIVOT_AXIS_ROW] = N_("Rows"),
4545 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4546 [PIVOT_AXIS_LAYER] = N_("Layers"),
4548 d[a] = (t->axes[a] || a == t->summary_axis
4549 ? pivot_dimension_create (pt, a, names[a])
4554 assert (t->axes[a]);
/* Build the category tree for this axis one nest at a time. */
4556 for (size_t i = 0; i < t->stacks[a].n; i++)
4558 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on this axis, counting their cells
   and tracking the deepest nesting seen. */
4559 struct ctables_section **sections = xnmalloc (t->n_sections,
4561 size_t n_sections = 0;
4563 size_t n_total_cells = 0;
4564 size_t max_depth = 0;
4565 for (size_t j = 0; j < t->n_sections; j++)
4566 if (t->sections[j].nests[a] == nest)
4568 struct ctables_section *s = &t->sections[j];
4569 sections[n_sections++] = s;
4570 n_total_cells += hmap_count (&s->cells);
4572 size_t depth = s->nests[a]->n;
4573 max_depth = MAX (depth, max_depth);
/* Gather the cells of those sections and order them with
   ctables_cell_compare_3way.  NOTE(review): a filter on which cells are
   gathered may sit on a line not shown here. */
4576 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4578 size_t n_sorted = 0;
4580 for (size_t j = 0; j < n_sections; j++)
4582 struct ctables_section *s = sections[j];
4584 struct ctables_cell *cell;
4585 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4587 sorted[n_sorted++] = cell;
4588 assert (n_sorted <= n_total_cells);
4591 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4592 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* A level is one tier of the pivot category tree built for this nest. */
4594 struct ctables_level
4596 enum ctables_level_type
4598 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4599 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4600 CTL_SUMMARY, /* Summary functions. */
4604 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Decide which levels this nest contributes; at most two per variable plus
   one for the summary functions, hence the allocation size. */
4607 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4608 size_t n_levels = 0;
4609 for (size_t k = 0; k < nest->n; k++)
4611 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4612 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4614 if (vlabel != CTVL_NONE)
4616 levels[n_levels++] = (struct ctables_level) {
4618 .vlabel = (enum settings_value_show) vlabel,
4623 if (nest->scale_idx != k
4624 && (k != nest->n - 1 || t->label_axis[a] == a))
4626 levels[n_levels++] = (struct ctables_level) {
4627 .type = CTL_CATEGORY,
4633 if (!summary_dimension && a == t->slabels_axis)
4635 levels[n_levels++] = (struct ctables_level) {
4636 .type = CTL_SUMMARY,
4637 .var_idx = SIZE_MAX,
4641 /* Pivot categories:
4643 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4644 - category for nest->vars[0], if nest->scale_idx != 0
4645 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4646 - category for nest->vars[1], if nest->scale_idx != 1
4648 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4649 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4650 - summary function, if 'a == t->slabels_axis && a ==
4653 Additional dimensions:
4655 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4657 - If 't->label_axis[b] == a' for some 'b != a', add a category
4662 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
// Walk the sorted cells.  For each one, n_common counts the leading levels it
// shares with the previous cell; categories are created only for the levels
// from n_common onward.
4664 for (size_t j = 0; j < n_sorted; j++)
4666 struct ctables_cell *cell = sorted[j];
4667 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4669 size_t n_common = 0;
4672 for (; n_common < n_levels; n_common++)
4674 const struct ctables_level *level = &levels[n_common];
4675 if (level->type == CTL_CATEGORY)
4677 size_t var_idx = level->var_idx;
4678 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4679 if (prev->axes[a].cvs[var_idx].category != c)
4681 else if (c->type != CCT_SUBTOTAL
4682 && c->type != CCT_TOTAL
4683 && c->type != CCT_POSTCOMPUTE
4684 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4685 &cell->axes[a].cvs[var_idx].value,
4686 var_get_type (nest->vars[var_idx])))
// Create the groups and leaves for the levels that are new for this cell.
// The parent of level k is the group created at level k - 1, or the root of
// the dimension for level 0.
4692 for (size_t k = n_common; k < n_levels; k++)
4694 const struct ctables_level *level = &levels[k];
4695 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4696 if (level->type == CTL_SUMMARY)
4698 assert (k == n_levels - 1);
4700 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4701 for (size_t m = 0; m < specs->n; m++)
4703 int leaf = pivot_category_create_leaf (
4704 parent, ctables_summary_label (&specs->specs[m],
4712 const struct variable *var = nest->vars[level->var_idx];
4713 struct pivot_value *label;
4714 if (level->type == CTL_VAR)
4716 label = pivot_value_new_variable (var);
4717 label->variable.show = level->vlabel;
4719 else if (level->type == CTL_CATEGORY)
4721 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4722 label = ctables_category_create_value_label (
4723 t->categories[var_get_dict_index (var)],
4724 cv->category, var, &cv->value);
// The last level becomes a leaf; every other level becomes a group.
4729 if (k == n_levels - 1)
4730 prev_leaf = pivot_category_create_leaf (parent, label);
4732 groups[k] = pivot_category_create_group__ (parent, label);
// Remember which leaf on this axis the cell maps to.
4736 cell->axes[a].leaf = prev_leaf;
4745 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
// Number the areas in leaf order: an area receives the next sequence number
// for its area type the first time a cell that belongs to it is visited.
4749 size_t n_total_cells = 0;
4750 for (size_t j = 0; j < t->n_sections; j++)
4751 n_total_cells += hmap_count (&t->sections[j].cells);
4753 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4754 size_t n_sorted = 0;
4755 for (size_t j = 0; j < t->n_sections; j++)
4757 const struct ctables_section *s = &t->sections[j];
4758 struct ctables_cell *cell;
4759 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4761 sorted[n_sorted++] = cell;
4763 assert (n_sorted <= n_total_cells);
4764 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4766 size_t ids[N_CTATS];
4767 memset (ids, 0, sizeof ids);
4768 for (size_t j = 0; j < n_sorted; j++)
4770 struct ctables_cell *cell = sorted[j];
4771 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4773 struct ctables_area *area = cell->areas[at];
4774 if (!area->sequence)
4775 area->sequence = ++ids[at];
// Emit one pivot table entry per cell and summary specification.
4782 for (size_t i = 0; i < t->n_sections; i++)
4784 struct ctables_section *s = &t->sections[i];
4786 struct ctables_cell *cell;
4787 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4792 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4793 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4794 for (size_t j = 0; j < specs->n; j++)
// Assemble the index of this entry in each dimension, following the order
// in which the dimensions were created above: statistics, categories, then
// the axes.
4797 size_t n_dindexes = 0;
4799 if (summary_dimension)
4800 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4802 if (categories_dimension)
4804 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4805 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4806 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4807 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4810 dindexes[n_dindexes++] = ctv->leaf;
4813 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4816 int leaf = cell->axes[a].leaf;
4817 if (a == t->summary_axis && !summary_dimension)
4819 dindexes[n_dindexes++] = leaf;
// Compute the value: a postcompute cell evaluates its expression (which may
// also replace the format), any other cell reads its accumulated summary.
4822 const struct ctables_summary_spec *ss = &specs->specs[j];
4824 struct fmt_spec format = specs->specs[j].format;
4825 bool is_ctables_format = ss->is_ctables_format;
4826 double d = (cell->postcompute
4827 ? ctables_cell_calculate_postcompute (
4828 s, cell, ss, &format, &is_ctables_format, j)
4829 : ctables_summary_value (cell->areas,
4830 &cell->summaries[j], ss));
// Choose the representation, in priority order: counts below the hide
// threshold, the substitute text for zero, the substitute text for
// system-missing, a CTABLES-specific format, or a plain formatted number.
4832 struct pivot_value *value;
4833 if (ct->hide_threshold != 0
4834 && d < ct->hide_threshold
4835 && ss->function == CTSF_COUNT)
4837 value = pivot_value_new_user_text_nocopy (
4838 xasprintf ("<%d", ct->hide_threshold));
4840 else if (d == 0 && ct->zero)
4841 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4842 else if (d == SYSMIS && ct->missing)
4843 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4844 else if (is_ctables_format)
4845 value = pivot_value_new_user_text_nocopy (
4846 ctables_format (d, &format, &ct->ctables_formats));
4849 value = pivot_value_new_number (d);
4850 value->numeric.format = format;
4852 /* XXX should text values be right-justified? */
4853 pivot_table_put (pt, dindexes, n_dindexes, value);
/* Submit the completed pivot table. */
4858 pivot_table_submit (pt);
/* Validates the request to display the category labels of axis A of table T
   somewhere other than on A itself (the ROWLABELS or COLLABELS subcommand
   with the LAYER or OPPOSITE position, per the names used in the messages).

   The innermost variable of the first nest on A is recorded in
   t->clabels_example.  An error is reported with msg (SE, ...) when:

   - a category of that variable has type CCT_FUNCTION, that is, sorting
     based on a summary function;
   - the innermost variable of any nest is the scale variable of that nest;
   - the innermost variable of a later nest differs from that of the first
     nest in width, value labels or category specifications.

   NOTE(review): the return type, the early exits and the return statements
   are not visible in this listing.  The caller combines two calls with &&,
   so presumably this returns true when the position is acceptable (or when
   the labels stay on A) and false after reporting an error; confirm. */
4862 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4864 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only in the error messages below. */
4868 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4869 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4871 const struct ctables_stack *stack = &t->stacks[a];
4875 const struct ctables_nest *n0 = &stack->nests[0];
4878 assert (stack->n == 1);
/* The innermost variable of the first nest serves as the reference against
   which the other nests are compared. */
4882 const struct variable *v0 = n0->vars[n0->n - 1];
4883 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4884 t->clabels_example = v0;
4886 for (size_t i = 0; i < c0->n_cats; i++)
4887 if (c0->cats[i].type == CCT_FUNCTION)
4889 msg (SE, _("%s=%s is not allowed with sorting based "
4890 "on a summary function."),
4891 subcommand_name, pos_name);
/* The innermost variable must not be the scale variable of its nest. */
4894 if (n0->n - 1 == n0->scale_idx)
4896 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4897 "but %s is a scale variable."),
4898 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest must agree with the first one. */
4902 for (size_t i = 1; i < stack->n; i++)
4904 const struct ctables_nest *ni = &stack->nests[i];
4906 const struct variable *vi = ni->vars[ni->n - 1];
4907 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4909 if (ni->n - 1 == ni->scale_idx)
4911 msg (SE, _("%s=%s requires the variables to be moved to be "
4912 "categorical, but %s is a scale variable."),
4913 subcommand_name, pos_name, var_get_name (vi));
4916 if (var_get_width (v0) != var_get_width (vi))
4918 msg (SE, _("%s=%s requires the variables to be "
4919 "moved to have the same width, but %s has "
4920 "width %d and %s has width %d."),
4921 subcommand_name, pos_name,
4922 var_get_name (v0), var_get_width (v0),
4923 var_get_name (vi), var_get_width (vi));
4926 if (!val_labs_equal (var_get_value_labels (v0),
4927 var_get_value_labels (vi)))
4929 msg (SE, _("%s=%s requires the variables to be "
4930 "moved to have the same value labels, but %s "
4931 "and %s have different value labels."),
4932 subcommand_name, pos_name,
4933 var_get_name (v0), var_get_name (vi));
4936 if (!ctables_categories_equal (c0, ci))
4938 msg (SE, _("%s=%s requires the variables to be "
4939 "moved to have the same category "
4940 "specifications, but %s and %s have different "
4941 "category specifications."),
4942 subcommand_name, pos_name,
4943 var_get_name (v0), var_get_name (vi));
/* Searches the *N entries of the array *SUM_VARS for VAR.  When VAR is not
   found it is stored at index *N, after growing the array with x2nrealloc()
   if *N has reached the capacity *ALLOCATED.

   NOTE(review): the return type, the return statements and the increment of
   *N are not visible in this listing.  The caller stores the result in
   sum_var_idx, so presumably the index of VAR within *SUM_VARS is returned in
   both cases; confirm against the complete source. */
4952 add_sum_var (struct variable *var,
4953 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4955 for (size_t i = 0; i < *n; i++)
4956 if (var == (*sum_vars)[i])
/* Not present: grow if full, then append. */
4959 if (*n >= *allocated)
4960 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4961 (*sum_vars)[*n] = var;
/* Maps AREA to another area type and returns it.

   NOTE(review): only two of the results, CTAT_LAYERCOL and CTAT_LAYERROW, are
   visible in this listing; the dispatch on AREA and the other results are on
   lines not shown.  The caller applies this to calc_area when the row or the
   column labels are placed on the opposite axis, so presumably it exchanges
   row-oriented and column-oriented area types; confirm. */
4965 static enum ctables_area_type
4966 rotate_area (enum ctables_area_type area)
4977 return CTAT_LAYERCOL;
4980 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A and registers the variables needed by
   CTSF_areaPCT_SUM summaries.

   For a node that carries summary specifications, every specification in
   each of the N_CSVS sets whose function is CTSF_areaPCT_SUM has its
   sum_var_idx set to the value returned by add_sum_var() for the variable of
   the node.  For a node with two sub-axes, both are visited recursively.

   SUM_VARS, N and ALLOCATED are passed through to add_sum_var().

   NOTE(review): the return type and the dispatch on the kind of node (and any
   check for a null A) are on lines not visible in this listing. */
4993 enumerate_sum_vars (const struct ctables_axis *a,
4994 struct variable ***sum_vars, size_t *n, size_t *allocated)
5002 for (size_t i = 0; i < N_CSVS; i++)
5003 for (size_t j = 0; j < a->specs[i].n; j++)
5005 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
5006 if (spec->function == CTSF_areaPCT_SUM)
5007 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into both operands of a composite node. */
5013 for (size_t i = 0; i < 2; i++)
5014 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution once its syntax has been parsed.

   Visible steps:

   1. For each axis, build the stack of nests with enumerate_fts() and, for
      every nest and area type, the list nest->areas[at] of variable indexes
      that delimit that area.  In the other branch a stack with a single
      nest whose scale_idx and summary_idx are SIZE_MAX is installed and the
      labels are kept on the axis itself.
   2. For each nest on the summary axis: supply a default summary
      specification when none was given (CTSF_MEAN for a scale variable,
      CTSF_COUNT otherwise), make sure the CSV_TOTAL set exists by cloning
      CSV_CELL, rotate calc_area when row or column labels are placed on the
      opposite axis, and record the listwise variables when
      smissing_listwise is set.
   3. Merge the summary specifications of all nests into t->summary_specs,
      storing in each specification its position (axis_idx) in the merged
      set.
   4. Collect the sum variables with enumerate_sum_vars().

   Returns the conjunction of ctables_check_label_position() for the row and
   the column axis.

   NOTE(review): many lines of this function, including the return type and
   the conditions that select between branches, are not visible in this
   listing; the comments below describe only what the visible lines show. */
5020 ctables_prepare_table (struct ctables_table *t)
5022 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5025 t->stacks[a] = enumerate_fts (a, t->axes[a]);
5027 for (size_t j = 0; j < t->stacks[a].n; j++)
5029 struct ctables_nest *nest = &t->stacks[a].nests[j];
5030 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for every variable of the nest; n_areas counts the entries in use. */
5032 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
5033 nest->n_areas[at] = 0;
/* For row-based and column-based area types, ata is the axis the area is
   named after and atb is the other one of row and column. */
5035 enum pivot_axis_type ata, atb;
5036 if (at == CTAT_ROW || at == CTAT_LAYERROW)
5038 ata = PIVOT_AXIS_ROW;
5039 atb = PIVOT_AXIS_COLUMN;
5041 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
5043 ata = PIVOT_AXIS_COLUMN;
5044 atb = PIVOT_AXIS_ROW;
5047 if (at == CTAT_LAYER
5048 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5049 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5050 ? a == atb && t->label_axis[a] != a
/* Count down from the innermost variable; K is unsigned, so the loop ends
   when it wraps around below zero.  The scale variable is skipped. */
5053 for (size_t k = nest->n - 1; k < nest->n; k--)
5054 if (k != nest->scale_idx)
5056 nest->areas[at][nest->n_areas[at]++] = k;
5062 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5063 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5064 : at == CTAT_TABLE ? true
/* Add every variable of the nest except the scale variable. */
5068 for (size_t k = 0; k < nest->n; k++)
5069 if (k != nest->scale_idx)
5070 nest->areas[at][nest->n_areas[at]++] = k;
/* n_drop is computed per area type from where the category labels come from
   and go to.  NOTE(review): its declaration and the selection among the
   three assignments below are on lines not shown here. */
5076 #define L PIVOT_AXIS_LAYER
5077 n_drop = (t->clabels_from_axis == L ? a != L
5078 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5079 : t->clabels_from_axis == a ? 2
5086 n_drop = a == ata && t->label_axis[ata] == atb;
5091 n_drop = (a == ata ? t->label_axis[ata] == atb
5093 : t->clabels_from_axis == atb ? -1
5094 : t->clabels_to_axis != atb ? 1
/* Remove the second-to-last entry by overwriting it with the last one.
   NOTE(review): the guarding conditions are not shown; presumably this is
   the handling of a negative n_drop. */
5106 size_t n = nest->n_areas[at];
5109 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5110 nest->n_areas[at]--;
/* Drop up to n_drop entries from the end of the list. */
5115 for (int i = 0; i < n_drop; i++)
5116 if (nest->n_areas[at] > 0)
5117 nest->n_areas[at]--;
/* Alternative branch: install a stack holding one nest that has no scale
   variable and no summary variable. */
5124 struct ctables_nest *nest = xmalloc (sizeof *nest);
5125 *nest = (struct ctables_nest) {
5127 .scale_idx = SIZE_MAX,
5128 .summary_idx = SIZE_MAX
5130 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5132 /* There's no point in moving labels away from an axis that has no
5133 labels, so avoid dealing with the special cases around that. */
5134 t->label_axis[a] = a;
/* Complete the summary specifications of every nest on the summary axis. */
5137 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5138 for (size_t i = 0; i < stack->n; i++)
5140 struct ctables_nest *nest = &stack->nests[i];
/* No summary specification was given for this nest: create a default one. */
5141 if (!nest->specs[CSV_CELL].n)
5143 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5144 ss->specs = xmalloc (sizeof *ss->specs);
5147 enum ctables_summary_function function
5148 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* The innermost variable of the nest becomes the summary variable. */
5152 nest->summary_idx = nest->n - 1;
5153 ss->var = nest->vars[nest->summary_idx];
5155 *ss->specs = (struct ctables_summary_spec) {
5156 .function = function,
5157 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5158 .format = ctables_summary_default_format (function, ss->var),
5161 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5162 &nest->specs[CSV_CELL]);
/* Cell specifications exist but total specifications do not: reuse the cell
   specifications for totals. */
5164 else if (!nest->specs[CSV_TOTAL].n)
5165 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5166 &nest->specs[CSV_CELL]);
/* When row or column labels are placed on the opposite axis, rotate the
   calc_area of each specification.  NOTE(review): a condition involving cfi
   may sit on a line not shown here. */
5168 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5169 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5171 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5172 for (size_t i = 0; i < nest->specs[sv].n; i++)
5174 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5175 const struct ctables_function_info *cfi =
5176 &ctables_function_info[ss->function];
5178 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise handling requested, collect the scale variables of the other
   nests that share this nest's group_head and attach the list to every
   summary variant of this nest. */
5182 if (t->ctables->smissing_listwise)
5184 struct variable **listwise_vars = NULL;
5186 size_t allocated = 0;
5188 for (size_t j = nest->group_head; j < stack->n; j++)
5190 const struct ctables_nest *other_nest = &stack->nests[j];
5191 if (other_nest->group_head != nest->group_head)
5194 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5197 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5198 sizeof *listwise_vars);
5199 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5202 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
/* NOTE(review): the condition under which the array is duplicated is not
   shown; presumably each variant after the first gets its own copy. */
5205 listwise_vars = xmemdup (listwise_vars,
5206 n * sizeof *listwise_vars);
5207 nest->specs[sv].listwise_vars = listwise_vars;
5208 nest->specs[sv].n_listwise_vars = n;
/* Merge the specification sets of all nests and variants into
   t->summary_specs.  Each merge_item tracks one set and an offset into it. */
5213 struct ctables_summary_spec_set *merged = &t->summary_specs;
5214 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5216 for (size_t j = 0; j < stack->n; j++)
5218 const struct ctables_nest *nest = &stack->nests[j];
5220 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5221 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Repeatedly pick the smallest pending item according to
   merge_item_compare_3way and append its specification to the merged set. */
5226 struct merge_item min = items[0];
5227 for (size_t j = 1; j < n_left; j++)
5228 if (merge_item_compare_3way (&items[j], &min) < 0)
5231 if (merged->n >= merged->allocated)
5232 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5233 sizeof *merged->specs);
5234 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item equal to the minimum maps to the merged entry just added and
   advances; an exhausted item is replaced by the last pending one. */
5236 for (size_t j = 0; j < n_left; )
5238 if (merge_item_compare_3way (&items[j], &min) == 0)
5240 struct merge_item *item = &items[j];
5241 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5242 if (++item->ofs >= item->set->n)
5244 items[j] = items[--n_left];
/* Collect the variables needed by the sum-based percentage summaries. */
5253 size_t allocated_sum_vars = 0;
5254 enumerate_sum_vars (t->axes[t->summary_axis],
5255 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5257 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5258 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, looks at the innermost variable of every nest in the stack of
   axis A of table T and records the value of that variable with
   ctables_value_insert(), subject to two visible tests: whether the variable
   is numeric with a system-missing value, and whether the value matches one
   of the categories specified for the variable.

   NOTE(review): the return type and the statements that follow each test are
   on lines not visible in this listing.  Presumably a system-missing value
   is skipped and only a value that matches a category is inserted; confirm
   against the complete source. */
5262 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5263 enum pivot_axis_type a)
5265 struct ctables_stack *stack = &t->stacks[a];
5266 for (size_t i = 0; i < stack->n; i++)
5268 const struct ctables_nest *nest = &stack->nests[i];
/* Only the innermost variable of the nest is considered. */
5269 const struct variable *var = nest->vars[nest->n - 1];
5270 const union value *value = case_data (c, var);
5272 if (var_is_numeric (var) && value->f == SYSMIS)
5275 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5277 ctables_value_insert (t, value, var_get_width (var));
/* Adds to OCCURRENCES the values of VAR that the category specification CATS
   refers to, whether or not those values appear in the data.

   Each category in CATS is handled according to its type.  The visible
   branches add: an explicit number; an explicit string, padded with spaces
   to the width of VAR; the labelled values of VAR that fall in a numeric
   range; the labelled values in a string range; the labelled values that are
   missing values of VAR; all labelled values; and the labelled values that
   are either not missing or admitted by include_missing.

   NOTE(review): the return type, the switch statement and most of its case
   labels are on lines not visible in this listing, so the category type that
   selects each branch cannot be stated from here. */
5282 ctables_add_category_occurrences (const struct variable *var,
5283 struct hmap *occurrences,
5284 const struct ctables_categories *cats)
5286 const struct val_labs *val_labs = var_get_value_labels (var);
5288 for (size_t i = 0; i < cats->n_cats; i++)
5290 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric value. */
5294 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string value: build a temporary value of the width of VAR, padded
   on the right with spaces, add it, then release the temporary. */
5300 int width = var_get_width (var);
5302 value_init (&value, width);
5303 value_copy_buf_rpad (&value, width,
5304 CHAR_CAST (uint8_t *, c->string.string),
5305 c->string.length, ' ');
5306 ctables_add_occurrence (var, &value, occurrences);
5307 value_destroy (&value, width);
/* Numeric range: labelled values within the closed interval nrange. */
5312 assert (var_is_numeric (var));
5313 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5314 vl = val_labs_next (val_labs, vl))
5315 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5316 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labelled values accepted by in_string_range(). */
5320 assert (var_is_alpha (var));
5321 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5322 vl = val_labs_next (val_labs, vl))
5323 if (in_string_range (&vl->value, var, c->srange))
5324 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labelled values that are missing values of VAR. */
5328 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5329 vl = val_labs_next (val_labs, vl))
5330 if (var_is_value_missing (var, &vl->value))
5331 ctables_add_occurrence (var, &vl->value, occurrences);
/* All labelled values. */
5335 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5336 vl = val_labs_next (val_labs, vl))
5337 ctables_add_occurrence (var, &vl->value, occurrences);
5340 case CCT_POSTCOMPUTE:
/* Labelled values that are not missing, plus the missing ones when the
   category has include_missing set. */
5350 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5351 vl = val_labs_next (val_labs, vl))
5352 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5353 ctables_add_occurrence (var, &vl->value, occurrences);
5356 case CCT_EXCLUDED_MISSING:
/* Recursive helper that inserts into section S a cell for every combination
   of the recorded occurrences (and postcompute categories) of the variables
   nested on its axes.

   A is the axis and A_IDX the nesting index currently being filled in.  C is
   a scratch case whose data for the current variable is overwritten with
   each candidate value, and CATS[axis][index] receives the category that
   matches that value.  Once A runs past the last axis, the combination built
   so far is inserted with ctables_cell_insert__().  When the nest of A is
   absent or exhausted, the recursion moves on to index 0 of the next axis.

   NOTE(review): the return type and some lines are not visible in this
   listing; in particular a test that skips the scale variable may be
   hidden. */
5363 ctables_section_recurse_add_empty_categories (
5364 struct ctables_section *s,
5365 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5366 enum pivot_axis_type a, size_t a_idx)
5368 if (a >= PIVOT_N_AXES)
5369 ctables_cell_insert__ (s, c, cats);
5370 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5371 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5374 const struct variable *var = s->nests[a]->vars[a_idx];
5375 const struct ctables_categories *categories = s->table->categories[
5376 var_get_dict_index (var)];
5377 int width = var_get_width (var);
/* Try every value recorded as an occurrence for this variable. */
5378 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5379 const struct ctables_occurrence *o;
5380 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of the variable in the scratch case, find the category
   it belongs to, and continue with the next nesting index. */
5382 union value *value = case_data_rw (c, var);
5383 value_destroy (value, width);
5384 value_clone (value, &o->value, width);
5385 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5386 assert (cats[a][a_idx] != NULL);
5387 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not tied to a data value, so each one is tried
   as well. */
5390 for (size_t i = 0; i < categories->n_cats; i++)
5392 const struct ctables_category *cat = &categories->cats[i];
5393 if (cat->type == CCT_POSTCOMPUTE)
5395 cats[a][a_idx] = cat;
5396 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Makes sure that section S has cells for categories that did not occur in
   the data.

   For every non-scale variable nested on any axis of S whose category
   specification has show_empty set, the values named by that specification
   are added to the occurrences of the variable.  Then a scratch case is
   created from the dictionary prototype and
   ctables_section_recurse_add_empty_categories() inserts a cell for each
   combination of occurrences.

   NOTE(review): the return type and several lines are not visible in this
   listing, including the use of show_empty after the loop and the release of
   the scratch case.  Presumably nothing further is done when no variable has
   show_empty set; confirm against the complete source. */
5403 ctables_section_add_empty_categories (struct ctables_section *s)
5405 bool show_empty = false;
5406 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5408 for (size_t k = 0; k < s->nests[a]->n; k++)
5409 if (k != s->nests[a]->scale_idx)
5411 const struct variable *var = s->nests[a]->vars[k];
5412 const struct ctables_categories *cats = s->table->categories[
5413 var_get_dict_index (var)];
5414 if (cats->show_empty)
5417 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Per-axis scratch arrays, sized by the nesting depth of each axis, that
   hold the category chosen at each nesting index during the recursion. */
5423 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5424 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5425 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5426 const struct ctables_category **cats[PIVOT_N_AXES] =
5428 [PIVOT_AXIS_LAYER] = layer_cats,
5429 [PIVOT_AXIS_ROW] = row_cats,
5430 [PIVOT_AXIS_COLUMN] = column_cats,
5432 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5433 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of the data accumulated for it, leaving its hash maps
   in place for reuse.

   Visible steps: destroy the value of and unlink every recorded occurrence
   of each non-scale variable; for every cell, destroy the values it holds on
   each axis, free the per-axis value arrays, uninitialize and free its
   summaries, and unlink it; unlink every area of each area type; and shrink
   the cell and area maps.

   NOTE(review): the return type and the statements that free each
   occurrence, cell and area structure, as well as the shrinking of the
   occurrence maps, are on lines not visible in this listing. */
5438 ctables_section_clear (struct ctables_section *s)
/* Occurrences, per axis and per non-scale variable. */
5440 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5442 const struct ctables_nest *nest = s->nests[a];
5443 for (size_t i = 0; i < nest->n; i++)
5444 if (i != nest->scale_idx)
5446 const struct variable *var = nest->vars[i];
5447 int width = var_get_width (var);
5448 struct ctables_occurrence *o, *next;
5449 struct hmap *map = &s->occurrences[a][i];
5450 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5452 value_destroy (&o->value, width);
5453 hmap_delete (map, &o->node);
/* Cells. */
5460 struct ctables_cell *cell, *next_cell;
5461 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5463 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5465 const struct ctables_nest *nest = s->nests[a];
5466 for (size_t i = 0; i < nest->n; i++)
5467 if (i != nest->scale_idx)
5468 value_destroy (&cell->axes[a].cvs[i].value,
5469 var_get_width (nest->vars[i]));
5470 free (cell->axes[a].cvs);
/* The summaries of a cell follow the specification set selected by its
   summary variant on the summary axis. */
5473 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5474 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5475 for (size_t i = 0; i < specs->n; i++)
5476 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5477 free (cell->summaries);
5479 hmap_delete (&s->cells, &cell->node);
5482 hmap_shrink (&s->cells);
/* Areas, per area type. */
5484 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5486 struct ctables_area *area, *next_area;
5487 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5491 hmap_delete (&s->areas[at], &area->node);
5494 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: first clears its contents with
   ctables_section_clear(), then destroys the occurrence maps of every
   variable on every axis and frees the per-axis arrays that hold them, and
   finally destroys the cell map and the area maps.

   The structure S itself is not freed by the visible lines.

   NOTE(review): the return type is on a line not visible in this listing. */
5499 ctables_section_uninit (struct ctables_section *s)
5501 ctables_section_clear (s);
5503 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5505 struct ctables_nest *nest = s->nests[a];
5506 for (size_t i = 0; i < nest->n; i++)
5507 hmap_destroy (&s->occurrences[a][i]);
5508 free (s->occurrences[a]);
5511 hmap_destroy (&s->cells);
5512 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5513 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated for table T: clears every section and, when
   the table has a category-labels example variable, destroys and unlinks
   every collected label value, shrinks the map that held them, and resets
   the array of label values to empty.

   NOTE(review): the return type and the statement that frees each value
   structure are on lines not visible in this listing. */
5517 ctables_table_clear (struct ctables_table *t)
5519 for (size_t i = 0; i < t->n_sections; i++)
5520 ctables_section_clear (&t->sections[i]);
5522 if (t->clabels_example)
/* All collected values have the width of the example variable. */
5524 int width = var_get_width (t->clabels_example);
5525 struct ctables_value *value, *next_value;
5526 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5527 &t->clabels_values_map)
5529 value_destroy (&value->value, width);
5530 hmap_delete (&t->clabels_values_map, &value->node);
5533 hmap_shrink (&t->clabels_values_map);
5535 free (t->clabels_values);
5536 t->clabels_values = NULL;
5537 t->n_clabels_values = 0;
/* Runs the CTABLES procedure over the cases in INPUT for dataset DS.

   Visible steps:

   1. For every table, allocate room for one section per combination of
      row, column and layer nests (at least one per axis) and create the
      sections with ctables_table_add_section().
   2. Divide the input into groups with a casegrouper.  Groups follow the
      split variables in one branch and form a single group over no variables
      in the other.
   3. For every case in a group, determine the dictionary weight, the
      effective weight and the unit weight, insert the case into every
      section of every table, and collect category-label values for each
      axis whose labels are shown on another axis.
   4. After each group, for every table: sort the collected label values,
      add cells for empty categories, output the table and clear it.

   Returns the result of casegrouper_destroy().

   NOTE(review): the return type, the last parameter (used below as ct) and
   several lines are not visible in this listing, for example the condition
   that chooses how the grouper is created and the condition under which the
   split-file values are output. */
5542 ctables_execute (struct dataset *ds, struct casereader *input,
5545 for (size_t i = 0; i < ct->n_tables; i++)
5547 struct ctables_table *t = ct->tables[i];
/* Upper bound on the number of sections: the product of the stack sizes,
   counting an empty stack as one. */
5548 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5549 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5550 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5551 sizeof *t->sections);
5552 size_t ix[PIVOT_N_AXES];
5553 ctables_table_add_section (t, 0, ix);
5556 struct dictionary *dict = dataset_dict (ds);
5558 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5559 struct casegrouper *grouper
5561 ? casegrouper_create_splits (input, dict)
5562 : casegrouper_create_vars (input, NULL, 0));
5563 struct casereader *group;
5564 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the first case of the group to report the split values. */
5568 struct ccase *c = casereader_peek (group, 0);
5571 output_split_file_values (ds, c);
5576 bool warn_on_invalid = true;
5577 for (struct ccase *c = casereader_read (group); c;
5578 case_unref (c), c = casereader_read (group))
/* Weights for the case, indexed by weighting type.  The effective weight
   comes from the effective-weight variable when one is set; the alternative
   is on a line not shown here. */
5580 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5581 double e_weight = (ct->e_weight
5582 ? var_force_valid_weight (ct->e_weight,
5583 case_num (c, ct->e_weight),
5587 [CTW_DICTIONARY] = d_weight,
5588 [CTW_EFFECTIVE] = e_weight,
5589 [CTW_UNWEIGHTED] = 1.0,
5592 for (size_t i = 0; i < ct->n_tables; i++)
5594 struct ctables_table *t = ct->tables[i];
5596 for (size_t j = 0; j < t->n_sections; j++)
5597 ctables_cell_insert (&t->sections[j], c, weight);
/* Collect label values only for axes whose labels are displayed elsewhere. */
5599 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5600 if (t->label_axis[a] != a)
5601 ctables_insert_clabels_values (t, c, a);
5604 casereader_destroy (group);
/* The group has been read completely: produce and then reset each table. */
5606 for (size_t i = 0; i < ct->n_tables; i++)
5608 struct ctables_table *t = ct->tables[i];
5610 if (t->clabels_example)
5611 ctables_sort_clabels_values (t);
5613 for (size_t j = 0; j < t->n_sections; j++)
5614 ctables_section_add_empty_categories (&t->sections[j]);
5616 ctables_table_output (ct, t);
5617 ctables_table_clear (t);
5620 return casegrouper_destroy (grouper);
/* Looks up the postcompute called NAME in the postcomputes map of CT.  The
   map is probed with a case-insensitive hash of NAME and candidates are
   compared with utf8_strcasecmp(), so the lookup does not depend on case.

   NOTE(review): the return statements are on lines not visible in this
   listing.  Callers test the result against null, so presumably the
   matching postcompute is returned, or NULL when there is none; confirm. */
5623 static struct ctables_postcompute *
5624 ctables_find_postcompute (struct ctables *ct, const char *name)
5626 struct ctables_postcompute *pc;
5627 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5628 utf8_hash_case_string (name, 0), &ct->postcomputes)
5629 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE specification of the form

       &name = EXPR(expression)

   from LEXER and records it as a postcompute.  When a postcompute with the
   same name already exists, a warning and a note pointing at the previous
   definition are issued and the old expression and location are destroyed;
   otherwise a new postcompute is allocated and inserted into the
   postcomputes map under a case-insensitive hash of its name.  The source
   location of the specification is stored on the postcompute, and its label
   is set to the text of the expression as written.

   NOTE(review): the return type, the last parameter (used below as ct), the
   error exits and some assignments (for example of the parsed expression)
   are on lines not visible in this listing. */
5635 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Token offset one before the current token, used as the start of the
   location reported for this specification. */
5638 int pcompute_start = lex_ofs (lexer) - 1;
5640 if (!lex_match (lexer, T_AND))
5642 lex_error_expecting (lexer, "&");
5645 if (!lex_force_id (lexer))
5648 char *name = ss_xstrdup (lex_tokss (lexer));
5651 if (!lex_force_match (lexer, T_EQUALS)
5652 || !lex_force_match_id (lexer, "EXPR")
5653 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression so that its text can be
   recovered below. */
5659 int expr_start = lex_ofs (lexer);
5660 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5661 int expr_end = lex_ofs (lexer) - 1;
5662 if (!expr || !lex_force_match (lexer, T_RPAREN))
5664 ctables_pcexpr_destroy (expr);
5668 int pcompute_end = lex_ofs (lexer) - 1;
5670 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5673 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and drop its parts. */
5676 msg_at (SW, location, _("New definition of &%s will override the "
5677 "previous definition."),
5679 msg_at (SN, pc->location, _("This is the previous definition."));
5681 ctables_pcexpr_destroy (pc->expr);
5682 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes ownership of NAME. */
5687 pc = xmalloc (sizeof *pc);
5688 *pc = (struct ctables_postcompute) { .name = name };
5689 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5690 utf8_hash_case_string (pc->name, 0));
5693 pc->location = location;
5695 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary functions and formats that follows FORMAT in a
   PPROPERTIES specification, storing the result in *SSS.

   *SSS is first reset to an empty set.  Items are read until the end of the
   command, a slash, or one of the identifiers LABEL or HIDESOURCECATS is
   reached.  Each item consists of a summary function, a percentile in the
   closed range 0 to 100 when the function is CTSF_PTILE, and a format
   specifier; it is appended to *SSS, which grows as needed.

   NOTE(review): the return type, the return statements and the error exits
   are on lines not visible in this listing.  The caller negates the result
   to detect failure, and the visible call to
   ctables_summary_spec_set_uninit() presumably belongs to the failure path;
   confirm against the complete source. */
5700 ctables_parse_pproperties_format (struct lexer *lexer,
5701 struct ctables_summary_spec_set *sss)
5703 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5705 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5706 && !(lex_token (lexer) == T_ID
5707 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5708 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5709 lex_tokss (lexer)))))
5711 /* Parse function. */
5712 enum ctables_summary_function function;
5713 enum ctables_weighting weighting;
5714 enum ctables_area_type area;
5715 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5718 /* Parse percentile. */
5719 double percentile = 0;
5720 if (function == CTSF_PTILE)
5722 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5724 percentile = lex_number (lexer);
/* Parse the format specifier. */
5729 struct fmt_spec format;
5730 bool is_ctables_format;
5731 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the parsed item to the set.  Some initializers of the new entry are
   on lines not shown here. */
5734 if (sss->n >= sss->allocated)
5735 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5736 sizeof *sss->specs);
5737 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5738 .function = function,
5739 .weighting = weighting,
5742 .percentile = percentile,
5744 .is_ctables_format = is_ctables_format,
5750 ctables_summary_spec_set_uninit (sss);
5755 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5757 struct ctables_postcompute **pcs = NULL;
5759 size_t allocated_pcs = 0;
5761 while (lex_match (lexer, T_AND))
5763 if (!lex_force_id (lexer))
5765 struct ctables_postcompute *pc
5766 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5769 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5774 if (n_pcs >= allocated_pcs)
5775 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5779 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5781 if (lex_match_id (lexer, "LABEL"))
5783 lex_match (lexer, T_EQUALS);
5784 if (!lex_force_string (lexer))
5787 for (size_t i = 0; i < n_pcs; i++)
5789 free (pcs[i]->label);
5790 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5795 else if (lex_match_id (lexer, "FORMAT"))
5797 lex_match (lexer, T_EQUALS);
5799 struct ctables_summary_spec_set sss;
5800 if (!ctables_parse_pproperties_format (lexer, &sss))
5803 for (size_t i = 0; i < n_pcs; i++)
5806 ctables_summary_spec_set_uninit (pcs[i]->specs);
5808 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5809 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5811 ctables_summary_spec_set_uninit (&sss);
5813 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5815 lex_match (lexer, T_EQUALS);
5816 bool hide_source_cats;
5817 if (!parse_bool (lexer, &hide_source_cats))
5819 for (size_t i = 0; i < n_pcs; i++)
5820 pcs[i]->hide_source_cats = hide_source_cats;
5824 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats NOW, interpreted as local time, according to the strftime()
   template FORMAT and appends the result to OUT.

   Appends nothing if NOW cannot be converted to local time or if the
   formatted text does not fit in the internal buffer.  In the first case
   localtime() returns a null pointer, and in the second strftime() returns 0
   and leaves the buffer contents unspecified, so neither result may be
   used. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
5846 skip_prefix (struct substring *s, struct substring prefix)
5848 if (ss_starts_with (*s, prefix))
5850 ss_advance (s, prefix.length);
5858 put_table_expression (struct string *out, struct lexer *lexer,
5859 struct dictionary *dict, int expr_start, int expr_end)
5862 for (int ofs = expr_start; ofs < expr_end; ofs++)
5864 const struct token *t = lex_ofs_token (lexer, ofs);
5865 if (t->type == T_LBRACK)
5867 else if (t->type == T_RBRACK && nest > 0)
5873 else if (t->type == T_ID)
5875 const struct variable *var
5876 = dict_lookup_var (dict, t->string.string);
5877 const char *label = var ? var_get_label (var) : NULL;
5878 ds_put_cstr (out, label ? label : t->string.string);
5882 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5883 ds_put_byte (out, ' ');
5885 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5886 ds_put_cstr (out, repr);
5889 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5890 ds_put_byte (out, ' ');
5896 put_title_text (struct string *out, struct substring in, time_t now,
5897 struct lexer *lexer, struct dictionary *dict,
5898 int expr_start, int expr_end)
5902 size_t chunk = ss_find_byte (in, ')');
5903 ds_put_substring (out, ss_head (in, chunk));
5904 ss_advance (&in, chunk);
5905 if (ss_is_empty (in))
5908 if (skip_prefix (&in, ss_cstr (")DATE")))
5909 put_strftime (out, now, "%x");
5910 else if (skip_prefix (&in, ss_cstr (")TIME")))
5911 put_strftime (out, now, "%X");
5912 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5913 put_table_expression (out, lexer, dict, expr_start, expr_end);
5916 ds_put_byte (out, ')');
5917 ss_advance (&in, 1);
5923 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5925 struct casereader *input = NULL;
5927 struct measure_guesser *mg = measure_guesser_create (ds);
5930 input = proc_open (ds);
5931 measure_guesser_run (mg, input);
5932 measure_guesser_destroy (mg);
5935 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5936 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5937 enum settings_value_show tvars = settings_get_show_variables ();
5938 for (size_t i = 0; i < n_vars; i++)
5939 vlabels[i] = (enum ctables_vlabel) tvars;
5941 struct pivot_table_look *look = pivot_table_look_unshare (
5942 pivot_table_look_ref (pivot_table_look_get_default ()));
5943 look->omit_empty = false;
5945 struct ctables *ct = xmalloc (sizeof *ct);
5946 *ct = (struct ctables) {
5947 .dict = dataset_dict (ds),
5949 .ctables_formats = FMT_SETTINGS_INIT,
5951 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5954 time_t now = time (NULL);
5959 const char *dot_string;
5960 const char *comma_string;
5962 static const struct ctf ctfs[4] = {
5963 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5964 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5965 { CTEF_PAREN, "-,(,),", "-.(.)." },
5966 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5968 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5969 for (size_t i = 0; i < 4; i++)
5971 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5972 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5973 fmt_number_style_from_string (s));
5976 if (!lex_force_match (lexer, T_SLASH))
5979 while (!lex_match_id (lexer, "TABLE"))
5981 if (lex_match_id (lexer, "FORMAT"))
5983 double widths[2] = { SYSMIS, SYSMIS };
5984 double units_per_inch = 72.0;
5986 while (lex_token (lexer) != T_SLASH)
5988 if (lex_match_id (lexer, "MINCOLWIDTH"))
5990 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5993 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5995 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5998 else if (lex_match_id (lexer, "UNITS"))
6000 lex_match (lexer, T_EQUALS);
6001 if (lex_match_id (lexer, "POINTS"))
6002 units_per_inch = 72.0;
6003 else if (lex_match_id (lexer, "INCHES"))
6004 units_per_inch = 1.0;
6005 else if (lex_match_id (lexer, "CM"))
6006 units_per_inch = 2.54;
6009 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6013 else if (lex_match_id (lexer, "EMPTY"))
6018 lex_match (lexer, T_EQUALS);
6019 if (lex_match_id (lexer, "ZERO"))
6021 /* Nothing to do. */
6023 else if (lex_match_id (lexer, "BLANK"))
6024 ct->zero = xstrdup ("");
6025 else if (lex_force_string (lexer))
6027 ct->zero = ss_xstrdup (lex_tokss (lexer));
6033 else if (lex_match_id (lexer, "MISSING"))
6035 lex_match (lexer, T_EQUALS);
6036 if (!lex_force_string (lexer))
6040 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6041 ? ss_xstrdup (lex_tokss (lexer))
6047 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6048 "UNITS", "EMPTY", "MISSING");
6053 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6054 && widths[0] > widths[1])
6056 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6060 for (size_t i = 0; i < 2; i++)
6061 if (widths[i] != SYSMIS)
6063 int *wr = ct->look->width_ranges[TABLE_HORZ];
6064 wr[i] = widths[i] / units_per_inch * 96.0;
6069 else if (lex_match_id (lexer, "VLABELS"))
6071 if (!lex_force_match_id (lexer, "VARIABLES"))
6073 lex_match (lexer, T_EQUALS);
6075 struct variable **vars;
6077 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6081 if (!lex_force_match_id (lexer, "DISPLAY"))
6086 lex_match (lexer, T_EQUALS);
6088 enum ctables_vlabel vlabel;
6089 if (lex_match_id (lexer, "DEFAULT"))
6090 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6091 else if (lex_match_id (lexer, "NAME"))
6093 else if (lex_match_id (lexer, "LABEL"))
6094 vlabel = CTVL_LABEL;
6095 else if (lex_match_id (lexer, "BOTH"))
6097 else if (lex_match_id (lexer, "NONE"))
6101 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6107 for (size_t i = 0; i < n_vars; i++)
6108 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6111 else if (lex_match_id (lexer, "MRSETS"))
6113 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6115 lex_match (lexer, T_EQUALS);
6116 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6119 else if (lex_match_id (lexer, "SMISSING"))
6121 if (lex_match_id (lexer, "VARIABLE"))
6122 ct->smissing_listwise = false;
6123 else if (lex_match_id (lexer, "LISTWISE"))
6124 ct->smissing_listwise = true;
6127 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6131 else if (lex_match_id (lexer, "PCOMPUTE"))
6133 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6136 else if (lex_match_id (lexer, "PPROPERTIES"))
6138 if (!ctables_parse_pproperties (lexer, ct))
6141 else if (lex_match_id (lexer, "WEIGHT"))
6143 if (!lex_force_match_id (lexer, "VARIABLE"))
6145 lex_match (lexer, T_EQUALS);
6146 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6150 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6152 if (lex_match_id (lexer, "COUNT"))
6154 lex_match (lexer, T_EQUALS);
6155 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6158 ct->hide_threshold = lex_integer (lexer);
6161 else if (ct->hide_threshold == 0)
6162 ct->hide_threshold = 5;
6166 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6167 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6168 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6172 if (!lex_force_match (lexer, T_SLASH))
6176 size_t allocated_tables = 0;
6179 if (ct->n_tables >= allocated_tables)
6180 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6181 sizeof *ct->tables);
6183 struct ctables_category *cat = xmalloc (sizeof *cat);
6184 *cat = (struct ctables_category) {
6186 .include_missing = false,
6187 .sort_ascending = true,
6190 struct ctables_categories *c = xmalloc (sizeof *c);
6191 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6192 *c = (struct ctables_categories) {
6199 struct ctables_categories **categories = xnmalloc (n_vars,
6200 sizeof *categories);
6201 for (size_t i = 0; i < n_vars; i++)
6204 struct ctables_table *t = xmalloc (sizeof *t);
6205 *t = (struct ctables_table) {
6207 .slabels_axis = PIVOT_AXIS_COLUMN,
6208 .slabels_visible = true,
6209 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6211 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6212 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6213 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6215 .clabels_from_axis = PIVOT_AXIS_LAYER,
6216 .clabels_to_axis = PIVOT_AXIS_LAYER,
6217 .categories = categories,
6218 .n_categories = n_vars,
6221 ct->tables[ct->n_tables++] = t;
6223 lex_match (lexer, T_EQUALS);
6224 int expr_start = lex_ofs (lexer);
6225 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6226 &t->axes[PIVOT_AXIS_ROW]))
6228 if (lex_match (lexer, T_BY))
6230 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6231 &t->axes[PIVOT_AXIS_COLUMN]))
6234 if (lex_match (lexer, T_BY))
6236 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6237 &t->axes[PIVOT_AXIS_LAYER]))
6241 int expr_end = lex_ofs (lexer);
6243 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6244 && !t->axes[PIVOT_AXIS_LAYER])
6246 lex_error (lexer, _("At least one variable must be specified."));
6250 const struct ctables_axis *scales[PIVOT_N_AXES];
6251 size_t n_scales = 0;
6252 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6254 scales[a] = find_scale (t->axes[a]);
6260 msg (SE, _("Scale variables may appear only on one axis."));
6261 if (scales[PIVOT_AXIS_ROW])
6262 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6263 _("This scale variable appears on the rows axis."));
6264 if (scales[PIVOT_AXIS_COLUMN])
6265 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6266 _("This scale variable appears on the columns axis."));
6267 if (scales[PIVOT_AXIS_LAYER])
6268 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6269 _("This scale variable appears on the layer axis."));
6273 const struct ctables_axis *summaries[PIVOT_N_AXES];
6274 size_t n_summaries = 0;
6275 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6277 summaries[a] = (scales[a]
6279 : find_categorical_summary_spec (t->axes[a]));
6283 if (n_summaries > 1)
6285 msg (SE, _("Summaries may appear only on one axis."));
6286 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6289 msg_at (SN, summaries[a]->loc,
6291 ? _("This variable on the rows axis has a summary.")
6292 : a == PIVOT_AXIS_COLUMN
6293 ? _("This variable on the columns axis has a summary.")
6294 : _("This variable on the layers axis has a summary."));
6296 msg_at (SN, summaries[a]->loc,
6297 _("This is a scale variable, so it always has a "
6298 "summary even if the syntax does not explicitly "
6303 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6304 if (n_summaries ? summaries[a] : t->axes[a])
6306 t->summary_axis = a;
6310 if (lex_token (lexer) == T_ENDCMD)
6312 if (!ctables_prepare_table (t))
6316 if (!lex_force_match (lexer, T_SLASH))
6319 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6321 if (lex_match_id (lexer, "SLABELS"))
6323 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6325 if (lex_match_id (lexer, "POSITION"))
6327 lex_match (lexer, T_EQUALS);
6328 if (lex_match_id (lexer, "COLUMN"))
6329 t->slabels_axis = PIVOT_AXIS_COLUMN;
6330 else if (lex_match_id (lexer, "ROW"))
6331 t->slabels_axis = PIVOT_AXIS_ROW;
6332 else if (lex_match_id (lexer, "LAYER"))
6333 t->slabels_axis = PIVOT_AXIS_LAYER;
6336 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6340 else if (lex_match_id (lexer, "VISIBLE"))
6342 lex_match (lexer, T_EQUALS);
6343 if (!parse_bool (lexer, &t->slabels_visible))
6348 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6353 else if (lex_match_id (lexer, "CLABELS"))
6355 if (lex_match_id (lexer, "AUTO"))
6357 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6358 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6360 else if (lex_match_id (lexer, "ROWLABELS"))
6362 lex_match (lexer, T_EQUALS);
6363 if (lex_match_id (lexer, "OPPOSITE"))
6364 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6365 else if (lex_match_id (lexer, "LAYER"))
6366 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6369 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6373 else if (lex_match_id (lexer, "COLLABELS"))
6375 lex_match (lexer, T_EQUALS);
6376 if (lex_match_id (lexer, "OPPOSITE"))
6377 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6378 else if (lex_match_id (lexer, "LAYER"))
6379 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6382 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6388 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6393 else if (lex_match_id (lexer, "CRITERIA"))
6395 if (!lex_force_match_id (lexer, "CILEVEL"))
6397 lex_match (lexer, T_EQUALS);
6399 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6401 t->cilevel = lex_number (lexer);
6404 else if (lex_match_id (lexer, "CATEGORIES"))
6406 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6410 else if (lex_match_id (lexer, "TITLES"))
6415 if (lex_match_id (lexer, "CAPTION"))
6416 textp = &t->caption;
6417 else if (lex_match_id (lexer, "CORNER"))
6419 else if (lex_match_id (lexer, "TITLE"))
6423 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6426 lex_match (lexer, T_EQUALS);
6428 struct string s = DS_EMPTY_INITIALIZER;
6429 while (lex_is_string (lexer))
6431 if (!ds_is_empty (&s))
6432 ds_put_byte (&s, ' ');
6433 put_title_text (&s, lex_tokss (lexer), now,
6434 lexer, dataset_dict (ds),
6435 expr_start, expr_end);
6439 *textp = ds_steal_cstr (&s);
6441 while (lex_token (lexer) != T_SLASH
6442 && lex_token (lexer) != T_ENDCMD);
6444 else if (lex_match_id (lexer, "SIGTEST"))
6446 int start_ofs = lex_ofs (lexer) - 1;
6449 t->chisq = xmalloc (sizeof *t->chisq);
6450 *t->chisq = (struct ctables_chisq) {
6452 .include_mrsets = true,
6453 .all_visible = true,
6459 if (lex_match_id (lexer, "TYPE"))
6461 lex_match (lexer, T_EQUALS);
6462 if (!lex_force_match_id (lexer, "CHISQUARE"))
6465 else if (lex_match_id (lexer, "ALPHA"))
6467 lex_match (lexer, T_EQUALS);
6468 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6470 t->chisq->alpha = lex_number (lexer);
6473 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6475 lex_match (lexer, T_EQUALS);
6476 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6479 else if (lex_match_id (lexer, "CATEGORIES"))
6481 lex_match (lexer, T_EQUALS);
6482 if (lex_match_id (lexer, "ALLVISIBLE"))
6483 t->chisq->all_visible = true;
6484 else if (lex_match_id (lexer, "SUBTOTALS"))
6485 t->chisq->all_visible = false;
6488 lex_error_expecting (lexer,
6489 "ALLVISIBLE", "SUBTOTALS");
6495 lex_error_expecting (lexer, "TYPE", "ALPHA",
6496 "INCLUDEMRSETS", "CATEGORIES");
6500 while (lex_token (lexer) != T_SLASH
6501 && lex_token (lexer) != T_ENDCMD);
6503 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6504 _("Support for SIGTEST not yet implemented."));
6507 else if (lex_match_id (lexer, "COMPARETEST"))
6509 int start_ofs = lex_ofs (lexer);
6512 t->pairwise = xmalloc (sizeof *t->pairwise);
6513 *t->pairwise = (struct ctables_pairwise) {
6515 .alpha = { .05, .05 },
6516 .adjust = BONFERRONI,
6517 .include_mrsets = true,
6518 .meansvariance_allcats = true,
6519 .all_visible = true,
6528 if (lex_match_id (lexer, "TYPE"))
6530 lex_match (lexer, T_EQUALS);
6531 if (lex_match_id (lexer, "PROP"))
6532 t->pairwise->type = PROP;
6533 else if (lex_match_id (lexer, "MEAN"))
6534 t->pairwise->type = MEAN;
6537 lex_error_expecting (lexer, "PROP", "MEAN");
6541 else if (lex_match_id (lexer, "ALPHA"))
6543 lex_match (lexer, T_EQUALS);
6545 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6547 double a0 = lex_number (lexer);
6550 lex_match (lexer, T_COMMA);
6551 if (lex_is_number (lexer))
6553 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6555 double a1 = lex_number (lexer);
6558 t->pairwise->alpha[0] = MIN (a0, a1);
6559 t->pairwise->alpha[1] = MAX (a0, a1);
6562 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6564 else if (lex_match_id (lexer, "ADJUST"))
6566 lex_match (lexer, T_EQUALS);
6567 if (lex_match_id (lexer, "BONFERRONI"))
6568 t->pairwise->adjust = BONFERRONI;
6569 else if (lex_match_id (lexer, "BH"))
6570 t->pairwise->adjust = BH;
6571 else if (lex_match_id (lexer, "NONE"))
6572 t->pairwise->adjust = 0;
6575 lex_error_expecting (lexer, "BONFERRONI", "BH",
6580 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6582 lex_match (lexer, T_EQUALS);
6583 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6586 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6588 lex_match (lexer, T_EQUALS);
6589 if (lex_match_id (lexer, "ALLCATS"))
6590 t->pairwise->meansvariance_allcats = true;
6591 else if (lex_match_id (lexer, "TESTEDCATS"))
6592 t->pairwise->meansvariance_allcats = false;
6595 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6599 else if (lex_match_id (lexer, "CATEGORIES"))
6601 lex_match (lexer, T_EQUALS);
6602 if (lex_match_id (lexer, "ALLVISIBLE"))
6603 t->pairwise->all_visible = true;
6604 else if (lex_match_id (lexer, "SUBTOTALS"))
6605 t->pairwise->all_visible = false;
6608 lex_error_expecting (lexer, "ALLVISIBLE",
6613 else if (lex_match_id (lexer, "MERGE"))
6615 lex_match (lexer, T_EQUALS);
6616 if (!parse_bool (lexer, &t->pairwise->merge))
6619 else if (lex_match_id (lexer, "STYLE"))
6621 lex_match (lexer, T_EQUALS);
6622 if (lex_match_id (lexer, "APA"))
6623 t->pairwise->apa_style = true;
6624 else if (lex_match_id (lexer, "SIMPLE"))
6625 t->pairwise->apa_style = false;
6628 lex_error_expecting (lexer, "APA", "SIMPLE");
6632 else if (lex_match_id (lexer, "SHOWSIG"))
6634 lex_match (lexer, T_EQUALS);
6635 if (!parse_bool (lexer, &t->pairwise->show_sig))
6640 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6641 "INCLUDEMRSETS", "MEANSVARIANCE",
6642 "CATEGORIES", "MERGE", "STYLE",
6647 while (lex_token (lexer) != T_SLASH
6648 && lex_token (lexer) != T_ENDCMD);
6650 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6651 _("Support for COMPARETEST not yet implemented."));
6656 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6657 "CRITERIA", "CATEGORIES", "TITLES",
6658 "SIGTEST", "COMPARETEST");
6662 if (!lex_match (lexer, T_SLASH))
6666 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6668 t->clabels_from_axis = PIVOT_AXIS_ROW;
6669 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6671 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6675 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6676 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6677 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6679 if (!ctables_prepare_table (t))
6682 while (lex_token (lexer) != T_ENDCMD);
6685 input = proc_open (ds);
6686 bool ok = ctables_execute (ds, input, ct);
6687 ok = proc_commit (ds) && ok;
6689 ctables_destroy (ct);
6690 return ok ? CMD_SUCCESS : CMD_FAILURE;
6695 ctables_destroy (ct);