1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
58 enum ctables_weighting
66 /* CTABLES table areas. */
68 enum ctables_area_type
70 /* Within a section, where stacked variables divide one section from
73 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
74 parse_ctables_summary_function() parses correctly. */
75 CTAT_TABLE, /* All layers of a whole section. */
76 CTAT_LAYERROW, /* Row in one layer within a section. */
77 CTAT_LAYERCOL, /* Column in one layer within a section. */
78 CTAT_LAYER, /* One layer within a section. */
80 /* Within a subtable, where a subtable pairs an innermost row variable with
81 an innermost column variable within a single layer. */
82 CTAT_SUBTABLE, /* Whole subtable. */
83 CTAT_ROW, /* Row within a subtable. */
84 CTAT_COL, /* Column within a subtable. */
88 static const char *ctables_area_type_name[N_CTATS] = {
89 [CTAT_TABLE] = "TABLE",
90 [CTAT_LAYER] = "LAYER",
91 [CTAT_LAYERROW] = "LAYERROW",
92 [CTAT_LAYERCOL] = "LAYERCOL",
93 [CTAT_SUBTABLE] = "SUBTABLE",
100 struct hmap_node node;
102 const struct ctables_cell *example;
105 double count[N_CTWS];
106 double valid[N_CTWS];
107 double total[N_CTWS];
108 struct ctables_sum *sums;
116 /* CTABLES summary functions. */
118 enum ctables_function_type
120 /* A function that operates on data in a single cell. It operates on
121 effective weights. It does not have an unweighted version. */
124 /* A function that operates on data in a single cell. The function
125 operates on effective weights and has a U-prefixed unweighted
129 /* A function that operates on data in a single cell. It operates on
130 dictionary weights, and has U-prefixed unweighted version and an
131 E-prefixed effective weight version. */
134 /* A function that operates on an area of cells. It operates on effective
135 weights and has a U-prefixed unweighted version. */
146 enum ctables_function_availability
148 CTFA_ALL, /* Any variables. */
149 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
150 //CTFA_MRSETS, /* Only multiple-response sets */
153 enum ctables_summary_function
155 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
156 #include "ctables.inc"
161 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
163 #include "ctables.inc"
/* Static properties of one summary function.  The three flags at the end are
   derived from 'type' where the table of functions is initialized. */
167 struct ctables_function_info
169 struct substring basename; /* Name with weighting and area prefixes removed. */
170 enum ctables_function_type type;
171 enum ctables_format format;
172 enum ctables_function_availability availability;
174 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
175 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
176 bool is_area; /* Needs an area prefix. */
/* Properties of every summary function, expanded from the S() rows of
   ctables.inc and looked up by enum ctables_summary_function.

   The prefix flags follow from TYPE: CTFT_UCELL, CTFT_UECELL, and CTFT_AREA
   functions accept a 'U' prefix, only CTFT_UECELL functions accept an 'E'
   prefix, and only CTFT_AREA functions take an area prefix. */
178 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
179 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
181 .basename = SS_LITERAL_INITIALIZER (NAME), \
184 .availability = AVAILABILITY, \
185 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
186 .e_prefix = (TYPE) == CTFT_UECELL, \
187 .is_area = (TYPE) == CTFT_AREA \
189 #include "ctables.inc"
/* Returns the output format to use for summary FUNCTION when the user did not
   specify one.  Each function's format class comes from the FORMAT column of
   ctables.inc.  Depending on the class, the result is F with width 40, PCT
   with width 40 and 1 decimal, or VAR's own print format. */
193 static struct fmt_spec
194 ctables_summary_default_format (enum ctables_summary_function function,
195 const struct variable *var)
/* Format class of each function, indexed by function. */
197 static const enum ctables_format default_formats[] = {
198 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
199 #include "ctables.inc"
202 switch (default_formats[function])
205 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
208 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
/* This is the only case that reads VAR. */
211 return *var_get_print_format (var);
218 static enum ctables_function_availability
219 ctables_function_availability (enum ctables_summary_function f)
221 static enum ctables_function_availability availability[] = {
222 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
223 #include "ctables.inc"
227 return availability[f];
/* Parses the name of a summary function from LEXER's current token, which
   must be an identifier.  The name is the function's base name, optionally
   preceded by a 'U' or 'E' weighting prefix and, for area functions, by an
   area keyword such as TABLE or LAYERROW.  The results are delivered through
   *FUNCTION, *WEIGHTING, and *AREA.

   Names that end in .LCL, .UCL, or .SE are rejected as not yet implemented,
   and a name that matches no function is reported as an error.
   NOTE(review): the return statements are not visible in this excerpt;
   presumably the function returns true on success and false on error. */
231 parse_ctables_summary_function (struct lexer *lexer,
232 enum ctables_summary_function *function,
233 enum ctables_weighting *weighting,
234 enum ctables_area_type *area)
236 if (!lex_force_id (lexer))
239 struct substring name = lex_tokss (lexer);
240 if (ss_ends_with_case (name, ss_cstr (".LCL"))
241 || ss_ends_with_case (name, ss_cstr (".UCL"))
242 || ss_ends_with_case (name, ss_cstr (".SE")))
244 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
245 "is not yet implemented."));
/* Strip an optional weighting prefix from the front of NAME.  'E' is tried
   only if 'U' did not match, so at most one of the two flags is set. */
249 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
250 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
/* Strip an optional area keyword.  Keywords are tried in enum order, which is
   why one keyword that is a prefix of another must come later in the enum. */
252 bool has_area = false;
254 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
255 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
260 if (ss_equals_case (name, ss_cstr ("PCT")))
262 /* Special case where .COUNT suffix is omitted. */
263 *function = CTSF_areaPCT_COUNT;
264 *weighting = CTW_EFFECTIVE;
/* Whatever remains of NAME must equal some function's base name. */
271 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
273 const struct ctables_function_info *cfi = &ctables_function_info[f];
274 if (ss_equals_case (cfi->basename, name))
/* The match does not count if the user wrote a prefix that this function does
   not accept, or if the presence of an area keyword disagrees with whether
   the function is an area function. */
277 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
280 *weighting = (e ? CTW_EFFECTIVE
282 : cfi->e_prefix ? CTW_DICTIONARY
289 lex_error (lexer, _("Expecting summary function name."));
294 ctables_summary_function_name (enum ctables_summary_function function,
295 enum ctables_weighting weighting,
296 enum ctables_area_type area,
297 char *buffer, size_t bufsize)
299 const struct ctables_function_info *cfi = &ctables_function_info[function];
300 snprintf (buffer, bufsize, "%s%s%s",
301 (weighting == CTW_UNWEIGHTED ? "U"
302 : weighting == CTW_DICTIONARY ? ""
303 : cfi->e_prefix ? "E"
305 cfi->is_area ? ctables_area_type_name[area] : "",
306 cfi->basename.string);
/* Returns the default label for summary FUNCTION computed with WEIGHTING over
   AREA.  The translatable labels are only marked with N_() here, not
   translated.

   Functions with an unweighted variant get an "Unweighted" label for it, and
   the count-like functions (count, valid N, total N) additionally use an
   "Adjusted" label for weights that are in effect but are not the dictionary
   weights.  AREA selects among the labels of the area functions and is not
   consulted for the others.  CTSF_PTILE must not be passed in; its label
   depends on the percentile, which this function does not receive. */
311 ctables_summary_function_label__ (enum ctables_summary_function function,
312 enum ctables_weighting weighting,
313 enum ctables_area_type area)
/* 'w': some weighting is in effect.  'd': it is the dictionary weighting. */
315 bool w = weighting != CTW_UNWEIGHTED;
316 bool d = weighting == CTW_DICTIONARY;
317 enum ctables_area_type a = area;
321 return (d ? N_("Count")
322 : w ? N_("Adjusted Count")
323 : N_("Unweighted Count"));
325 case CTSF_areaPCT_COUNT:
328 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
329 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
330 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
331 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
332 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
333 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
334 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
338 case CTSF_areaPCT_VALIDN:
341 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
342 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
343 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
344 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
345 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
346 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
347 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
351 case CTSF_areaPCT_TOTALN:
354 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
355 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
356 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
357 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
358 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
359 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
360 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
/* Maximum, minimum, and range have a single label regardless of weighting. */
364 case CTSF_MAXIMUM: return N_("Maximum");
365 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
366 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
367 case CTSF_MINIMUM: return N_("Minimum");
368 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
369 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
370 case CTSF_PTILE: NOT_REACHED ();
371 case CTSF_RANGE: return N_("Range");
372 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
373 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
374 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
375 case CTSF_TOTALN: return (d ? N_("Total N")
376 : w ? N_("Adjusted Total N")
377 : N_("Unweighted Total N"));
378 case CTSF_VALIDN: return (d ? N_("Valid N")
379 : w ? N_("Adjusted Valid N")
380 : N_("Unweighted Valid N"));
381 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
382 case CTSF_areaPCT_SUM:
385 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
386 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
387 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
388 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
389 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
390 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
391 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
398 /* Don't bother translating these: they are for developers only. */
399 case CTAT_TABLE: return "Table ID";
400 case CTAT_LAYER: return "Layer ID";
401 case CTAT_LAYERROW: return "Layer Row ID";
402 case CTAT_LAYERCOL: return "Layer Column ID";
403 case CTAT_SUBTABLE: return "Subtable ID";
404 case CTAT_ROW: return "Row ID";
405 case CTAT_COL: return "Column ID";
413 static struct pivot_value *
414 ctables_summary_function_label (enum ctables_summary_function function,
415 enum ctables_weighting weighting,
416 enum ctables_area_type area,
419 if (function == CTSF_PTILE)
421 char *s = (weighting != CTW_UNWEIGHTED
422 ? xasprintf (_("Percentile %.2f"), percentile)
423 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
424 return pivot_value_new_user_text_nocopy (s);
427 return pivot_value_new_text (ctables_summary_function_label__ (
428 function, weighting, area));
431 /* CTABLES summaries. */
/* One summary function to calculate, together with how to label and format
   its result. */
433 struct ctables_summary_spec
435 /* The calculation to be performed.
437 'function' is the function to calculate. 'weighting' specifies which
438 weights to use: effective weights, dictionary weights, or none (see enum
439 ctables_weighting). 'calc_area' is the area over which the
440 calculation takes place (for functions that target only an individual
441 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
442 percentile between 0 and 100 (for other functions it must be 0). */
443 enum ctables_summary_function function;
444 enum ctables_weighting weighting;
445 enum ctables_area_type calc_area;
446 double percentile; /* CTSF_PTILE only. */
448 /* How to display the result of the calculation.
450 'label' is a user-specified label, NULL if the user didn't specify one.
453 'user_area' is usually the same as 'calc_area', but when category labels
454 are rotated from one axis to another it swaps rows and columns.
456 'format' is the format for displaying the output. If
457 'is_ctables_format' is true, then 'format.type' is one of the special
458 CTEF_* formats instead of the standard ones. */
460 enum ctables_area_type user_area;
461 struct fmt_spec format;
462 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Clones SRC into DST.  The label is duplicated with xstrdup_if_nonnull(), so
   that DST gets a string of its own, or NULL if SRC has no label. */
469 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
470 const struct ctables_summary_spec *src)
473 dst->label = xstrdup_if_nonnull (src->label);
477 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
483 /* Collections of summary functions. */
/* A growable array of summary specifications that all apply to the same
   variable. */
struct ctables_summary_spec_set
  {
    struct ctables_summary_spec *specs; /* The specifications. */
    size_t n;                   /* Number of elements in use in 'specs'. */
    size_t allocated;           /* Number of elements allocated in 'specs'. */

    /* The variable to which the summary specs are applied. */
    struct variable *var;

    /* Whether the variable to which the summary specs are applied is a scale
       variable for the purpose of summarization.

       (VALIDN and TOTALN act differently for summarizing scale and categorical
       variables.) */
    bool is_scale;

    /* If any of these optional additional scale variables are missing, then
       treat 'var' as if it's missing too.  This is for implementing
       SMISSING=LISTWISE. */
    struct variable **listwise_vars;
    size_t n_listwise_vars;
  };
/* Initializes DST as a clone of SRC.  Each specification is cloned
   individually into a newly allocated array, so the clone does not share
   specifications with SRC.  An empty SRC yields a null array instead of a
   zero-size allocation.

   NOTE(review): of the remaining members, this excerpt only shows 'is_scale'
   being carried over; confirm what the clone does with the listwise
   variables. */
509 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
510 const struct ctables_summary_spec_set *src)
512 struct ctables_summary_spec *specs
513 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
514 for (size_t i = 0; i < src->n; i++)
515 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
517 *dst = (struct ctables_summary_spec_set) {
522 .is_scale = src->is_scale,
/* Releases what SET owns: every specification in use is uninitialized, and
   the array of listwise variables is freed.  The variables themselves are
   not touched. */
527 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
529 for (size_t i = 0; i < set->n; i++)
530 ctables_summary_spec_uninit (&set->specs[i]);
531 free (set->listwise_vars);
536 is_listwise_missing (const struct ctables_summary_spec_set *specs,
537 const struct ccase *c)
539 for (size_t i = 0; i < specs->n_listwise_vars; i++)
541 const struct variable *var = specs->listwise_vars[i];
542 if (var_is_num_missing (var, case_num (c, var)))
549 /* CTABLES postcompute expressions. */
551 struct ctables_postcompute
553 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
554 char *name; /* Name, without leading &. */
556 struct msg_location *location; /* Location of definition. */
557 struct ctables_pcexpr *expr;
559 struct ctables_summary_spec_set *specs;
560 bool hide_source_cats;
563 struct ctables_pcexpr
573 enum ctables_pcexpr_op
576 CTPO_CONSTANT, /* 5 */
577 CTPO_CAT_NUMBER, /* [5] */
578 CTPO_CAT_STRING, /* ["STRING"] */
579 CTPO_CAT_NRANGE, /* [LO THRU 5] */
580 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
581 CTPO_CAT_MISSING, /* MISSING */
582 CTPO_CAT_OTHERNM, /* OTHERNM */
583 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
584 CTPO_CAT_TOTAL, /* TOTAL */
598 /* CTPO_CAT_NUMBER. */
601 /* CTPO_CAT_STRING, in dictionary encoding. */
602 struct substring string;
604 /* CTPO_CAT_NRANGE. */
607 /* CTPO_CAT_SRANGE. */
608 struct substring srange[2];
610 /* CTPO_CAT_SUBTOTAL. */
611 size_t subtotal_index;
613 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
614 One element: CTPO_NEG. */
615 struct ctables_pcexpr *subs[2];
618 /* Source location. */
619 struct msg_location *location;
623 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
626 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
627 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
628 struct ctables_pcexpr *sub1);
630 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
631 struct dictionary *);
/* Destroys postcompute expression E along with everything that it owns.
   Which members are released depends on the node's operation, because they
   share storage: a string category frees its string, a string range frees
   both of its bounds, and a node with subexpressions destroys them
   recursively.  The node's source location is destroyed for every kind of
   node. */
634 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
640 case CTPO_CAT_STRING:
641 ss_dealloc (&e->string);
644 case CTPO_CAT_SRANGE:
645 for (size_t i = 0; i < 2; i++)
646 ss_dealloc (&e->srange[i]);
/* Both slots are visited for every operator node. */
655 for (size_t i = 0; i < 2; i++)
656 ctables_pcexpr_destroy (e->subs[i]);
/* These node types have nothing of their own to release. */
660 case CTPO_CAT_NUMBER:
661 case CTPO_CAT_NRANGE:
662 case CTPO_CAT_MISSING:
663 case CTPO_CAT_OTHERNM:
664 case CTPO_CAT_SUBTOTAL:
669 msg_location_destroy (e->location);
674 static struct ctables_pcexpr *
675 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
676 struct ctables_pcexpr *sub0,
677 struct ctables_pcexpr *sub1)
679 struct ctables_pcexpr *e = xmalloc (sizeof *e);
680 *e = (struct ctables_pcexpr) {
682 .subs = { sub0, sub1 },
683 .location = msg_location_merged (sub0->location, sub1->location),
688 /* How to parse an operator. */
691 enum token_type token;
692 enum ctables_pcexpr_op op;
/* Searches the N_OPS operators in OPS, in order, for one whose token type is
   the type of LEXER's current token.

   NOTE(review): the statements that follow a match are not visible in this
   excerpt.  Presumably the matching operator is returned after its token is
   consumed, except that a T_NEG_NUM token is left in place because it is also
   the operand; confirm against the full source. */
695 static const struct operator *
696 ctables_pcexpr_match_operator (struct lexer *lexer,
697 const struct operator ops[], size_t n_ops)
699 for (const struct operator *op = ops; op < ops + n_ops; op++)
700 if (lex_token (lexer) == op->token)
702 if (op->token != T_NEG_NUM)
711 static struct ctables_pcexpr *
712 ctables_pcexpr_parse_binary_operators__ (
713 struct lexer *lexer, struct dictionary *dict,
714 const struct operator ops[], size_t n_ops,
715 parse_recursively_func *parse_next_level,
716 const char *chain_warning, struct ctables_pcexpr *lhs)
718 for (int op_count = 0; ; op_count++)
720 const struct operator *op
721 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
724 if (op_count > 1 && chain_warning)
725 msg_at (SW, lhs->location, "%s", chain_warning);
730 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
733 ctables_pcexpr_destroy (lhs);
737 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
741 static struct ctables_pcexpr *
742 ctables_pcexpr_parse_binary_operators (
743 struct lexer *lexer, struct dictionary *dict,
744 const struct operator ops[], size_t n_ops,
745 parse_recursively_func *parse_next_level, const char *chain_warning)
747 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
751 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
756 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
757 struct dictionary *);
759 static struct ctables_pcexpr
760 ctpo_cat_nrange (double low, double high)
762 return (struct ctables_pcexpr) {
763 .op = CTPO_CAT_NRANGE,
764 .nrange = { low, high },
768 static struct ctables_pcexpr
769 ctpo_cat_srange (struct substring low, struct substring high)
771 return (struct ctables_pcexpr) {
772 .op = CTPO_CAT_SRANGE,
773 .srange = { low, high },
/* Converts the text of LEXER's current token from UTF-8 into DICT's encoding
   and strips trailing spaces from the result.  No pool is passed to the
   conversion, and the callers release the result with ss_dealloc(). */
777 static struct substring
778 parse_substring (struct lexer *lexer, struct dictionary *dict)
780 struct substring s = recode_substring_pool (
781 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
782 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression, which is one of:

   - A number.
   - One of the keywords MISSING, OTHERNM, or TOTAL.
   - SUBTOTAL, optionally followed by a positive index in square brackets.
   - A category in square brackets: a number, a string, or a range of
     numbers or of strings written with THRU, where LO may stand in for the
     lower bound and HI for the upper bound.
   - A parenthesized expression.

   On success, returns a newly allocated node whose location spans the tokens
   that were parsed.  Strings are converted to DICT's encoding. */
787 static struct ctables_pcexpr *
788 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
790 int start_ofs = lex_ofs (lexer);
/* The node is assembled in this local and only copied to the heap at the
   very end. */
791 struct ctables_pcexpr e;
792 if (lex_is_number (lexer))
794 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
795 .number = lex_number (lexer) };
798 else if (lex_match_id (lexer, "MISSING"))
799 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
800 else if (lex_match_id (lexer, "OTHERNM"))
801 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
802 else if (lex_match_id (lexer, "TOTAL"))
803 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
804 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when SUBTOTAL is written without one; an explicit index
   is at least 1. */
806 size_t subtotal_index = 0;
807 if (lex_match (lexer, T_LBRACK))
809 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
811 subtotal_index = lex_integer (lexer);
813 if (!lex_force_match (lexer, T_RBRACK))
816 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
817 .subtotal_index = subtotal_index };
819 else if (lex_match (lexer, T_LBRACK))
/* A range that starts with LO: whether it is a range of strings or of numbers
   is decided by the upper bound.  An open lower bound is represented as a
   null string or as -DBL_MAX. */
821 if (lex_match_id (lexer, "LO"))
823 if (!lex_force_match_id (lexer, "THRU"))
826 if (lex_is_string (lexer))
828 struct substring low = { .string = NULL };
829 struct substring high = parse_substring (lexer, dict);
830 e = ctpo_cat_srange (low, high);
834 if (!lex_force_num (lexer))
836 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, which is a category by itself unless THRU follows it.  An open
   upper bound (HI) is represented as DBL_MAX. */
840 else if (lex_is_number (lexer))
842 double number = lex_number (lexer);
844 if (lex_match_id (lexer, "THRU"))
846 if (lex_match_id (lexer, "HI"))
847 e = ctpo_cat_nrange (number, DBL_MAX);
850 if (!lex_force_num (lexer))
852 e = ctpo_cat_nrange (number, lex_number (lexer));
857 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, which is a category by itself unless THRU follows it.  An open
   upper bound (HI) is represented as a null string. */
860 else if (lex_is_string (lexer))
862 struct substring s = parse_substring (lexer, dict);
864 if (lex_match_id (lexer, "THRU"))
866 struct substring high;
868 if (lex_match_id (lexer, "HI"))
869 high = (struct substring) { .string = NULL };
872 if (!lex_force_string (lexer))
877 high = parse_substring (lexer, dict);
880 e = ctpo_cat_srange (s, high);
883 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
887 lex_error (lexer, NULL);
/* The closing bracket is mandatory.  If it is missing, the strings converted
   above are still owned here and have to be released. */
891 if (!lex_force_match (lexer, T_RBRACK))
893 if (e.op == CTPO_CAT_STRING)
894 ss_dealloc (&e.string);
895 else if (e.op == CTPO_CAT_SRANGE)
897 ss_dealloc (&e.srange[0]);
898 ss_dealloc (&e.srange[1]);
/* A parenthesized expression restarts at the lowest precedence level. */
903 else if (lex_match (lexer, T_LPAREN))
905 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
908 if (!lex_force_match (lexer, T_RPAREN))
910 ctables_pcexpr_destroy (ep);
917 lex_error (lexer, NULL);
/* The location runs from the first token to the last token consumed. */
921 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
922 return xmemdup (&e, sizeof e);
925 static struct ctables_pcexpr *
926 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
927 struct lexer *lexer, int start_ofs)
929 struct ctables_pcexpr *e = xmalloc (sizeof *e);
930 *e = (struct ctables_pcexpr) {
933 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
938 static struct ctables_pcexpr *
939 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
941 static const struct operator op = { T_EXP, CTPO_POW };
943 const char *chain_warning =
944 _("The exponentiation operator (`**') is left-associative: "
945 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
946 "To disable this warning, insert parentheses.");
948 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
949 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
950 ctables_pcexpr_parse_primary,
953 /* Special case for situations like "-5**6", which must be parsed as
956 int start_ofs = lex_ofs (lexer);
957 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
958 *lhs = (struct ctables_pcexpr) {
960 .number = -lex_tokval (lexer),
961 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
965 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
967 ctables_pcexpr_parse_primary, chain_warning, lhs);
971 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
974 /* Parses the unary minus level. */
975 static struct ctables_pcexpr *
976 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
978 int start_ofs = lex_ofs (lexer);
979 if (!lex_match (lexer, T_DASH))
980 return ctables_pcexpr_parse_exp (lexer, dict);
982 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
986 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
989 /* Parses the multiplication and division level. */
990 static struct ctables_pcexpr *
991 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
993 static const struct operator ops[] =
995 { T_ASTERISK, CTPO_MUL },
996 { T_SLASH, CTPO_DIV },
999 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1000 sizeof ops / sizeof *ops,
1001 ctables_pcexpr_parse_neg, NULL);
1004 /* Parses the addition and subtraction level. */
1005 static struct ctables_pcexpr *
1006 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1008 static const struct operator ops[] =
1010 { T_PLUS, CTPO_ADD },
1011 { T_DASH, CTPO_SUB },
1012 { T_NEG_NUM, CTPO_ADD },
1015 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1016 ops, sizeof ops / sizeof *ops,
1017 ctables_pcexpr_parse_mul, NULL);
1020 /* CTABLES axis expressions. */
1022 /* CTABLES has a number of extra formats that we implement via custom
1023 currency specifications on an alternate fmt_settings. */
1024 #define CTEF_NEGPAREN FMT_CCA
1025 #define CTEF_NEQUAL FMT_CCB
1026 #define CTEF_PAREN FMT_CCC
1027 #define CTEF_PCTPAREN FMT_CCD
1029 enum ctables_summary_variant
1038 enum ctables_axis_op
1054 struct variable *var;
1056 struct ctables_summary_spec_set specs[N_CSVS];
1060 struct ctables_axis *subs[2];
1063 struct msg_location *loc;
/* Destroys AXIS along with everything that it owns.  Depending on the kind of
   node, that is either its summary specification sets, one per summary
   variant, or its two subexpressions, which are destroyed recursively.  The
   node's source location is destroyed as well. */
1067 ctables_axis_destroy (struct ctables_axis *axis)
1075 for (size_t i = 0; i < N_CSVS; i++)
1076 ctables_summary_spec_set_uninit (&axis->specs[i]);
1081 ctables_axis_destroy (axis->subs[0]);
1082 ctables_axis_destroy (axis->subs[1]);
1085 msg_location_destroy (axis->loc);
1089 static struct ctables_axis *
1090 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1091 struct ctables_axis *sub0,
1092 struct ctables_axis *sub1,
1093 struct lexer *lexer, int start_ofs)
1095 struct ctables_axis *axis = xmalloc (sizeof *axis);
1096 *axis = (struct ctables_axis) {
1098 .subs = { sub0, sub1 },
1099 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* What the axis expression parser functions need, bundled so that it can be
   passed down as a single argument. */
1104 struct ctables_axis_parse_ctx
1106 struct lexer *lexer; /* Source of tokens. */
1107 struct dictionary *dict; /* Where variable names are looked up. */
/* Returns a new pivot_value to label the summary described by SPEC.

   One path yields the function's default label, built from SPEC's function,
   weighting, user area, and percentile.  The other path starts from SPEC's
   own label and replaces each occurrence of ")CILEVEL" in it by CILEVEL
   formatted with "%g".

   NOTE(review): the test that chooses between the two paths is not visible in
   this excerpt; presumably the default label is used when SPEC has no label
   of its own. */
1110 static struct pivot_value *
1111 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1114 return ctables_summary_function_label (spec->function, spec->weighting,
1115 spec->user_area, spec->percentile);
/* 'in' is the part of the label not yet copied; 'out' collects the result. */
1118 struct substring in = ss_cstr (spec->label);
1119 struct substring target = ss_cstr (")CILEVEL");
1121 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that comes before the next occurrence of the target, then
   step over that text. */
1124 size_t chunk = ss_find_substring (in, target);
1125 ds_put_substring (&out, ss_head (in, chunk));
1126 ss_advance (&in, chunk);
/* The pivot_value takes over the string that was built. */
1128 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Step over the target and emit its replacement. */
1130 ss_advance (&in, target.length);
1131 ds_put_format (&out, "%g", cilevel);
/* Adds a summary specification, built from FUNCTION, WEIGHTING, AREA,
   PERCENTILE, LABEL, FORMAT, and IS_CTABLES_FORMAT, to the specification set
   for summary variant SV of the variables in AXIS.  If AXIS is a single
   variable, the specification is appended to that variable's set; otherwise
   the request is passed on to both subexpressions of AXIS.

   LABEL may be null; if it is not, the specification gets a copy of it.
   FORMAT may be null, which selects FUNCTION's default format for the
   variable.  LOC is where the summary function appears in the syntax, for
   use in error messages.

   A function that is restricted to scale variables is rejected, with an
   error, for a variable that is not scale, unless SV is CSV_TOTAL.
   NOTE(review): the return statements are not visible in this excerpt; the
   recursive call tests the result as a boolean, so presumably it is true on
   success and false on error. */
1137 add_summary_spec (struct ctables_axis *axis,
1138 enum ctables_summary_function function,
1139 enum ctables_weighting weighting,
1140 enum ctables_area_type area, double percentile,
1141 const char *label, const struct fmt_spec *format,
1142 bool is_ctables_format, const struct msg_location *loc,
1143 enum ctables_summary_variant sv)
1145 if (axis->op == CTAO_VAR)
/* The function's name as the user would spell it, for error messages. */
1147 char function_name[128];
1148 ctables_summary_function_name (function, weighting, area,
1149 function_name, sizeof function_name);
1150 const char *var_name = var_get_name (axis->var);
1151 switch (ctables_function_availability (function))
1155 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1156 "response sets."), function_name);
1157 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1163 if (!axis->scale && sv != CSV_TOTAL)
1166 _("Summary function %s applies only to scale variables."),
1168 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append to this variant's set, growing the array when it is full. */
1178 struct ctables_summary_spec_set *set = &axis->specs[sv];
1179 if (set->n >= set->allocated)
1180 set->specs = x2nrealloc (set->specs, &set->allocated,
1181 sizeof *set->specs);
1183 struct ctables_summary_spec *dst = &set->specs[set->n++];
1184 *dst = (struct ctables_summary_spec) {
1185 .function = function,
1186 .weighting = weighting,
1189 .percentile = percentile,
1190 .label = xstrdup_if_nonnull (label),
1191 .format = (format ? *format
1192 : ctables_summary_default_format (function, axis->var)),
1193 .is_ctables_format = is_ctables_format,
/* Not a single variable: apply the same request to both subexpressions. */
1199 for (size_t i = 0; i < 2; i++)
1200 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1201 percentile, label, format, is_ctables_format,
1208 static struct ctables_axis *ctables_axis_parse_stack (
1209 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression or
   a variable name, which may be followed by [S] or [C] to force the variable
   to be treated as scale or as categorical.  Without either marker, the
   variable counts as scale exactly when its measurement level is scale.

   Names that begin with '$' are rejected because multiple response sets are
   not implemented, and so is a string variable that would be treated as
   scale. */
1211 static struct ctables_axis *
1212 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1214 if (lex_match (ctx->lexer, T_LPAREN))
1216 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1217 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1219 ctables_axis_destroy (sub);
1225 if (!lex_force_id (ctx->lexer))
1228 if (lex_tokcstr (ctx->lexer)[0] == '$')
1230 lex_error (ctx->lexer,
1231 _("Multiple response set support not implemented."));
1235 int start_ofs = lex_ofs (ctx->lexer);
1236 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1240 struct ctables_axis *axis = xmalloc (sizeof *axis);
1241 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* An explicit [S] or [C] overrides the variable's measurement level. */
1243 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1244 : lex_match_phrase (ctx->lexer, "[C]") ? false
1245 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name plus any marker after it. */
1246 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1247 lex_ofs (ctx->lexer) - 1);
1248 if (axis->scale && var_is_alpha (var))
1250 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1252 var_get_name (var));
1253 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false if it
   contains none or if S is null. */
static bool
has_digit (const char *s)
{
  /* strcspn() yields the length of the leading part of S that has no digit in
     it, so the byte just past that part is a digit unless it is the null
     terminator. */
  return s != NULL && s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format specifier from LEXER into *FORMAT.  Besides the
   standard formats, this accepts the CTABLES-only formats NEGPAREN, NEQUAL,
   PAREN, and PCTPAREN, matched without regard to case, which are stored as
   the corresponding CTEF_* type.  *IS_CTABLES_FORMAT is set to true for one
   of those and to false for a standard format.

   A standard format must be valid for output and for numeric data.  A CTABLES
   format gets its own checks, whose error messages require a width of at
   least 2 and a width greater than the number of decimals. */
1267 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1268 bool *is_ctables_format)
/* Pick the specifier apart first, so that the type name can be compared with
   the names of the CTABLES formats. */
1270 char type[FMT_TYPE_LEN_MAX + 1];
1271 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1274 if (!strcasecmp (type, "NEGPAREN"))
1275 format->type = CTEF_NEGPAREN;
1276 else if (!strcasecmp (type, "NEQUAL"))
1277 format->type = CTEF_NEQUAL;
1278 else if (!strcasecmp (type, "PAREN"))
1279 format->type = CTEF_PAREN;
1280 else if (!strcasecmp (type, "PCTPAREN"))
1281 format->type = CTEF_PCTPAREN;
/* Anything else is handled by the standard format parser. */
1284 *is_ctables_format = false;
1285 return (parse_format_specifier (lexer, format)
1286 && fmt_check_output (format)
1287 && fmt_check_type_compat (format, VAL_NUMERIC));
1293 lex_next_error (lexer, -1, -1,
1294 _("Output format %s requires width 2 or greater."), type);
1297 else if (format->d > format->w - 1)
1299 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1300 "greater than decimals."), type);
1305 *is_ctables_format = true;
/* Parses a primary axis expression that may be followed by a bracketed list
   of summary specifications, and attaches each specification to the
   expression.  Each specification consists of a summary function name, then a
   percentile if the function is PTILE, then an optional label string, then an
   optional output format.  Specifications may be separated by commas.

   The list starts out describing the CSV_CELL variant; a nested TOTALS[...]
   group may follow those specifications.  If there is no bracketed list, the
   result of parsing the primary expression is all there is. */
1310 static struct ctables_axis *
1311 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1313 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1314 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1317 enum ctables_summary_variant sv = CSV_CELL;
1320 int start_ofs = lex_ofs (ctx->lexer);
1322 /* Parse function. */
1323 enum ctables_summary_function function;
1324 enum ctables_weighting weighting;
1325 enum ctables_area_type area;
1326 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1330 /* Parse percentile. */
1331 double percentile = 0;
1332 if (function == CTSF_PTILE)
1334 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1336 percentile = lex_number (ctx->lexer);
1337 lex_get (ctx->lexer);
/* An optional label, copied out of the token. */
1342 if (lex_is_string (ctx->lexer))
1344 label = ss_xstrdup (lex_tokss (ctx->lexer));
1345 lex_get (ctx->lexer);
/* An optional format.  Only an identifier that contains a digit is taken to
   be one, which tells a format such as F8.2 apart from the name of the next
   summary function. */
1349 struct fmt_spec format;
1350 const struct fmt_spec *formatp;
1351 bool is_ctables_format = false;
1352 if (lex_token (ctx->lexer) == T_ID
1353 && has_digit (lex_tokcstr (ctx->lexer)))
1355 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1356 &is_ctables_format))
/* NOTE(review): the result of add_summary_spec() is not checked here, so
   parsing carries on after a specification is rejected; confirm that this is
   intended. */
1366 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1367 lex_ofs (ctx->lexer) - 1);
1368 add_summary_spec (sub, function, weighting, area, percentile, label,
1369 formatp, is_ctables_format, loc, sv);
1371 msg_location_destroy (loc);
/* The comma between specifications is optional. */
1373 lex_match (ctx->lexer, T_COMMA);
1374 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1376 if (!lex_force_match (ctx->lexer, T_LBRACK))
1380 else if (lex_match (ctx->lexer, T_RBRACK))
/* Leaving a TOTALS[...] group takes a second bracket, to close the outer
   list. */
1382 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1389 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A CTAO_VAR
   node yields AXIS itself when its 'scale' flag is set and NULL otherwise;
   for other nodes find_scale() is applied to each of the two sub-axes.  */
1393 static const struct ctables_axis *
1394 find_scale (const struct ctables_axis *axis)
1398 else if (axis->op == CTAO_VAR)
1399 return axis->scale ? axis : NULL;
1402 for (size_t i = 0; i < 2; i++)
1404 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A CTAO_VAR
   node yields AXIS itself when it qualifies and NULL otherwise; for other
   nodes the search is applied to each of the two sub-axes.  */
1412 static const struct ctables_axis *
1413 find_categorical_summary_spec (const struct ctables_axis *axis)
1417 else if (axis->op == CTAO_VAR)
1418 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1421 for (size_t i = 0; i < 2; i++)
1423 const struct ctables_axis *sum
1424 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix axis expressions joined by '>' (nesting),
   combining each pair into a CTAO_NEST node.

   Two restrictions are diagnosed with msg_at(): a nest whose left and right
   sides both contain a scale variable (per find_scale()), and an outer
   (left-hand) categorical variable that carries summary specifications (per
   find_categorical_summary_spec()).  In both cases the new nest node is
   destroyed.  */
1432 static struct ctables_axis *
1433 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1435 int start_ofs = lex_ofs (ctx->lexer);
1436 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1440 while (lex_match (ctx->lexer, T_GT))
1442 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* Frees the left-hand side; presumably reached when parsing the right-hand
   side failed (the condition is not visible here). */
1445 ctables_axis_destroy (lhs);
1449 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1450 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nest may contain a scale variable. */
1452 const struct ctables_axis *outer_scale = find_scale (lhs);
1453 const struct ctables_axis *inner_scale = find_scale (rhs);
1454 if (outer_scale && inner_scale)
1456 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1457 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1458 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1459 ctables_axis_destroy (nest);
/* Summaries on a categorical variable are allowed only at the innermost
   nesting level, so the outer side must not carry any. */
1463 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1466 msg_at (SE, nest->loc,
1467 _("Summaries may only be requested for categorical variables "
1468 "at the innermost nesting level."));
1469 msg_at (SN, outer_sum->loc,
1470 _("This outer categorical variable has a summary."));
1471 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by '+' (stacking).  Each
   additional operand is combined with what has been parsed so far into a
   CTAO_STACK node that becomes the new left-hand side.  */
1481 static struct ctables_axis *
1482 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1484 int start_ofs = lex_ofs (ctx->lexer);
1485 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1489 while (lex_match (ctx->lexer, T_PLUS))
1491 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* Frees the left-hand side; presumably reached when parsing the right-hand
   side failed (the condition is not visible here). */
1494 ctables_axis_destroy (lhs);
1498 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1499 ctx->lexer, start_ofs);
/* Parses an axis expression from LEXER and stores the result of
   ctables_axis_parse_stack() in *AXISP.  The tokens BY, '/', and end of
   command are checked for first.
   NOTE(review): the action taken for those tokens is not visible here;
   presumably they denote an empty axis -- confirm.  */
1506 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1507 struct ctables_axis **axisp)
1510 if (lex_token (lexer) == T_BY
1511 || lex_token (lexer) == T_SLASH
1512 || lex_token (lexer) == T_ENDCMD)
1515 struct ctables_axis_parse_ctx ctx = {
1519 *axisp = ctables_axis_parse_stack (&ctx);
1523 /* CTABLES categories. */
/* A list of categories.  'cats' is the array of categories; other code in
   this file iterates it using the 'n_cats' member and also reads 'n_refs'
   and 'show_empty' through this type.  */
1525 struct ctables_categories
1528 struct ctables_category *cats;
/* One category within a struct ctables_categories.  Which members are
   meaningful depends on the category type, as noted on each member.  */
1533 struct ctables_category
1535 enum ctables_category_type
1537 /* Explicit category lists. */
1540 CCT_NRANGE, /* Numerical range. */
1541 CCT_SRANGE, /* String range. */
1546 /* Totals and subtotals. */
1550 /* Implicit category lists. */
1555 /* For contributing to TOTALN. */
1556 CCT_EXCLUDED_MISSING,
1560 struct ctables_category *subtotal;
1566 double number; /* CCT_NUMBER. */
1567 struct substring string; /* CCT_STRING, in dictionary encoding. */
1568 double nrange[2]; /* CCT_NRANGE: low, high; -DBL_MAX or DBL_MAX for an open end. */
1569 struct substring srange[2]; /* CCT_SRANGE: low, high; null 'string' for an open end. */
1573 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1574 bool hide_subcategories; /* CCT_SUBTOTAL. */
1577 /* CCT_POSTCOMPUTE. */
1580 const struct ctables_postcompute *pc;
1581 enum fmt_type parse_format;
1584 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1587 bool include_missing;
1588 bool sort_ascending;
1591 enum ctables_summary_function sort_function;
1592 enum ctables_weighting weighting;
1593 enum ctables_area_type area;
1594 struct variable *sort_var;
1599 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1600 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1601 struct msg_location *location;
/* Releases the resources owned by CAT: its source location and, depending
   on the category type, the 'string' substring, both 'srange' substrings,
   or 'total_label'.  */
1605 ctables_category_uninit (struct ctables_category *cat)
1610 msg_location_destroy (cat->location);
1617 case CCT_POSTCOMPUTE:
1621 ss_dealloc (&cat->string);
1625 ss_dealloc (&cat->srange[0]);
1626 ss_dealloc (&cat->srange[1]);
1631 free (cat->total_label);
1639 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be "null" (have a null
   'string' pointer).  Two null substrings are equal, a null and a non-null
   substring are unequal, and two non-null substrings are compared with
   ss_equals().  */
1645 nullable_substring_equal (const struct substring *a,
1646 const struct substring *b)
1648 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are handled
   by the type check at the top; otherwise the comparison depends on the
   type: the number, the string, both numeric range bounds, both string range
   bounds (allowing null bounds), the postcompute pointer, the total label,
   or the sort settings (include_missing, sort_ascending, sort_function,
   sort_var, percentile).  */
1652 ctables_category_equal (const struct ctables_category *a,
1653 const struct ctables_category *b)
1655 if (a->type != b->type)
1661 return a->number == b->number;
1664 return ss_equals (a->string, b->string);
1667 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
1670 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1671 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1677 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by content. */
1678 return a->pc == b->pc;
1682 return !strcmp (a->total_label, b->total_label);
1687 return (a->include_missing == b->include_missing
1688 && a->sort_ascending == b->sort_ascending
1689 && a->sort_function == b->sort_function
1690 && a->sort_var == b->sort_var
1691 && a->percentile == b->percentile);
1693 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C, as the name and the 'n_refs' assertion suggest.
   The visible code asserts that the reference count is positive and
   uninitializes every category in C.
   NOTE(review): the decrement and the condition under which the categories
   are released are not visible here -- confirm.  */
1701 ctables_categories_unref (struct ctables_categories *c)
1706 assert (c->n_refs > 0);
1710 for (size_t i = 0; i < c->n_cats; i++)
1711 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must have the same number of
   categories and the same 'show_empty' setting, and the categories are then
   compared pairwise, in order, with ctables_category_equal().  */
1717 ctables_categories_equal (const struct ctables_categories *a,
1718 const struct ctables_categories *b)
1720 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1723 for (size_t i = 0; i < a->n_cats; i++)
1724 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
/* Returns a category, by value, whose numeric range is LOW...HIGH.  */
1730 static struct ctables_category
1731 cct_nrange (double low, double high)
1733 return (struct ctables_category) {
1735 .nrange = { low, high }
/* Returns a category, by value, whose string range is LOW...HIGH.  The
   substrings are stored as given, not copied.  */
1739 static struct ctables_category
1740 cct_srange (struct substring low, struct substring high)
1742 return (struct ctables_category) {
1744 .srange = { low, high }
/* Parses the optional "= 'label'" that may follow SUBTOTAL or HSUBTOTAL and
   initializes *CAT as a CCT_SUBTOTAL category with the given
   HIDE_SUBCATEGORIES flag.  When '=' is present a string token is required
   and its text becomes the label; the translated default "Subtotal" is the
   other label source.  The label is heap-allocated either way.  */
1749 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1750 struct ctables_category *cat)
1753 if (lex_match (lexer, T_EQUALS))
1755 if (!lex_force_string (lexer))
1758 total_label = ss_xstrdup (lex_tokss (lexer));
1762 total_label = xstrdup (_("Subtotal"));
1764 *cat = (struct ctables_category) {
1765 .type = CCT_SUBTOTAL,
1766 .hide_subcategories = hide_subcategories,
1767 .total_label = total_label
/* Parses one explicit category from LEXER into *CAT.  The forms recognized
   below are:

   - The OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, handled by ctables_table_parse_subtotal().
   - LO THRU followed by a string or a number (range open at the low end).
   - A number, optionally followed by THRU and either HI or another number.
   - A string, optionally followed by THRU and either HI or another string.
   - '&' followed by the name of a postcompute known to
     ctables_find_postcompute(); an unknown name is reported as an error.

   The final lex_error() call presumably handles anything else.  Open range
   ends are represented by -DBL_MAX or DBL_MAX for numbers and by a substring
   with a null 'string' pointer for strings.  */
1773 ctables_table_parse_explicit_category (struct lexer *lexer,
1774 struct dictionary *dict,
1776 struct ctables_category *cat)
1778 if (lex_match_id (lexer, "OTHERNM"))
1779 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1780 else if (lex_match_id (lexer, "MISSING"))
1781 *cat = (struct ctables_category) { .type = CCT_MISSING };
1782 else if (lex_match_id (lexer, "SUBTOTAL"))
1783 return ctables_table_parse_subtotal (lexer, false, cat);
1784 else if (lex_match_id (lexer, "HSUBTOTAL"))
1785 return ctables_table_parse_subtotal (lexer, true, cat);
1786 else if (lex_match_id (lexer, "LO"))
1788 if (!lex_force_match_id (lexer, "THRU"))
/* The type of the upper bound decides between a string and a numeric
   range. */
1790 if (lex_is_string (lexer))
1792 struct substring sr0 = { .string = NULL };
1793 struct substring sr1 = parse_substring (lexer, dict);
1794 *cat = cct_srange (sr0, sr1);
1796 else if (lex_force_num (lexer))
1798 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1804 else if (lex_is_number (lexer))
1806 double number = lex_number (lexer);
1808 if (lex_match_id (lexer, "THRU"))
1810 if (lex_match_id (lexer, "HI"))
1811 *cat = cct_nrange (number, DBL_MAX);
1814 if (!lex_force_num (lexer))
1816 *cat = cct_nrange (number, lex_number (lexer));
1821 *cat = (struct ctables_category) {
1826 else if (lex_is_string (lexer))
1828 struct substring s = parse_substring (lexer, dict);
1829 if (lex_match_id (lexer, "THRU"))
1831 if (lex_match_id (lexer, "HI"))
1833 struct substring sr1 = { .string = NULL };
1834 *cat = cct_srange (s, sr1);
1838 if (!lex_force_string (lexer))
1843 struct substring sr1 = parse_substring (lexer, dict);
1844 *cat = cct_srange (s, sr1);
1848 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1850 else if (lex_match (lexer, T_AND))
1852 if (!lex_force_id (lexer))
1854 struct ctables_postcompute *pc = ctables_find_postcompute (
1855 ct, lex_tokcstr (lexer));
/* Error report for a name that does not match any postcompute; the location
   covers the '&' and the identifier. */
1858 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1859 msg_at (SE, loc, _("Unknown postcompute &%s."),
1860 lex_tokcstr (lexer));
1861 msg_location_destroy (loc);
1866 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1870 lex_error (lexer, NULL);
/* Parses S as a value in FORMAT using data_in(), with DICT's encoding and
   the current format settings.  If data_in() yields an error string, it is
   reported at LOCATION.
   NOTE(review): the store into *N and the return value are not visible
   here; callers in this file treat a false result as failure.  */
1878 parse_category_string (struct msg_location *location,
1879 struct substring s, const struct dictionary *dict,
1880 enum fmt_type format, double *n)
1883 char *error = data_in (s, dict_get_encoding (dict), format,
1884 settings_get_fmt_settings (), &v, 0, NULL);
1887 msg_at (SE, location,
1888 _("Failed to parse category specification as format %s: %s."),
1889 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression E refers to,
   matching according to E->op: by number, by string, by numeric or string
   range (both bounds must agree), or by category type for MISSING, OTHERNM,
   SUBTOTAL, and TOTAL.  For subtotals, 'n_subtotals' counts the CCT_SUBTOTAL
   categories so that E->subtotal_index can be compared against a position;
   a subtotal_index of 0 is treated separately, including in the final check
   for more than one subtotal.  */
1898 static struct ctables_category *
1899 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1900 const struct ctables_pcexpr *e)
1902 struct ctables_category *best = NULL;
1903 size_t n_subtotals = 0;
1904 for (size_t i = 0; i < cats->n_cats; i++)
1906 struct ctables_category *cat = &cats->cats[i];
1909 case CTPO_CAT_NUMBER:
1910 if (cat->type == CCT_NUMBER && cat->number == e->number)
1914 case CTPO_CAT_STRING:
1915 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1919 case CTPO_CAT_NRANGE:
1920 if (cat->type == CCT_NRANGE
1921 && cat->nrange[0] == e->nrange[0]
1922 && cat->nrange[1] == e->nrange[1])
1926 case CTPO_CAT_SRANGE:
1927 if (cat->type == CCT_SRANGE
1928 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1929 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1933 case CTPO_CAT_MISSING:
1934 if (cat->type == CCT_MISSING)
1938 case CTPO_CAT_OTHERNM:
1939 if (cat->type == CCT_OTHERNM)
1943 case CTPO_CAT_SUBTOTAL:
1944 if (cat->type == CCT_SUBTOTAL)
1947 if (e->subtotal_index == n_subtotals)
1949 else if (e->subtotal_index == 0)
1954 case CTPO_CAT_TOTAL:
1955 if (cat->type == CCT_TOTAL)
/* A reference with subtotal_index 0 when CATS holds several subtotals. */
1969 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Like ctables_find_category_for_postcompute__(), but when PARSE_FORMAT is
   not FMT_F, string-valued references are first converted to numbers with
   parse_category_string(): a CTPO_CAT_STRING reference is looked up as a
   CTPO_CAT_NUMBER, and a CTPO_CAT_SRANGE reference as a CTPO_CAT_NRANGE,
   with a null range bound mapped to -DBL_MAX (low) or DBL_MAX (high).  All
   other references are looked up unchanged.  */
1974 static struct ctables_category *
1975 ctables_find_category_for_postcompute (const struct dictionary *dict,
1976 const struct ctables_categories *cats,
1977 enum fmt_type parse_format,
1978 const struct ctables_pcexpr *e)
1980 if (parse_format != FMT_F)
1982 if (e->op == CTPO_CAT_STRING)
1985 if (!parse_category_string (e->location, e->string, dict,
1986 parse_format, &number))
/* Look up the parsed number in place of the original string. */
1989 struct ctables_pcexpr e2 = {
1990 .op = CTPO_CAT_NUMBER,
1992 .location = e->location,
1994 return ctables_find_category_for_postcompute__ (cats, &e2);
1996 else if (e->op == CTPO_CAT_SRANGE)
/* Convert each bound separately; a null bound is an open end. */
1999 if (!e->srange[0].string)
2000 nrange[0] = -DBL_MAX;
2001 else if (!parse_category_string (e->location, e->srange[0], dict,
2002 parse_format, &nrange[0]))
2005 if (!e->srange[1].string)
2006 nrange[1] = DBL_MAX;
2007 else if (!parse_category_string (e->location, e->srange[1], dict,
2008 parse_format, &nrange[1]))
2011 struct ctables_pcexpr e2 = {
2012 .op = CTPO_CAT_NRANGE,
2013 .nrange = { nrange[0], nrange[1] },
2014 .location = e->location,
2016 return ctables_find_category_for_postcompute__ (cats, &e2);
2019 return ctables_find_category_for_postcompute__ (cats, e);
/* Builds a substring over the string value V, using VAR's width as its
   length, and trims trailing spaces from it.  The substring refers to V's
   own storage; nothing is copied.  */
2022 static struct substring
2023 rtrim_value (const union value *v, const struct variable *var)
2025 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2026 var_get_width (var));
2027 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, with trailing spaces removed, lies
   within SRANGE[0]...SRANGE[1] according to ss_compare().  Both bounds are
   inclusive, and a bound with a null 'string' pointer imposes no limit on
   that side.  */
2032 in_string_range (const union value *v, const struct variable *var,
2033 const struct substring *srange)
2035 struct substring s = rtrim_value (v, var);
2036 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2037 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   A system-missing numeric value is checked for first.  The categories are
   then examined from last to first, matching by number, by right-trimmed
   string, by inclusive numeric range (where -DBL_MAX and DBL_MAX mean an
   open end), by string range, or by missingness, depending on the category
   type.  If the loop does not return, the result is NULL for a missing value
   and 'othernm' otherwise.
   NOTE(review): the assignment to 'othernm' is not visible here; presumably
   it records a CCT_OTHERNM category seen in the loop -- confirm.  */
2040 static const struct ctables_category *
2041 ctables_categories_match (const struct ctables_categories *c,
2042 const union value *v, const struct variable *var)
2044 if (var_is_numeric (var) && v->f == SYSMIS)
2047 const struct ctables_category *othernm = NULL;
2048 for (size_t i = c->n_cats; i-- > 0; )
2050 const struct ctables_category *cat = &c->cats[i];
2054 if (cat->number == v->f)
2059 if (ss_equals (cat->string, rtrim_value (v, var)))
2064 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2065 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2070 if (in_string_range (v, var, cat->srange))
2075 if (var_is_value_missing (var, v))
2079 case CCT_POSTCOMPUTE:
2094 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2097 case CCT_EXCLUDED_MISSING:
2102 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's total category: the first category if it has type CCT_TOTAL,
   otherwise the last category if it does.
   NOTE(review): 'last' is computed as cats[n_cats - 1], so this assumes that
   C has at least one category -- confirm that callers never pass an empty
   list.  */
2105 static const struct ctables_category *
2106 ctables_categories_total (const struct ctables_categories *c)
2108 const struct ctables_category *first = &c->cats[0];
2109 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2110 return (first->type == CCT_TOTAL ? first
2111 : last->type == CCT_TOTAL ? last
/* Formats NUMBER as a value of VAR and appends the text to S.  A temporary
   pivot value is created for the purpose and destroyed afterward.  */
2116 ctables_category_format_number (double number, const struct variable *var,
2119 struct pivot_value *pv = pivot_value_new_var_value (
2120 var, &(union value) { .f = number });
2121 pivot_value_format (pv, NULL, s);
2122 pivot_value_destroy (pv);
/* Formats STRING as a value of VAR and appends the text to OUT.  STRING is
   first copied into a temporary buffer of VAR's width, right-padded with
   spaces, which is then wrapped in a temporary pivot value.
   NOTE(review): the release of 's' is not visible here -- confirm that it
   is freed after the pivot value is destroyed.  */
2126 ctables_category_format_string (struct substring string,
2127 const struct variable *var, struct string *out)
2129 int width = var_get_width (var);
2130 char *s = xmalloc (width);
2131 buf_copy_rpad (s, width, string.string, string.length, ' ');
2132 struct pivot_value *pv = pivot_value_new_var_value (
2133 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2134 pivot_value_format (pv, NULL, out);
2135 pivot_value_destroy (pv);
/* Appends a textual label for category CAT of variable VAR to S.  The forms
   produced below are: the formatted number or string; "LOW THRU HIGH" for
   numeric and string ranges, with each bound formatted as a value of VAR;
   the literal keywords MISSING and OTHERNM; "&NAME" for a postcompute; and
   the total label for totals.  */
2140 ctables_category_format_label (const struct ctables_category *cat,
2141 const struct variable *var,
2147 ctables_category_format_number (cat->number, var, s);
2151 ctables_category_format_string (cat->string, var, s);
2155 ctables_category_format_number (cat->nrange[0], var, s);
2156 ds_put_format (s, " THRU ");
2157 ctables_category_format_number (cat->nrange[1], var, s);
2161 ctables_category_format_string (cat->srange[0], var, s);
2162 ds_put_format (s, " THRU ");
2163 ctables_category_format_string (cat->srange[1], var, s);
2167 ds_put_cstr (s, "MISSING");
2171 ds_put_cstr (s, "OTHERNM");
2174 case CCT_POSTCOMPUTE:
2175 ds_put_format (s, "&%s", cat->pc->name);
2180 ds_put_cstr (s, cat->total_label);
2186 case CCT_EXCLUDED_MISSING:
/* Checks the categories referenced by postcompute expression E, which
   belongs to computed category PC_CAT, against the category list CATS.

   Category-reference operators are resolved with
   ctables_find_category_for_postcompute().  The diagnostics below cover an
   unindexed SUBTOTAL reference when CATS contains more than one subtotal,
   and a reference to a category that is not in the list, each followed by a
   hint about how to fix it.  Other operators are checked by recursing into
   both subexpressions.  CATS_LOCATION is the source location used in
   messages about the category list.  */
2194 ctables_recursive_check_postcompute (struct dictionary *dict,
2195 const struct ctables_pcexpr *e,
2196 struct ctables_category *pc_cat,
2197 const struct ctables_categories *cats,
2198 const struct msg_location *cats_location)
2202 case CTPO_CAT_NUMBER:
2203 case CTPO_CAT_STRING:
2204 case CTPO_CAT_NRANGE:
2205 case CTPO_CAT_SRANGE:
2206 case CTPO_CAT_MISSING:
2207 case CTPO_CAT_OTHERNM:
2208 case CTPO_CAT_SUBTOTAL:
2209 case CTPO_CAT_TOTAL:
2211 struct ctables_category *cat = ctables_find_category_for_postcompute (
2212 dict, cats, pc_cat->parse_format, e);
/* An unindexed SUBTOTAL reference is ambiguous if the list contains more
   than one subtotal, so count them. */
2215 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
2217 size_t n_subtotals = 0;
2218 for (size_t i = 0; i < cats->n_cats; i++)
2219 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
2220 if (n_subtotals > 1)
2222 msg_at (SE, cats_location,
2223 ngettext ("These categories include %zu instance "
2224 "of SUBTOTAL or HSUBTOTAL, so references "
2225 "from computed categories must refer to "
2226 "subtotals by position, "
2227 "e.g. SUBTOTAL[1].",
2228 "These categories include %zu instances "
2229 "of SUBTOTAL or HSUBTOTAL, so references "
2230 "from computed categories must refer to "
2231 "subtotals by position, "
2232 "e.g. SUBTOTAL[1].",
2235 msg_at (SN, e->location,
2236 _("This is the reference that lacks a position."));
/* Diagnostics for a referenced category that is not in the list. */
2241 msg_at (SE, pc_cat->location,
2242 _("Computed category &%s references a category not included "
2243 "in the category list."),
2245 msg_at (SN, e->location, _("This is the missing category."));
2246 if (e->op == CTPO_CAT_SUBTOTAL)
2247 msg_at (SN, cats_location,
2248 _("To fix the problem, add subtotals to the "
2249 "list of categories here."));
/* NOTE(review): unlike the neighboring hints, the TOTAL hint below uses
   msg() and so carries no source location -- confirm this is intended. */
2250 else if (e->op == CTPO_CAT_TOTAL)
2251 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
2252 "CATEGORIES specification."));
2254 msg_at (SN, cats_location,
2255 _("To fix the problem, add the missing category to the "
2256 "list of categories here."));
2259 if (pc_cat->pc->hide_source_cats)
/* Check both operands, skipping any that is null. */
2273 for (size_t i = 0; i < 2; i++)
2274 if (e->subs[i] && !ctables_recursive_check_postcompute (
2275 dict, e->subs[i], pc_cat, cats, cats_location))
/* Builds the label for postcompute category CAT, expanding each ")LABEL[N]"
   reference in CAT->pc->label into the label of the Nth category (counting
   from 1) in CATS, formatted for VAR with ctables_category_format_label().

   If the label contains no reference, it is used as is.  N must parse fully
   as a decimal integer in the range 1...CATS->n_cats.  The final return,
   which uses the unexpanded label, is presumably the fallback for a
   malformed reference (the branches leading to it are not visible here).  */
2283 static struct pivot_value *
2284 ctables_postcompute_label (const struct ctables_categories *cats,
2285 const struct ctables_category *cat,
2286 const struct variable *var)
2288 struct substring in = ss_cstr (cat->pc->label);
2289 struct substring target = ss_cstr (")LABEL[");
2291 struct string out = DS_EMPTY_INITIALIZER;
2294 size_t chunk = ss_find_substring (in, target);
2295 if (chunk == SIZE_MAX)
/* No further references.  If nothing was expanded, use the input directly;
   otherwise append the remaining text and hand over the built string. */
2297 if (ds_is_empty (&out))
2298 return pivot_value_new_user_text (in.string, in.length);
2301 ds_put_substring (&out, in);
2302 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then skip past ")LABEL[". */
2306 ds_put_substring (&out, ss_head (in, chunk));
2307 ss_advance (&in, chunk + target.length);
2309 struct substring idx_s;
2310 if (!ss_get_until (&in, ']', &idx_s))
2313 long int idx = strtol (idx_s.string, &tail, 10);
/* Reject an out-of-range index or trailing junk before the ']'. */
2314 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
2317 struct ctables_category *cat2 = &cats->cats[idx - 1];
2318 if (!ctables_category_format_label (cat2, var, &out))
2324 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value used to label category CAT of variable VAR:

   - For a postcompute that has a label, the expanded postcompute label.
   - For a total or subtotal, the category's total label as user text.
   - Otherwise, VALUE presented as a value of VAR.  */
2327 static struct pivot_value *
2328 ctables_category_create_value_label (const struct ctables_categories *cats,
2329 const struct ctables_category *cat,
2330 const struct variable *var,
2331 const union value *value)
2333 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
2334 ? ctables_postcompute_label (cats, cat, var)
2335 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
2336 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
2337 : pivot_value_new_var_value (var, value));
2340 /* CTABLES variable nesting and stacking. */
2342 /* A nested sequence of variables, e.g. a > b > c. */
/* 'vars' holds the nested variables.  'areas' and 'n_areas' hold, for each
   area type, an array of indexes and its length (ctables_nest_uninit() frees
   each 'areas' array).  'specs' holds one summary spec set per summary
   variant. */
2345 struct variable **vars;
2349 size_t *areas[N_CTATS];
2350 size_t n_areas[N_CTATS];
2353 struct ctables_summary_spec_set specs[N_CSVS];
2356 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2357 struct ctables_stack
/* Array of nests; other code in this file uses the 'n' member as its
   length. */
2359 struct ctables_nest *nests;
/* Releases resources owned by NEST: the summary spec set for every summary
   variant and the 'areas' array for every area type.  */
2364 ctables_nest_uninit (struct ctables_nest *nest)
2367 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2368 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2369 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2370 free (nest->areas[at]);
/* Uninitializes every nest in STACK and frees the array of nests.  */
2374 ctables_stack_uninit (struct ctables_stack *stack)
2378 for (size_t i = 0; i < stack->n; i++)
2379 ctables_nest_uninit (&stack->nests[i]);
2380 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new nest for
   every pair (a, b) with a from S0 and b from S1, holding a's variables
   followed by b's.  A scale or summary index taken from b is offset by
   a->n.  The summary spec sets are cloned from 'summary_src', which is
   selected between a and b by testing which of them has a CSV_CELL spec
   variable (the assignments are not visible here).  Both input stacks are
   uninitialized at the end.  */
2384 static struct ctables_stack
2385 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* NOTE(review): xnmalloc() can only check the product of its two arguments
   for overflow; 's1.n * sizeof *stack.nests' is multiplied before the call
   and is not covered by that check. */
2392 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2393 for (size_t i = 0; i < s0.n; i++)
2394 for (size_t j = 0; j < s1.n; j++)
2396 const struct ctables_nest *a = &s0.nests[i];
2397 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the two variable lists. */
2399 size_t allocate = a->n + b->n;
2400 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2402 for (size_t k = 0; k < a->n; k++)
2403 vars[n++] = a->vars[k];
2404 for (size_t k = 0; k < b->n; k++)
2405 vars[n++] = b->vars[k];
2406 assert (n == allocate);
2408 const struct ctables_nest *summary_src;
2409 if (!a->specs[CSV_CELL].var)
2411 else if (!b->specs[CSV_CELL].var)
2416 struct ctables_nest *new = &stack.nests[stack.n++];
2417 *new = (struct ctables_nest) {
/* Prefer a's index; otherwise shift b's index past a's variables. */
2419 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2420 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2422 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2423 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2427 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2428 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2430 ctables_stack_uninit (&s0);
2431 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S0 and S1: S0's nests
   followed by S1's.  The nest structures are copied by plain assignment, so
   the new stack takes over whatever they point to.  Each nest that comes
   from S1 has its 'group_head' increased by S0.n to account for its new
   position.  */
2435 static struct ctables_stack
2436 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2438 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2439 for (size_t i = 0; i < s0.n; i++)
2440 stack.nests[stack.n++] = s0.nests[i];
2441 for (size_t i = 0; i < s1.n; i++)
2443 stack.nests[stack.n] = s1.nests[i];
2444 stack.nests[stack.n].group_head += s0.n;
2447 assert (stack.n == s0.n + s1.n);
/* Builds a stack that contains a single nest for the variable axis A.  The
   nest's scale index is 0 if A is a scale variable and SIZE_MAX otherwise;
   its summary index is 0 if A has CSV_CELL summary specs or is scale, and
   SIZE_MAX otherwise.  A's summary spec sets are cloned into the nest and
   tagged with A's variable and scale flag.  */
2453 static struct ctables_stack
2454 var_fts (const struct ctables_axis *a)
2456 struct variable **vars = xmalloc (sizeof *vars);
2459 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2460 struct ctables_nest *nest = xmalloc (sizeof *nest);
2461 *nest = (struct ctables_nest) {
2464 .scale_idx = a->scale ? 0 : SIZE_MAX,
2465 .summary_idx = is_summary ? 0 : SIZE_MAX,
2468 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2470 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2471 nest->specs[sv].var = a->var;
2472 nest->specs[sv].is_scale = a->scale;
2474 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  The visible cases return
   an empty stack, combine the stacks of the two sub-axes with stack_fts(),
   or combine them with nest_fts().
   NOTE(review): the conditions selecting each case are not visible here;
   presumably they are a null axis, CTAO_STACK, and CTAO_NEST respectively
   -- confirm.  */
2477 static struct ctables_stack
2478 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2481 return (struct ctables_stack) { .n = 0 };
2489 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2490 enumerate_fts (axis_type, a->subs[1]));
2493 /* This should consider any of the scale variables found in the result to
2494 be linked to each other listwise for SMISSING=LISTWISE. */
2495 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2496 enumerate_fts (axis_type, a->subs[1]));
2502 /* CTABLES summary calculation. */
/* Accumulator for one summary statistic.  It is a union: which members are
   in use depends on the summary function, as grouped below. */
2504 union ctables_summary
2506 /* COUNT, VALIDN, TOTALN. */
2509 /* MINIMUM, MAXIMUM, RANGE. */
2516 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2517 struct moments1 *moments;
2519 /* MEDIAN, MODE, PTILE. */
2522 struct casewriter *writer;
/* Initializes summary S for spec SS, according to SS->function.  The visible
   initializations are: minimum and maximum both set to SYSMIS; a moments
   accumulator created for MOMENT_MEAN or for MOMENT_VARIANCE; and a sorting
   case writer over cases with two width-0 columns, ordered ascending on
   column 0.  */
2529 ctables_summary_init (union ctables_summary *s,
2530 const struct ctables_summary_spec *ss)
2532 switch (ss->function)
2535 case CTSF_areaPCT_COUNT:
2536 case CTSF_areaPCT_VALIDN:
2537 case CTSF_areaPCT_TOTALN:
2550 s->min = s->max = SYSMIS;
2555 case CTSF_areaPCT_SUM:
2556 s->moments = moments1_create (MOMENT_MEAN);
2562 s->moments = moments1_create (MOMENT_VARIANCE);
/* ctables_summary_add() stores the value in column 0 and the weight in
   column 1 of each case written here. */
2569 struct caseproto *proto = caseproto_create ();
2570 proto = caseproto_add_width (proto, 0);
2571 proto = caseproto_add_width (proto, 0);
2573 struct subcase ordering;
2574 subcase_init (&ordering, 0, 0, SC_ASCEND);
2575 s->writer = sort_create_writer (&ordering, proto);
2576 subcase_uninit (&ordering);
2577 caseproto_unref (proto);
/* Releases the resources held by summary S for spec SS, according to
   SS->function: the moments accumulator or the case writer, for the
   functions that use one.  */
2587 ctables_summary_uninit (union ctables_summary *s,
2588 const struct ctables_summary_spec *ss)
2590 switch (ss->function)
2593 case CTSF_areaPCT_COUNT:
2594 case CTSF_areaPCT_VALIDN:
2595 case CTSF_areaPCT_TOTALN:
2614 case CTSF_areaPCT_SUM:
2615 moments1_destroy (s->moments);
2621 casewriter_destroy (s->writer);
/* Adds one observation to summary S for spec SS.  VALUE is the observed
   value; IS_MISSING and IS_INCLUDED presumably correspond to the
   missing/valid and included/excluded distinctions in the chart inside the
   function.  Depending on SS->function, the code updates a running minimum
   and maximum, passes VALUE->f and a weight to moments1_add(), or appends a
   (value, weight) case to S->writer while adding the weight to S->ovalid.  */
2627 ctables_summary_add (union ctables_summary *s,
2628 const struct ctables_summary_spec *ss,
2629 const union value *value,
2630 bool is_missing, bool is_included,
2633 /* To determine whether a case is included in a given table for a particular
2634 kind of summary, consider the following charts for the variable being
2635 summarized. Only if "yes" appears is the case counted.
2637 Categorical variables: VALIDN other TOTALN
2638 Valid values in included categories yes yes yes
2639 Missing values in included categories --- yes yes
2640 Missing values in excluded categories --- --- yes
2641 Valid values in excluded categories --- --- ---
2643 Scale variables: VALIDN other TOTALN
2644 Valid value yes yes yes
2645 Missing value --- yes yes
2647 Missing values include both user- and system-missing. (The system-missing
2648 value is always in an excluded category.)
2650 One way to interpret the above table is that scale variables are like
2651 categorical variables in which all values are in included categories.
2653 switch (ss->function)
2656 case CTSF_areaPCT_TOTALN:
2661 case CTSF_areaPCT_COUNT:
2667 case CTSF_areaPCT_VALIDN:
2685 if (s->min == SYSMIS || value->f < s->min)
2687 if (s->max == SYSMIS || value->f > s->max)
2698 moments1_add (s->moments, value->f, weight);
2701 case CTSF_areaPCT_SUM:
2703 moments1_add (s->moments, value->f, weight);
2711 s->ovalid += weight;
2713 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2714 *case_num_rw_idx (c, 0) = value->f;
2715 *case_num_rw_idx (c, 1) = weight;
2716 casewriter_write (s->writer, c);
/* Computes the final value of summary S for spec SS.  AREAS holds, per area
   type, the area that contains the cell.  Visible cases:

   - One case returns the 'sequence' of the area selected by SS->calc_area.
   - The percentage functions divide S->count by the count, valid, or total
     figure of that area (for SS->weighting) and multiply by 100, yielding
     SYSMIS when the denominator is zero.  CTSF_areaPCT_SUM does the same
     with weight * mean over the area's sum for SS->sum_var_idx.
   - Range is max - min, or SYSMIS if either is SYSMIS.
   - Moment-based statistics come from moments1_calculate().
   - Percentile and mode statistics turn S->writer into a reader, feed it to
     an order statistic, and store the result in S->ovalue.  */
2723 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2724 union ctables_summary *s,
2725 const struct ctables_summary_spec *ss)
2727 switch (ss->function)
2733 return areas[ss->calc_area]->sequence;
2735 case CTSF_areaPCT_COUNT:
2737 const struct ctables_area *a = areas[ss->calc_area];
2738 double a_count = a->count[ss->weighting];
2739 return a_count ? s->count / a_count * 100 : SYSMIS;
2742 case CTSF_areaPCT_VALIDN:
2744 const struct ctables_area *a = areas[ss->calc_area];
2745 double a_valid = a->valid[ss->weighting];
2746 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2749 case CTSF_areaPCT_TOTALN:
2751 const struct ctables_area *a = areas[ss->calc_area];
2752 double a_total = a->total[ss->weighting];
2753 return a_total ? s->count / a_total * 100 : SYSMIS;
2768 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2773 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2779 double weight, variance;
2780 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2781 return calc_semean (variance, weight);
2787 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2788 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2793 double weight, mean;
2794 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2795 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2801 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2805 case CTSF_areaPCT_SUM:
2807 double weight, mean;
2808 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2809 if (weight == SYSMIS || mean == SYSMIS)
2812 const struct ctables_area *a = areas[ss->calc_area];
2813 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2814 double denom = sum->sum[ss->weighting];
2815 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
2822 struct casereader *reader = casewriter_make_reader (s->writer);
/* NOTE(review): ss->percentile is passed here unscaled, next to the literal
   0.5 used for the median, while the PTILE argument is parsed as a number in
   0...100 -- confirm that it is rescaled to a fraction before this point. */
2825 struct percentile *ptile = percentile_create (
2826 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2827 struct order_stats *os = &ptile->parent;
2828 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2829 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2830 statistic_destroy (&ptile->parent.parent);
2837 struct casereader *reader = casewriter_make_reader (s->writer);
2840 struct mode *mode = mode_create ();
2841 struct order_stats *os = &mode->parent;
2842 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2843 s->ovalue = mode->mode;
2844 statistic_destroy (&mode->parent.parent);
2852 /* CTABLES occurrences. */
/* One distinct value recorded in an "occurrences" hash map by
   ctables_add_occurrence().  'node' links the entry into that map; the
   'value' member that ctables_add_occurrence() compares and fills in is not
   visible here. */
2854 struct ctables_occurrence
2856 struct hmap_node node;
/* Records VALUE of variable VAR in the hash map OCCURRENCES.  The map is
   first searched, by hash and then by value equality at VAR's width, for an
   existing entry; the code that follows allocates a new entry holding a
   clone of VALUE and inserts it.
   NOTE(review): the action taken when an equal entry is found is not
   visible here; presumably the function returns without inserting a
   duplicate -- confirm.  */
2861 ctables_add_occurrence (const struct variable *var,
2862 const union value *value,
2863 struct hmap *occurrences)
2865 int width = var_get_width (var);
2866 unsigned int hash = value_hash (value, width, 0);
2868 struct ctables_occurrence *o;
2869 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
2871 if (value_equal (value, &o->value, width))
2874 o = xmalloc (sizeof *o);
2875 value_clone (&o->value, value, width);
2876 hmap_insert (occurrences, &o->node, hash);
/* CTVL_* constants, each defined as an alias for the corresponding
   SETTINGS_VALUE_SHOW_* constant so that the two sets are interchangeable. */
2881 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2882 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2883 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2884 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
/* Members of one table cell (the structure's tag line is not visible here;
   struct ctables_section's comments call it "struct ctables_cell"). */
2889 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2890 all the axes (except the scalar variable, if any). */
2891 struct hmap_node node;
2893 /* The areas that contain this cell. */
2894 uint32_t omit_areas;
2895 struct ctables_area *areas[N_CTATS];
2900 enum ctables_summary_variant sv;
/* Per-axis data; each axis holds one ctables_cell_value per variable, which
   pairs the variable's category with (per code elsewhere in this file) its
   value. */
2902 struct ctables_cell_axis
2904 struct ctables_cell_value
2906 const struct ctables_category *category;
/* Summary accumulators for this cell. */
2914 union ctables_summary *summaries;
/* One section of a table: a combination of one nest per pivot axis, with
   the cells and areas accumulated for it. */
2917 struct ctables_section
2920 struct ctables_table *table;
2921 struct ctables_nest *nests[PIVOT_N_AXES];
2924 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2925 struct hmap cells; /* Contains "struct ctables_cell"s. */
2926 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
/* Forward declaration; the definition is elsewhere in this file. */
2929 static void ctables_section_uninit (struct ctables_section *);
/* One table within a CTABLES command: its parsed axes, the stacks and
   sections derived from them, and its summary, label placement, and
   category settings. */
2931 struct ctables_table
2933 struct ctables *ctables;
2934 struct ctables_axis *axes[PIVOT_N_AXES];
2935 struct ctables_stack stacks[PIVOT_N_AXES];
2936 struct ctables_section *sections;
2938 enum pivot_axis_type summary_axis;
2939 struct ctables_summary_spec_set summary_specs;
2940 struct variable **sum_vars;
2943 enum pivot_axis_type slabels_axis;
2944 bool slabels_visible;
2946 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2948 Most commonly, label_axis[a] == a, and in particular we always have
2949 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2951 If ROWLABELS or COLLABELS is specified, then one of
2952 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2953 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2955 If any category labels are moved, then 'clabels_example' is one of the
2956 variables being moved (and it is otherwise NULL). All of the variables
2957 being moved have the same width, value labels, and categories, so this
2958 example variable can be used to find those out.
2960 The remaining members in this group are relevant only if category labels
2963 'clabels_values_map' holds a "struct ctables_value" for all the values
2964 that appear in all of the variables in the moved categories. It is
2965 accumulated as the data is read. Once the data is fully read, its
2966 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2968 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2969 enum pivot_axis_type clabels_from_axis;
2970 enum pivot_axis_type clabels_to_axis;
2971 const struct variable *clabels_example;
2972 struct hmap clabels_values_map;
2973 struct ctables_value **clabels_values;
2974 size_t n_clabels_values;
2976 /* Indexed by variable dictionary index. */
2977 struct ctables_categories **categories;
2978 size_t n_categories;
2986 struct ctables_chisq *chisq;
2987 struct ctables_pairwise *pairwise;
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis 'a' whose cell values are read. */
2990 struct ctables_cell_sort_aux
2992 const struct ctables_nest *nest;
2993 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   "struct ctables_cell *", and AUX_ points to a struct
   ctables_cell_sort_aux.

   The variables of AUX->nest are compared in order, skipping the one at the
   nest's scale index.  Cells in different categories are ordered by
   comparing the category pointers.  Within the same category the order
   depends on the category type: some types can only be equal, and others
   compare the values, or the value labels and the values, with the result
   negated when the category's 'sort_ascending' is false.  */
2997 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2999 const struct ctables_cell_sort_aux *aux = aux_;
3000 struct ctables_cell *const *ap = a_;
3001 struct ctables_cell *const *bp = b_;
3002 const struct ctables_cell *a = *ap;
3003 const struct ctables_cell *b = *bp;
3005 const struct ctables_nest *nest = aux->nest;
3006 for (size_t i = 0; i < nest->n; i++)
3007 if (i != nest->scale_idx)
3009 const struct variable *var = nest->vars[i];
3010 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3011 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3012 if (a_cv->category != b_cv->category)
3013 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category from here on, so a_cv->category also describes b_cv. */
3015 const union value *a_val = &a_cv->value;
3016 const union value *b_val = &b_cv->value;
3017 switch (a_cv->category->type)
3023 case CCT_POSTCOMPUTE:
3024 case CCT_EXCLUDED_MISSING:
3025 /* Must be equal. */
3033 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3041 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3043 return a_cv->category->sort_ascending ? cmp : -cmp;
3049 const char *a_label = var_lookup_value_label (var, a_val);
3050 const char *b_label = var_lookup_value_label (var, b_val);
3056 cmp = strcmp (a_label, b_label);
3062 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3065 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison callback that orders cells by their per-axis 'leaf'
   indexes, examining the pivot axes in increasing order.  A_ and B_ each
   point to a 'struct ctables_cell *'.  The auxiliary argument is unused. */
3077 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3078 const void *aux UNUSED)
3080 struct ctables_cell *const *ap = a_;
3081 struct ctables_cell *const *bp = b_;
3082 const struct ctables_cell *a = *ap;
3083 const struct ctables_cell *b = *bp;
3085 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3087 int al = a->axes[axis].leaf;
3088 int bl = b->axes[axis].leaf;
3090 return al > bl ? 1 : -1;
/* Finds the area of type AREA in section S that CELL belongs to, creating
   and inserting it into s->areas[AREA] if there is none yet.

   On each axis, the nest's areas[AREA] array lists the indexes of the
   variables that identify an area of this type.  Two cells are in the same
   area if, for every listed variable, they have the same category and
   (except for total, subtotal, and postcompute categories) the same
   value. */
3095 static struct ctables_area *
3096 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3097 enum ctables_area_type area)
/* Hash the identifying categories and values of CELL. */
3100 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3102 const struct ctables_nest *nest = s->nests[a];
3103 for (size_t i = 0; i < nest->n_areas[area]; i++)
3105 size_t v_idx = nest->areas[area][i];
3106 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3107 hash = hash_pointer (cv->category, hash);
3108 if (cv->category->type != CCT_TOTAL
3109 && cv->category->type != CCT_SUBTOTAL
3110 && cv->category->type != CCT_POSTCOMPUTE)
3111 hash = value_hash (&cv->value,
3112 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area whose example cell matches CELL on every
   identifying variable. */
3116 struct ctables_area *a;
3117 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3119 const struct ctables_cell *df = a->example;
/* Within this loop, 'a' names the axis and shadows the area pointer 'a'
   declared above. */
3120 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3122 const struct ctables_nest *nest = s->nests[a];
3123 for (size_t i = 0; i < nest->n_areas[area]; i++)
3125 size_t v_idx = nest->areas[area][i];
3126 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3127 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3128 if (cv1->category != cv2->category
3129 || (cv1->category->type != CCT_TOTAL
3130 && cv1->category->type != CCT_SUBTOTAL
3131 && cv1->category->type != CCT_POSTCOMPUTE
3132 && !value_equal (&cv1->value, &cv2->value,
3133 var_get_width (nest->vars[v_idx]))))
/* No match: create a new area with CELL as its example.  When the table
   has sum variables, the area gets a zeroed array with one sum per
   variable. */
3142 struct ctables_sum *sums = (s->table->n_sum_vars
3143 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3146 a = xmalloc (sizeof *a);
3147 *a = (struct ctables_area) { .example = cell, .sums = sums };
3148 hmap_insert (&s->areas[area], &a->node, hash);
/* Looks up the cell in section S that is identified by the categories in
   CATS plus, for categories other than totals, subtotals, and
   postcomputes, the values of the nest variables in case C.  If no such
   cell exists yet, creates one, initializes its summaries, attaches it to
   an area of every area type, and inserts it into s->cells.

   Scale variables take no part in identifying a cell. */
3152 static struct ctables_cell *
3153 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3154 const struct ctables_category **cats[PIVOT_N_AXES])
3157 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the categories and values that identify the cell. */
3158 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3160 const struct ctables_nest *nest = s->nests[a];
3161 for (size_t i = 0; i < nest->n; i++)
3162 if (i != nest->scale_idx)
3164 hash = hash_pointer (cats[a][i], hash);
3165 if (cats[a][i]->type != CCT_TOTAL
3166 && cats[a][i]->type != CCT_SUBTOTAL
3167 && cats[a][i]->type != CCT_POSTCOMPUTE)
3168 hash = value_hash (case_data (c, nest->vars[i]),
3169 var_get_width (nest->vars[i]), hash);
/* Search for an existing cell with the same categories and values. */
3175 struct ctables_cell *cell;
3176 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3178 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3180 const struct ctables_nest *nest = s->nests[a];
3181 for (size_t i = 0; i < nest->n; i++)
3182 if (i != nest->scale_idx
3183 && (cats[a][i] != cell->axes[a].cvs[i].category
3184 || (cats[a][i]->type != CCT_TOTAL
3185 && cats[a][i]->type != CCT_SUBTOTAL
3186 && cats[a][i]->type != CCT_POSTCOMPUTE
3187 && !value_equal (case_data (c, nest->vars[i]),
3188 &cell->axes[a].cvs[i].value,
3189 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3198 cell = xmalloc (sizeof *cell);
3201 cell->omit_areas = 0;
3202 cell->postcompute = false;
3203 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3205 const struct ctables_nest *nest = s->nests[a];
3206 cell->axes[a].cvs = (nest->n
3207 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3209 for (size_t i = 0; i < nest->n; i++)
3211 const struct ctables_category *cat = cats[a][i];
3212 const struct variable *var = nest->vars[i];
3213 const union value *value = case_data (c, var);
3214 if (i != nest->scale_idx)
3216 const struct ctables_category *subtotal = cat->subtotal;
3217 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category sets bits in 'omit_areas',
   one bit per area type (1u << CTAT_*).  Which bits are set depends on the
   axis that the category is on. */
3220 if (cat->type == CCT_TOTAL
3221 || cat->type == CCT_SUBTOTAL
3222 || cat->type == CCT_POSTCOMPUTE)
3226 case PIVOT_AXIS_COLUMN:
3227 cell->omit_areas |= ((1u << CTAT_TABLE) |
3228 (1u << CTAT_LAYER) |
3229 (1u << CTAT_LAYERCOL) |
3230 (1u << CTAT_SUBTABLE) |
3233 case PIVOT_AXIS_ROW:
3234 cell->omit_areas |= ((1u << CTAT_TABLE) |
3235 (1u << CTAT_LAYER) |
3236 (1u << CTAT_LAYERROW) |
3237 (1u << CTAT_SUBTABLE) |
3240 case PIVOT_AXIS_LAYER:
3241 cell->omit_areas |= ((1u << CTAT_TABLE) |
3242 (1u << CTAT_LAYER));
3246 if (cat->type == CCT_POSTCOMPUTE)
3247 cell->postcompute = true;
/* The cell keeps its own copy of each value, so it does not depend on the
   lifetime of case C. */
3250 cell->axes[a].cvs[i].category = cat;
3251 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* One summary per summary specification in the set that the nest on the
   table's summary axis provides for this cell's summary variant. */
3255 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3256 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3257 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3258 for (size_t i = 0; i < specs->n; i++)
3259 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3260 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3261 cell->areas[at] = ctables_area_insert (s, cell, at);
3262 hmap_insert (&s->cells, &cell->node, hash);
/* Combines the N_CTWS weights in SRC into the corresponding elements of
   DST, one weighting at a time.
   NOTE(review): callers use this to accumulate a case's weights into area
   totals, so the loop body presumably adds src[wt] to dst[wt] -- confirm. */
3267 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3269 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
/* Adds case C, with weights WEIGHT (one per weighting), to the cell in
   section S identified by CATS, creating the cell if necessary.

   Every summary in the cell is updated, and then so is each area that the
   cell contributes to: the area's 'total', 'count', and 'valid' weights and,
   unless the summary variable is scale-missing, the weighted sum of each of
   the table's sum variables whose value in C is not missing.

   IS_INCLUDED is passed along to each summary. */
3274 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3275 const struct ctables_category **cats[PIVOT_N_AXES],
3276 bool is_included, double weight[N_CTWS])
3278 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3279 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3281 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3282 const union value *value = case_data (c, specs->var);
3283 bool is_missing = var_is_value_missing (specs->var, value);
3284 bool is_scale_missing
3285 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3287 for (size_t i = 0; i < specs->n; i++)
3288 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3289 is_scale_missing, is_included,
3290 weight[specs->specs[i].weighting]);
/* 'omit_areas' is a bit mask with one bit per area type, so this area's
   bit has to be tested with bitwise AND.  A logical AND would be true for
   every area as soon as any bit at all is set, because (1u << at) is never
   zero. */
3291 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3292 if (!(cell->omit_areas & (1u << at)))
3294 struct ctables_area *a = cell->areas[at];
3296 add_weight (a->total, weight);
3298 add_weight (a->count, weight);
3301 add_weight (a->valid, weight);
3303 if (!is_scale_missing)
3304 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3306 const struct variable *var = s->table->sum_vars[i];
3307 double addend = case_num (c, var);
3308 if (!var_is_num_missing (var, addend))
3309 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3310 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the cells formed by substituting total categories into
   CATS.

   Starting from variable START_NEST on axis START_AXIS, each variable other
   than a scale variable is considered in turn.  The total category for the
   variable's categories is looked up, C is added to the cell that has that
   total in the variable's slot of CATS, and then the function recurses on
   the variables that follow, so that cells combining several totals also
   receive the case.

   NOTE(review): 'save' holds the category being replaced, presumably so
   that CATS can be restored after the recursion -- confirm. */
3317 recurse_totals (struct ctables_section *s, const struct ccase *c,
3318 const struct ctables_category **cats[PIVOT_N_AXES],
3319 bool is_included, double weight[N_CTWS],
3320 enum pivot_axis_type start_axis, size_t start_nest)
3322 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3324 const struct ctables_nest *nest = s->nests[a];
3325 for (size_t i = start_nest; i < nest->n; i++)
3327 if (i == nest->scale_idx)
3330 const struct variable *var = nest->vars[i];
3332 const struct ctables_category *total = ctables_categories_total (
3333 s->table->categories[var_get_dict_index (var)]);
3336 const struct ctables_category *save = cats[a][i];
3338 ctables_cell_add__ (s, c, cats, is_included, weight);
3339 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the cells formed by substituting subtotal categories into
   CATS.

   Starting from variable START_NEST on axis START_AXIS, each variable other
   than a scale variable is considered in turn.  The variable's category in
   CATS is replaced by that category's 'subtotal', C is added to the
   resulting cell, and then the function recurses on the variables that
   follow, so that cells combining several subtotals also receive the case.

   NOTE(review): 'save' holds the category being replaced, presumably so
   that CATS can be restored after the recursion -- confirm. */
3348 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3349 const struct ctables_category **cats[PIVOT_N_AXES],
3350 bool is_included, double weight[N_CTWS],
3351 enum pivot_axis_type start_axis, size_t start_nest)
3353 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3355 const struct ctables_nest *nest = s->nests[a];
3356 for (size_t i = start_nest; i < nest->n; i++)
3358 if (i == nest->scale_idx)
3361 const struct ctables_category *save = cats[a][i];
3364 cats[a][i] = save->subtotal;
3365 ctables_cell_add__ (s, c, cats, is_included, weight);
3366 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C, with weights WEIGHT (one per weighting), to section S.

   Each nest variable other than a scale variable has its value in C matched
   against the variable's categories.  Once every variable has a category,
   the occurrence of each value is recorded in s->occurrences, and the case
   is added to its own cell and to the cells for the applicable totals and
   subtotals. */
3375 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3376 double weight[N_CTWS])
/* Per-axis category arrays, sized by the number of variables in each
   nest. */
3378 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3379 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3380 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3381 const struct ctables_category **cats[PIVOT_N_AXES] =
3383 [PIVOT_AXIS_LAYER] = layer_cats,
3384 [PIVOT_AXIS_ROW] = row_cats,
3385 [PIVOT_AXIS_COLUMN] = column_cats,
3388 bool is_included = true;
3390 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3392 const struct ctables_nest *nest = s->nests[a];
3393 for (size_t i = 0; i < nest->n; i++)
3394 if (i != nest->scale_idx)
3396 const struct variable *var = nest->vars[i];
3397 const union value *value = case_data (c, var);
3399 cats[a][i] = ctables_categories_match (
3400 s->table->categories[var_get_dict_index (var)], value, var);
/* NOTE(review): the lines below appear to handle a value that matched no
   category: for the summary variable, a missing value is given the hidden
   placeholder category and the case is marked as not included -- confirm
   the guards and early exits against the full source. */
3403 if (i != nest->summary_idx)
3406 if (!var_is_value_missing (var, value))
3409 static const struct ctables_category cct_excluded_missing = {
3410 .type = CCT_EXCLUDED_MISSING,
3413 cats[a][i] = &cct_excluded_missing;
3414 is_included = false;
/* Record that each value occurred for its variable. */
3420 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3422 const struct ctables_nest *nest = s->nests[a];
3423 for (size_t i = 0; i < nest->n; i++)
3424 if (i != nest->scale_idx)
3426 const struct variable *var = nest->vars[i];
3427 const union value *value = case_data (c, var);
3428 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its own cell, then to the total and subtotal cells. */
3432 ctables_cell_add__ (s, c, cats, is_included, weight);
3433 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3434 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* A value stored in a table's 'clabels_values_map'. */
3437 struct ctables_value
/* Node in the owning table's 'clabels_values_map'. */
3439 struct hmap_node node;
/* Searches T's 'clabels_values_map' for an entry whose value equals VALUE,
   which has the given WIDTH.  HASH must be the hash of VALUE as computed by
   the callers, that is, value_hash (VALUE, WIDTH, 0). */
3444 static struct ctables_value *
3445 ctables_value_find__ (struct ctables_table *t, const union value *value,
3446 int width, unsigned int hash)
3448 struct ctables_value *clv;
3449 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3450 hash, &t->clabels_values_map)
3451 if (value_equal (value, &clv->value, width))
/* Makes sure that T's 'clabels_values_map' has an entry for VALUE.  The
   map is searched first; a new entry holds its own clone of VALUE.
   NOTE(review): the new entry is presumably created only when the search
   finds nothing -- confirm. */
3457 ctables_value_insert (struct ctables_table *t, const union value *value,
3460 unsigned int hash = value_hash (value, width, 0);
3461 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3464 clv = xmalloc (sizeof *clv);
3465 value_clone (&clv->value, value, width);
3466 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Searches T's 'clabels_values_map' for VALUE, which has the given WIDTH.
   This is ctables_value_find__() with the hash computed from VALUE. */
3470 static struct ctables_value *
3471 ctables_value_find (struct ctables_table *t,
3472 const union value *value, int width)
3474 return ctables_value_find__ (t, value, width,
3475 value_hash (value, width, 0));
3480 const struct dictionary *dict;
3481 struct pivot_table_look *look;
3483 /* For CTEF_* formats. */
3484 struct fmt_settings ctables_formats;
3486 /* If this is NULL, zeros are displayed using the normal print format.
3487 Otherwise, this string is displayed. */
3490 /* If this is NULL, missing values are displayed using the normal print
3491 format. Otherwise, this string is displayed. */
3494 /* Indexed by variable dictionary index. */
3495 enum ctables_vlabel *vlabels;
3497 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
3499 bool mrsets_count_duplicates; /* MRSETS. */
3500 bool smissing_listwise; /* SMISSING. */
3501 struct variable *e_weight; /* WEIGHT. */
3502 int hide_threshold; /* HIDESMALLCOUNTS. */
3504 struct ctables_table **tables;
3508 /* Chi-square test (SIGTEST). */
3509 struct ctables_chisq
3512 bool include_mrsets;
3516 /* Pairwise comparison test (COMPARETEST). */
3517 struct ctables_pairwise
3519 enum { PROP, MEAN } type;
3521 bool include_mrsets;
3522 bool meansvariance_allcats;
3524 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting: an optional '=' followed by either the
   keyword DEFAULT or a number in the closed range [0, DBL_MAX].  NAME
   identifies the setting in the range error message.  A number is stored
   into *WIDTH.
   NOTE(review): what DEFAULT stores and what the function returns is
   decided by lines not shown here -- confirm. */
3533 parse_col_width (struct lexer *lexer, const char *name, double *width)
3535 lex_match (lexer, T_EQUALS);
3536 if (lex_match_id (lexer, "DEFAULT"))
3538 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
3540 *width = lex_number (lexer);
/* Parses the keyword NO or YES into *B.  Any other token is reported as a
   syntax error that says YES or NO was expected.  Callers treat a false
   return value as failure. */
3550 parse_bool (struct lexer *lexer, bool *b)
3552 if (lex_match_id (lexer, "NO"))
3554 else if (lex_match_id (lexer, "YES"))
3558 lex_error_expecting (lexer, "YES", "NO");
3565 ctables_chisq_destroy (struct ctables_chisq *chisq)
3571 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: its sections, its references to
   category sets, its axes and stacks, its summary specifications, the
   moved-category-label values, and its chi-square and pairwise test
   settings. */
3577 ctables_table_destroy (struct ctables_table *t)
3582 for (size_t i = 0; i < t->n_sections; i++)
3583 ctables_section_uninit (&t->sections[i]);
/* Category sets are reference counted, so only T's references are
   dropped. */
3586 for (size_t i = 0; i < t->n_categories; i++)
3587 ctables_categories_unref (t->categories[i]);
3588 free (t->categories);
3590 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3592 ctables_axis_destroy (t->axes[a]);
3593 ctables_stack_uninit (&t->stacks[a]);
3595 free (t->summary_specs.specs);
/* Every value in the map has the width of 'clabels_example'. */
3597 struct ctables_value *ctv, *next_ctv;
3598 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
3599 &t->clabels_values_map)
3601 value_destroy (&ctv->value, var_get_width (t->clabels_example));
3602 hmap_delete (&t->clabels_values_map, &ctv->node);
3605 hmap_destroy (&t->clabels_values_map);
3606 free (t->clabels_values);
3612 ctables_chisq_destroy (t->chisq);
3613 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in
   ct->postcomputes (its location, expression, and summary specifications),
   the CTABLES format settings, the reference to the pivot table look, and
   each of the tables. */
3618 ctables_destroy (struct ctables *ct)
3623 struct ctables_postcompute *pc, *next_pc;
3624 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
3628 msg_location_destroy (pc->location);
3629 ctables_pcexpr_destroy (pc->expr);
3633 ctables_summary_spec_set_uninit (pc->specs);
3636 hmap_delete (&ct->postcomputes, &pc->hmap_node);
3639 hmap_destroy (&ct->postcomputes);
3641 fmt_settings_uninit (&ct->ctables_formats);
3642 pivot_table_look_unref (ct->look);
3646 for (size_t i = 0; i < ct->n_tables; i++)
3647 ctables_table_destroy (ct->tables[i]);
/* Checks that none of the N_VARS variables in VARS is numeric.  For a
   numeric variable, an error that names the variable is reported at CAT's
   location.  Callers treat a false return value as failure. */
3653 all_strings (struct variable **vars, size_t n_vars,
3654 const struct ctables_category *cat)
3656 for (size_t j = 0; j < n_vars; j++)
3657 if (var_is_numeric (vars[j]))
3659 msg_at (SE, cat->location,
3660 _("This category specification may be applied only to string "
3661 "variables, but this subcommand tries to apply it to "
3662 "numeric variable %s."),
3663 var_get_name (vars[j]));
/* Parses a category specification for table T, starting at the VARIABLES
   keyword.

   The syntax is a list of variables followed either by an explicit list of
   categories in square brackets or by implicit category settings.  The
   ORDER, KEY, and MISSING settings are accepted only when there is no
   explicit category list; TOTAL, LABEL, POSITION, and EMPTY are accepted
   in both cases.

   The resulting category set is shared by all of the listed variables. */
3670 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
3671 struct ctables *ct, struct ctables_table *t)
3673 if (!lex_match_id (lexer, "VARIABLES"))
3675 lex_match (lexer, T_EQUALS);
3677 struct variable **vars;
3679 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all of the variables have the same print format type;
   'common_format' becomes NULL if they do not. */
3682 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
3683 for (size_t i = 1; i < n_vars; i++)
3685 const struct fmt_spec *f = var_get_print_format (vars[i]);
3686 if (f->type != common_format->type)
3688 common_format = NULL;
3694 && (fmt_get_category (common_format->type)
3695 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* Create the new category set with one reference per variable, dropping
   each variable's reference to its previous set. */
3697 struct ctables_categories *c = xmalloc (sizeof *c);
3698 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
3699 for (size_t i = 0; i < n_vars; i++)
3701 struct ctables_categories **cp
3702 = &t->categories[var_get_dict_index (vars[i])];
3703 ctables_categories_unref (*cp);
/* Explicit category list: '[' category, ... ']'.  Commas between
   categories are optional.  The token offsets of the list are remembered
   for use in later error messages. */
3707 size_t allocated_cats = 0;
3708 int cats_start_ofs = -1;
3709 int cats_end_ofs = -1;
3710 if (lex_match (lexer, T_LBRACK))
3712 cats_start_ofs = lex_ofs (lexer);
3715 if (c->n_cats >= allocated_cats)
3716 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3718 int start_ofs = lex_ofs (lexer);
3719 struct ctables_category *cat = &c->cats[c->n_cats];
3720 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
3722 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
3725 lex_match (lexer, T_COMMA);
3727 while (!lex_match (lexer, T_RBRACK));
3728 cats_end_ofs = lex_ofs (lexer) - 1;
/* Implicit category, filled in from the settings that follow. */
3731 struct ctables_category cat = {
3733 .include_missing = false,
3734 .sort_ascending = true,
3736 bool show_totals = false;
3737 char *total_label = NULL;
3738 bool totals_before = false;
/* Parse settings until the end of the subcommand or of the command. */
3739 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
3741 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
3743 lex_match (lexer, T_EQUALS);
3744 if (lex_match_id (lexer, "A"))
3745 cat.sort_ascending = true;
3746 else if (lex_match_id (lexer, "D"))
3747 cat.sort_ascending = false;
3750 lex_error_expecting (lexer, "A", "D");
3754 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
3756 int start_ofs = lex_ofs (lexer) - 1;
3757 lex_match (lexer, T_EQUALS);
3758 if (lex_match_id (lexer, "VALUE"))
3759 cat.type = CCT_VALUE;
3760 else if (lex_match_id (lexer, "LABEL"))
3761 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in [0, 100].  The
   syntax is parsed, but sorting on it is then rejected as not
   implemented. */
3764 cat.type = CCT_FUNCTION;
3765 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
3766 &cat.weighting, &cat.area))
3769 if (lex_match (lexer, T_LPAREN))
3771 cat.sort_var = parse_variable (lexer, dict);
3775 if (cat.sort_function == CTSF_PTILE)
3777 lex_match (lexer, T_COMMA);
3778 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
3780 cat.percentile = lex_number (lexer);
3784 if (!lex_force_match (lexer, T_RPAREN))
3787 else if (ctables_function_availability (cat.sort_function)
3790 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
3794 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
3795 _("Data-dependent sorting is not implemented."));
3799 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
3801 lex_match (lexer, T_EQUALS);
3802 if (lex_match_id (lexer, "INCLUDE"))
3803 cat.include_missing = true;
3804 else if (lex_match_id (lexer, "EXCLUDE"))
3805 cat.include_missing = false;
3808 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
3812 else if (lex_match_id (lexer, "TOTAL"))
3814 lex_match (lexer, T_EQUALS);
3815 if (!parse_bool (lexer, &show_totals))
3818 else if (lex_match_id (lexer, "LABEL"))
3820 lex_match (lexer, T_EQUALS);
3821 if (!lex_force_string (lexer))
3824 total_label = ss_xstrdup (lex_tokss (lexer));
3827 else if (lex_match_id (lexer, "POSITION"))
3829 lex_match (lexer, T_EQUALS);
3830 if (lex_match_id (lexer, "BEFORE"))
3831 totals_before = true;
3832 else if (lex_match_id (lexer, "AFTER"))
3833 totals_before = false;
3836 lex_error_expecting (lexer, "BEFORE", "AFTER");
3840 else if (lex_match_id (lexer, "EMPTY"))
3842 lex_match (lexer, T_EQUALS);
3843 if (lex_match_id (lexer, "INCLUDE"))
3844 c->show_empty = true;
3845 else if (lex_match_id (lexer, "EXCLUDE"))
3846 c->show_empty = false;
3849 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unrecognized setting: one of two expected-keyword lists is reported. */
3856 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
3857 "TOTAL", "LABEL", "POSITION", "EMPTY");
3859 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category. */
3866 if (c->n_cats >= allocated_cats)
3867 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3868 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, which goes either first or last.  Its
   label defaults to the translation of "Total". */
3873 if (c->n_cats >= allocated_cats)
3874 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3876 struct ctables_category *totals;
3879 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
3880 totals = &c->cats[0];
3883 totals = &c->cats[c->n_cats];
3886 *totals = (struct ctables_category) {
3888 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link categories to their subtotals.  The categories are visited from
   first to last when totals come before, and from last to first
   otherwise. */
3892 struct ctables_category *subtotal = NULL;
3893 for (size_t i = totals_before ? 0 : c->n_cats;
3894 totals_before ? i < c->n_cats : i-- > 0;
3895 totals_before ? i++ : 0)
3897 struct ctables_category *cat = &c->cats[i];
3906 cat->subtotal = subtotal;
3909 case CCT_POSTCOMPUTE:
3920 case CCT_EXCLUDED_MISSING:
/* With an explicit category list, check each category against the
   variables' types, converting string categories to numeric ones where
   the string parses in the common format. */
3925 if (cats_start_ofs != -1)
3927 for (size_t i = 0; i < c->n_cats; i++)
3929 struct ctables_category *cat = &c->cats[i];
3932 case CCT_POSTCOMPUTE:
3933 cat->parse_format = parse_strings ? common_format->type : FMT_F;
3934 struct msg_location *cats_location
3935 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
3936 bool ok = ctables_recursive_check_postcompute (
3937 dict, cat->pc->expr, cat, c, cats_location);
3938 msg_location_destroy (cats_location);
3945 for (size_t j = 0; j < n_vars; j++)
3946 if (var_is_alpha (vars[j]))
3948 msg_at (SE, cat->location,
3949 _("This category specification may be applied "
3950 "only to numeric variables, but this "
3951 "subcommand tries to apply it to string "
3953 var_get_name (vars[j]));
3962 if (!parse_category_string (cat->location, cat->string, dict,
3963 common_format->type, &n))
3966 ss_dealloc (&cat->string);
3968 cat->type = CCT_NUMBER;
3971 else if (!all_strings (vars, n_vars, cat))
3980 if (!cat->srange[0].string)
3982 else if (!parse_category_string (cat->location,
3983 cat->srange[0], dict,
3984 common_format->type, &n[0]))
3987 if (!cat->srange[1].string)
3989 else if (!parse_category_string (cat->location,
3990 cat->srange[1], dict,
3991 common_format->type, &n[1]))
3994 ss_dealloc (&cat->srange[0]);
3995 ss_dealloc (&cat->srange[1]);
3997 cat->type = CCT_NRANGE;
3998 cat->nrange[0] = n[0];
3999 cat->nrange[1] = n[1];
4001 else if (!all_strings (vars, n_vars, cat))
4012 case CCT_EXCLUDED_MISSING:
4029 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that A and B refer to
   (element 'ofs' of each item's 'set').  The keys, in order of precedence,
   are function, weighting, calculation area, percentile, and label, with a
   null label treated as an empty string.

   Unlike the other keys, a smaller percentile sorts later. */
4034 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4036 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4037 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4038 if (as->function != bs->function)
4039 return as->function > bs->function ? 1 : -1;
4040 else if (as->weighting != bs->weighting)
4041 return as->weighting > bs->weighting ? 1 : -1;
4042 else if (as->calc_area != bs->calc_area)
4043 return as->calc_area > bs->calc_area ? 1 : -1;
4044 else if (as->percentile != bs->percentile)
4045 return as->percentile < bs->percentile ? 1 : -1;
4047 const char *as_label = as->label ? as->label : "";
4048 const char *bs_label = bs->label ? bs->label : "";
4049 return strcmp (as_label, bs_label);
/* Recursively adds sections to T, one for each combination of nests across
   the axes.

   While A is a valid axis, IX[A] is stepped through the nests in A's stack
   (an empty stack is iterated once) with a recursive call for the next
   axis.  Once A has run past the last axis, IX selects one nest per axis
   and a new section is appended to t->sections and initialized, with an
   occurrence map for each variable in each nest and an area map for each
   area type. */
4053 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4054 size_t ix[PIVOT_N_AXES])
4056 if (a < PIVOT_N_AXES)
4058 size_t limit = MAX (t->stacks[a].n, 1);
4059 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4060 ctables_table_add_section (t, a + 1, ix);
4064 struct ctables_section *s = &t->sections[t->n_sections++];
4065 *s = (struct ctables_section) {
4067 .cells = HMAP_INITIALIZER (s->cells),
4069 for (a = 0; a < PIVOT_N_AXES; a++)
4072 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4074 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4075 for (size_t i = 0; i < nest->n; i++)
4076 hmap_init (&s->occurrences[a][i]);
4078 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4079 hmap_init (&s->areas[at]);
4084 ctpo_add (double a, double b)
4090 ctpo_sub (double a, double b)
4096 ctpo_mul (double a, double b)
/* Postcompute division operator: returns A / B, or SYSMIS if B is zero. */
4102 ctpo_div (double a, double b)
4104 return b ? a / b : SYSMIS;
/* Postcompute exponentiation operator, computed with pow().
   NOTE(review): errno is saved on entry, presumably so that a pow() error
   can be detected and errno restored afterward -- confirm. */
4108 ctpo_pow (double a, double b)
4110 int save_errno = errno;
4112 double result = pow (a, b);
4120 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell. */
4125 struct ctables_pcexpr_evaluate_ctx
/* The cell being calculated. */
4127 const struct ctables_cell *cell;
/* Section whose cells and occurrences are consulted. */
4128 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
4129 const struct ctables_categories *cats;
/* Axis of the variable that carries the postcompute. */
4130 enum pivot_axis_type pc_a;
/* Format passed to ctables_find_category_for_postcompute(). */
4133 enum fmt_type parse_format;
4136 static double ctables_pcexpr_evaluate (
4137 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS operands (at most 2), by
   evaluating each operand and passing the results to EVALUATE.  An operand
   that is not supplied is passed as 0.

   Each operand is checked for being non-finite or SYSMIS before EVALUATE
   is called.
   NOTE(review): such an operand presumably makes the whole result SYSMIS
   -- confirm. */
4140 ctables_pcexpr_evaluate_nonterminal (
4141 const struct ctables_pcexpr_evaluate_ctx *ctx,
4142 const struct ctables_pcexpr *e, size_t n_args,
4143 double evaluate (double, double))
4145 double args[2] = { 0, 0 };
4146 for (size_t i = 0; i < n_args; i++)
4148 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4149 if (!isfinite (args[i]) || args[i] == SYSMIS)
4152 return evaluate (args[0], args[1]);
/* Returns the value of summary ctx->summary_idx in the cell that is
   identical to ctx->cell except that, on axis ctx->pc_a at variable index
   ctx->pc_a_idx, it has the category and value in PC_CV.

   The target cell is found in the section's cell map using the same
   hashing and matching rules as cell insertion: scale variables are
   skipped, and values are ignored for total, subtotal, and postcompute
   categories. */
4156 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4157 const struct ctables_cell_value *pc_cv)
4159 const struct ctables_section *s = ctx->section;
/* Hash the target cell's categories and values. */
4162 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4164 const struct ctables_nest *nest = s->nests[a];
4165 for (size_t i = 0; i < nest->n; i++)
4166 if (i != nest->scale_idx)
4168 const struct ctables_cell_value *cv
4169 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4170 : &ctx->cell->axes[a].cvs[i]);
4171 hash = hash_pointer (cv->category, hash);
4172 if (cv->category->type != CCT_TOTAL
4173 && cv->category->type != CCT_SUBTOTAL
4174 && cv->category->type != CCT_POSTCOMPUTE)
4175 hash = value_hash (&cv->value,
4176 var_get_width (nest->vars[i]), hash);
/* Find the cell that matches on every non-scale variable. */
4180 struct ctables_cell *tc;
4181 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4183 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4185 const struct ctables_nest *nest = s->nests[a];
4186 for (size_t i = 0; i < nest->n; i++)
4187 if (i != nest->scale_idx)
4189 const struct ctables_cell_value *p_cv
4190 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4191 : &ctx->cell->axes[a].cvs[i]);
4192 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4193 if (p_cv->category != t_cv->category
4194 || (p_cv->category->type != CCT_TOTAL
4195 && p_cv->category->type != CCT_SUBTOTAL
4196 && p_cv->category->type != CCT_POSTCOMPUTE
4197 && !value_equal (&p_cv->value,
4199 var_get_width (nest->vars[i]))))
/* Extract the requested summary from the cell that was found. */
4211 const struct ctables_table *t = s->table;
4212 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4213 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4214 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
4215 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   A node that refers to categories is resolved to a category with
   ctables_find_category_for_postcompute() and evaluated through
   ctables_pcexpr_evaluate_category().  Operator nodes are evaluated
   through ctables_pcexpr_evaluate_nonterminal(). */
4219 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4220 const struct ctables_pcexpr *e)
/* These nodes can match many values: accumulate the result over every
   value that occurred for the variable and falls in the category. */
4227 case CTPO_CAT_NRANGE:
4228 case CTPO_CAT_SRANGE:
4229 case CTPO_CAT_MISSING:
4230 case CTPO_CAT_OTHERNM:
4232 struct ctables_cell_value cv = {
4233 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4235 assert (cv.category != NULL);
4237 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4238 const struct ctables_occurrence *o;
4241 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4242 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4243 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4245 cv.value = o->value;
4246 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* These nodes identify a single cell. */
4251 case CTPO_CAT_NUMBER:
4252 case CTPO_CAT_SUBTOTAL:
4253 case CTPO_CAT_TOTAL:
4255 struct ctables_cell_value cv = {
4256 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4257 .value = { .f = e->number },
4259 assert (cv.category != NULL);
4260 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* A string literal shorter than the variable's width is padded on the
   right with spaces, in a temporary buffer, to the full width. */
4263 case CTPO_CAT_STRING:
4265 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4267 if (width > e->string.length)
4269 s = xmalloc (width);
4270 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4273 const struct ctables_category *category
4274 = ctables_find_category_for_postcompute (
4275 ctx->section->table->ctables->dict,
4276 ctx->cats, ctx->parse_format, e);
4277 assert (category != NULL);
/* The string may have resolved to a numeric category, in which case the
   category's number is the value to look up. */
4279 struct ctables_cell_value cv = { .category = category };
4280 if (category->type == CCT_NUMBER)
4281 cv.value.f = category->number;
4282 else if (category->type == CCT_STRING)
4283 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4287 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary and one unary. */
4293 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4296 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4299 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4302 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4305 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4308 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category in CELL, which must be a postcompute cell,
   by scanning every variable on every axis of section S.  The position of
   the postcompute is reported through the output parameters; the variable
   index is stored in *PC_A_IDX_P.

   NOTE(review): the handling of more than one postcompute in the same cell
   and the return statement are not fully visible here -- confirm. */
4314 static const struct ctables_category *
4315 ctables_cell_postcompute (const struct ctables_section *s,
4316 const struct ctables_cell *cell,
4317 enum pivot_axis_type *pc_a_p,
4320 assert (cell->postcompute);
4321 const struct ctables_category *pc_cat = NULL;
4322 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4323 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4325 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4326 if (cv->category->type == CCT_POSTCOMPUTE)
4330 /* Multiple postcomputes cross each other. The value is
4335 pc_cat = cv->category;
4339 *pc_a_idx_p = pc_a_idx;
4343 assert (pc_cat != NULL);
/* Calculates the value of postcompute cell CELL in section S for summary
   specification SS by evaluating the postcompute's expression.

   The postcompute's own summary specifications are searched for one with
   the same function, weighting, calculation area, and percentile as SS.
   If one is found, its format is stored in *FORMAT and its
   'is_ctables_format' flag in *IS_CTABLES_FORMAT. */
4348 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4349 const struct ctables_cell *cell,
4350 const struct ctables_summary_spec *ss,
4351 struct fmt_spec *format,
4352 bool *is_ctables_format,
4355 enum pivot_axis_type pc_a = 0;
4356 size_t pc_a_idx = 0;
4357 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4358 s, cell, &pc_a, &pc_a_idx);
4362 const struct ctables_postcompute *pc = pc_cat->pc;
4365 for (size_t i = 0; i < pc->specs->n; i++)
4367 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4368 if (ss->function == ss2->function
4369 && ss->weighting == ss2->weighting
4370 && ss->calc_area == ss2->calc_area
4371 && ss->percentile == ss2->percentile)
4373 *format = ss2->format;
4374 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression in terms of the categories of the variable that
   carries the postcompute. */
4380 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4381 const struct ctables_categories *cats = s->table->categories[
4382 var_get_dict_index (var)];
4383 struct ctables_pcexpr_evaluate_ctx ctx = {
4388 .pc_a_idx = pc_a_idx,
4389 .summary_idx = summary_idx,
4390 .parse_format = pc_cat->parse_format,
4392 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D as a UTF-8 string according to FORMAT and SETTINGS, using
   data_out_stretchy(), and then adjusts where a minus sign is placed, as
   explained below. */
4396 ctables_format (double d, const struct fmt_spec *format,
4397 const struct fmt_settings *settings)
4399 const union value v = { .f = d };
4400 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4402 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4403 produce the results we want for negative numbers, putting the negative
4404 sign in the wrong spot, before the prefix instead of after it. We can't,
4405 in fact, produce the desired results using a custom-currency
4406 specification. Instead, we postprocess the output, moving the negative
4409 NEQUAL: "-N=3" => "N=-3"
4410 PAREN: "-(3)" => "(-3)"
4411 PCTPAREN: "-(3%)" => "(-3%)"
4413 This transformation doesn't affect NEGPAREN. */
/* A '-' that directly follows 'E' is left where it is. */
4414 char *minus_src = strchr (s, '-');
4415 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The minus sign moves to the last character of "N=" if that is present,
   and otherwise to the position of the first '('. */
4417 char *n_equals = strstr (s, "N=");
4418 char *lparen = strchr (s, '(');
4419 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4421 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest in T's stack for axis A, testing whether the nest
   consists of exactly one variable that is also its scale variable and
   whether that variable's label setting is CTVL_NONE.
   NOTE(review): presumably returns true only if every nest passes both
   tests -- confirm against the return statements. */
4427 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4429 for (size_t i = 0; i < t->stacks[a].n; i++)
4431 struct ctables_nest *nest = &t->stacks[a].nests[i];
4432 if (nest->n != 1 || nest->scale_idx != 0)
4435 enum ctables_vlabel vlabel
4436 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4437 if (vlabel != CTVL_NONE)
/* Builds a pivot table for table T of CTABLES command CT and submits it with
   pivot_table_submit().

   The steps visible in this excerpt are:

   - Create the pivot table, titled with either T->title or the default
     Custom Tables text, and set the caption and corner text from T.  The
     conditions guarding these choices are not visible here.

   - Optionally create a Statistics dimension (see SUMMARY_DIMENSION) and a
     category-label dimension (see CATEGORIES_DIMENSION).

   - For each pivot axis, create a dimension, sort the cells of each nest,
     build the chain of category levels, and create pivot categories,
     recording the resulting leaf index in cell->axes[a].leaf.

   - Sort all cells with ctables_cell_compare_leaf_3way and give every area
     that has no sequence number yet the next number for its area type.

   - For every cell and every summary spec, compute the value (through the
     postcompute evaluator or ctables_summary_value()), wrap it in a pivot
     value, and store it with pivot_table_put().

   No additional comments are placed between the Pivot categories comment and
   the XXX comment near the end, because the terminator of the former is not
   visible in this excerpt. */
4444 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4446 struct pivot_table *pt = pivot_table_create__ (
4448 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4449 : pivot_value_new_text (N_("Custom Tables"))),
4452 pivot_table_set_caption (
4453 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4455 pivot_table_set_corner_text (
4456 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate Statistics dimension is used when the summary labels are on a
   different axis from the summaries, or when the labels are not visible and
   there is more than one summary spec. */
4458 bool summary_dimension = (t->summary_axis != t->slabels_axis
4459 || (!t->slabels_visible
4460 && t->summary_specs.n > 1));
4461 if (summary_dimension)
4463 struct pivot_dimension *d = pivot_dimension_create (
4464 pt, t->slabels_axis, N_("Statistics"));
4465 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4466 if (!t->slabels_visible)
4467 d->hide_all_labels = true;
4468 for (size_t i = 0; i < specs->n; i++)
4469 pivot_category_create_leaf (
4470 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* When T->clabels_example is nonnull, create a category dimension on axis
   T->label_axis[T->clabels_from_axis], with one leaf per entry in
   T->clabels_values. */
4473 bool categories_dimension = t->clabels_example != NULL;
4474 if (categories_dimension)
4476 struct pivot_dimension *d = pivot_dimension_create (
4477 pt, t->label_axis[t->clabels_from_axis],
4478 t->clabels_from_axis == PIVOT_AXIS_ROW
4479 ? N_("Row Categories")
4480 : N_("Column Categories"));
4481 const struct variable *var = t->clabels_example;
4482 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4483 for (size_t i = 0; i < t->n_clabels_values; i++)
4485 const struct ctables_value *value = t->clabels_values[i];
4486 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4487 assert (cat != NULL);
4488 pivot_category_create_leaf (
4489 d->root, ctables_category_create_value_label (c, cat,
4495 pivot_table_set_look (pt, ct->look);
/* Create a dimension for each axis that has an axis expression or that
   carries the summaries. */
4496 struct pivot_dimension *d[PIVOT_N_AXES];
4497 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4499 static const char *names[] = {
4500 [PIVOT_AXIS_ROW] = N_("Rows"),
4501 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4502 [PIVOT_AXIS_LAYER] = N_("Layers"),
4504 d[a] = (t->axes[a] || a == t->summary_axis
4505 ? pivot_dimension_create (pt, a, names[a])
4510 assert (t->axes[a]);
4512 for (size_t i = 0; i < t->stacks[a].n; i++)
4514 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections whose nest on this axis is NEST, counting their cells
   and tracking the deepest nest among them. */
4515 struct ctables_section **sections = xnmalloc (t->n_sections,
4517 size_t n_sections = 0;
4519 size_t n_total_cells = 0;
4520 size_t max_depth = 0;
4521 for (size_t j = 0; j < t->n_sections; j++)
4522 if (t->sections[j].nests[a] == nest)
4524 struct ctables_section *s = &t->sections[j];
4525 sections[n_sections++] = s;
4526 n_total_cells += hmap_count (&s->cells);
4528 size_t depth = s->nests[a]->n;
4529 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into SORTED and order them with
   ctables_cell_compare_3way.  The test that selects which cells are gathered
   is not visible in this excerpt. */
4532 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4534 size_t n_sorted = 0;
4536 for (size_t j = 0; j < n_sections; j++)
4538 struct ctables_section *s = sections[j];
4540 struct ctables_cell *cell;
4541 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4543 sorted[n_sorted++] = cell;
4544 assert (n_sorted <= n_total_cells);
4547 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4548 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* One level of pivot category nesting on this axis. */
4550 struct ctables_level
4552 enum ctables_level_type
4554 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4555 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4556 CTL_SUMMARY, /* Summary functions. */
4560 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Build the level list for this nest.  Each variable contributes at most a
   variable-label level and a category level, and one summary level may
   follow, hence the 1 + 2 * max_depth allocation. */
4563 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4564 size_t n_levels = 0;
4565 for (size_t k = 0; k < nest->n; k++)
4567 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4568 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4570 if (vlabel != CTVL_NONE)
4572 levels[n_levels++] = (struct ctables_level) {
4574 .vlabel = (enum settings_value_show) vlabel,
4579 if (nest->scale_idx != k
4580 && (k != nest->n - 1 || t->label_axis[a] == a))
4582 levels[n_levels++] = (struct ctables_level) {
4583 .type = CTL_CATEGORY,
4589 if (!summary_dimension && a == t->slabels_axis)
4591 levels[n_levels++] = (struct ctables_level) {
4592 .type = CTL_SUMMARY,
4593 .var_idx = SIZE_MAX,
4597 /* Pivot categories:
4599 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4600 - category for nest->vars[0], if nest->scale_idx != 0
4601 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4602 - category for nest->vars[1], if nest->scale_idx != 1
4604 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4605 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4606 - summary function, if 'a == t->slabels_axis && a ==
4609 Additional dimensions:
4611 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4613 - If 't->label_axis[b] == a' for some 'b != a', add a category
4618 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4620 for (size_t j = 0; j < n_sorted; j++)
4622 struct ctables_cell *cell = sorted[j];
4623 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4625 size_t n_common = 0;
4628 for (; n_common < n_levels; n_common++)
4630 const struct ctables_level *level = &levels[n_common];
4631 if (level->type == CTL_CATEGORY)
4633 size_t var_idx = level->var_idx;
4634 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4635 if (prev->axes[a].cvs[var_idx].category != c)
4637 else if (c->type != CCT_SUBTOTAL
4638 && c->type != CCT_TOTAL
4639 && c->type != CCT_POSTCOMPUTE
4640 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4641 &cell->axes[a].cvs[var_idx].value,
4642 var_get_type (nest->vars[var_idx])))
4648 for (size_t k = n_common; k < n_levels; k++)
4650 const struct ctables_level *level = &levels[k];
4651 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4652 if (level->type == CTL_SUMMARY)
4654 assert (k == n_levels - 1);
4656 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4657 for (size_t m = 0; m < specs->n; m++)
4659 int leaf = pivot_category_create_leaf (
4660 parent, ctables_summary_label (&specs->specs[m],
4668 const struct variable *var = nest->vars[level->var_idx];
4669 struct pivot_value *label;
4670 if (level->type == CTL_VAR)
4672 label = pivot_value_new_variable (var);
4673 label->variable.show = level->vlabel;
4675 else if (level->type == CTL_CATEGORY)
4677 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4678 label = ctables_category_create_value_label (
4679 t->categories[var_get_dict_index (var)],
4680 cv->category, var, &cv->value);
4685 if (k == n_levels - 1)
4686 prev_leaf = pivot_category_create_leaf (parent, label);
4688 groups[k] = pivot_category_create_group__ (parent, label);
4692 cell->axes[a].leaf = prev_leaf;
4701 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4705 size_t n_total_cells = 0;
4706 for (size_t j = 0; j < t->n_sections; j++)
4707 n_total_cells += hmap_count (&t->sections[j].cells);
4709 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4710 size_t n_sorted = 0;
4711 for (size_t j = 0; j < t->n_sections; j++)
4713 const struct ctables_section *s = &t->sections[j];
4714 struct ctables_cell *cell;
4715 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4717 sorted[n_sorted++] = cell;
4719 assert (n_sorted <= n_total_cells);
4720 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4722 size_t ids[N_CTATS];
4723 memset (ids, 0, sizeof ids);
4724 for (size_t j = 0; j < n_sorted; j++)
4726 struct ctables_cell *cell = sorted[j];
4727 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4729 struct ctables_area *area = cell->areas[at];
4730 if (!area->sequence)
4731 area->sequence = ++ids[at];
4738 for (size_t i = 0; i < t->n_sections; i++)
4740 struct ctables_section *s = &t->sections[i];
4742 struct ctables_cell *cell;
4743 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4748 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4749 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4750 for (size_t j = 0; j < specs->n; j++)
4753 size_t n_dindexes = 0;
4755 if (summary_dimension)
4756 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4758 if (categories_dimension)
4760 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4761 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4762 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4763 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4766 dindexes[n_dindexes++] = ctv->leaf;
4769 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4772 int leaf = cell->axes[a].leaf;
4773 if (a == t->summary_axis && !summary_dimension)
4775 dindexes[n_dindexes++] = leaf;
4778 const struct ctables_summary_spec *ss = &specs->specs[j];
4780 struct fmt_spec format = specs->specs[j].format;
4781 bool is_ctables_format = ss->is_ctables_format;
4782 double d = (cell->postcompute
4783 ? ctables_cell_calculate_postcompute (
4784 s, cell, ss, &format, &is_ctables_format, j)
4785 : ctables_summary_value (cell->areas,
4786 &cell->summaries[j], ss));
4788 struct pivot_value *value;
4789 if (ct->hide_threshold != 0
4790 && d < ct->hide_threshold
4791 && ss->function == CTSF_COUNT)
4793 value = pivot_value_new_user_text_nocopy (
4794 xasprintf ("<%d", ct->hide_threshold));
4796 else if (d == 0 && ct->zero)
4797 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4798 else if (d == SYSMIS && ct->missing)
4799 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4800 else if (is_ctables_format)
4801 value = pivot_value_new_user_text_nocopy (
4802 ctables_format (d, &format, &ct->ctables_formats));
4805 value = pivot_value_new_number (d);
4806 value->numeric.format = format;
4808 /* XXX should text values be right-justified? */
4809 pivot_table_put (pt, dindexes, n_dindexes, value);
/* All cells have been stored; hand the finished table to the output
   subsystem. */
4814 pivot_table_submit (pt);
/* Checks whether the category label placement recorded in T->label_axis[A]
   is acceptable for axis A of table T, reporting an error with msg() for
   each violation visible below:

   - the categories of the innermost variable of the first nest include a
     CCT_FUNCTION category (sorting based on a summary function);
   - the innermost variable of a nest is the scale variable of that nest;
   - the innermost variables of the first nest and a later nest differ in
     width, value labels, or category specifications.

   As a side effect, T->clabels_example is set to the innermost variable of
   the first nest.

   The early exits and result statements are not visible in this excerpt.
   The caller combines two calls with &&, so the result is presumably a
   success flag -- confirm. */
4818 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4820 enum pivot_axis_type label_pos = t->label_axis[a];
/* Names used only to build the error messages below. */
4824 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4825 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4827 const struct ctables_stack *stack = &t->stacks[a];
4831 const struct ctables_nest *n0 = &stack->nests[0];
4834 assert (stack->n == 1);
/* V0 is the innermost variable of the first nest; every later nest is
   compared against it. */
4838 const struct variable *v0 = n0->vars[n0->n - 1];
4839 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4840 t->clabels_example = v0;
4842 for (size_t i = 0; i < c0->n_cats; i++)
4843 if (c0->cats[i].type == CCT_FUNCTION)
4845 msg (SE, _("%s=%s is not allowed with sorting based "
4846 "on a summary function."),
4847 subcommand_name, pos_name);
/* The innermost variable must not be the scale variable of the nest. */
4850 if (n0->n - 1 == n0->scale_idx)
4852 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4853 "but %s is a scale variable."),
4854 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest must agree with the first one. */
4858 for (size_t i = 1; i < stack->n; i++)
4860 const struct ctables_nest *ni = &stack->nests[i];
4862 const struct variable *vi = ni->vars[ni->n - 1];
4863 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4865 if (ni->n - 1 == ni->scale_idx)
4867 msg (SE, _("%s=%s requires the variables to be moved to be "
4868 "categorical, but %s is a scale variable."),
4869 subcommand_name, pos_name, var_get_name (vi));
4872 if (var_get_width (v0) != var_get_width (vi))
4874 msg (SE, _("%s=%s requires the variables to be "
4875 "moved to have the same width, but %s has "
4876 "width %d and %s has width %d."),
4877 subcommand_name, pos_name,
4878 var_get_name (v0), var_get_width (v0),
4879 var_get_name (vi), var_get_width (vi));
4882 if (!val_labs_equal (var_get_value_labels (v0),
4883 var_get_value_labels (vi)))
4885 msg (SE, _("%s=%s requires the variables to be "
4886 "moved to have the same value labels, but %s "
4887 "and %s have different value labels."),
4888 subcommand_name, pos_name,
4889 var_get_name (v0), var_get_name (vi));
4892 if (!ctables_categories_equal (c0, ci))
4894 msg (SE, _("%s=%s requires the variables to be "
4895 "moved to have the same category "
4896 "specifications, but %s and %s have different "
4897 "category specifications."),
4898 subcommand_name, pos_name,
4899 var_get_name (v0), var_get_name (vi));
/* Searches the *N-element array *SUM_VARS for VAR.  The statement taken on a
   match is not visible in this excerpt.  Otherwise the array is grown with
   x2nrealloc() when *N has reached *ALLOCATED, and VAR is stored at index
   *N.

   The caller stores the result in spec->sum_var_idx, so the result is
   presumably the index of VAR within the array -- confirm. */
4908 add_sum_var (struct variable *var,
4909 struct variable ***sum_vars, size_t *n, size_t *allocated)
4911 for (size_t i = 0; i < *n; i++)
4912 if (var == (*sum_vars)[i])
4915 if (*n >= *allocated)
4916 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4917 (*sum_vars)[*n] = var;
/* Maps AREA to a ctables_area_type result.  Only two of the result
   statements are visible in this excerpt (CTAT_LAYERCOL and CTAT_LAYERROW);
   the selection logic that chooses among them is not shown.

   NOTE(review): the caller applies this to calc_area when row labels are
   placed on the column axis or the reverse, so it presumably swaps
   row-oriented and column-oriented areas -- confirm against the full
   body. */
4921 static enum ctables_area_type
4922 rotate_area (enum ctables_area_type area)
4933 return CTAT_LAYERCOL;
4936 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A.  For every summary spec on A, in each of
   the N_CSVS spec sets, whose function is CTSF_areaPCT_SUM, registers A->var
   through add_sum_var() and records the result in spec->sum_var_idx.  Both
   entries of A->subs[] are visited recursively.

   SUM_VARS, N, and ALLOCATED are passed through to add_sum_var().  The
   conditions that select between the per-variable part and the recursive
   part are not visible in this excerpt. */
4949 enumerate_sum_vars (const struct ctables_axis *a,
4950 struct variable ***sum_vars, size_t *n, size_t *allocated)
4958 for (size_t i = 0; i < N_CSVS; i++)
4959 for (size_t j = 0; j < a->specs[i].n; j++)
4961 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4962 if (spec->function == CTSF_areaPCT_SUM)
4963 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4969 for (size_t i = 0; i < 2; i++)
4970 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The steps visible in this excerpt are:

   - For each pivot axis, build T->stacks[a] with enumerate_fts() and, for
     each nest and each area type, fill nest->areas[at] with indexes of the
     variables of the nest, skipping the scale variable.  Where no stack is
     built from an axis expression, a single empty nest is installed and
     T->label_axis[a] is reset to the axis itself.

   - For each nest on the summary axis: supply a default summary spec when
     CSV_CELL has none, clone CSV_CELL into CSV_TOTAL when needed, rotate
     calc_area when labels move between rows and columns, and collect
     listwise variables when smissing_listwise is set.

   - Merge the summary spec sets of all nests on the summary axis into
     T->summary_specs, assigning axis_idx to each spec.

   - Enumerate the sum variables of the summary axis.

   The result is the conjunction of ctables_check_label_position() for the
   row and column axes. */
4976 ctables_prepare_table (struct ctables_table *t)
4978 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4981 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4983 for (size_t j = 0; j < t->stacks[a].n; j++)
4985 struct ctables_nest *nest = &t->stacks[a].nests[j];
4986 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for one index per variable in the nest; n_areas counts how many are
   actually in use. */
4988 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4989 nest->n_areas[at] = 0;
/* For row-oriented and column-oriented area types, ATA is the axis the area
   type is named after and ATB is the other of the two. */
4991 enum pivot_axis_type ata, atb;
4992 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4994 ata = PIVOT_AXIS_ROW;
4995 atb = PIVOT_AXIS_COLUMN;
4997 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4999 ata = PIVOT_AXIS_COLUMN;
5000 atb = PIVOT_AXIS_ROW;
5003 if (at == CTAT_LAYER
5004 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5005 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5006 ? a == atb && t->label_axis[a] != a
/* Walk the nest from the innermost variable outward.  K is unsigned, so
   decrementing past zero wraps around and fails the k < nest->n test, which
   ends the loop. */
5009 for (size_t k = nest->n - 1; k < nest->n; k--)
5010 if (k != nest->scale_idx)
5012 nest->areas[at][nest->n_areas[at]++] = k;
5018 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5019 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5020 : at == CTAT_TABLE ? true
5024 for (size_t k = 0; k < nest->n; k++)
5025 if (k != nest->scale_idx)
5026 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP is computed per area type from where labels are moved from and to.
   Its declaration and the switch that selects among these assignments are
   not visible in this excerpt. */
5032 #define L PIVOT_AXIS_LAYER
5033 n_drop = (t->clabels_from_axis == L ? a != L
5034 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5035 : t->clabels_from_axis == a ? 2
5042 n_drop = a == ata && t->label_axis[ata] == atb;
5047 n_drop = (a == ata ? t->label_axis[ata] == atb
5049 : t->clabels_from_axis == atb ? -1
5050 : t->clabels_to_axis != atb ? 1
5062 size_t n = nest->n_areas[at];
5065 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5066 nest->n_areas[at]--;
/* Drop up to N_DROP trailing entries, never going below zero. */
5071 for (int i = 0; i < n_drop; i++)
5072 if (nest->n_areas[at] > 0)
5073 nest->n_areas[at]--;
/* Install a stack holding a single nest whose scale_idx and summary_idx are
   both SIZE_MAX. */
5080 struct ctables_nest *nest = xmalloc (sizeof *nest);
5081 *nest = (struct ctables_nest) {
5083 .scale_idx = SIZE_MAX,
5084 .summary_idx = SIZE_MAX
5086 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5088 /* There's no point in moving labels away from an axis that has no
5089 labels, so avoid dealing with the special cases around that. */
5090 t->label_axis[a] = a;
/* Finish the summary specifications of every nest on the summary axis. */
5093 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5094 for (size_t i = 0; i < stack->n; i++)
5096 struct ctables_nest *nest = &stack->nests[i];
5097 if (!nest->specs[CSV_CELL].n)
5099 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5100 ss->specs = xmalloc (sizeof *ss->specs);
/* Default summary: MEAN for a scale spec set, COUNT otherwise, applied to
   the innermost variable of the nest. */
5103 enum ctables_summary_function function
5104 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
5108 nest->summary_idx = nest->n - 1;
5109 ss->var = nest->vars[nest->summary_idx];
5111 *ss->specs = (struct ctables_summary_spec) {
5112 .function = function,
5113 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5114 .format = ctables_summary_default_format (function, ss->var),
5117 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5118 &nest->specs[CSV_CELL]);
5120 else if (!nest->specs[CSV_TOTAL].n)
5121 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5122 &nest->specs[CSV_CELL]);
/* When row labels are placed on the column axis or the reverse, each spec
   has its calc_area passed through rotate_area().  CFI is looked up per
   spec; the test that uses it is not visible in this excerpt. */
5124 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5125 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5127 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5128 for (size_t i = 0; i < nest->specs[sv].n; i++)
5130 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5131 const struct ctables_function_info *cfi =
5132 &ctables_function_info[ss->function];
5134 ss->calc_area = rotate_area (ss->calc_area);
/* With listwise handling of scale missing values, gather the scale variables
   of the other nests that share the group head of this nest and attach the
   list to each spec set of the nest. */
5138 if (t->ctables->smissing_listwise)
5140 struct variable **listwise_vars = NULL;
5142 size_t allocated = 0;
5144 for (size_t j = nest->group_head; j < stack->n; j++)
5146 const struct ctables_nest *other_nest = &stack->nests[j];
5147 if (other_nest->group_head != nest->group_head)
5150 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5153 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5154 sizeof *listwise_vars);
5155 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5158 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5161 listwise_vars = xmemdup (listwise_vars,
5162 n * sizeof *listwise_vars);
5163 nest->specs[sv].listwise_vars = listwise_vars;
5164 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests into T->summary_specs.  Each round shown
   below picks the smallest remaining item according to
   merge_item_compare_3way, appends its current spec to MERGED, then gives
   every item that compares equal the new axis_idx and advances it.  An item
   that runs out of specs is replaced by the last item in the array. */
5169 struct ctables_summary_spec_set *merged = &t->summary_specs;
5170 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5172 for (size_t j = 0; j < stack->n; j++)
5174 const struct ctables_nest *nest = &stack->nests[j];
5176 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5177 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5182 struct merge_item min = items[0];
5183 for (size_t j = 1; j < n_left; j++)
5184 if (merge_item_compare_3way (&items[j], &min) < 0)
5187 if (merged->n >= merged->allocated)
5188 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5189 sizeof *merged->specs);
5190 merged->specs[merged->n++] = min.set->specs[min.ofs];
5192 for (size_t j = 0; j < n_left; )
5194 if (merge_item_compare_3way (&items[j], &min) == 0)
5196 struct merge_item *item = &items[j];
5197 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5198 if (++item->ofs >= item->set->n)
5200 items[j] = items[--n_left];
/* Record the variables used by CTSF_areaPCT_SUM specs on the summary axis. */
5209 size_t allocated_sum_vars = 0;
5210 enumerate_sum_vars (t->axes[t->summary_axis],
5211 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
/* The table is usable only if label placement checks out for both rows and
   columns. */
5213 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5214 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in stack A of table T, looks at the innermost variable of
   the nest (vars[n - 1]) and its value in case C.  Numeric system-missing
   values are tested for first; the statement taken in that case is not
   visible in this excerpt.  A value that passes the
   ctables_categories_match() test against the categories of the variable is
   added to T with ctables_value_insert(), using the width of the
   variable. */
5218 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5219 enum pivot_axis_type a)
5221 struct ctables_stack *stack = &t->stacks[a];
5222 for (size_t i = 0; i < stack->n; i++)
5224 const struct ctables_nest *nest = &stack->nests[i];
5225 const struct variable *var = nest->vars[nest->n - 1];
5226 const union value *value = case_data (c, var);
5228 if (var_is_numeric (var) && value->f == SYSMIS)
5231 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5233 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback passed to sort().  A_ and B_ each point to a pointer
   to struct ctables_value, and WIDTH_ points to an int that holds the width
   of the values.  Returns the value_compare_3way() result for the two
   values at that width. */
5238 compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
5240 const struct ctables_value *const *ap = a_;
5241 const struct ctables_value *const *bp = b_;
5242 const struct ctables_value *a = *ap;
5243 const struct ctables_value *b = *bp;
5244 const int *width = width_;
5245 return value_compare_3way (&a->value, &b->value, *width);
/* Turns the values collected in T->clabels_values_map into the sorted array
   T->clabels_values and numbers them.

   First, values that have value labels on T->clabels_example and that match
   its categories are inserted as well; the condition guarding this step is
   not visible in this excerpt.  Then every map entry is copied into a newly
   allocated array, the array is sorted with compare_ctables_values_3way at
   the width of the variable, and each entry has its leaf member set to its
   position in the sorted array. */
5249 ctables_sort_clabels_values (struct ctables_table *t)
5251 const struct variable *v0 = t->clabels_example;
5252 int width = var_get_width (v0);
5254 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5257 const struct val_labs *val_labs = var_get_value_labels (v0);
5258 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5259 vl = val_labs_next (val_labs, vl))
5260 if (ctables_categories_match (c0, &vl->value, v0))
5261 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array so that it can be sorted. */
5264 size_t n = hmap_count (&t->clabels_values_map);
5265 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5267 struct ctables_value *clv;
5269 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5270 t->clabels_values[i++] = clv;
5271 t->n_clabels_values = n;
5274 sort (t->clabels_values, n, sizeof *t->clabels_values,
5275 compare_ctables_values_3way, &width);
/* The sorted position becomes the leaf index of each value. */
5277 for (size_t i = 0; i < n; i++)
5278 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, through ctables_add_occurrence(), values of VAR that
   the categories in CATS refer to.

   Most of the case labels of the dispatch on category type are not visible
   in this excerpt, so the mapping from category type to branch is only
   partly shown.  The visible branches do the following:

   - add the number stored in the category;
   - add the string stored in the category, space-padded to the width of the
     variable;
   - add the value-label values that fall within a numeric range (VAR must be
     numeric);
   - add the value-label values accepted by in_string_range() (VAR must be a
     string variable);
   - add the value-label values that are missing values of VAR;
   - add every value-label value;
   - add the value-label values that are not missing, or all of them when
     the category has include_missing set.

   CCT_POSTCOMPUTE and CCT_EXCLUDED_MISSING appear as case labels; the
   statements under them are not visible here. */
5282 ctables_add_category_occurrences (const struct variable *var,
5283 struct hmap *occurrences,
5284 const struct ctables_categories *cats)
5286 const struct val_labs *val_labs = var_get_value_labels (var);
5288 for (size_t i = 0; i < cats->n_cats; i++)
5290 const struct ctables_category *c = &cats->cats[i];
5294 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Build a temporary value holding the category string padded with spaces to
   the width of the variable, add it, then release it. */
5300 int width = var_get_width (var);
5302 value_init (&value, width);
5303 value_copy_buf_rpad (&value, width,
5304 CHAR_CAST (uint8_t *, c->string.string),
5305 c->string.length, ' ');
5306 ctables_add_occurrence (var, &value, occurrences);
5307 value_destroy (&value, width);
5312 assert (var_is_numeric (var));
5313 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5314 vl = val_labs_next (val_labs, vl))
5315 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5316 ctables_add_occurrence (var, &vl->value, occurrences);
5320 assert (var_is_alpha (var));
5321 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5322 vl = val_labs_next (val_labs, vl))
5323 if (in_string_range (&vl->value, var, c->srange))
5324 ctables_add_occurrence (var, &vl->value, occurrences);
5328 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5329 vl = val_labs_next (val_labs, vl))
5330 if (var_is_value_missing (var, &vl->value))
5331 ctables_add_occurrence (var, &vl->value, occurrences);
5335 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5336 vl = val_labs_next (val_labs, vl))
5337 ctables_add_occurrence (var, &vl->value, occurrences);
5340 case CCT_POSTCOMPUTE:
5350 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5351 vl = val_labs_next (val_labs, vl))
5352 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5353 ctables_add_occurrence (var, &vl->value, occurrences);
5356 case CCT_EXCLUDED_MISSING:
/* Recursive helper that enumerates combinations of category values for
   section S, one variable position at a time, so that a cell is inserted for
   each combination.

   A and A_IDX identify the axis and the variable position within the nest
   of that axis currently being filled in.  C is a scratch case whose data
   for the current variable is overwritten with each candidate value, and
   CATS[A][A_IDX] receives the matching category.

   - Once A has passed the last axis, a cell is inserted with
     ctables_cell_insert__().
   - When S has no nest on axis A, or A_IDX is past the end of the nest, the
     recursion continues with position 0 of the next axis.
   - Otherwise every recorded occurrence of the variable is tried in turn,
     followed by every CCT_POSTCOMPUTE category of the variable. */
5363 ctables_section_recurse_add_empty_categories (
5364 struct ctables_section *s,
5365 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5366 enum pivot_axis_type a, size_t a_idx)
5368 if (a >= PIVOT_N_AXES)
5369 ctables_cell_insert__ (s, c, cats);
5370 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5371 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5374 const struct variable *var = s->nests[a]->vars[a_idx];
5375 const struct ctables_categories *categories = s->table->categories[
5376 var_get_dict_index (var)];
5377 int width = var_get_width (var);
5378 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5379 const struct ctables_occurrence *o;
5380 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of the variable in the scratch case with this
   occurrence, then recurse for the next variable in the nest. */
5382 union value *value = case_data_rw (c, var);
5383 value_destroy (value, width);
5384 value_clone (value, &o->value, width);
5385 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5386 assert (cats[a][a_idx] != NULL);
5387 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated as well; the case value is left as
   it is for these. */
5390 for (size_t i = 0; i < categories->n_cats; i++)
5392 const struct ctables_category *cat = &categories->cats[i];
5393 if (cat->type == CCT_POSTCOMPUTE)
5395 cats[a][a_idx] = cat;
5396 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should appear even
   without data.

   For every non-scale variable in the nest of each axis whose categories
   have show_empty set, the occurrences map of the variable is extended with
   ctables_add_category_occurrences().  SHOW_EMPTY starts out false; the
   statements that update and test it are not visible in this excerpt.

   Afterward, per-axis arrays of category pointers sized to the nests are
   set up, a scratch case is created from the dictionary prototype, and
   ctables_section_recurse_add_empty_categories() is started at axis 0,
   position 0. */
5403 ctables_section_add_empty_categories (struct ctables_section *s)
5405 bool show_empty = false;
5406 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5408 for (size_t k = 0; k < s->nests[a]->n; k++)
5409 if (k != s->nests[a]->scale_idx)
5411 const struct variable *var = s->nests[a]->vars[k];
5412 const struct ctables_categories *cats = s->table->categories[
5413 var_get_dict_index (var)];
5414 if (cats->show_empty)
5417 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* One slot per variable in the nest of each axis; these are variable-length
   arrays. */
5423 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5424 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5425 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5426 const struct ctables_category **cats[PIVOT_N_AXES] =
5428 [PIVOT_AXIS_LAYER] = layer_cats,
5429 [PIVOT_AXIS_ROW] = row_cats,
5430 [PIVOT_AXIS_COLUMN] = column_cats,
/* NOTE(review): the release of this scratch case is not visible in this
   excerpt -- confirm it is unreferenced after the recursion. */
5432 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5433 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data while leaving its hash maps usable:

   - removes every occurrence of every non-scale variable, destroying the
     stored value;
   - removes every cell, destroying its per-axis values, freeing its cvs
     arrays, uninitializing and freeing its summaries;
   - removes every area of every area type.

   The cells and areas maps are shrunk afterward with hmap_shrink().  Some of
   the free() calls for the removed nodes are not visible in this excerpt. */
5438 ctables_section_clear (struct ctables_section *s)
5440 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5442 const struct ctables_nest *nest = s->nests[a];
5443 for (size_t i = 0; i < nest->n; i++)
5444 if (i != nest->scale_idx)
5446 const struct variable *var = nest->vars[i];
5447 int width = var_get_width (var);
5448 struct ctables_occurrence *o, *next;
5449 struct hmap *map = &s->occurrences[a][i];
5450 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5452 value_destroy (&o->value, width);
5453 hmap_delete (map, &o->node);
/* Tear down the cells.  The safe iteration macro is used because nodes are
   deleted during the walk. */
5460 struct ctables_cell *cell, *next_cell;
5461 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5463 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5465 const struct ctables_nest *nest = s->nests[a];
5466 for (size_t i = 0; i < nest->n; i++)
5467 if (i != nest->scale_idx)
5468 value_destroy (&cell->axes[a].cvs[i].value,
5469 var_get_width (nest->vars[i]));
5470 free (cell->axes[a].cvs);
/* The summaries of a cell follow the spec set selected by cell->sv on the
   summary axis nest. */
5473 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5474 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5475 for (size_t i = 0; i < specs->n; i++)
5476 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5477 free (cell->summaries);
5479 hmap_delete (&s->cells, &cell->node);
5482 hmap_shrink (&s->cells);
5484 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5486 struct ctables_area *area, *next_area;
5487 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5491 hmap_delete (&s->areas[at], &area->node);
5494 hmap_shrink (&s->areas[at]);
/* Releases the storage owned by section S.  The contents are removed first
   with ctables_section_clear(); then, for each axis, the occurrences map of
   every variable is destroyed and the occurrences array is freed.  Finally
   the cells map and the areas map of each area type are destroyed. */
5499 ctables_section_uninit (struct ctables_section *s)
5501 ctables_section_clear (s);
5503 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5505 struct ctables_nest *nest = s->nests[a];
5506 for (size_t i = 0; i < nest->n; i++)
5507 hmap_destroy (&s->occurrences[a][i]);
5508 free (s->occurrences[a]);
5511 hmap_destroy (&s->cells);
5512 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5513 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated in table T: every section is cleared with
   ctables_section_clear(), and, when T->clabels_example is set, every entry
   of T->clabels_values_map is destroyed and removed, the map is shrunk, and
   the T->clabels_values array is freed and reset to empty.

   The free() of the removed map nodes is not visible in this excerpt. */
5517 ctables_table_clear (struct ctables_table *t)
5519 for (size_t i = 0; i < t->n_sections; i++)
5520 ctables_section_clear (&t->sections[i]);
5522 if (t->clabels_example)
5524 int width = var_get_width (t->clabels_example);
5525 struct ctables_value *value, *next_value;
5526 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5527 &t->clabels_values_map)
5529 value_destroy (&value->value, width);
5530 hmap_delete (&t->clabels_values_map, &value->node);
5533 hmap_shrink (&t->clabels_values_map);
5535 free (t->clabels_values);
5536 t->clabels_values = NULL;
5537 t->n_clabels_values = 0;
/* Runs the tables of a CTABLES command over the cases in INPUT, which come
   from dataset DS.  The remaining parameters are not visible in this
   excerpt; the body refers to the command as CT.

   Sections are allocated and set up for every table first.  The input is
   then split into groups, by split-file variables when the split type of
   the dictionary is SPLIT_SEPARATE.  For each group, every case is fed into
   every section of every table, and once the group is exhausted each table
   is sorted, completed with empty categories, output, and cleared so that
   the next group starts fresh.

   Returns the result of casegrouper_destroy(). */
5542 ctables_execute (struct dataset *ds, struct casereader *input,
5545 for (size_t i = 0; i < ct->n_tables; i++)
5547 struct ctables_table *t = ct->tables[i];
/* One section per combination of nests across the three axes, counting an
   empty stack as one. */
5548 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5549 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5550 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5551 sizeof *t->sections);
5552 size_t ix[PIVOT_N_AXES];
5553 ctables_table_add_section (t, 0, ix);
5556 struct dictionary *dict = dataset_dict (ds);
5558 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5559 struct casegrouper *grouper
5561 ? casegrouper_create_splits (input, dict)
5562 : casegrouper_create_vars (input, NULL, 0));
5563 struct casereader *group;
5564 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the first case of the group to report the split-file values.  The
   guards around this step are not visible in this excerpt. */
5568 struct ccase *c = casereader_peek (group, 0);
5571 output_split_file_values (ds, c);
5576 bool warn_on_invalid = true;
5577 for (struct ccase *c = casereader_read (group); c;
5578 case_unref (c), c = casereader_read (group))
/* Compute the weight of the case under each weighting scheme. */
5580 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5581 double e_weight = (ct->e_weight
5582 ? var_force_valid_weight (ct->e_weight,
5583 case_num (c, ct->e_weight),
5587 [CTW_DICTIONARY] = d_weight,
5588 [CTW_EFFECTIVE] = e_weight,
5589 [CTW_UNWEIGHTED] = 1.0,
5592 for (size_t i = 0; i < ct->n_tables; i++)
5594 struct ctables_table *t = ct->tables[i];
5596 for (size_t j = 0; j < t->n_sections; j++)
5597 ctables_cell_insert (&t->sections[j], c, weight);
/* Collect category label values for axes whose labels are placed on another
   axis. */
5599 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5600 if (t->label_axis[a] != a)
5601 ctables_insert_clabels_values (t, c, a);
5604 casereader_destroy (group);
/* The group is complete: finish, output, and reset every table. */
5606 for (size_t i = 0; i < ct->n_tables; i++)
5608 struct ctables_table *t = ct->tables[i];
5610 if (t->clabels_example)
5611 ctables_sort_clabels_values (t);
5613 for (size_t j = 0; j < t->n_sections; j++)
5614 ctables_section_add_empty_categories (&t->sections[j]);
5616 ctables_table_output (ct, t);
5617 ctables_table_clear (t);
5620 return casegrouper_destroy (grouper);
/* Searches CT->postcomputes for a postcompute called NAME.  Candidates are
   taken from the entries whose hash equals the case-insensitive hash of
   NAME, and names are compared with utf8_strcasecmp(), so the lookup
   ignores case.

   The result statements are not visible in this excerpt; callers test the
   result against null, so presumably the match is returned, or null when
   there is none -- confirm. */
5623 static struct ctables_postcompute *
5624 ctables_find_postcompute (struct ctables *ct, const char *name)
5626 struct ctables_postcompute *pc;
5627 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5628 utf8_hash_case_string (name, 0), &ct->postcomputes)
5629 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition from LEXER.  The visible code expects an
   ampersand, an identifier naming the postcompute, an equals sign, the
   keyword EXPR, and a parenthesized expression parsed with
   ctables_pcexpr_parse_add() against DICT.

   If a postcompute with the same name already exists, a warning and a note
   pointing at the previous definition are issued and the expression and
   location of the existing entry are destroyed so they can be replaced.
   Otherwise a new entry is allocated and inserted into the postcomputes map
   under the case-insensitive hash of its name.  The entry then records the
   source location and a label taken from the source text of the
   expression.

   The remaining parameters, the error exits, and the result statements are
   not visible in this excerpt.  NOTE(review): on the override path the
   release of the newly duplicated NAME is not visible -- confirm it is
   freed. */
5635 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The token before the current one is taken as the start of the definition,
   for error locations. */
5638 int pcompute_start = lex_ofs (lexer) - 1;
5640 if (!lex_match (lexer, T_AND))
5642 lex_error_expecting (lexer, "&");
5645 if (!lex_force_id (lexer))
5648 char *name = ss_xstrdup (lex_tokss (lexer));
5651 if (!lex_force_match (lexer, T_EQUALS)
5652 || !lex_force_match_id (lexer, "EXPR")
5653 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression so that its source text can
   serve as the label. */
5659 int expr_start = lex_ofs (lexer);
5660 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5661 int expr_end = lex_ofs (lexer) - 1;
5662 if (!expr || !lex_force_match (lexer, T_RPAREN))
5664 ctables_pcexpr_destroy (expr);
5668 int pcompute_end = lex_ofs (lexer) - 1;
5670 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5673 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5676 msg_at (SW, location, _("New definition of &%s will override the "
5677 "previous definition."),
5679 msg_at (SN, pc->location, _("This is the previous definition."));
5681 ctables_pcexpr_destroy (pc->expr);
5682 msg_location_destroy (pc->location);
5687 pc = xmalloc (sizeof *pc);
5688 *pc = (struct ctables_postcompute) { .name = name };
5689 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5690 utf8_hash_case_string (pc->name, 0));
5693 pc->location = location;
5695 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary function and format pairs from LEXER into *SSS,
   which is first reset to an empty set.

   Parsing continues until the end of the command, a slash, or an identifier
   that matches LABEL or HIDESOURCECATS.  Each item consists of a summary
   function, then for CTSF_PTILE a percentile in the closed range 0 to 100,
   then a format specifier.  Each parsed item is appended to SSS->specs,
   which grows with x2nrealloc() as needed.

   The error exits and result statements are not visible in this excerpt.
   The call to ctables_summary_spec_set_uninit() at the end is presumably
   the cleanup for the failure path -- confirm. */
5700 ctables_parse_pproperties_format (struct lexer *lexer,
5701 struct ctables_summary_spec_set *sss)
5703 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5705 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5706 && !(lex_token (lexer) == T_ID
5707 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5708 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5709 lex_tokss (lexer)))))
5711 /* Parse function. */
5712 enum ctables_summary_function function;
5713 enum ctables_weighting weighting;
5714 enum ctables_area_type area;
5715 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5718 /* Parse percentile. */
5719 double percentile = 0;
5720 if (function == CTSF_PTILE)
5722 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5724 percentile = lex_number (lexer);
/* Parse the format that follows the function. */
5729 struct fmt_spec format;
5730 bool is_ctables_format;
5731 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the parsed item, growing the array when it is full. */
5734 if (sss->n >= sss->allocated)
5735 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5736 sizeof *sss->specs);
5737 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5738 .function = function,
5739 .weighting = weighting,
5742 .percentile = percentile,
5744 .is_ctables_format = is_ctables_format,
5750 ctables_summary_spec_set_uninit (sss);
/* Parses a PPROPERTIES subcommand: a list of "&NAME" references to
   postcomputes that must already exist in CT, followed by any number of
   LABEL, FORMAT and HIDESOURCECATS settings.  Every setting is applied to
   each of the listed postcomputes. */
5755 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5757 struct ctables_postcompute **pcs = NULL;
5759 size_t allocated_pcs = 0;
/* Collect the postcomputes that the settings will apply to. */
5761 while (lex_match (lexer, T_AND))
5763 if (!lex_force_id (lexer))
5765 struct ctables_postcompute *pc
5766 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5769 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5774 if (n_pcs >= allocated_pcs)
5775 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Parse settings until the next subcommand or the end of the command. */
5779 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5781 if (lex_match_id (lexer, "LABEL"))
5783 lex_match (lexer, T_EQUALS);
5784 if (!lex_force_string (lexer))
/* Replace each postcompute's label with its own copy of the string. */
5787 for (size_t i = 0; i < n_pcs; i++)
5789 free (pcs[i]->label);
5790 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5795 else if (lex_match_id (lexer, "FORMAT"))
5797 lex_match (lexer, T_EQUALS);
/* Parse into a temporary set; each postcompute then receives its own clone
   and the temporary is released afterward. */
5799 struct ctables_summary_spec_set sss;
5800 if (!ctables_parse_pproperties_format (lexer, &sss))
5803 for (size_t i = 0; i < n_pcs; i++)
5806 ctables_summary_spec_set_uninit (pcs[i]->specs);
5808 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5809 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5811 ctables_summary_spec_set_uninit (&sss);
5813 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5815 lex_match (lexer, T_EQUALS);
5816 bool hide_source_cats;
5817 if (!parse_bool (lexer, &hide_source_cats))
5819 for (size_t i = 0; i < n_pcs; i++)
5820 pcs[i]->hide_source_cats = hide_source_cats;
5824 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the text that strftime() produces for NOW, interpreted as
   local time, under FORMAT.

   NOTE(review): no check of localtime()'s result or of strftime()'s return
   value is visible here; if either call fails, 'value' may not hold a valid
   string -- confirm. */
5837 put_strftime (struct string *out, time_t now, const char *format)
5839 const struct tm *tm = localtime (&now);
5841 strftime (value, sizeof value, format, tm);
5842 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S by PREFIX's length.  Callers in this
   file use the result as a "prefix was present" test. */
5846 skip_prefix (struct substring *s, struct substring prefix)
5848 if (ss_starts_with (*s, prefix))
5850 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of LEXER's tokens at offsets
   EXPR_START (inclusive) through EXPR_END (exclusive).

   An identifier that names a variable in DICT with a variable label is
   replaced by that label; any other identifier is copied as written.  Other
   tokens are copied as their source representation, separated by spaces. */
5858 put_table_expression (struct string *out, struct lexer *lexer,
5859 struct dictionary *dict, int expr_start, int expr_end)
5862 for (int ofs = expr_start; ofs < expr_end; ofs++)
5864 const struct token *t = lex_ofs_token (lexer, ofs);
/* Square brackets are tracked through 'nest'; a closing bracket is only
   treated as such while at least one bracket is open. */
5865 if (t->type == T_LBRACK)
5867 else if (t->type == T_RBRACK && nest > 0)
5873 else if (t->type == T_ID)
5875 const struct variable *var
5876 = dict_lookup_var (dict, t->string.string);
5877 const char *label = var ? var_get_label (var) : NULL;
5878 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space before the token unless it is the first token, it is a right
   parenthesis, or OUT already ends in a space. */
5882 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5883 ds_put_byte (out, ' ');
5885 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5886 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token or a left
   parenthesis. */
5889 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5890 ds_put_byte (out, ' ');
/* Appends IN to OUT, expanding three markers along the way:

   - ")DATE" becomes NOW formatted with the strftime format "%x".
   - ")TIME" becomes NOW formatted with the strftime format "%X".
   - ")TABLE" becomes the table expression found in LEXER's tokens between
     EXPR_START and EXPR_END, rendered by put_table_expression().

   A right parenthesis that does not begin one of these markers is copied
   unchanged. */
5896 put_title_text (struct string *out, struct substring in, time_t now,
5897 struct lexer *lexer, struct dictionary *dict,
5898 int expr_start, int expr_end)
/* Copy everything up to the next ')' verbatim. */
5902 size_t chunk = ss_find_byte (in, ')');
5903 ds_put_substring (out, ss_head (in, chunk));
5904 ss_advance (&in, chunk);
/* An empty remainder means that no further ')' was found. */
5905 if (ss_is_empty (in))
/* IN now starts with ')': try each marker in turn. */
5908 if (skip_prefix (&in, ss_cstr (")DATE")))
5909 put_strftime (out, now, "%x");
5910 else if (skip_prefix (&in, ss_cstr (")TIME")))
5911 put_strftime (out, now, "%X");
5912 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5913 put_table_expression (out, lexer, dict, expr_start, expr_end);
5916 ds_put_byte (out, ')');
5917 ss_advance (&in, 1);
/* Implements the CTABLES command.

   Parsing has two phases.  First come the global subcommands (FORMAT,
   VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT,
   HIDESMALLCOUNTS), which are read until the first TABLE keyword.  Then come
   one or more TABLE specifications, each followed by its own subcommands
   (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST).

   After parsing, the tables are executed over DS's cases and the result is
   CMD_SUCCESS or CMD_FAILURE. */
5923 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5925 struct casereader *input = NULL;
/* Measure-guessing pass: the guesser is created for DS, run over the cases
   obtained from proc_open(), and then destroyed. */
5927 struct measure_guesser *mg = measure_guesser_create (ds);
5930 input = proc_open (ds);
5931 measure_guesser_run (mg, input);
5932 measure_guesser_destroy (mg);
/* One variable-label display mode per dictionary variable, each starting out
   as the value of settings_get_show_variables(). */
5935 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5936 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5937 enum settings_value_show tvars = settings_get_show_variables ();
5938 for (size_t i = 0; i < n_vars; i++)
5939 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default table look, presumably so that
   changes made here, such as clearing omit_empty, stay local to this
   command. */
5941 struct pivot_table_look *look = pivot_table_look_unshare (
5942 pivot_table_look_ref (pivot_table_look_get_default ()));
5943 look->omit_empty = false;
5945 struct ctables *ct = xmalloc (sizeof *ct);
5946 *ct = (struct ctables) {
5947 .dict = dataset_dict (ds),
5949 .ctables_formats = FMT_SETTINGS_INIT,
5951 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once and handed to put_title_text(), so every )DATE and )TIME
   expansion in this command uses the same timestamp. */
5954 time_t now = time (NULL);
5959 const char *dot_string;
5960 const char *comma_string;
/* Style strings for the four CTEF_* custom formats.  Which of the two
   strings is used depends on whether the decimal point in the current format
   settings is '.'. */
5962 static const struct ctf ctfs[4] = {
5963 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5964 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5965 { CTEF_PAREN, "-,(,),", "-.(.)." },
5966 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5968 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5969 for (size_t i = 0; i < 4; i++)
5971 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5972 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5973 fmt_number_style_from_string (s));
/* Every subcommand is introduced by a slash.  The global subcommands are
   handled by the loop below, which ends once TABLE is matched. */
5976 if (!lex_force_match (lexer, T_SLASH))
5979 while (!lex_match_id (lexer, "TABLE"))
5981 if (lex_match_id (lexer, "FORMAT"))
/* widths[0] is MINCOLWIDTH and widths[1] is MAXCOLWIDTH, with SYSMIS meaning
   "not specified".  The unit defaults to 72 per inch, the same value that
   UNITS=POINTS selects. */
5983 double widths[2] = { SYSMIS, SYSMIS };
5984 double units_per_inch = 72.0;
5986 while (lex_token (lexer) != T_SLASH)
5988 if (lex_match_id (lexer, "MINCOLWIDTH"))
5990 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5993 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5995 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5998 else if (lex_match_id (lexer, "UNITS"))
6000 lex_match (lexer, T_EQUALS);
6001 if (lex_match_id (lexer, "POINTS"))
6002 units_per_inch = 72.0;
6003 else if (lex_match_id (lexer, "INCHES"))
6004 units_per_inch = 1.0;
6005 else if (lex_match_id (lexer, "CM"))
6006 units_per_inch = 2.54;
6009 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6013 else if (lex_match_id (lexer, "EMPTY"))
6018 lex_match (lexer, T_EQUALS);
6019 if (lex_match_id (lexer, "ZERO"))
6021 /* Nothing to do. */
6023 else if (lex_match_id (lexer, "BLANK"))
6024 ct->zero = xstrdup ("");
6025 else if (lex_force_string (lexer))
6027 ct->zero = ss_xstrdup (lex_tokss (lexer));
6033 else if (lex_match_id (lexer, "MISSING"))
6035 lex_match (lexer, T_EQUALS);
6036 if (!lex_force_string (lexer))
/* Any string other than "." is copied as given. */
6040 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6041 ? ss_xstrdup (lex_tokss (lexer))
6047 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6048 "UNITS", "EMPTY", "MISSING");
/* The two widths can only conflict when both were specified. */
6053 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6054 && widths[0] > widths[1])
6056 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each specified width from the chosen unit to inches and scale by
   96 before storing it in the look's horizontal width range. */
6060 for (size_t i = 0; i < 2; i++)
6061 if (widths[i] != SYSMIS)
6063 int *wr = ct->look->width_ranges[TABLE_HORZ];
6064 wr[i] = widths[i] / units_per_inch * 96.0;
6069 else if (lex_match_id (lexer, "VLABELS"))
6071 if (!lex_force_match_id (lexer, "VARIABLES"))
6073 lex_match (lexer, T_EQUALS);
6075 struct variable **vars;
6077 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6081 if (!lex_force_match_id (lexer, "DISPLAY"))
6086 lex_match (lexer, T_EQUALS);
6088 enum ctables_vlabel vlabel;
6089 if (lex_match_id (lexer, "DEFAULT"))
6090 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6091 else if (lex_match_id (lexer, "NAME"))
6093 else if (lex_match_id (lexer, "LABEL"))
6094 vlabel = CTVL_LABEL;
6095 else if (lex_match_id (lexer, "BOTH"))
6097 else if (lex_match_id (lexer, "NONE"))
6101 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen display mode for each listed variable, indexed by the
   variable's position in the dictionary. */
6107 for (size_t i = 0; i < n_vars; i++)
6108 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6111 else if (lex_match_id (lexer, "MRSETS"))
6113 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6115 lex_match (lexer, T_EQUALS);
6116 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6119 else if (lex_match_id (lexer, "SMISSING"))
6121 if (lex_match_id (lexer, "VARIABLE"))
6122 ct->smissing_listwise = false;
6123 else if (lex_match_id (lexer, "LISTWISE"))
6124 ct->smissing_listwise = true;
6127 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6131 else if (lex_match_id (lexer, "PCOMPUTE"))
6133 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6136 else if (lex_match_id (lexer, "PPROPERTIES"))
6138 if (!ctables_parse_pproperties (lexer, ct))
6141 else if (lex_match_id (lexer, "WEIGHT"))
6143 if (!lex_force_match_id (lexer, "VARIABLE"))
6145 lex_match (lexer, T_EQUALS);
6146 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6150 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6152 if (lex_match_id (lexer, "COUNT"))
6154 lex_match (lexer, T_EQUALS);
6155 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6158 ct->hide_threshold = lex_integer (lexer);
/* HIDESMALLCOUNTS without COUNT: use a threshold of 5 unless a nonzero
   threshold is already in place. */
6161 else if (ct->hide_threshold == 0)
6162 ct->hide_threshold = 5;
6166 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6167 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6168 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6172 if (!lex_force_match (lexer, T_SLASH))
/* Parse TABLE specifications until the end of the command, growing the
   table array as needed. */
6176 size_t allocated_tables = 0;
6179 if (ct->n_tables >= allocated_tables)
6180 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6181 sizeof *ct->tables);
/* Set up a default category (ascending sort, missing values excluded) and
   the per-variable categories array for the new table. */
6183 struct ctables_category *cat = xmalloc (sizeof *cat);
6184 *cat = (struct ctables_category) {
6186 .include_missing = false,
6187 .sort_ascending = true,
6190 struct ctables_categories *c = xmalloc (sizeof *c);
6191 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6192 *c = (struct ctables_categories) {
6199 struct ctables_categories **categories = xnmalloc (n_vars,
6200 sizeof *categories);
6201 for (size_t i = 0; i < n_vars; i++)
/* New table with its defaults: summary labels are visible and sit on the
   column axis. */
6204 struct ctables_table *t = xmalloc (sizeof *t);
6205 *t = (struct ctables_table) {
6207 .slabels_axis = PIVOT_AXIS_COLUMN,
6208 .slabels_visible = true,
6209 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6211 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6212 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6213 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6215 .clabels_from_axis = PIVOT_AXIS_LAYER,
6216 .clabels_to_axis = PIVOT_AXIS_LAYER,
6217 .categories = categories,
6218 .n_categories = n_vars,
6221 ct->tables[ct->n_tables++] = t;
6223 lex_match (lexer, T_EQUALS);
/* Remember the token range of the table expression (rows BY columns BY
   layers) so that )TABLE in a title can reproduce it. */
6224 int expr_start = lex_ofs (lexer);
6225 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6226 &t->axes[PIVOT_AXIS_ROW]))
6228 if (lex_match (lexer, T_BY))
6230 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6231 &t->axes[PIVOT_AXIS_COLUMN]))
6234 if (lex_match (lexer, T_BY))
6236 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6237 &t->axes[PIVOT_AXIS_LAYER]))
6241 int expr_end = lex_ofs (lexer);
/* A table with all three axes empty is rejected. */
6243 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6244 && !t->axes[PIVOT_AXIS_LAYER])
6246 lex_error (lexer, _("At least one variable must be specified."));
/* Look for a scale variable on each axis; the messages below are for the
   case where scale variables appear on more than one axis. */
6250 const struct ctables_axis *scales[PIVOT_N_AXES];
6251 size_t n_scales = 0;
6252 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6254 scales[a] = find_scale (t->axes[a]);
6260 msg (SE, _("Scale variables may appear only on one axis."));
6261 if (scales[PIVOT_AXIS_ROW])
6262 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6263 _("This scale variable appears on the rows axis."));
6264 if (scales[PIVOT_AXIS_COLUMN])
6265 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6266 _("This scale variable appears on the columns axis."));
6267 if (scales[PIVOT_AXIS_LAYER])
6268 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6269 _("This scale variable appears on the layer axis."));
/* An axis has a summary if it has a scale variable or, failing that, if
   find_categorical_summary_spec() finds one on it.  Summaries on more than
   one axis are an error. */
6273 const struct ctables_axis *summaries[PIVOT_N_AXES];
6274 size_t n_summaries = 0;
6275 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6277 summaries[a] = (scales[a]
6279 : find_categorical_summary_spec (t->axes[a]));
6283 if (n_summaries > 1)
6285 msg (SE, _("Summaries may appear only on one axis."));
6286 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6289 msg_at (SN, summaries[a]->loc,
6291 ? _("This variable on the rows axis has a summary.")
6292 : a == PIVOT_AXIS_COLUMN
6293 ? _("This variable on the columns axis has a summary.")
6294 : _("This variable on the layers axis has a summary."));
6296 msg_at (SN, summaries[a]->loc,
6297 _("This is a scale variable, so it always has a "
6298 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that carries a summary or, when no axis
   has one, an axis that has any variables at all. */
6303 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6304 if (n_summaries ? summaries[a] : t->axes[a])
6306 t->summary_axis = a;
/* No subcommands follow this TABLE. */
6310 if (lex_token (lexer) == T_ENDCMD)
6312 if (!ctables_prepare_table (t))
6316 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, read until the next TABLE or the end of the
   command. */
6319 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6321 if (lex_match_id (lexer, "SLABELS"))
6323 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6325 if (lex_match_id (lexer, "POSITION"))
6327 lex_match (lexer, T_EQUALS);
6328 if (lex_match_id (lexer, "COLUMN"))
6329 t->slabels_axis = PIVOT_AXIS_COLUMN;
6330 else if (lex_match_id (lexer, "ROW"))
6331 t->slabels_axis = PIVOT_AXIS_ROW;
6332 else if (lex_match_id (lexer, "LAYER"))
6333 t->slabels_axis = PIVOT_AXIS_LAYER;
6336 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6340 else if (lex_match_id (lexer, "VISIBLE"))
6342 lex_match (lexer, T_EQUALS);
6343 if (!parse_bool (lexer, &t->slabels_visible))
6348 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6353 else if (lex_match_id (lexer, "CLABELS"))
6355 if (lex_match_id (lexer, "AUTO"))
6357 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6358 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6360 else if (lex_match_id (lexer, "ROWLABELS"))
6362 lex_match (lexer, T_EQUALS);
6363 if (lex_match_id (lexer, "OPPOSITE"))
6364 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6365 else if (lex_match_id (lexer, "LAYER"))
6366 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6369 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6373 else if (lex_match_id (lexer, "COLLABELS"))
6375 lex_match (lexer, T_EQUALS);
6376 if (lex_match_id (lexer, "OPPOSITE"))
6377 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6378 else if (lex_match_id (lexer, "LAYER"))
6379 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6382 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6388 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6393 else if (lex_match_id (lexer, "CRITERIA"))
6395 if (!lex_force_match_id (lexer, "CILEVEL"))
6397 lex_match (lexer, T_EQUALS);
6399 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6401 t->cilevel = lex_number (lexer);
6404 else if (lex_match_id (lexer, "CATEGORIES"))
6406 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6410 else if (lex_match_id (lexer, "TITLES"))
6415 if (lex_match_id (lexer, "CAPTION"))
6416 textp = &t->caption;
6417 else if (lex_match_id (lexer, "CORNER"))
6419 else if (lex_match_id (lexer, "TITLE"))
6423 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6426 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces, with )DATE,
   )TIME and )TABLE expanded in each by put_title_text(). */
6428 struct string s = DS_EMPTY_INITIALIZER;
6429 while (lex_is_string (lexer))
6431 if (!ds_is_empty (&s))
6432 ds_put_byte (&s, ' ');
6433 put_title_text (&s, lex_tokss (lexer), now,
6434 lexer, dataset_dict (ds),
6435 expr_start, expr_end);
6439 *textp = ds_steal_cstr (&s);
6441 while (lex_token (lexer) != T_SLASH
6442 && lex_token (lexer) != T_ENDCMD);
6444 else if (lex_match_id (lexer, "SIGTEST"))
/* The SIGTEST keyword has already been consumed, so step back one token to
   make the error range reported below start at the keyword. */
6446 int start_ofs = lex_ofs (lexer) - 1;
6449 t->chisq = xmalloc (sizeof *t->chisq);
6450 *t->chisq = (struct ctables_chisq) {
6452 .include_mrsets = true,
6453 .all_visible = true,
6459 if (lex_match_id (lexer, "TYPE"))
6461 lex_match (lexer, T_EQUALS);
6462 if (!lex_force_match_id (lexer, "CHISQUARE"))
6465 else if (lex_match_id (lexer, "ALPHA"))
6467 lex_match (lexer, T_EQUALS);
6468 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6470 t->chisq->alpha = lex_number (lexer);
6473 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6475 lex_match (lexer, T_EQUALS);
6476 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6479 else if (lex_match_id (lexer, "CATEGORIES"))
6481 lex_match (lexer, T_EQUALS);
6482 if (lex_match_id (lexer, "ALLVISIBLE"))
6483 t->chisq->all_visible = true;
6484 else if (lex_match_id (lexer, "SUBTOTALS"))
6485 t->chisq->all_visible = false;
6488 lex_error_expecting (lexer,
6489 "ALLVISIBLE", "SUBTOTALS");
6495 lex_error_expecting (lexer, "TYPE", "ALPHA",
6496 "INCLUDEMRSETS", "CATEGORIES");
6500 while (lex_token (lexer) != T_SLASH
6501 && lex_token (lexer) != T_ENDCMD);
/* The settings are parsed, but the feature itself is reported as not yet
   implemented. */
6503 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6504 _("Support for SIGTEST not yet implemented."));
6507 else if (lex_match_id (lexer, "COMPARETEST"))
/* NOTE(review): unlike SIGTEST above, this offset does not subtract 1, so
   the COMPARETEST keyword itself is not part of the range passed to
   lex_ofs_error() below -- confirm whether that is intended. */
6509 int start_ofs = lex_ofs (lexer);
6512 t->pairwise = xmalloc (sizeof *t->pairwise);
6513 *t->pairwise = (struct ctables_pairwise) {
6515 .alpha = { .05, .05 },
6516 .adjust = BONFERRONI,
6517 .include_mrsets = true,
6518 .meansvariance_allcats = true,
6519 .all_visible = true,
6528 if (lex_match_id (lexer, "TYPE"))
6530 lex_match (lexer, T_EQUALS);
6531 if (lex_match_id (lexer, "PROP"))
6532 t->pairwise->type = PROP;
6533 else if (lex_match_id (lexer, "MEAN"))
6534 t->pairwise->type = MEAN;
6537 lex_error_expecting (lexer, "PROP", "MEAN");
6541 else if (lex_match_id (lexer, "ALPHA"))
6543 lex_match (lexer, T_EQUALS);
/* ALPHA takes one or two values, optionally separated by a comma.  With two
   values the smaller is stored first; with one, both slots get it. */
6545 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6547 double a0 = lex_number (lexer);
6550 lex_match (lexer, T_COMMA);
6551 if (lex_is_number (lexer))
6553 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6555 double a1 = lex_number (lexer);
6558 t->pairwise->alpha[0] = MIN (a0, a1);
6559 t->pairwise->alpha[1] = MAX (a0, a1);
6562 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6564 else if (lex_match_id (lexer, "ADJUST"))
6566 lex_match (lexer, T_EQUALS);
6567 if (lex_match_id (lexer, "BONFERRONI"))
6568 t->pairwise->adjust = BONFERRONI;
6569 else if (lex_match_id (lexer, "BH"))
6570 t->pairwise->adjust = BH;
6571 else if (lex_match_id (lexer, "NONE"))
6572 t->pairwise->adjust = 0;
6575 lex_error_expecting (lexer, "BONFERRONI", "BH",
6580 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6582 lex_match (lexer, T_EQUALS);
6583 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6586 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6588 lex_match (lexer, T_EQUALS);
6589 if (lex_match_id (lexer, "ALLCATS"))
6590 t->pairwise->meansvariance_allcats = true;
6591 else if (lex_match_id (lexer, "TESTEDCATS"))
6592 t->pairwise->meansvariance_allcats = false;
6595 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6599 else if (lex_match_id (lexer, "CATEGORIES"))
6601 lex_match (lexer, T_EQUALS);
6602 if (lex_match_id (lexer, "ALLVISIBLE"))
6603 t->pairwise->all_visible = true;
6604 else if (lex_match_id (lexer, "SUBTOTALS"))
6605 t->pairwise->all_visible = false;
6608 lex_error_expecting (lexer, "ALLVISIBLE",
6613 else if (lex_match_id (lexer, "MERGE"))
6615 lex_match (lexer, T_EQUALS);
6616 if (!parse_bool (lexer, &t->pairwise->merge))
6619 else if (lex_match_id (lexer, "STYLE"))
6621 lex_match (lexer, T_EQUALS);
6622 if (lex_match_id (lexer, "APA"))
6623 t->pairwise->apa_style = true;
6624 else if (lex_match_id (lexer, "SIMPLE"))
6625 t->pairwise->apa_style = false;
6628 lex_error_expecting (lexer, "APA", "SIMPLE");
6632 else if (lex_match_id (lexer, "SHOWSIG"))
6634 lex_match (lexer, T_EQUALS);
6635 if (!parse_bool (lexer, &t->pairwise->show_sig))
6640 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6641 "INCLUDEMRSETS", "MEANSVARIANCE",
6642 "CATEGORIES", "MERGE", "STYLE",
6647 while (lex_token (lexer) != T_SLASH
6648 && lex_token (lexer) != T_ENDCMD);
6650 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6651 _("Support for COMPARETEST not yet implemented."));
6656 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6657 "CRITERIA", "CATEGORIES", "TITLES",
6658 "SIGTEST", "COMPARETEST");
6662 if (!lex_match (lexer, T_SLASH))
/* Work out where category labels move from and to.  Labels may be moved off
   the row axis or off the column axis, but not both. */
6666 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6668 t->clabels_from_axis = PIVOT_AXIS_ROW;
6669 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6671 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6675 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6676 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6677 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6679 if (!ctables_prepare_table (t))
6682 while (lex_token (lexer) != T_ENDCMD);
/* All tables are parsed: read the cases and run the tables.  proc_commit()
   is evaluated whether or not execution succeeded, and the command succeeds
   only if both did. */
6685 input = proc_open (ds);
6686 bool ok = ctables_execute (ds, input, ct);
6687 ok = proc_commit (ds) && ok;
6689 ctables_destroy (ct);
6690 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* NOTE(review): second cleanup site, presumably the shared failure exit --
   confirm. */
6695 ctables_destroy (ct);