1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Translation helpers.  _() passes MSGID through gettext() at the point of
   use.  N_() expands to its argument unchanged, so a string wrapped in it is
   not translated at that point. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
/* Which weights a summary uses.  Code in this file assigns and tests the
   values CTW_EFFECTIVE, CTW_DICTIONARY, and CTW_UNWEIGHTED of this type. */
58 enum ctables_weighting
/* The enumerators double as indexes into ctables_area_type_name[], and
   parse_ctables_summary_function() tries them in order from 0 up to N_CTATS,
   which is why their declaration order matters. */
66 /* CTABLES table areas. */
68 enum ctables_area_type
70 /* Within a section, where stacked variables divide one section from
73 Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that
74 parse_ctables_summary_function() parses correctly. */
75 CTAT_TABLE, /* All layers of a whole section. */
76 CTAT_LAYERROW, /* Row in one layer within a section. */
77 CTAT_LAYERCOL, /* Column in one layer within a section. */
78 CTAT_LAYER, /* One layer within a section. */
80 /* Within a subtable, where a subtable pairs an innermost row variable with
81 an innermost column variable within a single layer. */
82 CTAT_SUBTABLE, /* Whole subtable. */
83 CTAT_ROW, /* Row within a subtable. */
84 CTAT_COL, /* Column within a subtable. */
/* Keyword for each area type, indexed by enum ctables_area_type.  It is used
   to recognize an area prefix while parsing a summary function name and to
   rebuild a function's full name in ctables_summary_function_name(). */
88 static const char *ctables_area_type_name[N_CTATS] = {
89 [CTAT_TABLE] = "TABLE",
90 [CTAT_LAYER] = "LAYER",
91 [CTAT_LAYERROW] = "LAYERROW",
92 [CTAT_LAYERCOL] = "LAYERCOL",
93 [CTAT_SUBTABLE] = "SUBTABLE",
/* Members: hash map linkage, a pointer to an example cell, and the arrays
   count[], valid[], and total[], each holding one double per weighting mode
   (N_CTWS entries), plus a pointer to an array of struct ctables_sum. */
100 struct hmap_node node;
102 const struct ctables_cell *example;
105 double count[N_CTWS];
106 double valid[N_CTWS];
107 double total[N_CTWS];
108 struct ctables_sum *sums;
116 /* CTABLES summary functions. */
/* Classifies summary functions by the weights they operate on and by the
   name prefixes they accept.  The ctables_function_info[] table derives its
   u_prefix, e_prefix, and is_area flags from the values CTFT_UCELL,
   CTFT_UECELL, and CTFT_AREA of this type. */
118 enum ctables_function_type
120 /* A function that operates on data in a single cell. It operates on
121 effective weights. It does not have an unweighted version. */
124 /* A function that operates on data in a single cell. The function
125 operates on effective weights and has a U-prefixed unweighted
129 /* A function that operates on data in a single cell. It operates on
130 dictionary weights, and has a U-prefixed unweighted version and an
131 E-prefixed effective weight version. */
134 /* A function that operates on an area of cells. It operates on effective
135 weights and has a U-prefixed unweighted version. */
/* The kinds of variables that a summary function may be applied to.
   add_summary_spec() switches on this value to reject unsupported
   combinations of function and variable. */
146 enum ctables_function_availability
148 CTFA_ALL, /* Any variables. */
149 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
150 //CTFA_MRSETS, /* Only multiple-response sets */
/* The summary functions themselves.  The enumerators are generated by
   expanding S() over the entries of ctables.inc, keeping only each entry's
   ENUM argument.  The second expansion below turns every entry into "+1",
   which adds up to the number of entries (presumably N_CTSF_FUNCTIONS). */
153 enum ctables_summary_function
155 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM,
156 #include "ctables.inc"
161 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1
163 #include "ctables.inc"
/* Static description of one summary function: its base name (the name
   without any weighting or area prefix), its type, default format class, and
   availability, plus flags recording which name prefixes it accepts. */
167 struct ctables_function_info
169 struct substring basename;
170 enum ctables_function_type type;
171 enum ctables_format format;
172 enum ctables_function_availability availability;
174 bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */
175 bool e_prefix; /* Accepts an 'E' prefix (for effective)? */
176 bool is_area; /* Needs an area prefix. */
/* One entry per summary function, generated from ctables.inc.  The prefix
   flags are computed from TYPE: a U prefix is accepted for CTFT_UCELL,
   CTFT_UECELL, and CTFT_AREA functions, an E prefix only for CTFT_UECELL
   functions, and is_area is set exactly for CTFT_AREA functions. */
178 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
179 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \
181 .basename = SS_LITERAL_INITIALIZER (NAME), \
184 .availability = AVAILABILITY, \
185 .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
186 .e_prefix = (TYPE) == CTFT_UECELL, \
187 .is_area = (TYPE) == CTFT_AREA \
189 #include "ctables.inc"
/* Returns the default output format for FUNCTION applied to VAR.  The
   function's format class is looked up in a table generated from
   ctables.inc.  Depending on that class, the result is an F format of width
   40, a PCT format of width 40 with 1 decimal, or VAR's own print format. */
193 static struct fmt_spec
194 ctables_summary_default_format (enum ctables_summary_function function,
195 const struct variable *var)
197 static const enum ctables_format default_formats[] = {
198 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
199 #include "ctables.inc"
202 switch (default_formats[function])
205 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
208 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
211 return *var_get_print_format (var);
/* Returns the availability class of summary function F, looked up in a
   table generated from the AVAILABILITY column of ctables.inc. */
218 static enum ctables_function_availability
219 ctables_function_availability (enum ctables_summary_function f)
221 static enum ctables_function_availability availability[] = {
222 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
223 #include "ctables.inc"
227 return availability[f];
/* Parses a summary function name from LEXER's current token, which must be
   an identifier, and reports the result through *FUNCTION, *WEIGHTING, and
   *AREA.

   Names ending in .LCL, .UCL, or .SE are rejected as not yet implemented.
   Otherwise a leading U (either case) is stripped if present; only if there
   was no U is a leading E stripped.  Each area keyword from
   ctables_area_type_name[] is then tried as a prefix.  The name PCT is
   accepted as shorthand for the PCT.COUNT area function with effective
   weighting.  The remaining name is compared, ignoring case, against each
   entry's basename in ctables_function_info[], and the U, E, and area
   prefixes seen are checked against what that entry accepts.

   When a function is matched by name, the weighting is effective if E was
   given, otherwise dictionary weighting if the function accepts an E prefix.
   If nothing matches, the error "Expecting summary function name." is
   issued. */
231 parse_ctables_summary_function (struct lexer *lexer,
232 enum ctables_summary_function *function,
233 enum ctables_weighting *weighting,
234 enum ctables_area_type *area)
236 if (!lex_force_id (lexer))
239 struct substring name = lex_tokss (lexer);
240 if (ss_ends_with_case (name, ss_cstr (".LCL"))
241 || ss_ends_with_case (name, ss_cstr (".UCL"))
242 || ss_ends_with_case (name, ss_cstr (".SE")))
244 lex_error (lexer, _("Support for LCL, UCL, and SE summary functions "
245 "is not yet implemented."));
249 bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
250 bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
252 bool has_area = false;
254 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
255 if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at])))
260 if (ss_equals_case (name, ss_cstr ("PCT")))
262 /* Special case where .COUNT suffix is omitted. */
263 *function = CTSF_areaPCT_COUNT;
264 *weighting = CTW_EFFECTIVE;
271 for (int f = 0; f < N_CTSF_FUNCTIONS; f++)
273 const struct ctables_function_info *cfi = &ctables_function_info[f];
274 if (ss_equals_case (cfi->basename, name))
277 if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
280 *weighting = (e ? CTW_EFFECTIVE
282 : cfi->e_prefix ? CTW_DICTIONARY
289 lex_error (lexer, _("Expecting summary function name."));
/* Formats the full name of FUNCTION into BUFFER, writing at most BUFSIZE
   bytes via snprintf().  The name is a weighting prefix, then the keyword
   for AREA if FUNCTION is an area function, then the function's base name.
   The weighting prefix is "U" for CTW_UNWEIGHTED, empty for CTW_DICTIONARY,
   and otherwise "E" if the function accepts an E prefix. */
294 ctables_summary_function_name (enum ctables_summary_function function,
295 enum ctables_weighting weighting,
296 enum ctables_area_type area,
297 char *buffer, size_t bufsize)
299 const struct ctables_function_info *cfi = &ctables_function_info[function];
300 snprintf (buffer, bufsize, "%s%s%s",
301 (weighting == CTW_UNWEIGHTED ? "U"
302 : weighting == CTW_DICTIONARY ? ""
303 : cfi->e_prefix ? "E"
305 cfi->is_area ? ctables_area_type_name[area] : "",
306 cfi->basename.string);
/* Returns the default label for FUNCTION with the given WEIGHTING and AREA.

   Translatable labels are wrapped in N_(), so they are returned
   untranslated.  The "... ID" labels are deliberately left unmarked because
   they are for developers only.

   'w' is true for any weighting other than CTW_UNWEIGHTED and selects
   between the plain and the "Unweighted ..." wording.  'd' is true only for
   CTW_DICTIONARY and, for the count, TOTALN, and VALIDN labels, selects the
   plain wording over the "Adjusted ..." one.

   CTSF_PTILE must not be passed here (NOT_REACHED):
   ctables_summary_function_label() builds that label itself because it
   embeds the percentile value. */
311 ctables_summary_function_label__ (enum ctables_summary_function function,
312 enum ctables_weighting weighting,
313 enum ctables_area_type area)
315 bool w = weighting != CTW_UNWEIGHTED;
316 bool d = weighting == CTW_DICTIONARY;
317 enum ctables_area_type a = area;
321 return (d ? N_("Count")
322 : w ? N_("Adjusted Count")
323 : N_("Unweighted Count"));
325 case CTSF_areaPCT_COUNT:
328 case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %");
329 case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %");
330 case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %");
331 case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %");
332 case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %");
333 case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %");
334 case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %");
338 case CTSF_areaPCT_VALIDN:
341 case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %");
342 case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %");
343 case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %");
344 case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %");
345 case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %");
346 case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %");
347 case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %");
351 case CTSF_areaPCT_TOTALN:
354 case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %");
355 case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %");
356 case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %");
357 case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %");
358 case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %");
359 case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %");
360 case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %");
364 case CTSF_MAXIMUM: return N_("Maximum");
365 case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
366 case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
367 case CTSF_MINIMUM: return N_("Minimum");
368 case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
369 case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
370 case CTSF_PTILE: NOT_REACHED ();
371 case CTSF_RANGE: return N_("Range");
372 case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
373 case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
374 case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
375 case CTSF_TOTALN: return (d ? N_("Total N")
376 : w ? N_("Adjusted Total N")
377 : N_("Unweighted Total N"));
378 case CTSF_VALIDN: return (d ? N_("Valid N")
379 : w ? N_("Adjusted Valid N")
380 : N_("Unweighted Valid N"));
381 case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
382 case CTSF_areaPCT_SUM:
385 case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %");
386 case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %");
387 case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %");
388 case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %");
389 case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %");
390 case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %");
391 case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %");
398 /* Don't bother translating these: they are for developers only. */
399 case CTAT_TABLE: return "Table ID";
400 case CTAT_LAYER: return "Layer ID";
401 case CTAT_LAYERROW: return "Layer Row ID";
402 case CTAT_LAYERCOL: return "Layer Column ID";
403 case CTAT_SUBTABLE: return "Subtable ID";
404 case CTAT_ROW: return "Row ID";
405 case CTAT_COL: return "Column ID";
/* Returns a new pivot_value containing the default label for FUNCTION.

   CTSF_PTILE is handled here: its label is a translated "Percentile" or
   "Unweighted Percentile" string (chosen by WEIGHTING) with the percentile
   formatted to two decimals.  Every other function takes its label from
   ctables_summary_function_label__(). */
413 static struct pivot_value *
414 ctables_summary_function_label (enum ctables_summary_function function,
415 enum ctables_weighting weighting,
416 enum ctables_area_type area,
419 if (function == CTSF_PTILE)
421 char *s = (weighting != CTW_UNWEIGHTED
422 ? xasprintf (_("Percentile %.2f"), percentile)
423 : xasprintf (_("Unweighted Percentile %.2f"), percentile));
424 return pivot_value_new_user_text_nocopy (s);
427 return pivot_value_new_text (ctables_summary_function_label__ (
428 function, weighting, area));
431 /* CTABLES summaries. */
/* One summary to calculate and how to display it.

   NOTE(review): the member comment below describes a 'weighted' flag that
   "must be true" for some functions, but the member is actually 'weighting',
   an enum ctables_weighting.  Confirm the intended constraint and update the
   wording. */
433 struct ctables_summary_spec
435 /* The calculation to be performed.
437 'function' is the function to calculate. 'weighted' specifies whether
438 to use weighted or unweighted data (for functions that do not support a
439 choice, it must be true). 'calc_area' is the area over which the
440 calculation takes place (for functions that target only an individual
441 cell, it must be 0). For CTSF_PTILE only, 'percentile' is the
442 percentile between 0 and 100 (for other functions it must be 0). */
443 enum ctables_summary_function function;
444 enum ctables_weighting weighting;
445 enum ctables_area_type calc_area;
446 double percentile; /* CTSF_PTILE only. */
448 /* How to display the result of the calculation.
450 'label' is a user-specified label, NULL if the user didn't specify
453 'user_area' is usually the same as 'calc_area', but when category labels
454 are rotated from one axis to another it swaps rows and columns.
456 'format' is the format for displaying the output. If
457 'is_ctables_format' is true, then 'format.type' is one of the special
458 CTEF_* formats instead of the standard ones. */
460 enum ctables_area_type user_area;
461 struct fmt_spec format;
462 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Initializes DST from SRC.  DST receives its own duplicate of SRC's label,
   or a null label if SRC has none (xstrdup_if_nonnull). */
469 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
470 const struct ctables_summary_spec *src)
473 dst->label = xstrdup_if_nonnull (src->label);
/* Uninitializes S.  ctables_summary_spec_set_uninit() calls this on every
   element of a set's 'specs' array. */
477 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
483 /* Collections of summary functions. */
/* An array of summary specs together with the variable they apply to.
   add_summary_spec() appends to 'specs', enlarging it with x2nrealloc()
   whenever the element count 'n' reaches 'allocated'. */
485 struct ctables_summary_spec_set
487 struct ctables_summary_spec *specs;
491 /* The variable to which the summary specs are applied. */
492 struct variable *var;
494 /* Whether the variable to which the summary specs are applied is a scale
495 variable for the purpose of summarization.
497 (VALIDN and TOTALN act differently for summarizing scale and categorical
501 /* If any of these optional additional scale variables are missing, then
502 treat 'var' as if it's missing too. This is for implementing
503 SMISSING=LISTWISE. */
504 struct variable **listwise_vars;
505 size_t n_listwise_vars;
/* Initializes DST as a copy of SRC.  Each spec is cloned into a newly
   allocated array of SRC->n elements; an empty SRC gets a null array rather
   than a zero-length allocation.  SRC's is_scale flag is copied. */
509 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
510 const struct ctables_summary_spec_set *src)
512 struct ctables_summary_spec *specs
513 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
514 for (size_t i = 0; i < src->n; i++)
515 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
517 *dst = (struct ctables_summary_spec_set) {
522 .is_scale = src->is_scale,
/* Uninitializes each of SET's specs and frees its listwise_vars array. */
527 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
529 for (size_t i = 0; i < set->n; i++)
530 ctables_summary_spec_uninit (&set->specs[i]);
531 free (set->listwise_vars);
/* Tests case C against SPECS's listwise variables: each variable's numeric
   value in C is checked with var_is_num_missing().  Presumably the result is
   true as soon as one of them is missing; confirm against the full
   function. */
536 is_listwise_missing (const struct ctables_summary_spec_set *specs,
537 const struct ccase *c)
539 for (size_t i = 0; i < specs->n_listwise_vars; i++)
541 const struct variable *var = specs->listwise_vars[i];
542 if (var_is_num_missing (var, case_num (c, var)))
549 /* CTABLES postcompute expressions. */
/* A named postcompute definition: its name, where it was defined, its
   expression tree, an optional set of summary specs, and a flag for hiding
   source categories.  Instances are kept in a hash map. */
551 struct ctables_postcompute
553 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
554 char *name; /* Name, without leading &. */
556 struct msg_location *location; /* Location of definition. */
557 struct ctables_pcexpr *expr;
559 struct ctables_summary_spec_set *specs;
560 bool hide_source_cats;
/* A node in a postcompute expression tree.  The node's operation, an enum
   ctables_pcexpr_op, determines which of the members documented below is
   meaningful.  Operator nodes point to their operands through 'subs'. */
563 struct ctables_pcexpr
573 enum ctables_pcexpr_op
576 CTPO_CONSTANT, /* 5 */
577 CTPO_CAT_NUMBER, /* [5] */
578 CTPO_CAT_STRING, /* ["STRING"] */
579 CTPO_CAT_NRANGE, /* [LO THRU 5] */
580 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
581 CTPO_CAT_MISSING, /* MISSING */
582 CTPO_CAT_OTHERNM, /* OTHERNM */
583 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
584 CTPO_CAT_TOTAL, /* TOTAL */
598 /* CTPO_CAT_NUMBER. */
601 /* CTPO_CAT_STRING, in dictionary encoding. */
602 struct substring string;
604 /* CTPO_CAT_NRANGE. */
607 /* CTPO_CAT_SRANGE. */
608 struct substring srange[2];
610 /* CTPO_CAT_SUBTOTAL. */
611 size_t subtotal_index;
613 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
614 One element: CTPO_NEG. */
615 struct ctables_pcexpr *subs[2];
618 /* Source location. */
619 struct msg_location *location;
/* Forward declarations for the postcompute code.  parse_recursively_func is
   the signature of a parser for one precedence level.  The generic
   binary-operator parser receives one as PARSE_NEXT_LEVEL and calls it to
   parse each operand. */
623 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
626 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
627 enum ctables_pcexpr_op, struct ctables_pcexpr *sub0,
628 struct ctables_pcexpr *sub1);
630 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
631 struct dictionary *);
/* Releases what expression node E owns.  For CTPO_CAT_STRING that is its
   string, for CTPO_CAT_SRANGE both range strings.  Operator nodes destroy
   both 'subs' entries recursively.  The numeric and keyword category nodes
   own nothing extra.  E's source location is destroyed as well. */
634 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
640 case CTPO_CAT_STRING:
641 ss_dealloc (&e->string);
644 case CTPO_CAT_SRANGE:
645 for (size_t i = 0; i < 2; i++)
646 ss_dealloc (&e->srange[i]);
655 for (size_t i = 0; i < 2; i++)
656 ctables_pcexpr_destroy (e->subs[i]);
660 case CTPO_CAT_NUMBER:
661 case CTPO_CAT_NRANGE:
662 case CTPO_CAT_MISSING:
663 case CTPO_CAT_OTHERNM:
664 case CTPO_CAT_SUBTOTAL:
669 msg_location_destroy (e->location);
/* Allocates and returns a new expression node for binary operator OP whose
   operands are SUB0 and SUB1.  The node's location is the merger of the two
   operands' locations, so both operands must be nonnull. */
674 static struct ctables_pcexpr *
675 ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op,
676 struct ctables_pcexpr *sub0,
677 struct ctables_pcexpr *sub1)
679 struct ctables_pcexpr *e = xmalloc (sizeof *e);
680 *e = (struct ctables_pcexpr) {
682 .subs = { sub0, sub1 },
683 .location = msg_location_merged (sub0->location, sub1->location),
688 /* How to parse an operator. */
/* Pairs a lexer token type with the expression operation it denotes.  The
   precedence-level parsers pass arrays of these to the generic
   binary-operator parser. */
691 enum token_type token;
692 enum ctables_pcexpr_op op;
/* Looks through the N_OPS entries of OPS for one whose token type equals
   LEXER's current token.  T_NEG_NUM is singled out on a match.
   NOTE(review): presumably the token is consumed only for other token types,
   because a negative-number token is also the right-hand operand; confirm
   against the full function. */
695 static const struct operator *
696 ctables_pcexpr_match_operator (struct lexer *lexer,
697 const struct operator ops[], size_t n_ops)
699 for (const struct operator *op = ops; op < ops + n_ops; op++)
700 if (lex_token (lexer) == op->token)
702 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS entries of OPS,
   given that the leftmost operand LHS has already been parsed.  After each
   operator, the right operand is parsed with PARSE_NEXT_LEVEL and combined
   with the expression so far into a new binary node that becomes the new
   left operand, which makes the chain left-associative.

   The loop counts the operators it has handled.  CHAIN_WARNING, if nonnull,
   is issued as a warning at the expression's location when that count
   exceeds 1.  LHS is destroyed on an error path (presumably when the right
   operand fails to parse). */
711 static struct ctables_pcexpr *
712 ctables_pcexpr_parse_binary_operators__ (
713 struct lexer *lexer, struct dictionary *dict,
714 const struct operator ops[], size_t n_ops,
715 parse_recursively_func *parse_next_level,
716 const char *chain_warning, struct ctables_pcexpr *lhs)
718 for (int op_count = 0; ; op_count++)
720 const struct operator *op
721 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
724 if (op_count > 1 && chain_warning)
725 msg_at (SW, lhs->location, "%s", chain_warning);
730 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
733 ctables_pcexpr_destroy (lhs);
737 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: the first operand is
   parsed with PARSE_NEXT_LEVEL, and the rest of the chain is handled by
   ctables_pcexpr_parse_binary_operators__(). */
741 static struct ctables_pcexpr *
742 ctables_pcexpr_parse_binary_operators (
743 struct lexer *lexer, struct dictionary *dict,
744 const struct operator ops[], size_t n_ops,
745 parse_recursively_func *parse_next_level, const char *chain_warning)
747 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
751 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
/* Declared ahead of its definition because ctables_pcexpr_parse_primary()
   calls it to parse a parenthesized subexpression. */
756 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
757 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE node whose numeric range has bounds
   LOW and HIGH. */
759 static struct ctables_pcexpr
760 ctpo_cat_nrange (double low, double high)
762 return (struct ctables_pcexpr) {
763 .op = CTPO_CAT_NRANGE,
764 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE node whose string range has bounds
   LOW and HIGH.  The substrings are stored as given, not copied.  Callers in
   this file pass a substring with a null 'string' pointer for a LO or HI
   bound. */
768 static struct ctables_pcexpr
769 ctpo_cat_srange (struct substring low, struct substring high)
771 return (struct ctables_pcexpr) {
772 .op = CTPO_CAT_SRANGE,
773 .srange = { low, high },
/* Converts LEXER's current string token to DICT's encoding (presumably from
   UTF-8; confirm the argument order of recode_substring_pool()) and strips
   trailing spaces from the result. */
777 static struct substring
778 parse_substring (struct lexer *lexer, struct dictionary *dict)
780 struct substring s = recode_substring_pool (
781 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
782 ss_rtrim (&s, ss_cstr (" "));
/* Parses a primary postcompute expression.  The accepted forms are:

   - A number, which yields a CTPO_CONSTANT node.

   - The keywords MISSING, OTHERNM, and TOTAL.

   - SUBTOTAL, optionally followed by a bracketed integer of at least 1.
     Without the brackets the subtotal index is 0.

   - A bracketed category: [LO THRU x] where x is a string or a number,
     [number], [number THRU HI], [number THRU number], [string],
     [string THRU HI], or [string THRU string].  Open-ended numeric ranges
     use -DBL_MAX or DBL_MAX as the missing bound.  Open-ended string ranges
     use a substring whose 'string' pointer is null.  If the closing bracket
     is missing, any strings already allocated for the node are freed.

   - A parenthesized expression, parsed by ctables_pcexpr_parse_add().  The
     subexpression is destroyed if the closing parenthesis is missing.

   Other than for a parenthesized expression, the node is assembled in a
   local variable.  Its location is then set to span the tokens from the
   starting offset through the last token consumed, and a heap copy made
   with xmemdup() is returned. */
787 static struct ctables_pcexpr *
788 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
790 int start_ofs = lex_ofs (lexer);
791 struct ctables_pcexpr e;
792 if (lex_is_number (lexer))
794 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
795 .number = lex_number (lexer) };
798 else if (lex_match_id (lexer, "MISSING"))
799 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
800 else if (lex_match_id (lexer, "OTHERNM"))
801 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
802 else if (lex_match_id (lexer, "TOTAL"))
803 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
804 else if (lex_match_id (lexer, "SUBTOTAL"))
806 size_t subtotal_index = 0;
807 if (lex_match (lexer, T_LBRACK))
809 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
811 subtotal_index = lex_integer (lexer);
813 if (!lex_force_match (lexer, T_RBRACK))
816 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
817 .subtotal_index = subtotal_index };
819 else if (lex_match (lexer, T_LBRACK))
821 if (lex_match_id (lexer, "LO"))
823 if (!lex_force_match_id (lexer, "THRU"))
826 if (lex_is_string (lexer))
828 struct substring low = { .string = NULL };
829 struct substring high = parse_substring (lexer, dict);
830 e = ctpo_cat_srange (low, high);
834 if (!lex_force_num (lexer))
836 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
840 else if (lex_is_number (lexer))
842 double number = lex_number (lexer);
844 if (lex_match_id (lexer, "THRU"))
846 if (lex_match_id (lexer, "HI"))
847 e = ctpo_cat_nrange (number, DBL_MAX);
850 if (!lex_force_num (lexer))
852 e = ctpo_cat_nrange (number, lex_number (lexer));
857 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
860 else if (lex_is_string (lexer))
862 struct substring s = parse_substring (lexer, dict);
864 if (lex_match_id (lexer, "THRU"))
866 struct substring high;
868 if (lex_match_id (lexer, "HI"))
869 high = (struct substring) { .string = NULL };
872 if (!lex_force_string (lexer))
877 high = parse_substring (lexer, dict);
880 e = ctpo_cat_srange (s, high);
883 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
887 lex_error (lexer, NULL);
891 if (!lex_force_match (lexer, T_RBRACK))
893 if (e.op == CTPO_CAT_STRING)
894 ss_dealloc (&e.string);
895 else if (e.op == CTPO_CAT_SRANGE)
897 ss_dealloc (&e.srange[0]);
898 ss_dealloc (&e.srange[1]);
903 else if (lex_match (lexer, T_LPAREN))
905 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
908 if (!lex_force_match (lexer, T_RPAREN))
910 ctables_pcexpr_destroy (ep);
917 lex_error (lexer, NULL);
921 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
922 return xmemdup (&e, sizeof e);
/* Allocates and returns a new expression node built around SUB (judging by
   the name, a CTPO_NEG node with SUB as its operand).  Its location runs
   from token offset START_OFS through the token just before LEXER's current
   one. */
925 static struct ctables_pcexpr *
926 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
927 struct lexer *lexer, int start_ofs)
929 struct ctables_pcexpr *e = xmalloc (sizeof *e);
930 *e = (struct ctables_pcexpr) {
933 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Operands are primaries, and a warning
   about left-associativity is supplied for chained `**' operators.

   The ordinary path is taken unless the current token is a negative number
   immediately followed by `**'.  In that case the number's magnitude (the
   token value negated) becomes the left operand, the exponentiation chain is
   parsed from there, and the whole result is wrapped by
   ctables_pcexpr_allocate_neg(). */
938 static struct ctables_pcexpr *
939 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
941 static const struct operator op = { T_EXP, CTPO_POW };
943 const char *chain_warning =
944 _("The exponentiation operator (`**') is left-associative: "
945 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
946 "To disable this warning, insert parentheses.");
948 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
949 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
950 ctables_pcexpr_parse_primary,
953 /* Special case for situations like "-5**6", which must be parsed as
956 int start_ofs = lex_ofs (lexer);
957 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
958 *lhs = (struct ctables_pcexpr) {
960 .number = -lex_tokval (lexer),
961 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
965 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
967 ctables_pcexpr_parse_primary, chain_warning, lhs);
971 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
974 /* Parses the unary minus level. */
/* Without a leading T_DASH token, this defers to the exponentiation level.
   With one, it recurses to parse the operand, so repeated minus signs nest,
   and wraps the result using ctables_pcexpr_allocate_neg(). */
975 static struct ctables_pcexpr *
976 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
978 int start_ofs = lex_ofs (lexer);
979 if (!lex_match (lexer, T_DASH))
980 return ctables_pcexpr_parse_exp (lexer, dict);
982 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
986 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
989 /* Parses the multiplication and division level. */
/* T_ASTERISK maps to CTPO_MUL and T_SLASH to CTPO_DIV.  Operands are parsed
   at the unary minus level, and no chain warning is used. */
990 static struct ctables_pcexpr *
991 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
993 static const struct operator ops[] =
995 { T_ASTERISK, CTPO_MUL },
996 { T_SLASH, CTPO_DIV },
999 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
1000 sizeof ops / sizeof *ops,
1001 ctables_pcexpr_parse_neg, NULL);
1004 /* Parses the addition and subtraction level. */
/* T_PLUS maps to CTPO_ADD and T_DASH to CTPO_SUB.  A negative-number token
   (T_NEG_NUM) in operator position also maps to CTPO_ADD, presumably so
   that the negative number itself serves as the right-hand operand.
   Operands are parsed at the multiplication level, and no chain warning is
   used. */
1005 static struct ctables_pcexpr *
1006 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
1008 static const struct operator ops[] =
1010 { T_PLUS, CTPO_ADD },
1011 { T_DASH, CTPO_SUB },
1012 { T_NEG_NUM, CTPO_ADD },
1015 return ctables_pcexpr_parse_binary_operators (lexer, dict,
1016 ops, sizeof ops / sizeof *ops,
1017 ctables_pcexpr_parse_mul, NULL);
1020 /* CTABLES axis expressions. */
1022 /* CTABLES has a number of extra formats that we implement via custom
1023 currency specifications on an alternate fmt_settings. */
/* parse_ctables_format_specifier() maps the format keywords NEGPAREN,
   NEQUAL, PAREN, and PCTPAREN to these values, which reuse the custom
   currency format types FMT_CCA through FMT_CCD. */
1024 #define CTEF_NEGPAREN FMT_CCA
1025 #define CTEF_NEQUAL FMT_CCB
1026 #define CTEF_PAREN FMT_CCC
1027 #define CTEF_PCTPAREN FMT_CCD
/* Distinguishes the sets of summary specs kept for one axis variable.  Code
   in this file uses the values CSV_CELL and CSV_TOTAL and sizes the
   per-variant 'specs' array with N_CSVS. */
1029 enum ctables_summary_variant
/* The kind of node in an axis expression tree.  Code in this file uses
   CTAO_VAR for a node that holds a variable and CTAO_NEST for a node created
   by the nesting operator. */
1038 enum ctables_axis_op
/* Members used by this file's axis code: 'var' and the per-variant summary
   spec sets in 'specs' belong to variable (CTAO_VAR) nodes, 'subs' holds the
   two children of a nonterminal node, and 'loc' is the node's source
   location. */
1054 struct variable *var;
1056 struct ctables_summary_spec_set specs[N_CSVS];
1060 struct ctables_axis *subs[2];
1063 struct msg_location *loc;
/* Releases AXIS's resources.  Depending on the kind of node, this
   uninitializes all N_CSVS summary spec sets or recursively destroys both
   child axes.  The node's source location is destroyed as well. */
1067 ctables_axis_destroy (struct ctables_axis *axis)
1075 for (size_t i = 0; i < N_CSVS; i++)
1076 ctables_summary_spec_set_uninit (&axis->specs[i]);
1081 ctables_axis_destroy (axis->subs[0]);
1082 ctables_axis_destroy (axis->subs[1]);
1085 msg_location_destroy (axis->loc);
/* Allocates and returns a new axis node with children SUB0 and SUB1.  Its
   location spans from token offset START_OFS through the token just before
   LEXER's current one. */
1089 static struct ctables_axis *
1090 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1091 struct ctables_axis *sub0,
1092 struct ctables_axis *sub1,
1093 struct lexer *lexer, int start_ofs)
1095 struct ctables_axis *axis = xmalloc (sizeof *axis);
1096 *axis = (struct ctables_axis) {
1098 .subs = { sub0, sub1 },
1099 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State shared by the axis parsing functions: the lexer supplying tokens and
   the dictionary in which variable names are looked up. */
1104 struct ctables_axis_parse_ctx
1106 struct lexer *lexer;
1107 struct dictionary *dict;
/* Returns a new pivot_value to use as the label for SPEC.

   One path returns the default label from ctables_summary_function_label()
   (presumably taken when SPEC has no user-specified label).  The other
   copies SPEC's label into a new string, replacing each occurrence of the
   text ")CILEVEL" with CILEVEL formatted using "%g". */
1110 static struct pivot_value *
1111 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1114 return ctables_summary_function_label (spec->function, spec->weighting,
1115 spec->user_area, spec->percentile);
1118 struct substring in = ss_cstr (spec->label);
1119 struct substring target = ss_cstr (")CILEVEL");
1121 struct string out = DS_EMPTY_INITIALIZER;
1124 size_t chunk = ss_find_substring (in, target);
1125 ds_put_substring (&out, ss_head (in, chunk));
1126 ss_advance (&in, chunk);
1128 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1130 ss_advance (&in, target.length);
1131 ds_put_format (&out, "%g", cilevel);
/* Adds a summary specification to every variable under AXIS.

   For a variable node (CTAO_VAR), the function's availability is checked
   first.  Errors are reported at LOC, with a note at the axis's own
   location, for a function limited to multiple response sets, or for one
   limited to scale variables when the axis is not scale and SV is not
   CSV_TOTAL.  Otherwise a new spec is appended to the node's spec set for
   variant SV, growing the array when it is full.  The new spec holds a
   duplicate of LABEL (if nonnull).  Its format is *FORMAT if FORMAT is
   nonnull, otherwise the default format for FUNCTION and the node's
   variable.

   For any other node, the same specification is added recursively to both
   children, and a failure from either child is detected. */
1137 add_summary_spec (struct ctables_axis *axis,
1138 enum ctables_summary_function function,
1139 enum ctables_weighting weighting,
1140 enum ctables_area_type area, double percentile,
1141 const char *label, const struct fmt_spec *format,
1142 bool is_ctables_format, const struct msg_location *loc,
1143 enum ctables_summary_variant sv)
1145 if (axis->op == CTAO_VAR)
1147 char function_name[128];
1148 ctables_summary_function_name (function, weighting, area,
1149 function_name, sizeof function_name);
1150 const char *var_name = var_get_name (axis->var);
1151 switch (ctables_function_availability (function))
1155 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1156 "response sets."), function_name);
1157 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1163 if (!axis->scale && sv != CSV_TOTAL)
1166 _("Summary function %s applies only to scale variables."),
1168 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1178 struct ctables_summary_spec_set *set = &axis->specs[sv];
1179 if (set->n >= set->allocated)
1180 set->specs = x2nrealloc (set->specs, &set->allocated,
1181 sizeof *set->specs);
1183 struct ctables_summary_spec *dst = &set->specs[set->n++];
1184 *dst = (struct ctables_summary_spec) {
1185 .function = function,
1186 .weighting = weighting,
1189 .percentile = percentile,
1190 .label = xstrdup_if_nonnull (label),
1191 .format = (format ? *format
1192 : ctables_summary_default_format (function, axis->var)),
1193 .is_ctables_format = is_ctables_format,
1199 for (size_t i = 0; i < 2; i++)
1200 if (!add_summary_spec (axis->subs[i], function, weighting, area,
1201 percentile, label, format, is_ctables_format,
/* Declared ahead of its definition because ctables_axis_parse_primary()
   calls it to parse a parenthesized axis expression. */
1208 static struct ctables_axis *ctables_axis_parse_stack (
1209 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized axis expression
   or a single variable.

   For the parenthesized form, the subexpression is destroyed if it cannot be
   parsed or the closing parenthesis is missing.  Otherwise an identifier is
   required.  A name beginning with '$' is rejected because multiple response
   sets are not implemented.  The variable is looked up in the context's
   dictionary and a CTAO_VAR node is created for it.

   The node is treated as scale if "[S]" follows the variable and as
   categorical if "[C]" follows.  With neither, it is scale exactly when the
   variable's measurement level is MEASURE_SCALE.  Using a string variable as
   scale is an error, and the node is destroyed. */
1211 static struct ctables_axis *
1212 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1214 if (lex_match (ctx->lexer, T_LPAREN))
1216 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1217 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1219 ctables_axis_destroy (sub);
1225 if (!lex_force_id (ctx->lexer))
1228 if (lex_tokcstr (ctx->lexer)[0] == '$')
1230 lex_error (ctx->lexer,
1231 _("Multiple response set support not implemented."));
1235 int start_ofs = lex_ofs (ctx->lexer);
1236 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1240 struct ctables_axis *axis = xmalloc (sizeof *axis);
1241 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1243 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1244 : lex_match_phrase (ctx->lexer, "[C]") ? false
1245 : var_get_measure (var) == MEASURE_SCALE);
1246 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1247 lex_ofs (ctx->lexer) - 1);
1248 if (axis->scale && var_is_alpha (var))
1250 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1252 var_get_name (var));
1253 ctables_axis_destroy (axis);
/* Reports whether the null-terminated string S contains at least one of the
   characters '0' through '9'.  strcspn() gives the length of the leading run
   of non-digits, so the character at that index is the null terminator only
   if S has no digit. */
1261 has_digit (const char *s)
1263 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The format's type name, width, and decimals are first read in abstract
   form.  The CTABLES-specific type names NEGPAREN, NEQUAL, PAREN, and
   PCTPAREN (matched without regard to case) select the corresponding CTEF_*
   type, and *IS_CTABLES_FORMAT is set to true.  For these, errors are
   reported when the width is too small ("requires width 2 or greater") or
   when the decimals exceed the width minus 1.

   Any other type name sets *IS_CTABLES_FORMAT to false and is parsed as a
   standard format, which must also be valid for output and compatible with
   numeric values. */
1267 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1268 bool *is_ctables_format)
1270 char type[FMT_TYPE_LEN_MAX + 1];
1271 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1274 if (!strcasecmp (type, "NEGPAREN"))
1275 format->type = CTEF_NEGPAREN;
1276 else if (!strcasecmp (type, "NEQUAL"))
1277 format->type = CTEF_NEQUAL;
1278 else if (!strcasecmp (type, "PAREN"))
1279 format->type = CTEF_PAREN;
1280 else if (!strcasecmp (type, "PCTPAREN"))
1281 format->type = CTEF_PCTPAREN;
1284 *is_ctables_format = false;
1285 return (parse_format_specifier (lexer, format)
1286 && fmt_check_output (format)
1287 && fmt_check_type_compat (format, VAL_NUMERIC));
1293 lex_next_error (lexer, -1, -1,
1294 _("Output format %s requires width 2 or greater."), type);
1297 else if (format->d > format->w - 1)
1299 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1300 "greater than decimals."), type);
1305 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function name, then, for PTILE
   only, a percentile between 0 and 100 inclusive, then an optional quoted
   label, then an optional format.  A format is recognized as an identifier
   token that contains a digit.  Each specification is passed to
   add_summary_spec() together with the source location it spans, and may be
   followed by an optional comma.

   Parsing starts with the CSV_CELL variant.  While in that variant, the
   keyword TOTALS followed by '[' introduces a nested list (presumably
   switching to CSV_TOTAL).  When a closing ']' is seen in the CSV_TOTAL
   variant, a second ']' is required to close the outer list.

   The axis parsed so far is destroyed on an error path. */
1310 static struct ctables_axis *
1311 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1313 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1314 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1317 enum ctables_summary_variant sv = CSV_CELL;
1320 int start_ofs = lex_ofs (ctx->lexer);
1322 /* Parse function. */
1323 enum ctables_summary_function function;
1324 enum ctables_weighting weighting;
1325 enum ctables_area_type area;
1326 if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
1330 /* Parse percentile. */
1331 double percentile = 0;
1332 if (function == CTSF_PTILE)
1334 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1336 percentile = lex_number (ctx->lexer);
1337 lex_get (ctx->lexer);
1342 if (lex_is_string (ctx->lexer))
1344 label = ss_xstrdup (lex_tokss (ctx->lexer));
1345 lex_get (ctx->lexer);
1349 struct fmt_spec format;
1350 const struct fmt_spec *formatp;
1351 bool is_ctables_format = false;
1352 if (lex_token (ctx->lexer) == T_ID
1353 && has_digit (lex_tokcstr (ctx->lexer)))
1355 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1356 &is_ctables_format))
1366 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1367 lex_ofs (ctx->lexer) - 1);
1368 add_summary_spec (sub, function, weighting, area, percentile, label,
1369 formatp, is_ctables_format, loc, sv);
1371 msg_location_destroy (loc);
1373 lex_match (ctx->lexer, T_COMMA);
1374 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1376 if (!lex_force_match (ctx->lexer, T_LBRACK))
1380 else if (lex_match (ctx->lexer, T_RBRACK))
1382 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1389 ctables_axis_destroy (sub);
1393 static const struct ctables_axis *
1394 find_scale (const struct ctables_axis *axis)
1398 else if (axis->op == CTAO_VAR)
1399 return axis->scale ? axis : NULL;
1402 for (size_t i = 0; i < 2; i++)
1404 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1412 static const struct ctables_axis *
1413 find_categorical_summary_spec (const struct ctables_axis *axis)
1417 else if (axis->op == CTAO_VAR)
1418 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1421 for (size_t i = 0; i < 2; i++)
1423 const struct ctables_axis *sum
1424 = find_categorical_summary_spec (axis->subs[i]);
/* Parses postfix axis expressions joined by `>' (T_GT), combining each pair
   of operands into a CTAO_NEST node with ctables_axis_new_nonterminal().
   Two situations are diagnosed, after which the new nest node is destroyed:
   both operands contain a scale variable, or the left (outer) operand
   contains a categorical variable that has summary specifications. */
1432 static struct ctables_axis *
1433 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
/* START_OFS anchors the source location of every nest node built below. */
1435 int start_ofs = lex_ofs (ctx->lexer);
1436 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1440 while (lex_match (ctx->lexer, T_GT))
1442 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* NOTE(review): presumably reached only when RHS failed to parse, so the
   left operand must be released -- confirm. */
1445 ctables_axis_destroy (lhs);
1449 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1450 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nesting may contain a scale variable. */
1452 const struct ctables_axis *outer_scale = find_scale (lhs);
1453 const struct ctables_axis *inner_scale = find_scale (rhs);
1454 if (outer_scale && inner_scale)
1456 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1457 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1458 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1459 ctables_axis_destroy (nest);
/* Summaries on a categorical variable are only allowed at the innermost
   level, so the outer operand must not carry any. */
1463 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1466 msg_at (SE, nest->loc,
1467 _("Summaries may only be requested for categorical variables "
1468 "at the innermost nesting level."));
1469 msg_at (SN, outer_sum->loc,
1470 _("This outer categorical variable has a summary."));
1471 ctables_axis_destroy (nest);
1481 static struct ctables_axis *
1482 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1484 int start_ofs = lex_ofs (ctx->lexer);
1485 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1489 while (lex_match (ctx->lexer, T_PLUS))
1491 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1494 ctables_axis_destroy (lhs);
1498 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1499 ctx->lexer, start_ofs);
/* Parses one axis of a table specification from LEXER and stores the
   resulting axis tree in *AXISP.  DICT is the dictionary used to resolve
   variables.  The next token being BY, `/', or the end of the command is
   tested first, before any axis expression is parsed. */
1506 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1507 struct ctables_axis **axisp)
/* NOTE(review): these tokens presumably mean that the axis is empty --
   confirm. */
1510 if (lex_token (lexer) == T_BY
1511 || lex_token (lexer) == T_SLASH
1512 || lex_token (lexer) == T_ENDCMD)
1515 struct ctables_axis_parse_ctx ctx = {
/* Stacking (`+') is the outermost level of the axis grammar. */
1519 *axisp = ctables_axis_parse_stack (&ctx);
1523 /* CTABLES categories. */
/* A list of categories.  The functions that follow also use its members
   'n_refs' (a reference count), 'n_cats' (the number of elements in 'cats'),
   and 'show_empty'. */
1525 struct ctables_categories
1528 struct ctables_category *cats;
/* One category within a "struct ctables_categories".  The 'type' member
   selects which of the type-specific members below are meaningful; the
   comment beside each member names the types it belongs to. */
1533 struct ctables_category
1535 enum ctables_category_type
1537 /* Explicit category lists. */
1540 CCT_NRANGE, /* Numerical range. */
1541 CCT_SRANGE, /* String range. */
1546 /* Totals and subtotals. */
1550 /* Implicit category lists. */
1555 /* For contributing to TOTALN. */
1556 CCT_EXCLUDED_MISSING,
1560 struct ctables_category *subtotal;
1566 double number; /* CCT_NUMBER. */
1567 struct substring string; /* CCT_STRING, in dictionary encoding. */
/* An open lower bound is stored as -DBL_MAX and an open upper bound as
   DBL_MAX (see ctables_table_parse_explicit_category()). */
1568 double nrange[2]; /* CCT_NRANGE. */
/* An open bound is stored as a substring whose 'string' is null. */
1569 struct substring srange[2]; /* CCT_SRANGE. */
1573 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
1574 bool hide_subcategories; /* CCT_SUBTOTAL. */
1577 /* CCT_POSTCOMPUTE. */
1580 const struct ctables_postcompute *pc;
/* Format used to convert string category references into numbers when it is
   not FMT_F (see ctables_find_category_for_postcompute()). */
1581 enum fmt_type parse_format;
1584 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
1587 bool include_missing;
1588 bool sort_ascending;
1591 enum ctables_summary_function sort_function;
1592 enum ctables_weighting weighting;
1593 enum ctables_area_type area;
1594 struct variable *sort_var;
1599 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
1600 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
1601 struct msg_location *location;
/* Releases the resources held by CAT: its source location and, depending on
   the category type, its string, both string range bounds, or its total
   label.  Callers pass elements of a category array (see
   ctables_categories_unref()). */
1605 ctables_category_uninit (struct ctables_category *cat)
1610 msg_location_destroy (cat->location);
1617 case CCT_POSTCOMPUTE:
1621 ss_dealloc (&cat->string);
/* Both ends of a string range are released. */
1625 ss_dealloc (&cat->srange[0]);
1626 ss_dealloc (&cat->srange[1]);
1631 free (cat->total_label);
1639 case CCT_EXCLUDED_MISSING:
1645 nullable_substring_equal (const struct substring *a,
1646 const struct substring *b)
1648 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Categories of different types are handled
   by the first test; otherwise the comparison depends on the type, as in
   the per-type returns below. */
1652 ctables_category_equal (const struct ctables_category *a,
1653 const struct ctables_category *b)
1655 if (a->type != b->type)
1661 return a->number == b->number;
1664 return ss_equals (a->string, b->string);
1667 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range bounds may be null (open-ended), so compare them with the
   null-tolerant helper. */
1670 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
1671 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
1677 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by contents. */
1678 return a->pc == b->pc;
/* NOTE(review): only the label is compared here; 'hide_subcategories' is
   not -- confirm that this is intended. */
1682 return !strcmp (a->total_label, b->total_label);
/* NOTE(review): 'weighting' and 'area' are not part of this comparison --
   confirm that this is intended. */
1687 return (a->include_missing == b->include_missing
1688 && a->sort_ascending == b->sort_ascending
1689 && a->sort_function == b->sort_function
1690 && a->sort_var == b->sort_var
1691 && a->percentile == b->percentile);
1693 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C.  The reference count must be positive on
   entry.  NOTE(review): the category array is presumably torn down only
   when the last reference goes away -- confirm. */
1701 ctables_categories_unref (struct ctables_categories *c)
1706 assert (c->n_refs > 0);
/* Release what each category owns. */
1710 for (size_t i = 0; i < c->n_cats; i++)
1711 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B: their category counts and 'show_empty'
   settings are compared first, then their categories pairwise in order with
   ctables_category_equal(). */
1717 ctables_categories_equal (const struct ctables_categories *a,
1718 const struct ctables_categories *b)
/* Cheap checks first. */
1720 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
1723 for (size_t i = 0; i < a->n_cats; i++)
1724 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
1730 static struct ctables_category
1731 cct_nrange (double low, double high)
1733 return (struct ctables_category) {
1735 .nrange = { low, high }
1739 static struct ctables_category
1740 cct_srange (struct substring low, struct substring high)
1742 return (struct ctables_category) {
1744 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category (the keyword has
   already been consumed by the caller) and stores a CCT_SUBTOTAL category
   in *CAT.  An optional `=' followed by a string supplies the label;
   otherwise the translated default "Subtotal" is used.  HIDE_SUBCATEGORIES
   is recorded in the category as given. */
1749 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1750 struct ctables_category *cat)
1753 if (lex_match (lexer, T_EQUALS))
/* After `=', a string token is mandatory. */
1755 if (!lex_force_string (lexer))
1758 total_label = ss_xstrdup (lex_tokss (lexer));
/* The default label is heap-allocated too, so the label can always be
   released with free(). */
1762 total_label = xstrdup (_("Subtotal"));
1764 *cat = (struct ctables_category) {
1765 .type = CCT_SUBTOTAL,
1766 .hide_subcategories = hide_subcategories,
1767 .total_label = total_label
/* Parses one explicit category from LEXER into *CAT.  The forms recognized
   in the code below are:

     OTHERNM
     MISSING
     SUBTOTAL and HSUBTOTAL (see ctables_table_parse_subtotal())
     LO THRU string-or-number
     number, optionally followed by THRU HI or THRU number
     string, optionally followed by THRU HI or THRU string
     & followed by the name of a postcompute

   Any other token is reported as a syntax error.  Open-ended numeric ranges
   use -DBL_MAX or DBL_MAX as the missing bound; open-ended string ranges use
   a substring with a null 'string'. */
1773 ctables_table_parse_explicit_category (struct lexer *lexer,
1774 struct dictionary *dict,
1776 struct ctables_category *cat)
1778 if (lex_match_id (lexer, "OTHERNM"))
1779 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1780 else if (lex_match_id (lexer, "MISSING"))
1781 *cat = (struct ctables_category) { .type = CCT_MISSING };
/* SUBTOTAL shows its subcategories; HSUBTOTAL hides them. */
1782 else if (lex_match_id (lexer, "SUBTOTAL"))
1783 return ctables_table_parse_subtotal (lexer, false, cat);
1784 else if (lex_match_id (lexer, "HSUBTOTAL"))
1785 return ctables_table_parse_subtotal (lexer, true, cat);
1786 else if (lex_match_id (lexer, "LO"))
/* LO must be followed by THRU and then an upper bound. */
1788 if (!lex_force_match_id (lexer, "THRU"))
1790 if (lex_is_string (lexer))
/* String range with an open lower end. */
1792 struct substring sr0 = { .string = NULL };
1793 struct substring sr1 = parse_substring (lexer, dict);
1794 *cat = cct_srange (sr0, sr1);
1796 else if (lex_force_num (lexer))
/* Numeric range with an open lower end. */
1798 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1804 else if (lex_is_number (lexer))
1806 double number = lex_number (lexer);
1808 if (lex_match_id (lexer, "THRU"))
1810 if (lex_match_id (lexer, "HI"))
1811 *cat = cct_nrange (number, DBL_MAX);
1814 if (!lex_force_num (lexer))
1816 *cat = cct_nrange (number, lex_number (lexer));
1821 *cat = (struct ctables_category) {
1826 else if (lex_is_string (lexer))
1828 struct substring s = parse_substring (lexer, dict);
1829 if (lex_match_id (lexer, "THRU"))
1831 if (lex_match_id (lexer, "HI"))
/* String range with an open upper end. */
1833 struct substring sr1 = { .string = NULL };
1834 *cat = cct_srange (s, sr1);
1838 if (!lex_force_string (lexer))
1843 struct substring sr1 = parse_substring (lexer, dict);
1844 *cat = cct_srange (s, sr1);
/* A lone string is a single string category. */
1848 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1850 else if (lex_match (lexer, T_AND))
/* `&' must be followed by the name of a postcompute. */
1852 if (!lex_force_id (lexer))
1854 struct ctables_postcompute *pc = ctables_find_postcompute (
1855 ct, lex_tokcstr (lexer));
/* The error location covers both the `&' and the name. */
1858 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1859 msg_at (SE, loc, _("Unknown postcompute &%s."),
1860 lex_tokcstr (lexer));
1861 msg_location_destroy (loc);
1866 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1870 lex_error (lexer, NULL);
/* Interprets the string S as a value in FORMAT, using the encoding of DICT
   and the current format settings, by way of data_in().  When data_in()
   returns an error string, the error is reported at LOCATION.  N is the
   output parameter for the parsed number (the store to it is not among the
   lines shown here). */
1878 parse_category_string (struct msg_location *location,
1879 struct substring s, const struct dictionary *dict,
1880 enum fmt_type format, double *n)
1883 char *error = data_in (s, dict_get_encoding (dict), format,
1884 settings_get_fmt_settings (), &v, 0, NULL);
1887 msg_at (SE, location,
1888 _("Failed to parse category specification as format %s: %s."),
1889 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  E->op selects the kind of category to look for and the matching rule:
   numbers and numeric ranges by value, strings with ss_equals(), string
   ranges with null-tolerant bound comparison, and MISSING, OTHERNM, and
   TOTAL by category type alone.  Subtotals are counted as they are seen so
   that a positional reference (E->subtotal_index) can be resolved.  The
   final test singles out an unindexed subtotal reference (index 0) when
   CATS has more than one subtotal; ctables_recursive_check_postcompute()
   reports that situation to the user. */
1898 static struct ctables_category *
1899 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1900 const struct ctables_pcexpr *e)
1902 struct ctables_category *best = NULL;
1903 size_t n_subtotals = 0;
1904 for (size_t i = 0; i < cats->n_cats; i++)
1906 struct ctables_category *cat = &cats->cats[i];
1909 case CTPO_CAT_NUMBER:
1910 if (cat->type == CCT_NUMBER && cat->number == e->number)
1914 case CTPO_CAT_STRING:
1915 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1919 case CTPO_CAT_NRANGE:
/* Both bounds must match exactly. */
1920 if (cat->type == CCT_NRANGE
1921 && cat->nrange[0] == e->nrange[0]
1922 && cat->nrange[1] == e->nrange[1])
1926 case CTPO_CAT_SRANGE:
1927 if (cat->type == CCT_SRANGE
1928 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1929 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1933 case CTPO_CAT_MISSING:
1934 if (cat->type == CCT_MISSING)
1938 case CTPO_CAT_OTHERNM:
1939 if (cat->type == CCT_OTHERNM)
1943 case CTPO_CAT_SUBTOTAL:
1944 if (cat->type == CCT_SUBTOTAL)
/* A positional reference matches the subtotal with that ordinal; index 0
   denotes a reference without a position. */
1947 if (e->subtotal_index == n_subtotals)
1949 else if (e->subtotal_index == 0)
1954 case CTPO_CAT_TOTAL:
1955 if (cat->type == CCT_TOTAL)
/* An unindexed subtotal reference with several subtotals present. */
1969 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to.
   When PARSE_FORMAT is not FMT_F, string references are first converted to
   numbers: a CTPO_CAT_STRING reference becomes a CTPO_CAT_NUMBER reference
   and a CTPO_CAT_SRANGE reference becomes a CTPO_CAT_NRANGE reference, with
   each string parsed in PARSE_FORMAT by parse_category_string().  Every
   other reference is looked up unchanged. */
1974 static struct ctables_category *
1975 ctables_find_category_for_postcompute (const struct dictionary *dict,
1976 const struct ctables_categories *cats,
1977 enum fmt_type parse_format,
1978 const struct ctables_pcexpr *e)
1980 if (parse_format != FMT_F)
1982 if (e->op == CTPO_CAT_STRING)
1985 if (!parse_category_string (e->location, e->string, dict,
1986 parse_format, &number))
/* Look up the numeric equivalent, keeping E's location for diagnostics. */
1989 struct ctables_pcexpr e2 = {
1990 .op = CTPO_CAT_NUMBER,
1992 .location = e->location,
1994 return ctables_find_category_for_postcompute__ (cats, &e2);
1996 else if (e->op == CTPO_CAT_SRANGE)
/* A null bound is an open end, which maps to -DBL_MAX or DBL_MAX. */
1999 if (!e->srange[0].string)
2000 nrange[0] = -DBL_MAX;
2001 else if (!parse_category_string (e->location, e->srange[0], dict,
2002 parse_format, &nrange[0]))
2005 if (!e->srange[1].string)
2006 nrange[1] = DBL_MAX;
2007 else if (!parse_category_string (e->location, e->srange[1], dict,
2008 parse_format, &nrange[1]))
2011 struct ctables_pcexpr e2 = {
2012 .op = CTPO_CAT_NRANGE,
2013 .nrange = { nrange[0], nrange[1] },
2014 .location = e->location,
2016 return ctables_find_category_for_postcompute__ (cats, &e2);
/* No conversion applies: look up E as is. */
2019 return ctables_find_category_for_postcompute__ (cats, e);
2022 static struct substring
2023 rtrim_value (const union value *v, const struct variable *var)
2025 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2026 var_get_width (var));
2027 ss_rtrim (&s, ss_cstr (" "));
2032 in_string_range (const union value *v, const struct variable *var,
2033 const struct substring *srange)
2035 struct substring s = rtrim_value (v, var);
2036 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2037 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.  The
   system-missing value of a numeric variable is handled before the search.
   Categories are examined from the last to the first.  If the search falls
   through to the end, the result is the OTHERNM category recorded along the
   way (possibly null), except that a missing value then yields a null
   pointer. */
2040 static const struct ctables_category *
2041 ctables_categories_match (const struct ctables_categories *c,
2042 const union value *v, const struct variable *var)
2044 if (var_is_numeric (var) && v->f == SYSMIS)
2047 const struct ctables_category *othernm = NULL;
/* Counts down so that I can be unsigned: the test happens before the
   decrement takes effect in the body. */
2048 for (size_t i = c->n_cats; i-- > 0; )
2050 const struct ctables_category *cat = &c->cats[i];
2054 if (cat->number == v->f)
/* String categories are compared with trailing spaces ignored. */
2059 if (ss_equals (cat->string, rtrim_value (v, var)))
/* -DBL_MAX and DBL_MAX act as open lower and upper bounds. */
2064 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2065 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2070 if (in_string_range (v, var, cat->srange))
2075 if (var_is_value_missing (var, v))
2079 case CCT_POSTCOMPUTE:
/* Implicit categories accept missing values only if they were asked to. */
2094 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2097 case CCT_EXCLUDED_MISSING:
/* No explicit match: missing values never fall into OTHERNM. */
2102 return var_is_value_missing (var, v) ? NULL : othernm;
2105 static const struct ctables_category *
2106 ctables_categories_total (const struct ctables_categories *c)
2108 const struct ctables_category *first = &c->cats[0];
2109 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2110 return (first->type == CCT_TOTAL ? first
2111 : last->type == CCT_TOTAL ? last
/* Formats NUMBER as a value of variable VAR and appends the result to the
   output string.  The formatting is done through a temporary pivot value
   that is destroyed before returning. */
2116 ctables_category_format_number (double number, const struct variable *var,
2119 struct pivot_value *pv = pivot_value_new_var_value (
2120 var, &(union value) { .f = number });
2121 pivot_value_format (pv, NULL, s);
2122 pivot_value_destroy (pv);
/* Formats STRING as a value of string variable VAR and appends the result
   to OUT.  STRING is first copied into a temporary buffer that is exactly
   as wide as VAR and padded on the right with spaces, because a value of
   VAR must have VAR's full width.

   NOTE(review): no release of the temporary buffer appears among the lines
   of this block; confirm that it is freed after the pivot value is
   destroyed.

   NOTE(review): STRING.string is null for an open-ended string range bound
   (see ctables_category_format_label()); confirm that buf_copy_rpad()
   accepts a null source of length 0. */
2126 ctables_category_format_string (struct substring string,
2127 const struct variable *var, struct string *out)
2129 int width = var_get_width (var);
2130 char *s = xmalloc (width);
2131 buf_copy_rpad (s, width, string.string, string.length, ' ');
2132 struct pivot_value *pv = pivot_value_new_var_value (
2133 var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
2134 pivot_value_format (pv, NULL, out);
2135 pivot_value_destroy (pv);
/* Appends a textual label for category CAT of variable VAR to the output
   string.  Numbers and strings are formatted as values of VAR, ranges as
   "low THRU high", MISSING and OTHERNM as those keywords, a postcompute as
   `&' followed by its name, and totals and subtotals as their label. */
2140 ctables_category_format_label (const struct ctables_category *cat,
2141 const struct variable *var,
2147 ctables_category_format_number (cat->number, var, s);
2151 ctables_category_format_string (cat->string, var, s);
/* Numeric range: open ends are formatted as the -DBL_MAX or DBL_MAX that
   represents them. */
2155 ctables_category_format_number (cat->nrange[0], var, s);
2156 ds_put_format (s, " THRU ");
2157 ctables_category_format_number (cat->nrange[1], var, s);
/* String range: an open end is passed along as a null substring. */
2161 ctables_category_format_string (cat->srange[0], var, s);
2162 ds_put_format (s, " THRU ");
2163 ctables_category_format_string (cat->srange[1], var, s);
2167 ds_put_cstr (s, "MISSING");
2171 ds_put_cstr (s, "OTHERNM");
2174 case CCT_POSTCOMPUTE:
2175 ds_put_format (s, "&%s", cat->pc->name);
2180 ds_put_cstr (s, cat->total_label);
2186 case CCT_EXCLUDED_MISSING:
2193 /* CTABLES variable nesting and stacking. */
2195 /* A nested sequence of variables, e.g. a > b > c. */
/* The variables in the nest.  nest_fts() builds this array by placing the
   variables of the left operand of `>' before those of the right operand. */
2198 struct variable **vars;
/* Per-area-type arrays and their lengths.  NOTE(review): their contents are
   filled in outside this part of the file -- see the code that sets them. */
2202 size_t *areas[N_CTATS];
2203 size_t n_areas[N_CTATS];
/* Summary specifications, one set per summary variant. */
2206 struct ctables_summary_spec_set specs[N_CSVS];
2209 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
2210 struct ctables_stack
/* Array of nests; the element count is kept in the 'n' member. */
2212 struct ctables_nest *nests;
/* Releases the resources held by NEST: the summary specification set for
   every summary variant and the area array for every area type. */
2217 ctables_nest_uninit (struct ctables_nest *nest)
2220 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2221 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2222 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
2223 free (nest->areas[at]);
/* Releases the resources held by STACK: every nest in it is uninitialized
   and then the array of nests is freed.  STACK itself is not freed. */
2227 ctables_stack_uninit (struct ctables_stack *stack)
2231 for (size_t i = 0; i < stack->n; i++)
2232 ctables_nest_uninit (&stack->nests[i]);
2233 free (stack->nests);
/* Nests stack S1 inside stack S0.  The result has room for one nest per
   pair of a nest A from S0 and a nest B from S1; each new nest holds A's
   variables followed by B's.  The summary specifications are cloned from
   whichever of A and B is chosen as SUMMARY_SRC.  S0 and S1 are passed by
   value and are uninitialized before returning, so the caller must not use
   them afterward. */
2237 static struct ctables_stack
2238 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2245 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2246 for (size_t i = 0; i < s0.n; i++)
2247 for (size_t j = 0; j < s1.n; j++)
2249 const struct ctables_nest *a = &s0.nests[i];
2250 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists: outer (A) first, then inner (B). */
2252 size_t allocate = a->n + b->n;
2253 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2255 for (size_t k = 0; k < a->n; k++)
2256 vars[n++] = a->vars[k];
2257 for (size_t k = 0; k < b->n; k++)
2258 vars[n++] = b->vars[k];
2259 assert (n == allocate);
/* The summary source is selected according to which operand has no
   variable recorded in its CSV_CELL specification set. */
2261 const struct ctables_nest *summary_src;
2262 if (!a->specs[CSV_CELL].var)
2264 else if (!b->specs[CSV_CELL].var)
2269 struct ctables_nest *new = &stack.nests[stack.n++];
2270 *new = (struct ctables_nest) {
/* SIZE_MAX means "none".  Indexes that come from B are shifted by A's
   length because B's variables follow A's in the combined array. */
2272 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2273 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2275 .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
2276 : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
2280 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2281 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
/* Everything needed was copied or cloned, so the inputs can go. */
2283 ctables_stack_uninit (&s0);
2284 ctables_stack_uninit (&s1);
/* Concatenates stacks S0 and S1 into a new stack that holds S0's nests
   followed by S1's.  The nests are moved by structure assignment, not
   cloned.  The 'group_head' of each nest that comes from S1 is increased by
   S0.n to account for its new position. */
2288 static struct ctables_stack
2289 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2291 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2292 for (size_t i = 0; i < s0.n; i++)
2293 stack.nests[stack.n++] = s0.nests[i];
2294 for (size_t i = 0; i < s1.n; i++)
2296 stack.nests[stack.n] = s1.nests[i];
2297 stack.nests[stack.n].group_head += s0.n;
2300 assert (stack.n == s0.n + s1.n);
/* Returns a stack that consists of a single nest for the variable axis node
   A.  The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise; its summary index is 0 if A is a scale variable or has
   CSV_CELL summary specifications and SIZE_MAX otherwise.  A's summary
   specifications are cloned into the nest for every summary variant and
   tagged with A's variable and scale flag. */
2306 static struct ctables_stack
2307 var_fts (const struct ctables_axis *a)
2309 struct variable **vars = xmalloc (sizeof *vars);
2312 bool is_summary = a->specs[CSV_CELL].n || a->scale;
2313 struct ctables_nest *nest = xmalloc (sizeof *nest);
2314 *nest = (struct ctables_nest) {
2317 .scale_idx = a->scale ? 0 : SIZE_MAX,
2318 .summary_idx = is_summary ? 0 : SIZE_MAX,
2321 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2323 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2324 nest->specs[sv].var = a->var;
2325 nest->specs[sv].is_scale = a->scale;
/* The single allocated nest serves directly as a one-element array. */
2327 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  One path returns an
   empty stack, and the two recursive paths combine the stacks of A's two
   sub-axes with stack_fts() and nest_fts() respectively.  NOTE(review):
   the conditions that select each path are presumably A being null and A's
   operator being stacking or nesting -- confirm. */
2330 static struct ctables_stack
2331 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2334 return (struct ctables_stack) { .n = 0 };
2342 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2343 enumerate_fts (axis_type, a->subs[1]));
2346 /* This should consider any of the scale variables found in the result to
2347 be linked to each other listwise for SMISSING=LISTWISE. */
2348 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2349 enumerate_fts (axis_type, a->subs[1]));
2355 /* CTABLES summary calculation. */
/* Accumulator for one summary function in one cell.  Which members are in
   use depends on the summary function, as the comments on each group say;
   ctables_summary_init() prepares the members for a given function. */
2357 union ctables_summary
2359 /* COUNT, VALIDN, TOTALN. */
2362 /* MINIMUM, MAXIMUM, RANGE. */
2369 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2370 struct moments1 *moments;
2372 /* MEDIAN, MODE, PTILE. */
/* Collects (value, weight) cases in sorted order; see
   ctables_summary_init() and ctables_summary_add(). */
2375 struct casewriter *writer;
/* Prepares accumulator S for the summary function given by SS->function.
   Depending on the function this resets the minimum and maximum to SYSMIS,
   creates a moments calculator of the required order, or creates a sorting
   case writer for the order statistics. */
2382 ctables_summary_init (union ctables_summary *s,
2383 const struct ctables_summary_spec *ss)
2385 switch (ss->function)
2388 case CTSF_areaPCT_COUNT:
2389 case CTSF_areaPCT_VALIDN:
2390 case CTSF_areaPCT_TOTALN:
/* SYSMIS marks "no value seen yet"; ctables_summary_add() tests for it. */
2403 s->min = s->max = SYSMIS;
2408 case CTSF_areaPCT_SUM:
/* These functions need nothing beyond the mean and the total weight. */
2409 s->moments = moments1_create (MOMENT_MEAN);
/* These functions need the variance as well. */
2415 s->moments = moments1_create (MOMENT_VARIANCE);
/* Each case has two numeric (width 0) columns; ctables_summary_add() stores
   the value in column 0 and the weight in column 1. */
2422 struct caseproto *proto = caseproto_create ();
2423 proto = caseproto_add_width (proto, 0);
2424 proto = caseproto_add_width (proto, 0);
/* Sort the cases in ascending order of column 0. */
2426 struct subcase ordering;
2427 subcase_init (&ordering, 0, 0, SC_ASCEND);
2428 s->writer = sort_create_writer (&ordering, proto);
2429 subcase_uninit (&ordering);
2430 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function given by SS->function: the moments calculator or the case
   writer. */
2440 ctables_summary_uninit (union ctables_summary *s,
2441 const struct ctables_summary_spec *ss)
2443 switch (ss->function)
2446 case CTSF_areaPCT_COUNT:
2447 case CTSF_areaPCT_VALIDN:
2448 case CTSF_areaPCT_TOTALN:
2467 case CTSF_areaPCT_SUM:
2468 moments1_destroy (s->moments);
2474 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function given by
   SS->function.  VALUE is the observed value.  IS_MISSING and IS_INCLUDED
   describe the value as in the inclusion chart inside the function.
   Depending on the function, the observation updates the running minimum
   and maximum, is fed with its weight to the moments calculator, or is
   written to the sorting case writer as a (value, weight) pair while the
   total valid weight is accumulated in S->ovalid. */
2480 ctables_summary_add (union ctables_summary *s,
2481 const struct ctables_summary_spec *ss,
2482 const union value *value,
2483 bool is_missing, bool is_included,
2486 /* To determine whether a case is included in a given table for a particular
2487 kind of summary, consider the following charts for the variable being
2488 summarized. Only if "yes" appears is the case counted.
2490 Categorical variables: VALIDN other TOTALN
2491 Valid values in included categories yes yes yes
2492 Missing values in included categories --- yes yes
2493 Missing values in excluded categories --- --- yes
2494 Valid values in excluded categories --- --- ---
2496 Scale variables: VALIDN other TOTALN
2497 Valid value yes yes yes
2498 Missing value --- yes yes
2500 Missing values include both user- and system-missing. (The system-missing
2501 value is always in an excluded category.)
2503 One way to interpret the above table is that scale variables are like
2504 categorical variables in which all values are in included categories.
2506 switch (ss->function)
2509 case CTSF_areaPCT_TOTALN:
2514 case CTSF_areaPCT_COUNT:
2520 case CTSF_areaPCT_VALIDN:
2538 if (s->min == SYSMIS || value->f < s->min)
2540 if (s->max == SYSMIS || value->f > s->max)
2551 moments1_add (s->moments, value->f, weight);
2554 case CTSF_areaPCT_SUM:
2556 moments1_add (s->moments, value->f, weight);
2564 s->ovalid += weight;
2566 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2567 *case_num_rw_idx (c, 0) = value->f;
2568 *case_num_rw_idx (c, 1) = weight;
2569 casewriter_write (s->writer, c);
/* Computes the final value of the summary accumulated in S for the summary
   function given by SS->function.  AREAS supplies, for each area type, the
   area that contains the cell; percentages use the area selected by
   SS->calc_area as their denominator.  A result that cannot be computed
   (for example because the denominator is zero) is SYSMIS. */
2576 ctables_summary_value (struct ctables_area *areas[N_CTATS],
2577 union ctables_summary *s,
2578 const struct ctables_summary_spec *ss)
2580 switch (ss->function)
2586 return areas[ss->calc_area]->sequence;
2588 case CTSF_areaPCT_COUNT:
/* Percentage of the area's count under the requested weighting. */
2590 const struct ctables_area *a = areas[ss->calc_area];
2591 double a_count = a->count[ss->weighting];
2592 return a_count ? s->count / a_count * 100 : SYSMIS;
2595 case CTSF_areaPCT_VALIDN:
/* Percentage of the area's valid count. */
2597 const struct ctables_area *a = areas[ss->calc_area];
2598 double a_valid = a->valid[ss->weighting];
2599 return a_valid ? s->count / a_valid * 100 : SYSMIS;
2602 case CTSF_areaPCT_TOTALN:
/* Percentage of the area's total count. */
2604 const struct ctables_area *a = areas[ss->calc_area];
2605 double a_total = a->total[ss->weighting];
2606 return a_total ? s->count / a_total * 100 : SYSMIS;
/* The range exists only if both extremes were observed. */
2621 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2626 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2632 double weight, variance;
2633 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2634 return calc_semean (variance, weight);
2640 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2641 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2646 double weight, mean;
2647 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2648 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2654 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2658 case CTSF_areaPCT_SUM:
/* This cell's sum as a percentage of the area's sum for the same summed
   variable (selected by SS->sum_var_idx). */
2660 double weight, mean;
2661 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2662 if (weight == SYSMIS || mean == SYSMIS)
2665 const struct ctables_area *a = areas[ss->calc_area];
2666 const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
2667 double denom = sum->sum[ss->weighting];
2668 return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
/* Turn the sorted cases into a reader and compute the percentile from
   them.  PTILE uses the requested percentile and the other function that
   shares this code uses 0.5; the result is cached in S->ovalue. */
2675 struct casereader *reader = casewriter_make_reader (s->writer);
2678 struct percentile *ptile = percentile_create (
2679 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2680 struct order_stats *os = &ptile->parent;
2681 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2682 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2683 statistic_destroy (&ptile->parent.parent);
/* Same approach for the mode. */
2690 struct casereader *reader = casewriter_make_reader (s->writer);
2693 struct mode *mode = mode_create ();
2694 struct order_stats *os = &mode->parent;
2695 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2696 s->ovalue = mode->mode;
2697 statistic_destroy (&mode->parent.parent);
/* Variable label display settings.  Each value is defined to equal the
   corresponding SETTINGS_VALUE_SHOW_* constant. */
2707 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
2708 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
2709 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
2710 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
2715 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
2716 all the axes (except the scalar variable, if any). */
2717 struct hmap_node node;
2719 /* The areas that contain this cell. */
2720 uint32_t omit_areas;
2721 struct ctables_area *areas[N_CTATS];
/* Which summary variant this cell belongs to. */
2726 enum ctables_summary_variant sv;
/* Per-axis data for the cell; each cell value refers to the category that
   the value falls into. */
2728 struct ctables_cell_axis
2730 struct ctables_cell_value
2732 const struct ctables_category *category;
/* Accumulators for the cell's summaries. */
2740 union ctables_summary *summaries;
/* Dictionary that the command's variables come from. */
2745 const struct dictionary *dict;
/* Look for the output tables; released with pivot_table_look_unref() in
   ctables_destroy(). */
2746 struct pivot_table_look *look;
2748 /* For CTEF_* formats. */
2749 struct fmt_settings ctables_formats;
2751 /* If this is NULL, zeros are displayed using the normal print format.
2752 Otherwise, this string is displayed. */
2755 /* If this is NULL, missing values are displayed using the normal print
2756 format. Otherwise, this string is displayed. */
2759 /* Indexed by variable dictionary index. */
2760 enum ctables_vlabel *vlabels;
2762 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
2764 bool mrsets_count_duplicates; /* MRSETS. */
2765 bool smissing_listwise; /* SMISSING. */
2766 struct variable *e_weight; /* WEIGHT. */
2767 int hide_threshold; /* HIDESMALLCOUNTS. */
/* The tables to produce; ctables_destroy() destroys 'n_tables' of them. */
2769 struct ctables_table **tables;
/* A value kept in a table's 'clabels_values_map' hash map (see "struct
   ctables_table"). */
2773 struct ctables_value
2775 struct hmap_node node;
/* An element of one of the 'occurrences' hash maps in "struct
   ctables_section". */
2780 struct ctables_occurrence
2782 struct hmap_node node;
/* A section of a table: a combination of one nest per pivot axis, together
   with the cells and areas that belong to it. */
2786 struct ctables_section
/* The table that this section belongs to. */
2789 struct ctables_table *table;
/* The nest for each axis. */
2790 struct ctables_nest *nests[PIVOT_N_AXES];
2793 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
2794 struct hmap cells; /* Contains "struct ctables_cell"s. */
2795 struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
2798 static void ctables_section_uninit (struct ctables_section *);
/* One table within a CTABLES command. */
2800 struct ctables_table
/* The command that this table belongs to. */
2802 struct ctables *ctables;
/* The parsed axis expressions and the stacks of nests derived from them,
   one per pivot axis. */
2803 struct ctables_axis *axes[PIVOT_N_AXES];
2804 struct ctables_stack stacks[PIVOT_N_AXES];
/* The table's sections; ctables_table_destroy() uninitializes 'n_sections'
   of them. */
2805 struct ctables_section *sections;
2807 enum pivot_axis_type summary_axis;
2808 struct ctables_summary_spec_set summary_specs;
2809 struct variable **sum_vars;
2812 enum pivot_axis_type slabels_axis;
2813 bool slabels_visible;
2815 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
2817 Most commonly, label_axis[a] == a, and in particular we always have
2818 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
2820 If ROWLABELS or COLLABELS is specified, then one of
2821 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
2822 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
2824 If any category labels are moved, then 'clabels_example' is one of the
2825 variables being moved (and it is otherwise NULL). All of the variables
2826 being moved have the same width, value labels, and categories, so this
2827 example variable can be used to find those out.
2829 The remaining members in this group are relevant only if category labels
2832 'clabels_values_map' holds a "struct ctables_value" for all the values
2833 that appear in all of the variables in the moved categories. It is
2834 accumulated as the data is read. Once the data is fully read, its
2835 sorted values are put into 'clabels_values' and 'n_clabels_values'.
2837 enum pivot_axis_type label_axis[PIVOT_N_AXES];
2838 enum pivot_axis_type clabels_from_axis;
2839 enum pivot_axis_type clabels_to_axis;
2840 const struct variable *clabels_example;
2841 struct hmap clabels_values_map;
2842 struct ctables_value **clabels_values;
2843 size_t n_clabels_values;
2845 /* Indexed by variable dictionary index. */
2846 struct ctables_categories **categories;
2847 size_t n_categories;
/* Optional significance tests; released by ctables_chisq_destroy() and
   ctables_pairwise_destroy() when the table is destroyed. */
2855 struct ctables_chisq *chisq;
2856 struct ctables_pairwise *pairwise;
2859 /* Chi-square test (SIGTEST). */
2860 struct ctables_chisq
/* NOTE(review): presumably set from an INCLUDEMRSETS-style option --
   confirm. */
2863 bool include_mrsets;
2867 /* Pairwise comparison test (COMPARETEST). */
2868 struct ctables_pairwise
/* What is compared: proportions or means. */
2870 enum { PROP, MEAN } type;
2872 bool include_mrsets;
2873 bool meansvariance_allcats;
/* Adjustment method.  The enumerators start at 1, so 0 is available to
   mean "no adjustment" -- NOTE(review): confirm. */
2875 enum { BONFERRONI = 1, BH } adjust;
/* Parses a column width setting from LEXER: an optional `=' followed by
   either the keyword DEFAULT or a number in the closed range [0, DBL_MAX].
   NAME identifies the setting in the range error message.  A number is
   stored in *WIDTH. */
2884 parse_col_width (struct lexer *lexer, const char *name, double *width)
2886 lex_match (lexer, T_EQUALS);
2887 if (lex_match_id (lexer, "DEFAULT"))
2889 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
2891 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER, with the result going to *B.
   Any other token is reported as an error that names both keywords. */
2901 parse_bool (struct lexer *lexer, bool *b)
2903 if (lex_match_id (lexer, "NO"))
2905 else if (lex_match_id (lexer, "YES"))
2909 lex_error_expecting (lexer, "YES", "NO");
/* Destroys CHISQ.  Called from ctables_table_destroy() for the table's
   'chisq' member. */
2916 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  Called from ctables_table_destroy() for the table's
   'pairwise' member. */
2922 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T and what it owns: its sections, its references to
   category lists, its axes and stacks, its summary specifications, the map
   and array of moved category label values, and its significance tests. */
2928 ctables_table_destroy (struct ctables_table *t)
2933 for (size_t i = 0; i < t->n_sections; i++)
2934 ctables_section_uninit (&t->sections[i]);
/* Category lists are reference-counted, so just drop T's references. */
2937 for (size_t i = 0; i < t->n_categories; i++)
2938 ctables_categories_unref (t->categories[i]);
2939 free (t->categories);
2941 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2943 ctables_axis_destroy (t->axes[a]);
2944 ctables_stack_uninit (&t->stacks[a]);
2946 free (t->summary_specs.specs);
/* The "safe" iteration form is needed because nodes are deleted from the
   map while it is being traversed. */
2948 struct ctables_value *ctv, *next_ctv;
2949 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
2950 &t->clabels_values_map)
/* Every stored value has the width of the example variable. */
2952 value_destroy (&ctv->value, var_get_width (t->clabels_example));
2953 hmap_delete (&t->clabels_values_map, &ctv->node);
2956 hmap_destroy (&t->clabels_values_map);
2957 free (t->clabels_values);
2963 ctables_chisq_destroy (t->chisq);
2964 ctables_pairwise_destroy (t->pairwise);
/* Destroys CT and what it owns: every postcompute (its location,
   expression, and summary specifications), the postcompute map, the
   CTABLES-specific format settings, its reference to the table look, and
   every table. */
2969 ctables_destroy (struct ctables *ct)
/* The "safe" iteration form is needed because nodes are deleted from the
   map while it is being traversed. */
2974 struct ctables_postcompute *pc, *next_pc;
2975 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
2979 msg_location_destroy (pc->location);
2980 ctables_pcexpr_destroy (pc->expr);
2984 ctables_summary_spec_set_uninit (pc->specs);
2987 hmap_delete (&ct->postcomputes, &pc->hmap_node);
2990 hmap_destroy (&ct->postcomputes);
2992 fmt_settings_uninit (&ct->ctables_formats);
2993 pivot_table_look_unref (ct->look);
2997 for (size_t i = 0; i < ct->n_tables; i++)
2998 ctables_table_destroy (ct->tables[i]);
/* Checks that every category reference within postcompute expression E can
   be resolved against the category list CATS, visiting E's operands
   recursively.  PC_CAT is the postcompute category whose expression is
   being checked; its 'parse_format' controls how string references are
   interpreted.  CATS_LOCATION is the source location of the category list,
   used in diagnostics.

   Two kinds of failure are diagnosed: a SUBTOTAL reference without a
   position when the list contains more than one subtotal, and a reference
   to a category that is not in the list.  Each error is followed by notes
   that point at the offending reference and suggest a fix. */
3004 ctables_recursive_check_postcompute (struct dictionary *dict,
3005 const struct ctables_pcexpr *e,
3006 struct ctables_category *pc_cat,
3007 const struct ctables_categories *cats,
3008 const struct msg_location *cats_location)
/* These operators are the leaves that refer to a category. */
3012 case CTPO_CAT_NUMBER:
3013 case CTPO_CAT_STRING:
3014 case CTPO_CAT_NRANGE:
3015 case CTPO_CAT_SRANGE:
3016 case CTPO_CAT_MISSING:
3017 case CTPO_CAT_OTHERNM:
3018 case CTPO_CAT_SUBTOTAL:
3019 case CTPO_CAT_TOTAL:
3021 struct ctables_category *cat = ctables_find_category_for_postcompute (
3022 dict, cats, pc_cat->parse_format, e);
/* A subtotal reference without a position is ambiguous if the list has
   several subtotals, so count them to give a precise message. */
3025 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
3027 size_t n_subtotals = 0;
3028 for (size_t i = 0; i < cats->n_cats; i++)
3029 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
3030 if (n_subtotals > 1)
3032 msg_at (SE, cats_location,
3033 ngettext ("These categories include %zu instance "
3034 "of SUBTOTAL or HSUBTOTAL, so references "
3035 "from computed categories must refer to "
3036 "subtotals by position, "
3037 "e.g. SUBTOTAL[1].",
3038 "These categories include %zu instances "
3039 "of SUBTOTAL or HSUBTOTAL, so references "
3040 "from computed categories must refer to "
3041 "subtotals by position, "
3042 "e.g. SUBTOTAL[1].",
3045 msg_at (SN, e->location,
3046 _("This is the reference that lacks a position."));
/* Otherwise the referenced category is simply absent from the list. */
3051 msg_at (SE, pc_cat->location,
3052 _("Computed category &%s references a category not included "
3053 "in the category list."),
3055 msg_at (SN, e->location, _("This is the missing category."));
/* The suggested fix depends on the kind of category that is missing. */
3056 if (e->op == CTPO_CAT_SUBTOTAL)
3057 msg_at (SN, cats_location,
3058 _("To fix the problem, add subtotals to the "
3059 "list of categories here."));
3060 else if (e->op == CTPO_CAT_TOTAL)
3061 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
3062 "CATEGORIES specification."));
3064 msg_at (SN, cats_location,
3065 _("To fix the problem, add the missing category to the "
3066 "list of categories here."));
3069 if (pc_cat->pc->hide_source_cats)
/* Non-leaf operators: check whichever operands are present. */
3083 for (size_t i = 0; i < 2; i++)
3084 if (e->subs[i] && !ctables_recursive_check_postcompute (
3085 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that category specification CAT is applied only to string variables.
   Scans the N_VARS variables in VARS and, on finding a numeric one, reports an
   error at CAT's location that names the offending variable.

   NOTE(review): the return statements are not visible in this excerpt;
   callers test the result with '!', so presumably it is false after an error
   and true otherwise. */
3094 all_strings (struct variable **vars, size_t n_vars,
3095 const struct ctables_category *cat)
3097 for (size_t j = 0; j < n_vars; j++)
3098 if (var_is_numeric (vars[j]))
3100 msg_at (SE, cat->location,
3101 _("This category specification may be applied only to string "
3102 "variables, but this subcommand tries to apply it to "
3103 "numeric variable %s."),
3104 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER: the keyword
   VARIABLES, an optional '=', a list of variables from DICT, an optional
   bracketed list of explicit categories, and then settings introduced by the
   keywords ORDER, KEY, MISSING, TOTAL, LABEL, POSITION and EMPTY, up to the
   next '/' or the end of the command.

   The resulting 'struct ctables_categories' is shared by all the listed
   variables (its n_refs starts at the number of variables) and is installed in
   T->categories[] at each variable's dictionary index. */
3111 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
3112 struct ctables *ct, struct ctables_table *t)
3114 if (!lex_match_id (lexer, "VARIABLES"))
3116 lex_match (lexer, T_EQUALS);
3118 struct variable **vars;
3120 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find out whether every variable has the same print format type as the first
   one; common_format is cleared to NULL when any differs. */
3123 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
3124 for (size_t i = 1; i < n_vars; i++)
3126 const struct fmt_spec *f = var_get_print_format (vars[i]);
3127 if (f->type != common_format->type)
3129 common_format = NULL;
3135 && (fmt_get_category (common_format->type)
3136 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* Create the shared categories object and drop each variable's reference to
   whatever categories it had before. */
3138 struct ctables_categories *c = xmalloc (sizeof *c);
3139 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
3140 for (size_t i = 0; i < n_vars; i++)
3142 struct ctables_categories **cp
3143 = &t->categories[var_get_dict_index (vars[i])];
3144 ctables_categories_unref (*cp);
/* Optional explicit category list in brackets.  The token offsets of the list
   are remembered so that diagnostics can point at it later; -1 means that no
   list was given. */
3148 size_t allocated_cats = 0;
3149 int cats_start_ofs = -1;
3150 int cats_end_ofs = -1;
3151 if (lex_match (lexer, T_LBRACK))
3153 cats_start_ofs = lex_ofs (lexer);
3156 if (c->n_cats >= allocated_cats)
3157 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3159 int start_ofs = lex_ofs (lexer);
3160 struct ctables_category *cat = &c->cats[c->n_cats];
3161 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
3163 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
3166 lex_match (lexer, T_COMMA);
3168 while (!lex_match (lexer, T_RBRACK));
3169 cats_end_ofs = lex_ofs (lexer) - 1;
/* Settings for an implicit category, used when no explicit list was given,
   plus the settings that control the total. */
3172 struct ctables_category cat = {
3174 .include_missing = false,
3175 .sort_ascending = true,
3177 bool show_totals = false;
3178 char *total_label = NULL;
3179 bool totals_before = false;
3180 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER, KEY and MISSING are accepted only when there are no explicit
   categories (c->n_cats is zero). */
3182 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
3184 lex_match (lexer, T_EQUALS);
3185 if (lex_match_id (lexer, "A"))
3186 cat.sort_ascending = true;
3187 else if (lex_match_id (lexer, "D"))
3188 cat.sort_ascending = false;
3191 lex_error_expecting (lexer, "A", "D");
3195 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
3197 int start_ofs = lex_ofs (lexer) - 1;
3198 lex_match (lexer, T_EQUALS);
3199 if (lex_match_id (lexer, "VALUE"))
3200 cat.type = CCT_VALUE;
3201 else if (lex_match_id (lexer, "LABEL"))
3202 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable (and a percentile for PTILE).  The syntax is parsed
   but then rejected with "not implemented" below. */
3205 cat.type = CCT_FUNCTION;
3206 if (!parse_ctables_summary_function (lexer, &cat.sort_function,
3207 &cat.weighting, &cat.area))
3210 if (lex_match (lexer, T_LPAREN))
3212 cat.sort_var = parse_variable (lexer, dict);
3216 if (cat.sort_function == CTSF_PTILE)
3218 lex_match (lexer, T_COMMA);
3219 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
3221 cat.percentile = lex_number (lexer);
3225 if (!lex_force_match (lexer, T_RPAREN))
3228 else if (ctables_function_availability (cat.sort_function)
3231 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
3235 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
3236 _("Data-dependent sorting is not implemented."));
3240 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
3242 lex_match (lexer, T_EQUALS);
3243 if (lex_match_id (lexer, "INCLUDE"))
3244 cat.include_missing = true;
3245 else if (lex_match_id (lexer, "EXCLUDE"))
3246 cat.include_missing = false;
3249 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
3253 else if (lex_match_id (lexer, "TOTAL"))
3255 lex_match (lexer, T_EQUALS);
3256 if (!parse_bool (lexer, &show_totals))
3259 else if (lex_match_id (lexer, "LABEL"))
3261 lex_match (lexer, T_EQUALS);
3262 if (!lex_force_string (lexer))
3265 total_label = ss_xstrdup (lex_tokss (lexer));
3268 else if (lex_match_id (lexer, "POSITION"))
3270 lex_match (lexer, T_EQUALS);
3271 if (lex_match_id (lexer, "BEFORE"))
3272 totals_before = true;
3273 else if (lex_match_id (lexer, "AFTER"))
3274 totals_before = false;
3277 lex_error_expecting (lexer, "BEFORE", "AFTER");
3281 else if (lex_match_id (lexer, "EMPTY"))
3283 lex_match (lexer, T_EQUALS);
3284 if (lex_match_id (lexer, "INCLUDE"))
3285 c->show_empty = true;
3286 else if (lex_match_id (lexer, "EXCLUDE"))
3287 c->show_empty = false;
3290 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unrecognized keyword: two forms of the message exist, the shorter one
   omitting the keywords that are accepted only without explicit categories.
   NOTE(review): the condition choosing between them is not visible here. */
3297 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
3298 "TOTAL", "LABEL", "POSITION", "EMPTY");
3300 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category CAT.
   NOTE(review): the guarding condition is not visible here. */
3307 if (c->n_cats >= allocated_cats)
3308 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3309 c->cats[c->n_cats++] = cat;
/* Make room for a totals category and place it at the front or at the end of
   the list.  Its label is the user's LABEL if given, otherwise "Total". */
3314 if (c->n_cats >= allocated_cats)
3315 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
3317 struct ctables_category *totals;
3320 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
3321 totals = &c->cats[0];
3324 totals = &c->cats[c->n_cats];
3327 *totals = (struct ctables_category) {
3329 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Visit the categories front to back when totals come first and back to front
   otherwise, recording in each category the subtotal tracked in 'subtotal'. */
3333 struct ctables_category *subtotal = NULL;
3334 for (size_t i = totals_before ? 0 : c->n_cats;
3335 totals_before ? i < c->n_cats : i-- > 0;
3336 totals_before ? i++ : 0)
3338 struct ctables_category *cat = &c->cats[i];
3347 cat->subtotal = subtotal;
3350 case CCT_POSTCOMPUTE:
3361 case CCT_EXCLUDED_MISSING:
/* An explicit category list was given: validate each category against the
   variables it is applied to. */
3366 if (cats_start_ofs != -1)
3368 for (size_t i = 0; i < c->n_cats; i++)
3370 struct ctables_category *cat = &c->cats[i];
/* Computed categories: fix the format used to parse category references, then
   check the expression against this category list. */
3373 case CCT_POSTCOMPUTE:
3374 cat->parse_format = parse_strings ? common_format->type : FMT_F;
3375 struct msg_location *cats_location
3376 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
3377 bool ok = ctables_recursive_check_postcompute (
3378 dict, cat->pc->expr, cat, c, cats_location);
3379 msg_location_destroy (cats_location);
/* Numeric-only category specifications: reject string variables. */
3386 for (size_t j = 0; j < n_vars; j++)
3387 if (var_is_alpha (vars[j]))
3389 msg_at (SE, cat->location,
3390 _("This category specification may be applied "
3391 "only to numeric variables, but this "
3392 "subcommand tries to apply it to string "
3394 var_get_name (vars[j]));
/* A string category may instead be parsed as a number in the common format,
   in which case it is converted to CCT_NUMBER; otherwise all the variables
   must be strings. */
3403 if (!parse_category_string (cat->location, cat->string, dict,
3404 common_format->type, &n))
3407 ss_dealloc (&cat->string);
3409 cat->type = CCT_NUMBER;
3412 else if (!all_strings (vars, n_vars, cat))
/* Likewise for string ranges: each present endpoint is parsed and the category
   becomes CCT_NRANGE. */
3421 if (!cat->srange[0].string)
3423 else if (!parse_category_string (cat->location,
3424 cat->srange[0], dict,
3425 common_format->type, &n[0]))
3428 if (!cat->srange[1].string)
3430 else if (!parse_category_string (cat->location,
3431 cat->srange[1], dict,
3432 common_format->type, &n[1]))
3435 ss_dealloc (&cat->srange[0]);
3436 ss_dealloc (&cat->srange[1]);
3438 cat->type = CCT_NRANGE;
3439 cat->nrange[0] = n[0];
3440 cat->nrange[1] = n[1];
3442 else if (!all_strings (vars, n_vars, cat))
3453 case CCT_EXCLUDED_MISSING:
/* Auxiliary data passed to ctables_cell_compare_3way(). */
3467 struct ctables_cell_sort_aux
/* Nest whose variables are compared. */
3469 const struct ctables_nest *nest;
/* Axis whose cell values are compared. */
3470 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   'struct ctables_cell *' elements and AUX_ points to a
   'struct ctables_cell_sort_aux' naming the nest and axis to compare along.

   The nest's variables, other than the scale variable, are compared in order.
   Cells in different categories are ordered by category address; within one
   category the ordering depends on the category type, honoring the category's
   sort_ascending flag where the code below applies it. */
3474 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3476 const struct ctables_cell_sort_aux *aux = aux_;
3477 struct ctables_cell *const *ap = a_;
3478 struct ctables_cell *const *bp = b_;
3479 const struct ctables_cell *a = *ap;
3480 const struct ctables_cell *b = *bp;
3482 const struct ctables_nest *nest = aux->nest;
3483 for (size_t i = 0; i < nest->n; i++)
3484 if (i != nest->scale_idx)
3486 const struct variable *var = nest->vars[i];
3487 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3488 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the categories' addresses. */
3489 if (a_cv->category != b_cv->category)
3490 return a_cv->category > b_cv->category ? 1 : -1;
/* Same category: order by value in a way that depends on the category type. */
3492 const union value *a_val = &a_cv->value;
3493 const union value *b_val = &b_cv->value;
3494 switch (a_cv->category->type)
3500 case CCT_POSTCOMPUTE:
3501 case CCT_EXCLUDED_MISSING:
3502 /* Must be equal. */
3510 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3518 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3520 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label, with the values themselves also compared.
   NOTE(review): the conditions selecting between the two comparisons are not
   visible in this excerpt. */
3526 const char *a_label = var_lookup_value_label (var, a_val);
3527 const char *b_label = var_lookup_value_label (var, b_val);
3533 cmp = strcmp (a_label, b_label);
3539 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3542 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Three-way comparison of two cells by their leaf indexes, taking the axes in
   order.  A_ and B_ point to 'struct ctables_cell *' elements; AUX is unused.
   NOTE(review): the test that guards the return inside the loop and the final
   return are not visible in this excerpt. */
3554 ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
3555 const void *aux UNUSED)
3557 struct ctables_cell *const *ap = a_;
3558 struct ctables_cell *const *bp = b_;
3559 const struct ctables_cell *a = *ap;
3560 const struct ctables_cell *b = *bp;
3562 for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
3564 int al = a->axes[axis].leaf;
3565 int bl = b->axes[axis].leaf;
3567 return al > bl ? 1 : -1;
/* Finds the area of type AREA in section S that CELL belongs to, creating it
   if it does not exist yet.

   An area is identified by the categories (and, except for total, subtotal
   and postcompute categories, the values) of the variables listed in each
   nest's areas[AREA] array.  A new area records CELL as its example and gets
   a zeroed array of sums, one per summed variable in the table, if the table
   has any. */
3572 static struct ctables_area *
3573 ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
3574 enum ctables_area_type area)
/* Hash the identifying categories and values. */
3577 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3579 const struct ctables_nest *nest = s->nests[a];
3580 for (size_t i = 0; i < nest->n_areas[area]; i++)
3582 size_t v_idx = nest->areas[area][i];
3583 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3584 hash = hash_pointer (cv->category, hash);
3585 if (cv->category->type != CCT_TOTAL
3586 && cv->category->type != CCT_SUBTOTAL
3587 && cv->category->type != CCT_POSTCOMPUTE)
3588 hash = value_hash (&cv->value,
3589 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing area whose example cell matches CELL on every
   identifying variable. */
3593 struct ctables_area *a;
3594 HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
3596 const struct ctables_cell *df = a->example;
3597 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3599 const struct ctables_nest *nest = s->nests[a];
3600 for (size_t i = 0; i < nest->n_areas[area]; i++)
3602 size_t v_idx = nest->areas[area][i];
3603 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3604 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3605 if (cv1->category != cv2->category
3606 || (cv1->category->type != CCT_TOTAL
3607 && cv1->category->type != CCT_SUBTOTAL
3608 && cv1->category->type != CCT_POSTCOMPUTE
3609 && !value_equal (&cv1->value, &cv2->value,
3610 var_get_width (nest->vars[v_idx]))))
/* No match: create a new area. */
3619 struct ctables_sum *sums = (s->table->n_sum_vars
3620 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3623 a = xmalloc (sizeof *a);
3624 *a = (struct ctables_area) { .example = cell, .sums = sums };
3625 hmap_insert (&s->areas[area], &a->node, hash);
/* Finds the cell in section S for case C with the categories in CATS, which is
   indexed by axis and then by position within that axis's nest.  Creates the
   cell if it does not exist yet.

   A cell is identified by the category of every non-scale variable in each
   nest plus, for categories other than totals, subtotals and postcomputes,
   the variable's value in C.  A new cell gets a copy of each value, its
   summaries initialized, its areas looked up or created, and is then inserted
   into S->cells. */
3629 static struct ctables_cell *
3630 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3631 const struct ctables_category **cats[PIVOT_N_AXES])
/* Hash the identifying categories and values. */
3634 enum ctables_summary_variant sv = CSV_CELL;
3635 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3637 const struct ctables_nest *nest = s->nests[a];
3638 for (size_t i = 0; i < nest->n; i++)
3639 if (i != nest->scale_idx)
3641 hash = hash_pointer (cats[a][i], hash);
3642 if (cats[a][i]->type != CCT_TOTAL
3643 && cats[a][i]->type != CCT_SUBTOTAL
3644 && cats[a][i]->type != CCT_POSTCOMPUTE)
3645 hash = value_hash (case_data (c, nest->vars[i]),
3646 var_get_width (nest->vars[i]), hash);
/* Search the hash bucket for an existing cell with the same identity. */
3652 struct ctables_cell *cell;
3653 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3655 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3657 const struct ctables_nest *nest = s->nests[a];
3658 for (size_t i = 0; i < nest->n; i++)
3659 if (i != nest->scale_idx
3660 && (cats[a][i] != cell->axes[a].cvs[i].category
3661 || (cats[a][i]->type != CCT_TOTAL
3662 && cats[a][i]->type != CCT_SUBTOTAL
3663 && cats[a][i]->type != CCT_POSTCOMPUTE
3664 && !value_equal (case_data (c, nest->vars[i]),
3665 &cell->axes[a].cvs[i].value,
3666 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3675 cell = xmalloc (sizeof *cell);
3678 cell->omit_areas = 0;
3679 cell->postcompute = false;
3680 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3682 const struct ctables_nest *nest = s->nests[a];
/* An empty nest gets no cell-value array. */
3683 cell->axes[a].cvs = (nest->n
3684 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3686 for (size_t i = 0; i < nest->n; i++)
3688 const struct ctables_category *cat = cats[a][i];
3689 const struct variable *var = nest->vars[i];
3690 const union value *value = case_data (c, var);
3691 if (i != nest->scale_idx)
3693 const struct ctables_category *subtotal = cat->subtotal;
3694 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total, subtotal and postcompute categories mark a set of area types in
   omit_areas; which ones depends on the axis the category is on. */
3697 if (cat->type == CCT_TOTAL
3698 || cat->type == CCT_SUBTOTAL
3699 || cat->type == CCT_POSTCOMPUTE)
3703 case PIVOT_AXIS_COLUMN:
3704 cell->omit_areas |= ((1u << CTAT_TABLE) |
3705 (1u << CTAT_LAYER) |
3706 (1u << CTAT_LAYERCOL) |
3707 (1u << CTAT_SUBTABLE) |
3710 case PIVOT_AXIS_ROW:
3711 cell->omit_areas |= ((1u << CTAT_TABLE) |
3712 (1u << CTAT_LAYER) |
3713 (1u << CTAT_LAYERROW) |
3714 (1u << CTAT_SUBTABLE) |
3717 case PIVOT_AXIS_LAYER:
3718 cell->omit_areas |= ((1u << CTAT_TABLE) |
3719 (1u << CTAT_LAYER));
3723 if (cat->type == CCT_POSTCOMPUTE)
3724 cell->postcompute = true;
/* Record the category and a private copy of the value. */
3727 cell->axes[a].cvs[i].category = cat;
3728 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* Initialize one summary per summary specification for the cell's summary
   variant, attach the cell to its area of each type, and publish it. */
3732 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3733 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3734 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3735 for (size_t i = 0; i < specs->n; i++)
3736 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3737 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3738 cell->areas[at] = ctables_area_insert (s, cell, at);
3739 hmap_insert (&s->cells, &cell->node, hash);
/* Combines the per-weighting values in SRC into DST, visiting every
   'enum ctables_weighting'.  NOTE(review): the loop body is not visible in
   this excerpt; presumably it adds SRC[wt] to DST[wt]. */
3744 add_weight (double dst[N_CTWS], const double src[N_CTWS])
3746 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3751 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3752 const struct ctables_category **cats[PIVOT_N_AXES],
3753 bool is_included, double weight[N_CTWS])
3755 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3756 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3758 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3759 const union value *value = case_data (c, specs->var);
3760 bool is_missing = var_is_value_missing (specs->var, value);
3761 bool is_scale_missing
3762 = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
3764 for (size_t i = 0; i < specs->n; i++)
3765 ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
3766 is_scale_missing, is_included,
3767 weight[specs->specs[i].weighting]);
3768 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
3769 if (!(cell->omit_areas && (1u << at)))
3771 struct ctables_area *a = cell->areas[at];
3773 add_weight (a->total, weight);
3775 add_weight (a->count, weight);
3778 add_weight (a->valid, weight);
3780 if (!is_scale_missing)
3781 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3783 const struct variable *var = s->table->sum_vars[i];
3784 double addend = case_num (c, var);
3785 if (!var_is_num_missing (var, addend))
3786 for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
3787 a->sums[i].sum[wt] += addend * weight[wt];
/* Adds case C to the total cells implied by CATS in section S.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable's categories are asked for their total category via
   ctables_categories_total().  The case is added with the total substituted
   at that position and the function recurses from the following position, so
   that combinations of totals across variables are covered.  The original
   entry is saved in 'save'.

   NOTE(review): the test for a missing total, the substitution into CATS and
   the restore from 'save' are not visible in this excerpt.  START_NEST is
   used as the initial index for every axis visited, not only START_AXIS;
   confirm that this is intended. */
3794 recurse_totals (struct ctables_section *s, const struct ccase *c,
3795 const struct ctables_category **cats[PIVOT_N_AXES],
3796 bool is_included, double weight[N_CTWS],
3797 enum pivot_axis_type start_axis, size_t start_nest)
3799 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3801 const struct ctables_nest *nest = s->nests[a];
3802 for (size_t i = start_nest; i < nest->n; i++)
3804 if (i == nest->scale_idx)
3807 const struct variable *var = nest->vars[i];
3809 const struct ctables_category *total = ctables_categories_total (
3810 s->table->categories[var_get_dict_index (var)]);
3813 const struct ctables_category *save = cats[a][i];
3815 ctables_cell_add__ (s, c, cats, is_included, weight);
3816 recurse_totals (s, c, cats, is_included, weight, a, i + 1);
/* Adds case C to the subtotal cells implied by CATS in section S.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   position has its category replaced by that category's subtotal.  The case
   is added with that substitution and the function recurses from the
   following position.  The original entry is saved in 'save'.

   NOTE(review): the test for a category without a subtotal and the restore
   from 'save' are not visible in this excerpt. */
3825 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3826 const struct ctables_category **cats[PIVOT_N_AXES],
3827 bool is_included, double weight[N_CTWS],
3828 enum pivot_axis_type start_axis, size_t start_nest)
3830 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3832 const struct ctables_nest *nest = s->nests[a];
3833 for (size_t i = start_nest; i < nest->n; i++)
3835 if (i == nest->scale_idx)
3838 const struct ctables_category *save = cats[a][i];
3841 cats[a][i] = save->subtotal;
3842 ctables_cell_add__ (s, c, cats, is_included, weight);
3843 recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
/* Records that VALUE occurred for variable VAR in the set OCCURRENCES, a hash
   map of 'struct ctables_occurrence' keyed on the value.  An existing equal
   value is searched for first; otherwise a new entry holding a copy of VALUE
   is inserted.  NOTE(review): the early return on a match is not visible in
   this excerpt. */
3852 ctables_add_occurrence (const struct variable *var,
3853 const union value *value,
3854 struct hmap *occurrences)
3856 int width = var_get_width (var);
3857 unsigned int hash = value_hash (value, width, 0);
3859 struct ctables_occurrence *o;
3860 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3862 if (value_equal (value, &o->value, width))
3865 o = xmalloc (sizeof *o);
3866 value_clone (&o->value, value, width);
3867 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with the per-weighting weights in WEIGHT, to section S.

   First the category of each non-scale variable on every axis is determined
   with ctables_categories_match().  A static CCT_EXCLUDED_MISSING category
   can be substituted for a variable, in which case the case is marked as not
   included.  Then the values that occurred are recorded, and finally the case
   is added to its own cell and to the related total and subtotal cells.

   NOTE(review): several short lines are not visible in this excerpt,
   including the conditions under which the function gives up on the case and
   the guard around the occurrence-recording loop. */
3871 ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
3872 double weight[N_CTWS])
/* Per-axis category arrays, sized by the number of variables in each nest.
   NOTE(review): these are variable-length arrays; ctables_cell_insert__()
   treats nest->n as possibly zero, and a zero-length VLA is undefined in ISO
   C (GNU C accepts it) -- confirm whether that case can reach this point. */
3874 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
3875 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
3876 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
3877 const struct ctables_category **cats[PIVOT_N_AXES] =
3879 [PIVOT_AXIS_LAYER] = layer_cats,
3880 [PIVOT_AXIS_ROW] = row_cats,
3881 [PIVOT_AXIS_COLUMN] = column_cats,
3884 bool is_included = true;
/* Classify the case's value for each variable. */
3886 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3888 const struct ctables_nest *nest = s->nests[a];
3889 for (size_t i = 0; i < nest->n; i++)
3890 if (i != nest->scale_idx)
3892 const struct variable *var = nest->vars[i];
3893 const union value *value = case_data (c, var);
3895 cats[a][i] = ctables_categories_match (
3896 s->table->categories[var_get_dict_index (var)], value, var);
3899 if (i != nest->summary_idx)
3902 if (!var_is_value_missing (var, value))
3905 static const struct ctables_category cct_excluded_missing = {
3906 .type = CCT_EXCLUDED_MISSING,
3909 cats[a][i] = &cct_excluded_missing;
3910 is_included = false;
/* Record the values that occurred for each variable. */
3916 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3918 const struct ctables_nest *nest = s->nests[a];
3919 for (size_t i = 0; i < nest->n; i++)
3920 if (i != nest->scale_idx)
3922 const struct variable *var = nest->vars[i];
3923 const union value *value = case_data (c, var);
3924 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its cell, then to total and subtotal cells. */
3928 ctables_cell_add__ (s, c, cats, is_included, weight);
3929 recurse_totals (s, c, cats, is_included, weight, 0, 0);
3930 recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
/* Summary specification set that a merge item refers to;
   merge_item_compare_3way() indexes its specs[] array with the item's
   'ofs' member. */
3935 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and B
   refer to.  The keys, in order, are: function, weighting, calculation area,
   percentile and label, with a null label treated as the empty string.

   Note that the percentile key orders larger percentiles first, which is the
   reverse of the direction used for the other keys. */
3940 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3942 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3943 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3944 if (as->function != bs->function)
3945 return as->function > bs->function ? 1 : -1;
3946 else if (as->weighting != bs->weighting)
3947 return as->weighting > bs->weighting ? 1 : -1;
3948 else if (as->calc_area != bs->calc_area)
3949 return as->calc_area > bs->calc_area ? 1 : -1;
3950 else if (as->percentile != bs->percentile)
3951 return as->percentile < bs->percentile ? 1 : -1;
3953 const char *as_label = as->label ? as->label : "";
3954 const char *bs_label = bs->label ? bs->label : "";
3955 return strcmp (as_label, bs_label);
/* Builds the label for computed category CAT of variable VAR.

   The label text is CAT->pc->label.  Each occurrence of ")LABEL[n]" in it,
   where n is a 1-based index into CATS, is replaced by the formatted label of
   that category.  A label without any such reference is returned as is.  The
   last statement returns the unmodified label text.  NOTE(review): the jumps
   on malformed references are not visible in this excerpt; presumably they
   lead to that last statement. */
3958 static struct pivot_value *
3959 ctables_postcompute_label (const struct ctables_categories *cats,
3960 const struct ctables_category *cat,
3961 const struct variable *var)
3963 struct substring in = ss_cstr (cat->pc->label);
3964 struct substring target = ss_cstr (")LABEL[");
3966 struct string out = DS_EMPTY_INITIALIZER;
/* Find the next reference.  When there is none, finish: either nothing was
   substituted and the input is returned directly, or the remaining text is
   appended to the output built so far. */
3969 size_t chunk = ss_find_substring (in, target);
3970 if (chunk == SIZE_MAX)
3972 if (ds_is_empty (&out))
3973 return pivot_value_new_user_text (in.string, in.length);
3976 ds_put_substring (&out, in);
3977 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then skip past ")LABEL[". */
3981 ds_put_substring (&out, ss_head (in, chunk));
3982 ss_advance (&in, chunk + target.length);
/* Parse the index up to ']'.  It must be a decimal integer between 1 and the
   number of categories, with nothing else before the ']'. */
3984 struct substring idx_s;
3985 if (!ss_get_until (&in, ']', &idx_s))
3988 long int idx = strtol (idx_s.string, &tail, 10);
3989 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3992 struct ctables_category *cat2 = &cats->cats[idx - 1];
3993 if (!ctables_category_format_label (cat2, var, &out))
3999 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Returns a new pivot value labeling category CAT of variable VAR:

   - For a computed category that has a label, the expanded label from
     ctables_postcompute_label().
   - For a total or subtotal, its total_label.
   - Otherwise, VALUE presented as a value of VAR. */
4002 static struct pivot_value *
4003 ctables_category_create_value_label (const struct ctables_categories *cats,
4004 const struct ctables_category *cat,
4005 const struct variable *var,
4006 const union value *value)
4008 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4009 ? ctables_postcompute_label (cats, cat, var)
4010 : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4011 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4012 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and precomputed HASH.  NOTE(review): the return statements are
   not visible in this excerpt; presumably the matching entry, or NULL. */
4015 static struct ctables_value *
4016 ctables_value_find__ (struct ctables_table *t, const union value *value,
4017 int width, unsigned int hash)
4019 struct ctables_value *clv;
4020 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4021 hash, &t->clabels_values_map)
4022 if (value_equal (value, &clv->value, width))
/* Ensures that T->clabels_values_map has an entry for VALUE: looks for an
   existing one and otherwise inserts a new entry holding a copy of VALUE.
   NOTE(review): the test on the lookup result is not visible in this
   excerpt. */
4028 ctables_value_insert (struct ctables_table *t, const union value *value,
4031 unsigned int hash = value_hash (value, width, 0);
4032 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4035 clv = xmalloc (sizeof *clv);
4036 value_clone (&clv->value, value, width);
4037 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's map of category-label values.
   This is a convenience wrapper that computes the hash and defers to
   ctables_value_find__(). */
4041 static struct ctables_value *
4042 ctables_value_find (struct ctables_table *t,
4043 const union value *value, int width)
4045 return ctables_value_find__ (t, value, width,
4046 value_hash (value, width, 0));
/* Recursively adds sections to table T, one for each combination of a nest
   from every axis.

   While A is a valid axis, every nest index on that axis is tried in IX[A]
   (an axis with no nests still gets one iteration) and the function recurses
   to the next axis.  Once every axis has an index, the next unused element of
   T->sections is initialized for that combination: its occurrence maps, one
   per variable in each nest, and its area maps. */
4050 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4051 size_t ix[PIVOT_N_AXES])
4053 if (a < PIVOT_N_AXES)
4055 size_t limit = MAX (t->stacks[a].n, 1);
4056 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4057 ctables_table_add_section (t, a + 1, ix);
4061 struct ctables_section *s = &t->sections[t->n_sections++];
4062 *s = (struct ctables_section) {
4064 .cells = HMAP_INITIALIZER (s->cells),
4066 for (a = 0; a < PIVOT_N_AXES; a++)
4069 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4071 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4072 for (size_t i = 0; i < nest->n; i++)
4073 hmap_init (&s->occurrences[a][i]);
4075 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4076 hmap_init (&s->areas[at]);
/* Binary evaluator passed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably A + B. */
4081 ctpo_add (double a, double b)
/* Binary evaluator passed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably A - B. */
4087 ctpo_sub (double a, double b)
/* Binary evaluator passed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably A * B. */
4093 ctpo_mul (double a, double b)
/* Returns A divided by B, or SYSMIS when B is zero. */
4099 ctpo_div (double a, double b)
4101 return b ? a / b : SYSMIS;
/* Computes A raised to the power B with pow().  The caller's errno is saved
   on entry.  NOTE(review): the lines that inspect and restore errno and that
   return the result are not visible in this excerpt. */
4105 ctpo_pow (double a, double b)
4107 int save_errno = errno;
4109 double result = pow (a, b);
/* Unary evaluator passed to ctables_pcexpr_evaluate_nonterminal(); B is
   unused.  NOTE(review): the body is not visible in this excerpt; presumably
   the negation of A. */
4117 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression for one cell. */
4122 struct ctables_pcexpr_evaluate_ctx
/* Cell whose value is being computed. */
4124 const struct ctables_cell *cell;
/* Section that contains the cell; its cells and occurrences are consulted. */
4125 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
4126 const struct ctables_categories *cats;
/* Axis on which the postcompute category sits. */
4127 enum pivot_axis_type pc_a;
/* Format used to interpret category references in the expression. */
4130 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function before its definition appears. */
4133 static double ctables_pcexpr_evaluate (
4134 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E in context CTX.  The first N_ARGS (at most 2)
   subexpressions are evaluated and handed to EVALUATE; an unused second
   argument is passed as 0.  Each operand is tested for being non-finite or
   SYSMIS.  NOTE(review): the action taken in that case is not visible in this
   excerpt; presumably SYSMIS is returned without calling EVALUATE. */
4137 ctables_pcexpr_evaluate_nonterminal (
4138 const struct ctables_pcexpr_evaluate_ctx *ctx,
4139 const struct ctables_pcexpr *e, size_t n_args,
4140 double evaluate (double, double))
4142 double args[2] = { 0, 0 };
4143 for (size_t i = 0; i < n_args; i++)
4145 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4146 if (!isfinite (args[i]) || args[i] == SYSMIS)
4149 return evaluate (args[0], args[1]);
/* Returns the value of the summary selected by CTX->summary_idx in the cell
   that sits where CTX->cell does, except that the postcompute position (axis
   CTX->pc_a, index CTX->pc_a_idx) holds category and value PC_CV instead.

   The target cell is found by hashing and comparing in the same way as
   ctables_cell_insert__().  NOTE(review): what happens when no such cell
   exists is not visible in this excerpt. */
4153 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4154 const struct ctables_cell_value *pc_cv)
4156 const struct ctables_section *s = ctx->section;
/* Hash the identity of the target cell. */
4159 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4161 const struct ctables_nest *nest = s->nests[a];
4162 for (size_t i = 0; i < nest->n; i++)
4163 if (i != nest->scale_idx)
4165 const struct ctables_cell_value *cv
4166 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4167 : &ctx->cell->axes[a].cvs[i]);
4168 hash = hash_pointer (cv->category, hash);
4169 if (cv->category->type != CCT_TOTAL
4170 && cv->category->type != CCT_SUBTOTAL
4171 && cv->category->type != CCT_POSTCOMPUTE)
4172 hash = value_hash (&cv->value,
4173 var_get_width (nest->vars[i]), hash);
/* Search the section's cells for one with that identity. */
4177 struct ctables_cell *tc;
4178 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4180 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4182 const struct ctables_nest *nest = s->nests[a];
4183 for (size_t i = 0; i < nest->n; i++)
4184 if (i != nest->scale_idx)
4186 const struct ctables_cell_value *p_cv
4187 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4188 : &ctx->cell->axes[a].cvs[i]);
4189 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4190 if (p_cv->category != t_cv->category
4191 || (p_cv->category->type != CCT_TOTAL
4192 && p_cv->category->type != CCT_SUBTOTAL
4193 && p_cv->category->type != CCT_POSTCOMPUTE
4194 && !value_equal (&p_cv->value,
4196 var_get_width (nest->vars[i]))))
/* Compute the requested summary's value for the cell that was found. */
4208 const struct ctables_table *t = s->table;
4209 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4210 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4211 return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx],
4212 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category references are resolved to cells through
   ctables_pcexpr_evaluate_category().  Operators are evaluated by
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*()
   function. */
4216 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4217 const struct ctables_pcexpr *e)
/* References that can cover several values: every occurring value of the
   postcompute variable that falls in the referenced category is evaluated and
   the results are accumulated in 'sum'. */
4224 case CTPO_CAT_NRANGE:
4225 case CTPO_CAT_SRANGE:
4226 case CTPO_CAT_MISSING:
4227 case CTPO_CAT_OTHERNM:
4229 struct ctables_cell_value cv = {
4230 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4232 assert (cv.category != NULL);
4234 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4235 const struct ctables_occurrence *o;
4238 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4239 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4240 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4242 cv.value = o->value;
4243 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* References to a single number, a subtotal or the total. */
4248 case CTPO_CAT_NUMBER:
4249 case CTPO_CAT_SUBTOTAL:
4250 case CTPO_CAT_TOTAL:
4252 struct ctables_cell_value cv = {
4253 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4254 .value = { .f = e->number },
4256 assert (cv.category != NULL);
4257 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Reference to a string value.  When the variable is wider than the string, a
   space-padded copy of the string is made in 's'. */
4260 case CTPO_CAT_STRING:
4262 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4264 if (width > e->string.length)
4266 s = xmalloc (width);
4267 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4270 const struct ctables_category *category
4271 = ctables_find_category_for_postcompute (
4272 ctx->section->table->ctables->dict,
4273 ctx->cats, ctx->parse_format, e);
4274 assert (category != NULL);
/* The category that was found may be numeric, in which case its number is the
   value to look up; for a string category the (possibly padded) string is. */
4276 struct ctables_cell_value cv = { .category = category };
4277 if (category->type == CCT_NUMBER)
4278 cv.value.f = category->number;
4279 else if (category->type == CCT_STRING)
4280 cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string);
4284 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators. */
4290 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4293 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4296 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4299 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4302 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4305 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute category that applies to CELL, which must have its
   'postcompute' flag set, by scanning every axis and every position in the
   section's nests for a cell value whose category is CCT_POSTCOMPUTE.  The
   position of the category that is found is stored through the output
   pointers.  The existing comment below notes the case where more than one
   postcompute applies to the same cell.  NOTE(review): the handling of that
   case and the final return are not visible in this excerpt. */
4311 static const struct ctables_category *
4312 ctables_cell_postcompute (const struct ctables_section *s,
4313 const struct ctables_cell *cell,
4314 enum pivot_axis_type *pc_a_p,
4317 assert (cell->postcompute);
4318 const struct ctables_category *pc_cat = NULL;
4319 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4320 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4322 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4323 if (cv->category->type == CCT_POSTCOMPUTE)
4327 /* Multiple postcomputes cross each other. The value is
4332 pc_cat = cv->category;
4336 *pc_a_idx_p = pc_a_idx;
4340 assert (pc_cat != NULL);
/* Computes the value of postcompute CELL in section S for summary
   specification SS.

   If the postcompute has its own summary specifications, they are searched
   for one with the same function, weighting, calculation area and percentile
   as SS, and that specification's format is stored in *FORMAT and
   *IS_CTABLES_FORMAT.  The postcompute's expression is then evaluated and its
   value returned.  NOTE(review): the guard on pc->specs and anything done
   after a match are not visible in this excerpt. */
4345 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4346 const struct ctables_cell *cell,
4347 const struct ctables_summary_spec *ss,
4348 struct fmt_spec *format,
4349 bool *is_ctables_format,
/* Locate the postcompute category and its position in the cell. */
4352 enum pivot_axis_type pc_a = 0;
4353 size_t pc_a_idx = 0;
4354 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4355 s, cell, &pc_a, &pc_a_idx);
4359 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a format override that matches SS. */
4362 for (size_t i = 0; i < pc->specs->n; i++)
4364 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4365 if (ss->function == ss2->function
4366 && ss->weighting == ss2->weighting
4367 && ss->calc_area == ss2->calc_area
4368 && ss->percentile == ss2->percentile)
4370 *format = ss2->format;
4371 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the postcompute's
   variable. */
4377 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4378 const struct ctables_categories *cats = s->table->categories[
4379 var_get_dict_index (var)];
4380 struct ctables_pcexpr_evaluate_ctx ctx = {
4385 .pc_a_idx = pc_a_idx,
4386 .summary_idx = summary_idx,
4387 .parse_format = pc_cat->parse_format,
4389 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats number D according to FORMAT and SETTINGS as a UTF-8 string, then
   post-processes the position of a minus sign as the comment below explains.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the formatted string is returned. */
4393 ctables_format (double d, const struct fmt_spec *format,
4394 const struct fmt_settings *settings)
4396 const union value v = { .f = d };
4397 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4399 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4400 produce the results we want for negative numbers, putting the negative
4401 sign in the wrong spot, before the prefix instead of after it. We can't,
4402 in fact, produce the desired results using a custom-currency
4403 specification. Instead, we postprocess the output, moving the negative
4406 NEQUAL: "-N=3" => "N=-3"
4407 PAREN: "-(3)" => "(-3)"
4408 PCTPAREN: "-(3%)" => "(-3%)"
4410 This transformation doesn't affect NEGPAREN. */
/* A '-' directly after 'E' is skipped; that looks like the sign of an
   exponent. */
4411 char *minus_src = strchr (s, '-');
4412 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
/* The sign goes just after "N=" if that is present, otherwise after the first
   '('. */
4414 char *n_equals = strstr (s, "N=");
4415 char *lparen = strchr (s, '(');
4416 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4418 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Examines every nest stacked on axis A of table T.  Each nest is checked for
   consisting of exactly one variable that is its scale variable, and for that
   variable's variable-label setting being CTVL_NONE.  NOTE(review): the
   return statements are not visible in this excerpt; presumably false as soon
   as a nest fails a check and true otherwise. */
4424 all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a)
4426 for (size_t i = 0; i < t->stacks[a].n; i++)
4428 struct ctables_nest *nest = &t->stacks[a].nests[i];
4429 if (nest->n != 1 || nest->scale_idx != 0)
4432 enum ctables_vlabel vlabel
4433 = t->ctables->vlabels[var_get_dict_index (nest->vars[0])];
4434 if (vlabel != CTVL_NONE)
/* Builds a pivot table from the accumulated cells of table T (which belongs
   to CT) and hands it to pivot_table_submit().

   Steps visible in this function:

   - Create the pivot table, titled with T->title or the default
     "Custom Tables", and set caption and corner text (presumably only when
     they are set -- the guards are not visible in this view).

   - Optionally create a "Statistics" dimension for the summary functions and
     a "Row Categories" or "Column Categories" dimension for category labels.

   - For each axis, create a dimension when needed and build its category
     tree from the cells of the matching sections, sorted with
     ctables_cell_compare_3way.  Each cell records the leaf index it maps to
     in cell->axes[a].leaf.

   - Assign sequence numbers to the areas of the cells, in the order given by
     ctables_cell_compare_leaf_3way.

   - For every cell and every summary spec, compute the value (via the
     postcompute path or ctables_summary_value()), pick its presentation
     (hide threshold text, CT->zero, CT->missing, CTABLES-specific format, or
     a plain number) and store it with pivot_table_put(). */
4441 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4443 struct pivot_table *pt = pivot_table_create__ (
4445 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4446 : pivot_value_new_text (N_("Custom Tables"))),
4449 pivot_table_set_caption (
4450 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4452 pivot_table_set_corner_text (
4453 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate summary dimension is used when the summary labels live on a
   different axis from the summaries themselves, or when the labels are
   hidden and there is more than one summary spec. */
4455 bool summary_dimension = (t->summary_axis != t->slabels_axis
4456 || (!t->slabels_visible
4457 && t->summary_specs.n > 1));
4458 if (summary_dimension)
4460 struct pivot_dimension *d = pivot_dimension_create (
4461 pt, t->slabels_axis, N_("Statistics"));
4462 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4463 if (!t->slabels_visible)
4464 d->hide_all_labels = true;
/* One leaf per merged summary spec, in spec order. */
4465 for (size_t i = 0; i < specs->n; i++)
4466 pivot_category_create_leaf (
4467 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A separate categories dimension is used whenever T->clabels_example is
   set; it gets one leaf per entry of T->clabels_values. */
4470 bool categories_dimension = t->clabels_example != NULL;
4471 if (categories_dimension)
4473 struct pivot_dimension *d = pivot_dimension_create (
4474 pt, t->label_axis[t->clabels_from_axis],
4475 t->clabels_from_axis == PIVOT_AXIS_ROW
4476 ? N_("Row Categories")
4477 : N_("Column Categories"));
4478 const struct variable *var = t->clabels_example;
4479 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4480 for (size_t i = 0; i < t->n_clabels_values; i++)
4482 const struct ctables_value *value = t->clabels_values[i];
4483 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4484 assert (cat != NULL);
4485 pivot_category_create_leaf (
4486 d->root, ctables_category_create_value_label (c, cat,
4492 pivot_table_set_look (pt, ct->look);
/* Per-axis dimensions.  A dimension is created for an axis that has
   variables or that carries the summaries. */
4493 struct pivot_dimension *d[PIVOT_N_AXES];
4494 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4496 static const char *names[] = {
4497 [PIVOT_AXIS_ROW] = N_("Rows"),
4498 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4499 [PIVOT_AXIS_LAYER] = N_("Layers"),
4501 d[a] = (t->axes[a] || a == t->summary_axis
4502 ? pivot_dimension_create (pt, a, names[a])
4507 assert (t->axes[a]);
/* Handle each nest stacked on this axis in turn. */
4509 for (size_t i = 0; i < t->stacks[a].n; i++)
4511 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, counting their cells
   and tracking the deepest nest. */
4512 struct ctables_section **sections = xnmalloc (t->n_sections,
4514 size_t n_sections = 0;
4516 size_t n_total_cells = 0;
4517 size_t max_depth = 0;
4518 for (size_t j = 0; j < t->n_sections; j++)
4519 if (t->sections[j].nests[a] == nest)
4521 struct ctables_section *s = &t->sections[j];
4522 sections[n_sections++] = s;
4523 n_total_cells += hmap_count (&s->cells);
4525 size_t depth = s->nests[a]->n;
4526 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it.
   NOTE(review): a filter on which cells are included may sit on a line not
   visible here; the assertion only requires n_sorted <= n_total_cells. */
4529 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4531 size_t n_sorted = 0;
4533 for (size_t j = 0; j < n_sections; j++)
4535 struct ctables_section *s = sections[j];
4537 struct ctables_cell *cell;
4538 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4540 sorted[n_sorted++] = cell;
4541 assert (n_sorted <= n_total_cells);
4544 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4545 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* Description of one level of the category tree for this nest. */
4547 struct ctables_level
4549 enum ctables_level_type
4551 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4552 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4553 CTL_SUMMARY, /* Summary functions. */
4557 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most two levels per variable plus one summary level. */
4560 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4561 size_t n_levels = 0;
4562 for (size_t k = 0; k < nest->n; k++)
4564 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4565 if (vlabel == CTVL_NONE && nest->scale_idx == k)
4567 if (vlabel != CTVL_NONE)
4569 levels[n_levels++] = (struct ctables_level) {
4571 .vlabel = (enum settings_value_show) vlabel,
/* A category level is added for every non-scale variable, except for the
   innermost variable when its labels are placed on another axis. */
4576 if (nest->scale_idx != k
4577 && (k != nest->n - 1 || t->label_axis[a] == a))
4579 levels[n_levels++] = (struct ctables_level) {
4580 .type = CTL_CATEGORY,
/* Summary functions become the last level when they share this axis and no
   separate summary dimension exists. */
4586 if (!summary_dimension && a == t->slabels_axis)
4588 levels[n_levels++] = (struct ctables_level) {
4589 .type = CTL_SUMMARY,
4590 .var_idx = SIZE_MAX,
4594 /* Pivot categories:
4596 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4597 - category for nest->vars[0], if nest->scale_idx != 0
4598 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4599 - category for nest->vars[1], if nest->scale_idx != 1
4601 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4602 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4603 - summary function, if 'a == t->slabels_axis && a ==
4606 Additional dimensions:
4608 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4610 - If 't->label_axis[b] == a' for some 'b != a', add a category
4615 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4617 for (size_t j = 0; j < n_sorted; j++)
4619 struct ctables_cell *cell = sorted[j];
4620 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4622 size_t n_common = 0;
4625 for (; n_common < n_levels; n_common++)
4627 const struct ctables_level *level = &levels[n_common];
4628 if (level->type == CTL_CATEGORY)
4630 size_t var_idx = level->var_idx;
4631 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4632 if (prev->axes[a].cvs[var_idx].category != c)
4634 else if (c->type != CCT_SUBTOTAL
4635 && c->type != CCT_TOTAL
4636 && c->type != CCT_POSTCOMPUTE
4637 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4638 &cell->axes[a].cvs[var_idx].value,
4639 var_get_type (nest->vars[var_idx])))
4645 for (size_t k = n_common; k < n_levels; k++)
4647 const struct ctables_level *level = &levels[k];
4648 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4649 if (level->type == CTL_SUMMARY)
4651 assert (k == n_levels - 1);
4653 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4654 for (size_t m = 0; m < specs->n; m++)
4656 int leaf = pivot_category_create_leaf (
4657 parent, ctables_summary_label (&specs->specs[m],
4665 const struct variable *var = nest->vars[level->var_idx];
4666 struct pivot_value *label;
4667 if (level->type == CTL_VAR)
4669 label = pivot_value_new_variable (var);
4670 label->variable.show = level->vlabel;
4672 else if (level->type == CTL_CATEGORY)
4674 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4675 label = ctables_category_create_value_label (
4676 t->categories[var_get_dict_index (var)],
4677 cv->category, var, &cv->value);
4682 if (k == n_levels - 1)
4683 prev_leaf = pivot_category_create_leaf (parent, label);
4685 groups[k] = pivot_category_create_group__ (parent, label);
4689 cell->axes[a].leaf = prev_leaf;
4698 d[a]->hide_all_labels = all_hidden_vlabels (t, a);
4702 size_t n_total_cells = 0;
4703 for (size_t j = 0; j < t->n_sections; j++)
4704 n_total_cells += hmap_count (&t->sections[j].cells);
4706 struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted);
4707 size_t n_sorted = 0;
4708 for (size_t j = 0; j < t->n_sections; j++)
4710 const struct ctables_section *s = &t->sections[j];
4711 struct ctables_cell *cell;
4712 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4714 sorted[n_sorted++] = cell;
4716 assert (n_sorted <= n_total_cells);
4717 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way,
4719 size_t ids[N_CTATS];
4720 memset (ids, 0, sizeof ids);
4721 for (size_t j = 0; j < n_sorted; j++)
4723 struct ctables_cell *cell = sorted[j];
4724 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
4726 struct ctables_area *area = cell->areas[at];
4727 if (!area->sequence)
4728 area->sequence = ++ids[at];
4735 for (size_t i = 0; i < t->n_sections; i++)
4737 struct ctables_section *s = &t->sections[i];
4739 struct ctables_cell *cell;
4740 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4745 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4746 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4747 for (size_t j = 0; j < specs->n; j++)
4750 size_t n_dindexes = 0;
4752 if (summary_dimension)
4753 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4755 if (categories_dimension)
4757 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4758 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4759 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4760 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4763 dindexes[n_dindexes++] = ctv->leaf;
4766 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4769 int leaf = cell->axes[a].leaf;
4770 if (a == t->summary_axis && !summary_dimension)
4772 dindexes[n_dindexes++] = leaf;
4775 const struct ctables_summary_spec *ss = &specs->specs[j];
4777 struct fmt_spec format = specs->specs[j].format;
4778 bool is_ctables_format = ss->is_ctables_format;
4779 double d = (cell->postcompute
4780 ? ctables_cell_calculate_postcompute (
4781 s, cell, ss, &format, &is_ctables_format, j)
4782 : ctables_summary_value (cell->areas,
4783 &cell->summaries[j], ss));
4785 struct pivot_value *value;
4786 if (ct->hide_threshold != 0
4787 && d < ct->hide_threshold
4788 && ss->function == CTSF_COUNT)
4790 value = pivot_value_new_user_text_nocopy (
4791 xasprintf ("<%d", ct->hide_threshold));
4793 else if (d == 0 && ct->zero)
4794 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4795 else if (d == SYSMIS && ct->missing)
4796 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4797 else if (is_ctables_format)
4798 value = pivot_value_new_user_text_nocopy (
4799 ctables_format (d, &format, &ct->ctables_formats));
4802 value = pivot_value_new_number (d);
4803 value->numeric.format = format;
4805 /* XXX should text values be right-justified? */
4806 pivot_table_put (pt, dindexes, n_dindexes, value);
4811 pivot_table_submit (pt);
/* Validates moving the category labels of axis A of table T to the axis
   given by T->label_axis[A] (named "LAYER" or "OPPOSITE" in messages, under
   subcommand "ROWLABELS" or "COLLABELS").

   As a side effect, records the innermost variable of the first nest on the
   axis as T->clabels_example.

   The visible code reports an error with msg (SE, ...) when:

   - a category of that variable has type CCT_FUNCTION (sorting based on a
     summary function);
   - the innermost variable of any stacked nest is the scale variable;
   - a stacked variable differs from the first one in width, value labels,
     or category specifications.

   NOTE(review): the early-exit conditions and all return statements are not
   visible in this view.  The caller combines two calls with &&, so the
   function presumably returns true on success and false after reporting an
   error -- confirm. */
4815 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4817 enum pivot_axis_type label_pos = t->label_axis[a];
4821 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4822 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4824 const struct ctables_stack *stack = &t->stacks[a];
4828 const struct ctables_nest *n0 = &stack->nests[0];
4831 assert (stack->n == 1);
/* V0 is the innermost variable of the first nest; every other stacked
   variable is compared against it below. */
4835 const struct variable *v0 = n0->vars[n0->n - 1];
4836 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4837 t->clabels_example = v0;
4839 for (size_t i = 0; i < c0->n_cats; i++)
4840 if (c0->cats[i].type == CCT_FUNCTION)
4842 msg (SE, _("%s=%s is not allowed with sorting based "
4843 "on a summary function."),
4844 subcommand_name, pos_name);
/* The innermost variable must not be the scale variable of its nest. */
4847 if (n0->n - 1 == n0->scale_idx)
4849 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4850 "but %s is a scale variable."),
4851 subcommand_name, pos_name, var_get_name (v0));
/* Check the innermost variable of each remaining nest against V0. */
4855 for (size_t i = 1; i < stack->n; i++)
4857 const struct ctables_nest *ni = &stack->nests[i];
4859 const struct variable *vi = ni->vars[ni->n - 1];
4860 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4862 if (ni->n - 1 == ni->scale_idx)
4864 msg (SE, _("%s=%s requires the variables to be moved to be "
4865 "categorical, but %s is a scale variable."),
4866 subcommand_name, pos_name, var_get_name (vi));
4869 if (var_get_width (v0) != var_get_width (vi))
4871 msg (SE, _("%s=%s requires the variables to be "
4872 "moved to have the same width, but %s has "
4873 "width %d and %s has width %d."),
4874 subcommand_name, pos_name,
4875 var_get_name (v0), var_get_width (v0),
4876 var_get_name (vi), var_get_width (vi));
4879 if (!val_labs_equal (var_get_value_labels (v0),
4880 var_get_value_labels (vi)))
4882 msg (SE, _("%s=%s requires the variables to be "
4883 "moved to have the same value labels, but %s "
4884 "and %s have different value labels."),
4885 subcommand_name, pos_name,
4886 var_get_name (v0), var_get_name (vi));
4889 if (!ctables_categories_equal (c0, ci))
4891 msg (SE, _("%s=%s requires the variables to be "
4892 "moved to have the same category "
4893 "specifications, but %s and %s have different "
4894 "category specifications."),
4895 subcommand_name, pos_name,
4896 var_get_name (v0), var_get_name (vi));
/* Registers VAR in the growable array *SUM_VARS, which holds *N entries with
   room for *ALLOCATED.  The array is first searched for VAR; when the array
   is full it is grown with x2nrealloc() before VAR is stored at index *N.

   NOTE(review): the return statements and the increment of *N are not
   visible in this view.  The caller stores the result in sum_var_idx, so
   the function presumably returns the index of VAR in the array (the
   existing one when VAR is already present) -- confirm. */
4905 add_sum_var (struct variable *var,
4906 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4908 for (size_t i = 0; i < *n; i++)
4909 if (var == (*sum_vars)[i])
/* Not found: append, growing the array if necessary. */
4912 if (*n >= *allocated)
4913 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4914 (*sum_vars)[*n] = var;
/* Maps AREA to another area type.  It is applied to calc_area of summary
   specs by ctables_prepare_table() when row labels are placed on the column
   axis or column labels on the row axis.

   NOTE(review): only two of the results (CTAT_LAYERCOL and CTAT_LAYERROW)
   are visible in this view; the case labels and remaining returns are not.
   Presumably the function swaps row-oriented and column-oriented areas and
   leaves the others unchanged -- confirm. */
4918 static enum ctables_area_type
4919 rotate_area (enum ctables_area_type area)
4930 return CTAT_LAYERCOL;
4933 return CTAT_LAYERROW;
/* Walks the axis tree rooted at A and collects summed variables into the
   growable array *SUM_VARS (*N used, *ALLOCATED capacity).

   For each summary spec of a node whose function is CTSF_areaPCT_SUM, the
   variable of the node is registered with add_sum_var() and the result is
   stored in the sum_var_idx member of the spec.  The function also recurses
   into both entries of a->subs.

   NOTE(review): the test that selects between the two behaviors (presumably
   a switch on the kind of node, plus a null check on A) is not visible in
   this view -- confirm. */
4946 enumerate_sum_vars (const struct ctables_axis *a,
4947 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Visit every spec of every summary variant of this node. */
4955 for (size_t i = 0; i < N_CSVS; i++)
4956 for (size_t j = 0; j < a->specs[i].n; j++)
4958 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4959 if (spec->function == CTSF_areaPCT_SUM)
4960 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
/* Recurse into the two operands. */
4966 for (size_t i = 0; i < 2; i++)
4967 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution after parsing.

   Steps visible in this function:

   - For each axis, build the stack of nests with enumerate_fts() and, for
     every nest and area type, fill nest->areas[at] with the indexes of the
     non-scale variables that define that area, then drop some trailing
     entries depending on where category labels are placed.

   - Install a single nest with no scale or summary variable as the stack of
     an axis and keep its labels on that axis (presumably done for an axis
     without variables -- the condition is not visible in this view).

   - On the summary axis, give each nest that has no cell summary specs a
     default one (CTSF_MEAN for scale, CTSF_COUNT otherwise) and make sure
     the CSV_TOTAL specs exist by cloning the CSV_CELL specs.

   - Pass calc_area of the specs through rotate_area() when row labels are
     on the column axis or column labels are on the row axis.

   - When listwise handling of scale missing values is enabled, collect the
     scale variables of the other nests in the same group.

   - Merge the specs of all nests into T->summary_specs, assigning each spec
     its axis_idx.

   - Enumerate the summed variables of the summary axis.

   Returns the conjunction of ctables_check_label_position() for the row and
   column axes. */
4973 ctables_prepare_table (struct ctables_table *t)
4975 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4978 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4980 for (size_t j = 0; j < t->stacks[a].n; j++)
4982 struct ctables_nest *nest = &t->stacks[a].nests[j];
4983 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
/* Room for one index per variable in the nest. */
4985 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
4986 nest->n_areas[at] = 0;
/* For row- and column-oriented areas, ATA is the axis the area is named
   after and ATB is the other one of the row/column pair. */
4988 enum pivot_axis_type ata, atb;
4989 if (at == CTAT_ROW || at == CTAT_LAYERROW)
4991 ata = PIVOT_AXIS_ROW;
4992 atb = PIVOT_AXIS_COLUMN;
4994 else if (at == CTAT_COL || at == CTAT_LAYERCOL)
4996 ata = PIVOT_AXIS_COLUMN;
4997 atb = PIVOT_AXIS_ROW;
5000 if (at == CTAT_LAYER
5001 ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
5002 : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
5003 ? a == atb && t->label_axis[a] != a
/* Walk K downward from the last variable; K is unsigned, so the loop ends
   when K wraps around past zero. */
5006 for (size_t k = nest->n - 1; k < nest->n; k--)
5007 if (k != nest->scale_idx)
5009 nest->areas[at][nest->n_areas[at]++] = k;
5015 if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
5016 : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
5017 : at == CTAT_TABLE ? true
/* Add every non-scale variable of the nest to the area. */
5021 for (size_t k = 0; k < nest->n; k++)
5022 if (k != nest->scale_idx)
5023 nest->areas[at][nest->n_areas[at]++] = k;
/* N_DROP selects how the tail of the list is trimmed below.
   NOTE(review): its declaration and the branches that choose between these
   assignments are not visible in this view. */
5029 #define L PIVOT_AXIS_LAYER
5030 n_drop = (t->clabels_from_axis == L ? a != L
5031 : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
5032 : t->clabels_from_axis == a ? 2
5039 n_drop = a == ata && t->label_axis[ata] == atb;
5044 n_drop = (a == ata ? t->label_axis[ata] == atb
5046 : t->clabels_from_axis == atb ? -1
5047 : t->clabels_to_axis != atb ? 1
/* Remove the second-to-last entry by overwriting it with the last one
   (presumably the negative N_DROP case -- the guard is not visible). */
5059 size_t n = nest->n_areas[at];
5062 nest->areas[at][n - 2] = nest->areas[at][n - 1];
5063 nest->n_areas[at]--;
/* Drop up to N_DROP trailing entries, never going below zero. */
5068 for (int i = 0; i < n_drop; i++)
5069 if (nest->n_areas[at] > 0)
5070 nest->n_areas[at]--;
5077 struct ctables_nest *nest = xmalloc (sizeof *nest);
5078 *nest = (struct ctables_nest) {
5080 .scale_idx = SIZE_MAX,
5081 .summary_idx = SIZE_MAX
5083 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5085 /* There's no point in moving labels away from an axis that has no
5086 labels, so avoid dealing with the special cases around that. */
5087 t->label_axis[a] = a;
/* Fill in defaults for the summary specs of each nest on the summary
   axis. */
5090 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5091 for (size_t i = 0; i < stack->n; i++)
5093 struct ctables_nest *nest = &stack->nests[i];
5094 if (!nest->specs[CSV_CELL].n)
5096 struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
5097 ss->specs = xmalloc (sizeof *ss->specs);
/* Default function: mean for a scale variable, count otherwise. */
5100 enum ctables_summary_function function
5101 = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* The innermost variable of the nest becomes the summary variable. */
5105 nest->summary_idx = nest->n - 1;
5106 ss->var = nest->vars[nest->summary_idx];
5108 *ss->specs = (struct ctables_summary_spec) {
5109 .function = function,
5110 .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
5111 .format = ctables_summary_default_format (function, ss->var),
5114 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5115 &nest->specs[CSV_CELL]);
/* Cell specs exist but total specs do not: totals reuse the cell specs. */
5117 else if (!nest->specs[CSV_TOTAL].n)
5118 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5119 &nest->specs[CSV_CELL]);
/* Labels moved between the row and column axes: rotate the calculation
   areas.  NOTE(review): a condition involving CFI is presumably applied
   before the rotation but is not visible in this view. */
5121 if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN
5122 || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
5124 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5125 for (size_t i = 0; i < nest->specs[sv].n; i++)
5127 struct ctables_summary_spec *ss = &nest->specs[sv].specs[i];
5128 const struct ctables_function_info *cfi =
5129 &ctables_function_info[ss->function];
5131 ss->calc_area = rotate_area (ss->calc_area);
/* Listwise handling: gather the scale variables of the other nests that
   share the group head of this nest. */
5135 if (t->ctables->smissing_listwise)
5137 struct variable **listwise_vars = NULL;
5139 size_t allocated = 0;
5141 for (size_t j = nest->group_head; j < stack->n; j++)
5143 const struct ctables_nest *other_nest = &stack->nests[j];
5144 if (other_nest->group_head != nest->group_head)
5147 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5150 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5151 sizeof *listwise_vars);
5152 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Store the list in each summary variant; a copy is made with xmemdup()
   (presumably for every variant after the first, so that each owns its own
   array -- the guard is not visible). */
5155 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5158 listwise_vars = xmemdup (listwise_vars,
5159 n * sizeof *listwise_vars);
5160 nest->specs[sv].listwise_vars = listwise_vars;
5161 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into T->summary_specs. */
5166 struct ctables_summary_spec_set *merged = &t->summary_specs;
5167 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5169 for (size_t j = 0; j < stack->n; j++)
5171 const struct ctables_nest *nest = &stack->nests[j];
5173 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5174 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Pick the smallest remaining item according to merge_item_compare_3way. */
5179 struct merge_item min = items[0];
5180 for (size_t j = 1; j < n_left; j++)
5181 if (merge_item_compare_3way (&items[j], &min) < 0)
5184 if (merged->n >= merged->allocated)
5185 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5186 sizeof *merged->specs);
5187 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item equal to the minimum maps to the merged spec just added and
   advances; an exhausted item is replaced by the last remaining one. */
5189 for (size_t j = 0; j < n_left; )
5191 if (merge_item_compare_3way (&items[j], &min) == 0)
5193 struct merge_item *item = &items[j];
5194 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5195 if (++item->ofs >= item->set->n)
5197 items[j] = items[--n_left];
5206 size_t allocated_sum_vars = 0;
5207 enumerate_sum_vars (t->axes[t->summary_axis],
5208 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5210 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5211 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest stacked on axis A of table T, reads the value in case C of
   the innermost variable of the nest and, when that value matches one of
   the categories configured for the variable, records it with
   ctables_value_insert().

   NOTE(review): the action taken for a system-missing numeric value
   (presumably skipping to the next nest) is not visible in this view. */
5215 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5216 enum pivot_axis_type a)
5218 struct ctables_stack *stack = &t->stacks[a];
5219 for (size_t i = 0; i < stack->n; i++)
5221 const struct ctables_nest *nest = &stack->nests[i];
5222 const struct variable *var = nest->vars[nest->n - 1];
5223 const union value *value = case_data (c, var);
5225 if (var_is_numeric (var) && value->f == SYSMIS)
5228 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5230 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback used by ctables_sort_clabels_values().  A_ and B_
   each point to a pointer to struct ctables_value, and WIDTH_ points to the
   int width of the values.  Returns the result of value_compare_3way() on
   the two values. */
5235 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5237 const struct ctables_value *const *ap = a_;
5238 const struct ctables_value *const *bp = b_;
5239 const struct ctables_value *a = *ap;
5240 const struct ctables_value *b = *bp;
5241 const int *width = width_;
5242 return value_compare_3way (&a->value, &b->value, *width);
/* Builds the sorted array T->clabels_values from the hash map
   T->clabels_values_map and numbers the entries.

   Value labels of T->clabels_example that match its categories are inserted
   into the map first (presumably only under a condition on C0 that is not
   visible in this view -- confirm).  Then all map entries are copied into a
   newly allocated array, sorted with compare_clabels_values_3way, and each
   entry gets its array position as its leaf index. */
5246 ctables_sort_clabels_values (struct ctables_table *t)
5248 const struct variable *v0 = t->clabels_example;
5249 int width = var_get_width (v0);
5251 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5254 const struct val_labs *val_labs = var_get_value_labels (v0);
5255 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5256 vl = val_labs_next (val_labs, vl))
5257 if (ctables_categories_match (c0, &vl->value, v0))
5258 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array. */
5261 size_t n = hmap_count (&t->clabels_values_map);
5262 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5264 struct ctables_value *clv;
5266 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5267 t->clabels_values[i++] = clv;
5268 t->n_clabels_values = n;
5271 sort (t->clabels_values, n, sizeof *t->clabels_values,
5272 compare_clabels_values_3way, &width);
/* The position in sorted order becomes the leaf index of the value. */
5274 for (size_t i = 0; i < n; i++)
5275 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR implied by the category
   specifications in CATS, using ctables_add_occurrence().

   Each category in CATS is handled according to its type.  Explicit numbers
   and strings are added directly; for the other visible cases, the value
   labels of VAR are scanned and those that fall in a numeric range, fall in
   a string range, are missing values, or satisfy the include_missing
   setting of the category are added.

   NOTE(review): most case labels of the switch are not visible in this
   view, so the mapping from category type to branch is inferred from the
   branch bodies -- confirm against the full source. */
5279 ctables_add_category_occurrences (const struct variable *var,
5280 struct hmap *occurrences,
5281 const struct ctables_categories *cats)
5283 const struct val_labs *val_labs = var_get_value_labels (var);
5285 for (size_t i = 0; i < cats->n_cats; i++)
5287 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric value. */
5291 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string value, padded on the right with spaces to the width of
   the variable. */
5297 int width = var_get_width (var);
5299 value_init (&value, width);
5300 value_copy_buf_rpad (&value, width,
5301 CHAR_CAST (uint8_t *, c->string.string),
5302 c->string.length, ' ');
5303 ctables_add_occurrence (var, &value, occurrences);
5304 value_destroy (&value, width);
/* Numeric range: labeled values within [nrange[0], nrange[1]]. */
5309 assert (var_is_numeric (var));
5310 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5311 vl = val_labs_next (val_labs, vl))
5312 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5313 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
5317 assert (var_is_alpha (var));
5318 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5319 vl = val_labs_next (val_labs, vl))
5320 if (in_string_range (&vl->value, var, c->srange))
5321 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5325 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5326 vl = val_labs_next (val_labs, vl))
5327 if (var_is_value_missing (var, &vl->value))
5328 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5332 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5333 vl = val_labs_next (val_labs, vl))
5334 ctables_add_occurrence (var, &vl->value, occurrences);
5337 case CCT_POSTCOMPUTE:
/* Labeled values, excluding missing ones unless the category includes
   missing values. */
5347 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5348 vl = val_labs_next (val_labs, vl))
5349 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5350 ctables_add_occurrence (var, &vl->value, occurrences);
5353 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates every combination of recorded occurrences (and
   postcompute categories) over the variables of section S and inserts a
   cell for each combination.

   A is the axis and A_IDX the index of the variable within the nest on that
   axis currently being enumerated.  CATS[axis][index] receives the category
   chosen for each variable, and case C is used as scratch space: the value
   of the current variable in C is overwritten with each occurrence in turn.

   When A runs past the last axis, the combination is complete and
   ctables_cell_insert__() is called.  When the axis has no nest or A_IDX is
   past its last variable, enumeration moves on to the next axis. */
5360 ctables_section_recurse_add_empty_categories (
5361 struct ctables_section *s,
5362 const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c,
5363 enum pivot_axis_type a, size_t a_idx)
5365 if (a >= PIVOT_N_AXES)
5366 ctables_cell_insert__ (s, c, cats);
5367 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5368 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5371 const struct variable *var = s->nests[a]->vars[a_idx];
5372 const struct ctables_categories *categories = s->table->categories[
5373 var_get_dict_index (var)];
5374 int width = var_get_width (var);
5375 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5376 const struct ctables_occurrence *o;
/* Try each recorded occurrence as the value of VAR. */
5377 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence. */
5379 union value *value = case_data_rw (c, var);
5380 value_destroy (value, width);
5381 value_clone (value, &o->value, width);
5382 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5383 assert (cats[a][a_idx] != NULL);
5384 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated in addition to the occurrences. */
5387 for (size_t i = 0; i < categories->n_cats; i++)
5389 const struct ctables_category *cat = &categories->cats[i];
5390 if (cat->type == CCT_POSTCOMPUTE)
5392 cats[a][a_idx] = cat;
5393 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that may not have
   occurred in the data.

   For every non-scale variable of the section whose categories have
   show_empty set, the occurrences implied by the category specifications
   are added with ctables_add_category_occurrences().  Then a scratch case
   is created from the dictionary prototype and every combination is
   enumerated with ctables_section_recurse_add_empty_categories().

   NOTE(review): the statement that sets SHOW_EMPTY, any early return when
   it stays false, and the release of the scratch case are not visible in
   this view -- confirm. */
5400 ctables_section_add_empty_categories (struct ctables_section *s)
5402 bool show_empty = false;
5403 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5405 for (size_t k = 0; k < s->nests[a]->n; k++)
5406 if (k != s->nests[a]->scale_idx)
5408 const struct variable *var = s->nests[a]->vars[k];
5409 const struct ctables_categories *cats = s->table->categories[
5410 var_get_dict_index (var)];
5411 if (cats->show_empty)
5414 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* Per-axis scratch arrays sized by the number of variables in each nest,
   holding the category chosen for each variable during enumeration. */
5420 const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
5421 const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
5422 const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
5423 const struct ctables_category **cats[PIVOT_N_AXES] =
5425 [PIVOT_AXIS_LAYER] = layer_cats,
5426 [PIVOT_AXIS_ROW] = row_cats,
5427 [PIVOT_AXIS_COLUMN] = column_cats,
5429 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5430 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data while leaving its hash maps usable.

   The visible code removes every occurrence of every non-scale variable,
   every cell (destroying its per-axis values, its cvs arrays, and its
   summaries), and every area, then shrinks the cell and area maps.

   NOTE(review): the calls that free the occurrence, cell, and area objects
   themselves are not visible in this view (presumably free() on short lines
   that are missing) -- confirm. */
5435 ctables_section_clear (struct ctables_section *s)
/* Occurrences, per axis and per non-scale variable. */
5437 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5439 const struct ctables_nest *nest = s->nests[a];
5440 for (size_t i = 0; i < nest->n; i++)
5441 if (i != nest->scale_idx)
5443 const struct variable *var = nest->vars[i];
5444 int width = var_get_width (var);
5445 struct ctables_occurrence *o, *next;
5446 struct hmap *map = &s->occurrences[a][i];
5447 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5449 value_destroy (&o->value, width);
5450 hmap_delete (map, &o->node);
/* Cells. */
5457 struct ctables_cell *cell, *next_cell;
5458 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5460 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5462 const struct ctables_nest *nest = s->nests[a];
5463 for (size_t i = 0; i < nest->n; i++)
5464 if (i != nest->scale_idx)
5465 value_destroy (&cell->axes[a].cvs[i].value,
5466 var_get_width (nest->vars[i]));
5467 free (cell->axes[a].cvs);
/* Summaries are uninitialized using the spec set that matches the summary
   variant of the cell. */
5470 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5471 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5472 for (size_t i = 0; i < specs->n; i++)
5473 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5474 free (cell->summaries);
5476 hmap_delete (&s->cells, &cell->node);
5479 hmap_shrink (&s->cells);
/* Areas, one map per area type. */
5481 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5483 struct ctables_area *area, *next_area;
5484 HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node,
5488 hmap_delete (&s->areas[at], &area->node);
5491 hmap_shrink (&s->areas[at]);
/* Releases everything owned by section S: clears its contents with
   ctables_section_clear(), then destroys the per-variable occurrence maps
   and frees their arrays, and finally destroys the cell map and the area
   maps.  The section structure itself is not freed here. */
5496 ctables_section_uninit (struct ctables_section *s)
5498 ctables_section_clear (s);
5500 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5502 struct ctables_nest *nest = s->nests[a];
5503 for (size_t i = 0; i < nest->n; i++)
5504 hmap_destroy (&s->occurrences[a][i]);
5505 free (s->occurrences[a]);
5508 hmap_destroy (&s->cells);
5509 for (enum ctables_area_type at = 0; at < N_CTATS; at++)
5510 hmap_destroy (&s->areas[at]);
/* Discards the data accumulated in table T: clears every section and, when
   T->clabels_example is set, empties T->clabels_values_map and releases the
   T->clabels_values array, resetting its pointer and count.

   NOTE(review): the call that frees each struct ctables_value is not
   visible in this view -- confirm. */
5514 ctables_table_clear (struct ctables_table *t)
5516 for (size_t i = 0; i < t->n_sections; i++)
5517 ctables_section_clear (&t->sections[i]);
5519 if (t->clabels_example)
5521 int width = var_get_width (t->clabels_example);
5522 struct ctables_value *value, *next_value;
5523 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5524 &t->clabels_values_map)
5526 value_destroy (&value->value, width);
5527 hmap_delete (&t->clabels_values_map, &value->node);
5530 hmap_shrink (&t->clabels_values_map);
5532 free (t->clabels_values);
5533 t->clabels_values = NULL;
5534 t->n_clabels_values = 0;
/* Runs the analysis for all tables of CT over the cases in INPUT, which
   come from dataset DS, and outputs the resulting tables.

   Sections are allocated for each table first (one slot per combination of
   row, column, and layer nests, with at least one per axis).  The input is
   then divided into groups by a casegrouper: by split variables when the
   split type of the dictionary is SPLIT_SEPARATE, otherwise into a single
   group.  For each group every case is inserted into every section of every
   table together with its dictionary, effective, and unweighted (1.0)
   weights, and values for moved category labels are recorded.  Afterward
   each table has its moved labels sorted and its empty categories added,
   and is then output and cleared.

   Returns the result of casegrouper_destroy(). */
5539 ctables_execute (struct dataset *ds, struct casereader *input,
5542 for (size_t i = 0; i < ct->n_tables; i++)
5544 struct ctables_table *t = ct->tables[i];
5545 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5546 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5547 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5548 sizeof *t->sections);
5549 size_t ix[PIVOT_N_AXES];
5550 ctables_table_add_section (t, 0, ix);
5553 struct dictionary *dict = dataset_dict (ds);
5555 bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE;
5556 struct casegrouper *grouper
5558 ? casegrouper_create_splits (input, dict)
5559 : casegrouper_create_vars (input, NULL, 0));
5560 struct casereader *group;
5561 while (casegrouper_get_next_group (grouper, &group))
/* Peek at the first case of the group to output the split file values
   (presumably only when splitting -- the guard is not visible). */
5565 struct ccase *c = casereader_peek (group, 0);
5568 output_split_file_values (ds, c);
5573 bool warn_on_invalid = true;
5574 for (struct ccase *c = casereader_read (group); c;
5575 case_unref (c), c = casereader_read (group))
/* Weights for this case, indexed by enum ctables_weighting. */
5577 double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
5578 double e_weight = (ct->e_weight
5579 ? var_force_valid_weight (ct->e_weight,
5580 case_num (c, ct->e_weight),
5584 [CTW_DICTIONARY] = d_weight,
5585 [CTW_EFFECTIVE] = e_weight,
5586 [CTW_UNWEIGHTED] = 1.0,
5589 for (size_t i = 0; i < ct->n_tables; i++)
5591 struct ctables_table *t = ct->tables[i];
5593 for (size_t j = 0; j < t->n_sections; j++)
5594 ctables_cell_insert (&t->sections[j], c, weight);
/* Record label values for axes whose category labels are placed on a
   different axis. */
5596 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5597 if (t->label_axis[a] != a)
5598 ctables_insert_clabels_values (t, c, a);
5601 casereader_destroy (group);
/* All cases of the group are in: finish, output, and reset each table. */
5603 for (size_t i = 0; i < ct->n_tables; i++)
5605 struct ctables_table *t = ct->tables[i];
5607 if (t->clabels_example)
5608 ctables_sort_clabels_values (t);
5610 for (size_t j = 0; j < t->n_sections; j++)
5611 ctables_section_add_empty_categories (&t->sections[j]);
5613 ctables_table_output (ct, t);
5614 ctables_table_clear (t);
5617 return casegrouper_destroy (grouper);
/* Looks up the postcompute named NAME in CT->postcomputes.  The name is
   hashed with utf8_hash_case_string() and compared with utf8_strcasecmp(),
   so the lookup does not distinguish case.

   NOTE(review): the return statements are not visible in this view;
   callers treat the result as a pointer that may be null, so presumably the
   matching postcompute is returned, or NULL when there is none -- confirm. */
5620 static struct ctables_postcompute *
5621 ctables_find_postcompute (struct ctables *ct, const char *name)
5623 struct ctables_postcompute *pc;
5624 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5625 utf8_hash_case_string (name, 0), &ct->postcomputes)
5626 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form "&name = EXPR(expression)"
   from LEXER and stores it in CT.

   When a postcompute with the same name already exists, a warning is issued
   at the new location with a note pointing at the previous definition, and
   the old expression and location are destroyed before being replaced.
   Otherwise a new postcompute is allocated and inserted into
   CT->postcomputes.  The label is set to the source text of the expression.

   NOTE(review): the return statements, the release of NAME on error paths,
   and the assignment of the parsed expression to the postcompute are not
   visible in this view -- confirm. */
5632 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Start one token back, presumably so the location covers the keyword that
   the caller has already consumed. */
5635 int pcompute_start = lex_ofs (lexer) - 1;
5637 if (!lex_match (lexer, T_AND))
5639 lex_error_expecting (lexer, "&");
5642 if (!lex_force_id (lexer))
5645 char *name = ss_xstrdup (lex_tokss (lexer));
5648 if (!lex_force_match (lexer, T_EQUALS)
5649 || !lex_force_match_id (lexer, "EXPR")
5650 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression so that its source text can
   serve as the label. */
5656 int expr_start = lex_ofs (lexer);
5657 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5658 int expr_end = lex_ofs (lexer) - 1;
5659 if (!expr || !lex_force_match (lexer, T_RPAREN))
5661 ctables_pcexpr_destroy (expr);
5665 int pcompute_end = lex_ofs (lexer) - 1;
5667 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5670 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn and discard the pieces of the old definition. */
5673 msg_at (SW, location, _("New definition of &%s will override the "
5674 "previous definition."),
5676 msg_at (SN, pc->location, _("This is the previous definition."));
5678 ctables_pcexpr_destroy (pc->expr);
5679 msg_location_destroy (pc->location);
/* First definition: create and register a new postcompute that takes
   ownership of NAME. */
5684 pc = xmalloc (sizeof *pc);
5685 *pc = (struct ctables_postcompute) { .name = name };
5686 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5687 utf8_hash_case_string (pc->name, 0));
5690 pc->location = location;
5692 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary function formats from LEXER into SSS, which is
   reset to an empty set first.

   Parsing continues until the end of the command, a slash, or an identifier
   matching LABEL or HIDESOURCECATS.  Each entry consists of a summary
   function, a percentile in the closed range 0 to 100 when the function is
   CTSF_PTILE, and a format specifier; it is appended to SSS as one summary
   spec.

   NOTE(review): the return statements and error jumps are not visible in
   this view.  The final call to ctables_summary_spec_set_uninit() presumably
   sits on the error path only, and the caller tests the result with "!",
   so the function presumably returns true on success -- confirm. */
5697 ctables_parse_pproperties_format (struct lexer *lexer,
5698 struct ctables_summary_spec_set *sss)
5700 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5702 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5703 && !(lex_token (lexer) == T_ID
5704 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5705 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5706 lex_tokss (lexer)))))
5708 /* Parse function. */
5709 enum ctables_summary_function function;
5710 enum ctables_weighting weighting;
5711 enum ctables_area_type area;
5712 if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
5715 /* Parse percentile. */
5716 double percentile = 0;
5717 if (function == CTSF_PTILE)
5719 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5721 percentile = lex_number (lexer);
/* Format specifier for this function. */
5726 struct fmt_spec format;
5727 bool is_ctables_format;
5728 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new spec, growing the array when it is full. */
5731 if (sss->n >= sss->allocated)
5732 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5733 sizeof *sss->specs);
5734 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5735 .function = function,
5736 .weighting = weighting,
5739 .percentile = percentile,
5741 .is_ctables_format = is_ctables_format,
5747 ctables_summary_spec_set_uninit (sss);
/* Parses a PPROPERTIES subcommand from LEXER: a list of "&name" references
   to postcomputes already defined in CT, followed by LABEL, FORMAT, and
   HIDESOURCECATS settings.  Each setting is applied to every postcompute in
   the list.

   A reference to a name that ctables_find_postcompute() does not know is
   reported as "Unknown computed category".

   NOTE(review): the return statements and the release of PCS are not
   visible in this excerpt. */
5752 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5754 struct ctables_postcompute **pcs = NULL;
5756 size_t allocated_pcs = 0;
5758 while (lex_match (lexer, T_AND))
5760 if (!lex_force_id (lexer))
5762 struct ctables_postcompute *pc
5763 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5766 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5771 if (n_pcs >= allocated_pcs)
5772 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings run until the next subcommand or the end of the command. */
5776 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5778 if (lex_match_id (lexer, "LABEL"))
5780 lex_match (lexer, T_EQUALS);
5781 if (!lex_force_string (lexer))
/* Replace each postcompute's label with its own copy of the string. */
5784 for (size_t i = 0; i < n_pcs; i++)
5786 free (pcs[i]->label);
5787 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5792 else if (lex_match_id (lexer, "FORMAT"))
5794 lex_match (lexer, T_EQUALS);
/* Parse once into SSS, then give each postcompute its own clone. */
5796 struct ctables_summary_spec_set sss;
5797 if (!ctables_parse_pproperties_format (lexer, &sss))
5800 for (size_t i = 0; i < n_pcs; i++)
5803 ctables_summary_spec_set_uninit (pcs[i]->specs);
5805 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5806 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5808 ctables_summary_spec_set_uninit (&sss);
5810 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5812 lex_match (lexer, T_EQUALS);
5813 bool hide_source_cats;
5814 if (!parse_bool (lexer, &hide_source_cats))
5816 for (size_t i = 0; i < n_pcs; i++)
5817 pcs[i]->hide_source_cats = hide_source_cats;
5821 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time with localtime() and
   formatted according to the strftime() template FORMAT.  Nothing is
   appended if the conversion or the formatting fails. */
5834 put_strftime (struct string *out, time_t now, const char *format)
5836 const struct tm *tm = localtime (&now);
/* localtime() can return a null pointer, and strftime() returns 0 and leaves
   the buffer contents indeterminate when the result does not fit, so VALUE
   is only used when both calls succeeded. */
5838 if (tm != NULL && strftime (value, sizeof value, format, tm) != 0)
5839 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this excerpt; the
   caller put_title_text() uses the result as a condition, presumably true
   when the prefix was consumed -- confirm against the full file. */
5843 skip_prefix (struct substring *s, struct substring prefix)
5845 if (ss_starts_with (*s, prefix))
5847 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens in LEXER at offsets EXPR_START
   (inclusive) through EXPR_END (exclusive).

   An identifier token that names a variable in DICT with a variable label
   is written as that label; any other identifier is written as-is.
   Remaining tokens are written using their source representation, with a
   space before (except at the start, before a right parenthesis, or when
   OUT already ends in a space) and after (except at the end or after a
   left parenthesis).

   Square-bracket tokens are tested separately through NEST.
   NOTE(review): the declaration and updates of NEST, and the release of
   REPR, are not visible in this excerpt. */
5855 put_table_expression (struct string *out, struct lexer *lexer,
5856 struct dictionary *dict, int expr_start, int expr_end)
5859 for (int ofs = expr_start; ofs < expr_end; ofs++)
5861 const struct token *t = lex_ofs_token (lexer, ofs);
5862 if (t->type == T_LBRACK)
5864 else if (t->type == T_RBRACK && nest > 0)
5870 else if (t->type == T_ID)
/* Prefer the variable's label when the identifier is a labeled variable. */
5872 const struct variable *var
5873 = dict_lookup_var (dict, t->string.string);
5874 const char *label = var ? var_get_label (var) : NULL;
5875 ds_put_cstr (out, label ? label : t->string.string);
/* Other tokens: copy the source text, padding with spaces as needed. */
5879 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5880 ds_put_byte (out, ' ');
5882 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5883 ds_put_cstr (out, repr);
5886 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5887 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the following sequences:

     )DATE   NOW formatted with strftime() "%x".
     )TIME   NOW formatted with strftime() "%X".
     )TABLE  the table expression at token offsets EXPR_START...EXPR_END in
             LEXER, rendered by put_table_expression() using DICT.

   Any other ')' is copied literally.

   NOTE(review): the enclosing loop header and the return taken when IN is
   exhausted are not visible in this excerpt. */
5893 put_title_text (struct string *out, struct substring in, time_t now,
5894 struct lexer *lexer, struct dictionary *dict,
5895 int expr_start, int expr_end)
/* Copy everything up to the next ')' verbatim and step past it. */
5899 size_t chunk = ss_find_byte (in, ')');
5900 ds_put_substring (out, ss_head (in, chunk));
5901 ss_advance (&in, chunk);
5902 if (ss_is_empty (in))
/* IN now starts with ')': try each recognized sequence in turn. */
5905 if (skip_prefix (&in, ss_cstr (")DATE")))
5906 put_strftime (out, now, "%x");
5907 else if (skip_prefix (&in, ss_cstr (")TIME")))
5908 put_strftime (out, now, "%X");
5909 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5910 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a recognized sequence: emit the ')' itself and move on. */
5913 ds_put_byte (out, ')');
5914 ss_advance (&in, 1);
/* Parses the CTABLES command from LEXER and executes it against dataset DS.

   The command consists of global subcommands (FORMAT, VLABELS, MRSETS,
   SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS) followed by one
   or more TABLE subcommands, each with its own per-table subcommands
   (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST).

   Returns CMD_SUCCESS if ctables_execute() and proc_commit() both succeed,
   otherwise CMD_FAILURE.

   NOTE(review): many structural lines (braces, "goto error", the error
   label and its return) are not visible in this excerpt. */
5920 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5922 struct casereader *input = NULL;
/* Create a measure guesser for DS, run it over the dataset's cases, and
   destroy it.  NOTE(review): any guard on MG is not visible here. */
5924 struct measure_guesser *mg = measure_guesser_create (ds);
5927 input = proc_open (ds);
5928 measure_guesser_run (mg, input);
5929 measure_guesser_destroy (mg);
/* Every variable's label display starts out as the global show-variables
   setting. */
5932 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5933 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5934 enum settings_value_show tvars = settings_get_show_variables ();
5935 for (size_t i = 0; i < n_vars; i++)
5936 vlabels[i] = (enum ctables_vlabel) tvars;
/* Unshared copy of the default pivot table look, with omit_empty turned
   off. */
5938 struct pivot_table_look *look = pivot_table_look_unshare (
5939 pivot_table_look_ref (pivot_table_look_get_default ()));
5940 look->omit_empty = false;
5942 struct ctables *ct = xmalloc (sizeof *ct);
5943 *ct = (struct ctables) {
5944 .dict = dataset_dict (ds),
5946 .ctables_formats = FMT_SETTINGS_INIT,
5948 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once so that every )DATE and )TIME expansion in TITLES uses the
   same time. */
5951 time_t now = time (NULL);
5956 const char *dot_string;
5957 const char *comma_string;
/* Number styles for the four CTABLES-specific formats.  The first string is
   used when the decimal character in the format settings is '.', the second
   otherwise. */
5959 static const struct ctf ctfs[4] = {
5960 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5961 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5962 { CTEF_PAREN, "-,(,),", "-.(.)." },
5963 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5965 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5966 for (size_t i = 0; i < 4; i++)
5968 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5969 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5970 fmt_number_style_from_string (s));
5973 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands: everything before the first TABLE. */
5976 while (!lex_match_id (lexer, "TABLE"))
5978 if (lex_match_id (lexer, "FORMAT"))
5980 double widths[2] = { SYSMIS, SYSMIS };
5981 double units_per_inch = 72.0;
5983 while (lex_token (lexer) != T_SLASH)
5985 if (lex_match_id (lexer, "MINCOLWIDTH"))
5987 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5990 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5992 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5995 else if (lex_match_id (lexer, "UNITS"))
5997 lex_match (lexer, T_EQUALS);
5998 if (lex_match_id (lexer, "POINTS"))
5999 units_per_inch = 72.0;
6000 else if (lex_match_id (lexer, "INCHES"))
6001 units_per_inch = 1.0;
6002 else if (lex_match_id (lexer, "CM"))
6003 units_per_inch = 2.54;
6006 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6010 else if (lex_match_id (lexer, "EMPTY"))
6015 lex_match (lexer, T_EQUALS);
6016 if (lex_match_id (lexer, "ZERO"))
6018 /* Nothing to do. */
6020 else if (lex_match_id (lexer, "BLANK"))
6021 ct->zero = xstrdup ("");
6022 else if (lex_force_string (lexer))
6024 ct->zero = ss_xstrdup (lex_tokss (lexer));
6030 else if (lex_match_id (lexer, "MISSING"))
6032 lex_match (lexer, T_EQUALS);
6033 if (!lex_force_string (lexer))
6037 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6038 ? ss_xstrdup (lex_tokss (lexer))
6044 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6045 "UNITS", "EMPTY", "MISSING");
6050 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6051 && widths[0] > widths[1])
6053 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was given from the selected unit to inches and
   then multiply by 96 before storing it in the look. */
6057 for (size_t i = 0; i < 2; i++)
6058 if (widths[i] != SYSMIS)
6060 int *wr = ct->look->width_ranges[TABLE_HORZ];
6061 wr[i] = widths[i] / units_per_inch * 96.0;
6066 else if (lex_match_id (lexer, "VLABELS"))
6068 if (!lex_force_match_id (lexer, "VARIABLES"))
6070 lex_match (lexer, T_EQUALS);
6072 struct variable **vars;
6074 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6078 if (!lex_force_match_id (lexer, "DISPLAY"))
6083 lex_match (lexer, T_EQUALS);
6085 enum ctables_vlabel vlabel;
6086 if (lex_match_id (lexer, "DEFAULT"))
6087 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6088 else if (lex_match_id (lexer, "NAME"))
6090 else if (lex_match_id (lexer, "LABEL"))
6091 vlabel = CTVL_LABEL;
6092 else if (lex_match_id (lexer, "BOTH"))
6094 else if (lex_match_id (lexer, "NONE"))
6098 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Apply the chosen display mode to each listed variable, indexed by its
   position in the dictionary. */
6104 for (size_t i = 0; i < n_vars; i++)
6105 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6108 else if (lex_match_id (lexer, "MRSETS"))
6110 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6112 lex_match (lexer, T_EQUALS);
6113 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6116 else if (lex_match_id (lexer, "SMISSING"))
6118 if (lex_match_id (lexer, "VARIABLE"))
6119 ct->smissing_listwise = false;
6120 else if (lex_match_id (lexer, "LISTWISE"))
6121 ct->smissing_listwise = true;
6124 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6128 else if (lex_match_id (lexer, "PCOMPUTE"))
6130 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6133 else if (lex_match_id (lexer, "PPROPERTIES"))
6135 if (!ctables_parse_pproperties (lexer, ct))
6138 else if (lex_match_id (lexer, "WEIGHT"))
6140 if (!lex_force_match_id (lexer, "VARIABLE"))
6142 lex_match (lexer, T_EQUALS);
6143 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6147 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6149 if (lex_match_id (lexer, "COUNT"))
6151 lex_match (lexer, T_EQUALS);
6152 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6155 ct->hide_threshold = lex_integer (lexer);
/* HIDESMALLCOUNTS without COUNT: use 5 unless a threshold is already set. */
6158 else if (ct->hide_threshold == 0)
6159 ct->hide_threshold = 5;
6163 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6164 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6165 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6169 if (!lex_force_match (lexer, T_SLASH))
/* TABLE subcommands: one iteration of the following code per table. */
6173 size_t allocated_tables = 0;
6176 if (ct->n_tables >= allocated_tables)
6177 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6178 sizeof *ct->tables);
/* Default category settings, and a categories array with one slot per
   dictionary variable. */
6180 struct ctables_category *cat = xmalloc (sizeof *cat);
6181 *cat = (struct ctables_category) {
6183 .include_missing = false,
6184 .sort_ascending = true,
6187 struct ctables_categories *c = xmalloc (sizeof *c);
6188 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6189 *c = (struct ctables_categories) {
6196 struct ctables_categories **categories = xnmalloc (n_vars,
6197 sizeof *categories);
6198 for (size_t i = 0; i < n_vars; i++)
/* The table itself, with its defaults. */
6201 struct ctables_table *t = xmalloc (sizeof *t);
6202 *t = (struct ctables_table) {
6204 .slabels_axis = PIVOT_AXIS_COLUMN,
6205 .slabels_visible = true,
6206 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6208 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6209 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6210 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6212 .clabels_from_axis = PIVOT_AXIS_LAYER,
6213 .clabels_to_axis = PIVOT_AXIS_LAYER,
6214 .categories = categories,
6215 .n_categories = n_vars,
6218 ct->tables[ct->n_tables++] = t;
/* Table expression: rows [BY columns [BY layers]].  EXPR_START and EXPR_END
   bracket its tokens for the )TABLE expansion in TITLES. */
6220 lex_match (lexer, T_EQUALS);
6221 int expr_start = lex_ofs (lexer);
6222 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6223 &t->axes[PIVOT_AXIS_ROW]))
6225 if (lex_match (lexer, T_BY))
6227 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6228 &t->axes[PIVOT_AXIS_COLUMN]))
6231 if (lex_match (lexer, T_BY))
6233 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6234 &t->axes[PIVOT_AXIS_LAYER]))
6238 int expr_end = lex_ofs (lexer);
6240 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6241 && !t->axes[PIVOT_AXIS_LAYER])
6243 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis; the messages below report
   scale variables that appear on more than one axis. */
6247 const struct ctables_axis *scales[PIVOT_N_AXES];
6248 size_t n_scales = 0;
6249 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6251 scales[a] = find_scale (t->axes[a]);
6257 msg (SE, _("Scale variables may appear only on one axis."));
6258 if (scales[PIVOT_AXIS_ROW])
6259 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6260 _("This scale variable appears on the rows axis."));
6261 if (scales[PIVOT_AXIS_COLUMN])
6262 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6263 _("This scale variable appears on the columns axis."));
6264 if (scales[PIVOT_AXIS_LAYER])
6265 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6266 _("This scale variable appears on the layer axis."));
/* Likewise for summaries: at most one axis may carry them. */
6270 const struct ctables_axis *summaries[PIVOT_N_AXES];
6271 size_t n_summaries = 0;
6272 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6274 summaries[a] = (scales[a]
6276 : find_categorical_summary_spec (t->axes[a]));
6280 if (n_summaries > 1)
6282 msg (SE, _("Summaries may appear only on one axis."));
6283 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6286 msg_at (SN, summaries[a]->loc,
6288 ? _("This variable on the rows axis has a summary.")
6289 : a == PIVOT_AXIS_COLUMN
6290 ? _("This variable on the columns axis has a summary.")
6291 : _("This variable on the layers axis has a summary."));
6293 msg_at (SN, summaries[a]->loc,
6294 _("This is a scale variable, so it always has a "
6295 "summary even if the syntax does not explicitly "
/* Choose the summary axis: an axis that has a summary when any axis has
   one, otherwise an axis that is present at all. */
6300 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6301 if (n_summaries ? summaries[a] : t->axes[a])
6303 t->summary_axis = a;
/* A table expression that ends the command has no per-table subcommands. */
6307 if (lex_token (lexer) == T_ENDCMD)
6309 if (!ctables_prepare_table (t))
6313 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6316 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6318 if (lex_match_id (lexer, "SLABELS"))
6320 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6322 if (lex_match_id (lexer, "POSITION"))
6324 lex_match (lexer, T_EQUALS);
6325 if (lex_match_id (lexer, "COLUMN"))
6326 t->slabels_axis = PIVOT_AXIS_COLUMN;
6327 else if (lex_match_id (lexer, "ROW"))
6328 t->slabels_axis = PIVOT_AXIS_ROW;
6329 else if (lex_match_id (lexer, "LAYER"))
6330 t->slabels_axis = PIVOT_AXIS_LAYER;
6333 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6337 else if (lex_match_id (lexer, "VISIBLE"))
6339 lex_match (lexer, T_EQUALS);
6340 if (!parse_bool (lexer, &t->slabels_visible))
6345 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6350 else if (lex_match_id (lexer, "CLABELS"))
6352 if (lex_match_id (lexer, "AUTO"))
6354 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6355 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6357 else if (lex_match_id (lexer, "ROWLABELS"))
6359 lex_match (lexer, T_EQUALS);
6360 if (lex_match_id (lexer, "OPPOSITE"))
6361 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6362 else if (lex_match_id (lexer, "LAYER"))
6363 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6366 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6370 else if (lex_match_id (lexer, "COLLABELS"))
6372 lex_match (lexer, T_EQUALS);
6373 if (lex_match_id (lexer, "OPPOSITE"))
6374 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6375 else if (lex_match_id (lexer, "LAYER"))
6376 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6379 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6385 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6390 else if (lex_match_id (lexer, "CRITERIA"))
6392 if (!lex_force_match_id (lexer, "CILEVEL"))
6394 lex_match (lexer, T_EQUALS);
6396 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6398 t->cilevel = lex_number (lexer);
6401 else if (lex_match_id (lexer, "CATEGORIES"))
6403 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6407 else if (lex_match_id (lexer, "TITLES"))
6412 if (lex_match_id (lexer, "CAPTION"))
6413 textp = &t->caption;
6414 else if (lex_match_id (lexer, "CORNER"))
6416 else if (lex_match_id (lexer, "TITLE"))
6420 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6423 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are expanded by put_title_text() and joined
   with single spaces. */
6425 struct string s = DS_EMPTY_INITIALIZER;
6426 while (lex_is_string (lexer))
6428 if (!ds_is_empty (&s))
6429 ds_put_byte (&s, ' ');
6430 put_title_text (&s, lex_tokss (lexer), now,
6431 lexer, dataset_dict (ds),
6432 expr_start, expr_end);
6436 *textp = ds_steal_cstr (&s);
6438 while (lex_token (lexer) != T_SLASH
6439 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: the settings are parsed and stored, but the subcommand then
   reports that it is not implemented. */
6441 else if (lex_match_id (lexer, "SIGTEST"))
6443 int start_ofs = lex_ofs (lexer) - 1;
6446 t->chisq = xmalloc (sizeof *t->chisq);
6447 *t->chisq = (struct ctables_chisq) {
6449 .include_mrsets = true,
6450 .all_visible = true,
6456 if (lex_match_id (lexer, "TYPE"))
6458 lex_match (lexer, T_EQUALS);
6459 if (!lex_force_match_id (lexer, "CHISQUARE"))
6462 else if (lex_match_id (lexer, "ALPHA"))
6464 lex_match (lexer, T_EQUALS);
6465 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6467 t->chisq->alpha = lex_number (lexer);
6470 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6472 lex_match (lexer, T_EQUALS);
6473 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6476 else if (lex_match_id (lexer, "CATEGORIES"))
6478 lex_match (lexer, T_EQUALS);
6479 if (lex_match_id (lexer, "ALLVISIBLE"))
6480 t->chisq->all_visible = true;
6481 else if (lex_match_id (lexer, "SUBTOTALS"))
6482 t->chisq->all_visible = false;
6485 lex_error_expecting (lexer,
6486 "ALLVISIBLE", "SUBTOTALS");
6492 lex_error_expecting (lexer, "TYPE", "ALPHA",
6493 "INCLUDEMRSETS", "CATEGORIES");
6497 while (lex_token (lexer) != T_SLASH
6498 && lex_token (lexer) != T_ENDCMD);
6500 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6501 _("Support for SIGTEST not yet implemented."));
/* COMPARETEST: handled like SIGTEST -- parsed, stored, then rejected as not
   implemented. */
6504 else if (lex_match_id (lexer, "COMPARETEST"))
/* Offset of the COMPARETEST keyword itself (the token just matched), so
   that the error location reported below starts at the keyword, exactly as
   the SIGTEST branch does. */
6506 int start_ofs = lex_ofs (lexer) - 1;
6509 t->pairwise = xmalloc (sizeof *t->pairwise);
6510 *t->pairwise = (struct ctables_pairwise) {
6512 .alpha = { .05, .05 },
6513 .adjust = BONFERRONI,
6514 .include_mrsets = true,
6515 .meansvariance_allcats = true,
6516 .all_visible = true,
6525 if (lex_match_id (lexer, "TYPE"))
6527 lex_match (lexer, T_EQUALS);
6528 if (lex_match_id (lexer, "PROP"))
6529 t->pairwise->type = PROP;
6530 else if (lex_match_id (lexer, "MEAN"))
6531 t->pairwise->type = MEAN;
6534 lex_error_expecting (lexer, "PROP", "MEAN");
6538 else if (lex_match_id (lexer, "ALPHA"))
6540 lex_match (lexer, T_EQUALS);
6542 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6544 double a0 = lex_number (lexer);
/* An optional second value may follow; when given, the smaller of the two
   is stored first. */
6547 lex_match (lexer, T_COMMA);
6548 if (lex_is_number (lexer))
6550 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6552 double a1 = lex_number (lexer);
6555 t->pairwise->alpha[0] = MIN (a0, a1);
6556 t->pairwise->alpha[1] = MAX (a0, a1);
6559 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6561 else if (lex_match_id (lexer, "ADJUST"))
6563 lex_match (lexer, T_EQUALS);
6564 if (lex_match_id (lexer, "BONFERRONI"))
6565 t->pairwise->adjust = BONFERRONI;
6566 else if (lex_match_id (lexer, "BH"))
6567 t->pairwise->adjust = BH;
6568 else if (lex_match_id (lexer, "NONE"))
6569 t->pairwise->adjust = 0;
6572 lex_error_expecting (lexer, "BONFERRONI", "BH",
6577 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6579 lex_match (lexer, T_EQUALS);
6580 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6583 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6585 lex_match (lexer, T_EQUALS);
6586 if (lex_match_id (lexer, "ALLCATS"))
6587 t->pairwise->meansvariance_allcats = true;
6588 else if (lex_match_id (lexer, "TESTEDCATS"))
6589 t->pairwise->meansvariance_allcats = false;
6592 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6596 else if (lex_match_id (lexer, "CATEGORIES"))
6598 lex_match (lexer, T_EQUALS);
6599 if (lex_match_id (lexer, "ALLVISIBLE"))
6600 t->pairwise->all_visible = true;
6601 else if (lex_match_id (lexer, "SUBTOTALS"))
6602 t->pairwise->all_visible = false;
6605 lex_error_expecting (lexer, "ALLVISIBLE",
6610 else if (lex_match_id (lexer, "MERGE"))
6612 lex_match (lexer, T_EQUALS);
6613 if (!parse_bool (lexer, &t->pairwise->merge))
6616 else if (lex_match_id (lexer, "STYLE"))
6618 lex_match (lexer, T_EQUALS);
6619 if (lex_match_id (lexer, "APA"))
6620 t->pairwise->apa_style = true;
6621 else if (lex_match_id (lexer, "SIMPLE"))
6622 t->pairwise->apa_style = false;
6625 lex_error_expecting (lexer, "APA", "SIMPLE");
6629 else if (lex_match_id (lexer, "SHOWSIG"))
6631 lex_match (lexer, T_EQUALS);
6632 if (!parse_bool (lexer, &t->pairwise->show_sig))
6637 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6638 "INCLUDEMRSETS", "MEANSVARIANCE",
6639 "CATEGORIES", "MERGE", "STYLE",
6644 while (lex_token (lexer) != T_SLASH
6645 && lex_token (lexer) != T_ENDCMD);
6647 lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
6648 _("Support for COMPARETEST not yet implemented."));
6653 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6654 "CRITERIA", "CATEGORIES", "TITLES",
6655 "SIGTEST", "COMPARETEST");
6659 if (!lex_match (lexer, T_SLASH))
/* Work out which axis, if any, has its category labels moved.  Moving both
   row and column labels is rejected. */
6663 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW)
6665 t->clabels_from_axis = PIVOT_AXIS_ROW;
6666 if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6668 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6672 else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6673 t->clabels_from_axis = PIVOT_AXIS_COLUMN;
6674 t->clabels_to_axis = t->label_axis[t->clabels_from_axis];
6676 if (!ctables_prepare_table (t))
6679 while (lex_token (lexer) != T_ENDCMD);
/* Execute all the tables, commit the procedure, and clean up.
   NOTE(review): any guard on this proc_open() call (INPUT may already have
   been opened for the measure guesser above) is not visible here. */
6682 input = proc_open (ds);
6683 bool ok = ctables_execute (ds, input, ct);
6684 ok = proc_commit (ds) && ok;
6686 ctables_destroy (ct);
6687 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Cleanup for the failure path. */
6692 ctables_destroy (ct);