deb98beb9c0115cbd792fb6cfabbe3655c0bf094
[pspp] / src / language / stats / ctables.c
1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 2021 Free Software Foundation, Inc.
3
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "data/dataset.h"
20 #include "data/dictionary.h"
21 #include "language/command.h"
22 #include "language/lexer/format-parser.h"
23 #include "language/lexer/lexer.h"
24 #include "language/lexer/variable-parser.h"
25 #include "libpspp/hmap.h"
26 #include "libpspp/message.h"
27 #include "output/pivot-table.h"
28
29 #include "gl/minmax.h"
30 #include "gl/xalloc.h"
31
32 #include "gettext.h"
33 #define _(msgid) gettext (msgid)
34
/* How a variable's label is shown, as selected by VLABELS ... DISPLAY.

   The first four enumerators alias the SETTINGS_VALUE_SHOW_* constants;
   CTVL_NONE simply takes the next value after CTVL_BOTH.  Note that
   CTVL_NAME maps to SETTINGS_VALUE_SHOW_VALUE. */
enum ctables_vlabel
  {
    CTVL_DEFAULT = SETTINGS_VALUE_SHOW_DEFAULT,
    CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
    CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
    CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
    CTVL_NONE,
  };
/* Compile-time check only; nothing in the visible code calls this, hence
   UNUSED.

   C does not allow two case labels with the same value in one switch, so
   listing every enumerator here breaks the build if CTVL_NONE ever ends up
   equal to one of the SETTINGS_VALUE_SHOW_* values it sits beside. */
static void UNUSED
ctables_vlabel_unique (enum ctables_vlabel vlabel)
{
  /* This ensures that all of the values are unique. */
  switch (vlabel)
    {
    case CTVL_DEFAULT:
    case CTVL_NAME:
    case CTVL_LABEL:
    case CTVL_BOTH:
    case CTVL_NONE:
      abort ();
    }
}
57
/* State for one CTABLES command, filled in by cmd_ctables(). */
struct ctables
  {
    /* Table look.  cmd_ctables() obtains it from the default look via
       pivot_table_look_unshare(); FORMAT MINCOLWIDTH/MAXCOLWIDTH adjust its
       horizontal width range. */
    struct pivot_table_look *look;

    /* If this is NULL, zeros are displayed using the normal print format.
       Otherwise, this string is displayed.  (Set by FORMAT EMPTY.) */
    char *zero;

    /* If this is NULL, missing values are displayed using the normal print
       format.  Otherwise, this string is displayed.  (Set by FORMAT
       MISSING.) */
    char *missing;

    /* Label display mode per variable, indexed by the variable's dictionary
       index.  Every element starts out as CTVL_DEFAULT. */
    enum ctables_vlabel *vlabels;

    bool mrsets_count_duplicates; /* MRSETS. */
    bool smissing_listwise;       /* SMISSING. */
    struct variable *base_weight; /* WEIGHT. */
    int hide_threshold;           /* HIDESMALLCOUNTS. */

    /* One element per TABLE subcommand, in the order parsed. */
    struct ctables_table *tables;
    size_t n_tables;
  };
80
/* A postcomputed category (PCOMPUTE).

   NOTE(review): nothing in the visible part of this file creates or uses
   these yet; PCOMPUTE parsing is still marked XXX in cmd_ctables(). */
struct ctables_postcompute
  {
    /* NOTE(review): struct ctables has no 'pcompute' member in this
       revision, so the hmap named below does not exist yet. */
    struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
    const char *name;           /* Name, without leading &. */

    struct ctables_postcompute_expr *expr;
    char *label;
    /* XXX FORMAT */
    bool hide_source_cats;
  };
91
/* A node in a postcompute expression tree.  'op' selects which member of
   the anonymous union is meaningful. */
struct ctables_postcompute_expr
  {
    enum ctables_postcompute_op
      {
        /* Terminals. */
        CTPO_CAT_NUMBER,
        CTPO_CAT_STRING,
        CTPO_CAT_RANGE,
        CTPO_CAT_MISSING,
        /* XXX OTHERNM */
        /* XXX SUBTOTAL and HSUBTOTAL */

        /* Nonterminals. */
        CTPO_ADD,
        CTPO_SUB,
        CTPO_MUL,
        CTPO_DIV,
        CTPO_POW,
      }
    op;

    /* XXX CTPO_CAT_STRING has no corresponding member here yet. */
    union
      {
        /* CTPO_CAT_NUMBER.

           XXX this comment used to list CTPO_NUMBER as well, but the enum
           above declares no such operator. */
        double number;

        /* CTPO_CAT_RANGE.

           XXX what about string ranges? */
        struct
          {
            double low;         /* -DBL_MAX for LO. */
            double high;        /* DBL_MAX for HIGH. */
          }
        range;

        /* CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW. */
        struct ctables_postcompute_expr *subs[2];
      };
  };
132
/* Label placement selected by CLABELS ROWLABELS= or COLLABELS=. */
enum ctables_label_position
  {
    CTLP_NORMAL,                /* Default, and what CLABELS AUTO selects. */
    CTLP_OPPOSITE,              /* OPPOSITE keyword. */
    CTLP_LAYER,                 /* LAYER keyword. */
  };
139
/* One TABLE subcommand together with the subcommands that modify it. */
struct ctables_table
  {
    /* Axis expression per pivot axis.  An element stays NULL if that axis
       was left empty in the TABLE specification. */
    struct ctables_axis *axes[PIVOT_N_AXES];

    /* SLABELS POSITION and VISIBLE.  cmd_ctables() starts these off as
       PIVOT_AXIS_COLUMN and true. */
    enum pivot_axis_type slabels_position;
    bool slabels_visible;

    /* CLABELS ROWLABELS and COLLABELS; both start as CTLP_NORMAL. */
    enum ctables_label_position row_labels;
    enum ctables_label_position col_labels;

    /* XXX CATEGORIES */

    /* CRITERIA CILEVEL; starts as 95. */
    double cilevel;

    /* TITLES CAPTION, CORNER, and TITLE. */
    char *caption;
    char *corner;
    char *title;

    struct ctables_chisq *chisq;
    struct ctables_pairwise *pairwise;
  };
161
/* Chi-square test (SIGTEST).

   Pointed to by the 'chisq' member of struct ctables_table. */
struct ctables_chisq
  {
    double alpha;
    bool include_mrsets;
    bool all_visible;
  };
169
/* Pairwise comparison test (COMPARETEST).

   Pointed to by the 'pairwise' member of struct ctables_table. */
struct ctables_pairwise
  {
    enum { PROP, MEAN } type;
    double alpha[2];
    bool include_mrsets;
    bool meansvariance_allcats;
    bool all_visible;
    /* BONFERRONI starts at 1, so a zero-initialized 'adjust' matches
       neither enumerator. */
    enum { BONFERRONI = 1, BH } adjust;
    bool merge;
    bool apa_style;
    bool show_sig;
  };
183
/* A node in the expression tree for one axis of a table.

   'op' selects which half of the anonymous union applies: terminals name a
   variable or multiple-response set and carry summary specifications,
   nonterminals combine two subexpressions.  Free a tree with
   ctables_axis_destroy(). */
struct ctables_axis
  {
    enum ctables_axis_op
      {
        /* Terminals. */
        CTAO_VAR,
        CTAO_MRSET,

        /* Nonterminals. */
        CTAO_STACK,             /* + */
        CTAO_NEST,              /* > */
      }
    op;

    union
      {
        /* Terminals. */
        struct
          {
            union
              {
                struct variable *var;       /* CTAO_VAR. */
                const struct mrset *mrset;  /* CTAO_MRSET. */
              };

            /* True if the variable is treated as scale: forced on by an
               [S] suffix, forced off by [C], otherwise taken from the
               variable's measurement level.  Always false for CTAO_MRSET. */
            bool scale;

            /* Summary specifications.  The array is owned by this node. */
            struct ctables_summary *summaries;
            size_t n_summaries;
          };

        /* Nonterminals. */
        struct ctables_axis *subs[2];
      };
  };
218
219 static void ctables_axis_destroy (struct ctables_axis *);
220
/* List of summary functions, as an X-macro.

   Each entry is S(ENUM, NAME), where ENUM is the enumerator and NAME is the
   syntax keyword.  Users define S() to pick out what they need, expand
   SUMMARIES, and then #undef S; this file does so to build enum
   ctables_summary_function, N_CTSF_FUNCTIONS, and the name table in
   parse_ctables_summary_function().

   NOTE(review): CSTF_TOTALN is spelled differently from its CTSF_* siblings.
   It is left alone here because code outside this list may refer to it.

   NOTE(review): several names begin with '!'.  Presumably that marks
   functions that are not yet available to users -- confirm before relying
   on it. */
#define SUMMARIES                                                       \
    /* All variables. */                                                \
    S(CTSF_COUNT, "COUNT")                                              \
    S(CTSF_ECOUNT, "ECOUNT")                                            \
    S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT")                                \
    S(CTSF_COLPCT_COUNT, "COLPCT.COUNT")                                \
    S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT")                            \
    S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT")                      \
    S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT")                            \
    S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT")                      \
    S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT")                      \
    S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN")                              \
    S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN")                              \
    S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN")                          \
    S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN")                    \
    S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN")                          \
    S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN")                    \
    S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN")                    \
    S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN")                              \
    S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN")                              \
    S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN")                          \
    S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN")                    \
    S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN")                          \
    S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN")                    \
    S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN")                    \
                                                                        \
    /* Scale variables, totals, and subtotals. */                       \
    S(CTSF_MAXIMUM, "!MAXIMUM")                                         \
    S(CTSF_MEAN, "!MEAN")                                               \
    S(CTSF_MEDIAN, "!MEDIAN")                                           \
    S(CTSF_MINIMUM, "!MINIMUM")                                         \
    S(CTSF_MISSING, "!MISSING")                                         \
    S(CTSF_MODE, "!MODE")                                               \
    S(CTSF_PTILE, "!PTILE")                                             \
    S(CTSF_RANGE, "!RANGE")                                             \
    S(CTSF_SEMAN, "!SEMAN")                                             \
    S(CTSF_STDDEV, "!STDDEV")                                           \
    S(CTSF_SUM, "!SUM")                                                 \
    S(CSTF_TOTALN, "!TOTALN")                                           \
    S(CTSF_ETOTALN, "!ETOTALN")                                         \
    S(CTSF_VALIDN, "!VALIDN")                                           \
    S(CTSF_EVALIDN, "!EVALIDN")                                         \
    S(CTSF_VARIANCE, "!VARIANCE")                                       \
    S(CTSF_ROWPCT_SUM, "ROWPCT.SUM")                                    \
    S(CTSF_COLPCT_SUM, "COLPCT.SUM")                                    \
    S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM")                                \
    S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM")                          \
    S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM")                                \
    S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM")                          \
    S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM")                          \
                                                                        \
    /* Multiple response sets. */                                       \
    S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES")                        \
    S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES")                        \
    S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES")                    \
    S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES")              \
    S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES")                    \
    S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES")              \
    S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES")              \
    S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT")            \
    S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT")            \
    S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT")        \
    S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT")  \
    S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT")        \
    S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT")  \
    S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT")  \
    S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES")            \
    S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES")            \
    S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES")        \
    S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES")  \
    S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES")        \
    S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES")  \
    S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES")
294
/* Summary functions.  There is one enumerator per SUMMARIES entry, in list
   order, so the first entry (CTSF_COUNT) has value 0. */
enum ctables_summary_function
  {
#define S(ENUM, NAME) ENUM,
    SUMMARIES
#undef S
  };
301
/* Number of summary functions.  Each S() expands to "+1", so the
   initializer adds up to the number of entries in SUMMARIES. */
enum {
#define S(ENUM, NAME) +1
  N_CTSF_FUNCTIONS = SUMMARIES
#undef S
};
307
/* One summary specification attached to a terminal axis node. */
struct ctables_summary
  {
    enum ctables_summary_function function;

    /* Label text, owned by this struct and freed by
       ctables_summary_uninit().  May be NULL: the axis parser leaves it
       that way when a bracketed summary has no label string. */
    char *label;

    struct fmt_spec format;     /* XXX extra CTABLES formats */
  };
314
315 static void
316 ctables_summary_uninit (struct ctables_summary *s)
317 {
318   if (s)
319     free (s->label);
320 }
321
322 static bool
323 parse_col_width (struct lexer *lexer, const char *name, double *width)
324 {
325   lex_match (lexer, T_EQUALS);
326   if (lex_match_id (lexer, "DEFAULT"))
327     *width = SYSMIS;
328   else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
329     {
330       *width = lex_number (lexer);
331       lex_get (lexer);
332     }
333   else
334     return false;
335
336   return true;
337 }
338
/* Parses NO or YES from LEXER.  On a match, stores false or true,
   respectively, in *B and returns true.  Otherwise reports which keywords
   were expected, leaves *B alone, and returns false. */
static bool
parse_bool (struct lexer *lexer, bool *b)
{
  if (lex_match_id (lexer, "NO"))
    {
      *b = false;
      return true;
    }

  if (lex_match_id (lexer, "YES"))
    {
      *b = true;
      return true;
    }

  lex_error_expecting (lexer, "YES", "NO");
  return false;
}
353
354 static bool
355 parse_ctables_summary_function (struct lexer *lexer,
356                                 enum ctables_summary_function *f)
357 {
358   struct pair
359     {
360       enum ctables_summary_function function;
361       struct substring name;
362     };
363   static struct pair names[] = {
364 #define S(ENUM, NAME) { ENUM, SS_LITERAL_INITIALIZER (NAME) },
365     SUMMARIES
366
367     /* The .COUNT suffix may be omitted. */
368     S(CTSF_ROWPCT_COUNT, "ROWPCT")
369     S(CTSF_COLPCT_COUNT, "COLPCT")
370     S(CTSF_TABLEPCT_COUNT, "TABLEPCT")
371     S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT")
372     S(CTSF_LAYERPCT_COUNT, "LAYERPCT")
373     S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT")
374     S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT")
375 #undef S
376   };
377
378   if (!lex_force_id (lexer))
379     return false;
380
381   for (size_t i = 0; i < sizeof names / sizeof *names; i++)
382     if (ss_equals_case (names[i].name, lex_tokss (lexer)))
383       {
384         *f = names[i].function;
385         return true;
386       }
387
388   lex_error (lexer, _("Expecting summary function name."));
389   return false;
390 }
391
392 static void
393 ctables_axis_destroy (struct ctables_axis *axis)
394 {
395   if (!axis)
396     return;
397
398   switch (axis->op)
399     {
400     case CTAO_VAR:
401     case CTAO_MRSET:
402       for (size_t i = 0; i < axis->n_summaries; i++)
403         ctables_summary_uninit (&axis->summaries[i]);
404       free (axis->summaries);
405       break;
406
407     case CTAO_STACK:
408     case CTAO_NEST:
409       ctables_axis_destroy (axis->subs[0]);
410       ctables_axis_destroy (axis->subs[1]);
411       break;
412     }
413   free (axis);
414 }
415
416 static struct ctables_axis *
417 ctables_axis_new_nonterminal (enum ctables_axis_op op,
418                               struct ctables_axis *sub0,
419                               struct ctables_axis *sub1)
420 {
421   struct ctables_axis *axis = xmalloc (sizeof *axis);
422   *axis = (struct ctables_axis) { .op = op, .subs = { sub0, sub1 } };
423   return axis;
424 }
425
/* Arguments shared by the recursive-descent axis parsing functions, bundled
   up by ctables_axis_parse(). */
struct ctables_axis_parse_ctx
  {
    struct lexer *lexer;        /* Token source. */
    struct dictionary *dict;    /* For looking up variables and mrsets. */
    struct ctables *ct;         /* Command being parsed. */
    struct ctables_table *t;    /* Table whose axis is being parsed. */
  };
433
434 static struct ctables_summary *
435 add_summary (struct ctables_axis *axis, size_t *allocated_summaries)
436 {
437   if (axis->n_summaries >= *allocated_summaries)
438     axis->summaries = x2nrealloc (axis->summaries, allocated_summaries,
439                                   sizeof *axis->summaries);
440
441   struct ctables_summary *s = &axis->summaries[axis->n_summaries++];
442   *s = (struct ctables_summary) { .function = CTSF_COUNT };
443   return s;
444 }
445
446 static struct ctables_axis *ctables_axis_parse_stack (
447   struct ctables_axis_parse_ctx *);
448
449 static struct ctables_axis *
450 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
451 {
452   if (lex_match (ctx->lexer, T_LPAREN))
453     {
454       struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
455       if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
456         {
457           ctables_axis_destroy (sub);
458           return NULL;
459         }
460       return sub;
461     }
462
463   if (!lex_force_id (ctx->lexer))
464     return NULL;
465
466   const struct mrset *mrset = NULL;
467   struct variable *var = NULL;
468   if (ss_starts_with (lex_tokss (ctx->lexer), ss_cstr ("$")))
469     {
470       mrset = dict_lookup_mrset (ctx->dict, lex_tokcstr (ctx->lexer));
471       if (!mrset)
472         {
473           lex_error (ctx->lexer, _("'%s' is not the name of a "
474                                    "multiple-response set in the active file "
475                                    "dictionary."),
476                      lex_tokcstr (ctx->lexer));
477           return NULL;
478         }
479       lex_get (ctx->lexer);
480     }
481   else
482     {
483       var = parse_variable (ctx->lexer, ctx->dict);
484       if (!var)
485         return NULL;
486     }
487
488   struct ctables_axis *axis = xmalloc (sizeof *axis);
489   if (mrset)
490     *axis = (struct ctables_axis) { .op = CTAO_MRSET, .mrset = mrset };
491   else
492     *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
493
494   /* XXX should figure out default measures by reading data */
495   axis->scale = (mrset ? false
496                  : lex_match_phrase (ctx->lexer, "[S]") ? true
497                  : lex_match_phrase (ctx->lexer, "[C]") ? false
498                  : var_get_measure (var) == MEASURE_SCALE);
499
500   size_t allocated_summaries = 0;
501   if (lex_match (ctx->lexer, T_LBRACK))
502     {
503       do
504         {
505           struct ctables_summary *s = add_summary (axis, &allocated_summaries);
506           if (!parse_ctables_summary_function (ctx->lexer, &s->function))
507             goto error;
508           if (lex_is_string (ctx->lexer))
509             {
510               s->label = ss_xstrdup (lex_tokss (ctx->lexer));
511               lex_get (ctx->lexer);
512             }
513           if (lex_token (ctx->lexer) == T_ID)
514             {
515               if (!parse_format_specifier (ctx->lexer, &s->format)
516                   || !fmt_check_output (&s->format)
517                   || !fmt_check_type_compat (&s->format, VAL_NUMERIC))
518                 goto error;
519             }
520           lex_match (ctx->lexer, T_COMMA);
521         }
522       while (!lex_match (ctx->lexer, T_RBRACK));
523     }
524   else
525     {
526       struct ctables_summary *s = add_summary (axis, &allocated_summaries);
527       s->function = axis->scale ? CTSF_MEAN : CTSF_COUNT;
528       s->label = xstrdup (axis->scale ? _("Mean") : _("Count"));
529       s->format = (struct fmt_spec) { .type = FMT_F, .w = 40 };
530     }
531   return axis;
532
533 error:
534   ctables_axis_destroy (axis);
535   return NULL;
536 }
537
538 static struct ctables_axis *
539 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
540 {
541   struct ctables_axis *lhs = ctables_axis_parse_primary (ctx);
542   if (!lhs)
543     return NULL;
544
545   while (lex_match (ctx->lexer, T_PLUS))
546     {
547       struct ctables_axis *rhs = ctables_axis_parse_primary (ctx);
548       if (!rhs)
549         return NULL;
550
551       lhs = ctables_axis_new_nonterminal (CTAO_NEST, lhs, rhs);
552     }
553
554   return lhs;
555 }
556
557 static struct ctables_axis *
558 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
559 {
560   struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
561   if (!lhs)
562     return NULL;
563
564   while (lex_match (ctx->lexer, T_PLUS))
565     {
566       struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
567       if (!rhs)
568         return NULL;
569
570       lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs);
571     }
572
573   return lhs;
574 }
575
576 static bool
577 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
578                     struct ctables *ct, struct ctables_table *t,
579                     enum pivot_axis_type a)
580 {
581   if (lex_token (lexer) == T_BY
582       || lex_token (lexer) == T_SLASH
583       || lex_token (lexer) == T_ENDCMD)
584     return true;
585
586   struct ctables_axis_parse_ctx ctx = {
587     .lexer = lexer,
588     .dict = dict,
589     .ct = ct,
590     .t = t
591   };
592   t->axes[a] = ctables_axis_parse_stack (&ctx);
593   return t->axes[a] != NULL;
594 }
595
596 int
597 cmd_ctables (struct lexer *lexer, struct dataset *ds)
598 {
599   size_t n_vars = dict_get_n_vars (dataset_dict (ds));
600   enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
601   for (size_t i = 0; n_vars; i++)
602     vlabels[i] = CTVL_DEFAULT;
603
604   struct ctables *ct = xmalloc (sizeof *ct);
605   *ct = (struct ctables) {
606     .look = pivot_table_look_unshare (pivot_table_look_ref (
607                                         pivot_table_look_get_default ())),
608     .vlabels = vlabels,
609     .hide_threshold = 5,
610   };
611
612   if (!lex_force_match (lexer, T_SLASH))
613     goto error;
614
615   while (!lex_match_id (lexer, "TABLE"))
616     {
617       if (lex_match_id (lexer, "FORMAT"))
618         {
619           double widths[2] = { SYSMIS, SYSMIS };
620           double units_per_inch = 72.0;
621
622           while (lex_token (lexer) != T_SLASH)
623             {
624               if (lex_match_id (lexer, "MINCOLWIDTH"))
625                 {
626                   if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
627                     goto error;
628                 }
629               else if (lex_match_id (lexer, "MAXCOLWIDTH"))
630                 {
631                   if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
632                     goto error;
633                 }
634               else if (lex_match_id (lexer, "UNITS"))
635                 {
636                   lex_match (lexer, T_EQUALS);
637                   if (lex_match_id (lexer, "POINTS"))
638                     units_per_inch = 72.0;
639                   else if (lex_match_id (lexer, "INCHES"))
640                     units_per_inch = 1.0;
641                   else if (lex_match_id (lexer, "CM"))
642                     units_per_inch = 2.54;
643                   else
644                     {
645                       lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
646                       goto error;
647                     }
648                 }
649               else if (lex_match_id (lexer, "EMPTY"))
650                 {
651                   free (ct->zero);
652                   ct->zero = NULL;
653
654                   lex_match (lexer, T_EQUALS);
655                   if (lex_match_id (lexer, "ZERO"))
656                     {
657                       /* Nothing to do. */
658                     }
659                   else if (lex_match_id (lexer, "BLANK"))
660                     ct->zero = xstrdup ("");
661                   else if (lex_force_string (lexer))
662                     {
663                       ct->zero = ss_xstrdup (lex_tokss (lexer));
664                       lex_get (lexer);
665                     }
666                   else
667                     goto error;
668                 }
669               else if (lex_match_id (lexer, "MISSING"))
670                 {
671                   lex_match (lexer, T_EQUALS);
672                   if (!lex_force_string (lexer))
673                     goto error;
674
675                   free (ct->missing);
676                   ct->missing = (strcmp (lex_tokcstr (lexer), ".")
677                                  ? ss_xstrdup (lex_tokss (lexer))
678                                  : NULL);
679                   lex_get (lexer);
680                 }
681               else
682                 {
683                   lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
684                                        "UNITS", "EMPTY", "MISSING");
685                   goto error;
686                 }
687             }
688
689           if (widths[0] != SYSMIS && widths[1] != SYSMIS
690               && widths[0] > widths[1])
691             {
692               msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
693               goto error;
694             }
695
696           for (size_t i = 0; i < 2; i++)
697             if (widths[i] != SYSMIS)
698               {
699                 int *wr = ct->look->width_ranges[TABLE_HORZ];
700                 wr[i] = widths[i] / units_per_inch * 96.0;
701                 if (wr[0] > wr[1])
702                   wr[!i] = wr[i];
703               }
704         }
705       else if (lex_match_id (lexer, "VLABELS"))
706         {
707           if (!lex_force_match_id (lexer, "VARIABLES"))
708             goto error;
709           lex_match (lexer, T_EQUALS);
710
711           struct variable **vars;
712           size_t n_vars;
713           if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
714                                 PV_NO_SCRATCH))
715             goto error;
716
717           if (!lex_force_match_id (lexer, "DISPLAY"))
718             {
719               free (vars);
720               goto error;
721             }
722           lex_match (lexer, T_EQUALS);
723
724           enum ctables_vlabel vlabel;
725           if (lex_match_id (lexer, "DEFAULT"))
726             vlabel = CTVL_DEFAULT;
727           else if (lex_match_id (lexer, "NAME"))
728             vlabel = CTVL_NAME;
729           else if (lex_match_id (lexer, "LABEL"))
730             vlabel = CTVL_LABEL;
731           else if (lex_match_id (lexer, "BOTH"))
732             vlabel = CTVL_BOTH;
733           else if (lex_match_id (lexer, "NONE"))
734             vlabel = CTVL_NONE;
735           else
736             {
737               lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
738                                    "BOTH", "NONE");
739               free (vars);
740               goto error;
741             }
742
743           for (size_t i = 0; i < n_vars; i++)
744             ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
745           free (vars);
746         }
747       else if (lex_match_id (lexer, "MRSETS"))
748         {
749           if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
750             goto error;
751           lex_match (lexer, T_EQUALS);
752           if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
753             goto error;
754         }
755       else if (lex_match_id (lexer, "SMISSING"))
756         {
757           if (lex_match_id (lexer, "VARIABLE"))
758             ct->smissing_listwise = false;
759           else if (lex_match_id (lexer, "LISTWISE"))
760             ct->smissing_listwise = true;
761           else
762             {
763               lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
764               goto error;
765             }
766         }
767       /* XXX PCOMPUTE */
768       else if (lex_match_id (lexer, "WEIGHT"))
769         {
770           if (!lex_force_match_id (lexer, "VARIABLE"))
771             goto error;
772           lex_match (lexer, T_EQUALS);
773           ct->base_weight = parse_variable (lexer, dataset_dict (ds));
774           if (!ct->base_weight)
775             goto error;
776         }
777       else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
778         {
779           if (!lex_force_match_id (lexer, "COUNT"))
780             goto error;
781           lex_match (lexer, T_EQUALS);
782           if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT", 2, INT_MAX))
783             goto error;
784           ct->hide_threshold = lex_integer (lexer);
785           lex_get (lexer);
786         }
787       else
788         {
789           lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
790                                "SMISSING", "PCOMPUTE", "PPROPERTIES",
791                                "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
792           goto error;
793         }
794
795       if (!lex_force_match (lexer, T_SLASH))
796         goto error;
797     }
798
799   size_t allocated_tables = 0;
800   do
801     {
802       if (ct->n_tables >= allocated_tables)
803         ct->tables = x2nrealloc (ct->tables, &allocated_tables,
804                                  sizeof *ct->tables);
805
806       struct ctables_table *t = &ct->tables[ct->n_tables++];
807       *t = (struct ctables_table) {
808         .slabels_position = PIVOT_AXIS_COLUMN,
809         .slabels_visible = true,
810         .row_labels = CTLP_NORMAL,
811         .col_labels = CTLP_NORMAL,
812         .cilevel = 95,
813       };
814
815       lex_match (lexer, T_EQUALS);
816       if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
817         goto error;
818
819       if (lex_match (lexer, T_BY))
820         {
821           if (!ctables_axis_parse (lexer, dataset_dict (ds),
822                                    ct, t, PIVOT_AXIS_COLUMN))
823             goto error;
824
825           if (lex_match (lexer, T_BY))
826             {
827               if (!ctables_axis_parse (lexer, dataset_dict (ds),
828                                        ct, t, PIVOT_AXIS_LAYER))
829                 goto error;
830             }
831         }
832       if (!lex_force_match (lexer, T_SLASH))
833         goto error;
834
835       /* XXX Validate axes. */
836       while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
837         {
838           if (lex_match_id (lexer, "SLABELS"))
839             {
840               while (lex_token (lexer) != T_SLASH)
841                 {
842                   if (lex_match_id (lexer, "POSITION"))
843                     {
844                       lex_match (lexer, T_EQUALS);
845                       if (lex_match_id (lexer, "COLUMN"))
846                         t->slabels_position = PIVOT_AXIS_COLUMN;
847                       else if (lex_match_id (lexer, "ROW"))
848                         t->slabels_position = PIVOT_AXIS_ROW;
849                       else if (lex_match_id (lexer, "LAYER"))
850                         t->slabels_position = PIVOT_AXIS_LAYER;
851                       else
852                         {
853                           lex_error_expecting (lexer, "COLUMN", "ROW",
854                                                "LAYER");
855                           goto error;
856                         }
857                     }
858                   else if (lex_match_id (lexer, "VISIBLE"))
859                     {
860                       lex_match (lexer, T_EQUALS);
861                       if (!parse_bool (lexer, &t->slabels_visible))
862                         goto error;
863                     }
864                   else
865                     {
866                       lex_error_expecting (lexer, "POSITION", "VISIBLE");
867                       goto error;
868                     }
869                 }
870             }
871           else if (lex_match_id (lexer, "CLABELS"))
872             {
873               while (lex_token (lexer) != T_SLASH)
874                 {
875                   if (lex_match_id (lexer, "AUTO"))
876                     t->row_labels = t->col_labels = CTLP_NORMAL;
877                   else if (lex_match_id (lexer, "ROWLABELS"))
878                     {
879                       lex_match (lexer, T_EQUALS);
880                       if (lex_match_id (lexer, "OPPOSITE"))
881                         t->row_labels = CTLP_OPPOSITE;
882                       else if (lex_match_id (lexer, "LAYER"))
883                         t->row_labels = CTLP_LAYER;
884                       else
885                         {
886                           lex_error_expecting (lexer, "OPPOSITE", "LAYER");
887                           goto error;
888                         }
889                     }
890                   else if (lex_match_id (lexer, "COLLABELS"))
891                     {
892                       lex_match (lexer, T_EQUALS);
893                       if (lex_match_id (lexer, "OPPOSITE"))
894                         t->col_labels = CTLP_OPPOSITE;
895                       else if (lex_match_id (lexer, "LAYER"))
896                         t->col_labels = CTLP_LAYER;
897                       else
898                         {
899                           lex_error_expecting (lexer, "OPPOSITE", "LAYER");
900                           goto error;
901                         }
902                     }
903                   else
904                     {
905                       lex_error_expecting (lexer, "AUTO", "ROWLABELS",
906                                            "COLLABELS");
907                       goto error;
908                     }
909                 }
910             }
911           else if (lex_match_id (lexer, "CRITERIA"))
912             {
913               if (!lex_force_match_id (lexer, "CILEVEL"))
914                 goto error;
915               lex_match (lexer, T_EQUALS);
916
917               if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
918                 goto error;
919               t->cilevel = lex_number (lexer);
920               lex_get (lexer);
921             }
922           else if (lex_match_id (lexer, "TITLES"))
923             {
924               do
925                 {
926                   char **textp;
927                   if (lex_match_id (lexer, "CAPTION"))
928                     textp = &t->caption;
929                   else if (lex_match_id (lexer, "CORNER"))
930                     textp = &t->corner;
931                   else if (lex_match_id (lexer, "TITLE"))
932                     textp = &t->title;
933                   else
934                     {
935                       lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
936                       goto error;
937                     }
938                   lex_match (lexer, T_EQUALS);
939
940                   struct string s = DS_EMPTY_INITIALIZER;
941                   while (lex_is_string (lexer))
942                     {
943                       if (!ds_is_empty (&s))
944                         ds_put_byte (&s, ' ');
945                       ds_put_substring (&s, lex_tokss (lexer));
946                       lex_get (lexer);
947                     }
948                   free (*textp);
949                   *textp = ds_steal_cstr (&s);
950                 }
951               while (lex_token (lexer) != T_SLASH
952                      && lex_token (lexer) != T_ENDCMD);
953             }
954           else if (lex_match_id (lexer, "SIGTEST"))
955             {
956               if (!t->chisq)
957                 {
958                   t->chisq = xmalloc (sizeof *t->chisq);
959                   *t->chisq = (struct ctables_chisq) {
960                     .alpha = .05,
961                     .include_mrsets = true,
962                     .all_visible = true,
963                   };
964                 }
965
966               do
967                 {
968                   if (lex_match_id (lexer, "TYPE"))
969                     {
970                       lex_match (lexer, T_EQUALS);
971                       if (!lex_force_match_id (lexer, "CHISQUARE"))
972                         goto error;
973                     }
974                   else if (lex_match_id (lexer, "ALPHA"))
975                     {
976                       lex_match (lexer, T_EQUALS);
977                       if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
978                         goto error;
979                       t->chisq->alpha = lex_number (lexer);
980                       lex_get (lexer);
981                     }
982                   else if (lex_match_id (lexer, "INCLUDEMRSETS"))
983                     {
984                       lex_match (lexer, T_EQUALS);
985                       if (parse_bool (lexer, &t->chisq->include_mrsets))
986                         goto error;
987                     }
988                   else if (lex_match_id (lexer, "CATEGORIES"))
989                     {
990                       lex_match (lexer, T_EQUALS);
991                       if (lex_match_id (lexer, "ALLVISIBLE"))
992                         t->chisq->all_visible = true;
993                       else if (lex_match_id (lexer, "SUBTOTALS"))
994                         t->chisq->all_visible = false;
995                       else
996                         {
997                           lex_error_expecting (lexer,
998                                                "ALLVISIBLE", "SUBTOTALS");
999                           goto error;
1000                         }
1001                     }
1002                   else
1003                     {
1004                       lex_error_expecting (lexer, "TYPE", "ALPHA",
1005                                            "INCLUDEMRSETS", "CATEGORIES");
1006                       goto error;
1007                     }
1008                 }
1009               while (lex_token (lexer) != T_SLASH
1010                      && lex_token (lexer) != T_ENDCMD);
1011             }
1012           else if (lex_match_id (lexer, "COMPARETEST"))
1013             {
1014               if (!t->pairwise)
1015                 {
1016                   t->pairwise = xmalloc (sizeof *t->pairwise);
1017                   *t->pairwise = (struct ctables_pairwise) {
1018                     .type = PROP,
1019                     .alpha = { .05, .05 },
1020                     .adjust = BONFERRONI,
1021                     .include_mrsets = true,
1022                     .meansvariance_allcats = true,
1023                     .all_visible = true,
1024                     .merge = false,
1025                     .apa_style = true,
1026                     .show_sig = false,
1027                   };
1028                 }
1029
1030               do
1031                 {
1032                   if (lex_match_id (lexer, "TYPE"))
1033                     {
1034                       lex_match (lexer, T_EQUALS);
1035                       if (lex_match_id (lexer, "PROP"))
1036                         t->pairwise->type = PROP;
1037                       else if (lex_match_id (lexer, "MEAN"))
1038                         t->pairwise->type = MEAN;
1039                       else
1040                         {
1041                           lex_error_expecting (lexer, "PROP", "MEAN");
1042                           goto error;
1043                         }
1044                     }
1045                   else if (lex_match_id (lexer, "ALPHA"))
1046                     {
1047                       lex_match (lexer, T_EQUALS);
1048
1049                       if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
1050                         goto error;
1051                       double a0 = lex_number (lexer);
1052                       lex_get (lexer);
1053
1054                       lex_match (lexer, T_COMMA);
1055                       if (lex_is_number (lexer))
1056                         {
1057                           if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
1058                             goto error;
1059                           double a1 = lex_number (lexer);
1060                           lex_get (lexer);
1061
1062                           t->pairwise->alpha[0] = MIN (a0, a1);
1063                           t->pairwise->alpha[1] = MAX (a0, a1);
1064                         }
1065                       else
1066                         t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
1067                     }
1068                   else if (lex_match_id (lexer, "ADJUST"))
1069                     {
1070                       lex_match (lexer, T_EQUALS);
1071                       if (lex_match_id (lexer, "BONFERRONI"))
1072                         t->pairwise->adjust = BONFERRONI;
1073                       else if (lex_match_id (lexer, "BH"))
1074                         t->pairwise->adjust = BH;
1075                       else if (lex_match_id (lexer, "NONE"))
1076                         t->pairwise->adjust = 0;
1077                       else
1078                         {
1079                           lex_error_expecting (lexer, "BONFERRONI", "BH",
1080                                                "NONE");
1081                           goto error;
1082                         }
1083                     }
1084                   else if (lex_match_id (lexer, "INCLUDEMRSETS"))
1085                     {
1086                       lex_match (lexer, T_EQUALS);
1087                       if (!parse_bool (lexer, &t->pairwise->include_mrsets))
1088                         goto error;
1089                     }
1090                   else if (lex_match_id (lexer, "MEANSVARIANCE"))
1091                     {
1092                       lex_match (lexer, T_EQUALS);
1093                       if (lex_match_id (lexer, "ALLCATS"))
1094                         t->pairwise->meansvariance_allcats = true;
1095                       else if (lex_match_id (lexer, "TESTEDCATS"))
1096                         t->pairwise->meansvariance_allcats = false;
1097                       else
1098                         {
1099                           lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
1100                           goto error;
1101                         }
1102                     }
1103                   else if (lex_match_id (lexer, "CATEGORIES"))
1104                     {
1105                       lex_match (lexer, T_EQUALS);
1106                       if (lex_match_id (lexer, "ALLVISIBLE"))
1107                         t->pairwise->all_visible = true;
1108                       else if (lex_match_id (lexer, "SUBTOTALS"))
1109                         t->pairwise->all_visible = false;
1110                       else
1111                         {
1112                           lex_error_expecting (lexer, "ALLVISIBLE",
1113                                                "SUBTOTALS");
1114                           goto error;
1115                         }
1116                     }
1117                   else if (lex_match_id (lexer, "MERGE"))
1118                     {
1119                       lex_match (lexer, T_EQUALS);
1120                       if (!parse_bool (lexer, &t->pairwise->merge))
1121                         goto error;
1122                     }
1123                   else if (lex_match_id (lexer, "STYLE"))
1124                     {
1125                       lex_match (lexer, T_EQUALS);
1126                       if (lex_match_id (lexer, "APA"))
1127                         t->pairwise->apa_style = true;
1128                       else if (lex_match_id (lexer, "SIMPLE"))
1129                         t->pairwise->apa_style = false;
1130                       else
1131                         {
1132                           lex_error_expecting (lexer, "APA", "SIMPLE");
1133                           goto error;
1134                         }
1135                     }
1136                   else if (lex_match_id (lexer, "SHOWSIG"))
1137                     {
1138                       lex_match (lexer, T_EQUALS);
1139                       if (!parse_bool (lexer, &t->pairwise->show_sig))
1140                         goto error;
1141                     }
1142                   else
1143                     {
1144                       lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
1145                                            "INCLUDEMRSETS", "MEANSVARIANCE",
1146                                            "CATEGORIES", "MERGE", "STYLE",
1147                                            "SHOWSIG");
1148                       goto error;
1149                     }
1150                 }
1151               while (lex_token (lexer) != T_SLASH
1152                      && lex_token (lexer) != T_ENDCMD);
1153             }
1154           else
1155             {
1156               lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
1157                                    "CRITERIA", "CATEGORIES", "TITLES",
1158                                    "SIGTEST", "COMPARETEST");
1159               goto error;
1160             }
1161         }
1162     }
1163   while (lex_token (lexer) != T_ENDCMD);
1164
1165   return CMD_SUCCESS;
1166
1167 error:
1168   /* XXX free */
1169   return CMD_FAILURE;
1170 }
1171