Bug fixes for the CTABLES command (syntax parsing, error reporting, and procedure open/commit handling).
[pspp] / src / language / stats / ctables.c
index aa204b4ea5f0070bd1aa1a582ac09a4d63e7bdeb..3b7dc24d1051daded14b47a9fc25fc6f6c42263f 100644 (file)
@@ -795,7 +795,7 @@ enum ctables_function_availability
   {
     CTFA_ALL,                /* Any variables. */
     CTFA_SCALE,              /* Only scale variables, totals, and subtotals. */
-    CTFA_MRSETS,             /* Only multiple-response sets */
+    //CTFA_MRSETS,             /* Only multiple-response sets */
   };
 
 struct ctables_summary_spec
@@ -1165,16 +1165,17 @@ add_summary_spec (struct ctables_axis *axis,
       const char *var_name = var_get_name (axis->var);
       switch (ctables_function_availability (function))
         {
+#if 0
         case CTFA_MRSETS:
           msg_at (SE, loc, _("Summary function %s applies only to multiple "
                              "response sets."), function_name);
           msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
                   var_name);
           return false;
+#endif
 
         case CTFA_SCALE:
-#if 0
-          if (!axis->scale)
+          if (!axis->scale && sv != CSV_TOTAL)
             {
               msg_at (SE, loc,
                       _("Summary function %s applies only to scale variables."),
@@ -1183,7 +1184,6 @@ add_summary_spec (struct ctables_axis *axis,
                       var_name);
               return false;
             }
-#endif
           break;
 
         case CTFA_ALL:
@@ -1245,7 +1245,6 @@ ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
   struct ctables_axis *axis = xmalloc (sizeof *axis);
   *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
 
-  /* XXX should figure out default measures by reading data */
   axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
                  : lex_match_phrase (ctx->lexer, "[C]") ? false
                  : var_get_measure (var) == MEASURE_SCALE);
@@ -1293,15 +1292,17 @@ parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
               && fmt_check_type_compat (format, VAL_NUMERIC));
     }
 
+  lex_get (lexer);
   if (format->w < 2)
     {
-      msg (SE, _("Output format %s requires width 2 or greater."), type);
+      lex_next_error (lexer, -1, -1,
+                      _("Output format %s requires width 2 or greater."), type);
       return false;
     }
   else if (format->d > format->w - 1)
     {
-      msg (SE, _("Output format %s requires width greater than decimals."),
-           type);
+      lex_next_error (lexer, -1, -1, _("Output format %s requires width "
+                                       "greater than decimals."), type);
       return false;
     }
   else
@@ -1897,11 +1898,13 @@ ctables_recursive_check_postcompute (struct dictionary *dict,
                             ngettext ("These categories include %zu instance "
                                       "of SUBTOTAL or HSUBTOTAL, so references "
                                       "from computed categories must refer to "
-                                      "subtotals by position.",
+                                      "subtotals by position, "
+                                      "e.g. SUBTOTAL[1].",
                                       "These categories include %zu instances "
                                       "of SUBTOTAL or HSUBTOTAL, so references "
                                       "from computed categories must refer to "
-                                      "subtotals by position.",
+                                      "subtotals by position, "
+                                      "e.g. SUBTOTAL[1].",
                                       n_subtotals),
                             n_subtotals);
                     msg_at (SN, e->location,
@@ -1915,9 +1918,17 @@ ctables_recursive_check_postcompute (struct dictionary *dict,
                       "in the category list."),
                     pc_cat->pc->name);
             msg_at (SN, e->location, _("This is the missing category."));
-            msg_at (SN, cats_location,
-                    _("To fix the problem, add the missing category to the "
-                      "list of categories here."));
+            if (e->op == CTPO_CAT_SUBTOTAL)
+              msg_at (SN, cats_location,
+                      _("To fix the problem, add subtotals to the "
+                        "list of categories here."));
+            else if (e->op == CTPO_CAT_TOTAL)
+              msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
+                         "CATEGORIES specification."));
+            else
+              msg_at (SN, cats_location,
+                      _("To fix the problem, add the missing category to the "
+                        "list of categories here."));
             return false;
           }
         if (pc_cat->pc->hide_source_cats)
@@ -2001,9 +2012,11 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
     }
 
   size_t allocated_cats = 0;
+  int cats_start_ofs = -1;
+  int cats_end_ofs = -1;
   if (lex_match (lexer, T_LBRACK))
     {
-      int cats_start_ofs = lex_ofs (lexer);
+      cats_start_ofs = lex_ofs (lexer);
       do
         {
           if (c->n_cats >= allocated_cats)
@@ -2019,94 +2032,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
           lex_match (lexer, T_COMMA);
         }
       while (!lex_match (lexer, T_RBRACK));
-
-      struct msg_location *cats_location
-        = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
-      for (size_t i = 0; i < c->n_cats; i++)
-        {
-          struct ctables_category *cat = &c->cats[i];
-          switch (cat->type)
-            {
-            case CCT_POSTCOMPUTE:
-              cat->parse_format = parse_strings ? common_format->type : FMT_F;
-              if (!ctables_recursive_check_postcompute (dict, cat->pc->expr,
-                                                        cat, c, cats_location))
-                return false;
-              break;
-
-            case CCT_NUMBER:
-            case CCT_NRANGE:
-              for (size_t j = 0; j < n_vars; j++)
-                if (var_is_alpha (vars[j]))
-                  {
-                    msg_at (SE, cat->location,
-                            _("This category specification may be applied "
-                              "only to numeric variables, but this "
-                              "subcommand tries to apply it to string "
-                              "variable %s."),
-                            var_get_name (vars[j]));
-                    return false;
-                  }
-              break;
-
-            case CCT_STRING:
-              if (parse_strings)
-                {
-                  double n;
-                  if (!parse_category_string (cat->location, cat->string, dict,
-                                              common_format->type, &n))
-                    return false;
-
-                  ss_dealloc (&cat->string);
-
-                  cat->type = CCT_NUMBER;
-                  cat->number = n;
-                }
-              else if (!all_strings (vars, n_vars, cat))
-                return false;
-              break;
-
-            case CCT_SRANGE:
-              if (parse_strings)
-                {
-                  double n[2];
-
-                  if (!cat->srange[0].string)
-                    n[0] = -DBL_MAX;
-                  else if (!parse_category_string (cat->location,
-                                                   cat->srange[0], dict,
-                                                   common_format->type, &n[0]))
-                    return false;
-
-                  if (!cat->srange[1].string)
-                    n[1] = DBL_MAX;
-                  else if (!parse_category_string (cat->location,
-                                                   cat->srange[1], dict,
-                                                   common_format->type, &n[1]))
-                    return false;
-
-                  ss_dealloc (&cat->srange[0]);
-                  ss_dealloc (&cat->srange[1]);
-
-                  cat->type = CCT_NRANGE;
-                  cat->nrange[0] = n[0];
-                  cat->nrange[1] = n[1];
-                }
-              else if (!all_strings (vars, n_vars, cat))
-                return false;
-              break;
-
-            case CCT_MISSING:
-            case CCT_OTHERNM:
-            case CCT_SUBTOTAL:
-            case CCT_TOTAL:
-            case CCT_VALUE:
-            case CCT_LABEL:
-            case CCT_FUNCTION:
-            case CCT_EXCLUDED_MISSING:
-              break;
-            }
-        }
+      cats_end_ofs = lex_ofs (lexer) - 1;
     }
 
   struct ctables_category cat = {
@@ -2297,6 +2223,97 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
         }
     }
 
+  if (cats_start_ofs != -1)
+    {
+      struct msg_location *cats_location
+        = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
+      for (size_t i = 0; i < c->n_cats; i++)
+        {
+          struct ctables_category *cat = &c->cats[i];
+          switch (cat->type)
+            {
+            case CCT_POSTCOMPUTE:
+              cat->parse_format = parse_strings ? common_format->type : FMT_F;
+              if (!ctables_recursive_check_postcompute (dict, cat->pc->expr,
+                                                        cat, c, cats_location))
+                return false;
+              break;
+
+            case CCT_NUMBER:
+            case CCT_NRANGE:
+              for (size_t j = 0; j < n_vars; j++)
+                if (var_is_alpha (vars[j]))
+                  {
+                    msg_at (SE, cat->location,
+                            _("This category specification may be applied "
+                              "only to numeric variables, but this "
+                              "subcommand tries to apply it to string "
+                              "variable %s."),
+                            var_get_name (vars[j]));
+                    return false;
+                  }
+              break;
+
+            case CCT_STRING:
+              if (parse_strings)
+                {
+                  double n;
+                  if (!parse_category_string (cat->location, cat->string, dict,
+                                              common_format->type, &n))
+                    return false;
+
+                  ss_dealloc (&cat->string);
+
+                  cat->type = CCT_NUMBER;
+                  cat->number = n;
+                }
+              else if (!all_strings (vars, n_vars, cat))
+                return false;
+              break;
+
+            case CCT_SRANGE:
+              if (parse_strings)
+                {
+                  double n[2];
+
+                  if (!cat->srange[0].string)
+                    n[0] = -DBL_MAX;
+                  else if (!parse_category_string (cat->location,
+                                                   cat->srange[0], dict,
+                                                   common_format->type, &n[0]))
+                    return false;
+
+                  if (!cat->srange[1].string)
+                    n[1] = DBL_MAX;
+                  else if (!parse_category_string (cat->location,
+                                                   cat->srange[1], dict,
+                                                   common_format->type, &n[1]))
+                    return false;
+
+                  ss_dealloc (&cat->srange[0]);
+                  ss_dealloc (&cat->srange[1]);
+
+                  cat->type = CCT_NRANGE;
+                  cat->nrange[0] = n[0];
+                  cat->nrange[1] = n[1];
+                }
+              else if (!all_strings (vars, n_vars, cat))
+                return false;
+              break;
+
+            case CCT_MISSING:
+            case CCT_OTHERNM:
+            case CCT_SUBTOTAL:
+            case CCT_TOTAL:
+            case CCT_VALUE:
+            case CCT_LABEL:
+            case CCT_FUNCTION:
+            case CCT_EXCLUDED_MISSING:
+              break;
+            }
+        }
+    }
+
   return true;
 }
 
@@ -4719,7 +4736,12 @@ ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
     return true;
 
   const struct ctables_nest *n0 = &stack->nests[0];
-  assert (n0->n > 0);
+  if (n0->n == 0)
+    {
+      assert (stack->n == 1);
+      return true;
+    }
+
   const struct variable *v0 = n0->vars[n0->n - 1];
   struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
   t->clabels_example = v0;
@@ -4896,6 +4918,10 @@ ctables_prepare_table (struct ctables_table *t)
         struct ctables_nest *nest = xmalloc (sizeof *nest);
         *nest = (struct ctables_nest) { .n = 0 };
         t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
+
+        /* There's no point in moving labels away from an axis that has no
+           labels, so avoid dealing with the special cases around that. */
+        t->label_axis[a] = a;
       }
 
   struct ctables_stack *stack = &t->stacks[t->summary_axis];
@@ -5317,7 +5343,8 @@ ctables_table_clear (struct ctables_table *t)
 }
 
 static bool
-ctables_execute (struct dataset *ds, struct ctables *ct)
+ctables_execute (struct dataset *ds, struct casereader *input,
+                 struct ctables *ct)
 {
   for (size_t i = 0; i < ct->n_tables; i++)
     {
@@ -5331,7 +5358,6 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
     }
 
   struct dictionary *dict = dataset_dict (ds);
-  struct casereader *input = proc_open (ds);
   struct casegrouper *grouper
     = (dict_get_split_type (dict) == SPLIT_SEPARATE
        ? casegrouper_create_splits (input, dict)
@@ -5386,8 +5412,7 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
           ctables_table_clear (t);
         }
     }
-  bool ok = casegrouper_destroy (grouper);
-  return proc_commit (ds) && ok;
+  return casegrouper_destroy (grouper);
 }
 \f
 /* Postcomputes. */
@@ -5567,7 +5592,7 @@ ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
     {
       if (lex_match_id (lexer, "LO"))
         {
-          if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
+          if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
             return false;
           e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
           lex_get (lexer);
@@ -5748,7 +5773,12 @@ ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
 {
   int pcompute_start = lex_ofs (lexer) - 1;
 
-  if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
+  if (!lex_match (lexer, T_AND))
+    {
+      lex_error_expecting (lexer, "&");
+      return false;
+    }
+  if (!lex_force_id (lexer))
     return false;
 
   char *name = ss_xstrdup (lex_tokss (lexer));
@@ -6022,6 +6052,16 @@ put_title_text (struct string *out, struct substring in, time_t now,
 int
 cmd_ctables (struct lexer *lexer, struct dataset *ds)
 {
+  struct casereader *input = NULL;
+
+  struct measure_guesser *mg = measure_guesser_create (ds);
+  if (mg)
+    {
+      input = proc_open (ds);
+      measure_guesser_run (mg, input);
+      measure_guesser_destroy (mg);
+    }
+
   size_t n_vars = dict_get_n_vars (dataset_dict (ds));
   enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
   enum settings_value_show tvars = settings_get_show_variables ();
@@ -6237,7 +6277,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
           if (!ct->e_weight)
             goto error;
         }
-      else if (lex_match_id (lexer, " HIDESMALLCOUNTS"))
+      else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
         {
           if (lex_match_id (lexer, "COUNT"))
             {
@@ -6396,7 +6436,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
           break;
         }
       if (!lex_force_match (lexer, T_SLASH))
-        break;
+        goto error;
 
       while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
         {
@@ -6434,46 +6474,43 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
             }
           else if (lex_match_id (lexer, "CLABELS"))
             {
-              while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
+              if (lex_match_id (lexer, "AUTO"))
                 {
-                  if (lex_match_id (lexer, "AUTO"))
-                    {
-                      t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
-                      t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
-                    }
-                  else if (lex_match_id (lexer, "ROWLABELS"))
-                    {
-                      lex_match (lexer, T_EQUALS);
-                      if (lex_match_id (lexer, "OPPOSITE"))
-                        t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
-                      else if (lex_match_id (lexer, "LAYER"))
-                        t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
-                      else
-                        {
-                          lex_error_expecting (lexer, "OPPOSITE", "LAYER");
-                          goto error;
-                        }
-                    }
-                  else if (lex_match_id (lexer, "COLLABELS"))
+                  t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
+                  t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
+                }
+              else if (lex_match_id (lexer, "ROWLABELS"))
+                {
+                  lex_match (lexer, T_EQUALS);
+                  if (lex_match_id (lexer, "OPPOSITE"))
+                    t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
+                  else if (lex_match_id (lexer, "LAYER"))
+                    t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
+                  else
                     {
-                      lex_match (lexer, T_EQUALS);
-                      if (lex_match_id (lexer, "OPPOSITE"))
-                        t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
-                      else if (lex_match_id (lexer, "LAYER"))
-                        t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
-                      else
-                        {
-                          lex_error_expecting (lexer, "OPPOSITE", "LAYER");
-                          goto error;
-                        }
+                      lex_error_expecting (lexer, "OPPOSITE", "LAYER");
+                      goto error;
                     }
+                }
+              else if (lex_match_id (lexer, "COLLABELS"))
+                {
+                  lex_match (lexer, T_EQUALS);
+                  if (lex_match_id (lexer, "OPPOSITE"))
+                    t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
+                  else if (lex_match_id (lexer, "LAYER"))
+                    t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
                   else
                     {
-                      lex_error_expecting (lexer, "AUTO", "ROWLABELS",
-                                           "COLLABELS");
+                      lex_error_expecting (lexer, "OPPOSITE", "LAYER");
                       goto error;
                     }
                 }
+              else
+                {
+                  lex_error_expecting (lexer, "AUTO", "ROWLABELS",
+                                       "COLLABELS");
+                  goto error;
+                }
             }
           else if (lex_match_id (lexer, "CRITERIA"))
             {
@@ -6557,7 +6594,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
                   else if (lex_match_id (lexer, "INCLUDEMRSETS"))
                     {
                       lex_match (lexer, T_EQUALS);
-                      if (parse_bool (lexer, &t->chisq->include_mrsets))
+                      if (!parse_bool (lexer, &t->chisq->include_mrsets))
                         goto error;
                     }
                   else if (lex_match_id (lexer, "CATEGORIES"))
@@ -6750,11 +6787,17 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
     }
   while (lex_token (lexer) != T_ENDCMD);
 
-  bool ok = ctables_execute (ds, ct);
+  if (!input)
+    input = proc_open (ds);
+  bool ok = ctables_execute (ds, input, ct);
+  ok = proc_commit (ds) && ok;
+
   ctables_destroy (ct);
   return ok ? CMD_SUCCESS : CMD_FAILURE;
 
 error:
+  if (input)
+    proc_commit (ds);
   ctables_destroy (ct);
   return CMD_FAILURE;
 }