From 9db55512389a5a6a06d6d864512d8a4c214a71d5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@cs.stanford.edu>
Date: Sun, 18 Sep 2022 19:31:47 -0700
Subject: [PATCH] SORT CASES: Improve error messages and coding style.

---
 src/data/subcase.h                 |  1 +
 src/language/stats/sort-cases.c    | 15 ++++++-------
 src/language/stats/sort-criteria.c | 15 +++++++------
 tests/language/stats/sort-cases.at | 34 ++++++++++++++++++++++++++++++
 4 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/src/data/subcase.h b/src/data/subcase.h
index 4e5043648c..027a639542 100644
--- a/src/data/subcase.h
+++ b/src/data/subcase.h
@@ -47,6 +47,7 @@ struct subcase
 
     struct caseproto *proto;    /* Created lazily. */
   };
+#define SUBCASE_EMPTY_INITIALIZER { .fields = NULL }
 
 void subcase_init_empty (struct subcase *);
 void subcase_init_vars (struct subcase *,
diff --git a/src/language/stats/sort-cases.c b/src/language/stats/sort-cases.c
index 4b6b1ebec3..41c7662413 100644
--- a/src/language/stats/sort-cases.c
+++ b/src/language/stats/sort-cases.c
@@ -40,30 +40,29 @@
 int
 cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
 {
-  struct subcase ordering;
-  struct casereader *output;
+  struct subcase ordering = SUBCASE_EMPTY_INITIALIZER;
   bool ok = false;
 
   lex_match (lexer, T_BY);
 
   proc_cancel_temporary_transformations (ds);
-  subcase_init_empty (&ordering);
   if (!parse_sort_criteria (lexer, dataset_dict (ds), &ordering, NULL, NULL))
     return CMD_CASCADING_FAILURE;
 
   if (settings_get_testing_mode () && lex_match (lexer, T_SLASH))
     {
-      if (!lex_force_match_id (lexer, "BUFFERS") || !lex_match (lexer, T_EQUALS)
-          || !lex_force_int_range (lexer, "BUFFERS", 2, INT_MAX))
+      if (!lex_force_match_id (lexer, "BUFFERS"))
+        goto done;
+      lex_match (lexer, T_EQUALS);
+      if (!lex_force_int_range (lexer, "BUFFERS", 2, INT_MAX))
         goto done;
-
       min_buffers = max_buffers = lex_integer (lexer);
-
       lex_get (lexer);
     }
 
   proc_discard_output (ds);
-  output = sort_execute (proc_open_filtering (ds, false), &ordering);
+  struct casereader *output = sort_execute (proc_open_filtering (ds, false),
+                                            &ordering);
   ok = proc_commit (ds);
   ok = dataset_set_source (ds, output) && ok;
 
diff --git a/src/language/stats/sort-criteria.c b/src/language/stats/sort-criteria.c
index 36466cd410..92c8064046 100644
--- a/src/language/stats/sort-criteria.c
+++ b/src/language/stats/sort-criteria.c
@@ -51,18 +51,18 @@ parse_sort_criteria (struct lexer *lexer, const struct dictionary *dict,
   if (saw_direction != NULL)
     *saw_direction = false;
 
+  int start_ofs = lex_ofs (lexer);
   do
     {
       size_t prev_n_vars = n_vars;
-      enum subcase_direction direction;
-      size_t i;
 
       /* Variables. */
       if (!parse_variables_const (lexer, dict, vars, &n_vars,
-                                  PV_APPEND | PV_NO_SCRATCH))
+                                  PV_APPEND | PV_DUPLICATE | PV_NO_SCRATCH))
         goto error;
 
       /* Sort direction. */
+      enum subcase_direction direction;
       if (lex_match (lexer, T_LPAREN))
 	{
 	  if (lex_match_id (lexer, "D") || lex_match_id (lexer, "DOWN"))
@@ -82,12 +82,13 @@ parse_sort_criteria (struct lexer *lexer, const struct dictionary *dict,
       else
         direction = SC_ASCEND;
 
-      for (i = prev_n_vars; i < n_vars; i++)
+      for (size_t i = prev_n_vars; i < n_vars; i++)
         {
           const struct variable *var = (*vars)[i];
           if (!subcase_add_var (ordering, var, direction))
-            msg (SW, _("Variable %s specified twice in sort criteria."),
-                 var_get_name (var));
+            lex_ofs_msg (lexer, SW, start_ofs, lex_ofs (lexer) - 1,
+                         _("Variable %s specified twice in sort criteria."),
+                         var_get_name (var));
         }
     }
   while (lex_token (lexer) == T_ID
@@ -97,6 +98,8 @@ parse_sort_criteria (struct lexer *lexer, const struct dictionary *dict,
   return true;
 
 error:
+  subcase_uninit (ordering);
+  subcase_init_empty (ordering);
   free (local_vars);
   if (vars)
     *vars = NULL;
diff --git a/tests/language/stats/sort-cases.at b/tests/language/stats/sort-cases.at
index 8d839f9dab..6c21efa5eb 100644
--- a/tests/language/stats/sort-cases.at
+++ b/tests/language/stats/sort-cases.at
@@ -135,3 +135,37 @@ x,mod2
 5.00,1.00
 ])
 AT_CLEANUP
+
+AT_SETUP([SORT CASES syntax errors])
+AT_DATA([sort-cases.sps], [dnl
+DATA LIST LIST NOTABLE/x y z.
+SORT CASES BY **.
+SORT CASES BY x(**).
+SORT CASES BY x(D**).
+SORT CASES BY x(A) x(D) y(**).
+])
+AT_DATA([insert.sps], [dnl
+INSERT FILE='sort-cases.sps' ERROR=IGNORE.
+])
+AT_CHECK([pspp --testing-mode -O format=csv insert.sps], [1], [dnl
+"sort-cases.sps:2.15-2.16: error: SORT CASES: Syntax error expecting variable name.
+    2 | SORT CASES BY **.
+      |               ^~"
+
+"sort-cases.sps:3.17-3.18: error: SORT CASES: Syntax error expecting A or D.
+    3 | SORT CASES BY x(**).
+      |                 ^~"
+
+"sort-cases.sps:4.18-4.19: error: SORT CASES: Syntax error expecting `)'.
+    4 | SORT CASES BY x(D**).
+      |                  ^~"
+
+"sort-cases.sps:5.15-5.23: warning: SORT CASES: Variable x specified twice in sort criteria.
+    5 | SORT CASES BY x(A) x(D) y(**).
+      |               ^~~~~~~~~"
+
+"sort-cases.sps:5.27-5.28: error: SORT CASES: Syntax error expecting A or D.
+    5 | SORT CASES BY x(A) x(D) y(**).
+      |                           ^~"
+])
+AT_CLEANUP
\ No newline at end of file
-- 
2.30.2