T-TEST: Improve error messages and coding style.
authorBen Pfaff <blp@cs.stanford.edu>
Mon, 19 Sep 2022 02:59:16 +0000 (19:59 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Mon, 19 Sep 2022 03:00:50 +0000 (20:00 -0700)
src/language/stats/t-test-parser.c
src/language/stats/t-test.h
tests/language/stats/t-test.at

index cc6e35edb9cbca4061ff56823fb904927bb197f5..3b201e0b5c10737123fe36d56e5f876d2f1fd7d6 100644 (file)
@@ -38,9 +38,6 @@ int
 cmd_t_test (struct lexer *lexer, struct dataset *ds)
 {
   bool ok = false;
-  const struct dictionary *dict = dataset_dict (ds);
-  struct tt tt;
-  int mode_count = 0;
 
   /* Variables pertaining to the paired mode */
   const struct variable **v1 = NULL;
@@ -51,7 +48,6 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
   size_t n_pairs = 0;
   vp *pairs = NULL;
 
-
   /* One sample mode */
   double testval = SYSMIS;
 
@@ -62,18 +58,21 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
   int gval_width = -1;
   bool cut = false;
 
-  tt.wv = dict_get_weight (dict);
-  tt.dict = dict;
-  tt.confidence = 0.95;
-  tt.exclude = MV_ANY;
-  tt.missing_type = MISS_ANALYSIS;
-  tt.n_vars = 0;
-  tt.vars = NULL;
-  tt.mode = MODE_undef;
+  const struct dictionary *dict = dataset_dict (ds);
+  struct tt tt = {
+    .wv = dict_get_weight (dict),
+    .dict = dict,
+    .confidence = 0.95,
+    .exclude = MV_ANY,
+    .missing_type = MISS_ANALYSIS,
+    .n_vars = 0,
+    .vars = NULL,
+  };
 
   lex_match (lexer, T_EQUALS);
 
-  for (; lex_token (lexer) != T_ENDCMD;)
+  int mode_count = 0;
+  while (lex_token (lexer) != T_ENDCMD)
     {
       lex_match (lexer, T_SLASH);
       if (lex_match_id (lexer, "TESTVAL"))
@@ -94,7 +93,8 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
           lex_match (lexer, T_EQUALS);
 
           int groups_start = lex_ofs (lexer);
-          if (NULL == (gvar = parse_variable (lexer, dict)))
+          gvar = parse_variable (lexer, dict);
+          if (!gvar)
             goto exit;
 
           gval_width = var_get_width (gvar);
@@ -104,11 +104,13 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
           int n;
           if (lex_match (lexer, T_LPAREN))
             {
-              parse_value (lexer, &gval0, gvar);
+              if (!parse_value (lexer, &gval0, gvar))
+                goto exit;
               if (lex_token (lexer) != T_RPAREN)
                 {
                   lex_match (lexer, T_COMMA);
-                  parse_value (lexer, &gval1, gvar);
+                  if (!parse_value (lexer, &gval1, gvar))
+                    goto exit;
                   cut = false;
                   n = 2;
                 }
@@ -118,7 +120,7 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
                   n = 1;
                 }
 
-              if (! lex_force_match (lexer, T_RPAREN))
+              if (!lex_force_match (lexer, T_RPAREN))
                 goto exit;
             }
           else
@@ -155,6 +157,7 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
           tt.mode = MODE_PAIRED;
           lex_match (lexer, T_EQUALS);
 
+          int vars_start = lex_ofs (lexer);
           if (!parse_variables_const (lexer, dict,
                                       &v1, &n_v1,
                                       PV_NO_DUPLICATE | PV_NUMERIC))
@@ -167,81 +170,64 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
                                           &v2, &n_v2,
                                           PV_NO_DUPLICATE | PV_NUMERIC))
                 goto exit;
+              int vars_end = lex_ofs (lexer) - 1;
 
-              if (lex_match (lexer, T_LPAREN)
-                  && lex_match_id (lexer, "PAIRED")
-                  && lex_match (lexer, T_RPAREN))
+              if (lex_match_phrase (lexer, "(PAIRED)"))
                 {
                   paired = true;
                   if (n_v1 != n_v2)
                     {
-                      msg (SE, _("PAIRED was specified but the number of variables "
-                                 "preceding WITH (%zu) did not match the number "
-                                 "following (%zu)."),
-                           n_v1, n_v2);
+                      lex_ofs_error (lexer, vars_start, vars_end,
+                                     _("PAIRED was specified, but the number "
+                                       "of variables preceding WITH (%zu) "
+                                       "does not match the number following "
+                                       "(%zu)."),
+                                     n_v1, n_v2);
                       goto exit;
                     }
                 }
             }
-          {
-            int i;
-
-            if (!with)
-              n_pairs = (n_v1 * (n_v1 - 1)) / 2.0;
-            else if (paired)
-              n_pairs = n_v1;
-            else
-              n_pairs = n_v1 * n_v2;
 
-            pairs = xcalloc (n_pairs, sizeof *pairs);
+          n_pairs = (paired ? n_v1
+                     : with ? n_v1 * n_v2
+                     : (n_v1 * (n_v1 - 1)) / 2.0);
+          pairs = xcalloc (n_pairs, sizeof *pairs);
 
-            if (with)
-              {
-                int x = 0;
-                if (paired)
-                  {
-                    for (i = 0 ; i < n_v1; ++i)
-                      {
-                        vp *pair = &pairs[i];
-                        (*pair)[0] = v1[i];
-                        (*pair)[1] = v2[i];
-                      }
-                  }
-                else
+          if (paired)
+            {
+              for (size_t i = 0; i < n_v1; ++i)
+                {
+                  vp *pair = &pairs[i];
+                  (*pair)[0] = v1[i];
+                  (*pair)[1] = v2[i];
+                }
+            }
+          else if (with)
+            {
+              int x = 0;
+              for (size_t i = 0; i < n_v1; ++i)
+                for (size_t j = 0; j < n_v2; ++j)
                   {
-                    for (i = 0 ; i < n_v1; ++i)
-                      {
-                        int j;
-                        for (j = 0 ; j < n_v2; ++j)
-                          {
-                            vp *pair = &pairs[x++];
-                            (*pair)[0] = v1[i];
-                            (*pair)[1] = v2[j];
-                          }
-                      }
+                    vp *pair = &pairs[x++];
+                    (*pair)[0] = v1[i];
+                    (*pair)[1] = v2[j];
                   }
-              }
-            else
-              {
-                int x = 0;
-                for (i = 0 ; i < n_v1; ++i)
+            }
+          else
+            {
+              int x = 0;
+              for (size_t i = 0; i < n_v1; ++i)
+                for (size_t j = i + 1; j < n_v1; ++j)
                   {
-                    int j;
-
-                    for (j = i + 1 ; j < n_v1; ++j)
-                      {
-                        vp *pair = &pairs[x++];
-                        (*pair)[0] = v1[i];
-                        (*pair)[1] = v1[j];
-                      }
+                    vp *pair = &pairs[x++];
+                    (*pair)[0] = v1[i];
+                    (*pair)[1] = v1[j];
                   }
-              }
-
-          }
+            }
         }
       else if (lex_match_id (lexer, "VARIABLES"))
         {
-          if (tt.mode == MODE_PAIRED)
+          if (mode_count && tt.mode == MODE_PAIRED)
             {
               lex_next_error (lexer, -1, -1,
                               _("%s subcommand may not be used with %s."),
@@ -263,24 +249,17 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
             {
               if (lex_match_id (lexer, "INCLUDE"))
-                {
-                  tt.exclude = MV_SYSTEM;
-                }
+                tt.exclude = MV_SYSTEM;
               else if (lex_match_id (lexer, "EXCLUDE"))
-                {
-                  tt.exclude = MV_ANY;
-                }
+                tt.exclude = MV_ANY;
               else if (lex_match_id (lexer, "LISTWISE"))
-                {
-                  tt.missing_type = MISS_LISTWISE;
-                }
+                tt.missing_type = MISS_LISTWISE;
               else if (lex_match_id (lexer, "ANALYSIS"))
-                {
-                  tt.missing_type = MISS_ANALYSIS;
-                }
+                tt.missing_type = MISS_ANALYSIS;
               else
                 {
-                  lex_error (lexer, NULL);
+                  lex_error_expecting (lexer, "INCLUDE", "EXCLUDE",
+                                       "LISTWISE", "ANALYSIS");
                   goto exit;
                 }
               lex_match (lexer, T_COMMA);
@@ -289,20 +268,24 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
       else if (lex_match_id (lexer, "CRITERIA"))
         {
           lex_match (lexer, T_EQUALS);
-          if (lex_match_id (lexer, "CIN") || lex_force_match_id (lexer, "CI"))
-            if (lex_force_match (lexer, T_LPAREN))
-              {
-                if (!lex_force_num (lexer))
-                  goto exit;
-                tt.confidence = lex_number (lexer);
-                lex_get (lexer);
-                if (! lex_force_match (lexer, T_RPAREN))
-                  goto exit;
-              }
+          if (!lex_match_id (lexer, "CIN") && !lex_match_id (lexer, "CI"))
+            {
+              lex_error_expecting (lexer, "CIN", "CI");
+              goto exit;
+            }
+          if (!lex_force_match (lexer, T_LPAREN))
+            goto exit;
+          if (!lex_force_num (lexer))
+            goto exit;
+          tt.confidence = lex_number (lexer);
+          lex_get (lexer);
+          if (!lex_force_match (lexer, T_RPAREN))
+            goto exit;
         }
       else
         {
-          lex_error (lexer, NULL);
+          lex_error_expecting (lexer, "TESTVAL", "GROUPS", "PAIRS",
+                               "VARIABLES", "MISSING", "CRITERIA");
           goto exit;
         }
     }
@@ -320,63 +303,43 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
       goto exit;
     }
 
-
-
-  /* Deal with splits etc */
-  {
-    struct casereader *group;
-    struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
-
-    while (casegrouper_get_next_group (grouper, &group))
+  struct casereader *group;
+  struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
+  while (casegrouper_get_next_group (grouper, &group))
+    switch (tt.mode)
       {
-        if (tt.mode == MODE_SINGLE)
+      case MODE_SINGLE:
+        if (tt.missing_type == MISS_LISTWISE)
+          group = casereader_create_filter_missing (group, tt.vars, tt.n_vars,
+                                                    tt.exclude, NULL, NULL);
+        one_sample_run (&tt, testval, group);
+        break;
+
+      case MODE_PAIRED:
+        if (tt.missing_type == MISS_LISTWISE)
           {
-            if (tt.missing_type == MISS_LISTWISE)
-              group  = casereader_create_filter_missing (group,
-                                                         tt.vars, tt.n_vars,
-                                                         tt.exclude,
-                                                         NULL,  NULL);
-            one_sample_run (&tt, testval, group);
+            group = casereader_create_filter_missing (group, v1, n_v1,
+                                                      tt.exclude, NULL, NULL);
+            group = casereader_create_filter_missing (group, v2, n_v2,
+                                                      tt.exclude, NULL, NULL);
           }
-        else if (tt.mode == MODE_PAIRED)
-          {
-            if (tt.missing_type == MISS_LISTWISE)
-              {
-                group  = casereader_create_filter_missing (group,
-                                                           v1, n_v1,
-                                                           tt.exclude,
-                                                           NULL,  NULL);
-                group  = casereader_create_filter_missing (group,
-                                                           v2, n_v2,
-                                                           tt.exclude,
-                                                           NULL,  NULL);
-              }
-
-            paired_run (&tt, n_pairs, pairs, group);
-          }
-        else /* tt.mode == MODE_INDEP */
-          {
-            if (tt.missing_type == MISS_LISTWISE)
-              {
-                group  = casereader_create_filter_missing (group,
-                                                           tt.vars, tt.n_vars,
-                                                           tt.exclude,
-                                                           NULL,  NULL);
+        paired_run (&tt, n_pairs, pairs, group);
+        break;
 
-                group  = casereader_create_filter_missing (group,
-                                                           &gvar, 1,
-                                                           tt.exclude,
-                                                           NULL,  NULL);
-
-              }
-
-            indep_run (&tt, gvar, cut, &gval0, &gval1, group);
+      case MODE_INDEP:
+        if (tt.missing_type == MISS_LISTWISE)
+          {
+            group = casereader_create_filter_missing (group, tt.vars, tt.n_vars,
+                                                      tt.exclude, NULL, NULL);
+            group = casereader_create_filter_missing (group, &gvar, 1,
+                                                      tt.exclude, NULL, NULL);
           }
+        indep_run (&tt, gvar, cut, &gval0, &gval1, group);
+        break;
       }
 
-    ok = casegrouper_destroy (grouper);
-    ok = proc_commit (ds) && ok;
-  }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
 exit:
   if (gval_width != -1)
index c2e8099335489b26444d2bfbe5ed8ac03591ddc4..2b4dba445a77e439508045b12d3895b633d9d9df 100644 (file)
@@ -30,7 +30,6 @@ enum missing_type
 
 enum mode
   {
-    MODE_undef,
     MODE_PAIRED,
     MODE_INDEP,
     MODE_SINGLE,
index bf1b293603fe1bd0b31e806ce278ef9886c07a6c..bab4f9c1865a158d78bd32b72d55a7f920810f53 100644 (file)
@@ -912,8 +912,113 @@ end data.
 
 
 t-test /MISSING=listwise /PAIRS a"b with c d (PA       RED).
-])
+]) dnl "
 
 AT_CHECK([pspp t.sps],[1],[ignore],[ignore])
 
 AT_CLEANUP
+
+AT_SETUP([T-TEST syntax errors])
+AT_DATA([t-test.sps], [dnl
+DATA LIST LIST NOTABLE/x y z * s(a10).
+T-TEST TESTVAL=**.
+T-TEST GROUPS=**.
+T-TEST GROUPS=x(**).
+T-TEST GROUPS=x(1,**).
+T-TEST GROUPS=x(1,2 **).
+T-TEST GROUPS=s('a').
+T-TEST VARIABLES=x y PAIRS.
+T-TEST PAIRS=**.
+T-TEST PAIRS=x WITH **.
+T-TEST PAIRS=x WITH y z (PAIRED).
+T-TEST PAIRS=x WITH y/VARIABLES.
+T-TEST VARIABLES=**.
+T-TEST MISSING=**.
+T-TEST CRITERIA=**.
+T-TEST CRITERIA=CIN**.
+T-TEST CRITERIA=CIN(**).
+T-TEST CRITERIA=CIN(1 **).
+T-TEST **.
+T-TEST MISSING=INCLUDE.
+T-TEST MISSING=INCLUDE/TESTVAL=1.
+])
+AT_CHECK([pspp -O format=csv t-test.sps], [1], [dnl
+"t-test.sps:2.16-2.17: error: T-TEST: Syntax error expecting number.
+    2 | T-TEST TESTVAL=**.
+      |                ^~"
+
+"t-test.sps:3.15-3.16: error: T-TEST: Syntax error expecting variable name.
+    3 | T-TEST GROUPS=**.
+      |               ^~"
+
+"t-test.sps:4.17-4.18: error: T-TEST: Syntax error expecting number.
+    4 | T-TEST GROUPS=x(**).
+      |                 ^~"
+
+"t-test.sps:5.19-5.20: error: T-TEST: Syntax error expecting number.
+    5 | T-TEST GROUPS=x(1,**).
+      |                   ^~"
+
+"t-test.sps:6.21-6.22: error: T-TEST: Syntax error expecting `)'.
+    6 | T-TEST GROUPS=x(1,2 **).
+      |                     ^~"
+
+"t-test.sps:7.15-7.20: error: T-TEST: When applying GROUPS to a string variable, two values must be specified.
+    7 | T-TEST GROUPS=s('a').
+      |               ^~~~~~"
+
+"t-test.sps:8.22-8.26: error: T-TEST: VARIABLES subcommand may not be used with PAIRS.
+    8 | T-TEST VARIABLES=x y PAIRS.
+      |                      ^~~~~"
+
+"t-test.sps:9.14-9.15: error: T-TEST: Syntax error expecting variable name.
+    9 | T-TEST PAIRS=**.
+      |              ^~"
+
+"t-test.sps:10.21-10.22: error: T-TEST: Syntax error expecting variable name.
+   10 | T-TEST PAIRS=x WITH **.
+      |                     ^~"
+
+"t-test.sps:11.14-11.23: error: T-TEST: PAIRED was specified, but the number of variables preceding WITH (1) does not match the number following (2).
+   11 | T-TEST PAIRS=x WITH y z (PAIRED).
+      |              ^~~~~~~~~~"
+
+"t-test.sps:12.23-12.31: error: T-TEST: VARIABLES subcommand may not be used with PAIRS.
+   12 | T-TEST PAIRS=x WITH y/VARIABLES.
+      |                       ^~~~~~~~~"
+
+"t-test.sps:13.18-13.19: error: T-TEST: Syntax error expecting variable name.
+   13 | T-TEST VARIABLES=**.
+      |                  ^~"
+
+"t-test.sps:14.16-14.17: error: T-TEST: Syntax error expecting INCLUDE, EXCLUDE, LISTWISE, or ANALYSIS.
+   14 | T-TEST MISSING=**.
+      |                ^~"
+
+"t-test.sps:15.17-15.18: error: T-TEST: Syntax error expecting CIN or CI.
+   15 | T-TEST CRITERIA=**.
+      |                 ^~"
+
+"t-test.sps:16.20-16.21: error: T-TEST: Syntax error expecting `('.
+   16 | T-TEST CRITERIA=CIN**.
+      |                    ^~"
+
+"t-test.sps:17.21-17.22: error: T-TEST: Syntax error expecting number.
+   17 | T-TEST CRITERIA=CIN(**).
+      |                     ^~"
+
+"t-test.sps:18.23-18.24: error: T-TEST: Syntax error expecting `)'.
+   18 | T-TEST CRITERIA=CIN(1 **).
+      |                       ^~"
+
+"t-test.sps:19.8-19.9: error: T-TEST: Syntax error expecting TESTVAL, GROUPS, PAIRS, VARIABLES, MISSING, or CRITERIA.
+   19 | T-TEST **.
+      |        ^~"
+
+"t-test.sps:20: error: T-TEST: Exactly one of TESTVAL, GROUPS and PAIRS subcommands must be specified."
+
+"t-test.sps:21.1-21.33: error: T-TEST: Required subcommand VARIABLES was not specified.
+   21 | T-TEST MISSING=INCLUDE/TESTVAL=1.
+      | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+])
+AT_CLEANUP
\ No newline at end of file