From: Ben Pfaff <blp@cs.stanford.edu>
Date: Mon, 21 Nov 2022 06:25:33 +0000 (-0800)
Subject: MEANS: Improve error messages and coding style.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=09e70bd965a78b9831d51e93270d24932ed00b5b;p=pspp

MEANS: Improve error messages and coding style.
---

diff --git a/src/language/stats/means-parser.c b/src/language/stats/means-parser.c
index 6bbae89385..0cc0b1b317 100644
--- a/src/language/stats/means-parser.c
+++ b/src/language/stats/means-parser.c
@@ -73,52 +73,67 @@ static bool
 lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
 		 int n)
 {
-  const char *tstr;
-  if (lex_next_token (lexer, n) !=  T_ID)
+  if (lex_next_token (lexer, n) != T_ID)
     return false;
 
-  tstr = lex_next_tokcstr (lexer, n);
+  const char *tstr = lex_next_tokcstr (lexer, n);
+  return dict_lookup_var (dict, tstr) != NULL;
+}
 
-  if (NULL == dict_lookup_var (dict, tstr))
-    return false;
+static const struct cell_spec *
+match_cell (struct lexer *lexer)
+{
+  for (size_t i = 0; i < n_MEANS_STATISTICS; ++i)
+    {
+      const struct cell_spec *cs = &cell_spec[i];
+      if (lex_match_id (lexer, cs->keyword))
+        return cs;
+    }
+  return NULL;
+}
 
-  return true;
+static void
+add_statistic (struct means *means, int statistic)
+{
+  if (means->n_statistics >= means->allocated_statistics)
+    means->statistics = pool_2nrealloc (means->pool, means->statistics,
+                                        &means->allocated_statistics,
+                                        sizeof *means->statistics);
+  means->statistics[means->n_statistics++] = statistic;
+}
+
+void
+means_set_default_statistics (struct means *means)
+{
+  means->n_statistics = 0;
+  add_statistic (means, MEANS_MEAN);
+  add_statistic (means, MEANS_N);
+  add_statistic (means, MEANS_STDDEV);
 }
 
 bool
 means_parse (struct lexer *lexer, struct means *means)
 {
-  /*   Optional TABLES =   */
-  if (lex_match_id (lexer, "TABLES"))
-    {
-      if (! lex_force_match (lexer, T_EQUALS))
-	return false;
-    }
+  /* Optional TABLES=. */
+  if (lex_match_id (lexer, "TABLES") && !lex_force_match (lexer, T_EQUALS))
+    return false;
 
-  bool more_tables = true;
   /* Parse the "tables" */
-  while (more_tables)
+  for (;;)
     {
       means->table = pool_realloc (means->pool, means->table,
-				   (means->n_tables + 1) * sizeof (*means->table));
+				   (means->n_tables + 1) * sizeof *means->table);
 
-      if (! parse_means_table_syntax (lexer, means,
-				      &means->table[means->n_tables]))
-	{
-	  return false;
-	}
-      means->n_tables ++;
+      if (!parse_means_table_syntax (lexer, means,
+                                     &means->table[means->n_tables]))
+        return false;
+      means->n_tables++;
 
       /* Look ahead to see if there are more tables to be parsed */
-      more_tables = false;
-      if (T_SLASH == lex_next_token (lexer, 0))
-	{
-	  if (lex_is_variable (lexer, means->dict, 1))
-	    {
-	      more_tables = true;
-	      lex_match (lexer, T_SLASH);
-	    }
-	}
+      if (lex_next_token (lexer, 0) != T_SLASH
+          || !lex_is_variable (lexer, means->dict, 1))
+        break;
+      lex_match (lexer, T_SLASH);
     }
 
   /* /MISSING subcommand */
@@ -128,43 +143,36 @@ means_parse (struct lexer *lexer, struct means *means)
 
       if (lex_match_id (lexer, "MISSING"))
 	{
-	  /*
-	    If no MISSING subcommand is specified, each combination of
-	    a dependent variable and categorical variables is handled
-	    separately.
-	  */
+	  /* If no MISSING subcommand is specified, each combination of a
+             dependent variable and categorical variables is handled
+             separately. */
 	  lex_match (lexer, T_EQUALS);
 	  if (lex_match_id (lexer, "INCLUDE"))
 	    {
-	      /*
-		Use the subcommand  "/MISSING=INCLUDE" to include user-missing
-		values in the analysis.
-	      */
+	      /* Use the subcommand "/MISSING=INCLUDE" to include user-missing
+                 values in the analysis. */
 
 	      means->ctrl_exclude = MV_SYSTEM;
 	      means->dep_exclude = MV_SYSTEM;
 	    }
 	  else if (lex_match_id (lexer, "DEPENDENT"))
-	    /*
-	      Use the command "/MISSING=DEPENDENT" to
-	      include user-missing values for the categorical variables,
-	      while excluding them for the dependent variables.
-
-	      Cases are dropped only when user-missing values
-	      appear in dependent  variables.  User-missing
-	      values for categorical variables are treated according to
-	      their face value.
-
-	      Cases are ALWAYS dropped when System Missing values appear
-	      in the categorical variables.
-	    */
+	    /* Use the command "/MISSING=DEPENDENT" to include user-missing
+               values for the categorical variables, while excluding them for
+               the dependent variables.
+
+               Cases are dropped only when user-missing values appear in
+               dependent variables.  User-missing values for categorical
+               variables are treated according to their face value.
+
+               Cases are ALWAYS dropped when System Missing values appear in
+               the categorical variables. */
 	    {
 	      means->dep_exclude = MV_ANY;
 	      means->ctrl_exclude = MV_SYSTEM;
 	    }
 	  else
 	    {
-	      lex_error (lexer, NULL);
+	      lex_error_expecting (lexer, "INCLUDE", "DEPENDENT");
 	      return false;
 	    }
 	}
@@ -174,63 +182,30 @@ means_parse (struct lexer *lexer, struct means *means)
 
 	  /* The default values become overwritten */
 	  means->n_statistics = 0;
-	  pool_free (means->pool, means->statistics);
-	  means->statistics = 0;
-	  while (lex_token (lexer) != T_ENDCMD
-		 && lex_token (lexer) != T_SLASH)
+	  while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 	    {
 	      if (lex_match (lexer, T_ALL))
 	      	{
-		  pool_free (means->pool, means->statistics);
-		  means->statistics = pool_calloc (means->pool,
-						   n_MEANS_STATISTICS,
-						   sizeof (*means->statistics));
-		  means->n_statistics = n_MEANS_STATISTICS;
-		  int i;
-		  for (i = 0; i < n_MEANS_STATISTICS; ++i)
-		    {
-		      means->statistics[i] = i;
-		    }
+		  means->n_statistics = 0;
+		  for (int i = 0; i < n_MEANS_STATISTICS; ++i)
+                    add_statistic (means, i);
 	      	}
 	      else if (lex_match_id (lexer, "NONE"))
-		{
-		  means->n_statistics = 0;
-		  pool_free (means->pool, means->statistics);
-		  means->statistics = 0;
-		}
+                means->n_statistics = 0;
 	      else if (lex_match_id (lexer, "DEFAULT"))
+                means_set_default_statistics (means);
+              else
 		{
-		  pool_free (means->pool, means->statistics);
-		  means->statistics = pool_calloc (means->pool,
-						   3,
-						   sizeof *means->statistics);
-		  means->statistics[0] = MEANS_MEAN;
-		  means->statistics[1] = MEANS_N;
-		  means->statistics[2] = MEANS_STDDEV;
-		}
-	      else
-		{
-		  int i;
-		  for (i = 0; i < n_MEANS_STATISTICS; ++i)
-		    {
-		      const struct cell_spec *cs = cell_spec + i;
-		      if (lex_match_id (lexer, cs->keyword))
-			{
-			  means->statistics
-			    = pool_realloc (means->pool,
-					   means->statistics,
-					   (means->n_statistics + 1)
-					   * sizeof (*means->statistics));
-
-			  means->statistics[means->n_statistics] = i;
-			  means->n_statistics++;
-			  break;
-			}
-		    }
-
-		  if (i >= n_MEANS_STATISTICS)
+                  const struct cell_spec *cs = match_cell (lexer);
+                  if (cs)
+                    add_statistic (means, cs - cell_spec);
+                  else
 		    {
-		      lex_error (lexer, NULL);
+                      const char *keywords[n_MEANS_STATISTICS];
+                      for (int i = 0; i < n_MEANS_STATISTICS; ++i)
+                        keywords[i] = cell_spec[i].keyword;
+		      lex_error_expecting_array (lexer, keywords,
+                                                 n_MEANS_STATISTICS);
 		      return false;
 		    }
 		}
@@ -238,7 +213,7 @@ means_parse (struct lexer *lexer, struct means *means)
 	}
       else
 	{
-	  lex_error (lexer, NULL);
+	  lex_error_expecting (lexer, "MISSING", "CELLS");
 	  return false;
 	}
     }
diff --git a/src/language/stats/means.c b/src/language/stats/means.c
index 4ebd60d83a..c1337ad949 100644
--- a/src/language/stats/means.c
+++ b/src/language/stats/means.c
@@ -31,6 +31,7 @@
 #include "libpspp/pool.h"
 
 #include "language/command.h"
+#include "language/lexer/lexer.h"
 
 #include "count-one-bits.h"
 #include "count-leading-zeros.h"
@@ -234,7 +235,7 @@ dump_cell (const struct cell *cell, const struct mtable *mt, int level)
 static void
 dump_indeces (const size_t *indexes, int n)
 {
-  for (int i = 0 ; i < n; ++i)
+  for (int i = 0; i < n; ++i)
     {
       printf ("%ld; ", indexes[i]);
     }
@@ -601,7 +602,7 @@ create_table_structure (const struct mtable *mt, struct pivot_table *pt,
   int * lindexes = ws->control_idx;
   /* The inner layers are situated rightmost in the table.
      So this iteration is in reverse order.  */
-  for (int l = mt->n_layers -1; l >=0 ; --l)
+  for (int l = mt->n_layers - 1; l >= 0; --l)
     {
       const struct layer *layer = mt->layers[l];
       const struct cell_container *instances = ws->instances + l;
@@ -1102,28 +1103,18 @@ run_means (struct means *cmd, struct casereader *input,
   post_means (cmd);
 }
 
-struct lexer;
-
 int
 cmd_means (struct lexer *lexer, struct dataset *ds)
 {
-  struct means means;
-  means.pool = pool_create ();
-
-  means.ctrl_exclude = MV_ANY;
-  means.dep_exclude = MV_ANY;
-  means.table = NULL;
-  means.n_tables = 0;
-
-  means.dict = dataset_dict (ds);
-
-  means.n_statistics = 3;
-  means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics);
-  means.statistics[0] = MEANS_MEAN;
-  means.statistics[1] = MEANS_N;
-  means.statistics[2] = MEANS_STDDEV;
-
-  if (! means_parse (lexer, &means))
+  struct means means = {
+    .pool = pool_create (),
+    .ctrl_exclude = MV_ANY,
+    .dep_exclude = MV_ANY,
+    .dict = dataset_dict (ds),
+  };
+  means_set_default_statistics (&means);
+
+  if (!means_parse (lexer, &means))
     goto error;
 
   /* Calculate some constant data for each table.  */
@@ -1135,62 +1126,58 @@ cmd_means (struct lexer *lexer, struct dataset *ds)
 	mt->n_combinations *= mt->layers[l]->n_factor_vars;
     }
 
-  {
-    struct casegrouper *grouper;
-    struct casereader *group;
-    bool ok;
-
-    grouper = casegrouper_create_splits (proc_open (ds), means.dict);
-    while (casegrouper_get_next_group (grouper, &group))
-      {
-	/* Allocate the workspaces.  */
-	for (int t = 0; t < means.n_tables; ++t)
+  struct casegrouper *grouper
+    = casegrouper_create_splits (proc_open (ds), means.dict);
+  struct casereader *group;
+  while (casegrouper_get_next_group (grouper, &group))
+    {
+      /* Allocate the workspaces.  */
+      for (int t = 0; t < means.n_tables; ++t)
 	{
 	  struct mtable *mt = means.table + t;
 	  mt->summ = xcalloc (mt->n_combinations * mt->n_dep_vars,
-			      sizeof (*mt->summ));
-	  mt->ws = xcalloc (mt->n_combinations, sizeof (*mt->ws));
+			      sizeof *mt->summ);
+	  mt->ws = xcalloc (mt->n_combinations, sizeof *mt->ws);
 	}
-      	run_means (&means, group, ds);
-	for (int t = 0; t < means.n_tables; ++t)
-	  {
-	    const struct mtable *mt = means.table + t;
+      run_means (&means, group, ds);
+      for (int t = 0; t < means.n_tables; ++t)
+        {
+          const struct mtable *mt = means.table + t;
 
-	    means_case_processing_summary (mt);
-	    means_shipout (mt, &means);
+          means_case_processing_summary (mt);
+          means_shipout (mt, &means);
 
-	    for (int i = 0; i < mt->n_combinations; ++i)
-	      {
-		struct workspace *ws = mt->ws + i;
-		if (ws->root_cell == NULL)
-		  continue;
+          for (int i = 0; i < mt->n_combinations; ++i)
+            {
+              struct workspace *ws = mt->ws + i;
+              if (ws->root_cell)
+                means_destroy_cells (&means, ws->root_cell, mt);
+            }
+        }
 
-		means_destroy_cells (&means, ws->root_cell, mt);
-	      }
-	  }
+      /* Destroy the workspaces.  */
+      for (int t = 0; t < means.n_tables; ++t)
+        {
+          struct mtable *mt = means.table + t;
+          free (mt->summ);
+          for (int i = 0; i < mt->n_combinations; ++i)
+            {
+              struct workspace *ws = mt->ws + i;
+              destroy_workspace (mt, ws);
+            }
+          free (mt->ws);
+        }
+    }
 
-	/* Destroy the workspaces.  */
-	for (int t = 0; t < means.n_tables; ++t)
-	  {
-	    struct mtable *mt = means.table + t;
-	    free (mt->summ);
-	    for (int i = 0; i < mt->n_combinations; ++i)
-	      {
-		struct workspace *ws = mt->ws + i;
-		destroy_workspace (mt, ws);
-	      }
-	    free (mt->ws);
-	  }
-      }
-    ok = casegrouper_destroy (grouper);
-    ok = proc_commit (ds) && ok;
-  }
+  bool ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
+  if (!ok)
+    goto error;
 
   pool_destroy (means.pool);
   return CMD_SUCCESS;
 
  error:
-
   pool_destroy (means.pool);
   return CMD_FAILURE;
 }
diff --git a/src/language/stats/means.h b/src/language/stats/means.h
index b2bdd1387f..1ce86868e1 100644
--- a/src/language/stats/means.h
+++ b/src/language/stats/means.h
@@ -21,6 +21,10 @@
 #include "libpspp/bt.h"
 #include "libpspp/compiler.h"
 
+struct casereader;
+struct dataset;
+struct lexer;
+
 struct cell_container
 {
   /* A hash table containing the cells.  The table is indexed by a hash
@@ -127,6 +131,7 @@ struct means
   /* The statistics to be calculated for each cell.  */
   int *statistics;
   int n_statistics;
+  size_t allocated_statistics;
 
   /* Pool on which cell functions may allocate data.  */
   struct pool *pool;
@@ -148,15 +153,8 @@ enum
     MEANS_STDDEV
   };
 
-
-
-struct dataset;
-struct casereader;
-void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED);
-
-struct lexer;
-bool means_parse (struct lexer *lexer, struct means *means);
-
-
+void run_means (struct means *, struct casereader *, const struct dataset *);
+bool means_parse (struct lexer *, struct means *);
+void means_set_default_statistics (struct means *);
 
 #endif
diff --git a/tests/language/stats/means.at b/tests/language/stats/means.at
index 72f98b3d1f..f25fe04aec 100644
--- a/tests/language/stats/means.at
+++ b/tests/language/stats/means.at
@@ -1120,5 +1120,34 @@ Range,02:01:00,$1.14
 
 AT_CLEANUP
 
-
-
+AT_SETUP([MEANS syntax errors])
+AT_DATA([means.sps], [dnl
+DATA LIST LIST NOTABLE/x y z.
+MEANS TABLES **.
+MEANS x BY **.
+MEANS x/MISSING=**.
+MEANS x/CELLS=**.
+MEANS x/ **.
+])
+AT_CHECK([pspp -O format=csv means.sps], [1], [dnl
+"means.sps:2.14-2.15: error: MEANS: Syntax error expecting `='.
+    2 | MEANS TABLES **.
+      |              ^~"
+
+"means.sps:3.12-3.13: error: MEANS: Syntax error expecting variable name.
+    3 | MEANS x BY **.
+      |            ^~"
+
+"means.sps:4.17-4.18: error: MEANS: Syntax error expecting INCLUDE or DEPENDENT.
+    4 | MEANS x/MISSING=**.
+      |                 ^~"
+
+"means.sps:5.15-5.16: error: MEANS: Syntax error expecting one of the following: MEAN, COUNT, STDDEV, SEMEAN, SUM, MIN, MAX, RANGE, VARIANCE, KURT, SEKURT, SKEW, SESKEW, FIRST, LAST, HARMONIC, GEOMETRIC.
+    5 | MEANS x/CELLS=**.
+      |               ^~"
+
+"means.sps:6.10-6.11: error: MEANS: Syntax error expecting MISSING or CELLS.
+    6 | MEANS x/ **.
+      |          ^~"
+])
+AT_CLEANUP
\ No newline at end of file