add errors for unimplemented features

[pspp] / src / language / lexer / lexer.c
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c

index 329003406bf0281b0d01f99db81372bd4b5649b0..7c540e528822a2559208ec2e1076d49a7362210a 100644 (file)
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -282,15 +282,15 @@ struct lexer
    };
  
  static struct lex_source *lex_source__ (const struct lexer *);
-static char *lex_source_get_syntax__ (const struct lex_source *,
-                                      int n0, int n1);
+static char *lex_source_syntax__ (const struct lex_source *,
+                                  int ofs0, int ofs1);
  static const struct lex_token *lex_next__ (const struct lexer *, int n);
  static void lex_source_push_endcmd__ (struct lex_source *);
  static void lex_source_push_parse (struct lex_source *, struct lex_token *);
  static void lex_source_clear_parse (struct lex_source *);
  
  static bool lex_source_get_parse (struct lex_source *);
-static void lex_source_error_valist (struct lex_source *, int n0, int n1,
+static void lex_source_error_valist (struct lex_source *, int ofs0, int ofs1,
                                       const char *format, va_list)
     PRINTF_FORMAT (4, 0);
  static const struct lex_token *lex_source_next__ (const struct lex_source *,
@@ -425,7 +425,7 @@ lex_error (struct lexer *lexer, const char *format, ...)
    va_list args;
  
    va_start (args, format);
-  lex_next_error_valist (lexer, 0, 0, format, args);
+  lex_ofs_error_valist (lexer, lex_ofs (lexer), lex_ofs (lexer), format, args);
    va_end (args);
  }
  
@@ -434,18 +434,33 @@ lex_error (struct lexer *lexer, const char *format, ...)
  void
  lex_error_valist (struct lexer *lexer, const char *format, va_list args)
  {
-  lex_next_error_valist (lexer, 0, 0, format, args);
+  lex_ofs_error_valist (lexer, lex_ofs (lexer), lex_ofs (lexer), format, args);
  }
  
-/* Prints a syntax error message containing the current token and
-   given message MESSAGE (if non-null). */
+/* Prints a syntax error message for the span of tokens N0 through N1 ahead of
+   the current token in LEXER, inclusive, adding the message formatted from
+   FORMAT and its arguments (if FORMAT is non-null). */
  void
  lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
  {
    va_list args;
  
    va_start (args, format);
-  lex_next_error_valist (lexer, n0, n1, format, args);
+  int ofs = lex_ofs (lexer);
+  lex_ofs_error_valist (lexer, n0 + ofs, n1 + ofs, format, args);
+  va_end (args);
+}
+
+/* Prints a syntax error message for the span of tokens with offsets OFS0
+   through OFS1, inclusive, within the current command in LEXER, adding the
+   message formatted from FORMAT and its arguments (if FORMAT is non-null). */
+void
+lex_ofs_error (struct lexer *lexer, int ofs0, int ofs1, const char *format, ...)
+{
+  va_list args;
+
+  va_start (args, format);
+  lex_ofs_error_valist (lexer, ofs0, ofs1, format, args);
    va_end (args);
  }
  
@@ -530,6 +545,12 @@ lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
                   options[5], options[6], options[7]);
        break;
  
+    case 9:
+      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, %s, or %s"),
+                 options[0], options[1], options[2], options[3], options[4],
+                 options[5], options[6], options[7], options[8]);
+      break;
+
      default:
        lex_error (lexer, NULL);
      }
@@ -578,16 +599,17 @@ lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
               sbc, spec);
  }
  
-/* Prints a syntax error message containing the current token and
-   given message MESSAGE (if non-null). */
+/* Prints a syntax error message for the span of tokens with offsets OFS0
+   through OFS1, inclusive, within the current command in LEXER, adding the
+   message formatted from FORMAT (if non-null) with the given ARGS. */
  void
-lex_next_error_valist (struct lexer *lexer, int n0, int n1,
-                       const char *format, va_list args)
+lex_ofs_error_valist (struct lexer *lexer, int ofs0, int ofs1,
+                      const char *format, va_list args)
  {
    struct lex_source *src = lex_source__ (lexer);
  
    if (src != NULL)
-    lex_source_error_valist (src, n0, n1, format, args);
+    lex_source_error_valist (src, ofs0, ofs1, format, args);
    else
      {
        struct string s;
@@ -966,6 +988,236 @@ lex_force_num (struct lexer *lexer)
    return false;
  }
  
+/* If the current token is a number in the closed range [MIN,MAX], does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message. */
+bool
+lex_force_num_range_closed (struct lexer *lexer, const char *name,
+                            double min, double max)
+{
+  bool is_number = lex_is_number (lexer);
+  bool too_small = is_number && lex_number (lexer) < min;
+  bool too_big = is_number && lex_number (lexer) > max;
+  if (is_number && !too_small && !too_big)
+    return true;
+
+  if (min > max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed a
+         number. */
+      if (name)
+        lex_error (lexer, _("Number expected for %s."), name);
+      else
+        lex_error (lexer, _("Number expected."));
+    }
+  else if (min == max)
+    {
+      if (name)
+        lex_error (lexer, _("Expected %g for %s."), min, name);
+      else
+        lex_error (lexer, _("Expected %g."), min);
+    }
+  else
+    {
+      bool report_lower_bound = min > -DBL_MAX || too_small;
+      bool report_upper_bound = max < DBL_MAX || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Expected number between %g and %g for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Expected number between %g and %g."),
+                       min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Expected non-negative number for %s."),
+                           name);
+              else
+                lex_error (lexer, _("Expected non-negative number."));
+            }
+          else
+            {
+              if (name)
+                lex_error (lexer, _("Expected number %g or greater for %s."),
+                           min, name);
+              else
+                lex_error (lexer, _("Expected number %g or greater."), min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Expected number less than or equal to %g for %s."),
+                       max, name);
+          else
+            lex_error (lexer, _("Expected number less than or equal to %g."),
+                       max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Number expected for %s."), name);
+          else
+            lex_error (lexer, _("Number expected."));
+        }
+    }
+  return false;
+}
+
+/* If the current token is a number in the half-open range [MIN,MAX), does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message. */
+bool
+lex_force_num_range_halfopen (struct lexer *lexer, const char *name,
+                              double min, double max)
+{
+  bool is_number = lex_is_number (lexer);
+  bool too_small = is_number && lex_number (lexer) < min;
+  bool too_big = is_number && lex_number (lexer) >= max;
+  if (is_number && !too_small && !too_big)
+    return true;
+
+  if (min >= max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed a
+         number. */
+      if (name)
+        lex_error (lexer, _("Number expected for %s."), name);
+      else
+        lex_error (lexer, _("Number expected."));
+    }
+  else
+    {
+      bool report_lower_bound = min > -DBL_MAX || too_small;
+      bool report_upper_bound = max < DBL_MAX || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer, _("Expected number in [%g,%g) for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Expected number in [%g,%g)."),
+                       min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Expected non-negative number for %s."),
+                           name);
+              else
+                lex_error (lexer, _("Expected non-negative number."));
+            }
+          else
+            {
+              if (name)
+                lex_error (lexer, _("Expected number %g or greater for %s."),
+                           min, name);
+              else
+                lex_error (lexer, _("Expected number %g or greater."), min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Expected number less than %g for %s."), max, name);
+          else
+            lex_error (lexer, _("Expected number less than %g."), max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Number expected for %s."), name);
+          else
+            lex_error (lexer, _("Number expected."));
+        }
+    }
+  return false;
+}
+
+/* If the current token is a number in the open range (MIN,MAX), does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message. */
+bool
+lex_force_num_range_open (struct lexer *lexer, const char *name,
+                          double min, double max)
+{
+  bool is_number = lex_is_number (lexer);
+  bool too_small = is_number && lex_number (lexer) <= min;
+  bool too_big = is_number && lex_number (lexer) >= max;
+  if (is_number && !too_small && !too_big)
+    return true;
+
+  if (min >= max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed a
+         number. */
+      if (name)
+        lex_error (lexer, _("Number expected for %s."), name);
+      else
+        lex_error (lexer, _("Number expected."));
+    }
+  else
+    {
+      bool report_lower_bound = min > -DBL_MAX || too_small;
+      bool report_upper_bound = max < DBL_MAX || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer, _("Expected number in (%g,%g) for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Expected number in (%g,%g)."), min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Expected positive number for %s."), name);
+              else
+                lex_error (lexer, _("Expected positive number."));
+            }
+          else
+            {
+              if (name)
+                lex_error (lexer, _("Expected number greater than %g for %s."),
+                           min, name);
+              else
+                lex_error (lexer, _("Expected number greater than %g."), min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer, _("Expected number less than %g for %s."),
+                       max, name);
+          else
+            lex_error (lexer, _("Expected number less than %g."), max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Number expected for %s."), name);
+          else
+            lex_error (lexer, _("Number expected."));
+        }
+    }
+  return false;
+}
+
  /* If the current token is an identifier, does nothing and returns true.
     Otherwise, reports an error and returns false. */
  bool
@@ -1221,15 +1473,37 @@ lex_ofs_end_point (const struct lexer *lexer, int ofs)
  
  /* Returns the text of the syntax in tokens N0 ahead of the current one,
     through N1 ahead of the current one, inclusive.  (For example, if N0 and N1
-   are both zero, this requests the syntax for the current token.)  The caller
-   must eventually free the returned string (with free()).  The syntax is
-   encoded in UTF-8 and in the original form supplied to the lexer so that, for
-   example, it may include comments, spaces, and new-lines if it spans multiple
-   tokens.  Macro expansion, however, has already been performed. */
+   are both zero, this requests the syntax for the current token.)
+
+   The caller must eventually free the returned string (with free()).  The
+   syntax is encoded in UTF-8 and in the original form supplied to the lexer so
+   that, for example, it may include comments, spaces, and new-lines if it
+   spans multiple tokens.  Macro expansion, however, has already been
+   performed. */
  char *
  lex_next_representation (const struct lexer *lexer, int n0, int n1)
  {
-  return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
+  const struct lex_source *src = lex_source__ (lexer);
+  return (src
+          ? lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs)
+          : xstrdup (""));
+}
+
+
+/* Returns the text of the syntax in tokens with offsets OFS0 to OFS1,
+   inclusive.  (For example, if OFS0 and OFS1 are both zero, this requests the
+   syntax for the first token in the current command.)
+
+   The caller must eventually free the returned string (with free()).  The
+   syntax is encoded in UTF-8 and in the original form supplied to the lexer so
+   that, for example, it may include comments, spaces, and new-lines if it
+   spans multiple tokens.  Macro expansion, however, has already been
+   performed. */
+char *
+lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  return src ? lex_source_syntax__ (src, ofs0, ofs1) : xstrdup ("");
  }
  
  /* Returns true if the token N ahead of the current one was produced by macro
@@ -1386,11 +1660,11 @@ lex_token_location_rw (const struct lex_source *src,
  }
  
  static struct msg_location *
-lex_source_get_location (const struct lex_source *src, int n0, int n1)
+lex_source_get_location (const struct lex_source *src, int ofs0, int ofs1)
  {
    return lex_token_location_rw (src,
-                                lex_source_next__ (src, n0),
-                                lex_source_next__ (src, n1));
+                                lex_source_ofs__ (src, ofs0),
+                                lex_source_ofs__ (src, ofs1));
  }
  
  /* Returns the name of the syntax file from which the current command is drawn.
@@ -1557,32 +1831,33 @@ lex_source__ (const struct lexer *lexer)
            : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
  }
  
-/* Returns the text of the syntax in SRC for tokens N0 ahead of the current
-   one, through N1 ahead of the current one, inclusive.  (For example, if N0
-   and N1 are both zero, this requests the syntax for the current token.)  The
-   caller must eventually free the returned string (with free()).  The syntax
-   is encoded in UTF-8 and in the original form supplied to the lexer so that,
-   for example, it may include comments, spaces, and new-lines if it spans
-   multiple tokens.  Macro expansion, however, has already been performed. */
+/* Returns the text of the syntax in SRC for tokens with offsets OFS0 through
+   OFS1 in the current command, inclusive.  (For example, if OFS0 and OFS1 are
+   both zero, this requests the syntax for the first token in the current
+   command.)  The caller must eventually free the returned string (with
+   free()).  The syntax is encoded in UTF-8 and in the original form supplied
+   to the lexer so that, for example, it may include comments, spaces, and
+   new-lines if it spans multiple tokens.  Macro expansion, however, has
+   already been performed. */
  static char *
-lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+lex_source_syntax__ (const struct lex_source *src, int ofs0, int ofs1)
  {
    struct string s = DS_EMPTY_INITIALIZER;
-  for (size_t i = n0; i <= n1; )
+  for (size_t i = ofs0; i <= ofs1; )
      {
        /* Find [I,J) as the longest sequence of tokens not produced by macro
           expansion, or otherwise the longest sequence expanded from a single
           macro call. */
-      const struct lex_token *first = lex_source_next__ (src, i);
+      const struct lex_token *first = lex_source_ofs__ (src, i);
        size_t j;
-      for (j = i + 1; j <= n1; j++)
+      for (j = i + 1; j <= ofs1; j++)
          {
-          const struct lex_token *cur = lex_source_next__ (src, j);
+          const struct lex_token *cur = lex_source_ofs__ (src, j);
            if ((first->macro_rep != NULL) != (cur->macro_rep != NULL)
                || first->macro_rep != cur->macro_rep)
              break;
          }
-      const struct lex_token *last = lex_source_next__ (src, j - 1);
+      const struct lex_token *last = lex_source_ofs__ (src, j - 1);
  
        /* Now add the syntax for this sequence of tokens to SRC. */
        if (!ds_is_empty (&s))
@@ -1607,10 +1882,10 @@ lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
  }
  
  static bool
-lex_source_contains_macro_call (struct lex_source *src, int n0, int n1)
+lex_source_contains_macro_call (struct lex_source *src, int ofs0, int ofs1)
  {
-  for (size_t i = n0; i <= n1; i++)
-    if (lex_source_next__ (src, i)->macro_rep)
+  for (int i = ofs0; i <= ofs1; i++)
+    if (lex_source_ofs__ (src, i)->macro_rep)
        return true;
    return false;
  }
@@ -1625,13 +1900,13 @@ lex_source_contains_macro_call (struct lex_source *src, int n0, int n1)
  
     The caller must not modify or free the returned string. */
  static struct substring
-lex_source_get_macro_call (struct lex_source *src, int n0, int n1)
+lex_source_get_macro_call (struct lex_source *src, int ofs0, int ofs1)
  {
-  if (!lex_source_contains_macro_call (src, n0, n1))
+  if (!lex_source_contains_macro_call (src, ofs0, ofs1))
      return ss_empty ();
  
-  const struct lex_token *token0 = lex_source_next__ (src, n0);
-  const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1));
+  const struct lex_token *token0 = lex_source_ofs__ (src, ofs0);
+  const struct lex_token *token1 = lex_source_ofs__ (src, MAX (ofs0, ofs1));
    size_t start = token0->token_pos;
    size_t end = token1->token_pos + token1->token_len;
  
@@ -1639,7 +1914,7 @@ lex_source_get_macro_call (struct lex_source *src, int n0, int n1)
  }
  
  static void
-lex_source_error_valist (struct lex_source *src, int n0, int n1,
+lex_source_error_valist (struct lex_source *src, int ofs0, int ofs1,
                           const char *format, va_list args)
  {
    const struct lex_token *token;
@@ -1647,13 +1922,13 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
  
    ds_init_empty (&s);
  
-  token = lex_source_next__ (src, n0);
+  token = lex_source_ofs__ (src, ofs0);
    if (token->token.type == T_ENDCMD)
      ds_put_cstr (&s, _("Syntax error at end of command"));
    else
      {
        /* Get the syntax that caused the error. */
-      char *raw_syntax = lex_source_get_syntax__ (src, n0, n1);
+      char *raw_syntax = lex_source_syntax__ (src, ofs0, ofs1);
        char syntax[64];
        str_ellipsize (ss_cstr (raw_syntax), syntax, sizeof syntax);
        free (raw_syntax);
@@ -1661,7 +1936,7 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
        /* Get the macro call(s) that expanded to the syntax that caused the
           error. */
        char call[64];
-      str_ellipsize (lex_source_get_macro_call (src, n0, n1),
+      str_ellipsize (lex_source_get_macro_call (src, ofs0, ofs1),
                       call, sizeof call);
  
        if (syntax[0])
@@ -1695,7 +1970,7 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
    *m = (struct msg) {
      .category = MSG_C_SYNTAX,
      .severity = MSG_S_ERROR,
-    .location = lex_source_get_location (src, n0, n1),
+    .location = lex_source_get_location (src, ofs0, ofs1),
      .text = ds_steal_cstr (&s),
    };
    msg_emit (m);