lexer: Factor out scan error messages into new function.

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c

index 1025414a7c4407bf265519af144b255c1d836629..c109077a89e41ecbaff81d9aed66fd805b30ef7d 100644 (file)
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -1606,21 +1606,26 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
    msg_emit (m);
  }
  
-static void PRINTF_FORMAT (2, 3)
-lex_get_error (struct lex_source *src, const char *format, ...)
+static void PRINTF_FORMAT (4, 5) /* FORMAT is parameter 4; its arguments start at parameter 5. */
+lex_source_error (struct lex_source *src, int n0, int n1,
+                  const char *format, ...)
  {
    va_list args;
    va_start (args, format);
+  lex_source_error_valist (src, n0, n1, format, args); /* Variadic front end for the va_list variant. */
+  va_end (args);
+}
  
+static void
+lex_get_error (struct lex_source *src, const char *s) /* S is the complete message text. */
+{
    size_t old_middle = src->middle;
    src->middle = src->front;
    size_t n = src->front - src->back - 1;
-  lex_source_error_valist (src, n, n, format, args);
+  lex_source_error (src, n, n, "%s", s); /* "%s" passes S as data, not as a format string. */
    src->middle = old_middle;
  
    lex_source_pop_front (src);
-
-  va_end (args);
  }
  
  /* Attempts to append an additional token at the front of SRC, reading more
@@ -1772,44 +1777,21 @@ lex_source_try_get__ (struct lex_source *src)
        return true;
  
      case SCAN_BAD_HEX_LENGTH:
-      lex_get_error (src, _("String of hex digits has %d characters, which "
-                            "is not a multiple of 2"),
-                     (int) token->token.number);
-      return false;
-
      case SCAN_BAD_HEX_DIGIT:
      case SCAN_BAD_UNICODE_DIGIT:
-      lex_get_error (src, _("`%c' is not a valid hex digit"),
-                     (int) token->token.number);
-      return false;
-
      case SCAN_BAD_UNICODE_LENGTH:
-      lex_get_error (src, _("Unicode string contains %d bytes, which is "
-                            "not in the valid range of 1 to 8 bytes"),
-                     (int) token->token.number);
-      return false;
-
      case SCAN_BAD_UNICODE_CODE_POINT:
-      lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
-                     (int) token->token.number);
-      return false;
-
      case SCAN_EXPECTED_QUOTE:
-      lex_get_error (src, _("Unterminated string constant"));
-      return false;
-
      case SCAN_EXPECTED_EXPONENT:
-      lex_get_error (src, _("Missing exponent following `%s'"),
-                     token->token.string.string);
-      return false;
-
      case SCAN_UNEXPECTED_CHAR:
-      {
-        char c_name[16];
-        lex_get_error (src, _("Bad character %s in input"),
-                       uc_name (token->token.number, c_name));
-        return false;
-      }
+      {
+        /* Braces: before C23 a declaration cannot directly follow a case
+           label.  The message is heap-allocated, so free it after use. */
+        char *msg = scan_token_to_error (&token->token);
+        lex_get_error (src, msg);
+        free (msg);
+        return false;
+      }
  
      case SCAN_SKIP:
        lex_source_pop_front (src);
diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c

index 7aa01593f68be02dc424e1950800607e70ea0948..611a60ef63e90354e885d1ad3f631ac6cb045ec5 100644 (file)
--- a/src/language/lexer/scan.c
+++ b/src/language/lexer/scan.c
@@ -25,11 +25,15 @@
  #include "language/lexer/token.h"
  #include "libpspp/assertion.h"
  #include "libpspp/cast.h"
+#include "libpspp/i18n.h"
  
  #include "gl/c-ctype.h"
  #include "gl/c-strtod.h"
  #include "gl/xmemdup0.h"
  
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
  enum
    {
      S_START,
@@ -421,6 +425,51 @@ is_scan_type (enum scan_type type)
    return type > SCAN_FIRST && type < SCAN_LAST;
  }
  
+/* If TOKEN has the type of a scan error (a subset of those identified by
+   is_scan_type()), returns an appropriate error message, which the caller
+   must free with free().  Otherwise, returns NULL. */
+char *
+scan_token_to_error (const struct token *token)
+{
+  switch (token->type)
+    {
+    case SCAN_BAD_HEX_LENGTH:
+      return xasprintf (_("String of hex digits has %d characters, which "
+                          "is not a multiple of 2"), (int) token->number);
+
+    case SCAN_BAD_HEX_DIGIT:
+    case SCAN_BAD_UNICODE_DIGIT:
+      return xasprintf (_("`%c' is not a valid hex digit"),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_LENGTH:
+      return xasprintf (_("Unicode string contains %d bytes, which is "
+                          "not in the valid range of 1 to 8 bytes"),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_CODE_POINT:
+      return xasprintf (_("U+%04X is not a valid Unicode code point"),
+                        (int) token->number);
+
+    case SCAN_EXPECTED_QUOTE:
+      return xasprintf (_("Unterminated string constant"));
+
+    case SCAN_EXPECTED_EXPONENT:
+      return xasprintf (_("Missing exponent following `%s'"),
+                        token->string.string);
+
+    case SCAN_UNEXPECTED_CHAR:
+      {
+        /* Braced: pre-C23, a declaration may not follow a label directly. */
+        char c_name[16];
+        return xasprintf (_("Bad character %s in input"),
+                          uc_name (token->number, c_name));
+      }
+    }
+
+  return NULL;
+}
+
  static enum scan_result
  scan_start__ (struct scanner *scanner, enum segment_type type,
                struct substring s, struct token *token)
diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h

index 1c0ff7a1e5477286ff1d1256a4e6c5823877fba0..0dde2738049d6d8fdbe2f4bcdb6f5a2bf7b8a3e2 100644 (file)
--- a/src/language/lexer/scan.h
+++ b/src/language/lexer/scan.h
@@ -66,6 +66,8 @@ enum scan_type
  const char *scan_type_to_string (enum scan_type);
  bool is_scan_type (enum scan_type);
  
+char *scan_token_to_error (const struct token *); /* Caller frees the result; NULL if not a scan error. */
+
  /* A scanner.  Opaque. */
  struct scanner
    {
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
src/language/lexer/lexer.c		patch \| blob \| history
src/language/lexer/scan.c		patch \| blob \| history
src/language/lexer/scan.h		patch \| blob \| history