lexer: Factor out scan error messages into new function.

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Mon, 5 Jul 2021 01:22:12 +0000 (18:22 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 5 Jul 2021 01:22:12 +0000 (18:22 -0700)
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c

index 8eac18066f7b36346192743ed3ab6702c0a5a179..df2806eedf090e64d14747512dd1070620cc7b16 100644 (file)
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -1457,19 +1457,22 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
    msg_emit (m);
  }
  
-static void PRINTF_FORMAT (2, 3)
-lex_get_error (struct lex_source *src, const char *format, ...)
+static void PRINTF_FORMAT (4, 5)  /* FORMAT is parameter 4; varargs start at 5. */
+lex_source_error (struct lex_source *src, int n0, int n1,
+                  const char *format, ...)  /* Varargs front end for lex_source_error_valist(). */
  {
    va_list args;
-  int n;
-
    va_start (args, format);
+  lex_source_error_valist (src, n0, n1, format, args);  /* SRC, N0 and N1 are forwarded unchanged. */
+  va_end (args);
+}
  
-  n = deque_count (&src->deque) - 1;
-  lex_source_error_valist (src, n, n, format, args);
+static void
+lex_get_error (struct lex_source *src, const char *s)  /* Reports S via lex_source_error(), then calls lex_source_pop_front(). */
+{
+  int n = deque_count (&src->deque) - 1;  /* Presumably the index of the newest token in SRC's deque -- confirm. */
+  lex_source_error (src, n, n, "%s", s);  /* S is passed as data, so a % inside it is never read as a directive. */
    lex_source_pop_front (src);
-
-  va_end (args);
  }
  
  /* Attempts to append an additional token into SRC's deque, reading more from
@@ -1621,43 +1624,16 @@ lex_source_get__ (const struct lex_source *src_)
        break;
  
      case SCAN_BAD_HEX_LENGTH:
-      lex_get_error (src, _("String of hex digits has %d characters, which "
-                            "is not a multiple of 2"),
-                     (int) token->token.number);
-      break;
-
      case SCAN_BAD_HEX_DIGIT:
      case SCAN_BAD_UNICODE_DIGIT:
-      lex_get_error (src, _("`%c' is not a valid hex digit"),
-                     (int) token->token.number);
-      break;
-
      case SCAN_BAD_UNICODE_LENGTH:
-      lex_get_error (src, _("Unicode string contains %d bytes, which is "
-                            "not in the valid range of 1 to 8 bytes"),
-                     (int) token->token.number);
-      break;
-
      case SCAN_BAD_UNICODE_CODE_POINT:
-      lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
-                     (int) token->token.number);
-      break;
-
      case SCAN_EXPECTED_QUOTE:
-      lex_get_error (src, _("Unterminated string constant"));
-      break;
-
      case SCAN_EXPECTED_EXPONENT:
-      lex_get_error (src, _("Missing exponent following `%s'"),
-                     token->token.string.string);
-      break;
-
      case SCAN_UNEXPECTED_CHAR:
-      {
-        char c_name[16];
-        lex_get_error (src, _("Bad character %s in input"),
-                       uc_name (token->token.number, c_name));
-      }
+      char *msg = scan_token_to_error (&token->token);
+      lex_get_error (src, msg);
+      free (msg);
        break;
  
      case SCAN_SKIP:
diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c

index 86ebb7d00675cd6c89d223d49b1924cb278473e9..0c922105407ab9dad66001b9d17f4ef2ab918df9 100644 (file)
--- a/src/language/lexer/scan.c
+++ b/src/language/lexer/scan.c
@@ -25,11 +25,15 @@
  #include "language/lexer/token.h"
  #include "libpspp/assertion.h"
  #include "libpspp/cast.h"
+#include "libpspp/i18n.h"
  
  #include "gl/c-ctype.h"
  #include "gl/c-strtod.h"
  #include "gl/xmemdup0.h"
  
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
  enum
    {
      S_START,
@@ -421,6 +425,48 @@ is_scan_type (enum scan_type type)
    return type > SCAN_FIRST && type < SCAN_LAST;
  }
  
+/* If TOKEN has the type of a scan error (a subset of those identified by
+   is_scan_type()), returns an appropriate error message, passed through
+   gettext for translation.  The message is a string allocated by xasprintf()
+   that the caller is responsible for freeing.  Otherwise, returns NULL.
+
+   For the SCAN_BAD_* and SCAN_UNEXPECTED_CHAR types the detail reported in
+   the message is read from TOKEN->number; for SCAN_EXPECTED_EXPONENT it is
+   read from TOKEN->string. */
+char *
+scan_token_to_error (const struct token *token)
+{
+  switch (token->type)
+    {
+    case SCAN_BAD_HEX_LENGTH:
+      return xasprintf (_("String of hex digits has %d characters, which "
+                          "is not a multiple of 2."), (int) token->number);
+
+    case SCAN_BAD_HEX_DIGIT:
+    case SCAN_BAD_UNICODE_DIGIT:
+      return xasprintf (_("`%c' is not a valid hex digit."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_LENGTH:
+      return xasprintf (_("Unicode string contains %d bytes, which is "
+                          "not in the valid range of 1 to 8 bytes."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_CODE_POINT:
+      return xasprintf (_("U+%04X is not a valid Unicode code point."),
+                        (int) token->number);
+
+    case SCAN_EXPECTED_QUOTE:
+      return xasprintf (_("Unterminated string constant."));
+
+    case SCAN_EXPECTED_EXPONENT:
+      return xasprintf (_("Missing exponent following `%s'."),
+                        token->string.string);
+
+    case SCAN_UNEXPECTED_CHAR:
+      {
+        /* The braces are required for portability: before C23 a `case'
+           label must be followed by a statement, and a declaration is not
+           a statement. */
+        char c_name[16];
+        return xasprintf (_("Bad character %s in input."),
+                          uc_name (token->number, c_name));
+      }
+    }
+
+  /* TOKEN is not one of the scan error types handled above. */
+  return NULL;
+}
+
  static enum scan_result
  scan_start__ (struct scanner *scanner, enum segment_type type,
                struct substring s, struct token *token)
diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h

index 866321b0c84c5c61e8dec03b29fc3901554db3b6..61bfc5b553964dcebcc64726593a071de826c44f 100644 (file)
--- a/src/language/lexer/scan.h
+++ b/src/language/lexer/scan.h
@@ -66,6 +66,8 @@ enum scan_type
  const char *scan_type_to_string (enum scan_type);
  bool is_scan_type (enum scan_type);
  
+char *scan_token_to_error (const struct token *);
+
  /* A scanner.  Opaque. */
  struct scanner
    {
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 4 Jul 2021 02:35:32 +0000 (19:35 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Mon, 5 Jul 2021 01:22:12 +0000 (18:22 -0700)
src/language/lexer/lexer.c		patch | blob | history
src/language/lexer/scan.c		patch | blob | history
src/language/lexer/scan.h		patch | blob | history