From 939dd9ec5ada7d7a91560d4c1e416709d923dcf3 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 3 Jul 2021 19:35:32 -0700
Subject: [PATCH] lexer: Factor out scan error messages into new function.

The message text for each scan error moves out of lex_source_get__() in
lexer.c and into the new function scan_token_to_error() in scan.c, which
returns the formatted message as a string that the caller frees.  The
messages now end with a period.

lex_get_error() now takes an already-formatted string; the printf-style
formatting it used to do is available through the new helper
lex_source_error().

Local declarations that follow a "case" label are kept inside a braced
block, because before C23 a label must be followed by a statement, not
a declaration.
---
 src/language/lexer/lexer.c | 50 ++++++++++----------------------------
 src/language/lexer/scan.c  | 48 ++++++++++++++++++++++++++++++++++++
 src/language/lexer/scan.h  |  2 ++
 3 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index 8eac18066f..df2806eedf 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -1457,19 +1457,22 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
   msg_emit (m);
 }
 
-static void PRINTF_FORMAT (2, 3)
-lex_get_error (struct lex_source *src, const char *format, ...)
+static void PRINTF_FORMAT (4, 5)
+lex_source_error (struct lex_source *src, int n0, int n1,
+                  const char *format, ...)
 {
   va_list args;
-  int n;
-
   va_start (args, format);
+  lex_source_error_valist (src, n0, n1, format, args);
+  va_end (args);
+}
 
-  n = deque_count (&src->deque) - 1;
-  lex_source_error_valist (src, n, n, format, args);
+static void
+lex_get_error (struct lex_source *src, const char *s)
+{
+  int n = deque_count (&src->deque) - 1;
+  lex_source_error (src, n, n, "%s", s);
   lex_source_pop_front (src);
-
-  va_end (args);
 }
 
 /* Attempts to append an additional token into SRC's deque, reading more from
@@ -1621,43 +1624,18 @@ lex_source_get__ (const struct lex_source *src_)
       break;
 
     case SCAN_BAD_HEX_LENGTH:
-      lex_get_error (src, _("String of hex digits has %d characters, which "
-                            "is not a multiple of 2"),
-                     (int) token->token.number);
-      break;
-
     case SCAN_BAD_HEX_DIGIT:
     case SCAN_BAD_UNICODE_DIGIT:
-      lex_get_error (src, _("`%c' is not a valid hex digit"),
-                     (int) token->token.number);
-      break;
-
     case SCAN_BAD_UNICODE_LENGTH:
-      lex_get_error (src, _("Unicode string contains %d bytes, which is "
-                            "not in the valid range of 1 to 8 bytes"),
-                     (int) token->token.number);
-      break;
-
     case SCAN_BAD_UNICODE_CODE_POINT:
-      lex_get_error (src, _("U+%04X is not a valid Unicode code point"),
-                     (int) token->token.number);
-      break;
-
     case SCAN_EXPECTED_QUOTE:
-      lex_get_error (src, _("Unterminated string constant"));
-      break;
-
     case SCAN_EXPECTED_EXPONENT:
-      lex_get_error (src, _("Missing exponent following `%s'"),
-                     token->token.string.string);
-      break;
-
     case SCAN_UNEXPECTED_CHAR:
       {
-        char c_name[16];
-        lex_get_error (src, _("Bad character %s in input"),
-                       uc_name (token->token.number, c_name));
+        char *msg = scan_token_to_error (&token->token);
+        lex_get_error (src, msg);
+        free (msg);
       }
       break;
 
     case SCAN_SKIP:
diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c
index 86ebb7d006..0c92210540 100644
--- a/src/language/lexer/scan.c
+++ b/src/language/lexer/scan.c
@@ -25,11 +25,15 @@
 #include "language/lexer/token.h"
 #include "libpspp/assertion.h"
 #include "libpspp/cast.h"
+#include "libpspp/i18n.h"
 
 #include "gl/c-ctype.h"
 #include "gl/c-strtod.h"
 #include "gl/xmemdup0.h"
 
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
 enum
   {
     S_START,
@@ -421,6 +425,50 @@ is_scan_type (enum scan_type type)
   return type > SCAN_FIRST && type < SCAN_LAST;
 }
 
+/* If TOKEN has the type of a scan error (a subset of those identified by
+   is_scan_type()), returns an appropriate error message, which the caller
+   must free.  Otherwise, returns NULL. */
+char *
+scan_token_to_error (const struct token *token)
+{
+  switch (token->type)
+    {
+    case SCAN_BAD_HEX_LENGTH:
+      return xasprintf (_("String of hex digits has %d characters, which "
+                          "is not a multiple of 2."), (int) token->number);
+
+    case SCAN_BAD_HEX_DIGIT:
+    case SCAN_BAD_UNICODE_DIGIT:
+      return xasprintf (_("`%c' is not a valid hex digit."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_LENGTH:
+      return xasprintf (_("Unicode string contains %d bytes, which is "
+                          "not in the valid range of 1 to 8 bytes."),
+                        (int) token->number);
+
+    case SCAN_BAD_UNICODE_CODE_POINT:
+      return xasprintf (_("U+%04X is not a valid Unicode code point."),
+                        (int) token->number);
+
+    case SCAN_EXPECTED_QUOTE:
+      return xasprintf (_("Unterminated string constant."));
+
+    case SCAN_EXPECTED_EXPONENT:
+      return xasprintf (_("Missing exponent following `%s'."),
+                        token->string.string);
+
+    case SCAN_UNEXPECTED_CHAR:
+      {
+        char c_name[16];
+        return xasprintf (_("Bad character %s in input."),
+                          uc_name (token->number, c_name));
+      }
+    }
+
+  return NULL;
+}
+
 static enum scan_result
 scan_start__ (struct scanner *scanner, enum segment_type type,
               struct substring s, struct token *token)
diff --git a/src/language/lexer/scan.h b/src/language/lexer/scan.h
index 866321b0c8..61bfc5b553 100644
--- a/src/language/lexer/scan.h
+++ b/src/language/lexer/scan.h
@@ -66,6 +66,8 @@ enum scan_type
 const char *scan_type_to_string (enum scan_type);
 bool is_scan_type (enum scan_type);
 
+char *scan_token_to_error (const struct token *);
+
 /* A scanner.  Opaque. */
 struct scanner
   {
-- 
2.30.2