From fc6fccc92a47c4bb4e03bd39600234da7cf7ba55 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 2 Apr 2016 17:53:52 -0700 Subject: [PATCH] lexer: Treat null characters in input as spaces. In some circumstances nulls could confuse the lexer and cause crashes. Thanks to John Darrington for reporting the problem. Bug #47602. --- src/language/lexer/lexer.c | 32 +++++++++++++++++++++++--------- tests/language/lexer/lexer.at | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index ea81f19559..1f455119f0 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -1174,19 +1174,33 @@ lex_source_read__ (struct lex_source *src) { do { - size_t head_ofs; - size_t space; - size_t n; - lex_source_expand__ (src); - head_ofs = src->head - src->tail; - space = src->allocated - head_ofs; - n = src->reader->class->read (src->reader, &src->buffer[head_ofs], - space, - segmenter_get_prompt (&src->segmenter)); + size_t head_ofs = src->head - src->tail; + size_t space = src->allocated - head_ofs; + enum prompt_style prompt = segmenter_get_prompt (&src->segmenter); + size_t n = src->reader->class->read (src->reader, &src->buffer[head_ofs], + space, prompt); assert (n <= space); + for (char *p = &src->buffer[head_ofs]; p < &src->buffer[head_ofs + n]; + p++) + if (*p == '\0') + { + struct msg m; + m.category = MSG_C_SYNTAX; + m.severity = MSG_S_ERROR; + m.file_name = src->reader->file_name; + m.first_line = 0; + m.last_line = 0; + m.first_column = 0; + m.last_column = 0; + m.text = xstrdup ("Bad character U+0000 in input."); + msg_emit (&m); + + *p = ' '; + } + if (n == 0) { /* End of input. diff --git a/tests/language/lexer/lexer.at b/tests/language/lexer/lexer.at index 08c146447f..af6de0c904 100644 --- a/tests/language/lexer/lexer.at +++ b/tests/language/lexer/lexer.at @@ -61,3 +61,17 @@ lexer.sps:10.1: error: Syntax error at ``': Bad character ``' in input. lexer.sps:11.1: error: Syntax error at `�': Bad character U+FFFD in input. ]) AT_CLEANUP + +# Bug #47602. +AT_SETUP([lexer crash due to null byte]) +# Intentionally leave out the new-line and add a null byte: +printf "datA dist list notable file='input.txt'/a b c. +lis|.\0" > lexer.sps +AT_CHECK([pspp -O format=csv lexer.sps], [1], [dnl +lexer.sps: error: Bad character U+0000 in input. + +lexer.sps:1: error: Unknown command `datA dist'. + +lexer.sps:2: error: LIST: LIST is allowed only after the active dataset has been defined. +]) +AT_CLEANUP -- 2.30.2