dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
-dnl
+dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
-dnl
+dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
dnl GNU General Public License for more details.
-dnl
+dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
dnl
'foo
'very long unterminated string that be ellipsized in its error message
1e .x
-`
+^
�
])
AT_CHECK([pspp -O format=csv lexer.sps], [1], [dnl
-"lexer.sps:1.1-1.6: error: Syntax error at `x'123'': String of hex digits has 3 characters, which is not a multiple of 2."
-
-lexer.sps:2.1-2.5: error: Syntax error at `x'1x'': `x' is not a valid hex digit.
+"lexer.sps:1.1-1.6: error: String of hex digits has 3 characters, which is not a multiple of 2.
+ 1 | x'123'
+ | ^~~~~~"
-"lexer.sps:3.1-3.3: error: Syntax error at `u''': Unicode string contains 0 bytes, which is not in the valid range of 1 to 8 bytes."
+"lexer.sps:2.1-2.5: error: `x' is not a valid hex digit.
+ 2 | x'1x'
+ | ^~~~~"
-"lexer.sps:4.1-4.12: error: Syntax error at `u'012345678'': Unicode string contains 9 bytes, which is not in the valid range of 1 to 8 bytes."
+"lexer.sps:3.1-3.3: error: Unicode string contains 0 bytes, which is not in the valid range of 1 to 8 bytes.
+ 3 | u''
+ | ^~~"
-lexer.sps:5.1-5.7: error: Syntax error at `u'd800'': U+D800 is not a valid Unicode code point.
+"lexer.sps:4.1-4.12: error: Unicode string contains 9 bytes, which is not in the valid range of 1 to 8 bytes.
+ 4 | u'012345678'
+ | ^~~~~~~~~~~~"
-lexer.sps:6.1-6.9: error: Syntax error at `u'110000'': U+110000 is not a valid Unicode code point.
+"lexer.sps:5.1-5.7: error: U+D800 is not a valid Unicode code point.
+ 5 | u'd800'
+ | ^~~~~~~"
-lexer.sps:7.1-7.4: error: Syntax error at `'foo': Unterminated string constant.
+"lexer.sps:6.1-6.9: error: U+110000 is not a valid Unicode code point.
+ 6 | u'110000'
+ | ^~~~~~~~~"
-lexer.sps:8.1-8.70: error: Syntax error at `'very long unterminated string that be ellipsized in its err...': Unterminated string constant.
+"lexer.sps:7.1-7.4: error: Unterminated string constant.
+ 7 | 'foo
+ | ^~~~"
-lexer.sps:9.1-9.2: error: Syntax error at `1e': Missing exponent following `1e'.
+"lexer.sps:8.1-8.70: error: Unterminated string constant.
+ 8 | 'very long unterminated string that be ellipsized in its error message
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-lexer.sps:9.4: error: Syntax error at `.': Unexpected `.' in middle of command.
+"lexer.sps:9.1-9.2: error: Missing exponent following `1e'.
+ 9 | 1e .x
+ | ^~"
-lexer.sps:9: error: Unknown command `x'.
+"lexer.sps:9.4: error: Syntax error expecting command name.
+ 9 | 1e .x
+ | ^"
-lexer.sps:10.1: error: Syntax error at ``': Bad character ``' in input.
+"lexer.sps:10.1: error: Bad character `^' in input.
+ 10 | ^
+ | ^"
-lexer.sps:11.1: error: Syntax error at `�': Bad character U+FFFD in input.
+"lexer.sps:11.1-11.2: error: Bad character U+FFFD in input.
+ 11 | �
+ | ^~"
])
AT_CLEANUP
printf "datA dist list notable file='input.txt'/a b c.
lis|.\0" > lexer.sps
-# We sort the output into a predictable order because the lexer finds
-# and reports null bytes as soon as it reads them into its input
-# buffer, as opposed to when it encounters them during tokenization.
-# This also means that null bytes might be reported as part of one
-# command or another or none, hence removing the LIST: prefix.
-AT_CHECK([pspp -O format=csv lexer.sps > lexer.csv], [1])
-AT_CHECK([sed '/^$/d
-s/LIST: //' lexer.csv | sort], [0], [dnl
-lexer.sps: error: Bad character U+0000 in input.
-lexer.sps:1: error: Unknown command `datA dist'.
-lexer.sps:2: error: LIST is allowed only after the active dataset has been defined.
+AT_CHECK([pspp -O format=csv lexer.sps], [1], [dnl
+"lexer.sps:1.1-1.9: error: Unknown command `datA dist'.
+ 1 | datA dist list notable file='input.txt'/a b c.
+ | ^~~~~~~~~"
+
+"lexer.sps:2.1-2.3: error: LIST: LIST is allowed only after the active dataset has been defined.
+ 2 | lis|."
+
+"lexer.sps:2.6: error: LIST: Bad character U+0000 in input.
+ 2 | lis|."
+])
+AT_CLEANUP
+
+
+
+
+# Bug #54684: regression test for the crash named in the test title. The input contains a number far above 2147483647; pspp must exit with status 1 and print the range error below.
+AT_SETUP([lexer crash due to overflow])
+printf "DATA LIST/5555555555555555." > lexer.sps
+
+AT_CHECK([pspp -O format=csv lexer.sps], [1], [dnl
+"lexer.sps:1.11-1.26: error: DATA LIST: Syntax error expecting integer between 1 and 2147483647.
+ 1 | DATA LIST/5555555555555555.
+ | ^~~~~~~~~~~~~~~~"
])
+
AT_CLEANUP