From be14f24529e64b7d1dad2b148b4d254da38160de Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 5 Jul 2021 15:15:45 -0700 Subject: [PATCH] segment: Make negative numbers into single segments. --- src/language/lexer/scan.c | 20 +++--- src/language/lexer/segment.c | 29 ++++++-- tests/language/lexer/scan.at | 113 +++++++++++++++++++++++++++++++- tests/language/lexer/segment.at | 79 +++++++++++++++++++++- 4 files changed, 223 insertions(+), 18 deletions(-) diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index 3b9e3c5a22..32f5c9ae16 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -369,11 +369,10 @@ scan_punct__ (struct substring s) : scan_punct2__ (s.string[0], s.string[1])); } -static double -scan_number__ (struct substring s) +static void +scan_number__ (struct substring s, struct token *token) { char buf[128]; - double number; char *p; if (s.length < sizeof buf) @@ -385,12 +384,15 @@ scan_number__ (struct substring s) else p = xmemdup0 (s.string, s.length); - number = c_strtod (p, NULL); + bool negative = *p == '-'; + double x = c_strtod (p + negative, NULL); + *token = (struct token) { + .type = negative ? T_NEG_NUM : T_POS_NUM, + .number = negative ? -x : x, + }; if (p != buf) free (p); - - return number; } static enum scan_result @@ -476,8 +478,7 @@ scan_start__ (struct scanner *scanner, enum segment_type type, switch (type) { case SEG_NUMBER: - token->type = T_POS_NUM; - token->number = scan_number__ (s); + scan_number__ (s, token); return SCAN_DONE; case SEG_QUOTED_STRING: @@ -571,8 +572,9 @@ scan_dash__ (enum segment_type type, struct substring s, struct token *token) return SCAN_MORE; case SEG_NUMBER: + scan_number__ (s, token); token->type = T_NEG_NUM; - token->number = -scan_number__ (s); + token->number = -token->number; return SCAN_DONE; default: diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c index 9c2c9b11e8..5c298a781d 100644 --- a/src/language/lexer/segment.c +++ b/src/language/lexer/segment.c @@ -291,13 +291,11 @@ skip_digits (const char *input, size_t n, bool eof, int ofs) static int segmenter_parse_number__ (struct segmenter *s, const char *input, size_t n, - bool eof, enum segment_type *type) + bool eof, enum segment_type *type, int ofs) { - int ofs; - assert (s->state == S_GENERAL); - ofs = skip_digits (input, n, eof, 0); + ofs = skip_digits (input, n, eof, ofs); if (ofs < 0) return -1; @@ -939,7 +937,24 @@ segmenter_parse_mid_command__ (struct segmenter *s, *type = SEG_PUNCT; return 1; - case '(': case ')': case ',': case '=': case '-': + case '-': + ofs = skip_spaces (input, n, eof, 1); + if (ofs < 0) + return -1; + else if (c_isdigit (input[ofs])) + return segmenter_parse_number__ (s, input, n, eof, type, ofs); + else if (input[ofs] == '.') + { + if (ofs + 1 >= n) + { + if (!eof) + return -1; + } + else if (c_isdigit (input[ofs + 1])) + return segmenter_parse_number__ (s, input, n, eof, type, ofs); + } + /* Fall through. */ + case '(': case ')': case ',': case '=': case '[': case ']': case '&': case '|': case '+': *type = SEG_PUNCT; s->substate = 0; @@ -971,7 +986,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, return -1; } else if (c_isdigit (input[1])) - return segmenter_parse_number__ (s, input, n, eof, type); + return segmenter_parse_number__ (s, input, n, eof, type, 0); int eol = at_end_of_line (input, n, eof, 1); if (eol < 0) @@ -988,7 +1003,7 @@ segmenter_parse_mid_command__ (struct segmenter *s, case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return segmenter_parse_number__ (s, input, n, eof, type); + return segmenter_parse_number__ (s, input, n, eof, type, 0); case 'u': case 'U': return segmenter_maybe_parse_string__ (SEG_UNICODE_STRING, diff --git a/tests/language/lexer/scan.at b/tests/language/lexer/scan.at index 146b891e1c..fe53f37a8f 100644 --- a/tests/language/lexer/scan.at +++ b/tests/language/lexer/scan.at @@ -274,7 +274,7 @@ STOP PSPP_CHECK_SCAN([-i]) AT_CLEANUP -AT_SETUP([numbers]) +AT_SETUP([positive numbers]) AT_KEYWORDS([scan]) AT_DATA([input], [dnl 0 1 01 001. 1. @@ -359,6 +359,117 @@ STOP PSPP_CHECK_SCAN([-i]) AT_CLEANUP +AT_SETUP([negative numbers]) +AT_KEYWORDS([scan]) +AT_DATA([input-base], [dnl + -0 -1 -01 -001. -1. + -123. /* comment 1 */ /* comment 2 */ + -.1 -0.1 -00.1 -00.10 + -5e1 -6E-1 -7e+1 -6E+01 -6e-03 + -.3E1 -.4e-1 -.5E+1 -.6e+01 -.7E-03 + -1.23e1 -45.6E-1 -78.9e+1 -99.9E+01 -11.2e-03 + -/**/1 + -. -1e -e1 -1e+ -1e- -1. +]) +AT_DATA([expout-base0], [dnl +SKIP +NEG_NUM +SKIP +NEG_NUM -1 +SKIP +NEG_NUM -1 +SKIP +NEG_NUM -1 +SKIP +NEG_NUM -1 +ENDCMD +SKIP +SKIP +NEG_NUM -123 +ENDCMD +SKIP +SKIP +SKIP +SKIP +SKIP +SKIP +NEG_NUM -0.1 +SKIP +NEG_NUM -0.1 +SKIP +NEG_NUM -0.1 +SKIP +NEG_NUM -0.1 +SKIP +SKIP +NEG_NUM -50 +SKIP +NEG_NUM -0.6 +SKIP +NEG_NUM -70 +SKIP +NEG_NUM -60 +SKIP +NEG_NUM -0.006 +SKIP +SKIP +NEG_NUM -3 +SKIP +NEG_NUM -0.04 +SKIP +NEG_NUM -5 +SKIP +NEG_NUM -6 +SKIP +NEG_NUM -0.0007 +SKIP +SKIP +NEG_NUM -12.3 +SKIP +NEG_NUM -4.56 +SKIP +NEG_NUM -789 +SKIP +NEG_NUM -999 +SKIP +NEG_NUM -0.0112 +SKIP +SKIP +NEG_NUM -1 +SKIP +SKIP +DASH ++SKIP +MACRO_PUNCT "." +SKIP +EXPECTED_EXPONENT "-1e" +SKIP +DASH ++SKIP +ID "e1" +SKIP +EXPECTED_EXPONENT "-1e+" +SKIP +EXPECTED_EXPONENT "-1e-" +SKIP +NEG_NUM -1 +ENDCMD +-SKIP +STOP +]) + +AS_BOX([without extra spaces]) +cp input-base input +sed '/^+/d' < expout-base0 > expout-base +PSPP_CHECK_SCAN([-i]) + +AS_BOX([with extra spaces]) +sed 's/ -/ - /g' < input-base > input +sed 's/EXPONENT "-/EXPONENT "- / + s/^+//' < expout-base0 > expout-base +PSPP_CHECK_SCAN([-i]) +AT_CLEANUP + AT_SETUP([strings]) AT_KEYWORDS([scan]) AT_DATA([input], [dnl diff --git a/tests/language/lexer/segment.at b/tests/language/lexer/segment.at index 86a0941194..5d9440fe21 100644 --- a/tests/language/lexer/segment.at +++ b/tests/language/lexer/segment.at @@ -355,7 +355,7 @@ end PSPP_CHECK_SEGMENT([-i]) AT_CLEANUP -AT_SETUP([numbers]) +AT_SETUP([positive numbers]) AT_KEYWORDS([segment]) AT_DATA([input], [dnl 0 1 01 001. 1. @@ -423,6 +423,83 @@ end ]) PSPP_CHECK_SEGMENT([-i]) AT_CLEANUP + +AT_SETUP([negative numbers]) +AT_KEYWORDS([segment]) +AT_DATA([input], [dnl + -0 -1 -01 -001. -1. + -123. /* comment 1 */ /* comment 2 */ + -.1 -0.1 -00.1 -00.10 + -5e1 -6E-1 -7e+1 -6E+01 -6e-03 + -.3E1 -.4e-1 -.5E+1 -.6e+01 -.7E-03 + -1.23e1 -45.6E-1 -78.9e+1 -99.9E+01 -11.2e-03 + -/**/1 + -. -1e -e1 -1e+ -1e- -1. +]) +AT_DATA([expout-base], [dnl +spaces _ +number -0 space +number -1 space +number -01 space +number -001. space +number -1 +end_command . +newline \n (first) + space +number -123 +end_command . space +comment /*_comment_1_*/ space +comment /*_comment_2_*/ +newline \n (first) + space +number -.1 space +number -0.1 space +number -00.1 space +number -00.10 +newline \n (later) + space +number -5e1 space +number -6E-1 space +number -7e+1 space +number -6E+01 space +number -6e-03 +newline \n (later) + space +number -.3E1 space +number -.4e-1 space +number -.5E+1 space +number -.6e+01 space +number -.7E-03 +newline \n (later) + space +number -1.23e1 space +number -45.6E-1 space +number -78.9e+1 space +number -99.9E+01 space +number -11.2e-03 +newline \n (later) + space +punct - +comment /**/ +number 1 +newline \n (later) + space +punct - +punct . space +expected_exponent -1e space +punct - +identifier e1 space +expected_exponent -1e+ space +expected_exponent -1e- space +number -1 +end_command . +-newline \n (first) +- +end +]) +PSPP_CHECK_SEGMENT([-i]) +AT_CLEANUP + AT_SETUP([strings]) AT_KEYWORDS([segment]) -- 2.30.2