more tests
authorBen Pfaff <blp@cs.stanford.edu>
Thu, 11 Jul 2024 14:59:00 +0000 (07:59 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Thu, 11 Jul 2024 14:59:00 +0000 (07:59 -0700)
15 files changed:
rust/Cargo.lock
rust/Cargo.toml
rust/fuzz/.gitignore [new file with mode: 0644]
rust/fuzz/Cargo.lock [new file with mode: 0644]
rust/fuzz/Cargo.toml [new file with mode: 0644]
rust/fuzz/fuzz_targets/fuzz_target_1.rs [new file with mode: 0644]
rust/fuzz/fuzz_targets/segment.rs [new file with mode: 0644]
rust/src/identifier.rs
rust/src/lex/segment.rs
src/language/lexer/segment.c
src/language/lexer/segment.h
src/libpspp/prompt.c
src/libpspp/prompt.h
tests/language/lexer/segment-test.c
tests/language/lexer/segment.at

index 4569faca60f24c14460a69d721866b8827f3cecb..c8fc850cf8f999270db1a388b766d8fc452e0b7f 100644 (file)
@@ -144,6 +144,12 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "diff"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.32"
@@ -502,6 +508,7 @@ dependencies = [
  "bitflags 2.5.0",
  "chrono",
  "clap",
+ "diff",
  "encoding_rs",
  "finl_unicode",
  "flate2",
index 371ac6dff22f7835d6da8644b8dd4b95d261b02b..f73563809448b8f0bcc16f9eee5bd84e83d52ebf 100644 (file)
@@ -42,3 +42,6 @@ path = "src/lib.rs"
 name = "sack"
 path = "tests/sack.rs"
 harness = false
+
+[dev-dependencies]
+diff = "0.1.13"
diff --git a/rust/fuzz/.gitignore b/rust/fuzz/.gitignore
new file mode 100644 (file)
index 0000000..1a45eee
--- /dev/null
@@ -0,0 +1,4 @@
+target
+corpus
+artifacts
+coverage
diff --git a/rust/fuzz/Cargo.lock b/rust/fuzz/Cargo.lock
new file mode 100644 (file)
index 0000000..c840c28
--- /dev/null
@@ -0,0 +1,872 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "android-tzdata"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391"
+dependencies = [
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
+dependencies = [
+ "anstyle",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
+[[package]]
+name = "arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
+
+[[package]]
+name = "bitflags"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+
+[[package]]
+name = "bumpalo"
+version = "3.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
+
+[[package]]
+name = "cc"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2"
+dependencies = [
+ "jobserver",
+ "libc",
+ "once_cell",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "chrono"
+version = "0.4.38"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
+dependencies = [
+ "android-tzdata",
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "wasm-bindgen",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "clap"
+version = "4.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+ "terminal_size",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
+
+[[package]]
+name = "crc32fast"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "encoding_rs"
+version = "0.8.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "errno"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "finl_unicode"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
+
+[[package]]
+name = "flate2"
+version = "1.0.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "float_next_after"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
+
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "hexplay"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0962bea6731e28b5a443ba4aa00fe3e4fe7555dadf12012435efb738eeac5898"
+dependencies = [
+ "atty",
+ "termcolor",
+]
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
+
+[[package]]
+name = "jobserver"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.155"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
+
+[[package]]
+name = "log"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08"
+dependencies = [
+ "adler",
+]
+
+[[package]]
+name = "num"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
+dependencies = [
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "ordered-float"
+version = "3.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pspp"
+version = "1.0.0"
+dependencies = [
+ "anyhow",
+ "bitflags",
+ "chrono",
+ "clap",
+ "encoding_rs",
+ "finl_unicode",
+ "flate2",
+ "float_next_after",
+ "hexplay",
+ "indexmap",
+ "lazy_static",
+ "libc",
+ "num",
+ "num-derive",
+ "num-traits",
+ "ordered-float",
+ "thiserror",
+ "unicase",
+ "utf8-decode",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "pspp-fuzz"
+version = "0.0.0"
+dependencies = [
+ "libfuzzer-sys",
+ "pspp",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rustix"
+version = "0.38.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "syn"
+version = "2.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "201fcda3845c23e8212cd466bfebf0bd20694490fc0356ae8e428e0824a915a6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "termcolor"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adc4587ead41bf016f11af03e55a624c06568b5a19db4e90fde573d805074f83"
+dependencies = [
+ "wincolor",
+]
+
+[[package]]
+name = "terminal_size"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
+dependencies = [
+ "rustix",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.61"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.61"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "unicase"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
+dependencies = [
+ "version_check",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "utf8-decode"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "wincolor"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eeb06499a3a4d44302791052df005d5232b927ed1a9658146d842165c4de7767"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
diff --git a/rust/fuzz/Cargo.toml b/rust/fuzz/Cargo.toml
new file mode 100644 (file)
index 0000000..8b44789
--- /dev/null
@@ -0,0 +1,28 @@
+[package]
+name = "pspp-fuzz"
+version = "0.0.0"
+publish = false
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+[dependencies.pspp]
+path = ".."
+
+[[bin]]
+name = "fuzz_target_1"
+path = "fuzz_targets/fuzz_target_1.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "segment"
+path = "fuzz_targets/segment.rs"
+test = false
+doc = false
+bench = false
diff --git a/rust/fuzz/fuzz_targets/fuzz_target_1.rs b/rust/fuzz/fuzz_targets/fuzz_target_1.rs
new file mode 100644 (file)
index 0000000..43a88c1
--- /dev/null
@@ -0,0 +1,7 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    // fuzzed code goes here
+});
diff --git a/rust/fuzz/fuzz_targets/segment.rs b/rust/fuzz/fuzz_targets/segment.rs
new file mode 100644 (file)
index 0000000..1e5a109
--- /dev/null
@@ -0,0 +1,18 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use pspp::lex::segment::{Segmenter, Mode, Type};
+
+fuzz_target!(|data: &[u8]| {
+    if let Ok(mut input) = std::str::from_utf8(data) {
+        let mut segmenter = Segmenter::new(Mode::Auto, false);
+        loop {
+            let (rest, type_) = segmenter.push(input, true).unwrap();
+            match type_ {
+                Type::End => break,
+                _ => (),
+            }
+            input = rest;
+        }
+    }
+});
index 8e37e64ecb3ffac0889621121228d9f3e08d10f0..3d00520535c8c3d2528af492b4c23918a134e548 100644 (file)
@@ -96,7 +96,7 @@ pub enum Error {
 
 pub fn is_reserved_word(s: &str) -> bool {
     for word in [
-        "and", "or", "not", "eq", "ge", "gt", "le", "ne", "all", "by", "to", "with",
+        "and", "or", "not", "eq", "ge", "gt", "le", "lt", "ne", "all", "by", "to", "with",
     ] {
         if s.eq_ignore_ascii_case(word) {
             return true;
index 94ead036b764a75a751b68486352438c91e07412..53bc26d5eac27b8959d1161b24c565759191bd3a 100644 (file)
@@ -88,7 +88,7 @@ pub enum Type {
 }
 
 bitflags! {
-    #[derive(Copy, Clone)]
+    #[derive(Copy, Clone, Debug)]
     pub struct Substate: u8 {
         const START_OF_LINE = 1;
         const START_OF_COMMAND = 2;
@@ -400,8 +400,7 @@ fn is_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
 }
 
 fn at_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let input = skip_spaces_and_comments(input, eof)?;
-    is_end_of_line(input, eof)
+    is_end_of_line(skip_spaces_and_comments(input, eof)?, eof)
 }
 
 fn first(s: &str) -> char {
@@ -419,17 +418,18 @@ fn get_command_name_candidates(target: &str) -> &[&'static str] {
 
 fn detect_command_name(input: &str, eof: bool) -> Result<bool, Incomplete> {
     let command_name = input
-        .split(|c: char| !(c.is_whitespace() || c.may_continue_id() || c == '-'))
+        .split(|c: char| {
+            !((c.is_whitespace() && c != '\n') || (c.may_continue_id() && c != '.') || c == '-')
+        })
         .next()
         .unwrap();
     if !eof && command_name.len() == input.len() {
         return Err(Incomplete);
     }
-    let string = command_name.strip_suffix('.').unwrap_or(command_name);
+    let command_name = command_name.trim_end_matches(|c: char| c.is_whitespace() || c == '.');
     for command in get_command_name_candidates(command_name) {
-        if let Some(m) = command_match(command, string) {
+        if let Some(m) = command_match(command, command_name) {
             if m.missing_words <= 0 {
-                println!("{command}");
                 return Ok(true);
             }
         }
@@ -443,11 +443,11 @@ impl Segmenter {
         input: &'a str,
         eof: bool,
     ) -> Result<(&'a str, Type), Incomplete> {
-        let (c, rest) = take(input, eof)?;
-        if c == Some('#') {
+        if let (Some('#'), rest) = take(input, eof)? {
             if let (Some('!'), rest) = take(rest, eof)? {
+                let rest = self.parse_full_line(rest, eof)?;
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((self.parse_full_line(rest, eof)?, Type::Shbang));
+                return Ok((rest, Type::Shbang));
             }
         }
 
@@ -477,7 +477,7 @@ impl Segmenter {
             unreachable!()
         };
         match c {
-            '+' if is_start_of_string(skip_spaces_and_comments(input, eof)?, eof)? => {
+            '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
                 // This  `+` is punctuation that may separate pieces of a string.
                 self.state = (State::General, Substate::empty());
                 return Ok((rest, Type::Punct));
@@ -486,14 +486,16 @@ impl Segmenter {
                 self.state = (State::General, Substate::START_OF_COMMAND);
                 return Ok((rest, Type::StartCommand));
             }
-            c if c.is_whitespace() => {
-                if at_end_of_line(rest, eof)? {
+            _ if c.is_whitespace() => {
+                if at_end_of_line(input, eof)? {
                     self.state = (State::General, Substate::START_OF_COMMAND);
                     return Ok((input, Type::SeparateCommands));
                 }
             }
             _ => {
-                if self.at_command_start(input, eof)? {
+                if self.at_command_start(input, eof)?
+                    && !self.state.1.contains(Substate::START_OF_COMMAND)
+                {
                     self.state = (State::General, Substate::START_OF_COMMAND);
                     return Ok((input, Type::StartCommand));
                 }
@@ -556,23 +558,21 @@ impl Segmenter {
                     self.state.0 = State::Comment1;
                     self.parse_comment_1(input, eof)
                 } else {
-                    self.parse_digraph(&['*'], input, eof)
+                    self.parse_digraph(&['*'], rest, eof)
                 }
             }
             '<' => self.parse_digraph(&['=', '>'], rest, eof),
             '>' => self.parse_digraph(&['='], rest, eof),
             '~' => self.parse_digraph(&['='], rest, eof),
+            '.' if at_end_of_line(rest, eof)? => {
+                self.state.1 = Substate::START_OF_COMMAND;
+                Ok((rest, Type::EndCommand))
+            }
             '.' => match take(rest, eof)? {
                 (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
-                (Some('\r' | '\n'), _) if is_end_of_line(rest, eof)? => {
-                    self.state.1 = Substate::START_OF_COMMAND;
-                    Ok((rest, Type::EndCommand))
-                }
                 _ => Ok((rest, Type::Punct)),
             },
-            '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => {
-                self.parse_number(input, eof)
-            }
+            '0'..='9' => self.parse_number(input, eof),
             'u' | 'U' => self.maybe_parse_string(Type::UnicodeString, (input, rest), eof),
             'x' | 'X' => self.maybe_parse_string(Type::HexString, (input, rest), eof),
             '\'' | '"' => self.parse_string(Type::QuotedString, c, rest, eof),
@@ -603,21 +603,19 @@ impl Segmenter {
         mut input: &'a str,
         eof: bool,
     ) -> Result<(&'a str, Type), Incomplete> {
-        loop {
-            let (Some(c), rest) = take(input, eof)? else {
-                break;
-            };
-            if c == quote {
-                if take(rest, eof)?.0 == Some(quote) {
-                    input = rest;
-                    continue;
-                } else {
-                    return Ok((rest, type_));
+        while let (Some(c), rest) = take(input, eof)? {
+            match c {
+                _ if c == quote => {
+                    let (c, rest2) = take(rest, eof)?;
+                    if c != Some(quote) {
+                        self.state.1 = Substate::empty();
+                        return Ok((rest, type_));
+                    }
+                    input = rest2;
                 }
-            } else if is_end_of_line(input, eof)? {
-                break;
+                '\r' | '\n' if is_end_of_line(input, eof)? => break,
+                _ => input = rest,
             }
-            input = rest;
         }
         self.state.1 = Substate::empty();
         Ok((input, Type::ExpectedQuote))
@@ -751,6 +749,7 @@ impl Segmenter {
         eof: bool,
     ) -> Result<(&'a str, Type), Incomplete> {
         let (c, rest) = take(input, eof)?;
+        self.state.1 = Substate::empty();
         Ok((
             match c {
                 Some(c) if seconds.contains(&c) => rest,
@@ -780,6 +779,7 @@ impl Segmenter {
             }
             input = rest2;
         }
+        self.state.1 = Substate::empty();
         Ok((input, Type::Number))
     }
     fn parse_comment_1<'a>(
@@ -1076,15 +1076,12 @@ impl Segmenter {
                     State::General,
                     Substate::START_OF_COMMAND | Substate::START_OF_LINE,
                 );
-                return self.push(input, eof)
+                return self.push(input, eof);
             }
         }
-        return Ok((rest, Type::DoRepeatCommand))
+        return Ok((rest, Type::DoRepeatCommand));
     }
-    fn parse_do_repeat_4<'a>(
-        &mut self,
-        input: &'a str,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Type), Incomplete> {
         self.state.0 = State::DoRepeat3;
         Ok((input, Type::DoRepeatOverflow))
     }
@@ -1203,19 +1200,19 @@ impl Segmenter {
                 // Line starts with some content followed by `!ENDDEFINE`.
                 Ok((rest, Type::MacroBody))
             }
-        } else if line.is_empty() {
-            // Entirely blank line.
-            self.parse_define_6(input, eof)
         } else {
             // No `!ENDDEFINE`.  We have a full line of macro body.
             //
-            // The line might be blank, whether completely empty or just spaces
-            // and comments.  That's OK: we need to report blank lines because
-            // they can have significance.
+            // If the first line of the macro body is blank, we just report it
+            // as spaces, or not at all if there are no spaces, because it's not
+            // significant.
             //
-            // However, if the first line of the macro body is blank, we just
-            // report it as spaces because it's not significant.
+            // However, if it's a later line, we need to report it because blank
+            // lines can have significance.
             let type_ = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
+                if line.is_empty() {
+                    return self.parse_define_6(input, eof);
+                }
                 Type::Spaces
             } else {
                 Type::MacroBody
@@ -1278,7 +1275,7 @@ impl Segmenter {
                 _ => return false,
             }
         }
-        endcmd
+        true
     }
     fn parse_begin_data_3<'a>(
         &mut self,
@@ -1321,16 +1318,58 @@ fn strip_prefix_ignore_ascii_case<'a>(line: &'a str, pattern: &str) -> Option<&'
 
 #[cfg(test)]
 mod test {
+    use crate::prompt::PromptStyle;
+
     use super::{Mode, Segmenter, Type};
 
-    /*
-        fn check_segmentation(mut input: &str, output: &[(Type, &str)]) {
-            let mut segmenter = Segmenter::new(Mode::Auto, false);
-            for (&exp_type, &exp_s) in output {
-                let (rest, type_) = segmenter.push(input, true).unwrap();
+    fn check_segmentation(
+        mut input: &str,
+        mode: Mode,
+        expect_segments: &[(Type, &str)],
+        expect_prompts: &[PromptStyle],
+    ) {
+        let mut segments = Vec::with_capacity(expect_segments.len());
+        let mut prompts = Vec::new();
+        let mut segmenter = Segmenter::new(mode, false);
+        loop {
+            let (rest, type_) = segmenter.push(input, true).unwrap();
+            let len = input.len() - rest.len();
+            let token = &input[..len];
+            segments.push((type_, token));
+            match type_ {
+                Type::End => break,
+                Type::Newline => prompts.push(segmenter.prompt()),
+                _ => (),
+            }
+            input = rest;
+        }
+
+        if &segments != expect_segments {
+            eprintln!("segments differ from expected:");
+            let difference = diff::slice(expect_segments, &segments);
+            for result in difference {
+                match result {
+                    diff::Result::Left(left) => eprintln!("-{left:?}"),
+                    diff::Result::Both(left, _right) => eprintln!(" {left:?}"),
+                    diff::Result::Right(right) => eprintln!("+{right:?}"),
+                }
+            }
+            panic!();
+        }
 
+        if &prompts != expect_prompts {
+            eprintln!("prompts differ from expected:");
+            let difference = diff::slice(expect_prompts, &prompts);
+            for result in difference {
+                match result {
+                    diff::Result::Left(left) => eprintln!("-{left:?}"),
+                    diff::Result::Both(left, _right) => eprintln!(" {left:?}"),
+                    diff::Result::Right(right) => eprintln!("+{right:?}"),
+                }
             }
-    }*/
+            panic!();
+        }
+    }
 
     fn print_segmentation(mut input: &str) {
         let mut segmenter = Segmenter::new(Mode::Auto, false);
@@ -1351,7 +1390,7 @@ mod test {
 
     #[test]
     fn test_identifiers() {
-        print_segmentation(
+        check_segmentation(
             r#"a ab abc abcd !abcd
 A AB ABC ABCD !ABCD
 aB aBC aBcD !aBcD
@@ -1364,12 +1403,115 @@ f@#_.#6
 GhIjK
 .x 1y _z
 "#,
+            Mode::Auto,
+            &[
+                (Type::Identifier, "a"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "ab"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "abc"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "abcd"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!abcd"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "A"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "AB"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "ABC"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "ABCD"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!ABCD"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "aB"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "aBC"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "aBcD"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!aBcD"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "$x"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "$y"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "$z"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!$z"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "grève"),
+                (Type::Spaces, "\u{00a0}"),
+                (Type::Identifier, "Ângstrom"),
+                (Type::Spaces, "\u{00a0}"),
+                (Type::Identifier, "poté"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "#a"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#b"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#c"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "##"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#d"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!#d"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "@efg"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "@"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "@@."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "@#@"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!@"),
+                (Type::Spaces, " "),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "##"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#12345"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#.#"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "f@#_.#6"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "GhIjK"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Identifier, "x"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::Identifier, "y"),
+                (Type::Spaces, " "),
+                (Type::Punct, "_"),
+                (Type::Identifier, "z"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+            ],
         );
     }
 
     #[test]
     fn test_identifiers_ending_in_dot() {
-        print_segmentation(
+        check_segmentation(
             r#"abcd. abcd.
 ABCD. ABCD.
 aBcD. aBcD. 
@@ -1388,33 +1530,311 @@ wxyz./* unterminated end of line comment
 WXYZ. /* unterminated end of line comment
 WxYz./* unterminated end of line comment 
 "#,
+            Mode::Auto,
+            &[
+                (Type::Identifier, "abcd."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "abcd"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "ABCD."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "ABCD"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "aBcD."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "aBcD"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "$y."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "$z."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "あいうえお"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "#c."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#d."),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "@@."),
+                (Type::Spaces, " "),
+                (Type::Identifier, "@@..."),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "#.#"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "#abcd"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "LMNOP"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "QRSTUV"),
+                (Type::EndCommand, "."),
+                (Type::Comment, "/* end of line comment */"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "qrstuv"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* end of line comment */"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "QrStUv"),
+                (Type::EndCommand, "."),
+                (Type::Comment, "/* end of line comment */"),
+                (Type::Spaces, " "),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "wxyz"),
+                (Type::EndCommand, "."),
+                (Type::Comment, "/* unterminated end of line comment"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "WXYZ"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* unterminated end of line comment"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "WxYz"),
+                (Type::EndCommand, "."),
+                (Type::Comment, "/* unterminated end of line comment "),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_reserved_words() {
-        print_segmentation(
+        check_segmentation(
             r#"and or not eq ge gt le lt ne all by to with
 AND OR NOT EQ GE GT LE LT NE ALL BY TO WITH
 andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
 and. with.
 "#,
+            Mode::Auto,
+            &[
+                (Type::ReservedWord, "and"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "or"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "not"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "eq"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "ge"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "gt"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "le"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "lt"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "ne"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "all"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "by"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "to"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "with"),
+                (Type::Newline, "\n"),
+                (Type::ReservedWord, "AND"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "OR"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "NOT"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "EQ"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "GE"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "GT"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "LE"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "LT"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "NE"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "ALL"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "BY"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "TO"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "WITH"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "andx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "orx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "notx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "eqx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "gex"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "gtx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "lex"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "ltx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "nex"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "allx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "byx"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "tox"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "withx"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "and."),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "with"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_punctuation() {
-        print_segmentation(
-            r#"~ & | = >= > <= < ~= <> ( ) , - + * / [[ ]] **
-~&|=>=><=<~=<>(),-+*/[[]]**!*
+        check_segmentation(
+            r#"~ & | = >= > <= < ~= <> ( ) , - + * / [ ] **
+~&|=>=><=<~=<>(),-+*/[]**!*
 % : ; ? _ ` { } ~ !*
 "#,
+            Mode::Auto,
+            &[
+                (Type::Punct, "~"),
+                (Type::Spaces, " "),
+                (Type::Punct, "&"),
+                (Type::Spaces, " "),
+                (Type::Punct, "|"),
+                (Type::Spaces, " "),
+                (Type::Punct, "="),
+                (Type::Spaces, " "),
+                (Type::Punct, ">="),
+                (Type::Spaces, " "),
+                (Type::Punct, ">"),
+                (Type::Spaces, " "),
+                (Type::Punct, "<="),
+                (Type::Spaces, " "),
+                (Type::Punct, "<"),
+                (Type::Spaces, " "),
+                (Type::Punct, "~="),
+                (Type::Spaces, " "),
+                (Type::Punct, "<>"),
+                (Type::Spaces, " "),
+                (Type::Punct, "("),
+                (Type::Spaces, " "),
+                (Type::Punct, ")"),
+                (Type::Spaces, " "),
+                (Type::Punct, ","),
+                (Type::Spaces, " "),
+                (Type::Punct, "-"),
+                (Type::Spaces, " "),
+                (Type::Punct, "+"),
+                (Type::Spaces, " "),
+                (Type::Punct, "*"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Spaces, " "),
+                (Type::Punct, "["),
+                (Type::Spaces, " "),
+                (Type::Punct, "]"),
+                (Type::Spaces, " "),
+                (Type::Punct, "**"),
+                (Type::Newline, "\n"),
+                (Type::Punct, "~"),
+                (Type::Punct, "&"),
+                (Type::Punct, "|"),
+                (Type::Punct, "="),
+                (Type::Punct, ">="),
+                (Type::Punct, ">"),
+                (Type::Punct, "<="),
+                (Type::Punct, "<"),
+                (Type::Punct, "~="),
+                (Type::Punct, "<>"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Punct, ","),
+                (Type::Punct, "-"),
+                (Type::Punct, "+"),
+                (Type::Punct, "*"),
+                (Type::Punct, "/"),
+                (Type::Punct, "["),
+                (Type::Punct, "]"),
+                (Type::Punct, "**"),
+                (Type::MacroId, "!*"),
+                (Type::Newline, "\n"),
+                (Type::Punct, "%"),
+                (Type::Spaces, " "),
+                (Type::Punct, ":"),
+                (Type::Spaces, " "),
+                (Type::Punct, ";"),
+                (Type::Spaces, " "),
+                (Type::Punct, "?"),
+                (Type::Spaces, " "),
+                (Type::Punct, "_"),
+                (Type::Spaces, " "),
+                (Type::Punct, "`"),
+                (Type::Spaces, " "),
+                (Type::Punct, "{"),
+                (Type::Spaces, " "),
+                (Type::Punct, "}"),
+                (Type::Spaces, " "),
+                (Type::Punct, "~"),
+                (Type::Spaces, " "),
+                (Type::MacroId, "!*"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later],
         );
     }
 
     #[test]
     fn test_positive_numbers() {
-        print_segmentation(
+        check_segmentation(
             r#"0 1 01 001. 1.
 123. /* comment 1 */ /* comment 2 */
 .1 0.1 00.1 00.10
@@ -1423,12 +1843,96 @@ and. with.
 1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
 . 1e e1 1e+ 1e- 1.
 "#,
+            Mode::Auto,
+            &[
+                (Type::Number, "0"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::Spaces, " "),
+                (Type::Number, "01"),
+                (Type::Spaces, " "),
+                (Type::Number, "001."),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Number, "123"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* comment 1 */"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* comment 2 */"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Number, "1"),
+                (Type::Spaces, " "),
+                (Type::Number, "0.1"),
+                (Type::Spaces, " "),
+                (Type::Number, "00.1"),
+                (Type::Spaces, " "),
+                (Type::Number, "00.10"),
+                (Type::Newline, "\n"),
+                (Type::Number, "5e1"),
+                (Type::Spaces, " "),
+                (Type::Number, "6E-1"),
+                (Type::Spaces, " "),
+                (Type::Number, "7e+1"),
+                (Type::Spaces, " "),
+                (Type::Number, "6E+01"),
+                (Type::Spaces, " "),
+                (Type::Number, "6e-03"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Number, "3E1"),
+                (Type::Spaces, " "),
+                (Type::Number, ".4e-1"),
+                (Type::Spaces, " "),
+                (Type::Number, ".5E+1"),
+                (Type::Spaces, " "),
+                (Type::Number, ".6e+01"),
+                (Type::Spaces, " "),
+                (Type::Number, ".7E-03"),
+                (Type::Newline, "\n"),
+                (Type::Number, "1.23e1"),
+                (Type::Spaces, " "),
+                (Type::Number, "45.6E-1"),
+                (Type::Spaces, " "),
+                (Type::Number, "78.9e+1"),
+                (Type::Spaces, " "),
+                (Type::Number, "99.9E+01"),
+                (Type::Spaces, " "),
+                (Type::Number, "11.2e-03"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Spaces, " "),
+                (Type::ExpectedExponent, "1e"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "e1"),
+                (Type::Spaces, " "),
+                (Type::ExpectedExponent, "1e+"),
+                (Type::Spaces, " "),
+                (Type::ExpectedExponent, "1e-"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_negative_numbers() {
-        print_segmentation(
+        check_segmentation(
             r#" -0 -1 -01 -001. -1.
  -123. /* comment 1 */ /* comment 2 */
  -.1 -0.1 -00.1 -00.10
@@ -1438,12 +1942,109 @@ and. with.
  -/**/1
  -. -1e -e1 -1e+ -1e- -1.
 "#,
+            Mode::Auto,
+            &[
+                (Type::Spaces, " "),
+                (Type::Number, "-0"),
+                (Type::Spaces, " "),
+                (Type::Number, "-1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-01"),
+                (Type::Spaces, " "),
+                (Type::Number, "-001."),
+                (Type::Spaces, " "),
+                (Type::Number, "-1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Number, "-123"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* comment 1 */"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* comment 2 */"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Number, "-.1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-0.1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-00.1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-00.10"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Number, "-5e1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-6E-1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-7e+1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-6E+01"),
+                (Type::Spaces, " "),
+                (Type::Number, "-6e-03"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Number, "-.3E1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-.4e-1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-.5E+1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-.6e+01"),
+                (Type::Spaces, " "),
+                (Type::Number, "-.7E-03"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Number, "-1.23e1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-45.6E-1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-78.9e+1"),
+                (Type::Spaces, " "),
+                (Type::Number, "-99.9E+01"),
+                (Type::Spaces, " "),
+                (Type::Number, "-11.2e-03"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Punct, "-"),
+                (Type::Comment, "/**/"),
+                (Type::Number, "1"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Punct, "-"),
+                (Type::Punct, "."),
+                (Type::Spaces, " "),
+                (Type::ExpectedExponent, "-1e"),
+                (Type::Spaces, " "),
+                (Type::Punct, "-"),
+                (Type::Identifier, "e1"),
+                (Type::Spaces, " "),
+                (Type::ExpectedExponent, "-1e+"),
+                (Type::Spaces, " "),
+                (Type::ExpectedExponent, "-1e-"),
+                (Type::Spaces, " "),
+                (Type::Number, "-1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_strings() {
-        print_segmentation(
+        check_segmentation(
             r#"'x' "y" 'abc'
 'Don''t' "Can't" 'Won''t'
 """quoted""" '"quoted"'
@@ -1457,22 +2058,116 @@ u'fffd' U"041"
 + /* also a punctuator on blank line
 - 'new command'
 "#,
+            Mode::Auto,
+            &[
+                (Type::QuotedString, "'x'"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "\"y\""),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "'abc'"),
+                (Type::Newline, "\n"),
+                (Type::QuotedString, "'Don''t'"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "\"Can't\""),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "'Won''t'"),
+                (Type::Newline, "\n"),
+                (Type::QuotedString, "\"\"\"quoted\"\"\""),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "'\"quoted\"'"),
+                (Type::Newline, "\n"),
+                (Type::QuotedString, "''"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "\"\""),
+                (Type::Newline, "\n"),
+                (Type::ExpectedQuote, "'missing end quote"),
+                (Type::Newline, "\n"),
+                (Type::ExpectedQuote, "\"missing double quote"),
+                (Type::Newline, "\n"),
+                (Type::HexString, "x\"4142\""),
+                (Type::Spaces, " "),
+                (Type::HexString, "X'5152'"),
+                (Type::Newline, "\n"),
+                (Type::UnicodeString, "u'fffd'"),
+                (Type::Spaces, " "),
+                (Type::UnicodeString, "U\"041\""),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "+"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "new"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::Punct, "+"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* comment */"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "'string continuation'"),
+                (Type::Newline, "\n"),
+                (Type::Punct, "+"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/* also a punctuator on blank line"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "-"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "'new command'"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+            ],
         );
     }
 
     #[test]
     fn test_shbang() {
-        print_segmentation(
+        check_segmentation(
             r#"#! /usr/bin/pspp
 title my title.
 #! /usr/bin/pspp
 "#,
+            Mode::Interactive,
+            &[
+                (Type::Shbang, "#! /usr/bin/pspp"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "title"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "my"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "title"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "#"),
+                (Type::MacroId, "!"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Identifier, "usr"),
+                (Type::Punct, "/"),
+                (Type::Identifier, "bin"),
+                (Type::Punct, "/"),
+                (Type::Identifier, "pspp"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::First, PromptStyle::First, PromptStyle::Later],
         );
     }
 
     #[test]
     fn test_comment_command() {
-        print_segmentation(
+        check_segmentation(
             r#"* Comment commands "don't
 have to contain valid tokens.
 
@@ -1490,12 +2185,88 @@ com is ambiguous with COMPUTE.
 next command.
 
 "#,
+            Mode::Interactive,
+            &[
+                (Type::CommentCommand, "* Comment commands \"don't"),
+                (Type::Newline, "\n"),
+                (Type::CommentCommand, "have to contain valid tokens"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::CommentCommand, "** Check ambiguity with ** token"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::CommentCommand, "****************"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::CommentCommand, "comment keyword works too"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::CommentCommand, "COMM also"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "com"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "is"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "ambiguous"),
+                (Type::Spaces, " "),
+                (Type::ReservedWord, "with"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "COMPUTE"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "   "),
+                (
+                    Type::CommentCommand,
+                    "* Comment need not start at left margin",
+                ),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::CommentCommand, "* Comment ends with blank line"),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "next"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Comment,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Comment,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_document_command() {
-        print_segmentation(
+        check_segmentation(
             r#"DOCUMENT one line.
 DOC more
     than
@@ -1506,14 +2277,58 @@ first.paragraph
 isn't parsed as tokens
 
 second paragraph.
-
 "#,
+            Mode::Interactive,
+            &[
+                (Type::StartDocument, ""),
+                (Type::Document, "DOCUMENT one line."),
+                (Type::EndCommand, ""),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::StartDocument, ""),
+                (Type::Document, "DOC more"),
+                (Type::Newline, "\n"),
+                (Type::Document, "    than"),
+                (Type::Newline, "\n"),
+                (Type::Document, "        one"),
+                (Type::Newline, "\n"),
+                (Type::Document, "            line."),
+                (Type::EndCommand, ""),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::StartDocument, ""),
+                (Type::Document, "docu"),
+                (Type::Newline, "\n"),
+                (Type::Document, "first.paragraph"),
+                (Type::Newline, "\n"),
+                (Type::Document, "isn't parsed as tokens"),
+                (Type::Newline, "\n"),
+                (Type::Document, ""),
+                (Type::Newline, "\n"),
+                (Type::Document, "second paragraph."),
+                (Type::EndCommand, ""),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::First,
+                PromptStyle::Document,
+                PromptStyle::Document,
+                PromptStyle::Document,
+                PromptStyle::First,
+                PromptStyle::Document,
+                PromptStyle::Document,
+                PromptStyle::Document,
+                PromptStyle::Document,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_file_label_command() {
-        print_segmentation(
+        check_segmentation(
             r#"FIL label isn't quoted.
 FILE
   lab 'is quoted'.
@@ -1521,12 +2336,51 @@ FILE /*
 /**/  lab not quoted here either
 
 "#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "FIL"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "label"),
+                (Type::Spaces, " "),
+                (Type::UnquotedString, "isn't quoted"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "FILE"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "lab"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "'is quoted'"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "FILE"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/*"),
+                (Type::Newline, "\n"),
+                (Type::Comment, "/**/"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "lab"),
+                (Type::Spaces, " "),
+                (Type::UnquotedString, "not quoted here either"),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_begin_data() {
-        print_segmentation(
+        check_segmentation(
             r#"begin data.
 end data.
 
@@ -1550,14 +2404,127 @@ end data.
 begin data "xxx".
 begin data 123.
 not data
-
 "#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "begin"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "begin"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Comment, "/*"),
+                (Type::Newline, "\n"),
+                (Type::InlineData, "123"),
+                (Type::Newline, "\n"),
+                (Type::InlineData, "xxx"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "BEG"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/**/"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "DAT"),
+                (Type::Spaces, " "),
+                (Type::Comment, "/*"),
+                (Type::Newline, "\n"),
+                (Type::InlineData, "5 6 7 /* x"),
+                (Type::Newline, "\n"),
+                (Type::InlineData, ""),
+                (Type::Newline, "\n"),
+                (Type::InlineData, "end  data"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "begin"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::InlineData, "data"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "begin"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::QuotedString, "\"xxx\""),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "begin"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::Number, "123"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::ReservedWord, "not"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "data"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Data,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Data,
+                PromptStyle::Data,
+                PromptStyle::Data,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Data,
+                PromptStyle::Data,
+                PromptStyle::Data,
+                PromptStyle::Data,
+                PromptStyle::Later,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::Data,
+                PromptStyle::Data,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Later,
+            ],
         );
     }
 
     #[test]
     fn test_do_repeat() {
-        print_segmentation(
+        check_segmentation(
             r#"do repeat x=a b c
           y=d e f.
   do repeat a=1 thru 5.
   inner command.
 end repeat.
 "#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "do"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "x"),
+                (Type::Punct, "="),
+                (Type::Identifier, "a"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "b"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "c"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "          "),
+                (Type::Identifier, "y"),
+                (Type::Punct, "="),
+                (Type::Identifier, "d"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "e"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "f"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "  do repeat a=1 thru 5."),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "another command."),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "second command"),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "+ third command."),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "do"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "repeat"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#a"),
+                (Type::Punct, "="),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "  inner command."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::Later,
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
     fn test_do_repeat_overflow() {
-        let mut s = String::new();
         const N: usize = 257;
-        for i in 0..N {
-            s.push_str(&format!("do repeat v{i}={i} thru {}\n", i + 5));
+        let do_repeat: Vec<String> = (0..N)
+            .map(|i| format!("do repeat v{i}={i} thru {}.\n", i + 5))
+            .collect();
+        let end_repeat: Vec<String> = (0..N)
+            .rev()
+            .map(|i| format!("end repeat. /* {i}\n"))
+            .collect();
+
+        let s: String = do_repeat
+            .iter()
+            .chain(end_repeat.iter())
+            .map(|s| s.as_str())
+            .collect();
+        let mut expect_output = vec![
+            (Type::Identifier, "do"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "v0"),
+            (Type::Punct, "="),
+            (Type::Number, "0"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "thru"),
+            (Type::Spaces, " "),
+            (Type::Number, "5"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+        ];
+        for i in 1..N {
+            expect_output.push((Type::DoRepeatCommand, &do_repeat[i].trim_end()));
+            if i >= 255 {
+                expect_output.push((Type::DoRepeatOverflow, ""));
+            }
+            expect_output.push((Type::Newline, "\n"));
         }
-        for i in (0..N).rev() {
-            s.push_str(&format!("end repeat. /* {i}\n"));
+        for i in 0..254 {
+            expect_output.push((Type::DoRepeatCommand, &end_repeat[i].trim_end()));
+            expect_output.push((Type::Newline, "\n"));
         }
-        print_segmentation(&s);
+        let comments: Vec<String> = (0..(N - 254)).rev().map(|i| format!("/* {i}")).collect();
+        for comment in &comments {
+            expect_output.extend([
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::EndCommand, "."),
+                (Type::Spaces, " "),
+                (Type::Comment, comment),
+                (Type::Newline, "\n"),
+            ]);
+        }
+        expect_output.push((Type::End, ""));
+
+        let expect_prompts: Vec<_> = (0..N * 2 - 3)
+            .map(|_| PromptStyle::DoRepeat)
+            .chain([PromptStyle::First, PromptStyle::First, PromptStyle::First])
+            .collect();
+        check_segmentation(&s, Mode::Interactive, &expect_output, &expect_prompts);
     }
 
     #[test]
-    fn test_define_simple() {
-        print_segmentation(
-            r#"define !macro1()
-var1 var2 var3 "!enddefine"
-!enddefine.
+    fn test_do_repeat_batch() {
+        check_segmentation(
+            r#"do repeat x=a b c
+          y=d e f
+do repeat a=1 thru 5
+another command
+second command
++ third command
+end /* x */ /* y */ repeat print
+end
+ repeat
+do
+  repeat #a=1
+
+  inner command
+end repeat
 "#,
+            Mode::Batch,
+            &[
+                (Type::Identifier, "do"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "x"),
+                (Type::Punct, "="),
+                (Type::Identifier, "a"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "b"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "c"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "          "),
+                (Type::Identifier, "y"),
+                (Type::Punct, "="),
+                (Type::Identifier, "d"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "e"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "f"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, ""),
+                (Type::DoRepeatCommand, "do repeat a=1 thru 5"),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "another command"),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "second command"),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "+ third command"),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, ""),
+                (Type::Identifier, "do"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "repeat"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "#a"),
+                (Type::Punct, "="),
+                (Type::Number, "1"),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::DoRepeatCommand, "  inner command"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "end"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "repeat"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::DoRepeat,
+                PromptStyle::DoRepeat,
+                PromptStyle::Later,
+            ],
         );
     }
 
-    #[test]
-    fn test_define_no_newline_after_parentheses() {
-        print_segmentation(
-            r#"define !macro1() var1 var2 var3 /* !enddefine
+    mod define {
+        use crate::{
+            lex::segment::{Mode, Type},
+            prompt::PromptStyle,
+        };
+
+        use super::check_segmentation;
+
+        #[test]
+        fn test_simple() {
+            check_segmentation(
+                r#"define !macro1()
+var1 var2 var3 "!enddefine"
 !enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, "var1 var2 var3 \"!enddefine\""),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_define_no_newline_before_enddefine() {
-        print_segmentation(
-            r#"define !macro1()
+        #[test]
+        fn test_no_newline_after_parentheses() {
+            check_segmentation(
+                r#"define !macro1() var1 var2 var3 /* !enddefine
+!enddefine.
+"#,
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::MacroBody, " var1 var2 var3 /* !enddefine"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Define, PromptStyle::First],
+            );
+        }
+
+        #[test]
+        fn test_no_newline_before_enddefine() {
+            check_segmentation(
+                r#"define !macro1()
 var1 var2 var3!enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, "var1 var2 var3"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Define, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_define_all_on_one_line() {
-        print_segmentation(
-            r#"define !macro1()var1 var2 var3!enddefine.
+        #[test]
+        fn test_all_on_one_line() {
+            check_segmentation(
+                r#"define !macro1()var1 var2 var3!enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::MacroBody, "var1 var2 var3"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_define_empty() {
-        print_segmentation(
-            r#"define !macro1()
+        #[test]
+        fn test_empty() {
+            check_segmentation(
+                r#"define !macro1()
 !enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Define, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_define_blank_lines() {
-        print_segmentation(
-            r#"define !macro1()
+        #[test]
+        fn test_blank_lines() {
+            check_segmentation(
+                r#"define !macro1()
 
 
 !enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, ""),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, ""),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[
+                    PromptStyle::Define,
+                    PromptStyle::Define,
+                    PromptStyle::Define,
+                    PromptStyle::First,
+                ],
+            );
+        }
 
-    #[test]
-    fn test_define_arguments() {
-        print_segmentation(
-            r#"define !macro1(a(), b(), c())
+        #[test]
+        fn test_arguments() {
+            check_segmentation(
+                r#"define !macro1(a(), b(), c())
 !enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Identifier, "a"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Punct, ","),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "b"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Punct, ","),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "c"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Define, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_define_multiline_arguments() {
-        print_segmentation(
-            r#"define !macro1(
+        #[test]
+        fn test_multiline_arguments() {
+            check_segmentation(
+                r#"define !macro1(
   a(), b(
   ),
   c()
 )
 !enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Newline, "\n"),
+                    (Type::Spaces, "  "),
+                    (Type::Identifier, "a"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Punct, ","),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "b"),
+                    (Type::Punct, "("),
+                    (Type::Newline, "\n"),
+                    (Type::Spaces, "  "),
+                    (Type::Punct, ")"),
+                    (Type::Punct, ","),
+                    (Type::Newline, "\n"),
+                    (Type::Spaces, "  "),
+                    (Type::Identifier, "c"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[
+                    PromptStyle::Later,
+                    PromptStyle::Later,
+                    PromptStyle::Later,
+                    PromptStyle::Later,
+                    PromptStyle::Define,
+                    PromptStyle::First,
+                ],
+            );
+        }
 
-    #[test]
-    fn test_define_arguments_start_on_second_line() {
-        print_segmentation(
-            r#"define !macro1
+        #[test]
+        fn test_arguments_start_on_second_line() {
+            check_segmentation(
+                r#"define !macro1
 (x,y,z
 )
 content 1
 content 2
 !enddefine.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Newline, "\n"),
+                    (Type::Punct, "("),
+                    (Type::Identifier, "x"),
+                    (Type::Punct, ","),
+                    (Type::Identifier, "y"),
+                    (Type::Punct, ","),
+                    (Type::Identifier, "z"),
+                    (Type::Newline, "\n"),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, "content 1"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, "content 2"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroId, "!enddefine"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[
+                    PromptStyle::Later,
+                    PromptStyle::Later,
+                    PromptStyle::Define,
+                    PromptStyle::Define,
+                    PromptStyle::Define,
+                    PromptStyle::First,
+                ],
+            );
+        }
 
-    #[test]
-    fn test_early_end_of_command_1() {
-        print_segmentation(
-            r#"define !macro1.
+        #[test]
+        fn test_early_end_of_command_1() {
+            check_segmentation(
+                r#"define !macro1.
 data list /x 1.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::Identifier, "data"),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "list"),
+                    (Type::Spaces, " "),
+                    (Type::Punct, "/"),
+                    (Type::Identifier, "x"),
+                    (Type::Spaces, " "),
+                    (Type::Number, "1"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::First, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_early_end_of_command_2() {
-        print_segmentation(
-            r#"define !macro1
+        #[test]
+        fn test_early_end_of_command_2() {
+            check_segmentation(
+                r#"define !macro1
 x.
 data list /x 1.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Newline, "\n"),
+                    (Type::Identifier, "x"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::Identifier, "data"),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "list"),
+                    (Type::Spaces, " "),
+                    (Type::Punct, "/"),
+                    (Type::Identifier, "x"),
+                    (Type::Spaces, " "),
+                    (Type::Number, "1"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Later, PromptStyle::First, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_early_end_of_command_3() {
-        print_segmentation(
-            r#"define !macro1(.
+        #[test]
+        fn test_early_end_of_command_3() {
+            check_segmentation(
+                r#"define !macro1(.
 x.
 data list /x 1.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::Identifier, "x"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::Identifier, "data"),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "list"),
+                    (Type::Spaces, " "),
+                    (Type::Punct, "/"),
+                    (Type::Identifier, "x"),
+                    (Type::Spaces, " "),
+                    (Type::Number, "1"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::First, PromptStyle::First, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_early_end_of_command_4() {
-        // Notice the command terminator at the end of the `DEFINE` command,
-        // which should not be there and ends it early.
-        print_segmentation(
-            r#"define !macro1.
+        #[test]
+        fn test_early_end_of_command_4() {
+            // Notice the command terminator at the end of the `DEFINE` command,
+            // which should not be there and ends it early.
+            check_segmentation(
+                r#"define !macro1.
 data list /x 1.
 "#,
-        );
-    }
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::Identifier, "data"),
+                    (Type::Spaces, " "),
+                    (Type::Identifier, "list"),
+                    (Type::Spaces, " "),
+                    (Type::Punct, "/"),
+                    (Type::Identifier, "x"),
+                    (Type::Spaces, " "),
+                    (Type::Number, "1"),
+                    (Type::EndCommand, "."),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::First, PromptStyle::First],
+            );
+        }
 
-    #[test]
-    fn test_define_missing_enddefine() {
-        print_segmentation(
-            r#"define !macro1()
+        #[test]
+        fn test_missing_enddefine() {
+            check_segmentation(
+                r#"define !macro1()
 content line 1
 content line 2
 "#,
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, "content line 1"),
+                    (Type::Newline, "\n"),
+                    (Type::MacroBody, "content line 2"),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[
+                    PromptStyle::Define,
+                    PromptStyle::Define,
+                    PromptStyle::Define,
+                ],
+            );
+        }
+
+        #[test]
+        fn test_missing_enddefine_2() {
+            check_segmentation(
+                r#"define !macro1()
+"#,
+                Mode::Interactive,
+                &[
+                    (Type::Identifier, "define"),
+                    (Type::Spaces, " "),
+                    (Type::MacroName, "!macro1"),
+                    (Type::Punct, "("),
+                    (Type::Punct, ")"),
+                    (Type::Newline, "\n"),
+                    (Type::End, ""),
+                ],
+                &[PromptStyle::Define],
+            );
+        }
+    }
+
+    #[test]
+    fn test_batch_mode() {
+        check_segmentation(
+            r#"first command
+     another line of first command
++  second command
+third command
+
+fourth command.
+   fifth command.
+"#,
+            Mode::Batch,
+            &[
+                (Type::Identifier, "first"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "     "),
+                (Type::Identifier, "another"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "line"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "of"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "first"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "+"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "second"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, ""),
+                (Type::Identifier, "third"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "fourth"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "   "),
+                (Type::Identifier, "fifth"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+            ],
         );
     }
 
     #[test]
-    fn test_define_missing_enddefine_2() {
-        print_segmentation(
-            r#"define !macro1()
+    fn test_auto_mode() {
+        check_segmentation(
+            r#"command
+     another line of command
+2sls
++  another command
+another line of second command
+data list /x 1
+aggregate.
+print eject.
+twostep cluster
+
+
+fourth command.
+   fifth command.
 "#,
+            Mode::Auto,
+            &[
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "     "),
+                (Type::Identifier, "another"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "line"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "of"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, ""),
+                (Type::Number, "2"),
+                (Type::Identifier, "sls"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, "+"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "another"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "another"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "line"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "of"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "second"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, ""),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "list"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Identifier, "x"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::Newline, "\n"),
+                (Type::StartCommand, ""),
+                (Type::Identifier, "aggregate"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "print"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "eject"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "twostep"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "cluster"),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::SeparateCommands, ""),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "fourth"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "   "),
+                (Type::Identifier, "fifth"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "command"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::Later,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+                PromptStyle::First,
+            ],
         );
     }
 }
index 096383f5df222894d6d94526201bef57ee8baf3c..16e7c0fdd3d4ee6a72e5329752d13733c1ef487a 100644 (file)
@@ -1848,6 +1848,47 @@ segment_type_to_string (enum segment_type type)
     }
 }
 
+/* Returns the name of segment TYPE in the CamelCase form that the "--rust"
+   output of segment-test prints after "Type::" (e.g. "QuotedString"), or
+   "unknown segment type" for an unlisted TYPE.  The caller must not modify or
+   free the returned string.  This is useful only for debugging and testing. */
+const char *
+segment_type_to_rust_string (enum segment_type type)
+{
+  switch (type)
+    {
+    case SEG_NUMBER: return "Number";
+    case SEG_QUOTED_STRING: return "QuotedString";
+    case SEG_HEX_STRING: return "HexString";
+    case SEG_UNICODE_STRING: return "UnicodeString";
+    case SEG_UNQUOTED_STRING: return "UnquotedString";
+    case SEG_RESERVED_WORD: return "ReservedWord";
+    case SEG_IDENTIFIER: return "Identifier";
+    case SEG_PUNCT: return "Punct";
+    case SEG_SHBANG: return "Shbang";
+    case SEG_SPACES: return "Spaces";
+    case SEG_COMMENT: return "Comment";
+    case SEG_NEWLINE: return "Newline";
+    case SEG_COMMENT_COMMAND: return "CommentCommand";
+    case SEG_DO_REPEAT_COMMAND: return "DoRepeatCommand";
+    case SEG_INLINE_DATA: return "InlineData";
+    case SEG_MACRO_ID: return "MacroId";
+    case SEG_MACRO_NAME: return "MacroName";
+    case SEG_MACRO_BODY: return "MacroBody";
+    case SEG_START_DOCUMENT: return "StartDocument";
+    case SEG_DOCUMENT: return "Document";
+    case SEG_START_COMMAND: return "StartCommand";
+    case SEG_SEPARATE_COMMANDS: return "SeparateCommands";
+    case SEG_END_COMMAND: return "EndCommand";
+    case SEG_END: return "End";
+    case SEG_EXPECTED_QUOTE: return "ExpectedQuote";
+    case SEG_EXPECTED_EXPONENT: return "ExpectedExponent";
+    case SEG_UNEXPECTED_CHAR: return "UnexpectedChar";
+    default:
+      return "unknown segment type";
+    }
+}
+
 /* Returns a segmenter with the given syntax MODE.
 
    If IS_SNIPPET is false, then the segmenter will parse as if it's being given
index d5f846a900db8b55d463e07996232a89e6401b39..6c2f0bd6b30d8756289a4d243521bd718e80e6f5 100644 (file)
@@ -108,6 +108,7 @@ enum { SEG_N_TYPES = SEG_TYPES };
 #undef SEG_TYPE
 
 const char *segment_type_to_string (enum segment_type);
+const char *segment_type_to_rust_string (enum segment_type);
 
 /* A segmenter.  Opaque. */
 struct segmenter
index f96ca8c1000be697a8936122926d91e33fd4a60b..6f70d33826f82bdba1b08d370b598d4d2e9812e5 100644 (file)
@@ -42,3 +42,27 @@ prompt_style_to_string (enum prompt_style style)
     }
 }
 
+/* Returns the name of prompt STYLE in the CamelCase form that the "--rust"
+   output of segment-test prints after "PromptStyle::" (e.g. "DoRepeat" for
+   PROMPT_DO_REPEAT), or "unknown prompt" if STYLE is not one of the cases
+   listed below.
+
+   The returned string is a literal: the caller must not modify or free it.
+   This is useful only for debugging and testing. */
+const char *
+prompt_style_to_rust_string (enum prompt_style style)
+{
+  switch (style)
+    {
+    case PROMPT_FIRST: return "First";
+    case PROMPT_LATER: return "Later";
+    case PROMPT_DATA: return "Data";
+    case PROMPT_COMMENT: return "Comment";
+    case PROMPT_DOCUMENT: return "Document";
+    case PROMPT_DO_REPEAT: return "DoRepeat";
+    case PROMPT_DEFINE: return "Define";
+    default:
+      return "unknown prompt";
+    }
+}
+
index 8022e737327dbec5498f7108a845cdaa8fd25a52..24f489003518af1e57153be1a5f87bca78c62210 100644 (file)
@@ -29,5 +29,6 @@ enum prompt_style
   };
 
 const char *prompt_style_to_string (enum prompt_style);
+const char *prompt_style_to_rust_string (enum prompt_style);
 
 #endif /* prompt.h */
index 5977e8fce69236c868d4035f14515350d385afaf..74e99e507ef5359256ad650193010b38ee1d8b05 100644 (file)
@@ -54,6 +54,8 @@ static bool check_truncations;
     input. */
 static bool strip_trailing_newline;
 
+static bool rust;
+
 static const char *parse_options (int argc, char **argv);
 static void usage (void) NO_RETURN;
 
@@ -115,9 +117,16 @@ check_segmentation (const char *input, size_t length, bool print_segments)
   int prev_type = -1;
   size_t offset = 0;
   enum segment_type type;
+
+  enum prompt_style *prompts = NULL;
+  size_t n_prompts = 0;
+  size_t allocated_prompts = 0;
+
+  if (rust)
+    printf ("&[\n");
   do
     {
-      const char *type_name, *p;
+      const char *p;
       int n;
 
       if (one_byte)
@@ -175,33 +184,51 @@ check_segmentation (const char *input, size_t length, bool print_segments)
           continue;
         }
 
-      if (!verbose)
+      if (!rust)
         {
-          if (prev_type != SEG_SPACES && prev_type != -1
-              && type == SEG_SPACES && n == 1 && input[offset] == ' ')
+          if (!verbose)
             {
-              printf ("    space\n");
-              offset++;
-              prev_type = -1;
-              continue;
+              if (prev_type != SEG_SPACES && prev_type != -1
+                  && type == SEG_SPACES && n == 1 && input[offset] == ' ')
+                {
+                  printf ("    space\n");
+                  offset++;
+                  prev_type = -1;
+                  continue;
+                }
             }
+          if (prev_type != -1)
+            putchar ('\n');
+          prev_type = type;
         }
-      if (prev_type != -1)
-        putchar ('\n');
-      prev_type = type;
 
       if (verbose)
         printf ("%2zu:%2zu: ", line_number, offset - line_offset);
 
-      type_name = segment_type_to_string (type);
-      for (p = type_name; *p != '\0'; p++)
-        putchar (tolower ((unsigned char) *p));
-      if (n > 0)
+      if (rust)
+        {
+          printf ("  (Type::%s, ", segment_type_to_rust_string (type));
+        }
+      else
+        {
+          const char *type_name = segment_type_to_string (type);
+          for (p = type_name; *p != '\0'; p++)
+            putchar (tolower ((unsigned char) *p));
+        }
+
+      if (n > 0 || rust)
         {
           int i;
 
-          for (i = MIN (15, strlen (type_name)); i < 16; i++)
-            putchar (' ');
+          if (rust)
+            printf ("\"");
+          else
+            {
+              const char *type_name = segment_type_to_string (type);
+              for (i = MIN (15, strlen (type_name)); i < 16; i++)
+                putchar (' ');
+            }
+
           for (i = 0; i < n;)
             {
               const uint8_t *u_input = CHAR_CAST (const uint8_t *, input);
@@ -228,11 +255,17 @@ check_segmentation (const char *input, size_t length, bool print_segments)
                   switch (uc)
                     {
                     case ' ':
-                      printf ("_");
+                      if (rust)
+                        putchar (' ');
+                      else
+                        putchar ('_');;
                       break;
 
                     case '_':
-                      printf ("\\_");
+                      if (rust)
+                        putchar ('_');
+                      else
+                        printf ("\\_");
                       break;
 
                     case '\\':
@@ -255,9 +288,21 @@ check_segmentation (const char *input, size_t length, bool print_segments)
                       printf ("\\v");
                       break;
 
+                    case '"':
+                      if (rust)
+                        printf ("\\\"");
+                      else
+                        putchar ('"');
+                      break;
+
                     default:
                       if (uc < 0x20 || uc == 0x00a0)
-                        printf ("<U+%04X>", uc);
+                        {
+                          if (rust)
+                            printf ("\\u{%04x}", uc);
+                          else
+                            printf("<U+%04X>", uc);
+                        }
                       else
                         fwrite (input + offset + i, 1, mblen, stdout);
                       break;
@@ -269,22 +314,42 @@ check_segmentation (const char *input, size_t length, bool print_segments)
         }
 
       offset += n;
-      if (type == SEG_NEWLINE)
+      if (rust)
+        {
+          printf ("\"),\n");
+          if (type == SEG_NEWLINE)
+            {
+              if (n_prompts >= allocated_prompts)
+                prompts = x2nrealloc (prompts, &allocated_prompts, sizeof *prompts);
+              prompts[n_prompts++] = segmenter_get_prompt (&s);
+            }
+        }
+      else
         {
-          enum prompt_style prompt;
+          if (type == SEG_NEWLINE) {
+            enum prompt_style prompt;
 
-          line_number++;
-          line_offset = offset;
+            line_number++;
+            line_offset = offset;
 
-          prompt = segmenter_get_prompt (&s);
-          printf (" (%s)\n", prompt_style_to_string (prompt));
+            prompt = segmenter_get_prompt(&s);
+            printf(" (%s)\n", prompt_style_to_string(prompt));
+          }
         }
-      fflush (stdout);
+      fflush(stdout);
     }
   while (type != SEG_END);
 
-  if (print_segments)
+  if (print_segments && !rust)
     putchar ('\n');
+
+  if (rust)
+    {
+      printf ("], &[\n");
+      for (size_t i = 0; i < n_prompts; i++)
+        printf ("  PromptStyle::%s,\n", prompt_style_to_rust_string(prompts[i]));
+      printf ("]\n");
+    }
 }
 
 static const char *
@@ -301,6 +366,7 @@ parse_options (int argc, char **argv)
           {"batch", no_argument, NULL, 'b'},
           {"interactive", no_argument, NULL, 'i'},
           {"verbose", no_argument, NULL, 'v'},
+          {"rust", no_argument, NULL, 'r'},
           {"help", no_argument, NULL, 'h'},
           {NULL, 0, NULL, 0},
         };
@@ -339,6 +405,10 @@ parse_options (int argc, char **argv)
           verbose = true;
           break;
 
+        case 'r':
+          rust = true;
+          break;
+
         case 'h':
           usage ();
 
index 80c09779a8732999a97e1d7295fe6b13e01b0250..2d5184b6cce600d924a18fee3d9bf4473cbe766c 100644 (file)
@@ -17,6 +17,7 @@ dnl
 AT_BANNER([syntax segmentation])
 m4_define([PSPP_CHECK_SEGMENT],
   [AT_CAPTURE_FILE([input])
+   segment-test --rust $1 input > output.rs
    for strip in "" "-s"; do
      case $strip in # (
         '') sed 's/^-//' < expout-base > expout ;; # (