From 37c21ba9301956091823dea7d84ff0400330bd4b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 30 Jul 2023 07:43:33 -0700 Subject: [PATCH] separate sack integration test binary --- rust/Cargo.lock | 136 ++++++++++++++++++++++++++++++++++++++++----- rust/Cargo.toml | 7 ++- rust/src/sack.rs | 39 ++++++++++++- rust/tests/sack.rs | 82 +++++++++++++++++++++++++++ 4 files changed, 246 insertions(+), 18 deletions(-) create mode 100644 rust/tests/sack.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index b69e7e9b22..339237c316 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -62,6 +62,7 @@ dependencies = [ "once_cell", "strsim", "termcolor 1.2.0", + "terminal_size", ] [[package]] @@ -106,6 +107,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "errno-dragonfly" version = "0.1.2" @@ -170,7 +182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -181,8 +193,8 @@ checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix", - "windows-sys", + "rustix 0.36.8", + "windows-sys 0.45.0", ] [[package]] @@ -197,6 +209,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -379,11 
+397,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" dependencies = [ "bitflags", - "errno", + "errno 0.2.8", + "io-lifetimes", + "libc", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b24138615de35e32031d041a09032ef3487a616d901ca4db224e7d557efae2" +dependencies = [ + "bitflags", + "errno 0.3.1", "io-lifetimes", "libc", - "linux-raw-sys", - "windows-sys", + "linux-raw-sys 0.3.8", + "windows-sys 0.45.0", ] [[package]] @@ -432,6 +464,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" +dependencies = [ + "rustix 0.37.3", + "windows-sys 0.48.0", +] + [[package]] name = "thiserror" version = "1.0.39" @@ -510,7 +552,16 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.1", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.1", ] [[package]] @@ -519,13 +570,28 @@ version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.1", + "windows_aarch64_msvc 0.42.1", + 
"windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm 0.42.1", + "windows_x86_64_msvc 0.42.1", +] + +[[package]] +name = "windows-targets" +version = "0.48.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] @@ -534,38 +600,80 @@ version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + [[package]] name = "windows_x86_64_gnu" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 20a9b21afb..d365ab63a8 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -6,7 +6,7 @@ authors = [ "Ben Pfaff", "John Darrington" ] [dependencies] anyhow = "1.0.69" -clap = { version = "4.1.7", features = ["derive"] } +clap = { version = "4.1.7", features = ["derive", "wrap_help"] } flate2 = "1.0.26" float_next_after = "1.0.0" hexplay = "0.2.1" @@ -22,3 +22,8 @@ path = "src/main.rs" [lib] path = "src/lib.rs" + +[[test]] 
+name = "sack" +path = "tests/sack.rs" +harness = false \ No newline at end of file diff --git a/rust/src/sack.rs b/rust/src/sack.rs index 41012c467e..1b71154444 100644 --- a/rust/src/sack.rs +++ b/rust/src/sack.rs @@ -12,6 +12,12 @@ use std::{ use crate::endian::{Endian, ToBytes}; pub fn sack(input: &str, endian: Endian) -> Result> { + let mut lexer = Lexer::new(input, endian)?; + while let Some(ref token) = lexer.token { + println!("{token:?}"); + lexer.get()?; + } + let mut symbol_table = HashMap::new(); let output = _sack(input, endian, &mut symbol_table)?; let output = if !symbol_table.is_empty() { @@ -231,7 +237,7 @@ where Ok(()) } -#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Eq, Clone, Debug)] enum Token { Integer(i64), Float(OrderedFloat), @@ -269,7 +275,7 @@ impl<'a> Lexer<'a> { line_number: 1, endian, }; - lexer.next()?; + lexer.token = lexer.next()?; Ok(lexer) } fn take(&mut self) -> Result { @@ -354,7 +360,7 @@ impl<'a> Lexer<'a> { } } '"' => { - let mut s = String::from(c); + let mut s = String::new(); loop { match self.iter.next() { None => return Err(anyhow!("end-of-file inside string")), @@ -410,3 +416,30 @@ impl<'a> Lexer<'a> { Ok(Some(token)) } } + +#[cfg(test)] +mod test { + use crate::endian::Endian; + use crate::sack::sack; + use anyhow::Result; + use hexplay::HexView; + + #[test] + fn basic_sack() -> Result<()> { + let input = r#" +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +28; # Nominal case size +0; # Not compressed +0; # Not weighted +1; # 1 case. +100.0; # Bias. 
+"01 Jan 11"; "20:53:52"; +"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 ""; +i8 0 *3; +"#; + let output = sack(input, Endian::Big)?; + HexView::new(&output).print()?; + Ok(()) + } +} diff --git a/rust/tests/sack.rs b/rust/tests/sack.rs new file mode 100644 index 0000000000..407f2ec9a1 --- /dev/null +++ b/rust/tests/sack.rs @@ -0,0 +1,82 @@ +use std::fs::read_to_string; +use std::io::{stdout, IsTerminal, Write}; +use std::path::PathBuf; + +use anyhow::{anyhow, Result}; +use clap::Parser; +use pspp::endian::Endian; +use pspp::sack::sack; + +/// SAv Construction Kit +/// +/// The input is a sequence of data items, each followed by a semicolon. Each +/// data item is converted to the output format and written on stdout. A data +/// item is one of the following: +/// +/// - An integer in decimal, in hexadecimal prefixed by `0x`, or in octal +/// prefixed by `0`. Output as a 32-bit binary integer. +/// +/// - A floating-point number. Output in 64-bit IEEE 754 format. +/// +/// - A string enclosed in double quotes. Output literally. There is no +/// syntax for "escapes". Strings may not contain new-lines. +/// +/// - A literal of the form `s<number>` followed by a quoted string as above. +/// Output as the string's contents followed by enough spaces to fill up +/// `<number>` bytes. For example, `s8 "foo"` is output as `foo` followed +/// by 5 spaces. +/// +/// - The literal `i8`, `i16`, or `i64` followed by an integer. Output +/// as a binary integer with the specified number of bits. +/// +/// - One of the literals `SYSMIS`, `LOWEST`, or `HIGHEST`. Output as a +/// 64-bit IEEE 754 float of the appropriate PSPP value. +/// +/// - `PCSYSMIS`. Output as SPSS/PC+ system-missing value. +/// +/// - The literal `ENDIAN`. Output as a 32-bit binary integer, either with +/// value 1 if `--be` is in effect or 2 if `--le` is in effect. +/// +/// - A pair of parentheses enclosing a sequence of data items, each followed +/// by a semicolon (the last semicolon is optional).
Output as the enclosed
+///   data items in sequence.
+///
+/// - The literal `COUNT` or `COUNT8` followed by a sequence of parenthesized
+///   data items, as above. Output as a 32-bit or 8-bit binary integer whose
+///   value is the number of bytes enclosed within the parentheses, followed
+///   by the enclosed data items themselves.
+///
+/// optionally followed by an asterisk and a positive integer, which specifies a
+/// repeat count for the data item.
+#[derive(Parser, Debug)]
+struct Args {
+    /// Big-endian output format (default)
+    #[arg(long = "be")]
+    be: bool,
+
+    /// Little-endian output format
+    #[arg(long = "le")]
+    le: bool,
+
+    /// Input file.
+    #[arg(required = true)]
+    input: PathBuf,
+}
+
+fn main() -> Result<()> {
+    let Args { be, le, input } = Args::parse();
+    if stdout().is_terminal() {
+        return Err(anyhow!(
+            "not writing binary data to a terminal; redirect to a file"
+        ));
+    }
+    let endian = match (be, le) {
+        (false, false) | (true, false) => Endian::Big,
+        (false, true) => Endian::Little,
+        (true, true) => return Err(anyhow!("can't use both `--be` and `--le`")),
+    };
+    let input = read_to_string(&input)?;
+    let output = sack(&input, endian)?;
+    stdout().write_all(&output)?; // `write` may return after a partial write
+    Ok(())
+}
-- 
2.30.2