separate sack integration test binary
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 30 Jul 2023 14:43:33 +0000 (07:43 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 30 Jul 2023 14:43:33 +0000 (07:43 -0700)
rust/Cargo.lock
rust/Cargo.toml
rust/src/sack.rs
rust/tests/sack.rs [new file with mode: 0644]

index b69e7e9b22da0528ef7017bb152b78a436bf2802..339237c316115d9f23ff5bf651cbee857cd7c250 100644 (file)
@@ -62,6 +62,7 @@ dependencies = [
  "once_cell",
  "strsim",
  "termcolor 1.2.0",
+ "terminal_size",
 ]
 
 [[package]]
@@ -106,6 +107,17 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "errno"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
+dependencies = [
+ "errno-dragonfly",
+ "libc",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "errno-dragonfly"
 version = "0.1.2"
@@ -170,7 +182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3"
 dependencies = [
  "libc",
- "windows-sys",
+ "windows-sys 0.45.0",
 ]
 
 [[package]]
@@ -181,8 +193,8 @@ checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857"
 dependencies = [
  "hermit-abi 0.3.1",
  "io-lifetimes",
- "rustix",
- "windows-sys",
+ "rustix 0.36.8",
+ "windows-sys 0.45.0",
 ]
 
 [[package]]
@@ -197,6 +209,12 @@ version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
+
 [[package]]
 name = "miniz_oxide"
 version = "0.7.1"
@@ -379,11 +397,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644"
 dependencies = [
  "bitflags",
- "errno",
+ "errno 0.2.8",
+ "io-lifetimes",
+ "libc",
+ "linux-raw-sys 0.1.4",
+ "windows-sys 0.45.0",
+]
+
+[[package]]
+name = "rustix"
+version = "0.37.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b24138615de35e32031d041a09032ef3487a616d901ca4db224e7d557efae2"
+dependencies = [
+ "bitflags",
+ "errno 0.3.1",
  "io-lifetimes",
  "libc",
- "linux-raw-sys",
- "windows-sys",
+ "linux-raw-sys 0.3.8",
+ "windows-sys 0.45.0",
 ]
 
 [[package]]
@@ -432,6 +464,16 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "terminal_size"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237"
+dependencies = [
+ "rustix 0.37.3",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.39"
@@ -510,7 +552,16 @@ version = "0.45.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
 dependencies = [
- "windows-targets",
+ "windows-targets 0.42.1",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.1",
 ]
 
 [[package]]
@@ -519,13 +570,28 @@ version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
 dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
+ "windows_aarch64_gnullvm 0.42.1",
+ "windows_aarch64_msvc 0.42.1",
+ "windows_i686_gnu 0.42.1",
+ "windows_i686_msvc 0.42.1",
+ "windows_x86_64_gnu 0.42.1",
+ "windows_x86_64_gnullvm 0.42.1",
+ "windows_x86_64_msvc 0.42.1",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.0",
+ "windows_aarch64_msvc 0.48.0",
+ "windows_i686_gnu 0.48.0",
+ "windows_i686_msvc 0.48.0",
+ "windows_x86_64_gnu 0.48.0",
+ "windows_x86_64_gnullvm 0.48.0",
+ "windows_x86_64_msvc 0.48.0",
 ]
 
 [[package]]
@@ -534,38 +600,80 @@ version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.42.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
index 20a9b21afb24e9d8eeb4c2c611024e8ae534d0b5..d365ab63a8407616cf368855ebd0ac41b0c04a69 100644 (file)
@@ -6,7 +6,7 @@ authors = [ "Ben Pfaff", "John Darrington" ]
 
 [dependencies]
 anyhow = "1.0.69"
-clap = { version = "4.1.7", features = ["derive"] }
+clap = { version = "4.1.7", features = ["derive", "wrap_help"] }
 flate2 = "1.0.26"
 float_next_after = "1.0.0"
 hexplay = "0.2.1"
@@ -22,3 +22,8 @@ path = "src/main.rs"
 
 [lib]
 path = "src/lib.rs"
+
+[[test]]
+name = "sack"
+path = "tests/sack.rs"
+harness = false
\ No newline at end of file
index 41012c467e4eb40a880333cfbeb28a9d2c141b8e..1b711544441df9b7e180d0adc81d441d10241485 100644 (file)
@@ -12,6 +12,12 @@ use std::{
 use crate::endian::{Endian, ToBytes};
 
 pub fn sack(input: &str, endian: Endian) -> Result<Vec<u8>> {
+    let mut lexer = Lexer::new(input, endian)?;
+    while let Some(ref token) = lexer.token {
+        println!("{token:?}");
+        lexer.get()?;
+    }
+
     let mut symbol_table = HashMap::new();
     let output = _sack(input, endian, &mut symbol_table)?;
     let output = if !symbol_table.is_empty() {
@@ -231,7 +237,7 @@ where
     Ok(())
 }
 
-#[derive(PartialEq, Eq, Clone)]
+#[derive(PartialEq, Eq, Clone, Debug)]
 enum Token {
     Integer(i64),
     Float(OrderedFloat<f64>),
@@ -269,7 +275,7 @@ impl<'a> Lexer<'a> {
             line_number: 1,
             endian,
         };
-        lexer.next()?;
+        lexer.token = lexer.next()?;
         Ok(lexer)
     }
     fn take(&mut self) -> Result<Token> {
@@ -354,7 +360,7 @@ impl<'a> Lexer<'a> {
                 }
             }
             '"' => {
-                let mut s = String::from(c);
+                let mut s = String::new();
                 loop {
                     match self.iter.next() {
                         None => return Err(anyhow!("end-of-file inside string")),
@@ -410,3 +416,30 @@ impl<'a> Lexer<'a> {
         Ok(Some(token))
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::endian::Endian;
+    use crate::sack::sack;
+    use anyhow::Result;
+    use hexplay::HexView;
+
+    #[test]
+    fn basic_sack() -> Result<()> {
+        let input = r#"
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+28; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+1; # 1 case.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52";
+"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
+i8 0 *3;
+"#;
+        let output = sack(input, Endian::Big)?;
+        HexView::new(&output).print()?;
+        Ok(())
+    }
+}
diff --git a/rust/tests/sack.rs b/rust/tests/sack.rs
new file mode 100644 (file)
index 0000000..407f2ec
--- /dev/null
@@ -0,0 +1,82 @@
+use std::fs::read_to_string;
+use std::io::{stdout, IsTerminal, Write};
+use std::path::PathBuf;
+
+use anyhow::{anyhow, Result};
+use clap::Parser;
+use pspp::endian::Endian;
+use pspp::sack::sack;
+
+/// SAv Construction Kit
+///
+/// The input is a sequence of data items, each followed by a semicolon.  Each
+/// data item is converted to the output format and written on stdout.  A data
+/// item is one of the following:
+///
+///   - An integer in decimal, in hexadecimal prefixed by `0x`, or in octal
+///     prefixed by `0`.  Output as a 32-bit binary integer.
+///
+///   - A floating-point number.  Output in 64-bit IEEE 754 format.
+///
+///   - A string enclosed in double quotes.  Output literally.  There is no
+///     syntax for "escapes".  Strings may not contain new-lines.
+///
+///   - A literal of the form `s<number>` followed by a quoted string as above.
+///     Output as the string's contents followed by enough spaces to fill up
+///     `<number>` bytes.  For example, `s8 "foo"` is output as `foo` followed
+///     by 5 spaces.
+///
+///   - The literal `i8`, `i16`, or `i64` followed by an integer.  Output
+///     as a binary integer with the specified number of bits.
+///
+///   - One of the literals `SYSMIS`, `LOWEST`, or `HIGHEST`.  Output as a
+///     64-bit IEEE 754 float of the appropriate PSPP value.
+///
+///   - `PCSYSMIS`.  Output as SPSS/PC+ system-missing value.
+///
+///   - The literal `ENDIAN`.  Output as a 32-bit binary integer, either with
+///     value 1 if `--be` is in effect or 2 if `--le` is in effect.
+///
+///   - A pair of parentheses enclosing a sequence of data items, each followed
+///     by a semicolon (the last semicolon is optional).  Output as the enclosed
+///     data items in sequence.
+///
+///   - The literal `COUNT` or `COUNT8` followed by a sequence of parenthesized
+///     data items, as above.  Output as a 32-bit or 8-bit binary integer whose
+///     value is the number of bytes enclosed within the parentheses, followed
+///     by the enclosed data items themselves.
+///
+/// optionally followed by an asterisk and a positive integer, which specifies a
+/// repeat count for the data item.
+#[derive(Parser, Debug)]
+struct Args {
+    /// Big-endian output format (default)
+    #[arg(long = "be")]
+    be: bool,
+
+    /// Little-endian output format
+    #[arg(long = "le")]
+    le: bool,
+
+    /// Input file.
+    #[arg(required = true)]
+    input: PathBuf,
+}
+
+fn main() -> Result<()> {
+    let Args { be, le, input } = Args::parse();
+    if stdout().is_terminal() {
+        return Err(anyhow!(
+            "not writing binary data to a terminal; redirect to a file"
+        ));
+    }
+    let endian = match (be, le) {
+        (false, false) | (true, false) => Endian::Big,
+        (false, true) => Endian::Little,
+        (true, true) => return Err(anyhow!("can't use both `--be` and `--le`")),
+    };
+    let input = read_to_string(&input)?;
+    let output = sack(&input, endian)?;
+    stdout().write(&output)?;
+    Ok(())
+}