From: Ben Pfaff Date: Sun, 19 Mar 2023 00:50:19 +0000 (-0700) Subject: work on rust version of pspp-dump-sav X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5619e0513e9d2246c7e1d36a519f38cd432d2f6c;p=pspp work on rust version of pspp-dump-sav --- diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 0000000000..2f7896d1d1 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 0000000000..cdefc3b8f5 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,419 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anyhow" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "clap" +version = "4.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f3061d6db6d8fcbbd4b05e057f2acace52e64e96b498c08c2d7a4e65addd340" +dependencies = [ + "bitflags", + "clap_derive", + "clap_lex", + "is-terminal", + "once_cell", + "strsim", + "termcolor", +] + +[[package]] +name = "clap_derive" +version = "4.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34d122164198950ba84a918270a3bb3f7ededd25e15f7451673d986f55bd2667" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350b9cf31731f9957399229e9b2adc51eeabdfbe9d71d9a0552275fd12710d09" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + +[[package]] +name = "io-lifetimes" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "is-terminal" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857" +dependencies = [ + "hermit-abi", + "io-lifetimes", + "rustix", + "windows-sys", +] + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "os_str_bytes" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pspp-dump-sav" +version = "1.0.0" +dependencies = [ + "anyhow", + "clap", + "num", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustix" +version = "0.36.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000000..b44edbe3a4 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "pspp-dump-sav" +version = "1.0.0" +edition = "2021" +authors = [ "Ben Pfaff", "John Darrington" ] + +[dependencies] +anyhow = "1.0.69" +clap = { version = "4.1.7", features = ["derive"] } +num = "0.4.0" diff --git a/rust/src/main.rs b/rust/src/main.rs new file mode 100644 index 0000000000..41dec4d92f --- /dev/null +++ b/rust/src/main.rs @@ -0,0 +1,460 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +use anyhow::{anyhow, Result}; +use clap::Parser; +use num::Num; +use std::fs::File; +use std::io::prelude::*; +use std::io::BufReader; +use std::path::{Path, PathBuf}; + +/// A utility to dissect SPSS system files. +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Maximum number of cases to print. + #[arg(long = "data", default_value_t = 0)] + max_cases: usize, + + /// Files to dissect. + #[arg(required = true)] + files: Vec +} + +fn main() -> Result<()> { + let Args { max_cases, files } = Args::parse(); + + let error = false; + for file in files { + Dissector::new(file)?; + } + Ok(()) +} + +#[derive(Copy, Clone, Debug)] +enum Compression { + Simple, + ZLib +} + +#[derive(Copy, Clone, Debug)] +enum Endianness { + BigEndian, + LittleEndian +} +use Endianness::*; + +trait Parse { + fn parse(self, bytes: [u8; N]) -> T; +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 8]) -> u64 { + match self { + BigEndian => u64::from_be_bytes(bytes), + LittleEndian => u64::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 4]) -> u32 { + match self { + BigEndian => u32::from_be_bytes(bytes), + LittleEndian => u32::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 2]) -> u16 { + match self { + BigEndian => u16::from_be_bytes(bytes), + LittleEndian => u16::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 1]) -> u8 { + match self { + BigEndian => u8::from_be_bytes(bytes), + LittleEndian => u8::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 8]) -> i64 { + match self { + BigEndian => i64::from_be_bytes(bytes), + LittleEndian => i64::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 4]) -> i32 { + match self { + BigEndian => i32::from_be_bytes(bytes), + LittleEndian => i32::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 2]) -> i16 { + match self { + BigEndian => i16::from_be_bytes(bytes), + LittleEndian => i16::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 1]) -> i8 { + match self { + BigEndian => i8::from_be_bytes(bytes), + LittleEndian => i8::from_le_bytes(bytes) + } + } +} +impl Parse for Endianness { + fn parse(self, bytes: [u8; 8]) -> f64 { + match self { + BigEndian => f64::from_be_bytes(bytes), + LittleEndian => f64::from_le_bytes(bytes) + } + } +} + +fn read_bytes(r: &mut BufReader) -> Result<[u8; N]> { + let mut buf = [0; N]; + r.read_exact(&mut buf)?; + Ok(buf) +} + +fn read_vec(r: &mut BufReader, n: usize) -> Result> { + let mut vec = Vec::with_capacity(n); + vec.resize(n, 0); + r.read_exact(&mut vec)?; + Ok(vec) +} + +trait ReadSwap { + fn read_swap(&mut self) -> Result; +} + +impl ReadSwap for Dissector { + fn read_swap(&mut self) -> Result { + Ok(self.endianness.parse(read_bytes(&mut self.r)?)) + } +} +impl ReadSwap for Dissector { + fn read_swap(&mut self) -> Result { + Ok(self.endianness.parse(read_bytes(&mut self.r)?)) + } +} + +impl ReadSwap for Dissector { + fn read_swap(&mut self) -> Result { + Ok(self.endianness.parse(read_bytes(&mut self.r)?)) + } +} + +impl ReadSwap for Dissector { + fn read_swap(&mut self) -> Result { + Ok(self.endianness.parse(read_bytes(&mut self.r)?)) + } +} + +struct Dissector { + filename: String, + r: BufReader, + compression: Option, + endianness: Endianness, + fp_format: Endianness, + bias: f64, + n_variable_records: usize, + n_variables: usize, + var_widths: Vec, +} + +fn detect_endianness(layout_code: [u8; 4]) -> Option { + for endianness in [BigEndian, LittleEndian] { + match endianness.parse(layout_code) { + 2 | 3 => return Some(endianness), + _ => () + } + } + None +} + +fn detect_fp_format(bias: [u8; 8]) -> Option { + for endianness in [BigEndian, LittleEndian] { + let value: f64 = endianness.parse(bias); + if value == 100.0 { + return Some(endianness) + } + } + None +} + +fn trim_end(mut s: Vec, c: u8) -> Vec { + while s.last() == Some(&c) { + s.pop(); + } + s +} + +fn format_name(type_: u32) -> &'static str { + match type_ { + 1 => "A", + 2 => "AHEX", + 3 => "COMMA", + 4 => "DOLLAR", + 5 => "F", + 6 => "IB", + 7 => "PIBHEX", + 8 => "P", + 9 => "PIB", + 10 => "PK", + 11 => "RB", + 12 => "RBHEX", + 15 => "Z", + 16 => "N", + 17 => "E", + 20 => "DATE", + 21 => "TIME", + 22 => "DATETIME", + 23 => "ADATE", + 24 => "JDATE", + 25 => "DTIME", + 26 => "WKDAY", + 27 => "MONTH", + 28 => "MOYR", + 29 => "QYR", + 30 => "WKYR", + 31 => "PCT", + 32 => "DOT", + 33 => "CCA", + 34 => "CCB", + 35 => "CCC", + 36 => "CCD", + 37 => "CCE", + 38 => "EDATE", + 39 => "SDATE", + 40 => "MTIME", + 41 => "YMDHMS", + _ => "invalid" + } +} + +fn round_up(x: T, y: T) -> T +{ + (x + (y - T::one())) / y * y +} + +impl UntypedValue { + fn new( +} + +impl Dissector { + fn new>(filename: P) -> Result { + let mut r = BufReader::new(File::open(&filename)?); + let filename = filename.as_ref().to_string_lossy().into_owned(); + let rec_type: [u8; 4] = read_bytes(&mut r)?; + let zmagic = match &rec_type { + b"$FL2" => false, + b"$FL3" => true, + _ => Err(anyhow!("This is not an SPSS system file."))? + }; + + let eye_catcher: [u8; 60] = read_bytes(&mut r)?; + let layout_code: [u8; 4] = read_bytes(&mut r)?; + let endianness = detect_endianness(layout_code) + .ok_or_else(|| anyhow!("This is not an SPSS system file."))?; + let layout_code: u32 = endianness.parse(layout_code); + let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?; + let compressed: u32 = endianness.parse(read_bytes(&mut r)?); + let compression = match (zmagic, compressed) { + (false, 0) => None, + (false, 1) => Some(Compression::Simple), + (true, 2) => Some(Compression::ZLib), + _ => Err(anyhow!("{} file header has invalid compression value {compressed}.", + if zmagic { "ZSAV" } else { "SAV" }))?, + }; + + let weight_index: u32 = endianness.parse(read_bytes(&mut r)?); + let n_cases: u32 = endianness.parse(read_bytes(&mut r)?); + + let bias: [u8; 8] = read_bytes(&mut r)?; + let fp_format = detect_fp_format(bias) + .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness }); + let bias: f64 = fp_format.parse(bias); + + let mut d = Dissector { + filename, + r, + compression, + endianness, + fp_format, + bias, + n_variable_records: 0, + n_variables: 0, + var_widths: Vec::new(), + }; + + let creation_date: [u8; 9] = read_bytes(&mut d.r)?; + let creation_time: [u8; 8] = read_bytes(&mut d.r)?; + let file_label: [u8; 64] = read_bytes(&mut d.r)?; + let mut file_label = trim_end(Vec::from(file_label), b' '); + d.r.seek_relative(3)?; + + println!("File header record:"); + println!("{:>17}: {}", "Product name", String::from_utf8_lossy(&eye_catcher)); + println!("{:>17}: {}", "Layout code", layout_code); + println!("{:>17}: {} ({})", "Compressed", compressed, match compression { + None => "no compression", + Some(Compression::Simple) => "simple compression", + Some(Compression::ZLib) => "ZLIB compression", + }); + println!("{:>17}: {}", "Weight index", weight_index); + println!("{:>17}: {}", "Number of cases", n_cases); + println!("{:>17}: {}", "Compression bias", bias); + println!("{:>17}: {}", "Creation date", String::from_utf8_lossy(&creation_date)); + println!("{:>17}: {}", "Creation time", String::from_utf8_lossy(&creation_time)); + println!("{:>17}: \"{}\"", "File label", String::from_utf8_lossy(&file_label)); + + loop { + let rec_type: u32 = d.read_swap()?; + match rec_type { + 2 => d.read_variable_record()?, + 3 => d.read_value_label_record()?, + 4 => Err(anyhow!("Misplaced type 4 record."))?, + 999 => break, + _ => Err(anyhow!("Unrecognized record type {rec_type}."))? + } + } + + let pos = d.r.stream_position()?; + println!("{:08x}: end-of-dictionary record (first byte of data at {:0x})", pos, pos + 4); + + Ok(d) + } + + fn read_variable_record(&mut self) -> Result<()> { + self.n_variable_records += 1; + println!("{:08x}: variable record {}", self.r.stream_position()?, self.n_variable_records); + let width: i32 = self.read_swap()?; + let has_variable_label: u32 = self.read_swap()?; + let missing_value_code: i32 = self.read_swap()?; + let print_format: u32 = self.read_swap()?; + let write_format: u32 = self.read_swap()?; + let name: [u8; 8] = read_bytes(&mut self.r)?; + let name: Vec = trim_end(Vec::from(name), b'\0'); + + if width >= 0 { + self.n_variables += 1; + } + self.var_widths.push(width); + + println!("\tWidth: {width} ({})", match width { + _ if width > 0 => "string", + _ if width == 0 => "numeric", + _ => "long string continuation record" + }); + + println!("\tVariable label: {has_variable_label}"); + println!("\tMissing values code: {missing_value_code} ({})", + match missing_value_code { + 0 => "no missing values", + 1 => "one missing value", + 2 => "two missing values", + 3 => "three missing values", + -2 => "one missing value range", + -3 => "one missing value, one range", + _ => "bad value" + }); + for (which, format) in [("Print", print_format), + ("Worite", write_format)] { + let type_ = format_name(format >> 16); + let w = (format >> 8) & 0xff; + let d = format & 0xff; + println!("\t{which} format: {format:06x} ({type_}{w}.{d})"); + } + println!("\tName: {}", String::from_utf8_lossy(&name)); + + // Read variable label. + match has_variable_label { + 0 => (), + 1 => { + let offset = self.r.stream_position()?; + let len: u32 = self.read_swap()?; + let read_len = len.min(65535) as usize; + let label = read_vec(&mut self.r, read_len)?; + println!("\t{offset:08x} Variable label: \"{}\"", String::from_utf8_lossy(&label)); + + self.r.seek_relative((round_up(len, 4) - len).into())?; + }, + _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?, + }; + + // Read missing values. + if missing_value_code != 0 { + print!("\t{:08x} Missing values:", self.r.stream_position()?); + if width == 0 { + let (has_range, n_individual) = match missing_value_code { + -3 => (true, 1), + -2 => (true, 0), + 1 | 2 | 3 => (false, missing_value_code), + _ => Err(anyhow!("Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3."))?, + }; + if has_range { + let low: f64 = self.read_swap()?; + let high: f64 = self.read_swap()?; + print!(" {low}...{high}"); + } + for _i in 0..n_individual { + let value: f64 = self.read_swap()?; + print!(" {value}"); + } + } else if width > 0 { + if missing_value_code < 1 || missing_value_code > 3 { + Err(anyhow!("String missing value indicator field is not 0, 1, 2, or 3."))?; + } + for _i in 0..missing_value_code { + let string: [u8; 8] = read_bytes(&mut self.r)?; + let string: Vec = trim_end(Vec::from(string), b'\0'); + println!(" {}", String::from_utf8_lossy(&string)); + } + } + println!(); + } + + Ok(()) + } + + fn read_value_label_record(&mut self) -> Result<()> { + println!("{:08x}: value labels record", self.r.stream_position()?); + + let n_labels: u32 = self.read_swap()?; + for _i in 0..n_labels { + let raw: [u8; 8] = read_bytes(&mut self.r)?; + let label_len: u8 = self.read_swap()?; + let padded_len = round_up(label_len as usize + 1, 8); + + let mut label = read_vec(&mut self.r, padded_len)?; + label.truncate(label_len as usize); + print + } + + Ok(()) + } +}