work on rust version of pspp-dump-sav

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 19 Mar 2023 00:50:19 +0000 (17:50 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sun, 19 Mar 2023 00:50:36 +0000 (17:50 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 19 Mar 2023 00:50:19 +0000 (17:50 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 19 Mar 2023 00:50:36 +0000 (17:50 -0700)
diff --git a/rust/.gitignore b/rust/.gitignore

new file mode 100644 (file)

index 0000000..2f7896d
--- /dev/null
+++ b/rust/.gitignore
@@ -0,0 +1 @@
+target/
diff --git a/rust/Cargo.lock b/rust/Cargo.lock

new file mode 100644 (file)

index 0000000..cdefc3b
--- /dev/null
+++ b/rust/Cargo.lock
@@ -0,0 +1,419 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "anyhow"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "cc"
+version = "1.0.79"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
+
+[[package]]
+name = "clap"
+version = "4.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f3061d6db6d8fcbbd4b05e057f2acace52e64e96b498c08c2d7a4e65addd340"
+dependencies = [
+ "bitflags",
+ "clap_derive",
+ "clap_lex",
+ "is-terminal",
+ "once_cell",
+ "strsim",
+ "termcolor",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34d122164198950ba84a918270a3bb3f7ededd25e15f7451673d986f55bd2667"
+dependencies = [
+ "heck",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "350b9cf31731f9957399229e9b2adc51eeabdfbe9d71d9a0552275fd12710d09"
+dependencies = [
+ "os_str_bytes",
+]
+
+[[package]]
+name = "errno"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
+dependencies = [
+ "errno-dragonfly",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "errno-dragonfly"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
+
+[[package]]
+name = "io-lifetimes"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "is-terminal"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857"
+dependencies = [
+ "hermit-abi",
+ "io-lifetimes",
+ "rustix",
+ "windows-sys",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.139"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
+
+[[package]]
+name = "num"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
+dependencies = [
+ "autocfg",
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+dependencies = [
+ "autocfg",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
+
+[[package]]
+name = "os_str_bytes"
+version = "6.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.51"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pspp-dump-sav"
+version = "1.0.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "num",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rustix"
+version = "0.36.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644"
+dependencies = [
+ "bitflags",
+ "errno",
+ "io-lifetimes",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
+[[package]]
+name = "strsim"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "termcolor"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-sys"
+version = "0.45.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
diff --git a/rust/Cargo.toml b/rust/Cargo.toml

new file mode 100644 (file)

index 0000000..b44edbe
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "pspp-dump-sav"
+version = "1.0.0"
+edition = "2021"
+authors = [ "Ben Pfaff", "John Darrington" ]
+
+[dependencies]
+anyhow = "1.0.69"
+clap = { version = "4.1.7", features = ["derive"] }
+num = "0.4.0"
diff --git a/rust/src/main.rs b/rust/src/main.rs

new file mode 100644 (file)

index 0000000..41dec4d
--- /dev/null
+++ b/rust/src/main.rs
@@ -0,0 +1,460 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+use anyhow::{anyhow, Result};
+use clap::Parser;
+use num::Num;
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::BufReader;
+use std::path::{Path, PathBuf};
+
+// Command-line arguments, parsed with clap's derive API (`Args::parse()` is
+// called from `main`).
+// NOTE(review): clap's derive macros consume the `///` comments below as
+// help text, so they are presumably user-visible -- confirm against the clap
+// derive documentation before rewording them.
+/// A utility to dissect SPSS system files.
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    /// Maximum number of cases to print.
+    // Spelled `--data` on the command line; defaults to 0.
+    #[arg(long = "data", default_value_t = 0)]
+    max_cases: usize,
+
+    /// Files to dissect.
+    // Marked as required.
+    #[arg(required = true)]
+    files: Vec<PathBuf>
+}
+
+/// Parses the command line and dissects each named file in order.
+///
+/// Stops at, and returns, the first error produced by `Dissector::new`.
+fn main() -> Result<()> {
+    // `--data` is parsed so that the option is accepted, but nothing in this
+    // function consumes the value yet.
+    let Args { max_cases: _max_cases, files } = Args::parse();
+
+    for file in files {
+        Dissector::new(file)?;
+    }
+    Ok(())
+}
+
+/// Compression scheme announced by the file header.
+///
+/// `Dissector::new` maps header compression code 1 (in `$FL2` files) to
+/// `Simple` and code 2 (in `$FL3` files) to `ZLib`.  Code 0 means no
+/// compression and is represented there as `None`, not as a variant here.
+#[derive(Copy, Clone, Debug)]
+enum Compression {
+    Simple,
+    ZLib
+}
+
+/// Byte order used when decoding multi-byte values.
+#[derive(Copy, Clone, Debug)]
+enum Endianness {
+    BigEndian,
+    LittleEndian
+}
+use Endianness::*;
+
+/// Decodes an `N`-byte array into a `T`, using the byte order named by `self`.
+trait Parse<T, const N: usize> {
+    fn parse(self, bytes: [u8; N]) -> T;
+}
+
+/// Implements `Parse<$ty, $size>` for `Endianness` by dispatching to the
+/// primitive type's `from_be_bytes` / `from_le_bytes` constructor.
+macro_rules! impl_parse {
+    ($($ty:ty => $size:literal),* $(,)?) => {
+        $(
+            impl Parse<$ty, $size> for Endianness {
+                fn parse(self, bytes: [u8; $size]) -> $ty {
+                    match self {
+                        BigEndian => <$ty>::from_be_bytes(bytes),
+                        LittleEndian => <$ty>::from_le_bytes(bytes),
+                    }
+                }
+            }
+        )*
+    };
+}
+
+impl_parse! {
+    u64 => 8, u32 => 4, u16 => 2, u8 => 1,
+    i64 => 8, i32 => 4, i16 => 2, i8 => 1,
+    f64 => 8,
+}
+
+/// Reads exactly `N` bytes from `r` and returns them as a fixed-size array.
+///
+/// Any error from `read_exact` is converted and returned to the caller.
+fn read_bytes<const N: usize>(r: &mut BufReader<File>) -> Result<[u8; N]> {
+    let mut bytes = [0u8; N];
+    match r.read_exact(&mut bytes) {
+        Ok(()) => Ok(bytes),
+        Err(error) => Err(error.into()),
+    }
+}
+
+/// Reads exactly `n` bytes from `r` into a newly allocated vector.
+///
+/// Any error from `read_exact` is propagated to the caller.
+fn read_vec(r: &mut BufReader<File>, n: usize) -> Result<Vec<u8>> {
+    let mut bytes = vec![0; n];
+    r.read_exact(&mut bytes)?;
+    Ok(bytes)
+}
+
+/// Reads one value of type `T` from the dissector's reader, decoding it with
+/// the byte order that was detected for the file.
+trait ReadSwap<T> {
+    fn read_swap(&mut self) -> Result<T>;
+}
+
+// Integer values are decoded with the integer byte order (`endianness`).
+impl ReadSwap<u32> for Dissector {
+    fn read_swap(&mut self) -> Result<u32> {
+        Ok(self.endianness.parse(read_bytes(&mut self.r)?))
+    }
+}
+
+impl ReadSwap<u8> for Dissector {
+    fn read_swap(&mut self) -> Result<u8> {
+        Ok(self.endianness.parse(read_bytes(&mut self.r)?))
+    }
+}
+
+impl ReadSwap<i32> for Dissector {
+    fn read_swap(&mut self) -> Result<i32> {
+        Ok(self.endianness.parse(read_bytes(&mut self.r)?))
+    }
+}
+
+// Floating-point values are decoded with `fp_format`, the byte order detected
+// from the compression bias, which `Dissector::new` tracks separately from
+// the integer byte order.
+impl ReadSwap<f64> for Dissector {
+    fn read_swap(&mut self) -> Result<f64> {
+        Ok(self.fp_format.parse(read_bytes(&mut self.r)?))
+    }
+}
+
+/// State for dissecting a single system file.
+struct Dissector {
+    /// Path of the file, converted lossily to a `String`.
+    filename: String,
+    /// Buffered reader over the open file.
+    r: BufReader<File>,
+    /// Compression announced by the header, or `None` if uncompressed.
+    compression: Option<Compression>,
+    /// Byte order detected from the header's layout code.
+    endianness: Endianness,
+    /// Byte order detected from the compression bias; `Dissector::new` falls
+    /// back to `endianness` when detection fails.
+    fp_format: Endianness,
+    /// Compression bias read from the header.
+    bias: f64,
+    /// Number of variable records read so far.
+    n_variable_records: usize,
+    /// Number of variable records read so far whose width is >= 0.
+    n_variables: usize,
+    /// Width field of every variable record read so far, in file order.
+    var_widths: Vec<i32>,
+}
+
+/// Determines the byte order of a file from its raw layout code.
+///
+/// Returns the first of big-endian, then little-endian, under which the four
+/// bytes decode to 2 or 3, or `None` if neither does.
+fn detect_endianness(layout_code: [u8; 4]) -> Option<Endianness> {
+    [BigEndian, LittleEndian].into_iter().find(|&order| {
+        let code: u32 = order.parse(layout_code);
+        matches!(code, 2 | 3)
+    })
+}
+
+/// Determines the floating-point byte order from the raw compression bias.
+///
+/// Returns the first of big-endian, then little-endian, under which the eight
+/// bytes decode to exactly 100.0, or `None` if neither does.
+fn detect_fp_format(bias: [u8; 8]) -> Option<Endianness> {
+    [BigEndian, LittleEndian].into_iter().find(|&order| {
+        let decoded: f64 = order.parse(bias);
+        decoded == 100.0
+    })
+}
+
+/// Returns `s` with every trailing occurrence of the byte `c` removed.
+///
+/// Occurrences of `c` that are followed by some other byte are kept.
+fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
+    // Keep everything up to and including the last byte that is not `c`.
+    let keep = s.iter().rposition(|&byte| byte != c).map_or(0, |last| last + 1);
+    s.truncate(keep);
+    s
+}
+
+/// Returns the name of the format whose numeric type code is `type_`, or
+/// `"invalid"` if the code is not in the table below.
+fn format_name(type_: u32) -> &'static str {
+    const FORMATS: &[(u32, &str)] = &[
+        (1, "A"),
+        (2, "AHEX"),
+        (3, "COMMA"),
+        (4, "DOLLAR"),
+        (5, "F"),
+        (6, "IB"),
+        (7, "PIBHEX"),
+        (8, "P"),
+        (9, "PIB"),
+        (10, "PK"),
+        (11, "RB"),
+        (12, "RBHEX"),
+        (15, "Z"),
+        (16, "N"),
+        (17, "E"),
+        (20, "DATE"),
+        (21, "TIME"),
+        (22, "DATETIME"),
+        (23, "ADATE"),
+        (24, "JDATE"),
+        (25, "DTIME"),
+        (26, "WKDAY"),
+        (27, "MONTH"),
+        (28, "MOYR"),
+        (29, "QYR"),
+        (30, "WKYR"),
+        (31, "PCT"),
+        (32, "DOT"),
+        (33, "CCA"),
+        (34, "CCB"),
+        (35, "CCC"),
+        (36, "CCD"),
+        (37, "CCE"),
+        (38, "EDATE"),
+        (39, "SDATE"),
+        (40, "MTIME"),
+        (41, "YMDHMS"),
+    ];
+
+    FORMATS
+        .iter()
+        .find(|&&(code, _)| code == type_)
+        .map_or("invalid", |&(_, name)| name)
+}
+
+/// Computes `(x + (y - 1)) / y * y`.
+///
+/// For the non-negative integer arguments used in this file, that is `x`
+/// rounded up to the next multiple of `y`.
+fn round_up<T>(x: T, y: T) -> T
+where
+    T: Num + Copy,
+{
+    let bumped = x + (y - T::one());
+    let quotient = bumped / y;
+    quotient * y
+}
+
+impl UntypedValue {
+    fn new(
+}
+
+impl Dissector {
+    /// Opens `filename`, reads and prints its file header record, then reads
+    /// dictionary records until the end-of-dictionary record (type 999).
+    ///
+    /// Returns the dissector, positioned just after the type word of the
+    /// end-of-dictionary record.  Fails on I/O errors, on an unrecognized
+    /// magic number or layout code, on a compression code that does not match
+    /// the magic number, and on an unexpected record type.
+    fn new<P: AsRef<Path>>(filename: P) -> Result<Dissector> {
+        let mut r = BufReader::new(File::open(&filename)?);
+        let filename = filename.as_ref().to_string_lossy().into_owned();
+        // The first four bytes are the magic number; `$FL3` selects the
+        // variant that is only allowed to use compression code 2 below.
+        let rec_type: [u8; 4] = read_bytes(&mut r)?;
+        let zmagic = match &rec_type {
+            b"$FL2" => false,
+            b"$FL3" => true,
+            _ => Err(anyhow!("This is not an SPSS system file."))?
+        };
+
+        // Printed below as "Product name".
+        let eye_catcher: [u8; 60] = read_bytes(&mut r)?;
+        // The layout code is kept as raw bytes first because the integer byte
+        // order is detected from it.
+        let layout_code: [u8; 4] = read_bytes(&mut r)?;
+        let endianness = detect_endianness(layout_code)
+            .ok_or_else(|| anyhow!("This is not an SPSS system file."))?;
+        let layout_code: u32 = endianness.parse(layout_code);
+        // Read only to advance past the field; the value is not used.
+        let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?;
+        let compressed: u32 = endianness.parse(read_bytes(&mut r)?);
+        // The compression code must agree with the magic number.
+        let compression = match (zmagic, compressed) {
+            (false, 0) => None,
+            (false, 1) => Some(Compression::Simple),
+            (true, 2) => Some(Compression::ZLib),
+            _ => Err(anyhow!("{} file header has invalid compression value {compressed}.",
+                             if zmagic { "ZSAV" } else { "SAV" }))?,
+        };
+
+        let weight_index: u32 = endianness.parse(read_bytes(&mut r)?);
+        let n_cases: u32 = endianness.parse(read_bytes(&mut r)?);
+
+        // The floating-point byte order is detected from the bias.  If that
+        // fails, warn on stderr and fall back to the integer byte order.
+        let bias: [u8; 8] = read_bytes(&mut r)?;
+        let fp_format = detect_fp_format(bias)
+            .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness });
+        let bias: f64 = fp_format.parse(bias);
+
+        let mut d = Dissector {
+            filename,
+            r,
+            compression,
+            endianness,
+            fp_format,
+            bias,
+            n_variable_records: 0,
+            n_variables: 0,
+            var_widths: Vec::new(),
+        };
+
+        let creation_date: [u8; 9] = read_bytes(&mut d.r)?;
+        let creation_time: [u8; 8] = read_bytes(&mut d.r)?;
+        let file_label: [u8; 64] = read_bytes(&mut d.r)?;
+        // Trailing spaces are removed from the label before it is printed.
+        let mut file_label = trim_end(Vec::from(file_label), b' ');
+        // NOTE(review): presumably three bytes of header padding -- confirm
+        // against the system file format documentation.
+        d.r.seek_relative(3)?;
+
+        println!("File header record:");
+        println!("{:>17}: {}", "Product name", String::from_utf8_lossy(&eye_catcher));
+        println!("{:>17}: {}", "Layout code", layout_code);
+        println!("{:>17}: {} ({})", "Compressed", compressed, match compression {
+            None => "no compression",
+            Some(Compression::Simple) => "simple compression",
+            Some(Compression::ZLib) => "ZLIB compression",
+        });
+        println!("{:>17}: {}", "Weight index", weight_index);
+        println!("{:>17}: {}", "Number of cases", n_cases);
+        println!("{:>17}: {}", "Compression bias", bias);
+        println!("{:>17}: {}", "Creation date", String::from_utf8_lossy(&creation_date));
+        println!("{:>17}: {}", "Creation time", String::from_utf8_lossy(&creation_time));
+        println!("{:>17}: \"{}\"", "File label", String::from_utf8_lossy(&file_label));
+
+        // Dispatch on each record's type word until the end-of-dictionary
+        // record (type 999) is reached.
+        loop {
+            let rec_type: u32 = d.read_swap()?;
+            match rec_type {
+                2 => d.read_variable_record()?,
+                3 => d.read_value_label_record()?,
+                4 => Err(anyhow!("Misplaced type 4 record."))?,
+                999 => break,
+                _ => Err(anyhow!("Unrecognized record type {rec_type}."))?
+            }
+        }
+
+        // `pos` is the offset just after the type-999 word.
+        // NOTE(review): the `+ 4` presumably accounts for a 4-byte filler
+        // that follows it and is not read here -- confirm.
+        let pos = d.r.stream_position()?;
+        println!("{:08x}: end-of-dictionary record (first byte of data at {:0x})", pos, pos + 4);
+
+        Ok(d)
+    }
+
+    /// Reads one variable record (record type 2; the caller has already
+    /// consumed the type word) and prints its fields.
+    ///
+    /// Updates `n_variable_records`, `n_variables` and `var_widths`.  Fails on
+    /// I/O errors and when the variable-label indicator or the missing-value
+    /// code holds a value that is not allowed.
+    fn read_variable_record(&mut self) -> Result<()> {
+        self.n_variable_records += 1;
+        println!("{:08x}: variable record {}", self.r.stream_position()?, self.n_variable_records);
+        let width: i32 = self.read_swap()?;
+        let has_variable_label: u32 = self.read_swap()?;
+        let missing_value_code: i32 = self.read_swap()?;
+        let print_format: u32 = self.read_swap()?;
+        let write_format: u32 = self.read_swap()?;
+        let name: [u8; 8] = read_bytes(&mut self.r)?;
+        let name: Vec<u8> = trim_end(Vec::from(name), b'\0');
+
+        // Records with a negative width are counted as records but not as
+        // variables.
+        if width >= 0 {
+            self.n_variables += 1;
+        }
+        self.var_widths.push(width);
+
+        let kind = if width > 0 {
+            "string"
+        } else if width == 0 {
+            "numeric"
+        } else {
+            "long string continuation record"
+        };
+        println!("\tWidth: {width} ({kind})");
+
+        println!("\tVariable label: {has_variable_label}");
+        println!("\tMissing values code: {missing_value_code} ({})",
+                 match missing_value_code {
+                     0 => "no missing values",
+                     1 => "one missing value",
+                     2 => "two missing values",
+                     3 => "three missing values",
+                     -2 => "one missing value range",
+                     -3 => "one missing value, one range",
+                     _ => "bad value"
+                 });
+        // A format word packs the type code above bit 16, the width in bits
+        // 8..16 and the number of decimals in bits 0..8.
+        for (which, format) in [("Print", print_format),
+                                ("Write", write_format)] {
+            let type_ = format_name(format >> 16);
+            let w = (format >> 8) & 0xff;
+            let d = format & 0xff;
+            println!("\t{which} format: {format:06x} ({type_}{w}.{d})");
+        }
+        println!("\tName: {}", String::from_utf8_lossy(&name));
+
+        // Read variable label.
+        match has_variable_label {
+            0 => (),
+            1 => {
+                let offset = self.r.stream_position()?;
+                let len: u32 = self.read_swap()?;
+                // At most 65535 bytes of the label are read and shown.
+                let read_len = len.min(65535);
+                let label = read_vec(&mut self.r, read_len as usize)?;
+                println!("\t{offset:08x} Variable label: \"{}\"", String::from_utf8_lossy(&label));
+
+                // Skip any part of the label that was not read, plus the
+                // padding that rounds the label up to a multiple of 4 bytes.
+                // The arithmetic is done in i64 so that a very large `len`
+                // cannot overflow.
+                let padded_len = round_up(i64::from(len), 4);
+                self.r.seek_relative(padded_len - i64::from(read_len))?;
+            },
+            _ => return Err(anyhow!("Variable label indicator field is not 0 or 1.")),
+        };
+
+        // Read missing values.
+        if missing_value_code != 0 {
+            print!("\t{:08x} Missing values:", self.r.stream_position()?);
+            if width == 0 {
+                // Numeric variable: an optional range followed by up to three
+                // individual values.
+                let (has_range, n_individual) = match missing_value_code {
+                    -3 => (true, 1),
+                    -2 => (true, 0),
+                    1 | 2 | 3 => (false, missing_value_code),
+                    _ => return Err(anyhow!("Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3.")),
+                };
+                if has_range {
+                    let low: f64 = self.read_swap()?;
+                    let high: f64 = self.read_swap()?;
+                    print!(" {low}...{high}");
+                }
+                for _ in 0..n_individual {
+                    let value: f64 = self.read_swap()?;
+                    print!(" {value}");
+                }
+            } else if width > 0 {
+                // String variable: one to three 8-byte values.
+                if !(1..=3).contains(&missing_value_code) {
+                    return Err(anyhow!("String missing value indicator field is not 0, 1, 2, or 3."));
+                }
+                for _ in 0..missing_value_code {
+                    let string: [u8; 8] = read_bytes(&mut self.r)?;
+                    let string: Vec<u8> = trim_end(Vec::from(string), b'\0');
+                    // Stay on the "Missing values:" line; the newline is
+                    // written once, after all values.
+                    print!(" {}", String::from_utf8_lossy(&string));
+                }
+            }
+            println!();
+        }
+
+        Ok(())
+    }
+
+    /// Reads the labels of a value label record (record type 3; the caller
+    /// has already consumed the type word) and prints each raw 8-byte value,
+    /// in hexadecimal, together with its label.
+    ///
+    /// Only the labels are consumed.  The record-reading loop in `new`
+    /// rejects a type 4 record, so a variable-index record that follows is
+    /// not handled yet.
+    fn read_value_label_record(&mut self) -> Result<()> {
+        println!("{:08x}: value labels record", self.r.stream_position()?);
+
+        let n_labels: u32 = self.read_swap()?;
+        for _ in 0..n_labels {
+            let raw: [u8; 8] = read_bytes(&mut self.r)?;
+            let label_len: u8 = self.read_swap()?;
+            let label_len = usize::from(label_len);
+
+            // The length byte and the label together are padded to a multiple
+            // of 8 bytes.  The length byte has already been read, so one byte
+            // fewer than the padded size remains.
+            let padded_len = round_up(label_len + 1, 8);
+            let mut label = read_vec(&mut self.r, padded_len - 1)?;
+            label.truncate(label_len);
+            println!("\t{raw:02x?}: \"{}\"", String::from_utf8_lossy(&label));
+        }
+
+        Ok(())
+    }
+}
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 19 Mar 2023 00:50:19 +0000 (17:50 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 19 Mar 2023 00:50:36 +0000 (17:50 -0700)
rust/.gitignore	[new file with mode: 0644]	patch \| blob
rust/Cargo.lock	[new file with mode: 0644]	patch \| blob
rust/Cargo.toml	[new file with mode: 0644]	patch \| blob
rust/src/main.rs	[new file with mode: 0644]	patch \| blob