From e8d43edf3da5afeddaaddda8bb89eecc710c41a7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 16 Apr 2023 16:38:50 -0700 Subject: [PATCH] rust --- Makefile.am | 1 + rust/Cargo.lock | 53 ++++++++- rust/Cargo.toml | 1 + rust/src/main.rs | 292 +++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 335 insertions(+), 12 deletions(-) diff --git a/Makefile.am b/Makefile.am index 6693874f58..0fdd46bb2c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -160,3 +160,4 @@ mimedir = $(datadir)/mime/packages mime_DATA = org.gnu.pspp.xml EXTRA_DIST += org.gnu.pspp.xml +EXTRA_DIST += rust/Cargo.lock rust/Cargo.toml rust/src/main.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index cdefc3b8f5..6ebd99f2c1 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -38,7 +49,7 @@ dependencies = [ "is-terminal", "once_cell", "strsim", - "termcolor", + "termcolor 1.2.0", ] [[package]] @@ -90,12 +101,31 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +[[package]] +name = "hexplay" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0962bea6731e28b5a443ba4aa00fe3e4fe7555dadf12012435efb738eeac5898" +dependencies = [ + "atty", + "termcolor 0.3.6", +] + [[package]] name = "io-lifetimes" version = "1.0.5" @@ -112,7 +142,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.1", "io-lifetimes", "rustix", "windows-sys", @@ -257,6 +287,7 @@ version = "1.0.0" dependencies = [ "anyhow", "clap", + "hexplay", "num", ] @@ -300,6 +331,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termcolor" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc4587ead41bf016f11af03e55a624c06568b5a19db4e90fde573d805074f83" +dependencies = [ + "wincolor", +] + [[package]] name = "termcolor" version = "1.2.0" @@ -352,6 +392,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "wincolor" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeb06499a3a4d44302791052df005d5232b927ed1a9658146d842165c4de7767" +dependencies = [ + "winapi", +] + [[package]] name = "windows-sys" version = "0.45.0" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index b44edbe3a4..fe9af6885a 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -7,4 +7,5 @@ authors = [ "Ben Pfaff", "John Darrington" ] [dependencies] anyhow = "1.0.69" clap = { version = "4.1.7", features = ["derive"] } +hexplay = "0.2.1" num = "0.4.0" diff --git a/rust/src/main.rs b/rust/src/main.rs index 41dec4d92f..fbe11c5dfb 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -16,7 +16,9 @@ use anyhow::{anyhow, Result}; use clap::Parser; -use num::Num; +use hexplay::HexViewBuilder; +use num::{Float, Num}; +use std::{fmt, num::FpCategory}; use std::fs::File; use std::io::prelude::*; use std::io::BufReader; @@ -38,7 +40,6 @@ struct Args { fn main() -> Result<()> { let Args { max_cases, files } = Args::parse(); - let error = false; for file in files { Dissector::new(file)?; } @@ -213,6 +214,13 @@ fn trim_end(mut s: Vec, c: u8) -> Vec { s } +fn slice_trim_end(mut s: &[u8], c: u8) -> &[u8] { + while s.last() == Some(&c) { + s = s.split_last().unwrap().1; + } + s +} + fn format_name(type_: u32) -> &'static str { match type_ { 1 => "A", @@ -261,8 +269,70 @@ fn round_up(x: T, y: T) -> T (x + (y - T::one())) / y * y } +struct UntypedValue { + raw: [u8; 8], + endianness: Endianness +} + impl UntypedValue { - fn new( + fn new(raw: [u8; 8], endianness: Endianness) -> UntypedValue { + UntypedValue { raw, endianness } + } +} + +impl fmt::Display for UntypedValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let numeric: f64 = self.endianness.parse(self.raw); + let n_printable = self.raw.iter().take_while(|&&x| x == b' ' || x.is_ascii_graphic()).count(); + let printable_prefix = std::str::from_utf8(&self.raw[0..n_printable]).unwrap(); + write!(f, "{numeric}/\"{printable_prefix}\"") + } +} + +struct HexFloat(T); + +impl fmt::Display for HexFloat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let sign = if self.0.is_sign_negative() { "-" } else { "" }; + match self.0.classify() { + FpCategory::Nan => return write!(f, "NaN"), + FpCategory::Infinite => return write!(f, "{sign}Infinity"), + FpCategory::Zero => return write!(f, "{sign}0.0"), + _ => (), + }; + let (significand, mut exponent, _) = self.0.integer_decode(); + let mut hex_sig = format!("{:x}", significand); + while hex_sig.ends_with('0') { + hex_sig.pop(); + exponent += 4; + } + match hex_sig.len() { + 0 => write!(f, "{sign}0.0"), + 1 => write!(f, "{sign}0x{hex_sig}.0p{exponent}"), + len => write!(f, "{sign}0x{}.{}p{}", + hex_sig.chars().nth(0).unwrap(), + &hex_sig[1..], + exponent + 4 * (len as i16 - 1)) + } + } +} + +#[cfg(test)] +mod hex_float_tests { + use crate::HexFloat; + use num::Float; + + #[test] + fn test() { + assert_eq!(format!("{}", HexFloat(1.0)), "0x1.0p0"); + assert_eq!(format!("{}", HexFloat(123.0)), "0x1.ecp6"); + assert_eq!(format!("{}", HexFloat(1.0 / 16.0)), "0x1.0p-4"); + assert_eq!(format!("{}", HexFloat(f64::infinity())), "Infinity"); + assert_eq!(format!("{}", HexFloat(f64::neg_infinity())), "-Infinity"); + assert_eq!(format!("{}", HexFloat(f64::nan())), "NaN"); + assert_eq!(format!("{}", HexFloat(0.0)), "0.0"); + assert_eq!(format!("{}", HexFloat(f64::neg_zero())), "-0.0"); + } } impl Dissector { @@ -314,8 +384,8 @@ impl Dissector { let creation_date: [u8; 9] = read_bytes(&mut d.r)?; let creation_time: [u8; 8] = read_bytes(&mut d.r)?; let file_label: [u8; 64] = read_bytes(&mut d.r)?; - let mut file_label = trim_end(Vec::from(file_label), b' '); - d.r.seek_relative(3)?; + let file_label = trim_end(Vec::from(file_label), b' '); + d.skip_bytes(3)?; println!("File header record:"); println!("{:>17}: {}", "Product name", String::from_utf8_lossy(&eye_catcher)); @@ -338,6 +408,8 @@ impl Dissector { 2 => d.read_variable_record()?, 3 => d.read_value_label_record()?, 4 => Err(anyhow!("Misplaced type 4 record."))?, + 6 => d.read_document_record()?, + 7 => d.read_extension_record()?, 999 => break, _ => Err(anyhow!("Unrecognized record type {rec_type}."))? } @@ -349,6 +421,49 @@ impl Dissector { Ok(d) } + fn read_extension_record(&mut self) -> Result<()> { + let offset = self.r.stream_position()?; + let subtype: u32 = self.read_swap()?; + let size: u32 = self.read_swap()?; + let count: u32 = self.read_swap()?; + println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}"); + match subtype { + 3 => self.read_machine_integer_info(size, count), + 4 => self.read_machine_float_info(size, count), + _ => self.read_unknown_extension(subtype, size, count), + } + } + + fn warn(&mut self, s: String) -> Result<()> { + println!("\"{}\" near offset 0x{:08x}: {s}", self.filename, self.r.stream_position()?); + Ok(()) + } + + fn skip_bytes(&mut self, mut n: u64) -> Result<()> { + let mut buf = [0; 1024]; + while n > 0 { + let chunk = u64::min(n, buf.len() as u64); + self.r.read_exact(&mut buf[0..chunk as usize])?; + n -= chunk; + } + Ok(()) + } + + fn read_unknown_extension(&mut self, subtype: u32, size: u32, count: u32) -> Result<()> { + self.warn(format!("Unrecognized record type 7, subtype {subtype}."))?; + if size == 0 || count > 65536 / size { + self.skip_bytes(size as u64 * count as u64)?; + } else if size != 1 { + let mut offset = 0; + for _ in 0..count { + let vec = read_vec(&mut self.r, size as usize)?; + println!("{}", HexViewBuilder::new(&vec).address_offset(offset).finish()); + offset += size as usize; + } + } + Ok(()) + } + fn read_variable_record(&mut self) -> Result<()> { self.n_variable_records += 1; println!("{:08x}: variable record {}", self.r.stream_position()?, self.n_variable_records); @@ -401,7 +516,7 @@ impl Dissector { let label = read_vec(&mut self.r, read_len)?; println!("\t{offset:08x} Variable label: \"{}\"", String::from_utf8_lossy(&label)); - self.r.seek_relative((round_up(len, 4) - len).into())?; + self.skip_bytes((round_up(len, 4) - len).into())?; }, _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?, }; @@ -421,7 +536,7 @@ impl Dissector { let high: f64 = self.read_swap()?; print!(" {low}...{high}"); } - for _i in 0..n_individual { + for _ in 0..n_individual { let value: f64 = self.read_swap()?; print!(" {value}"); } @@ -429,7 +544,7 @@ impl Dissector { if missing_value_code < 1 || missing_value_code > 3 { Err(anyhow!("String missing value indicator field is not 0, 1, 2, or 3."))?; } - for _i in 0..missing_value_code { + for _ in 0..missing_value_code { let string: [u8; 8] = read_bytes(&mut self.r)?; let string: Vec = trim_end(Vec::from(string), b'\0'); println!(" {}", String::from_utf8_lossy(&string)); @@ -444,17 +559,174 @@ impl Dissector { fn read_value_label_record(&mut self) -> Result<()> { println!("{:08x}: value labels record", self.r.stream_position()?); + // Read the labels. let n_labels: u32 = self.read_swap()?; - for _i in 0..n_labels { + for _ in 0..n_labels { let raw: [u8; 8] = read_bytes(&mut self.r)?; + let value = UntypedValue::new(raw, self.fp_format); let label_len: u8 = self.read_swap()?; let padded_len = round_up(label_len as usize + 1, 8); let mut label = read_vec(&mut self.r, padded_len)?; label.truncate(label_len as usize); - print + let label = String::from_utf8_lossy(&label); + + println!("\t{value}: {label}"); + } + + // Read the type-4 record with the corresponding variable indexes. + let rec_type: u32 = self.read_swap()?; + if rec_type != 4 { + Err(anyhow!("Variable index record (type 4) does not immediately \ + follow value label record (type 3) as it should."))?; + } + + println!("\t{:08x}: apply to variables", self.r.stream_position()?); + let n_vars: u32 = self.read_swap()?; + for _ in 0..n_vars { + let index: u32 = self.read_swap()?; + print!(" {index}"); } + println!(); Ok(()) } + + fn read_document_record(&mut self) -> Result<()> { + println!("{:08x}: document record", self.r.stream_position()?); + let n_lines: u32 = self.read_swap()?; + println!("\t{n_lines} lines of documents"); + + for i in 0..n_lines { + print!("\t{:08x}: ", self.r.stream_position()?); + let line: [u8; 64] = read_bytes(&mut self.r)?; + let line = trim_end(Vec::from(line), b' '); + println!("line {i}: \"{}\"", String::from_utf8_lossy(&line)); + } + Ok(()) + } + + fn read_machine_integer_info(&mut self, size: u32, count: u32) -> Result<()> { + let offset = self.r.stream_position()?; + let version_major: u32 = self.read_swap()?; + let version_minor: u32 = self.read_swap()?; + let version_revision: u32 = self.read_swap()?; + let machine_code: u32 = self.read_swap()?; + let float_representation: u32 = self.read_swap()?; + let compression_code: u32 = self.read_swap()?; + let integer_representation: u32 = self.read_swap()?; + let character_code: u32 = self.read_swap()?; + + println!("{offset:08x}: machine integer info"); + if size != 4 || count != 8 { + Err(anyhow!("Bad size ({size}) or count ({count}) field on record type 7, subtype 3"))?; + } + println!("\tVersion: {version_major}.{version_minor}.{version_revision}"); + println!("\tMachine code: {machine_code}"); + println!("\tFloating point representation: {float_representation} ({})", + match float_representation { + 1 => "IEEE 754", + 2 => "IBM 370", + 3 => "DEC VAX", + _ => "unknown" + }); + println!("\tCompression code: {compression_code}"); + println!("\tEndianness: {integer_representation} ({})", + match integer_representation { + 1 => "big", + 2 => "little", + _ => "unknown" + }); + println!("\tCharacter code: {character_code}"); + Ok(()) + } + + fn read_machine_float_info(&mut self, size: u32, count: u32) -> Result<()> { + let offset = self.r.stream_position()?; + let sysmis: f64 = self.read_swap()?; + let highest: f64 = self.read_swap()?; + let lowest: f64 = self.read_swap()?; + + println!("{offset:08x}: machine float info"); + if size != 4 || count != 8 { + Err(anyhow!("Bad size ({size}) or count ({count}) field on extension 4."))?; + } + + println!("\tsysmis: {sysmis} ({})", HexFloat(sysmis)); + println!("\thighest: {highest} ({})", HexFloat(highest)); + println!("\tlowest: {lowest} ({})", HexFloat(lowest)); + Ok(()) + } + + fn read_variable_sets(&mut self, size: u32, count: u32) -> Result<()> { + println!("{:08x}: variable sets", self.r.stream_position()?); + let mut text = self.open_text_record(size, count)?; + loop { + while text.match_byte(b'\n') { + continue; + } + let set = match text.tokenize(b'=') { + Some(set) => String::from_utf8_lossy(&set).into_owned(), + None => break, + }; + + // Always present even for an empty set. + text.match_byte(b' '); + + match text.tokenize(b'\n') { + None => println!("\tset \"{set}\" is empty"), + Some(variables) => { + println!("\tset \"{set}\" contains \"{}\"", String::from_utf8_lossy(variables).trim_end_matches('\r')); + }, + }; + + } + Ok(()) + } + + fn read_extra_product_info(&mut self, size: u32, count: u32) -> Result<()> { + print!("{:08x}: extra product info", self.r.stream_position()?); + let mut text = self.open_text_record(size, count)?; + + } + + fn open_text_record(&mut self, size: u32, count: u32) -> Result { + let n_bytes = match u32::checked_mul(size, count) { + Some(n) => n, + None => Err(anyhow!("Extension record too large."))? + }; + Ok(TextRecord::new(read_vec(&mut self.r, n_bytes as usize)?)) + } +} + +struct TextRecord { + buffer: Vec, + pos: usize +} + +impl TextRecord { + fn new(buffer: Vec) -> TextRecord { + TextRecord { buffer, pos: 0 } + } + + fn tokenize<'a>(&'a mut self, delimiter: u8) -> Option<&'a [u8]> { + let mut start = self.pos; + while self.pos < self.buffer.len() && self.buffer[self.pos] != delimiter && self.buffer[self.pos] != 0 { + self.pos += 1 + } + if start == self.pos { + None + } else { + Some(&self.buffer[start..self.pos]) + } + } + + fn match_byte(&mut self, c: u8) -> bool { + if self.pos < self.buffer.len() && self.buffer[self.pos] == c { + self.pos += 1; + true + } else { + false + } + } } -- 2.30.2