From: Ben Pfaff
Date: Sun, 23 Jul 2023 05:48:28 +0000 (-0700)
Subject: I think it's complete.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=757a335120d871099824b9ba4a669b4ba73bfe66;p=pspp

I think it's complete.
---

diff --git a/rust/src/hexfloat.rs b/rust/src/hexfloat.rs
new file mode 100644
index 0000000000..b885fb2266
--- /dev/null
+++ b/rust/src/hexfloat.rs
@@ -0,0 +1,65 @@
+//! Formatting of floating-point numbers in hexadecimal notation.
+
+use num::Float;
+use std::{num::FpCategory, fmt::{Display, Formatter, Result}};
+
+/// Wrapper whose [`Display`] implementation writes a floating-point value in
+/// hexadecimal notation, e.g. `0x1.ecp6` for `123.0`.
+///
+/// NaN is written as `NaN`, infinities as `Infinity` or `-Infinity`, and zeros
+/// as `0.0` or `-0.0`.
+pub struct HexFloat<T: Float>(pub T);
+
+impl<T: Float> Display for HexFloat<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        let sign = if self.0.is_sign_negative() { "-" } else { "" };
+        match self.0.classify() {
+            FpCategory::Nan => return write!(f, "NaN"),
+            FpCategory::Infinite => return write!(f, "{sign}Infinity"),
+            FpCategory::Zero => return write!(f, "{sign}0.0"),
+            _ => (),
+        };
+        let (significand, mut exponent, _) = self.0.integer_decode();
+        let mut hex_sig = format!("{:x}", significand);
+        // Each trailing zero hex digit dropped divides the significand by 16,
+        // so compensate by raising the binary exponent by 4.
+        while hex_sig.ends_with('0') {
+            hex_sig.pop();
+            exponent += 4;
+        }
+        match hex_sig.len() {
+            // Not expected: zero was handled above, so a digit should remain.
+            0 => write!(f, "{sign}0.0"),
+            1 => write!(f, "{sign}0x{hex_sig}.0p{exponent}"),
+            // Putting the point just after the first digit moves it left by
+            // `len - 1` hex digits, i.e. `4 * (len - 1)` binary places.
+            len => write!(
+                f,
+                "{sign}0x{}.{}p{}",
+                hex_sig.chars().next().unwrap(),
+                &hex_sig[1..],
+                exponent + 4 * (len as i16 - 1)
+            ),
+        }
+    }
+}
+
+#[cfg(test)]
+mod hex_float_tests {
+    use super::HexFloat;
+    use num::Float;
+
+    #[test]
+    fn test() {
+        assert_eq!(format!("{}", HexFloat(1.0)), "0x1.0p0");
+        assert_eq!(format!("{}", HexFloat(123.0)), "0x1.ecp6");
+        assert_eq!(format!("{}", HexFloat(1.0 / 16.0)), "0x1.0p-4");
+        assert_eq!(format!("{}", HexFloat(f64::infinity())), "Infinity");
+        assert_eq!(format!("{}", HexFloat(f64::neg_infinity())), "-Infinity");
+        assert_eq!(format!("{}", HexFloat(f64::nan())), "NaN");
+        assert_eq!(format!("{}", HexFloat(0.0)), "0.0");
+        assert_eq!(format!("{}", HexFloat(f64::neg_zero())), "-0.0");
+        assert_eq!(format!("{}", HexFloat(-0.5)), "-0x1.0p-1");
+    }
+}
+
diff --git a/rust/src/main.rs b/rust/src/main.rs
index 9c92b5515d..7ee84de9ce 100644
--- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -1,5 +1,3 @@ -#![allow(unused_variables)] -#![allow(dead_code)] /* PSPP - a program for statistical analysis. * Copyright (C) 2023 Free Software Foundation, Inc. * @@ -20,14 +18,19 @@ use anyhow::{anyhow, Result}; use clap::Parser; use hexplay::HexView; use hexplay::HexViewBuilder; -use num::{Float, Num}; +use num::Num; use std::cmp::Ordering; +use std::collections::VecDeque; +use std::fmt; use std::fs::File; use std::io::prelude::*; use std::io::BufReader; +use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::str; -use std::{fmt, num::FpCategory}; + +mod hexfloat; +use hexfloat::HexFloat; /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] @@ -46,7 +49,7 @@ fn main() -> Result<()> { let Args { max_cases, files } = Args::parse(); for file in files { - Dissector::new(file)?; + Dissector::new(file, max_cases)?; } Ok(()) } @@ -156,13 +159,18 @@ trait ReadSwap { fn read_swap(&mut self) -> Result; } +impl ReadSwap for Dissector { + fn read_swap(&mut self) -> Result { + Ok(self.endianness.parse(read_bytes(&mut self.r)?)) + } +} impl ReadSwap for Dissector { fn read_swap(&mut self) -> Result { Ok(self.endianness.parse(read_bytes(&mut self.r)?)) } } -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { +impl ReadSwap for Dissector { + fn read_swap(&mut self) -> Result { Ok(self.endianness.parse(read_bytes(&mut self.r)?)) } } @@ -182,7 +190,6 @@ impl ReadSwap for Dissector { struct Dissector { filename: String, r: BufReader, - compression: Option, endianness: Endianness, fp_format: Endianness, bias: f64, @@ -218,13 +225,6 @@ fn trim_end(mut s: Vec, c: u8) -> Vec { s } -fn slice_trim_end(mut s: &[u8], c: u8) -> &[u8] { - while s.last() == Some(&c) { - s = s.split_last().unwrap().1; - } - s -} - fn format_name(type_: u32) -> &'static str { match type_ { 1 => "A", @@ -296,57 +296,8 @@ impl fmt::Display for UntypedValue { } } -struct HexFloat(T); - -impl fmt::Display for HexFloat { - fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let sign = if self.0.is_sign_negative() { "-" } else { "" };
-        match self.0.classify() {
-            FpCategory::Nan => return write!(f, "NaN"),
-            FpCategory::Infinite => return write!(f, "{sign}Infinity"),
-            FpCategory::Zero => return write!(f, "{sign}0.0"),
-            _ => (),
-        };
-        let (significand, mut exponent, _) = self.0.integer_decode();
-        let mut hex_sig = format!("{:x}", significand);
-        while hex_sig.ends_with('0') {
-            hex_sig.pop();
-            exponent += 4;
-        }
-        match hex_sig.len() {
-            0 => write!(f, "{sign}0.0"),
-            1 => write!(f, "{sign}0x{hex_sig}.0p{exponent}"),
-            len => write!(
-                f,
-                "{sign}0x{}.{}p{}",
-                hex_sig.chars().next().unwrap(),
-                &hex_sig[1..],
-                exponent + 4 * (len as i16 - 1)
-            ),
-        }
-    }
-}
-
-#[cfg(test)]
-mod hex_float_tests {
-    use crate::HexFloat;
-    use num::Float;
-
-    #[test]
-    fn test() {
-        assert_eq!(format!("{}", HexFloat(1.0)), "0x1.0p0");
-        assert_eq!(format!("{}", HexFloat(123.0)), "0x1.ecp6");
-        assert_eq!(format!("{}", HexFloat(1.0 / 16.0)), "0x1.0p-4");
-        assert_eq!(format!("{}", HexFloat(f64::infinity())), "Infinity");
-        assert_eq!(format!("{}", HexFloat(f64::neg_infinity())), "-Infinity");
-        assert_eq!(format!("{}", HexFloat(f64::nan())), "NaN");
-        assert_eq!(format!("{}", HexFloat(0.0)), "0.0");
-        assert_eq!(format!("{}", HexFloat(f64::neg_zero())), "-0.0");
-    }
-}
-
 impl Dissector {
-    fn new>(filename: P) -> Result {
+    fn new>(filename: P, max_cases: usize) -> Result {
         let mut r = BufReader::new(File::open(&filename)?);
         let filename = filename.as_ref().to_string_lossy().into_owned();
         let rec_type: [u8; 4] = read_bytes(&mut r)?;
@@ -384,7 +335,6 @@ impl Dissector {
         let mut d = Dissector {
             filename,
             r,
-            compression,
             endianness,
             fp_format,
             bias,
@@ -455,9 +405,220 @@ impl Dissector {
             pos + 4
         );
 
+        match compression {
+            Some(Compression::Simple) => {
+                if max_cases > 0 {
+                    d.read_simple_compressed_data(max_cases)?;
+                }
+            }
+            Some(Compression::ZLib) => d.read_zlib_compressed_data()?,
+            None => (),
+        }
+
         Ok(d)
     }
 
+    /// Dumps up to `max_cases` cases of simple (bytecode) compressed data,
+    /// printing each opcode along with the file offset it was read from.
+    ///
+    /// Stops early, successfully, when an end-of-data opcode (252) is read or
+    /// when the file ends exactly on a case boundary.
+    ///
+    /// # Errors
+    ///
+    /// Returns any error from the underlying reader, including an unexpected
+    /// end of file in the middle of a case.
+    fn read_simple_compressed_data(&mut self, max_cases: usize) -> Result<()> {
+        let _: i32 = self.read_swap()?;
+        println!("\n{:08x}: compressed data:", self.r.stream_position()?);
+
+        const N_OPCODES: usize = 8;
+        let mut opcodes = VecDeque::<u8>::with_capacity(N_OPCODES);
+        let mut opcode_ofs = 0;
+        for case_num in 0..max_cases {
+            println!(
+                "{:08x}: case {case_num}'s uncompressible data begins",
+                self.r.stream_position()?
+            );
+            let mut i = 0;
+            while i < self.var_widths.len() {
+                let width = self.var_widths[i];
+
+                // Opcodes are consumed in file order, so the next one comes off
+                // the front; `opcode_idx` is then its position in its block.
+                let opcode_idx = N_OPCODES - opcodes.len();
+                let Some(opcode) = opcodes.pop_front() else {
+                    opcode_ofs = self.r.stream_position()?;
+                    let mut new_opcodes = [0; N_OPCODES];
+                    if let Err(error) = self.r.read_exact(&mut new_opcodes) {
+                        if i == 0 && error.kind() == ErrorKind::UnexpectedEof {
+                            return Ok(());
+                        } else {
+                            return Err(error.into());
+                        }
+                    };
+                    opcodes.extend(new_opcodes);
+                    continue;
+                };
+
+                print!(
+                    "{:08x}: variable {i}: opcode {opcode}: ",
+                    opcode_ofs + opcode_idx as u64
+                );
+                match opcode {
+                    0 => println!("ignored padding"),
+                    252 => {
+                        println!("end of data");
+                        // End of data terminates the whole dump, not just the
+                        // current case, so do not start another case.
+                        return Ok(());
+                    }
+                    253 => {
+                        let raw: [u8; 8] = read_bytes(&mut self.r)?;
+                        let value = UntypedValue::new(raw, self.fp_format);
+                        println!("uncompressible data: {value}");
+                        i += 1;
+                    }
+                    254 => {
+                        print!("spaces");
+                        if width == 0 {
+                            print!(", but this is a numeric variable");
+                        }
+                        println!();
+                        i += 1;
+                    }
+                    255 => {
+                        print!("SYSMIS");
+                        if width != 0 {
+                            print!(", but this is a string variable (width={width})");
+                        }
+                        println!();
+                        i += 1;
+                    }
+                    _ => {
+                        print!("{}", opcode as f64 - self.bias);
+                        if width != 0 {
+                            print!(", but this is a string variable (width={width})");
+                        }
+                        println!();
+                        i += 1;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Dumps the ZLIB data header, the fixed part of the ZLIB trailer, and each
+    /// block descriptor, flagging every value that differs from what the
+    /// rest of the file implies.  The compressed data itself is skipped.
+    ///
+    /// # Errors
+    ///
+    /// Fails on a read error or if the trailer offset precedes the data.
+    fn read_zlib_compressed_data(&mut self) -> Result<()> {
+        let _: i32 = self.read_swap()?;
+        let ofs = self.r.stream_position()?;
+        println!("\n{ofs:08x}: ZLIB compressed data header:");
+
+        let this_ofs: u64 = self.read_swap()?;
+        let next_ofs: u64 = self.read_swap()?;
+        let next_len: u64 = self.read_swap()?;
+
+        println!("\theader_ofs: {this_ofs:#x}");
+        if this_ofs != ofs {
+            println!("\t\t(Expected {ofs:#x}.)");
+        }
+        println!("\ttrailer_ofs: {next_ofs:#x}");
+        println!("\ttrailer_len: {next_len}");
+        if next_len < 24 || next_len % 24 != 0 {
+            println!("\t\t(Trailer length is not positive multiple of 24.)");
+        }
+
+        // A trailer offset that precedes the end of this header would make the
+        // subtraction underflow, so report such a file instead.
+        let Some(zlib_data_len) = next_ofs.checked_sub(ofs + 8 * 3) else {
+            Err(anyhow!(
+                "ZLIB trailer offset {next_ofs:#x} precedes the compressed data."
+            ))?
+        };
+        println!(
+            "\n{:08x}: {zlib_data_len:#x} bytes of ZLIB compressed data",
+            ofs + 8 * 3
+        );
+
+        self.skip_bytes(zlib_data_len)?;
+
+        println!("\n{next_ofs:08x}: ZLIB trailer fixed header");
+        let bias: u64 = self.read_swap()?;
+        let zero: u64 = self.read_swap()?;
+        let block_size: u32 = self.read_swap()?;
+        let n_blocks: u32 = self.read_swap()?;
+        // The trailer stores the bias as a signed integer, so reinterpret the
+        // bits rather than printing a huge unsigned number for a negative one.
+        println!("\tbias: {}", bias as i64);
+        println!("\tzero: {zero:#x}");
+        if zero != 0 {
+            println!("\t\t(Expected 0.)");
+        }
+        println!("\tblock size: {block_size:#x}");
+        if block_size != 0x3ff000 {
+            println!("\t\t(Expected 0x3ff000.)");
+        }
+        println!("\tn_blocks: {n_blocks}");
+        // `saturating_sub` keeps a trailer shorter than 24 bytes (already
+        // reported above) from underflowing here.
+        let expected_n_blocks = (next_len / 24).saturating_sub(1);
+        if n_blocks as u64 != expected_n_blocks {
+            println!("\t\t(Expected {expected_n_blocks}.)");
+        }
+
+        let mut expected_uncmp_ofs = ofs;
+        let mut expected_cmp_ofs = ofs + 24;
+        for i in 1..=n_blocks {
+            let blockinfo_ofs = self.r.stream_position()?;
+            let uncompressed_ofs: u64 = self.read_swap()?;
+            let compressed_ofs: u64 = self.read_swap()?;
+            let uncompressed_size: u32 = self.read_swap()?;
+            let compressed_size: u32 = self.read_swap()?;
+
+            println!("\n{blockinfo_ofs:08x}: ZLIB block descriptor {i}");
+
+            println!("\tuncompressed_ofs: {uncompressed_ofs:#x}");
+            if uncompressed_ofs != expected_uncmp_ofs {
+                println!("\t\t(Expected {expected_uncmp_ofs:#x}.)");
+            }
+
+            println!("\tcompressed_ofs: {compressed_ofs:#x}");
+            if compressed_ofs != expected_cmp_ofs {
+                println!("\t\t(Expected {expected_cmp_ofs:#x}.)");
+            }
+
+            println!("\tuncompressed_size: {uncompressed_size:#x}");
+            if i < n_blocks && uncompressed_size != block_size {
+                println!("\t\t(Expected {block_size:#x}.)");
+            }
+
+            println!("\tcompressed_size: {compressed_size:#x}");
+            if i == n_blocks && compressed_ofs.checked_add(compressed_size as u64) != Some(next_ofs)
+            {
+                // The last block must end exactly where the trailer begins, so
+                // the size it should have had is the distance from its start to
+                // the trailer (saturating, in case it starts past the trailer).
+                println!(
+                    "\t\t(This was expected to be {:#x}.)",
+                    next_ofs.saturating_sub(compressed_ofs)
+                );
+            }
+
+            expected_uncmp_ofs += uncompressed_size as u64;
+            // Blocks are contiguous in the file, so the next one starts after
+            // this block's *compressed* bytes.
+            expected_cmp_ofs += compressed_size as u64;
+        }
+        Ok(())
+    }
+
     fn read_extension_record(&mut self) -> Result<()> {
         let offset = self.r.stream_position()?;
         let subtype: u32 = self.read_swap()?;
@@ -475,6 +636,7 @@ impl Dissector {
             7 | 19 => self.read_mrsets(size, count),
             10 => self.read_extra_product_info(size, count),
             11 => self.read_display_parameters(size, count),
+            13 => self.read_long_string_map(size, count),
             _ => self.read_unknown_extension(subtype, size, count),
         }
     }
@@ -782,6 +944,7 @@ impl Dissector {
             let Some(name) = text.tokenize(b'=') else {
                 break;
             };
+            let name = Vec::from(name);
 
             let (mrset, cat_label_from_counted_values, label_from_var_label) = if text
                 .match_byte(b'C')
@@ -827,17 +990,45 @@ impl Dissector {
             };
 
             let counted_value = if mrset == MrSet::MD {
-                Some(text.parse_counted_string()?)
-            } else { None };
+                Some(Vec::from(text.parse_counted_string()?))
+            } else {
+                None
+            };
 
-            let label = text.parse_counted_string()?;
+            let label = Vec::from(text.parse_counted_string()?);
 
             let variables = text.tokenize(b'\n');
 
-            print!("\t\"{}\": multiple {} set",
-                   String::from_utf8_lossy(name),
-                   if mrset == MrSet::MC { "category" } else { "dichotomy" });
-
+            print!(
+                "\t\"{}\": multiple {} set",
+                String::from_utf8_lossy(&name),
+                if mrset == MrSet::MC {
+                    "category"
+                } else {
+                    "dichotomy"
+                }
+            );
+            if let Some(counted_value) = counted_value {
+                print!(
+                    ", counted value \"{}\"",
+                    String::from_utf8_lossy(&counted_value)
+                );
+            }
+            if cat_label_from_counted_values {
+                print!(", category labels from counted values");
+            }
+            if label != b"" {
+                print!(", label \"{}\"", String::from_utf8_lossy(&label));
+            }
+            if label_from_var_label {
+                print!(", label from variable label");
+            }
+            if let Some(variables) = variables {
+                print!(", variables \"{}\"", String::from_utf8_lossy(variables));
+            } else {
+                print!(", no variables");
+            }
+            println!();
         }
         Ok(())
     }
@@ -899,12 +1053,44 @@ impl Dissector {
         Ok(())
     }
 
-    fn open_text_record(&mut self, size: u32, count: u32) -> Result {
-        let n_bytes = match u32::checked_mul(size, count) {
-            Some(n) => n,
-            None => Err(anyhow!("Extension record too large."))?,
+    /// Dumps a very long string extension record (subtype 13), printing each
+    /// `variable => length` pair on its own line.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the record cannot be read or `size * count` overflows.
+    fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> {
+        println!(
+            "{:08x}: very long strings (variable => length)",
+            self.r.stream_position()?
+        );
+        let mut text = self.open_text_record(size, count)?;
+        while let Some((var, length)) = text.read_variable_to_value_pair() {
+            println!(
+                "\t{} => {}",
+                String::from_utf8_lossy(&var),
+                String::from_utf8_lossy(&length)
+            );
+        }
+        Ok(())
+    }
+
+    /// Reads the `size * count` bytes that make up an extension record's data.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `size * count` overflows `u32` or if `read_vec` fails.
+    fn read_text_record(&mut self, size: u32, count: u32) -> Result<Vec<u8>> {
+        let Some(n_bytes) = u32::checked_mul(size, count) else {
+            Err(anyhow!("Extension record too large."))?
         };
-        Ok(TextRecord::new(read_vec(&mut self.r, n_bytes as usize)?))
+        read_vec(&mut self.r, n_bytes as usize)
+    }
+
+    /// Reads an extension record's data and wraps it in a [`TextRecord`] for
+    /// parsing.
+    fn open_text_record(&mut self, size: u32, count: u32) -> Result<TextRecord> {
+        Ok(TextRecord::new(self.read_text_record(size, count)?))
     }
 }
 
@@ -996,8 +1182,11 @@ impl TextRecord {
         }
 
         let Some((start, end)) = self.get_n_bytes(length) else {
-            Err(anyhow!("{length}-byte string starting at offset {} exceeds record length {}",
-                        self.pos, self.buffer.len()))?
+            Err(anyhow!(
+                "{length}-byte string starting at offset {} exceeds record length {}",
+                self.pos,
+                self.buffer.len()
+            ))?
         };
         if !self.match_byte(b' ') {
             Err(anyhow!(
@@ -1008,4 +1197,16 @@ impl TextRecord {
         }
         Ok(&self.buffer[start..end])
     }
+
+    /// Parses the next pair from the record: the key is the token that
+    /// `tokenize` delimits by `=`, the value the token it delimits by a tab.
+    /// Any tabs or null bytes that follow the pair are then skipped.
+    /// Returns `None` when `tokenize` yields no key or no value.
+    fn read_variable_to_value_pair(&mut self) -> Option<(Vec<u8>, Vec<u8>)> {
+        let key = self.tokenize(b'=')?.into();
+        let value = self.tokenize(b'\t')?.into();
+
+        while self.match_byte(b'\t') || self.match_byte(b'\0') {}
+        Some((key, value))
+    }
 }