From: Ben Pfaff Date: Sun, 6 Aug 2023 19:08:19 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=559e5e5035fd98393beac3ddfd70d08dc2134c23;p=pspp work --- diff --git a/rust/src/main.rs b/rust/src/main.rs index 5da01dd024..0bb33bbbcc 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -14,26 +14,14 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -use anyhow::{anyhow, Result}; +use anyhow::{Result}; use clap::Parser; -use hexplay::HexView; -use hexplay::HexViewBuilder; -use num::Num; -use std::cmp::Ordering; -use std::collections::VecDeque; -use std::fmt; +use pspp::raw::Reader; use std::fs::File; -use std::io::prelude::*; use std::io::BufReader; -use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::str; -mod hexfloat; -use hexfloat::HexFloat; - -const ID_MAX_LEN: u32 = 64; - /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] @@ -48,1314 +36,20 @@ struct Args { } fn main() -> Result<()> { - let Args { max_cases, files } = Args::parse(); + let Args { files, .. } = Args::parse(); for file in files { - Dissector::new(file, max_cases)?; + dissect(&file)?; } Ok(()) } -#[derive(Copy, Clone, Debug)] -enum Compression { - Simple, - ZLib, -} - -#[derive(Copy, Clone, Debug)] -enum Endianness { - BigEndian, - LittleEndian, -} -use Endianness::*; - -trait Parse { - fn parse(self, bytes: [u8; N]) -> T; -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 8]) -> u64 { - match self { - BigEndian => u64::from_be_bytes(bytes), - LittleEndian => u64::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 4]) -> u32 { - match self { - BigEndian => u32::from_be_bytes(bytes), - LittleEndian => u32::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 2]) -> u16 { - match self { - BigEndian => u16::from_be_bytes(bytes), - LittleEndian => u16::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 1]) -> u8 { - match self { - BigEndian => u8::from_be_bytes(bytes), - LittleEndian => u8::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 8]) -> i64 { - match self { - BigEndian => i64::from_be_bytes(bytes), - LittleEndian => i64::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 4]) -> i32 { - match self { - BigEndian => i32::from_be_bytes(bytes), - LittleEndian => i32::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 2]) -> i16 { - match self { - BigEndian => i16::from_be_bytes(bytes), - LittleEndian => i16::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 1]) -> i8 { - match self { - BigEndian => i8::from_be_bytes(bytes), - LittleEndian => i8::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 8]) -> f64 { - match self { - BigEndian => f64::from_be_bytes(bytes), - LittleEndian => f64::from_le_bytes(bytes), - } - } -} - -fn read_bytes(r: &mut BufReader) -> Result<[u8; N]> { - let mut buf = [0; N]; - r.read_exact(&mut buf)?; - Ok(buf) -} - -fn read_vec(r: &mut BufReader, n: usize) -> Result> { - let mut vec = vec![0; n]; - r.read_exact(&mut vec)?; - Ok(vec) -} - -trait ReadSwap { - fn read_swap(&mut self) -> Result; -} - -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} - -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} - -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} - -struct Dissector { - filename: String, - r: BufReader, - endianness: Endianness, - fp_format: Endianness, - bias: f64, - n_variable_records: usize, - n_variables: usize, - var_widths: Vec, -} - -fn detect_endianness(layout_code: [u8; 4]) -> Option { - for endianness in [BigEndian, LittleEndian] { - match endianness.parse(layout_code) { - 2 | 3 => return Some(endianness), - _ => (), - } - } - None -} - -fn detect_fp_format(bias: [u8; 8]) -> Option { - for endianness in [BigEndian, LittleEndian] { - let value: f64 = endianness.parse(bias); - if value == 100.0 { - return Some(endianness); - } - } - None -} - -fn trim_end(mut s: Vec, c: u8) -> Vec { - while s.last() == Some(&c) { - s.pop(); - } - s -} - -fn format_name(type_: u32) -> &'static str { - match type_ { - 1 => "A", - 2 => "AHEX", - 3 => "COMMA", - 4 => "DOLLAR", - 5 => "F", - 6 => "IB", - 7 => "PIBHEX", - 8 => "P", - 9 => "PIB", - 10 => "PK", - 11 => "RB", - 12 => "RBHEX", - 15 => "Z", - 16 => "N", - 17 => "E", - 20 => "DATE", - 21 => "TIME", - 22 => "DATETIME", - 23 => "ADATE", - 24 => "JDATE", - 25 => "DTIME", - 26 => "WKDAY", - 27 => "MONTH", - 28 => "MOYR", - 29 => "QYR", - 30 => "WKYR", - 31 => "PCT", - 32 => "DOT", - 33 => "CCA", - 34 => "CCB", - 35 => "CCC", - 36 => "CCD", - 37 => "CCE", - 38 => "EDATE", - 39 => "SDATE", - 40 => "MTIME", - 41 => "YMDHMS", - _ => "invalid", - } -} - -fn round_up(x: T, y: T) -> T { - (x + (y - T::one())) / y * y -} - -struct UntypedValue { - raw: [u8; 8], - endianness: Endianness, -} - -impl UntypedValue { - fn new(raw: [u8; 8], endianness: Endianness) -> UntypedValue { - UntypedValue { raw, endianness } - } -} - -impl fmt::Display for UntypedValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let numeric: f64 = self.endianness.parse(self.raw); - let n_printable = self - .raw - .iter() - .take_while(|&&x| x == b' ' || x.is_ascii_graphic()) - .count(); - let printable_prefix = std::str::from_utf8(&self.raw[0..n_printable]).unwrap(); - write!(f, "{numeric}/\"{printable_prefix}\"") - } -} - -impl Dissector { - fn new>(filename: P, max_cases: usize) -> Result { - let mut r = BufReader::new(File::open(&filename)?); - let filename = filename.as_ref().to_string_lossy().into_owned(); - let rec_type: [u8; 4] = read_bytes(&mut r)?; - let zmagic = match &rec_type { - b"$FL2" => false, - b"$FL3" => true, - _ => Err(anyhow!("This is not an SPSS system file."))?, - }; - - let eye_catcher: [u8; 60] = read_bytes(&mut r)?; - let layout_code: [u8; 4] = read_bytes(&mut r)?; - let endianness = detect_endianness(layout_code) - .ok_or_else(|| anyhow!("This is not an SPSS system file."))?; - let layout_code: u32 = endianness.parse(layout_code); - let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?; - let compressed: u32 = endianness.parse(read_bytes(&mut r)?); - let compression = match (zmagic, compressed) { - (false, 0) => None, - (false, 1) => Some(Compression::Simple), - (true, 2) => Some(Compression::ZLib), - _ => Err(anyhow!( - "{} file header has invalid compression value {compressed}.", - if zmagic { "ZSAV" } else { "SAV" } - ))?, - }; - - let weight_index: u32 = endianness.parse(read_bytes(&mut r)?); - let n_cases: u32 = endianness.parse(read_bytes(&mut r)?); - - let bias: [u8; 8] = read_bytes(&mut r)?; - let fp_format = detect_fp_format(bias) - .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness }); - let bias: f64 = fp_format.parse(bias); - - let mut d = Dissector { - filename, - r, - endianness, - fp_format, - bias, - n_variable_records: 0, - n_variables: 0, - var_widths: Vec::new(), - }; - - let creation_date: [u8; 9] = read_bytes(&mut d.r)?; - let creation_time: [u8; 8] = read_bytes(&mut d.r)?; - let file_label: [u8; 64] = read_bytes(&mut d.r)?; - let file_label = trim_end(Vec::from(file_label), b' '); - d.skip_bytes(3)?; - - println!("File header record:"); - println!( - "{:>17}: {}", - "Product name", - String::from_utf8_lossy(&eye_catcher) - ); - println!("{:>17}: {}", "Layout code", layout_code); - println!( - "{:>17}: {} ({})", - "Compressed", - compressed, - match compression { - None => "no compression", - Some(Compression::Simple) => "simple compression", - Some(Compression::ZLib) => "ZLIB compression", - } - ); - println!("{:>17}: {}", "Weight index", weight_index); - println!("{:>17}: {}", "Number of cases", n_cases); - println!("{:>17}: {}", "Compression bias", bias); - println!( - "{:>17}: {}", - "Creation date", - String::from_utf8_lossy(&creation_date) - ); - println!( - "{:>17}: {}", - "Creation time", - String::from_utf8_lossy(&creation_time) - ); - println!( - "{:>17}: \"{}\"", - "File label", - String::from_utf8_lossy(&file_label) - ); - - loop { - let rec_type: u32 = d.read_swap()?; - match rec_type { - 2 => d.read_variable_record()?, - 3 => d.read_value_label_record()?, - 4 => Err(anyhow!("Misplaced type 4 record."))?, - 6 => d.read_document_record()?, - 7 => d.read_extension_record()?, - 999 => break, - _ => Err(anyhow!("Unrecognized record type {rec_type}."))?, - } - } - - let pos = d.r.stream_position()?; - println!( - "{:08x}: end-of-dictionary record (first byte of data at {:0x})", - pos, - pos + 4 - ); - - match compression { - Some(Compression::Simple) => { - if max_cases > 0 { - d.read_simple_compressed_data(max_cases)?; - } - } - Some(Compression::ZLib) => d.read_zlib_compressed_data()?, - None => (), - } - - Ok(d) - } - - fn read_simple_compressed_data(&mut self, max_cases: usize) -> Result<()> { - let _: i32 = self.read_swap()?; - println!("\n{:08x}: compressed data:", self.r.stream_position()?); - - const N_OPCODES: usize = 8; - let mut opcodes = VecDeque::::with_capacity(8); - let mut opcode_ofs = 0; - for case_num in 0..max_cases { - println!( - "{:08x}: case {case_num}'s uncompressible data begins", - self.r.stream_position()? - ); - let mut i = 0; - while i < self.var_widths.len() { - let width = self.var_widths[i]; - - let opcode_idx = N_OPCODES - opcodes.len(); - let Some(opcode) = opcodes.pop_back() else { - opcode_ofs = self.r.stream_position()?; - let mut new_opcodes = [0; N_OPCODES]; - if let Err(error) = self.r.read_exact(&mut new_opcodes) { - if i == 0 && error.kind() == ErrorKind::UnexpectedEof { - return Ok(()); - } else { - return Err(error.into()); - } - }; - opcodes.extend(new_opcodes.into_iter()); - continue; - }; - - print!( - "{:08x}: variable {i}: opcode {opcode}: ", - opcode_ofs + opcode_idx as u64 - ); - match opcode { - 0 => println!("ignored padding"), - 252 => { - println!("end of data"); - break; - } - 253 => { - let raw: [u8; 8] = read_bytes(&mut self.r)?; - let value = UntypedValue::new(raw, self.fp_format); - println!("uncompressible data: {value}"); - i += 1; - } - 254 => { - print!("spaces"); - if width == 0 { - print!(", but this is a numeric variable"); - } - println!(); - i += 1; - } - 255 => { - print!("SYSMIS"); - if width != 0 { - print!(", but this is a string variable (width={width})"); - } - println!(); - i += 1; - } - _ => { - print!("{}", opcode as f64 - self.bias); - if width != 0 { - print!(", but this is a string variable (width={width})"); - } - println!(); - i += 1; - } - } - } - } - Ok(()) - } - - fn read_zlib_compressed_data(&mut self) -> Result<()> { - let _: i32 = self.read_swap()?; - let ofs = self.r.stream_position()?; - println!("\n{ofs:08x}: ZLIB compressed data header:"); - - let this_ofs: u64 = self.read_swap()?; - let next_ofs: u64 = self.read_swap()?; - let next_len: u64 = self.read_swap()?; - - println!("\theader_ofs: {this_ofs:#x}"); - if this_ofs != ofs { - println!("\t\t(Expected {ofs:#x}.)"); - } - println!("\ttrailer_ofs: {next_ofs:#x}"); - println!("\ttrailer_len: {next_len}"); - if next_len < 24 || next_len % 24 != 0 { - println!("\t\t(Trailer length is not positive multiple of 24.)"); - } - - let zlib_data_len = next_ofs - (ofs + 8 * 3); - println!( - "\n{:08x}: {zlib_data_len:#x} bytes of ZLIB compressed data", - ofs + 8 * 3 - ); - - self.skip_bytes(zlib_data_len)?; - - println!("\n{next_ofs:08x}: ZLIB trailer fixed header"); - let bias: u64 = self.read_swap()?; - let zero: u64 = self.read_swap()?; - let block_size: u32 = self.read_swap()?; - let n_blocks: u32 = self.read_swap()?; - println!("\tbias: {bias}"); - println!("\tzero: {zero:#x}"); - if zero != 0 { - println!("\t\t(Expected 0.)"); - } - println!("\tblock size: {block_size:#x}"); - if block_size != 0x3ff000 { - println!("\t\t(Expected 0x3ff000.)"); - } - println!("\tn_blocks: {n_blocks}"); - if n_blocks as u64 != next_len / 24 - 1 { - println!("\t\t(Expected {}.)", next_len / 24 - 1); - } - - let mut expected_uncmp_ofs = ofs; - let mut expected_cmp_ofs = ofs + 24; - for i in 1..=n_blocks { - let blockinfo_ofs = self.r.stream_position()?; - let uncompressed_ofs: u64 = self.read_swap()?; - let compressed_ofs: u64 = self.read_swap()?; - let uncompressed_size: u32 = self.read_swap()?; - let compressed_size: u32 = self.read_swap()?; - - println!("\n{blockinfo_ofs:08x}: ZLIB block descriptor {i}"); - - println!("\tuncompressed_ofs: {uncompressed_ofs:#x}"); - if uncompressed_ofs != expected_uncmp_ofs { - println!("\t\t(Expected {ofs:#x}.)"); - } - - println!("\tcompressed_ofs: {compressed_ofs:#x}"); - if compressed_ofs != expected_cmp_ofs { - println!("\t\t(Expected {expected_cmp_ofs:#x}.)"); - } - - println!("\tuncompressed_size: {uncompressed_size:#x}"); - if i < n_blocks && uncompressed_size != block_size { - println!("\t\t(Expected {block_size:#x}.)"); - } - - println!("\tcompressed_size: {compressed_size:#x}"); - if i == n_blocks && compressed_ofs.checked_add(compressed_size as u64) != Some(next_ofs) - { - println!( - "\t\t(This was expected to be {:#x}.)", - next_ofs - compressed_size as u64 - ); - } - - expected_uncmp_ofs += uncompressed_size as u64; - expected_cmp_ofs += uncompressed_size as u64; - } - Ok(()) - } - - fn read_extension_record(&mut self) -> Result<()> { - let offset = self.r.stream_position()?; - let subtype: u32 = self.read_swap()?; - let size: u32 = self.read_swap()?; - let count: u32 = self.read_swap()?; - println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}"); - if size.checked_mul(count).is_none() { - Err(anyhow!("{size} * {count} exceeds {}", u32::MAX))? - } - match subtype { - 3 => self.read_machine_integer_info(size, count), - 4 => self.read_machine_float_info(size, count), - 5 => self.read_variable_sets(size, count), - 6 => { - // DATE variable information. We don't use it yet, but we should. - Ok(()) - } - 7 | 19 => self.read_mrsets(size, count), - 10 => self.read_extra_product_info(size, count), - 11 => self.read_display_parameters(size, count), - 13 => self.read_long_var_name_map(size, count), - 14 => self.read_long_string_map(size, count), - 16 => self.read_ncases64(size, count), - 17 => self.read_datafile_attributes(size, count), - 18 => self.read_variable_attributes(size, count), - 20 => self.read_character_encoding(size, count), - 21 => self.read_long_string_value_labels(size, count), - 22 => self.read_long_string_missing_values(size, count), - _ => self.read_unknown_extension(subtype, size, count), - } - } - - fn warn(&mut self, s: String) -> Result<()> { - println!( - "\"{}\" near offset 0x{:08x}: {s}", - self.filename, - self.r.stream_position()? - ); - Ok(()) - } - - fn skip_bytes(&mut self, mut n: u64) -> Result<()> { - let mut buf = [0; 1024]; - while n > 0 { - let chunk = u64::min(n, buf.len() as u64); - self.r.read_exact(&mut buf[0..chunk as usize])?; - n -= chunk; - } - Ok(()) - } - - fn read_unknown_extension(&mut self, subtype: u32, size: u32, count: u32) -> Result<()> { - self.warn(format!("Unrecognized record type 7, subtype {subtype}."))?; - if size == 0 || count > 65536 / size { - self.skip_bytes(size as u64 * count as u64)?; - } else if size != 1 { - let mut offset = 0; - for _ in 0..count { - let vec = read_vec(&mut self.r, size as usize)?; - println!( - "{}", - HexViewBuilder::new(&vec).address_offset(offset).finish() - ); - offset += size as usize; - } - } - Ok(()) - } - - fn read_variable_record(&mut self) -> Result<()> { - self.n_variable_records += 1; - println!( - "{:08x}: variable record {}", - self.r.stream_position()?, - self.n_variable_records - ); - let width: i32 = self.read_swap()?; - let has_variable_label: u32 = self.read_swap()?; - let missing_value_code: i32 = self.read_swap()?; - let print_format: u32 = self.read_swap()?; - let write_format: u32 = self.read_swap()?; - let name: [u8; 8] = read_bytes(&mut self.r)?; - let name: Vec = trim_end(Vec::from(name), b'\0'); - - if width >= 0 { - self.n_variables += 1; - } - self.var_widths.push(width); - - println!( - "\tWidth: {width} ({})", - match width { - _ if width > 0 => "string", - _ if width == 0 => "numeric", - _ => "long string continuation record", - } - ); - - println!("\tVariable label: {has_variable_label}"); - println!( - "\tMissing values code: {missing_value_code} ({})", - match missing_value_code { - 0 => "no missing values", - 1 => "one missing value", - 2 => "two missing values", - 3 => "three missing values", - -2 => "one missing value range", - -3 => "one missing value, one range", - _ => "bad value", - } - ); - for (which, format) in [("Print", print_format), ("Worite", write_format)] { - let type_ = format_name(format >> 16); - let w = (format >> 8) & 0xff; - let d = format & 0xff; - println!("\t{which} format: {format:06x} ({type_}{w}.{d})"); - } - println!("\tName: {}", String::from_utf8_lossy(&name)); - - // Read variable label. - match has_variable_label { - 0 => (), - 1 => { - let offset = self.r.stream_position()?; - let len: u32 = self.read_swap()?; - let read_len = len.min(65535) as usize; - let label = read_vec(&mut self.r, read_len)?; - println!( - "\t{offset:08x} Variable label: \"{}\"", - String::from_utf8_lossy(&label) - ); - - self.skip_bytes((round_up(len, 4) - len).into())?; - } - _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?, - }; - - // Read missing values. - if missing_value_code != 0 { - print!("\t{:08x} Missing values:", self.r.stream_position()?); - match width.cmp(&0) { - Ordering::Equal => { - let (has_range, n_individual) = match missing_value_code { - -3 => (true, 1), - -2 => (true, 0), - 1 | 2 | 3 => (false, missing_value_code), - _ => Err(anyhow!( - "Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3." - ))?, - }; - if has_range { - let low: f64 = self.read_swap()?; - let high: f64 = self.read_swap()?; - print!(" {low}...{high}"); - } - for _ in 0..n_individual { - let value: f64 = self.read_swap()?; - print!(" {value}"); - } - } - Ordering::Greater => { - if !(0..=3).contains(&missing_value_code) { - Err(anyhow!( - "String missing value indicator field is not 0, 1, 2, or 3." - ))?; - } - for _ in 0..missing_value_code { - let string: [u8; 8] = read_bytes(&mut self.r)?; - let string: Vec = trim_end(Vec::from(string), b'\0'); - println!(" {}", String::from_utf8_lossy(&string)); - } - } - Ordering::Less => (), - } - println!(); - } - - Ok(()) - } - - fn read_value_label_record(&mut self) -> Result<()> { - println!("{:08x}: value labels record", self.r.stream_position()?); - - // Read the labels. - let n_labels: u32 = self.read_swap()?; - for _ in 0..n_labels { - let raw: [u8; 8] = read_bytes(&mut self.r)?; - let value = UntypedValue::new(raw, self.fp_format); - let label_len: u8 = self.read_swap()?; - let padded_len = round_up(label_len as usize + 1, 8); - - let mut label = read_vec(&mut self.r, padded_len)?; - label.truncate(label_len as usize); - let label = String::from_utf8_lossy(&label); - - println!("\t{value}: {label}"); - } - - // Read the type-4 record with the corresponding variable indexes. - let rec_type: u32 = self.read_swap()?; - if rec_type != 4 { - Err(anyhow!( - "Variable index record (type 4) does not immediately \ - follow value label record (type 3) as it should." - ))?; - } - - println!("\t{:08x}: apply to variables", self.r.stream_position()?); - let n_vars: u32 = self.read_swap()?; - for _ in 0..n_vars { - let index: u32 = self.read_swap()?; - print!(" {index}"); - } - println!(); - - Ok(()) - } - - fn read_document_record(&mut self) -> Result<()> { - println!("{:08x}: document record", self.r.stream_position()?); - let n_lines: u32 = self.read_swap()?; - println!("\t{n_lines} lines of documents"); - - for i in 0..n_lines { - print!("\t{:08x}: ", self.r.stream_position()?); - let line: [u8; 64] = read_bytes(&mut self.r)?; - let line = trim_end(Vec::from(line), b' '); - println!("line {i}: \"{}\"", String::from_utf8_lossy(&line)); - } - Ok(()) - } - - fn read_machine_integer_info(&mut self, size: u32, count: u32) -> Result<()> { - let offset = self.r.stream_position()?; - let version_major: u32 = self.read_swap()?; - let version_minor: u32 = self.read_swap()?; - let version_revision: u32 = self.read_swap()?; - let machine_code: u32 = self.read_swap()?; - let float_representation: u32 = self.read_swap()?; - let compression_code: u32 = self.read_swap()?; - let integer_representation: u32 = self.read_swap()?; - let character_code: u32 = self.read_swap()?; - - println!("{offset:08x}: machine integer info"); - if size != 4 || count != 8 { - Err(anyhow!( - "Bad size ({size}) or count ({count}) field on record type 7, subtype 3" - ))?; - } - println!("\tVersion: {version_major}.{version_minor}.{version_revision}"); - println!("\tMachine code: {machine_code}"); - println!( - "\tFloating point representation: {float_representation} ({})", - match float_representation { - 1 => "IEEE 754", - 2 => "IBM 370", - 3 => "DEC VAX", - _ => "unknown", - } - ); - println!("\tCompression code: {compression_code}"); - println!( - "\tEndianness: {integer_representation} ({})", - match integer_representation { - 1 => "big", - 2 => "little", - _ => "unknown", - } - ); - println!("\tCharacter code: {character_code}"); - Ok(()) - } - - fn read_machine_float_info(&mut self, size: u32, count: u32) -> Result<()> { - let offset = self.r.stream_position()?; - let sysmis: f64 = self.read_swap()?; - let highest: f64 = self.read_swap()?; - let lowest: f64 = self.read_swap()?; - - println!("{offset:08x}: machine float info"); - if size != 4 || count != 8 { - Err(anyhow!( - "Bad size ({size}) or count ({count}) field on extension 4." - ))?; - } - - println!("\tsysmis: {sysmis} ({})", HexFloat(sysmis)); - println!("\thighest: {highest} ({})", HexFloat(highest)); - println!("\tlowest: {lowest} ({})", HexFloat(lowest)); - Ok(()) - } - - fn read_variable_sets(&mut self, size: u32, count: u32) -> Result<()> { - println!("{:08x}: variable sets", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; - loop { - while text.match_byte(b'\n') { - continue; - } - let set = match text.tokenize(b'=') { - Some(set) => String::from_utf8_lossy(set).into_owned(), - None => break, - }; - - // Always present even for an empty set. - text.match_byte(b' '); - - match text.tokenize(b'\n') { - None => println!("\tset \"{set}\" is empty"), - Some(variables) => { - println!( - "\tset \"{set}\" contains \"{}\"", - String::from_utf8_lossy(variables).trim_end_matches('\r') - ); - } - }; - } - Ok(()) - } - - // Read record type 7, subtype 7. - fn read_mrsets(&mut self, size: u32, count: u32) -> Result<()> { - print!("{:08x}: multiple response sets", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; - loop { - #[derive(PartialEq, Eq)] - enum MrSet { - MC, - MD, - } - - while text.match_byte(b'\n') {} - let Some(name) = text.tokenize(b'=') else { - break; - }; - let name = Vec::from(name); - - let (mrset, cat_label_from_counted_values, label_from_var_label) = if text - .match_byte(b'C') - { - if !text.match_byte(b' ') { - Err(anyhow!( - "missing space following 'C' at offset {} in mrsets record", - text.pos - ))?; - } - (MrSet::MC, false, false) - } else if text.match_byte(b'D') { - (MrSet::MD, false, false) - } else if text.match_byte(b'E') { - if !text.match_byte(b' ') { - Err(anyhow!( - "missing space following 'E' at offset {} in mrsets record", - text.pos - ))?; - } - - let pos = text.pos; - let Some(number) = text.tokenize(b' ') else { - Err(anyhow!( - "Missing label source value following `E' at offset {}u in MRSETS record", - text.pos - ))? - }; - - let label_from_var_label = if number == b"11" { - true - } else if number == b"1" { - false - } else { - Err(anyhow!("Unexpected label source value `{}' following `E' at offset {pos} in MRSETS record", String::from_utf8_lossy(number)))? - }; - (MrSet::MD, true, label_from_var_label) - } else { - Err(anyhow!( - "missing `C', `D', or `E' at offset {} in mrsets record", - text.pos - ))? - }; - - let counted_value = if mrset == MrSet::MD { - Some(Vec::from(text.parse_counted_string()?)) - } else { - None - }; - - let label = Vec::from(text.parse_counted_string()?); - - let variables = text.tokenize(b'\n'); - - print!( - "\t\"{}\": multiple {} set", - String::from_utf8_lossy(&name), - if mrset == MrSet::MC { - "category" - } else { - "dichotomy" - } - ); - if let Some(counted_value) = counted_value { - print!( - ", counted value \"{}\"", - String::from_utf8_lossy(&counted_value) - ); - } - if cat_label_from_counted_values { - println!(", category labels from counted values"); - } - if label != b"" { - print!(", label \"{}\"", String::from_utf8_lossy(&label)); - } - if label_from_var_label { - print!(", label from variable label"); - } - if let Some(variables) = variables { - print!(", variables \"{}\"", String::from_utf8_lossy(variables)); - } else { - print!("no variables"); - } - println!(); - } - Ok(()) - } - - fn read_extra_product_info(&mut self, size: u32, count: u32) -> Result<()> { - print!("{:08x}: extra product info", self.r.stream_position()?); - let text = self.open_text_record(size, count)?; - print_string(&text.buffer); - Ok(()) - } - - fn read_display_parameters(&mut self, size: u32, count: u32) -> Result<()> { - println!( - "{:08x}: variable display parameters", - self.r.stream_position()? - ); - if size != 4 { - Err(anyhow!("Bad size ({size}) on extension 11."))?; - } - let n_vars = self.n_variables; - let includes_width = if count as usize == 3 * n_vars { - true - } else if count as usize == 2 * n_vars { - false - } else { - Err(anyhow!( - "Extension 11 has bad count {count} (for {n_vars} variables)." - ))? - }; - - for i in 0..n_vars { - let measure: u32 = self.read_swap()?; - print!( - "\tVar #{i}: measure={measure} ({})", - match measure { - 1 => "nominal", - 2 => "ordinal", - 3 => "scale", - _ => "invalid", - } - ); - - if includes_width { - let width: u32 = self.read_swap()?; - print!(", width={width}"); - } - - let align: u32 = self.read_swap()?; - println!( - ", align={align} ({})", - match align { - 0 => "left", - 1 => "right", - 2 => "centre", - _ => "invalid", - } - ); - } - Ok(()) - } - - fn read_long_var_name_map(&mut self, size: u32, count: u32) -> Result<()> { - print!( - "{:08x}: long variable names (short => long)", - self.r.stream_position()? - ); - let mut text = self.open_text_record(size, count)?; - while let Some((var, long_name)) = text.read_variable_to_value_pair() { - println!( - "\t{} => {}", - String::from_utf8_lossy(&var), - String::from_utf8_lossy(&long_name) - ); - } - Ok(()) - } - - fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> { - print!( - "{:08x}: very long strings (variable => length)", - self.r.stream_position()? - ); - let mut text = self.open_text_record(size, count)?; - while let Some((var, length)) = text.read_variable_to_value_pair() { - println!( - "\t{} => {}", - String::from_utf8_lossy(&var), - String::from_utf8_lossy(&length) - ); - } - Ok(()) - } - - fn read_ncases64(&mut self, size: u32, count: u32) -> Result<()> { - if size != 8 { - Err(anyhow!("Bad size {size} for extended number of cases."))? - } - if count != 2 { - Err(anyhow!("Bad count {count} for extended number of cases."))? - } - let unknown: u64 = self.read_swap()?; - let ncases64: u64 = self.read_swap()?; - print!( - "{:08x}: extended number of cases: unknown={unknown}, ncases64={ncases64}", - self.r.stream_position()? - ); - Ok(()) - } - - fn read_attributes(&mut self, text: &mut TextRecord, variable: &str) -> Result<()> { - loop { - let Some(key) = text.tokenize_string(b'(') else { - break; - }; - for index in 1.. { - let Some(value) = text.tokenize_string(b'\n') else { - Err(anyhow!( - "{variable}: Error parsing attribute value {key}[{index}]" - ))? - }; - if value.starts_with('\'') && value.ends_with('\'') && value.len() >= 2 { - let middle = &value[1..value.len() - 2]; - println!("\t{variable}: {key}[{index}] = \"{middle}\""); - } else { - self.warn(format!( - "{variable}: Attribute value {key}[{index}] is not quoted: {value}" - ))?; - } - if text.match_byte(b')') { - break; - } - } - - if text.match_byte(b'/') { - break; - } - } - Ok(()) - } - - fn read_datafile_attributes(&mut self, size: u32, count: u32) -> Result<()> { - print!("{:08x}: datafile attributes", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; - self.read_attributes(&mut text, "datafile")?; - Ok(()) - } - - fn read_variable_attributes(&mut self, size: u32, count: u32) -> Result<()> { - print!("{:08x}: variable attributes", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; - loop { - let Some(variable) = text.tokenize_string(b':') else { - break; - }; - self.read_attributes(&mut text, &variable)?; - } - Ok(()) - } - - fn read_character_encoding(&mut self, size: u32, count: u32) -> Result<()> { - let offset = self.r.stream_position()?; - let encoding = read_vec(&mut self.r, (size * count) as usize)?; - println!("{offset:08x}: Character Encoding: {}", String::from_utf8_lossy(&encoding)); - Ok(()) - } - - fn read_long_string_value_labels(&mut self, size: u32, count: u32) -> Result<()> { - let start = self.r.stream_position()?; - - println!("{start:08x}: long string value labels"); - while self.r.stream_position()? - start < (size * count) as u64 { - let position = self.r.stream_position()?; - - let var_name_len: u32 = self.read_swap()?; - if var_name_len > ID_MAX_LEN { - Err(anyhow!("Variable name length in long string value label record ({var_name_len} exceeds {ID_MAX_LEN}-byte limit."))? - } - let var_name = read_vec(&mut self.r, var_name_len as usize)?; - - let width: u32 = self.read_swap()?; - let n_values: u32 = self.read_swap()?; - - println!("\t{position:08x}: {}, width {width}, {n_values} values", - String::from_utf8_lossy(&var_name)); - - for _ in 0..n_values { - let position = self.r.stream_position()?; - let value_length: u32 = self.read_swap()?; - let value = read_vec(&mut self.r, value_length as usize)?; - let label_length: u32 = self.read_swap()?; - let label = read_vec(&mut self.r, value_length as usize)?; - println!("\t\t{position:08x}: \"{}\" ({value_length} bytes) => \"{}\" ({label_length} bytes)", - String::from_utf8_lossy(&value), - String::from_utf8_lossy(&label)); - } - } - Ok(()) - } - - fn read_long_string_missing_values(&mut self, size: u32, count: u32) -> Result<()> { - let start = self.r.stream_position()?; - - println!("{start:08x}: long string missing values"); - while self.r.stream_position()? - start < (size * count) as u64 { - let position = self.r.stream_position()?; - - let var_name_len: u32 = self.read_swap()?; - if var_name_len > ID_MAX_LEN { - Err(anyhow!("Variable name length in long string missing value record ({var_name_len} exceeds {ID_MAX_LEN}-byte limit."))? - } - let var_name = read_vec(&mut self.r, var_name_len as usize)?; - - let n_missing_values: u8 = self.read_swap()?; - let value_length: u32 = self.read_swap()?; - - println!("\t{position:08x}: {}, {n_missing_values}, each {value_length} bytes:", - String::from_utf8_lossy(&var_name)); - - for _ in 0..n_missing_values { - let value = read_vec(&mut self.r, value_length as usize)?; - println!(" \"{}\"", String::from_utf8_lossy(&value)); - } - } - Ok(()) - } - - fn read_text_record(&mut self, size: u32, count: u32) -> Result> { - let Some(n_bytes) = u32::checked_mul(size, count) else { - Err(anyhow!("Extension record too large."))? - }; - read_vec(&mut self.r, n_bytes as usize) - } - - fn open_text_record(&mut self, size: u32, count: u32) -> Result { - Ok(TextRecord::new(self.read_text_record(size, count)?)) - } -} - -fn print_string(s: &[u8]) { - if s.contains(&b'\0') { - println!("{}", HexView::new(s)); - } else { - for &c in s { - match c { - b'\\' => print!("\\\\"), - b'\n' => println!(), - c if (b' '..=b'~').contains(&c) => print!("{}", c as char), - c => print!("\\{:2x}", c), - } - } - } -} - -struct TextRecord { - buffer: Vec, - pos: usize, -} - -impl TextRecord { - fn new(buffer: Vec) -> TextRecord { - TextRecord { buffer, pos: 0 } - } - - fn tokenize(&mut self, delimiter: u8) -> Option<&[u8]> { - let start = self.pos; - while self.pos < self.buffer.len() - && self.buffer[self.pos] != delimiter - && self.buffer[self.pos] != 0 - { - self.pos += 1 - } - if start == self.pos { - None - } else { - Some(&self.buffer[start..self.pos]) - } - } - - fn tokenize_string(&mut self, delimiter: u8) -> Option { - self.tokenize(delimiter) - .map(|s| String::from_utf8_lossy(s).into_owned()) - } - - fn match_byte(&mut self, c: u8) -> bool { - if self.pos < self.buffer.len() && self.buffer[self.pos] == c { - self.pos += 1; - true - } else { - false - } - } - - fn parse_usize(&mut self) -> Result { - let n_digits = self.buffer[self.pos..] - .iter() - .take_while(|c| c.is_ascii_digit()) - .count(); - if n_digits == 0 { - Err(anyhow!("expecting digit at offset {} in record", self.pos))?; - } - let start = self.pos; - self.pos += n_digits; - let end = self.pos; - let digits = str::from_utf8(&self.buffer[start..end]).unwrap(); - let Ok(number) = digits.parse::() else { - Err(anyhow!( - "expecting number in [0,{}] at offset {} in record", - usize::MAX, - self.pos - ))? - }; - self.pos = end; - Ok(number) - } - - fn get_n_bytes(&mut self, n: usize) -> Option<(usize, usize)> { - let start = self.pos; - let Some(end) = start.checked_add(n) else { - return None; - }; - self.pos = end; - Some((start, end)) - } - - fn parse_counted_string(&mut self) -> Result<&[u8]> { - let length = self.parse_usize()?; - if !self.match_byte(b' ') { - Err(anyhow!("expecting space at offset {} in record", self.pos))?; - } - - let Some((start, end)) = self.get_n_bytes(length) else { - Err(anyhow!( - "{length}-byte string starting at offset {} exceeds record length {}", - self.pos, - self.buffer.len() - ))? - }; - if !self.match_byte(b' ') { - Err(anyhow!( - "expecting space at offset {} following {}-byte string", - self.pos, - end - start - ))?; - } - Ok(&self.buffer[start..end]) - } - - fn read_variable_to_value_pair(&mut self) -> Option<(Vec, Vec)> { - let key = self.tokenize(b'=')?.into(); - let value = self.tokenize(b'\t')?.into(); - - while self.match_byte(b'\t') || self.match_byte(b'\0') {} - Some((key, value)) +fn dissect(file_name: &Path) -> Result<()> { + let reader = File::open(file_name)?; + let reader = BufReader::new(reader); + let reader = Reader::new(reader)?; + for record in reader { + println!("{record:?}"); } + Ok(()) } diff --git a/rust/src/raw.rs b/rust/src/raw.rs index f0e8c540c2..ca0596f541 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -3,6 +3,7 @@ use crate::Error; use flate2::read::ZlibDecoder; use num::Integer; +use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::str::from_utf8; use std::{ collections::VecDeque, @@ -18,6 +19,7 @@ pub enum Compression { ZLib, } +#[derive(Clone, Debug)] pub enum Record { Header(Header), Document(Document), @@ -49,6 +51,27 @@ impl Record { } } +pub struct FallbackEncoding<'a>(&'a [u8]); + +impl<'a> Debug for FallbackEncoding<'a> { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + if let Ok(s) = from_utf8(self.0) { + let s = s.trim_end(); + write!(f, "\"{s}\"") + } else { + let s: String = self + .0 + .iter() + .map(|c| char::from(*c).escape_default()) + .flatten() + .collect(); + let s = s.trim_end(); + write!(f, "\"{s}\"") + } + } +} + +#[derive(Clone)] pub struct Header { /// Magic number. pub magic: Magic, @@ -90,6 +113,30 @@ pub struct Header { pub endian: Endian, } +impl Header { + fn debug_field(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult { + writeln!(f, "{name:>17}: {:?}", value) + } +} + +impl Debug for Header { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + writeln!(f, "File header record:")?; + self.debug_field(f, "Magic", self.magic)?; + self.debug_field(f, "Product name", FallbackEncoding(&self.eye_catcher))?; + self.debug_field(f, "Layout code", self.layout_code)?; + self.debug_field(f, "Nominal case size", self.nominal_case_size)?; + self.debug_field(f, "Compression", self.compression)?; + self.debug_field(f, "Weight index", self.weight_index)?; + self.debug_field(f, "Number of cases", self.n_cases)?; + self.debug_field(f, "Compression bias", self.bias)?; + self.debug_field(f, "Creation date", FallbackEncoding(&self.creation_date))?; + self.debug_field(f, "Creation time", FallbackEncoding(&self.creation_time))?; + self.debug_field(f, "File label", FallbackEncoding(&self.file_label))?; + self.debug_field(f, "Endianness", self.endian) + } +} + impl Header { fn read(r: &mut R) -> Result { let magic: [u8; 4] = read_bytes(r)?; @@ -116,7 +163,7 @@ impl Header { }; let weight_index: u32 = endian.parse(read_bytes(r)?); - let weight_index = (weight_index > 0).then_some(weight_index - 1); + let weight_index = (weight_index > 0).then(|| weight_index - 1); let n_cases: u32 = endian.parse(read_bytes(r)?); let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases); @@ -160,6 +207,18 @@ impl Magic { pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]); } +impl Debug for Magic { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let s = match self { + &Magic::SAV => "$FL2", + &Magic::ZSAV => "$FL3", + &Magic::EBCDIC => "($FL2 in EBCDIC)", + _ => return write!(f, "{:?}", self.0), + }; + write!(f, "{s}") + } +} + impl TryFrom<[u8; 4]> for Magic { type Error = Error; @@ -336,7 +395,21 @@ pub enum Value { String([u8; 8]), } +impl Debug for Value { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + match self { + Value::Number(Some(number)) => write!(f, "{number:?}"), + Value::Number(None) => write!(f, "SYSMIS"), + Value::String(bytes) => write!(f, "{:?}", FallbackEncoding(bytes)), + } + } +} + impl Value { + fn read(r: &mut R, var_type: VarType, endian: Endian) -> Result { + Ok(Self::from_raw(var_type, read_bytes(r)?, endian)) + } + pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value { match var_type { VarType::String => Value::String(raw), @@ -517,6 +590,132 @@ impl Iterator for Reader { impl FusedIterator for Reader {} +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Format(pub u32); + +impl Debug for Format { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let type_ = format_name(self.0 >> 16); + let w = (self.0 >> 8) & 0xff; + let d = self.0 & 0xff; + write!(f, "{:06x} ({type_}{w}.{d})", self.0) + } +} + +fn format_name(type_: u32) -> &'static str { + match type_ { + 1 => "A", + 2 => "AHEX", + 3 => "COMMA", + 4 => "DOLLAR", + 5 => "F", + 6 => "IB", + 7 => "PIBHEX", + 8 => "P", + 9 => "PIB", + 10 => "PK", + 11 => "RB", + 12 => "RBHEX", + 15 => "Z", + 16 => "N", + 17 => "E", + 20 => "DATE", + 21 => "TIME", + 22 => "DATETIME", + 23 => "ADATE", + 24 => "JDATE", + 25 => "DTIME", + 26 => "WKDAY", + 27 => "MONTH", + 28 => "MOYR", + 29 => "QYR", + 30 => "WKYR", + 31 => "PCT", + 32 => "DOT", + 33 => "CCA", + 34 => "CCB", + 35 => "CCC", + 36 => "CCD", + 37 => "CCE", + 38 => "EDATE", + 39 => "SDATE", + 40 => "MTIME", + 41 => "YMDHMS", + _ => "(unknown)", + } +} + +#[derive(Clone)] +pub struct MissingValues { + /// Individual missing values, up to 3 of them. + pub values: Vec, + + /// Optional range of missing values. + pub range: Option<(Value, Value)>, +} + +impl Debug for MissingValues { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + for (i, value) in self.values.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{value:?}")?; + } + + if let Some((low, high)) = self.range { + if !self.values.is_empty() { + write!(f, ", ")?; + } + write!(f, "{low:?} THRU {high:?}")?; + } + + if self.is_empty() { + write!(f, "none")?; + } + + Ok(()) + } +} + +impl MissingValues { + fn is_empty(&self) -> bool { + self.values.is_empty() && self.range.is_none() + } + + fn read( + r: &mut R, + offset: u64, + width: i32, + code: i32, + endian: Endian, + ) -> Result { + let (n_values, has_range) = match (width, code) { + (_, 0..=3) => (code, false), + (0, -2) => (0, true), + (0, -3) => (1, true), + (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }), + (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }), + }; + + let var_type = VarType::from_width(width); + + let mut values = Vec::new(); + for _ in 0..n_values { + values.push(Value::read(r, var_type, endian)?); + } + let range = if has_range { + let low = Value::read(r, var_type, endian)?; + let high = Value::read(r, var_type, endian)?; + Some((low, high)) + } else { + None + }; + Ok(MissingValues { values, range }) + } +} + +#[derive(Clone)] pub struct Variable { /// Offset from the start of the file to the start of the record. pub offset: u64, @@ -533,16 +732,41 @@ pub struct Variable { /// Write format. pub write_format: u32, - /// Missing value code, one of -3, -2, 0, 1, 2, or 3. - pub missing_value_code: i32, - - /// Raw missing values, up to 3 of them. - pub missing: Vec<[u8; 8]>, + /// Missing values. + pub missing_values: MissingValues, /// Optional variable label. pub label: Option>, } +impl Debug for Variable { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + writeln!( + f, + "Width: {} ({})", + self.width, + if self.width > 0 { + "string" + } else if self.width == 0 { + "numeric" + } else { + "long string continuation record" + } + )?; + writeln!(f, "Print format: {:?}", Format(self.print_format))?; + writeln!(f, "Write format: {:?}", Format(self.write_format))?; + writeln!(f, "Name: {:?}", FallbackEncoding(&self.name))?; + writeln!( + f, + "Variable label: {:?}", + self.label + .as_ref() + .map(|label| FallbackEncoding(&label[..])) + )?; + writeln!(f, "Missing values: {:?}", self.missing_values) + } +} + impl Variable { fn read(r: &mut R, endian: Endian) -> Result { let offset = r.stream_position()?; @@ -573,29 +797,7 @@ impl Variable { } }; - let mut missing = Vec::new(); - if missing_value_code != 0 { - match (width, missing_value_code) { - (0, -3 | -2 | 1 | 2 | 3) => (), - (0, _) => { - return Err(Error::BadNumericMissingValueCode { - offset, - code: missing_value_code, - }) - } - (_, 0..=3) => (), - (_, _) => { - return Err(Error::BadStringMissingValueCode { - offset, - code: missing_value_code, - }) - } - } - - for _ in 0..missing_value_code.abs() { - missing.push(read_bytes(r)?); - } - } + let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?; Ok(Variable { offset, @@ -603,13 +805,13 @@ impl Variable { name, print_format, write_format, - missing_value_code, - missing, + missing_values, label, }) } } +#[derive(Clone, Debug)] pub struct ValueLabel { /// Offset from the start of the file to the start of the record. pub offset: u64, @@ -648,6 +850,7 @@ impl ValueLabel { } } +#[derive(Clone, Debug)] pub struct VarIndexes { /// Offset from the start of the file to the start of the record. pub offset: u64, @@ -682,6 +885,7 @@ impl VarIndexes { } } +#[derive(Clone, Debug)] pub struct Document { /// Offset from the start of the file to the start of the record. pub pos: u64, @@ -1348,6 +1552,7 @@ impl ExtensionRecord for NumberOfCasesRecord { } } +#[derive(Clone, Debug)] pub struct Extension { /// Offset from the start of the file to the start of the record. pub offset: u64, @@ -1443,6 +1648,7 @@ impl Extension { } } +#[derive(Clone, Debug)] pub struct ZHeader { /// File offset to the start of the record. pub offset: u64, @@ -1473,6 +1679,7 @@ impl ZHeader { } } +#[derive(Clone, Debug)] pub struct ZTrailer { /// File offset to the start of the record. pub offset: u64, @@ -1491,6 +1698,7 @@ pub struct ZTrailer { pub blocks: Vec, } +#[derive(Clone, Debug)] pub struct ZBlock { /// Offset of block of data if simple compression were used. pub uncompressed_ofs: u64,