From: Ben Pfaff
Date: Sun, 23 Jul 2023 18:12:35 +0000 (-0700)
Subject: feature complete
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9082562882c1d7a5b9403970bba077b70cb1d2b2;p=pspp

feature complete
---

Review notes (placed between the `---` marker and the first `diff --git`
line, where patch tools expect free-form commentary):

* read_attributes: the quoted value was sliced as `1..len - 2`, which dropped
  the last character of every value and panicked on the empty value `''`.
  It now slices `1..len - 1`.
* read_long_string_value_labels: the label was read using `value_length`
  instead of its own `label_length`.
* The two "Variable name length ..." messages were missing the `)` after the
  length, unlike the C message `record (%d) exceeds %d-byte limit.`
* read_ncases64 used `print!`, leaving its output line unterminated; it now
  uses `println!` like the other non-text readers added here.
* Doc comments were added, so the new-file side of each hunk header was
  recomputed; the `index` blob ids are still those of the original commit.
* `Option<String>` on tokenize_string follows from its body.  The return type
  on the read_text_record context line is presumed to be `Result<Vec<u8>>`;
  confirm against the tree.

diff --git a/rust/src/main.rs b/rust/src/main.rs
index 7ee84de9ce..5da01dd024 100644
--- a/rust/src/main.rs
+++ b/rust/src/main.rs
@@ -32,6 +32,9 @@ use std::str;
 mod hexfloat;
 use hexfloat::HexFloat;
 
+/// Maximum length of a variable name, in bytes.
+const ID_MAX_LEN: u32 = 64;
+
 /// A utility to dissect SPSS system files.
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
@@ -588,6 +591,10 @@ impl Dissector {
         let size: u32 = self.read_swap()?;
         let count: u32 = self.read_swap()?;
         println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}");
+        // Several subtype readers compute `size * count` unchecked, so reject overflow here.
+        if size.checked_mul(count).is_none() {
+            Err(anyhow!("{size} * {count} exceeds {}", u32::MAX))?
+        }
         match subtype {
             3 => self.read_machine_integer_info(size, count),
             4 => self.read_machine_float_info(size, count),
@@ -599,7 +606,14 @@ impl Dissector {
             7 | 19 => self.read_mrsets(size, count),
             10 => self.read_extra_product_info(size, count),
             11 => self.read_display_parameters(size, count),
-            13 => self.read_long_string_map(size, count),
+            13 => self.read_long_var_name_map(size, count),
+            14 => self.read_long_string_map(size, count),
+            16 => self.read_ncases64(size, count),
+            17 => self.read_datafile_attributes(size, count),
+            18 => self.read_variable_attributes(size, count),
+            20 => self.read_character_encoding(size, count),
+            21 => self.read_long_string_value_labels(size, count),
+            22 => self.read_long_string_missing_values(size, count),
             _ => self.read_unknown_extension(subtype, size, count),
         }
     }
@@ -1053,6 +1067,23 @@ impl Dissector {
         Ok(())
     }
 
+    /// Dumps extension subtype 13: the map from short variable names to long ones.
+    fn read_long_var_name_map(&mut self, size: u32, count: u32) -> Result<()> {
+        print!(
+            "{:08x}: long variable names (short => long)",
+            self.r.stream_position()?
+        );
+        let mut text = self.open_text_record(size, count)?;
+        while let Some((var, long_name)) = text.read_variable_to_value_pair() {
+            println!(
+                "\t{} => {}",
+                String::from_utf8_lossy(&var),
+                String::from_utf8_lossy(&long_name)
+            );
+        }
+        Ok(())
+    }
+
     fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> {
         print!(
             "{:08x}: very long strings (variable => length)",
@@ -1069,6 +1100,163 @@ impl Dissector {
         Ok(())
     }
 
+    /// Dumps extension subtype 16, the 64-bit case count.
+    ///
+    /// The record must hold exactly two 8-byte values; anything else is an error.
+    fn read_ncases64(&mut self, size: u32, count: u32) -> Result<()> {
+        if size != 8 {
+            Err(anyhow!("Bad size {size} for extended number of cases."))?
+        }
+        if count != 2 {
+            Err(anyhow!("Bad count {count} for extended number of cases."))?
+        }
+        let unknown: u64 = self.read_swap()?;
+        let ncases64: u64 = self.read_swap()?;
+        // The offset shown is the stream position after both values have been read.
+        println!(
+            "{:08x}: extended number of cases: unknown={unknown}, ncases64={ncases64}",
+            self.r.stream_position()?
+        );
+        Ok(())
+    }
+
+    /// Dumps one attribute set from `text`, labeling each line with `variable`.
+    ///
+    /// Each attribute is `key(` followed by newline-terminated values and a closing `)`.
+    /// The set ends at a `/` or when no further key can be tokenized.  Values are
+    /// expected to be wrapped in single quotes; an unquoted value produces a warning.
+    /// Returns an error if a value is missing before the closing `)`.
+    fn read_attributes(&mut self, text: &mut TextRecord, variable: &str) -> Result<()> {
+        loop {
+            let Some(key) = text.tokenize_string(b'(') else {
+                break;
+            };
+            for index in 1.. {
+                let Some(value) = text.tokenize_string(b'\n') else {
+                    Err(anyhow!(
+                        "{variable}: Error parsing attribute value {key}[{index}]"
+                    ))?
+                };
+                if value.starts_with('\'') && value.ends_with('\'') && value.len() >= 2 {
+                    // Strip just the opening and closing quote.  Both are ASCII, so the
+                    // slice bounds are char boundaries, and `''` yields the empty string.
+                    let middle = &value[1..value.len() - 1];
+                    println!("\t{variable}: {key}[{index}] = \"{middle}\"");
+                } else {
+                    self.warn(format!(
+                        "{variable}: Attribute value {key}[{index}] is not quoted: {value}"
+                    ))?;
+                }
+                if text.match_byte(b')') {
+                    break;
+                }
+            }
+
+            if text.match_byte(b'/') {
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    /// Dumps extension subtype 17, the datafile attributes, as a single attribute set.
+    fn read_datafile_attributes(&mut self, size: u32, count: u32) -> Result<()> {
+        print!("{:08x}: datafile attributes", self.r.stream_position()?);
+        let mut text = self.open_text_record(size, count)?;
+        self.read_attributes(&mut text, "datafile")?;
+        Ok(())
+    }
+
+    /// Dumps extension subtype 18: repeated `name:` prefixes, each followed by an attribute set.
+    fn read_variable_attributes(&mut self, size: u32, count: u32) -> Result<()> {
+        print!("{:08x}: variable attributes", self.r.stream_position()?);
+        let mut text = self.open_text_record(size, count)?;
+        loop {
+            let Some(variable) = text.tokenize_string(b':') else {
+                break;
+            };
+            self.read_attributes(&mut text, &variable)?;
+        }
+        Ok(())
+    }
+
+    /// Dumps extension subtype 20, printing the record's bytes as the character encoding name.
+    fn read_character_encoding(&mut self, size: u32, count: u32) -> Result<()> {
+        let offset = self.r.stream_position()?;
+        let encoding = read_vec(&mut self.r, (size * count) as usize)?;
+        println!("{offset:08x}: Character Encoding: {}", String::from_utf8_lossy(&encoding));
+        Ok(())
+    }
+
+    /// Dumps extension subtype 21, value labels for long string variables.
+    ///
+    /// The record is a sequence of entries: name length, name, width, label count, then
+    /// for each label a length-prefixed value followed by a length-prefixed label.
+    fn read_long_string_value_labels(&mut self, size: u32, count: u32) -> Result<()> {
+        let start = self.r.stream_position()?;
+
+        println!("{start:08x}: long string value labels");
+        while self.r.stream_position()? - start < (size * count) as u64 {
+            let position = self.r.stream_position()?;
+
+            let var_name_len: u32 = self.read_swap()?;
+            if var_name_len > ID_MAX_LEN {
+                Err(anyhow!("Variable name length in long string value label record ({var_name_len}) exceeds {ID_MAX_LEN}-byte limit."))?
+            }
+            let var_name = read_vec(&mut self.r, var_name_len as usize)?;
+
+            let width: u32 = self.read_swap()?;
+            let n_values: u32 = self.read_swap()?;
+
+            println!("\t{position:08x}: {}, width {width}, {n_values} values",
+                     String::from_utf8_lossy(&var_name));
+
+            for _ in 0..n_values {
+                let position = self.r.stream_position()?;
+                let value_length: u32 = self.read_swap()?;
+                let value = read_vec(&mut self.r, value_length as usize)?;
+                let label_length: u32 = self.read_swap()?;
+                // The label has its own length prefix, independent of the value's.
+                let label = read_vec(&mut self.r, label_length as usize)?;
+                println!("\t\t{position:08x}: \"{}\" ({value_length} bytes) => \"{}\" ({label_length} bytes)",
+                         String::from_utf8_lossy(&value),
+                         String::from_utf8_lossy(&label));
+            }
+        }
+        Ok(())
+    }
+
+    /// Dumps extension subtype 22, missing values for long string variables.
+    ///
+    /// The record is a sequence of entries: name length, name, a one-byte count of missing
+    /// values, and one shared value length, followed by that many values.
+    fn read_long_string_missing_values(&mut self, size: u32, count: u32) -> Result<()> {
+        let start = self.r.stream_position()?;
+
+        println!("{start:08x}: long string missing values");
+        while self.r.stream_position()? - start < (size * count) as u64 {
+            let position = self.r.stream_position()?;
+
+            let var_name_len: u32 = self.read_swap()?;
+            if var_name_len > ID_MAX_LEN {
+                Err(anyhow!("Variable name length in long string missing value record ({var_name_len}) exceeds {ID_MAX_LEN}-byte limit."))?
+            }
+            let var_name = read_vec(&mut self.r, var_name_len as usize)?;
+
+            let n_missing_values: u8 = self.read_swap()?;
+            let value_length: u32 = self.read_swap()?;
+
+            println!("\t{position:08x}: {}, {n_missing_values}, each {value_length} bytes:",
+                     String::from_utf8_lossy(&var_name));
+
+            for _ in 0..n_missing_values {
+                let value = read_vec(&mut self.r, value_length as usize)?;
+                println!(" \"{}\"", String::from_utf8_lossy(&value));
+            }
+        }
+        Ok(())
+    }
+
     fn read_text_record(&mut self, size: u32, count: u32) -> Result<Vec<u8>> {
         let Some(n_bytes) = u32::checked_mul(size, count) else {
             Err(anyhow!("Extension record too large."))?
@@ -1121,6 +1309,12 @@ impl TextRecord {
         }
     }
 
+    /// Like `tokenize`, but returns the token as an owned `String`, decoded lossily as UTF-8.
+    fn tokenize_string(&mut self, delimiter: u8) -> Option<String> {
+        self.tokenize(delimiter)
+            .map(|s| String::from_utf8_lossy(s).into_owned())
+    }
+
     fn match_byte(&mut self, c: u8) -> bool {
         if self.pos < self.buffer.len() && self.buffer[self.pos] == c {
             self.pos += 1;
diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c
index 3d85b9b089..ba5a0c9045 100644
--- a/utilities/pspp-dump-sav.c
+++ b/utilities/pspp-dump-sav.c
@@ -1135,7 +1135,7 @@ read_long_string_missing_values (struct sfm_reader *r,
       /* Read variable name. */
       int var_name_len = read_int (r);
       if (var_name_len > ID_MAX_LEN)
-        sys_error (r, "Variable name length in long string value label "
+        sys_error (r, "Variable name length in long string missing value "
                    "record (%d) exceeds %d-byte limit.",
                    var_name_len, ID_MAX_LEN);
       char var_name[ID_MAX_LEN + 1];
@@ -1154,8 +1154,6 @@ read_long_string_missing_values (struct sfm_reader *r,
       /* Read values. */
       for (int i = 0; i < n_missing_values; i++)
         {
-          posn = ftello (r->file);
-
           /* Read value. */
           char *value = xmalloc (value_length + 1);
           read_string (r, value, value_length + 1);