mod hexfloat;
use hexfloat::HexFloat;
+/// Upper bound, in bytes, on the variable name length accepted when dumping
+/// long string value label and long string missing value records.
+const ID_MAX_LEN: u32 = 64;
+
/// A utility to dissect SPSS system files.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
let size: u32 = self.read_swap()?;
let count: u32 = self.read_swap()?;
println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}");
+ if size.checked_mul(count).is_none() {
+ Err(anyhow!("{size} * {count} exceeds {}", u32::MAX))?
+ }
match subtype {
3 => self.read_machine_integer_info(size, count),
4 => self.read_machine_float_info(size, count),
7 | 19 => self.read_mrsets(size, count),
10 => self.read_extra_product_info(size, count),
11 => self.read_display_parameters(size, count),
- 13 => self.read_long_string_map(size, count),
+ 13 => self.read_long_var_name_map(size, count),
+ 14 => self.read_long_string_map(size, count),
+ 16 => self.read_ncases64(size, count),
+ 17 => self.read_datafile_attributes(size, count),
+ 18 => self.read_variable_attributes(size, count),
+ 20 => self.read_character_encoding(size, count),
+ 21 => self.read_long_string_value_labels(size, count),
+ 22 => self.read_long_string_missing_values(size, count),
_ => self.read_unknown_extension(subtype, size, count),
}
}
Ok(())
}
+ /// Dumps a long variable name map extension record.
+ ///
+ /// Prints the current stream offset with a heading, opens the payload
+ /// through `open_text_record`, and then prints one `short => long` line for
+ /// every pair the text record yields. Names are decoded lossily as UTF-8.
+ fn read_long_var_name_map(&mut self, size: u32, count: u32) -> Result<()> {
+     let offset = self.r.stream_position()?;
+     print!("{:08x}: long variable names (short => long)", offset);
+
+     let mut record = self.open_text_record(size, count)?;
+     loop {
+         let Some((short_raw, long_raw)) = record.read_variable_to_value_pair() else {
+             break;
+         };
+         let short_name = String::from_utf8_lossy(&short_raw);
+         let long_name = String::from_utf8_lossy(&long_raw);
+         println!("\t{} => {}", short_name, long_name);
+     }
+     Ok(())
+ }
+
fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> {
print!(
"{:08x}: very long strings (variable => length)",
Ok(())
}
+ /// Dumps the extended number-of-cases extension record.
+ ///
+ /// The payload is read as two `u64` values, so `size` must be 8 and `count`
+ /// must be 2; both are validated before anything is consumed.
+ ///
+ /// # Errors
+ ///
+ /// Fails if `size != 8`, if `count != 2`, or if querying or reading the
+ /// underlying stream fails.
+ fn read_ncases64(&mut self, size: u32, count: u32) -> Result<()> {
+     if size != 8 {
+         Err(anyhow!("Bad size {size} for extended number of cases."))?
+     }
+     if count != 2 {
+         Err(anyhow!("Bad count {count} for extended number of cases."))?
+     }
+     // Sample the offset before consuming the payload so the reported
+     // position is where the data starts, matching the other record dumpers.
+     let offset = self.r.stream_position()?;
+     let unknown: u64 = self.read_swap()?;
+     let ncases64: u64 = self.read_swap()?;
+     // `println!`, not `print!`: nothing else terminates this output line, so
+     // without the newline the next record header would be glued onto it.
+     println!(
+         "{:08x}: extended number of cases: unknown={unknown}, ncases64={ncases64}",
+         offset
+     );
+     Ok(())
+ }
+
+ /// Parses and prints the attribute set found at the current position of
+ /// `text`.
+ ///
+ /// Each attribute is a key terminated by `(`, followed by one or more
+ /// newline-terminated values, followed by `)`. A value is expected to be
+ /// wrapped in single quotes; the quotes are removed before printing, and a
+ /// value that is not quoted is reported through `self.warn` instead.
+ /// Parsing stops when no further key can be tokenized or when a `/` follows
+ /// an attribute.
+ ///
+ /// `variable` is only used to label the output and the diagnostics.
+ ///
+ /// # Errors
+ ///
+ /// Fails if no value can be tokenized after a key, or if `self.warn`
+ /// returns an error.
+ fn read_attributes(&mut self, text: &mut TextRecord, variable: &str) -> Result<()> {
+     while let Some(key) = text.tokenize_string(b'(') {
+         for index in 1.. {
+             let Some(value) = text.tokenize_string(b'\n') else {
+                 Err(anyhow!(
+                     "{variable}: Error parsing attribute value {key}[{index}]"
+                 ))?
+             };
+             // Remove exactly one quote from each end. A lone `'` yields
+             // `None` here (nothing is left to strip the suffix from), so it
+             // is treated as unquoted. Manual slicing with `len() - 2` used
+             // to drop the last content character and panic on `''`.
+             let unquoted = value
+                 .strip_prefix('\'')
+                 .and_then(|rest| rest.strip_suffix('\''));
+             if let Some(middle) = unquoted {
+                 println!("\t{variable}: {key}[{index}] = \"{middle}\"");
+             } else {
+                 self.warn(format!(
+                     "{variable}: Attribute value {key}[{index}] is not quoted: {value}"
+                 ))?;
+             }
+             // `)` closes the value list of the current key.
+             if text.match_byte(b')') {
+                 break;
+             }
+         }
+
+         // `/` ends this attribute set.
+         if text.match_byte(b'/') {
+             break;
+         }
+     }
+     Ok(())
+ }
+
+ /// Dumps the datafile attributes extension record.
+ ///
+ /// Prints the current stream offset with a heading, opens the payload
+ /// through `open_text_record`, and hands it to `read_attributes` under the
+ /// label `"datafile"`.
+ fn read_datafile_attributes(&mut self, size: u32, count: u32) -> Result<()> {
+     let offset = self.r.stream_position()?;
+     print!("{:08x}: datafile attributes", offset);
+
+     let mut record = self.open_text_record(size, count)?;
+     self.read_attributes(&mut record, "datafile")
+ }
+
+ /// Dumps the variable attributes extension record.
+ ///
+ /// Prints the current stream offset with a heading, opens the payload
+ /// through `open_text_record`, and then repeatedly tokenizes a variable name
+ /// terminated by `:` and passes the record to `read_attributes` for that
+ /// variable, until no further name can be tokenized.
+ fn read_variable_attributes(&mut self, size: u32, count: u32) -> Result<()> {
+     let offset = self.r.stream_position()?;
+     print!("{:08x}: variable attributes", offset);
+
+     let mut record = self.open_text_record(size, count)?;
+     while let Some(variable) = record.tokenize_string(b':') {
+         self.read_attributes(&mut record, &variable)?;
+     }
+     Ok(())
+ }
+
+ /// Dumps the character encoding extension record.
+ ///
+ /// Reads `size * count` bytes from the stream and prints them, decoded
+ /// lossily as UTF-8, prefixed with the offset at which the payload starts.
+ fn read_character_encoding(&mut self, size: u32, count: u32) -> Result<()> {
+     let offset = self.r.stream_position()?;
+     let n_bytes = (size * count) as usize;
+     let raw = read_vec(&mut self.r, n_bytes)?;
+     let encoding = String::from_utf8_lossy(&raw);
+     println!("{offset:08x}: Character Encoding: {}", encoding);
+     Ok(())
+ }
+
+ /// Dumps a long string value labels extension record.
+ ///
+ /// The payload is `size * count` bytes long and is consumed as a sequence of
+ /// per-variable entries: a length-prefixed variable name, a width, a value
+ /// count, and then that many (length-prefixed value, length-prefixed label)
+ /// pairs. Each entry and each pair is printed with the offset it starts at.
+ ///
+ /// # Errors
+ ///
+ /// Fails if a variable name length exceeds `ID_MAX_LEN`, or if querying or
+ /// reading the underlying stream fails.
+ fn read_long_string_value_labels(&mut self, size: u32, count: u32) -> Result<()> {
+     let start = self.r.stream_position()?;
+     // Widen before multiplying so the product cannot overflow `u32`.
+     let n_bytes = u64::from(size) * u64::from(count);
+
+     println!("{start:08x}: long string value labels");
+     while self.r.stream_position()? - start < n_bytes {
+         let position = self.r.stream_position()?;
+
+         let var_name_len: u32 = self.read_swap()?;
+         if var_name_len > ID_MAX_LEN {
+             Err(anyhow!("Variable name length in long string value label record ({var_name_len}) exceeds {ID_MAX_LEN}-byte limit."))?
+         }
+         let var_name = read_vec(&mut self.r, var_name_len as usize)?;
+
+         let width: u32 = self.read_swap()?;
+         let n_values: u32 = self.read_swap()?;
+
+         println!(
+             "\t{position:08x}: {}, width {width}, {n_values} values",
+             String::from_utf8_lossy(&var_name)
+         );
+
+         for _ in 0..n_values {
+             let position = self.r.stream_position()?;
+             let value_length: u32 = self.read_swap()?;
+             let value = read_vec(&mut self.r, value_length as usize)?;
+             let label_length: u32 = self.read_swap()?;
+             // The label has its own length prefix; reading it with
+             // `value_length` would desynchronise the rest of the record.
+             let label = read_vec(&mut self.r, label_length as usize)?;
+             println!(
+                 "\t\t{position:08x}: \"{}\" ({value_length} bytes) => \"{}\" ({label_length} bytes)",
+                 String::from_utf8_lossy(&value),
+                 String::from_utf8_lossy(&label)
+             );
+         }
+     }
+     Ok(())
+ }
+
+ /// Dumps a long string missing values extension record.
+ ///
+ /// The payload is `size * count` bytes long and is consumed as a sequence of
+ /// per-variable entries: a length-prefixed variable name, a one-byte count
+ /// of missing values, a value length, and then that many values of that
+ /// length. Each entry is printed with the offset it starts at.
+ ///
+ /// # Errors
+ ///
+ /// Fails if a variable name length exceeds `ID_MAX_LEN`, or if querying or
+ /// reading the underlying stream fails.
+ fn read_long_string_missing_values(&mut self, size: u32, count: u32) -> Result<()> {
+     let start = self.r.stream_position()?;
+     // Widen before multiplying so the product cannot overflow `u32`.
+     let n_bytes = u64::from(size) * u64::from(count);
+
+     println!("{start:08x}: long string missing values");
+     while self.r.stream_position()? - start < n_bytes {
+         let position = self.r.stream_position()?;
+
+         let var_name_len: u32 = self.read_swap()?;
+         if var_name_len > ID_MAX_LEN {
+             Err(anyhow!("Variable name length in long string missing value record ({var_name_len}) exceeds {ID_MAX_LEN}-byte limit."))?
+         }
+         let var_name = read_vec(&mut self.r, var_name_len as usize)?;
+
+         let n_missing_values: u8 = self.read_swap()?;
+         let value_length: u32 = self.read_swap()?;
+
+         println!(
+             "\t{position:08x}: {}, {n_missing_values}, each {value_length} bytes:",
+             String::from_utf8_lossy(&var_name)
+         );
+
+         for _ in 0..n_missing_values {
+             let value = read_vec(&mut self.r, value_length as usize)?;
+             println!(" \"{}\"", String::from_utf8_lossy(&value));
+         }
+     }
+     Ok(())
+ }
+
fn read_text_record(&mut self, size: u32, count: u32) -> Result<Vec<u8>> {
let Some(n_bytes) = u32::checked_mul(size, count) else {
Err(anyhow!("Extension record too large."))?
}
}
+ /// Calls `tokenize` with `delimiter` and returns the resulting token as an
+ /// owned `String`, decoding it lossily as UTF-8. Returns `None` when
+ /// `tokenize` yields no token.
+ fn tokenize_string(&mut self, delimiter: u8) -> Option<String> {
+     let raw = self.tokenize(delimiter)?;
+     Some(String::from_utf8_lossy(raw).into_owned())
+ }
+
fn match_byte(&mut self, c: u8) -> bool {
if self.pos < self.buffer.len() && self.buffer[self.pos] == c {
self.pos += 1;