-#![allow(unused_variables)]
-#![allow(dead_code)]
/* PSPP - a program for statistical analysis.
* Copyright (C) 2023 Free Software Foundation, Inc.
*
use clap::Parser;
use hexplay::HexView;
use hexplay::HexViewBuilder;
-use num::{Float, Num};
+use num::Num;
use std::cmp::Ordering;
+use std::collections::VecDeque;
+use std::fmt;
use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;
+use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::str;
-use std::{fmt, num::FpCategory};
+
+mod hexfloat;
+use hexfloat::HexFloat;
/// A utility to dissect SPSS system files.
#[derive(Parser, Debug)]
let Args { max_cases, files } = Args::parse();
for file in files {
- Dissector::new(file)?;
+ Dissector::new(file, max_cases)?;
}
Ok(())
}
fn read_swap(&mut self) -> Result<T>;
}
+/// Reads a single `u8` from the dissector's input and decodes it through
+/// `self.endianness`.
+impl ReadSwap<u8> for Dissector {
+    fn read_swap(&mut self) -> Result<u8> {
+        let raw = read_bytes(&mut self.r)?;
+        let value: u8 = self.endianness.parse(raw);
+        Ok(value)
+    }
+}
+/// Reads a `u32` from the dissector's input, decoding the raw bytes through
+/// `self.endianness`.
impl ReadSwap<u32> for Dissector {
fn read_swap(&mut self) -> Result<u32> {
Ok(self.endianness.parse(read_bytes(&mut self.r)?))
}
}
-impl ReadSwap<u8> for Dissector {
- fn read_swap(&mut self) -> Result<u8> {
+/// Reads a `u64` from the dissector's input, decoding the raw bytes through
+/// `self.endianness`.
+impl ReadSwap<u64> for Dissector {
+ fn read_swap(&mut self) -> Result<u64> {
Ok(self.endianness.parse(read_bytes(&mut self.r)?))
}
}
struct Dissector {
filename: String,
r: BufReader<File>,
- compression: Option<Compression>,
endianness: Endianness,
fp_format: Endianness,
bias: f64,
s
}
-fn slice_trim_end(mut s: &[u8], c: u8) -> &[u8] {
- while s.last() == Some(&c) {
- s = s.split_last().unwrap().1;
- }
- s
-}
-
fn format_name(type_: u32) -> &'static str {
match type_ {
1 => "A",
}
}
-struct HexFloat<T: Float>(T);
-
-impl<T: Float> fmt::Display for HexFloat<T> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- let sign = if self.0.is_sign_negative() { "-" } else { "" };
- match self.0.classify() {
- FpCategory::Nan => return write!(f, "NaN"),
- FpCategory::Infinite => return write!(f, "{sign}Infinity"),
- FpCategory::Zero => return write!(f, "{sign}0.0"),
- _ => (),
- };
- let (significand, mut exponent, _) = self.0.integer_decode();
- let mut hex_sig = format!("{:x}", significand);
- while hex_sig.ends_with('0') {
- hex_sig.pop();
- exponent += 4;
- }
- match hex_sig.len() {
- 0 => write!(f, "{sign}0.0"),
- 1 => write!(f, "{sign}0x{hex_sig}.0p{exponent}"),
- len => write!(
- f,
- "{sign}0x{}.{}p{}",
- hex_sig.chars().next().unwrap(),
- &hex_sig[1..],
- exponent + 4 * (len as i16 - 1)
- ),
- }
- }
-}
-
-#[cfg(test)]
-mod hex_float_tests {
- use crate::HexFloat;
- use num::Float;
-
- #[test]
- fn test() {
- assert_eq!(format!("{}", HexFloat(1.0)), "0x1.0p0");
- assert_eq!(format!("{}", HexFloat(123.0)), "0x1.ecp6");
- assert_eq!(format!("{}", HexFloat(1.0 / 16.0)), "0x1.0p-4");
- assert_eq!(format!("{}", HexFloat(f64::infinity())), "Infinity");
- assert_eq!(format!("{}", HexFloat(f64::neg_infinity())), "-Infinity");
- assert_eq!(format!("{}", HexFloat(f64::nan())), "NaN");
- assert_eq!(format!("{}", HexFloat(0.0)), "0.0");
- assert_eq!(format!("{}", HexFloat(f64::neg_zero())), "-0.0");
- }
-}
-
impl Dissector {
- fn new<P: AsRef<Path>>(filename: P) -> Result<Dissector> {
+ fn new<P: AsRef<Path>>(filename: P, max_cases: usize) -> Result<Dissector> {
let mut r = BufReader::new(File::open(&filename)?);
let filename = filename.as_ref().to_string_lossy().into_owned();
let rec_type: [u8; 4] = read_bytes(&mut r)?;
let mut d = Dissector {
filename,
r,
- compression,
endianness,
fp_format,
bias,
pos + 4
);
+ match compression {
+ Some(Compression::Simple) => {
+ if max_cases > 0 {
+ d.read_simple_compressed_data(max_cases)?;
+ }
+ }
+ Some(Compression::ZLib) => d.read_zlib_compressed_data()?,
+ None => (),
+ }
+
Ok(d)
}
+    /// Dumps up to `max_cases` cases of data compressed with simple
+    /// (opcode-based) compression.
+    ///
+    /// Opcodes are read from the file in blocks of `N_OPCODES` bytes and
+    /// consumed in file order, one per entry of `self.var_widths`.  Each opcode
+    /// is printed together with the file offset it was read from:
+    ///
+    /// * 0 is padding and does not advance to the next variable,
+    /// * 252 marks the end of the data,
+    /// * 253 is followed in the file by eight literal bytes,
+    /// * 254 stands for spaces and 255 for SYSMIS,
+    /// * any other opcode stands for the number `opcode - self.bias`.
+    ///
+    /// Returns `Ok(())` after `max_cases` cases, at an end-of-data opcode, or
+    /// when the file ends while fetching an opcode block before the first
+    /// variable of a case.  Any other I/O error, including end of file in the
+    /// middle of a case, is returned to the caller.
+    fn read_simple_compressed_data(&mut self, max_cases: usize) -> Result<()> {
+        // Read and discard the 32-bit word that precedes the data.
+        let _: i32 = self.read_swap()?;
+        println!("\n{:08x}: compressed data:", self.r.stream_position()?);
+
+        const N_OPCODES: usize = 8;
+        let mut opcodes = VecDeque::<u8>::with_capacity(N_OPCODES);
+        let mut opcode_ofs = 0;
+        for case_num in 0..max_cases {
+            println!(
+                "{:08x}: case {case_num}'s uncompressible data begins",
+                self.r.stream_position()?
+            );
+            let mut i = 0;
+            while i < self.var_widths.len() {
+                let width = self.var_widths[i];
+
+                // Position, within the current opcode block, of the opcode that
+                // is about to be consumed.
+                let opcode_idx = N_OPCODES - opcodes.len();
+                // Opcodes are appended at the back below, so they must be taken
+                // from the front to be processed in file order (and to agree
+                // with `opcode_idx`).
+                let Some(opcode) = opcodes.pop_front() else {
+                    // Out of opcodes: fetch the next block and retry.
+                    opcode_ofs = self.r.stream_position()?;
+                    let mut new_opcodes = [0; N_OPCODES];
+                    if let Err(error) = self.r.read_exact(&mut new_opcodes) {
+                        // End of file before a case has started is a normal end
+                        // of data; anywhere else it is an error.
+                        return if i == 0 && error.kind() == ErrorKind::UnexpectedEof {
+                            Ok(())
+                        } else {
+                            Err(error.into())
+                        };
+                    }
+                    opcodes.extend(new_opcodes);
+                    continue;
+                };
+
+                print!(
+                    "{:08x}: variable {i}: opcode {opcode}: ",
+                    opcode_ofs + opcode_idx as u64
+                );
+                match opcode {
+                    0 => println!("ignored padding"),
+                    252 => {
+                        println!("end of data");
+                        // Nothing follows the end-of-data opcode, so stop
+                        // instead of starting another case.
+                        return Ok(());
+                    }
+                    253 => {
+                        let raw: [u8; 8] = read_bytes(&mut self.r)?;
+                        let value = UntypedValue::new(raw, self.fp_format);
+                        println!("uncompressible data: {value}");
+                        i += 1;
+                    }
+                    254 => {
+                        print!("spaces");
+                        if width == 0 {
+                            print!(", but this is a numeric variable");
+                        }
+                        println!();
+                        i += 1;
+                    }
+                    255 => {
+                        print!("SYSMIS");
+                        if width != 0 {
+                            print!(", but this is a string variable (width={width})");
+                        }
+                        println!();
+                        i += 1;
+                    }
+                    _ => {
+                        print!("{}", opcode as f64 - self.bias);
+                        if width != 0 {
+                            print!(", but this is a string variable (width={width})");
+                        }
+                        println!();
+                        i += 1;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Dumps the header, trailer, and block descriptors of ZLIB-compressed
+    /// data.
+    ///
+    /// The compressed data itself is skipped rather than decompressed.  Every
+    /// field is printed, followed by an "(Expected ...)" note when its value
+    /// differs from what the other fields imply.
+    ///
+    /// Returns an error if an I/O operation fails, or if the trailer offset
+    /// stored in the header lies before the end of the header, because the
+    /// extent of the compressed data cannot be determined in that case.
+    fn read_zlib_compressed_data(&mut self) -> Result<()> {
+        // Read and discard the 32-bit word that precedes the header.
+        let _: i32 = self.read_swap()?;
+        let ofs = self.r.stream_position()?;
+        println!("\n{ofs:08x}: ZLIB compressed data header:");
+
+        let this_ofs: u64 = self.read_swap()?;
+        let next_ofs: u64 = self.read_swap()?;
+        let next_len: u64 = self.read_swap()?;
+
+        println!("\theader_ofs: {this_ofs:#x}");
+        if this_ofs != ofs {
+            println!("\t\t(Expected {ofs:#x}.)");
+        }
+        println!("\ttrailer_ofs: {next_ofs:#x}");
+        println!("\ttrailer_len: {next_len}");
+        if next_len < 24 || next_len % 24 != 0 {
+            println!("\t\t(Trailer length is not positive multiple of 24.)");
+        }
+
+        // The compressed data starts right after the three 8-byte header
+        // fields.  `next_ofs` comes from the file, so it may be too small.
+        let data_ofs = ofs + 8 * 3;
+        let Some(zlib_data_len) = next_ofs.checked_sub(data_ofs) else {
+            return Err(anyhow!(
+                "ZLIB trailer offset {next_ofs:#x} precedes start of compressed data at {data_ofs:#x}"
+            ));
+        };
+        println!("\n{data_ofs:08x}: {zlib_data_len:#x} bytes of ZLIB compressed data");
+
+        self.skip_bytes(zlib_data_len)?;
+
+        println!("\n{next_ofs:08x}: ZLIB trailer fixed header");
+        let bias: u64 = self.read_swap()?;
+        let zero: u64 = self.read_swap()?;
+        let block_size: u32 = self.read_swap()?;
+        let n_blocks: u32 = self.read_swap()?;
+        println!("\tbias: {bias}");
+        println!("\tzero: {zero:#x}");
+        if zero != 0 {
+            println!("\t\t(Expected 0.)");
+        }
+        println!("\tblock size: {block_size:#x}");
+        if block_size != 0x3ff000 {
+            println!("\t\t(Expected 0x3ff000.)");
+        }
+        println!("\tn_blocks: {n_blocks}");
+        // The trailer is one 24-byte fixed header plus one 24-byte descriptor
+        // per block.  `next_len` may be less than 24 in a corrupt file, so the
+        // subtraction must not be allowed to underflow.
+        let expected_n_blocks = (next_len / 24).saturating_sub(1);
+        if u64::from(n_blocks) != expected_n_blocks {
+            println!("\t\t(Expected {expected_n_blocks}.)");
+        }
+
+        let mut expected_uncmp_ofs = ofs;
+        let mut expected_cmp_ofs = data_ofs;
+        for i in 1..=n_blocks {
+            let blockinfo_ofs = self.r.stream_position()?;
+            let uncompressed_ofs: u64 = self.read_swap()?;
+            let compressed_ofs: u64 = self.read_swap()?;
+            let uncompressed_size: u32 = self.read_swap()?;
+            let compressed_size: u32 = self.read_swap()?;
+
+            println!("\n{blockinfo_ofs:08x}: ZLIB block descriptor {i}");
+
+            println!("\tuncompressed_ofs: {uncompressed_ofs:#x}");
+            if uncompressed_ofs != expected_uncmp_ofs {
+                println!("\t\t(Expected {expected_uncmp_ofs:#x}.)");
+            }
+
+            println!("\tcompressed_ofs: {compressed_ofs:#x}");
+            if compressed_ofs != expected_cmp_ofs {
+                println!("\t\t(Expected {expected_cmp_ofs:#x}.)");
+            }
+
+            println!("\tuncompressed_size: {uncompressed_size:#x}");
+            if i < n_blocks && uncompressed_size != block_size {
+                println!("\t\t(Expected {block_size:#x}.)");
+            }
+
+            println!("\tcompressed_size: {compressed_size:#x}");
+            if i == n_blocks
+                && compressed_ofs.checked_add(u64::from(compressed_size)) != Some(next_ofs)
+            {
+                // NOTE(review): this prints the offset at which the last block
+                // would have to start in order to end at the trailer; confirm
+                // whether `next_ofs - compressed_ofs` (the expected size) was
+                // intended.  Wrapping keeps corrupt input from panicking.
+                println!(
+                    "\t\t(This was expected to be {:#x}.)",
+                    next_ofs.wrapping_sub(u64::from(compressed_size))
+                );
+            }
+
+            // Each stream advances by its own block size.
+            expected_uncmp_ofs = expected_uncmp_ofs.wrapping_add(u64::from(uncompressed_size));
+            expected_cmp_ofs = expected_cmp_ofs.wrapping_add(u64::from(compressed_size));
+        }
+        Ok(())
+    }
+
fn read_extension_record(&mut self) -> Result<()> {
let offset = self.r.stream_position()?;
let subtype: u32 = self.read_swap()?;
7 | 19 => self.read_mrsets(size, count),
10 => self.read_extra_product_info(size, count),
11 => self.read_display_parameters(size, count),
+ 13 => self.read_long_string_map(size, count),
_ => self.read_unknown_extension(subtype, size, count),
}
}
let Some(name) = text.tokenize(b'=') else {
break;
};
+ let name = Vec::from(name);
let (mrset, cat_label_from_counted_values, label_from_var_label) = if text
.match_byte(b'C')
};
let counted_value = if mrset == MrSet::MD {
- Some(text.parse_counted_string()?)
- } else { None };
+ Some(Vec::from(text.parse_counted_string()?))
+ } else {
+ None
+ };
- let label = text.parse_counted_string()?;
+ let label = Vec::from(text.parse_counted_string()?);
let variables = text.tokenize(b'\n');
- print!("\t\"{}\": multiple {} set",
- String::from_utf8_lossy(name),
- if mrset == MrSet::MC { "category" } else { "dichotomy" });
-
+ print!(
+ "\t\"{}\": multiple {} set",
+ String::from_utf8_lossy(&name),
+ if mrset == MrSet::MC {
+ "category"
+ } else {
+ "dichotomy"
+ }
+ );
+ if let Some(counted_value) = counted_value {
+ print!(
+ ", counted value \"{}\"",
+ String::from_utf8_lossy(&counted_value)
+ );
+ }
+ if cat_label_from_counted_values {
+ println!(", category labels from counted values");
+ }
+ if label != b"" {
+ print!(", label \"{}\"", String::from_utf8_lossy(&label));
+ }
+ if label_from_var_label {
+ print!(", label from variable label");
+ }
+ if let Some(variables) = variables {
+ print!(", variables \"{}\"", String::from_utf8_lossy(variables));
+ } else {
+ print!("no variables");
+ }
+ println!();
}
Ok(())
}
Ok(())
}
- fn open_text_record(&mut self, size: u32, count: u32) -> Result<TextRecord> {
- let n_bytes = match u32::checked_mul(size, count) {
- Some(n) => n,
- None => Err(anyhow!("Extension record too large."))?,
+    /// Dumps an extension record that holds `variable=length` pairs.
+    ///
+    /// Prints a header line with the record's file offset, then one
+    /// tab-indented `variable => length` line for each pair that
+    /// `TextRecord::read_variable_to_value_pair` yields.  Both parts are shown
+    /// with lossy UTF-8 conversion.
+    ///
+    /// Returns an error if the record cannot be read.
+    fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> {
+        // End the header with a newline so that the first pair starts on its
+        // own line.
+        println!(
+            "{:08x}: very long strings (variable => length)",
+            self.r.stream_position()?
+        );
+        let mut text = self.open_text_record(size, count)?;
+        while let Some((var, length)) = text.read_variable_to_value_pair() {
+            println!(
+                "\t{} => {}",
+                String::from_utf8_lossy(&var),
+                String::from_utf8_lossy(&length)
+            );
+        }
+        Ok(())
+    }
+
+ /// Reads the `size * count` bytes of an extension record via `read_vec`.
+ ///
+ /// Fails with "Extension record too large." when `size * count` overflows
+ /// `u32`; errors from `read_vec` are passed through.
+ fn read_text_record(&mut self, size: u32, count: u32) -> Result<Vec<u8>> {
+ let Some(n_bytes) = u32::checked_mul(size, count) else {
+ Err(anyhow!("Extension record too large."))?
};
- Ok(TextRecord::new(read_vec(&mut self.r, n_bytes as usize)?))
+ read_vec(&mut self.r, n_bytes as usize)
+ }
+
+ /// Reads an extension record's bytes with `read_text_record` and wraps them
+ /// in a `TextRecord`.
+ fn open_text_record(&mut self, size: u32, count: u32) -> Result<TextRecord> {
+ Ok(TextRecord::new(self.read_text_record(size, count)?))
}
}
}
let Some((start, end)) = self.get_n_bytes(length) else {
- Err(anyhow!("{length}-byte string starting at offset {} exceeds record length {}",
- self.pos, self.buffer.len()))?
+ Err(anyhow!(
+ "{length}-byte string starting at offset {} exceeds record length {}",
+ self.pos,
+ self.buffer.len()
+ ))?
};
if !self.match_byte(b' ') {
Err(anyhow!(
}
Ok(&self.buffer[start..end])
}
+
+    /// Extracts the next `key=value` pair from the record as owned byte
+    /// vectors.
+    ///
+    /// The key is the token up to `=` and the value is the token up to a tab.
+    /// Any run of tab or NUL bytes that follows is consumed as well.  Returns
+    /// `None` as soon as either token is unavailable.
+    fn read_variable_to_value_pair(&mut self) -> Option<(Vec<u8>, Vec<u8>)> {
+        let name = self.tokenize(b'=')?.to_vec();
+        let contents = self.tokenize(b'\t')?.to_vec();
+
+        // Swallow separators until neither a tab nor a NUL is next.
+        loop {
+            if !self.match_byte(b'\t') && !self.match_byte(b'\0') {
+                break;
+            }
+        }
+        Some((name, contents))
+    }
}