* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
-use anyhow::{anyhow, Result};
+use anyhow::{Result};
use clap::Parser;
-use hexplay::HexView;
-use hexplay::HexViewBuilder;
-use num::Num;
-use std::cmp::Ordering;
-use std::collections::VecDeque;
-use std::fmt;
+use pspp::raw::Reader;
use std::fs::File;
-use std::io::prelude::*;
use std::io::BufReader;
-use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::str;
-mod hexfloat;
-use hexfloat::HexFloat;
-
-const ID_MAX_LEN: u32 = 64;
-
/// A utility to dissect SPSS system files.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
}
fn main() -> Result<()> {
- let Args { max_cases, files } = Args::parse();
+ let Args { files, .. } = Args::parse(); // only `files` is bound; every other `Args` field is ignored here
for file in files {
- Dissector::new(file, max_cases)?;
+ dissect(&file)?; // `?` stops at the first file that fails and returns its error from `main`
}
Ok(())
}
-#[derive(Copy, Clone, Debug)]
-enum Compression {
- Simple,
- ZLib,
-}
-
-#[derive(Copy, Clone, Debug)]
-enum Endianness {
- BigEndian,
- LittleEndian,
-}
-use Endianness::*;
-
-trait Parse<T, const N: usize> {
- fn parse(self, bytes: [u8; N]) -> T;
-}
-impl Parse<u64, 8> for Endianness {
- fn parse(self, bytes: [u8; 8]) -> u64 {
- match self {
- BigEndian => u64::from_be_bytes(bytes),
- LittleEndian => u64::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<u32, 4> for Endianness {
- fn parse(self, bytes: [u8; 4]) -> u32 {
- match self {
- BigEndian => u32::from_be_bytes(bytes),
- LittleEndian => u32::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<u16, 2> for Endianness {
- fn parse(self, bytes: [u8; 2]) -> u16 {
- match self {
- BigEndian => u16::from_be_bytes(bytes),
- LittleEndian => u16::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<u8, 1> for Endianness {
- fn parse(self, bytes: [u8; 1]) -> u8 {
- match self {
- BigEndian => u8::from_be_bytes(bytes),
- LittleEndian => u8::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<i64, 8> for Endianness {
- fn parse(self, bytes: [u8; 8]) -> i64 {
- match self {
- BigEndian => i64::from_be_bytes(bytes),
- LittleEndian => i64::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<i32, 4> for Endianness {
- fn parse(self, bytes: [u8; 4]) -> i32 {
- match self {
- BigEndian => i32::from_be_bytes(bytes),
- LittleEndian => i32::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<i16, 2> for Endianness {
- fn parse(self, bytes: [u8; 2]) -> i16 {
- match self {
- BigEndian => i16::from_be_bytes(bytes),
- LittleEndian => i16::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<i8, 1> for Endianness {
- fn parse(self, bytes: [u8; 1]) -> i8 {
- match self {
- BigEndian => i8::from_be_bytes(bytes),
- LittleEndian => i8::from_le_bytes(bytes),
- }
- }
-}
-impl Parse<f64, 8> for Endianness {
- fn parse(self, bytes: [u8; 8]) -> f64 {
- match self {
- BigEndian => f64::from_be_bytes(bytes),
- LittleEndian => f64::from_le_bytes(bytes),
- }
- }
-}
-
-fn read_bytes<const N: usize>(r: &mut BufReader<File>) -> Result<[u8; N]> {
- let mut buf = [0; N];
- r.read_exact(&mut buf)?;
- Ok(buf)
-}
-
-fn read_vec(r: &mut BufReader<File>, n: usize) -> Result<Vec<u8>> {
- let mut vec = vec![0; n];
- r.read_exact(&mut vec)?;
- Ok(vec)
-}
-
-trait ReadSwap<T> {
- fn read_swap(&mut self) -> Result<T>;
-}
-
-impl ReadSwap<u8> for Dissector {
- fn read_swap(&mut self) -> Result<u8> {
- Ok(self.endianness.parse(read_bytes(&mut self.r)?))
- }
-}
-impl ReadSwap<u32> for Dissector {
- fn read_swap(&mut self) -> Result<u32> {
- Ok(self.endianness.parse(read_bytes(&mut self.r)?))
- }
-}
-impl ReadSwap<u64> for Dissector {
- fn read_swap(&mut self) -> Result<u64> {
- Ok(self.endianness.parse(read_bytes(&mut self.r)?))
- }
-}
-
-impl ReadSwap<i32> for Dissector {
- fn read_swap(&mut self) -> Result<i32> {
- Ok(self.endianness.parse(read_bytes(&mut self.r)?))
- }
-}
-
-impl ReadSwap<f64> for Dissector {
- fn read_swap(&mut self) -> Result<f64> {
- Ok(self.endianness.parse(read_bytes(&mut self.r)?))
- }
-}
-
-struct Dissector {
- filename: String,
- r: BufReader<File>,
- endianness: Endianness,
- fp_format: Endianness,
- bias: f64,
- n_variable_records: usize,
- n_variables: usize,
- var_widths: Vec<i32>,
-}
-
-fn detect_endianness(layout_code: [u8; 4]) -> Option<Endianness> {
- for endianness in [BigEndian, LittleEndian] {
- match endianness.parse(layout_code) {
- 2 | 3 => return Some(endianness),
- _ => (),
- }
- }
- None
-}
-
-fn detect_fp_format(bias: [u8; 8]) -> Option<Endianness> {
- for endianness in [BigEndian, LittleEndian] {
- let value: f64 = endianness.parse(bias);
- if value == 100.0 {
- return Some(endianness);
- }
- }
- None
-}
-
-fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
- while s.last() == Some(&c) {
- s.pop();
- }
- s
-}
-
-fn format_name(type_: u32) -> &'static str {
- match type_ {
- 1 => "A",
- 2 => "AHEX",
- 3 => "COMMA",
- 4 => "DOLLAR",
- 5 => "F",
- 6 => "IB",
- 7 => "PIBHEX",
- 8 => "P",
- 9 => "PIB",
- 10 => "PK",
- 11 => "RB",
- 12 => "RBHEX",
- 15 => "Z",
- 16 => "N",
- 17 => "E",
- 20 => "DATE",
- 21 => "TIME",
- 22 => "DATETIME",
- 23 => "ADATE",
- 24 => "JDATE",
- 25 => "DTIME",
- 26 => "WKDAY",
- 27 => "MONTH",
- 28 => "MOYR",
- 29 => "QYR",
- 30 => "WKYR",
- 31 => "PCT",
- 32 => "DOT",
- 33 => "CCA",
- 34 => "CCB",
- 35 => "CCC",
- 36 => "CCD",
- 37 => "CCE",
- 38 => "EDATE",
- 39 => "SDATE",
- 40 => "MTIME",
- 41 => "YMDHMS",
- _ => "invalid",
- }
-}
-
-fn round_up<T: Num + Copy>(x: T, y: T) -> T {
- (x + (y - T::one())) / y * y
-}
-
-struct UntypedValue {
- raw: [u8; 8],
- endianness: Endianness,
-}
-
-impl UntypedValue {
- fn new(raw: [u8; 8], endianness: Endianness) -> UntypedValue {
- UntypedValue { raw, endianness }
- }
-}
-
-impl fmt::Display for UntypedValue {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- let numeric: f64 = self.endianness.parse(self.raw);
- let n_printable = self
- .raw
- .iter()
- .take_while(|&&x| x == b' ' || x.is_ascii_graphic())
- .count();
- let printable_prefix = std::str::from_utf8(&self.raw[0..n_printable]).unwrap();
- write!(f, "{numeric}/\"{printable_prefix}\"")
- }
-}
-
-impl Dissector {
- fn new<P: AsRef<Path>>(filename: P, max_cases: usize) -> Result<Dissector> {
- let mut r = BufReader::new(File::open(&filename)?);
- let filename = filename.as_ref().to_string_lossy().into_owned();
- let rec_type: [u8; 4] = read_bytes(&mut r)?;
- let zmagic = match &rec_type {
- b"$FL2" => false,
- b"$FL3" => true,
- _ => Err(anyhow!("This is not an SPSS system file."))?,
- };
-
- let eye_catcher: [u8; 60] = read_bytes(&mut r)?;
- let layout_code: [u8; 4] = read_bytes(&mut r)?;
- let endianness = detect_endianness(layout_code)
- .ok_or_else(|| anyhow!("This is not an SPSS system file."))?;
- let layout_code: u32 = endianness.parse(layout_code);
- let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?;
- let compressed: u32 = endianness.parse(read_bytes(&mut r)?);
- let compression = match (zmagic, compressed) {
- (false, 0) => None,
- (false, 1) => Some(Compression::Simple),
- (true, 2) => Some(Compression::ZLib),
- _ => Err(anyhow!(
- "{} file header has invalid compression value {compressed}.",
- if zmagic { "ZSAV" } else { "SAV" }
- ))?,
- };
-
- let weight_index: u32 = endianness.parse(read_bytes(&mut r)?);
- let n_cases: u32 = endianness.parse(read_bytes(&mut r)?);
-
- let bias: [u8; 8] = read_bytes(&mut r)?;
- let fp_format = detect_fp_format(bias)
- .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness });
- let bias: f64 = fp_format.parse(bias);
-
- let mut d = Dissector {
- filename,
- r,
- endianness,
- fp_format,
- bias,
- n_variable_records: 0,
- n_variables: 0,
- var_widths: Vec::new(),
- };
-
- let creation_date: [u8; 9] = read_bytes(&mut d.r)?;
- let creation_time: [u8; 8] = read_bytes(&mut d.r)?;
- let file_label: [u8; 64] = read_bytes(&mut d.r)?;
- let file_label = trim_end(Vec::from(file_label), b' ');
- d.skip_bytes(3)?;
-
- println!("File header record:");
- println!(
- "{:>17}: {}",
- "Product name",
- String::from_utf8_lossy(&eye_catcher)
- );
- println!("{:>17}: {}", "Layout code", layout_code);
- println!(
- "{:>17}: {} ({})",
- "Compressed",
- compressed,
- match compression {
- None => "no compression",
- Some(Compression::Simple) => "simple compression",
- Some(Compression::ZLib) => "ZLIB compression",
- }
- );
- println!("{:>17}: {}", "Weight index", weight_index);
- println!("{:>17}: {}", "Number of cases", n_cases);
- println!("{:>17}: {}", "Compression bias", bias);
- println!(
- "{:>17}: {}",
- "Creation date",
- String::from_utf8_lossy(&creation_date)
- );
- println!(
- "{:>17}: {}",
- "Creation time",
- String::from_utf8_lossy(&creation_time)
- );
- println!(
- "{:>17}: \"{}\"",
- "File label",
- String::from_utf8_lossy(&file_label)
- );
-
- loop {
- let rec_type: u32 = d.read_swap()?;
- match rec_type {
- 2 => d.read_variable_record()?,
- 3 => d.read_value_label_record()?,
- 4 => Err(anyhow!("Misplaced type 4 record."))?,
- 6 => d.read_document_record()?,
- 7 => d.read_extension_record()?,
- 999 => break,
- _ => Err(anyhow!("Unrecognized record type {rec_type}."))?,
- }
- }
-
- let pos = d.r.stream_position()?;
- println!(
- "{:08x}: end-of-dictionary record (first byte of data at {:0x})",
- pos,
- pos + 4
- );
-
- match compression {
- Some(Compression::Simple) => {
- if max_cases > 0 {
- d.read_simple_compressed_data(max_cases)?;
- }
- }
- Some(Compression::ZLib) => d.read_zlib_compressed_data()?,
- None => (),
- }
-
- Ok(d)
- }
-
- fn read_simple_compressed_data(&mut self, max_cases: usize) -> Result<()> {
- let _: i32 = self.read_swap()?;
- println!("\n{:08x}: compressed data:", self.r.stream_position()?);
-
- const N_OPCODES: usize = 8;
- let mut opcodes = VecDeque::<u8>::with_capacity(8);
- let mut opcode_ofs = 0;
- for case_num in 0..max_cases {
- println!(
- "{:08x}: case {case_num}'s uncompressible data begins",
- self.r.stream_position()?
- );
- let mut i = 0;
- while i < self.var_widths.len() {
- let width = self.var_widths[i];
-
- let opcode_idx = N_OPCODES - opcodes.len();
- let Some(opcode) = opcodes.pop_back() else {
- opcode_ofs = self.r.stream_position()?;
- let mut new_opcodes = [0; N_OPCODES];
- if let Err(error) = self.r.read_exact(&mut new_opcodes) {
- if i == 0 && error.kind() == ErrorKind::UnexpectedEof {
- return Ok(());
- } else {
- return Err(error.into());
- }
- };
- opcodes.extend(new_opcodes.into_iter());
- continue;
- };
-
- print!(
- "{:08x}: variable {i}: opcode {opcode}: ",
- opcode_ofs + opcode_idx as u64
- );
- match opcode {
- 0 => println!("ignored padding"),
- 252 => {
- println!("end of data");
- break;
- }
- 253 => {
- let raw: [u8; 8] = read_bytes(&mut self.r)?;
- let value = UntypedValue::new(raw, self.fp_format);
- println!("uncompressible data: {value}");
- i += 1;
- }
- 254 => {
- print!("spaces");
- if width == 0 {
- print!(", but this is a numeric variable");
- }
- println!();
- i += 1;
- }
- 255 => {
- print!("SYSMIS");
- if width != 0 {
- print!(", but this is a string variable (width={width})");
- }
- println!();
- i += 1;
- }
- _ => {
- print!("{}", opcode as f64 - self.bias);
- if width != 0 {
- print!(", but this is a string variable (width={width})");
- }
- println!();
- i += 1;
- }
- }
- }
- }
- Ok(())
- }
-
- fn read_zlib_compressed_data(&mut self) -> Result<()> {
- let _: i32 = self.read_swap()?;
- let ofs = self.r.stream_position()?;
- println!("\n{ofs:08x}: ZLIB compressed data header:");
-
- let this_ofs: u64 = self.read_swap()?;
- let next_ofs: u64 = self.read_swap()?;
- let next_len: u64 = self.read_swap()?;
-
- println!("\theader_ofs: {this_ofs:#x}");
- if this_ofs != ofs {
- println!("\t\t(Expected {ofs:#x}.)");
- }
- println!("\ttrailer_ofs: {next_ofs:#x}");
- println!("\ttrailer_len: {next_len}");
- if next_len < 24 || next_len % 24 != 0 {
- println!("\t\t(Trailer length is not positive multiple of 24.)");
- }
-
- let zlib_data_len = next_ofs - (ofs + 8 * 3);
- println!(
- "\n{:08x}: {zlib_data_len:#x} bytes of ZLIB compressed data",
- ofs + 8 * 3
- );
-
- self.skip_bytes(zlib_data_len)?;
-
- println!("\n{next_ofs:08x}: ZLIB trailer fixed header");
- let bias: u64 = self.read_swap()?;
- let zero: u64 = self.read_swap()?;
- let block_size: u32 = self.read_swap()?;
- let n_blocks: u32 = self.read_swap()?;
- println!("\tbias: {bias}");
- println!("\tzero: {zero:#x}");
- if zero != 0 {
- println!("\t\t(Expected 0.)");
- }
- println!("\tblock size: {block_size:#x}");
- if block_size != 0x3ff000 {
- println!("\t\t(Expected 0x3ff000.)");
- }
- println!("\tn_blocks: {n_blocks}");
- if n_blocks as u64 != next_len / 24 - 1 {
- println!("\t\t(Expected {}.)", next_len / 24 - 1);
- }
-
- let mut expected_uncmp_ofs = ofs;
- let mut expected_cmp_ofs = ofs + 24;
- for i in 1..=n_blocks {
- let blockinfo_ofs = self.r.stream_position()?;
- let uncompressed_ofs: u64 = self.read_swap()?;
- let compressed_ofs: u64 = self.read_swap()?;
- let uncompressed_size: u32 = self.read_swap()?;
- let compressed_size: u32 = self.read_swap()?;
-
- println!("\n{blockinfo_ofs:08x}: ZLIB block descriptor {i}");
-
- println!("\tuncompressed_ofs: {uncompressed_ofs:#x}");
- if uncompressed_ofs != expected_uncmp_ofs {
- println!("\t\t(Expected {ofs:#x}.)");
- }
-
- println!("\tcompressed_ofs: {compressed_ofs:#x}");
- if compressed_ofs != expected_cmp_ofs {
- println!("\t\t(Expected {expected_cmp_ofs:#x}.)");
- }
-
- println!("\tuncompressed_size: {uncompressed_size:#x}");
- if i < n_blocks && uncompressed_size != block_size {
- println!("\t\t(Expected {block_size:#x}.)");
- }
-
- println!("\tcompressed_size: {compressed_size:#x}");
- if i == n_blocks && compressed_ofs.checked_add(compressed_size as u64) != Some(next_ofs)
- {
- println!(
- "\t\t(This was expected to be {:#x}.)",
- next_ofs - compressed_size as u64
- );
- }
-
- expected_uncmp_ofs += uncompressed_size as u64;
- expected_cmp_ofs += uncompressed_size as u64;
- }
- Ok(())
- }
-
- fn read_extension_record(&mut self) -> Result<()> {
- let offset = self.r.stream_position()?;
- let subtype: u32 = self.read_swap()?;
- let size: u32 = self.read_swap()?;
- let count: u32 = self.read_swap()?;
- println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}");
- if size.checked_mul(count).is_none() {
- Err(anyhow!("{size} * {count} exceeds {}", u32::MAX))?
- }
- match subtype {
- 3 => self.read_machine_integer_info(size, count),
- 4 => self.read_machine_float_info(size, count),
- 5 => self.read_variable_sets(size, count),
- 6 => {
- // DATE variable information. We don't use it yet, but we should.
- Ok(())
- }
- 7 | 19 => self.read_mrsets(size, count),
- 10 => self.read_extra_product_info(size, count),
- 11 => self.read_display_parameters(size, count),
- 13 => self.read_long_var_name_map(size, count),
- 14 => self.read_long_string_map(size, count),
- 16 => self.read_ncases64(size, count),
- 17 => self.read_datafile_attributes(size, count),
- 18 => self.read_variable_attributes(size, count),
- 20 => self.read_character_encoding(size, count),
- 21 => self.read_long_string_value_labels(size, count),
- 22 => self.read_long_string_missing_values(size, count),
- _ => self.read_unknown_extension(subtype, size, count),
- }
- }
-
- fn warn(&mut self, s: String) -> Result<()> {
- println!(
- "\"{}\" near offset 0x{:08x}: {s}",
- self.filename,
- self.r.stream_position()?
- );
- Ok(())
- }
-
- fn skip_bytes(&mut self, mut n: u64) -> Result<()> {
- let mut buf = [0; 1024];
- while n > 0 {
- let chunk = u64::min(n, buf.len() as u64);
- self.r.read_exact(&mut buf[0..chunk as usize])?;
- n -= chunk;
- }
- Ok(())
- }
-
- fn read_unknown_extension(&mut self, subtype: u32, size: u32, count: u32) -> Result<()> {
- self.warn(format!("Unrecognized record type 7, subtype {subtype}."))?;
- if size == 0 || count > 65536 / size {
- self.skip_bytes(size as u64 * count as u64)?;
- } else if size != 1 {
- let mut offset = 0;
- for _ in 0..count {
- let vec = read_vec(&mut self.r, size as usize)?;
- println!(
- "{}",
- HexViewBuilder::new(&vec).address_offset(offset).finish()
- );
- offset += size as usize;
- }
- }
- Ok(())
- }
-
- fn read_variable_record(&mut self) -> Result<()> {
- self.n_variable_records += 1;
- println!(
- "{:08x}: variable record {}",
- self.r.stream_position()?,
- self.n_variable_records
- );
- let width: i32 = self.read_swap()?;
- let has_variable_label: u32 = self.read_swap()?;
- let missing_value_code: i32 = self.read_swap()?;
- let print_format: u32 = self.read_swap()?;
- let write_format: u32 = self.read_swap()?;
- let name: [u8; 8] = read_bytes(&mut self.r)?;
- let name: Vec<u8> = trim_end(Vec::from(name), b'\0');
-
- if width >= 0 {
- self.n_variables += 1;
- }
- self.var_widths.push(width);
-
- println!(
- "\tWidth: {width} ({})",
- match width {
- _ if width > 0 => "string",
- _ if width == 0 => "numeric",
- _ => "long string continuation record",
- }
- );
-
- println!("\tVariable label: {has_variable_label}");
- println!(
- "\tMissing values code: {missing_value_code} ({})",
- match missing_value_code {
- 0 => "no missing values",
- 1 => "one missing value",
- 2 => "two missing values",
- 3 => "three missing values",
- -2 => "one missing value range",
- -3 => "one missing value, one range",
- _ => "bad value",
- }
- );
- for (which, format) in [("Print", print_format), ("Worite", write_format)] {
- let type_ = format_name(format >> 16);
- let w = (format >> 8) & 0xff;
- let d = format & 0xff;
- println!("\t{which} format: {format:06x} ({type_}{w}.{d})");
- }
- println!("\tName: {}", String::from_utf8_lossy(&name));
-
- // Read variable label.
- match has_variable_label {
- 0 => (),
- 1 => {
- let offset = self.r.stream_position()?;
- let len: u32 = self.read_swap()?;
- let read_len = len.min(65535) as usize;
- let label = read_vec(&mut self.r, read_len)?;
- println!(
- "\t{offset:08x} Variable label: \"{}\"",
- String::from_utf8_lossy(&label)
- );
-
- self.skip_bytes((round_up(len, 4) - len).into())?;
- }
- _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?,
- };
-
- // Read missing values.
- if missing_value_code != 0 {
- print!("\t{:08x} Missing values:", self.r.stream_position()?);
- match width.cmp(&0) {
- Ordering::Equal => {
- let (has_range, n_individual) = match missing_value_code {
- -3 => (true, 1),
- -2 => (true, 0),
- 1 | 2 | 3 => (false, missing_value_code),
- _ => Err(anyhow!(
- "Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3."
- ))?,
- };
- if has_range {
- let low: f64 = self.read_swap()?;
- let high: f64 = self.read_swap()?;
- print!(" {low}...{high}");
- }
- for _ in 0..n_individual {
- let value: f64 = self.read_swap()?;
- print!(" {value}");
- }
- }
- Ordering::Greater => {
- if !(0..=3).contains(&missing_value_code) {
- Err(anyhow!(
- "String missing value indicator field is not 0, 1, 2, or 3."
- ))?;
- }
- for _ in 0..missing_value_code {
- let string: [u8; 8] = read_bytes(&mut self.r)?;
- let string: Vec<u8> = trim_end(Vec::from(string), b'\0');
- println!(" {}", String::from_utf8_lossy(&string));
- }
- }
- Ordering::Less => (),
- }
- println!();
- }
-
- Ok(())
- }
-
- fn read_value_label_record(&mut self) -> Result<()> {
- println!("{:08x}: value labels record", self.r.stream_position()?);
-
- // Read the labels.
- let n_labels: u32 = self.read_swap()?;
- for _ in 0..n_labels {
- let raw: [u8; 8] = read_bytes(&mut self.r)?;
- let value = UntypedValue::new(raw, self.fp_format);
- let label_len: u8 = self.read_swap()?;
- let padded_len = round_up(label_len as usize + 1, 8);
-
- let mut label = read_vec(&mut self.r, padded_len)?;
- label.truncate(label_len as usize);
- let label = String::from_utf8_lossy(&label);
-
- println!("\t{value}: {label}");
- }
-
- // Read the type-4 record with the corresponding variable indexes.
- let rec_type: u32 = self.read_swap()?;
- if rec_type != 4 {
- Err(anyhow!(
- "Variable index record (type 4) does not immediately \
- follow value label record (type 3) as it should."
- ))?;
- }
-
- println!("\t{:08x}: apply to variables", self.r.stream_position()?);
- let n_vars: u32 = self.read_swap()?;
- for _ in 0..n_vars {
- let index: u32 = self.read_swap()?;
- print!(" {index}");
- }
- println!();
-
- Ok(())
- }
-
- fn read_document_record(&mut self) -> Result<()> {
- println!("{:08x}: document record", self.r.stream_position()?);
- let n_lines: u32 = self.read_swap()?;
- println!("\t{n_lines} lines of documents");
-
- for i in 0..n_lines {
- print!("\t{:08x}: ", self.r.stream_position()?);
- let line: [u8; 64] = read_bytes(&mut self.r)?;
- let line = trim_end(Vec::from(line), b' ');
- println!("line {i}: \"{}\"", String::from_utf8_lossy(&line));
- }
- Ok(())
- }
-
- fn read_machine_integer_info(&mut self, size: u32, count: u32) -> Result<()> {
- let offset = self.r.stream_position()?;
- let version_major: u32 = self.read_swap()?;
- let version_minor: u32 = self.read_swap()?;
- let version_revision: u32 = self.read_swap()?;
- let machine_code: u32 = self.read_swap()?;
- let float_representation: u32 = self.read_swap()?;
- let compression_code: u32 = self.read_swap()?;
- let integer_representation: u32 = self.read_swap()?;
- let character_code: u32 = self.read_swap()?;
-
- println!("{offset:08x}: machine integer info");
- if size != 4 || count != 8 {
- Err(anyhow!(
- "Bad size ({size}) or count ({count}) field on record type 7, subtype 3"
- ))?;
- }
- println!("\tVersion: {version_major}.{version_minor}.{version_revision}");
- println!("\tMachine code: {machine_code}");
- println!(
- "\tFloating point representation: {float_representation} ({})",
- match float_representation {
- 1 => "IEEE 754",
- 2 => "IBM 370",
- 3 => "DEC VAX",
- _ => "unknown",
- }
- );
- println!("\tCompression code: {compression_code}");
- println!(
- "\tEndianness: {integer_representation} ({})",
- match integer_representation {
- 1 => "big",
- 2 => "little",
- _ => "unknown",
- }
- );
- println!("\tCharacter code: {character_code}");
- Ok(())
- }
-
- fn read_machine_float_info(&mut self, size: u32, count: u32) -> Result<()> {
- let offset = self.r.stream_position()?;
- let sysmis: f64 = self.read_swap()?;
- let highest: f64 = self.read_swap()?;
- let lowest: f64 = self.read_swap()?;
-
- println!("{offset:08x}: machine float info");
- if size != 4 || count != 8 {
- Err(anyhow!(
- "Bad size ({size}) or count ({count}) field on extension 4."
- ))?;
- }
-
- println!("\tsysmis: {sysmis} ({})", HexFloat(sysmis));
- println!("\thighest: {highest} ({})", HexFloat(highest));
- println!("\tlowest: {lowest} ({})", HexFloat(lowest));
- Ok(())
- }
-
- fn read_variable_sets(&mut self, size: u32, count: u32) -> Result<()> {
- println!("{:08x}: variable sets", self.r.stream_position()?);
- let mut text = self.open_text_record(size, count)?;
- loop {
- while text.match_byte(b'\n') {
- continue;
- }
- let set = match text.tokenize(b'=') {
- Some(set) => String::from_utf8_lossy(set).into_owned(),
- None => break,
- };
-
- // Always present even for an empty set.
- text.match_byte(b' ');
-
- match text.tokenize(b'\n') {
- None => println!("\tset \"{set}\" is empty"),
- Some(variables) => {
- println!(
- "\tset \"{set}\" contains \"{}\"",
- String::from_utf8_lossy(variables).trim_end_matches('\r')
- );
- }
- };
- }
- Ok(())
- }
-
- // Read record type 7, subtype 7.
- fn read_mrsets(&mut self, size: u32, count: u32) -> Result<()> {
- print!("{:08x}: multiple response sets", self.r.stream_position()?);
- let mut text = self.open_text_record(size, count)?;
- loop {
- #[derive(PartialEq, Eq)]
- enum MrSet {
- MC,
- MD,
- }
-
- while text.match_byte(b'\n') {}
- let Some(name) = text.tokenize(b'=') else {
- break;
- };
- let name = Vec::from(name);
-
- let (mrset, cat_label_from_counted_values, label_from_var_label) = if text
- .match_byte(b'C')
- {
- if !text.match_byte(b' ') {
- Err(anyhow!(
- "missing space following 'C' at offset {} in mrsets record",
- text.pos
- ))?;
- }
- (MrSet::MC, false, false)
- } else if text.match_byte(b'D') {
- (MrSet::MD, false, false)
- } else if text.match_byte(b'E') {
- if !text.match_byte(b' ') {
- Err(anyhow!(
- "missing space following 'E' at offset {} in mrsets record",
- text.pos
- ))?;
- }
-
- let pos = text.pos;
- let Some(number) = text.tokenize(b' ') else {
- Err(anyhow!(
- "Missing label source value following `E' at offset {}u in MRSETS record",
- text.pos
- ))?
- };
-
- let label_from_var_label = if number == b"11" {
- true
- } else if number == b"1" {
- false
- } else {
- Err(anyhow!("Unexpected label source value `{}' following `E' at offset {pos} in MRSETS record", String::from_utf8_lossy(number)))?
- };
- (MrSet::MD, true, label_from_var_label)
- } else {
- Err(anyhow!(
- "missing `C', `D', or `E' at offset {} in mrsets record",
- text.pos
- ))?
- };
-
- let counted_value = if mrset == MrSet::MD {
- Some(Vec::from(text.parse_counted_string()?))
- } else {
- None
- };
-
- let label = Vec::from(text.parse_counted_string()?);
-
- let variables = text.tokenize(b'\n');
-
- print!(
- "\t\"{}\": multiple {} set",
- String::from_utf8_lossy(&name),
- if mrset == MrSet::MC {
- "category"
- } else {
- "dichotomy"
- }
- );
- if let Some(counted_value) = counted_value {
- print!(
- ", counted value \"{}\"",
- String::from_utf8_lossy(&counted_value)
- );
- }
- if cat_label_from_counted_values {
- println!(", category labels from counted values");
- }
- if label != b"" {
- print!(", label \"{}\"", String::from_utf8_lossy(&label));
- }
- if label_from_var_label {
- print!(", label from variable label");
- }
- if let Some(variables) = variables {
- print!(", variables \"{}\"", String::from_utf8_lossy(variables));
- } else {
- print!("no variables");
- }
- println!();
- }
- Ok(())
- }
-
- fn read_extra_product_info(&mut self, size: u32, count: u32) -> Result<()> {
- print!("{:08x}: extra product info", self.r.stream_position()?);
- let text = self.open_text_record(size, count)?;
- print_string(&text.buffer);
- Ok(())
- }
-
- fn read_display_parameters(&mut self, size: u32, count: u32) -> Result<()> {
- println!(
- "{:08x}: variable display parameters",
- self.r.stream_position()?
- );
- if size != 4 {
- Err(anyhow!("Bad size ({size}) on extension 11."))?;
- }
- let n_vars = self.n_variables;
- let includes_width = if count as usize == 3 * n_vars {
- true
- } else if count as usize == 2 * n_vars {
- false
- } else {
- Err(anyhow!(
- "Extension 11 has bad count {count} (for {n_vars} variables)."
- ))?
- };
-
- for i in 0..n_vars {
- let measure: u32 = self.read_swap()?;
- print!(
- "\tVar #{i}: measure={measure} ({})",
- match measure {
- 1 => "nominal",
- 2 => "ordinal",
- 3 => "scale",
- _ => "invalid",
- }
- );
-
- if includes_width {
- let width: u32 = self.read_swap()?;
- print!(", width={width}");
- }
-
- let align: u32 = self.read_swap()?;
- println!(
- ", align={align} ({})",
- match align {
- 0 => "left",
- 1 => "right",
- 2 => "centre",
- _ => "invalid",
- }
- );
- }
- Ok(())
- }
-
- fn read_long_var_name_map(&mut self, size: u32, count: u32) -> Result<()> {
- print!(
- "{:08x}: long variable names (short => long)",
- self.r.stream_position()?
- );
- let mut text = self.open_text_record(size, count)?;
- while let Some((var, long_name)) = text.read_variable_to_value_pair() {
- println!(
- "\t{} => {}",
- String::from_utf8_lossy(&var),
- String::from_utf8_lossy(&long_name)
- );
- }
- Ok(())
- }
-
- fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> {
- print!(
- "{:08x}: very long strings (variable => length)",
- self.r.stream_position()?
- );
- let mut text = self.open_text_record(size, count)?;
- while let Some((var, length)) = text.read_variable_to_value_pair() {
- println!(
- "\t{} => {}",
- String::from_utf8_lossy(&var),
- String::from_utf8_lossy(&length)
- );
- }
- Ok(())
- }
-
- fn read_ncases64(&mut self, size: u32, count: u32) -> Result<()> {
- if size != 8 {
- Err(anyhow!("Bad size {size} for extended number of cases."))?
- }
- if count != 2 {
- Err(anyhow!("Bad count {count} for extended number of cases."))?
- }
- let unknown: u64 = self.read_swap()?;
- let ncases64: u64 = self.read_swap()?;
- print!(
- "{:08x}: extended number of cases: unknown={unknown}, ncases64={ncases64}",
- self.r.stream_position()?
- );
- Ok(())
- }
-
- fn read_attributes(&mut self, text: &mut TextRecord, variable: &str) -> Result<()> {
- loop {
- let Some(key) = text.tokenize_string(b'(') else {
- break;
- };
- for index in 1.. {
- let Some(value) = text.tokenize_string(b'\n') else {
- Err(anyhow!(
- "{variable}: Error parsing attribute value {key}[{index}]"
- ))?
- };
- if value.starts_with('\'') && value.ends_with('\'') && value.len() >= 2 {
- let middle = &value[1..value.len() - 2];
- println!("\t{variable}: {key}[{index}] = \"{middle}\"");
- } else {
- self.warn(format!(
- "{variable}: Attribute value {key}[{index}] is not quoted: {value}"
- ))?;
- }
- if text.match_byte(b')') {
- break;
- }
- }
-
- if text.match_byte(b'/') {
- break;
- }
- }
- Ok(())
- }
-
- fn read_datafile_attributes(&mut self, size: u32, count: u32) -> Result<()> {
- print!("{:08x}: datafile attributes", self.r.stream_position()?);
- let mut text = self.open_text_record(size, count)?;
- self.read_attributes(&mut text, "datafile")?;
- Ok(())
- }
-
- fn read_variable_attributes(&mut self, size: u32, count: u32) -> Result<()> {
- print!("{:08x}: variable attributes", self.r.stream_position()?);
- let mut text = self.open_text_record(size, count)?;
- loop {
- let Some(variable) = text.tokenize_string(b':') else {
- break;
- };
- self.read_attributes(&mut text, &variable)?;
- }
- Ok(())
- }
-
- fn read_character_encoding(&mut self, size: u32, count: u32) -> Result<()> {
- let offset = self.r.stream_position()?;
- let encoding = read_vec(&mut self.r, (size * count) as usize)?;
- println!("{offset:08x}: Character Encoding: {}", String::from_utf8_lossy(&encoding));
- Ok(())
- }
-
- fn read_long_string_value_labels(&mut self, size: u32, count: u32) -> Result<()> {
- let start = self.r.stream_position()?;
-
- println!("{start:08x}: long string value labels");
- while self.r.stream_position()? - start < (size * count) as u64 {
- let position = self.r.stream_position()?;
-
- let var_name_len: u32 = self.read_swap()?;
- if var_name_len > ID_MAX_LEN {
- Err(anyhow!("Variable name length in long string value label record ({var_name_len} exceeds {ID_MAX_LEN}-byte limit."))?
- }
- let var_name = read_vec(&mut self.r, var_name_len as usize)?;
-
- let width: u32 = self.read_swap()?;
- let n_values: u32 = self.read_swap()?;
-
- println!("\t{position:08x}: {}, width {width}, {n_values} values",
- String::from_utf8_lossy(&var_name));
-
- for _ in 0..n_values {
- let position = self.r.stream_position()?;
- let value_length: u32 = self.read_swap()?;
- let value = read_vec(&mut self.r, value_length as usize)?;
- let label_length: u32 = self.read_swap()?;
- let label = read_vec(&mut self.r, value_length as usize)?;
- println!("\t\t{position:08x}: \"{}\" ({value_length} bytes) => \"{}\" ({label_length} bytes)",
- String::from_utf8_lossy(&value),
- String::from_utf8_lossy(&label));
- }
- }
- Ok(())
- }
-
- fn read_long_string_missing_values(&mut self, size: u32, count: u32) -> Result<()> {
- let start = self.r.stream_position()?;
-
- println!("{start:08x}: long string missing values");
- while self.r.stream_position()? - start < (size * count) as u64 {
- let position = self.r.stream_position()?;
-
- let var_name_len: u32 = self.read_swap()?;
- if var_name_len > ID_MAX_LEN {
- Err(anyhow!("Variable name length in long string missing value record ({var_name_len} exceeds {ID_MAX_LEN}-byte limit."))?
- }
- let var_name = read_vec(&mut self.r, var_name_len as usize)?;
-
- let n_missing_values: u8 = self.read_swap()?;
- let value_length: u32 = self.read_swap()?;
-
- println!("\t{position:08x}: {}, {n_missing_values}, each {value_length} bytes:",
- String::from_utf8_lossy(&var_name));
-
- for _ in 0..n_missing_values {
- let value = read_vec(&mut self.r, value_length as usize)?;
- println!(" \"{}\"", String::from_utf8_lossy(&value));
- }
- }
- Ok(())
- }
-
- fn read_text_record(&mut self, size: u32, count: u32) -> Result<Vec<u8>> {
- let Some(n_bytes) = u32::checked_mul(size, count) else {
- Err(anyhow!("Extension record too large."))?
- };
- read_vec(&mut self.r, n_bytes as usize)
- }
-
- fn open_text_record(&mut self, size: u32, count: u32) -> Result<TextRecord> {
- Ok(TextRecord::new(self.read_text_record(size, count)?))
- }
-}
-
-fn print_string(s: &[u8]) {
- if s.contains(&b'\0') {
- println!("{}", HexView::new(s));
- } else {
- for &c in s {
- match c {
- b'\\' => print!("\\\\"),
- b'\n' => println!(),
- c if (b' '..=b'~').contains(&c) => print!("{}", c as char),
- c => print!("\\{:2x}", c),
- }
- }
- }
-}
-
-struct TextRecord {
- buffer: Vec<u8>,
- pos: usize,
-}
-
-impl TextRecord {
- fn new(buffer: Vec<u8>) -> TextRecord {
- TextRecord { buffer, pos: 0 }
- }
-
- fn tokenize(&mut self, delimiter: u8) -> Option<&[u8]> {
- let start = self.pos;
- while self.pos < self.buffer.len()
- && self.buffer[self.pos] != delimiter
- && self.buffer[self.pos] != 0
- {
- self.pos += 1
- }
- if start == self.pos {
- None
- } else {
- Some(&self.buffer[start..self.pos])
- }
- }
-
- fn tokenize_string(&mut self, delimiter: u8) -> Option<String> {
- self.tokenize(delimiter)
- .map(|s| String::from_utf8_lossy(s).into_owned())
- }
-
- fn match_byte(&mut self, c: u8) -> bool {
- if self.pos < self.buffer.len() && self.buffer[self.pos] == c {
- self.pos += 1;
- true
- } else {
- false
- }
- }
-
- fn parse_usize(&mut self) -> Result<usize> {
- let n_digits = self.buffer[self.pos..]
- .iter()
- .take_while(|c| c.is_ascii_digit())
- .count();
- if n_digits == 0 {
- Err(anyhow!("expecting digit at offset {} in record", self.pos))?;
- }
- let start = self.pos;
- self.pos += n_digits;
- let end = self.pos;
- let digits = str::from_utf8(&self.buffer[start..end]).unwrap();
- let Ok(number) = digits.parse::<usize>() else {
- Err(anyhow!(
- "expecting number in [0,{}] at offset {} in record",
- usize::MAX,
- self.pos
- ))?
- };
- self.pos = end;
- Ok(number)
- }
-
- fn get_n_bytes(&mut self, n: usize) -> Option<(usize, usize)> {
- let start = self.pos;
- let Some(end) = start.checked_add(n) else {
- return None;
- };
- self.pos = end;
- Some((start, end))
- }
-
- fn parse_counted_string(&mut self) -> Result<&[u8]> {
- let length = self.parse_usize()?;
- if !self.match_byte(b' ') {
- Err(anyhow!("expecting space at offset {} in record", self.pos))?;
- }
-
- let Some((start, end)) = self.get_n_bytes(length) else {
- Err(anyhow!(
- "{length}-byte string starting at offset {} exceeds record length {}",
- self.pos,
- self.buffer.len()
- ))?
- };
- if !self.match_byte(b' ') {
- Err(anyhow!(
- "expecting space at offset {} following {}-byte string",
- self.pos,
- end - start
- ))?;
- }
- Ok(&self.buffer[start..end])
- }
-
- fn read_variable_to_value_pair(&mut self) -> Option<(Vec<u8>, Vec<u8>)> {
- let key = self.tokenize(b'=')?.into();
- let value = self.tokenize(b'\t')?.into();
-
- while self.match_byte(b'\t') || self.match_byte(b'\0') {}
- Some((key, value))
+fn dissect(file_name: &Path) -> Result<()> { // prints every item read from one file to stdout; failures from `File::open` and `Reader::new` are returned to the caller
+ let reader = File::open(file_name)?;
+ let reader = BufReader::new(reader); // buffer the file before handing it to the record reader
+ let reader = Reader::new(reader)?;
+ for record in reader {
+ println!("{record:?}"); // NOTE(review): if the iterator yields `Result`s, a read error is only printed here, not propagated -- confirm that is intended
}
+ Ok(())
}
use flate2::read::ZlibDecoder;
use num::Integer;
+use std::fmt::{Debug, Formatter, Result as FmtResult};
use std::str::from_utf8;
use std::{
collections::VecDeque,
ZLib,
}
+#[derive(Clone, Debug)]
pub enum Record {
Header(Header),
Document(Document),
}
}
+/// Borrowed bytes whose `Debug` output is a best-effort quoted string.
+///
+/// If the bytes are valid UTF-8 they are printed verbatim.  Otherwise every
+/// byte is converted to the `char` with the same code point and passed
+/// through `char::escape_default`, so the output is always printable ASCII.
+/// In both cases trailing whitespace is trimmed and the result is wrapped in
+/// double quotes.
+pub struct FallbackEncoding<'a>(&'a [u8]);
+
+impl<'a> Debug for FallbackEncoding<'a> {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        // Only assigned (and allocated) when the bytes are not valid UTF-8.
+        let escaped: String;
+        let text = match from_utf8(self.0) {
+            Ok(valid) => valid,
+            Err(_) => {
+                escaped = self
+                    .0
+                    .iter()
+                    .flat_map(|&byte| char::from(byte).escape_default())
+                    .collect();
+                escaped.as_str()
+            }
+        };
+        write!(f, "\"{}\"", text.trim_end())
+    }
+}
+
+#[derive(Clone)]
pub struct Header {
/// Magic number.
pub magic: Magic,
pub endian: Endian,
}
+impl Header {
+    /// Writes one `name: value` line of the header dump.
+    ///
+    /// `name` is right-aligned in a 17-column field and `value` is rendered
+    /// with its `Debug` implementation, followed by a newline.
+    fn debug_field<T: Debug>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult {
+        writeln!(f, "{name:>17}: {value:?}")
+    }
+}
+
+/// Multi-line dump of the file header: a title line followed by one
+/// `name: value` line per field, each written through `debug_field`.
+impl Debug for Header {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "File header record:")?;
+
+        // Label/value pairs in output order.  Byte-string fields are wrapped
+        // in `FallbackEncoding` so that they print as quoted text.
+        let fields: [(&str, &dyn Debug); 12] = [
+            ("Magic", &self.magic),
+            ("Product name", &FallbackEncoding(&self.eye_catcher)),
+            ("Layout code", &self.layout_code),
+            ("Nominal case size", &self.nominal_case_size),
+            ("Compression", &self.compression),
+            ("Weight index", &self.weight_index),
+            ("Number of cases", &self.n_cases),
+            ("Compression bias", &self.bias),
+            ("Creation date", &FallbackEncoding(&self.creation_date)),
+            ("Creation time", &FallbackEncoding(&self.creation_time)),
+            ("File label", &FallbackEncoding(&self.file_label)),
+            ("Endianness", &self.endian),
+        ];
+        for (name, value) in fields {
+            self.debug_field(f, name, value)?;
+        }
+        Ok(())
+    }
+}
+
impl Header {
fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
let magic: [u8; 4] = read_bytes(r)?;
};
let weight_index: u32 = endian.parse(read_bytes(r)?);
- let weight_index = (weight_index > 0).then_some(weight_index - 1);
+ let weight_index = (weight_index > 0).then(|| weight_index - 1);
let n_cases: u32 = endian.parse(read_bytes(r)?);
let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
}
+/// Prints the well-known magic numbers by name and anything else as the raw
+/// wrapped value.
+impl Debug for Magic {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match *self {
+            Magic::SAV => f.write_str("$FL2"),
+            Magic::ZSAV => f.write_str("$FL3"),
+            Magic::EBCDIC => f.write_str("($FL2 in EBCDIC)"),
+            // Unrecognized magic: fall back to the inner value's `Debug`.
+            _ => write!(f, "{:?}", self.0),
+        }
+    }
+}
+
impl TryFrom<[u8; 4]> for Magic {
type Error = Error;
String([u8; 8]),
}
+/// Prints a number with its own `Debug` output, an absent number as
+/// `SYSMIS`, and a string through `FallbackEncoding`.
+impl Debug for Value {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            Self::String(bytes) => write!(f, "{:?}", FallbackEncoding(bytes)),
+            Self::Number(number) => match number {
+                Some(number) => write!(f, "{number:?}"),
+                None => f.write_str("SYSMIS"),
+            },
+        }
+    }
+}
+
impl Value {
+ fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Value, IoError> {
+ Ok(Self::from_raw(var_type, read_bytes(r)?, endian))
+ }
+
pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
match var_type {
VarType::String => Value::String(raw),
impl FusedIterator for Reader {}
+/// A print or write format packed into a `u32`.
+///
+/// The `Debug` output shows the raw value in hexadecimal followed by the
+/// decoded `NAMEw.d` form, where the name comes from `format_name` applied to
+/// the bits above bit 15, `w` is bits 8..=15 and `d` is bits 0..=7.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Format(pub u32);
+
+impl Debug for Format {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let Format(raw) = *self;
+        let name = format_name(raw >> 16);
+        let width = (raw >> 8) & 0xff;
+        let decimals = raw & 0xff;
+        write!(f, "{raw:06x} ({name}{width}.{decimals})")
+    }
+}
+
+/// Returns the name of the format with numeric code `type_`, or
+/// `"(unknown)"` if the code is not in the table.
+fn format_name(type_: u32) -> &'static str {
+    // (code, name) pairs; codes missing from the table are unknown.
+    const NAMES: &[(u32, &str)] = &[
+        (1, "A"),
+        (2, "AHEX"),
+        (3, "COMMA"),
+        (4, "DOLLAR"),
+        (5, "F"),
+        (6, "IB"),
+        (7, "PIBHEX"),
+        (8, "P"),
+        (9, "PIB"),
+        (10, "PK"),
+        (11, "RB"),
+        (12, "RBHEX"),
+        (15, "Z"),
+        (16, "N"),
+        (17, "E"),
+        (20, "DATE"),
+        (21, "TIME"),
+        (22, "DATETIME"),
+        (23, "ADATE"),
+        (24, "JDATE"),
+        (25, "DTIME"),
+        (26, "WKDAY"),
+        (27, "MONTH"),
+        (28, "MOYR"),
+        (29, "QYR"),
+        (30, "WKYR"),
+        (31, "PCT"),
+        (32, "DOT"),
+        (33, "CCA"),
+        (34, "CCB"),
+        (35, "CCC"),
+        (36, "CCD"),
+        (37, "CCE"),
+        (38, "EDATE"),
+        (39, "SDATE"),
+        (40, "MTIME"),
+        (41, "YMDHMS"),
+    ];
+
+    NAMES
+        .iter()
+        .find(|&&(code, _)| code == type_)
+        .map_or("(unknown)", |&(_, name)| name)
+}
+
+/// The missing values declared for one variable.
+///
+/// Holds a list of discrete values plus an optional `(low, high)` range.
+/// Either or both parts may be absent.
+#[derive(Clone)]
+pub struct MissingValues {
+ /// Individual (discrete) missing values, up to 3 of them.
+ pub values: Vec<Value>,
+
+ /// Optional range of missing values, as a `(low, high)` pair.
+ pub range: Option<(Value, Value)>,
+}
+
+/// Prints the discrete values and then the range (as `low THRU high`),
+/// separated by `", "`, or `none` when there is nothing to print.
+impl Debug for MissingValues {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        if self.is_empty() {
+            return write!(f, "none");
+        }
+
+        // Empty before the first item, ", " before every later one.
+        let mut separator = "";
+        for value in &self.values {
+            write!(f, "{separator}{value:?}")?;
+            separator = ", ";
+        }
+        if let Some((low, high)) = &self.range {
+            write!(f, "{separator}{low:?} THRU {high:?}")?;
+        }
+        Ok(())
+    }
+}
+
+impl MissingValues {
+    /// Returns true if there are neither discrete values nor a range.
+    fn is_empty(&self) -> bool {
+        self.values.is_empty() && self.range.is_none()
+    }
+
+    /// Reads the missing values of a variable record from `r`.
+    ///
+    /// `code` is the record's missing-value code and `width` its variable
+    /// width; `offset` is only used for error reporting.  Codes 0 through 3
+    /// mean that many discrete values.  For numeric variables (`width == 0`)
+    /// only, -2 means a range and -3 means a range plus one discrete value.
+    ///
+    /// On disk the range bounds come first (low, then high) and the discrete
+    /// value, if any, follows them, so the range is read before the discrete
+    /// values.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::BadNumericMissingValueCode` or
+    /// `Error::BadStringMissingValueCode` for any other code, and propagates
+    /// I/O errors from reading the values.
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        offset: u64,
+        width: i32,
+        code: i32,
+        endian: Endian,
+    ) -> Result<MissingValues, Error> {
+        let (n_values, has_range) = match (width, code) {
+            (_, 0..=3) => (code, false),
+            (0, -2) => (0, true),
+            (0, -3) => (1, true),
+            (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
+            (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
+        };
+
+        let var_type = VarType::from_width(width);
+
+        // The range occupies the first two elements when present; reading the
+        // discrete value first would swap it with the low bound for code -3.
+        let range = if has_range {
+            let low = Value::read(r, var_type, endian)?;
+            let high = Value::read(r, var_type, endian)?;
+            Some((low, high))
+        } else {
+            None
+        };
+
+        let mut values = Vec::new();
+        for _ in 0..n_values {
+            values.push(Value::read(r, var_type, endian)?);
+        }
+
+        Ok(MissingValues { values, range })
+    }
+}
+
+#[derive(Clone)]
pub struct Variable {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
/// Write format.
pub write_format: u32,
- /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
- pub missing_value_code: i32,
-
- /// Raw missing values, up to 3 of them.
- pub missing: Vec<[u8; 8]>,
+ /// Missing values.
+ pub missing_values: MissingValues,
/// Optional variable label.
pub label: Option<Vec<u8>>,
}
+/// Multi-line dump of a variable record: width (with a description of what
+/// its sign means), print and write formats, name, label and missing values.
+impl Debug for Variable {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        // Positive widths are strings, zero is numeric, and negative widths
+        // mark continuation records of long strings.
+        let kind = match self.width {
+            0 => "numeric",
+            width if width > 0 => "string",
+            _ => "long string continuation record",
+        };
+        writeln!(f, "Width: {} ({})", self.width, kind)?;
+
+        let print_format = Format(self.print_format);
+        writeln!(f, "Print format: {:?}", print_format)?;
+        let write_format = Format(self.write_format);
+        writeln!(f, "Write format: {:?}", write_format)?;
+
+        let name = FallbackEncoding(&self.name);
+        writeln!(f, "Name: {:?}", name)?;
+
+        let label = self.label.as_deref().map(FallbackEncoding);
+        writeln!(f, "Variable label: {:?}", label)?;
+
+        writeln!(f, "Missing values: {:?}", self.missing_values)
+    }
+}
+
impl Variable {
fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
let offset = r.stream_position()?;
}
};
- let mut missing = Vec::new();
- if missing_value_code != 0 {
- match (width, missing_value_code) {
- (0, -3 | -2 | 1 | 2 | 3) => (),
- (0, _) => {
- return Err(Error::BadNumericMissingValueCode {
- offset,
- code: missing_value_code,
- })
- }
- (_, 0..=3) => (),
- (_, _) => {
- return Err(Error::BadStringMissingValueCode {
- offset,
- code: missing_value_code,
- })
- }
- }
-
- for _ in 0..missing_value_code.abs() {
- missing.push(read_bytes(r)?);
- }
- }
+ let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?;
Ok(Variable {
offset,
name,
print_format,
write_format,
- missing_value_code,
- missing,
+ missing_values,
label,
})
}
}
+#[derive(Clone, Debug)]
pub struct ValueLabel {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
}
}
+#[derive(Clone, Debug)]
pub struct VarIndexes {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
}
}
+#[derive(Clone, Debug)]
pub struct Document {
/// Offset from the start of the file to the start of the record.
pub pos: u64,
}
}
+#[derive(Clone, Debug)]
pub struct Extension {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
}
}
+#[derive(Clone, Debug)]
pub struct ZHeader {
/// File offset to the start of the record.
pub offset: u64,
}
}
+#[derive(Clone, Debug)]
pub struct ZTrailer {
/// File offset to the start of the record.
pub offset: u64,
pub blocks: Vec<ZBlock>,
}
+#[derive(Clone, Debug)]
pub struct ZBlock {
/// Offset of block of data if simple compression were used.
pub uncompressed_ofs: u64,