X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=rust%2Fsrc%2Fmain.rs;h=404e96d57d07becaf09c4688319a0ac3739ceee4;hb=e0cbdf0daefcca81be9572aab0deedf945687f5a;hp=9c92b5515d50f8dd6c2c10f4cce46258d0cd1b5b;hpb=0b4388bfca70b1ce3e9daeb00d48681db823a337;p=pspp diff --git a/rust/src/main.rs b/rust/src/main.rs index 9c92b5515d..404e96d57d 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -1,5 +1,3 @@ -#![allow(unused_variables)] -#![allow(dead_code)] /* PSPP - a program for statistical analysis. * Copyright (C) 2023 Free Software Foundation, Inc. * @@ -16,18 +14,16 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -use anyhow::{anyhow, Result}; -use clap::Parser; -use hexplay::HexView; -use hexplay::HexViewBuilder; -use num::{Float, Num}; -use std::cmp::Ordering; +use anyhow::Result; +use clap::{Parser, ValueEnum}; +use encoding_rs::Encoding; +use pspp::cooked::decode; +use pspp::raw::{Reader, Record, Magic}; use std::fs::File; -use std::io::prelude::*; use std::io::BufReader; use std::path::{Path, PathBuf}; use std::str; -use std::{fmt, num::FpCategory}; +use thiserror::Error as ThisError; /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] @@ -35,977 +31,89 @@ use std::{fmt, num::FpCategory}; struct Args { /// Maximum number of cases to print. #[arg(long = "data", default_value_t = 0)] - max_cases: usize, + max_cases: u64, /// Files to dissect. #[arg(required = true)] files: Vec, -} - -fn main() -> Result<()> { - let Args { max_cases, files } = Args::parse(); - - for file in files { - Dissector::new(file)?; - } - Ok(()) -} - -#[derive(Copy, Clone, Debug)] -enum Compression { - Simple, - ZLib, -} - -#[derive(Copy, Clone, Debug)] -enum Endianness { - BigEndian, - LittleEndian, -} -use Endianness::*; - -trait Parse { - fn parse(self, bytes: [u8; N]) -> T; -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 8]) -> u64 { - match self { - BigEndian => u64::from_be_bytes(bytes), - LittleEndian => u64::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 4]) -> u32 { - match self { - BigEndian => u32::from_be_bytes(bytes), - LittleEndian => u32::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 2]) -> u16 { - match self { - BigEndian => u16::from_be_bytes(bytes), - LittleEndian => u16::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 1]) -> u8 { - match self { - BigEndian => u8::from_be_bytes(bytes), - LittleEndian => u8::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 8]) -> i64 { - match self { - BigEndian => i64::from_be_bytes(bytes), - LittleEndian => i64::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 4]) -> i32 { - match self { - BigEndian => i32::from_be_bytes(bytes), - LittleEndian => i32::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 2]) -> i16 { - match self { - BigEndian => i16::from_be_bytes(bytes), - LittleEndian => i16::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 1]) -> i8 { - match self { - BigEndian => i8::from_be_bytes(bytes), - LittleEndian => i8::from_le_bytes(bytes), - } - } -} -impl Parse for Endianness { - fn parse(self, bytes: [u8; 8]) -> f64 { - match self { - BigEndian => f64::from_be_bytes(bytes), - LittleEndian => f64::from_le_bytes(bytes), - } - } -} - -fn read_bytes(r: &mut BufReader) -> Result<[u8; N]> { - let mut buf = [0; N]; - r.read_exact(&mut buf)?; - Ok(buf) -} - -fn read_vec(r: &mut BufReader, n: usize) -> Result> { - let mut vec = vec![0; n]; - r.read_exact(&mut vec)?; - Ok(vec) -} - -trait ReadSwap { - fn read_swap(&mut self) -> Result; -} - -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} - -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} -impl ReadSwap for Dissector { - fn read_swap(&mut self) -> Result { - Ok(self.endianness.parse(read_bytes(&mut self.r)?)) - } -} + /// How to dissect the file. + #[arg(short, long, value_enum, default_value_t)] + mode: Mode, -struct Dissector { - filename: String, - r: BufReader, - compression: Option, - endianness: Endianness, - fp_format: Endianness, - bias: f64, - n_variable_records: usize, - n_variables: usize, - var_widths: Vec, -} - -fn detect_endianness(layout_code: [u8; 4]) -> Option { - for endianness in [BigEndian, LittleEndian] { - match endianness.parse(layout_code) { - 2 | 3 => return Some(endianness), - _ => (), - } - } - None + /// The encoding to use. + #[arg(long, value_parser = parse_encoding)] + encoding: Option<&'static Encoding>, } -fn detect_fp_format(bias: [u8; 8]) -> Option { - for endianness in [BigEndian, LittleEndian] { - let value: f64 = endianness.parse(bias); - if value == 100.0 { - return Some(endianness); - } - } - None -} +#[derive(ThisError, Debug)] +#[error("{0}: unknown encoding")] +struct UnknownEncodingError(String); -fn trim_end(mut s: Vec, c: u8) -> Vec { - while s.last() == Some(&c) { - s.pop(); +fn parse_encoding(arg: &str) -> Result<&'static Encoding, UnknownEncodingError> { + match Encoding::for_label_no_replacement(arg.as_bytes()) { + Some(encoding) => Ok(encoding), + None => Err(UnknownEncodingError(arg.to_string())), } - s } -fn slice_trim_end(mut s: &[u8], c: u8) -> &[u8] { - while s.last() == Some(&c) { - s = s.split_last().unwrap().1; - } - s +#[derive(Clone, Copy, Debug, Default, ValueEnum)] +enum Mode { + Identify, + Raw, + #[default] + Cooked, } -fn format_name(type_: u32) -> &'static str { - match type_ { - 1 => "A", - 2 => "AHEX", - 3 => "COMMA", - 4 => "DOLLAR", - 5 => "F", - 6 => "IB", - 7 => "PIBHEX", - 8 => "P", - 9 => "PIB", - 10 => "PK", - 11 => "RB", - 12 => "RBHEX", - 15 => "Z", - 16 => "N", - 17 => "E", - 20 => "DATE", - 21 => "TIME", - 22 => "DATETIME", - 23 => "ADATE", - 24 => "JDATE", - 25 => "DTIME", - 26 => "WKDAY", - 27 => "MONTH", - 28 => "MOYR", - 29 => "QYR", - 30 => "WKYR", - 31 => "PCT", - 32 => "DOT", - 33 => "CCA", - 34 => "CCB", - 35 => "CCC", - 36 => "CCD", - 37 => "CCE", - 38 => "EDATE", - 39 => "SDATE", - 40 => "MTIME", - 41 => "YMDHMS", - _ => "invalid", - } -} - -fn round_up(x: T, y: T) -> T { - (x + (y - T::one())) / y * y -} - -struct UntypedValue { - raw: [u8; 8], - endianness: Endianness, -} - -impl UntypedValue { - fn new(raw: [u8; 8], endianness: Endianness) -> UntypedValue { - UntypedValue { raw, endianness } - } -} - -impl fmt::Display for UntypedValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let numeric: f64 = self.endianness.parse(self.raw); - let n_printable = self - .raw - .iter() - .take_while(|&&x| x == b' ' || x.is_ascii_graphic()) - .count(); - let printable_prefix = std::str::from_utf8(&self.raw[0..n_printable]).unwrap(); - write!(f, "{numeric}/\"{printable_prefix}\"") - } -} - -struct HexFloat(T); - -impl fmt::Display for HexFloat { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let sign = if self.0.is_sign_negative() { "-" } else { "" }; - match self.0.classify() { - FpCategory::Nan => return write!(f, "NaN"), - FpCategory::Infinite => return write!(f, "{sign}Infinity"), - FpCategory::Zero => return write!(f, "{sign}0.0"), - _ => (), - }; - let (significand, mut exponent, _) = self.0.integer_decode(); - let mut hex_sig = format!("{:x}", significand); - while hex_sig.ends_with('0') { - hex_sig.pop(); - exponent += 4; - } - match hex_sig.len() { - 0 => write!(f, "{sign}0.0"), - 1 => write!(f, "{sign}0x{hex_sig}.0p{exponent}"), - len => write!( - f, - "{sign}0x{}.{}p{}", - hex_sig.chars().next().unwrap(), - &hex_sig[1..], - exponent + 4 * (len as i16 - 1) - ), - } - } -} - -#[cfg(test)] -mod hex_float_tests { - use crate::HexFloat; - use num::Float; +fn main() -> Result<()> { + let Args { + max_cases, + files, + mode, + encoding, + } = Args::parse(); - #[test] - fn test() { - assert_eq!(format!("{}", HexFloat(1.0)), "0x1.0p0"); - assert_eq!(format!("{}", HexFloat(123.0)), "0x1.ecp6"); - assert_eq!(format!("{}", HexFloat(1.0 / 16.0)), "0x1.0p-4"); - assert_eq!(format!("{}", HexFloat(f64::infinity())), "Infinity"); - assert_eq!(format!("{}", HexFloat(f64::neg_infinity())), "-Infinity"); - assert_eq!(format!("{}", HexFloat(f64::nan())), "NaN"); - assert_eq!(format!("{}", HexFloat(0.0)), "0.0"); - assert_eq!(format!("{}", HexFloat(f64::neg_zero())), "-0.0"); + for file in files { + dissect(&file, max_cases, mode, encoding)?; } + Ok(()) } -impl Dissector { - fn new>(filename: P) -> Result { - let mut r = BufReader::new(File::open(&filename)?); - let filename = filename.as_ref().to_string_lossy().into_owned(); - let rec_type: [u8; 4] = read_bytes(&mut r)?; - let zmagic = match &rec_type { - b"$FL2" => false, - b"$FL3" => true, - _ => Err(anyhow!("This is not an SPSS system file."))?, - }; - - let eye_catcher: [u8; 60] = read_bytes(&mut r)?; - let layout_code: [u8; 4] = read_bytes(&mut r)?; - let endianness = detect_endianness(layout_code) - .ok_or_else(|| anyhow!("This is not an SPSS system file."))?; - let layout_code: u32 = endianness.parse(layout_code); - let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?; - let compressed: u32 = endianness.parse(read_bytes(&mut r)?); - let compression = match (zmagic, compressed) { - (false, 0) => None, - (false, 1) => Some(Compression::Simple), - (true, 2) => Some(Compression::ZLib), - _ => Err(anyhow!( - "{} file header has invalid compression value {compressed}.", - if zmagic { "ZSAV" } else { "SAV" } - ))?, - }; - - let weight_index: u32 = endianness.parse(read_bytes(&mut r)?); - let n_cases: u32 = endianness.parse(read_bytes(&mut r)?); - - let bias: [u8; 8] = read_bytes(&mut r)?; - let fp_format = detect_fp_format(bias) - .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness }); - let bias: f64 = fp_format.parse(bias); - - let mut d = Dissector { - filename, - r, - compression, - endianness, - fp_format, - bias, - n_variable_records: 0, - n_variables: 0, - var_widths: Vec::new(), - }; - - let creation_date: [u8; 9] = read_bytes(&mut d.r)?; - let creation_time: [u8; 8] = read_bytes(&mut d.r)?; - let file_label: [u8; 64] = read_bytes(&mut d.r)?; - let file_label = trim_end(Vec::from(file_label), b' '); - d.skip_bytes(3)?; - - println!("File header record:"); - println!( - "{:>17}: {}", - "Product name", - String::from_utf8_lossy(&eye_catcher) - ); - println!("{:>17}: {}", "Layout code", layout_code); - println!( - "{:>17}: {} ({})", - "Compressed", - compressed, - match compression { - None => "no compression", - Some(Compression::Simple) => "simple compression", - Some(Compression::ZLib) => "ZLIB compression", - } - ); - println!("{:>17}: {}", "Weight index", weight_index); - println!("{:>17}: {}", "Number of cases", n_cases); - println!("{:>17}: {}", "Compression bias", bias); - println!( - "{:>17}: {}", - "Creation date", - String::from_utf8_lossy(&creation_date) - ); - println!( - "{:>17}: {}", - "Creation time", - String::from_utf8_lossy(&creation_time) - ); - println!( - "{:>17}: \"{}\"", - "File label", - String::from_utf8_lossy(&file_label) - ); - - loop { - let rec_type: u32 = d.read_swap()?; - match rec_type { - 2 => d.read_variable_record()?, - 3 => d.read_value_label_record()?, - 4 => Err(anyhow!("Misplaced type 4 record."))?, - 6 => d.read_document_record()?, - 7 => d.read_extension_record()?, - 999 => break, - _ => Err(anyhow!("Unrecognized record type {rec_type}."))?, - } - } - - let pos = d.r.stream_position()?; - println!( - "{:08x}: end-of-dictionary record (first byte of data at {:0x})", - pos, - pos + 4 - ); - - Ok(d) - } - - fn read_extension_record(&mut self) -> Result<()> { - let offset = self.r.stream_position()?; - let subtype: u32 = self.read_swap()?; - let size: u32 = self.read_swap()?; - let count: u32 = self.read_swap()?; - println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}"); - match subtype { - 3 => self.read_machine_integer_info(size, count), - 4 => self.read_machine_float_info(size, count), - 5 => self.read_variable_sets(size, count), - 6 => { - // DATE variable information. We don't use it yet, but we should. - Ok(()) - } - 7 | 19 => self.read_mrsets(size, count), - 10 => self.read_extra_product_info(size, count), - 11 => self.read_display_parameters(size, count), - _ => self.read_unknown_extension(subtype, size, count), - } - } - - fn warn(&mut self, s: String) -> Result<()> { - println!( - "\"{}\" near offset 0x{:08x}: {s}", - self.filename, - self.r.stream_position()? - ); - Ok(()) - } - - fn skip_bytes(&mut self, mut n: u64) -> Result<()> { - let mut buf = [0; 1024]; - while n > 0 { - let chunk = u64::min(n, buf.len() as u64); - self.r.read_exact(&mut buf[0..chunk as usize])?; - n -= chunk; - } - Ok(()) - } - - fn read_unknown_extension(&mut self, subtype: u32, size: u32, count: u32) -> Result<()> { - self.warn(format!("Unrecognized record type 7, subtype {subtype}."))?; - if size == 0 || count > 65536 / size { - self.skip_bytes(size as u64 * count as u64)?; - } else if size != 1 { - let mut offset = 0; - for _ in 0..count { - let vec = read_vec(&mut self.r, size as usize)?; - println!( - "{}", - HexViewBuilder::new(&vec).address_offset(offset).finish() - ); - offset += size as usize; - } - } - Ok(()) - } - - fn read_variable_record(&mut self) -> Result<()> { - self.n_variable_records += 1; - println!( - "{:08x}: variable record {}", - self.r.stream_position()?, - self.n_variable_records - ); - let width: i32 = self.read_swap()?; - let has_variable_label: u32 = self.read_swap()?; - let missing_value_code: i32 = self.read_swap()?; - let print_format: u32 = self.read_swap()?; - let write_format: u32 = self.read_swap()?; - let name: [u8; 8] = read_bytes(&mut self.r)?; - let name: Vec = trim_end(Vec::from(name), b'\0'); - - if width >= 0 { - self.n_variables += 1; - } - self.var_widths.push(width); - - println!( - "\tWidth: {width} ({})", - match width { - _ if width > 0 => "string", - _ if width == 0 => "numeric", - _ => "long string continuation record", - } - ); - - println!("\tVariable label: {has_variable_label}"); - println!( - "\tMissing values code: {missing_value_code} ({})", - match missing_value_code { - 0 => "no missing values", - 1 => "one missing value", - 2 => "two missing values", - 3 => "three missing values", - -2 => "one missing value range", - -3 => "one missing value, one range", - _ => "bad value", - } - ); - for (which, format) in [("Print", print_format), ("Worite", write_format)] { - let type_ = format_name(format >> 16); - let w = (format >> 8) & 0xff; - let d = format & 0xff; - println!("\t{which} format: {format:06x} ({type_}{w}.{d})"); - } - println!("\tName: {}", String::from_utf8_lossy(&name)); - - // Read variable label. - match has_variable_label { - 0 => (), - 1 => { - let offset = self.r.stream_position()?; - let len: u32 = self.read_swap()?; - let read_len = len.min(65535) as usize; - let label = read_vec(&mut self.r, read_len)?; - println!( - "\t{offset:08x} Variable label: \"{}\"", - String::from_utf8_lossy(&label) - ); - - self.skip_bytes((round_up(len, 4) - len).into())?; - } - _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?, - }; - - // Read missing values. - if missing_value_code != 0 { - print!("\t{:08x} Missing values:", self.r.stream_position()?); - match width.cmp(&0) { - Ordering::Equal => { - let (has_range, n_individual) = match missing_value_code { - -3 => (true, 1), - -2 => (true, 0), - 1 | 2 | 3 => (false, missing_value_code), - _ => Err(anyhow!( - "Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3." - ))?, - }; - if has_range { - let low: f64 = self.read_swap()?; - let high: f64 = self.read_swap()?; - print!(" {low}...{high}"); - } - for _ in 0..n_individual { - let value: f64 = self.read_swap()?; - print!(" {value}"); - } - } - Ordering::Greater => { - if !(0..=3).contains(&missing_value_code) { - Err(anyhow!( - "String missing value indicator field is not 0, 1, 2, or 3." - ))?; - } - for _ in 0..missing_value_code { - let string: [u8; 8] = read_bytes(&mut self.r)?; - let string: Vec = trim_end(Vec::from(string), b'\0'); - println!(" {}", String::from_utf8_lossy(&string)); - } - } - Ordering::Less => (), - } - println!(); - } - - Ok(()) - } - - fn read_value_label_record(&mut self) -> Result<()> { - println!("{:08x}: value labels record", self.r.stream_position()?); - - // Read the labels. - let n_labels: u32 = self.read_swap()?; - for _ in 0..n_labels { - let raw: [u8; 8] = read_bytes(&mut self.r)?; - let value = UntypedValue::new(raw, self.fp_format); - let label_len: u8 = self.read_swap()?; - let padded_len = round_up(label_len as usize + 1, 8); - - let mut label = read_vec(&mut self.r, padded_len)?; - label.truncate(label_len as usize); - let label = String::from_utf8_lossy(&label); - - println!("\t{value}: {label}"); - } - - // Read the type-4 record with the corresponding variable indexes. - let rec_type: u32 = self.read_swap()?; - if rec_type != 4 { - Err(anyhow!( - "Variable index record (type 4) does not immediately \ - follow value label record (type 3) as it should." - ))?; - } - - println!("\t{:08x}: apply to variables", self.r.stream_position()?); - let n_vars: u32 = self.read_swap()?; - for _ in 0..n_vars { - let index: u32 = self.read_swap()?; - print!(" {index}"); - } - println!(); - - Ok(()) - } - - fn read_document_record(&mut self) -> Result<()> { - println!("{:08x}: document record", self.r.stream_position()?); - let n_lines: u32 = self.read_swap()?; - println!("\t{n_lines} lines of documents"); - - for i in 0..n_lines { - print!("\t{:08x}: ", self.r.stream_position()?); - let line: [u8; 64] = read_bytes(&mut self.r)?; - let line = trim_end(Vec::from(line), b' '); - println!("line {i}: \"{}\"", String::from_utf8_lossy(&line)); - } - Ok(()) - } - - fn read_machine_integer_info(&mut self, size: u32, count: u32) -> Result<()> { - let offset = self.r.stream_position()?; - let version_major: u32 = self.read_swap()?; - let version_minor: u32 = self.read_swap()?; - let version_revision: u32 = self.read_swap()?; - let machine_code: u32 = self.read_swap()?; - let float_representation: u32 = self.read_swap()?; - let compression_code: u32 = self.read_swap()?; - let integer_representation: u32 = self.read_swap()?; - let character_code: u32 = self.read_swap()?; - - println!("{offset:08x}: machine integer info"); - if size != 4 || count != 8 { - Err(anyhow!( - "Bad size ({size}) or count ({count}) field on record type 7, subtype 3" - ))?; - } - println!("\tVersion: {version_major}.{version_minor}.{version_revision}"); - println!("\tMachine code: {machine_code}"); - println!( - "\tFloating point representation: {float_representation} ({})", - match float_representation { - 1 => "IEEE 754", - 2 => "IBM 370", - 3 => "DEC VAX", - _ => "unknown", - } - ); - println!("\tCompression code: {compression_code}"); - println!( - "\tEndianness: {integer_representation} ({})", - match integer_representation { - 1 => "big", - 2 => "little", - _ => "unknown", - } - ); - println!("\tCharacter code: {character_code}"); - Ok(()) - } - - fn read_machine_float_info(&mut self, size: u32, count: u32) -> Result<()> { - let offset = self.r.stream_position()?; - let sysmis: f64 = self.read_swap()?; - let highest: f64 = self.read_swap()?; - let lowest: f64 = self.read_swap()?; - - println!("{offset:08x}: machine float info"); - if size != 4 || count != 8 { - Err(anyhow!( - "Bad size ({size}) or count ({count}) field on extension 4." - ))?; - } - - println!("\tsysmis: {sysmis} ({})", HexFloat(sysmis)); - println!("\thighest: {highest} ({})", HexFloat(highest)); - println!("\tlowest: {lowest} ({})", HexFloat(lowest)); - Ok(()) - } +fn dissect(file_name: &Path, max_cases: u64, mode: Mode, encoding: Option<&'static Encoding>) -> Result<()> { + let reader = File::open(file_name)?; + let reader = BufReader::new(reader); + let mut reader = Reader::new(reader)?; - fn read_variable_sets(&mut self, size: u32, count: u32) -> Result<()> { - println!("{:08x}: variable sets", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; - loop { - while text.match_byte(b'\n') { - continue; + match mode { + Mode::Identify => { + let Record::Header(header) = reader.next().unwrap()? else { unreachable!() }; + match header.magic { + Magic::Sav => println!("SPSS System File"), + Magic::Zsav => println!("SPSS System File with Zlib compression"), + Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"), } - let set = match text.tokenize(b'=') { - Some(set) => String::from_utf8_lossy(set).into_owned(), - None => break, - }; - - // Always present even for an empty set. - text.match_byte(b' '); - - match text.tokenize(b'\n') { - None => println!("\tset \"{set}\" is empty"), - Some(variables) => { - println!( - "\tset \"{set}\" contains \"{}\"", - String::from_utf8_lossy(variables).trim_end_matches('\r') - ); - } - }; + return Ok(()) } - Ok(()) - } - - // Read record type 7, subtype 7. - fn read_mrsets(&mut self, size: u32, count: u32) -> Result<()> { - print!("{:08x}: multiple response sets", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; - loop { - #[derive(PartialEq, Eq)] - enum MrSet { - MC, - MD, + Mode::Raw => { + let headers: Vec = reader.collect_headers()?; + for header in headers { + println!("{header:?}"); } - - while text.match_byte(b'\n') {} - let Some(name) = text.tokenize(b'=') else { - break; - }; - - let (mrset, cat_label_from_counted_values, label_from_var_label) = if text - .match_byte(b'C') - { - if !text.match_byte(b' ') { - Err(anyhow!( - "missing space following 'C' at offset {} in mrsets record", - text.pos - ))?; - } - (MrSet::MC, false, false) - } else if text.match_byte(b'D') { - (MrSet::MD, false, false) - } else if text.match_byte(b'E') { - if !text.match_byte(b' ') { - Err(anyhow!( - "missing space following 'E' at offset {} in mrsets record", - text.pos - ))?; - } - - let pos = text.pos; - let Some(number) = text.tokenize(b' ') else { - Err(anyhow!( - "Missing label source value following `E' at offset {}u in MRSETS record", - text.pos - ))? - }; - - let label_from_var_label = if number == b"11" { - true - } else if number == b"1" { - false - } else { - Err(anyhow!("Unexpected label source value `{}' following `E' at offset {pos} in MRSETS record", String::from_utf8_lossy(number)))? - }; - (MrSet::MD, true, label_from_var_label) - } else { - Err(anyhow!( - "missing `C', `D', or `E' at offset {} in mrsets record", - text.pos - ))? - }; - - let counted_value = if mrset == MrSet::MD { - Some(text.parse_counted_string()?) - } else { None }; - - let label = text.parse_counted_string()?; - - let variables = text.tokenize(b'\n'); - - print!("\t\"{}\": multiple {} set", - String::from_utf8_lossy(name), - if mrset == MrSet::MC { "category" } else { "dichotomy" }); - } - Ok(()) - } - - fn read_extra_product_info(&mut self, size: u32, count: u32) -> Result<()> { - print!("{:08x}: extra product info", self.r.stream_position()?); - let text = self.open_text_record(size, count)?; - print_string(&text.buffer); - Ok(()) - } - - fn read_display_parameters(&mut self, size: u32, count: u32) -> Result<()> { - println!( - "{:08x}: variable display parameters", - self.r.stream_position()? - ); - if size != 4 { - Err(anyhow!("Bad size ({size}) on extension 11."))?; - } - let n_vars = self.n_variables; - let includes_width = if count as usize == 3 * n_vars { - true - } else if count as usize == 2 * n_vars { - false - } else { - Err(anyhow!( - "Extension 11 has bad count {count} (for {n_vars} variables)." - ))? - }; - - for i in 0..n_vars { - let measure: u32 = self.read_swap()?; - print!( - "\tVar #{i}: measure={measure} ({})", - match measure { - 1 => "nominal", - 2 => "ordinal", - 3 => "scale", - _ => "invalid", - } - ); - - if includes_width { - let width: u32 = self.read_swap()?; - print!(", width={width}"); + Mode::Cooked => { + let headers: Vec = reader.collect_headers()?; + let headers = decode(headers, encoding, &|e| panic!("{e}"))?; + for header in headers { + println!("{header:?}"); } - - let align: u32 = self.read_swap()?; - println!( - ", align={align} ({})", - match align { - 0 => "left", - 1 => "right", - 2 => "centre", - _ => "invalid", - } - ); } - Ok(()) } - fn open_text_record(&mut self, size: u32, count: u32) -> Result { - let n_bytes = match u32::checked_mul(size, count) { - Some(n) => n, - None => Err(anyhow!("Extension record too large."))?, + for _ in 0..max_cases { + let Some(Ok(record)) = reader.next() else { + break; }; - Ok(TextRecord::new(read_vec(&mut self.r, n_bytes as usize)?)) - } -} - -fn print_string(s: &[u8]) { - if s.contains(&b'\0') { - println!("{}", HexView::new(s)); - } else { - for &c in s { - match c { - b'\\' => print!("\\\\"), - b'\n' => println!(), - c if (b' '..=b'~').contains(&c) => print!("{}", c as char), - c => print!("\\{:2x}", c), - } - } - } -} - -struct TextRecord { - buffer: Vec, - pos: usize, -} - -impl TextRecord { - fn new(buffer: Vec) -> TextRecord { - TextRecord { buffer, pos: 0 } - } - - fn tokenize(&mut self, delimiter: u8) -> Option<&[u8]> { - let start = self.pos; - while self.pos < self.buffer.len() - && self.buffer[self.pos] != delimiter - && self.buffer[self.pos] != 0 - { - self.pos += 1 - } - if start == self.pos { - None - } else { - Some(&self.buffer[start..self.pos]) - } - } - - fn match_byte(&mut self, c: u8) -> bool { - if self.pos < self.buffer.len() && self.buffer[self.pos] == c { - self.pos += 1; - true - } else { - false - } - } - - fn parse_usize(&mut self) -> Result { - let n_digits = self.buffer[self.pos..] - .iter() - .take_while(|c| c.is_ascii_digit()) - .count(); - if n_digits == 0 { - Err(anyhow!("expecting digit at offset {} in record", self.pos))?; - } - let start = self.pos; - self.pos += n_digits; - let end = self.pos; - let digits = str::from_utf8(&self.buffer[start..end]).unwrap(); - let Ok(number) = digits.parse::() else { - Err(anyhow!( - "expecting number in [0,{}] at offset {} in record", - usize::MAX, - self.pos - ))? - }; - self.pos = end; - Ok(number) - } - - fn get_n_bytes(&mut self, n: usize) -> Option<(usize, usize)> { - let start = self.pos; - let Some(end) = start.checked_add(n) else { - return None; - }; - self.pos = end; - Some((start, end)) - } - - fn parse_counted_string(&mut self) -> Result<&[u8]> { - let length = self.parse_usize()?; - if !self.match_byte(b' ') { - Err(anyhow!("expecting space at offset {} in record", self.pos))?; - } - - let Some((start, end)) = self.get_n_bytes(length) else { - Err(anyhow!("{length}-byte string starting at offset {} exceeds record length {}", - self.pos, self.buffer.len()))? - }; - if !self.match_byte(b' ') { - Err(anyhow!( - "expecting space at offset {} following {}-byte string", - self.pos, - end - start - ))?; - } - Ok(&self.buffer[start..end]) + println!("{:?}", record); } + Ok(()) }