From: Ben Pfaff Date: Sat, 22 Jul 2023 20:09:36 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b4388bfca70b1ce3e9daeb00d48681db823a337;p=pspp work --- diff --git a/rust/src/main.rs b/rust/src/main.rs index fbe11c5dfb..9c92b5515d 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -1,28 +1,33 @@ +#![allow(unused_variables)] +#![allow(dead_code)] /* PSPP - a program for statistical analysis. - Copyright (C) 2023 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + * Copyright (C) 2023 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . */ use anyhow::{anyhow, Result}; use clap::Parser; +use hexplay::HexView; use hexplay::HexViewBuilder; use num::{Float, Num}; -use std::{fmt, num::FpCategory}; +use std::cmp::Ordering; use std::fs::File; use std::io::prelude::*; use std::io::BufReader; use std::path::{Path, PathBuf}; +use std::str; +use std::{fmt, num::FpCategory}; /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] @@ -34,7 +39,7 @@ struct Args { /// Files to dissect. #[arg(required = true)] - files: Vec + files: Vec, } fn main() -> Result<()> { @@ -49,13 +54,13 @@ fn main() -> Result<()> { #[derive(Copy, Clone, Debug)] enum Compression { Simple, - ZLib + ZLib, } #[derive(Copy, Clone, Debug)] enum Endianness { BigEndian, - LittleEndian + LittleEndian, } use Endianness::*; @@ -66,7 +71,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 8]) -> u64 { match self { BigEndian => u64::from_be_bytes(bytes), - LittleEndian => u64::from_le_bytes(bytes) + LittleEndian => u64::from_le_bytes(bytes), } } } @@ -74,7 +79,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 4]) -> u32 { match self { BigEndian => u32::from_be_bytes(bytes), - LittleEndian => u32::from_le_bytes(bytes) + LittleEndian => u32::from_le_bytes(bytes), } } } @@ -82,7 +87,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 2]) -> u16 { match self { BigEndian => u16::from_be_bytes(bytes), - LittleEndian => u16::from_le_bytes(bytes) + LittleEndian => u16::from_le_bytes(bytes), } } } @@ -90,7 +95,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 1]) -> u8 { match self { BigEndian => u8::from_be_bytes(bytes), - LittleEndian => u8::from_le_bytes(bytes) + LittleEndian => u8::from_le_bytes(bytes), } } } @@ -98,7 +103,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 8]) -> i64 { match self { BigEndian => i64::from_be_bytes(bytes), - LittleEndian => i64::from_le_bytes(bytes) + LittleEndian => i64::from_le_bytes(bytes), } } } @@ -106,7 +111,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 4]) -> i32 { match self { BigEndian => i32::from_be_bytes(bytes), - LittleEndian => i32::from_le_bytes(bytes) + LittleEndian => i32::from_le_bytes(bytes), } } } @@ -114,7 +119,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 2]) -> i16 { match self { BigEndian => i16::from_be_bytes(bytes), - LittleEndian => i16::from_le_bytes(bytes) + LittleEndian => i16::from_le_bytes(bytes), } } } @@ -122,7 +127,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 1]) -> i8 { match self { BigEndian => i8::from_be_bytes(bytes), - LittleEndian => i8::from_le_bytes(bytes) + LittleEndian => i8::from_le_bytes(bytes), } } } @@ -130,7 +135,7 @@ impl Parse for Endianness { fn parse(self, bytes: [u8; 8]) -> f64 { match self { BigEndian => f64::from_be_bytes(bytes), - LittleEndian => f64::from_le_bytes(bytes) + LittleEndian => f64::from_le_bytes(bytes), } } } @@ -142,11 +147,10 @@ fn read_bytes(r: &mut BufReader) -> Result<[u8; N]> { } fn read_vec(r: &mut BufReader, n: usize) -> Result> { - let mut vec = Vec::with_capacity(n); - vec.resize(n, 0); + let mut vec = vec![0; n]; r.read_exact(&mut vec)?; Ok(vec) -} +} trait ReadSwap { fn read_swap(&mut self) -> Result; @@ -191,7 +195,7 @@ fn detect_endianness(layout_code: [u8; 4]) -> Option { for endianness in [BigEndian, LittleEndian] { match endianness.parse(layout_code) { 2 | 3 => return Some(endianness), - _ => () + _ => (), } } None @@ -201,7 +205,7 @@ fn detect_fp_format(bias: [u8; 8]) -> Option { for endianness in [BigEndian, LittleEndian] { let value: f64 = endianness.parse(bias); if value == 100.0 { - return Some(endianness) + return Some(endianness); } } None @@ -260,18 +264,17 @@ fn format_name(type_: u32) -> &'static str { 39 => "SDATE", 40 => "MTIME", 41 => "YMDHMS", - _ => "invalid" + _ => "invalid", } } -fn round_up(x: T, y: T) -> T -{ +fn round_up(x: T, y: T) -> T { (x + (y - T::one())) / y * y } struct UntypedValue { raw: [u8; 8], - endianness: Endianness + endianness: Endianness, } impl UntypedValue { @@ -283,7 +286,11 @@ impl UntypedValue { impl fmt::Display for UntypedValue { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let numeric: f64 = self.endianness.parse(self.raw); - let n_printable = self.raw.iter().take_while(|&&x| x == b' ' || x.is_ascii_graphic()).count(); + let n_printable = self + .raw + .iter() + .take_while(|&&x| x == b' ' || x.is_ascii_graphic()) + .count(); let printable_prefix = std::str::from_utf8(&self.raw[0..n_printable]).unwrap(); write!(f, "{numeric}/\"{printable_prefix}\"") } @@ -309,10 +316,13 @@ impl fmt::Display for HexFloat { match hex_sig.len() { 0 => write!(f, "{sign}0.0"), 1 => write!(f, "{sign}0x{hex_sig}.0p{exponent}"), - len => write!(f, "{sign}0x{}.{}p{}", - hex_sig.chars().nth(0).unwrap(), - &hex_sig[1..], - exponent + 4 * (len as i16 - 1)) + len => write!( + f, + "{sign}0x{}.{}p{}", + hex_sig.chars().next().unwrap(), + &hex_sig[1..], + exponent + 4 * (len as i16 - 1) + ), } } } @@ -343,7 +353,7 @@ impl Dissector { let zmagic = match &rec_type { b"$FL2" => false, b"$FL3" => true, - _ => Err(anyhow!("This is not an SPSS system file."))? + _ => Err(anyhow!("This is not an SPSS system file."))?, }; let eye_catcher: [u8; 60] = read_bytes(&mut r)?; @@ -357,8 +367,10 @@ impl Dissector { (false, 0) => None, (false, 1) => Some(Compression::Simple), (true, 2) => Some(Compression::ZLib), - _ => Err(anyhow!("{} file header has invalid compression value {compressed}.", - if zmagic { "ZSAV" } else { "SAV" }))?, + _ => Err(anyhow!( + "{} file header has invalid compression value {compressed}.", + if zmagic { "ZSAV" } else { "SAV" } + ))?, }; let weight_index: u32 = endianness.parse(read_bytes(&mut r)?); @@ -388,19 +400,40 @@ impl Dissector { d.skip_bytes(3)?; println!("File header record:"); - println!("{:>17}: {}", "Product name", String::from_utf8_lossy(&eye_catcher)); + println!( + "{:>17}: {}", + "Product name", + String::from_utf8_lossy(&eye_catcher) + ); println!("{:>17}: {}", "Layout code", layout_code); - println!("{:>17}: {} ({})", "Compressed", compressed, match compression { - None => "no compression", - Some(Compression::Simple) => "simple compression", - Some(Compression::ZLib) => "ZLIB compression", - }); + println!( + "{:>17}: {} ({})", + "Compressed", + compressed, + match compression { + None => "no compression", + Some(Compression::Simple) => "simple compression", + Some(Compression::ZLib) => "ZLIB compression", + } + ); println!("{:>17}: {}", "Weight index", weight_index); println!("{:>17}: {}", "Number of cases", n_cases); println!("{:>17}: {}", "Compression bias", bias); - println!("{:>17}: {}", "Creation date", String::from_utf8_lossy(&creation_date)); - println!("{:>17}: {}", "Creation time", String::from_utf8_lossy(&creation_time)); - println!("{:>17}: \"{}\"", "File label", String::from_utf8_lossy(&file_label)); + println!( + "{:>17}: {}", + "Creation date", + String::from_utf8_lossy(&creation_date) + ); + println!( + "{:>17}: {}", + "Creation time", + String::from_utf8_lossy(&creation_time) + ); + println!( + "{:>17}: \"{}\"", + "File label", + String::from_utf8_lossy(&file_label) + ); loop { let rec_type: u32 = d.read_swap()?; @@ -411,12 +444,16 @@ impl Dissector { 6 => d.read_document_record()?, 7 => d.read_extension_record()?, 999 => break, - _ => Err(anyhow!("Unrecognized record type {rec_type}."))? + _ => Err(anyhow!("Unrecognized record type {rec_type}."))?, } } let pos = d.r.stream_position()?; - println!("{:08x}: end-of-dictionary record (first byte of data at {:0x})", pos, pos + 4); + println!( + "{:08x}: end-of-dictionary record (first byte of data at {:0x})", + pos, + pos + 4 + ); Ok(d) } @@ -430,12 +467,24 @@ impl Dissector { match subtype { 3 => self.read_machine_integer_info(size, count), 4 => self.read_machine_float_info(size, count), + 5 => self.read_variable_sets(size, count), + 6 => { + // DATE variable information. We don't use it yet, but we should. + Ok(()) + } + 7 | 19 => self.read_mrsets(size, count), + 10 => self.read_extra_product_info(size, count), + 11 => self.read_display_parameters(size, count), _ => self.read_unknown_extension(subtype, size, count), } } fn warn(&mut self, s: String) -> Result<()> { - println!("\"{}\" near offset 0x{:08x}: {s}", self.filename, self.r.stream_position()?); + println!( + "\"{}\" near offset 0x{:08x}: {s}", + self.filename, + self.r.stream_position()? + ); Ok(()) } @@ -457,7 +506,10 @@ impl Dissector { let mut offset = 0; for _ in 0..count { let vec = read_vec(&mut self.r, size as usize)?; - println!("{}", HexViewBuilder::new(&vec).address_offset(offset).finish()); + println!( + "{}", + HexViewBuilder::new(&vec).address_offset(offset).finish() + ); offset += size as usize; } } @@ -466,7 +518,11 @@ impl Dissector { fn read_variable_record(&mut self) -> Result<()> { self.n_variable_records += 1; - println!("{:08x}: variable record {}", self.r.stream_position()?, self.n_variable_records); + println!( + "{:08x}: variable record {}", + self.r.stream_position()?, + self.n_variable_records + ); let width: i32 = self.read_swap()?; let has_variable_label: u32 = self.read_swap()?; let missing_value_code: i32 = self.read_swap()?; @@ -480,25 +536,29 @@ impl Dissector { } self.var_widths.push(width); - println!("\tWidth: {width} ({})", match width { - _ if width > 0 => "string", - _ if width == 0 => "numeric", - _ => "long string continuation record" - }); + println!( + "\tWidth: {width} ({})", + match width { + _ if width > 0 => "string", + _ if width == 0 => "numeric", + _ => "long string continuation record", + } + ); println!("\tVariable label: {has_variable_label}"); - println!("\tMissing values code: {missing_value_code} ({})", - match missing_value_code { - 0 => "no missing values", - 1 => "one missing value", - 2 => "two missing values", - 3 => "three missing values", - -2 => "one missing value range", - -3 => "one missing value, one range", - _ => "bad value" - }); - for (which, format) in [("Print", print_format), - ("Worite", write_format)] { + println!( + "\tMissing values code: {missing_value_code} ({})", + match missing_value_code { + 0 => "no missing values", + 1 => "one missing value", + 2 => "two missing values", + 3 => "three missing values", + -2 => "one missing value range", + -3 => "one missing value, one range", + _ => "bad value", + } + ); + for (which, format) in [("Print", print_format), ("Worite", write_format)] { let type_ = format_name(format >> 16); let w = (format >> 8) & 0xff; let d = format & 0xff; @@ -514,41 +574,52 @@ impl Dissector { let len: u32 = self.read_swap()?; let read_len = len.min(65535) as usize; let label = read_vec(&mut self.r, read_len)?; - println!("\t{offset:08x} Variable label: \"{}\"", String::from_utf8_lossy(&label)); + println!( + "\t{offset:08x} Variable label: \"{}\"", + String::from_utf8_lossy(&label) + ); self.skip_bytes((round_up(len, 4) - len).into())?; - }, + } _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?, }; // Read missing values. if missing_value_code != 0 { print!("\t{:08x} Missing values:", self.r.stream_position()?); - if width == 0 { - let (has_range, n_individual) = match missing_value_code { - -3 => (true, 1), - -2 => (true, 0), - 1 | 2 | 3 => (false, missing_value_code), - _ => Err(anyhow!("Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3."))?, - }; - if has_range { - let low: f64 = self.read_swap()?; - let high: f64 = self.read_swap()?; - print!(" {low}...{high}"); - } - for _ in 0..n_individual { - let value: f64 = self.read_swap()?; - print!(" {value}"); + match width.cmp(&0) { + Ordering::Equal => { + let (has_range, n_individual) = match missing_value_code { + -3 => (true, 1), + -2 => (true, 0), + 1 | 2 | 3 => (false, missing_value_code), + _ => Err(anyhow!( + "Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3." + ))?, + }; + if has_range { + let low: f64 = self.read_swap()?; + let high: f64 = self.read_swap()?; + print!(" {low}...{high}"); + } + for _ in 0..n_individual { + let value: f64 = self.read_swap()?; + print!(" {value}"); + } } - } else if width > 0 { - if missing_value_code < 1 || missing_value_code > 3 { - Err(anyhow!("String missing value indicator field is not 0, 1, 2, or 3."))?; - } - for _ in 0..missing_value_code { - let string: [u8; 8] = read_bytes(&mut self.r)?; - let string: Vec = trim_end(Vec::from(string), b'\0'); - println!(" {}", String::from_utf8_lossy(&string)); + Ordering::Greater => { + if !(0..=3).contains(&missing_value_code) { + Err(anyhow!( + "String missing value indicator field is not 0, 1, 2, or 3." + ))?; + } + for _ in 0..missing_value_code { + let string: [u8; 8] = read_bytes(&mut self.r)?; + let string: Vec = trim_end(Vec::from(string), b'\0'); + println!(" {}", String::from_utf8_lossy(&string)); + } } + Ordering::Less => (), } println!(); } @@ -577,8 +648,10 @@ impl Dissector { // Read the type-4 record with the corresponding variable indexes. let rec_type: u32 = self.read_swap()?; if rec_type != 4 { - Err(anyhow!("Variable index record (type 4) does not immediately \ - follow value label record (type 3) as it should."))?; + Err(anyhow!( + "Variable index record (type 4) does not immediately \ + follow value label record (type 3) as it should." + ))?; } println!("\t{:08x}: apply to variables", self.r.stream_position()?); @@ -619,24 +692,30 @@ impl Dissector { println!("{offset:08x}: machine integer info"); if size != 4 || count != 8 { - Err(anyhow!("Bad size ({size}) or count ({count}) field on record type 7, subtype 3"))?; + Err(anyhow!( + "Bad size ({size}) or count ({count}) field on record type 7, subtype 3" + ))?; } println!("\tVersion: {version_major}.{version_minor}.{version_revision}"); println!("\tMachine code: {machine_code}"); - println!("\tFloating point representation: {float_representation} ({})", - match float_representation { - 1 => "IEEE 754", - 2 => "IBM 370", - 3 => "DEC VAX", - _ => "unknown" - }); + println!( + "\tFloating point representation: {float_representation} ({})", + match float_representation { + 1 => "IEEE 754", + 2 => "IBM 370", + 3 => "DEC VAX", + _ => "unknown", + } + ); println!("\tCompression code: {compression_code}"); - println!("\tEndianness: {integer_representation} ({})", - match integer_representation { - 1 => "big", - 2 => "little", - _ => "unknown" - }); + println!( + "\tEndianness: {integer_representation} ({})", + match integer_representation { + 1 => "big", + 2 => "little", + _ => "unknown", + } + ); println!("\tCharacter code: {character_code}"); Ok(()) } @@ -649,7 +728,9 @@ impl Dissector { println!("{offset:08x}: machine float info"); if size != 4 || count != 8 { - Err(anyhow!("Bad size ({size}) or count ({count}) field on extension 4."))?; + Err(anyhow!( + "Bad size ({size}) or count ({count}) field on extension 4." + ))?; } println!("\tsysmis: {sysmis} ({})", HexFloat(sysmis)); @@ -666,7 +747,7 @@ impl Dissector { continue; } let set = match text.tokenize(b'=') { - Some(set) => String::from_utf8_lossy(&set).into_owned(), + Some(set) => String::from_utf8_lossy(set).into_owned(), None => break, }; @@ -676,9 +757,86 @@ impl Dissector { match text.tokenize(b'\n') { None => println!("\tset \"{set}\" is empty"), Some(variables) => { - println!("\tset \"{set}\" contains \"{}\"", String::from_utf8_lossy(variables).trim_end_matches('\r')); - }, + println!( + "\tset \"{set}\" contains \"{}\"", + String::from_utf8_lossy(variables).trim_end_matches('\r') + ); + } }; + } + Ok(()) + } + + // Read record type 7, subtype 7. + fn read_mrsets(&mut self, size: u32, count: u32) -> Result<()> { + print!("{:08x}: multiple response sets", self.r.stream_position()?); + let mut text = self.open_text_record(size, count)?; + loop { + #[derive(PartialEq, Eq)] + enum MrSet { + MC, + MD, + } + + while text.match_byte(b'\n') {} + let Some(name) = text.tokenize(b'=') else { + break; + }; + + let (mrset, cat_label_from_counted_values, label_from_var_label) = if text + .match_byte(b'C') + { + if !text.match_byte(b' ') { + Err(anyhow!( + "missing space following 'C' at offset {} in mrsets record", + text.pos + ))?; + } + (MrSet::MC, false, false) + } else if text.match_byte(b'D') { + (MrSet::MD, false, false) + } else if text.match_byte(b'E') { + if !text.match_byte(b' ') { + Err(anyhow!( + "missing space following 'E' at offset {} in mrsets record", + text.pos + ))?; + } + + let pos = text.pos; + let Some(number) = text.tokenize(b' ') else { + Err(anyhow!( + "Missing label source value following `E' at offset {}u in MRSETS record", + text.pos + ))? + }; + + let label_from_var_label = if number == b"11" { + true + } else if number == b"1" { + false + } else { + Err(anyhow!("Unexpected label source value `{}' following `E' at offset {pos} in MRSETS record", String::from_utf8_lossy(number)))? + }; + (MrSet::MD, true, label_from_var_label) + } else { + Err(anyhow!( + "missing `C', `D', or `E' at offset {} in mrsets record", + text.pos + ))? + }; + + let counted_value = if mrset == MrSet::MD { + Some(text.parse_counted_string()?) + } else { None }; + + let label = text.parse_counted_string()?; + + let variables = text.tokenize(b'\n'); + + print!("\t\"{}\": multiple {} set", + String::from_utf8_lossy(name), + if mrset == MrSet::MC { "category" } else { "dichotomy" }); } Ok(()) @@ -686,22 +844,88 @@ impl Dissector { fn read_extra_product_info(&mut self, size: u32, count: u32) -> Result<()> { print!("{:08x}: extra product info", self.r.stream_position()?); - let mut text = self.open_text_record(size, count)?; + let text = self.open_text_record(size, count)?; + print_string(&text.buffer); + Ok(()) + } + fn read_display_parameters(&mut self, size: u32, count: u32) -> Result<()> { + println!( + "{:08x}: variable display parameters", + self.r.stream_position()? + ); + if size != 4 { + Err(anyhow!("Bad size ({size}) on extension 11."))?; + } + let n_vars = self.n_variables; + let includes_width = if count as usize == 3 * n_vars { + true + } else if count as usize == 2 * n_vars { + false + } else { + Err(anyhow!( + "Extension 11 has bad count {count} (for {n_vars} variables)." + ))? + }; + + for i in 0..n_vars { + let measure: u32 = self.read_swap()?; + print!( + "\tVar #{i}: measure={measure} ({})", + match measure { + 1 => "nominal", + 2 => "ordinal", + 3 => "scale", + _ => "invalid", + } + ); + + if includes_width { + let width: u32 = self.read_swap()?; + print!(", width={width}"); + } + + let align: u32 = self.read_swap()?; + println!( + ", align={align} ({})", + match align { + 0 => "left", + 1 => "right", + 2 => "centre", + _ => "invalid", + } + ); + } + Ok(()) } fn open_text_record(&mut self, size: u32, count: u32) -> Result { let n_bytes = match u32::checked_mul(size, count) { Some(n) => n, - None => Err(anyhow!("Extension record too large."))? + None => Err(anyhow!("Extension record too large."))?, }; Ok(TextRecord::new(read_vec(&mut self.r, n_bytes as usize)?)) } } +fn print_string(s: &[u8]) { + if s.contains(&b'\0') { + println!("{}", HexView::new(s)); + } else { + for &c in s { + match c { + b'\\' => print!("\\\\"), + b'\n' => println!(), + c if (b' '..=b'~').contains(&c) => print!("{}", c as char), + c => print!("\\{:2x}", c), + } + } + } +} + struct TextRecord { buffer: Vec, - pos: usize + pos: usize, } impl TextRecord { @@ -709,9 +933,12 @@ impl TextRecord { TextRecord { buffer, pos: 0 } } - fn tokenize<'a>(&'a mut self, delimiter: u8) -> Option<&'a [u8]> { - let mut start = self.pos; - while self.pos < self.buffer.len() && self.buffer[self.pos] != delimiter && self.buffer[self.pos] != 0 { + fn tokenize(&mut self, delimiter: u8) -> Option<&[u8]> { + let start = self.pos; + while self.pos < self.buffer.len() + && self.buffer[self.pos] != delimiter + && self.buffer[self.pos] != 0 + { self.pos += 1 } if start == self.pos { @@ -729,4 +956,56 @@ impl TextRecord { false } } + + fn parse_usize(&mut self) -> Result { + let n_digits = self.buffer[self.pos..] + .iter() + .take_while(|c| c.is_ascii_digit()) + .count(); + if n_digits == 0 { + Err(anyhow!("expecting digit at offset {} in record", self.pos))?; + } + let start = self.pos; + self.pos += n_digits; + let end = self.pos; + let digits = str::from_utf8(&self.buffer[start..end]).unwrap(); + let Ok(number) = digits.parse::() else { + Err(anyhow!( + "expecting number in [0,{}] at offset {} in record", + usize::MAX, + self.pos + ))? + }; + self.pos = end; + Ok(number) + } + + fn get_n_bytes(&mut self, n: usize) -> Option<(usize, usize)> { + let start = self.pos; + let Some(end) = start.checked_add(n) else { + return None; + }; + self.pos = end; + Some((start, end)) + } + + fn parse_counted_string(&mut self) -> Result<&[u8]> { + let length = self.parse_usize()?; + if !self.match_byte(b' ') { + Err(anyhow!("expecting space at offset {} in record", self.pos))?; + } + + let Some((start, end)) = self.get_n_bytes(length) else { + Err(anyhow!("{length}-byte string starting at offset {} exceeds record length {}", + self.pos, self.buffer.len()))? + }; + if !self.match_byte(b' ') { + Err(anyhow!( + "expecting space at offset {} following {}-byte string", + self.pos, + end - start + ))?; + } + Ok(&self.buffer[start..end]) + } } diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c index 7c261a93ba..3d85b9b089 100644 --- a/utilities/pspp-dump-sav.c +++ b/utilities/pspp-dump-sav.c @@ -911,7 +911,7 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count) includes_width = false; else { - sys_warn (r, "Extension 11 has bad count %zu (for %zu variables.", + sys_warn (r, "Extension 11 has bad count %zu (for %zu variables).", count, n_vars); skip_bytes (r, size * count); return;