From a8331d2f67af24ce1f9f5da99641b8d1cdc21300 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Nov 2023 17:39:29 -0800 Subject: [PATCH] Works for at least one test file now --- rust/src/cooked.rs | 318 ++++++++++++++++++++++++++++++++++--------- rust/src/encoding.rs | 36 +++-- rust/src/endian.rs | 5 +- rust/src/format.rs | 3 +- rust/src/lib.rs | 2 +- rust/src/main.rs | 64 +++++++-- rust/src/raw.rs | 80 +++++------ 7 files changed, 369 insertions(+), 139 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 1749ecc301..2e67965e41 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat}; use crate::{ - encoding::{get_encoding, Error as EncodingError, default_encoding}, + encoding::{default_encoding, get_encoding, Error as EncodingError}, endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, @@ -144,6 +144,16 @@ pub enum Error { #[error("Invalid variable name in attribute record. {0}")] InvalidAttributeVariableName(IdError), + // XXX This is risky because `text` might be arbitarily long. + #[error("Text string contains invalid bytes for {encoding} encoding: {text}")] + MalformedString { encoding: String, text: String }, + + #[error("Invalid variable measurement level value {0}")] + InvalidMeasurement(u32), + + #[error("Invalid variable display alignment value {0}")] + InvalidAlignment(u32), + #[error("Details TBD")] TBD, } @@ -199,7 +209,11 @@ pub struct Decoder { n_generated_names: usize, } -pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Result, Error> { +pub fn decode( + headers: Vec, + encoding: Option<&'static Encoding>, + warn: &impl Fn(Error), +) -> Result, Error> { let Some(header_record) = headers.iter().find_map(|rec| { if let raw::Record::Header(header) = rec { Some(header) @@ -209,31 +223,36 @@ pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Result encoding, - Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)), - Err(err) => { - warn(Error::EncodingError(err)); - // Warn that we're using the default encoding. - default_encoding() + let encoding = match encoding { + Some(encoding) => encoding, + None => { + let encoding = headers.iter().find_map(|rec| { + if let raw::Record::Encoding(ref e) = rec { + Some(e.0.as_str()) + } else { + None + } + }); + let character_code = headers.iter().find_map(|rec| { + if let raw::Record::IntegerInfo(ref r) = rec { + Some(r.character_code) + } else { + None + } + }); + match get_encoding(encoding, character_code) { + Ok(encoding) => encoding, + Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)), + Err(err) => { + warn(Error::EncodingError(err)); + // Warn that we're using the default encoding. + default_encoding() + } + } } }; - let decoder = Decoder { + let mut decoder = Decoder { compression: header_record.compression, endian: header_record.endian, encoding, @@ -243,7 +262,99 @@ pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Result { + if let Some(header) = HeaderRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::Header(header)) + } + } + raw::Record::Variable(ref input) => { + if let Some(variable) = VariableRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::Variable(variable)); + } + } + raw::Record::ValueLabel(ref input) => { + if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, input, warn)? + { + output.push(Record::ValueLabel(value_label)); + } + } + raw::Record::Document(ref input) => { + if let Some(document) = DocumentRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::Document(document)) + } + } + raw::Record::IntegerInfo(ref input) => output.push(Record::IntegerInfo(input.clone())), + raw::Record::FloatInfo(ref input) => output.push(Record::FloatInfo(input.clone())), + raw::Record::VariableSets(ref input) => { + let s = decoder.decode_string_cow(&input.text.0, warn); + output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?)); + } + raw::Record::VarDisplay(ref input) => { + if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::VarDisplay(vdr)) + } + } + raw::Record::MultipleResponse(ref input) => { + if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::MultipleResponse(mrr)) + } + } + raw::Record::LongStringValueLabels(ref input) => { + if let Some(mrr) = + LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)? + { + output.push(Record::LongStringValueLabels(mrr)) + } + } + raw::Record::Encoding(ref input) => output.push(Record::Encoding(input.clone())), + raw::Record::NumberOfCases(ref input) => { + output.push(Record::NumberOfCases(input.clone())) + } + raw::Record::ProductInfo(ref input) => { + let s = decoder.decode_string_cow(&input.text.0, warn); + output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?)); + } + raw::Record::LongNames(ref input) => { + let s = decoder.decode_string_cow(&input.text.0, warn); + output.push(Record::LongNames(LongNameRecord::parse( + &mut decoder, + &s, + warn, + )?)); + } + raw::Record::VeryLongStrings(ref input) => { + let s = decoder.decode_string_cow(&input.text.0, warn); + output.push(Record::VeryLongStrings(VeryLongStringRecord::parse( + &mut decoder, + &s, + warn, + )?)); + } + raw::Record::FileAttributes(ref input) => { + let s = decoder.decode_string_cow(&input.text.0, warn); + output.push(Record::FileAttributes(FileAttributeRecord::parse( + &decoder, &s, warn, + )?)); + } + raw::Record::VariableAttributes(ref input) => { + let s = decoder.decode_string_cow(&input.text.0, warn); + output.push(Record::VariableAttributes(VariableAttributeRecord::parse( + &decoder, &s, warn, + )?)); + } + raw::Record::OtherExtension(ref input) => { + output.push(Record::OtherExtension(input.clone())) + } + raw::Record::EndOfHeaders(_) => (), + raw::Record::ZHeader(_) => (), + raw::Record::ZTrailer(_) => (), + raw::Record::Case(_) => (), + }; + } + Ok(output) } impl Decoder { @@ -261,7 +372,10 @@ impl Decoder { fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> { let (output, malformed) = self.encoding.decode_without_bom_handling(input); if malformed { - warn(Error::TBD); + warn(Error::MalformedString { + encoding: self.encoding.name().into(), + text: output.clone().into(), + }); } output } @@ -277,14 +391,14 @@ impl Decoder { Identifier::new(&s, self.encoding) } fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> { - let max_index = self.n_dict_indexes - 1; - if dict_index == 0 || dict_index as usize > max_index { + let max_index = self.n_dict_indexes; + if dict_index == 0 || dict_index > max_index { return Err(Error::InvalidDictIndex { dict_index, max_index, }); } - let Some(variable) = self.variables.get(&dict_index) else { + let Some(variable) = self.variables.get(&(dict_index - 1)) else { return Err(Error::DictIndexIsContinuation(dict_index)); }; Ok(variable) @@ -328,10 +442,10 @@ impl Decoder { pub trait TryDecode: Sized { type Input; fn try_decode( - decoder: &Decoder, + decoder: &mut Decoder, input: &Self::Input, warn: impl Fn(Error), - ) -> Result; + ) -> Result, Error>; } pub trait Decode: Sized { @@ -353,23 +467,29 @@ pub struct HeaderRecord { pub file_label: String, } +fn trim_end_spaces(mut s: String) -> String { + s.truncate(s.trim_end_matches(' ').len()); + s +} + impl TryDecode for HeaderRecord { type Input = crate::raw::HeaderRecord; fn try_decode( - decoder: &Decoder, + decoder: &mut Decoder, input: &Self::Input, warn: impl Fn(Error), - ) -> Result { - let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn); - let file_label = decoder.decode_string(&input.file_label.0, &warn); + ) -> Result, Error> { + let eye_catcher = trim_end_spaces(decoder.decode_string(&input.eye_catcher.0, &warn)); + let file_label = trim_end_spaces(decoder.decode_string(&input.file_label.0, &warn)); let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn); - let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| { - warn(Error::InvalidCreationDate { - creation_date: creation_date.into(), + let creation_date = + NaiveDate::parse_from_str(&creation_date, "%e %b %Y").unwrap_or_else(|_| { + warn(Error::InvalidCreationDate { + creation_date: creation_date.into(), + }); + Default::default() }); - Default::default() - }); let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn); let creation_time = NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| { @@ -378,13 +498,13 @@ impl TryDecode for HeaderRecord { }); Default::default() }); - Ok(HeaderRecord { + Ok(Some(HeaderRecord { eye_catcher, weight_index: input.weight_index.map(|n| n as usize), n_cases: input.n_cases.map(|n| n as u64), creation: NaiveDateTime::new(creation_date, creation_time), file_label, - }) + })) } } @@ -473,8 +593,10 @@ fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatErro }) } -impl VariableRecord { - pub fn decode( +impl TryDecode for VariableRecord { + type Input = raw::VariableRecord; + + fn try_decode( decoder: &mut Decoder, input: &crate::raw::VariableRecord, warn: impl Fn(Error), @@ -490,7 +612,8 @@ impl VariableRecord { }) } }; - let name = match decoder.decode_identifier(&input.name.0, &warn) { + let name = trim_end_spaces(decoder.decode_string(&input.name.0, &warn)); + let name = match Identifier::new(&name, decoder.encoding) { Ok(name) => { if !decoder.var_names.contains_key(&name) { name @@ -564,17 +687,17 @@ impl TryDecode for DocumentRecord { type Input = crate::raw::DocumentRecord; fn try_decode( - decoder: &Decoder, + decoder: &mut Decoder, input: &Self::Input, warn: impl Fn(Error), - ) -> Result { - Ok(DocumentRecord( + ) -> Result, Error> { + Ok(Some(DocumentRecord( input .lines .iter() - .map(|s| decoder.decode_string(&s.0, &warn)) + .map(|s| trim_end_spaces(decoder.decode_string(&s.0, &warn))) .collect(), - )) + ))) } } @@ -646,14 +769,14 @@ pub struct ValueLabelRecord { pub variables: Vec, } -impl ValueLabelRecord { - pub fn decode( +impl TryDecode for ValueLabelRecord { + type Input = crate::raw::ValueLabelRecord; + fn try_decode( decoder: &mut Decoder, - raw_value_label: &crate::raw::ValueLabelRecord, - dict_indexes: &crate::raw::VarIndexRecord, + input: &Self::Input, warn: impl Fn(Error), ) -> Result, Error> { - let variables: Vec<&Variable> = dict_indexes + let variables: Vec<&Variable> = input .dict_indexes .iter() .filter_map(|&dict_index| { @@ -690,7 +813,7 @@ impl ValueLabelRecord { return Ok(None); } } - let labels = raw_value_label + let labels = input .labels .iter() .map(|(value, label)| { @@ -960,6 +1083,18 @@ pub enum Measure { Scale, } +impl Measure { + fn try_decode(source: u32) -> Result, Error> { + match source { + 0 => Ok(None), + 1 => Ok(Some(Measure::Nominal)), + 2 => Ok(Some(Measure::Ordinal)), + 3 => Ok(Some(Measure::Scale)), + _ => Err(Error::InvalidMeasurement(source)), + } + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Alignment { Left, @@ -967,16 +1102,67 @@ pub enum Alignment { Center, } +impl Alignment { + fn try_decode(source: u32) -> Result, Error> { + match source { + 0 => Ok(None), + 1 => Ok(Some(Alignment::Left)), + 2 => Ok(Some(Alignment::Right)), + 3 => Ok(Some(Alignment::Center)), + _ => Err(Error::InvalidAlignment(source)), + } + } +} + #[derive(Clone, Debug)] pub struct VarDisplay { pub measure: Option, - pub width: u32, - pub align: Option, + pub width: Option, + pub alignment: Option, } #[derive(Clone, Debug)] pub struct VarDisplayRecord(pub Vec); +impl TryDecode for VarDisplayRecord { + type Input = raw::VarDisplayRecord; + fn try_decode( + decoder: &mut Decoder, + input: &Self::Input, + warn: impl Fn(Error), + ) -> Result, Error> { + let n_vars = decoder.variables.len(); + let n_per_var = if input.0.len() == 3 * n_vars { + 3 + } else if input.0.len() == 2 * n_vars { + 2 + } else { + return Err(Error::TBD); + }; + + let var_displays = input + .0 + .chunks(n_per_var) + .map(|chunk| { + let (measure, width, alignment) = match n_per_var == 3 { + true => (chunk[0], Some(chunk[1]), chunk[2]), + false => (chunk[0], None, chunk[1]), + }; + let measure = Measure::try_decode(measure).warn_on_error(&warn).flatten(); + let alignment = Alignment::try_decode(alignment) + .warn_on_error(&warn) + .flatten(); + VarDisplay { + measure, + width, + alignment, + } + }) + .collect(); + Ok(Some(VarDisplayRecord(var_displays))) + } +} + #[derive(Clone, Debug)] pub enum MultipleResponseType { MultipleDichotomy { @@ -1109,10 +1295,10 @@ impl TryDecode for MultipleResponseRecord { type Input = raw::MultipleResponseRecord; fn try_decode( - decoder: &Decoder, + decoder: &mut Decoder, input: &Self::Input, warn: impl Fn(Error), - ) -> Result { + ) -> Result, Error> { let mut sets = Vec::with_capacity(input.0.len()); for set in &input.0 { match MultipleResponseSet::decode(decoder, set, &warn) { @@ -1120,7 +1306,7 @@ impl TryDecode for MultipleResponseRecord { Err(error) => warn(error), } } - Ok(MultipleResponseRecord(sets)) + Ok(Some(MultipleResponseRecord(sets))) } } @@ -1137,8 +1323,8 @@ impl LongStringValueLabels { input: &raw::LongStringValueLabels, warn: &impl Fn(Error), ) -> Result { - let var_name = decoder - .decode_identifier(&input.var_name.0, warn) + let var_name = decoder.decode_string(&input.var_name.0, warn); + let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) .map_err(|e| Error::InvalidLongStringValueLabelName(e))?; let min_width = 9; @@ -1175,10 +1361,10 @@ impl TryDecode for LongStringValueLabelRecord { type Input = raw::LongStringValueLabelRecord; fn try_decode( - decoder: &Decoder, + decoder: &mut Decoder, input: &Self::Input, warn: impl Fn(Error), - ) -> Result { + ) -> Result, Error> { let mut labels = Vec::with_capacity(input.0.len()); for label in &input.0 { match LongStringValueLabels::decode(decoder, label, &warn) { @@ -1186,7 +1372,7 @@ impl TryDecode for LongStringValueLabelRecord { Err(error) => warn(error), } } - Ok(LongStringValueLabelRecord(labels)) + Ok(Some(LongStringValueLabelRecord(labels))) } } diff --git a/rust/src/encoding.rs b/rust/src/encoding.rs index d135b8e9e6..8fd13f3ea3 100644 --- a/rust/src/encoding.rs +++ b/rust/src/encoding.rs @@ -1,3 +1,4 @@ +use crate::locale_charset::locale_charset; use encoding_rs::{Encoding, UTF_8}; include!(concat!(env!("OUT_DIR"), "/encodings.rs")); @@ -10,7 +11,6 @@ pub fn codepage_from_encoding(encoding: &str) -> Option { use thiserror::Error as ThisError; -use crate::locale_charset::locale_charset; #[derive(ThisError, Debug)] pub enum Error { #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")] @@ -19,40 +19,46 @@ pub enum Error { #[error("This system file encodes text strings with unknown code page {0}.")] UnknownCodepage(i32), + #[error("This system file encodes text strings with unknown encoding {0}.")] + UnknownEncoding(String), + #[error("This system file is encoded in EBCDIC, which is not supported.")] Ebcdic, } - pub fn default_encoding() -> &'static Encoding { lazy_static! { - static ref DEFAULT_ENCODING: &'static Encoding = { - Encoding::for_label(locale_charset()).unwrap_or(&UTF_8) - }; + static ref DEFAULT_ENCODING: &'static Encoding = + Encoding::for_label(locale_charset().as_bytes()).unwrap_or(&UTF_8); } - DEFAULT_ENCODING + &DEFAULT_ENCODING } -pub fn get_encoding(encoding: Option<&str>, character_code: Option) -> Result<&str, Error> { - if let Some(encoding) = encoding { - Ok(encoding) +pub fn get_encoding( + encoding: Option<&str>, + character_code: Option, +) -> Result<&'static Encoding, Error> { + let label = if let Some(encoding) = encoding { + encoding } else if let Some(codepage) = character_code { match codepage { - 1 => Err(Error::Ebcdic), + 1 => return Err(Error::Ebcdic), 2 | 3 => { // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic] // respectively. However, many files have character code 2 but // data which are clearly not ASCII. Therefore, ignore these // values. - Err(Error::NoEncoding) + return Err(Error::NoEncoding); } - 4 => Ok("MS_KANJI"), + 4 => "MS_KANJI", _ => CODEPAGE_NUMBER_TO_NAME .get(&codepage) .copied() - .ok_or(Error::UnknownCodepage(codepage)), + .ok_or(Error::UnknownCodepage(codepage))?, } } else { - Err(Error::NoEncoding) - } + return Err(Error::NoEncoding); + }; + + Ok(Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))?) } diff --git a/rust/src/endian.rs b/rust/src/endian.rs index bb63ec518d..3692180dba 100644 --- a/rust/src/endian.rs +++ b/rust/src/endian.rs @@ -19,7 +19,7 @@ impl Endian { match (as_big == expected_value, as_little == expected_value) { (true, false) => Some(Endian::Big), (false, true) => Some(Endian::Little), - _ => None + _ => None, } } @@ -29,7 +29,7 @@ impl Endian { match (as_big == expected_value, as_little == expected_value) { (true, false) => Some(Endian::Big), (false, true) => Some(Endian::Little), - _ => None + _ => None, } } } @@ -161,4 +161,3 @@ impl Parse for Endian { } } } - diff --git a/rust/src/format.rs b/rust/src/format.rs index 34798ed65a..9f285a9dd6 100644 --- a/rust/src/format.rs +++ b/rust/src/format.rs @@ -428,8 +428,7 @@ impl Spec { /// width `var_width`. pub fn check_width_compatibility(self, var_width: VarWidth) -> Result { // Verify that the format is right for the variable's type. - self.format - .check_type_compatibility(var_width.into())?; + self.format.check_type_compatibility(var_width.into())?; if let VarWidth::String(w) = var_width { if var_width != self.var_width() { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 3eb4bbae4e..86422046bb 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,6 +3,6 @@ pub mod encoding; pub mod endian; pub mod format; pub mod identifier; +pub mod locale_charset; pub mod raw; pub mod sack; -pub mod locale_charset; diff --git a/rust/src/main.rs b/rust/src/main.rs index 8085957daa..2251b760fe 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -15,12 +15,15 @@ * along with this program. If not, see . */ use anyhow::Result; -use clap::Parser; -use pspp::{raw::{Reader, Record}, locale_charset::locale_charset}; +use clap::{Parser, ValueEnum}; +use encoding_rs::Encoding; +use pspp::cooked::decode; +use pspp::raw::{Reader, Record}; use std::fs::File; use std::io::BufReader; use std::path::{Path, PathBuf}; use std::str; +use thiserror::Error as ThisError; /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] @@ -33,29 +36,66 @@ struct Args { /// Files to dissect. #[arg(required = true)] files: Vec, + + /// How to dissect the file. + #[arg(short, long, value_enum, default_value_t)] + mode: Mode, + + /// The encoding to use. + #[arg(long, value_parser = parse_encoding)] + encoding: Option<&'static Encoding>, +} + +#[derive(ThisError, Debug)] +#[error("{0}: unknown encoding")] +struct UnknownEncodingError(String); + +fn parse_encoding(arg: &str) -> Result<&'static Encoding, UnknownEncodingError> { + match Encoding::for_label_no_replacement(arg.as_bytes()) { + Some(encoding) => Ok(encoding), + None => Err(UnknownEncodingError(arg.to_string())), + } +} + +#[derive(Clone, Copy, Debug, Default, ValueEnum)] +enum Mode { + Raw, + #[default] + Cooked, } fn main() -> Result<()> { - println!("locale_charset={}", locale_charset()); - let Args { max_cases, files } = Args::parse(); + let Args { + max_cases, + files, + mode, + encoding, + } = Args::parse(); for file in files { - dissect(&file, max_cases)?; + dissect(&file, max_cases, mode, encoding)?; } Ok(()) } -fn dissect(file_name: &Path, max_cases: u64) -> Result<()> { +fn dissect(file_name: &Path, max_cases: u64, mode: Mode, encoding: Option<&'static Encoding>) -> Result<()> { let reader = File::open(file_name)?; let reader = BufReader::new(reader); let mut reader = Reader::new(reader)?; - let records: Vec = reader.collect_headers()?; - for record in records { - println!("{record:?}"); - if let Record::EndOfHeaders(_) = record { - break; - }; + let headers: Vec = reader.collect_headers()?; + match mode { + Mode::Raw => { + for header in headers { + println!("{header:?}"); + } + } + Mode::Cooked => { + let headers = decode(headers, encoding, &|e| panic!("{e}"))?; + for header in headers { + println!("{header:?}"); + } + } } for _ in 0..max_cases { diff --git a/rust/src/raw.rs b/rust/src/raw.rs index a8c7c858e9..ac8b3a0570 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -55,6 +55,9 @@ pub enum Error { #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")] BadNumberOfValueLabels { offset: u64, n: u32, max: u32 }, + #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")] + ExpectedVarIndexRecord { offset: u64, rec_type: u32 }, + #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")] BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 }, @@ -130,7 +133,6 @@ pub enum Record { Header(HeaderRecord), Variable(VariableRecord), ValueLabel(ValueLabelRecord), - VarIndexes(VarIndexRecord), Document(DocumentRecord), IntegerInfo(IntegerInfoRecord), FloatInfo(FloatInfoRecord), @@ -158,7 +160,6 @@ impl Record { match rec_type { 2 => Ok(Record::Variable(VariableRecord::read(reader, endian)?)), 3 => Ok(Record::ValueLabel(ValueLabelRecord::read(reader, endian)?)), - 4 => Ok(Record::VarIndexes(VarIndexRecord::read(reader, endian)?)), 6 => Ok(Record::Document(DocumentRecord::read(reader, endian)?)), 7 => Ok(Extension::read(reader, endian)?), 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))), @@ -997,34 +998,50 @@ impl Debug for UnencodedStr { #[derive(Clone)] pub struct ValueLabelRecord { - /// Offset from the start of the file to the start of the record. - pub offset: u64, + /// Offset from the start of the file to the start of the value label + /// record. + pub label_offset: u64, /// The labels. pub labels: Vec<(UntypedValue, UnencodedString)>, + + /// Offset from the start of the file to the start of the variable index + /// record. + pub index_offset: u64, + + /// The 1-based indexes of the variable indexes. + pub dict_indexes: Vec, } impl Debug for ValueLabelRecord { fn fmt(&self, f: &mut Formatter) -> FmtResult { + writeln!(f, "labels: ")?; for (value, label) in self.labels.iter() { writeln!(f, "{value:?}: {label:?}")?; } + write!(f, "apply to variables")?; + for dict_index in self.dict_indexes.iter() { + write!(f, " #{dict_index}")?; + } Ok(()) } } impl ValueLabelRecord { /// Maximum number of value labels in a record. - pub const MAX: u32 = u32::MAX / 8; + pub const MAX_LABELS: u32 = u32::MAX / 8; + + /// Maximum number of variable indexes in a record. + pub const MAX_INDEXES: u32 = u32::MAX / 8; fn read(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; + let label_offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); - if n > ValueLabelRecord::MAX { + if n > Self::MAX_LABELS { return Err(Error::BadNumberOfValueLabels { - offset, + offset: label_offset, n, - max: ValueLabelRecord::MAX, + max: Self::MAX_LABELS, }); } @@ -1039,41 +1056,22 @@ impl ValueLabelRecord { label.truncate(label_len); labels.push((value, UnencodedString(label))); } - Ok(ValueLabelRecord { offset, labels }) - } -} -#[derive(Clone)] -pub struct VarIndexRecord { - /// Offset from the start of the file to the start of the record. - pub offset: u64, - - /// The 1-based indexes of the variable indexes. - pub dict_indexes: Vec, -} - -impl Debug for VarIndexRecord { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "apply to variables")?; - for dict_index in self.dict_indexes.iter() { - write!(f, " #{dict_index}")?; + let index_offset = r.stream_position()?; + let rec_type: u32 = endian.parse(read_bytes(r)?); + if rec_type != 4 { + return Err(Error::ExpectedVarIndexRecord { + offset: index_offset, + rec_type, + }); } - Ok(()) - } -} -impl VarIndexRecord { - /// Maximum number of variable indexes in a record. - pub const MAX: u32 = u32::MAX / 8; - - fn read(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); - if n > VarIndexRecord::MAX { + if n > Self::MAX_INDEXES { return Err(Error::BadNumberOfVarIndexes { - offset, + offset: index_offset, n, - max: VarIndexRecord::MAX, + max: Self::MAX_INDEXES, }); } let mut dict_indexes = Vec::with_capacity(n as usize); @@ -1081,8 +1079,10 @@ impl VarIndexRecord { dict_indexes.push(endian.parse(read_bytes(r)?)); } - Ok(VarIndexRecord { - offset, + Ok(ValueLabelRecord { + label_offset, + labels, + index_offset, dict_indexes, }) } -- 2.30.2