X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=rust%2Fsrc%2Fraw.rs;h=e8a279f5e848418e0fdb846333cc80f5e0a60ce4;hb=7e2346251b2a07f03e2b5e77f2f9b938a9b00ab7;hp=ed246717cef3f94b45c4330cf8ac8283cec4eba2;hpb=e0cbdf0daefcca81be9572aab0deedf945687f5a;p=pspp diff --git a/rust/src/raw.rs b/rust/src/raw.rs index ed246717ce..e8a279f5e8 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -1,22 +1,28 @@ -use crate::endian::{Endian, Parse, ToBytes}; +use crate::{ + dictionary::VarWidth, + encoding::{default_encoding, get_encoding, Error as EncodingError}, + endian::{Endian, Parse, ToBytes}, + identifier::{Error as IdError, Identifier}, +}; -use encoding_rs::mem::decode_latin1; +use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding}; use flate2::read::ZlibDecoder; use num::Integer; -use std::borrow::Cow; -use std::cmp::Ordering; -use std::fmt::{Debug, Formatter, Result as FmtResult}; -use std::ops::Range; -use std::str::from_utf8; use std::{ - collections::VecDeque, + borrow::Cow, + cell::RefCell, + cmp::Ordering, + collections::{HashMap, VecDeque}, + fmt::{Debug, Display, Formatter, Result as FmtResult}, io::{Error as IoError, Read, Seek, SeekFrom}, - iter::FusedIterator, + iter::repeat, + mem::take, + ops::Range, + rc::Rc, + str::from_utf8, }; use thiserror::Error as ThisError; -use self::state::State; - #[derive(ThisError, Debug)] pub enum Error { #[error("Not an SPSS system file")] @@ -34,15 +40,18 @@ pub enum Error { #[error("Invalid ZSAV compression code {0}")] InvalidZsavCompression(u32), - #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] - BadVariableWidth { offset: u64, width: i32 }, - #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")] BadDocumentLength { offset: u64, n: usize, max: usize }, #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")] BadRecordType { offset: u64, rec_type: u32 }, + #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")] + BadVariableWidth { + start_offset: u64, + width: i32, + }, + #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")] BadVariableLabelCode { start_offset: u64, @@ -64,8 +73,8 @@ pub enum Error { #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")] ExpectedVarIndexRecord { offset: u64, rec_type: u32 }, - #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")] - BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 }, + #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")] + TooManyVarIndexes { offset: u64, n: u32, max: u32 }, #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")] ExtensionRecordTooLarge { @@ -104,6 +113,32 @@ pub enum Error { ztrailer_len: u64, }, + #[error("{0}")] + EncodingError(EncodingError), +} + +#[derive(ThisError, Debug)] +pub enum Warning { + #[error("Unexpected end of data inside extension record.")] + UnexpectedEndOfData, + + #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")] + NoVarIndexes { offset: u64 }, + + #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())] + MixedVarTypes { + offset: u64, + var_type: VarType, + wrong_types: Vec, + }, + + #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")] + InvalidVarIndexes { + offset: u64, + max: usize, + invalid: Vec, + }, + #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")] BadRecordSize { offset: u64, @@ -130,52 +165,186 @@ pub enum Error { #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")] BadEncodingName { offset: u64 }, + // XXX This is risky because `text` might be arbitarily long. + #[error("Text string contains invalid bytes for {encoding} encoding: {text}")] + MalformedString { encoding: String, text: String }, + + #[error("Invalid variable measurement level value {0}")] + InvalidMeasurement(u32), + + #[error("Invalid variable display alignment value {0}")] + InvalidAlignment(u32), + + #[error("Invalid attribute name. {0}")] + InvalidAttributeName(IdError), + + #[error("Invalid variable name in attribute record. {0}")] + InvalidAttributeVariableName(IdError), + + #[error("Invalid short name in long variable name record. {0}")] + InvalidShortName(IdError), + + #[error("Invalid name in long variable name record. {0}")] + InvalidLongName(IdError), + + #[error("Invalid variable name in very long string record. {0}")] + InvalidLongStringName(IdError), + + #[error("Invalid variable name in variable set record. {0}")] + InvalidVariableSetName(IdError), + + #[error("Invalid multiple response set name. {0}")] + InvalidMrSetName(IdError), + + #[error("Invalid multiple response set variable name. {0}")] + InvalidMrSetVariableName(IdError), + + #[error("Invalid variable name in long string missing values record. {0}")] + InvalidLongStringMissingValueVariableName(IdError), + + #[error("Invalid variable name in long string value label record. {0}")] + InvalidLongStringValueLabelName(IdError), + + #[error("{0}")] + EncodingError(EncodingError), + #[error("Details TBD")] TBD, } +impl From for Warning { + fn from(_source: IoError) -> Self { + Self::UnexpectedEndOfData + } +} + #[derive(Clone, Debug)] pub enum Record { - Header(HeaderRecord), - Variable(VariableRecord), - ValueLabel(ValueLabelRecord), - Document(DocumentRecord), + Header(HeaderRecord), + Variable(VariableRecord>), + ValueLabel(ValueLabelRecord, RawString>), + Document(DocumentRecord), + IntegerInfo(IntegerInfoRecord), + FloatInfo(FloatInfoRecord), + VarDisplay(VarDisplayRecord), + MultipleResponse(MultipleResponseRecord), + LongStringValueLabels(LongStringValueLabelRecord), + LongStringMissingValues(LongStringMissingValueRecord>), + Encoding(EncodingRecord), + NumberOfCases(NumberOfCasesRecord), + Text(TextRecord), + OtherExtension(Extension), + EndOfHeaders(u32), + ZHeader(ZHeader), + ZTrailer(ZTrailer), + Cases(Rc>), +} + +#[derive(Clone, Debug)] +pub enum DecodedRecord { + Header(HeaderRecord), + Variable(VariableRecord), + ValueLabel(ValueLabelRecord, String>), + Document(DocumentRecord), IntegerInfo(IntegerInfoRecord), FloatInfo(FloatInfoRecord), - VariableSets(TextRecord), VarDisplay(VarDisplayRecord), - MultipleResponse(MultipleResponseRecord), - LongStringValueLabels(LongStringValueLabelRecord), - LongStringMissingValues(LongStringMissingValueSet), + MultipleResponse(MultipleResponseRecord), + LongStringValueLabels(LongStringValueLabelRecord), + LongStringMissingValues(LongStringMissingValueRecord), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), - ProductInfo(TextRecord), - LongNames(TextRecord), - VeryLongStrings(TextRecord), - FileAttributes(TextRecord), - VariableAttributes(TextRecord), + VariableSets(VariableSetRecord), + ProductInfo(ProductInfoRecord), + LongNames(LongNamesRecord), + VeryLongStrings(VeryLongStringsRecord), + FileAttributes(FileAttributeRecord), + VariableAttributes(VariableAttributeRecord), OtherExtension(Extension), EndOfHeaders(u32), ZHeader(ZHeader), ZTrailer(ZTrailer), - Case(Vec), + Cases(Rc>), } impl Record { - fn read(reader: &mut R, endian: Endian) -> Result { + fn read( + reader: &mut R, + endian: Endian, + var_types: &[VarType], + warn: &dyn Fn(Warning), + ) -> Result, Error> + where + R: Read + Seek, + { let rec_type: u32 = endian.parse(read_bytes(reader)?); match rec_type { - 2 => Ok(Record::Variable(VariableRecord::read(reader, endian)?)), - 3 => Ok(Record::ValueLabel(ValueLabelRecord::read(reader, endian)?)), - 6 => Ok(Record::Document(DocumentRecord::read(reader, endian)?)), - 7 => Ok(Extension::read(reader, endian)?), - 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))), + 2 => Ok(Some(VariableRecord::read(reader, endian)?)), + 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?), + 6 => Ok(Some(DocumentRecord::read(reader, endian)?)), + 7 => Extension::read(reader, endian, var_types.len(), warn), + 999 => Ok(Some(Record::EndOfHeaders( + endian.parse(read_bytes(reader)?), + ))), _ => Err(Error::BadRecordType { offset: reader.stream_position()?, rec_type, }), } } + + pub fn decode(self, decoder: &Decoder) -> Result { + Ok(match self { + Record::Header(record) => record.decode(decoder), + Record::Variable(record) => record.decode(decoder), + Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)), + Record::Document(record) => record.decode(decoder), + Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()), + Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()), + Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()), + Record::MultipleResponse(record) => record.decode(decoder), + Record::LongStringValueLabels(record) => { + DecodedRecord::LongStringValueLabels(record.decode(decoder)) + } + Record::LongStringMissingValues(record) => { + DecodedRecord::LongStringMissingValues(record.decode(decoder)) + } + Record::Encoding(record) => DecodedRecord::Encoding(record.clone()), + Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()), + Record::Text(record) => record.decode(decoder), + Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()), + Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record), + Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()), + Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()), + Record::Cases(record) => DecodedRecord::Cases(record.clone()), + }) + } +} + +pub fn encoding_from_headers( + headers: &Vec, + warn: &impl Fn(Warning), +) -> Result<&'static Encoding, Error> { + let mut encoding_record = None; + let mut integer_info_record = None; + for record in headers { + match record { + Record::Encoding(record) => encoding_record = Some(record), + Record::IntegerInfo(record) => integer_info_record = Some(record), + _ => (), + } + } + let encoding = encoding_record.map(|record| record.0.as_str()); + let character_code = integer_info_record.map(|record| record.character_code); + match get_encoding(encoding, character_code) { + Ok(encoding) => Ok(encoding), + Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)), + Err(err) => { + warn(Warning::EncodingError(err)); + // Warn that we're using the default encoding. + Ok(default_encoding()) + } + } } // If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it @@ -184,7 +353,7 @@ fn default_decode(s: &[u8]) -> Cow { from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from) } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum Compression { Simple, ZLib, @@ -195,7 +364,10 @@ trait Header { } #[derive(Clone)] -pub struct HeaderRecord { +pub struct HeaderRecord +where + S: Debug, +{ /// Offset in file. pub offsets: Range, @@ -204,7 +376,7 @@ pub struct HeaderRecord { /// Eye-catcher string, product name, in the file's encoding. Padded /// on the right with spaces. - pub eye_catcher: UnencodedStr<60>, + pub eye_catcher: S, /// Layout code, normally either 2 or 3. pub layout_code: u32, @@ -227,50 +399,59 @@ pub struct HeaderRecord { pub bias: f64, /// `dd mmm yy` in the file's encoding. - pub creation_date: UnencodedStr<9>, + pub creation_date: S, /// `HH:MM:SS` in the file's encoding. - pub creation_time: UnencodedStr<8>, + pub creation_time: S, /// File label, in the file's encoding. Padded on the right with spaces. - pub file_label: UnencodedStr<64>, + pub file_label: S, /// Endianness of the data in the file header. pub endian: Endian, } -impl HeaderRecord { - fn debug_field(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult { +impl HeaderRecord +where + S: Debug, +{ + fn debug_field(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult + where + T: Debug, + { writeln!(f, "{name:>17}: {:?}", value) } } -impl Debug for HeaderRecord { +impl Debug for HeaderRecord +where + S: Debug, +{ fn fmt(&self, f: &mut Formatter) -> FmtResult { writeln!(f, "File header record:")?; self.debug_field(f, "Magic", self.magic)?; - self.debug_field(f, "Product name", self.eye_catcher)?; + self.debug_field(f, "Product name", &self.eye_catcher)?; self.debug_field(f, "Layout code", self.layout_code)?; self.debug_field(f, "Nominal case size", self.nominal_case_size)?; self.debug_field(f, "Compression", self.compression)?; self.debug_field(f, "Weight index", self.weight_index)?; self.debug_field(f, "Number of cases", self.n_cases)?; self.debug_field(f, "Compression bias", self.bias)?; - self.debug_field(f, "Creation date", self.creation_date)?; - self.debug_field(f, "Creation time", self.creation_time)?; - self.debug_field(f, "File label", self.file_label)?; + self.debug_field(f, "Creation date", &self.creation_date)?; + self.debug_field(f, "Creation time", &self.creation_time)?; + self.debug_field(f, "File label", &self.file_label)?; self.debug_field(f, "Endianness", self.endian) } } -impl HeaderRecord { - fn read(r: &mut R) -> Result { +impl HeaderRecord { + fn read(r: &mut R) -> Result { let start = r.stream_position()?; let magic: [u8; 4] = read_bytes(r)?; let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; - let eye_catcher = UnencodedStr::<60>(read_bytes(r)?); + let eye_catcher = RawString(read_vec(r, 60)?); let layout_code: [u8; 4] = read_bytes(r)?; let endian = Endian::identify_u32(2, layout_code) .or_else(|| Endian::identify_u32(2, layout_code)) @@ -298,9 +479,9 @@ impl HeaderRecord { let bias: f64 = endian.parse(read_bytes(r)?); - let creation_date = UnencodedStr::<9>(read_bytes(r)?); - let creation_time = UnencodedStr::<8>(read_bytes(r)?); - let file_label = UnencodedStr::<64>(read_bytes(r)?); + let creation_date = RawString(read_vec(r, 9)?); + let creation_time = RawString(read_vec(r, 8)?); + let file_label = RawString(read_vec(r, 64)?); let _: [u8; 3] = read_bytes(r)?; Ok(HeaderRecord { @@ -319,9 +500,110 @@ impl HeaderRecord { endian, }) } + + pub fn decode(self, decoder: &Decoder) -> DecodedRecord { + let eye_catcher = decoder.decode(&self.eye_catcher).to_string(); + let file_label = decoder.decode(&self.file_label).to_string(); + let creation_date = decoder.decode(&self.creation_date).to_string(); + let creation_time = decoder.decode(&self.creation_time).to_string(); + DecodedRecord::Header(HeaderRecord { + eye_catcher, + weight_index: self.weight_index, + n_cases: self.n_cases, + file_label, + offsets: self.offsets.clone(), + magic: self.magic, + layout_code: self.layout_code, + nominal_case_size: self.nominal_case_size, + compression: self.compression, + bias: self.bias, + creation_date, + creation_time, + endian: self.endian, + }) + } } -impl Header for HeaderRecord { +pub struct Decoder { + pub encoding: &'static Encoding, + pub warn: Box, +} + +impl Decoder { + pub fn new(encoding: &'static Encoding, warn: F) -> Self + where + F: Fn(Warning) + 'static, + { + Self { + encoding, + warn: Box::new(warn), + } + } + fn warn(&self, warning: Warning) { + (self.warn)(warning) + } + fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { + let (output, malformed) = self.encoding.decode_without_bom_handling(input); + if malformed { + self.warn(Warning::MalformedString { + encoding: self.encoding.name().into(), + text: output.clone().into(), + }); + } + output + } + + fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> { + self.decode_slice(input.0.as_slice()) + } + + /// Returns `input` decoded from `self.encoding` into UTF-8 such that + /// re-encoding the result back into `self.encoding` will have exactly the + /// same length in bytes. + /// + /// XXX warn about errors? + pub fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { + if let (s, false) = self.encoding.decode_without_bom_handling(input) { + // This is the common case. Usually there will be no errors. + s + } else { + // Unusual case. Don't bother to optimize it much. + let mut decoder = self.encoding.new_decoder_without_bom_handling(); + let mut output = String::with_capacity( + decoder + .max_utf8_buffer_length_without_replacement(input.len()) + .unwrap(), + ); + let mut rest = input; + while !rest.is_empty() { + match decoder.decode_to_string_without_replacement(rest, &mut output, true) { + (DecoderResult::InputEmpty, _) => break, + (DecoderResult::OutputFull, _) => unreachable!(), + (DecoderResult::Malformed(a, b), consumed) => { + let skipped = a as usize + b as usize; + output.extend(repeat('?').take(skipped)); + rest = &rest[consumed..]; + } + } + } + assert_eq!(self.encoding.encode(&output).0.len(), input.len()); + output.into() + } + } + + pub fn decode_identifier(&self, input: &RawString) -> Result { + self.new_identifier(&self.decode(input)) + } + + pub fn new_identifier(&self, name: &str) -> Result { + Identifier::new(name, self.encoding) + } +} + +impl Header for HeaderRecord +where + S: Debug, +{ fn offsets(&self) -> Range { self.offsets.clone() } @@ -382,176 +664,56 @@ pub enum VarType { } impl VarType { - fn from_width(width: i32) -> VarType { + pub fn from_width(width: VarWidth) -> VarType { match width { - 0 => VarType::Numeric, - _ => VarType::String, - } - } -} - -mod state { - use super::{ - Compression, Error, HeaderRecord, Record, Value, VarType, VariableRecord, ZHeader, - ZTrailer, ZlibDecodeMultiple, - }; - use crate::endian::Endian; - use std::{ - collections::VecDeque, - io::{Read, Seek}, - }; - - pub trait State { - #[allow(clippy::type_complexity)] - fn read(self: Box) -> Result)>, Error>; - } - - struct Start { - reader: R, - } - - pub fn new(reader: R) -> Box { - Box::new(Start { reader }) - } - - struct CommonState { - reader: R, - endian: Endian, - bias: f64, - compression: Option, - var_types: Vec, - } - - impl State for Start { - fn read(mut self: Box) -> Result)>, Error> { - let header = HeaderRecord::read(&mut self.reader)?; - let next_state = Headers(CommonState { - reader: self.reader, - endian: header.endian, - bias: header.bias, - compression: header.compression, - var_types: Vec::new(), - }); - Ok(Some((Record::Header(header), Box::new(next_state)))) - } - } - - struct Headers(CommonState); - - impl State for Headers { - fn read(mut self: Box) -> Result)>, Error> { - let record = Record::read(&mut self.0.reader, self.0.endian)?; - match record { - Record::Variable(VariableRecord { width, .. }) => { - self.0.var_types.push(VarType::from_width(width)); - } - Record::EndOfHeaders(_) => { - let next_state: Box = match self.0.compression { - None => Box::new(Data(self.0)), - Some(Compression::Simple) => Box::new(CompressedData::new(self.0)), - Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)), - }; - return Ok(Some((record, next_state))); - } - _ => (), - }; - Ok(Some((record, self))) + VarWidth::Numeric => Self::Numeric, + VarWidth::String(_) => Self::String, } } - struct ZlibHeader(CommonState); - - impl State for ZlibHeader { - fn read(mut self: Box) -> Result)>, Error> { - let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?; - let next_state = Box::new(ZlibTrailer(self.0, zheader.clone())); - Ok(Some((Record::ZHeader(zheader), next_state))) - } - } - - struct ZlibTrailer(CommonState, ZHeader); - - impl State for ZlibTrailer { - fn read(mut self: Box) -> Result)>, Error> { - let retval = ZTrailer::read( - &mut self.0.reader, - self.0.endian, - self.1.ztrailer_offset, - self.1.ztrailer_len, - )?; - let next_state = Box::new(CompressedData::new(CommonState { - reader: ZlibDecodeMultiple::new(self.0.reader), - endian: self.0.endian, - bias: self.0.bias, - compression: self.0.compression, - var_types: self.0.var_types, - })); - match retval { - None => next_state.read(), - Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))), - } - } - } - - struct Data(CommonState); - - impl State for Data { - fn read(mut self: Box) -> Result)>, Error> { - match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? { - None => Ok(None), - Some(values) => Ok(Some((Record::Case(values), self))), - } - } - } - - struct CompressedData { - common: CommonState, - codes: VecDeque, - } - - impl CompressedData { - fn new(common: CommonState) -> CompressedData { - CompressedData { - common, - codes: VecDeque::new(), - } + pub fn opposite(self) -> VarType { + match self { + Self::Numeric => Self::String, + Self::String => Self::Numeric, } } +} - impl State for CompressedData { - fn read(mut self: Box) -> Result)>, Error> { - match Value::read_compressed_case( - &mut self.common.reader, - &self.common.var_types, - &mut self.codes, - self.common.endian, - self.common.bias, - )? { - None => Ok(None), - Some(values) => Ok(Some((Record::Case(values), self))), - } +impl Display for VarType { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + match self { + VarType::Numeric => write!(f, "numeric"), + VarType::String => write!(f, "string"), } } } #[derive(Copy, Clone)] -pub enum Value { +pub enum Value +where + S: Debug, +{ Number(Option), - String(UnencodedStr<8>), + String(S), } -impl Debug for Value { +type RawValue = Value>; + +impl Debug for Value +where + S: Debug, +{ fn fmt(&self, f: &mut Formatter) -> FmtResult { match self { Value::Number(Some(number)) => write!(f, "{number:?}"), Value::Number(None) => write!(f, "SYSMIS"), - Value::String(bytes) => write!(f, "{:?}", bytes), + Value::String(s) => write!(f, "{:?}", s), } } } -impl Value { - fn read(r: &mut R, var_type: VarType, endian: Endian) -> Result { +impl RawValue { + fn read(r: &mut R, var_type: VarType, endian: Endian) -> Result { Ok(Self::from_raw( &UntypedValue(read_bytes(r)?), var_type, @@ -559,9 +721,9 @@ impl Value { )) } - pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Value { + pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self { match var_type { - VarType::String => Value::String(UnencodedStr(raw.0)), + VarType::String => Value::String(RawStr(raw.0)), VarType::Numeric => { let number: f64 = endian.parse(raw.0); Value::Number((number != -f64::MAX).then_some(number)) @@ -573,7 +735,7 @@ impl Value { reader: &mut R, var_types: &[VarType], endian: Endian, - ) -> Result>, Error> { + ) -> Result>, Error> { let case_start = reader.stream_position()?; let mut values = Vec::with_capacity(var_types.len()); for (i, &var_type) in var_types.iter().enumerate() { @@ -600,7 +762,7 @@ impl Value { codes: &mut VecDeque, endian: Endian, bias: f64, - ) -> Result>, Error> { + ) -> Result>, Error> { let case_start = reader.stream_position()?; let mut values = Vec::with_capacity(var_types.len()); for (i, &var_type) in var_types.iter().enumerate() { @@ -623,9 +785,9 @@ impl Value { match code { 0 => (), 1..=251 => match var_type { - VarType::Numeric => break Value::Number(Some(code as f64 - bias)), + VarType::Numeric => break Self::Number(Some(code as f64 - bias)), VarType::String => { - break Value::String(UnencodedStr(endian.to_bytes(code as f64 - bias))) + break Self::String(RawStr(endian.to_bytes(code as f64 - bias))) } }, 252 => { @@ -640,10 +802,10 @@ impl Value { } } 253 => { - break Value::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian) + break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian) } 254 => match var_type { - VarType::String => break Value::String(UnencodedStr(*b" ")), // XXX EBCDIC + VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC VarType::Numeric => { return Err(Error::CompressedStringExpected { offset: case_start, @@ -652,7 +814,7 @@ impl Value { } }, 255 => match var_type { - VarType::Numeric => break Value::Number(None), + VarType::Numeric => break Self::Number(None), VarType::String => { return Err(Error::CompressedNumberExpected { offset: case_start, @@ -666,6 +828,13 @@ impl Value { } Ok(Some(values)) } + + fn decode(self, decoder: &Decoder) -> Value { + match self { + Self::Number(x) => Value::Number(x), + Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()), + } + } } struct ZlibDecodeMultiple @@ -712,44 +881,206 @@ where } } -pub struct Reader { - state: Option>, +enum ReaderState { + Start, + Headers, + ZlibHeader, + ZlibTrailer { + ztrailer_offset: u64, + ztrailer_len: u64, + }, + Cases, + End, +} + +pub struct Reader +where + R: Read + Seek + 'static, +{ + reader: Option, + warn: Box, + + header: HeaderRecord, + var_types: Vec, + + state: ReaderState, } -impl Reader { - pub fn new(reader: R) -> Result { - Ok(Reader { - state: Some(state::new(reader)), +impl Reader +where + R: Read + Seek + 'static, +{ + pub fn new(mut reader: R, warn: F) -> Result + where + F: Fn(Warning) + 'static, + { + let header = HeaderRecord::read(&mut reader)?; + Ok(Self { + reader: Some(reader), + warn: Box::new(warn), + header, + var_types: Vec::new(), + state: ReaderState::Start, }) } - pub fn collect_headers(&mut self) -> Result, Error> { - let mut headers = Vec::new(); - for record in self { - match record? { - Record::EndOfHeaders(_) => break, - r => headers.push(r), - }; + fn cases(&mut self) -> Cases { + self.state = ReaderState::End; + Cases::new( + self.reader.take().unwrap(), + take(&mut self.var_types), + &self.header, + ) + } + fn _next(&mut self) -> Option<::Item> { + match self.state { + ReaderState::Start => { + self.state = ReaderState::Headers; + Some(Ok(Record::Header(self.header.clone()))) + } + ReaderState::Headers => { + let record = loop { + match Record::read( + self.reader.as_mut().unwrap(), + self.header.endian, + self.var_types.as_slice(), + &self.warn, + ) { + Ok(Some(record)) => break record, + Ok(None) => (), + Err(error) => return Some(Err(error)), + } + }; + match record { + Record::Variable(VariableRecord { width, .. }) => { + self.var_types.push(if width == 0 { + VarType::Numeric + } else { + VarType::String + }); + } + Record::EndOfHeaders(_) => { + self.state = if let Some(Compression::ZLib) = self.header.compression { + ReaderState::ZlibHeader + } else { + ReaderState::Cases + }; + } + _ => (), + }; + Some(Ok(record)) + } + ReaderState::ZlibHeader => { + let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian) + { + Ok(zheader) => zheader, + Err(error) => return Some(Err(error)), + }; + self.state = ReaderState::ZlibTrailer { + ztrailer_offset: zheader.ztrailer_offset, + ztrailer_len: zheader.ztrailer_len, + }; + Some(Ok(Record::ZHeader(zheader))) + } + ReaderState::ZlibTrailer { + ztrailer_offset, + ztrailer_len, + } => { + match ZTrailer::read( + self.reader.as_mut().unwrap(), + self.header.endian, + ztrailer_offset, + ztrailer_len, + ) { + Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))), + Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))), + Err(error) => Some(Err(error)), + } + } + ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))), + ReaderState::End => None, } - Ok(headers) } } -impl Iterator for Reader { +impl Iterator for Reader +where + R: Read + Seek + 'static, +{ type Item = Result; fn next(&mut self) -> Option { - match self.state.take()?.read() { - Ok(Some((record, next_state))) => { - self.state = Some(next_state); - Some(Ok(record)) - } - Ok(None) => None, - Err(error) => Some(Err(error)), + let retval = self._next(); + if matches!(retval, Some(Err(_))) { + self.state = ReaderState::End; + } + retval + } +} + +trait ReadSeek: Read + Seek {} +impl ReadSeek for T where T: Read + Seek {} + +pub struct Cases { + reader: Box, + var_types: Vec, + compression: Option, + bias: f64, + endian: Endian, + codes: VecDeque, + eof: bool, +} + +impl Debug for Cases { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "Cases") + } +} + +impl Cases { + fn new(reader: R, var_types: Vec, header: &HeaderRecord) -> Self + where + R: Read + Seek + 'static, + { + Self { + reader: if header.compression == Some(Compression::ZLib) { + Box::new(ZlibDecodeMultiple::new(reader)) + } else { + Box::new(reader) + }, + var_types, + compression: header.compression, + bias: header.bias, + endian: header.endian, + codes: VecDeque::with_capacity(8), + eof: false, } } } -impl FusedIterator for Reader {} +impl Iterator for Cases { + type Item = Result, Error>; + + fn next(&mut self) -> Option { + if self.eof { + return None; + } + + let retval = if self.compression.is_some() { + Value::read_compressed_case( + &mut self.reader, + &self.var_types, + &mut self.codes, + self.endian, + self.bias, + ) + .transpose() + } else { + Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose() + }; + self.eof = matches!(retval, None | Some(Err(_))); + retval + } +} #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct Spec(pub u32); @@ -808,15 +1139,21 @@ fn format_name(type_: u32) -> Cow<'static, str> { } #[derive(Clone)] -pub struct MissingValues { +pub struct MissingValues +where + S: Debug, +{ /// Individual missing values, up to 3 of them. - pub values: Vec, + pub values: Vec>, /// Optional range of missing values. - pub range: Option<(Value, Value)>, + pub range: Option<(Value, Value)>, } -impl Debug for MissingValues { +impl Debug for MissingValues +where + S: Debug, +{ fn fmt(&self, f: &mut Formatter) -> FmtResult { for (i, value) in self.values.iter().enumerate() { if i > 0 { @@ -825,7 +1162,7 @@ impl Debug for MissingValues { write!(f, "{value:?}")?; } - if let Some((low, high)) = self.range { + if let Some((low, high)) = &self.range { if !self.values.is_empty() { write!(f, ", ")?; } @@ -840,18 +1177,35 @@ impl Debug for MissingValues { } } -impl MissingValues { +impl MissingValues +where + S: Debug, +{ fn is_empty(&self) -> bool { self.values.is_empty() && self.range.is_none() } +} +impl Default for MissingValues +where + S: Debug, +{ + fn default() -> Self { + Self { + values: Vec::new(), + range: None, + } + } +} + +impl MissingValues> { fn read( r: &mut R, offset: u64, width: i32, code: i32, endian: Endian, - ) -> Result { + ) -> Result { let (n_values, has_range) = match (width, code) { (_, 0..=3) => (code, false), (0, -2) => (0, true), @@ -860,25 +1214,46 @@ impl MissingValues { (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }), }; - let var_type = VarType::from_width(width); + let var_type = if width == 0 { + VarType::Numeric + } else { + VarType::String + }; let mut values = Vec::new(); for _ in 0..n_values { - values.push(Value::read(r, var_type, endian)?); + values.push(RawValue::read(r, var_type, endian)?); } let range = if has_range { - let low = Value::read(r, var_type, endian)?; - let high = Value::read(r, var_type, endian)?; + let low = RawValue::read(r, var_type, endian)?; + let high = RawValue::read(r, var_type, endian)?; Some((low, high)) } else { None }; - Ok(MissingValues { values, range }) + Ok(Self { values, range }) + } + fn decode(&self, decoder: &Decoder) -> MissingValues { + MissingValues { + values: self + .values + .iter() + .map(|value| value.decode(decoder)) + .collect(), + range: self + .range + .as_ref() + .map(|(low, high)| (low.decode(decoder), high.decode(decoder))), + } } } #[derive(Clone)] -pub struct VariableRecord { +pub struct VariableRecord +where + S: Debug, + V: Debug, +{ /// Range of offsets in file. pub offsets: Range, @@ -886,7 +1261,7 @@ pub struct VariableRecord { pub width: i32, /// Variable name, padded on the right with spaces. - pub name: UnencodedStr<8>, + pub name: S, /// Print format. pub print_format: Spec, @@ -895,13 +1270,17 @@ pub struct VariableRecord { pub write_format: Spec, /// Missing values. - pub missing_values: MissingValues, + pub missing_values: MissingValues, /// Optional variable label. - pub label: Option, + pub label: Option, } -impl Debug for VariableRecord { +impl Debug for VariableRecord +where + S: Debug, + V: Debug, +{ fn fmt(&self, f: &mut Formatter) -> FmtResult { writeln!( f, @@ -921,23 +1300,26 @@ impl Debug for VariableRecord { } } -impl VariableRecord { - fn read(r: &mut R, endian: Endian) -> Result { +impl VariableRecord> { + fn read(r: &mut R, endian: Endian) -> Result { let start_offset = r.stream_position()?; let width: i32 = endian.parse(read_bytes(r)?); + if !(-1..=255).contains(&width) { + return Err(Error::BadVariableWidth { start_offset, width }); + } let code_offset = r.stream_position()?; let has_variable_label: u32 = endian.parse(read_bytes(r)?); let missing_value_code: i32 = endian.parse(read_bytes(r)?); let print_format = Spec(endian.parse(read_bytes(r)?)); let write_format = Spec(endian.parse(read_bytes(r)?)); - let name = UnencodedStr::<8>(read_bytes(r)?); + let name = RawString(read_vec(r, 8)?); let label = match has_variable_label { 0 => None, 1 => { let len: u32 = endian.parse(read_bytes(r)?); let read_len = len.min(65535) as usize; - let label = UnencodedString(read_vec(r, read_len)?); + let label = RawString(read_vec(r, read_len)?); let padding_bytes = Integer::next_multiple_of(&len, &4) - len; let _ = read_vec(r, padding_bytes as usize)?; @@ -958,7 +1340,7 @@ impl VariableRecord { let end_offset = r.stream_position()?; - Ok(VariableRecord { + Ok(Record::Variable(VariableRecord { offsets: start_offset..end_offset, width, name, @@ -966,6 +1348,21 @@ impl VariableRecord { write_format, missing_values, label, + })) + } + + pub fn decode(self, decoder: &Decoder) -> DecodedRecord { + DecodedRecord::Variable(VariableRecord { + offsets: self.offsets.clone(), + width: self.width, + name: decoder.decode(&self.name).to_string(), + print_format: self.print_format, + write_format: self.write_format, + missing_values: self.missing_values.decode(decoder), + label: self + .label + .as_ref() + .map(|label| decoder.decode(label).to_string()), }) } } @@ -997,60 +1394,81 @@ impl Debug for UntypedValue { } #[derive(Clone)] -pub struct UnencodedString(pub Vec); +pub struct RawString(pub Vec); -impl From> for UnencodedString { +impl From> for RawString { fn from(source: Vec) -> Self { Self(source) } } -impl From<&[u8]> for UnencodedString { +impl From<&[u8]> for RawString { fn from(source: &[u8]) -> Self { Self(source.into()) } } -impl Debug for UnencodedString { +impl Debug for RawString { fn fmt(&self, f: &mut Formatter) -> FmtResult { write!(f, "{:?}", default_decode(self.0.as_slice())) } } #[derive(Copy, Clone)] -pub struct UnencodedStr(pub [u8; N]); +pub struct RawStr(pub [u8; N]); -impl From<[u8; N]> for UnencodedStr { +impl From<[u8; N]> for RawStr { fn from(source: [u8; N]) -> Self { Self(source) } } -impl Debug for UnencodedStr { +impl Debug for RawStr { fn fmt(&self, f: &mut Formatter) -> FmtResult { write!(f, "{:?}", default_decode(&self.0)) } } +#[derive(Clone, Debug)] +pub struct ValueLabel +where + V: Debug, + S: Debug, +{ + pub value: Value, + pub label: S, +} + #[derive(Clone)] -pub struct ValueLabelRecord { +pub struct ValueLabelRecord +where + V: Debug, + S: Debug, +{ /// Range of offsets in file. pub offsets: Range, /// The labels. - pub labels: Vec<(UntypedValue, UnencodedString)>, + pub labels: Vec>, /// The 1-based indexes of the variable indexes. pub dict_indexes: Vec, + + /// The types of the variables. + pub var_type: VarType, } -impl Debug for ValueLabelRecord { +impl Debug for ValueLabelRecord +where + V: Debug, + S: Debug, +{ fn fmt(&self, f: &mut Formatter) -> FmtResult { writeln!(f, "labels: ")?; - for (value, label) in self.labels.iter() { - writeln!(f, "{value:?}: {label:?}")?; + for label in self.labels.iter() { + writeln!(f, "{label:?}")?; } - write!(f, "apply to variables")?; + write!(f, "apply to {} variables", self.var_type)?; for dict_index in self.dict_indexes.iter() { write!(f, " #{dict_index}")?; } @@ -1058,20 +1476,35 @@ impl Debug for ValueLabelRecord { } } -impl Header for ValueLabelRecord { +impl Header for ValueLabelRecord +where + V: Debug, + S: Debug, +{ fn offsets(&self) -> Range { self.offsets.clone() } } -impl ValueLabelRecord { +impl ValueLabelRecord +where + V: Debug, + S: Debug, +{ /// Maximum number of value labels in a record. pub const MAX_LABELS: u32 = u32::MAX / 8; /// Maximum number of variable indexes in a record. pub const MAX_INDEXES: u32 = u32::MAX / 8; +} - fn read(r: &mut R, endian: Endian) -> Result { +impl ValueLabelRecord, RawString> { + fn read( + r: &mut R, + endian: Endian, + var_types: &[VarType], + warn: &dyn Fn(Warning), + ) -> Result, Error> { let label_offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); if n > Self::MAX_LABELS { @@ -1091,7 +1524,7 @@ impl ValueLabelRecord { let mut label = read_vec(r, padded_len - 1)?; label.truncate(label_len); - labels.push((value, UnencodedString(label))); + labels.push((value, RawString(label))); } let index_offset = r.stream_position()?; @@ -1105,46 +1538,115 @@ impl ValueLabelRecord { let n: u32 = endian.parse(read_bytes(r)?); if n > Self::MAX_INDEXES { - return Err(Error::BadNumberOfVarIndexes { + return Err(Error::TooManyVarIndexes { offset: index_offset, n, max: Self::MAX_INDEXES, }); } + + let index_offset = r.stream_position()?; let mut dict_indexes = Vec::with_capacity(n as usize); + let mut invalid_indexes = Vec::new(); for _ in 0..n { - dict_indexes.push(endian.parse(read_bytes(r)?)); + let index: u32 = endian.parse(read_bytes(r)?); + if index == 0 || index as usize > var_types.len() { + dict_indexes.push(index); + } else { + invalid_indexes.push(index); + } + } + if !invalid_indexes.is_empty() { + warn(Warning::InvalidVarIndexes { + offset: index_offset, + max: var_types.len(), + invalid: invalid_indexes, + }); } + let Some(&first_index) = dict_indexes.first() else { + warn(Warning::NoVarIndexes { + offset: index_offset, + }); + return Ok(None); + }; + let var_type = var_types[first_index as usize - 1]; + let mut wrong_type_indexes = Vec::new(); + dict_indexes.retain(|&index| { + if var_types[index as usize - 1] != var_type { + wrong_type_indexes.push(index); + false + } else { + true + } + }); + if !wrong_type_indexes.is_empty() { + warn(Warning::MixedVarTypes { + offset: index_offset, + var_type, + wrong_types: wrong_type_indexes, + }); + } + + let labels = labels + .into_iter() + .map(|(value, label)| ValueLabel { + value: Value::from_raw(&value, var_type, endian), + label, + }) + .collect(); + let end_offset = r.stream_position()?; - Ok(ValueLabelRecord { + Ok(Some(Record::ValueLabel(ValueLabelRecord { offsets: label_offset..end_offset, labels, dict_indexes, - }) + var_type, + }))) + } + + fn decode(self, decoder: &Decoder) -> ValueLabelRecord, String> { + let labels = self + .labels + .iter() + .map(|ValueLabel { value, label }| ValueLabel { + value: *value, + label: decoder.decode(label).to_string(), + }) + .collect(); + ValueLabelRecord { + offsets: self.offsets.clone(), + labels, + dict_indexes: self.dict_indexes.clone(), + var_type: self.var_type, + } } } #[derive(Clone, Debug)] -pub struct DocumentRecord { +pub struct DocumentRecord +where + S: Debug, +{ pub offsets: Range, - /// The document, as an array of 80-byte lines. - pub lines: Vec, + /// The document, as an array of lines. Raw lines are exactly 80 bytes long + /// and are right-padded with spaces without any new-line termination. + pub lines: Vec, } -pub type DocumentLine = UnencodedStr<{ DocumentRecord::LINE_LEN }>; +pub type RawDocumentLine = RawStr; -impl DocumentRecord { - /// Length of a line in a document. Document lines are fixed-length and - /// padded on the right with spaces. - pub const LINE_LEN: usize = 80; +/// Length of a line in a document. Document lines are fixed-length and +/// padded on the right with spaces. +pub const DOC_LINE_LEN: usize = 80; +impl DocumentRecord { /// Maximum number of lines we will accept in a document. This is simply /// the maximum number that will fit in a 32-bit space. - pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN; + pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN; - fn read(r: &mut R, endian: Endian) -> Result { + fn read(r: &mut R, endian: Endian) -> Result { let start_offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); let n = n as usize; @@ -1157,18 +1659,32 @@ impl DocumentRecord { } else { let mut lines = Vec::with_capacity(n); for _ in 0..n { - lines.push(UnencodedStr::<{ DocumentRecord::LINE_LEN }>(read_bytes(r)?)); + lines.push(RawStr(read_bytes(r)?)); } let end_offset = r.stream_position()?; - Ok(DocumentRecord { + Ok(Record::Document(DocumentRecord { offsets: start_offset..end_offset, lines, - }) + })) } } + + pub fn decode(self, decoder: &Decoder) -> DecodedRecord { + DecodedRecord::Document(DocumentRecord { + offsets: self.offsets.clone(), + lines: self + .lines + .iter() + .map(|s| decoder.decode_slice(&s.0).to_string()) + .collect(), + }) + } } -impl Header for DocumentRecord { +impl Header for DocumentRecord +where + S: Debug, +{ fn offsets(&self) -> Range { self.offsets.clone() } @@ -1179,7 +1695,7 @@ trait ExtensionRecord { const SIZE: Option; const COUNT: Option; const NAME: &'static str; - fn parse(ext: &Extension, endian: Endian) -> Result; + fn parse(ext: &Extension, endian: Endian) -> Result; } #[derive(Clone, Debug)] @@ -1199,7 +1715,7 @@ impl ExtensionRecord for IntegerInfoRecord { const COUNT: Option = Some(8); const NAME: &'static str = "integer record"; - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size::()?; let mut input = &ext.data[..]; @@ -1231,7 +1747,7 @@ impl ExtensionRecord for FloatInfoRecord { const COUNT: Option = Some(3); const NAME: &'static str = "floating point record"; - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size::()?; let mut input = &ext.data[..]; @@ -1255,14 +1771,14 @@ pub enum CategoryLabels { #[derive(Clone, Debug)] pub enum MultipleResponseType { MultipleDichotomy { - value: UnencodedString, + value: RawString, labels: CategoryLabels, }, MultipleCategory, } impl MultipleResponseType { - fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Error> { + fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> { let (mr_type, input) = match input.split_first() { Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input), Some((b'D', input)) => { @@ -1281,7 +1797,7 @@ impl MultipleResponseType { } else if let Some(rest) = input.strip_prefix(b" 11 ") { (CategoryLabels::VarLabels, rest) } else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let (value, input) = parse_counted_string(input)?; ( @@ -1289,29 +1805,33 @@ impl MultipleResponseType { input, ) } - _ => return Err(Error::TBD), + _ => return Err(Warning::TBD), }; Ok((mr_type, input)) } } #[derive(Clone, Debug)] -pub struct MultipleResponseSet { - pub name: UnencodedString, - pub label: UnencodedString, +pub struct MultipleResponseSet +where + I: Debug, + S: Debug, +{ + pub name: I, + pub label: S, pub mr_type: MultipleResponseType, - pub short_names: Vec, + pub short_names: Vec, } -impl MultipleResponseSet { - fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> { +impl MultipleResponseSet { + fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> { let Some(equals) = input.iter().position(|&b| b == b'=') else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let (name, input) = input.split_at(equals); let (mr_type, input) = MultipleResponseType::parse(input)?; let Some(input) = input.strip_prefix(b" ") else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let (label, mut input) = parse_counted_string(input)?; let mut vars = Vec::new(); @@ -1319,7 +1839,7 @@ impl MultipleResponseSet { match input.split_first() { Some((b' ', rest)) => { let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let (var, rest) = rest.split_at(length); if !var.is_empty() { @@ -1327,7 +1847,7 @@ impl MultipleResponseSet { } input = rest; } - _ => return Err(Error::TBD), + _ => return Err(Warning::TBD), } } while input.first() == Some(&b'\n') { @@ -1343,18 +1863,45 @@ impl MultipleResponseSet { input, )) } + + fn decode( + &self, + decoder: &Decoder, + ) -> Result, Warning> { + let mut short_names = Vec::with_capacity(self.short_names.len()); + for short_name in self.short_names.iter() { + if let Some(short_name) = decoder + .decode_identifier(short_name) + .map_err(Warning::InvalidMrSetName) + .issue_warning(&decoder.warn) + { + short_names.push(short_name); + } + } + Ok(MultipleResponseSet { + name: decoder + .decode_identifier(&self.name) + .map_err(Warning::InvalidMrSetVariableName)?, + label: decoder.decode(&self.label).to_string(), + mr_type: self.mr_type.clone(), + short_names, + }) + } } #[derive(Clone, Debug)] -pub struct MultipleResponseRecord(pub Vec); +pub struct MultipleResponseRecord(pub Vec>) +where + I: Debug, + S: Debug; -impl ExtensionRecord for MultipleResponseRecord { +impl ExtensionRecord for MultipleResponseRecord { const SUBTYPE: u32 = 7; const SIZE: Option = Some(1); const COUNT: Option = None; const NAME: &'static str = "multiple response set record"; - fn parse(ext: &Extension, _endian: Endian) -> Result { + fn parse(ext: &Extension, _endian: Endian) -> Result { ext.check_size::()?; let mut input = &ext.data[..]; @@ -1368,65 +1915,184 @@ impl ExtensionRecord for MultipleResponseRecord { } } -fn parse_counted_string(input: &[u8]) -> Result<(UnencodedString, &[u8]), Error> { +impl MultipleResponseRecord { + fn decode(self, decoder: &Decoder) -> DecodedRecord { + let mut sets = Vec::new(); + for set in self.0.iter() { + if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) { + sets.push(set); + } + } + DecodedRecord::MultipleResponse(MultipleResponseRecord(sets)) + } +} + +fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> { let Some(space) = input.iter().position(|&b| b == b' ') else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let Ok(length) = from_utf8(&input[..space]) else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let Ok(length): Result = length.parse() else { - return Err(Error::TBD); + return Err(Warning::TBD); }; let input = &input[space + 1..]; if input.len() < length { - return Err(Error::TBD); + return Err(Warning::TBD); }; let (string, rest) = input.split_at(length); Ok((string.into(), rest)) } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Measure { + Nominal, + Ordinal, + Scale, +} + +impl Measure { + pub fn default_for_type(var_type: VarType) -> Option { + match var_type { + VarType::Numeric => None, + VarType::String => Some(Self::Nominal), + } + } + + fn try_decode(source: u32) -> Result, Warning> { + match source { + 0 => Ok(None), + 1 => Ok(Some(Measure::Nominal)), + 2 => Ok(Some(Measure::Ordinal)), + 3 => Ok(Some(Measure::Scale)), + _ => Err(Warning::InvalidMeasurement(source)), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Alignment { + Left, + Right, + Center, +} + +impl Alignment { + fn try_decode(source: u32) -> Result, Warning> { + match source { + 0 => Ok(None), + 1 => Ok(Some(Alignment::Left)), + 2 => Ok(Some(Alignment::Right)), + 3 => Ok(Some(Alignment::Center)), + _ => Err(Warning::InvalidAlignment(source)), + } + } + + pub fn default_for_type(var_type: VarType) -> Self { + match var_type { + VarType::Numeric => Self::Right, + VarType::String => Self::Left, + } + } +} + #[derive(Clone, Debug)] -pub struct VarDisplayRecord(pub Vec); +pub struct VarDisplay { + pub measure: Option, + pub width: Option, + pub alignment: Option, +} -impl ExtensionRecord for VarDisplayRecord { +#[derive(Clone, Debug)] +pub struct VarDisplayRecord(pub Vec); + +impl VarDisplayRecord { const SUBTYPE: u32 = 11; - const SIZE: Option = Some(4); - const COUNT: Option = None; - const NAME: &'static str = "variable display record"; - fn parse(ext: &Extension, endian: Endian) -> Result { - ext.check_size::()?; + fn parse( + ext: &Extension, + n_vars: usize, + endian: Endian, + warn: &dyn Fn(Warning), + ) -> Result { + if ext.size != 4 { + return Err(Warning::BadRecordSize { + offset: ext.offsets.start, + record: String::from("variable display record"), + size: ext.size, + expected_size: 4, + }); + } + let has_width = if ext.count as usize == 3 * n_vars { + true + } else if ext.count as usize == 2 * n_vars { + false + } else { + return Err(Warning::TBD); + }; + + let mut var_displays = Vec::new(); let mut input = &ext.data[..]; - let display = (0..ext.count) - .map(|_| endian.parse(read_bytes(&mut input).unwrap())) - .collect(); - Ok(Record::VarDisplay(VarDisplayRecord(display))) + for _ in 0..n_vars { + let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap())) + .issue_warning(&warn) + .flatten(); + let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap())); + let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap())) + .issue_warning(&warn) + .flatten(); + var_displays.push(VarDisplay { + measure, + width, + alignment, + }); + } + Ok(Record::VarDisplay(VarDisplayRecord(var_displays))) } } #[derive(Clone, Debug)] -pub struct LongStringMissingValues { +pub struct LongStringMissingValues +where + N: Debug, + V: Debug, +{ /// Variable name. - pub var_name: UnencodedString, + pub var_name: N, /// Missing values. - pub missing_values: MissingValues, + pub missing_values: MissingValues, +} + +impl LongStringMissingValues> { + fn decode( + &self, + decoder: &Decoder, + ) -> Result, IdError> { + Ok(LongStringMissingValues { + var_name: decoder.decode_identifier(&self.var_name)?, + missing_values: self.missing_values.decode(decoder), + }) + } } #[derive(Clone, Debug)] -pub struct LongStringMissingValueSet(pub Vec); +pub struct LongStringMissingValueRecord(pub Vec>) +where + N: Debug, + V: Debug; -impl ExtensionRecord for LongStringMissingValueSet { +impl ExtensionRecord for LongStringMissingValueRecord> { const SUBTYPE: u32 = 22; const SIZE: Option = Some(1); const COUNT: Option = None; const NAME: &'static str = "long string missing values record"; - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size::()?; let mut input = &ext.data[..]; @@ -1437,7 +2103,7 @@ impl ExtensionRecord for LongStringMissingValueSet { let value_len: u32 = endian.parse(read_bytes(&mut input)?); if value_len != 8 { let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start; - return Err(Error::BadLongMissingValueLength { + return Err(Warning::BadLongMissingValueLength { record_offset: ext.offsets.start, offset, value_len, @@ -1455,7 +2121,7 @@ impl ExtensionRecord for LongStringMissingValueSet { } else { value }; - values.push(Value::String(UnencodedStr(value))); + values.push(Value::String(RawStr(value))); } let missing_values = MissingValues { values, @@ -1466,9 +2132,25 @@ impl ExtensionRecord for LongStringMissingValueSet { missing_values, }); } - Ok(Record::LongStringMissingValues(LongStringMissingValueSet( - missing_value_set, - ))) + Ok(Record::LongStringMissingValues( + LongStringMissingValueRecord(missing_value_set), + )) + } +} + +impl LongStringMissingValueRecord> { + pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord { + let mut mvs = Vec::with_capacity(self.0.len()); + for mv in self.0.iter() { + if let Some(mv) = mv + .decode(decoder) + .map_err(Warning::InvalidLongStringMissingValueVariableName) + .issue_warning(&decoder.warn) + { + mvs.push(mv); + } + } + LongStringMissingValueRecord(mvs) } } @@ -1481,11 +2163,11 @@ impl ExtensionRecord for EncodingRecord { const COUNT: Option = None; const NAME: &'static str = "encoding record"; - fn parse(ext: &Extension, _endian: Endian) -> Result { + fn parse(ext: &Extension, _endian: Endian) -> Result { ext.check_size::()?; Ok(Record::Encoding(EncodingRecord( - String::from_utf8(ext.data.clone()).map_err(|_| Error::BadEncodingName { + String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName { offset: ext.offsets.start, })?, ))) @@ -1507,7 +2189,7 @@ impl ExtensionRecord for NumberOfCasesRecord { const COUNT: Option = Some(2); const NAME: &'static str = "extended number of cases record"; - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size::()?; let mut input = &ext.data[..]; @@ -1522,15 +2204,331 @@ impl ExtensionRecord for NumberOfCasesRecord { pub struct TextRecord { pub offsets: Range, + /// Type of record. + pub rec_type: TextRecordType, + /// The text content of the record. - pub text: UnencodedString, + pub text: RawString, +} + +#[derive(Clone, Copy, Debug)] +pub enum TextRecordType { + VariableSets, + ProductInfo, + LongNames, + VeryLongStrings, + FileAttributes, + VariableAttributes, +} + +impl TextRecord { + fn new(extension: Extension, rec_type: TextRecordType) -> Self { + Self { + offsets: extension.offsets, + rec_type, + text: extension.data.into(), + } + } + pub fn decode(self, decoder: &Decoder) -> DecodedRecord { + match self.rec_type { + TextRecordType::VariableSets => { + DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder)) + } + TextRecordType::ProductInfo => { + DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder)) + } + TextRecordType::LongNames => { + DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder)) + } + TextRecordType::VeryLongStrings => { + DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder)) + } + TextRecordType::FileAttributes => { + DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder)) + } + TextRecordType::VariableAttributes => { + DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder)) + } + } + } } -impl From for TextRecord { - fn from(source: Extension) -> Self { - TextRecord { - offsets: source.offsets, - text: source.data.into(), +#[derive(Clone, Debug)] +pub struct VeryLongString { + pub short_name: Identifier, + pub length: u16, +} + +impl VeryLongString { + fn parse(decoder: &Decoder, input: &str) -> Result { + let Some((short_name, length)) = input.split_once('=') else { + return Err(Warning::TBD); + }; + let short_name = decoder + .new_identifier(short_name) + .map_err(Warning::InvalidLongStringName)?; + let length = length.parse().map_err(|_| Warning::TBD)?; + Ok(VeryLongString { short_name, length }) + } +} + +#[derive(Clone, Debug)] +pub struct VeryLongStringsRecord(Vec); + +impl VeryLongStringsRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let input = decoder.decode(&source.text); + let mut very_long_strings = Vec::new(); + for tuple in input + .split('\0') + .map(|s| s.trim_end_matches('\t')) + .filter(|s| !s.is_empty()) + { + if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) { + very_long_strings.push(vls) + } + } + VeryLongStringsRecord(very_long_strings) + } +} + +#[derive(Clone, Debug)] +pub struct Attribute { + pub name: Identifier, + pub values: Vec, +} + +impl Attribute { + fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> { + let Some((name, mut input)) = input.split_once('(') else { + return Err(Warning::TBD); + }; + let name = decoder + .new_identifier(name) + .map_err(Warning::InvalidAttributeName)?; + let mut values = Vec::new(); + loop { + let Some((value, rest)) = input.split_once('\n') else { + return Err(Warning::TBD); + }; + if let Some(stripped) = value + .strip_prefix('\'') + .and_then(|value| value.strip_suffix('\'')) + { + values.push(stripped.into()); + } else { + decoder.warn(Warning::TBD); + values.push(value.into()); + } + if let Some(rest) = rest.strip_prefix(')') { + let attribute = Attribute { name, values }; + return Ok((attribute, rest)); + }; + input = rest; + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct AttributeSet(pub HashMap>); + +impl AttributeSet { + fn parse<'a>( + decoder: &Decoder, + mut input: &'a str, + sentinel: Option, + ) -> Result<(AttributeSet, &'a str), Warning> { + let mut attributes = HashMap::new(); + let rest = loop { + match input.chars().next() { + None => break input, + c if c == sentinel => break &input[1..], + _ => { + let (attribute, rest) = Attribute::parse(decoder, input)?; + // XXX report duplicate name + attributes.insert(attribute.name, attribute.values); + input = rest; + } + } + }; + Ok((AttributeSet(attributes), rest)) + } +} + +#[derive(Clone, Debug, Default)] +pub struct FileAttributeRecord(pub AttributeSet); + +impl FileAttributeRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let input = decoder.decode(&source.text); + match AttributeSet::parse(decoder, &input, None).issue_warning(&decoder.warn) { + Some((set, rest)) => { + if !rest.is_empty() { + decoder.warn(Warning::TBD); + } + FileAttributeRecord(set) + } + None => FileAttributeRecord::default(), + } + } +} + +#[derive(Clone, Debug)] +pub struct VarAttributeSet { + pub long_var_name: Identifier, + pub attributes: AttributeSet, +} + +impl VarAttributeSet { + fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Warning> { + let Some((long_var_name, rest)) = input.split_once(':') else { + return Err(Warning::TBD); + }; + let long_var_name = decoder + .new_identifier(long_var_name) + .map_err(Warning::InvalidAttributeVariableName)?; + let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?; + let var_attribute = VarAttributeSet { + long_var_name, + attributes, + }; + Ok((var_attribute, rest)) + } +} + +#[derive(Clone, Debug)] +pub struct VariableAttributeRecord(Vec); + +impl VariableAttributeRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let decoded = decoder.decode(&source.text); + let mut input = decoded.as_ref(); + let mut var_attribute_sets = Vec::new(); + while !input.is_empty() { + let Some((var_attribute, rest)) = + VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn) + else { + break; + }; + var_attribute_sets.push(var_attribute); + input = rest; + } + VariableAttributeRecord(var_attribute_sets) + } +} + +#[derive(Clone, Debug)] +pub struct LongName { + pub short_name: Identifier, + pub long_name: Identifier, +} + +impl LongName { + fn parse(input: &str, decoder: &Decoder) -> Result { + let Some((short_name, long_name)) = input.split_once('=') else { + return Err(Warning::TBD); + }; + let short_name = decoder + .new_identifier(short_name) + .map_err(Warning::InvalidShortName)?; + let long_name = decoder + .new_identifier(long_name) + .map_err(Warning::InvalidLongName)?; + Ok(LongName { + short_name, + long_name, + }) + } +} + +#[derive(Clone, Debug)] +pub struct LongNamesRecord(Vec); + +impl LongNamesRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let input = decoder.decode(&source.text); + let mut names = Vec::new(); + for pair in input.split('\t').filter(|s| !s.is_empty()) { + if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) { + names.push(long_name); + } + } + LongNamesRecord(names) + } +} + +#[derive(Clone, Debug)] +pub struct ProductInfoRecord(pub String); + +impl ProductInfoRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + Self(decoder.decode(&source.text).into()) + } +} +#[derive(Clone, Debug)] +pub struct VariableSet { + pub name: String, + pub vars: Vec, +} + +impl VariableSet { + fn parse(input: &str, decoder: &Decoder) -> Result { + let (name, input) = input.split_once('=').ok_or(Warning::TBD)?; + let mut vars = Vec::new(); + for var in input.split_ascii_whitespace() { + if let Some(identifier) = decoder + .new_identifier(var) + .map_err(Warning::InvalidVariableSetName) + .issue_warning(&decoder.warn) + { + vars.push(identifier); + } + } + Ok(VariableSet { + name: name.into(), + vars, + }) + } +} + +#[derive(Clone, Debug)] +pub struct VariableSetRecord { + pub offsets: Range, + pub sets: Vec, +} + +impl VariableSetRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord { + let mut sets = Vec::new(); + let input = decoder.decode(&source.text); + for line in input.lines() { + if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) { + sets.push(set) + } + } + VariableSetRecord { + offsets: source.offsets.clone(), + sets, + } + } +} + +trait IssueWarning { + fn issue_warning(self, warn: &F) -> Option + where + F: Fn(Warning); +} +impl IssueWarning for Result { + fn issue_warning(self, warn: &F) -> Option + where + F: Fn(Warning), + { + match self { + Ok(result) => Some(result), + Err(error) => { + warn(error); + None + } } } } @@ -1553,10 +2551,10 @@ pub struct Extension { } impl Extension { - fn check_size(&self) -> Result<(), Error> { + fn check_size(&self) -> Result<(), Warning> { if let Some(expected_size) = E::SIZE { if self.size != expected_size { - return Err(Error::BadRecordSize { + return Err(Warning::BadRecordSize { offset: self.offsets.start, record: E::NAME.into(), size: self.size, @@ -1566,7 +2564,7 @@ impl Extension { } if let Some(expected_count) = E::COUNT { if self.count != expected_count { - return Err(Error::BadRecordCount { + return Err(Warning::BadRecordCount { offset: self.offsets.start, record: E::NAME.into(), count: self.count, @@ -1577,7 +2575,12 @@ impl Extension { Ok(()) } - fn read(r: &mut R, endian: Endian) -> Result { + fn read( + r: &mut R, + endian: Endian, + n_vars: usize, + warn: &dyn Fn(Warning), + ) -> Result, Error> { let subtype = endian.parse(read_bytes(r)?); let header_offset = r.stream_position()?; let size: u32 = endian.parse(read_bytes(r)?); @@ -1600,25 +2603,50 @@ impl Extension { count, data, }; - match subtype { - IntegerInfoRecord::SUBTYPE => Ok(IntegerInfoRecord::parse(&extension, endian)?), - FloatInfoRecord::SUBTYPE => Ok(FloatInfoRecord::parse(&extension, endian)?), - VarDisplayRecord::SUBTYPE => Ok(VarDisplayRecord::parse(&extension, endian)?), + let result = match subtype { + IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian), + FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian), + VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn), MultipleResponseRecord::SUBTYPE | 19 => { - Ok(MultipleResponseRecord::parse(&extension, endian)?) + MultipleResponseRecord::parse(&extension, endian) } LongStringValueLabelRecord::SUBTYPE => { - Ok(LongStringValueLabelRecord::parse(&extension, endian)?) + LongStringValueLabelRecord::parse(&extension, endian) } - EncodingRecord::SUBTYPE => Ok(EncodingRecord::parse(&extension, endian)?), - NumberOfCasesRecord::SUBTYPE => Ok(NumberOfCasesRecord::parse(&extension, endian)?), - 5 => Ok(Record::VariableSets(extension.into())), - 10 => Ok(Record::ProductInfo(extension.into())), - 13 => Ok(Record::LongNames(extension.into())), - 14 => Ok(Record::VeryLongStrings(extension.into())), - 17 => Ok(Record::FileAttributes(extension.into())), - 18 => Ok(Record::VariableAttributes(extension.into())), + EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian), + NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian), + 5 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::VariableSets, + ))), + 10 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::ProductInfo, + ))), + 13 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::LongNames, + ))), + 14 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::VeryLongStrings, + ))), + 17 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::FileAttributes, + ))), + 18 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::VariableAttributes, + ))), _ => Ok(Record::OtherExtension(extension)), + }; + match result { + Ok(result) => Ok(Some(result)), + Err(error) => { + warn(error); + Ok(None) + } } } } @@ -1764,30 +2792,60 @@ fn read_vec(r: &mut R, n: usize) -> Result, IoError> { Ok(vec) } -fn read_string(r: &mut R, endian: Endian) -> Result { +fn read_string(r: &mut R, endian: Endian) -> Result { let length: u32 = endian.parse(read_bytes(r)?); Ok(read_vec(r, length as usize)?.into()) } #[derive(Clone, Debug)] -pub struct LongStringValueLabels { - pub var_name: UnencodedString, +pub struct LongStringValueLabels +where + S: Debug, +{ + pub var_name: N, pub width: u32, /// `(value, label)` pairs, where each value is `width` bytes. - pub labels: Vec<(UnencodedString, UnencodedString)>, + pub labels: Vec<(S, S)>, +} + +impl LongStringValueLabels { + fn decode( + &self, + decoder: &Decoder, + ) -> Result, Warning> { + let var_name = decoder.decode(&self.var_name); + let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) + .map_err(Warning::InvalidLongStringValueLabelName)?; + + let mut labels = Vec::with_capacity(self.labels.len()); + for (value, label) in self.labels.iter() { + let value = decoder.decode_exact_length(&value.0).to_string(); + let label = decoder.decode(label).to_string(); + labels.push((value, label)); + } + + Ok(LongStringValueLabels { + var_name, + width: self.width, + labels, + }) + } } #[derive(Clone, Debug)] -pub struct LongStringValueLabelRecord(pub Vec); +pub struct LongStringValueLabelRecord(pub Vec>) +where + N: Debug, + S: Debug; -impl ExtensionRecord for LongStringValueLabelRecord { +impl ExtensionRecord for LongStringValueLabelRecord { const SUBTYPE: u32 = 21; const SIZE: Option = Some(1); const COUNT: Option = None; const NAME: &'static str = "long string value labels record"; - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size::()?; let mut input = &ext.data[..]; @@ -1813,3 +2871,16 @@ impl ExtensionRecord for LongStringValueLabelRecord { ))) } } + +impl LongStringValueLabelRecord { + fn decode(self, decoder: &Decoder) -> LongStringValueLabelRecord { + let mut labels = Vec::with_capacity(self.0.len()); + for label in &self.0 { + match label.decode(decoder) { + Ok(set) => labels.push(set), + Err(error) => decoder.warn(error), + } + } + LongStringValueLabelRecord(labels) + } +}