3 encoding::{default_encoding, get_encoding, Error as EncodingError},
4 endian::{Endian, Parse, ToBytes},
5 identifier::{Error as IdError, Identifier},
8 use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding};
9 use flate2::read::ZlibDecoder;
15 collections::{HashMap, VecDeque},
16 fmt::{Debug, Display, Formatter, Result as FmtResult},
17 io::{Error as IoError, Read, Seek, SeekFrom},
24 use thiserror::Error as ThisError;
26 #[derive(ThisError, Debug)]
28 #[error("Not an SPSS system file")]
31 #[error("Invalid magic number {0:?}")]
34 #[error("I/O error ({0})")]
37 #[error("Invalid SAV compression code {0}")]
38 InvalidSavCompression(u32),
40 #[error("Invalid ZSAV compression code {0}")]
41 InvalidZsavCompression(u32),
43 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
44 BadDocumentLength { offset: u64, n: usize, max: usize },
46 #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
47 BadRecordType { offset: u64, rec_type: u32 },
49 #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")]
50 BadVariableWidth { start_offset: u64, width: i32 },
52 #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
53 BadVariableLabelCode {
60 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
62 BadNumericMissingValueCode { offset: u64, code: i32 },
64 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
65 BadStringMissingValueCode { offset: u64, code: i32 },
67 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
68 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
70 #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
71 ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
73 #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
74 TooManyVarIndexes { offset: u64, n: u32, max: u32 },
76 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
77 ExtensionRecordTooLarge {
84 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
92 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
94 EofInCompressedCase { offset: u64, case_ofs: u64 },
96 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
97 PartialCompressedCase { offset: u64, case_ofs: u64 },
99 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
100 CompressedNumberExpected { offset: u64, case_ofs: u64 },
102 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
103 CompressedStringExpected { offset: u64, case_ofs: u64 },
105 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
106 BadZlibTrailerNBlocks {
109 expected_n_blocks: u64,
114 EncodingError(EncodingError),
117 #[derive(ThisError, Debug)]
119 #[error("Unexpected end of data inside extension record.")]
122 #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
123 NoVarIndexes { offset: u64 },
125 #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
129 wrong_types: Vec<u32>,
132 #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")]
139 #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
147 #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
155 #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
156 BadLongMissingValueLength {
162 #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
163 BadEncodingName { offset: u64 },
165 // XXX This is risky because `text` might be arbitrarily long.
166 #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
167 MalformedString { encoding: String, text: String },
169 #[error("Invalid variable measurement level value {0}")]
170 InvalidMeasurement(u32),
172 #[error("Invalid variable display alignment value {0}")]
173 InvalidAlignment(u32),
175 #[error("Invalid attribute name. {0}")]
176 InvalidAttributeName(IdError),
178 #[error("Invalid variable name in attribute record. {0}")]
179 InvalidAttributeVariableName(IdError),
181 #[error("Invalid short name in long variable name record. {0}")]
182 InvalidShortName(IdError),
184 #[error("Invalid name in long variable name record. {0}")]
185 InvalidLongName(IdError),
187 #[error("Invalid variable name in very long string record. {0}")]
188 InvalidLongStringName(IdError),
190 #[error("Invalid variable name in variable set record. {0}")]
191 InvalidVariableSetName(IdError),
193 #[error("Invalid multiple response set name. {0}")]
194 InvalidMrSetName(IdError),
196 #[error("Invalid multiple response set variable name. {0}")]
197 InvalidMrSetVariableName(IdError),
199 #[error("Invalid variable name in long string missing values record. {0}")]
200 InvalidLongStringMissingValueVariableName(IdError),
202 #[error("Invalid variable name in long string value label record. {0}")]
203 InvalidLongStringValueLabelName(IdError),
206 EncodingError(EncodingError),
208 #[error("Details TBD")]
212 impl From<IoError> for Warning {
213 fn from(_source: IoError) -> Self {
214 Self::UnexpectedEndOfData
218 #[derive(Clone, Debug)]
220 Header(HeaderRecord<RawString>),
221 Variable(VariableRecord<RawString, RawStr<8>>),
222 ValueLabel(ValueLabelRecord<RawStr<8>, RawString>),
223 Document(DocumentRecord<RawDocumentLine>),
224 IntegerInfo(IntegerInfoRecord),
225 FloatInfo(FloatInfoRecord),
226 VarDisplay(VarDisplayRecord),
227 MultipleResponse(MultipleResponseRecord<RawString, RawString>),
228 LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
229 LongStringMissingValues(LongStringMissingValueRecord<RawString, RawStr<8>>),
230 Encoding(EncodingRecord),
231 NumberOfCases(NumberOfCasesRecord),
233 OtherExtension(Extension),
237 Cases(Rc<RefCell<Cases>>),
240 #[derive(Clone, Debug)]
241 pub enum DecodedRecord {
242 Header(HeaderRecord<String>),
243 Variable(VariableRecord<String, String>),
244 ValueLabel(ValueLabelRecord<RawStr<8>, String>),
245 Document(DocumentRecord<String>),
246 IntegerInfo(IntegerInfoRecord),
247 FloatInfo(FloatInfoRecord),
248 VarDisplay(VarDisplayRecord),
249 MultipleResponse(MultipleResponseRecord<Identifier, String>),
250 LongStringValueLabels(LongStringValueLabelRecord<Identifier, String>),
251 LongStringMissingValues(LongStringMissingValueRecord<Identifier, String>),
252 Encoding(EncodingRecord),
253 NumberOfCases(NumberOfCasesRecord),
254 VariableSets(VariableSetRecord),
255 ProductInfo(ProductInfoRecord),
256 LongNames(LongNamesRecord),
257 VeryLongStrings(VeryLongStringsRecord),
258 FileAttributes(FileAttributeRecord),
259 VariableAttributes(VariableAttributeRecord),
260 OtherExtension(Extension),
264 Cases(Rc<RefCell<Cases>>),
271 var_types: &[VarType],
272 warn: &dyn Fn(Warning),
273 ) -> Result<Option<Record>, Error>
277 let rec_type: u32 = endian.parse(read_bytes(reader)?);
279 2 => Ok(Some(VariableRecord::read(reader, endian)?)),
280 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
281 6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
282 7 => Extension::read(reader, endian, var_types.len(), warn),
283 999 => Ok(Some(Record::EndOfHeaders(
284 endian.parse(read_bytes(reader)?),
286 _ => Err(Error::BadRecordType {
287 offset: reader.stream_position()?,
/// Converts this raw record into the corresponding [`DecodedRecord`].
///
/// Variants that carry text (header, variable, value label, document,
/// multiple response, long string value labels / missing values, text
/// extensions) delegate to their own `decode` method, passing `decoder`.
/// The remaining variants are wrapped in the matching `DecodedRecord`
/// variant without further processing.
293 pub fn decode(self, decoder: &Decoder) -> Result<DecodedRecord, Error> {
295 Record::Header(record) => record.decode(decoder),
296 Record::Variable(record) => record.decode(decoder),
297 Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
298 Record::Document(record) => record.decode(decoder),
299 Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
300 Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
301 Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
302 Record::MultipleResponse(record) => record.decode(decoder),
303 Record::LongStringValueLabels(record) => {
304 DecodedRecord::LongStringValueLabels(record.decode(decoder))
306 Record::LongStringMissingValues(record) => {
307 DecodedRecord::LongStringMissingValues(record.decode(decoder))
309 Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
310 Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
311 Record::Text(record) => record.decode(decoder),
312 Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
313 Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
314 Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
315 Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
316 Record::Cases(record) => DecodedRecord::Cases(record.clone()),
/// Chooses the character encoding for the file described by `headers`.
///
/// The encoding name is taken from the last `Record::Encoding` found in
/// `headers` and the character code from the last `Record::IntegerInfo`;
/// either may be absent. Both are passed to `get_encoding`.
///
/// `headers` is taken as a slice so that callers are not required to own a
/// `Vec`; an existing `&Vec<Record>` argument still coerces.
///
/// # Errors
///
/// `EncodingError::Ebcdic` is returned to the caller wrapped in
/// `Error::EncodingError`. Other failures from `get_encoding` are reported
/// through `warn`, and the default encoding is returned instead.
321 pub fn encoding_from_headers(
322 headers: &[Record],
323 warn: &impl Fn(Warning),
324 ) -> Result<&'static Encoding, Error> {
325 let mut encoding_record = None;
326 let mut integer_info_record = None;
327 for record in headers {
329 Record::Encoding(record) => encoding_record = Some(record),
330 Record::IntegerInfo(record) => integer_info_record = Some(record),
334 let encoding = encoding_record.map(|record| record.0.as_str());
335 let character_code = integer_info_record.map(|record| record.character_code);
336 match get_encoding(encoding, character_code) {
337 Ok(encoding) => Ok(encoding),
338 Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
340 warn(Warning::EncodingError(err));
341 // Warn that we're using the default encoding.
342 Ok(default_encoding())
347 // If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
348 // decoded as Latin-1 (actually bytes interpreted as Unicode code points).
349 fn default_decode(s: &[u8]) -> Cow<str> {
350 from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
353 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
354 pub enum Compression {
360 fn offsets(&self) -> Range<u64>;
364 pub struct HeaderRecord<S>
369 pub offsets: Range<u64>,
374 /// Eye-catcher string, product name, in the file's encoding. Padded
375 /// on the right with spaces.
378 /// Layout code, normally either 2 or 3.
379 pub layout_code: u32,
381 /// Number of variable positions, or `None` if the value in the file is
382 /// questionably trustworthy.
383 pub nominal_case_size: Option<u32>,
385 /// Compression type, if any.
386 pub compression: Option<Compression>,
388 /// 1-based variable index of the weight variable, or `None` if the file is
390 pub weight_index: Option<u32>,
392 /// Claimed number of cases, if known.
393 pub n_cases: Option<u32>,
395 /// Compression bias, usually 100.0.
398 /// `dd mmm yy` in the file's encoding.
399 pub creation_date: S,
401 /// `HH:MM:SS` in the file's encoding.
402 pub creation_time: S,
404 /// File label, in the file's encoding. Padded on the right with spaces.
407 /// Endianness of the data in the file header.
411 impl<S> HeaderRecord<S>
415 fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
419 writeln!(f, "{name:>17}: {:?}", value)
423 impl<S> Debug for HeaderRecord<S>
427 fn fmt(&self, f: &mut Formatter) -> FmtResult {
428 writeln!(f, "File header record:")?;
429 self.debug_field(f, "Magic", self.magic)?;
430 self.debug_field(f, "Product name", &self.eye_catcher)?;
431 self.debug_field(f, "Layout code", self.layout_code)?;
432 self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
433 self.debug_field(f, "Compression", self.compression)?;
434 self.debug_field(f, "Weight index", self.weight_index)?;
435 self.debug_field(f, "Number of cases", self.n_cases)?;
436 self.debug_field(f, "Compression bias", self.bias)?;
437 self.debug_field(f, "Creation date", &self.creation_date)?;
438 self.debug_field(f, "Creation time", &self.creation_time)?;
439 self.debug_field(f, "File label", &self.file_label)?;
440 self.debug_field(f, "Endianness", self.endian)
444 impl HeaderRecord<RawString> {
/// Reads the file header record from `r`.
///
/// The byte order of the file is detected from the layout code, which is
/// normally 2 or 3: the raw bytes are tested against both values, and the
/// byte order that matches is used to parse every later numeric field.
///
/// Fields that hold implausible values are recorded as `None` rather than
/// rejected: a nominal case size above `i32::MAX / 16`, a weight index of 0,
/// and a case count that is not below `i32::MAX / 2`.
///
/// # Errors
///
/// Returns [`Error::NotASystemFile`] if the magic number is not recognized or
/// the layout code matches neither 2 nor 3 in either byte order,
/// [`Error::InvalidZsavCompression`] or [`Error::InvalidSavCompression`] if
/// the compression code is not valid for the kind of file, and propagates
/// errors from reading or seeking `r`.
445 fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
446 let start = r.stream_position()?;
448 let magic: [u8; 4] = read_bytes(r)?;
449 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
451 let eye_catcher = RawString(read_vec(r, 60)?);
452 let layout_code: [u8; 4] = read_bytes(r)?;
// Try layout code 2 first, then 3.  (The fallback used to test 2 a second
// time, which made it dead code and rejected files with layout code 3.)
453 let endian = Endian::identify_u32(2, layout_code)
454 .or_else(|| Endian::identify_u32(3, layout_code))
455 .ok_or_else(|| Error::NotASystemFile)?;
456 let layout_code = endian.parse(layout_code);
458 let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
459 let nominal_case_size =
460 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
462 let compression_code: u32 = endian.parse(read_bytes(r)?);
// Which compression codes are acceptable depends on the magic number.
463 let compression = match (magic, compression_code) {
464 (Magic::Zsav, 2) => Some(Compression::ZLib),
465 (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
467 (_, 1) => Some(Compression::Simple),
468 (_, code) => return Err(Error::InvalidSavCompression(code)),
471 let weight_index: u32 = endian.parse(read_bytes(r)?);
472 let weight_index = (weight_index > 0).then_some(weight_index);
474 let n_cases: u32 = endian.parse(read_bytes(r)?);
475 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
477 let bias: f64 = endian.parse(read_bytes(r)?);
479 let creation_date = RawString(read_vec(r, 9)?);
480 let creation_time = RawString(read_vec(r, 8)?);
481 let file_label = RawString(read_vec(r, 64)?);
// Three bytes follow the file label and are read and discarded.
482 let _: [u8; 3] = read_bytes(r)?;
485 offsets: start..r.stream_position()?,
501 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
502 let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
503 let file_label = decoder.decode(&self.file_label).to_string();
504 let creation_date = decoder.decode(&self.creation_date).to_string();
505 let creation_time = decoder.decode(&self.creation_time).to_string();
506 DecodedRecord::Header(HeaderRecord {
508 weight_index: self.weight_index,
509 n_cases: self.n_cases,
511 offsets: self.offsets.clone(),
513 layout_code: self.layout_code,
514 nominal_case_size: self.nominal_case_size,
515 compression: self.compression,
525 pub encoding: &'static Encoding,
526 pub warn: Box<dyn Fn(Warning)>,
530 pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
532 F: Fn(Warning) + 'static,
536 warn: Box::new(warn),
539 fn warn(&self, warning: Warning) {
542 fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
543 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
545 self.warn(Warning::MalformedString {
546 encoding: self.encoding.name().into(),
547 text: output.clone().into(),
553 fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
554 self.decode_slice(input.0.as_slice())
557 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
558 /// re-encoding the result back into `self.encoding` will have exactly the
559 /// same length in bytes.
561 /// XXX warn about errors?
562 pub fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
563 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
564 // This is the common case. Usually there will be no errors.
567 // Unusual case. Don't bother to optimize it much.
568 let mut decoder = self.encoding.new_decoder_without_bom_handling();
569 let mut output = String::with_capacity(
571 .max_utf8_buffer_length_without_replacement(input.len())
574 let mut rest = input;
575 while !rest.is_empty() {
576 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
577 (DecoderResult::InputEmpty, _) => break,
578 (DecoderResult::OutputFull, _) => unreachable!(),
579 (DecoderResult::Malformed(a, b), consumed) => {
580 let skipped = a as usize + b as usize;
581 output.extend(repeat('?').take(skipped));
582 rest = &rest[consumed..];
586 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
591 pub fn decode_identifier(&self, input: &RawString) -> Result<Identifier, IdError> {
592 self.new_identifier(&self.decode(input))
595 pub fn new_identifier(&self, name: &str) -> Result<Identifier, IdError> {
596 Identifier::new(name, self.encoding)
600 impl<S> Header for HeaderRecord<S>
604 fn offsets(&self) -> Range<u64> {
609 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
611 /// Regular system file.
614 /// System file with Zlib-compressed data.
617 /// EBCDIC-encoded system file.
622 /// Magic number for a regular system file.
623 pub const SAV: [u8; 4] = *b"$FL2";
625 /// Magic number for a system file that contains zlib-compressed data.
626 pub const ZSAV: [u8; 4] = *b"$FL3";
628 /// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded
630 pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
633 impl Debug for Magic {
634 fn fmt(&self, f: &mut Formatter) -> FmtResult {
635 let s = match *self {
636 Magic::Sav => "$FL2",
637 Magic::Zsav => "$FL3",
638 Magic::Ebcdic => "($FL2 in EBCDIC)",
644 impl TryFrom<[u8; 4]> for Magic {
647 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
649 Magic::SAV => Ok(Magic::Sav),
650 Magic::ZSAV => Ok(Magic::Zsav),
651 Magic::EBCDIC => Ok(Magic::Ebcdic),
652 _ => Err(Error::BadMagic(value)),
657 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
664 pub fn from_width(width: VarWidth) -> VarType {
666 VarWidth::Numeric => Self::Numeric,
667 VarWidth::String(_) => Self::String,
671 pub fn opposite(self) -> VarType {
673 Self::Numeric => Self::String,
674 Self::String => Self::Numeric,
679 impl Display for VarType {
680 fn fmt(&self, f: &mut Formatter) -> FmtResult {
682 VarType::Numeric => write!(f, "numeric"),
683 VarType::String => write!(f, "string"),
688 #[derive(Copy, Clone)]
697 type RawValue = Value<RawStr<8>>;
699 impl<S> Debug for Value<S>
703 fn fmt(&self, f: &mut Formatter) -> FmtResult {
705 Value::Number(Some(number)) => write!(f, "{number:?}"),
706 Value::Number(None) => write!(f, "SYSMIS"),
707 Value::String(s) => write!(f, "{:?}", s),
713 fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Self, IoError> {
715 &UntypedValue(read_bytes(r)?),
721 pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
723 VarType::String => Value::String(RawStr(raw.0)),
724 VarType::Numeric => {
725 let number: f64 = endian.parse(raw.0);
726 Value::Number((number != -f64::MAX).then_some(number))
731 fn read_case<R: Read + Seek>(
733 var_types: &[VarType],
735 ) -> Result<Option<Vec<Self>>, Error> {
736 let case_start = reader.stream_position()?;
737 let mut values = Vec::with_capacity(var_types.len());
738 for (i, &var_type) in var_types.iter().enumerate() {
739 let Some(raw) = try_read_bytes(reader)? else {
743 let offset = reader.stream_position()?;
744 return Err(Error::EofInCase {
746 case_ofs: offset - case_start,
747 case_len: var_types.len() * 8,
751 values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
/// Reads one case from compressed data in `reader`, producing one value per
/// entry in `var_types`.
///
/// Compression opcodes are taken from the front of `codes`.  When that queue
/// is empty, the next 8 opcode bytes are read from `reader` and appended to
/// it.  Opcodes handled on the lines visible here:
///
/// - `1..=251`: the value is `code - bias`; for a string variable, the bytes
///   of that number as produced by `endian.to_bytes`.
/// - `254`: a string of spaces.  This is an error for a numeric variable.
/// - `255`: the system-missing value.  This is an error for a string
///   variable.
///
/// One further arm reads a literal 8-byte value from `reader` and converts it
/// with `from_raw`.
///
/// # Errors
///
/// - [`Error::EofInCompressedCase`] if the opcode bytes cannot be read.
/// - [`Error::PartialCompressedCase`] if the data ends part-way into a case.
/// - [`Error::CompressedNumberExpected`] if a string opcode (254) is found
///   for a numeric variable.
/// - [`Error::CompressedStringExpected`] if a numeric opcode (255) is found
///   for a string variable.
/// - Errors from reading or seeking `reader` are propagated.
756 fn read_compressed_case<R: Read + Seek>(
758 var_types: &[VarType],
759 codes: &mut VecDeque<u8>,
762 ) -> Result<Option<Vec<Self>>, Error> {
763 let case_start = reader.stream_position()?;
764 let mut values = Vec::with_capacity(var_types.len());
765 for (i, &var_type) in var_types.iter().enumerate() {
767 let Some(code) = codes.pop_front() else {
768 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
772 let offset = reader.stream_position()?;
773 return Err(Error::EofInCompressedCase {
775 case_ofs: offset - case_start,
779 codes.extend(new_codes.into_iter());
784 1..=251 => match var_type {
785 VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
787 break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
794 let offset = reader.stream_position()?;
795 return Err(Error::PartialCompressedCase {
797 case_ofs: offset - case_start,
802 break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
804 254 => match var_type {
805 VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC
806 VarType::Numeric => {
// Opcode 254 is a string, but this variable is numeric: "a string was
// found where a number was expected".
807 return Err(Error::CompressedNumberExpected {
809 case_ofs: reader.stream_position()? - case_start,
813 255 => match var_type {
814 VarType::Numeric => break Self::Number(None),
// Opcode 255 is a number (system-missing), but this variable is a string:
// "a number was found where a string was expected".
816 return Err(Error::CompressedStringExpected {
818 case_ofs: reader.stream_position()? - case_start,
829 fn decode(self, decoder: &Decoder) -> Value<String> {
831 Self::Number(x) => Value::Number(x),
832 Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
837 struct ZlibDecodeMultiple<R>
841 reader: Option<ZlibDecoder<R>>,
844 impl<R> ZlibDecodeMultiple<R>
848 fn new(reader: R) -> ZlibDecodeMultiple<R> {
850 reader: Some(ZlibDecoder::new(reader)),
855 impl<R> Read for ZlibDecodeMultiple<R>
859 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
861 match self.reader.as_mut().unwrap().read(buf)? {
863 let inner = self.reader.take().unwrap().into_inner();
864 self.reader = Some(ZlibDecoder::new(inner));
872 impl<R> Seek for ZlibDecodeMultiple<R>
876 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
877 self.reader.as_mut().unwrap().get_mut().seek(pos)
886 ztrailer_offset: u64,
895 R: Read + Seek + 'static,
898 warn: Box<dyn Fn(Warning)>,
900 header: HeaderRecord<RawString>,
901 var_types: Vec<VarType>,
908 R: Read + Seek + 'static,
910 pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
912 F: Fn(Warning) + 'static,
914 let header = HeaderRecord::read(&mut reader)?;
916 reader: Some(reader),
917 warn: Box::new(warn),
919 var_types: Vec::new(),
920 state: ReaderState::Start,
923 fn cases(&mut self) -> Cases {
924 self.state = ReaderState::End;
926 self.reader.take().unwrap(),
927 take(&mut self.var_types),
931 fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
933 ReaderState::Start => {
934 self.state = ReaderState::Headers;
935 Some(Ok(Record::Header(self.header.clone())))
937 ReaderState::Headers => {
940 self.reader.as_mut().unwrap(),
942 self.var_types.as_slice(),
945 Ok(Some(record)) => break record,
947 Err(error) => return Some(Err(error)),
951 Record::Variable(VariableRecord { width, .. }) => {
952 self.var_types.push(if width == 0 {
958 Record::EndOfHeaders(_) => {
959 self.state = if let Some(Compression::ZLib) = self.header.compression {
960 ReaderState::ZlibHeader
969 ReaderState::ZlibHeader => {
970 let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
972 Ok(zheader) => zheader,
973 Err(error) => return Some(Err(error)),
975 self.state = ReaderState::ZlibTrailer {
976 ztrailer_offset: zheader.ztrailer_offset,
977 ztrailer_len: zheader.ztrailer_len,
979 Some(Ok(Record::ZHeader(zheader)))
981 ReaderState::ZlibTrailer {
985 match ZTrailer::read(
986 self.reader.as_mut().unwrap(),
991 Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
992 Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
993 Err(error) => Some(Err(error)),
996 ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
997 ReaderState::End => None,
1002 impl<R> Iterator for Reader<R>
1004 R: Read + Seek + 'static,
1006 type Item = Result<Record, Error>;
1008 fn next(&mut self) -> Option<Self::Item> {
1009 let retval = self._next();
1010 if matches!(retval, Some(Err(_))) {
1011 self.state = ReaderState::End;
/// Combination of [`Read`] and [`Seek`] under a single trait, so that a
/// reader can be stored as a `Box<dyn ReadSeek>`.
trait ReadSeek: Read + Seek {}

/// Every type that implements both [`Read`] and [`Seek`] is a `ReadSeek`.
impl<T: Read + Seek> ReadSeek for T {}
1021 reader: Box<dyn ReadSeek>,
1022 var_types: Vec<VarType>,
1023 compression: Option<Compression>,
1026 codes: VecDeque<u8>,
1030 impl Debug for Cases {
1031 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1037 fn new<R>(reader: R, var_types: Vec<VarType>, header: &HeaderRecord<RawString>) -> Self
1039 R: Read + Seek + 'static,
1042 reader: if header.compression == Some(Compression::ZLib) {
1043 Box::new(ZlibDecodeMultiple::new(reader))
1048 compression: header.compression,
1050 endian: header.endian,
1051 codes: VecDeque::with_capacity(8),
1057 impl Iterator for Cases {
1058 type Item = Result<Vec<RawValue>, Error>;
1060 fn next(&mut self) -> Option<Self::Item> {
1065 let retval = if self.compression.is_some() {
1066 Value::read_compressed_case(
1075 Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
1077 self.eof = matches!(retval, None | Some(Err(_)));
1082 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
1083 pub struct Spec(pub u32);
1085 impl Debug for Spec {
1086 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1087 let type_ = format_name(self.0 >> 16);
1088 let w = (self.0 >> 8) & 0xff;
1089 let d = self.0 & 0xff;
1090 write!(f, "{:06x} ({type_}{w}.{d})", self.0)
1094 fn format_name(type_: u32) -> Cow<'static, str> {
1133 _ => return format!("<unknown format {type_}>").into(),
1139 pub struct MissingValues<S = String>
1143 /// Individual missing values, up to 3 of them.
1144 pub values: Vec<Value<S>>,
1146 /// Optional range of missing values.
1147 pub range: Option<(Value<S>, Value<S>)>,
1150 impl<S> Debug for MissingValues<S>
1154 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1155 for (i, value) in self.values.iter().enumerate() {
1159 write!(f, "{value:?}")?;
1162 if let Some((low, high)) = &self.range {
1163 if !self.values.is_empty() {
1166 write!(f, "{low:?} THRU {high:?}")?;
1169 if self.is_empty() {
1177 impl<S> MissingValues<S>
1181 fn is_empty(&self) -> bool {
1182 self.values.is_empty() && self.range.is_none()
1186 impl<S> Default for MissingValues<S>
1190 fn default() -> Self {
1198 impl MissingValues<RawStr<8>> {
1199 fn read<R: Read + Seek>(
1205 ) -> Result<Self, Error> {
1206 let (n_values, has_range) = match (width, code) {
1207 (_, 0..=3) => (code, false),
1208 (0, -2) => (0, true),
1209 (0, -3) => (1, true),
1210 (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
1211 (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
1214 let var_type = if width == 0 {
1220 let mut values = Vec::new();
1221 for _ in 0..n_values {
1222 values.push(RawValue::read(r, var_type, endian)?);
1224 let range = if has_range {
1225 let low = RawValue::read(r, var_type, endian)?;
1226 let high = RawValue::read(r, var_type, endian)?;
1231 Ok(Self { values, range })
1233 fn decode(&self, decoder: &Decoder) -> MissingValues<String> {
1238 .map(|value| value.decode(decoder))
1243 .map(|(low, high)| (low.decode(decoder), high.decode(decoder))),
1249 pub struct VariableRecord<S, V>
1254 /// Range of offsets in file.
1255 pub offsets: Range<u64>,
1257 /// Variable width, in the range -1..=255.
1260 /// Variable name, padded on the right with spaces.
1264 pub print_format: Spec,
1267 pub write_format: Spec,
1270 pub missing_values: MissingValues<V>,
1272 /// Optional variable label.
1273 pub label: Option<S>,
1276 impl<S, V> Debug for VariableRecord<S, V>
1281 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1286 match self.width.cmp(&0) {
1287 Ordering::Greater => "string",
1288 Ordering::Equal => "numeric",
1289 Ordering::Less => "long string continuation record",
1292 writeln!(f, "Print format: {:?}", self.print_format)?;
1293 writeln!(f, "Write format: {:?}", self.write_format)?;
1294 writeln!(f, "Name: {:?}", &self.name)?;
1295 writeln!(f, "Variable label: {:?}", self.label)?;
1296 writeln!(f, "Missing values: {:?}", self.missing_values)
1300 impl VariableRecord<RawString, RawStr<8>> {
/// Reads a variable record from `r`, parsing numeric fields with `endian`.
///
/// Reads, in order: the width, the variable-label flag, the missing-value
/// code, the print and write formats, the 8-byte name, the optional label,
/// and the missing values (via `MissingValues::read`).  The returned record
/// covers the offsets from where reading started to where it finished.
///
/// # Errors
///
/// Returns [`Error::BadVariableWidth`] if the width is outside `-1..=255`,
/// [`Error::BadVariableLabelCode`] for an unexpected label flag, and
/// propagates errors from `MissingValues::read` and from reading `r`.
1301 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1302 let start_offset = r.stream_position()?;
1303 let width: i32 = endian.parse(read_bytes(r)?);
1304 if !(-1..=255).contains(&width) {
1305 return Err(Error::BadVariableWidth { start_offset, width });
// Remember where the label flag starts so that an error can point at it.
1307 let code_offset = r.stream_position()?;
1308 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
1309 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
1310 let print_format = Spec(endian.parse(read_bytes(r)?));
1311 let write_format = Spec(endian.parse(read_bytes(r)?));
1312 let name = RawString(read_vec(r, 8)?);
1314 let label = match has_variable_label {
1317 let len: u32 = endian.parse(read_bytes(r)?);
// At most 65535 bytes of the label are read.
// NOTE(review): when `len` exceeds 65535, the padding below is still
// computed from the unclamped `len`, and no visible line skips the
// remaining `len - 65535` label bytes.  Confirm against the elided lines
// that the stream does not end up misaligned.
1318 let read_len = len.min(65535) as usize;
1319 let label = RawString(read_vec(r, read_len)?);
// The label is followed by padding up to a multiple of 4 bytes.
1321 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
1322 let _ = read_vec(r, padding_bytes as usize)?;
1327 return Err(Error::BadVariableLabelCode {
1330 code: has_variable_label,
1335 let missing_values =
1336 MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
1338 let end_offset = r.stream_position()?;
1340 Ok(Record::Variable(VariableRecord {
1341 offsets: start_offset..end_offset,
1351 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
1352 DecodedRecord::Variable(VariableRecord {
1353 offsets: self.offsets.clone(),
1355 name: decoder.decode(&self.name).to_string(),
1356 print_format: self.print_format,
1357 write_format: self.write_format,
1358 missing_values: self.missing_values.decode(decoder),
1362 .map(|label| decoder.decode(label).to_string()),
1367 #[derive(Copy, Clone)]
1368 pub struct UntypedValue(pub [u8; 8]);
1370 impl Debug for UntypedValue {
1371 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1372 let little: f64 = Endian::Little.parse(self.0);
1373 let little = format!("{:?}", little);
1374 let big: f64 = Endian::Big.parse(self.0);
1375 let big = format!("{:?}", big);
1376 let number = if little.len() <= big.len() {
1381 write!(f, "{number}")?;
1383 let string = default_decode(&self.0);
1385 .split(|c: char| c == '\0' || c.is_control())
1388 write!(f, "{string:?}")?;
1394 pub struct RawString(pub Vec<u8>);
1396 impl From<Vec<u8>> for RawString {
1397 fn from(source: Vec<u8>) -> Self {
1402 impl From<&[u8]> for RawString {
1403 fn from(source: &[u8]) -> Self {
1408 impl Debug for RawString {
1409 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1410 write!(f, "{:?}", default_decode(self.0.as_slice()))
1414 #[derive(Copy, Clone)]
1415 pub struct RawStr<const N: usize>(pub [u8; N]);
1417 impl<const N: usize> From<[u8; N]> for RawStr<N> {
1418 fn from(source: [u8; N]) -> Self {
1423 impl<const N: usize> Debug for RawStr<N> {
1424 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1425 write!(f, "{:?}", default_decode(&self.0))
1429 #[derive(Clone, Debug)]
1430 pub struct ValueLabel<V, S>
1435 pub value: Value<V>,
1440 pub struct ValueLabelRecord<V, S>
1445 /// Range of offsets in file.
1446 pub offsets: Range<u64>,
1449 pub labels: Vec<ValueLabel<V, S>>,
1451 /// The 1-based dictionary indexes of the variables that the labels apply to.
1452 pub dict_indexes: Vec<u32>,
1454 /// The types of the variables.
1455 pub var_type: VarType,
1458 impl<V, S> Debug for ValueLabelRecord<V, S>
1463 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1464 writeln!(f, "labels: ")?;
1465 for label in self.labels.iter() {
1466 writeln!(f, "{label:?}")?;
1468 write!(f, "apply to {} variables", self.var_type)?;
1469 for dict_index in self.dict_indexes.iter() {
1470 write!(f, " #{dict_index}")?;
1476 impl<V, S> Header for ValueLabelRecord<V, S>
1481 fn offsets(&self) -> Range<u64> {
1482 self.offsets.clone()
1486 impl<V, S> ValueLabelRecord<V, S>
1491 /// Maximum number of value labels in a record.
1492 pub const MAX_LABELS: u32 = u32::MAX / 8;
1494 /// Maximum number of variable indexes in a record.
1495 pub const MAX_INDEXES: u32 = u32::MAX / 8;
1498 impl ValueLabelRecord<RawStr<8>, RawString> {
1499 fn read<R: Read + Seek>(
1502 var_types: &[VarType],
1503 warn: &dyn Fn(Warning),
1504 ) -> Result<Option<Record>, Error> {
1505 let label_offset = r.stream_position()?;
1506 let n: u32 = endian.parse(read_bytes(r)?);
1507 if n > Self::MAX_LABELS {
1508 return Err(Error::BadNumberOfValueLabels {
1509 offset: label_offset,
1511 max: Self::MAX_LABELS,
1515 let mut labels = Vec::new();
1517 let value = UntypedValue(read_bytes(r)?);
1518 let label_len: u8 = endian.parse(read_bytes(r)?);
1519 let label_len = label_len as usize;
1520 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
1522 let mut label = read_vec(r, padded_len - 1)?;
1523 label.truncate(label_len);
1524 labels.push((value, RawString(label)));
1527 let index_offset = r.stream_position()?;
1528 let rec_type: u32 = endian.parse(read_bytes(r)?);
1530 return Err(Error::ExpectedVarIndexRecord {
1531 offset: index_offset,
1536 let n: u32 = endian.parse(read_bytes(r)?);
1537 if n > Self::MAX_INDEXES {
1538 return Err(Error::TooManyVarIndexes {
1539 offset: index_offset,
1541 max: Self::MAX_INDEXES,
1545 let index_offset = r.stream_position()?;
1546 let mut dict_indexes = Vec::with_capacity(n as usize);
1547 let mut invalid_indexes = Vec::new();
1549 let index: u32 = endian.parse(read_bytes(r)?);
1550 if index == 0 || index as usize > var_types.len() {
1551 dict_indexes.push(index);
1553 invalid_indexes.push(index);
1556 if !invalid_indexes.is_empty() {
1557 warn(Warning::InvalidVarIndexes {
1558 offset: index_offset,
1559 max: var_types.len(),
1560 invalid: invalid_indexes,
1564 let Some(&first_index) = dict_indexes.first() else {
1565 warn(Warning::NoVarIndexes {
1566 offset: index_offset,
1570 let var_type = var_types[first_index as usize - 1];
1571 let mut wrong_type_indexes = Vec::new();
1572 dict_indexes.retain(|&index| {
1573 if var_types[index as usize - 1] != var_type {
1574 wrong_type_indexes.push(index);
1580 if !wrong_type_indexes.is_empty() {
1581 warn(Warning::MixedVarTypes {
1582 offset: index_offset,
1584 wrong_types: wrong_type_indexes,
1590 .map(|(value, label)| ValueLabel {
1591 value: Value::from_raw(&value, var_type, endian),
1596 let end_offset = r.stream_position()?;
1597 Ok(Some(Record::ValueLabel(ValueLabelRecord {
1598 offsets: label_offset..end_offset,
1605 fn decode(self, decoder: &Decoder) -> ValueLabelRecord<RawStr<8>, String> {
1609 .map(|ValueLabel { value, label }| ValueLabel {
1611 label: decoder.decode(label).to_string(),
1615 offsets: self.offsets.clone(),
1617 dict_indexes: self.dict_indexes.clone(),
1618 var_type: self.var_type,
1623 #[derive(Clone, Debug)]
1624 pub struct DocumentRecord<S>
1628 pub offsets: Range<u64>,
1630 /// The document, as an array of lines. Raw lines are exactly 80 bytes long
1631 /// and are right-padded with spaces without any new-line termination.
1635 pub type RawDocumentLine = RawStr<DOC_LINE_LEN>;
1637 /// Length of a line in a document. Document lines are fixed-length and
1638 /// padded on the right with spaces.
1639 pub const DOC_LINE_LEN: usize = 80;
1641 impl DocumentRecord<RawDocumentLine> {
1642 /// Maximum number of lines we will accept in a document. This is simply
1643 /// the maximum number that will fit in a 32-bit space.
1644 pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
1646 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1647 let start_offset = r.stream_position()?;
1648 let n: u32 = endian.parse(read_bytes(r)?);
1650 if n > Self::MAX_LINES {
1651 Err(Error::BadDocumentLength {
1652 offset: start_offset,
1654 max: Self::MAX_LINES,
1657 let mut lines = Vec::with_capacity(n);
1659 lines.push(RawStr(read_bytes(r)?));
1661 let end_offset = r.stream_position()?;
1662 Ok(Record::Document(DocumentRecord {
1663 offsets: start_offset..end_offset,
1669 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
1670 DecodedRecord::Document(DocumentRecord {
1671 offsets: self.offsets.clone(),
1675 .map(|s| decoder.decode_slice(&s.0).to_string())
1681 impl<S> Header for DocumentRecord<S>
1685 fn offsets(&self) -> Range<u64> {
1686 self.offsets.clone()
1690 trait ExtensionRecord {
1692 const SIZE: Option<u32>;
1693 const COUNT: Option<u32>;
1694 const NAME: &'static str;
1695 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
1698 #[derive(Clone, Debug)]
1699 pub struct IntegerInfoRecord {
1700 pub offsets: Range<u64>,
1701 pub version: (i32, i32, i32),
1702 pub machine_code: i32,
1703 pub floating_point_rep: i32,
1704 pub compression_code: i32,
1705 pub endianness: i32,
1706 pub character_code: i32,
// Parser for the "integer record" extension (subtype 3), whose payload is
// eight 4-byte integers.
1709 impl ExtensionRecord for IntegerInfoRecord {
1710 const SUBTYPE: u32 = 3;
1711 const SIZE: Option<u32> = Some(4);
1712 const COUNT: Option<u32> = Some(8);
1713 const NAME: &'static str = "integer record";
// Decodes the eight integers in `ext.data` using `endian`, after
// `check_size` has confirmed the element size and count declared above.
1715 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
1716 ext.check_size::<Self>()?;
1718 let mut input = &ext.data[..];
1719 let data: Vec<i32> = (0..8)
// NOTE(review): the `unwrap` relies on `ext.data` really holding
// `size * count` (here 32) bytes, as `Extension` documents -- confirm that
// no caller builds an `Extension` with shorter data.
1720 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
// The integers are assigned in file order: three version components, then
// machine code, floating-point representation, compression code,
// endianness, and character code.
1722 Ok(Record::IntegerInfo(IntegerInfoRecord {
1723 offsets: ext.offsets.clone(),
1724 version: (data[0], data[1], data[2]),
1725 machine_code: data[3],
1726 floating_point_rep: data[4],
1727 compression_code: data[5],
1728 endianness: data[6],
1729 character_code: data[7],
1734 #[derive(Clone, Debug)]
1735 pub struct FloatInfoRecord {
1741 impl ExtensionRecord for FloatInfoRecord {
1742 const SUBTYPE: u32 = 4;
1743 const SIZE: Option<u32> = Some(8);
1744 const COUNT: Option<u32> = Some(3);
1745 const NAME: &'static str = "floating point record";
1747 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
1748 ext.check_size::<Self>()?;
1750 let mut input = &ext.data[..];
1751 let data: Vec<f64> = (0..3)
1752 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1754 Ok(Record::FloatInfo(FloatInfoRecord {
1762 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1763 pub enum CategoryLabels {
1768 #[derive(Clone, Debug)]
1769 pub enum MultipleResponseType {
1772 labels: CategoryLabels,
1777 impl MultipleResponseType {
1778 fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
1779 let (mr_type, input) = match input.split_first() {
1780 Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
1781 Some((b'D', input)) => {
1782 let (value, input) = parse_counted_string(input)?;
1784 MultipleResponseType::MultipleDichotomy {
1786 labels: CategoryLabels::VarLabels,
1791 Some((b'E', input)) => {
1792 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
1793 (CategoryLabels::CountedValues, rest)
1794 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
1795 (CategoryLabels::VarLabels, rest)
1797 return Err(Warning::TBD);
1799 let (value, input) = parse_counted_string(input)?;
1801 MultipleResponseType::MultipleDichotomy { value, labels },
1805 _ => return Err(Warning::TBD),
1807 Ok((mr_type, input))
1811 #[derive(Clone, Debug)]
1812 pub struct MultipleResponseSet<I, S>
1819 pub mr_type: MultipleResponseType,
1820 pub short_names: Vec<I>,
1823 impl MultipleResponseSet<RawString, RawString> {
1824 fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
1825 let Some(equals) = input.iter().position(|&b| b == b'=') else {
1826 return Err(Warning::TBD);
1828 let (name, input) = input.split_at(equals);
1829 let (mr_type, input) = MultipleResponseType::parse(input)?;
1830 let Some(input) = input.strip_prefix(b" ") else {
1831 return Err(Warning::TBD);
1833 let (label, mut input) = parse_counted_string(input)?;
1834 let mut vars = Vec::new();
1835 while input.first() != Some(&b'\n') {
1836 match input.split_first() {
1837 Some((b' ', rest)) => {
1838 let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
1839 return Err(Warning::TBD);
1841 let (var, rest) = rest.split_at(length);
1842 if !var.is_empty() {
1843 vars.push(var.into());
1847 _ => return Err(Warning::TBD),
1850 while input.first() == Some(&b'\n') {
1851 input = &input[1..];
1854 MultipleResponseSet {
1867 ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
1868 let mut short_names = Vec::with_capacity(self.short_names.len());
1869 for short_name in self.short_names.iter() {
1870 if let Some(short_name) = decoder
1871 .decode_identifier(short_name)
1872 .map_err(Warning::InvalidMrSetName)
1873 .issue_warning(&decoder.warn)
1875 short_names.push(short_name);
1878 Ok(MultipleResponseSet {
1880 .decode_identifier(&self.name)
1881 .map_err(Warning::InvalidMrSetVariableName)?,
1882 label: decoder.decode(&self.label).to_string(),
1883 mr_type: self.mr_type.clone(),
1889 #[derive(Clone, Debug)]
1890 pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
1895 impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
1896 const SUBTYPE: u32 = 7;
1897 const SIZE: Option<u32> = Some(1);
1898 const COUNT: Option<u32> = None;
1899 const NAME: &'static str = "multiple response set record";
1901 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
1902 ext.check_size::<Self>()?;
1904 let mut input = &ext.data[..];
1905 let mut sets = Vec::new();
1906 while !input.is_empty() {
1907 let (set, rest) = MultipleResponseSet::parse(input)?;
1911 Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
1915 impl MultipleResponseRecord<RawString, RawString> {
1916 fn decode(self, decoder: &Decoder) -> DecodedRecord {
1917 let mut sets = Vec::new();
1918 for set in self.0.iter() {
1919 if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) {
1923 DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
1927 fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
1928 let Some(space) = input.iter().position(|&b| b == b' ') else {
1929 return Err(Warning::TBD);
1931 let Ok(length) = from_utf8(&input[..space]) else {
1932 return Err(Warning::TBD);
1934 let Ok(length): Result<usize, _> = length.parse() else {
1935 return Err(Warning::TBD);
1938 let input = &input[space + 1..];
1939 if input.len() < length {
1940 return Err(Warning::TBD);
1943 let (string, rest) = input.split_at(length);
1944 Ok((string.into(), rest))
1947 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1955 pub fn default_for_type(var_type: VarType) -> Option<Measure> {
1957 VarType::Numeric => None,
1958 VarType::String => Some(Self::Nominal),
1962 fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
1965 1 => Ok(Some(Measure::Nominal)),
1966 2 => Ok(Some(Measure::Ordinal)),
1967 3 => Ok(Some(Measure::Scale)),
1968 _ => Err(Warning::InvalidMeasurement(source)),
1973 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1974 pub enum Alignment {
1981 fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
1984 1 => Ok(Some(Alignment::Left)),
1985 2 => Ok(Some(Alignment::Right)),
1986 3 => Ok(Some(Alignment::Center)),
1987 _ => Err(Warning::InvalidAlignment(source)),
1991 pub fn default_for_type(var_type: VarType) -> Self {
1993 VarType::Numeric => Self::Right,
1994 VarType::String => Self::Left,
1999 #[derive(Clone, Debug)]
2000 pub struct VarDisplay {
2001 pub measure: Option<Measure>,
2002 pub width: Option<u32>,
2003 pub alignment: Option<Alignment>,
2006 #[derive(Clone, Debug)]
2007 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
2009 impl VarDisplayRecord {
2010 const SUBTYPE: u32 = 11;
2016 warn: &dyn Fn(Warning),
2017 ) -> Result<Record, Warning> {
2019 return Err(Warning::BadRecordSize {
2020 offset: ext.offsets.start,
2021 record: String::from("variable display record"),
2027 let has_width = if ext.count as usize == 3 * n_vars {
2029 } else if ext.count as usize == 2 * n_vars {
2032 return Err(Warning::TBD);
2035 let mut var_displays = Vec::new();
2036 let mut input = &ext.data[..];
2037 for _ in 0..n_vars {
2038 let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
2039 .issue_warning(&warn)
2041 let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
2042 let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
2043 .issue_warning(&warn)
2045 var_displays.push(VarDisplay {
2051 Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
2055 #[derive(Clone, Debug)]
2056 pub struct LongStringMissingValues<N, V>
2065 pub missing_values: MissingValues<V>,
2068 impl LongStringMissingValues<RawString, RawStr<8>> {
2072 ) -> Result<LongStringMissingValues<Identifier, String>, IdError> {
2073 Ok(LongStringMissingValues {
2074 var_name: decoder.decode_identifier(&self.var_name)?,
2075 missing_values: self.missing_values.decode(decoder),
2080 #[derive(Clone, Debug)]
2081 pub struct LongStringMissingValueRecord<N, V>(pub Vec<LongStringMissingValues<N, V>>)
2086 impl ExtensionRecord for LongStringMissingValueRecord<RawString, RawStr<8>> {
2087 const SUBTYPE: u32 = 22;
2088 const SIZE: Option<u32> = Some(1);
2089 const COUNT: Option<u32> = None;
2090 const NAME: &'static str = "long string missing values record";
2092 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2093 ext.check_size::<Self>()?;
2095 let mut input = &ext.data[..];
2096 let mut missing_value_set = Vec::new();
2097 while !input.is_empty() {
2098 let var_name = read_string(&mut input, endian)?;
2099 let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
2100 let value_len: u32 = endian.parse(read_bytes(&mut input)?);
2102 let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
2103 return Err(Warning::BadLongMissingValueLength {
2104 record_offset: ext.offsets.start,
2109 let mut values = Vec::new();
2110 for i in 0..n_missing_values {
2111 let value: [u8; 8] = read_bytes(&mut input)?;
2112 let numeric_value: u64 = endian.parse(value);
2113 let value = if i > 0 && numeric_value == 8 {
2114 // Tolerate files written by old, buggy versions of PSPP
2115 // where we believed that the value_length was repeated
2116 // before each missing value.
2117 read_bytes(&mut input)?
2121 values.push(Value::String(RawStr(value)));
2123 let missing_values = MissingValues {
2127 missing_value_set.push(LongStringMissingValues {
2132 Ok(Record::LongStringMissingValues(
2133 LongStringMissingValueRecord(missing_value_set),
2138 impl LongStringMissingValueRecord<RawString, RawStr<8>> {
2139 pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier, String> {
2140 let mut mvs = Vec::with_capacity(self.0.len());
2141 for mv in self.0.iter() {
2142 if let Some(mv) = mv
2144 .map_err(Warning::InvalidLongStringMissingValueVariableName)
2145 .issue_warning(&decoder.warn)
2150 LongStringMissingValueRecord(mvs)
2154 #[derive(Clone, Debug)]
2155 pub struct EncodingRecord(pub String);
2157 impl ExtensionRecord for EncodingRecord {
2158 const SUBTYPE: u32 = 20;
2159 const SIZE: Option<u32> = Some(1);
2160 const COUNT: Option<u32> = None;
2161 const NAME: &'static str = "encoding record";
2163 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
2164 ext.check_size::<Self>()?;
2166 Ok(Record::Encoding(EncodingRecord(
2167 String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
2168 offset: ext.offsets.start,
2174 #[derive(Clone, Debug)]
2175 pub struct NumberOfCasesRecord {
2176 /// Always observed as 1.
2179 /// Number of cases.
2183 impl ExtensionRecord for NumberOfCasesRecord {
2184 const SUBTYPE: u32 = 16;
2185 const SIZE: Option<u32> = Some(8);
2186 const COUNT: Option<u32> = Some(2);
2187 const NAME: &'static str = "extended number of cases record";
2189 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2190 ext.check_size::<Self>()?;
2192 let mut input = &ext.data[..];
2193 let one = endian.parse(read_bytes(&mut input)?);
2194 let n_cases = endian.parse(read_bytes(&mut input)?);
2196 Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
2200 #[derive(Clone, Debug)]
2201 pub struct TextRecord {
2202 pub offsets: Range<u64>,
2205 pub rec_type: TextRecordType,
2207 /// The text content of the record.
2208 pub text: RawString,
2211 #[derive(Clone, Copy, Debug)]
2212 pub enum TextRecordType {
2222 fn new(extension: Extension, rec_type: TextRecordType) -> Self {
2224 offsets: extension.offsets,
2226 text: extension.data.into(),
2229 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
2230 match self.rec_type {
2231 TextRecordType::VariableSets => {
2232 DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder))
2234 TextRecordType::ProductInfo => {
2235 DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder))
2237 TextRecordType::LongNames => {
2238 DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder))
2240 TextRecordType::VeryLongStrings => {
2241 DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder))
2243 TextRecordType::FileAttributes => {
2244 DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder))
2246 TextRecordType::VariableAttributes => {
2247 DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder))
2253 #[derive(Clone, Debug)]
2254 pub struct VeryLongString {
2255 pub short_name: Identifier,
2259 impl VeryLongString {
2260 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
2261 let Some((short_name, length)) = input.split_once('=') else {
2262 return Err(Warning::TBD);
2264 let short_name = decoder
2265 .new_identifier(short_name)
2266 .map_err(Warning::InvalidLongStringName)?;
2267 let length = length.parse().map_err(|_| Warning::TBD)?;
2268 Ok(VeryLongString { short_name, length })
2272 #[derive(Clone, Debug)]
2273 pub struct VeryLongStringsRecord(Vec<VeryLongString>);
2275 impl VeryLongStringsRecord {
2276 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2277 let input = decoder.decode(&source.text);
2278 let mut very_long_strings = Vec::new();
2281 .map(|s| s.trim_end_matches('\t'))
2282 .filter(|s| !s.is_empty())
2284 if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
2285 very_long_strings.push(vls)
2288 VeryLongStringsRecord(very_long_strings)
2292 #[derive(Clone, Debug)]
2293 pub struct Attribute {
2294 pub name: Identifier,
2295 pub values: Vec<String>,
2299 fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
2300 let Some((name, mut input)) = input.split_once('(') else {
2301 return Err(Warning::TBD);
2304 .new_identifier(name)
2305 .map_err(Warning::InvalidAttributeName)?;
2306 let mut values = Vec::new();
2308 let Some((value, rest)) = input.split_once('\n') else {
2309 return Err(Warning::TBD);
2311 if let Some(stripped) = value
2313 .and_then(|value| value.strip_suffix('\''))
2315 values.push(stripped.into());
2317 decoder.warn(Warning::TBD);
2318 values.push(value.into());
2320 if let Some(rest) = rest.strip_prefix(')') {
2321 let attribute = Attribute { name, values };
2322 return Ok((attribute, rest));
2329 #[derive(Clone, Debug, Default)]
2330 pub struct AttributeSet(pub HashMap<Identifier, Vec<String>>);
2336 sentinel: Option<char>,
2337 ) -> Result<(AttributeSet, &'a str), Warning> {
2338 let mut attributes = HashMap::new();
2340 match input.chars().next() {
2341 None => break input,
2342 c if c == sentinel => break &input[1..],
2344 let (attribute, rest) = Attribute::parse(decoder, input)?;
2345 // XXX report duplicate name
2346 attributes.insert(attribute.name, attribute.values);
2351 Ok((AttributeSet(attributes), rest))
2355 #[derive(Clone, Debug, Default)]
2356 pub struct FileAttributeRecord(pub AttributeSet);
2358 impl FileAttributeRecord {
2359 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2360 let input = decoder.decode(&source.text);
2361 match AttributeSet::parse(decoder, &input, None).issue_warning(&decoder.warn) {
2362 Some((set, rest)) => {
2363 if !rest.is_empty() {
2364 decoder.warn(Warning::TBD);
2366 FileAttributeRecord(set)
2368 None => FileAttributeRecord::default(),
2373 #[derive(Clone, Debug)]
2374 pub struct VarAttributeSet {
2375 pub long_var_name: Identifier,
2376 pub attributes: AttributeSet,
2379 impl VarAttributeSet {
2380 fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Warning> {
2381 let Some((long_var_name, rest)) = input.split_once(':') else {
2382 return Err(Warning::TBD);
2384 let long_var_name = decoder
2385 .new_identifier(long_var_name)
2386 .map_err(Warning::InvalidAttributeVariableName)?;
2387 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?;
2388 let var_attribute = VarAttributeSet {
2392 Ok((var_attribute, rest))
2396 #[derive(Clone, Debug)]
2397 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
2399 impl VariableAttributeRecord {
2400 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2401 let decoded = decoder.decode(&source.text);
2402 let mut input = decoded.as_ref();
2403 let mut var_attribute_sets = Vec::new();
2404 while !input.is_empty() {
2405 let Some((var_attribute, rest)) =
2406 VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn)
2410 var_attribute_sets.push(var_attribute);
2413 VariableAttributeRecord(var_attribute_sets)
2417 #[derive(Clone, Debug)]
2418 pub struct LongName {
2419 pub short_name: Identifier,
2420 pub long_name: Identifier,
2424 fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
2425 let Some((short_name, long_name)) = input.split_once('=') else {
2426 return Err(Warning::TBD);
2428 let short_name = decoder
2429 .new_identifier(short_name)
2430 .map_err(Warning::InvalidShortName)?;
2431 let long_name = decoder
2432 .new_identifier(long_name)
2433 .map_err(Warning::InvalidLongName)?;
2441 #[derive(Clone, Debug)]
2442 pub struct LongNamesRecord(Vec<LongName>);
2444 impl LongNamesRecord {
2445 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2446 let input = decoder.decode(&source.text);
2447 let mut names = Vec::new();
2448 for pair in input.split('\t').filter(|s| !s.is_empty()) {
2449 if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) {
2450 names.push(long_name);
2453 LongNamesRecord(names)
2457 #[derive(Clone, Debug)]
2458 pub struct ProductInfoRecord(pub String);
2460 impl ProductInfoRecord {
2461 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2462 Self(decoder.decode(&source.text).into())
2465 #[derive(Clone, Debug)]
2466 pub struct VariableSet {
2468 pub vars: Vec<Identifier>,
2472 fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
2473 let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
2474 let mut vars = Vec::new();
2475 for var in input.split_ascii_whitespace() {
2476 if let Some(identifier) = decoder
2477 .new_identifier(var)
2478 .map_err(Warning::InvalidVariableSetName)
2479 .issue_warning(&decoder.warn)
2481 vars.push(identifier);
2491 #[derive(Clone, Debug)]
2492 pub struct VariableSetRecord {
2493 pub offsets: Range<u64>,
2494 pub sets: Vec<VariableSet>,
2497 impl VariableSetRecord {
2498 fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
2499 let mut sets = Vec::new();
2500 let input = decoder.decode(&source.text);
2501 for line in input.lines() {
2502 if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) {
2507 offsets: source.offsets.clone(),
2513 trait IssueWarning<T> {
2514 fn issue_warning<F>(self, warn: &F) -> Option<T>
2518 impl<T> IssueWarning<T> for Result<T, Warning> {
2519 fn issue_warning<F>(self, warn: &F) -> Option<T>
2524 Ok(result) => Some(result),
2533 #[derive(Clone, Debug)]
2534 pub struct Extension {
2535 pub offsets: Range<u64>,
2540 /// Size of each data element.
2543 /// Number of data elements.
2546 /// `size * count` bytes of data.
2551 fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
2552 if let Some(expected_size) = E::SIZE {
2553 if self.size != expected_size {
2554 return Err(Warning::BadRecordSize {
2555 offset: self.offsets.start,
2556 record: E::NAME.into(),
2562 if let Some(expected_count) = E::COUNT {
2563 if self.count != expected_count {
2564 return Err(Warning::BadRecordCount {
2565 offset: self.offsets.start,
2566 record: E::NAME.into(),
2575 fn read<R: Read + Seek>(
2579 warn: &dyn Fn(Warning),
2580 ) -> Result<Option<Record>, Error> {
2581 let subtype = endian.parse(read_bytes(r)?);
2582 let header_offset = r.stream_position()?;
2583 let size: u32 = endian.parse(read_bytes(r)?);
2584 let count = endian.parse(read_bytes(r)?);
2585 let Some(product) = size.checked_mul(count) else {
2586 return Err(Error::ExtensionRecordTooLarge {
2587 offset: header_offset,
2593 let start_offset = r.stream_position()?;
2594 let data = read_vec(r, product as usize)?;
2595 let end_offset = start_offset + product as u64;
2596 let extension = Extension {
2597 offsets: start_offset..end_offset,
2603 let result = match subtype {
2604 IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
2605 FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
2606 VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
2607 MultipleResponseRecord::SUBTYPE | 19 => {
2608 MultipleResponseRecord::parse(&extension, endian)
2610 LongStringValueLabelRecord::SUBTYPE => {
2611 LongStringValueLabelRecord::parse(&extension, endian)
2613 EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
2614 NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
2615 5 => Ok(Record::Text(TextRecord::new(
2617 TextRecordType::VariableSets,
2619 10 => Ok(Record::Text(TextRecord::new(
2621 TextRecordType::ProductInfo,
2623 13 => Ok(Record::Text(TextRecord::new(
2625 TextRecordType::LongNames,
2627 14 => Ok(Record::Text(TextRecord::new(
2629 TextRecordType::VeryLongStrings,
2631 17 => Ok(Record::Text(TextRecord::new(
2633 TextRecordType::FileAttributes,
2635 18 => Ok(Record::Text(TextRecord::new(
2637 TextRecordType::VariableAttributes,
2639 _ => Ok(Record::OtherExtension(extension)),
2642 Ok(result) => Ok(Some(result)),
2651 #[derive(Clone, Debug)]
2652 pub struct ZHeader {
2653 /// File offset to the start of the record.
2656 /// File offset to the ZLIB data header.
2657 pub zheader_offset: u64,
2659 /// File offset to the ZLIB trailer.
2660 pub ztrailer_offset: u64,
2662 /// Length of the ZLIB trailer in bytes.
2663 pub ztrailer_len: u64,
2667 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
2668 let offset = r.stream_position()?;
2669 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
2670 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
2671 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
2682 #[derive(Clone, Debug)]
2683 pub struct ZTrailer {
2684 /// File offset to the start of the record.
2687 /// Compression bias as a negative integer, e.g. -100.
2690 /// Always observed as zero.
2693 /// Uncompressed size of each block, except possibly the last. Only
2694 /// `0x3ff000` has been observed so far.
2695 pub block_size: u32,
2697 /// Block descriptors, always `(ztrailer_len - 24) / 24` of them.
2698 pub blocks: Vec<ZBlock>,
2701 #[derive(Clone, Debug)]
2703 /// Offset of block of data if simple compression were used.
2704 pub uncompressed_ofs: u64,
2706 /// Actual offset within the file of the compressed data block.
2707 pub compressed_ofs: u64,
2709 /// The number of bytes in this data block after decompression. This is
2710 /// `block_size` in every data block but the last, which may be smaller.
2711 pub uncompressed_size: u32,
2713 /// The number of bytes in this data block, as stored compressed in this
2715 pub compressed_size: u32,
2719 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
2721 uncompressed_ofs: endian.parse(read_bytes(r)?),
2722 compressed_ofs: endian.parse(read_bytes(r)?),
2723 uncompressed_size: endian.parse(read_bytes(r)?),
2724 compressed_size: endian.parse(read_bytes(r)?),
2730 fn read<R: Read + Seek>(
2735 ) -> Result<Option<ZTrailer>, Error> {
2736 let start_offset = reader.stream_position()?;
2737 if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
2740 let int_bias = endian.parse(read_bytes(reader)?);
2741 let zero = endian.parse(read_bytes(reader)?);
2742 let block_size = endian.parse(read_bytes(reader)?);
2743 let n_blocks: u32 = endian.parse(read_bytes(reader)?);
2744 let expected_n_blocks = (ztrailer_len - 24) / 24;
2745 if n_blocks as u64 != expected_n_blocks {
2746 return Err(Error::BadZlibTrailerNBlocks {
2747 offset: ztrailer_ofs,
2753 let blocks = (0..n_blocks)
2754 .map(|_| ZBlock::read(reader, endian))
2755 .collect::<Result<Vec<_>, _>>()?;
2756 reader.seek(SeekFrom::Start(start_offset))?;
2758 offset: ztrailer_ofs,
/// Tries to read exactly `N` bytes from `r`.
///
/// Returns `Ok(None)` if `r` yields no data at all (end of input), and
/// `Ok(Some(bytes))` once all `N` bytes have been read.
///
/// # Errors
///
/// Returns any I/O error from `r`, including `UnexpectedEof` when the input
/// ends after at least one but fewer than `N` bytes.
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
    let mut buf = [0; N];
    // A bare `read` may fail with `Interrupted`, which only means "try
    // again"; `read_exact` below already retries internally, so do the same
    // for this first read rather than reporting a spurious error.
    let n = loop {
        match r.read(&mut buf) {
            Ok(n) => break n,
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error),
        }
    };
    if n == 0 {
        return Ok(None);
    }
    if n < N {
        // Short read: the rest must be present or the input is truncated.
        r.read_exact(&mut buf[n..])?;
    }
    Ok(Some(buf))
}
/// Reads exactly `N` bytes from `r` and returns them as an array.
///
/// # Errors
///
/// Returns any I/O error from `r`, including `UnexpectedEof` if fewer than
/// `N` bytes remain.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut buf = [0; N];
    r.read_exact(&mut buf)?;
    Ok(buf)
}
/// Reads exactly `n` bytes from `r` into a new vector.
///
/// `n` usually comes from a length field in the file being read, so the
/// vector grows only as data actually arrives instead of being allocated up
/// front: a corrupt length cannot force a huge allocation.
///
/// # Errors
///
/// Returns any I/O error from `r`, or `UnexpectedEof` if fewer than `n`
/// bytes remain.
fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
    // Preallocate for ordinary sizes only; larger reads grow on demand.
    const MAX_PREALLOCATION: usize = 64 * 1024;

    let mut vec = Vec::with_capacity(n.min(MAX_PREALLOCATION));
    let limit = u64::try_from(n).unwrap_or(u64::MAX);
    r.by_ref().take(limit).read_to_end(&mut vec)?;
    if vec.len() < n {
        return Err(IoError::from(std::io::ErrorKind::UnexpectedEof));
    }
    Ok(vec)
}
2792 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
2793 let length: u32 = endian.parse(read_bytes(r)?);
2794 Ok(read_vec(r, length as usize)?.into())
2797 #[derive(Clone, Debug)]
2798 pub struct LongStringValueLabels<N, S>
2805 /// `(value, label)` pairs, where each value is `width` bytes.
2806 pub labels: Vec<(S, S)>,
2809 impl LongStringValueLabels<RawString, RawString> {
2813 ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
2814 let var_name = decoder.decode(&self.var_name);
2815 let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
2816 .map_err(Warning::InvalidLongStringValueLabelName)?;
2818 let mut labels = Vec::with_capacity(self.labels.len());
2819 for (value, label) in self.labels.iter() {
2820 let value = decoder.decode_exact_length(&value.0).to_string();
2821 let label = decoder.decode(label).to_string();
2822 labels.push((value, label));
2825 Ok(LongStringValueLabels {
2833 #[derive(Clone, Debug)]
2834 pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
2839 impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
2840 const SUBTYPE: u32 = 21;
2841 const SIZE: Option<u32> = Some(1);
2842 const COUNT: Option<u32> = None;
2843 const NAME: &'static str = "long string value labels record";
2845 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2846 ext.check_size::<Self>()?;
2848 let mut input = &ext.data[..];
2849 let mut label_set = Vec::new();
2850 while !input.is_empty() {
2851 let var_name = read_string(&mut input, endian)?;
2852 let width: u32 = endian.parse(read_bytes(&mut input)?);
2853 let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
2854 let mut labels = Vec::new();
2855 for _ in 0..n_labels {
2856 let value = read_string(&mut input, endian)?;
2857 let label = read_string(&mut input, endian)?;
2858 labels.push((value, label));
2860 label_set.push(LongStringValueLabels {
2866 Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
2872 impl LongStringValueLabelRecord<RawString, RawString> {
2873 fn decode(self, decoder: &Decoder) -> LongStringValueLabelRecord<Identifier, String> {
2874 let mut labels = Vec::with_capacity(self.0.len());
2875 for label in &self.0 {
2876 match label.decode(decoder) {
2877 Ok(set) => labels.push(set),
2878 Err(error) => decoder.warn(error),
2881 LongStringValueLabelRecord(labels)