3 encoding::{default_encoding, get_encoding, Error as EncodingError},
4 endian::{Endian, Parse, ToBytes},
5 identifier::{Error as IdError, Identifier},
8 use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding};
9 use flate2::read::ZlibDecoder;
15 collections::{HashMap, VecDeque},
16 fmt::{Debug, Display, Formatter, Result as FmtResult},
17 io::{Error as IoError, Read, Seek, SeekFrom},
24 use thiserror::Error as ThisError;
26 #[derive(ThisError, Debug)]
28 #[error("Not an SPSS system file")]
31 #[error("Invalid magic number {0:?}")]
34 #[error("I/O error ({0})")]
37 #[error("Invalid SAV compression code {0}")]
38 InvalidSavCompression(u32),
40 #[error("Invalid ZSAV compression code {0}")]
41 InvalidZsavCompression(u32),
43 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
44 BadDocumentLength { offset: u64, n: usize, max: usize },
46 #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
47 BadRecordType { offset: u64, rec_type: u32 },
49 #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")]
55 #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
56 BadVariableLabelCode {
63 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
65 BadNumericMissingValueCode { offset: u64, code: i32 },
67 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
68 BadStringMissingValueCode { offset: u64, code: i32 },
70 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
71 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
73 #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
74 ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
76 #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
77 TooManyVarIndexes { offset: u64, n: u32, max: u32 },
79 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
80 ExtensionRecordTooLarge {
87 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
95 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
97 EofInCompressedCase { offset: u64, case_ofs: u64 },
99 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
100 PartialCompressedCase { offset: u64, case_ofs: u64 },
102 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
103 CompressedNumberExpected { offset: u64, case_ofs: u64 },
105 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
106 CompressedStringExpected { offset: u64, case_ofs: u64 },
108 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
109 BadZlibTrailerNBlocks {
112 expected_n_blocks: u64,
117 EncodingError(EncodingError),
120 #[derive(ThisError, Debug)]
122 #[error("Unexpected end of data inside extension record.")]
125 #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
126 NoVarIndexes { offset: u64 },
128 #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
132 wrong_types: Vec<u32>,
135 #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")]
142 #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
150 #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
158 #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
159 BadLongMissingValueLength {
165 #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
166 BadEncodingName { offset: u64 },
// XXX This is risky because `text` might be arbitrarily long.
169 #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
170 MalformedString { encoding: String, text: String },
172 #[error("Invalid variable measurement level value {0}")]
173 InvalidMeasurement(u32),
175 #[error("Invalid variable display alignment value {0}")]
176 InvalidAlignment(u32),
178 #[error("Invalid attribute name. {0}")]
179 InvalidAttributeName(IdError),
181 #[error("Invalid variable name in attribute record. {0}")]
182 InvalidAttributeVariableName(IdError),
184 #[error("Invalid short name in long variable name record. {0}")]
185 InvalidShortName(IdError),
187 #[error("Invalid name in long variable name record. {0}")]
188 InvalidLongName(IdError),
190 #[error("Invalid variable name in very long string record. {0}")]
191 InvalidLongStringName(IdError),
193 #[error("Invalid variable name in variable set record. {0}")]
194 InvalidVariableSetName(IdError),
196 #[error("Invalid multiple response set name. {0}")]
197 InvalidMrSetName(IdError),
199 #[error("Invalid multiple response set variable name. {0}")]
200 InvalidMrSetVariableName(IdError),
202 #[error("Invalid variable name in long string missing values record. {0}")]
203 InvalidLongStringMissingValueVariableName(IdError),
205 #[error("Invalid variable name in long string value label record. {0}")]
206 InvalidLongStringValueLabelName(IdError),
209 EncodingError(EncodingError),
211 #[error("Details TBD")]
215 impl From<IoError> for Warning {
216 fn from(_source: IoError) -> Self {
217 Self::UnexpectedEndOfData
221 #[derive(Clone, Debug)]
223 Header(HeaderRecord<RawString>),
224 Variable(VariableRecord<RawString, RawStr<8>>),
225 ValueLabel(ValueLabelRecord<RawStr<8>, RawString>),
226 Document(DocumentRecord<RawDocumentLine>),
227 IntegerInfo(IntegerInfoRecord),
228 FloatInfo(FloatInfoRecord),
229 VarDisplay(VarDisplayRecord),
230 MultipleResponse(MultipleResponseRecord<RawString, RawString>),
231 LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
232 LongStringMissingValues(LongStringMissingValueRecord<RawString, RawStr<8>>),
233 Encoding(EncodingRecord),
234 NumberOfCases(NumberOfCasesRecord),
236 OtherExtension(Extension),
240 Cases(Rc<RefCell<Cases>>),
243 #[derive(Clone, Debug)]
244 pub enum DecodedRecord {
245 Header(HeaderRecord<String>),
246 Variable(VariableRecord<String, String>),
247 ValueLabel(ValueLabelRecord<RawStr<8>, String>),
248 Document(DocumentRecord<String>),
249 IntegerInfo(IntegerInfoRecord),
250 FloatInfo(FloatInfoRecord),
251 VarDisplay(VarDisplayRecord),
252 MultipleResponse(MultipleResponseRecord<Identifier, String>),
253 LongStringValueLabels(LongStringValueLabelRecord<Identifier, String>),
254 LongStringMissingValues(LongStringMissingValueRecord<Identifier, String>),
255 Encoding(EncodingRecord),
256 NumberOfCases(NumberOfCasesRecord),
257 VariableSets(VariableSetRecord),
258 ProductInfo(ProductInfoRecord),
259 LongNames(LongNamesRecord),
260 VeryLongStrings(VeryLongStringsRecord),
261 FileAttributes(FileAttributeRecord),
262 VariableAttributes(VariableAttributeRecord),
263 OtherExtension(Extension),
267 Cases(Rc<RefCell<Cases>>),
274 var_types: &[VarType],
275 warn: &dyn Fn(Warning),
276 ) -> Result<Option<Record>, Error>
280 let rec_type: u32 = endian.parse(read_bytes(reader)?);
282 2 => Ok(Some(VariableRecord::read(reader, endian)?)),
283 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
284 6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
285 7 => Extension::read(reader, endian, var_types.len(), warn),
286 999 => Ok(Some(Record::EndOfHeaders(
287 endian.parse(read_bytes(reader)?),
289 _ => Err(Error::BadRecordType {
290 offset: reader.stream_position()?,
296 pub fn decode(self, decoder: &Decoder) -> Result<DecodedRecord, Error> {
298 Record::Header(record) => record.decode(decoder),
299 Record::Variable(record) => record.decode(decoder),
300 Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
301 Record::Document(record) => record.decode(decoder),
302 Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
303 Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
304 Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
305 Record::MultipleResponse(record) => record.decode(decoder),
306 Record::LongStringValueLabels(record) => {
307 DecodedRecord::LongStringValueLabels(record.decode(decoder))
309 Record::LongStringMissingValues(record) => {
310 DecodedRecord::LongStringMissingValues(record.decode(decoder))
312 Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
313 Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
314 Record::Text(record) => record.decode(decoder),
315 Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
316 Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
317 Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
318 Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
319 Record::Cases(record) => DecodedRecord::Cases(record.clone()),
324 pub fn encoding_from_headers(
325 headers: &Vec<Record>,
326 warn: &impl Fn(Warning),
327 ) -> Result<&'static Encoding, Error> {
328 let mut encoding_record = None;
329 let mut integer_info_record = None;
330 for record in headers {
332 Record::Encoding(record) => encoding_record = Some(record),
333 Record::IntegerInfo(record) => integer_info_record = Some(record),
337 let encoding = encoding_record.map(|record| record.0.as_str());
338 let character_code = integer_info_record.map(|record| record.character_code);
339 match get_encoding(encoding, character_code) {
340 Ok(encoding) => Ok(encoding),
341 Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
343 warn(Warning::EncodingError(err));
344 // Warn that we're using the default encoding.
345 Ok(default_encoding())
350 // If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
351 // decoded as Latin-1 (actually bytes interpreted as Unicode code points).
352 fn default_decode(s: &[u8]) -> Cow<str> {
353 from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
356 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
357 pub enum Compression {
363 fn offsets(&self) -> Range<u64>;
367 pub struct HeaderRecord<S>
372 pub offsets: Range<u64>,
377 /// Eye-catcher string, product name, in the file's encoding. Padded
378 /// on the right with spaces.
381 /// Layout code, normally either 2 or 3.
382 pub layout_code: u32,
384 /// Number of variable positions, or `None` if the value in the file is
385 /// questionably trustworthy.
386 pub nominal_case_size: Option<u32>,
/// Compression type, if any.
389 pub compression: Option<Compression>,
391 /// 1-based variable index of the weight variable, or `None` if the file is
393 pub weight_index: Option<u32>,
395 /// Claimed number of cases, if known.
396 pub n_cases: Option<u32>,
398 /// Compression bias, usually 100.0.
401 /// `dd mmm yy` in the file's encoding.
402 pub creation_date: S,
404 /// `HH:MM:SS` in the file's encoding.
405 pub creation_time: S,
407 /// File label, in the file's encoding. Padded on the right with spaces.
410 /// Endianness of the data in the file header.
414 impl<S> HeaderRecord<S>
418 fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
422 writeln!(f, "{name:>17}: {:?}", value)
426 impl<S> Debug for HeaderRecord<S>
430 fn fmt(&self, f: &mut Formatter) -> FmtResult {
431 writeln!(f, "File header record:")?;
432 self.debug_field(f, "Magic", self.magic)?;
433 self.debug_field(f, "Product name", &self.eye_catcher)?;
434 self.debug_field(f, "Layout code", self.layout_code)?;
435 self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
436 self.debug_field(f, "Compression", self.compression)?;
437 self.debug_field(f, "Weight index", self.weight_index)?;
438 self.debug_field(f, "Number of cases", self.n_cases)?;
439 self.debug_field(f, "Compression bias", self.bias)?;
440 self.debug_field(f, "Creation date", &self.creation_date)?;
441 self.debug_field(f, "Creation time", &self.creation_time)?;
442 self.debug_field(f, "File label", &self.file_label)?;
443 self.debug_field(f, "Endianness", self.endian)
447 impl HeaderRecord<RawString> {
448 fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
449 let start = r.stream_position()?;
451 let magic: [u8; 4] = read_bytes(r)?;
452 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
454 let eye_catcher = RawString(read_vec(r, 60)?);
455 let layout_code: [u8; 4] = read_bytes(r)?;
456 let endian = Endian::identify_u32(2, layout_code)
457 .or_else(|| Endian::identify_u32(2, layout_code))
458 .ok_or_else(|| Error::NotASystemFile)?;
459 let layout_code = endian.parse(layout_code);
461 let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
462 let nominal_case_size =
463 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
465 let compression_code: u32 = endian.parse(read_bytes(r)?);
466 let compression = match (magic, compression_code) {
467 (Magic::Zsav, 2) => Some(Compression::ZLib),
468 (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
470 (_, 1) => Some(Compression::Simple),
471 (_, code) => return Err(Error::InvalidSavCompression(code)),
474 let weight_index: u32 = endian.parse(read_bytes(r)?);
475 let weight_index = (weight_index > 0).then_some(weight_index);
477 let n_cases: u32 = endian.parse(read_bytes(r)?);
478 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
480 let bias: f64 = endian.parse(read_bytes(r)?);
482 let creation_date = RawString(read_vec(r, 9)?);
483 let creation_time = RawString(read_vec(r, 8)?);
484 let file_label = RawString(read_vec(r, 64)?);
485 let _: [u8; 3] = read_bytes(r)?;
488 offsets: start..r.stream_position()?,
504 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
505 let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
506 let file_label = decoder.decode(&self.file_label).to_string();
507 let creation_date = decoder.decode(&self.creation_date).to_string();
508 let creation_time = decoder.decode(&self.creation_time).to_string();
509 DecodedRecord::Header(HeaderRecord {
511 weight_index: self.weight_index,
512 n_cases: self.n_cases,
514 offsets: self.offsets.clone(),
516 layout_code: self.layout_code,
517 nominal_case_size: self.nominal_case_size,
518 compression: self.compression,
528 pub encoding: &'static Encoding,
529 pub warn: Box<dyn Fn(Warning)>,
533 pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
535 F: Fn(Warning) + 'static,
539 warn: Box::new(warn),
542 fn warn(&self, warning: Warning) {
545 fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
546 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
548 self.warn(Warning::MalformedString {
549 encoding: self.encoding.name().into(),
550 text: output.clone().into(),
556 fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
557 self.decode_slice(input.0.as_slice())
560 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
561 /// re-encoding the result back into `self.encoding` will have exactly the
562 /// same length in bytes.
564 /// XXX warn about errors?
565 pub fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
566 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
567 // This is the common case. Usually there will be no errors.
570 // Unusual case. Don't bother to optimize it much.
571 let mut decoder = self.encoding.new_decoder_without_bom_handling();
572 let mut output = String::with_capacity(
574 .max_utf8_buffer_length_without_replacement(input.len())
577 let mut rest = input;
578 while !rest.is_empty() {
579 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
580 (DecoderResult::InputEmpty, _) => break,
581 (DecoderResult::OutputFull, _) => unreachable!(),
582 (DecoderResult::Malformed(a, b), consumed) => {
583 let skipped = a as usize + b as usize;
584 output.extend(repeat('?').take(skipped));
585 rest = &rest[consumed..];
589 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
594 pub fn decode_identifier(&self, input: &RawString) -> Result<Identifier, IdError> {
595 self.new_identifier(&self.decode(input))
598 pub fn new_identifier(&self, name: &str) -> Result<Identifier, IdError> {
599 Identifier::new(name, self.encoding)
603 impl<S> Header for HeaderRecord<S>
607 fn offsets(&self) -> Range<u64> {
612 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
614 /// Regular system file.
617 /// System file with Zlib-compressed data.
620 /// EBCDIC-encoded system file.
625 /// Magic number for a regular system file.
626 pub const SAV: [u8; 4] = *b"$FL2";
628 /// Magic number for a system file that contains zlib-compressed data.
629 pub const ZSAV: [u8; 4] = *b"$FL3";
631 /// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded
633 pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
636 impl Debug for Magic {
637 fn fmt(&self, f: &mut Formatter) -> FmtResult {
638 let s = match *self {
639 Magic::Sav => "$FL2",
640 Magic::Zsav => "$FL3",
641 Magic::Ebcdic => "($FL2 in EBCDIC)",
647 impl TryFrom<[u8; 4]> for Magic {
650 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
652 Magic::SAV => Ok(Magic::Sav),
653 Magic::ZSAV => Ok(Magic::Zsav),
654 Magic::EBCDIC => Ok(Magic::Ebcdic),
655 _ => Err(Error::BadMagic(value)),
660 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
667 pub fn from_width(width: VarWidth) -> VarType {
669 VarWidth::Numeric => Self::Numeric,
670 VarWidth::String(_) => Self::String,
674 pub fn opposite(self) -> VarType {
676 Self::Numeric => Self::String,
677 Self::String => Self::Numeric,
682 impl Display for VarType {
683 fn fmt(&self, f: &mut Formatter) -> FmtResult {
685 VarType::Numeric => write!(f, "numeric"),
686 VarType::String => write!(f, "string"),
691 #[derive(Copy, Clone)]
700 type RawValue = Value<RawStr<8>>;
702 impl<S> Debug for Value<S>
706 fn fmt(&self, f: &mut Formatter) -> FmtResult {
708 Value::Number(Some(number)) => write!(f, "{number:?}"),
709 Value::Number(None) => write!(f, "SYSMIS"),
710 Value::String(s) => write!(f, "{:?}", s),
716 fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Self, IoError> {
718 &UntypedValue(read_bytes(r)?),
724 pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
726 VarType::String => Value::String(RawStr(raw.0)),
727 VarType::Numeric => {
728 let number: f64 = endian.parse(raw.0);
729 Value::Number((number != -f64::MAX).then_some(number))
734 fn read_case<R: Read + Seek>(
736 var_types: &[VarType],
738 ) -> Result<Option<Vec<Self>>, Error> {
739 let case_start = reader.stream_position()?;
740 let mut values = Vec::with_capacity(var_types.len());
741 for (i, &var_type) in var_types.iter().enumerate() {
742 let Some(raw) = try_read_bytes(reader)? else {
746 let offset = reader.stream_position()?;
747 return Err(Error::EofInCase {
749 case_ofs: offset - case_start,
750 case_len: var_types.len() * 8,
754 values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
759 fn read_compressed_case<R: Read + Seek>(
761 var_types: &[VarType],
762 codes: &mut VecDeque<u8>,
765 ) -> Result<Option<Vec<Self>>, Error> {
766 let case_start = reader.stream_position()?;
767 let mut values = Vec::with_capacity(var_types.len());
768 for (i, &var_type) in var_types.iter().enumerate() {
770 let Some(code) = codes.pop_front() else {
771 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
775 let offset = reader.stream_position()?;
776 return Err(Error::EofInCompressedCase {
778 case_ofs: offset - case_start,
782 codes.extend(new_codes.into_iter());
787 1..=251 => match var_type {
788 VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
790 break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
797 let offset = reader.stream_position()?;
798 return Err(Error::PartialCompressedCase {
800 case_ofs: offset - case_start,
805 break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
807 254 => match var_type {
808 VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC
809 VarType::Numeric => {
810 return Err(Error::CompressedStringExpected {
812 case_ofs: reader.stream_position()? - case_start,
816 255 => match var_type {
817 VarType::Numeric => break Self::Number(None),
819 return Err(Error::CompressedNumberExpected {
821 case_ofs: reader.stream_position()? - case_start,
832 fn decode(self, decoder: &Decoder) -> Value<String> {
834 Self::Number(x) => Value::Number(x),
835 Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
840 struct ZlibDecodeMultiple<R>
844 reader: Option<ZlibDecoder<R>>,
847 impl<R> ZlibDecodeMultiple<R>
851 fn new(reader: R) -> ZlibDecodeMultiple<R> {
853 reader: Some(ZlibDecoder::new(reader)),
858 impl<R> Read for ZlibDecodeMultiple<R>
862 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
864 match self.reader.as_mut().unwrap().read(buf)? {
866 let inner = self.reader.take().unwrap().into_inner();
867 self.reader = Some(ZlibDecoder::new(inner));
875 impl<R> Seek for ZlibDecodeMultiple<R>
879 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
880 self.reader.as_mut().unwrap().get_mut().seek(pos)
889 ztrailer_offset: u64,
898 R: Read + Seek + 'static,
901 warn: Box<dyn Fn(Warning)>,
903 header: HeaderRecord<RawString>,
904 var_types: Vec<VarType>,
911 R: Read + Seek + 'static,
913 pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
915 F: Fn(Warning) + 'static,
917 let header = HeaderRecord::read(&mut reader)?;
919 reader: Some(reader),
920 warn: Box::new(warn),
922 var_types: Vec::new(),
923 state: ReaderState::Start,
926 fn cases(&mut self) -> Cases {
927 self.state = ReaderState::End;
929 self.reader.take().unwrap(),
930 take(&mut self.var_types),
934 fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
936 ReaderState::Start => {
937 self.state = ReaderState::Headers;
938 Some(Ok(Record::Header(self.header.clone())))
940 ReaderState::Headers => {
943 self.reader.as_mut().unwrap(),
945 self.var_types.as_slice(),
948 Ok(Some(record)) => break record,
950 Err(error) => return Some(Err(error)),
954 Record::Variable(VariableRecord { width, .. }) => {
955 self.var_types.push(if width == 0 {
961 Record::EndOfHeaders(_) => {
962 self.state = if let Some(Compression::ZLib) = self.header.compression {
963 ReaderState::ZlibHeader
972 ReaderState::ZlibHeader => {
973 let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
975 Ok(zheader) => zheader,
976 Err(error) => return Some(Err(error)),
978 self.state = ReaderState::ZlibTrailer {
979 ztrailer_offset: zheader.ztrailer_offset,
980 ztrailer_len: zheader.ztrailer_len,
982 Some(Ok(Record::ZHeader(zheader)))
984 ReaderState::ZlibTrailer {
988 match ZTrailer::read(
989 self.reader.as_mut().unwrap(),
994 Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
995 Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
996 Err(error) => Some(Err(error)),
999 ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
1000 ReaderState::End => None,
1005 impl<R> Iterator for Reader<R>
1007 R: Read + Seek + 'static,
1009 type Item = Result<Record, Error>;
1011 fn next(&mut self) -> Option<Self::Item> {
1012 let retval = self._next();
1013 if matches!(retval, Some(Err(_))) {
1014 self.state = ReaderState::End;
1020 trait ReadSeek: Read + Seek {}
1021 impl<T> ReadSeek for T where T: Read + Seek {}
1024 reader: Box<dyn ReadSeek>,
1025 var_types: Vec<VarType>,
1026 compression: Option<Compression>,
1029 codes: VecDeque<u8>,
1033 impl Debug for Cases {
1034 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1040 fn new<R>(reader: R, var_types: Vec<VarType>, header: &HeaderRecord<RawString>) -> Self
1042 R: Read + Seek + 'static,
1045 reader: if header.compression == Some(Compression::ZLib) {
1046 Box::new(ZlibDecodeMultiple::new(reader))
1051 compression: header.compression,
1053 endian: header.endian,
1054 codes: VecDeque::with_capacity(8),
1060 impl Iterator for Cases {
1061 type Item = Result<Vec<RawValue>, Error>;
1063 fn next(&mut self) -> Option<Self::Item> {
1068 let retval = if self.compression.is_some() {
1069 Value::read_compressed_case(
1078 Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
1080 self.eof = matches!(retval, None | Some(Err(_)));
1085 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
1086 pub struct Spec(pub u32);
1088 impl Debug for Spec {
1089 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1090 let type_ = format_name(self.0 >> 16);
1091 let w = (self.0 >> 8) & 0xff;
1092 let d = self.0 & 0xff;
1093 write!(f, "{:06x} ({type_}{w}.{d})", self.0)
1097 fn format_name(type_: u32) -> Cow<'static, str> {
1136 _ => return format!("<unknown format {type_}>").into(),
1142 pub struct MissingValues<S = String>
1146 /// Individual missing values, up to 3 of them.
1147 pub values: Vec<Value<S>>,
1149 /// Optional range of missing values.
1150 pub range: Option<(Value<S>, Value<S>)>,
1153 impl<S> Debug for MissingValues<S>
1157 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1158 for (i, value) in self.values.iter().enumerate() {
1162 write!(f, "{value:?}")?;
1165 if let Some((low, high)) = &self.range {
1166 if !self.values.is_empty() {
1169 write!(f, "{low:?} THRU {high:?}")?;
1172 if self.is_empty() {
1180 impl<S> MissingValues<S>
1184 fn is_empty(&self) -> bool {
1185 self.values.is_empty() && self.range.is_none()
1189 impl<S> Default for MissingValues<S>
1193 fn default() -> Self {
1201 impl MissingValues<RawStr<8>> {
1202 fn read<R: Read + Seek>(
1208 ) -> Result<Self, Error> {
1209 let (n_values, has_range) = match (width, code) {
1210 (_, 0..=3) => (code, false),
1211 (0, -2) => (0, true),
1212 (0, -3) => (1, true),
1213 (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
1214 (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
1217 let var_type = if width == 0 {
1223 let mut values = Vec::new();
1224 for _ in 0..n_values {
1225 values.push(RawValue::read(r, var_type, endian)?);
1227 let range = if has_range {
1228 let low = RawValue::read(r, var_type, endian)?;
1229 let high = RawValue::read(r, var_type, endian)?;
1234 Ok(Self { values, range })
1236 fn decode(&self, decoder: &Decoder) -> MissingValues<String> {
1241 .map(|value| value.decode(decoder))
1246 .map(|(low, high)| (low.decode(decoder), high.decode(decoder))),
1252 pub struct VariableRecord<S, V>
1257 /// Range of offsets in file.
1258 pub offsets: Range<u64>,
1260 /// Variable width, in the range -1..=255.
1263 /// Variable name, padded on the right with spaces.
1267 pub print_format: Spec,
1270 pub write_format: Spec,
1273 pub missing_values: MissingValues<V>,
1275 /// Optional variable label.
1276 pub label: Option<S>,
1279 impl<S, V> Debug for VariableRecord<S, V>
1284 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1289 match self.width.cmp(&0) {
1290 Ordering::Greater => "string",
1291 Ordering::Equal => "numeric",
1292 Ordering::Less => "long string continuation record",
1295 writeln!(f, "Print format: {:?}", self.print_format)?;
1296 writeln!(f, "Write format: {:?}", self.write_format)?;
1297 writeln!(f, "Name: {:?}", &self.name)?;
1298 writeln!(f, "Variable label: {:?}", self.label)?;
1299 writeln!(f, "Missing values: {:?}", self.missing_values)
1303 impl VariableRecord<RawString, RawStr<8>> {
1304 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1305 let start_offset = r.stream_position()?;
1306 let width: i32 = endian.parse(read_bytes(r)?);
1307 if !(-1..=255).contains(&width) {
1308 return Err(Error::BadVariableWidth { start_offset, width });
1310 let code_offset = r.stream_position()?;
1311 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
1312 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
1313 let print_format = Spec(endian.parse(read_bytes(r)?));
1314 let write_format = Spec(endian.parse(read_bytes(r)?));
1315 let name = RawString(read_vec(r, 8)?);
1317 let label = match has_variable_label {
1320 let len: u32 = endian.parse(read_bytes(r)?);
1321 let read_len = len.min(65535) as usize;
1322 let label = RawString(read_vec(r, read_len)?);
1324 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
1325 let _ = read_vec(r, padding_bytes as usize)?;
1330 return Err(Error::BadVariableLabelCode {
1333 code: has_variable_label,
1338 let missing_values =
1339 MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
1341 let end_offset = r.stream_position()?;
1343 Ok(Record::Variable(VariableRecord {
1344 offsets: start_offset..end_offset,
1354 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
1355 DecodedRecord::Variable(VariableRecord {
1356 offsets: self.offsets.clone(),
1358 name: decoder.decode(&self.name).to_string(),
1359 print_format: self.print_format,
1360 write_format: self.write_format,
1361 missing_values: self.missing_values.decode(decoder),
1365 .map(|label| decoder.decode(label).to_string()),
1370 #[derive(Copy, Clone)]
1371 pub struct UntypedValue(pub [u8; 8]);
1373 impl Debug for UntypedValue {
1374 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1375 let little: f64 = Endian::Little.parse(self.0);
1376 let little = format!("{:?}", little);
1377 let big: f64 = Endian::Big.parse(self.0);
1378 let big = format!("{:?}", big);
1379 let number = if little.len() <= big.len() {
1384 write!(f, "{number}")?;
1386 let string = default_decode(&self.0);
1388 .split(|c: char| c == '\0' || c.is_control())
1391 write!(f, "{string:?}")?;
1397 pub struct RawString(pub Vec<u8>);
1399 impl From<Vec<u8>> for RawString {
1400 fn from(source: Vec<u8>) -> Self {
1405 impl From<&[u8]> for RawString {
1406 fn from(source: &[u8]) -> Self {
1411 impl Debug for RawString {
1412 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1413 write!(f, "{:?}", default_decode(self.0.as_slice()))
1417 #[derive(Copy, Clone)]
1418 pub struct RawStr<const N: usize>(pub [u8; N]);
1420 impl<const N: usize> From<[u8; N]> for RawStr<N> {
1421 fn from(source: [u8; N]) -> Self {
1426 impl<const N: usize> Debug for RawStr<N> {
1427 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1428 write!(f, "{:?}", default_decode(&self.0))
1432 #[derive(Clone, Debug)]
1433 pub struct ValueLabel<V, S>
1438 pub value: Value<V>,
1443 pub struct ValueLabelRecord<V, S>
1448 /// Range of offsets in file.
1449 pub offsets: Range<u64>,
1452 pub labels: Vec<ValueLabel<V, S>>,
/// The 1-based indexes of the variables that the labels apply to.
1455 pub dict_indexes: Vec<u32>,
1457 /// The types of the variables.
1458 pub var_type: VarType,
1461 impl<V, S> Debug for ValueLabelRecord<V, S>
1466 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1467 writeln!(f, "labels: ")?;
1468 for label in self.labels.iter() {
1469 writeln!(f, "{label:?}")?;
1471 write!(f, "apply to {} variables", self.var_type)?;
1472 for dict_index in self.dict_indexes.iter() {
1473 write!(f, " #{dict_index}")?;
1479 impl<V, S> Header for ValueLabelRecord<V, S>
1484 fn offsets(&self) -> Range<u64> {
1485 self.offsets.clone()
1489 impl<V, S> ValueLabelRecord<V, S>
1494 /// Maximum number of value labels in a record.
1495 pub const MAX_LABELS: u32 = u32::MAX / 8;
1497 /// Maximum number of variable indexes in a record.
1498 pub const MAX_INDEXES: u32 = u32::MAX / 8;
1501 impl ValueLabelRecord<RawStr<8>, RawString> {
1502 fn read<R: Read + Seek>(
1505 var_types: &[VarType],
1506 warn: &dyn Fn(Warning),
1507 ) -> Result<Option<Record>, Error> {
1508 let label_offset = r.stream_position()?;
1509 let n: u32 = endian.parse(read_bytes(r)?);
1510 if n > Self::MAX_LABELS {
1511 return Err(Error::BadNumberOfValueLabels {
1512 offset: label_offset,
1514 max: Self::MAX_LABELS,
1518 let mut labels = Vec::new();
1520 let value = UntypedValue(read_bytes(r)?);
1521 let label_len: u8 = endian.parse(read_bytes(r)?);
1522 let label_len = label_len as usize;
1523 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
1525 let mut label = read_vec(r, padded_len - 1)?;
1526 label.truncate(label_len);
1527 labels.push((value, RawString(label)));
1530 let index_offset = r.stream_position()?;
1531 let rec_type: u32 = endian.parse(read_bytes(r)?);
1533 return Err(Error::ExpectedVarIndexRecord {
1534 offset: index_offset,
1539 let n: u32 = endian.parse(read_bytes(r)?);
1540 if n > Self::MAX_INDEXES {
1541 return Err(Error::TooManyVarIndexes {
1542 offset: index_offset,
1544 max: Self::MAX_INDEXES,
1548 let index_offset = r.stream_position()?;
1549 let mut dict_indexes = Vec::with_capacity(n as usize);
1550 let mut invalid_indexes = Vec::new();
1552 let index: u32 = endian.parse(read_bytes(r)?);
1553 if index == 0 || index as usize > var_types.len() {
1554 dict_indexes.push(index);
1556 invalid_indexes.push(index);
1559 if !invalid_indexes.is_empty() {
1560 warn(Warning::InvalidVarIndexes {
1561 offset: index_offset,
1562 max: var_types.len(),
1563 invalid: invalid_indexes,
1567 let Some(&first_index) = dict_indexes.first() else {
1568 warn(Warning::NoVarIndexes {
1569 offset: index_offset,
1573 let var_type = var_types[first_index as usize - 1];
1574 let mut wrong_type_indexes = Vec::new();
1575 dict_indexes.retain(|&index| {
1576 if var_types[index as usize - 1] != var_type {
1577 wrong_type_indexes.push(index);
1583 if !wrong_type_indexes.is_empty() {
1584 warn(Warning::MixedVarTypes {
1585 offset: index_offset,
1587 wrong_types: wrong_type_indexes,
1593 .map(|(value, label)| ValueLabel {
1594 value: Value::from_raw(&value, var_type, endian),
1599 let end_offset = r.stream_position()?;
1600 Ok(Some(Record::ValueLabel(ValueLabelRecord {
1601 offsets: label_offset..end_offset,
1608 fn decode(self, decoder: &Decoder) -> ValueLabelRecord<RawStr<8>, String> {
1612 .map(|ValueLabel { value, label }| ValueLabel {
1614 label: decoder.decode(label).to_string(),
1618 offsets: self.offsets.clone(),
1620 dict_indexes: self.dict_indexes.clone(),
1621 var_type: self.var_type,
1626 #[derive(Clone, Debug)]
1627 pub struct DocumentRecord<S>
1631 pub offsets: Range<u64>,
1633 /// The document, as an array of lines. Raw lines are exactly 80 bytes long
1634 /// and are right-padded with spaces without any new-line termination.
1638 pub type RawDocumentLine = RawStr<DOC_LINE_LEN>;
1640 /// Length of a line in a document. Document lines are fixed-length and
1641 /// padded on the right with spaces.
1642 pub const DOC_LINE_LEN: usize = 80;
1644 impl DocumentRecord<RawDocumentLine> {
1645 /// Maximum number of lines we will accept in a document. This is simply
1646 /// the maximum number that will fit in a 32-bit space.
1647 pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
1649 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1650 let start_offset = r.stream_position()?;
1651 let n: u32 = endian.parse(read_bytes(r)?);
1653 if n > Self::MAX_LINES {
1654 Err(Error::BadDocumentLength {
1655 offset: start_offset,
1657 max: Self::MAX_LINES,
1660 let mut lines = Vec::with_capacity(n);
1662 lines.push(RawStr(read_bytes(r)?));
1664 let end_offset = r.stream_position()?;
1665 Ok(Record::Document(DocumentRecord {
1666 offsets: start_offset..end_offset,
1672 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
1673 DecodedRecord::Document(DocumentRecord {
1674 offsets: self.offsets.clone(),
1678 .map(|s| decoder.decode_slice(&s.0).to_string())
1684 impl<S> Header for DocumentRecord<S>
1688 fn offsets(&self) -> Range<u64> {
1689 self.offsets.clone()
1693 trait ExtensionRecord {
1695 const SIZE: Option<u32>;
1696 const COUNT: Option<u32>;
1697 const NAME: &'static str;
1698 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
/// Contents of the "integer record" extension (subtype 3): eight 32-bit
/// integers, stored here in file order.
#[derive(Clone, Debug)]
pub struct IntegerInfoRecord {
    /// Range of file offsets occupied by the record's data.
    pub offsets: Range<u64>,
    /// The first three integers in the record.
    pub version: (i32, i32, i32),
    /// The fourth integer in the record.
    pub machine_code: i32,
    /// The fifth integer in the record.
    pub floating_point_rep: i32,
    /// The sixth integer in the record.
    pub compression_code: i32,
    /// The seventh integer in the record.
    pub endianness: i32,
    /// The eighth integer in the record.
    pub character_code: i32,
}
1712 impl ExtensionRecord for IntegerInfoRecord {
1713 const SUBTYPE: u32 = 3;
1714 const SIZE: Option<u32> = Some(4);
1715 const COUNT: Option<u32> = Some(8);
1716 const NAME: &'static str = "integer record";
1718 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
1719 ext.check_size::<Self>()?;
1721 let mut input = &ext.data[..];
1722 let data: Vec<i32> = (0..8)
1723 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1725 Ok(Record::IntegerInfo(IntegerInfoRecord {
1726 offsets: ext.offsets.clone(),
1727 version: (data[0], data[1], data[2]),
1728 machine_code: data[3],
1729 floating_point_rep: data[4],
1730 compression_code: data[5],
1731 endianness: data[6],
1732 character_code: data[7],
1737 #[derive(Clone, Debug)]
1738 pub struct FloatInfoRecord {
1744 impl ExtensionRecord for FloatInfoRecord {
1745 const SUBTYPE: u32 = 4;
1746 const SIZE: Option<u32> = Some(8);
1747 const COUNT: Option<u32> = Some(3);
1748 const NAME: &'static str = "floating point record";
1750 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
1751 ext.check_size::<Self>()?;
1753 let mut input = &ext.data[..];
1754 let data: Vec<f64> = (0..3)
1755 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1757 Ok(Record::FloatInfo(FloatInfoRecord {
1765 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1766 pub enum CategoryLabels {
1771 #[derive(Clone, Debug)]
1772 pub enum MultipleResponseType {
1775 labels: CategoryLabels,
1780 impl MultipleResponseType {
1781 fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
1782 let (mr_type, input) = match input.split_first() {
1783 Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
1784 Some((b'D', input)) => {
1785 let (value, input) = parse_counted_string(input)?;
1787 MultipleResponseType::MultipleDichotomy {
1789 labels: CategoryLabels::VarLabels,
1794 Some((b'E', input)) => {
1795 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
1796 (CategoryLabels::CountedValues, rest)
1797 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
1798 (CategoryLabels::VarLabels, rest)
1800 return Err(Warning::TBD);
1802 let (value, input) = parse_counted_string(input)?;
1804 MultipleResponseType::MultipleDichotomy { value, labels },
1808 _ => return Err(Warning::TBD),
1810 Ok((mr_type, input))
1814 #[derive(Clone, Debug)]
1815 pub struct MultipleResponseSet<I, S>
1822 pub mr_type: MultipleResponseType,
1823 pub short_names: Vec<I>,
1826 impl MultipleResponseSet<RawString, RawString> {
1827 fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
1828 let Some(equals) = input.iter().position(|&b| b == b'=') else {
1829 return Err(Warning::TBD);
1831 let (name, input) = input.split_at(equals);
1832 let (mr_type, input) = MultipleResponseType::parse(input)?;
1833 let Some(input) = input.strip_prefix(b" ") else {
1834 return Err(Warning::TBD);
1836 let (label, mut input) = parse_counted_string(input)?;
1837 let mut vars = Vec::new();
1838 while input.first() != Some(&b'\n') {
1839 match input.split_first() {
1840 Some((b' ', rest)) => {
1841 let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
1842 return Err(Warning::TBD);
1844 let (var, rest) = rest.split_at(length);
1845 if !var.is_empty() {
1846 vars.push(var.into());
1850 _ => return Err(Warning::TBD),
1853 while input.first() == Some(&b'\n') {
1854 input = &input[1..];
1857 MultipleResponseSet {
1870 ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
1871 let mut short_names = Vec::with_capacity(self.short_names.len());
1872 for short_name in self.short_names.iter() {
1873 if let Some(short_name) = decoder
1874 .decode_identifier(short_name)
1875 .map_err(Warning::InvalidMrSetName)
1876 .issue_warning(&decoder.warn)
1878 short_names.push(short_name);
1881 Ok(MultipleResponseSet {
1883 .decode_identifier(&self.name)
1884 .map_err(Warning::InvalidMrSetVariableName)?,
1885 label: decoder.decode(&self.label).to_string(),
1886 mr_type: self.mr_type.clone(),
1892 #[derive(Clone, Debug)]
1893 pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
1898 impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
1899 const SUBTYPE: u32 = 7;
1900 const SIZE: Option<u32> = Some(1);
1901 const COUNT: Option<u32> = None;
1902 const NAME: &'static str = "multiple response set record";
1904 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
1905 ext.check_size::<Self>()?;
1907 let mut input = &ext.data[..];
1908 let mut sets = Vec::new();
1909 while !input.is_empty() {
1910 let (set, rest) = MultipleResponseSet::parse(input)?;
1914 Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
1918 impl MultipleResponseRecord<RawString, RawString> {
1919 fn decode(self, decoder: &Decoder) -> DecodedRecord {
1920 let mut sets = Vec::new();
1921 for set in self.0.iter() {
1922 if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) {
1926 DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
1930 fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
1931 let Some(space) = input.iter().position(|&b| b == b' ') else {
1932 return Err(Warning::TBD);
1934 let Ok(length) = from_utf8(&input[..space]) else {
1935 return Err(Warning::TBD);
1937 let Ok(length): Result<usize, _> = length.parse() else {
1938 return Err(Warning::TBD);
1941 let input = &input[space + 1..];
1942 if input.len() < length {
1943 return Err(Warning::TBD);
1946 let (string, rest) = input.split_at(length);
1947 Ok((string.into(), rest))
1950 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1958 pub fn default_for_type(var_type: VarType) -> Option<Measure> {
1960 VarType::Numeric => None,
1961 VarType::String => Some(Self::Nominal),
1965 fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
1968 1 => Ok(Some(Measure::Nominal)),
1969 2 => Ok(Some(Measure::Ordinal)),
1970 3 => Ok(Some(Measure::Scale)),
1971 _ => Err(Warning::InvalidMeasurement(source)),
1976 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1977 pub enum Alignment {
1984 fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
1987 1 => Ok(Some(Alignment::Left)),
1988 2 => Ok(Some(Alignment::Right)),
1989 3 => Ok(Some(Alignment::Center)),
1990 _ => Err(Warning::InvalidAlignment(source)),
1994 pub fn default_for_type(var_type: VarType) -> Self {
1996 VarType::Numeric => Self::Right,
1997 VarType::String => Self::Left,
/// Display settings for one variable, as read from a variable display record.
#[derive(Clone, Debug)]
pub struct VarDisplay {
    /// Measurement level, if the record gave a valid one.
    pub measure: Option<Measure>,
    /// Display width, present only when the record carries a width per
    /// variable.
    pub width: Option<u32>,
    /// Alignment, if the record gave a valid one.
    pub alignment: Option<Alignment>,
}
2009 #[derive(Clone, Debug)]
2010 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
2012 impl VarDisplayRecord {
2013 const SUBTYPE: u32 = 11;
2019 warn: &dyn Fn(Warning),
2020 ) -> Result<Record, Warning> {
2022 return Err(Warning::BadRecordSize {
2023 offset: ext.offsets.start,
2024 record: String::from("variable display record"),
2030 let has_width = if ext.count as usize == 3 * n_vars {
2032 } else if ext.count as usize == 2 * n_vars {
2035 return Err(Warning::TBD);
2038 let mut var_displays = Vec::new();
2039 let mut input = &ext.data[..];
2040 for _ in 0..n_vars {
2041 let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
2042 .issue_warning(&warn)
2044 let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
2045 let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
2046 .issue_warning(&warn)
2048 var_displays.push(VarDisplay {
2054 Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
2058 #[derive(Clone, Debug)]
2059 pub struct LongStringMissingValues<N, V>
2068 pub missing_values: MissingValues<V>,
2071 impl LongStringMissingValues<RawString, RawStr<8>> {
2075 ) -> Result<LongStringMissingValues<Identifier, String>, IdError> {
2076 Ok(LongStringMissingValues {
2077 var_name: decoder.decode_identifier(&self.var_name)?,
2078 missing_values: self.missing_values.decode(decoder),
2083 #[derive(Clone, Debug)]
2084 pub struct LongStringMissingValueRecord<N, V>(pub Vec<LongStringMissingValues<N, V>>)
2089 impl ExtensionRecord for LongStringMissingValueRecord<RawString, RawStr<8>> {
2090 const SUBTYPE: u32 = 22;
2091 const SIZE: Option<u32> = Some(1);
2092 const COUNT: Option<u32> = None;
2093 const NAME: &'static str = "long string missing values record";
2095 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2096 ext.check_size::<Self>()?;
2098 let mut input = &ext.data[..];
2099 let mut missing_value_set = Vec::new();
2100 while !input.is_empty() {
2101 let var_name = read_string(&mut input, endian)?;
2102 let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
2103 let value_len: u32 = endian.parse(read_bytes(&mut input)?);
2105 let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
2106 return Err(Warning::BadLongMissingValueLength {
2107 record_offset: ext.offsets.start,
2112 let mut values = Vec::new();
2113 for i in 0..n_missing_values {
2114 let value: [u8; 8] = read_bytes(&mut input)?;
2115 let numeric_value: u64 = endian.parse(value);
2116 let value = if i > 0 && numeric_value == 8 {
2117 // Tolerate files written by old, buggy versions of PSPP
2118 // where we believed that the value_length was repeated
2119 // before each missing value.
2120 read_bytes(&mut input)?
2124 values.push(Value::String(RawStr(value)));
2126 let missing_values = MissingValues {
2130 missing_value_set.push(LongStringMissingValues {
2135 Ok(Record::LongStringMissingValues(
2136 LongStringMissingValueRecord(missing_value_set),
2141 impl LongStringMissingValueRecord<RawString, RawStr<8>> {
2142 pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier, String> {
2143 let mut mvs = Vec::with_capacity(self.0.len());
2144 for mv in self.0.iter() {
2145 if let Some(mv) = mv
2147 .map_err(Warning::InvalidLongStringMissingValueVariableName)
2148 .issue_warning(&decoder.warn)
2153 LongStringMissingValueRecord(mvs)
2157 #[derive(Clone, Debug)]
2158 pub struct EncodingRecord(pub String);
2160 impl ExtensionRecord for EncodingRecord {
2161 const SUBTYPE: u32 = 20;
2162 const SIZE: Option<u32> = Some(1);
2163 const COUNT: Option<u32> = None;
2164 const NAME: &'static str = "encoding record";
2166 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
2167 ext.check_size::<Self>()?;
2169 Ok(Record::Encoding(EncodingRecord(
2170 String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
2171 offset: ext.offsets.start,
2177 #[derive(Clone, Debug)]
2178 pub struct NumberOfCasesRecord {
2179 /// Always observed as 1.
2182 /// Number of cases.
2186 impl ExtensionRecord for NumberOfCasesRecord {
2187 const SUBTYPE: u32 = 16;
2188 const SIZE: Option<u32> = Some(8);
2189 const COUNT: Option<u32> = Some(2);
2190 const NAME: &'static str = "extended number of cases record";
2192 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2193 ext.check_size::<Self>()?;
2195 let mut input = &ext.data[..];
2196 let one = endian.parse(read_bytes(&mut input)?);
2197 let n_cases = endian.parse(read_bytes(&mut input)?);
2199 Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
2203 #[derive(Clone, Debug)]
2204 pub struct TextRecord {
2205 pub offsets: Range<u64>,
2208 pub rec_type: TextRecordType,
2210 /// The text content of the record.
2211 pub text: RawString,
2214 #[derive(Clone, Copy, Debug)]
2215 pub enum TextRecordType {
2225 fn new(extension: Extension, rec_type: TextRecordType) -> Self {
2227 offsets: extension.offsets,
2229 text: extension.data.into(),
2232 pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
2233 match self.rec_type {
2234 TextRecordType::VariableSets => {
2235 DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder))
2237 TextRecordType::ProductInfo => {
2238 DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder))
2240 TextRecordType::LongNames => {
2241 DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder))
2243 TextRecordType::VeryLongStrings => {
2244 DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder))
2246 TextRecordType::FileAttributes => {
2247 DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder))
2249 TextRecordType::VariableAttributes => {
2250 DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder))
2256 #[derive(Clone, Debug)]
2257 pub struct VeryLongString {
2258 pub short_name: Identifier,
2262 impl VeryLongString {
2263 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
2264 let Some((short_name, length)) = input.split_once('=') else {
2265 return Err(Warning::TBD);
2267 let short_name = decoder
2268 .new_identifier(short_name)
2269 .map_err(Warning::InvalidLongStringName)?;
2270 let length = length.parse().map_err(|_| Warning::TBD)?;
2271 Ok(VeryLongString { short_name, length })
2275 #[derive(Clone, Debug)]
2276 pub struct VeryLongStringsRecord(Vec<VeryLongString>);
2278 impl VeryLongStringsRecord {
2279 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2280 let input = decoder.decode(&source.text);
2281 let mut very_long_strings = Vec::new();
2284 .map(|s| s.trim_end_matches('\t'))
2285 .filter(|s| !s.is_empty())
2287 if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
2288 very_long_strings.push(vls)
2291 VeryLongStringsRecord(very_long_strings)
/// One named attribute together with its values.
#[derive(Clone, Debug)]
pub struct Attribute {
    /// The attribute's name.
    pub name: Identifier,
    /// The attribute's values, in the order they were parsed.
    pub values: Vec<String>,
}
2302 fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
2303 let Some((name, mut input)) = input.split_once('(') else {
2304 return Err(Warning::TBD);
2307 .new_identifier(name)
2308 .map_err(Warning::InvalidAttributeName)?;
2309 let mut values = Vec::new();
2311 let Some((value, rest)) = input.split_once('\n') else {
2312 return Err(Warning::TBD);
2314 if let Some(stripped) = value
2316 .and_then(|value| value.strip_suffix('\''))
2318 values.push(stripped.into());
2320 decoder.warn(Warning::TBD);
2321 values.push(value.into());
2323 if let Some(rest) = rest.strip_prefix(')') {
2324 let attribute = Attribute { name, values };
2325 return Ok((attribute, rest));
2332 #[derive(Clone, Debug, Default)]
2333 pub struct AttributeSet(pub HashMap<Identifier, Vec<String>>);
2339 sentinel: Option<char>,
2340 ) -> Result<(AttributeSet, &'a str), Warning> {
2341 let mut attributes = HashMap::new();
2343 match input.chars().next() {
2344 None => break input,
2345 c if c == sentinel => break &input[1..],
2347 let (attribute, rest) = Attribute::parse(decoder, input)?;
2348 // XXX report duplicate name
2349 attributes.insert(attribute.name, attribute.values);
2354 Ok((AttributeSet(attributes), rest))
2358 #[derive(Clone, Debug, Default)]
2359 pub struct FileAttributeRecord(pub AttributeSet);
2361 impl FileAttributeRecord {
2362 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2363 let input = decoder.decode(&source.text);
2364 match AttributeSet::parse(decoder, &input, None).issue_warning(&decoder.warn) {
2365 Some((set, rest)) => {
2366 if !rest.is_empty() {
2367 decoder.warn(Warning::TBD);
2369 FileAttributeRecord(set)
2371 None => FileAttributeRecord::default(),
/// The attributes attached to one variable.
#[derive(Clone, Debug)]
pub struct VarAttributeSet {
    /// Name of the variable the attributes belong to.
    pub long_var_name: Identifier,
    /// The variable's attributes.
    pub attributes: AttributeSet,
}
2382 impl VarAttributeSet {
2383 fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Warning> {
2384 let Some((long_var_name, rest)) = input.split_once(':') else {
2385 return Err(Warning::TBD);
2387 let long_var_name = decoder
2388 .new_identifier(long_var_name)
2389 .map_err(Warning::InvalidAttributeVariableName)?;
2390 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?;
2391 let var_attribute = VarAttributeSet {
2395 Ok((var_attribute, rest))
2399 #[derive(Clone, Debug)]
2400 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
2402 impl VariableAttributeRecord {
2403 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2404 let decoded = decoder.decode(&source.text);
2405 let mut input = decoded.as_ref();
2406 let mut var_attribute_sets = Vec::new();
2407 while !input.is_empty() {
2408 let Some((var_attribute, rest)) =
2409 VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn)
2413 var_attribute_sets.push(var_attribute);
2416 VariableAttributeRecord(var_attribute_sets)
/// A pairing of a variable's short name with its long name.
#[derive(Clone, Debug)]
pub struct LongName {
    /// The name on the left of the `=` in the record.
    pub short_name: Identifier,
    /// The name on the right of the `=` in the record.
    pub long_name: Identifier,
}
2427 fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
2428 let Some((short_name, long_name)) = input.split_once('=') else {
2429 return Err(Warning::TBD);
2431 let short_name = decoder
2432 .new_identifier(short_name)
2433 .map_err(Warning::InvalidShortName)?;
2434 let long_name = decoder
2435 .new_identifier(long_name)
2436 .map_err(Warning::InvalidLongName)?;
2444 #[derive(Clone, Debug)]
2445 pub struct LongNamesRecord(Vec<LongName>);
2447 impl LongNamesRecord {
2448 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2449 let input = decoder.decode(&source.text);
2450 let mut names = Vec::new();
2451 for pair in input.split('\t').filter(|s| !s.is_empty()) {
2452 if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) {
2453 names.push(long_name);
2456 LongNamesRecord(names)
2460 #[derive(Clone, Debug)]
2461 pub struct ProductInfoRecord(pub String);
2463 impl ProductInfoRecord {
2464 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2465 Self(decoder.decode(&source.text).into())
2468 #[derive(Clone, Debug)]
2469 pub struct VariableSet {
2471 pub vars: Vec<Identifier>,
2475 fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
2476 let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
2477 let mut vars = Vec::new();
2478 for var in input.split_ascii_whitespace() {
2479 if let Some(identifier) = decoder
2480 .new_identifier(var)
2481 .map_err(Warning::InvalidVariableSetName)
2482 .issue_warning(&decoder.warn)
2484 vars.push(identifier);
/// The variable sets parsed from a variable sets text record.
#[derive(Clone, Debug)]
pub struct VariableSetRecord {
    /// Range of file offsets copied from the text record this was decoded
    /// from.
    pub offsets: Range<u64>,
    /// The sets, one per successfully parsed line.
    pub sets: Vec<VariableSet>,
}
2500 impl VariableSetRecord {
2501 fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
2502 let mut sets = Vec::new();
2503 let input = decoder.decode(&source.text);
2504 for line in input.lines() {
2505 if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) {
2510 offsets: source.offsets.clone(),
2516 trait IssueWarning<T> {
2517 fn issue_warning<F>(self, warn: &F) -> Option<T>
2521 impl<T> IssueWarning<T> for Result<T, Warning> {
2522 fn issue_warning<F>(self, warn: &F) -> Option<T>
2527 Ok(result) => Some(result),
2536 #[derive(Clone, Debug)]
2537 pub struct Extension {
2538 pub offsets: Range<u64>,
2543 /// Size of each data element.
2546 /// Number of data elements.
2549 /// `size * count` bytes of data.
2554 fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
2555 if let Some(expected_size) = E::SIZE {
2556 if self.size != expected_size {
2557 return Err(Warning::BadRecordSize {
2558 offset: self.offsets.start,
2559 record: E::NAME.into(),
2565 if let Some(expected_count) = E::COUNT {
2566 if self.count != expected_count {
2567 return Err(Warning::BadRecordCount {
2568 offset: self.offsets.start,
2569 record: E::NAME.into(),
2578 fn read<R: Read + Seek>(
2582 warn: &dyn Fn(Warning),
2583 ) -> Result<Option<Record>, Error> {
2584 let subtype = endian.parse(read_bytes(r)?);
2585 let header_offset = r.stream_position()?;
2586 let size: u32 = endian.parse(read_bytes(r)?);
2587 let count = endian.parse(read_bytes(r)?);
2588 let Some(product) = size.checked_mul(count) else {
2589 return Err(Error::ExtensionRecordTooLarge {
2590 offset: header_offset,
2596 let start_offset = r.stream_position()?;
2597 let data = read_vec(r, product as usize)?;
2598 let end_offset = start_offset + product as u64;
2599 let extension = Extension {
2600 offsets: start_offset..end_offset,
2606 let result = match subtype {
2607 IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
2608 FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
2609 VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
2610 MultipleResponseRecord::SUBTYPE | 19 => {
2611 MultipleResponseRecord::parse(&extension, endian)
2613 LongStringValueLabelRecord::SUBTYPE => {
2614 LongStringValueLabelRecord::parse(&extension, endian)
2616 EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
2617 NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
2618 5 => Ok(Record::Text(TextRecord::new(
2620 TextRecordType::VariableSets,
2622 10 => Ok(Record::Text(TextRecord::new(
2624 TextRecordType::ProductInfo,
2626 13 => Ok(Record::Text(TextRecord::new(
2628 TextRecordType::LongNames,
2630 14 => Ok(Record::Text(TextRecord::new(
2632 TextRecordType::VeryLongStrings,
2634 17 => Ok(Record::Text(TextRecord::new(
2636 TextRecordType::FileAttributes,
2638 18 => Ok(Record::Text(TextRecord::new(
2640 TextRecordType::VariableAttributes,
2642 _ => Ok(Record::OtherExtension(extension)),
2645 Ok(result) => Ok(Some(result)),
2654 #[derive(Clone, Debug)]
2655 pub struct ZHeader {
2656 /// File offset to the start of the record.
2659 /// File offset to the ZLIB data header.
2660 pub zheader_offset: u64,
2662 /// File offset to the ZLIB trailer.
2663 pub ztrailer_offset: u64,
2665 /// Length of the ZLIB trailer in bytes.
2666 pub ztrailer_len: u64,
2670 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
2671 let offset = r.stream_position()?;
2672 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
2673 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
2674 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
2685 #[derive(Clone, Debug)]
2686 pub struct ZTrailer {
2687 /// File offset to the start of the record.
2690 /// Compression bias as a negative integer, e.g. -100.
2693 /// Always observed as zero.
2696 /// Uncompressed size of each block, except possibly the last. Only
2697 /// `0x3ff000` has been observed so far.
2698 pub block_size: u32,
2700 /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them.
2701 pub blocks: Vec<ZBlock>,
/// Descriptor for one block of compressed data, as listed in the ZLIB
/// trailer.
#[derive(Clone, Debug)]
pub struct ZBlock {
    /// Offset of block of data if simple compression were used.
    pub uncompressed_ofs: u64,

    /// Actual offset within the file of the compressed data block.
    pub compressed_ofs: u64,

    /// The number of bytes in this data block after decompression.  This is
    /// `block_size` in every data block but the last, which may be smaller.
    pub uncompressed_size: u32,

    /// The number of bytes in this data block, as stored compressed in this
    /// file.
    pub compressed_size: u32,
}
2722 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
2724 uncompressed_ofs: endian.parse(read_bytes(r)?),
2725 compressed_ofs: endian.parse(read_bytes(r)?),
2726 uncompressed_size: endian.parse(read_bytes(r)?),
2727 compressed_size: endian.parse(read_bytes(r)?),
2733 fn read<R: Read + Seek>(
2738 ) -> Result<Option<ZTrailer>, Error> {
2739 let start_offset = reader.stream_position()?;
2740 if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
2743 let int_bias = endian.parse(read_bytes(reader)?);
2744 let zero = endian.parse(read_bytes(reader)?);
2745 let block_size = endian.parse(read_bytes(reader)?);
2746 let n_blocks: u32 = endian.parse(read_bytes(reader)?);
2747 let expected_n_blocks = (ztrailer_len - 24) / 24;
2748 if n_blocks as u64 != expected_n_blocks {
2749 return Err(Error::BadZlibTrailerNBlocks {
2750 offset: ztrailer_ofs,
2756 let blocks = (0..n_blocks)
2757 .map(|_| ZBlock::read(reader, endian))
2758 .collect::<Result<Vec<_>, _>>()?;
2759 reader.seek(SeekFrom::Start(start_offset))?;
2761 offset: ztrailer_ofs,
/// Reads exactly `N` bytes from `r`, or reports a clean end of input.
///
/// Returns `Ok(None)` if the reader is already at end of input, and
/// `Ok(Some(bytes))` once all `N` bytes have been read.
///
/// # Errors
///
/// Fails if the input ends after at least one byte but before `N` bytes, or
/// on any other I/O error.
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
    let mut buf = [0; N];
    // `read` may fail with `Interrupted` without having transferred any data;
    // that is a request to retry, not a real failure.
    let n = loop {
        match r.read(&mut buf) {
            Ok(n) => break n,
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error),
        }
    };
    if n == 0 {
        return Ok(None);
    }
    // A short first read is not an error: fetch whatever is still missing.
    r.read_exact(&mut buf[n..])?;
    Ok(Some(buf))
}
/// Reads exactly `N` bytes from `r` into a fixed-size array.
///
/// # Errors
///
/// Fails if fewer than `N` bytes are available or on any other I/O error.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut bytes = [0; N];
    r.read_exact(&mut bytes).map(|()| bytes)
}
/// Reads exactly `n` bytes from `r` into a newly allocated vector.
///
/// # Errors
///
/// Fails if fewer than `n` bytes are available or on any other I/O error.
fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
    let mut bytes = vec![0; n];
    r.read_exact(bytes.as_mut_slice()).map(|()| bytes)
}
2795 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
2796 let length: u32 = endian.parse(read_bytes(r)?);
2797 Ok(read_vec(r, length as usize)?.into())
2800 #[derive(Clone, Debug)]
2801 pub struct LongStringValueLabels<N, S>
2808 /// `(value, label)` pairs, where each value is `width` bytes.
2809 pub labels: Vec<(S, S)>,
2812 impl LongStringValueLabels<RawString, RawString> {
2816 ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
2817 let var_name = decoder.decode(&self.var_name);
2818 let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
2819 .map_err(Warning::InvalidLongStringValueLabelName)?;
2821 let mut labels = Vec::with_capacity(self.labels.len());
2822 for (value, label) in self.labels.iter() {
2823 let value = decoder.decode_exact_length(&value.0).to_string();
2824 let label = decoder.decode(label).to_string();
2825 labels.push((value, label));
2828 Ok(LongStringValueLabels {
2836 #[derive(Clone, Debug)]
2837 pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
2842 impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
2843 const SUBTYPE: u32 = 21;
2844 const SIZE: Option<u32> = Some(1);
2845 const COUNT: Option<u32> = None;
2846 const NAME: &'static str = "long string value labels record";
2848 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2849 ext.check_size::<Self>()?;
2851 let mut input = &ext.data[..];
2852 let mut label_set = Vec::new();
2853 while !input.is_empty() {
2854 let var_name = read_string(&mut input, endian)?;
2855 let width: u32 = endian.parse(read_bytes(&mut input)?);
2856 let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
2857 let mut labels = Vec::new();
2858 for _ in 0..n_labels {
2859 let value = read_string(&mut input, endian)?;
2860 let label = read_string(&mut input, endian)?;
2861 labels.push((value, label));
2863 label_set.push(LongStringValueLabels {
2869 Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
2875 impl LongStringValueLabelRecord<RawString, RawString> {
2876 fn decode(self, decoder: &Decoder) -> LongStringValueLabelRecord<Identifier, String> {
2877 let mut labels = Vec::with_capacity(self.0.len());
2878 for label in &self.0 {
2879 match label.decode(decoder) {
2880 Ok(set) => labels.push(set),
2881 Err(error) => decoder.warn(error),
2884 LongStringValueLabelRecord(labels)