3 encoding::{default_encoding, get_encoding, Error as EncodingError},
4 endian::{Endian, Parse, ToBytes},
5 identifier::{Error as IdError, Identifier},
8 use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding};
9 use flate2::read::ZlibDecoder;
15 collections::{HashMap, VecDeque},
16 fmt::{Debug, Display, Formatter, Result as FmtResult},
17 io::{Error as IoError, Read, Seek, SeekFrom},
24 use thiserror::Error as ThisError;
26 #[derive(ThisError, Debug)]
28 #[error("Not an SPSS system file")]
31 #[error("Invalid magic number {0:?}")]
34 #[error("I/O error ({0})")]
37 #[error("Invalid SAV compression code {0}")]
38 InvalidSavCompression(u32),
40 #[error("Invalid ZSAV compression code {0}")]
41 InvalidZsavCompression(u32),
43 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
44 BadDocumentLength { offset: u64, n: usize, max: usize },
46 #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
47 BadRecordType { offset: u64, rec_type: u32 },
49 #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
50 BadVariableLabelCode {
57 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
59 BadNumericMissingValueCode { offset: u64, code: i32 },
61 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
62 BadStringMissingValueCode { offset: u64, code: i32 },
64 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
65 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
67 #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
68 ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
70 #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
71 TooManyVarIndexes { offset: u64, n: u32, max: u32 },
73 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
74 ExtensionRecordTooLarge {
81 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
89 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
91 EofInCompressedCase { offset: u64, case_ofs: u64 },
93 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
94 PartialCompressedCase { offset: u64, case_ofs: u64 },
96 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
97 CompressedNumberExpected { offset: u64, case_ofs: u64 },
99 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
100 CompressedStringExpected { offset: u64, case_ofs: u64 },
102 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
103 BadZlibTrailerNBlocks {
106 expected_n_blocks: u64,
111 EncodingError(EncodingError),
114 #[derive(ThisError, Debug)]
116 #[error("Unexpected end of data inside extension record.")]
119 #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
120 NoVarIndexes { offset: u64 },
122 #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
126 wrong_types: Vec<u32>,
129 #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")]
136 #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
144 #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
152 #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
153 BadLongMissingValueLength {
159 #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
160 BadEncodingName { offset: u64 },
162 // XXX This is risky because `text` might be arbitrarily long.
163 #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
164 MalformedString { encoding: String, text: String },
166 #[error("Invalid variable measurement level value {0}")]
167 InvalidMeasurement(u32),
169 #[error("Invalid variable display alignment value {0}")]
170 InvalidAlignment(u32),
172 #[error("Invalid attribute name. {0}")]
173 InvalidAttributeName(IdError),
175 #[error("Invalid variable name in attribute record. {0}")]
176 InvalidAttributeVariableName(IdError),
178 #[error("Invalid short name in long variable name record. {0}")]
179 InvalidShortName(IdError),
181 #[error("Invalid name in long variable name record. {0}")]
182 InvalidLongName(IdError),
184 #[error("Invalid variable name in very long string record. {0}")]
185 InvalidLongStringName(IdError),
187 #[error("Invalid variable name in variable set record. {0}")]
188 InvalidVariableSetName(IdError),
190 #[error("Invalid multiple response set name. {0}")]
191 InvalidMrSetName(IdError),
193 #[error("Invalid multiple response set variable name. {0}")]
194 InvalidMrSetVariableName(IdError),
196 #[error("Invalid variable name in long string missing values record. {0}")]
197 InvalidLongStringMissingValueVariableName(IdError),
199 #[error("Invalid variable name in long string value label record. {0}")]
200 InvalidLongStringValueLabelName(IdError),
203 EncodingError(EncodingError),
205 #[error("Details TBD")]
209 impl From<IoError> for Warning {
210 fn from(_source: IoError) -> Self {
211 Self::UnexpectedEndOfData
215 #[derive(Clone, Debug)]
217 Header(HeaderRecord<RawString>),
218 Variable(VariableRecord<RawString, RawStr<8>>),
219 ValueLabel(ValueLabelRecord<RawStr<8>, RawString>),
220 Document(DocumentRecord<RawDocumentLine>),
221 IntegerInfo(IntegerInfoRecord),
222 FloatInfo(FloatInfoRecord),
223 VarDisplay(VarDisplayRecord),
224 MultipleResponse(MultipleResponseRecord<RawString, RawString>),
225 LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
226 LongStringMissingValues(LongStringMissingValueRecord<RawString, RawStr<8>>),
227 Encoding(EncodingRecord),
228 NumberOfCases(NumberOfCasesRecord),
230 OtherExtension(Extension),
234 Cases(Rc<RefCell<Cases>>),
237 #[derive(Clone, Debug)]
238 pub enum DecodedRecord<'a> {
239 Header(HeaderRecord<Cow<'a, str>>),
240 Variable(VariableRecord<Cow<'a, str>, String>),
241 ValueLabel(ValueLabelRecord<RawStr<8>, Cow<'a, str>>),
242 Document(DocumentRecord<Cow<'a, str>>),
243 IntegerInfo(IntegerInfoRecord),
244 FloatInfo(FloatInfoRecord),
245 VarDisplay(VarDisplayRecord),
246 MultipleResponse(MultipleResponseRecord<Identifier, Cow<'a, str>>),
247 LongStringValueLabels(LongStringValueLabelRecord<Identifier, Cow<'a, str>>),
248 LongStringMissingValues(LongStringMissingValueRecord<Identifier, String>),
249 Encoding(EncodingRecord),
250 NumberOfCases(NumberOfCasesRecord),
251 VariableSets(VariableSetRecord),
252 ProductInfo(ProductInfoRecord),
253 LongNames(LongNamesRecord),
254 VeryLongStrings(VeryLongStringsRecord),
255 FileAttributes(FileAttributeRecord),
256 VariableAttributes(VariableAttributeRecord),
257 OtherExtension(Extension),
267 var_types: &[VarType],
268 warn: &dyn Fn(Warning),
269 ) -> Result<Option<Record>, Error>
273 let rec_type: u32 = endian.parse(read_bytes(reader)?);
275 2 => Ok(Some(VariableRecord::read(reader, endian)?)),
276 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
277 6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
278 7 => Extension::read(reader, endian, var_types.len(), warn),
279 999 => Ok(Some(Record::EndOfHeaders(
280 endian.parse(read_bytes(reader)?),
282 _ => Err(Error::BadRecordType {
283 offset: reader.stream_position()?,
289 pub fn decode<'a>(&'a self, decoder: &Decoder) -> Result<DecodedRecord<'a>, Error> {
291 Record::Header(record) => record.decode(decoder),
292 Record::Variable(record) => record.decode(decoder),
293 Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
294 Record::Document(record) => record.decode(decoder),
295 Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
296 Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
297 Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
298 Record::MultipleResponse(record) => record.decode(decoder),
299 Record::LongStringValueLabels(record) => {
300 DecodedRecord::LongStringValueLabels(record.decode(decoder))
302 Record::LongStringMissingValues(record) => {
303 DecodedRecord::LongStringMissingValues(record.decode(decoder))
305 Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
306 Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
307 Record::Text(record) => record.decode(decoder),
308 Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
309 Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(*record),
310 Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
311 Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
312 Record::Cases(_) => todo!(),
317 pub fn encoding_from_headers(
318 headers: &[Record], // any slice of records; a `&Vec<Record>` argument still coerces
319 warn: &impl Fn(Warning),
320 ) -> Result<&'static Encoding, Error> {
321 let mut encoding_record = None;
322 let mut integer_info_record = None;
323 for record in headers {
325 Record::Encoding(record) => encoding_record = Some(record),
326 Record::IntegerInfo(record) => integer_info_record = Some(record),
330 let encoding = encoding_record.map(|record| record.0.as_str());
331 let character_code = integer_info_record.map(|record| record.character_code);
332 match get_encoding(encoding, character_code) {
333 Ok(encoding) => Ok(encoding),
334 Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
336 warn(Warning::EncodingError(err));
337 // Warn that we're using the default encoding.
338 Ok(default_encoding())
343 // If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
344 // decoded as Latin-1 (actually bytes interpreted as Unicode code points).
345 fn default_decode(s: &[u8]) -> Cow<str> {
346 from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
349 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
350 pub enum Compression {
356 fn offsets(&self) -> Range<u64>;
360 pub struct HeaderRecord<S>
365 pub offsets: Range<u64>,
370 /// Eye-catcher string, product name, in the file's encoding. Padded
371 /// on the right with spaces.
374 /// Layout code, normally either 2 or 3.
375 pub layout_code: u32,
377 /// Number of variable positions, or `None` if the value in the file is
378 /// questionably trustworthy.
379 pub nominal_case_size: Option<u32>,
381 /// Compression type, if any.
382 pub compression: Option<Compression>,
384 /// 1-based variable index of the weight variable, or `None` if the file is
386 pub weight_index: Option<u32>,
388 /// Claimed number of cases, if known.
389 pub n_cases: Option<u32>,
391 /// Compression bias, usually 100.0.
394 /// `dd mmm yy` in the file's encoding.
395 pub creation_date: S,
397 /// `HH:MM:SS` in the file's encoding.
398 pub creation_time: S,
400 /// File label, in the file's encoding. Padded on the right with spaces.
403 /// Endianness of the data in the file header.
407 impl<S> HeaderRecord<S>
411 fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
415 writeln!(f, "{name:>17}: {:?}", value)
419 impl<S> Debug for HeaderRecord<S>
423 fn fmt(&self, f: &mut Formatter) -> FmtResult {
424 writeln!(f, "File header record:")?;
425 self.debug_field(f, "Magic", self.magic)?;
426 self.debug_field(f, "Product name", &self.eye_catcher)?;
427 self.debug_field(f, "Layout code", self.layout_code)?;
428 self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
429 self.debug_field(f, "Compression", self.compression)?;
430 self.debug_field(f, "Weight index", self.weight_index)?;
431 self.debug_field(f, "Number of cases", self.n_cases)?;
432 self.debug_field(f, "Compression bias", self.bias)?;
433 self.debug_field(f, "Creation date", &self.creation_date)?;
434 self.debug_field(f, "Creation time", &self.creation_time)?;
435 self.debug_field(f, "File label", &self.file_label)?;
436 self.debug_field(f, "Endianness", self.endian)
440 impl HeaderRecord<RawString> {
441 fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
442 let start = r.stream_position()?;
444 let magic: [u8; 4] = read_bytes(r)?;
445 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
447 let eye_catcher = RawString(read_vec(r, 60)?);
448 let layout_code: [u8; 4] = read_bytes(r)?;
449 let endian = Endian::identify_u32(2, layout_code)
450 .or_else(|| Endian::identify_u32(3, layout_code)) // layout code is normally 2 or 3; the fallback used to re-test 2
451 .ok_or_else(|| Error::NotASystemFile)?;
452 let layout_code = endian.parse(layout_code);
454 let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
455 let nominal_case_size =
456 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
458 let compression_code: u32 = endian.parse(read_bytes(r)?);
459 let compression = match (magic, compression_code) {
460 (Magic::Zsav, 2) => Some(Compression::ZLib),
461 (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
463 (_, 1) => Some(Compression::Simple),
464 (_, code) => return Err(Error::InvalidSavCompression(code)),
467 let weight_index: u32 = endian.parse(read_bytes(r)?);
468 let weight_index = (weight_index > 0).then_some(weight_index);
470 let n_cases: u32 = endian.parse(read_bytes(r)?);
471 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
473 let bias: f64 = endian.parse(read_bytes(r)?);
475 let creation_date = RawString(read_vec(r, 9)?);
476 let creation_time = RawString(read_vec(r, 8)?);
477 let file_label = RawString(read_vec(r, 64)?);
478 let _: [u8; 3] = read_bytes(r)?;
481 offsets: start..r.stream_position()?,
497 pub fn decode<'a>(&'a self, decoder: &Decoder) -> DecodedRecord<'a> {
498 let eye_catcher = decoder.decode(&self.eye_catcher);
499 let file_label = decoder.decode(&self.file_label);
500 let creation_date = decoder.decode(&self.creation_date);
501 let creation_time = decoder.decode(&self.creation_time);
502 DecodedRecord::Header(HeaderRecord {
504 weight_index: self.weight_index,
505 n_cases: self.n_cases,
507 offsets: self.offsets.clone(),
509 layout_code: self.layout_code,
510 nominal_case_size: self.nominal_case_size,
511 compression: self.compression,
521 pub encoding: &'static Encoding,
522 pub warn: Box<dyn Fn(Warning)>,
526 pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
528 F: Fn(Warning) + 'static,
532 warn: Box::new(warn),
535 fn warn(&self, warning: Warning) {
538 fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
539 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
541 self.warn(Warning::MalformedString {
542 encoding: self.encoding.name().into(),
543 text: output.clone().into(),
549 fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
550 self.decode_slice(input.0.as_slice())
553 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
554 /// re-encoding the result back into `self.encoding` will have exactly the
555 /// same length in bytes.
557 /// XXX warn about errors?
558 pub fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
559 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
560 // This is the common case. Usually there will be no errors.
563 // Unusual case. Don't bother to optimize it much.
564 let mut decoder = self.encoding.new_decoder_without_bom_handling();
565 let mut output = String::with_capacity(
567 .max_utf8_buffer_length_without_replacement(input.len())
570 let mut rest = input;
571 while !rest.is_empty() {
572 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
573 (DecoderResult::InputEmpty, _) => break,
574 (DecoderResult::OutputFull, _) => unreachable!(),
575 (DecoderResult::Malformed(a, b), consumed) => {
576 let skipped = a as usize + b as usize;
577 output.extend(repeat('?').take(skipped));
578 rest = &rest[consumed..];
582 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
587 pub fn decode_identifier(&self, input: &RawString) -> Result<Identifier, IdError> {
588 self.new_identifier(&self.decode(input))
591 pub fn new_identifier(&self, name: &str) -> Result<Identifier, IdError> {
592 Identifier::new(name, self.encoding)
596 impl<S> Header for HeaderRecord<S>
600 fn offsets(&self) -> Range<u64> {
605 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
607 /// Regular system file.
610 /// System file with Zlib-compressed data.
613 /// EBCDIC-encoded system file.
618 /// Magic number for a regular system file.
619 pub const SAV: [u8; 4] = *b"$FL2";
621 /// Magic number for a system file that contains zlib-compressed data.
622 pub const ZSAV: [u8; 4] = *b"$FL3";
624 /// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded
626 pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
629 impl Debug for Magic {
630 fn fmt(&self, f: &mut Formatter) -> FmtResult {
631 let s = match *self {
632 Magic::Sav => "$FL2",
633 Magic::Zsav => "$FL3",
634 Magic::Ebcdic => "($FL2 in EBCDIC)",
640 impl TryFrom<[u8; 4]> for Magic {
643 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
645 Magic::SAV => Ok(Magic::Sav),
646 Magic::ZSAV => Ok(Magic::Zsav),
647 Magic::EBCDIC => Ok(Magic::Ebcdic),
648 _ => Err(Error::BadMagic(value)),
653 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
660 pub fn from_width(width: VarWidth) -> VarType {
662 VarWidth::Numeric => Self::Numeric,
663 VarWidth::String(_) => Self::String,
667 pub fn opposite(self) -> VarType {
669 Self::Numeric => Self::String,
670 Self::String => Self::Numeric,
675 impl Display for VarType {
676 fn fmt(&self, f: &mut Formatter) -> FmtResult {
678 VarType::Numeric => write!(f, "numeric"),
679 VarType::String => write!(f, "string"),
684 #[derive(Copy, Clone)]
693 type RawValue = Value<RawStr<8>>;
695 impl<S> Debug for Value<S>
699 fn fmt(&self, f: &mut Formatter) -> FmtResult {
701 Value::Number(Some(number)) => write!(f, "{number:?}"),
702 Value::Number(None) => write!(f, "SYSMIS"),
703 Value::String(s) => write!(f, "{:?}", s),
709 fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Self, IoError> {
711 &UntypedValue(read_bytes(r)?),
717 pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
719 VarType::String => Value::String(RawStr(raw.0)),
720 VarType::Numeric => {
721 let number: f64 = endian.parse(raw.0);
722 Value::Number((number != -f64::MAX).then_some(number))
727 fn read_case<R: Read + Seek>(
729 var_types: &[VarType],
731 ) -> Result<Option<Vec<Self>>, Error> {
732 let case_start = reader.stream_position()?;
733 let mut values = Vec::with_capacity(var_types.len());
734 for (i, &var_type) in var_types.iter().enumerate() {
735 let Some(raw) = try_read_bytes(reader)? else {
739 let offset = reader.stream_position()?;
740 return Err(Error::EofInCase {
742 case_ofs: offset - case_start,
743 case_len: var_types.len() * 8,
747 values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
752 fn read_compressed_case<R: Read + Seek>(
754 var_types: &[VarType],
755 codes: &mut VecDeque<u8>,
758 ) -> Result<Option<Vec<Self>>, Error> {
759 let case_start = reader.stream_position()?;
760 let mut values = Vec::with_capacity(var_types.len());
761 for (i, &var_type) in var_types.iter().enumerate() {
763 let Some(code) = codes.pop_front() else {
764 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
768 let offset = reader.stream_position()?;
769 return Err(Error::EofInCompressedCase {
771 case_ofs: offset - case_start,
775 codes.extend(new_codes.into_iter());
780 1..=251 => match var_type {
781 VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
783 break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
790 let offset = reader.stream_position()?;
791 return Err(Error::PartialCompressedCase {
793 case_ofs: offset - case_start,
798 break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
800 254 => match var_type {
801 VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC
802 VarType::Numeric => {
803 return Err(Error::CompressedStringExpected {
805 case_ofs: reader.stream_position()? - case_start,
809 255 => match var_type {
810 VarType::Numeric => break Self::Number(None),
812 return Err(Error::CompressedNumberExpected {
814 case_ofs: reader.stream_position()? - case_start,
825 fn decode(&self, decoder: &Decoder) -> Value<String> {
827 Self::Number(x) => Value::Number(*x),
828 Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
833 struct ZlibDecodeMultiple<R>
837 reader: Option<ZlibDecoder<R>>,
840 impl<R> ZlibDecodeMultiple<R>
844 fn new(reader: R) -> ZlibDecodeMultiple<R> {
846 reader: Some(ZlibDecoder::new(reader)),
851 impl<R> Read for ZlibDecodeMultiple<R>
855 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
857 match self.reader.as_mut().unwrap().read(buf)? {
859 let inner = self.reader.take().unwrap().into_inner();
860 self.reader = Some(ZlibDecoder::new(inner));
868 impl<R> Seek for ZlibDecodeMultiple<R>
872 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
873 self.reader.as_mut().unwrap().get_mut().seek(pos)
882 ztrailer_offset: u64,
891 R: Read + Seek + 'static,
894 warn: Box<dyn Fn(Warning)>,
896 header: HeaderRecord<RawString>,
897 var_types: Vec<VarType>,
904 R: Read + Seek + 'static,
906 pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
908 F: Fn(Warning) + 'static,
910 let header = HeaderRecord::read(&mut reader)?;
912 reader: Some(reader),
913 warn: Box::new(warn),
915 var_types: Vec::new(),
916 state: ReaderState::Start,
919 fn cases(&mut self) -> Cases {
920 self.state = ReaderState::End;
922 self.reader.take().unwrap(),
923 take(&mut self.var_types),
927 fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
929 ReaderState::Start => {
930 self.state = ReaderState::Headers;
931 Some(Ok(Record::Header(self.header.clone())))
933 ReaderState::Headers => {
936 self.reader.as_mut().unwrap(),
938 self.var_types.as_slice(),
941 Ok(Some(record)) => break record,
943 Err(error) => return Some(Err(error)),
947 Record::Variable(VariableRecord { width, .. }) => {
948 self.var_types.push(if width == 0 {
954 Record::EndOfHeaders(_) => {
955 self.state = if let Some(Compression::ZLib) = self.header.compression {
956 ReaderState::ZlibHeader
965 ReaderState::ZlibHeader => {
966 let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
968 Ok(zheader) => zheader,
969 Err(error) => return Some(Err(error)),
971 self.state = ReaderState::ZlibTrailer {
972 ztrailer_offset: zheader.ztrailer_offset,
973 ztrailer_len: zheader.ztrailer_len,
975 Some(Ok(Record::ZHeader(zheader)))
977 ReaderState::ZlibTrailer {
981 match ZTrailer::read(
982 self.reader.as_mut().unwrap(),
987 Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
988 Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
989 Err(error) => Some(Err(error)),
992 ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
993 ReaderState::End => None,
998 impl<R> Iterator for Reader<R>
1000 R: Read + Seek + 'static,
1002 type Item = Result<Record, Error>;
1004 fn next(&mut self) -> Option<Self::Item> {
1005 let retval = self._next();
1006 if matches!(retval, Some(Err(_))) {
1007 self.state = ReaderState::End;
1013 trait ReadSeek: Read + Seek {}
1014 impl<T> ReadSeek for T where T: Read + Seek {}
1017 reader: Box<dyn ReadSeek>,
1018 var_types: Vec<VarType>,
1019 compression: Option<Compression>,
1022 codes: VecDeque<u8>,
1026 impl Debug for Cases {
1027 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1033 fn new<R>(reader: R, var_types: Vec<VarType>, header: &HeaderRecord<RawString>) -> Self
1035 R: Read + Seek + 'static,
1038 reader: if header.compression == Some(Compression::ZLib) {
1039 Box::new(ZlibDecodeMultiple::new(reader))
1044 compression: header.compression,
1046 endian: header.endian,
1047 codes: VecDeque::with_capacity(8),
1053 impl Iterator for Cases {
1054 type Item = Result<Vec<RawValue>, Error>;
1056 fn next(&mut self) -> Option<Self::Item> {
1061 let retval = if self.compression.is_some() {
1062 Value::read_compressed_case(
1071 Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
1073 self.eof = matches!(retval, None | Some(Err(_)));
1078 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
1079 pub struct Spec(pub u32);
1081 impl Debug for Spec {
1082 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1083 let type_ = format_name(self.0 >> 16);
1084 let w = (self.0 >> 8) & 0xff;
1085 let d = self.0 & 0xff;
1086 write!(f, "{:06x} ({type_}{w}.{d})", self.0)
1090 fn format_name(type_: u32) -> Cow<'static, str> {
1129 _ => return format!("<unknown format {type_}>").into(),
1135 pub struct MissingValues<S = String>
1139 /// Individual missing values, up to 3 of them.
1140 pub values: Vec<Value<S>>,
1142 /// Optional range of missing values.
1143 pub range: Option<(Value<S>, Value<S>)>,
1146 impl<S> Debug for MissingValues<S>
1150 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1151 for (i, value) in self.values.iter().enumerate() {
1155 write!(f, "{value:?}")?;
1158 if let Some((low, high)) = &self.range {
1159 if !self.values.is_empty() {
1162 write!(f, "{low:?} THRU {high:?}")?;
1165 if self.is_empty() {
1173 impl<S> MissingValues<S>
1177 fn is_empty(&self) -> bool {
1178 self.values.is_empty() && self.range.is_none()
1182 impl<S> Default for MissingValues<S>
1186 fn default() -> Self {
1194 impl MissingValues<RawStr<8>> {
1195 fn read<R: Read + Seek>(
1201 ) -> Result<Self, Error> {
1202 let (n_values, has_range) = match (width, code) {
1203 (_, 0..=3) => (code, false),
1204 (0, -2) => (0, true),
1205 (0, -3) => (1, true),
1206 (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
1207 (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
1210 let var_type = if width == 0 {
1216 let mut values = Vec::new();
1217 for _ in 0..n_values {
1218 values.push(RawValue::read(r, var_type, endian)?);
1220 let range = if has_range {
1221 let low = RawValue::read(r, var_type, endian)?;
1222 let high = RawValue::read(r, var_type, endian)?;
1227 Ok(Self { values, range })
1229 fn decode(&self, decoder: &Decoder) -> MissingValues<String> {
1234 .map(|value| value.decode(decoder))
1239 .map(|(low, high)| (low.decode(decoder), high.decode(decoder))),
1245 pub struct VariableRecord<S, V>
1250 /// Range of offsets in file.
1251 pub offsets: Range<u64>,
1253 /// Variable width, in the range -1..=255.
1256 /// Variable name, padded on the right with spaces.
1260 pub print_format: Spec,
1263 pub write_format: Spec,
1266 pub missing_values: MissingValues<V>,
1268 /// Optional variable label.
1269 pub label: Option<S>,
1272 impl<S, V> Debug for VariableRecord<S, V>
1277 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1282 match self.width.cmp(&0) {
1283 Ordering::Greater => "string",
1284 Ordering::Equal => "numeric",
1285 Ordering::Less => "long string continuation record",
1288 writeln!(f, "Print format: {:?}", self.print_format)?;
1289 writeln!(f, "Write format: {:?}", self.write_format)?;
1290 writeln!(f, "Name: {:?}", &self.name)?;
1291 writeln!(f, "Variable label: {:?}", self.label)?;
1292 writeln!(f, "Missing values: {:?}", self.missing_values)
1296 impl VariableRecord<RawString, RawStr<8>> {
1297 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1298 let start_offset = r.stream_position()?;
1299 let width: i32 = endian.parse(read_bytes(r)?);
1300 let code_offset = r.stream_position()?;
1301 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
1302 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
1303 let print_format = Spec(endian.parse(read_bytes(r)?));
1304 let write_format = Spec(endian.parse(read_bytes(r)?));
1305 let name = RawString(read_vec(r, 8)?);
1307 let label = match has_variable_label {
1310 let len: u32 = endian.parse(read_bytes(r)?);
1311 let read_len = len.min(65535) as usize; // keep at most 65535 bytes of the label
1312 let label = RawString(read_vec(r, read_len)?);
1314 let padding_bytes = Integer::next_multiple_of(&len, &4) - read_len as u32; // padding plus any label bytes beyond `read_len`
1315 let _ = read_vec(r, padding_bytes as usize)?;
1320 return Err(Error::BadVariableLabelCode {
1323 code: has_variable_label,
1328 let missing_values =
1329 MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
1331 let end_offset = r.stream_position()?;
1333 Ok(Record::Variable(VariableRecord {
1334 offsets: start_offset..end_offset,
1344 pub fn decode(&self, decoder: &Decoder) -> DecodedRecord {
1345 DecodedRecord::Variable(VariableRecord {
1346 offsets: self.offsets.clone(),
1348 name: decoder.decode(&self.name),
1349 print_format: self.print_format,
1350 write_format: self.write_format,
1351 missing_values: self.missing_values.decode(decoder),
1352 label: self.label.as_ref().map(|label| decoder.decode(label)),
1357 #[derive(Copy, Clone)]
1358 pub struct UntypedValue(pub [u8; 8]);
1360 impl Debug for UntypedValue {
1361 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1362 let little: f64 = Endian::Little.parse(self.0);
1363 let little = format!("{:?}", little);
1364 let big: f64 = Endian::Big.parse(self.0);
1365 let big = format!("{:?}", big);
1366 let number = if little.len() <= big.len() {
1371 write!(f, "{number}")?;
1373 let string = default_decode(&self.0);
1375 .split(|c: char| c == '\0' || c.is_control())
1378 write!(f, "{string:?}")?;
1384 pub struct RawString(pub Vec<u8>);
1386 impl From<Vec<u8>> for RawString {
1387 fn from(source: Vec<u8>) -> Self {
1392 impl From<&[u8]> for RawString {
1393 fn from(source: &[u8]) -> Self {
1398 impl Debug for RawString {
1399 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1400 write!(f, "{:?}", default_decode(self.0.as_slice()))
1404 #[derive(Copy, Clone)]
1405 pub struct RawStr<const N: usize>(pub [u8; N]);
1407 impl<const N: usize> From<[u8; N]> for RawStr<N> {
1408 fn from(source: [u8; N]) -> Self {
1413 impl<const N: usize> Debug for RawStr<N> {
1414 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1415 write!(f, "{:?}", default_decode(&self.0))
1419 #[derive(Clone, Debug)]
1420 pub struct ValueLabel<V, S>
1425 pub value: Value<V>,
1430 pub struct ValueLabelRecord<V, S>
1435 /// Range of offsets in file.
1436 pub offsets: Range<u64>,
1439 pub labels: Vec<ValueLabel<V, S>>,
1441 /// The 1-based indexes of the variables to which the labels apply.
1442 pub dict_indexes: Vec<u32>,
1444 /// The types of the variables.
1445 pub var_type: VarType,
1448 impl<V, S> Debug for ValueLabelRecord<V, S>
1453 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1454 writeln!(f, "labels: ")?;
1455 for label in self.labels.iter() {
1456 writeln!(f, "{label:?}")?;
1458 write!(f, "apply to {} variables", self.var_type)?;
1459 for dict_index in self.dict_indexes.iter() {
1460 write!(f, " #{dict_index}")?;
1466 impl<V, S> Header for ValueLabelRecord<V, S>
1471 fn offsets(&self) -> Range<u64> {
1472 self.offsets.clone()
1476 impl<V, S> ValueLabelRecord<V, S>
1481 /// Maximum number of value labels in a record.
1482 pub const MAX_LABELS: u32 = u32::MAX / 8;
1484 /// Maximum number of variable indexes in a record.
1485 pub const MAX_INDEXES: u32 = u32::MAX / 8;
1488 impl ValueLabelRecord<RawStr<8>, RawString> {
1489 fn read<R: Read + Seek>(
1492 var_types: &[VarType],
1493 warn: &dyn Fn(Warning),
1494 ) -> Result<Option<Record>, Error> {
1495 let label_offset = r.stream_position()?;
1496 let n: u32 = endian.parse(read_bytes(r)?);
1497 if n > Self::MAX_LABELS {
1498 return Err(Error::BadNumberOfValueLabels {
1499 offset: label_offset,
1501 max: Self::MAX_LABELS,
1505 let mut labels = Vec::new();
1507 let value = UntypedValue(read_bytes(r)?);
1508 let label_len: u8 = endian.parse(read_bytes(r)?);
1509 let label_len = label_len as usize;
1510 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
1512 let mut label = read_vec(r, padded_len - 1)?;
1513 label.truncate(label_len);
1514 labels.push((value, RawString(label)));
1517 let index_offset = r.stream_position()?;
1518 let rec_type: u32 = endian.parse(read_bytes(r)?);
1520 return Err(Error::ExpectedVarIndexRecord {
1521 offset: index_offset,
1526 let n: u32 = endian.parse(read_bytes(r)?);
1527 if n > Self::MAX_INDEXES {
1528 return Err(Error::TooManyVarIndexes {
1529 offset: index_offset,
1531 max: Self::MAX_INDEXES,
1535 let index_offset = r.stream_position()?;
1536 let mut dict_indexes = Vec::with_capacity(n as usize);
1537 let mut invalid_indexes = Vec::new();
1539 let index: u32 = endian.parse(read_bytes(r)?);
1540 if index == 0 || index as usize > var_types.len() {
1541 dict_indexes.push(index);
1543 invalid_indexes.push(index);
1546 if !invalid_indexes.is_empty() {
1547 warn(Warning::InvalidVarIndexes {
1548 offset: index_offset,
1549 max: var_types.len(),
1550 invalid: invalid_indexes,
1554 let Some(&first_index) = dict_indexes.first() else {
1555 warn(Warning::NoVarIndexes {
1556 offset: index_offset,
1560 let var_type = var_types[first_index as usize - 1];
1561 let mut wrong_type_indexes = Vec::new();
1562 dict_indexes.retain(|&index| {
1563 if var_types[index as usize - 1] != var_type {
1564 wrong_type_indexes.push(index);
1570 if !wrong_type_indexes.is_empty() {
1571 warn(Warning::MixedVarTypes {
1572 offset: index_offset,
1574 wrong_types: wrong_type_indexes,
1580 .map(|(value, label)| ValueLabel {
1581 value: Value::from_raw(&value, var_type, endian),
1586 let end_offset = r.stream_position()?;
1587 Ok(Some(Record::ValueLabel(ValueLabelRecord {
1588 offsets: label_offset..end_offset,
1595 fn decode<'a>(&'a self, decoder: &Decoder) -> ValueLabelRecord<RawStr<8>, Cow<'a, str>> {
1599 .map(|ValueLabel { value, label }| ValueLabel {
1601 label: decoder.decode(label),
1605 offsets: self.offsets.clone(),
1607 dict_indexes: self.dict_indexes.clone(),
1608 var_type: self.var_type,
1613 #[derive(Clone, Debug)]
1614 pub struct DocumentRecord<S>
1618 pub offsets: Range<u64>,
1620 /// The document, as an array of 80-byte lines.
1624 pub type RawDocumentLine = RawStr<DOC_LINE_LEN>;
1626 /// Length of a line in a document. Document lines are fixed-length and
1627 /// padded on the right with spaces.
1628 pub const DOC_LINE_LEN: usize = 80;
1630 impl DocumentRecord<RawDocumentLine> {
1631 /// Maximum number of lines we will accept in a document. This is simply
1632 /// the maximum number that will fit in a 32-bit space.
1633 pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
1635 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1636 let start_offset = r.stream_position()?;
1637 let n: u32 = endian.parse(read_bytes(r)?);
1639 if n > Self::MAX_LINES {
1640 Err(Error::BadDocumentLength {
1641 offset: start_offset,
1643 max: Self::MAX_LINES,
1646 let mut lines = Vec::with_capacity(n);
1648 lines.push(RawStr(read_bytes(r)?));
1650 let end_offset = r.stream_position()?;
1651 Ok(Record::Document(DocumentRecord {
1652 offsets: start_offset..end_offset,
1658 pub fn decode(&self, decoder: &Decoder) -> DecodedRecord {
1659 DecodedRecord::Document(DocumentRecord {
1660 offsets: self.offsets.clone(),
1664 .map(|s| decoder.decode_slice(&s.0))
1670 impl<S> Header for DocumentRecord<S>
1674 fn offsets(&self) -> Range<u64> {
1675 self.offsets.clone()
1679 trait ExtensionRecord {
1681 const SIZE: Option<u32>;
1682 const COUNT: Option<u32>;
1683 const NAME: &'static str;
1684 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
1687 #[derive(Clone, Debug)]
1688 pub struct IntegerInfoRecord {
1689 pub offsets: Range<u64>,
1690 pub version: (i32, i32, i32),
1691 pub machine_code: i32,
1692 pub floating_point_rep: i32,
1693 pub compression_code: i32,
1694 pub endianness: i32,
1695 pub character_code: i32,
1698 impl ExtensionRecord for IntegerInfoRecord {
1699 const SUBTYPE: u32 = 3;
1700 const SIZE: Option<u32> = Some(4);
1701 const COUNT: Option<u32> = Some(8);
1702 const NAME: &'static str = "integer record";
1704 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
1705 ext.check_size::<Self>()?;
1707 let mut input = &ext.data[..];
1708 let data: Vec<i32> = (0..8)
1709 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1711 Ok(Record::IntegerInfo(IntegerInfoRecord {
1712 offsets: ext.offsets.clone(),
1713 version: (data[0], data[1], data[2]),
1714 machine_code: data[3],
1715 floating_point_rep: data[4],
1716 compression_code: data[5],
1717 endianness: data[6],
1718 character_code: data[7],
1723 #[derive(Clone, Debug)]
1724 pub struct FloatInfoRecord {
1730 impl ExtensionRecord for FloatInfoRecord {
1731 const SUBTYPE: u32 = 4;
1732 const SIZE: Option<u32> = Some(8);
1733 const COUNT: Option<u32> = Some(3);
1734 const NAME: &'static str = "floating point record";
1736 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
1737 ext.check_size::<Self>()?;
1739 let mut input = &ext.data[..];
1740 let data: Vec<f64> = (0..3)
1741 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1743 Ok(Record::FloatInfo(FloatInfoRecord {
1751 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1752 pub enum CategoryLabels {
1757 #[derive(Clone, Debug)]
1758 pub enum MultipleResponseType {
1761 labels: CategoryLabels,
1766 impl MultipleResponseType {
1767 fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
1768 let (mr_type, input) = match input.split_first() {
1769 Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
1770 Some((b'D', input)) => {
1771 let (value, input) = parse_counted_string(input)?;
1773 MultipleResponseType::MultipleDichotomy {
1775 labels: CategoryLabels::VarLabels,
1780 Some((b'E', input)) => {
1781 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
1782 (CategoryLabels::CountedValues, rest)
1783 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
1784 (CategoryLabels::VarLabels, rest)
1786 return Err(Warning::TBD);
1788 let (value, input) = parse_counted_string(input)?;
1790 MultipleResponseType::MultipleDichotomy { value, labels },
1794 _ => return Err(Warning::TBD),
1796 Ok((mr_type, input))
1800 #[derive(Clone, Debug)]
1801 pub struct MultipleResponseSet<I, S>
1808 pub mr_type: MultipleResponseType,
1809 pub short_names: Vec<I>,
1812 impl MultipleResponseSet<RawString, RawString> {
1813 fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
1814 let Some(equals) = input.iter().position(|&b| b == b'=') else {
1815 return Err(Warning::TBD);
1817 let (name, input) = input.split_at(equals);
1818 let (mr_type, input) = MultipleResponseType::parse(input)?;
1819 let Some(input) = input.strip_prefix(b" ") else {
1820 return Err(Warning::TBD);
1822 let (label, mut input) = parse_counted_string(input)?;
1823 let mut vars = Vec::new();
1824 while input.first() != Some(&b'\n') {
1825 match input.split_first() {
1826 Some((b' ', rest)) => {
1827 let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
1828 return Err(Warning::TBD);
1830 let (var, rest) = rest.split_at(length);
1831 if !var.is_empty() {
1832 vars.push(var.into());
1836 _ => return Err(Warning::TBD),
1839 while input.first() == Some(&b'\n') {
1840 input = &input[1..];
1843 MultipleResponseSet {
1856 ) -> Result<MultipleResponseSet<Identifier, Cow<'a, str>>, Warning> {
1857 let mut short_names = Vec::with_capacity(self.short_names.len());
1858 for short_name in self.short_names.iter() {
1859 if let Some(short_name) = decoder
1860 .decode_identifier(short_name)
1861 .map_err(Warning::InvalidMrSetName)
1862 .issue_warning(&decoder.warn)
1864 short_names.push(short_name);
1867 Ok(MultipleResponseSet {
1869 .decode_identifier(&self.name)
1870 .map_err(Warning::InvalidMrSetVariableName)?,
1871 label: decoder.decode(&self.label),
1872 mr_type: self.mr_type.clone(),
1878 #[derive(Clone, Debug)]
1879 pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
1884 impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
1885 const SUBTYPE: u32 = 7;
1886 const SIZE: Option<u32> = Some(1);
1887 const COUNT: Option<u32> = None;
1888 const NAME: &'static str = "multiple response set record";
1890 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
1891 ext.check_size::<Self>()?;
1893 let mut input = &ext.data[..];
1894 let mut sets = Vec::new();
1895 while !input.is_empty() {
1896 let (set, rest) = MultipleResponseSet::parse(input)?;
1900 Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
1904 impl MultipleResponseRecord<RawString, RawString> {
1905 fn decode(&self, decoder: &Decoder) -> DecodedRecord {
1906 let mut sets = Vec::new();
1907 for set in self.0.iter() {
1908 if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) {
1912 DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
1916 fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
1917 let Some(space) = input.iter().position(|&b| b == b' ') else {
1918 return Err(Warning::TBD);
1920 let Ok(length) = from_utf8(&input[..space]) else {
1921 return Err(Warning::TBD);
1923 let Ok(length): Result<usize, _> = length.parse() else {
1924 return Err(Warning::TBD);
1927 let input = &input[space + 1..];
1928 if input.len() < length {
1929 return Err(Warning::TBD);
1932 let (string, rest) = input.split_at(length);
1933 Ok((string.into(), rest))
1936 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1944 pub fn default_for_type(var_type: VarType) -> Option<Measure> {
1946 VarType::Numeric => None,
1947 VarType::String => Some(Self::Nominal),
1951 fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
1954 1 => Ok(Some(Measure::Nominal)),
1955 2 => Ok(Some(Measure::Ordinal)),
1956 3 => Ok(Some(Measure::Scale)),
1957 _ => Err(Warning::InvalidMeasurement(source)),
1962 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1963 pub enum Alignment {
1970 fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
1973 1 => Ok(Some(Alignment::Left)),
1974 2 => Ok(Some(Alignment::Right)),
1975 3 => Ok(Some(Alignment::Center)),
1976 _ => Err(Warning::InvalidAlignment(source)),
1980 pub fn default_for_type(var_type: VarType) -> Self {
1982 VarType::Numeric => Self::Right,
1983 VarType::String => Self::Left,
1988 #[derive(Clone, Debug)]
1989 pub struct VarDisplay {
1990 pub measure: Option<Measure>,
1991 pub width: Option<u32>,
1992 pub alignment: Option<Alignment>,
1995 #[derive(Clone, Debug)]
1996 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
1998 impl VarDisplayRecord {
1999 const SUBTYPE: u32 = 11;
2005 warn: &dyn Fn(Warning),
2006 ) -> Result<Record, Warning> {
2008 return Err(Warning::BadRecordSize {
2009 offset: ext.offsets.start,
2010 record: String::from("variable display record"),
2016 let has_width = if ext.count as usize == 3 * n_vars {
2018 } else if ext.count as usize == 2 * n_vars {
2021 return Err(Warning::TBD);
2024 let mut var_displays = Vec::new();
2025 let mut input = &ext.data[..];
2026 for _ in 0..n_vars {
2027 let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
2028 .issue_warning(&warn)
2030 let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
2031 let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
2032 .issue_warning(&warn)
2034 var_displays.push(VarDisplay {
2040 Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
2044 #[derive(Clone, Debug)]
2045 pub struct LongStringMissingValues<N, V>
2054 pub missing_values: MissingValues<V>,
2057 impl LongStringMissingValues<RawString, RawStr<8>> {
2061 ) -> Result<LongStringMissingValues<Identifier, String>, IdError> {
2062 Ok(LongStringMissingValues {
2063 var_name: decoder.decode_identifier(&self.var_name)?,
2064 missing_values: self.missing_values.decode(decoder),
2069 #[derive(Clone, Debug)]
2070 pub struct LongStringMissingValueRecord<N, V>(pub Vec<LongStringMissingValues<N, V>>)
2075 impl ExtensionRecord for LongStringMissingValueRecord<RawString, RawStr<8>> {
2076 const SUBTYPE: u32 = 22;
2077 const SIZE: Option<u32> = Some(1);
2078 const COUNT: Option<u32> = None;
2079 const NAME: &'static str = "long string missing values record";
2081 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2082 ext.check_size::<Self>()?;
2084 let mut input = &ext.data[..];
2085 let mut missing_value_set = Vec::new();
2086 while !input.is_empty() {
2087 let var_name = read_string(&mut input, endian)?;
2088 let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
2089 let value_len: u32 = endian.parse(read_bytes(&mut input)?);
2091 let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
2092 return Err(Warning::BadLongMissingValueLength {
2093 record_offset: ext.offsets.start,
2098 let mut values = Vec::new();
2099 for i in 0..n_missing_values {
2100 let value: [u8; 8] = read_bytes(&mut input)?;
2101 let numeric_value: u64 = endian.parse(value);
2102 let value = if i > 0 && numeric_value == 8 {
2103 // Tolerate files written by old, buggy versions of PSPP
2104 // where we believed that the value_length was repeated
2105 // before each missing value.
2106 read_bytes(&mut input)?
2110 values.push(Value::String(RawStr(value)));
2112 let missing_values = MissingValues {
2116 missing_value_set.push(LongStringMissingValues {
2121 Ok(Record::LongStringMissingValues(
2122 LongStringMissingValueRecord(missing_value_set),
2127 impl LongStringMissingValueRecord<RawString, RawStr<8>> {
2128 pub fn decode(&self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier, String> {
2129 let mut mvs = Vec::with_capacity(self.0.len());
2130 for mv in self.0.iter() {
2131 if let Some(mv) = mv
2133 .map_err(Warning::InvalidLongStringMissingValueVariableName)
2134 .issue_warning(&decoder.warn)
2139 LongStringMissingValueRecord(mvs)
2143 #[derive(Clone, Debug)]
2144 pub struct EncodingRecord(pub String);
2146 impl ExtensionRecord for EncodingRecord {
2147 const SUBTYPE: u32 = 20;
2148 const SIZE: Option<u32> = Some(1);
2149 const COUNT: Option<u32> = None;
2150 const NAME: &'static str = "encoding record";
2152 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
2153 ext.check_size::<Self>()?;
2155 Ok(Record::Encoding(EncodingRecord(
2156 String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
2157 offset: ext.offsets.start,
2163 #[derive(Clone, Debug)]
2164 pub struct NumberOfCasesRecord {
2165 /// Always observed as 1.
2168 /// Number of cases.
2172 impl ExtensionRecord for NumberOfCasesRecord {
2173 const SUBTYPE: u32 = 16;
2174 const SIZE: Option<u32> = Some(8);
2175 const COUNT: Option<u32> = Some(2);
2176 const NAME: &'static str = "extended number of cases record";
2178 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2179 ext.check_size::<Self>()?;
2181 let mut input = &ext.data[..];
2182 let one = endian.parse(read_bytes(&mut input)?);
2183 let n_cases = endian.parse(read_bytes(&mut input)?);
2185 Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
2189 #[derive(Clone, Debug)]
2190 pub struct TextRecord {
2191 pub offsets: Range<u64>,
2194 pub rec_type: TextRecordType,
2196 /// The text content of the record.
2197 pub text: RawString,
2200 #[derive(Clone, Copy, Debug)]
2201 pub enum TextRecordType {
2211 fn new(extension: Extension, rec_type: TextRecordType) -> Self {
2213 offsets: extension.offsets,
2215 text: extension.data.into(),
2218 pub fn decode(&self, decoder: &Decoder) -> DecodedRecord {
2219 match self.rec_type {
2220 TextRecordType::VariableSets => {
2221 DecodedRecord::VariableSets(VariableSetRecord::decode(self, decoder))
2223 TextRecordType::ProductInfo => {
2224 DecodedRecord::ProductInfo(ProductInfoRecord::decode(self, decoder))
2226 TextRecordType::LongNames => {
2227 DecodedRecord::LongNames(LongNamesRecord::decode(self, decoder))
2229 TextRecordType::VeryLongStrings => {
2230 DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(self, decoder))
2232 TextRecordType::FileAttributes => {
2233 DecodedRecord::FileAttributes(FileAttributeRecord::decode(self, decoder))
2235 TextRecordType::VariableAttributes => {
2236 DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(self, decoder))
2242 #[derive(Clone, Debug)]
2243 pub struct VeryLongString {
2244 pub short_name: Identifier,
2248 impl VeryLongString {
2249 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
2250 let Some((short_name, length)) = input.split_once('=') else {
2251 return Err(Warning::TBD);
2253 let short_name = decoder
2254 .new_identifier(short_name)
2255 .map_err(Warning::InvalidLongStringName)?;
2256 let length = length.parse().map_err(|_| Warning::TBD)?;
2257 Ok(VeryLongString { short_name, length })
2261 #[derive(Clone, Debug)]
2262 pub struct VeryLongStringsRecord(Vec<VeryLongString>);
2264 impl VeryLongStringsRecord {
2265 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2266 let input = decoder.decode(&source.text);
2267 let mut very_long_strings = Vec::new();
2270 .map(|s| s.trim_end_matches('\t'))
2271 .filter(|s| !s.is_empty())
2273 if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
2274 very_long_strings.push(vls)
2277 VeryLongStringsRecord(very_long_strings)
2281 #[derive(Clone, Debug)]
2282 pub struct Attribute {
2283 pub name: Identifier,
2284 pub values: Vec<String>,
2288 fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
2289 let Some((name, mut input)) = input.split_once('(') else {
2290 return Err(Warning::TBD);
2293 .new_identifier(name)
2294 .map_err(Warning::InvalidAttributeName)?;
2295 let mut values = Vec::new();
2297 let Some((value, rest)) = input.split_once('\n') else {
2298 return Err(Warning::TBD);
2300 if let Some(stripped) = value
2302 .and_then(|value| value.strip_suffix('\''))
2304 values.push(stripped.into());
2306 decoder.warn(Warning::TBD);
2307 values.push(value.into());
2309 if let Some(rest) = rest.strip_prefix(')') {
2310 let attribute = Attribute { name, values };
2311 return Ok((attribute, rest));
2318 #[derive(Clone, Debug, Default)]
2319 pub struct AttributeSet(pub HashMap<Identifier, Vec<String>>);
2325 sentinel: Option<char>,
2326 ) -> Result<(AttributeSet, &'a str), Warning> {
2327 let mut attributes = HashMap::new();
2329 match input.chars().next() {
2330 None => break input,
2331 c if c == sentinel => break &input[1..],
2333 let (attribute, rest) = Attribute::parse(decoder, input)?;
2334 // XXX report duplicate name
2335 attributes.insert(attribute.name, attribute.values);
2340 Ok((AttributeSet(attributes), rest))
2344 #[derive(Clone, Debug, Default)]
2345 pub struct FileAttributeRecord(AttributeSet);
2347 impl FileAttributeRecord {
2348 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2349 let input = decoder.decode(&source.text);
2350 match AttributeSet::parse(decoder, &input, None).issue_warning(&decoder.warn) {
2351 Some((set, rest)) => {
2352 if !rest.is_empty() {
2353 decoder.warn(Warning::TBD);
2355 FileAttributeRecord(set)
2357 None => FileAttributeRecord::default(),
2362 #[derive(Clone, Debug)]
2363 pub struct VarAttributeSet {
2364 pub long_var_name: Identifier,
2365 pub attributes: AttributeSet,
2368 impl VarAttributeSet {
2369 fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Warning> {
2370 let Some((long_var_name, rest)) = input.split_once(':') else {
2371 return Err(Warning::TBD);
2373 let long_var_name = decoder
2374 .new_identifier(long_var_name)
2375 .map_err(Warning::InvalidAttributeVariableName)?;
2376 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?;
2377 let var_attribute = VarAttributeSet {
2381 Ok((var_attribute, rest))
2385 #[derive(Clone, Debug)]
2386 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
2388 impl VariableAttributeRecord {
2389 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2390 let decoded = decoder.decode(&source.text);
2391 let mut input = decoded.as_ref();
2392 let mut var_attribute_sets = Vec::new();
2393 while !input.is_empty() {
2394 let Some((var_attribute, rest)) =
2395 VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn)
2399 var_attribute_sets.push(var_attribute);
2402 VariableAttributeRecord(var_attribute_sets)
2406 #[derive(Clone, Debug)]
2407 pub struct LongName {
2408 pub short_name: Identifier,
2409 pub long_name: Identifier,
2413 fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
2414 let Some((short_name, long_name)) = input.split_once('=') else {
2415 return Err(Warning::TBD);
2417 let short_name = decoder
2418 .new_identifier(short_name)
2419 .map_err(Warning::InvalidShortName)?;
2420 let long_name = decoder
2421 .new_identifier(long_name)
2422 .map_err(Warning::InvalidLongName)?;
2430 #[derive(Clone, Debug)]
2431 pub struct LongNamesRecord(Vec<LongName>);
2433 impl LongNamesRecord {
2434 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2435 let input = decoder.decode(&source.text);
2436 let mut names = Vec::new();
2437 for pair in input.split('\t').filter(|s| !s.is_empty()) {
2438 if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) {
2439 names.push(long_name);
2442 LongNamesRecord(names)
2446 #[derive(Clone, Debug)]
2447 pub struct ProductInfoRecord(pub String);
2449 impl ProductInfoRecord {
2450 fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
2451 Self(decoder.decode(&source.text).into())
2454 #[derive(Clone, Debug)]
2455 pub struct VariableSet {
2457 pub vars: Vec<Identifier>,
2461 fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
2462 let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
2463 let mut vars = Vec::new();
2464 for var in input.split_ascii_whitespace() {
2465 if let Some(identifier) = decoder
2466 .new_identifier(var)
2467 .map_err(Warning::InvalidVariableSetName)
2468 .issue_warning(&decoder.warn)
2470 vars.push(identifier);
2480 #[derive(Clone, Debug)]
2481 pub struct VariableSetRecord {
2482 pub offsets: Range<u64>,
2483 pub sets: Vec<VariableSet>,
2486 impl VariableSetRecord {
2487 fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
2488 let mut sets = Vec::new();
2489 let input = decoder.decode(&source.text);
2490 for line in input.lines() {
2491 if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) {
2496 offsets: source.offsets.clone(),
2502 trait IssueWarning<T> {
2503 fn issue_warning<F>(self, warn: &F) -> Option<T>
2507 impl<T> IssueWarning<T> for Result<T, Warning> {
2508 fn issue_warning<F>(self, warn: &F) -> Option<T>
2513 Ok(result) => Some(result),
2522 #[derive(Clone, Debug)]
2523 pub struct Extension {
2524 pub offsets: Range<u64>,
2529 /// Size of each data element.
2532 /// Number of data elements.
2535 /// `size * count` bytes of data.
2540 fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
2541 if let Some(expected_size) = E::SIZE {
2542 if self.size != expected_size {
2543 return Err(Warning::BadRecordSize {
2544 offset: self.offsets.start,
2545 record: E::NAME.into(),
2551 if let Some(expected_count) = E::COUNT {
2552 if self.count != expected_count {
2553 return Err(Warning::BadRecordCount {
2554 offset: self.offsets.start,
2555 record: E::NAME.into(),
2564 fn read<R: Read + Seek>(
2568 warn: &dyn Fn(Warning),
2569 ) -> Result<Option<Record>, Error> {
2570 let subtype = endian.parse(read_bytes(r)?);
2571 let header_offset = r.stream_position()?;
2572 let size: u32 = endian.parse(read_bytes(r)?);
2573 let count = endian.parse(read_bytes(r)?);
2574 let Some(product) = size.checked_mul(count) else {
2575 return Err(Error::ExtensionRecordTooLarge {
2576 offset: header_offset,
2582 let start_offset = r.stream_position()?;
2583 let data = read_vec(r, product as usize)?;
2584 let end_offset = start_offset + product as u64;
2585 let extension = Extension {
2586 offsets: start_offset..end_offset,
2592 let result = match subtype {
2593 IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
2594 FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
2595 VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
2596 MultipleResponseRecord::SUBTYPE | 19 => {
2597 MultipleResponseRecord::parse(&extension, endian)
2599 LongStringValueLabelRecord::SUBTYPE => {
2600 LongStringValueLabelRecord::parse(&extension, endian)
2602 EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
2603 NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
2604 5 => Ok(Record::Text(TextRecord::new(
2606 TextRecordType::VariableSets,
2608 10 => Ok(Record::Text(TextRecord::new(
2610 TextRecordType::ProductInfo,
2612 13 => Ok(Record::Text(TextRecord::new(
2614 TextRecordType::LongNames,
2616 14 => Ok(Record::Text(TextRecord::new(
2618 TextRecordType::VeryLongStrings,
2620 17 => Ok(Record::Text(TextRecord::new(
2622 TextRecordType::FileAttributes,
2624 18 => Ok(Record::Text(TextRecord::new(
2626 TextRecordType::VariableAttributes,
2628 _ => Ok(Record::OtherExtension(extension)),
2631 Ok(result) => Ok(Some(result)),
2640 #[derive(Clone, Debug)]
2641 pub struct ZHeader {
2642 /// File offset to the start of the record.
2645 /// File offset to the ZLIB data header.
2646 pub zheader_offset: u64,
2648 /// File offset to the ZLIB trailer.
2649 pub ztrailer_offset: u64,
2651 /// Length of the ZLIB trailer in bytes.
2652 pub ztrailer_len: u64,
2656 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
2657 let offset = r.stream_position()?;
2658 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
2659 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
2660 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
2671 #[derive(Clone, Debug)]
2672 pub struct ZTrailer {
2673 /// File offset to the start of the record.
2676 /// Compression bias as a negative integer, e.g. -100.
2679 /// Always observed as zero.
2682 /// Uncompressed size of each block, except possibly the last. Only
2683 /// `0x3ff000` has been observed so far.
2684 pub block_size: u32,
/// Block descriptors, always `(ztrailer_len - 24) / 24` of them.
2687 pub blocks: Vec<ZBlock>,
2690 #[derive(Clone, Debug)]
2692 /// Offset of block of data if simple compression were used.
2693 pub uncompressed_ofs: u64,
2695 /// Actual offset within the file of the compressed data block.
2696 pub compressed_ofs: u64,
2698 /// The number of bytes in this data block after decompression. This is
2699 /// `block_size` in every data block but the last, which may be smaller.
2700 pub uncompressed_size: u32,
2702 /// The number of bytes in this data block, as stored compressed in this
2704 pub compressed_size: u32,
2708 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
2710 uncompressed_ofs: endian.parse(read_bytes(r)?),
2711 compressed_ofs: endian.parse(read_bytes(r)?),
2712 uncompressed_size: endian.parse(read_bytes(r)?),
2713 compressed_size: endian.parse(read_bytes(r)?),
2719 fn read<R: Read + Seek>(
2724 ) -> Result<Option<ZTrailer>, Error> {
2725 let start_offset = reader.stream_position()?;
2726 if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
2729 let int_bias = endian.parse(read_bytes(reader)?);
2730 let zero = endian.parse(read_bytes(reader)?);
2731 let block_size = endian.parse(read_bytes(reader)?);
2732 let n_blocks: u32 = endian.parse(read_bytes(reader)?);
2733 let expected_n_blocks = (ztrailer_len - 24) / 24;
2734 if n_blocks as u64 != expected_n_blocks {
2735 return Err(Error::BadZlibTrailerNBlocks {
2736 offset: ztrailer_ofs,
2742 let blocks = (0..n_blocks)
2743 .map(|_| ZBlock::read(reader, endian))
2744 .collect::<Result<Vec<_>, _>>()?;
2745 reader.seek(SeekFrom::Start(start_offset))?;
2747 offset: ztrailer_ofs,
/// Reads exactly `N` bytes from `r`, unless the very first read returns no
/// data, in which case `Ok(None)` is returned.
///
/// If the first read returns some but not all of the bytes, the rest must
/// follow; failing to read them is an error.
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
    let mut bytes = [0u8; N];
    match r.read(&mut bytes)? {
        0 => Ok(None),
        got => {
            if got < N {
                r.read_exact(&mut bytes[got..])?;
            }
            Ok(Some(bytes))
        }
    }
}
/// Reads exactly `N` bytes from `r` into a fixed-size array, failing if
/// fewer are available.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut bytes = [0u8; N];
    r.read_exact(&mut bytes).map(|()| bytes)
}
/// Reads exactly `n` bytes from `r` into a newly allocated vector, failing
/// if fewer are available.
fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
    let mut bytes = vec![0u8; n];
    r.read_exact(bytes.as_mut_slice()).map(|()| bytes)
}
2781 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
2782 let length: u32 = endian.parse(read_bytes(r)?);
2783 Ok(read_vec(r, length as usize)?.into())
2786 #[derive(Clone, Debug)]
2787 pub struct LongStringValueLabels<N, S>
2794 /// `(value, label)` pairs, where each value is `width` bytes.
2795 pub labels: Vec<(S, S)>,
2798 impl LongStringValueLabels<RawString, RawString> {
2802 ) -> Result<LongStringValueLabels<Identifier, Cow<'a, str>>, Warning> {
2803 let var_name = decoder.decode(&self.var_name);
2804 let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
2805 .map_err(Warning::InvalidLongStringValueLabelName)?;
2807 let mut labels = Vec::with_capacity(self.labels.len());
2808 for (value, label) in self.labels.iter() {
2809 let value = decoder.decode_exact_length(&value.0);
2810 let label = decoder.decode(label);
2811 labels.push((value, label));
2814 Ok(LongStringValueLabels {
2822 #[derive(Clone, Debug)]
2823 pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
2828 impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
2829 const SUBTYPE: u32 = 21;
2830 const SIZE: Option<u32> = Some(1);
2831 const COUNT: Option<u32> = None;
2832 const NAME: &'static str = "long string value labels record";
2834 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
2835 ext.check_size::<Self>()?;
2837 let mut input = &ext.data[..];
2838 let mut label_set = Vec::new();
2839 while !input.is_empty() {
2840 let var_name = read_string(&mut input, endian)?;
2841 let width: u32 = endian.parse(read_bytes(&mut input)?);
2842 let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
2843 let mut labels = Vec::new();
2844 for _ in 0..n_labels {
2845 let value = read_string(&mut input, endian)?;
2846 let label = read_string(&mut input, endian)?;
2847 labels.push((value, label));
2849 label_set.push(LongStringValueLabels {
2855 Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
2861 impl LongStringValueLabelRecord<RawString, RawString> {
2865 ) -> LongStringValueLabelRecord<Identifier, Cow<'a, str>> {
2866 let mut labels = Vec::with_capacity(self.0.len());
2867 for label in &self.0 {
2868 match label.decode(decoder) {
2869 Ok(set) => labels.push(set),
2870 Err(error) => decoder.warn(error),
2873 LongStringValueLabelRecord(labels)