1 use crate::endian::{Endian, Parse, ToBytes};
3 use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding};
4 use flate2::read::ZlibDecoder;
10 collections::VecDeque,
11 fmt::{Debug, Display, Formatter, Result as FmtResult},
12 io::{Error as IoError, Read, Seek, SeekFrom},
19 use thiserror::Error as ThisError;
21 #[derive(ThisError, Debug)]
23 #[error("Not an SPSS system file")]
26 #[error("Invalid magic number {0:?}")]
29 #[error("I/O error ({0})")]
32 #[error("Invalid SAV compression code {0}")]
33 InvalidSavCompression(u32),
35 #[error("Invalid ZSAV compression code {0}")]
36 InvalidZsavCompression(u32),
38 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
39 BadVariableWidth { offset: u64, width: i32 },
41 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
42 BadDocumentLength { offset: u64, n: usize, max: usize },
44 #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
45 BadRecordType { offset: u64, rec_type: u32 },
47 #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
48 BadVariableLabelCode {
55 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
57 BadNumericMissingValueCode { offset: u64, code: i32 },
59 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
60 BadStringMissingValueCode { offset: u64, code: i32 },
62 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
63 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
65 #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
66 ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
68 #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
69 TooManyVarIndexes { offset: u64, n: u32, max: u32 },
71 #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
72 NoVarIndexes { offset: u64 },
74 #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
78 wrong_types: Vec<u32>,
81 #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")]
88 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
89 ExtensionRecordTooLarge {
96 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
104 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
106 EofInCompressedCase { offset: u64, case_ofs: u64 },
108 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
109 PartialCompressedCase { offset: u64, case_ofs: u64 },
111 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
112 CompressedNumberExpected { offset: u64, case_ofs: u64 },
114 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
115 CompressedStringExpected { offset: u64, case_ofs: u64 },
117 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
118 BadZlibTrailerNBlocks {
121 expected_n_blocks: u64,
125 #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
133 #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
141 #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
142 BadLongMissingValueLength {
148 #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
149 BadEncodingName { offset: u64 },
151 // XXX This is risky because `text` might be arbitrarily long.
152 #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
153 MalformedString { encoding: String, text: String },
155 #[error("Details TBD")]
159 #[derive(Clone, Debug)]
161 Header(HeaderRecord<RawString>),
162 Variable(VariableRecord<RawString, RawStr<8>>),
163 ValueLabel(ValueLabelRecord<RawStr<8>, RawString>),
164 Document(DocumentRecord<RawDocumentLine>),
165 IntegerInfo(IntegerInfoRecord),
166 FloatInfo(FloatInfoRecord),
167 VariableSets(TextRecord),
168 VarDisplay(VarDisplayRecord),
169 MultipleResponse(MultipleResponseRecord),
170 LongStringValueLabels(LongStringValueLabelRecord),
171 LongStringMissingValues(LongStringMissingValueRecord),
172 Encoding(EncodingRecord),
173 NumberOfCases(NumberOfCasesRecord),
174 ProductInfo(TextRecord),
175 LongNames(TextRecord),
176 VeryLongStrings(TextRecord),
177 FileAttributes(TextRecord),
178 VariableAttributes(TextRecord),
179 OtherExtension(Extension),
183 Cases(Rc<RefCell<Cases>>),
190 var_types: &[VarType],
191 warn: &Box<dyn Fn(Error)>,
192 ) -> Result<Option<Record>, Error>
196 let rec_type: u32 = endian.parse(read_bytes(reader)?);
198 2 => Ok(Some(VariableRecord::read(reader, endian)?)),
199 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
200 6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
201 7 => Extension::read(reader, endian, warn),
202 999 => Ok(Some(Record::EndOfHeaders(
203 endian.parse(read_bytes(reader)?),
205 _ => Err(Error::BadRecordType {
206 offset: reader.stream_position()?,
213 // If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
214 // decoded as Latin-1 (actually bytes interpreted as Unicode code points).
215 fn default_decode(s: &[u8]) -> Cow<str> {
216 from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
219 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
220 pub enum Compression {
226 fn offsets(&self) -> Range<u64>;
230 pub struct HeaderRecord<S>
235 pub offsets: Range<u64>,
240 /// Eye-catcher string, product name, in the file's encoding. Padded
241 /// on the right with spaces.
244 /// Layout code, normally either 2 or 3.
245 pub layout_code: u32,
247 /// Number of variable positions, or `None` if the value in the file is
248 /// questionably trustworthy.
249 pub nominal_case_size: Option<u32>,
251 /// Compression type, if any.
252 pub compression: Option<Compression>,
254 /// 1-based variable index of the weight variable, or `None` if the file is
256 pub weight_index: Option<u32>,
258 /// Claimed number of cases, if known.
259 pub n_cases: Option<u32>,
261 /// Compression bias, usually 100.0.
264 /// `dd mmm yy` in the file's encoding.
265 pub creation_date: S,
267 /// `HH:MM:SS` in the file's encoding.
268 pub creation_time: S,
270 /// File label, in the file's encoding. Padded on the right with spaces.
273 /// Endianness of the data in the file header.
277 impl<S> HeaderRecord<S>
281 fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
285 writeln!(f, "{name:>17}: {:?}", value)
289 impl<S> Debug for HeaderRecord<S>
293 fn fmt(&self, f: &mut Formatter) -> FmtResult {
294 writeln!(f, "File header record:")?;
295 self.debug_field(f, "Magic", self.magic)?;
296 self.debug_field(f, "Product name", &self.eye_catcher)?;
297 self.debug_field(f, "Layout code", self.layout_code)?;
298 self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
299 self.debug_field(f, "Compression", self.compression)?;
300 self.debug_field(f, "Weight index", self.weight_index)?;
301 self.debug_field(f, "Number of cases", self.n_cases)?;
302 self.debug_field(f, "Compression bias", self.bias)?;
303 self.debug_field(f, "Creation date", &self.creation_date)?;
304 self.debug_field(f, "Creation time", &self.creation_time)?;
305 self.debug_field(f, "File label", &self.file_label)?;
306 self.debug_field(f, "Endianness", self.endian)
310 impl HeaderRecord<RawString> {
311 fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
312 let start = r.stream_position()?;
314 let magic: [u8; 4] = read_bytes(r)?;
315 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
317 let eye_catcher = RawString(read_vec(r, 60)?);
318 let layout_code: [u8; 4] = read_bytes(r)?;
// Detect the file's byte order from the layout code. The layout code is
// normally either 2 or 3, so probe for both values; previously the fallback
// repeated the probe for 2 and could never succeed where the first one failed.
319 let endian = Endian::identify_u32(2, layout_code)
320 .or_else(|| Endian::identify_u32(3, layout_code))
321 .ok_or_else(|| Error::NotASystemFile)?;
322 let layout_code = endian.parse(layout_code);
324 let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
325 let nominal_case_size =
326 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
328 let compression_code: u32 = endian.parse(read_bytes(r)?);
329 let compression = match (magic, compression_code) {
330 (Magic::Zsav, 2) => Some(Compression::ZLib),
331 (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
333 (_, 1) => Some(Compression::Simple),
334 (_, code) => return Err(Error::InvalidSavCompression(code)),
337 let weight_index: u32 = endian.parse(read_bytes(r)?);
338 let weight_index = (weight_index > 0).then_some(weight_index);
340 let n_cases: u32 = endian.parse(read_bytes(r)?);
341 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
343 let bias: f64 = endian.parse(read_bytes(r)?);
345 let creation_date = RawString(read_vec(r, 9)?);
346 let creation_time = RawString(read_vec(r, 8)?);
347 let file_label = RawString(read_vec(r, 64)?);
348 let _: [u8; 3] = read_bytes(r)?;
351 offsets: start..r.stream_position()?,
367 fn decode<'a>(&'a self, decoder: &Decoder) -> HeaderRecord<Cow<'a, str>> {
368 let eye_catcher = decoder.decode(&self.eye_catcher);
369 let file_label = decoder.decode(&self.file_label);
370 let creation_date = decoder.decode(&self.creation_date);
371 let creation_time = decoder.decode(&self.creation_time);
374 weight_index: self.weight_index,
375 n_cases: self.n_cases,
377 offsets: self.offsets.clone(),
379 layout_code: self.layout_code,
380 nominal_case_size: self.nominal_case_size,
381 compression: self.compression,
391 encoding: &'static Encoding,
392 warn: Box<dyn Fn(Error)>,
396 fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
397 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
399 (self.warn)(Error::MalformedString {
400 encoding: self.encoding.name().into(),
401 text: output.clone().into(),
407 fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
408 self.decode_slice(input.0.as_slice())
411 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
412 /// re-encoding the result back into `self.encoding` will have exactly the
413 /// same length in bytes.
415 /// XXX warn about errors?
416 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
417 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
418 // This is the common case. Usually there will be no errors.
421 // Unusual case. Don't bother to optimize it much.
422 let mut decoder = self.encoding.new_decoder_without_bom_handling();
423 let mut output = String::with_capacity(
425 .max_utf8_buffer_length_without_replacement(input.len())
428 let mut rest = input;
429 while !rest.is_empty() {
430 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
431 (DecoderResult::InputEmpty, _) => break,
432 (DecoderResult::OutputFull, _) => unreachable!(),
433 (DecoderResult::Malformed(a, b), consumed) => {
434 let skipped = a as usize + b as usize;
435 output.extend(repeat('?').take(skipped));
436 rest = &rest[consumed..];
440 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
446 impl<S> Header for HeaderRecord<S>
450 fn offsets(&self) -> Range<u64> {
455 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
457 /// Regular system file.
460 /// System file with Zlib-compressed data.
463 /// EBCDIC-encoded system file.
468 /// Magic number for a regular system file.
469 pub const SAV: [u8; 4] = *b"$FL2";
471 /// Magic number for a system file that contains zlib-compressed data.
472 pub const ZSAV: [u8; 4] = *b"$FL3";
474 /// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded
476 pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
479 impl Debug for Magic {
480 fn fmt(&self, f: &mut Formatter) -> FmtResult {
481 let s = match *self {
482 Magic::Sav => "$FL2",
483 Magic::Zsav => "$FL3",
484 Magic::Ebcdic => "($FL2 in EBCDIC)",
490 impl TryFrom<[u8; 4]> for Magic {
493 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
495 Magic::SAV => Ok(Magic::Sav),
496 Magic::ZSAV => Ok(Magic::Zsav),
497 Magic::EBCDIC => Ok(Magic::Ebcdic),
498 _ => Err(Error::BadMagic(value)),
503 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
510 fn from_width(width: i32) -> VarType {
512 0 => VarType::Numeric,
513 _ => VarType::String,
517 fn opposite(self) -> VarType {
519 Self::Numeric => Self::String,
520 Self::String => Self::Numeric,
525 impl Display for VarType {
526 fn fmt(&self, f: &mut Formatter) -> FmtResult {
528 VarType::Numeric => write!(f, "numeric"),
529 VarType::String => write!(f, "string"),
534 #[derive(Copy, Clone)]
/// A `Value` whose string payload is still an undecoded 8-byte `RawStr`.
543 type RawValue = Value<RawStr<8>>;
545 impl<S> Debug for Value<S>
549 fn fmt(&self, f: &mut Formatter) -> FmtResult {
551 Value::Number(Some(number)) => write!(f, "{number:?}"),
552 Value::Number(None) => write!(f, "SYSMIS"),
553 Value::String(s) => write!(f, "{:?}", s),
559 fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Self, IoError> {
561 &UntypedValue(read_bytes(r)?),
567 pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
569 VarType::String => Value::String(RawStr(raw.0)),
570 VarType::Numeric => {
571 let number: f64 = endian.parse(raw.0);
572 Value::Number((number != -f64::MAX).then_some(number))
577 fn read_case<R: Read + Seek>(
579 var_types: &[VarType],
581 ) -> Result<Option<Vec<Self>>, Error> {
582 let case_start = reader.stream_position()?;
583 let mut values = Vec::with_capacity(var_types.len());
584 for (i, &var_type) in var_types.iter().enumerate() {
585 let Some(raw) = try_read_bytes(reader)? else {
589 let offset = reader.stream_position()?;
590 return Err(Error::EofInCase {
592 case_ofs: offset - case_start,
593 case_len: var_types.len() * 8,
597 values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
602 fn read_compressed_case<R: Read + Seek>(
604 var_types: &[VarType],
605 codes: &mut VecDeque<u8>,
608 ) -> Result<Option<Vec<Self>>, Error> {
609 let case_start = reader.stream_position()?;
610 let mut values = Vec::with_capacity(var_types.len());
611 for (i, &var_type) in var_types.iter().enumerate() {
613 let Some(code) = codes.pop_front() else {
614 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
618 let offset = reader.stream_position()?;
619 return Err(Error::EofInCompressedCase {
621 case_ofs: offset - case_start,
625 codes.extend(new_codes.into_iter());
630 1..=251 => match var_type {
631 VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
633 break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
640 let offset = reader.stream_position()?;
641 return Err(Error::PartialCompressedCase {
643 case_ofs: offset - case_start,
648 break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
650 254 => match var_type {
651 VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC
652 VarType::Numeric => {
653 return Err(Error::CompressedStringExpected {
655 case_ofs: reader.stream_position()? - case_start,
659 255 => match var_type {
660 VarType::Numeric => break Self::Number(None),
662 return Err(Error::CompressedNumberExpected {
664 case_ofs: reader.stream_position()? - case_start,
675 fn decode(&self, decoder: &Decoder) -> Value<String> {
677 Self::Number(x) => Value::Number(*x),
678 Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
683 struct ZlibDecodeMultiple<R>
687 reader: Option<ZlibDecoder<R>>,
690 impl<R> ZlibDecodeMultiple<R>
694 fn new(reader: R) -> ZlibDecodeMultiple<R> {
696 reader: Some(ZlibDecoder::new(reader)),
701 impl<R> Read for ZlibDecodeMultiple<R>
705 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
707 match self.reader.as_mut().unwrap().read(buf)? {
709 let inner = self.reader.take().unwrap().into_inner();
710 self.reader = Some(ZlibDecoder::new(inner));
718 impl<R> Seek for ZlibDecodeMultiple<R>
722 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
723 self.reader.as_mut().unwrap().get_mut().seek(pos)
732 ztrailer_offset: u64,
741 R: Read + Seek + 'static,
744 warn: Box<dyn Fn(Error)>,
746 header: HeaderRecord<RawString>,
747 var_types: Vec<VarType>,
754 R: Read + Seek + 'static,
756 pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
758 F: Fn(Error) + 'static,
760 let header = HeaderRecord::read(&mut reader)?;
762 reader: Some(reader),
763 warn: Box::new(warn),
765 var_types: Vec::new(),
766 state: ReaderState::Start,
769 fn cases(&mut self) -> Cases {
770 self.state = ReaderState::End;
772 self.reader.take().unwrap(),
773 take(&mut self.var_types),
779 impl<R> Iterator for Reader<R>
781 R: Read + Seek + 'static,
783 type Item = Result<Record, Error>;
785 fn next(&mut self) -> Option<Self::Item> {
787 ReaderState::Start => {
788 self.state = ReaderState::Headers;
789 Some(Ok(Record::Header(self.header.clone())))
791 ReaderState::Headers => {
794 self.reader.as_mut().unwrap(),
796 self.var_types.as_slice(),
799 Ok(Some(record)) => break record,
801 Err(error) => return Some(Err(error)),
805 Record::Variable(VariableRecord { width, .. }) => {
806 self.var_types.push(VarType::from_width(width));
808 Record::EndOfHeaders(_) => {
809 self.state = if let Some(Compression::ZLib) = self.header.compression {
810 ReaderState::ZlibHeader
819 ReaderState::ZlibHeader => {
820 let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
822 Ok(zheader) => zheader,
823 Err(error) => return Some(Err(error)),
825 self.state = ReaderState::ZlibTrailer {
826 ztrailer_offset: zheader.ztrailer_offset,
827 ztrailer_len: zheader.ztrailer_len,
829 Some(Ok(Record::ZHeader(zheader)))
831 ReaderState::ZlibTrailer {
835 match ZTrailer::read(
836 self.reader.as_mut().unwrap(),
841 Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
842 Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
843 Err(error) => Some(Err(error)),
846 ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
847 ReaderState::End => None,
// Combines `Read` and `Seek` into one trait so that a reader needing both can
// be stored as a single trait object, as in `Box<dyn ReadSeek>`.
852 trait ReadSeek: Read + Seek {}
// Blanket implementation: every type that is `Read + Seek` is also `ReadSeek`.
853 impl<T> ReadSeek for T where T: Read + Seek {}
856 reader: Box<dyn ReadSeek>,
857 var_types: Vec<VarType>,
858 compression: Option<Compression>,
865 impl Debug for Cases {
866 fn fmt(&self, f: &mut Formatter) -> FmtResult {
872 fn new<R>(reader: R, var_types: Vec<VarType>, header: &HeaderRecord<RawString>) -> Self
874 R: Read + Seek + 'static,
877 reader: if header.compression == Some(Compression::ZLib) {
878 Box::new(ZlibDecodeMultiple::new(reader))
883 compression: header.compression,
885 endian: header.endian,
886 codes: VecDeque::with_capacity(8),
892 impl Iterator for Cases {
893 type Item = Result<Vec<RawValue>, Error>;
895 fn next(&mut self) -> Option<Self::Item> {
900 let retval = if self.compression.is_some() {
901 Value::read_compressed_case(
910 Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
912 self.eof = matches!(retval, None | Some(Err(_)));
/// A format specification packed into a single `u32`.
///
/// The `Debug` implementation unpacks it as three fields: `self.0 >> 16` is
/// passed to `format_name` to obtain the format's name, `(self.0 >> 8) & 0xff`
/// is printed after the name, and `self.0 & 0xff` is printed after a `.`
/// (presumably the width and number of decimals, respectively -- TODO confirm).
917 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
918 pub struct Spec(pub u32);
920 impl Debug for Spec {
921 fn fmt(&self, f: &mut Formatter) -> FmtResult {
922 let type_ = format_name(self.0 >> 16);
923 let w = (self.0 >> 8) & 0xff;
924 let d = self.0 & 0xff;
925 write!(f, "{:06x} ({type_}{w}.{d})", self.0)
929 fn format_name(type_: u32) -> Cow<'static, str> {
968 _ => return format!("<unknown format {type_}>").into(),
974 pub struct MissingValues<S>
978 /// Individual missing values, up to 3 of them.
979 pub values: Vec<Value<S>>,
981 /// Optional range of missing values.
982 pub range: Option<(Value<S>, Value<S>)>,
985 impl<S> Debug for MissingValues<S>
989 fn fmt(&self, f: &mut Formatter) -> FmtResult {
990 for (i, value) in self.values.iter().enumerate() {
994 write!(f, "{value:?}")?;
997 if let Some((low, high)) = &self.range {
998 if !self.values.is_empty() {
1001 write!(f, "{low:?} THRU {high:?}")?;
1004 if self.is_empty() {
1012 impl<S> MissingValues<S>
1016 fn is_empty(&self) -> bool {
1017 self.values.is_empty() && self.range.is_none()
1021 impl MissingValues<RawStr<8>> {
1022 fn read<R: Read + Seek>(
1028 ) -> Result<Self, Error> {
1029 let (n_values, has_range) = match (width, code) {
1030 (_, 0..=3) => (code, false),
1031 (0, -2) => (0, true),
1032 (0, -3) => (1, true),
1033 (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
1034 (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
1037 let var_type = VarType::from_width(width);
1039 let mut values = Vec::new();
1040 for _ in 0..n_values {
1041 values.push(RawValue::read(r, var_type, endian)?);
1043 let range = if has_range {
1044 let low = RawValue::read(r, var_type, endian)?;
1045 let high = RawValue::read(r, var_type, endian)?;
1050 Ok(Self { values, range })
1052 fn decode<'a>(&'a self, decoder: &Decoder) -> MissingValues<String> {
1057 .map(|value| value.decode(decoder))
1062 .map(|(low, high)| (low.decode(decoder), high.decode(decoder))),
1068 pub struct VariableRecord<S, V>
1073 /// Range of offsets in file.
1074 pub offsets: Range<u64>,
1076 /// Variable width, in the range -1..=255.
1079 /// Variable name, padded on the right with spaces.
1083 pub print_format: Spec,
1086 pub write_format: Spec,
1089 pub missing_values: MissingValues<V>,
1091 /// Optional variable label.
1092 pub label: Option<S>,
1095 impl<S, V> Debug for VariableRecord<S, V>
1100 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1105 match self.width.cmp(&0) {
1106 Ordering::Greater => "string",
1107 Ordering::Equal => "numeric",
1108 Ordering::Less => "long string continuation record",
1111 writeln!(f, "Print format: {:?}", self.print_format)?;
1112 writeln!(f, "Write format: {:?}", self.write_format)?;
1113 writeln!(f, "Name: {:?}", &self.name)?;
1114 writeln!(f, "Variable label: {:?}", self.label)?;
1115 writeln!(f, "Missing values: {:?}", self.missing_values)
1119 impl VariableRecord<RawString, RawStr<8>> {
1120 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1121 let start_offset = r.stream_position()?;
1122 let width: i32 = endian.parse(read_bytes(r)?);
1123 let code_offset = r.stream_position()?;
1124 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
1125 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
1126 let print_format = Spec(endian.parse(read_bytes(r)?));
1127 let write_format = Spec(endian.parse(read_bytes(r)?));
1128 let name = RawString(read_vec(r, 8)?);
1130 let label = match has_variable_label {
1133 let len: u32 = endian.parse(read_bytes(r)?);
1134 let read_len = len.min(65535) as usize;
1135 let label = RawString(read_vec(r, read_len)?);
1137 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
1138 let _ = read_vec(r, padding_bytes as usize)?;
1143 return Err(Error::BadVariableLabelCode {
1146 code: has_variable_label,
1151 let missing_values =
1152 MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
1154 let end_offset = r.stream_position()?;
1156 Ok(Record::Variable(VariableRecord {
1157 offsets: start_offset..end_offset,
1167 fn decode<'a>(&'a self, decoder: &Decoder) -> VariableRecord<Cow<'a, str>, String> {
1169 offsets: self.offsets.clone(),
1171 name: decoder.decode(&self.name),
1172 print_format: self.print_format,
1173 write_format: self.write_format,
1174 missing_values: self.missing_values.decode(decoder),
1175 label: self.label.as_ref().map(|label| decoder.decode(label)),
/// Eight raw bytes of a value, before it is known whether they hold a number
/// or a string.
///
/// `Value::from_raw` interprets the bytes once the variable type and the
/// endianness are known.
1180 #[derive(Copy, Clone)]
1181 pub struct UntypedValue(pub [u8; 8]);
1183 impl Debug for UntypedValue {
1184 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1185 let little: f64 = Endian::Little.parse(self.0);
1186 let little = format!("{:?}", little);
1187 let big: f64 = Endian::Big.parse(self.0);
1188 let big = format!("{:?}", big);
1189 let number = if little.len() <= big.len() {
1194 write!(f, "{number}")?;
1196 let string = default_decode(&self.0);
1198 .split(|c: char| c == '\0' || c.is_control())
1201 write!(f, "{string:?}")?;
/// A variable-length byte string that has not been decoded into text.
///
/// Its `Debug` output shows the bytes via `default_decode`, that is, as UTF-8
/// if they are valid UTF-8 and otherwise as Latin-1.
1207 pub struct RawString(pub Vec<u8>);
1209 impl From<Vec<u8>> for RawString {
1210 fn from(source: Vec<u8>) -> Self {
1215 impl From<&[u8]> for RawString {
1216 fn from(source: &[u8]) -> Self {
1221 impl Debug for RawString {
1222 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1223 write!(f, "{:?}", default_decode(self.0.as_slice()))
/// A fixed-length string of exactly `N` bytes that has not been decoded into
/// text.
///
/// Its `Debug` output shows the bytes via `default_decode`, that is, as UTF-8
/// if they are valid UTF-8 and otherwise as Latin-1.
1227 #[derive(Copy, Clone)]
1228 pub struct RawStr<const N: usize>(pub [u8; N]);
1230 impl<const N: usize> From<[u8; N]> for RawStr<N> {
1231 fn from(source: [u8; N]) -> Self {
1236 impl<const N: usize> Debug for RawStr<N> {
1237 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1238 write!(f, "{:?}", default_decode(&self.0))
1242 #[derive(Clone, Debug)]
1243 pub struct ValueLabel<V, S>
1248 pub value: Value<V>,
1253 pub struct ValueLabelRecord<V, S>
1258 /// Range of offsets in file.
1259 pub offsets: Range<u64>,
1262 pub labels: Vec<ValueLabel<V, S>>,
1264 /// The 1-based indexes of the variables that the labels apply to.
1265 pub dict_indexes: Vec<u32>,
1267 /// The type shared by all of the variables.
1268 pub var_type: VarType,
1271 impl<V, S> Debug for ValueLabelRecord<V, S>
1276 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1277 writeln!(f, "labels: ")?;
1278 for label in self.labels.iter() {
1279 writeln!(f, "{label:?}")?;
1281 write!(f, "apply to {} variables", self.var_type)?;
1282 for dict_index in self.dict_indexes.iter() {
1283 write!(f, " #{dict_index}")?;
1289 impl<V, S> Header for ValueLabelRecord<V, S>
1294 fn offsets(&self) -> Range<u64> {
1295 self.offsets.clone()
1299 impl<V, S> ValueLabelRecord<V, S>
1304 /// Maximum number of value labels in a record.
1305 pub const MAX_LABELS: u32 = u32::MAX / 8;
1307 /// Maximum number of variable indexes in a record.
1308 pub const MAX_INDEXES: u32 = u32::MAX / 8;
1311 impl ValueLabelRecord<RawStr<8>, RawString> {
1312 fn read<R: Read + Seek>(
1315 var_types: &[VarType],
1316 warn: &Box<dyn Fn(Error)>,
1317 ) -> Result<Option<Record>, Error> {
1318 let label_offset = r.stream_position()?;
1319 let n: u32 = endian.parse(read_bytes(r)?);
1320 if n > Self::MAX_LABELS {
1321 return Err(Error::BadNumberOfValueLabels {
1322 offset: label_offset,
1324 max: Self::MAX_LABELS,
1328 let mut labels = Vec::new();
1330 let value = UntypedValue(read_bytes(r)?);
1331 let label_len: u8 = endian.parse(read_bytes(r)?);
1332 let label_len = label_len as usize;
1333 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
1335 let mut label = read_vec(r, padded_len - 1)?;
1336 label.truncate(label_len);
1337 labels.push((value, RawString(label)));
1340 let index_offset = r.stream_position()?;
1341 let rec_type: u32 = endian.parse(read_bytes(r)?);
1343 return Err(Error::ExpectedVarIndexRecord {
1344 offset: index_offset,
1349 let n: u32 = endian.parse(read_bytes(r)?);
1350 if n > Self::MAX_INDEXES {
1351 return Err(Error::TooManyVarIndexes {
1352 offset: index_offset,
1354 max: Self::MAX_INDEXES,
1358 let index_offset = r.stream_position()?;
1359 let mut dict_indexes = Vec::with_capacity(n as usize);
1360 let mut invalid_indexes = Vec::new();
1362 let index: u32 = endian.parse(read_bytes(r)?);
// Keep only indexes within the valid 1-based range [1, var_types.len()].
// Anything else is collected separately so that it can be reported, and so
// that it is never used to index `var_types` afterwards. (The test used to
// be inverted, which stored the out-of-range indexes as if they were valid.)
1363 if index != 0 && index as usize <= var_types.len() {
1364 dict_indexes.push(index);
1366 invalid_indexes.push(index);
1369 if !invalid_indexes.is_empty() {
1370 warn(Error::InvalidVarIndexes {
1371 offset: index_offset,
1372 max: var_types.len(),
1373 invalid: invalid_indexes,
1377 let Some(&first_index) = dict_indexes.first() else {
1378 warn(Error::NoVarIndexes {
1379 offset: index_offset,
1383 let var_type = var_types[first_index as usize - 1];
1384 let mut wrong_type_indexes = Vec::new();
1385 dict_indexes.retain(|&index| {
1386 if var_types[index as usize - 1] != var_type {
1387 wrong_type_indexes.push(index);
1393 if !wrong_type_indexes.is_empty() {
1394 warn(Error::MixedVarTypes {
1395 offset: index_offset,
1397 wrong_types: wrong_type_indexes,
1403 .map(|(value, label)| ValueLabel {
1404 value: Value::from_raw(&value, var_type, endian),
1409 let end_offset = r.stream_position()?;
1410 Ok(Some(Record::ValueLabel(ValueLabelRecord {
1411 offsets: label_offset..end_offset,
1419 #[derive(Clone, Debug)]
1420 pub struct DocumentRecord<S>
1424 pub offsets: Range<u64>,
1426 /// The document, as an array of 80-byte lines.
/// One undecoded document line: a `RawStr` of exactly `DOC_LINE_LEN` bytes.
1430 pub type RawDocumentLine = RawStr<DOC_LINE_LEN>;
1432 /// Length of a line in a document. Document lines are fixed-length and
1433 /// padded on the right with spaces.
1434 pub const DOC_LINE_LEN: usize = 80;
1436 impl DocumentRecord<RawDocumentLine> {
1437 /// Maximum number of lines we will accept in a document. This is simply
1438 /// the maximum number that will fit in a 32-bit space.
1439 pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
1441 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1442 let start_offset = r.stream_position()?;
1443 let n: u32 = endian.parse(read_bytes(r)?);
1445 if n > Self::MAX_LINES {
1446 Err(Error::BadDocumentLength {
1447 offset: start_offset,
1449 max: Self::MAX_LINES,
1452 let mut lines = Vec::with_capacity(n);
1454 lines.push(RawStr(read_bytes(r)?));
1456 let end_offset = r.stream_position()?;
1457 Ok(Record::Document(DocumentRecord {
1458 offsets: start_offset..end_offset,
1464 fn decode<'a>(&'a self, decoder: &Decoder) -> DocumentRecord<Cow<'a, str>> {
1466 offsets: self.offsets.clone(),
1467 lines: self.lines.iter().map(|s| decoder.decode_slice(&s.0)).collect(),
1472 impl<S> Header for DocumentRecord<S>
1476 fn offsets(&self) -> Range<u64> {
1477 self.offsets.clone()
1481 trait ExtensionRecord {
1483 const SIZE: Option<u32>;
1484 const COUNT: Option<u32>;
1485 const NAME: &'static str;
1486 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error>;
1489 #[derive(Clone, Debug)]
1490 pub struct IntegerInfoRecord {
1491 pub offsets: Range<u64>,
1492 pub version: (i32, i32, i32),
1493 pub machine_code: i32,
1494 pub floating_point_rep: i32,
1495 pub compression_code: i32,
1496 pub endianness: i32,
1497 pub character_code: i32,
1500 impl ExtensionRecord for IntegerInfoRecord {
1501 const SUBTYPE: u32 = 3;
1502 const SIZE: Option<u32> = Some(4);
1503 const COUNT: Option<u32> = Some(8);
1504 const NAME: &'static str = "integer record";
1506 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
1507 ext.check_size::<Self>()?;
1509 let mut input = &ext.data[..];
1510 let data: Vec<i32> = (0..8)
1511 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1513 Ok(Record::IntegerInfo(IntegerInfoRecord {
1514 offsets: ext.offsets.clone(),
1515 version: (data[0], data[1], data[2]),
1516 machine_code: data[3],
1517 floating_point_rep: data[4],
1518 compression_code: data[5],
1519 endianness: data[6],
1520 character_code: data[7],
1525 #[derive(Clone, Debug)]
1526 pub struct FloatInfoRecord {
1532 impl ExtensionRecord for FloatInfoRecord {
1533 const SUBTYPE: u32 = 4;
1534 const SIZE: Option<u32> = Some(8);
1535 const COUNT: Option<u32> = Some(3);
1536 const NAME: &'static str = "floating point record";
/// Parses a floating point info record: three 8-byte elements decoded as
/// `f64` using `endian`. Fails if the size or count is not the expected one.
1538 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
1539 ext.check_size::<Self>()?;
1541 let mut input = &ext.data[..];
1542 let data: Vec<f64> = (0..3)
// NOTE(review): this `unwrap` assumes `ext.data` really holds `size * count`
// bytes once `check_size` has passed -- confirm for every way an `Extension`
// can be constructed.
1543 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1545 Ok(Record::FloatInfo(FloatInfoRecord {
/// Label source recorded for a multiple dichotomy set; the parser in this
/// file produces the `VarLabels` and `CountedValues` variants.
1553 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1554 pub enum CategoryLabels {
/// Kind of a multiple response set, as selected by the type byte (`C`, `D`
/// or `E`) in the set definition.
1559 #[derive(Clone, Debug)]
1560 pub enum MultipleResponseType {
/// Where the categories of a multiple dichotomy set take their labels from.
1563 labels: CategoryLabels,
1568 impl MultipleResponseType {
1569 fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Error> {
1570 let (mr_type, input) = match input.split_first() {
1571 Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
1572 Some((b'D', input)) => {
1573 let (value, input) = parse_counted_string(input)?;
1575 MultipleResponseType::MultipleDichotomy {
1577 labels: CategoryLabels::VarLabels,
1582 Some((b'E', input)) => {
1583 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
1584 (CategoryLabels::CountedValues, rest)
1585 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
1586 (CategoryLabels::VarLabels, rest)
1588 return Err(Error::TBD);
1590 let (value, input) = parse_counted_string(input)?;
1592 MultipleResponseType::MultipleDichotomy { value, labels },
1596 _ => return Err(Error::TBD),
1598 Ok((mr_type, input))
/// One multiple response set parsed from a multiple response set record.
1602 #[derive(Clone, Debug)]
1603 pub struct MultipleResponseSet {
/// Set name: the bytes that precede `=` in the definition.
1604 pub name: RawString,
/// Set label, stored in the record as a counted string.
1605 pub label: RawString,
/// Kind of set, with its counted value and label source where applicable.
1606 pub mr_type: MultipleResponseType,
/// Space-separated variable names that follow the label.
1607 pub short_names: Vec<RawString>,
1610 impl MultipleResponseSet {
1611 fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
1612 let Some(equals) = input.iter().position(|&b| b == b'=') else {
1613 return Err(Error::TBD);
1615 let (name, input) = input.split_at(equals);
1616 let (mr_type, input) = MultipleResponseType::parse(input)?;
1617 let Some(input) = input.strip_prefix(b" ") else {
1618 return Err(Error::TBD);
1620 let (label, mut input) = parse_counted_string(input)?;
1621 let mut vars = Vec::new();
1622 while input.first() != Some(&b'\n') {
1623 match input.split_first() {
1624 Some((b' ', rest)) => {
1625 let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
1626 return Err(Error::TBD);
1628 let (var, rest) = rest.split_at(length);
1629 if !var.is_empty() {
1630 vars.push(var.into());
1634 _ => return Err(Error::TBD),
1637 while input.first() == Some(&b'\n') {
1638 input = &input[1..];
1641 MultipleResponseSet {
/// Parsed multiple response set record: the sets in the order they appear.
1652 #[derive(Clone, Debug)]
1653 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1655 impl ExtensionRecord for MultipleResponseRecord {
1656 const SUBTYPE: u32 = 7;
1657 const SIZE: Option<u32> = Some(1);
1658 const COUNT: Option<u32> = None;
1659 const NAME: &'static str = "multiple response set record";
1661 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Error> {
1662 ext.check_size::<Self>()?;
1664 let mut input = &ext.data[..];
1665 let mut sets = Vec::new();
1666 while !input.is_empty() {
1667 let (set, rest) = MultipleResponseSet::parse(input)?;
1671 Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
1675 fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Error> {
1676 let Some(space) = input.iter().position(|&b| b == b' ') else {
1677 return Err(Error::TBD);
1679 let Ok(length) = from_utf8(&input[..space]) else {
1680 return Err(Error::TBD);
1682 let Ok(length): Result<usize, _> = length.parse() else {
1683 return Err(Error::TBD);
1686 let input = &input[space + 1..];
1687 if input.len() < length {
1688 return Err(Error::TBD);
1691 let (string, rest) = input.split_at(length);
1692 Ok((string.into(), rest))
/// Parsed variable display record: the record's 32-bit values in file order.
1695 #[derive(Clone, Debug)]
1696 pub struct VarDisplayRecord(pub Vec<u32>);
1698 impl ExtensionRecord for VarDisplayRecord {
1699 const SUBTYPE: u32 = 11;
1700 const SIZE: Option<u32> = Some(4);
1701 const COUNT: Option<u32> = None;
1702 const NAME: &'static str = "variable display record";
1704 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
1705 ext.check_size::<Self>()?;
1707 let mut input = &ext.data[..];
1708 let display = (0..ext.count)
1709 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1711 Ok(Record::VarDisplay(VarDisplayRecord(display)))
/// Missing values for one long string variable, as read from a long string
/// missing values record.
1715 #[derive(Clone, Debug)]
1716 pub struct LongStringMissingValues {
// Variable name, read from the record as a length-prefixed string.
1718 pub var_name: RawString,
// The variable's missing values, each held as an 8-byte raw string.
1721 pub missing_values: MissingValues<RawStr<8>>,
/// Parsed long string missing values record: one entry per variable listed.
1724 #[derive(Clone, Debug)]
1725 pub struct LongStringMissingValueRecord(pub Vec<LongStringMissingValues>);
1727 impl ExtensionRecord for LongStringMissingValueRecord {
1728 const SUBTYPE: u32 = 22;
1729 const SIZE: Option<u32> = Some(1);
1730 const COUNT: Option<u32> = None;
1731 const NAME: &'static str = "long string missing values record";
/// Parses a long string missing values record.  The data is a sequence of
/// entries, read until the input is exhausted; each entry holds a
/// length-prefixed variable name, a one-byte count of missing values, a
/// 32-bit value length, and then the 8-byte missing values themselves.
1733 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
1734 ext.check_size::<Self>()?;
1736 let mut input = &ext.data[..];
1737 let mut missing_value_set = Vec::new();
1738 while !input.is_empty() {
1739 let var_name = read_string(&mut input, endian)?;
1740 let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
1741 let value_len: u32 = endian.parse(read_bytes(&mut input)?);
// NOTE(review): `value_len` was read as 4 bytes, yet 8 is subtracted when
// computing the offset for the error report -- confirm which field this
// offset is meant to point at.
1743 let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
1744 return Err(Error::BadLongMissingValueLength {
1745 record_offset: ext.offsets.start,
1750 let mut values = Vec::new();
1751 for i in 0..n_missing_values {
1752 let value: [u8; 8] = read_bytes(&mut input)?;
// The same 8 bytes are also decoded as an integer so that a repeated
// length field (value 8) can be told apart from a real missing value.
1753 let numeric_value: u64 = endian.parse(value);
1754 let value = if i > 0 && numeric_value == 8 {
1755 // Tolerate files written by old, buggy versions of PSPP
1756 // where we believed that the value_length was repeated
1757 // before each missing value.
1758 read_bytes(&mut input)?
1762 values.push(Value::String(RawStr(value)));
1764 let missing_values = MissingValues {
1768 missing_value_set.push(LongStringMissingValues {
1773 Ok(Record::LongStringMissingValues(
1774 LongStringMissingValueRecord(missing_value_set),
/// Parsed encoding record: the record's data interpreted as a UTF-8 string.
1779 #[derive(Clone, Debug)]
1780 pub struct EncodingRecord(pub String);
1782 impl ExtensionRecord for EncodingRecord {
1783 const SUBTYPE: u32 = 20;
1784 const SIZE: Option<u32> = Some(1);
1785 const COUNT: Option<u32> = None;
1786 const NAME: &'static str = "encoding record";
1788 fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Error> {
1789 ext.check_size::<Self>()?;
1791 Ok(Record::Encoding(EncodingRecord(
1792 String::from_utf8(ext.data.clone()).map_err(|_| Error::BadEncodingName {
1793 offset: ext.offsets.start,
/// Parsed extended number of cases record (subtype 16): two 8-byte values.
1799 #[derive(Copy, Clone, Debug)]
1800 pub struct NumberOfCasesRecord {
1801 /// Always observed as 1.
1804 /// Number of cases.
1808 impl ExtensionRecord for NumberOfCasesRecord {
1809 const SUBTYPE: u32 = 16;
1810 const SIZE: Option<u32> = Some(8);
1811 const COUNT: Option<u32> = Some(2);
1812 const NAME: &'static str = "extended number of cases record";
1814 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
1815 ext.check_size::<Self>()?;
1817 let mut input = &ext.data[..];
1818 let one = endian.parse(read_bytes(&mut input)?);
1819 let n_cases = endian.parse(read_bytes(&mut input)?);
1821 Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
/// An extension record whose data is kept as undecoded text.
1825 #[derive(Clone, Debug)]
1826 pub struct TextRecord {
// Offsets taken over from the extension the text came from.
1827 pub offsets: Range<u64>,
1829 /// The text content of the record.
1830 pub text: RawString,
1833 impl From<Extension> for TextRecord {
1834 fn from(source: Extension) -> Self {
1836 offsets: source.offsets,
1837 text: source.data.into(),
/// One variable set, parsed from a `NAME=VAR VAR ...` line.
1842 #[derive(Clone, Debug)]
1843 pub struct VariableSet {
// Whitespace-separated names that follow the `=` on the line.
1845 pub vars: Vec<String>,
/// Parses one variable set line.  The text before the first `=` is the set
/// name and the rest is split on ASCII whitespace into variable names.
/// Fails with `Error::TBD` if the line contains no `=`.
1849 fn parse(input: &str) -> Result<Self, Error> {
1850 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
1851 let vars = input.split_ascii_whitespace().map(String::from).collect();
/// Decoded variable sets record.
1859 #[derive(Clone, Debug)]
1860 pub struct VariableSetRecord {
// Offsets copied from the text record this was decoded from.
1861 pub offsets: Range<u64>,
// Sets that parsed successfully, in line order.
1862 pub sets: Vec<VariableSet>
1865 impl VariableSetRecord {
1866 fn decode<'a>(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
1867 let mut sets = Vec::new();
1868 let input = decoder.decode(&source.text);
1869 for line in input.lines() {
1870 if let Some(set) = VariableSet::parse(line).warn_on_error(&decoder.warn) {
1874 VariableSetRecord { offsets: source.offsets.clone(), sets }
/// Helper for converting a fallible result into an `Option`, given a warning
/// callback that accepts the error.
1878 trait WarnOnError<T> {
/// Consumes `self` and returns an `Option<T>`; `warn` is the callback
/// available for the error case.
1879 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
1881 impl<T> WarnOnError<T> for Result<T, Error> {
/// Maps `Ok(result)` to `Some(result)`.
// NOTE(review): the error arm is outside this excerpt; presumably it passes
// the error to `warn` and returns `None` -- confirm.
1882 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
1884 Ok(result) => Some(result),
/// A raw extension record: its header values plus the undecoded data bytes.
1893 #[derive(Clone, Debug)]
1894 pub struct Extension {
// Range from the start of the data to `size * count` bytes past it, as
// computed by `read`.
1895 pub offsets: Range<u64>,
1900 /// Size of each data element.
1903 /// Number of data elements.
1906 /// `size * count` bytes of data.
/// Checks this extension's element size and count against the values that
/// record type `E` requires.  A `None` in `E::SIZE` or `E::COUNT` skips the
/// corresponding check.  Mismatches are reported as `Error::BadRecordSize`
/// or `Error::BadRecordCount`, carrying the record's start offset and name.
1911 fn check_size<E: ExtensionRecord>(&self) -> Result<(), Error> {
1912 if let Some(expected_size) = E::SIZE {
1913 if self.size != expected_size {
1914 return Err(Error::BadRecordSize {
1915 offset: self.offsets.start,
1916 record: E::NAME.into(),
1922 if let Some(expected_count) = E::COUNT {
1923 if self.count != expected_count {
1924 return Err(Error::BadRecordCount {
1925 offset: self.offsets.start,
1926 record: E::NAME.into(),
/// Reads one extension record from `r`: the subtype, element size and
/// element count, then `size * count` bytes of data.  The data is dispatched
/// on the subtype to the matching record parser; unrecognized subtypes are
/// kept as `Record::OtherExtension`.
1935 fn read<R: Read + Seek>(
1938 warn: &Box<dyn Fn(Error)>,
1939 ) -> Result<Option<Record>, Error> {
1940 let subtype = endian.parse(read_bytes(r)?);
// Position just after the subtype field; used when reporting an oversized
// record.
1941 let header_offset = r.stream_position()?;
1942 let size: u32 = endian.parse(read_bytes(r)?);
1943 let count = endian.parse(read_bytes(r)?);
// Reject records whose total byte length does not fit in a `u32`.
1944 let Some(product) = size.checked_mul(count) else {
1945 return Err(Error::ExtensionRecordTooLarge {
1946 offset: header_offset,
1952 let start_offset = r.stream_position()?;
1953 let data = read_vec(r, product as usize)?;
1954 let end_offset = start_offset + product as u64;
1955 let extension = Extension {
1956 offsets: start_offset..end_offset,
1962 let result = match subtype {
1963 IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
1964 FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
1965 VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, endian),
// Subtype 19 is parsed with the same multiple response set parser as
// subtype 7.
1966 MultipleResponseRecord::SUBTYPE | 19 => {
1967 MultipleResponseRecord::parse(&extension, endian)
1969 LongStringValueLabelRecord::SUBTYPE => {
1970 LongStringValueLabelRecord::parse(&extension, endian)
1972 EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
1973 NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
// The following subtypes are kept as undecoded text records.
1974 5 => Ok(Record::VariableSets(extension.into())),
1975 10 => Ok(Record::ProductInfo(extension.into())),
1976 13 => Ok(Record::LongNames(extension.into())),
1977 14 => Ok(Record::VeryLongStrings(extension.into())),
1978 17 => Ok(Record::FileAttributes(extension.into())),
1979 18 => Ok(Record::VariableAttributes(extension.into())),
1980 _ => Ok(Record::OtherExtension(extension)),
1983 Ok(result) => Ok(Some(result)),
/// Header of the ZLIB-compressed data section: three 64-bit values locating
/// the ZLIB data header and the ZLIB trailer within the file.
1992 #[derive(Clone, Debug)]
1993 pub struct ZHeader {
1994 /// File offset to the start of the record.
1997 /// File offset to the ZLIB data header.
1998 pub zheader_offset: u64,
2000 /// File offset to the ZLIB trailer.
2001 pub ztrailer_offset: u64,
2003 /// Length of the ZLIB trailer in bytes.
2004 pub ztrailer_len: u64,
/// Reads a ZLIB header at the reader's current position: three consecutive
/// 64-bit values decoded using `endian`.
2008 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
// Remember where the record starts before consuming its fields.
2009 let offset = r.stream_position()?;
2010 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
2011 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
2012 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
/// Trailer of the ZLIB-compressed data section, describing its data blocks.
2023 #[derive(Clone, Debug)]
2024 pub struct ZTrailer {
2025 /// File offset to the start of the record.
2028 /// Compression bias as a negative integer, e.g. -100.
2031 /// Always observed as zero.
2034 /// Uncompressed size of each block, except possibly the last. Only
2035 /// `0x3ff000` has been observed so far.
2036 pub block_size: u32,
2038 /// Block descriptors, always `(ztrailer_len - 24) / 24` of them.
2039 pub blocks: Vec<ZBlock>,
/// Descriptor of one compressed data block, as listed in a ZLIB trailer.
2042 #[derive(Clone, Debug)]
2044 /// Offset of block of data if simple compression were used.
2045 pub uncompressed_ofs: u64,
2047 /// Actual offset within the file of the compressed data block.
2048 pub compressed_ofs: u64,
2050 /// The number of bytes in this data block after decompression. This is
2051 /// `block_size` in every data block but the last, which may be smaller.
2052 pub uncompressed_size: u32,
2054 /// The number of bytes in this data block, as stored compressed in this
2056 pub compressed_size: u32,
2060 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
2062 uncompressed_ofs: endian.parse(read_bytes(r)?),
2063 compressed_ofs: endian.parse(read_bytes(r)?),
2064 uncompressed_size: endian.parse(read_bytes(r)?),
2065 compressed_size: endian.parse(read_bytes(r)?),
/// Reads the ZLIB trailer located at `ztrailer_ofs`, then seeks the reader
/// back to the position it had on entry.  The block count stored in the
/// trailer must agree with the count implied by `ztrailer_len`.
2071 fn read<R: Read + Seek>(
2076 ) -> Result<Option<ZTrailer>, Error> {
// Saved so the reader can be repositioned once the trailer has been read.
2077 let start_offset = reader.stream_position()?;
2078 if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
2081 let int_bias = endian.parse(read_bytes(reader)?);
2082 let zero = endian.parse(read_bytes(reader)?);
2083 let block_size = endian.parse(read_bytes(reader)?);
2084 let n_blocks: u32 = endian.parse(read_bytes(reader)?);
// NOTE(review): `ztrailer_len - 24` underflows when `ztrailer_len < 24`
// (panic in debug builds, wrap-around in release) -- confirm that callers
// validate the length, or switch to `checked_sub`.
2085 let expected_n_blocks = (ztrailer_len - 24) / 24;
2086 if n_blocks as u64 != expected_n_blocks {
2087 return Err(Error::BadZlibTrailerNBlocks {
2088 offset: ztrailer_ofs,
2094 let blocks = (0..n_blocks)
2095 .map(|_| ZBlock::read(reader, endian))
2096 .collect::<Result<Vec<_>, _>>()?;
2097 reader.seek(SeekFrom::Start(start_offset))?;
2099 offset: ztrailer_ofs,
/// Reads exactly `N` bytes from `r`, unless `r` is already at end of input.
///
/// Returns `Ok(None)` if the first read yields no bytes at all (clean end of
/// input) and `Ok(Some(bytes))` once all `N` bytes have been read.
///
/// # Errors
///
/// Returns an error if reading fails, including an "unexpected end of file"
/// error if the input ends after at least one but fewer than `N` bytes.
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
    let mut buf = [0; N];
    // `read_exact` retries interrupted reads by itself, but a bare `read`
    // does not, so the first read is retried here for consistency.
    let n = loop {
        match r.read(&mut buf) {
            Ok(n) => break n,
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error),
        }
    };
    if n == 0 {
        return Ok(None);
    }
    if n < N {
        // A short first read is not end of input; the rest must follow.
        r.read_exact(&mut buf[n..])?;
    }
    Ok(Some(buf))
}
/// Reads exactly `N` bytes from `r` and returns them as an array.
///
/// Fails with the underlying I/O error if fewer than `N` bytes are available.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut bytes = [0u8; N];
    r.read_exact(&mut bytes).map(|()| bytes)
}
/// Reads exactly `n` bytes from `r` into a freshly allocated vector.
///
/// Fails with the underlying I/O error if fewer than `n` bytes are available.
fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
    let mut bytes = vec![0u8; n];
    match r.read_exact(&mut bytes) {
        Ok(()) => Ok(bytes),
        Err(error) => Err(error),
    }
}
2133 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
2134 let length: u32 = endian.parse(read_bytes(r)?);
2135 Ok(read_vec(r, length as usize)?.into())
/// Value labels for one long string variable.
2138 #[derive(Clone, Debug)]
2139 pub struct LongStringValueLabels {
// Variable name, read from the record as a length-prefixed string.
2140 pub var_name: RawString,
2143 /// `(value, label)` pairs, where each value is `width` bytes.
2144 pub labels: Vec<(RawString, RawString)>,
/// Parsed long string value labels record: one entry per variable listed.
2147 #[derive(Clone, Debug)]
2148 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
2150 impl ExtensionRecord for LongStringValueLabelRecord {
2151 const SUBTYPE: u32 = 21;
2152 const SIZE: Option<u32> = Some(1);
2153 const COUNT: Option<u32> = None;
2154 const NAME: &'static str = "long string value labels record";
2156 fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
2157 ext.check_size::<Self>()?;
2159 let mut input = &ext.data[..];
2160 let mut label_set = Vec::new();
2161 while !input.is_empty() {
2162 let var_name = read_string(&mut input, endian)?;
2163 let width: u32 = endian.parse(read_bytes(&mut input)?);
2164 let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
2165 let mut labels = Vec::new();
2166 for _ in 0..n_labels {
2167 let value = read_string(&mut input, endian)?;
2168 let label = read_string(&mut input, endian)?;
2169 labels.push((value, label));
2171 label_set.push(LongStringValueLabels {
2177 Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(