1 #![allow(unused_variables)]
2 use endian::{Endian, Parse, ToBytes};
3 use flate2::bufread::ZlibDecoder;
5 use num_derive::FromPrimitive;
8 io::{BufReader, Error as IoError, Read, Seek, SeekFrom},
14 #[derive(Error, Debug)]
16 #[error("Not an SPSS system file")]
19 #[error("Invalid magic number {0:?}")]
22 #[error("I/O error ({0})")]
25 #[error("Invalid SAV compression code {0}")]
26 InvalidSavCompression(u32),
28 #[error("Invalid ZSAV compression code {0}")]
29 InvalidZsavCompression(u32),
31 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
32 BadVariableWidth { offset: u64, width: i32 },
34 #[error("Misplaced type 4 record near offset {0:#x}.")]
35 MisplacedType4Record(u64),
37 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
38 BadDocumentLength { offset: u64, n: u32, max: u32 },
40 #[error("At offset {offset:#x}, Unrecognized record type {rec_type}.")]
41 BadRecordType { offset: u64, rec_type: u32 },
43 #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
44 BadVariableLabelCode { offset: u64, code: u32 },
47 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
49 BadNumericMissingValueCode { offset: u64, code: i32 },
51 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
52 BadStringMissingValueCode { offset: u64, code: i32 },
54 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
55 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
57 #[error("At offset {offset:#x}, variable index record (type 4) does not immediately follow value label record (type 3) as it should.")]
58 MissingVariableIndexRecord { offset: u64 },
60 #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")]
61 BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 },
63 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
64 ExtensionRecordTooLarge {
71 #[error("Wrong ZLIB data header offset {zheader_offset:#x} (expected {offset:#x}).")]
72 BadZlibHeaderOffset { offset: u64, zheader_offset: u64 },
74 #[error("At offset {offset:#x}, impossible ZLIB trailer offset {ztrailer_offset:#x}.")]
75 BadZlibTrailerOffset { offset: u64, ztrailer_offset: u64 },
77 #[error("At offset {offset:#x}, impossible ZLIB trailer length {ztrailer_len}.")]
78 BadZlibTrailerLen { offset: u64, ztrailer_len: u64 },
80 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
88 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
90 EofInCompressedCase { offset: u64, case_ofs: u64 },
92 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
93 PartialCompressedCase { offset: u64, case_ofs: u64 },
95 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
96 CompressedNumberExpected { offset: u64, case_ofs: u64 },
98 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
99 CompressedStringExpected { offset: u64, case_ofs: u64 },
101 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
102 BadZlibTrailerNBlocks {
105 expected_n_blocks: u64,
110 #[derive(Error, Debug)]
112 #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
115 #[error("Duplicate type 6 (document) record.")]
116 DuplicateDocumentRecord,
119 #[derive(Copy, Clone, Debug)]
120 pub enum Compression {
129 ValueLabel(ValueLabel),
130 VarIndexes(VarIndexes),
131 Extension(Extension),
142 /// Eye-catcher string, product name, in the file's encoding. Padded
143 /// on the right with spaces.
144 pub eye_catcher: [u8; 60],
146 /// Layout code, normally either 2 or 3.
147 pub layout_code: u32,
149 /// Number of variable positions, or `None` if the value in the file is
150 /// questionably trustworthy.
151 pub nominal_case_size: Option<u32>,
153 /// Compression type, if any,
154 pub compression: Option<Compression>,
156 /// 0-based variable index of the weight variable, or `None` if the file is
158 pub weight_index: Option<u32>,
160 /// Claimed number of cases, if known.
161 pub n_cases: Option<u32>,
163 /// Compression bias, usually 100.0.
166 /// `dd mmm yy` in the file's encoding.
167 pub creation_date: [u8; 9],
169 /// `HH:MM:SS` in the file's encoding.
170 pub creation_time: [u8; 8],
172 /// File label, in the file's encoding. Padded on the right with spaces.
173 pub file_label: [u8; 64],
175 /// Endianness of the data in the file header.
176 pub endianness: Endian,
179 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
180 pub struct Magic([u8; 4]);
183 /// Magic number for a regular system file.
184 pub const SAV: Magic = Magic(*b"$FL2");
186 /// Magic number for a system file that contains zlib-compressed data.
187 pub const ZSAV: Magic = Magic(*b"$FL3");
189 /// Magic number for an EBDIC-encoded system file. This is `$FL2` encoded
191 pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
// Validate a raw 4-byte magic number: only the SAV ($FL2), ZSAV ($FL3),
// and EBCDIC-encoded signatures are accepted; any other value is rejected
// with `Error::BadMagic`.
194 impl TryFrom<[u8; 4]> for Magic {
197     fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
198         let magic = Magic(value);
// Compare against the known signature constants defined on `Magic`.
200             Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic),
201             _ => Err(Error::BadMagic(value)),
206 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
// Classify a variable by its width: width 0 marks a numeric variable;
// every other width is treated as a string.
213 fn from_width(width: i32) -> VarType {
215 0 => VarType::Number,
216 _ => VarType::String,
221 pub struct Reader<R: Read + Seek> {
223 var_types: Vec<VarType>,
228 fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
231 struct Start<R: Read + Seek> {
235 impl<R: Read + Seek + 'static> State for Start<R> {
236 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
237 let header = read_header(&mut self.r)?;
238 Ok(Some((Record::Header(header), self)))
242 struct Headers<R: Read + Seek> {
243 reader: BufReader<R>,
245 compression: Option<Compression>,
246 var_types: Vec<VarType>,
249 impl<R: Read + Seek + 'static> State for Headers<R> {
250 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
251 let rec_type: u32 = self.endian.parse(read_bytes(&mut self.reader)?);
252 let record = match rec_type {
254 let variable = read_variable_record(&mut self.reader, self.endian)?;
255 self.var_types.push(VarType::from_width(variable.width));
256 Record::Variable(variable)
258 3 => Record::ValueLabel(read_value_label_record(&mut self.reader, self.endian)?),
259 4 => Record::VarIndexes(read_var_indexes_record(&mut self.reader, self.endian)?),
260 6 => Record::Document(read_document_record(&mut self.reader, self.endian)?),
261 7 => Record::Extension(read_extension_record(&mut self.reader, self.endian)?),
263 let _: [u8; 4] = read_bytes(&mut self.reader)?;
264 let next_state: Box<dyn State> = match self.compression {
265 None => Box::new(Data {
268 var_types: self.var_types,
270 Some(Compression::Simple) => Box::new(CompressedData {
273 var_types: self.var_types,
274 codes: VecDeque::new(),
276 Some(Compression::ZLib) => Box::new(CompressedData {
277 reader: ZlibDecodeMultiple::new(self.reader),
279 var_types: self.var_types,
280 codes: VecDeque::new(),
283 return Ok(Some((Record::EndOfHeaders, next_state)));
286 return Err(Error::BadRecordType {
287 offset: self.reader.stream_position()?,
292 Ok(Some((record, self)))
296 struct Data<R: Read + Seek> {
297 reader: BufReader<R>,
299 var_types: Vec<VarType>,
302 impl<R: Read + Seek + 'static> State for Data<R> {
303 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
304 let case_start = self.reader.stream_position()?;
305 let mut values = Vec::with_capacity(self.var_types.len());
306 for (i, &var_type) in self.var_types.iter().enumerate() {
307 let Some(raw) = try_read_bytes(&mut self.reader)? else {
311 let offset = self.reader.stream_position()?;
312 return Err(Error::EofInCase {
314 case_ofs: offset - case_start,
315 case_len: self.var_types.len() * 8,
319 values.push(Value::from_raw(var_type, raw, self.endian));
321 Ok(Some((Record::Case(values), self)))
325 struct CompressedData<R: Read + Seek> {
328 var_types: Vec<VarType>,
332 impl<R: Read + Seek + 'static> State for CompressedData<R> {
333 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
334 let case_start = self.reader.stream_position()?;
335 let mut values = Vec::with_capacity(self.var_types.len());
336 let bias = 100.0; // XXX
337 for (i, &var_type) in self.var_types.iter().enumerate() {
339 let Some(code) = self.codes.pop_front() else {
340 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.reader)? else {
344 let offset = self.reader.stream_position()?;
345 return Err(Error::EofInCompressedCase {
347 case_ofs: offset - case_start,
351 self.codes.extend(new_codes.into_iter());
356 1..=251 => match var_type {
357 VarType::Number => break Value::Number(Some(code as f64 - bias)),
358 VarType::String => break Value::String(self.endian.to_bytes(code as f64 - bias)),
364 let offset = self.reader.stream_position()?;
365 return Err(Error::PartialCompressedCase {
367 case_ofs: offset - case_start,
371 253 => break Value::from_raw(var_type, read_bytes(&mut self.reader)?, self.endian),
372 254 => match var_type {
373 VarType::String => break Value::String(*b" "), // XXX EBCDIC
375 return Err(Error::CompressedStringExpected {
377 case_ofs: self.reader.stream_position()? - case_start,
381 255 => match var_type {
382 VarType::Number => break Value::Number(None),
384 return Err(Error::CompressedNumberExpected {
386 case_ofs: self.reader.stream_position()? - case_start,
394 Ok(Some((Record::Case(values), self)))
398 struct ZlibDecodeMultiple<R>
402 reader: Option<ZlibDecoder<BufReader<R>>>,
405 impl<R> ZlibDecodeMultiple<R>
409 fn new(reader: BufReader<R>) -> ZlibDecodeMultiple<R> {
411 reader: Some(ZlibDecoder::new(reader)),
416 impl<R> Read for ZlibDecodeMultiple<R>
420 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
422 match self.reader.as_mut().unwrap().read(buf)? {
424 let inner = self.reader.take().unwrap().into_inner();
425 self.reader = Some(ZlibDecoder::new(inner));
433 impl<R> Seek for ZlibDecodeMultiple<R>
437 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
443 impl<R> BufRead for ZlibDecodeMultiple<R>
447 fn fill_buf(&mut self) -> Result<&[u8], IoError> {
448 self.reader.as_mut().unwrap().fill_buf()
450 fn consume(&mut self, amt: usize) {
451 self.reader.as_mut().unwrap().consume(amt)
457 Headers(Endian, Option<Compression>),
459 CompressedData(Endian, VecDeque<u8>),
470 #[derive(Copy, Clone)]
// Decode one raw 8-byte data element.  String variables keep the raw
// bytes verbatim; numeric variables are parsed with the file's
// endianness, and the value `-f64::MAX` (presumably the SPSS SYSMIS
// missing-value sentinel -- confirm) is decoded as `None`.
477 pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
479 VarType::String => Value::String(raw),
481 let number: f64 = endian.parse(raw);
482 Value::Number((number != -f64::MAX).then_some(number))
488 impl<R: Read + Seek> Reader<R> {
489 pub fn new(r: R) -> Result<Reader<R>, Error> {
491 r: BufReader::new(r),
492 var_types: Vec::new(),
493 state: ReaderState::Start,
496 fn _next(&mut self) -> Result<Option<Record>, Error> {
498 ReaderState::Start => {
499 let header = read_header(&mut self.r)?;
500 self.state = ReaderState::Headers(header.endianness, header.compression);
501 Ok(Some(Record::Header(header)))
503 ReaderState::Headers(endian, compression) => {
504 let rec_type: u32 = endian.parse(read_bytes(&mut self.r)?);
505 let record = match rec_type {
507 let variable = read_variable_record(&mut self.r, endian)?;
508 self.var_types.push(VarType::from_width(variable.width));
509 Record::Variable(variable)
511 3 => Record::ValueLabel(read_value_label_record(&mut self.r, endian)?),
512 4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, endian)?),
513 6 => Record::Document(read_document_record(&mut self.r, endian)?),
514 7 => Record::Extension(read_extension_record(&mut self.r, endian)?),
516 let _: [u8; 4] = read_bytes(&mut self.r)?;
517 self.state = match compression {
518 None => ReaderState::Data(endian),
519 Some(Compression::Simple) => {
520 ReaderState::CompressedData(endian, VecDeque::new())
522 Some(Compression::ZLib) => ReaderState::ZHeader(endian),
524 return Ok(Some(Record::EndOfHeaders));
527 return Err(Error::BadRecordType {
528 offset: self.r.stream_position()?,
535 ReaderState::Data(endian) => {
536 let case_start = self.r.stream_position()?;
537 let mut values = Vec::with_capacity(self.var_types.len());
538 for (i, &var_type) in self.var_types.iter().enumerate() {
539 let Some(raw) = try_read_bytes(&mut self.r)? else {
543 let offset = self.r.stream_position()?;
544 return Err(Error::EofInCase {
546 case_ofs: offset - case_start,
547 case_len: self.var_types.len() * 8,
551 values.push(Value::from_raw(var_type, raw, endian));
553 Ok(Some(Record::Case(values)))
555 ReaderState::CompressedData(endian, ref mut codes) => {
556 let case_start = self.r.stream_position()?;
557 let mut values = Vec::with_capacity(self.var_types.len());
558 let bias = 100.0; // XXX
559 for (i, &var_type) in self.var_types.iter().enumerate() {
561 let Some(code) = codes.pop_front() else {
562 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.r)?
567 let offset = self.r.stream_position()?;
568 return Err(Error::EofInCompressedCase {
570 case_ofs: offset - case_start,
574 codes.extend(new_codes.into_iter());
579 1..=251 => match var_type {
580 VarType::Number => break Value::Number(Some(code as f64 - bias)),
582 break Value::String(endian.to_bytes(code as f64 - bias))
589 let offset = self.r.stream_position()?;
590 return Err(Error::PartialCompressedCase {
592 case_ofs: offset - case_start,
597 break Value::from_raw(var_type, read_bytes(&mut self.r)?, endian)
599 254 => match var_type {
600 VarType::String => break Value::String(*b" "), // XXX EBCDIC
602 return Err(Error::CompressedStringExpected {
604 case_ofs: self.r.stream_position()? - case_start,
608 255 => match var_type {
609 VarType::Number => break Value::Number(None),
611 return Err(Error::CompressedNumberExpected {
613 case_ofs: self.r.stream_position()? - case_start,
621 Ok(Some(Record::Case(values)))
623 ReaderState::ZHeader(endian) => {
624 let zheader = read_zheader(&mut self.r, endian)?;
625 self.state = ReaderState::ZTrailer {
627 ztrailer_ofs: zheader.ztrailer_offset,
628 ztrailer_len: zheader.ztrailer_len,
630 Ok(Some(Record::ZHeader(zheader)))
632 ReaderState::ZTrailer {
637 //self.state = ReaderState::ZData;
638 match read_ztrailer(&mut self.r, endian, ztrailer_ofs, ztrailer_len)? {
639 Some(ztrailer) => Ok(Some(Record::ZTrailer(ztrailer))),
640 None => self._next(),
644 ReaderState::ZData(zlib_decoder) => {
645 let zlib_decoder = zlib_decoder.unwrap_or_else(
648 ReaderState::End => Ok(None),
653 impl<R: Read + Seek> Iterator for Reader<R> {
654 type Item = Result<Record, Error>;
656 fn next(&mut self) -> Option<Self::Item> {
657 let retval = self._next();
660 self.state = ReaderState::End;
663 Ok(Some(record)) => Some(Ok(record)),
665 self.state = ReaderState::End;
// Read and validate the system-file header record: magic number, eye
// catcher, layout code (which doubles as the endianness probe),
// compression scheme, weight variable, case count, compression bias, and
// the textual creation-date/time/label fields.
672 fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
673 let magic: [u8; 4] = read_bytes(r)?;
674 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
676 let eye_catcher: [u8; 60] = read_bytes(r)?;
677 let layout_code: [u8; 4] = read_bytes(r)?;
// Detect endianness by checking which byte order makes the layout code
// come out as a known value.
// NOTE(review): both probes pass 2, so the `.or_else` is a no-op as
// written; the second call was presumably meant to probe for layout
// code 3 (the Header docs say layout code is "normally either 2 or 3")
// -- TODO confirm and fix.
678 let endianness = Endian::identify_u32(2, layout_code)
679 .or_else(|| Endian::identify_u32(2, layout_code))
680 .ok_or_else(|| Error::NotASystemFile)?;
681 let layout_code = endianness.parse(layout_code);
// An implausibly large nominal case size is untrustworthy and is
// recorded as `None`.
683 let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
684 let nominal_case_size =
685 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
// Valid compression codes depend on the magic number: ZSAV files must
// use code 2 (zlib); other files use code 1 for simple compression.
687 let compression_code: u32 = endianness.parse(read_bytes(r)?);
688 let compression = match (magic, compression_code) {
689 (Magic::ZSAV, 2) => Some(Compression::ZLib),
690 (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
692 (_, 1) => Some(Compression::Simple),
693 (_, code) => return Err(Error::InvalidSavCompression(code)),
// The file stores a 1-based weight index with 0 meaning "unweighted";
// convert it to a 0-based `Option`.
696 let weight_index: u32 = endianness.parse(read_bytes(r)?);
697 let weight_index = (weight_index > 0).then_some(weight_index - 1);
// An out-of-range case count means the count is unknown.
699 let n_cases: u32 = endianness.parse(read_bytes(r)?);
700 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
702 let bias: f64 = endianness.parse(read_bytes(r)?);
704 let creation_date: [u8; 9] = read_bytes(r)?;
705 let creation_time: [u8; 8] = read_bytes(r)?;
706 let file_label: [u8; 64] = read_bytes(r)?;
// Skip 3 bytes of padding that complete the header record.
707 let _: [u8; 3] = read_bytes(r)?;
725 pub struct Variable {
726 /// Offset from the start of the file to the start of the record.
729 /// Variable width, in the range -1..=255.
732 /// Variable name, padded on the right with spaces.
736 pub print_format: u32,
739 pub write_format: u32,
741 /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
742 pub missing_value_code: i32,
744 /// Raw missing values, up to 3 of them.
745 pub missing: Vec<[u8; 8]>,
747 /// Optional variable label.
748 pub label: Option<Vec<u8>>,
// Read a type-2 (variable) record: width, formats, name, an optional
// length-prefixed label, and optional missing-value specifications.
751 fn read_variable_record<R: Read + Seek>(
752 r: &mut BufReader<R>,
754 ) -> Result<Variable, Error> {
755 let offset = r.stream_position()?;
756 let width: i32 = endian.parse(read_bytes(r)?);
757 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
758 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
759 let print_format: u32 = endian.parse(read_bytes(r)?);
760 let write_format: u32 = endian.parse(read_bytes(r)?);
761 let name: [u8; 8] = read_bytes(r)?;
// A label flag of 1 means a length-prefixed label follows; any flag
// other than 0 or 1 raises `BadVariableLabelCode` below.
763 let label = match has_variable_label {
766 let len: u32 = endian.parse(read_bytes(r)?);
// Clamp the label to at most 65535 bytes before reading it.
// NOTE(review): when `len > 65535` only the first 65535 bytes are read,
// yet the padding below is computed from the unclamped `len`, so the
// cursor may not be advanced past the rest of an oversized label --
// TODO confirm.
767 let read_len = len.min(65535) as usize;
768 let label = Some(read_vec(r, read_len)?);
// Labels are padded out to a multiple of 4 bytes; skip the padding.
770 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
771 let _ = read_vec(r, padding_bytes as usize)?;
776 return Err(Error::BadVariableLabelCode {
778 code: has_variable_label,
// Validate the missing-value code against the variable type: numeric
// variables (width 0) allow -3, -2, 1, 2, or 3 (0 means "none");
// invalid combinations raise the matching error.
783 let mut missing = Vec::new();
784 if missing_value_code != 0 {
785 match (width, missing_value_code) {
786 (0, -3 | -2 | 1 | 2 | 3) => (),
788 return Err(Error::BadNumericMissingValueCode {
790 code: missing_value_code,
795 return Err(Error::BadStringMissingValueCode {
797 code: missing_value_code,
// |code| raw 8-byte missing values follow the fixed fields.
802 for _ in 0..missing_value_code.abs() {
803 missing.push(read_bytes(r)?);
819 pub struct ValueLabel {
820 /// Offset from the start of the file to the start of the record.
824 pub labels: Vec<([u8; 8], Vec<u8>)>,
828 /// Maximum number of value labels in a record.
829 pub const MAX: u32 = u32::MAX / 8;
832 fn read_value_label_record<R: Read + Seek>(
833 r: &mut BufReader<R>,
835 ) -> Result<ValueLabel, Error> {
836 let offset = r.stream_position()?;
837 let n: u32 = endian.parse(read_bytes(r)?);
838 if n > ValueLabel::MAX {
839 return Err(Error::BadNumberOfValueLabels {
842 max: ValueLabel::MAX,
846 let mut labels = Vec::new();
848 let value: [u8; 8] = read_bytes(r)?;
849 let label_len: u8 = endian.parse(read_bytes(r)?);
850 let label_len = label_len as usize;
851 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
853 let mut label = read_vec(r, padded_len)?;
854 label.truncate(label_len);
855 labels.push((value, label));
857 Ok(ValueLabel { offset, labels })
860 pub struct VarIndexes {
861 /// Offset from the start of the file to the start of the record.
864 /// The 0-based indexes of the variable indexes.
865 pub var_indexes: Vec<u32>,
869 /// Maximum number of variable indexes in a record.
870 pub const MAX: u32 = u32::MAX / 8;
873 fn read_var_indexes_record<R: Read + Seek>(
874 r: &mut BufReader<R>,
876 ) -> Result<VarIndexes, Error> {
877 let offset = r.stream_position()?;
878 let n: u32 = endian.parse(read_bytes(r)?);
879 if n > VarIndexes::MAX {
880 return Err(Error::BadNumberOfVarIndexes {
883 max: VarIndexes::MAX,
886 let mut var_indexes = Vec::with_capacity(n as usize);
888 var_indexes.push(endian.parse(read_bytes(r)?));
897 pub const DOC_LINE_LEN: u32 = 80;
898 pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
900 pub struct Document {
901 /// Offset from the start of the file to the start of the record.
904 /// The document, as an array of 80-byte lines.
905 pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
// Read a type-6 (document) record: a line count followed by that many
// 80-byte document lines.
908 fn read_document_record<R: Read + Seek>(
909 r: &mut BufReader<R>,
911 ) -> Result<Document, Error> {
912 let offset = r.stream_position()?;
913 let n: u32 = endian.parse(read_bytes(r)?);
// A count beyond DOC_MAX_LINES raises `BadDocumentLength` below.
915 0..=DOC_MAX_LINES => {
916 let pos = r.stream_position()?;
917 let mut lines = Vec::with_capacity(n as usize);
919 let line: [u8; 80] = read_bytes(r)?;
// NOTE(review): the record is built from `pos` (the position *after*
// the line count) while `offset` (the start of the record) only feeds
// the error arm; the other record readers store the start-of-record
// offset -- confirm which position `Document` is meant to carry.
922 Ok(Document { pos, lines })
924 _ => Err(Error::BadDocumentLength {
932 #[derive(FromPrimitive)]
934 /// Machine integer info.
936 /// Machine floating-point info.
942 /// Multiple response sets.
946 /// Extra product info text.
948 /// Variable display parameters.
950 /// Long variable names.
954 /// Extended number of cases.
956 /// Data file attributes.
958 /// Variable attributes.
960 /// Multiple response sets (extended).
962 /// Character encoding.
964 /// Value labels for long strings.
966 /// Missing values for long strings.
968 /// "Format properties in dataview table".
972 pub struct Extension {
973 /// Offset from the start of the file to the start of the record.
979 /// Size of each data element.
982 /// Number of data elements.
985 /// `size * count` bytes of data.
989 fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
991 /* Implemented record types. */
992 ExtensionType::Integer => (4, 8),
993 ExtensionType::Float => (8, 3),
994 ExtensionType::VarSets => (1, 0),
995 ExtensionType::Mrsets => (1, 0),
996 ExtensionType::ProductInfo => (1, 0),
997 ExtensionType::Display => (4, 0),
998 ExtensionType::LongNames => (1, 0),
999 ExtensionType::LongStrings => (1, 0),
1000 ExtensionType::Ncases => (8, 2),
1001 ExtensionType::FileAttrs => (1, 0),
1002 ExtensionType::VarAttrs => (1, 0),
1003 ExtensionType::Mrsets2 => (1, 0),
1004 ExtensionType::Encoding => (1, 0),
1005 ExtensionType::LongLabels => (1, 0),
1006 ExtensionType::LongMissing => (1, 0),
1008 /* Ignored record types. */
1009 ExtensionType::Date => (0, 0),
1010 ExtensionType::DataEntry => (0, 0),
1011 ExtensionType::Dataview => (0, 0),
1015 fn read_extension_record<R: Read + Seek>(
1016 r: &mut BufReader<R>,
1018 ) -> Result<Extension, Error> {
1019 let subtype = endian.parse(read_bytes(r)?);
1020 let offset = r.stream_position()?;
1021 let size: u32 = endian.parse(read_bytes(r)?);
1022 let count = endian.parse(read_bytes(r)?);
1023 let Some(product) = size.checked_mul(count) else {
1024 return Err(Error::ExtensionRecordTooLarge {
1031 let offset = r.stream_position()?;
1032 let data = read_vec(r, product as usize)?;
1042 pub struct ZHeader {
1043 /// File offset to the start of the record.
1046 /// File offset to the ZLIB data header.
1047 pub zheader_offset: u64,
1049 /// File offset to the ZLIB trailer.
1050 pub ztrailer_offset: u64,
1052 /// Length of the ZLIB trailer in bytes.
1053 pub ztrailer_len: u64,
1056 fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, endian: Endian) -> Result<ZHeader, Error> {
1057 let offset = r.stream_position()?;
1058 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
1059 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
1060 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
1070 pub struct ZTrailer {
1071 /// File offset to the start of the record.
1074 /// Compression bias as a negative integer, e.g. -100.
1077 /// Always observed as zero.
1080 /// Uncompressed size of each block, except possibly the last. Only
1081 /// `0x3ff000` has been observed so far.
1082 pub block_size: u32,
1084 /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them.
1085 pub blocks: Vec<ZBlock>,
1089 /// Offset of block of data if simple compression were used.
1090 pub uncompressed_ofs: u64,
1092 /// Actual offset within the file of the compressed data block.
1093 pub compressed_ofs: u64,
1095 /// The number of bytes in this data block after decompression. This is
1096 /// `block_size` in every data block but the last, which may be smaller.
1097 pub uncompressed_size: u32,
1099 /// The number of bytes in this data block, as stored compressed in this
1101 pub compressed_size: u32,
1104 fn read_ztrailer<R: Read + Seek>(
1105 r: &mut BufReader<R>,
1109 ) -> Result<Option<ZTrailer>, Error> {
1110 let start_offset = r.stream_position()?;
1111 if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
1114 let int_bias = endian.parse(read_bytes(r)?);
1115 let zero = endian.parse(read_bytes(r)?);
1116 let block_size = endian.parse(read_bytes(r)?);
1117 let n_blocks: u32 = endian.parse(read_bytes(r)?);
1118 let expected_n_blocks = (ztrailer_len - 24) / 24;
1119 if n_blocks as u64 != expected_n_blocks {
1120 return Err(Error::BadZlibTrailerNBlocks {
1121 offset: ztrailer_ofs,
1127 let mut blocks = Vec::with_capacity(n_blocks as usize);
1128 for _ in 0..n_blocks {
1129 let uncompressed_ofs = endian.parse(read_bytes(r)?);
1130 let compressed_ofs = endian.parse(read_bytes(r)?);
1131 let uncompressed_size = endian.parse(read_bytes(r)?);
1132 let compressed_size = endian.parse(read_bytes(r)?);
1133 blocks.push(ZBlock {
1140 r.seek(SeekFrom::Start(start_offset))?;
1142 offset: ztrailer_ofs,
// Try to read exactly N bytes.  A clean EOF before the first byte
// apparently yields `Ok(None)` (the n == 0 branch sits between these
// reads -- TODO confirm); a partial first read is completed with
// `read_exact`, so EOF in the middle of an object surfaces as an I/O
// error instead.
1150 fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
1151 let mut buf = [0; N];
1152 let n = r.read(&mut buf)?;
1155 r.read_exact(&mut buf[n..])?;
// Read exactly N bytes into a fixed-size array, propagating any I/O
// error (including `UnexpectedEof` from `read_exact`).
1163 fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
1164 let mut buf = [0; N];
1165 r.read_exact(&mut buf)?;
// Read exactly `n` bytes into a freshly allocated, zero-initialized Vec.
1169 fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
1170 let mut vec = vec![0; n];
1171 r.read_exact(&mut vec)?;
// Strip every trailing occurrence of byte `c` from `s` (used to trim the
// space padding on fixed-width fields) and return the shortened Vec.
1176 fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
1177 while s.last() == Some(&c) {
// Discard `n` bytes from `r` by reading them into a scratch buffer in
// chunks of at most 1024 bytes (works on non-seekable readers such as a
// zlib decompression stream).
1183 fn skip_bytes<R: Read>(r: &mut R, mut n: u64) -> Result<(), IoError> {
1184 let mut buf = [0; 1024];
1186 let chunk = u64::min(n, buf.len() as u64);
1187 r.read_exact(&mut buf[0..chunk as usize])?;