1 #![allow(unused_variables)]
2 use endian::{Endian, Parse, ToBytes};
3 use flate2::bufread::ZlibDecoder;
5 use num_derive::FromPrimitive;
8 io::{BufReader, Error as IoError, Read, Seek, SeekFrom},
14 #[derive(Error, Debug)]
16 #[error("Not an SPSS system file")]
19 #[error("Invalid magic number {0:?}")]
22 #[error("I/O error ({0})")]
25 #[error("Invalid SAV compression code {0}")]
26 InvalidSavCompression(u32),
28 #[error("Invalid ZSAV compression code {0}")]
29 InvalidZsavCompression(u32),
31 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
32 BadVariableWidth { offset: u64, width: i32 },
34 #[error("Misplaced type 4 record near offset {0:#x}.")]
35 MisplacedType4Record(u64),
37 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
38 BadDocumentLength { offset: u64, n: u32, max: u32 },
40 #[error("At offset {offset:#x}, Unrecognized record type {rec_type}.")]
41 BadRecordType { offset: u64, rec_type: u32 },
43 #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
44 BadVariableLabelCode { offset: u64, code: u32 },
47 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
49 BadNumericMissingValueCode { offset: u64, code: i32 },
51 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
52 BadStringMissingValueCode { offset: u64, code: i32 },
54 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
55 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
57 #[error("At offset {offset:#x}, variable index record (type 4) does not immediately follow value label record (type 3) as it should.")]
58 MissingVariableIndexRecord { offset: u64 },
60 #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")]
61 BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 },
63 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
64 ExtensionRecordTooLarge {
71 #[error("Wrong ZLIB data header offset {zheader_offset:#x} (expected {offset:#x}).")]
72 BadZlibHeaderOffset { offset: u64, zheader_offset: u64 },
74 #[error("At offset {offset:#x}, impossible ZLIB trailer offset {ztrailer_offset:#x}.")]
75 BadZlibTrailerOffset { offset: u64, ztrailer_offset: u64 },
77 #[error("At offset {offset:#x}, impossible ZLIB trailer length {ztrailer_len}.")]
78 BadZlibTrailerLen { offset: u64, ztrailer_len: u64 },
80 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
88 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
90 EofInCompressedCase { offset: u64, case_ofs: u64 },
92 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
93 PartialCompressedCase { offset: u64, case_ofs: u64 },
95 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
96 CompressedNumberExpected { offset: u64, case_ofs: u64 },
98 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
99 CompressedStringExpected { offset: u64, case_ofs: u64 },
101 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
102 BadZlibTrailerNBlocks {
105 expected_n_blocks: u64,
110 #[derive(Error, Debug)]
112 #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
115 #[error("Duplicate type 6 (document) record.")]
116 DuplicateDocumentRecord,
119 #[derive(Copy, Clone, Debug)]
120 pub enum Compression {
129 ValueLabel(ValueLabel),
130 VarIndexes(VarIndexes),
131 Extension(Extension),
142 /// Eye-catcher string, product name, in the file's encoding. Padded
143 /// on the right with spaces.
144 pub eye_catcher: [u8; 60],
146 /// Layout code, normally either 2 or 3.
147 pub layout_code: u32,
149 /// Number of variable positions, or `None` if the value in the file is
150 /// questionably trustworthy.
151 pub nominal_case_size: Option<u32>,
153 /// Compression type, if any,
154 pub compression: Option<Compression>,
156 /// 0-based variable index of the weight variable, or `None` if the file is
158 pub weight_index: Option<u32>,
160 /// Claimed number of cases, if known.
161 pub n_cases: Option<u32>,
163 /// Compression bias, usually 100.0.
166 /// `dd mmm yy` in the file's encoding.
167 pub creation_date: [u8; 9],
169 /// `HH:MM:SS` in the file's encoding.
170 pub creation_time: [u8; 8],
172 /// File label, in the file's encoding. Padded on the right with spaces.
173 pub file_label: [u8; 64],
175 /// Endianness of the data in the file header.
176 pub endianness: Endian,
// NOTE(review): this chunk is a non-contiguous sample of the original file —
// interior lines are elided and each surviving line carries its original line
// number fused at the front. Code tokens below are preserved byte-for-byte.
//
// `Magic` wraps the 4-byte magic number at the start of a system file.
179 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
180 pub struct Magic([u8; 4]);
183 /// Magic number for a regular system file.
184 pub const SAV: Magic = Magic(*b"$FL2");
186 /// Magic number for a system file that contains zlib-compressed data.
187 pub const ZSAV: Magic = Magic(*b"$FL3");
189 /// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded
191 pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
// Fallible conversion from the raw 4 bytes; anything other than the three
// known magics is rejected as `Error::BadMagic` (error type per elided line
// ~195/201 — the associated `Error` type itself is on an elided line).
194 impl TryFrom<[u8; 4]> for Magic {
197 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
198 let magic = Magic(value);
// Matching against the associated consts SAV/ZSAV/EBCDIC (usable as patterns
// because `Magic` derives PartialEq/Eq).
200 Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic),
201 _ => Err(Error::BadMagic(value)),
206 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
// Maps a variable record's declared width to its broad type: width 0 is a
// numeric variable, anything else is treated as a string slot (presumably
// including the -1 used for long-string continuation records — the BadVariableWidth
// error documents a valid range of [-1,255); TODO confirm against elided lines).
213 fn from_width(width: i32) -> VarType {
215 0 => VarType::Number,
216 _ => VarType::String,
221 pub struct Reader<R: Read + Seek> {
223 var_types: Vec<VarType>,
228 fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
231 struct Start<R: Read + Seek> {
235 impl<R: Read + Seek + 'static> State for Start<R> {
236 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
237 let header = read_header(&mut self.r)?;
238 Ok(Some((Record::Header(header), self)))
242 struct Headers<R: Read + Seek> {
243 reader: BufReader<R>,
245 compression: Option<Compression>,
246 var_types: Vec<VarType>,
249 impl<R: Read + Seek + 'static> State for Headers<R> {
250 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
251 let rec_type: u32 = self.endian.parse(read_bytes(&mut self.reader)?);
252 let record = match rec_type {
254 let variable = read_variable_record(&mut self.reader, self.endian)?;
255 self.var_types.push(VarType::from_width(variable.width));
256 Record::Variable(variable)
258 3 => Record::ValueLabel(read_value_label_record(&mut self.reader, self.endian)?),
259 4 => Record::VarIndexes(read_var_indexes_record(&mut self.reader, self.endian)?),
260 6 => Record::Document(read_document_record(&mut self.reader, self.endian)?),
261 7 => Record::Extension(read_extension_record(&mut self.reader, self.endian)?),
263 let _: [u8; 4] = read_bytes(&mut self.reader)?;
264 let next_state: Box<dyn State> = match self.compression {
265 None => Box::new(Data {
268 var_types: self.var_types,
270 Some(Compression::Simple) => Box::new(CompressedData {
273 var_types: self.var_types,
274 codes: VecDeque::new(),
276 Some(Compression::ZLib) => Box::new(ZlibData {
277 reader: ZlibDecodeMultiple::new(self.reader),
279 var_types: self.var_types,
280 codes: VecDeque::new(),
283 return Ok(Some((Record::EndOfHeaders, next_state)));
286 return Err(Error::BadRecordType {
287 offset: self.reader.stream_position()?,
292 Ok(Some((record, self)))
296 struct Data<R: Read + Seek> {
297 reader: BufReader<R>,
299 var_types: Vec<VarType>,
302 impl<R: Read + Seek + 'static> State for Data<R> {
303 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
304 let case_start = self.reader.stream_position()?;
305 let mut values = Vec::with_capacity(self.var_types.len());
306 for (i, &var_type) in self.var_types.iter().enumerate() {
307 let Some(raw) = try_read_bytes(&mut self.reader)? else {
311 let offset = self.reader.stream_position()?;
312 return Err(Error::EofInCase {
314 case_ofs: offset - case_start,
315 case_len: self.var_types.len() * 8,
319 values.push(Value::from_raw(var_type, raw, self.endian));
321 Ok(Some((Record::Case(values), self)))
325 struct CompressedData<R: Read + Seek> {
326 reader: BufReader<R>,
328 var_types: Vec<VarType>,
// Shared decoder for one case of simple-compressed data (used by both the
// plain `CompressedData` state and the zlib-backed `ZlibData` state). Each
// case is driven by 1-byte opcodes consumed from `codes`, refilled 8 at a
// time from the stream. Returns Ok(None) at clean end of data (elided path),
// Ok(Some(Record::Case(..))) otherwise.
332 fn read_compressed_data<R>(
335 var_types: &Vec<VarType>,
336 codes: &mut VecDeque<u8>,
337 ) -> Result<Option<Record>, Error>
341 let case_start = reader.stream_position()?;
342 let mut values = Vec::with_capacity(var_types.len());
// NOTE(review): the compression bias is hard-coded to 100.0 (the usual value)
// instead of using the bias read from the file header — the `// XXX` below
// marks this known shortcut.
343 let bias = 100.0; // XXX
344 for (i, &var_type) in var_types.iter().enumerate() {
// Refill the opcode queue when it runs dry; EOF mid-case is reported with the
// byte offset into the case for diagnostics.
346 let Some(code) = codes.pop_front() else {
347 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
351 let offset = reader.stream_position()?;
352 return Err(Error::EofInCompressedCase {
354 case_ofs: offset - case_start,
358 codes.extend(new_codes.into_iter());
// Opcodes 1..=251 encode the value inline as `code - bias`; opcode 253 says a
// literal 8-byte value follows in the stream. (Arms for 0/252 are elided here;
// the PartialCompressedCase return below appears to be the mid-case arm for
// the end-of-data opcode — confirm against the full file.)
363 1..=251 => match var_type {
364 VarType::Number => break Value::Number(Some(code as f64 - bias)),
365 VarType::String => break Value::String(endian.to_bytes(code as f64 - bias)),
371 let offset = reader.stream_position()?;
372 return Err(Error::PartialCompressedCase {
374 case_ofs: offset - case_start,
378 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
379 254 => match var_type {
380 VarType::String => break Value::String(*b" "), // XXX EBCDIC
382 return Err(Error::CompressedStringExpected {
384 case_ofs: reader.stream_position()? - case_start,
388 255 => match var_type {
389 VarType::Number => break Value::Number(None),
391 return Err(Error::CompressedNumberExpected {
393 case_ofs: reader.stream_position()? - case_start,
401 Ok(Some(Record::Case(values)))
404 impl<R: Read + Seek + 'static> State for CompressedData<R> {
405 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
406 match read_compressed_data(
413 Some(record) => Ok(Some((record, self))),
418 struct ZlibData<R: Read + Seek> {
419 reader: ZlibDecodeMultiple<R>,
421 var_types: Vec<VarType>,
425 impl<R: Read + Seek + 'static> State for ZlibData<R> {
426 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
427 match read_compressed_data(
434 Some(record) => Ok(Some((record, self))),
439 struct ZlibDecodeMultiple<R>
443 reader: Option<ZlibDecoder<BufReader<R>>>,
446 impl<R> ZlibDecodeMultiple<R>
450 fn new(reader: BufReader<R>) -> ZlibDecodeMultiple<R> {
452 reader: Some(ZlibDecoder::new(reader)),
457 impl<R> Read for ZlibDecodeMultiple<R>
461 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
463 match self.reader.as_mut().unwrap().read(buf)? {
465 let inner = self.reader.take().unwrap().into_inner();
466 self.reader = Some(ZlibDecoder::new(inner));
474 impl<R> Seek for ZlibDecodeMultiple<R>
478 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
484 impl<R> BufRead for ZlibDecodeMultiple<R>
488 fn fill_buf(&mut self) -> Result<&[u8], IoError> {
489 self.reader.as_mut().unwrap().fill_buf()
491 fn consume(&mut self, amt: usize) {
492 self.reader.as_mut().unwrap().consume(amt)
498 Headers(Endian, Option<Compression>),
500 CompressedData(Endian, VecDeque<u8>),
511 #[derive(Copy, Clone)]
518 pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
520 VarType::String => Value::String(raw),
522 let number: f64 = endian.parse(raw);
523 Value::Number((number != -f64::MAX).then_some(number))
529 impl<R: Read + Seek> Reader<R> {
530 pub fn new(r: R) -> Result<Reader<R>, Error> {
532 r: BufReader::new(r),
533 var_types: Vec::new(),
534 state: ReaderState::Start,
537 fn _next(&mut self) -> Result<Option<Record>, Error> {
539 ReaderState::Start => {
540 let header = read_header(&mut self.r)?;
541 self.state = ReaderState::Headers(header.endianness, header.compression);
542 Ok(Some(Record::Header(header)))
544 ReaderState::Headers(endian, compression) => {
545 let rec_type: u32 = endian.parse(read_bytes(&mut self.r)?);
546 let record = match rec_type {
548 let variable = read_variable_record(&mut self.r, endian)?;
549 self.var_types.push(VarType::from_width(variable.width));
550 Record::Variable(variable)
552 3 => Record::ValueLabel(read_value_label_record(&mut self.r, endian)?),
553 4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, endian)?),
554 6 => Record::Document(read_document_record(&mut self.r, endian)?),
555 7 => Record::Extension(read_extension_record(&mut self.r, endian)?),
557 let _: [u8; 4] = read_bytes(&mut self.r)?;
558 self.state = match compression {
559 None => ReaderState::Data(endian),
560 Some(Compression::Simple) => {
561 ReaderState::CompressedData(endian, VecDeque::new())
563 Some(Compression::ZLib) => ReaderState::ZHeader(endian),
565 return Ok(Some(Record::EndOfHeaders));
568 return Err(Error::BadRecordType {
569 offset: self.r.stream_position()?,
576 ReaderState::Data(endian) => {
577 let case_start = self.r.stream_position()?;
578 let mut values = Vec::with_capacity(self.var_types.len());
579 for (i, &var_type) in self.var_types.iter().enumerate() {
580 let Some(raw) = try_read_bytes(&mut self.r)? else {
584 let offset = self.r.stream_position()?;
585 return Err(Error::EofInCase {
587 case_ofs: offset - case_start,
588 case_len: self.var_types.len() * 8,
592 values.push(Value::from_raw(var_type, raw, endian));
594 Ok(Some(Record::Case(values)))
596 ReaderState::CompressedData(endian, ref mut codes) => {
597 let case_start = self.r.stream_position()?;
598 let mut values = Vec::with_capacity(self.var_types.len());
599 let bias = 100.0; // XXX
600 for (i, &var_type) in self.var_types.iter().enumerate() {
602 let Some(code) = codes.pop_front() else {
603 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.r)?
608 let offset = self.r.stream_position()?;
609 return Err(Error::EofInCompressedCase {
611 case_ofs: offset - case_start,
615 codes.extend(new_codes.into_iter());
620 1..=251 => match var_type {
621 VarType::Number => break Value::Number(Some(code as f64 - bias)),
623 break Value::String(endian.to_bytes(code as f64 - bias))
630 let offset = self.r.stream_position()?;
631 return Err(Error::PartialCompressedCase {
633 case_ofs: offset - case_start,
638 break Value::from_raw(var_type, read_bytes(&mut self.r)?, endian)
640 254 => match var_type {
641 VarType::String => break Value::String(*b" "), // XXX EBCDIC
643 return Err(Error::CompressedStringExpected {
645 case_ofs: self.r.stream_position()? - case_start,
649 255 => match var_type {
650 VarType::Number => break Value::Number(None),
652 return Err(Error::CompressedNumberExpected {
654 case_ofs: self.r.stream_position()? - case_start,
662 Ok(Some(Record::Case(values)))
664 ReaderState::ZHeader(endian) => {
665 let zheader = read_zheader(&mut self.r, endian)?;
666 self.state = ReaderState::ZTrailer {
668 ztrailer_ofs: zheader.ztrailer_offset,
669 ztrailer_len: zheader.ztrailer_len,
671 Ok(Some(Record::ZHeader(zheader)))
673 ReaderState::ZTrailer {
678 //self.state = ReaderState::ZData;
679 match read_ztrailer(&mut self.r, endian, ztrailer_ofs, ztrailer_len)? {
680 Some(ztrailer) => Ok(Some(Record::ZTrailer(ztrailer))),
681 None => self._next(),
685 ReaderState::ZData(zlib_decoder) => {
686 let zlib_decoder = zlib_decoder.unwrap_or_else(
689 ReaderState::End => Ok(None),
694 impl<R: Read + Seek> Iterator for Reader<R> {
695 type Item = Result<Record, Error>;
697 fn next(&mut self) -> Option<Self::Item> {
698 let retval = self._next();
701 self.state = ReaderState::End;
704 Ok(Some(record)) => Some(Ok(record)),
706 self.state = ReaderState::End;
// Reads and validates the 176-byte fixed file header, detecting endianness
// from the layout code. (Tail of the function — `Ok(Header { .. })` — is on
// elided lines.)
713 fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
// Magic number: any unrecognized magic is reported as "not a system file".
714 let magic: [u8; 4] = read_bytes(r)?;
715 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
717 let eye_catcher: [u8; 60] = read_bytes(r)?;
// The layout code doubles as the endianness probe: it must read back as a
// known value under one of the two byte orders.
718 let layout_code: [u8; 4] = read_bytes(r)?;
719 let endianness = Endian::identify_u32(2, layout_code)
// NOTE(review): BUG — this fallback retries the same value 2. The `Header`
// doc says the layout code is "normally either 2 or 3", so the second probe
// should almost certainly be `Endian::identify_u32(3, layout_code)`; as
// written the `.or_else` is a no-op and layout code 3 files are rejected.
720 .or_else(|| Endian::identify_u32(2, layout_code))
721 .ok_or_else(|| Error::NotASystemFile)?;
722 let layout_code = endianness.parse(layout_code);
// Nominal case size is distrusted (None) when implausibly large.
724 let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
725 let nominal_case_size =
726 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
// Compression code is validated against the magic: ZSAV files must say 2
// (zlib); other files accept 1 (simple). An arm mapping code 0 => None is
// presumably on the elided line 732 — confirm.
728 let compression_code: u32 = endianness.parse(read_bytes(r)?);
729 let compression = match (magic, compression_code) {
730 (Magic::ZSAV, 2) => Some(Compression::ZLib),
731 (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
733 (_, 1) => Some(Compression::Simple),
734 (_, code) => return Err(Error::InvalidSavCompression(code)),
// The weight index is stored 1-based in the file with 0 meaning "no weight
// variable"; converted here to a 0-based Option.
737 let weight_index: u32 = endianness.parse(read_bytes(r)?);
738 let weight_index = (weight_index > 0).then_some(weight_index - 1);
// Claimed case count: implausibly large values are treated as unknown.
740 let n_cases: u32 = endianness.parse(read_bytes(r)?);
741 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
743 let bias: f64 = endianness.parse(read_bytes(r)?);
// Fixed-width text fields, in the file's own (unknown here) encoding.
745 let creation_date: [u8; 9] = read_bytes(r)?;
746 let creation_time: [u8; 8] = read_bytes(r)?;
747 let file_label: [u8; 64] = read_bytes(r)?;
// 3 bytes of padding complete the fixed header.
748 let _: [u8; 3] = read_bytes(r)?;
766 pub struct Variable {
767 /// Offset from the start of the file to the start of the record.
770 /// Variable width, in the range -1..=255.
773 /// Variable name, padded on the right with spaces.
777 pub print_format: u32,
780 pub write_format: u32,
782 /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
783 pub missing_value_code: i32,
785 /// Raw missing values, up to 3 of them.
786 pub missing: Vec<[u8; 8]>,
788 /// Optional variable label.
789 pub label: Option<Vec<u8>>,
792 fn read_variable_record<R: Read + Seek>(
793 r: &mut BufReader<R>,
795 ) -> Result<Variable, Error> {
796 let offset = r.stream_position()?;
797 let width: i32 = endian.parse(read_bytes(r)?);
798 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
799 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
800 let print_format: u32 = endian.parse(read_bytes(r)?);
801 let write_format: u32 = endian.parse(read_bytes(r)?);
802 let name: [u8; 8] = read_bytes(r)?;
804 let label = match has_variable_label {
807 let len: u32 = endian.parse(read_bytes(r)?);
808 let read_len = len.min(65535) as usize;
809 let label = Some(read_vec(r, read_len)?);
811 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
812 let _ = read_vec(r, padding_bytes as usize)?;
817 return Err(Error::BadVariableLabelCode {
819 code: has_variable_label,
824 let mut missing = Vec::new();
825 if missing_value_code != 0 {
826 match (width, missing_value_code) {
827 (0, -3 | -2 | 1 | 2 | 3) => (),
829 return Err(Error::BadNumericMissingValueCode {
831 code: missing_value_code,
836 return Err(Error::BadStringMissingValueCode {
838 code: missing_value_code,
843 for _ in 0..missing_value_code.abs() {
844 missing.push(read_bytes(r)?);
860 pub struct ValueLabel {
861 /// Offset from the start of the file to the start of the record.
865 pub labels: Vec<([u8; 8], Vec<u8>)>,
869 /// Maximum number of value labels in a record.
870 pub const MAX: u32 = u32::MAX / 8;
873 fn read_value_label_record<R: Read + Seek>(
874 r: &mut BufReader<R>,
876 ) -> Result<ValueLabel, Error> {
877 let offset = r.stream_position()?;
878 let n: u32 = endian.parse(read_bytes(r)?);
879 if n > ValueLabel::MAX {
880 return Err(Error::BadNumberOfValueLabels {
883 max: ValueLabel::MAX,
887 let mut labels = Vec::new();
889 let value: [u8; 8] = read_bytes(r)?;
890 let label_len: u8 = endian.parse(read_bytes(r)?);
891 let label_len = label_len as usize;
892 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
894 let mut label = read_vec(r, padded_len)?;
895 label.truncate(label_len);
896 labels.push((value, label));
898 Ok(ValueLabel { offset, labels })
901 pub struct VarIndexes {
902 /// Offset from the start of the file to the start of the record.
905 /// The 0-based indexes of the variable indexes.
906 pub var_indexes: Vec<u32>,
910 /// Maximum number of variable indexes in a record.
911 pub const MAX: u32 = u32::MAX / 8;
914 fn read_var_indexes_record<R: Read + Seek>(
915 r: &mut BufReader<R>,
917 ) -> Result<VarIndexes, Error> {
918 let offset = r.stream_position()?;
919 let n: u32 = endian.parse(read_bytes(r)?);
920 if n > VarIndexes::MAX {
921 return Err(Error::BadNumberOfVarIndexes {
924 max: VarIndexes::MAX,
927 let mut var_indexes = Vec::with_capacity(n as usize);
929 var_indexes.push(endian.parse(read_bytes(r)?));
938 pub const DOC_LINE_LEN: u32 = 80;
939 pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
941 pub struct Document {
942 /// Offset from the start of the file to the start of the record.
945 /// The document, as an array of 80-byte lines.
946 pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
// Reads a type-6 (document) record: a line count `n` followed by `n` 80-byte
// text lines; counts above DOC_MAX_LINES are rejected.
949 fn read_document_record<R: Read + Seek>(
950 r: &mut BufReader<R>,
952 ) -> Result<Document, Error> {
953 let offset = r.stream_position()?;
954 let n: u32 = endian.parse(read_bytes(r)?);
956 0..=DOC_MAX_LINES => {
// NOTE(review): `pos` here is the position *after* the line count was read,
// while `offset` above is the record start, and the `Document` struct
// documents an `offset` field — so `Ok(Document { pos, lines })` below looks
// like a field-name mismatch (and the wrong offset if `offset` was intended).
// Confirm which position the struct is supposed to carry.
957 let pos = r.stream_position()?;
958 let mut lines = Vec::with_capacity(n as usize);
960 let line: [u8; 80] = read_bytes(r)?;
963 Ok(Document { pos, lines })
// Oversized line counts surface as BadDocumentLength with context.
965 _ => Err(Error::BadDocumentLength {
973 #[derive(FromPrimitive)]
975 /// Machine integer info.
977 /// Machine floating-point info.
983 /// Multiple response sets.
987 /// Extra product info text.
989 /// Variable display parameters.
991 /// Long variable names.
995 /// Extended number of cases.
997 /// Data file attributes.
999 /// Variable attributes.
1001 /// Multiple response sets (extended).
1003 /// Character encoding.
1005 /// Value labels for long strings.
1007 /// Missing values for long strings.
1009 /// "Format properties in dataview table".
1013 pub struct Extension {
1014 /// Offset from the start of the file to the start of the record.
1020 /// Size of each data element.
1023 /// Number of data elements.
1026 /// `size * count` bytes of data.
1030 fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
1032 /* Implemented record types. */
1033 ExtensionType::Integer => (4, 8),
1034 ExtensionType::Float => (8, 3),
1035 ExtensionType::VarSets => (1, 0),
1036 ExtensionType::Mrsets => (1, 0),
1037 ExtensionType::ProductInfo => (1, 0),
1038 ExtensionType::Display => (4, 0),
1039 ExtensionType::LongNames => (1, 0),
1040 ExtensionType::LongStrings => (1, 0),
1041 ExtensionType::Ncases => (8, 2),
1042 ExtensionType::FileAttrs => (1, 0),
1043 ExtensionType::VarAttrs => (1, 0),
1044 ExtensionType::Mrsets2 => (1, 0),
1045 ExtensionType::Encoding => (1, 0),
1046 ExtensionType::LongLabels => (1, 0),
1047 ExtensionType::LongMissing => (1, 0),
1049 /* Ignored record types. */
1050 ExtensionType::Date => (0, 0),
1051 ExtensionType::DataEntry => (0, 0),
1052 ExtensionType::Dataview => (0, 0),
1056 fn read_extension_record<R: Read + Seek>(
1057 r: &mut BufReader<R>,
1059 ) -> Result<Extension, Error> {
1060 let subtype = endian.parse(read_bytes(r)?);
1061 let offset = r.stream_position()?;
1062 let size: u32 = endian.parse(read_bytes(r)?);
1063 let count = endian.parse(read_bytes(r)?);
1064 let Some(product) = size.checked_mul(count) else {
1065 return Err(Error::ExtensionRecordTooLarge {
1072 let offset = r.stream_position()?;
1073 let data = read_vec(r, product as usize)?;
1083 pub struct ZHeader {
1084 /// File offset to the start of the record.
1087 /// File offset to the ZLIB data header.
1088 pub zheader_offset: u64,
1090 /// File offset to the ZLIB trailer.
1091 pub ztrailer_offset: u64,
1093 /// Length of the ZLIB trailer in bytes.
1094 pub ztrailer_len: u64,
1097 fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, endian: Endian) -> Result<ZHeader, Error> {
1098 let offset = r.stream_position()?;
1099 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
1100 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
1101 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
1111 pub struct ZTrailer {
1112 /// File offset to the start of the record.
1115 /// Compression bias as a negative integer, e.g. -100.
1118 /// Always observed as zero.
1121 /// Uncompressed size of each block, except possibly the last. Only
1122 /// `0x3ff000` has been observed so far.
1123 pub block_size: u32,
1125 /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them.
1126 pub blocks: Vec<ZBlock>,
1130 /// Offset of block of data if simple compression were used.
1131 pub uncompressed_ofs: u64,
1133 /// Actual offset within the file of the compressed data block.
1134 pub compressed_ofs: u64,
1136 /// The number of bytes in this data block after decompression. This is
1137 /// `block_size` in every data block but the last, which may be smaller.
1138 pub uncompressed_size: u32,
1140 /// The number of bytes in this data block, as stored compressed in this
1142 pub compressed_size: u32,
1145 fn read_ztrailer<R: Read + Seek>(
1146 r: &mut BufReader<R>,
1150 ) -> Result<Option<ZTrailer>, Error> {
1151 let start_offset = r.stream_position()?;
1152 if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
1155 let int_bias = endian.parse(read_bytes(r)?);
1156 let zero = endian.parse(read_bytes(r)?);
1157 let block_size = endian.parse(read_bytes(r)?);
1158 let n_blocks: u32 = endian.parse(read_bytes(r)?);
1159 let expected_n_blocks = (ztrailer_len - 24) / 24;
1160 if n_blocks as u64 != expected_n_blocks {
1161 return Err(Error::BadZlibTrailerNBlocks {
1162 offset: ztrailer_ofs,
1168 let mut blocks = Vec::with_capacity(n_blocks as usize);
1169 for _ in 0..n_blocks {
1170 let uncompressed_ofs = endian.parse(read_bytes(r)?);
1171 let compressed_ofs = endian.parse(read_bytes(r)?);
1172 let uncompressed_size = endian.parse(read_bytes(r)?);
1173 let compressed_size = endian.parse(read_bytes(r)?);
1174 blocks.push(ZBlock {
1181 r.seek(SeekFrom::Start(start_offset))?;
1183 offset: ztrailer_ofs,
// Reads exactly N bytes, but distinguishes "EOF before the first byte"
// (returns Ok(None) via elided lines) from a short read mid-array, where
// read_exact below fills the remainder or errors.
1191 fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
1192 let mut buf = [0; N];
1193 let n = r.read(&mut buf)?;
1196 r.read_exact(&mut buf[n..])?;
// Reads exactly N bytes into a fixed-size array; any EOF is an error here.
1204 fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
1205 let mut buf = [0; N];
1206 r.read_exact(&mut buf)?;
// Reads exactly `n` bytes into a freshly allocated Vec.
1210 fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
1211 let mut vec = vec![0; n];
1212 r.read_exact(&mut vec)?;
// Strips trailing occurrences of byte `c` (used for space padding) in place,
// consuming and returning the Vec.
1217 fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
1218 while s.last() == Some(&c) {
// Discards `n` bytes from `r` by reading through a small stack buffer,
// 1 KiB at a time (Read has no generic skip, and `r` may not be Seek).
1224 fn skip_bytes<R: Read>(r: &mut R, mut n: u64) -> Result<(), IoError> {
1225 let mut buf = [0; 1024];
1227 let chunk = u64::min(n, buf.len() as u64);
1228 r.read_exact(&mut buf[0..chunk as usize])?;