1 use crate::endian::{Endian, Parse, ToBytes};
2 use crate::{CategoryLabels, Compression};
4 use flate2::read::ZlibDecoder;
7 use std::fmt::{Debug, Formatter, Result as FmtResult};
8 use std::str::from_utf8;
10 collections::VecDeque,
11 io::{Error as IoError, Read, Seek, SeekFrom},
14 use thiserror::Error as ThisError;
16 use self::state::State;
18 #[derive(ThisError, Debug)]
20 #[error("Not an SPSS system file")]
23 #[error("Invalid magic number {0:?}")]
26 #[error("I/O error ({0})")]
29 #[error("Invalid SAV compression code {0}")]
30 InvalidSavCompression(u32),
32 #[error("Invalid ZSAV compression code {0}")]
33 InvalidZsavCompression(u32),
35 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
36 BadVariableWidth { offset: u64, width: i32 },
38 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
39 BadDocumentLength { offset: u64, n: usize, max: usize },
41 #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
42 BadRecordType { offset: u64, rec_type: u32 },
44 #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
45 BadVariableLabelCode { offset: u64, code: u32 },
48 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
50 BadNumericMissingValueCode { offset: u64, code: i32 },
52 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
53 BadStringMissingValueCode { offset: u64, code: i32 },
55 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
56 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
58 #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")]
59 BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 },
61 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
62 ExtensionRecordTooLarge {
69 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
77 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
79 EofInCompressedCase { offset: u64, case_ofs: u64 },
81 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
82 PartialCompressedCase { offset: u64, case_ofs: u64 },
84 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
85 CompressedNumberExpected { offset: u64, case_ofs: u64 },
87 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
88 CompressedStringExpected { offset: u64, case_ofs: u64 },
90 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
91 BadZlibTrailerNBlocks {
94 expected_n_blocks: u64,
98 #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
99 BadRecordSize { offset: u64, record: String, size: u32, expected_size: u32 },
101 #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
102 BadRecordCount { offset: u64, record: String, count: u32, expected_count: u32 },
104 #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
105 BadLongMissingValueLength { record_offset: u64, offset: u64, value_len: u32 },
107 #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
108 BadEncodingName { offset: u64 },
110 #[error("Details TBD")]
114 #[derive(Clone, Debug)]
118 ValueLabel(ValueLabel),
119 VarIndexes(VarIndexes),
121 IntegerInfo(IntegerInfo),
122 FloatInfo(FloatInfo),
123 VariableSets(UnencodedString),
124 VarDisplay(VarDisplayRecord),
125 MultipleResponse(MultipleResponseRecord),
126 LongStringValueLabels(LongStringValueLabelRecord),
127 Encoding(EncodingRecord),
128 NumberOfCases(NumberOfCasesRecord),
129 ProductInfo(UnencodedString),
130 LongNames(UnencodedString),
131 LongStrings(UnencodedString),
132 FileAttributes(UnencodedString),
133 VariableAttributes(UnencodedString),
134 TextExtension(TextExtension),
135 OtherExtension(Extension),
143 fn read<R: Read + Seek>(reader: &mut R, endian: Endian) -> Result<Record, Error> {
144 let rec_type: u32 = endian.parse(read_bytes(reader)?);
146 2 => Ok(Record::Variable(Variable::read(reader, endian)?)),
147 3 => Ok(Record::ValueLabel(ValueLabel::read(reader, endian)?)),
148 4 => Ok(Record::VarIndexes(VarIndexes::read(reader, endian)?)),
149 6 => Ok(Record::Document(Document::read(reader, endian)?)),
150 7 => Ok(Extension::read(reader, endian)?),
151 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))),
152 _ => Err(Error::BadRecordType {
153 offset: reader.stream_position()?,
160 pub struct FallbackEncoding<'a>(&'a [u8]);
162 fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> {
163 if let Ok(s) = from_utf8(s) {
166 let s: String = s.iter().map(|c| char::from(*c)).collect();
171 impl<'a> Debug for FallbackEncoding<'a> {
172 fn fmt(&self, f: &mut Formatter) -> FmtResult {
173 if let Ok(s) = from_utf8(self.0) {
174 let s = s.trim_end();
180 .map(|c| char::from(*c).escape_default())
183 let s = s.trim_end();
194 /// Eye-catcher string, product name, in the file's encoding. Padded
195 /// on the right with spaces.
196 pub eye_catcher: UnencodedStr<60>,
198 /// Layout code, normally either 2 or 3.
199 pub layout_code: u32,
201 /// Number of variable positions, or `None` if the value in the file is
202 /// questionably trustworthy.
203 pub nominal_case_size: Option<u32>,
/// Compression type, if any.
206 pub compression: Option<Compression>,
208 /// 0-based variable index of the weight variable, or `None` if the file is
210 pub weight_index: Option<u32>,
212 /// Claimed number of cases, if known.
213 pub n_cases: Option<u32>,
215 /// Compression bias, usually 100.0.
218 /// `dd mmm yy` in the file's encoding.
219 pub creation_date: UnencodedStr<9>,
221 /// `HH:MM:SS` in the file's encoding.
222 pub creation_time: UnencodedStr<8>,
224 /// File label, in the file's encoding. Padded on the right with spaces.
225 pub file_label: UnencodedStr<64>,
227 /// Endianness of the data in the file header.
232 fn debug_field<T: Debug>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult {
233 writeln!(f, "{name:>17}: {:?}", value)
237 impl Debug for Header {
238 fn fmt(&self, f: &mut Formatter) -> FmtResult {
239 writeln!(f, "File header record:")?;
240 self.debug_field(f, "Magic", self.magic)?;
241 self.debug_field(f, "Product name", &self.eye_catcher)?;
242 self.debug_field(f, "Layout code", self.layout_code)?;
243 self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
244 self.debug_field(f, "Compression", self.compression)?;
245 self.debug_field(f, "Weight index", self.weight_index)?;
246 self.debug_field(f, "Number of cases", self.n_cases)?;
247 self.debug_field(f, "Compression bias", self.bias)?;
248 self.debug_field(f, "Creation date", &self.creation_date)?;
249 self.debug_field(f, "Creation time", &self.creation_time)?;
250 self.debug_field(f, "File label", &self.file_label)?;
251 self.debug_field(f, "Endianness", self.endian)
256 fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
257 let magic: [u8; 4] = read_bytes(r)?;
258 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
260 let eye_catcher = UnencodedStr::<60>(read_bytes(r)?);
261 let layout_code: [u8; 4] = read_bytes(r)?;
262 let endian = Endian::identify_u32(2, layout_code)
263 .or_else(|| Endian::identify_u32(2, layout_code))
264 .ok_or_else(|| Error::NotASystemFile)?;
265 let layout_code = endian.parse(layout_code);
267 let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
268 let nominal_case_size =
269 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
271 let compression_code: u32 = endian.parse(read_bytes(r)?);
272 let compression = match (magic, compression_code) {
273 (Magic::ZSAV, 2) => Some(Compression::ZLib),
274 (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
276 (_, 1) => Some(Compression::Simple),
277 (_, code) => return Err(Error::InvalidSavCompression(code)),
280 let weight_index: u32 = endian.parse(read_bytes(r)?);
281 let weight_index = (weight_index > 0).then(|| weight_index - 1);
283 let n_cases: u32 = endian.parse(read_bytes(r)?);
284 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
286 let bias: f64 = endian.parse(read_bytes(r)?);
288 let creation_date = UnencodedStr::<9>(read_bytes(r)?);
289 let creation_time = UnencodedStr::<8>(read_bytes(r)?);
290 let file_label = UnencodedStr::<64>(read_bytes(r)?);
291 let _: [u8; 3] = read_bytes(r)?;
310 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
311 pub struct Magic([u8; 4]);
314 /// Magic number for a regular system file.
315 pub const SAV: Magic = Magic(*b"$FL2");
317 /// Magic number for a system file that contains zlib-compressed data.
318 pub const ZSAV: Magic = Magic(*b"$FL3");
/// Magic number for an EBCDIC-encoded system file.  This is `$FL2` encoded
322 pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
325 impl Debug for Magic {
326 fn fmt(&self, f: &mut Formatter) -> FmtResult {
328 &Magic::SAV => "$FL2",
329 &Magic::ZSAV => "$FL3",
330 &Magic::EBCDIC => "($FL2 in EBCDIC)",
331 _ => return write!(f, "{:?}", self.0),
337 impl TryFrom<[u8; 4]> for Magic {
340 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
341 let magic = Magic(value);
343 Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic),
344 _ => Err(Error::BadMagic(value)),
349 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
356 fn from_width(width: i32) -> VarType {
358 0 => VarType::Number,
359 _ => VarType::String,
366 Compression, Error, Header, Record, Value, VarType, Variable, ZHeader, ZTrailer,
369 use crate::endian::Endian;
371 collections::VecDeque,
376 #[allow(clippy::type_complexity)]
377 fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
380 struct Start<R: Read + Seek> {
384 pub fn new<R: Read + Seek + 'static>(reader: R) -> Box<dyn State> {
385 Box::new(Start { reader })
388 struct CommonState<R: Read + Seek> {
392 compression: Option<Compression>,
393 var_types: Vec<VarType>,
396 impl<R: Read + Seek + 'static> State for Start<R> {
397 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
398 let header = Header::read(&mut self.reader)?;
399 let next_state = Headers(CommonState {
401 endian: header.endian,
403 compression: header.compression,
404 var_types: Vec::new(),
406 Ok(Some((Record::Header(header), Box::new(next_state))))
410 struct Headers<R: Read + Seek>(CommonState<R>);
412 impl<R: Read + Seek + 'static> State for Headers<R> {
413 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
414 let record = Record::read(&mut self.0.reader, self.0.endian)?;
416 Record::Variable(Variable { width, .. }) => {
417 self.0.var_types.push(VarType::from_width(width));
419 Record::EndOfHeaders(_) => {
420 let next_state: Box<dyn State> = match self.0.compression {
421 None => Box::new(Data(self.0)),
422 Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
423 Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
425 return Ok(Some((record, next_state)));
429 Ok(Some((record, self)))
433 struct ZlibHeader<R: Read + Seek>(CommonState<R>);
435 impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
436 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
437 let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?;
438 Ok(Some((Record::ZHeader(zheader), self)))
442 struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
444 impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
445 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
446 let retval = ZTrailer::read(
449 self.1.ztrailer_offset,
452 let next_state = Box::new(CompressedData::new(CommonState {
453 reader: ZlibDecodeMultiple::new(self.0.reader),
454 endian: self.0.endian,
456 compression: self.0.compression,
457 var_types: self.0.var_types,
460 None => next_state.read(),
461 Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))),
466 struct Data<R: Read + Seek>(CommonState<R>);
468 impl<R: Read + Seek + 'static> State for Data<R> {
469 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
470 match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? {
472 Some(values) => Ok(Some((Record::Case(values), self))),
477 struct CompressedData<R: Read + Seek> {
478 common: CommonState<R>,
482 impl<R: Read + Seek + 'static> CompressedData<R> {
483 fn new(common: CommonState<R>) -> CompressedData<R> {
486 codes: VecDeque::new(),
491 impl<R: Read + Seek + 'static> State for CompressedData<R> {
492 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
493 match Value::read_compressed_case(
494 &mut self.common.reader,
495 &self.common.var_types,
501 Some(values) => Ok(Some((Record::Case(values), self))),
507 #[derive(Copy, Clone)]
510 String(UnencodedStr<8>),
513 impl Debug for Value {
514 fn fmt(&self, f: &mut Formatter) -> FmtResult {
516 Value::Number(Some(number)) => write!(f, "{number:?}"),
517 Value::Number(None) => write!(f, "SYSMIS"),
518 Value::String(bytes) => write!(f, "{:?}", bytes),
524 fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Value, IoError> {
525 Ok(Self::from_raw(var_type, read_bytes(r)?, endian))
528 pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
530 VarType::String => Value::String(UnencodedStr(raw)),
532 let number: f64 = endian.parse(raw);
533 Value::Number((number != -f64::MAX).then_some(number))
538 fn read_case<R: Read + Seek>(
540 var_types: &[VarType],
542 ) -> Result<Option<Vec<Value>>, Error> {
543 let case_start = reader.stream_position()?;
544 let mut values = Vec::with_capacity(var_types.len());
545 for (i, &var_type) in var_types.iter().enumerate() {
546 let Some(raw) = try_read_bytes(reader)? else {
550 let offset = reader.stream_position()?;
551 return Err(Error::EofInCase {
553 case_ofs: offset - case_start,
554 case_len: var_types.len() * 8,
558 values.push(Value::from_raw(var_type, raw, endian));
563 fn read_compressed_case<R: Read + Seek>(
565 var_types: &[VarType],
566 codes: &mut VecDeque<u8>,
569 ) -> Result<Option<Vec<Value>>, Error> {
570 let case_start = reader.stream_position()?;
571 let mut values = Vec::with_capacity(var_types.len());
572 for (i, &var_type) in var_types.iter().enumerate() {
574 let Some(code) = codes.pop_front() else {
575 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
579 let offset = reader.stream_position()?;
580 return Err(Error::EofInCompressedCase {
582 case_ofs: offset - case_start,
586 codes.extend(new_codes.into_iter());
591 1..=251 => match var_type {
592 VarType::Number => break Value::Number(Some(code as f64 - bias)),
594 break Value::String(UnencodedStr(endian.to_bytes(code as f64 - bias)))
601 let offset = reader.stream_position()?;
602 return Err(Error::PartialCompressedCase {
604 case_ofs: offset - case_start,
608 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
609 254 => match var_type {
610 VarType::String => break Value::String(UnencodedStr(*b" ")), // XXX EBCDIC
612 return Err(Error::CompressedStringExpected {
614 case_ofs: reader.stream_position()? - case_start,
618 255 => match var_type {
619 VarType::Number => break Value::Number(None),
621 return Err(Error::CompressedNumberExpected {
623 case_ofs: reader.stream_position()? - case_start,
635 struct ZlibDecodeMultiple<R>
639 reader: Option<ZlibDecoder<R>>,
642 impl<R> ZlibDecodeMultiple<R>
646 fn new(reader: R) -> ZlibDecodeMultiple<R> {
648 reader: Some(ZlibDecoder::new(reader)),
653 impl<R> Read for ZlibDecodeMultiple<R>
657 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
659 match self.reader.as_mut().unwrap().read(buf)? {
661 let inner = self.reader.take().unwrap().into_inner();
662 self.reader = Some(ZlibDecoder::new(inner));
670 impl<R> Seek for ZlibDecodeMultiple<R>
674 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
675 self.reader.as_mut().unwrap().get_mut().seek(pos)
680 state: Option<Box<dyn State>>,
684 pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> {
686 state: Some(state::new(reader)),
689 pub fn collect_headers(&mut self) -> Result<Vec<Record>, Error> {
690 let mut headers = Vec::new();
693 Record::EndOfHeaders(_) => break,
694 r => headers.push(r),
701 impl Iterator for Reader {
702 type Item = Result<Record, Error>;
704 fn next(&mut self) -> Option<Self::Item> {
705 match self.state.take()?.read() {
706 Ok(Some((record, next_state))) => {
707 self.state = Some(next_state);
711 Err(error) => Some(Err(error)),
716 impl FusedIterator for Reader {}
718 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
719 pub struct Spec(pub u32);
721 impl Debug for Spec {
722 fn fmt(&self, f: &mut Formatter) -> FmtResult {
723 let type_ = format_name(self.0 >> 16);
724 let w = (self.0 >> 8) & 0xff;
725 let d = self.0 & 0xff;
726 write!(f, "{:06x} ({type_}{w}.{d})", self.0)
730 fn format_name(type_: u32) -> Cow<'static, str> {
769 _ => return format!("<unknown format {type_}>").into()
774 pub struct MissingValues {
775 /// Individual missing values, up to 3 of them.
776 pub values: Vec<Value>,
778 /// Optional range of missing values.
779 pub range: Option<(Value, Value)>,
782 impl Debug for MissingValues {
783 fn fmt(&self, f: &mut Formatter) -> FmtResult {
784 for (i, value) in self.values.iter().enumerate() {
788 write!(f, "{value:?}")?;
791 if let Some((low, high)) = self.range {
792 if !self.values.is_empty() {
795 write!(f, "{low:?} THRU {high:?}")?;
807 fn is_empty(&self) -> bool {
808 self.values.is_empty() && self.range.is_none()
811 fn read<R: Read + Seek>(
817 ) -> Result<MissingValues, Error> {
818 let (n_values, has_range) = match (width, code) {
819 (_, 0..=3) => (code, false),
820 (0, -2) => (0, true),
821 (0, -3) => (1, true),
822 (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
823 (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
826 let var_type = VarType::from_width(width);
828 let mut values = Vec::new();
829 for _ in 0..n_values {
830 values.push(Value::read(r, var_type, endian)?);
832 let range = if has_range {
833 let low = Value::read(r, var_type, endian)?;
834 let high = Value::read(r, var_type, endian)?;
839 Ok(MissingValues { values, range })
844 pub struct Variable {
845 /// Offset from the start of the file to the start of the record.
848 /// Variable width, in the range -1..=255.
851 /// Variable name, padded on the right with spaces.
852 pub name: UnencodedStr<8>,
855 pub print_format: Spec,
858 pub write_format: Spec,
861 pub missing_values: MissingValues,
863 /// Optional variable label.
864 pub label: Option<UnencodedString>,
867 impl Debug for Variable {
868 fn fmt(&self, f: &mut Formatter) -> FmtResult {
875 } else if self.width == 0 {
878 "long string continuation record"
881 writeln!(f, "Print format: {:?}", self.print_format)?;
882 writeln!(f, "Write format: {:?}", self.write_format)?;
883 writeln!(f, "Name: {:?}", &self.name)?;
884 writeln!(f, "Variable label: {:?}", self.label)?;
885 writeln!(f, "Missing values: {:?}", self.missing_values)
890 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
891 let offset = r.stream_position()?;
892 let width: i32 = endian.parse(read_bytes(r)?);
893 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
894 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
895 let print_format = Spec(endian.parse(read_bytes(r)?));
896 let write_format = Spec(endian.parse(read_bytes(r)?));
897 let name = UnencodedStr::<8>(read_bytes(r)?);
899 let label = match has_variable_label {
902 let len: u32 = endian.parse(read_bytes(r)?);
903 let read_len = len.min(65535) as usize;
904 let label = UnencodedString(read_vec(r, read_len)?);
906 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
907 let _ = read_vec(r, padding_bytes as usize)?;
912 return Err(Error::BadVariableLabelCode {
914 code: has_variable_label,
919 let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?;
933 #[derive(Copy, Clone)]
934 pub struct UntypedValue(pub [u8; 8]);
936 impl Debug for UntypedValue {
937 fn fmt(&self, f: &mut Formatter) -> FmtResult {
938 let little: f64 = Endian::Little.parse(self.0);
939 let little = format!("{:?}", little);
940 let big: f64 = Endian::Big.parse(self.0);
941 let big = format!("{:?}", big);
942 let number = if little.len() <= big.len() {
947 write!(f, "{number}")?;
949 let string = fallback_encode(&self.0);
951 .split(|c: char| c == '\0' || c.is_control())
954 write!(f, "/\"{string}\"")?;
960 pub struct UnencodedString(pub Vec<u8>);
962 impl From<Vec<u8>> for UnencodedString {
963 fn from(source: Vec<u8>) -> Self {
968 impl From<&[u8]> for UnencodedString {
969 fn from(source: &[u8]) -> Self {
974 impl Debug for UnencodedString {
975 fn fmt(&self, f: &mut Formatter) -> FmtResult {
976 write!(f, "{:?}", FallbackEncoding(self.0.as_slice()))
980 #[derive(Copy, Clone)]
981 pub struct UnencodedStr<const N: usize>(pub [u8; N]);
983 impl<const N: usize> From<[u8; N]> for UnencodedStr<N> {
984 fn from(source: [u8; N]) -> Self {
989 impl<const N: usize> Debug for UnencodedStr<N> {
990 fn fmt(&self, f: &mut Formatter) -> FmtResult {
991 write!(f, "{:?}", FallbackEncoding(&self.0))
996 pub struct ValueLabel {
997 /// Offset from the start of the file to the start of the record.
1001 pub labels: Vec<(UntypedValue, UnencodedString)>,
1004 impl Debug for ValueLabel {
1005 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1006 for (value, label) in self.labels.iter() {
1007 writeln!(f, "{value:?}: {label:?}")?;
1014 /// Maximum number of value labels in a record.
1015 pub const MAX: u32 = u32::MAX / 8;
1017 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
1018 let offset = r.stream_position()?;
1019 let n: u32 = endian.parse(read_bytes(r)?);
1020 if n > ValueLabel::MAX {
1021 return Err(Error::BadNumberOfValueLabels {
1024 max: ValueLabel::MAX,
1028 let mut labels = Vec::new();
1030 let value = UntypedValue(read_bytes(r)?);
1031 let label_len: u8 = endian.parse(read_bytes(r)?);
1032 let label_len = label_len as usize;
1033 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
1035 let mut label = read_vec(r, padded_len - 1)?;
1036 label.truncate(label_len);
1037 labels.push((value, UnencodedString(label)));
1039 Ok(ValueLabel { offset, labels })
1044 pub struct VarIndexes {
1045 /// Offset from the start of the file to the start of the record.
1048 /// The 0-based indexes of the variable indexes.
1049 pub var_indexes: Vec<u32>,
1052 impl Debug for VarIndexes {
1053 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1054 write!(f, "apply to variables")?;
1055 for var_index in self.var_indexes.iter() {
1056 write!(f, " #{var_index}")?;
1063 /// Maximum number of variable indexes in a record.
1064 pub const MAX: u32 = u32::MAX / 8;
1066 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
1067 let offset = r.stream_position()?;
1068 let n: u32 = endian.parse(read_bytes(r)?);
1069 if n > VarIndexes::MAX {
1070 return Err(Error::BadNumberOfVarIndexes {
1073 max: VarIndexes::MAX,
1076 let mut var_indexes = Vec::with_capacity(n as usize);
1078 var_indexes.push(endian.parse(read_bytes(r)?));
1088 #[derive(Clone, Debug)]
1089 pub struct Document {
1090 /// Offset from the start of the file to the start of the record.
1093 /// The document, as an array of 80-byte lines.
1094 pub lines: Vec<DocumentLine>
1097 pub type DocumentLine = UnencodedStr<{Document::LINE_LEN}>;
1100 /// Length of a line in a document. Document lines are fixed-length and
1101 /// padded on the right with spaces.
1102 pub const LINE_LEN: usize = 80;
1104 /// Maximum number of lines we will accept in a document. This is simply
1105 /// the maximum number that will fit in a 32-bit space.
1106 pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN;
1108 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
1109 let offset = r.stream_position()?;
1110 let n: u32 = endian.parse(read_bytes(r)?);
1112 if n > Self::MAX_LINES {
1113 Err(Error::BadDocumentLength {
1116 max: Self::MAX_LINES,
1119 let pos = r.stream_position()?;
1120 let mut lines = Vec::with_capacity(n);
1122 lines.push(UnencodedStr::<{Document::LINE_LEN}>(read_bytes(r)?));
1124 Ok(Document { pos, lines })
1133 const NAME: &'static str;
1134 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
1137 trait ExtensionRecord
1142 const SIZE: Option<u32>;
1143 const COUNT: Option<u32>;
1144 const NAME: &'static str;
1145 fn parse(ext: &Extension, endian: Endian, warn: impl Fn(Error)) -> Result<Self, Error>;
1148 #[derive(Clone, Debug)]
1149 pub struct IntegerInfo {
1150 pub version: (i32, i32, i32),
1151 pub machine_code: i32,
1152 pub floating_point_rep: i32,
1153 pub compression_code: i32,
1154 pub endianness: i32,
1155 pub character_code: i32,
1158 impl ExtensionRecord for IntegerInfo {
1159 const SUBTYPE: u32 = 3;
1160 const SIZE: Option<u32> = Some(4);
1161 const COUNT: Option<u32> = Some(8);
1162 const NAME: &'static str = "integer record";
1164 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1165 ext.check_size::<Self>()?;
1167 let mut input = &ext.data[..];
1168 let data: Vec<i32> = (0..8)
1169 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1172 version: (data[0], data[1], data[2]),
1173 machine_code: data[3],
1174 floating_point_rep: data[4],
1175 compression_code: data[5],
1176 endianness: data[6],
1177 character_code: data[7],
1182 #[derive(Clone, Debug)]
1183 pub struct FloatInfo {
1189 impl ExtensionRecord for FloatInfo {
1190 const SUBTYPE: u32 = 4;
1191 const SIZE: Option<u32> = Some(8);
1192 const COUNT: Option<u32> = Some(3);
1193 const NAME: &'static str = "floating point record";
1195 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1196 ext.check_size::<Self>()?;
1198 let mut input = &ext.data[..];
1199 let data: Vec<f64> = (0..3)
1200 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1210 #[derive(Clone, Debug)]
1211 pub enum MultipleResponseType {
1213 value: UnencodedString,
1214 labels: CategoryLabels,
1218 #[derive(Clone, Debug)]
1219 pub struct MultipleResponseSet {
1220 pub name: UnencodedString,
1221 pub label: UnencodedString,
1222 pub mr_type: MultipleResponseType,
1223 pub vars: Vec<UnencodedString>,
1226 impl MultipleResponseSet {
1227 fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
1228 let Some(equals) = input.iter().position(|&b| b == b'=') else {
1229 return Err(Error::TBD);
1231 let (name, input) = input.split_at(equals);
1232 let (mr_type, input) = match input.get(0) {
1233 Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]),
1235 let (value, input) = parse_counted_string(&input[1..])?;
1237 MultipleResponseType::MultipleDichotomy {
1238 value: value.into(),
1239 labels: CategoryLabels::VarLabels,
1245 let Some(b' ') = input.get(1) else {
1246 return Err(Error::TBD);
1248 let input = &input[2..];
1249 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
1250 (CategoryLabels::CountedValues, rest)
1251 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
1252 (CategoryLabels::VarLabels, rest)
1254 return Err(Error::TBD);
1256 let (value, input) = parse_counted_string(input)?;
1258 MultipleResponseType::MultipleDichotomy {
1259 value: value.into(),
1265 _ => return Err(Error::TBD),
1267 let Some(b' ') = input.get(0) else {
1268 return Err(Error::TBD);
1270 let (label, mut input) = parse_counted_string(&input[1..])?;
1271 let mut vars = Vec::new();
1272 while input.get(0) == Some(&b' ') {
1273 input = &input[1..];
1274 let Some(length) = input.iter().position(|b| b" \n".contains(b)) else {
1275 return Err(Error::TBD);
1278 vars.push(input[..length].into());
1280 input = &input[length..];
1282 if input.get(0) != Some(&b'\n') {
1283 return Err(Error::TBD);
1285 while input.get(0) == Some(&b'\n') {
1286 input = &input[1..];
1289 MultipleResponseSet {
1291 label: label.into(),
1300 #[derive(Clone, Debug)]
1301 pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
1303 impl ExtensionRecord for MultipleResponseRecord {
1304 const SUBTYPE: u32 = 7;
1305 const SIZE: Option<u32> = Some(1);
1306 const COUNT: Option<u32> = None;
1307 const NAME: &'static str = "multiple response set record";
1309 fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1310 ext.check_size::<Self>()?;
1312 let mut input = &ext.data[..];
1313 let mut sets = Vec::new();
1314 while !input.is_empty() {
1315 let (set, rest) = MultipleResponseSet::parse(input)?;
1319 Ok(MultipleResponseRecord(sets))
1323 fn parse_counted_string(input: &[u8]) -> Result<(UnencodedString, &[u8]), Error> {
1324 let Some(space) = input.iter().position(|&b| b == b' ') else {
1325 return Err(Error::TBD);
1327 let Ok(length) = from_utf8(&input[..space]) else {
1328 return Err(Error::TBD);
1330 let Ok(length): Result<usize, _> = length.parse() else {
1331 return Err(Error::TBD);
1334 let input = &input[space + 1..];
1335 if input.len() < length {
1336 return Err(Error::TBD);
1339 let (string, rest) = input.split_at(length);
1340 Ok((string.into(), rest))
1343 pub struct ProductInfo(String);
1345 impl TextRecord for ProductInfo {
1346 const NAME: &'static str = "extra product info";
1347 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
1348 Ok(ProductInfo(input.into()))
1352 #[derive(Clone, Debug)]
1353 pub struct VarDisplayRecord(pub Vec<u32>);
1355 impl ExtensionRecord for VarDisplayRecord {
1356 const SUBTYPE: u32 = 11;
1357 const SIZE: Option<u32> = Some(4);
1358 const COUNT: Option<u32> = None;
1359 const NAME: &'static str = "variable display record";
1361 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1362 ext.check_size::<Self>()?;
1364 let mut input = &ext.data[..];
1365 let display = (0..ext.count)
1366 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1368 Ok(VarDisplayRecord(display))
1372 pub struct VariableSet {
1374 pub vars: Vec<String>,
1378 fn parse(input: &str) -> Result<Self, Error> {
1379 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
1380 let vars = input.split_ascii_whitespace().map(String::from).collect();
1388 pub struct VariableSetRecord(Vec<VariableSet>);
1390 impl TextRecord for VariableSetRecord {
1391 const NAME: &'static str = "variable set";
1392 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1393 let mut sets = Vec::new();
1394 for line in input.lines() {
1395 match VariableSet::parse(line) {
1396 Ok(set) => sets.push(set),
1397 Err(error) => warn(error),
1400 Ok(VariableSetRecord(sets))
/// Pairs a variable's short name with its long name.
1404 pub struct LongVariableName {
1405 pub short_name: String,
1406 pub long_name: String,
/// The name pairs parsed from a "long variable names" text record.
1409 pub struct LongVariableNameRecord(Vec<LongVariableName>);
1411 impl TextRecord for LongVariableNameRecord {
1412 const NAME: &'static str = "long variable names";
/// Parses tab-separated `short=long` entries from `input`.
///
/// Each non-empty entry is split at its first `=`. What happens to an entry
/// without `=` is not visible in this excerpt.
1413 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1414 let mut names = Vec::new();
// Entries are separated by tabs; empty pieces are ignored.
1415 for pair in input.split('\t').filter(|s| !s.is_empty()) {
1416 if let Some((short_name, long_name)) = pair.split_once('=') {
1417 let name = LongVariableName {
1418 short_name: short_name.into(),
1419 long_name: long_name.into(),
1426 Ok(LongVariableNameRecord(names))
/// One entry of a "very long strings" text record. Only the `short_name`
/// field is visible in this excerpt.
1430 pub struct VeryLongString {
1431 pub short_name: String,
1435 impl VeryLongString {
/// Parses a `short_name=length` entry.
///
/// Returns `Error::TBD` if `input` has no `=` or if the text after the first
/// `=` does not parse as a `usize`.
1436 fn parse(input: &str) -> Result<VeryLongString, Error> {
1437 let Some((short_name, length)) = input.split_once('=') else {
1438 return Err(Error::TBD);
1440 let length: usize = length.parse().map_err(|_| Error::TBD)?;
1442 short_name: short_name.into(),
/// The entries parsed from a "very long strings" text record.
1448 pub struct VeryLongStringRecord(Vec<VeryLongString>);
1450 impl TextRecord for VeryLongStringRecord {
1451 const NAME: &'static str = "very long strings";
/// Parses every entry of `input` with `VeryLongString::parse`.
///
/// Entries that fail to parse are passed to `warn` and skipped. How `input`
/// is split into entries is not visible in this excerpt.
1452 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1453 let mut very_long_strings = Vec::new();
// Strip trailing tabs from each entry, then drop entries that are empty.
1456 .map(|s| s.trim_end_matches('\t'))
1457 .filter(|s| !s.is_empty())
1459 match VeryLongString::parse(tuple) {
1460 Ok(vls) => very_long_strings.push(vls),
1461 Err(error) => warn(error),
1464 Ok(VeryLongStringRecord(very_long_strings))
/// The value labels attached to one variable in a long string value labels
/// record. Not every field is visible in this excerpt.
1468 #[derive(Clone, Debug)]
1469 pub struct LongStringValueLabels {
/// Name of the variable, in the file's still-undecoded byte form.
1470 pub var_name: UnencodedString,
1473 /// `(value, label)` pairs, where each value is `width` bytes.
1474 pub labels: Vec<(UnencodedString, UnencodedString)>,
/// A long string value labels record (extension subtype 21): one
/// `LongStringValueLabels` per variable described in the record.
1477 #[derive(Clone, Debug)]
1478 pub struct LongStringValueLabelRecord(Vec<LongStringValueLabels>);
1480 impl ExtensionRecord for LongStringValueLabelRecord {
1481 const SUBTYPE: u32 = 21;
1482 const SIZE: Option<u32> = Some(1);
1483 const COUNT: Option<u32> = None;
1484 const NAME: &'static str = "long string value labels record";
/// Parses the record payload in `ext.data`.
///
/// The payload is read as a sequence of entries until no bytes remain. Each
/// entry is a length-prefixed variable name, a 32-bit width, a 32-bit label
/// count, and then that many length-prefixed `(value, label)` pairs. A
/// truncated payload surfaces as an error from `read_string`/`read_bytes`.
1486 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1487 ext.check_size::<Self>()?;
1489 let mut input = &ext.data[..];
1490 let mut label_set = Vec::new();
1491 while !input.is_empty() {
1492 let var_name = read_string(&mut input, endian)?;
1493 let width: u32 = endian.parse(read_bytes(&mut input)?);
1494 let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
// `labels` grows one push at a time rather than being preallocated from
// `n_labels`, which is read from the file.
1495 let mut labels = Vec::new();
1496 for _ in 0..n_labels {
1497 let value = read_string(&mut input, endian)?;
1498 let label = read_string(&mut input, endian)?;
1499 labels.push((value, label));
1501 label_set.push(LongStringValueLabels {
1507 Ok(LongStringValueLabelRecord(label_set))
/// The missing values declared for one variable in a long string missing
/// values record.
1511 pub struct LongStringMissingValues {
/// Name of the variable, in the file's still-undecoded byte form.
1513 pub var_name: UnencodedString,
/// The missing values parsed for the variable.
1516 pub missing_values: MissingValues,
/// A long string missing values record (extension subtype 22): one
/// `LongStringMissingValues` per variable described in the record.
1519 pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
1521 impl ExtensionRecord for LongStringMissingValueSet {
1522 const SUBTYPE: u32 = 22;
1523 const SIZE: Option<u32> = Some(1);
1524 const COUNT: Option<u32> = None;
1525 const NAME: &'static str = "long string missing values record";
/// Parses the record payload in `ext.data`.
///
/// Entries are read until no bytes remain. Each entry starts with a
/// length-prefixed variable name, a one-byte count of missing values and a
/// 32-bit value length, followed by the missing values as 8-byte chunks.
/// The condition that triggers `Error::BadLongMissingValueLength` is not
/// visible in this excerpt.
1527 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1528 ext.check_size::<Self>()?;
1530 let mut input = &ext.data[..];
1531 let mut missing_value_set = Vec::new();
1532 while !input.is_empty() {
1533 let var_name = read_string(&mut input, endian)?;
1534 let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
1535 let value_len: u32 = endian.parse(read_bytes(&mut input)?);
// Bytes consumed so far, minus 8, plus the record's own file offset.
// NOTE(review): presumably meant to point at the offending length field,
// but only 5 bytes (u8 + u32) were read just above -- confirm the 8.
1537 let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offset;
1538 return Err(Error::BadLongMissingValueLength {
1539 record_offset: ext.offset,
1544 let mut values = Vec::new();
1545 for i in 0..n_missing_values {
1546 let value: [u8; 8] = read_bytes(&mut input)?;
1547 let numeric_value: u64 = endian.parse(value);
// For every value after the first, a chunk that decodes to the integer 8
// is taken to be a repeated length field and the next 8 bytes are read
// instead.
1548 let value = if i > 0 && numeric_value == 8 {
1549 // Tolerate files written by old, buggy versions of PSPP
1550 // where we believed that the value_length was repeated
1551 // before each missing value.
1552 read_bytes(&mut input)?
1556 values.push(Value::String(UnencodedStr(value)));
1558 let missing_values = MissingValues {
1562 missing_value_set.push(LongStringMissingValues {
1567 Ok(LongStringMissingValueSet(missing_value_set))
/// An encoding record (extension subtype 20): the record payload
/// interpreted as a UTF-8 string.
1571 #[derive(Clone, Debug)]
1572 pub struct EncodingRecord(pub String);
1574 impl ExtensionRecord for EncodingRecord {
1575 const SUBTYPE: u32 = 20;
1576 const SIZE: Option<u32> = Some(1);
1577 const COUNT: Option<u32> = None;
1578 const NAME: &'static str = "encoding record";
/// Converts the payload to a `String`.
///
/// Returns `Error::BadEncodingName` carrying `ext.offset` when the payload
/// is not valid UTF-8.
1580 fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1581 ext.check_size::<Self>()?;
// `ext` is only borrowed, so the bytes are cloned before conversion.
1584 String::from_utf8(ext.data.clone())
1585 .map_err(|_| Error::BadEncodingName { offset: ext.offset })?,
/// A single attribute and its values. Only the `values` field is visible in
/// this excerpt.
1590 pub struct Attribute {
/// The attribute's values, in the order they were parsed.
1592 pub values: Vec<String>,
/// Parses one attribute from the front of `input`, returning it together
/// with the unparsed remainder.
///
/// The text before the first `(` is bound to `name`. Values are then taken
/// one per `\n`-terminated line; a `)` directly after a value's newline is
/// tested for, presumably to end the attribute (the enclosing loop is not
/// visible in this excerpt). Returns `Error::TBD` if there is no `(` or if
/// a value is not terminated by a newline.
1596 fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
1597 let Some((name, mut input)) = input.split_once('(') else {
1598 return Err(Error::TBD);
1600 let mut values = Vec::new();
1602 let Some((value, rest)) = input.split_once('\n') else {
1603 return Err(Error::TBD);
// The trailing `'` is removed here; presumably a matching leading quote is
// stripped on the line not shown. Values that do not match are pushed
// unchanged below.
1605 if let Some(stripped) = value
1607 .and_then(|value| value.strip_suffix('\''))
1609 values.push(stripped.into());
1612 values.push(value.into());
1614 if let Some(rest) = rest.strip_prefix(')') {
/// An ordered collection of attributes.
1628 pub struct AttributeSet(pub Vec<Attribute>);
// Parser for an attribute set (its opening lines are not visible in this
// excerpt). It parses attributes one after another until the input is
// exhausted or its first character equals `sentinel`, and returns the set
// together with the unparsed remainder.
1633 sentinel: Option<char>,
1634 warn: &impl Fn(Error),
1635 ) -> Result<(AttributeSet, &'a str), Error> {
1636 let mut attributes = Vec::new();
1638 match input.chars().next() {
// End of input: nothing is left over.
1639 None => break input,
// The sentinel is consumed and not included in the remainder.
// NOTE(review): `&input[1..]` assumes the sentinel is one byte in UTF-8;
// a multi-byte sentinel would make this slice panic. The callers visible
// in this excerpt pass `None` or `Some('/')`.
1640 c if c == sentinel => break &input[1..],
1642 let (attribute, rest) = Attribute::parse(input, &warn)?;
1643 attributes.push(attribute);
1648 Ok((AttributeSet(attributes), rest))
/// The attribute set parsed from a "data file attributes" text record.
1652 pub struct FileAttributeRecord(AttributeSet);
1654 impl TextRecord for FileAttributeRecord {
1655 const NAME: &'static str = "data file attributes";
/// Parses all of `input` as one attribute set with no sentinel character.
///
/// Leftover input is detected after parsing; how it is handled is not
/// visible in this excerpt.
1656 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1657 let (set, rest) = AttributeSet::parse(input, None, &warn)?;
1658 if !rest.is_empty() {
1661 Ok(FileAttributeRecord(set))
/// The attributes attached to one variable, identified by its long name.
1665 pub struct VarAttributeSet {
1666 pub long_var_name: String,
1667 pub attributes: AttributeSet,
1670 impl VarAttributeSet {
// Parses `long_var_name:attributes` from the front of the input and returns
// the set together with the unparsed remainder. The attribute set ends at a
// `/` sentinel or at end of input. Returns `Error::TBD` when there is no
// `:`.
1673 warn: &impl Fn(Error),
1674 ) -> Result<(VarAttributeSet, &'a str), Error> {
1675 let Some((long_var_name, rest)) = input.split_once(':') else {
1676 return Err(Error::TBD);
1678 let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
1681 long_var_name: long_var_name.into(),
/// The per-variable attribute sets parsed from a "variable attributes" text
/// record.
1689 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
1691 impl TextRecord for VariableAttributeRecord {
1692 const NAME: &'static str = "variable attributes";
/// Repeatedly parses a `VarAttributeSet` from the front of `input` until
/// `input` is empty, collecting the successful results. The handling of a
/// failed parse is not visible in this excerpt.
1693 fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1694 let mut var_attribute_sets = Vec::new();
1695 while !input.is_empty() {
1696 match VarAttributeSet::parse(input, &warn) {
1697 Ok((var_attribute, rest)) => {
1698 var_attribute_sets.push(var_attribute);
1707 Ok(VariableAttributeRecord(var_attribute_sets))
/// Contents of an extended number of cases record. The field declarations
/// themselves are not visible in this excerpt; only their doc comments are.
1711 #[derive(Clone, Debug)]
1712 pub struct NumberOfCasesRecord {
1713 /// Always observed as 1.
1716 /// Number of cases.
1720 impl ExtensionRecord for NumberOfCasesRecord {
1721 const SUBTYPE: u32 = 16;
1722 const SIZE: Option<u32> = Some(8);
1723 const COUNT: Option<u32> = Some(2);
1724 const NAME: &'static str = "extended number of cases record";
1726 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1727 ext.check_size::<Self>()?;
1729 let mut input = &ext.data[..];
1730 let one = endian.parse(read_bytes(&mut input)?);
1731 let n_cases = endian.parse(read_bytes(&mut input)?);
1733 Ok(NumberOfCasesRecord { one, n_cases })
/// Extension record subtypes whose payload is text. Each variant's
/// discriminant is the subtype number; `Extension::read` compares it with
/// the subtype read from the file. Only two variants are visible in this
/// excerpt.
1737 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
1738 pub enum TextExtensionSubtype {
1743 FileAttributes = 17,
1744 VariableAttributes = 18,
/// A text extension record: its subtype plus its payload, kept as raw bytes.
1747 #[derive(Clone, Debug)]
1748 pub struct TextExtension {
/// Which kind of text record this is.
1749 pub subtype: TextExtensionSubtype,
/// The record's text, not yet decoded into a Rust string.
1750 pub string: UnencodedString,
/// A raw extension record. The field declarations themselves are not visible
/// in this excerpt; only their doc comments are.
1753 #[derive(Clone, Debug)]
1754 pub struct Extension {
1755 /// Offset from the start of the file to the start of the record.
1761 /// Size of each data element.
1764 /// Number of data elements.
1767 /// `size * count` bytes of data.
/// Returns a pair of `u32` requirements for each `ExtensionType`.
///
/// NOTE(review): the pair looks like `(element size, element count)` with 0
/// meaning "no constraint". For example `Ncases => (8, 2)` matches
/// `NumberOfCasesRecord`'s `SIZE = Some(8)` / `COUNT = Some(2)`, and
/// `Display => (4, 0)` matches `VarDisplayRecord`'s `SIZE = Some(4)` /
/// `COUNT = None`. Confirm against the callers.
1772 fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
1774 /* Implemented record types. */
1775 ExtensionType::Integer => (4, 8),
1776 ExtensionType::Float => (8, 3),
1777 ExtensionType::VarSets => (1, 0),
1778 ExtensionType::Mrsets => (1, 0),
1779 ExtensionType::ProductInfo => (1, 0),
1780 ExtensionType::Display => (4, 0),
1781 ExtensionType::LongNames => (1, 0),
1782 ExtensionType::LongStrings => (1, 0),
1783 ExtensionType::Ncases => (8, 2),
1784 ExtensionType::FileAttrs => (1, 0),
1785 ExtensionType::VarAttrs => (1, 0),
1786 ExtensionType::Mrsets2 => (1, 0),
1787 ExtensionType::Encoding => (1, 0),
1788 ExtensionType::LongLabels => (1, 0),
1789 ExtensionType::LongMissing => (1, 0),
1791 /* Ignored record types. */
1792 ExtensionType::Date => (0, 0),
1793 ExtensionType::DataEntry => (0, 0),
1794 ExtensionType::Dataview => (0, 0),
/// Checks this record's element size and count against the requirements
/// declared by record type `E`.
///
/// A requirement of `None` is not checked. A size mismatch yields
/// `Error::BadRecordSize` and a count mismatch yields
/// `Error::BadRecordCount`; both carry this record's offset and `E::NAME`.
1800 fn check_size<E: ExtensionRecord>(&self) -> Result<(), Error> {
1801 if let Some(expected_size) = E::SIZE {
1802 if self.size != expected_size {
1803 return Err(Error::BadRecordSize {
1804 offset: self.offset,
1805 record: E::NAME.into(),
1811 if let Some(expected_count) = E::COUNT {
1812 if self.count != expected_count {
1813 return Err(Error::BadRecordCount {
1814 offset: self.offset,
1815 record: E::NAME.into(),
/// Reads one extension record from `r` and converts it to a `Record`.
///
/// The header is read as subtype, element size and element count, followed
/// by `size * count` bytes of payload. Binary subtypes with a dedicated
/// parser are parsed immediately, with warnings discarded where the callback
/// is visible as `|_| ()`. Text subtypes are returned with their payload
/// still undecoded. Any other subtype is returned as
/// `Record::OtherExtension`.
///
/// Returns `Error::ExtensionRecordTooLarge` when `size * count` overflows
/// `u32`, and propagates I/O and parse errors.
1824 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1825 let subtype = endian.parse(read_bytes(r)?);
// Position just after the subtype field; this binding is shadowed further
// down once the size and count have been read.
1826 let offset = r.stream_position()?;
1827 let size: u32 = endian.parse(read_bytes(r)?);
1828 let count = endian.parse(read_bytes(r)?);
// Guard the payload length computation against 32-bit overflow.
1829 let Some(product) = size.checked_mul(count) else {
1830 return Err(Error::ExtensionRecordTooLarge {
// Position of the first payload byte (shadows the earlier `offset`).
1837 let offset = r.stream_position()?;
// NOTE(review): `product as usize` is lossless only where `usize` is at
// least 32 bits wide.
1838 let data = read_vec(r, product as usize)?;
1839 let extension = Extension {
1847 IntegerInfo::SUBTYPE => Ok(Record::IntegerInfo(IntegerInfo::parse(
1852 FloatInfo::SUBTYPE => Ok(Record::FloatInfo(FloatInfo::parse(
1857 VarDisplayRecord::SUBTYPE => Ok(Record::VarDisplay(VarDisplayRecord::parse(
// Subtype 19 is handled as a multiple response record as well.
1862 MultipleResponseRecord::SUBTYPE | 19 => Ok(Record::MultipleResponse(
1863 MultipleResponseRecord::parse(&extension, endian, |_| ())?,
1865 LongStringValueLabelRecord::SUBTYPE => Ok(Record::LongStringValueLabels(
1866 LongStringValueLabelRecord::parse(&extension, endian, |_| ())?,
1868 EncodingRecord::SUBTYPE => Ok(Record::Encoding(EncodingRecord::parse(
1873 NumberOfCasesRecord::SUBTYPE => Ok(Record::NumberOfCases(NumberOfCasesRecord::parse(
// Text records: the payload is moved into the record undecoded.
1878 x if x == TextExtensionSubtype::VariableSets as u32 => {
1879 Ok(Record::VariableSets(UnencodedString(extension.data)))
1881 x if x == TextExtensionSubtype::ProductInfo as u32 => {
1882 Ok(Record::ProductInfo(UnencodedString(extension.data)))
1884 x if x == TextExtensionSubtype::LongNames as u32 => {
1885 Ok(Record::LongNames(UnencodedString(extension.data)))
1887 x if x == TextExtensionSubtype::LongStrings as u32 => {
1888 Ok(Record::LongStrings(UnencodedString(extension.data)))
1890 x if x == TextExtensionSubtype::FileAttributes as u32 => {
1891 Ok(Record::FileAttributes(UnencodedString(extension.data)))
1893 x if x == TextExtensionSubtype::VariableAttributes as u32 => {
1894 Ok(Record::VariableAttributes(UnencodedString(extension.data)))
// Unrecognized subtypes are preserved as-is.
1896 _ => Ok(Record::OtherExtension(extension)),
/// The ZLIB data header of a compressed file: three file offsets/lengths
/// read by `ZHeader::read`, plus the record's own offset (that field's
/// declaration is not visible in this excerpt).
1901 #[derive(Clone, Debug)]
1902 pub struct ZHeader {
1903 /// File offset to the start of the record.
1906 /// File offset to the ZLIB data header.
1907 pub zheader_offset: u64,
1909 /// File offset to the ZLIB trailer.
1910 pub ztrailer_offset: u64,
1912 /// Length of the ZLIB trailer in bytes.
1913 pub ztrailer_len: u64,
/// Reads a `ZHeader` from the current position of `r`.
///
/// The current stream position is recorded first, then three 64-bit values
/// are read in order: `zheader_offset`, `ztrailer_offset`, `ztrailer_len`.
/// I/O errors are propagated.
1917 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
1918 let offset = r.stream_position()?;
1919 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
1920 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
1921 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
/// The ZLIB trailer of a compressed file: a fixed header followed by one
/// `ZBlock` descriptor per compressed block. Some field declarations are not
/// visible in this excerpt; only their doc comments are.
1932 #[derive(Clone, Debug)]
1933 pub struct ZTrailer {
1934 /// File offset to the start of the record.
1937 /// Compression bias as a negative integer, e.g. -100.
1940 /// Always observed as zero.
1943 /// Uncompressed size of each block, except possibly the last. Only
1944 /// `0x3ff000` has been observed so far.
1945 pub block_size: u32,
1947 /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them.
1948 pub blocks: Vec<ZBlock>,
/// Descriptor for one compressed data block, as listed in the ZLIB trailer.
1951 #[derive(Clone, Debug)]
1953 /// Offset of block of data if simple compression were used.
1954 pub uncompressed_ofs: u64,
1956 /// Actual offset within the file of the compressed data block.
1957 pub compressed_ofs: u64,
1959 /// The number of bytes in this data block after decompression. This is
1960 /// `block_size` in every data block but the last, which may be smaller.
1961 pub uncompressed_size: u32,
1963 /// The number of bytes in this data block, as stored compressed in this
1965 pub compressed_size: u32,
/// Reads one block descriptor from the current position of `r`.
///
/// Fields are read in order: `uncompressed_ofs`, `compressed_ofs`,
/// `uncompressed_size`, `compressed_size`. I/O errors are propagated.
1969 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
1971 uncompressed_ofs: endian.parse(read_bytes(r)?),
1972 compressed_ofs: endian.parse(read_bytes(r)?),
1973 uncompressed_size: endian.parse(read_bytes(r)?),
1974 compressed_size: endian.parse(read_bytes(r)?),
/// Reads the ZLIB trailer located at `ztrailer_ofs`, then restores the
/// reader to the position it had on entry.
///
/// The trailer consists of the bias, a zero field, the block size and a
/// block count, followed by that many block descriptors. The block count
/// must equal `(ztrailer_len - 24) / 24`, otherwise
/// `Error::BadZlibTrailerNBlocks` is returned. The parameter list and the
/// branch taken when seeking fails are not visible in this excerpt.
1980 fn read<R: Read + Seek>(
1985 ) -> Result<Option<ZTrailer>, Error> {
// Remember where we were so the reader can be put back afterwards.
1986 let start_offset = reader.stream_position()?;
1987 if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
1990 let int_bias = endian.parse(read_bytes(reader)?);
1991 let zero = endian.parse(read_bytes(reader)?);
1992 let block_size = endian.parse(read_bytes(reader)?);
1993 let n_blocks: u32 = endian.parse(read_bytes(reader)?);
// NOTE(review): `ztrailer_len` appears to be an unsigned value taken from
// the file (it is compared with `n_blocks as u64` below). If it is less
// than 24 this subtraction underflows: a panic with overflow checks on,
// a wrapped value otherwise. Consider `checked_sub` and reporting an
// error.
1994 let expected_n_blocks = (ztrailer_len - 24) / 24;
1995 if n_blocks as u64 != expected_n_blocks {
1996 return Err(Error::BadZlibTrailerNBlocks {
1997 offset: ztrailer_ofs,
// Stops at the first descriptor that fails to read.
2003 let blocks = (0..n_blocks)
2004 .map(|_| ZBlock::read(reader, endian))
2005 .collect::<Result<Vec<_>, _>>()?;
2006 reader.seek(SeekFrom::Start(start_offset))?;
2008 offset: ztrailer_ofs,
/// Attempts to read exactly `N` bytes from `r`, returning an `Option`.
///
/// A single `read` call is made first and the number of bytes it returned is
/// kept in `n`; `read_exact` then fills the rest of the buffer from index
/// `n` onward. The conditions on `n` are not visible in this excerpt.
/// NOTE(review): presumably `None` is returned when the first read yields
/// zero bytes (end of input) -- confirm.
2017 fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
2018 let mut buf = [0; N];
2019 let n = r.read(&mut buf)?;
2022 r.read_exact(&mut buf[n..])?;
/// Reads exactly `N` bytes from `r` into a fixed-size array.
///
/// Any error from `read_exact`, including hitting end of input before `N`
/// bytes are available, is propagated.
2030 fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
2031 let mut buf = [0; N];
2032 r.read_exact(&mut buf)?;
/// Reads exactly `n` bytes from `r` into a newly allocated buffer.
///
/// The buffer is allocated and zero-filled at its full length before any
/// byte is read. Any error from `read_exact` is propagated.
2036 fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
2037 let mut vec = vec![0; n];
2038 r.read_exact(&mut vec)?;
/// Reads a length-prefixed byte string: a 32-bit length decoded with
/// `endian`, followed by that many bytes, returned as an `UnencodedString`.
///
/// NOTE(review): the length comes straight from the input and sizes the
/// allocation made by `read_vec`, so a corrupt length can request up to
/// 4 GiB before the read fails.
2042 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<UnencodedString, IoError> {
2043 let length: u32 = endian.parse(read_bytes(r)?);
2044 Ok(read_vec(r, length as usize)?.into())