1 use crate::endian::{Endian, Parse, ToBytes};
2 use crate::{CategoryLabels, Compression};
4 use flate2::read::ZlibDecoder;
7 use std::fmt::{Debug, Formatter, Result as FmtResult};
8 use std::str::from_utf8;
10 collections::VecDeque,
11 io::{Error as IoError, Read, Seek, SeekFrom},
14 use thiserror::Error as ThisError;
16 use self::state::State;
18 #[derive(ThisError, Debug)]
20 #[error("Not an SPSS system file")]
23 #[error("Invalid magic number {0:?}")]
26 #[error("I/O error ({0})")]
29 #[error("Invalid SAV compression code {0}")]
30 InvalidSavCompression(u32),
32 #[error("Invalid ZSAV compression code {0}")]
33 InvalidZsavCompression(u32),
35 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
36 BadVariableWidth { offset: u64, width: i32 },
38 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
39 BadDocumentLength { offset: u64, n: u32, max: u32 },
41 #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
42 BadRecordType { offset: u64, rec_type: u32 },
44 #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
45 BadVariableLabelCode { offset: u64, code: u32 },
48 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
50 BadNumericMissingValueCode { offset: u64, code: i32 },
52 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
53 BadStringMissingValueCode { offset: u64, code: i32 },
55 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
56 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
58 #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")]
59 BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 },
61 #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
62 ExtensionRecordTooLarge {
69 #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
77 "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
79 EofInCompressedCase { offset: u64, case_ofs: u64 },
81 #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
82 PartialCompressedCase { offset: u64, case_ofs: u64 },
84 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
85 CompressedNumberExpected { offset: u64, case_ofs: u64 },
87 #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
88 CompressedStringExpected { offset: u64, case_ofs: u64 },
90 #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
91 BadZlibTrailerNBlocks {
94 expected_n_blocks: u64,
98 #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
99 BadRecordSize { offset: u64, record: String, size: u32, expected_size: u32 },
101 #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
102 BadRecordCount { offset: u64, record: String, count: u32, expected_count: u32 },
104 #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
105 BadLongMissingValueLength { record_offset: u64, offset: u64, value_len: u32 },
107 #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
108 BadEncodingName { offset: u64 },
110 #[error("Details TBD")]
114 #[derive(Clone, Debug)]
118 ValueLabel(ValueLabel),
119 VarIndexes(VarIndexes),
121 IntegerInfo(IntegerInfo),
122 FloatInfo(FloatInfo),
123 VariableSets(UnencodedString),
124 VarDisplay(VarDisplayRecord),
125 MultipleResponse(MultipleResponseRecord),
126 LongStringValueLabels(LongStringValueLabelRecord),
127 Encoding(EncodingRecord),
128 NumberOfCases(NumberOfCasesRecord),
129 ProductInfo(UnencodedString),
130 LongNames(UnencodedString),
131 LongStrings(UnencodedString),
132 FileAttributes(UnencodedString),
133 VariableAttributes(UnencodedString),
134 TextExtension(TextExtension),
135 OtherExtension(Extension),
143 fn read<R: Read + Seek>(reader: &mut R, endian: Endian) -> Result<Record, Error> {
144 let rec_type: u32 = endian.parse(read_bytes(reader)?);
146 2 => Ok(Record::Variable(Variable::read(reader, endian)?)),
147 3 => Ok(Record::ValueLabel(ValueLabel::read(reader, endian)?)),
148 4 => Ok(Record::VarIndexes(VarIndexes::read(reader, endian)?)),
149 6 => Ok(Record::Document(Document::read(reader, endian)?)),
150 7 => Ok(Extension::read(reader, endian)?),
151 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))),
152 _ => Err(Error::BadRecordType {
153 offset: reader.stream_position()?,
/// Borrowed byte string whose `Debug` output renders the bytes as text: as
/// UTF-8 when the bytes are valid UTF-8, otherwise as one escaped character
/// per byte.
160 pub struct FallbackEncoding<'a>(&'a [u8]);
162 fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> {
163 if let Ok(s) = from_utf8(s) {
166 let s: String = s.iter().map(|c| char::from(*c)).collect();
171 impl<'a> Debug for FallbackEncoding<'a> {
172 fn fmt(&self, f: &mut Formatter) -> FmtResult {
173 if let Ok(s) = from_utf8(self.0) {
174 let s = s.trim_end();
180 .map(|c| char::from(*c).escape_default())
183 let s = s.trim_end();
194 /// Eye-catcher string, product name, in the file's encoding. Padded
195 /// on the right with spaces.
196 pub eye_catcher: [u8; 60],
198 /// Layout code, normally either 2 or 3.
199 pub layout_code: u32,
201 /// Number of variable positions, or `None` if the value in the file is
202 /// questionably trustworthy.
203 pub nominal_case_size: Option<u32>,
205 /// Compression type, if any.
206 pub compression: Option<Compression>,
208 /// 0-based variable index of the weight variable, or `None` if the file is
210 pub weight_index: Option<u32>,
212 /// Claimed number of cases, if known.
213 pub n_cases: Option<u32>,
215 /// Compression bias, usually 100.0.
218 /// `dd mmm yy` in the file's encoding.
219 pub creation_date: [u8; 9],
221 /// `HH:MM:SS` in the file's encoding.
222 pub creation_time: [u8; 8],
224 /// File label, in the file's encoding. Padded on the right with spaces.
225 pub file_label: [u8; 64],
227 /// Endianness of the data in the file header.
232 fn debug_field<T: Debug>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult {
233 writeln!(f, "{name:>17}: {:?}", value)
237 impl Debug for Header {
238 fn fmt(&self, f: &mut Formatter) -> FmtResult {
239 writeln!(f, "File header record:")?;
240 self.debug_field(f, "Magic", self.magic)?;
241 self.debug_field(f, "Product name", FallbackEncoding(&self.eye_catcher))?;
242 self.debug_field(f, "Layout code", self.layout_code)?;
243 self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
244 self.debug_field(f, "Compression", self.compression)?;
245 self.debug_field(f, "Weight index", self.weight_index)?;
246 self.debug_field(f, "Number of cases", self.n_cases)?;
247 self.debug_field(f, "Compression bias", self.bias)?;
248 self.debug_field(f, "Creation date", FallbackEncoding(&self.creation_date))?;
249 self.debug_field(f, "Creation time", FallbackEncoding(&self.creation_time))?;
250 self.debug_field(f, "File label", FallbackEncoding(&self.file_label))?;
251 self.debug_field(f, "Endianness", self.endian)
/// Reads the file header record from `r`.
///
/// The fields are consumed in file order: magic number, eye-catcher, layout
/// code, nominal case size, compression code, weight index, number of cases,
/// compression bias, creation date, creation time, file label, and three
/// trailing bytes that are discarded.
///
/// # Errors
///
/// Returns `Error::NotASystemFile` if the magic number is not recognized or
/// the byte order cannot be determined from the layout code, and
/// `Error::InvalidZsavCompression` / `Error::InvalidSavCompression` if the
/// compression code is not valid for the kind of file. Read failures are
/// propagated with `?`.
256 fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
257 let magic: [u8; 4] = read_bytes(r)?;
258 let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
260 let eye_catcher: [u8; 60] = read_bytes(r)?;
261 let layout_code: [u8; 4] = read_bytes(r)?;
// The layout code is normally 2 or 3, so the byte order is detected by
// checking which endianness makes the raw bytes decode to one of those
// values. The fallback probe must test 3; probing 2 a second time can never
// succeed where the first probe failed.
262 let endian = Endian::identify_u32(2, layout_code)
263 .or_else(|| Endian::identify_u32(3, layout_code))
264 .ok_or_else(|| Error::NotASystemFile)?;
265 let layout_code = endian.parse(layout_code);
// Values above `i32::MAX / 16` are not trusted and are reported as `None`.
267 let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
268 let nominal_case_size =
269 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
// Which compression codes are acceptable depends on the magic number.
271 let compression_code: u32 = endian.parse(read_bytes(r)?);
272 let compression = match (magic, compression_code) {
273 (Magic::ZSAV, 2) => Some(Compression::ZLib),
274 (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
276 (_, 1) => Some(Compression::Simple),
277 (_, code) => return Err(Error::InvalidSavCompression(code)),
// The stored weight index is 1-based, with 0 meaning "none"; convert it to a
// 0-based `Option`.
280 let weight_index: u32 = endian.parse(read_bytes(r)?);
281 let weight_index = (weight_index > 0).then(|| weight_index - 1);
// Case counts of `i32::MAX / 2` or more are reported as `None`.
283 let n_cases: u32 = endian.parse(read_bytes(r)?);
284 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
286 let bias: f64 = endian.parse(read_bytes(r)?);
288 let creation_date: [u8; 9] = read_bytes(r)?;
289 let creation_time: [u8; 8] = read_bytes(r)?;
290 let file_label: [u8; 64] = read_bytes(r)?;
// Three more bytes are read and discarded (presumably padding -- confirm).
291 let _: [u8; 3] = read_bytes(r)?;
/// Four-byte magic number read from the very start of a system file. Only the
/// values of the `SAV`, `ZSAV`, and `EBCDIC` constants are accepted by the
/// `TryFrom<[u8; 4]>` conversion.
310 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
311 pub struct Magic([u8; 4]);
314 /// Magic number for a regular system file.
315 pub const SAV: Magic = Magic(*b"$FL2");
317 /// Magic number for a system file that contains zlib-compressed data.
318 pub const ZSAV: Magic = Magic(*b"$FL3");
320 /// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded
322 pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
325 impl Debug for Magic {
326 fn fmt(&self, f: &mut Formatter) -> FmtResult {
328 &Magic::SAV => "$FL2",
329 &Magic::ZSAV => "$FL3",
330 &Magic::EBCDIC => "($FL2 in EBCDIC)",
331 _ => return write!(f, "{:?}", self.0),
337 impl TryFrom<[u8; 4]> for Magic {
340 fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
341 let magic = Magic(value);
343 Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic),
344 _ => Err(Error::BadMagic(value)),
349 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
356 fn from_width(width: i32) -> VarType {
358 0 => VarType::Number,
359 _ => VarType::String,
366 Compression, Error, Header, Record, Value, VarType, Variable, ZHeader, ZTrailer,
369 use crate::endian::Endian;
371 collections::VecDeque,
376 #[allow(clippy::type_complexity)]
377 fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
380 struct Start<R: Read + Seek> {
384 pub fn new<R: Read + Seek + 'static>(reader: R) -> Box<dyn State> {
385 Box::new(Start { reader })
388 struct CommonState<R: Read + Seek> {
392 compression: Option<Compression>,
393 var_types: Vec<VarType>,
396 impl<R: Read + Seek + 'static> State for Start<R> {
397 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
398 let header = Header::read(&mut self.reader)?;
399 let next_state = Headers(CommonState {
401 endian: header.endian,
403 compression: header.compression,
404 var_types: Vec::new(),
406 Ok(Some((Record::Header(header), Box::new(next_state))))
/// Reader state that yields header records one at a time. The type of each
/// variable record is remembered in the common state, and when the
/// end-of-headers record is reached the next state is chosen according to the
/// file's compression.
410 struct Headers<R: Read + Seek>(CommonState<R>);
412 impl<R: Read + Seek + 'static> State for Headers<R> {
413 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
414 let record = Record::read(&mut self.0.reader, self.0.endian)?;
416 Record::Variable(Variable { width, .. }) => {
417 self.0.var_types.push(VarType::from_width(width));
419 Record::EndOfHeaders(_) => {
420 let next_state: Box<dyn State> = match self.0.compression {
421 None => Box::new(Data(self.0)),
422 Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
423 Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
425 return Ok(Some((record, next_state)));
429 Ok(Some((record, self)))
/// Reader state entered after the end of the headers when the file uses ZLIB
/// compression; it reads a `ZHeader` and yields it as `Record::ZHeader`.
433 struct ZlibHeader<R: Read + Seek>(CommonState<R>);
435 impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
// Parses a `ZHeader` from the underlying reader using the file's endianness
// and yields it as `Record::ZHeader`.
436 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
437 let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?;
// NOTE(review): the next state returned here is `self`, i.e. this same
// `ZlibHeader` state, so a following call to `read` would try to parse another
// `ZHeader`. Nothing visible in this file constructs `ZlibTrailer`; presumably
// this should hand off to that state (which stores a `ZHeader`) -- confirm.
438 Ok(Some((Record::ZHeader(zheader), self)))
/// Reader state that reads the ZLIB trailer. It carries a `ZHeader` alongside
/// the common state; the header's `ztrailer_offset` is passed to
/// `ZTrailer::read`. The following state reads compressed cases through a
/// `ZlibDecodeMultiple` wrapped around the reader.
442 struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
444 impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
445 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
446 let retval = ZTrailer::read(
449 self.1.ztrailer_offset,
452 let next_state = Box::new(CompressedData::new(CommonState {
453 reader: ZlibDecodeMultiple::new(self.0.reader),
454 endian: self.0.endian,
456 compression: self.0.compression,
457 var_types: self.0.var_types,
460 None => next_state.read(),
461 Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))),
/// Reader state used when the file has no compression: each call reads one
/// case with `Value::read_case` and yields it as `Record::Case`.
466 struct Data<R: Read + Seek>(CommonState<R>);
468 impl<R: Read + Seek + 'static> State for Data<R> {
469 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
470 match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? {
472 Some(values) => Ok(Some((Record::Case(values), self))),
477 struct CompressedData<R: Read + Seek> {
478 common: CommonState<R>,
482 impl<R: Read + Seek + 'static> CompressedData<R> {
483 fn new(common: CommonState<R>) -> CompressedData<R> {
486 codes: VecDeque::new(),
491 impl<R: Read + Seek + 'static> State for CompressedData<R> {
492 fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
493 match Value::read_compressed_case(
494 &mut self.common.reader,
495 &self.common.var_types,
501 Some(values) => Ok(Some((Record::Case(values), self))),
507 #[derive(Copy, Clone)]
513 impl Debug for Value {
514 fn fmt(&self, f: &mut Formatter) -> FmtResult {
516 Value::Number(Some(number)) => write!(f, "{number:?}"),
517 Value::Number(None) => write!(f, "SYSMIS"),
518 Value::String(bytes) => write!(f, "{:?}", FallbackEncoding(bytes)),
524 fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Value, IoError> {
525 Ok(Self::from_raw(var_type, read_bytes(r)?, endian))
528 pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
530 VarType::String => Value::String(raw),
532 let number: f64 = endian.parse(raw);
533 Value::Number((number != -f64::MAX).then_some(number))
538 fn read_case<R: Read + Seek>(
540 var_types: &[VarType],
542 ) -> Result<Option<Vec<Value>>, Error> {
543 let case_start = reader.stream_position()?;
544 let mut values = Vec::with_capacity(var_types.len());
545 for (i, &var_type) in var_types.iter().enumerate() {
546 let Some(raw) = try_read_bytes(reader)? else {
550 let offset = reader.stream_position()?;
551 return Err(Error::EofInCase {
553 case_ofs: offset - case_start,
554 case_len: var_types.len() * 8,
558 values.push(Value::from_raw(var_type, raw, endian));
563 fn read_compressed_case<R: Read + Seek>(
565 var_types: &[VarType],
566 codes: &mut VecDeque<u8>,
569 ) -> Result<Option<Vec<Value>>, Error> {
570 let case_start = reader.stream_position()?;
571 let mut values = Vec::with_capacity(var_types.len());
572 for (i, &var_type) in var_types.iter().enumerate() {
574 let Some(code) = codes.pop_front() else {
575 let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
579 let offset = reader.stream_position()?;
580 return Err(Error::EofInCompressedCase {
582 case_ofs: offset - case_start,
586 codes.extend(new_codes.into_iter());
591 1..=251 => match var_type {
592 VarType::Number => break Value::Number(Some(code as f64 - bias)),
594 break Value::String(endian.to_bytes(code as f64 - bias))
601 let offset = reader.stream_position()?;
602 return Err(Error::PartialCompressedCase {
604 case_ofs: offset - case_start,
608 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
609 254 => match var_type {
610 VarType::String => break Value::String(*b" "), // XXX EBCDIC
612 return Err(Error::CompressedStringExpected {
614 case_ofs: reader.stream_position()? - case_start,
618 255 => match var_type {
619 VarType::Number => break Value::Number(None),
621 return Err(Error::CompressedNumberExpected {
623 case_ofs: reader.stream_position()? - case_start,
635 struct ZlibDecodeMultiple<R>
639 reader: Option<ZlibDecoder<R>>,
642 impl<R> ZlibDecodeMultiple<R>
646 fn new(reader: R) -> ZlibDecodeMultiple<R> {
648 reader: Some(ZlibDecoder::new(reader)),
653 impl<R> Read for ZlibDecodeMultiple<R>
657 fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
659 match self.reader.as_mut().unwrap().read(buf)? {
661 let inner = self.reader.take().unwrap().into_inner();
662 self.reader = Some(ZlibDecoder::new(inner));
670 impl<R> Seek for ZlibDecodeMultiple<R>
674 fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
675 self.reader.as_mut().unwrap().get_mut().seek(pos)
680 state: Option<Box<dyn State>>,
684 pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> {
686 state: Some(state::new(reader)),
689 pub fn collect_headers(&mut self) -> Result<Vec<Record>, Error> {
690 let mut headers = Vec::new();
693 Record::EndOfHeaders(_) => break,
694 r => headers.push(r),
701 impl Iterator for Reader {
702 type Item = Result<Record, Error>;
704 fn next(&mut self) -> Option<Self::Item> {
705 match self.state.take()?.read() {
706 Ok(Some((record, next_state))) => {
707 self.state = Some(next_state);
711 Err(error) => Some(Err(error)),
// `Reader::next` takes the state out of `self.state` with `take()?`, so once
// the state is `None` every later call returns `None` immediately. In the
// lines visible here the state is only stored back after a record was read
// successfully -- NOTE(review): confirm the end-of-data arm does not restore
// it, which is what this marker impl relies on.
716 impl FusedIterator for Reader {}
/// Raw 32-bit format specification. The `Debug` impl prints the raw value in
/// hex followed by a decoded `TYPEw.d` form, where the type comes from the
/// bits above bit 15 (looked up with `format_name`), `w` from bits 8..=15, and
/// `d` from the low 8 bits.
718 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
719 pub struct Format(pub u32);
721 impl Debug for Format {
722 fn fmt(&self, f: &mut Formatter) -> FmtResult {
723 let type_ = format_name(self.0 >> 16);
724 let w = (self.0 >> 8) & 0xff;
725 let d = self.0 & 0xff;
726 write!(f, "{:06x} ({type_}{w}.{d})", self.0)
730 fn format_name(type_: u32) -> Cow<'static, str> {
769 _ => return format!("<unknown format {type_}>").into()
774 pub struct MissingValues {
775 /// Individual missing values, up to 3 of them.
776 pub values: Vec<Value>,
778 /// Optional range of missing values.
779 pub range: Option<(Value, Value)>,
782 impl Debug for MissingValues {
783 fn fmt(&self, f: &mut Formatter) -> FmtResult {
784 for (i, value) in self.values.iter().enumerate() {
788 write!(f, "{value:?}")?;
791 if let Some((low, high)) = self.range {
792 if !self.values.is_empty() {
795 write!(f, "{low:?} THRU {high:?}")?;
807 fn is_empty(&self) -> bool {
808 self.values.is_empty() && self.range.is_none()
811 fn read<R: Read + Seek>(
817 ) -> Result<MissingValues, Error> {
818 let (n_values, has_range) = match (width, code) {
819 (_, 0..=3) => (code, false),
820 (0, -2) => (0, true),
821 (0, -3) => (1, true),
822 (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
823 (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
826 let var_type = VarType::from_width(width);
828 let mut values = Vec::new();
829 for _ in 0..n_values {
830 values.push(Value::read(r, var_type, endian)?);
832 let range = if has_range {
833 let low = Value::read(r, var_type, endian)?;
834 let high = Value::read(r, var_type, endian)?;
839 Ok(MissingValues { values, range })
844 pub struct Variable {
845 /// Offset from the start of the file to the start of the record.
848 /// Variable width, in the range -1..=255.
851 /// Variable name, padded on the right with spaces.
855 pub print_format: Format,
858 pub write_format: Format,
861 pub missing_values: MissingValues,
863 /// Optional variable label.
864 pub label: Option<UnencodedString>,
867 impl Debug for Variable {
868 fn fmt(&self, f: &mut Formatter) -> FmtResult {
875 } else if self.width == 0 {
878 "long string continuation record"
881 writeln!(f, "Print format: {:?}", self.print_format)?;
882 writeln!(f, "Write format: {:?}", self.write_format)?;
883 writeln!(f, "Name: {:?}", FallbackEncoding(&self.name))?;
884 writeln!(f, "Variable label: {:?}", self.label)?;
885 writeln!(f, "Missing values: {:?}", self.missing_values)
890 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
891 let offset = r.stream_position()?;
892 let width: i32 = endian.parse(read_bytes(r)?);
893 let has_variable_label: u32 = endian.parse(read_bytes(r)?);
894 let missing_value_code: i32 = endian.parse(read_bytes(r)?);
895 let print_format = Format(endian.parse(read_bytes(r)?));
896 let write_format = Format(endian.parse(read_bytes(r)?));
897 let name: [u8; 8] = read_bytes(r)?;
899 let label = match has_variable_label {
902 let len: u32 = endian.parse(read_bytes(r)?);
903 let read_len = len.min(65535) as usize;
904 let label = UnencodedString(read_vec(r, read_len)?);
906 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
907 let _ = read_vec(r, padding_bytes as usize)?;
912 return Err(Error::BadVariableLabelCode {
914 code: has_variable_label,
919 let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?;
/// Eight raw bytes of a value that is not yet known to be a number or a
/// string. The `Debug` impl prints a numeric reading, chosen between the
/// little-endian and big-endian `f64` interpretations of the bytes, followed
/// by a string reading of the same bytes.
933 #[derive(Copy, Clone)]
934 pub struct UntypedValue(pub [u8; 8]);
936 impl Debug for UntypedValue {
937 fn fmt(&self, f: &mut Formatter) -> FmtResult {
938 let little: f64 = Endian::Little.parse(self.0);
939 let little = format!("{:?}", little);
940 let big: f64 = Endian::Big.parse(self.0);
941 let big = format!("{:?}", big);
942 let number = if little.len() <= big.len() {
947 write!(f, "{number}")?;
949 let string = fallback_encode(&self.0);
951 .split(|c: char| c == '\0' || c.is_control())
954 write!(f, "/\"{string}\"")?;
/// Owned byte string kept as raw bytes rather than as a `String`. Its `Debug`
/// impl prints the bytes through `FallbackEncoding`.
960 pub struct UnencodedString(Vec<u8>);
962 impl From<Vec<u8>> for UnencodedString {
963 fn from(source: Vec<u8>) -> Self {
968 impl From<&[u8]> for UnencodedString {
969 fn from(source: &[u8]) -> Self {
974 impl Debug for UnencodedString {
975 fn fmt(&self, f: &mut Formatter) -> FmtResult {
976 write!(f, "{:?}", FallbackEncoding(self.0.as_slice()))
981 pub struct ValueLabel {
982 /// Offset from the start of the file to the start of the record.
986 pub labels: Vec<(UntypedValue, UnencodedString)>,
989 impl Debug for ValueLabel {
990 fn fmt(&self, f: &mut Formatter) -> FmtResult {
991 for (value, label) in self.labels.iter() {
992 writeln!(f, "{value:?}: {label:?}")?;
999 /// Maximum number of value labels in a record.
1000 pub const MAX: u32 = u32::MAX / 8;
1002 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
1003 let offset = r.stream_position()?;
1004 let n: u32 = endian.parse(read_bytes(r)?);
1005 if n > ValueLabel::MAX {
1006 return Err(Error::BadNumberOfValueLabels {
1009 max: ValueLabel::MAX,
1013 let mut labels = Vec::new();
1015 let value = UntypedValue(read_bytes(r)?);
1016 let label_len: u8 = endian.parse(read_bytes(r)?);
1017 let label_len = label_len as usize;
1018 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
1020 let mut label = read_vec(r, padded_len - 1)?;
1021 label.truncate(label_len);
1022 labels.push((value, UnencodedString(label)));
1024 Ok(ValueLabel { offset, labels })
1029 pub struct VarIndexes {
1030 /// Offset from the start of the file to the start of the record.
1033 /// The 0-based indexes of the variable indexes.
1034 pub var_indexes: Vec<u32>,
1037 impl Debug for VarIndexes {
1038 fn fmt(&self, f: &mut Formatter) -> FmtResult {
1039 write!(f, "apply to variables")?;
1040 for var_index in self.var_indexes.iter() {
1041 write!(f, " #{var_index}")?;
1048 /// Maximum number of variable indexes in a record.
1049 pub const MAX: u32 = u32::MAX / 8;
1051 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
1052 let offset = r.stream_position()?;
1053 let n: u32 = endian.parse(read_bytes(r)?);
1054 if n > VarIndexes::MAX {
1055 return Err(Error::BadNumberOfVarIndexes {
1058 max: VarIndexes::MAX,
1061 let mut var_indexes = Vec::with_capacity(n as usize);
1063 var_indexes.push(endian.parse(read_bytes(r)?));
1073 #[derive(Clone, Debug)]
1074 pub struct Document {
1075 /// Offset from the start of the file to the start of the record.
1078 /// The document, as an array of 80-byte lines.
1079 pub lines: Vec<[u8; Document::LINE_LEN as usize]>,
1083 /// Length of a line in a document. Document lines are fixed-length and
1084 /// padded on the right with spaces.
1085 pub const LINE_LEN: u32 = 80;
1087 /// Maximum number of lines we will accept in a document. This is simply
1088 /// the maximum number that will fit in a 32-bit space.
1089 pub const MAX_LINES: u32 = i32::MAX as u32 / Self::LINE_LEN;
1091 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
1092 let offset = r.stream_position()?;
1093 let n: u32 = endian.parse(read_bytes(r)?);
1095 0..=Self::MAX_LINES => Ok(Document {
1096 pos: r.stream_position()?,
1098 .map(|_| read_bytes(r))
1099 .collect::<Result<Vec<_>, _>>()?,
1101 _ => Err(Error::BadDocumentLength {
1104 max: Self::MAX_LINES,
1114 const NAME: &'static str;
1115 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
1118 trait ExtensionRecord
1123 const SIZE: Option<u32>;
1124 const COUNT: Option<u32>;
1125 const NAME: &'static str;
1126 fn parse(ext: &Extension, endian: Endian, warn: impl Fn(Error)) -> Result<Self, Error>;
1129 #[derive(Clone, Debug)]
1130 pub struct IntegerInfo {
1131 pub version: (i32, i32, i32),
1132 pub machine_code: i32,
1133 pub floating_point_rep: i32,
1134 pub compression_code: i32,
1135 pub endianness: i32,
1136 pub character_code: i32,
1139 impl ExtensionRecord for IntegerInfo {
1140 const SUBTYPE: u32 = 3;
1141 const SIZE: Option<u32> = Some(4);
1142 const COUNT: Option<u32> = Some(8);
1143 const NAME: &'static str = "integer record";
1145 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1146 ext.check_size::<Self>()?;
1148 let mut input = &ext.data[..];
1149 let data: Vec<i32> = (0..8)
1150 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1153 version: (data[0], data[1], data[2]),
1154 machine_code: data[3],
1155 floating_point_rep: data[4],
1156 compression_code: data[5],
1157 endianness: data[6],
1158 character_code: data[7],
1163 #[derive(Clone, Debug)]
1164 pub struct FloatInfo {
1170 impl ExtensionRecord for FloatInfo {
1171 const SUBTYPE: u32 = 4;
1172 const SIZE: Option<u32> = Some(8);
1173 const COUNT: Option<u32> = Some(3);
1174 const NAME: &'static str = "floating point record";
1176 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1177 ext.check_size::<Self>()?;
1179 let mut input = &ext.data[..];
1180 let data: Vec<f64> = (0..3)
1181 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1191 #[derive(Clone, Debug)]
1192 pub enum MultipleResponseType {
1194 value: UnencodedString,
1195 labels: CategoryLabels,
1199 #[derive(Clone, Debug)]
1200 pub struct MultipleResponseSet {
1201 pub name: UnencodedString,
1202 pub label: UnencodedString,
1203 pub mr_type: MultipleResponseType,
1204 pub vars: Vec<UnencodedString>,
1207 impl MultipleResponseSet {
1208 fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
1209 let Some(equals) = input.iter().position(|&b| b == b'=') else {
1210 return Err(Error::TBD);
1212 let (name, input) = input.split_at(equals);
1213 let (mr_type, input) = match input.get(0) {
1214 Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]),
1216 let (value, input) = parse_counted_string(&input[1..])?;
1218 MultipleResponseType::MultipleDichotomy {
1219 value: value.into(),
1220 labels: CategoryLabels::VarLabels,
1226 let Some(b' ') = input.get(1) else {
1227 return Err(Error::TBD);
1229 let input = &input[2..];
1230 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
1231 (CategoryLabels::CountedValues, rest)
1232 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
1233 (CategoryLabels::VarLabels, rest)
1235 return Err(Error::TBD);
1237 let (value, input) = parse_counted_string(input)?;
1239 MultipleResponseType::MultipleDichotomy {
1240 value: value.into(),
1246 _ => return Err(Error::TBD),
1248 let Some(b' ') = input.get(0) else {
1249 return Err(Error::TBD);
1251 let (label, mut input) = parse_counted_string(&input[1..])?;
1252 let mut vars = Vec::new();
1253 while input.get(0) == Some(&b' ') {
1254 input = &input[1..];
1255 let Some(length) = input.iter().position(|b| b" \n".contains(b)) else {
1256 return Err(Error::TBD);
1259 vars.push(input[..length].into());
1261 input = &input[length..];
1263 if input.get(0) != Some(&b'\n') {
1264 return Err(Error::TBD);
1266 while input.get(0) == Some(&b'\n') {
1267 input = &input[1..];
1270 MultipleResponseSet {
1272 label: label.into(),
/// Extension record (subtype 7) holding the multiple response sets parsed, one
/// after another, from the record's data.
1281 #[derive(Clone, Debug)]
1282 pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
1284 impl ExtensionRecord for MultipleResponseRecord {
1285 const SUBTYPE: u32 = 7;
1286 const SIZE: Option<u32> = Some(1);
1287 const COUNT: Option<u32> = None;
1288 const NAME: &'static str = "multiple response set record";
1290 fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1291 ext.check_size::<Self>()?;
1293 let mut input = &ext.data[..];
1294 let mut sets = Vec::new();
1295 while !input.is_empty() {
1296 let (set, rest) = MultipleResponseSet::parse(input)?;
1300 Ok(MultipleResponseRecord(sets))
1304 fn parse_counted_string(input: &[u8]) -> Result<(UnencodedString, &[u8]), Error> {
1305 let Some(space) = input.iter().position(|&b| b == b' ') else {
1306 return Err(Error::TBD);
1308 let Ok(length) = from_utf8(&input[..space]) else {
1309 return Err(Error::TBD);
1311 let Ok(length): Result<usize, _> = length.parse() else {
1312 return Err(Error::TBD);
1315 let input = &input[space + 1..];
1316 if input.len() < length {
1317 return Err(Error::TBD);
1320 let (string, rest) = input.split_at(length);
1321 Ok((string.into(), rest))
1324 pub struct ProductInfo(String);
1326 impl TextRecord for ProductInfo {
1327 const NAME: &'static str = "extra product info";
1328 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
1329 Ok(ProductInfo(input.into()))
/// Extension record (subtype 11) holding the record's elements as `u32`
/// values, one per element, decoded with the endianness given to the parser.
1333 #[derive(Clone, Debug)]
1334 pub struct VarDisplayRecord(pub Vec<u32>);
1336 impl ExtensionRecord for VarDisplayRecord {
1337 const SUBTYPE: u32 = 11;
1338 const SIZE: Option<u32> = Some(4);
1339 const COUNT: Option<u32> = None;
1340 const NAME: &'static str = "variable display record";
1342 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1343 ext.check_size::<Self>()?;
1345 let mut input = &ext.data[..];
1346 let display = (0..ext.count)
1347 .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
1349 Ok(VarDisplayRecord(display))
1353 pub struct VariableSet {
1355 pub vars: Vec<String>,
1359 fn parse(input: &str) -> Result<Self, Error> {
1360 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
1361 let vars = input.split_ascii_whitespace().map(String::from).collect();
1369 pub struct VariableSetRecord(Vec<VariableSet>);
1371 impl TextRecord for VariableSetRecord {
1372 const NAME: &'static str = "variable set";
1373 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1374 let mut sets = Vec::new();
1375 for line in input.lines() {
1376 match VariableSet::parse(line) {
1377 Ok(set) => sets.push(set),
1378 Err(error) => warn(error),
1381 Ok(VariableSetRecord(sets))
1385 pub struct LongVariableName {
1386 pub short_name: String,
1387 pub long_name: String,
1390 pub struct LongVariableNameRecord(Vec<LongVariableName>);
1392 impl TextRecord for LongVariableNameRecord {
1393 const NAME: &'static str = "long variable names";
1394 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1395 let mut names = Vec::new();
1396 for pair in input.split('\t').filter(|s| !s.is_empty()) {
1397 if let Some((short_name, long_name)) = pair.split_once('=') {
1398 let name = LongVariableName {
1399 short_name: short_name.into(),
1400 long_name: long_name.into(),
1407 Ok(LongVariableNameRecord(names))
1411 pub struct VeryLongString {
1412 pub short_name: String,
1416 impl VeryLongString {
1417 fn parse(input: &str) -> Result<VeryLongString, Error> {
1418 let Some((short_name, length)) = input.split_once('=') else {
1419 return Err(Error::TBD);
1421 let length: usize = length.parse().map_err(|_| Error::TBD)?;
1423 short_name: short_name.into(),
1429 pub struct VeryLongStringRecord(Vec<VeryLongString>);
1431 impl TextRecord for VeryLongStringRecord {
1432 const NAME: &'static str = "very long strings";
1433 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1434 let mut very_long_strings = Vec::new();
1437 .map(|s| s.trim_end_matches('\t'))
1438 .filter(|s| !s.is_empty())
1440 match VeryLongString::parse(tuple) {
1441 Ok(vls) => very_long_strings.push(vls),
1442 Err(error) => warn(error),
1445 Ok(VeryLongStringRecord(very_long_strings))
1449 #[derive(Clone, Debug)]
1450 pub struct LongStringValueLabels {
1451 pub var_name: UnencodedString,
1454 /// `(value, label)` pairs, where each value is `width` bytes.
1455 pub labels: Vec<(UnencodedString, UnencodedString)>,
1458 #[derive(Clone, Debug)]
1459 pub struct LongStringValueLabelRecord(Vec<LongStringValueLabels>);
1461 impl ExtensionRecord for LongStringValueLabelRecord {
1462 const SUBTYPE: u32 = 21;
1463 const SIZE: Option<u32> = Some(1);
1464 const COUNT: Option<u32> = None;
1465 const NAME: &'static str = "long string value labels record";
1467 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1468 ext.check_size::<Self>()?;
1470 let mut input = &ext.data[..];
1471 let mut label_set = Vec::new();
1472 while !input.is_empty() {
1473 let var_name = read_string(&mut input, endian)?;
1474 let width: u32 = endian.parse(read_bytes(&mut input)?);
1475 let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
1476 let mut labels = Vec::new();
1477 for _ in 0..n_labels {
1478 let value = read_string(&mut input, endian)?;
1479 let label = read_string(&mut input, endian)?;
1480 labels.push((value, label));
1482 label_set.push(LongStringValueLabels {
1488 Ok(LongStringValueLabelRecord(label_set))
1492 pub struct LongStringMissingValues {
1494 pub var_name: UnencodedString,
1497 pub missing_values: MissingValues,
1500 pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
1502 impl ExtensionRecord for LongStringMissingValueSet {
1503 const SUBTYPE: u32 = 22;
1504 const SIZE: Option<u32> = Some(1);
1505 const COUNT: Option<u32> = None;
1506 const NAME: &'static str = "long string missing values record";
1508 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1509 ext.check_size::<Self>()?;
1511 let mut input = &ext.data[..];
1512 let mut missing_value_set = Vec::new();
1513 while !input.is_empty() {
1514 let var_name = read_string(&mut input, endian)?;
1515 let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
1516 let value_len: u32 = endian.parse(read_bytes(&mut input)?);
1518 let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offset;
1519 return Err(Error::BadLongMissingValueLength {
1520 record_offset: ext.offset,
1525 let mut values = Vec::new();
1526 for i in 0..n_missing_values {
1527 let value: [u8; 8] = read_bytes(&mut input)?;
1528 let numeric_value: u64 = endian.parse(value);
1529 let value = if i > 0 && numeric_value == 8 {
1530 // Tolerate files written by old, buggy versions of PSPP
1531 // where we believed that the value_length was repeated
1532 // before each missing value.
1533 read_bytes(&mut input)?
1537 values.push(Value::String(value));
1539 let missing_values = MissingValues {
1543 missing_value_set.push(LongStringMissingValues {
1548 Ok(LongStringMissingValueSet(missing_value_set))
1552 #[derive(Clone, Debug)]
1553 pub struct EncodingRecord(pub String);
1555 impl ExtensionRecord for EncodingRecord {
1556 const SUBTYPE: u32 = 20;
1557 const SIZE: Option<u32> = Some(1);
1558 const COUNT: Option<u32> = None;
1559 const NAME: &'static str = "encoding record";
1561 fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1562 ext.check_size::<Self>()?;
1565 String::from_utf8(ext.data.clone())
1566 .map_err(|_| Error::BadEncodingName { offset: ext.offset })?,
1571 pub struct Attribute {
1573 pub values: Vec<String>,
1577 fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
1578 let Some((name, mut input)) = input.split_once('(') else {
1579 return Err(Error::TBD);
1581 let mut values = Vec::new();
1583 let Some((value, rest)) = input.split_once('\n') else {
1584 return Err(Error::TBD);
1586 if let Some(stripped) = value
1588 .and_then(|value| value.strip_suffix('\''))
1590 values.push(stripped.into());
1593 values.push(value.into());
1595 if let Some(rest) = rest.strip_prefix(')') {
1609 pub struct AttributeSet(pub Vec<Attribute>);
1614 sentinel: Option<char>,
1615 warn: &impl Fn(Error),
1616 ) -> Result<(AttributeSet, &'a str), Error> {
1617 let mut attributes = Vec::new();
1619 match input.chars().next() {
1620 None => break input,
1621 c if c == sentinel => break &input[1..],
1623 let (attribute, rest) = Attribute::parse(input, &warn)?;
1624 attributes.push(attribute);
1629 Ok((AttributeSet(attributes), rest))
1633 pub struct FileAttributeRecord(AttributeSet);
1635 impl TextRecord for FileAttributeRecord {
1636 const NAME: &'static str = "data file attributes";
1637 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1638 let (set, rest) = AttributeSet::parse(input, None, &warn)?;
1639 if !rest.is_empty() {
1642 Ok(FileAttributeRecord(set))
1646 pub struct VarAttributeSet {
1647 pub long_var_name: String,
1648 pub attributes: AttributeSet,
1651 impl VarAttributeSet {
1654 warn: &impl Fn(Error),
1655 ) -> Result<(VarAttributeSet, &'a str), Error> {
1656 let Some((long_var_name, rest)) = input.split_once(':') else {
1657 return Err(Error::TBD);
1659 let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
1662 long_var_name: long_var_name.into(),
1670 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
1672 impl TextRecord for VariableAttributeRecord {
1673 const NAME: &'static str = "variable attributes";
1674 fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1675 let mut var_attribute_sets = Vec::new();
1676 while !input.is_empty() {
1677 match VarAttributeSet::parse(input, &warn) {
1678 Ok((var_attribute, rest)) => {
1679 var_attribute_sets.push(var_attribute);
1688 Ok(VariableAttributeRecord(var_attribute_sets))
1692 #[derive(Clone, Debug)]
1693 pub struct NumberOfCasesRecord {
1694 /// Always observed as 1.
1697 /// Number of cases.
1701 impl ExtensionRecord for NumberOfCasesRecord {
1702 const SUBTYPE: u32 = 16;
1703 const SIZE: Option<u32> = Some(8);
1704 const COUNT: Option<u32> = Some(2);
1705 const NAME: &'static str = "extended number of cases record";
1707 fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
1708 ext.check_size::<Self>()?;
1710 let mut input = &ext.data[..];
1711 let one = endian.parse(read_bytes(&mut input)?);
1712 let n_cases = endian.parse(read_bytes(&mut input)?);
1714 Ok(NumberOfCasesRecord { one, n_cases })
1718 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
1719 pub enum TextExtensionSubtype {
1724 FileAttributes = 17,
1725 VariableAttributes = 18,
1728 #[derive(Clone, Debug)]
1729 pub struct TextExtension {
1730 pub subtype: TextExtensionSubtype,
1731 pub string: UnencodedString,
1734 #[derive(Clone, Debug)]
1735 pub struct Extension {
1736 /// Offset from the start of the file to the start of the record.
1742 /// Size of each data element.
1745 /// Number of data elements.
1748 /// `size * count` bytes of data.
1753 fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
1755 /* Implemented record types. */
1756 ExtensionType::Integer => (4, 8),
1757 ExtensionType::Float => (8, 3),
1758 ExtensionType::VarSets => (1, 0),
1759 ExtensionType::Mrsets => (1, 0),
1760 ExtensionType::ProductInfo => (1, 0),
1761 ExtensionType::Display => (4, 0),
1762 ExtensionType::LongNames => (1, 0),
1763 ExtensionType::LongStrings => (1, 0),
1764 ExtensionType::Ncases => (8, 2),
1765 ExtensionType::FileAttrs => (1, 0),
1766 ExtensionType::VarAttrs => (1, 0),
1767 ExtensionType::Mrsets2 => (1, 0),
1768 ExtensionType::Encoding => (1, 0),
1769 ExtensionType::LongLabels => (1, 0),
1770 ExtensionType::LongMissing => (1, 0),
1772 /* Ignored record types. */
1773 ExtensionType::Date => (0, 0),
1774 ExtensionType::DataEntry => (0, 0),
1775 ExtensionType::Dataview => (0, 0),
1781 fn check_size<E: ExtensionRecord>(&self) -> Result<(), Error> {
1782 if let Some(expected_size) = E::SIZE {
1783 if self.size != expected_size {
1784 return Err(Error::BadRecordSize {
1785 offset: self.offset,
1786 record: E::NAME.into(),
1792 if let Some(expected_count) = E::COUNT {
1793 if self.count != expected_count {
1794 return Err(Error::BadRecordCount {
1795 offset: self.offset,
1796 record: E::NAME.into(),
1805 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
1806 let subtype = endian.parse(read_bytes(r)?);
1807 let offset = r.stream_position()?;
1808 let size: u32 = endian.parse(read_bytes(r)?);
1809 let count = endian.parse(read_bytes(r)?);
1810 let Some(product) = size.checked_mul(count) else {
1811 return Err(Error::ExtensionRecordTooLarge {
1818 let offset = r.stream_position()?;
1819 let data = read_vec(r, product as usize)?;
1820 let extension = Extension {
1828 IntegerInfo::SUBTYPE => Ok(Record::IntegerInfo(IntegerInfo::parse(
1833 FloatInfo::SUBTYPE => Ok(Record::FloatInfo(FloatInfo::parse(
1838 VarDisplayRecord::SUBTYPE => Ok(Record::VarDisplay(VarDisplayRecord::parse(
1843 MultipleResponseRecord::SUBTYPE | 19 => Ok(Record::MultipleResponse(
1844 MultipleResponseRecord::parse(&extension, endian, |_| ())?,
1846 LongStringValueLabelRecord::SUBTYPE => Ok(Record::LongStringValueLabels(
1847 LongStringValueLabelRecord::parse(&extension, endian, |_| ())?,
1849 EncodingRecord::SUBTYPE => Ok(Record::Encoding(EncodingRecord::parse(
1854 NumberOfCasesRecord::SUBTYPE => Ok(Record::NumberOfCases(NumberOfCasesRecord::parse(
1859 x if x == TextExtensionSubtype::VariableSets as u32 => {
1860 Ok(Record::VariableSets(UnencodedString(extension.data)))
1862 x if x == TextExtensionSubtype::ProductInfo as u32 => {
1863 Ok(Record::ProductInfo(UnencodedString(extension.data)))
1865 x if x == TextExtensionSubtype::LongNames as u32 => {
1866 Ok(Record::LongNames(UnencodedString(extension.data)))
1868 x if x == TextExtensionSubtype::LongStrings as u32 => {
1869 Ok(Record::LongStrings(UnencodedString(extension.data)))
1871 x if x == TextExtensionSubtype::FileAttributes as u32 => {
1872 Ok(Record::FileAttributes(UnencodedString(extension.data)))
1874 x if x == TextExtensionSubtype::VariableAttributes as u32 => {
1875 Ok(Record::VariableAttributes(UnencodedString(extension.data)))
1877 _ => Ok(Record::OtherExtension(extension)),
1882 #[derive(Clone, Debug)]
1883 pub struct ZHeader {
1884 /// File offset to the start of the record.
1887 /// File offset to the ZLIB data header.
1888 pub zheader_offset: u64,
1890 /// File offset to the ZLIB trailer.
1891 pub ztrailer_offset: u64,
1893 /// Length of the ZLIB trailer in bytes.
1894 pub ztrailer_len: u64,
1898 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
1899 let offset = r.stream_position()?;
1900 let zheader_offset: u64 = endian.parse(read_bytes(r)?);
1901 let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
1902 let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
1913 #[derive(Clone, Debug)]
1914 pub struct ZTrailer {
1915 /// File offset to the start of the record.
1918 /// Compression bias as a negative integer, e.g. -100.
1921 /// Always observed as zero.
1924 /// Uncompressed size of each block, except possibly the last. Only
1925 /// `0x3ff000` has been observed so far.
1926 pub block_size: u32,
1928 /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them.
1929 pub blocks: Vec<ZBlock>,
1932 #[derive(Clone, Debug)]
1934 /// Offset of block of data if simple compression were used.
1935 pub uncompressed_ofs: u64,
1937 /// Actual offset within the file of the compressed data block.
1938 pub compressed_ofs: u64,
1940 /// The number of bytes in this data block after decompression. This is
1941 /// `block_size` in every data block but the last, which may be smaller.
1942 pub uncompressed_size: u32,
1944 /// The number of bytes in this data block, as stored compressed in this
1946 pub compressed_size: u32,
1950 fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
1952 uncompressed_ofs: endian.parse(read_bytes(r)?),
1953 compressed_ofs: endian.parse(read_bytes(r)?),
1954 uncompressed_size: endian.parse(read_bytes(r)?),
1955 compressed_size: endian.parse(read_bytes(r)?),
1961 fn read<R: Read + Seek>(
1966 ) -> Result<Option<ZTrailer>, Error> {
1967 let start_offset = reader.stream_position()?;
1968 if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
1971 let int_bias = endian.parse(read_bytes(reader)?);
1972 let zero = endian.parse(read_bytes(reader)?);
1973 let block_size = endian.parse(read_bytes(reader)?);
1974 let n_blocks: u32 = endian.parse(read_bytes(reader)?);
1975 let expected_n_blocks = (ztrailer_len - 24) / 24;
1976 if n_blocks as u64 != expected_n_blocks {
1977 return Err(Error::BadZlibTrailerNBlocks {
1978 offset: ztrailer_ofs,
1984 let blocks = (0..n_blocks)
1985 .map(|_| ZBlock::read(reader, endian))
1986 .collect::<Result<Vec<_>, _>>()?;
1987 reader.seek(SeekFrom::Start(start_offset))?;
1989 offset: ztrailer_ofs,
/// Reads exactly `N` bytes from `r`, or reports a clean end of input.
///
/// Returns `Ok(None)` if the first read yields no bytes at all, and
/// `Ok(Some(bytes))` once all `N` bytes have been read.
///
/// # Errors
///
/// Returns an I/O error if reading fails, including `UnexpectedEof` when the
/// input ends after some but not all of the `N` bytes.
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
    let mut buf = [0; N];
    // `Interrupted` is non-fatal and must be retried; `read_exact` below
    // already does so, and the initial probe has to behave the same way.
    let n = loop {
        match r.read(&mut buf) {
            Ok(n) => break n,
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error),
        }
    };
    if n == 0 {
        return Ok(None);
    }
    if n < N {
        r.read_exact(&mut buf[n..])?;
    }
    Ok(Some(buf))
}
/// Reads exactly `N` bytes from `r` into a fixed-size array.
///
/// # Errors
///
/// Returns the error from `read_exact`, notably `UnexpectedEof` when fewer
/// than `N` bytes remain.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut bytes = [0u8; N];
    r.read_exact(&mut bytes).map(|()| bytes)
}
/// Reads exactly `n` bytes from `r` into a new vector.
///
/// `n` is typically a length taken from the file being parsed, so the buffer
/// is not allocated at full size up front: a corrupt or hostile length would
/// otherwise request gigabytes before a single byte is read.  The vector
/// instead grows as data actually arrives.
///
/// # Errors
///
/// Returns `UnexpectedEof` if the input ends before `n` bytes have been
/// read, or any other I/O error reported by the reader.
fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
    /// Largest capacity reserved before any data has been seen.
    const MAX_PREALLOC: usize = 64 * 1024;

    let mut vec = Vec::with_capacity(n.min(MAX_PREALLOC));
    let limit = u64::try_from(n).unwrap_or(u64::MAX);
    let got = r.by_ref().take(limit).read_to_end(&mut vec)?;
    if got < n {
        return Err(IoError::new(
            std::io::ErrorKind::UnexpectedEof,
            "failed to fill whole buffer",
        ));
    }
    Ok(vec)
}
2023 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<UnencodedString, IoError> {
2024 let length: u32 = endian.parse(read_bytes(r)?);
2025 Ok(read_vec(r, length as usize)?.into())