1 #![allow(unused_variables)]
2 use endian::{Endian, Parse};
4 use std::io::{BufReader, Error as IoError, Read, Seek};
9 #[derive(Error, Debug)]
11 #[error("Not an SPSS system file")]
14 #[error("I/O error ({source})")]
20 #[error("Invalid SAV compression code {0}")]
21 InvalidSavCompression(u32),
23 #[error("Invalid ZSAV compression code {0}")]
24 InvalidZsavCompression(u32),
26 #[error("Misplaced type 4 record near offset {0:#x}.")]
27 MisplacedType4Record(u64),
29 #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
30 BadDocumentLength { offset: u64, n: u32, max: u32 },
32 #[error("At offset {offset:#x}, Unrecognized record type {rec_type}.")]
33 BadRecordType { offset: u64, rec_type: u32 },
35 #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
36 BadVariableLabelCode { offset: u64, code: u32 },
39 "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
41 BadNumericMissingValueCode { offset: u64, code: i32 },
43 #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
44 BadStringMissingValueCode { offset: u64, code: i32 },
46 #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
47 BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
49 #[error("At offset {offset:#x}, variable index record (type 4) does not immediately follow value label record (type 3) as it should.")]
50 MissingVariableIndexRecord { offset: u64 },
52 #[error("At offset {offset:#x}, number of variables associated with a value label ({n}) is not between 1 and the number of variables ({max}).")]
53 BadNumberOfValueLabelVariables { offset: u64, n: u32, max: u32 },
56 #[derive(Error, Debug)]
58 #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
61 #[error("Duplicate type 6 (document) record.")]
62 DuplicateDocumentRecord,
/// Compression scheme announced by the file header.
#[derive(Copy, Clone, Debug)]
pub enum Compression {
    /// Header compression code 1 in a non-ZSAV file.
    Simple,
    /// Header compression code 2 in a ZSAV (`$FL3`) file.
    ZLib,
}
71 pub struct Reader<R: Read> {
74 document_record: Option<DocumentRecord>,
76 variables: Vec<VariableRecord>,
78 value_labels: Vec<ValueLabelRecord>,
/// Magic number for a regular system file.
pub const ASCII_MAGIC: &[u8; 4] = b"$FL2";

/// Magic number for a system file that contains zlib-compressed data.
pub const ASCII_ZMAGIC: &[u8; 4] = b"$FL3";

/// Magic number for an EBCDIC-encoded system file.  This is `$FL2` encoded in
/// EBCDIC.
pub const EBCDIC_MAGIC: &[u8; 4] = &[0x5b, 0xc6, 0xd3, 0xf2];
91 pub struct FileHeader {
92 /// First 4 bytes of the file, one of `ASCII_MAGIC`, `ASCII_ZMAGIC`, and
96 /// True if `magic` indicates that this file contained zlib-compressed data.
99 /// True if `magic` indicates that this file contained EBCDIC data.
102 /// Endianness of the data in the file header.
103 pub endianness: Endian,
105 /// 0-based variable index of the weight variable, or `None` if the file is
107 pub weight_index: Option<u32>,
109 /// Number of variable positions, or `None` if the value in the file is
110 /// questionably trustworthy.
111 pub nominal_case_size: Option<u32>,
113 /// `dd mmm yy` in the file's encoding.
114 pub creation_date: [u8; 9],
116 /// `HH:MM:SS` in the file's encoding.
117 pub creation_time: [u8; 8],
119 /// Eye-catcher string, then product name, in the file's encoding. Padded
120 /// on the right with spaces.
121 pub eye_catcher: [u8; 60],
123 /// File label, in the file's encoding. Padded on the right with spaces.
124 pub file_label: [u8; 64],
/// Length in bytes of one line of a document record.
pub const DOC_LINE_LEN: u32 = 80;

/// Largest number of document lines accepted, chosen so that the total
/// document size in bytes still fits in an `i32`.
pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
130 impl<R: Read + Seek> Reader<R> {
131 pub fn new(r: R, warn: impl Fn(Warning)) -> Result<Reader<R>, Error> {
132 let mut r = BufReader::new(r);
134 let header = read_header(&mut r, &warn)?;
135 let e = header.endianness;
136 let mut document_record = None;
137 let mut variables = Vec::new();
138 let mut value_labels = Vec::new();
140 let offset = r.stream_position()?;
141 let rec_type: u32 = e.parse(read_bytes(&mut r)?);
143 2 => variables.push(read_variable_record(&mut r, e)?),
144 3 => value_labels.push(read_value_label_record(&mut r, e, variables.len())?),
145 // A Type 4 record is always immediately after a type 3 record,
146 // the code for type 3 records reads the type 4 record too.
147 4 => return Err(Error::MisplacedType4Record(offset)),
150 let d = read_document_record(&mut r, e)?;
151 if document_record.is_some() {
152 warn(Warning::DuplicateDocumentRecord);
158 7 => d.read_extension_record()?,
161 _ => return Err(Error::BadRecordType { offset, rec_type }),
174 fn read_header<R: Read>(r: &mut R, warn: impl Fn(Warning)) -> Result<FileHeader, Error> {
175 let magic: [u8; 4] = read_bytes(r)?;
176 let (is_zsav, is_ebcdic) = match &magic {
177 ASCII_MAGIC => (false, false),
178 ASCII_ZMAGIC => (true, false),
179 EBCDIC_MAGIC => (false, true),
180 _ => return Err(Error::NotASystemFile),
183 let eye_catcher: [u8; 60] = read_bytes(r)?;
184 let layout_code: [u8; 4] = read_bytes(r)?;
185 let endianness = Endian::identify_u32(2, layout_code)
186 .or_else(|| Endian::identify_u32(2, layout_code))
187 .ok_or_else(|| Error::NotASystemFile)?;
189 let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
190 let nominal_case_size =
191 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
193 let compression_code: u32 = endianness.parse(read_bytes(r)?);
194 let compression = match (is_zsav, compression_code) {
196 (false, 1) => Some(Compression::Simple),
197 (true, 2) => Some(Compression::ZLib),
198 (false, code) => return Err(Error::InvalidSavCompression(code)),
199 (true, code) => return Err(Error::InvalidZsavCompression(code)),
202 let weight_index: u32 = endianness.parse(read_bytes(r)?);
203 let weight_index = (weight_index > 0).then_some(weight_index - 1);
205 let n_cases: u32 = endianness.parse(read_bytes(r)?);
206 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
208 let bias: f64 = endianness.parse(read_bytes(r)?);
210 warn(Warning::UnexpectedBias(bias))
213 let creation_date: [u8; 9] = read_bytes(r)?;
214 let creation_time: [u8; 8] = read_bytes(r)?;
215 let file_label: [u8; 64] = read_bytes(r)?;
216 let _: [u8; 3] = read_bytes(r)?;
/// A variable (type 2) record, with its fields as read from the file.
pub struct VariableRecord {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// Variable width, in the range -1..=255.
    pub width: i32,

    /// Variable name, padded on the right with spaces.
    pub name: [u8; 8],

    /// Print format, as the raw value from the file.
    pub print_format: u32,

    /// Write format, as the raw value from the file.
    pub write_format: u32,

    /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
    pub missing_value_code: i32,

    /// Raw missing values, up to 3 of them.
    pub missing: Vec<[u8; 8]>,

    /// Optional variable label.
    pub label: Option<Vec<u8>>,
}
258 fn read_variable_record<R: Read + Seek>(
259 r: &mut BufReader<R>,
261 ) -> Result<VariableRecord, Error> {
262 let offset = r.stream_position()?;
263 let width: i32 = e.parse(read_bytes(r)?);
264 let has_variable_label: u32 = e.parse(read_bytes(r)?);
265 let missing_value_code: i32 = e.parse(read_bytes(r)?);
266 let print_format: u32 = e.parse(read_bytes(r)?);
267 let write_format: u32 = e.parse(read_bytes(r)?);
268 let name: [u8; 8] = read_bytes(r)?;
270 let label = match has_variable_label {
273 let len: u32 = e.parse(read_bytes(r)?);
274 let read_len = len.min(65535) as usize;
275 let label = Some(read_vec(r, read_len)?);
277 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
278 let _ = read_vec(r, padding_bytes as usize)?;
283 return Err(Error::BadVariableLabelCode {
285 code: has_variable_label,
290 let mut missing = Vec::new();
291 if missing_value_code != 0 {
292 match (width, missing_value_code) {
293 (0, -3 | -2 | 1 | 2 | 3) => (),
295 return Err(Error::BadNumericMissingValueCode {
297 code: missing_value_code,
302 return Err(Error::BadStringMissingValueCode {
304 code: missing_value_code,
309 for _ in 0..missing_value_code.abs() {
310 missing.push(read_bytes(r)?);
/// A value label (type 3) record together with the variable index (type 4)
/// record that follows it.
pub struct ValueLabelRecord {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// Pairs of raw 8-byte value and label text.
    pub labels: Vec<([u8; 8], Vec<u8>)>,

    /// The 0-based indexes of the variables to which the labels are assigned.
    ///
    /// NOTE(review): the reader stores these exactly as they appear in the
    /// file, without adjustment; confirm that they really are 0-based.
    pub var_indexes: Vec<u32>,
}
/// Largest number of value labels accepted in a single value label record.
pub const MAX_VALUE_LABELS: u32 = u32::MAX / 8;
339 fn read_value_label_record<R: Read + Seek>(
340 r: &mut BufReader<R>,
342 n_var_records: usize,
343 ) -> Result<ValueLabelRecord, Error> {
344 let offset = r.stream_position()?;
345 let n: u32 = e.parse(read_bytes(r)?);
346 if n > MAX_VALUE_LABELS {
347 return Err(Error::BadNumberOfValueLabels {
350 max: MAX_VALUE_LABELS,
354 let mut labels = Vec::new();
356 let value: [u8; 8] = read_bytes(r)?;
357 let label_len: u8 = e.parse(read_bytes(r)?);
358 let label_len = label_len as usize;
359 let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
361 let mut label = read_vec(r, padded_len)?;
362 label.truncate(label_len);
363 labels.push((value, label));
366 let rec_type: u32 = e.parse(read_bytes(r)?);
368 return Err(Error::MissingVariableIndexRecord {
369 offset: r.stream_position()?,
373 let n_vars: u32 = e.parse(read_bytes(r)?);
374 if n_vars < 1 || n_vars as usize > n_var_records {
375 return Err(Error::BadNumberOfValueLabelVariables {
376 offset: r.stream_position()?,
378 max: n_var_records as u32,
381 let mut var_indexes = Vec::with_capacity(n_vars as usize);
383 var_indexes.push(e.parse(read_bytes(r)?));
386 Ok(ValueLabelRecord {
393 pub struct DocumentRecord {
394 /// Offset from the start of the file to the start of the record.
397 /// The document, as an array of 80-byte lines.
398 pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
401 fn read_document_record<R: Read + Seek>(
402 r: &mut BufReader<R>,
404 ) -> Result<Option<DocumentRecord>, Error> {
405 let offset = r.stream_position()?;
406 let n: u32 = e.parse(read_bytes(r)?);
409 } else if n > DOC_MAX_LINES {
410 Err(Error::BadDocumentLength {
416 let pos = r.stream_position()?;
417 let mut lines = Vec::with_capacity(n as usize);
419 let line: [u8; 80] = read_bytes(r)?;
422 Ok(Some(DocumentRecord { pos, lines }))
/// Reads exactly `N` bytes from `r` and returns them as an array.
///
/// # Errors
///
/// Returns the error from `read_exact`, which is `UnexpectedEof` if fewer
/// than `N` bytes remain.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut buf = [0; N];
    r.read_exact(&mut buf)?;
    Ok(buf)
}
/// Reads exactly `n` bytes from `r` into a newly allocated vector.
///
/// # Errors
///
/// Returns the error from `read_exact`, which is `UnexpectedEof` if fewer
/// than `n` bytes remain.
fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
    let mut vec = vec![0; n];
    r.read_exact(&mut vec)?;
    Ok(vec)
}
/// Removes every trailing occurrence of byte `c` from `s` and returns the
/// shortened vector.  Occurrences of `c` elsewhere in `s` are kept.
fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
    // Keep everything up to and including the last byte that is not `c`;
    // if there is no such byte, nothing is kept.
    let keep = s.iter().rposition(|&b| b != c).map_or(0, |i| i + 1);
    s.truncate(keep);
    s
}
/// Reads and discards exactly `n` bytes from `r`.
///
/// The bytes are read through a fixed 1024-byte scratch buffer, so the memory
/// used does not depend on `n`.
///
/// # Errors
///
/// Returns the error from `read_exact`, which is `UnexpectedEof` if fewer
/// than `n` bytes remain.
fn skip_bytes<R: Read>(r: &mut R, mut n: u64) -> Result<(), IoError> {
    let mut buf = [0; 1024];
    while n > 0 {
        let chunk = u64::min(n, buf.len() as u64);
        r.read_exact(&mut buf[0..chunk as usize])?;
        n -= chunk;
    }
    Ok(())
}