1 #![allow(unused_variables)]
2 use endian::{Endian, Parse};
4 use std::io::{BufReader, Error as IoError, Read, Seek};
/// Fatal errors returned while reading a system file.
///
/// The display text of each variant comes from its `#[error(...)]` attribute.
9 #[derive(Error, Debug)]
11 #[error("Not an SPSS system file")]
14 #[error("I/O error ({source})")]
20 #[error("Invalid SAV compression code {0}")]
21 InvalidSavCompression(u32),
23 #[error("Invalid ZSAV compression code {0}")]
24 InvalidZsavCompression(u32),
26 #[error("Misplaced type 4 record.")]
// NOTE(review): the message below says "less than {max}", but the visible
// check in `read_document_record` is `n > DOC_MAX_LINES`, so `n == max` is
// accepted -- confirm which bound is intended.
29 #[error("Number of document lines ({n}) must be greater than 0 and less than {max}.")]
30 BadDocumentLength { n: u32, max: u32 },
32 #[error("Unrecognized record type {0}.")]
35 #[error("Variable label indicator ({0}) is not 0 or 1.")]
36 BadVariableLabelIndicator(u32),
38 #[error("Numeric missing value indicator ({0}) is not -3, -2, 0, 1, 2, or 3.")]
39 BadNumericMissingValueIndicator(i32),
41 #[error("String missing value indicator ({0}) is not 0, 1, 2, or 3.")]
42 BadStringMissingValueIndicator(i32),
/// Non-fatal conditions. The reading code passes these to the caller-supplied
/// `warn` callback instead of returning them as errors.
45 #[derive(Error, Debug)]
47 #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
50 #[error("Duplicate type 6 (document) record.")]
51 DuplicateDocumentRecord,
/// Compression scheme of a system file. `read_header` picks a variant from
/// the compression code stored in the file header.
54 #[derive(Copy, Clone, Debug)]
55 pub enum Compression {
/// Reader over a system file, constructed by `Reader::new`.
60 pub struct Reader<R: Read> {
// Document record gathered by `Reader::new` while reading records, if any.
63 document_record: Option<DocumentRecord>,
/// Magic number that opens a regular system file: the ASCII bytes `$FL2`.
pub const ASCII_MAGIC: &[u8; 4] = &[b'$', b'F', b'L', b'2'];
/// Magic number that opens a system file whose data is zlib-compressed: the
/// ASCII bytes `$FL3`.
pub const ASCII_ZMAGIC: &[u8; 4] = &[b'$', b'F', b'L', b'3'];
/// Magic number that opens an EBCDIC-encoded system file. These four bytes
/// are `$FL2` encoded in EBCDIC.
pub const EBCDIC_MAGIC: &[u8; 4] = b"\x5b\xc6\xd3\xf2";
/// Parsed contents of the header at the start of a system file, as read by
/// `read_header`.
76 pub struct FileHeader {
77 /// First 4 bytes of the file, one of `ASCII_MAGIC`, `ASCII_ZMAGIC`, and
81 /// True if `magic` indicates that this file contained zlib-compressed data.
84 /// True if `magic` indicates that this file contained EBCDIC data.
87 /// Endianness of the data in the file header.
88 pub endianness: Endian,
// `read_header` subtracts 1 from the nonzero value stored in the file to
// obtain this 0-based index.
90 /// 0-based variable index of the weight variable, or `None` if the file is
92 pub weight_index: Option<u32>,
// `read_header` keeps the value only when it is at most `i32::MAX / 16`.
94 /// Number of variable positions, or `None` if the value in the file is
95 /// questionably trustworthy.
96 pub nominal_case_size: Option<u32>,
98 /// `dd mmm yy` in the file's encoding.
99 pub creation_date: [u8; 9],
101 /// `HH:MM:SS` in the file's encoding.
102 pub creation_time: [u8; 8],
104 /// Eye-catcher string, then product name, in the file's encoding. Padded
105 /// on the right with spaces.
106 pub eye_catcher: [u8; 60],
108 /// File label, in the file's encoding. Padded on the right with spaces.
109 pub file_label: [u8; 64],
/// Length in bytes of one line of a document record.
pub const DOC_LINE_LEN: u32 = 80;

/// Upper bound on the number of lines in a document record, chosen so that
/// the total byte count (`lines * DOC_LINE_LEN`) still fits in an `i32`.
pub const DOC_MAX_LINES: u32 = (i32::MAX as u32) / DOC_LINE_LEN;
115 impl<R: Read + Seek> Reader<R> {
/// Creates a reader over `r`.
///
/// Wraps `r` in a `BufReader`, reads the file header with `read_header`, and
/// then reads records, dispatching on each record's 32-bit type code parsed
/// with the header's endianness. Non-fatal problems are passed to `warn`.
///
/// # Errors
///
/// Propagates errors from the header and record readers, and returns
/// `Error::MisplacedType4Record` or `Error::BadRecordType` for the type codes
/// handled by those arms below.
116 pub fn new(r: R, warn: impl Fn(Warning)) -> Result<Reader<R>, Error> {
117 let mut r = BufReader::new(r);
119 let header = read_header(&mut r, &warn)?;
120 let e = header.endianness;
121 let mut document_record = None;
122 let mut variables = Vec::new();
// The type code is consumed here, before the per-record reader is called.
124 let rec_type: u32 = e.parse(read_bytes(&mut r)?);
126 2 => variables.push(read_variable_record(&mut r, e)?),
// NOTE(review): no binding named `d` is visible in scope for this arm (the
// only `d` in this excerpt is the local in the document-record arm) --
// confirm what this method is meant to be called on.
128 3 => d.read_value_label_record()?,
130 // A Type 4 record is always immediately after a type 3 record,
131 // the code for type 3 records reads the type 4 record too.
132 4 => return Err(Error::MisplacedType4Record),
135 let d = read_document_record(&mut r, e)?;
// A repeated document record is reported as a warning, not an error.
136 if document_record.is_some() {
137 warn(Warning::DuplicateDocumentRecord);
// NOTE(review): same concern as the type 3 arm -- `d` has no visible binding
// here.
143 7 => d.read_extension_record()?,
146 _ => return Err(Error::BadRecordType(rec_type)),
// NOTE(review): `variables` is filled in above but is not part of the value
// returned here -- confirm whether it should be stored in `Reader`.
150 Ok(Reader { r, document_record })
/// Reads the file header from `r`, passing non-fatal oddities to `warn`.
///
/// # Errors
///
/// Returns `Error::NotASystemFile` when the magic number is not one of the
/// three known values or the byte order cannot be identified from the layout
/// code, `Error::InvalidSavCompression` / `Error::InvalidZsavCompression` for
/// a compression code that does not fit the file kind, and propagates any
/// read failure.
154 fn read_header<R: Read>(r: &mut R, warn: impl Fn(Warning)) -> Result<FileHeader, Error> {
155 let magic: [u8; 4] = read_bytes(r)?;
// The magic number alone decides whether the file is zlib-compressed and
// whether it is EBCDIC-encoded.
156 let (is_zsav, is_ebcdic) = match &magic {
157 ASCII_MAGIC => (false, false),
158 ASCII_ZMAGIC => (true, false),
159 EBCDIC_MAGIC => (false, true),
160 _ => return Err(Error::NotASystemFile),
163 let eye_catcher: [u8; 60] = read_bytes(r)?;
164 let layout_code: [u8; 4] = read_bytes(r)?;
// NOTE(review): the fallback repeats the first call with the same expected
// value (2), so it can never succeed when the first attempt fails. The PSPP
// system file format documents layout codes 2 and 3; presumably the second
// call should look for 3 -- confirm and fix.
165 let endianness = Endian::identify_u32(2, layout_code)
166 .or_else(|| Endian::identify_u32(2, layout_code))
167 .ok_or_else(|| Error::NotASystemFile)?;
169 let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
// Values above `i32::MAX / 16` are discarded (become `None`).
170 let nominal_case_size =
171 (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
173 let compression_code: u32 = endianness.parse(read_bytes(r)?);
// The set of valid codes depends on whether the magic marked a ZSAV file.
174 let compression = match (is_zsav, compression_code) {
176 (false, 1) => Some(Compression::Simple),
177 (true, 2) => Some(Compression::ZLib),
178 (false, code) => return Err(Error::InvalidSavCompression(code)),
179 (true, code) => return Err(Error::InvalidZsavCompression(code)),
182 let weight_index: u32 = endianness.parse(read_bytes(r)?);
// NOTE(review): `then_some` evaluates its argument eagerly, so
// `weight_index - 1` is computed even when `weight_index` is 0. That
// subtraction overflows: it panics in builds with overflow checks and wraps
// otherwise. `weight_index.checked_sub(1)` yields the same `Option` without
// the overflow.
183 let weight_index = (weight_index > 0).then_some(weight_index - 1);
185 let n_cases: u32 = endianness.parse(read_bytes(r)?);
// Counts of `i32::MAX / 2` or more are discarded (become `None`).
186 let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
188 let bias: f64 = endianness.parse(read_bytes(r)?);
// An unexpected bias only produces a warning; reading continues.
190 warn(Warning::UnexpectedBias(bias))
193 let creation_date: [u8; 9] = read_bytes(r)?;
194 let creation_time: [u8; 8] = read_bytes(r)?;
195 let file_label: [u8; 64] = read_bytes(r)?;
// Three more bytes are read and discarded (presumably padding -- confirm).
196 let _: [u8; 3] = read_bytes(r)?;
/// A variable record (record type 2), as returned by `read_variable_record`.
212 pub struct VariableRecord {
213 /// Offset from the start of the file to the start of the record.
216 /// Variable width, in the range -1..=255.
219 /// Variable name, padded on the right with spaces.
// Print format, stored as the 32-bit value parsed from the file.
223 pub print_format: u32,
// Write format, stored as the 32-bit value parsed from the file.
226 pub write_format: u32,
228 /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
229 pub missing_value_code: i32,
// `read_variable_record` reads `missing_value_code.abs()` 8-byte values.
231 /// Raw missing values, up to 3 of them.
232 pub missing: Vec<[u8; 8]>,
234 /// Optional variable label.
235 pub label: Option<Vec<u8>>,
/// Reads one variable record from `r`.
///
/// The caller has already consumed the record's type code. The fixed fields
/// are read in order (width, label indicator, missing value code, print
/// format, write format, name), followed by the optional label and the
/// missing values.
///
/// # Errors
///
/// Returns `Error::BadVariableLabelIndicator`,
/// `Error::BadNumericMissingValueIndicator` or
/// `Error::BadStringMissingValueIndicator` for out-of-range indicator values,
/// and propagates any read failure.
238 fn read_variable_record<R: Read + Seek>(
239 r: &mut BufReader<R>,
241 ) -> Result<VariableRecord, Error> {
// NOTE(review): in `Reader::new` the 4-byte type code is read before this
// function is called, so this position is past the type code rather than at
// the first byte of the record -- confirm that is the intended meaning of the
// stored offset.
242 let pos = r.stream_position()?;
243 let width: i32 = e.parse(read_bytes(r)?);
244 let has_variable_label: u32 = e.parse(read_bytes(r)?);
245 let missing_value_code: i32 = e.parse(read_bytes(r)?);
246 let print_format: u32 = e.parse(read_bytes(r)?);
247 let write_format: u32 = e.parse(read_bytes(r)?);
248 let name: [u8; 8] = read_bytes(r)?;
250 let label = match has_variable_label {
253 let len: u32 = e.parse(read_bytes(r)?);
// NOTE(review): at most 65535 label bytes are read, but the padding below is
// derived from the full `len`. When `len > 65535`, the remaining
// `len - 65535` label bytes are not visibly skipped, which would leave the
// stream positioned inside the label -- confirm a skip exists or add one.
254 let read_len = len.min(65535) as usize;
255 let label = Some(read_vec(r, read_len)?);
// Labels are padded to a multiple of 4 bytes; the padding is read and dropped.
// NOTE(review): rounding `len` up to a multiple of 4 can overflow `u32` when
// `len` is within 3 of `u32::MAX` -- confirm how that case should be handled.
257 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
258 let _ = read_vec(r, padding_bytes as usize)?;
262 _ => return Err(Error::BadVariableLabelIndicator(has_variable_label)),
265 let mut missing = Vec::new();
266 if missing_value_code != 0 {
// A width of 0 selects the numeric rules for the missing value code.
267 match (width, missing_value_code) {
268 (0, -3 | -2 | 1 | 2 | 3) => (),
269 (0, _) => return Err(Error::BadNumericMissingValueIndicator(missing_value_code)),
271 (_, _) => return Err(Error::BadStringMissingValueIndicator(missing_value_code)),
// The magnitude of the code is the number of 8-byte values that follow.
274 for _ in 0..missing_value_code.abs() {
275 missing.push(read_bytes(r)?);
/// A document record, as returned by `read_document_record`.
291 pub struct DocumentRecord {
292 /// Offset from the start of the file to the start of the record.
295 /// The document, as an array of 80-byte lines.
296 pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
/// Reads a document record from `r`: a 32-bit line count followed by that
/// many fixed-length lines.
///
/// # Errors
///
/// Returns `Error::BadDocumentLength` when the line count exceeds
/// `DOC_MAX_LINES`, and propagates any read failure.
299 fn read_document_record<R: Read + Seek>(
300 r: &mut BufReader<R>,
302 ) -> Result<Option<DocumentRecord>, Error> {
303 let n: u32 = e.parse(read_bytes(r)?);
306 } else if n > DOC_MAX_LINES {
307 Err(Error::BadDocumentLength {
// NOTE(review): the position is taken after the line count has been read, so
// it points at the first line rather than at the start of the record --
// confirm against the documented meaning of `DocumentRecord::pos`.
312 let pos = r.stream_position()?;
// NOTE(review): the capacity comes straight from the file; a count near
// `DOC_MAX_LINES` reserves about 2 GiB before a single line has been read.
// Consider capping the initial capacity.
313 let mut lines = Vec::with_capacity(n as usize);
// NOTE(review): the literal 80 duplicates `DOC_LINE_LEN`.
315 let line: [u8; 80] = read_bytes(r)?;
318 Ok(Some(DocumentRecord { pos, lines }))
/// Reads exactly `N` bytes from `r` and returns them as a fixed-size array.
///
/// # Errors
///
/// Returns the error reported by `Read::read_exact`; in particular
/// `ErrorKind::UnexpectedEof` when fewer than `N` bytes remain.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut buf = [0u8; N];
    r.read_exact(&mut buf)?;
    Ok(buf)
}
/// Reads exactly `n` bytes from `r` and returns them as a vector.
///
/// # Errors
///
/// Returns the error reported by `Read::read_exact`; in particular
/// `ErrorKind::UnexpectedEof` when fewer than `n` bytes remain.
fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
    let mut vec = vec![0u8; n];
    r.read_exact(&mut vec)?;
    Ok(vec)
}
/// Removes every trailing occurrence of the byte `c` from `s` and returns the
/// shortened vector.
///
/// Occurrences of `c` that are followed by any other byte are kept. If `s` is
/// empty or consists only of `c`, the result is empty.
fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
    // Index just past the last byte that differs from `c`; 0 when none does.
    let keep = s.iter().rposition(|&b| b != c).map_or(0, |i| i + 1);
    s.truncate(keep);
    s
}
342 fn skip_bytes<R: Read>(r: &mut R, mut n: u64) -> Result<(), IoError> {
343 let mut buf = [0; 1024];
345 let chunk = u64::min(n, buf.len() as u64);
346 r.read_exact(&mut buf[0..chunk as usize])?;