1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 use anyhow::{anyhow, Result};
21 use std::io::prelude::*;
22 use std::io::BufReader;
23 use std::path::{Path, PathBuf};
25 /// A utility to dissect SPSS system files.
26 #[derive(Parser, Debug)]
27 #[command(author, version, about, long_about = None)]
29 /// Maximum number of cases to print.
30 #[arg(long = "data", default_value_t = 0)]
34 #[arg(required = true)]
38 fn main() -> Result<()> {
39 let Args { max_cases, files } = Args::parse();
43 Dissector::new(file)?;
48 #[derive(Copy, Clone, Debug)]
54 #[derive(Copy, Clone, Debug)]
61 trait Parse<T, const N: usize> {
62 fn parse(self, bytes: [u8; N]) -> T;
64 impl Parse<u64, 8> for Endianness {
65 fn parse(self, bytes: [u8; 8]) -> u64 {
67 BigEndian => u64::from_be_bytes(bytes),
68 LittleEndian => u64::from_le_bytes(bytes)
72 impl Parse<u32, 4> for Endianness {
73 fn parse(self, bytes: [u8; 4]) -> u32 {
75 BigEndian => u32::from_be_bytes(bytes),
76 LittleEndian => u32::from_le_bytes(bytes)
80 impl Parse<u16, 2> for Endianness {
81 fn parse(self, bytes: [u8; 2]) -> u16 {
83 BigEndian => u16::from_be_bytes(bytes),
84 LittleEndian => u16::from_le_bytes(bytes)
88 impl Parse<u8, 1> for Endianness {
89 fn parse(self, bytes: [u8; 1]) -> u8 {
91 BigEndian => u8::from_be_bytes(bytes),
92 LittleEndian => u8::from_le_bytes(bytes)
96 impl Parse<i64, 8> for Endianness {
97 fn parse(self, bytes: [u8; 8]) -> i64 {
99 BigEndian => i64::from_be_bytes(bytes),
100 LittleEndian => i64::from_le_bytes(bytes)
104 impl Parse<i32, 4> for Endianness {
105 fn parse(self, bytes: [u8; 4]) -> i32 {
107 BigEndian => i32::from_be_bytes(bytes),
108 LittleEndian => i32::from_le_bytes(bytes)
112 impl Parse<i16, 2> for Endianness {
113 fn parse(self, bytes: [u8; 2]) -> i16 {
115 BigEndian => i16::from_be_bytes(bytes),
116 LittleEndian => i16::from_le_bytes(bytes)
120 impl Parse<i8, 1> for Endianness {
121 fn parse(self, bytes: [u8; 1]) -> i8 {
123 BigEndian => i8::from_be_bytes(bytes),
124 LittleEndian => i8::from_le_bytes(bytes)
128 impl Parse<f64, 8> for Endianness {
129 fn parse(self, bytes: [u8; 8]) -> f64 {
131 BigEndian => f64::from_be_bytes(bytes),
132 LittleEndian => f64::from_le_bytes(bytes)
137 fn read_bytes<const N: usize>(r: &mut BufReader<File>) -> Result<[u8; N]> {
138 let mut buf = [0; N];
139 r.read_exact(&mut buf)?;
143 fn read_vec(r: &mut BufReader<File>, n: usize) -> Result<Vec<u8>> {
144 let mut vec = Vec::with_capacity(n);
146 r.read_exact(&mut vec)?;
151 fn read_swap(&mut self) -> Result<T>;
154 impl ReadSwap<u32> for Dissector {
155 fn read_swap(&mut self) -> Result<u32> {
156 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
159 impl ReadSwap<u8> for Dissector {
160 fn read_swap(&mut self) -> Result<u8> {
161 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
165 impl ReadSwap<i32> for Dissector {
166 fn read_swap(&mut self) -> Result<i32> {
167 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
171 impl ReadSwap<f64> for Dissector {
172 fn read_swap(&mut self) -> Result<f64> {
173 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
180 compression: Option<Compression>,
181 endianness: Endianness,
182 fp_format: Endianness,
184 n_variable_records: usize,
186 var_widths: Vec<i32>,
189 fn detect_endianness(layout_code: [u8; 4]) -> Option<Endianness> {
190 for endianness in [BigEndian, LittleEndian] {
191 match endianness.parse(layout_code) {
192 2 | 3 => return Some(endianness),
199 fn detect_fp_format(bias: [u8; 8]) -> Option<Endianness> {
200 for endianness in [BigEndian, LittleEndian] {
201 let value: f64 = endianness.parse(bias);
203 return Some(endianness)
/// Returns `s` with every trailing occurrence of byte `c` removed.
///
/// Bytes equal to `c` that are followed by some other byte are kept.
fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
    // Keep everything up to and including the last byte that is not `c`;
    // if every byte is `c` (or `s` is empty), keep nothing.
    let keep = s
        .iter()
        .rposition(|&byte| byte != c)
        .map_or(0, |last| last + 1);
    s.truncate(keep);
    s
}
216 fn format_name(type_: u32) -> &'static str {
259 fn round_up<T: Num + Copy>(x: T, y: T) -> T
261 (x + (y - T::one())) / y * y
269 fn new<P: AsRef<Path>>(filename: P) -> Result<Dissector> {
270 let mut r = BufReader::new(File::open(&filename)?);
271 let filename = filename.as_ref().to_string_lossy().into_owned();
272 let rec_type: [u8; 4] = read_bytes(&mut r)?;
273 let zmagic = match &rec_type {
276 _ => Err(anyhow!("This is not an SPSS system file."))?
279 let eye_catcher: [u8; 60] = read_bytes(&mut r)?;
280 let layout_code: [u8; 4] = read_bytes(&mut r)?;
281 let endianness = detect_endianness(layout_code)
282 .ok_or_else(|| anyhow!("This is not an SPSS system file."))?;
283 let layout_code: u32 = endianness.parse(layout_code);
284 let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?;
285 let compressed: u32 = endianness.parse(read_bytes(&mut r)?);
286 let compression = match (zmagic, compressed) {
288 (false, 1) => Some(Compression::Simple),
289 (true, 2) => Some(Compression::ZLib),
290 _ => Err(anyhow!("{} file header has invalid compression value {compressed}.",
291 if zmagic { "ZSAV" } else { "SAV" }))?,
294 let weight_index: u32 = endianness.parse(read_bytes(&mut r)?);
295 let n_cases: u32 = endianness.parse(read_bytes(&mut r)?);
297 let bias: [u8; 8] = read_bytes(&mut r)?;
298 let fp_format = detect_fp_format(bias)
299 .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness });
300 let bias: f64 = fp_format.parse(bias);
302 let mut d = Dissector {
309 n_variable_records: 0,
311 var_widths: Vec::new(),
314 let creation_date: [u8; 9] = read_bytes(&mut d.r)?;
315 let creation_time: [u8; 8] = read_bytes(&mut d.r)?;
316 let file_label: [u8; 64] = read_bytes(&mut d.r)?;
317 let mut file_label = trim_end(Vec::from(file_label), b' ');
318 d.r.seek_relative(3)?;
320 println!("File header record:");
321 println!("{:>17}: {}", "Product name", String::from_utf8_lossy(&eye_catcher));
322 println!("{:>17}: {}", "Layout code", layout_code);
323 println!("{:>17}: {} ({})", "Compressed", compressed, match compression {
324 None => "no compression",
325 Some(Compression::Simple) => "simple compression",
326 Some(Compression::ZLib) => "ZLIB compression",
328 println!("{:>17}: {}", "Weight index", weight_index);
329 println!("{:>17}: {}", "Number of cases", n_cases);
330 println!("{:>17}: {}", "Compression bias", bias);
331 println!("{:>17}: {}", "Creation date", String::from_utf8_lossy(&creation_date));
332 println!("{:>17}: {}", "Creation time", String::from_utf8_lossy(&creation_time));
333 println!("{:>17}: \"{}\"", "File label", String::from_utf8_lossy(&file_label));
336 let rec_type: u32 = d.read_swap()?;
338 2 => d.read_variable_record()?,
339 3 => d.read_value_label_record()?,
340 4 => Err(anyhow!("Misplaced type 4 record."))?,
342 _ => Err(anyhow!("Unrecognized record type {rec_type}."))?
346 let pos = d.r.stream_position()?;
347 println!("{:08x}: end-of-dictionary record (first byte of data at {:0x})", pos, pos + 4);
352 fn read_variable_record(&mut self) -> Result<()> {
353 self.n_variable_records += 1;
354 println!("{:08x}: variable record {}", self.r.stream_position()?, self.n_variable_records);
355 let width: i32 = self.read_swap()?;
356 let has_variable_label: u32 = self.read_swap()?;
357 let missing_value_code: i32 = self.read_swap()?;
358 let print_format: u32 = self.read_swap()?;
359 let write_format: u32 = self.read_swap()?;
360 let name: [u8; 8] = read_bytes(&mut self.r)?;
361 let name: Vec<u8> = trim_end(Vec::from(name), b'\0');
364 self.n_variables += 1;
366 self.var_widths.push(width);
368 println!("\tWidth: {width} ({})", match width {
369 _ if width > 0 => "string",
370 _ if width == 0 => "numeric",
371 _ => "long string continuation record"
374 println!("\tVariable label: {has_variable_label}");
375 println!("\tMissing values code: {missing_value_code} ({})",
376 match missing_value_code {
377 0 => "no missing values",
378 1 => "one missing value",
379 2 => "two missing values",
380 3 => "three missing values",
381 -2 => "one missing value range",
382 -3 => "one missing value, one range",
385 for (which, format) in [("Print", print_format),
386 ("Write", write_format)] {
387 let type_ = format_name(format >> 16);
388 let w = (format >> 8) & 0xff;
389 let d = format & 0xff;
390 println!("\t{which} format: {format:06x} ({type_}{w}.{d})");
392 println!("\tName: {}", String::from_utf8_lossy(&name));
394 // Read variable label.
395 match has_variable_label {
398 let offset = self.r.stream_position()?;
399 let len: u32 = self.read_swap()?;
400 let read_len = len.min(65535) as usize;
401 let label = read_vec(&mut self.r, read_len)?;
402 println!("\t{offset:08x} Variable label: \"{}\"", String::from_utf8_lossy(&label));
404 self.r.seek_relative((round_up(len, 4) - len).into())?;
406 _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?,
409 // Read missing values.
410 if missing_value_code != 0 {
411 print!("\t{:08x} Missing values:", self.r.stream_position()?);
413 let (has_range, n_individual) = match missing_value_code {
416 1 | 2 | 3 => (false, missing_value_code),
417 _ => Err(anyhow!("Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3."))?,
420 let low: f64 = self.read_swap()?;
421 let high: f64 = self.read_swap()?;
422 print!(" {low}...{high}");
424 for _i in 0..n_individual {
425 let value: f64 = self.read_swap()?;
428 } else if width > 0 {
429 if missing_value_code < 1 || missing_value_code > 3 {
430 Err(anyhow!("String missing value indicator field is not 0, 1, 2, or 3."))?;
432 for _i in 0..missing_value_code {
433 let string: [u8; 8] = read_bytes(&mut self.r)?;
434 let string: Vec<u8> = trim_end(Vec::from(string), b'\0');
435 println!(" {}", String::from_utf8_lossy(&string));
444 fn read_value_label_record(&mut self) -> Result<()> {
445 println!("{:08x}: value labels record", self.r.stream_position()?);
447 let n_labels: u32 = self.read_swap()?;
448 for _i in 0..n_labels {
449 let raw: [u8; 8] = read_bytes(&mut self.r)?;
450 let label_len: u8 = self.read_swap()?;
451 let padded_len = round_up(label_len as usize + 1, 8);
453 let mut label = read_vec(&mut self.r, padded_len)?;
454 label.truncate(label_len as usize);