1 /* PSPP - a program for statistical analysis.
2 * Copyright (C) 2023 Free Software Foundation, Inc.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 use anyhow::{anyhow, Result};
20 use hexplay::HexViewBuilder;
22 use std::cmp::Ordering;
23 use std::collections::VecDeque;
26 use std::io::prelude::*;
27 use std::io::BufReader;
28 use std::io::ErrorKind;
29 use std::path::{Path, PathBuf};
33 use hexfloat::HexFloat;
35 /// A utility to dissect SPSS system files.
36 #[derive(Parser, Debug)]
37 #[command(author, version, about, long_about = None)]
39 /// Maximum number of cases to print.
40 #[arg(long = "data", default_value_t = 0)]
44 #[arg(required = true)]
48 fn main() -> Result<()> {
49 let Args { max_cases, files } = Args::parse();
52 Dissector::new(file, max_cases)?;
57 #[derive(Copy, Clone, Debug)]
63 #[derive(Copy, Clone, Debug)]
70 trait Parse<T, const N: usize> {
71 fn parse(self, bytes: [u8; N]) -> T;
73 impl Parse<u64, 8> for Endianness {
74 fn parse(self, bytes: [u8; 8]) -> u64 {
76 BigEndian => u64::from_be_bytes(bytes),
77 LittleEndian => u64::from_le_bytes(bytes),
81 impl Parse<u32, 4> for Endianness {
82 fn parse(self, bytes: [u8; 4]) -> u32 {
84 BigEndian => u32::from_be_bytes(bytes),
85 LittleEndian => u32::from_le_bytes(bytes),
89 impl Parse<u16, 2> for Endianness {
90 fn parse(self, bytes: [u8; 2]) -> u16 {
92 BigEndian => u16::from_be_bytes(bytes),
93 LittleEndian => u16::from_le_bytes(bytes),
97 impl Parse<u8, 1> for Endianness {
98 fn parse(self, bytes: [u8; 1]) -> u8 {
100 BigEndian => u8::from_be_bytes(bytes),
101 LittleEndian => u8::from_le_bytes(bytes),
105 impl Parse<i64, 8> for Endianness {
106 fn parse(self, bytes: [u8; 8]) -> i64 {
108 BigEndian => i64::from_be_bytes(bytes),
109 LittleEndian => i64::from_le_bytes(bytes),
113 impl Parse<i32, 4> for Endianness {
114 fn parse(self, bytes: [u8; 4]) -> i32 {
116 BigEndian => i32::from_be_bytes(bytes),
117 LittleEndian => i32::from_le_bytes(bytes),
121 impl Parse<i16, 2> for Endianness {
122 fn parse(self, bytes: [u8; 2]) -> i16 {
124 BigEndian => i16::from_be_bytes(bytes),
125 LittleEndian => i16::from_le_bytes(bytes),
129 impl Parse<i8, 1> for Endianness {
130 fn parse(self, bytes: [u8; 1]) -> i8 {
132 BigEndian => i8::from_be_bytes(bytes),
133 LittleEndian => i8::from_le_bytes(bytes),
137 impl Parse<f64, 8> for Endianness {
138 fn parse(self, bytes: [u8; 8]) -> f64 {
140 BigEndian => f64::from_be_bytes(bytes),
141 LittleEndian => f64::from_le_bytes(bytes),
146 fn read_bytes<const N: usize>(r: &mut BufReader<File>) -> Result<[u8; N]> {
147 let mut buf = [0; N];
148 r.read_exact(&mut buf)?;
152 fn read_vec(r: &mut BufReader<File>, n: usize) -> Result<Vec<u8>> {
153 let mut vec = vec![0; n];
154 r.read_exact(&mut vec)?;
159 fn read_swap(&mut self) -> Result<T>;
162 impl ReadSwap<u8> for Dissector {
163 fn read_swap(&mut self) -> Result<u8> {
164 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
167 impl ReadSwap<u32> for Dissector {
168 fn read_swap(&mut self) -> Result<u32> {
169 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
172 impl ReadSwap<u64> for Dissector {
173 fn read_swap(&mut self) -> Result<u64> {
174 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
178 impl ReadSwap<i32> for Dissector {
179 fn read_swap(&mut self) -> Result<i32> {
180 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
184 impl ReadSwap<f64> for Dissector {
185 fn read_swap(&mut self) -> Result<f64> {
186 Ok(self.endianness.parse(read_bytes(&mut self.r)?))
193 endianness: Endianness,
194 fp_format: Endianness,
196 n_variable_records: usize,
198 var_widths: Vec<i32>,
201 fn detect_endianness(layout_code: [u8; 4]) -> Option<Endianness> {
202 for endianness in [BigEndian, LittleEndian] {
203 match endianness.parse(layout_code) {
204 2 | 3 => return Some(endianness),
211 fn detect_fp_format(bias: [u8; 8]) -> Option<Endianness> {
212 for endianness in [BigEndian, LittleEndian] {
213 let value: f64 = endianness.parse(bias);
215 return Some(endianness);
/// Returns `s` with every trailing occurrence of the byte `c` removed.
///
/// Occurrences of `c` that are followed by any other byte are kept.
fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
    // Keep everything up to and including the last byte that differs from `c`.
    let kept = s
        .iter()
        .rposition(|&byte| byte != c)
        .map_or(0, |last| last + 1);
    s.truncate(kept);
    s
}
228 fn format_name(type_: u32) -> &'static str {
271 fn round_up<T: Num + Copy>(x: T, y: T) -> T {
272 (x + (y - T::one())) / y * y
275 struct UntypedValue {
277 endianness: Endianness,
281 fn new(raw: [u8; 8], endianness: Endianness) -> UntypedValue {
282 UntypedValue { raw, endianness }
286 impl fmt::Display for UntypedValue {
287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288 let numeric: f64 = self.endianness.parse(self.raw);
289 let n_printable = self
292 .take_while(|&&x| x == b' ' || x.is_ascii_graphic())
294 let printable_prefix = std::str::from_utf8(&self.raw[0..n_printable]).unwrap();
295 write!(f, "{numeric}/\"{printable_prefix}\"")
300 fn new<P: AsRef<Path>>(filename: P, max_cases: usize) -> Result<Dissector> {
301 let mut r = BufReader::new(File::open(&filename)?);
302 let filename = filename.as_ref().to_string_lossy().into_owned();
303 let rec_type: [u8; 4] = read_bytes(&mut r)?;
304 let zmagic = match &rec_type {
307 _ => Err(anyhow!("This is not an SPSS system file."))?,
310 let eye_catcher: [u8; 60] = read_bytes(&mut r)?;
311 let layout_code: [u8; 4] = read_bytes(&mut r)?;
312 let endianness = detect_endianness(layout_code)
313 .ok_or_else(|| anyhow!("This is not an SPSS system file."))?;
314 let layout_code: u32 = endianness.parse(layout_code);
315 let _nominal_case_size: [u8; 4] = read_bytes(&mut r)?;
316 let compressed: u32 = endianness.parse(read_bytes(&mut r)?);
317 let compression = match (zmagic, compressed) {
319 (false, 1) => Some(Compression::Simple),
320 (true, 2) => Some(Compression::ZLib),
322 "{} file header has invalid compression value {compressed}.",
323 if zmagic { "ZSAV" } else { "SAV" }
327 let weight_index: u32 = endianness.parse(read_bytes(&mut r)?);
328 let n_cases: u32 = endianness.parse(read_bytes(&mut r)?);
330 let bias: [u8; 8] = read_bytes(&mut r)?;
331 let fp_format = detect_fp_format(bias)
332 .unwrap_or_else(|| { eprintln!("Compression bias is not the usual value of 100, or system file uses unrecognized floating-point format."); endianness });
333 let bias: f64 = fp_format.parse(bias);
335 let mut d = Dissector {
341 n_variable_records: 0,
343 var_widths: Vec::new(),
346 let creation_date: [u8; 9] = read_bytes(&mut d.r)?;
347 let creation_time: [u8; 8] = read_bytes(&mut d.r)?;
348 let file_label: [u8; 64] = read_bytes(&mut d.r)?;
349 let file_label = trim_end(Vec::from(file_label), b' ');
352 println!("File header record:");
356 String::from_utf8_lossy(&eye_catcher)
358 println!("{:>17}: {}", "Layout code", layout_code);
364 None => "no compression",
365 Some(Compression::Simple) => "simple compression",
366 Some(Compression::ZLib) => "ZLIB compression",
369 println!("{:>17}: {}", "Weight index", weight_index);
370 println!("{:>17}: {}", "Number of cases", n_cases);
371 println!("{:>17}: {}", "Compression bias", bias);
375 String::from_utf8_lossy(&creation_date)
380 String::from_utf8_lossy(&creation_time)
385 String::from_utf8_lossy(&file_label)
389 let rec_type: u32 = d.read_swap()?;
391 2 => d.read_variable_record()?,
392 3 => d.read_value_label_record()?,
393 4 => Err(anyhow!("Misplaced type 4 record."))?,
394 6 => d.read_document_record()?,
395 7 => d.read_extension_record()?,
397 _ => Err(anyhow!("Unrecognized record type {rec_type}."))?,
401 let pos = d.r.stream_position()?;
403 "{:08x}: end-of-dictionary record (first byte of data at {:0x})",
409 Some(Compression::Simple) => {
411 d.read_simple_compressed_data(max_cases)?;
414 Some(Compression::ZLib) => d.read_zlib_compressed_data()?,
421 fn read_simple_compressed_data(&mut self, max_cases: usize) -> Result<()> {
422 let _: i32 = self.read_swap()?;
423 println!("\n{:08x}: compressed data:", self.r.stream_position()?);
425 const N_OPCODES: usize = 8;
426 let mut opcodes = VecDeque::<u8>::with_capacity(8);
427 let mut opcode_ofs = 0;
428 for case_num in 0..max_cases {
430 "{:08x}: case {case_num}'s uncompressible data begins",
431 self.r.stream_position()?
434 while i < self.var_widths.len() {
435 let width = self.var_widths[i];
437 let opcode_idx = N_OPCODES - opcodes.len();
438 let Some(opcode) = opcodes.pop_back() else {
439 opcode_ofs = self.r.stream_position()?;
440 let mut new_opcodes = [0; N_OPCODES];
441 if let Err(error) = self.r.read_exact(&mut new_opcodes) {
442 if i == 0 && error.kind() == ErrorKind::UnexpectedEof {
445 return Err(error.into());
448 opcodes.extend(new_opcodes.into_iter());
453 "{:08x}: variable {i}: opcode {opcode}: ",
454 opcode_ofs + opcode_idx as u64
457 0 => println!("ignored padding"),
459 println!("end of data");
463 let raw: [u8; 8] = read_bytes(&mut self.r)?;
464 let value = UntypedValue::new(raw, self.fp_format);
465 println!("uncompressible data: {value}");
471 print!(", but this is a numeric variable");
479 print!(", but this is a string variable (width={width})");
485 print!("{}", opcode as f64 - self.bias);
487 print!(", but this is a string variable (width={width})");
498 fn read_zlib_compressed_data(&mut self) -> Result<()> {
499 let _: i32 = self.read_swap()?;
500 let ofs = self.r.stream_position()?;
501 println!("\n{ofs:08x}: ZLIB compressed data header:");
503 let this_ofs: u64 = self.read_swap()?;
504 let next_ofs: u64 = self.read_swap()?;
505 let next_len: u64 = self.read_swap()?;
507 println!("\theader_ofs: {this_ofs:#x}");
509 println!("\t\t(Expected {ofs:#x}.)");
511 println!("\ttrailer_ofs: {next_ofs:#x}");
512 println!("\ttrailer_len: {next_len}");
513 if next_len < 24 || next_len % 24 != 0 {
514 println!("\t\t(Trailer length is not positive multiple of 24.)");
517 let zlib_data_len = next_ofs - (ofs + 8 * 3);
519 "\n{:08x}: {zlib_data_len:#x} bytes of ZLIB compressed data",
523 self.skip_bytes(zlib_data_len)?;
525 println!("\n{next_ofs:08x}: ZLIB trailer fixed header");
526 let bias: u64 = self.read_swap()?;
527 let zero: u64 = self.read_swap()?;
528 let block_size: u32 = self.read_swap()?;
529 let n_blocks: u32 = self.read_swap()?;
530 println!("\tbias: {bias}");
531 println!("\tzero: {zero:#x}");
533 println!("\t\t(Expected 0.)");
535 println!("\tblock size: {block_size:#x}");
536 if block_size != 0x3ff000 {
537 println!("\t\t(Expected 0x3ff000.)");
539 println!("\tn_blocks: {n_blocks}");
540 if n_blocks as u64 != next_len / 24 - 1 {
541 println!("\t\t(Expected {}.)", next_len / 24 - 1);
544 let mut expected_uncmp_ofs = ofs;
545 let mut expected_cmp_ofs = ofs + 24;
546 for i in 1..=n_blocks {
547 let blockinfo_ofs = self.r.stream_position()?;
548 let uncompressed_ofs: u64 = self.read_swap()?;
549 let compressed_ofs: u64 = self.read_swap()?;
550 let uncompressed_size: u32 = self.read_swap()?;
551 let compressed_size: u32 = self.read_swap()?;
553 println!("\n{blockinfo_ofs:08x}: ZLIB block descriptor {i}");
555 println!("\tuncompressed_ofs: {uncompressed_ofs:#x}");
556 if uncompressed_ofs != expected_uncmp_ofs {
557 println!("\t\t(Expected {ofs:#x}.)");
560 println!("\tcompressed_ofs: {compressed_ofs:#x}");
561 if compressed_ofs != expected_cmp_ofs {
562 println!("\t\t(Expected {expected_cmp_ofs:#x}.)");
565 println!("\tuncompressed_size: {uncompressed_size:#x}");
566 if i < n_blocks && uncompressed_size != block_size {
567 println!("\t\t(Expected {block_size:#x}.)");
570 println!("\tcompressed_size: {compressed_size:#x}");
571 if i == n_blocks && compressed_ofs.checked_add(compressed_size as u64) != Some(next_ofs)
574 "\t\t(This was expected to be {:#x}.)",
575 next_ofs - compressed_size as u64
579 expected_uncmp_ofs += uncompressed_size as u64;
580 expected_cmp_ofs += uncompressed_size as u64;
585 fn read_extension_record(&mut self) -> Result<()> {
586 let offset = self.r.stream_position()?;
587 let subtype: u32 = self.read_swap()?;
588 let size: u32 = self.read_swap()?;
589 let count: u32 = self.read_swap()?;
590 println!("{offset:08x}: Record 7, subtype {subtype}, size={size}, count={count}");
592 3 => self.read_machine_integer_info(size, count),
593 4 => self.read_machine_float_info(size, count),
594 5 => self.read_variable_sets(size, count),
596 // DATE variable information. We don't use it yet, but we should.
599 7 | 19 => self.read_mrsets(size, count),
600 10 => self.read_extra_product_info(size, count),
601 11 => self.read_display_parameters(size, count),
602 13 => self.read_long_string_map(size, count),
603 _ => self.read_unknown_extension(subtype, size, count),
607 fn warn(&mut self, s: String) -> Result<()> {
609 "\"{}\" near offset 0x{:08x}: {s}",
611 self.r.stream_position()?
616 fn skip_bytes(&mut self, mut n: u64) -> Result<()> {
617 let mut buf = [0; 1024];
619 let chunk = u64::min(n, buf.len() as u64);
620 self.r.read_exact(&mut buf[0..chunk as usize])?;
626 fn read_unknown_extension(&mut self, subtype: u32, size: u32, count: u32) -> Result<()> {
627 self.warn(format!("Unrecognized record type 7, subtype {subtype}."))?;
628 if size == 0 || count > 65536 / size {
629 self.skip_bytes(size as u64 * count as u64)?;
630 } else if size != 1 {
633 let vec = read_vec(&mut self.r, size as usize)?;
636 HexViewBuilder::new(&vec).address_offset(offset).finish()
638 offset += size as usize;
644 fn read_variable_record(&mut self) -> Result<()> {
645 self.n_variable_records += 1;
647 "{:08x}: variable record {}",
648 self.r.stream_position()?,
649 self.n_variable_records
651 let width: i32 = self.read_swap()?;
652 let has_variable_label: u32 = self.read_swap()?;
653 let missing_value_code: i32 = self.read_swap()?;
654 let print_format: u32 = self.read_swap()?;
655 let write_format: u32 = self.read_swap()?;
656 let name: [u8; 8] = read_bytes(&mut self.r)?;
657 let name: Vec<u8> = trim_end(Vec::from(name), b'\0');
660 self.n_variables += 1;
662 self.var_widths.push(width);
665 "\tWidth: {width} ({})",
667 _ if width > 0 => "string",
668 _ if width == 0 => "numeric",
669 _ => "long string continuation record",
673 println!("\tVariable label: {has_variable_label}");
675 "\tMissing values code: {missing_value_code} ({})",
676 match missing_value_code {
677 0 => "no missing values",
678 1 => "one missing value",
679 2 => "two missing values",
680 3 => "three missing values",
681 -2 => "one missing value range",
682 -3 => "one missing value, one range",
686 for (which, format) in [("Print", print_format), ("Worite", write_format)] {
687 let type_ = format_name(format >> 16);
688 let w = (format >> 8) & 0xff;
689 let d = format & 0xff;
690 println!("\t{which} format: {format:06x} ({type_}{w}.{d})");
692 println!("\tName: {}", String::from_utf8_lossy(&name));
694 // Read variable label.
695 match has_variable_label {
698 let offset = self.r.stream_position()?;
699 let len: u32 = self.read_swap()?;
700 let read_len = len.min(65535) as usize;
701 let label = read_vec(&mut self.r, read_len)?;
703 "\t{offset:08x} Variable label: \"{}\"",
704 String::from_utf8_lossy(&label)
707 self.skip_bytes((round_up(len, 4) - len).into())?;
709 _ => Err(anyhow!("Variable label indicator field is not 0 or 1."))?,
712 // Read missing values.
713 if missing_value_code != 0 {
714 print!("\t{:08x} Missing values:", self.r.stream_position()?);
715 match width.cmp(&0) {
717 let (has_range, n_individual) = match missing_value_code {
720 1 | 2 | 3 => (false, missing_value_code),
722 "Numeric missing value indicator field is not -3, -2, 0, 1, 2, or 3."
726 let low: f64 = self.read_swap()?;
727 let high: f64 = self.read_swap()?;
728 print!(" {low}...{high}");
730 for _ in 0..n_individual {
731 let value: f64 = self.read_swap()?;
735 Ordering::Greater => {
736 if !(0..=3).contains(&missing_value_code) {
738 "String missing value indicator field is not 0, 1, 2, or 3."
741 for _ in 0..missing_value_code {
742 let string: [u8; 8] = read_bytes(&mut self.r)?;
743 let string: Vec<u8> = trim_end(Vec::from(string), b'\0');
744 println!(" {}", String::from_utf8_lossy(&string));
747 Ordering::Less => (),
755 fn read_value_label_record(&mut self) -> Result<()> {
756 println!("{:08x}: value labels record", self.r.stream_position()?);
759 let n_labels: u32 = self.read_swap()?;
760 for _ in 0..n_labels {
761 let raw: [u8; 8] = read_bytes(&mut self.r)?;
762 let value = UntypedValue::new(raw, self.fp_format);
763 let label_len: u8 = self.read_swap()?;
764 let padded_len = round_up(label_len as usize + 1, 8);
766 let mut label = read_vec(&mut self.r, padded_len)?;
767 label.truncate(label_len as usize);
768 let label = String::from_utf8_lossy(&label);
770 println!("\t{value}: {label}");
773 // Read the type-4 record with the corresponding variable indexes.
774 let rec_type: u32 = self.read_swap()?;
777 "Variable index record (type 4) does not immediately \
778 follow value label record (type 3) as it should."
782 println!("\t{:08x}: apply to variables", self.r.stream_position()?);
783 let n_vars: u32 = self.read_swap()?;
785 let index: u32 = self.read_swap()?;
793 fn read_document_record(&mut self) -> Result<()> {
794 println!("{:08x}: document record", self.r.stream_position()?);
795 let n_lines: u32 = self.read_swap()?;
796 println!("\t{n_lines} lines of documents");
798 for i in 0..n_lines {
799 print!("\t{:08x}: ", self.r.stream_position()?);
800 let line: [u8; 64] = read_bytes(&mut self.r)?;
801 let line = trim_end(Vec::from(line), b' ');
802 println!("line {i}: \"{}\"", String::from_utf8_lossy(&line));
807 fn read_machine_integer_info(&mut self, size: u32, count: u32) -> Result<()> {
808 let offset = self.r.stream_position()?;
809 let version_major: u32 = self.read_swap()?;
810 let version_minor: u32 = self.read_swap()?;
811 let version_revision: u32 = self.read_swap()?;
812 let machine_code: u32 = self.read_swap()?;
813 let float_representation: u32 = self.read_swap()?;
814 let compression_code: u32 = self.read_swap()?;
815 let integer_representation: u32 = self.read_swap()?;
816 let character_code: u32 = self.read_swap()?;
818 println!("{offset:08x}: machine integer info");
819 if size != 4 || count != 8 {
821 "Bad size ({size}) or count ({count}) field on record type 7, subtype 3"
824 println!("\tVersion: {version_major}.{version_minor}.{version_revision}");
825 println!("\tMachine code: {machine_code}");
827 "\tFloating point representation: {float_representation} ({})",
828 match float_representation {
835 println!("\tCompression code: {compression_code}");
837 "\tEndianness: {integer_representation} ({})",
838 match integer_representation {
844 println!("\tCharacter code: {character_code}");
848 fn read_machine_float_info(&mut self, size: u32, count: u32) -> Result<()> {
849 let offset = self.r.stream_position()?;
850 let sysmis: f64 = self.read_swap()?;
851 let highest: f64 = self.read_swap()?;
852 let lowest: f64 = self.read_swap()?;
854 println!("{offset:08x}: machine float info");
855 if size != 4 || count != 8 {
857 "Bad size ({size}) or count ({count}) field on extension 4."
861 println!("\tsysmis: {sysmis} ({})", HexFloat(sysmis));
862 println!("\thighest: {highest} ({})", HexFloat(highest));
863 println!("\tlowest: {lowest} ({})", HexFloat(lowest));
867 fn read_variable_sets(&mut self, size: u32, count: u32) -> Result<()> {
868 println!("{:08x}: variable sets", self.r.stream_position()?);
869 let mut text = self.open_text_record(size, count)?;
871 while text.match_byte(b'\n') {
874 let set = match text.tokenize(b'=') {
875 Some(set) => String::from_utf8_lossy(set).into_owned(),
879 // Always present even for an empty set.
880 text.match_byte(b' ');
882 match text.tokenize(b'\n') {
883 None => println!("\tset \"{set}\" is empty"),
886 "\tset \"{set}\" contains \"{}\"",
887 String::from_utf8_lossy(variables).trim_end_matches('\r')
895 // Read record type 7, subtype 7.
896 fn read_mrsets(&mut self, size: u32, count: u32) -> Result<()> {
897 print!("{:08x}: multiple response sets", self.r.stream_position()?);
898 let mut text = self.open_text_record(size, count)?;
900 #[derive(PartialEq, Eq)]
906 while text.match_byte(b'\n') {}
907 let Some(name) = text.tokenize(b'=') else {
910 let name = Vec::from(name);
912 let (mrset, cat_label_from_counted_values, label_from_var_label) = if text
915 if !text.match_byte(b' ') {
917 "missing space following 'C' at offset {} in mrsets record",
921 (MrSet::MC, false, false)
922 } else if text.match_byte(b'D') {
923 (MrSet::MD, false, false)
924 } else if text.match_byte(b'E') {
925 if !text.match_byte(b' ') {
927 "missing space following 'E' at offset {} in mrsets record",
933 let Some(number) = text.tokenize(b' ') else {
935 "Missing label source value following `E' at offset {}u in MRSETS record",
940 let label_from_var_label = if number == b"11" {
942 } else if number == b"1" {
945 Err(anyhow!("Unexpected label source value `{}' following `E' at offset {pos} in MRSETS record", String::from_utf8_lossy(number)))?
947 (MrSet::MD, true, label_from_var_label)
950 "missing `C', `D', or `E' at offset {} in mrsets record",
955 let counted_value = if mrset == MrSet::MD {
956 Some(Vec::from(text.parse_counted_string()?))
961 let label = Vec::from(text.parse_counted_string()?);
963 let variables = text.tokenize(b'\n');
966 "\t\"{}\": multiple {} set",
967 String::from_utf8_lossy(&name),
968 if mrset == MrSet::MC {
974 if let Some(counted_value) = counted_value {
976 ", counted value \"{}\"",
977 String::from_utf8_lossy(&counted_value)
980 if cat_label_from_counted_values {
981 println!(", category labels from counted values");
984 print!(", label \"{}\"", String::from_utf8_lossy(&label));
986 if label_from_var_label {
987 print!(", label from variable label");
989 if let Some(variables) = variables {
990 print!(", variables \"{}\"", String::from_utf8_lossy(variables));
992 print!("no variables");
999 fn read_extra_product_info(&mut self, size: u32, count: u32) -> Result<()> {
1000 print!("{:08x}: extra product info", self.r.stream_position()?);
1001 let text = self.open_text_record(size, count)?;
1002 print_string(&text.buffer);
1006 fn read_display_parameters(&mut self, size: u32, count: u32) -> Result<()> {
1008 "{:08x}: variable display parameters",
1009 self.r.stream_position()?
1012 Err(anyhow!("Bad size ({size}) on extension 11."))?;
1014 let n_vars = self.n_variables;
1015 let includes_width = if count as usize == 3 * n_vars {
1017 } else if count as usize == 2 * n_vars {
1021 "Extension 11 has bad count {count} (for {n_vars} variables)."
1025 for i in 0..n_vars {
1026 let measure: u32 = self.read_swap()?;
1028 "\tVar #{i}: measure={measure} ({})",
1038 let width: u32 = self.read_swap()?;
1039 print!(", width={width}");
1042 let align: u32 = self.read_swap()?;
1044 ", align={align} ({})",
1056 fn read_long_string_map(&mut self, size: u32, count: u32) -> Result<()> {
1058 "{:08x}: very long strings (variable => length)",
1059 self.r.stream_position()?
1061 let mut text = self.open_text_record(size, count)?;
1062 while let Some((var, length)) = text.read_variable_to_value_pair() {
1065 String::from_utf8_lossy(&var),
1066 String::from_utf8_lossy(&length)
1072 fn read_text_record(&mut self, size: u32, count: u32) -> Result<Vec<u8>> {
1073 let Some(n_bytes) = u32::checked_mul(size, count) else {
1074 Err(anyhow!("Extension record too large."))?
1076 read_vec(&mut self.r, n_bytes as usize)
1079 fn open_text_record(&mut self, size: u32, count: u32) -> Result<TextRecord> {
1080 Ok(TextRecord::new(self.read_text_record(size, count)?))
1084 fn print_string(s: &[u8]) {
1085 if s.contains(&b'\0') {
1086 println!("{}", HexView::new(s));
1090 b'\\' => print!("\\\\"),
1091 b'\n' => println!(),
1092 c if (b' '..=b'~').contains(&c) => print!("{}", c as char),
1093 c => print!("\\{:2x}", c),
1105 fn new(buffer: Vec<u8>) -> TextRecord {
1106 TextRecord { buffer, pos: 0 }
1109 fn tokenize(&mut self, delimiter: u8) -> Option<&[u8]> {
1110 let start = self.pos;
1111 while self.pos < self.buffer.len()
1112 && self.buffer[self.pos] != delimiter
1113 && self.buffer[self.pos] != 0
1117 if start == self.pos {
1120 Some(&self.buffer[start..self.pos])
1124 fn match_byte(&mut self, c: u8) -> bool {
1125 if self.pos < self.buffer.len() && self.buffer[self.pos] == c {
1133 fn parse_usize(&mut self) -> Result<usize> {
1134 let n_digits = self.buffer[self.pos..]
1136 .take_while(|c| c.is_ascii_digit())
1139 Err(anyhow!("expecting digit at offset {} in record", self.pos))?;
1141 let start = self.pos;
1142 self.pos += n_digits;
1144 let digits = str::from_utf8(&self.buffer[start..end]).unwrap();
1145 let Ok(number) = digits.parse::<usize>() else {
1147 "expecting number in [0,{}] at offset {} in record",
1156 fn get_n_bytes(&mut self, n: usize) -> Option<(usize, usize)> {
1157 let start = self.pos;
1158 let Some(end) = start.checked_add(n) else {
1165 fn parse_counted_string(&mut self) -> Result<&[u8]> {
1166 let length = self.parse_usize()?;
1167 if !self.match_byte(b' ') {
1168 Err(anyhow!("expecting space at offset {} in record", self.pos))?;
1171 let Some((start, end)) = self.get_n_bytes(length) else {
1173 "{length}-byte string starting at offset {} exceeds record length {}",
1178 if !self.match_byte(b' ') {
1180 "expecting space at offset {} following {}-byte string",
1185 Ok(&self.buffer[start..end])
1188 fn read_variable_to_value_pair(&mut self) -> Option<(Vec<u8>, Vec<u8>)> {
1189 let key = self.tokenize(b'=')?.into();
1190 let value = self.tokenize(b'\t')?.into();
1192 while self.match_byte(b'\t') || self.match_byte(b'\0') {}