use anyhow::Result;
use clap::Parser;
-use pspp::raw::{Reader, Record};
+use pspp::raw::{Reader, Record, UnencodedStr, Value};
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
-use std::str;
+use std::str::{self, from_utf8};
/// A utility to dissect SPSS system files.
#[derive(Parser, Debug)]
let Args { max_cases, files } = Args::parse();
for file in files {
- dissect(&file, max_cases)?;
+ if let Err(error) = dissect(&file, max_cases) {
+ println!("{}: {error}", file.display());
+ }
}
Ok(())
}
-fn dissect(file_name: &Path, max_cases: u64) -> Result<()> {
+fn dissect(file_name: &Path, _max_cases: u64) -> Result<()> {
let reader = File::open(file_name)?;
let reader = BufReader::new(reader);
let mut reader = Reader::new(reader)?;
let records: Vec<Record> = reader.collect_headers()?;
+ let mut character_code = None;
for record in records {
- println!("{record:?}");
+ //println!("{record:?}");
+ if let Record::IntegerInfo(ref info) = record {
+ character_code = Some(info.character_code);
+ }
if let Record::EndOfHeaders(_) = record {
break;
};
}
- for _ in 0..max_cases {
- let Some(Ok(Record::Case(data))) = reader.next() else {
- break;
- };
- println!("{:?}", data);
+ if character_code != Some(65001) {
+ return Ok(());
+ }
+ let mut n = 0;
+ while let Some(Ok(Record::Case(data))) = reader.next() {
+ n += 1;
+ let mut strings = Vec::new();
+ for value in data.iter() {
+ if let Value::String(UnencodedStr(s)) = value {
+ strings.extend_from_slice(&s[..]);
+ }
+ }
+
+ let mut rest = &strings[..];
+ let mut any_errors = false;
+ while let Err(error) = from_utf8(&rest) {
+ if !any_errors {
+ print!("{}: UTF-8 error", file_name.display());
+ any_errors = true;
+ }
+ let start = error.valid_up_to();
+ let len = match error.error_len() {
+ Some(len) => len,
+ None => rest.len() - start
+ };
+// print!(" {}", (start + len) % 8);
+ print!("[");
+ for i in 0..len {
+ print!("{:02x}", rest[i + start]);
+ }
+ print!("]");
+ rest = &rest[start + len..];
+ }
+ if any_errors {
+ println!();
+ println!("Lossy: {}", String::from_utf8_lossy(&strings[..]).replace(char::REPLACEMENT_CHARACTER, "??????").replace(&[' ', '\0'], ""));
+ return Ok(())
+ }
+ //println!("{:?}", data);
}
+ println!("{}: read {n} records", file_name.display());
Ok(())
}