X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=rust%2Fsrc%2Fmain.rs;h=45d0622f0d4e039f09c4973596d2d4a5b1b9078c;hb=ad10c0fc07a3183f0a991b23d7523023baa9a098;hp=6024b67f2dea68c9395e1b20436d94a7fedc0ac4;hpb=163d8e396c5b2fd5afd68903d08bff938f13d048;p=pspp diff --git a/rust/src/main.rs b/rust/src/main.rs index 6024b67f2d..45d0622f0d 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -15,12 +15,15 @@ * along with this program. If not, see . */ use anyhow::Result; -use clap::Parser; -use pspp::raw::{Reader, Record}; +use clap::{Parser, ValueEnum}; +use encoding_rs::Encoding; +use pspp::cooked::decode; +use pspp::raw::{Reader, Record, Magic}; use std::fs::File; use std::io::BufReader; use std::path::{Path, PathBuf}; use std::str; +use thiserror::Error as ThisError; /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] @@ -33,35 +36,87 @@ struct Args { /// Files to dissect. #[arg(required = true)] files: Vec, + + /// How to dissect the file. + #[arg(short, long, value_enum, default_value_t)] + mode: Mode, + + /// The encoding to use. + #[arg(long, value_parser = parse_encoding)] + encoding: Option<&'static Encoding>, +} + +#[derive(ThisError, Debug)] +#[error("{0}: unknown encoding")] +struct UnknownEncodingError(String); + +fn parse_encoding(arg: &str) -> Result<&'static Encoding, UnknownEncodingError> { + match Encoding::for_label_no_replacement(arg.as_bytes()) { + Some(encoding) => Ok(encoding), + None => Err(UnknownEncodingError(arg.to_string())), + } +} + +#[derive(Clone, Copy, Debug, Default, ValueEnum)] +enum Mode { + Identify, + Raw, + #[default] + Cooked, } fn main() -> Result<()> { - let Args { max_cases, files } = Args::parse(); + let Args { + max_cases, + files, + mode, + encoding, + } = Args::parse(); for file in files { - dissect(&file, max_cases)?; + dissect(&file, max_cases, mode, encoding)?; } Ok(()) } -fn dissect(file_name: &Path, max_cases: u64) -> Result<()> { +fn dissect(file_name: &Path, max_cases: u64, mode: Mode, encoding: Option<&'static Encoding>) -> Result<()> { let reader = File::open(file_name)?; let reader = BufReader::new(reader); - let mut reader = Reader::new(reader)?; - let records: Vec = reader.collect_headers()?; + let mut reader = Reader::new(reader, |warning| println!("{warning}"))?; - for record in records { - println!("{record:?}"); - if let Record::EndOfHeaders(_) = record { - break; - }; + match mode { + Mode::Identify => { + let Record::Header(header) = reader.next().unwrap()? else { unreachable!() }; + match header.magic { + Magic::Sav => println!("SPSS System File"), + Magic::Zsav => println!("SPSS System File with Zlib compression"), + Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"), + } + return Ok(()) + } + Mode::Raw => { + for header in reader { + let header = header?; + println!("{:?}", header); + if let Record::Cases(cases) = header { + let mut cases = cases.borrow_mut(); + for _ in 0..max_cases { + let Some(Ok(record)) = cases.next() else { + break; + }; + println!("{:?}", record); + } + } + } + } + Mode::Cooked => { + let headers: Vec = reader.collect::, _>>()?; + let headers = decode(headers, encoding, &|e| eprintln!("{e}"))?; + for header in headers { + println!("{header:?}"); + } + } } - for _ in 0..max_cases { - let Some(Ok(Record::Case(data))) = reader.next() else { - break; - }; - println!("{:?}", data); - } Ok(()) }