From: Ben Pfaff Date: Tue, 29 Jul 2025 19:51:17 +0000 (-0700) Subject: works! X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fdea49c990a0b1d605a3a7b802893726933e5967;p=pspp works! --- diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index bcc6cafc8e..3de82de89f 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -38,7 +38,10 @@ use std::{ use encoding_rs::{mem::decode_latin1, Encoding}; use itertools::Itertools; use ordered_float::OrderedFloat; -use serde::{ser::SerializeTupleVariant, Serialize}; +use serde::{ + ser::{SerializeSeq, SerializeTupleVariant}, + Serialize, +}; use crate::{ dictionary::{VarType, VarWidth}, @@ -733,27 +736,75 @@ where pub fn len(&self) -> usize { self.data.borrow().len() } + pub fn iter(&self) -> CaseIter<'_> { + self.into_iter() + } +} + +impl Serialize for Case +where + B: Borrow<[Datum]>, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for datum in self.iter() { + seq.serialize_element(&datum)?; + } + seq.end() + } +} + +pub struct CaseIter<'a> { + encoding: &'static Encoding, + iter: std::slice::Iter<'a, Datum>, +} + +impl<'a> Iterator for CaseIter<'a> { + type Item = Datum>; + + fn next(&mut self) -> Option { + self.iter.next().map(|d| d.as_encoded(self.encoding)) + } +} + +impl<'a, B> IntoIterator for &'a Case +where + B: Borrow<[Datum]>, +{ + type Item = Datum>; + + type IntoIter = CaseIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + CaseIter { + encoding: self.encoding, + iter: self.data.borrow().into_iter(), + } + } } impl IntoIterator for Case>> { type Item = Datum; - type IntoIter = CaseVecIter; + type IntoIter = CaseIntoIter; fn into_iter(self) -> Self::IntoIter { - CaseVecIter { + CaseIntoIter { encoding: self.encoding, iter: self.data.into_iter(), } } } -pub struct CaseVecIter { +pub struct CaseIntoIter { encoding: &'static Encoding, iter: std::vec::IntoIter>, } -impl Iterator for CaseVecIter { +impl Iterator for CaseIntoIter { type Item = Datum; fn next(&mut self) -> Option { diff --git a/rust/pspp/src/data/encoded.rs b/rust/pspp/src/data/encoded.rs index 248507c8db..e5d2d62897 100644 --- a/rust/pspp/src/data/encoded.rs +++ b/rust/pspp/src/data/encoded.rs @@ -179,7 +179,10 @@ impl<'a> From<&'a String> for BorrowedEncodedString<'a> { } } -impl Serialize for OwnedEncodedString { +impl Serialize for EncodedString +where + R: Borrow, +{ fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 8025c6a626..096f74ea5b 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -231,6 +231,10 @@ struct Show { #[arg(short, long, value_enum, default_value_t)] mode: Mode, + /// Output format. + #[arg(long, value_enum, default_value_t)] + format: ShowFormat, + /// The encoding to use. #[arg(long, value_parser = parse_encoding)] encoding: Option<&'static Encoding>, @@ -238,9 +242,80 @@ struct Show { impl Show { fn run(self) -> Result<()> { - for file in self.files { - show(&file, self.max_cases, self.mode, self.encoding)?; + for file in &self.files { + self.show(file)?; + } + Ok(()) + } + fn show(&self, file_name: &Path) -> Result<()> { + let reader = File::open(file_name)?; + let reader = BufReader::new(reader); + let mut reader = Reader::new(reader, Box::new(|warning| println!("{warning}")))?; + + match self.mode { + Mode::Identify => { + match reader.header().magic { + Magic::Sav => println!("SPSS System File"), + Magic::Zsav => println!("SPSS System File with Zlib compression"), + Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"), + } + return Ok(()); + } + Mode::Raw => { + self.show_json(reader.header())?; + for record in reader.records() { + self.show_json(&record?)?; + } + for (_index, case) in (0..self.max_cases).zip(reader.cases()) { + self.show_json(&case?)?; + } + } + Mode::Decoded => { + let records: Vec = reader.records().collect::, _>>()?; + let encoding = match self.encoding { + Some(encoding) => encoding, + None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?, + }; + let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}")); + for record in records { + self.show_json(&record.decode(&mut decoder))?; + } + } + Mode::Parsed => { + let records: Vec = reader.records().collect::, _>>()?; + let encoding = match self.encoding { + Some(encoding) => encoding, + None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?, + }; + let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}")); + let records = Records::from_raw(records, &mut decoder); + let (dictionary, metadata, cases) = records + .decode( + reader.header().clone().decode(&mut decoder), + reader.cases(), + encoding, + |e| eprintln!("{e}"), + ) + .into_parts(); + self.show_json(&dictionary)?; + self.show_json(&metadata)?; + for (_index, case) in (0..self.max_cases).zip(cases) { + self.show_json(&case?)?; + } + } } + + Ok(()) + } + fn show_json(&self, value: &T) -> Result<()> + where + T: Serialize, + { + match self.format { + ShowFormat::Json => serde_json::to_writer_pretty(stdout(), value)?, + ShowFormat::Ndjson => serde_json::to_writer(stdout(), value)?, + }; + println!(); Ok(()) } } @@ -282,78 +357,15 @@ enum Mode { Parsed, } -fn main() -> Result<()> { - Cli::parse().command.run() -} - -fn show_json(value: &T) -> Result<()> -where - T: Serialize, -{ - serde_json::to_writer_pretty(stdout(), value)?; - println!(); - Ok(()) +#[derive(Clone, Copy, Debug, Default, ValueEnum)] +enum ShowFormat { + /// Pretty-printed JSON. + #[default] + Json, + /// Newline-delimited JSON. + Ndjson, } -fn show( - file_name: &Path, - max_cases: u64, - mode: Mode, - encoding: Option<&'static Encoding>, -) -> Result<()> { - let reader = File::open(file_name)?; - let reader = BufReader::new(reader); - let mut reader = Reader::new(reader, Box::new(|warning| println!("{warning}")))?; - - match mode { - Mode::Identify => { - match reader.header().magic { - Magic::Sav => println!("SPSS System File"), - Magic::Zsav => println!("SPSS System File with Zlib compression"), - Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"), - } - return Ok(()); - } - Mode::Raw => { - show_json(reader.header())?; - for record in reader.records() { - show_json(&record?)?; - } - for (_index, case) in (0..max_cases).zip(reader.cases()) { - show_json(&case?)?; - } - } - Mode::Decoded => { - let records: Vec = reader.records().collect::, _>>()?; - let encoding = match encoding { - Some(encoding) => encoding, - None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?, - }; - let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}")); - for record in records { - show_json(&record.decode(&mut decoder))?; - } - } - Mode::Parsed => { - let records: Vec = reader.records().collect::, _>>()?; - let encoding = match encoding { - Some(encoding) => encoding, - None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?, - }; - let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}")); - let records = Records::from_raw(records, &mut decoder); - let (dictionary, metadata, _) = records - .decode( - reader.header().clone().decode(&mut decoder), - reader.cases(), - encoding, - |e| eprintln!("{e}"), - ) - .into_parts(); - show_json(&dictionary)?; - show_json(&metadata)?; - } - } - - Ok(()) +fn main() -> Result<()> { + Cli::parse().command.run() }