works!
author Ben Pfaff <blp@cs.stanford.edu>
Tue, 29 Jul 2025 19:51:17 +0000 (12:51 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Tue, 29 Jul 2025 19:51:17 +0000 (12:51 -0700)
rust/pspp/src/data.rs
rust/pspp/src/data/encoded.rs
rust/pspp/src/main.rs

index bcc6cafc8e5384d6030757f2e55c0d5fd5e317e7..3de82de89fd5c4e70c6991da4176f29bf312aaef 100644 (file)
@@ -38,7 +38,10 @@ use std::{
 use encoding_rs::{mem::decode_latin1, Encoding};
 use itertools::Itertools;
 use ordered_float::OrderedFloat;
-use serde::{ser::SerializeTupleVariant, Serialize};
+use serde::{
+    ser::{SerializeSeq, SerializeTupleVariant},
+    Serialize,
+};
 
 use crate::{
     dictionary::{VarType, VarWidth},
@@ -733,27 +736,75 @@ where
     pub fn len(&self) -> usize {
         self.data.borrow().len()
     }
+    pub fn iter(&self) -> CaseIter<'_> {
+        self.into_iter()
+    }
+}
+
+impl<B> Serialize for Case<B>
+where
+    B: Borrow<[Datum<OwnedRawString>]>,
+{
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut seq = serializer.serialize_seq(Some(self.len()))?;
+        for datum in self.iter() {
+            seq.serialize_element(&datum)?;
+        }
+        seq.end()
+    }
+}
+
+pub struct CaseIter<'a> {
+    encoding: &'static Encoding,
+    iter: std::slice::Iter<'a, Datum<OwnedRawString>>,
+}
+
+impl<'a> Iterator for CaseIter<'a> {
+    type Item = Datum<BorrowedEncodedString<'a>>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(|d| d.as_encoded(self.encoding))
+    }
+}
+
+impl<'a, B> IntoIterator for &'a Case<B>
+where
+    B: Borrow<[Datum<OwnedRawString>]>,
+{
+    type Item = Datum<BorrowedEncodedString<'a>>;
+
+    type IntoIter = CaseIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        CaseIter {
+            encoding: self.encoding,
+            iter: self.data.borrow().into_iter(),
+        }
+    }
 }
 
 impl IntoIterator for Case<Vec<Datum<OwnedRawString>>> {
     type Item = Datum<OwnedEncodedString>;
 
-    type IntoIter = CaseVecIter;
+    type IntoIter = CaseIntoIter;
 
     fn into_iter(self) -> Self::IntoIter {
-        CaseVecIter {
+        CaseIntoIter {
             encoding: self.encoding,
             iter: self.data.into_iter(),
         }
     }
 }
 
-pub struct CaseVecIter {
+pub struct CaseIntoIter {
     encoding: &'static Encoding,
     iter: std::vec::IntoIter<Datum<OwnedRawString>>,
 }
 
-impl Iterator for CaseVecIter {
+impl Iterator for CaseIntoIter {
     type Item = Datum<OwnedEncodedString>;
 
     fn next(&mut self) -> Option<Self::Item> {
index 248507c8dbe849b3bb08d907a9280f02994414f8..e5d2d6289743323bbc0821b6facac7949f478b0b 100644 (file)
@@ -179,7 +179,10 @@ impl<'a> From<&'a String> for BorrowedEncodedString<'a> {
     }
 }
 
-impl Serialize for OwnedEncodedString {
+impl<R> Serialize for EncodedString<R>
+where
+    R: Borrow<BorrowedRawString>,
+{
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
         S: serde::Serializer,
index 8025c6a6266f62d3501f7bfe1d52f8f05abb38e5..096f74ea5bd09fa22509fe6dab536c0973040aec 100644 (file)
@@ -231,6 +231,10 @@ struct Show {
     #[arg(short, long, value_enum, default_value_t)]
     mode: Mode,
 
+    /// Output format.
+    #[arg(long, value_enum, default_value_t)]
+    format: ShowFormat,
+
     /// The encoding to use.
     #[arg(long, value_parser = parse_encoding)]
     encoding: Option<&'static Encoding>,
@@ -238,9 +242,80 @@ struct Show {
 
 impl Show {
     fn run(self) -> Result<()> {
-        for file in self.files {
-            show(&file, self.max_cases, self.mode, self.encoding)?;
+        for file in &self.files {
+            self.show(file)?;
+        }
+        Ok(())
+    }
+    fn show(&self, file_name: &Path) -> Result<()> {
+        let reader = File::open(file_name)?;
+        let reader = BufReader::new(reader);
+        let mut reader = Reader::new(reader, Box::new(|warning| println!("{warning}")))?;
+
+        match self.mode {
+            Mode::Identify => {
+                match reader.header().magic {
+                    Magic::Sav => println!("SPSS System File"),
+                    Magic::Zsav => println!("SPSS System File with Zlib compression"),
+                    Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"),
+                }
+                return Ok(());
+            }
+            Mode::Raw => {
+                self.show_json(reader.header())?;
+                for record in reader.records() {
+                    self.show_json(&record?)?;
+                }
+                for (_index, case) in (0..self.max_cases).zip(reader.cases()) {
+                    self.show_json(&case?)?;
+                }
+            }
+            Mode::Decoded => {
+                let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
+                let encoding = match self.encoding {
+                    Some(encoding) => encoding,
+                    None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?,
+                };
+                let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
+                for record in records {
+                    self.show_json(&record.decode(&mut decoder))?;
+                }
+            }
+            Mode::Parsed => {
+                let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
+                let encoding = match self.encoding {
+                    Some(encoding) => encoding,
+                    None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?,
+                };
+                let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
+                let records = Records::from_raw(records, &mut decoder);
+                let (dictionary, metadata, cases) = records
+                    .decode(
+                        reader.header().clone().decode(&mut decoder),
+                        reader.cases(),
+                        encoding,
+                        |e| eprintln!("{e}"),
+                    )
+                    .into_parts();
+                self.show_json(&dictionary)?;
+                self.show_json(&metadata)?;
+                for (_index, case) in (0..self.max_cases).zip(cases) {
+                    self.show_json(&case?)?;
+                }
+            }
         }
+
+        Ok(())
+    }
+    fn show_json<T>(&self, value: &T) -> Result<()>
+    where
+        T: Serialize,
+    {
+        match self.format {
+            ShowFormat::Json => serde_json::to_writer_pretty(stdout(), value)?,
+            ShowFormat::Ndjson => serde_json::to_writer(stdout(), value)?,
+        };
+        println!();
         Ok(())
     }
 }
@@ -282,78 +357,15 @@ enum Mode {
     Parsed,
 }
 
-fn main() -> Result<()> {
-    Cli::parse().command.run()
-}
-
-fn show_json<T>(value: &T) -> Result<()>
-where
-    T: Serialize,
-{
-    serde_json::to_writer_pretty(stdout(), value)?;
-    println!();
-    Ok(())
+#[derive(Clone, Copy, Debug, Default, ValueEnum)]
+enum ShowFormat {
+    /// Pretty-printed JSON.
+    #[default]
+    Json,
+    /// Newline-delimited JSON.
+    Ndjson,
 }
 
-fn show(
-    file_name: &Path,
-    max_cases: u64,
-    mode: Mode,
-    encoding: Option<&'static Encoding>,
-) -> Result<()> {
-    let reader = File::open(file_name)?;
-    let reader = BufReader::new(reader);
-    let mut reader = Reader::new(reader, Box::new(|warning| println!("{warning}")))?;
-
-    match mode {
-        Mode::Identify => {
-            match reader.header().magic {
-                Magic::Sav => println!("SPSS System File"),
-                Magic::Zsav => println!("SPSS System File with Zlib compression"),
-                Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"),
-            }
-            return Ok(());
-        }
-        Mode::Raw => {
-            show_json(reader.header())?;
-            for record in reader.records() {
-                show_json(&record?)?;
-            }
-            for (_index, case) in (0..max_cases).zip(reader.cases()) {
-                show_json(&case?)?;
-            }
-        }
-        Mode::Decoded => {
-            let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
-            let encoding = match encoding {
-                Some(encoding) => encoding,
-                None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?,
-            };
-            let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
-            for record in records {
-                show_json(&record.decode(&mut decoder))?;
-            }
-        }
-        Mode::Parsed => {
-            let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
-            let encoding = match encoding {
-                Some(encoding) => encoding,
-                None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?,
-            };
-            let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
-            let records = Records::from_raw(records, &mut decoder);
-            let (dictionary, metadata, _) = records
-                .decode(
-                    reader.header().clone().decode(&mut decoder),
-                    reader.cases(),
-                    encoding,
-                    |e| eprintln!("{e}"),
-                )
-                .into_parts();
-            show_json(&dictionary)?;
-            show_json(&metadata)?;
-        }
-    }
-
-    Ok(())
+fn main() -> Result<()> {
+    Cli::parse().command.run()
 }