refined csv
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 5 Jan 2025 03:11:01 +0000 (19:11 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 5 Jan 2025 03:11:01 +0000 (19:11 -0800)
rust/Cargo.lock
rust/pspp/Cargo.toml
rust/pspp/src/output/csv.rs

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 0399f37e7186aa35f38d91102c91aa0ff54e3581..5e691e5ca20803306eb3e9ef4bfd85a422622215 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -276,27 +276,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "csv"
-version = "1.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
-dependencies = [
- "csv-core",
- "itoa",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "csv-core"
-version = "0.1.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
-dependencies = [
- "memchr",
-]
-
 [[package]]
 name = "dashmap"
 version = "5.5.3"
@@ -910,7 +889,6 @@ dependencies = [
  "chardetng",
  "chrono",
  "clap",
- "csv",
  "diff",
  "either",
  "encoding_rs",
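diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml
index 5c94d4deedbe32b165692467f2a01946570e4a10..027a8440ff4e3e9c2a48327cfb5346d72ccc45cf 100644
--- a/rust/pspp/Cargo.toml
+++ b/rust/pspp/Cargo.toml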
@@ -34,7 +34,6 @@ enum-iterator = "2.1.0"
 smallvec = { version = "1.13.2", features = ["const_generics", "write"] }
 libm = "0.2.11"
 smallstr = "0.3.0"
-csv = "1.3.1"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
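diff --git a/rust/pspp/src/output/csv.rs b/rust/pspp/src/output/csv.rs
index 5b4dca1ded2e1baad800e71851c5f4d69929e641..61157b8385e17890309586d44448cf13be0380ae 100644
--- a/rust/pspp/src/output/csv.rs
+++ b/rust/pspp/src/output/csv.rs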
@@ -1,6 +1,4 @@
-use std::{borrow::Cow, fs::File, io::Write, sync::Arc};
-
-use csv::Writer;
+use std::{borrow::Cow, fmt::Display, fs::File, io::Write, sync::Arc};
 
 use crate::output::pivot::Coord2;
 
@@ -8,14 +6,73 @@ use super::{driver::Driver, pivot::PivotTable, table::Table, Details, Item, Text
 
 struct CsvDriver {
     file: File,
+    options: CsvOptions,
 
     /// Number of items written so far.
     n_items: usize,
 }
 
+#[derive(Copy, Clone, Debug)]
+struct CsvOptions {
+    quote: u8,
+    delimiter: u8,
+}
+
+impl Default for CsvOptions {
+    fn default() -> Self {
+        Self {
+            quote: b'"',
+            delimiter: b',',
+        }
+    }
+}
+
+impl CsvOptions {
+    fn byte_needs_quoting(&self, b: u8) -> bool {
+        b == b'\r' || b == b'\n' || b == self.quote || b == self.delimiter
+    }
+
+    fn string_needs_quoting(&self, s: &str) -> bool {
+        s.bytes().find(|&b| self.byte_needs_quoting(b)).is_some()
+    }
+}
+
+struct CsvField<'a> {
+    text: &'a str,
+    options: CsvOptions,
+}
+
+impl<'a> CsvField<'a> {
+    fn new(text: &'a str, options: CsvOptions) -> Self {
+        Self { text, options }
+    }
+}
+
+impl<'a> Display for CsvField<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if self.options.string_needs_quoting(self.text) {
+            let quote = self.options.quote as char;
+            write!(f, "{quote}")?;
+            for c in self.text.chars() {
+                if c == quote {
+                    write!(f, "{c}")?;
+                }
+                write!(f, "{c}")?;
+            }
+            write!(f, "{quote}")
+        } else {
+            write!(f, "{}", self.text)
+        }
+    }
+}
+
 impl CsvDriver {
     pub fn new(file: File) -> Self {
-        Self { file, n_items: 0 }
+        Self {
+            file,
+            options: CsvOptions::default(),
+            n_items: 0,
+        }
     }
 
     fn start_item(&mut self) {
@@ -29,49 +86,48 @@ impl CsvDriver {
         let output = pt.output(layer, true);
         self.start_item();
 
-        let mut writer = Writer::from_writer(&mut self.file);
-        output_table(&mut writer, pt, output.title.as_ref(), Some("Table"))?;
-        output_table(&mut writer, pt, output.layers.as_ref(), Some("Layer"))?;
-        output_table(&mut writer, pt, Some(&output.body), None)?;
-        output_table(&mut writer, pt, output.caption.as_ref(), Some("Caption"))?;
-        output_table(&mut writer, pt, output.footnotes.as_ref(), Some("Footnote"))?;
+        self.output_table(pt, output.title.as_ref(), Some("Table"))?;
+        self.output_table(pt, output.layers.as_ref(), Some("Layer"))?;
+        self.output_table(pt, Some(&output.body), None)?;
+        self.output_table(pt, output.caption.as_ref(), Some("Caption"))?;
+        self.output_table(pt, output.footnotes.as_ref(), Some("Footnote"))?;
         Ok(())
     }
-}
 
-fn output_table<W>(
-    writer: &mut Writer<W>,
-    pivot_table: &PivotTable,
-    table: Option<&Table>,
-    leader: Option<&str>,
-) -> Result<(), csv::Error>
-where
-    W: Write,
-{
-    let Some(table) = table else {
-        return Ok(());
-    };
-
-    for y in 0..table.n.y() {
-        for x in 0..table.n.x() {
-            let coord = Coord2::new(x, y);
-            let content = table.get(coord);
-            match &content.inner().value {
-                Some(value) if content.is_top_left(coord) => {
-                    let display = value.display(Some(pivot_table));
-                    let s = match leader {
-                        Some(leader) if x == 0 && y == 0 => format!("{leader}: {display}"),
-                        _ => display.to_string(),
-                    };
-                    writer.write_field(&s)?
+    fn output_table(
+        &mut self,
+        pivot_table: &PivotTable,
+        table: Option<&Table>,
+        leader: Option<&str>,
+    ) -> Result<(), csv::Error> {
+        let Some(table) = table else {
+            return Ok(());
+        };
+
+        for y in 0..table.n.y() {
+            for x in 0..table.n.x() {
+                if x > 0 {
+                    write!(&mut self.file, "{}", self.options.delimiter as char).unwrap();
+                }
+
+                let coord = Coord2::new(x, y);
+                let content = table.get(coord);
+                if content.is_top_left(coord) {
+                    if let Some(value) = &content.inner().value {
+                        let display = value.display(Some(pivot_table));
+                        let s = match leader {
+                            Some(leader) if x == 0 && y == 0 => format!("{leader}: {display}"),
+                            _ => display.to_string(),
+                        };
+                        write!(&mut self.file, "{}", CsvField::new(&s, self.options)).unwrap();
+                    }
                 }
-                _ => writer.write_field("")?,
             }
+            writeln!(&mut self.file).unwrap();
         }
-        writer.write_record(None::<&[u8]>)?;
-    }
 
-    Ok(())
+        Ok(())
+    }
 }
 
 impl Driver for CsvDriver {
@@ -85,9 +141,8 @@ impl Driver for CsvDriver {
             Details::Chart | Details::Image | Details::Group(_) => (),
             Details::Message(diagnostic) => {
                 self.start_item();
-                Writer::from_writer(&mut self.file)
-                    .write_record([diagnostic.to_string()])
-                    .unwrap();
+                let text = diagnostic.to_string();
+                writeln!(&self.file, "{}", CsvField::new(&text, self.options)).unwrap();
             }
             Details::Table(pivot_table) => {
                 for layer in pivot_table.layers(true) {
@@ -102,9 +157,8 @@ impl Driver for CsvDriver {
                 TextType::Syntax | TextType::PageTitle => (),
                 TextType::Title | TextType::Log => {
                     self.start_item();
-                    let mut writer = Writer::from_writer(&mut self.file);
                     for line in text.content.display(None).to_string().lines() {
-                        writer.write_record([line]).unwrap();
+                        writeln!(&self.file, "{}", CsvField::new(line, self.options)).unwrap();
                     }
                 }
             },
@@ -112,6 +166,6 @@ impl Driver for CsvDriver {
     }
 
     fn flush(&mut self) {
-        self.file.flush();
+        let _ = self.file.flush();
     }
 }