From be85df825fc389bb76f9dc515ce8a536d5d950d5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 4 Jan 2025 19:11:01 -0800
Subject: [PATCH] refined csv

Drop the dependency on the `csv` crate.  The CSV driver now formats
fields itself through `CsvField`, which quotes a field only when it
contains CR, LF, the quote byte, or the delimiter byte.  The quote and
delimiter bytes come from the new `CsvOptions`.

---
Review notes:
 - output_table() returns std::io::Result<()> and propagates write
   failures with `?`.  The csv crate is removed by this patch, so
   csv::Error cannot be named there any longer.  output_layer()
   forwards these results with `?`, but its signature lies outside
   the hunks below, so its return type must be checked to agree.
 - The Driver callbacks still unwrap() write errors because no
   error-returning signature is visible for them in this patch.

 rust/Cargo.lock             |  22 -----
 rust/pspp/Cargo.toml        |   1 -
 rust/pspp/src/output/csv.rs | 158 +++++++++++++++++++++++++-----------
 3 files changed, 112 insertions(+), 69 deletions(-)

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 0399f37e71..5e691e5ca2 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -276,27 +276,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "csv"
-version = "1.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
-dependencies = [
- "csv-core",
- "itoa",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "csv-core"
-version = "0.1.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
-dependencies = [
- "memchr",
-]
-
 [[package]]
 name = "dashmap"
 version = "5.5.3"
@@ -910,7 +889,6 @@ dependencies = [
  "chardetng",
  "chrono",
  "clap",
- "csv",
  "diff",
  "either",
  "encoding_rs",
diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml
index 5c94d4deed..027a8440ff 100644
--- a/rust/pspp/Cargo.toml
+++ b/rust/pspp/Cargo.toml
@@ -34,7 +34,6 @@ enum-iterator = "2.1.0"
 smallvec = { version = "1.13.2", features = ["const_generics", "write"] }
 libm = "0.2.11"
 smallstr = "0.3.0"
-csv = "1.3.1"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
diff --git a/rust/pspp/src/output/csv.rs b/rust/pspp/src/output/csv.rs
index 5b4dca1ded..61157b8385 100644
--- a/rust/pspp/src/output/csv.rs
+++ b/rust/pspp/src/output/csv.rs
@@ -1,6 +1,4 @@
-use std::{borrow::Cow, fs::File, io::Write, sync::Arc};
-
-use csv::Writer;
+use std::{borrow::Cow, fmt::Display, fs::File, io::Write, sync::Arc};
 
 use crate::output::pivot::Coord2;
 
@@ -8,14 +6,81 @@ use super::{driver::Driver, pivot::PivotTable, table::Table, Details, Item, Text
 
 struct CsvDriver {
     file: File,
+    options: CsvOptions,
 
     /// Number of items written so far.
     n_items: usize,
 }
 
+/// Bytes that control how CSV fields are separated and quoted.
+#[derive(Copy, Clone, Debug)]
+struct CsvOptions {
+    /// Byte that opens and closes a quoted field; it is doubled inside one.
+    quote: u8,
+    /// Byte written between adjacent fields on a line.
+    delimiter: u8,
+}
+
+impl Default for CsvOptions {
+    fn default() -> Self {
+        Self {
+            quote: b'"',
+            delimiter: b',',
+        }
+    }
+}
+
+impl CsvOptions {
+    /// Returns true if `b` is CR, LF, the quote byte, or the delimiter byte.
+    fn byte_needs_quoting(&self, b: u8) -> bool {
+        b == b'\r' || b == b'\n' || b == self.quote || b == self.delimiter
+    }
+
+    /// Returns true if `s` contains any byte that requires quoting.
+    fn string_needs_quoting(&self, s: &str) -> bool {
+        s.bytes().any(|b| self.byte_needs_quoting(b))
+    }
+}
+
+/// Formats `text` as a single CSV field according to `options`.
+struct CsvField<'a> {
+    text: &'a str,
+    options: CsvOptions,
+}
+
+impl<'a> CsvField<'a> {
+    fn new(text: &'a str, options: CsvOptions) -> Self {
+        Self { text, options }
+    }
+}
+
+impl<'a> Display for CsvField<'a> {
+    /// Writes the text verbatim unless it needs quoting, in which case it is
+    /// enclosed in quote characters with each embedded quote doubled.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if self.options.string_needs_quoting(self.text) {
+            let quote = self.options.quote as char;
+            write!(f, "{quote}")?;
+            for c in self.text.chars() {
+                if c == quote {
+                    write!(f, "{c}")?;
+                }
+                write!(f, "{c}")?;
+            }
+            write!(f, "{quote}")
+        } else {
+            write!(f, "{}", self.text)
+        }
+    }
+}
+
 impl CsvDriver {
     pub fn new(file: File) -> Self {
-        Self { file, n_items: 0 }
+        Self {
+            file,
+            options: CsvOptions::default(),
+            n_items: 0,
+        }
     }
 
     fn start_item(&mut self) {
@@ -29,49 +94,52 @@ impl CsvDriver {
         let output = pt.output(layer, true);
         self.start_item();
 
-        let mut writer = Writer::from_writer(&mut self.file);
-        output_table(&mut writer, pt, output.title.as_ref(), Some("Table"))?;
-        output_table(&mut writer, pt, output.layers.as_ref(), Some("Layer"))?;
-        output_table(&mut writer, pt, Some(&output.body), None)?;
-        output_table(&mut writer, pt, output.caption.as_ref(), Some("Caption"))?;
-        output_table(&mut writer, pt, output.footnotes.as_ref(), Some("Footnote"))?;
+        self.output_table(pt, output.title.as_ref(), Some("Table"))?;
+        self.output_table(pt, output.layers.as_ref(), Some("Layer"))?;
+        self.output_table(pt, Some(&output.body), None)?;
+        self.output_table(pt, output.caption.as_ref(), Some("Caption"))?;
+        self.output_table(pt, output.footnotes.as_ref(), Some("Footnote"))?;
         Ok(())
     }
-}
 
-fn output_table<W>(
-    writer: &mut Writer<W>,
-    pivot_table: &PivotTable,
-    table: Option<&Table>,
-    leader: Option<&str>,
-) -> Result<(), csv::Error>
-where
-    W: Write,
-{
-    let Some(table) = table else {
-        return Ok(());
-    };
-
-    for y in 0..table.n.y() {
-        for x in 0..table.n.x() {
-            let coord = Coord2::new(x, y);
-            let content = table.get(coord);
-            match &content.inner().value {
-                Some(value) if content.is_top_left(coord) => {
-                    let display = value.display(Some(pivot_table));
-                    let s = match leader {
-                        Some(leader) if x == 0 && y == 0 => format!("{leader}: {display}"),
-                        _ => display.to_string(),
-                    };
-                    writer.write_field(&s)?
+    /// Writes `table`, if any, as one delimited line per row, prefixing the
+    /// cell at (0, 0) with `leader` followed by ": " when `leader` is given.
+    ///
+    /// Returns the first I/O error from writing to the file.
+    fn output_table(
+        &mut self,
+        pivot_table: &PivotTable,
+        table: Option<&Table>,
+        leader: Option<&str>,
+    ) -> std::io::Result<()> {
+        let Some(table) = table else {
+            return Ok(());
+        };
+
+        for y in 0..table.n.y() {
+            for x in 0..table.n.x() {
+                if x > 0 {
+                    write!(&mut self.file, "{}", self.options.delimiter as char)?;
+                }
+
+                let coord = Coord2::new(x, y);
+                let content = table.get(coord);
+                if content.is_top_left(coord) {
+                    if let Some(value) = &content.inner().value {
+                        let display = value.display(Some(pivot_table));
+                        let s = match leader {
+                            Some(leader) if x == 0 && y == 0 => format!("{leader}: {display}"),
+                            _ => display.to_string(),
+                        };
+                        write!(&mut self.file, "{}", CsvField::new(&s, self.options))?;
+                    }
                 }
-                _ => writer.write_field("")?,
             }
+            writeln!(&mut self.file)?;
         }
-        writer.write_record(None::<&[u8]>)?;
-    }
 
-    Ok(())
+        Ok(())
+    }
 }
 
 impl Driver for CsvDriver {
@@ -85,9 +153,8 @@ impl Driver for CsvDriver {
             Details::Chart | Details::Image | Details::Group(_) => (),
             Details::Message(diagnostic) => {
                 self.start_item();
-                Writer::from_writer(&mut self.file)
-                    .write_record([diagnostic.to_string()])
-                    .unwrap();
+                let text = diagnostic.to_string();
+                writeln!(&self.file, "{}", CsvField::new(&text, self.options)).unwrap();
             }
             Details::Table(pivot_table) => {
                 for layer in pivot_table.layers(true) {
@@ -102,9 +169,8 @@ impl Driver for CsvDriver {
                 TextType::Syntax | TextType::PageTitle => (),
                 TextType::Title | TextType::Log => {
                     self.start_item();
-                    let mut writer = Writer::from_writer(&mut self.file);
                     for line in text.content.display(None).to_string().lines() {
-                        writer.write_record([line]).unwrap();
+                        writeln!(&self.file, "{}", CsvField::new(line, self.options)).unwrap();
                     }
                 }
             },
@@ -112,6 +178,6 @@ impl Driver for CsvDriver {
     }
 
     fn flush(&mut self) {
-        self.file.flush();
+        let _ = self.file.flush();
     }
 }
-- 
2.30.2