From a11c12476adf3567dee85daa505433888833b77a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 4 Jan 2025 18:46:00 -0800 Subject: [PATCH] first take at csv --- rust/Cargo.lock | 22 +++++ rust/pspp/Cargo.toml | 1 + rust/pspp/src/output/csv.rs | 117 +++++++++++++++++++++++++++ rust/pspp/src/output/driver.rs | 17 ++-- rust/pspp/src/output/mod.rs | 5 +- rust/pspp/src/output/pivot/mod.rs | 110 +++++++++++++++++++------ rust/pspp/src/output/pivot/output.rs | 6 +- rust/pspp/src/output/table.rs | 88 ++++++++++++++++---- 8 files changed, 318 insertions(+), 48 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 5e691e5ca2..0399f37e71 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -276,6 +276,27 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -889,6 +910,7 @@ dependencies = [ "chardetng", "chrono", "clap", + "csv", "diff", "either", "encoding_rs", diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 027a8440ff..5c94d4deed 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -34,6 +34,7 @@ enum-iterator = "2.1.0" smallvec = { version = "1.13.2", features = ["const_generics", "write"] } libm = "0.2.11" smallstr = "0.3.0" +csv = "1.3.1" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/output/csv.rs b/rust/pspp/src/output/csv.rs index e69de29bb2..5b4dca1ded 100644 --- a/rust/pspp/src/output/csv.rs +++ b/rust/pspp/src/output/csv.rs @@ -0,0 +1,117 @@ +use std::{borrow::Cow, fs::File, io::Write, sync::Arc}; + +use csv::Writer; + +use crate::output::pivot::Coord2; + +use super::{driver::Driver, pivot::PivotTable, table::Table, Details, Item, TextType}; + +struct CsvDriver { + file: File, + + /// Number of items written so far. + n_items: usize, +} + +impl CsvDriver { + pub fn new(file: File) -> Self { + Self { file, n_items: 0 } + } + + fn start_item(&mut self) { + if self.n_items > 0 { + write!(&mut self.file, "\n").unwrap(); + } + self.n_items += 1; + } + + fn output_table_layer(&mut self, pt: &PivotTable, layer: &[usize]) -> Result<(), csv::Error> { + let output = pt.output(layer, true); + self.start_item(); + + let mut writer = Writer::from_writer(&mut self.file); + output_table(&mut writer, pt, output.title.as_ref(), Some("Table"))?; + output_table(&mut writer, pt, output.layers.as_ref(), Some("Layer"))?; + output_table(&mut writer, pt, Some(&output.body), None)?; + output_table(&mut writer, pt, output.caption.as_ref(), Some("Caption"))?; + output_table(&mut writer, pt, output.footnotes.as_ref(), Some("Footnote"))?; + Ok(()) + } +} + +fn output_table( + writer: &mut Writer, + pivot_table: &PivotTable, + table: Option<&Table>, + leader: Option<&str>, +) -> Result<(), csv::Error> +where + W: Write, +{ + let Some(table) = table else { + return Ok(()); + }; + + for y in 0..table.n.y() { + for x in 0..table.n.x() { + let coord = Coord2::new(x, y); + let content = table.get(coord); + match &content.inner().value { + Some(value) if content.is_top_left(coord) => { + let display = value.display(Some(pivot_table)); + let s = match leader { + Some(leader) if x == 0 && y == 0 => format!("{leader}: {display}"), + _ => display.to_string(), + }; + writer.write_field(&s)? + } + _ => writer.write_field("")?, + } + } + writer.write_record(None::<&[u8]>)?; + } + + Ok(()) +} + +impl Driver for CsvDriver { + fn name(&self) -> Cow<'static, str> { + Cow::from("csv") + } + + fn write(&mut self, item: &Arc) { + // todo: error handling (should not unwrap) + match &item.details { + Details::Chart | Details::Image | Details::Group(_) => (), + Details::Message(diagnostic) => { + self.start_item(); + Writer::from_writer(&mut self.file) + .write_record([diagnostic.to_string()]) + .unwrap(); + } + Details::Table(pivot_table) => { + for layer in pivot_table.layers(true) { + self.output_table_layer(&*pivot_table, &layer).unwrap(); + } + } + Details::PageBreak => { + self.start_item(); + write!(&mut self.file, "\n").unwrap(); + } + Details::Text(text) => match text.type_ { + TextType::Syntax | TextType::PageTitle => (), + TextType::Title | TextType::Log => { + self.start_item(); + let mut writer = Writer::from_writer(&mut self.file); + for line in text.content.display(None).to_string().lines() { + writer.write_record([line]).unwrap(); + } + } + }, + } + } + + fn flush(&mut self) { + self.file.flush(); + } +} diff --git a/rust/pspp/src/output/driver.rs b/rust/pspp/src/output/driver.rs index 89d0bacb92..e5b95bad27 100644 --- a/rust/pspp/src/output/driver.rs +++ b/rust/pspp/src/output/driver.rs @@ -1,17 +1,18 @@ use std::{borrow::Cow, sync::Arc}; -use chrono::format::Item; - -use super::page::Setup; +use super::{page::Setup, Item}; // An output driver. pub trait Driver { - fn name(&self) -> Cow<'static, &str>; + fn name(&self) -> Cow<'static, str>; - fn write(&mut self, item: Arc); + fn write(&mut self, item: &Arc); /// Returns false if the driver doesn't support page setup. - fn setup(&mut self, page_setup: &Setup) -> bool; + fn setup(&mut self, page_setup: &Setup) -> bool { + let _ = page_setup; + false + } /// Ensures that anything written with [Self::write] has been displayed. /// @@ -19,7 +20,7 @@ pub trait Driver { /// to ensure that the user has actually been shown any preceding output If /// it doesn't make sense for this driver to be used this way, then this /// function need not do anything. - fn flush(&mut self); + fn flush(&mut self) {} /// Ordinarily, the core driver code will skip passing hidden output items /// to [Self::write]. If this returns true, the core driver hands them to @@ -56,7 +57,7 @@ trait DriverFactory { /// because the caller may issue errors about unknown options for any pairs /// that remain. fn create(&self, file_handle: (), - + enum settings_output_devices type, struct driver_options *); diff --git a/rust/pspp/src/output/mod.rs b/rust/pspp/src/output/mod.rs index b26a42a170..efcddc8169 100644 --- a/rust/pspp/src/output/mod.rs +++ b/rust/pspp/src/output/mod.rs @@ -3,6 +3,8 @@ use std::sync::Arc; use pivot::PivotTable; +use crate::message::Diagnostic; + use self::pivot::Value; pub mod csv; @@ -39,7 +41,8 @@ pub enum Details { Chart, Image, Group(Vec>), - Message, + Message(Diagnostic), + PageBreak, Table(Box), Text(Box), } diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index c69b6d57af..11a1c22b32 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -58,6 +58,7 @@ use std::{ collections::HashMap, fmt::{Display, Write}, + iter::{once, repeat}, ops::{Index, Not, Range}, str::from_utf8, sync::{Arc, OnceLock, Weak}, @@ -708,7 +709,7 @@ impl Not for Axis2 { } /// A 2-dimensional `(x,y)` pair. -#[derive(Copy, Clone, Debug, Default)] +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] pub struct Coord2(pub EnumMap); impl Coord2 { @@ -751,22 +752,26 @@ impl Index for Coord2 { pub struct Rect2(pub EnumMap>); impl Rect2 { - fn new(x_range: Range, y_range: Range) -> Self { + pub fn new(x_range: Range, y_range: Range) -> Self { Self(enum_map! { Axis2::X => x_range.clone(), Axis2::Y => y_range.clone(), }) } - fn for_cell(cell: Coord2) -> Self { + pub fn for_cell(cell: Coord2) -> Self { Self::new(cell.x()..cell.x() + 1, cell.y()..cell.y() + 1) } - fn for_ranges((a, a_range): (Axis2, Range), b_range: Range) -> Self { + pub fn for_ranges((a, a_range): (Axis2, Range), b_range: Range) -> Self { let b = !a; let mut ranges = EnumMap::default(); ranges[a] = a_range; ranges[b] = b_range; Self(ranges) } + pub fn top_left(&self) -> Coord2 { + use Axis2::*; + Coord2::new(self[X].start, self[Y].start) + } } impl From>> for Rect2 { @@ -823,11 +828,11 @@ pub struct PivotTable { weight_format: Format, - /// Current layer indexes, with axes[PIVOT_AXIS_LAYER].n_dimensions - /// elements. current_layer[i] is an offset into - /// axes[PIVOT_AXIS_LAYER].dimensions[i]->data_leaves[], EXCEPT that a - /// dimension can have zero leaves, in which case current_layer[i] is zero - /// and there's no corresponding leaf. + /// Current layer indexes, with `axes[Axis3::Z].dimensions.len()` elements. + /// `current_layer[i]` is an offset into + /// `axes[Axis3::Z].dimensions[i].data_leaves[]`, except that a dimension + /// can have zero leaves, in which case `current_layer[i]` is zero and + /// there's no corresponding leaf. current_layer: Vec, /// Column and row sizing and page breaks. @@ -930,8 +935,24 @@ impl PivotTable { } data_indexes } + + /// Returns an iterator for the layer axis: + /// + /// - If `print` is true and `self.look.print_all_layers`, then the iterator + /// will visit all values of the layer axis. + /// + /// - Otherwise, the iterator will just visit `self.current_layer`. + pub fn layers(&self, print: bool) -> Box> + '_> { + if print && self.look.print_all_layers { + Box::new(self.axes[Axis3::Z].iter()) + } else { + Box::new(once(SmallVec::from_slice(&self.current_layer))) + } + } } +pub struct Layers {} + #[derive(Clone, Debug)] pub struct Footnote { index: usize, @@ -941,21 +962,37 @@ pub struct Footnote { } impl Footnote { - pub fn display_marker<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayMarker<'a, 'b> { + pub fn display_marker<'a, 'b>( + &'a self, + table: Option<&'b PivotTable>, + ) -> DisplayMarker<'a, 'b> { DisplayMarker { footnote: self, table, } } - pub fn display_content<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayValue<'a, 'b> { + pub fn display_content<'a, 'b>( + &'a self, + table: Option<&'b PivotTable>, + ) -> DisplayValue<'a, 'b> { self.content.display(table) } } pub struct DisplayMarker<'a, 'b> { footnote: &'a Footnote, - table: &'b PivotTable, + table: Option<&'b PivotTable>, +} + +impl<'a, 'b> DisplayMarker<'a, 'b> { + fn marker_type(&self) -> FootnoteMarkerType { + if let Some(table) = self.table { + table.look.footnote_marker_type + } else { + FootnoteMarkerType::default() + } + } } impl<'a, 'b> Display for DisplayMarker<'a, 'b> { @@ -964,7 +1001,7 @@ impl<'a, 'b> Display for DisplayMarker<'a, 'b> { write!(f, "{}", marker.display(self.table).without_suffixes()) } else { let i = self.footnote.index + 1; - match self.table.look.footnote_marker_type { + match self.marker_type() { FootnoteMarkerType::Alphabetic => write!(f, "{}", Display26Adic(i)), FootnoteMarkerType::Numeric => write!(f, "{i}"), } @@ -1051,7 +1088,7 @@ impl Value { pub struct DisplayValue<'a, 'b> { value: &'a Value, - table: &'b PivotTable, + table: Option<&'b PivotTable>, /// Whether to show subscripts and footnotes (which follow the body). show_suffixes: bool, @@ -1091,7 +1128,7 @@ impl<'a, 'b> DisplayValue<'a, 'b> { .. } => interpret_show( || Settings::global().show_values, - self.table.show_values, + || self.table.map_or(None, |table| table.show_values), *show, label, ), @@ -1102,13 +1139,17 @@ impl<'a, 'b> DisplayValue<'a, 'b> { .. } => interpret_show( || Settings::global().show_variables, - self.table.show_variables, + || self.table.map_or(None, |table| table.show_variables), *show, label, ), } } + fn small(&self) -> f64 { + self.table.map_or(0.0, |table| table.small) + } + fn template( &self, f: &mut std::fmt::Formatter<'_>, @@ -1130,7 +1171,7 @@ impl<'a, 'b> DisplayValue<'a, 'b> { continue; }; if let Some(arg) = arg.get(0) { - write!(f, "{}", arg.display(&self.table))?; + write!(f, "{}", arg.display(self.table))?; } } b'[' => { @@ -1186,7 +1227,7 @@ impl<'a, 'b> DisplayValue<'a, 'b> { continue; }; args_consumed = args_consumed.max(index); - write!(f, "{}", arg.display(&self.table))?; + write!(f, "{}", arg.display(self.table))?; } c => write!(f, "{c}")?, } @@ -1208,7 +1249,7 @@ fn consume_int(input: &[u8]) -> (usize, &[u8]) { fn extract_inner_template(input: &[u8]) -> (&[u8], &[u8]) { for (index, c) in input.iter().copied().enumerate() { - if c == b':' && (index == 0 || input[index-1] != b'\\') { + if c == b':' && (index == 0 || input[index - 1] != b'\\') { return input.split_at(index); } } @@ -1217,11 +1258,11 @@ fn extract_inner_template(input: &[u8]) -> (&[u8], &[u8]) { fn interpret_show( global_show: impl Fn() -> Show, - table_show: Option, + table_show: impl Fn() -> Option, value_show: Option, label: &String, ) -> (bool, Option<&String>) { - match value_show.or(table_show).unwrap_or_else(global_show) { + match value_show.or_else(table_show).unwrap_or_else(global_show) { Show::Value => (true, None), Show::Label => (false, Some(label)), Show::Both => (true, Some(label)), @@ -1241,7 +1282,7 @@ impl<'a, 'b> Display for DisplayValue<'a, 'b> { if show_value { let format = if format.type_() == Type::F && *honor_small - && value.is_some_and(|value| value != 0.0 && value.abs() < self.table.small) + && value.is_some_and(|value| value != 0.0 && value.abs() < self.small()) { UncheckedFormat::new(Type::E, 40, format.d() as u8).fix() } else { @@ -1287,15 +1328,36 @@ impl<'a, 'b> Display for DisplayValue<'a, 'b> { } ValueInner::Template { args, local, .. } => self.template(f, &local, args), + }?; + + if self.show_suffixes { + if let Some(styling) = &self.value.styling { + for (subscript, delimiter) in + styling.subscripts.iter().zip(once('_').chain(repeat(','))) + { + write!(f, "{delimiter}{subscript}")?; + } + + for footnote_index in styling.footnote_indexes.iter().copied() { + if let Some(table) = self.table { + if let Some(footnote) = table.footnotes.get(footnote_index) { + write!(f, "[{}]", footnote.display_marker(self.table))?; + } + } + } + } } + + Ok(()) } } impl Value { // Returns an object that will format this value, including subscripts and // superscripts and footnotes. Settings on `table` control whether variable - // and value labels are included. - fn display<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayValue<'a, 'b> { + // and value labels are included; if `table` is not provided, then defaults + // are used. `table` is also needed to display footnote markers. + pub fn display<'a, 'b>(&'a self, table: Option<&'b PivotTable>) -> DisplayValue<'a, 'b> { DisplayValue { value: self, table, diff --git a/rust/pspp/src/output/pivot/output.rs b/rust/pspp/src/output/pivot/output.rs index 7c136bcec9..0b2d42806e 100644 --- a/rust/pspp/src/output/pivot/output.rs +++ b/rust/pspp/src/output/pivot/output.rs @@ -270,7 +270,11 @@ impl PivotTable { let footnotes = if !f.is_empty() { let mut footnotes = self.create_aux_table(Coord2::new(1, f.len())); for (y, f) in f.into_iter().enumerate() { - let s = format!("{}. {}", f.display_marker(self), f.display_content(self)); + let s = format!( + "{}. {}", + f.display_marker(Some(self)), + f.display_content(Some(self)) + ); let value = Some(Value::new_user_text(s)); footnotes.put( Rect2::for_cell(Coord2::new(0, y)), diff --git a/rust/pspp/src/output/table.rs b/rust/pspp/src/output/table.rs index ae40092e3a..ef194a83d2 100644 --- a/rust/pspp/src/output/table.rs +++ b/rust/pspp/src/output/table.rs @@ -27,6 +27,47 @@ pub enum Content { Join(Arc), } +impl Content { + pub fn inner(&self) -> &CellInner { + match self { + Content::Empty => { + static INNER: CellInner = CellInner { + rotate: false, + area: Area::Title, + value: None, + }; + &INNER + } + Content::Value(cell_inner) => &cell_inner, + Content::Join(cell) => &cell.inner, + } + } + + pub fn is_empty(&self) -> bool { + if let Content::Empty = self { + true + } else { + false + } + } + + pub fn region(&self) -> Option<&Rect2> { + if let Content::Join(cell) = self { + Some(&cell.region) + } else { + None + } + } + + pub fn next_x(&self, x: usize) -> usize { + self.region().map_or(x + 1, |region| region[Axis2::X].end) + } + + pub fn is_top_left(&self, coord: Coord2) -> bool { + self.region().map_or(true, |r| coord == r.top_left()) + } +} + #[derive(Clone)] pub struct Cell { inner: CellInner, @@ -163,26 +204,45 @@ impl Table { } } + pub fn iter_x(&self, y: usize) -> XIter<'_> { + XIter { + table: self, + x: None, + y, + } + } + /// Visits all the nonempty cells once. pub fn visit_cells(&self, mut f: impl FnMut(&CellInner)) { for y in 0..self.n.y() { - let mut x = 0; - while x < self.n.x() { + for x in self.iter_x(y) { let content = self.get(Coord2::new(x, y)); - match content { - Content::Empty => { - x += 1; - } - Content::Value(cell_inner) => { - f(&cell_inner); - x += 1; - } - Content::Join(cell) => { - f(&cell.inner); - x = cell.region[Axis2::X].end; - } + if !content.is_empty() { + f(content.inner()); } } } } } + +pub struct XIter<'a> { + table: &'a Table, + x: Option, + y: usize, +} + +impl<'a> Iterator for XIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + let next_x = self + .x + .map_or(0, |x| self.table.get(Coord2::new(x, self.y)).next_x(x)); + if next_x >= self.table.n.x() { + None + } else { + self.x = Some(next_x); + Some(next_x) + } + } +} -- 2.30.2