From 91e846f49e46f6aa68daeca08580ba47aa261109 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 11 Dec 2025 08:57:11 -0800 Subject: [PATCH] Move Value into submodule. --- rust/pspp/src/cli/convert.rs | 2 +- rust/pspp/src/cli/show_spv.rs | 4 +- rust/pspp/src/lib.rs | 41 + rust/pspp/src/output/drivers/cairo/driver.rs | 2 +- rust/pspp/src/output/drivers/cairo/fsm.rs | 2 +- rust/pspp/src/output/drivers/cairo/pager.rs | 30 +- rust/pspp/src/output/drivers/spv.rs | 2 +- rust/pspp/src/output/page.rs | 2 +- rust/pspp/src/output/pivot.rs | 1000 +---------------- rust/pspp/src/output/pivot/output.rs | 29 +- rust/pspp/src/output/pivot/value.rs | 1005 ++++++++++++++++++ rust/pspp/src/output/table.rs | 10 +- rust/pspp/src/spv.rs | 15 +- rust/pspp/src/spv/read/html.rs | 189 +++- rust/pspp/src/spv/read/legacy_xml.rs | 5 +- rust/pspp/src/spv/read/light.rs | 11 +- rust/pspp/src/spv/write.rs | 33 +- 17 files changed, 1291 insertions(+), 1091 deletions(-) create mode 100644 rust/pspp/src/output/pivot/value.rs diff --git a/rust/pspp/src/cli/convert.rs b/rust/pspp/src/cli/convert.rs index 3df207134d..312268aa3a 100644 --- a/rust/pspp/src/cli/convert.rs +++ b/rust/pspp/src/cli/convert.rs @@ -143,7 +143,7 @@ impl Convert { self.write_data(dictionary, cases) } Some(FileType::Viewer { .. }) => { - let (items, page_setup) = pspp::spv::read::ReadOptions::new() + let (items, page_setup) = pspp::spv::ReadOptions::new() .with_password(self.password.clone()) .open_file(&self.input)? .into_parts(); diff --git a/rust/pspp/src/cli/show_spv.rs b/rust/pspp/src/cli/show_spv.rs index 2a068602e6..aae727854a 100644 --- a/rust/pspp/src/cli/show_spv.rs +++ b/rust/pspp/src/cli/show_spv.rs @@ -90,7 +90,7 @@ impl ShowSpv { pub fn run(self) -> Result<()> { match self.mode { Mode::Directory => { - let item = pspp::spv::read::ReadOptions::new() + let item = pspp::spv::ReadOptions::new() .with_password(self.password) .open_file(&self.input)? 
.into_items(); @@ -101,7 +101,7 @@ impl ShowSpv { Ok(()) } Mode::View => { - let item = pspp::spv::read::ReadOptions::new() + let item = pspp::spv::ReadOptions::new() .with_password(self.password) .open_file(&self.input)? .into_items(); diff --git a/rust/pspp/src/lib.rs b/rust/pspp/src/lib.rs index ffb4044774..79fbb1295c 100644 --- a/rust/pspp/src/lib.rs +++ b/rust/pspp/src/lib.rs @@ -178,3 +178,44 @@ pub(crate) fn subslice_range(slice: &[T], subslice: &[T]) -> Option(range: R, bounds: std::ops::RangeTo) -> std::ops::Range +where + R: std::ops::RangeBounds, +{ + try_range(range, bounds).unwrap() +} + +/// This is [slice::try_range] copied out from the standard library so that we +/// can use it while it is still experimental. +#[allow(dead_code)] +pub(crate) fn try_range( + range: R, + bounds: std::ops::RangeTo, +) -> Option> +where + R: std::ops::RangeBounds, +{ + let len = bounds.end; + + let start = match range.start_bound() { + std::ops::Bound::Included(&start) => start, + std::ops::Bound::Excluded(start) => start.checked_add(1)?, + std::ops::Bound::Unbounded => 0, + }; + + let end = match range.end_bound() { + std::ops::Bound::Included(end) => end.checked_add(1)?, + std::ops::Bound::Excluded(&end) => end, + std::ops::Bound::Unbounded => len, + }; + + if start > end || end > len { + None + } else { + Some(std::ops::Range { start, end }) + } +} diff --git a/rust/pspp/src/output/drivers/cairo/driver.rs b/rust/pspp/src/output/drivers/cairo/driver.rs index 8b8b29089f..7a4933ae85 100644 --- a/rust/pspp/src/output/drivers/cairo/driver.rs +++ b/rust/pspp/src/output/drivers/cairo/driver.rs @@ -40,7 +40,7 @@ use crate::{ page::PageSetup, pivot::{Color, Coord2, FontStyle}, }, - spv::read::html::Variable, + spv::html::Variable, }; use crate::output::pivot::Axis2; diff --git a/rust/pspp/src/output/drivers/cairo/fsm.rs b/rust/pspp/src/output/drivers/cairo/fsm.rs index 1d92e12ec4..4f38fabc3f 100644 --- a/rust/pspp/src/output/drivers/cairo/fsm.rs +++ 
b/rust/pspp/src/output/drivers/cairo/fsm.rs @@ -32,7 +32,7 @@ use crate::output::render::{Device, Extreme, Pager, Params}; use crate::output::table::DrawCell; use crate::output::{Details, Item}; use crate::output::{pivot::Color, table::Content}; -use crate::spv::read::html::Markup; +use crate::spv::html::Markup; /// Width of an ordinary line. const LINE_WIDTH: isize = LINE_SPACE / 2; diff --git a/rust/pspp/src/output/drivers/cairo/pager.rs b/rust/pspp/src/output/drivers/cairo/pager.rs index 6811646c26..62f999d2b6 100644 --- a/rust/pspp/src/output/drivers/cairo/pager.rs +++ b/rust/pspp/src/output/drivers/cairo/pager.rs @@ -20,12 +20,18 @@ use cairo::{Context, RecordingSurface}; use enum_map::EnumMap; use pango::Layout; -use crate::{output::{ - drivers::cairo::{ - fsm::{CairoFsm, CairoFsmStyle}, - xr_to_pt, - }, pivot::{Axis2, CellStyle, FontStyle, Rect2, ValueOptions}, table::DrawCell, Item -}, spv::read::html::{Document, Variable}}; +use crate::{ + output::{ + Item, + drivers::cairo::{ + fsm::{CairoFsm, CairoFsmStyle}, + xr_to_pt, + }, + pivot::{Axis2, CellStyle, FontStyle, Rect2, value::ValueOptions}, + table::DrawCell, + }, + spv::html::{Document, Variable}, +}; #[derive(Clone, Debug)] pub struct CairoPageStyle { @@ -191,15 +197,15 @@ where let substitutions = &|variable| Some(Cow::from((self.substitutions)(variable, self.page_number))); - for paragraph in self.heading.to_values() { + for block in self.heading.to_values() { // XXX substitute heading variables let cell = DrawCell { rotate: false, - inner: &paragraph.inner, - cell_style: paragraph.cell_style().unwrap_or(&default_cell_style), - font_style: paragraph.font_style().unwrap_or(&default_font_style), - subscripts: paragraph.subscripts(), - footnotes: paragraph.footnotes(), + inner: &block.inner, + cell_style: block.cell_style().unwrap_or(&default_cell_style), + font_style: block.font_style().unwrap_or(&default_font_style), + subscripts: block.subscripts(), + footnotes: block.footnotes(), value_options: 
&value_options, substitutions, }; diff --git a/rust/pspp/src/output/drivers/spv.rs b/rust/pspp/src/output/drivers/spv.rs index a0bdd1d720..22dde37235 100644 --- a/rust/pspp/src/output/drivers/spv.rs +++ b/rust/pspp/src/output/drivers/spv.rs @@ -54,7 +54,7 @@ where } fn write(&mut self, item: &Arc) { - self.writer.write(item); + self.writer.write(item).unwrap(); } fn setup(&mut self, page_setup: &PageSetup) -> bool { diff --git a/rust/pspp/src/output/page.rs b/rust/pspp/src/output/page.rs index 611a70442a..3104f8ba16 100644 --- a/rust/pspp/src/output/page.rs +++ b/rust/pspp/src/output/page.rs @@ -20,7 +20,7 @@ use enum_map::{EnumMap, enum_map}; use paper_sizes::{Catalog, Length, PaperSize, Unit}; use serde::{Deserialize, Deserializer, Serialize, de::Error}; -use crate::spv::read::html::Document; +use crate::spv::html::Document; use super::pivot::Axis2; diff --git a/rust/pspp/src/output/pivot.rs b/rust/pspp/src/output/pivot.rs index 2b588301c4..1944ef3a26 100644 --- a/rust/pspp/src/output/pivot.rs +++ b/rust/pspp/src/output/pivot.rs @@ -44,9 +44,9 @@ use std::{ collections::HashMap, - fmt::{Debug, Display, Write}, + fmt::{Debug, Display}, io::Read, - iter::{FusedIterator, once, repeat, repeat_n}, + iter::{FusedIterator, once, repeat_n}, ops::{Index, IndexMut, Not, Range, RangeInclusive}, str::{FromStr, Utf8Error, from_utf8}, sync::{Arc, OnceLock}, @@ -62,27 +62,34 @@ use itertools::Itertools; pub use look_xml::{Length, TableProperties}; use quick_xml::{DeError, de::from_str}; use serde::{ - Deserialize, Serialize, Serializer, + Deserialize, Serialize, de::Visitor, ser::{SerializeMap, SerializeStruct}, }; use smallvec::SmallVec; use thiserror::Error as ThisError; -pub use tlo::parse_bool; use tlo::parse_tlo; use crate::{ - calendar::date_time_to_pspp, data::{ByteString, Datum, EncodedString}, format::{ - Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat, DATETIME40_0, F40, F40_2, F40_3, F8_2, PCT40_1 - }, settings::{Settings, Show}, 
spv::read::html::Markup, util::ToSmallString, variable::{VarType, Variable} + format::{Decimal, F40, F40_2, F40_3, Format, PCT40_1, Settings as FormatSettings}, + output::pivot::value::{ + BareValue, DisplayValue, IntoValueOptions, NumberValue, ValueInner, ValueOptions, + }, + settings::{Settings, Show}, + util::ToSmallString, + variable::{VarType, Variable}, }; +pub(crate) use tlo::parse_bool; -pub mod output; - +mod output; +pub use output::OutputTables; mod look_xml; +pub mod value; +pub use value::Value; +mod tlo; + #[cfg(test)] pub mod tests; -mod tlo; /// Areas of a pivot table for styling purposes. #[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] @@ -309,16 +316,22 @@ pub struct Sizing { pub keeps: Vec>, } +/// A 3-dimensional axis. #[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Sequence, Serialize)] #[serde(rename_all = "snake_case")] pub enum Axis3 { + /// X axis. X, + /// Y axis. Y, + /// Z axis. Z, } impl Axis3 { - fn transpose(&self) -> Option { + /// Transposes the X and Y axes. Returns `None` if this represents the Z + /// axis. + pub fn transpose(&self) -> Option { match self { Axis3::X => Some(Axis3::Y), Axis3::Y => Some(Axis3::X), @@ -1584,7 +1597,9 @@ impl Not for Axis2 { } } -/// Can't convert `Axis3::Z` to `Axis2`. +/// Error converting [Axis3::Z] to [Axis2]. +#[derive(Copy, Clone, Debug, PartialEq, Eq, thiserror::Error)] +#[error("Can't convert `Axis3::Z` to `Axis2`.")] pub struct ZAxis; impl TryFrom for Axis2 { @@ -1739,57 +1754,6 @@ pub enum FootnoteMarkerPosition { Superscript, } -#[derive(Copy, Clone, Debug)] -pub struct ValueOptions { - pub show_values: Option, - - pub show_variables: Option, - - pub small: f64, - - /// Where to put the footnote markers. 
- pub footnote_marker_type: FootnoteMarkerType, -} - -impl Default for ValueOptions { - fn default() -> Self { - Self { - show_values: None, - show_variables: None, - small: 0.0001, - footnote_marker_type: FootnoteMarkerType::default(), - } - } -} - -pub trait IntoValueOptions { - fn into_value_options(self) -> ValueOptions; -} - -impl IntoValueOptions for () { - fn into_value_options(self) -> ValueOptions { - ValueOptions::default() - } -} - -impl IntoValueOptions for &PivotTable { - fn into_value_options(self) -> ValueOptions { - self.value_options() - } -} - -impl IntoValueOptions for &ValueOptions { - fn into_value_options(self) -> ValueOptions { - *self - } -} - -impl IntoValueOptions for ValueOptions { - fn into_value_options(self) -> ValueOptions { - self - } -} - #[derive(Clone, Debug, Serialize)] pub struct PivotTableStyle { pub look: Arc, @@ -2371,913 +2335,6 @@ impl Display for Display26Adic { } } -/// The content of a single pivot table cell. -/// -/// A [Value] is also a pivot table's title, caption, footnote marker and -/// contents, and so on. -/// -/// A given [Value] is one of: -/// -/// 1. A number resulting from a calculation. -/// -/// A number has an associated display format (usually [F] or [Pct]). This -/// format can be set directly, but that is not usually the easiest way. -/// Instead, it is usually true that all of the values in a single category -/// should have the same format (e.g. all "Significance" values might use -/// format `F40.3`), so PSPP makes it easy to set the default format for a -/// category while creating the category. See pivot_dimension_create() for -/// more details. -/// -/// [F]: crate::format::Type::F -/// [Pct]: crate::format::Type::Pct -/// -/// 2. A numeric or string value obtained from data ([ValueInner::Number] or -/// [ValueInner::String]). If such a value corresponds to a variable, then the -/// variable's name can be attached to the pivot_value. 
If the value has a -/// value label, then that can also be attached. When a label is present, -/// the user can control whether to show the value or the label or both. -/// -/// 3. A variable name ([ValueInner::Variable]). The variable label, if any, can -/// be attached too, and again the user can control whether to show the value -/// or the label or both. -/// -/// 4. A text string ([ValueInner::Text). The value stores the string in English -/// and translated into the output language (localized). Use -/// pivot_value_new_text() or pivot_value_new_text_format() for those cases. -/// In some cases, only an English or a localized version is available for -/// one reason or another, although this is regrettable; in those cases, use -/// pivot_value_new_user_text() or pivot_value_new_user_text_nocopy(). -/// -/// 5. A template. PSPP doesn't create these itself yet, but it can read and -/// interpret those created by SPSS. -#[derive(Clone, Default, PartialEq)] -pub struct Value { - pub inner: ValueInner, - pub styling: Option>, -} - -impl Serialize for Value { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.inner.serialize(serializer) - } -} - -/// Wrapper for [Value] that uses [Value::serialize_bare] for serialization. 
-#[derive(Serialize)] -struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] pub &'a Value); - -impl Value { - pub fn serialize_bare(&self, serializer: S) -> Result - where - S: Serializer, - { - match &self.inner { - ValueInner::Number(number_value) => number_value.serialize_bare(serializer), - ValueInner::String(string_value) => string_value.s.serialize(serializer), - ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer), - ValueInner::Text(text_value) => text_value.localized.serialize(serializer), - ValueInner::Markup(markup) => markup.serialize(serializer), - ValueInner::Template(template_value) => template_value.localized.serialize(serializer), - ValueInner::Empty => serializer.serialize_none(), - } - } - - pub fn new(inner: ValueInner) -> Self { - Self { - inner, - styling: None, - } - } - pub fn new_date_time(date_time: NaiveDateTime) -> Self { - Self::new_number_with_format(Some(date_time_to_pspp(date_time)), DATETIME40_0) - } - pub fn new_number_with_format(x: Option, format: Format) -> Self { - Self::new(ValueInner::Number(NumberValue { - show: None, - format, - honor_small: false, - value: x, - variable: None, - value_label: None, - })) - } - pub fn new_variable(variable: &Variable) -> Self { - Self::new(ValueInner::Variable(VariableValue { - show: None, - var_name: String::from(variable.name.as_str()), - variable_label: variable.label.clone(), - })) - } - pub fn new_datum(value: &Datum) -> Self - where - B: EncodedString, - { - match value { - Datum::Number(number) => Self::new_number(*number), - Datum::String(string) => Self::new_user_text(string.as_str()), - } - } - pub fn new_datum_with_format(value: &Datum, format: Format) -> Self - where - B: EncodedString, - { - match value { - Datum::Number(number) => Self::new(ValueInner::Number(NumberValue { - show: None, - format: match format.var_type() { - VarType::Numeric => format, - VarType::String => { - #[cfg(debug_assertions)] - panic!("cannot create 
numeric pivot value with string format"); - - #[cfg(not(debug_assertions))] - F8_2 - } - }, - honor_small: false, - value: *number, - variable: None, - value_label: None, - })), - Datum::String(string) => Self::new(ValueInner::String(StringValue { - show: None, - hex: format.type_() == Type::AHex, - s: string.as_str().into_owned(), - var_name: None, - value_label: None, - })), - } - } - pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { - Self::new_datum_with_format( - &value.as_encoded(variable.encoding()), - variable.print_format, - ) - .with_variable_name(Some(variable.name.as_str().into())) - .with_value_label(variable.value_labels.get(value).map(String::from)) - } - pub fn new_number(x: Option) -> Self { - Self::new_number_with_format(x, F8_2) - } - pub fn new_integer(x: Option) -> Self { - Self::new_number_with_format(x, F40) - } - pub fn new_text(s: impl Into) -> Self { - Self::new_user_text(s) - } - pub fn new_general_text(localized: String, c: String, id: String, user_provided: bool) -> Self { - Self::new(ValueInner::Text(TextValue { - user_provided, - c: (c != localized).then_some(c), - id: (id != localized).then_some(id), - localized, - })) - } - pub fn new_markup(markup: Markup) -> Self { - Self::new(ValueInner::Markup(markup)) - } - pub fn new_user_text(s: impl Into) -> Self { - let s: String = s.into(); - if s.is_empty() { - Self::default() - } else { - Self::new(ValueInner::Text(TextValue { - user_provided: true, - localized: s, - c: None, - id: None, - })) - } - } - pub fn with_footnote(mut self, footnote: &Arc) -> Self { - self.add_footnote(footnote); - self - } - pub fn add_footnote(&mut self, footnote: &Arc) { - let footnotes = &mut self.styling_mut().footnotes; - footnotes.push(footnote.clone()); - footnotes.sort_by_key(|f| f.index); - } - pub fn with_show_value_label(mut self, show: Option) -> Self { - let new_show = show; - match &mut self.inner { - ValueInner::Number(NumberValue { show, .. 
}) - | ValueInner::String(StringValue { show, .. }) => { - *show = new_show; - } - _ => (), - } - self - } - pub fn with_show_variable_label(mut self, show: Option) -> Self { - if let ValueInner::Variable(variable_value) = &mut self.inner { - variable_value.show = show; - } - self - } - pub fn with_value_label(mut self, label: Option) -> Self { - match &mut self.inner { - ValueInner::Number(NumberValue { value_label, .. }) - | ValueInner::String(StringValue { value_label, .. }) => *value_label = label.clone(), - _ => (), - } - self - } - pub fn with_variable_name(mut self, variable_name: Option) -> Self { - match &mut self.inner { - ValueInner::Number(NumberValue { variable, .. }) - | ValueInner::String(StringValue { - var_name: variable, .. - }) => *variable = variable_name, - ValueInner::Variable(VariableValue { - var_name: variable, .. - }) => { - if let Some(name) = variable_name { - *variable = name; - } - } - _ => (), - } - self - } - pub fn styling_mut(&mut self) -> &mut ValueStyle { - self.styling.get_or_insert_default() - } - pub fn with_font_style(mut self, font_style: FontStyle) -> Self { - self.styling_mut().font_style = Some(font_style); - self - } - pub fn with_cell_style(mut self, cell_style: CellStyle) -> Self { - self.styling_mut().cell_style = Some(cell_style); - self - } - pub fn with_styling(self, styling: Option>) -> Self { - Self { styling, ..self } - } - pub fn font_style(&self) -> Option<&FontStyle> { - self.styling - .as_ref() - .map(|styling| styling.font_style.as_ref()) - .flatten() - } - pub fn cell_style(&self) -> Option<&CellStyle> { - self.styling - .as_ref() - .map(|styling| styling.cell_style.as_ref()) - .flatten() - } - pub fn subscripts(&self) -> &[String] { - self.styling - .as_ref() - .map_or(&[], |styling| &styling.subscripts) - } - pub fn footnotes(&self) -> &[Arc] { - self.styling - .as_ref() - .map_or(&[], |styling| &styling.footnotes) - } - pub const fn empty() -> Self { - Value { - inner: ValueInner::Empty, - styling: 
None, - } - } - pub const fn is_empty(&self) -> bool { - self.inner.is_empty() && self.styling.is_none() - } -} - -impl From<&str> for Value { - fn from(value: &str) -> Self { - Self::new_text(value) - } -} - -impl From for Value { - fn from(value: String) -> Self { - Self::new_text(value) - } -} - -impl From<&Variable> for Value { - fn from(variable: &Variable) -> Self { - Self::new_variable(variable) - } -} - -pub struct DisplayValue<'a> { - inner: &'a ValueInner, - subscripts: &'a [String], - footnotes: &'a [Arc], - options: ValueOptions, - show_value: bool, - show_label: Option<&'a str>, -} - -impl<'a> DisplayValue<'a> { - pub fn subscripts(&self) -> impl Iterator { - self.subscripts.iter().map(String::as_str) - } - - pub fn has_subscripts(&self) -> bool { - !self.subscripts.is_empty() - } - - pub fn footnotes(&self) -> impl Iterator> { - self.footnotes - .iter() - .filter(|f| f.show) - .map(|f| f.display_marker(self.options)) - } - - pub fn has_footnotes(&self) -> bool { - self.footnotes().next().is_some() - } - - pub fn without_suffixes(self) -> Self { - Self { - subscripts: &[], - footnotes: &[], - ..self - } - } - - pub fn markup(&self) -> Option<&Markup> { - self.inner.markup() - } - - /// Returns this display split into `(body, suffixes)` where `suffixes` is - /// subscripts and footnotes and `body` is everything else. 
- pub fn split_suffixes(self) -> (Self, Self) { - let suffixes = Self { - inner: &ValueInner::Empty, - ..self - }; - (self.without_suffixes(), suffixes) - } - - pub fn with_styling(mut self, styling: &'a ValueStyle) -> Self { - self.subscripts = styling.subscripts.as_slice(); - self.footnotes = styling.footnotes.as_slice(); - self - } - - pub fn with_subscripts(self, subscripts: &'a [String]) -> Self { - Self { subscripts, ..self } - } - - pub fn with_footnotes(self, footnotes: &'a [Arc]) -> Self { - Self { footnotes, ..self } - } - - pub fn is_empty(&self) -> bool { - self.inner.is_empty() && self.subscripts.is_empty() && self.footnotes.is_empty() - } - - fn small(&self) -> f64 { - self.options.small - } - - pub fn var_type(&self) -> VarType { - match self.inner { - ValueInner::Number(NumberValue { .. }) if self.show_label.is_none() => VarType::Numeric, - _ => VarType::String, - } - } -} - -impl Display for DisplayValue<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.inner { - ValueInner::Number(number_value) => number_value.display(self, f), - - ValueInner::String(StringValue { s, .. }) - | ValueInner::Variable(VariableValue { var_name: s, .. 
}) => { - match (self.show_value, self.show_label) { - (true, None) => write!(f, "{s}"), - (false, Some(label)) => write!(f, "{label}"), - (true, Some(label)) => write!(f, "{s} {label}"), - (false, None) => unreachable!(), - } - } - - ValueInner::Markup(markup) => write!(f, "{markup}"), - - ValueInner::Text(text_value) => write!(f, "{text_value}"), - - ValueInner::Template(template_value) => template_value.display(self, f), - - ValueInner::Empty => Ok(()), - }?; - - for (subscript, delimiter) in self.subscripts.iter().zip(once('_').chain(repeat(','))) { - write!(f, "{delimiter}{subscript}")?; - } - - for footnote in self.footnotes { - write!(f, "[{}]", footnote.display_marker(self.options))?; - } - - Ok(()) - } -} - -impl Value { - // Returns an object that will format this value, including subscripts and - // superscripts and footnotes. `options` controls whether variable and - // value labels are included. - pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> { - let display = self.inner.display(options.into_value_options()); - match &self.styling { - Some(styling) => display.with_styling(styling), - None => display, - } - } -} - -impl Debug for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let name = match &self.inner { - ValueInner::Number(_) => "Number", - ValueInner::String(_) => "String", - ValueInner::Variable(_) => "Variable", - ValueInner::Text(_) => "Text", - ValueInner::Markup(_) => "Markup", - ValueInner::Template(_) => "Template", - ValueInner::Empty => "Empty", - }; - write!(f, "{name}:{:?}", self.display(()).to_string())?; - if let Some(markup) = self.inner.markup() { - write!(f, " (markup: {markup:?})")?; - } - if let Some(styling) = &self.styling { - write!(f, " ({styling:?})")?; - } - Ok(()) - } -} - -/// A numeric value and how to display it. -#[derive(Clone, Debug, PartialEq)] -pub struct NumberValue { - /// The numerical value, or `None` if it is a missing value. 
- pub value: Option, - - /// The display format (usually [F] or [Pct]). - /// - /// [F]: crate::format::Type::F - /// [Pct]: crate::format::Type::Pct - pub format: Format, - - /// Whether to show `value` or `value_label` or both. - /// - /// If this is unset, then a higher-level default is used. - pub show: Option, - - /// If true, then numbers smaller than a threshold will be displayed in - /// scientific notation. Otherwise, all numbers will be displayed with - /// `format`. - pub honor_small: bool, - - /// The name of the variable that `value` came from, if any. - pub variable: Option, - - /// The value label associated with `value`, if any. - pub value_label: Option, -} - -impl Serialize for NumberValue { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() { - self.value.serialize(serializer) - } else { - let mut s = serializer.serialize_map(None)?; - s.serialize_entry("value", &self.value)?; - s.serialize_entry("format", &self.format)?; - if let Some(show) = self.show { - s.serialize_entry("show", &show)?; - } - if self.honor_small { - s.serialize_entry("honor_small", &self.honor_small)?; - } - if let Some(variable) = &self.variable { - s.serialize_entry("variable", variable)?; - } - if let Some(value_label) = &self.value_label { - s.serialize_entry("value_label", value_label)?; - } - s.end() - } - } -} - -impl NumberValue { - pub fn display<'a>( - &self, - display: &DisplayValue<'a>, - f: &mut std::fmt::Formatter<'_>, - ) -> std::fmt::Result { - if display.show_value { - let format = if self.format.type_() == Type::F - && self.honor_small - && let Some(value) = self.value - && value != 0.0 - && value.abs() < display.small() - { - UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix() - } else { - self.format - }; - Datum::<&str>::Number(self.value) - .display(format) - .without_leading_spaces() - .fmt(f)?; - } - if let 
Some(label) = display.show_label { - if display.show_value { - write!(f, " ")?; - } - f.write_str(label)?; - } - Ok(()) - } - - pub fn serialize_bare(&self, serializer: S) -> Result - where - S: Serializer, - { - if let Some(number) = self.value - && number.trunc() == number - && number >= -(1i64 << 53) as f64 - && number <= (1i64 << 53) as f64 - { - (number as u64).serialize(serializer) - } else { - self.value.serialize(serializer) - } - } -} - -#[derive(Serialize)] -pub struct BareNumberValue<'a>( - #[serde(serialize_with = "NumberValue::serialize_bare")] pub &'a NumberValue, -); - -/// A string value and how to display it. -#[derive(Clone, Debug, Serialize, PartialEq)] -pub struct StringValue { - /// The string value. - /// - /// If `hex` is true, this should contain hex digits, not raw binary data - /// (otherwise it would be impossible to encode non-UTF-8 data). - pub s: String, - - /// True if `s` is hex digits. - pub hex: bool, - - /// Whether to show `s` or `value_label` or both. - /// - /// If this is unset, then a higher-level default is used. - pub show: Option, - - /// The name of the variable that `s` came from, if any. - pub var_name: Option, - - /// The value label associated with `s`, if any. - pub value_label: Option, -} - -#[derive(Clone, Debug, Serialize, PartialEq)] -pub struct VariableValue { - /// Whether to show `var_name` or `variable_label` or both. - /// - /// If this is unset, then a higher-level default is used. - pub show: Option, - pub var_name: String, - pub variable_label: Option, -} - -/// A text string. -/// -/// Whereas a [StringValue] is usually related to data, a `TextValue` is used -/// for other text within a table, such as a title, a column or row heading, or -/// a footnote. -#[derive(Clone, Debug, PartialEq)] -pub struct TextValue { - /// Whether the text came from the user. - /// - /// PSPP can localize text that it writes itself, but not text provided by - /// the user. - pub user_provided: bool, - - /// Localized. 
- /// - /// This is the main output string. - pub localized: String, - - /// English version of the string. - /// - /// Only for strings that are not user-provided, and only if it is different - /// from `localized`. - pub c: Option, - - /// Identifier. - /// - /// Only for strings that are not user-provided, and only if it is different - /// from `localized`. - pub id: Option, -} - -impl Serialize for TextValue { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - if self.user_provided && self.c.is_none() && self.id.is_none() { - serializer.serialize_str(&self.localized) - } else { - let mut s = serializer.serialize_struct( - "TextValue", - 2 + self.c.is_some() as usize + self.id.is_some() as usize, - )?; - s.serialize_field("user_provided", &self.user_provided)?; - s.serialize_field("localized", &self.localized)?; - if let Some(c) = &self.c { - s.serialize_field("c", &c)?; - } - if let Some(id) = &self.id { - s.serialize_field("id", &id)?; - } - s.end() - } - } -} - -impl Display for TextValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.localized) - } -} - -impl TextValue { - pub fn localized(&self) -> &str { - self.localized.as_str() - } - pub fn c(&self) -> &str { - self.c.as_ref().unwrap_or(&self.localized).as_str() - } - pub fn id(&self) -> &str { - self.id.as_ref().unwrap_or(&self.localized).as_str() - } -} - -#[derive(Clone, Debug, Serialize, PartialEq)] -pub struct TemplateValue { - pub args: Vec>, - pub localized: String, - pub id: Option, -} - -impl TemplateValue { - fn display<'a>( - &self, - display: &DisplayValue<'a>, - f: &mut std::fmt::Formatter<'_>, - ) -> std::fmt::Result { - fn extract_inner_template(input: &str) -> (&str, &str) { - let mut prev = None; - for (index, c) in input.char_indices() { - if c == ':' && prev != Some('\\') { - return (&input[..index], &input[index + 1..]); - } - prev = Some(c); - } - (input, "") - } - - let mut iter = self.localized.chars(); 
- while let Some(c) = iter.next() { - match c { - '\\' => { - let c = match iter.next() { - None => '\\', - Some('n') => '\n', - Some(c) => c, - }; - f.write_char(c)?; - } - '^' => { - let (index, rest) = Self::consume_int(iter.as_str()); - if let Some(index) = index.checked_sub(1) - && let Some(arg) = self.args.get(index) - && let Some(arg) = arg.first() - { - arg.display(display.options).fmt(f)?; - } - iter = rest.chars(); - } - '[' => { - let (a, rest) = extract_inner_template(iter.as_str()); - let (b, rest) = extract_inner_template(rest); - let rest = rest.strip_prefix("]").unwrap_or(rest); - let (index, rest) = Self::consume_int(rest); - iter = rest.chars(); - - if let Some(index) = index.checked_sub(1) - && let Some(args) = self.args.get(index) - { - let mut args = args.as_slice(); - let (mut template, mut escape) = - if !a.is_empty() { (a, '%') } else { (b, '^') }; - while !args.is_empty() - && let n_consumed = - self.inner_template(display, f, template, escape, args)? - && n_consumed > 0 - { - args = &args[n_consumed..]; - template = b; - escape = '^'; - } - } - } - c => f.write_char(c)?, - } - } - Ok(()) - } - - fn inner_template<'a>( - &self, - display: &DisplayValue<'a>, - f: &mut std::fmt::Formatter<'_>, - template: &str, - escape: char, - args: &[Value], - ) -> Result { - let mut iter = template.chars(); - let mut args_consumed = 0; - while let Some(c) = iter.next() { - match c { - '\\' => { - let c = iter.next().unwrap_or('\\') as char; - let c = if c == 'n' { '\n' } else { c }; - write!(f, "{c}")?; - } - c if c == escape => { - let (index, rest) = Self::consume_int(iter.as_str()); - iter = rest.chars(); - if let Some(index) = index.checked_sub(1) - && let Some(arg) = args.get(index) - { - args_consumed = args_consumed.max(index + 1); - write!(f, "{}", arg.display(display.options))?; - } - } - c => write!(f, "{c}")?, - } - } - Ok(args_consumed) - } - - fn consume_int(input: &str) -> (usize, &str) { - let mut n = 0; - for (index, c) in 
input.char_indices() { - match c.to_digit(10) { - Some(digit) => n = n * 10 + digit as usize, - None => return (n, &input[index..]), - } - } - (n, "") - } -} - -#[derive(Clone, Debug, Default, Serialize, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum ValueInner { - Number(NumberValue), - String(StringValue), - Variable(VariableValue), - Text(TextValue), - Markup(Markup), - Template(TemplateValue), - - #[default] - Empty, -} - -impl ValueInner { - pub const fn is_empty(&self) -> bool { - matches!(self, Self::Empty) - } - fn show(&self) -> Option { - match self { - ValueInner::Number(NumberValue { show, .. }) - | ValueInner::String(StringValue { show, .. }) - | ValueInner::Variable(VariableValue { show, .. }) => *show, - _ => None, - } - } - - fn label(&self) -> Option<&str> { - self.value_label().or_else(|| self.variable_label()) - } - - fn value_label(&self) -> Option<&str> { - match self { - ValueInner::Number(NumberValue { value_label, .. }) - | ValueInner::String(StringValue { value_label, .. }) => { - value_label.as_ref().map(String::as_str) - } - _ => None, - } - } - - fn variable_label(&self) -> Option<&str> { - match self { - ValueInner::Variable(VariableValue { variable_label, .. }) => { - variable_label.as_ref().map(String::as_str) - } - _ => None, - } - } - - fn markup(&self) -> Option<&Markup> { - match self { - ValueInner::Markup(markup) => Some(markup), - _ => None, - } - } -} - -#[derive(Clone, Debug, Default, PartialEq)] -pub struct ValueStyle { - pub cell_style: Option, - pub font_style: Option, - pub subscripts: Vec, - pub footnotes: Vec>, -} - -impl ValueStyle { - pub fn is_empty(&self) -> bool { - self.font_style.is_none() - && self.cell_style.is_none() - && self.subscripts.is_empty() - && self.footnotes.is_empty() - } -} - -impl ValueInner { - // Returns an object that will format this value. Settings on `options` - // control whether variable and value labels are included. 
- pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> { - fn interpret_show( - global_show: impl Fn() -> Show, - table_show: Option, - value_show: Option, - label: &str, - ) -> (bool, Option<&str>) { - match value_show.or(table_show).unwrap_or_else(global_show) { - Show::Value => (true, None), - Show::Label => (false, Some(label)), - Show::Both => (true, Some(label)), - } - } - - let options = options.into_value_options(); - let (show_value, show_label) = if let Some(value_label) = self.value_label() { - interpret_show( - || Settings::global().show_values, - options.show_values, - self.show(), - value_label, - ) - } else if let Some(variable_label) = self.variable_label() { - interpret_show( - || Settings::global().show_variables, - options.show_variables, - self.show(), - variable_label, - ) - } else { - (true, None) - }; - DisplayValue { - inner: self, - subscripts: &[], - footnotes: &[], - options, - show_value, - show_label, - } - } -} - pub struct MetadataEntry { pub name: Value, pub value: MetadataValue, @@ -3374,8 +2431,9 @@ mod test { use std::str::FromStr; use crate::output::pivot::{ - Color, Display26Adic, MetadataEntry, MetadataValue, TemplateValue, Value, ValueInner, + Color, Display26Adic, MetadataEntry, MetadataValue, Value, tests::assert_rendering, + value::{TemplateValue, ValueInner}, }; #[test] diff --git a/rust/pspp/src/output/pivot/output.rs b/rust/pspp/src/output/pivot/output.rs index dad7607e02..33bfc4c6b7 100644 --- a/rust/pspp/src/output/pivot/output.rs +++ b/rust/pspp/src/output/pivot/output.rs @@ -25,8 +25,8 @@ use crate::output::{ }; use super::{ - Area, Axis2, Axis3, Border, BorderStyle, BoxBorder, Color, Dimension, Footnote, - IntoValueOptions, PivotTable, RowColBorder, Stroke, Value, + Area, Axis2, Axis3, Border, BorderStyle, BoxBorder, Color, Dimension, Footnote, PivotTable, + RowColBorder, Stroke, Value, value::IntoValueOptions, }; /// All of the combinations of dimensions along an axis. 
@@ -662,31 +662,6 @@ impl<'a> Headings<'a> { } } -pub fn try_range(range: R, bounds: std::ops::RangeTo) -> Option> -where - R: std::ops::RangeBounds, -{ - let len = bounds.end; - - let start = match range.start_bound() { - std::ops::Bound::Included(&start) => start, - std::ops::Bound::Excluded(start) => start.checked_add(1)?, - std::ops::Bound::Unbounded => 0, - }; - - let end = match range.end_bound() { - std::ops::Bound::Included(end) => end.checked_add(1)?, - std::ops::Bound::Excluded(&end) => end, - std::ops::Bound::Unbounded => len, - }; - - if start > end || end > len { - None - } else { - Some(std::ops::Range { start, end }) - } -} - fn resolve_border_style( border: Border, borders: &EnumMap, diff --git a/rust/pspp/src/output/pivot/value.rs b/rust/pspp/src/output/pivot/value.rs new file mode 100644 index 0000000000..6d1a87cba8 --- /dev/null +++ b/rust/pspp/src/output/pivot/value.rs @@ -0,0 +1,1005 @@ +use std::{ + fmt::{Debug, Display, Write}, + iter::{once, repeat}, + sync::Arc, +}; + +use chrono::NaiveDateTime; +use serde::{ + Serialize, Serializer, + ser::{SerializeMap, SerializeStruct}, +}; + +use crate::{ + calendar::date_time_to_pspp, + data::{Datum, EncodedString}, + format::{DATETIME40_0, F8_2, F40, Format, Type, UncheckedFormat}, + output::pivot::{ + CellStyle, DisplayMarker, FontStyle, Footnote, FootnoteMarkerType, PivotTable, + }, + settings::{Settings, Show}, + spv::html::Markup, + variable::{VarType, Variable}, +}; + +/// The content of a single pivot table cell. +/// +/// A [Value] is also a pivot table's title, caption, footnote marker and +/// contents, and so on. +#[derive(Clone, Default, PartialEq)] +pub struct Value { + /// Content. + pub inner: ValueInner, + + /// Optional styling. + pub styling: Option>, +} + +impl Serialize for Value { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.inner.serialize(serializer) + } +} + +/// Wrapper for [Value] that uses [Value::serialize_bare] for serialization. 
+#[derive(Serialize)] +pub struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] pub &'a Value); + +impl Value { + pub fn serialize_bare(&self, serializer: S) -> Result + where + S: Serializer, + { + match &self.inner { + ValueInner::Number(number_value) => number_value.serialize_bare(serializer), + ValueInner::String(string_value) => string_value.s.serialize(serializer), + ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer), + ValueInner::Text(text_value) => text_value.localized.serialize(serializer), + ValueInner::Markup(markup) => markup.serialize(serializer), + ValueInner::Template(template_value) => template_value.localized.serialize(serializer), + ValueInner::Empty => serializer.serialize_none(), + } + } + + pub fn new(inner: ValueInner) -> Self { + Self { + inner, + styling: None, + } + } + pub fn new_date_time(date_time: NaiveDateTime) -> Self { + Self::new_number_with_format(Some(date_time_to_pspp(date_time)), DATETIME40_0) + } + pub fn new_number_with_format(x: Option, format: Format) -> Self { + Self::new(ValueInner::Number(NumberValue { + show: None, + format, + honor_small: false, + value: x, + variable: None, + value_label: None, + })) + } + pub fn new_variable(variable: &Variable) -> Self { + Self::new(ValueInner::Variable(VariableValue { + show: None, + var_name: String::from(variable.name.as_str()), + variable_label: variable.label.clone(), + })) + } + pub fn new_datum(value: &Datum) -> Self + where + B: EncodedString, + { + match value { + Datum::Number(number) => Self::new_number(*number), + Datum::String(string) => Self::new_user_text(string.as_str()), + } + } + pub fn new_datum_with_format(value: &Datum, format: Format) -> Self + where + B: EncodedString, + { + match value { + Datum::Number(number) => Self::new(ValueInner::Number(NumberValue { + show: None, + format: match format.var_type() { + VarType::Numeric => format, + VarType::String => { + #[cfg(debug_assertions)] + panic!("cannot 
create numeric pivot value with string format"); + + #[cfg(not(debug_assertions))] + F8_2 + } + }, + honor_small: false, + value: *number, + variable: None, + value_label: None, + })), + Datum::String(string) => Self::new(ValueInner::String(StringValue { + show: None, + hex: format.type_() == Type::AHex, + s: string.as_str().into_owned(), + var_name: None, + value_label: None, + })), + } + } + pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { + Self::new_datum_with_format( + &value.as_encoded(variable.encoding()), + variable.print_format, + ) + .with_variable_name(Some(variable.name.as_str().into())) + .with_value_label(variable.value_labels.get(value).map(String::from)) + } + pub fn new_number(x: Option) -> Self { + Self::new_number_with_format(x, F8_2) + } + pub fn new_integer(x: Option) -> Self { + Self::new_number_with_format(x, F40) + } + pub fn new_text(s: impl Into) -> Self { + Self::new_user_text(s) + } + pub fn new_general_text(localized: String, c: String, id: String, user_provided: bool) -> Self { + Self::new(ValueInner::Text(TextValue { + user_provided, + c: (c != localized).then_some(c), + id: (id != localized).then_some(id), + localized, + })) + } + pub fn new_markup(markup: Markup) -> Self { + Self::new(ValueInner::Markup(markup)) + } + pub fn new_user_text(s: impl Into) -> Self { + let s: String = s.into(); + if s.is_empty() { + Self::default() + } else { + Self::new(ValueInner::Text(TextValue { + user_provided: true, + localized: s, + c: None, + id: None, + })) + } + } + pub fn with_footnote(mut self, footnote: &Arc) -> Self { + self.add_footnote(footnote); + self + } + pub fn add_footnote(&mut self, footnote: &Arc) { + let footnotes = &mut self.styling_mut().footnotes; + footnotes.push(footnote.clone()); + footnotes.sort_by_key(|f| f.index); + } + pub fn with_show_value_label(mut self, show: Option) -> Self { + let new_show = show; + match &mut self.inner { + ValueInner::Number(NumberValue { show, .. 
}) + | ValueInner::String(StringValue { show, .. }) => { + *show = new_show; + } + _ => (), + } + self + } + pub fn with_show_variable_label(mut self, show: Option) -> Self { + if let ValueInner::Variable(variable_value) = &mut self.inner { + variable_value.show = show; + } + self + } + pub fn with_value_label(mut self, label: Option) -> Self { + match &mut self.inner { + ValueInner::Number(NumberValue { value_label, .. }) + | ValueInner::String(StringValue { value_label, .. }) => *value_label = label.clone(), + _ => (), + } + self + } + pub fn with_variable_name(mut self, variable_name: Option) -> Self { + match &mut self.inner { + ValueInner::Number(NumberValue { variable, .. }) + | ValueInner::String(StringValue { + var_name: variable, .. + }) => *variable = variable_name, + ValueInner::Variable(VariableValue { + var_name: variable, .. + }) => { + if let Some(name) = variable_name { + *variable = name; + } + } + _ => (), + } + self + } + pub fn styling_mut(&mut self) -> &mut ValueStyle { + self.styling.get_or_insert_default() + } + pub fn with_font_style(mut self, font_style: FontStyle) -> Self { + self.styling_mut().font_style = Some(font_style); + self + } + pub fn with_cell_style(mut self, cell_style: CellStyle) -> Self { + self.styling_mut().cell_style = Some(cell_style); + self + } + pub fn with_styling(self, styling: Option>) -> Self { + Self { styling, ..self } + } + pub fn font_style(&self) -> Option<&FontStyle> { + self.styling + .as_ref() + .map(|styling| styling.font_style.as_ref()) + .flatten() + } + pub fn cell_style(&self) -> Option<&CellStyle> { + self.styling + .as_ref() + .map(|styling| styling.cell_style.as_ref()) + .flatten() + } + pub fn subscripts(&self) -> &[String] { + self.styling + .as_ref() + .map_or(&[], |styling| &styling.subscripts) + } + pub fn footnotes(&self) -> &[Arc] { + self.styling + .as_ref() + .map_or(&[], |styling| &styling.footnotes) + } + pub const fn empty() -> Self { + Value { + inner: ValueInner::Empty, + styling: 
None, + } + } + pub const fn is_empty(&self) -> bool { + self.inner.is_empty() && self.styling.is_none() + } +} + +impl From<&str> for Value { + fn from(value: &str) -> Self { + Self::new_text(value) + } +} + +impl From for Value { + fn from(value: String) -> Self { + Self::new_text(value) + } +} + +impl From<&Variable> for Value { + fn from(variable: &Variable) -> Self { + Self::new_variable(variable) + } +} + +pub struct DisplayValue<'a> { + inner: &'a ValueInner, + subscripts: &'a [String], + footnotes: &'a [Arc], + options: ValueOptions, + show_value: bool, + show_label: Option<&'a str>, +} + +impl<'a> DisplayValue<'a> { + pub fn subscripts(&self) -> impl Iterator { + self.subscripts.iter().map(String::as_str) + } + + pub fn has_subscripts(&self) -> bool { + !self.subscripts.is_empty() + } + + pub fn footnotes(&self) -> impl Iterator> { + self.footnotes + .iter() + .filter(|f| f.show) + .map(|f| f.display_marker(self.options)) + } + + pub fn has_footnotes(&self) -> bool { + self.footnotes().next().is_some() + } + + pub fn without_suffixes(self) -> Self { + Self { + subscripts: &[], + footnotes: &[], + ..self + } + } + + pub fn markup(&self) -> Option<&Markup> { + self.inner.markup() + } + + /// Returns this display split into `(body, suffixes)` where `suffixes` is + /// subscripts and footnotes and `body` is everything else. 
+ pub fn split_suffixes(self) -> (Self, Self) { + let suffixes = Self { + inner: &ValueInner::Empty, + ..self + }; + (self.without_suffixes(), suffixes) + } + + pub fn with_styling(mut self, styling: &'a ValueStyle) -> Self { + self.subscripts = styling.subscripts.as_slice(); + self.footnotes = styling.footnotes.as_slice(); + self + } + + pub fn with_subscripts(self, subscripts: &'a [String]) -> Self { + Self { subscripts, ..self } + } + + pub fn with_footnotes(self, footnotes: &'a [Arc]) -> Self { + Self { footnotes, ..self } + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() && self.subscripts.is_empty() && self.footnotes.is_empty() + } + + fn small(&self) -> f64 { + self.options.small + } + + pub fn var_type(&self) -> VarType { + match self.inner { + ValueInner::Number(NumberValue { .. }) if self.show_label.is_none() => VarType::Numeric, + _ => VarType::String, + } + } +} + +impl Display for DisplayValue<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.inner { + ValueInner::Number(number_value) => number_value.display(self, f), + + ValueInner::String(StringValue { s, .. }) + | ValueInner::Variable(VariableValue { var_name: s, .. 
}) => { + match (self.show_value, self.show_label) { + (true, None) => write!(f, "{s}"), + (false, Some(label)) => write!(f, "{label}"), + (true, Some(label)) => write!(f, "{s} {label}"), + (false, None) => unreachable!(), + } + } + + ValueInner::Markup(markup) => write!(f, "{markup}"), + + ValueInner::Text(text_value) => write!(f, "{text_value}"), + + ValueInner::Template(template_value) => template_value.display(self, f), + + ValueInner::Empty => Ok(()), + }?; + + for (subscript, delimiter) in self.subscripts.iter().zip(once('_').chain(repeat(','))) { + write!(f, "{delimiter}{subscript}")?; + } + + for footnote in self.footnotes { + write!(f, "[{}]", footnote.display_marker(self.options))?; + } + + Ok(()) + } +} + +impl Value { + // Returns an object that will format this value, including subscripts and + // superscripts and footnotes. `options` controls whether variable and + // value labels are included. + pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> { + let display = self.inner.display(options.into_value_options()); + match &self.styling { + Some(styling) => display.with_styling(styling), + None => display, + } + } +} + +impl Debug for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match &self.inner { + ValueInner::Number(_) => "Number", + ValueInner::String(_) => "String", + ValueInner::Variable(_) => "Variable", + ValueInner::Text(_) => "Text", + ValueInner::Markup(_) => "Markup", + ValueInner::Template(_) => "Template", + ValueInner::Empty => "Empty", + }; + write!(f, "{name}:{:?}", self.display(()).to_string())?; + if let Some(markup) = self.inner.markup() { + write!(f, " (markup: {markup:?})")?; + } + if let Some(styling) = &self.styling { + write!(f, " ({styling:?})")?; + } + Ok(()) + } +} + +/// A numeric value and how to display it. +#[derive(Clone, Debug, PartialEq)] +pub struct NumberValue { + /// The numerical value, or `None` if it is a missing value. 
+ pub value: Option, + + /// The display format (usually [F] or [Pct]). + /// + /// [F]: crate::format::Type::F + /// [Pct]: crate::format::Type::Pct + pub format: Format, + + /// Whether to show `value` or `value_label` or both. + /// + /// If this is unset, then a higher-level default is used. + pub show: Option, + + /// If true, then numbers smaller than a threshold will be displayed in + /// scientific notation. Otherwise, all numbers will be displayed with + /// `format`. + pub honor_small: bool, + + /// The name of the variable that `value` came from, if any. + pub variable: Option, + + /// The value label associated with `value`, if any. + pub value_label: Option, +} + +impl Serialize for NumberValue { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() { + self.value.serialize(serializer) + } else { + let mut s = serializer.serialize_map(None)?; + s.serialize_entry("value", &self.value)?; + s.serialize_entry("format", &self.format)?; + if let Some(show) = self.show { + s.serialize_entry("show", &show)?; + } + if self.honor_small { + s.serialize_entry("honor_small", &self.honor_small)?; + } + if let Some(variable) = &self.variable { + s.serialize_entry("variable", variable)?; + } + if let Some(value_label) = &self.value_label { + s.serialize_entry("value_label", value_label)?; + } + s.end() + } + } +} + +impl NumberValue { + pub fn display<'a>( + &self, + display: &DisplayValue<'a>, + f: &mut std::fmt::Formatter<'_>, + ) -> std::fmt::Result { + if display.show_value { + let format = if self.format.type_() == Type::F + && self.honor_small + && let Some(value) = self.value + && value != 0.0 + && value.abs() < display.small() + { + UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix() + } else { + self.format + }; + Datum::<&str>::Number(self.value) + .display(format) + .without_leading_spaces() + .fmt(f)?; + } + if let 
Some(label) = display.show_label { + if display.show_value { + write!(f, " ")?; + } + f.write_str(label)?; + } + Ok(()) + } + + pub fn serialize_bare(&self, serializer: S) -> Result + where + S: Serializer, + { + if let Some(number) = self.value + && number.trunc() == number + && number >= -(1i64 << 53) as f64 + && number <= (1i64 << 53) as f64 + { + (number as i64).serialize(serializer) // i64, not u64: negatives would clamp to 0 + } else { + self.value.serialize(serializer) + } + } +} + +#[derive(Serialize)] +pub struct BareNumberValue<'a>( + #[serde(serialize_with = "NumberValue::serialize_bare")] pub &'a NumberValue, +); + +/// A string value and how to display it. +#[derive(Clone, Debug, Serialize, PartialEq)] +pub struct StringValue { + /// The string value. + /// + /// If `hex` is true, this should contain hex digits, not raw binary data + /// (otherwise it would be impossible to encode non-UTF-8 data). + pub s: String, + + /// True if `s` is hex digits. + pub hex: bool, + + /// Whether to show `s` or `value_label` or both. + /// + /// If this is unset, then a higher-level default is used. + pub show: Option, + + /// The name of the variable that `s` came from, if any. + pub var_name: Option, + + /// The value label associated with `s`, if any. + pub value_label: Option, +} + +/// A variable name. +#[derive(Clone, Debug, Serialize, PartialEq)] +pub struct VariableValue { + /// Variable name. + pub var_name: String, + + /// Variable label, if any. + pub variable_label: Option, + + /// Whether to show `var_name` or `variable_label` or both. + /// + /// If this is unset, then a higher-level default is used. + pub show: Option, +} + +/// A text string. +/// +/// Whereas a [StringValue] is usually related to data, a `TextValue` is used +/// for other text within a table, such as a title, a column or row heading, or +/// a footnote. +#[derive(Clone, Debug, PartialEq)] +pub struct TextValue { + /// Whether the text came from the user.
+ /// + /// PSPP can localize text that it writes itself, but not text provided by + /// the user. + pub user_provided: bool, + + /// Localized. + /// + /// This is the main output string. + pub localized: String, + + /// English version of the string. + /// + /// Only for strings that are not user-provided, and only if it is different + /// from `localized`. + pub c: Option, + + /// Identifier. + /// + /// Only for strings that are not user-provided, and only if it is different + /// from `localized`. + pub id: Option, +} + +impl Serialize for TextValue { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if self.user_provided && self.c.is_none() && self.id.is_none() { + serializer.serialize_str(&self.localized) + } else { + let mut s = serializer.serialize_struct( + "TextValue", + 2 + self.c.is_some() as usize + self.id.is_some() as usize, + )?; + s.serialize_field("user_provided", &self.user_provided)?; + s.serialize_field("localized", &self.localized)?; + if let Some(c) = &self.c { + s.serialize_field("c", &c)?; + } + if let Some(id) = &self.id { + s.serialize_field("id", &id)?; + } + s.end() + } + } +} + +impl Display for TextValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.localized) + } +} + +impl TextValue { + pub fn localized(&self) -> &str { + self.localized.as_str() + } + pub fn c(&self) -> &str { + self.c.as_ref().unwrap_or(&self.localized).as_str() + } + pub fn id(&self) -> &str { + self.id.as_ref().unwrap_or(&self.localized).as_str() + } +} + +/// A template with substitutions. +#[derive(Clone, Debug, Serialize, PartialEq)] +pub struct TemplateValue { + /// Template string. + /// + /// The documentation for [Value] in the PSPP manual describes the template + /// syntax. + /// + /// [Value]: https://pspp.benpfaff.org/manual/spv/light-detail.html#value + pub localized: String, + + /// Arguments to the template string. 
+ pub args: Vec>, + + /// Optional identifier for the template. + pub id: Option, +} + +impl TemplateValue { + fn display<'a>( + &self, + display: &DisplayValue<'a>, + f: &mut std::fmt::Formatter<'_>, + ) -> std::fmt::Result { + fn extract_inner_template(input: &str) -> (&str, &str) { + let mut prev = None; + for (index, c) in input.char_indices() { + if c == ':' && prev != Some('\\') { + return (&input[..index], &input[index + 1..]); + } + prev = Some(c); + } + (input, "") + } + + let mut iter = self.localized.chars(); + while let Some(c) = iter.next() { + match c { + '\\' => { + let c = match iter.next() { + None => '\\', + Some('n') => '\n', + Some(c) => c, + }; + f.write_char(c)?; + } + '^' => { + let (index, rest) = Self::consume_int(iter.as_str()); + if let Some(index) = index.checked_sub(1) + && let Some(arg) = self.args.get(index) + && let Some(arg) = arg.first() + { + arg.display(display.options).fmt(f)?; + } + iter = rest.chars(); + } + '[' => { + let (a, rest) = extract_inner_template(iter.as_str()); + let (b, rest) = extract_inner_template(rest); + let rest = rest.strip_prefix("]").unwrap_or(rest); + let (index, rest) = Self::consume_int(rest); + iter = rest.chars(); + + if let Some(index) = index.checked_sub(1) + && let Some(args) = self.args.get(index) + { + let mut args = args.as_slice(); + let (mut template, mut escape) = + if !a.is_empty() { (a, '%') } else { (b, '^') }; + while !args.is_empty() + && let n_consumed = + self.inner_template(display, f, template, escape, args)? 
+ && n_consumed > 0 + { + args = &args[n_consumed..]; + template = b; + escape = '^'; + } + } + } + c => f.write_char(c)?, + } + } + Ok(()) + } + + fn inner_template<'a>( + &self, + display: &DisplayValue<'a>, + f: &mut std::fmt::Formatter<'_>, + template: &str, + escape: char, + args: &[Value], + ) -> Result { + let mut iter = template.chars(); + let mut args_consumed = 0; + while let Some(c) = iter.next() { + match c { + '\\' => { + let c = iter.next().unwrap_or('\\') as char; + let c = if c == 'n' { '\n' } else { c }; + write!(f, "{c}")?; + } + c if c == escape => { + let (index, rest) = Self::consume_int(iter.as_str()); + iter = rest.chars(); + if let Some(index) = index.checked_sub(1) + && let Some(arg) = args.get(index) + { + args_consumed = args_consumed.max(index + 1); + write!(f, "{}", arg.display(display.options))?; + } + } + c => write!(f, "{c}")?, + } + } + Ok(args_consumed) + } + + fn consume_int(input: &str) -> (usize, &str) { + let mut n = 0usize; // saturates below, so long digit runs cannot overflow + for (index, c) in input.char_indices() { + match c.to_digit(10) { + Some(digit) => n = n.saturating_mul(10).saturating_add(digit as usize), + None => return (n, &input[index..]), + } + } + (n, "") + } +} + +/// Possible content for a [Value]. +#[derive(Clone, Debug, Default, Serialize, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ValueInner { + /// A numeric data value. + Number( + /// The number. + NumberValue, + ), + /// A string data value. + String( + /// The string. + StringValue, + ), + /// A variable name. + Variable( + /// The variable. + VariableValue, + ), + /// Plain text. + Text( + /// The text. + TextValue, + ), + /// Rich text. + Markup( + /// The rich text. + Markup, + ), + /// A template with substitutions. + Template( + /// The template. + TemplateValue, + ), + /// An empty value. + #[default] + Empty, +} + +impl ValueInner { + pub const fn is_empty(&self) -> bool { + matches!(self, Self::Empty) + } + fn show(&self) -> Option { + match self { + ValueInner::Number(NumberValue { show, ..
}) + | ValueInner::String(StringValue { show, .. }) + | ValueInner::Variable(VariableValue { show, .. }) => *show, + _ => None, + } + } + + fn label(&self) -> Option<&str> { + self.value_label().or_else(|| self.variable_label()) + } + + fn value_label(&self) -> Option<&str> { + match self { + ValueInner::Number(NumberValue { value_label, .. }) + | ValueInner::String(StringValue { value_label, .. }) => { + value_label.as_ref().map(String::as_str) + } + _ => None, + } + } + + fn variable_label(&self) -> Option<&str> { + match self { + ValueInner::Variable(VariableValue { variable_label, .. }) => { + variable_label.as_ref().map(String::as_str) + } + _ => None, + } + } + + fn markup(&self) -> Option<&Markup> { + match self { + ValueInner::Markup(markup) => Some(markup), + _ => None, + } + } +} + +#[derive(Clone, Debug, Default, PartialEq)] +pub struct ValueStyle { + pub cell_style: Option, + pub font_style: Option, + pub subscripts: Vec, + pub footnotes: Vec>, +} + +impl ValueStyle { + pub fn is_empty(&self) -> bool { + self.font_style.is_none() + && self.cell_style.is_none() + && self.subscripts.is_empty() + && self.footnotes.is_empty() + } +} + +impl ValueInner { + // Returns an object that will format this value. Settings on `options` + // control whether variable and value labels are included. 
+ pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> { + fn interpret_show( + global_show: impl Fn() -> Show, + table_show: Option, + value_show: Option, + label: &str, + ) -> (bool, Option<&str>) { + match value_show.or(table_show).unwrap_or_else(global_show) { + Show::Value => (true, None), + Show::Label => (false, Some(label)), + Show::Both => (true, Some(label)), + } + } + + let options = options.into_value_options(); + let (show_value, show_label) = if let Some(value_label) = self.value_label() { + interpret_show( + || Settings::global().show_values, + options.show_values, + self.show(), + value_label, + ) + } else if let Some(variable_label) = self.variable_label() { + interpret_show( + || Settings::global().show_variables, + options.show_variables, + self.show(), + variable_label, + ) + } else { + (true, None) + }; + DisplayValue { + inner: self, + subscripts: &[], + footnotes: &[], + options, + show_value, + show_label, + } + } +} + +/// Options for displaying a [Value]. +#[derive(Copy, Clone, Debug)] +pub struct ValueOptions { + /// Whether to show values or value labels, or both. + /// + /// When this is `None`, a global default is used. + pub show_values: Option, + + /// Whether to show variable names or variable labels, or both. + /// + /// When this is `None`, a global default is used. + pub show_variables: Option, + + /// Numbers whose magnitudes are less than this value are displayed in + /// scientific notation. A value of 0 disables this feature. + pub small: f64, + + /// Where to put the footnote markers. + pub footnote_marker_type: FootnoteMarkerType, +} + +impl Default for ValueOptions { + fn default() -> Self { + Self { + show_values: None, + show_variables: None, + small: 0.0001, + footnote_marker_type: FootnoteMarkerType::default(), + } + } +} + +/// Obtains [ValueOptions] in various ways. +pub trait IntoValueOptions { + /// Returns [ValueOptions] for this type. 
+ fn into_value_options(self) -> ValueOptions; +} + +/// Default [ValueOptions]. +impl IntoValueOptions for () { + fn into_value_options(self) -> ValueOptions { + ValueOptions::default() + } +} + +/// Extracts [ValueOptions] from a pivot table. +impl IntoValueOptions for &PivotTable { + fn into_value_options(self) -> ValueOptions { + self.value_options() + } +} + +/// Copies [ValueOptions] by reference. +impl IntoValueOptions for &ValueOptions { + fn into_value_options(self) -> ValueOptions { + *self + } +} + +/// Copies [ValueOptions] by value. +impl IntoValueOptions for ValueOptions { + fn into_value_options(self) -> ValueOptions { + self + } +} diff --git a/rust/pspp/src/output/table.rs b/rust/pspp/src/output/table.rs index 4cc7bf06ca..0dd010931a 100644 --- a/rust/pspp/src/output/table.rs +++ b/rust/pspp/src/output/table.rs @@ -35,10 +35,16 @@ use std::{ use enum_map::{EnumMap, enum_map}; use ndarray::{Array, Array2}; -use crate::{output::pivot::{CellStyle, DisplayValue, FontStyle, Footnote, HorzAlign, ValueInner}, spv::read::html}; +use crate::{ + output::pivot::{ + CellStyle, FontStyle, Footnote, HorzAlign, + value::{DisplayValue, ValueInner}, + }, + spv::html, +}; use super::pivot::{ - Area, AreaStyle, Axis2, Border, BorderStyle, HeadingRegion, Value, ValueOptions, + Area, AreaStyle, Axis2, Border, BorderStyle, HeadingRegion, Value, value::ValueOptions, }; /// The `(x,y)` position of a cell in a [Table]. diff --git a/rust/pspp/src/spv.rs b/rust/pspp/src/spv.rs index f6bee721db..0e34b9f383 100644 --- a/rust/pspp/src/spv.rs +++ b/rust/pspp/src/spv.rs @@ -17,15 +17,18 @@ //! Reading and writing SPV files. //! //! This module enables reading and writing SPSS Viewer or `.spv` files, which -//! SPSS 16 and later uses to represent the contents of its output editor. The -//! SPV file format is [documented in the PSPP manual]. +//! SPSS 16 and later uses to represent the contents of its output editor. See also +//! [SPV file format documentation]. //! -//! 
[documented in the PSPP manual]: https://pspp.benpfaff.org/manual/spv/index.html +//! Use [ReadOptions] to read an SPV file. Use [Writer] to write an SPV file. +//! +//! [SPV file format documentation]: https://pspp.benpfaff.org/manual/spv/index.html // Warn about missing docs, but not for items declared with `#[cfg(test)]`. #![cfg_attr(not(test), warn(missing_docs))] -pub use write::Writer; - -pub mod read; +mod read; mod write; + +pub use read::{Error, ReadOptions, SpvFile, html}; +pub use write::Writer; diff --git a/rust/pspp/src/spv/read/html.rs b/rust/pspp/src/spv/read/html.rs index c4c0e66eff..ec896e64f1 100644 --- a/rust/pspp/src/spv/read/html.rs +++ b/rust/pspp/src/spv/read/html.rs @@ -1,3 +1,25 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . + +//! HTML parsing and formatting. +//! +//! SPV files contain text in a simple subset of HTML. [Markup] represents +//! parsed text in this form suitable for a single [Value], whereas [Document] +//! can contain multiple paragraphs of markup, each paragraph represented by a +//! [Block]. #![warn(dead_code)] use std::{ borrow::{Borrow, Cow}, @@ -27,23 +49,52 @@ fn lowercase<'a>(s: &'a str) -> Cow<'a, str> { } } +/// Inline styled text. 
#[derive(Clone, Debug, PartialEq)] pub enum Markup { - Seq(Vec), - Text(String), - Variable(Variable), - Style { style: Style, child: Box }, + /// A sequence. + Seq( + /// The sequence. + Vec, + ), + /// A text string. + Text( + /// The text. + String, + ), + /// A substitution variable. + Variable( + /// The variable. + Variable, + ), + /// Styled text. + Style { + /// The style to apply to the contents of `child`. + style: Style, + /// The styled child markup. + child: Box, + }, } +/// A substitution variable within [Markup]. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize)] pub enum Variable { + /// `&[Date]` Date, + /// `&[Time]` Time, - Head(u8), + /// `&[HeadN]` + Head( + /// `N`. + u8, + ), + /// `&[PageTitle]`. PageTitle, + /// `&[Page]`. Page, } +/// Unknown variable error returned by [Variable::from_str]. #[derive(Copy, Clone, Debug, PartialEq, Eq, thiserror::Error)] #[error("Unknown variable")] pub struct UnknownVariable; @@ -51,6 +102,7 @@ pub struct UnknownVariable; impl FromStr for Variable { type Err = UnknownVariable; + /// Parses `Date` into [Self::Date], and so on. fn from_str(s: &str) -> Result { match s { "Date" => Ok(Self::Date), @@ -161,31 +213,39 @@ impl Display for Markup { } impl Markup { - fn is_empty(&self) -> bool { + /// Returns true if this markup contains no text. + pub fn is_empty(&self) -> bool { match self { - Markup::Seq(seq) => seq.is_empty(), - _ => false, + Markup::Seq(seq) => seq.iter().all(|markup| markup.is_empty()), + Markup::Text(s) => s.is_empty(), + Markup::Variable(_) => false, + Markup::Style { style: _, child } => child.is_empty(), } } - fn is_style(&self) -> bool { + /// Returns true if this is a [Markup::Style]. + pub fn is_style(&self) -> bool { matches!(self, Markup::Style { .. }) } - fn into_style(self) -> Option<(Style, Markup)> { + /// If this is [Markup::Style], returns its contents, and otherwise `None`. 
+ pub fn into_style(self) -> Option<(Style, Markup)> { match self { Markup::Style { style, child } => Some((style, *child)), _ => None, } } - fn is_text(&self) -> bool { + /// Returns true if this is a [Markup::Text]. + pub fn is_text(&self) -> bool { matches!(self, Markup::Text(_)) } - fn as_text(&self) -> Option<&str> { + /// For [Markup::Text], returns the text, and otherwise `None`. + pub fn as_text(&self) -> Option<&str> { match self { Markup::Text(text) => Some(text.as_str()), _ => None, } } - fn into_text(self) -> Option { + /// If this is [Markup::Text], returns its contents, and otherwise `None`. + pub fn into_text(self) -> Option { match self { Markup::Text(text) => Some(text), _ => None, @@ -229,6 +289,11 @@ impl Markup { Ok(()) } + /// Returns this markup converted into XHTML. The returned string contains + /// a single `...` element. + /// + /// Substitution variables in the markup are converted back into their + /// source forms as `&[PageTitle]`, etc. pub fn to_html(&self) -> String { let mut writer = XmlWriter::new(Cursor::new(Vec::new())); writer @@ -238,34 +303,39 @@ impl Markup { String::from_utf8(writer.into_inner().into_inner()).unwrap() } - pub fn to_pango<'a, F>(&self, substitutions: F) -> (String, AttrList) + /// Returns this markup as text and attributes suitable for passing as the + /// argument to [pango::Layout::set_text] and + /// [pango::Layout::set_attributes], respectively. + /// + /// Calls `expand` to obtain expansions for variables in the markup. 
+ pub fn to_pango<'a, F>(&self, expand: F) -> (String, AttrList) where F: Fn(Variable) -> Option>, { let mut s = String::new(); let mut attrs = AttrList::new(); - self.to_pango_inner(&substitutions, &mut s, &mut attrs); + self.to_pango_inner(&expand, &mut s, &mut attrs); (s, attrs) } - fn to_pango_inner<'a, F>(&self, substitutions: &F, s: &mut String, attrs: &mut AttrList) + fn to_pango_inner<'a, F>(&self, expand: &F, s: &mut String, attrs: &mut AttrList) where F: Fn(Variable) -> Option>, { match self { Markup::Seq(seq) => { for child in seq { - child.to_pango_inner(substitutions, s, attrs); + child.to_pango_inner(expand, s, attrs); } } Markup::Text(string) => s.push_str(&string), - Markup::Variable(variable) => match substitutions(*variable) { + Markup::Variable(variable) => match expand(*variable) { Some(value) => s.push_str(&*value), None => write!(s, "&[{variable}]").unwrap(), }, Markup::Style { style, child } => { let start_index = s.len(); - child.to_pango_inner(substitutions, s, attrs); + child.to_pango_inner(expand, s, attrs); let end_index = s.len(); let mut attr = match style { @@ -322,13 +392,17 @@ impl Markup { } } +/// A block of styled text. #[derive(Clone, Debug, PartialEq, Serialize)] -pub struct Paragraph { +pub struct Block { + /// Contents. pub markup: Markup, + + /// Horizontal alignment. pub horz_align: HorzAlign, } -impl Default for Paragraph { +impl Default for Block { fn default() -> Self { Self { markup: Markup::default(), @@ -337,7 +411,7 @@ impl Default for Paragraph { } } -impl Paragraph { +impl Block { fn new(mut markup: Markup, horz_align: HorzAlign, css: &[Style]) -> Self { for style in css { apply_style(&mut markup, style.clone()); @@ -345,7 +419,8 @@ impl Paragraph { Self { markup, horz_align } } - fn into_value(self) -> Value { + /// Returns a [Value] with this `Block`'s contents. 
+ pub fn into_value(self) -> Value { let mut font_style = FontStyle::default().with_size(10); let cell_style = CellStyle::default().with_horz_align(Some(self.horz_align)); let mut markup = self.markup; @@ -378,8 +453,12 @@ impl Paragraph { } } +/// Blocks of styled text. #[derive(Clone, Debug, Default, PartialEq)] -pub struct Document(pub Vec); +pub struct Document( + /// The blocks. + pub Vec, +); impl<'de> Deserialize<'de> for Document { fn deserialize(deserializer: D) -> Result @@ -400,14 +479,17 @@ impl Serialize for Document { } impl Document { + /// Returns true if this document contains no [Block]s. pub fn is_empty(&self) -> bool { self.0.is_empty() } + /// Parses HTML `input` into a `Document`. If `input` is not valid HTML, + /// then it is treated as plain text instead. pub fn from_html(input: &str) -> Self { match Dom::parse(&format!("{input}")) { Ok(dom) => Self(parse_dom(&dom)), - Err(_) if !input.is_empty() => Self(vec![Paragraph { + Err(_) if !input.is_empty() => Self(vec![Block { markup: Markup::Text(input.into()), horz_align: HorzAlign::Left, }]), @@ -415,19 +497,24 @@ impl Document { } } + /// Returns the document converted to a [Value]. If the document contains + /// more than one [Block], only the first one appears in the [Value]. pub fn into_value(self) -> Value { self.0.into_iter().next().unwrap_or_default().into_value() } + /// Returns the document converted to XHTML, except that the result will not + /// be a single `...` element but instead the contents for such + /// an element. 
pub fn to_html(&self) -> String { let mut writer = XmlWriter::new(Cursor::new(Vec::new())); writer .create_element("html") .write_inner_content(|w| { - for paragraph in &self.0 { + for block in &self.0 { w.create_element("p") - .with_attribute(("align", paragraph.horz_align.as_str().unwrap())) - .write_inner_content(|w| paragraph.markup.write_html(w))?; + .with_attribute(("align", block.horz_align.as_str().unwrap())) + .write_inner_content(|w| block.markup.write_html(w))?; } Ok(()) }) @@ -443,25 +530,55 @@ impl Document { .into() } + /// Returns the document converted to a series of [Value]s. pub fn to_values(&self) -> Vec { self.0 .iter() - .map(|paragraph| paragraph.clone().into_value()) + .map(|block| block.clone().into_value()) .collect() } } +/// A text style. +/// +/// Used in [Markup::Style]. #[derive(Clone, Debug, PartialEq)] pub enum Style { + /// **Bold**. Bold, + + /// *Italic*. Italic, + + /// __Underline__. Underline, + + /// ~~Strikethrough~~. Strike, + + /// Emphasis. Emphasis, + + /// Strong. Strong, - Face(String), - Color(Color), - Size(f64), + + /// Sets the typeface. + Face( + /// The typeface name. + String, + ), + + /// Font color. + Color( + /// The color + Color, + ), + + /// Font size. + Size( + /// In 1/72" units. + f64, + ), } fn node_as_element<'a>(node: &'a Node, name: &str) -> Option<&'a Element> { @@ -501,7 +618,7 @@ fn apply_style(markup: &mut Markup, style: Style) { }; } -pub fn parse_dom(dom: &Dom) -> Vec { +fn parse_dom(dom: &Dom) -> Vec { // Get the top-level elements, descending into an `html` element if // there is one. 
let roots = if dom.children.len() == 1 @@ -542,7 +659,7 @@ pub fn parse_dom(dom: &Dom) -> Vec { roots }; - let mut paragraphs = Vec::new(); + let mut blocks = Vec::new(); let mut start = 0; while start < body.len() { @@ -558,11 +675,11 @@ pub fn parse_dom(dom: &Dom) -> Vec { } (end, default_horz_align) }; - paragraphs.push(Paragraph::new(parse_nodes(&body[start..end]), align, &css)); + blocks.push(Block::new(parse_nodes(&body[start..end]), align, &css)); start = end; } - paragraphs + blocks } fn parse_nodes(nodes: &[Node]) -> Markup { diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 97892a5c07..baaf0d8a6a 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -36,8 +36,9 @@ use crate::{ format::{self, Decimal::Dot, F8_0, F40_2, Type, UncheckedFormat}, output::pivot::{ self, Area, AreaStyle, Axis2, Axis3, Category, CategoryLocator, CellStyle, Color, - Dimension, Group, HeadingRegion, HorzAlign, Leaf, Length, Look, NumberValue, PivotTable, - RowParity, Value, ValueInner, VertAlign, + Dimension, Group, HeadingRegion, HorzAlign, Leaf, Length, Look, PivotTable, RowParity, + Value, VertAlign, + value::{NumberValue, ValueInner}, }, spv::read::legacy_bin::DataValue, }; diff --git a/rust/pspp/src/spv/read/light.rs b/rust/pspp/src/spv/read/light.rs index 672667386e..45b4c37a69 100644 --- a/rust/pspp/src/spv/read/light.rs +++ b/rust/pspp/src/spv/read/light.rs @@ -24,7 +24,8 @@ use crate::{ self, AreaStyle, Axis2, Axis3, BoxBorder, Color, FootnoteMarkerPosition, FootnoteMarkerType, Footnotes, Group, HeadingRegion, HorzAlign, LabelPosition, Look, PivotTable, PivotTableMetadata, PivotTableStyle, PrecomputedIndex, RowColBorder, RowParity, - StringValue, Stroke, TemplateValue, ValueStyle, VariableValue, VertAlign, parse_bool, + Stroke, VertAlign, parse_bool, + value::{StringValue, TemplateValue, ValueStyle, VariableValue}, }, settings::Show, }; @@ -1224,7 +1225,7 @@ impl ValueText { impl 
ValueString { fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { - pivot::Value::new(pivot::ValueInner::String(StringValue { + pivot::Value::new(pivot::value::ValueInner::String(StringValue { s: self.s.decode(encoding), hex: self.format.type_() == Type::AHex, show: self.show, @@ -1237,7 +1238,7 @@ impl ValueString { impl ValueVarName { fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { - pivot::Value::new(pivot::ValueInner::Variable(VariableValue { + pivot::Value::new(pivot::value::ValueInner::Variable(VariableValue { show: self.show, var_name: self.var_name.decode(encoding), variable_label: self.var_label.decode_optional(encoding), @@ -1259,7 +1260,7 @@ impl ValueFixedText { impl ValueTemplate { fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { - pivot::Value::new(pivot::ValueInner::Template(TemplateValue { + pivot::Value::new(pivot::value::ValueInner::Template(TemplateValue { args: self .args .iter() @@ -1428,7 +1429,7 @@ impl ValueMods { mods: &Option, encoding: &'static Encoding, footnotes: &pivot::Footnotes, - ) -> Option> { + ) -> Option> { mods.as_ref() .map(|mods| Box::new(mods.decode(encoding, footnotes))) } diff --git a/rust/pspp/src/spv/write.rs b/rust/pspp/src/spv/write.rs index f83b4b38ac..e3d34602fd 100644 --- a/rust/pspp/src/spv/write.rs +++ b/rust/pspp/src/spv/write.rs @@ -22,13 +22,12 @@ use std::{ use binrw::{BinWrite, Endian}; use chrono::Utc; -use displaydoc::Display; use enum_map::EnumMap; use quick_xml::{ ElementWriter, Writer as XmlWriter, events::{BytesText, attributes::Attribute}, }; -use zip::{ZipWriter, result::ZipError, write::SimpleFileOptions}; +use zip::{ZipWriter, write::SimpleFileOptions}; use crate::{ format::{Format, Type}, @@ -39,27 +38,15 @@ use crate::{ Area, AreaStyle, Axis2, Axis3, Border, BorderStyle, BoxBorder, Category, CellStyle, Color, Dimension, FontStyle, Footnote, FootnoteMarkerPosition, 
FootnoteMarkerType, Footnotes, Group, HeadingRegion, HorzAlign, LabelPosition, Leaf, PivotTable, - RowColBorder, RowParity, Stroke, Value, ValueInner, ValueStyle, VertAlign, + RowColBorder, RowParity, Stroke, Value, VertAlign, + value::{ValueInner, ValueStyle}, }, }, settings::Show, - spv::read::html::Document, + spv::{Error, html::Document}, util::ToSmallString, }; -/// An error writing an SPV file. -#[derive(Debug, Display, thiserror::Error)] -pub enum Error { - /// {0} - ZipError(#[from] ZipError), - - /// {0} - IoError(#[from] std::io::Error), - - /// {0} - BinrwError(#[from] binrw::Error), -} - /// SPSS viewer (SPV) file writer. pub struct Writer where @@ -94,8 +81,8 @@ where /// Returns this `Writer` with `page_setup` set up to be written with the /// next call to [write](Writer::write). /// - /// Page setup is only significant if it is written before the first call to - /// [write](Writer::writer). + /// Page setup is only written if it is set before the first call to + /// [write](Writer::write). pub fn with_page_setup(mut self, page_setup: PageSetup) -> Self { self.set_page_setup(page_setup); self @@ -104,8 +91,8 @@ where /// Sets `page_setup` to be written with the next call to /// [write](Writer::write). /// - /// Page setup is only significant if it is written before the first call to - /// [write](Writer::writer). + /// Page setup is only written if it is set before the first call to + /// [write](Writer::write). 
pub fn set_page_setup(&mut self, page_setup: PageSetup) { self.page_setup = Some(page_setup); } @@ -257,7 +244,7 @@ where if let Some(command_name) = &item.command_name { element = element.with_attribute(("commandName", command_name.as_str())); }; - closure(element); + closure(element).map_err(std::io::Error::other)?; Ok(()) })?; Ok(()) @@ -594,7 +581,7 @@ where if let Some(page_setup) = self.page_setup.take() { write_page_setup(&page_setup, w)?; } - self.write_item(item, w); + self.write_item(item, w).map_err(std::io::Error::other)?; Ok(()) })?; -- 2.30.2