work
authorBen Pfaff <blp@cs.stanford.edu>
Thu, 9 Oct 2025 15:40:25 +0000 (08:40 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Thu, 9 Oct 2025 15:40:25 +0000 (08:40 -0700)
13 files changed:
rust/doc/src/invoking/output.md
rust/doc/src/spv/structure.md
rust/paper-sizes/src/lib.rs
rust/pspp/Cargo.toml
rust/pspp/src/output/drivers/cairo/driver.rs
rust/pspp/src/output/drivers/cairo/pager.rs
rust/pspp/src/output/drivers/spv.rs
rust/pspp/src/output/page.rs
rust/pspp/src/output/pivot.rs
rust/pspp/src/output/spv.rs
rust/pspp/src/output/spv/css.rs
rust/pspp/src/output/spv/html.rs
rust/pspp/src/output/table.rs

index 223e1654a4d761a0ab40cea342e1726bc0f98ef8..37c4ab271420248d1369d5bf74d88986b62fc38a 100644 (file)
@@ -42,9 +42,57 @@ This driver has the following options:
 
 # PDF Output (`.pdf`)
 
+This driver has the following options:
+
+* `page_setup: <PageSetup>`  
+  Sets the page size, margins, and other parameters.  The following
+  sub-options are available:
+
+  - `initial_page_number: <number>`  
+    The page number to use for the first page of output.  The default
+    is 1.
+
+  - `paper: <size>`  
+    Sets the page size.  `<size>` is a quoted string in the form
+    `<w>x<h><unit>`, e.g. `8.5x11in` or `210x297mm`, or the name of a
+    standard paper size, such as `letter` or `a4`.  The default is
+    system- and user-dependent.
+
+  - `margins: <trbl>`  
+    `margins: [<tb>, <lr>]`  
+    `margins: [<t>, <rl>, <b>]`  
+    `margins: [<t>, <r>, <b>, <l>]`  
+    Sets the margins.  Each variable is a quoted string with a length
+    and a unit, e.g. `10mm`.  The one-value form sets all margins to
+    the same length; the two-value form sets the top and bottom
+    margins separately from left and right; and so on.  The default is
+    `0.5in`.
+
+  - `orientation: portrait`  
+    `orientation: landscape`  
+    Controls the output page orientation.  The default is `portrait`.
+
+  - `object_spacing: <length>`  
+    Sets the vertical spacing between output objects, such as tables
+    or text.  `<length>` is a quoted string with a length and a unit,
+    e.g. `10mm`.  The default is `12pt`, or 1/6 of an inch.
+
+  - `chart_size: as_is`  
+    `chart_size: full_height`  
+    `chart_size: half_height`  
+    `chart_size: quarter_height`  
+    Sets the size of charts and graphs in the output.  The default,
+    `as_is`, uses the size specified in the charts themselves.  The
+    other possibilities set chart size in terms of the height of the
+    page.
+
+  - `header: <heading>`  
+    `footer: <heading>`  
+    Sets the page header or footer.  `<heading>` is a quoted string.
+
 # HTML Output (`.htm` and `.html`)
 
-# CVS Output (`.csv`)
+# Comma-Separated Value Output (`.csv`)
 
 # JSON Output (`.json`)
 
index 9ba4635f0f827cdb100981a801da4a7dece36b6c..619fa62fdefbc0228f297f3576e3ec22b9d7ec65 100644 (file)
@@ -638,30 +638,30 @@ pageParagraph => pageParagraph_text
 The `pageSetup` element has the following attributes.
 
 * `initial-page-number`  
-     The page number to put on the first page of printed output.
-     Usually `1`.
+  The page number to put on the first page of printed output.
+  Usually `1`.
 
 * `chart-size`  
-     One of the listed, self-explanatory chart sizes, `quarter-height`,
-     or a localization (!)  of one of these (e.g. `dimensione attuale`,
-     `Wie vorgegeben`).
+  One of the listed, self-explanatory chart sizes, `quarter-height`,
+  or a localization (!)  of one of these (e.g. `dimensione attuale`,
+  `Wie vorgegeben`).
 
 * `margin-left`  
-* `margin-right`  
-* `margin-top`  
-* `margin-bottom`  
-     Margin sizes, e.g. `0.25in`.
+  `margin-right`  
+  `margin-top`  
+  `margin-bottom`  
+  Margin sizes, e.g. `0.25in`.
 
 * `paper-height`  
-* `paper-width`  
-     Paper sizes.
+  `paper-width`  
+  Paper sizes.
 
 * `reference-orientation`  
-     Indicates the orientation of the output page.  Either `0deg`
-     (portrait) or `90deg` (landscape),
+  Indicates the orientation of the output page.  Either `0deg`
+  (portrait) or `90deg` (landscape).
 
 * `space-after`  
-     The amount of space between printed objects, typically `12pt`.
+  The amount of space between printed objects, typically `12pt`.
 
 ## The `text` Element (Inside `pageParagraph`)
 
index 3303ee1033e98b1d43fa943fd5f814b3d2c4d64a..e312374bf312d16e0c4750ec7ca99ce156bc113d 100644 (file)
@@ -42,6 +42,9 @@ use xdg::BaseDirectories;
 #[cfg(target_os = "linux")]
 mod locale;
 
+#[cfg(feature = "serde")]
+mod serde;
+
 include!(concat!(env!("OUT_DIR"), "/paperspecs.rs"));
 
 static PAPERSIZE_FILENAME: &str = "papersize";
@@ -69,6 +72,14 @@ pub enum Unit {
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub struct ParseUnitError;
 
+impl Error for ParseUnitError {}
+
+impl Display for ParseUnitError {
+    /// Writes the fixed message for an unrecognized unit.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("unknown unit")
+    }
+}
+
 impl FromStr for Unit {
     type Err = ParseUnitError;
 
@@ -98,7 +109,7 @@ impl Unit {
     ///
     /// To convert a quantity of unit `a` into unit `b`, multiply by
     /// `a.as_unit(b)`.
-    fn as_unit(&self, other: Unit) -> f64 {
+    pub fn as_unit(&self, other: Unit) -> f64 {
         match (*self, other) {
             (Unit::Point, Unit::Point) => 1.0,
             (Unit::Point, Unit::Inch) => 1.0 / 72.0,
@@ -119,6 +130,80 @@ impl Display for Unit {
     }
 }
 
+/// A physical length with a [Unit].
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct Length {
+    /// The length.
+    pub value: f64,
+
+    /// The length's unit.
+    pub unit: Unit,
+}
+
+impl Length {
+    /// Constructs a new `Length` from `value` and `unit`.
+    pub fn new(value: f64, unit: Unit) -> Self {
+        Self { value, unit }
+    }
+
+    /// Returns this length converted to `unit`.
+    ///
+    /// The value is rescaled by the factor from this length's own unit to
+    /// `unit` (see [Unit::as_unit]), so the result denotes the same physical
+    /// length expressed in `unit`.
+    pub fn as_unit(&self, unit: Unit) -> Self {
+        Self {
+            // The conversion factor must start from the unit this length is
+            // stored in; the target unit alone does not determine it.
+            value: self.value * self.unit.as_unit(unit),
+            unit,
+        }
+    }
+
+    /// Returns the value of this length in `unit`.
+    pub fn into_unit(&self, unit: Unit) -> f64 {
+        self.as_unit(unit).value
+    }
+}
+
+/// An error parsing a [Length].
+///
+/// Derives `PartialEq` and `Eq`, like the other parse error types in this
+/// crate, so that callers and tests can compare errors directly.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ParseLengthError {
+    /// Missing unit.
+    MissingUnit,
+    /// Invalid unit.
+    InvalidUnit,
+    /// Invalid value.
+    InvalidValue,
+}
+
+impl Error for ParseLengthError {}
+
+impl Display for ParseLengthError {
+    /// Writes a short fixed message describing the error.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let message = match self {
+            ParseLengthError::MissingUnit => "Missing unit",
+            ParseLengthError::InvalidUnit => "Invalid unit of measurement",
+            ParseLengthError::InvalidValue => "Invalid length",
+        };
+        f.write_str(message)
+    }
+}
+
+impl FromStr for Length {
+    type Err = ParseLengthError;
+
+    /// Parses a numeric value immediately followed by a unit.
+    ///
+    /// The string is split at its first alphabetic character: everything
+    /// before it is parsed as the value and everything from it onward as the
+    /// unit.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let Some(split) = s.find(char::is_alphabetic) else {
+            // No alphabetic character at all, so there cannot be a unit.
+            return Err(ParseLengthError::MissingUnit);
+        };
+        let (number, suffix) = s.split_at(split);
+        let value = number
+            .parse()
+            .map_err(|_| ParseLengthError::InvalidValue)?;
+        let unit = suffix
+            .parse()
+            .map_err(|_| ParseLengthError::InvalidUnit)?;
+        Ok(Self { value, unit })
+    }
+}
+
+impl Display for Length {
+    /// Writes the value immediately followed by the unit, with no separator.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self { value, unit } = self;
+        write!(f, "{value}{unit}")
+    }
+}
+
 /// The size of a piece of paper.
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub struct PaperSize {
@@ -170,6 +255,16 @@ impl PaperSize {
         let (bw, bh) = other.as_unit(unit).into_width_height();
         aw.round() == bw.round() && ah.round() == bh.round()
     }
+
+    /// Returns the paper's width as a [Length], in the paper's own unit.
+    pub fn width(&self) -> Length {
+        Length {
+            value: self.width,
+            unit: self.unit,
+        }
+    }
+
+    /// Returns the paper's height as a [Length], in the paper's own unit.
+    pub fn height(&self) -> Length {
+        Length {
+            value: self.height,
+            unit: self.unit,
+        }
+    }
 }
 
 /// An error parsing a [PaperSize].
@@ -240,29 +335,6 @@ impl Display for PaperSize {
     }
 }
 
-#[cfg(feature = "serde")]
-impl serde::Serialize for PaperSize {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        self.to_string().serialize(serializer)
-    }
-}
-
-#[cfg(feature = "serde")]
-impl<'de> serde::Deserialize<'de> for PaperSize {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        use serde::de::Error;
-        String::deserialize(deserializer)?
-            .parse()
-            .map_err(D::Error::custom)
-    }
-}
-
 /// An error parsing a [PaperSpec].
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ParsePaperSpecError {
@@ -1028,17 +1100,4 @@ mod tests {
             );
         }
     }
-
-    #[cfg(feature = "serde")]
-    #[test]
-    fn test_serde() {
-        assert_eq!(
-            serde_json::to_string(&PaperSize::new(8.5, 11.0, Unit::Inch)).unwrap(),
-            "\"8.5x11in\""
-        );
-        assert_eq!(
-            serde_json::from_str::<PaperSize>("\"8.5x11in\"").unwrap(),
-            PaperSize::new(8.5, 11.0, Unit::Inch)
-        )
-    }
 }
index 5479134fb5de75ddcb6e8cc6f80338ffc662f1ce..a46db9bf0f42536ebb20c53826726fdadb53d393 100644 (file)
@@ -57,7 +57,7 @@ displaydoc = "0.2.5"
 codepage-437 = "0.1.0"
 serde_path_to_error = "0.1.20"
 html_parser = "0.7.0"
-paper-sizes = { path = "../paper-sizes" }
+paper-sizes = { path = "../paper-sizes", features = ["serde"] }
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
index 2bfd4be85e9d29bc917d71b7abfa500e9e401637..b676ce0919d34722eacfc8b5ec909eb4dcd37942 100644 (file)
@@ -23,6 +23,7 @@ use std::{
 use cairo::{Context, PdfSurface};
 use enum_map::{EnumMap, enum_map};
 use pango::SCALE;
+use paper_sizes::Unit;
 use serde::{Deserialize, Serialize};
 
 use crate::output::{
@@ -83,24 +84,16 @@ impl CairoDriver {
         let page_style = CairoPageStyle {
             margins: EnumMap::from_fn(|axis| {
                 [
-                    scale(page_setup.margins[axis][0]),
-                    scale(page_setup.margins[axis][1]),
+                    scale(page_setup.margins.0[axis][0].into_unit(Unit::Inch)),
+                    scale(page_setup.margins.0[axis][1].into_unit(Unit::Inch)),
                 ]
             }),
-            headings: page_setup.headings.clone(),
+            header: page_setup.header.clone(),
+            footer: page_setup.footer.clone(),
             initial_page_number: page_setup.initial_page_number,
         };
         let size = Coord2::new(scale(printable[Axis2::X]), scale(printable[Axis2::Y]));
-        let font = FontStyle {
-            bold: false,
-            italic: false,
-            underline: false,
-            markup: false,
-            font: "Sans Serif".into(),
-            fg: [Color::BLACK, Color::BLACK],
-            bg: [Color::WHITE, Color::WHITE],
-            size: 10,
-        };
+        let font = FontStyle::default().with_size(9);
         let font = parse_font_style(&font);
         let fsm_style = CairoFsmStyle {
             size,
@@ -111,14 +104,11 @@ impl CairoDriver {
             font,
             fg: Color::BLACK,
             use_system_colors: false,
-            object_spacing: scale(page_setup.object_spacing),
+            object_spacing: scale(page_setup.object_spacing.into_unit(Unit::Inch)),
             font_resolution: 72.0,
         };
-        let surface = PdfSurface::new(
-            page_setup.paper[Axis2::X] * 72.0,
-            page_setup.paper[Axis2::Y] * 72.0,
-            &config.file,
-        )?;
+        let (width, height) = page_setup.paper.as_unit(Unit::Point).into_width_height();
+        let surface = PdfSurface::new(width, height, &config.file)?;
         Ok(Self {
             fsm_style: Arc::new(fsm_style),
             page_style: Arc::new(page_style),
index 3bc05f0503deb24c24dd6aed5d5bef22f96b996f..ab131375cf618570e7acc8b51e78623dfb01cca0 100644 (file)
@@ -33,7 +33,8 @@ use crate::output::{
 #[derive(Clone, Debug)]
 pub struct CairoPageStyle {
     pub margins: EnumMap<Axis2, [usize; 2]>,
-    pub headings: [Heading; 2],
+    pub header: Heading,
+    pub footer: Heading,
     pub initial_page_number: i32,
 }
 
@@ -90,7 +91,7 @@ impl CairoPager {
             render_heading(
                 &context,
                 &self.fsm_style.font,
-                &self.page_style.headings[0],
+                &self.page_style.header,
                 page_number,
                 self.fsm_style.size[Axis2::X],
                 0, /* XXX*/
@@ -101,7 +102,7 @@ impl CairoPager {
             render_heading(
                 &context,
                 &self.fsm_style.font,
-                &self.page_style.headings[1],
+                &self.page_style.footer,
                 page_number,
                 self.fsm_style.size[Axis2::X],
                 self.fsm_style.size[Axis2::Y] + self.fsm_style.object_spacing,
@@ -186,7 +187,7 @@ fn measure_headings(page_style: &CairoPageStyle, fsm_style: &CairoFsmStyle) -> [
     let context = Context::new(&surface).unwrap();
 
     let mut heading_heights = Vec::with_capacity(2);
-    for heading in &page_style.headings {
+    for heading in [&page_style.header, &page_style.footer] {
         let mut height = render_heading(
             &context,
             &fsm_style.font,
@@ -223,7 +224,7 @@ fn render_heading(
         // XXX substitute heading variables
         layout.set_markup(&paragraph.markup);
 
-        layout.set_alignment(horz_align_to_pango(paragraph.horz_align));
+        layout.set_alignment(horz_align_to_pango(paragraph.align));
         layout.set_width(width as i32);
 
         context.save().unwrap();
index 38ea07be9f9d56acd74f0346feb005d2d7d6eaab..ce6ee465fac28d9bddddfbf32fecb55683aaa961 100644 (file)
@@ -27,6 +27,7 @@ use std::{
 use binrw::{BinWrite, Endian};
 use chrono::Utc;
 use enum_map::EnumMap;
+use paper_sizes::Length;
 use quick_xml::{
     ElementWriter,
     events::{BytesText, attributes::Attribute},
@@ -608,8 +609,8 @@ fn write_page_setup<X>(page_setup: &PageSetup, writer: &mut XmlWriter<X>) -> std
 where
     X: Write,
 {
-    fn inches<'a>(x: f64) -> Cow<'a, str> {
-        Cow::from(format!("{x:.2}in"))
+    fn length(length: Length) -> Cow<'static, str> {
+        Cow::from(length.to_string())
     }
 
     writer
@@ -627,12 +628,12 @@ where
                 ChartSize::QuarterHeight => "quarter-height",
             },
         ))
-        .with_attribute(("margin-left", inches(page_setup.margins[Axis2::X][0])))
-        .with_attribute(("margin-right", inches(page_setup.margins[Axis2::X][1])))
-        .with_attribute(("margin-top", inches(page_setup.margins[Axis2::Y][0])))
-        .with_attribute(("margin-bottom", inches(page_setup.margins[Axis2::Y][1])))
-        .with_attribute(("paper-height", inches(page_setup.paper[Axis2::Y])))
-        .with_attribute(("paper-width", inches(page_setup.paper[Axis2::X])))
+        .with_attribute(("margin-left", length(page_setup.margins.0[Axis2::X][0])))
+        .with_attribute(("margin-right", length(page_setup.margins.0[Axis2::X][1])))
+        .with_attribute(("margin-top", length(page_setup.margins.0[Axis2::Y][0])))
+        .with_attribute(("margin-bottom", length(page_setup.margins.0[Axis2::Y][1])))
+        .with_attribute(("paper-height", length(page_setup.paper.height())))
+        .with_attribute(("paper-width", length(page_setup.paper.width())))
         .with_attribute((
             "reference-orientation",
             match page_setup.orientation {
@@ -640,13 +641,10 @@ where
                 crate::output::page::Orientation::Landscape => "landscape",
             },
         ))
-        .with_attribute((
-            "space-after",
-            Cow::from(format!("{:.1}pt", page_setup.object_spacing * 72.0)),
-        ))
+        .with_attribute(("space-after", length(page_setup.object_spacing)))
         .write_inner_content(|w| {
-            write_page_heading(&page_setup.headings[0], "vps:pageHeader", w)?;
-            write_page_heading(&page_setup.headings[1], "vps:pageFooter", w)?;
+            write_page_heading(&page_setup.header, "vps:pageHeader", w)?;
+            write_page_heading(&page_setup.footer, "vps:pageFooter", w)?;
             Ok(())
         })?;
     Ok(())
index 19ddcf801f0eea7cfe62c64f51d3c65c00710fc8..9aeeb29d93af435abb311a458bfa05d79f1c3f5b 100644 (file)
 // You should have received a copy of the GNU General Public License along with
 // this program.  If not, see <http://www.gnu.org/licenses/>.
 
-use std::{path::Path, str::FromStr, sync::LazyLock};
+use std::{str::FromStr, sync::LazyLock};
 
 use enum_map::{EnumMap, enum_map};
-use paper_sizes::{Catalog, PaperSize};
+use paper_sizes::{Catalog, Length, PaperSize, Unit};
 use serde::{Deserialize, Deserializer, Serialize, de::Error};
 
+use crate::output::{pivot::FontStyle, spv::html::parse_paragraphs};
+
 use super::pivot::{Axis2, HorzAlign};
 
 #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
@@ -48,91 +50,167 @@ pub enum ChartSize {
     QuarterHeight,
 }
 
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct Paragraph {
     pub markup: String,
-    pub horz_align: HorzAlign,
+    pub align: HorzAlign,
+    pub font_style: FontStyle,
 }
 
 impl Default for Paragraph {
     fn default() -> Self {
         Self {
             markup: Default::default(),
-            horz_align: HorzAlign::Left,
+            align: HorzAlign::Left,
+            font_style: FontStyle::default().with_size(10),
         }
     }
 }
 
-#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Debug, Default, PartialEq)]
 pub struct Heading(pub Vec<Paragraph>);
 
-#[derive(Clone, Debug, Deserialize, Serialize)]
+impl<'de> Deserialize<'de> for Heading {
+    /// Deserializes a heading from a single string, which is converted into
+    /// paragraphs by `parse_paragraphs`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let text = String::deserialize(deserializer)?;
+        let paragraphs = parse_paragraphs(&text);
+        Ok(Self(paragraphs))
+    }
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
 #[serde(default)]
 pub struct PageSetup {
     /// Page number of first page.
     pub initial_page_number: i32,
 
     /// Paper size in inches.
-    #[serde(with = "paper_size_serde")]
-    pub paper: EnumMap<Axis2, f64>,
+    #[serde(deserialize_with = "deserialize_paper_size")]
+    pub paper: PaperSize,
 
-    /// Margin width in inches.
-    pub margins: EnumMap<Axis2, [f64; 2]>,
+    /// Margin width.
+    pub margins: Margins,
 
     /// Portrait or landscape.
     pub orientation: Orientation,
 
-    /// Space between objects, in inches.
-    pub object_spacing: f64,
+    /// Space between objects.
+    pub object_spacing: Length,
 
     /// Size of charts.
     pub chart_size: ChartSize,
 
-    /// Header and footer.
-    pub headings: [Heading; 2],
+    /// Header.
+    #[serde(skip_serializing)]
+    pub header: Heading,
+
+    /// Footer.
+    #[serde(skip_serializing)]
+    pub footer: Heading,
 }
 
 static CATALOG: LazyLock<Catalog> = LazyLock::new(|| Catalog::new());
 
-mod paper_size_serde {
-    use std::str::FromStr;
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct Margins(pub EnumMap<Axis2, [Length; 2]>);
 
-    use enum_map::EnumMap;
-    use paper_sizes::{PaperSize, Unit::Inch};
-    use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error};
+impl Margins {
+    fn new(top: Length, right: Length, bottom: Length, left: Length) -> Self {
+        Self(enum_map! {
+            Axis2::X => [left, right],
+            Axis2::Y => [top, bottom],
+        })
+    }
 
-    use crate::{
-        output::{
-            page::{CATALOG, paper_size_to_enum_map},
-            pivot::Axis2,
-        },
-        util::ToSmallString,
-    };
+    fn new_uniform(width: Length) -> Self {
+        Self(EnumMap::from_fn(|_| [width, width]))
+    }
+
+    fn new_width_height(width: Length, height: Length) -> Self {
+        Self(enum_map! {
+            Axis2::X => [width, width],
+            Axis2::Y => [height, height],
+        })
+    }
+
+    fn total(&self, axis: Axis2, unit: Unit) -> f64 {
+        self.0[axis][0].into_unit(unit) + self.0[axis][1].into_unit(unit)
+    }
+}
 
-    pub fn deserialize<'de, D>(deserializer: D) -> Result<EnumMap<Axis2, f64>, D::Error>
+impl Default for Margins {
+    /// Returns half-inch margins on all four sides.
+    fn default() -> Self {
+        let half_inch = Length::new(0.5, Unit::Inch);
+        Self::new_uniform(half_inch)
+    }
+}
+
+impl Serialize for Margins {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
-        D: Deserializer<'de>,
+        S: serde::Serializer,
     {
-        let size = String::deserialize(deserializer)?;
-        let paper_size = PaperSize::from_str(&size).or_else(|_| {
-            CATALOG
-                .get_by_name(&size)
-                .map(|spec| spec.size)
-                .ok_or_else(|| D::Error::custom("unknown or invalid paper size {size}"))
-        })?;
-        Ok(paper_size_to_enum_map(paper_size))
+        {
+            let l = self.0[Axis2::X][0];
+            let r = self.0[Axis2::X][1];
+            let t = self.0[Axis2::Y][0];
+            let b = self.0[Axis2::Y][1];
+            if l == r {
+                if t == b {
+                    if l == t {
+                        l.serialize(serializer)
+                    } else {
+                        [t, l].serialize(serializer)
+                    }
+                } else {
+                    [t, l, b].serialize(serializer)
+                }
+            } else {
+                [t, r, b, l].serialize(serializer)
+            }
+        }
     }
+}
 
-    pub fn serialize<S>(paper_size: &EnumMap<Axis2, f64>, serializer: S) -> Result<S::Ok, S::Error>
+impl<'de> Deserialize<'de> for Margins {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     where
-        S: Serializer,
+        D: Deserializer<'de>,
     {
-        PaperSize::new(paper_size[Axis2::X], paper_size[Axis2::Y], Inch)
-            .to_string()
-            .serialize(serializer)
+        #[derive(Deserialize)]
+        #[serde(untagged)]
+        enum Margins {
+            Array(Vec<Length>),
+            Value(Length),
+        }
+        let (t, r, b, l) = match Margins::deserialize(deserializer)? {
+            Margins::Array(items) if items.len() == 1 => (items[0], items[0], items[0], items[0]),
+            Margins::Array(items) if items.len() == 2 => (items[0], items[1], items[0], items[1]),
+            Margins::Array(items) if items.len() == 3 => (items[0], items[1], items[2], items[1]),
+            Margins::Array(items) if items.len() == 4 => (items[0], items[1], items[2], items[3]),
+            Margins::Value(value) => (value, value, value, value),
+            _ => return Err(D::Error::custom("invalid margins")),
+        };
+        Ok(Self(enum_map! {
+            Axis2::X => [l, r],
+            Axis2::Y => [t, b],
+        }))
     }
 }
 
+/// Deserializes a [PaperSize] from a string.
+///
+/// The string is first parsed as an explicit size with [PaperSize::from_str].
+/// If that fails, it is looked up as a paper name in `CATALOG`.
+///
+/// # Errors
+///
+/// Fails if the input is not a string, or if it is neither a parsable size
+/// nor a name known to the catalog.
+pub fn deserialize_paper_size<'de, D>(deserializer: D) -> Result<PaperSize, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let s = String::deserialize(deserializer)?;
+    PaperSize::from_str(&s).or_else(|_| {
+        CATALOG
+            .get_by_name(&s)
+            .map(|spec| spec.size)
+            // `custom` does no interpolation of its own, so the offending
+            // input has to be formatted into the message explicitly.
+            .ok_or_else(|| D::Error::custom(format!("unknown or invalid paper size {s}")))
+    })
+}
+
 fn paper_size_to_enum_map(paper_size: PaperSize) -> EnumMap<Axis2, f64> {
     let (w, h) = paper_size
         .as_unit(paper_sizes::Unit::Inch)
@@ -147,18 +225,69 @@ impl Default for PageSetup {
     fn default() -> Self {
         Self {
             initial_page_number: 1,
-            paper: paper_size_to_enum_map(CATALOG.default_paper().size),
-            margins: enum_map! { Axis2::X => [0.5, 0.5], Axis2::Y => [0.5, 0.5] },
+            paper: CATALOG.default_paper().size,
+            margins: Margins::default(),
             orientation: Default::default(),
-            object_spacing: 12.0 / 72.0,
+            object_spacing: Length::new(12.0, Unit::Point),
             chart_size: Default::default(),
-            headings: Default::default(),
+            header: Default::default(),
+            footer: Default::default(),
         }
     }
 }
 
 impl PageSetup {
     pub fn printable_size(&self) -> EnumMap<Axis2, f64> {
-        EnumMap::from_fn(|axis| self.paper[axis] - self.margins[axis][0] - self.margins[axis][1])
+        let paper = paper_size_to_enum_map(self.paper);
+        EnumMap::from_fn(|axis| paper[axis] - self.margins.total(axis, Unit::Inch))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use paper_sizes::{Length, Unit};
+
+    use crate::output::page::{Margins, PageSetup};
+
+    #[test]
+    fn margins() {
+        let a = Length::new(1.0, Unit::Inch);
+        let b = Length::new(2.0, Unit::Point);
+        let c = Length::new(3.0, Unit::Millimeter);
+        let d = Length::new(4.5, Unit::Millimeter);
+        assert_eq!(
+            serde_json::to_string(&Margins::new_uniform(a)).unwrap(),
+            "\"1in\""
+        );
+        assert_eq!(
+            serde_json::from_str::<Margins>("\"1in\"").unwrap(),
+            Margins::new_uniform(a)
+        );
+        assert_eq!(
+            serde_json::from_str::<Margins>("[\"1in\"]").unwrap(),
+            Margins::new_uniform(a)
+        );
+        assert_eq!(
+            serde_json::to_string(&Margins::new_width_height(a, b)).unwrap(),
+            "[\"2pt\",\"1in\"]"
+        );
+        assert_eq!(
+            serde_json::to_string(&Margins::new(a, b, c, b)).unwrap(),
+            "[\"1in\",\"2pt\",\"3mm\"]"
+        );
+        assert_eq!(
+            serde_json::to_string(&Margins::new(a, b, c, d)).unwrap(),
+            "[\"1in\",\"2pt\",\"3mm\",\"4.5mm\"]"
+        );
+    }
+
+    #[test]
+    fn page_setup() {
+        let s = toml::to_string(&PageSetup::default()).unwrap();
+        println!("{s}");
+        assert_eq!(
+            toml::from_str::<PageSetup>(&s).unwrap(),
+            PageSetup::default()
+        );
     }
 }
index 6df2acf86d4da3662bd1b1f2f6d4c06138366c7c..967ea845d105e3e5e8c514d98de232d54a7eb47b 100644 (file)
@@ -155,10 +155,7 @@ impl Area {
     }
 
     fn default_font_style(self) -> FontStyle {
-        FontStyle {
-            bold: self == Area::Title,
-            ..FontStyle::default()
-        }
+        FontStyle::default().with_bold(self == Area::Title)
     }
 
     fn default_area_style(self) -> AreaStyle {
@@ -981,7 +978,7 @@ pub struct AreaStyle {
     pub font_style: FontStyle,
 }
 
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Serialize, PartialEq)]
 pub struct CellStyle {
     /// `None` means "mixed" alignment: align strings to the left, numbers to
     /// the right.
@@ -1078,6 +1075,48 @@ impl Default for FontStyle {
     }
 }
 
+/// Builder-style setters: each consumes the style and returns it with one
+/// field replaced.
+impl FontStyle {
+    /// Returns this style with `size` as its font size.
+    pub fn with_size(self, size: i32) -> Self {
+        Self { size, ..self }
+    }
+    /// Returns this style with its bold flag set to `bold`.
+    pub fn with_bold(self, bold: bool) -> Self {
+        Self { bold, ..self }
+    }
+    /// Returns this style with its italic flag set to `italic`.
+    pub fn with_italic(self, italic: bool) -> Self {
+        Self { italic, ..self }
+    }
+    /// Returns this style with its underline flag set to `underline`.
+    pub fn with_underline(self, underline: bool) -> Self {
+        Self { underline, ..self }
+    }
+    /// Returns this style with its markup flag set to `markup`.
+    pub fn with_markup(self, markup: bool) -> Self {
+        Self { markup, ..self }
+    }
+    /// Returns this style with `font` as its font name.
+    pub fn with_font(self, font: impl Into<String>) -> Self {
+        Self {
+            font: font.into(),
+            ..self
+        }
+    }
+    /// Returns this style with both foreground colors set to `fg`.
+    pub fn with_fg(self, fg: Color) -> Self {
+        Self {
+            fg: [fg, fg],
+            ..self
+        }
+    }
+    /// Returns this style with `fg` as its pair of foreground colors.
+    pub fn with_alternate_fg(self, fg: [Color; 2]) -> Self {
+        Self { fg, ..self }
+    }
+    /// Returns this style with both background colors set to `bg`.
+    ///
+    /// Only the background is changed; the foreground colors are kept.
+    pub fn with_bg(self, bg: Color) -> Self {
+        Self {
+            bg: [bg, bg],
+            ..self
+        }
+    }
+    /// Returns this style with `bg` as its pair of background colors.
+    pub fn with_alternate_bg(self, bg: [Color; 2]) -> Self {
+        Self { bg, ..self }
+    }
+}
+
 #[derive(Copy, Clone, PartialEq, Eq)]
 pub struct Color {
     pub alpha: u8,
@@ -1988,7 +2027,7 @@ where
     }
 }
 
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Serialize, PartialEq)]
 pub struct Footnote {
     #[serde(skip)]
     index: usize,
@@ -2134,7 +2173,7 @@ impl Display for Display26Adic {
 ///
 /// 5. A template. PSPP doesn't create these itself yet, but it can read and
 ///    interpret those created by SPSS.
-#[derive(Clone, Default)]
+#[derive(Clone, Default, PartialEq)]
 pub struct Value {
     pub inner: ValueInner,
     pub styling: Option<Box<ValueStyle>>,
@@ -2331,6 +2370,28 @@ impl Value {
     pub fn with_styling(self, styling: Option<Box<ValueStyle>>) -> Self {
         Self { styling, ..self }
     }
+    /// Returns the font style from this value's styling, or `None` if there
+    /// is no styling or the styling has no font style.
+    pub fn font_style(&self) -> Option<&FontStyle> {
+        let styling = self.styling.as_deref()?;
+        styling.font_style.as_ref()
+    }
+    /// Returns the cell style from this value's styling, or `None` if there
+    /// is no styling or the styling has no cell style.
+    pub fn cell_style(&self) -> Option<&CellStyle> {
+        let styling = self.styling.as_deref()?;
+        styling.cell_style.as_ref()
+    }
+    /// Returns the subscripts from this value's styling, or an empty slice if
+    /// the value has no styling.
+    pub fn subscripts(&self) -> &[String] {
+        match self.styling.as_deref() {
+            Some(styling) => &styling.subscripts,
+            None => &[],
+        }
+    }
+    /// Returns the footnotes from this value's styling, or an empty slice if
+    /// the value has no styling.
+    pub fn footnotes(&self) -> &[Arc<Footnote>] {
+        match self.styling.as_deref() {
+            Some(styling) => &styling.footnotes,
+            None => &[],
+        }
+    }
     pub const fn empty() -> Self {
         Value {
             inner: ValueInner::Empty,
@@ -2663,11 +2724,15 @@ impl Value {
 
 impl Debug for Value {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self.display(()).to_string())
+        write!(f, "{:?}", self.display(()).to_string())?;
+        if let Some(styling) = &self.styling {
+            write!(f, " ({styling:?})")?;
+        }
+        Ok(())
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct NumberValue {
     /// The numerical value, or `None` if it is a missing value.
     pub value: Option<f64>,
@@ -2728,7 +2793,7 @@ pub struct BareNumberValue<'a>(
     #[serde(serialize_with = "NumberValue::serialize_bare")] pub &'a NumberValue,
 );
 
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Serialize, PartialEq)]
 pub struct StringValue {
     /// The string value.
     ///
@@ -2745,14 +2810,14 @@ pub struct StringValue {
     pub value_label: Option<String>,
 }
 
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Serialize, PartialEq)]
 pub struct VariableValue {
     pub show: Option<Show>,
     pub var_name: String,
     pub variable_label: Option<String>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct TextValue {
     pub user_provided: bool,
     /// Localized.
@@ -2800,14 +2865,14 @@ impl TextValue {
     }
 }
 
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug, Serialize, PartialEq)]
 pub struct TemplateValue {
     pub args: Vec<Vec<Value>>,
     pub localized: String,
     pub id: Option<String>,
 }
 
-#[derive(Clone, Debug, Default, Serialize)]
+#[derive(Clone, Debug, Default, Serialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ValueInner {
     Number(NumberValue),
@@ -2857,7 +2922,7 @@ impl ValueInner {
     }
 }
 
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, PartialEq)]
 pub struct ValueStyle {
     pub cell_style: Option<CellStyle>,
     pub font_style: Option<FontStyle>,
index dc5dd541de4a0ef25adc9ec564541130ac176a4d..0036bdb2aabaa685d2f02fa30080c4b11d7569f9 100644 (file)
@@ -34,7 +34,7 @@ use crate::output::{
 };
 
 mod css;
-mod html;
+pub mod html;
 mod light;
 
 #[derive(Debug, Display, thiserror::Error)]
index b33d83ce12d7a1cb458fe2f37e40ce52a40974be..861182fb374f7bf6aa1ca83ff4377eabf1586d77 100644 (file)
@@ -1,8 +1,16 @@
+use std::{
+    borrow::Cow,
+    fmt::{Display, Write},
+    ops::Not,
+};
+
+use itertools::Itertools;
+
 use crate::output::pivot::FontStyle;
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 enum Token<'a> {
-    Id(&'a str),
+    Id(Cow<'a, str>),
     LeftCurly,
     RightCurly,
     Colon,
@@ -34,8 +42,41 @@ impl<'a> Iterator for Lexer<'a> {
             '}' => (Token::RightCurly, rest),
             ':' => (Token::Colon, rest),
             ';' => (Token::Semicolon, rest),
+            '\'' | '"' => {
+                let quote = c;
+                let mut s = String::new();
+                while let Some(c) = iter.next() {
+                    if c == quote {
+                        break;
+                    } else if c != '\\' {
+                        s.push(c);
+                    } else {
+                        let start = iter.as_str();
+                        match iter.next() {
+                            None => break,
+                            Some(a) if a.is_ascii_alphanumeric() => {
+                                let n = start
+                                    .chars()
+                                    .take_while(|c| c.is_ascii_alphanumeric())
+                                    .take(6)
+                                    .count();
+                                iter = start[n..].chars();
+                                if let Ok(code_point) = u32::from_str_radix(&start[..n], 16)
+                                    && let Ok(c) = char::try_from(code_point)
+                                {
+                                    s.push(c);
+                                }
+                            }
+                            Some('\n') => (),
+                            Some(other) => s.push(other),
+                        }
+                    }
+                }
+                (Token::Id(Cow::from(s)), iter.as_str())
+            }
             _ => {
-                while let Some(c) = iter.next()
+                while !iter.as_str().starts_with("-->")
+                    && let Some(c) = iter.next()
                     && !c.is_whitespace()
                     && c != '{'
                     && c != '}'
@@ -46,7 +87,7 @@ impl<'a> Iterator for Lexer<'a> {
                 }
                 let id_len = s.len() - rest.len();
                 let (id, rest) = s.split_at(id_len);
-                (Token::Id(id), rest)
+                (Token::Id(Cow::from(id)), rest)
             }
         };
         self.0 = rest;
@@ -62,15 +103,15 @@ impl FontStyle {
                 && let Some(Token::Colon) = lexer.next()
                 && let Some(Token::Id(value)) = lexer.next()
             {
-                match key {
+                match key.as_ref() {
                     "color" => {
                         if let Ok(color) = value.parse() {
                             self.fg = [color; 2];
                         }
                     }
                     "font-weight" => self.bold = value == "bold",
-                    "font-self" => self.italic = value == "italic",
-                    "text-decoration" => self.underline = dbg!(value) == "underline",
+                    "font-style" => self.italic = value == "italic",
+                    "text-decoration" => self.underline = value == "underline",
                     "font-family" => self.font = value.into(),
                     "font-size" => {
                         if let Ok(size) = value.parse::<i32>() {
@@ -88,44 +129,195 @@ impl FontStyle {
         style.parse_css(s);
         style
     }
-}
 
-#[cfg(test)]
-#[test]
-fn test_css_style() {
-    use crate::output::pivot::Color;
-
-    assert_eq!(FontStyle::from_css(""), FontStyle::default());
-    assert_eq!(
-        FontStyle::from_css(r#"p{color:ff0000}"#),
-        FontStyle {
-            fg: [Color::RED; 2],
-            ..FontStyle::default()
+    /// Renders the differences between `self` and `base` as CSS.
+    ///
+    /// Only the properties whose value in `self` differs from `base` are
+    /// emitted: font family, weight, style, underlining, size, and the first
+    /// foreground color.  The declarations are joined with `"; "` and wrapped
+    /// as `<!-- p { ... } -->`.
+    ///
+    /// Returns `None` if none of those properties differ.
+    pub fn to_css(&self, base: &FontStyle) -> Option<String> {
+        let mut settings = Vec::new();
+        if self.font != base.font {
+            // A family that is a plain identifier is written bare; anything
+            // else (for example, a name containing spaces) is quoted and
+            // escaped.
+            if is_css_ident(&self.font) {
+                settings.push(format!("font-family: {}", &self.font));
+            } else {
+                settings.push(format!("font-family: {}", CssString(&self.font)));
+            }
+        }
+        if self.bold != base.bold {
+            settings.push(format!(
+                "font-weight: {}",
+                if self.bold { "bold" } else { "normal" }
+            ));
         }
-    );
-    assert_eq!(
-        FontStyle::from_css("p {font-weight: bold; text-decoration: underline}"),
-        FontStyle {
-            bold: true,
-            underline: true,
-            ..FontStyle::default()
+        if self.italic != base.italic {
+            // The value must come from `italic`, the same flag that was
+            // compared above.
+            settings.push(format!(
+                "font-style: {}",
+                if self.italic { "italic" } else { "normal" }
+            ));
         }
-    );
-    assert_eq!(
-        FontStyle::from_css("p {font-family: Monospace}"),
-        FontStyle {
-            font: String::from("Monospace"),
-            ..FontStyle::default()
+        if self.underline != base.underline {
+            // Likewise, the value must come from `underline`.
+            settings.push(format!(
+                "text-decoration: {}",
+                if self.underline { "underline" } else { "none" }
+            ));
+        }
+        if self.size != base.size {
+            // The CSS size is 4/3 of the stored size, computed with integer
+            // arithmetic (so the result is truncated).
+            settings.push(format!("font-size: {}", self.size as i64 * 4 / 3));
+        }
+        if self.fg[0] != base.fg[0] {
+            settings.push(format!("color: {}", self.fg[0].display_css()));
+        }
+        settings
+            .is_empty()
+            .not()
+            .then(|| format!("<!-- p {{ {} }} -->", settings.into_iter().join("; ")))
+    }
+}
+
+/// Returns true if `s` can be written in CSS output without quoting.
+///
+/// `s` qualifies when it is nonempty, starts with an ASCII letter or `_`,
+/// and every character is an ASCII letter, `_`, or has a code point above
+/// 159.  Digits and hyphens are not accepted, so such names get quoted.
+fn is_css_ident(s: &str) -> bool {
+    let is_plain = |c: char| matches!(c, 'a'..='z' | 'A'..='Z' | '_');
+    match s.chars().next() {
+        Some(first) if is_plain(first) => s.chars().all(|c| is_plain(c) || u32::from(c) > 159),
+        _ => false,
+    }
+}
+
+/// Wrapper whose `Display` implementation writes the wrapped text as a
+/// quoted CSS string.
+///
+/// The text is delimited by double quotes, unless it contains a double quote
+/// and no single quote, in which case single quotes are used.  The delimiter
+/// and backslashes are escaped with a backslash, and a newline is written as
+/// the escape `\00000a`.
+struct CssString<'a>(&'a str);
+
+impl<'a> Display for CssString<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let text = self.0;
+        // Pick the delimiter that needs the least escaping.
+        let delimiter = match (text.contains('"'), text.contains('\'')) {
+            (true, false) => '\'',
+            _ => '"',
+        };
+        f.write_char(delimiter)?;
+        for c in text.chars() {
+            if c == '\n' {
+                // Six digits, so that a following character can never be
+                // taken as part of the escape.
+                f.write_str("\\00000a")?;
+            } else {
+                if c == delimiter || c == '\\' {
+                    f.write_char('\\')?;
+                }
+                f.write_char(c)?;
+            }
         }
-    );
-    assert_eq!(
-        FontStyle::from_css("p {font-size: 24}"),
-        FontStyle {
-            size: 18,
-            ..FontStyle::default()
+        f.write_char(delimiter)
+    }
+}
+
+// Unit tests for the CSS lexer and for converting `FontStyle` to and from CSS.
+#[cfg(test)]
+mod tests {
+    use std::borrow::Cow;
+
+    use crate::output::{
+        pivot::{Color, FontStyle},
+        spv::css::{Lexer, Token},
+    };
+
+    // Checks how the lexer decodes quoted strings, including escapes.
+    #[test]
+    fn css_strings() {
+        // Asserts that lexing `css` yields exactly one `Token::Id` whose
+        // content is `value`, followed by end of input.
+        #[track_caller]
+        fn test_string(css: &str, value: &str) {
+            let mut lexer = Lexer(css);
+            assert_eq!(lexer.next(), Some(Token::Id(Cow::from(value))));
+            assert_eq!(lexer.next(), None);
         }
-    );
-    dbg!(FontStyle::from_css(
-        "color: red; font-weight: bold; text-decoration: underline; font-family: Serif"
-    ));
+
+        // Double-quoted strings.  Note that the space after the short hex
+        // escape `\22` is expected to be kept, and that a backslash followed
+        // by a newline contributes nothing to the value.
+        test_string(r#""abc""#, "abc");
+        test_string(r#""a\"'\'bc""#, "a\"''bc");
+        test_string(r#""a\22 bc""#, "a\" bc");
+        test_string(r#""a\000022bc""#, "a\"bc");
+        test_string(r#""a'bc""#, "a'bc");
+        test_string(
+            r#""\\\
+xyzzy""#,
+            "\\xyzzy",
+        );
+
+        // The same cases with single-quoted strings.
+        test_string(r#"'abc'"#, "abc");
+        test_string(r#"'a"\"\'bc'"#, "a\"\"'bc");
+        test_string(r#"'a\22 bc'"#, "a\" bc");
+        test_string(r#"'a\000022bc'"#, "a\"bc");
+        test_string(r#"'a\'bc'"#, "a'bc");
+        test_string(
+            r#"'a\'bc\
+xyz'"#,
+            "a'bcxyz",
+        );
+        test_string(r#"'\\'"#, "\\");
+    }
+
+    // Checks that `FontStyle::from_css` picks up each supported property.
+    #[test]
+    fn style_from_css() {
+        assert_eq!(FontStyle::from_css(""), FontStyle::default());
+        assert_eq!(
+            FontStyle::from_css(r#"p{color:ff0000}"#),
+            FontStyle {
+                fg: [Color::RED; 2],
+                ..FontStyle::default()
+            }
+        );
+        assert_eq!(
+            FontStyle::from_css("p {font-weight: bold; text-decoration: underline}"),
+            FontStyle {
+                bold: true,
+                underline: true,
+                ..FontStyle::default()
+            }
+        );
+        assert_eq!(
+            FontStyle::from_css("p {font-family: Monospace}"),
+            FontStyle {
+                font: String::from("Monospace"),
+                ..FontStyle::default()
+            }
+        );
+        // A CSS font size of 24 is expected to be stored as size 18.
+        assert_eq!(
+            FontStyle::from_css("p {font-size: 24}"),
+            FontStyle {
+                size: 18,
+                ..FontStyle::default()
+            }
+        );
+        // Declarations wrapped in an HTML comment, without a selector or
+        // braces, are expected to be parsed as well.
+        assert_eq!(
+            FontStyle::from_css(
+                "<!--color: red; font-weight: bold; font-style: italic; text-decoration: underline; font-family: Serif-->"
+            ),
+            FontStyle {
+                fg: [Color::RED, Color::RED],
+                bold: true,
+                italic: true,
+                underline: true,
+                font: String::from("Serif"),
+                ..FontStyle::default()
+            }
+        );
+    }
+
+    // Checks that `FontStyle::to_css` emits only the properties that differ
+    // from the base style.
+    #[test]
+    fn style_to_css() {
+        let base = FontStyle::default();
+        // Identical styles produce no CSS at all.
+        assert_eq!(base.to_css(&base), None);
+        assert_eq!(
+            FontStyle::default().with_size(18).to_css(&base),
+            Some("<!-- p { font-size: 24 } -->".into())
+        );
+        assert_eq!(
+            FontStyle::default()
+                .with_bold(true)
+                .with_underline(true)
+                .to_css(&base),
+            Some("<!-- p { font-weight: bold; text-decoration: underline } -->".into())
+        );
+        assert_eq!(
+            FontStyle::default().with_fg(Color::RED).to_css(&base),
+            Some("<!-- p { color: #ff0000 } -->".into())
+        );
+        // An identifier-like family is written bare...
+        assert_eq!(
+            FontStyle::default().with_font("Monospace").to_css(&base),
+            Some("<!-- p { font-family: Monospace } -->".into())
+        );
+        // ...whereas a family containing spaces is quoted.
+        assert_eq!(
+            FontStyle::default()
+                .with_font("Times New Roman")
+                .to_css(&base),
+            Some(r#"<!-- p { font-family: "Times New Roman" } -->"#.into())
+        );
+    }
 }
index b75c762ef72f3c80b8cccd4b27b004a5a1f898ed..ad95996dcddffb1a872760b4c4fd84ddd6a82dce 100644 (file)
@@ -1,17 +1,22 @@
 use std::{
     fmt::{Display, Write},
+    mem::take,
     str::FromStr,
 };
 
 use html_parser::{Dom, Element, Node};
 
-use crate::output::pivot::{Color, FontStyle, Value};
+use crate::output::{
+    page::Paragraph,
+    pivot::{Color, FontStyle, HorzAlign, Value},
+};
 
 fn find_element<'a>(elements: &'a [Node], name: &str) -> Option<&'a Element> {
     for element in elements {
         if let Node::Element(element) = element
             && element.name == name
         {
+            dbg!(element);
             return Some(element);
         }
     }
@@ -63,6 +68,7 @@ fn extract_html_text(node: &Node, base_font_size: i32, s: &mut String) {
                 }
             }
         }
+        Node::Element(element) if element.name == "head" => (),
         Node::Element(element) => {
             fn push_attribute(name: &str, value: impl Display, s: &mut String) {
                 write!(s, " {name}=\"").unwrap();
@@ -126,25 +132,224 @@ fn extract_html_text(node: &Node, base_font_size: i32, s: &mut String) {
     }
 }
 
+/// Recursively converts the HTML subtree at `node` into markup text, which
+/// is appended to the buffer returned by `output.text()`.
+///
+/// - Text is appended with `<`, `>`, and `&` escaped and with runs of
+///   whitespace collapsed.
+/// - `<head>` elements and comments are skipped entirely.
+/// - `<br>` becomes a newline; `<b>`, `<i>`, and `<u>` are passed through;
+///   `<font>` becomes a `<span>` with `face`, `color`, and `size` attributes.
+/// - Any other element contributes only the output of its children.
+///
+/// `base_font_size` is the size that HTML `<font size=...>` values scale.
+fn extract_html_text2(node: &Node, base_font_size: i32, output: &mut impl HtmlOutput) {
+    match node {
+        Node::Text(text) => {
+            let s = output.text();
+            for c in text.chars() {
+                // Appends `c` unless the output already ends in whitespace,
+                // which collapses each run of whitespace to its first
+                // character.  (An empty buffer does accept the character.)
+                fn push_whitespace(c: char, s: &mut String) {
+                    if s.chars().next_back().is_none_or(|c| !c.is_whitespace()) {
+                        s.push(c);
+                    }
+                }
+
+                match c {
+                    '\u{00a0}' => {
+                        // U+00A0 NONBREAKING SPACE is really, really common
+                        // in SPV text and it makes it impossible to break
+                        // syntax across lines.  Translate it into a regular
+                        // space.
+                        push_whitespace(' ', s);
+                    }
+                    '\u{2007}' => {
+                        // U+2007 FIGURE SPACE also crops up weirdly
+                        // sometimes.
+                        push_whitespace(' ', s);
+                    }
+                    _ if c.is_whitespace() => push_whitespace(c, s),
+                    '<' => s.push_str("&lt;"),
+                    '>' => s.push_str("&gt;"),
+                    '&' => s.push_str("&amp;"),
+                    _ => s.push(c),
+                }
+            }
+        }
+        // The caller deals with `<head>` (and the CSS in it) separately.
+        Node::Element(element) if element.name == "head" => (),
+        Node::Element(element) => {
+            // Appends ` name="value"` to `s`, escaping the characters in
+            // `value` that are special inside a quoted attribute.
+            fn push_attribute(name: &str, value: impl Display, s: &mut String) {
+                write!(s, " {name}=\"").unwrap();
+                let value = value.to_string();
+                for c in value.chars() {
+                    match c {
+                        '\n' => s.push_str("&#10;"),
+                        '&' => s.push_str("&amp;"),
+                        '<' => s.push_str("&lt;"),
+                        '>' => s.push_str("&gt;"),
+                        '"' => s.push_str("&quot;"),
+                        _ => s.push(c),
+                    }
+                }
+                s.push('"');
+            }
+
+            let s = output.text();
+            let tag = element.name.as_str();
+            // Emit the opening tag, if any.  `tag` becomes the name of the
+            // closing tag to emit after the children, or `None` if there is
+            // nothing to close.
+            let tag = match tag {
+                "br" | "BR" => {
+                    s.push('\n');
+                    None
+                }
+                "b" | "i" | "u" => {
+                    write!(s, "<{tag}>").unwrap();
+                    Some(tag)
+                }
+                "font" => {
+                    s.push_str("<span");
+                    if let Some(Some(face)) = element.attributes.get("face") {
+                        push_attribute("face", face, s);
+                    }
+                    // A color that fails to parse is silently dropped.
+                    if let Some(Some(color)) = element.attributes.get("color")
+                        && let Ok(color) = Color::from_str(&color)
+                    {
+                        push_attribute("color", color.display_css(), s);
+                    }
+                    // HTML font sizes 1 through 7 index a table of scale
+                    // factors relative to `base_font_size`; any other value
+                    // is ignored.
+                    if let Some(Some(html_size)) = element.attributes.get("size")
+                        && let Ok(html_size) = usize::from_str(&html_size)
+                        && let Some(index) = html_size.checked_sub(1)
+                        && let Some(scale) = [0.444, 0.556, 0.667, 0.778, 1.0, 1.33, 2.0]
+                            .get(index)
+                            .copied()
+                    {
+                        // NOTE(review): the factor of 1024 presumably converts
+                        // points to the units the markup renderer expects;
+                        // confirm against the renderer.
+                        let size = base_font_size as f64 * scale * 1024.0;
+                        push_attribute("size", format_args!("{size:.0}"), s);
+                    }
+                    s.push('>');
+                    Some("span")
+                }
+                _ => None,
+            };
+            for child in &element.children {
+                extract_html_text2(child, base_font_size, output);
+            }
+            if let Some(tag) = tag {
+                // Fetch the buffer again: the children may have switched
+                // `output` to a different paragraph's text.
+                let s = output.text();
+                write!(s, "</{tag}>").unwrap();
+            }
+        }
+        Node::Comment(_) => (),
+    }
+}
+
+/// Destination for the markup produced while parsing HTML.
+///
+/// `parse2` brackets the content of each top-level `<p>` element with
+/// `start_paragraph` and `end_paragraph`; all markup is appended to the
+/// string returned by `text`.
+trait HtmlOutput {
+    /// Called before the content of a paragraph, with its horizontal
+    /// alignment.  The default implementation does nothing.
+    fn start_paragraph(&mut self, _align: HorzAlign) {}
+
+    /// Called after the content of a paragraph.
+    fn end_paragraph(&mut self);
+
+    /// Returns the string to which markup should currently be appended.
+    fn text(&mut self) -> &mut String;
+}
+
+/// Parses `input` as HTML and sends the resulting markup to `output`.
+///
+/// `input` is parsed with `<!doctype html>` prepended.  On success,
+/// `font_style.markup` is set and each top-level node is handled in turn:
+///
+/// - `<head>`: the text of its `<style>` child, if any, is applied to
+///   `font_style` as CSS.
+/// - `<p>`: its content is output as a paragraph, aligned according to its
+///   `align` attribute (`right` or `center`; anything else means left).
+/// - Anything else is output outside of any paragraph.
+///
+/// Returns an error, leaving `font_style` and `output` untouched, if `input`
+/// cannot be parsed.
+fn parse2(
+    input: &str,
+    output: &mut impl HtmlOutput,
+    font_style: &mut FontStyle,
+) -> Result<(), html_parser::Error> {
+    let document = format!("<!doctype html>{input}");
+    let dom = Dom::parse(&document)?;
+    font_style.markup = true;
+    for node in &dom.children {
+        let element = node.element();
+        if let Some(head) = element.filter(|e| e.name.eq_ignore_ascii_case("head")) {
+            if let Some(style) = find_element(&head.children, "style") {
+                let mut css = String::new();
+                get_element_text(style, &mut css);
+                font_style.parse_css(&css);
+            }
+        } else if let Some(p) = element.filter(|e| e.name.eq_ignore_ascii_case("p")) {
+            let requested = p.attributes.get("align").and_then(|value| value.as_deref());
+            let align = match requested {
+                Some(name) if name.eq_ignore_ascii_case("right") => HorzAlign::Right,
+                Some(name) if name.eq_ignore_ascii_case("center") => HorzAlign::Center,
+                // Explicit `left`, an unknown value, or no attribute at all.
+                _ => HorzAlign::Left,
+            };
+            output.start_paragraph(align);
+            extract_html_text2(node, font_style.size, output);
+            output.end_paragraph();
+        } else {
+            extract_html_text2(node, font_style.size, output);
+        }
+    }
+    Ok(())
+}
+
+/// Collects all markup into a single string, ending each paragraph with a
+/// newline.  Paragraph alignment is ignored (the default `start_paragraph`
+/// is used).
+impl HtmlOutput for String {
+    fn end_paragraph(&mut self) {
+        *self += "\n";
+    }
+
+    fn text(&mut self) -> &mut String {
+        self
+    }
+}
+
+/// Converts the HTML in `input` into a user-text [`Value`].
+///
+/// The value's text is the markup extracted from `input`, and its font style
+/// starts from the default at size 10 and is then adjusted by whatever
+/// parsing finds.  If `input` cannot be parsed as HTML, the value's text is
+/// `input` itself, unchanged.
+pub fn parse_value(input: &str) -> Value {
+    let mut font_style = FontStyle::default().with_size(10);
+    let mut markup = String::new();
+    let text = match parse2(input, &mut markup, &mut font_style) {
+        Ok(()) => markup,
+        Err(_) => String::from(input),
+    };
+    Value::new_user_text(text).with_font_style(font_style)
+}
+
+/// Converts the HTML in `input` into a list of paragraphs.
+///
+/// Each top-level `<p>` element becomes a paragraph with that element's
+/// alignment.  Text outside of any `<p>` is gathered into paragraphs of its
+/// own, which are dropped if they turn out to be empty.
+///
+/// If `input` cannot be parsed as HTML, the result is a single paragraph
+/// whose markup is `input` itself, or no paragraphs if `input` is empty.
+pub fn parse_paragraphs(input: &str) -> Vec<Paragraph> {
+    /// Gathers markup into `pending` and moves it to `done` at each
+    /// paragraph boundary.
+    #[derive(Default)]
+    struct Collector {
+        pending: Paragraph,
+        done: Vec<Paragraph>,
+    }
+
+    impl Collector {
+        /// Finishes the pending paragraph, but only if it has any markup.
+        fn flush_pending(&mut self) {
+            if !self.pending.markup.is_empty() {
+                self.done.push(take(&mut self.pending));
+            }
+        }
+    }
+
+    impl HtmlOutput for Collector {
+        fn start_paragraph(&mut self, align: HorzAlign) {
+            // Text that preceded this paragraph becomes its own paragraph.
+            self.flush_pending();
+            self.pending.align = align;
+        }
+
+        fn end_paragraph(&mut self) {
+            self.done.push(take(&mut self.pending));
+        }
+
+        fn text(&mut self) -> &mut String {
+            &mut self.pending.markup
+        }
+    }
+
+    let mut font_style = FontStyle::default().with_size(10);
+    let mut collector = Collector::default();
+    match parse2(input, &mut collector, &mut font_style) {
+        Ok(()) => {
+            // Pick up any text that followed the last paragraph.
+            collector.flush_pending();
+            collector.done
+        }
+        Err(_) if input.is_empty() => Vec::new(),
+        Err(_) => vec![Paragraph {
+            markup: input.into(),
+            ..Paragraph::default()
+        }],
+    }
+}
+
 pub fn parse(input: &str) -> Value {
-    let mut font_style = FontStyle {
-        size: 10,
-        ..Default::default()
-    };
-    let text = match Dom::parse(input) {
+    let mut font_style = FontStyle::default().with_size(10);
+    let text = match Dom::parse(&format!("<!doctype html>{input}")) {
         Ok(dom) => {
             font_style.markup = true;
-            if let Some(head) = find_element(&dom.children, "head")
-                && let Some(style) = find_element(&head.children, "style")
-            {
-                let mut text = String::new();
-                get_element_text(style, &mut text);
-                font_style.parse_css(&text)
-            }
-
             let mut s = String::new();
             for node in &dom.children {
-                extract_html_text(node, font_style.size, &mut s);
+                if let Node::Element(head) = node
+                    && head.name.eq_ignore_ascii_case("head")
+                {
+                    if let Some(style) = find_element(&head.children, "style") {
+                        let mut text = String::new();
+                        get_element_text(style, &mut text);
+                        font_style.parse_css(&text)
+                    }
+                } else {
+                    extract_html_text(node, font_style.size, &mut s);
+                }
             }
             s
         }
@@ -152,3 +357,64 @@ pub fn parse(input: &str) -> Value {
     };
     Value::new_user_text(text).with_font_style(font_style)
 }
+
+// Unit tests for converting SPV HTML into values and paragraphs.
+#[cfg(test)]
+mod tests {
+    use crate::output::{
+        pivot::{FontStyle, Value},
+        spv::html::{parse, parse_paragraphs, parse_value},
+    };
+
+    // CSS inside `<head><style>` is applied to the value's font style, and
+    // the `<head>` itself contributes no text.
+    #[test]
+    fn css() {
+        assert_eq!(
+            parse("<head><style><!--p {font-size: 24; font-weight: bold}--></style></head>text"),
+            Value::new_user_text("text").with_font_style(
+                FontStyle::default()
+                    .with_size(18)
+                    .with_bold(true)
+                    .with_markup(true)
+            )
+        );
+    }
+
+    // Inline formatting: `<br>` (in either case) becomes a newline, `<b>` and
+    // `<i>` pass through, and `<font>` becomes `<span>` with converted
+    // attributes.
+    #[test]
+    fn value() {
+        let value = parse_value(
+            r#"<b>bold</b><br><i>italic</i><BR><b><i>bold italic</i></b><br><font color="red" face="Serif">red serif</font><br><font size="7">big</font><br>"#,
+        );
+        assert_eq!(
+            value,
+            Value::new_user_text(
+                r##"<b>bold</b>
+<i>italic</i>
+<b><i>bold italic</i></b>
+<span face="Serif" color="#ff0000">red serif</span>
+<span size="20480">big</span>
+"##
+            )
+            .with_font_style(FontStyle::default().with_size(10).with_markup(true))
+        );
+    }
+
+    // Exercises `parse_paragraphs` on a mix of `<p>` elements and text
+    // outside of paragraphs.
+    //
+    // TODO: this test only prints its result and asserts nothing, so it can
+    // fail only by panicking.  The disabled assertion below was copied from
+    // `value` above: it names `paragraph` rather than `paragraphs` and
+    // compares against a `Value`, so it needs rewriting before it can be
+    // enabled.
+    #[test]
+    fn paragraphs() {
+        let paragraphs = parse_paragraphs(
+            r#"<p align="left"><b>bold</b><br><i>italic</i><BR><b><i>bold italic</i></b><br><font color="red" face="Serif">red serif</font><br><font size="7">big</font><br></p>not in a paragraph<p align="right">right justified</p><p align="center">centered</p>trailing"#,
+        );
+        dbg!(&paragraphs);
+        /*
+        assert_eq!(
+            paragraph,
+            Value::new_user_text(
+                r##"<b>bold</b>
+<i>italic</i>
+<b><i>bold italic</i></b>
+<span face="Serif" color="#ff0000">red serif</span>
+<span size="20480">big</span>
+"##
+            )
+            .with_font_style(FontStyle::default().with_size(10).with_markup(true))
+        );*/
+    }
+}
index 3185dbb1b8c0155ab0e7a4bf62ca44a998837d2b..dfc148390d7debc56b9aacd1449a952997ab2315 100644 (file)
@@ -392,24 +392,19 @@ pub struct DrawCell<'a> {
 
 impl<'a> DrawCell<'a> {
+    /// Builds the drawing view of the cell `inner`, which belongs to `table`.
+    ///
+    /// The font and cell styles are the ones attached to the cell's value, if
+    /// any; otherwise they come from the table area that the cell is in.
+    /// Subscripts and footnotes are taken from the value, and the value
+    /// options from `table`.
     pub fn new(inner: &'a CellInner, table: &'a Table) -> Self {
-        let (font_style, cell_style, subscripts, footnotes) =
-            if let Some(styling) = &inner.value.styling {
-                (
-                    styling.font_style.as_ref(),
-                    styling.cell_style.as_ref(),
-                    styling.subscripts.as_slice(),
-                    styling.footnotes.as_slice(),
-                )
-            } else {
-                (None, None, [].as_slice(), [].as_slice())
-            };
         Self {
             rotate: inner.rotate,
             inner: &inner.value.inner,
-            font_style: font_style.unwrap_or(&table.areas[inner.area].font_style),
-            cell_style: cell_style.unwrap_or(&table.areas[inner.area].cell_style),
-            subscripts,
-            footnotes,
+            // Fall back to the style of the cell's area when the value does
+            // not carry a style of its own.
+            font_style: inner
+                .value
+                .font_style()
+                .unwrap_or(&table.areas[inner.area].font_style),
+            cell_style: inner
+                .value
+                .cell_style()
+                .unwrap_or(&table.areas[inner.area].cell_style),
+            subscripts: inner.value.subscripts(),
+            footnotes: inner.value.footnotes(),
             value_options: &table.value_options,
         }
     }