more pivot table output
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 1 Jan 2025 00:09:25 +0000 (16:09 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 1 Jan 2025 00:09:25 +0000 (16:09 -0800)
rust/pspp/src/format.rs
rust/pspp/src/output/pivot/mod.rs
rust/pspp/src/output/pivot/output.rs
rust/pspp/src/output/table.rs

index 4aeba72e4d0c1b0aa34f37d69c942910e0e75bee..a266953a7f48eb243139ef3b5120e59d84f257bb 100644 (file)
@@ -498,7 +498,7 @@ impl Format {
         d: 2,
     };
 
-    pub fn format(self) -> Type {
+    pub fn type_(self) -> Type {
         self.type_
     }
     pub fn w(self) -> usize {
@@ -684,6 +684,15 @@ pub struct UncheckedFormat {
     pub d: Decimals,
 }
 
+impl UncheckedFormat {
+    pub fn new(type_: Type, w: Width, d: Decimals) -> Self {
+        Self { type_, w, d }
+    }
+    pub fn fix(&self) -> Format {
+        Format::fixed_from(self)
+    }
+}
+
 impl TryFrom<raw::Spec> for UncheckedFormat {
     type Error = Error;
 
@@ -957,7 +966,7 @@ impl<'a> Display for DisplayValue<'a> {
         let number = match self.value {
             Value::Number(number) => *number,
             Value::String(string) => {
-                if self.format.format() == Type::AHex {
+                if self.format.type_() == Type::AHex {
                     for byte in string {
                         write!(f, "{byte:02x}")?;
                     }
@@ -972,7 +981,7 @@ impl<'a> Display for DisplayValue<'a> {
             return self.missing(f);
         };
 
-        match self.format.format() {
+        match self.format.type_() {
             Type::F
             | Type::Comma
             | Type::Dot
index dbbef36f2042ad758ed5ee10c1e72dd2cc21cb77..772a96d48032d6ff0c880348cd61cd678be2f652 100644 (file)
 
 use std::{
     collections::HashMap,
-    fmt::Display,
+    fmt::{Display, Write},
     ops::{Index, Not, Range},
+    str::from_utf8,
     sync::{Arc, OnceLock, Weak},
 };
 
 use chrono::NaiveDateTime;
+use encoding_rs::UTF_8;
 use enum_iterator::Sequence;
 use enum_map::{enum_map, Enum, EnumMap};
+use smallstr::SmallString;
 use smallvec::{smallvec, SmallVec};
 
 use crate::{
-    format::{Format, Settings as FormatSettings},
+    dictionary::Value as DataValue,
+    format::{Format, Settings as FormatSettings, Type, UncheckedFormat},
     settings::{Settings, Show},
 };
 
@@ -753,6 +757,9 @@ impl Rect2 {
             Axis2::Y => y_range.clone(),
         })
     }
+    fn for_cell(cell: Coord2) -> Self {
+        Self::new(cell.x()..cell.x() + 1, cell.y()..cell.y() + 1)
+    }
     fn for_ranges((a, a_range): (Axis2, Range<usize>), b_range: Range<usize>) -> Self {
         let b = !a;
         let mut ranges = EnumMap::default();
@@ -927,11 +934,61 @@ impl PivotTable {
 
 #[derive(Clone, Debug)]
 pub struct Footnote {
+    index: usize,
     content: Value,
-    marker: Value,
+    marker: Option<Value>,
     show: bool,
 }
 
+impl Footnote {
+    pub fn display_marker<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayMarker<'a, 'b> {
+        DisplayMarker {
+            footnote: self,
+            table,
+        }
+    }
+
+    pub fn display_content<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayValue<'a, 'b> {
+        self.content.display(table)
+    }
+}
+
+pub struct DisplayMarker<'a, 'b> {
+    footnote: &'a Footnote,
+    table: &'b PivotTable,
+}
+
+impl<'a, 'b> Display for DisplayMarker<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(marker) = &self.footnote.marker {
+            write!(f, "{}", marker.display(self.table).without_suffixes())
+        } else {
+            let i = self.footnote.index + 1;
+            match self.table.look.footnote_marker_type {
+                FootnoteMarkerType::Alphabetic => write!(f, "{}", Display26Adic(i)),
+                FootnoteMarkerType::Numeric => write!(f, "{i}"),
+            }
+        }
+    }
+}
+
+pub struct Display26Adic(pub usize);
+
+impl Display for Display26Adic {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut output = SmallVec::<[u8; 16]>::new();
+        let mut number = self.0;
+        while number > 0 {
+            number -= 1;
+            let digit = (number % 26) as u8;
+            output.push(digit + b'a');
+            number /= 26;
+        }
+        output.reverse();
+        write!(f, "{}", from_utf8(&output).unwrap())
+    }
+}
+
 /// The content of a single pivot table cell.
 ///
 /// A [Value] is also a pivot table's title, caption, footnote marker and
@@ -973,66 +1030,206 @@ pub struct Footnote {
 ///    interpret those created by SPSS.
 #[derive(Clone, Debug)]
 pub struct Value {
-    styling: Option<Box<ValueStyle>>,
     inner: ValueInner,
+    styling: Option<Box<ValueStyle>>,
+}
+
+impl Value {
+    pub fn new_user_text(s: impl Into<String>) -> Self {
+        let s: String = s.into();
+        Self {
+            inner: ValueInner::Text {
+                user_provided: true,
+                local: s.clone(),
+                c: s.clone(),
+                id: s.clone(),
+            },
+            styling: None,
+        }
+    }
 }
 
 pub struct DisplayValue<'a, 'b> {
+    /// The value to be formatted.
     value: &'a Value,
+
+    /// The pivot table that provides context for formatting, such as its
+    /// settings for showing values and labels.
     table: &'b PivotTable,
+
+    /// Whether to show subscripts and footnotes (which follow the body).
+    show_suffixes: bool,
+}
+
+impl<'a, 'b> DisplayValue<'a, 'b> {
+    pub fn without_suffixes(self) -> Self {
+        Self {
+            show_suffixes: false,
+            ..self
+        }
+    }
+
+    fn show(&self) -> (bool, Option<&String>) {
+        match &self.value.inner {
+            ValueInner::Number {
+                value_label: None, ..
+            }
+            | ValueInner::String {
+                value_label: None, ..
+            }
+            | ValueInner::Variable {
+                variable_label: None,
+                ..
+            }
+            | ValueInner::Text { .. }
+            | ValueInner::Template { .. } => (true, None),
+
+            ValueInner::Number {
+                show,
+                value_label: Some(label),
+                ..
+            }
+            | ValueInner::String {
+                show,
+                value_label: Some(label),
+                ..
+            } => interpret_show(
+                || Settings::global().show_values,
+                self.table.show_values,
+                *show,
+                label,
+            ),
+
+            ValueInner::Variable {
+                show,
+                variable_label: Some(label),
+                ..
+            } => interpret_show(
+                || Settings::global().show_variables,
+                self.table.show_variables,
+                *show,
+                label,
+            ),
+        }
+    }
+
+    fn template(
+        &self,
+        f: &mut std::fmt::Formatter<'_>,
+        template: &str,
+        args: &Vec<Vec<Value>>,
+    ) -> std::fmt::Result {
+        let mut iter = template.as_bytes().iter();
+        while let Some(c) = iter.next() {
+            match c {
+                b'\\' => {
+                    let c = *iter.next().unwrap_or(&b'\\') as char;
+                    let c = if c == 'n' { '\n' } else { c };
+                    write!(f, "{c}")?;
+                }
+                b'^' => {
+                    let (index, rest) = consume_int(iter.as_slice());
+                    if (1..=args.len()).contains(&index) && !args[index - 1].is_empty() {
+                        write!(f, "{}", args[index - 1][0].display(&self.table))?;
+                    }
+                    iter = rest.iter();
+                }
+                b'[' => {
+                    let (a, rest) = extract_inner_template(iter.as_slice());
+                    let (b, rest) = extract_inner_template(rest);
+                    let rest = rest.strip_prefix(b"]").unwrap_or(rest);
+                    iter = rest.iter();
+                }
+                c => write!(f, "{c}")?,
+            }
+        }
+        todo!()
+    }
+}
+
+fn consume_int(input: &[u8]) -> (usize, &[u8]) {
+    let mut n = 0;
+    for (index, c) in input.iter().enumerate() {
+        if !c.is_ascii_digit() {
+            return (n, &input[index..]);
+        }
+        n = n * 10 + (c - b'0') as usize;
+    }
+    (n, &[])
+}
+
+/// Splits an inner template off the front of `input`.
+///
+/// Not implemented yet: calling this function panics via `todo!()`.
+///
+/// NOTE(review): judging by how `DisplayValue::template` destructures the
+/// result, the return value is presumably `(inner, rest)` -- confirm when
+/// this is implemented.
+fn extract_inner_template(input: &[u8]) -> (&[u8], &[u8]) {
+    todo!()
+}
 
 fn interpret_show(
     global_show: impl Fn() -> Show,
     table_show: Option<Show>,
     value_show: Option<Show>,
-    has_label: bool,
-) -> Show {
-    if !has_label {
-        Show::Value
-    } else {
-        value_show.or(table_show).unwrap_or_else(global_show)
+    label: &String,
+) -> (bool, Option<&String>) {
+    match value_show.or(table_show).unwrap_or_else(global_show) {
+        Show::Value => (true, None),
+        Show::Label => (false, Some(label)),
+        Show::Both => (true, Some(label)),
     }
 }
 
 impl<'a, 'b> Display for DisplayValue<'a, 'b> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let (show_value, label) = self.show();
         match &self.value.inner {
             ValueInner::Number {
-                show,
                 format,
                 honor_small,
                 value,
-                var_name,
-                value_label,
+                ..
             } => {
-                let show = interpret_show(
-                    || Settings::global().show_values,
-                    self.table.show_values,
-                    *show,
-                    value_label.is_some(),
-                );
-                todo!()
+                if show_value {
+                    let format = if format.type_() == Type::F
+                        && *honor_small
+                        && value.is_some_and(|value| value != 0.0 && value.abs() < self.table.small)
+                    {
+                        UncheckedFormat::new(Type::E, 40, format.d() as u8).fix()
+                    } else {
+                        *format
+                    };
+                    let mut buf = SmallString::<[u8; 40]>::new();
+                    write!(
+                        &mut buf,
+                        "{}",
+                        DataValue::Number(*value).display(format, UTF_8)
+                    )
+                    .unwrap();
+                    write!(f, "{}", buf.trim_start_matches(' '))?;
+                }
+                if let Some(label) = label {
+                    if show_value {
+                        write!(f, " ")?;
+                    }
+                    f.write_str(label)?;
+                }
+                Ok(())
             }
-            ValueInner::String {
-                show,
-                hex,
-                s,
-                var_name,
-                value_label,
-            } => todo!(),
-            ValueInner::Variable {
-                show,
-                var_name,
-                value_label,
-            } => todo!(),
-            ValueInner::Text {
-                user_provided,
-                local,
-                c,
-                id,
-            } => todo!(),
-            ValueInner::Template { args, local, id } => todo!(),
+
+            ValueInner::String { s, .. } | ValueInner::Variable { var_name: s, .. } => {
+                match (show_value, label) {
+                    (true, None) => write!(f, "{s}"),
+                    (false, Some(label)) => write!(f, "{label}"),
+                    (true, Some(label)) => write!(f, "{s} {label}"),
+                    (false, None) => unreachable!(),
+                }
+            }
+
+            ValueInner::Text { local, .. } => {
+                if self
+                    .value
+                    .styling
+                    .as_ref()
+                    .is_some_and(|styling| styling.font_style.markup)
+                {
+                    todo!();
+                }
+                f.write_str(&local)
+            }
+
+            ValueInner::Template { args, local, .. } => self.template(f, &local, args),
         }
     }
 }
@@ -1042,7 +1239,11 @@ impl Value {
     // superscripts and footnotes.  Settings on `table` control whether variable
     // and value labels are included.
     fn display<'a, 'b>(&'a self, table: &'b PivotTable) -> DisplayValue<'a, 'b> {
-        DisplayValue { value: self, table }
+        DisplayValue {
+            value: self,
+            table,
+            show_suffixes: true,
+        }
     }
 }
 
@@ -1052,21 +1253,24 @@ pub enum ValueInner {
         show: Option<Show>,
         format: Format,
         honor_small: bool,
-        value: f64,
+        value: Option<f64>,
         var_name: Option<String>,
         value_label: Option<String>,
     },
     String {
         show: Option<Show>,
         hex: bool,
-        s: Option<String>,
+
+        /// If `hex` is true, this string should already be hex digits
+        /// (otherwise it would be impossible to encode non-UTF-8 data).
+        s: String,
         var_name: Option<String>,
         value_label: Option<String>,
     },
     Variable {
         show: Option<Show>,
-        var_name: Option<String>,
-        value_label: Option<String>,
+        var_name: String,
+        variable_label: Option<String>,
     },
     Text {
         user_provided: bool,
index 62183b091e2982560f7129eff1e184e6bbd31fd6..7c136bcec99636d9df41c149c67a74ad7152144f 100644 (file)
@@ -1,4 +1,4 @@
-use std::{ops::Range, sync::Arc};
+use std::sync::Arc;
 
 use enum_map::{enum_map, EnumMap};
 use smallvec::{SmallVec, ToSmallVec};
@@ -7,7 +7,7 @@ use crate::output::table::{CellInner, Table};
 
 use super::{
     Area, Axis, Axis2, Axis3, Border, BorderStyle, BoxBorder, Category, CategoryTrait, Color,
-    Coord2, Dimension, PivotTable, Rect2, RowColBorder, Stroke, Value,
+    Coord2, Dimension, Footnote, PivotTable, Rect2, RowColBorder, Stroke, Value,
 };
 
 /// All of the combinations of dimensions along an axis.
@@ -131,7 +131,7 @@ impl PivotTable {
         })
     }
 
-    pub fn output(&self, layer_indexes: &[usize], printing: bool) {
+    pub fn output(&self, layer_indexes: &[usize], printing: bool) -> OutputTables {
         let column_enumeration = self.enumerate_axis(Axis3::X, layer_indexes, self.look.omit_empty);
         let row_enumeration = self.enumerate_axis(Axis3::Y, layer_indexes, self.look.omit_empty);
         let data = Coord2::new(column_enumeration.len(), row_enumeration.len());
@@ -194,11 +194,7 @@ impl PivotTable {
         {
             body.put(
                 Rect2::new(0..stub.x(), 0..stub.y()),
-                CellInner {
-                    rotate: false,
-                    area: Area::Corner,
-                    value: self.corner_text.clone(),
-                },
+                CellInner::new(Area::Corner, self.corner_text.clone()),
             );
         }
 
@@ -227,11 +223,7 @@ impl PivotTable {
             let mut title = self.create_aux_table(Coord2::new(0, 0));
             title.put(
                 Rect2::new(0..1, 0..1),
-                CellInner {
-                    rotate: false,
-                    area: Area::Title,
-                    value: self.title.clone(),
-                },
+                CellInner::new(Area::Title, self.title.clone()),
             );
             Some(title)
         } else {
@@ -240,19 +232,103 @@ impl PivotTable {
 
         // Layers.
         let n_layers: usize = self.nonempty_layer_dimensions().count();
-/*
         let layers = if n_layers > 0 {
             let mut layers = self.create_aux_table(Coord2::new(1, n_layers));
             for (y, dimension) in self.nonempty_layer_dimensions().enumerate() {
-                
+                layers.put(
+                    Rect2::for_cell(Coord2::new(0, y)),
+                    CellInner::new(
+                        Area::Layers,
+                        Some(dimension.data_leaves[layer_indexes[y]].name.clone()),
+                    ),
+                );
+            }
+            Some(layers)
+        } else {
+            None
+        };
+
+        // Caption.
+        let caption = if self.caption.is_some() && self.show_caption {
+            let mut caption = self.create_aux_table(Coord2::new(1, 1));
+            caption.put(
+                Rect2::for_cell(Coord2::new(0, 0)),
+                CellInner::new(Area::Caption, self.caption.clone()),
+            );
+            Some(caption)
+        } else {
+            None
+        };
+
+        // Footnotes.
+        let f = self.collect_footnotes(&[
+            title.as_ref(),
+            layers.as_ref(),
+            Some(&body),
+            caption.as_ref(),
+        ]);
+        let footnotes = if !f.is_empty() {
+            let mut footnotes = self.create_aux_table(Coord2::new(1, f.len()));
+            for (y, f) in f.into_iter().enumerate() {
+                let s = format!("{}. {}", f.display_marker(self), f.display_content(self));
+                let value = Some(Value::new_user_text(s));
+                footnotes.put(
+                    Rect2::for_cell(Coord2::new(0, y)),
+                    CellInner::new(Area::Footer, value),
+                );
             }
-            todo!()
-        } else { None };*/
+            Some(footnotes)
+        } else {
+            None
+        };
+
+        OutputTables {
+            title,
+            layers,
+            body,
+            caption,
+            footnotes,
+        }
     }
 
     fn nonempty_layer_dimensions(&self) -> impl Iterator<Item = &Dimension> {
-        self.axes[Axis3::Z].dimensions.iter().rev().filter(|d| !d.data_leaves.is_empty())
+        self.axes[Axis3::Z]
+            .dimensions
+            .iter()
+            .rev()
+            .filter(|d| !d.data_leaves.is_empty())
     }
+
+    fn collect_footnotes<'a>(&'a self, tables: &[Option<&Table>]) -> Vec<&'a Footnote> {
+        if self.footnotes.is_empty() {
+            return Vec::new();
+        }
+
+        let mut refs = vec![false; self.footnotes.len()];
+        for table in tables.into_iter().flatten() {
+            table.visit_cells(|inner| {
+                if let Some(value) = &inner.value {
+                    if let Some(styling) = &value.styling {
+                        for index in &styling.footnote_indexes {
+                            refs[*index] = true;
+                        }
+                    }
+                }
+            });
+        }
+        refs.iter()
+            .enumerate()
+            .filter_map(|(index, r)| (*r).then_some(&self.footnotes[index]))
+            .collect()
+    }
+}
+
+/// The tables produced by [`PivotTable::output`].
+pub struct OutputTables {
+    /// A single cell holding the pivot table's title, if one is output.
+    pub title: Option<Table>,
+
+    /// One row per nonempty layer dimension, or `None` if there are none.
+    pub layers: Option<Table>,
+
+    /// The body of the pivot table.
+    pub body: Table,
+
+    /// A single cell holding the caption, present only if the pivot table
+    /// has a caption and it is set to be shown.
+    pub caption: Option<Table>,
+
+    /// One row per footnote referenced from the other tables, or `None` if
+    /// no footnotes are referenced.
+    pub footnotes: Option<Table>,
+}
 
 fn find_category<'a>(
@@ -270,28 +346,6 @@ fn find_category<'a>(
     Some(c)
 }
 
-fn fill_cell(
-    table: &mut Table,
-    h: Axis2,
-    h_range: Range<usize>,
-    v_range: Range<usize>,
-    area: Area,
-    value: &Value,
-    rotate: bool,
-) {
-    let mut region = EnumMap::default();
-    region[h] = h_range;
-    region[!h] = v_range;
-    table.put(
-        region.into(),
-        CellInner {
-            rotate,
-            area,
-            value: Some(value.clone()),
-        },
-    );
-}
-
 /// Fills row or column headings into T.
 ///
 /// This function uses terminology and variable names for column headings, but
@@ -488,11 +542,7 @@ fn compose_headings(
             if d.root.show_label_in_corner && h_ofs > 0 {
                 table.put(
                     Rect2::for_ranges((h, 0..h_ofs), top_row..top_row + d.label_depth),
-                    CellInner {
-                        rotate: false,
-                        area: Area::Corner,
-                        value: Some(d.root.name.clone()),
-                    },
+                    CellInner::new(Area::Corner, Some(d.root.name.clone())),
                 );
             }
 
index 53f0e29a90ec854889a979c3dbc50d4859d839b5..ae40092e3aaf2408884f27fb347b51cadc6b43c1 100644 (file)
@@ -59,6 +59,16 @@ pub struct CellInner {
     pub value: Option<Value>,
 }
 
+impl CellInner {
+    pub fn new(area: Area, value: Option<Value>) -> Self {
+        Self {
+            rotate: false,
+            area,
+            value,
+        }
+    }
+}
+
 /// A table.
 pub struct Table {
     /// Number of rows and columns.
@@ -103,6 +113,10 @@ impl Table {
         pos.x() + self.n.x() * pos.y()
     }
 
+    pub fn get(&self, pos: Coord2) -> &Content {
+        &self.contents[self.offset(pos)]
+    }
+
     pub fn put(&mut self, region: Rect2, inner: CellInner) {
         use Axis2::*;
         if region[X].len() == 1 && region[Y].len() == 1 {
@@ -148,4 +162,27 @@ impl Table {
             Axis2::Y => self.v_line(border, a_value, b_range),
         }
     }
+
+    /// Visits all the nonempty cells once.
+    pub fn visit_cells(&self, mut f: impl FnMut(&CellInner)) {
+        for y in 0..self.n.y() {
+            let mut x = 0;
+            while x < self.n.x() {
+                let content = self.get(Coord2::new(x, y));
+                match content {
+                    Content::Empty => {
+                        x += 1;
+                    }
+                    Content::Value(cell_inner) => {
+                        f(&cell_inner);
+                        x += 1;
+                    }
+                    Content::Join(cell) => {
+                        f(&cell.inner);
+                        x = cell.region[Axis2::X].end;
+                    }
+                }
+            }
+        }
+    }
 }