From f3f1cda399b9c72553d46980ee6d43d84cb936df Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 17 Aug 2024 23:49:11 -0700 Subject: [PATCH] work on output --- rust/Cargo.lock | 21 ++ rust/Cargo.toml | 1 + rust/src/lib.rs | 1 + rust/src/output/mod.rs | 58 ++++++ rust/src/output/pivot/mod.rs | 389 +++++++++++++++++++++++++++++++++++ 5 files changed, 470 insertions(+) create mode 100644 rust/src/output/mod.rs create mode 100644 rust/src/output/pivot/mod.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 2ebc35eb62..ab286d0aa1 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -170,6 +170,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-map" +version = "2.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" +dependencies = [ + "enum-map-derive", +] + +[[package]] +name = "enum-map-derive" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -528,6 +548,7 @@ dependencies = [ "clap", "diff", "encoding_rs", + "enum-map", "finl_unicode", "flate2", "float_next_after", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 4b4aba6b3f..c4e92cfe72 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -26,6 +26,7 @@ utf8-decode = "1.0.1" bitflags = "2.5.0" unicode-width = "0.1.13" chardetng = "0.1.17" +enum-map = "2.7.3" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 46fe08622a..b75411955f 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -6,6 +6,7 @@ pub mod endian; pub mod format; pub mod identifier; pub mod locale_charset; +pub mod output; #[allow(unused_variables, unused_mut, dead_code)] pub mod raw; pub mod
sack; diff --git a/rust/src/output/mod.rs b/rust/src/output/mod.rs new file mode 100644 index 0000000000..944cbe75d9 --- /dev/null +++ b/rust/src/output/mod.rs @@ -0,0 +1,58 @@ +use std::sync::Arc; + +use self::pivot::Value; + +pub mod pivot; + +/// A single output item. +pub struct Item { + /// The localized label for the item that appears in the outline pane in the + /// output viewer and in PDF outlines. This is `None` if no label has been + /// explicitly set. + label: Option, + + /// A locale-invariant identifier for the command that produced the output, + /// which may be `None` if unknown or if a command did not produce this + /// output. + command_name: Option, + + /// For a group item, this is true if the group's subtree should + /// be expanded in an outline view, false otherwise. + /// + /// For other kinds of output items, this is true to show the item's + /// content, false to hide it. The item's label is always shown in an + /// outline view. + show: bool, + + /// Item details. + details: Details, +} + +pub enum Details { + Chart, + Image, + Group(Vec>), + Message, + Table, + Text(Text), +} + +pub struct Text { + type_: TextType, + + content: Value, +} + +pub enum TextType { + /// `TITLE` and `SUBTITLE` commands. + PageTitle, + + /// Title. + Title, + + /// Syntax printback logging. + Syntax, + + /// Other logging. + Log, +} diff --git a/rust/src/output/pivot/mod.rs b/rust/src/output/pivot/mod.rs new file mode 100644 index 0000000000..a46154a938 --- /dev/null +++ b/rust/src/output/pivot/mod.rs @@ -0,0 +1,389 @@ +//! Pivot tables. +//! +//! Pivot tables are PSPP's primary form of output. They are analogous to the +//! pivot tables you might be familiar with from spreadsheets and databases. +//! See <https://en.wikipedia.org/wiki/Pivot_table> for a brief introduction to +//! the overall concept of a pivot table. +//! +//! In PSPP, the most important internal pieces of a pivot table are: +//! +//! - Title. Every pivot table has a title that is displayed above it. It also +//!
has an optional caption (displayed below it) and corner text (displayed in +//! the upper left corner). +//! +//! - Dimensions. A dimension consists of zero or more categories. A category +//! has a label, such as "df" or "Asymp. Sig." or 123 or a variable name. The +//! categories are the leaves of a tree whose non-leaf nodes form groups of +//! categories. The tree always has a root group whose label is the name of +//! the dimension. +//! +//! - Axes. A table has three axes: column, row, and layer. Each dimension is +//! assigned to an axis, and each axis has zero or more dimensions. When an +//! axis has more than one dimension, they are ordered from innermost to +//! outermost. +//! +//! - Data. A table's data consists of zero or more cells. Each cell maps from +//! a category for each dimension to a value, which is commonly a number but +//! could also be a variable name or an arbitrary text string. +//! +//! Creating a pivot table usually consists of the following steps: +//! +//! 1. Create the table with pivot_table_create(), passing in the title. +//! +//! 2. Optionally, set the format to use for "count" values with +//! pivot_table_set_weight_var() or pivot_table_set_weight_format(). +//! +//! 3. Create each dimension with pivot_dimension_create() and populate it with +//! categories and, possibly, with groups that contain the categories. This +//! call also assigns the dimension to an axis. +//! +//! In simple cases, only a call to pivot_dimension_create() is needed. +//! Other functions such as pivot_category_create_group() can be used for +//! hierarchies of categories. +//! +//! Sometimes it's easier to create categories in tandem with inserting data, +//! for example by adding a category for a variable just before inserting the +//! first cell for that variable. In that case, creating categories and +//! inserting data can be interleaved. +//! +//! 4. Insert data. For each cell, supply the category indexes, which are +//!
assigned starting from 0 in the order in which the categories were +//! created in step 3, and the value to go in the cell. If the table has a +//! small, fixed number of dimensions, functions like, e.g. +//! pivot_table_put3() for 3 dimensions, can be used. The general function +//! pivot_table_put() works for other cases. +//! +//! 5. Output the table for user consumption. Use pivot_table_submit(). + +use std::{collections::HashMap, ops::Range, sync::Arc}; + +use enum_map::{Enum, EnumMap}; + +use crate::format::Spec; + +/// Areas of a pivot table for styling purposes. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Area { + Title, + Caption, + + /// Footnotes. + Footer, + + /// Top-left corner. + Corner, + + ColumnLabels, + RowLabels, + Data, + + /// Layer indication. + Layers, +} + +/// Table borders for styling purposes. +pub enum Border { + Title, + OuterFrame(BoxBorder), + InnerFrame(BoxBorder), + Dimensions(RowColBorder), + Categories(RowColBorder), + DataLeft, + DataTop, +} + +/// The borders on a box. +pub enum BoxBorder { + Left, + Top, + Right, + Bottom, +} + +/// Borders between rows and columns. +pub enum RowColBorder { + RowHorz, + RowVert, + ColHorz, + ColVert, +} + +/// Sizing for rows or columns of a rendered table. +/// +/// The comments below talk about columns and their widths but they apply +/// equally to rows and their heights. +pub struct Sizing { + /// Specific column widths, in 1/96" units. + widths: Vec, + + /// Specific page breaks: 0-based columns after which a page break must + /// occur, e.g. a value of 1 requests a break after the second column. + breaks: Vec, + + /// Keeps: columns to keep together on a page if possible. + keeps: Vec>, +} + +#[derive(Enum)] +pub enum Axis { + Layer, + Row, + Column, +} + +/// An axis within a pivot table. +pub struct TableAxis { + /// `dimensions[0]` is the innermost dimension.
+ dimensions: Vec, + + /// The number of rows or columns along the axis, that is, the product of + /// `dimensions[*].n_leaves`. It is 0 if any dimension has 0 leaves. + extent: usize, + + /// Sum of `dimensions[*].label_depth`. + label_depth: usize, +} + +/// Dimensions. +/// +/// A [Dimension] identifies the categories associated with a single dimension +/// within a multidimensional pivot table. +/// +/// A dimension contains a collection of categories, which are the leaves in a +/// tree of groups. +/// +/// (A dimension or a group can contain zero categories, but this is unusual. +/// If a dimension contains no categories, then its table cannot contain any +/// data.) +pub struct Dimension { + axis_type: Axis, + level: usize, + + top_index: usize, + + /// Hierarchy of categories within the dimension. The groups and categories + /// are sorted in the order that should be used for display. This might be + /// different from the original order produced for output if the user + /// adjusted it. + /// + /// The root must always be a group, although it is allowed to have no + /// subcategories. + root: Arc, + + /// All of the leaves reachable via the root. + /// + /// The indexing for `presentation_leaves` is presentation order, thus + /// `presentation_leaves[i]->presentation_index == i`. This order is the + /// same as would be produced by an in-order traversal of the groups. It + /// is the order into which the user reordered or sorted the categories. + /// + /// The indexing for `data_leaves` is that used for `idx` in [Cell], thus + /// `data_leaves[i]->data_index == i`. This might differ from what an + /// in-order traversal of `root` would yield, if the user reordered + /// categories. + data_leaves: Vec>, + presentation_leaves: Vec>, + + /// Display. + hide_all_labels: bool, + + /// Number of rows or columns needed to express the labels. + label_depth: usize, +} + +/// A [Category] is a leaf (a category) or a group.
+pub struct Category { + name: Value, + label_depth: usize, + extra_depth: usize, + type_: CategoryType, +} + +pub enum CategoryType { + Group { + /// The child categories. + /// + /// A group usually has multiple children, but it is allowed to have + /// only one or even (pathologically) none. + children: Vec>, + + /// Display a label for the group itself? + show_label: bool, + + show_label_in_corner: bool, + }, + Leaf { + group_index: usize, + data_index: usize, + presentation_index: usize, + + /// Default format for values in this category. + format: Spec, + + /// Honor [Table]'s `small` setting? + honor_small: bool, + }, +} + +/// Styling for a pivot table. +/// +/// The division between this and the style information in [Table] seems fairly +/// arbitrary. The ultimate reason for the division is simply because that's +/// how SPSS documentation and file formats do it. +struct Look { + name: Option, + + omit_empty: bool, + row_labels_in_corner: bool, + + /// Range of column widths for columns in the row headings and corner, in 1/96" + /// units. + row_heading_widths: Range, + + /// Range of column widths for columns in the column headings, in 1/96" + /// units. + col_heading_widths: Range, + + /// Kind of markers to use for footnotes. + footnote_marker_type: FootnoteMarkerType, + + /// Where to put the footnote markers. + footnote_marker_position: FootnoteMarkerPosition, + // XXX and so on +} + +pub enum FootnoteMarkerType { + /// a, b, c, ... + Alphabetic, + + /// 1, 2, 3, ... + Numeric, +} + +pub enum FootnoteMarkerPosition { + /// Subscripts. + Subscript, + + /// Superscripts. + Superscript, +} + +pub struct Table { + look: Arc, + + rotate_inner_column_labels: bool, + + rotate_outer_row_labels: bool, + + show_grid_lines: bool, + + show_title: bool, + + show_caption: bool, + + show_value: Option, + + show_variables: Option, + + weight_format: Spec, + + /// Current layer indexes, with axes[PIVOT_AXIS_LAYER].n_dimensions + /// elements.
current_layer[i] is an offset into + /// axes[PIVOT_AXIS_LAYER].dimensions[i]->data_leaves[], EXCEPT that a + /// dimension can have zero leaves, in which case current_layer[i] is zero + /// and there's no corresponding leaf. + current_layer: Vec, + + /// Column sizing and page breaks. + column_sizing: Sizing, + + /// Row sizing and page breaks. + row_sizing: Sizing, + + // XXX format settings + /// Numeric grouping character (usually `.` or `,`). + grouping: char, + + small: f64, + + command_local: Option, + command_c: Option, + language: Option, + locale: Option, + dataset: Option, + datafile: Option, + //XXX date + footnotes: Vec, + title: Value, + subtype: Value, + corner_text: Value, + caption: Value, + notes: Option, + dimensions: Vec, + axes: EnumMap, + cells: HashMap, +} + +/// Whether to show variable or value labels or the underlying value or variable name. +pub enum ValueShow { + /// Value or variable name only. + Value, + + /// Label only. + Label, + + /// Value and label. + Both, +} + +pub struct Footnote { + content: Value, + marker: Value, + show: bool, +} + +pub struct Value { + styling: Option>, + inner: ValueInner, +} + +pub enum ValueInner { + Numeric { + show: ValueShow, + format: Spec, + honor_small: bool, + value: f64, + var_name: Option, + value_label: Option, + }, + String { + show: ValueShow, + hex: bool, + s: Option, + var_name: Option, + value_label: Option, + }, + Variable { + show: ValueShow, + var_name: Option, + value_label: Option, + }, + Text { + user_provided: bool, + /// Localized. + local: String, + /// English. + c: String, + /// Identifier. + id: String, + }, + Template { + args: Vec>, + local: String, + id: String, + }, +} + +pub struct ValueStyle { + // XXX +} -- 2.30.2