From: Ben Pfaff Date: Thu, 25 Sep 2025 14:50:18 +0000 (-0700) Subject: work on reading spv files X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5b0236fcaa05cd5b7666434955eec751b24ae434;p=pspp work on reading spv files --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index f67759c596..20418374eb 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -625,6 +625,12 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "either" version = "1.15.0" @@ -1014,6 +1020,21 @@ dependencies = [ "digest", ] +[[package]] +name = "html_parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f56db07b6612644f6f7719f8ef944f75fff9d6378fdf3d316fd32194184abd" +dependencies = [ + "doc-comment", + "pest", + "pest_derive", + "serde", + "serde_derive", + "serde_json", + "thiserror", +] + [[package]] name = "httparse" version = "1.10.1" @@ -1617,6 +1638,49 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pest" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843" +dependencies = 
[ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "pest_meta" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "pin-project" version = "1.1.10" @@ -1749,6 +1813,7 @@ dependencies = [ "flate2", "hashbrown 0.15.5", "hexplay", + "html_parser", "indexmap", "itertools 0.14.0", "libc", @@ -1764,6 +1829,7 @@ dependencies = [ "readpass", "serde", "serde_json", + "serde_path_to_error", "smallstr", "smallvec", "thiserror", @@ -1953,18 +2019,28 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -1983,6 +2059,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] 
name = "serde_repr" version = "0.1.20" @@ -2023,6 +2110,17 @@ dependencies = [ "digest", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2428,6 +2526,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicase" version = "2.8.1" diff --git a/rust/doc/src/SUMMARY.md b/rust/doc/src/SUMMARY.md index 4c10ae8220..66f6dd5e19 100644 --- a/rust/doc/src/SUMMARY.md +++ b/rust/doc/src/SUMMARY.md @@ -9,6 +9,7 @@ - [Inspecting Portable Files](invoking/pspp-show-por.md) - [Inspecting SPSS/PC+ Files](invoking/pspp-show-pc.md) - [Decrypting Files](invoking/pspp-decrypt.md) + - [Output Driver Configuration](invoking/output.md) # Language Overview diff --git a/rust/doc/src/invoking/output.md b/rust/doc/src/invoking/output.md new file mode 100644 index 0000000000..223e1654a4 --- /dev/null +++ b/rust/doc/src/invoking/output.md @@ -0,0 +1,51 @@ +# Output Driver Configuration + +PSPP can write output in several formats. This section documents the +supported formats and how they can be configured. + +# Text Output (`.txt` and `.text`) + +PSPP can produce plain text output, drawing boxes using ASCII or +Unicode line drawing characters. + +Plain text output is encoded in UTF-8. + +This driver has the following options: + +* `width: ` + Sets the maximum page width to the specified number of columns. 
To + fit in the given width, output table columns will be word-wrapped + or, if necessary, tables will be broken into multiple chunks. The + default is no maximum width. + +* `boxes: unicode` + `boxes: ascii` + Sets the style used for boxes in the output. The following shows an + example of each style: + + ``` + unicode ascii + ┌────┬────┐ +----+----+ + │ │ │ | | | + ├────┼────┤ +----+----+ + │ │ │ | | | + └────┴────┘ +----+----+ + ``` + + Unicode boxes are generally more attractive but they can be harder + to work with in some environments. The default is `unicode`. + +* `emphasis: ` + If this is set to true, then the output includes bold and underline + emphasis with overstriking. This is supported by only some + software, mainly on Unix. The default is `false`. + +# PDF Output (`.pdf`) + +# HTML Output (`.htm` and `.html`) + +# CSV Output (`.csv`) + +# JSON Output (`.json`) + +# SPSS Viewer Output (`.spv`) diff --git a/rust/doc/src/invoking/pspp-convert.md b/rust/doc/src/invoking/pspp-convert.md index d1248cd67f..c9dfaf63e5 100644 --- a/rust/doc/src/invoking/pspp-convert.md +++ b/rust/doc/src/invoking/pspp-convert.md @@ -1,33 +1,47 @@ -# Converting data files with `pspp convert` +# Converting file formats with `pspp convert` -The `pspp convert` command reads data from one file and writes it to -another. The basic syntax is: +The `pspp convert` command reads SPSS data and viewer files and writes +them out in other formats. The basic syntax is: ``` pspp convert <INPUT> [OUTPUT] ``` -which reads an SPSS system file or portable file or SPSS/PC+ system -file from `<INPUT>` and writes a copy of it to `[OUTPUT]`. If -`[OUTPUT]` is omitted, output is written to the terminal. +which reads an input file from `<INPUT>` and writes a copy of it to +`[OUTPUT]`. If `[OUTPUT]` is omitted, output is written to the +terminal. -If `[OUTPUT]` is specified, then `pspp convert` tries to guess the +The following sections describe how `pspp convert` works with +different kinds of files. 
+ +## Converting `.sav`, `.por`, and `.sys` Data Files + +`pspp convert` can convert SPSS system files (`.sav`), SPSS portable +files (`.por`), and SPSS/PC+ system files (`.sys`) into different +formats. + +If an output file is named, then `pspp convert` tries to guess the output format based on its extension: -* `csv` - `txt` +* `.csv` + `.txt` Comma-separated value. Each value is formatted according to its variable's print format. The first line in the file contains variable names. -* `sav` - `sys` +* `.sav` + `.sys` SPSS system file. Without an output file name, the default output format is CSV. Use `-O ` to override the default or to specify the format for unrecognized extensions. +## Converting `.spv` Viewer Files + +`pspp convert` can convert SPSS viewer files (`.spv` files) into +multiple different formats. + ## Options `pspp convert` accepts the following general options: diff --git a/rust/doc/src/spv/light-detail.md b/rust/doc/src/spv/light-detail.md index 17337d0a8c..6af5c0badc 100644 --- a/rust/doc/src/spv/light-detail.md +++ b/rust/doc/src/spv/light-detail.md @@ -56,14 +56,14 @@ context-free grammar using the following conventions: `bestring` A 32-bit unsigned integer, in little-endian or big-endian byte order, respectively, followed by the specified number of bytes of - character data. (The encoding is indicated by the Formats - nonterminal.) + character data. (The encoding is indicated by the + [`Formats`](#formats) nonterminal.) * `X?` - X is optional, e.g. 00? is an optional zero byte. + X is optional, e.g. `00?` is an optional zero byte. * `X*N` - X is repeated N times, e.g. byte*10 for ten arbitrary bytes. + X is repeated N times, e.g. `byte*10` for ten arbitrary bytes. * `X[NAME]` Gives X the specified NAME. Names are used in textual @@ -76,7 +76,7 @@ context-free grammar using the following conventions: * `(X)` Parentheses are used for grouping to make precedence clear, - especially in the presence of |, e.g. in 00 (01 | 02 | 03) 00. 
+ especially in the presence of `|`, e.g. in `00 (01 | 02 | 03) 00`. * `count(X)` `becount(X)` @@ -104,8 +104,10 @@ name="px">"device-independent pixels" (px), at 96/inch. To convert from pt to px, multiply by 1.33 and round up. To convert from px to pt, divide by 1.33 and round down. +## Top-Level Structure + A "light" detail member `.bin` consists of a number of sections -concatenated together, terminated by an optional byte 01: +concatenated together, terminated by an optional byte `01`: ``` Table => @@ -154,15 +156,15 @@ whose values influence column widths. For the purpose of interpreting these values, a table is divided into the three regions shown below: ``` -+------------------+-------------------------------------------------+ -| | column headings | -| +-------------------------------------------------+ -| corner | | -| and | | -| row headings | data | -| | | -| | | -+------------------+-------------------------------------------------+ +┌──────────────────┬─────────────────────────────────────────────────┐ +│ │ column headings │ +│ ├─────────────────────────────────────────────────┤ +│ corner │ │ +│ and │ │ +│ row headings │ data │ +│ │ │ +│ │ │ +└──────────────────┴─────────────────────────────────────────────────┘ ``` `min-col-heading-width` and `max-col-heading-width` apply to the @@ -231,7 +233,7 @@ Footnote => Value[text] (58 | 31 Value[marker]) int32[show] Each footnote has `text` and an optional custom `marker` (such as `*`). -The syntax for Value would allow footnotes (and their markers) to +The syntax for `Value` would allow footnotes (and their markers) to reference other footnotes, but in practice this doesn't work. `show` is a 32-bit signed integer. 
It is positive to show the @@ -252,12 +254,21 @@ Area => v3(int32[left-margin] int32[right-margin] int32[top-margin] int32[bottom-margin]) ``` -Each `Area` represents the style for a different area of the table, in -the following order: title, caption, footer, corner, column labels, -row labels, data, and layers. - -`index` is the 1-based index of the Area, i.e. 1 for the first `Area`, -through 8 for the final `Area`. +Each `Area` represents the style for a different area of the table. +`index` is the 1-based index of the `Area`, i.e. 1 for the first +`Area`, through 8 for the final `Area`. The following table shows the +`index` values and the areas that they represent: + +| `index` | Area | +|--------:|:--------------| +| 1 | Title | +| 2 | Caption | +| 3 | Footer | +| 4 | Corner | +| 5 | Column labels | +| 6 | Row labels | +| 7 | Data | +| 8 | Layers | `typeface` is the string name of the font used in the area. In the corpus, this is `SansSerif` in over 99% of instances and `Times New @@ -272,13 +283,25 @@ the corpus its values are always integers. `underline` is 1 if the font is underlined, 0 otherwise. -`halign` specifies horizontal alignment: 0 for center, 2 for left, 4 -for right, 61453 for decimal, 64173 for mixed. Mixed alignment varies -according to type: string data is left-justified, numbers and most other -formats are right-justified. +`halign` specifies horizontal alignment: + +| `halign` | Alignment | +|---------:|:----------| +| 0 | Center | +| 2 | Left | +| 4 | Right | +| 64173 | Mixed | + +Mixed alignment varies according to type: string data is +left-justified, numbers and most other formats are right-justified. + +`valign` specifies vertical alignment: -`valign` specifies vertical alignment: 0 for center, 1 for top, 3 for -bottom. +| `valign` | Alignment | +|---------:|:----------| +| 0 | Center | +| 1 | Top | +| 3 | Bottom | `fg-color` and `bg-color` are the foreground color and background color, respectively. 
In the corpus, these are always `#000000` and @@ -290,7 +313,7 @@ should be the same color. When `alternate` is 1, `alt-fg-color` and are empty strings. `left-margin`, `right-margin`, `top-margin`, and `bottom-margin` are -measured in px. +measured in [px](#px). ## Borders @@ -303,7 +326,7 @@ Borders => 00 00 00) Border => - be32[border-type] + be32[index] be32[stroke-type] be32[color] ``` @@ -315,26 +338,30 @@ The fixed value of `endian` can be used to validate the endianness. `show-grid-lines` is 1 to draw grid lines, otherwise 0. Each `Border` describes one kind of border. `n-borders` seems to -always be 19. Each `border-type` appears once (although in an +always be 19. Each `index` appears once (although in an unpredictable order) and corresponds to the following borders: -* 0: Title. -* 1...4: Left, top, right, and bottom outer frame. -* 5...8: Left, top, right, and bottom inner frame. -* 9, 10: Left and top of data area. -* 11, 12: Horizontal and vertical dimension rows. -* 13, 14: Horizontal and vertical dimension columns. -* 15, 16: Horizontal and vertical category rows. -* 17, 18: Horizontal and vertical category columns. +| `index` | Borders | +|--------:|:-------------------------------------------| +| 0 | Title. | +| 1...4 | Left, top, right, and bottom outer frame. | +| 5...8 | Left, top, right, and bottom inner frame. | +| 9, 10 | Left and top of data area. | +| 11, 12 | Horizontal and vertical dimension rows. | +| 13, 14 | Horizontal and vertical dimension columns. | +| 15, 16 | Horizontal and vertical category rows. | +| 17, 18 | Horizontal and vertical category columns. | `stroke-type` describes how a border is drawn, as one of: -* 0: No line. -* 1: Solid line. -* 2: Dashed line. -* 3: Thick line. -* 4: Thin line. -* 5: Double line. +| `stroke-type` | Border style | +|--------------:|:-------------| +| 0 | No line. | +| 1 | Solid line. | +| 2 | Dashed line. | +| 3 | Thick line. | +| 4 | Thin line. | +| 5 | Double line. | `color` is an RGB color. 
Bits 24-31 are alpha, bits 16-23 are red, 8-15 are green, 0-7 are blue. An alpha of 255 indicates an opaque @@ -484,7 +511,7 @@ widths as manually adjusted by the user. `locale` is a locale including an encoding, such as `en_US.windows-1252` or `it_IT.windows-1252`. (`locale` is often -duplicated in Y1, described below). +duplicated in `Y1`, described below). `epoch` is the year that starts the epoch. A 2-digit year is interpreted as belonging to the 100 years beginning at the epoch. The @@ -509,7 +536,7 @@ A writer may safely use false for `x7`, `x8`, and `x9`. ### X0 -X0 only appears, optionally, in version 1 members. +`X0` only appears, optionally, in version 1 members. ``` X0 => byte*14 Y1 Y2 @@ -556,19 +583,38 @@ X1 => bool[show-caption] ``` -`lang` may indicate the language in use. Some values seem to be 0: -en, 1: de, 2: es, 3: it, 5: ko, 6: pl, 8: zh-tw, 10: pt_BR, 11: fr. - -`show-variables` determines how variables are displayed by default. -A value of 1 means to display variable names, 2 to display variable -labels when available, 3 to display both (name followed by label, -separated by a space). The most common value is 0, which probably means -to use a global default. - -`show-values` is a similar setting for values. A value of 1 means to -display the value, 2 to display the value label when available, 3 to -display both. Again, the most common value is 0, which probably means -to use a global default. +`lang` may indicate the language in use. 
Some values and their +apparent meanings are: + +| Value | Language | +|------:|---------:| +| 0 | `en` | +| 1 | `de` | +| 2 | `es` | +| 3 | `it` | +| 5 | `ko` | +| 6 | `pl` | +| 8 | `zh-tw` | +| 10 | `pt_BR` | +| 11 | `fr` | + +`show-variables` determines how variables are displayed by default: + +| Value | Meaning | +|------:|:----------------------------------------------------| +| 0 | Use global default (the most common value) | +| 1 | Variable name only | +| 2 | Variable label only (when available) | +| 3 | Both (name followed by label, separated by a space) | + +`show-values` is a similar setting for values: + +| Value | Meaning | +|------:|:-------------------------------------------| +| 0 | Use global default (the most common value) | +| 1 | Value only | +| 2 | Value label only (when available) | +| 3 | Both | `show-title` is 1 to show the caption, 10 to hide it. @@ -597,7 +643,7 @@ The rest of `X2` specifies styles for data cells. At first glance this is odd, because each data cell can have its own style embedded as part of the data, but in practice `X2` specifies a style for a cell only if that cell is empty (and thus does not appear in the data at -all). Each StyleMap specifies the index of a blank cell, calculated +all). Each `StyleMap` specifies the index of a blank cell, calculated the same way as in the [Cells](#cells), along with a 0-based index into the accompanying StylePair array. @@ -645,22 +691,23 @@ optional bytes at the end. ### Encoding -Formats contains several indications of character encoding: +`Formats` contains several indications of character encoding: -- `locale` in Formats itself. +- `locale` in `Formats` itself. -- `locale` in Y1 (in version 1, Y1 is optionally nested inside X0; in -version 3, Y1 is nested inside X3). +- `locale` in `Y1` (in version 1, `Y1` is optionally nested inside +`X0`; in version 3, `Y1` is nested inside `X3`). -- `charset` in version 3, in Y1. +- `charset` in version 3, in `Y1`. - `lang` in X1, in version 3. 
-`charset`, if present, is a good indication of character encoding, -and in its absence the encoding suffix on `locale` in Formats will work. +`charset`, if present, is a good indication of character encoding, and +in its absence the encoding suffix on `locale` in `Formats` will work. -`locale` in Y1 can be disregarded: it is normally the same as -`locale` in Formats, and it is only present if `charset` is also. +A reader may disregard `locale` in `Y1`, because it is normally the +same as `locale` in `Formats`, and it is only present if `charset` is +also. `lang` is not helpful and should be ignored for character encoding purposes. @@ -708,10 +755,10 @@ many other values have been observed. A writer may safely use 0 for dimensions, and C column dimensions, `x2` is 2 for the first L dimensions, 0 for the next R dimensions, and 1 for the remaining C dimensions. This does not mean that the layer dimensions must be -presented first, followed by the row dimensions, followed by the column -dimensions--on the contrary, they are frequently in a different -order--but `x2` must follow this pattern to prevent the pivot table from -being misinterpreted. +presented first, followed by the row dimensions, followed by the +column dimensions--on the contrary, they are frequently in a different +order--but `x2` must follow this pattern to prevent the pivot table +from being misinterpreted. If `hide-dim-label` is 00, the pivot table displays a label for the dimension itself. Because usually the group and category labels are @@ -726,7 +773,7 @@ the first dimension, 1 for the second, and so on. Sometimes it is -1. There is no visible difference. A writer may safely use the 0-based index. -## Categories +### Categories Categories are arranged in a tree. Only the leaf nodes in the tree are really categories; the others just serve as grouping constructs. @@ -741,21 +788,21 @@ Group => `name` is the name of the category (or group). -A Leaf represents a leaf category. 
The Leaf's `leaf-index` is a -nonnegative integer unique within the Dimension and less than -`n-categories` in the Dimension. If the user does not sort or rearrange -the categories, then `leaf-index` starts at 0 for the first Leaf in the -dimension and increments by 1 with each successive Leaf. If the user -does sorts or rearrange the categories, then the order of categories in -the file reflects that change and `leaf-index` reflects the original -order. +A `Leaf` represents a leaf category. The `Leaf`'s `leaf-index` is a +nonnegative integer unique within the `Dimension` and less than +`n-categories` in the Dimension. If the user does not sort or +rearrange the categories, then `leaf-index` starts at 0 for the first +`Leaf` in the dimension and increments by 1 with each successive +`Leaf`. If the user does sort or rearrange the categories, then the +order of categories in the file reflects that change and `leaf-index` +reflects the original order. A dimension can have no leaf categories at all. A table that contains such a dimension necessarily has no data at all. -A Group is a group of nested categories. Usually a Group contains at -least one Category, so that `n-subcategories` is positive, but Groups -with zero subcategories have been observed. +A `Group` is a group of nested categories. Usually a `Group` contains +at least one `Category`, so that `n-subcategories` is positive, but +`Group`s with zero subcategories have been observed. If a Group's `merge` is 00, the most common value, then the group is really a distinct group that should be represented as such in the visual @@ -767,8 +814,8 @@ and should not be displayed. (Merged groups can be nested!) Writers need not use merged groups. 
-A Group's `x23` appears to be `i2` when all of the categories within a -group are leaf categories that directly represent data values for a +A `Group`'s `x23` appears to be `i2` when all of the categories within +a group are leaf categories that directly represent data values for a variable (e.g. in a frequency table or crosstabulation, a group of values in a variable being tabulated) and i0 otherwise. A writer may safely write a constant 0 in this field. @@ -808,11 +855,12 @@ Cells => int32[n-cells] Cell*[n-cells] Cell => int64[index] v1(00?) Value ``` -A Cell consists of an `index` and a Value. Suppose there are \\(d\\) -dimensions, numbered 1 through \\(d\\) in the order given in the [`Dimensions`](#dimensions) -previously, and that dimension \\(i\\) has \\(n_i\\) categories. Consider the cell -at coordinates \\(x_i, 1 \le i \le d\\), and note that \\(0 \le x_i < n_i\\). Then -the index \\(k\\) is calculated by the following algorithm: +A `Cell` consists of an `index` and a Value. Suppose there are +\\(d\\) dimensions, numbered 1 through \\(d\\) in the order given in +the [`Dimension`s](#dimensions) previously, and that dimension \\(i\\) +has \\(n_i\\) categories. Consider the cell at coordinates \\(x_i, 1 +\le i \le d\\), and note that \\(0 \le x_i < n_i\\). Then the index +\\(k\\) is calculated by the following algorithm: > let \\(k = 0\\). @@ -822,7 +870,7 @@ the index \\(k\\) is calculated by the following algorithm: For example, suppose there are 3 dimensions with 3, 4, and 5 categories, respectively. The cell at coordinates (1, 2, 3) has index \\(k = 5 \times (4 \times (3 \times 0 + 1) + 2) + 3 = 33\\). Within a -given dimension, the index is the `leaf-index` in a Leaf. +given dimension, the index is the `leaf-index` in a `Leaf`. ## Value @@ -859,6 +907,9 @@ the first nonzero byte in the encoding. 40 is shown in scientific notation if and only if it is nonzero and its magnitude is less than [`small`](#formats). 
+ Values of 0 or 1 or 0x10000 are sometimes seen as `format`. PSPP + interprets these as F40.2. + Most commonly, `format` has width 40 (the maximum). An `x` with the maximum negative double value `-DBL_MAX` represents @@ -874,9 +925,14 @@ the first nonzero byte in the encoding. latter very commonly. `show` determines whether to show the numeric value or the value - label. A value of 1 means to show the value, 2 to show the label, - 3 to show both, and 0 means to use the default specified in - [`show-values`](#formats). + label: + + | `show` | Meaning | + |-------:|:---------------------------------------------------| + | 0 | Use default specified in [`show-values`](#formats) | + | 1 | Value only | + | 2 | Label only | + | 3 | Both value and label | * `03` A text string, in two forms: `c` is in English, and sometimes @@ -1048,17 +1104,34 @@ the Value in which the `Template` is nested. A writer may safely omit the optional fixed data in `TemplateString`. `FontStyle` and `CellStyle`, if present, change the style for this -individual Value. In `FontStyle`, `bold`, `italic`, and `underline` -control the particular style. `show` is ordinarily 1; if it is 0, then -the cell data is not shown. `fg-color` and `bg-color` are strings in -the format `#rrggbb`, e.g. `#ff0000` for red or `#ffffff` for white. -The empty string is occasionally observed also. The `size` is a font -size in units of 1/128 inch. - -In `CellStyle`, `halign` is 0 for center, 2 for left, 4 for right, 6 -for decimal, 0xffffffad for mixed. For decimal alignment, -`decimal-offset` is the decimal point's offset from the right side of -the cell, in [pt](#pt). `valign` specifies vertical alignment: 0 for -center, 1 for top, 3 for bottom. `left-margin`, `right-margin`, -`top-margin`, and `bottom-margin` are in pt. +individual `Value`. In `FontStyle`, `bold`, `italic`, and `underline` +control the particular style. `show` is ordinarily 1; if it is 0, +then the cell data is not shown. 
`fg-color` and `bg-color` are +strings in the format `#rrggbb`, e.g. `#ff0000` for red or `#ffffff` +for white. The empty string is occasionally observed also. The +`size` is a font size in units of 1/128 inch. + +In `CellStyle`, `halign` specifies horizontal alignment: + +| `halign` | Meaning | +|-----------:|:--------| +| 0 | Center | +| 2 | Left | +| 4 | Right | +| 6 | Decimal | +| 0xffffffad | Mixed | + +For decimal alignment, `decimal-offset` is the decimal point's offset +from the right side of the cell, in [pt](#pt). + +`valign` specifies vertical alignment: + +| `valign` | Meaning | +|---------:|:--------| +| 0 | Center | +| 1 | Top | +| 3 | Bottom | + +`left-margin`, `right-margin`, `top-margin`, and `bottom-margin` are +in [pt](#pt). diff --git a/rust/doc/src/spv/structure.md b/rust/doc/src/spv/structure.md index dbbecf280c..9ba4635f0f 100644 --- a/rust/doc/src/spv/structure.md +++ b/rust/doc/src/spv/structure.md @@ -460,8 +460,21 @@ This element has the following attributes. As on the `heading` element. In the corpus, this is only present for version 21 and up and always includes all 8 digits. -See [Legacy Properties](legacy-detail-xml.md#legacy-properties), for -details on the `tableProperties` element. +This element contains the following: + +* `tableProperties`: See [Legacy + Properties](legacy-detail-xml.md#legacy-properties), for details. + +* `tableStructure`, which in turn contains: + + - Both `path` and `dataPath` for legacy members. + + - `dataPath` but not `path` for light detail binary members. + + - The usage of `csvPath` is rare and not yet understood. + + See [SPSS Viewer File Format](index.md) for more information on how + structure members refer to tables. ## The `graph` Element diff --git a/rust/doc/src/tablelook.md b/rust/doc/src/tablelook.md index 153c3e2b27..91b7b44f07 100644 --- a/rust/doc/src/tablelook.md +++ b/rust/doc/src/tablelook.md @@ -206,12 +206,26 @@ AreaStyle => `AreaStyle` represents style properties of an area. 
-`valign` is 0 for top alignment, 1 for bottom alginment, 2 for -center. - -`halign` is 0 for left alignment, 1 for right, 2 for center, 3 for -mixed, 4 for decimal. For decimal alignment, `decimal-offset` is the -offset of the decimal point in 20ths of a point. +`valign` has the following values: + +| `valign` | Vertical Alignment | +|---------:|:-------------------| +| 0 | Top | +| 1 | Bottom | +| 2 | Center | + +`halign` has the following values: + +| `halign` | Horizontal Alignment | +|---------:|:---------------------| +| 0 | Left | +| 1 | Right | +| 2 | Center | +| 3 | Mixed | +| 4 | Decimal | + +For decimal alignment, `decimal-offset` is the offset of the decimal +point, in 20ths of a point. `left-margin`, `right-margin`, `top-margin`, and `bottom-margin` are also measured in 20ths of a point. diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 18969a98c7..232a8282b8 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -55,6 +55,8 @@ toml = "0.9.5" hashbrown = { version = "0.15.5", features = ["serde"] } displaydoc = "0.2.5" codepage-437 = "0.1.0" +serde_path_to_error = "0.1.20" +html_parser = "0.7.0" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/convert.rs b/rust/pspp/src/convert.rs index b38ccb3b10..249df65b7a 100644 --- a/rust/pspp/src/convert.rs +++ b/rust/pspp/src/convert.rs @@ -258,7 +258,7 @@ enum OutputFormat { Csv, /// System file - Sys, + Sav, /// Portable file Por, @@ -272,7 +272,7 @@ impl TryFrom<&Path> for OutputFormat { if extension.eq_ignore_ascii_case("csv") || extension.eq_ignore_ascii_case("txt") { Ok(OutputFormat::Csv) } else if extension.eq_ignore_ascii_case("sav") || extension.eq_ignore_ascii_case("sys") { - Ok(OutputFormat::Sys) + Ok(OutputFormat::Sav) } else if extension.eq_ignore_ascii_case("por") { Ok(OutputFormat::Por) } else { @@ -304,7 +304,7 @@ impl Convert { .with_encoding(self.encoding) 
.with_password(self.password.clone()) .open_file(&self.input)?; - if output_format == OutputFormat::Sys && self.sys_options.to_unicode { + if output_format == OutputFormat::Sav && self.sys_options.to_unicode { system_file = system_file.into_unicode(); } let (dictionary, _, cases) = system_file.into_parts(); @@ -376,7 +376,7 @@ impl Convert { output.write_record(None::<&[u8]>)?; } } - OutputFormat::Sys => { + OutputFormat::Sav => { let Some(output) = &self.output else { bail!("output file name must be specified for output to a system file") }; diff --git a/rust/pspp/src/output.rs b/rust/pspp/src/output.rs index 23f2435d36..77f097fc9c 100644 --- a/rust/pspp/src/output.rs +++ b/rust/pspp/src/output.rs @@ -35,10 +35,11 @@ pub mod drivers; pub mod page; pub mod pivot; pub mod render; +mod spv; pub mod table; /// A single output item. -#[derive(Serialize)] +#[derive(Debug, Serialize)] pub struct Item { /// The localized label for the item that appears in the outline pane in the /// output viewer and in PDF outlines. 
This is `None` if no label has been @@ -79,6 +80,17 @@ impl Item { None => self.details.label(), } } + + pub fn with_show(self, show: bool) -> Self { + Self { show, ..self } + } + + pub fn with_command_name(self, command_name: Option) -> Self { + Self { + command_name, + ..self + } + } } impl From for Item @@ -90,7 +102,7 @@ where } } -#[derive(Serialize)] +#[derive(Debug, Serialize)] pub enum Details { Chart, Image, @@ -117,7 +129,7 @@ impl Details { | Details::Message(_) | Details::PageBreak | Details::Text(_) => None, - Details::Table(pivot_table) => pivot_table.command_c.as_ref(), + Details::Table(pivot_table) => pivot_table.metadata.command_c.as_ref(), } } diff --git a/rust/pspp/src/output/drivers/cairo/fsm.rs b/rust/pspp/src/output/drivers/cairo/fsm.rs index d4bbb01aca..89d5d61c53 100644 --- a/rust/pspp/src/output/drivers/cairo/fsm.rs +++ b/rust/pspp/src/output/drivers/cairo/fsm.rs @@ -187,7 +187,7 @@ impl CairoFsm { pivot_table, Some(layer_indexes.as_slice()), )); - if pivot_table.look.paginate_layers { + if pivot_table.style.look.paginate_layers { used = space; } else { used += self.style.object_spacing; @@ -246,12 +246,7 @@ fn xr_fill_rectangle(context: &Context, rectangle: Rect2) { } fn margin(cell: &DrawCell, axis: Axis2) -> usize { - px_to_xr( - cell.style.cell_style.margins[axis] - .iter() - .sum::() - .max(0) as usize, - ) + px_to_xr(cell.cell_style.margins[axis].iter().sum::().max(0) as usize) } pub fn parse_font_style(font_style: &FontStyle) -> FontDescription { @@ -327,8 +322,8 @@ impl CairoDevice<'_> { let layout = self.style.new_layout(self.context); - let cell_font = if !cell.style.font_style.font.is_empty() { - Some(parse_font_style(&cell.style.font_style)) + let cell_font = if !cell.font_style.font.is_empty() { + Some(parse_font_style(&cell.font_style)) } else { None }; @@ -354,7 +349,7 @@ impl CairoDevice<'_> { } let mut attrs = None; - let mut body = if cell.style.font_style.markup { + let mut body = if cell.font_style.markup { match 
parse_markup(&body, 0 as char) { Ok((markup_attrs, string, _accel)) => { attrs = Some(markup_attrs); @@ -366,7 +361,7 @@ impl CairoDevice<'_> { avoid_decimal_split(body) }; - if cell.style.font_style.underline { + if cell.font_style.underline { attrs .get_or_insert_default() .insert(AttrInt::new_underline(Underline::Single)); @@ -401,8 +396,7 @@ impl CairoDevice<'_> { let footnote_width = layout.size().0.max(0) as usize; // Bound the adjustment by the width of the right margin. - let right_margin = - px_to_xr(cell.style.cell_style.margins[Axis2::X][1].max(0) as usize); + let right_margin = px_to_xr(cell.cell_style.margins[Axis2::X][1].max(0) as usize); let footnote_adjustment = min(footnote_width, right_margin); // Adjust the bounding box. @@ -540,7 +534,7 @@ impl Device for CairoDevice<'_> { } fn measure_cell_height(&self, cell: &DrawCell, width: usize) -> usize { - let margins = &cell.style.cell_style.margins; + let margins = &cell.cell_style.margins; let bb = Rect2::new( 0..width.saturating_sub(px_to_xr(margins[Axis2::X].len())), 0..usize::MAX, @@ -710,8 +704,8 @@ impl Device for CairoDevice<'_> { spill: EnumMap, clip: &Rect2, ) { - let fg = &draw_cell.style.font_style.fg[alternate_row as usize]; - let bg = &draw_cell.style.font_style.bg[alternate_row as usize]; + let fg = &draw_cell.font_style.fg[alternate_row as usize]; + let bg = &draw_cell.font_style.bg[alternate_row as usize]; if (bg.r != 255 || bg.g != 255 || bg.b != 255) && bg.alpha != 0 { self.context.save().unwrap(); @@ -745,10 +739,10 @@ impl Device for CairoDevice<'_> { self.context.save().unwrap(); bb[Axis2::Y].start += valign_offset; for axis in [Axis2::X, Axis2::Y] { - bb[axis].start += px_to_xr(draw_cell.style.cell_style.margins[axis][0].max(0) as usize); + bb[axis].start += px_to_xr(draw_cell.cell_style.margins[axis][0].max(0) as usize); bb[axis].end = bb[axis] .end - .saturating_sub(draw_cell.style.cell_style.margins[axis][0].max(0) as usize); + 
.saturating_sub(draw_cell.cell_style.margins[axis][0].max(0) as usize); } if bb[Axis2::X].start < bb[Axis2::X].end && bb[Axis2::Y].start < bb[Axis2::Y].end { self.layout_cell(draw_cell, bb, clip); diff --git a/rust/pspp/src/output/drivers/html.rs b/rust/pspp/src/output/drivers/html.rs index 2c5f0aebcc..411f7db8af 100644 --- a/rust/pspp/src/output/drivers/html.rs +++ b/rust/pspp/src/output/drivers/html.rs @@ -80,7 +80,7 @@ where for layer_indexes in pivot_table.layers(true) { let output = pivot_table.output(&layer_indexes, false); write!(&mut self.writer, "")?; @@ -194,7 +194,7 @@ where write!(&mut style, "writing-mode: sideways-lr; ").unwrap(); } - let vert_align = match cell.style.cell_style.vert_align { + let vert_align = match cell.cell_style.vert_align { VertAlign::Top => None, VertAlign::Middle => Some("middle"), VertAlign::Bottom => Some("bottom"), @@ -202,36 +202,36 @@ where if let Some(vert_align) = vert_align { write!(&mut style, "vertical-align: {vert_align}; ").unwrap(); } - let bg = cell.style.font_style.bg[alternate_row as usize]; + let bg = cell.font_style.bg[alternate_row as usize]; if bg != Color::WHITE { write!(&mut style, "background: {}; ", bg.display_css()).unwrap(); } - let fg = cell.style.font_style.fg[alternate_row as usize]; + let fg = cell.font_style.fg[alternate_row as usize]; if fg != Color::BLACK { write!(&mut style, "color: {}; ", fg.display_css()).unwrap(); } - if !cell.style.font_style.font.is_empty() { + if !cell.font_style.font.is_empty() { write!( &mut style, r#"font-family: "{}"; "#, - Escape::new(&cell.style.font_style.font) + Escape::new(&cell.font_style.font) ) .unwrap(); } - if cell.style.font_style.bold { + if cell.font_style.bold { write!(&mut style, "font-weight: bold; ").unwrap(); } - if cell.style.font_style.italic { + if cell.font_style.italic { write!(&mut style, "font-style: italic; ").unwrap(); } - if cell.style.font_style.underline { + if cell.font_style.underline { write!(&mut style, "text-decoration: underline; 
").unwrap(); } - if cell.style.font_style.size != 0 { - write!(&mut style, "font-size: {}pt; ", cell.style.font_style.size).unwrap(); + if cell.font_style.size != 0 { + write!(&mut style, "font-size: {}pt; ", cell.font_style.size).unwrap(); } if let Some(table) = table { diff --git a/rust/pspp/src/output/drivers/spv.rs b/rust/pspp/src/output/drivers/spv.rs index 58c40f6282..38ea07be9f 100644 --- a/rust/pspp/src/output/drivers/spv.rs +++ b/rust/pspp/src/output/drivers/spv.rs @@ -272,14 +272,14 @@ impl BinWrite for PivotTable { 3u32, // version SpvBool(true), // x0 SpvBool(false), // x1 - SpvBool(self.rotate_inner_column_labels), - SpvBool(self.rotate_outer_row_labels), + SpvBool(self.style.rotate_inner_column_labels), + SpvBool(self.style.rotate_outer_row_labels), SpvBool(true), // x2 0x15u32, // x3 - *self.look.heading_widths[HeadingRegion::Columns].start() as i32, - *self.look.heading_widths[HeadingRegion::Columns].end() as i32, - *self.look.heading_widths[HeadingRegion::Rows].start() as i32, - *self.look.heading_widths[HeadingRegion::Rows].end() as i32, + *self.style.look.heading_widths[HeadingRegion::Columns].start() as i32, + *self.style.look.heading_widths[HeadingRegion::Columns].end() as i32, + *self.style.look.heading_widths[HeadingRegion::Rows].start() as i32, + *self.style.look.heading_widths[HeadingRegion::Rows].end() as i32, 0u64, ) .write_le(writer)?; @@ -289,8 +289,8 @@ impl BinWrite for PivotTable { self.title(), self.subtype(), Optional(Some(self.title())), - Optional(self.corner_text.as_ref()), - Optional(self.caption.as_ref()), + Optional(self.metadata.corner_text.as_ref()), + Optional(self.metadata.caption.as_ref()), ) .write_le(writer)?; @@ -309,7 +309,7 @@ impl BinWrite for PivotTable { Area::Layers, ]; for (index, area) in SPV_AREAS.into_iter().enumerate() { - self.look.areas[area].write_le_args(writer, index)?; + self.style.look.areas[area].write_le_args(writer, index)?; } // Borders. 
@@ -337,22 +337,28 @@ impl BinWrite for PivotTable { let borders_start = Count::new(writer)?; (1, SPV_BORDERS.len() as u32).write_be(writer)?; for (index, border) in SPV_BORDERS.into_iter().enumerate() { - self.look.borders[border].write_be_args(writer, index)?; + self.style.look.borders[border].write_be_args(writer, index)?; } - (SpvBool(self.show_grid_lines), 0u8, 0u16).write_le(writer)?; + (SpvBool(self.style.show_grid_lines), 0u8, 0u16).write_le(writer)?; borders_start.finish_le32(writer)?; // Print Settings. Counted::new(( 1u32, - SpvBool(self.look.print_all_layers), - SpvBool(self.look.paginate_layers), - SpvBool(self.look.shrink_to_fit[Axis2::X]), - SpvBool(self.look.shrink_to_fit[Axis2::Y]), - SpvBool(self.look.top_continuation), - SpvBool(self.look.bottom_continuation), - self.look.n_orphan_lines as u32, - SpvString(self.look.continuation.as_ref().map_or("", |s| s.as_str())), + SpvBool(self.style.look.print_all_layers), + SpvBool(self.style.look.paginate_layers), + SpvBool(self.style.look.shrink_to_fit[Axis2::X]), + SpvBool(self.style.look.shrink_to_fit[Axis2::Y]), + SpvBool(self.style.look.top_continuation), + SpvBool(self.style.look.bottom_continuation), + self.style.look.n_orphan_lines as u32, + SpvString( + self.style + .look + .continuation + .as_ref() + .map_or("", |s| s.as_str()), + ), )) .with_endian(Endian::Little) .write_be(writer)?; @@ -362,10 +368,12 @@ impl BinWrite for PivotTable { 1u32, 4u32, self.spv_layer() as u32, - SpvBool(self.look.hide_empty), - SpvBool(self.look.row_label_position == LabelPosition::Corner), - SpvBool(self.look.footnote_marker_type == FootnoteMarkerType::Alphabetic), - SpvBool(self.look.footnote_marker_position == FootnoteMarkerPosition::Superscript), + SpvBool(self.style.look.hide_empty), + SpvBool(self.style.look.row_label_position == LabelPosition::Corner), + SpvBool(self.style.look.footnote_marker_type == FootnoteMarkerType::Alphabetic), + SpvBool( + self.style.look.footnote_marker_position == 
FootnoteMarkerPosition::Superscript, + ), 0u8, Counted::new(( 0u32, // n-row-breaks @@ -375,8 +383,8 @@ impl BinWrite for PivotTable { 0u32, // n-row-point-keeps 0u32, // n-column-point-keeps )), - SpvString::optional(&self.notes), - SpvString::optional(&self.look.name), + SpvString::optional(&self.metadata.notes), + SpvString::optional(&self.style.look.name), Zeros(82), )) .with_endian(Endian::Little) @@ -384,8 +392,8 @@ impl BinWrite for PivotTable { fn y0(pivot_table: &PivotTable) -> impl for<'a> BinWrite = ()> { ( - pivot_table.settings.epoch.0 as u32, - u8::from(pivot_table.settings.decimal), + pivot_table.style.settings.epoch.0 as u32, + u8::from(pivot_table.style.settings.decimal), b',', ) } @@ -394,7 +402,13 @@ impl BinWrite for PivotTable { ( 5, EnumMap::from_fn(|cc| { - SpvString(pivot_table.settings.number_style(Type::CC(cc)).to_string()) + SpvString( + pivot_table + .style + .settings + .number_style(Type::CC(cc)) + .to_string(), + ) }) .into_array(), ) @@ -403,16 +417,20 @@ impl BinWrite for PivotTable { fn x1(pivot_table: &PivotTable) -> impl for<'a> BinWrite = ()> { ( 0u8, // x14 - if pivot_table.show_title { 1u8 } else { 10u8 }, + if pivot_table.style.show_title { + 1u8 + } else { + 10u8 + }, 0u8, // x16 0u8, // lang - Show::as_spv(&pivot_table.show_variables), - Show::as_spv(&pivot_table.show_values), + Show::as_spv(&pivot_table.style.show_variables), + Show::as_spv(&pivot_table.style.show_values), -1i32, // x18 -1i32, // x19 Zeros(17), SpvBool(false), // x20 - SpvBool(pivot_table.show_caption), + SpvBool(pivot_table.style.show_caption), ) } @@ -427,13 +445,13 @@ impl BinWrite for PivotTable { fn y1(pivot_table: &PivotTable) -> impl for<'a> BinWrite = ()> + use<'_> { ( - SpvString::optional(&pivot_table.command_c), - SpvString::optional(&pivot_table.command_local), - SpvString::optional(&pivot_table.language), + SpvString::optional(&pivot_table.metadata.command_c), + SpvString::optional(&pivot_table.metadata.command_local), + 
SpvString::optional(&pivot_table.metadata.language), SpvString("UTF-8"), - SpvString::optional(&pivot_table.locale), + SpvString::optional(&pivot_table.metadata.locale), SpvBool(false), // x10 - SpvBool(pivot_table.settings.leading_zero), + SpvBool(pivot_table.style.settings.leading_zero), SpvBool(true), // x12 SpvBool(true), // x13 y0(pivot_table), @@ -453,12 +471,13 @@ impl BinWrite for PivotTable { 0u8, 0u8, y1(pivot_table), - pivot_table.small, + pivot_table.style.small, 1u8, - SpvString::optional(&pivot_table.dataset), - SpvString::optional(&pivot_table.datafile), + SpvString::optional(&pivot_table.metadata.dataset), + SpvString::optional(&pivot_table.metadata.datafile), 0u32, pivot_table + .metadata .date .map_or(0i64, |date| date.and_utc().timestamp()), y2(pivot_table), @@ -480,28 +499,28 @@ impl BinWrite for PivotTable { .write_le(writer)?; // Dimensions. - (self.dimensions.len() as u32).write_le(writer)?; + (self.dimensions().len() as u32).write_le(writer)?; - let x2 = repeat_n(2, self.axes[Axis3::Z].dimensions.len()) - .chain(repeat_n(0, self.axes[Axis3::Y].dimensions.len())) + let x2 = repeat_n(2, self.axes()[Axis3::Z].dimensions.len()) + .chain(repeat_n(0, self.axes()[Axis3::Y].dimensions.len())) .chain(repeat(1)); - for ((index, dimension), x2) in self.dimensions.iter().enumerate().zip(x2) { + for ((index, dimension), x2) in self.dimensions().iter().enumerate().zip(x2) { dimension.write_options(writer, endian, (index, x2))?; } // Axes. for axis in [Axis3::Z, Axis3::Y, Axis3::X] { - (self.axes[axis].dimensions.len() as u32).write_le(writer)?; + (self.axes()[axis].dimensions.len() as u32).write_le(writer)?; } for axis in [Axis3::Z, Axis3::Y, Axis3::X] { - for index in self.axes[axis].dimensions.iter().copied() { + for index in self.axes()[axis].dimensions.iter().copied() { (index as u32).write_le(writer)?; } } // Cells. 
- (self.cells.len() as u32).write_le(writer)?; - for (index, value) in &self.cells { + (self.cells().len() as u32).write_le(writer)?; + for (index, value) in self.cells() { (*index as u64, value).write_le(writer)?; } @@ -1219,14 +1238,11 @@ impl<'a> BinWrite for ValueMod<'a> { (0x31u8, SpvString(template)).write_options(writer, endian, args)?; } template_string_start.finish_le32(writer)?; - style - .style - .as_ref() - .map_or_else(StylePair::default, |area_style| StylePair { - font_style: Some(&area_style.font_style), - cell_style: Some(&area_style.cell_style), - }) - .write_options(writer, endian, args)?; + StylePair { + font_style: style.font_style.as_ref(), + cell_style: style.cell_style.as_ref(), + } + .write_options(writer, endian, args)?; v3_start.finish_le32(writer) } else { 0x58u8.write_options(writer, endian, args) diff --git a/rust/pspp/src/output/drivers/text.rs b/rust/pspp/src/output/drivers/text.rs index 8ad1c477e4..6c3e0a4afa 100644 --- a/rust/pspp/src/output/drivers/text.rs +++ b/rust/pspp/src/output/drivers/text.rs @@ -640,7 +640,7 @@ impl Device for TextRenderer { }; let text = if self.emphasis { - Emphasis::from(&cell.style.font_style).apply(text) + Emphasis::from(cell.font_style).apply(text) } else { Cow::from(text) }; diff --git a/rust/pspp/src/output/page.rs b/rust/pspp/src/output/page.rs index 6872a6aeab..ab89d6c375 100644 --- a/rust/pspp/src/output/page.rs +++ b/rust/pspp/src/output/page.rs @@ -14,6 +14,8 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . 
+use std::{path::Path, str::FromStr}; + use enum_map::{EnumMap, enum_map}; use serde::{Deserialize, Serialize}; @@ -107,3 +109,4 @@ impl PageSetup { EnumMap::from_fn(|axis| self.paper[axis] - self.margins[axis][0] - self.margins[axis][1]) } } + diff --git a/rust/pspp/src/output/pivot.rs b/rust/pspp/src/output/pivot.rs index 13392f8ea6..6df2acf86d 100644 --- a/rust/pspp/src/output/pivot.rs +++ b/rust/pspp/src/output/pivot.rs @@ -58,7 +58,8 @@ pub use color::ParseError as ParseColorError; use color::{AlphaColor, Rgba8, Srgb, palette::css::TRANSPARENT}; use enum_iterator::Sequence; use enum_map::{Enum, EnumMap, enum_map}; -use look_xml::TableProperties; +use itertools::Itertools; +pub use look_xml::TableProperties; use quick_xml::{DeError, de::from_str}; use serde::{ Deserialize, Serialize, Serializer, @@ -68,6 +69,7 @@ use serde::{ use smallstr::SmallString; use smallvec::SmallVec; use thiserror::Error as ThisError; +pub use tlo::parse_bool; use tlo::parse_tlo; use crate::{ @@ -155,13 +157,7 @@ impl Area { fn default_font_style(self) -> FontStyle { FontStyle { bold: self == Area::Title, - italic: false, - underline: false, - markup: false, - font: String::from("Sans Serif"), - fg: [Color::BLACK; 2], - bg: [Color::WHITE; 2], - size: 9, + ..FontStyle::default() } } @@ -214,6 +210,10 @@ impl Border { Self::Dimension(row_col_border) => Self::Category(row_col_border), } } + + pub fn default_borders() -> EnumMap { + EnumMap::from_fn(Border::default_border_style) + } } impl Display for Border { @@ -289,14 +289,14 @@ impl Display for RowColBorder { #[derive(Default, Clone, Debug, Serialize)] pub struct Sizing { /// Specific column widths, in 1/96" units. - widths: Vec, + pub widths: Vec, /// Specific page breaks: 0-based columns after which a page break must /// occur, e.g. a value of 1 requests a break after the second column. - breaks: Vec, + pub breaks: Vec, /// Keeps: columns to keep together on a page if possible. 
- keeps: Vec>, + pub keeps: Vec>, } #[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Sequence, Serialize)] @@ -362,9 +362,11 @@ impl Iterator for AxisIterator { } impl PivotTable { - pub fn with_look(mut self, look: Arc) -> Self { - self.look = look; - self + pub fn with_look(self, look: Arc) -> Self { + Self { + style: self.style.with_look(look), + ..self + } } pub fn insert_number(&mut self, data_indexes: &[usize], number: Option, class: Class) { let format = match class { @@ -606,19 +608,26 @@ impl Footnotes { } } -#[derive(Clone, Debug)] -pub struct Leaf { - name: Box, +impl FromIterator for Footnotes { + fn from_iter>(iter: T) -> Self { + Self( + iter.into_iter() + .enumerate() + .map(|(index, footnote)| Arc::new(footnote.with_index(index))) + .collect(), + ) + } } +#[derive(Clone, Debug)] +pub struct Leaf(Box); + impl Leaf { pub fn new(name: Value) -> Self { - Self { - name: Box::new(name), - } + Self(Box::new(name)) } pub fn name(&self) -> &Value { - &self.name + &self.0 } } @@ -627,7 +636,7 @@ impl Serialize for Leaf { where S: serde::Serializer, { - self.name.serialize(serializer) + self.0.serialize(serializer) } } @@ -657,7 +666,7 @@ impl Category { pub fn name(&self) -> &Value { match self { Category::Group(group) => &group.name, - Category::Leaf(leaf) => &leaf.name, + Category::Leaf(leaf) => &leaf.0, } } @@ -820,7 +829,7 @@ impl Default for Look { footnote_marker_type: FootnoteMarkerType::default(), footnote_marker_position: FootnoteMarkerPosition::default(), areas: EnumMap::from_fn(Area::default_area_style), - borders: EnumMap::from_fn(Border::default_border_style), + borders: Border::default_borders(), print_all_layers: false, paginate_layers: false, shrink_to_fit: EnumMap::from_fn(|_| false), @@ -886,7 +895,7 @@ impl Look { /// Position for group labels. #[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] pub enum LabelPosition { - /// Hierarachically enclosing the categories. + /// Hierarchically enclosing the categories. 
/// /// For column labels, group labels appear above the categories. For row /// labels, group labels appear to the left of the categories. @@ -1032,7 +1041,7 @@ pub enum VertAlign { Bottom, } -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub struct FontStyle { pub bold: bool, pub italic: bool, @@ -1054,6 +1063,21 @@ pub struct FontStyle { pub size: i32, } +impl Default for FontStyle { + fn default() -> Self { + FontStyle { + bold: false, + italic: false, + underline: false, + markup: false, + font: String::from("Sans Serif"), + fg: [Color::BLACK; 2], + bg: [Color::WHITE; 2], + size: 9, + } + } +} + #[derive(Copy, Clone, PartialEq, Eq)] pub struct Color { pub alpha: u8, @@ -1089,6 +1113,10 @@ impl Color { pub fn display_css(&self) -> DisplayCss { DisplayCss(*self) } + + pub fn into_rgb(&self) -> (u8, u8, u8) { + (self.r, self.g, self.b) + } } impl Debug for Color { @@ -1112,14 +1140,8 @@ impl FromStr for Color { s.chars().count() == 6 && s.chars().all(|c| c.is_ascii_hexdigit()) } let color: AlphaColor = match s.parse() { - Err(ParseColorError::UnknownColorSyntax) if is_bare_hex(s) => { - ("#".to_owned() + s).parse() - } - Err(ParseColorError::UnknownColorSyntax) - if s.trim().eq_ignore_ascii_case("transparent") => - { - Ok(TRANSPARENT) - } + Err(_) if is_bare_hex(s) => ("#".to_owned() + s).parse(), + Err(_) if s.trim().eq_ignore_ascii_case("transparent") => Ok(TRANSPARENT), other => other, }?; Ok(color.to_rgba8().into()) @@ -1471,7 +1493,7 @@ impl IntoValueOptions for ValueOptions { } #[derive(Clone, Debug, Serialize)] -pub struct PivotTable { +pub struct PivotTableStyle { pub look: Arc, pub rotate_inner_column_labels: bool, @@ -1487,17 +1509,10 @@ pub struct PivotTable { pub show_values: Option, pub show_variables: Option, - - pub weight_format: Format, - - /// Current layer indexes, with `axes[Axis3::Z].dimensions.len()` elements. 
- /// `current_layer[i]` is an offset into - /// `axes[Axis3::Z].dimensions[i].data_leaves[]`, except that a dimension - /// can have zero leaves, in which case `current_layer[i]` is zero and - /// there's no corresponding leaf. - pub current_layer: Vec, - - /// Column and row sizing and page breaks. + /// Column and row sizing and page breaks: + /// + /// - `sizing[Axis2::X]` is sizes for columns. + /// - `sizing[Axis2::Y]` is sizes for rows. pub sizing: EnumMap>>, /// Format settings. @@ -1508,6 +1523,61 @@ pub struct PivotTable { pub small: f64, + pub weight_format: Format, +} + +impl Default for PivotTableStyle { + fn default() -> Self { + Self { + look: Look::shared_default(), + rotate_inner_column_labels: false, + rotate_outer_row_labels: false, + show_grid_lines: false, + show_title: true, + show_caption: true, + show_values: None, + show_variables: None, + sizing: EnumMap::default(), + settings: FormatSettings::default(), // XXX from settings + grouping: None, + small: 0.0001, // XXX from settings. 
+ weight_format: Format::F40, + } + } +} + +impl PivotTableStyle { + fn with_look(self, look: Arc) -> Self { + Self { look, ..self } + } + fn with_show_values(self, show_values: Option) -> Self { + Self { + show_values, + ..self + } + } + fn with_show_variables(self, show_variables: Option) -> Self { + Self { + show_variables, + ..self + } + } + fn with_show_title(self, show_title: bool) -> Self { + Self { show_title, ..self } + } + fn with_show_caption(self, show_caption: bool) -> Self { + Self { + show_caption, + ..self + } + } + pub fn look_mut(&mut self) -> &mut Look { + Arc::make_mut(&mut self.look) + } +} + +#[derive(Clone, Debug, Default, Serialize)] +pub struct PivotTableMetadata { pub command_local: Option, pub command_c: Option, pub language: Option, @@ -1515,56 +1585,107 @@ pub struct PivotTable { pub dataset: Option, pub datafile: Option, pub date: Option, - pub footnotes: Footnotes, pub title: Option>, pub subtype: Option>, pub corner_text: Option>, pub caption: Option>, pub notes: Option, - pub dimensions: Vec, - pub axes: EnumMap, - pub cells: HashMap, +} + +#[derive(Clone, Debug, Serialize)] +pub struct PivotTable { + pub style: PivotTableStyle, + + /// Current layer indexes, with `axes[Axis3::Z].dimensions.len()` elements. + /// `current_layer[i]` is an offset into + /// `axes[Axis3::Z].dimensions[i].data_leaves[]`, except that a dimension + /// can have zero leaves, in which case `current_layer[i]` is zero and + /// there's no corresponding leaf. 
+ pub current_layer: Vec, + + pub metadata: PivotTableMetadata, + pub footnotes: Footnotes, + dimensions: Vec, + axes: EnumMap, + cells: HashMap, +} + +impl PivotTableMetadata { + pub fn with_subtype(self, subtype: impl Into) -> Self { + Self { + subtype: Some(Box::new(subtype.into())), + ..self + } + } } impl PivotTable { + pub fn cells(&self) -> &HashMap { + &self.cells + } + pub fn dimensions(&self) -> &[Dimension] { + &self.dimensions + } + pub fn axes(&self) -> &EnumMap { + &self.axes + } + pub fn with_title(mut self, title: impl Into) -> Self { - self.title = Some(Box::new(title.into())); - self.show_title = true; + self.metadata.title = Some(Box::new(title.into())); + self.style.show_title = true; self } pub fn with_caption(mut self, caption: impl Into) -> Self { - self.caption = Some(Box::new(caption.into())); - self.show_caption = true; + self.metadata.caption = Some(Box::new(caption.into())); + self.style.show_caption = true; self } pub fn with_corner_text(mut self, corner_text: impl Into) -> Self { - self.corner_text = Some(Box::new(corner_text.into())); + self.metadata.corner_text = Some(Box::new(corner_text.into())); self } pub fn with_subtype(self, subtype: impl Into) -> Self { Self { - subtype: Some(Box::new(subtype.into())), + metadata: self.metadata.with_subtype(subtype), ..self } } - pub fn with_show_title(mut self, show_title: bool) -> Self { - self.show_title = show_title; - self + pub fn with_show_values(self, show_values: Option) -> Self { + Self { + style: self.style.with_show_values(show_values), + ..self + } } - pub fn with_show_caption(mut self, show_caption: bool) -> Self { - self.show_caption = show_caption; - self + pub fn with_show_variables(self, show_variables: Option) -> Self { + Self { + style: self.style.with_show_variables(show_variables), + ..self + } + } + + pub fn with_show_title(self, show_title: bool) -> Self { + Self { + style: self.style.with_show_title(show_title), + ..self + } + } + + pub fn with_show_caption(self, 
show_caption: bool) -> Self { + Self { + style: self.style.with_show_caption(show_caption), + ..self + } } pub fn with_layer(mut self, layer: &[usize]) -> Self { debug_assert_eq!(layer.len(), self.current_layer.len()); - if self.look.print_all_layers { - self.look_mut().print_all_layers = false; + if self.style.look.print_all_layers { + self.style.look_mut().print_all_layers = false; } self.current_layer.clear(); self.current_layer.extend_from_slice(layer); @@ -1572,39 +1693,39 @@ impl PivotTable { } pub fn with_all_layers(mut self) -> Self { - if !self.look.print_all_layers { + if !self.style.look.print_all_layers { self.look_mut().print_all_layers = true; } self } pub fn look_mut(&mut self) -> &mut Look { - Arc::make_mut(&mut self.look) + self.style.look_mut() } pub fn with_show_empty(mut self) -> Self { - if self.look.hide_empty { + if self.style.look.hide_empty { self.look_mut().hide_empty = false; } self } pub fn with_hide_empty(mut self) -> Self { - if !self.look.hide_empty { + if !self.style.look.hide_empty { self.look_mut().hide_empty = true; } self } pub fn label(&self) -> String { - match &self.title { + match &self.metadata.title { Some(title) => title.display(self).to_string(), None => String::from("Table"), } } pub fn title(&self) -> &Value { - match &self.title { + match &self.metadata.title { Some(title) => title, None => { static EMPTY: Value = Value::empty(); @@ -1614,7 +1735,7 @@ impl PivotTable { } pub fn subtype(&self) -> &Value { - match &self.subtype { + match &self.metadata.subtype { Some(subtype) => subtype, None => { static EMPTY: Value = Value::empty(); @@ -1627,33 +1748,10 @@ impl PivotTable { impl Default for PivotTable { fn default() -> Self { Self { - look: Look::shared_default(), - rotate_inner_column_labels: false, - rotate_outer_row_labels: false, - show_grid_lines: false, - show_title: true, - show_caption: true, - show_values: None, - show_variables: None, - weight_format: Format::F40, + style: PivotTableStyle::default(), + 
metadata: PivotTableMetadata::default(), current_layer: Vec::new(), - sizing: EnumMap::default(), - settings: FormatSettings::default(), // XXX from settings - grouping: None, - small: 0.0001, // XXX from settings. - command_local: None, - command_c: None, // XXX from current command name. - language: None, - locale: None, - dataset: None, - datafile: None, - date: None, footnotes: Footnotes::new(), - subtype: None, - title: None, - corner_text: None, - caption: None, - notes: None, dimensions: Vec::new(), axes: EnumMap::default(), cells: HashMap::new(), @@ -1661,6 +1759,56 @@ impl Default for PivotTable { } } +pub trait CellIndex { + fn cell_index(self, dimensions: I) -> usize + where + I: ExactSizeIterator; +} + +impl CellIndex for &[usize; N] { + fn cell_index(self, dimensions: I) -> usize + where + I: ExactSizeIterator, + { + self.as_slice().cell_index(dimensions) + } +} + +impl CellIndex for [usize; N] { + fn cell_index(self, dimensions: I) -> usize + where + I: ExactSizeIterator, + { + self.as_slice().cell_index(dimensions) + } +} + +impl CellIndex for &[usize] { + fn cell_index(self, dimensions: I) -> usize + where + I: ExactSizeIterator, + { + let data_indexes = self; + let mut index = 0; + for (dimension, data_index) in dimensions.zip_eq(data_indexes.iter()) { + debug_assert!(*data_index < dimension); + index = dimension * index + data_index; + } + index + } +} + +pub struct PrecomputedIndex(pub usize); + +impl CellIndex for PrecomputedIndex { + fn cell_index(self, _dimensions: I) -> usize + where + I: ExactSizeIterator, + { + self.0 + } +} + fn cell_index(data_indexes: &[usize], dimensions: I) -> usize where I: ExactSizeIterator, @@ -1683,34 +1831,49 @@ impl PivotTable { dimensions.push(dimension); } Self { - look: Settings::global().look.clone(), + style: PivotTableStyle::default().with_look(Settings::global().look.clone()), current_layer: repeat_n(0, axes[Axis3::Z].dimensions.len()).collect(), axes, dimensions, ..Self::default() } } - fn 
cell_index(&self, data_indexes: &[usize]) -> usize { - cell_index(data_indexes, self.dimensions.iter().map(|d| d.len())) + fn cell_index(&self, cell_index: C) -> usize + where + C: CellIndex, + { + cell_index.cell_index(self.dimensions.iter().map(|d| d.len())) } - pub fn insert(&mut self, data_indexes: &[usize], value: impl Into) { - self.cells - .insert(self.cell_index(data_indexes), value.into()); + pub fn insert(&mut self, cell_index: C, value: impl Into) + where + C: CellIndex, + { + self.cells.insert(self.cell_index(cell_index), value.into()); } - pub fn get(&self, data_indexes: &[usize]) -> Option<&Value> { - self.cells.get(&self.cell_index(data_indexes)) + pub fn get(&self, cell_index: C) -> Option<&Value> + where + C: CellIndex, + { + self.cells.get(&self.cell_index(cell_index)) } - pub fn with_data(mut self, iter: impl IntoIterator) -> Self + pub fn with_data(mut self, iter: impl IntoIterator) -> Self where - I: AsRef<[usize]>, + C: CellIndex, { self.extend(iter); self } + pub fn with_style(self, style: PivotTableStyle) -> Self { + Self { style, ..self } + } + pub fn with_metadata(self, metadata: PivotTableMetadata) -> Self { + Self { metadata, ..self } + } + /// Converts per-axis presentation-order indexes in `presentation_indexes`, /// into data indexes for each dimension. fn convert_indexes_ptod( @@ -1737,7 +1900,7 @@ impl PivotTable { /// /// - Otherwise, the iterator will just visit `self.current_layer`. 
pub fn layers(&self, print: bool) -> Box>> { - if print && self.look.print_all_layers { + if print && self.style.look.print_all_layers { Box::new(self.axis_values(Axis3::Z)) } else { Box::new(once(SmallVec::from_slice(&self.current_layer))) @@ -1746,10 +1909,10 @@ impl PivotTable { pub fn value_options(&self) -> ValueOptions { ValueOptions { - show_values: self.show_values, - show_variables: self.show_variables, - small: self.small, - footnote_marker_type: self.look.footnote_marker_type, + show_values: self.style.show_values, + show_variables: self.style.show_variables, + small: self.style.small, + footnote_marker_type: self.style.look.footnote_marker_type, } } @@ -1814,13 +1977,13 @@ impl PivotTable { } } -impl Extend<(I, Value)> for PivotTable +impl Extend<(C, Value)> for PivotTable where - I: AsRef<[usize]>, + C: CellIndex, { - fn extend>(&mut self, iter: T) { - for (data_indexes, value) in iter { - self.insert(data_indexes.as_ref(), value); + fn extend>(&mut self, iter: T) { + for (cell_index, value) in iter { + self.insert(cell_index, value); } } } @@ -1843,19 +2006,22 @@ impl Footnote { show: true, } } - pub fn with_marker(mut self, marker: impl Into) -> Self { - self.marker = Some(Box::new(marker.into())); - self + pub fn with_marker(self, marker: Option) -> Self { + Self { + marker: marker.map(Box::new), + ..self + } + } + pub fn with_some_marker(self, marker: impl Into) -> Self { + Self::with_marker(self, Some(marker.into())) } - pub fn with_show(mut self, show: bool) -> Self { - self.show = show; - self + pub fn with_show(self, show: bool) -> Self { + Self { show, ..self } } - pub fn with_index(mut self, index: usize) -> Self { - self.index = index; - self + pub fn with_index(self, index: usize) -> Self { + Self { index, ..self } } pub fn display_marker(&self, options: impl IntoValueOptions) -> DisplayMarker<'_> { @@ -2002,7 +2168,7 @@ impl Value { } } - fn new(inner: ValueInner) -> Self { + pub fn new(inner: ValueInner) -> Self { Self { inner, styling: 
None, @@ -2079,6 +2245,14 @@ impl Value { pub fn new_text(s: impl Into) -> Self { Self::new_user_text(s) } + pub fn new_general_text(localized: String, c: String, id: String, user_provided: bool) -> Self { + Self::new(ValueInner::Text(TextValue { + user_provided, + c: (c != localized).then_some(c), + id: (id != localized).then_some(id), + localized, + })) + } pub fn new_user_text(s: impl Into) -> Self { let s: String = s.into(); if s.is_empty() { @@ -2086,7 +2260,7 @@ impl Value { } else { Self::new(ValueInner::Text(TextValue { user_provided: true, - localized: s.clone(), + localized: s, c: None, id: None, })) @@ -2097,7 +2271,7 @@ impl Value { self } pub fn add_footnote(&mut self, footnote: &Arc) { - let footnotes = &mut self.styling.get_or_insert_default().footnotes; + let footnotes = &mut self.styling_mut().footnotes; footnotes.push(footnote.clone()); footnotes.sort_by_key(|f| f.index); } @@ -2126,6 +2300,37 @@ impl Value { } self } + pub fn with_variable_name(mut self, variable_name: Option) -> Self { + match &mut self.inner { + ValueInner::Number(NumberValue { variable, .. }) + | ValueInner::String(StringValue { + var_name: variable, .. + }) => *variable = variable_name, + ValueInner::Variable(VariableValue { + var_name: variable, .. 
+ }) => { + if let Some(name) = variable_name { + *variable = name; + } + } + _ => (), + } + self + } + pub fn styling_mut(&mut self) -> &mut ValueStyle { + self.styling.get_or_insert_default() + } + pub fn with_font_style(mut self, font_style: FontStyle) -> Self { + self.styling_mut().font_style = Some(font_style); + self + } + pub fn with_cell_style(mut self, cell_style: CellStyle) -> Self { + self.styling_mut().cell_style = Some(cell_style); + self + } + pub fn with_styling(self, styling: Option>) -> Self { + Self { styling, ..self } + } pub const fn empty() -> Self { Value { inner: ValueInner::Empty, @@ -2204,8 +2409,8 @@ impl<'a> DisplayValue<'a> { } pub fn with_styling(mut self, styling: &'a ValueStyle) -> Self { - if let Some(area_style) = &styling.style { - self.markup = area_style.font_style.markup; + if let Some(font_style) = &styling.font_style { + self.markup = font_style.markup; } self.subscripts = styling.subscripts.as_slice(); self.footnotes = styling.footnotes.as_slice(); @@ -2599,7 +2804,7 @@ impl TextValue { pub struct TemplateValue { pub args: Vec>, pub localized: String, - pub id: String, + pub id: Option, } #[derive(Clone, Debug, Default, Serialize)] @@ -2654,14 +2859,18 @@ impl ValueInner { #[derive(Clone, Debug, Default)] pub struct ValueStyle { - pub style: Option, + pub cell_style: Option, + pub font_style: Option, pub subscripts: Vec, pub footnotes: Vec>, } impl ValueStyle { pub fn is_empty(&self) -> bool { - self.style.is_none() && self.subscripts.is_empty() && self.footnotes.is_empty() + self.font_style.is_none() + && self.cell_style.is_none() + && self.subscripts.is_empty() + && self.footnotes.is_empty() } } @@ -2792,7 +3001,18 @@ impl Serialize for MetadataEntry { #[cfg(test)] mod test { - use crate::output::pivot::{Display26Adic, MetadataEntry, MetadataValue, Value}; + use std::str::FromStr; + + use crate::output::pivot::{Color, Display26Adic, MetadataEntry, MetadataValue, Value}; + + #[test] + fn parse_color() { + 
assert_eq!(Color::from_str("red"), Ok(Color::new(255, 0, 0))); + assert_eq!(Color::from_str("transparent"), Ok(Color::TRANSPARENT)); + assert_eq!(Color::from_str("rgb(12,34,56)"), Ok(Color::new(12, 34, 56))); + assert_eq!(Color::from_str("#abcdef"), Ok(Color::new(0xab, 0xcd, 0xef))); + assert_eq!(Color::from_str("abcdef"), Ok(Color::new(0xab, 0xcd, 0xef))); + } #[test] fn display_26adic() { diff --git a/rust/pspp/src/output/pivot/output.rs b/rust/pspp/src/output/pivot/output.rs index 8df1ae5369..316634a12d 100644 --- a/rust/pspp/src/output/pivot/output.rs +++ b/rust/pspp/src/output/pivot/output.rs @@ -92,7 +92,7 @@ impl PivotTable { }; presentation_indexes[vary_axis] = &vary_indexes; let data_indexes = self.convert_indexes_ptod(presentation_indexes); - if self.get(&data_indexes).is_some() { + if self.get(&*data_indexes).is_some() { return false; } } @@ -141,7 +141,7 @@ impl PivotTable { let mut table = Table::new( Coord2::new(1, rows.len()), Coord2::new(0, 0), - self.look.areas.clone(), + self.style.look.areas.clone(), self.borders(false), self.into_value_options(), ); @@ -167,7 +167,11 @@ impl PivotTable { fn borders(&self, printing: bool) -> EnumMap { EnumMap::from_fn(|border| { - resolve_border_style(border, &self.look.borders, printing && self.show_grid_lines) + resolve_border_style( + border, + &self.style.look.borders, + printing && self.style.show_grid_lines, + ) }) } @@ -182,7 +186,7 @@ impl PivotTable { let mut body = Table::new( Coord2::from_fn(|axis| data[axis] + stub[axis]), stub, - self.look.areas.clone(), + self.style.look.areas.clone(), self.borders(printing), self.into_value_options(), ); @@ -199,7 +203,7 @@ impl PivotTable { Axis3::Z => layer_indexes, }; let data_indexes = self.convert_indexes_ptod(presentation_indexes); - let value = self.get(&data_indexes); + let value = self.get(&*data_indexes); body.put( Rect2::new(x..x + 1, y..y + 1), CellInner { @@ -213,14 +217,17 @@ impl PivotTable { // Insert corner text, but only if there's a stub and only 
if row labels // are not in the corner. - if self.corner_text.is_some() - && self.look.row_label_position == LabelPosition::Nested + if self.metadata.corner_text.is_some() + && self.style.look.row_label_position == LabelPosition::Nested && stub.x() > 0 && stub.y() > 0 { body.put( Rect2::new(0..stub.x(), 0..stub.y()), - CellInner::new(Area::Corner, self.corner_text.clone().unwrap_or_default()), + CellInner::new( + Area::Corner, + self.metadata.corner_text.clone().unwrap_or_default(), + ), ); } @@ -245,7 +252,10 @@ impl PivotTable { } pub fn output_title(&self) -> Option { - Some(self.create_aux_table3(Area::Title, [self.title.as_ref()?.clone()].into_iter())) + Some(self.create_aux_table3( + Area::Title, + [self.metadata.title.as_ref()?.clone()].into_iter(), + )) } pub fn output_layers(&self, layer_indexes: &[usize]) -> Option
{ @@ -258,7 +268,7 @@ impl PivotTable { layer_indexes, ) { if !dimension.is_empty() { - layers.push(dimension.nth_leaf(layer_index).unwrap().name.clone()); + layers.push(dimension.nth_leaf(layer_index).unwrap().0.clone()); } } layers.reverse(); @@ -267,7 +277,10 @@ impl PivotTable { } pub fn output_caption(&self) -> Option
{ - Some(self.create_aux_table3(Area::Caption, [self.caption.as_ref()?.clone()].into_iter())) + Some(self.create_aux_table3( + Area::Caption, + [self.metadata.caption.as_ref()?.clone()].into_iter(), + )) } pub fn output_footnotes(&self, footnotes: &[Arc]) -> Option
{ @@ -285,10 +298,14 @@ impl PivotTable { pub fn output(&self, layer_indexes: &[usize], printing: bool) -> OutputTables { // Produce most of the tables. - let title = self.show_title.then(|| self.output_title()).flatten(); + let title = self.style.show_title.then(|| self.output_title()).flatten(); let layers = self.output_layers(layer_indexes); let body = self.output_body(layer_indexes, printing); - let caption = self.show_caption.then(|| self.output_caption()).flatten(); + let caption = self + .style + .show_caption + .then(|| self.output_caption()) + .flatten(); // Then collect the footnotes from those tables. let tables = [ @@ -354,7 +371,7 @@ pub struct OutputTables { impl Path<'_> { pub fn get(&self, y: usize, height: usize) -> Option<&Value> { if y + 1 == height { - Some(&self.leaf.name) + Some(&self.leaf.0) } else { self.groups.get(y).map(|group| &*group.name) } @@ -542,7 +559,8 @@ struct Headings<'a> { impl<'a> Headings<'a> { fn new(pt: &'a PivotTable, h: Axis2, layer_indexes: &[usize]) -> Self { - let column_enumeration = pt.enumerate_axis(h.into(), layer_indexes, pt.look.hide_empty); + let column_enumeration = + pt.enumerate_axis(h.into(), layer_indexes, pt.style.look.hide_empty); let mut headings = pt.axes[h.into()] .dimensions @@ -556,7 +574,7 @@ impl<'a> Headings<'a> { .collect::>(); let row_label_position = if h == Axis2::Y - && pt.look.row_label_position == LabelPosition::Corner + && pt.style.look.row_label_position == LabelPosition::Corner && headings .iter_mut() .map(|heading| heading.move_dimension_labels_to_corner()) diff --git a/rust/pspp/src/output/pivot/tests.rs b/rust/pspp/src/output/pivot/tests.rs index d54e865a95..1e7ee45da3 100644 --- a/rust/pspp/src/output/pivot/tests.rs +++ b/rust/pspp/src/output/pivot/tests.rs @@ -719,7 +719,7 @@ fn footnote_table(show_f0: bool) -> PivotTable { let mut footnotes = Footnotes::new(); let f0 = footnotes.push( Footnote::new("First footnote") - .with_marker("*") + .with_some_marker("*") .with_show(show_f0), 
); let f1 = footnotes.push(Footnote::new("Second footnote")); diff --git a/rust/pspp/src/output/pivot/tlo.rs b/rust/pspp/src/output/pivot/tlo.rs index e857784809..ca5fc629f5 100644 --- a/rust/pspp/src/output/pivot/tlo.rs +++ b/rust/pspp/src/output/pivot/tlo.rs @@ -218,6 +218,7 @@ enum Separator { None, #[br(magic = 1u16)] Some { + #[br(parse_with(parse_tlo_color))] color: Color, style: u16, width: u16, @@ -249,17 +250,10 @@ impl From for BorderStyle { } } -impl BinRead for Color { - type Args<'a> = (); - - fn read_options( - reader: &mut R, - endian: binrw::Endian, - _args: (), - ) -> BinResult { - let raw = ::read_options(reader, endian, ())?; - Ok(Color::new(raw as u8, (raw >> 8) as u8, (raw >> 16) as u8)) - } +#[binrw::parser(reader, endian)] +fn parse_tlo_color() -> BinResult { + let raw = ::read_options(reader, endian, ())?; + Ok(Color::new(raw as u8, (raw >> 8) as u8, (raw >> 16) as u8)) } #[binread] @@ -277,8 +271,9 @@ struct PvCellStyle { #[br(little)] #[derive(Debug)] struct AreaColor { - #[br(magic = b"\0\x01\0")] + #[br(magic(b"\0\x01\0"), parse_with(parse_tlo_color))] color10: Color, + #[br(parse_with(parse_tlo_color))] color0: Color, shading: u8, #[br(temp, magic = 0u8)] @@ -290,18 +285,8 @@ impl From for Color { match area_color.shading { 0 => area_color.color0, x1 @ 1..=9 => { - let Color { - r: r0, - g: g0, - b: b0, - .. - } = area_color.color0; - let Color { - r: r1, - g: g1, - b: b1, - .. 
- } = area_color.color10; + let (r0, g0, b0) = area_color.color0.into_rgb(); + let (r1, g1, b1) = area_color.color10.into_rgb(); fn mix(c0: u32, c1: u32, x1: u32) -> u8 { let x0 = 10 - x1; ((c0 * x0 + c1 * x1) / 10) as u8 @@ -427,6 +412,7 @@ struct AreaStyle { rtf_charset_number: u32, x: u8, font_name: U8String, + #[br(parse_with(parse_tlo_color))] text_color: Color, #[br(temp, magic = 0u16)] _tmp: (), @@ -490,7 +476,7 @@ impl Default for V2Styles { } #[binrw::parser(reader, endian)] -fn parse_bool() -> BinResult { +pub fn parse_bool() -> BinResult { let byte = ::read_options(reader, endian, ())?; match byte { 0 => Ok(false), diff --git a/rust/pspp/src/output/render.rs b/rust/pspp/src/output/render.rs index 61ac68af47..ef67425bda 100644 --- a/rust/pspp/src/output/render.rs +++ b/rust/pspp/src/output/render.rs @@ -974,7 +974,7 @@ impl Page { usize::checked_sub(cell.rect[Y].start, self.h[Y]).is_some_and(|row| row % 2 == 1); let draw_cell = DrawCell::new(cell.content.inner(), &self.table); - let valign_offset = match draw_cell.style.cell_style.vert_align { + let valign_offset = match draw_cell.cell_style.vert_align { VertAlign::Top => 0, VertAlign::Middle => self.extra_height(device, &bb, &draw_cell) / 2, VertAlign::Bottom => self.extra_height(device, &bb, &draw_cell), @@ -1370,10 +1370,10 @@ impl Pager { // Figure out the width of the body of the table. Use this to determine // the base scale. 
- let body_page = Page::new(Arc::new(output.body), device, 0, &pivot_table.look); + let body_page = Page::new(Arc::new(output.body), device, 0, &pivot_table.style.look); let body_width = body_page.width(Axis2::X); let mut scale = if body_width > device.params().size[Axis2::X] - && pivot_table.look.shrink_to_fit[Axis2::X] + && pivot_table.style.look.shrink_to_fit[Axis2::X] && device.params().can_scale { device.params().size[Axis2::X] as f64 / body_width as f64 @@ -1387,7 +1387,7 @@ impl Pager { Arc::new(table), device, body_width, - &pivot_table.look, + &pivot_table.style.look, ))); } pages.push(Arc::new(body_page)); @@ -1396,7 +1396,7 @@ impl Pager { Arc::new(table), device, 0, - &pivot_table.look, + &pivot_table.style.look, ))); } pages.reverse(); @@ -1410,7 +1410,7 @@ impl Pager { // shrinking the table vertically more than the scale would imply. // Shrinking only as much as necessary would require an iterative // search. - if pivot_table.look.shrink_to_fit[Axis2::Y] && device.params().can_scale { + if pivot_table.style.look.shrink_to_fit[Axis2::Y] && device.params().can_scale { let total_height = pages .iter() .map(|page: &Arc| page.total_size(Axis2::Y)) diff --git a/rust/pspp/src/output/spv.rs b/rust/pspp/src/output/spv.rs new file mode 100644 index 0000000000..dc5dd541de --- /dev/null +++ b/rust/pspp/src/output/spv.rs @@ -0,0 +1,323 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . + +use std::{ + fs::File, + io::{BufReader, Cursor, Read, Seek}, + path::Path, +}; + +use binrw::BinRead; +use displaydoc::Display; +use itertools::Itertools; +use serde::Deserialize; +use zip::{ZipArchive, result::ZipError}; + +use crate::output::{ + Details, Item, Text, + page::PageSetup, + pivot::{TableProperties, Value}, + spv::light::{LightError, LightTable}, +}; + +mod css; +mod html; +mod light; + +#[derive(Debug, Display, thiserror::Error)] +pub enum Error { + /// Not an SPV file. + NotSpv, + + /// {0} + ZipError(#[from] ZipError), + + /// {0} + IoError(#[from] std::io::Error), + + /// {0} + DeError(#[from] quick_xml::DeError), + + /// {0} + BinrwError(#[from] binrw::Error), + + /// {0} + LightError(#[from] LightError), +} + +impl Item { + fn from_spv_file(path: impl AsRef) -> Result<(Self, Option), Error> { + Self::from_spv_reader(File::open(path.as_ref())?) + } + + fn from_spv_reader(reader: R) -> Result<(Self, Option), Error> + where + R: Read + Seek, + { + // Open archive. + let mut archive = ZipArchive::new(reader).map_err(|error| match error { + ZipError::InvalidArchive(_) => Error::NotSpv, + other => other.into(), + })?; + + // Check manifest. 
+ let mut file = archive + .by_name("META-INF/MANIFEST.MF") + .map_err(|_| Error::NotSpv)?; + let mut string = String::new(); + file.read_to_string(&mut string)?; + if string.trim() != "allowPivoting=true" { + return Err(Error::NotSpv); + } + drop(file); + + let mut items = Vec::new(); + let mut page_setup = None; + for i in 0..archive.len() { + let name = archive.name_for_index(i).unwrap(); + if name.starts_with("outputViewer") && name.ends_with(".xml") { + let name = String::from(name); + let (mut new_items, ps) = read_heading(&mut archive, i)?; + items.append(&mut new_items); + page_setup = page_setup.or(ps); + } + } + + Ok(( + Item::new(Details::Group(items.into_iter().map_into().collect())), + page_setup, + )) + } +} + +fn read_heading( + archive: &mut ZipArchive, + file_number: usize, +) -> Result<(Vec, Option), Error> +where + R: Read + Seek, +{ + println!("{}", archive.by_index(file_number)?.name()); + let member = BufReader::new(archive.by_index(file_number)?); + let mut heading: Heading = match serde_path_to_error::deserialize( + &mut quick_xml::de::Deserializer::from_reader(member), + ) { + Ok(result) => result, + Err(error) => panic!("{error}"), + }; + dbg!(&heading); + let page_setup = heading.page_setup.take(); + Ok((heading.decode(archive)?, page_setup)) +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Heading { + #[serde(rename = "@visibility")] + visibility: Option, + label: Label, + page_setup: Option, + + #[serde(rename = "$value")] + #[serde(default)] + children: Vec, +} + +impl Heading { + fn decode(self, archive: &mut ZipArchive) -> Result, Error> + where + R: Read + Seek, + { + let mut items = Vec::new(); + for child in self.children { + match child { + HeadingContent::Container(container) => { + if container.page_break_before { + items.push(Item::new(Details::PageBreak)); + } + match container.content { + ContainerContent::Table(table) => { + items.push(table.decode(archive).unwrap() /* XXX*/); + } + 
ContainerContent::Text(container_text) => { + items.push( + Item::new(Details::Text(Box::new(Text::new_log( + container_text.decode(), + )))) + .with_command_name(container_text.command_name), + ); + } + } + } + HeadingContent::Heading(heading) => { + let show = !heading.visibility.is_some(); + items.push( + Item::new(Details::Group( + heading.decode(archive)?.into_iter().map_into().collect(), + )) + .with_show(show), + ); + } + } + } + Ok(items) + } +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum HeadingContent { + Container(Container), + Heading(Box), +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Label { + #[serde(rename = "$text")] + text: String, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Container { + #[serde(rename = "@visibility")] + visibility: Visibility, + #[serde(rename = "@page-break-before")] + #[serde(default)] + page_break_before: bool, + #[serde(rename = "@text-align")] + text_align: Option, + #[serde(rename = "@width")] + width: Option, + label: Label, + + #[serde(rename = "$value")] + content: ContainerContent, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum Visibility { + Visible, + Hidden, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum TextAlign { + Left, + Center, + Right, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum ContainerContent { + Table(Table), + Text(ContainerText), + /* + Graph(Graph), + Model(Model), + Object(Object), + Image(Image), + Tree(Tree),*/ +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Table { + #[serde(rename = "@commandName")] + command_name: String, + #[serde(rename = "@subType")] + sub_type: String, + #[serde(rename = "@tableId")] + table_id: i64, + #[serde(rename = "@type")] + table_type: TableType, + properties: Option, + table_structure: TableStructure, +} + +impl Table { + fn decode(&self, 
archive: &mut ZipArchive) -> Result + where + R: Read + Seek, + { + if self.table_structure.path.is_none() { + let mut light = archive.by_name(&self.table_structure.data_path)?; + let mut data = Vec::with_capacity(light.size() as usize); + light.read_to_end(&mut data)?; + let table = LightTable::read(&mut Cursor::new(data))?; + let pivot_table = table.decode()?; + println!("{}", &pivot_table); + Ok(Item::new(Details::Table(Box::new(pivot_table)))) + } else { + todo!() + } + } +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum TableType { + Table, + Note, + Warning, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct ContainerText { + #[serde(rename = "@type")] + text_type: TextType, + #[serde(rename = "@commandName")] + command_name: Option, + html: String, +} + +impl ContainerText { + fn decode(&self) -> Value { + html::parse(&self.html) + } +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum TextType { + Title, + Log, + Text, + #[serde(rename = "page-title")] + PageTitle, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct TableStructure { + path: Option, + data_path: String, + csv_path: Option, +} + +#[cfg(test)] +#[test] +fn test_spv() { + let item = Item::from_spv_file(Path::new("/home/blp/pspp/rust/tests/utilities/regress.spv")) + .unwrap() + .0; + println!("{item}"); +} diff --git a/rust/pspp/src/output/spv/css.rs b/rust/pspp/src/output/spv/css.rs new file mode 100644 index 0000000000..b33d83ce12 --- /dev/null +++ b/rust/pspp/src/output/spv/css.rs @@ -0,0 +1,131 @@ +use crate::output::pivot::FontStyle; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Token<'a> { + Id(&'a str), + LeftCurly, + RightCurly, + Colon, + Semicolon, + Error, +} + +struct Lexer<'a>(&'a str); + +impl<'a> Iterator for Lexer<'a> { + type Item = Token<'a>; + + fn next(&mut self) -> Option { + let mut s = self.0; + loop { + s = s.trim_start(); + if let Some(rest) = 
s.strip_prefix("") { + s = rest; + } else { + break; + } + } + let mut iter = s.chars(); + let (c, mut rest) = (iter.next()?, iter.as_str()); + let (token, rest) = match c { + '{' => (Token::LeftCurly, rest), + '}' => (Token::RightCurly, rest), + ':' => (Token::Colon, rest), + ';' => (Token::Semicolon, rest), + _ => { + while let Some(c) = iter.next() + && !c.is_whitespace() + && c != '{' + && c != '}' + && c != ':' + && c != ';' + { + rest = iter.as_str(); + } + let id_len = s.len() - rest.len(); + let (id, rest) = s.split_at(id_len); + (Token::Id(id), rest) + } + }; + self.0 = rest; + Some(token) + } +} + +impl FontStyle { + pub fn parse_css(&mut self, s: &str) { + let mut lexer = Lexer(s); + while let Some(token) = lexer.next() { + if let Token::Id(key) = token + && let Some(Token::Colon) = lexer.next() + && let Some(Token::Id(value)) = lexer.next() + { + match key { + "color" => { + if let Ok(color) = value.parse() { + self.fg = [color; 2]; + } + } + "font-weight" => self.bold = value == "bold", + "font-self" => self.italic = value == "italic", + "text-decoration" => self.underline = dbg!(value) == "underline", + "font-family" => self.font = value.into(), + "font-size" => { + if let Ok(size) = value.parse::() { + self.size = (size as i64 * 3 / 4) as i32; + } + } + _ => (), + } + } + } + } + + pub fn from_css(s: &str) -> Self { + let mut style = FontStyle::default(); + style.parse_css(s); + style + } +} + +#[cfg(test)] +#[test] +fn test_css_style() { + use crate::output::pivot::Color; + + assert_eq!(FontStyle::from_css(""), FontStyle::default()); + assert_eq!( + FontStyle::from_css(r#"p{color:ff0000}"#), + FontStyle { + fg: [Color::RED; 2], + ..FontStyle::default() + } + ); + assert_eq!( + FontStyle::from_css("p {font-weight: bold; text-decoration: underline}"), + FontStyle { + bold: true, + underline: true, + ..FontStyle::default() + } + ); + assert_eq!( + FontStyle::from_css("p {font-family: Monospace}"), + FontStyle { + font: String::from("Monospace"), + 
..FontStyle::default() + } + ); + assert_eq!( + FontStyle::from_css("p {font-size: 24}"), + FontStyle { + size: 18, + ..FontStyle::default() + } + ); + dbg!(FontStyle::from_css( + "color: red; font-weight: bold; text-decoration: underline; font-family: Serif" + )); +} diff --git a/rust/pspp/src/output/spv/html.rs b/rust/pspp/src/output/spv/html.rs new file mode 100644 index 0000000000..b75c762ef7 --- /dev/null +++ b/rust/pspp/src/output/spv/html.rs @@ -0,0 +1,154 @@ +use std::{ + fmt::{Display, Write}, + str::FromStr, +}; + +use html_parser::{Dom, Element, Node}; + +use crate::output::pivot::{Color, FontStyle, Value}; + +fn find_element<'a>(elements: &'a [Node], name: &str) -> Option<&'a Element> { + for element in elements { + if let Node::Element(element) = element + && element.name == name + { + return Some(element); + } + } + None +} + +fn get_node_text(node: &Node, text: &mut String) { + match node { + Node::Text(string) => text.push_str(&string), + Node::Element(element) => get_element_text(element, text), + Node::Comment(_) => (), + } +} + +fn get_element_text(element: &Element, text: &mut String) { + for child in &element.children { + get_node_text(child, text); + } +} + +fn extract_html_text(node: &Node, base_font_size: i32, s: &mut String) { + match node { + Node::Text(text) => { + for c in text.chars() { + fn push_whitespace(c: char, s: &mut String) { + if s.chars().next_back().is_none_or(|c| !c.is_whitespace()) { + s.push(c); + } + } + + match c { + '\u{00a0}' => { + // U+00A0 NONBREAKING SPACE is really, really common + // in SPV text and it makes it impossible to break + // syntax across lines. Translate it into a regular + // space. + push_whitespace(' ', s); + } + '\u{2007}' => { + // U+2007 FIGURE SPACE also crops up weirdly + // sometimes. 
+ push_whitespace(' ', s); + } + _ if c.is_whitespace() => push_whitespace(c, s), + '<' => s.push_str("<"), + '>' => s.push_str(">"), + '&' => s.push_str("&"), + _ => s.push(c), + } + } + } + Node::Element(element) => { + fn push_attribute(name: &str, value: impl Display, s: &mut String) { + write!(s, " {name}=\"").unwrap(); + let value = value.to_string(); + for c in value.chars() { + match c { + '\n' => s.push_str(" "), + '&' => s.push_str("&"), + '<' => s.push_str("<"), + '>' => s.push_str(">"), + '"' => s.push_str("""), + _ => s.push(c), + } + } + s.push('"'); + } + + let tag = element.name.as_str(); + let tag = match tag { + "br" | "BR" => { + s.push('\n'); + None + } + "b" | "i" | "u" => { + write!(s, "<{tag}>").unwrap(); + Some(tag) + } + "font" => { + s.push_str("'); + Some("span") + } + _ => None, + }; + for child in &element.children { + extract_html_text(child, base_font_size, s); + } + if let Some(tag) = tag { + write!(s, "").unwrap(); + } + } + Node::Comment(_) => (), + } +} + +pub fn parse(input: &str) -> Value { + let mut font_style = FontStyle { + size: 10, + ..Default::default() + }; + let text = match Dom::parse(input) { + Ok(dom) => { + font_style.markup = true; + if let Some(head) = find_element(&dom.children, "head") + && let Some(style) = find_element(&head.children, "style") + { + let mut text = String::new(); + get_element_text(style, &mut text); + font_style.parse_css(&text) + } + + let mut s = String::new(); + for node in &dom.children { + extract_html_text(node, font_style.size, &mut s); + } + s + } + _ => input.into(), + }; + Value::new_user_text(text).with_font_style(font_style) +} diff --git a/rust/pspp/src/output/spv/light.rs b/rust/pspp/src/output/spv/light.rs new file mode 100644 index 0000000000..cc38c8d8c2 --- /dev/null +++ b/rust/pspp/src/output/spv/light.rs @@ -0,0 +1,1552 @@ +use std::{ + fmt::Debug, + io::{Cursor, Read, Seek}, + ops::Deref, + str::FromStr, + sync::Arc, +}; + +use binrw::{BinRead, BinResult, Endian, Error as 
BinError, VecArgs, binread}; +use chrono::DateTime; +use displaydoc::Display; +use encoding_rs::{Encoding, WINDOWS_1252}; +use enum_map::{EnumMap, enum_map}; + +use crate::{ + format::{ + CC, Decimal, Decimals, Epoch, Format, NumberStyle, Settings, Type, UncheckedFormat, Width, + }, + output::pivot::{ + self, AreaStyle, Axis2, Axis3, BoxBorder, Color, FootnoteMarkerPosition, + FootnoteMarkerType, Footnotes, Group, HeadingRegion, HorzAlign, LabelPosition, Look, + PivotTable, PivotTableMetadata, PivotTableStyle, PrecomputedIndex, RowColBorder, + StringValue, Stroke, TemplateValue, ValueStyle, VariableValue, VertAlign, parse_bool, + }, + settings::Show, +}; + +#[derive(Debug, Display, thiserror::Error)] +pub enum LightError { + /// Expected {expected} dimensions along axes, found {actual} dimensions ({n_layers} layers + {n_rows} rows + {n_columns} columns). + WrongAxisCount { + expected: usize, + actual: usize, + n_layers: usize, + n_rows: usize, + n_columns: usize, + }, + + /// Invalid dimension index {index} in table with {n} dimensions. + InvalidDimensionIndex { index: usize, n: usize }, + + /// Dimension with index {0} appears twice in table axes. 
+ DuplicateDimensionIndex(usize), +} + +#[binread] +#[br(little)] +#[derive(Debug)] +pub struct LightTable { + #[br(dbg)] + header: Header, + #[br(args(header.version))] + titles: Titles, + #[br(parse_with(parse_counted), args(header.version))] + footnotes: Vec, + #[br(args(header.version))] + areas: Areas, + borders: Counted, + print_settings: Counted, + #[br(if(header.version == Version::V3))] + table_settings: Counted, + #[br(if(header.version == Version::V1), temp)] + _ts: Option>, + #[br(args(header.version))] + formats: Formats, + #[br(parse_with(parse_counted), args(header.version))] + dimensions: Vec, + axes: Axes, + #[br(dbg, parse_with(parse_counted), args(header.version))] + cells: Vec, +} + +impl LightTable { + fn decode_look(&self, encoding: &'static Encoding) -> Look { + Look { + name: self.table_settings.table_look.decode_optional(encoding), + hide_empty: self.table_settings.omit_empty, + row_label_position: if self.table_settings.show_row_labels_in_corner { + LabelPosition::Corner + } else { + LabelPosition::Nested + }, + heading_widths: enum_map! { + HeadingRegion::Rows => self.header.min_row_heading_width as usize..=self.header.max_row_heading_width as usize, + HeadingRegion::Columns => self.header.min_column_heading_width as usize..=self.header.max_column_heading_width as usize, + }, + footnote_marker_type: if self.table_settings.show_alphabetic_markers { + FootnoteMarkerType::Alphabetic + } else { + FootnoteMarkerType::Numeric + }, + footnote_marker_position: if self.table_settings.footnote_marker_subscripts { + FootnoteMarkerPosition::Subscript + } else { + FootnoteMarkerPosition::Superscript + }, + areas: self.areas.decode(encoding), + borders: self.borders.decode(), + print_all_layers: self.print_settings.alll_layers, + paginate_layers: self.print_settings.paginate_layers, + shrink_to_fit: enum_map! 
{ + Axis2::X => self.print_settings.fit_width, + Axis2::Y => self.print_settings.fit_length, + }, + top_continuation: self.print_settings.top_continuation, + bottom_continuation: self.print_settings.bottom_continuation, + continuation: self + .print_settings + .continuation_string + .decode_optional(encoding), + n_orphan_lines: self.print_settings.n_orphan_lines, + } + } + + pub fn decode(&self) -> Result { + let encoding = self.formats.encoding(); + + let x1 = self.formats.x1(); + let x2 = self.formats.x2(); + let x3 = self.formats.x3(); + let x3_inner = x3.and_then(|x3| x3.inner.as_ref()); + let y1 = self.formats.y1(); + let footnotes = self + .footnotes + .iter() + .map(|f| f.decode(encoding, &Footnotes::new())) + .collect(); + let cells = self + .cells + .iter() + .map(|cell| { + ( + PrecomputedIndex(cell.index as usize), + cell.value.decode(encoding, &footnotes), + ) + }) + .collect::>(); + let dimensions = self + .dimensions + .iter() + .map(|d| { + let mut root = Group::new(d.name.decode(encoding, &footnotes)) + .with_show_label(!d.hide_dim_label); + for category in &d.categories { + category.decode(encoding, &footnotes, &mut root); + } + pivot::Dimension { + presentation_order: (0..root.len()).collect(), /*XXX*/ + root, + hide_all_labels: d.hide_all_labels, + } + }) + .collect::>(); + let pivot_table = PivotTable::new(self.axes.decode(dimensions)?) 
+ .with_style(PivotTableStyle { + look: Arc::new(self.decode_look(encoding)), + rotate_inner_column_labels: self.header.rotate_inner_column_labels, + rotate_outer_row_labels: self.header.rotate_outer_row_labels, + show_grid_lines: self.borders.show_grid_lines, + show_title: x1.map_or(true, |x1| x1.show_title != 10), + show_caption: x1.map_or(true, |x1| x1.show_caption), + show_values: x1.map_or(None, |x1| x1.show_values), + show_variables: x1.map_or(None, |x1| x1.show_variables), + sizing: self.table_settings.sizing.decode( + &self.formats.column_widths, + x2.map_or(&[], |x2| &x2.row_heights), + ), + settings: Settings { + epoch: self.formats.y0.epoch(), + decimal: self.formats.y0.decimal(), + leading_zero: y1.map_or(false, |y1| y1.include_leading_zero), + ccs: self.formats.custom_currency.decode(encoding), + }, + grouping: { + let grouping = self.formats.y0.grouping; + b",.' ".contains(&grouping).then_some(grouping as char) + }, + small: x3.map_or(0.0, |x3| x3.small), + weight_format: Format::F40, + }) + .with_metadata(PivotTableMetadata { + command_local: y1.map(|y1| y1.command_local.decode(encoding)), + command_c: y1.map(|y1| y1.command.decode(encoding)), + language: y1.map(|y1| y1.language.decode(encoding)), + locale: y1.map(|y1| y1.locale.decode(encoding)), + dataset: x3_inner.and_then(|strings| strings.dataset.decode_optional(encoding)), + datafile: x3_inner.and_then(|strings| strings.datafile.decode_optional(encoding)), + date: x3_inner.and_then(|inner| { + if inner.date != 0 { + DateTime::from_timestamp(inner.date as i64, 0).map(|dt| dt.naive_utc()) + } else { + None + } + }), + title: Some(Box::new(self.titles.title.decode(encoding, &footnotes))), + subtype: Some(Box::new(self.titles.subtype.decode(encoding, &footnotes))), + corner_text: self + .titles + .corner_text + .as_ref() + .map(|corner| Box::new(corner.decode(encoding, &footnotes))), + caption: self + .titles + .caption + .as_ref() + .map(|caption| Box::new(caption.decode(encoding, &footnotes))), + 
notes: self.table_settings.notes.decode_optional(encoding), + }) + .with_footnotes(footnotes) + .with_data(cells); + Ok(pivot_table) + } +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct Header { + #[br(magic = b"\x01\0")] + version: Version, + #[br(parse_with(parse_bool), temp)] + _x0: bool, + #[br(parse_with(parse_bool), temp)] + _x1: bool, + #[br(parse_with(parse_bool))] + rotate_inner_column_labels: bool, + #[br(parse_with(parse_bool))] + rotate_outer_row_labels: bool, + #[br(parse_with(parse_bool), temp)] + _x2: bool, + #[br(temp)] + _x3: i32, + min_column_heading_width: u32, + max_column_heading_width: u32, + min_row_heading_width: u32, + max_row_heading_width: u32, + table_id: i64, +} + +#[binread] +#[br(little)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Version { + #[br(magic = 1u32)] + V1, + #[br(magic = 3u32)] + V3, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct Titles { + #[br(args(version))] + title: Value, + #[br(temp)] + _1: Optional, + #[br(args(version))] + subtype: Value, + #[br(temp)] + _2: Optional, + #[br(magic = b'1')] + #[br(args(version))] + user_title: Value, + #[br(temp)] + _3: Optional, + #[br(parse_with(parse_optional), args(version))] + corner_text: Option, + #[br(parse_with(parse_optional), args(version))] + caption: Option, +} + +#[binread] +#[br(little, magic = 1u8)] +#[derive(Debug)] +struct One; + +#[binread] +#[br(little, magic = 0u8)] +#[derive(Debug)] +struct Zero; + +#[binrw::parser(reader, endian)] +pub fn parse_optional<'a, T, A>(args: A, ...) -> BinResult> +where + T: BinRead = A>, +{ + let byte = ::read_options(reader, endian, ())?; + match byte { + b'1' => Ok(Some(T::read_options(reader, endian, args)?)), + b'X' => Ok(None), + _ => Err(BinError::NoVariantMatch { + pos: reader.stream_position()? - 1, + }), + } +} + +#[binrw::parser(reader, endian)] +fn parse_counted(inner: A, ...) 
-> BinResult> +where + for<'a> T: BinRead = A>, + A: Clone, + T: 'static, +{ + let count = u32::read_options(reader, endian, ())? as usize; + >::read_options(reader, endian, VecArgs { count, inner }) +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct Footnote { + #[br(args(version))] + text: Value, + #[br(parse_with(parse_optional))] + #[br(args(version))] + marker: Option, + show: i32, +} + +impl Footnote { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Footnote { + pivot::Footnote::new(self.text.decode(encoding, footnotes)) + .with_marker(self.marker.as_ref().map(|m| m.decode(encoding, footnotes))) + .with_show(self.show > 0) + } +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct Areas { + #[br(temp)] + _1: Optional, + #[br(args(version))] + areas: [Area; 8], +} + +impl Areas { + fn decode(&self, encoding: &'static Encoding) -> EnumMap { + EnumMap::from_fn(|area| { + let index = match area { + pivot::Area::Title => 0, + pivot::Area::Caption => 1, + pivot::Area::Footer => 2, + pivot::Area::Corner => 3, + pivot::Area::Labels(Axis2::X) => 4, + pivot::Area::Labels(Axis2::Y) => 5, + pivot::Area::Data => 6, + pivot::Area::Layers => 7, + }; + self.areas[index].decode(encoding) + }) + } +} + +#[binrw::parser(reader, endian)] +fn parse_color() -> BinResult { + let pos = reader.stream_position()?; + let string = U32String::read_options(reader, endian, ())?; + let string = string.decode(WINDOWS_1252); + dbg!(&string); + if string.is_empty() { + Ok(Color::BLACK) + } else { + Color::from_str(&string).map_err(|error| binrw::Error::Custom { + pos, + err: Box::new(error), + }) + } +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct Area { + #[br(temp)] + _index: u8, + #[br(magic = b'1')] + typeface: U32String, + size: f32, + style: i32, + #[br(parse_with(parse_bool))] + underline: bool, + halign: i32, + valign: i32, + 
#[br(parse_with(parse_color))] + fg: Color, + #[br(parse_with(parse_color))] + bg: Color, + #[br(parse_with(parse_bool))] + alternate: bool, + #[br(parse_with(parse_color))] + alt_fg: Color, + #[br(parse_with(parse_color))] + alt_bg: Color, + #[br(if(version == Version::V3))] + margins: Margins, +} + +impl Area { + fn decode(&self, encoding: &'static Encoding) -> AreaStyle { + AreaStyle { + cell_style: pivot::CellStyle { + horz_align: match self.halign { + 0 => Some(HorzAlign::Center), + 2 => Some(HorzAlign::Left), + 4 => Some(HorzAlign::Right), + _ => None, + }, + vert_align: match self.valign { + 0 => VertAlign::Middle, + 3 => VertAlign::Bottom, + _ => VertAlign::Top, + }, + margins: enum_map! { + Axis2::X => [self.margins.left_margin, self.margins.right_margin], + Axis2::Y => [self.margins.top_margin, self.margins.bottom_margin] + }, + }, + font_style: pivot::FontStyle { + bold: (self.style & 1) != 0, + italic: (self.style & 2) != 0, + underline: self.underline, + markup: false, + font: self.typeface.decode(encoding), + fg: [self.fg, if self.alternate { self.alt_fg } else { self.fg }], + bg: [self.bg, if self.alternate { self.alt_bg } else { self.bg }], + size: (self.size / 1.33) as i32, + }, + } + } +} + +#[binread] +#[br(little)] +#[derive(Debug, Default)] +struct Margins { + left_margin: i32, + right_margin: i32, + top_margin: i32, + bottom_margin: i32, +} + +#[binread] +#[br(big)] +#[derive(Debug)] +struct Borders { + #[br(magic(1u32), parse_with(parse_counted))] + borders: Vec, + + #[br(parse_with(parse_bool))] + show_grid_lines: bool, + + #[br(temp, magic(b"\0\0\0"))] + _1: (), +} + +impl Borders { + fn decode(&self) -> EnumMap { + let mut borders = pivot::Border::default_borders(); + for border in &self.borders { + if let Some((border, style)) = border.decode() { + borders[border] = style; + } else { + // warning + } + } + borders + } +} + +#[binread] +#[br(big)] +#[derive(Debug)] +struct Border { + #[br(map(|index: u32| index as usize))] + index: usize, 
+ stroke: i32, + color: u32, +} + +impl Border { + fn decode(&self) -> Option<(pivot::Border, pivot::BorderStyle)> { + let border = match self.index { + 0 => pivot::Border::Title, + 1 => pivot::Border::OuterFrame(BoxBorder::Left), + 2 => pivot::Border::OuterFrame(BoxBorder::Top), + 3 => pivot::Border::OuterFrame(BoxBorder::Right), + 4 => pivot::Border::OuterFrame(BoxBorder::Bottom), + 5 => pivot::Border::InnerFrame(BoxBorder::Left), + 6 => pivot::Border::InnerFrame(BoxBorder::Top), + 7 => pivot::Border::InnerFrame(BoxBorder::Right), + 8 => pivot::Border::InnerFrame(BoxBorder::Bottom), + 9 => pivot::Border::DataLeft, + 10 => pivot::Border::DataLeft, + 11 => pivot::Border::Dimension(RowColBorder(HeadingRegion::Rows, Axis2::X)), + 12 => pivot::Border::Dimension(RowColBorder(HeadingRegion::Rows, Axis2::X)), + 13 => pivot::Border::Dimension(RowColBorder(HeadingRegion::Columns, Axis2::X)), + 14 => pivot::Border::Dimension(RowColBorder(HeadingRegion::Columns, Axis2::X)), + 15 => pivot::Border::Category(RowColBorder(HeadingRegion::Rows, Axis2::X)), + 16 => pivot::Border::Category(RowColBorder(HeadingRegion::Rows, Axis2::X)), + 17 => pivot::Border::Category(RowColBorder(HeadingRegion::Columns, Axis2::X)), + 18 => pivot::Border::Category(RowColBorder(HeadingRegion::Columns, Axis2::X)), + _ => return None, + }; + + let stroke = match self.stroke { + 0 => Stroke::None, + 2 => Stroke::Dashed, + 3 => Stroke::Thick, + 4 => Stroke::Thin, + 6 => Stroke::Double, + _ => Stroke::Solid, + }; + + let color = Color::new( + (self.color >> 16) as u8, + (self.color >> 8) as u8, + self.color as u8, + ) + .with_alpha((self.color >> 24) as u8); + + Some((border, pivot::BorderStyle { stroke, color })) + } +} + +#[binread] +#[br(big)] +#[derive(Debug)] +struct PrintSettings { + #[br(magic = b"\0\0\0\x01")] + #[br(parse_with(parse_bool))] + alll_layers: bool, + #[br(parse_with(parse_bool))] + paginate_layers: bool, + #[br(parse_with(parse_bool))] + fit_width: bool, + #[br(parse_with(parse_bool))] 
+ fit_length: bool, + #[br(parse_with(parse_bool))] + top_continuation: bool, + #[br(parse_with(parse_bool))] + bottom_continuation: bool, + #[br(map(|n: u32| n as usize))] + n_orphan_lines: usize, + continuation_string: U32String, +} + +#[binread] +#[br(big)] +#[derive(Debug, Default)] +struct TableSettings { + #[br(temp, magic = 1u32)] + _x5: i32, + #[br(dbg)] + current_layer: i32, + #[br(parse_with(parse_bool))] + omit_empty: bool, + #[br(parse_with(parse_bool))] + show_row_labels_in_corner: bool, + #[br(parse_with(parse_bool))] + show_alphabetic_markers: bool, + #[br(parse_with(parse_bool))] + footnote_marker_subscripts: bool, + #[br(temp, parse_with(parse_bool))] + _x6: bool, + #[br(big)] + sizing: Counted, + notes: U32String, + table_look: U32String, + #[br(temp)] + _sponge: Sponge, +} + +#[binread] +#[br(big)] +#[derive(Debug, Default)] +struct Sizing { + #[br(parse_with(parse_counted))] + row_breaks: Vec, + #[br(parse_with(parse_counted))] + column_breaks: Vec, + #[br(parse_with(parse_counted))] + row_keeps: Vec<(i32, i32)>, + #[br(parse_with(parse_counted))] + column_keeps: Vec<(i32, i32)>, + #[br(parse_with(parse_counted))] + row_point_keeps: Vec<[i32; 3]>, + #[br(parse_with(parse_counted))] + column_point_keeps: Vec<[i32; 3]>, +} + +impl Sizing { + fn decode( + &self, + column_widths: &[i32], + row_heights: &[i32], + ) -> EnumMap>> { + fn decode_axis( + widths: &[i32], + breaks: &[u32], + keeps: &[(i32, i32)], + ) -> Option> { + if widths.is_empty() && breaks.is_empty() && keeps.is_empty() { + None + } else { + Some(Box::new(pivot::Sizing { + widths: widths.into(), + breaks: breaks.into_iter().map(|b| *b as usize).collect(), + keeps: keeps + .into_iter() + .map(|(low, high)| *low as usize..*high as usize) + .collect(), + })) + } + } + + enum_map! 
{ + Axis2::X => decode_axis(column_widths, &self.column_breaks, &self.column_keeps), + Axis2::Y => decode_axis(row_heights, &self.row_breaks, &self.row_keeps), + } + } +} + +#[derive(Debug)] +struct Value(RawValue); + +impl Value { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + self.0.decode(encoding, footnotes) + } +} + +impl BinRead for Value { + type Args<'a> = (Version,); + + fn read_options( + reader: &mut R, + endian: binrw::Endian, + (version,): (Version,), + ) -> BinResult { + let start = reader.stream_position()?; + dbg!(start); + for i in 0..4 { + let x = ::read_options(reader, endian, ())?; + if x != 0 { + reader.seek(std::io::SeekFrom::Start(start + i))?; + break; + } + } + Ok(Value(dbg!(RawValue::read_options( + reader, + endian, + (version,) + ))?)) + } +} + +#[binread] +#[derive(Default)] +struct U32String { + #[br(parse_with(parse_counted))] + string: Vec, +} + +impl U32String { + fn decode(&self, encoding: &'static Encoding) -> String { + if let Ok(string) = str::from_utf8(&self.string) { + string.into() + } else { + encoding + .decode_without_bom_handling(&self.string) + .0 + .into_owned() + } + } + fn decode_optional(&self, encoding: &'static Encoding) -> Option { + let string = self.decode(encoding); + if !string.is_empty() { + Some(string) + } else { + None + } + } +} + +impl Debug for U32String { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = self.string.iter().map(|c| *c as char).collect::(); + write!(f, "{s:?}") + } +} + +#[binread] +struct CountedInner { + #[br(parse_with(parse_counted))] + data: Vec, +} + +impl CountedInner { + fn cursor(self) -> Cursor> { + Cursor::new(self.data) + } +} + +impl Debug for CountedInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", &self.data) + } +} + +#[derive(Clone, Debug, Default)] +struct Counted(T); + +impl Deref for Counted { + type Target = T; + + fn deref(&self) -> 
&Self::Target { + &self.0 + } +} + +impl BinRead for Counted +where + T: BinRead, +{ + type Args<'a> = T::Args<'a>; + + fn read_options( + reader: &mut R, + endian: binrw::Endian, + args: Self::Args<'_>, + ) -> BinResult { + let counted = CountedInner::read_options(reader, endian, ())?; + let mut cursor = counted.cursor(); + let result = ::read_options(&mut cursor, Endian::Little, args)?; + if cursor.position() < cursor.get_ref().len() as u64 { + return Err(binrw::Error::Custom { + pos: cursor.position(), + err: Box::new(format!( + "counted data not exhausted (consumed {} bytes out of {})", + cursor.position(), + cursor.get_ref().len() + )), + }); + } + Ok(Self(result)) + } +} + +/// `BinRead` for `Option` always requires the value to be there. This +/// instead tries to read it and falls back to None if there's no match. +#[derive(Clone, Debug)] +struct Optional(Option); + +impl Default for Optional { + fn default() -> Self { + Self(None) + } +} + +impl Deref for Optional { + type Target = Option; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl BinRead for Optional +where + T: BinRead, +{ + type Args<'a> = T::Args<'a>; + + fn read_options( + reader: &mut R, + endian: binrw::Endian, + args: Self::Args<'_>, + ) -> BinResult { + let start = reader.stream_position()?; + let result = ::read_options(reader, endian, args).ok(); + if result.is_none() { + reader.seek(std::io::SeekFrom::Start(start))?; + } + Ok(Self(result)) + } +} + +#[binread] +#[br(little)] +#[br(import(version: Version))] +#[derive(Debug)] +struct Formats { + #[br(parse_with(parse_counted))] + column_widths: Vec, + #[br(dbg)] + locale: U32String, + current_layer: i32, + #[br(temp, parse_with(parse_bool))] + _x7: bool, + #[br(temp, parse_with(parse_bool))] + _x8: bool, + #[br(temp, parse_with(parse_bool))] + _x9: bool, + #[br(dbg)] + y0: Y0, + custom_currency: CustomCurrency, + #[br(if(version == Version::V1))] + v1: Optional>, + #[br(if(version == Version::V3))] + v3: Option>, +} + +impl 
Formats { + fn y1(&self) -> Option<&Y1> { + self.v1 + .as_ref() + .map(|x0| &x0.y1) + .or_else(|| self.v3.as_ref().map(|v3| &v3.x3.y1)) + } + + fn x1(&self) -> Option<&X1> { + self.v3.as_ref().map(|v3| &v3.x1_x2.x1) + } + + fn x2(&self) -> Option<&X2> { + self.v3.as_ref().map(|v3| &*v3.x1_x2.x2) + } + + fn x3(&self) -> Option<&X3> { + self.v3.as_ref().map(|v3| &*v3.x3) + } + + fn charset(&self) -> Option<&U32String> { + self.y1().map(|y1| &y1.charset) + } + + fn encoding(&self) -> &'static Encoding { + // XXX We should probably warn for unknown encodings below + if let Some(charset) = self.charset() + && let Some(encoding) = Encoding::for_label(&charset.string) + { + encoding + } else if let Ok(locale) = str::from_utf8(&self.locale.string) + && let Some(dot) = locale.find('.') + && let Some(encoding) = Encoding::for_label(locale[dot + 1..].as_bytes()) + { + encoding + } else { + WINDOWS_1252 + } + } +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct FormatsV3 { + x1_x2: Counted, + x3: Counted, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X1X2 { + x1: X1, + x2: Counted, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X0 { + #[br(temp)] + _bytes: [u8; 14], + y1: Y1, + y2: Y2, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct Y1 { + command: U32String, + command_local: U32String, + language: U32String, + charset: U32String, + locale: U32String, + #[br(temp, parse_with(parse_bool))] + _x10: bool, + #[br(parse_with(parse_bool))] + include_leading_zero: bool, + #[br(temp, parse_with(parse_bool))] + _x12: bool, + #[br(temp, parse_with(parse_bool))] + _x13: bool, + y0: Y0, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct Y2 { + custom_currency: CustomCurrency, + missing: u8, + #[br(temp, parse_with(parse_bool))] + _x17: bool, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X1 { + #[br(temp, parse_with(parse_bool))] + _x14: bool, + show_title: u8, + #[br(temp, parse_with(parse_bool))] + _x16: bool, + lang: u8, + 
#[br(parse_with(parse_show))] + show_variables: Option, + #[br(parse_with(parse_show))] + show_values: Option, + #[br(temp)] + _x18: i32, + #[br(temp)] + _x19: i32, + #[br(temp)] + _zeros: [u8; 17], + #[br(temp, parse_with(parse_bool))] + _x20: bool, + #[br(parse_with(parse_bool))] + show_caption: bool, +} + +#[binrw::parser(reader, endian)] +fn parse_show() -> BinResult> { + match ::read_options(reader, endian, ())? { + 0 => Ok(None), + 1 => Ok(Some(Show::Value)), + 2 => Ok(Some(Show::Label)), + 3 => Ok(Some(Show::Both)), + _ => Err(binrw::Error::NoVariantMatch { + pos: reader.stream_position()? - 1, + }), + } +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X2 { + #[br(parse_with(parse_counted))] + row_heights: Vec, + #[br(parse_with(parse_counted))] + style_map: Vec<(i64, i16)>, + #[br(parse_with(parse_counted))] + styles: Vec, + tail: Counted>, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X3 { + #[br(temp, magic = b"\x01\0")] + _x21: u8, + #[br(magic = b"\0\0\0")] + y1: Y1, + small: f64, + inner: Optional, + y2: Y2, + #[br(temp)] + _tail: Optional, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X3Inner { + dataset: U32String, + datafile: U32String, + #[br(magic = 0u32)] + date: i32, + #[br(magic = 0u32, temp)] + _tail: (), +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct X3Tail { + #[br(temp)] + _x22: i32, + #[br(temp, assert(_zero == 0))] + _zero: i32, + #[br(temp)] + _one: Optional, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct Y0 { + epoch: i32, + decimal: u8, + grouping: u8, +} + +impl Y0 { + fn epoch(&self) -> Epoch { + if (1000..=9999).contains(&self.epoch) { + Epoch(self.epoch) + } else { + Epoch::default() + } + } + + fn decimal(&self) -> Decimal { + // XXX warn about bad decimal point? 
+ Decimal::try_from(self.decimal as char).unwrap_or_default() + } +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct CustomCurrency { + #[br(parse_with(parse_counted))] + ccs: Vec, +} + +impl CustomCurrency { + fn decode(&self, encoding: &'static Encoding) -> EnumMap>> { + let mut ccs = EnumMap::default(); + for (cc, string) in enum_iterator::all().zip(&self.ccs) { + if let Ok(style) = NumberStyle::from_str(&string.decode(encoding)) { + ccs[cc] = Some(Box::new(style)); + } else { + // XXX warning + } + } + ccs + } +} + +#[binread] +#[br(little)] +#[br(return_unexpected_error, import(version: Version))] +#[derive(Debug)] +enum RawValue { + #[br(magic = 1u8)] + Number { + #[br(parse_with(parse_optional), args(version))] + mods: Option, + #[br(parse_with(parse_format))] + format: Format, + x: f64, + }, + #[br(magic = 2u8)] + VarNumber { + #[br(parse_with(parse_optional), args(version))] + mods: Option, + #[br(parse_with(parse_format))] + format: Format, + x: f64, + var_name: U32String, + value_label: U32String, + #[br(parse_with(parse_show))] + show: Option, + }, + #[br(magic = 3u8)] + Text { + local: U32String, + #[br(parse_with(parse_optional), args(version))] + mods: Option, + id: U32String, + c: U32String, + #[br(parse_with(parse_bool))] + fixed: bool, + }, + #[br(magic = 4u8)] + String { + #[br(parse_with(parse_optional), args(version))] + mods: Option, + #[br(parse_with(parse_format))] + format: Format, + value_label: U32String, + var_name: U32String, + #[br(parse_with(parse_show))] + show: Option, + s: U32String, + }, + #[br(magic = 5u8)] + VarName { + #[br(parse_with(parse_optional), args(version))] + mods: Option, + var_name: U32String, + var_label: U32String, + #[br(parse_with(parse_show))] + show: Option, + }, + #[br(magic = 6u8)] + FixedText { + local: U32String, + #[br(parse_with(parse_optional), args(version))] + mods: Option, + id: U32String, + c: U32String, + }, + Template { + #[br(parse_with(parse_optional), args(version))] + mods: Option, + 
template: U32String, + #[br(parse_with(parse_counted), args(version))] + args: Vec, + }, +} + +#[binrw::parser(reader, endian)] +fn parse_format() -> BinResult { + let raw = u32::read_options(reader, endian, ())?; + if raw == 0 || raw == 0x10000 || raw == 1 { + return Ok(Format::new(Type::F, 40, 2).unwrap()); + } + + let raw_type = (raw >> 16) as u16; + let type_ = if raw_type >= 40 { + Type::F + } else if let Ok(type_) = Type::try_from(raw_type) { + type_ + } else { + // XXX warn + Type::F + }; + let w = ((raw >> 8) & 0xff) as Width; + let d = raw as Decimals; + + Ok(UncheckedFormat::new(type_, w, d).fix()) +} + +impl RawValue { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + match self { + RawValue::Number { mods, format, x } => { + pivot::Value::new_number_with_format((*x != -f64::MAX).then_some(*x), *format) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)) + } + RawValue::VarNumber { + mods, + format, + x, + var_name, + value_label, + show, + } => pivot::Value::new_number_with_format((*x != -f64::MAX).then_some(*x), *format) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)) + .with_value_label(value_label.decode_optional(encoding)) + .with_variable_name(Some(var_name.decode(encoding))) + .with_show_value_label(*show), + RawValue::Text { + local, + mods, + id, + c, + fixed, + } => pivot::Value::new_general_text( + local.decode(encoding), + c.decode(encoding), + id.decode(encoding), + !*fixed, + ) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), + RawValue::String { + mods, + format, + value_label, + var_name, + show, + s, + } => pivot::Value::new(pivot::ValueInner::String(StringValue { + s: s.decode(encoding), + hex: format.type_() == Type::AHex, + show: *show, + var_name: var_name.decode_optional(encoding), + value_label: value_label.decode_optional(encoding), + })) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), + 
RawValue::VarName { + mods, + var_name, + var_label, + show, + } => pivot::Value::new(pivot::ValueInner::Variable(VariableValue { + show: *show, + var_name: var_name.decode(encoding), + variable_label: var_label.decode_optional(encoding), + })) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), + RawValue::FixedText { local, mods, id, c } => pivot::Value::new_general_text( + local.decode(encoding), + c.decode(encoding), + id.decode(encoding), + false, + ) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), + RawValue::Template { + mods, + template, + args, + } => pivot::Value::new(pivot::ValueInner::Template(TemplateValue { + args: args + .iter() + .map(|argument| argument.decode(encoding, footnotes)) + .collect(), + localized: template.decode(encoding), + id: mods.as_ref().and_then(|mods| mods.template_id(encoding)), + })) + .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), + } + } +} + +#[binread] +#[br(little)] +#[br(import(version: Version))] +#[derive(Debug)] +enum Argument { + Singleton(#[br(magic(0u32), args(version))] Value), + Multiple { + #[br(magic(0u32), parse_with(parse_counted), args(version))] + values: Vec, + }, +} + +impl Argument { + fn decode( + &self, + encoding: &'static Encoding, + footnotes: &pivot::Footnotes, + ) -> Vec { + match self { + Argument::Singleton(value) => vec![value.decode(encoding, footnotes)], + Argument::Multiple { values } => values + .iter() + .map(|value| value.decode(encoding, footnotes)) + .collect(), + } + } +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueMods { + #[br(parse_with(parse_counted))] + refs: Vec, + #[br(parse_with(parse_counted))] + subscripts: Vec, + #[br(if(version == Version::V1))] + v1: Option, + #[br(if(version == Version::V3))] + v3: Counted, StylePair)>>, +} + +impl ValueMods { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> ValueStyle { + let style_pair = 
self.v3.as_ref().map(|v3| &v3.1); + let font_style = style_pair + .and_then(|style_pair| style_pair.font_style.as_ref()) + .map(|font_style| pivot::FontStyle { + bold: font_style.bold, + italic: font_style.italic, + underline: font_style.underline, + markup: false, + font: font_style.typeface.decode(encoding), + fg: [font_style.fg, font_style.fg], + bg: [font_style.bg, font_style.bg], + size: (font_style.size as i32) * 4 / 3, + }); + let cell_style = style_pair + .and_then(|style_pair| style_pair.cell_style.as_ref()) + .map(|cell_style| { + pivot::CellStyle { + horz_align: match cell_style.halign { + 0 => Some(HorzAlign::Center), + 2 => Some(HorzAlign::Left), + 4 => Some(HorzAlign::Right), + 6 => Some(HorzAlign::Decimal { + offset: cell_style.decimal_offset, + decimal: Decimal::Dot, /*XXX*/ + }), + _ => None, + }, + vert_align: match cell_style.valign { + 0 => VertAlign::Middle, + 3 => VertAlign::Bottom, + _ => VertAlign::Top, + }, + margins: enum_map! { + Axis2::X => [cell_style.left_margin as i32, cell_style.right_margin as i32], + Axis2::Y => [cell_style.top_margin as i32, cell_style.bottom_margin as i32], + }, + } + }); + ValueStyle { + cell_style, + font_style, + subscripts: self.subscripts.iter().map(|s| s.decode(encoding)).collect(), + footnotes: self + .refs + .iter() + .flat_map(|index| footnotes.0.get(*index as usize)) + .cloned() + .collect(), + } + } + fn decode_optional( + mods: &Option, + encoding: &'static Encoding, + footnotes: &pivot::Footnotes, + ) -> Option> { + mods.as_ref() + .map(|mods| Box::new(mods.decode(encoding, footnotes))) + } + fn template_id(&self, encoding: &'static Encoding) -> Option { + self.v3 + .as_ref() + .map(|v3| &*v3.0) + .and_then(|ts| ts.id.as_ref()) + .map(|s| s.decode(encoding)) + } +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct TemplateString { + _sponge: Counted, + #[br(parse_with(parse_optional))] + id: Option, +} + +#[derive(Debug, Default)] +struct Sponge; + +impl BinRead for Sponge { + type Args<'a> = 
(); + + fn read_options(reader: &mut R, _endian: Endian, _args: ()) -> BinResult { + let mut buf = [0; 32]; + while reader.read(&mut buf)? > 0 {} + Ok(Self) + } +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct StylePair { + #[br(parse_with(parse_optional))] + font_style: Option, + #[br(parse_with(parse_optional))] + cell_style: Option, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct FontStyle { + #[br(parse_with(parse_bool))] + bold: bool, + #[br(parse_with(parse_bool))] + italic: bool, + #[br(parse_with(parse_bool))] + underline: bool, + #[br(parse_with(parse_bool))] + show: bool, + #[br(parse_with(parse_color))] + fg: Color, + #[br(parse_with(parse_color))] + bg: Color, + typeface: U32String, + size: u8, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct CellStyle { + halign: i32, + valign: i32, + decimal_offset: f64, + left_margin: i16, + right_margin: i16, + top_margin: i16, + bottom_margin: i16, +} + +#[binread] +#[br(little)] +#[br(import(version: Version))] +#[derive(Debug)] +struct Dimension { + #[br(args(version))] + name: Value, + #[br(temp)] + _x1: u8, + #[br(temp)] + _x2: u8, + #[br(temp)] + _x3: u32, + #[br(parse_with(parse_bool))] + hide_dim_label: bool, + #[br(parse_with(parse_bool))] + hide_all_labels: bool, + #[br(magic(1u8), temp)] + _dim_index: i32, + #[br(parse_with(parse_counted), args(version))] + categories: Vec, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct Category { + #[br(args(version))] + name: Value, + #[br(args(version))] + child: Child, +} + +impl Category { + fn decode(&self, encoding: &'static Encoding, footnotes: &Footnotes, group: &mut pivot::Group) { + let name = self.name.decode(encoding, footnotes); + match &self.child { + Child::Leaf { leaf_index: _ } => { + group.push(pivot::Leaf::new(name)); + } + Child::Group { + merge: true, + subcategories, + } => { + for subcategory in subcategories { + subcategory.decode(encoding, footnotes, group); + } + } + Child::Group { 
+ merge: false, + subcategories, + } => { + let mut subgroup = Group::new(name).with_label_shown(); + for subcategory in subcategories { + subcategory.decode(encoding, footnotes, &mut subgroup); + } + group.push(subgroup); + } + } + } +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +enum Child { + Leaf { + #[br(magic(b"\0\0\0\x02\0\0\0"))] + leaf_index: u32, + #[br(magic(0u32), temp)] + _tail: (), + }, + Group { + #[br(parse_with(parse_bool))] + merge: bool, + #[br(temp, magic(b"\0\x01"))] + _x23: i32, + #[br(magic(-1i32), parse_with(parse_counted), args(version))] + subcategories: Vec>, + }, +} + +#[binread] +#[br(little)] +#[derive(Debug)] +struct Axes { + #[br(temp)] + n_layers: u32, + #[br(temp)] + n_rows: u32, + #[br(temp)] + n_columns: u32, + #[br(count(n_layers))] + layers: Vec, + #[br(count(n_rows))] + rows: Vec, + #[br(count(n_columns))] + columns: Vec, +} + +impl Axes { + fn decode( + &self, + dimensions: Vec, + ) -> Result, LightError> { + let n = self.layers.len() + self.rows.len() + self.columns.len(); + if n != dimensions.len() { + return Err(LightError::WrongAxisCount { + expected: dimensions.len(), + actual: n, + n_layers: self.layers.len(), + n_rows: self.rows.len(), + n_columns: self.columns.len(), + }); + } + + fn axis_dims(axis: Axis3, dimensions: &[u32]) -> impl Iterator { + dimensions.iter().map(move |d| (axis, *d as usize)) + } + + let mut axes = vec![None; n]; + for (axis, index) in axis_dims(Axis3::Z, &self.layers) + .chain(axis_dims(Axis3::Y, &self.rows)) + .chain(axis_dims(Axis3::X, &self.columns)) + { + if index >= n { + return Err(LightError::InvalidDimensionIndex { index, n }); + } else if axes[index].is_some() { + return Err(LightError::DuplicateDimensionIndex(index)); + } + axes[index] = Some(axis); + } + Ok(axes + .into_iter() + .map(|axis| axis.unwrap()) + .zip(dimensions) + .collect()) + } +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct Cell { + index: u64, + 
#[br(if(version == Version::V1), temp)] + _zero: Optional, + #[br(args(version))] + value: Value, +} diff --git a/rust/pspp/src/output/table.rs b/rust/pspp/src/output/table.rs index 98d5a0b16f..3185dbb1b8 100644 --- a/rust/pspp/src/output/table.rs +++ b/rust/pspp/src/output/table.rs @@ -31,7 +31,9 @@ use std::{ops::Range, sync::Arc}; use enum_map::{EnumMap, enum_map}; use ndarray::{Array, Array2}; -use crate::output::pivot::{Coord2, DisplayValue, Footnote, HorzAlign, ValueInner}; +use crate::output::pivot::{ + CellStyle, Coord2, DisplayValue, FontStyle, Footnote, HorzAlign, ValueInner, +}; use super::pivot::{ Area, AreaStyle, Axis2, Border, BorderStyle, HeadingRegion, Rect2, Value, ValueOptions, @@ -381,7 +383,8 @@ impl<'a> Iterator for Cells<'a> { pub struct DrawCell<'a> { pub rotate: bool, pub inner: &'a ValueInner, - pub style: &'a AreaStyle, + pub cell_style: &'a CellStyle, + pub font_style: &'a FontStyle, pub subscripts: &'a [String], pub footnotes: &'a [Arc], pub value_options: &'a ValueOptions, @@ -389,20 +392,22 @@ pub struct DrawCell<'a> { impl<'a> DrawCell<'a> { pub fn new(inner: &'a CellInner, table: &'a Table) -> Self { - let default_area_style = &table.areas[inner.area]; - let (style, subscripts, footnotes) = if let Some(styling) = &inner.value.styling { - ( - styling.style.as_ref().unwrap_or(default_area_style), - styling.subscripts.as_slice(), - styling.footnotes.as_slice(), - ) - } else { - (default_area_style, [].as_slice(), [].as_slice()) - }; + let (font_style, cell_style, subscripts, footnotes) = + if let Some(styling) = &inner.value.styling { + ( + styling.font_style.as_ref(), + styling.cell_style.as_ref(), + styling.subscripts.as_slice(), + styling.footnotes.as_slice(), + ) + } else { + (None, None, [].as_slice(), [].as_slice()) + }; Self { rotate: inner.rotate, inner: &inner.value.inner, - style, + font_style: font_style.unwrap_or(&table.areas[inner.area].font_style), + cell_style: cell_style.unwrap_or(&table.areas[inner.area].cell_style), 
subscripts, footnotes, value_options: &table.value_options, @@ -412,14 +417,13 @@ impl<'a> DrawCell<'a> { pub fn display(&self) -> DisplayValue<'a> { self.inner .display(self.value_options) - .with_font_style(&self.style.font_style) + .with_font_style(&self.font_style) .with_subscripts(self.subscripts) .with_footnotes(self.footnotes) } pub fn horz_align(&self, display: &DisplayValue) -> HorzAlign { - self.style - .cell_style + self.cell_style .horz_align .unwrap_or_else(|| HorzAlign::for_mixed(display.var_type())) }