From: Ben Pfaff Date: Thu, 23 Oct 2025 14:45:53 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=823beb725958cd9ede929e6f871503d052df2a1d;p=pspp work --- diff --git a/rust/doc/src/spv/legacy-detail-binary.md b/rust/doc/src/spv/legacy-detail-binary.md index b8812b4f22..f6e57271d5 100644 --- a/rust/doc/src/spv/legacy-detail-binary.md +++ b/rust/doc/src/spv/legacy-detail-binary.md @@ -55,8 +55,9 @@ A data source has `n-variables` variables, each with `n-values` data values. `source-name` is a 28- or 64-byte string padded on the right with -0-bytes. The names that appear in the corpus are very generic: usually -`tableData` for pivot table data or `source0` for chart data. +0-bytes. The names that appear in the corpus are very generic: +usually `tableData` for pivot table data or `source0` for chart data. +They are encoded in ASCII. A given `Metadata`'s `data-offset` is the offset, in bytes, from the beginning of the member to the start of the corresponding `Data`. diff --git a/rust/doc/src/spv/legacy-detail-xml.md b/rust/doc/src/spv/legacy-detail-xml.md index a8871120f0..59f429c3de 100644 --- a/rust/doc/src/spv/legacy-detail-xml.md +++ b/rust/doc/src/spv/legacy-detail-xml.md @@ -48,9 +48,7 @@ visualization (sourceVariable | derivedVariable)+ categoricalDomain? graph - labelFrame[lf1]* - container? - labelFrame[lf2]* + (labelFrame | container)* style+ layerController? @@ -113,6 +111,10 @@ the following attributes: The `userSource` element has no visible effect. +The `labelFrame` elements that are direct children of `visualization` +seem to have the same effect as those that are children of the +`container` element. + The `extension` element as a child of `visualization` has the following attributes. @@ -250,8 +252,7 @@ This element has the following attributes. Always set to `true`. * `source` - Always set to `tableData`, the `source-name` in the corresponding - `tableData.bin` member (see + A `source-name` in the corresponding `tableData.bin` member (see [Metadata](legacy-detail-binary.md#metadata)). * `sourceName` @@ -298,7 +299,7 @@ expression. * `value` An expression that defines the variable's value. In theory this could be an arbitrary expression in terms of constants, functions, - and other variables, e.g. (VAR1 + VAR2) / 2. In practice, the + and other variables, e.g. `(VAR1 + VAR2) / 2`. In practice, the corpus contains only the following forms of expressions: - `constant(0)` diff --git a/rust/doc/src/spv/structure.md b/rust/doc/src/spv/structure.md index 4e34d7344f..af5caf4ba2 100644 --- a/rust/doc/src/spv/structure.md +++ b/rust/doc/src/spv/structure.md @@ -62,18 +62,20 @@ value specifications are defined: Either `true` or `false`. * `dimension` - A floating-point number followed by a unit, e.g. `10pt`. Units in - the corpus include `in` (inch), `pt` (points, 72/inch), `px` - ("device-independent pixels", 96/inch), and `cm`. If the unit is + A floating-point number followed by a unit, e.g. `10pt`. If the unit is omitted then points should be assumed. The number and unit may be separated by white space. - The corpus also includes localized names for units. A reader must - understand these to properly interpret the dimension: + The corpus includes the following units, which includes localized + names for units. A reader must understand these to properly + interpret the dimensions: - * inch: `인치`, `pol.`, `cala`, `cali` - * point: `пт` - * centimeter: `см` + | Unit | Units per Inch | Names | + |:-------------------------|---------------:|:-------------------------------------| + | Inch | 1 | `in`, `인치`, `pol.`, `cala`, `cali` | + | Centimeter | 2.54 | `cm`, `см` | + | Point | 72 | `pt`, `пт`, (empty string) | + | Device-independent pixel | 96 | `px` | * `real` A floating-point number. diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs index e9d26895e0..88abd6b976 100644 --- a/rust/pspp/src/format.rs +++ b/rust/pspp/src/format.rs @@ -534,6 +534,12 @@ impl Format { d: 1, }; + pub const F8_0: Format = Format { + type_: Type::F, + w: 8, + d: 0, + }; + pub const F8_2: Format = Format { type_: Type::F, w: 8, @@ -575,6 +581,14 @@ impl Format { } } + pub fn with_max_width(self) -> Self { + if self.var_type().is_numeric() { + Self { w: 40, ..self } + } else { + self + } + } + pub fn fixed_from(source: &UncheckedFormat) -> Self { let UncheckedFormat { type_: format, diff --git a/rust/pspp/src/format/display.rs b/rust/pspp/src/format/display.rs index 93cb275c9e..5efdaa0e6c 100644 --- a/rust/pspp/src/format/display.rs +++ b/rust/pspp/src/format/display.rs @@ -53,6 +53,22 @@ pub struct DisplayDatum<'b, B> { quote_strings: bool, } +impl<'b, B> DisplayDatum<'b, B> { + /// For basic numeric formats, displays the datum wide enough to fully + /// display the selected number of decimal places, and trims off spaces in + /// the output. + pub fn with_stretch(self) -> Self { + match self.format.type_.category() { + Category::Basic | Category::Custom => Self { + format: self.format.with_max_width(), + trim_spaces: true, + ..self + }, + _ => self, + } + } +} + #[cfg(test)] mod tests; diff --git a/rust/pspp/src/output/pivot/look_xml.rs b/rust/pspp/src/output/pivot/look_xml.rs index d9e59ee56a..0a15f042e7 100644 --- a/rust/pspp/src/output/pivot/look_xml.rs +++ b/rust/pspp/src/output/pivot/look_xml.rs @@ -29,7 +29,7 @@ use crate::{ }; use thiserror::Error as ThisError; -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct TableProperties { #[serde(rename = "@name")] @@ -115,7 +115,7 @@ impl From for Look { } } -#[derive(Deserialize, Debug)] +#[derive(Clone, Debug, Deserialize)] struct GeneralProperties { #[serde(rename = "@hideEmptyRows")] hide_empty_rows: bool, @@ -136,7 +136,7 @@ struct GeneralProperties { row_label_position: LabelPosition, } -#[derive(Deserialize, Debug)] +#[derive(Clone, Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct FootnoteProperties { #[serde(rename = "@markerPosition")] @@ -146,7 +146,7 @@ struct FootnoteProperties { marker_type: FootnoteMarkerType, } -#[derive(Deserialize, Debug)] +#[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct CellFormatProperties { caption: CellStyleHolder, @@ -159,13 +159,13 @@ struct CellFormatProperties { title: CellStyleHolder, } -#[derive(Deserialize, Debug)] +#[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct CellStyleHolder { style: CellStyle, } -#[derive(Deserialize, Debug, Default)] +#[derive(Clone, Debug, Default, Deserialize)] #[serde(default)] struct CellStyle { #[serde(rename = "@alternatingColor")] @@ -246,7 +246,7 @@ impl CellStyle { } } -#[derive(Deserialize, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum FontStyle { #[default] @@ -254,7 +254,7 @@ enum FontStyle { Italic, } -#[derive(Deserialize, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum FontWeight { #[default] @@ -262,7 +262,7 @@ enum FontWeight { Bold, } -#[derive(Deserialize, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum FontUnderline { #[default] @@ -270,7 +270,7 @@ enum FontUnderline { Underline, } -#[derive(Deserialize, Debug, Default)] +#[derive(Clone, Debug, Default, Deserialize)] #[serde(rename_all = "camelCase")] enum TextAlignment { Left, @@ -281,7 +281,7 @@ enum TextAlignment { Mixed, } -#[derive(Deserialize, Debug, Default)] +#[derive(Clone, Debug, Default, Deserialize)] #[serde(rename_all = "camelCase")] enum LabelLocationVertical { /// Top. @@ -295,7 +295,7 @@ enum LabelLocationVertical { Center, } -#[derive(Deserialize, Debug)] +#[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct BorderProperties { bottom_inner_frame: BorderStyle, @@ -319,7 +319,7 @@ struct BorderProperties { vertical_dimension_border_columns: BorderStyle, } -#[derive(Deserialize, Debug, Default)] +#[derive(Clone, Debug, Default, Deserialize)] #[serde(rename_all = "camelCase", default)] struct PrintingProperties { #[serde(rename = "@printAllLayers")] diff --git a/rust/pspp/src/output/spv.rs b/rust/pspp/src/output/spv.rs index c249ca547c..2b5acb0003 100644 --- a/rust/pspp/src/output/spv.rs +++ b/rust/pspp/src/output/spv.rs @@ -30,7 +30,7 @@ use zip::{ZipArchive, result::ZipError}; use crate::output::{ Details, Item, SpvInfo, SpvMembers, Text, page::PageSetup, - pivot::{PivotTable, TableProperties, Value}, + pivot::{Look, PivotTable, TableProperties, Value}, spv::{ legacy_bin::LegacyBin, legacy_xml::Visualization, @@ -421,27 +421,34 @@ impl Table { )) } Some(xml_member_name) => { - let member = BufReader::new(archive.by_name(&xml_member_name)?); - let _visualization: Visualization = match serde_path_to_error::deserialize( - &mut quick_xml::de::Deserializer::from_reader(member), - ) - .with_context(|| format!("Failed to parse {xml_member_name}")) - { - Ok(result) => result, - Err(error) => panic!("{error:?}"), - }; - let bin_member_name = &self.table_structure.data_path; let mut bin_member = archive.by_name(bin_member_name)?; let mut bin_data = Vec::with_capacity(bin_member.size() as usize); bin_member.read_to_end(&mut bin_data)?; let mut cursor = Cursor::new(bin_data); - let _legacy_bin = LegacyBin::read(&mut cursor).map_err(|e| { + let legacy_bin = LegacyBin::read(&mut cursor).map_err(|e| { e.with_message(format!( "While parsing {bin_member_name:?} as legacy binary SPV member" )) })?; - //dbg!(&_legacy_bin); + let data = legacy_bin.decode(); + drop(bin_member); + + let member = BufReader::new(archive.by_name(&xml_member_name)?); + let visualization: Visualization = match serde_path_to_error::deserialize( + &mut quick_xml::de::Deserializer::from_reader(member), + ) + .with_context(|| format!("Failed to parse {xml_member_name}")) + { + Ok(result) => result, + Err(error) => panic!("{error:?}"), + }; + visualization.decode( + data, + self.properties + .as_ref() + .map_or_else(Look::default, |properties| properties.clone().into()), + ); Ok(PivotTable::new([]).into_item()) } diff --git a/rust/pspp/src/output/spv/legacy_bin.rs b/rust/pspp/src/output/spv/legacy_bin.rs index c306a09cab..cc298eee7c 100644 --- a/rust/pspp/src/output/spv/legacy_bin.rs +++ b/rust/pspp/src/output/spv/legacy_bin.rs @@ -1,8 +1,15 @@ -use std::io::{Read, Seek, SeekFrom}; +use std::{ + collections::HashMap, + io::{Read, Seek, SeekFrom}, +}; use binrw::{BinRead, BinResult, binread}; +use encoding_rs::UTF_8; -use crate::output::spv::light::{U32String, parse_vec}; +use crate::{ + data::Datum, + output::spv::light::{U32String, parse_vec}, +}; #[binread] #[br(little)] @@ -21,6 +28,54 @@ pub struct LegacyBin { strings: Option, } +impl LegacyBin { + pub fn decode(&self) -> HashMap>> { + fn decode_asciiz(name: &[u8]) -> String { + let len = name.iter().position(|b| *b == 0).unwrap_or(name.len()); + std::str::from_utf8(&name[..len]).unwrap().into() // XXX unwrap + } + + let mut sources = HashMap::new(); + for (metadata, data) in self.metadata.iter().zip(&self.data) { + let mut variables = HashMap::new(); + for variable in &data.variables { + variables.insert( + variable.variable_name.clone(), + variable + .values + .iter() + .map(|value| DataValue { + index: None, + value: Datum::Number((*value != f64::MIN).then_some(*value)), + }) + .collect::>(), + ); + } + sources.insert(metadata.source_name.clone(), variables); + } + if let Some(strings) = &self.strings { + for map in &strings.source_maps { + let source = sources.get_mut(&map.source_name).unwrap(); // XXX unwrap + for var_map in &map.variable_maps { + let variable = source.get_mut(&var_map.variable_name).unwrap(); // XXX unwrap + for datum_map in &var_map.datum_maps { + // XXX two possibly out-of-range indexes below + variable[datum_map.value_idx].value = + Datum::String(strings.labels[datum_map.label_idx].label.clone()); + } + } + } + } + sources + } +} + +#[derive(Clone, Debug)] +pub struct DataValue { + pub index: Option, + pub value: Datum, +} + #[binread] #[br(little)] #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -38,8 +93,8 @@ struct Metadata { n_values: u32, n_variables: u32, data_offset: u32, - #[br(count(if version == Version::Vaf { 28 } else { 64 }))] - source_name: Vec, + #[br(parse_with(parse_fixed_utf8_string), args(if version == Version::Vaf { 28 } else { 64 }))] + source_name: String, #[br(if(version == Version::Vb0), temp)] _x: u32, } @@ -92,7 +147,8 @@ impl BinRead for Data { #[br(little, import(n_values: u32))] #[derive(Debug)] struct Variable { - variable_name: [u8; 288], + #[br(parse_with(parse_fixed_utf8_string), args(288))] + variable_name: String, #[br(count(n_values))] values: Vec, } @@ -123,7 +179,8 @@ struct Strings { #[br(little)] #[derive(Debug)] struct SourceMap { - source_name: U32String, + #[br(parse_with(parse_utf8_string))] + source_name: String, #[br(parse_with(parse_vec))] variable_maps: Vec, } @@ -132,7 +189,8 @@ struct SourceMap { #[br(little)] #[derive(Debug)] struct VariableMap { - variable_name: U32String, + #[br(parse_with(parse_utf8_string))] + variable_name: String, #[br(parse_with(parse_vec))] datum_maps: Vec, } @@ -141,14 +199,36 @@ struct VariableMap { #[br(little)] #[derive(Debug)] struct DatumMap { - value_idx: u32, - label_idx: u32, + #[br(map(|x: u32| x as usize))] + value_idx: usize, + #[br(map(|x: u32| x as usize))] + label_idx: usize, } #[binread] #[br(little)] #[derive(Debug)] struct Label { - frequency: u32, - label: U32String, + #[br(temp)] + _frequency: u32, + #[br(parse_with(parse_utf8_string))] + label: String, +} + +/// Parses a UTF-8 string preceded by a 32-bit length. +#[binrw::parser(reader, endian)] +pub(super) fn parse_utf8_string() -> BinResult { + Ok(U32String::read_options(reader, endian, ())?.decode(UTF_8)) +} + +/// Parses a UTF-8 string that is exactly `n` bytes long and whose contents end +/// at the first null byte. +#[binrw::parser(reader)] +pub(super) fn parse_fixed_utf8_string(n: usize) -> BinResult { + let mut buf = vec![0; n]; + reader.read_exact(&mut buf)?; + let len = buf.iter().take_while(|b| **b != 0).count(); + Ok( + std::str::from_utf8(&buf[..len]).unwrap().into(), // XXX unwrap + ) } diff --git a/rust/pspp/src/output/spv/legacy_xml.rs b/rust/pspp/src/output/spv/legacy_xml.rs index 8c8d4043e6..e7a7657bde 100644 --- a/rust/pspp/src/output/spv/legacy_xml.rs +++ b/rust/pspp/src/output/spv/legacy_xml.rs @@ -14,11 +14,29 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . -use std::marker::PhantomData; - -use serde::Deserialize; - -use crate::output::pivot::Color; +use std::{ + collections::{BTreeMap, HashMap}, + marker::PhantomData, + mem::take, + num::{NonZeroUsize, ParseFloatError}, + str::FromStr, +}; + +use enum_map::{Enum, EnumMap}; +use ordered_float::OrderedFloat; +use serde::{Deserialize, de::Error as _}; + +use crate::{ + data::Datum, + format::{Decimal::Dot, Type, UncheckedFormat}, + output::{ + pivot::{ + Area, AreaStyle, Color, HeadingRegion, HorzAlign, Look, PivotTable, RowParity, Value, + VertAlign, + }, + spv::legacy_bin::DataValue, + }, +}; #[derive(Debug)] struct Ref { @@ -38,6 +56,65 @@ impl<'de, T> Deserialize<'de> for Ref { } } +struct Map(HashMap, Datum>); + +impl Map { + fn remap_formats( + &mut self, + format: &Option, + string_format: &Option, + ) -> (crate::format::Format, Vec) { + let (format, affixes, relabels, try_strings_as_numbers) = if let Some(format) = &format { + ( + Some(format.decode()), + format.affixes.clone(), + format.relabels.as_slice(), + format.try_strings_as_numbers.unwrap_or_default(), + ) + } else if let Some(string_format) = &string_format { + ( + None, + string_format.affixes.clone(), + string_format.relabels.as_slice(), + false, + ) + } else { + (None, Vec::new(), [].as_slice(), false) + }; + for relabel in relabels { + let value = if try_strings_as_numbers && let Ok(to) = relabel.to.trim().parse::() { + Datum::Number(Some(to)) + } else if let Some(format) = format + && let Ok(to) = relabel.to.trim().parse::() + { + Datum::String( + Datum::::Number(Some(to)) + .display(format) + .with_stretch() + .to_string(), + ) + } else { + Datum::String(relabel.to.clone()) + }; + self.0.insert(OrderedFloat(relabel.from), value); + // XXX warn on duplicate + } + (format.unwrap_or(crate::format::Format::F8_0), affixes) + } + + fn apply(&self, data: &mut Vec) { + for value in data { + let Datum::Number(Some(number)) = value.value else { + continue; + }; + if let Some(to) = self.0.get(&OrderedFloat(number)) { + value.index = Some(number); + value.value = to.clone(); + } + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct Visualization { @@ -58,6 +135,317 @@ pub struct Visualization { children: Vec, } +impl Visualization { + pub fn decode( + &self, + data: HashMap>>, + mut look: Look, + ) -> Result { + let mut extension = None; + let mut user_source = None; + let mut source_variables = Vec::new(); + let mut derived_variables = Vec::new(); + let mut graph = None; + let mut labels = EnumMap::from_fn(|_| Vec::new()); + let mut styles = HashMap::new(); + let mut layer_controller = None; + for child in &self.children { + match child { + VisChild::Extension(e) => extension = Some(e), + VisChild::UserSource(us) => user_source = Some(us), + VisChild::SourceVariable(source_variable) => source_variables.push(source_variable), + VisChild::DerivedVariable(derived_variable) => { + derived_variables.push(derived_variable) + } + VisChild::CategoricalDomain(_) => (), + VisChild::Graph(g) => graph = Some(g), + VisChild::LabelFrame(label_frame) => { + if let Some(label) = &label_frame.label + && let Some(purpose) = label.purpose + { + labels[purpose].push(label); + } + } + VisChild::Container(c) => { + for label_frame in &c.label_frames { + if let Some(label) = &label_frame.label + && let Some(purpose) = label.purpose + { + labels[purpose].push(label); + } + } + } + VisChild::Style(style) => { + if let Some(id) = &style.id { + styles.insert(id.as_str(), style); + } + } + VisChild::LayerController(lc) => layer_controller = Some(lc), + } + } + let Some(graph) = graph else { todo!() }; + let Some(user_source) = user_source else { + todo!() + }; + + // Footnotes. + // + // Any pivot_value might refer to footnotes, so it's important to + // process the footnotes early to ensure that those references can be + // resolved. There is a possible problem that a footnote might itself + // reference an as-yet-unprocessed footnote, but that's OK because + // footnote references don't actually look at the footnote contents but + // only resolve a pointer to where the footnote will go later. + // + // Before we really start, create all the footnotes we'll fill in. This + // is because sometimes footnotes refer to themselves or to each other + // and we don't want to reject those references. + let mut footnotes = BTreeMap::::new(); + if let Some(f) = &graph.interval.footnotes { + f.decode(&mut footnotes); + } + for child in &graph.interval.labeling.children { + if let LabelingChild::Footnotes(f) = child { + f.decode(&mut footnotes); + } + } + for label in &labels[Purpose::Footnote] { + for (index, text) in label.text().iter().enumerate() { + if let Some(uses_reference) = text.uses_reference { + let entry = footnotes.entry(uses_reference.get() - 1).or_default(); + if index % 2 == 0 { + entry.0 = text.text.strip_suffix('\n').unwrap_or(&text.text).into(); + } else { + entry.1 = text.text.strip_suffix('.').unwrap_or(&text.text).into(); + } + } + } + } + + for (purpose, area) in [ + (Purpose::Title, Area::Title), + (Purpose::SubTitle, Area::Caption), + (Purpose::Layer, Area::Layers), + (Purpose::Footnote, Area::Footer), + ] { + for label in &labels[purpose] { + label.decode_style(&mut look.areas[area], &styles); + } + } + if let Some(style) = &graph.interval.labeling.style + && let Some(style) = styles.get(style.references.as_str()) + { + Style::decode( + Some(*style), + styles + .get(graph.cell_style.references.as_str()) + .map(|v| &**v), + &mut look.areas[Area::Data(RowParity::Even)], + ); + look.areas[Area::Data(RowParity::Odd)] = + look.areas[Area::Data(RowParity::Even)].clone(); + } + + let mut title = Value::empty(); + let mut caption = Value::empty(); + //Label::decode_ + + let show_grid_lines = extension + .as_ref() + .and_then(|extension| extension.show_gridline); + if let Some(style) = styles.get(graph.cell_style.references.as_str()) + && let Some(width) = &style.width + { + let mut parts = width.split(';'); + parts.next(); + if let Some(min_width) = parts.next() + && let Some(max_width) = parts.next() + && let Ok(min_width) = min_width.parse::() + && let Ok(max_width) = max_width.parse::() + { + look.heading_widths[HeadingRegion::Columns] = + min_width.as_pt() as usize..=max_width.as_pt() as usize; + } + } + + let mut series = HashMap::<&str, Series>::new(); + while let n_source = source_variables.len() + && let n_derived = derived_variables.len() + && (n_source > 0 || n_derived > 0) + { + for sv in take(&mut source_variables) { + let label_series = if let Some(label_variable) = &sv.label_variable { + let Some(label_series) = series.get(label_variable.references.as_str()) else { + source_variables.push(sv); + continue; + }; + Some(label_series) + } else { + None + }; + + let Some(data) = data + .get(&sv.source) + .and_then(|source| source.get(&sv.source_name)) + else { + todo!() + }; + fn remap_formats( + map: &mut HashMap, Datum>, + format: &Option, + string_format: &Option, + ) -> (crate::format::Format, Vec) { + let (format, affixes, relabels, try_strings_as_numbers) = + if let Some(format) = &format { + ( + Some(format.decode()), + format.affixes.clone(), + format.relabels.as_slice(), + format.try_strings_as_numbers.unwrap_or_default(), + ) + } else if let Some(string_format) = &string_format { + ( + None, + string_format.affixes.clone(), + string_format.relabels.as_slice(), + false, + ) + } else { + (None, Vec::new(), [].as_slice(), false) + }; + for relabel in relabels { + let value = if try_strings_as_numbers + && let Ok(to) = relabel.to.trim().parse::() + { + Datum::Number(Some(to)) + } else if let Some(format) = format + && let Ok(to) = relabel.to.trim().parse::() + { + Datum::String( + Datum::::Number(Some(to)) + .display(format) + .with_stretch() + .to_string(), + ) + } else { + Datum::String(relabel.to.clone()) + }; + map.insert(OrderedFloat(relabel.from), value); + // XXX warn on duplicate + } + (format.unwrap_or(crate::format::Format::F8_0), affixes) + } + let mut mapping = HashMap::new(); + let (format, affixes) = remap_formats(&mut mapping, &sv.format, &sv.string_format); + fn execute_mapping( + mapping: &HashMap, Datum>, + data: &mut Vec, + ) { + for value in data { + let Datum::Number(Some(number)) = value.value else { + continue; + }; + if let Some(to) = mapping.get(&OrderedFloat(number)) { + value.index = Some(number); + value.value = to.clone(); + } + } + } + let mut data = data.clone(); + if !mapping.is_empty() { + execute_mapping(&mapping, &mut data); + } else if let Some(label_series) = label_series { + for (value, label) in data.iter().zip(label_series.values.iter()) { + if let Some(Some(number)) = value.value.as_number() { + let dest = match &label.value { + Datum::Number(_) => { + label.value.display(format).with_stretch().to_string() + } + Datum::String(s) => s.clone(), + }; + mapping.insert(OrderedFloat(number), Datum::String(dest)); + } + } + } + series.insert( + &sv.id, + Series { + label: sv.label.clone(), + format, + remapped: false, + values: data, + mapping, + affixes, + }, + ); + } + + for dv in take(&mut derived_variables) { + let mut data = if dv.value == "constant(0)" { + let n_values = if let Some(series) = series.values().next() { + series.values.len() + } else { + derived_variables.push(dv); + continue; + }; + (0..n_values) + .map(|_| DataValue { + index: Some(0.0), + value: Datum::Number(Some(0.0)), + }) + .collect() + } else if dv.value.starts_with("constant") { + vec![] + } else if let Some(rest) = dv.value.strip_prefix("map(") + && let Some(var_name) = rest.strip_suffix(")") + { + let Some(dependency) = series.get(var_name) else { + derived_variables.push(dv); + continue; + }; + dependency.values.clone() + } else { + unreachable!() + }; + let mut mapping = HashMap::new(); + for vme in &dv.value_map { + for from in vme.from.split(';') { + let from = from.trim().parse::().unwrap(); // XXX + let to = if let Ok(to) = vme.to.trim().parse::() { + Datum::Number(Some(to)) + } else { + Datum::String(vme.to.clone()) + }; + mapping.insert(OrderedFloat(from), to); + } + } + if !mapping.is_empty() { + for value in &mut data { + let Datum::Number(Some(number)) = value.value else { + continue; + }; + if let Some(to) = mapping.get(&OrderedFloat(number)) { + value.index = Some(number); + value.value = to.clone(); + } + } + } + } + } + + todo!() + } +} + +struct Series { + label: Option, + format: crate::format::Format, + remapped: bool, + values: Vec, + mapping: HashMap, Datum>, + affixes: Vec, +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] enum VisChild { @@ -75,7 +463,10 @@ enum VisChild { #[derive(Deserialize, Debug)] #[serde(rename = "extension", rename_all = "camelCase")] -struct VisualizationExtension; +struct VisualizationExtension { + #[serde(rename = "@showGridline")] + show_gridline: Option, +} #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] @@ -90,6 +481,10 @@ struct SourceVariable { #[serde(rename = "@id")] id: String, + /// The `source-name` in the `tableData.bin` member. + #[serde(rename = "@source")] + source: String, + /// The name of a variable within the source, corresponding to the /// `variable-name` in the `tableData.bin` member. #[serde(rename = "@sourceName")] @@ -157,7 +552,7 @@ struct VariableReference { reference: Option, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum Missing { Listwise, @@ -173,7 +568,7 @@ struct StringFormat { affixes: Vec, } -#[derive(Deserialize, Debug)] +#[derive(Deserialize, Debug, Default)] #[serde(rename_all = "camelCase")] struct Format { #[serde(rename = "@baseFormat")] @@ -235,7 +630,7 @@ struct Format { #[serde(rename = "@minimumIntegerDigits")] minimum_integer_digits: Option, #[serde(rename = "@maximumFractionDigits")] - maximum_fraction_digits: Option, + maximum_fraction_digits: Option, #[serde(rename = "@minimumFractionDigits")] minimum_fraction_digits: Option, #[serde(rename = "@useGrouping")] @@ -252,21 +647,31 @@ struct Format { try_strings_as_numbers: Option, #[serde(rename = "@negativesOutside")] negatives_outside: Option, - #[serde(default)] - relabel: Vec, + #[serde(default, rename = "relabel")] + relabels: Vec, #[serde(default, rename = "affix")] affixes: Vec, } +impl Format { + fn decode(&self) -> crate::format::Format { + if let Some(base_format) = self.base_format { + SignificantDateTimeFormat::from(self).decode() + } else { + SignificantNumberFormat::from(self).decode() + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct NumberFormat { #[serde(rename = "@minimumIntegerDigits")] - minimum_integer_digits: Option, + minimum_integer_digits: Option, #[serde(rename = "@maximumFractionDigits")] - maximum_fraction_digits: Option, + maximum_fraction_digits: Option, #[serde(rename = "@minimumFractionDigits")] - minimum_fraction_digits: Option, + minimum_fraction_digits: Option, #[serde(rename = "@useGrouping")] use_grouping: Option, #[serde(rename = "@scientific")] @@ -281,11 +686,69 @@ struct NumberFormat { affixes: Vec, } +struct SignificantNumberFormat<'a> { + scientific: Option, + prefix: &'a str, + suffix: &'a str, + use_grouping: Option, + maximum_fraction_digits: Option, +} + +impl<'a> From<&'a NumberFormat> for SignificantNumberFormat<'a> { + fn from(value: &'a NumberFormat) -> Self { + Self { + scientific: value.scientific, + prefix: &value.prefix, + suffix: &value.suffix, + use_grouping: value.use_grouping, + maximum_fraction_digits: value.maximum_fraction_digits, + } + } +} + +impl<'a> From<&'a Format> for SignificantNumberFormat<'a> { + fn from(value: &'a Format) -> Self { + Self { + scientific: value.scientific, + prefix: &value.prefix, + suffix: &value.suffix, + use_grouping: value.use_grouping, + maximum_fraction_digits: value.maximum_fraction_digits, + } + } +} + +impl<'a> SignificantNumberFormat<'a> { + fn decode(&self) -> crate::format::Format { + let type_ = if self.scientific == Some(Scientific::True) { + Type::E + } else if self.prefix == "$" { + Type::Dollar + } else if self.suffix == "%" { + Type::Pct + } else if self.use_grouping == Some(true) { + Type::Comma + } else { + Type::F + }; + let d = match self.maximum_fraction_digits { + Some(d) if (0..=15).contains(&d) => d, + _ => 2, + }; + UncheckedFormat { + type_, + w: 40, + d: d as u8, + } + .fix() + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct DateTimeFormat { #[serde(rename = "@baseFormat")] - base_format: Option, + base_format: BaseFormat, #[serde(rename = "@separatorChars")] separator_chars: Option, #[serde(rename = "@mdyOrder")] @@ -342,11 +805,123 @@ struct DateTimeFormat { affixes: Vec, } +struct SignificantDateTimeFormat { + base_format: Option, + show_quarter: Option, + show_week: Option, + show_day: Option, + show_hour: Option, + show_second: Option, + show_millis: Option, + mdy_order: Option, + month_format: Option, + year_abbreviation: Option, +} + +impl From<&Format> for SignificantDateTimeFormat { + fn from(value: &Format) -> Self { + Self { + base_format: value.base_format, + show_quarter: value.show_quarter, + show_week: value.show_week, + show_day: value.show_day, + show_hour: value.show_hour, + show_second: value.show_second, + show_millis: value.show_millis, + mdy_order: value.mdy_order, + month_format: value.month_format, + year_abbreviation: value.year_abbreviation, + } + } +} +impl From<&DateTimeFormat> for SignificantDateTimeFormat { + fn from(value: &DateTimeFormat) -> Self { + Self { + base_format: Some(value.base_format), + show_quarter: value.show_quarter, + show_week: value.show_week, + show_day: value.show_day, + show_hour: value.show_hour, + show_second: value.show_second, + show_millis: value.show_millis, + mdy_order: value.mdy_order, + month_format: value.month_format, + year_abbreviation: value.year_abbreviation, + } + } +} +impl SignificantDateTimeFormat { + fn decode(&self) -> crate::format::Format { + let type_ = match self.base_format { + Some(BaseFormat::Date) => { + let type_ = if self.show_quarter == Some(true) { + Type::QYr + } else if self.show_week == Some(true) { + Type::WkYr + } else { + match (self.mdy_order, self.month_format) { + (Some(MdyOrder::DayMonthYear), Some(MonthFormat::Number)) => Type::EDate, + (Some(MdyOrder::DayMonthYear), Some(MonthFormat::PaddedNumber)) => { + Type::EDate + } + (Some(MdyOrder::DayMonthYear), _) => Type::Date, + (Some(MdyOrder::YearMonthDay), _) => Type::SDate, + _ => Type::ADate, + } + }; + let mut w = type_.min_width(); + if self.year_abbreviation != Some(true) { + w += 2; + }; + return UncheckedFormat { type_, w, d: 0 }.try_into().unwrap(); + } + Some(BaseFormat::DateTime) => { + if self.mdy_order == Some(MdyOrder::YearMonthDay) { + Type::YmdHms + } else { + Type::DateTime + } + } + _ => { + if self.show_day == Some(true) { + Type::DTime + } else if self.show_hour == Some(true) { + Type::Time + } else { + Type::MTime + } + } + }; + date_time_format(type_, self.show_second, self.show_millis) + } +} + +impl DateTimeFormat { + fn decode(&self) -> crate::format::Format { + SignificantDateTimeFormat::from(self).decode() + } +} + +fn date_time_format( + type_: Type, + show_second: Option, + show_millis: Option, +) -> crate::format::Format { + let mut w = type_.min_width(); + let mut d = 0; + if show_second == Some(true) { + w += 3; + if show_millis == Some(true) { + d = 3; + w += d as u16 + 1; + } + } + UncheckedFormat { type_, w, d }.try_into().unwrap() +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct ElapsedTimeFormat { - #[serde(rename = "@baseFormat")] - base_format: Option, #[serde(rename = "@dayPadding")] day_padding: Option, #[serde(rename = "hourPadding")] @@ -371,7 +946,20 @@ struct ElapsedTimeFormat { affixes: Vec, } -#[derive(Deserialize, Debug)] +impl ElapsedTimeFormat { + fn decode(&self) -> crate::format::Format { + let type_ = if self.show_day == Some(true) { + Type::DTime + } else if self.show_hour == Some(true) { + Type::Time + } else { + Type::MTime + }; + date_time_format(type_, self.show_second, self.show_millis) + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum BaseFormat { Date, @@ -380,7 +968,7 @@ enum BaseFormat { ElapsedTime, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum MdyOrder { DayMonthYear, @@ -388,7 +976,7 @@ enum MdyOrder { YearMonthDay, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum MonthFormat { Long, @@ -397,14 +985,14 @@ enum MonthFormat { PaddedNumber, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum DayType { Month, Year, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum HourFormat { #[serde(rename = "AMPM")] @@ -415,7 +1003,7 @@ enum HourFormat { As12, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum Scientific { OnlyForSmall, @@ -424,7 +1012,7 @@ enum Scientific { False, } -#[derive(Deserialize, Debug)] +#[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct Affix { /// The footnote number as a natural number: 1 for the first footnote, 2 for @@ -446,7 +1034,7 @@ struct Affix { value: String, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum Position { Subscript, @@ -471,6 +1059,57 @@ struct ValueMapEntry { to: String, } +#[derive(Copy, Clone, Default, Debug, PartialEq, PartialOrd)] +struct Dimension(f64); + +impl Dimension { + fn as_px(&self) -> f64 { + self.0 * 96.0 + } + fn as_pt(&self) -> f64 { + self.0 * 72.0 + } +} + +impl<'de> Deserialize<'de> for Dimension { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let string = String::deserialize(deserializer)?; + Dimension::from_str(&string).map_err(D::Error::custom) + } +} + +impl FromStr for Dimension { + type Err = ParseFloatError; + + fn from_str(s: &str) -> Result { + fn parse_unit(s: &str) -> (f64, &str) { + for (unit, per_inch) in &[ + ("in", 1.0), + ("인치", 1.0), + ("pol.", 1.0), + ("cala", 1.0), + ("cali", 1.0), + ("cm", 2.54), + ("см", 2.54), + ("pt", 72.0), + ("пт", 72.0), + ("px", 96.0), + ] { + if let Some(rest) = s.strip_suffix(unit) { + return (*per_inch, rest); + } + } + (72.0, s) + } + + let (per_inch, s) = parse_unit(s); + Ok(Self(s.trim().parse::()? / per_inch)) + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct Style { @@ -530,16 +1169,16 @@ struct Style { font_underline: Option, #[serde(rename = "@margin-bottom")] - margin_bottom: Option, + margin_bottom: Option, #[serde(rename = "@margin-top")] - margin_top: Option, + margin_top: Option, #[serde(rename = "@margin-left")] - margin_left: Option, + margin_left: Option, #[serde(rename = "@margin-right")] - margin_right: Option, + margin_right: Option, #[serde(rename = "@textAlignment")] text_alignment: Option, @@ -558,9 +1197,52 @@ struct Style { #[serde(rename = "@visible")] visible: Option, + + #[serde(rename = "@decimal-offset")] + decimal_offset: Option, +} + +impl Style { + fn decode(fg: Option<&Style>, bg: Option<&Style>, out: &mut AreaStyle) { + if let Some(fg) = fg { + if let Some(weight) = fg.font_weight { + out.font_style.bold = weight.is_bold(); + } + if let Some(style) = fg.font_style { + out.font_style.italic = style.is_italic(); + } + if let Some(underline) = fg.font_underline { + out.font_style.underline = underline.is_underline(); + } + if let Some(color) = fg.color { + out.font_style.fg = color; + } + if let Some(font_size) = &fg.font_size { + if let Ok(size) = font_size + .trim_end_matches(|c: char| c.is_alphabetic()) + .parse() + { + out.font_style.size = size; + } else { + // XXX warn? + } + } + if let Some(alignment) = fg.text_alignment { + out.cell_style.horz_align = alignment.as_horz_align(fg.decimal_offset); + } + if let Some(label_local_vertical) = fg.label_location_vertical { + out.cell_style.vert_align = label_local_vertical.into(); + } + } + if let Some(bg) = bg { + if let Some(color) = bg.color { + out.font_style.bg = color; + } + } + } } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum Border { Solid, @@ -570,28 +1252,46 @@ enum Border { None, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum FontWeight { Regular, Bold, } -#[derive(Deserialize, Debug)] +impl FontWeight { + fn is_bold(&self) -> bool { + *self == Self::Bold + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum FontStyle { Regular, Italic, } -#[derive(Deserialize, Debug)] +impl FontStyle { + fn is_italic(&self) -> bool { + *self == Self::Italic + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum FontUnderline { None, Underline, } -#[derive(Deserialize, Debug)] +impl FontUnderline { + fn is_underline(&self) -> bool { + *self == Self::Underline + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum TextAlignment { Left, @@ -601,7 +1301,22 @@ enum TextAlignment { Mixed, } -#[derive(Deserialize, Debug)] +impl TextAlignment { + fn as_horz_align(&self, decimal_offset: Option) -> Option { + match self { + TextAlignment::Left => Some(HorzAlign::Left), + TextAlignment::Right => Some(HorzAlign::Right), + TextAlignment::Center => Some(HorzAlign::Center), + TextAlignment::Decimal => Some(HorzAlign::Decimal { + offset: decimal_offset.unwrap_or_default().as_px(), + decimal: Dot, + }), + TextAlignment::Mixed => None, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum LabelLocation { Positive, @@ -609,6 +1324,16 @@ enum LabelLocation { Center, } +impl From for VertAlign { + fn from(value: LabelLocation) -> Self { + match value { + LabelLocation::Positive => VertAlign::Top, + LabelLocation::Negative => VertAlign::Bottom, + LabelLocation::Center => VertAlign::Middle, + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct Graph { @@ -643,11 +1368,11 @@ struct Location { /// Minimum size. #[serde(rename = "@min")] - min: Option, + min: Option, /// Maximum size. #[serde(rename = "@max")] - max: Option, + max: Option, /// An element to attach to. Required when method is attach or same, not /// observed otherwise. @@ -658,7 +1383,7 @@ struct Location { value: Option, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum Part { Height, @@ -669,7 +1394,7 @@ enum Part { Right, } -#[derive(Deserialize, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] enum Method { SizeToContent, @@ -944,11 +1669,19 @@ struct Footnotes { mappings: Vec, } +impl Footnotes { + fn decode(&self, dst: &mut BTreeMap) { + for f in &self.mappings { + dst.entry(f.defines_reference.get() - 1).or_default().0 = f.to.clone(); + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct FootnoteMapping { #[serde(rename = "@definesReference")] - defines_reference: i64, + defines_reference: NonZeroUsize, #[serde(rename = "@from")] from: i64, @@ -967,8 +1700,8 @@ struct FacetLevel { level: usize, #[serde(rename = "@gap")] - gap: Option, - //axis: Axis, + gap: Option, + axis: Axis, } #[derive(Deserialize, Debug)] @@ -991,7 +1724,7 @@ struct MajorTicks { label_angle: f64, #[serde(rename = "@length")] - length: String, + length: Dimension, #[serde(rename = "@style")] style: Ref