From 1263898b5b5ef27e0654f11cf699d3dcde9f9656 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 29 Dec 2024 10:12:31 -0800 Subject: [PATCH] work on output --- rust/Cargo.lock | 24 +- rust/pspp/Cargo.toml | 2 + rust/pspp/src/cooked.rs | 844 --------------------------- rust/pspp/src/format.rs | 3 + rust/pspp/src/output/mod.rs | 1 + rust/pspp/src/output/pivot/mod.rs | 329 ++++++++--- rust/pspp/src/output/pivot/output.rs | 370 ++++++++++++ rust/pspp/src/output/table.rs | 121 ++++ rust/pspp/src/raw.rs | 37 +- 9 files changed, 781 insertions(+), 950 deletions(-) create mode 100644 rust/pspp/src/output/pivot/output.rs create mode 100644 rust/pspp/src/output/table.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 0254522591..2fa8a237e2 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -310,6 +310,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-iterator" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c280b9e6b3ae19e152d8e31cf47f18389781e119d4013a2a2bb0180e5facc635" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "enum-map" version = "2.7.3" @@ -866,6 +886,7 @@ dependencies = [ "diff", "either", "encoding_rs", + "enum-iterator", "enum-map", "finl_unicode", "flagset", @@ -880,6 +901,7 @@ dependencies = [ "num-traits", "ordered-float", "pspp-derive", + "smallvec", "thiserror", "unicase", "unicode-width", diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 7004bc070c..a9233a9df0 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -30,6 
+30,8 @@ enum-map = "2.7.3" flagset = "0.4.6" pspp-derive = { version = "0.1.0", path = "../pspp-derive" } either = "1.13.0" +enum-iterator = "2.1.0" +smallvec = "1.13.2" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/cooked.rs b/rust/pspp/src/cooked.rs index 3dba3301f6..96d78ff1f2 100644 --- a/rust/pspp/src/cooked.rs +++ b/rust/pspp/src/cooked.rs @@ -390,12 +390,7 @@ impl Metadata { } struct Decoder { - //pub raw: raw::Decoder, pub encoding: &'static Encoding, - //pub variables: HashMap, - //pub var_names: HashMap, - //pub dictionary: Dictionary, - //n_dict_indexes: usize, n_generated_names: usize, } @@ -910,842 +905,3 @@ impl MultipleResponseType { } } } - -/* -impl Decoder { - fn generate_name(&mut self) -> Identifier { - loop { - self.n_generated_names += 1; - let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding) - .unwrap(); - if !self.var_names.contains_key(&name) { - return name; - } - assert!(self.n_generated_names < usize::MAX); - } - } - fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> { - let (output, malformed) = self.encoding.decode_without_bom_handling(input); - if malformed { - warn(Error::MalformedString { - encoding: self.encoding.name().into(), - text: output.clone().into(), - }); - } - output - } - fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String { - self.decode_string_cow(input, warn).into() - } - pub fn decode_identifier( - &self, - input: &[u8], - warn: &impl Fn(Error), - ) -> Result { - let s = self.decode_string_cow(input, warn); - Identifier::new(&s, self.encoding) - } - fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> { - let max_index = self.n_dict_indexes; - if dict_index == 0 || dict_index > max_index { - return Err(Error::InvalidDictIndex { - dict_index, - max_index, - }); - } - let Some(variable) = 
self.variables.get(&(dict_index - 1)) else { - return Err(Error::DictIndexIsContinuation(dict_index)); - }; - Ok(variable) - } - - /// Returns `input` decoded from `self.encoding` into UTF-8 such that - /// re-encoding the result back into `self.encoding` will have exactly the - /// same length in bytes. - /// - /// XXX warn about errors? - fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { - if let (s, false) = self.encoding.decode_without_bom_handling(input) { - // This is the common case. Usually there will be no errors. - s - } else { - // Unusual case. Don't bother to optimize it much. - let mut decoder = self.encoding.new_decoder_without_bom_handling(); - let mut output = String::with_capacity( - decoder - .max_utf8_buffer_length_without_replacement(input.len()) - .unwrap(), - ); - let mut rest = input; - while !rest.is_empty() { - match decoder.decode_to_string_without_replacement(rest, &mut output, true) { - (DecoderResult::InputEmpty, _) => break, - (DecoderResult::OutputFull, _) => unreachable!(), - (DecoderResult::Malformed(a, b), consumed) => { - let skipped = a as usize + b as usize; - output.extend(repeat('?').take(skipped)); - rest = &rest[consumed..]; - } - } - } - assert_eq!(self.encoding.encode(&output).0.len(), input.len()); - output.into() - } - } -} - -pub trait TryDecode: Sized { - type Input<'a>; - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error>; -} - -pub trait Decode: Sized { - fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self; -} - -impl Decode> for String { - fn decode(decoder: &Decoder, input: &RawStr, warn: impl Fn(Error)) -> Self { - decoder.decode_string(&input.0, &warn) - } -} -*/ -/* -#[derive(Clone, Debug)] -pub struct HeaderRecord { - pub eye_catcher: String, - pub weight_index: Option, - pub n_cases: Option, - pub creation: NaiveDateTime, - pub file_label: String, -} - -fn trim_end_spaces(mut s: String) -> String { - 
s.truncate(s.trim_end_matches(' ').len()); - s -} - -/// Data file info that doesn't fit in [Dictionary]. -pub struct Metadata { - creation: NaiveDateTime, - endian: Endian, - compression: Option, - n_cases: Option, - product: String, - product_ext: Option, - version: Option<(i32, i32, i32)>, -} - -impl Metadata { - fn decode( - header: &crate::raw::HeaderRecord>, - integer_info: Option<&IntegerInfoRecord>, - product_ext: Option<&ProductInfoRecord>, - warn: impl Fn(Error), - ) -> Self { - let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %Y") - .unwrap_or_else(|_| { - warn(Error::InvalidCreationDate { - creation_date: header.creation_date.to_string(), - }); - Default::default() - }); - let creation_time = NaiveTime::parse_from_str(&header.creation_time, "%H:%M:%S") - .unwrap_or_else(|_| { - warn(Error::InvalidCreationTime { - creation_time: header.creation_time.to_string(), - }); - Default::default() - }); - let creation = NaiveDateTime::new(creation_date, creation_time); - - let product = header - .eye_catcher - .trim_start_matches("@(#) SPSS DATA FILE") - .trim_end() - .to_string(); - - Self { - creation, - endian: header.endian, - compression: header.compression, - n_cases: header.n_cases.map(|n| n as u64), - product, - product_ext: product_ext.map(|pe| pe.0.clone()), - version: integer_info.map(|ii| ii.version), - } - } -} - -impl TryDecode for HeaderRecord { - type Input<'a> = crate::raw::HeaderRecord>; - - fn try_decode( - _decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - let eye_catcher = trim_end_spaces(input.eye_catcher.to_string()); - let file_label = trim_end_spaces(input.file_label.to_string()); - let creation_date = NaiveDate::parse_from_str(&input.creation_date, "%e %b %Y") - .unwrap_or_else(|_| { - warn(Error::InvalidCreationDate { - creation_date: input.creation_date.to_string(), - }); - Default::default() - }); - let creation_time = 
NaiveTime::parse_from_str(&input.creation_time, "%H:%M:%S") - .unwrap_or_else(|_| { - warn(Error::InvalidCreationTime { - creation_time: input.creation_time.to_string(), - }); - Default::default() - }); - Ok(Some(HeaderRecord { - eye_catcher, - weight_index: input.weight_index.map(|n| n as usize), - n_cases: input.n_cases.map(|n| n as u64), - creation: NaiveDateTime::new(creation_date, creation_time), - file_label, - })) - } -} - -#[derive(Clone, Debug)] -pub struct VariableRecord { - pub width: VarWidth, - pub name: Identifier, - pub print_format: Spec, - pub write_format: Spec, - pub missing_values: MissingValues, - pub label: Option, -} - - -fn parse_variable_record( - decoder: &mut Decoder, - input: &raw::VariableRecord, String>, - warn: impl Fn(Error), -) -> Result<(), Error> { - let width = match input.width { - 0 => VarWidth::Numeric, - w @ 1..=255 => VarWidth::String(w as u16), - -1 => return Ok(()), - _ => { - return Err(Error::InvalidVariableWidth { - offsets: input.offsets.clone(), - width: input.width, - }) - } - }; - let name = trim_end_spaces(input.name.to_string()); - let name = match Identifier::new(&name, decoder.encoding) { - Ok(name) => { - if !decoder.var_names.contains_key(&name) { - name - } else { - let new_name = decoder.generate_name(); - warn(Error::DuplicateVariableName { - duplicate_name: name.clone(), - new_name: new_name.clone(), - }); - new_name - } - } - Err(id_error) => { - let new_name = decoder.generate_name(); - warn(Error::InvalidVariableName { - id_error, - new_name: new_name.clone(), - }); - new_name - } - }; - let variable = Variable { - dict_index: decoder.n_dict_indexes, - short_name: name.clone(), - long_name: None, - width, - }; - decoder.n_dict_indexes += width.n_dict_indexes(); - assert!(decoder - .var_names - .insert(name.clone(), variable.dict_index) - .is_none()); - assert!(decoder - .variables - .insert(variable.dict_index, variable) - .is_none()); - - let print_format = decode_format(input.print_format, width, 
|new_spec, format_error| { - warn(Error::InvalidPrintFormat { - new_spec, - variable: name.clone(), - format_error, - }) - }); - let write_format = decode_format(input.write_format, width, |new_spec, format_error| { - warn(Error::InvalidWriteFormat { - new_spec, - variable: name.clone(), - format_error, - }) - }); - let mut variable = dictionary::Variable::new(name, width); - variable.print_format = print_format; - variable.write_format = write_format; - variable.missing_values = input.missing_values.clone(); - if let Some(ref label) = input.label { - variable.label = Some(label.to_string()); - } - decoder.dictionary.add_var(variable).unwrap(); - Ok(()) -} - -#[derive(Clone, Debug)] -pub struct DocumentRecord(Vec); - -impl TryDecode for DocumentRecord { - type Input<'a> = crate::raw::DocumentRecord; - - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - Ok(Some(DocumentRecord( - input - .lines - .iter() - .map(|s| trim_end_spaces(decoder.decode_string(&s.0, &warn))) - .collect(), - ))) - } -} - -trait TextRecord -where - Self: Sized, -{ - const NAME: &'static str; - fn parse(input: &str, warn: impl Fn(Error)) -> Result; -} - -#[derive(Clone, Debug)] -pub struct VariableSet { - pub name: String, - pub vars: Vec, -} - -impl VariableSet { - fn parse(input: &str) -> Result { - let (name, input) = input.split_once('=').ok_or(Error::TBD)?; - let vars = input.split_ascii_whitespace().map(String::from).collect(); - Ok(VariableSet { - name: name.into(), - vars, - }) - } -} - -trait WarnOnError { - fn warn_on_error(self, warn: &F) -> Option; -} -impl WarnOnError for Result { - fn warn_on_error(self, warn: &F) -> Option { - match self { - Ok(result) => Some(result), - Err(error) => { - warn(error); - None - } - } - } -} - -#[derive(Clone, Debug)] -pub struct ValueLabel { - pub value: Value, - pub label: String, -} - -#[derive(Clone, Debug)] -pub struct ValueLabelRecord { - pub var_type: VarType, - pub labels: 
Vec, - pub variables: Vec, -} - -impl TryDecode for ValueLabelRecord { - type Input<'a> = crate::raw::ValueLabelRecord, RawString>; - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - let variables: Vec<&Variable> = input - .dict_indexes - .iter() - .filter_map(|&dict_index| { - decoder - .get_var_by_index(dict_index as usize) - .warn_on_error(&warn) - }) - .filter(|&variable| match variable.width { - VarWidth::String(width) if width > 8 => { - warn(Error::InvalidLongStringValueLabel( - variable.short_name.clone(), - )); - false - } - _ => true, - }) - .collect(); - let mut i = variables.iter(); - let Some(&first_var) = i.next() else { - return Ok(None); - }; - let var_type: VarType = first_var.width.into(); - for &variable in i { - let this_type: VarType = variable.width.into(); - if var_type != this_type { - let (numeric_var, string_var) = match var_type { - VarType::Numeric => (first_var, variable), - VarType::String => (variable, first_var), - }; - warn(Error::ValueLabelsDifferentTypes { - numeric_var: numeric_var.short_name.clone(), - string_var: string_var.short_name.clone(), - }); - return Ok(None); - } - } - let labels = input - .labels - .iter() - .map(|raw::ValueLabel { value, label }| { - let label = decoder.decode_string(&label.0, &warn); - let value = Value::decode(value, decoder); - ValueLabel { value, label } - }) - .collect(); - let variables = variables - .iter() - .map(|&variable| variable.short_name.clone()) - .collect(); - Ok(Some(ValueLabelRecord { - var_type, - labels, - variables, - })) - } -} - -#[derive(Clone, Debug)] -pub struct VariableSetRecord(Vec); - -impl TextRecord for VariableSetRecord { - const NAME: &'static str = "variable set"; - fn parse(input: &str, warn: impl Fn(Error)) -> Result { - let mut sets = Vec::new(); - for line in input.lines() { - if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) { - sets.push(set) - } - } - 
Ok(VariableSetRecord(sets)) - } -} - -#[derive(Clone, Debug)] -pub struct LongName { - pub short_name: Identifier, - pub long_name: Identifier, -} - -impl LongName { - fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result { - let short_name = - Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?; - let long_name = - Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?; - Ok(LongName { - short_name, - long_name, - }) - } -} - -#[derive(Clone, Debug)] -pub struct LongNameRecord(Vec); - -impl LongNameRecord { - pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result { - let mut names = Vec::new(); - for pair in input.split('\t').filter(|s| !s.is_empty()) { - if let Some((short_name, long_name)) = pair.split_once('=') { - if let Some(long_name) = - LongName::new(decoder, short_name, long_name).warn_on_error(&warn) - { - names.push(long_name); - } - } else { - warn(Error::TBD) - } - } - Ok(LongNameRecord(names)) - } -} - -#[derive(Clone, Debug)] -pub struct VeryLongString { - pub short_name: Identifier, - pub length: u16, -} - -impl VeryLongString { - fn parse(decoder: &Decoder, input: &str) -> Result { - let Some((short_name, length)) = input.split_once('=') else { - return Err(Error::TBD); - }; - let short_name = - Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?; - let length: u16 = length.parse().map_err(|_| Error::TBD)?; - if length > VarWidth::MAX_STRING { - return Err(Error::TBD); - } - Ok(VeryLongString { short_name, length }) - } -} - -#[derive(Clone, Debug)] -pub struct VeryLongStringRecord(Vec); - -impl VeryLongStringRecord { - pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result { - let mut very_long_strings = Vec::new(); - for tuple in input - .split('\0') - .map(|s| s.trim_end_matches('\t')) - .filter(|s| !s.is_empty()) - { - if let Some(vls) = VeryLongString::parse(decoder, 
tuple).warn_on_error(&warn) { - very_long_strings.push(vls) - } - } - Ok(VeryLongStringRecord(very_long_strings)) - } -} - -#[derive(Clone, Debug)] -pub struct Attribute { - pub name: Identifier, - pub values: Vec, -} - -impl Attribute { - fn parse<'a>( - decoder: &Decoder, - input: &'a str, - warn: &impl Fn(Error), - ) -> Result<(Option, &'a str), Error> { - let Some((name, mut input)) = input.split_once('(') else { - return Err(Error::TBD); - }; - let mut values = Vec::new(); - loop { - let Some((value, rest)) = input.split_once('\n') else { - return Err(Error::TBD); - }; - if let Some(stripped) = value - .strip_prefix('\'') - .and_then(|value| value.strip_suffix('\'')) - { - values.push(stripped.into()); - } else { - warn(Error::TBD); - values.push(value.into()); - } - if let Some(rest) = rest.strip_prefix(')') { - let attribute = Identifier::new(name, decoder.encoding) - .map_err(Error::InvalidAttributeName) - .warn_on_error(warn) - .map(|name| Attribute { name, values }); - return Ok((attribute, rest)); - }; - input = rest; - } - } -} - -#[derive(Clone, Debug)] -pub struct AttributeSet(pub Vec); - -impl AttributeSet { - fn parse<'a>( - decoder: &Decoder, - mut input: &'a str, - sentinel: Option, - warn: &impl Fn(Error), - ) -> Result<(AttributeSet, &'a str), Error> { - let mut attributes = Vec::new(); - let rest = loop { - match input.chars().next() { - None => break input, - c if c == sentinel => break &input[1..], - _ => { - let (attribute, rest) = Attribute::parse(decoder, input, &warn)?; - if let Some(attribute) = attribute { - attributes.push(attribute); - } - input = rest; - } - } - }; - Ok((AttributeSet(attributes), rest)) - } -} - -#[derive(Clone, Debug)] -pub struct FileAttributeRecord(AttributeSet); - -impl FileAttributeRecord { - pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result { - let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?; - if !rest.is_empty() { - warn(Error::TBD); - } - 
Ok(FileAttributeRecord(set)) - } -} - -#[derive(Clone, Debug)] -pub struct VarAttributeSet { - pub long_var_name: Identifier, - pub attributes: AttributeSet, -} - -impl VarAttributeSet { - fn parse<'a>( - decoder: &Decoder, - input: &'a str, - warn: &impl Fn(Error), - ) -> Result<(Option, &'a str), Error> { - let Some((long_var_name, rest)) = input.split_once(':') else { - return Err(Error::TBD); - }; - let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?; - let var_attribute = Identifier::new(long_var_name, decoder.encoding) - .map_err(Error::InvalidAttributeVariableName) - .warn_on_error(warn) - .map(|name| VarAttributeSet { - long_var_name: name, - attributes, - }); - Ok((var_attribute, rest)) - } -} - -#[derive(Clone, Debug)] -pub struct VariableAttributeRecord(Vec); - -impl VariableAttributeRecord { - pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result { - let mut var_attribute_sets = Vec::new(); - while !input.is_empty() { - let Some((var_attribute, rest)) = - VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn) - else { - break; - }; - if let Some(var_attribute) = var_attribute { - var_attribute_sets.push(var_attribute); - } - input = rest; - } - Ok(VariableAttributeRecord(var_attribute_sets)) - } -} - -#[derive(Clone, Debug)] -pub enum MultipleResponseType { - MultipleDichotomy { - value: Value, - labels: CategoryLabels, - }, - MultipleCategory, -} - -impl MultipleResponseType { - fn decode( - decoder: &Decoder, - mr_set: &Identifier, - input: &raw::MultipleResponseType, - min_width: VarWidth, - warn: &impl Fn(Error), - ) -> Result { - let mr_type = match input { - raw::MultipleResponseType::MultipleDichotomy { value, labels } => { - let value = decoder.decode_string_cow(&value.0, warn); - let value = match min_width { - VarWidth::Numeric => { - let number: f64 = value.trim().parse().map_err(|_| { - Error::InvalidMDGroupCountedValue { - mr_set: mr_set.clone(), - number: value.into(), - } - 
})?; - Value::Number(Some(number.into())) - } - VarWidth::String(max_width) => { - let value = value.trim_end_matches(' '); - let width = value.len(); - if width > max_width as usize { - return Err(Error::TooWideMDGroupCountedValue { - mr_set: mr_set.clone(), - value: value.into(), - width, - max_width, - }); - }; - Value::String(value.into()) - } - }; - MultipleResponseType::MultipleDichotomy { - value, - labels: *labels, - } - } - raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory, - }; - Ok(mr_type) - } -} - -#[derive(Clone, Debug)] -pub struct MultipleResponseSet { - pub name: Identifier, - pub min_width: VarWidth, - pub max_width: VarWidth, - pub label: String, - pub mr_type: MultipleResponseType, - pub dict_indexes: Vec, -} - - -#[derive(Clone, Debug)] -pub struct MultipleResponseRecord(pub Vec); - -impl TryDecode for MultipleResponseRecord { - type Input<'a> = raw::MultipleResponseRecord>; - - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - let mut sets = Vec::with_capacity(input.0.len()); - for set in &input.0 { - match MultipleResponseSet::decode(decoder, set, &warn) { - Ok(set) => sets.push(set), - Err(error) => warn(error), - } - } - Ok(Some(MultipleResponseRecord(sets))) - } -} - -#[derive(Clone, Debug)] -pub struct LongStringValueLabels { - pub var_name: Identifier, - pub width: VarWidth, - pub labels: Vec, -} - -impl LongStringValueLabels { - fn decode( - decoder: &Decoder, - input: &raw::LongStringValueLabels, - warn: &impl Fn(Error), - ) -> Result { - let var_name = decoder.decode_string(&input.var_name.0, warn); - let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) - .map_err(Error::InvalidLongStringValueLabelName)?; - - let min_width = 9; - let max_width = VarWidth::MAX_STRING; - if input.width < 9 || input.width > max_width as u32 { - return Err(Error::InvalidLongValueLabelWidth { - name: var_name, - width: input.width, - 
min_width, - max_width, - }); - } - let width = input.width as u16; - - let mut labels = Vec::with_capacity(input.labels.len()); - for (value, label) in input.labels.iter() { - let value = Value::String(decoder.decode_exact_length(&value.0).into()); - let label = decoder.decode_string(&label.0, warn); - labels.push(ValueLabel { value, label }); - } - - Ok(LongStringValueLabels { - var_name, - width: VarWidth::String(width), - labels, - }) - } -} - -#[derive(Clone, Debug)] -pub struct LongStringValueLabelRecord(pub Vec); - -impl TryDecode for LongStringValueLabelRecord { - type Input<'a> = raw::LongStringValueLabelRecord; - - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - let mut labels = Vec::with_capacity(input.0.len()); - for label in &input.0 { - match LongStringValueLabels::decode(decoder, label, &warn) { - Ok(set) => labels.push(set), - Err(error) => warn(error), - } - } - Ok(Some(LongStringValueLabelRecord(labels))) - } -} - -#[cfg(test)] -mod test { - use encoding_rs::WINDOWS_1252; - - #[test] - fn test() { - let mut s = String::new(); - s.push(char::REPLACEMENT_CHARACTER); - let encoded = WINDOWS_1252.encode(&s).0; - let decoded = WINDOWS_1252.decode(&encoded[..]).0; - println!("{:?}", decoded); - } - - #[test] - fn test2() { - let charset: Vec = (0..=255).collect(); - println!("{}", charset.len()); - let decoded = WINDOWS_1252.decode(&charset[..]).0; - println!("{}", decoded.len()); - let encoded = WINDOWS_1252.encode(&decoded[..]).0; - println!("{}", encoded.len()); - assert_eq!(&charset[..], &encoded[..]); - } -} -*/ diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs index d547bb94e5..378f771e7b 100644 --- a/rust/pspp/src/format.rs +++ b/rust/pspp/src/format.rs @@ -649,6 +649,7 @@ impl Display for UncheckedFormat { } } +#[derive(Clone, Debug)] pub struct Settings { pub epoch: Option, @@ -676,6 +677,7 @@ impl Default for Settings { /// A numeric output style. 
This can express numeric formats in /// [Category::Basic] and [Category::Custom]. +#[derive(Clone, Debug)] pub struct NumberStyle { pub neg_prefix: Affix, pub prefix: Affix, @@ -701,6 +703,7 @@ pub struct NumberStyle { pub extra_bytes: usize, } +#[derive(Clone, Debug)] pub struct Affix { /// String contents of affix. pub s: String, diff --git a/rust/pspp/src/output/mod.rs b/rust/pspp/src/output/mod.rs index 3c9392d3a6..f16da89f4f 100644 --- a/rust/pspp/src/output/mod.rs +++ b/rust/pspp/src/output/mod.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use self::pivot::Value; pub mod pivot; +pub mod table; /// A single output item. pub struct Item { diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 394d01537e..1ec18be1a1 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -57,15 +57,19 @@ use std::{ collections::HashMap, - ops::Range, - sync::{Arc, OnceLock}, + ops::{Index, Range}, + sync::{Arc, OnceLock, Weak}, }; use chrono::NaiveDateTime; +use enum_iterator::Sequence; use enum_map::{enum_map, Enum, EnumMap}; +use smallvec::{smallvec, SmallVec}; use crate::format::{Format, Settings as FormatSettings}; +pub mod output; + /// Areas of a pivot table for styling purposes. 
#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] pub enum Area { @@ -86,8 +90,50 @@ pub enum Area { Layers, } +impl Area { + fn default_cell_style(self) -> CellStyle { + use HorzAlign::*; + use VertAlign::*; + let (halign, valign, hmargins, vmargins) = match self { + Area::Title => (Center, Middle, [8, 11], [1, 8]), + Area::Caption => (Left, Top, [8, 11], [1, 1]), + Area::Footer => (Left, Top, [11, 8], [2, 3]), + Area::Corner => (Left, Bottom, [8, 11], [1, 1]), + Area::ColumnLabels => (Left, Top, [8, 11], [1, 3]), + Area::RowLabels => (Left, Top, [8, 11], [1, 3]), + Area::Data => (Mixed, Top, [8, 11], [1, 1]), + Area::Layers => (Left, Bottom, [8, 11], [1, 3]), + }; + CellStyle { + horz_align: halign, + vert_align: valign, + margins: enum_map! { Axis2::X => hmargins, Axis2::Y => vmargins }, + } + } + + fn default_font_style(self) -> FontStyle { + FontStyle { + bold: self == Area::Title, + italic: false, + underline: false, + markup: false, + font: String::from("Sans Serif"), + fg: [Color::BLACK; 2], + bg: [Color::WHITE; 2], + size: 9, + } + } + + fn default_area_style(self) -> AreaStyle { + AreaStyle { + cell_style: self.default_cell_style(), + font_style: self.default_font_style(), + } + } +} + /// Table borders for styling purposes. 
-#[derive(Debug, Enum)] +#[derive(Copy, Clone, Debug, Enum)] pub enum Border { Title, OuterFrame(BoxBorder), @@ -98,8 +144,37 @@ pub enum Border { DataTop, } +impl Border { + fn default_stroke(self) -> Stroke { + match self { + Self::InnerFrame(_) | Self::DataLeft | Self::DataTop => Stroke::Thick, + Self::Dimensions(side) if side != RowColBorder::RowVert => Stroke::Solid, + Self::Categories(RowColBorder::ColHorz | RowColBorder::ColVert) => Stroke::Solid, + _ => Stroke::None, + } + } + fn default_border_style(self) -> BorderStyle { + BorderStyle { + stroke: self.default_stroke(), + color: Color::BLACK, + } + } + + fn fallback(self) -> Self { + match self { + Self::Title + | Self::OuterFrame(_) + | Self::InnerFrame(_) + | Self::DataLeft + | Self::DataTop + | Self::Categories(_) => self, + Self::Dimensions(row_col_border) => Self::Categories(row_col_border), + } + } +} + /// The borders on a box. -#[derive(Debug, Enum)] +#[derive(Copy, Clone, Debug, Enum)] pub enum BoxBorder { Left, Top, @@ -108,7 +183,7 @@ pub enum BoxBorder { } /// Borders between rows and columns. -#[derive(Debug, Enum, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] pub enum RowColBorder { RowHorz, RowVert, @@ -120,7 +195,7 @@ pub enum RowColBorder { /// /// The comments below talk about columns and their widths but they apply /// equally to rows and their heights. -#[derive(Default)] +#[derive(Default, Clone, Debug)] pub struct Sizing { /// Specific column widths, in 1/96" units. widths: Vec, @@ -133,15 +208,25 @@ pub struct Sizing { keeps: Vec>, } -#[derive(Enum)] +#[derive(Copy, Clone, Debug, Enum, Sequence)] pub enum Axis3 { X, Y, Z, } +impl Axis3 { + fn transpose(&self) -> Option { + match self { + Axis3::X => Some(Axis3::Y), + Axis3::Y => Some(Axis3::X), + Axis3::Z => None, + } + } +} + /// An axis within a pivot table. -#[derive(Default)] +#[derive(Clone, Debug, Default)] pub struct Axis { /// `dimensions[0]` is the innermost dimension. 
dimensions: Vec, @@ -154,13 +239,15 @@ pub struct Axis { label_depth: usize, } -pub struct AxisCursor<'a> { +pub struct AxisIterator<'a> { axis: &'a Axis, - indexes: Vec, + indexes: SmallVec<[usize; 4]>, } -impl<'a> AxisCursor<'a> { - fn next(&'a mut self) -> Option<&'a Vec> { +impl<'a> Iterator for AxisIterator<'a> { + type Item = SmallVec<[usize; 4]>; + + fn next(&mut self) -> Option { if self.indexes.is_empty() { if self .axis @@ -170,13 +257,13 @@ impl<'a> AxisCursor<'a> { { return None; } - self.indexes = vec![0; self.axis.dimensions.len()]; - Some(&self.indexes) + self.indexes = smallvec![0; self.axis.dimensions.len()]; + Some(self.indexes.clone()) } else { for (index, dimension) in self.indexes.iter_mut().zip(self.axis.dimensions.iter()) { *index += 1; if *index < dimension.len() { - return Some(&self.indexes); + return Some(self.indexes.clone()); }; *index = 0 } @@ -186,10 +273,10 @@ impl<'a> AxisCursor<'a> { } impl Axis { - fn cursor(&self) -> AxisCursor { - AxisCursor { + fn iter(&self) -> AxisIterator { + AxisIterator { axis: self, - indexes: Vec::new(), + indexes: SmallVec::new(), } } } @@ -205,6 +292,7 @@ impl Axis { /// (A dimension or a group can contain zero categories, but this is unusual. /// If a dimension contains no categories, then its table cannot contain any /// data.) +#[derive(Clone, Debug)] pub struct Dimension { axis_type: Axis3, level: usize, @@ -251,7 +339,9 @@ impl Dimension { } } +#[derive(Clone, Debug)] pub struct Group { + parent: Option>, name: Value, label_depth: usize, extra_depth: usize, @@ -268,7 +358,9 @@ pub struct Group { show_label_in_corner: bool, } +#[derive(Clone, Debug)] pub struct Leaf { + parent: Weak, name: Value, label_depth: usize, extra_depth: usize, @@ -285,15 +377,48 @@ pub struct Leaf { } /// A pivot_category is a leaf (a category) or a group. 
+#[derive(Clone, Debug)] pub enum Category { Group(Arc), Leaf(Arc), } +impl Category { + fn is_leaf(&self) -> bool { + match self { + Category::Group(_) => false, + Category::Leaf(_) => true, + } + } + + fn is_group(&self) -> bool { + match self { + Category::Group(_) => true, + Category::Leaf(_) => false, + } + } + + fn show_label(&self) -> bool { + match self { + Category::Group(group) => group.show_label, + Category::Leaf(_) => true, + } + } + + fn ptr_eq(&self, other: &Self) -> bool { + match (self, other) { + (Category::Group(a), Category::Group(b)) => Arc::ptr_eq(a, b), + (Category::Leaf(a), Category::Leaf(b)) => Arc::ptr_eq(a, b), + _ => false, + } + } +} + trait CategoryTrait { fn name(&self) -> &Value; fn label_depth(&self) -> usize; fn extra_depth(&self) -> usize; + fn parent(&self) -> Option>; } impl CategoryTrait for Group { @@ -308,6 +433,10 @@ impl CategoryTrait for Group { fn extra_depth(&self) -> usize { self.extra_depth } + + fn parent(&self) -> Option> { + self.parent.as_ref().and_then(|parent| parent.upgrade()) + } } impl CategoryTrait for Leaf { @@ -322,6 +451,10 @@ impl CategoryTrait for Leaf { fn extra_depth(&self) -> usize { self.extra_depth } + + fn parent(&self) -> Option> { + self.parent.upgrade() + } } impl CategoryTrait for Category { @@ -345,6 +478,13 @@ impl CategoryTrait for Category { Category::Leaf(leaf) => leaf.extra_depth(), } } + + fn parent(&self) -> Option> { + match self { + Category::Group(group) => group.parent(), + Category::Leaf(leaf) => leaf.parent(), + } + } } /// Styling for a pivot table. @@ -352,6 +492,7 @@ impl CategoryTrait for Category { /// The division between this and the style information in [Table] seems fairly /// arbitrary. The ultimate reason for the division is simply because that's /// how SPSS documentation and file formats do it. 
+#[derive(Clone, Debug)] struct Look { name: Option, @@ -401,53 +542,10 @@ impl Default for Look { row_labels_in_corner: true, row_heading_widths: 36..72, col_heading_widths: 36..120, - footnote_marker_type: FootnoteMarkerType::Alphabetic, - footnote_marker_position: FootnoteMarkerPosition::Subscript, - areas: EnumMap::from_fn(|area| { - use HorzAlign::*; - use VertAlign::*; - let (halign, valign, hmargins, vmargins) = match area { - Area::Title => (Center, Middle, [8, 11], [1, 8]), - Area::Caption => (Left, Top, [8, 11], [1, 1]), - Area::Footer => (Left, Top, [11, 8], [2, 3]), - Area::Corner => (Left, Bottom, [8, 11], [1, 1]), - Area::ColumnLabels => (Left, Top, [8, 11], [1, 3]), - Area::RowLabels => (Left, Top, [8, 11], [1, 3]), - Area::Data => (Mixed, Top, [8, 11], [1, 1]), - Area::Layers => (Left, Bottom, [8, 11], [1, 3]), - }; - AreaStyle { - cell_style: CellStyle { - horz_align: halign, - vert_align: valign, - margins: enum_map! { Axis2::X => hmargins, Axis2::Y => vmargins }, - }, - font_style: FontStyle { - bold: area == Area::Title, - italic: false, - underline: false, - markup: false, - font: String::from("Sans Serif"), - fg: [Color::BLACK; 2], - bg: [Color::WHITE; 2], - size: 9, - }, - } - }), - borders: EnumMap::from_fn(|border| { - let stroke = match border { - Border::InnerFrame(_) | Border::DataLeft | Border::DataTop => Stroke::Thick, - Border::Dimensions(side) if side != RowColBorder::RowVert => Stroke::Solid, - Border::Categories(RowColBorder::ColHorz | RowColBorder::ColVert) => { - Stroke::Solid - } - _ => Stroke::None, - }; - BorderStyle { - stroke, - color: Color::BLACK, - } - }), + footnote_marker_type: FootnoteMarkerType::default(), + footnote_marker_position: FootnoteMarkerPosition::default(), + areas: EnumMap::from_fn(Area::default_area_style), + borders: EnumMap::from_fn(Border::default_border_style), print_all_layers: false, paginate_layers: false, shrink_to_fit: EnumMap::from_fn(|_| false), @@ -466,11 +564,13 @@ impl Look { } } 
+#[derive(Clone, Debug)]
 pub struct AreaStyle {
     cell_style: CellStyle,
     font_style: FontStyle,
 }
 
+#[derive(Clone, Debug)]
 pub struct CellStyle {
     horz_align: HorzAlign,
     vert_align: VertAlign,
@@ -484,6 +584,7 @@ pub struct CellStyle {
     margins: EnumMap,
 }
 
+#[derive(Copy, Clone, Debug)]
 pub enum HorzAlign {
     /// Right aligned.
     Right,
@@ -507,6 +608,7 @@ pub enum HorzAlign {
     },
 }
 
+#[derive(Copy, Clone, Debug)]
 pub enum VertAlign {
     /// Top alignment.
     Top,
@@ -518,6 +620,7 @@ pub enum VertAlign {
     Bottom,
 }
 
+#[derive(Clone, Debug)]
 pub struct FontStyle {
     bold: bool,
     italic: bool,
@@ -531,6 +634,7 @@ pub struct FontStyle {
     size: i32,
 }
 
+#[derive(Copy, Clone, Debug)]
 pub struct Color {
     alpha: u8,
     r: u8,
@@ -552,11 +656,13 @@ impl Color {
     }
 }
 
+#[derive(Copy, Clone, Debug)]
 pub struct BorderStyle {
     stroke: Stroke,
     color: Color,
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum Stroke {
     None,
     Solid,
@@ -566,30 +672,92 @@ pub enum Stroke {
     Double,
 }
 
-/// An axis of a flat table.
-#[derive(Debug, Enum)]
+impl Stroke {
+    /// Returns a stroke that "combines" the two arguments, that is, one that
+    /// is a reasonable choice for a rule that, for different reasons, should
+    /// have both styles.
+    ///
+    /// The result is the greater of the two strokes in the derived ordering,
+    /// which follows the order in which the variants are declared.
+    fn combine(self, other: Stroke) -> Self {
+        self.max(other)
+    }
+}
+
+/// An axis of a 2-dimensional table.
+#[derive(Copy, Clone, Debug, Enum)]
 pub enum Axis2 {
     X,
     Y,
 }
 
+impl Axis2 {
+    /// Returns the other axis: `Y` for `X` and `X` for `Y`.
+    pub fn opposite(self) -> Self {
+        match self {
+            Self::X => Self::Y,
+            Self::Y => Self::X,
+        }
+    }
+}
+
+/// A 2-dimensional `(x,y)` pair.
+pub struct Coord2(pub EnumMap<Axis2, usize>);
+
+impl Coord2 {
+    /// Constructs a pair from its `x` and `y` components.
+    pub fn new(x: usize, y: usize) -> Self {
+        use Axis2::*;
+        Self(enum_map! {
+            X => x,
+            Y => y
+        })
+    }
+
+    /// Returns the component along [Axis2::X].
+    pub fn x(&self) -> usize {
+        self.0[Axis2::X]
+    }
+
+    /// Returns the component along [Axis2::Y].
+    pub fn y(&self) -> usize {
+        self.0[Axis2::Y]
+    }
+
+    /// Returns the component along `axis`.
+    pub fn get(&self, axis: Axis2) -> usize {
+        self.0[axis]
+    }
+}
+
+impl From<EnumMap<Axis2, usize>> for Coord2 {
+    fn from(value: EnumMap<Axis2, usize>) -> Self {
+        Self(value)
+    }
+}
+
+impl Index<Axis2> for Coord2 {
+    type Output = usize;
+
+    fn index(&self, index: Axis2) -> &Self::Output {
+        &self.0[index]
+    }
+}
+
+#[derive(Copy, Clone, Debug, Default)]
 pub enum FootnoteMarkerType {
     /// a, b, c, ...
+    #[default]
     Alphabetic,
 
     /// 1, 2, 3, ...
     Numeric,
 }
 
+#[derive(Copy, Clone, Debug, Default)]
 pub enum FootnoteMarkerPosition {
     /// Subscripts.
+    #[default]
     Subscript,
 
     /// Superscripts.
     Superscript,
 }
 
-pub struct Table {
+#[derive(Clone, Debug)]
+pub struct PivotTable {
     look: Arc,
 
     rotate_inner_column_labels: bool,
@@ -644,7 +812,7 @@ pub struct Table {
     cells: HashMap,
 }
 
-impl Table {
+impl PivotTable {
     fn new() -> Self {
         Self {
             look: Look::shared_default(),
@@ -697,9 +865,28 @@ impl Table {
     fn get(&self, data_indexes: &[usize]) -> Option<&Value> {
         self.cells.get(&self.cell_index(data_indexes))
     }
+
+    /// Converts per-axis presentation-order indexes in `presentation_indexes`
+    /// into data indexes for each dimension.
+    ///
+    /// The result has one element per dimension of the table; each
+    /// dimension's data index is stored at that dimension's `top_index`.
+    fn convert_indexes_ptod(
+        &self,
+        presentation_indexes: EnumMap<Axis3, &[usize]>,
+    ) -> SmallVec<[usize; 4]> {
+        // Start with one zeroed slot per dimension so that the indexed stores
+        // below are in bounds.  A vector that only reserves capacity is still
+        // empty, and assigning through an index into it would panic.
+        let mut data_indexes = SmallVec::from_elem(0, self.dimensions.len());
+        for i in enum_iterator::all::<Axis3>() {
+            let axis = &self.axes[i];
+            for (j, dimension) in axis.dimensions.iter().enumerate() {
+                let pindex = presentation_indexes[i][j];
+                data_indexes[dimension.top_index] =
+                    dimension.presentation_leaves[pindex].data_index;
+            }
+        }
+        data_indexes
+    }
 }
 
 /// Whether to show variable or value labels or the underlying value or variable name.
+#[derive(Copy, Clone, Debug)]
 pub enum ValueShow {
     /// Value or variable name only.
Value, @@ -711,6 +898,7 @@ pub enum ValueShow { Both, } +#[derive(Clone, Debug)] pub struct Footnote { content: Value, marker: Value, @@ -756,11 +944,13 @@ pub struct Footnote { /// /// 5. A template. PSPP doesn't create these itself yet, but it can read and /// interpret those created by SPSS. +#[derive(Clone, Debug)] pub struct Value { styling: Option>, inner: ValueInner, } +#[derive(Clone, Debug)] pub enum ValueInner { Number { show: ValueShow, @@ -798,6 +988,7 @@ pub enum ValueInner { }, } +#[derive(Clone, Debug)] pub struct ValueStyle { font_style: FontStyle, cell_style: CellStyle, diff --git a/rust/pspp/src/output/pivot/output.rs b/rust/pspp/src/output/pivot/output.rs new file mode 100644 index 0000000000..f9d4dc2f23 --- /dev/null +++ b/rust/pspp/src/output/pivot/output.rs @@ -0,0 +1,370 @@ +use std::sync::Arc; + +use enum_map::{enum_map, EnumMap}; +use smallvec::{SmallVec, ToSmallVec}; + +use crate::output::table::Table; + +use super::{ + Axis, Axis2, Axis3, Border, BorderStyle, Category, CategoryTrait, Color, Coord2, Dimension, + PivotTable, Stroke, +}; + +/// All of the combinations of dimensions along an axis. 
+struct AxisEnumeration {
+    /// Every index combination, stored back to back, `stride` elements each.
+    indexes: Vec<usize>,
+
+    /// Number of elements of `indexes` that make up one combination.
+    /// `enumerate_axis` never sets this below 1.
+    stride: usize,
+
+    /// Offset into `indexes` of the next combination the iterator will yield.
+    position: usize,
+}
+
+impl AxisEnumeration {
+    /// Returns the number of index combinations.
+    fn len(&self) -> usize {
+        self.indexes.len() / self.stride
+    }
+
+    /// Returns true if there are no index combinations.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns combination number `index`.  Panics if `index` is out of range.
+    fn get(&self, index: usize) -> &[usize] {
+        let start = self.stride * index;
+        &self.indexes[start..start + self.stride]
+    }
+}
+
+impl Iterator for AxisEnumeration {
+    type Item = SmallVec<[usize; 4]>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.position < self.indexes.len() {
+            let item = self.indexes[self.position..self.position + self.stride].to_smallvec();
+            self.position += self.stride;
+            Some(item)
+        } else {
+            None
+        }
+    }
+}
+
+impl PivotTable {
+    /// Returns true if no cell has a value anywhere along the row or column
+    /// selected by `fixed_indexes` on `fixed_axis`, in the layer selected by
+    /// `layer_indexes`.
+    ///
+    /// Panics if `fixed_axis.transpose()` is `None`.
+    fn is_row_empty(
+        &self,
+        layer_indexes: &[usize],
+        fixed_indexes: &[usize],
+        fixed_axis: Axis3,
+    ) -> bool {
+        let vary_axis = fixed_axis
+            .transpose()
+            .expect("the fixed axis must have a transpose");
+        for vary_indexes in self.axes[vary_axis].iter() {
+            // Initially point both non-layer axes at `fixed_indexes`, then
+            // overwrite the entry for the axis being scanned.
+            let mut presentation_indexes = enum_map! {
+                Axis3::Z => layer_indexes,
+                _ => fixed_indexes,
+            };
+            presentation_indexes[vary_axis] = &vary_indexes;
+            let data_indexes = self.convert_indexes_ptod(presentation_indexes);
+            if self.get(&data_indexes).is_some() {
+                return false;
+            }
+        }
+        true
+    }
+
+    /// Enumerates the index combinations along `enum_axis` for the layer
+    /// selected by `layer_indexes`.
+    ///
+    /// With `omit_empty`, combinations for which `is_row_empty` is true are
+    /// left out, unless that would leave nothing at all, in which case every
+    /// combination is included.
+    fn enumerate_axis(
+        &self,
+        enum_axis: Axis3,
+        layer_indexes: &[usize],
+        omit_empty: bool,
+    ) -> AxisEnumeration {
+        let axis = &self.axes[enum_axis];
+        let indexes = if axis.dimensions.is_empty() {
+            // An axis without dimensions still has a single position.
+            vec![0]
+        } else if axis.extent == 0 {
+            Vec::new()
+        } else {
+            let mut enumeration =
+                Vec::with_capacity(axis.extent.checked_mul(axis.dimensions.len()).unwrap());
+            if omit_empty {
+                for axis_indexes in axis.iter() {
+                    if !self.is_row_empty(layer_indexes, &axis_indexes, enum_axis) {
+                        enumeration.extend_from_slice(&axis_indexes);
+                    }
+                }
+            }
+
+            // Either nothing was omitted on request or everything was empty:
+            // fall back to the full enumeration.
+            if enumeration.is_empty() {
+                for axis_indexes in axis.iter() {
+                    enumeration.extend_from_slice(&axis_indexes);
+                }
+            }
+            enumeration
+        };
+        AxisEnumeration {
+            indexes,
+            stride: axis.dimensions.len().max(1),
+            position: 0,
+        }
+    }
+
+    /// Lays out the layer selected by `layer_indexes` as a [Table].
+    ///
+    /// This is work in progress: the table is sized and given its styles, but
+    /// it is not yet filled in or returned.
+    pub fn output(&self, layer_indexes: &[usize], printing: bool) {
+        let column_enumeration = self.enumerate_axis(Axis3::X, layer_indexes, self.look.omit_empty);
+        let row_enumeration = self.enumerate_axis(Axis3::Y, layer_indexes, self.look.omit_empty);
+        let data = Coord2::new(column_enumeration.len(), row_enumeration.len());
+        let stub = Coord2::new(
+            self.axes[Axis3::Y].label_depth,
+            self.axes[Axis3::X].label_depth,
+        );
+        let borders = EnumMap::from_fn(|border| {
+            resolve_border_style(border, &self.look.borders, printing && self.show_grid_lines)
+        });
+        let n = EnumMap::from_fn(|axis| data[axis] + stub[axis]).into();
+        let _table = Table::new(n, stub, self.look.areas.clone(), borders);
+    }
+}
+
+/// Returns the category of dimension `d` whose label starts `row_ofs` rows
+/// above the innermost label row, for the leaf selected by
+/// `indexes[dim_index]`.
+///
+/// A category can cover multiple rows; only its first row yields `Some`.
+/// Returns `None` for a row inside a taller category and for rows beyond the
+/// outermost ancestor.
+fn find_category<'a>(
+    d: &'a Dimension,
+    dim_index: usize,
+    indexes: &[usize],
+    mut row_ofs: usize,
+) -> Option<Category> {
+    let index = indexes[dim_index];
+    let mut c = Category::Leaf(Arc::clone(&d.presentation_leaves[index]));
+    while row_ofs != c.extra_depth() {
+        row_ofs = row_ofs.checked_sub(1 + c.extra_depth())?;
+        c = Category::Group(Arc::clone(&c.parent()?));
+    }
+    Some(c)
+}
+
+/// Fills row or column headings into `table`.
+///
+/// This function uses terminology and variable names for column headings, but
+/// it also applies to row headings because it uses variables for the
+/// differences, e.g. when for column headings it would use the H axis, it
+/// instead uses 'h', which is set to H for column headings and V for row
+/// headings.
+fn compose_headings(
+    table: &mut Table,
+    h_axis: &Axis,
+    h: Axis2,
+    v_axis: &Axis,
+    column_enumeration: &AxisEnumeration,
+    dim_col_horz: Border,
+    dim_col_vert: Border,
+    cat_col_horz: Border,
+    cat_col_vert: Border,
+    rotate_inner_labels: bool,
+    rotate_outer_labels: bool,
+) {
+    let v = h.opposite();
+    let v_size = h_axis.label_depth;
+    let h_ofs = v_axis.label_depth;
+    let n_columns = column_enumeration.len();
+
+    if h_axis.dimensions.is_empty() || n_columns == 0 || v_size == 0 {
+        return;
+    }
+
+    // Below, we're going to iterate through the dimensions.  Each dimension
+    // occupies one or more rows in the heading.  `top_row` is the top row of
+    // these (and `top_row + d.label_depth - 1` is the bottom row).
+    let mut top_row = 0;
+
+    // We're going to iterate through dimensions and the rows that label them
+    // from top to bottom (from outer to inner dimensions).  As we move
+    // downward, we start drawing vertical rules to separate categories and
+    // groups.  After we start drawing a vertical rule in a particular
+    // horizontal position, it continues until the bottom of the heading.
+    // vrules[pos] indicates whether, in our current row, we have already
+    // started drawing a vertical rule in horizontal position `pos`.  (There are
+    // n_columns + 1 horizontal positions.  We allocate all of them for
+    // convenience below but only the inner `n_columns - 1` of them really
+    // matter.)
+    //
+    // Here's an example that shows how vertical rules continue all the way
+    // downward:
+    //
+    // ```text
+    // +-----------------------------------------------------+ __
+    // |                         bbbb                        |  |
+    // +-----------------+-----------------+-----------------+  |dimension "bbbb"
+    // |      bbbb1      |      bbbb2      |      bbbb3      | _|
+    // +-----------------+-----------------+-----------------+ __
+    // |       aaaa      |       aaaa      |       aaaa      |  |
+    // +-----+-----+-----+-----+-----+-----+-----+-----+-----+  |dimension "aaaa"
+    // |aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3| _|
+    // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+    //
+    // ^     ^     ^     ^     ^     ^     ^     ^     ^     ^
+    // |     |     |     |     |     |     |     |     |     |
+    // 0     1     2     3     4     5     6     7     8     9
+    // |___________________vrules[] indexes__________________|
+    // ```
+    //
+    // Our data structures are more naturally iterated from bottom to top (inner
+    // to outer dimensions).  A previous version of this code actually worked
+    // like that, but it didn't draw all of the vertical lines correctly as
+    // shown above.  It ended up rendering the above heading much like shown
+    // below, which isn't what users expect.  The "aaaa" label really needs to
+    // be shown three times for clarity:
+    //
+    // ```text
+    // +-----------------------------------------------------+
+    // |                         bbbb                        |
+    // +-----------------+-----------------+-----------------+
+    // |      bbbb1      |      bbbb2      |      bbbb3      |
+    // +-----------------+-----------------+-----------------+
+    // |                 |       aaaa      |                 |
+    // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+    // |aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3|
+    // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+    // ```
+    let mut vrules = vec![false; n_columns + 1];
+    vrules[0] = true;
+    vrules[n_columns] = true;
+    for dim_index in (0..h_axis.dimensions.len()).rev() {
+        let d = &h_axis.dimensions[dim_index];
+        if d.hide_all_labels {
+            continue;
+        }
+
+        for row_ofs in 0..d.label_depth {
+            let mut x1 = 0;
+            while x1 < n_columns {
+                let Some(c) = find_category(
+                    d,
+                    dim_index,
+                    column_enumeration.get(x1),
+                    d.label_depth - row_ofs - 1,
+                ) else {
+                    x1 += 1;
+                    continue;
+                };
+
+                // Extend the cell rightward over every following column that
+                // has the same category, stopping at any vertical rule that an
+                // outer row already started.
+                let mut x2 = x1 + 1;
+                while x2 < n_columns && !vrules[x2] {
+                    let c2 = find_category(
+                        d,
+                        dim_index,
+                        column_enumeration.get(x2),
+                        d.label_depth - row_ofs - 1,
+                    );
+                    if c2.as_ref().is_none_or(|c2| !c.ptr_eq(c2)) {
+                        break;
+                    }
+                    x2 += 1;
+                }
+
+                let y1 = top_row + row_ofs;
+                let y2 = y1 + c.extra_depth() + 1;
+                let is_outer_row = y1 == 0;
+                let is_inner_row = y2 == v_size;
+                if c.show_label() {
+                    // The label occupies columns `x1..x2` (shifted past the
+                    // stub) and rows `y1..y2`.  Both ranges are half-open,
+                    // which is how `Table::put` interprets a region.
+                    let mut bb = enum_map! {
+                        Axis2::X => 0..0,
+                        Axis2::Y => 0..0,
+                    };
+                    bb[h] = x1 + h_ofs..x2 + h_ofs;
+                    bb[v] = y1..y2;
+                    let rotate = (rotate_inner_labels && is_inner_row)
+                        || (rotate_outer_labels && is_outer_row);
+                    // fill_cell
+
+                    // Draw all the vertical lines in our running example, other
+                    // than the far left and far right ones.  Only the ones that
+                    // start in the last row of the heading are drawn with the
+                    // "category" style, the rest with the "dimension" style,
+                    // e.g. only the # below are category style:
+                    //
+                    // ```text
+                    // +-----------------------------------------------------+
+                    // |                         bbbb                        |
+                    // +-----------------+-----------------+-----------------+
+                    // |      bbbb1      |      bbbb2      |      bbbb3      |
+                    // +-----------------+-----------------+-----------------+
+                    // |       aaaa      |       aaaa      |       aaaa      |
+                    // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+                    // |aaaa1#aaaa2#aaaa3|aaaa1#aaaa2#aaaa3|aaaa1#aaaa2#aaaa3|
+                    // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+                    // ```
+                    let style = if y1 == v_size - 1 {
+                        cat_col_vert
+                    } else {
+                        dim_col_vert
+                    };
+                    if !vrules[x2] {
+                        // draw line
+                        vrules[x2] = true;
+                    }
+                    if !vrules[x1] {
+                        // draw line
+                        vrules[x1] = true;
+                    }
+                }
+
+                // Draws the horizontal lines within a dimension, that is,
+                // those that separate a category (or group) from its parent
+                // group or dimension's label.  Our running example doesn't
+                // have groups but the `====` lines below show the
+                // separators between categories and their dimension label:
+                //
+                // ```text
+                // +-----------------------------------------------------+
+                // |                         bbbb                        |
+                // +=================+=================+=================+
+                // |      bbbb1      |      bbbb2      |      bbbb3      |
+                // +-----------------+-----------------+-----------------+
+                // |       aaaa      |       aaaa      |       aaaa      |
+                // +=====+=====+=====+=====+=====+=====+=====+=====+=====+
+                // |aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3|
+                // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+                // ```
+                if c.parent().is_some_and(|parent| parent.show_label) {
+                    // draw line
+                }
+
+                // Always move on to the next cell, whether or not this one
+                // showed a label; otherwise a category with a hidden label
+                // would be revisited forever.
+                x1 = x2;
+            }
+        }
+
+        // The remaining steps happen once per dimension, after all of its
+        // label rows have been laid out.
+        if d.root.show_label_in_corner && h_ofs > 0 {
+            // fill cell
+        }
+
+        // Draw the horizontal line between dimensions, e.g. the `=====`
+        // line here:
+        //
+        // ```text
+        // +-----------------------------------------------------+ __
+        // |                         bbbb                        |  |
+        // +-----------------+-----------------+-----------------+  |dim "bbbb"
+        // |      bbbb1      |      bbbb2      |      bbbb3      | _|
+        // +=================+=================+=================+ __
+        // |       aaaa      |       aaaa      |       aaaa      |  |
+        // +-----+-----+-----+-----+-----+-----+-----+-----+-----+  |dim "aaaa"
+        // |aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3|aaaa1|aaaa2|aaaa3| _|
+        // +-----+-----+-----+-----+-----+-----+-----+-----+-----+
+        // ```
+        if dim_index != h_axis.dimensions.len() - 1 {
+            // draw lines
+        }
+        top_row += d.label_depth;
+    }
+}
+
+/// Returns the style to use for `border`.
+///
+/// That is `border`'s own entry in `borders` if it has a stroke, and otherwise
+/// the entry for `border.fallback()`.  If the fallback has no stroke either
+/// and `show_grid_lines` is set, the result is a dashed black line.
+fn resolve_border_style(
+    border: Border,
+    borders: &EnumMap<Border, BorderStyle>,
+    show_grid_lines: bool,
+) -> BorderStyle {
+    let style = borders[border];
+    if style.stroke != Stroke::None {
+        return style;
+    }
+
+    let fallback = borders[border.fallback()];
+    if fallback.stroke != Stroke::None || !show_grid_lines {
+        fallback
+    } else {
+        BorderStyle {
+            stroke: Stroke::Dashed,
+            color: Color::BLACK,
+        }
+    }
+}
diff --git a/rust/pspp/src/output/table.rs b/rust/pspp/src/output/table.rs
new file mode 100644
index 0000000000..5f9366bd2a
--- /dev/null
+++ b/rust/pspp/src/output/table.rs
@@ -0,0 +1,121 @@
+//! Tables.
+//!
+//! A table is a rectangular grid of cells. Cells can be joined to form larger
+//! cells. Rows and columns can be separated by rules of various types. Rows
+//! at the top and bottom of a table and columns at the left and right edges of
+//! a table can be designated as headers, which means that if the table must be
+//! broken across more than one page, those rows or columns are repeated on each
+//! page.
+//!
+//! Some drivers use tables as an implementation detail of rendering pivot
+//! tables.
+
+use std::{ops::Range, sync::Arc};
+
+use enum_map::{enum_map, EnumMap};
+
+use crate::output::pivot::Coord2;
+
+use super::pivot::{Area, AreaStyle, Axis2, Border, BorderStyle, CellStyle, FontStyle, Value};
+
+/// What occupies one grid position of a [Table].
+#[derive(Clone)]
+pub enum Content {
+    /// Nothing has been put here.
+    Empty,
+
+    /// A cell that occupies exactly this one position.
+    Value(Box<CellInner>),
+
+    /// A cell that spans several positions, shared by all of them.
+    Join(Arc<Cell>),
+}
+
+/// A cell together with the region of the table that it covers.
+#[derive(Clone)]
+pub struct Cell {
+    inner: CellInner,
+
+    /// Occupied table region.
+    region: EnumMap<Axis2, Range<usize>>,
+
+    // NOTE(review): the pointer type inside these two `Option`s is not
+    // recoverable from this patch as extracted; `Box` is assumed.  Confirm
+    // against the repository.
+    font_style: Option<Box<FontStyle>>,
+    cell_style: Option<Box<CellStyle>>,
+}
+
+impl Cell {
+    /// Constructs a cell covering `region`, with no font or cell style set.
+    fn new(inner: CellInner, region: EnumMap<Axis2, Range<usize>>) -> Self {
+        Self {
+            inner,
+            region,
+            font_style: None,
+            cell_style: None,
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct CellInner {
+    /// Rotate cell contents 90 degrees?
+    rotate: bool,
+
+    /// An index into `styles` in the `Table`.
+    style_idx: u8,
+
+    value: Value,
+}
+
+/// A table.
+pub struct Table {
+    /// Number of rows and columns.
+    n: Coord2,
+
+    /// Table header rows and columns.
+    headers: Coord2,
+
+    /// One entry per grid position, in row-major order (see [Table::offset]).
+    contents: Vec<Content>,
+
+    /// Styles for areas of the table.
+    areas: EnumMap<Area, AreaStyle>,
+
+    /// Styles for borders in the table.
+    borders: EnumMap<Border, BorderStyle>,
+
+    /// Horizontal and vertical rules.
+    rules: EnumMap<Axis2, Vec<Border>>,
+}
+
+impl Table {
+    /// Constructs a table with `n` columns and rows in which every position
+    /// is [Content::Empty] and every rule is initially `Border::Title`.
+    pub fn new(
+        n: Coord2,
+        headers: Coord2,
+        areas: EnumMap<Area, AreaStyle>,
+        borders: EnumMap<Border, BorderStyle>,
+    ) -> Self {
+        let nr = n[Axis2::Y];
+        let nc = n[Axis2::X];
+        Self {
+            n,
+            headers,
+            contents: vec![Content::Empty; nr * nc],
+            areas,
+            borders,
+            rules: enum_map! {
+                Axis2::X => vec![Border::Title; (nr + 1) * nc],
+                Axis2::Y => vec![Border::Title; nr * (nc + 1)],
+            },
+        }
+    }
+
+    /// Returns the index within `contents` of the position `pos`.
+    pub fn offset(&self, pos: Coord2) -> usize {
+        pos.x() + self.n.x() * pos.y()
+    }
+
+    /// Puts `inner` into the table so that it covers `region`, whose ranges
+    /// are half-open.
+    ///
+    /// A region of exactly one position stores the cell directly; any other
+    /// region stores one shared [Cell] in every position that it covers.
+    /// Panics if a covered position lies beyond the end of the table.
+    pub fn put(&mut self, region: EnumMap<Axis2, Range<usize>>, inner: CellInner) {
+        use Axis2::*;
+        if region[X].len() == 1 && region[Y].len() == 1 {
+            let offset = self.offset(Coord2::new(region[X].start, region[Y].start));
+            self.contents[offset] = Content::Value(Box::new(inner));
+        } else {
+            let cell = Arc::new(Cell::new(inner, region.clone()));
+            for y in region[Y].clone() {
+                for x in region[X].clone() {
+                    let offset = self.offset(Coord2::new(x, y));
+                    self.contents[offset] = Content::Join(Arc::clone(&cell));
+                }
+            }
+        }
+    }
+}
diff --git a/rust/pspp/src/raw.rs b/rust/pspp/src/raw.rs
index 9da9ea5a77..8db959052b 100644
--- a/rust/pspp/src/raw.rs
+++ b/rust/pspp/src/raw.rs
@@ -5,7 +5,7 @@ use crate::{
     identifier::{Error as IdError, Identifier},
 };
 
-use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding};
+use encoding_rs::{mem::decode_latin1, Encoding};
 use flate2::read::ZlibDecoder;
 use num::Integer;
 use std::{
@@ -14,7 +14,6 @@ use std::{
     collections::{HashMap, VecDeque},
     fmt::{Debug, Display, Formatter, Result as FmtResult},
     io::{Error as IoError, Read, Seek, SeekFrom},
-    iter::repeat,
     mem::take,
     num::NonZeroU8,
     ops::Range,
@@ -553,40 +552,6 @@ impl Decoder {
         self.decode_slice(input.0.as_slice())
     }
 
-    /// Returns `input` decoded from `self.encoding` into UTF-8 such that
-    /// re-encoding the result back into `self.encoding` will have exactly the
-    /// same length in bytes.
-    ///
-    /// XXX warn about errors?
-    pub fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
-        if let (s, false) = self.encoding.decode_without_bom_handling(input) {
-            // This is the common case. Usually there will be no errors.
-            s
-        } else {
-            // Unusual case. Don't bother to optimize it much.
- let mut decoder = self.encoding.new_decoder_without_bom_handling(); - let mut output = String::with_capacity( - decoder - .max_utf8_buffer_length_without_replacement(input.len()) - .unwrap(), - ); - let mut rest = input; - while !rest.is_empty() { - match decoder.decode_to_string_without_replacement(rest, &mut output, true) { - (DecoderResult::InputEmpty, _) => break, - (DecoderResult::OutputFull, _) => unreachable!(), - (DecoderResult::Malformed(a, b), consumed) => { - let skipped = a as usize + b as usize; - output.extend(repeat('?').take(skipped)); - rest = &rest[consumed..]; - } - } - } - assert_eq!(self.encoding.encode(&output).0.len(), input.len()); - output.into() - } - } - pub fn decode_identifier(&self, input: &RawString) -> Result { self.new_identifier(&self.decode(input)) } -- 2.30.2