[[package]]
name = "quick-xml"
-version = "0.37.5"
+version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb"
+checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
"serde",
smallstr = "0.3.0"
itertools = "0.14.0"
unicode-linebreak = "0.1.5"
-quick-xml = { version = "0.37.2", features = ["serialize"] }
+quick-xml = { version = "0.38.4", features = ["serialize"] }
serde = { version = "1.0.218", features = ["derive", "rc"] }
color = { version = "0.2.3", features = ["serde"] }
binrw = "0.14.1"
let (body, suffixes) = cell.display().split_suffixes();
let horz_align = cell.horz_align(&body);
- let body = body.to_string();
+
+ let mut attrs = None;
+ let mut body = if let Some(markup) = body.markup() {
+ match parse_markup(markup, 0 as char) {
+ Ok((markup_attrs, string, _accel)) => {
+ attrs = Some(markup_attrs);
+ string.into()
+ }
+ Err(_) => String::from(markup),
+ }
+ } else {
+ avoid_decimal_split(body.to_string())
+ };
match horz_align {
HorzAlign::Decimal { offset, decimal } if !cell.rotate => {
_ => (),
}
- let mut attrs = None;
- let mut body = if cell.font_style.markup {
- match parse_markup(&body, 0 as char) {
- Ok((markup_attrs, string, _accel)) => {
- attrs = Some(markup_attrs);
- string.into()
- }
- Err(_) => body,
- }
- } else {
- avoid_decimal_split(body)
- };
-
if cell.font_style.underline {
attrs
.get_or_insert_default()
)
.write_options(writer, endian, args)?;
}
+ ValueInner::Markup(markup) => {
+ (
+ 3u8,
+ SpvString(&markup.xml), // XXX
+ ValueMod::new(self),
+ SpvString(&markup.xml),
+ SpvString(&markup.xml),
+ SpvBool(true),
+ )
+ .write_options(writer, endian, args)?;
+ }
ValueInner::Text(text) => {
(
3u8,
use enum_map::{Enum, EnumMap, enum_map};
use itertools::Itertools;
pub use look_xml::{Length, TableProperties};
-use quick_xml::{DeError, de::from_str};
+use quick_xml::{DeError, de::from_str, escape::resolve_xml_entity, events::Event};
use serde::{
Deserialize, Serialize, Serializer,
de::Visitor,
pub fn as_group(&self) -> Option<&Group> {
match self {
Category::Group(group) => Some(group),
- Category::Leaf(leaf) => None,
+ Category::Leaf(_) => None,
}
}
pub fn as_group_mut(&mut self) -> Option<&mut Group> {
match self {
Category::Group(group) => Some(group),
- Category::Leaf(leaf) => None,
+ Category::Leaf(_) => None,
}
}
pub fn as_leaf(&self) -> Option<&Leaf> {
match self {
- Category::Group(group) => None,
Category::Leaf(leaf) => Some(leaf),
+ Category::Group(_) => None,
}
}
pub fn as_leaf_mut(&mut self) -> Option<&mut Leaf> {
match self {
- Category::Group(group) => None,
Category::Leaf(leaf) => Some(leaf),
+ Category::Group(_) => None,
}
}
}
}
- fn index_path(&self, index: usize, mut path: IndexVec) -> Option<IndexVec> {
+ fn index_path(&self, index: usize, path: IndexVec) -> Option<IndexVec> {
match self {
Category::Group(group) => group.index_path(index, path),
- Category::Leaf(leaf) if index == 0 => Some(path),
+ Category::Leaf(_) if index == 0 => Some(path),
_ => None,
}
}
pub bold: bool,
pub italic: bool,
pub underline: bool,
- pub markup: bool,
pub font: String,
pub fg: Color,
pub bg: Color,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
pub fn with_underline(self, underline: bool) -> Self {
Self { underline, ..self }
}
- pub fn with_markup(self, markup: bool) -> Self {
- Self { markup, ..self }
- }
pub fn with_font(self, font: impl Into<String>) -> Self {
Self {
font: font.into(),
ValueInner::String(string_value) => string_value.s.serialize(serializer),
ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer),
ValueInner::Text(text_value) => text_value.localized.serialize(serializer),
+ ValueInner::Markup(markup) => markup.xml.serialize(serializer), /*XXX*/
ValueInner::Template(template_value) => template_value.localized.serialize(serializer),
ValueInner::Empty => serializer.serialize_none(),
}
localized,
}))
}
+ /// Constructs a value holding markup text.
+ ///
+ /// `s` is converted into an owned `String` and stored unmodified as the
+ /// `xml` field of a [`Markup`] wrapped in `ValueInner::Markup`.
+ pub fn new_markup(s: impl Into<String>) -> Self {
+     let xml: String = s.into();
+     let inner = ValueInner::Markup(Markup { xml });
+     Self::new(inner)
+ }
pub fn new_user_text(s: impl Into<String>) -> Self {
let s: String = s.into();
if s.is_empty() {
pub struct DisplayValue<'a> {
inner: &'a ValueInner,
- markup: bool,
subscripts: &'a [String],
footnotes: &'a [Arc<Footnote>],
options: ValueOptions,
}
}
+ /// Returns the markup source of the value being displayed, as reported
+ /// by the inner value's own `markup()` accessor.
+ pub fn markup(&self) -> Option<&str> {
+     let inner = self.inner;
+     inner.markup()
+ }
+
/// Returns this display split into `(body, suffixes)` where `suffixes` is
/// subscripts and footnotes and `body` is everything else.
pub fn split_suffixes(self) -> (Self, Self) {
}
pub fn with_styling(mut self, styling: &'a ValueStyle) -> Self {
- if let Some(font_style) = &styling.font_style {
- self.markup = font_style.markup;
- }
self.subscripts = styling.subscripts.as_slice();
self.footnotes = styling.footnotes.as_slice();
self
}
- pub fn with_font_style(self, font_style: &FontStyle) -> Self {
- Self {
- markup: font_style.markup,
- ..self
- }
- }
-
pub fn with_subscripts(self, subscripts: &'a [String]) -> Self {
Self { subscripts, ..self }
}
}
}
- ValueInner::Text(TextValue {
- localized: local, ..
- }) => {
- if self.markup {
- dbg!(local);
+ ValueInner::Markup(Markup { xml }) => {
+ let mut reader = quick_xml::Reader::from_str(xml.as_str());
+ while let Ok(event) = reader.read_event() {
+ match event {
+ Event::Text(bytes_text) => {
+     // Formatting must not panic: skip text that fails to decode,
+     // the same way an undecodable entity reference is skipped below.
+     if let Ok(text) = bytes_text.decode() {
+         f.write_str(&text)?;
+     }
+ }
+ Event::GeneralRef(bytes) => {
+ if let Ok(entity) = bytes.decode()
+ && let Some(s) = resolve_xml_entity(&entity)
+ {
+ f.write_str(s)?;
+ }
+ }
+ Event::Eof => break,
+ _ => (),
+ }
}
- f.write_str(local)
+ Ok(())
}
+ ValueInner::Text(TextValue {
+ localized: local, ..
+ }) => f.write_str(local),
+
ValueInner::Template(TemplateValue {
args,
localized: local,
pub variable_label: Option<String>,
}
+/// Markup text carried by a `ValueInner::Markup` value.
+#[derive(Clone, Debug, PartialEq, Serialize /*XXX*/)]
+pub struct Markup {
+ /// The markup source.  It is read with an XML reader when the value is
+ /// displayed as plain text.
+ pub xml: String,
+}
+
#[derive(Clone, Debug, PartialEq)]
pub struct TextValue {
pub user_provided: bool,
String(StringValue),
Variable(VariableValue),
Text(TextValue),
+ Markup(Markup),
Template(TemplateValue),
#[default]
_ => None,
}
}
+
+ /// Returns the markup source if this is the `Markup` variant, and `None`
+ /// for every other variant.
+ fn markup(&self) -> Option<&str> {
+     if let ValueInner::Markup(Markup { xml }) = self {
+         Some(xml.as_str())
+     } else {
+         None
+     }
+ }
}
#[derive(Clone, Debug, Default, PartialEq)]
};
DisplayValue {
inner: self,
- markup: false,
subscripts: &[],
footnotes: &[],
options,
bold: self.font_weight == FontWeight::Bold,
italic: self.font_style == FontStyle::Italic,
underline: self.font_underline == FontUnderline::Underline,
- markup: false,
font: self.font_family.clone(),
fg: match data_row {
RowParity::Even => self.color.unwrap_or(Color::BLACK),
bold: true,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::BLACK,
bold: false,
italic: false,
underline: false,
- markup: false,
font: String::from("Sans Serif"),
fg: Color::BLACK,
bg: Color::WHITE,
bold: style.weight > 400,
italic: style.italic,
underline: style.underline,
- markup: false,
font: style.font_name.string.clone(),
fg: style.text_color,
bg,
use crate::output::{
Details, Item, SpvInfo, SpvMembers, Text,
page::PageSetup,
- pivot::{Look, PivotTable, TableProperties, Value},
+ pivot::{Look, TableProperties, Value},
spv::{
legacy_bin::LegacyBin,
legacy_xml::Visualization,
impl ContainerText {
fn decode(&self) -> Value {
- dbg!(&self.html);
html::parse(&self.html)
}
}
None
}
+/// Decodes a named character entity at the start of `s`.
+///
+/// `s` is the text that immediately follows an `&`.  If it begins with one
+/// of the recognized entity names, terminating `;` included (`amp;`, `lt;`,
+/// `gt;`, `apos;`, `quot;`, `nbsp;`), the result is the character that the
+/// entity stands for plus the text after the `;`.  Otherwise the result is
+/// a literal `'&'` plus `s` unchanged.
+fn parse_entity(s: &str) -> (char, &str) {
+    const ENTITIES: [(&str, char); 6] = [
+        ("amp;", '&'),
+        ("lt;", '<'),
+        ("gt;", '>'),
+        ("apos;", '\''),
+        ("quot;", '"'),
+        ("nbsp;", '\u{00a0}'),
+    ];
+    ENTITIES
+        .iter()
+        .find_map(|&(name, ch)| s.strip_prefix(name).map(|rest| (ch, rest)))
+        .unwrap_or(('&', s))
+}
+
fn get_node_text(node: &Node, text: &mut String) {
match node {
Node::Text(string) => {
let mut s = string.as_str();
- 'OUTER: while !s.is_empty() {
+ while !s.is_empty() {
let amp = s.find('&').unwrap_or(s.len());
let (head, rest) = s.split_at(amp);
text.push_str(head);
if rest.is_empty() {
break;
}
-
- static ENTITIES: [(&str, char); 6] = [
- ("&", '&'),
- ("<", '<'),
- (">", '>'),
- ("'", '\''),
- (""", '"'),
- (" ", '\u{00a0}'),
- ];
- for (name, character) in ENTITIES {
- if let Some(rest) = rest.strip_prefix(name) {
- text.push(character);
- s = rest;
- continue 'OUTER;
- }
- }
- text.push('&');
- s = &s[1..];
+ let ch;
+ (ch, s) = parse_entity(&s[1..]);
+ text.push(ch);
}
}
Node::Element(element) => get_element_text(element, text),
fn extract_html_text(node: &Node, base_font_size: i32, s: &mut String) {
match node {
Node::Text(text) => {
- dbg!(text);
- for c in text.chars() {
+ let mut iter = text.chars();
+ while let Some(mut c) = iter.next() {
fn push_whitespace(c: char, s: &mut String) {
if s.chars().next_back().is_none_or(|c| !c.is_whitespace()) {
s.push(c);
}
}
+ if c == '&' {
+ let rest;
+ (c, rest) = parse_entity(iter.as_str());
+ iter = rest.chars();
+ }
match c {
'\u{00a0}' => {
// U+00A0 NONBREAKING SPACE is really, really common
_ if c.is_whitespace() => push_whitespace(c, s),
'<' => s.push_str("<"),
'>' => s.push_str(">"),
+ '&' => s.push_str("&"),
+ '\'' => s.push_str("'"),
+ '"' => s.push_str("""),
_ => s.push(c),
}
}
font_style: &mut FontStyle,
) -> Result<(), html_parser::Error> {
let dom = Dom::parse(&format!("<!doctype html>{input}"))?;
- font_style.markup = true;
for node in &dom.children {
match node.element() {
Some(head) if head.name.eq_ignore_ascii_case("head") => {
let mut font_style = FontStyle::default().with_size(10);
let mut html = String::new();
if parse2(input, &mut html, &mut font_style).is_ok() {
- Value::new_user_text(html)
+ Value::new_markup(html)
} else {
Value::new_user_text(input)
}
pub fn parse(input: &str) -> Value {
let mut font_style = FontStyle::default().with_size(10);
- let text = match Dom::parse(&format!("<!doctype html>{input}")) {
+ let value = match Dom::parse(&format!("<!doctype html>{input}")) {
Ok(dom) => {
- font_style.markup = true;
let mut s = String::new();
for node in &dom.children {
if let Node::Element(head) = node
extract_html_text(node, font_style.size, &mut s);
}
}
- dbg!(&s);
- s
+ Value::new_markup(s)
}
- _ => input.into(),
+ _ => Value::new_user_text(input),
};
- Value::new_user_text(text).with_font_style(font_style)
+ value.with_font_style(font_style)
}
#[cfg(test)]
mod tests {
- use html_parser::Dom;
-
use crate::output::{
pivot::{FontStyle, Value},
spv::html::{parse, parse_paragraphs, parse_value},
fn css() {
assert_eq!(
parse("<head><style><!--p {font-size: 24; font-weight: bold}--></style></head>text"),
- Value::new_user_text("text").with_font_style(
- FontStyle::default()
- .with_size(18)
- .with_bold(true)
- .with_markup(true)
- )
+ Value::new_markup("text")
+ .with_font_style(FontStyle::default().with_size(18).with_bold(true))
);
}
);
assert_eq!(
value,
- Value::new_user_text(
+ Value::new_markup(
r##"<b>bold</b>
<i>italic</i>
<b><i>bold italic</i></b>
<span size="20480">big</span>
"##
)
- .with_font_style(FontStyle::default().with_size(10).with_markup(true))
+ .with_font_style(FontStyle::default().with_size(10))
);
}
- #[test]
- fn entity() {
- let html = r#"<!doctype html><html><body>Hi there!</body></html>"#;
- dbg!(Dom::parse(html));
- todo!()
- }
-
#[test]
fn paragraphs() {
let paragraphs = parse_paragraphs(
use chrono::{NaiveDateTime, NaiveTime};
use enum_map::{Enum, EnumMap};
use hashbrown::HashSet;
-use itertools::Itertools;
use ordered_float::OrderedFloat;
use serde::Deserialize;
PivotTable, RowParity, Value, ValueInner, VertAlign,
},
spv::legacy_bin::DataValue,
- table,
},
};
}
for dv in take(&mut derived_variables) {
- match dv.decode(&data, &series) {
+ match dv.decode(&series) {
Ok(s) => {
series.insert(&dv.id, s);
}
// they are redundant. Ignore them.
continue;
};
- let dimension = &mut dims[dim_index].dimension;
for index in
w.include.split(';').filter_map(|s| s.parse::<usize>().ok())
{
}
impl DerivedVariable {
- fn decode(
- &self,
- data: &HashMap<String, HashMap<String, Vec<DataValue>>>,
- series: &HashMap<&str, Series>,
- ) -> Result<Series, ()> {
+ fn decode(&self, series: &HashMap<&str, Series>) -> Result<Series, ()> {
let mut values = if self.value == "constant(0)" {
let n_values = if let Some(series) = series.values().next() {
series.values.len()
Some(SetFormatChild::NumberFormat(format)) => {
Some(SignificantNumberFormat::from(format).decode())
}
- Some(SetFormatChild::StringFormat(format)) => None,
+ Some(SetFormatChild::StringFormat(_)) => None,
Some(SetFormatChild::DateTimeFormat(format)) => Some(format.decode()),
Some(SetFormatChild::ElapsedTimeFormat(format)) => Some(format.decode()),
None => None,
}
if fg.is_some() || bg.is_some() {
- let mut styling = value.styling_mut();
+ let styling = value.styling_mut();
let font_style = styling
.font_style
.get_or_insert_with(|| base_style.font_style.clone());
for t in labels {
if let LabelChild::Text(text) = &t.child {
for t in text {
- if let Some(defines_reference) = t.defines_reference {
+ if let Some(_defines_reference) = t.defines_reference {
// XXX footnote
}
s += &t.text;
bold: (self.style & 1) != 0,
italic: (self.style & 2) != 0,
underline: self.underline,
- markup: false,
font: self.typeface.decode(encoding),
fg: match data_row {
RowParity::Even => self.fg,
bold: font_style.bold,
italic: font_style.italic,
underline: font_style.underline,
- markup: false,
font: font_style.typeface.decode(encoding),
fg: font_style.fg,
bg: font_style.bg,
pub fn display(&self) -> DisplayValue<'a> {
self.inner
.display(self.value_options)
- .with_font_style(&self.font_style)
.with_subscripts(self.subscripts)
.with_footnotes(self.footnotes)
}