From 158d711d558214b71cf7a39e63949975d0f26a1f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 24 Dec 2025 13:51:52 -0800 Subject: [PATCH] tests --- rust/pspp/src/output/drivers/text.rs | 34 +++++++----- rust/pspp/src/spv/read/html.rs | 50 ++++++++++-------- rust/pspp/src/spv/read/legacy_xml.rs | 55 ++++++++++++++------ rust/pspp/src/spv/read/tests.rs | 6 +++ rust/pspp/src/spv/testdata/legacy7.expected | 44 ++++++++++++++++ rust/pspp/src/spv/testdata/legacy7.spv | Bin 0 -> 6178 bytes 6 files changed, 139 insertions(+), 50 deletions(-) create mode 100644 rust/pspp/src/spv/testdata/legacy7.expected create mode 100644 rust/pspp/src/spv/testdata/legacy7.spv diff --git a/rust/pspp/src/output/drivers/text.rs b/rust/pspp/src/output/drivers/text.rs index 5827fef5f8..5a11d6190c 100644 --- a/rust/pspp/src/output/drivers/text.rs +++ b/rust/pspp/src/output/drivers/text.rs @@ -385,20 +385,32 @@ impl TextDriver { } impl TextRenderer { + fn start_object(&mut self, writer: &mut W) -> FmtResult + where + W: FmtWrite, + { + if self.n_objects > 0 { + writeln!(writer)?; + } + self.n_objects += 1; + Ok(()) + } + fn render(&mut self, item: &Item, writer: &mut W) -> FmtResult where W: FmtWrite, { - for (index, item) in ItemRefIterator::without_hidden(item) - .filter(|item| !item.details.is_heading()) - .enumerate() + for item in ItemRefIterator::without_hidden(item).filter(|item| !item.details.is_heading()) { - if index > 0 { - writeln!(writer)?; - } match &item.details { - Details::Chart => writeln!(writer, "Omitting chart from text output")?, - Details::Image(_) => writeln!(writer, "Omitting image from text output")?, + Details::Chart => { + self.start_object(writer)?; + writeln!(writer, "Omitting chart from text output")? + } + Details::Image(_) => { + self.start_object(writer)?; + writeln!(writer, "Omitting image from text output")? 
+ } Details::Heading(_) => unreachable!(), Details::Message(_diagnostic) => todo!(), Details::PageBreak => (), @@ -415,10 +427,8 @@ impl TextRenderer { where W: FmtWrite, { - for (index, layer_indexes) in table.layers(true).enumerate() { - if index > 0 { - writeln!(writer)?; - } + for layer_indexes in table.layers(true) { + self.start_object(writer)?; let mut pager = Pager::new(self, table, Some(layer_indexes.as_slice())); while pager.has_next(self).is_some() { diff --git a/rust/pspp/src/spv/read/html.rs b/rust/pspp/src/spv/read/html.rs index 19dc709766..4cc4b12caf 100644 --- a/rust/pspp/src/spv/read/html.rs +++ b/rust/pspp/src/spv/read/html.rs @@ -778,7 +778,9 @@ fn parse_nodes(nodes: &[Node]) -> Markup { } // SPSS often starts paragraphs with an initial `
<BR>` that it // ignores, but it does honor `<br>
`. So weird. - Node::Element(br) if br.name == "br" => { + Node::Element(br) + if br.name.eq_ignore_ascii_case("br") && (br.name == "br" || i != 0) => + { add_markup(&mut retval, Markup::Text('\n'.into())); } Node::Element(element) => { @@ -970,26 +972,6 @@ mod tests { ); } - /* - #[test] - fn value() { - let value = parse_value( - r#"bold
italic
bold italic
red serif
big
"#, - ); - assert_eq!( - value, - Value::new_markup( - r##"bold -italic -bold italic -red serif -big -"## - ) - .with_font_style(FontStyle::default().with_size(10)) - ); - }*/ - /// From the corpus (also included in the documentation). #[test] fn header1() { @@ -1090,6 +1072,32 @@ mod tests { ); } + /// From the corpus, anonymized. + /// + /// This tests the unusual treatment of `
<BR>` at the start of text (`<BR>
` + /// is ignored at the start, but `<br>
` is not). + #[test] + fn breaks() { + let text = r##"<head><style type="text/css">p{color:0;font-family:Monospaced;font-size:13pt;font-style:normal;font-weight:normal;text-decoration:none}</style></head><BR>USE ALL.<BR>COMPUTE filter_$=(group = 1).<BR>VARIABLE LABEL filter_$ 'group = 1 (FILTER)'.<BR>VALUE LABELS filter_$ 0 'Not Selected' 1 'Selected'.<BR>FORMAT filter_$ (f1.0).<BR>FILTER BY filter_$.<BR>EXECUTE.<BR>NPAR TEST<BR>  /WILCOXON=x WITH y<BR>   z w (PAIRED)<BR>  /MISSING ANALYSIS."##; + let content = quick_xml::de::from_str::(text).unwrap(); + let html = Document::from_html(&content); + let s = html.into_value().display(()).to_string(); + assert_eq!( + s, + r##"USE ALL. +COMPUTE filter_$=(group = 1). +VARIABLE LABEL filter_$ 'group = 1 (FILTER)'. +VALUE LABELS filter_$ 0 'Not Selected' 1 'Selected'. +FORMAT filter_$ (f1.0). +FILTER BY filter_$. +EXECUTE. +NPAR TEST +  /WILCOXON=x WITH y +   z w (PAIRED) +  /MISSING ANALYSIS."## + ); + } + /// Checks that the `escape-html` feature is enabled in [quick_xml], since /// we need that to resolve ` ` and other HTML entities. 
#[test] diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 5998f8bb3f..5b4a5473f7 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -17,6 +17,7 @@ use std::{ cell::{Cell, RefCell}, collections::{BTreeMap, HashMap}, + fmt::Debug, marker::PhantomData, mem::take, num::NonZeroUsize, @@ -670,16 +671,10 @@ impl Visualization { let cell = series.get("cell").unwrap()/*XXX*/; let mut coords = Vec::with_capacity(dims.len()); let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series); - let cell_footnotes = - graph - .interval - .labeling - .children - .iter() - .find_map(|child| match child { - LabelingChild::Footnotes(footnotes) => series.get(footnotes.variable.as_str()), - _ => None, - }); + let cell_footnotes = graph + .interval + .footnotes() + .and_then(|footnotes| series.get(footnotes.variable.as_str())); let mut data = HashMap::new(); for (i, cell) in cell.values.iter().enumerate() { coords.clear(); @@ -718,9 +713,9 @@ impl Visualization { for part in s.split(',') { if let Ok(index) = part.parse::() && let Some(index) = index.checked_sub(1) - && let Some(footnote) = footnotes.get(index) + && let Some(footnote) = dbg!(footnotes.get(index)) { - value = value.with_footnote(footnote); + value.add_footnote(footnote); } } } @@ -1021,7 +1016,7 @@ impl Visualization { .collect::>(); let mut pivot_table = PivotTable::new(dimensions) .with_look(Arc::new(look)) - .with_footnotes(footnotes) + .with_footnotes(dbg!(footnotes)) .with_data(data) .with_layer(¤t_layer); let decimal = Decimal::for_lang(&self.lang); @@ -1050,6 +1045,14 @@ struct Series { dimension_index: Cell>, } +impl Debug for Series { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Series") + .field("name", &self.name) + .finish_non_exhaustive() + } +} + impl Series { fn new(name: String, values: Vec, map: Map) -> Self { Self { @@ -1888,10 +1891,6 @@ impl Style { 
base_style: &AreaStyle, ) { if let Some(sf) = sf { - if sf.reset == Some(true) { - value.styling_mut().footnotes.clear(); - } - let format = match &sf.child { Some(SetFormatChild::Format(format)) => Some(format.decode()), Some(SetFormatChild::NumberFormat(format)) => { @@ -2414,6 +2413,19 @@ struct Interval { footnotes: Option, } +impl Interval { + fn footnotes(&self) -> Option<&Footnotes> { + if let Some(footnotes) = &self.footnotes { + Some(footnotes) + } else { + self.labeling + .children + .iter() + .find_map(|child| child.as_footnotes()) + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct Labeling { @@ -2459,6 +2471,15 @@ enum LabelingChild { Footnotes(Footnotes), } +impl LabelingChild { + fn as_footnotes(&self) -> Option<&Footnotes> { + match self { + Self::Footnotes(footnotes) => Some(footnotes), + _ => None, + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct Formatting { diff --git a/rust/pspp/src/spv/read/tests.rs b/rust/pspp/src/spv/read/tests.rs index ee61dbc782..bf12298010 100644 --- a/rust/pspp/src/spv/read/tests.rs +++ b/rust/pspp/src/spv/read/tests.rs @@ -43,6 +43,12 @@ fn legacy6() { test_raw_spvfile("legacy6"); } +/// Regression test for ``. 
+#[test] +fn legacy7() { + test_raw_spvfile("legacy7"); +} + fn test_raw_spvfile(name: &str) { let input_filename = Path::new("src/spv/testdata") .join(name) diff --git a/rust/pspp/src/spv/testdata/legacy7.expected b/rust/pspp/src/spv/testdata/legacy7.expected new file mode 100644 index 0000000000..18283a8a4e --- /dev/null +++ b/rust/pspp/src/spv/testdata/legacy7.expected @@ -0,0 +1,44 @@ + Ranks +╭───────────────────────────────────────────────────────┬─────┬─────────┬────────────╮ +│ │ N │Mean Rank│Sum of Ranks│ +├───────────────────────────────────────────────────────┼─────┼─────────┼────────────┤ +│xxxxxxxxxx - yyyyyyyyyyyyy Negative Ranks│25[a]│ 13,00│ 325,00│ +│ Positive Ranks│ 0[b]│ ,00│ ,00│ +│ Ties │ 0[c]│ │ │ +│ Total │ 25│ │ │ +├───────────────────────────────────────────────────────┼─────┼─────────┼────────────┤ +│xxxxxxxxxxxxx - yyyyyyyyyyyyyy Negative Ranks│25[d]│ 13,00│ 325,00│ +│ Positive Ranks│ 0[e]│ ,00│ ,00│ +│ Ties │ 0[f]│ │ │ +│ Total │ 25│ │ │ +├───────────────────────────────────────────────────────┼─────┼─────────┼────────────┤ +│xxxxxxxxxxxxxx - yyyyyyyyyyyyyyy Negative Ranks│25[g]│ 13,00│ 325,00│ +│ Positive Ranks│ 0[h]│ ,00│ ,00│ +│ Ties │ 0[i]│ │ │ +│ Total │ 25│ │ │ +├───────────────────────────────────────────────────────┼─────┼─────────┼────────────┤ +│xxxxxxxxxxxxxxxx - yyyyyyyyyyyyyyyyy Negative Ranks│ 5[j]│ 3,00│ 15,00│ +│ Positive Ranks│ 0[k]│ ,00│ ,00│ +│ Ties │20[l]│ │ │ +│ Total │ 25│ │ │ +├───────────────────────────────────────────────────────┼─────┼─────────┼────────────┤ +│xxxxxxxxxxxxxxxxxxx - yyyyyyyyyyyyyyyyyy Negative Ranks│ 0[m]│ ,00│ ,00│ +│ Positive Ranks│ 5[n]│ 3,00│ 15,00│ +│ Ties │20[o]│ │ │ +│ Total │ 25│ │ │ +╰───────────────────────────────────────────────────────┴─────┴─────────┴────────────╯ +a. Footnote A +b. Footnote B +c. Footnote C +d. Footnote D +e. Footnote E +f. Footnote F +g. Footnote G +h. Footnote H +i. Footnote I +j. Footnote J +k. Footnote K +l. Footnote L +m. Footnote M +n. Footnote N +o. 
Footnote O diff --git a/rust/pspp/src/spv/testdata/legacy7.spv b/rust/pspp/src/spv/testdata/legacy7.spv new file mode 100644 index 0000000000000000000000000000000000000000..77caf756279881f8c7ad1ccdf3ec17db63994155 GIT binary patch literal 6178 zcma)=1yq#X*2f27NNJFe0Tht#Atj`{k(!~TyJn;WBm_YQq`Rf1OS&6rX-Nrz0cl}G zK79SY_rCYO_kQa>=hFLna$*END``j84B-%95S?kVWr^UBwy$o~FO?tbyP9q-$}v;)rRyIrm0=Oh!-Bgaib66G1@2*fbKO}2UJ5< z?$QZ8FB6o$4CyhSt4w&>V|?Fuf`BahbjUw9p@B(RiM5 zl4mc}l_?sL)VC6yr==^O$NO+fX3Er5U~i#Z&Y_f z){;+vpBzfRo$})1rfv#ix@miZ8-4SW7FUGs6Kaorq5Z1&L;FodMnlxb3`l697Bl1+ zynm@wk!6pdBKAs}@oy}^x#=~K#}I7HOrrc+?vR21jegt!K^dSs%M;GV=-o>i_>T>YU zqg{8C-RI?E{dZ*6pWebYJgM6;FZo^XJsL;}KH*Vvj#vU;;<*xY#bn*$UR^!it@%>X zNi!1HP9S6!c%`@$`Nu_sA7L}b#<}~bACL(``qf&ewUr@we{|CRaE!z zjFIl%&ZJve>X0yRvKc>RYxIxoNsUjPkKx~zJ5*0D2~6G)?yjmj3Fb2dM>o&k-JAS+ zqWknXYG`@Ft$9i+OC3G+^0RtN-c*3*pAnMklnhbosG$0a2LRM50st)kWrT2fIXV6j z9n&VNc5}D(FVXQC3W;76yyrhwF^3QL>BFHNrK<15@VZj0A3Nx)`eoyBT|el5ya#?2n-4!o@XSU`x`tS-_a&A)gRG3{kn4inF)zu zH?qFzXyzVcP>wJr9pC&tE|)8WcPgIS9Ouvx{sO_8^8|7J;yV8a_#4uA?O8|y4cqsQ zGg52~%B&q8DiyNhi>rCbWEnF9n1WYa)o{(b&(|;4H?Urcg$i>|yb`vIHZ7Jo+>z*1 zyMErYMt-Mi62;@Yy;=I(L(*2$t#LTGNgI^2dOmw6TfC~J68T$vxVO312P4~`ZSAdNW=W> z(+l{0KYo%vcN-1a3Q#c?(8ZlGlkoy%b?_Ly@i21Rzx!l7rGtZzI}YJAXaW^u!b+zl zfJPjBl65U;T1k<|nBPy(J78u}HC$E!1m-84_p;7gJ_f??pm6xTn(!8>-U}^p4J)P+ zgC*2u+)XMpxX{=h7Se>v4M`A2V+C{Fb{a`hwt1JD#lVn_+Tl z^djeum2cV_L?V`8p79Nbb1sz!)IH!Rb9FQEux~$~>~@s61`(%2TA0GzlI8Ggx;?=rrv)el>1Q`j5d>C$orGmjk=q2sMebr}Rf_m*hW2+ELB?QGQg@sM0CchON z!E6P?L>VPCJKb*r7JjC9$S!PRgV)DPY%W6s+L2=HL(8Z3HAceg55&~9q~>&d&G z$#}ic**d1El@ZYu9vGNV7cl%-2zP=DXXyYj$?ytTttFwnzV=w&^F=H04xDX|dsTcR zCv3GJD+BHx){qu6j2#M<<{L8u(je492wx&tPi@CfN5@6D;k^7#?OB4g@6iLlq)pbL zQYBSeC|jVjrsIA$0~57Y`$xe#a7*tF-BqTo0o}5BX7TAA*hz?UZ6_DCj@~>2PP^5m zK8-PjXx_}^CgjwqDcU=OIuE8nuwVU^KoR(@`wT@>u;Fg+w6e(QH>Vrfz^kq$a(H}y z_fx_bTS83KPmvYve$i6rtq}*OG#qJhEoMch#rU_Uz%DA>o?LS5LS~|oj29&D^v7-H z4{9zI7fX?7UV%b>3khbFs@Z!c3vo8Q&<+-M5912N=yq)#QVugei9dRX%!Wg!Y3eg6 
z#w6`l;$x!8gelzU$AOURJoSs@$6LJmX^=Lta3H=jr)d=AxhZ{1&XezDPrO8zjDSdq zyrpB>uCh;E(3%C5TWvL60^_nlte1ucyF;>Gn`mmim0{WVcT%rUlKZfvOAD?OxVKzG zK=UC-;Z}k}D^11wHbso!7*|LPHuyC#V?ia#EZ(paJsH^j1bc3IbEC*goJ}LBN(`q! zyY$Hy5LssX_Iyk^U5+hIbpQ9vgy*O90D)4o`nzRvX*n3_8m~hf=`te(4~kFBMBC*r zS_bj~boFqNr+Q6RpJzdNR)pDn{G|`Lydxlbt=bk*Axsych3y#n2&2KT_-3<#9h%2m z+52;VgmiERibF$M8_{vNL)5WEL;l{(iZ%)C#55O`$4IpHCH@JfZ0)0KoI_sLIX%^& zsWoQ0C!OL$rC8Hl*fIjqp2AH$HQ+X^&s=eDGrY$oRkw7rO^^3smi;WvJY1U0n;~lO zWhEuSxBe~GQ5s6h_k|MuW9KuU$JSwvBcg9Yj;i91s!YTrPJ1}Y&SmDv>z+>thFN(8 zK0KgsQ(#K^Tzqm4fb+*$=<8yB51(R@a2jUl%PiA`OG!|43n0x>qqYnLzE&+XI+i3G zrghmata+j~Jxt^p{Vys9^O{4?Qu_p=?hL21G`v~xjaa1*D@ST%dlPSdXVHEiTGBv% zU!sUwvNK)n>0&PE{TP2KyngC5iTShW0>`c_%KiHV_tIT%lQL0P?t ziFspiH7LG5Hofytbk+X!XPE4vwWMG2R5$I|sh2|2!THdcQOj8*0|g3@YS^0I4I z#2|)Bmxm6fwKNe!5HbXANsHyE+4U9FooKe&4G$AT<(knMIQT1e zwk)7o;3+DLpvb1YL6o{F)l9lO3k*KB)HJh?{)Z2tJX=qpucO%p zTb_bFf7?=MHCV+QG-sbKXQ&#`?nWLqQRgK;n+9@QnC4EOdgZQ=T6gwFMs1EbG%<<= zZoP6Sxn;jNfF^`VB~;ESX>Sa9G15{GYxhHpF@^2N290mq&F4JDD^=L55Hw`%!YiL` zgELl8jK3B4dJkEQ09md%QaloFYv7wIIfbv^hw-mTS+=d-Vb1g7@kSL`yWnjWQuObrd z#2S;q%0D23x7W4{GdhFnmv5fqppfG=W6Ya{T2m$Cg=tzO{;P$} z$L;e{2Z-;Z6N~p={N^vTw%X9yc zD`*(2+WoSB$rb*A(6LB{U@<3ilH4MG8QEVEYEtS-@^U)5Tx#-v(EL?Z?JoXKGk0`! 
z@zk_??gF!Owh@Q9!>#_2?fl}i$LYOj2Lb?C6aWDAE#CzWbA`k7?5sSk-2W^M@cfzI z{0A=|4V+vS1&Mwxn|9iDNT?;|_hecOuuWQa;(jRB@?jatwa?ekd;FWAAPxvZ4t!Wz} z^w(l_@1mCT7Um1LktO9%LR1T4u{+dQLrheCgtLS{KYbCJU@TX-#{b~pRPWGL!wEp= zZmAc>9+xn`QwDZAO&qpz@SY*DzbA`)PWj`+TzC{$`&pK|6VDijtQu+oC+wlen)r|n ztj9>wQLZduw3X3Xn2F<3SjgRZJ-g}61I{bKC5!vc@#F?nrc=Hwitqh!Yqiz!dq*}y z#bf0w!yH$!2<_>^`O!q=MD`_TUYj;s=`6^M$Jr@U3G4-KmxDN;a{6`sPOwR%Y&(pvNxe{9UR@~a%h1YJy+ ztMyKhLB|aCe(4jJaBu%N$Z~JIj`0v%^n_F$PAZcXP0Y%rK8NY_Ic>=2(P8#3N(^?# z?lBqZ!8<46X0r2%U)YSoHy_Wj96AfhIaBOg!2KK=Le|1y&j>D~Mr3xA zX0YOZ1U>~F$rq8xOm*G0RSVBasY^iH{1j(Qz{gPr8X#&uyD-j_(M%El_+!CY|0qS8 z;7SLojTtW32aSI4Zpz5yY#R^dt2uWZPm`6VAg2{CLpP@7Re-k7l;J$1}IIsKeqg%66FU4)2>l71Wx6C5Ph0T{0}U zaqshy+WVC#2+}ZDRTsr=(bGd+kK>7x&SW4P77xu?c%(t)Ek~;lfC{4YjFGEjxdG~B zY;WScYC^J(l^0LB&FPb|FE_MAB{T#p=z>tppGKo)i*r$AGtf_2x!+;#pfO10N+)Av zFy^DKjGVr3E-H`w>@@D|2wPV)Yf%Xg^cC49O^dLxes;5W569jcXlK6JR1h*JFpt9A z)2iDLqs2<2^1ka~YP}Vq%NMQ|Q=7JO!Hl_as9sUWmXdMdKwjv0Vht+_bTC*_X03<> zmwZB6%+^acq)eOQ#O|tKj;H;=(o{jaLyP{Oio}$E%GkTWU&DXM!hcgT2L3J?|4ron z;qmW9x8EMoZhgHAe0W=7{%<_~mZ)2Aze;fb*4uy5<^TIWA^=j#i4gEtmA=LOTZR60 z_}{p{YxG|aU-?(uKPmPt`Inad+k^hTh`+1zU&%y7fd5yk{}UgG`RfXSx9#ig|7E!M GYxX~Y`1>0G literal 0 HcmV?d00001 -- 2.30.2