From 2cc2419d905a00395915f18fa584888b21e0fade Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 10 Aug 2025 11:13:07 -0700 Subject: [PATCH] work toward user configuration of drivers --- rust/Cargo.lock | 1 + rust/pspp/Cargo.toml | 2 +- rust/pspp/src/data.rs | 75 ++++++++++++++++++---------- rust/pspp/src/format/mod.rs | 7 +-- rust/pspp/src/main.rs | 15 ++++-- rust/pspp/src/output/cairo/driver.rs | 54 ++++++++++++++------ rust/pspp/src/output/cairo/mod.rs | 6 +-- rust/pspp/src/output/csv.rs | 24 ++++++--- rust/pspp/src/output/driver.rs | 29 +++++++++++ rust/pspp/src/output/html.rs | 22 ++++++-- rust/pspp/src/output/page.rs | 17 +++---- rust/pspp/src/output/pivot/mod.rs | 10 ++-- rust/pspp/src/output/pivot/test.rs | 10 ++-- rust/pspp/src/output/text.rs | 35 +++++++++---- rust/pspp/src/sys/cooked.rs | 41 +++++++++++++-- rust/pspp/src/sys/mod.rs | 2 +- rust/pspp/src/sys/write.rs | 12 ++--- 17 files changed, 256 insertions(+), 106 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a6cefe2bf1..f07ce38b34 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -613,6 +613,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" dependencies = [ "enum-map-derive", + "serde", ] [[package]] diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 7f8dfe0be0..b26173eb5d 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -22,7 +22,7 @@ indexmap = { version = "2.1.0", features = ["serde"] } bitflags = "2.5.0" unicode-width = "0.2.0" chardetng = "0.1.17" -enum-map = "2.7.3" +enum-map = { version = "2.7.3", features = ["serde"] } flagset = "0.4.6" pspp-derive = { version = "0.1.0", path = "../pspp-derive" } either = "1.13.0" diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index 7b1364dfb9..60c46752db 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -379,26 +379,19 @@ impl Datum> { } pub fn codepage_to_unicode(&mut self) { - match self { - Datum::Number(_) => (), - Datum::String(string) => string.codepage_to_unicode(), + if let Some(s) = self.as_string_mut() { + s.codepage_to_unicode(); } } pub fn without_encoding(self) -> Datum { - match self { - Datum::Number(number) => Datum::Number(number), - Datum::String(string) => Datum::String(string.inner), - } + self.map_string(|s| s.into_inner()) } } impl<'a> Datum>> { pub fn into_owned(self) -> Datum> { - match self { - Self::Number(number) => Datum::Number(number), - Self::String(string) => Datum::String(string.into_owned()), - } + self.map_string(|s| s.into_owned()) } } @@ -407,16 +400,10 @@ where T: EncodedString, { pub fn as_borrowed(&self) -> Datum>> { - match self { - Datum::Number(number) => Datum::Number(*number), - Datum::String(string) => Datum::String(string.as_encoded_byte_str()), - } + self.as_ref().map_string(|s| s.as_encoded_byte_str()) } pub fn cloned(&self) -> Datum> { - match self { - Datum::Number(number) => Datum::Number(*number), - Datum::String(string) => Datum::String(string.cloned()), - } + self.as_ref().map_string(|s| s.cloned()) } } @@ -517,6 +504,23 @@ where } impl Datum { + pub fn as_ref(&self) -> Datum<&B> { + match self { + Datum::Number(number) => Datum::Number(*number), + Datum::String(string) => Datum::String(&string), + } + } + + pub fn map_string(self, f: F) -> Datum + where + F: Fn(B) -> R, + { + match self { + Datum::Number(number) => Datum::Number(number), + Datum::String(string) => Datum::String(f(string)), + } + } + /// Constructs a new numerical [Datum] for the system-missing value. pub const fn sysmis() -> Self { Self::Number(None) @@ -588,17 +592,11 @@ where } pub fn as_encoded(&self, encoding: &'static Encoding) -> Datum>> { - match self { - Datum::Number(number) => Datum::Number(*number), - Datum::String(raw_string) => Datum::String(raw_string.as_encoded(encoding)), - } + self.as_ref().map_string(|s| s.as_encoded(encoding)) } pub fn with_encoding(self, encoding: &'static Encoding) -> Datum> { - match self { - Datum::Number(number) => Datum::Number(number), - Datum::String(string) => Datum::String(string.with_encoding(encoding)), - } + self.map_string(|s| s.with_encoding(encoding)) } } @@ -734,6 +732,29 @@ where } } +impl Case>> { + pub fn into_unicode(self) -> Self { + if self.encoding == UTF_8 { + self + } else { + Self { + encoding: UTF_8, + data: self + .data + .into_iter() + .map(|datum| { + datum.map_string(|s| { + let mut s = s.with_encoding(self.encoding); + s.codepage_to_unicode(); + s.into_inner() + }) + }) + .collect(), + } + } + } +} + impl Serialize for Case where B: Borrow<[Datum]>, diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index 1131e9a827..b651ab743a 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -24,14 +24,14 @@ use std::{ use chrono::{Datelike, Local}; use enum_iterator::{all, Sequence}; use enum_map::{Enum, EnumMap}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use thiserror::Error as ThisError; use unicode_width::UnicodeWidthStr; use crate::{ data::{ByteString, Datum}, - variable::{VarType, VarWidth}, sys::raw, + variable::{VarType, VarWidth}, }; mod display; @@ -841,7 +841,8 @@ impl Display for UncheckedFormat { } } -#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum)] +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] pub enum Decimal { #[default] Dot, diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 096f74ea5b..77c63ba1f5 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -86,10 +86,14 @@ struct Convert { #[arg(short = 'O')] output_format: Option, - /// The encoding to use. + /// The encoding to use for reading the input file. #[arg(short = 'e', long, value_parser = parse_encoding)] encoding: Option<&'static Encoding>, + /// If true, convert to Unicode (UTF-8) encoding. + #[arg(long = "unicode")] + to_unicode: bool, + /// Password for decryption, with or without what SPSS calls "password encryption". /// /// Specify only for an encrypted system file. @@ -127,11 +131,14 @@ impl Convert { eprintln!("warning: {warning}"); } - let (dictionary, _, cases) = ReadOptions::new(warn) + let mut system_file = ReadOptions::new(warn) .with_encoding(self.encoding) .with_password(self.password.clone()) - .open_file(&self.input)? - .into_parts(); + .open_file(&self.input)?; + if self.to_unicode { + system_file = system_file.into_unicode(); + } + let (dictionary, _, cases) = system_file.into_parts(); // Take only the first `self.max_cases` cases. let cases = cases.take(self.max_cases.unwrap_or(usize::MAX)); diff --git a/rust/pspp/src/output/cairo/driver.rs b/rust/pspp/src/output/cairo/driver.rs index 360d14fa89..c0931408eb 100644 --- a/rust/pspp/src/output/cairo/driver.rs +++ b/rust/pspp/src/output/cairo/driver.rs @@ -14,11 +14,16 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . -use std::{borrow::Cow, path::Path, sync::Arc}; +use std::{ + borrow::Cow, + path::{Path, PathBuf}, + sync::Arc, +}; use cairo::{Context, PdfSurface}; use enum_map::{enum_map, EnumMap}; use pango::SCALE; +use serde::{Deserialize, Serialize}; use crate::output::{ cairo::{ @@ -33,6 +38,25 @@ use crate::output::{ use crate::output::pivot::Axis2; +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct CairoConfig { + /// Output file name. + pub path: PathBuf, + + /// Page setup. + #[serde(flatten)] + pub setup: Setup, +} + +impl CairoConfig { + pub fn new(path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + setup: Setup::default(), + } + } +} + pub struct CairoDriver { fsm_style: Arc, page_style: Arc, @@ -41,22 +65,21 @@ pub struct CairoDriver { } impl CairoDriver { - pub fn new(path: impl AsRef) -> CairoDriver { + pub fn new(config: &CairoConfig) -> cairo::Result { fn scale(inches: f64) -> usize { (inches * 72.0 * SCALE as f64).max(0.0).round() as usize } - let page_setup = Setup::default(); - let printable = page_setup.printable_size(); + let printable = config.setup.printable_size(); let page_style = CairoPageStyle { margins: EnumMap::from_fn(|axis| { [ - scale(page_setup.margins[axis][0]), - scale(page_setup.margins[axis][1]), + scale(config.setup.margins[axis][0]), + scale(config.setup.margins[axis][1]), ] }), - headings: page_setup.headings.clone(), - initial_page_number: page_setup.initial_page_number, + headings: config.setup.headings.clone(), + initial_page_number: config.setup.initial_page_number, }; let size = Coord2::new(scale(printable[Axis2::X]), scale(printable[Axis2::Y])); let font = FontStyle { @@ -79,21 +102,20 @@ impl CairoDriver { font, fg: Color::BLACK, use_system_colors: false, - object_spacing: scale(page_setup.object_spacing), + object_spacing: scale(config.setup.object_spacing), font_resolution: 72.0, }; let surface = PdfSurface::new( - page_setup.paper[Axis2::X] * 72.0, - page_setup.paper[Axis2::Y] * 72.0, - path, - ) - .unwrap(); - Self { + config.setup.paper[Axis2::X] * 72.0, + config.setup.paper[Axis2::Y] * 72.0, + &config.path, + )?; + Ok(Self { fsm_style: Arc::new(fsm_style), page_style: Arc::new(page_style), pager: None, surface, - } + }) } } diff --git a/rust/pspp/src/output/cairo/mod.rs b/rust/pspp/src/output/cairo/mod.rs index 2811bca601..0d6782f142 100644 --- a/rust/pspp/src/output/cairo/mod.rs +++ b/rust/pspp/src/output/cairo/mod.rs @@ -22,7 +22,7 @@ mod driver; pub mod fsm; pub mod pager; -pub use driver::CairoDriver; +pub use driver::{CairoConfig, CairoDriver}; /// Conversion from 1/96" units ("pixels") to Cairo/Pango units. fn px_to_xr(x: usize) -> usize { @@ -43,10 +43,10 @@ fn horz_align_to_pango(horz_align: HorzAlign) -> pango::Alignment { #[cfg(test)] mod test { - use crate::output::cairo::CairoDriver; + use crate::output::cairo::{CairoConfig, CairoDriver}; #[test] fn create() { - CairoDriver::new("test.pdf"); + CairoDriver::new(&CairoConfig::new("test.pdf")).unwrap(); } } diff --git a/rust/pspp/src/output/csv.rs b/rust/pspp/src/output/csv.rs index 543e80fae8..dba4cd2cf8 100644 --- a/rust/pspp/src/output/csv.rs +++ b/rust/pspp/src/output/csv.rs @@ -19,14 +19,24 @@ use std::{ fmt::Display, fs::File, io::{Error, Write}, + path::PathBuf, sync::Arc, }; +use serde::{Deserialize, Serialize}; + use crate::output::pivot::Coord2; use super::{driver::Driver, pivot::PivotTable, table::Table, Details, Item, TextType}; -struct CsvDriver { +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct CsvConfig { + path: PathBuf, + #[serde(flatten)] + options: CsvOptions, +} + +pub struct CsvDriver { file: File, options: CsvOptions, @@ -34,7 +44,7 @@ struct CsvDriver { n_items: usize, } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Serialize, Deserialize)] struct CsvOptions { quote: u8, delimiter: u8, @@ -89,12 +99,12 @@ impl Display for CsvField<'_> { } impl CsvDriver { - pub fn new(file: File) -> Self { - Self { - file, - options: CsvOptions::default(), + pub fn new(config: &CsvConfig) -> std::io::Result { + Ok(Self { + file: File::create(&config.path)?, + options: config.options.clone(), n_items: 0, - } + }) } fn start_item(&mut self) { diff --git a/rust/pspp/src/output/driver.rs b/rust/pspp/src/output/driver.rs index 897ae61de7..eeddcfd89e 100644 --- a/rust/pspp/src/output/driver.rs +++ b/rust/pspp/src/output/driver.rs @@ -16,8 +16,37 @@ use std::{borrow::Cow, sync::Arc}; +use serde::{Deserialize, Serialize}; + +use crate::output::{ + cairo::{CairoConfig, CairoDriver}, + csv::{CsvConfig, CsvDriver}, + html::{HtmlConfig, HtmlDriver}, + text::{TextConfig, TextDriver}, +}; + use super::{page::Setup, Item}; +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(tag = "driver", rename_all = "snake_case")] +pub enum Config { + Text(TextConfig), + Pdf(CairoConfig), + Html(HtmlConfig), + Csv(CsvConfig), +} + +impl dyn Driver { + fn new(config: &Config) -> anyhow::Result> { + match config { + Config::Text(text_config) => Ok(Box::new(TextDriver::new(text_config)?)), + Config::Pdf(cairo_config) => Ok(Box::new(CairoDriver::new(cairo_config)?)), + Config::Html(html_config) => Ok(Box::new(HtmlDriver::new(html_config)?)), + Config::Csv(csv_config) => Ok(Box::new(CsvDriver::new(csv_config)?)), + } + } +} + // An output driver. pub trait Driver { fn name(&self) -> Cow<'static, str>; diff --git a/rust/pspp/src/output/html.rs b/rust/pspp/src/output/html.rs index 9a80783f3e..cfc515d19a 100644 --- a/rust/pspp/src/output/html.rs +++ b/rust/pspp/src/output/html.rs @@ -17,10 +17,13 @@ use std::{ borrow::Cow, fmt::{Display, Write as _}, + fs::File, io::Write, + path::PathBuf, sync::Arc, }; +use serde::{Deserialize, Serialize}; use smallstr::SmallString; use crate::output::{ @@ -30,7 +33,12 @@ use crate::output::{ Details, Item, }; -pub struct HtmlRenderer { +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct HtmlConfig { + file: PathBuf, +} + +pub struct HtmlDriver { writer: W, fg: Color, bg: Color, @@ -49,11 +57,17 @@ impl Stroke { } } -impl HtmlRenderer +impl HtmlDriver { + pub fn new(config: &HtmlConfig) -> std::io::Result { + Ok(Self::for_writer(File::create(&config.file)?)) + } +} + +impl HtmlDriver where W: Write, { - pub fn new(mut writer: W) -> Self { + pub fn for_writer(mut writer: W) -> Self { let _ = put_header(&mut writer); Self { fg: Color::BLACK, @@ -412,7 +426,7 @@ a:active { "#; -impl Driver for HtmlRenderer +impl Driver for HtmlDriver where W: Write, { diff --git a/rust/pspp/src/output/page.rs b/rust/pspp/src/output/page.rs index ccebb8bcd5..87015f6a7d 100644 --- a/rust/pspp/src/output/page.rs +++ b/rust/pspp/src/output/page.rs @@ -14,20 +14,21 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . -use std::path::PathBuf; - use enum_map::{enum_map, EnumMap}; +use serde::{Deserialize, Serialize}; use super::pivot::{Axis2, HorzAlign}; -#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] pub enum Orientation { #[default] Portrait, Landscape, } -#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] pub enum ChartSize { #[default] AsIs, @@ -36,7 +37,7 @@ pub enum ChartSize { QuarterHeight, } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Paragraph { pub markup: String, pub horz_align: HorzAlign, @@ -51,9 +52,10 @@ impl Default for Paragraph { } } -#[derive(Clone, Debug, Default, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct Heading(pub Vec); +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct Setup { pub initial_page_number: i32, @@ -72,8 +74,6 @@ pub struct Setup { /// Header and footer. pub headings: [Heading; 2], - - file_name: Option, } impl Default for Setup { @@ -86,7 +86,6 @@ impl Default for Setup { object_spacing: 12.0 / 72.0, chart_size: Default::default(), headings: Default::default(), - file_name: None, } } } diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 0b91cbaeb6..37ad5929da 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -60,7 +60,7 @@ use enum_iterator::Sequence; use enum_map::{enum_map, Enum, EnumMap}; use look_xml::TableProperties; use quick_xml::{de::from_str, DeError}; -use serde::{de::Visitor, Deserialize}; +use serde::{de::Visitor, Deserialize, Serialize}; use smallstr::SmallString; use smallvec::SmallVec; use thiserror::Error as ThisError; @@ -68,9 +68,9 @@ use tlo::parse_tlo; use crate::{ data::{ByteString, Datum, EncodedString, RawString}, - variable::{VarType, Variable}, format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat}, settings::{Settings, Show}, + variable::{VarType, Variable}, }; pub mod output; @@ -864,7 +864,8 @@ pub struct CellStyle { pub margins: EnumMap, } -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] pub enum HorzAlign { /// Right aligned. Right, @@ -1095,7 +1096,8 @@ impl Stroke { } /// An axis of a 2-dimensional table. -#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] pub enum Axis2 { X, Y, diff --git a/rust/pspp/src/output/pivot/test.rs b/rust/pspp/src/output/pivot/test.rs index cc532fad10..e66ee0f1c0 100644 --- a/rust/pspp/src/output/pivot/test.rs +++ b/rust/pspp/src/output/pivot/test.rs @@ -19,9 +19,9 @@ use std::{fmt::Display, fs::File, path::Path, sync::Arc}; use enum_map::EnumMap; use crate::output::{ - cairo::CairoDriver, + cairo::{CairoConfig, CairoDriver}, driver::Driver, - html::HtmlRenderer, + html::HtmlDriver, pivot::{ Area, Axis2, Border, BorderStyle, Class, Color, Dimension, Footnote, FootnoteMarkerPosition, FootnoteMarkerType, Footnotes, Group, HeadingRegion, LabelPosition, @@ -175,13 +175,13 @@ pub fn assert_rendering(name: &str, pivot_table: &PivotTable, expected: &str) { let item = Arc::new(Item::new(Details::Table(Box::new(pivot_table.clone())))); if let Some(dir) = std::env::var_os("PSPP_TEST_HTML_DIR") { let writer = File::create(Path::new(&dir).join(name).with_extension("html")).unwrap(); - HtmlRenderer::new(writer).write(&item); + HtmlDriver::for_writer(writer).write(&item); } let item = Arc::new(Item::new(Details::Table(Box::new(pivot_table.clone())))); if let Some(dir) = std::env::var_os("PSPP_TEST_PDF_DIR") { - let path = Path::new(&dir).join(name).with_extension("pdf"); - CairoDriver::new(path).write(&item); + let config = CairoConfig::new(Path::new(&dir).join(name).with_extension("pdf")); + CairoDriver::new(&config).unwrap().write(&item); } if let Some(dir) = std::env::var_os("PSPP_TEST_SPV_DIR") { diff --git a/rust/pspp/src/output/text.rs b/rust/pspp/src/output/text.rs index 7abe32851a..5ef2f67090 100644 --- a/rust/pspp/src/output/text.rs +++ b/rust/pspp/src/output/text.rs @@ -20,10 +20,12 @@ use std::{ fs::File, io::{BufWriter, Write as IoWrite}, ops::{Index, Range}, + path::PathBuf, sync::{Arc, LazyLock}, }; use enum_map::{enum_map, Enum, EnumMap}; +use serde::{Deserialize, Serialize}; use unicode_linebreak::{linebreaks, BreakOpportunity}; use unicode_width::UnicodeWidthStr; @@ -38,7 +40,8 @@ use super::{ Details, Item, }; -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] pub enum Boxes { Ascii, #[default] @@ -54,8 +57,18 @@ impl Boxes { } } -#[derive(Clone, Debug)] -pub struct TextRendererConfig { +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct TextConfig { + /// Output file name. + file: PathBuf, + + /// Renderer config. + #[serde(flatten)] + options: TextRendererOptions, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct TextRendererOptions { /// Enable bold and underline in output? pub emphasis: bool, @@ -66,7 +79,7 @@ pub struct TextRendererConfig { pub boxes: Boxes, } -impl Default for TextRendererConfig { +impl Default for TextRendererOptions { fn default() -> Self { Self { emphasis: false, @@ -95,12 +108,12 @@ pub struct TextRenderer { impl Default for TextRenderer { fn default() -> Self { - Self::new(&TextRendererConfig::default()) + Self::new(&TextRendererOptions::default()) } } impl TextRenderer { - pub fn new(config: &TextRendererConfig) -> Self { + pub fn new(config: &TextRendererOptions) -> Self { Self { emphasis: config.emphasis, width: config.width, @@ -358,11 +371,11 @@ pub struct TextDriver { } impl TextDriver { - pub fn new(file: File) -> TextDriver { - Self { - file: BufWriter::new(file), - renderer: TextRenderer::default(), - } + pub fn new(config: &TextConfig) -> std::io::Result { + Ok(Self { + file: BufWriter::new(File::create(&config.file)?), + renderer: TextRenderer::new(&config.options), + }) } } diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index f6e3233a13..7764b21c99 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -55,7 +55,7 @@ use crate::{ use anyhow::{anyhow, Error as AnyError}; use binrw::{BinRead, BinWrite, Endian}; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; -use encoding_rs::Encoding; +use encoding_rs::{Encoding, UTF_8}; use indexmap::set::MutableValues; use itertools::Itertools; use serde::Serialize; @@ -597,6 +597,16 @@ impl SystemFile { pub fn into_parts(self) -> (Dictionary, Metadata, Cases) { (self.dictionary, self.metadata, self.cases) } + + /// Converts this system file reader into one encoded in UTF-8. + pub fn into_unicode(mut self) -> Self { + self.dictionary.codepage_to_unicode(); + Self { + dictionary: self.dictionary, + metadata: self.metadata, + cases: self.cases.into_unicode(), + } + } } /// Decoded records in a system file, arranged by type. @@ -1655,12 +1665,26 @@ impl MultipleResponseType { pub struct Cases { encoding: &'static Encoding, + into_unicode: bool, inner: RawCases, } impl Cases { pub fn new(encoding: &'static Encoding, inner: RawCases) -> Self { - Self { encoding, inner } + Self { + encoding, + inner, + into_unicode: false, + } + } + pub fn into_unicode(self) -> Self { + Self { + into_unicode: { + // We only need to convert if we're not starting out as UTF-8. + self.encoding != UTF_8 + }, + ..self + } } } @@ -1674,8 +1698,15 @@ impl Iterator for Cases { type Item = Result>>, raw::Error>; fn next(&mut self) -> Option { - self.inner - .next() - .map(|result| result.map(|case| case.with_encoding(self.encoding))) + self.inner.next().map(|result| { + result.map(|case| { + let case = case.with_encoding(self.encoding); + if self.into_unicode { + case.into_unicode() + } else { + case + } + }) + }) } } diff --git a/rust/pspp/src/sys/mod.rs b/rust/pspp/src/sys/mod.rs index f660f85892..1552c2e95c 100644 --- a/rust/pspp/src/sys/mod.rs +++ b/rust/pspp/src/sys/mod.rs @@ -39,7 +39,7 @@ pub mod sack; mod write; use serde::Serializer; -pub use write::{SysfileVersion, WriteOptions, Writer}; +pub use write::{SystemFileVersion, WriteOptions, Writer}; #[cfg(test)] mod test; diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 16e54e6538..7f429799a7 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -38,7 +38,7 @@ use crate::{ /// System file format version. #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] -pub enum SysfileVersion { +pub enum SystemFileVersion { /// Obsolete version. V2, @@ -54,7 +54,7 @@ pub struct WriteOptions { pub compression: Option, /// System file version to write. - pub sysfile_version: SysfileVersion, + pub sysfile_version: SystemFileVersion, /// Date and time to write to the file. pub timestamp: NaiveDateTime, @@ -101,7 +101,7 @@ impl WriteOptions { } /// Returns `self` with the system file version set to `sysfile_version`. - pub fn with_sysfile_version(self, sysfile_version: SysfileVersion) -> Self { + pub fn with_sysfile_version(self, sysfile_version: SystemFileVersion) -> Self { Self { sysfile_version, ..self @@ -587,7 +587,7 @@ where } fn write_long_variable_names(&mut self) -> Result<(), BinError> { - if self.options.sysfile_version == SysfileVersion::V2 { + if self.options.sysfile_version == SystemFileVersion::V2 { return Ok(()); } @@ -669,7 +669,7 @@ where } fn write_data_file_attributes(&mut self) -> Result<(), BinError> { - if self.options.sysfile_version != SysfileVersion::V3 { + if self.options.sysfile_version != SystemFileVersion::V3 { return Ok(()); } let mut s = String::new(); @@ -678,7 +678,7 @@ where } fn write_variable_attributes(&mut self) -> Result<(), BinError> { - if self.options.sysfile_version != SysfileVersion::V3 { + if self.options.sysfile_version != SystemFileVersion::V3 { return Ok(()); } let mut s = String::new(); -- 2.30.2