checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9"
dependencies = [
"enum-map-derive",
+ "serde",
]
[[package]]
bitflags = "2.5.0"
unicode-width = "0.2.0"
chardetng = "0.1.17"
-enum-map = "2.7.3"
+enum-map = { version = "2.7.3", features = ["serde"] }
flagset = "0.4.6"
pspp-derive = { version = "0.1.0", path = "../pspp-derive" }
either = "1.13.0"
}
pub fn codepage_to_unicode(&mut self) {
- match self {
- Datum::Number(_) => (),
- Datum::String(string) => string.codepage_to_unicode(),
+ if let Some(s) = self.as_string_mut() {
+ s.codepage_to_unicode();
}
}
pub fn without_encoding(self) -> Datum<ByteString> {
- match self {
- Datum::Number(number) => Datum::Number(number),
- Datum::String(string) => Datum::String(string.inner),
- }
+ self.map_string(|s| s.into_inner())
}
}
impl<'a> Datum<WithEncoding<ByteCow<'a>>> {
pub fn into_owned(self) -> Datum<WithEncoding<ByteString>> {
- match self {
- Self::Number(number) => Datum::Number(number),
- Self::String(string) => Datum::String(string.into_owned()),
- }
+ self.map_string(|s| s.into_owned())
}
}
T: EncodedString,
{
pub fn as_borrowed(&self) -> Datum<WithEncoding<ByteStr<'_>>> {
- match self {
- Datum::Number(number) => Datum::Number(*number),
- Datum::String(string) => Datum::String(string.as_encoded_byte_str()),
- }
+ self.as_ref().map_string(|s| s.as_encoded_byte_str())
}
pub fn cloned(&self) -> Datum<WithEncoding<ByteString>> {
- match self {
- Datum::Number(number) => Datum::Number(*number),
- Datum::String(string) => Datum::String(string.cloned()),
- }
+ self.as_ref().map_string(|s| s.cloned())
}
}
}
impl<B> Datum<B> {
+ /// Returns a [Datum] that borrows this one's string payload; a numeric
+ /// value is copied as-is.
+ pub fn as_ref(&self) -> Datum<&B> {
+ match self {
+ Datum::Number(number) => Datum::Number(*number),
+ // `string` is already bound as `&B` here, so it is passed on directly.
+ Datum::String(string) => Datum::String(string),
+ }
+ }
+
+ /// Transforms the string payload with `f`, leaving a numeric value
+ /// untouched.
+ ///
+ /// `f` is invoked at most once, so any `FnOnce` closure is accepted,
+ /// including one that moves captured state into its result.
+ pub fn map_string<F, R>(self, f: F) -> Datum<R>
+ where
+ F: FnOnce(B) -> R,
+ {
+ match self {
+ Datum::Number(number) => Datum::Number(number),
+ Datum::String(string) => Datum::String(f(string)),
+ }
+ }
+
/// Constructs a new numerical [Datum] for the system-missing value.
pub const fn sysmis() -> Self {
Self::Number(None)
}
pub fn as_encoded(&self, encoding: &'static Encoding) -> Datum<WithEncoding<ByteStr<'_>>> {
- match self {
- Datum::Number(number) => Datum::Number(*number),
- Datum::String(raw_string) => Datum::String(raw_string.as_encoded(encoding)),
- }
+ self.as_ref().map_string(|s| s.as_encoded(encoding))
}
pub fn with_encoding(self, encoding: &'static Encoding) -> Datum<WithEncoding<T>> {
- match self {
- Datum::Number(number) => Datum::Number(number),
- Datum::String(string) => Datum::String(string.with_encoding(encoding)),
- }
+ self.map_string(|s| s.with_encoding(encoding))
}
}
}
}
+impl Case<Vec<Datum<ByteString>>> {
+ /// Returns this case with its encoding set to UTF-8.
+ ///
+ /// A case that is already UTF-8 is returned unchanged. Otherwise each
+ /// string datum is tagged with the case's current encoding, passed
+ /// through `codepage_to_unicode`, and unwrapped again.
+ pub fn into_unicode(self) -> Self {
+ let source_encoding = self.encoding;
+ if source_encoding == UTF_8 {
+ return self;
+ }
+ let data = self
+ .data
+ .into_iter()
+ .map(|datum| {
+ datum.map_string(|bytes| {
+ let mut recoded = bytes.with_encoding(source_encoding);
+ recoded.codepage_to_unicode();
+ recoded.into_inner()
+ })
+ })
+ .collect();
+ Self {
+ encoding: UTF_8,
+ data,
+ }
+ }
+}
+
impl<B> Serialize for Case<B>
where
B: Borrow<[Datum<ByteString>]>,
use chrono::{Datelike, Local};
use enum_iterator::{all, Sequence};
use enum_map::{Enum, EnumMap};
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
use thiserror::Error as ThisError;
use unicode_width::UnicodeWidthStr;
use crate::{
data::{ByteString, Datum},
- variable::{VarType, VarWidth},
sys::raw,
+ variable::{VarType, VarWidth},
};
mod display;
}
}
-#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum)]
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
pub enum Decimal {
#[default]
Dot,
#[arg(short = 'O')]
output_format: Option<OutputFormat>,
- /// The encoding to use.
+ /// The encoding to use for reading the input file.
#[arg(short = 'e', long, value_parser = parse_encoding)]
encoding: Option<&'static Encoding>,
+ /// If true, convert to Unicode (UTF-8) encoding.
+ #[arg(long = "unicode")]
+ to_unicode: bool,
+
/// Password for decryption, with or without what SPSS calls "password encryption".
///
/// Specify only for an encrypted system file.
eprintln!("warning: {warning}");
}
- let (dictionary, _, cases) = ReadOptions::new(warn)
+ let mut system_file = ReadOptions::new(warn)
.with_encoding(self.encoding)
.with_password(self.password.clone())
- .open_file(&self.input)?
- .into_parts();
+ .open_file(&self.input)?;
+ if self.to_unicode {
+ system_file = system_file.into_unicode();
+ }
+ let (dictionary, _, cases) = system_file.into_parts();
// Take only the first `self.max_cases` cases.
let cases = cases.take(self.max_cases.unwrap_or(usize::MAX));
// You should have received a copy of the GNU General Public License along with
// this program. If not, see <http://www.gnu.org/licenses/>.
-use std::{borrow::Cow, path::Path, sync::Arc};
+use std::{
+ borrow::Cow,
+ path::{Path, PathBuf},
+ sync::Arc,
+};
use cairo::{Context, PdfSurface};
use enum_map::{enum_map, EnumMap};
use pango::SCALE;
+use serde::{Deserialize, Serialize};
use crate::output::{
cairo::{
use crate::output::pivot::Axis2;
+/// Configuration for [CairoDriver]: where to write the output and how to
+/// set up the page.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct CairoConfig {
+ /// Output file name.
+ pub path: PathBuf,
+
+ /// Page setup.
+ #[serde(flatten)]
+ pub setup: Setup,
+}
+
+impl CairoConfig {
+ /// Creates a configuration that writes to `path` with the default
+ /// page [Setup].
+ pub fn new(path: impl AsRef<Path>) -> Self {
+ let path = PathBuf::from(path.as_ref());
+ let setup = Setup::default();
+ Self { path, setup }
+ }
+}
+
pub struct CairoDriver {
fsm_style: Arc<CairoFsmStyle>,
page_style: Arc<CairoPageStyle>,
}
impl CairoDriver {
- pub fn new(path: impl AsRef<Path>) -> CairoDriver {
+ pub fn new(config: &CairoConfig) -> cairo::Result<Self> {
fn scale(inches: f64) -> usize {
(inches * 72.0 * SCALE as f64).max(0.0).round() as usize
}
- let page_setup = Setup::default();
- let printable = page_setup.printable_size();
+ let printable = config.setup.printable_size();
let page_style = CairoPageStyle {
margins: EnumMap::from_fn(|axis| {
[
- scale(page_setup.margins[axis][0]),
- scale(page_setup.margins[axis][1]),
+ scale(config.setup.margins[axis][0]),
+ scale(config.setup.margins[axis][1]),
]
}),
- headings: page_setup.headings.clone(),
- initial_page_number: page_setup.initial_page_number,
+ headings: config.setup.headings.clone(),
+ initial_page_number: config.setup.initial_page_number,
};
let size = Coord2::new(scale(printable[Axis2::X]), scale(printable[Axis2::Y]));
let font = FontStyle {
font,
fg: Color::BLACK,
use_system_colors: false,
- object_spacing: scale(page_setup.object_spacing),
+ object_spacing: scale(config.setup.object_spacing),
font_resolution: 72.0,
};
let surface = PdfSurface::new(
- page_setup.paper[Axis2::X] * 72.0,
- page_setup.paper[Axis2::Y] * 72.0,
- path,
- )
- .unwrap();
- Self {
+ config.setup.paper[Axis2::X] * 72.0,
+ config.setup.paper[Axis2::Y] * 72.0,
+ &config.path,
+ )?;
+ Ok(Self {
fsm_style: Arc::new(fsm_style),
page_style: Arc::new(page_style),
pager: None,
surface,
- }
+ })
}
}
pub mod fsm;
pub mod pager;
-pub use driver::CairoDriver;
+pub use driver::{CairoConfig, CairoDriver};
/// Conversion from 1/96" units ("pixels") to Cairo/Pango units.
fn px_to_xr(x: usize) -> usize {
#[cfg(test)]
mod test {
- use crate::output::cairo::CairoDriver;
+ use crate::output::cairo::{CairoConfig, CairoDriver};
#[test]
fn create() {
- CairoDriver::new("test.pdf");
+ CairoDriver::new(&CairoConfig::new("test.pdf")).unwrap();
}
}
fmt::Display,
fs::File,
io::{Error, Write},
+ path::PathBuf,
sync::Arc,
};
+use serde::{Deserialize, Serialize};
+
use crate::output::pivot::Coord2;
use super::{driver::Driver, pivot::PivotTable, table::Table, Details, Item, TextType};
-struct CsvDriver {
+/// Configuration for [CsvDriver].
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct CsvConfig {
+ /// Output file name; [CsvDriver::new] creates this file.
+ path: PathBuf,
+ /// CSV formatting options, flattened into this struct's serialized form.
+ #[serde(flatten)]
+ options: CsvOptions,
+}
+
+pub struct CsvDriver {
file: File,
options: CsvOptions,
n_items: usize,
}
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
struct CsvOptions {
quote: u8,
delimiter: u8,
}
impl CsvDriver {
- pub fn new(file: File) -> Self {
- Self {
- file,
- options: CsvOptions::default(),
+ /// Creates the output file named by `config.path` and returns a driver
+ /// that writes to it with the configured CSV options.
+ ///
+ /// # Errors
+ ///
+ /// Returns the I/O error if the file cannot be created.
+ pub fn new(config: &CsvConfig) -> std::io::Result<Self> {
+ Ok(Self {
+ file: File::create(&config.path)?,
+ // `CsvOptions` is `Copy`, so it is copied directly rather than cloned.
+ options: config.options,
n_items: 0,
- }
+ })
}
fn start_item(&mut self) {
use std::{borrow::Cow, sync::Arc};
+use serde::{Deserialize, Serialize};
+
+use crate::output::{
+ cairo::{CairoConfig, CairoDriver},
+ csv::{CsvConfig, CsvDriver},
+ html::{HtmlConfig, HtmlDriver},
+ text::{TextConfig, TextDriver},
+};
+
use super::{page::Setup, Item};
+/// Configuration for an output driver, with one variant per kind of driver.
+///
+/// Serialized as an internally tagged enum: the `driver` field holds the
+/// variant name in snake case.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(tag = "driver", rename_all = "snake_case")]
+pub enum Config {
+ /// Configuration for [TextDriver].
+ Text(TextConfig),
+ /// Configuration for [CairoDriver].
+ Pdf(CairoConfig),
+ /// Configuration for [HtmlDriver].
+ Html(HtmlConfig),
+ /// Configuration for [CsvDriver].
+ Csv(CsvConfig),
+}
+
+impl dyn Driver {
+ /// Constructs the driver selected by `config`, boxed as a trait object.
+ ///
+ /// Any error from the selected driver's constructor is returned.
+ fn new(config: &Config) -> anyhow::Result<Box<Self>> {
+ let driver: Box<Self> = match config {
+ Config::Text(text) => Box::new(TextDriver::new(text)?),
+ Config::Pdf(pdf) => Box::new(CairoDriver::new(pdf)?),
+ Config::Html(html) => Box::new(HtmlDriver::new(html)?),
+ Config::Csv(csv) => Box::new(CsvDriver::new(csv)?),
+ };
+ Ok(driver)
+ }
+}
+
// An output driver.
pub trait Driver {
fn name(&self) -> Cow<'static, str>;
use std::{
borrow::Cow,
fmt::{Display, Write as _},
+ fs::File,
io::Write,
+ path::PathBuf,
sync::Arc,
};
+use serde::{Deserialize, Serialize};
use smallstr::SmallString;
use crate::output::{
Details, Item,
};
-pub struct HtmlRenderer<W> {
+/// Configuration for an [HtmlDriver] that writes to a file.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct HtmlConfig {
+ /// Output file name; [HtmlDriver::new] creates this file.
+ file: PathBuf,
+}
+
+pub struct HtmlDriver<W> {
writer: W,
fg: Color,
bg: Color,
}
}
-impl<W> HtmlRenderer<W>
+impl HtmlDriver<File> {
+ /// Creates the file named by `config.file` and returns a driver that
+ /// writes to it.
+ pub fn new(config: &HtmlConfig) -> std::io::Result<Self> {
+ File::create(&config.file).map(Self::for_writer)
+ }
+}
+
+impl<W> HtmlDriver<W>
where
W: Write,
{
- pub fn new(mut writer: W) -> Self {
+ pub fn for_writer(mut writer: W) -> Self {
let _ = put_header(&mut writer);
Self {
fg: Color::BLACK,
<body>
"#;
-impl<W> Driver for HtmlRenderer<W>
+impl<W> Driver for HtmlDriver<W>
where
W: Write,
{
// You should have received a copy of the GNU General Public License along with
// this program. If not, see <http://www.gnu.org/licenses/>.
-use std::path::PathBuf;
-
use enum_map::{enum_map, EnumMap};
+use serde::{Deserialize, Serialize};
use super::pivot::{Axis2, HorzAlign};
-#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
pub enum Orientation {
#[default]
Portrait,
Landscape,
}
-#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
pub enum ChartSize {
#[default]
AsIs,
QuarterHeight,
}
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Paragraph {
pub markup: String,
pub horz_align: HorzAlign,
}
}
-#[derive(Clone, Debug, Default, PartialEq)]
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct Heading(pub Vec<Paragraph>);
+#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Setup {
pub initial_page_number: i32,
/// Header and footer.
pub headings: [Heading; 2],
-
- file_name: Option<PathBuf>,
}
impl Default for Setup {
object_spacing: 12.0 / 72.0,
chart_size: Default::default(),
headings: Default::default(),
- file_name: None,
}
}
}
use enum_map::{enum_map, Enum, EnumMap};
use look_xml::TableProperties;
use quick_xml::{de::from_str, DeError};
-use serde::{de::Visitor, Deserialize};
+use serde::{de::Visitor, Deserialize, Serialize};
use smallstr::SmallString;
use smallvec::SmallVec;
use thiserror::Error as ThisError;
use crate::{
data::{ByteString, Datum, EncodedString, RawString},
- variable::{VarType, Variable},
format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
settings::{Settings, Show},
+ variable::{VarType, Variable},
};
pub mod output;
pub margins: EnumMap<Axis2, [i32; 2]>,
}
-#[derive(Copy, Clone, Debug, PartialEq)]
+#[derive(Copy, Clone, Debug, PartialEq, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
pub enum HorzAlign {
/// Right aligned.
Right,
}
/// An axis of a 2-dimensional table.
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
pub enum Axis2 {
X,
Y,
use enum_map::EnumMap;
use crate::output::{
- cairo::CairoDriver,
+ cairo::{CairoConfig, CairoDriver},
driver::Driver,
- html::HtmlRenderer,
+ html::HtmlDriver,
pivot::{
Area, Axis2, Border, BorderStyle, Class, Color, Dimension, Footnote,
FootnoteMarkerPosition, FootnoteMarkerType, Footnotes, Group, HeadingRegion, LabelPosition,
let item = Arc::new(Item::new(Details::Table(Box::new(pivot_table.clone()))));
if let Some(dir) = std::env::var_os("PSPP_TEST_HTML_DIR") {
let writer = File::create(Path::new(&dir).join(name).with_extension("html")).unwrap();
- HtmlRenderer::new(writer).write(&item);
+ HtmlDriver::for_writer(writer).write(&item);
}
let item = Arc::new(Item::new(Details::Table(Box::new(pivot_table.clone()))));
if let Some(dir) = std::env::var_os("PSPP_TEST_PDF_DIR") {
- let path = Path::new(&dir).join(name).with_extension("pdf");
- CairoDriver::new(path).write(&item);
+ let config = CairoConfig::new(Path::new(&dir).join(name).with_extension("pdf"));
+ CairoDriver::new(&config).unwrap().write(&item);
}
if let Some(dir) = std::env::var_os("PSPP_TEST_SPV_DIR") {
fs::File,
io::{BufWriter, Write as IoWrite},
ops::{Index, Range},
+ path::PathBuf,
sync::{Arc, LazyLock},
};
use enum_map::{enum_map, Enum, EnumMap};
+use serde::{Deserialize, Serialize};
use unicode_linebreak::{linebreaks, BreakOpportunity};
use unicode_width::UnicodeWidthStr;
Details, Item,
};
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
pub enum Boxes {
Ascii,
#[default]
}
}
-#[derive(Clone, Debug)]
-pub struct TextRendererConfig {
+/// Configuration for [TextDriver]: the output file plus renderer options.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct TextConfig {
+ /// Output file name.
+ file: PathBuf,
+
+ /// Renderer config.
+ #[serde(flatten)]
+ options: TextRendererOptions,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct TextRendererOptions {
/// Enable bold and underline in output?
pub emphasis: bool,
pub boxes: Boxes,
}
-impl Default for TextRendererConfig {
+impl Default for TextRendererOptions {
fn default() -> Self {
Self {
emphasis: false,
impl Default for TextRenderer {
fn default() -> Self {
- Self::new(&TextRendererConfig::default())
+ Self::new(&TextRendererOptions::default())
}
}
impl TextRenderer {
- pub fn new(config: &TextRendererConfig) -> Self {
+ pub fn new(config: &TextRendererOptions) -> Self {
Self {
emphasis: config.emphasis,
width: config.width,
}
impl TextDriver {
- pub fn new(file: File) -> TextDriver {
- Self {
- file: BufWriter::new(file),
- renderer: TextRenderer::default(),
- }
+ pub fn new(config: &TextConfig) -> std::io::Result<TextDriver> {
+ Ok(Self {
+ file: BufWriter::new(File::create(&config.file)?),
+ renderer: TextRenderer::new(&config.options),
+ })
}
}
use anyhow::{anyhow, Error as AnyError};
use binrw::{BinRead, BinWrite, Endian};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
-use encoding_rs::Encoding;
+use encoding_rs::{Encoding, UTF_8};
use indexmap::set::MutableValues;
use itertools::Itertools;
use serde::Serialize;
pub fn into_parts(self) -> (Dictionary, Metadata, Cases) {
(self.dictionary, self.metadata, self.cases)
}
+
+ /// Converts this system file reader into one encoded in UTF-8.
+ ///
+ /// `codepage_to_unicode` is applied to the dictionary and the case
+ /// reader is switched over with its own `into_unicode`; the metadata
+ /// is carried over unchanged.
+ pub fn into_unicode(self) -> Self {
+ let Self {
+ mut dictionary,
+ metadata,
+ cases,
+ } = self;
+ dictionary.codepage_to_unicode();
+ Self {
+ dictionary,
+ metadata,
+ cases: cases.into_unicode(),
+ }
+ }
}
/// Decoded records in a system file, arranged by type.
pub struct Cases {
encoding: &'static Encoding,
+ into_unicode: bool,
inner: RawCases,
}
impl Cases {
pub fn new(encoding: &'static Encoding, inner: RawCases) -> Self {
- Self { encoding, inner }
+ Self {
+ encoding,
+ inner,
+ into_unicode: false,
+ }
+ }
+ /// Returns this reader set up to convert cases to UTF-8 as they are read.
+ ///
+ /// Conversion is enabled only when the source encoding is not already
+ /// UTF-8.
+ pub fn into_unicode(mut self) -> Self {
+ self.into_unicode = self.encoding != UTF_8;
+ self
}
}
type Item = Result<Case<Vec<Datum<ByteString>>>, raw::Error>;
fn next(&mut self) -> Option<Self::Item> {
- self.inner
- .next()
- .map(|result| result.map(|case| case.with_encoding(self.encoding)))
+ self.inner.next().map(|result| {
+ result.map(|case| {
+ let case = case.with_encoding(self.encoding);
+ if self.into_unicode {
+ case.into_unicode()
+ } else {
+ case
+ }
+ })
+ })
}
}
mod write;
use serde::Serializer;
-pub use write::{SysfileVersion, WriteOptions, Writer};
+pub use write::{SystemFileVersion, WriteOptions, Writer};
#[cfg(test)]
mod test;
/// System file format version.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
-pub enum SysfileVersion {
+pub enum SystemFileVersion {
/// Obsolete version.
V2,
pub compression: Option<Compression>,
/// System file version to write.
- pub sysfile_version: SysfileVersion,
+ pub sysfile_version: SystemFileVersion,
/// Date and time to write to the file.
pub timestamp: NaiveDateTime,
}
/// Returns `self` with the system file version set to `sysfile_version`.
- pub fn with_sysfile_version(self, sysfile_version: SysfileVersion) -> Self {
+ pub fn with_sysfile_version(self, sysfile_version: SystemFileVersion) -> Self {
Self {
sysfile_version,
..self
}
fn write_long_variable_names(&mut self) -> Result<(), BinError> {
- if self.options.sysfile_version == SysfileVersion::V2 {
+ if self.options.sysfile_version == SystemFileVersion::V2 {
return Ok(());
}
}
fn write_data_file_attributes(&mut self) -> Result<(), BinError> {
- if self.options.sysfile_version != SysfileVersion::V3 {
+ if self.options.sysfile_version != SystemFileVersion::V3 {
return Ok(());
}
let mut s = String::new();
}
fn write_variable_attributes(&mut self) -> Result<(), BinError> {
- if self.options.sysfile_version != SysfileVersion::V3 {
+ if self.options.sysfile_version != SystemFileVersion::V3 {
return Ok(());
}
let mut s = String::new();