From: Ben Pfaff Date: Fri, 28 Nov 2025 19:45:54 +0000 (-0800) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Frust;p=pspp work --- diff --git a/rust/doc/src/invoking/output.md b/rust/doc/src/invoking/output.md index 47c9ab2e9f..60a1779d45 100644 --- a/rust/doc/src/invoking/output.md +++ b/rust/doc/src/invoking/output.md @@ -190,7 +190,7 @@ PSPP can produce output in [comma-separated value] format. CSV output is encoded in UTF-8. -This driver has the following options: +This driver has the following general options: * `quote = ""` A single character for quoting multi-line fields and fields that @@ -199,6 +199,32 @@ This driver has the following options: * `delimiter = ""` A single character to separate fields. The default is `,`. +The following additional options only affect output written by [`pspp +convert`](pspp-convert.md): + +* `var_names = false` + By default, `pspp convert` writes the variable names as the first + line of output. With this option, `pspp convert` omits this line. + +* `recode = true` + By default, `pspp convert` writes user-missing values to CSV output + files as their regular values. This option makes `pspp convert` + write them the same way as system-missing values (as a single + space). + +* `labels = true` + By default, `pspp convert` writes variables' values to CSV output + files. With this option, `pspp convert` writes value labels. + +* `print_formats = true` + By default, `pspp convert` writes numeric variables as plain + numbers. This option makes `pspp convert` honor variables' print + formats. + +* `decimal = ""` + This option sets the character used as a decimal point in output. + The default is `.`. + [comma-separated value]: https://en.wikipedia.org/wiki/Comma-separated_values # JSON Output (`.json`) @@ -220,3 +246,32 @@ This driver has the following options: * `page_setup = ` Sets the page size, margins, and other parameters.
`` has the same form documented [for PDF format](#page_setup). + +# System File Output (`.sav`) + +PSPP can produce output in the form of SPSS system files, which +usually have a `.sav` extension. + +This driver has the following options: + +* `compression = ""` + Sets the kind of compression used for writing data in the system + file. `` must be one of the following: + + - `simple`: Uses a simple form of compression that saves space + writing small integer values and string segments that are all + spaces. All versions of SPSS support simple compression. + + - `zlib`: Uses more advanced compression that saves space in more + general cases. Only SPSS 21 and later can read files written with + `zlib` compression. + +# Portable File Output (`.por`) + +PSPP can produce output in the form of SPSS portable files, which +usually have a `.por` extension. + +> The portable file format is mostly obsolete. The "system file" +> or .sav format should be used for writing new data files. + +This driver has no options. diff --git a/rust/doc/src/invoking/pspp-convert.md b/rust/doc/src/invoking/pspp-convert.md index 02e24cd43d..109d7aa5bc 100644 --- a/rust/doc/src/invoking/pspp-convert.md +++ b/rust/doc/src/invoking/pspp-convert.md @@ -20,30 +20,6 @@ SPSS Viewer Files] for using it with SPSS Viewer (SPV) input files. `pspp convert` accepts the following options: -* `-f ` - Specifies the format to use for output. `` may be one of - the following: - - * `csv` - `txt` - Comma-separated value. Each value is formatted according to its - variable's print format. The first line in the file contains - variable names. - - * `sav` - `sys` - SPSS system file. - - * `por` - SPSS portable file. - - > The portable file format is mostly obsolete. The "system file" - > or .sav format should be used for writing new data files. - - If `-f` is not given, the default output format is chosen using the - output file's extension. If there is no output file name, the - default output format is CSV. 
- * `-e ` `--encoding=` Sets the character encoding used to read text strings in the input @@ -60,6 +36,12 @@ SPSS Viewer Files] for using it with SPSS Viewer (SPV) input files. [Encoding Standard]: https://encoding.spec.whatwg.org/#names-and-labels +* `--unicode` + For input from a system file, converts from the file's encoding to + Unicode (UTF-8) encoding before writing the output. If the input + was not already in Unicode, then this causes string variables to be + tripled in width. + * `-c ` `--cases=` By default, all cases in the input are copied to the output. @@ -78,59 +60,13 @@ SPSS Viewer Files] for using it with SPSS Viewer (SPV) input files. > ⚠️ The password (and other command-line options) may be visible to > other users on multiuser systems. -## System File Output Options - -These options only affect output to SPSS system files. - -* `--unicode` - Writes system file output with Unicode (UTF-8) encoding. If the - input was not already in Unicode, then this causes string variables - to be tripled in width. - -* `--compression ` - Writes data in the system file with the specified format of - compression: - - - `simple`: A simple form of compression that saves space writing - small integer values and string segments that are all spaces. All - versions of SPSS support simple compression. - - - `zlib`: More advanced compression that saves space in more general - cases. Only SPSS 21 and later can read files written with `zlib` - compression. - -## CSV Output Options - -These options only affect output to CSV files. - -* `--no-var-names` - By default, `pspp convert` writes the variable names as the first - line of output. With this option, `pspp convert` omits this line. - -* `--recode` - By default, `pspp convert` writes user-missing values to CSV output - files as their regular values. With this option, `pspp convert` - recodes them to system-missing values (which are written as a - single space). 
- -* `--labels` - By default, `pspp convert` writes variables' values to CSV output - files. With this option, `pspp convert` writes value labels. - -* `--print-formats` - By default, `pspp convert` writes numeric variables as plain - numbers. This option makes `pspp convert` honor variables' print - formats. - -* `--decimal=DECIMAL` - This option sets the character used as a decimal point in output. - The default is `.`. Only ASCII characters may be used. +* `-o ` + Adds `` to the output engine configuration. See + [Output Drivers](output.md) for information on how to configure + output. -* `--delimiter=DELIMITER` - This option sets the character used to separate fields in output. - The default is `,`, unless the decimal point is `,`, in which case - `;` is used. Only ASCII characters may be used. + If no output driver is specified, the default output format is + chosen based on `[OUTPUT]`'s extension. If `[OUTPUT]` is omitted, + output is written to stdout in [JSON](output.md#json-output-json) + format. -* `--qualifier=QUALIFIER` - The option sets the character used to quote fields that contain the - delimiter. The default is `"`. Only ASCII characters may be used. diff --git a/rust/doc/src/invoking/pspp-show-pc.md b/rust/doc/src/invoking/pspp-show-pc.md index dac20f816e..a933acee01 100644 --- a/rust/doc/src/invoking/pspp-show-pc.md +++ b/rust/doc/src/invoking/pspp-show-pc.md @@ -45,7 +45,7 @@ The following ``s are available: ## Options -The following options affect how `pspp show-pc` reads ``: +`pspp show-pc` accepts the following options: * `--data []` For mode `dictionary`, and `encodings`, this instructs `pspp @@ -53,8 +53,6 @@ The following options affect how `pspp show-pc` reads ``: then that sets a limit on the number of cases to read. Without this option, PSPP will not read any cases. -The following options affect how `pspp show-pc` writes its output: - * `-o ` Adds `` to the output engine configuration. 
See [Output Drivers](output.md) for information on how to configure diff --git a/rust/doc/src/invoking/pspp-show-por.md b/rust/doc/src/invoking/pspp-show-por.md index d187187441..83e0a3d80e 100644 --- a/rust/doc/src/invoking/pspp-show-por.md +++ b/rust/doc/src/invoking/pspp-show-por.md @@ -85,7 +85,7 @@ The following ``s are available: ## Options -The following options affect how `pspp show-por` reads ``: +`pspp show-por` accepts the following options: * `--data []` For mode `dictionary`, and `encodings`, this instructs `pspp @@ -93,8 +93,6 @@ The following options affect how `pspp show-por` reads ``: then that sets a limit on the number of cases to read. Without this option, PSPP will not read any cases. -The following options affect how `pspp show-por` writes its output: - * `-o ` Adds `` to the output engine configuration. See [Output Drivers](output.md) for information on how to configure diff --git a/rust/doc/src/invoking/pspp-show.md b/rust/doc/src/invoking/pspp-show.md index 5d8bcbd915..db8c3fa5ef 100644 --- a/rust/doc/src/invoking/pspp-show.md +++ b/rust/doc/src/invoking/pspp-show.md @@ -60,7 +60,7 @@ The following ``s are available: ## Options -The following options affect how `pspp show` reads ``: +`pspp show` accepts the following options: * `--encoding ` For modes `decoded` and `dictionary`, this reads the input file @@ -81,8 +81,6 @@ The following options affect how `pspp show` reads ``: that sets a limit on the number of cases to read. Without this option, PSPP will not read any cases. -The following options affect how `pspp show` writes its output: - * `-o ` Adds `` to the output engine configuration. 
See [Output Drivers](output.md) for information on how to configure diff --git a/rust/pspp/src/cli/convert.rs b/rust/pspp/src/cli/convert.rs index 74319fd36f..77f49ab345 100644 --- a/rust/pspp/src/cli/convert.rs +++ b/rust/pspp/src/cli/convert.rs @@ -14,27 +14,18 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -use std::{ - fs::File, - io::{Write, stdout}, - path::{Path, PathBuf}, -}; +use std::path::PathBuf; -use anyhow::{Error as AnyError, Result, anyhow, bail}; -use chrono::{Datelike, NaiveTime, Timelike}; -use clap::{Args, ValueEnum}; -use csv::Writer; +use anyhow::{Error as AnyError, Result, bail}; +use clap::Args; use encoding_rs::Encoding; use pspp::{ - calendar::calendar_offset_to_gregorian, - data::{ByteString, Case, Datum, WithEncoding}, + data::{ByteString, Case, Datum}, file::FileType, - format::{DisplayPlain, Type}, + output::drivers::Driver, pc::PcFile, por::PortableFile, - sys::{ReadOptions, WriteOptions, raw::records::Compression}, - util::ToSmallString, - variable::Variable, + sys::ReadOptions, }; use super::parse_encoding; @@ -48,11 +39,6 @@ pub struct Convert { /// Output file name (if omitted, output is written to stdout). output: Option, - /// Format for output file (if omitted, the intended format is inferred - /// based on file extension). - #[arg(short = 'O')] - output_format: Option, - /// The encoding to use for reading the input file. #[arg(short = 'e', long, value_parser = parse_encoding)] encoding: Option<&'static Encoding>, @@ -71,177 +57,6 @@ pub struct Convert { #[arg(short = 'c', long = "cases")] max_cases: Option, - #[command(flatten, next_help_heading = "Options for CSV output")] - csv_options: CsvOptions, - - #[command(flatten, next_help_heading = "Options for system file output")] - sys_options: SysOptions, -} - -#[derive(Args, Clone, Debug)] -struct CsvOptions { - /// Omit writing variable names as the first line of output. 
- #[arg(long)] - no_var_names: bool, - - /// Writes user-missing values like system-missing values. Otherwise, - /// user-missing values are written the same way as non-missing values. - #[arg(long)] - recode: bool, - - /// Write value labels instead of values. - #[arg(long)] - labels: bool, - - /// Use print formats for numeric variables. - #[arg(long)] - print_formats: bool, - - /// Decimal point. - #[arg(long, default_value_t = '.')] - decimal: char, - - /// Delimiter. - /// - /// The default is `,` unless that would be the same as the decimal point, - /// in which case `;` is the default. - #[arg(long)] - delimiter: Option, - - /// Character used to quote the delimiter. - #[arg(long, default_value_t = '"')] - qualifier: char, -} - -impl CsvOptions { - fn write_field( - &self, - datum: &Datum>, - variable: &Variable, - writer: &mut Writer, - ) -> csv::Result<()> - where - W: Write, - { - if self.labels - && let Some(label) = variable.value_labels.get(datum) - { - writer.write_field(label) - } else if datum.is_sysmis() { - writer.write_field(" ") - } else if self.print_formats || datum.is_string() { - writer.write_field( - datum - .display(variable.print_format) - .with_trimming() - .to_small_string::<64>(), - ) - } else { - let number = datum.as_number().unwrap().unwrap(); - match variable.print_format.type_() { - Type::F - | Type::Comma - | Type::Dot - | Type::Dollar - | Type::Pct - | Type::E - | Type::CC(_) - | Type::N - | Type::Z - | Type::P - | Type::PK - | Type::IB - | Type::PIB - | Type::PIBHex - | Type::RB - | Type::RBHex - | Type::WkDay - | Type::Month => writer.write_field( - number - .display_plain() - .with_decimal(self.decimal) - .to_small_string::<64>(), - ), - - Type::Date - | Type::ADate - | Type::EDate - | Type::JDate - | Type::SDate - | Type::QYr - | Type::MoYr - | Type::WkYr => { - if number >= 0.0 - && let Some(date) = - calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) - { - writer.write_field( - format_args!( - "{:02}/{:02}/{:04}", - 
date.month(), - date.day(), - date.year() - ) - .to_small_string::<64>(), - ) - } else { - writer.write_field(" ") - } - } - - Type::DateTime | Type::YmdHms => { - if number >= 0.0 - && let Some(date) = - calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) - && let Some(time) = NaiveTime::from_num_seconds_from_midnight_opt( - (number % (60.0 * 60.0 * 24.0)) as u32, - 0, - ) - { - writer.write_field( - format_args!( - "{:02}/{:02}/{:04} {:02}:{:02}:{:02}", - date.month(), - date.day(), - date.year(), - time.hour(), - time.minute(), - time.second() - ) - .to_small_string::<64>(), - ) - } else { - writer.write_field(" ") - } - } - - Type::MTime | Type::Time | Type::DTime => { - if let Some(time) = - NaiveTime::from_num_seconds_from_midnight_opt(number.abs() as u32, 0) - { - writer.write_field( - format_args!( - "{}{:02}:{:02}:{:02}", - if number.is_sign_negative() { "-" } else { "" }, - time.hour(), - time.minute(), - time.second() - ) - .to_small_string::<64>(), - ) - } else { - writer.write_field(" ") - } - } - - Type::A | Type::AHex => unreachable!(), - } - } - } -} - -#[derive(Args, Clone, Debug)] -struct SysOptions { /// Write the output file with Unicode (UTF-8) encoding. /// /// If the input was not already encoded in Unicode, this triples the width @@ -249,57 +64,13 @@ struct SysOptions { #[arg(long = "unicode")] to_unicode: bool, - /// How to compress data in the system file. - #[arg(long, default_value = "simple")] - compression: Option, -} - -/// Output file format. 
-#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)] -enum OutputFormat { - /// Comma-separated values using each variable's print format (variable - /// names are written as the first line) - #[value(alias("txt"))] - Csv, - - /// System file - #[value(alias("sys"))] - Sav, - - /// Portable file - Por, -} - -impl TryFrom<&Path> for OutputFormat { - type Error = AnyError; - - fn try_from(value: &Path) -> std::result::Result { - let extension = value.extension().unwrap_or_default(); - if extension.eq_ignore_ascii_case("csv") || extension.eq_ignore_ascii_case("txt") { - Ok(OutputFormat::Csv) - } else if extension.eq_ignore_ascii_case("sav") || extension.eq_ignore_ascii_case("sys") { - Ok(OutputFormat::Sav) - } else if extension.eq_ignore_ascii_case("por") { - Ok(OutputFormat::Por) - } else { - Err(anyhow!( - "Unknown output file extension '{}'", - extension.display() - )) - } - } + /// Output driver configuration options. + #[arg(short = 'o', help_heading = "Output options")] + output_options: Vec, } impl Convert { pub fn run(self) -> Result<()> { - let output_format = match self.output_format { - Some(format) => format, - None => match &self.output { - Some(output) => output.as_path().try_into()?, - _ => OutputFormat::Csv, - }, - }; - let (dictionary, cases) = match FileType::from_file(&self.input)? { Some(FileType::System { .. }) => { fn warn(warning: anyhow::Error) { @@ -310,7 +81,7 @@ impl Convert { .with_encoding(self.encoding) .with_password(self.password.clone()) .open_file(&self.input)?; - if output_format == OutputFormat::Sav && self.sys_options.to_unicode { + if self.to_unicode { system_file = system_file.into_unicode(); } let (dictionary, _, cases) = system_file.into_parts(); @@ -352,56 +123,15 @@ impl Convert { // Take only the first `self.max_cases` cases. let cases = cases.take(self.max_cases.unwrap_or(usize::MAX)); - match output_format { - OutputFormat::Csv => { - let writer = match self.output { - Some(path) => Box::new(File::create(path)?) 
as Box, - None => Box::new(stdout()), - }; - let decimal: u8 = self.csv_options.decimal.try_into()?; - let delimiter: u8 = match self.csv_options.delimiter { - Some(delimiter) => delimiter.try_into()?, - None if decimal != b',' => b',', - None => b';', - }; - let qualifier: u8 = self.csv_options.qualifier.try_into()?; - let mut output = csv::WriterBuilder::new() - .delimiter(delimiter) - .quote(qualifier) - .from_writer(writer); - if !self.csv_options.no_var_names { - output - .write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?; - } + let mut output = + ::from_options(self.output.as_ref(), &self.output_options, "csv")?; + if !output.can_write_data_file() { + bail!("Can't write data output to {} driver.", output.name()); + } - for case in cases { - for (datum, variable) in case?.into_iter().zip(dictionary.variables.iter()) { - self.csv_options - .write_field(&datum, variable, &mut output)?; - } - output.write_record(None::<&[u8]>)?; - } - } - OutputFormat::Sav => { - let Some(output) = &self.output else { - bail!("output file name must be specified for output to a system file") - }; - let mut output = WriteOptions::new() - .with_compression(self.sys_options.compression) - .write_file(&dictionary, output)?; - for case in cases { - output.write_case(case?)?; - } - } - OutputFormat::Por => { - let Some(output) = &self.output else { - bail!("output file name must be specified for output to a portable file") - }; - let mut output = pspp::por::WriteOptions::new().write_file(&dictionary, output)?; - for case in cases { - output.write_case(case?)?; - } - } + let mut writer = output.write_data_file(&dictionary)?.unwrap(); + for case in cases { + writer.write_case(case?)?; } Ok(()) } diff --git a/rust/pspp/src/output/drivers.rs b/rust/pspp/src/output/drivers.rs index 82f908b6e4..4ac3d0561f 100644 --- a/rust/pspp/src/output/drivers.rs +++ b/rust/pspp/src/output/drivers.rs @@ -17,9 +17,13 @@ use std::{borrow::Cow, fmt::Write, path::Path, sync::Arc}; use 
anyhow::{anyhow, bail}; -use clap::ValueEnum; use serde::{Deserialize, Serialize}; +use crate::{ + data::{ByteString, Case, Datum}, + dictionary::Dictionary, +}; + use super::{Item, page::PageSetup}; pub mod cairo; @@ -34,6 +38,12 @@ use html::{HtmlConfig, HtmlDriver}; pub mod json; use json::{JsonConfig, JsonDriver}; +pub mod por; +use por::{PorConfig, PorDriver}; + +pub mod sav; +use sav::{SavConfig, SavDriver}; + pub mod spv; use spv::{SpvConfig, SpvDriver}; @@ -50,10 +60,23 @@ pub trait Driver { false } - fn serialize(&mut self, _item: &dyn erased_serde::Serialize) { + fn serialize(&mut self, item: &dyn erased_serde::Serialize) { + let _ = item; unreachable!("This driver does not support serialization"); } + fn can_write_data_file(&self) -> bool { + false + } + + fn write_data_file<'a>( + &'a mut self, + dictionary: &'a Dictionary, + ) -> anyhow::Result>> { + let _ = dictionary; + Ok(None) + } + /// Returns false if the driver doesn't support page setup. fn setup(&mut self, page_setup: &PageSetup) -> bool { let _ = page_setup; @@ -107,6 +130,29 @@ impl Driver for Box { fn handles_groups(&self) -> bool { (**self).handles_groups() } + + fn can_serialize(&self) -> bool { + (**self).can_serialize() + } + + fn serialize(&mut self, item: &dyn erased_serde::Serialize) { + (**self).serialize(item); + } + + fn can_write_data_file(&self) -> bool { + (**self).can_write_data_file() + } + + fn write_data_file<'a>( + &'a mut self, + dictionary: &'a Dictionary, + ) -> anyhow::Result>> { + (**self).write_data_file(dictionary) + } +} + +pub trait CaseWriter { + fn write_case(&mut self, case: Case>>) -> anyhow::Result<()>; } #[derive(Clone, Debug, Serialize, Deserialize)] @@ -117,20 +163,11 @@ pub enum Config { Html(HtmlConfig), Json(JsonConfig), Csv(CsvConfig), + Por(PorConfig), + Sav(SavConfig), Spv(SpvConfig), } -#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, ValueEnum)] -#[serde(rename_all = "snake_case")] -pub enum DriverType { - Text, - Pdf, - Html, - Csv, 
- Json, - Spv, -} - impl dyn Driver { /// Creates a driver for writing to `file`. If `file` is `None`, then the /// driver will write to stdout. `options` may specify options to pass to @@ -184,12 +221,14 @@ impl dyn Driver { pub fn new(config: &Config) -> anyhow::Result> { match config { - Config::Text(text_config) => Ok(Box::new(TextDriver::new(text_config)?)), - Config::Pdf(cairo_config) => Ok(Box::new(CairoDriver::new(cairo_config)?)), - Config::Html(html_config) => Ok(Box::new(HtmlDriver::new(html_config)?)), Config::Csv(csv_config) => Ok(Box::new(CsvDriver::new(csv_config)?)), + Config::Html(html_config) => Ok(Box::new(HtmlDriver::new(html_config)?)), Config::Json(json_config) => Ok(Box::new(JsonDriver::new(json_config)?)), + Config::Pdf(cairo_config) => Ok(Box::new(CairoDriver::new(cairo_config)?)), + Config::Por(por_config) => Ok(Box::new(PorDriver::new(por_config)?)), + Config::Sav(sav_config) => Ok(Box::new(SavDriver::new(sav_config)?)), Config::Spv(spv_config) => Ok(Box::new(SpvDriver::new(spv_config)?)), + Config::Text(text_config) => Ok(Box::new(TextDriver::new(text_config)?)), } } @@ -201,6 +240,8 @@ impl dyn Driver { "csv" => Some("csv"), "json" | "ndjson" => Some("json"), "spv" => Some("spv"), + "sav" => Some("sav"), + "por" => Some("por"), _ => None, } } diff --git a/rust/pspp/src/output/drivers/csv.rs b/rust/pspp/src/output/drivers/csv.rs index b91865dc01..d0963e37ff 100644 --- a/rust/pspp/src/output/drivers/csv.rs +++ b/rust/pspp/src/output/drivers/csv.rs @@ -18,29 +18,37 @@ use std::{ borrow::Cow, fmt::Display, fs::File, - io::{BufWriter, Error, Write}, + io::{BufWriter, Error, Write, stdout}, path::PathBuf, sync::Arc, }; -use serde::{ - Deserialize, Deserializer, Serialize, - de::{Unexpected, Visitor}, -}; +use chrono::{Datelike, NaiveTime, Timelike}; +use serde::{Deserialize, Serialize}; -use crate::output::{Item, drivers::Driver, pivot::Coord2}; +use crate::{ + calendar::calendar_offset_to_gregorian, + data::{ByteString, Case, Datum, 
WithEncoding}, + dictionary::Dictionary, + format::{DisplayPlain, Type}, + output::{Item, drivers::Driver, pivot::Coord2}, + util::ToSmallString as _, + variable::Variable, +}; use crate::output::{Details, TextType, pivot::PivotTable, table::Table}; +use super::CaseWriter; + #[derive(Clone, Debug, Serialize, Deserialize)] pub struct CsvConfig { - file: PathBuf, + file: Option, #[serde(flatten)] options: CsvOptions, } pub struct CsvDriver { - file: BufWriter, + file: Box, options: CsvOptions, /// Number of items written so far. @@ -52,30 +60,22 @@ pub struct CsvDriver { struct CsvOptions { quote: char, delimiter: char, -} -fn deserialize_ascii_char<'de, D>(deserializer: D) -> Result -where - D: Deserializer<'de>, -{ - struct AsciiCharVisitor; - impl<'de> Visitor<'de> for AsciiCharVisitor { - type Value = u8; - fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "a single ASCII character") - } - fn visit_str(self, s: &str) -> Result - where - E: serde::de::Error, - { - if s.len() == 1 { - Ok(s.chars().next().unwrap() as u8) - } else { - Err(serde::de::Error::invalid_value(Unexpected::Str(s), &self)) - } - } - } - deserializer.deserialize_char(AsciiCharVisitor) + /// Omit writing variable names as the first line of output. + var_names: bool, + + /// Writes user-missing values like system-missing values. Otherwise, + /// user-missing values are written the same way as non-missing values. + recode: bool, + + /// Write value labels instead of values. + labels: bool, + + /// Use print formats for numeric variables. + print_formats: bool, + + /// Decimal point. 
+ decimal: char, } impl Default for CsvOptions { @@ -83,35 +83,194 @@ impl Default for CsvOptions { Self { quote: '"', delimiter: ',', + var_names: true, + recode: false, + labels: false, + print_formats: false, + decimal: '.', } } } impl CsvOptions { - fn char_needs_quoting(&self, b: char) -> bool { - b == '\r' || b == '\n' || b == self.quote || b == self.delimiter + fn field<'a>(&'a self, text: &'a str) -> CsvField<'a> { + CsvField::new(text, self) } - fn string_needs_quoting(&self, s: &str) -> bool { - s.chars().any(|b| self.char_needs_quoting(b)) + fn write_field( + &self, + datum: &Datum>, + variable: &Variable, + file: &mut W, + ) -> std::io::Result<()> + where + W: Write, + { + if self.labels + && let Some(label) = variable.value_labels.get(datum) + { + write!(file, "{}", self.field(label)) + } else if datum.is_sysmis() || (self.recode && variable.missing_values().contains(datum)) { + write!(file, "{}", self.field(" ")) + } else if self.print_formats || datum.is_string() { + write!( + file, + "{}", + self.field( + &datum + .display(variable.print_format) + .with_trimming() + .to_small_string::<64>(), + ) + ) + } else { + let number = datum.as_number().unwrap().unwrap(); + match variable.print_format.type_() { + Type::F + | Type::Comma + | Type::Dot + | Type::Dollar + | Type::Pct + | Type::E + | Type::CC(_) + | Type::N + | Type::Z + | Type::P + | Type::PK + | Type::IB + | Type::PIB + | Type::PIBHex + | Type::RB + | crate::format::Type::RBHex + | Type::WkDay + | Type::Month => write!( + file, + "{}", + self.field( + &number + .display_plain() + .with_decimal(self.decimal) + .to_small_string::<64>() + ) + ), + + Type::Date + | Type::ADate + | Type::EDate + | Type::JDate + | Type::SDate + | Type::QYr + | Type::MoYr + | Type::WkYr => { + if number >= 0.0 + && let Some(date) = + calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) + { + write!( + file, + "{}", + self.field( + &format_args!( + "{:02}/{:02}/{:04}", + date.month(), + date.day(), + date.year() + 
) + .to_small_string::<64>() + ) + ) + } else { + write!(file, "{}", self.field(" ")) + } + } + + Type::DateTime | Type::YmdHms => { + if number >= 0.0 + && let Some(date) = + calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) + && let Some(time) = NaiveTime::from_num_seconds_from_midnight_opt( + (number % (60.0 * 60.0 * 24.0)) as u32, + 0, + ) + { + write!( + file, + "{}", + self.field( + &format_args!( + "{:02}/{:02}/{:04} {:02}:{:02}:{:02}", + date.month(), + date.day(), + date.year(), + time.hour(), + time.minute(), + time.second() + ) + .to_small_string::<64>(), + ) + ) + } else { + write!(file, "{}", self.field(" ")) + } + } + + Type::MTime | Type::Time | Type::DTime => { + if let Some(time) = + NaiveTime::from_num_seconds_from_midnight_opt(number.abs() as u32, 0) + { + write!( + file, + "{}", + self.field( + &format_args!( + "{}{:02}:{:02}:{:02}", + if number.is_sign_negative() { "-" } else { "" }, + time.hour(), + time.minute(), + time.second() + ) + .to_small_string::<64>(), + ) + ) + } else { + write!(file, "{}", self.field(" ")) + } + } + + Type::A | Type::AHex => unreachable!(), + } + } } } struct CsvField<'a> { text: &'a str, - options: CsvOptions, + delimiter: char, + quote: char, } impl<'a> CsvField<'a> { - fn new(text: &'a str, options: CsvOptions) -> Self { - Self { text, options } + fn new(text: &'a str, options: &CsvOptions) -> Self { + Self { + text, + delimiter: options.delimiter, + quote: options.quote, + } + } + + fn char_needs_quoting(&self, b: char) -> bool { + b == '\r' || b == '\n' || b == self.quote || b == self.delimiter + } + + fn needs_quoting(&self) -> bool { + self.text.chars().any(|b| self.char_needs_quoting(b)) } } impl Display for CsvField<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.options.string_needs_quoting(self.text) { - let quote = self.options.quote; + if self.needs_quoting() { + let quote = self.quote; write!(f, "{quote}")?; for c in self.text.chars() { if c == quote { @@ 
-129,7 +288,10 @@ impl Display for CsvField<'_> { impl CsvDriver { pub fn new(config: &CsvConfig) -> std::io::Result { Ok(Self { - file: BufWriter::new(File::create(&config.file)?), + file: match &config.file { + Some(file) => Box::new(BufWriter::new(File::create(file)?)), + None => Box::new(stdout()), + }, options: config.options, n_items: 0, }) @@ -178,7 +340,7 @@ impl CsvDriver { Some(leader) if x == 0 && y == 0 => format!("{leader}: {display}"), _ => display.to_string(), }; - write!(&mut self.file, "{}", CsvField::new(&s, self.options))?; + write!(&mut self.file, "{}", CsvField::new(&s, &self.options))?; } } writeln!(&mut self.file)?; @@ -200,7 +362,7 @@ impl Driver for CsvDriver { Details::Message(diagnostic) => { self.start_item(); let text = diagnostic.to_string(); - writeln!(&mut self.file, "{}", CsvField::new(&text, self.options)).unwrap(); + writeln!(&mut self.file, "{}", CsvField::new(&text, &self.options)).unwrap(); } Details::Table(pivot_table) => { for layer in pivot_table.layers(true) { @@ -216,7 +378,7 @@ impl Driver for CsvDriver { TextType::Title | TextType::Log => { self.start_item(); for line in text.content.display(()).to_string().lines() { - writeln!(&mut self.file, "{}", CsvField::new(line, self.options)).unwrap(); + writeln!(&mut self.file, "{}", CsvField::new(line, &self.options)).unwrap(); } } }, @@ -226,4 +388,43 @@ impl Driver for CsvDriver { fn flush(&mut self) { let _ = self.file.flush(); } + + fn can_write_data_file(&self) -> bool { + true + } + + fn write_data_file<'a>( + &'a mut self, + dictionary: &'a Dictionary, + ) -> anyhow::Result>> { + for (index, variable) in dictionary.variables.iter().enumerate() { + if index > 0 { + write!(&mut self.file, "{}", self.options.delimiter)?; + } + let name = variable.name.as_str(); + write!(&mut self.file, "{}", CsvField::new(name, &self.options))?; + } + writeln!(&mut self.file)?; + Ok(Some(Box::new(CsvDriverCaseWriter { + driver: self, + dictionary, + }))) + } +} + +struct 
CsvDriverCaseWriter<'a> { + driver: &'a mut CsvDriver, + dictionary: &'a Dictionary, +} + +impl<'a> CaseWriter for CsvDriverCaseWriter<'a> { + fn write_case(&mut self, case: Case>>) -> anyhow::Result<()> { + for (datum, variable) in case.into_iter().zip(self.dictionary.variables.iter()) { + self.driver + .options + .write_field(&datum, variable, &mut self.driver.file)?; + } + writeln!(&mut self.driver.file).unwrap(); + Ok(()) + } } diff --git a/rust/pspp/src/output/drivers/html.rs b/rust/pspp/src/output/drivers/html.rs index c6998a65d5..194bf5ce0b 100644 --- a/rust/pspp/src/output/drivers/html.rs +++ b/rust/pspp/src/output/drivers/html.rs @@ -419,7 +419,7 @@ a:active { impl Driver for HtmlDriver where - W: Write, + W: Write + 'static, { fn name(&self) -> Cow<'static, str> { Cow::from("html") diff --git a/rust/pspp/src/output/drivers/json.rs b/rust/pspp/src/output/drivers/json.rs index 75b1a56003..d121a9a99d 100644 --- a/rust/pspp/src/output/drivers/json.rs +++ b/rust/pspp/src/output/drivers/json.rs @@ -17,7 +17,7 @@ use std::{ borrow::Cow, fs::File, - io::{BufWriter, Write}, + io::{BufWriter, Write, stdout}, path::PathBuf, sync::Arc, }; @@ -28,7 +28,7 @@ use super::{Driver, Item}; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct JsonConfig { - file: PathBuf, + file: Option, /// If false (the default), each output item is exactly one line of JSON, in /// [newline-delimited JSON] format. 
@@ -49,7 +49,7 @@ pub struct JsonConfig { } pub struct JsonDriver { - file: BufWriter, + file: Box, pretty: bool, tables: bool, } @@ -57,10 +57,16 @@ pub struct JsonDriver { impl JsonDriver { pub fn new(config: &JsonConfig) -> std::io::Result { Ok(Self { - file: BufWriter::new(File::create(&config.file)?), - pretty: config - .pretty - .unwrap_or_else(|| !config.file.ends_with(".ndjson")), + file: match &config.file { + Some(file) => Box::new(BufWriter::new(File::create(file)?)), + None => Box::new(stdout()), + }, + pretty: config.pretty.unwrap_or_else(|| { + !config + .file + .as_ref() + .is_some_and(|file| file.ends_with(".ndjson")) + }), tables: config.tables.unwrap_or_default(), }) } diff --git a/rust/pspp/src/output/drivers/por.rs b/rust/pspp/src/output/drivers/por.rs new file mode 100644 index 0000000000..723d0e2dab --- /dev/null +++ b/rust/pspp/src/output/drivers/por.rs @@ -0,0 +1,78 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . 
+ +use std::{borrow::Cow, fs::File, io::BufWriter, path::PathBuf, sync::Arc}; + +use serde::{Deserialize, Serialize}; + +use crate::{ + data::{ByteString, Case, Datum}, + dictionary::Dictionary, + por::{WriteOptions, Writer}, +}; + +use super::{CaseWriter, Driver, Item}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PorConfig { + file: PathBuf, +} + +pub struct PorDriver { + file: PathBuf, +} + +impl PorDriver { + pub fn new(config: &PorConfig) -> std::io::Result { + Ok(Self { + file: config.file.clone(), + }) + } +} + +impl Driver for PorDriver { + fn name(&self) -> Cow<'static, str> { + Cow::from("por") + } + + fn write(&mut self, _item: &Arc) { + todo!() + } + + fn can_write_data_file(&self) -> bool { + true + } + + fn write_data_file<'a>( + &'a mut self, + dictionary: &'a Dictionary, + ) -> anyhow::Result>> { + Ok(Some(Box::new(PorDriverCaseWriter { + writer: WriteOptions::new().write_file(&dictionary, &self.file)?, + }))) + } +} + +struct PorDriverCaseWriter { + writer: Writer>, +} + +impl CaseWriter for PorDriverCaseWriter { + fn write_case(&mut self, case: Case>>) -> anyhow::Result<()> { + self.writer.write_case(case)?; + Ok(()) + } +} diff --git a/rust/pspp/src/output/drivers/sav.rs b/rust/pspp/src/output/drivers/sav.rs new file mode 100644 index 0000000000..75e1c6162a --- /dev/null +++ b/rust/pspp/src/output/drivers/sav.rs @@ -0,0 +1,83 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . + +use std::{borrow::Cow, fs::File, io::BufWriter, path::PathBuf, sync::Arc}; + +use serde::{Deserialize, Serialize}; + +use crate::{ + data::{ByteString, Case, Datum}, + dictionary::Dictionary, + sys::{WriteOptions, Writer, raw::records::Compression}, +}; + +use super::{CaseWriter, Driver, Item}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SavConfig { + file: PathBuf, + compression: Option, +} + +pub struct SavDriver { + file: PathBuf, + compression: Option, +} + +impl SavDriver { + pub fn new(config: &SavConfig) -> std::io::Result { + Ok(Self { + file: config.file.clone(), + compression: config.compression, + }) + } +} + +impl Driver for SavDriver { + fn name(&self) -> Cow<'static, str> { + Cow::from("sav") + } + + fn write(&mut self, _item: &Arc) { + todo!() + } + + fn can_write_data_file(&self) -> bool { + true + } + + fn write_data_file<'a>( + &'a mut self, + dictionary: &'a Dictionary, + ) -> anyhow::Result>> { + Ok(Some(Box::new(SavDriverCaseWriter { + writer: WriteOptions::new() + .with_compression(self.compression) + .write_file(&dictionary, &self.file)?, + }))) + } +} + +struct SavDriverCaseWriter { + writer: Writer>, +} + +impl CaseWriter for SavDriverCaseWriter { + fn write_case(&mut self, case: Case>>) -> anyhow::Result<()> { + self.writer.write_case(case)?; + Ok(()) + } +} diff --git a/rust/pspp/src/output/drivers/spv.rs b/rust/pspp/src/output/drivers/spv.rs index d081cbf2b0..17dae059d0 100644 --- a/rust/pspp/src/output/drivers/spv.rs +++ b/rust/pspp/src/output/drivers/spv.rs @@ -550,7 +550,7 @@ impl PivotTable { impl Driver for SpvDriver where - W: Write + Seek, + W: Write + Seek + 'static, { fn name(&self) -> Cow<'static, str> { Cow::from("spv") diff --git a/rust/pspp/src/output/drivers/text.rs b/rust/pspp/src/output/drivers/text.rs index 
2091e3c04b..42b0eacdeb 100644 --- a/rust/pspp/src/output/drivers/text.rs +++ b/rust/pspp/src/output/drivers/text.rs @@ -18,7 +18,7 @@ use std::{ borrow::Cow, fmt::{Display, Error as FmtError, Result as FmtResult, Write as FmtWrite}, fs::File, - io::{BufWriter, Write as IoWrite}, + io::{BufWriter, Write as IoWrite, stdout}, ops::{Index, Range}, path::PathBuf, sync::{Arc, LazyLock}, @@ -360,17 +360,17 @@ impl Display for Item { } pub struct TextDriver { - file: BufWriter, + file: Box, renderer: TextRenderer, } impl TextDriver { pub fn new(config: &TextConfig) -> std::io::Result { Ok(Self { - file: BufWriter::new(match &config.file { - Some(file) => File::create(file)?, - None => File::options().write(true).open("/dev/stdout")?, - }), + file: match &config.file { + Some(file) => Box::new(BufWriter::new(File::create(file)?)), + None => Box::new(stdout()), + }, renderer: TextRenderer::new(&config.options), }) } diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index 1bc79ced89..3fae0faa2a 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -32,21 +32,20 @@ use crate::{ }; use binrw::{BinRead, BinWrite, Endian, Error as BinError, binrw}; -use clap::ValueEnum; use encoding_rs::Encoding; use itertools::Itertools; -use serde::{Serialize, Serializer, ser::SerializeTuple}; +use serde::{Deserialize, Serialize, Serializer, ser::SerializeTuple}; use thiserror::Error as ThisError; /// Type of compression in a system file. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, ValueEnum)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] pub enum Compression { /// Simple bytecode-based compression. Simple, /// [ZLIB] compression. /// /// [ZLIB]: https://www.zlib.net/ - #[value(name = "zlib", help = "ZLIB space-efficient compression")] ZLib, }