From: Ben Pfaff Date: Thu, 13 Nov 2025 16:59:36 +0000 (-0800) Subject: rust: Move CLI implementation into `cli` module. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=46c91efbf468dbff0bedfc4787f1745be2fd5769;p=pspp rust: Move CLI implementation into `cli` module. --- diff --git a/rust/pspp/src/cli.rs b/rust/pspp/src/cli.rs new file mode 100644 index 0000000000..3a5868608f --- /dev/null +++ b/rust/pspp/src/cli.rs @@ -0,0 +1,80 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use anyhow::Result; +use clap::{Parser, Subcommand}; +use encoding_rs::Encoding; +use thiserror::Error as ThisError; + +use convert::Convert; +use decrypt::Decrypt; +use identify::Identify; +use show::Show; +use show_pc::ShowPc; +use show_por::ShowPor; +use show_spv::ShowSpv; + +mod convert; +mod decrypt; +mod identify; +mod show; +mod show_pc; +mod show_por; +mod show_spv; + +/// PSPP, a program for statistical analysis of sampled data. 
+#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct Cli { + #[command(subcommand)] + pub command: Command, +} + +#[derive(Subcommand, Clone, Debug)] +pub enum Command { + Convert(Convert), + Decrypt(Decrypt), + Identify(Identify), + Show(Show), + ShowPor(ShowPor), + ShowPc(ShowPc), + ShowSpv(ShowSpv), +} + +impl Command { + pub fn run(self) -> Result<()> { + match self { + Command::Convert(convert) => convert.run(), + Command::Decrypt(decrypt) => decrypt.run(), + Command::Identify(identify) => identify.run(), + Command::Show(show) => show.run(), + Command::ShowPor(show_por) => show_por.run(), + Command::ShowPc(show_pc) => show_pc.run(), + Command::ShowSpv(show_spv) => show_spv.run(), + } + } +} + +#[derive(ThisError, Debug)] +#[error("{0}: unknown encoding")] +struct UnknownEncodingError(String); + +fn parse_encoding(arg: &str) -> Result<&'static Encoding, UnknownEncodingError> { + match Encoding::for_label_no_replacement(arg.as_bytes()) { + Some(encoding) => Ok(encoding), + None => Err(UnknownEncodingError(arg.to_string())), + } +} diff --git a/rust/pspp/src/cli/convert.rs b/rust/pspp/src/cli/convert.rs new file mode 100644 index 0000000000..af206c0639 --- /dev/null +++ b/rust/pspp/src/cli/convert.rs @@ -0,0 +1,402 @@ +/* PSPP - a program for statistical analysis. + * Copyright (C) 2023 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see . */ + +use std::{ + fs::File, + io::{Write, stdout}, + path::{Path, PathBuf}, +}; + +use anyhow::{Error as AnyError, Result, anyhow, bail}; +use chrono::{Datelike, NaiveTime, Timelike}; +use clap::{Args, ValueEnum}; +use csv::Writer; +use encoding_rs::Encoding; +use pspp::{ + calendar::calendar_offset_to_gregorian, + data::{ByteString, Case, Datum, WithEncoding}, + file::FileType, + format::{DisplayPlain, Type}, + pc::PcFile, + por::PortableFile, + sys::{ReadOptions, WriteOptions, raw::records::Compression}, + util::ToSmallString, + variable::Variable, +}; + +use super::parse_encoding; + +/// Convert SPSS data files into other formats. +#[derive(Args, Clone, Debug)] +pub struct Convert { + /// Input file name. + input: PathBuf, + + /// Output file name (if omitted, output is written to stdout). + output: Option, + + /// Format for output file (if omitted, the intended format is inferred + /// based on file extension). + #[arg(short = 'O')] + output_format: Option, + + /// The encoding to use for reading the input file. + #[arg(short = 'e', long, value_parser = parse_encoding)] + encoding: Option<&'static Encoding>, + + /// Password for decryption, with or without what SPSS calls "password encryption". + /// + /// Specify only for an encrypted system file. + #[clap(short, long)] + password: Option, + + /// Maximum number of cases to print. + #[arg(short = 'c', long = "cases")] + max_cases: Option, + + #[command(flatten, next_help_heading = "Options for CSV output")] + csv_options: CsvOptions, + + #[command(flatten, next_help_heading = "Options for system file output")] + sys_options: SysOptions, +} + +#[derive(Args, Clone, Debug)] +struct CsvOptions { + /// Omit writing variable names as the first line of output. + #[arg(long)] + no_var_names: bool, + + /// Writes user-missing values like system-missing values. Otherwise, + /// user-missing values are written the same way as non-missing values. 
+ #[arg(long)] + recode: bool, + + /// Write value labels instead of values. + #[arg(long)] + labels: bool, + + /// Use print formats for numeric variables. + #[arg(long)] + print_formats: bool, + + /// Decimal point. + #[arg(long, default_value_t = '.')] + decimal: char, + + /// Delimiter. + /// + /// The default is `,` unless that would be the same as the decimal point, + /// in which case `;` is the default. + #[arg(long)] + delimiter: Option, + + /// Character used to quote the delimiter. + #[arg(long, default_value_t = '"')] + qualifier: char, +} + +impl CsvOptions { + fn write_field( + &self, + datum: &Datum>, + variable: &Variable, + writer: &mut Writer, + ) -> csv::Result<()> + where + W: Write, + { + if self.labels + && let Some(label) = variable.value_labels.get(datum) + { + writer.write_field(label) + } else if datum.is_sysmis() { + writer.write_field(" ") + } else if self.print_formats || datum.is_string() { + writer.write_field( + datum + .display(variable.print_format) + .with_trimming() + .to_small_string::<64>(), + ) + } else { + let number = datum.as_number().unwrap().unwrap(); + match variable.print_format.type_() { + Type::F + | Type::Comma + | Type::Dot + | Type::Dollar + | Type::Pct + | Type::E + | Type::CC(_) + | Type::N + | Type::Z + | Type::P + | Type::PK + | Type::IB + | Type::PIB + | Type::PIBHex + | Type::RB + | Type::RBHex + | Type::WkDay + | Type::Month => writer.write_field( + number + .display_plain() + .with_decimal(self.decimal) + .to_small_string::<64>(), + ), + + Type::Date + | Type::ADate + | Type::EDate + | Type::JDate + | Type::SDate + | Type::QYr + | Type::MoYr + | Type::WkYr => { + if number >= 0.0 + && let Some(date) = + calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) + { + writer.write_field( + format_args!( + "{:02}/{:02}/{:04}", + date.month(), + date.day(), + date.year() + ) + .to_small_string::<64>(), + ) + } else { + writer.write_field(" ") + } + } + + Type::DateTime | Type::YmdHms => { + if number >= 0.0 + 
&& let Some(date) = + calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) + && let Some(time) = NaiveTime::from_num_seconds_from_midnight_opt( + (number % (60.0 * 60.0 * 24.0)) as u32, + 0, + ) + { + writer.write_field( + format_args!( + "{:02}/{:02}/{:04} {:02}:{:02}:{:02}", + date.month(), + date.day(), + date.year(), + time.hour(), + time.minute(), + time.second() + ) + .to_small_string::<64>(), + ) + } else { + writer.write_field(" ") + } + } + + Type::MTime | Type::Time | Type::DTime => { + if let Some(time) = + NaiveTime::from_num_seconds_from_midnight_opt(number.abs() as u32, 0) + { + writer.write_field( + format_args!( + "{}{:02}:{:02}:{:02}", + if number.is_sign_negative() { "-" } else { "" }, + time.hour(), + time.minute(), + time.second() + ) + .to_small_string::<64>(), + ) + } else { + writer.write_field(" ") + } + } + + Type::A | Type::AHex => unreachable!(), + } + } + } +} + +#[derive(Args, Clone, Debug)] +struct SysOptions { + /// Write the output file with Unicode (UTF-8) encoding. + /// + /// If the input was not already encoded in Unicode, this triples the width + /// of string variables. + #[arg(long = "unicode")] + to_unicode: bool, + + /// How to compress data in the system file. + #[arg(long, default_value = "simple")] + compression: Option, +} + +/// Output file format. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)] +enum OutputFormat { + /// Comma-separated values using each variable's print format (variable + /// names are written as the first line) + Csv, + + /// System file + Sav, + + /// Portable file + Por, +} + +impl TryFrom<&Path> for OutputFormat { + type Error = AnyError; + + fn try_from(value: &Path) -> std::result::Result { + let extension = value.extension().unwrap_or_default(); + if extension.eq_ignore_ascii_case("csv") || extension.eq_ignore_ascii_case("txt") { + Ok(OutputFormat::Csv) + } else if extension.eq_ignore_ascii_case("sav") || extension.eq_ignore_ascii_case("sys") { + Ok(OutputFormat::Sav) + } else if extension.eq_ignore_ascii_case("por") { + Ok(OutputFormat::Por) + } else { + Err(anyhow!( + "Unknown output file extension '{}'", + extension.display() + )) + } + } +} + +impl Convert { + pub fn run(self) -> Result<()> { + let output_format = match self.output_format { + Some(format) => format, + None => match &self.output { + Some(output) => output.as_path().try_into()?, + _ => OutputFormat::Csv, + }, + }; + + let (dictionary, cases) = match FileType::from_file(&self.input)? { + Some(FileType::System { .. 
}) => { + fn warn(warning: anyhow::Error) { + eprintln!("warning: {warning}"); + } + + let mut system_file = ReadOptions::new(warn) + .with_encoding(self.encoding) + .with_password(self.password.clone()) + .open_file(&self.input)?; + if output_format == OutputFormat::Sav && self.sys_options.to_unicode { + system_file = system_file.into_unicode(); + } + let (dictionary, _, cases) = system_file.into_parts(); + let cases = cases.map(|result| result.map_err(AnyError::from)); + let cases = Box::new(cases) + as Box>>, AnyError>>>; + (dictionary, cases) + } + Some(FileType::Portable) => { + fn warn_portable(warning: pspp::por::Warning) { + eprintln!("warning: {warning}"); + } + + let portable_file = PortableFile::open_file(&self.input, warn_portable)?; + let (dictionary, _, cases) = portable_file.into_parts(); + let cases = cases.map(|result| result.map_err(AnyError::from)); + let cases = Box::new(cases) + as Box>>, AnyError>>>; + (dictionary, cases) + } + Some(FileType::Pc) => { + fn warn_pc(warning: pspp::pc::Warning) { + eprintln!("warning: {warning}"); + } + + let pc_file = PcFile::open_file(&self.input, warn_pc)?; + let (dictionary, _, cases) = pc_file.into_parts(); + let cases = cases.map(|result| result.map_err(AnyError::from)); + let cases = Box::new(cases) + as Box>>, AnyError>>>; + (dictionary, cases) + } + _ => bail!( + "{}: not a system, portable, or SPSS/PC+ file", + self.input.display() + ), + }; + + // Take only the first `self.max_cases` cases. + let cases = cases.take(self.max_cases.unwrap_or(usize::MAX)); + + match output_format { + OutputFormat::Csv => { + let writer = match self.output { + Some(path) => Box::new(File::create(path)?) 
as Box, + None => Box::new(stdout()), + }; + let decimal: u8 = self.csv_options.decimal.try_into()?; + let delimiter: u8 = match self.csv_options.delimiter { + Some(delimiter) => delimiter.try_into()?, + None if decimal != b',' => b',', + None => b';', + }; + let qualifier: u8 = self.csv_options.qualifier.try_into()?; + let mut output = csv::WriterBuilder::new() + .delimiter(delimiter) + .quote(qualifier) + .from_writer(writer); + if !self.csv_options.no_var_names { + output + .write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?; + } + + for case in cases { + for (datum, variable) in case?.into_iter().zip(dictionary.variables.iter()) { + self.csv_options + .write_field(&datum, variable, &mut output)?; + } + output.write_record(None::<&[u8]>)?; + } + } + OutputFormat::Sav => { + let Some(output) = &self.output else { + bail!("output file name must be specified for output to a system file") + }; + let mut output = WriteOptions::new() + .with_compression(self.sys_options.compression) + .write_file(&dictionary, output)?; + for case in cases { + output.write_case(case?)?; + } + } + OutputFormat::Por => { + let Some(output) = &self.output else { + bail!("output file name must be specified for output to a portable file") + }; + let mut output = pspp::por::WriteOptions::new().write_file(&dictionary, output)?; + for case in cases { + output.write_case(case?)?; + } + } + } + Ok(()) + } +} diff --git a/rust/pspp/src/cli/decrypt.rs b/rust/pspp/src/cli/decrypt.rs new file mode 100644 index 0000000000..50e0629cae --- /dev/null +++ b/rust/pspp/src/cli/decrypt.rs @@ -0,0 +1,57 @@ +/* PSPP - a program for statistical analysis. + * Copyright (C) 2023 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . */ + +use anyhow::{Result, anyhow}; +use clap::Args; +use pspp::crypto::EncryptedFile; +use std::{fs::File, path::PathBuf}; +use zeroize::Zeroizing; + +/// Decrypts an encrypted SPSS data, output, or syntax file. +#[derive(Args, Clone, Debug)] +pub struct Decrypt { + /// Input file name. + input: PathBuf, + + /// Output file name. + output: PathBuf, + + /// Password for decryption, with or without what SPSS calls "password encryption". + /// + /// If omitted, PSPP will prompt interactively for the password. + #[clap(short, long)] + password: Option, +} + +impl Decrypt { + pub fn run(self) -> Result<()> { + let input = EncryptedFile::new(File::open(&self.input)?)?; + let password = match self.password { + Some(password) => Zeroizing::new(password), + None => { + eprintln!("Please enter the password for {}:", self.input.display()); + readpass::from_tty().unwrap() + } + }; + let mut reader = match input.unlock(password.as_bytes()) { + Ok(reader) => reader, + Err(_) => return Err(anyhow!("Incorrect password.")), + }; + let mut writer = File::create(self.output)?; + std::io::copy(&mut reader, &mut writer)?; + Ok(()) + } +} diff --git a/rust/pspp/src/cli/identify.rs b/rust/pspp/src/cli/identify.rs new file mode 100644 index 0000000000..917e86370b --- /dev/null +++ b/rust/pspp/src/cli/identify.rs @@ -0,0 +1,46 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. 
+// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . + +use anyhow::Result; +use clap::Args; +use pspp::file::FileType; +use std::path::PathBuf; + +/// Identify the type of a file. +#[derive(Args, Clone, Debug)] +pub struct Identify { + /// File to identify. + file: PathBuf, +} + +impl Identify { + pub fn run(self) -> Result<()> { + match FileType::from_file(&self.file)? { + None => println!("unknown"), + Some(file_type) => { + print!("{}", file_type.as_extension()); + if file_type.is_encrypted() { + print!(" (encrypted)"); + } + if !file_type.is_confident() { + print!(" (low confidence)"); + } + println!(); + } + } + Ok(()) + } +} diff --git a/rust/pspp/src/cli/show.rs b/rust/pspp/src/cli/show.rs new file mode 100644 index 0000000000..3d1bfe737f --- /dev/null +++ b/rust/pspp/src/cli/show.rs @@ -0,0 +1,363 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . + +use super::parse_encoding; +use anyhow::{Result, anyhow}; +use clap::{Args, ValueEnum}; +use encoding_rs::Encoding; +use itertools::Itertools; +use pspp::{ + data::cases_to_output, + output::{ + Details, Item, Text, + drivers::{Config, Driver}, + pivot::PivotTable, + }, + sys::{ + Records, + raw::{Decoder, EncodingReport, Magic, Reader, Record, infer_encoding}, + }, +}; +use serde::Serialize; +use std::{ + cell::RefCell, + ffi::OsStr, + fmt::{Display, Write as _}, + fs::File, + io::{BufReader, Write, stdout}, + path::{Path, PathBuf}, + rc::Rc, + sync::Arc, +}; + +/// Show information about SPSS system files. +#[derive(Args, Clone, Debug)] +pub struct Show { + /// What to show. + #[arg(value_enum)] + mode: Mode, + + /// File to show. + #[arg(required = true)] + input: PathBuf, + + /// Output file name. If omitted, output is written to stdout. + output: Option, + + /// The encoding to use. + #[arg(long, value_parser = parse_encoding, help_heading = "Input file options")] + encoding: Option<&'static Encoding>, + + /// Maximum number of cases to read. + /// + /// If specified without an argument, all cases will be read. + #[arg( + long = "data", + num_args = 0..=1, + default_missing_value = "18446744073709551615", + default_value_t = 0, + help_heading = "Input file options" + )] + max_cases: u64, + + /// Output driver configuration options. + #[arg(short = 'o', help_heading = "Output options")] + output_options: Vec, + + /// Output format. + #[arg(long, short = 'f', help_heading = "Output options")] + format: Option, +} + +enum Output { + Driver { + driver: Rc>>, + mode: Mode, + }, + Json { + writer: Rc>>, + pretty: bool, + }, + Discard, +} + +impl Output { + fn show(&self, value: &T) -> Result<()> + where + T: Serialize, + for<'a> &'a T: Into
, + { + match self { + Self::Driver { driver, .. } => { + driver + .borrow_mut() + .write(&Arc::new(value.into().into_item())); + Ok(()) + } + Self::Json { .. } => self.show_json(value), + Self::Discard => Ok(()), + } + } + + fn show_json(&self, value: &T) -> Result<()> + where + T: Serialize, + { + match self { + Self::Driver { mode, driver: _ } => { + Err(anyhow!("Mode '{mode}' only supports output as JSON.")) + } + Self::Json { writer, pretty } => { + let mut writer = writer.borrow_mut(); + match pretty { + true => serde_json::to_writer_pretty(&mut *writer, value)?, + false => serde_json::to_writer(&mut *writer, value)?, + }; + writeln!(writer)?; + Ok(()) + } + Self::Discard => Ok(()), + } + } + + fn warn(&self, warning: &impl Display) { + match self { + Output::Driver { driver, .. } => { + driver + .borrow_mut() + .write(&Arc::new(Item::from(Text::new_log(warning.to_string())))); + } + Output::Json { .. } => { + #[derive(Serialize)] + struct Warning { + warning: String, + } + let warning = Warning { + warning: warning.to_string(), + }; + let _ = self.show_json(&warning); + } + Self::Discard => (), + } + } +} + +impl Show { + pub fn run(self) -> Result<()> { + let format = if let Some(format) = self.format { + format + } else if let Some(output_file) = &self.output { + match output_file + .extension() + .unwrap_or(OsStr::new("")) + .to_str() + .unwrap_or("") + { + "json" => ShowFormat::Json, + "ndjson" => ShowFormat::Ndjson, + _ => ShowFormat::Output, + } + } else { + ShowFormat::Json + }; + + let output = match format { + ShowFormat::Output => { + let mut config = String::new(); + + if let Some(file) = &self.output { + #[derive(Serialize)] + struct File<'a> { + file: &'a Path, + } + let file = File { + file: file.as_path(), + }; + let toml_file = toml::to_string_pretty(&file).unwrap(); + config.push_str(&toml_file); + } + for option in &self.output_options { + writeln!(&mut config, "{option}").unwrap(); + } + + let table: toml::Table = toml::from_str(&config)?; 
+ if !table.contains_key("driver") { + let driver = if let Some(file) = &self.output { + ::driver_type_from_filename(file).ok_or_else(|| { + anyhow!("{}: no default output format for file name", file.display()) + })? + } else { + "text" + }; + + #[derive(Serialize)] + struct DriverConfig { + driver: &'static str, + } + config.insert_str( + 0, + &toml::to_string_pretty(&DriverConfig { driver }).unwrap(), + ); + } + + let config: Config = toml::from_str(&config)?; + Output::Driver { + mode: self.mode, + driver: Rc::new(RefCell::new(Box::new(::new(&config)?))), + } + } + ShowFormat::Json | ShowFormat::Ndjson => Output::Json { + pretty: format == ShowFormat::Json, + writer: if let Some(output_file) = &self.output { + Rc::new(RefCell::new(Box::new(File::create(output_file)?))) + } else { + Rc::new(RefCell::new(Box::new(stdout()))) + }, + }, + ShowFormat::Discard => Output::Discard, + }; + + let reader = File::open(&self.input)?; + let reader = BufReader::new(reader); + let mut reader = Reader::new(reader, Box::new(|warning| output.warn(&warning)))?; + + match self.mode { + Mode::Identity => { + match reader.header().magic { + Magic::Sav => println!("SPSS System File"), + Magic::Zsav => println!("SPSS System File with Zlib compression"), + Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"), + } + return Ok(()); + } + Mode::Raw => { + output.show_json(reader.header())?; + for record in reader.records() { + output.show_json(&record?)?; + } + for (_index, case) in (0..self.max_cases).zip(reader.cases()) { + output.show_json(&case?)?; + } + } + Mode::Decoded => { + let records: Vec = reader.records().collect::, _>>()?; + let encoding = match self.encoding { + Some(encoding) => encoding, + None => infer_encoding(&records, &mut |e| output.warn(&e))?, + }; + let mut decoder = Decoder::new(encoding, |e| output.warn(&e)); + for record in records { + output.show_json(&record.decode(&mut decoder))?; + } + } + Mode::Dictionary => { + let records: Vec = 
reader.records().collect::, _>>()?; + let encoding = match self.encoding { + Some(encoding) => encoding, + None => infer_encoding(&records, &mut |e| output.warn(&e))?, + }; + let mut decoder = Decoder::new(encoding, |e| output.warn(&e)); + let records = Records::from_raw(records, &mut decoder); + let (dictionary, metadata, cases) = records + .decode( + reader.header().clone().decode(&mut decoder), + reader.cases(), + encoding, + |e| output.warn(&e), + ) + .into_parts(); + match &output { + Output::Driver { driver, mode: _ } => { + let mut output = Vec::new(); + output.push(PivotTable::from(&metadata).into()); + output.extend(dictionary.all_pivot_tables().into_iter().map_into()); + output.extend(cases_to_output(&dictionary, cases)); + driver + .borrow_mut() + .write(&Arc::new(output.into_iter().collect())); + } + Output::Json { .. } => { + output.show_json(&dictionary)?; + output.show_json(&metadata)?; + for (_index, case) in (0..self.max_cases).zip(cases) { + output.show_json(&case?)?; + } + } + Output::Discard => (), + } + } + Mode::Encodings => { + let encoding_report = EncodingReport::new(reader, self.max_cases)?; + output.show(&encoding_report)?; + } + } + + Ok(()) + } +} + +/// What to show in a system file. +#[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)] +enum Mode { + /// The kind of file. + Identity, + + /// File dictionary, with variables, value labels, attributes, ... + #[default] + #[value(alias = "dict")] + Dictionary, + + /// Possible encodings of text in file dictionary and (with `--data`) cases. + Encodings, + + /// Raw file records, without assuming a particular character encoding. + Raw, + + /// Raw file records decoded with a particular character encoding. 
+ Decoded, +} + +impl Mode { + fn as_str(&self) -> &'static str { + match self { + Mode::Dictionary => "dictionary", + Mode::Identity => "identity", + Mode::Raw => "raw", + Mode::Decoded => "decoded", + Mode::Encodings => "encodings", + } + } +} + +impl Display for Mode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, ValueEnum)] +#[serde(rename_all = "snake_case")] +enum ShowFormat { + /// Pretty-printed JSON. + #[default] + Json, + /// Newline-delimited JSON. + Ndjson, + /// Pivot tables. + Output, + /// No output. + Discard, +} diff --git a/rust/pspp/src/cli/show_pc.rs b/rust/pspp/src/cli/show_pc.rs new file mode 100644 index 0000000000..19dded7002 --- /dev/null +++ b/rust/pspp/src/cli/show_pc.rs @@ -0,0 +1,294 @@ +// PSPP - a program for statistical analysis. +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program. If not, see . 
+ +use anyhow::{Result, anyhow}; +use clap::{Args, ValueEnum}; +use itertools::Itertools; +use pspp::{ + data::cases_to_output, + output::{ + Item, Text, + drivers::{Config, Driver}, + pivot::PivotTable, + }, + pc::PcFile, +}; +use serde::Serialize; +use std::{ + cell::RefCell, + ffi::OsStr, + fmt::{Display, Write as _}, + fs::File, + io::{BufReader, Write, stdout}, + path::{Path, PathBuf}, + rc::Rc, + sync::Arc, +}; + +/// Show information about SPSS/PC+ data files. +#[derive(Args, Clone, Debug)] +pub struct ShowPc { + /// What to show. + #[arg(value_enum)] + mode: Mode, + + /// File to show. + #[arg(required = true)] + input: PathBuf, + + /// Output file name. If omitted, output is written to stdout. + output: Option, + + /// Maximum number of cases to read. + /// + /// If specified without an argument, all cases will be read. + #[arg( + long = "data", + num_args = 0..=1, + default_missing_value = "18446744073709551615", + default_value_t = 0, + help_heading = "Input file options" + )] + max_cases: usize, + + /// Output driver configuration options. + #[arg(short = 'o', help_heading = "Output options")] + output_options: Vec, + + /// Output format. + #[arg(long, short = 'f', help_heading = "Output options")] + format: Option, +} + +enum Output { + Driver { + driver: Rc>>, + mode: Mode, + }, + Json { + writer: Rc>>, + pretty: bool, + }, + Discard, +} + +impl Output { + fn show_json(&self, value: &T) -> Result<()> + where + T: Serialize, + { + match self { + Self::Driver { mode, driver: _ } => { + Err(anyhow!("Mode '{mode}' only supports output as JSON.")) + } + Self::Json { writer, pretty } => { + let mut writer = writer.borrow_mut(); + match pretty { + true => serde_json::to_writer_pretty(&mut *writer, value)?, + false => serde_json::to_writer(&mut *writer, value)?, + }; + writeln!(writer)?; + Ok(()) + } + Self::Discard => Ok(()), + } + } + + fn warn(&self, warning: &impl Display) { + match self { + Output::Driver { driver, .. 
} => {
+                driver
+                    .borrow_mut()
+                    .write(&Arc::new(Item::from(Text::new_log(warning.to_string()))));
+            }
+            Output::Json { .. } => {
+                #[derive(Serialize)]
+                struct Warning {
+                    warning: String,
+                }
+                let warning = Warning {
+                    warning: warning.to_string(),
+                };
+                let _ = self.show_json(&warning);
+            }
+            Self::Discard => (),
+        }
+    }
+}
+
+impl ShowPc {
+    /// Runs the subcommand.
+    ///
+    /// Chooses an output format, builds the matching [`Output`], then reads
+    /// `self.input` and emits whatever `self.mode` selects.
+    ///
+    /// The format is `self.format` if given.  Otherwise it is inferred from
+    /// the extension of the output file (`json`, `ndjson`, anything else
+    /// selects driver output).  With no output file it is pretty-printed
+    /// JSON.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the input or output file cannot be opened, if the assembled
+    /// driver configuration does not parse, or if reading the file or
+    /// writing JSON fails.
+    pub fn run(self) -> Result<()> {
+        let format = if let Some(format) = self.format {
+            format
+        } else if let Some(output_file) = &self.output {
+            match output_file
+                .extension()
+                .unwrap_or(OsStr::new(""))
+                .to_str()
+                .unwrap_or("")
+            {
+                "json" => ShowFormat::Json,
+                "ndjson" => ShowFormat::Ndjson,
+                _ => ShowFormat::Output,
+            }
+        } else {
+            ShowFormat::Json
+        };
+
+        let output = match format {
+            ShowFormat::Output => {
+                // Build the driver configuration as TOML text: the output
+                // file name first, then each `-o` option on its own line.
+                let mut config = String::new();
+
+                if let Some(file) = &self.output {
+                    #[derive(Serialize)]
+                    struct File<'a> {
+                        file: &'a Path,
+                    }
+                    let file = File {
+                        file: file.as_path(),
+                    };
+                    let toml_file = toml::to_string_pretty(&file).unwrap();
+                    config.push_str(&toml_file);
+                }
+                for option in &self.output_options {
+                    writeln!(&mut config, "{option}").unwrap();
+                }
+
+                // If the options did not name a driver, pick one from the
+                // output file name, or fall back to "text" for stdout.
+                let table: toml::Table = toml::from_str(&config)?;
+                if !table.contains_key("driver") {
+                    let driver = if let Some(file) = &self.output {
+                        <dyn Driver>::driver_type_from_filename(file).ok_or_else(|| {
+                            anyhow!("{}: no default output format for file name", file.display())
+                        })?
+                    } else {
+                        "text"
+                    };
+
+                    #[derive(Serialize)]
+                    struct DriverConfig {
+                        driver: &'static str,
+                    }
+                    config.insert_str(
+                        0,
+                        &toml::to_string_pretty(&DriverConfig { driver }).unwrap(),
+                    );
+                }
+
+                let config: Config = toml::from_str(&config)?;
+                Output::Driver {
+                    mode: self.mode,
+                    driver: Rc::new(RefCell::new(Box::new(<dyn Driver>::new(&config)?))),
+                }
+            }
+            ShowFormat::Json | ShowFormat::Ndjson => Output::Json {
+                pretty: format == ShowFormat::Json,
+                writer: if let Some(output_file) = &self.output {
+                    Rc::new(RefCell::new(Box::new(File::create(output_file)?)))
+                } else {
+                    Rc::new(RefCell::new(Box::new(stdout())))
+                },
+            },
+            ShowFormat::Discard => Output::Discard,
+        };
+
+        let reader = BufReader::new(File::open(&self.input)?);
+        match self.mode {
+            Mode::Dictionary => {
+                let PcFile {
+                    dictionary,
+                    metadata: _,
+                    cases,
+                } = PcFile::open(reader, |warning| output.warn(&warning))?;
+                let cases = cases.take(self.max_cases);
+
+                match &output {
+                    Output::Driver { driver, mode: _ } => {
+                        let mut output = Vec::new();
+                        output.extend(dictionary.all_pivot_tables().into_iter().map_into());
+                        output.extend(cases_to_output(&dictionary, cases));
+                        driver
+                            .borrow_mut()
+                            .write(&Arc::new(output.into_iter().collect()));
+                    }
+                    Output::Json { .. } => {
+                        output.show_json(&dictionary)?;
+                        // `cases` is already limited to `max_cases` by the
+                        // `take` above, so no second bound is needed here.
+                        for case in cases {
+                            output.show_json(&case?)?;
+                        }
+                    }
+                    Output::Discard => (),
+                }
+            }
+            Mode::Metadata => {
+                let metadata = PcFile::open(reader, |warning| output.warn(&warning))?.metadata;
+
+                match &output {
+                    Output::Driver { driver, mode: _ } => {
+                        driver
+                            .borrow_mut()
+                            .write(&Arc::new(PivotTable::from(&metadata).into()));
+                    }
+                    Output::Json { .. } => {
+                        output.show_json(&metadata)?;
+                    }
+                    Output::Discard => (),
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+/// What to show in an SPSS/PC+ file.
+#[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)]
+enum Mode {
+    /// File dictionary, with variables, value labels, ...
+    #[default]
+    #[value(alias = "dict")]
+    Dictionary,
+
+    /// File metadata not included in the dictionary.
+    Metadata,
+}
+
+impl Mode {
+    // Returns the lowercase name of the mode, as used in messages.
+    fn as_str(&self) -> &'static str {
+        match self {
+            Mode::Dictionary => "dictionary",
+            Mode::Metadata => "metadata",
+        }
+    }
+}
+
+impl Display for Mode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, ValueEnum)]
+#[serde(rename_all = "snake_case")]
+enum ShowFormat {
+    /// Pretty-printed JSON.
+    #[default]
+    Json,
+    /// Newline-delimited JSON.
+    Ndjson,
+    /// Pivot tables.
+    Output,
+    /// No output.
+    Discard,
+}
diff --git a/rust/pspp/src/cli/show_por.rs b/rust/pspp/src/cli/show_por.rs
new file mode 100644
index 0000000000..bac7c42884
--- /dev/null
+++ b/rust/pspp/src/cli/show_por.rs
@@ -0,0 +1,321 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program. If not, see <https://www.gnu.org/licenses/>.
+
+use anyhow::{Result, anyhow};
+use clap::{Args, ValueEnum};
+use itertools::Itertools;
+use pspp::{
+    data::cases_to_output,
+    output::{
+        Item, Text,
+        drivers::{Config, Driver},
+        pivot::PivotTable,
+    },
+    por::PortableFile,
+};
+use serde::Serialize;
+use std::{
+    cell::RefCell,
+    ffi::OsStr,
+    fmt::{Display, Write as _},
+    fs::File,
+    io::{BufReader, Write, stdout},
+    path::{Path, PathBuf},
+    rc::Rc,
+    sync::Arc,
+};
+
+/// Show information about SPSS portable files.
+#[derive(Args, Clone, Debug)]
+pub struct ShowPor {
+    /// What to show.
+    #[arg(value_enum)]
+    mode: Mode,
+
+    /// File to show.
+    #[arg(required = true)]
+    input: PathBuf,
+
+    /// Output file name. If omitted, output is written to stdout.
+    output: Option<PathBuf>,
+
+    /// Maximum number of cases to read.
+    ///
+    /// If specified without an argument, all cases will be read.
+    // NOTE(review): the default missing value below is u64::MAX, which only
+    // parses as `usize` on 64-bit targets.
+    #[arg(
+        long = "data",
+        num_args = 0..=1,
+        default_missing_value = "18446744073709551615",
+        default_value_t = 0,
+        help_heading = "Input file options"
+    )]
+    max_cases: usize,
+
+    /// Output driver configuration options.
+    #[arg(short = 'o', help_heading = "Output options")]
+    output_options: Vec<String>,
+
+    /// Output format.
+    #[arg(long, short = 'f', help_heading = "Output options")]
+    format: Option<ShowFormat>,
+}
+
+/// Where and how results are written.
+enum Output {
+    /// Write items through an output driver.  `mode` is kept only so that
+    /// `show_json` can name it in its error message.
+    Driver {
+        driver: Rc<RefCell<Box<dyn Driver>>>,
+        mode: Mode,
+    },
+    /// Write JSON values to `writer`, pretty-printed if `pretty` is set.
+    Json {
+        writer: Rc<RefCell<Box<dyn Write>>>,
+        pretty: bool,
+    },
+    /// Produce no output.
+    Discard,
+}
+
+impl Output {
+    /// Serializes `value` as JSON followed by a newline.
+    ///
+    /// Returns an error for driver output, which has no JSON form, and does
+    /// nothing for `Discard`.
+    fn show_json<T>(&self, value: &T) -> Result<()>
+    where
+        T: Serialize,
+    {
+        match self {
+            Self::Driver { mode, driver: _ } => {
+                Err(anyhow!("Mode '{mode}' only supports output as JSON."))
+            }
+            Self::Json { writer, pretty } => {
+                let mut writer = writer.borrow_mut();
+                match pretty {
+                    true => serde_json::to_writer_pretty(&mut *writer, value)?,
+                    false => serde_json::to_writer(&mut *writer, value)?,
+                };
+                writeln!(writer)?;
+                Ok(())
+            }
+            Self::Discard => Ok(()),
+        }
+    }
+
+    /// Reports `warning` on the output: as a log text item for a driver, or
+    /// as a `{"warning": ...}` object for JSON.
+    fn warn(&self, warning: &impl Display) {
+        match self {
+            Output::Driver { driver, .. } => {
+                driver
+                    .borrow_mut()
+                    .write(&Arc::new(Item::from(Text::new_log(warning.to_string()))));
+            }
+            Output::Json { .. } => {
+                #[derive(Serialize)]
+                struct Warning {
+                    warning: String,
+                }
+                let warning = Warning {
+                    warning: warning.to_string(),
+                };
+                // Best effort: a failure to write a warning is ignored.
+                let _ = self.show_json(&warning);
+            }
+            Self::Discard => (),
+        }
+    }
+}
+
+impl ShowPor {
+    /// Runs the subcommand.
+    ///
+    /// Chooses an output format, builds the matching [`Output`], then reads
+    /// `self.input` and emits whatever `self.mode` selects.
+    ///
+    /// The format is `self.format` if given.  Otherwise it is inferred from
+    /// the extension of the output file (`json`, `ndjson`, anything else
+    /// selects driver output).  With no output file it is pretty-printed
+    /// JSON.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the input or output file cannot be opened, if the assembled
+    /// driver configuration does not parse, if reading the file or writing
+    /// JSON fails, or if `histogram` mode is combined with driver output.
+    pub fn run(self) -> Result<()> {
+        let format = if let Some(format) = self.format {
+            format
+        } else if let Some(output_file) = &self.output {
+            match output_file
+                .extension()
+                .unwrap_or(OsStr::new(""))
+                .to_str()
+                .unwrap_or("")
+            {
+                "json" => ShowFormat::Json,
+                "ndjson" => ShowFormat::Ndjson,
+                _ => ShowFormat::Output,
+            }
+        } else {
+            ShowFormat::Json
+        };
+
+        let output = match format {
+            ShowFormat::Output => {
+                // Build the driver configuration as TOML text: the output
+                // file name first, then each `-o` option on its own line.
+                let mut config = String::new();
+
+                if let Some(file) = &self.output {
+                    #[derive(Serialize)]
+                    struct File<'a> {
+                        file: &'a Path,
+                    }
+                    let file = File {
+                        file: file.as_path(),
+                    };
+                    let toml_file = toml::to_string_pretty(&file).unwrap();
+                    config.push_str(&toml_file);
+                }
+                for option in &self.output_options {
+                    writeln!(&mut config, "{option}").unwrap();
+                }
+
+                // If the options did not name a driver, pick one from the
+                // output file name, or fall back to "text" for stdout.
+                let table: toml::Table = toml::from_str(&config)?;
+                if !table.contains_key("driver") {
+                    let driver = if let Some(file) = &self.output {
+                        <dyn Driver>::driver_type_from_filename(file).ok_or_else(|| {
+                            anyhow!("{}: no default output format for file name", file.display())
+                        })?
+                    } else {
+                        "text"
+                    };
+
+                    #[derive(Serialize)]
+                    struct DriverConfig {
+                        driver: &'static str,
+                    }
+                    config.insert_str(
+                        0,
+                        &toml::to_string_pretty(&DriverConfig { driver }).unwrap(),
+                    );
+                }
+
+                let config: Config = toml::from_str(&config)?;
+                Output::Driver {
+                    mode: self.mode,
+                    driver: Rc::new(RefCell::new(Box::new(<dyn Driver>::new(&config)?))),
+                }
+            }
+            ShowFormat::Json | ShowFormat::Ndjson => Output::Json {
+                pretty: format == ShowFormat::Json,
+                writer: if let Some(output_file) = &self.output {
+                    Rc::new(RefCell::new(Box::new(File::create(output_file)?)))
+                } else {
+                    Rc::new(RefCell::new(Box::new(stdout())))
+                },
+            },
+            ShowFormat::Discard => Output::Discard,
+        };
+
+        let reader = BufReader::new(File::open(&self.input)?);
+        match self.mode {
+            Mode::Dictionary => {
+                let PortableFile {
+                    dictionary,
+                    metadata: _,
+                    cases,
+                } = PortableFile::open(reader, |warning| output.warn(&warning))?;
+                let cases = cases.take(self.max_cases);
+
+                match &output {
+                    Output::Driver { driver, mode: _ } => {
+                        let mut output = Vec::new();
+                        output.extend(dictionary.all_pivot_tables().into_iter().map_into());
+                        output.extend(cases_to_output(&dictionary, cases));
+                        driver
+                            .borrow_mut()
+                            .write(&Arc::new(output.into_iter().collect()));
+                    }
+                    Output::Json { .. } => {
+                        output.show_json(&dictionary)?;
+                        // `cases` is already limited to `max_cases` by the
+                        // `take` above, so no second bound is needed here.
+                        for case in cases {
+                            output.show_json(&case?)?;
+                        }
+                    }
+                    Output::Discard => (),
+                }
+            }
+            Mode::Metadata => {
+                let metadata =
+                    PortableFile::open(reader, |warning| output.warn(&warning))?.metadata;
+
+                match &output {
+                    Output::Driver { driver, mode: _ } => {
+                        driver
+                            .borrow_mut()
+                            .write(&Arc::new(PivotTable::from(&metadata).into()));
+                    }
+                    Output::Json { .. } => {
+                        output.show_json(&metadata)?;
+                    }
+                    Output::Discard => (),
+                }
+            }
+            Mode::Histogram => {
+                let (histogram, translations) = PortableFile::read_histogram(reader)?;
+                // Keep only bytes that occur in the file and whose
+                // translation is nonzero and differs from the byte itself.
+                let h = histogram
+                    .into_iter()
+                    .enumerate()
+                    .filter_map(|(index, count)| {
+                        if count == 0 {
+                            return None;
+                        }
+                        let translated = translations[index as u8];
+                        (index != translated as usize && translated != 0)
+                            .then(|| (format!("{index:02x}"), translated as char, count))
+                    })
+                    .collect::<Vec<_>>();
+                output.show_json(&h)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// What to show in a portable file.
+#[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)]
+enum Mode {
+    /// File dictionary, with variables, value labels, ...
+    #[default]
+    #[value(alias = "dict")]
+    Dictionary,
+
+    /// File metadata not included in the dictionary.
+    Metadata,
+
+    /// Histogram of character incidence in the file.
+    Histogram,
+}
+
+impl Mode {
+    // Returns the lowercase name of the mode, as used in messages.
+    fn as_str(&self) -> &'static str {
+        match self {
+            Mode::Dictionary => "dictionary",
+            Mode::Metadata => "metadata",
+            Mode::Histogram => "histogram",
+        }
+    }
+}
+
+impl Display for Mode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, ValueEnum)]
+#[serde(rename_all = "snake_case")]
+enum ShowFormat {
+    /// Pretty-printed JSON.
+    #[default]
+    Json,
+    /// Newline-delimited JSON.
+    Ndjson,
+    /// Pivot tables.
+    Output,
+    /// No output.
+    Discard,
+}
diff --git a/rust/pspp/src/cli/show_spv.rs b/rust/pspp/src/cli/show_spv.rs
new file mode 100644
index 0000000000..fa399f9067
--- /dev/null
+++ b/rust/pspp/src/cli/show_spv.rs
@@ -0,0 +1,139 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program. If not, see <https://www.gnu.org/licenses/>.
+
+use anyhow::Result;
+use clap::{Args, ValueEnum};
+use pspp::output::{Criteria, Item};
+use std::{fmt::Display, path::PathBuf};
+
+/// Show information about SPSS viewer files (SPV files).
+#[derive(Args, Clone, Debug)]
+pub struct ShowSpv {
+    /// What to show.
+    #[arg(value_enum)]
+    mode: Mode,
+
+    /// File to show.
+    ///
+    /// For most modes, this should be a `.spv` file. For `convert-table-look`,
+    /// this should be a `.tlo` or `.stt` file.
+    #[arg(required = true)]
+    input: PathBuf,
+
+    /// Input selection options.
+    #[command(flatten)]
+    criteria: Criteria,
+
+    /// Include ZIP member names in `dir` output.
+    #[arg(long = "member-names")]
+    show_member_names: bool,
+}
+
+/// What to show in a viewer file.
+#[derive(Clone, Copy, Debug, PartialEq, ValueEnum)]
+enum Mode {
+    /// List tables and other items.
+    #[value(alias = "dir")]
+    Directory,
+
+    /// Copies first selected TableLook into output in `.stt` format.
+    GetTableLook,
+
+    /// Reads `.tlo` or `.stt` TableLook and outputs as `.stt` format.
+    ConvertTableLook,
+
+    /// Prints contents.
+    View,
+}
+
+impl Mode {
+    // Returns the kebab-case name of the mode, as used in messages.
+    fn as_str(&self) -> &'static str {
+        match self {
+            Mode::Directory => "directory",
+            Mode::GetTableLook => "get-table-look",
+            Mode::ConvertTableLook => "convert-table-look",
+            Mode::View => "view",
+        }
+    }
+}
+
+impl Display for Mode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+impl ShowSpv {
+    /// Runs the subcommand.
+    ///
+    /// `directory` and `view` read the SPV file named by `self.input`, apply
+    /// the selection criteria, and print each remaining top-level child to
+    /// stdout, as a directory listing or in full respectively.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the SPV file cannot be read, or if the selected mode is one
+    /// that is not implemented yet.
+    pub fn run(self) -> Result<()> {
+        match self.mode {
+            Mode::Directory => {
+                let item = Item::from_spv_file(&self.input)?.0;
+                let item = self.criteria.apply(item);
+                for child in item.details.children() {
+                    print_item_directory(&child, 0, self.show_member_names);
+                }
+                Ok(())
+            }
+            Mode::View => {
+                let item = Item::from_spv_file(&self.input)?.0;
+                let item = self.criteria.apply(item);
+                for child in item.details.children() {
+                    println!("{child}");
+                }
+                Ok(())
+            }
+            Mode::GetTableLook | Mode::ConvertTableLook => {
+                // These modes can be selected on the command line but have
+                // no implementation yet.  Report that as an ordinary error
+                // rather than panicking through `todo!()`.
+                anyhow::bail!("Mode '{}' is not yet implemented.", self.mode)
+            }
+        }
+    }
+}
+
+// Prints one line describing `item`, indented according to `level`, then
+// recurses into its children one level deeper.  ZIP member names are
+// appended only when `show_member_names` is set and the item has SPV info.
+//
+// NOTE(review): whitespace in this dump is collapsed, so the indent literal
+// below may originally have been wider -- confirm against the repository.
+fn print_item_directory(item: &Item, level: usize, show_member_names: bool) {
+    for _ in 0..level {
+        print!(" ");
+    }
+    print!("- {} {:?}", item.details.kind(), item.label());
+    if let Some(table) = item.details.as_table() {
+        let title = table.title().display(table).to_string();
+        // Skip the title when it only repeats the label.
+        if item.label.as_ref().is_none_or(|label| label != &title) {
+            print!(" title {title:?}");
+        }
+    }
+    if let Some(command_name) = &item.command_name {
+        print!(" command {command_name:?}");
+    }
+    if let Some(subtype) = item.subtype()
+        && item.label.as_ref().is_none_or(|label| label != &subtype)
+    {
+        print!(" subtype {subtype:?}");
+    }
+    if !item.show {
+        if item.details.is_heading() {
+            print!(" (collapsed)");
+        } else {
+            print!(" (hidden)");
+        }
+    }
+    if show_member_names && let Some(spv_info) = &item.spv_info {
+        for (index, name) in spv_info.member_names().into_iter().enumerate() {
+            print!(" {} {name:?}", if index == 0 { "in" } else { "and" });
+        }
+    }
+    println!();
+    for child in item.details.children() {
+        print_item_directory(&child, level + 1, show_member_names);
+    }
+}
diff --git a/rust/pspp/src/convert.rs b/rust/pspp/src/convert.rs
deleted file mode 100644
index 249df65b7a..0000000000
--- a/rust/pspp/src/convert.rs
+++ /dev/null
@@ -1,402 +0,0 @@
-/* PSPP - a program for statistical analysis.
- * Copyright (C) 2023 Free Software Foundation, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>. */
-
-use std::{
-    fs::File,
-    io::{Write, stdout},
-    path::{Path, PathBuf},
-};
-
-use anyhow::{Error as AnyError, Result, anyhow, bail};
-use chrono::{Datelike, NaiveTime, Timelike};
-use clap::{Args, ValueEnum};
-use csv::Writer;
-use encoding_rs::Encoding;
-use pspp::{
-    calendar::calendar_offset_to_gregorian,
-    data::{ByteString, Case, Datum, WithEncoding},
-    file::FileType,
-    format::{DisplayPlain, Type},
-    pc::PcFile,
-    por::PortableFile,
-    sys::{ReadOptions, WriteOptions, raw::records::Compression},
-    util::ToSmallString,
-    variable::Variable,
-};
-
-use crate::parse_encoding;
-
-/// Convert SPSS data files into other formats.
-#[derive(Args, Clone, Debug)]
-pub struct Convert {
-    /// Input file name.
-    input: PathBuf,
-
-    /// Output file name (if omitted, output is written to stdout).
-    output: Option<PathBuf>,
-
-    /// Format for output file (if omitted, the intended format is inferred
-    /// based on file extension).
-    #[arg(short = 'O')]
-    output_format: Option<OutputFormat>,
-
-    /// The encoding to use for reading the input file.
- #[arg(short = 'e', long, value_parser = parse_encoding)] - encoding: Option<&'static Encoding>, - - /// Password for decryption, with or without what SPSS calls "password encryption". - /// - /// Specify only for an encrypted system file. - #[clap(short, long)] - password: Option, - - /// Maximum number of cases to print. - #[arg(short = 'c', long = "cases")] - max_cases: Option, - - #[command(flatten, next_help_heading = "Options for CSV output")] - csv_options: CsvOptions, - - #[command(flatten, next_help_heading = "Options for system file output")] - sys_options: SysOptions, -} - -#[derive(Args, Clone, Debug)] -struct CsvOptions { - /// Omit writing variable names as the first line of output. - #[arg(long)] - no_var_names: bool, - - /// Writes user-missing values like system-missing values. Otherwise, - /// user-missing values are written the same way as non-missing values. - #[arg(long)] - recode: bool, - - /// Write value labels instead of values. - #[arg(long)] - labels: bool, - - /// Use print formats for numeric variables. - #[arg(long)] - print_formats: bool, - - /// Decimal point. - #[arg(long, default_value_t = '.')] - decimal: char, - - /// Delimiter. - /// - /// The default is `,` unless that would be the same as the decimal point, - /// in which case `;` is the default. - #[arg(long)] - delimiter: Option, - - /// Character used to quote the delimiter. 
- #[arg(long, default_value_t = '"')] - qualifier: char, -} - -impl CsvOptions { - fn write_field( - &self, - datum: &Datum>, - variable: &Variable, - writer: &mut Writer, - ) -> csv::Result<()> - where - W: Write, - { - if self.labels - && let Some(label) = variable.value_labels.get(datum) - { - writer.write_field(label) - } else if datum.is_sysmis() { - writer.write_field(" ") - } else if self.print_formats || datum.is_string() { - writer.write_field( - datum - .display(variable.print_format) - .with_trimming() - .to_small_string::<64>(), - ) - } else { - let number = datum.as_number().unwrap().unwrap(); - match variable.print_format.type_() { - Type::F - | Type::Comma - | Type::Dot - | Type::Dollar - | Type::Pct - | Type::E - | Type::CC(_) - | Type::N - | Type::Z - | Type::P - | Type::PK - | Type::IB - | Type::PIB - | Type::PIBHex - | Type::RB - | Type::RBHex - | Type::WkDay - | Type::Month => writer.write_field( - number - .display_plain() - .with_decimal(self.decimal) - .to_small_string::<64>(), - ), - - Type::Date - | Type::ADate - | Type::EDate - | Type::JDate - | Type::SDate - | Type::QYr - | Type::MoYr - | Type::WkYr => { - if number >= 0.0 - && let Some(date) = - calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) - { - writer.write_field( - format_args!( - "{:02}/{:02}/{:04}", - date.month(), - date.day(), - date.year() - ) - .to_small_string::<64>(), - ) - } else { - writer.write_field(" ") - } - } - - Type::DateTime | Type::YmdHms => { - if number >= 0.0 - && let Some(date) = - calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0) - && let Some(time) = NaiveTime::from_num_seconds_from_midnight_opt( - (number % (60.0 * 60.0 * 24.0)) as u32, - 0, - ) - { - writer.write_field( - format_args!( - "{:02}/{:02}/{:04} {:02}:{:02}:{:02}", - date.month(), - date.day(), - date.year(), - time.hour(), - time.minute(), - time.second() - ) - .to_small_string::<64>(), - ) - } else { - writer.write_field(" ") - } - } - - Type::MTime | Type::Time | Type::DTime 
=> { - if let Some(time) = - NaiveTime::from_num_seconds_from_midnight_opt(number.abs() as u32, 0) - { - writer.write_field( - format_args!( - "{}{:02}:{:02}:{:02}", - if number.is_sign_negative() { "-" } else { "" }, - time.hour(), - time.minute(), - time.second() - ) - .to_small_string::<64>(), - ) - } else { - writer.write_field(" ") - } - } - - Type::A | Type::AHex => unreachable!(), - } - } - } -} - -#[derive(Args, Clone, Debug)] -struct SysOptions { - /// Write the output file with Unicode (UTF-8) encoding. - /// - /// If the input was not already encoded in Unicode, this triples the width - /// of string variables. - #[arg(long = "unicode")] - to_unicode: bool, - - /// How to compress data in the system file. - #[arg(long, default_value = "simple")] - compression: Option, -} - -/// Output file format. -#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)] -enum OutputFormat { - /// Comma-separated values using each variable's print format (variable - /// names are written as the first line) - Csv, - - /// System file - Sav, - - /// Portable file - Por, -} - -impl TryFrom<&Path> for OutputFormat { - type Error = AnyError; - - fn try_from(value: &Path) -> std::result::Result { - let extension = value.extension().unwrap_or_default(); - if extension.eq_ignore_ascii_case("csv") || extension.eq_ignore_ascii_case("txt") { - Ok(OutputFormat::Csv) - } else if extension.eq_ignore_ascii_case("sav") || extension.eq_ignore_ascii_case("sys") { - Ok(OutputFormat::Sav) - } else if extension.eq_ignore_ascii_case("por") { - Ok(OutputFormat::Por) - } else { - Err(anyhow!( - "Unknown output file extension '{}'", - extension.display() - )) - } - } -} - -impl Convert { - pub fn run(self) -> Result<()> { - let output_format = match self.output_format { - Some(format) => format, - None => match &self.output { - Some(output) => output.as_path().try_into()?, - _ => OutputFormat::Csv, - }, - }; - - let (dictionary, cases) = match FileType::from_file(&self.input)? 
{ - Some(FileType::System { .. }) => { - fn warn(warning: anyhow::Error) { - eprintln!("warning: {warning}"); - } - - let mut system_file = ReadOptions::new(warn) - .with_encoding(self.encoding) - .with_password(self.password.clone()) - .open_file(&self.input)?; - if output_format == OutputFormat::Sav && self.sys_options.to_unicode { - system_file = system_file.into_unicode(); - } - let (dictionary, _, cases) = system_file.into_parts(); - let cases = cases.map(|result| result.map_err(AnyError::from)); - let cases = Box::new(cases) - as Box>>, AnyError>>>; - (dictionary, cases) - } - Some(FileType::Portable) => { - fn warn_portable(warning: pspp::por::Warning) { - eprintln!("warning: {warning}"); - } - - let portable_file = PortableFile::open_file(&self.input, warn_portable)?; - let (dictionary, _, cases) = portable_file.into_parts(); - let cases = cases.map(|result| result.map_err(AnyError::from)); - let cases = Box::new(cases) - as Box>>, AnyError>>>; - (dictionary, cases) - } - Some(FileType::Pc) => { - fn warn_pc(warning: pspp::pc::Warning) { - eprintln!("warning: {warning}"); - } - - let pc_file = PcFile::open_file(&self.input, warn_pc)?; - let (dictionary, _, cases) = pc_file.into_parts(); - let cases = cases.map(|result| result.map_err(AnyError::from)); - let cases = Box::new(cases) - as Box>>, AnyError>>>; - (dictionary, cases) - } - _ => bail!( - "{}: not a system, portable, or SPSS/PC+ file", - self.input.display() - ), - }; - - // Take only the first `self.max_cases` cases. - let cases = cases.take(self.max_cases.unwrap_or(usize::MAX)); - - match output_format { - OutputFormat::Csv => { - let writer = match self.output { - Some(path) => Box::new(File::create(path)?) 
as Box, - None => Box::new(stdout()), - }; - let decimal: u8 = self.csv_options.decimal.try_into()?; - let delimiter: u8 = match self.csv_options.delimiter { - Some(delimiter) => delimiter.try_into()?, - None if decimal != b',' => b',', - None => b';', - }; - let qualifier: u8 = self.csv_options.qualifier.try_into()?; - let mut output = csv::WriterBuilder::new() - .delimiter(delimiter) - .quote(qualifier) - .from_writer(writer); - if !self.csv_options.no_var_names { - output - .write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?; - } - - for case in cases { - for (datum, variable) in case?.into_iter().zip(dictionary.variables.iter()) { - self.csv_options - .write_field(&datum, variable, &mut output)?; - } - output.write_record(None::<&[u8]>)?; - } - } - OutputFormat::Sav => { - let Some(output) = &self.output else { - bail!("output file name must be specified for output to a system file") - }; - let mut output = WriteOptions::new() - .with_compression(self.sys_options.compression) - .write_file(&dictionary, output)?; - for case in cases { - output.write_case(case?)?; - } - } - OutputFormat::Por => { - let Some(output) = &self.output else { - bail!("output file name must be specified for output to a portable file") - }; - let mut output = pspp::por::WriteOptions::new().write_file(&dictionary, output)?; - for case in cases { - output.write_case(case?)?; - } - } - } - Ok(()) - } -} diff --git a/rust/pspp/src/decrypt.rs b/rust/pspp/src/decrypt.rs deleted file mode 100644 index 50e0629cae..0000000000 --- a/rust/pspp/src/decrypt.rs +++ /dev/null @@ -1,57 +0,0 @@ -/* PSPP - a program for statistical analysis. - * Copyright (C) 2023 Free Software Foundation, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . */ - -use anyhow::{Result, anyhow}; -use clap::Args; -use pspp::crypto::EncryptedFile; -use std::{fs::File, path::PathBuf}; -use zeroize::Zeroizing; - -/// Decrypts an encrypted SPSS data, output, or syntax file. -#[derive(Args, Clone, Debug)] -pub struct Decrypt { - /// Input file name. - input: PathBuf, - - /// Output file name. - output: PathBuf, - - /// Password for decryption, with or without what SPSS calls "password encryption". - /// - /// If omitted, PSPP will prompt interactively for the password. - #[clap(short, long)] - password: Option, -} - -impl Decrypt { - pub fn run(self) -> Result<()> { - let input = EncryptedFile::new(File::open(&self.input)?)?; - let password = match self.password { - Some(password) => Zeroizing::new(password), - None => { - eprintln!("Please enter the password for {}:", self.input.display()); - readpass::from_tty().unwrap() - } - }; - let mut reader = match input.unlock(password.as_bytes()) { - Ok(reader) => reader, - Err(_) => return Err(anyhow!("Incorrect password.")), - }; - let mut writer = File::create(self.output)?; - std::io::copy(&mut reader, &mut writer)?; - Ok(()) - } -} diff --git a/rust/pspp/src/identify.rs b/rust/pspp/src/identify.rs deleted file mode 100644 index 917e86370b..0000000000 --- a/rust/pspp/src/identify.rs +++ /dev/null @@ -1,46 +0,0 @@ -// PSPP - a program for statistical analysis. -// Copyright (C) 2025 Free Software Foundation, Inc. 
-// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -// details. -// -// You should have received a copy of the GNU General Public License along with -// this program. If not, see . - -use anyhow::Result; -use clap::Args; -use pspp::file::FileType; -use std::path::PathBuf; - -/// Identify the type of a file. -#[derive(Args, Clone, Debug)] -pub struct Identify { - /// File to identify. - file: PathBuf, -} - -impl Identify { - pub fn run(self) -> Result<()> { - match FileType::from_file(&self.file)? { - None => println!("unknown"), - Some(file_type) => { - print!("{}", file_type.as_extension()); - if file_type.is_encrypted() { - print!(" (encrypted)"); - } - if !file_type.is_confident() { - print!(" (low confidence)"); - } - println!(); - } - } - Ok(()) - } -} diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index df4d3715b3..a1f72129d9 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -14,68 +14,12 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -use anyhow::Result; -use clap::{Parser, Subcommand}; -use encoding_rs::Encoding; -use thiserror::Error as ThisError; +use clap::Parser; -use crate::{ - convert::Convert, decrypt::Decrypt, identify::Identify, show::Show, show_pc::ShowPc, - show_por::ShowPor, show_spv::ShowSpv, -}; +use crate::cli::Cli; -mod convert; -mod decrypt; -mod identify; -mod show; -mod show_pc; -mod show_por; -mod show_spv; +mod cli; -/// PSPP, a program for statistical analysis of sampled data. 
-#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -struct Cli { - #[command(subcommand)] - command: Command, -} - -#[derive(Subcommand, Clone, Debug)] -enum Command { - Convert(Convert), - Decrypt(Decrypt), - Identify(Identify), - Show(Show), - ShowPor(ShowPor), - ShowPc(ShowPc), - ShowSpv(ShowSpv), -} - -impl Command { - fn run(self) -> Result<()> { - match self { - Command::Convert(convert) => convert.run(), - Command::Decrypt(decrypt) => decrypt.run(), - Command::Identify(identify) => identify.run(), - Command::Show(show) => show.run(), - Command::ShowPor(show_por) => show_por.run(), - Command::ShowPc(show_pc) => show_pc.run(), - Command::ShowSpv(show_spv) => show_spv.run(), - } - } -} - -#[derive(ThisError, Debug)] -#[error("{0}: unknown encoding")] -struct UnknownEncodingError(String); - -fn parse_encoding(arg: &str) -> Result<&'static Encoding, UnknownEncodingError> { - match Encoding::for_label_no_replacement(arg.as_bytes()) { - Some(encoding) => Ok(encoding), - None => Err(UnknownEncodingError(arg.to_string())), - } -} - -fn main() -> Result<()> { +fn main() -> anyhow::Result<()> { Cli::parse().command.run() } diff --git a/rust/pspp/src/show.rs b/rust/pspp/src/show.rs deleted file mode 100644 index c83cddde78..0000000000 --- a/rust/pspp/src/show.rs +++ /dev/null @@ -1,363 +0,0 @@ -// PSPP - a program for statistical analysis. -// Copyright (C) 2025 Free Software Foundation, Inc. -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -// details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program. If not, see . - -use crate::parse_encoding; -use anyhow::{Result, anyhow}; -use clap::{Args, ValueEnum}; -use encoding_rs::Encoding; -use itertools::Itertools; -use pspp::{ - data::cases_to_output, - output::{ - Details, Item, Text, - drivers::{Config, Driver}, - pivot::PivotTable, - }, - sys::{ - Records, - raw::{Decoder, EncodingReport, Magic, Reader, Record, infer_encoding}, - }, -}; -use serde::Serialize; -use std::{ - cell::RefCell, - ffi::OsStr, - fmt::{Display, Write as _}, - fs::File, - io::{BufReader, Write, stdout}, - path::{Path, PathBuf}, - rc::Rc, - sync::Arc, -}; - -/// Show information about SPSS system files. -#[derive(Args, Clone, Debug)] -pub struct Show { - /// What to show. - #[arg(value_enum)] - mode: Mode, - - /// File to show. - #[arg(required = true)] - input: PathBuf, - - /// Output file name. If omitted, output is written to stdout. - output: Option, - - /// The encoding to use. - #[arg(long, value_parser = parse_encoding, help_heading = "Input file options")] - encoding: Option<&'static Encoding>, - - /// Maximum number of cases to read. - /// - /// If specified without an argument, all cases will be read. - #[arg( - long = "data", - num_args = 0..=1, - default_missing_value = "18446744073709551615", - default_value_t = 0, - help_heading = "Input file options" - )] - max_cases: u64, - - /// Output driver configuration options. - #[arg(short = 'o', help_heading = "Output options")] - output_options: Vec, - - /// Output format. - #[arg(long, short = 'f', help_heading = "Output options")] - format: Option, -} - -enum Output { - Driver { - driver: Rc>>, - mode: Mode, - }, - Json { - writer: Rc>>, - pretty: bool, - }, - Discard, -} - -impl Output { - fn show(&self, value: &T) -> Result<()> - where - T: Serialize, - for<'a> &'a T: Into
, - { - match self { - Self::Driver { driver, .. } => { - driver - .borrow_mut() - .write(&Arc::new(value.into().into_item())); - Ok(()) - } - Self::Json { .. } => self.show_json(value), - Self::Discard => Ok(()), - } - } - - fn show_json(&self, value: &T) -> Result<()> - where - T: Serialize, - { - match self { - Self::Driver { mode, driver: _ } => { - Err(anyhow!("Mode '{mode}' only supports output as JSON.")) - } - Self::Json { writer, pretty } => { - let mut writer = writer.borrow_mut(); - match pretty { - true => serde_json::to_writer_pretty(&mut *writer, value)?, - false => serde_json::to_writer(&mut *writer, value)?, - }; - writeln!(writer)?; - Ok(()) - } - Self::Discard => Ok(()), - } - } - - fn warn(&self, warning: &impl Display) { - match self { - Output::Driver { driver, .. } => { - driver - .borrow_mut() - .write(&Arc::new(Item::from(Text::new_log(warning.to_string())))); - } - Output::Json { .. } => { - #[derive(Serialize)] - struct Warning { - warning: String, - } - let warning = Warning { - warning: warning.to_string(), - }; - let _ = self.show_json(&warning); - } - Self::Discard => (), - } - } -} - -impl Show { - pub fn run(self) -> Result<()> { - let format = if let Some(format) = self.format { - format - } else if let Some(output_file) = &self.output { - match output_file - .extension() - .unwrap_or(OsStr::new("")) - .to_str() - .unwrap_or("") - { - "json" => ShowFormat::Json, - "ndjson" => ShowFormat::Ndjson, - _ => ShowFormat::Output, - } - } else { - ShowFormat::Json - }; - - let output = match format { - ShowFormat::Output => { - let mut config = String::new(); - - if let Some(file) = &self.output { - #[derive(Serialize)] - struct File<'a> { - file: &'a Path, - } - let file = File { - file: file.as_path(), - }; - let toml_file = toml::to_string_pretty(&file).unwrap(); - config.push_str(&toml_file); - } - for option in &self.output_options { - writeln!(&mut config, "{option}").unwrap(); - } - - let table: toml::Table = toml::from_str(&config)?; 
- if !table.contains_key("driver") { - let driver = if let Some(file) = &self.output { - ::driver_type_from_filename(file).ok_or_else(|| { - anyhow!("{}: no default output format for file name", file.display()) - })? - } else { - "text" - }; - - #[derive(Serialize)] - struct DriverConfig { - driver: &'static str, - } - config.insert_str( - 0, - &toml::to_string_pretty(&DriverConfig { driver }).unwrap(), - ); - } - - let config: Config = toml::from_str(&config)?; - Output::Driver { - mode: self.mode, - driver: Rc::new(RefCell::new(Box::new(::new(&config)?))), - } - } - ShowFormat::Json | ShowFormat::Ndjson => Output::Json { - pretty: format == ShowFormat::Json, - writer: if let Some(output_file) = &self.output { - Rc::new(RefCell::new(Box::new(File::create(output_file)?))) - } else { - Rc::new(RefCell::new(Box::new(stdout()))) - }, - }, - ShowFormat::Discard => Output::Discard, - }; - - let reader = File::open(&self.input)?; - let reader = BufReader::new(reader); - let mut reader = Reader::new(reader, Box::new(|warning| output.warn(&warning)))?; - - match self.mode { - Mode::Identity => { - match reader.header().magic { - Magic::Sav => println!("SPSS System File"), - Magic::Zsav => println!("SPSS System File with Zlib compression"), - Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"), - } - return Ok(()); - } - Mode::Raw => { - output.show_json(reader.header())?; - for record in reader.records() { - output.show_json(&record?)?; - } - for (_index, case) in (0..self.max_cases).zip(reader.cases()) { - output.show_json(&case?)?; - } - } - Mode::Decoded => { - let records: Vec = reader.records().collect::, _>>()?; - let encoding = match self.encoding { - Some(encoding) => encoding, - None => infer_encoding(&records, &mut |e| output.warn(&e))?, - }; - let mut decoder = Decoder::new(encoding, |e| output.warn(&e)); - for record in records { - output.show_json(&record.decode(&mut decoder))?; - } - } - Mode::Dictionary => { - let records: Vec = 
reader.records().collect::, _>>()?; - let encoding = match self.encoding { - Some(encoding) => encoding, - None => infer_encoding(&records, &mut |e| output.warn(&e))?, - }; - let mut decoder = Decoder::new(encoding, |e| output.warn(&e)); - let records = Records::from_raw(records, &mut decoder); - let (dictionary, metadata, cases) = records - .decode( - reader.header().clone().decode(&mut decoder), - reader.cases(), - encoding, - |e| output.warn(&e), - ) - .into_parts(); - match &output { - Output::Driver { driver, mode: _ } => { - let mut output = Vec::new(); - output.push(PivotTable::from(&metadata).into()); - output.extend(dictionary.all_pivot_tables().into_iter().map_into()); - output.extend(cases_to_output(&dictionary, cases)); - driver - .borrow_mut() - .write(&Arc::new(output.into_iter().collect())); - } - Output::Json { .. } => { - output.show_json(&dictionary)?; - output.show_json(&metadata)?; - for (_index, case) in (0..self.max_cases).zip(cases) { - output.show_json(&case?)?; - } - } - Output::Discard => (), - } - } - Mode::Encodings => { - let encoding_report = EncodingReport::new(reader, self.max_cases)?; - output.show(&encoding_report)?; - } - } - - Ok(()) - } -} - -/// What to show in a system file. -#[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)] -enum Mode { - /// The kind of file. - Identity, - - /// File dictionary, with variables, value labels, attributes, ... - #[default] - #[value(alias = "dict")] - Dictionary, - - /// Possible encodings of text in file dictionary and (with `--data`) cases. - Encodings, - - /// Raw file records, without assuming a particular character encoding. - Raw, - - /// Raw file records decoded with a particular character encoding. 
- Decoded, -} - -impl Mode { - fn as_str(&self) -> &'static str { - match self { - Mode::Dictionary => "dictionary", - Mode::Identity => "identity", - Mode::Raw => "raw", - Mode::Decoded => "decoded", - Mode::Encodings => "encodings", - } - } -} - -impl Display for Mode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, ValueEnum)] -#[serde(rename_all = "snake_case")] -enum ShowFormat { - /// Pretty-printed JSON. - #[default] - Json, - /// Newline-delimited JSON. - Ndjson, - /// Pivot tables. - Output, - /// No output. - Discard, -} diff --git a/rust/pspp/src/show_pc.rs b/rust/pspp/src/show_pc.rs deleted file mode 100644 index 19dded7002..0000000000 --- a/rust/pspp/src/show_pc.rs +++ /dev/null @@ -1,294 +0,0 @@ -// PSPP - a program for statistical analysis. -// Copyright (C) 2025 Free Software Foundation, Inc. -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -// details. -// -// You should have received a copy of the GNU General Public License along with -// this program. If not, see . 
- -use anyhow::{Result, anyhow}; -use clap::{Args, ValueEnum}; -use itertools::Itertools; -use pspp::{ - data::cases_to_output, - output::{ - Item, Text, - drivers::{Config, Driver}, - pivot::PivotTable, - }, - pc::PcFile, -}; -use serde::Serialize; -use std::{ - cell::RefCell, - ffi::OsStr, - fmt::{Display, Write as _}, - fs::File, - io::{BufReader, Write, stdout}, - path::{Path, PathBuf}, - rc::Rc, - sync::Arc, -}; - -/// Show information about SPSS/PC+ data files. -#[derive(Args, Clone, Debug)] -pub struct ShowPc { - /// What to show. - #[arg(value_enum)] - mode: Mode, - - /// File to show. - #[arg(required = true)] - input: PathBuf, - - /// Output file name. If omitted, output is written to stdout. - output: Option, - - /// Maximum number of cases to read. - /// - /// If specified without an argument, all cases will be read. - #[arg( - long = "data", - num_args = 0..=1, - default_missing_value = "18446744073709551615", - default_value_t = 0, - help_heading = "Input file options" - )] - max_cases: usize, - - /// Output driver configuration options. - #[arg(short = 'o', help_heading = "Output options")] - output_options: Vec, - - /// Output format. - #[arg(long, short = 'f', help_heading = "Output options")] - format: Option, -} - -enum Output { - Driver { - driver: Rc>>, - mode: Mode, - }, - Json { - writer: Rc>>, - pretty: bool, - }, - Discard, -} - -impl Output { - fn show_json(&self, value: &T) -> Result<()> - where - T: Serialize, - { - match self { - Self::Driver { mode, driver: _ } => { - Err(anyhow!("Mode '{mode}' only supports output as JSON.")) - } - Self::Json { writer, pretty } => { - let mut writer = writer.borrow_mut(); - match pretty { - true => serde_json::to_writer_pretty(&mut *writer, value)?, - false => serde_json::to_writer(&mut *writer, value)?, - }; - writeln!(writer)?; - Ok(()) - } - Self::Discard => Ok(()), - } - } - - fn warn(&self, warning: &impl Display) { - match self { - Output::Driver { driver, .. 
} => { - driver - .borrow_mut() - .write(&Arc::new(Item::from(Text::new_log(warning.to_string())))); - } - Output::Json { .. } => { - #[derive(Serialize)] - struct Warning { - warning: String, - } - let warning = Warning { - warning: warning.to_string(), - }; - let _ = self.show_json(&warning); - } - Self::Discard => (), - } - } -} - -impl ShowPc { - pub fn run(self) -> Result<()> { - let format = if let Some(format) = self.format { - format - } else if let Some(output_file) = &self.output { - match output_file - .extension() - .unwrap_or(OsStr::new("")) - .to_str() - .unwrap_or("") - { - "json" => ShowFormat::Json, - "ndjson" => ShowFormat::Ndjson, - _ => ShowFormat::Output, - } - } else { - ShowFormat::Json - }; - - let output = match format { - ShowFormat::Output => { - let mut config = String::new(); - - if let Some(file) = &self.output { - #[derive(Serialize)] - struct File<'a> { - file: &'a Path, - } - let file = File { - file: file.as_path(), - }; - let toml_file = toml::to_string_pretty(&file).unwrap(); - config.push_str(&toml_file); - } - for option in &self.output_options { - writeln!(&mut config, "{option}").unwrap(); - } - - let table: toml::Table = toml::from_str(&config)?; - if !table.contains_key("driver") { - let driver = if let Some(file) = &self.output { - ::driver_type_from_filename(file).ok_or_else(|| { - anyhow!("{}: no default output format for file name", file.display()) - })? 
- } else { - "text" - }; - - #[derive(Serialize)] - struct DriverConfig { - driver: &'static str, - } - config.insert_str( - 0, - &toml::to_string_pretty(&DriverConfig { driver }).unwrap(), - ); - } - - let config: Config = toml::from_str(&config)?; - Output::Driver { - mode: self.mode, - driver: Rc::new(RefCell::new(Box::new(::new(&config)?))), - } - } - ShowFormat::Json | ShowFormat::Ndjson => Output::Json { - pretty: format == ShowFormat::Json, - writer: if let Some(output_file) = &self.output { - Rc::new(RefCell::new(Box::new(File::create(output_file)?))) - } else { - Rc::new(RefCell::new(Box::new(stdout()))) - }, - }, - ShowFormat::Discard => Output::Discard, - }; - - let reader = BufReader::new(File::open(&self.input)?); - match self.mode { - Mode::Dictionary => { - let PcFile { - dictionary, - metadata: _, - cases, - } = PcFile::open(reader, |warning| output.warn(&warning))?; - let cases = cases.take(self.max_cases); - - match &output { - Output::Driver { driver, mode: _ } => { - let mut output = Vec::new(); - output.extend(dictionary.all_pivot_tables().into_iter().map_into()); - output.extend(cases_to_output(&dictionary, cases)); - driver - .borrow_mut() - .write(&Arc::new(output.into_iter().collect())); - } - Output::Json { .. } => { - output.show_json(&dictionary)?; - for (_index, case) in (0..self.max_cases).zip(cases) { - output.show_json(&case?)?; - } - } - Output::Discard => (), - } - } - Mode::Metadata => { - let metadata = PcFile::open(reader, |warning| output.warn(&warning))?.metadata; - - match &output { - Output::Driver { driver, mode: _ } => { - driver - .borrow_mut() - .write(&Arc::new(PivotTable::from(&metadata).into())); - } - Output::Json { .. } => { - output.show_json(&metadata)?; - } - Output::Discard => (), - } - } - } - Ok(()) - } -} - -/// What to show in a system file. -#[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)] -enum Mode { - /// File dictionary, with variables, value labels, ... 
- #[default] - #[value(alias = "dict")] - Dictionary, - - /// File metadata not included in the dictionary. - Metadata, -} - -impl Mode { - fn as_str(&self) -> &'static str { - match self { - Mode::Dictionary => "dictionary", - Mode::Metadata => "metadata", - } - } -} - -impl Display for Mode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, ValueEnum)] -#[serde(rename_all = "snake_case")] -enum ShowFormat { - /// Pretty-printed JSON. - #[default] - Json, - /// Newline-delimited JSON. - Ndjson, - /// Pivot tables. - Output, - /// No output. - Discard, -} diff --git a/rust/pspp/src/show_por.rs b/rust/pspp/src/show_por.rs deleted file mode 100644 index bac7c42884..0000000000 --- a/rust/pspp/src/show_por.rs +++ /dev/null @@ -1,321 +0,0 @@ -// PSPP - a program for statistical analysis. -// Copyright (C) 2025 Free Software Foundation, Inc. -// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -// details. -// -// You should have received a copy of the GNU General Public License along with -// this program. If not, see . 
- -use anyhow::{Result, anyhow}; -use clap::{Args, ValueEnum}; -use itertools::Itertools; -use pspp::{ - data::cases_to_output, - output::{ - Item, Text, - drivers::{Config, Driver}, - pivot::PivotTable, - }, - por::PortableFile, -}; -use serde::Serialize; -use std::{ - cell::RefCell, - ffi::OsStr, - fmt::{Display, Write as _}, - fs::File, - io::{BufReader, Write, stdout}, - path::{Path, PathBuf}, - rc::Rc, - sync::Arc, -}; - -/// Show information about SPSS portable files. -#[derive(Args, Clone, Debug)] -pub struct ShowPor { - /// What to show. - #[arg(value_enum)] - mode: Mode, - - /// File to show. - #[arg(required = true)] - input: PathBuf, - - /// Output file name. If omitted, output is written to stdout. - output: Option, - - /// Maximum number of cases to read. - /// - /// If specified without an argument, all cases will be read. - #[arg( - long = "data", - num_args = 0..=1, - default_missing_value = "18446744073709551615", - default_value_t = 0, - help_heading = "Input file options" - )] - max_cases: usize, - - /// Output driver configuration options. - #[arg(short = 'o', help_heading = "Output options")] - output_options: Vec, - - /// Output format. - #[arg(long, short = 'f', help_heading = "Output options")] - format: Option, -} - -enum Output { - Driver { - driver: Rc>>, - mode: Mode, - }, - Json { - writer: Rc>>, - pretty: bool, - }, - Discard, -} - -impl Output { - fn show_json(&self, value: &T) -> Result<()> - where - T: Serialize, - { - match self { - Self::Driver { mode, driver: _ } => { - Err(anyhow!("Mode '{mode}' only supports output as JSON.")) - } - Self::Json { writer, pretty } => { - let mut writer = writer.borrow_mut(); - match pretty { - true => serde_json::to_writer_pretty(&mut *writer, value)?, - false => serde_json::to_writer(&mut *writer, value)?, - }; - writeln!(writer)?; - Ok(()) - } - Self::Discard => Ok(()), - } - } - - fn warn(&self, warning: &impl Display) { - match self { - Output::Driver { driver, .. 
} => { - driver - .borrow_mut() - .write(&Arc::new(Item::from(Text::new_log(warning.to_string())))); - } - Output::Json { .. } => { - #[derive(Serialize)] - struct Warning { - warning: String, - } - let warning = Warning { - warning: warning.to_string(), - }; - let _ = self.show_json(&warning); - } - Self::Discard => (), - } - } -} - -impl ShowPor { - pub fn run(self) -> Result<()> { - let format = if let Some(format) = self.format { - format - } else if let Some(output_file) = &self.output { - match output_file - .extension() - .unwrap_or(OsStr::new("")) - .to_str() - .unwrap_or("") - { - "json" => ShowFormat::Json, - "ndjson" => ShowFormat::Ndjson, - _ => ShowFormat::Output, - } - } else { - ShowFormat::Json - }; - - let output = match format { - ShowFormat::Output => { - let mut config = String::new(); - - if let Some(file) = &self.output { - #[derive(Serialize)] - struct File<'a> { - file: &'a Path, - } - let file = File { - file: file.as_path(), - }; - let toml_file = toml::to_string_pretty(&file).unwrap(); - config.push_str(&toml_file); - } - for option in &self.output_options { - writeln!(&mut config, "{option}").unwrap(); - } - - let table: toml::Table = toml::from_str(&config)?; - if !table.contains_key("driver") { - let driver = if let Some(file) = &self.output { - ::driver_type_from_filename(file).ok_or_else(|| { - anyhow!("{}: no default output format for file name", file.display()) - })? 
- } else { - "text" - }; - - #[derive(Serialize)] - struct DriverConfig { - driver: &'static str, - } - config.insert_str( - 0, - &toml::to_string_pretty(&DriverConfig { driver }).unwrap(), - ); - } - - let config: Config = toml::from_str(&config)?; - Output::Driver { - mode: self.mode, - driver: Rc::new(RefCell::new(Box::new(::new(&config)?))), - } - } - ShowFormat::Json | ShowFormat::Ndjson => Output::Json { - pretty: format == ShowFormat::Json, - writer: if let Some(output_file) = &self.output { - Rc::new(RefCell::new(Box::new(File::create(output_file)?))) - } else { - Rc::new(RefCell::new(Box::new(stdout()))) - }, - }, - ShowFormat::Discard => Output::Discard, - }; - - let reader = BufReader::new(File::open(&self.input)?); - match self.mode { - Mode::Dictionary => { - let PortableFile { - dictionary, - metadata: _, - cases, - } = PortableFile::open(reader, |warning| output.warn(&warning))?; - let cases = cases.take(self.max_cases); - - match &output { - Output::Driver { driver, mode: _ } => { - let mut output = Vec::new(); - output.extend(dictionary.all_pivot_tables().into_iter().map_into()); - output.extend(cases_to_output(&dictionary, cases)); - driver - .borrow_mut() - .write(&Arc::new(output.into_iter().collect())); - } - Output::Json { .. } => { - output.show_json(&dictionary)?; - for (_index, case) in (0..self.max_cases).zip(cases) { - output.show_json(&case?)?; - } - } - Output::Discard => (), - } - } - Mode::Metadata => { - let metadata = - PortableFile::open(reader, |warning| output.warn(&warning))?.metadata; - - match &output { - Output::Driver { driver, mode: _ } => { - driver - .borrow_mut() - .write(&Arc::new(PivotTable::from(&metadata).into())); - } - Output::Json { .. 
} => { - output.show_json(&metadata)?; - } - Output::Discard => (), - } - } - Mode::Histogram => { - let (histogram, translations) = PortableFile::read_histogram(reader)?; - let h = histogram - .into_iter() - .enumerate() - .filter_map(|(index, count)| { - if count > 0 - && index != translations[index as u8] as usize - && translations[index as u8] != 0 - { - Some(( - format!("{index:02x}"), - translations[index as u8] as char, - count, - )) - } else { - None - } - }) - .collect::>(); - output.show_json(&h)?; - } - } - Ok(()) - } -} - -/// What to show in a system file. -#[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)] -enum Mode { - /// File dictionary, with variables, value labels, ... - #[default] - #[value(alias = "dict")] - Dictionary, - - /// File metadata not included in the dictionary. - Metadata, - - /// Histogram of character incidence in the file. - Histogram, -} - -impl Mode { - fn as_str(&self) -> &'static str { - match self { - Mode::Dictionary => "dictionary", - Mode::Metadata => "metadata", - Mode::Histogram => "histogram", - } - } -} - -impl Display for Mode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, ValueEnum)] -#[serde(rename_all = "snake_case")] -enum ShowFormat { - /// Pretty-printed JSON. - #[default] - Json, - /// Newline-delimited JSON. - Ndjson, - /// Pivot tables. - Output, - /// No output. - Discard, -} diff --git a/rust/pspp/src/show_spv.rs b/rust/pspp/src/show_spv.rs deleted file mode 100644 index fa399f9067..0000000000 --- a/rust/pspp/src/show_spv.rs +++ /dev/null @@ -1,139 +0,0 @@ -// PSPP - a program for statistical analysis. -// Copyright (C) 2025 Free Software Foundation, Inc. 
-// -// This program is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -// details. -// -// You should have received a copy of the GNU General Public License along with -// this program. If not, see . - -use anyhow::Result; -use clap::{Args, ValueEnum}; -use pspp::output::{Criteria, Item}; -use std::{fmt::Display, path::PathBuf}; - -/// Show information about SPSS viewer files (SPV files). -#[derive(Args, Clone, Debug)] -pub struct ShowSpv { - /// What to show. - #[arg(value_enum)] - mode: Mode, - - /// File to show. - /// - /// For most modes, this should be a `.spv` file. For `convert-table-look`, - /// this should be a `.tlo` or `.stt` file. - #[arg(required = true)] - input: PathBuf, - - /// Input selection options. - #[command(flatten)] - criteria: Criteria, - - /// Include ZIP member names in `dir` output. - #[arg(long = "member-names")] - show_member_names: bool, -} - -/// What to show in a viewer file. -#[derive(Clone, Copy, Debug, PartialEq, ValueEnum)] -enum Mode { - /// List tables and other items. - #[value(alias = "dir")] - Directory, - - /// Copies first selected TableLook into output in `.stt` format. - GetTableLook, - - /// Reads `.tlo` or `.stt` TableLook and outputs as `.stt` format. - ConvertTableLook, - - /// Prints contents. 
- View, -} - -impl Mode { - fn as_str(&self) -> &'static str { - match self { - Mode::Directory => "directory", - Mode::GetTableLook => "get-table-look", - Mode::ConvertTableLook => "convert-table-look", - Mode::View => "view", - } - } -} - -impl Display for Mode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl ShowSpv { - pub fn run(self) -> Result<()> { - match self.mode { - Mode::Directory => { - let item = Item::from_spv_file(&self.input)?.0; - let item = self.criteria.apply(item); - for child in item.details.children() { - print_item_directory(&child, 0, self.show_member_names); - } - Ok(()) - } - Mode::View => { - let item = Item::from_spv_file(&self.input)?.0; - let item = self.criteria.apply(item); - for child in item.details.children() { - println!("{child}"); - } - Ok(()) - } - Mode::GetTableLook => todo!(), - Mode::ConvertTableLook => todo!(), - } - } -} - -fn print_item_directory(item: &Item, level: usize, show_member_names: bool) { - for _ in 0..level { - print!(" "); - } - print!("- {} {:?}", item.details.kind(), item.label()); - if let Some(table) = item.details.as_table() { - let title = table.title().display(table).to_string(); - if item.label.as_ref().is_none_or(|label| label != &title) { - print!(" title {title:?}"); - } - } - if let Some(command_name) = &item.command_name { - print!(" command {command_name:?}"); - } - if let Some(subtype) = item.subtype() - && item.label.as_ref().is_none_or(|label| label != &subtype) - { - print!(" subtype {subtype:?}"); - } - if !item.show { - if item.details.is_heading() { - print!(" (collapsed)"); - } else { - print!(" (hidden)"); - } - } - if show_member_names && let Some(spv_info) = &item.spv_info { - for (index, name) in spv_info.member_names().into_iter().enumerate() { - print!(" {} {name:?}", if index == 0 { "in" } else { "and" }); - } - } - println!(); - for child in item.details.children() { - print_item_directory(&child, level + 
1, show_member_names); - } -}