From 22731daece46e848324b02d45e4743b708a5160f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 2 Sep 2025 14:56:23 -0700 Subject: [PATCH] rust: Improve documentation for `pspp decrypt` and `pspp show`. Also, update `pspp show` implementation to match. --- rust/doc/src/invoking/pspp-convert.md | 15 ++-- rust/doc/src/invoking/pspp-decrypt.md | 12 +++- rust/doc/src/invoking/pspp-show.md | 98 +++++++++++++++++++++++++++ rust/pspp/src/show.rs | 43 ++++++------ 4 files changed, 140 insertions(+), 28 deletions(-) diff --git a/rust/doc/src/invoking/pspp-convert.md b/rust/doc/src/invoking/pspp-convert.md index 42055240a2..a1d33a605b 100644 --- a/rust/doc/src/invoking/pspp-convert.md +++ b/rust/doc/src/invoking/pspp-convert.md @@ -1,8 +1,15 @@ # Converting data files with `pspp convert` -`pspp convert <INPUT> [OUTPUT]` reads an SPSS data file from `<INPUT>` -and writes a copy of it to `[OUTPUT]` (or to the terminal, if -`[OUTPUT]` is omitted). +The `pspp convert` command reads data from one file and writes it to +another. The basic syntax is: + +``` +pspp convert <INPUT> [OUTPUT] +``` + +which reads an SPSS data file from `<INPUT>` and writes a copy of it +to `[OUTPUT]`. If `[OUTPUT]` is omitted, output is written to the +terminal. If `[OUTPUT]` is specified, then `pspp convert` tries to guess the output format based on its extension: @@ -38,7 +45,7 @@ for unrecognized extensions. `<ENCODING>` must be one of the labels for encodings in the [Encoding Standard]. PSPP does not support UTF-16 or EBCDIC - encodings data files. + encodings in data files. `pspp show encodings` can help figure out the correct encoding for a system file. diff --git a/rust/doc/src/invoking/pspp-decrypt.md b/rust/doc/src/invoking/pspp-decrypt.md index 8d17c020e6..1331e6f5df 100644 --- a/rust/doc/src/invoking/pspp-decrypt.md +++ b/rust/doc/src/invoking/pspp-decrypt.md @@ -1,8 +1,14 @@ # Decrypting SPSS files with `pspp decrypt` -SPSS supports encryption using a password for data, viewer, and syntax -files.
`pspp decrypt <INPUT> <OUTPUT>` reads an encrypted file -`<INPUT>` and writes an equivalent plaintext file `<OUTPUT>`. +The `pspp decrypt` command reads an encrypted SPSS file and writes out +an equivalent plaintext file. The basic syntax is: + +``` +pspp decrypt <INPUT> <OUTPUT> +``` + +which reads an encrypted SPSS data, viewer, or syntax file `<INPUT>`, +decrypts it, and writes the decrypted version to `<OUTPUT>`. Other commands, such as [`pspp convert`](pspp-convert.md), can also read encrypted files directly. diff --git a/rust/doc/src/invoking/pspp-show.md b/rust/doc/src/invoking/pspp-show.md index 65319beb06..f162a4b4ef 100644 --- a/rust/doc/src/invoking/pspp-show.md +++ b/rust/doc/src/invoking/pspp-show.md @@ -1 +1,99 @@ # Inspecting data files with `pspp show` + +The `pspp show` command reads an SPSS data file and produces a report. +The basic syntax is: + +``` +pspp show <MODE> <INPUT> [OUTPUT] +``` + +where `<MODE>` is a mode of operation (see below), `<INPUT>` is the +SPSS data file to read, and `[OUTPUT]` is the output file name. If +`[OUTPUT]` is omitted, output is written to the terminal. + +The following `<MODE>`s are available: + +* `identity`: Outputs a line of text to stdout that identifies the + basic kind of system file. + +* `dictionary`: Outputs the file dictionary in detail, including + variables, value labels, attributes, documents, and so on. With + `--data`, also outputs cases from the system file. + + This can be useful as an alternative to PSPP syntax commands such as + [`SYSFILE INFO`](../commands/spss-io/sysfile-info.md) or [`DISPLAY + DICTIONARY`](../commands/variables/display.md). + + [`pspp convert`](pspp-convert.md) is a better way to convert a + system file to another format. + +* `encodings`: Analyzes text data in the system file dictionary and + (with `--data`) cases and produces a report that can help the user + to figure out what character encoding the file uses. + + This is useful for old system files that don't identify their own + encodings.
+ +* `raw`: Outputs the raw structure of the system file dictionary and + (with `--data`) cases. This command does not assume a particular + character encoding for the system file, which means that some of the + dictionary can't be printed in detail, only in summary. + + This is useful for debugging how PSPP reads system files and for + investigating cases of system file corruption, especially when the + character encoding is unknown or uncertain. + +* `decoded`: Outputs the raw structure of the system file dictionary + and (with `--data`) cases. Versus `raw`, this command does decode + the dictionary and data with a particular character encoding, which + allows it to fully interpret system file records. + + This is useful for debugging how PSPP reads system files and for + investigating cases of system file corruption. + +## Options + +The following options affect how `pspp show` reads `<INPUT>`: + +* `--encoding <ENCODING>` + For modes `decoded` and `dictionary`, this reads the input file + using the specified `<ENCODING>`, overriding the default. + + `<ENCODING>` must be one of the labels for encodings in the + [Encoding Standard]. PSPP does not support UTF-16 or EBCDIC + encodings in data files. + + `pspp show encodings` can help figure out the correct encoding for a + system file. + + [Encoding Standard]: https://encoding.spec.whatwg.org/#names-and-labels + +* `--data [<MAX_CASES>]` + For modes `raw`, `dictionary`, and `encodings`, this instructs `pspp + show` to read cases from the file. If `<MAX_CASES>` is given, then + that sets a limit on the number of cases to read. Without this + option, PSPP will not read any cases. + +The following options affect how `pspp show` writes its output: + +* `-f <FORMAT>` + `--format <FORMAT>` + Specifies the format to use for output. `<FORMAT>` may be one of + the following: + + - `json`: JSON using indentation and spaces for easy human + consumption. + - `ndjson`: [Newline-delimited JSON]. + - `output`: Pivot tables with the PSPP output engine. Use `-o` for + additional configuration.
+ - `discard`: Do not produce any output. + + When these options are not used, the default output format is chosen + based on the `[OUTPUT]` extension. If `[OUTPUT]` is not specified, + then output defaults to JSON. + + [Newline-delimited JSON]: https://github.com/ndjson/ndjson-spec + +* `-o <OUTPUT_OPTIONS>` + Adds `<OUTPUT_OPTIONS>` to the output engine configuration. + diff --git a/rust/pspp/src/show.rs b/rust/pspp/src/show.rs index aed88d263e..9e699d623e 100644 --- a/rust/pspp/src/show.rs +++ b/rust/pspp/src/show.rs @@ -50,14 +50,14 @@ pub struct Show { /// File to show. #[arg(required = true)] - input_file: PathBuf, + input: PathBuf, /// Output file name. If omitted, output is written to stdout. - output_file: Option, + output: Option, - /// Output driver configuration options. - #[arg(short = 'o')] - output_options: Vec, + /// The encoding to use. + #[arg(long, value_parser = parse_encoding, help_heading = "Input file options")] + encoding: Option<&'static Encoding>, /// Maximum number of cases to read. /// @@ -66,17 +66,18 @@ pub struct Show { long = "data", num_args = 0..=1, default_missing_value = "18446744073709551615", - default_value_t = 0 + default_value_t = 0, + help_heading = "Input file options" )] max_cases: u64, + /// Output driver configuration options. + #[arg(short = 'o', help_heading = "Output options")] + output_options: Vec, + /// Output format. - #[arg(long, short = 'f')] + #[arg(long, short = 'f', help_heading = "Output options")] format: Option, - - /// The encoding to use.
- #[arg(long, value_parser = parse_encoding)] - encoding: Option<&'static Encoding>, } enum Output { @@ -170,7 +171,7 @@ impl Show { pub fn run(self) -> Result<()> { let format = if let Some(format) = self.format { format - } else if let Some(output_file) = &self.output_file { + } else if let Some(output_file) = &self.output { match output_file .extension() .unwrap_or(OsStr::new("")) @@ -189,7 +190,7 @@ impl Show { ShowFormat::Output => { let mut config = String::new(); - if let Some(file) = &self.output_file { + if let Some(file) = &self.output { #[derive(Serialize)] struct File<'a> { file: &'a Path, @@ -206,7 +207,7 @@ impl Show { let table: toml::Table = toml::from_str(&config)?; if !table.contains_key("driver") { - let driver = if let Some(file) = &self.output_file { + let driver = if let Some(file) = &self.output { ::driver_type_from_filename(file).ok_or_else(|| { anyhow!("{}: no default output format for file name", file.display()) })? @@ -232,7 +233,7 @@ impl Show { } ShowFormat::Json | ShowFormat::Ndjson => Output::Json { pretty: format == ShowFormat::Json, - writer: if let Some(output_file) = &self.output_file { + writer: if let Some(output_file) = &self.output { Rc::new(RefCell::new(Box::new(File::create(output_file)?))) } else { Rc::new(RefCell::new(Box::new(stdout()))) @@ -241,7 +242,7 @@ impl Show { ShowFormat::Discard => Output::Discard, }; - let reader = File::open(&self.input_file)?; + let reader = File::open(&self.input)?; let reader = BufReader::new(reader); let mut reader = Reader::new(reader, Box::new(|warning| output.warn(&warning)))?; @@ -328,17 +329,17 @@ impl Show { /// What to show in a system file. #[derive(Clone, Copy, Debug, Default, PartialEq, ValueEnum)] enum Mode { - /// The file dictionary, including variables, value labels, attributes, and so on. + /// The kind of file. + Identity, + + /// File dictionary, with variables, value labels, attributes, ... 
#[default] #[value(alias = "dict")] Dictionary, - /// Possible encodings of text in the file dictionary and (with `--data`) cases. + /// Possible encodings of text in file dictionary and (with `--data`) cases. Encodings, - /// The kind of file. - Identity, - /// Raw file records, without assuming a particular character encoding. Raw, -- 2.30.2