* along with this program. If not, see <http://www.gnu.org/licenses/>. */
use anyhow::Result;
-use clap::{Parser, ValueEnum};
+use clap::{Args, Parser, Subcommand, ValueEnum};
use encoding_rs::Encoding;
-use pspp::sys::cooked::{decode, Headers};
-use pspp::sys::raw::{encoding_from_headers, Decoder, Magic, Reader, Record};
+use pspp::sys::cooked::{decode, Error, Headers};
+use pspp::sys::raw::{encoding_from_headers, Decoder, Magic, Reader, Record, Warning};
use std::fs::File;
-use std::io::BufReader;
+use std::io::{stdout, BufReader, Write};
use std::path::{Path, PathBuf};
use std::str;
use thiserror::Error as ThisError;
-/// A utility to dissect SPSS system files.
+/// PSPP, a program for statistical analysis of sampled data.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
-struct Args {
+struct Cli {
+ // The subcommand selected on the command line; `main` calls its `run`.
+ #[command(subcommand)]
+ command: Command,
+}
+
+/// Output file format.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)]
+enum OutputFormat {
+ /// Comma-separated values using each variable's print format (variable
+ /// names are written as the first line)
+ Csv,
+
+ /// SPSS system file.
+ // NOTE(review): no system-file writer is visible in this part of the file;
+ // `Convert::run` only constructs a CSV writer.  Confirm before relying on
+ // this variant.
+ Sav,
+}
+
+/// Convert SPSS data files into other formats.
+#[derive(Args, Clone, Debug)]
+struct Convert {
+ /// Input file name.
+ input: PathBuf,
+
+ /// Output file name (if omitted, output is written to stdout).
+ output: Option<PathBuf>,
+
+ /// Format for output file (if omitted, the intended format is inferred
+ /// based on file extension).
+ // NOTE(review): this field has no `#[arg(...)]` attribute, so clap's derive
+ // should treat it as a positional argument (after `input` and `output`)
+ // rather than as a named option -- confirm that this is intended.
+ output_format: Option<OutputFormat>,
+
+ /// The encoding to use.
+ // Parsed from the `--encoding` value by `parse_encoding`.
+ #[arg(long, value_parser = parse_encoding)]
+ encoding: Option<&'static Encoding>,
+}
+
+impl Convert {
+    /// Prints a reader `warning` to stderr, prefixed with `warning: `.
+    fn warn(warning: Warning) {
+        eprintln!("warning: {warning}");
+    }
+
+    /// Prints a decoding `error` to stderr, prefixed with `error: `.
+    fn err(error: Error) {
+        eprintln!("error: {error}");
+    }
+
+    /// Returns the output format to use: the explicitly requested one or,
+    /// failing that, one inferred from the output file's extension (`csv` or
+    /// `sav`, compared ASCII-case-insensitively).
+    ///
+    /// Returns `None` when no format was requested and none can be inferred
+    /// (for example, when writing to stdout).
+    fn effective_format(&self) -> Option<OutputFormat> {
+        self.output_format.or_else(|| {
+            let extension = self.output.as_deref()?.extension()?;
+            if extension.eq_ignore_ascii_case("csv") {
+                Some(OutputFormat::Csv)
+            } else if extension.eq_ignore_ascii_case("sav") {
+                Some(OutputFormat::Sav)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Reads the system file named by `self.input` and writes it as CSV to
+    /// `self.output`, or to stdout if no output file was given.
+    ///
+    /// The first CSV record holds the variable names; each following record
+    /// holds one case, with every value rendered using its variable's print
+    /// format.
+    ///
+    /// # Errors
+    ///
+    /// Fails if system-file output is requested (explicitly or through a
+    /// `.sav` output extension), if the input cannot be opened or decoded, or
+    /// if the output cannot be created, written, or flushed.
+    fn run(self) -> Result<()> {
+        // Only CSV can be produced here.  Refuse anything else before the
+        // output file is created, rather than writing CSV data into a file
+        // that the user asked to be a system file.
+        if self.effective_format() == Some(OutputFormat::Sav) {
+            anyhow::bail!("writing SPSS system files is not yet supported");
+        }
+
+        let input = File::open(&self.input)
+            .map_err(|e| anyhow::anyhow!("{}: {e}", self.input.display()))?;
+        let mut reader = Reader::new(BufReader::new(input), Self::warn)?;
+        let headers = reader.headers().collect::<Result<Vec<_>, _>>()?;
+        let cases = reader.cases();
+
+        // An encoding given with `--encoding` overrides the one that would
+        // otherwise be detected from the file's headers.
+        let encoding = match self.encoding {
+            Some(encoding) => encoding,
+            None => encoding_from_headers(&headers, &mut |w| Self::warn(w))?,
+        };
+        let mut decoder = Decoder::new(encoding, |w| Self::warn(w));
+        let mut decoded_records = Vec::new();
+        for header in headers {
+            decoded_records.push(header.decode(&mut decoder)?);
+        }
+        drop(decoder);
+
+        let headers = Headers::new(decoded_records, &mut |e| Self::err(e))?;
+        let (dictionary, _metadata, cases) =
+            decode(headers, cases, encoding, |e| Self::err(e))?;
+
+        let writer = match self.output {
+            Some(path) => {
+                let file = File::create(&path)
+                    .map_err(|e| anyhow::anyhow!("{}: {e}", path.display()))?;
+                Box::new(file) as Box<dyn Write>
+            }
+            None => Box::new(stdout()),
+        };
+        let mut output = csv::WriterBuilder::new().from_writer(writer);
+        output.write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?;
+
+        if let Some(cases) = cases {
+            for case in cases {
+                output.write_record(case?.into_iter().zip(dictionary.variables.iter()).map(
+                    |(datum, variable)| {
+                        datum
+                            .display(variable.print_format, variable.encoding)
+                            .to_string()
+                    },
+                ))?;
+            }
+        }
+
+        // Dropping the CSV writer flushes it but discards any error, so flush
+        // explicitly to report failed writes.
+        output.flush()?;
+        Ok(())
+    }
+}
+
+/// Dissects SPSS system files.
+#[derive(Args, Clone, Debug)]
+struct Dissect {
/// Maximum number of cases to print.
#[arg(long = "data", default_value_t = 0)]
max_cases: u64,
encoding: Option<&'static Encoding>,
}
+impl Dissect {
+    /// Calls `dissect` on each file in `self.files`, in order, passing along
+    /// `max_cases`, `mode`, and `encoding`; stops at the first failure.
+    fn run(self) -> Result<()> {
+        self.files.iter().try_for_each(|path| {
+            dissect(path, self.max_cases, self.mode, self.encoding)?;
+            Ok(())
+        })
+    }
+}
+
+#[derive(Subcommand, Clone, Debug)]
+enum Command {
+ // Each variant wraps the argument struct of one subcommand; `Command::run`
+ // forwards to that struct's own `run` method.
+ Convert(Convert),
+ Dissect(Dissect),
+}
+
+impl Command {
+    /// Executes the selected subcommand by delegating to the `run` method of
+    /// the argument struct it wraps.
+    fn run(self) -> Result<()> {
+        match self {
+            Self::Convert(args) => args.run(),
+            Self::Dissect(args) => args.run(),
+        }
+    }
+}
+
/// Error whose message is the wrapped string followed by `: unknown encoding`.
#[derive(ThisError, Debug)]
#[error("{0}: unknown encoding")]
struct UnknownEncodingError(String);
}
fn main() -> Result<()> {
- let Args {
- max_cases,
- files,
- mode,
- encoding,
- } = Args::parse();
-
- for file in files {
- dissect(&file, max_cases, mode, encoding)?;
- }
- Ok(())
+ // Parse the command line, then run the selected subcommand.
+ Cli::parse().command.run()
}
fn dissect(