impl Debug for RawString {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- write!(f, "{:?}", *self)
+ <RawStr as Debug>::fmt(&*self, f)
}
}
pub struct DisplayRawString<'a>(Cow<'a, str>);
impl<'a> Display for DisplayRawString<'a> {
- // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
- // (actually bytes interpreted as Unicode code points).
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", &self.0)
}
}
impl Debug for RawStr {
+ // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
+ // (actually bytes interpreted as Unicode code points).
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
let s = from_utf8(&self.0).map_or_else(|_| decode_latin1(&self.0), Cow::from);
write!(f, "{s:?}")
}
}
-#[derive(Clone, Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Default, PartialEq, Eq)]
pub struct Attributes(pub BTreeMap<Identifier, Vec<String>>);
impl Attributes {
}
}
+/// Formats the attribute set exactly as its inner map is formatted, so that
+/// debug output does not carry an extra `Attributes(...)` wrapper.
+impl Debug for Attributes {
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        let Self(map) = self;
+        Debug::fmt(map, formatter)
+    }
+}
+
#[derive(Clone, Debug, ThisError, PartialEq, Eq)]
pub enum InvalidRole {
#[error("Unknown role {0:?}.")]
}
}
-#[derive(Clone, Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Default, PartialEq, Eq)]
pub struct ValueLabels(pub HashMap<Datum, String>);
impl ValueLabels {
}
}
+/// Formats the value labels exactly as the inner map is formatted, so that
+/// debug output does not carry an extra `ValueLabels(...)` wrapper.
+impl Debug for ValueLabels {
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        let Self(labels) = self;
+        Debug::fmt(labels, formatter)
+    }
+}
+
impl Hash for ValueLabels {
fn hash<H: Hasher>(&self, state: &mut H) {
let mut hash = 0;
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, bail, Error as AnyError, Result};
use clap::{Args, Parser, Subcommand, ValueEnum};
use encoding_rs::Encoding;
use pspp::{
crypto::EncryptedFile,
sys::{
- raw::{infer_encoding, Decoder, Magic, Reader, Record},
+ self,
+ raw::{infer_encoding, records::Compression, Decoder, Magic, Reader, Record},
ReaderOptions, Records,
},
};
/// Comma-separated values using each variable's print format (variable
/// names are written as the first line)
Csv,
+
+ /// System file
+ Sys,
+}
+
+/// Infers the output format from the extension of an output file name.
+///
+/// `csv` and `txt` select [OutputFormat::Csv]; `sav` and `sys` select
+/// [OutputFormat::Sys].  Extensions are compared without regard to ASCII
+/// case.  Any other extension, including a missing one, is an error.
+impl TryFrom<&Path> for OutputFormat {
+    type Error = AnyError;
+
+    fn try_from(value: &Path) -> std::result::Result<Self, Self::Error> {
+        // A path without an extension is treated as having an empty one, which
+        // matches none of the known names and falls through to the error.
+        let extension = value.extension().unwrap_or_default();
+        let is_any_of = |candidates: &[&str]| {
+            candidates
+                .iter()
+                .any(|&candidate| extension.eq_ignore_ascii_case(candidate))
+        };
+
+        if is_any_of(&["csv", "txt"]) {
+            return Ok(OutputFormat::Csv);
+        }
+        if is_any_of(&["sav", "sys"]) {
+            return Ok(OutputFormat::Sys);
+        }
+        Err(anyhow!(
+            "Unknown output file extension '{}'",
+            extension.display()
+        ))
+    }
}
/// Convert SPSS data files into other formats.
/// Maximum number of cases to print.
#[arg(short = 'c', long = "cases")]
- max_cases: Option<u64>,
+ max_cases: Option<usize>,
#[command(flatten, next_help_heading = "Options for CSV output")]
csv_options: CsvOptions,
+
+ #[command(flatten, next_help_heading = "Options for system file output")]
+ sys_options: SysOptions,
}
#[derive(Args, Clone, Debug)]
no_var_names: bool,
}
+// Command-line options that only matter when the output is a system file.
+// Plain `//` comments are used here on purpose: clap turns `///` doc comments
+// into help text, so only the field below carries one.
+#[derive(Args, Clone, Debug)]
+struct SysOptions {
+    /// How to compress data in the system file.
+    // `None` (option not given) is passed through to the writer unchanged.
+    #[arg(long)]
+    compression: Option<Compression>,
+}
+
impl Convert {
fn run(self) -> Result<()> {
fn warn(warning: anyhow::Error) {
.with_password(self.password.clone())
.open_file(&self.input, warn)?
.into_parts();
- let writer = match self.output {
- Some(path) => Box::new(File::create(path)?) as Box<dyn Write>,
- None => Box::new(stdout()),
+
+ // Take only the first `self.max_cases` cases.
+ let cases = cases.take(self.max_cases.unwrap_or(usize::MAX));
+
+ let output_format = match self.output_format {
+ Some(format) => format,
+ None => {
+ let Some(output) = &self.output else {
+ bail!("either --output-format or an output file name must be specified");
+ };
+ output.as_path().try_into()?
+ }
};
- let mut output = csv::WriterBuilder::new().from_writer(writer);
- if !self.csv_options.no_var_names {
- output.write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?;
- }
- for (_case_number, case) in (0..self.max_cases.unwrap_or(u64::MAX)).zip(cases) {
- output.write_record(case?.0.into_iter().zip(dictionary.variables.iter()).map(
- |(datum, variable)| {
- datum
- .display(variable.print_format, variable.encoding)
- .to_string()
- },
- ))?;
+ match output_format {
+ OutputFormat::Csv => {
+ let writer = match self.output {
+ Some(path) => Box::new(File::create(path)?) as Box<dyn Write>,
+ None => Box::new(stdout()),
+ };
+ let mut output = csv::WriterBuilder::new().from_writer(writer);
+ if !self.csv_options.no_var_names {
+ output
+ .write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?;
+ }
+
+ for case in cases {
+ output.write_record(
+ case?.0.into_iter().zip(dictionary.variables.iter()).map(
+ |(datum, variable)| {
+ datum
+ .display(variable.print_format, variable.encoding)
+ .to_string()
+ },
+ ),
+ )?;
+ }
+ }
+ OutputFormat::Sys => {
+ let Some(output) = &self.output else {
+ bail!("output file name must be specified for output to a system file")
+ };
+ let mut output = sys::WriteOptions::new()
+ .with_compression(self.sys_options.compression)
+ .write_file(&dictionary, output)?;
+ for case in cases {
+ output.write_case(&case?)?;
+ }
+ }
}
Ok(())
}
Raw,
Decoded,
#[default]
- Cooked,
+ Parsed,
}
fn main() -> Result<()> {
return Ok(());
}
Mode::Raw => {
+ println!("{:#?}", reader.header());
for record in reader.records() {
let header = record?;
- println!("{:?}", header);
+ println!("{:#?}", header);
}
for (_index, case) in (0..max_cases).zip(reader.cases()) {
- println!("{:?}", case?);
+ println!("{:#?}", case?);
}
}
Mode::Decoded => {
let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
for header in records {
let header = header.decode(&mut decoder);
- println!("{:?}", header);
- /*
- if let Record::Cases(cases) = header {
- let mut cases = cases.borrow_mut();
- for _ in 0..max_cases {
- let Some(Ok(record)) = cases.next() else {
- break;
- };
- println!("{:?}", record);
- }
- }
- */
+ println!("{:#?}", header);
}
}
- Mode::Cooked => {
+ Mode::Parsed => {
let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
let encoding = match encoding {
Some(encoding) => encoding,
use std::{
collections::BTreeMap,
+ fmt::{Debug, Display},
fs::File,
io::{Read, Seek},
ops::Range,
},
};
use anyhow::{anyhow, Error as AnyError};
-use binrw::io::BufReader;
+use binrw::{io::BufReader, BinRead, BinWrite};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::Encoding;
use indexmap::set::MutableValues;
}
}
+/// Product version number in a system file.
+///
+/// Comparison uses the derived ordering: major version first, then minor
+/// version, then revision.
+///
+/// # Example
+///
+/// `ProductVersion(1,2,3)` is version 1.2.3.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, BinRead, BinWrite)]
+pub struct ProductVersion(
+    /// Major version.
+    pub i32,
+    /// Minor version.
+    pub i32,
+    /// Revision.
+    pub i32,
+);
+
+impl ProductVersion {
+    /// The version of this build of PSPP, parsed at compile time from
+    /// `CARGO_PKG_VERSION`.
+    ///
+    /// Only the leading `MAJOR.MINOR.REVISION` triple is kept; whatever follows
+    /// a `-` or `+` after the revision is ignored.  A version string in any
+    /// other form fails const evaluation, and therefore the build.
+    pub const VERSION: Self = {
+        // Consumes a nonempty run of ASCII digits from the start of `s` and
+        // returns its decimal value together with the unconsumed remainder.
+        const fn parse_integer(mut s: &[u8]) -> (i32, &[u8]) {
+            let mut value = 0;
+            let mut n_digits = 0;
+            while let [digit @ b'0'..=b'9', rest @ ..] = s {
+                value = value * 10 + (*digit - b'0') as i32;
+                n_digits += 1;
+                s = rest;
+            }
+            assert!(n_digits > 0);
+            (value, s)
+        }
+
+        // Consumes the `.` that must start `s` and returns what follows it.
+        const fn skip_dot(s: &[u8]) -> &[u8] {
+            match s {
+                [] => unreachable!(),
+                [c, rest @ ..] => {
+                    assert!(*c == b'.');
+                    rest
+                }
+            }
+        }
+
+        // Parse `CARGO_PKG_VERSION`.  Everything is hand-rolled because the
+        // usual parsing APIs are not available in `const` contexts.
+        let s = env!("CARGO_PKG_VERSION").as_bytes();
+        let (major, s) = parse_integer(s);
+        let (minor, s) = parse_integer(skip_dot(s));
+        let (revision, s) = parse_integer(skip_dot(s));
+        // Only end of string or a pre-release/build suffix may follow.
+        assert!(matches!(s, [] | [b'-' | b'+', ..]));
+        Self(major, minor, revision)
+    };
+}
+
+/// Renders the version in dotted form, e.g. `1.2.3`.
+impl Display for ProductVersion {
+    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self(major, minor, revision) = *self;
+        write!(formatter, "{major}.{minor}.{revision}")
+    }
+}
+
+/// Debug output is the same dotted form that [Display] produces, rather than
+/// a tuple-struct dump.
+impl Debug for ProductVersion {
+    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(self, formatter)
+    }
+}
+
/// System file metadata that is not part of [Dictionary].
///
/// [Dictionary]: crate::dictionary::Dictionary
/// Version number of the product that wrote the file.
///
- /// For example, `(1,2,3)` is version 1.2.3.
- pub version: Option<(i32, i32, i32)>,
+ pub version: Option<ProductVersion>,
}
impl Metadata {
let product = header
.eye_catcher
.trim_start_matches("@(#) SPSS DATA FILE")
- .trim_end()
+ .trim()
.to_string();
Self {
pub mod sack;
mod write;
+pub use write::{Version, WriteOptions, Writer};
#[cfg(test)]
mod test;
endian::{Endian, Parse},
format::{Format, Type},
identifier::{Error as IdError, Identifier},
- sys::raw::{
- read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum,
- RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails,
+ sys::{
+ raw::{
+ read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum,
+ RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails,
+ },
+ ProductVersion,
},
};
use binrw::{BinRead, BinWrite};
+use clap::ValueEnum;
use itertools::Itertools;
use thiserror::Error as ThisError;
/// Type of compression in a system file.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)]
pub enum Compression {
/// Simple bytecode-based compression.
Simple,
}
/// A file header record in a system file.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct FileHeader<S>
where
S: Debug,
pub endian: Endian,
}
-impl<S> FileHeader<S>
-where
- S: Debug,
-{
- fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> std::fmt::Result
- where
- T: Debug,
- {
- writeln!(f, "{name:>17}: {:?}", value)
- }
-}
-
-impl<S> Debug for FileHeader<S>
-where
- S: Debug,
-{
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- writeln!(f, "File header record:")?;
- self.debug_field(f, "Magic", self.magic)?;
- self.debug_field(f, "Product name", &self.eye_catcher)?;
- self.debug_field(f, "Layout code", self.layout_code)?;
- self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
- self.debug_field(f, "Compression", self.compression)?;
- self.debug_field(f, "Weight index", self.weight_index)?;
- self.debug_field(f, "Number of cases", self.n_cases)?;
- self.debug_field(f, "Compression bias", self.bias)?;
- self.debug_field(f, "Creation date", &self.creation_date)?;
- self.debug_field(f, "Creation time", &self.creation_time)?;
- self.debug_field(f, "File label", &self.file_label)?;
- self.debug_field(f, "Endianness", self.endian)
- }
-}
-
#[allow(missing_docs)]
#[derive(BinRead, BinWrite)]
pub struct RawHeader {
}
/// A variable record in a system file.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct VariableRecord<S>
where
S: Debug,
pub label: Option<S>,
}
-impl<S> Debug for VariableRecord<S>
-where
- S: Debug,
-{
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- writeln!(f, "Width: {}", self.width,)?;
- writeln!(f, "Print format: {:?}", self.print_format)?;
- writeln!(f, "Write format: {:?}", self.write_format)?;
- writeln!(f, "Name: {:?}", &self.name)?;
- writeln!(f, "Variable label: {:?}", self.label)?;
- writeln!(f, "Missing values: {:?}", self.missing_values)
- }
-}
-
#[allow(missing_docs)]
#[derive(BinRead, BinWrite)]
pub struct RawVariableRecord {
///
/// This represents both the type-3 and type-4 records together, since they are
/// always paired anyway.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct ValueLabelRecord<D, S>
where
D: Debug,
pub var_type: VarType,
}
-impl<D, S> Debug for ValueLabelRecord<D, S>
-where
- D: Debug,
- S: Debug,
-{
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- writeln!(f, "labels: ")?;
- for label in self.labels.iter() {
- writeln!(f, "{label:?}")?;
- }
- write!(f, "apply to {} variables", self.var_type)?;
- for dict_index in self.dict_indexes.iter() {
- write!(f, " #{dict_index}")?;
- }
- Ok(())
- }
-}
-
impl<D, S> ValueLabelRecord<D, S>
where
D: Debug,
#[derive(Clone, Debug, BinRead, BinWrite)]
pub struct RawIntegerInfoRecord {
/// Version number.
- ///
- /// e.g. `(1,2,3)` for version 1.2.3.
- pub version: (i32, i32, i32),
+ pub version: ProductVersion,
/// Identifies the type of machine.
///
borrow::Cow,
collections::HashMap,
fmt::Write as _,
+ fs::File,
io::{Cursor, Seek, Write},
+ path::Path,
};
use binrw::{BinWrite, Endian, Error as BinError};
use chrono::Local;
use encoding_rs::Encoding;
+use itertools::zip_eq;
use smallvec::SmallVec;
use crate::{
- data::Datum,
+ data::{Case, Datum},
dictionary::{
Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType,
ValueLabels, VarWidth,
},
Magic,
},
+ ProductVersion,
},
};
}
}
+impl WriteOptions {
+    /// Constructs the default set of options.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Returns `self` with its compression setting replaced by `compression`.
+    pub fn with_compression(mut self, compression: Option<Compression>) -> Self {
+        self.compression = compression;
+        self
+    }
+
+    /// Returns `self` with its version setting replaced by `version`.
+    pub fn with_version(mut self, version: Version) -> Self {
+        self.version = version;
+        self
+    }
+
+    /// Creates the file at `path`, writes `dictionary` to it, and returns a
+    /// [Writer] for writing cases to the same file.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be created or if writing the dictionary
+    /// fails.
+    pub fn write_file(
+        self,
+        dictionary: &Dictionary,
+        path: impl AsRef<Path>,
+    ) -> Result<Writer<File>, BinError> {
+        let file = File::create(path)?;
+        self.write_writer(dictionary, file)
+    }
+
+    /// Writes `dictionary` to `writer` and returns a [Writer] that takes
+    /// ownership of `writer` for writing cases.
+    ///
+    /// # Errors
+    ///
+    /// Fails if writing the dictionary fails.
+    pub fn write_writer<W>(
+        self,
+        dictionary: &Dictionary,
+        mut writer: W,
+    ) -> Result<Writer<W>, BinError>
+    where
+        W: Write + Seek,
+    {
+        let mut dict_writer = DictionaryWriter::new(&self, &mut writer, dictionary);
+        dict_writer.write()?;
+
+        // The per-variable layout computed by the dictionary writer is handed
+        // on to the case writer.
+        let case_vars = dict_writer.case_vars;
+        Ok(Writer::new(self, case_vars, writer))
+    }
+}
+
struct DictionaryWriter<'a, W> {
compression: Option<Compression>,
version: Version,
},
n_cases: u32::MAX,
bias: 100.0,
- creation_date: as_byte_array(now.format("%d %b %Y").to_string()),
+ creation_date: as_byte_array(now.format("%d %b %y").to_string()),
creation_time: as_byte_array(now.format("%H:%M:%S").to_string()),
file_label: as_byte_array(self.dictionary.file_label.clone().unwrap_or_default()),
};
(3u32, value_labels.0.len() as u32).write_le(self.writer)?;
for (datum, label) in &value_labels.0 {
let label = &*self.dictionary.encoding.encode(&label).0;
- let padding = label.len().next_multiple_of(8) - label.len();
- (datum, label.len() as u32, label, Zeros(padding)).write_le(self.writer)?;
+ let label = if label.len() > 255 {
+ &label[..255]
+ } else {
+ label
+ };
+ let padding = (1 + label.len()).next_multiple_of(8) - (1 + label.len());
+ (datum, label.len() as u8, label, Zeros(padding)).write_le(self.writer)?;
}
// Variable record.
Ok(())
}
- const fn version() -> (i32, i32, i32) {
- const fn parse_integer(mut s: &[u8]) -> (i32, &[u8]) {
- let mut value = 0;
- let mut n = 0;
- while let Some((c, rest)) = s.split_first()
- && *c >= b'0'
- && *c <= b'9'
- {
- value = value * 10 + (*c - b'0') as i32;
- n += 1;
- s = rest;
- }
- assert!(n > 0);
- (value, s)
- }
-
- const fn skip_dot(s: &[u8]) -> &[u8] {
- let Some((c, rest)) = s.split_first() else {
- unreachable!()
- };
- assert!(*c == b'.');
- rest
- }
-
- let s = env!("CARGO_PKG_VERSION").as_bytes();
- let (first, s) = parse_integer(s);
- let s = skip_dot(s);
- let (second, s) = parse_integer(s);
- let s = skip_dot(s);
- let (third, s) = parse_integer(s);
- assert!(matches!(s.first(), None | Some(b'-' | b'+')));
- (first, second, third)
- }
-
fn write_integer_record(&mut self) -> Result<(), BinError> {
(
7u32,
4u32,
8u32,
RawIntegerInfoRecord {
- version: Self::version(),
+ version: ProductVersion::VERSION,
machine_code: -1,
floating_point_rep: 1,
compression_code: 1,
}
}
-impl WriteOptions {
- pub fn new() -> Self {
- Self::default()
- }
- pub fn write_writer<W>(
- self,
- dictionary: &Dictionary,
- mut writer: W,
- ) -> Result<Writer<W>, BinError>
- where
- W: Write + Seek,
- {
- DictionaryWriter::new(&self, &mut writer, dictionary).write()?;
- todo!()
- }
-}
-
impl BinWrite for Datum {
type Args<'a> = ();
}
}
}
-/*
-/// A variable in a system file.
-struct WriteVar {
- width: VarWidth,
- segment_width: u8,
- case_index: usize,
- /// Offset within string variable in case.
- offset: usize,
-
- /// Number of padding bytes following data.
- padding: usize,
+/// System file writer.
+pub struct Writer<W> {
+ compression: Option<Compression>,
+ case_vars: Vec<CaseVar>,
+ inner: W,
}
-impl WriteVar {
- fn new_vars(dictionary: &Dictionary) -> Vec<Self> {
- let mut vars = Vec::new();
- for dv in &dictionary.variables {
-
+impl<W> Writer<W> {
+ fn new(options: WriteOptions, case_vars: Vec<CaseVar>, inner: W) -> Self {
+ Self {
+ compression: options.compression,
+ case_vars,
+ inner,
}
}
-}*/
-
-/// System file writer.
-pub struct Writer<W> {
- inner: W,
}
-impl<W> Writer<W> where W: Write + Seek {}
+impl<W> Writer<W>
+where
+    W: Write + Seek,
+{
+    /// Writes `case` to the system file.
+    ///
+    /// # Panics
+    ///
+    /// Writing with compression enabled is not implemented yet and panics.
+    /// Also panics if `case` does not have exactly one datum per variable, if
+    /// a datum's type does not match its variable, or if a string datum is
+    /// shorter than its variable's segments require.
+    pub fn write_case(&mut self, case: &Case) -> Result<(), BinError> {
+        if self.compression.is_some() {
+            todo!()
+        }
+        self.write_case_uncompressed(case)
+    }
+
+    /// Writes every datum in `case` in order, without compression.
+    fn write_case_uncompressed(&mut self, case: &Case) -> Result<(), BinError> {
+        for (var, datum) in zip_eq(&self.case_vars, &case.0) {
+            match var {
+                CaseVar::Numeric => {
+                    // A missing number is written as `f64::MIN` (presumably
+                    // the system-missing value; confirm against the format).
+                    let number = datum.as_number().unwrap().unwrap_or(f64::MIN);
+                    number.write_le(&mut self.inner)?;
+                }
+                CaseVar::String { width: _, encoding } => {
+                    // Hand out the string's bytes segment by segment, each
+                    // followed by that segment's padding.
+                    let mut remaining = datum.as_string().unwrap().as_bytes();
+                    for segment in encoding {
+                        let (data, rest) = remaining.split_at(segment.data_bytes);
+                        (data, Pad::new(segment.padding_bytes, 0)).write_le(&mut self.inner)?;
+                        remaining = rest;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+}