From 949ae93c6ab12ddaa1ed183c7a3a05051f449910 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 22 Jul 2025 09:22:49 -0700 Subject: [PATCH] work on json output --- rust/Cargo.lock | 8 +- rust/pspp/Cargo.toml | 7 +- rust/pspp/src/data.rs | 47 +++++++- rust/pspp/src/dictionary.rs | 52 ++++++-- rust/pspp/src/format/mod.rs | 7 +- rust/pspp/src/identifier.rs | 22 ++++ rust/pspp/src/main.rs | 16 ++- rust/pspp/src/sys/cooked.rs | 29 +++-- rust/pspp/src/sys/mod.rs | 12 ++ rust/pspp/src/sys/raw.rs | 30 ++++- rust/pspp/src/sys/raw/records.rs | 199 ++++++++++++++++++++----------- rust/pspp/src/sys/write.rs | 12 +- 12 files changed, 319 insertions(+), 122 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index fbaa4822c8..a6cefe2bf1 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -330,6 +330,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-link", ] @@ -582,6 +583,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", + "serde", ] [[package]] @@ -1083,6 +1085,7 @@ checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.3", + "serde", ] [[package]] @@ -1629,6 +1632,7 @@ dependencies = [ "rand", "readpass", "serde", + "serde_json", "smallstr", "smallvec", "thiserror", @@ -1831,9 +1835,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" dependencies = [ "itoa", "memchr", diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index a4e1e8b975..7f8dfe0be0 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -9,16 +9,16 @@ license = "GPL-3.0-or-later" [dependencies] anyhow = "1.0.69" clap = { version = "4.1.7", features = ["derive", "wrap_help"] } -encoding_rs = "0.8.32" +encoding_rs = { version = "0.8.32", features = ["serde"] } flate2 = "1.0.26" hexplay = "0.2.1" num = "0.4.0" ordered-float = "3.7.0" thiserror = "1.0" -chrono = "0.4.40" +chrono = { version = "0.4.40", features = ["serde"] } unicase = "2.6.0" libc = "0.2.147" -indexmap = "2.1.0" +indexmap = { version = "2.1.0", features = ["serde"] } bitflags = "2.5.0" unicode-width = "0.2.0" chardetng = "0.1.17" @@ -50,6 +50,7 @@ readpass = "1.0.3" zeroize = "1.8.1" unicode-properties = "0.1.3" unicode-segmentation = "1.12.0" +serde_json = "1.0.141" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index d3fa05d4d7..b0a3efcf6e 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -38,6 +38,7 @@ use std::{ use encoding_rs::{mem::decode_latin1, Encoding, UTF_8}; use ordered_float::OrderedFloat; +use serde::{ser::SerializeTupleVariant, Serialize}; use crate::dictionary::{VarType, VarWidth}; @@ -120,6 +121,15 @@ impl Debug for RawString { } } +impl Serialize for RawString { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.deref().serialize(serializer) + } +} + /// A borrowed string in an unspecified encoding. /// /// A [RawString] is usually associated with a [Variable] and uses the @@ -187,6 +197,29 @@ impl RawStr { } } +impl Serialize for RawStr { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if let Ok(s) = str::from_utf8(&self.0) { + let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) { + (0, "Ascii") + } else { + (1, "Utf8") + }; + let mut tuple = + serializer.serialize_tuple_variant("RawString", variant_index, variant, 1)?; + tuple.serialize_field(s)?; + tuple.end() + } else { + let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?; + tuple.serialize_field(&decode_latin1(&self.0))?; + tuple.end() + } + } +} + /// Helper struct for printing [RawStr] with [format!]. /// /// Created by [RawStr::display]. @@ -232,6 +265,18 @@ impl Debug for Datum { } } +impl Serialize for Datum { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Datum::Number(number) => number.serialize(serializer), + Datum::String(raw_string) => raw_string.serialize(serializer), + } + } +} + impl PartialEq for Datum { fn eq(&self, other: &Self) -> bool { match (self, other) { @@ -400,7 +445,7 @@ impl From<&[u8]> for Datum { } /// A case in a data set. -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] pub struct Case( /// One [Datum] per variable in the corresponding [Dictionary], in the same /// order. diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 977f166e5d..60dc4f9d52 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -31,6 +31,7 @@ use encoding_rs::Encoding; use enum_map::{Enum, EnumMap}; use indexmap::IndexSet; use num::integer::div_ceil; +use serde::{ser::SerializeStruct, Serialize}; use smallvec::SmallVec; use thiserror::Error as ThisError; use unicase::UniCase; @@ -49,7 +50,7 @@ use crate::{ pub type DictIndex = usize; /// Variable type. -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] pub enum VarType { /// A numeric variable. Numeric, @@ -87,7 +88,7 @@ impl Display for VarType { } /// [VarType], plus a width for [VarType::String]. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)] pub enum VarWidth { Numeric, String(u16), @@ -299,6 +300,26 @@ pub struct Dictionary { pub encoding: &'static Encoding, } +impl Serialize for Dictionary { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut map = serializer.serialize_struct("Dictionary", 12)?; + map.serialize_field("variables", &self.variables)?; + map.serialize_field("split_file", &self.split_vars())?; + map.serialize_field("weight", &self.weight_var())?; + map.serialize_field("filter", &self.filter_var())?; + map.serialize_field("documents", &self.documents)?; + // vectors + map.serialize_field("attributes", &self.attributes)?; + map.serialize_field("mrsets", &self.mrsets)?; + //variable sets + map.serialize_field("encoding", self.encoding)?; + map.end() + } +} + #[derive(Debug, ThisError)] pub enum AddVarError { #[error("Duplicate variable name {0}.")] @@ -335,6 +356,11 @@ impl Dictionary { self.weight.map(|index| &self.variables[index].0) } + /// Returns a reference to the filter variable, if any. + pub fn filter_var(&self) -> Option<&Variable> { + self.filter.map(|index| &self.variables[index].0) + } + /// Returns references to all the split variables, if any. pub fn split_vars(&self) -> Vec<&Variable> { self.split_file @@ -1014,7 +1040,7 @@ where }); } -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize)] pub enum Role { #[default] Input, @@ -1087,7 +1113,7 @@ impl From for i32 { } } -#[derive(Clone, Default, PartialEq, Eq)] +#[derive(Clone, Default, PartialEq, Eq, Serialize)] pub struct Attributes(pub BTreeMap>); impl Attributes { @@ -1160,7 +1186,7 @@ impl TryFrom<&Attributes> for Option { } /// A variable, usually inside a [Dictionary]. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct Variable { /// The variable's name. /// @@ -1321,7 +1347,7 @@ impl HasIdentifier for Vector { } /// Variables that represent multiple responses to a survey question. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct MultipleResponseSet { /// The set's name. pub name: Identifier, @@ -1356,7 +1382,7 @@ impl HasIdentifier for MultipleResponseSet { } /// The type of a [MultipleResponseSet]. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub enum MultipleResponseType { /// A "multiple dichotomy set", analogous to a survey question with a set of /// checkboxes. Each variable in the set is treated in a Boolean fashion: @@ -1398,7 +1424,7 @@ impl MultipleResponseType { } } -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] pub enum CategoryLabels { VarLabels, CountedValues { use_var_label_as_mrset_label: bool }, @@ -1420,7 +1446,7 @@ impl VariableSet { } } -#[derive(Clone, Default, PartialEq, Eq)] +#[derive(Clone, Default, PartialEq, Eq, Serialize)] pub struct ValueLabels(pub HashMap); impl ValueLabels { @@ -1475,7 +1501,7 @@ impl Hash for ValueLabels { } } -#[derive(Clone, Default)] +#[derive(Clone, Default, Serialize)] pub struct MissingValues { /// Individual missing values, up to 3 of them. values: Vec, @@ -1620,7 +1646,7 @@ impl<'a> Display for DisplayMissingValues<'a> { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Serialize)] pub enum MissingValueRange { In { low: f64, high: f64 }, From { low: f64 }, @@ -1685,7 +1711,7 @@ impl Display for MissingValueRange { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] pub enum Alignment { Left, Right, @@ -1710,7 +1736,7 @@ impl Alignment { } /// [Level of measurement](https://en.wikipedia.org/wiki/Level_of_measurement). -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] pub enum Measure { /// Nominal values can only be compared for equality. Nominal, diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index 7ddbbbaee7..75bac4038a 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -24,6 +24,7 @@ use std::{ use chrono::{Datelike, Local}; use enum_iterator::{all, Sequence}; use enum_map::{Enum, EnumMap}; +use serde::Serialize; use thiserror::Error as ThisError; use unicode_width::UnicodeWidthStr; @@ -124,7 +125,7 @@ impl From for Category { } } -#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Hash, Sequence)] +#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Hash, Sequence, Serialize)] pub enum CC { A, B, @@ -151,7 +152,7 @@ impl Display for CC { } } -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Sequence)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Sequence, Serialize)] pub enum Type { // Basic numeric formats. F, @@ -482,7 +483,7 @@ impl TryFrom for UncheckedFormat { } } -#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize)] pub struct Format { type_: Type, w: Width, diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index b2565d6a54..0923504b4d 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -23,6 +23,7 @@ use std::{ }; use encoding_rs::{CoderResult, Encoder, EncoderResult, Encoding, UTF_8}; +use serde::Serialize; use thiserror::Error as ThisError; use unicase::UniCase; use unicode_properties::UnicodeGeneralCategory; @@ -391,6 +392,15 @@ impl Identifier { } } +impl Serialize for Identifier { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.0.as_str().serialize(serializer) + } +} + fn encode_fully(encoder: &mut Encoder, mut src: &str, dst: &mut Vec, last: bool) { while let (CoderResult::OutputFull, read, _) = encoder.encode_from_utf8_to_vec(src, dst, last) { src = &src[read..]; @@ -603,6 +613,18 @@ where } } +impl Serialize for ByIdentifier +where + T: HasIdentifier + Clone + Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.0.serialize(serializer) + } +} + #[cfg(test)] mod tests { use encoding_rs::{UTF_8, WINDOWS_1252}; diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index dbd9be9f32..7067ef0c38 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -311,13 +311,12 @@ fn dissect( return Ok(()); } Mode::Raw => { - println!("{:#?}", reader.header()); + serde_json::to_writer_pretty(stdout(), reader.header())?; for record in reader.records() { - let header = record?; - println!("{:#?}", header); + serde_json::to_writer_pretty(stdout(), &record?)?; } for (_index, case) in (0..max_cases).zip(reader.cases()) { - println!("{:#?}", case?); + serde_json::to_writer_pretty(stdout(), &case?)?; } } Mode::Decoded => { @@ -327,9 +326,8 @@ fn dissect( None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?, }; let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}")); - for header in records { - let header = header.decode(&mut decoder); - println!("{:#?}", header); + for record in records { + serde_json::to_writer_pretty(stdout(), &record.decode(&mut decoder))?; } } Mode::Parsed => { @@ -348,8 +346,8 @@ fn dissect( |e| eprintln!("{e}"), ) .into_parts(); - println!("{dictionary:#?}"); - println!("{metadata:#?}"); + serde_json::to_writer_pretty(stdout(), &dictionary)?; + serde_json::to_writer_pretty(stdout(), &metadata)?; } } diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index c62f49964d..c4c67bf753 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -36,17 +36,20 @@ use crate::{ hexfloat::HexFloat, identifier::{ByIdentifier, Error as IdError, Identifier}, output::pivot::{Group, Value}, - sys::raw::{ - self, infer_encoding, - records::{ - Compression, DocumentRecord, EncodingRecord, Extension, FileAttributesRecord, - FileHeader, FloatInfoRecord, IntegerInfoRecord, LongName, LongNamesRecord, - LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord, - NumberOfCasesRecord, ProductInfoRecord, RawFormat, ValueLabel, ValueLabelRecord, - VarDisplayRecord, VariableAttributesRecord, VariableRecord, VariableSetRecord, - VeryLongStringsRecord, + sys::{ + raw::{ + self, infer_encoding, + records::{ + Compression, DocumentRecord, EncodingRecord, Extension, FileAttributesRecord, + FileHeader, FloatInfoRecord, IntegerInfoRecord, LongName, LongNamesRecord, + LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord, + NumberOfCasesRecord, ProductInfoRecord, RawFormat, ValueLabel, ValueLabelRecord, + VarDisplayRecord, VariableAttributesRecord, VariableRecord, VariableSetRecord, + VeryLongStringsRecord, + }, + Cases, DecodedRecord, RawDatum, RawWidth, Reader, }, - Cases, DecodedRecord, RawDatum, RawWidth, Reader, + serialize_endian, }, }; use anyhow::{anyhow, Error as AnyError}; @@ -55,6 +58,7 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::Encoding; use indexmap::set::MutableValues; use itertools::Itertools; +use serde::Serialize; use thiserror::Error as ThisError; /// A warning for decoding [Records] into a [SystemFile]. @@ -1320,7 +1324,7 @@ impl Records { /// # Example /// /// `ProductVersion(1,2,3)` is version 1.2.3. -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, BinRead, BinWrite)] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, BinRead, BinWrite, Serialize)] pub struct ProductVersion( /// Major version. pub i32, @@ -1384,7 +1388,7 @@ impl Debug for ProductVersion { /// System file metadata that is not part of [Dictionary]. /// /// [Dictionary]: crate::dictionary::Dictionary -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub struct Metadata { /// Creation date and time. /// @@ -1392,6 +1396,7 @@ pub struct Metadata { pub creation: NaiveDateTime, /// Endianness of integers and floating-point numbers in the file. + #[serde(serialize_with = "serialize_endian")] pub endian: Endian, /// Compression type (if any). diff --git a/rust/pspp/src/sys/mod.rs b/rust/pspp/src/sys/mod.rs index 911cc0f62f..cfb64761e5 100644 --- a/rust/pspp/src/sys/mod.rs +++ b/rust/pspp/src/sys/mod.rs @@ -29,6 +29,7 @@ #![cfg_attr(not(test), warn(missing_docs))] mod cooked; +use binrw::Endian; pub use cooked::*; pub mod encoding; pub mod raw; @@ -37,7 +38,18 @@ pub mod raw; pub mod sack; mod write; +use serde::Serializer; pub use write::{Version, WriteOptions, Writer}; #[cfg(test)] mod test; + +fn serialize_endian(endian: &Endian, serializer: S) -> Result +where + S: Serializer, +{ + match endian { + Endian::Big => serializer.serialize_unit_variant("Endian", 0, "Big"), + Endian::Little => serializer.serialize_unit_variant("Endian", 1, "Little"), + } +} diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 60b5619f4e..642ef61cb1 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -45,6 +45,7 @@ use crate::{ use encoding_rs::Encoding; use flate2::bufread::ZlibDecoder; +use serde::Serialize; use smallvec::SmallVec; use std::{ borrow::Cow, @@ -386,7 +387,7 @@ impl From for WarningDetails { } /// A raw record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub enum Record { /// Variable record. /// @@ -536,7 +537,7 @@ pub enum Record { /// Some records can be understand raw, but others need to have strings decoded /// (and interpreted as identifiers) or raw data interpreted as either numbers /// or strings. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub enum DecodedRecord { /// Variable record, with strings decoded. Variable(VariableRecord), @@ -794,7 +795,7 @@ impl<'de> Decoder<'de> { /// System file type, inferred from its "magic number". /// /// The magic number is the first four bytes of the file. -#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize)] pub enum Magic { /// Regular system file. Sav, @@ -905,6 +906,18 @@ impl Debug for RawDatum { } } +impl Serialize for RawDatum { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + RawDatum::Number(number) => number.serialize(serializer), + RawDatum::String(s) => RawStr::from_bytes(s).serialize(serializer), + } + } +} + impl RawDatum { /// Constructs a `RawDatum` from `raw` given that we now know the variable /// type and endianness. @@ -1497,7 +1510,7 @@ impl Iterator for Cases { } /// Width of a variable record. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)] pub enum RawWidth { /// String continuation. /// @@ -1589,6 +1602,15 @@ impl Debug for RawStrArray { } } +impl Serialize for RawStrArray { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + RawStr::from_bytes(&self.0).serialize(serializer) + } +} + fn skip_bytes(r: &mut R, mut n: usize) -> Result<(), IoError> { thread_local! { static BUF: RefCell<[u8; 256]> = RefCell::new([0u8; 256]); diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index e85a473800..c6353d78e8 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -25,17 +25,18 @@ use crate::{ read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum, RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails, }, - ProductVersion, + serialize_endian, ProductVersion, }, }; use binrw::{binrw, BinRead, BinWrite, Error as BinError}; use clap::ValueEnum; use itertools::Itertools; +use serde::{ser::SerializeTuple, Serialize, Serializer}; use thiserror::Error as ThisError; /// Type of compression in a system file. -#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, ValueEnum)] pub enum Compression { /// Simple bytecode-based compression. Simple, @@ -55,10 +56,10 @@ pub enum HeaderWarning { } /// A file header record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct FileHeader where - S: Debug, + S: Debug + Serialize, { /// Magic number. pub magic: Magic, @@ -97,22 +98,45 @@ where pub file_label: S, /// Endianness of the data in the file header. + #[serde(serialize_with = "serialize_endian")] pub endian: Endian, } -#[allow(missing_docs)] +/// Raw file header. #[derive(BinRead, BinWrite)] pub struct RawHeader { + /// Magic number. pub magic: [u8; 4], + + /// Eye-catcher string and product name. pub eye_catcher: [u8; 60], + + /// Layout code, normally either 2 or 3. pub layout_code: u32, + + /// Claimed number of variable positions (not always accurate). pub nominal_case_size: u32, + + /// Compression type. pub compression_code: u32, + + /// 1-based variable index of the weight variable, or 0 if the file is + /// unweighted. pub weight_index: u32, + + /// Claimed number of cases, or [u32::MAX] if unknown. pub n_cases: u32, + + /// Compression bias, usually 100.0. pub bias: f64, + + /// `dd mmm yy` in the file's encoding. pub creation_date: [u8; 9], + + /// `HH:MM:SS` in the file's encoding. pub creation_time: [u8; 8], + + /// File label, in the file's encoding. Padded on the right with spaces. #[brw(pad_after = 3)] pub file_label: [u8; 64], } @@ -254,12 +278,32 @@ impl TryFrom for RawFormat { } } +struct RawFormatDisplayMeaning(RawFormat); + +impl Display for RawFormatDisplayMeaning { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + let type_ = format_name(self.0 .0 >> 16); + let w = (self.0 .0 >> 8) & 0xff; + let d = self.0 .0 & 0xff; + write!(f, "{type_}{w}.{d}") + } +} + impl Debug for RawFormat { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let type_ = format_name(self.0 >> 16); - let w = (self.0 >> 8) & 0xff; - let d = self.0 & 0xff; - write!(f, "{:06x} ({type_}{w}.{d})", self.0) + write!(f, "{:06x} ({})", self.0, RawFormatDisplayMeaning(*self)) + } +} + +impl Serialize for RawFormat { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut tuple = serializer.serialize_tuple(2)?; + tuple.serialize_element(&self.0)?; + tuple.serialize_element(&RawFormatDisplayMeaning(*self).to_string())?; + tuple.end() } } @@ -422,10 +466,10 @@ pub enum VariableWarning { } /// A variable record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VariableRecord where - S: Debug, + S: Debug + Serialize, { /// Range of offsets in file. pub offsets: Range, @@ -449,14 +493,30 @@ where pub label: Option, } -#[allow(missing_docs)] +/// Raw variable record. #[derive(BinRead, BinWrite)] pub struct RawVariableRecord { + /// Variable width, in the range -1..=255. pub width: i32, + + /// 1 if the variable has a label, 0 otherwise. pub has_variable_label: u32, + + /// - 0 for no missing values. + /// - 1 for one missing value. + /// - 2 for two missing values. + /// - 3 for three missing values. + /// - -2 for a range of missing values. + /// - -3 for an individual missing value plus a range. pub missing_value_code: i32, + + /// Print format. pub print_format: RawFormat, + + /// Write format. pub write_format: RawFormat, + + /// Variable name, padded with spaces. pub name: [u8; 8], } @@ -572,11 +632,11 @@ pub enum ValueLabelWarning { } /// A value and label in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ValueLabel where - D: Debug, - S: Debug, + D: Debug + Serialize, + S: Debug + Serialize, { /// The value being labeled. pub datum: D, @@ -588,11 +648,11 @@ where /// /// This represents both the type-3 and type-4 records together, since they are /// always paired anyway. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ValueLabelRecord where - D: Debug, - S: Debug, + D: Debug + Serialize, + S: Debug + Serialize, { /// Range of offsets in file. pub offsets: Range, @@ -609,8 +669,8 @@ where impl ValueLabelRecord where - D: Debug, - S: Debug, + D: Debug + Serialize, + S: Debug + Serialize, { /// Maximum number of value labels in a record. pub const MAX_LABELS: u32 = u32::MAX / 8; @@ -764,10 +824,10 @@ impl ValueLabelRecord { } /// A document record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct DocumentRecord where - S: Debug, + S: Debug + Serialize, { /// The range of file offsets occupied by the record. pub offsets: Range, @@ -845,17 +905,18 @@ pub struct ExtensionRecord<'a> { } /// An integer info record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct IntegerInfoRecord { /// File offsets occupied by the record. pub offsets: Range, /// Details. + #[serde(flatten)] pub inner: RawIntegerInfoRecord, } /// Machine integer info record in [mod@binrw] format. -#[derive(Clone, Debug, BinRead, BinWrite)] +#[derive(Clone, Debug, BinRead, BinWrite, Serialize)] pub struct RawIntegerInfoRecord { /// Version number. pub version: ProductVersion, @@ -905,7 +966,7 @@ impl FloatInfoRecord { } /// A floating-point info record. -#[derive(Clone, Debug, BinRead, BinWrite)] +#[derive(Clone, Debug, BinRead, BinWrite, Serialize)] pub struct FloatInfoRecord { /// Value used for system-missing values. pub sysmis: f64, @@ -918,7 +979,7 @@ pub struct FloatInfoRecord { } /// Long variable names record. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawLongNamesRecord( /// Text contents of record. TextRecord, @@ -949,7 +1010,7 @@ impl RawLongNamesRecord { } /// An extension record whose contents are a text string. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct TextRecord { /// Range of file offsets for this record in bytes. pub offsets: Range, @@ -992,7 +1053,7 @@ pub enum VeryLongStringWarning { } /// A very long string parsed from a [VeryLongStringsRecord]. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VeryLongString { /// Short name of very long string variable. pub short_name: Identifier, @@ -1019,11 +1080,11 @@ impl VeryLongString { } /// A very long string record as text. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawVeryLongStringsRecord(TextRecord); /// A parsed very long string record. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VeryLongStringsRecord( /// The very long strings. pub Vec, @@ -1113,7 +1174,7 @@ pub enum MultipleResponseWarning { } /// The type of a multiple-response set. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub enum MultipleResponseType { /// Multiple-dichotomy set. MultipleDichotomy { @@ -1172,11 +1233,11 @@ impl MultipleResponseType { } /// A multiple-response set in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct MultipleResponseSet where - I: Debug, - S: Debug, + I: Debug + Serialize, + S: Debug + Serialize, { /// The set's name. pub name: I, @@ -1272,11 +1333,11 @@ impl MultipleResponseSet { } /// A multiple-response set record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct MultipleResponseRecord where - I: Debug, - S: Debug, + I: Debug + Serialize, + S: Debug + Serialize, { /// File offsets of the record. pub offsets: Range, @@ -1399,7 +1460,7 @@ impl Alignment { } /// Variable display settings for one variable, in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VarDisplay { /// Measurement level. pub measure: Option, @@ -1412,7 +1473,7 @@ pub struct VarDisplay { } /// A variable display record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VarDisplayRecord( /// Variable display settings for each variable. pub Vec, @@ -1483,10 +1544,10 @@ pub enum LongStringMissingValuesWarning { } /// Missing values for one long string variable. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct LongStringMissingValues where - N: Debug, + N: Debug + Serialize, { /// Variable name. pub var_name: N, @@ -1509,10 +1570,10 @@ impl LongStringMissingValues { } /// Long string missing values record in a sytem file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct LongStringMissingValueRecord where - N: Debug, + N: Debug + Serialize, { /// The record's file offsets. pub offsets: Range, @@ -1594,7 +1655,7 @@ impl LongStringMissingValueRecord { } /// A character encoding record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct EncodingRecord( /// The encoding name. pub String, @@ -1612,7 +1673,7 @@ impl EncodingRecord { } /// The extended number of cases record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct NumberOfCasesRecord { /// Always observed as 1. pub one: u64, @@ -1651,7 +1712,7 @@ pub enum VariableSetWarning { } /// Raw (text) version of the variable set record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawVariableSetRecord(TextRecord); impl RawVariableSetRecord { @@ -1682,7 +1743,7 @@ impl RawVariableSetRecord { } /// Raw (text) version of a product info record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawProductInfoRecord(TextRecord); impl RawProductInfoRecord { @@ -1857,11 +1918,11 @@ impl Attributes { } /// A raw (text) file attributes record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawFileAttributesRecord(TextRecord); /// A decoded file attributes record in a system file. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Serialize)] pub struct FileAttributesRecord(pub Attributes); impl RawFileAttributesRecord { @@ -1902,7 +1963,7 @@ impl RawFileAttributesRecord { } /// A set of variable attributes in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VarAttributes { /// The long name of the variable associated with the attributes. pub long_var_name: Identifier, @@ -1947,11 +2008,11 @@ impl VarAttributes { } /// A raw (text) variable attributes record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawVariableAttributesRecord(TextRecord); /// A decoded variable attributes record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VariableAttributesRecord(pub Vec); impl RawVariableAttributesRecord { @@ -2004,7 +2065,7 @@ pub enum LongNameWarning { } /// A long variable name in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct LongName { /// The variable's short name. pub short_name: Identifier, @@ -2035,15 +2096,15 @@ impl LongName { } /// A long variable name record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct LongNamesRecord(pub Vec); /// A product info record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ProductInfoRecord(pub String); /// A variable set in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VariableSet { /// Name of the variable set. pub name: String, @@ -2082,7 +2143,7 @@ impl VariableSet { } /// A variable set record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct VariableSetRecord { /// Range of file offsets occupied by the record. pub offsets: Range, @@ -2143,7 +2204,7 @@ pub enum ExtensionWarning { /// /// Most of the records in system files are "extension records". This structure /// collects everything in an extension record for later processing. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct Extension { /// File offsets occupied by the extension record. /// @@ -2266,10 +2327,10 @@ pub enum LongStringValueLabelWarning { } /// One set of long string value labels record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct LongStringValueLabels where - S: Debug, + S: Debug + Serialize, { /// The variable being labeled. pub var_name: N, @@ -2306,11 +2367,11 @@ impl LongStringValueLabels { } /// A long string value labels record in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct LongStringValueLabelRecord where - N: Debug, - S: Debug, + N: Debug + Serialize, + S: Debug + Serialize, { /// File offsets occupied by the record. pub offsets: Range, @@ -2365,17 +2426,18 @@ impl LongStringValueLabelRecord { } /// ZLIB header, for [Compression::ZLib]. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ZHeader { /// File offset to the start of the record. pub offset: u64, /// Raw header. + #[serde(flatten)] pub inner: RawZHeader, } /// A ZLIB header in a system file. -#[derive(Clone, Debug, BinRead, BinWrite)] +#[derive(Clone, Debug, BinRead, BinWrite, Serialize)] pub struct RawZHeader { /// File offset to the ZLIB data header. pub zheader_offset: u64, @@ -2448,18 +2510,19 @@ pub enum ZHeaderError { } /// A ZLIB trailer in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ZTrailer { /// File offset to the start of the record. pub offset: u64, /// The raw trailer. + #[serde(flatten)] pub inner: RawZTrailer, } /// A ZLIB trailer in a system file. #[binrw] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct RawZTrailer { /// Compression bias as a negative integer, e.g. -100. pub int_bias: i64, @@ -2518,7 +2581,7 @@ pub enum ZlibTrailerWarning { } /// A ZLIB block descriptor in a system file. -#[derive(Clone, Debug, BinRead, BinWrite)] +#[derive(Clone, Debug, BinRead, BinWrite, Serialize)] pub struct ZBlock { /// Offset of block of data if simple compression were used. pub uncompressed_ofs: u64, diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index cfb25a5897..293c9beee1 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -738,9 +738,7 @@ impl Iterator for SegmentWidths { enum CaseVar { Numeric, - String { - encoding: SmallVec<[StringSegment; 1]>, - }, + String(SmallVec<[StringSegment; 1]>), } impl CaseVar { @@ -764,14 +762,14 @@ impl CaseVar { encoding.last_mut().unwrap().padding_bytes += padding_bytes; } } - CaseVar::String { encoding } + CaseVar::String(encoding) } } } fn n_segments(&self) -> usize { match self { CaseVar::Numeric => 1, - CaseVar::String { encoding, .. } => encoding.len(), + CaseVar::String(encoding) => encoding.len(), } } } @@ -842,7 +840,7 @@ where .unwrap() .unwrap_or(f64::MIN) .write_le(&mut self.inner)?, - CaseVar::String { encoding } => { + CaseVar::String(encoding) => { let mut s = datum.as_string().unwrap().as_bytes(); for segment in encoding { let data; @@ -875,7 +873,7 @@ where } }, - CaseVar::String { encoding } => { + CaseVar::String(encoding) => { let mut s = datum.as_string().unwrap().as_bytes(); for segment in encoding { let data; -- 2.30.2