From e87704bf4c252aa868b88646a1e00d1c58ab61c0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 24 Jul 2025 18:34:06 -0700 Subject: [PATCH] Use EncodedDatum all over --- rust/pspp/src/data.rs | 185 ++++++++++++++++++++++++++- rust/pspp/src/dictionary.rs | 5 +- rust/pspp/src/format/display/mod.rs | 47 ++++--- rust/pspp/src/format/display/test.rs | 33 ++--- rust/pspp/src/format/parse.rs | 3 +- rust/pspp/src/main.rs | 54 ++++---- rust/pspp/src/output/pivot/mod.rs | 10 +- rust/pspp/src/sys/cooked.rs | 50 ++++---- rust/pspp/src/sys/raw.rs | 32 ++--- rust/pspp/src/sys/test.rs | 5 +- rust/pspp/src/sys/write.rs | 8 +- 11 files changed, 315 insertions(+), 117 deletions(-) diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index c296c8a6af..b85033c4d7 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -44,6 +44,7 @@ use serde::{ser::SerializeTupleVariant, Serialize}; use crate::{ dictionary::{VarType, VarWidth}, format::DisplayPlain, + sys::raw::RawDatum, }; /// An owned string in an unspecified character encoding. @@ -132,6 +133,12 @@ impl From<&[u8]> for RawString { } } +impl From for RawString { + fn from(value: EncodedString) -> Self { + Self(value.bytes) + } +} + impl Debug for RawString { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { ::fmt(&*self, f) @@ -264,7 +271,23 @@ pub enum EncodedDatum { } impl EncodedDatum { - /// Constructs a new numerical [Datum] for the system-missing value. + pub fn into_raw(self) -> Datum { + match self { + EncodedDatum::Number(number) => Datum::Number(number), + EncodedDatum::String(encoded_string) => Datum::String(encoded_string.into()), + } + } + + pub fn as_raw(&self) -> Dat<'_> { + match self { + EncodedDatum::Number(number) => Dat::Number(*number), + EncodedDatum::String(encoded_string) => { + Dat::String(RawStr::from_bytes(encoded_string.as_bytes())) + } + } + } + + /// Constructs a new numerical [EncodedDatum] for the system-missing value. 
pub const fn sysmis() -> Self { Self::Number(None) } @@ -369,6 +392,24 @@ impl Serialize for EncodedDatum { } } +impl From for EncodedDatum { + fn from(number: f64) -> Self { + Some(number).into() + } +} + +impl From> for EncodedDatum { + fn from(value: Option) -> Self { + Self::Number(value) + } +} + +impl From<&str> for EncodedDatum { + fn from(value: &str) -> Self { + Self::String(EncodedString::from(value)) + } +} + /// A borrowed [Datum] with a string encoding. #[derive(Copy, Clone)] pub enum EncodedDat<'a> { @@ -466,6 +507,69 @@ impl<'a> PartialEq for EncodedDat<'a> { impl<'a> Eq for EncodedDat<'a> {} +/// A borrowed [Datum]. +#[derive(Clone)] +pub enum Dat<'a> { + /// A numeric value. + Number( + /// A number, or `None` for the system-missing value. + Option, + ), + /// A string value. + String( + /// The value, in the variable's encoding. + &'a RawStr, + ), +} + +impl Dat<'_> { + /// Constructs a new numerical [Datum] for the system-missing value. + pub const fn sysmis() -> Self { + Self::Number(None) + } + + /// Returns the number inside this datum, or `None` if this is a string + /// datum. + pub fn as_number(&self) -> Option> { + match self { + Self::Number(number) => Some(*number), + Self::String(_) => None, + } + } + + /// Returns the string inside this datum, or `None` if this is a numeric + /// datum. + pub fn as_string(&self) -> Option<&RawStr> { + match self { + Self::Number(_) => None, + Self::String(s) => Some(s), + } + } + + pub fn as_encoded<'a>(&'a self, encoding: &'static Encoding) -> EncodedDat<'a> { + match self { + Self::Number(number) => EncodedDat::Number(*number), + Self::String(raw_string) => EncodedDat::String(raw_string.as_encoded(encoding)), + } + } + + /// Returns the [VarType] corresponding to this datum. + pub fn var_type(&self) -> VarType { + match self { + Self::Number(_) => VarType::Numeric, + Self::String(_) => VarType::String, + } + } + + /// Returns the [VarWidth] corresponding to this datum. 
+ pub fn width(&self) -> VarWidth { + match self { + Self::Number(_) => VarWidth::Numeric, + Self::String(s) => VarWidth::String(s.len().try_into().unwrap()), + } + } +} + /// The value of a [Variable](crate::dictionary::Variable). #[derive(Clone)] pub enum Datum { @@ -687,7 +791,7 @@ impl From<&[u8]> for Datum { /// A case in a data set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] -pub struct Case( +pub struct RawCase( /// One [Datum] per variable in the corresponding [Dictionary], in the same /// order. /// @@ -695,6 +799,66 @@ pub struct Case( pub Vec, ); +impl RawCase { + pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum]> { + Case { + encoding, + data: &self.0, + } + } + pub fn with_encoding(self, encoding: &'static Encoding) -> Case> { + Case { + encoding, + data: self.0, + } + } +} + +pub struct Case +where + B: Borrow<[Datum]>, +{ + encoding: &'static Encoding, + data: B, +} + +impl Case +where + B: Borrow<[Datum]>, +{ + fn len(&self) -> usize { + self.data.borrow().len() + } +} + +impl IntoIterator for Case> { + type Item = EncodedDatum; + + type IntoIter = CaseVecIter; + + fn into_iter(self) -> Self::IntoIter { + CaseVecIter { + encoding: self.encoding, + iter: self.data.into_iter(), + } + } +} + +pub struct CaseVecIter { + encoding: &'static Encoding, + iter: std::vec::IntoIter, +} + +impl Iterator for CaseVecIter { + type Item = EncodedDatum; + + fn next(&mut self) -> Option { + self.iter + .next() + .map(|datum| datum.with_encoding(self.encoding)) + } +} + /// An owned string and its [Encoding]. /// /// The string is not guaranteed to be valid in the encoding. @@ -714,6 +878,14 @@ impl EncodedString { self.bytes.len() } + /// Returns this string recoded in UTF-8. Invalid characters will be + /// replaced by [REPLACEMENT_CHARACTER]. 
+ /// + /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER + pub fn as_str(&self) -> Cow<'_, str> { + self.encoding.decode_without_bom_handling(&self.bytes).0 + } + /// Returns the bytes in the string, in its encoding. pub fn as_bytes(&self) -> &[u8] { &self.bytes @@ -756,6 +928,15 @@ impl EncodedString { } } +impl From<&str> for EncodedString { + fn from(value: &str) -> Self { + Self { + bytes: value.into(), + encoding: UTF_8, + } + } +} + impl<'a> From<&'a EncodedString> for EncodedStr<'a> { fn from(value: &'a EncodedString) -> Self { value.borrowed() diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index d6b2015909..30a31f915e 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -1020,7 +1020,10 @@ impl<'a> OutputMrsets<'a> { let mr_type_name = match &mrset.mr_type { MultipleResponseType::MultipleDichotomy { datum, .. } => { - pt.insert(&[row, 2], Value::new_datum(datum, self.dictionary.encoding)); + pt.insert( + &[row, 2], + Value::new_datum(datum.as_encoded(self.dictionary.encoding)), + ); "Dichotomies" } MultipleResponseType::MultipleCategory => "Categories", diff --git a/rust/pspp/src/format/display/mod.rs b/rust/pspp/src/format/display/mod.rs index 3a956ee84e..d4638cf34a 100644 --- a/rust/pspp/src/format/display/mod.rs +++ b/rust/pspp/src/format/display/mod.rs @@ -29,7 +29,7 @@ use smallvec::{Array, SmallVec}; use crate::{ calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name}, - data::{Datum, QuotedEncodedDat}, + data::{Datum, EncodedDat, EncodedDatum, QuotedEncodedDat}, endian::{endian_to_smallvec, ToBytes}, format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type}, settings::{EndianSettings, Settings as PsppSettings}, @@ -39,8 +39,7 @@ pub struct DisplayDatum<'a, 'b> { format: Format, settings: &'b Settings, endian: EndianSettings, - datum: &'a Datum, - encoding: &'static Encoding, + datum: EncodedDat<'a>, /// If true, the output will 
remove leading and trailing spaces from numeric /// values, and trailing spaces from string values. (This might make the @@ -83,33 +82,46 @@ impl Display for DisplayPlainF64 { } } -impl Datum { - /// Returns an object that implements [Display] for printing this [Datum] as - /// `format`. `encoding` specifies this `Datum`'s encoding (therefore, it - /// is used only if this is a `Datum::String`). +impl EncodedDat<'_> { + /// Returns an object that implements [Display] for printing this + /// [EncodedDat] as `format`. /// /// [Display]: std::fmt::Display - pub fn display(&self, format: Format, encoding: &'static Encoding) -> DisplayDatum { - DisplayDatum::new(format, self, encoding) + pub fn display(&self, format: Format) -> DisplayDatum { + DisplayDatum::new(format, *self) } - pub fn display_plain(&self, encoding: &'static Encoding) -> QuotedEncodedDat<'_> { - self.as_encoded(encoding).quoted() + pub fn display_plain(&self) -> QuotedEncodedDat<'_> { + self.quoted() + } +} + +impl EncodedDatum { + /// Returns an object that implements [Display] for printing this + /// [EncodedDatum] as `format`. 
+ /// + /// [Display]: std::fmt::Display + pub fn display(&self, format: Format) -> DisplayDatum { + DisplayDatum::new(format, self.borrowed()) + } + + pub fn display_plain(&self) -> QuotedEncodedDat<'_> { + self.quoted() } } impl Display for DisplayDatum<'_, '_> { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { let number = match self.datum { - Datum::Number(number) => *number, - Datum::String(string) => { + EncodedDat::Number(number) => number, + EncodedDat::String(string) => { if self.format.type_() == Type::AHex { - for byte in &string.0 { + for byte in string.as_bytes() { write!(f, "{byte:02x}")?; } } else { let quote = if self.quote_strings { "\"" } else { "" }; - let s = self.encoding.decode_without_bom_handling(&string.0).0; + let s = string.as_str(); let s = if self.trim_spaces { s.trim_end_matches(' ') } else { @@ -161,12 +173,11 @@ impl Display for DisplayDatum<'_, '_> { } impl<'a, 'b> DisplayDatum<'a, 'b> { - pub fn new(format: Format, value: &'a Datum, encoding: &'static Encoding) -> Self { + pub fn new(format: Format, datum: EncodedDat<'a>) -> Self { let settings = PsppSettings::global(); Self { format, - datum: value, - encoding, + datum, settings: &settings.formats, endian: settings.endian, trim_spaces: false, diff --git a/rust/pspp/src/format/display/test.rs b/rust/pspp/src/format/display/test.rs index d9b4dd5f64..6def5489b0 100644 --- a/rust/pspp/src/format/display/test.rs +++ b/rust/pspp/src/format/display/test.rs @@ -23,7 +23,7 @@ use smallstr::SmallString; use smallvec::SmallVec; use crate::{ - data::Datum, + data::{Datum, EncodedDatum}, endian::Endian, format::{AbstractFormat, Epoch, Format, Settings, Type, UncheckedFormat, CC}, lex::{scan::StringScanner, segment::Syntax, Punct, Token}, @@ -75,8 +75,8 @@ fn test(name: &str) { let format: Format = format.try_into().unwrap(); assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon))); let expected = tokens[2].as_string().unwrap(); - let actual = Datum::Number(value) - .display(format, 
UTF_8) + let actual = EncodedDatum::Number(value) + .display(format) .with_settings(&settings) .with_endian(endian) .to_string(); @@ -183,11 +183,11 @@ fn leading_zeros() { } fn test_with_settings(value: f64, expected: [&str; 2], settings: &Settings) { - let value = Datum::from(value); + let value = EncodedDatum::from(value); for (expected, d) in expected.into_iter().zip([2, 1].into_iter()) { assert_eq!( &value - .display(Format::new(Type::F, 5, d).unwrap(), UTF_8) + .display(Format::new(Type::F, 5, d).unwrap()) .with_settings(settings) .to_string(), expected @@ -214,8 +214,8 @@ fn leading_zeros() { fn non_ascii_cc() { fn test(settings: &Settings, value: f64, expected: &str) { assert_eq!( - &Datum::from(value) - .display(Format::new(Type::CC(CC::A), 10, 2).unwrap(), UTF_8) + &EncodedDatum::from(value) + .display(Format::new(Type::CC(CC::A), 10, 2).unwrap()) .with_settings(settings) .to_string(), expected @@ -266,8 +266,8 @@ fn test_binhex(name: &str) { assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon))); let expected = tokens[2].as_string().unwrap(); let mut actual = SmallVec::<[u8; 16]>::new(); - Datum::Number(value) - .display(format, UTF_8) + EncodedDatum::Number(value) + .display(format) .with_endian(endian) .write(&mut actual, UTF_8) .unwrap(); @@ -339,11 +339,8 @@ fn test_dates(format: Format, expect: &[&str]) { ]; assert_eq!(expect.len(), INPUTS.len()); for (input, expect) in INPUTS.iter().copied().zip_eq(expect.iter().copied()) { - let value = parser.parse(input).unwrap(); - let formatted = value - .display(format, UTF_8) - .with_settings(&settings) - .to_string(); + let value = parser.parse(input).unwrap().with_encoding(UTF_8); + let formatted = value.display(format).with_settings(&settings).to_string(); assert_eq!(&formatted, expect); } } @@ -1295,8 +1292,12 @@ fn test_times(format: Format, name: &str) { .zip_eq(output.lines().map(|r| r.unwrap())) .zip(1..) 
{ - let value = parser.parse(&input).unwrap(); - let formatted = value.display(format, UTF_8).to_string(); + let formatted = parser + .parse(&input) + .unwrap() + .with_encoding(UTF_8) + .display(format) + .to_string(); assert!( formatted == expect, "formatting {}:{line_number} as {format}:\n actual: {formatted:?}\nexpected: {expect:?}", diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs index d2f6b22bc7..495037938b 100644 --- a/rust/pspp/src/format/parse.rs +++ b/rust/pspp/src/format/parse.rs @@ -943,7 +943,8 @@ mod test { let error = result.clone().err(); let value = result .unwrap_or(type_.default_value()) - .display(Format::new(Type::F, 10, 4).unwrap(), UTF_8) + .with_encoding(UTF_8) + .display(Format::new(Type::F, 10, 4).unwrap()) .to_string(); if value != expected { panic!( diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 972775998f..2104cbfa20 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -25,6 +25,7 @@ use pspp::{ ReadOptions, Records, }, }; +use serde::Serialize; use std::{ fs::File, io::{stdout, BufReader, Write}, @@ -158,15 +159,9 @@ impl Convert { } for case in cases { - output.write_record( - case?.0.into_iter().zip(dictionary.variables.iter()).map( - |(datum, variable)| { - datum - .display(variable.print_format, variable.encoding()) - .to_string() - }, - ), - )?; + output.write_record(case?.into_iter().zip(dictionary.variables.iter()).map( + |(datum, variable)| datum.display(variable.print_format).to_string(), + ))?; } } OutputFormat::Sys => { @@ -177,7 +172,7 @@ impl Convert { .with_compression(self.sys_options.compression) .write_file(&dictionary, output)?; for case in cases { - output.write_case(case?.0.iter())?; + output.write_case(case?.into_iter().map(|datum| datum.into_raw()))?; } } } @@ -221,18 +216,18 @@ impl Decrypt { } } -/// Dissects SPSS system files. +/// Show SPSS system file dictionary and data. 
#[derive(Args, Clone, Debug)] -struct Dissect { +struct Show { /// Maximum number of cases to print. #[arg(long = "data", default_value_t = 0)] max_cases: u64, - /// Files to dissect. + /// Files to show. #[arg(required = true)] files: Vec, - /// How to dissect the file. + /// What to show. #[arg(short, long, value_enum, default_value_t)] mode: Mode, @@ -241,10 +236,10 @@ struct Dissect { encoding: Option<&'static Encoding>, } -impl Dissect { +impl Show { fn run(self) -> Result<()> { for file in self.files { - dissect(&file, self.max_cases, self.mode, self.encoding)?; + show(&file, self.max_cases, self.mode, self.encoding)?; } Ok(()) } @@ -254,7 +249,7 @@ impl Dissect { enum Command { Convert(Convert), Decrypt(Decrypt), - Dissect(Dissect), + Show(Show), } impl Command { @@ -262,7 +257,7 @@ impl Command { match self { Command::Convert(convert) => convert.run(), Command::Decrypt(decrypt) => decrypt.run(), - Command::Dissect(dissect) => dissect.run(), + Command::Show(show) => show.run(), } } } @@ -291,7 +286,16 @@ fn main() -> Result<()> { Cli::parse().command.run() } -fn dissect( +fn show_json(value: &T) -> Result<()> +where + T: Serialize, +{ + serde_json::to_writer_pretty(stdout(), value)?; + println!(); + Ok(()) +} + +fn show( file_name: &Path, max_cases: u64, mode: Mode, @@ -311,12 +315,12 @@ fn dissect( return Ok(()); } Mode::Raw => { - serde_json::to_writer_pretty(stdout(), reader.header())?; + show_json(reader.header())?; for record in reader.records() { - serde_json::to_writer_pretty(stdout(), &record?)?; + show_json(&record?)?; } for (_index, case) in (0..max_cases).zip(reader.cases()) { - serde_json::to_writer_pretty(stdout(), &case?)?; + show_json(&case?)?; } } Mode::Decoded => { @@ -327,7 +331,7 @@ fn dissect( }; let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}")); for record in records { - serde_json::to_writer_pretty(stdout(), &record.decode(&mut decoder))?; + show_json(&record.decode(&mut decoder))?; } } Mode::Parsed => { @@ -346,8 +350,8 @@ 
fn dissect( |e| eprintln!("{e}"), ) .into_parts(); - serde_json::to_writer_pretty(stdout(), &dictionary)?; - serde_json::to_writer_pretty(stdout(), &metadata)?; + show_json(&dictionary)?; + show_json(&metadata)?; } } diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 6062028663..8260b95488 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -68,7 +68,7 @@ use thiserror::Error as ThisError; use tlo::parse_tlo; use crate::{ - data::Datum, + data::{Datum, EncodedDat, EncodedDatum}, dictionary::{VarType, Variable}, format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat}, settings::{Settings, Show}, @@ -1860,10 +1860,10 @@ impl Value { variable_label: variable.label.clone(), })) } - pub fn new_datum(value: &Datum, encoding: &'static Encoding) -> Self { + pub fn new_datum(value: EncodedDat) -> Self { match value { - Datum::Number(number) => Self::new_number(*number), - Datum::String(string) => Self::new_user_text(string.decode(encoding).into_owned()), + EncodedDat::Number(number) => Self::new_number(number), + EncodedDat::String(string) => Self::new_user_text(string.as_str()), } } pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { @@ -2206,7 +2206,7 @@ impl Display for DisplayValue<'_> { *format }; let mut buf = SmallString::<[u8; 40]>::new(); - write!(&mut buf, "{}", Datum::Number(*value).display(format, UTF_8)).unwrap(); + write!(&mut buf, "{}", EncodedDat::Number(*value).display(format)).unwrap(); write!(f, "{}", buf.trim_start_matches(' '))?; } if let Some(label) = self.show_label { diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index db67102f9d..22662678e5 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -26,7 +26,7 @@ use std::{ use crate::{ calendar::date_time_to_pspp, crypto::EncryptedFile, - data::{Datum, EncodedDatum, RawString}, + data::{Case, Datum, EncodedDatum, RawString}, dictionary::{ 
DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues, MissingValuesError, MultipleResponseType, VarWidth, Variable, @@ -47,7 +47,7 @@ use crate::{ VarDisplayRecord, VariableAttributesRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord, }, - Cases, DecodedRecord, RawDatum, RawWidth, Reader, + DecodedRecord, RawCases, RawDatum, RawWidth, Reader, }, serialize_endian, }, @@ -775,7 +775,7 @@ impl Records { pub fn decode( mut self, header: FileHeader, - mut cases: Cases, + mut cases: RawCases, encoding: &'static Encoding, mut warn: impl FnMut(Error), ) -> SystemFile { @@ -1035,7 +1035,8 @@ impl Records { .map(|value| { value .decode(variable.width) - .display(variable.print_format, variable.encoding()) + .as_encoded(variable.encoding()) + .display(variable.print_format) .with_trimming() .with_quoted_string() .to_string() @@ -1311,7 +1312,7 @@ impl Records { SystemFile { dictionary, metadata, - cases, + cases: Cases::new(encoding, cases), } } } @@ -1676,32 +1677,29 @@ impl MultipleResponseType { } } -/* -trait Quoted { - fn quoted(self) -> WithQuotes - where - Self: Display + Sized; +pub struct Cases { + encoding: &'static Encoding, + inner: RawCases, } -impl Quoted for T -where - T: Display, -{ - fn quoted(self) -> WithQuotes { - WithQuotes(self) +impl Cases { + pub fn new(encoding: &'static Encoding, inner: RawCases) -> Self { + Self { encoding, inner } } } -struct WithQuotes(T) -where - T: Display; +impl Debug for Cases { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Cases") + } +} -impl Display for WithQuotes -where - T: Display, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "\"{}\"", &self.0) +impl Iterator for Cases { + type Item = Result>, raw::Error>; + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|result| result.map(|case| case.with_encoding(self.encoding))) } } -*/ diff --git a/rust/pspp/src/sys/raw.rs 
b/rust/pspp/src/sys/raw.rs index 642ef61cb1..27ffb2f810 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -20,7 +20,7 @@ //! raw details. Most readers will want to use higher-level interfaces. use crate::{ - data::{Case, Datum, RawStr, RawString}, + data::{Datum, RawCase, RawStr, RawString}, dictionary::{VarType, VarWidth}, endian::{Endian, Parse, ToBytes}, identifier::{Error as IdError, Identifier}, @@ -946,12 +946,12 @@ impl Datum { reader: &mut R, case_vars: &[CaseVar], endian: Endian, - ) -> Result, Error> { + ) -> Result, Error> { fn eof( reader: &mut R, case_vars: &[CaseVar], case_start: u64, - ) -> Result, Error> { + ) -> Result, Error> { let offset = reader.stream_position()?; if offset == case_start { Ok(None) @@ -993,7 +993,7 @@ impl Datum { } } } - Ok(Some(Case(values))) + Ok(Some(RawCase(values))) } fn read_compressed_chunk( @@ -1025,12 +1025,12 @@ impl Datum { codes: &mut VecDeque, endian: Endian, bias: f64, - ) -> Result, Error> { + ) -> Result, Error> { fn eof( reader: &mut R, case_start: u64, n_chunks: usize, - ) -> Result, Error> { + ) -> Result, Error> { let offset = reader.stream_position()?; if n_chunks > 0 { Err(Error::new( @@ -1080,7 +1080,7 @@ impl Datum { } } } - Ok(Some(Case(values))) + Ok(Some(RawCase(values))) } } @@ -1154,7 +1154,7 @@ where var_types: VarTypes, state: ReaderState, - cases: Option, + cases: Option, } impl<'a, R> Reader<'a, R> @@ -1194,7 +1194,7 @@ where /// there is an error reading the headers, or if [cases](Self::cases) is /// called before all of the headers have been read, the returned [Cases] /// will be empty. 
- pub fn cases(self) -> Cases { + pub fn cases(self) -> RawCases { self.cases.unwrap_or_default() } } @@ -1210,7 +1210,7 @@ where { fn cases(&mut self, ztrailer_offset: Option) { self.0.state = ReaderState::End; - self.0.cases = Some(Cases::new( + self.0.cases = Some(RawCases::new( self.0.reader.take().unwrap(), take(&mut self.0.var_types), &self.0.header, @@ -1372,7 +1372,7 @@ impl CaseVar { /// /// [Dictionary]: crate::dictionary::Dictionary /// [SystemFile]: crate::sys::cooked::SystemFile -pub struct Cases { +pub struct RawCases { reader: Box, case_vars: Vec, compression: Option, @@ -1384,13 +1384,13 @@ pub struct Cases { read_cases: u64, } -impl Debug for Cases { +impl Debug for RawCases { fn fmt(&self, f: &mut Formatter) -> FmtResult { write!(f, "Cases") } } -impl Default for Cases { +impl Default for RawCases { fn default() -> Self { Self { reader: Box::new(empty()), @@ -1406,7 +1406,7 @@ impl Default for Cases { } } -impl Cases { +impl RawCases { fn new( reader: R, var_types: VarTypes, @@ -1461,8 +1461,8 @@ impl Cases { } } -impl Iterator for Cases { - type Item = Result; +impl Iterator for RawCases { + type Item = Result; fn next(&mut self) -> Option { if self.eof { diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index eda4aebd67..aa1714124a 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -665,9 +665,8 @@ where case_numbers .push(Value::new_integer(Some((case_numbers.len() + 1) as f64))); data.push( - case.0 - .into_iter() - .map(|datum| Value::new_datum(&datum, dictionary.encoding())) + case.into_iter() + .map(|datum| Value::new_datum(datum.borrowed())) .collect::>(), ); } diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index f7c38ed58d..e42f1c5066 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -17,7 +17,7 @@ use itertools::zip_eq; use smallvec::SmallVec; use crate::{ - data::{Datum, EncodedDatum}, + data::{Dat, Datum, EncodedDatum}, dictionary::{ Alignment, 
Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType, ValueLabels, VarWidth, @@ -842,7 +842,7 @@ where fn write_case_uncompressed<'c>( &mut self, - case: impl Iterator, + case: impl Iterator, ) -> Result<(), BinError> { for (var, datum) in zip_eq(self.case_vars, case) { match var { @@ -865,7 +865,7 @@ where } fn write_case_compressed<'c>( &mut self, - case: impl Iterator, + case: impl Iterator, ) -> Result<(), BinError> { for (var, datum) in zip_eq(self.case_vars, case) { match var { @@ -991,7 +991,7 @@ where /// Panics if [try_finish](Self::try_finish) has been called. pub fn write_case<'a>( &mut self, - case: impl IntoIterator, + case: impl IntoIterator, ) -> Result<(), BinError> { match self.inner.as_mut().unwrap() { Either::Left(inner) => { -- 2.30.2