From: Ben Pfaff Date: Sat, 12 Jul 2025 22:48:53 +0000 (-0700) Subject: Clean up RawDatum a bit. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9e9cef722d8af23e6d707e25178539be0bfca2da;p=pspp Clean up RawDatum a bit. --- diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 0a6dee73cd..1db24b683c 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -34,9 +34,9 @@ use crate::{ FileAttributesRecord, FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues, MissingValuesError, MultipleResponseRecord, NumberOfCasesRecord, - ProductInfoRecord, RawStrArray, RawString, RawWidth, ValueLabel, ValueLabelRecord, - VarDisplayRecord, VariableAttributesRecord, VariableRecord, VariableSetRecord, - VeryLongStringsRecord, ZHeader, ZTrailer, + ProductInfoRecord, RawDatum, RawString, RawWidth, ValueLabel, + ValueLabelRecord, VarDisplayRecord, VariableAttributesRecord, VariableRecord, + VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer, }, }, }; @@ -309,7 +309,7 @@ pub enum Error { pub struct Headers { pub header: HeaderRecord, pub variable: Vec>, - pub value_label: Vec, String>>, + pub value_label: Vec>, pub document: Vec>, pub integer_info: Option, pub float_info: Option, diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 926a52263e..969fee56f5 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -399,7 +399,7 @@ pub enum Record { /// Value labels for numeric and short string variables. /// /// These appear after the variable records. - ValueLabel(ValueLabelRecord, RawString>), + ValueLabel(ValueLabelRecord), /// Document record. Document(DocumentRecord), @@ -484,7 +484,7 @@ pub enum DecodedRecord { Variable(VariableRecord), /// Value label, with strings decoded. - ValueLabel(ValueLabelRecord, String>), + ValueLabel(ValueLabelRecord), /// Documents, with strings decoded. 
Document(DocumentRecord), @@ -988,16 +988,30 @@ impl TryFrom for VarWidth { } } -/// A [Datum] for which the character encoding and variable width is not yet known. -pub type RawDatum = Datum>; +/// A [Datum] without knowledge of string width or character encoding. +#[derive(Copy, Clone)] +pub enum RawDatum { + Number(Option), + String([u8; 8]), +} + +impl Debug for RawDatum { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + match self { + RawDatum::Number(Some(number)) => write!(f, "{number:?}"), + RawDatum::Number(None) => write!(f, "SYSMIS"), + RawDatum::String(s) => write!(f, "{:?}", default_decode(s)), + } + } +} impl RawDatum { /// Constructs a `RawDatum` from `raw` given that we now know the variable /// type and endianness. pub fn from_raw(raw: &UntypedDatum, var_type: VarType, endian: Endian) -> Self { match var_type { - VarType::String => Datum::String(RawStrArray(raw.0)), - VarType::Numeric => Datum::Number(endian.parse(raw.0)), + VarType::String => RawDatum::String(raw.0), + VarType::Numeric => RawDatum::Number(endian.parse(raw.0)), } } @@ -1008,7 +1022,7 @@ impl RawDatum { Self::Number(x) => Datum::Number(*x), Self::String(s) => { let width = width.as_string_width().unwrap(); - Datum::String(RawString::from(&s.0[..width])) + Datum::String(RawString::from(&s[..width])) } } } @@ -1996,8 +2010,9 @@ impl VariableRecord { /// 8 bytes that represent a number or a string (but that's all we know). /// -/// Used when we don't know whether it's a number or a string, or the string -/// width, or the character encoding. +/// Used when we don't know whether it's a number or a string, or the numerical +/// endianness, or the string width, or the character encoding. Really all we +/// know is that it's 8 bytes that mean something. 
#[derive(Copy, Clone)] pub struct UntypedDatum(pub [u8; 8]); @@ -2276,26 +2291,26 @@ impl Display for QuotedEncodedStr<'_> { } #[derive(Clone, Debug)] -pub struct ValueLabel +pub struct ValueLabel where - V: Debug, + D: Debug, S: Debug, { - pub datum: Datum, + pub datum: D, pub label: S, } #[derive(Clone)] -pub struct ValueLabelRecord +pub struct ValueLabelRecord where - V: Debug, + D: Debug, S: Debug, { /// Range of offsets in file. pub offsets: Range, /// The labels. - pub labels: Vec>, + pub labels: Vec>, /// The 1-based indexes of the variable indexes. pub dict_indexes: Vec, @@ -2304,9 +2319,9 @@ where pub var_type: VarType, } -impl Debug for ValueLabelRecord +impl Debug for ValueLabelRecord where - V: Debug, + D: Debug, S: Debug, { fn fmt(&self, f: &mut Formatter) -> FmtResult { @@ -2322,9 +2337,9 @@ where } } -impl ValueLabelRecord +impl ValueLabelRecord where - V: Debug, + D: Debug, S: Debug, { /// Maximum number of value labels in a record. @@ -2334,7 +2349,7 @@ where pub const MAX_INDEXES: u32 = u32::MAX / 8; } -impl ValueLabelRecord, RawString> { +impl ValueLabelRecord { fn read( r: &mut R, endian: Endian, @@ -2430,7 +2445,7 @@ impl ValueLabelRecord, RawString> { let labels = labels .into_iter() .map(|(value, label)| ValueLabel { - datum: Datum::from_raw(&value, var_type, endian), + datum: RawDatum::from_raw(&value, var_type, endian), label, }) .collect(); @@ -2444,7 +2459,7 @@ impl ValueLabelRecord, RawString> { }))) } - fn decode(self, decoder: &mut Decoder) -> ValueLabelRecord, String> { + fn decode(self, decoder: &mut Decoder) -> ValueLabelRecord { let labels = self .labels .iter()