From: Ben Pfaff Date: Sun, 6 Aug 2023 21:50:41 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4ca47add8d2c1a39bc317ed87cbc9789a3f3abcf;p=pspp work --- diff --git a/rust/src/raw.rs b/rust/src/raw.rs index ca0596f541..4dbddec151 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -3,6 +3,7 @@ use crate::Error; use flate2::read::ZlibDecoder; use num::Integer; +use std::borrow::Cow; use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::str::from_utf8; use std::{ @@ -53,6 +54,18 @@ impl Record { pub struct FallbackEncoding<'a>(&'a [u8]); +fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> { + if let Ok(s) = from_utf8(s) { + s.into() + } else { + let s: String = s + .iter() + .map(|c| char::from(*c)) + .collect(); + s.into() + } +} + impl<'a> Debug for FallbackEncoding<'a> { fn fmt(&self, f: &mut Formatter) -> FmtResult { if let Ok(s) = from_utf8(self.0) { @@ -736,7 +749,7 @@ pub struct Variable { pub missing_values: MissingValues, /// Optional variable label. - pub label: Option>, + pub label: Option, } impl Debug for Variable { @@ -760,8 +773,6 @@ impl Debug for Variable { f, "Variable label: {:?}", self.label - .as_ref() - .map(|label| FallbackEncoding(&label[..])) )?; writeln!(f, "Missing values: {:?}", self.missing_values) } @@ -782,12 +793,12 @@ impl Variable { 1 => { let len: u32 = endian.parse(read_bytes(r)?); let read_len = len.min(65535) as usize; - let label = Some(read_vec(r, read_len)?); + let label = UnencodedString(read_vec(r, read_len)?); let padding_bytes = Integer::next_multiple_of(&len, &4) - len; let _ = read_vec(r, padding_bytes as usize)?; - label + Some(label) } _ => { return Err(Error::BadVariableLabelCode { @@ -811,13 +822,62 @@ impl Variable { } } -#[derive(Clone, Debug)] +#[derive(Copy, Clone)] +pub struct UntypedValue(pub [u8; 8]); + +impl Debug for UntypedValue { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let little: f64 = Endian::Little.parse(self.0); + let little = format!("{:?}", little); + let big: f64 = Endian::Big.parse(self.0); + let big = format!("{:?}", big); + let number = if little.len() <= big.len() { little } else { big }; + write!(f, "{number}")?; + + let string = fallback_encode(&self.0); + let string = string.split(|c: char| c == '\0' || c.is_control()).next().unwrap(); + write!(f, "/\"{string}\"")?; + Ok(()) + } +} + +#[derive(Clone)] +pub struct UnencodedString(Vec); + +impl From> for UnencodedString { + fn from(source: Vec) -> Self { + Self(source) + } +} + +impl From<&[u8]> for UnencodedString { + fn from(source: &[u8]) -> Self { + Self(source.into()) + } +} + +impl Debug for UnencodedString { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "{:?}", FallbackEncoding(self.0.as_slice())) + } +} + +#[derive(Clone)] pub struct ValueLabel { /// Offset from the start of the file to the start of the record. pub offset: u64, /// The labels. - pub labels: Vec<([u8; 8], Vec)>, + pub labels: Vec<(UntypedValue, UnencodedString)>, +} + +impl Debug for ValueLabel { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + for (value, label) in self.labels.iter() { + writeln!(f, "{value:?}: {label:?}")?; + } + Ok(()) + } } impl ValueLabel { @@ -837,20 +897,20 @@ impl ValueLabel { let mut labels = Vec::new(); for _ in 0..n { - let value: [u8; 8] = read_bytes(r)?; + let value = UntypedValue(read_bytes(r)?); let label_len: u8 = endian.parse(read_bytes(r)?); let label_len = label_len as usize; let padded_len = Integer::next_multiple_of(&(label_len + 1), &8); - let mut label = read_vec(r, padded_len)?; + let mut label = read_vec(r, padded_len - 1)?; label.truncate(label_len); - labels.push((value, label)); + labels.push((value, UnencodedString(label))); } Ok(ValueLabel { offset, labels }) } } -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct VarIndexes { /// Offset from the start of the file to the start of the record. pub offset: u64, @@ -859,6 +919,16 @@ pub struct VarIndexes { pub var_indexes: Vec, } +impl Debug for VarIndexes { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "apply to variables")?; + for var_index in self.var_indexes.iter() { + write!(f, " #{var_index}")?; + } + Ok(()) + } +} + impl VarIndexes { /// Maximum number of variable indexes in a record. pub const MAX: u32 = u32::MAX / 8; @@ -1046,16 +1116,16 @@ pub enum CategoryLabels { } pub enum MultipleResponseType { MultipleDichotomy { - value: Vec, + value: UnencodedString, labels: CategoryLabels, }, MultipleCategory, } pub struct MultipleResponseSet { - pub name: Vec, - pub label: Vec, + pub name: UnencodedString, + pub label: UnencodedString, pub mr_type: MultipleResponseType, - pub vars: Vec>, + pub vars: Vec, } impl MultipleResponseSet { @@ -1153,7 +1223,7 @@ impl ExtensionRecord for MultipleResponseSets { } } -fn parse_counted_string(input: &[u8]) -> Result<(&[u8], &[u8]), Error> { +fn parse_counted_string(input: &[u8]) -> Result<(UnencodedString, &[u8]), Error> { let Some(space) = input.iter().position(|&b| b == b' ') else { return Err(Error::TBD); }; @@ -1170,7 +1240,7 @@ fn parse_counted_string(input: &[u8]) -> Result<(&[u8], &[u8]), Error> { }; let (string, rest) = input.split_at(length); - Ok((string, rest)) + Ok((string.into(), rest)) } pub struct ExtraProductInfo(String); @@ -1297,11 +1367,11 @@ impl TextRecord for VeryLongStringRecord { } pub struct LongStringValueLabels { - pub var_name: Vec, + pub var_name: UnencodedString, pub width: u32, /// `(value, label)` pairs, where each value is `width` bytes. - pub labels: Vec<(Vec, Vec)>, + pub labels: Vec<(UnencodedString, UnencodedString)>, } pub struct LongStringValueLabelSet(Vec); @@ -1338,10 +1408,10 @@ impl ExtensionRecord for LongStringValueLabelSet { pub struct LongStringMissingValues { /// Variable name. - pub var_name: Vec, + pub var_name: UnencodedString, - /// Up to three missing values. - pub missing_values: Vec<[u8; 8]>, + /// Missing values. + pub missing_values: MissingValues, } pub struct LongStringMissingValueSet(Vec); @@ -1368,7 +1438,7 @@ impl ExtensionRecord for LongStringMissingValueSet { value_len, }); } - let mut missing_values = Vec::new(); + let mut values = Vec::new(); for i in 0..n_missing_values { let value: [u8; 8] = read_bytes(&mut input)?; let numeric_value: u64 = endian.parse(value); @@ -1380,11 +1450,12 @@ impl ExtensionRecord for LongStringMissingValueSet { } else { value }; - missing_values.push(value); + values.push(Value::String(value)); } + let missing_values = MissingValues { values, range: None }; missing_value_set.push(LongStringMissingValues { var_name, - missing_values, + missing_values }); } Ok(LongStringMissingValueSet(missing_value_set)) @@ -1789,7 +1860,7 @@ fn read_vec(r: &mut R, n: usize) -> Result, IoError> { Ok(vec) } -fn read_string(r: &mut R, endian: Endian) -> Result, IoError> { +fn read_string(r: &mut R, endian: Endian) -> Result { let length: u32 = endian.parse(read_bytes(r)?); - read_vec(r, length as usize) + Ok(read_vec(r, length as usize)?.into()) }