use flate2::read::ZlibDecoder;
use num::Integer;
+use std::borrow::Cow;
use std::fmt::{Debug, Formatter, Result as FmtResult};
use std::str::from_utf8;
use std::{
/// Borrowed byte string used for diagnostic output.
///
/// Its `Debug` implementation first tries to interpret the bytes as UTF-8.
pub struct FallbackEncoding<'a>(&'a [u8]);
/// Decodes `s` into text for diagnostic output without ever failing.
///
/// If `s` is valid UTF-8 it is returned borrowed, with no allocation.
/// Otherwise every byte is mapped to the Unicode code point with the same
/// numeric value (via `char::from(u8)`) and the result is returned as an
/// owned `String`.
fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> {
    match from_utf8(s) {
        Ok(text) => Cow::Borrowed(text),
        // Not UTF-8: fall back to a byte-for-code-point mapping so that the
        // caller still gets something printable.
        Err(_) => Cow::Owned(s.iter().copied().map(char::from).collect()),
    }
}
+
impl<'a> Debug for FallbackEncoding<'a> {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
if let Ok(s) = from_utf8(self.0) {
pub missing_values: MissingValues,
/// Optional variable label.
- pub label: Option<Vec<u8>>,
+ pub label: Option<UnencodedString>,
}
impl Debug for Variable {
f,
"Variable label: {:?}",
self.label
- .as_ref()
- .map(|label| FallbackEncoding(&label[..]))
)?;
writeln!(f, "Missing values: {:?}", self.missing_values)
}
1 => {
let len: u32 = endian.parse(read_bytes(r)?);
let read_len = len.min(65535) as usize;
- let label = Some(read_vec(r, read_len)?);
+ let label = UnencodedString(read_vec(r, read_len)?);
let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
let _ = read_vec(r, padding_bytes as usize)?;
- label
+ Some(label)
}
_ => {
return Err(Error::BadVariableLabelCode {
}
}
-#[derive(Clone, Debug)]
+#[derive(Copy, Clone)]
+pub struct UntypedValue(pub [u8; 8]);
+
+impl Debug for UntypedValue {
+ fn fmt(&self, f: &mut Formatter) -> FmtResult {
+ let little: f64 = Endian::Little.parse(self.0);
+ let little = format!("{:?}", little);
+ let big: f64 = Endian::Big.parse(self.0);
+ let big = format!("{:?}", big);
+ let number = if little.len() <= big.len() { little } else { big };
+ write!(f, "{number}")?;
+
+ let string = fallback_encode(&self.0);
+ let string = string.split(|c: char| c == '\0' || c.is_control()).next().unwrap();
+ write!(f, "/\"{string}\"")?;
+ Ok(())
+ }
+}
+
/// An owned string kept as raw bytes rather than as a `String`.
///
/// NOTE(review): the name suggests the character encoding is not yet known
/// when the value is read — confirm against the callers.
#[derive(Clone)]
pub struct UnencodedString(Vec<u8>);

impl From<Vec<u8>> for UnencodedString {
    /// Takes ownership of `source` without copying it.
    fn from(source: Vec<u8>) -> Self {
        Self(source)
    }
}

impl From<&[u8]> for UnencodedString {
    /// Copies `source` into a newly allocated buffer.
    fn from(source: &[u8]) -> Self {
        Self(source.to_vec())
    }
}
+
+impl Debug for UnencodedString {
+ fn fmt(&self, f: &mut Formatter) -> FmtResult {
+ write!(f, "{:?}", FallbackEncoding(self.0.as_slice()))
+ }
+}
+
+#[derive(Clone)]
pub struct ValueLabel {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
/// The labels.
- pub labels: Vec<([u8; 8], Vec<u8>)>,
+ pub labels: Vec<(UntypedValue, UnencodedString)>,
+}
+
+impl Debug for ValueLabel {
+ fn fmt(&self, f: &mut Formatter) -> FmtResult {
+ for (value, label) in self.labels.iter() {
+ writeln!(f, "{value:?}: {label:?}")?;
+ }
+ Ok(())
+ }
}
impl ValueLabel {
let mut labels = Vec::new();
for _ in 0..n {
- let value: [u8; 8] = read_bytes(r)?;
+ let value = UntypedValue(read_bytes(r)?);
let label_len: u8 = endian.parse(read_bytes(r)?);
let label_len = label_len as usize;
let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
- let mut label = read_vec(r, padded_len)?;
+ let mut label = read_vec(r, padded_len - 1)?;
label.truncate(label_len);
- labels.push((value, label));
+ labels.push((value, UnencodedString(label)));
}
Ok(ValueLabel { offset, labels })
}
}
/// A record holding a list of variable indexes.
#[derive(Clone)]
pub struct VarIndexes {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// The variable indexes, in the order stored in this record.
    pub var_indexes: Vec<u32>,
}

impl Debug for VarIndexes {
    /// Writes `apply to variables` followed by ` #<index>` for each index.
    /// The record offset is not printed and no trailing newline is added.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        write!(f, "apply to variables")?;
        for var_index in &self.var_indexes {
            write!(f, " #{var_index}")?;
        }
        Ok(())
    }
}

impl VarIndexes {
    /// Maximum number of variable indexes in a record.
    pub const MAX: u32 = u32::MAX / 8;
}
pub enum MultipleResponseType {
MultipleDichotomy {
- value: Vec<u8>,
+ value: UnencodedString,
labels: CategoryLabels,
},
MultipleCategory,
}
pub struct MultipleResponseSet {
- pub name: Vec<u8>,
- pub label: Vec<u8>,
+ pub name: UnencodedString,
+ pub label: UnencodedString,
pub mr_type: MultipleResponseType,
- pub vars: Vec<Vec<u8>>,
+ pub vars: Vec<UnencodedString>,
}
impl MultipleResponseSet {
}
}
-fn parse_counted_string(input: &[u8]) -> Result<(&[u8], &[u8]), Error> {
+fn parse_counted_string(input: &[u8]) -> Result<(UnencodedString, &[u8]), Error> {
let Some(space) = input.iter().position(|&b| b == b' ') else {
return Err(Error::TBD);
};
};
let (string, rest) = input.split_at(length);
- Ok((string, rest))
+ Ok((string.into(), rest))
}
/// Newtype around a `String`.
/// NOTE(review): presumably the text of an extra-product-info record — confirm.
pub struct ExtraProductInfo(String);
}
pub struct LongStringValueLabels {
- pub var_name: Vec<u8>,
+ pub var_name: UnencodedString,
pub width: u32,
/// `(value, label)` pairs, where each value is `width` bytes.
- pub labels: Vec<(Vec<u8>, Vec<u8>)>,
+ pub labels: Vec<(UnencodedString, UnencodedString)>,
}
/// A list of [`LongStringValueLabels`] entries.
pub struct LongStringValueLabelSet(Vec<LongStringValueLabels>);
pub struct LongStringMissingValues {
/// Variable name.
- pub var_name: Vec<u8>,
+ pub var_name: UnencodedString,
- /// Up to three missing values.
- pub missing_values: Vec<[u8; 8]>,
+ /// Missing values.
+ pub missing_values: MissingValues,
}
/// A list of [`LongStringMissingValues`] entries.
pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
value_len,
});
}
- let mut missing_values = Vec::new();
+ let mut values = Vec::new();
for i in 0..n_missing_values {
let value: [u8; 8] = read_bytes(&mut input)?;
let numeric_value: u64 = endian.parse(value);
} else {
value
};
- missing_values.push(value);
+ values.push(Value::String(value));
}
+ let missing_values = MissingValues { values, range: None };
missing_value_set.push(LongStringMissingValues {
var_name,
- missing_values,
+ missing_values
});
}
Ok(LongStringMissingValueSet(missing_value_set))
Ok(vec)
}
-fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<Vec<u8>, IoError> {
+fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<UnencodedString, IoError> {
let length: u32 = endian.parse(read_bytes(r)?);
- read_vec(r, length as usize)
+ Ok(read_vec(r, length as usize)?.into())
}