use crate::endian::{Endian, Parse, ToBytes};
use crate::{CategoryLabels, Compression};
+use encoding_rs::mem::decode_latin1;
use flate2::read::ZlibDecoder;
use num::Integer;
use std::borrow::Cow;
}
}
-pub struct FallbackEncoding<'a>(&'a [u8]);
-
-fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> {
- if let Ok(s) = from_utf8(s) {
- s.into()
- } else {
- let s: String = s.iter().map(|c| char::from(*c)).collect();
- s.into()
- }
-}
-
-impl<'a> Debug for FallbackEncoding<'a> {
- fn fmt(&self, f: &mut Formatter) -> FmtResult {
- if let Ok(s) = from_utf8(self.0) {
- let s = s.trim_end();
- write!(f, "\"{s}\"")
- } else {
- let s: String = self
- .0
- .iter()
- .map(|c| char::from(*c).escape_default())
- .flatten()
- .collect();
- let s = s.trim_end();
- write!(f, "\"{s}\"")
- }
- }
+/// Decodes `s` into text using a UTF-8-first, Latin-1-fallback rule.
+///
+/// If `s` is valid UTF-8, it is returned borrowed as UTF-8.  Otherwise it is
+/// decoded with `decode_latin1`, i.e. each byte is interpreted as the Unicode
+/// code point with the same value.
+fn default_decode<'a>(s: &'a [u8]) -> Cow<'a, str> {
+    match from_utf8(s) {
+        Ok(text) => Cow::Borrowed(text),
+        Err(_) => decode_latin1(s),
+    }
}
#[derive(Clone)]
};
write!(f, "{number}")?;
- let string = fallback_encode(&self.0);
+ let string = default_decode(&self.0);
let string = string
.split(|c: char| c == '\0' || c.is_control())
.next()
.unwrap();
- write!(f, "/\"{string}\"")?;
+ write!(f, "{string:?}")?;
Ok(())
}
}
impl Debug for UnencodedString {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", FallbackEncoding(self.0.as_slice()))
+ write!(f, "{:?}", default_decode(self.0.as_slice()))
}
}
impl<const N: usize> Debug for UnencodedStr<N> {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", FallbackEncoding(&self.0))
+ write!(f, "{:?}", default_decode(&self.0))
}
}
}
}
-trait TextRecord
-where
- Self: Sized,
-{
- const NAME: &'static str;
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
-}
-
trait ExtensionRecord
where
Self: Sized,
Ok((string.into(), rest))
}
-pub struct ProductInfo(String);
-
-impl TextRecord for ProductInfo {
- const NAME: &'static str = "extra product info";
- fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
- Ok(ProductInfo(input.into()))
- }
-}
-
#[derive(Clone, Debug)]
pub struct VarDisplayRecord(pub Vec<u32>);
}
}
-pub struct VariableSet {
- pub name: String,
- pub vars: Vec<String>,
-}
-
-impl VariableSet {
- fn parse(input: &str) -> Result<Self, Error> {
- let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
- let vars = input.split_ascii_whitespace().map(String::from).collect();
- Ok(VariableSet {
- name: name.into(),
- vars,
- })
- }
-}
-
-pub struct VariableSetRecord(Vec<VariableSet>);
-
-impl TextRecord for VariableSetRecord {
- const NAME: &'static str = "variable set";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut sets = Vec::new();
- for line in input.lines() {
- match VariableSet::parse(line) {
- Ok(set) => sets.push(set),
- Err(error) => warn(error),
- }
- }
- Ok(VariableSetRecord(sets))
- }
-}
-
-pub struct LongVariableName {
- pub short_name: String,
- pub long_name: String,
-}
-
-pub struct LongVariableNameRecord(Vec<LongVariableName>);
-
-impl TextRecord for LongVariableNameRecord {
- const NAME: &'static str = "long variable names";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut names = Vec::new();
- for pair in input.split('\t').filter(|s| !s.is_empty()) {
- if let Some((short_name, long_name)) = pair.split_once('=') {
- let name = LongVariableName {
- short_name: short_name.into(),
- long_name: long_name.into(),
- };
- names.push(name);
- } else {
- warn(Error::TBD)
- }
- }
- Ok(LongVariableNameRecord(names))
- }
-}
-
-pub struct VeryLongString {
- pub short_name: String,
- pub length: usize,
-}
-
-impl VeryLongString {
- fn parse(input: &str) -> Result<VeryLongString, Error> {
- let Some((short_name, length)) = input.split_once('=') else {
- return Err(Error::TBD);
- };
- let length: usize = length.parse().map_err(|_| Error::TBD)?;
- Ok(VeryLongString {
- short_name: short_name.into(),
- length,
- })
- }
-}
-
-pub struct VeryLongStringRecord(Vec<VeryLongString>);
-
-impl TextRecord for VeryLongStringRecord {
- const NAME: &'static str = "very long strings";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut very_long_strings = Vec::new();
- for tuple in input
- .split('\0')
- .map(|s| s.trim_end_matches('\t'))
- .filter(|s| !s.is_empty())
- {
- match VeryLongString::parse(tuple) {
- Ok(vls) => very_long_strings.push(vls),
- Err(error) => warn(error),
- }
- }
- Ok(VeryLongStringRecord(very_long_strings))
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabels {
- pub var_name: UnencodedString,
- pub width: u32,
-
- /// `(value, label)` pairs, where each value is `width` bytes.
- pub labels: Vec<(UnencodedString, UnencodedString)>,
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabelRecord(Vec<LongStringValueLabels>);
-
-impl ExtensionRecord for LongStringValueLabelRecord {
- const SUBTYPE: u32 = 21;
- const SIZE: Option<u32> = Some(1);
- const COUNT: Option<u32> = None;
- const NAME: &'static str = "long string value labels record";
-
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
- ext.check_size::<Self>()?;
-
- let mut input = &ext.data[..];
- let mut label_set = Vec::new();
- while !input.is_empty() {
- let var_name = read_string(&mut input, endian)?;
- let width: u32 = endian.parse(read_bytes(&mut input)?);
- let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
- let mut labels = Vec::new();
- for _ in 0..n_labels {
- let value = read_string(&mut input, endian)?;
- let label = read_string(&mut input, endian)?;
- labels.push((value, label));
- }
- label_set.push(LongStringValueLabels {
- var_name,
- width,
- labels,
- })
- }
- Ok(LongStringValueLabelRecord(label_set))
- }
-}
-
pub struct LongStringMissingValues {
/// Variable name.
pub var_name: UnencodedString,
}
}
-pub struct Attribute {
- pub name: String,
- pub values: Vec<String>,
-}
-
-impl Attribute {
- fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
- let Some((name, mut input)) = input.split_once('(') else {
- return Err(Error::TBD);
- };
- let mut values = Vec::new();
- loop {
- let Some((value, rest)) = input.split_once('\n') else {
- return Err(Error::TBD);
- };
- if let Some(stripped) = value
- .strip_prefix('\'')
- .and_then(|value| value.strip_suffix('\''))
- {
- values.push(stripped.into());
- } else {
- warn(Error::TBD);
- values.push(value.into());
- }
- if let Some(rest) = rest.strip_prefix(')') {
- return Ok((
- Attribute {
- name: name.into(),
- values,
- },
- rest,
- ));
- }
- input = rest;
- }
- }
-}
-
-pub struct AttributeSet(pub Vec<Attribute>);
-
-impl AttributeSet {
- fn parse<'a>(
- mut input: &'a str,
- sentinel: Option<char>,
- warn: &impl Fn(Error),
- ) -> Result<(AttributeSet, &'a str), Error> {
- let mut attributes = Vec::new();
- let rest = loop {
- match input.chars().next() {
- None => break input,
- c if c == sentinel => break &input[1..],
- _ => {
- let (attribute, rest) = Attribute::parse(input, &warn)?;
- attributes.push(attribute);
- input = rest;
- }
- }
- };
- Ok((AttributeSet(attributes), rest))
- }
-}
-
-pub struct FileAttributeRecord(AttributeSet);
-
-impl TextRecord for FileAttributeRecord {
- const NAME: &'static str = "data file attributes";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let (set, rest) = AttributeSet::parse(input, None, &warn)?;
- if !rest.is_empty() {
- warn(Error::TBD);
- }
- Ok(FileAttributeRecord(set))
- }
-}
-
-pub struct VarAttributeSet {
- pub long_var_name: String,
- pub attributes: AttributeSet,
-}
-
-impl VarAttributeSet {
- fn parse<'a>(
- input: &'a str,
- warn: &impl Fn(Error),
- ) -> Result<(VarAttributeSet, &'a str), Error> {
- let Some((long_var_name, rest)) = input.split_once(':') else {
- return Err(Error::TBD);
- };
- let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
- Ok((
- VarAttributeSet {
- long_var_name: long_var_name.into(),
- attributes,
- },
- rest,
- ))
- }
-}
-
-pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
-
-impl TextRecord for VariableAttributeRecord {
- const NAME: &'static str = "variable attributes";
- fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut var_attribute_sets = Vec::new();
- while !input.is_empty() {
- match VarAttributeSet::parse(input, &warn) {
- Ok((var_attribute, rest)) => {
- var_attribute_sets.push(var_attribute);
- input = rest;
- }
- Err(error) => {
- warn(error);
- break;
- }
- }
- }
- Ok(VariableAttributeRecord(var_attribute_sets))
- }
-}
#[derive(Clone, Debug)]
pub struct NumberOfCasesRecord {
pub data: Vec<u8>,
}
-/*
-fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
- match extension {
- /* Implemented record types. */
- ExtensionType::Integer => (4, 8),
- ExtensionType::Float => (8, 3),
- ExtensionType::VarSets => (1, 0),
- ExtensionType::Mrsets => (1, 0),
- ExtensionType::ProductInfo => (1, 0),
- ExtensionType::Display => (4, 0),
- ExtensionType::LongNames => (1, 0),
- ExtensionType::LongStrings => (1, 0),
- ExtensionType::Ncases => (8, 2),
- ExtensionType::FileAttrs => (1, 0),
- ExtensionType::VarAttrs => (1, 0),
- ExtensionType::Mrsets2 => (1, 0),
- ExtensionType::Encoding => (1, 0),
- ExtensionType::LongLabels => (1, 0),
- ExtensionType::LongMissing => (1, 0),
-
- /* Ignored record types. */
- ExtensionType::Date => (0, 0),
- ExtensionType::DataEntry => (0, 0),
- ExtensionType::Dataview => (0, 0),
- }
-}
- */
-
impl Extension {
fn check_size<E: ExtensionRecord>(&self) -> Result<(), Error> {
if let Some(expected_size) = E::SIZE {
let length: u32 = endian.parse(read_bytes(r)?);
Ok(read_vec(r, length as usize)?.into())
}
+
+/// One set of value labels from a long string value labels record: a
+/// variable name, a width, and the `(value, label)` pairs read for it.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels {
+ /// Variable name, kept as an `UnencodedString` (presumably the raw bytes
+ /// from the file, not yet decoded -- confirm against `UnencodedString`).
+ pub var_name: UnencodedString,
+ /// Width, read from the record as a 32-bit integer right after the
+ /// variable name.
+ pub width: u32,
+
+ /// `(value, label)` pairs, where each value is `width` bytes.
+ pub labels: Vec<(UnencodedString, UnencodedString)>,
+}
+
+/// Extension record (subtype 21) holding every `LongStringValueLabels` set
+/// parsed from the record's data, in the order they were read.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord(Vec<LongStringValueLabels>);
+
+impl ExtensionRecord for LongStringValueLabelRecord {
+    const SUBTYPE: u32 = 21;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "long string value labels record";
+
+    /// Parses the data of `ext` as a sequence of label sets.
+    ///
+    /// After `check_size` accepts the record, sets are read until no input
+    /// bytes remain.  Each set consists of a variable name, a 32-bit width, a
+    /// 32-bit label count, and then that many `(value, label)` string pairs.
+    ///
+    /// Any failed read aborts the whole parse with an error.  `_warn` is not
+    /// used.
+    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        let mut remaining = &ext.data[..];
+        let mut sets = Vec::new();
+        while !remaining.is_empty() {
+            let var_name = read_string(&mut remaining, endian)?;
+            let width: u32 = endian.parse(read_bytes(&mut remaining)?);
+            let n_labels: u32 = endian.parse(read_bytes(&mut remaining)?);
+
+            // Collecting into `Result` stops at the first failed read, exactly
+            // like `?` inside an explicit loop.
+            let labels = (0..n_labels)
+                .map(|_| -> Result<_, Error> {
+                    let value = read_string(&mut remaining, endian)?;
+                    let label = read_string(&mut remaining, endian)?;
+                    Ok((value, label))
+                })
+                .collect::<Result<Vec<_>, Error>>()?;
+
+            sets.push(LongStringValueLabels {
+                var_name,
+                width,
+                labels,
+            });
+        }
+        Ok(Self(sets))
+    }
+}
+