use crate::{
format::{Spec, UncheckedSpec, Width},
identifier::{Error as IdError, Identifier},
- {endian::Endian, CategoryLabels, Compression}, raw,
+ raw::{self, MissingValues},
+ {endian::Endian, CategoryLabels, Compression},
};
use thiserror::Error as ThisError;
pub name: Identifier,
pub print_format: Spec,
pub write_format: Spec,
- //pub missing_values: MissingValues,
+ pub missing_values: MissingValues,
pub label: Option<String>,
}
})
}
-fn decode_var(
- decoder: &mut Decoder,
- input: &crate::raw::Variable,
- warn: impl Fn(Error),
-) -> Result<Option<Variable>, Error> {
- match input.width {
- 0..=255 => (),
- -1 => return Ok(None),
- _ => {
- return Err(Error::BadVariableWidth {
- offset: input.offset,
- width: input.width,
- })
- }
- };
- let width = input.width as Width;
- let name = decoder.decode_string(&input.name.0, &warn);
- let name = match Identifier::new(&name, decoder.encoding) {
- Ok(name) => {
- if !decoder.take_name(&name) {
+impl Variable {
+ pub fn decode( // Builds a `Variable` from a raw variable record; yields `Ok(None)` when the raw width is -1.
+ decoder: &mut Decoder,
+ input: &crate::raw::Variable,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Variable>, Error> {
+ match input.width {
+ 0..=255 => (), // widths 0 through 255 are accepted
+ -1 => return Ok(None), // NOTE(review): presumably a continuation record for a long string -- confirm
+ _ => { // every other width is rejected
+ return Err(Error::BadVariableWidth {
+ offset: input.offset,
+ width: input.width,
+ })
+ }
+ };
+ let width = input.width as Width;
+ let name = decoder.decode_string(&input.name.0, &warn);
+ let name = match Identifier::new(&name, decoder.encoding) {
+ Ok(name) => {
+ if !decoder.take_name(&name) { // name refused by `take_name` (presumably already in use -- confirm): substitute a generated one
+ decoder.generate_name()
+ } else {
+ name
+ }
+ }
+ Err(error) => {
+ warn(error.into()); // not a valid identifier: report it, then fall back to a generated name
decoder.generate_name()
- } else {
- name
}
- }
- Err(error) => {
- warn(error.into());
- decoder.generate_name()
- }
- };
- let print_format = decode_format(input.print_format, &name.0, width);
- let write_format = decode_format(input.write_format, &name.0, width);
- let label = input.label.as_ref().map(|label| decoder.decode_string(&label.0, &warn).into());
- Ok(Some(Variable { width, name, print_format, write_format, label }))
+ };
+ let print_format = decode_format(input.print_format, &name.0, width);
+ let write_format = decode_format(input.write_format, &name.0, width);
+ let label = input
+ .label
+ .as_ref()
+ .map(|label| decoder.decode_string(&label.0, &warn).into());
+ Ok(Some(Variable {
+ width,
+ name,
+ print_format,
+ write_format,
+ missing_values: input.missing_values.clone(), // copied unchanged from the raw record
+ label,
+ }))
+ }
}
#[derive(Clone)]
pub use crate::raw::FloatInfo;
pub use crate::raw::IntegerInfo;
+/// A record that is parsed from already-decoded text.
+trait TextRecord: Sized {
+    /// Short description of the record kind, e.g. `"variable set"`.
+    const NAME: &'static str;
+
+    /// Parses `input` into the record.
+    ///
+    /// `warn` is a callback through which an implementation can report a
+    /// problem without failing the whole parse.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
+}
+
+/// A named group of variables, written as `NAME=VAR1 VAR2 ...`.
+pub struct VariableSet {
+    /// Text before the first `=`.
+    pub name: String,
+    /// Whitespace-separated words after the first `=`.
+    pub vars: Vec<String>,
+}
+
+impl VariableSet {
+    /// Parses a single `NAME=VAR1 VAR2 ...` line.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` when `input` contains no `=`.
+    fn parse(input: &str) -> Result<Self, Error> {
+        let Some((set_name, members)) = input.split_once('=') else {
+            return Err(Error::TBD);
+        };
+        Ok(Self {
+            name: set_name.to_string(),
+            vars: members
+                .split_ascii_whitespace()
+                .map(str::to_owned)
+                .collect(),
+        })
+    }
+}
+
+/// All variable sets found in one text record, one set per line.
+pub struct VariableSetRecord(Vec<VariableSet>);
+
+impl TextRecord for VariableSetRecord {
+    const NAME: &'static str = "variable set";
+
+    /// Parses every line of `input` as a [`VariableSet`].
+    ///
+    /// A line that fails to parse is reported through `warn` and skipped, so
+    /// this function itself never returns an error.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let sets = input
+            .lines()
+            .filter_map(|line| VariableSet::parse(line).map_err(|error| warn(error)).ok())
+            .collect();
+        Ok(Self(sets))
+    }
+}
+
+/// Free-form product information text, stored verbatim.
+pub struct ProductInfo(pub String);
+
+impl TextRecord for ProductInfo {
+    const NAME: &'static str = "extra product info";
+
+    /// Wraps `input` unchanged; nothing is validated, so `_warn` is never called.
+    fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        Ok(Self(input.to_owned()))
+    }
+}
+
+/// One `SHORT=LONG` variable-name mapping.
+pub struct LongVariableName {
+    /// Text before the first `=`.
+    pub short_name: String,
+    /// Text after the first `=`.
+    pub long_name: String,
+}
+
+/// All long-variable-name mappings found in one text record.
+pub struct LongVariableNameRecord(Vec<LongVariableName>);
+
+impl TextRecord for LongVariableNameRecord {
+    const NAME: &'static str = "long variable names";
+
+    /// Parses tab-separated `SHORT=LONG` pairs.
+    ///
+    /// Empty fields are ignored.  A non-empty field without `=` is reported
+    /// through `warn` and skipped, so this function never returns an error.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut names = Vec::new();
+        for pair in input.split('\t') {
+            if pair.is_empty() {
+                continue;
+            }
+            match pair.split_once('=') {
+                Some((short, long)) => names.push(LongVariableName {
+                    short_name: short.to_string(),
+                    long_name: long.to_string(),
+                }),
+                None => warn(Error::TBD),
+            }
+        }
+        Ok(Self(names))
+    }
+}
+
+/// One `SHORT_NAME=LENGTH` entry.
+pub struct VeryLongString {
+    /// Text before the first `=`.
+    pub short_name: String,
+    /// Number parsed from the text after the first `=`.
+    pub length: usize,
+}
+
+impl VeryLongString {
+    /// Parses a single `SHORT_NAME=LENGTH` entry.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` when `input` has no `=` or when the text after it
+    /// does not parse as a `usize`.
+    fn parse(input: &str) -> Result<VeryLongString, Error> {
+        let (short_name, length) = input.split_once('=').ok_or(Error::TBD)?;
+        match length.parse::<usize>() {
+            Ok(length) => Ok(VeryLongString {
+                short_name: short_name.to_owned(),
+                length,
+            }),
+            Err(_) => Err(Error::TBD),
+        }
+    }
+}
+
+/// All very-long-string entries found in one text record.
+pub struct VeryLongStringRecord(Vec<VeryLongString>);
+
+impl TextRecord for VeryLongStringRecord {
+    const NAME: &'static str = "very long strings";
+
+    /// Parses NUL-separated [`VeryLongString`] entries.
+    ///
+    /// Trailing tabs are trimmed from each entry and entries that are then
+    /// empty are ignored.  An entry that fails to parse is reported through
+    /// `warn` and skipped, so this function never returns an error.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut parsed = Vec::new();
+        for field in input.split('\0') {
+            let field = field.trim_end_matches('\t');
+            if field.is_empty() {
+                continue;
+            }
+            match VeryLongString::parse(field) {
+                Ok(entry) => parsed.push(entry),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(Self(parsed))
+    }
+}
+
+/// A named attribute with one or more values.
+pub struct Attribute {
+    /// Text before the opening `(`.
+    pub name: String,
+    /// The values, in the order they appear.
+    pub values: Vec<String>,
+}
+
+impl Attribute {
+    /// Parses one attribute from the front of `input` and returns it together
+    /// with the text that follows it.
+    ///
+    /// The expected syntax is `NAME(` followed by one or more values, each
+    /// terminated by a newline, followed by `)`.  A value wrapped in single
+    /// quotes is stored without them; any other value is reported through
+    /// `warn` and stored verbatim.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` when there is no `(`, or when the input ends
+    /// before a value's terminating newline.
+    fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
+        let (name, mut remaining) = input.split_once('(').ok_or(Error::TBD)?;
+        let mut values = Vec::new();
+        loop {
+            let (raw, after) = remaining.split_once('\n').ok_or(Error::TBD)?;
+            let unquoted = raw
+                .strip_prefix('\'')
+                .and_then(|inner| inner.strip_suffix('\''));
+            match unquoted {
+                Some(text) => values.push(text.to_string()),
+                None => {
+                    warn(Error::TBD);
+                    values.push(raw.to_string());
+                }
+            }
+            // A `)` right after the newline closes the attribute.
+            match after.strip_prefix(')') {
+                Some(tail) => {
+                    let attribute = Attribute {
+                        name: name.to_string(),
+                        values,
+                    };
+                    return Ok((attribute, tail));
+                }
+                None => remaining = after,
+            }
+        }
+    }
+}
+
+/// A sequence of [`Attribute`]s.
+pub struct AttributeSet(pub Vec<Attribute>);
+
+impl AttributeSet {
+    /// Parses consecutive attributes from the front of `input`.
+    ///
+    /// Parsing stops when `input` is exhausted or, if `sentinel` is `Some`,
+    /// when the next character equals the sentinel.  The sentinel is consumed.
+    /// Returns the set together with the text that follows it.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the first error returned by [`Attribute::parse`].
+    fn parse<'a>(
+        mut input: &'a str,
+        sentinel: Option<char>,
+        warn: &impl Fn(Error),
+    ) -> Result<(AttributeSet, &'a str), Error> {
+        let mut attributes = Vec::new();
+        let rest = loop {
+            let Some(first) = input.chars().next() else {
+                break input;
+            };
+            if sentinel == Some(first) {
+                // Skip the sentinel by its encoded length so that slicing
+                // stays on a char boundary even for a non-ASCII sentinel.
+                break &input[first.len_utf8()..];
+            }
+            let (attribute, rest) = Attribute::parse(input, warn)?;
+            attributes.push(attribute);
+            input = rest;
+        };
+        Ok((AttributeSet(attributes), rest))
+    }
+}
+
+/// The attribute set carried by a data-file attributes record.
+pub struct FileAttributeRecord(AttributeSet);
+
+impl TextRecord for FileAttributeRecord {
+    const NAME: &'static str = "data file attributes";
+
+    /// Parses all of `input` as a single [`AttributeSet`] with no sentinel.
+    ///
+    /// Text left over after the set is reported through `warn`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from [`AttributeSet::parse`].
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let (attributes, trailing) = AttributeSet::parse(input, None, &warn)?;
+        let record = FileAttributeRecord(attributes);
+        if trailing.is_empty() {
+            return Ok(record);
+        }
+        warn(Error::TBD);
+        Ok(record)
+    }
+}
+
+/// The attributes attached to one variable, written as `NAME:ATTRIBUTES`.
+pub struct VarAttributeSet {
+    /// Text before the first `:`.
+    pub long_var_name: String,
+    /// Attributes parsed from the text after the `:`.
+    pub attributes: AttributeSet,
+}
+
+impl VarAttributeSet {
+    /// Parses one `NAME:ATTRIBUTES` group from the front of `input`, where the
+    /// attribute list ends at a `/` or at the end of input.  Returns the group
+    /// together with the text that follows it.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` when `input` has no `:`, and propagates any error
+    /// from [`AttributeSet::parse`].
+    fn parse<'a>(
+        input: &'a str,
+        warn: &impl Fn(Error),
+    ) -> Result<(VarAttributeSet, &'a str), Error> {
+        let (var_name, after_name) = input.split_once(':').ok_or(Error::TBD)?;
+        AttributeSet::parse(after_name, Some('/'), warn).map(|(attributes, rest)| {
+            let set = VarAttributeSet {
+                long_var_name: var_name.to_string(),
+                attributes,
+            };
+            (set, rest)
+        })
+    }
+}
+
+/// All per-variable attribute sets found in one text record.
+pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
+
+impl TextRecord for VariableAttributeRecord {
+    const NAME: &'static str = "variable attributes";
+
+    /// Parses consecutive [`VarAttributeSet`]s until `input` is exhausted.
+    ///
+    /// The first set that fails to parse is reported through `warn` and ends
+    /// parsing; the sets read before it are kept.  This function never returns
+    /// an error.
+    fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut sets = Vec::new();
+        loop {
+            if input.is_empty() {
+                break;
+            }
+            let (set, rest) = match VarAttributeSet::parse(input, &warn) {
+                Ok(parsed) => parsed,
+                Err(error) => {
+                    warn(error);
+                    break;
+                }
+            };
+            sets.push(set);
+            input = rest;
+        }
+        Ok(Self(sets))
+    }
+}
+
#[derive(Clone, Debug)]
pub enum MultipleResponseType {
MultipleDichotomy {
#[derive(Clone, Debug)]
pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
-#[derive(Clone, Debug)]
-pub struct ProductInfo(String);
-
pub enum Measure {
Nominal,
Ordinal,
use crate::endian::{Endian, Parse, ToBytes};
use crate::{CategoryLabels, Compression};
+use encoding_rs::mem::decode_latin1;
use flate2::read::ZlibDecoder;
use num::Integer;
use std::borrow::Cow;
}
}
-pub struct FallbackEncoding<'a>(&'a [u8]);
-
-fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> {
- if let Ok(s) = from_utf8(s) {
- s.into()
- } else {
- let s: String = s.iter().map(|c| char::from(*c)).collect();
- s.into()
- }
-}
-
-impl<'a> Debug for FallbackEncoding<'a> {
- fn fmt(&self, f: &mut Formatter) -> FmtResult {
- if let Ok(s) = from_utf8(self.0) {
- let s = s.trim_end();
- write!(f, "\"{s}\"")
- } else {
- let s: String = self
- .0
- .iter()
- .map(|c| char::from(*c).escape_default())
- .flatten()
- .collect();
- let s = s.trim_end();
- write!(f, "\"{s}\"")
- }
- }
+/// Decodes `s` as UTF-8 when it is valid UTF-8; otherwise decodes it as
+/// Latin-1, that is, with each byte taken as the Unicode code point of the
+/// same value.
+fn default_decode<'a>(s: &'a [u8]) -> Cow<'a, str> {
+    match from_utf8(s) {
+        Ok(text) => Cow::Borrowed(text),
+        Err(_) => decode_latin1(s),
+    }
}
#[derive(Clone)]
};
write!(f, "{number}")?;
- let string = fallback_encode(&self.0);
+ let string = default_decode(&self.0);
let string = string
.split(|c: char| c == '\0' || c.is_control())
.next()
.unwrap();
- write!(f, "/\"{string}\"")?;
+ write!(f, "{string:?}")?;
Ok(())
}
}
impl Debug for UnencodedString {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", FallbackEncoding(self.0.as_slice()))
+ write!(f, "{:?}", default_decode(self.0.as_slice())) // Debug-print the bytes decoded as UTF-8, or as Latin-1 when they are not valid UTF-8
}
}
impl<const N: usize> Debug for UnencodedStr<N> {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", FallbackEncoding(&self.0))
+ write!(f, "{:?}", default_decode(&self.0)) // Debug-print the bytes decoded as UTF-8, or as Latin-1 when they are not valid UTF-8
}
}
}
}
-trait TextRecord
-where
- Self: Sized,
-{
- const NAME: &'static str;
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
-}
-
trait ExtensionRecord
where
Self: Sized,
Ok((string.into(), rest))
}
-pub struct ProductInfo(String);
-
-impl TextRecord for ProductInfo {
- const NAME: &'static str = "extra product info";
- fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
- Ok(ProductInfo(input.into()))
- }
-}
-
#[derive(Clone, Debug)]
pub struct VarDisplayRecord(pub Vec<u32>);
}
}
-pub struct VariableSet {
- pub name: String,
- pub vars: Vec<String>,
-}
-
-impl VariableSet {
- fn parse(input: &str) -> Result<Self, Error> {
- let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
- let vars = input.split_ascii_whitespace().map(String::from).collect();
- Ok(VariableSet {
- name: name.into(),
- vars,
- })
- }
-}
-
-pub struct VariableSetRecord(Vec<VariableSet>);
-
-impl TextRecord for VariableSetRecord {
- const NAME: &'static str = "variable set";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut sets = Vec::new();
- for line in input.lines() {
- match VariableSet::parse(line) {
- Ok(set) => sets.push(set),
- Err(error) => warn(error),
- }
- }
- Ok(VariableSetRecord(sets))
- }
-}
-
-pub struct LongVariableName {
- pub short_name: String,
- pub long_name: String,
-}
-
-pub struct LongVariableNameRecord(Vec<LongVariableName>);
-
-impl TextRecord for LongVariableNameRecord {
- const NAME: &'static str = "long variable names";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut names = Vec::new();
- for pair in input.split('\t').filter(|s| !s.is_empty()) {
- if let Some((short_name, long_name)) = pair.split_once('=') {
- let name = LongVariableName {
- short_name: short_name.into(),
- long_name: long_name.into(),
- };
- names.push(name);
- } else {
- warn(Error::TBD)
- }
- }
- Ok(LongVariableNameRecord(names))
- }
-}
-
-pub struct VeryLongString {
- pub short_name: String,
- pub length: usize,
-}
-
-impl VeryLongString {
- fn parse(input: &str) -> Result<VeryLongString, Error> {
- let Some((short_name, length)) = input.split_once('=') else {
- return Err(Error::TBD);
- };
- let length: usize = length.parse().map_err(|_| Error::TBD)?;
- Ok(VeryLongString {
- short_name: short_name.into(),
- length,
- })
- }
-}
-
-pub struct VeryLongStringRecord(Vec<VeryLongString>);
-
-impl TextRecord for VeryLongStringRecord {
- const NAME: &'static str = "very long strings";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut very_long_strings = Vec::new();
- for tuple in input
- .split('\0')
- .map(|s| s.trim_end_matches('\t'))
- .filter(|s| !s.is_empty())
- {
- match VeryLongString::parse(tuple) {
- Ok(vls) => very_long_strings.push(vls),
- Err(error) => warn(error),
- }
- }
- Ok(VeryLongStringRecord(very_long_strings))
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabels {
- pub var_name: UnencodedString,
- pub width: u32,
-
- /// `(value, label)` pairs, where each value is `width` bytes.
- pub labels: Vec<(UnencodedString, UnencodedString)>,
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabelRecord(Vec<LongStringValueLabels>);
-
-impl ExtensionRecord for LongStringValueLabelRecord {
- const SUBTYPE: u32 = 21;
- const SIZE: Option<u32> = Some(1);
- const COUNT: Option<u32> = None;
- const NAME: &'static str = "long string value labels record";
-
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
- ext.check_size::<Self>()?;
-
- let mut input = &ext.data[..];
- let mut label_set = Vec::new();
- while !input.is_empty() {
- let var_name = read_string(&mut input, endian)?;
- let width: u32 = endian.parse(read_bytes(&mut input)?);
- let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
- let mut labels = Vec::new();
- for _ in 0..n_labels {
- let value = read_string(&mut input, endian)?;
- let label = read_string(&mut input, endian)?;
- labels.push((value, label));
- }
- label_set.push(LongStringValueLabels {
- var_name,
- width,
- labels,
- })
- }
- Ok(LongStringValueLabelRecord(label_set))
- }
-}
-
pub struct LongStringMissingValues {
/// Variable name.
pub var_name: UnencodedString,
}
}
-pub struct Attribute {
- pub name: String,
- pub values: Vec<String>,
-}
-
-impl Attribute {
- fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
- let Some((name, mut input)) = input.split_once('(') else {
- return Err(Error::TBD);
- };
- let mut values = Vec::new();
- loop {
- let Some((value, rest)) = input.split_once('\n') else {
- return Err(Error::TBD);
- };
- if let Some(stripped) = value
- .strip_prefix('\'')
- .and_then(|value| value.strip_suffix('\''))
- {
- values.push(stripped.into());
- } else {
- warn(Error::TBD);
- values.push(value.into());
- }
- if let Some(rest) = rest.strip_prefix(')') {
- return Ok((
- Attribute {
- name: name.into(),
- values,
- },
- rest,
- ));
- }
- input = rest;
- }
- }
-}
-
-pub struct AttributeSet(pub Vec<Attribute>);
-
-impl AttributeSet {
- fn parse<'a>(
- mut input: &'a str,
- sentinel: Option<char>,
- warn: &impl Fn(Error),
- ) -> Result<(AttributeSet, &'a str), Error> {
- let mut attributes = Vec::new();
- let rest = loop {
- match input.chars().next() {
- None => break input,
- c if c == sentinel => break &input[1..],
- _ => {
- let (attribute, rest) = Attribute::parse(input, &warn)?;
- attributes.push(attribute);
- input = rest;
- }
- }
- };
- Ok((AttributeSet(attributes), rest))
- }
-}
-
-pub struct FileAttributeRecord(AttributeSet);
-
-impl TextRecord for FileAttributeRecord {
- const NAME: &'static str = "data file attributes";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let (set, rest) = AttributeSet::parse(input, None, &warn)?;
- if !rest.is_empty() {
- warn(Error::TBD);
- }
- Ok(FileAttributeRecord(set))
- }
-}
-
-pub struct VarAttributeSet {
- pub long_var_name: String,
- pub attributes: AttributeSet,
-}
-
-impl VarAttributeSet {
- fn parse<'a>(
- input: &'a str,
- warn: &impl Fn(Error),
- ) -> Result<(VarAttributeSet, &'a str), Error> {
- let Some((long_var_name, rest)) = input.split_once(':') else {
- return Err(Error::TBD);
- };
- let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
- Ok((
- VarAttributeSet {
- long_var_name: long_var_name.into(),
- attributes,
- },
- rest,
- ))
- }
-}
-
-pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
-
-impl TextRecord for VariableAttributeRecord {
- const NAME: &'static str = "variable attributes";
- fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let mut var_attribute_sets = Vec::new();
- while !input.is_empty() {
- match VarAttributeSet::parse(input, &warn) {
- Ok((var_attribute, rest)) => {
- var_attribute_sets.push(var_attribute);
- input = rest;
- }
- Err(error) => {
- warn(error);
- break;
- }
- }
- }
- Ok(VariableAttributeRecord(var_attribute_sets))
- }
-}
#[derive(Clone, Debug)]
pub struct NumberOfCasesRecord {
pub data: Vec<u8>,
}
-/*
-fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
- match extension {
- /* Implemented record types. */
- ExtensionType::Integer => (4, 8),
- ExtensionType::Float => (8, 3),
- ExtensionType::VarSets => (1, 0),
- ExtensionType::Mrsets => (1, 0),
- ExtensionType::ProductInfo => (1, 0),
- ExtensionType::Display => (4, 0),
- ExtensionType::LongNames => (1, 0),
- ExtensionType::LongStrings => (1, 0),
- ExtensionType::Ncases => (8, 2),
- ExtensionType::FileAttrs => (1, 0),
- ExtensionType::VarAttrs => (1, 0),
- ExtensionType::Mrsets2 => (1, 0),
- ExtensionType::Encoding => (1, 0),
- ExtensionType::LongLabels => (1, 0),
- ExtensionType::LongMissing => (1, 0),
-
- /* Ignored record types. */
- ExtensionType::Date => (0, 0),
- ExtensionType::DataEntry => (0, 0),
- ExtensionType::Dataview => (0, 0),
- }
-}
- */
-
impl Extension {
fn check_size<E: ExtensionRecord>(&self) -> Result<(), Error> {
if let Some(expected_size) = E::SIZE {
let length: u32 = endian.parse(read_bytes(r)?);
Ok(read_vec(r, length as usize)?.into())
}
+
+/// The value labels recorded for one variable.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels {
+    /// Variable name, still in its undecoded form.
+    pub var_name: UnencodedString,
+    /// Width field read from the record.
+    pub width: u32,
+
+    /// `(value, label)` pairs, where each value is `width` bytes.
+    pub labels: Vec<(UnencodedString, UnencodedString)>,
+}
+
+/// All [`LongStringValueLabels`] found in one extension record.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord(Vec<LongStringValueLabels>);
+
+impl ExtensionRecord for LongStringValueLabelRecord {
+    const SUBTYPE: u32 = 21;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "long string value labels record";
+
+    /// Reads label sets from `ext.data` until the data is exhausted.
+    ///
+    /// Each set is a variable name, a `u32` width, a `u32` label count, and
+    /// then that many `(value, label)` string pairs.  `_warn` is unused.
+    ///
+    /// # Errors
+    ///
+    /// Propagates a failed size check and any read failure.
+    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        let mut remaining = &ext.data[..];
+        let mut label_sets = Vec::new();
+        while !remaining.is_empty() {
+            let var_name = read_string(&mut remaining, endian)?;
+            let width: u32 = endian.parse(read_bytes(&mut remaining)?);
+            let n_labels: u32 = endian.parse(read_bytes(&mut remaining)?);
+            // Read each pair in order; the first read failure ends the parse.
+            let labels = (0..n_labels)
+                .map(|_| -> Result<_, Error> {
+                    let value = read_string(&mut remaining, endian)?;
+                    let label = read_string(&mut remaining, endian)?;
+                    Ok((value, label))
+                })
+                .collect::<Result<Vec<_>, Error>>()?;
+            label_sets.push(LongStringValueLabels {
+                var_name,
+                width,
+                labels,
+            });
+        }
+        Ok(Self(label_sets))
+    }
+}
+