LongStringMissingValues(LongStringMissingValueRecord<RawString>),
Encoding(EncodingRecord),
NumberOfCases(NumberOfCasesRecord),
+ VariableSets(RawVariableSetRecord),
+ ProductInfo(RawProductInfoRecord),
+ LongNames(RawLongNamesRecord),
+ VeryLongStrings(RawVeryLongStringsRecord),
+ FileAttributes(RawFileAttributesRecord),
Text(TextRecord),
OtherExtension(Extension),
EndOfHeaders(u32),
ProductInfo(ProductInfoRecord),
LongNames(LongNamesRecord),
VeryLongStrings(VeryLongStringsRecord),
- FileAttributes(FileAttributeRecord),
+ FileAttributes(FileAttributesRecord),
VariableAttributes(VariableAttributeRecord),
OtherExtension(Extension),
EndOfHeaders(u32),
}
Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
+ Record::VariableSets(record) => DecodedRecord::VariableSets(record.decode(decoder)),
+ Record::ProductInfo(record) => DecodedRecord::ProductInfo(record.decode(decoder)),
+ Record::LongNames(record) => DecodedRecord::LongNames(record.decode(decoder)),
+ Record::VeryLongStrings(record) => {
+ DecodedRecord::VeryLongStrings(record.decode(decoder))
+ }
+ Record::FileAttributes(record) => DecodedRecord::FileAttributes(record.decode(decoder)),
Record::Text(record) => record.decode(decoder),
Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
#[derive(Clone, Default)]
pub struct MissingValues {
/// Individual missing values, up to 3 of them.
- pub values: Vec<Datum>,
+ values: Vec<Datum>, // No longer `pub`; `MissingValues::new` validates these before storing them.
/// Optional range of missing values.
- pub range: Option<MissingValueRange>,
+ range: Option<MissingValueRange>, // No longer `pub`; `MissingValues::new` rejects a range alongside string values.
}
impl Debug for MissingValues {
}
}
+#[derive(Copy, Clone, Debug)]
+pub enum MissingValuesError { // Reasons `MissingValues::new` rejects its arguments.
+ TooMany, // More than 3 individual values were supplied.
+ TooWide, // A string value has width greater than 8 after `trim_end()`.
+ MixedTypes, // Values differ in `var_type()`, or string values are combined with a range.
+}
+
impl MissingValues {
+ pub fn new( // Validating constructor: rejects more than 3 values, over-wide strings, and mixed types.
+ mut values: Vec<Datum>,
+ range: Option<MissingValueRange>,
+ ) -> Result<Self, MissingValuesError> {
+ if values.len() > 3 { // At most 3 individual values are accepted.
+ return Err(MissingValuesError::TooMany);
+ }
+
+ let mut var_type = None; // Type of the values seen so far; `None` until the first value.
+ for value in values.iter_mut() {
+ value.trim_end(); // Trim first; the width check below runs on the trimmed value.
+ match value.width() {
+ VarWidth::String(w) if w > 8 => return Err(MissingValuesError::TooWide),
+ _ => (),
+ }
+ if var_type.is_some_and(|t| t != value.var_type()) { // Every value must share one type.
+ return Err(MissingValuesError::MixedTypes);
+ }
+ var_type = Some(value.var_type());
+ }
+
+ if var_type == Some(VarType::String) && range.is_some() { // A range cannot accompany string values.
+ return Err(MissingValuesError::MixedTypes);
+ }
+
+ Ok(Self { values, range })
+ }
+
pub fn is_empty(&self) -> bool {
self.values.is_empty() && self.range.is_none()
}
+ pub fn var_type(&self) -> Option<VarType> { // Type implied by the contents, or `None` when there are no values and no range.
+ if let Some(datum) = self.values.first() {
+ Some(datum.var_type())
+ } else if self.range.is_some() {
+ Some(VarType::Numeric) // A range with no individual values is reported as numeric.
+ } else {
+ None
+ }
+ }
+
pub fn contains(&self, value: &Datum) -> bool {
- if self.values.contains(value) {
+ if self
+ .values
+ .iter()
+ .any(|datum| datum.eq_ignore_trailing_spaces(value))
+ {
return true;
}
let range = range.map(|(low, high)| {
MissingValueRange::new(endian.parse(low), endian.parse(high))
});
- return Ok(Self { values, range });
+ return Ok(Self::new(values, range).unwrap());
}
Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange),
Ok(VarWidth::String(width)) => {
.into_iter()
.map(|value| Datum::String(RawString::from(&value[..width])))
.collect();
- return Ok(Self {
- values,
- range: None,
- });
+ return Ok(Self::new(values, None).unwrap());
}
Err(()) => warn(Warning::MissingValueContinuation(offset)),
}
pub fn len(&self) -> usize {
self.0.len()
}
+ pub fn trim_end(&mut self) { // Removes every trailing space byte (b' ') from `self.0` in place.
+ while self.0.pop_if(|c| *c == b' ').is_some() {}
+ }
}
impl Borrow<RawStr> for RawString {
pub fn decode(&self, encoding: &'static Encoding) -> Cow<'_, str> {
encoding.decode_without_bom_handling(&self.0).0
}
+
+ pub fn eq_ignore_trailing_spaces(&self, other: &RawStr) -> bool { // Byte-wise equality in which trailing b' ' bytes on either side are ignored.
+ let mut this = self.0.iter();
+ let mut other = other.0.iter();
+ loop {
+ match (this.next(), other.next()) {
+ (Some(a), Some(b)) if a == b => (), // Bytes agree so far; keep scanning.
+ (Some(_), Some(_)) => return false,
+ (None, None) => return true,
+ (Some(b' '), None) => return this.all(|c| *c == b' '), // `other` ended: the rest of `self` must be all spaces.
+ (None, Some(b' ')) => return other.all(|c| *c == b' '), // `self` ended: the rest of `other` must be all spaces.
+ (Some(_), None) | (None, Some(_)) => return false,
+ }
+ }
+ }
}
pub struct DisplayRawString<'a>(Cow<'a, str>);
}
}
-trait ExtensionRecord {
- const SUBTYPE: u32;
- const SIZE: Option<u32>;
- const COUNT: Option<u32>;
- const NAME: &'static str;
- fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
+struct ExtensionRecord<'a> { // Expected shape of an extension record, as checked by `Extension::check_size`.
+ size: Option<u32>, // Expected `size` of the extension; `None` skips the size check.
+ count: Option<u32>, // Expected `count` of the extension; `None` skips the count check.
+ name: &'a str, // Record name placed in the `BadRecordSize`/`BadRecordCount` warnings.
}
#[derive(Clone, Debug)]
pub character_code: i32,
}
-impl ExtensionRecord for IntegerInfoRecord {
- const SUBTYPE: u32 = 3;
- const SIZE: Option<u32> = Some(4);
- const COUNT: Option<u32> = Some(8);
- const NAME: &'static str = "integer record";
+static INTEGER_INFO_RECORD: ExtensionRecord = ExtensionRecord { // Expected size/count that `IntegerInfoRecord::parse` passes to `check_size`.
+ size: Some(4),
+ count: Some(8),
+ name: "integer record",
+};
+impl IntegerInfoRecord {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&INTEGER_INFO_RECORD)?;
let mut input = &ext.data[..];
let data: Vec<i32> = (0..8)
pub lowest: f64,
}
-impl ExtensionRecord for FloatInfoRecord {
- const SUBTYPE: u32 = 4;
- const SIZE: Option<u32> = Some(8);
- const COUNT: Option<u32> = Some(3);
- const NAME: &'static str = "floating point record";
+static FLOAT_INFO_RECORD: ExtensionRecord = ExtensionRecord { // Expected size/count that `FloatInfoRecord::parse` passes to `check_size`.
+ size: Some(8),
+ count: Some(3),
+ name: "floating point record",
+};
+impl FloatInfoRecord {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&FLOAT_INFO_RECORD)?;
let mut input = &ext.data[..];
let data: Vec<f64> = (0..3)
I: Debug,
S: Debug;
-impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
- const SUBTYPE: u32 = 7;
- const SIZE: Option<u32> = Some(1);
- const COUNT: Option<u32> = None;
- const NAME: &'static str = "multiple response set record";
+static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord { // Expected size that `MultipleResponseRecord::parse` passes to `check_size`; the count is unconstrained.
+ size: Some(1),
+ count: None,
+ name: "multiple response set record",
+};
+impl MultipleResponseRecord<RawString, RawString> {
fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&MULTIPLE_RESPONSE_RECORD)?;
let mut input = &ext.data[..];
let mut sets = Vec::new();
pub struct VarDisplayRecord(pub Vec<VarDisplay>);
impl VarDisplayRecord {
- const SUBTYPE: u32 = 11;
-
fn parse(
ext: &Extension,
var_types: &VarTypes,
where
N: Debug;
-impl ExtensionRecord for LongStringMissingValueRecord<RawString> {
- const SUBTYPE: u32 = 22;
- const SIZE: Option<u32> = Some(1);
- const COUNT: Option<u32> = None;
- const NAME: &'static str = "long string missing values record";
+static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord { // Expected size that `LongStringMissingValueRecord::parse` passes to `check_size`; the count is unconstrained.
+ size: Some(1),
+ count: None,
+ name: "long string missing values record",
+};
+impl LongStringMissingValueRecord<RawString> {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?;
let mut input = &ext.data[..];
let mut missing_value_set = Vec::new();
#[derive(Clone, Debug)]
pub struct EncodingRecord(pub String);
-impl ExtensionRecord for EncodingRecord {
- const SUBTYPE: u32 = 20;
- const SIZE: Option<u32> = Some(1);
- const COUNT: Option<u32> = None;
- const NAME: &'static str = "encoding record";
+static ENCODING_RECORD: ExtensionRecord = ExtensionRecord { // Expected size that `EncodingRecord::parse` passes to `check_size`; the count is unconstrained.
+ size: Some(1),
+ count: None,
+ name: "encoding record",
+};
+impl EncodingRecord {
fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&ENCODING_RECORD)?;
Ok(Record::Encoding(EncodingRecord(
String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
pub n_cases: u64,
}
-impl ExtensionRecord for NumberOfCasesRecord {
- const SUBTYPE: u32 = 16;
- const SIZE: Option<u32> = Some(8);
- const COUNT: Option<u32> = Some(2);
- const NAME: &'static str = "extended number of cases record";
+static NUMBER_OF_CASES_RECORD: ExtensionRecord = ExtensionRecord { // Expected size/count that `NumberOfCasesRecord::parse` passes to `check_size`.
+ size: Some(8),
+ count: Some(2),
+ name: "extended number of cases record",
+};
+impl NumberOfCasesRecord {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&NUMBER_OF_CASES_RECORD)?;
let mut input = &ext.data[..];
let one = endian.parse(read_bytes(&mut input)?);
}
}
+#[derive(Clone, Debug)]
+pub struct RawVariableSetRecord(TextRecord); // Variable sets record held as a not-yet-decoded `TextRecord`.
+
+impl RawVariableSetRecord {
+ fn parse(extension: Extension) -> Result<Record, Warning> { // Validates `extension` through `TextRecord::parse` and wraps it as `Record::VariableSets`.
+ Ok(Record::VariableSets(Self(TextRecord::parse(
+ extension,
+ "variable sets record",
+ )?)))
+ }
+ fn decode(self, decoder: &mut Decoder) -> VariableSetRecord { // Decodes the text and parses one `VariableSet` per line.
+ let mut sets = Vec::new();
+ let input = decoder.decode(&self.0.text);
+ for line in input.lines() {
+ if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&mut decoder.warn) { // Only lines for which `issue_warning` returns `Some` are kept.
+ sets.push(set)
+ }
+ }
+ VariableSetRecord {
+ offsets: self.0.offsets,
+ sets,
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawProductInfoRecord(TextRecord); // Product info record held as a not-yet-decoded `TextRecord`.
+
+impl RawProductInfoRecord {
+ fn parse(extension: Extension) -> Result<Record, Warning> { // Validates `extension` through `TextRecord::parse` and wraps it as `Record::ProductInfo`.
+ Ok(Record::ProductInfo(Self(TextRecord::parse(
+ extension,
+ "product info record",
+ )?)))
+ }
+ fn decode(self, decoder: &mut Decoder) -> ProductInfoRecord { // The whole decoded text becomes the record's string.
+ ProductInfoRecord(decoder.decode(&self.0.text).into())
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawLongNamesRecord(TextRecord); // Long names record held as a not-yet-decoded `TextRecord`.
+
+impl RawLongNamesRecord {
+ fn parse(extension: Extension) -> Result<Record, Warning> { // Validates `extension` through `TextRecord::parse` and wraps it as `Record::LongNames`.
+ Ok(Record::LongNames(Self(TextRecord::parse(
+ extension,
+ "long names record",
+ )?)))
+ }
+ fn decode(self, decoder: &mut Decoder) -> LongNamesRecord { // Decodes the text and parses each tab-separated entry as a `LongName`.
+ let input = decoder.decode(&self.0.text);
+ let mut names = Vec::new();
+ for pair in input.split('\t').filter(|s| !s.is_empty()) { // Empty segments between tabs are skipped.
+ if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&mut decoder.warn)
+ {
+ names.push(long_name);
+ }
+ }
+ LongNamesRecord(names)
+ }
+}
+
#[derive(Clone, Debug)]
pub struct TextRecord {
pub offsets: Range<u64>,
#[derive(Clone, Copy, Debug)]
pub enum TextRecordType { // Selects how `TextRecord::decode` interprets the record's text.
- VariableSets,
- ProductInfo,
- LongNames,
- VeryLongStrings,
- FileAttributes,
VariableAttributes,
}
impl TextRecord {
+ fn parse(extension: Extension, name: &str) -> Result<TextRecord, Warning> { // Requires `extension` to have size 1 (any count), then wraps it; `name` labels the warning on mismatch.
+ extension.check_size(&ExtensionRecord {
+ size: Some(1),
+ count: None,
+ name,
+ })?;
+ Ok(Self::new(extension, TextRecordType::VariableAttributes)) // NOTE(review): always tagged `VariableAttributes`, whichever record kind called this — confirm `rec_type` is never consulted for the `Raw*` wrappers.
+ }
fn new(extension: Extension, rec_type: TextRecordType) -> Self {
Self {
offsets: extension.offsets,
}
pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord { // Decodes this record according to `rec_type`; only the variable-attributes arm remains.
match self.rec_type {
- TextRecordType::VariableSets => {
- DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder))
- }
- TextRecordType::ProductInfo => {
- DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder))
- }
- TextRecordType::LongNames => {
- DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder))
- }
- TextRecordType::VeryLongStrings => {
- DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder))
- }
- TextRecordType::FileAttributes => {
- DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder))
- }
TextRecordType::VariableAttributes => {
DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder))
}
}
}
+#[derive(Clone, Debug)]
+pub struct RawVeryLongStringsRecord(TextRecord); // Very long strings record held as a not-yet-decoded `TextRecord`.
+
#[derive(Clone, Debug)]
pub struct VeryLongStringsRecord(pub Vec<VeryLongString>);
-impl VeryLongStringsRecord {
- fn decode(source: &TextRecord, decoder: &mut Decoder) -> Self {
- let input = decoder.decode(&source.text);
+impl RawVeryLongStringsRecord {
+ fn parse(extension: Extension) -> Result<Record, Warning> { // Validates `extension` through `TextRecord::parse` and wraps it as `Record::VeryLongStrings`.
+ Ok(Record::VeryLongStrings(Self(TextRecord::parse(
+ extension,
+ "very long strings record",
+ )?)))
+ }
+ fn decode(self, decoder: &mut Decoder) -> VeryLongStringsRecord {
+ let input = decoder.decode(&self.0.text);
let mut very_long_strings = Vec::new();
for tuple in input
.split('\0')
}
}
+#[derive(Clone, Debug)]
+pub struct RawFileAttributesRecord(TextRecord); // File attributes record held as a not-yet-decoded `TextRecord`.
+
#[derive(Clone, Debug, Default)]
-pub struct FileAttributeRecord(pub Attributes);
+pub struct FileAttributesRecord(pub Attributes);
-impl FileAttributeRecord {
- fn decode(source: &TextRecord, decoder: &mut Decoder) -> Self {
- let input = decoder.decode(&source.text);
+impl RawFileAttributesRecord {
+ fn parse(extension: Extension) -> Result<Record, Warning> { // Validates `extension` through `TextRecord::parse` and wraps it as `Record::FileAttributes`.
+ Ok(Record::FileAttributes(Self(TextRecord::parse(
+ extension,
+ "file attributes record",
+ )?)))
+ }
+ fn decode(self, decoder: &mut Decoder) -> FileAttributesRecord { // Parses the decoded text as one attribute set; warns if input is left over and returns the default record when `issue_warning` yields `None`.
+ let input = decoder.decode(&self.0.text);
match Attributes::parse(decoder, &input, None).issue_warning(&mut decoder.warn) {
Some((set, rest)) => {
if !rest.is_empty() {
decoder.warn(Warning::TBD);
}
- FileAttributeRecord(set)
+ FileAttributesRecord(set)
}
- None => FileAttributeRecord::default(),
+ None => FileAttributesRecord::default(),
}
}
}
#[derive(Clone, Debug)]
pub struct LongNamesRecord(pub Vec<LongName>);
-impl LongNamesRecord {
- fn decode(source: &TextRecord, decoder: &mut Decoder) -> Self {
- let input = decoder.decode(&source.text);
- let mut names = Vec::new();
- for pair in input.split('\t').filter(|s| !s.is_empty()) {
- if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&mut decoder.warn)
- {
- names.push(long_name);
- }
- }
- LongNamesRecord(names)
- }
-}
-
#[derive(Clone, Debug)]
pub struct ProductInfoRecord(pub String);
-impl ProductInfoRecord {
- fn decode(source: &TextRecord, decoder: &mut Decoder) -> Self {
- Self(decoder.decode(&source.text).into())
- }
-}
#[derive(Clone, Debug)]
pub struct VariableSet {
pub name: String,
pub sets: Vec<VariableSet>,
}
-impl VariableSetRecord {
- fn decode(source: &TextRecord, decoder: &mut Decoder) -> VariableSetRecord {
- let mut sets = Vec::new();
- let input = decoder.decode(&source.text);
- for line in input.lines() {
- if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&mut decoder.warn) {
- sets.push(set)
- }
- }
- VariableSetRecord {
- offsets: source.offsets.clone(),
- sets,
- }
- }
-}
-
trait IssueWarning<T> {
fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option<T>;
}
}
impl Extension {
- fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
- if let Some(expected_size) = E::SIZE {
- if self.size != expected_size {
+ fn check_size(&self, expected: &ExtensionRecord) -> Result<(), Warning> { // Checks this extension's `size`, then its `count`, against `expected`; a `None` field is not checked.
+ match expected.size {
+ Some(expected_size) if self.size != expected_size => {
return Err(Warning::BadRecordSize {
offset: self.offsets.start,
- record: E::NAME.into(),
+ record: expected.name.into(),
size: self.size,
expected_size,
});
}
+ _ => (), // Size matches or is unconstrained.
}
- if let Some(expected_count) = E::COUNT {
- if self.count != expected_count {
+ match expected.count {
+ Some(expected_count) if self.count != expected_count => {
return Err(Warning::BadRecordCount {
offset: self.offsets.start,
- record: E::NAME.into(),
+ record: expected.name.into(),
count: self.count,
expected_count,
});
}
+ _ => (), // Count matches or is unconstrained.
}
Ok(())
}
data,
};
let result = match subtype {
- IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
- FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
- VarDisplayRecord::SUBTYPE => {
- VarDisplayRecord::parse(&extension, var_types, endian, warn)
- }
- MultipleResponseRecord::SUBTYPE | 19 => {
- MultipleResponseRecord::parse(&extension, endian)
- }
- LongStringValueLabelRecord::SUBTYPE => {
- LongStringValueLabelRecord::parse(&extension, endian)
- }
- EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
- NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
- 5 => Ok(Record::Text(TextRecord::new(
- extension,
- TextRecordType::VariableSets,
- ))),
- 10 => Ok(Record::Text(TextRecord::new(
- extension,
- TextRecordType::ProductInfo,
- ))),
- 13 => Ok(Record::Text(TextRecord::new(
- extension,
- TextRecordType::LongNames,
- ))),
- 14 => Ok(Record::Text(TextRecord::new(
- extension,
- TextRecordType::VeryLongStrings,
- ))),
- 17 => Ok(Record::Text(TextRecord::new(
- extension,
- TextRecordType::FileAttributes,
- ))),
+ 3 => IntegerInfoRecord::parse(&extension, endian),
+ 4 => FloatInfoRecord::parse(&extension, endian),
+ 11 => VarDisplayRecord::parse(&extension, var_types, endian, warn),
+ 7 | 19 => MultipleResponseRecord::parse(&extension, endian),
+ 21 => LongStringValueLabelRecord::parse(&extension, endian),
+ 22 => LongStringMissingValueRecord::parse(&extension, endian),
+ 20 => EncodingRecord::parse(&extension, endian),
+ 16 => NumberOfCasesRecord::parse(&extension, endian),
+ 5 => RawVariableSetRecord::parse(extension),
+ 10 => RawProductInfoRecord::parse(extension),
+ 13 => RawLongNamesRecord::parse(extension),
+ 14 => RawVeryLongStringsRecord::parse(extension),
+ 17 => RawFileAttributesRecord::parse(extension),
18 => Ok(Record::Text(TextRecord::new(
extension,
TextRecordType::VariableAttributes,
N: Debug,
S: Debug;
-impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
- const SUBTYPE: u32 = 21;
- const SIZE: Option<u32> = Some(1);
- const COUNT: Option<u32> = None;
- const NAME: &'static str = "long string value labels record";
+static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord { // Expected size that `LongStringValueLabelRecord::parse` passes to `check_size`; the count is unconstrained.
+ size: Some(1),
+ count: None,
+ name: "long string value labels record",
+};
+impl LongStringValueLabelRecord<RawString, RawString> {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
- ext.check_size::<Self>()?;
+ ext.check_size(&LONG_STRING_VALUE_LABEL_RECORD)?;
let mut input = &ext.data[..];
let mut label_set = Vec::new();