use std::{
+ cmp::Ordering,
collections::{HashMap, HashSet},
fmt::Debug,
- ops::{Bound, RangeBounds}, cmp::Ordering,
+ ops::{Bound, RangeBounds},
};
use encoding_rs::Encoding;
use crate::{
format::Spec,
identifier::{ByIdentifier, HasIdentifier, Identifier},
- raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType, self, RawStr, Decoder},
+ raw::{self, Alignment, CategoryLabels, Decoder, Measure, MissingValues, RawStr, VarType},
};
pub type DictIndex = usize;
pub encoding: &'static Encoding,
}
+/// Error returned by `Dictionary::add_var` when the variable could not be
+/// inserted into the dictionary's set of variables.
+///
+/// NOTE(review): judging by the name, this means a variable with the same
+/// name is already present -- confirm against the set's `insert` semantics.
+///
+/// `Debug` is derived so that callers can keep using `unwrap()`/`expect()`
+/// on the `Result` returned by `add_var`, as they could when the error type
+/// was `()`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct DuplicateVariableName;
+
impl Dictionary {
pub fn new(encoding: &'static Encoding) -> Self {
Self {
}
}
- pub fn add_var(&mut self, variable: Variable) -> Result<(), ()> {
+ pub fn add_var(&mut self, variable: Variable) -> Result<(), DuplicateVariableName> {
if self.variables.insert(ByIdentifier::new(variable)) {
Ok(())
} else {
- Err(())
+ Err(DuplicateVariableName)
}
}
if from_index != to_index {
self.variables.move_index(from_index, to_index);
self.update_dict_indexes(&|index| {
+ #[allow(clippy::collapsible_else_if)]
if index == from_index {
Some(to_index)
} else if from_index < to_index {
F: Fn(DictIndex) -> Option<DictIndex>,
{
update_dict_index_vec(&mut self.split_file, f);
- self.weight = self.weight.map(|index| f(index)).flatten();
- self.filter = self.filter.map(|index| f(index)).flatten();
+ self.weight = self.weight.and_then(f);
+ self.filter = self.filter.and_then(f);
self.vectors = self
.vectors
.drain()
vector_by_id
.0
.with_updated_dict_indexes(f)
- .map(|vector| ByIdentifier::new(vector))
+ .map(ByIdentifier::new)
})
.collect();
self.mrsets = self
mrset_by_id
.0
.with_updated_dict_indexes(f)
- .map(|mrset| ByIdentifier::new(mrset))
+ .map(ByIdentifier::new)
})
.collect();
self.variable_sets = self
var_set_by_id
.0
.with_updated_dict_indexes(f)
- .map(|var_set| ByIdentifier::new(var_set))
+ .map(ByIdentifier::new)
})
.collect();
}
alignment: Alignment::default_for_type(var_type),
leave,
short_names: Vec::new(),
- attributes: HashSet::new()
+ attributes: HashSet::new(),
}
}
}
use crate::{
dictionary::VarWidth,
+ encoding::{default_encoding, get_encoding, Error as EncodingError},
endian::{Endian, Parse, ToBytes},
identifier::{Error as IdError, Identifier},
};
expected_n_blocks: u64,
ztrailer_len: u64,
},
+
+ #[error("{0}")]
+ EncodingError(EncodingError),
}
#[derive(ThisError, Debug)]
#[error("Invalid variable name in long string value label record. {0}")]
InvalidLongStringValueLabelName(IdError),
+ #[error("{0}")]
+ EncodingError(EncodingError),
+
#[error("Details TBD")]
TBD,
}
Cases(Rc<RefCell<Cases>>),
}
+#[derive(Clone, Debug)]
pub enum DecodedRecord<'a> {
Header(HeaderRecord<Cow<'a, str>>),
Variable(VariableRecord<Cow<'a, str>, String>),
reader: &mut R,
endian: Endian,
var_types: &[VarType],
- warn: &Box<dyn Fn(Warning)>,
+ warn: &dyn Fn(Warning),
) -> Result<Option<Record>, Error>
where
R: Read + Seek,
Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
Record::Text(record) => record.decode(decoder),
Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
- Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record.clone()),
+ Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(*record),
Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
Record::Cases(_) => todo!(),
}
}
+/// Determines the character encoding to use from a file's header records.
+///
+/// Scans `headers` for [`Record::Encoding`] and [`Record::IntegerInfo`]
+/// records (if either kind occurs more than once, the last occurrence is
+/// used) and hands the encoding name and character code they carry, if any,
+/// to [`get_encoding`].
+///
+/// # Errors
+///
+/// Returns [`Error::EncodingError`] if `get_encoding` fails with
+/// [`EncodingError::Ebcdic`].  Any other failure is not fatal: it is
+/// reported through `warn` as [`Warning::EncodingError`] and the result of
+/// [`default_encoding`] is returned instead.
+pub fn encoding_from_headers(
+    headers: &[Record],
+    warn: &impl Fn(Warning),
+) -> Result<&'static Encoding, Error> {
+    let mut encoding_record = None;
+    let mut integer_info_record = None;
+    for record in headers {
+        match record {
+            Record::Encoding(record) => encoding_record = Some(record),
+            Record::IntegerInfo(record) => integer_info_record = Some(record),
+            _ => (),
+        }
+    }
+
+    let encoding = encoding_record.map(|record| record.0.as_str());
+    let character_code = integer_info_record.map(|record| record.character_code);
+    match get_encoding(encoding, character_code) {
+        Ok(encoding) => Ok(encoding),
+        Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
+        Err(err) => {
+            // Recoverable: report the problem and fall back to the default
+            // encoding.
+            //
+            // TODO(review): only the underlying error is reported here; no
+            // separate warning says that the default encoding is being used.
+            warn(Warning::EncodingError(err));
+            Ok(default_encoding())
+        }
+    }
+}
+
// If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
// decoded as Latin-1 (actually bytes interpreted as Unicode code points).
fn default_decode(s: &[u8]) -> Cow<str> {
}
impl Decoder {
+ /// Creates a `Decoder` that stores `encoding` and the warning callback
+ /// `warn`.
+ ///
+ /// `warn` is boxed and kept inside the returned decoder, which is why it
+ /// must be `'static`.
+ pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
+ where
+ F: Fn(Warning) + 'static,
+ {
+ Self {
+ encoding,
+ warn: Box::new(warn),
+ }
+ }
fn warn(&self, warning: Warning) {
(self.warn)(warning)
}
&self.header,
)
}
-}
-
-impl<R> Iterator for Reader<R>
-where
- R: Read + Seek + 'static,
-{
- type Item = Result<Record, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
+ fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
match self.state {
ReaderState::Start => {
self.state = ReaderState::Headers;
}
}
+/// Yields the records produced by `Reader::_next`, one per call.
+///
+/// As soon as an item is an error, the reader's state is switched to
+/// `ReaderState::End` before that error is handed to the caller.
+impl<R> Iterator for Reader<R>
+where
+    R: Read + Seek + 'static,
+{
+    type Item = Result<Record, Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let item = self._next();
+        if let Some(Err(_)) = item {
+            // NOTE(review): presumably `End` makes later calls stop yielding
+            // records after a failure -- confirm in `_next`.
+            self.state = ReaderState::End;
+        }
+        item
+    }
+}
+
trait ReadSeek: Read + Seek {}
impl<T> ReadSeek for T where T: Read + Seek {}
};
Ok(Self { values, range })
}
- fn decode<'a>(&'a self, decoder: &Decoder) -> MissingValues<String> {
+ fn decode(&self, decoder: &Decoder) -> MissingValues<String> {
MissingValues {
values: self
.values
}))
}
- pub fn decode<'a>(&'a self, decoder: &Decoder) -> DecodedRecord {
+ pub fn decode(&self, decoder: &Decoder) -> DecodedRecord {
DecodedRecord::Variable(VariableRecord {
offsets: self.offsets.clone(),
width: self.width,
r: &mut R,
endian: Endian,
var_types: &[VarType],
- warn: &Box<dyn Fn(Warning)>,
+ warn: &dyn Fn(Warning),
) -> Result<Option<Record>, Error> {
let label_offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
.labels
.iter()
.map(|ValueLabel { value, label }| ValueLabel {
- value: value.clone(),
+ value: *value,
label: decoder.decode(label),
})
.collect();
}
}
- pub fn decode<'a>(&'a self, decoder: &Decoder) -> DecodedRecord {
+ pub fn decode(&self, decoder: &Decoder) -> DecodedRecord {
DecodedRecord::Document(DocumentRecord {
offsets: self.offsets.clone(),
lines: self
for short_name in self.short_names.iter() {
if let Some(short_name) = decoder
.decode_identifier(short_name)
- .map_err(|err| Warning::InvalidMrSetName(err))
+ .map_err(Warning::InvalidMrSetName)
.issue_warning(&decoder.warn)
{
short_names.push(short_name);
Ok(MultipleResponseSet {
name: decoder
.decode_identifier(&self.name)
- .map_err(|err| Warning::InvalidMrSetVariableName(err))?,
+ .map_err(Warning::InvalidMrSetVariableName)?,
label: decoder.decode(&self.label),
mr_type: self.mr_type.clone(),
- short_names: short_names,
+ short_names,
})
}
}
}
impl MultipleResponseRecord<RawString, RawString> {
- fn decode<'a>(&'a self, decoder: &Decoder) -> DecodedRecord {
+ fn decode(&self, decoder: &Decoder) -> DecodedRecord {
let mut sets = Vec::new();
for set in self.0.iter() {
if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) {
ext: &Extension,
n_vars: usize,
endian: Endian,
- warn: &Box<dyn Fn(Warning)>,
+ warn: &dyn Fn(Warning),
) -> Result<Record, Warning> {
if ext.size != 4 {
return Err(Warning::BadRecordSize {
}
impl LongStringMissingValues<RawString, RawStr<8>> {
- fn decode<'a>(
+ fn decode(
&self,
decoder: &Decoder,
) -> Result<LongStringMissingValues<Identifier, String>, IdError> {
}
impl LongStringMissingValueRecord<RawString, RawStr<8>> {
- pub fn decode<'a>(
- &self,
- decoder: &Decoder,
- ) -> LongStringMissingValueRecord<Identifier, String> {
+ pub fn decode(&self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier, String> {
let mut mvs = Vec::with_capacity(self.0.len());
for mv in self.0.iter() {
if let Some(mv) = mv
.decode(decoder)
- .map_err(|err| Warning::InvalidLongStringMissingValueVariableName(err))
+ .map_err(Warning::InvalidLongStringMissingValueVariableName)
.issue_warning(&decoder.warn)
{
mvs.push(mv);
}
}
-#[derive(Copy, Clone, Debug)]
+#[derive(Clone, Debug)]
pub struct NumberOfCasesRecord {
/// Always observed as 1.
pub one: u64,
text: extension.data.into(),
}
}
- pub fn decode<'a>(&self, decoder: &Decoder) -> DecodedRecord {
+ pub fn decode(&self, decoder: &Decoder) -> DecodedRecord {
match self.rec_type {
TextRecordType::VariableSets => {
DecodedRecord::VariableSets(VariableSetRecord::decode(self, decoder))
}
}
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
pub struct AttributeSet(pub HashMap<Identifier, Vec<String>>);
impl AttributeSet {
}
}
-impl Default for AttributeSet {
- fn default() -> Self {
- Self(HashMap::default())
- }
-}
-
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
pub struct FileAttributeRecord(AttributeSet);
impl FileAttributeRecord {
}
}
-impl Default for FileAttributeRecord {
- fn default() -> Self {
- Self(AttributeSet::default())
- }
-}
-
#[derive(Clone, Debug)]
pub struct VarAttributeSet {
pub long_var_name: Identifier,
let mut var_attribute_sets = Vec::new();
while !input.is_empty() {
let Some((var_attribute, rest)) =
- VarAttributeSet::parse(decoder, &input).issue_warning(&decoder.warn)
+ VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn)
else {
break;
};
var_attribute_sets.push(var_attribute);
- input = rest.into();
+ input = rest;
}
VariableAttributeRecord(var_attribute_sets)
}
r: &mut R,
endian: Endian,
n_vars: usize,
- warn: &Box<dyn Fn(Warning)>,
+ warn: &dyn Fn(Warning),
) -> Result<Option<Record>, Error> {
let subtype = endian.parse(read_bytes(r)?);
let header_offset = r.stream_position()?;
let mut labels = Vec::with_capacity(self.labels.len());
for (value, label) in self.labels.iter() {
let value = decoder.decode_exact_length(&value.0);
- let label = decoder.decode(&label);
+ let label = decoder.decode(label);
labels.push((value, label));
}