};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::Encoding;
+use indexmap::set::MutableValues;
use num::Integer;
use thiserror::Error as ThisError;
assert_eq!(var_index_map.insert(value_index, dict_index), None);
}
+ if let Some(weight_index) = headers.header.weight_index {
+ if let Some(dict_index) = var_index_map.get(&(weight_index as usize - 1)) {
+ let variable = &dictionary.variables[*dict_index];
+ if variable.is_numeric() {
+ dictionary.weight = Some(*dict_index);
+ } else {
+ warn(Error::TBD);
+ }
+ } else {
+ warn(Error::TBD);
+ }
+ }
+
for record in headers.value_label.drain(..) {
let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len());
- let mut continuation_indexes = Vec::new();
let mut long_string_variables = Vec::new();
for value_index in record.dict_indexes.iter() {
- if let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) {
- let variable = &dictionary.variables[*dict_index];
- if variable.width.is_long_string() {
- long_string_variables.push(variable.name.clone());
- } else {
- dict_indexes.push(*dict_index);
- }
+ let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else {
+ unreachable!()
+ };
+ let variable = &dictionary.variables[*dict_index];
+ if variable.width.is_long_string() {
+ long_string_variables.push(variable.name.clone());
} else {
- continuation_indexes.push(*value_index);
+ dict_indexes.push(*dict_index);
}
}
- if !continuation_indexes.is_empty() {
- warn(Error::LongStringContinuationIndexes {
- offset: record.offsets.start,
- indexes: continuation_indexes,
- });
- }
if !long_string_variables.is_empty() {
warn(Error::InvalidLongStringValueLabels {
offsets: record.offsets.clone(),
wrong_types: Vec<u32>,
},
- #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")]
+ #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}")]
InvalidVarIndexes {
offset: u64,
max: usize,
fn read<R>(
reader: &mut R,
endian: Endian,
- var_types: &[VarType],
+ var_types: &VarTypes,
warn: &dyn Fn(Warning),
) -> Result<Option<Record>, Error>
where
2 => Ok(Some(VariableRecord::read(reader, endian)?)),
3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
- 7 => Extension::read(reader, endian, var_types.len(), warn),
+ 7 => Extension::read(reader, endian, var_types.n_values(), warn),
999 => Ok(Some(Record::EndOfHeaders(
endian.parse(read_bytes(reader)?),
))),
fn read_case<R: Read + Seek>(
reader: &mut R,
- var_types: &[VarType],
+ var_types: &VarTypes,
endian: Endian,
) -> Result<Option<Vec<Self>>, Error> {
let case_start = reader.stream_position()?;
- let mut values = Vec::with_capacity(var_types.len());
- for (i, &var_type) in var_types.iter().enumerate() {
+ let mut values = Vec::with_capacity(var_types.n_values());
+ for (i, (var_type, _)) in var_types.types.iter().enumerate() {
let Some(raw) = try_read_bytes(reader)? else {
if i == 0 {
return Ok(None);
return Err(Error::EofInCase {
offset,
case_ofs: offset - case_start,
- case_len: var_types.len() * 8,
+ case_len: var_types.n_values() * 8,
});
}
};
- values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
+ values.push(Value::from_raw(&UntypedValue(raw), *var_type, endian));
}
Ok(Some(values))
}
fn read_compressed_case<R: Read + Seek>(
reader: &mut R,
- var_types: &[VarType],
+ var_types: &VarTypes,
codes: &mut VecDeque<u8>,
endian: Endian,
bias: f64,
) -> Result<Option<Vec<Self>>, Error> {
let case_start = reader.stream_position()?;
- let mut values = Vec::with_capacity(var_types.len());
- for (i, &var_type) in var_types.iter().enumerate() {
+ let mut values = Vec::with_capacity(var_types.n_values());
+ for (i, (var_type, _)) in var_types.types.iter().enumerate() {
let value = loop {
let Some(code) = codes.pop_front() else {
let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
};
match code {
0 => (),
- 1..=251 => match var_type {
+ 1..=251 => match *var_type {
VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
VarType::String => {
break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
}
}
253 => {
- break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
+ break Self::from_raw(&UntypedValue(read_bytes(reader)?), *var_type, endian)
}
- 254 => match var_type {
+ 254 => match *var_type {
VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC
VarType::Numeric => {
return Err(Error::CompressedStringExpected {
})
}
},
- 255 => match var_type {
+ 255 => match *var_type {
VarType::Numeric => break Self::Number(None),
VarType::String => {
return Err(Error::CompressedNumberExpected {
warn: Box<dyn Fn(Warning)>,
header: HeaderRecord<RawString>,
- var_types: Vec<VarType>,
+ var_types: VarTypes,
state: ReaderState,
}
reader: Some(reader),
warn: Box::new(warn),
header,
- var_types: Vec::new(),
+ var_types: VarTypes::new(),
state: ReaderState::Start,
})
}
match Record::read(
self.reader.as_mut().unwrap(),
self.header.endian,
- self.var_types.as_slice(),
+ &self.var_types,
&self.warn,
) {
Ok(Some(record)) => break record,
}
};
match record {
- Record::Variable(VariableRecord { width, .. }) => {
- self.var_types.push(if width == 0 {
- VarType::Numeric
- } else {
- VarType::String
- });
- }
+ Record::Variable(VariableRecord { width, .. }) => self.var_types.push(width),
Record::EndOfHeaders(_) => {
self.state = if let Some(Compression::ZLib) = self.header.compression {
ReaderState::ZlibHeader
/// Holds what is needed to read cases (data rows) from a system file.
///
/// NOTE(review): only the field list is visible in this hunk; the field
/// descriptions below are inferred from the field types and from the
/// parameters of `read_case` / `read_compressed_case` -- confirm against the
/// `impl` before relying on them.
pub struct Cases {
/// Boxed, type-erased input source (`Cases::new` accepts any
/// `Read + Seek + 'static` reader).
reader: Box<dyn ReadSeek>,
/// Types of the values that make up one case, in file order.
- var_types: Vec<VarType>,
+ var_types: VarTypes,
/// Compression scheme, if any (presumably `None` means uncompressed data;
/// verify against the header parsing code).
compression: Option<Compression>,
/// Presumably the bias that `read_compressed_case` subtracts from
/// compressed codes -- TODO confirm it is this field that gets passed.
bias: f64,
/// Byte order used when decoding raw values.
endian: Endian,
}
impl Cases {
- fn new<R>(reader: R, var_types: Vec<VarType>, header: &HeaderRecord<RawString>) -> Self
+ fn new<R>(reader: R, var_types: VarTypes, header: &HeaderRecord<RawString>) -> Self
where
R: Read + Seek + 'static,
{
fn read<R: Read + Seek>(
r: &mut R,
endian: Endian,
- var_types: &[VarType],
+ var_types: &VarTypes,
warn: &dyn Fn(Warning),
) -> Result<Option<Record>, Error> {
let label_offset = r.stream_position()?;
let index_offset = r.stream_position()?;
let mut dict_indexes = Vec::with_capacity(n as usize);
let mut invalid_indexes = Vec::new();
- let valid_range = 1..=var_types.len();
for _ in 0..n {
let index: u32 = endian.parse(read_bytes(r)?);
- if valid_range.contains(&(index as usize)) {
+ if var_types.is_valid_index(index as usize) {
dict_indexes.push(index);
} else {
invalid_indexes.push(index);
if !invalid_indexes.is_empty() {
warn(Warning::InvalidVarIndexes {
offset: index_offset,
- max: var_types.len(),
+ max: var_types.n_values(),
invalid: invalid_indexes,
});
}
let Some(&first_index) = dict_indexes.first() else {
return Ok(None);
};
- let var_type = var_types[first_index as usize - 1];
+ let var_type = var_types.types[first_index as usize - 1].0;
let mut wrong_type_indexes = Vec::new();
dict_indexes.retain(|&index| {
- if var_types[index as usize - 1] != var_type {
+ if var_types.types[index as usize - 1].0 != var_type {
wrong_type_indexes.push(index);
false
} else {
LongStringValueLabelRecord(labels)
}
}
+
+/// Table of variable types, indexed by 8-byte value slot.
+///
+/// A variable that spans several slots (a string wider than 8 bytes) gets one
+/// entry per slot, so 1-based slot indexes found elsewhere in the file can be
+/// looked up directly.
+#[derive(Default)]
+pub struct VarTypes {
+    /// One `(type, segment)` pair per value slot. `segment` is 0 for the
+    /// first slot of a variable and counts upward across the remaining slots
+    /// that belong to the same variable.
+    pub types: Vec<(VarType, usize)>,
+}
+
+impl VarTypes {
+    /// Creates an empty table.
+    pub fn new() -> Self {
+        Self { types: Vec::new() }
+    }
+
+    /// Appends the slots occupied by a variable record with raw `width`.
+    ///
+    /// * `-1` adds nothing (presumably the marker for a string continuation
+    ///   record, whose slot was already added with the owning string
+    ///   variable -- confirm against the file format).
+    /// * `0` adds a single numeric slot.
+    /// * `1..=255` adds `ceil(width / 8)` string slots with segment numbers
+    ///   starting at 0.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `width` is outside `-1..=255`.
+    pub fn push(&mut self, width: i32) {
+        if width == -1 {
+            return;
+        }
+        let var_type = match width {
+            0 => VarType::Numeric,
+            1..=255 => VarType::String,
+            _ => unreachable!(),
+        };
+        // Numeric variables have width 0 but still occupy one slot.
+        let slot_count = (width as usize).div_ceil(8).max(1);
+        self.types
+            .extend((0..slot_count).map(|segment| (var_type, segment)));
+    }
+
+    /// Returns the total number of value slots recorded so far.
+    pub fn n_values(&self) -> usize {
+        self.types.len()
+    }
+
+    /// Returns true if the 1-based `index` names the first slot of a variable.
+    pub fn is_valid_index(&self, index: usize) -> bool {
+        self.var_type_at(index).is_some()
+    }
+
+    /// Returns the type of the variable whose first slot is the 1-based
+    /// `index`.
+    ///
+    /// Yields `None` when `index` is 0, is past the last slot, or lands on a
+    /// non-initial segment of a multi-slot variable.
+    pub fn var_type_at(&self, index: usize) -> Option<VarType> {
+        let slot = index.checked_sub(1)?;
+        match self.types.get(slot) {
+            Some(&(var_type, 0)) => Some(var_type),
+            _ => None,
+        }
+    }
+}