BadRecordType { offset: u64, rec_type: u32 },
#[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
- BadVariableLabelCode { start_offset: u64, code_offset: u64, code: u32 },
+ BadVariableLabelCode {
+ start_offset: u64,
+ code_offset: u64,
+ code: u32,
+ },
#[error(
"At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
VarDisplay(VarDisplayRecord),
MultipleResponse(MultipleResponseRecord),
LongStringValueLabels(LongStringValueLabelRecord),
+ LongStringMissingValues(LongStringMissingValueSet),
Encoding(EncodingRecord),
NumberOfCases(NumberOfCasesRecord),
ProductInfo(TextRecord),
// If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
// decoded as Latin-1 (actually bytes interpreted as Unicode code points).
-fn default_decode<>(s: &[u8]) -> Cow<str> {
+//
+// The return type spells out the elided lifetime (`Cow<'_, str>`) so the
+// signature shows that the result may borrow from `s`.
+fn default_decode(s: &[u8]) -> Cow<'_, str> {
from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
}
}
};
- let missing_values = MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
+ let missing_values =
+ MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
let end_offset = r.stream_position()?;
+/// A value label record: a list of labels, the 1-based indexes stored with
+/// them, and the range of file offsets the record was read from.
#[derive(Clone)]
pub struct ValueLabelRecord {
- /// Offset from the start of the file to the start of the value label
- /// record.
- pub label_offset: u64,
+ /// Range of offsets in file.
+ pub offsets: Range<u64>,
/// The labels.
pub labels: Vec<(UntypedValue, UnencodedString)>,
- /// Offset from the start of the file to the start of the variable index
- /// record.
- pub index_offset: u64,
-
/// The 1-based indexes of the variable indexes.
pub dict_indexes: Vec<u32>,
}
}
}
+impl Header for ValueLabelRecord {
+ /// Returns a copy of the file offset range stored in this record.
+ fn offsets(&self) -> Range<u64> {
+ self.offsets.start..self.offsets.end
+ }
+}
+
impl ValueLabelRecord {
/// Maximum number of value labels in a record.
pub const MAX_LABELS: u32 = u32::MAX / 8;
dict_indexes.push(endian.parse(read_bytes(r)?));
}
+ let end_offset = r.stream_position()?;
Ok(ValueLabelRecord {
- label_offset,
+ offsets: label_offset..end_offset,
labels,
- index_offset,
dict_indexes,
})
}
#[derive(Clone, Debug)]
pub struct DocumentRecord {
- /// Offset from the start of the file to the start of the record.
- pub pos: u64,
+ pub offsets: Range<u64>,
/// The document, as an array of 80-byte lines.
pub lines: Vec<DocumentLine>,
pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN;
+ /// Reads a document record from `r`, decoding the line count with `endian`.
+ ///
+ /// The data read is a `u32` line count followed by that many lines, each
+ /// read as an `UnencodedStr` of `DocumentRecord::LINE_LEN`.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error::BadDocumentLength` if the line count exceeds
+ /// `Self::MAX_LINES`. Failures from `stream_position` and `read_bytes` are
+ /// propagated with `?`.
fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<DocumentRecord, Error> {
- let offset = r.stream_position()?;
+ let start_offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
let n = n as usize;
+ // Reject an oversized count before it is handed to `Vec::with_capacity`.
if n > Self::MAX_LINES {
Err(Error::BadDocumentLength {
- offset,
+ offset: start_offset,
n,
max: Self::MAX_LINES,
})
} else {
- let pos = r.stream_position()?;
let mut lines = Vec::with_capacity(n);
for _ in 0..n {
lines.push(UnencodedStr::<{ DocumentRecord::LINE_LEN }>(read_bytes(r)?));
}
- Ok(DocumentRecord { pos, lines })
+ // The recorded range starts before the line count and ends just after
+ // the last line read.
+ let end_offset = r.stream_position()?;
+ Ok(DocumentRecord {
+ offsets: start_offset..end_offset,
+ lines,
+ })
}
}
}
-trait ExtensionRecord
-where
- Self: Sized,
-{
+impl Header for DocumentRecord {
+ /// Returns a copy of the file offset range stored in this record.
+ fn offsets(&self) -> Range<u64> {
+ self.offsets.start..self.offsets.end
+ }
+}
+
+/// Interface implemented by each typed extension record.
+///
+/// The constants describe the record for `check_size` and for dispatch on the
+/// extension subtype; `parse` turns a raw `Extension` into a `Record`.
+trait ExtensionRecord {
+ /// Subtype value that selects this record type.
const SUBTYPE: u32;
+ /// Expected value of the extension's `size`, if fixed; `check_size` reports
+ /// `Error::BadRecordSize` on mismatch.
const SIZE: Option<u32>;
+ /// Expected value of the extension's `count`, if fixed; `check_size` reports
+ /// `Error::BadRecordCount` on mismatch.
const COUNT: Option<u32>;
+ /// Record name placed in the `record` field of size/count errors.
const NAME: &'static str;
- fn parse(ext: &Extension, endian: Endian, warn: impl Fn(Error)) -> Result<Self, Error>;
+ /// Parses `ext` (decoding with `endian` where needed) into a `Record`.
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error>;
}
#[derive(Clone, Debug)]
pub struct IntegerInfoRecord {
+ pub offsets: Range<u64>,
pub version: (i32, i32, i32),
pub machine_code: i32,
pub floating_point_rep: i32,
const COUNT: Option<u32> = Some(8);
const NAME: &'static str = "integer record";
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ /// Parses `ext` as an integer record and wraps it in `Record::IntegerInfo`.
+ ///
+ /// Eight `i32` values are decoded from `ext.data` using `endian` and
+ /// assigned, in order, to the version triple, machine code, floating point
+ /// representation, compression code, endianness and character code. The
+ /// record's `offsets` are copied from `ext.offsets`.
+ ///
+ /// # Errors
+ ///
+ /// Propagates any error returned by `ext.check_size::<Self>()`.
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
+ // NOTE(review): `unwrap` panics if `ext.data` holds fewer bytes than the
+ // eight values need; presumably `check_size` rules that out -- confirm.
let data: Vec<i32> = (0..8)
.map(|_| endian.parse(read_bytes(&mut input).unwrap()))
.collect();
- Ok(IntegerInfoRecord {
+ Ok(Record::IntegerInfo(IntegerInfoRecord {
+ offsets: ext.offsets.clone(),
version: (data[0], data[1], data[2]),
machine_code: data[3],
floating_point_rep: data[4],
compression_code: data[5],
endianness: data[6],
character_code: data[7],
- })
+ }))
}
}
const COUNT: Option<u32> = Some(3);
const NAME: &'static str = "floating point record";
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ /// Parses `ext` as a floating point record and wraps it in
+ /// `Record::FloatInfo`.
+ ///
+ /// Three `f64` values are decoded from `ext.data` using `endian` and
+ /// assigned, in order, to `sysmis`, `highest` and `lowest`.
+ ///
+ /// # Errors
+ ///
+ /// Propagates any error returned by `ext.check_size::<Self>()`.
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
+ // NOTE(review): `unwrap` panics if `ext.data` holds fewer bytes than the
+ // three values need; presumably `check_size` rules that out -- confirm.
let data: Vec<f64> = (0..3)
.map(|_| endian.parse(read_bytes(&mut input).unwrap()))
.collect();
- Ok(FloatInfoRecord {
+ // NOTE(review): unlike the integer record, `ext.offsets` is not stored
+ // here -- confirm that is intended.
+ Ok(Record::FloatInfo(FloatInfoRecord {
sysmis: data[0],
highest: data[1],
lowest: data[2],
- })
+ }))
}
}
};
let (value, input) = parse_counted_string(input)?;
(
- MultipleResponseType::MultipleDichotomy {
- value,
- labels,
- },
+ MultipleResponseType::MultipleDichotomy { value, labels },
input,
)
}
const COUNT: Option<u32> = None;
const NAME: &'static str = "multiple response set record";
- fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
sets.push(set);
input = rest;
}
- Ok(MultipleResponseRecord(sets))
+ Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
}
}
const COUNT: Option<u32> = None;
const NAME: &'static str = "variable display record";
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ /// Parses `ext` as a variable display record and wraps it in
+ /// `Record::VarDisplay`.
+ ///
+ /// `ext.count` values are decoded from `ext.data` using `endian` and
+ /// collected into the `VarDisplayRecord`.
+ ///
+ /// # Errors
+ ///
+ /// Propagates any error returned by `ext.check_size::<Self>()`.
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
+ // NOTE(review): `unwrap` panics if `ext.data` runs out before `ext.count`
+ // values are read; presumably `check_size` rules that out -- confirm.
let display = (0..ext.count)
.map(|_| endian.parse(read_bytes(&mut input).unwrap()))
.collect();
- Ok(VarDisplayRecord(display))
+ Ok(Record::VarDisplay(VarDisplayRecord(display)))
}
}
+/// One entry of a long string missing values record: a variable name paired
+/// with a set of missing values.
+#[derive(Clone, Debug)]
pub struct LongStringMissingValues {
/// Variable name.
pub var_name: UnencodedString,
+ /// Missing values stored with `var_name`.
pub missing_values: MissingValues,
}
+/// The contents of a long string missing values record, as a list of
+/// `LongStringMissingValues` entries.
+#[derive(Clone, Debug)]
pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
impl ExtensionRecord for LongStringMissingValueSet {
const COUNT: Option<u32> = None;
const NAME: &'static str = "long string missing values record";
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
let value_len: u32 = endian.parse(read_bytes(&mut input)?);
if value_len != 8 {
- let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offset;
+ let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
return Err(Error::BadLongMissingValueLength {
- record_offset: ext.offset,
+ record_offset: ext.offsets.start,
offset,
value_len,
});
missing_values,
});
}
- Ok(LongStringMissingValueSet(missing_value_set))
+ Ok(Record::LongStringMissingValues(LongStringMissingValueSet(
+ missing_value_set,
+ )))
}
}
const COUNT: Option<u32> = None;
const NAME: &'static str = "encoding record";
- fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ /// Parses `ext` as an encoding record and wraps it in `Record::Encoding`.
+ ///
+ /// A copy of `ext.data` is converted to a `String`; `_endian` is unused.
+ ///
+ /// # Errors
+ ///
+ /// Propagates any error returned by `ext.check_size::<Self>()`, and returns
+ /// `Error::BadEncodingName` (carrying `ext.offsets.start`) if the data is
+ /// not valid UTF-8.
+ fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
- Ok(EncodingRecord(
- String::from_utf8(ext.data.clone())
- .map_err(|_| Error::BadEncodingName { offset: ext.offset })?,
- ))
+ Ok(Record::Encoding(EncodingRecord(
+ String::from_utf8(ext.data.clone()).map_err(|_| Error::BadEncodingName {
+ offset: ext.offsets.start,
+ })?,
+ )))
}
}
const COUNT: Option<u32> = Some(2);
const NAME: &'static str = "extended number of cases record";
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ /// Parses `ext` as an extended number of cases record and wraps it in
+ /// `Record::NumberOfCases`.
+ ///
+ /// Two values are decoded from `ext.data` using `endian`: first `one`, then
+ /// `n_cases`.
+ ///
+ /// # Errors
+ ///
+ /// Propagates any error returned by `ext.check_size::<Self>()` or by
+ /// `read_bytes`.
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
let one = endian.parse(read_bytes(&mut input)?);
let n_cases = endian.parse(read_bytes(&mut input)?);
- Ok(NumberOfCasesRecord { one, n_cases })
+ Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
}
}
#[derive(Clone, Debug)]
pub struct TextRecord {
- /// Offset from the start of the file to the start of the record.
- pub offset: u64,
+ pub offsets: Range<u64>,
/// The text content of the record.
pub text: UnencodedString,
impl From<Extension> for TextRecord {
+ /// Builds a text record from `source`, moving its offset range into
+ /// `offsets` and converting its data into `text`.
fn from(source: Extension) -> Self {
TextRecord {
- offset: source.offset,
+ offsets: source.offsets,
text: source.data.into(),
}
}
#[derive(Clone, Debug)]
pub struct Extension {
- /// Offset from the start of the file to the start of the record.
- pub offset: u64,
+ pub offsets: Range<u64>,
/// Record subtype.
pub subtype: u32,
if let Some(expected_size) = E::SIZE {
if self.size != expected_size {
return Err(Error::BadRecordSize {
- offset: self.offset,
+ offset: self.offsets.start,
record: E::NAME.into(),
size: self.size,
expected_size,
if let Some(expected_count) = E::COUNT {
if self.count != expected_count {
return Err(Error::BadRecordCount {
- offset: self.offset,
+ offset: self.offsets.start,
record: E::NAME.into(),
count: self.count,
expected_count,
fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
let subtype = endian.parse(read_bytes(r)?);
- let offset = r.stream_position()?;
+ let header_offset = r.stream_position()?;
let size: u32 = endian.parse(read_bytes(r)?);
let count = endian.parse(read_bytes(r)?);
let Some(product) = size.checked_mul(count) else {
return Err(Error::ExtensionRecordTooLarge {
- offset,
+ offset: header_offset,
subtype,
size,
count,
});
};
- let offset = r.stream_position()?;
+ let start_offset = r.stream_position()?;
let data = read_vec(r, product as usize)?;
+ let end_offset = start_offset + product as u64;
let extension = Extension {
- offset,
+ offsets: start_offset..end_offset,
subtype,
size,
count,
data,
};
match subtype {
- IntegerInfoRecord::SUBTYPE => Ok(Record::IntegerInfo(IntegerInfoRecord::parse(
- &extension,
- endian,
- |_| (),
- )?)),
- FloatInfoRecord::SUBTYPE => Ok(Record::FloatInfo(FloatInfoRecord::parse(
- &extension,
- endian,
- |_| (),
- )?)),
- VarDisplayRecord::SUBTYPE => Ok(Record::VarDisplay(VarDisplayRecord::parse(
- &extension,
- endian,
- |_| (),
- )?)),
- MultipleResponseRecord::SUBTYPE | 19 => Ok(Record::MultipleResponse(
- MultipleResponseRecord::parse(&extension, endian, |_| ())?,
- )),
- LongStringValueLabelRecord::SUBTYPE => Ok(Record::LongStringValueLabels(
- LongStringValueLabelRecord::parse(&extension, endian, |_| ())?,
- )),
- EncodingRecord::SUBTYPE => Ok(Record::Encoding(EncodingRecord::parse(
- &extension,
- endian,
- |_| (),
- )?)),
- NumberOfCasesRecord::SUBTYPE => Ok(Record::NumberOfCases(NumberOfCasesRecord::parse(
- &extension,
- endian,
- |_| (),
- )?)),
+ IntegerInfoRecord::SUBTYPE => Ok(IntegerInfoRecord::parse(
+ &extension, endian,
+ )?),
+ FloatInfoRecord::SUBTYPE => Ok(FloatInfoRecord::parse(
+ &extension, endian,
+ )?),
+ VarDisplayRecord::SUBTYPE => Ok(VarDisplayRecord::parse(
+ &extension, endian,
+ )?),
+ MultipleResponseRecord::SUBTYPE | 19 => Ok(
+ MultipleResponseRecord::parse(&extension, endian)?,
+ ),
+ LongStringValueLabelRecord::SUBTYPE => Ok(
+ LongStringValueLabelRecord::parse(&extension, endian)?,
+ ),
+ EncodingRecord::SUBTYPE => {
+ Ok(EncodingRecord::parse(&extension, endian)?)
+ }
+ NumberOfCasesRecord::SUBTYPE => Ok(NumberOfCasesRecord::parse(
+ &extension, endian,
+ )?),
5 => Ok(Record::VariableSets(extension.into())),
10 => Ok(Record::ProductInfo(extension.into())),
13 => Ok(Record::LongNames(extension.into())),
const COUNT: Option<u32> = None;
const NAME: &'static str = "long string value labels record";
- fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
labels,
})
}
- Ok(LongStringValueLabelRecord(label_set))
+ Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
+ label_set,
+ )))
}
}