use flate2::read::ZlibDecoder;
use num::Integer;
+use std::borrow::Cow;
+use std::fmt::{Debug, Formatter, Result as FmtResult};
+use std::str::from_utf8;
use std::{
collections::VecDeque,
io::{Error as IoError, Read, Seek, SeekFrom},
ZLib,
}
+#[derive(Clone, Debug)]
pub enum Record {
Header(Header),
Document(Document),
}
}
+/// Wrapper around a borrowed byte string of unknown encoding.
+///
+/// Its [`Debug`] implementation prints the bytes as quoted text: as-is when they are valid
+/// UTF-8, otherwise byte by byte with escaping.
+pub struct FallbackEncoding<'a>(&'a [u8]);
+
+/// Decodes `s` as text without knowing its encoding.
+///
+/// Valid UTF-8 is borrowed unchanged.  Anything else is decoded one byte per character, mapping
+/// each byte to the `char` with the same code point, which requires an allocation.
+fn fallback_encode<'a>(s: &'a [u8]) -> Cow<'a, str> {
+    match from_utf8(s) {
+        Ok(text) => Cow::Borrowed(text),
+        Err(_) => Cow::Owned(s.iter().copied().map(char::from).collect()),
+    }
+}
+
+impl<'a> Debug for FallbackEncoding<'a> {
+    /// Writes the bytes as a double-quoted string with trailing whitespace removed.
+    ///
+    /// Valid UTF-8 is written verbatim.  Otherwise each byte is converted to the `char` with the
+    /// same code point and written in its `escape_default` form.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let text: Cow<str> = match from_utf8(self.0) {
+            Ok(valid) => Cow::Borrowed(valid),
+            Err(_) => Cow::Owned(
+                self.0
+                    .iter()
+                    .flat_map(|&byte| char::from(byte).escape_default())
+                    .collect(),
+            ),
+        };
+        write!(f, "\"{}\"", text.trim_end())
+    }
+}
+
+#[derive(Clone)]
pub struct Header {
/// Magic number.
pub magic: Magic,
pub endian: Endian,
}
+impl Header {
+ /// Writes one line of debug output: `name` right-aligned in a 17-column field, then `: `,
+ /// then `value` in its [`Debug`] representation.
+ fn debug_field<T: Debug>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult {
+ writeln!(f, "{name:>17}: {:?}", value)
+ }
+}
+
+impl Debug for Header {
+    /// Writes a title line followed by one aligned `name: value` line per header field.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "File header record:")?;
+
+        // Route every field through `debug_field` so that all labels share one column.
+        let mut field = |name: &str, value: &dyn Debug| self.debug_field(f, name, value);
+        field("Magic", &self.magic)?;
+        field("Product name", &FallbackEncoding(&self.eye_catcher))?;
+        field("Layout code", &self.layout_code)?;
+        field("Nominal case size", &self.nominal_case_size)?;
+        field("Compression", &self.compression)?;
+        field("Weight index", &self.weight_index)?;
+        field("Number of cases", &self.n_cases)?;
+        field("Compression bias", &self.bias)?;
+        field("Creation date", &FallbackEncoding(&self.creation_date))?;
+        field("Creation time", &FallbackEncoding(&self.creation_time))?;
+        field("File label", &FallbackEncoding(&self.file_label))?;
+        field("Endianness", &self.endian)
+    }
+}
+
impl Header {
fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
let magic: [u8; 4] = read_bytes(r)?;
};
let weight_index: u32 = endian.parse(read_bytes(r)?);
- let weight_index = (weight_index > 0).then_some(weight_index - 1);
+ let weight_index = (weight_index > 0).then(|| weight_index - 1);
let n_cases: u32 = endian.parse(read_bytes(r)?);
let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
}
+impl Debug for Magic {
+    /// Writes a readable name for a recognized magic number, or the raw bytes otherwise.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let name = match *self {
+            Magic::SAV => "$FL2",
+            Magic::ZSAV => "$FL3",
+            Magic::EBCDIC => "($FL2 in EBCDIC)",
+            // Not one of the known constants: fall back to the byte array.
+            _ => return write!(f, "{:?}", self.0),
+        };
+        f.write_str(name)
+    }
+}
+
impl TryFrom<[u8; 4]> for Magic {
type Error = Error;
String([u8; 8]),
}
+impl Debug for Value {
+    /// Writes a number in its `Debug` form, a missing number as `SYSMIS`, and a string as
+    /// quoted text via [`FallbackEncoding`].
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            Value::String(raw) => write!(f, "{:?}", FallbackEncoding(raw)),
+            Value::Number(number) => match number {
+                Some(number) => write!(f, "{number:?}"),
+                None => write!(f, "SYSMIS"),
+            },
+        }
+    }
+}
+
impl Value {
+ /// Reads 8 raw bytes from `r` and converts them with [`Value::from_raw`] according to
+ /// `var_type` and `endian`.
+ ///
+ /// # Errors
+ ///
+ /// Returns the I/O error if the bytes cannot be read.
+ fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Value, IoError> {
+ Ok(Self::from_raw(var_type, read_bytes(r)?, endian))
+ }
+
pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
match var_type {
VarType::String => Value::String(raw),
impl FusedIterator for Reader {}
+/// A raw format specification packed into a `u32`.
+///
+/// As decoded by the [`Debug`] implementation: the bits above bit 15 are the format type, bits
+/// 8..=15 are the width, and bits 0..=7 are the number of decimals.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Format(pub u32);
+
+impl Debug for Format {
+    /// Writes the raw value in hexadecimal followed by `(TYPEw.d)`.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let Format(raw) = *self;
+        let name = format_name(raw >> 16);
+        let width = (raw >> 8) & 0xff;
+        let decimals = raw & 0xff;
+        write!(f, "{raw:06x} ({name}{width}.{decimals})")
+    }
+}
+
+/// Returns the name of the format with numeric code `type_`, or `"(unknown)"` if the code is
+/// not in the table.
+fn format_name(type_: u32) -> &'static str {
+    /// Known format codes and their names.
+    const NAMES: &[(u32, &str)] = &[
+        (1, "A"),
+        (2, "AHEX"),
+        (3, "COMMA"),
+        (4, "DOLLAR"),
+        (5, "F"),
+        (6, "IB"),
+        (7, "PIBHEX"),
+        (8, "P"),
+        (9, "PIB"),
+        (10, "PK"),
+        (11, "RB"),
+        (12, "RBHEX"),
+        (15, "Z"),
+        (16, "N"),
+        (17, "E"),
+        (20, "DATE"),
+        (21, "TIME"),
+        (22, "DATETIME"),
+        (23, "ADATE"),
+        (24, "JDATE"),
+        (25, "DTIME"),
+        (26, "WKDAY"),
+        (27, "MONTH"),
+        (28, "MOYR"),
+        (29, "QYR"),
+        (30, "WKYR"),
+        (31, "PCT"),
+        (32, "DOT"),
+        (33, "CCA"),
+        (34, "CCB"),
+        (35, "CCC"),
+        (36, "CCD"),
+        (37, "CCE"),
+        (38, "EDATE"),
+        (39, "SDATE"),
+        (40, "MTIME"),
+        (41, "YMDHMS"),
+    ];
+
+    NAMES
+        .iter()
+        .find(|(code, _)| *code == type_)
+        .map_or("(unknown)", |(_, name)| *name)
+}
+
+/// The missing values declared for a variable: a list of individual values plus an optional
+/// range.
+#[derive(Clone)]
+pub struct MissingValues {
+ /// Individual missing values, up to 3 of them.
+ pub values: Vec<Value>,
+
+ /// Optional range of missing values, as a `(low, high)` pair.
+ pub range: Option<(Value, Value)>,
+}
+
+impl Debug for MissingValues {
+    /// Writes the individual values separated by `, `, then the range (if any) as
+    /// `LOW THRU HIGH`, or `none` when there are no missing values at all.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        for (i, value) in self.values.iter().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{value:?}")?;
+        }
+
+        // Borrow the range instead of moving it out of `&self`, so that this does not depend
+        // on `Value` being `Copy`.
+        if let Some((low, high)) = &self.range {
+            if !self.values.is_empty() {
+                write!(f, ", ")?;
+            }
+            write!(f, "{low:?} THRU {high:?}")?;
+        }
+
+        if self.is_empty() {
+            write!(f, "none")?;
+        }
+
+        Ok(())
+    }
+}
+
+impl MissingValues {
+    /// Returns true if there are no individual missing values and no range.
+    fn is_empty(&self) -> bool {
+        self.values.is_empty() && self.range.is_none()
+    }
+
+    /// Reads the missing values of a variable record from `r`.
+    ///
+    /// `code` is the record's missing value code: 0 through 3 gives that many individual
+    /// values, and for a numeric variable (`width` 0) -2 means a range and -3 means a range
+    /// plus one individual value.  `offset` is used only for error reporting.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::BadNumericMissingValueCode` or `Error::BadStringMissingValueCode` for
+    /// any other `code`, and propagates I/O errors from reading the values.
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        offset: u64,
+        width: i32,
+        code: i32,
+        endian: Endian,
+    ) -> Result<MissingValues, Error> {
+        let (n_values, has_range) = match (width, code) {
+            (_, 0..=3) => (code, false),
+            (0, -2) => (0, true),
+            (0, -3) => (1, true),
+            (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
+            (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
+        };
+
+        let var_type = VarType::from_width(width);
+
+        // On disk the range comes first: when a range is present, the first two elements are
+        // its low and high ends, and an additional individual value (code -3) is the third
+        // element.  The range must therefore be read before the individual values.
+        let range = if has_range {
+            let low = Value::read(r, var_type, endian)?;
+            let high = Value::read(r, var_type, endian)?;
+            Some((low, high))
+        } else {
+            None
+        };
+
+        let mut values = Vec::new();
+        for _ in 0..n_values {
+            values.push(Value::read(r, var_type, endian)?);
+        }
+        Ok(MissingValues { values, range })
+    }
+}
+
+#[derive(Clone)]
pub struct Variable {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
/// Write format.
pub write_format: u32,
- /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
- pub missing_value_code: i32,
-
- /// Raw missing values, up to 3 of them.
- pub missing: Vec<[u8; 8]>,
+ /// Missing values.
+ pub missing_values: MissingValues,
/// Optional variable label.
- pub label: Option<Vec<u8>>,
+ pub label: Option<UnencodedString>,
+}
+
+impl Debug for Variable {
+    /// Writes the variable's width, formats, name, label, and missing values, one per line.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        // A positive width is a string, zero is numeric, and anything negative is treated as
+        // a continuation of a long string.
+        let kind = match self.width {
+            width if width > 0 => "string",
+            0 => "numeric",
+            _ => "long string continuation record",
+        };
+        writeln!(f, "Width: {} ({})", self.width, kind)?;
+        writeln!(f, "Print format: {:?}", Format(self.print_format))?;
+        writeln!(f, "Write format: {:?}", Format(self.write_format))?;
+        writeln!(f, "Name: {:?}", FallbackEncoding(&self.name))?;
+        writeln!(f, "Variable label: {:?}", self.label)?;
+        writeln!(f, "Missing values: {:?}", self.missing_values)
+    }
}
impl Variable {
1 => {
let len: u32 = endian.parse(read_bytes(r)?);
let read_len = len.min(65535) as usize;
- let label = Some(read_vec(r, read_len)?);
+ let label = UnencodedString(read_vec(r, read_len)?);
let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
let _ = read_vec(r, padding_bytes as usize)?;
- label
+ Some(label)
}
_ => {
return Err(Error::BadVariableLabelCode {
}
};
- let mut missing = Vec::new();
- if missing_value_code != 0 {
- match (width, missing_value_code) {
- (0, -3 | -2 | 1 | 2 | 3) => (),
- (0, _) => {
- return Err(Error::BadNumericMissingValueCode {
- offset,
- code: missing_value_code,
- })
- }
- (_, 0..=3) => (),
- (_, _) => {
- return Err(Error::BadStringMissingValueCode {
- offset,
- code: missing_value_code,
- })
- }
- }
-
- for _ in 0..missing_value_code.abs() {
- missing.push(read_bytes(r)?);
- }
- }
+ let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?;
Ok(Variable {
offset,
name,
print_format,
write_format,
- missing_value_code,
- missing,
+ missing_values,
label,
})
}
}
+/// Eight raw bytes that may hold either a number or a string.
+#[derive(Copy, Clone)]
+pub struct UntypedValue(pub [u8; 8]);
+
+impl Debug for UntypedValue {
+    /// Writes both readings of the bytes as `NUMBER/"STRING"`.
+    ///
+    /// The number is whichever of the little-endian and big-endian `f64` interpretations has
+    /// the shorter `Debug` text (little-endian on a tie).  The string is cut at the first NUL
+    /// or control character.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let little: f64 = Endian::Little.parse(self.0);
+        let big: f64 = Endian::Big.parse(self.0);
+        let (little, big) = (format!("{:?}", little), format!("{:?}", big));
+        let number = if big.len() < little.len() { big } else { little };
+
+        let decoded = fallback_encode(&self.0);
+        let end = decoded
+            .find(|c: char| c == '\0' || c.is_control())
+            .unwrap_or(decoded.len());
+        let string = &decoded[..end];
+
+        write!(f, "{number}")?;
+        write!(f, "/\"{string}\"")
+    }
+}
+
+/// An owned byte string whose character encoding is not known.
+#[derive(Clone)]
+pub struct UnencodedString(Vec<u8>);
+
+impl From<Vec<u8>> for UnencodedString {
+    /// Wraps `source` without copying.
+    fn from(source: Vec<u8>) -> Self {
+        UnencodedString(source)
+    }
+}
+
+impl From<&[u8]> for UnencodedString {
+    /// Copies `source` into a new owned string.
+    fn from(source: &[u8]) -> Self {
+        UnencodedString(source.to_vec())
+    }
+}
+
+impl Debug for UnencodedString {
+    /// Writes the bytes the same way [`FallbackEncoding`] does.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{:?}", FallbackEncoding(&self.0))
+    }
+}
+
+#[derive(Clone)]
pub struct ValueLabel {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
/// The labels.
- pub labels: Vec<([u8; 8], Vec<u8>)>,
+ pub labels: Vec<(UntypedValue, UnencodedString)>,
+}
+
+impl Debug for ValueLabel {
+    /// Writes one `value: label` line per label.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        self.labels
+            .iter()
+            .try_for_each(|(value, label)| writeln!(f, "{value:?}: {label:?}"))
+    }
}
impl ValueLabel {
let mut labels = Vec::new();
for _ in 0..n {
- let value: [u8; 8] = read_bytes(r)?;
+ let value = UntypedValue(read_bytes(r)?);
let label_len: u8 = endian.parse(read_bytes(r)?);
let label_len = label_len as usize;
let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
- let mut label = read_vec(r, padded_len)?;
+ let mut label = read_vec(r, padded_len - 1)?;
label.truncate(label_len);
- labels.push((value, label));
+ labels.push((value, UnencodedString(label)));
}
Ok(ValueLabel { offset, labels })
}
}
+#[derive(Clone)]
pub struct VarIndexes {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
pub var_indexes: Vec<u32>,
}
+impl Debug for VarIndexes {
+    /// Writes `apply to variables` followed by ` #N` for each variable index.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "apply to variables")?;
+        self.var_indexes
+            .iter()
+            .try_for_each(|var_index| write!(f, " #{var_index}"))
+    }
+}
+
impl VarIndexes {
/// Maximum number of variable indexes in a record.
pub const MAX: u32 = u32::MAX / 8;
}
}
+#[derive(Clone, Debug)]
pub struct Document {
/// Offset from the start of the file to the start of the record.
pub pos: u64,
}
*/
-trait ExtensionRecord where Self: Sized {
+/// A record whose content is parsed from text.
+trait TextRecord: Sized {
+    /// Name of the record type.
+    const NAME: &'static str;
+
+    /// Parses `input` into a record.
+    ///
+    /// `warn` is a callback that accepts an [`Error`]; the implementations in this file use it
+    /// to report problems in individual entries while continuing to parse the rest.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
+}
+
+trait ExtensionRecord
+where
+ Self: Sized,
+{
const SIZE: Option<u32>;
const COUNT: Option<u32>;
const NAME: &'static str;
- fn parse(ext: &Extension, endian: Endian) -> Result<Self, Error>;
+ fn parse(ext: &Extension, endian: Endian, warn: impl Fn(Error)) -> Result<Self, Error>;
}
pub struct IntegerInfo {
- version: (i32, i32, i32),
- machine_code: i32,
- floating_point_rep: i32,
- compression_code: i32,
- endianness: i32,
- character_code: i32,
+ pub version: (i32, i32, i32),
+ pub machine_code: i32,
+ pub floating_point_rep: i32,
+ pub compression_code: i32,
+ pub endianness: i32,
+ pub character_code: i32,
}
impl ExtensionRecord for IntegerInfo {
const COUNT: Option<u32> = Some(8);
const NAME: &'static str = "integer record";
- fn parse(ext: &Extension, endian: Endian) -> Result<Self, Error>{
+ fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
floating_point_rep: data[4],
compression_code: data[5],
endianness: data[6],
- character_code: data[7]
+ character_code: data[7],
})
}
}
pub struct FloatInfo {
- sysmis: f64,
- highest: f64,
- lowest: f64,
+ pub sysmis: f64,
+ pub highest: f64,
+ pub lowest: f64,
}
impl ExtensionRecord for FloatInfo {
const COUNT: Option<u32> = Some(3);
const NAME: &'static str = "floating point record";
- fn parse(ext: &Extension, endian: Endian) -> Result<Self, Error>{
+ fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
ext.check_size::<Self>()?;
let mut input = &ext.data[..];
}
}
+/// Category label setting of a multiple dichotomy set.
+///
+/// `MultipleResponseSet::parse` selects `VarLabels` for type code `D` and for flag `11` under
+/// type code `E`, and `CountedValues` for flag `1` under type code `E`.
+pub enum CategoryLabels {
+ VarLabels,
+ CountedValues,
+}
+/// Kind of a multiple response set, as determined by the type code in its record.
+pub enum MultipleResponseType {
+ /// Produced by `MultipleResponseSet::parse` for type codes `D` and `E`.
+ MultipleDichotomy {
+ /// The counted string that follows the type code (and flags, for `E`).
+ value: UnencodedString,
+ /// Category label setting for the set.
+ labels: CategoryLabels,
+ },
+ /// Produced by `MultipleResponseSet::parse` for type code `C`.
+ MultipleCategory,
+}
+/// One multiple response set parsed from a multiple response set record.
+pub struct MultipleResponseSet {
+ /// Set name: the bytes before the `=` in the record.
+ pub name: UnencodedString,
+ /// Set label, stored in the record as a counted string.
+ pub label: UnencodedString,
+ /// Kind of set.
+ pub mr_type: MultipleResponseType,
+ /// Space-separated names that follow the label in the record.
+ pub vars: Vec<UnencodedString>,
+}
+
+impl MultipleResponseSet {
+    /// Parses one multiple response set from the start of `input`.
+    ///
+    /// The accepted syntax is `NAME=TYPE LABEL VAR...` terminated by one or more newlines,
+    /// where `TYPE` is one of:
+    ///
+    /// - `C`;
+    /// - `D` immediately followed by a counted string;
+    /// - `E`, a space, the flag `1` or `11`, a space, and a counted string;
+    ///
+    /// and `LABEL` is a counted string (see `parse_counted_string`).
+    ///
+    /// Returns the set along with the input that follows its terminating newlines.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` if `input` does not follow this syntax.
+    fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
+        let Some(equals) = input.iter().position(|&b| b == b'=') else {
+            return Err(Error::TBD);
+        };
+        // Drop the `=` itself so that the type code is the first byte of the remainder.
+        let (name, input) = (&input[..equals], &input[equals + 1..]);
+
+        let (mr_type, input) = match input.first() {
+            Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]),
+            Some(b'D') => {
+                let (value, input) = parse_counted_string(&input[1..])?;
+                (
+                    MultipleResponseType::MultipleDichotomy {
+                        value,
+                        labels: CategoryLabels::VarLabels,
+                    },
+                    input,
+                )
+            }
+            Some(b'E') => {
+                // The prefixes include the single space that separates `E` from the flag, so
+                // that space must not be consumed beforehand.
+                let flags = &input[1..];
+                let (labels, input) = if let Some(rest) = flags.strip_prefix(b" 1 ") {
+                    (CategoryLabels::CountedValues, rest)
+                } else if let Some(rest) = flags.strip_prefix(b" 11 ") {
+                    (CategoryLabels::VarLabels, rest)
+                } else {
+                    return Err(Error::TBD);
+                };
+                let (value, input) = parse_counted_string(input)?;
+                (
+                    MultipleResponseType::MultipleDichotomy { value, labels },
+                    input,
+                )
+            }
+            _ => return Err(Error::TBD),
+        };
+
+        let Some(b' ') = input.first() else {
+            return Err(Error::TBD);
+        };
+        let (label, mut input) = parse_counted_string(&input[1..])?;
+
+        // Each variable name is preceded by a space and runs up to the next space or newline.
+        let mut vars = Vec::new();
+        while input.first() == Some(&b' ') {
+            input = &input[1..];
+            let Some(length) = input.iter().position(|b| b" \n".contains(b)) else {
+                return Err(Error::TBD);
+            };
+            if length > 0 {
+                vars.push(UnencodedString::from(&input[..length]));
+            }
+            input = &input[length..];
+        }
+
+        // At least one newline must end the set; any further newlines are skipped as well.
+        if input.first() != Some(&b'\n') {
+            return Err(Error::TBD);
+        }
+        while input.first() == Some(&b'\n') {
+            input = &input[1..];
+        }
+
+        Ok((
+            MultipleResponseSet {
+                name: name.into(),
+                label,
+                mr_type,
+                vars,
+            },
+            input,
+        ))
+    }
+}
+
+/// All of the multiple response sets in one extension record.
+pub struct MultipleResponseSets(Vec<MultipleResponseSet>);
+
+impl ExtensionRecord for MultipleResponseSets {
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "multiple response set record";
+
+    /// Parses sets back to back until the record's data is used up.
+    fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        let mut sets = Vec::new();
+        let mut remaining = &ext.data[..];
+        while !remaining.is_empty() {
+            let (set, rest) = MultipleResponseSet::parse(remaining)?;
+            remaining = rest;
+            sets.push(set);
+        }
+        Ok(MultipleResponseSets(sets))
+    }
+}
+
+/// Parses a counted string: a decimal length, one space, and then that many bytes.
+///
+/// Returns the string and whatever follows it in `input`.
+///
+/// # Errors
+///
+/// Returns `Error::TBD` if there is no space, if the text before the space is not a valid
+/// length, or if fewer than that many bytes follow.
+fn parse_counted_string(input: &[u8]) -> Result<(UnencodedString, &[u8]), Error> {
+    let space = input.iter().position(|&b| b == b' ').ok_or(Error::TBD)?;
+    let length: usize = from_utf8(&input[..space])
+        .map_err(|_| Error::TBD)?
+        .parse()
+        .map_err(|_| Error::TBD)?;
+
+    let body = &input[space + 1..];
+    if length > body.len() {
+        return Err(Error::TBD);
+    }
+
+    let (string, rest) = body.split_at(length);
+    Ok((string.into(), rest))
+}
+
+/// Text of an extra product info record, kept verbatim.
+pub struct ExtraProductInfo(String);
+
+impl TextRecord for ExtraProductInfo {
+    const NAME: &'static str = "extra product info";
+
+    /// Stores a copy of `input`; this never fails and never warns.
+    fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        Ok(Self(input.to_owned()))
+    }
+}
+
+/// Contents of a variable display record: one 32-bit value per element of the record.
+pub struct VarDisplayRecord(Vec<u32>);
+
+impl ExtensionRecord for VarDisplayRecord {
+    const SIZE: Option<u32> = Some(4);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "variable display record";
+
+    /// Reads `ext.count` 32-bit values in `endian` byte order from the record's data.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the size check fails or if the data ends before `ext.count` values
+    /// have been read.
+    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let mut display = Vec::new();
+        for _ in 0..ext.count {
+            // The data comes from the file, so a short read is reported as an error rather
+            // than a panic.
+            display.push(endian.parse(read_bytes(&mut input)?));
+        }
+        Ok(VarDisplayRecord(display))
+    }
+}
+
+/// A named set of variables.
+pub struct VariableSet {
+    /// Set name: the text before the first `=`.
+    pub name: String,
+    /// Whitespace-separated names after the `=`.
+    pub vars: Vec<String>,
+}
+
+impl VariableSet {
+    /// Parses one `NAME=VAR VAR ...` line.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` if `input` contains no `=`.
+    fn parse(input: &str) -> Result<Self, Error> {
+        let Some((name, members)) = input.split_once('=') else {
+            return Err(Error::TBD);
+        };
+        Ok(VariableSet {
+            name: name.to_owned(),
+            vars: members
+                .split_ascii_whitespace()
+                .map(str::to_owned)
+                .collect(),
+        })
+    }
+}
+
+/// All of the variable sets in one text record.
+pub struct VariableSetRecord(Vec<VariableSet>);
+
+impl TextRecord for VariableSetRecord {
+    const NAME: &'static str = "variable set";
+
+    /// Parses one variable set per line of `input`.  A line that fails to parse is reported to
+    /// `warn` and skipped.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let sets = input
+            .lines()
+            .filter_map(|line| match VariableSet::parse(line) {
+                Ok(set) => Some(set),
+                Err(error) => {
+                    warn(error);
+                    None
+                }
+            })
+            .collect();
+        Ok(VariableSetRecord(sets))
+    }
+}
+
+/// One `short_name=long_name` pair from a long variable names record.
+pub struct LongVariableName {
+ /// The text before the first `=` in the pair.
+ pub short_name: String,
+ /// The text after the first `=` in the pair.
+ pub long_name: String,
+}
+
+/// All of the long variable names in one text record.
+pub struct LongVariableNameRecord(Vec<LongVariableName>);
+
+impl TextRecord for LongVariableNameRecord {
+    const NAME: &'static str = "long variable names";
+
+    /// Parses tab-separated `SHORT=LONG` pairs.  Empty pairs are ignored; a pair without `=`
+    /// is reported to `warn` and skipped.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let names = input
+            .split('\t')
+            .filter(|pair| !pair.is_empty())
+            .filter_map(|pair| {
+                let parsed = pair.split_once('=');
+                if parsed.is_none() {
+                    warn(Error::TBD);
+                }
+                parsed
+            })
+            .map(|(short_name, long_name)| LongVariableName {
+                short_name: short_name.to_owned(),
+                long_name: long_name.to_owned(),
+            })
+            .collect();
+        Ok(LongVariableNameRecord(names))
+    }
+}
+
+/// One `NAME=LENGTH` tuple from a very long strings record.
+pub struct VeryLongString {
+    /// The text before the first `=`.
+    pub short_name: String,
+    /// The number after the `=`.
+    pub length: usize,
+}
+
+impl VeryLongString {
+    /// Parses a single `NAME=LENGTH` tuple.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` if there is no `=` or if the text after it is not a valid `usize`.
+    fn parse(input: &str) -> Result<VeryLongString, Error> {
+        let (short_name, length) = input.split_once('=').ok_or(Error::TBD)?;
+        let length = length.parse::<usize>().map_err(|_| Error::TBD)?;
+        Ok(VeryLongString {
+            short_name: short_name.to_owned(),
+            length,
+        })
+    }
+}
+
+/// All of the tuples in one very long strings record.
+pub struct VeryLongStringRecord(Vec<VeryLongString>);
+
+impl TextRecord for VeryLongStringRecord {
+    const NAME: &'static str = "very long strings";
+
+    /// Parses NUL-separated `NAME=LENGTH` tuples.  Empty tuples are ignored; a tuple that
+    /// fails to parse is reported to `warn` and skipped.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut very_long_strings = Vec::new();
+        for tuple in input
+            .split('\0')
+            // Tuples are delimited by a NUL followed by a tab, so after splitting on NUL the
+            // tab is at the *start* of each later piece.  Trim both ends so that a stray
+            // trailing tab is still tolerated too.
+            .map(|s| s.trim_matches('\t'))
+            .filter(|s| !s.is_empty())
+        {
+            match VeryLongString::parse(tuple) {
+                Ok(vls) => very_long_strings.push(vls),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(VeryLongStringRecord(very_long_strings))
+    }
+}
+
+/// Value labels for one variable, as stored in a long string value labels record.
+pub struct LongStringValueLabels {
+ /// Name of the variable the labels belong to.
+ pub var_name: UnencodedString,
+ /// Width field read from the record.
+ pub width: u32,
+
+ /// `(value, label)` pairs, where each value is `width` bytes.
+ pub labels: Vec<(UnencodedString, UnencodedString)>,
+}
+
+/// All of the per-variable label sets in one long string value labels record.
+pub struct LongStringValueLabelSet(Vec<LongStringValueLabels>);
+
+impl ExtensionRecord for LongStringValueLabelSet {
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "long string value labels record";
+
+    /// Parses entries until the record's data is used up.
+    ///
+    /// Each entry is a variable name, a 32-bit width, a 32-bit label count, and then that
+    /// many `(value, label)` pairs; every string is read with `read_string`.
+    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        let mut remaining = &ext.data[..];
+        let mut label_set = Vec::new();
+        while !remaining.is_empty() {
+            let var_name = read_string(&mut remaining, endian)?;
+            let width: u32 = endian.parse(read_bytes(&mut remaining)?);
+            let n_labels: u32 = endian.parse(read_bytes(&mut remaining)?);
+
+            let mut labels = Vec::new();
+            for _ in 0..n_labels {
+                let value = read_string(&mut remaining, endian)?;
+                let label = read_string(&mut remaining, endian)?;
+                labels.push((value, label));
+            }
+
+            let entry = LongStringValueLabels {
+                var_name,
+                width,
+                labels,
+            };
+            label_set.push(entry);
+        }
+        Ok(LongStringValueLabelSet(label_set))
+    }
+}
+
+/// Missing values for one variable, as stored in a long string missing values record.
+pub struct LongStringMissingValues {
+ /// Variable name.
+ pub var_name: UnencodedString,
+
+ /// Missing values.
+ pub missing_values: MissingValues,
+}
+
+/// All of the per-variable entries in one long string missing values record.
+pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
+
+impl ExtensionRecord for LongStringMissingValueSet {
+ const SIZE: Option<u32> = Some(1);
+ const COUNT: Option<u32> = None;
+ const NAME: &'static str = "long string missing values record";
+
+ /// Parses entries until the record's data is used up.
+ ///
+ /// Each entry is a variable name, a one-byte count of missing values, a 32-bit value
+ /// length that must be 8, and then the 8-byte values themselves.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error::BadLongMissingValueLength` if a value length other than 8 is found,
+ /// and propagates errors from the size check and from reading the data.
+ fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+ ext.check_size::<Self>()?;
+
+ let mut input = &ext.data[..];
+ let mut missing_value_set = Vec::new();
+ while !input.is_empty() {
+ let var_name = read_string(&mut input, endian)?;
+ let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
+ let value_len: u32 = endian.parse(read_bytes(&mut input)?);
+ if value_len != 8 {
+ // File offset reported with the error: the number of bytes consumed so far, minus
+ // 8, relative to the start of the record.
+ // NOTE(review): `value_len` itself is only 4 bytes long, so this may not point
+ // exactly at it -- confirm the intended offset.
+ let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offset;
+ return Err(Error::BadLongMissingValueLength {
+ record_offset: ext.offset,
+ offset,
+ value_len,
+ });
+ }
+ let mut values = Vec::new();
+ for i in 0..n_missing_values {
+ let value: [u8; 8] = read_bytes(&mut input)?;
+ // Also read the 8 bytes as an integer, to detect a repeated value length below.
+ let numeric_value: u64 = endian.parse(value);
+ let value = if i > 0 && numeric_value == 8 {
+ // Tolerate files written by old, buggy versions of PSPP
+ // where we believed that the value_length was repeated
+ // before each missing value.
+ read_bytes(&mut input)?
+ } else {
+ value
+ };
+ values.push(Value::String(value));
+ }
+ // This record holds only individual values, never a range.
+ let missing_values = MissingValues { values, range: None };
+ missing_value_set.push(LongStringMissingValues {
+ var_name,
+ missing_values
+ });
+ }
+ Ok(LongStringMissingValueSet(missing_value_set))
+ }
+}
+
+/// Name of the character encoding, taken from the encoding record.
+pub struct Encoding(pub String);
+
+impl ExtensionRecord for Encoding {
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "encoding record";
+
+    /// Interprets the record's data as the encoding name.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::BadEncodingName` if the data is not valid UTF-8.
+    fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        match from_utf8(&ext.data) {
+            Ok(name) => Ok(Encoding(name.to_owned())),
+            Err(_) => Err(Error::BadEncodingName { offset: ext.offset }),
+        }
+    }
+}
+
+/// A named attribute with one or more values.
+pub struct Attribute {
+    /// Attribute name: the text before the opening `(`.
+    pub name: String,
+    /// Attribute values, in order.
+    pub values: Vec<String>,
+}
+
+impl Attribute {
+    /// Parses one attribute of the form `NAME(` followed by newline-terminated values and a
+    /// closing `)`.
+    ///
+    /// A value is expected to be enclosed in single quotes, which are removed.  A value
+    /// without them is reported to `warn` and kept as-is.
+    ///
+    /// Returns the attribute and the text after the closing `)`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` if there is no `(`, or if the input ends before the closing `)`.
+    fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
+        let (name, mut input) = input.split_once('(').ok_or(Error::TBD)?;
+        let mut values = Vec::new();
+        loop {
+            let (value, rest) = input.split_once('\n').ok_or(Error::TBD)?;
+
+            let unquoted = value
+                .strip_prefix('\'')
+                .and_then(|value| value.strip_suffix('\''));
+            match unquoted {
+                Some(stripped) => values.push(stripped.to_owned()),
+                None => {
+                    warn(Error::TBD);
+                    values.push(value.to_owned());
+                }
+            }
+
+            match rest.strip_prefix(')') {
+                Some(rest) => {
+                    let attribute = Attribute {
+                        name: name.to_owned(),
+                        values,
+                    };
+                    return Ok((attribute, rest));
+                }
+                None => input = rest,
+            }
+        }
+    }
+}
+
+/// A list of attributes.
+pub struct AttributeSet(pub Vec<Attribute>);
+
+impl AttributeSet {
+    /// Parses attributes from `input` one after another.
+    ///
+    /// Parsing stops at the end of the input or, when `sentinel` is given, at a position
+    /// where the next character is `sentinel`; the sentinel is then consumed.  Returns the
+    /// set and the remaining text.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from `Attribute::parse`.
+    fn parse<'a>(
+        mut input: &'a str,
+        sentinel: Option<char>,
+        warn: &impl Fn(Error),
+    ) -> Result<(AttributeSet, &'a str), Error> {
+        let mut attributes = Vec::new();
+        let rest = loop {
+            match input.chars().next() {
+                None => break input,
+                // Skip the sentinel by its encoded length, so that a sentinel wider than
+                // one byte cannot cause a slice at a non-character boundary.
+                Some(c) if sentinel == Some(c) => break &input[c.len_utf8()..],
+                Some(_) => {
+                    let (attribute, rest) = Attribute::parse(input, warn)?;
+                    attributes.push(attribute);
+                    input = rest;
+                }
+            }
+        };
+        Ok((AttributeSet(attributes), rest))
+    }
+}
+
+/// The attribute set from a data file attributes record.
+pub struct FileAttributeRecord(AttributeSet);
+
+impl TextRecord for FileAttributeRecord {
+    const NAME: &'static str = "data file attributes";
+
+    /// Parses the whole of `input` as one attribute set.  Text left over after the set is
+    /// reported to `warn`.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        AttributeSet::parse(input, None, &warn).map(|(set, rest)| {
+            if !rest.is_empty() {
+                warn(Error::TBD);
+            }
+            FileAttributeRecord(set)
+        })
+    }
+}
+
+/// The attributes of one variable.
+pub struct VarAttributeSet {
+    /// Variable name: the text before the first `:`.
+    pub long_var_name: String,
+    /// The variable's attributes.
+    pub attributes: AttributeSet,
+}
+
+impl VarAttributeSet {
+    /// Parses `NAME:` followed by an attribute set that ends at `/` or at the end of input.
+    ///
+    /// Returns the parsed set and the remaining text.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::TBD` if there is no `:`, and propagates errors from
+    /// `AttributeSet::parse`.
+    fn parse<'a>(
+        input: &'a str,
+        warn: &impl Fn(Error),
+    ) -> Result<(VarAttributeSet, &'a str), Error> {
+        let (long_var_name, rest) = input.split_once(':').ok_or(Error::TBD)?;
+        let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
+        let set = VarAttributeSet {
+            long_var_name: long_var_name.to_owned(),
+            attributes,
+        };
+        Ok((set, rest))
+    }
+}
+
+/// All of the per-variable attribute sets in one variable attributes record.
+pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
+
+impl TextRecord for VariableAttributeRecord {
+    const NAME: &'static str = "variable attributes";
+
+    /// Parses attribute sets until the input is used up.  The first set that fails to parse
+    /// is reported to `warn` and ends parsing; the sets parsed before it are kept.
+    fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut var_attribute_sets = Vec::new();
+        while !input.is_empty() {
+            let (var_attribute, rest) = match VarAttributeSet::parse(input, &warn) {
+                Ok(parsed) => parsed,
+                Err(error) => {
+                    warn(error);
+                    break;
+                }
+            };
+            var_attribute_sets.push(var_attribute);
+            input = rest;
+        }
+        Ok(VariableAttributeRecord(var_attribute_sets))
+    }
+}
+
+/// Contents of an extended number of cases record.
+pub struct NumberOfCasesRecord {
+    /// Always observed as 1.
+    pub one: u64,
+
+    /// Number of cases.
+    pub n_cases: u64,
+}
+
+impl ExtensionRecord for NumberOfCasesRecord {
+    const SIZE: Option<u32> = Some(8);
+    const COUNT: Option<u32> = Some(2);
+    const NAME: &'static str = "extended number of cases record";
+
+    /// Reads the record's two 64-bit values in `endian` byte order.
+    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        // Fields are evaluated in the order written, which is the order they are stored in.
+        Ok(NumberOfCasesRecord {
+            one: endian.parse(read_bytes(&mut input)?),
+            n_cases: endian.parse(read_bytes(&mut input)?),
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
pub struct Extension {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
}
}
+#[derive(Clone, Debug)]
pub struct ZHeader {
/// File offset to the start of the record.
pub offset: u64,
}
}
+#[derive(Clone, Debug)]
pub struct ZTrailer {
/// File offset to the start of the record.
pub offset: u64,
pub blocks: Vec<ZBlock>,
}
+#[derive(Clone, Debug)]
pub struct ZBlock {
/// Offset of block of data if simple compression were used.
pub uncompressed_ofs: u64,
r.read_exact(&mut vec)?;
Ok(vec)
}
+
+/// Reads a 32-bit length in `endian` byte order from `r`, followed by that many bytes.
+///
+/// # Errors
+///
+/// Returns the I/O error if either read fails.
+fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<UnencodedString, IoError> {
+    let length: u32 = endian.parse(read_bytes(r)?);
+    let bytes = read_vec(r, length as usize)?;
+    Ok(UnencodedString::from(bytes))
+}