use encoding_rs::Encoding;
use crate::{
- format::Spec,
- identifier::{Identifier, Error as IdError},
- {endian::Endian, CategoryLabels, Compression},
+ format::{Spec, UncheckedSpec, Width},
+ identifier::{Error as IdError, Identifier},
+ {endian::Endian, CategoryLabels, Compression}, raw,
};
use thiserror::Error as ThisError;
}
impl Decoder {
- fn take_name(&mut self, id: Identifier) -> bool {
- self.var_names.insert(id)
+ fn take_name(&mut self, id: &Identifier) -> bool {
+ self.var_names.insert(id.clone())
}
fn generate_name(&mut self) -> Identifier {
loop {
self.n_generated_names += 1;
- let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding).unwrap();
- if self.take_name(name.clone()) {
+ let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
+ .unwrap();
+ if self.take_name(&name) {
return name;
}
assert!(self.n_generated_names < usize::MAX);
type Input = crate::raw::Header;
fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
- let eye_catcher = decoder.decode_string(&input.eye_catcher, &warn);
- let file_label = decoder.decode_string(&input.file_label, &warn);
- let creation_date = decoder.decode_string(&input.creation_date, &warn);
+ let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
+ let file_label = decoder.decode_string(&input.file_label.0, &warn);
+ let creation_date = decoder.decode_string(&input.creation_date.0, &warn);
let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
warn(Error::InvalidCreationDate {
creation_date: creation_date.into(),
});
Default::default()
});
- let creation_time = decoder.decode_string(&input.creation_time, &warn);
+ let creation_time = decoder.decode_string(&input.creation_time.0, &warn);
let creation_time =
NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
warn(Error::InvalidCreationTime {
}
pub struct Variable {
- pub width: i32,
+ pub width: Width,
pub name: Identifier,
pub print_format: Spec,
pub write_format: Spec,
+ //pub missing_values: MissingValues,
+ pub label: Option<String>,
+}
+
+fn decode_format(raw: raw::Spec, name: &str, width: Width) -> Spec {
+ UncheckedSpec::try_from(raw)
+ .and_then(Spec::try_from)
+ .and_then(|x| x.check_width_compatibility(Some(name), width))
+ .unwrap_or_else(|_warning| {
+ /*warn(warning);*/
+ Spec::default_for_width(width)
+ })
}
fn decode_var(
})
}
};
- let name = decoder.decode_string(&input.name, &warn);
+ let width = input.width as Width;
+ let name = decoder.decode_string(&input.name.0, &warn);
let name = match Identifier::new(&name, decoder.encoding) {
Ok(name) => {
- if !decoder.take_name(name) {
+ if !decoder.take_name(&name) {
decoder.generate_name()
} else {
name
decoder.generate_name()
}
};
+ let print_format = decode_format(input.print_format, &name.0, width);
+ let write_format = decode_format(input.write_format, &name.0, width);
+ let label = input.label.as_ref().map(|label| decoder.decode_string(&label.0, &warn).into());
+ Ok(Some(Variable { width, name, print_format, write_format, label }))
}
#[derive(Clone)]
input
.lines
.iter()
- .map(|s| decoder.decode_string(s, &warn).into())
+ .map(|s| decoder.decode_string(&s.0, &warn).into())
.collect(),
))
}
use thiserror::Error as ThisError;
-use crate::raw::VarType;
+use crate::raw::{VarType, self};
#[derive(ThisError, Debug)]
pub enum Error {
pub const MAX_STRING: Width = 32767;
-type Width = u16;
-type SignedWidth = i16;
+pub type Width = u16;
+pub type SignedWidth = i16;
-type Decimals = u8;
+pub type Decimals = u8;
impl Format {
pub fn max_width(self) -> Width {
self.d
}
+ pub fn default_for_width(w: Width) -> Self {
+ match w {
+ 0 => Spec { format: Format::F, w: 8, d: 2 },
+ _ => Spec { format: Format::A, w: w, d: 0 },
+ }
+ }
+
pub fn fixed_from(source: &UncheckedSpec) -> Self {
let UncheckedSpec { format, w, d } = *source;
let (min, max) = format.width_range().into_inner();
self.format.var_type()
}
- pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result<(), Error> {
+ pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result<Self, Error> {
self.format.check_type_compatibility(variable, self.var_type())?;
let expected_width = self.var_width();
if w != expected_width {
})
}
} else {
- Ok(())
+ Ok(self)
}
}
}
pub d: Decimals,
}
-impl TryFrom<u32> for UncheckedSpec {
+impl TryFrom<raw::Spec> for UncheckedSpec {
type Error = Error;
- fn try_from(source: u32) -> Result<Self, Self::Error> {
- let raw_format = (source >> 16) as u16;
+ fn try_from(raw: raw::Spec) -> Result<Self, Self::Error> {
+ let raw = raw.0;
+ let raw_format = (raw >> 16) as u16;
let format = raw_format.try_into()?;
- let w = ((source >> 8) & 0xff) as Width;
- let d = (source & 0xff) as Decimals;
+ let w = ((raw >> 8) & 0xff) as Width;
+ let d = (raw & 0xff) as Decimals;
Ok(Self { format, w, d })
}
}
BadVariableWidth { offset: u64, width: i32 },
#[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
- BadDocumentLength { offset: u64, n: u32, max: u32 },
+ BadDocumentLength { offset: u64, n: usize, max: usize },
#[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
BadRecordType { offset: u64, rec_type: u32 },
/// Eye-catcher string, product name, in the file's encoding. Padded
/// on the right with spaces.
- pub eye_catcher: [u8; 60],
+ pub eye_catcher: UnencodedStr<60>,
/// Layout code, normally either 2 or 3.
pub layout_code: u32,
pub bias: f64,
/// `dd mmm yy` in the file's encoding.
- pub creation_date: [u8; 9],
+ pub creation_date: UnencodedStr<9>,
/// `HH:MM:SS` in the file's encoding.
- pub creation_time: [u8; 8],
+ pub creation_time: UnencodedStr<8>,
/// File label, in the file's encoding. Padded on the right with spaces.
- pub file_label: [u8; 64],
+ pub file_label: UnencodedStr<64>,
/// Endianness of the data in the file header.
pub endian: Endian,
fn fmt(&self, f: &mut Formatter) -> FmtResult {
writeln!(f, "File header record:")?;
self.debug_field(f, "Magic", self.magic)?;
- self.debug_field(f, "Product name", FallbackEncoding(&self.eye_catcher))?;
+ self.debug_field(f, "Product name", &self.eye_catcher)?;
self.debug_field(f, "Layout code", self.layout_code)?;
self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
self.debug_field(f, "Compression", self.compression)?;
self.debug_field(f, "Weight index", self.weight_index)?;
self.debug_field(f, "Number of cases", self.n_cases)?;
self.debug_field(f, "Compression bias", self.bias)?;
- self.debug_field(f, "Creation date", FallbackEncoding(&self.creation_date))?;
- self.debug_field(f, "Creation time", FallbackEncoding(&self.creation_time))?;
- self.debug_field(f, "File label", FallbackEncoding(&self.file_label))?;
+ self.debug_field(f, "Creation date", &self.creation_date)?;
+ self.debug_field(f, "Creation time", &self.creation_time)?;
+ self.debug_field(f, "File label", &self.file_label)?;
self.debug_field(f, "Endianness", self.endian)
}
}
let magic: [u8; 4] = read_bytes(r)?;
let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
- let eye_catcher: [u8; 60] = read_bytes(r)?;
+ let eye_catcher = UnencodedStr::<60>(read_bytes(r)?);
let layout_code: [u8; 4] = read_bytes(r)?;
let endian = Endian::identify_u32(2, layout_code)
.or_else(|| Endian::identify_u32(2, layout_code))
let bias: f64 = endian.parse(read_bytes(r)?);
- let creation_date: [u8; 9] = read_bytes(r)?;
- let creation_time: [u8; 8] = read_bytes(r)?;
- let file_label: [u8; 64] = read_bytes(r)?;
+ let creation_date = UnencodedStr::<9>(read_bytes(r)?);
+ let creation_time = UnencodedStr::<8>(read_bytes(r)?);
+ let file_label = UnencodedStr::<64>(read_bytes(r)?);
let _: [u8; 3] = read_bytes(r)?;
Ok(Header {
#[derive(Copy, Clone)]
pub enum Value {
Number(Option<f64>),
- String([u8; 8]),
+ String(UnencodedStr<8>),
}
impl Debug for Value {
match self {
Value::Number(Some(number)) => write!(f, "{number:?}"),
Value::Number(None) => write!(f, "SYSMIS"),
- Value::String(bytes) => write!(f, "{:?}", FallbackEncoding(bytes)),
+ Value::String(bytes) => write!(f, "{:?}", bytes),
}
}
}
pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
match var_type {
- VarType::String => Value::String(raw),
+ VarType::String => Value::String(UnencodedStr(raw)),
VarType::Number => {
let number: f64 = endian.parse(raw);
Value::Number((number != -f64::MAX).then_some(number))
1..=251 => match var_type {
VarType::Number => break Value::Number(Some(code as f64 - bias)),
VarType::String => {
- break Value::String(endian.to_bytes(code as f64 - bias))
+ break Value::String(UnencodedStr(endian.to_bytes(code as f64 - bias)))
}
},
252 => {
}
253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
254 => match var_type {
- VarType::String => break Value::String(*b" "), // XXX EBCDIC
+ VarType::String => break Value::String(UnencodedStr(*b" ")), // XXX EBCDIC
VarType::Number => {
return Err(Error::CompressedStringExpected {
offset: case_start,
impl FusedIterator for Reader {}
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
-pub struct Format(pub u32);
+pub struct Spec(pub u32);
-impl Debug for Format {
+impl Debug for Spec {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
let type_ = format_name(self.0 >> 16);
let w = (self.0 >> 8) & 0xff;
pub width: i32,
/// Variable name, padded on the right with spaces.
- pub name: [u8; 8],
+ pub name: UnencodedStr<8>,
/// Print format.
- pub print_format: Format,
+ pub print_format: Spec,
/// Write format.
- pub write_format: Format,
+ pub write_format: Spec,
/// Missing values.
pub missing_values: MissingValues,
)?;
writeln!(f, "Print format: {:?}", self.print_format)?;
writeln!(f, "Write format: {:?}", self.write_format)?;
- writeln!(f, "Name: {:?}", FallbackEncoding(&self.name))?;
+ writeln!(f, "Name: {:?}", &self.name)?;
writeln!(f, "Variable label: {:?}", self.label)?;
writeln!(f, "Missing values: {:?}", self.missing_values)
}
let width: i32 = endian.parse(read_bytes(r)?);
let has_variable_label: u32 = endian.parse(read_bytes(r)?);
let missing_value_code: i32 = endian.parse(read_bytes(r)?);
- let print_format = Format(endian.parse(read_bytes(r)?));
- let write_format = Format(endian.parse(read_bytes(r)?));
- let name: [u8; 8] = read_bytes(r)?;
+ let print_format = Spec(endian.parse(read_bytes(r)?));
+ let write_format = Spec(endian.parse(read_bytes(r)?));
+ let name = UnencodedStr::<8>(read_bytes(r)?);
let label = match has_variable_label {
0 => None,
}
#[derive(Clone)]
-pub struct UnencodedString(Vec<u8>);
+pub struct UnencodedString(pub Vec<u8>);
impl From<Vec<u8>> for UnencodedString {
fn from(source: Vec<u8>) -> Self {
}
}
+#[derive(Copy, Clone)]
+pub struct UnencodedStr<const N: usize>(pub [u8; N]);
+
+impl<const N: usize> From<[u8; N]> for UnencodedStr<N> {
+ fn from(source: [u8; N]) -> Self {
+ Self(source)
+ }
+}
+
+impl<const N: usize> Debug for UnencodedStr<N> {
+ fn fmt(&self, f: &mut Formatter) -> FmtResult {
+ write!(f, "{:?}", FallbackEncoding(&self.0))
+ }
+}
+
#[derive(Clone)]
pub struct ValueLabel {
/// Offset from the start of the file to the start of the record.
pub pos: u64,
/// The document, as an array of 80-byte lines.
- pub lines: Vec<[u8; Document::LINE_LEN as usize]>,
+ pub lines: Vec<DocumentLine>
}
+pub type DocumentLine = UnencodedStr<{Document::LINE_LEN}>;
+
impl Document {
/// Length of a line in a document. Document lines are fixed-length and
/// padded on the right with spaces.
- pub const LINE_LEN: u32 = 80;
+ pub const LINE_LEN: usize = 80;
/// Maximum number of lines we will accept in a document. This is simply
/// the maximum number that will fit in a 32-bit space.
- pub const MAX_LINES: u32 = i32::MAX as u32 / Self::LINE_LEN;
+ pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN;
fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
let offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
- match n {
- 0..=Self::MAX_LINES => Ok(Document {
- pos: r.stream_position()?,
- lines: (0..n)
- .map(|_| read_bytes(r))
- .collect::<Result<Vec<_>, _>>()?,
- }),
- _ => Err(Error::BadDocumentLength {
+ let n = n as usize;
+ if n > Self::MAX_LINES {
+ Err(Error::BadDocumentLength {
offset,
n,
max: Self::MAX_LINES,
- }),
+ })
+ } else {
+ let pos = r.stream_position()?;
+ let mut lines = Vec::with_capacity(n);
+ for _ in 0..n {
+ lines.push(UnencodedStr::<{Document::LINE_LEN}>(read_bytes(r)?));
+ }
+ Ok(Document { pos, lines })
}
}
}
} else {
value
};
- values.push(Value::String(value));
+ values.push(Value::String(UnencodedStr(value)));
}
let missing_values = MissingValues {
values,