mut headers: Headers,
encoding: &'static Encoding,
mut warn: impl FnMut(Error),
-) -> Result<(Dictionary, Metadata), Error> {
+) -> Result<(Dictionary, Metadata, Rc<RefCell<Cases>>), Error> {
let mut dictionary = Dictionary::new(encoding);
let file_label = fix_line_ends(headers.header.file_label.trim_end_matches(' '));
}
let metadata = Metadata::decode(&headers, warn);
- Ok((dictionary, metadata))
+ Ok((dictionary, metadata, headers.cases.take().unwrap()))
}
impl MultipleResponseSet {
use flate2::read::ZlibDecoder;
use itertools::Itertools;
use num::Integer;
+use smallvec::SmallVec;
use std::{
borrow::{Borrow, Cow},
cell::RefCell,
collections::{BTreeMap, VecDeque},
fmt::{Debug, Display, Formatter, Result as FmtResult},
io::{Error as IoError, Read, Seek, SeekFrom},
+ iter::repeat_n,
mem::take,
num::NonZeroU8,
- ops::{Deref, Range},
+ ops::{Deref, Not, Range},
rc::Rc,
str::from_utf8,
};
#[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
NoVarIndexes { offset: u64 },
- #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
+ #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)]
MixedVarTypes {
offset: u64,
var_type: VarType,
String,
}
-impl VarType {
- pub fn opposite(self) -> VarType {
+impl Not for VarType {
+ type Output = Self;
+
+ /// Returns the other variable type: `Numeric` becomes `String` and `String` becomes
+ /// `Numeric`.
+ fn not(self) -> Self::Output {
match self {
Self::Numeric => Self::String,
Self::String => Self::Numeric,
}
}
+/// Lets `!` be applied to a borrowed `VarType`, giving the same result as applying it to the
+/// value itself.
+impl Not for &VarType {
+    type Output = VarType;
+
+    fn not(self) -> VarType {
+        // Copy the value out of the reference, then defer to the by-value `Not` impl.
+        let var_type: VarType = *self;
+        var_type.not()
+    }
+}
+
impl Display for VarType {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
match self {
+/// Combination of [`Read`] and [`Seek`] under a single trait name, so that a reader with both
+/// capabilities can be held as `Box<dyn ReadSeek>`.
trait ReadSeek: Read + Seek {}
+/// Blanket implementation: every type that is both `Read` and `Seek` is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
+/// Newtype wrapper around a vector of `Datum` values.
+///
+/// NOTE(review): presumably one `Datum` per variable in a single case -- confirm against the
+/// code that constructs it.
+pub struct Case(pub Vec<Datum>);
+
+/// Byte layout of one segment of a string variable, as computed by `CaseVar::new`.
+struct StringSegment {
+ /// Number of bytes of string data held by this segment.
+ data_bytes: usize,
+ /// Number of filler bytes that follow the data. `CaseVar::new` sets this to the amount needed
+ /// to round `data_bytes` up to a multiple of 8.
+ padding_bytes: usize,
+}
+
+/// Returns the widths of the segments used to store a string that is `width` bytes wide.
+///
+/// The string is split into `width.div_ceil(252)` segments. Every segment except the last is
+/// reported as 255 wide and accounts for 252 bytes of the string; the last segment reports
+/// whatever is left over. A string that fits in a single segment yields just `width`.
+///
+/// A `width` of zero yields one zero-width segment. (Computing `n_segments - 1` directly would
+/// underflow in that case.)
+fn segment_widths(width: usize) -> impl Iterator<Item = usize> {
+    // Number of non-final segments. `saturating_sub` keeps `width == 0` from wrapping around.
+    let n_full = width.div_ceil(252).saturating_sub(1);
+    // With `n_full == 0` the subtraction is a no-op, so no separate single-segment branch is
+    // needed.
+    repeat_n(255, n_full).chain(std::iter::once(width - n_full * 252))
+}
+
+/// Per-variable layout information, built from a `VarWidth` by `CaseVar::new`.
+enum CaseVar {
+ /// A numeric variable; no further layout information is stored.
+ Numeric,
+ /// A string variable.
+ String {
+ /// Width of the string, taken from `VarWidth::String` and converted to `usize`.
+ width: usize,
+ /// Segments that make up the string. Storage for one segment is inline; more spill to the
+ /// heap.
+ encoding: SmallVec<[StringSegment; 1]>,
+ },
+}
+
+impl CaseVar {
+    /// Builds the layout description for a variable of the given `width`.
+    ///
+    /// A numeric width maps to [`CaseVar::Numeric`]. A string width maps to
+    /// [`CaseVar::String`], with one [`StringSegment`] per item yielded by `segment_widths`.
+    /// Each segment takes up to 255 of the string's remaining bytes and is padded up to the
+    /// next multiple of 8.
+    fn new(width: VarWidth) -> Self {
+        match width {
+            VarWidth::Numeric => Self::Numeric,
+            VarWidth::String(width) => {
+                let width = width as usize;
+                let mut encoding = SmallVec::<[StringSegment; 1]>::new();
+                // Bytes of the string not yet assigned to a segment.
+                let mut remaining = width;
+                // NOTE(review): only the *number* of segments is used here; the width that
+                // `segment_widths` reports for each segment is ignored. Confirm that
+                // `remaining.min(255)` is the intended per-segment data size.
+                for _segment in segment_widths(width) {
+                    let data_bytes = remaining.min(255);
+                    let padding_bytes = data_bytes.next_multiple_of(8) - data_bytes;
+                    encoding.push(StringSegment {
+                        data_bytes,
+                        padding_bytes,
+                    });
+                    remaining -= data_bytes;
+                }
+                // The arm must evaluate to `Self`; previously the computed segments were
+                // dropped and the arm evaluated to `()`.
+                Self::String { width, encoding }
+            }
+        }
+    }
+}
+
+/// State for reading cases; its `Iterator` implementation yields `Result<Vec<Datum>, Error>`
+/// items.
pub struct Cases {
reader: Box<dyn ReadSeek>,
- var_types: VarTypes,
+ // Layout information for the variables; presumably one `CaseVar` per variable in a case --
+ // TODO confirm against the constructor.
+ vars: Vec<CaseVar>,
compression: Option<Compression>,
bias: f64,
endian: Endian,
}
impl Iterator for Cases {
- type Item = Result<Vec<RawDatum>, Error>;
+ type Item = Result<Vec<Datum>, Error>;
fn next(&mut self) -> Option<Self::Item> {
if self.eof {
let mut errors = Vec::new();
let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
- let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap();
+ let (dictionary, metadata, cases) =
+ decode(headers, encoding, |e| errors.push(e)).unwrap();
let (group, data) = metadata.to_pivot_rows();
let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))])
.with_data(