fn run(self) -> Result<()> {
let mut reader = Reader::new(BufReader::new(File::open(&self.input)?), Self::warn)?;
- let headers = reader.headers().collect::<Result<Vec<_>, _>>()?;
- let mut decoder = Decoder::with_inferred_encoding(&headers, |w| Self::warn(w))?;
+ let records = reader.records().collect::<Result<Vec<_>, _>>()?;
+ let mut decoder = Decoder::with_inferred_encoding(&records, |w| Self::warn(w))?;
let mut decoded_records = Vec::new();
- for header in headers {
- decoded_records.push(header.decode(&mut decoder)?);
+ for record in records {
+ decoded_records.push(record.decode(&mut decoder));
}
let headers = Headers::new(decoded_records, &mut |e| Self::err(e))?;
let SystemFile {
dictionary, cases, ..
- } = headers.decode(reader.cases(), decoder.encoding, |e| Self::err(e));
+ } = headers.decode(
+ reader.header().clone().decode(&mut decoder),
+ reader.cases(),
+ decoder.encoding,
+ |e| Self::err(e),
+ );
let writer = match self.output {
Some(path) => Box::new(File::create(path)?) as Box<dyn Write>,
None => Box::new(stdout()),
match mode {
Mode::Identify => {
- let Record::Header(header) = reader.headers().next().unwrap()? else {
- unreachable!()
- };
- match header.magic {
+ match reader.header().magic {
Magic::Sav => println!("SPSS System File"),
Magic::Zsav => println!("SPSS System File with Zlib compression"),
Magic::Ebcdic => println!("EBCDIC-encoded SPSS System File"),
return Ok(());
}
Mode::Raw => {
- for header in reader.headers() {
- let header = header?;
+ for record in reader.records() {
+ let header = record?;
println!("{:?}", header);
}
for (_index, case) in (0..max_cases).zip(reader.cases()) {
}
}
Mode::Decoded => {
- let headers: Vec<Record> = reader.headers().collect::<Result<Vec<_>, _>>()?;
+ let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
let encoding = match encoding {
Some(encoding) => encoding,
- None => infer_encoding(&headers, &mut |e| eprintln!("{e}"))?,
+ None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?,
};
let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
- for header in headers {
+ for header in records {
let header = header.decode(&mut decoder);
println!("{:?}", header);
/*
}
}
Mode::Cooked => {
- let headers: Vec<Record> = reader.headers().collect::<Result<Vec<_>, _>>()?;
+ let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
let encoding = match encoding {
Some(encoding) => encoding,
- None => infer_encoding(&headers, &mut |e| eprintln!("{e}"))?,
+ None => infer_encoding(&records, &mut |e| eprintln!("{e}"))?,
};
let mut decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
let mut decoded_records = Vec::new();
- for header in headers {
- decoded_records.push(header.decode(&mut decoder)?);
+ for record in records {
+ decoded_records.push(record.decode(&mut decoder));
}
let headers = Headers::new(decoded_records, &mut |e| eprintln!("{e}"))?;
let SystemFile {
dictionary,
metadata,
cases: _,
- } = headers.decode(reader.cases(), encoding, |e| eprintln!("{e}"));
+ } = headers.decode(
+ reader.header().clone().decode(&mut decoder),
+ reader.cases(),
+ encoding,
+ |e| eprintln!("{e}"),
+ );
println!("{dictionary:#?}");
println!("{metadata:#?}");
}
#[derive(Clone, Debug)]
pub struct Headers {
- pub header: HeaderRecord<String>,
pub variable: Vec<VariableRecord<String>>,
pub value_label: Vec<ValueLabelRecord<RawDatum, String>>,
pub document: Vec<DocumentRecord<String>>,
impl Headers {
/// Builds a [Headers] by sorting the decoded `records` into per-kind fields.
///
/// Each record is matched on its [DecodedRecord] variant and pushed onto the
/// collection for that kind. Fields that hold at most one record (integer
/// info, float info, encoding, ...) are filled from `take_first`, which is
/// also handed `warn` (presumably to report extra records of that kind; its
/// definition is not visible here, so confirm).
///
/// The file header is not one of the [DecodedRecord] variants handled here;
/// [Headers::decode] takes it as a separate argument.
///
/// NOTE(review): with the missing-header check removed, none of the code
/// visible here returns `Err`; confirm whether the `Result` return type is
/// still needed.
pub fn new(
- headers: Vec<raw::DecodedRecord>,
- warn: &mut impl FnMut(Error),
+ records: Vec<raw::DecodedRecord>,
+ mut warn: impl FnMut(Error),
) -> Result<Headers, Error> {
- let mut file_header = Vec::new();
// One accumulator per record kind; the loop below pushes each record onto
// the accumulator matching its variant.
let mut variable = Vec::new();
let mut value_label = Vec::new();
let mut document = Vec::new();
let mut z_header = Vec::new();
let mut z_trailer = Vec::new();
- for header in headers {
- match header {
- DecodedRecord::Header(record) => {
- file_header.push(record);
- }
+ for record in records {
+ match record {
DecodedRecord::Variable(record) => {
variable.push(record);
}
}
}
- let Some(file_header) = take_first(file_header, "file header", warn) else {
- return Err(Error::MissingHeaderRecord);
- };
-
// `warn` is owned by this function, so it is lent out as `&mut warn` to
// each `take_first` call and can be reused for every field.
Ok(Headers {
- header: file_header,
variable,
value_label,
document,
- integer_info: take_first(integer_info, "integer info", warn),
- float_info: take_first(float_info, "float info", warn),
- var_display: take_first(var_display, "variable display", warn),
+ integer_info: take_first(integer_info, "integer info", &mut warn),
+ float_info: take_first(float_info, "float info", &mut warn),
+ var_display: take_first(var_display, "variable display", &mut warn),
multiple_response,
long_string_value_labels,
long_string_missing_values,
- encoding: take_first(encoding, "encoding", warn),
- number_of_cases: take_first(number_of_cases, "number of cases", warn),
+ encoding: take_first(encoding, "encoding", &mut warn),
+ number_of_cases: take_first(number_of_cases, "number of cases", &mut warn),
variable_sets,
- product_info: take_first(product_info, "product info", warn),
+ product_info: take_first(product_info, "product info", &mut warn),
long_names,
very_long_strings,
file_attributes,
variable_attributes,
other_extension,
- end_of_headers: take_first(end_of_headers, "end of headers", warn),
- z_header: take_first(z_header, "z_header", warn),
- z_trailer: take_first(z_trailer, "z_trailer", warn),
+ end_of_headers: take_first(end_of_headers, "end of headers", &mut warn),
+ z_header: take_first(z_header, "z_header", &mut warn),
+ z_trailer: take_first(z_trailer, "z_trailer", &mut warn),
})
}
pub fn decode(
mut self,
+ header: HeaderRecord<String>,
mut cases: Cases,
encoding: &'static Encoding,
mut warn: impl FnMut(Error),
) -> SystemFile {
let mut dictionary = Dictionary::new(encoding);
- let file_label = fix_line_ends(self.header.file_label.trim_end_matches(' '));
+ let file_label = fix_line_ends(header.file_label.trim_end_matches(' '));
if !file_label.is_empty() {
dictionary.file_label = Some(file_label);
}
warn(Error::UnexpectedFloatFormat(floating_point_rep))
}
- let expected = match self.header.endian {
+ let expected = match header.endian {
Endian::Big => 1,
Endian::Little => 2,
};
}
}
- if let Some(nominal_case_size) = self.header.nominal_case_size {
+ if let Some(nominal_case_size) = header.nominal_case_size {
let n_vars = self.variable.len();
if n_vars != nominal_case_size as usize
&& self
value_index += n_values;
}
- if let Some(weight_index) = self.header.weight_index {
+ if let Some(weight_index) = header.weight_index {
let index = weight_index as usize - 1;
if index >= value_index {
warn(Error::WeightIndexOutOfRange {
});
}
- let written_by_readstat = self.header.eye_catcher.contains("ReadStat");
+ let written_by_readstat = header.eye_catcher.contains("ReadStat");
for dict_index in dict_indexes {
let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
let mut duplicates = Vec::new();
});
}
- let metadata = Metadata::decode(&self, warn);
+ let metadata = Metadata::decode(&header, &self, warn);
if let Some(n_cases) = metadata.n_cases {
cases = cases.with_expected_cases(n_cases);
}
(group, values)
}
- fn decode(headers: &Headers, mut warn: impl FnMut(Error)) -> Self {
- let header = &headers.header;
+ fn decode(
+ header: &HeaderRecord<String>,
+ headers: &Headers,
+ mut warn: impl FnMut(Error),
+ ) -> Self {
+ let header = &header;
let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %y")
.unwrap_or_else(|_| {
warn(Error::InvalidCreationDate {
#[allow(missing_docs)] // Don't warn for missing docs on tuple members.
#[derive(Clone, Debug)]
pub enum Record {
- /// The file header.
- ///
- /// Every system file has exactly one header record, at its very beginning.
- Header(HeaderRecord<RawString>),
-
/// Variable record.
///
/// Each numeric variable has one variable record. Each string variable has
/// or strings.
#[derive(Clone, Debug)]
pub enum DecodedRecord {
- /// File header, with strings decoded.
- Header(HeaderRecord<String>),
-
/// Variable record, with strings decoded.
Variable(VariableRecord<String>),
}
/// Decodes this record into a [DecodedRecord] using `decoder`.
///
/// Variable, value label, and document records are decoded with `decoder`;
/// end-of-headers and zlib header/trailer records are carried over without
/// using it.
- pub fn decode(self, decoder: &mut Decoder) -> Result<DecodedRecord, Error> {
- Ok(match self {
- Record::Header(record) => DecodedRecord::Header(record.decode(decoder)),
+ pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+ match self {
Record::Variable(record) => DecodedRecord::Variable(record.decode(decoder)),
Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
Record::Document(record) => DecodedRecord::Document(record.decode(decoder)),
Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
// NOTE(review): `self` is matched by value, so `record` is already owned
// in the next two arms and `.clone()` looks redundant — confirm.
Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
- })
+ }
}
}
/// PSPP only supports ASCII-based encodings.
pub fn infer_encoding(
records: &[Record],
- warn: &mut impl FnMut(Warning),
+ mut warn: impl FnMut(Warning),
) -> Result<&'static Encoding, Error> {
// Get the character encoding from the first (and only) encoding record.
let encoding = records
}
enum ReaderState {
- Start,
Headers,
ZlibHeader,
ZlibTrailer(ZHeader),
warn: Box::new(warn),
header,
var_types: VarTypes::new(),
- state: ReaderState::Start,
+ state: ReaderState::Headers,
cases: None,
})
}
- /// Returns a structure for reading the system file's header records.
- pub fn headers<'b>(&'b mut self) -> HeaderReader<'a, 'b, R> {
- HeaderReader(self)
+ /// Returns the raw (undecoded) file header record held by this reader.
+ pub fn header(&self) -> &HeaderRecord<RawString> {
+ &self.header
+ }
+
+ /// Returns an iterator over the system file's raw records; the file header is available from [Self::header].
+ pub fn records<'b>(&'b mut self) -> Records<'a, 'b, R> {
+ Records(self)
}
/// Returns a structure for reading the system file's cases.
}
}
-/// Reader for the raw header records in a system file.
-pub struct HeaderReader<'a, 'b, R>(&'b mut Reader<'a, R>)
+/// Iterator over a system file's raw records, mutably borrowing the [Reader] (see [Reader::records]).
+pub struct Records<'a, 'b, R>(&'b mut Reader<'a, R>)
where
R: Read + Seek + 'static;
-impl<'a, 'b, R> HeaderReader<'a, 'b, R>
+impl<'a, 'b, R> Records<'a, 'b, R>
where
R: Read + Seek + 'static,
{
fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
match self.0.state {
- ReaderState::Start => {
- self.0.state = ReaderState::Headers;
- Some(Ok(Record::Header(self.0.header.clone())))
- }
ReaderState::Headers => {
let record = loop {
match Record::read(
}
}
-impl<'a, 'b, R> Iterator for HeaderReader<'a, 'b, R>
+impl<'a, 'b, R> Iterator for Records<'a, 'b, R>
where
R: Read + Seek + 'static,
{
/// Each item is either a raw record or an error.
type Item = Result<Record, Error>;
/// Returns whatever `_next` produces.
///
/// If that item is an error, the reader's state is set to
/// `ReaderState::End` before the error is handed to the caller.
fn next(&mut self) -> Option<Self::Item> {
// Presumably the `End` state makes later calls return `None`; the arm of
// `_next` that handles it is not visible here — confirm.
- let retval = self._next();
- if matches!(retval, Some(Err(_))) {
- self.0.state = ReaderState::End;
- }
- retval
+ self._next().inspect(|retval| {
+ if retval.is_err() {
+ self.0.state = ReaderState::End;
+ }
+ })
}
}
{
let mut warnings = Vec::new();
let mut reader = Reader::new(sysfile, |warning| warnings.push(warning)).unwrap();
- let output = match reader.headers().collect::<Result<Vec<_>, _>>() {
- Ok(headers) => {
+ let output = match reader.records().collect::<Result<Vec<_>, _>>() {
+ Ok(records) => {
+ let header = reader.header().clone();
let cases = reader.cases();
- let encoding = infer_encoding(&headers, &mut |warning| warnings.push(warning)).unwrap();
+ let encoding = infer_encoding(&records, |warning| warnings.push(warning)).unwrap();
let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning));
- let mut decoded_records = Vec::new();
- for header in headers {
- decoded_records.push(header.decode(&mut decoder).unwrap());
- }
+ let header = header.decode(&mut decoder);
+ let decoded_records = records
+ .into_iter()
+ .map(|record| record.decode(&mut decoder))
+ .collect::<Vec<_>>();
drop(decoder);
let mut errors = Vec::new();
- let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
+ let headers = Headers::new(decoded_records, |e| errors.push(e)).unwrap();
let SystemFile {
dictionary,
metadata,
cases,
- } = headers.decode(cases, encoding, |e| errors.push(e));
+ } = headers.decode(header, cases, encoding, |e| errors.push(e));
let (group, data) = metadata.to_pivot_rows();
let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
data.into_iter()