-use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range};
+use std::{
+ borrow::Cow, cell::RefCell, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range,
+ rc::Rc,
+};
use crate::{
encoding::{default_encoding, get_encoding, Error as EncodingError},
n_generated_names: usize,
}
+/// Maps each raw record type to an integer rank.
+///
+/// Judging by the name and the grouping below, lower-ranked records are meant
+/// to be decoded before higher-ranked ones (the caller is not visible from
+/// here -- TODO confirm).  Record types that are not decoded at all share the
+/// rank `i32::MAX`.
+fn decode_sort_order(record: &raw::Record) -> i32 {
+    match record {
+        // The file header record always ranks first.
+        raw::Record::Header(_) => 0,
+
+        // Next, the records consulted when choosing the character encoding.
+        raw::Record::Encoding(_) => 1,
+        raw::Record::IntegerInfo(_) => 2,
+
+        // Next, the remaining records that never refer to variables.
+        raw::Record::Document(_) => 3,
+        raw::Record::FloatInfo(_) => 4,
+        raw::Record::ProductInfo(_) => 5,
+        raw::Record::FileAttributes(_) => 6,
+
+        // The variable records themselves.
+        raw::Record::Variable(_) => 7,
+
+        // Records that refer to variables by index; those indexes would be
+        // invalidated by very long string variables.
+        raw::Record::ValueLabel(_) => 8,
+        raw::Record::VarDisplay(_) => 9,
+
+        // Records that refer to variables by short name.
+        raw::Record::MultipleResponse(_) => 10,
+        raw::Record::VeryLongStrings(_) => 11,
+
+        // The record that renames short names to long names.
+        raw::Record::LongNames(_) => 12,
+
+        // Records that refer to variables by long name.
+        raw::Record::VariableAttributes(_) => 13,
+        raw::Record::LongStringValueLabels(_) => 14,
+        raw::Record::LongStringMissingValues(_) => 15,
+        raw::Record::VariableSets(_) => 16,
+
+        // Cases rank after every decoded header record.
+        raw::Record::Cases(_) => 17,
+
+        // Record types that are not used at all.
+        raw::Record::NumberOfCases(_)
+        | raw::Record::OtherExtension(_)
+        | raw::Record::EndOfHeaders(_)
+        | raw::Record::ZHeader(_)
+        | raw::Record::ZTrailer(_) => i32::MAX,
+    }
+}
+
+/// Borrowed view of a list of raw records, sorted into one slot per record
+/// type.
+///
+/// Filled in by `Headers::new`.  An `Option` slot keeps the first record of
+/// its type (later duplicates are reported through the warning callback and
+/// dropped); a `Vec` slot keeps every record of its type in input order.
+#[derive(Default)]
+struct Headers<'a> {
+    // Record types stored at most once.
+    /// File header record.
+    header: Option<&'a raw::HeaderRecord>,
+    /// Document record.
+    document: Option<&'a raw::DocumentRecord>,
+    /// Integer info record; `decode` reads its `character_code`.
+    integer_info: Option<&'a raw::IntegerInfoRecord>,
+    /// Float info record.
+    float_info: Option<&'a raw::FloatInfoRecord>,
+    /// Variable display record.
+    var_display: Option<&'a raw::VarDisplayRecord>,
+    /// Character encoding record.
+    encoding: Option<&'a raw::EncodingRecord>,
+    /// Number-of-cases record.
+    number_of_cases: Option<&'a raw::NumberOfCasesRecord>,
+    /// Product info text record.
+    product_info: Option<&'a raw::TextRecord>,
+    /// Shared handle to the cases.
+    cases: Option<&'a Rc<RefCell<raw::Cases>>>,
+
+    // Record types that may repeat.
+    /// Variable records.
+    variables: Vec<&'a raw::VariableRecord>,
+    /// Value label records.
+    value_labels: Vec<&'a raw::ValueLabelRecord>,
+    /// Variable set text records.
+    variable_sets: Vec<&'a raw::TextRecord>,
+    /// Multiple response set records.
+    multiple_response: Vec<&'a raw::MultipleResponseRecord>,
+    /// Value label records for long string variables.
+    long_string_value_labels: Vec<&'a raw::LongStringValueLabelRecord>,
+    /// Missing value records for long string variables.
+    long_string_missing_values: Vec<&'a raw::LongStringMissingValueRecord>,
+    /// Long variable name text records.
+    long_names: Vec<&'a raw::TextRecord>,
+    /// Very long string text records.
+    very_long_strings: Vec<&'a raw::TextRecord>,
+    /// File attribute text records.
+    file_attributes: Vec<&'a raw::TextRecord>,
+    /// Variable attribute text records.
+    variable_attributes: Vec<&'a raw::TextRecord>,
+    /// Extension records of any other kind.
+    other_extensions: Vec<&'a raw::Extension>,
+}
+
+/// Stores `value` in `option` if it is still empty.
+///
+/// If `option` already holds a value, that value is left untouched, `value`
+/// is dropped, and `Error::TBD` is passed to `warn`.
+fn set_or_warn<T>(option: &mut Option<T>, value: T, warn: &impl Fn(Error)) {
+    // Guard clause: a slot that is already filled is never overwritten.
+    if option.is_some() {
+        warn(Error::TBD);
+        return;
+    }
+    *option = Some(value);
+}
+
+impl<'a> Headers<'a> {
+    /// Sorts `headers` into a `Headers`, borrowing every record.
+    ///
+    /// Record types that are stored at most once keep their first occurrence;
+    /// each further occurrence is reported through `warn` and otherwise
+    /// ignored.  Record types that may repeat are collected in input order.
+    /// `EndOfHeaders`, `ZHeader`, and `ZTrailer` records are skipped.
+    fn new(headers: &'a [raw::Record], warn: &impl Fn(Error)) -> Headers<'a> {
+        let mut h = Headers::default();
+        for header in headers {
+            match header {
+                raw::Record::Header(record) => set_or_warn(&mut h.header, record, warn),
+                raw::Record::Variable(record) => h.variables.push(record),
+                raw::Record::ValueLabel(record) => h.value_labels.push(record),
+                raw::Record::Document(record) => set_or_warn(&mut h.document, record, warn),
+                raw::Record::IntegerInfo(record) => set_or_warn(&mut h.integer_info, record, warn),
+                raw::Record::FloatInfo(record) => set_or_warn(&mut h.float_info, record, warn),
+                raw::Record::VariableSets(record) => h.variable_sets.push(record),
+                raw::Record::VarDisplay(record) => set_or_warn(&mut h.var_display, record, warn),
+                raw::Record::MultipleResponse(record) => h.multiple_response.push(record),
+                raw::Record::LongStringValueLabels(record) => {
+                    h.long_string_value_labels.push(record)
+                }
+                raw::Record::LongStringMissingValues(record) => {
+                    h.long_string_missing_values.push(record)
+                }
+                raw::Record::Encoding(record) => set_or_warn(&mut h.encoding, record, warn),
+                raw::Record::NumberOfCases(record) => {
+                    set_or_warn(&mut h.number_of_cases, record, warn)
+                }
+                raw::Record::ProductInfo(record) => set_or_warn(&mut h.product_info, record, warn),
+                raw::Record::LongNames(record) => h.long_names.push(record),
+                raw::Record::VeryLongStrings(record) => h.very_long_strings.push(record),
+                raw::Record::FileAttributes(record) => h.file_attributes.push(record),
+                raw::Record::VariableAttributes(record) => h.variable_attributes.push(record),
+                raw::Record::OtherExtension(record) => h.other_extensions.push(record),
+                raw::Record::Cases(record) => set_or_warn(&mut h.cases, record, warn),
+                // Nothing is decoded from these records, so skip them.  They
+                // must not panic: a `todo!()` here would abort decoding of any
+                // input that contains one of them.
+                raw::Record::EndOfHeaders(_)
+                | raw::Record::ZHeader(_)
+                | raw::Record::ZTrailer(_) => (),
+            }
+        }
+        h
+    }
+}
+
pub fn decode(
headers: Vec<raw::Record>,
encoding: Option<&'static Encoding>,
warn: &impl Fn(Error),
) -> Result<Vec<Record>, Error> {
- let Some(header_record) = headers.iter().find_map(|rec| {
- if let raw::Record::Header(header) = rec {
- Some(header)
- } else {
- None
- }
- }) else {
+ let h = Headers::new(&headers, warn);
+ let Some(header) = h.header else {
return Err(Error::MissingHeaderRecord);
};
let encoding = match encoding {
Some(encoding) => encoding,
None => {
- let encoding = headers.iter().find_map(|rec| {
- if let raw::Record::Encoding(ref e) = rec {
- Some(e.0.as_str())
- } else {
- None
- }
- });
- let character_code = headers.iter().find_map(|rec| {
- if let raw::Record::IntegerInfo(ref r) = rec {
- Some(r.character_code)
- } else {
- None
- }
- });
+ let encoding = h.encoding.map(|record| record.0.as_str());
+ let character_code = h.integer_info.map(|record| record.character_code);
match get_encoding(encoding, character_code) {
Ok(encoding) => encoding,
Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
};
let mut decoder = Decoder {
- compression: header_record.compression,
- endian: header_record.endian,
+ compression: header.compression,
+ endian: header.endian,
encoding,
variables: HashMap::new(),
var_names: HashMap::new(),
};
let mut output = Vec::with_capacity(headers.len());
- for header in &headers {
- match header {
- raw::Record::Header(ref input) => {
- if let Some(header) = HeaderRecord::try_decode(&mut decoder, input, warn)? {
- output.push(Record::Header(header))
- }
- }
- raw::Record::Variable(ref input) => {
- if let Some(variable) = VariableRecord::try_decode(&mut decoder, input, warn)? {
- output.push(Record::Variable(variable));
- }
- }
- raw::Record::ValueLabel(ref input) => {
- if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, input, warn)?
- {
- output.push(Record::ValueLabel(value_label));
- }
- }
- raw::Record::Document(ref input) => {
- if let Some(document) = DocumentRecord::try_decode(&mut decoder, input, warn)? {
- output.push(Record::Document(document))
- }
- }
- raw::Record::IntegerInfo(ref input) => output.push(Record::IntegerInfo(input.clone())),
- raw::Record::FloatInfo(ref input) => output.push(Record::FloatInfo(input.clone())),
- raw::Record::VariableSets(ref input) => {
- let s = decoder.decode_string_cow(&input.text.0, warn);
- output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?));
- }
- raw::Record::VarDisplay(ref input) => {
- if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, input, warn)? {
- output.push(Record::VarDisplay(vdr))
- }
- }
- raw::Record::MultipleResponse(ref input) => {
- if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, input, warn)? {
- output.push(Record::MultipleResponse(mrr))
- }
- }
- raw::Record::LongStringMissingValues(ref input) => {
- if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? {
- output.push(Record::LongStringMissingValues(mrr))
- }
- }
- raw::Record::LongStringValueLabels(ref input) => {
- if let Some(mrr) =
- LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)?
- {
- output.push(Record::LongStringValueLabels(mrr))
- }
- }
- raw::Record::Encoding(ref input) => output.push(Record::Encoding(input.clone())),
- raw::Record::NumberOfCases(ref input) => {
- output.push(Record::NumberOfCases(input.clone()))
- }
- raw::Record::ProductInfo(ref input) => {
- let s = decoder.decode_string_cow(&input.text.0, warn);
- output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?));
- }
- raw::Record::LongNames(ref input) => {
- let s = decoder.decode_string_cow(&input.text.0, warn);
- output.push(Record::LongNames(LongNameRecord::parse(
- &mut decoder,
- &s,
- warn,
- )?));
- }
- raw::Record::VeryLongStrings(ref input) => {
- let s = decoder.decode_string_cow(&input.text.0, warn);
- output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
- &decoder, &s, warn,
- )?));
- }
- raw::Record::FileAttributes(ref input) => {
- let s = decoder.decode_string_cow(&input.text.0, warn);
- output.push(Record::FileAttributes(FileAttributeRecord::parse(
- &decoder, &s, warn,
- )?));
- }
- raw::Record::VariableAttributes(ref input) => {
- let s = decoder.decode_string_cow(&input.text.0, warn);
- output.push(Record::VariableAttributes(VariableAttributeRecord::parse(
- &decoder, &s, warn,
- )?));
- }
- raw::Record::OtherExtension(ref input) => {
- output.push(Record::OtherExtension(input.clone()))
- }
- raw::Record::EndOfHeaders(_) => (),
- raw::Record::ZHeader(_) => (),
- raw::Record::ZTrailer(_) => (),
- raw::Record::Cases(_) => (),
- };
+
+ // Decode the records that don't use variables at all.
+ if let Some(header) = HeaderRecord::try_decode(&mut decoder, header, warn)? {
+ output.push(Record::Header(header))
+ }
+ if let Some(raw) = h.document {
+ if let Some(document) = DocumentRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::Document(document))
+ }
+ }
+ if let Some(raw) = h.integer_info {
+ output.push(Record::IntegerInfo(raw.clone()));
+ }
+ if let Some(raw) = h.float_info {
+ output.push(Record::FloatInfo(raw.clone()));
+ }
+ if let Some(raw) = h.product_info {
+ let s = decoder.decode_string_cow(&raw.text.0, warn);
+ output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?));
+ }
+ if let Some(raw) = h.number_of_cases {
+ output.push(Record::NumberOfCases(raw.clone()))
+ }
+ for &raw in &h.file_attributes {
+ let s = decoder.decode_string_cow(&raw.text.0, warn);
+ output.push(Record::FileAttributes(FileAttributeRecord::parse(
+ &decoder, &s, warn,
+ )?));
+ }
+ for &raw in &h.other_extensions {
+ output.push(Record::OtherExtension(raw.clone()));
+ }
+
+ // Decode the variable records, which are the basis of almost everything
+ // else.
+ for &raw in &h.variables {
+ if let Some(variable) = VariableRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::Variable(variable));
+ }
+ }
+
+ // Decode value labels and weight variable. These use indexes into the
+ // variable records, so we need to parse them before those indexes become
+ // invalidated by very long string variables.
+ for &raw in &h.value_labels {
+ if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::ValueLabel(value_label));
+ }
+ }
+ // XXX weight
+ if let Some(raw) = h.var_display {
+ if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::VarDisplay(vdr))
+ }
+ }
+
+ // Decode records that use short names.
+ for &raw in &h.multiple_response {
+ if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::MultipleResponse(mrr))
+ }
+ }
+ for &raw in &h.very_long_strings {
+ let s = decoder.decode_string_cow(&raw.text.0, warn);
+ output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
+ &decoder, &s, warn,
+ )?));
+ }
+
+ // Rename variables to their long names.
+ for &raw in &h.long_names {
+ let s = decoder.decode_string_cow(&raw.text.0, warn);
+ output.push(Record::LongNames(LongNameRecord::parse(
+ &mut decoder,
+ &s,
+ warn,
+ )?));
+ }
+
+ // Decode records that use long names.
+ for &raw in &h.variable_attributes {
+ let s = decoder.decode_string_cow(&raw.text.0, warn);
+ output.push(Record::VariableAttributes(VariableAttributeRecord::parse(
+ &decoder, &s, warn,
+ )?));
+ }
+ for &raw in &h.long_string_value_labels {
+ if let Some(mrr) = LongStringValueLabelRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::LongStringValueLabels(mrr))
+ }
+ }
+ for &raw in &h.long_string_missing_values {
+ if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, raw, warn)? {
+ output.push(Record::LongStringMissingValues(mrr))
+ }
+ }
+ for &raw in &h.variable_sets {
+ let s = decoder.decode_string_cow(&raw.text.0, warn);
+ output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?));
}
Ok(output)
}
Ok(LongStringMissingValues {
var_name,
- missing_values
+ missing_values,
})
}
}
pub struct LongStringMissingValuesRecord(Vec<LongStringMissingValues>);
impl TryDecode for LongStringMissingValuesRecord {
- type Input = raw::LongStringMissingValueSet;
+ type Input = raw::LongStringMissingValueRecord;
fn try_decode(
decoder: &mut Decoder,