-use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
+use std::{cell::RefCell, ops::Range, rc::Rc, collections::HashMap};
use crate::{
- dictionary::{Dictionary, VarWidth},
+ dictionary::{Dictionary, VarWidth, Variable},
encoding::Error as EncodingError,
endian::Endian,
- format::{Error as FormatError, Spec},
+ format::{Error as FormatError, Spec, UncheckedSpec},
identifier::{Error as IdError, Identifier},
raw::{
self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::Encoding;
+use num::Integer;
use thiserror::Error as ThisError;
pub use crate::raw::{CategoryLabels, Compression};
type DictIndex = usize;
-pub struct Variable {
- pub dict_index: DictIndex,
- pub short_name: Identifier,
- pub long_name: Option<Identifier>,
- pub width: VarWidth,
-}
-
-pub struct Decoder {
- pub raw: raw::Decoder,
- pub encoding: &'static Encoding,
- pub variables: HashMap<DictIndex, Variable>,
- pub var_names: HashMap<Identifier, DictIndex>,
- pub dictionary: Dictionary,
- n_dict_indexes: usize,
- n_generated_names: usize,
-}
-
#[derive(Clone, Debug)]
pub struct Headers {
pub header: HeaderRecord<String>,
}
}
+/// Mutable state used while generating replacement variable names.
+struct Decoder {
+    /// Character encoding handed to `Identifier::new` for generated names.
+    pub encoding: &'static Encoding,
+
+    /// Counter embedded in generated `VARnnn` names; it is incremented
+    /// before every candidate name is built.
+    n_generated_names: usize,
+}
+
+impl Decoder {
+    /// Produces a `VARnnn`-style identifier that `dictionary.variables`
+    /// does not already contain.
+    ///
+    /// The counter `n_generated_names` is incremented before each candidate
+    /// is formatted (zero-padded to at least three digits), and candidates
+    /// are tried until one is not present in the dictionary.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `Identifier::new` rejects a generated candidate, or if the
+    /// counter reaches `usize::MAX` while the candidate is still taken.
+    fn generate_name(&mut self, dictionary: &Dictionary) -> Identifier {
+        loop {
+            self.n_generated_names += 1;
+            let candidate = format!("VAR{:03}", self.n_generated_names);
+            let candidate = Identifier::new(&candidate, self.encoding).unwrap();
+            if dictionary.variables.contains(&candidate) {
+                // Name already taken: make sure the counter can still
+                // advance, then try the next number.
+                assert!(self.n_generated_names < usize::MAX);
+                continue;
+            }
+            return candidate;
+        }
+    }
+}
+
pub fn decode(
mut headers: Headers,
encoding: &'static Encoding,
}
for attributes in headers.file_attributes.drain(..) {
- dictionary.attributes.extend(attributes.0.0.into_iter())
+ dictionary.attributes.extend(attributes.0 .0.into_iter())
}
// Concatenate all the document records (really there should only be one)
// XXX warn for weird integer format
// XXX warn for weird floating-point format, etc.
- /*
- let mut decoder = Decoder {
- raw: decoder,
- variables: HashMap::new(),
- var_names: HashMap::new(),
- dictionary,
- n_dict_indexes: 0,
- n_generated_names: 0,
+ let mut decoder = Decoder {
+ encoding,
+ n_generated_names: 0,
+ };
+
+ let mut header_vars = headers.variable.iter().enumerate();
+ let mut var_index_map = HashMap::new();
+ while let Some((value_index, input)) = header_vars.next() {
+ let name = trim_end_spaces(input.name.to_string());
+ let name = match Identifier::new(&name, encoding) {
+ Ok(name) => {
+ if !dictionary.variables.contains(&name) {
+ name
+ } else {
+ let new_name = decoder.generate_name(&dictionary);
+ warn(Error::DuplicateVariableName {
+ duplicate_name: name.clone(),
+ new_name: new_name.clone(),
+ });
+ new_name
+ }
+ }
+ Err(id_error) => {
+ let new_name = decoder.generate_name(&dictionary);
+ warn(Error::InvalidVariableName {
+ id_error,
+ new_name: new_name.clone(),
+ });
+ new_name
+ }
};
- */
+ let mut variable = Variable::new(name.clone(), VarWidth::from_raw(input.width).unwrap());
+
+ // Set the short name the same as the long name (even if we renamed it).
+ variable.short_names = vec![name];
+
+ variable.label = input.label.clone();
+
+ variable.missing_values = input.missing_values.clone();
+
+ variable.print_format = decode_format(
+ input.print_format,
+ variable.width,
+ |new_spec, format_error| {
+ warn(Error::InvalidPrintFormat {
+ new_spec,
+ variable: variable.name.clone(),
+ format_error,
+ })
+ },
+ );
+ variable.write_format = decode_format(
+ input.write_format,
+ variable.width,
+ |new_spec, format_error| {
+ warn(Error::InvalidWriteFormat {
+ new_spec,
+ variable: variable.name.clone(),
+ format_error,
+ })
+ },
+ );
+
+ // Skip long string continuation records.
+ if input.width > 0 {
+ #[allow(unstable_name_collisions)]
+ for _ in 1..input.width.div_ceil(&8) {
+ if let Some((_, continuation)) = header_vars.next() {
+ if continuation.width == -1 {
+ continue;
+ }
+ }
+ return Err(Error::TBD);
+ }
+ }
+
+ let dict_index = dictionary.add_var(variable).unwrap();
+ assert_eq!(var_index_map.insert(value_index, dict_index), None);
+ }
+
let metadata = Metadata::decode(&headers, warn);
Ok((dictionary, metadata))
}
out
}
+/// Converts a raw format specification into a checked `Spec` for a variable
+/// of the given `width`.
+///
+/// The raw value is converted to an `UncheckedSpec`, then to a `Spec`, and
+/// finally checked for compatibility with `width`. If any of those steps
+/// fails, the default format for `width` is used instead and `warn` is
+/// called with that replacement format and the error that caused it.
+fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
+    let checked = UncheckedSpec::try_from(raw)
+        .and_then(Spec::try_from)
+        .and_then(|spec| spec.check_width_compatibility(width));
+    match checked {
+        Ok(spec) => spec,
+        Err(format_error) => {
+            let fallback = Spec::default_for_width(width);
+            warn(fallback, format_error);
+            fallback
+        }
+    }
+}
+
/*
impl Decoder {
fn generate_name(&mut self) -> Identifier {
pub label: Option<String>,
}
-fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
- UncheckedSpec::try_from(raw)
- .and_then(Spec::try_from)
- .and_then(|x| x.check_width_compatibility(width))
- .unwrap_or_else(|error| {
- let new_format = Spec::default_for_width(width);
- warn(new_format, error);
- new_format
- })
-}
fn parse_variable_record(
decoder: &mut Decoder,