z_trailer: take_first(z_trailer, "z_trailer", warn),
})
}
+
+ pub fn decode( // Builds a Dictionary and Metadata from the records held in self; recoverable problems go to `warn`.
+ mut self,
+ mut cases: Option<Cases>, // handed back to the caller, adjusted for merged string widths and the expected case count
+ encoding: &'static Encoding, // used for the new Dictionary, for decoding variable names, and for each Variable
+ mut warn: impl FnMut(Error), // invoked once for every non-fatal problem found below
+ ) -> Result<(Dictionary, Metadata, Option<Cases>), Error> { // no early return in this body: it reaches Ok unless an unwrap/assert/unreachable panics
+ let mut dictionary = Dictionary::new(encoding);
+
+ let file_label = fix_line_ends(self.header.file_label.trim_end_matches(' '));
+ if !file_label.is_empty() { // the label is only stored when something remains after trimming
+ dictionary.file_label = Some(file_label);
+ }
+
+ for mut attributes in self.file_attributes.drain(..) { // merge every file attribute record into the dictionary
+ dictionary.attributes.append(&mut attributes.0)
+ }
+
+ // Concatenate the lines of all the document records (really there should
+ // only be one) and trim off the trailing spaces that pad them to 80 bytes.
+ dictionary.documents = self
+ .document
+ .drain(..)
+ .flat_map(|record| record.lines)
+ .map(trim_end_spaces)
+ .collect();
+
+ if let Some(integer_info) = &self.integer_info { // sanity checks only; nothing here changes the dictionary
+ let floating_point_rep = integer_info.floating_point_rep;
+ if floating_point_rep != 1 { // any code other than 1 is reported (presumably 1 means IEEE 754 - confirm)
+ warn(Error::UnexpectedFloatFormat(floating_point_rep))
+ }
+
+ let expected = match self.header.endian { // endianness code that should accompany the header's byte order
+ Endian::Big => 1,
+ Endian::Little => 2,
+ };
+ let actual = integer_info.endianness;
+ if actual != expected {
+ warn(Error::UnexpectedEndianess { actual, expected });
+ }
+ };
+
+ if let Some(float_info) = &self.float_info {
+ for (expected, expected2, actual, name) in [ // expected2 is an optional second acceptable value
+ (f64::MIN, None, float_info.sysmis, "SYSMIS"),
+ (f64::MAX, None, float_info.highest, "HIGHEST"),
+ (
+ f64::MIN,
+ Some(f64::MIN.next_up()), // LOWEST may be either f64::MIN or the next representable value above it
+ float_info.lowest,
+ "LOWEST",
+ ),
+ ] {
+ if actual != expected && expected2.is_none_or(|expected2| expected2 != actual) { // warn only when neither candidate matches
+ warn(Error::UnexpectedFloatValue {
+ expected,
+ actual,
+ name,
+ });
+ }
+ }
+ }
+
+ if let Some(nominal_case_size) = self.header.nominal_case_size {
+ let n_vars = self.variable.len(); // counts every variable record, continuation records included
+ if n_vars != nominal_case_size as usize
+ && self
+ .integer_info
+ .as_ref()
+ .is_none_or(|info| info.version.0 != 13) // a mismatch is not reported when version.0 == 13 (reason not shown here - confirm)
+ {
+ warn(Error::WrongVariablePositions {
+ actual: n_vars,
+ expected: nominal_case_size as usize,
+ });
+ }
+ }
+
+ let mut decoder = Decoder { // supplies replacement names via generate_name() below
+ encoding,
+ n_generated_names: 0,
+ };
+
+ let mut var_index_map = BTreeMap::new(); // first value index of each variable -> its dictionary index
+ let mut value_index = 0; // running total of values occupied by the variables added so far
+ for (index, input) in self
+ .variable
+ .iter()
+ .enumerate()
+ .filter(|(_index, record)| record.width != RawWidth::Continuation) // continuation records never become variables themselves
+ {
+ let name = trim_end_spaces(input.name.to_string());
+ let name = match Identifier::from_encoding(name, encoding)
+ .and_then(Identifier::must_be_ordinary)
+ {
+ Ok(name) => {
+ if !dictionary.variables.contains(&name.0) {
+ name
+ } else { // name already taken: substitute a generated one and report it
+ let new_name = decoder.generate_name(&dictionary);
+ warn(Error::DuplicateVariableName {
+ duplicate_name: name.clone(),
+ new_name: new_name.clone(),
+ });
+ new_name
+ }
+ }
+ Err(id_error) => { // not a usable identifier: substitute a generated one and report it
+ let new_name = decoder.generate_name(&dictionary);
+ warn(Error::InvalidVariableName {
+ id_error,
+ new_name: new_name.clone(),
+ });
+ new_name
+ }
+ };
+ let mut variable = Variable::new(
+ name.clone(),
+ VarWidth::try_from(input.width).unwrap(), // presumably infallible once Continuation is filtered out - confirm
+ encoding,
+ );
+
+ // Start with the short name equal to the variable's name, even when
+ // that name was generated above as a replacement.
+ variable.short_names = vec![name];
+
+ variable.label = input.label.clone();
+
+ variable.missing_values = input.missing_values.clone();
+
+ variable.print_format = decode_format( // the callback reports a bad print format along with new_spec
+ input.print_format,
+ variable.width,
+ |new_spec, format_error| {
+ warn(Error::InvalidPrintFormat {
+ new_spec,
+ variable: variable.name.clone(),
+ format_error,
+ })
+ },
+ );
+ variable.write_format = decode_format( // same handling as the print format, with its own error variant
+ input.write_format,
+ variable.width,
+ |new_spec, format_error| {
+ warn(Error::InvalidWriteFormat {
+ new_spec,
+ variable: variable.name.clone(),
+ format_error,
+ })
+ },
+ );
+
+ // Each of the n_values - 1 records after this one must be a continuation record.
+ let n_values = input.width.n_values().unwrap(); // presumably always Some for a non-continuation width - confirm
+ for offset in 1..n_values {
+ if self
+ .variable
+ .get(index + offset)
+ .is_none_or(|record| record.width != RawWidth::Continuation) // missing record or wrong kind of record
+ {
+ warn(Error::MissingLongStringContinuation {
+ width: input.width,
+ start_index: index,
+ end_index: index + n_values - 1,
+ error_index: index + offset,
+ });
+ break; // only the first problem for this variable is reported
+ }
+ }
+
+ let dict_index = dictionary.add_var(variable).unwrap(); // presumably cannot fail because the name was checked for duplicates above - confirm
+ assert_eq!(var_index_map.insert(value_index, dict_index), None); // each starting value index is inserted exactly once
+ value_index += n_values;
+ }
+
+ if let Some(weight_index) = self.header.weight_index {
+ let index = weight_index as usize - 1; // converted to 0-based; NOTE(review): underflows if weight_index can be 0 - confirm its type excludes that
+ if index >= value_index {
+ warn(Error::WeightIndexOutOfRange {
+ index: weight_index,
+ max_index: var_index_map.len(), // NOTE(review): number of variables, while the test above compares against the number of values - confirm intent
+ });
+ } else {
+ let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap(); // variable starting at or before index
+ let variable = &dictionary.variables[*dict_index];
+ if *var_index == index { // index is the first value of a variable, not a continuation position
+ if variable.is_numeric() {
+ dictionary.weight = Some(*dict_index);
+ } else {
+ warn(Error::InvalidWeightVar {
+ index: weight_index,
+ name: variable.name.clone(),
+ });
+ }
+ } else { // index lands inside a variable that spans several values
+ warn(Error::WeightIndexStringContinuation {
+ index: weight_index,
+ name: variable.name.clone(),
+ });
+ }
+ }
+ }
+
+ for record in self.value_label.drain(..) {
+ let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len()); // variables that will receive the labels
+ let mut long_string_variables = Vec::new(); // names of long string variables, which are reported instead
+ for value_index in record.dict_indexes.iter() {
+ let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else { // stored index treated as 1-based
+ unreachable!() // NOTE(review): panics unless the index is some variable's first value; presumably validated earlier - confirm
+ };
+ let variable = &dictionary.variables[*dict_index];
+ if variable.width.is_long_string() {
+ long_string_variables.push(variable.name.clone());
+ } else {
+ dict_indexes.push(*dict_index);
+ }
+ }
+ if !long_string_variables.is_empty() {
+ warn(Error::InvalidLongStringValueLabels {
+ offsets: record.offsets.clone(),
+ variables: long_string_variables,
+ });
+ }
+
+ let written_by_readstat = self.header.eye_catcher.contains("ReadStat");
+ for dict_index in dict_indexes {
+ let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
+ let mut duplicates = Vec::new(); // raw values whose decoded datum already had a label
+ for ValueLabel {
+ datum: value,
+ label,
+ } in record.labels.iter().cloned()
+ {
+ let datum = value.decode(variable.width);
+ if variable.value_labels.insert(datum, label).is_some() { // Some is taken to mean a label was already present
+ duplicates.push(value);
+ }
+ }
+ if written_by_readstat {
+ // Deliberately silent about duplicates here. ReadStat is buggy and
+ // emits value labels whose values are longer than the string
+ // variable's width, so distinct values collide once cut to that
+ // width, e.g. both values "ABC123" and "ABC456" for a string
+ // variable with width 3.
+ } else if !duplicates.is_empty() {
+ warn(Error::DuplicateValueLabels {
+ variable: variable.name.clone(),
+ values: duplicates
+ .iter()
+ .map(|value| { // render each duplicate value as text for the warning
+ value
+ .decode(variable.width)
+ .display(variable.print_format, variable.encoding)
+ .with_trimming()
+ .with_quoted_string()
+ .to_string()
+ })
+ .collect(),
+ });
+ }
+ }
+ }
+
+ if let Some(display) = &self.var_display {
+ for (index, display) in display.0.iter().enumerate() { // entries are matched to variables by position
+ if let Some(variable) = dictionary.variables.get_index_mut2(index) {
+ if let Some(width) = display.width {
+ variable.display_width = width;
+ }
+ if let Some(alignment) = display.alignment {
+ variable.alignment = alignment;
+ }
+ if let Some(measure) = display.measure {
+ variable.measure = Some(measure);
+ }
+ } else { // more display entries than variables
+ warn(dbg!(Error::TBD)); // NOTE(review): placeholder error; dbg! also prints it to stderr
+ }
+ }
+ }
+
+ for record in self
+ .multiple_response
+ .iter()
+ .flat_map(|record| record.0.iter())
+ {
+ match MultipleResponseSet::decode(&dictionary, record, &mut warn) {
+ Ok(mrset) => {
+ dictionary.mrsets.insert(ByIdentifier::new(mrset));
+ }
+ Err(error) => warn(error), // a set that fails to decode is reported and left out
+ }
+ }
+
+ if !self.very_long_strings.is_empty() {
+ 'outer: for record in self
+ .very_long_strings
+ .drain(..)
+ .flat_map(|record| record.0.into_iter())
+ {
+ let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else { // no variable with that short name
+ warn(dbg!(Error::TBD)); // NOTE(review): placeholder error; dbg! also prints it to stderr
+ continue;
+ };
+ let width = VarWidth::String(record.length);
+ let n_segments = width.n_segments();
+ if n_segments == 1 { // a length that needs only one segment is reported and the record skipped
+ warn(dbg!(Error::ShortVeryLongString {
+ short_name: record.short_name.clone(),
+ width: record.length
+ }));
+ continue;
+ }
+ if index + n_segments > dictionary.variables.len() { // the segments would run past the last variable
+ warn(dbg!(Error::VeryLongStringOverflow {
+ short_name: record.short_name.clone(),
+ width: record.length,
+ index,
+ n_segments,
+ len: dictionary.variables.len()
+ }));
+ continue;
+ }
+ let mut short_names = Vec::with_capacity(n_segments); // one short name collected per segment
+ for i in 0..n_segments {
+ let alloc_width = width.segment_alloc_width(i);
+ let segment = &dictionary.variables[index + i];
+ short_names.push(segment.short_names[0].clone()); // short_names got one entry when the variable was created above
+ let segment_width = segment.width.as_string_width().unwrap_or(0); // a non-string segment counts as width 0
+ if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) { // widths compared after rounding up to a multiple of 8
+ warn(Error::VeryLongStringInvalidSegmentWidth {
+ short_name: record.short_name.clone(),
+ width: record.length,
+ index: i,
+ actual: segment_width,
+ expected: alloc_width,
+ });
+ continue 'outer; // give up on this record before anything is deleted or resized
+ }
+ }
+ dictionary.delete_vars(index + 1..index + n_segments); // drop the trailing segments, keeping the first
+ let variable = dictionary.variables.get_index_mut2(index).unwrap();
+ variable.short_names = short_names;
+ variable.resize(width); // the first segment takes on the full declared width
+ }
+ cases = cases // refresh the case reader with the variable widths as they stand after merging
+ .take()
+ .map(|cases| cases.with_widths(dictionary.variables.iter().map(|var| var.width)));
+ }
+
+ if self.long_names.is_empty() {
+ // No long variable name records are present. Use the short variable
+ // names, converted to lowercase, as the long variable names.
+ for index in 0..dictionary.variables.len() {
+ let lower = dictionary.variables[index].name.0.as_ref().to_lowercase();
+ if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) {
+ let _ = dictionary.try_rename_var(index, new_name); // a failed rename is deliberately ignored
+ }
+ }
+ } else {
+ // Rename each of the variables, one by one. (In a correctly
+ // constructed system file, this cannot create any intermediate
+ // duplicate variable names, because all of the new variable names are
+ // longer than any of the old variable names and thus there cannot be
+ // any overlaps.)
+ for renaming in self
+ .long_names
+ .iter()
+ .flat_map(|record| record.0.iter().cloned())
+ {
+ let LongName {
+ short_name,
+ long_name,
+ } = renaming;
+ if let Some(index) = dictionary.variables.get_index_of(&short_name.0) {
+ if let Err(long_name) = dictionary.try_rename_var(index, long_name) {
+ warn(Error::DuplicateLongName(long_name));
+ }
+ dictionary // short_names is reset whether or not the rename succeeded
+ .variables
+ .get_index_mut2(index)
+ .unwrap()
+ .short_names = vec![short_name];
+ } else { // the record names a short name that no variable has
+ warn(dbg!(Error::TBD)); // NOTE(review): placeholder error; dbg! also prints it to stderr
+ }
+ }
+ }
+
+ for mut attr_set in self
+ .variable_attributes
+ .drain(..)
+ .flat_map(|record| record.0.into_iter())
+ {
+ if let Some((_, variable)) = dictionary
+ .variables
+ .get_full_mut2(&attr_set.long_var_name.0) // looked up after the renaming step above
+ {
+ variable.attributes.append(&mut attr_set.attributes);
+ } else {
+ warn(dbg!(Error::TBD)); // NOTE(review): placeholder error; dbg! also prints it to stderr
+ }
+ }
+
+ // Assign each variable's role from its attributes, now that they are attached.
+ for index in 0..dictionary.variables.len() {
+ let variable = dictionary.variables.get_index_mut2(index).unwrap();
+ match variable.attributes.role() {
+ Ok(Some(role)) => variable.role = role,
+ Ok(None) => (), // no role attribute: leave the role as it is
+ Err(error) => warn(Error::InvalidRole(error)),
+ }
+ }
+
+ // Value labels for long string variables, which are looked up by name.
+ for record in self
+ .long_string_value_labels
+ .drain(..)
+ .flat_map(|record| record.0.into_iter())
+ {
+ let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+ warn(Error::UnknownLongStringValueLabelVariable(
+ record.var_name.clone(),
+ ));
+ continue;
+ };
+ let Some(width) = variable.width.as_string_width() else { // None means the variable is not a string
+ warn(Error::LongStringValueLabelNumericVariable(
+ record.var_name.clone(),
+ ));
+ continue;
+ };
+ for (mut value, label) in record.labels.into_iter() {
+ // XXX warn about a value that is too long for the variable?
+ value.0.resize(width, b' '); // assumes value.0 is a Vec<u8>: padded with spaces or cut to the variable's width
+ // XXX warn about duplicate value labels?
+ variable.value_labels.insert(Datum::String(value), label);
+ }
+ }
+
+ for mut record in self
+ .long_string_missing_values
+ .drain(..)
+ .flat_map(|record| record.0.into_iter())
+ {
+ let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+ warn(Error::LongStringMissingValueUnknownVariable {
+ name: record.var_name.clone(),
+ });
+ continue;
+ };
+ if !variable.width.is_long_string() { // the record is skipped for any variable that is not a long string
+ warn(Error::LongStringMissingValueBadWdith {
+ name: record.var_name.clone(),
+ width: variable.width,
+ });
+ continue;
+ }
+ if record.missing_values.len() > 3 { // report the excess, then keep only the first three
+ warn(Error::LongStringMissingValueInvalidCount {
+ name: record.var_name.clone(),
+ count: record.missing_values.len(),
+ });
+ record.missing_values.truncate(3);
+ }
+ let values = record
+ .missing_values
+ .into_iter()
+ .map(|v| { // copy each value and resize it to the variable's string width
+ let mut value = RawString::from(v.0.as_slice());
+ value.resize(variable.width.as_string_width().unwrap());
+ Datum::String(value)
+ })
+ .collect::<Vec<_>>();
+ match MissingValues::new(values, None) {
+ Ok(missing_values) => variable.missing_values = missing_values,
+ Err(MissingValuesError::TooWide) => warn(dbg!(Error::TBD)), // NOTE(review): placeholder error; dbg! also prints it to stderr
+ Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => {
+ unreachable!() // at most three values remain and all are Datum::String, so presumably neither error can occur
+ }
+ }
+ }
+
+ for record in self
+ .variable_sets
+ .drain(..)
+ .flat_map(|record| record.sets.into_iter())
+ {
+ let mut variables = Vec::with_capacity(record.variable_names.len()); // dictionary indexes of the members that were found
+ for variable_name in record.variable_names {
+ let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0)
+ else {
+ warn(Error::UnknownVariableSetVariable {
+ variable_set: record.name.clone(),
+ variable: variable_name.clone(),
+ });
+ continue; // the unknown name is left out; the set is still created
+ };
+ variables.push(dict_index);
+ }
+ let variable_set = VariableSet {
+ name: record.name,
+ variables,
+ };
+ dictionary.variable_sets.push(variable_set);
+ }
+
+ for record in self.other_extension.drain(..) { // extension records of other subtypes are only reported
+ warn(Error::UnknownExtensionRecord {
+ offset: record.offsets.start,
+ subtype: record.subtype,
+ size: record.size,
+ count: record.count,
+ });
+ }
+
+ let metadata = Metadata::decode(&self, warn); // warn is moved here, so no warning can be issued after this call
+ if let Some(n_cases) = metadata.n_cases {
+ cases = cases.take().map(|cases| cases.with_expected_cases(n_cases)) // pass the case count on to the case reader
+ }
+ Ok((dictionary, metadata, cases))
+ }
}
#[derive(Clone, Debug, PartialEq, Eq)]
}
}
-pub fn decode(
- mut headers: Headers,
- mut cases: Option<Cases>,
- encoding: &'static Encoding,
- mut warn: impl FnMut(Error),
-) -> Result<(Dictionary, Metadata, Option<Cases>), Error> {
- let mut dictionary = Dictionary::new(encoding);
-
- let file_label = fix_line_ends(headers.header.file_label.trim_end_matches(' '));
- if !file_label.is_empty() {
- dictionary.file_label = Some(file_label);
- }
-
- for mut attributes in headers.file_attributes.drain(..) {
- dictionary.attributes.append(&mut attributes.0)
- }
-
- // Concatenate all the document records (really there should only be one)
- // and trim off the trailing spaces that pad them to 80 bytes.
- dictionary.documents = headers
- .document
- .drain(..)
- .flat_map(|record| record.lines)
- .map(trim_end_spaces)
- .collect();
-
- if let Some(integer_info) = &headers.integer_info {
- let floating_point_rep = integer_info.floating_point_rep;
- if floating_point_rep != 1 {
- warn(Error::UnexpectedFloatFormat(floating_point_rep))
- }
-
- let expected = match headers.header.endian {
- Endian::Big => 1,
- Endian::Little => 2,
- };
- let actual = integer_info.endianness;
- if actual != expected {
- warn(Error::UnexpectedEndianess { actual, expected });
- }
- };
-
- if let Some(float_info) = &headers.float_info {
- for (expected, expected2, actual, name) in [
- (f64::MIN, None, float_info.sysmis, "SYSMIS"),
- (f64::MAX, None, float_info.highest, "HIGHEST"),
- (
- f64::MIN,
- Some(f64::MIN.next_up()),
- float_info.lowest,
- "LOWEST",
- ),
- ] {
- if actual != expected && expected2.is_none_or(|expected2| expected2 != actual) {
- warn(Error::UnexpectedFloatValue {
- expected,
- actual,
- name,
- });
- }
- }
- }
-
- if let Some(nominal_case_size) = headers.header.nominal_case_size {
- let n_vars = headers.variable.len();
- if n_vars != nominal_case_size as usize
- && headers
- .integer_info
- .as_ref()
- .is_none_or(|info| info.version.0 != 13)
- {
- warn(Error::WrongVariablePositions {
- actual: n_vars,
- expected: nominal_case_size as usize,
- });
- }
- }
-
- let mut decoder = Decoder {
- encoding,
- n_generated_names: 0,
- };
-
- let mut var_index_map = BTreeMap::new();
- let mut value_index = 0;
- for (index, input) in headers
- .variable
- .iter()
- .enumerate()
- .filter(|(_index, record)| record.width != RawWidth::Continuation)
- {
- let name = trim_end_spaces(input.name.to_string());
- let name = match Identifier::from_encoding(name, encoding)
- .and_then(Identifier::must_be_ordinary)
- {
- Ok(name) => {
- if !dictionary.variables.contains(&name.0) {
- name
- } else {
- let new_name = decoder.generate_name(&dictionary);
- warn(Error::DuplicateVariableName {
- duplicate_name: name.clone(),
- new_name: new_name.clone(),
- });
- new_name
- }
- }
- Err(id_error) => {
- let new_name = decoder.generate_name(&dictionary);
- warn(Error::InvalidVariableName {
- id_error,
- new_name: new_name.clone(),
- });
- new_name
- }
- };
- let mut variable = Variable::new(
- name.clone(),
- VarWidth::try_from(input.width).unwrap(),
- encoding,
- );
-
- // Set the short name the same as the long name (even if we renamed it).
- variable.short_names = vec![name];
-
- variable.label = input.label.clone();
-
- variable.missing_values = input.missing_values.clone();
-
- variable.print_format = decode_format(
- input.print_format,
- variable.width,
- |new_spec, format_error| {
- warn(Error::InvalidPrintFormat {
- new_spec,
- variable: variable.name.clone(),
- format_error,
- })
- },
- );
- variable.write_format = decode_format(
- input.write_format,
- variable.width,
- |new_spec, format_error| {
- warn(Error::InvalidWriteFormat {
- new_spec,
- variable: variable.name.clone(),
- format_error,
- })
- },
- );
-
- // Check for long string continuation records.
- let n_values = input.width.n_values().unwrap();
- for offset in 1..n_values {
- if headers
- .variable
- .get(index + offset)
- .is_none_or(|record| record.width != RawWidth::Continuation)
- {
- warn(Error::MissingLongStringContinuation {
- width: input.width,
- start_index: index,
- end_index: index + n_values - 1,
- error_index: index + offset,
- });
- break;
- }
- }
-
- let dict_index = dictionary.add_var(variable).unwrap();
- assert_eq!(var_index_map.insert(value_index, dict_index), None);
- value_index += n_values;
- }
-
- if let Some(weight_index) = headers.header.weight_index {
- let index = weight_index as usize - 1;
- if index >= value_index {
- warn(Error::WeightIndexOutOfRange {
- index: weight_index,
- max_index: var_index_map.len(),
- });
- } else {
- let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap();
- let variable = &dictionary.variables[*dict_index];
- if *var_index == index {
- if variable.is_numeric() {
- dictionary.weight = Some(*dict_index);
- } else {
- warn(Error::InvalidWeightVar {
- index: weight_index,
- name: variable.name.clone(),
- });
- }
- } else {
- warn(Error::WeightIndexStringContinuation {
- index: weight_index,
- name: variable.name.clone(),
- });
- }
- }
- }
-
- for record in headers.value_label.drain(..) {
- let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len());
- let mut long_string_variables = Vec::new();
- for value_index in record.dict_indexes.iter() {
- let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else {
- unreachable!()
- };
- let variable = &dictionary.variables[*dict_index];
- if variable.width.is_long_string() {
- long_string_variables.push(variable.name.clone());
- } else {
- dict_indexes.push(*dict_index);
- }
- }
- if !long_string_variables.is_empty() {
- warn(Error::InvalidLongStringValueLabels {
- offsets: record.offsets.clone(),
- variables: long_string_variables,
- });
- }
-
- let written_by_readstat = headers.header.eye_catcher.contains("ReadStat");
- for dict_index in dict_indexes {
- let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
- let mut duplicates = Vec::new();
- for ValueLabel {
- datum: value,
- label,
- } in record.labels.iter().cloned()
- {
- let datum = value.decode(variable.width);
- if variable.value_labels.insert(datum, label).is_some() {
- duplicates.push(value);
- }
- }
- if written_by_readstat {
- // Ignore any possible duplicates. ReadStat is buggy and emits
- // value labels whose values are longer than string variables'
- // widths, that are identical in the actual width of the
- // variable, e.g. both values "ABC123" and "ABC456" for a string
- // variable with width 3.
- } else if !duplicates.is_empty() {
- warn(Error::DuplicateValueLabels {
- variable: variable.name.clone(),
- values: duplicates
- .iter()
- .map(|value| {
- value
- .decode(variable.width)
- .display(variable.print_format, variable.encoding)
- .with_trimming()
- .with_quoted_string()
- .to_string()
- })
- .collect(),
- });
- }
- }
- }
-
- if let Some(display) = &headers.var_display {
- for (index, display) in display.0.iter().enumerate() {
- if let Some(variable) = dictionary.variables.get_index_mut2(index) {
- if let Some(width) = display.width {
- variable.display_width = width;
- }
- if let Some(alignment) = display.alignment {
- variable.alignment = alignment;
- }
- if let Some(measure) = display.measure {
- variable.measure = Some(measure);
- }
- } else {
- warn(dbg!(Error::TBD));
- }
- }
- }
-
- for record in headers
- .multiple_response
- .iter()
- .flat_map(|record| record.0.iter())
- {
- match MultipleResponseSet::decode(&dictionary, record, &mut warn) {
- Ok(mrset) => {
- dictionary.mrsets.insert(ByIdentifier::new(mrset));
- }
- Err(error) => warn(error),
- }
- }
-
- if !headers.very_long_strings.is_empty() {
- 'outer: for record in headers
- .very_long_strings
- .drain(..)
- .flat_map(|record| record.0.into_iter())
- {
- let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
- warn(dbg!(Error::TBD));
- continue;
- };
- let width = VarWidth::String(record.length);
- let n_segments = width.n_segments();
- if n_segments == 1 {
- warn(dbg!(Error::ShortVeryLongString {
- short_name: record.short_name.clone(),
- width: record.length
- }));
- continue;
- }
- if index + n_segments > dictionary.variables.len() {
- warn(dbg!(Error::VeryLongStringOverflow {
- short_name: record.short_name.clone(),
- width: record.length,
- index,
- n_segments,
- len: dictionary.variables.len()
- }));
- continue;
- }
- let mut short_names = Vec::with_capacity(n_segments);
- for i in 0..n_segments {
- let alloc_width = width.segment_alloc_width(i);
- let segment = &dictionary.variables[index + i];
- short_names.push(segment.short_names[0].clone());
- let segment_width = segment.width.as_string_width().unwrap_or(0);
- if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
- warn(Error::VeryLongStringInvalidSegmentWidth {
- short_name: record.short_name.clone(),
- width: record.length,
- index: i,
- actual: segment_width,
- expected: alloc_width,
- });
- continue 'outer;
- }
- }
- dictionary.delete_vars(index + 1..index + n_segments);
- let variable = dictionary.variables.get_index_mut2(index).unwrap();
- variable.short_names = short_names;
- variable.resize(width);
- }
- cases = cases
- .take()
- .map(|cases| cases.with_widths(dictionary.variables.iter().map(|var| var.width)));
- }
-
- if headers.long_names.is_empty() {
- // There are no long variable names. Use the short variable names,
- // converted to lowercase, as the long variable names.
- for index in 0..dictionary.variables.len() {
- let lower = dictionary.variables[index].name.0.as_ref().to_lowercase();
- if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) {
- let _ = dictionary.try_rename_var(index, new_name);
- }
- }
- } else {
- // Rename each of the variables, one by one. (In a correctly
- // constructed system file, this cannot create any intermediate
- // duplicate variable names, because all of the new variable names are
- // longer than any of the old variable names and thus there cannot be
- // any overlaps.)
- for renaming in headers
- .long_names
- .iter()
- .flat_map(|record| record.0.iter().cloned())
- {
- let LongName {
- short_name,
- long_name,
- } = renaming;
- if let Some(index) = dictionary.variables.get_index_of(&short_name.0) {
- if let Err(long_name) = dictionary.try_rename_var(index, long_name) {
- warn(Error::DuplicateLongName(long_name));
- }
- dictionary
- .variables
- .get_index_mut2(index)
- .unwrap()
- .short_names = vec![short_name];
- } else {
- warn(dbg!(Error::TBD));
- }
- }
- }
-
- for mut attr_set in headers
- .variable_attributes
- .drain(..)
- .flat_map(|record| record.0.into_iter())
- {
- if let Some((_, variable)) = dictionary
- .variables
- .get_full_mut2(&attr_set.long_var_name.0)
- {
- variable.attributes.append(&mut attr_set.attributes);
- } else {
- warn(dbg!(Error::TBD));
- }
- }
-
- // Assign variable roles.
- for index in 0..dictionary.variables.len() {
- let variable = dictionary.variables.get_index_mut2(index).unwrap();
- match variable.attributes.role() {
- Ok(Some(role)) => variable.role = role,
- Ok(None) => (),
- Err(error) => warn(Error::InvalidRole(error)),
- }
- }
-
- // Long string value labels.
- for record in headers
- .long_string_value_labels
- .drain(..)
- .flat_map(|record| record.0.into_iter())
- {
- let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
- warn(Error::UnknownLongStringValueLabelVariable(
- record.var_name.clone(),
- ));
- continue;
- };
- let Some(width) = variable.width.as_string_width() else {
- warn(Error::LongStringValueLabelNumericVariable(
- record.var_name.clone(),
- ));
- continue;
- };
- for (mut value, label) in record.labels.into_iter() {
- // XXX warn about too-long value?
- value.0.resize(width, b' ');
- // XXX warn abouat duplicate value labels?
- variable.value_labels.insert(Datum::String(value), label);
- }
- }
-
- for mut record in headers
- .long_string_missing_values
- .drain(..)
- .flat_map(|record| record.0.into_iter())
- {
- let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
- warn(Error::LongStringMissingValueUnknownVariable {
- name: record.var_name.clone(),
- });
- continue;
- };
- if !variable.width.is_long_string() {
- warn(Error::LongStringMissingValueBadWdith {
- name: record.var_name.clone(),
- width: variable.width,
- });
- continue;
- }
- if record.missing_values.len() > 3 {
- warn(Error::LongStringMissingValueInvalidCount {
- name: record.var_name.clone(),
- count: record.missing_values.len(),
- });
- record.missing_values.truncate(3);
- }
- let values = record
- .missing_values
- .into_iter()
- .map(|v| {
- let mut value = RawString::from(v.0.as_slice());
- value.resize(variable.width.as_string_width().unwrap());
- Datum::String(value)
- })
- .collect::<Vec<_>>();
- match MissingValues::new(values, None) {
- Ok(missing_values) => variable.missing_values = missing_values,
- Err(MissingValuesError::TooWide) => warn(dbg!(Error::TBD)),
- Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => {
- unreachable!()
- }
- }
- }
-
- for record in headers
- .variable_sets
- .drain(..)
- .flat_map(|record| record.sets.into_iter())
- {
- let mut variables = Vec::with_capacity(record.variable_names.len());
- for variable_name in record.variable_names {
- let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else {
- warn(Error::UnknownVariableSetVariable {
- variable_set: record.name.clone(),
- variable: variable_name.clone(),
- });
- continue;
- };
- variables.push(dict_index);
- }
- let variable_set = VariableSet {
- name: record.name,
- variables,
- };
- dictionary.variable_sets.push(variable_set);
- }
-
- for record in headers.other_extension.drain(..) {
- warn(Error::UnknownExtensionRecord {
- offset: record.offsets.start,
- subtype: record.subtype,
- size: record.size,
- count: record.count,
- });
- }
-
- let metadata = Metadata::decode(&headers, warn);
- if let Some(n_cases) = metadata.n_cases {
- cases = cases.take().map(|cases| cases.with_expected_cases(n_cases))
- }
- Ok((dictionary, metadata, cases))
-}
-
impl MultipleResponseSet {
fn decode(
dictionary: &Dictionary,