* `char string_lengths[];`
- a list of key-value tuples, where key is the name of a variable, and
- value is its length. the key field is at most 8 bytes long and must
+ A list of key-value tuples, where key is the name of a variable, and
+ value is its length. The key field is at most 8 bytes long and must
match the name of a variable which appears in the [variable
- record](#variable-record). the value field is exactly 5 bytes long.
- it is a zero-padded, ASCII-encoded string that is the length of the
- variable. the key and value fields are separated by a `=` byte.
- tuples are delimited by a two-byte sequence {00, 09}. After the
+ record](#variable-record). The value field is exactly 5 bytes long.
+ It is a zero-padded, ASCII-encoded decimal string that gives the length
+ of the variable. The key and value fields are separated by a `=` byte.
+ Tuples are delimited by a two-byte sequence {00, 09}. After the
last tuple, there may be a single byte 00, or {00, 09}. The total
length is `count` bytes.
Datum::String(s) => Some(s),
}
}
+
+    /// Returns true if this datum can be changed to `width` without losing
+    /// data.
+    ///
+    /// A number can only stay numeric.  A string can always be widened, and
+    /// it can be narrowed as long as every byte that would be cut off is a
+    /// space.  Changing between numeric and string is never allowed.
+    pub fn is_resizable(&self, width: VarWidth) -> bool {
+        match (self, width) {
+            (Datum::String(s), VarWidth::String(new_width)) => {
+                // `skip` yields nothing when the string is already short
+                // enough, which makes `all` vacuously true.
+                s.0.iter()
+                    .skip(new_width as usize)
+                    .all(|&byte| byte == b' ')
+            }
+            (Datum::Number(_), VarWidth::Numeric) => true,
+            _ => false,
+        }
+    }
+
+    /// Changes this datum to `width`, in place.
+    ///
+    /// A numeric datum given a numeric width is left untouched.  A string
+    /// datum given a string width is passed to the string's own `resize`.
+    ///
+    /// # Panics
+    ///
+    /// Panics for any other combination of datum and `width`.
+    pub fn resize(&mut self, width: VarWidth) {
+        match (self, width) {
+            (Datum::String(string), VarWidth::String(new_width)) => {
+                string.resize(new_width as usize);
+            }
+            (Datum::Number(_), VarWidth::Numeric) => {}
+            _ => unreachable!(),
+        }
+    }
+
+    /// Returns the width of this datum: numeric for a number, or a string
+    /// width equal to the string's length.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the string's length does not fit in the integer type held
+    /// by `VarWidth::String`.
+    pub fn width(&self) -> VarWidth {
+        match self {
+            Datum::String(s) => {
+                let len = s.len().try_into().unwrap();
+                VarWidth::String(len)
+            }
+            Datum::Number(_) => VarWidth::Numeric,
+        }
+    }
}
impl From<f64> for Datum {
.filter(|var| !var.value_labels.is_empty())
{
let mut group = Group::new(&**variable);
- let mut sorted_value_labels = variable.value_labels.iter().collect::<Vec<_>>();
+ let mut sorted_value_labels = variable.value_labels.0.iter().collect::<Vec<_>>();
sorted_value_labels.sort();
for (datum, label) in sorted_value_labels {
let mut value = Value::new_variable_value(variable, datum)
/// Value labels, to associate a number (or a string) with a more meaningful
/// description, e.g. 1 -> Apple, 2 -> Banana, ...
- pub value_labels: HashMap<Datum, String>,
+ pub value_labels: ValueLabels,
/// Variable label, an optional meaningful description for the variable
/// itself.
missing_values: MissingValues::default(),
print_format: Format::default_for_width(width),
write_format: Format::default_for_width(width),
- value_labels: HashMap::new(),
+ value_labels: ValueLabels::new(),
label: None,
measure: Measure::default_for_type(var_type),
role: Role::default(),
+ /// Returns this variable's label, or `None` if it has none.
pub fn label(&self) -> Option<&String> {
self.label.as_ref()
}
+
+    /// Changes the width of this variable to `width`.
+    ///
+    /// Missing values and value labels are resized along with the variable
+    /// when they can be; otherwise they are discarded and replaced by their
+    /// defaults.  The print and write formats are resized as well.
+    pub fn resize(&mut self, width: VarWidth) {
+        // Decide up front what survives the change of width.
+        let keep_missing_values = self.missing_values.is_resizable(width);
+        let keep_value_labels = self.value_labels.is_resizable(width);
+
+        if keep_missing_values {
+            self.missing_values.resize(width);
+        } else {
+            self.missing_values = MissingValues::default();
+        }
+
+        if keep_value_labels {
+            self.value_labels.resize(width);
+        } else {
+            self.value_labels = ValueLabels::default();
+        }
+
+        self.print_format.resize(width);
+        self.write_format.resize(width);
+
+        self.width = width;
+    }
}
impl HasIdentifier for Variable {
}
}
+/// The value labels of a variable: a map from each labeled `Datum` to its
+/// label text.
+#[derive(Clone, Debug, Default)]
+pub struct ValueLabels(pub HashMap<Datum, String>);
+
+impl ValueLabels {
+    /// Creates an empty set of value labels.
+    pub fn new() -> Self {
+        Self(HashMap::new())
+    }
+
+    /// Returns true if no value has a label.
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// Returns the label for `datum`, or `None` if it has no label.
+    pub fn get(&self, datum: &Datum) -> Option<&str> {
+        self.0.get(datum).map(String::as_str)
+    }
+
+    /// Sets the label for `datum` to `label`, returning the label it
+    /// replaced, if any.
+    pub fn insert(&mut self, datum: Datum, label: String) -> Option<String> {
+        self.0.insert(datum, label)
+    }
+
+    /// Returns true if every labeled value can be resized to `width`.
+    pub fn is_resizable(&self, width: VarWidth) -> bool {
+        self.0.keys().all(|key| key.is_resizable(width))
+    }
+
+    /// Resizes every labeled value to `width`, keeping its label.
+    ///
+    /// The map is rebuilt because resizing changes the keys themselves.
+    pub fn resize(&mut self, width: VarWidth) {
+        self.0 = std::mem::take(&mut self.0)
+            .into_iter()
+            .map(|(mut key, label)| {
+                key.resize(width);
+                (key, label)
+            })
+            .collect();
+    }
+}
+
+
#[cfg(test)]
mod test {
use std::collections::HashSet;
VarWidth::String(width) => Datum::String(RawString::spaces(width as usize)),
}
}
+
+    /// Adjusts this format to suit a variable of the given `width`.
+    ///
+    /// A numeric format given a numeric width is left unchanged.  A string
+    /// format given a string width keeps its type and has `w` updated.  In
+    /// every other case the format is replaced by the default format for
+    /// `width`.
+    pub fn resize(&mut self, width: VarWidth) {
+        match (self.var_width(), width) {
+            (VarWidth::String(_), VarWidth::String(new_width)) => {
+                // AHex gets twice the variable's width (presumably two hex
+                // digits per byte); every other string format matches it.
+                let scale = if self.type_ == Type::AHex { 2 } else { 1 };
+                self.w = new_width * scale;
+            }
+            (VarWidth::Numeric, VarWidth::Numeric) => {}
+            _ => *self = Self::default_for_width(width),
+        }
+    }
}
impl Debug for Format {
}
pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self {
let var_name = Some(variable.name.as_str().into());
- let value_label = variable.value_labels.get(value).cloned();
+ let value_label = variable.value_labels.get(value).map(String::from);
match value {
Datum::Number(number) => Self::new(ValueInner::Number(NumberValue {
show: None,
dictionary.delete_vars(index + 1..index + n_segments);
let variable = dictionary.variables.get_index_mut2(index).unwrap();
variable.short_names = short_names;
- variable.width = width;
+ variable.resize(width);
}
if headers.long_names.is_empty() {
}
}
+    /// Returns true if every individual missing value, and the range if
+    /// there is one, can be resized to `width`.
+    pub fn is_resizable(&self, width: VarWidth) -> bool {
+        if !self.values.iter().all(|value| value.is_resizable(width)) {
+            return false;
+        }
+        match &self.range {
+            Some(range) => range.is_resizable(width),
+            None => true,
+        }
+    }
+
+    /// Resizes every individual missing value, and the range if there is
+    /// one, to `width`.
+    pub fn resize(&mut self, width: VarWidth) {
+        self.values
+            .iter_mut()
+            .for_each(|value| value.resize(width));
+        if let Some(range) = self.range.as_mut() {
+            range.resize(width);
+        }
+    }
+
fn read<R: Read + Seek>(
r: &mut R,
offset: u64,
MissingValueRange::To { high } => number <= *high,
}
}
+
+ /// Returns true if `width` is numeric, false otherwise.
+ pub fn is_resizable(&self, width: VarWidth) -> bool {
+ width.is_numeric()
+ }
+
+ /// Checks that `width` is numeric; the range itself is left unchanged.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `width` is not `VarWidth::Numeric`.
+ pub fn resize(&self, width: VarWidth) {
+ assert_eq!(width, VarWidth::Numeric);
+ }
}
impl Display for MissingValueRange {
+ /// Changes the length of this string to `len` bytes, filling any newly
+ /// added bytes with spaces.
pub fn resize(&mut self, len: usize) {
self.0.resize(len, b' ');
}
+ /// Returns the length of this string, in bytes.
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
}
impl Borrow<RawStr> for RawString {
let mut very_long_strings = Vec::new();
for tuple in input
.split('\0')
- .map(|s| s.trim_end_matches('\t'))
+ .map(|s| s.trim_start_matches('\t'))
.filter(|s| !s.is_empty())
{
if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
test_sysfile("variable_display_with_width");
}
+/// Runs the shared `test_sysfile` driver on the `long_variable_names` case.
+#[test]
+fn long_variable_names() {
+ test_sysfile("long_variable_names");
+}
+
+/// Runs the shared `test_sysfile` driver on the `very_long_strings` case.
+#[test]
+fn very_long_strings() {
+ test_sysfile("very_long_strings");
+}
+
fn test_sysfile(name: &str) {
let input_filename = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("src/sys/testdata")
--- /dev/null
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │None │
+│ Number of Cases│ 0│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬────────────────────────╮
+│Label │PSPP synthetic test file│
+│Variables│ 7│
+╰─────────┴────────────────────────╯
+
+╭─────────────────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├─────────────────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│LongVariableName1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│LongVariableName2│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│LongVariableName3│ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│LongVariableName4│ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│Coördinate_X │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│Coördinate_Y │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│Coördinate_Z │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰─────────────────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+7; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+0; # No cases.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
+i8 0 *3;
+
+# Numeric variables.
+2; 0; 0; 0; 0x050800 *2; s8 "LONGVARI";
+2; 0; 0; 0; 0x050800 *2; s8 "LONGVA_A";
+2; 0; 0; 0; 0x050800 *2; s8 "LONGVA_B";
+2; 0; 0; 0; 0x050800 *2; s8 "LONGVA_C";
+2; 0; 0; 0; 0x050800 *2; "CO"; i8 214; "RDINA";
+2; 0; 0; 0; 0x050800 *2; "CO"; i8 214; "RDI_A";
+2; 0; 0; 0; 0x050800 *2; "CO"; i8 214; "RDI_B";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252;
+
+# Machine floating-point info record.
+7; 4; 8; 3; SYSMIS; HIGHEST; LOWEST;
+
+# Long variable names.
+7; 13; 1; COUNT (
+"LONGVARI=LongVariableName1"; i8 9;
+"LONGVA_A=LongVariableName2"; i8 9;
+"LONGVA_B=LongVariableName3"; i8 9;
+"LONGVA_C=LongVariableName4"; i8 9;
+"CO"; i8 214; "RDINA=Co"; i8 246; "rdinate_X"; i8 9;
+"CO"; i8 214; "RDI_A=Co"; i8 246; "rdinate_Y"; i8 9;
+"CO"; i8 214; "RDI_B=Co"; i8 246; "rdinate_Z";
+);
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Dictionary termination record.
+999; 0;
--- /dev/null
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │None │
+│ Number of Cases│ 1│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬────────────────────────╮
+│Label │PSPP synthetic test file│
+│Variables│ 2│
+╰─────────┴────────────────────────╯
+
+╭──────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├──────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│séq256│ 1│ │Nominal │Input│ 32│Left │A256 │A256 │ │
+│str600│ 2│ │Nominal │Input│ 32│Left │A600 │A600 │ │
+╰──────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+109; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+1; # 1 case.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
+i8 0 *3;
+
+# 256-byte string.
+2; 255; 0; 0; 0x01FF00 *2; "S"; i8 201; s6 "Q256";
+(2; -1; 0; 0; 0; 0; s8 "") * 31;
+2; 4; 0; 0; 0x010400 *2; "S"; i8 201; "Q256_1";
+
+# 600-byte string.
+2; 255; 0; 0; 0x01FF00 *2; s8 "STR600";
+(2; -1; 0; 0; 0; 0; s8 "") * 31;
+2; 255; 0; 0; 0x01FF00 *2; s8 "STR600_1";
+(2; -1; 0; 0; 0; 0; s8 "") * 31;
+2; 96; 0; 0; 0x016000 *2; s8 "STR600_2";
+(2; -1; 0; 0; 0; 0; s8 "") * 11;
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252;
+
+# Very long string record.
+7; 14; 1; COUNT (
+"S"; i8 201; "Q256=00256"; i8 0; i8 9;
+"STR600=00600"; i8 0; i8 9;
+);
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Dictionary termination record.
+999; 0;
+
+# Data.
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#" * 4;
+"abcdefgh";
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#" * 9;
+"abcdefghijklmnopqrstuvwxyzABCDEF";