width: usize,
saved: Option<(usize, BreakOpportunity)>,
breaks: B,
+ trailing_newlines: usize,
}
impl<'a, B> Iterator for LineBreaks<'a, B>
continue;
}
+ let segment = &self.text[self.indexes.end..index];
let segment_width = self.text[self.indexes.end..index].width();
if self.width == 0 || self.width + segment_width <= self.max_width {
// Add this segment to the current line.
return Some(segment);
}
}
- None
+ if self.trailing_newlines > 1 {
+ self.trailing_newlines -= 1;
+ Some("")
+ } else {
+ None
+ }
}
}
text: &str,
width: usize,
) -> LineBreaks<'_, impl Iterator<Item = (usize, BreakOpportunity)> + Clone + '_> {
+ // Trim trailing new-lines from the text, because the linebreaking algorithm
+ // treats them as if they have width. That is, if you break `"a b c\na b
+ // c\n"` with a 5-character width, then you end up with:
+ //
+ // ```text
+ // a b c
+ // a b
+ // c
+ // ```
+ //
+ // So, we trim trailing new-lines and then add in extra blank lines at the
+ // end if necessary.
+ //
+ // (The linebreaking algorithm treats new-lines in the middle of the text in
+ // a normal way, though.)
+ let trimmed = text.trim_end_matches('\n');
LineBreaks {
- text,
+ text: trimmed,
max_width: width,
indexes: 0..0,
width: 0,
saved: None,
- breaks: linebreaks(text),
+ breaks: linebreaks(trimmed),
+ trailing_newlines: text.len() - trimmed.len(),
}
}
}
#[test]
fn line_breaks() {
+ test_line_breaks(
+ "One line of text\nOne line of text\n",
+ 16,
+ vec!["One line of text", "One line of text"],
+ );
+ test_line_breaks("a b c\na b c\na b c\n", 5, vec!["a b c", "a b c", "a b c"]);
for width in 0..=6 {
test_line_breaks("abc def ghi", width, vec!["abc", "def", "ghi"]);
}
-use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
+use std::{
+ cell::RefCell,
+ collections::{BTreeMap, HashMap},
+ ops::Range,
+ rc::Rc,
+};
use crate::{
calendar::date_time_to_pspp,
InvalidWeightVar { name: Identifier, index: u32 },
#[error(
- "File weight variable index {index} is greater than maximum variable index {max_index}."
+ "File weight variable index {index} is invalid because it exceeds maximum variable index {max_index}."
+ )]
+ WeightIndexOutOfRange { index: u32, max_index: usize },
+
+ #[error(
+ "File weight variable index {index} is invalid because it refers to long string continuation for variable {name}."
)]
- InvalidWeightIndex { index: u32, max_index: usize },
+ WeightIndexStringContinuation { index: u32, name: Identifier },
#[error("{0}")]
InvalidRole(InvalidRole),
n_generated_names: 0,
};
- let mut var_index_map = HashMap::new();
+ let mut var_index_map = BTreeMap::new();
let mut value_index = 0;
for (index, input) in headers
.variable
}
if let Some(weight_index) = headers.header.weight_index {
- if let Some(dict_index) = var_index_map.get(&(weight_index as usize - 1)) {
+ let index = weight_index as usize - 1;
+ if index >= value_index {
+ warn(Error::WeightIndexOutOfRange {
+ index: weight_index,
+ max_index: var_index_map.len(),
+ });
+ } else {
+ let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap();
let variable = &dictionary.variables[*dict_index];
- if variable.is_numeric() {
- dictionary.weight = Some(*dict_index);
+ if *var_index == index {
+ if variable.is_numeric() {
+ dictionary.weight = Some(*dict_index);
+ } else {
+ warn(Error::InvalidWeightVar {
+ index: weight_index,
+ name: variable.name.clone(),
+ });
+ }
} else {
- warn(Error::InvalidWeightVar {
+ warn(Error::WeightIndexStringContinuation {
index: weight_index,
name: variable.name.clone(),
});
}
- } else {
- warn(Error::InvalidWeightIndex {
- index: weight_index,
- max_index: var_index_map.len(),
- });
}
}
test_sysfile("invalid_long_string_missing_values");
}
+#[test]
+fn weight_must_be_numeric() {
+ test_sysfile("weight_must_be_numeric");
+}
+
+#[test]
+fn weight_variable_bad_index() {
+ test_sysfile("weight_variable_bad_index");
+}
+
+#[test]
+fn weight_variable_continuation() {
+ test_sysfile("weight_variable_continuation");
+}
+
+#[test]
+fn multiple_documents_records() {
+ test_sysfile("multiple_documents_records");
+}
+
/// Duplicate variable name handling negative test.
///
/// SPSS-generated system file can contain duplicate variable names (see bug
--- /dev/null
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─────────────────────╮
+│Variables│ 2│
+│Documents│One line of documents│
+│ │One line of documents│
+╰─────────┴─────────────────────╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│num2│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 2; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variables, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+2; 0; 0; 0; 0x050800 *2; s8 "NUM2";
+
+# Two document records.
+(6; 1; s80 "One line of documents") >>* 2<<;
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Dictionary termination record.
+999; 0;
+
+# Data.
+1.0;
--- /dev/null
+File designates string variable STR1 (index 2) as weight variable, but weight variables must be numeric.
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│2│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│str1│ 2│ │Nominal │Input│ 4│Left │A4 │A4 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 2; 1; >>2<<; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# String variable.
+2; 4; 0; 0; 0x010400 *2; s8 "STR1";
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;
--- /dev/null
+File weight variable index 3 is invalid because it exceeds maximum variable index 2.
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│2│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│str1│ 2│ │Nominal │Input│ 4│Left │A4 │A4 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 2; 1; >>3<<; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# String variable.
+2; 4; 0; 0; 0x010400 *2; s8 "STR1";
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;
--- /dev/null
+File weight variable index 2 is invalid because it refers to long string continuation for variable STR1.
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│2│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│str1│ 1│ │Nominal │Input│ 9│Left │A9 │A9 │ │
+│num1│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 3; 1; >>2<<; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Long string variable.
+2; 9; 0; 0; 0x010900 *2; s8 "STR1";
+(2; -1; 0; 0; 0; 0; s8 "");
+
+# Numeric variable.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;