-//! Converting big- and little-endian `[u8]` arrays to and from primitive types.
-
// PSPP - a program for statistical analysis.
// Copyright (C) 2025 Free Software Foundation, Inc.
//
// You should have received a copy of the GNU General Public License along with
// this program. If not, see <http://www.gnu.org/licenses/>.
+//! Converting big- and little-endian `[u8]` arrays to and from primitive types.
+
use binrw::Endian;
/// Converts a primitive type into a big- or little-endian `[u8]` array.
// You should have received a copy of the GNU General Public License along with
// this program. If not, see <http://www.gnu.org/licenses/>.
-//! PSPP lexical analysis.
+//! Lexical analysis for PSPP syntax.
//!
//! PSPP divides traditional "lexical analysis" or "tokenization" into three
//! phases:
use binrw::Endian;
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::UTF_8;
+use hexplay::HexView;
use crate::{
crypto::EncryptedFile,
.add_values([Datum::new_utf8("0")])
.unwrap();
dictionary.add_var(s1).unwrap();
- /*
+
+ let mut s2 = Variable::new(Identifier::new("s2").unwrap(), VarWidth::String(9), UTF_8);
+ s2.value_labels.insert(
+ Datum::String(ByteString::from("0 ")),
+ String::from("Fourth value label"),
+ );
+ s2.value_labels.insert(
+ Datum::String(ByteString::from("01234567")),
+ String::from("Fifth value label"),
+ );
+ s2.value_labels.insert(
+ Datum::String(ByteString::from("012345678")),
+ String::from("Sixth value label"),
+ );
+ s2.missing_values_mut()
+ .add_values([Datum::String("12").cloned(), Datum::String("123").cloned()])
+ .unwrap();
+ dictionary.add_var(s2).unwrap();
+
+ let mut s3 = Variable::new(Identifier::new("s3").unwrap(), VarWidth::String(9), UTF_8);
+ s3.missing_values_mut()
+ .add_values([
+ Datum::String("1234").cloned(),
+ Datum::String("12345").cloned(),
+ Datum::String("12345678").cloned(),
+ ])
+ .unwrap();
+ dictionary.add_var(s3).unwrap();
+
+ dictionary
+ .add_var(Variable::new(
+ Identifier::new("s4").unwrap(),
+ VarWidth::String(9),
+ UTF_8,
+ ))
+ .unwrap();
let mut cases = WriteOptions::reproducible(compression)
.write_writer(&dictionary, Cursor::new(Vec::new()))
.unwrap();
for case in [
- [1, 1, 1, 2],
- [1, 1, 2, 30],
- [1, 2, 1, 8],
- [1, 2, 2, 20],
- [2, 1, 1, 2],
- [2, 1, 2, 22],
- [2, 2, 1, 1],
- [2, 2, 2, 3],
+ ["1", "1", "1", "2"],
+ ["1", "1", "2", "30"],
+ ["1", "2", "1", "8"],
+ ["1", "2", "2", "20"],
+ ["2", "1", "1", "2"],
+ ["2", "1", "2", "22"],
+ ["2", "2", "1", "1"],
+ ["2", "2", "2", "3"],
] {
cases
- .write_case(
- case.into_iter()
- .map(|number| Datum::<&str>::Number(Some(number as f64))),
- )
+ .write_case(case.into_iter().map(|s| Datum::String(s)))
.unwrap();
}
let sysfile = cases.finish().unwrap().unwrap().into_inner();
let expected_filename = PathBuf::from(&format!(
- "src/sys/testdata/write-numeric-{compression_string}.expected"
+ "src/sys/testdata/write-string-{compression_string}.expected"
));
let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap();
- test_sysfile(Cursor::new(sysfile), &expected, &expected_filename);*/
+ test_sysfile(Cursor::new(sysfile), &expected, &expected_filename);
}
}
--- /dev/null
+╭──────────────────────┬────────────────────╮
+│ Created │30-JUL-2025 15:07:55│
+├──────────────────────┼────────────────────┤
+│Writer Product │PSPP TEST DATA FILE │
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│ 8│
+╰──────────────────────┴────────────────────╯
+
+╭─────────┬─╮
+│Variables│4│
+╰─────────┴─╯
+
+╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬───────────────────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │
+├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼───────────────────────────┤
+│s1│ 1│ │Nominal │Input│ 9│Left │A9 │A9 │"0" │
+│s2│ 2│ │Nominal │Input│ 9│Left │A9 │A9 │"12"; "123" │
+│s3│ 3│ │Nominal │Input│ 9│Left │A9 │A9 │"1234"; "12345"; "12345678"│
+│s4│ 4│ │Nominal │Input│ 9│Left │A9 │A9 │ │
+╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴───────────────────────────╯
+
+╭────────────────────────┬──────────────────╮
+│Variable Value │ │
+├────────────────────────┼──────────────────┤
+│s1 abc │First value label │
+│ abcdefgh │Second value label│
+│ abcdefghi│Third value label │
+├────────────────────────┼──────────────────┤
+│s2 0 │Fourth value label│
+│ 01234567 │Fifth value label │
+│ 012345678│Sixth value label │
+╰────────────────────────┴──────────────────╯
+
+╭────┬─────────┬─────────┬─────────┬─────────╮
+│Case│ s1 │ s2 │ s3 │ s4 │
+├────┼─────────┼─────────┼─────────┼─────────┤
+│1 │1 │1 │1 │2 │
+│2 │1 │1 │2 │30 │
+│3 │1 │2 │1 │8 │
+│4 │1 │2 │2 │20 │
+│5 │2 │1 │1 │2 │
+│6 │2 │1 │2 │22 │
+│7 │2 │2 │1 │1 │
+│8 │2 │2 │2 │3 │
+╰────┴─────────┴─────────┴─────────┴─────────╯
--- /dev/null
+╭──────────────────────┬────────────────────╮
+│ Created │30-JUL-2025 15:07:55│
+├──────────────────────┼────────────────────┤
+│Writer Product │PSPP TEST DATA FILE │
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────┤
+│ Compression │None │
+│ Number of Cases│ 8│
+╰──────────────────────┴────────────────────╯
+
+╭─────────┬─╮
+│Variables│4│
+╰─────────┴─╯
+
+╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬───────────────────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │
+├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼───────────────────────────┤
+│s1│ 1│ │Nominal │Input│ 9│Left │A9 │A9 │"0" │
+│s2│ 2│ │Nominal │Input│ 9│Left │A9 │A9 │"12"; "123" │
+│s3│ 3│ │Nominal │Input│ 9│Left │A9 │A9 │"1234"; "12345"; "12345678"│
+│s4│ 4│ │Nominal │Input│ 9│Left │A9 │A9 │ │
+╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴───────────────────────────╯
+
+╭────────────────────────┬──────────────────╮
+│Variable Value │ │
+├────────────────────────┼──────────────────┤
+│s1 abc │First value label │
+│ abcdefgh │Second value label│
+│ abcdefghi│Third value label │
+├────────────────────────┼──────────────────┤
+│s2 0 │Fourth value label│
+│ 01234567 │Fifth value label │
+│ 012345678│Sixth value label │
+╰────────────────────────┴──────────────────╯
+
+╭────┬─────────┬─────────┬─────────┬─────────╮
+│Case│ s1 │ s2 │ s3 │ s4 │
+├────┼─────────┼─────────┼─────────┼─────────┤
+│1 │1 │1 │1 │2 │
+│2 │1 │1 │2 │30 │
+│3 │1 │2 │1 │8 │
+│4 │1 │2 │2 │20 │
+│5 │2 │1 │1 │2 │
+│6 │2 │1 │2 │22 │
+│7 │2 │2 │1 │1 │
+│8 │2 │2 │2 │3 │
+╰────┴─────────┴─────────┴─────────┴─────────╯
--- /dev/null
+╭──────────────────────┬────────────────────╮
+│ Created │30-JUL-2025 15:07:55│
+├──────────────────────┼────────────────────┤
+│Writer Product │PSPP TEST DATA FILE │
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────┤
+│ Compression │ZSAV │
+│ Number of Cases│ 8│
+╰──────────────────────┴────────────────────╯
+
+╭─────────┬─╮
+│Variables│4│
+╰─────────┴─╯
+
+╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬───────────────────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │
+├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼───────────────────────────┤
+│s1│ 1│ │Nominal │Input│ 9│Left │A9 │A9 │"0" │
+│s2│ 2│ │Nominal │Input│ 9│Left │A9 │A9 │"12"; "123" │
+│s3│ 3│ │Nominal │Input│ 9│Left │A9 │A9 │"1234"; "12345"; "12345678"│
+│s4│ 4│ │Nominal │Input│ 9│Left │A9 │A9 │ │
+╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴───────────────────────────╯
+
+╭────────────────────────┬──────────────────╮
+│Variable Value │ │
+├────────────────────────┼──────────────────┤
+│s1 abc │First value label │
+│ abcdefgh │Second value label│
+│ abcdefghi│Third value label │
+├────────────────────────┼──────────────────┤
+│s2 0 │Fourth value label│
+│ 01234567 │Fifth value label │
+│ 012345678│Sixth value label │
+╰────────────────────────┴──────────────────╯
+
+╭────┬─────────┬─────────┬─────────┬─────────╮
+│Case│ s1 │ s2 │ s3 │ s4 │
+├────┼─────────┼─────────┼─────────┼─────────┤
+│1 │1 │1 │1 │2 │
+│2 │1 │1 │2 │30 │
+│3 │1 │2 │1 │8 │
+│4 │1 │2 │2 │20 │
+│5 │2 │1 │1 │2 │
+│6 │2 │1 │2 │22 │
+│7 │2 │2 │1 │1 │
+│8 │2 │2 │2 │3 │
+╰────┴─────────┴─────────┴─────────┴─────────╯
dictionary: &'a Dictionary,
}
-fn count_segments(case_vars: &[CaseVar]) -> u32 {
- case_vars.iter().map(CaseVar::n_segments).sum::<usize>() as u32
-}
-
fn put_attributes(attributes: &Attributes, s: &mut String) {
for (name, values) in attributes.iter(true) {
write!(s, "{name}(").unwrap();
bytes.try_into().unwrap()
}
+ fn count_variable_positions(case_vars: &[CaseVar]) -> u32 {
+ case_vars
+ .iter()
+ .map(CaseVar::n_variable_positions)
+ .sum::<usize>() as u32
+ }
+
let header = RawHeader {
magic: if self.options.compression == Some(Compression::ZLib) {
Magic::Zsav
self.dictionary.encoding(),
),
layout_code: 2,
- nominal_case_size: count_segments(&self.case_vars),
+ nominal_case_size: count_variable_positions(&self.case_vars),
compression_code: match self.options.compression {
Some(Compression::Simple) => 1,
Some(Compression::ZLib) => 2,
None => 0,
},
weight_index: if let Some(weight_index) = self.dictionary.weight_index() {
- count_segments(&self.case_vars[..weight_index]) + 1
+ count_variable_positions(&self.case_vars[..weight_index]) + 1
} else {
0
},
let record = RawVariableRecord {
width: seg0_width.as_string_width().unwrap_or(0) as i32,
has_variable_label: variable.label.is_some() as u32,
- missing_value_code: if variable.width.is_long_string() {
+ missing_value_code: if !variable.width.is_long_string() {
let n = variable.missing_values().values().len() as i32;
match variable.missing_values().range() {
Some(_) => -(n + 2),
}
fn write_variable_display_parameters(&mut self) -> Result<(), BinError> {
- (7u32, 11u32, 4u32, count_segments(&self.case_vars) * 3).write_le(self.writer)?;
+ (
+ 7u32,
+ 11u32,
+ 4u32,
+ self.case_vars
+ .iter()
+ .map(CaseVar::n_segments)
+ .sum::<usize>() as u32
+ * 3,
+ )
+ .write_le(self.writer)?;
for variable in &self.dictionary.variables {
let measure = match variable.measure {
None => 0,
(
name.len() as u32,
&name[..],
- variable.missing_values().values().len() as u32,
+ variable.missing_values().values().len() as u8,
8u32,
)
.write_le(&mut cursor)?;
CaseVar::String(encoding) => encoding.len(),
}
}
+ fn n_variable_positions(&self) -> usize {
+ match self {
+ CaseVar::Numeric => 1,
+ CaseVar::String(encoding) => encoding
+ .iter()
+ .map(|segment| (segment.data_bytes + segment.padding_bytes) / 8)
+ .sum(),
+ }
+ }
}
/// System file writer.
CaseVar::String(encoding) => {
let mut s = datum.as_string().unwrap().raw_string_bytes();
for segment in encoding {
+ let spaces = segment.data_bytes.saturating_sub(s.len());
+ let data_bytes = segment.data_bytes - spaces;
+
let data;
- (data, s) = s.split_at(segment.data_bytes);
- (data, Pad::new(segment.padding_bytes, 0)).write_le(&mut self.inner)?;
+ (data, s) = s.split_at(data_bytes);
+ (
+ data,
+ Pad::new(spaces, b' '),
+ Pad::new(segment.padding_bytes, 0),
+ )
+ .write_le(&mut self.inner)?;
}
}
}
CaseVar::String(encoding) => {
let mut s = datum.as_string().unwrap().raw_string_bytes();
for segment in encoding {
+ let excess = segment.data_bytes.saturating_sub(s.len());
+ let data_bytes = segment.data_bytes - excess;
+ let padding_bytes = segment.padding_bytes + excess;
+
let data;
- (data, s) = s.split_at(segment.data_bytes);
+ (data, s) = s.split_at(data_bytes);
let (chunks, remainder) = data.as_chunks::<8>();
for chunk in chunks {
} else {
self.put_opcode(253)?;
self.data.extend_from_slice(remainder);
- self.data.extend(repeat_n(0, 8 - remainder.len()));
+ self.data.extend(repeat_n(b' ', 8 - remainder.len()));
}
}
- for _ in 0..segment.padding_bytes / 8 {
+ for _ in 0..padding_bytes / 8 {
self.put_opcode(254)?;
}
}