pub values: Vec<Value<S>>,
/// Optional range of missing values.
- pub range: Option<(Value<S>, Value<S>)>,
+ pub range: Option<MissingValueRange<S>>,
+}
+
/// A range of missing values, as held in the `range` field of `MissingValues`.
///
/// Either endpoint may be left open. The `Debug` output of `MissingValues`
/// renders the three forms as `low THRU high`, `low THRU HI`, and
/// `LOW THRU high` respectively.
///
/// `S` is the type parameter passed through to [`Value`]; it defaults to
/// `Box<[u8]>`.
+#[derive(Clone)]
+pub enum MissingValueRange<S = Box<[u8]>>
+where
+ S: Debug,
+{
/// Both endpoints are given.
+ In { low: Value<S>, high: Value<S> },
/// Only the low endpoint is given; the high end is open (`HI`).
+ From { low: Value<S> },
/// Only the high endpoint is given; the low end is open (`LOW`).
+ To { high: Value<S> },
}
impl<S> Debug for MissingValues<S>
write!(f, "{value:?}")?;
}
- if let Some((low, high)) = &self.range {
+ if let Some(range) = &self.range {
if !self.values.is_empty() {
write!(f, ", ")?;
}
- write!(f, "{low:?} THRU {high:?}")?;
+ match range {
+ MissingValueRange::In { low, high } => write!(f, "{low:?} THRU {high:?}")?,
+ MissingValueRange::From { low } => write!(f, "{low:?} THRU HI")?,
+ MissingValueRange::To { high } => write!(f, "LOW THRU {high:?}")?,
+ }
}
if self.is_empty() {
};
let mut values = Vec::with_capacity(individual_values);
- for _ in 0..individual_values {
- values.push(read_bytes::<8, _>(r)?);
- }
let range = if has_range {
let low = read_bytes::<8, _>(r)?;
let high = read_bytes::<8, _>(r)?;
} else {
None
};
+ for _ in 0..individual_values {
+ values.push(read_bytes::<8, _>(r)?);
+ }
match VarWidth::try_from(width) {
Ok(VarWidth::Numeric) => {
.into_iter()
.map(|v| Value::Number(endian.parse(v)))
.collect();
- let range = range.map(|(low, high)| {
- (
- Value::Number(endian.parse(low)),
- Value::Number(endian.parse(high)),
- )
- });
+
+ const LOWEST: f64 = f64::MIN.next_up();
+ let range =
+ range.map(
+ |(low, high)| match (endian.parse(low), endian.parse(high)) {
+ (f64::MIN | LOWEST, high) => MissingValueRange::To {
+ high: Value::Number(Some(high)),
+ },
+ (low, f64::MAX) => MissingValueRange::From {
+ low: Value::Number(Some(low)),
+ },
+ (low, high) => MissingValueRange::In {
+ low: Value::Number(Some(low)),
+ high: Value::Number(Some(high)),
+ },
+ },
+ );
return Ok(Self { values, range });
}
Ok(VarWidth::String(width)) if width <= 8 && range.is_none() => {
let mut symbol_table = HashMap::new();
let output = _sack(input, input_file_name, endian, &mut symbol_table)?;
let output = if !symbol_table.is_empty() {
- for (k, v) in symbol_table.iter() {
- println!("{k} => {v:?}");
- }
for (k, v) in symbol_table.iter() {
if v.is_none() {
Err(Error::new(
lexer.get()?;
}
Token::Label(name) => {
- println!("define {name}");
let value = output.len() as u32;
match symbol_table.entry(name.clone()) {
Entry::Vacant(v) => {
T: Bounded + Display + TryFrom<i64> + Copy,
Endian: ToBytes<T, N>,
{
- println!("put_integers {:?}", lexer.token);
let mut n = 0;
while let Some(integer) = lexer.take_if(|t| match t {
Token::Integer(integer) => Some(*integer),
_ => None,
})? {
- println!("got integer {integer}");
let Ok(integer) = integer.try_into() else {
Err(lexer.error(format!(
"{integer} is not in the valid range [{},{}]",
output.extend_from_slice(&lexer.endian.to_bytes(integer));
n += 1;
}
- println!("put_integers {:?} {n}", lexer.token);
if n == 0 {
Err(lexer.error(format!("integer expected after '{name}'")))?
}
};
self.input = rest;
let repr = &start[..start.len() - rest.len()];
- println!("{token:?} {repr}");
Ok(Some((token, repr)))
}
}
--- /dev/null
+use std::io::Cursor;
+
+use crate::{
+ endian::Endian,
+ sys::{
+ cooked::{decode, Headers},
+ raw::{encoding_from_headers, Decoder, Reader, Record},
+ sack::sack,
+ },
+};
+
// Smoke test for variable labels and missing values.
//
// A synthetic system file is described in the textual form accepted by
// `sack`, converted with `Endian::Big`, and then pushed through every stage
// of the reader: raw records, encoding detection, per-record decoding,
// `Headers`, and finally `decode`.
//
// Nothing is asserted about the decoded content. The test fails only if one
// of the `unwrap()` calls panics; the resulting dictionary and metadata are
// printed for manual inspection.
+#[test]
+fn variable_labels_and_missing_values() {
+ let input = r#"
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+28; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+1; # 1 case.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52";
+"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
+i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Numeric variable, variable label.
+2; 0; 1; 0; 0x050800 *2; s8 "NUM2";
+32; "Numeric variable 2's label ("; i8 249; i8 250; i8 251; ")";
+
+# Numeric variable, one missing value.
+2; 0; 0; 1; 0x050800 *2; s8 "NUM3";
+1.0;
+
+# Numeric variable, variable label and missing value.
+2; 0; 1; 1; 0x050800 *2; s8 "NUM4";
+30; "Another numeric variable label"; i8 0 * 2;
+1.0;
+
+# Numeric variable, two missing values.
+2; 0; 0; 2; 0x050800 *2; s8 "NUM5"; 1.0; 2.0;
+
+# Numeric variable, three missing values.
+2; 0; 0; 3; 0x050800 *2; s8 "NUM6"; 1.0; 2.0; 3.0;
+
+# Numeric variable, range of missing values.
+2; 0; 0; -2; 0x050800 *2; s8 "NUM7"; 1.0; 3.0;
+
+# Numeric variables, range of missing values plus discrete value.
+2; 0; 0; -3; 0x050800 *2; s8 "NUM8"; 1.0; 3.0; 5.0;
+2; 0; 0; -3; 0x050800 *2; s8 "NUM9"; 1.0; HIGHEST; -5.0;
+2; 0; 0; -3; 0x050800 *2; "NUM"; i8 192; i8 200; i8 204; i8 209; i8 210;
+LOWEST; 1.0; 5.0;
+
+# String variable, no label or missing values.
+2; 4; 0; 0; 0x010400 *2; s8 "STR1";
+
+# String variable, variable label.
+2; 4; 1; 0; 0x010400 *2; s8 "STR2";
+25; "String variable 2's label"; i8 0 * 3;
+
+# String variable, one missing value.
+2; 4; 0; 1; 0x010400 *2; s8 "STR3"; s8 "MISS";
+
+# String variable, variable label and missing value.
+2; 4; 1; 1; 0x010400 *2; s8 "STR4";
+29; "Another string variable label"; i8 0 * 3;
+s8 "OTHR";
+
+# String variable, two missing values.
+2; 4; 0; 2; 0x010400 *2; s8 "STR5"; s8 "MISS"; s8 "OTHR";
+
+# String variable, three missing values.
+2; 4; 0; 3; 0x010400 *2; s8 "STR6"; s8 "MISS"; s8 "OTHR"; s8 "MORE";
+
+# Long string variable, one missing value.
+# (This is not how SPSS represents missing values for long strings--it
+# uses a separate record as shown later below--but old versions of PSPP
+# did use this representation so we continue supporting it for backward
+# compatibility.
+2; 11; 0; 1; 0x010b00 *2; s8 "STR7"; "first8by";
+2; -1; 0; 0; 0; 0; s8 "";
+
+# Long string variables that will have missing values added with a
+# later record.
+2; 9; 0; 0; 0x010900 *2; s8 "STR8";
+2; -1; 0; 0; 0; 0; s8 "";
+2; 10; 0; 0; 0x010a00 *2; s8 "STR9";
+2; -1; 0; 0; 0; 0; s8 "";
+2; 11; 0; 0; 0x010b00 *2; s8 "STR10";
+2; -1; 0; 0; 0; 0; s8 "";
+
+# Long string variable, value label.
+2; 25; 1; 0; 0x011900 *2; s8 "STR11"; 14; "25-byte string"; i8 0 * 2;
+( 2; -1; 0; 0; 0; 0; s8 ""; ) * 2;
+# Variable label fields on continuation records have been spotted in system
+# files created by "SPSS Power Macintosh Release 6.1".
+2; -1; 1; 0; 0; 0; s8 ""; 20; "dummy variable label";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252;
+
+# Machine floating-point info record.
+7; 4; 8; 3; SYSMIS; HIGHEST; LOWEST;
+
+# Long string variable missing values record.
+7; 22; 1; COUNT (
+# One missing value for STR8.
+COUNT("STR8"); i8 1; 8; "abcdefgh";
+
+# Two missing values for STR9.
+COUNT("STR9"); i8 2; 8; "abcdefgh"; "01234567";
+
+# Three missing values for STR9.
+COUNT("STR10"); i8 3; 8; "abcdefgh"; "01234567"; "0 ";
+);
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Dictionary termination record.
+999; 0;
+
+# Data.
+1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0; 8.0; 9.0; 10.0;
+s8 "abcd"; s8 "efgh"; s8 "ijkl"; s8 "mnop"; s8 "qrst"; s8 "uvwx";
+s16 "yzABCDEFGHI"; s16 "JKLMNOPQR"; s16 "STUVWXYZ01";
+s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC";
+"#;
// Convert the description above with `sack`, using big-endian byte order.
// The second argument is the optional input file name, which this in-memory
// input does not have.
+ let sysfile = sack(input, None, Endian::Big).unwrap();
// Read the raw records from the in-memory file; reader warnings go to stdout.
+ let cursor = Cursor::new(sysfile);
+ let reader = Reader::new(cursor, |warning| println!("{warning}")).unwrap();
+ let headers: Vec<Record> = reader.collect::<Result<Vec<_>, _>>().unwrap();
// Choose the encoding from the raw records, then decode each record with it.
// Diagnostics from this point on are written to stderr.
+ let encoding = encoding_from_headers(&headers, &|e| eprintln!("{e}")).unwrap();
+ let decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
+ let mut decoded_records = Vec::new();
+ for header in headers {
+ decoded_records.push(header.decode(&decoder).unwrap());
+ }
// Group the decoded records into `Headers` and build the dictionary and
// metadata from them.
+ let headers = Headers::new(decoded_records, &|e| eprintln!("{e}")).unwrap();
+ let (dictionary, metadata) = decode(headers, encoding, |e| eprintln!("{e}")).unwrap();
// No assertions: dump the result so it can be checked by eye (for example
// with `cargo test -- --nocapture`).
+ println!("{dictionary:#?}");
+ println!("{metadata:#?}");
+}