1 use std::{borrow::Cow, collections::{HashSet, HashMap}, cmp::Ordering};
3 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
4 use encoding_rs::Encoding;
5 use num::integer::div_ceil;
7 format::{Spec, UncheckedSpec},
8 identifier::{Error as IdError, Identifier},
9 raw::{self, MissingValues, VarType},
10 {endian::Endian, Compression},
12 use thiserror::Error as ThisError;
14 #[derive(ThisError, Debug)]
/// A variable record's width field holds a value the decoder does not accept.
/// `offset` is the record's offset in the file (shown in hex) and `width` is
/// the rejected value.  Widths from -1 through 255 inclusive are accepted, so
/// the range in the message is closed at both ends.
16 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255].")]
17 BadVariableWidth { offset: u64, width: i32 },
/// The file carries corrupted metadata, which the message attributes to a
/// buggy version of PSPP.
19 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
20 BadLongMissingValueFormat,
/// The header's creation date could not be parsed.  `creation_date` is the
/// decoded text that was rejected.
22 #[error("File creation date {creation_date} is not in the expected \"DD MMM YY\" format. Using 01 Jan 1970.")]
23 InvalidCreationDate { creation_date: String },
/// The header's creation time could not be parsed.  `creation_time` is the
/// decoded text that was rejected.
25 #[error("File creation time {creation_time} is not in the expected \"HH:MM:SS\" format. Using midnight.")]
26 InvalidCreationTime { creation_time: String },
/// A name was rejected when constructing an `Identifier`.  The `#[from]`
/// attribute lets `?` convert the identifier error into this variant.
28 #[error("Invalid variable name: {0}")]
29 BadIdentifier(#[from] IdError),
/// Placeholder for failures that do not have a dedicated message yet.
31 #[error("Details TBD")]
/// Compression scheme, if any (presumably `None` means the data is
/// uncompressed; confirm against the raw reader).
36 pub compression: Option<Compression>,
/// Character encoding used to decode byte strings and to build identifiers.
38 pub encoding: &'static Encoding,
/// Variable names claimed so far; `take_name` adds to this set.
39 pub var_names: HashSet<Identifier>,
/// Maps the first dictionary index assigned to a variable to that variable's
/// name; filled in by `take_dict_indexes`.
40 pub dict_indexes: HashMap<usize, Identifier>,
/// Next unassigned dictionary index; advanced by `take_dict_indexes`.
41 n_dict_indexes: usize,
/// Counter used to build generated `VARnnn` names.
42 n_generated_names: usize,
/// Tries to claim `id` as a variable name.
///
/// Returns `true` if `id` was not yet in `var_names` and has now been added,
/// `false` if the name was already taken.
46 fn take_name(&mut self, id: &Identifier) -> bool {
47 self.var_names.insert(id.clone())
/// Produces a variable name of the form `VARnnn`, where `nnn` is the
/// generated-name counter zero-padded to at least three digits, and claims it
/// in `var_names`.
49 fn generate_name(&mut self) -> Identifier {
// The counter is bumped before the candidate name is formatted.
51 self.n_generated_names += 1;
52 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
// A candidate is accepted only if nothing else has claimed that name.
54 if self.take_name(&name) {
// Guards against the counter overflowing on a later increment.
57 assert!(self.n_generated_names < usize::MAX);
/// Reserves dictionary indexes for the variable `id` with the given `width`
/// and records `id` under the first index reserved.
///
/// A numeric variable takes one index; a string variable of width `w` takes
/// `w / 8` rounded up.  NOTE(review): the `usize` result is presumably the
/// first reserved index (`dict_index`) -- confirm.
60 fn take_dict_indexes(&mut self, id: &Identifier, width: VarWidth) -> usize {
62 VarWidth::Numeric => 1,
63 VarWidth::String(w) => div_ceil(w as usize, 8),
// Remember where this variable starts before advancing the counter.
65 let dict_index = self.n_dict_indexes;
66 self.dict_indexes.insert(self.n_dict_indexes, id.clone());
67 self.n_dict_indexes += n;
/// Decodes the bytes in `input` into text using the decoder's encoding.
///
/// The result is a `Cow`, so it can borrow from `input` when no conversion is
/// needed.  `warn` is available for reporting problems found while decoding.
71 fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
// No byte-order-mark handling is applied; `malformed` is true when the input
// contained byte sequences that are invalid in this encoding.
72 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Conversion from a raw record (`Self::Input`) into its decoded form.
80 pub trait Decode: Sized {
/// Decodes `input` with the help of `decoder`.
///
/// Problems that do not prevent decoding are reported through `warn`; an
/// unrecoverable problem is returned as `Err`.
82 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error>;
/// Text decoded from the raw header's `eye_catcher` bytes.
87 pub eye_catcher: String,
/// The raw header's `weight_index` converted to `usize`, when one is present.
88 pub weight_index: Option<usize>,
/// The raw header's `n_cases` converted to `u64`, when one is present.
89 pub n_cases: Option<u64>,
/// Creation timestamp, combined from the header's creation date and time.
90 pub creation: NaiveDateTime,
/// Text decoded from the raw header's `file_label` bytes.
91 pub file_label: String,
94 impl Decode for Header {
95 type Input = crate::raw::Header;
/// Decodes the raw file header.
///
/// Text fields are decoded with the decoder's encoding.  A creation date or
/// time that cannot be parsed is reported through `warn` with the offending
/// text, and a fallback value is used so that decoding still continues.
97 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
98 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
99 let file_label = decoder.decode_string(&input.file_label.0, &warn);
100 let creation_date = decoder.decode_string(&input.creation_date.0, &warn);
// The date is laid out as "DD MMM YY" (for example "01 Jan 70"): day,
// abbreviated month name and two-digit year separated by spaces.  chrono's
// `%v` means `%e-%b-%Y` (dash-separated, four-digit year), which never matches
// that layout, so the fields are spelled out explicitly instead.
101 let creation_date = NaiveDate::parse_from_str(&creation_date, "%d %b %y").unwrap_or_else(|_| {
102 warn(Error::InvalidCreationDate {
103 creation_date: creation_date.into(),
107 let creation_time = decoder.decode_string(&input.creation_time.0, &warn);
// The time is laid out as 24-hour "HH:MM:SS".
109 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
110 warn(Error::InvalidCreationTime {
111 creation_time: creation_time.into(),
116 eye_catcher: eye_catcher.into(),
117 weight_index: input.weight_index.map(|n| n as usize),
118 n_cases: input.n_cases.map(|n| n as u64),
// Date and time are parsed separately above and joined here.
119 creation: NaiveDateTime::new(creation_date, creation_time),
120 file_label: file_label.into(),
// `PartialOrd` is not derived for this type; it is implemented by hand below.
125 #[derive(Copy, Clone, PartialEq, Eq)]
131 impl PartialOrd for VarWidth {
/// Compares two widths of the same kind: two numeric widths are equal, and
/// two string widths compare by their length.
///
/// NOTE(review): a numeric width compared against a string width is expected
/// to have no ordering (`None`) -- confirm.
132 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
133 match (self, other) {
134 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
135 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
141 impl From<VarWidth> for VarType {
/// Reduces a width to its type: numeric stays numeric, and a string of any
/// width becomes `VarType::String`.
142 fn from(source: VarWidth) -> Self {
144 VarWidth::Numeric => VarType::Numeric,
145 VarWidth::String(_) => VarType::String,
/// A decoded variable record.
150 pub struct Variable {
/// The variable's name.  Decoding substitutes a generated name when the name
/// in the file is already taken.
152 pub name: Identifier,
/// Print format, produced by `decode_format` from the raw record's print format.
153 pub print_format: Spec,
/// Write format, produced by `decode_format` from the raw record's write format.
154 pub write_format: Spec,
/// Missing values, copied from the raw record.
155 pub missing_values: MissingValues,
/// The variable's label as decoded text, if the raw record has one.
156 pub label: Option<String>,
/// Turns a raw format specification into a checked `Spec` for the variable
/// called `name`, whose width is `width`.
///
/// The raw value is converted in stages and then checked for compatibility
/// with `width`.  If any stage fails, the default format for `width` is
/// returned instead.
159 fn decode_format(raw: raw::Spec, name: &str, width: VarWidth) -> Spec {
160 UncheckedSpec::try_from(raw)
161 .and_then(Spec::try_from)
162 .and_then(|x| x.check_width_compatibility(Some(name), width))
// NOTE(review): the leading underscore suggests the failure value is not
// reported anywhere yet -- confirm.
163 .unwrap_or_else(|_warning| {
165 Spec::default_for_width(width)
// Decodes one raw variable record.  `decoder` is mutable because decoding
// claims the variable's name and its dictionary indexes.  The result is
// `Ok(None)` for a record whose width is -1, and `Err` for a width that is
// not accepted at all.
171 decoder: &mut Decoder,
172 input: &crate::raw::Variable,
173 warn: impl Fn(Error),
174 ) -> Result<Option<Variable>, Error> {
// Width encoding: 0 is numeric, 1 through 255 is a string of that width.
175 let width = match input.width {
176 0 => VarWidth::Numeric,
177 w @ 1..=255 => VarWidth::String(w as u16),
// NOTE(review): -1 presumably marks a continuation of a preceding long
// string variable, which is not a variable of its own -- confirm.
178 -1 => return Ok(None),
180 return Err(Error::BadVariableWidth {
181 offset: input.offset,
186 let name = decoder.decode_string(&input.name.0, &warn);
187 let name = match Identifier::new(&name, decoder.encoding) {
// A name that some earlier variable already claimed is replaced by a
// generated one.
189 if !decoder.take_name(&name) {
190 decoder.generate_name()
// NOTE(review): this second fallback presumably covers a name that is not a
// valid identifier -- confirm.
197 decoder.generate_name()
200 let print_format = decode_format(input.print_format, &name.0, width);
201 let write_format = decode_format(input.write_format, &name.0, width);
205 .map(|label| decoder.decode_string(&label.0, &warn).into());
// Reserve this variable's dictionary indexes; the returned index is not used.
206 decoder.take_dict_indexes(&name, width);
212 missing_values: input.missing_values.clone(),
/// A decoded document record, held as a list of strings.
219 pub struct Document(Vec<String>);
221 impl Decode for Document {
222 type Input = crate::raw::Document;
/// Decodes the raw document record, converting its byte strings to text with
/// the decoder's encoding.
224 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
229 .map(|s| decoder.decode_string(&s.0, &warn).into())
235 pub use crate::raw::FloatInfo;
236 pub use crate::raw::IntegerInfo;
/// Human-readable name of the record type, such as "variable set".
242 const NAME: &'static str;
/// Parses the record's text content.
///
/// Implementations report problems with individual entries through `warn`
/// and keep going; `Err` is for failures that stop parsing altogether.
243 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
/// One named set of variables.
246 pub struct VariableSet {
/// Names of the variables in the set, in the order they were listed.
248 pub vars: Vec<String>,
/// Parses one `NAME=var1 var2 ...` line.
///
/// Fails with `Error::TBD` when the line contains no `=`.
252 fn parse(input: &str) -> Result<Self, Error> {
// Everything before the first `=` is the set name; the rest is the member list.
253 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
// Members are separated by runs of ASCII whitespace.
254 let vars = input.split_ascii_whitespace().map(String::from).collect();
263 pub struct ValueLabelRecord {
/// The variable sets parsed from a "variable set" record.
267 pub struct VariableSetRecord(Vec<VariableSet>);
269 impl TextRecord for VariableSetRecord {
270 const NAME: &'static str = "variable set";
/// Parses the record one line at a time, one variable set per line.
///
/// A line that fails to parse is reported through `warn` and skipped; the
/// remaining lines are still processed.
271 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
272 let mut sets = Vec::new();
273 for line in input.lines() {
274 match VariableSet::parse(line) {
275 Ok(set) => sets.push(set),
276 Err(error) => warn(error),
279 Ok(VariableSetRecord(sets))
/// The text of an "extra product info" record, kept as is.
283 pub struct ProductInfo(pub String);
285 impl TextRecord for ProductInfo {
286 const NAME: &'static str = "extra product info";
/// Stores `input` unchanged.  This never fails and never warns.
287 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
288 Ok(ProductInfo(input.into()))
/// One `short=long` pair from a "long variable names" record.
292 pub struct LongVariableName {
/// The text before the `=` in the pair.
293 pub short_name: String,
/// The text after the `=` in the pair.
294 pub long_name: String,
/// The name pairs parsed from a "long variable names" record.
297 pub struct LongVariableNameRecord(Vec<LongVariableName>);
299 impl TextRecord for LongVariableNameRecord {
300 const NAME: &'static str = "long variable names";
/// Parses tab-separated `short=long` pairs.
301 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
302 let mut names = Vec::new();
// Empty pieces (for example from a trailing tab) are ignored.
303 for pair in input.split('\t').filter(|s| !s.is_empty()) {
// The first `=` separates the short name from the long name.
304 if let Some((short_name, long_name)) = pair.split_once('=') {
305 let name = LongVariableName {
306 short_name: short_name.into(),
307 long_name: long_name.into(),
314 Ok(LongVariableNameRecord(names))
/// One `name=length` entry from a "very long strings" record.
318 pub struct VeryLongString {
/// The text before the `=` in the entry.
319 pub short_name: String,
323 impl VeryLongString {
/// Parses one `name=length` entry.
///
/// Fails with `Error::TBD` when there is no `=` or when the part after it is
/// not a valid `usize`.
324 fn parse(input: &str) -> Result<VeryLongString, Error> {
325 let Some((short_name, length)) = input.split_once('=') else {
326 return Err(Error::TBD);
328 let length: usize = length.parse().map_err(|_| Error::TBD)?;
330 short_name: short_name.into(),
/// The entries parsed from a "very long strings" record.
336 pub struct VeryLongStringRecord(Vec<VeryLongString>);
338 impl TextRecord for VeryLongStringRecord {
339 const NAME: &'static str = "very long strings";
/// Parses every entry in the record.
///
/// An entry that fails to parse is reported through `warn` and skipped.
340 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
341 let mut very_long_strings = Vec::new();
// Trailing tabs are removed from each entry, and entries that end up empty
// are dropped.
344 .map(|s| s.trim_end_matches('\t'))
345 .filter(|s| !s.is_empty())
347 match VeryLongString::parse(tuple) {
348 Ok(vls) => very_long_strings.push(vls),
349 Err(error) => warn(error),
352 Ok(VeryLongStringRecord(very_long_strings))
/// A named attribute with a list of values.
356 pub struct Attribute {
/// The attribute's values, in the order they appear.
358 pub values: Vec<String>,
/// Parses one attribute from the front of `input`.
///
/// On success, returns the attribute together with the unparsed remainder of
/// `input`.  Fails with `Error::TBD` when there is no `(` after the name or
/// when a value is not terminated by a newline.
362 fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
// The name is everything before the first `(`.
363 let Some((name, mut input)) = input.split_once('(') else {
364 return Err(Error::TBD);
366 let mut values = Vec::new();
// Each value runs up to the next newline.
368 let Some((value, rest)) = input.split_once('\n') else {
369 return Err(Error::TBD);
// A value is normally wrapped in single quotes, which are removed before it
// is stored.
371 if let Some(stripped) = value
373 .and_then(|value| value.strip_suffix('\''))
375 values.push(stripped.into());
// A value without the expected quoting is stored unchanged.
378 values.push(value.into());
// A `)` right after a value closes the value list.
380 if let Some(rest) = rest.strip_prefix(')') {
/// A sequence of attributes.
394 pub struct AttributeSet(pub Vec<Attribute>);
// `sentinel` is the character that ends the set, if any.  The result pairs
// the parsed set with the input that remains after it.
399 sentinel: Option<char>,
400 warn: &impl Fn(Error),
401 ) -> Result<(AttributeSet, &'a str), Error> {
402 let mut attributes = Vec::new();
// Look at the next character, if any, to decide whether the set is finished.
404 match input.chars().next() {
// The sentinel itself is skipped by slicing off one byte, so it must be a
// single-byte character.
406 c if c == sentinel => break &input[1..],
// Anything else starts another attribute; a parse failure ends the whole set.
408 let (attribute, rest) = Attribute::parse(input, &warn)?;
409 attributes.push(attribute);
414 Ok((AttributeSet(attributes), rest))
/// The attribute set parsed from a "data file attributes" record.
418 pub struct FileAttributeRecord(AttributeSet);
420 impl TextRecord for FileAttributeRecord {
421 const NAME: &'static str = "data file attributes";
/// Parses the whole record as one attribute set with no terminating sentinel.
422 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
423 let (set, rest) = AttributeSet::parse(input, None, &warn)?;
// Text left over after the set gets separate handling.
424 if !rest.is_empty() {
427 Ok(FileAttributeRecord(set))
/// The attributes attached to one variable.
431 pub struct VarAttributeSet {
/// The text before the `:` that introduces the variable's attributes.
432 pub long_var_name: String,
/// The attributes listed for that variable.
433 pub attributes: AttributeSet,
436 impl VarAttributeSet {
// The result pairs the parsed set with the input that remains after it.
439 warn: &impl Fn(Error),
440 ) -> Result<(VarAttributeSet, &'a str), Error> {
// The variable name is everything before the first `:`; without a `:` the
// input is rejected.
441 let Some((long_var_name, rest)) = input.split_once(':') else {
442 return Err(Error::TBD);
// A `/` ends this variable's attribute list.
444 let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
447 long_var_name: long_var_name.into(),
/// The per-variable attribute sets parsed from a "variable attributes" record.
455 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
457 impl TextRecord for VariableAttributeRecord {
458 const NAME: &'static str = "variable attributes";
/// Parses per-variable attribute sets one after another until the input is
/// used up.
459 fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
460 let mut var_attribute_sets = Vec::new();
461 while !input.is_empty() {
462 match VarAttributeSet::parse(input, &warn) {
// Each successful parse also yields the text that follows the set (`rest`).
463 Ok((var_attribute, rest)) => {
464 var_attribute_sets.push(var_attribute);
473 Ok(VariableAttributeRecord(var_attribute_sets))
/// Display settings for one variable.
489 pub struct VarDisplay {
/// Measurement level, if one is given.
490 pub measure: Option<Measure>,
/// Alignment, if one is given.
492 pub align: Option<Alignment>,
/// A list of `VarDisplay` entries (presumably one per variable, in dictionary
/// order -- confirm).
495 pub struct VarDisplayRecord(pub Vec<VarDisplay>);