1 use std::{borrow::Cow, collections::{HashSet, HashMap}};
3 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
4 use encoding_rs::Encoding;
5 use num::integer::div_ceil;
7 format::{Spec, UncheckedSpec, Width},
8 identifier::{Error as IdError, Identifier},
9 raw::{self, MissingValues},
10 {endian::Endian, Compression},
12 use thiserror::Error as ThisError;
// Error/warning values produced while decoding. Each `#[error(...)]` string is
// the user-visible message for the variant that follows it.
// NOTE(review): the enum header and some variant lines are not visible in this excerpt.
14 #[derive(ThisError, Debug)]
// A variable record carried a width that the decoder rejects; `offset` is the
// record's offset (printed in hex) and `width` the rejected value.
16 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
17 BadVariableWidth { offset: u64, width: i32 },
19 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
20 BadLongMissingValueFormat,
// The header's creation date could not be parsed. The message previously read
// "the expected format \"DD MMM YY\" format"; the duplicated word is removed.
22 #[error("File creation date {creation_date} is not in the expected \"DD MMM YY\" format. Using 01 Jan 1970.")]
23 InvalidCreationDate { creation_date: String },
// The header's creation time could not be parsed (same wording fix as above).
25 #[error("File creation time {creation_time} is not in the expected \"HH:MM:SS\" format. Using midnight.")]
26 InvalidCreationTime { creation_time: String },
// Wraps an identifier error; `#[from]` lets `?` convert `IdError` into this variant.
28 #[error("Invalid variable name: {0}")]
29 BadIdentifier(#[from] IdError),
// Placeholder message; the text-record parsers in this file return `Error::TBD`
// for malformed input.
31 #[error("Details TBD")]
// Fields of the decoder state that the methods below read and update.
// NOTE(review): the struct header (and possibly other fields) are not visible in this excerpt.
// Compression setting, if any; not used by the code visible here.
36 pub compression: Option<Compression>,
/// Character encoding used by `decode_string` and passed to `Identifier::new`.
38 pub encoding: &'static Encoding,
/// Variable names already claimed through `take_name`.
39 pub var_names: HashSet<Identifier>,
/// Maps the first dictionary index reserved for a variable to that variable's
/// name (filled in by `take_dict_indexes`).
40 pub dict_indexes: HashMap<usize, Identifier>,
/// Next unreserved dictionary index; advanced by `take_dict_indexes`.
41 n_dict_indexes: usize,
/// Counter used by `generate_name` to number `VARnnn` names.
42 n_generated_names: usize,
/// Claims `id` by inserting a clone of it into `var_names`.
///
/// Returns the result of the set insertion: `true` if the name was not
/// already present, `false` if it was.
46 fn take_name(&mut self, id: &Identifier) -> bool {
47 self.var_names.insert(id.clone())
/// Produces an identifier of the form `VARnnn`, where `nnn` is the
/// incremented `n_generated_names` counter zero-padded to at least 3 digits.
// NOTE(review): some lines of this function (including how a rejected
// candidate is retried) are not visible in this excerpt.
49 fn generate_name(&mut self) -> Identifier {
51 self.n_generated_names += 1;
52 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
// The candidate is only usable if `take_name` reports it was not already claimed.
54 if self.take_name(&name) {
// Panics rather than letting the counter reach `usize::MAX`.
57 assert!(self.n_generated_names < usize::MAX);
/// Reserves dictionary indexes for variable `id` of the given `width`,
/// recording `id` in `dict_indexes` under the first reserved index.
// NOTE(review): the binding of `n` and the returned expression are on lines
// not visible in this excerpt; presumably `dict_index` is returned — confirm.
60 fn take_dict_indexes(&mut self, id: &Identifier, width: Width) -> usize {
// Widths reaching this arm take `width / 8` indexes, rounded up.
63 w => div_ceil(w, 8) as usize,
// Remember the index before advancing the counter.
65 let dict_index = self.n_dict_indexes;
66 self.dict_indexes.insert(self.n_dict_indexes, id.clone());
67 self.n_dict_indexes += n;
/// Decodes the bytes in `input` to text using `self.encoding`, without BOM
/// handling. The result borrows from `input` when the decoder can avoid
/// allocating (`Cow`).
// NOTE(review): how the `malformed` flag and `warn` are used is on lines not
// visible in this excerpt.
71 fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
72 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Conversion of a raw record (`Self::Input`) into its decoded form.
80 pub trait Decode: Sized {
/// Decodes `input` using `decoder`. Implementations in this file call `warn`
/// for problems they recover from and return `Err` otherwise.
82 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error>;
// Fields of the decoded file header (populated by `Header::decode`).
// NOTE(review): the struct header line is not visible in this excerpt.
/// Eye-catcher text decoded from the raw header.
87 pub eye_catcher: String,
/// Weight index from the raw header, if it has one, converted to `usize`.
88 pub weight_index: Option<usize>,
/// Case count from the raw header, if it has one, converted to `u64`.
89 pub n_cases: Option<u64>,
/// Creation date and time, combined from the header's separate date and time strings.
90 pub creation: NaiveDateTime,
/// File label decoded from the raw header.
91 pub file_label: String,
/// Decodes the raw file header.
///
/// Text fields go through `Decoder::decode_string`; the creation date and
/// time strings are parsed with chrono. A date or time that fails to parse is
/// reported through `warn` (`Error::InvalidCreationDate` /
/// `Error::InvalidCreationTime`) rather than failing the decode.
// NOTE(review): the fallback values are on lines not visible in this excerpt;
// the warning texts say 01 Jan 1970 and midnight.
94 impl Decode for Header {
95 type Input = crate::raw::Header;
97 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
98 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
99 let file_label = decoder.decode_string(&input.file_label.0, &warn);
100 let creation_date = decoder.decode_string(&input.creation_date.0, &warn);
// The expected layout is "DD MMM YY" (see the InvalidCreationDate message):
// day, abbreviated month name and two-digit year separated by spaces.
// chrono's `%v` means `%e-%b-%Y` (dashes, four-digit year), which can never
// match that layout, so the specifiers are spelled out explicitly.
101 let creation_date = NaiveDate::parse_from_str(&creation_date, "%e %b %y").unwrap_or_else(|_| {
102 warn(Error::InvalidCreationDate {
103 creation_date: creation_date.into(),
107 let creation_time = decoder.decode_string(&input.creation_time.0, &warn);
109 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
110 warn(Error::InvalidCreationTime {
111 creation_time: creation_time.into(),
// Assemble the decoded header; numeric fields are widened with `as` casts.
116 eye_catcher: eye_catcher.into(),
117 weight_index: input.weight_index.map(|n| n as usize),
118 n_cases: input.n_cases.map(|n| n as u64),
119 creation: NaiveDateTime::new(creation_date, creation_time),
120 file_label: file_label.into(),
/// A decoded variable record.
// NOTE(review): at least one field line appears to be missing from this excerpt.
125 pub struct Variable {
/// Variable name: the decoded raw name when it is a valid, unused identifier,
/// otherwise a name from `Decoder::generate_name`.
127 pub name: Identifier,
/// Print format, produced by `decode_format` from the raw print format.
128 pub print_format: Spec,
/// Write format, produced by `decode_format` from the raw write format.
129 pub write_format: Spec,
/// Missing values, cloned from the raw record.
130 pub missing_values: MissingValues,
/// Variable label decoded from the raw record, if one is present.
131 pub label: Option<String>,
/// Converts a raw format specification into a `Spec` for the variable `name`
/// of the given `width`.
///
/// The raw value is converted to an `UncheckedSpec`, then to a `Spec`, then
/// checked for compatibility with `width`. If any step fails, the result is
/// `Spec::default_for_width(width)`.
134 fn decode_format(raw: raw::Spec, name: &str, width: Width) -> Spec {
135 UncheckedSpec::try_from(raw)
136 .and_then(Spec::try_from)
137 .and_then(|x| x.check_width_compatibility(Some(name), width))
// The failure value is bound as `_warning` and is not used on the lines visible here.
138 .unwrap_or_else(|_warning| {
140 Spec::default_for_width(width)
// Decodes one raw variable record, updating `decoder`'s name and
// dictionary-index bookkeeping. Returns `Ok(None)` for a width of -1.
// NOTE(review): the function header and several body lines are not visible in this excerpt.
146 decoder: &mut Decoder,
147 input: &crate::raw::Variable,
148 warn: impl Fn(Error),
149 ) -> Result<Option<Variable>, Error> {
// A width of -1 produces no variable.
152 -1 => return Ok(None),
// A rejected width is reported together with the record's offset.
154 return Err(Error::BadVariableWidth {
155 offset: input.offset,
160 let width = input.width as Width;
161 let name = decoder.decode_string(&input.name.0, &warn);
// Use the decoded name when it can be claimed; a name that is already taken
// is replaced by a generated one. The second `generate_name` call presumably
// belongs to the arm for an invalid identifier — confirm.
162 let name = match Identifier::new(&name, decoder.encoding) {
164 if !decoder.take_name(&name) {
165 decoder.generate_name()
172 decoder.generate_name()
175 let print_format = decode_format(input.print_format, &name.0, width);
176 let write_format = decode_format(input.write_format, &name.0, width);
// The label, when present, is decoded with the decoder's encoding.
180 .map(|label| decoder.decode_string(&label.0, &warn).into());
// Reserve this variable's dictionary indexes; the returned index is not used here.
181 decoder.take_dict_indexes(&name, width);
187 missing_values: input.missing_values.clone(),
/// Decoded document record: a list of strings.
194 pub struct Document(Vec<String>);
/// Decodes a raw document record.
196 impl Decode for Document {
197 type Input = crate::raw::Document;
199 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
// Each raw element is decoded to an owned string with the decoder's encoding.
204 .map(|s| decoder.decode_string(&s.0, &warn).into())
210 pub use crate::raw::FloatInfo;
211 pub use crate::raw::IntegerInfo;
// Items required of a text-record type (the trait header is not visible in this excerpt).
/// Human-readable name of the record type.
217 const NAME: &'static str;
/// Parses the record from its text. Implementations in this file pass
/// problems with individual entries to `warn` and keep going.
218 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
/// One variable set: a name and the variables it contains.
// NOTE(review): the `name` field line is presumably among the lines missing here.
221 pub struct VariableSet {
/// Whitespace-separated names that followed the `=` in the input line.
223 pub vars: Vec<String>,
/// Parses a `NAME=VAR VAR ...` line. Returns `Error::TBD` if there is no `=`.
227 fn parse(input: &str) -> Result<Self, Error> {
// Everything before the first `=` is the set name; the rest lists its variables.
228 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
229 let vars = input.split_ascii_whitespace().map(String::from).collect();
/// All variable sets parsed from a variable-set text record.
237 pub struct VariableSetRecord(Vec<VariableSet>);
239 impl TextRecord for VariableSetRecord {
240 const NAME: &'static str = "variable set";
/// Parses one `VariableSet` per line of `input`. A line that fails to parse
/// is reported through `warn` and skipped.
241 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
242 let mut sets = Vec::new();
243 for line in input.lines() {
244 match VariableSet::parse(line) {
245 Ok(set) => sets.push(set),
246 Err(error) => warn(error),
249 Ok(VariableSetRecord(sets))
/// Extra product info text, stored verbatim.
253 pub struct ProductInfo(pub String);
255 impl TextRecord for ProductInfo {
256 const NAME: &'static str = "extra product info";
/// Wraps `input` unchanged; never warns and never fails.
257 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
258 Ok(ProductInfo(input.into()))
/// One `SHORT=LONG` pair from the long-variable-names record.
262 pub struct LongVariableName {
/// Text before the `=`.
263 pub short_name: String,
/// Text after the `=`.
264 pub long_name: String,
/// All short-name/long-name pairs parsed from a long-variable-names record.
267 pub struct LongVariableNameRecord(Vec<LongVariableName>);
269 impl TextRecord for LongVariableNameRecord {
270 const NAME: &'static str = "long variable names";
/// Parses tab-separated `SHORT=LONG` pairs, ignoring empty segments.
// NOTE(review): handling of a segment without `=` is on lines not visible here.
271 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
272 let mut names = Vec::new();
273 for pair in input.split('\t').filter(|s| !s.is_empty()) {
// Split at the first `=` only, so the long name may itself contain `=`.
274 if let Some((short_name, long_name)) = pair.split_once('=') {
275 let name = LongVariableName {
276 short_name: short_name.into(),
277 long_name: long_name.into(),
284 Ok(LongVariableNameRecord(names))
/// One `SHORT=LENGTH` entry from the very-long-strings record.
// NOTE(review): the length field line is presumably among the lines missing here.
288 pub struct VeryLongString {
/// Text before the `=`.
289 pub short_name: String,
293 impl VeryLongString {
/// Parses a `SHORT=LENGTH` entry. Returns `Error::TBD` if there is no `=` or
/// if the length is not a valid `usize`.
294 fn parse(input: &str) -> Result<VeryLongString, Error> {
295 let Some((short_name, length)) = input.split_once('=') else {
296 return Err(Error::TBD);
298 let length: usize = length.parse().map_err(|_| Error::TBD)?;
300 short_name: short_name.into(),
/// All entries parsed from a very-long-strings record.
306 pub struct VeryLongStringRecord(Vec<VeryLongString>);
308 impl TextRecord for VeryLongStringRecord {
309 const NAME: &'static str = "very long strings";
/// Parses each entry with `VeryLongString::parse`. An entry that fails to
/// parse is reported through `warn` and skipped.
// NOTE(review): the expression that splits `input` into entries is on lines not visible here.
310 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
311 let mut very_long_strings = Vec::new();
// Drop trailing tabs from each entry, then ignore entries that end up empty.
314 .map(|s| s.trim_end_matches('\t'))
315 .filter(|s| !s.is_empty())
317 match VeryLongString::parse(tuple) {
318 Ok(vls) => very_long_strings.push(vls),
319 Err(error) => warn(error),
322 Ok(VeryLongStringRecord(very_long_strings))
/// One attribute: a name (field line not visible in this excerpt) and its values.
326 pub struct Attribute {
/// Attribute values, in the order they appeared.
328 pub values: Vec<String>,
/// Parses one attribute from the front of `input`, returning it together
/// with the unparsed remainder.
///
/// Returns `Error::TBD` if there is no `(` after the name or if a value is
/// not terminated by a newline.
// NOTE(review): several lines of the value loop are not visible in this excerpt.
332 fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
// The name is everything before the first `(`.
333 let Some((name, mut input)) = input.split_once('(') else {
334 return Err(Error::TBD);
336 let mut values = Vec::new();
// Each value runs up to the next newline.
338 let Some((value, rest)) = input.split_once('\n') else {
339 return Err(Error::TBD);
// A value with the expected quoting is stored without its trailing `'`
// (and presumably without a leading one — that line is not visible);
// otherwise the value is stored as it is.
341 if let Some(stripped) = value
343 .and_then(|value| value.strip_suffix('\''))
345 values.push(stripped.into());
348 values.push(value.into());
// A `)` right after a value marks the end of the value list.
350 if let Some(rest) = rest.strip_prefix(')') {
/// A list of attributes.
364 pub struct AttributeSet(pub Vec<Attribute>);
// Parses attributes from the front of the input until `sentinel` is seen,
// returning the set together with the unparsed remainder. An error from
// `Attribute::parse` is propagated with `?`.
// NOTE(review): the function header and some match arms are not visible in this excerpt.
369 sentinel: Option<char>,
370 warn: &impl Fn(Error),
371 ) -> Result<(AttributeSet, &'a str), Error> {
372 let mut attributes = Vec::new();
374 match input.chars().next() {
// The next character (as an `Option<char>`) equals `sentinel`: stop and skip
// one byte of input.
// NOTE(review): with `sentinel == None` and empty input this arm would slice
// `[1..]` on an empty string and panic, unless an earlier arm not visible
// here handles `None` — confirm.
376 c if c == sentinel => break &input[1..],
378 let (attribute, rest) = Attribute::parse(input, &warn)?;
379 attributes.push(attribute);
384 Ok((AttributeSet(attributes), rest))
/// Attributes parsed from a data-file-attributes record.
388 pub struct FileAttributeRecord(AttributeSet);
390 impl TextRecord for FileAttributeRecord {
391 const NAME: &'static str = "data file attributes";
/// Parses the whole record as one attribute set with no sentinel character.
392 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
393 let (set, rest) = AttributeSet::parse(input, None, &warn)?;
// Leftover text after the set is handled on lines not visible in this excerpt.
394 if !rest.is_empty() {
397 Ok(FileAttributeRecord(set))
/// The attributes attached to one variable.
401 pub struct VarAttributeSet {
/// Text before the `:` that introduces the variable's attributes.
402 pub long_var_name: String,
/// Attributes parsed after the `:`.
403 pub attributes: AttributeSet,
406 impl VarAttributeSet {
// Parses `NAME:attributes` from the front of the input, with the attribute
// list ended by `/`, and returns the set together with the remainder.
// Returns `Error::TBD` if there is no `:`.
// NOTE(review): the function header line is not visible in this excerpt.
409 warn: &impl Fn(Error),
410 ) -> Result<(VarAttributeSet, &'a str), Error> {
411 let Some((long_var_name, rest)) = input.split_once(':') else {
412 return Err(Error::TBD);
414 let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
417 long_var_name: long_var_name.into(),
/// All per-variable attribute sets parsed from a variable-attributes record.
425 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
427 impl TextRecord for VariableAttributeRecord {
428 const NAME: &'static str = "variable attributes";
/// Repeatedly parses a `VarAttributeSet` from the front of `input` while
/// input remains.
// NOTE(review): how `input` advances and how a parse error is handled are on
// lines not visible in this excerpt.
429 fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
430 let mut var_attribute_sets = Vec::new();
431 while !input.is_empty() {
432 match VarAttributeSet::parse(input, &warn) {
433 Ok((var_attribute, rest)) => {
434 var_attribute_sets.push(var_attribute);
443 Ok(VariableAttributeRecord(var_attribute_sets))
/// Display settings for one variable.
// NOTE(review): a field line between `measure` and `align` appears to be missing from this excerpt.
459 pub struct VarDisplay {
/// Measure setting, if one is specified.
460 pub measure: Option<Measure>,
/// Alignment setting, if one is specified.
462 pub align: Option<Alignment>,
/// A list of `VarDisplay` entries.
465 pub struct VarDisplayRecord(pub Vec<VarDisplay>);