1 use std::{borrow::Cow, collections::HashSet};
3 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
4 use encoding_rs::Encoding;
7 format::{Spec, UncheckedSpec, Width},
8 identifier::{Error as IdError, Identifier},
9 {endian::Endian, CategoryLabels, Compression}, raw,
11 use thiserror::Error as ThisError;
/// Problems found while decoding raw records. Values of this type are both
/// returned as hard errors and handed to `warn` callbacks as warnings.
13 #[derive(ThisError, Debug)]
/// A variable record declared an unacceptable width; carries the record's
/// offset and the offending width.
15 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
16 BadVariableWidth { offset: u64, width: i32 },
/// Corrupted long-missing-value metadata, as described by the message text.
18 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
19 BadLongMissingValueFormat,
/// The header's creation date string could not be parsed.
// NOTE(review): the message repeats the word "format" ("expected format ... format").
21 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
22 InvalidCreationDate { creation_date: String },
/// The header's creation time string could not be parsed.
// NOTE(review): same duplicated "format" wording as the date message above.
24 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
25 InvalidCreationTime { creation_time: String },
/// Wraps an identifier error; `#[from]` lets an `IdError` convert into this
/// variant automatically.
27 #[error("Invalid variable name: {0}")]
28 BadIdentifier(#[from] IdError),
30 #[error("Details TBD")]
/// Optional compression setting; not read by the code visible in this section.
35 pub compression: Option<Compression>,
/// Character encoding used to decode raw byte strings and to build identifiers.
37 pub encoding: &'static Encoding,
/// Variable names claimed so far; `take_name` inserts into this set.
38 pub var_names: HashSet<Identifier>,
/// Counter that `generate_name` increments to number synthesized `VARnnn` names.
39 n_generated_names: usize,
/// Claims `id` as a variable name by inserting a clone of it into `var_names`.
///
/// Returns the result of the set insertion: `true` if the name was not yet
/// present, `false` if it had already been taken.
43 fn take_name(&mut self, id: &Identifier) -> bool {
44 self.var_names.insert(id.clone())
/// Synthesizes a variable name of the form `VAR` followed by a zero-padded
/// number (at least three digits) and claims it through `take_name`.
// NOTE(review): the surrounding retry loop is not visible in this excerpt —
// presumably the steps below repeat until `take_name` succeeds; confirm.
46 fn generate_name(&mut self) -> Identifier {
// The counter is bumped before formatting, so the first candidate is VAR001.
48 self.n_generated_names += 1;
49 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
// `take_name` returning true means the candidate was not already in use.
51 if self.take_name(&name) {
// Panics rather than letting the counter overflow on a later increment.
54 assert!(self.n_generated_names < usize::MAX);
/// Decodes the bytes in `input` to text using the decoder's `encoding`.
///
/// The result is a `Cow`, so it may borrow from `input` or own a new string.
// NOTE(review): the rest of the body is not visible in this excerpt —
// presumably `malformed` is reported through `warn`; confirm.
57 fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
// No BOM sniffing: the configured encoding is always the one applied.
// `malformed` reports whether invalid byte sequences were encountered.
58 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Conversion from a raw record type (`Self::Input`) into its decoded form.
66 pub trait Decode: Sized {
/// Decodes `input` with the help of `decoder`. Problems can be reported
/// through the `warn` callback or returned as `Err`.
68 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error>;
/// Decoded text of the raw header's `eye_catcher` field.
73 pub eye_catcher: String,
/// The raw header's `weight_index`, converted to `usize`, when present.
74 pub weight_index: Option<usize>,
/// The raw header's `n_cases`, converted to `u64`, when present.
75 pub n_cases: Option<u64>,
/// Creation timestamp, combined from the header's separate date and time strings.
76 pub creation: NaiveDateTime,
/// Decoded text of the raw header's `file_label` field.
77 pub file_label: String,
/// Decodes a raw file header: text fields go through `Decoder::decode_string`
/// and the creation date/time strings are parsed with chrono.
80 impl Decode for Header {
81 type Input = crate::raw::Header;
/// Builds a `Header` from `input`. A creation date or time that fails to parse
/// is reported through `warn` and replaced by a fallback value (the closure
/// tails producing the fallback are not visible in this excerpt).
83 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
84 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
85 let file_label = decoder.decode_string(&input.file_label.0, &warn);
86 let creation_date = decoder.decode_string(&input.creation_date.0, &warn);
// NOTE(review): chrono documents `%v` as `%e-%b-%Y` (dash-separated, four-digit
// year), which does not obviously match the "DD MMM YY" layout named in the
// warning text — confirm the intended pattern.
87 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
88 warn(Error::InvalidCreationDate {
89 creation_date: creation_date.into(),
93 let creation_time = decoder.decode_string(&input.creation_time.0, &warn);
// The time of day is expected as HH:MM:SS.
95 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
96 warn(Error::InvalidCreationTime {
97 creation_time: creation_time.into(),
102 eye_catcher: eye_catcher.into(),
// NOTE(review): `as` casts do not check range — confirm the raw integer
// types cannot hold values that would wrap or truncate here.
103 weight_index: input.weight_index.map(|n| n as usize),
104 n_cases: input.n_cases.map(|n| n as u64),
105 creation: NaiveDateTime::new(creation_date, creation_time),
106 file_label: file_label.into(),
/// A decoded variable record.
111 pub struct Variable {
/// Variable name: either the decoded name from the record or a generated one.
113 pub name: Identifier,
/// Print format, produced by `decode_format` from the raw record's `print_format`.
114 pub print_format: Spec,
/// Write format, produced by `decode_format` from the raw record's `write_format`.
115 pub write_format: Spec,
116 //pub missing_values: MissingValues,
/// Decoded variable label, if the raw record carries one.
117 pub label: Option<String>,
/// Converts a raw format spec into a `Spec` usable for a variable of `width`.
///
/// The raw value goes through `UncheckedSpec::try_from`, `Spec::try_from` and a
/// width-compatibility check (with `name` supplied for context). If any step
/// fails, the default spec for `width` is returned instead.
120 fn decode_format(raw: raw::Spec, name: &str, width: Width) -> Spec {
121 UncheckedSpec::try_from(raw)
122 .and_then(Spec::try_from)
123 .and_then(|x| x.check_width_compatibility(Some(name), width))
// NOTE(review): part of this closure is not visible in this excerpt and the
// binding is underscore-prefixed — check whether the warning is reported or dropped.
124 .unwrap_or_else(|_warning| {
// Fallback when the raw spec is invalid or incompatible with the width.
126 Spec::default_for_width(width)
// Parameter list of the variable-decoding function (its `fn` line is not
// visible in this excerpt). `decoder` is mutable because decoding claims or
// generates variable names.
131 decoder: &mut Decoder,
132 input: &crate::raw::Variable,
133 warn: impl Fn(Error),
// `Ok(None)` means the record yields no variable (see the `-1` arm below).
134 ) -> Result<Option<Variable>, Error> {
// A width of -1 produces no variable of its own.
137 -1 => return Ok(None),
// Hard error for an unacceptable width; the arm's pattern is not visible here.
139 return Err(Error::BadVariableWidth {
140 offset: input.offset,
145 let width = input.width as Width;
146 let name = decoder.decode_string(&input.name.0, &warn);
147 let name = match Identifier::new(&name, decoder.encoding) {
// If the decoded name is already claimed, substitute a generated one.
149 if !decoder.take_name(&name) {
150 decoder.generate_name()
// NOTE(review): the arm containing this call is not visible — presumably the
// invalid-identifier case, which also falls back to a generated name; confirm.
157 decoder.generate_name()
// Both formats are validated against the same width, with the final name
// passed for context.
160 let print_format = decode_format(input.print_format, &name.0, width);
161 let write_format = decode_format(input.write_format, &name.0, width);
// The label is optional; when present its bytes are decoded to an owned string.
162 let label = input.label.as_ref().map(|label| decoder.decode_string(&label.0, &warn).into());
163 Ok(Some(Variable { width, name, print_format, write_format, label }))
/// Decoded document record: a private list of decoded strings.
167 pub struct Document(Vec<String>);
/// Decodes a raw document record into a `Document`.
169 impl Decode for Document {
170 type Input = crate::raw::Document;
/// Builds a `Document` from `input`, using `decoder` to decode text.
// NOTE(review): the iterator source and the final collection step are not
// visible in this excerpt.
172 fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
// Each raw entry's bytes are decoded and converted into an owned value.
177 .map(|s| decoder.decode_string(&s.0, &warn).into())
183 pub use crate::raw::FloatInfo;
184 pub use crate::raw::IntegerInfo;
/// Kind of a multiple response set; stored in `MultipleResponseSet::mr_type`.
// NOTE(review): the variant names are not visible in this excerpt.
186 #[derive(Clone, Debug)]
187 pub enum MultipleResponseType {
// Field of one of the variants; its type comes from the crate root's `CategoryLabels`.
190 labels: CategoryLabels,
/// One multiple response set. Some fields are not visible in this excerpt.
194 #[derive(Clone, Debug)]
195 pub struct MultipleResponseSet {
/// Which kind of multiple response set this is.
198 pub mr_type: MultipleResponseType,
/// Presumably the names of the member variables — confirm against the decoder.
199 pub vars: Vec<String>,
/// Newtype around the list of `MultipleResponseSet`s; the inner vector is private.
202 #[derive(Clone, Debug)]
203 pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
/// Newtype around a single product-information string; the inner value is private.
205 #[derive(Clone, Debug)]
206 pub struct ProductInfo(String);
/// Display settings for one variable. Some fields may not be visible in this excerpt.
220 pub struct VarDisplay {
/// Measurement level, when one is specified.
221 pub measure: Option<Measure>,
/// Alignment, when one is specified.
223 pub align: Option<Alignment>,
/// Newtype around a list of `VarDisplay` entries; the inner vector is public.
226 pub struct VarDisplayRecord(pub Vec<VarDisplay>);