From 9b851026fd94c614fcce417e357076de4845816c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 20 Aug 2023 14:43:39 -0700 Subject: [PATCH] work --- rust/src/cooked.rs | 71 ++++++++++++++++++++++++++++++++++++---------- rust/src/raw.rs | 41 +++++++++++++++++--------- 2 files changed, 84 insertions(+), 28 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 16cead934a..02f5c23d79 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -9,6 +9,7 @@ use crate::{ use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::{DecoderResult, Encoding}; use num::integer::div_ceil; +use ordered_float::OrderedFloat; use thiserror::Error as ThisError; #[derive(ThisError, Debug)] @@ -76,6 +77,37 @@ pub enum Error { TBD, } +#[derive(Clone, Debug)] +pub enum Record { + Header(HeaderRecord), + Variable(VariableRecord), + ValueLabel(ValueLabelRecord), + Document(DocumentRecord), + IntegerInfo(IntegerInfoRecord), + FloatInfo(FloatInfoRecord), + VariableSets(VariableSetRecord), + VarDisplay(VarDisplayRecord), + //MultipleResponse(MultipleResponseRecord), + //LongStringValueLabels(LongStringValueLabelRecord), + Encoding(EncodingRecord), + NumberOfCases(NumberOfCasesRecord), + ProductInfo(ProductInfoRecord), + //LongNames(UnencodedString), + //LongStrings(UnencodedString), + //FileAttributes(UnencodedString), + //VariableAttributes(UnencodedString), + //OtherExtension(Extension), + //EndOfHeaders(u32), + //ZHeader(ZHeader), + //ZTrailer(ZTrailer), + //Case(Vec), +} + +pub use crate::raw::IntegerInfoRecord; +pub use crate::raw::FloatInfoRecord; +pub use crate::raw::EncodingRecord; +pub use crate::raw::NumberOfCasesRecord; + type DictIndex = usize; pub struct Variable { @@ -168,8 +200,8 @@ pub trait Decode: Sized { fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result; } -#[derive(Clone)] -pub struct Header { +#[derive(Clone, Debug)] +pub struct HeaderRecord { pub eye_catcher: String, pub weight_index: Option, pub n_cases: Option, @@ -177,7 +209,7 @@ pub struct Header { pub file_label: String, } -impl Decode for Header { +impl Decode for HeaderRecord { type Input = crate::raw::HeaderRecord; fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result { @@ -198,7 +230,7 @@ impl Decode for Header { }); Default::default() }); - Ok(Header { + Ok(HeaderRecord { eye_catcher: eye_catcher.into(), weight_index: input.weight_index.map(|n| n as usize), n_cases: input.n_cases.map(|n| n as u64), @@ -208,7 +240,7 @@ impl Decode for Header { } } -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum VarWidth { Numeric, String(u16), @@ -242,6 +274,7 @@ impl From for VarType { } } +#[derive(Clone, Debug)] pub struct VariableRecord { pub width: VarWidth, pub name: Identifier, @@ -347,14 +380,14 @@ impl VariableRecord { } } -#[derive(Clone)] -pub struct Document(Vec); +#[derive(Clone, Debug)] +pub struct DocumentRecord(Vec); -impl Decode for Document { +impl Decode for DocumentRecord { type Input = crate::raw::DocumentRecord; fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result { - Ok(Document( + Ok(DocumentRecord( input .lines .iter() @@ -372,6 +405,7 @@ where fn parse(input: &str, warn: impl Fn(Error)) -> Result; } +#[derive(Clone, Debug)] pub struct VariableSet { pub name: String, pub vars: Vec, @@ -388,21 +422,22 @@ impl VariableSet { } } -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Value { - Number(Option), + Number(Option>), String(String), } impl Value { pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self { match raw { - raw::Value::Number(x) => Value::Number(x), + raw::Value::Number(x) => Value::Number(x.map(|x| x.into())), raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()), } } } +#[derive(Clone, Debug)] pub struct ValueLabelRecord { pub var_type: VarType, pub labels: Vec<(Value, String)>, @@ -489,6 +524,7 @@ impl ValueLabelRecord { } } +#[derive(Clone, Debug)] pub struct VariableSetRecord(Vec); impl TextRecord for VariableSetRecord { @@ -504,12 +540,13 @@ impl TextRecord for VariableSetRecord { } } -pub struct ProductInfo(pub String); +#[derive(Clone, Debug)] +pub struct ProductInfoRecord(pub String); -impl TextRecord for ProductInfo { +impl TextRecord for ProductInfoRecord { const NAME: &'static str = "extra product info"; fn parse(input: &str, _warn: impl Fn(Error)) -> Result { - Ok(ProductInfo(input.into())) + Ok(ProductInfoRecord(input.into())) } } @@ -694,24 +731,28 @@ impl TextRecord for VariableAttributeRecord { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Measure { Nominal, Ordinal, Scale, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Alignment { Left, Right, Center, } +#[derive(Clone, Debug)] pub struct VarDisplay { pub measure: Option, pub width: u32, pub align: Option, } +#[derive(Clone, Debug)] pub struct VarDisplayRecord(pub Vec); #[cfg(test)] diff --git a/rust/src/raw.rs b/rust/src/raw.rs index ead9da8341..b5968e67b9 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -135,17 +135,17 @@ pub enum Record { Document(DocumentRecord), IntegerInfo(IntegerInfoRecord), FloatInfo(FloatInfoRecord), - VariableSets(UnencodedString), + VariableSets(TextRecord), VarDisplay(VarDisplayRecord), MultipleResponse(MultipleResponseRecord), LongStringValueLabels(LongStringValueLabelRecord), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), - ProductInfo(UnencodedString), - LongNames(UnencodedString), - LongStrings(UnencodedString), - FileAttributes(UnencodedString), - VariableAttributes(UnencodedString), + ProductInfo(TextRecord), + LongNames(TextRecord), + LongStrings(TextRecord), + FileAttributes(TextRecord), + VariableAttributes(TextRecord), OtherExtension(Extension), EndOfHeaders(u32), ZHeader(ZHeader), @@ -337,7 +337,7 @@ impl TryFrom<[u8; 4]> for Magic { } } -#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum VarType { Numeric, String, @@ -1455,6 +1455,21 @@ impl ExtensionRecord for NumberOfCasesRecord { } } +#[derive(Clone, Debug)] +pub struct TextRecord { + /// Offset from the start of the file to the start of the record. + pub offset: u64, + + /// The text content of the record. + pub text: UnencodedString, +} + +impl From for TextRecord { + fn from(source: Extension) -> Self { + TextRecord { offset: source.offset, text: source.data.into() } + } +} + #[derive(Clone, Debug)] pub struct Extension { /// Offset from the start of the file to the start of the record. @@ -1552,12 +1567,12 @@ impl Extension { endian, |_| (), )?)), - 5 => Ok(Record::VariableSets(UnencodedString(extension.data))), - 10 => Ok(Record::ProductInfo(UnencodedString(extension.data))), - 13 => Ok(Record::LongNames(UnencodedString(extension.data))), - 14 => Ok(Record::LongStrings(UnencodedString(extension.data))), - 17 => Ok(Record::FileAttributes(UnencodedString(extension.data))), - 18 => Ok(Record::VariableAttributes(UnencodedString(extension.data))), + 5 => Ok(Record::VariableSets(extension.into())), + 10 => Ok(Record::ProductInfo(extension.into())), + 13 => Ok(Record::LongNames(extension.into())), + 14 => Ok(Record::LongStrings(extension.into())), + 17 => Ok(Record::FileAttributes(extension.into())), + 18 => Ok(Record::VariableAttributes(extension.into())), _ => Ok(Record::OtherExtension(extension)), } } -- 2.30.2