From 4b6af2637a820add4736bcc4a4a259ab3a4e64d8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 22 Dec 2023 12:23:53 -0800 Subject: [PATCH] documentrecord --- rust/src/cooked.rs | 6 +++--- rust/src/raw.rs | 43 ++++++++++++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index b3c0ff74bf..095bfe507d 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -8,7 +8,7 @@ use crate::{ endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, - raw::{self, RawStr, RawString, VarType}, + raw::{self, RawStr, RawString, VarType, RawDocumentLine}, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::{DecoderResult, Encoding}; @@ -215,7 +215,7 @@ struct Headers<'a> { header: Option<&'a raw::HeaderRecord>, variables: Vec<&'a raw::VariableRecord>>, value_labels: Vec<&'a raw::ValueLabelRecord, RawString>>, - document: Option<&'a raw::DocumentRecord>, + document: Option<&'a raw::DocumentRecord>, integer_info: Option<&'a raw::IntegerInfoRecord>, float_info: Option<&'a raw::FloatInfoRecord>, variable_sets: Vec<&'a raw::TextRecord>, @@ -780,7 +780,7 @@ impl TryDecode for VariableRecord { pub struct DocumentRecord(Vec); impl TryDecode for DocumentRecord { - type Input = crate::raw::DocumentRecord; + type Input = crate::raw::DocumentRecord; fn try_decode( decoder: &mut Decoder, diff --git a/rust/src/raw.rs b/rust/src/raw.rs index af29cdf61e..e0c5a79eff 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -161,7 +161,7 @@ pub enum Record { Header(HeaderRecord), Variable(VariableRecord>), ValueLabel(ValueLabelRecord, RawString>), - Document(DocumentRecord), + Document(DocumentRecord), IntegerInfo(IntegerInfoRecord), FloatInfo(FloatInfoRecord), VariableSets(TextRecord), @@ -393,8 +393,8 @@ struct Decoder { } impl Decoder { - fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> { - let (output, malformed) = self.encoding.decode_without_bom_handling(&input.0); + fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { + let (output, malformed) = self.encoding.decode_without_bom_handling(input); if malformed { (self.warn)(Error::MalformedString { encoding: self.encoding.name().into(), @@ -403,6 +403,10 @@ impl Decoder { } output } + + fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> { + self.decode_slice(input.0.as_slice()) + } /// Returns `input` decoded from `self.encoding` into UTF-8 such that /// re-encoding the result back into `self.encoding` will have exactly the @@ -1413,23 +1417,26 @@ impl ValueLabelRecord, RawString> { } #[derive(Clone, Debug)] -pub struct DocumentRecord { +pub struct DocumentRecord +where + S: Debug, +{ pub offsets: Range, /// The document, as an array of 80-byte lines. - pub lines: Vec, + pub lines: Vec, } -pub type DocumentLine = RawStr<{ DocumentRecord::LINE_LEN }>; +pub type RawDocumentLine = RawStr; -impl DocumentRecord { - /// Length of a line in a document. Document lines are fixed-length and - /// padded on the right with spaces. - pub const LINE_LEN: usize = 80; +/// Length of a line in a document. Document lines are fixed-length and +/// padded on the right with spaces. +pub const DOC_LINE_LEN: usize = 80; +impl DocumentRecord { /// Maximum number of lines we will accept in a document. This is simply /// the maximum number that will fit in a 32-bit space. - pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN; + pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN; fn read(r: &mut R, endian: Endian) -> Result { let start_offset = r.stream_position()?; @@ -1444,7 +1451,7 @@ impl DocumentRecord { } else { let mut lines = Vec::with_capacity(n); for _ in 0..n { - lines.push(RawStr::<{ DocumentRecord::LINE_LEN }>(read_bytes(r)?)); + lines.push(RawStr(read_bytes(r)?)); } let end_offset = r.stream_position()?; Ok(Record::Document(DocumentRecord { @@ -1453,9 +1460,19 @@ impl DocumentRecord { })) } } + + fn decode<'a>(&'a self, decoder: &Decoder) -> DocumentRecord> { + DocumentRecord { + offsets: self.offsets.clone(), + lines: self.lines.iter().map(|s| decoder.decode_slice(&s.0)).collect(), + } + } } -impl Header for DocumentRecord { +impl Header for DocumentRecord +where + S: Debug, +{ fn offsets(&self) -> Range { self.offsets.clone() } -- 2.30.2