From: Ben Pfaff Date: Wed, 16 Aug 2023 04:23:50 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1bb9fba87b1caa260ea79c40c7dcdddf55cad33a;p=pspp work --- diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 0012a51dcc..4b12e8f314 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -4,9 +4,9 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::Encoding; use crate::{ - format::Spec, - identifier::{Identifier, Error as IdError}, - {endian::Endian, CategoryLabels, Compression}, + format::{Spec, UncheckedSpec, Width}, + identifier::{Error as IdError, Identifier}, + {endian::Endian, CategoryLabels, Compression}, raw, }; use thiserror::Error as ThisError; @@ -40,14 +40,15 @@ pub struct Decoder { } impl Decoder { - fn take_name(&mut self, id: Identifier) -> bool { - self.var_names.insert(id) + fn take_name(&mut self, id: &Identifier) -> bool { + self.var_names.insert(id.clone()) } fn generate_name(&mut self) -> Identifier { loop { self.n_generated_names += 1; - let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding).unwrap(); - if self.take_name(name.clone()) { + let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding) + .unwrap(); + if self.take_name(&name) { return name; } assert!(self.n_generated_names < usize::MAX); @@ -80,16 +81,16 @@ impl Decode for Header { type Input = crate::raw::Header; fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result { - let eye_catcher = decoder.decode_string(&input.eye_catcher, &warn); - let file_label = decoder.decode_string(&input.file_label, &warn); - let creation_date = decoder.decode_string(&input.creation_date, &warn); + let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn); + let file_label = decoder.decode_string(&input.file_label.0, &warn); + let creation_date = decoder.decode_string(&input.creation_date.0, &warn); let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| { warn(Error::InvalidCreationDate { creation_date: creation_date.into(), }); Default::default() }); - let creation_time = decoder.decode_string(&input.creation_time, &warn); + let creation_time = decoder.decode_string(&input.creation_time.0, &warn); let creation_time = NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| { warn(Error::InvalidCreationTime { @@ -108,10 +109,22 @@ impl Decode for Header { } pub struct Variable { - pub width: i32, + pub width: Width, pub name: Identifier, pub print_format: Spec, pub write_format: Spec, + //pub missing_values: MissingValues, + pub label: Option, +} + +fn decode_format(raw: raw::Spec, name: &str, width: Width) -> Spec { + UncheckedSpec::try_from(raw) + .and_then(Spec::try_from) + .and_then(|x| x.check_width_compatibility(Some(name), width)) + .unwrap_or_else(|_warning| { + /*warn(warning);*/ + Spec::default_for_width(width) + }) } fn decode_var( @@ -129,10 +142,11 @@ fn decode_var( }) } }; - let name = decoder.decode_string(&input.name, &warn); + let width = input.width as Width; + let name = decoder.decode_string(&input.name.0, &warn); let name = match Identifier::new(&name, decoder.encoding) { Ok(name) => { - if !decoder.take_name(name) { + if !decoder.take_name(&name) { decoder.generate_name() } else { name @@ -143,6 +157,10 @@ fn decode_var( decoder.generate_name() } }; + let print_format = decode_format(input.print_format, &name.0, width); + let write_format = decode_format(input.write_format, &name.0, width); + let label = input.label.as_ref().map(|label| decoder.decode_string(&label.0, &warn).into()); + Ok(Some(Variable { width, name, print_format, write_format, label })) } #[derive(Clone)] @@ -156,7 +174,7 @@ impl Decode for Document { input .lines .iter() - .map(|s| decoder.decode_string(s, &warn).into()) + .map(|s| decoder.decode_string(&s.0, &warn).into()) .collect(), )) } diff --git a/rust/src/format.rs b/rust/src/format.rs index 81f50ba0ba..857c05e67f 100644 --- a/rust/src/format.rs +++ b/rust/src/format.rs @@ -5,7 +5,7 @@ use std::{ use thiserror::Error as ThisError; -use crate::raw::VarType; +use crate::raw::{VarType, self}; #[derive(ThisError, Debug)] pub enum Error { @@ -175,10 +175,10 @@ pub enum Format { pub const MAX_STRING: Width = 32767; -type Width = u16; -type SignedWidth = i16; +pub type Width = u16; +pub type SignedWidth = i16; -type Decimals = u8; +pub type Decimals = u8; impl Format { pub fn max_width(self) -> Width { @@ -404,6 +404,13 @@ impl Spec { self.d } + pub fn default_for_width(w: Width) -> Self { + match w { + 0 => Spec { format: Format::F, w: 8, d: 2 }, + _ => Spec { format: Format::A, w: w, d: 0 }, + } + } + pub fn fixed_from(source: &UncheckedSpec) -> Self { let UncheckedSpec { format, w, d } = *source; let (min, max) = format.width_range().into_inner(); @@ -430,7 +437,7 @@ impl Spec { self.format.var_type() } - pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result<(), Error> { + pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result { self.format.check_type_compatibility(variable, self.var_type())?; let expected_width = self.var_width(); if w != expected_width { @@ -455,7 +462,7 @@ impl Spec { }) } } else { - Ok(()) + Ok(self) } } } @@ -553,14 +560,15 @@ pub struct UncheckedSpec { pub d: Decimals, } -impl TryFrom for UncheckedSpec { +impl TryFrom for UncheckedSpec { type Error = Error; - fn try_from(source: u32) -> Result { - let raw_format = (source >> 16) as u16; + fn try_from(raw: raw::Spec) -> Result { + let raw = raw.0; + let raw_format = (raw >> 16) as u16; let format = raw_format.try_into()?; - let w = ((source >> 8) & 0xff) as Width; - let d = (source & 0xff) as Decimals; + let w = ((raw >> 8) & 0xff) as Width; + let d = (raw & 0xff) as Decimals; Ok(Self { format, w, d }) } } diff --git a/rust/src/raw.rs b/rust/src/raw.rs index 0ae4f53b8c..eda5b5724f 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -36,7 +36,7 @@ pub enum Error { BadVariableWidth { offset: u64, width: i32 }, #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")] - BadDocumentLength { offset: u64, n: u32, max: u32 }, + BadDocumentLength { offset: u64, n: usize, max: usize }, #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")] BadRecordType { offset: u64, rec_type: u32 }, @@ -193,7 +193,7 @@ pub struct Header { /// Eye-catcher string, product name, in the file's encoding. Padded /// on the right with spaces. - pub eye_catcher: [u8; 60], + pub eye_catcher: UnencodedStr<60>, /// Layout code, normally either 2 or 3. pub layout_code: u32, @@ -216,13 +216,13 @@ pub struct Header { pub bias: f64, /// `dd mmm yy` in the file's encoding. - pub creation_date: [u8; 9], + pub creation_date: UnencodedStr<9>, /// `HH:MM:SS` in the file's encoding. - pub creation_time: [u8; 8], + pub creation_time: UnencodedStr<8>, /// File label, in the file's encoding. Padded on the right with spaces. - pub file_label: [u8; 64], + pub file_label: UnencodedStr<64>, /// Endianness of the data in the file header. pub endian: Endian, @@ -238,16 +238,16 @@ impl Debug for Header { fn fmt(&self, f: &mut Formatter) -> FmtResult { writeln!(f, "File header record:")?; self.debug_field(f, "Magic", self.magic)?; - self.debug_field(f, "Product name", FallbackEncoding(&self.eye_catcher))?; + self.debug_field(f, "Product name", &self.eye_catcher)?; self.debug_field(f, "Layout code", self.layout_code)?; self.debug_field(f, "Nominal case size", self.nominal_case_size)?; self.debug_field(f, "Compression", self.compression)?; self.debug_field(f, "Weight index", self.weight_index)?; self.debug_field(f, "Number of cases", self.n_cases)?; self.debug_field(f, "Compression bias", self.bias)?; - self.debug_field(f, "Creation date", FallbackEncoding(&self.creation_date))?; - self.debug_field(f, "Creation time", FallbackEncoding(&self.creation_time))?; - self.debug_field(f, "File label", FallbackEncoding(&self.file_label))?; + self.debug_field(f, "Creation date", &self.creation_date)?; + self.debug_field(f, "Creation time", &self.creation_time)?; + self.debug_field(f, "File label", &self.file_label)?; self.debug_field(f, "Endianness", self.endian) } } @@ -257,7 +257,7 @@ impl Header { let magic: [u8; 4] = read_bytes(r)?; let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; - let eye_catcher: [u8; 60] = read_bytes(r)?; + let eye_catcher = UnencodedStr::<60>(read_bytes(r)?); let layout_code: [u8; 4] = read_bytes(r)?; let endian = Endian::identify_u32(2, layout_code) .or_else(|| Endian::identify_u32(2, layout_code)) @@ -285,9 +285,9 @@ impl Header { let bias: f64 = endian.parse(read_bytes(r)?); - let creation_date: [u8; 9] = read_bytes(r)?; - let creation_time: [u8; 8] = read_bytes(r)?; - let file_label: [u8; 64] = read_bytes(r)?; + let creation_date = UnencodedStr::<9>(read_bytes(r)?); + let creation_time = UnencodedStr::<8>(read_bytes(r)?); + let file_label = UnencodedStr::<64>(read_bytes(r)?); let _: [u8; 3] = read_bytes(r)?; Ok(Header { @@ -507,7 +507,7 @@ mod state { #[derive(Copy, Clone)] pub enum Value { Number(Option), - String([u8; 8]), + String(UnencodedStr<8>), } impl Debug for Value { @@ -515,7 +515,7 @@ impl Debug for Value { match self { Value::Number(Some(number)) => write!(f, "{number:?}"), Value::Number(None) => write!(f, "SYSMIS"), - Value::String(bytes) => write!(f, "{:?}", FallbackEncoding(bytes)), + Value::String(bytes) => write!(f, "{:?}", bytes), } } } @@ -527,7 +527,7 @@ impl Value { pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value { match var_type { - VarType::String => Value::String(raw), + VarType::String => Value::String(UnencodedStr(raw)), VarType::Number => { let number: f64 = endian.parse(raw); Value::Number((number != -f64::MAX).then_some(number)) @@ -591,7 +591,7 @@ impl Value { 1..=251 => match var_type { VarType::Number => break Value::Number(Some(code as f64 - bias)), VarType::String => { - break Value::String(endian.to_bytes(code as f64 - bias)) + break Value::String(UnencodedStr(endian.to_bytes(code as f64 - bias))) } }, 252 => { @@ -607,7 +607,7 @@ impl Value { } 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian), 254 => match var_type { - VarType::String => break Value::String(*b" "), // XXX EBCDIC + VarType::String => break Value::String(UnencodedStr(*b" ")), // XXX EBCDIC VarType::Number => { return Err(Error::CompressedStringExpected { offset: case_start, @@ -716,9 +716,9 @@ impl Iterator for Reader { impl FusedIterator for Reader {} #[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct Format(pub u32); +pub struct Spec(pub u32); -impl Debug for Format { +impl Debug for Spec { fn fmt(&self, f: &mut Formatter) -> FmtResult { let type_ = format_name(self.0 >> 16); let w = (self.0 >> 8) & 0xff; @@ -849,13 +849,13 @@ pub struct Variable { pub width: i32, /// Variable name, padded on the right with spaces. - pub name: [u8; 8], + pub name: UnencodedStr<8>, /// Print format. - pub print_format: Format, + pub print_format: Spec, /// Write format. - pub write_format: Format, + pub write_format: Spec, /// Missing values. pub missing_values: MissingValues, @@ -880,7 +880,7 @@ impl Debug for Variable { )?; writeln!(f, "Print format: {:?}", self.print_format)?; writeln!(f, "Write format: {:?}", self.write_format)?; - writeln!(f, "Name: {:?}", FallbackEncoding(&self.name))?; + writeln!(f, "Name: {:?}", &self.name)?; writeln!(f, "Variable label: {:?}", self.label)?; writeln!(f, "Missing values: {:?}", self.missing_values) } @@ -892,9 +892,9 @@ impl Variable { let width: i32 = endian.parse(read_bytes(r)?); let has_variable_label: u32 = endian.parse(read_bytes(r)?); let missing_value_code: i32 = endian.parse(read_bytes(r)?); - let print_format = Format(endian.parse(read_bytes(r)?)); - let write_format = Format(endian.parse(read_bytes(r)?)); - let name: [u8; 8] = read_bytes(r)?; + let print_format = Spec(endian.parse(read_bytes(r)?)); + let write_format = Spec(endian.parse(read_bytes(r)?)); + let name = UnencodedStr::<8>(read_bytes(r)?); let label = match has_variable_label { 0 => None, @@ -957,7 +957,7 @@ impl Debug for UntypedValue { } #[derive(Clone)] -pub struct UnencodedString(Vec); +pub struct UnencodedString(pub Vec); impl From> for UnencodedString { fn from(source: Vec) -> Self { @@ -977,6 +977,21 @@ impl Debug for UnencodedString { } } +#[derive(Copy, Clone)] +pub struct UnencodedStr(pub [u8; N]); + +impl From<[u8; N]> for UnencodedStr { + fn from(source: [u8; N]) -> Self { + Self(source) + } +} + +impl Debug for UnencodedStr { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "{:?}", FallbackEncoding(&self.0)) + } +} + #[derive(Clone)] pub struct ValueLabel { /// Offset from the start of the file to the start of the record. @@ -1076,33 +1091,37 @@ pub struct Document { pub pos: u64, /// The document, as an array of 80-byte lines. - pub lines: Vec<[u8; Document::LINE_LEN as usize]>, + pub lines: Vec } +pub type DocumentLine = UnencodedStr<{Document::LINE_LEN}>; + impl Document { /// Length of a line in a document. Document lines are fixed-length and /// padded on the right with spaces. - pub const LINE_LEN: u32 = 80; + pub const LINE_LEN: usize = 80; /// Maximum number of lines we will accept in a document. This is simply /// the maximum number that will fit in a 32-bit space. - pub const MAX_LINES: u32 = i32::MAX as u32 / Self::LINE_LEN; + pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN; fn read(r: &mut R, endian: Endian) -> Result { let offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); - match n { - 0..=Self::MAX_LINES => Ok(Document { - pos: r.stream_position()?, - lines: (0..n) - .map(|_| read_bytes(r)) - .collect::, _>>()?, - }), - _ => Err(Error::BadDocumentLength { + let n = n as usize; + if n > Self::MAX_LINES { + Err(Error::BadDocumentLength { offset, n, max: Self::MAX_LINES, - }), + }) + } else { + let pos = r.stream_position()?; + let mut lines = Vec::with_capacity(n); + for _ in 0..n { + lines.push(UnencodedStr::<{Document::LINE_LEN}>(read_bytes(r)?)); + } + Ok(Document { pos, lines }) } } } @@ -1534,7 +1553,7 @@ impl ExtensionRecord for LongStringMissingValueSet { } else { value }; - values.push(Value::String(value)); + values.push(Value::String(UnencodedStr(value))); } let missing_values = MissingValues { values,