From: Ben Pfaff Date: Sat, 4 Nov 2023 00:17:30 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6165ed413d9aa818e3246d0a063c646dc4efc7e5;p=pspp work --- diff --git a/rust/build.rs b/rust/build.rs index 48e011b4c5..112f3d3da8 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -102,7 +102,7 @@ use lazy_static::lazy_static; use std::collections::HashMap; lazy_static! { - static ref CODEPAGE_NUMBER_TO_NAME: HashMap = { + static ref CODEPAGE_NUMBER_TO_NAME: HashMap = { let mut map = HashMap::new(); " )?; diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 3fac184063..ef4b79ccf4 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -1,10 +1,11 @@ use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat}; use crate::{ + encoding::{get_encoding, Error as EncodingError}, endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, - raw::{self, MissingValues, UnencodedStr, VarType}, encoding::get_encoding, + raw::{self, MissingValues, UnencodedStr, VarType}, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::{DecoderResult, Encoding}; @@ -16,6 +17,14 @@ pub use crate::raw::{CategoryLabels, Compression}; #[derive(ThisError, Debug)] pub enum Error { + // XXX this is really an internal error and maybe we should change the + // interfaces to make it impossible + #[error("Missing header record")] + MissingHeaderRecord, + + #[error("{0}")] + EncodingError(EncodingError), + #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] InvalidVariableWidth { offset: u64, width: i32 }, @@ -187,7 +196,16 @@ pub struct Decoder { n_generated_names: usize, } -pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Vec { +pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Result, Error> { + let Some(header_record) = headers.iter().find_map(|rec| { + if let raw::Record::Header(header) = rec { + Some(header) + } else { + None + } + }) else { + return Err(Error::MissingHeaderRecord); + }; let encoding = headers.iter().find_map(|rec| { if let raw::Record::Encoding(ref e) = rec { Some(e.0.as_str()) @@ -202,12 +220,27 @@ pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Vec encoding, + Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)), + Err(err) => { + warn(Error::EncodingError(err)); + // Warn that we're using the default encoding. + + } + }; let decoder = Decoder { + compression: header_record.compression, + endian: header_record.endian, + encoding, + variables: HashMap::new(), + var_names: HashMap::new(), + n_dict_indexes: 0, + n_generated_names: 0, }; - Vec::new() + unreachable!() } impl Decoder { diff --git a/rust/src/encoding.rs b/rust/src/encoding.rs index 3d585a6b63..3509b73954 100644 --- a/rust/src/encoding.rs +++ b/rust/src/encoding.rs @@ -9,11 +9,11 @@ pub fn codepage_from_encoding(encoding: &str) -> Option { use thiserror::Error as ThisError; #[derive(ThisError, Debug)] pub enum Error { - #[error("This system file does not indicate its own character encoding. xFor best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")] + #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")] NoEncoding, #[error("This system file encodes text strings with unknown code page {0}.")] - UnknownCodepage(u32), + UnknownCodepage(i32), #[error("This system file is encoded in EBCDIC, which is not supported.")] Ebcdic, @@ -26,7 +26,7 @@ pub fn locale_charset() -> &'static str { "UTF-8" } -pub fn get_encoding(encoding: Option<&str>, character_code: Option) -> Result<&str, Error> { +pub fn get_encoding(encoding: Option<&str>, character_code: Option) -> Result<&str, Error> { if let Some(encoding) = encoding { Ok(encoding) } else if let Some(codepage) = character_code {