use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
use crate::{
+ encoding::{get_encoding, Error as EncodingError},
endian::Endian,
format::{Error as FormatError, Spec, UncheckedSpec},
identifier::{Error as IdError, Identifier},
- raw::{self, MissingValues, UnencodedStr, VarType}, encoding::get_encoding,
+ raw::{self, MissingValues, UnencodedStr, VarType},
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::{DecoderResult, Encoding};
#[derive(ThisError, Debug)]
pub enum Error {
+ // XXX this is really an internal error and maybe we should change the
+ // interfaces to make it impossible
+ #[error("Missing header record")]
+ MissingHeaderRecord,
+
+ #[error("{0}")]
+ EncodingError(EncodingError),
+
#[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
InvalidVariableWidth { offset: u64, width: i32 },
n_generated_names: usize,
}
/// Decodes raw system-file records into [`Record`]s.
///
/// Looks up the first `raw::Record::Header` in `headers` to obtain the
/// compression and endianness, resolves the text encoding through
/// `get_encoding`, and sets up a `Decoder` with empty variable tables.
///
/// # Errors
///
/// Returns [`Error::MissingHeaderRecord`] if `headers` contains no header
/// record, and [`Error::EncodingError`] if `get_encoding` reports EBCDIC.
/// Any other encoding error is passed to `warn` instead of being returned.
///
/// # Panics
///
/// NOTE(review): the body currently ends in `unreachable!()`, so reaching the
/// end of the function panics instead of returning records. This looks like
/// work in progress; confirm before relying on the `Ok` path.
///
/// NOTE(review): the type parameter `T` is not used in the visible signature
/// or body, so callers cannot have it inferred; confirm whether it is needed.
-pub fn decode<T>(headers: Vec<raw::Record>, warn: &impl Fn(Error)) -> Vec<Record> {
+pub fn decode<T>(headers: Vec<raw::Record>, warn: &impl Fn(Error)) -> Result<Vec<Record>, Error> {
// The header record is mandatory: without it there is no compression or
// endianness to configure the decoder with.
+ let Some(header_record) = headers.iter().find_map(|rec| {
+ if let raw::Record::Header(header) = rec {
+ Some(header)
+ } else {
+ None
+ }
+ }) else {
+ return Err(Error::MissingHeaderRecord);
+ };
// The first encoding record, if any, supplies the encoding name.
let encoding = headers.iter().find_map(|rec| {
if let raw::Record::Encoding(ref e) = rec {
Some(e.0.as_str())
None
}
});
// NOTE(review): `character_code` is not defined in the lines visible here;
// presumably it comes from an elided record lookup. Confirm.
- let encoding = get_encoding(encoding, character_code)
+ let encoding = match get_encoding(encoding, character_code) {
+ Ok(encoding) => encoding,
// EBCDIC is the only encoding error treated as fatal.
+ Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
+ Err(err) => {
+ warn(Error::EncodingError(err));
+ // Warn that we're using the default encoding.
+
// NOTE(review): this arm ends without yielding an encoding value, while the
// `Ok` arm yields one. A fallback encoding presumably still has to be
// produced here; confirm.
+ }
+ };
// NOTE(review): `decoder` is constructed but not read again in the visible
// lines of this function.
let decoder = Decoder {
+ compression: header_record.compression,
+ endian: header_record.endian,
+ encoding,
+ variables: HashMap::new(),
+ var_names: HashMap::new(),
+ n_dict_indexes: 0,
+ n_generated_names: 0,
};
- Vec::new()
+ unreachable!()
}
impl Decoder {
use thiserror::Error as ThisError;
#[derive(ThisError, Debug)]
pub enum Error {
- #[error("This system file does not indicate its own character encoding. xFor best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
+ #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
NoEncoding,
#[error("This system file encodes text strings with unknown code page {0}.")]
- UnknownCodepage(u32),
+ UnknownCodepage(i32),
#[error("This system file is encoded in EBCDIC, which is not supported.")]
Ebcdic,
"UTF-8"
}
-pub fn get_encoding(encoding: Option<&str>, character_code: Option<u32>) -> Result<&str, Error> {
+pub fn get_encoding(encoding: Option<&str>, character_code: Option<i32>) -> Result<&str, Error> {
if let Some(encoding) = encoding {
Ok(encoding)
} else if let Some(codepage) = character_code {