endian::Endian,
format::{Error as FormatError, Spec, UncheckedSpec},
identifier::{Error as IdError, Identifier},
- raw::{self, MissingValues, UnencodedStr, VarType},
+ raw::{self, MissingValues, UnencodedStr, VarType}, encoding::get_encoding,
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::{DecoderResult, Encoding};
n_generated_names: usize,
}
+/// Decodes the raw header records of a system file into `Record`s.
+///
+/// Scans `headers` for the first `raw::Record::Encoding` record and passes the
+/// encoding name it carries (if any) to `get_encoding`. Decoding itself is not
+/// implemented yet: the function always returns an empty vector.
+///
+/// `warn` is accepted as a callback taking an `Error`, but nothing in this
+/// body invokes it yet.
-pub fn decode<T>(headers: Vec<raw::Record>) -> Vec<Record> {
+pub fn decode<T>(headers: Vec<raw::Record>, warn: &impl Fn(Error)) -> Vec<Record> {
let encoding = headers.iter().find_map(|rec| {
if let raw::Record::Encoding(ref e) = rec {
Some(e.0.as_str())
} else {
None
}
});
-
+ // NOTE(review): `character_code` is not defined anywhere in the visible
+ // code, and the `Result` returned by `get_encoding` is neither unwrapped
+ // nor reported through `warn` yet -- confirm before relying on this.
+ let encoding = get_encoding(encoding, character_code);
+
+ // NOTE(review): `Decoder` is built with no fields here; presumably a
+ // placeholder -- verify against the `Decoder` definition.
+ let decoder = Decoder {
+ };
Vec::new()
}
use thiserror::Error as ThisError;
#[derive(ThisError, Debug)]
pub enum Error {
- #[error("This system file does not indicate its own character encoding. Using default encoding {0}. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
- NoEncoding(String),
-
+ #[error("This system file does not indicate its own character encoding. xFor best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
+ NoEncoding,
+
+ #[error("This system file encodes text strings with unknown code page {0}.")]
+ UnknownCodepage(u32),
+
+ #[error("This system file is encoded in EBCDIC, which is not supported.")]
+ Ebcdic,
}
/// Returns the character set used by the locale configured in the operating
"UTF-8"
}
-/*
-pub fn encoding_from_hints(encoding: Option<&str>, codepage: Option<u32>) -> Result<&str, ()> {
- let label = if encoding.is_some() {
- encoding
- } else if let Some(codepage) = codepage {
+/// Picks the name of the character encoding to use for a system file.
+///
+/// An explicit `encoding` name always wins and is returned unchanged.
+/// Otherwise `character_code` is interpreted as follows:
+///
+/// * `1` yields `Error::Ebcdic`.
+/// * `2` and `3` are deliberately ignored and yield `Error::NoEncoding`.
+/// * `4` yields `"MS_KANJI"`.
+/// * Any other value is looked up in `CODEPAGE_NUMBER_TO_NAME`; a miss
+///   yields `Error::UnknownCodepage` carrying the number.
+///
+/// When neither argument is present the result is `Error::NoEncoding`.
+pub fn get_encoding(encoding: Option<&str>, character_code: Option<u32>) -> Result<&str, Error> {
+ if let Some(encoding) = encoding {
+ Ok(encoding)
+ } else if let Some(codepage) = character_code {
match codepage {
- 1 => Some("EBCDIC-US"),
+ 1 => Err(Error::Ebcdic),
2 | 3 => {
// These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
// respectively. However, many files have character code 2 but
// data which are clearly not ASCII. Therefore, ignore these
// values.
- None
- },
- 4 => Some("MS_KANJI"),
- _ => CODEPAGE_NUMBER_TO_NAME.get(&codepage).copied()
+ Err(Error::NoEncoding)
+ }
+ 4 => Ok("MS_KANJI"),
+ _ => CODEPAGE_NUMBER_TO_NAME
+ .get(&codepage)
+ .copied()
+ .ok_or(Error::UnknownCodepage(codepage)),
}
} else {
- None
- };
+ Err(Error::NoEncoding)
+ }
}
-*/