+use crate::locale_charset::locale_charset;
+use encoding_rs::{Encoding, UTF_8};
+
include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
pub fn codepage_from_encoding(encoding: &str) -> Option<u32> {
.copied()
}
-pub fn encoding_from_hints(encoding: Option<&str>, codepage: Option<u32>) -> Option<&str> {
- if encoding.is_some() {
+use thiserror::Error as ThisError;
+
+/// Reasons why the character encoding of a system file could not be
+/// determined.
+///
+/// The `Display` text of each variant is the user-facing diagnostic.
+#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
+pub enum Error {
+ /// No encoding name was supplied and the character code, if any, carries
+ /// no usable information.
+ #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
+ NoEncoding,
+
+ /// The character code is not a known code page number; the payload is the
+ /// offending code.
+ #[error("This system file encodes text strings with unknown code page {0}.")]
+ UnknownCodepage(i32),
+
+ /// The encoding label could not be resolved to an [`Encoding`]; the
+ /// payload is the offending label.
+ #[error("This system file encodes text strings with unknown encoding {0}.")]
+ UnknownEncoding(String),
+
+ /// The character code designates EBCDIC.
+ #[error("This system file is encoded in EBCDIC, which is not supported.")]
+ Ebcdic,
+}
+
+/// Returns the default encoding, chosen from the locale's character set.
+///
+/// The label reported by `locale_charset()` is resolved inside a
+/// `lazy_static!` initializer; when `Encoding::for_label` does not recognize
+/// it, UTF-8 is used instead.
+pub fn default_encoding() -> &'static Encoding {
+ lazy_static! {
+ static ref DEFAULT_ENCODING: &'static Encoding = {
+ let charset = locale_charset();
+ Encoding::for_label(charset.as_bytes()).unwrap_or(UTF_8)
+ };
+ }
+ *DEFAULT_ENCODING
+}
+
+/// Determines the text encoding of a system file from the hints it carries.
+///
+/// `encoding` is an encoding label and, when present, takes precedence.
+/// Otherwise `character_code` is interpreted as a numeric character code:
+/// code 4 maps to `MS_KANJI` and any other value is looked up in
+/// `CODEPAGE_NUMBER_TO_NAME`. The resulting label is resolved with
+/// [`Encoding::for_label`].
+///
+/// # Errors
+///
+/// * [`Error::NoEncoding`] if neither hint is given, or the character code
+///   is 2 or 3.
+/// * [`Error::Ebcdic`] if the character code is 1.
+/// * [`Error::UnknownCodepage`] if the character code is not in
+///   `CODEPAGE_NUMBER_TO_NAME`.
+/// * [`Error::UnknownEncoding`] if the label is not recognized by
+///   [`Encoding::for_label`].
+pub fn get_encoding(
+ encoding: Option<&str>,
+ character_code: Option<i32>,
+) -> Result<&'static Encoding, Error> {
+ let label = if let Some(encoding) = encoding {
encoding
- } else if let Some(codepage) = codepage {
+ } else if let Some(codepage) = character_code {
match codepage {
- 1 => Some("EBCDIC-US"),
+ 1 => return Err(Error::Ebcdic),
2 | 3 => {
// These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
// respectively. However, many files have character code 2 but
// data which are clearly not ASCII. Therefore, ignore these
// values.
- None
- },
- 4 => Some("MS_KANJI"),
- _ => CODEPAGE_NUMBER_TO_NAME.get(&codepage).copied()
+ return Err(Error::NoEncoding);
+ }
+ 4 => "MS_KANJI",
+ _ => CODEPAGE_NUMBER_TO_NAME
+ .get(&codepage)
+ .copied()
+ .ok_or(Error::UnknownCodepage(codepage))?,
}
} else {
- None
- }
+ return Err(Error::NoEncoding);
+ };
+
+ // Build the error lazily so the label is copied into a `String` only when
+ // the lookup actually fails.
+ Encoding::for_label(label.as_bytes()).ok_or_else(|| Error::UnknownEncoding(label.into()))
}