@table @asis
@item 1
-EBCDIC.
+EBCDIC. Only one example has been observed.
@item 2
7-bit ASCII. Old versions of SPSS for Unix and Windows always wrote
The @code{windows-874} code page for Thai.
@item 932
-The @code{windows-932} code page for Japanese.
+The @code{windows-932} code page for Japanese (aka @code{Shift_JIS}).
@item 936
-The @code{windows-936} code page for simplified Chinese
+The @code{windows-936} code page for simplified Chinese (aka @code{GBK}).
@item 949
Probably @code{ks_c_5601-1987}, Unified Hangul Code.
include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
+
+/// Looks up the codepage number registered for the encoding name `encoding`.
+///
+/// The name is converted to ASCII lowercase before it is looked up in
+/// `CODEPAGE_NAME_TO_NUMBER` (the table keys are presumably stored in
+/// lowercase -- confirm against the generated `encodings.rs`).  Returns
+/// `None` when the table has no entry for the lowercased name.
+pub fn codepage_from_encoding(encoding: &str) -> Option<u32> {
+    let key = encoding.to_ascii_lowercase();
+    CODEPAGE_NAME_TO_NUMBER.get(key.as_str()).copied()
+}
+
+/// Chooses an encoding name from the hints available for a file.
+///
+/// An explicit `encoding` name always wins and is returned unchanged.
+/// Otherwise the numeric `codepage`, if any, is translated to a name:
+/// a few values are handled specially and the rest are looked up in
+/// `CODEPAGE_NUMBER_TO_NAME`.  Returns `None` when neither hint yields a
+/// name.
+pub fn encoding_from_hints(encoding: Option<&str>, codepage: Option<u32>) -> Option<&str> {
+    match (encoding, codepage) {
+        // An explicit encoding name takes priority over any codepage.
+        (Some(name), _) => Some(name),
+
+        // No hints at all.
+        (None, None) => None,
+
+        (None, Some(1)) => Some("EBCDIC-US"),
+
+        // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+        // respectively.  However, many files have character code 2 but
+        // data which are clearly not ASCII.  Therefore, ignore these
+        // values.
+        (None, Some(2 | 3)) => None,
+
+        (None, Some(4)) => Some("MS_KANJI"),
+
+        // Every other value is looked up in the codepage table.
+        (None, Some(number)) => CODEPAGE_NUMBER_TO_NAME.get(&number).copied(),
+    }
+}
}
}
+/// Contents of an encoding record: the record's data interpreted as a
+/// UTF-8 string.
+pub struct Encoding(pub String);
+
+impl ExtensionRecord for Encoding {
+    // NOTE(review): presumably `SIZE` is the required per-element size and
+    // `COUNT = None` means any element count is accepted -- confirm against
+    // the `ExtensionRecord` trait and `Extension::check_size`.
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "encoding record";
+
+    /// Parses `ext` as an encoding record.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `ext.check_size` rejects the record or if the
+    /// record's data is not valid UTF-8.
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Self, Error> {
+        ext.check_size::<Self>()?;
+
+        // `ext` is only borrowed, so its bytes cannot be moved out of it;
+        // copy them to obtain the owned buffer that `String::from_utf8`
+        // requires.  The error type stays `FromUtf8Error`, as before.
+        Ok(Encoding(String::from_utf8(ext.data.clone())?))
+    }
+}
+
pub struct Extension {
/// Offset from the start of the file to the start of the record.
pub offset: u64,