work
[pspp] / rust / src / encoding.rs
index a0e28af77100de023d5d142acd0bba1782a784ee..296e4e65a8e4f32a61e4e415318610e3180513f7 100644 (file)
@@ -1 +1,28 @@
 include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
+
+pub fn codepage_from_encoding(encoding: &str) -> Option<u32> {
+    CODEPAGE_NAME_TO_NUMBER
+        .get(encoding.to_ascii_lowercase().as_str())
+        .copied()
+}
+
+pub fn encoding_from_hints(encoding: Option<&str>, codepage: Option<u32>) -> Option<&str> {
+    if encoding.is_some() {
+        encoding
+    } else if let Some(codepage) = codepage {
+        match codepage {
+            1 => Some("EBCDIC-US"),
+            2 | 3 => {
+                // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+                // respectively.  However, many files have character code 2 but
+                // data which are clearly not ASCII.  Therefore, ignore these
+                // values.
+                None
+            },
+            4 => Some("MS_KANJI"),
+            _ => CODEPAGE_NUMBER_TO_NAME.get(&codepage).copied()
+        }
+    } else {
+        None
+    }
+}