From 92419c2b58e3fcef8898f4c5569a24c9bacaaf73 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 4 Nov 2023 16:31:19 -0700 Subject: [PATCH] work --- rust/src/cooked.rs | 7 +++++-- rust/src/encoding.rs | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index ef4b79ccf4..1749ecc301 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat}; use crate::{ - encoding::{get_encoding, Error as EncodingError}, + encoding::{get_encoding, Error as EncodingError, default_encoding}, endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, @@ -25,6 +25,9 @@ pub enum Error { #[error("{0}")] EncodingError(EncodingError), + #[error("Using default encoding {0}.")] + UsingDefaultEncoding(String), + #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] InvalidVariableWidth { offset: u64, width: i32 }, @@ -226,7 +229,7 @@ pub fn decode(headers: Vec, warn: &impl Fn(Error)) -> Result { warn(Error::EncodingError(err)); // Warn that we're using the default encoding. - + default_encoding() } }; diff --git a/rust/src/encoding.rs b/rust/src/encoding.rs index 3509b73954..d135b8e9e6 100644 --- a/rust/src/encoding.rs +++ b/rust/src/encoding.rs @@ -1,3 +1,5 @@ +use encoding_rs::{Encoding, UTF_8}; + include!(concat!(env!("OUT_DIR"), "/encodings.rs")); pub fn codepage_from_encoding(encoding: &str) -> Option { @@ -7,6 +9,8 @@ pub fn codepage_from_encoding(encoding: &str) -> Option { } use thiserror::Error as ThisError; + +use crate::locale_charset::locale_charset; #[derive(ThisError, Debug)] pub enum Error { #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")] @@ -19,11 +23,14 @@ pub enum Error { Ebcdic, } -/// Returns the character set used by the locale configured in the operating -/// system. This should implement roughly the same behavior as the function -/// with the same name in Gnulib. Until then, we'll just use a default. -pub fn locale_charset() -> &'static str { - "UTF-8" + +pub fn default_encoding() -> &'static Encoding { + lazy_static! { + static ref DEFAULT_ENCODING: &'static Encoding = { + Encoding::for_label(locale_charset()).unwrap_or(&UTF_8) + }; + } + DEFAULT_ENCODING } pub fn get_encoding(encoding: Option<&str>, character_code: Option) -> Result<&str, Error> { -- 2.30.2