1 use encoding_rs::{EncoderResult, Encoding};
2 use finl_unicode::categories::{CharacterCategories, MajorCategory};
3 use thiserror::Error as ThisError;
/// Classifies a value by the positions it may occupy within an identifier.
///
/// The two predicates are independent: something allowed in the middle of an
/// identifier is not necessarily allowed at its start.
pub trait IdentifierChar {
    /// Returns true if `self` may be the first character in an identifier.
    fn may_start_id(self) -> bool;

    /// Returns true if `self` may be a second or subsequent character in an
    /// identifier.
    fn may_continue_id(self) -> bool;
}
15 impl IdentifierChar for char {
16 fn may_start_id(self) -> bool {
19 ([L, M, S].contains(&self.get_major_category()) || "@#$".contains(self))
20 && self != char::REPLACEMENT_CHARACTER
23 fn may_continue_id(self) -> bool {
26 ([L, M, S, N].contains(&self.get_major_category()) || "@#$._".contains(self))
27 && self != char::REPLACEMENT_CHARACTER
/// An identifier, stored as a `UniCase<String>`.
///
/// NOTE(review): equality and hashing are derived from the `UniCase` wrapper,
/// which presumably makes them case-insensitive; confirm against the `unicase`
/// crate documentation.
///
/// The inner field is public, so a value can be built directly without going
/// through `Identifier::new`; such a value has not been validated.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub struct Identifier(pub UniCase<String>);
34 #[derive(Clone, Debug, ThisError)]
36 #[error("Identifier cannot be empty string.")]
39 #[error("\"{0}\" may not be used as an identifier because it is a reserved word.")]
42 #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character \"{1}\".")]
43 BadFirstCharacter(String, char),
45 #[error("\"{0}\" may not be used as an identifier because it contains disallowed character \"{1}\".")]
46 BadLaterCharacter(String, char),
48 #[error("Identifier \"{id}\" is {length} bytes in the encoding in use ({encoding}), which exceeds the {max}-byte limit.")]
52 encoding: &'static str,
56 #[error("\"{id}\" may not be used as an identifier because the encoding in use ({encoding}) cannot represent \"{c}\".")]
59 encoding: &'static str,
/// Returns true if `s` is a reserved word, compared ASCII-case-insensitively.
///
/// Reserved words are the logical operators (`and`, `or`, `not`), the
/// relational operators (`eq`, `ge`, `gt`, `le`, `lt`, `ne`) and the keywords
/// `all`, `by`, `to` and `with`.
fn is_reserved_word(s: &str) -> bool {
    // `lt` belongs here alongside the other five relational operators;
    // leaving it out would allow `LT` to be used as an identifier.
    const RESERVED_WORDS: [&str; 13] = [
        "and", "or", "not", "eq", "ge", "gt", "le", "lt", "ne", "all", "by", "to", "with",
    ];

    RESERVED_WORDS
        .iter()
        .any(|word| s.eq_ignore_ascii_case(word))
}
76 /// Maximum length of an identifier, in bytes. The limit applies in the
77 /// encoding used by the dictionary, not in UTF-8.
78 pub const MAX_LEN: usize = 64;
80 pub fn new(s: &str, encoding: &'static Encoding) -> Result<Identifier, Error> {
81 Self::is_plausible(s)?;
82 let (encoded, _, unencodable) = encoding.encode(s);
84 let mut encoder = encoding.new_encoder();
86 Vec::with_capacity(encoder.max_buffer_length_from_utf8_without_replacement(s.len()).unwrap());
87 let EncoderResult::Unmappable(c) = encoder
88 .encode_from_utf8_to_vec_without_replacement(s, &mut buf, true)
93 return Err(Error::NotEncodable { id: s.into(), encoding: encoding.name(), c });
95 if encoded.len() > Self::MAX_LEN {
96 return Err(Error::TooLong { id: s.into(), length: encoded.len(), encoding: encoding.name(), max: Self::MAX_LEN });
98 Ok(Identifier(s.into()))
100 pub fn is_plausible(s: &str) -> Result<(), Error> {
102 return Err(Error::Empty);
104 if is_reserved_word(s) {
105 return Err(Error::Reserved(s.into()));
108 let mut i = s.chars();
109 let first = i.next().unwrap();
110 if !first.may_start_id() {
111 return Err(Error::BadFirstCharacter(s.into(), first));
114 if !c.may_continue_id() {
115 return Err(Error::BadLaterCharacter(s.into(), c));