1 use std::fmt::{Display, Formatter, Result as FmtResult};
3 use encoding_rs::{EncoderResult, Encoding};
4 use finl_unicode::categories::{CharacterCategories, MajorCategory};
5 use thiserror::Error as ThisError;
/// Character-level tests used when validating identifiers.
pub trait IdentifierChar {
    /// Returns true if `self` may be the first character in an identifier.
    fn may_start_id(self) -> bool;

    /// Returns true if `self` may be a second or subsequent character in an
    /// identifier.
    fn may_continue_id(self) -> bool;
}
17 impl IdentifierChar for char {
18 fn may_start_id(self) -> bool {
21 ([L, M, S].contains(&self.get_major_category()) || "@#$".contains(self))
22 && self != char::REPLACEMENT_CHARACTER
25 fn may_continue_id(self) -> bool {
28 ([L, M, S, N].contains(&self.get_major_category()) || "@#$._".contains(self))
29 && self != char::REPLACEMENT_CHARACTER
33 #[derive(Clone, Debug, ThisError)]
35 #[error("Identifier cannot be empty string.")]
38 #[error("\"{0}\" may not be used as an identifier because it is a reserved word.")]
41 #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character \"{1}\".")]
42 BadFirstCharacter(String, char),
44 #[error("\"{0}\" may not be used as an identifier because it contains disallowed character \"{1}\".")]
45 BadLaterCharacter(String, char),
47 #[error("Identifier \"{id}\" is {length} bytes in the encoding in use ({encoding}), which exceeds the {max}-byte limit.")]
51 encoding: &'static str,
55 #[error("\"{id}\" may not be used as an identifier because the encoding in use ({encoding}) cannot represent \"{c}\".")]
58 encoding: &'static str,
/// Returns true if `s` is a reserved word and therefore cannot be used as an
/// identifier.
///
/// The comparison ignores ASCII case, so `"AND"`, `"And"` and `"and"` are all
/// reserved.
fn is_reserved_word(s: &str) -> bool {
    // `lt` belongs with the other relational keywords (`eq`, `ge`, `gt`,
    // `le`, `ne`); leaving it out would let `LT` through as an identifier.
    const RESERVED_WORDS: [&str; 13] = [
        "and", "or", "not", "eq", "ge", "gt", "le", "lt", "ne", "all", "by", "to", "with",
    ];

    RESERVED_WORDS
        .iter()
        .any(|word| s.eq_ignore_ascii_case(word))
}
74 #[derive(Clone, PartialEq, Eq, Debug, Hash)]
75 pub struct Identifier(pub UniCase<String>);
78 /// Maximum length of an identifier, in bytes. The limit applies in the
79 /// encoding used by the dictionary, not in UTF-8.
80 pub const MAX_LEN: usize = 64;
82 pub fn new(s: &str, encoding: &'static Encoding) -> Result<Identifier, Error> {
83 Self::is_plausible(s)?;
84 let (encoded, _, unencodable) = encoding.encode(s);
86 let mut encoder = encoding.new_encoder();
87 let mut buf = Vec::with_capacity(
89 .max_buffer_length_from_utf8_without_replacement(s.len())
92 let EncoderResult::Unmappable(c) = encoder
93 .encode_from_utf8_to_vec_without_replacement(s, &mut buf, true)
98 return Err(Error::NotEncodable {
100 encoding: encoding.name(),
104 if encoded.len() > Self::MAX_LEN {
105 return Err(Error::TooLong {
107 length: encoded.len(),
108 encoding: encoding.name(),
112 Ok(Identifier(s.into()))
114 pub fn is_plausible(s: &str) -> Result<(), Error> {
116 return Err(Error::Empty);
118 if is_reserved_word(s) {
119 return Err(Error::Reserved(s.into()));
122 let mut i = s.chars();
123 let first = i.next().unwrap();
124 if !first.may_start_id() {
125 return Err(Error::BadFirstCharacter(s.into(), first));
128 if !c.may_continue_id() {
129 return Err(Error::BadLaterCharacter(s.into(), c));
136 impl Display for Identifier {
137 fn fmt(&self, f: &mut Formatter) -> FmtResult {
138 write!(f, "{}", self.0)