4 fmt::{Debug, Display, Formatter, Result as FmtResult},
9 use encoding_rs::{EncoderResult, Encoding, UTF_8};
10 use finl_unicode::categories::{CharacterCategories, MajorCategory};
11 use thiserror::Error as ThisError;
/// Classifies a character by the positions it may occupy in an identifier.
///
/// Both methods take `self` by value and answer independently of each other:
/// one covers the leading position, the other every later position.
pub trait IdentifierChar {
    /// Returns true if `self` may be the first character in an identifier.
    fn may_start_id(self) -> bool;

    /// Returns true if `self` may be a second or subsequent character in an
    /// identifier.
    fn may_continue_id(self) -> bool;
}
23 impl IdentifierChar for char {
24 fn may_start_id(self) -> bool {
27 ([L, M, S].contains(&self.get_major_category()) || "@#$".contains(self))
28 && self != char::REPLACEMENT_CHARACTER
31 fn may_continue_id(self) -> bool {
34 ([L, M, S, N].contains(&self.get_major_category()) || "@#$._".contains(self))
35 && self != char::REPLACEMENT_CHARACTER
/// Reasons a string can be rejected as an identifier.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute.
#[derive(Clone, Debug, ThisError)]
pub enum Error {
    /// The candidate string was empty.
    #[error("Identifier cannot be empty string.")]
    Empty,

    /// The candidate (carried in the payload) is a reserved word.
    #[error("\"{0}\" may not be used as an identifier because it is a reserved word.")]
    Reserved(String),

    /// The candidate's first character may not start an identifier.  Carries
    /// the whole candidate and the offending character.
    #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character \"{1}\".")]
    BadFirstCharacter(String, char),

    /// A character after the first may not appear in an identifier.  Carries
    /// the whole candidate and the offending character.
    #[error("\"{0}\" may not be used as an identifier because it contains disallowed character \"{1}\".")]
    BadLaterCharacter(String, char),

    /// The identifier, once encoded, is longer than the allowed maximum.
    #[error("Identifier \"{id}\" is {length} bytes in the encoding in use ({encoding}), which exceeds the {max}-byte limit.")]
    TooLong {
        /// The rejected identifier.
        id: String,
        /// Its length in bytes after encoding.
        length: usize,
        /// Name of the encoding that was used.
        encoding: &'static str,
        /// The byte limit that was exceeded.
        max: usize,
    },

    /// The identifier contains a character the encoding cannot represent.
    #[error("\"{id}\" may not be used as an identifier because the encoding in use ({encoding}) cannot represent \"{c}\".")]
    NotEncodable {
        /// The rejected identifier.
        id: String,
        /// Name of the encoding that was used.
        encoding: &'static str,
        /// The character that could not be encoded.
        c: char,
    },
}
/// Returns true if `s` is a reserved word.
///
/// The comparison ignores ASCII case, so `"AND"`, `"And"` and `"and"` are all
/// reserved.  Only exact (whole-string) matches count.
fn is_reserved_word(s: &str) -> bool {
    /// Words that can never be used as identifiers.
    const RESERVED_WORDS: [&str; 12] = [
        "and", "or", "not", "eq", "ge", "gt", "le", "ne", "all", "by", "to", "with",
    ];

    RESERVED_WORDS
        .iter()
        .any(|word| s.eq_ignore_ascii_case(word))
}
/// An identifier.
///
/// The text is held in a `UniCase<String>`, and every derived comparison
/// (`PartialEq`, `Eq`, `PartialOrd`, `Ord`) as well as `Hash` is forwarded to
/// that wrapper, so identifiers compare the way `UniCase` compares strings
/// (case-insensitively, per the `unicase` crate).
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct Identifier(pub UniCase<String>);
84 /// Maximum length of an identifier, in bytes. The limit applies in the
85 /// encoding used by the dictionary, not in UTF-8.
86 pub const MAX_LEN: usize = 64;
88 pub fn new_utf8(s: &str) -> Result<Identifier, Error> {
91 pub fn new(s: &str, encoding: &'static Encoding) -> Result<Identifier, Error> {
92 Self::is_plausible(s)?;
93 let identifier = Identifier(s.into());
94 identifier.check_encoding(encoding)?;
97 /// Checks whether this is a valid identifier in the given `encoding`. An
98 /// identifier that is valid in one encoding might be invalid in another
99 /// because some characters are unencodable or because it is too long.
100 pub fn check_encoding(&self, encoding: &'static Encoding) -> Result<(), Error> {
101 let s = self.0.as_str();
102 let (encoded, _, unencodable) = encoding.encode(s);
104 let mut encoder = encoding.new_encoder();
105 let mut buf = Vec::with_capacity(
107 .max_buffer_length_from_utf8_without_replacement(s.len())
110 let EncoderResult::Unmappable(c) = encoder
111 .encode_from_utf8_to_vec_without_replacement(s, &mut buf, true)
116 return Err(Error::NotEncodable {
118 encoding: encoding.name(),
122 if encoded.len() > Self::MAX_LEN {
123 return Err(Error::TooLong {
125 length: encoded.len(),
126 encoding: encoding.name(),
132 pub fn is_plausible(s: &str) -> Result<(), Error> {
134 return Err(Error::Empty);
136 if is_reserved_word(s) {
137 return Err(Error::Reserved(s.into()));
140 let mut i = s.chars();
141 let first = i.next().unwrap();
142 if !first.may_start_id() {
143 return Err(Error::BadFirstCharacter(s.into(), first));
146 if !c.may_continue_id() {
147 return Err(Error::BadLaterCharacter(s.into(), c));
154 impl Display for Identifier {
155 fn fmt(&self, f: &mut Formatter) -> FmtResult {
156 write!(f, "{}", self.0)
/// Implemented by types that can hand out a reference to an [`Identifier`].
pub trait HasIdentifier {
    /// Returns a reference to this value's identifier.
    fn identifier(&self) -> &Identifier;
}
/// Wrapper around a `T` that is compared, ordered and hashed solely by the
/// identifier `T` exposes through [`HasIdentifier`].
///
/// The wrapped value is the public field `0`.
pub struct ByIdentifier<T>(pub T)
where
    T: HasIdentifier;
168 impl<T> ByIdentifier<T>
172 pub fn new(inner: T) -> Self {
177 impl<T> PartialEq for ByIdentifier<T>
181 fn eq(&self, other: &Self) -> bool {
182 self.0.identifier().eq(other.0.identifier())
/// Marker impl: `ByIdentifier` equality is the equality of the underlying
/// [`Identifier`]s, which is itself `Eq`.
impl<T> Eq for ByIdentifier<T> where T: HasIdentifier {}
188 impl<T> PartialOrd for ByIdentifier<T>
192 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
193 Some(self.cmp(other))
197 impl<T> Ord for ByIdentifier<T>
201 fn cmp(&self, other: &Self) -> Ordering {
202 self.0.identifier().cmp(other.0.identifier())
206 impl<T> Hash for ByIdentifier<T>
210 fn hash<H: Hasher>(&self, state: &mut H) {
211 self.0.identifier().hash(state)
215 impl<T> Borrow<Identifier> for ByIdentifier<T>
219 fn borrow(&self) -> &Identifier {
224 impl<T> Debug for ByIdentifier<T>
226 T: HasIdentifier + Debug,
228 fn fmt(&self, f: &mut Formatter) -> FmtResult {
233 impl<T> Clone for ByIdentifier<T>
235 T: HasIdentifier + Clone,
237 fn clone(&self) -> Self {
242 impl<T> Deref for ByIdentifier<T>
244 T: HasIdentifier + Clone,
248 fn deref(&self) -> &Self::Target {