+/* Builds a new null-terminated UTF-8 string, owned by the caller, that
+   consists of a leading portion of HEAD followed by the whole of TAIL.
+   HEAD and TAIL must themselves be UTF-8.
+
+   The budget is MAX_LEN bytes, measured as if the result were converted to
+   ENCODING (a null ENCODING means UTF-8).  As much of the start of HEAD is
+   kept as fits within that budget.  TAIL is never shortened: it is kept
+   whole even when TAIL alone exceeds MAX_LEN bytes in ENCODING.
+
+   HEAD is shortened a grapheme cluster at a time rather than a character
+   at a time, so a base character and a combining accent that follows it
+   are kept together or dropped together.  Grapheme clusters are defined
+   by UAX #29, http://unicode.org/reports/tr29/.
+
+   With encoding="UTF-8" and max_len=6:
+
+     head="abc",  tail="xyz"     => "abcxyz"
+     head="abcd", tail="xyz"     => "abcxyz"
+     head="abc",  tail="uvwxyz"  => "uvwxyz"
+     head="abc",  tail="tuvwxyz" => "tuvwxyz"
+
+   With encoding="ISO-8859-1" and max_len=6:
+
+     head="éèä", tail="xyz" => "éèäxyz"
+
+   (each of é, è, and ä occupies 2 bytes in UTF-8 but only 1 byte in
+   ISO-8859-1, so all of HEAD fits)  */
+char *
+utf8_encoding_concat (const char *head, const char *tail,
+                      const char *encoding, size_t max_len)
+{
+  const size_t tail_size = strlen (tail);
+  char *joined;
+  size_t keep;
+
+  keep = utf8_encoding_concat__ (head, strlen (head), tail, tail_size,
+                                 encoding, max_len, &joined);
+  /* No finished string came back: assemble one from the prefix length.  */
+  if (joined == NULL)
+    joined = xconcat2 (head, keep, tail, tail_size);
+  return joined;
+}
+
+/* Computes the byte length of the string that utf8_encoding_concat() would
+   return for the same HEAD, TAIL, ENCODING, and MAX_LEN.  This is often
+   cheaper than building that string and measuring it.  */
+size_t
+utf8_encoding_concat_len (const char *head, const char *tail,
+                          const char *encoding, size_t max_len)
+{
+  const size_t tail_size = strlen (tail);
+  char *scratch;
+  size_t keep = utf8_encoding_concat__ (head, strlen (head), tail, tail_size,
+                                        encoding, max_len, &scratch);
+
+  /* Only the length is wanted, so discard any string the helper made.  */
+  free (scratch);
+  return tail_size + keep;
+}
+
+/* Truncates S to fit a byte budget.  Returns an allocated, null-terminated
+   UTF-8 string, owned by the caller, that holds as much of the start of S
+   as would occupy no more than MAX_LEN bytes when converted to ENCODING.
+   S must be UTF-8.  A null ENCODING means UTF-8.
+
+   Truncation removes whole grapheme clusters rather than single
+   characters, so a base character and a combining accent that follows it
+   are kept together or dropped together.  Grapheme clusters are defined by
+   UAX #29, http://unicode.org/reports/tr29/.
+
+   This is utf8_encoding_concat() with an empty TAIL.  */
+char *
+utf8_encoding_trunc (const char *s, const char *encoding, size_t max_len)
+{
+  const char *empty_tail = "";
+
+  return utf8_encoding_concat (s, empty_tail, encoding, max_len);
+}