+/* Returns the length, in bytes, of the string that a similar
+   recode_string() call would return, not counting the null terminator.
+
+   TO, FROM, TEXT, and LENGTH are handed to recode_string() unchanged.  The
+   converted string is needed only for measuring, so it is freed before
+   returning.
+
+   Returns 0 if recode_string() yields a null pointer (presumably the case
+   for a null TEXT, as with recode_string_pool()), instead of passing that
+   null pointer to strlen(), which would be undefined behavior. */
+size_t
+recode_string_len (const char *to, const char *from,
+                   const char *text, int length)
+{
+  char *s = recode_string (to, from, text, length);
+  size_t len = s != NULL ? strlen (s) : 0;
+
+  /* free() accepts a null pointer, so no guard is needed here. */
+  free (s);
+  return len;
+}
+
+/* Converts the INBYTES bytes starting at IP with CONV, storing the result in
+   the OUTBYTES-byte buffer starting at OP_ and appending a null terminator.
+
+   Input that iconv() rejects with EILSEQ is replaced by '?', skipping one
+   input byte per replacement.  If iconv() reports EINVAL, a single '?' is
+   appended and the conversion ends there.
+
+   Returns the number of bytes written, not counting the null terminator, or
+   -1 if the output buffer is too small. */
+static ssize_t
+try_recode (iconv_t conv,
+            const char *ip, size_t inbytes,
+            char *op_, size_t outbytes)
+{
+  /* FIXME: Need to ensure that this char is valid in the target encoding */
+  const char fallbackchar = '?';
+  char *op = op_;
+
+  /* Reset CONV to its initial shift state so that nothing left over from an
+     earlier conversion carries into this one. */
+  iconv (conv, NULL, 0, NULL, 0);
+
+  for (;;)
+    {
+      size_t rc = iconv (conv, (ICONV_CONST char **) &ip, &inbytes,
+                         &op, &outbytes);
+      if (rc != (size_t) -1)
+        break;
+
+      if (errno == EINVAL)
+        {
+          /* Room is needed for both the fallback and the terminator. */
+          if (outbytes < 2)
+            return -1;
+          *op++ = fallbackchar;
+          *op = '\0';
+          return op - op_;
+        }
+      else if (errno == EILSEQ)
+        {
+          if (outbytes == 0)
+            return -1;
+
+          /* Emit the fallback, step over one input byte, and retry. */
+          *op++ = fallbackchar;
+          outbytes--;
+          ip++;
+          inbytes--;
+        }
+      else if (errno == E2BIG)
+        return -1;
+      else
+        {
+          /* should never happen */
+          fprintf (stderr, "Character conversion error: %s\n", strerror (errno));
+          NOT_REACHED ();
+        }
+    }
+
+  /* All input was consumed; make sure the terminator still fits. */
+  if (outbytes == 0)
+    return -1;
+
+  *op = '\0';
+  return op - op_;
+}
+
+/* Recodes TEXT, which should be encoded in FROM-encoding, into TO-encoding
+   and returns the result as a dynamically allocated string.  Characters that
+   cannot be converted are represented by '?'.
+
+   LENGTH is the number of bytes in TEXT, or -1 if TEXT is null-terminated, in
+   which case its length is measured with strlen().
+
+   The returned string is allocated on POOL.  Returns a null pointer if TEXT
+   is null.
+
+   This differs from GLib's g_convert_with_fallback, which fails (returning
+   NULL) if any part of the input is not valid in the declared input
+   encoding: this function perseveres even when the input is badly
+   encoded. */
+char *
+recode_string_pool (const char *to, const char *from,
+                    const char *text, int length, struct pool *pool)
+{
+  int n_bytes = length;
+
+  if (text == NULL)
+    return NULL;
+
+  if (n_bytes == -1)
+    n_bytes = strlen (text);
+
+  return recode_substring_pool (to, from, ss_buffer (text, n_bytes),
+                                pool).string;
+}
+
+/* Reports the name of the character encoding to use for file names.
+
+   When _WIN32 or __WIN32__ is defined this is always "UTF-8"; otherwise it
+   is whatever locale_charset() returns.  The intent is to agree with the
+   encoding used by g_filename_from_uri() and g_filename_to_uri() in GLib. */
+static const char *
+filename_encoding (void)
+{
+#if !defined _WIN32 && !defined __WIN32__
+  return locale_charset ();
+#else
+  return "UTF-8";
+#endif
+}
+
+/* Returns a null-terminated string, obtained from xmalloc(), that holds the
+   A_LEN bytes at A followed by the B_LEN bytes at B. */
+static char *
+xconcat2 (const char *a, size_t a_len,
+          const char *b, size_t b_len)
+{
+  size_t total = a_len + b_len;
+  char *joined = xmalloc (total + 1);
+  char *tail = joined + a_len;
+
+  memcpy (joined, a, a_len);
+  memcpy (tail, b, b_len);
+  joined[total] = '\0';
+  return joined;
+}
+
+/* Conceptually, this function concatenates HEAD_LEN-byte string HEAD and
+ TAIL_LEN-byte string TAIL, both encoded in UTF-8, then converts them to
+ ENCODING. If the re-encoded result is no more than MAX_LEN bytes long, then
+ it returns HEAD_LEN. Otherwise, it drops one character[*] from the end of
+ HEAD and tries again, repeating as necessary until the concatenated result
+ fits or until HEAD_LEN reaches 0.
+
+ [*] Actually this function drops grapheme clusters instead of characters, so
+ that, e.g. a Unicode character followed by a combining accent character
+ is either completely included or completely excluded from HEAD_LEN. See
+ UAX #29 at http://unicode.org/reports/tr29/ for more information on
+ grapheme clusters.
+
+ A null ENCODING is treated as UTF-8.
+
+ Sometimes this function has to actually construct the concatenated string to
+ measure its length. When this happens, it sets *RESULTP to that
+ null-terminated string, allocated with malloc(), for the caller to use if it
+ needs it. Otherwise, it sets *RESULTP to NULL.
+
+ Simple examples for encoding="UTF-8", max_len=6:
+
+ head="abc", tail="xyz" => 3
+ head="abcd", tail="xyz" => 3 ("d" dropped).
+ head="abc", tail="uvwxyz" => 0 ("abc" dropped).
+ head="abc", tail="tuvwxyz" => 0 ("abc" dropped).
+
+ Examples for encoding="ISO-8859-1", max_len=6:
+
+ head="éèä", tail="xyz" => 6
+ (each letter in head is only 1 byte in ISO-8859-1 even though they
+ each take 2 bytes in UTF-8 encoding)
+*/
+static size_t
+utf8_encoding_concat__ (const char *head, size_t head_len,
+ const char *tail, size_t tail_len,
+ const char *encoding, size_t max_len,
+ char **resultp)
+{
+ *resultp = NULL;
+ if (head_len == 0)
+ return 0;
+ else if (encoding == NULL || !c_strcasecmp (encoding, "UTF-8"))