--- /dev/null
+/* Localization of proper names.
+ Copyright (C) 2006-2008 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2006.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "propername.h"
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
+
+#include "trim.h"
+#include "mbchar.h"
+#if HAVE_MBRTOWC
+# include "mbuiter.h"
+#endif
+#include "localcharset.h"
+#include "c-strcase.h"
+#include "xstriconv.h"
+#include "xalloc.h"
+#include "gettext.h"
+
+
+/* Tests whether STRING contains trim (SUB), starting and ending at word
+   boundaries.
+   Here, instead of implementing Unicode Standard Annex #29 for determining
+   word boundaries, we assume that trim (SUB) starts and ends with words and
+   only test whether the part before it ends with a non-word and the part
+   after it starts with a non-word.
+
+   STRING is the text to search; SUB is the text to look for, which is
+   trimmed (into a temporary copy that is freed before returning) before
+   the search.  Returns true if some occurrence of the trimmed SUB is
+   neither preceded nor followed by an alphanumeric character.
+
+   The search proceeds candidate by candidate: after a rejected occurrence,
+   STRING is advanced by one character past the start of that occurrence.
+   The character that was skipped is remembered, so that a later occurrence
+   which begins exactly at the new STRING is still checked against the
+   character that really precedes it.  */
+static bool
+mbsstr_trimmed_wordbounded (const char *string, const char *sub)
+{
+  char *tsub = trim (sub);
+  bool found = false;
+  /* Whether the character immediately before STRING is alphanumeric.
+     Initially STRING is the beginning of the text, so there is none.  */
+  bool alnum_before_string = false;
+
+  for (; *string != '\0';)
+    {
+      const char *tsub_in_string = mbsstr (string, tsub);
+      if (tsub_in_string == NULL)
+        break;
+      else
+        {
+#if HAVE_MBRTOWC
+          if (MB_CUR_MAX > 1)
+            {
+              mbui_iterator_t string_iter;
+              bool word_boundary_before;
+              bool word_boundary_after;
+
+              /* Determine the character right before the occurrence.  If
+                 the occurrence starts at STRING itself, that character is
+                 the one skipped in the previous round (if any).  */
+              mbui_init (string_iter, string);
+              word_boundary_before = !alnum_before_string;
+              if (mbui_cur_ptr (string_iter) < tsub_in_string)
+                {
+                  mbchar_t last_char_before_tsub;
+                  do
+                    {
+                      if (!mbui_avail (string_iter))
+                        abort ();
+                      last_char_before_tsub = mbui_cur (string_iter);
+                      mbui_advance (string_iter);
+                    }
+                  while (mbui_cur_ptr (string_iter) < tsub_in_string);
+                  word_boundary_before = true;
+                  if (mb_isalnum (last_char_before_tsub))
+                    word_boundary_before = false;
+                }
+
+              /* Step over as many characters of STRING as TSUB has, to
+                 reach the character right after the occurrence.  */
+              mbui_init (string_iter, tsub_in_string);
+              {
+                mbui_iterator_t tsub_iter;
+
+                for (mbui_init (tsub_iter, tsub);
+                     mbui_avail (tsub_iter);
+                     mbui_advance (tsub_iter))
+                  {
+                    if (!mbui_avail (string_iter))
+                      abort ();
+                    mbui_advance (string_iter);
+                  }
+              }
+              word_boundary_after = true;
+              if (mbui_avail (string_iter))
+                {
+                  mbchar_t first_char_after_tsub = mbui_cur (string_iter);
+                  if (mb_isalnum (first_char_after_tsub))
+                    word_boundary_after = false;
+                }
+
+              if (word_boundary_before && word_boundary_after)
+                {
+                  found = true;
+                  break;
+                }
+
+              /* Rejected: resume the search one character further, and
+                 remember what kind of character is being skipped.  */
+              mbui_init (string_iter, tsub_in_string);
+              if (!mbui_avail (string_iter))
+                break;
+              {
+                mbchar_t skipped_char = mbui_cur (string_iter);
+
+                alnum_before_string = (mb_isalnum (skipped_char) ? true : false);
+                string = tsub_in_string + mb_len (skipped_char);
+              }
+            }
+          else
+#endif /* HAVE_MBRTOWC */
+            {
+              bool word_boundary_before;
+              const char *p;
+              bool word_boundary_after;
+
+              /* If the occurrence starts at STRING itself, the preceding
+                 character is the one skipped in the previous round (if
+                 any); otherwise it is simply the byte before it.  */
+              word_boundary_before = !alnum_before_string;
+              if (string < tsub_in_string)
+                {
+                  word_boundary_before = true;
+                  if (isalnum ((unsigned char) tsub_in_string[-1]))
+                    word_boundary_before = false;
+                }
+
+              p = tsub_in_string + strlen (tsub);
+              word_boundary_after = true;
+              if (*p != '\0')
+                if (isalnum ((unsigned char) *p))
+                  word_boundary_after = false;
+
+              if (word_boundary_before && word_boundary_after)
+                {
+                  found = true;
+                  break;
+                }
+
+              /* Rejected: resume the search one byte further, and remember
+                 what kind of character is being skipped.  */
+              if (*tsub_in_string == '\0')
+                break;
+              alnum_before_string =
+                (isalnum ((unsigned char) *tsub_in_string) ? true : false);
+              string = tsub_in_string + 1;
+            }
+        }
+    }
+  free (tsub);
+  return found;
+}
+
+/* Return the localization of NAME.  NAME is written in ASCII.
+
+   The result is one of:
+     - NAME itself, when gettext returns NAME unchanged,
+     - the translation, when it already contains NAME at word boundaries,
+     - a newly allocated string "TRANSLATION (NAME)" otherwise.  */
+
+const char *
+proper_name (const char *name)
+{
+  const char *localized = gettext (name);
+  char *combined;
+
+  /* No translation available: use the name as is.  */
+  if (localized == name)
+    return name;
+
+  /* The translator kept the original name inside the translation.  */
+  if (mbsstr_trimmed_wordbounded (localized, name))
+    return localized;
+
+  /* Build "TRANSLATION (NAME)": two bytes for " (", one for ")", and one
+     for the terminating NUL.  */
+  combined = XNMALLOC (strlen (localized) + 2 + strlen (name) + 1 + 1, char);
+  sprintf (combined, "%s (%s)", localized, name);
+  return combined;
+}
+
+/* Return the localization of a name whose original writing is not ASCII.
+   NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
+   escape sequences.  NAME_ASCII is a fallback written only with ASCII
+   characters.
+
+   The name "in locale encoding" is, in order of preference, NAME_UTF8
+   converted to the locale's character set, NAME_UTF8 transliterated to
+   the locale's character set, or NAME_ASCII.
+
+   The result is one of:
+     - that name, when gettext returns NAME_ASCII unchanged,
+     - the translation, when it already contains NAME_ASCII or one of the
+       converted forms at word boundaries,
+     - a newly allocated string "TRANSLATION (NAME)" otherwise.
+   Converted strings that do not end up being the result are freed.  */
+
+const char *
+proper_name_utf8 (const char *name_ascii, const char *name_utf8)
+{
+  /* See whether there is a translation.  */
+  const char *translation = gettext (name_ascii);
+
+  /* Try to convert NAME_UTF8 to the locale encoding.  */
+  const char *locale_code = locale_charset ();
+  char *alloc_name_converted = NULL;
+  char *alloc_name_converted_translit = NULL;
+  const char *name_converted = NULL;
+  const char *name_converted_translit = NULL;
+  const char *name;
+  const char *result;
+
+  if (c_strcasecmp (locale_code, "UTF-8") != 0)
+    {
+#if HAVE_ICONV
+      name_converted = alloc_name_converted =
+        xstr_iconv (name_utf8, "UTF-8", locale_code);
+
+# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
+     || _LIBICONV_VERSION >= 0x0105
+      {
+        char *converted_translit;
+        size_t len = strlen (locale_code);
+        char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
+        memcpy (locale_code_translit, locale_code, len);
+        memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
+
+        converted_translit =
+          xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
+
+        free (locale_code_translit);
+
+        if (converted_translit != NULL)
+          {
+#  if !_LIBICONV_VERSION
+            /* Don't use the transliteration if it added question marks:
+               glibc's transliteration falls back to '?' for characters it
+               cannot approximate, which would mutilate the name.  The
+               ASCII fallback is the better choice then.  */
+            if (strchr (converted_translit, '?') != NULL)
+              free (converted_translit);
+            else
+#  endif
+              name_converted_translit = alloc_name_converted_translit =
+                converted_translit;
+          }
+      }
+# endif
+#endif
+    }
+  else
+    {
+      /* The locale encoding is UTF-8: no conversion is needed.  */
+      name_converted = name_utf8;
+      name_converted_translit = name_utf8;
+    }
+
+  /* The name in locale encoding.  */
+  name = (name_converted != NULL ? name_converted :
+          name_converted_translit != NULL ? name_converted_translit :
+          name_ascii);
+
+  if (translation == name_ascii)
+    {
+      /* No translation available: use the name in locale encoding.  */
+      result = name;
+    }
+  else if (mbsstr_trimmed_wordbounded (translation, name_ascii)
+           || (name_converted != NULL
+               && mbsstr_trimmed_wordbounded (translation, name_converted))
+           || (name_converted_translit != NULL
+               && mbsstr_trimmed_wordbounded (translation,
+                                              name_converted_translit)))
+    {
+      /* The translation contains the original name.  */
+      result = translation;
+    }
+  else
+    {
+      /* Return "TRANSLATION (NAME)": two bytes for " (", one for ")", and
+         one for the terminating NUL.  */
+      char *combined =
+        XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
+
+      sprintf (combined, "%s (%s)", translation, name);
+      result = combined;
+    }
+
+  /* Release the converted strings, except the one (if any) that is being
+     returned.  free (NULL) is a no-op, so no NULL checks are needed.  */
+  if (alloc_name_converted != result)
+    free (alloc_name_converted);
+  if (alloc_name_converted_translit != result)
+    free (alloc_name_converted_translit);
+  return result;
+}
+
+#ifdef TEST
+# include <locale.h>
+/* Test driver: reports whether argv[1] contains the trimmed argv[2] at
+   word boundaries, according to the current locale.  Prints "found" on
+   success; exits with status 1 if the two arguments are missing.  */
+int
+main (int argc, char *argv[])
+{
+  setlocale (LC_ALL, "");
+  /* Without this check, missing arguments would pass NULL pointers to
+     the string functions.  */
+  if (argc < 3)
+    {
+      fprintf (stderr, "Usage: %s STRING SUB\n",
+               argc > 0 && argv[0] != NULL ? argv[0] : "propername");
+      return 1;
+    }
+  if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
+    printf("found\n");
+  return 0;
+}
+#endif
--- /dev/null
+/* Localization of proper names.
+ Copyright (C) 2006, 2008 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2006.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* INTRODUCTION
+
+ What do
+
+ Torbjörn Granlund (coreutils)
+ François Pinard (coreutils)
+ Danilo Šegan (gettext)
+
+ have in common?
+
+ A non-ASCII name. This causes trouble in the --version output. The simple
+ "solution" unfortunately mutilates the name.
+
+ $ du --version| grep Granlund
+ Écrit par Torbjorn Granlund, David MacKenzie, Paul Eggert et Jim Meyering.
+
+ $ ptx --version| grep Pinard
+ Écrit par F. Pinard.
+
+ What is desirable, is to print the full name if the output character set
+ allows it, and the ASCIIfied name only as a fallback.
+
+ $ recode-sr-latin --version
+ ...
+ Written by Danilo Šegan and Bruno Haible.
+
+ $ LC_ALL=C recode-sr-latin --version
+ ...
+ Written by Danilo Segan and Bruno Haible.
+
+ The 'propername' module does exactly this. Plus, for languages that use
+ a different writing system than the Latin alphabet, it allows a translator
+ to write the name using that different writing system. In that case the
+ output will look like this:
+ <translated name> (<original name in English>)
+
+ Using the 'propername' module takes three simple steps:
+
+ 1) Add it to the list of gnulib modules to import,
+
+ 2) Change the arguments of version_etc:
+
+ from "Paul Eggert"
+ to proper_name ("Paul Eggert")
+
+ from "Torbjorn Granlund"
+ to proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund")
+
+ from "F. Pinard"
+ to proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard")
+
+ (Optionally, here you can also add / * TRANSLATORS: ... * / comments
+ explaining how the name is written or pronounced.)
+
+ 3) If you are using GNU gettext version 0.16.1 or older, in po/Makevars,
+ in the definition of the XGETTEXT_OPTIONS variable, add:
+
+ --keyword=proper_name:1,"This is a proper name. See the gettext manual, section Names."
+ --keyword=proper_name_utf8:1,"This is a proper name. See the gettext manual, section Names."
+
+ This specifies automatic comments for the translator. (Requires
+ xgettext >= 0.15.)
+ */
+
+#ifndef _PROPERNAME_H
+#define _PROPERNAME_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Return the localization of NAME. NAME is written in ASCII.
+   The result is either NAME itself, its translation, or a newly
+   allocated string of the form "TRANSLATION (NAME)"; since the caller
+   cannot tell these cases apart, the result should not be freed. */
+extern const char * proper_name (const char *name);
+
+/* Return the localization of a name whose original writing is not ASCII.
+   NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
+   escape sequences. NAME_ASCII is a fallback written only with ASCII
+   characters.
+   Note the argument order: the ASCII fallback comes first, the UTF-8
+   name second.
+   The result is either one of the arguments, a converted or translated
+   form of the name, or a newly allocated string of the form
+   "TRANSLATION (NAME)"; since the caller cannot tell these cases apart,
+   the result should not be freed. */
+extern const char * proper_name_utf8 (const char *name_ascii,
+ const char *name_utf8);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PROPERNAME_H */