New module 'propername', moved here from GNU gettext.

author Bruno Haible <bruno@clisp.org>

Sun, 18 May 2008 13:38:15 +0000 (15:38 +0200)

committer Bruno Haible <bruno@clisp.org>

Sun, 18 May 2008 13:38:15 +0000 (15:38 +0200)
author Bruno Haible <bruno@clisp.org>
Sun, 18 May 2008 13:38:15 +0000 (15:38 +0200)
committer Bruno Haible <bruno@clisp.org>
Sun, 18 May 2008 13:38:15 +0000 (15:38 +0200)
diff --git a/ChangeLog b/ChangeLog

index 7c0a840fb9b0406960a720ec7bf903fa0bc07e4a..41ea89df03e06f2a01959b32d7e38af62cf6fef8 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-05-18  Bruno Haible  <bruno@clisp.org>
+
+       * modules/propername: New file, from GNU gettext.
+       * lib/propername.h: New file, from GNU gettext.
+       * lib/propername.c: New file, from GNU gettext.
+       * MODULES.html.sh (Internationalization functions): Add propername.
+
  2008-05-16  Jim Meyering  <meyering@redhat.com>
              Bruno Haible  <bruno@clisp.org>
  
diff --git a/MODULES.html.sh b/MODULES.html.sh

index 277d84cf9e0b047ad9e3d0384c52bbfa923de79b..d46cf681daddaadbeb006227b4d141e04496c950 100755 (executable)
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2382,6 +2382,7 @@ func_all_modules ()
    func_begin_table
    func_module gettext
    func_module gettext-h
+  func_module propername
    func_module iconv
    func_module striconv
    func_module xstriconv
diff --git a/lib/propername.c b/lib/propername.c

new file mode 100644 (file)

index 0000000..0d3681e
--- /dev/null
+++ b/lib/propername.c
@@ -0,0 +1,283 @@
+/* Localization of proper names.
+   Copyright (C) 2006-2008 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2006.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "propername.h"
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
+
+#include "trim.h"
+#include "mbchar.h"
+#if HAVE_MBRTOWC
+# include "mbuiter.h"
+#endif
+#include "localcharset.h"
+#include "c-strcase.h"
+#include "xstriconv.h"
+#include "xalloc.h"
+#include "gettext.h"
+
+
+/* Tests whether STRING contains trim (SUB), starting and ending at word
+   boundaries.
+   Here, instead of implementing Unicode Standard Annex #29 for determining
+   word boundaries, we assume that trim (SUB) starts and ends with words and
+   only test whether the part before it ends with a non-word and the part
+   after it starts with a non-word.
+   Returns true if such an occurrence exists, false otherwise.  The trimmed
+   copy of SUB returned by trim () is freed before returning.  */
+static bool
+mbsstr_trimmed_wordbounded (const char *string, const char *sub)
+{
+  /* Remember where the text really begins.  STRING is advanced past each
+     rejected candidate, but the "word boundary before" test must look at
+     the character that actually precedes a candidate.  Testing relative to
+     the advanced STRING would treat a candidate that starts exactly there
+     (e.g. the second "X" of "XX") as if it were at the start of the text.  */
+  const char *string_start = string;
+  char *tsub = trim (sub);
+  bool found = false;
+
+  for (; *string != '\0';)
+    {
+      /* Next candidate occurrence at or after STRING.  */
+      const char *tsub_in_string = mbsstr (string, tsub);
+      if (tsub_in_string == NULL)
+       break;
+      else
+       {
+#if HAVE_MBRTOWC
+         if (MB_CUR_MAX > 1)
+           {
+             mbui_iterator_t string_iter;
+             bool word_boundary_before;
+             bool word_boundary_after;
+
+             /* Walk from the real start of the text up to the candidate,
+                remembering the last character seen before it.  */
+             mbui_init (string_iter, string_start);
+             word_boundary_before = true;
+             if (mbui_cur_ptr (string_iter) < tsub_in_string)
+               {
+                 mbchar_t last_char_before_tsub;
+                 do
+                   {
+                     if (!mbui_avail (string_iter))
+                       abort ();
+                     last_char_before_tsub = mbui_cur (string_iter);
+                     mbui_advance (string_iter);
+                   }
+                 while (mbui_cur_ptr (string_iter) < tsub_in_string);
+                 if (mb_isalnum (last_char_before_tsub))
+                   word_boundary_before = false;
+               }
+
+             /* Skip as many characters as TSUB has, to reach the character
+                that follows the candidate.  */
+             mbui_init (string_iter, tsub_in_string);
+             {
+               mbui_iterator_t tsub_iter;
+
+               for (mbui_init (tsub_iter, tsub);
+                    mbui_avail (tsub_iter);
+                    mbui_advance (tsub_iter))
+                 {
+                   if (!mbui_avail (string_iter))
+                     abort ();
+                   mbui_advance (string_iter);
+                 }
+             }
+             word_boundary_after = true;
+             if (mbui_avail (string_iter))
+               {
+                 mbchar_t first_char_after_tsub = mbui_cur (string_iter);
+                 if (mb_isalnum (first_char_after_tsub))
+                   word_boundary_after = false;
+               }
+
+             if (word_boundary_before && word_boundary_after)
+               {
+                 found = true;
+                 break;
+               }
+
+             /* Candidate rejected: resume the search one character after
+                its first character.  */
+             mbui_init (string_iter, tsub_in_string);
+             if (!mbui_avail (string_iter))
+               break;
+             string = tsub_in_string + mb_len (mbui_cur (string_iter));
+           }
+         else
+#endif /* HAVE_MBRTOWC */
+           {
+             bool word_boundary_before;
+             const char *p;
+             bool word_boundary_after;
+
+             /* Compare against the real start of the text, not against the
+                advanced STRING, so that tsub_in_string[-1] is examined
+                whenever it exists.  */
+             word_boundary_before = true;
+             if (string_start < tsub_in_string)
+               if (isalnum ((unsigned char) tsub_in_string[-1]))
+                 word_boundary_before = false;
+
+             p = tsub_in_string + strlen (tsub);
+             word_boundary_after = true;
+             if (*p != '\0')
+               if (isalnum ((unsigned char) *p))
+                 word_boundary_after = false;
+
+             if (word_boundary_before && word_boundary_after)
+               {
+                 found = true;
+                 break;
+               }
+
+             /* Candidate rejected: resume the search one byte further.  */
+             if (*tsub_in_string == '\0')
+               break;
+             string = tsub_in_string + 1;
+           }
+       }
+    }
+  free (tsub);
+  return found;
+}
+
+/* Look up NAME (a proper name written in ASCII) with gettext () and return
+   the form to display: NAME itself when the lookup yields NAME unchanged,
+   the translation when it already mentions NAME as a whole word, and
+   otherwise a newly allocated "TRANSLATION (NAME)" string.  */
+
+const char *
+proper_name (const char *name)
+{
+  const char *translated = gettext (name);
+  char *combined;
+
+  /* No translation available: the lookup returned its argument.  */
+  if (translated == name)
+    return name;
+
+  /* A translation that still contains the original name is used as is.  */
+  if (mbsstr_trimmed_wordbounded (translated, name))
+    return translated;
+
+  /* Otherwise build "TRANSLATION (NAME)": two bytes for " (", one for ")"
+     and one for the terminating NUL.  */
+  combined =
+    XNMALLOC (strlen (translated) + 2 + strlen (name) + 1 + 1, char);
+  sprintf (combined, "%s (%s)", translated, name);
+  return combined;
+}
+
+/* Return the localization of a name whose original writing is not ASCII.
+   NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
+   escape sequences.  NAME_ASCII is a fallback written only with ASCII
+   characters.
+
+   The result is one of:
+     - the gettext () translation of NAME_ASCII, when that translation
+       already contains NAME_ASCII or NAME_UTF8 in the locale encoding;
+     - otherwise, when a translation exists, a newly allocated
+       "TRANSLATION (NAME)" string;
+     - when there is no translation, NAME_UTF8 converted to the locale
+       encoding (newly allocated unless the locale encoding is UTF-8), or
+       NAME_ASCII if no usable conversion was obtained.
+   Newly allocated results are not freed by this function.  */
+
+const char *
+proper_name_utf8 (const char *name_ascii, const char *name_utf8)
+{
+  /* See whether there is a translation.   */
+  const char *translation = gettext (name_ascii);
+
+  /* Try to convert NAME_UTF8 to the locale encoding.  */
+  const char *locale_code = locale_charset ();
+  char *alloc_name_converted = NULL;
+  char *alloc_name_converted_translit = NULL;
+  const char *name_converted = NULL;
+  const char *name_converted_translit = NULL;
+  const char *name;
+
+  if (c_strcasecmp (locale_code, "UTF-8") != 0)
+    {
+#if HAVE_ICONV
+      name_converted = alloc_name_converted =
+       xstr_iconv (name_utf8, "UTF-8", locale_code);
+
+# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
+     || _LIBICONV_VERSION >= 0x0105
+      {
+       char *converted_translit;
+       size_t len = strlen (locale_code);
+       char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
+       memcpy (locale_code_translit, locale_code, len);
+       memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
+
+       converted_translit =
+         xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
+
+       free (locale_code_translit);
+
+       if (converted_translit != NULL)
+         {
+#  if !_LIBICONV_VERSION
+           /* Don't use the transliteration if it added question marks.
+              glibc's transliteration falls back to question marks for
+              characters it cannot transliterate; GNU libiconv's fails
+              instead.  A name with '?' in it is worse than NAME_ASCII.  */
+           if (strchr (converted_translit, '?') != NULL)
+             free (converted_translit);
+           else
+#  endif
+             name_converted_translit = alloc_name_converted_translit =
+               converted_translit;
+         }
+      }
+# endif
+#endif
+    }
+  else
+    {
+      /* The locale encoding is UTF-8: NAME_UTF8 can be used directly.  */
+      name_converted = name_utf8;
+      name_converted_translit = name_utf8;
+    }
+
+  /* The name in locale encoding.  */
+  name = (name_converted != NULL ? name_converted :
+         name_converted_translit != NULL ? name_converted_translit :
+         name_ascii);
+
+  if (translation != name_ascii)
+    {
+      const char *result;
+
+      /* See whether the translation contains the original name.  */
+      if (mbsstr_trimmed_wordbounded (translation, name_ascii)
+         || (name_converted != NULL
+             && mbsstr_trimmed_wordbounded (translation, name_converted))
+         || (name_converted_translit != NULL
+             && mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
+       result = translation;
+      else
+       {
+         /* Return "TRANSLATION (NAME)".  */
+         char *combined =
+           XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
+
+         sprintf (combined, "%s (%s)", translation, name);
+         result = combined;
+       }
+
+      /* Neither result aliases the converted names, so both temporaries
+         can go.  free (NULL) is a no-op.  */
+      free (alloc_name_converted);
+      free (alloc_name_converted_translit);
+      return result;
+    }
+  else
+    {
+      /* NAME is returned to the caller; release only the conversion that
+         is not being returned.  */
+      if (alloc_name_converted != name)
+       free (alloc_name_converted);
+      if (alloc_name_converted_translit != name)
+       free (alloc_name_converted_translit);
+      return name;
+    }
+}
+
+#ifdef TEST
+# include <locale.h>
+/* Test driver, compiled only when TEST is defined.
+   Usage: PROGRAM STRING SUB
+   Prints "found" when STRING contains trim (SUB) at word boundaries, using
+   the locale selected by the environment.  Returns 1 on wrong usage and 0
+   otherwise.  */
+int
+main (int argc, char *argv[])
+{
+  /* argv[1] and argv[2] are dereferenced below; refuse to run without
+     them instead of passing NULL pointers on.  */
+  if (argc < 3)
+    {
+      fprintf (stderr, "Usage: %s STRING SUB\n",
+              argc > 0 ? argv[0] : "propername");
+      return 1;
+    }
+
+  setlocale (LC_ALL, "");
+  if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
+    printf("found\n");
+  return 0;
+}
+#endif
diff --git a/lib/propername.h b/lib/propername.h

new file mode 100644 (file)

index 0000000..1b0545b
--- /dev/null
+++ b/lib/propername.h
@@ -0,0 +1,105 @@
+/* Localization of proper names.
+   Copyright (C) 2006, 2008 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2006.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* INTRODUCTION
+
+   What do
+
+      Torbjörn Granlund    (coreutils)
+      François Pinard      (coreutils)
+      Danilo Šegan         (gettext)
+
+   have in common?
+
+   A non-ASCII name. This causes trouble in the --version output. The simple
+   "solution" unfortunately mutilates the name.
+
+     $ du --version| grep Granlund
+     Écrit par Torbjorn Granlund, David MacKenzie, Paul Eggert et Jim Meyering.
+
+     $ ptx --version| grep Pinard
+     Écrit par F. Pinard.
+
+   What is desirable is to print the full name if the output character set
+   allows it, and the ASCIIfied name only as a fallback.
+
+     $ recode-sr-latin --version
+     ...
+     Written by Danilo Šegan and Bruno Haible.
+
+     $ LC_ALL=C recode-sr-latin --version
+     ...
+     Written by Danilo Segan and Bruno Haible.
+
+   The 'propername' module does exactly this. Plus, for languages that use
+   a different writing system than the Latin alphabet, it allows a translator
+   to write the name using that different writing system. In that case the
+   output will look like this:
+      <translated name> (<original name in English>)
+
+   Using the 'propername' module takes three simple steps:
+
+     1) Add it to the list of gnulib modules to import,
+
+     2) Change the arguments of version_etc,
+
+          from "Paul Eggert"
+          to   proper_name ("Paul Eggert")
+
+          from "Torbjorn Granlund"
+          to   proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund")
+
+          from "F. Pinard"
+          to   proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard")
+
+        (Optionally, here you can also add / * TRANSLATORS: ... * / comments
+        explaining how the name is written or pronounced.)
+
+     3) If you are using GNU gettext version 0.16.1 or older, in po/Makevars,
+        in the definition of the XGETTEXT_OPTIONS variable, add:
+
+           --keyword=proper_name:1,"This is a proper name. See the gettext manual, section Names."
+           --keyword=proper_name_utf8:1,"This is a proper name. See the gettext manual, section Names."
+
+        This specifies automatic comments for the translator. (Requires
+        xgettext >= 0.15.)
+ */
+
+#ifndef _PROPERNAME_H
+#define _PROPERNAME_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Return the localization of NAME.  NAME is written in ASCII.
+   The result is NAME itself, its gettext translation, or a newly allocated
+   string of the form "TRANSLATION (NAME)".  */
+extern const char * proper_name (const char *name);
+
+/* Return the localization of a name whose original writing is not ASCII.
+   NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
+   escape sequences.  NAME_ASCII is a fallback written only with ASCII
+   characters.
+   The result may be one of the two arguments, the gettext translation of
+   NAME_ASCII, or a newly allocated string (NAME_UTF8 converted to the
+   locale encoding, or "TRANSLATION (NAME)").  */
+extern const char * proper_name_utf8 (const char *name_ascii,
+                                     const char *name_utf8);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PROPERNAME_H */
diff --git a/modules/propername b/modules/propername

new file mode 100644 (file)

index 0000000..fb31901
--- /dev/null
+++ b/modules/propername
@@ -0,0 +1,43 @@
+Description:
+Localization of proper names.
+
+Notice:
+If you are using GNU gettext version 0.16.1 or older, add the following options
+to XGETTEXT_OPTIONS in your po/Makevars:
+  --keyword=proper_name:1,'This is a proper name. See the gettext manual, section Names.'
+  --keyword=proper_name_utf8:1,'This is a proper name. See the gettext manual, section Names.'
+
+Files:
+lib/propername.h
+lib/propername.c
+
+Depends-on:
+stdbool
+trim
+mbsstr
+mbchar
+mbuiter
+iconv
+localcharset
+c-strcase
+xstriconv
+xalloc
+gettext-h
+
+configure.ac:
+m4_ifdef([AM_XGETTEXT_OPTION],
+  [AM_XGETTEXT_OPTION([--keyword=proper_name:1,'This is a proper name. See the gettext manual, section Names.'])
+   AM_XGETTEXT_OPTION([--keyword=proper_name_utf8:1,'This is a proper name. See the gettext manual, section Names.'])])
+
+Makefile.am:
+lib_SOURCES += propername.h propername.c
+
+Include:
+"propername.h"
+
+License:
+GPL
+
+Maintainer:
+Bruno Haible
+
author	Bruno Haible <bruno@clisp.org>
	Sun, 18 May 2008 13:38:15 +0000 (15:38 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 18 May 2008 13:38:15 +0000 (15:38 +0200)
ChangeLog		patch \| blob \| history
MODULES.html.sh		patch \| blob \| history
lib/propername.c	[new file with mode: 0644]	patch \| blob
lib/propername.h	[new file with mode: 0644]	patch \| blob
modules/propername	[new file with mode: 0644]	patch \| blob