1 /* Determine name of the currently selected locale.
2 Copyright (C) 1995-1999, 2000-2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU Library General Public License as published
6 by the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
19 /* Written by Ulrich Drepper <drepper@gnu.org>, 1995. */
20 /* Win32 code written by Tor Lillqvist <tml@iki.fi>. */
21 /* MacOS X code written by Bruno Haible <bruno@clisp.org>. */
27 # include "gettextP.h"
29 # include "localename.h"
35 #if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
37 # include <CoreFoundation/CFString.h>
38 # if HAVE_CFLOCALECOPYCURRENT
39 # include <CoreFoundation/CFLocale.h>
40 # elif HAVE_CFPREFERENCESCOPYAPPVALUE
41 # include <CoreFoundation/CFPreferences.h>
45 #if defined _WIN32 || defined __WIN32__
50 # define WIN32_LEAN_AND_MEAN
52 /* List of language codes, sorted by value:
75 0x17 LANG_RHAETO_ROMANCE
78 0x1a LANG_CROATIAN == LANG_SERBIAN
170 /* Mingw headers don't have latest language and sublanguage codes. */
171 # ifndef LANG_AFRIKAANS
172 # define LANG_AFRIKAANS 0x36
174 # ifndef LANG_ALBANIAN
175 # define LANG_ALBANIAN 0x1c
177 # ifndef LANG_AMHARIC
178 # define LANG_AMHARIC 0x5e
181 # define LANG_ARABIC 0x01
183 # ifndef LANG_ARMENIAN
184 # define LANG_ARMENIAN 0x2b
186 # ifndef LANG_ASSAMESE
187 # define LANG_ASSAMESE 0x4d
190 # define LANG_AZERI 0x2c
193 # define LANG_BASQUE 0x2d
195 # ifndef LANG_BELARUSIAN
196 # define LANG_BELARUSIAN 0x23
198 # ifndef LANG_BENGALI
199 # define LANG_BENGALI 0x45
201 # ifndef LANG_BURMESE
202 # define LANG_BURMESE 0x55
204 # ifndef LANG_CAMBODIAN
205 # define LANG_CAMBODIAN 0x53
207 # ifndef LANG_CATALAN
208 # define LANG_CATALAN 0x03
210 # ifndef LANG_CHEROKEE
211 # define LANG_CHEROKEE 0x5c
214 # define LANG_DIVEHI 0x65
217 # define LANG_EDO 0x66
219 # ifndef LANG_ESTONIAN
220 # define LANG_ESTONIAN 0x25
222 # ifndef LANG_FAEROESE
223 # define LANG_FAEROESE 0x38
226 # define LANG_FARSI 0x29
228 # ifndef LANG_FRISIAN
229 # define LANG_FRISIAN 0x62
231 # ifndef LANG_FULFULDE
232 # define LANG_FULFULDE 0x67
235 # define LANG_GAELIC 0x3c
237 # ifndef LANG_GALICIAN
238 # define LANG_GALICIAN 0x56
240 # ifndef LANG_GEORGIAN
241 # define LANG_GEORGIAN 0x37
243 # ifndef LANG_GUARANI
244 # define LANG_GUARANI 0x74
246 # ifndef LANG_GUJARATI
247 # define LANG_GUJARATI 0x47
250 # define LANG_HAUSA 0x68
252 # ifndef LANG_HAWAIIAN
253 # define LANG_HAWAIIAN 0x75
256 # define LANG_HEBREW 0x0d
259 # define LANG_HINDI 0x39
262 # define LANG_IBIBIO 0x69
265 # define LANG_IGBO 0x70
267 # ifndef LANG_INDONESIAN
268 # define LANG_INDONESIAN 0x21
270 # ifndef LANG_INUKTITUT
271 # define LANG_INUKTITUT 0x5d
273 # ifndef LANG_KANNADA
274 # define LANG_KANNADA 0x4b
277 # define LANG_KANURI 0x71
279 # ifndef LANG_KASHMIRI
280 # define LANG_KASHMIRI 0x60
283 # define LANG_KAZAK 0x3f
285 # ifndef LANG_KONKANI
286 # define LANG_KONKANI 0x57
289 # define LANG_KYRGYZ 0x40
292 # define LANG_LAO 0x54
295 # define LANG_LATIN 0x76
297 # ifndef LANG_LATVIAN
298 # define LANG_LATVIAN 0x26
300 # ifndef LANG_LITHUANIAN
301 # define LANG_LITHUANIAN 0x27
303 # ifndef LANG_MACEDONIAN
304 # define LANG_MACEDONIAN 0x2f
307 # define LANG_MALAY 0x3e
309 # ifndef LANG_MALAYALAM
310 # define LANG_MALAYALAM 0x4c
312 # ifndef LANG_MALTESE
313 # define LANG_MALTESE 0x3a
315 # ifndef LANG_MANIPURI
316 # define LANG_MANIPURI 0x58
318 # ifndef LANG_MARATHI
319 # define LANG_MARATHI 0x4e
321 # ifndef LANG_MONGOLIAN
322 # define LANG_MONGOLIAN 0x50
325 # define LANG_NEPALI 0x61
328 # define LANG_ORIYA 0x48
331 # define LANG_OROMO 0x72
333 # ifndef LANG_PAPIAMENTU
334 # define LANG_PAPIAMENTU 0x79
337 # define LANG_PASHTO 0x63
339 # ifndef LANG_PUNJABI
340 # define LANG_PUNJABI 0x46
342 # ifndef LANG_RHAETO_ROMANCE
343 # define LANG_RHAETO_ROMANCE 0x17
346 # define LANG_SAAMI 0x3b
348 # ifndef LANG_SANSKRIT
349 # define LANG_SANSKRIT 0x4f
351 # ifndef LANG_SERBIAN
352 # define LANG_SERBIAN 0x1a
355 # define LANG_SINDHI 0x59
357 # ifndef LANG_SINHALESE
358 # define LANG_SINHALESE 0x5b
361 # define LANG_SLOVAK 0x1b
364 # define LANG_SOMALI 0x77
366 # ifndef LANG_SORBIAN
367 # define LANG_SORBIAN 0x2e
370 # define LANG_SUTU 0x30
372 # ifndef LANG_SWAHILI
373 # define LANG_SWAHILI 0x41
376 # define LANG_SYRIAC 0x5a
378 # ifndef LANG_TAGALOG
379 # define LANG_TAGALOG 0x64
382 # define LANG_TAJIK 0x28
384 # ifndef LANG_TAMAZIGHT
385 # define LANG_TAMAZIGHT 0x5f
388 # define LANG_TAMIL 0x49
391 # define LANG_TATAR 0x44
394 # define LANG_TELUGU 0x4a
397 # define LANG_THAI 0x1e
399 # ifndef LANG_TIBETAN
400 # define LANG_TIBETAN 0x51
402 # ifndef LANG_TIGRINYA
403 # define LANG_TIGRINYA 0x73
406 # define LANG_TSONGA 0x31
409 # define LANG_TSWANA 0x32
411 # ifndef LANG_TURKMEN
412 # define LANG_TURKMEN 0x42
414 # ifndef LANG_UKRAINIAN
415 # define LANG_UKRAINIAN 0x22
418 # define LANG_URDU 0x20
421 # define LANG_UZBEK 0x43
424 # define LANG_VENDA 0x33
426 # ifndef LANG_VIETNAMESE
427 # define LANG_VIETNAMESE 0x2a
430 # define LANG_WELSH 0x52
433 # define LANG_XHOSA 0x34
436 # define LANG_YI 0x78
438 # ifndef LANG_YIDDISH
439 # define LANG_YIDDISH 0x3d
442 # define LANG_YORUBA 0x6a
445 # define LANG_ZULU 0x35
447 # ifndef SUBLANG_ARABIC_SAUDI_ARABIA
448 # define SUBLANG_ARABIC_SAUDI_ARABIA 0x01
450 # ifndef SUBLANG_ARABIC_IRAQ
451 # define SUBLANG_ARABIC_IRAQ 0x02
453 # ifndef SUBLANG_ARABIC_EGYPT
454 # define SUBLANG_ARABIC_EGYPT 0x03
456 # ifndef SUBLANG_ARABIC_LIBYA
457 # define SUBLANG_ARABIC_LIBYA 0x04
459 # ifndef SUBLANG_ARABIC_ALGERIA
460 # define SUBLANG_ARABIC_ALGERIA 0x05
462 # ifndef SUBLANG_ARABIC_MOROCCO
463 # define SUBLANG_ARABIC_MOROCCO 0x06
465 # ifndef SUBLANG_ARABIC_TUNISIA
466 # define SUBLANG_ARABIC_TUNISIA 0x07
468 # ifndef SUBLANG_ARABIC_OMAN
469 # define SUBLANG_ARABIC_OMAN 0x08
471 # ifndef SUBLANG_ARABIC_YEMEN
472 # define SUBLANG_ARABIC_YEMEN 0x09
474 # ifndef SUBLANG_ARABIC_SYRIA
475 # define SUBLANG_ARABIC_SYRIA 0x0a
477 # ifndef SUBLANG_ARABIC_JORDAN
478 # define SUBLANG_ARABIC_JORDAN 0x0b
480 # ifndef SUBLANG_ARABIC_LEBANON
481 # define SUBLANG_ARABIC_LEBANON 0x0c
483 # ifndef SUBLANG_ARABIC_KUWAIT
484 # define SUBLANG_ARABIC_KUWAIT 0x0d
486 # ifndef SUBLANG_ARABIC_UAE
487 # define SUBLANG_ARABIC_UAE 0x0e
489 # ifndef SUBLANG_ARABIC_BAHRAIN
490 # define SUBLANG_ARABIC_BAHRAIN 0x0f
492 # ifndef SUBLANG_ARABIC_QATAR
493 # define SUBLANG_ARABIC_QATAR 0x10
495 # ifndef SUBLANG_AZERI_LATIN
496 # define SUBLANG_AZERI_LATIN 0x01
498 # ifndef SUBLANG_AZERI_CYRILLIC
499 # define SUBLANG_AZERI_CYRILLIC 0x02
501 # ifndef SUBLANG_BENGALI_INDIA
502 # define SUBLANG_BENGALI_INDIA 0x01
504 # ifndef SUBLANG_BENGALI_BANGLADESH
505 # define SUBLANG_BENGALI_BANGLADESH 0x02
507 # ifndef SUBLANG_CHINESE_MACAU
508 # define SUBLANG_CHINESE_MACAU 0x05
510 # ifndef SUBLANG_ENGLISH_SOUTH_AFRICA
511 # define SUBLANG_ENGLISH_SOUTH_AFRICA 0x07
513 # ifndef SUBLANG_ENGLISH_JAMAICA
514 # define SUBLANG_ENGLISH_JAMAICA 0x08
516 # ifndef SUBLANG_ENGLISH_CARIBBEAN
517 # define SUBLANG_ENGLISH_CARIBBEAN 0x09
519 # ifndef SUBLANG_ENGLISH_BELIZE
520 # define SUBLANG_ENGLISH_BELIZE 0x0a
522 # ifndef SUBLANG_ENGLISH_TRINIDAD
523 # define SUBLANG_ENGLISH_TRINIDAD 0x0b
525 # ifndef SUBLANG_ENGLISH_ZIMBABWE
526 # define SUBLANG_ENGLISH_ZIMBABWE 0x0c
528 # ifndef SUBLANG_ENGLISH_PHILIPPINES
529 # define SUBLANG_ENGLISH_PHILIPPINES 0x0d
531 # ifndef SUBLANG_ENGLISH_INDONESIA
532 # define SUBLANG_ENGLISH_INDONESIA 0x0e
534 # ifndef SUBLANG_ENGLISH_HONGKONG
535 # define SUBLANG_ENGLISH_HONGKONG 0x0f
537 # ifndef SUBLANG_ENGLISH_INDIA
538 # define SUBLANG_ENGLISH_INDIA 0x10
540 # ifndef SUBLANG_ENGLISH_MALAYSIA
541 # define SUBLANG_ENGLISH_MALAYSIA 0x11
543 # ifndef SUBLANG_ENGLISH_SINGAPORE
544 # define SUBLANG_ENGLISH_SINGAPORE 0x12
546 # ifndef SUBLANG_FRENCH_LUXEMBOURG
547 # define SUBLANG_FRENCH_LUXEMBOURG 0x05
549 # ifndef SUBLANG_FRENCH_MONACO
550 # define SUBLANG_FRENCH_MONACO 0x06
552 # ifndef SUBLANG_FRENCH_WESTINDIES
553 # define SUBLANG_FRENCH_WESTINDIES 0x07
555 # ifndef SUBLANG_FRENCH_REUNION
556 # define SUBLANG_FRENCH_REUNION 0x08
558 # ifndef SUBLANG_FRENCH_CONGO
559 # define SUBLANG_FRENCH_CONGO 0x09
561 # ifndef SUBLANG_FRENCH_SENEGAL
562 # define SUBLANG_FRENCH_SENEGAL 0x0a
564 # ifndef SUBLANG_FRENCH_CAMEROON
565 # define SUBLANG_FRENCH_CAMEROON 0x0b
567 # ifndef SUBLANG_FRENCH_COTEDIVOIRE
568 # define SUBLANG_FRENCH_COTEDIVOIRE 0x0c
570 # ifndef SUBLANG_FRENCH_MALI
571 # define SUBLANG_FRENCH_MALI 0x0d
573 # ifndef SUBLANG_FRENCH_MOROCCO
574 # define SUBLANG_FRENCH_MOROCCO 0x0e
576 # ifndef SUBLANG_FRENCH_HAITI
577 # define SUBLANG_FRENCH_HAITI 0x0f
579 # ifndef SUBLANG_GERMAN_LUXEMBOURG
580 # define SUBLANG_GERMAN_LUXEMBOURG 0x04
582 # ifndef SUBLANG_GERMAN_LIECHTENSTEIN
583 # define SUBLANG_GERMAN_LIECHTENSTEIN 0x05
585 # ifndef SUBLANG_KASHMIRI_INDIA
586 # define SUBLANG_KASHMIRI_INDIA 0x02
588 # ifndef SUBLANG_MALAY_MALAYSIA
589 # define SUBLANG_MALAY_MALAYSIA 0x01
591 # ifndef SUBLANG_MALAY_BRUNEI_DARUSSALAM
592 # define SUBLANG_MALAY_BRUNEI_DARUSSALAM 0x02
594 # ifndef SUBLANG_NEPALI_INDIA
595 # define SUBLANG_NEPALI_INDIA 0x02
597 # ifndef SUBLANG_PUNJABI_INDIA
598 # define SUBLANG_PUNJABI_INDIA 0x01
600 # ifndef SUBLANG_PUNJABI_PAKISTAN
601 # define SUBLANG_PUNJABI_PAKISTAN 0x02
603 # ifndef SUBLANG_ROMANIAN_ROMANIA
604 # define SUBLANG_ROMANIAN_ROMANIA 0x01
606 # ifndef SUBLANG_ROMANIAN_MOLDOVA
607 # define SUBLANG_ROMANIAN_MOLDOVA 0x02
609 # ifndef SUBLANG_SERBIAN_LATIN
610 # define SUBLANG_SERBIAN_LATIN 0x02
612 # ifndef SUBLANG_SERBIAN_CYRILLIC
613 # define SUBLANG_SERBIAN_CYRILLIC 0x03
615 # ifndef SUBLANG_SINDHI_PAKISTAN
616 # define SUBLANG_SINDHI_PAKISTAN 0x01
618 # ifndef SUBLANG_SINDHI_AFGHANISTAN
619 # define SUBLANG_SINDHI_AFGHANISTAN 0x02
621 # ifndef SUBLANG_SPANISH_GUATEMALA
622 # define SUBLANG_SPANISH_GUATEMALA 0x04
624 # ifndef SUBLANG_SPANISH_COSTA_RICA
625 # define SUBLANG_SPANISH_COSTA_RICA 0x05
627 # ifndef SUBLANG_SPANISH_PANAMA
628 # define SUBLANG_SPANISH_PANAMA 0x06
630 # ifndef SUBLANG_SPANISH_DOMINICAN_REPUBLIC
631 # define SUBLANG_SPANISH_DOMINICAN_REPUBLIC 0x07
633 # ifndef SUBLANG_SPANISH_VENEZUELA
634 # define SUBLANG_SPANISH_VENEZUELA 0x08
636 # ifndef SUBLANG_SPANISH_COLOMBIA
637 # define SUBLANG_SPANISH_COLOMBIA 0x09
639 # ifndef SUBLANG_SPANISH_PERU
640 # define SUBLANG_SPANISH_PERU 0x0a
642 # ifndef SUBLANG_SPANISH_ARGENTINA
643 # define SUBLANG_SPANISH_ARGENTINA 0x0b
645 # ifndef SUBLANG_SPANISH_ECUADOR
646 # define SUBLANG_SPANISH_ECUADOR 0x0c
648 # ifndef SUBLANG_SPANISH_CHILE
649 # define SUBLANG_SPANISH_CHILE 0x0d
651 # ifndef SUBLANG_SPANISH_URUGUAY
652 # define SUBLANG_SPANISH_URUGUAY 0x0e
654 # ifndef SUBLANG_SPANISH_PARAGUAY
655 # define SUBLANG_SPANISH_PARAGUAY 0x0f
657 # ifndef SUBLANG_SPANISH_BOLIVIA
658 # define SUBLANG_SPANISH_BOLIVIA 0x10
660 # ifndef SUBLANG_SPANISH_EL_SALVADOR
661 # define SUBLANG_SPANISH_EL_SALVADOR 0x11
663 # ifndef SUBLANG_SPANISH_HONDURAS
664 # define SUBLANG_SPANISH_HONDURAS 0x12
666 # ifndef SUBLANG_SPANISH_NICARAGUA
667 # define SUBLANG_SPANISH_NICARAGUA 0x13
669 # ifndef SUBLANG_SPANISH_PUERTO_RICO
670 # define SUBLANG_SPANISH_PUERTO_RICO 0x14
672 # ifndef SUBLANG_SWEDISH_FINLAND
673 # define SUBLANG_SWEDISH_FINLAND 0x02
675 # ifndef SUBLANG_TAMAZIGHT_ARABIC
676 # define SUBLANG_TAMAZIGHT_ARABIC 0x01
678 # ifndef SUBLANG_TAMAZIGHT_ALGERIA_LATIN
679 # define SUBLANG_TAMAZIGHT_ALGERIA_LATIN 0x02
681 # ifndef SUBLANG_TIGRINYA_ETHIOPIA
682 # define SUBLANG_TIGRINYA_ETHIOPIA 0x01
684 # ifndef SUBLANG_TIGRINYA_ERITREA
685 # define SUBLANG_TIGRINYA_ERITREA 0x02
687 # ifndef SUBLANG_URDU_PAKISTAN
688 # define SUBLANG_URDU_PAKISTAN 0x01
690 # ifndef SUBLANG_URDU_INDIA
691 # define SUBLANG_URDU_INDIA 0x02
693 # ifndef SUBLANG_UZBEK_LATIN
694 # define SUBLANG_UZBEK_LATIN 0x01
696 # ifndef SUBLANG_UZBEK_CYRILLIC
697 # define SUBLANG_UZBEK_CYRILLIC 0x02
701 # if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
702 /* MacOS X 10.2 or newer */
704 /* Canonicalize a MacOS X locale name to a Unix locale name.
705 NAME is a sufficiently large buffer.
706 On input, it contains the MacOS X locale name.
707 On output, it contains the Unix locale name. */
708 # if !defined IN_LIBINTL
712 gl_locale_name_canonicalize (char *name)
714 /* This conversion is based on a posting by
715 Deborah Goldsmith <goldsmit@apple.com> on 2005-03-08,
716 http://lists.apple.com/archives/carbon-dev/2005/Mar/msg00293.html */
718 /* Convert legacy (NeXTstep inherited) English names to Unix (ISO 639 and
719 ISO 3166) names. Prior to MacOS X 10.3, there is no API for doing this.
720 Therefore we do it ourselves, using a table based on the results of the
721 MacOS X 10.3.8 function
722 CFLocaleCreateCanonicalLocaleIdentifierFromString(). */
723 typedef struct { const char legacy[21+1]; const char unixy[5+1]; }
725 static const legacy_entry legacy_table[] = {
726 { "Afrikaans", "af" },
727 { "Albanian", "sq" },
730 { "Armenian", "hy" },
731 { "Assamese", "as" },
733 { "Azerbaijani", "az" },
735 { "Belarusian", "be" },
736 { "Belorussian", "be" },
738 { "Brazilian Portugese", "pt_BR" },
739 { "Brazilian Portuguese", "pt_BR" },
741 { "Bulgarian", "bg" },
743 { "Byelorussian", "be" },
746 { "Chichewa", "ny" },
748 { "Chinese, Simplified", "zh_CN" },
749 { "Chinese, Traditional", "zh_TW" },
750 { "Chinese, Tradtional", "zh_TW" },
751 { "Croatian", "hr" },
755 { "Dzongkha", "dz" },
757 { "Esperanto", "eo" },
758 { "Estonian", "et" },
762 { "Flemish", "nl_BE" },
764 { "Galician", "gl" },
765 { "Gallegan", "gl" },
766 { "Georgian", "ka" },
769 { "Greenlandic", "kl" },
771 { "Gujarati", "gu" },
772 { "Hawaiian", "haw" }, /* Yes, "haw", not "cpe". */
775 { "Hungarian", "hu" },
776 { "Icelandic", "is" },
777 { "Indonesian", "id" },
778 { "Inuktitut", "iu" },
781 { "Japanese", "ja" },
782 { "Javanese", "jv" },
783 { "Kalaallisut", "kl" },
785 { "Kashmiri", "ks" },
788 { "Kinyarwanda", "rw" },
794 { "Lithuanian", "lt" },
795 { "Macedonian", "mk" },
796 { "Malagasy", "mg" },
798 { "Malayalam", "ml" },
802 { "Moldavian", "mo" },
803 { "Mongolian", "mn" },
805 { "Norwegian", "nb" }, /* Yes, "nb", not the obsolete "no". */
814 { "Portuguese", "pt" },
815 { "Portuguese, Brazilian", "pt_BR" },
819 { "Romanian", "ro" },
823 { "Sami", "se_NO" }, /* Not just "se". */
824 { "Sanskrit", "sa" },
825 { "Scottish", "gd" },
827 { "Simplified Chinese", "zh_CN" },
829 { "Sinhalese", "si" },
831 { "Slovenian", "sl" },
834 { "Sundanese", "su" },
845 { "Tigrinya", "ti" },
847 { "Traditional Chinese", "zh_TW" },
851 { "Ukrainian", "uk" },
854 { "Vietnamese", "vi" },
859 /* Convert new-style locale names with language tags (ISO 639 and ISO 15924)
860 to Unix (ISO 639 and ISO 3166) names. */
861 typedef struct { const char langtag[7+1]; const char unixy[12+1]; }
863 static const langtag_entry langtag_table[] = {
864 /* MacOS X has "az-Arab", "az-Cyrl", "az-Latn".
865 The default script for az on Unix is Latin. */
867 /* MacOS X has "ga-dots". Does not yet exist on Unix. */
869 /* MacOS X has "kk-Cyrl". Does not yet exist on Unix. */
870 /* MacOS X has "mn-Cyrl", "mn-Mong".
871 The default script for mn on Unix is Cyrillic. */
873 /* MacOS X has "ms-Arab", "ms-Latn".
874 The default script for ms on Unix is Latin. */
876 /* MacOS X has "tg-Cyrl".
877 The default script for tg on Unix is Cyrillic. */
879 /* MacOS X has "tk-Cyrl". Does not yet exist on Unix. */
880 /* MacOS X has "tt-Cyrl".
881 The default script for tt on Unix is Cyrillic. */
883 /* MacOS X has "zh-Hans", "zh-Hant".
884 Country codes are used to distinguish these on Unix. */
885 { "zh-Hans", "zh_CN" },
886 { "zh-Hant", "zh_TW" }
889 /* Convert script names (ISO 15924) to Unix conventions.
890 See http://www.unicode.org/iso15924/iso15924-codes.html */
891 typedef struct { const char script[4+1]; const char unixy[9+1]; }
893 static const script_entry script_table[] = {
894 { "Arab", "arabic" },
895 { "Cyrl", "cyrillic" },
896 { "Mong", "mongolian" }
899 /* Step 1: Convert using legacy_table. */
900 if (name[0] >= 'A' && name[0] <= 'Z')
904 i2 = sizeof (legacy_table) / sizeof (legacy_entry);
907 /* At this point we know that if name occurs in legacy_table,
908 its index must be >= i1 and < i2. */
909 unsigned int i = (i1 + i2) >> 1;
910 const legacy_entry *p = &legacy_table[i];
911 if (strcmp (name, p->legacy) < 0)
916 if (strcmp (name, legacy_table[i1].legacy) == 0)
918 strcpy (name, legacy_table[i1].unixy);
923 /* Step 2: Convert using langtag_table and script_table. */
924 if (strlen (name) == 7 && name[2] == '-')
928 i2 = sizeof (langtag_table) / sizeof (langtag_entry);
931 /* At this point we know that if name occurs in langtag_table,
932 its index must be >= i1 and < i2. */
933 unsigned int i = (i1 + i2) >> 1;
934 const langtag_entry *p = &langtag_table[i];
935 if (strcmp (name, p->langtag) < 0)
940 if (strcmp (name, langtag_table[i1].langtag) == 0)
942 strcpy (name, langtag_table[i1].unixy);
947 i2 = sizeof (script_table) / sizeof (script_entry);
950 /* At this point we know that if (name + 3) occurs in script_table,
951 its index must be >= i1 and < i2. */
952 unsigned int i = (i1 + i2) >> 1;
953 const script_entry *p = &script_table[i];
954 if (strcmp (name + 3, p->script) < 0)
959 if (strcmp (name + 3, script_table[i1].script) == 0)
962 strcpy (name + 3, script_table[i1].unixy);
967 /* Step 3: Convert new-style dash to Unix underscore. */
970 for (p = name; *p != '\0'; p++)
978 /* XPG3 defines the result of 'setlocale (category, NULL)' as:
979 "Directs 'setlocale()' to query 'category' and return the current
980 setting of 'local'."
981 However it does not specify the exact format. Neither do SUSV2 and
982 ISO C 99. So we can use this feature only on selected systems (e.g.
983 those using GNU C Library). */
984 #if defined _LIBC || (defined __GLIBC__ && __GLIBC__ >= 2)
985 # define HAVE_LOCALE_NULL
988 /* Determine the current locale's name, and canonicalize it into XPG syntax
989 language[_territory][.codeset][@modifier]
990 The codeset part in the result is not reliable; the locale_charset()
991 function should be used for codeset information instead.
992 The result must not be freed; it is statically allocated. */
995 gl_locale_name_posix (int category, const char *categoryname)
997 /* Use the POSIX method of consulting 'LC_ALL', 'LC_xxx', and 'LANG'.
998 On some systems this can be done by the 'setlocale' function itself. */
999 #if defined HAVE_SETLOCALE && defined HAVE_LC_MESSAGES && defined HAVE_LOCALE_NULL
1000 return setlocale (category, NULL);
1004 /* Setting of LC_ALL overrides all others. */
1005 retval = getenv ("LC_ALL");
1006 if (retval != NULL && retval[0] != '\0')
1008 /* Next comes the name of the desired category. */
1009 retval = getenv (categoryname);
1010 if (retval != NULL && retval[0] != '\0')
1012 /* Last possibility is the LANG environment variable. */
1013 retval = getenv ("LANG");
1014 if (retval != NULL && retval[0] != '\0')
1022 gl_locale_name_default (void)
1024 /* POSIX:2001 says:
1025 "All implementations shall define a locale as the default locale, to be
1026 invoked when no environment variables are set, or set to the empty
1027 string. This default locale can be the POSIX locale or any other
1028 implementation-defined locale. Some implementations may provide
1029 facilities for local installation administrators to set the default
1030 locale, customizing it for each location. POSIX:2001 does not require
1031 implementations to provide these facilities." */
1033 #if !(HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE || defined(WIN32_NATIVE))
1035 /* The system does not have a way of setting the locale, other than the
1036 POSIX-specified environment variables. We use "C" as the default locale. */
1041 /* Return an XPG style locale name language[_territory][@modifier].
1042 Don't even bother determining the codeset; it's not useful in this
1043 context, because message catalogs are not specific to a single
1044 codeset. */
1046 # if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
1047 /* MacOS X 10.2 or newer */
1049 /* Cache the locale name, since CoreFoundation calls are expensive. */
1050 static const char *cached_localename;
1052 if (cached_localename == NULL)
1055 # if HAVE_CFLOCALECOPYCURRENT /* MacOS X 10.3 or newer */
1056 CFLocaleRef locale = CFLocaleCopyCurrent ();
1057 CFStringRef name = CFLocaleGetIdentifier (locale);
1059 if (CFStringGetCString (name, namebuf, sizeof(namebuf),
1060 kCFStringEncodingASCII))
1062 gl_locale_name_canonicalize (namebuf);
1063 cached_localename = strdup (namebuf);
1066 # elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */
1068 CFPreferencesCopyAppValue (CFSTR ("AppleLocale"),
1069 kCFPreferencesCurrentApplication);
1071 && CFGetTypeID (value) == CFStringGetTypeID ()
1072 && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf),
1073 kCFStringEncodingASCII))
1075 gl_locale_name_canonicalize (namebuf);
1076 cached_localename = strdup (namebuf);
1079 if (cached_localename == NULL)
1080 cached_localename = "C";
1082 return cached_localename;
1087 # if defined(WIN32_NATIVE) /* WIN32, not Cygwin */
1093 /* Use native Win32 API locale ID. */
1094 lcid = GetThreadLocale ();
1096 /* Strip off the sorting rules, keep only the language part. */
1097 langid = LANGIDFROMLCID (lcid);
1099 /* Split into language and territory part. */
1100 primary = PRIMARYLANGID (langid);
1101 sub = SUBLANGID (langid);
1103 /* Dispatch on language.
1104 See also http://www.unicode.org/unicode/onlinedat/languages.html .
1105 For details about languages, see http://www.ethnologue.com/ . */
1108 case LANG_AFRIKAANS: return "af_ZA";
1109 case LANG_ALBANIAN: return "sq_AL";
1110 case LANG_AMHARIC: return "am_ET";
1114 case SUBLANG_ARABIC_SAUDI_ARABIA: return "ar_SA";
1115 case SUBLANG_ARABIC_IRAQ: return "ar_IQ";
1116 case SUBLANG_ARABIC_EGYPT: return "ar_EG";
1117 case SUBLANG_ARABIC_LIBYA: return "ar_LY";
1118 case SUBLANG_ARABIC_ALGERIA: return "ar_DZ";
1119 case SUBLANG_ARABIC_MOROCCO: return "ar_MA";
1120 case SUBLANG_ARABIC_TUNISIA: return "ar_TN";
1121 case SUBLANG_ARABIC_OMAN: return "ar_OM";
1122 case SUBLANG_ARABIC_YEMEN: return "ar_YE";
1123 case SUBLANG_ARABIC_SYRIA: return "ar_SY";
1124 case SUBLANG_ARABIC_JORDAN: return "ar_JO";
1125 case SUBLANG_ARABIC_LEBANON: return "ar_LB";
1126 case SUBLANG_ARABIC_KUWAIT: return "ar_KW";
1127 case SUBLANG_ARABIC_UAE: return "ar_AE";
1128 case SUBLANG_ARABIC_BAHRAIN: return "ar_BH";
1129 case SUBLANG_ARABIC_QATAR: return "ar_QA";
1132 case LANG_ARMENIAN: return "hy_AM";
1133 case LANG_ASSAMESE: return "as_IN";
1137 /* FIXME: Adjust this when Azerbaijani locales appear on Unix. */
1138 case SUBLANG_AZERI_LATIN: return "az_AZ@latin";
1139 case SUBLANG_AZERI_CYRILLIC: return "az_AZ@cyrillic";
1145 case SUBLANG_DEFAULT: return "eu_ES";
1147 return "eu"; /* Ambiguous: could be "eu_ES" or "eu_FR". */
1148 case LANG_BELARUSIAN: return "be_BY";
1152 case SUBLANG_BENGALI_INDIA: return "bn_IN";
1153 case SUBLANG_BENGALI_BANGLADESH: return "bn_BD";
1156 case LANG_BULGARIAN: return "bg_BG";
1157 case LANG_BURMESE: return "my_MM";
1158 case LANG_CAMBODIAN: return "km_KH";
1159 case LANG_CATALAN: return "ca_ES";
1160 case LANG_CHEROKEE: return "chr_US";
1164 case SUBLANG_CHINESE_TRADITIONAL: return "zh_TW";
1165 case SUBLANG_CHINESE_SIMPLIFIED: return "zh_CN";
1166 case SUBLANG_CHINESE_HONGKONG: return "zh_HK";
1167 case SUBLANG_CHINESE_SINGAPORE: return "zh_SG";
1168 case SUBLANG_CHINESE_MACAU: return "zh_MO";
1171 case LANG_CROATIAN: /* LANG_CROATIAN == LANG_SERBIAN
1172 * What used to be called Serbo-Croatian
1173 * should really now be two separate
1174 * languages because of political reasons.
1175 * (Says tml, who knows nothing about Serbian
1177 * (I can feel those flames coming already.)
1181 case SUBLANG_DEFAULT: return "hr_HR";
1182 case SUBLANG_SERBIAN_LATIN: return "sr_CS";
1183 case SUBLANG_SERBIAN_CYRILLIC: return "sr_CS@cyrillic";
1186 case LANG_CZECH: return "cs_CZ";
1187 case LANG_DANISH: return "da_DK";
1188 case LANG_DIVEHI: return "dv_MV";
1192 case SUBLANG_DUTCH: return "nl_NL";
1193 case SUBLANG_DUTCH_BELGIAN: /* FLEMISH, VLAAMS */ return "nl_BE";
1196 case LANG_EDO: return "bin_NG";
1200 /* SUBLANG_ENGLISH_US == SUBLANG_DEFAULT. Heh. I thought
1201 * English was the language spoken in England.
1204 case SUBLANG_ENGLISH_US: return "en_US";
1205 case SUBLANG_ENGLISH_UK: return "en_GB";
1206 case SUBLANG_ENGLISH_AUS: return "en_AU";
1207 case SUBLANG_ENGLISH_CAN: return "en_CA";
1208 case SUBLANG_ENGLISH_NZ: return "en_NZ";
1209 case SUBLANG_ENGLISH_EIRE: return "en_IE";
1210 case SUBLANG_ENGLISH_SOUTH_AFRICA: return "en_ZA";
1211 case SUBLANG_ENGLISH_JAMAICA: return "en_JM";
1212 case SUBLANG_ENGLISH_CARIBBEAN: return "en_GD"; /* Grenada? */
1213 case SUBLANG_ENGLISH_BELIZE: return "en_BZ";
1214 case SUBLANG_ENGLISH_TRINIDAD: return "en_TT";
1215 case SUBLANG_ENGLISH_ZIMBABWE: return "en_ZW";
1216 case SUBLANG_ENGLISH_PHILIPPINES: return "en_PH";
1217 case SUBLANG_ENGLISH_INDONESIA: return "en_ID";
1218 case SUBLANG_ENGLISH_HONGKONG: return "en_HK";
1219 case SUBLANG_ENGLISH_INDIA: return "en_IN";
1220 case SUBLANG_ENGLISH_MALAYSIA: return "en_MY";
1221 case SUBLANG_ENGLISH_SINGAPORE: return "en_SG";
1224 case LANG_ESTONIAN: return "et_EE";
1225 case LANG_FAEROESE: return "fo_FO";
1226 case LANG_FARSI: return "fa_IR";
1227 case LANG_FINNISH: return "fi_FI";
1231 case SUBLANG_FRENCH: return "fr_FR";
1232 case SUBLANG_FRENCH_BELGIAN: /* WALLOON */ return "fr_BE";
1233 case SUBLANG_FRENCH_CANADIAN: return "fr_CA";
1234 case SUBLANG_FRENCH_SWISS: return "fr_CH";
1235 case SUBLANG_FRENCH_LUXEMBOURG: return "fr_LU";
1236 case SUBLANG_FRENCH_MONACO: return "fr_MC";
1237 case SUBLANG_FRENCH_WESTINDIES: return "fr"; /* Caribbean? */
1238 case SUBLANG_FRENCH_REUNION: return "fr_RE";
1239 case SUBLANG_FRENCH_CONGO: return "fr_CG";
1240 case SUBLANG_FRENCH_SENEGAL: return "fr_SN";
1241 case SUBLANG_FRENCH_CAMEROON: return "fr_CM";
1242 case SUBLANG_FRENCH_COTEDIVOIRE: return "fr_CI";
1243 case SUBLANG_FRENCH_MALI: return "fr_ML";
1244 case SUBLANG_FRENCH_MOROCCO: return "fr_MA";
1245 case SUBLANG_FRENCH_HAITI: return "fr_HT";
1248 case LANG_FRISIAN: return "fy_NL";
1250 /* Spoken in Nigeria, Guinea, Senegal, Mali, Niger, Cameroon, Benin. */
1255 case 0x01: /* SCOTTISH */ return "gd_GB";
1256 case 0x02: /* IRISH */ return "ga_IE";
1259 case LANG_GALICIAN: return "gl_ES";
1260 case LANG_GEORGIAN: return "ka_GE";
1264 case SUBLANG_GERMAN: return "de_DE";
1265 case SUBLANG_GERMAN_SWISS: return "de_CH";
1266 case SUBLANG_GERMAN_AUSTRIAN: return "de_AT";
1267 case SUBLANG_GERMAN_LUXEMBOURG: return "de_LU";
1268 case SUBLANG_GERMAN_LIECHTENSTEIN: return "de_LI";
1271 case LANG_GREEK: return "el_GR";
1272 case LANG_GUARANI: return "gn_PY";
1273 case LANG_GUJARATI: return "gu_IN";
1274 case LANG_HAUSA: return "ha_NG";
1276 /* FIXME: Do they mean Hawaiian ("haw_US", 1000 speakers)
1277 or Hawaii Creole English ("cpe_US", 600000 speakers)? */
1279 case LANG_HEBREW: return "he_IL";
1280 case LANG_HINDI: return "hi_IN";
1281 case LANG_HUNGARIAN: return "hu_HU";
1282 case LANG_IBIBIO: return "nic_NG";
1283 case LANG_ICELANDIC: return "is_IS";
1284 case LANG_IGBO: return "ig_NG";
1285 case LANG_INDONESIAN: return "id_ID";
1286 case LANG_INUKTITUT: return "iu_CA";
1290 case SUBLANG_ITALIAN: return "it_IT";
1291 case SUBLANG_ITALIAN_SWISS: return "it_CH";
1294 case LANG_JAPANESE: return "ja_JP";
1295 case LANG_KANNADA: return "kn_IN";
1296 case LANG_KANURI: return "kr_NG";
1300 case SUBLANG_DEFAULT: return "ks_PK";
1301 case SUBLANG_KASHMIRI_INDIA: return "ks_IN";
1304 case LANG_KAZAK: return "kk_KZ";
1306 /* FIXME: Adjust this when such locales appear on Unix. */
1308 case LANG_KOREAN: return "ko_KR";
1309 case LANG_KYRGYZ: return "ky_KG";
1310 case LANG_LAO: return "lo_LA";
1311 case LANG_LATIN: return "la_VA";
1312 case LANG_LATVIAN: return "lv_LV";
1313 case LANG_LITHUANIAN: return "lt_LT";
1314 case LANG_MACEDONIAN: return "mk_MK";
1318 case SUBLANG_MALAY_MALAYSIA: return "ms_MY";
1319 case SUBLANG_MALAY_BRUNEI_DARUSSALAM: return "ms_BN";
1322 case LANG_MALAYALAM: return "ml_IN";
1323 case LANG_MALTESE: return "mt_MT";
1325 /* FIXME: Adjust this when such locales appear on Unix. */
1327 case LANG_MARATHI: return "mr_IN";
1328 case LANG_MONGOLIAN:
1331 case SUBLANG_DEFAULT: return "mn_MN";
1333 return "mn"; /* Ambiguous: could be "mn_CN" or "mn_MN". */
1337 case SUBLANG_DEFAULT: return "ne_NP";
1338 case SUBLANG_NEPALI_INDIA: return "ne_IN";
1341 case LANG_NORWEGIAN:
1344 case SUBLANG_NORWEGIAN_BOKMAL: return "nb_NO";
1345 case SUBLANG_NORWEGIAN_NYNORSK: return "nn_NO";
1348 case LANG_ORIYA: return "or_IN";
1349 case LANG_OROMO: return "om_ET";
1350 case LANG_PAPIAMENTU: return "pap_AN";
1352 return "ps"; /* Ambiguous: could be "ps_PK" or "ps_AF". */
1353 case LANG_POLISH: return "pl_PL";
1354 case LANG_PORTUGUESE:
1357 case SUBLANG_PORTUGUESE: return "pt_PT";
1358 /* Hmm. SUBLANG_PORTUGUESE_BRAZILIAN == SUBLANG_DEFAULT.
1359 Same phenomenon as SUBLANG_ENGLISH_US == SUBLANG_DEFAULT. */
1360 case SUBLANG_PORTUGUESE_BRAZILIAN: return "pt_BR";
1366 case SUBLANG_PUNJABI_INDIA: return "pa_IN"; /* Gurmukhi script */
1367 case SUBLANG_PUNJABI_PAKISTAN: return "pa_PK"; /* Arabic script */
1370 case LANG_RHAETO_ROMANCE: return "rm_CH";
1374 case SUBLANG_ROMANIAN_ROMANIA: return "ro_RO";
1375 case SUBLANG_ROMANIAN_MOLDOVA: return "ro_MD";
1381 case SUBLANG_DEFAULT: return "ru_RU";
1383 return "ru"; /* Ambiguous: could be "ru_RU" or "ru_UA" or "ru_MD". */
1384 case LANG_SAAMI: /* actually Northern Sami */ return "se_NO";
1385 case LANG_SANSKRIT: return "sa_IN";
1389 case SUBLANG_SINDHI_PAKISTAN: return "sd_PK";
1390 case SUBLANG_SINDHI_AFGHANISTAN: return "sd_AF";
1393 case LANG_SINHALESE: return "si_LK";
1394 case LANG_SLOVAK: return "sk_SK";
1395 case LANG_SLOVENIAN: return "sl_SI";
1396 case LANG_SOMALI: return "so_SO";
1398 /* FIXME: Adjust this when such locales appear on Unix. */
1403 case SUBLANG_SPANISH: return "es_ES";
1404 case SUBLANG_SPANISH_MEXICAN: return "es_MX";
1405 case SUBLANG_SPANISH_MODERN:
1406 return "es_ES@modern"; /* not seen on Unix */
1407 case SUBLANG_SPANISH_GUATEMALA: return "es_GT";
1408 case SUBLANG_SPANISH_COSTA_RICA: return "es_CR";
1409 case SUBLANG_SPANISH_PANAMA: return "es_PA";
1410 case SUBLANG_SPANISH_DOMINICAN_REPUBLIC: return "es_DO";
1411 case SUBLANG_SPANISH_VENEZUELA: return "es_VE";
1412 case SUBLANG_SPANISH_COLOMBIA: return "es_CO";
1413 case SUBLANG_SPANISH_PERU: return "es_PE";
1414 case SUBLANG_SPANISH_ARGENTINA: return "es_AR";
1415 case SUBLANG_SPANISH_ECUADOR: return "es_EC";
1416 case SUBLANG_SPANISH_CHILE: return "es_CL";
1417 case SUBLANG_SPANISH_URUGUAY: return "es_UY";
1418 case SUBLANG_SPANISH_PARAGUAY: return "es_PY";
1419 case SUBLANG_SPANISH_BOLIVIA: return "es_BO";
1420 case SUBLANG_SPANISH_EL_SALVADOR: return "es_SV";
1421 case SUBLANG_SPANISH_HONDURAS: return "es_HN";
1422 case SUBLANG_SPANISH_NICARAGUA: return "es_NI";
1423 case SUBLANG_SPANISH_PUERTO_RICO: return "es_PR";
1426 case LANG_SUTU: return "bnt_TZ"; /* or "st_LS" or "nso_ZA"? */
1427 case LANG_SWAHILI: return "sw_KE";
1431 case SUBLANG_DEFAULT: return "sv_SE";
1432 case SUBLANG_SWEDISH_FINLAND: return "sv_FI";
1435 case LANG_SYRIAC: return "syr_TR"; /* An extinct language. */
1436 case LANG_TAGALOG: return "tl_PH";
1437 case LANG_TAJIK: return "tg_TJ";
1438 case LANG_TAMAZIGHT:
1441 /* FIXME: Adjust this when Tamazight locales appear on Unix. */
1442 case SUBLANG_TAMAZIGHT_ARABIC: return "ber_MA@arabic";
1443 case SUBLANG_TAMAZIGHT_ALGERIA_LATIN: return "ber_DZ@latin";
1449 case SUBLANG_DEFAULT: return "ta_IN";
1451 return "ta"; /* Ambiguous: could be "ta_IN" or "ta_LK" or "ta_SG". */
1452 case LANG_TATAR: return "tt_RU";
1453 case LANG_TELUGU: return "te_IN";
1454 case LANG_THAI: return "th_TH";
1455 case LANG_TIBETAN: return "bo_CN";
1459 case SUBLANG_TIGRINYA_ETHIOPIA: return "ti_ET";
1460 case SUBLANG_TIGRINYA_ERITREA: return "ti_ER";
1463 case LANG_TSONGA: return "ts_ZA";
1464 case LANG_TSWANA: return "tn_BW";
1465 case LANG_TURKISH: return "tr_TR";
1466 case LANG_TURKMEN: return "tk_TM";
1467 case LANG_UKRAINIAN: return "uk_UA";
1471 case SUBLANG_URDU_PAKISTAN: return "ur_PK";
1472 case SUBLANG_URDU_INDIA: return "ur_IN";
1478 case SUBLANG_UZBEK_LATIN: return "uz_UZ";
1479 case SUBLANG_UZBEK_CYRILLIC: return "uz_UZ@cyrillic";
1482 case LANG_VENDA: return "ve_ZA";
1483 case LANG_VIETNAMESE: return "vi_VN";
1484 case LANG_WELSH: return "cy_GB";
1485 case LANG_XHOSA: return "xh_ZA";
1486 case LANG_YI: return "sit_CN";
1487 case LANG_YIDDISH: return "yi_IL";
1488 case LANG_YORUBA: return "yo_NG";
1489 case LANG_ZULU: return "zu_ZA";
1490 default: return "C";
1498 gl_locale_name (int category, const char *categoryname)
1502 retval = gl_locale_name_posix (category, categoryname);
1506 return gl_locale_name_default ();