From b106c9452e2682e7923dc43028cf0e4b353e5443 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Wed, 1 Apr 2009 09:56:49 +0800 Subject: [PATCH] Implemented the SET LOCALE='...' command. Allow the user to set the default character encoding. --- doc/utilities.texi | 35 +++++++++++++++++- src/data/settings.c | 1 + src/language/utilities/set.q | 44 +++++++++++++++++++++++ src/libpspp/i18n.c | 69 ++++++++++++++++++++++++++++++++++++ src/libpspp/i18n.h | 11 +++++- 5 files changed, 158 insertions(+), 2 deletions(-) diff --git a/doc/utilities.texi b/doc/utilities.texi index 95cfa25d..6882aa21 100644 --- a/doc/utilities.texi +++ b/doc/utilities.texi @@ -374,8 +374,10 @@ SET /COMPRESSION=@{ON,OFF@} /SCOMPRESSION=@{ON,OFF@} -(security) +(miscellaneous) /SAFER=ON + /LOCALE='string' + (obsolete settings accepted for compatibility, but ignored) /BOXSTRING=@{'xxx','xxxxxxxxxxx'@} @@ -701,6 +703,37 @@ Be aware that this setting does not guarantee safety (commands can still overwrite files, for instance) but it is an improvement. When set, this setting cannot be reset during the same session, for obvious security reasons. + +@item LOCALE +@cindex locale +@cindex encoding, characters +This item is used to set the default character encoding. +The encoding may be specified either as an encoding name or alias +(see @url{http://www.iana.org/assignments/character-sets}), or +as a locale name. +If given as a locale name, only the character encoding of the +locale is relevant. + +System files written by PSPP will use this encoding. +System files read by PSPP, for which the encoding is unknown, will be +interpreted using this encoding. + +The full list of valid encodings and locale names/aliases is operating system +dependent. +The following are all examples of acceptable syntax on common GNU/Linux +systems. +@example + +SET LOCALE='iso-8859-1'. + +SET LOCALE='ru_RU.cp1251'. + +SET LOCALE='japanese'. 
+ +@end example + +Contrary to the intuition, this command does not affect any aspect +of the system's locale. @end table @node SHOW diff --git a/src/data/settings.c b/src/data/settings.c index 94f1f09a..f9c65fc8 100644 --- a/src/data/settings.c +++ b/src/data/settings.c @@ -24,6 +24,7 @@ #include "xalloc.h" #include #include +#include #include "error.h" diff --git a/src/language/utilities/set.q b/src/language/utilities/set.q index 37388f92..e8cdf1aa 100644 --- a/src/language/utilities/set.q +++ b/src/language/utilities/set.q @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,7 @@ int tgetnum (const char *); journal=custom; log=custom; length=custom; + locale=custom; listing=custom; lowres=lores:auto/on/off; lpi=integer "x>0" "%s must be greater than 0"; @@ -361,6 +363,41 @@ stc_custom_length (struct lexer *lexer, struct dataset *ds UNUSED, struct cmd_se return 1; } +static int +stc_custom_locale (struct lexer *lexer, struct dataset *ds UNUSED, + struct cmd_set *cmd UNUSED, void *aux UNUSED) +{ + const struct string *s; + + lex_match (lexer, '='); + + if ( !lex_force_string (lexer)) + return 0; + + s = lex_tokstr (lexer); + + lex_get (lexer); + + /* First try this string as an encoding name */ + if ( valid_encoding (ds_cstr (s))) + set_default_encoding (ds_cstr (s)); + + /* Now try as a locale name (or alias) */ + else if (set_encoding_from_locale (ds_cstr (s))) + { + } + else + { + msg (ME, _("%s is not a recognised encoding or locale name"), + ds_cstr (s)); + return 0; + } + + return 1; +} + + + static int stc_custom_seed (struct lexer *lexer, struct dataset *ds UNUSED, struct cmd_set *cmd UNUSED, void *aux UNUSED) { @@ -589,6 +626,12 @@ show_length (const struct dataset *ds UNUSED) msg (SN, _("LENGTH is %d."), settings_get_viewlength ()); } +static void +show_locale (const struct dataset *ds UNUSED) +{ + msg (SN, _("LOCALE is %s"), get_default_encoding ()); +} + static void show_mxerrs (const struct dataset *ds UNUSED) 
{ @@ -744,6 +787,7 @@ const struct show_sbc show_table[] = {"ERRORS", show_errors}, {"FORMAT", show_format}, {"LENGTH", show_length}, + {"LOCALE", show_locale}, {"MXERRS", show_mxerrs}, {"MXLOOPS", show_mxloops}, {"MXWARNS", show_mxwarns}, diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 36215b70..c0459712 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -204,6 +204,62 @@ i18n_init (void) } +const char * +get_default_encoding (void) +{ + return default_encoding; /* still owned here: set_default_encoding () frees it */ +} + +void +set_default_encoding (const char *enc) +{ + char *copy = strdup (enc); /* copy first: ENC may be the string about to be freed */ + free (default_encoding); + default_encoding = copy; +} + +/* Attempts to set the default encoding from the locale name LOC. + Returns true if successful, false if LOC gives the same charset as the + "C" locale. The LC_CTYPE locale in effect on entry is restored. +*/ +bool +set_encoding_from_locale (const char *loc) +{ + bool ok = true; + char *c_encoding; + char *loc_encoding; + char *tmp = strdup (setlocale (LC_CTYPE, NULL)); + + setlocale (LC_CTYPE, "C"); + c_encoding = strdup (locale_charset ()); + + setlocale (LC_CTYPE, loc); + loc_encoding = strdup (locale_charset ()); + + + if ( 0 == strcmp (loc_encoding, c_encoding)) + { + ok = false; + } + + + setlocale (LC_CTYPE, tmp); + + free (tmp); + + if (ok) + { + free (default_encoding); + default_encoding = loc_encoding; + } + else + free (loc_encoding); + + free (c_encoding); + + return ok; +} + void i18n_done (void) { @@ -220,6 +276,19 @@ i18n_done (void) +bool +valid_encoding (const char *enc) +{ + iconv_t conv = iconv_open ("UTF-8", enc); /* probe only: can ENC be converted to UTF-8? */ + + if ( conv == (iconv_t) -1) + return false; + + iconv_close (conv); + + return true; +} + /* Return the system local's idea of the decimal seperator character */ diff --git a/src/libpspp/i18n.h b/src/libpspp/i18n.h index c167e1f9..2c30a700 100644 --- a/src/libpspp/i18n.h +++ b/src/libpspp/i18n.h @@ -17,18 +17,27 @@ #ifndef I18N_H #define I18N_H +#include + void i18n_done (void); void i18n_init (void); - #define UTF8 "UTF-8" char * recode_string (const char *to, const char *from, const char 
*text, int len); +bool valid_encoding (const char *enc); /* true if iconv_open accepts ENC as a source encoding */ + /* Return the decimal separator according to the system locale */ char get_system_decimal (void); +const char * get_default_encoding (void); /* current default encoding name */ +void set_default_encoding (const char *enc); /* stores a copy of ENC */ + +bool set_encoding_from_locale (const char *loc); /* true if the default was changed */ + + #endif /* i18n.h */ -- 2.30.2