From: Ben Pfaff Date: Thu, 16 Jun 2011 05:15:10 +0000 (-0700) Subject: sys-file-writer: Write encoding names in all-uppercase. X-Git-Tag: v0.7.9~280 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a2da340e85f70ce9449554ba42729a56277c24b;p=pspp-builds.git sys-file-writer: Write encoding names in all-uppercase. SPSS appears to write encoding names in all-uppercase, so PSPP should too. --- diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 22209a5f..84b3f66f 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -987,8 +987,11 @@ The size of each element in the @code{encoding} member. Always set to 1. The total number of bytes in @code{encoding}. @item char encoding[]; -The name of the character encoding. Normally this will be an official IANA characterset name or alias. +The name of the character encoding. Normally this will be an official +IANA character set name or alias. See @url{http://www.iana.org/assignments/character-sets}. +Character set names are not case-sensitive, but SPSS appears to write +them in all-uppercase. @end table This record is not present in files generated by older software. See diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index a67cf7dd..aeb97d64 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -851,20 +851,19 @@ static void write_encoding_record (struct sfm_writer *w, const struct dictionary *d) { - const char *enc = dict_get_encoding (d); - - if ( NULL == enc) - return; - - write_int (w, 7); /* Record type. */ - write_int (w, 20); /* Record subtype. */ - write_int (w, 1); /* Data item (char) size. */ - - /* IANA says "...character set names may be up to 40 characters taken from - the printable characters of US-ASCII," so character set names don't need - to be recoded. */ - write_int (w, strlen (enc)); /* Number of data items. 
*/ - write_string (w, enc, strlen (enc)); + if (dict_get_encoding (d) != NULL) + { + /* IANA says "...character set names may be up to 40 characters taken + from the printable characters of US-ASCII," so character set names + don't need to be recoded to be in UTF-8. + + We convert encoding names to uppercase because SPSS writes encoding + names in uppercase. */ + char *encoding = xstrdup (dict_get_encoding (d)); + str_uppercase (encoding); + write_string_record (w, ss_cstr (encoding), 20); + free (encoding); + } }