From 8a2da340e85f70ce9449554ba42729a56277c24b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Jun 2011 22:15:10 -0700 Subject: [PATCH] sys-file-writer: Write encoding names in all-uppercase. SPSS appears to write encoding names in all-uppercase, so PSPP should too. --- doc/dev/system-file-format.texi | 5 ++++- src/data/sys-file-writer.c | 27 +++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 22209a5fcd..84b3f66ff7 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -987,8 +987,11 @@ The size of each element in the @code{encoding} member. Always set to 1. The total number of bytes in @code{encoding}. @item char encoding[]; -The name of the character encoding. Normally this will be an official IANA characterset name or alias. +The name of the character encoding. Normally this will be an official +IANA character set name or alias. See @url{http://www.iana.org/assignments/character-sets}. +Character set names are not case-sensitive, but SPSS appears to write +them in all-uppercase. @end table This record is not present in files generated by older software. See diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index a67cf7dd2f..aeb97d64e3 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -851,20 +851,19 @@ static void write_encoding_record (struct sfm_writer *w, const struct dictionary *d) { - const char *enc = dict_get_encoding (d); - - if ( NULL == enc) - return; - - write_int (w, 7); /* Record type. */ - write_int (w, 20); /* Record subtype. */ - write_int (w, 1); /* Data item (char) size. */ - - /* IANA says "...character set names may be up to 40 characters taken from - the printable characters of US-ASCII," so character set names don't need - to be recoded. */ - write_int (w, strlen (enc)); /* Number of data items. 
*/ - write_string (w, enc, strlen (enc)); + if (dict_get_encoding (d) != NULL) + { + /* IANA says "...character set names may be up to 40 characters taken + from the printable characters of US-ASCII," so character set names + don't need to be recoded to be in UTF-8. + + We convert encoding names to uppercase because SPSS writes encoding + names in uppercase. */ + char *encoding = xstrdup (dict_get_encoding (d)); + str_uppercase (encoding); + write_string_record (w, ss_cstr (encoding), 20); + free (encoding); + } } -- 2.30.2