From: Ben Pfaff <blp@cs.stanford.edu>
Date: Thu, 16 Jun 2011 05:15:10 +0000 (-0700)
Subject: sys-file-writer: Write encoding names in all-uppercase.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fbuilds%2F20110616030507%2Fpspp;p=pspp

sys-file-writer: Write encoding names in all-uppercase.

SPSS appears to write encoding names in all-uppercase, so PSPP
should too.
---

diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi
index 22209a5fcd..84b3f66ff7 100644
--- a/doc/dev/system-file-format.texi
+++ b/doc/dev/system-file-format.texi
@@ -987,8 +987,11 @@ The size of each element in the @code{encoding} member. Always set to 1.
 The total number of bytes in @code{encoding}.
 
 @item char encoding[];
-The name of the character encoding.  Normally this will be an official IANA characterset name or alias.
+The name of the character encoding.  Normally this will be an official
+IANA character set name or alias.
 See @url{http://www.iana.org/assignments/character-sets}.
+Character set names are not case-sensitive, but SPSS appears to write
+them in all-uppercase.
 @end table
 
 This record is not present in files generated by older software.  See
diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c
index a67cf7dd2f..aeb97d64e3 100644
--- a/src/data/sys-file-writer.c
+++ b/src/data/sys-file-writer.c
@@ -851,20 +851,19 @@ static void
 write_encoding_record (struct sfm_writer *w,
 		       const struct dictionary *d)
 {
-  const char *enc = dict_get_encoding (d);
-
-  if ( NULL == enc)
-    return;
-
-  write_int (w, 7);             /* Record type. */
-  write_int (w, 20);            /* Record subtype. */
-  write_int (w, 1);             /* Data item (char) size. */
-
-  /* IANA says "...character set names may be up to 40 characters taken from
-     the printable characters of US-ASCII," so character set names don't need
-     to be recoded. */
-  write_int (w, strlen (enc));  /* Number of data items. */
-  write_string (w, enc, strlen (enc));
+  if (dict_get_encoding (d) != NULL)
+    {
+      /* IANA says "...character set names may be up to 40 characters taken
+         from the printable characters of US-ASCII," so character set names
+         don't need to be recoded to be in UTF-8.
+
+         We convert encoding names to uppercase because SPSS writes encoding
+         names in uppercase. */
+      char *encoding = xstrdup (dict_get_encoding (d));
+      str_uppercase (encoding);
+      write_string_record (w, ss_cstr (encoding), 20);
+      free (encoding);
+    }
 }