sys-file-writer: Write encoding names in all-uppercase.

author Ben Pfaff <blp@cs.stanford.edu>

Thu, 16 Jun 2011 05:15:10 +0000 (22:15 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Thu, 16 Jun 2011 05:15:10 +0000 (22:15 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Thu, 16 Jun 2011 05:15:10 +0000 (22:15 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Thu, 16 Jun 2011 05:15:10 +0000 (22:15 -0700)
diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi

index 22209a5fcd2183a709d5fbee57f765420499bb2b..84b3f66ff7385afd57c232730648e640e4a67062 100644 (file)
--- a/doc/dev/system-file-format.texi
+++ b/doc/dev/system-file-format.texi
@@ -987,8 +987,11 @@ The size of each element in the @code{encoding} member. Always set to 1.
  The total number of bytes in @code{encoding}.
  
  @item char encoding[];
-The name of the character encoding.  Normally this will be an official IANA characterset name or alias.
+The name of the character encoding.  Normally this will be an official
+IANA character set name or alias.
  See @url{http://www.iana.org/assignments/character-sets}.
+Character set names are not case-sensitive, but SPSS appears to write
+them in all-uppercase.
  @end table
  
  This record is not present in files generated by older software.  See
diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c

index a67cf7dd2f609ef3ce724acde611ad6497ef9ee1..aeb97d64e30c31f685a740b27dc806789acd4377 100644 (file)
--- a/src/data/sys-file-writer.c
+++ b/src/data/sys-file-writer.c
@@ -851,20 +851,19 @@ static void
  write_encoding_record (struct sfm_writer *w,
                        const struct dictionary *d)
  {
-  const char *enc = dict_get_encoding (d);
-
-  if ( NULL == enc)
-    return;
-
-  write_int (w, 7);             /* Record type. */
-  write_int (w, 20);            /* Record subtype. */
-  write_int (w, 1);             /* Data item (char) size. */
-
-  /* IANA says "...character set names may be up to 40 characters taken from
-     the printable characters of US-ASCII," so character set names don't need
-     to be recoded. */
-  write_int (w, strlen (enc));  /* Number of data items. */
-  write_string (w, enc, strlen (enc));
+  if (dict_get_encoding (d) != NULL)
+    {
+      /* IANA says "...character set names may be up to 40 characters taken
+         from the printable characters of US-ASCII," so character set names
+         don't need to be recoded to be in UTF-8.
+
+         We convert encoding names to uppercase because SPSS writes encoding
+         names in uppercase. */
+      char *encoding = xstrdup (dict_get_encoding (d));
+      str_uppercase (encoding);
+      write_string_record (w, ss_cstr (encoding), 20);
+      free (encoding);
+    }
  }
author	Ben Pfaff <blp@cs.stanford.edu>
	Thu, 16 Jun 2011 05:15:10 +0000 (22:15 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Thu, 16 Jun 2011 05:15:10 +0000 (22:15 -0700)
doc/dev/system-file-format.texi		patch | blob | history
src/data/sys-file-writer.c		patch | blob | history