From 55b94b3e3e34b6d11279c29c1a28f6140162e169 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Mon, 30 Mar 2009 07:47:38 +0800 Subject: [PATCH] Document record 7, subtype 20 in system file format. Add information about the character encoding record to the developer's reference guide. --- doc/dev/system-file-format.texi | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 3e764c8c..164807b8 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -96,6 +96,7 @@ Each type of record is described separately below. * Variable Display Parameter Record:: * Long Variable Names Record:: * Very Long String Record:: +* Character Encoding Record:: * Data File and Variable Attributes Records:: * Miscellaneous Informational Records:: * Dictionary Termination Record:: @@ -546,9 +547,14 @@ Compression code. Always set to 1. Machine endianness. 1 indicates big-endian, 2 indicates little-endian. @item int32 character_code; +@anchor{character-code} Character code. 1 indicates EBCDIC, 2 indicates 7-bit ASCII, 3 indicates 8-bit ASCII, 4 indicates DEC Kanji. Windows code page numbers are also valid. + +Experience has shown that in many files, this field is ignored or incorrect. +For a more reliable indication of the file's character encoding +see @ref{Character Encoding Record}. @end table @node Machine Floating-Point Info Record @@ -792,6 +798,46 @@ After the last tuple, there may be a single byte 00, or @{00, 09@}. The total length is @code{count} bytes. @end table +@node Character Encoding Record +@section Character Encoding Record + +This record, if present, indicates the character encoding for string data, +long variable names, variable labels, value labels and other strings in the +file. 
+ +@example +/* @r{Header.} */ +int32 rec_type; +int32 subtype; +int32 size; +int32 count; + +/* @r{Exactly @code{count} bytes of data.} */ +char encoding[]; +@end example + +@table @code +@item int32 rec_type; +Record type. Always set to 7. + +@item int32 subtype; +Record subtype. Always set to 20. + +@item int32 size; +The size of each element in the @code{encoding} member. Always set to 1. + +@item int32 count; +The total number of bytes in @code{encoding}. + +@item char encoding[]; +The name of the character encoding. Normally this will be an official IANA character set name or alias. +See @url{http://www.iana.org/assignments/character-sets}. +@end table + +This record is not present in files generated by older software. +See also @ref{character-code}. + + @node Data File and Variable Attributes Records @section Data File and Variable Attributes Records -- 2.30.2