Merge 'master' into 'psppsheet'.

[pspp] / doc / dev / system-file-format.texi
diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi

index 3f82a4628e0918e1797e6c23a0670e42e0a5e8dc..8315762cef52cb26ea20f1aa2757ca3d6f622554 100644 (file)
--- a/doc/dev/system-file-format.texi
+++ b/doc/dev/system-file-format.texi
@@ -60,7 +60,8 @@ if present.
  Document record, if present.
  
  @item
-Any records not explicitly included in this list, in any order.
+Extension (type 7) records, in ascending numerical order of their
+subtypes.
  
  @item
  Dictionary termination record.
@@ -114,7 +115,9 @@ char                padding[3];
  
  @table @code
  @item char rec_type[4];
-Record type code, set to @samp{$FL2}.
+Record type code, set to @samp{$FL2}, that is, either hexadecimal bytes
+@code{24 46 4c 32} if the file uses an ASCII-based character encoding,
+or @code{5b c6 d3 f2} if the file uses an EBCDIC-based character encoding.
  
  @item char prod_name[60];
  Product identification string.  This always begins with the characters
@@ -390,6 +393,11 @@ Format types are defined as follows:
  @end multitable
  @end quotation
  
+A few system files have been observed in the wild with invalid
+@code{write} fields, in particular with value 0.  Readers should
+probably substitute a default format for any invalid @code{print}
+or @code{write} field.
+
  @node Value Labels Records
  @section Value Labels Records
  
@@ -553,6 +561,9 @@ Machine endianness.  1 indicates big-endian, 2 indicates little-endian.
  been actually observed in system files:
  
  @table @asis
+@item 1
+EBCDIC.
+
  @item 2
  7-bit ASCII.
  
@@ -573,9 +584,6 @@ UTF-8.
  The following additional values are known to be defined:
  
  @table @asis
-@item 1
-EBCDIC.
-
  @item 3
  8-bit ``ASCII''.
  
@@ -585,9 +593,10 @@ DEC Kanji.
  
  Other Windows code page numbers are known to be generally valid.
  
-Old versions of SPSS always wrote value 2 in this field, regardless of
-the encoding in use.  Newer versions also write the character encoding
-as a string (see @ref{Character Encoding Record}).
+Old versions of SPSS for Unix and Windows always wrote value 2 in this
+field, regardless of the encoding in use.  Newer versions also write
+the character encoding as a string (see @ref{Character Encoding
+Record}).
  @end table
  
  @node Machine Floating-Point Info Record
@@ -675,7 +684,8 @@ following:
  
  @itemize @bullet
  @item
-The set's name (an identifier that begins with @samp{$}).
+The set's name (an identifier that begins with @samp{$}), in mixed
+upper and lower case.
  
  @item
  An equals sign (@samp{=}).
@@ -716,8 +726,8 @@ written if LABELSOURCE=VARLABEL was specified.
  A space.
  
  @item
-The names of the variables in the set, each separated from the
-previous by a single space.
+The short names of the variables in the set, converted to lowercase,
+each separated from the previous by a single space.
  
  @item
  A line feed (byte 0x0a).
@@ -986,8 +996,11 @@ The size of each element in the @code{encoding} member. Always set to 1.
  The total number of bytes in @code{encoding}.
  
  @item char encoding[];
-The name of the character encoding.  Normally this will be an official IANA characterset name or alias.
+The name of the character encoding.  Normally this will be an official
+IANA character set name or alias.
  See @url{http://www.iana.org/assignments/character-sets}.
+Character set names are not case-sensitive, but SPSS appears to write
+them in all-uppercase.
  @end table
  
  This record is not present in files generated by older software.  See
@@ -1132,8 +1145,8 @@ element.
  In record type 18, this field contains a sequence of one or more
  variable attribute sets.  If more than one variable attribute set is
  present, each one after the first is delimited from the previous by
-@code{/}.  Each variable attribute set consists of a (potentially
-long) variable name,
+@code{/}.  Each variable attribute set consists of a long
+variable name,
  followed by @code{:}, followed by an attribute set with the same
  syntax as on record type 17.