From 7a9ae16da2aecc9fd0af17e8daf674de2cd59047 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 31 Aug 2023 20:07:45 -0700 Subject: [PATCH] docs --- doc/dev/system-file-format.texi | 88 ++++++++++++++++++--------------- rust/src/cooked.rs | 3 +- 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 9d408f8680..d8c1ca3bb8 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -1321,87 +1321,95 @@ record (@pxref{character-code}). The following character encoding names have been observed. The names are shown in lowercase, even though they were not always in lowercase in the file. Alternative names for the same encoding are, when known, -listed together. For each encoding, the @code[character_code} values +listed together. For each encoding, the @code{character_code} values that they were observed paired with are also listed. First, the following are strictly single-byte, ASCII-compatible encodings: -@table @code -@item @r{(encoding record missing)} +@table @asis +@item (encoding record missing) 0, 2, 3, 874, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 20127, 28591, 28592, 28605 -@item ansi_x3.4-1968 -@itemx ascii +@item @code{ansi_x3.4-1968} +@itemx @code{ascii} 1252 -@item cp28605 +@item @code{cp28605} 2 -@item cp874 +@item @code{cp874} 9066 -@item iso-8859-1 +@item @code{iso-8859-1} 819 -@item windows-874 +@item @code{windows-874} 874 -@item windows-1250 +@item @code{windows-1250} 2, 1250, 1252 -@item windows-1251 +@item @code{windows-1251} 2, 1251 -@item cp1252 -@itemx windows-1252 +@item @code{cp1252} +@itemx @code{windows-1252} 2, 1250, 1252, 1253 -@item cp1253 -@itemx windows-1253 +@item @code{cp1253} +@itemx @code{windows-1253} 1253 -@item windows-1254 +@item @code{windows-1254} 2, 1254 -@item windows-1255 +@item @code{windows-1255} 2, 1255 -@Item windows-1256 +@item @code{windows-1256} 2, 1252, 1256 -@item windows-1257 +@item @code{windows-1257} 2, 1257 -@item windows-1258 +@item @code{windows-1258} 1258 @end table -The following are multibyte encodings, in which some code points -occupy a single byte and others multiple bytes. All of the following -encode ASCII characters as their native values, but some of them -(marked as ``not ASCII compatible'') also use ASCII values as second -or later bytes in multibyte sequences: +The others are multibyte encodings, in which some code points occupy a +single byte and others multiple bytes. The following multibyte +encodings are ``ASCII compatible,'' that is, they use ASCII values +only to indicate ASCII: -@table @code -@item @r{(encoding record missing)} -65001, 949 (ASCII compatible) and 932, 936, 950 (not ASCII compatible). +@table @asis +@item (encoding record missing) +65001, 949 -@item big5 -@itemx cp950 -2, 950 (not ASCII compatible) +@item @code{euc-kr} +2, 51949 -@item euc-kr -2, 51949 (ASCII compatible) +@item @code{utf-8} +0, 2, 1250, 1251, 1252, 1256, 65001 +@end table + +The following multibyte encodings are not ASCII compatible, that is, +while they encode ASCII characters as their native values, they also +use ASCII values as second or later bytes in multibyte sequences: + +@table @asis +@item (encoding record missing) +932, 936, 950 -@item gbk -936 (not ASCII compatible) +@item @code{big5} +@itemx @code{cp950} +2, 950 -@item utf-8 -0, 2, 1250, 1251, 1252, 1256, 65001 (ASCII compatible) +@item @code{gbk} +936 -@item cp932 -@itemx windows-31j -932 (not ASCII compatible) +@item @code{cp932} +@itemx @code{windows-31j} +932 @end table As the tables above show, when the character encoding record and the diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 30430c101e..97ea4906d6 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -155,7 +155,7 @@ pub enum Record { VeryLongStrings(VeryLongStringRecord), FileAttributes(FileAttributeRecord), VariableAttributes(VariableAttributeRecord), - //OtherExtension(Extension), + OtherExtension(Extension), //EndOfHeaders(u32), //ZHeader(ZHeader), //ZTrailer(ZTrailer), @@ -166,6 +166,7 @@ pub use crate::raw::EncodingRecord; pub use crate::raw::FloatInfoRecord; pub use crate::raw::IntegerInfoRecord; pub use crate::raw::NumberOfCasesRecord; +pub use crate::raw::Extension; type DictIndex = usize; -- 2.30.2