From: Ben Pfaff Date: Sun, 9 Aug 2015 08:15:12 +0000 (-0700) Subject: Refine understanding of minor points. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=245cfa40ee078a212b37afef1cdd052087e4f1b4 Refine understanding of minor points. --- diff --git a/dump.c b/dump.c index 9b90fe651d..7665a4ccd0 100644 --- a/dump.c +++ b/dump.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -234,7 +235,7 @@ dump_nested_string(void) } static void -dump_value_31(FILE *stream) +dump_optional_value(FILE *stream) { if (match_byte (0x31)) { @@ -420,7 +421,7 @@ dump_value(FILE *stream, int level) if (match_byte (3)) { char *text = get_string(); - dump_value_31(stream); + dump_optional_value(stream); char *identifier = get_string(); char *text_eng = get_string(); fprintf (stream, "\n", @@ -498,7 +499,7 @@ dump_value(FILE *stream, int level) } else { - dump_value_31(stream); + dump_optional_value(stream); char *base = get_string(); int x = get_u32(); @@ -676,7 +677,6 @@ dump_data(void) static void dump_title(void) { - pos = 0x27; printf ("\n"); dump_value(stdout, 0); match_byte(1); @@ -705,24 +705,17 @@ dump_title(void) else match_byte_assert(0x58); - int n_footnotes = get_u32(); for (int i = 0; i < n_footnotes; i++) { printf ("\n", i); dump_value(stdout, 0); + /* Custom footnote marker string. */ if (match_byte (0x31)) - { - /* Custom footnote marker string. 
*/ - match_byte_assert(3); - get_string(); - match_byte_assert(0x58); - match_u32_assert(0); - get_string(); - } + dump_value(stdout, 0); else match_byte_assert (0x58); - printf("(%d)\n", get_u32()); + get_u32 (); printf ("\n"); } } @@ -810,7 +803,8 @@ dump_fonts(void) int count = get_u32(); pos += 4 * count; - printf ("%s\n", get_string ()); + const char *encoding = get_string(); + printf ("%s\n", encoding); if (!match_u32(0)) match_u32_assert(UINT32_MAX); @@ -829,6 +823,9 @@ dump_fonts(void) } else match_u32_assert(UINT32_MAX); + + int decimal = data[pos]; + int grouping = data[pos + 1]; if (match_byte('.')) { if (!match_byte(',') && !match_byte('\'')) @@ -840,10 +837,14 @@ dump_fonts(void) if (!match_byte('.') && !match_byte(' ')) match_byte_assert(0); } + printf("\n"); if (match_u32(5)) { for (int i = 0; i < 5; i++) - get_string(); + printf("%s\n", 'A' + i, get_string(), 'A' + i); } else match_u32_assert(0); @@ -898,6 +899,7 @@ main(int argc, char *argv[]) } else if (!strcmp(argv[1], "title")) { + pos = 0x27; dump_title(); exit(0); } @@ -929,23 +931,64 @@ main(int argc, char *argv[]) /* This might be a version number of some kind, because value 1 seems to only appear in an SPV file that also required its own weird - special cases in dump_value_31(). */ + special cases in dump_optional_value(). */ version = get_u32(); pos -= 4; if (!match_u32(1)) match_u32_assert(3); match_byte_assert(1); + if (!match_byte(0)) + match_byte_assert(1); + + /* Offset 8. */ + match_byte_assert(0); + match_byte_assert(0); + if (!match_byte(0)) + match_byte_assert(1); + + /* Offset 11. */ + pos++; + match_byte_assert(0); + match_byte_assert(0); + match_byte_assert(0); + + /* Offset 15. */ + pos++; if (!match_byte(0)) match_byte_assert(1); match_byte_assert(0); match_byte_assert(0); + + /* Offset 19. */ + pos++; if (!match_byte(0)) match_byte_assert(1); + match_byte_assert(0); + match_byte_assert(0); + + /* Offset 23. 
*/ pos++; + if (!match_byte(0)) + match_byte_assert(1); match_byte_assert(0); match_byte_assert(0); + + /* Offset 27. */ + pos++; + pos++; + match_byte_assert(0); match_byte_assert(0); + + /* Offset 31. + + This is the tableId, e.g. -4154297861994971133 would be 0xdca00003. + We don't have enough context to validate it. */ + pos += 4; + + /* Offset 35. */ + pos += 4; + dump_title (); dump_fonts(); dump_dims (); diff --git a/spv-file-format.texi b/spv-file-format.texi index 5e95b53a9e..3251e3e38b 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -353,3 +353,73 @@ Typical contents (indented for clarity): Always @code{text}. @end table @end table + +@node SPV Light Detail Member Format +@subsection Light Detail Member Format + +A ``light'' detail member @file{.bin} consists of a number of sections +concatenated together, terminated by a byte 01: + +@example +light-member := header title fonts dims data 01 +@end example + +The first section is a 0x27-byte header: + +@example +header := 01 00 version 01 (00 | 01) byte*21 00 00 table-id byte*4 +version := i1 | i3 +table-id := int +@end example + +@code{header} includes @code{version}, a version number that affects +the interpretation of some of the other data in the member. We will +refer to ``version 1'' and ``version 3'' members later on. It also +includes @code{table-id}, a binary version of the @code{tableId} attribute in the +structure member that refers to the detail member. For example, if +@code{tableId} is @code{-4154297861994971133}, then @code{table-id} +would be 0xdca00003. The meaning of the other variable parts of the +header is not known. + +@example +title := value 01? /* @r{localized title} */ + value 01? 31 /* @r{subtype} */ + value 01? 00? 
58 /* @r{locale-invariant title} */ + (31 value | 58) /* @r{caption} */ + int[n] footnote*[n] /* @r{footnotes} */ +footnote := value (31 value | 58) byte*4 +@end example + +@example +fonts := 00 font*8 + int[x1] byte*[x1] + int[x2] byte*[x2] + int[x3] byte*[x3] + int[x4] int*[x4] + string /* @r{encoding} */ + (i0 | i-1) (00 | 01) 00 (00 | 01) + int + byte[decimal] byte[grouping] + int[x5] string*[x5] /* @r{custom currency} */ + int[x6] byte*[x6] +@end example + +In every example in the corpus, @code{x1} is 240. The meaning of the +bytes that follow it is unknown. + +In every example in the corpus, @code{x2} is 18 and the bytes that +follow it are @code{00 00 00 01 00 00 00 00 00 00 00 00 00 02 00 00 00 +00}. The meaning of these bytes is unknown. + +Observed values of @code{x3} vary from 16 to 150. The bytes that +follow it vary somewhat. + +Observed values of @code{x4} vary from 0 to 17. Out of 7060 examples +in the corpus, it is nonzero only 36 times. + +@code{decimal} is the decimal point character. The observed values +are @samp{.} and @samp{,}. + +@code{grouping} is the grouping character. The observed values are +@samp{,}, @samp{.}, @samp{'}, @samp{ }, and zero (presumably +indicating that digits should not be grouped).