From 371cb01fb5467917de7e90fd49872f15a8e1490e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 27 May 2017 22:21:53 -0700 Subject: [PATCH] Figure out more parts of format. --- dump.c | 91 ++++++++++++++++++++++++++++++++++++++------ spv-file-format.texi | 33 ++++++++-------- 2 files changed, 97 insertions(+), 27 deletions(-) diff --git a/dump.c b/dump.c index 212421cb80..12ee73ebdf 100644 --- a/dump.c +++ b/dump.c @@ -130,6 +130,27 @@ match_u32_assert(uint32_t x, const char *where) } #define match_u32_assert(x) match_u32_assert(x, WHERE) +static bool __attribute__((unused)) +match_u64(uint64_t x) +{ + if (get_u64() == x) + return true; + pos -= 8; + return false; +} + +static void __attribute__((unused)) +match_u64_assert(uint64_t x, const char *where) +{ + unsigned long long int y = get_u64(); + if (x != y) + { + fprintf(stderr, "%s: 0x%x: expected u64:%llu, got u64:%llu\n", where, pos - 8, x, y); + exit(1); + } +} +#define match_u64_assert(x) match_u64_assert(x, WHERE) + static bool __attribute__((unused)) match_be32(uint32_t x) { @@ -320,18 +341,17 @@ hex_dump(int ofs, int n) int c = data[ofs + i]; #if 1 if (i && !(i % 16)) - fprintf(stderr, "-"); + printf("-"); else - fprintf(stderr, " "); + printf(" "); #endif - fprintf(stderr, "%02x", c); + printf("%02x", c); } for (int i = 0; i < n; i++) { int c = data[ofs + i]; - fprintf(stderr, "%c", c >= 32 && c < 127 ? c : '.'); + printf("%c", c >= 32 && c < 127 ? c : '.'); } - fprintf(stderr, "\n"); } static char * @@ -996,7 +1016,7 @@ dump_fonts(void) } bool grid = get_byte(); pos += 3; - printf(" \n", grid ? "yes" : "no"); + printf(" \n", grid ? 
"yes" : "no"); printf("\n"); assert(pos == x1_end); @@ -1020,7 +1040,11 @@ dump_fonts(void) if (!get_byte()) printf(" footnoteposition=\"subscript\""); get_byte(); - pos += get_be32(); + int nbytes = get_be32(); + printf("\n"); + hex_dump(pos, nbytes); + printf("\n"); + pos += nbytes; get_string_be(); char *look = get_string_be(); if (look[0]) @@ -1029,8 +1053,16 @@ dump_fonts(void) } pos = x3_end; + /* Manual column widths, if present. */ int count = get_u32(); - pos += 4 * count; + printf(""); + for (int i = 0; i < count; i++) + { + if (i) + putchar(' '); + printf("%d", get_u32()); + } + printf("\n"); const char *locale = get_string(); printf ("%s\n", locale); @@ -1076,7 +1108,35 @@ dump_fonts(void) int outer_end = get_end(); /* First inner envelope: byte*33 int[n] int*[n]. */ - pos = get_end(); + int inner_len = get_u32(); + int inner_end = pos + inner_len; + int array_start = pos + 33; + match_byte_assert(0); + pos++; /* 0, 1, 10 seen. */ + match_byte_assert(0); + pos++; /* 0...11 seen. */ + if (!match_byte(0) && !match_byte(1) && !match_byte(2)) + match_byte_assert(3); + if (!match_byte(0) && !match_byte(2)) + match_byte_assert(3); + if (!match_u64(0)) + match_u64_assert(UINT64_MAX); + match_u32_assert(0); + match_u32_assert(0); + match_u32_assert(0); + match_u32_assert(0); + match_byte_assert(0); + if (!match_byte(0)) + match_byte_assert(1); + match_byte_assert(1); + pos = array_start; +#if 1 + printf("widths:"); + while (pos < inner_end) + printf(" %d", get_u32()); + printf("\n"); +#endif + pos = inner_end;; /* Second inner envelope. 
*/ assert(get_end() == outer_end); @@ -1117,9 +1177,9 @@ dump_fonts(void) match_byte_assert(0); } - pos += 8; - match_byte_assert(1); + printf ("small: %g\n", get_double()); + match_byte_assert(1); if (outer_end - pos > 6) { /* There might be a pair of strings representing a dataset and @@ -1158,7 +1218,7 @@ dump_fonts(void) else match_u32_assert(0); - match_byte_assert(0x2e); + match_byte_assert('.'); if (!match_byte(0)) match_byte_assert(1); @@ -1301,10 +1361,17 @@ main(int argc, char *argv[]) match_byte_assert(0); /* Offset 27. */ +#if 1 pos++; pos++; pos++; pos++; +#else + printf("\nheader:"); + for (int i = 0; i < 4; i++) + printf(" %02x", data[pos++]); + printf("\n"); +#endif /* Offset 31. */ printf("%lld", get_u64()); diff --git a/spv-file-format.texi b/spv-file-format.texi index c5d327407f..6c690920ce 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -572,7 +572,7 @@ concatenated together, terminated by a byte 01: LightMember @result{} Header Title Caption Footnotes - Fonts Formats Borders PrintSettings TableSettings + Fonts Borders PrintSettings TableSettings Formats Dimensions Data 01 @end format @@ -583,7 +583,7 @@ The following sections go into more detail. @menu * SPV Light Member Header:: * SPV Light Member Title:: -* PSV Light Member Caption:: +* SPV Light Member Caption:: * SPV Light Member Footnotes:: * SPV Light Member Fonts:: * SPV Light Member Borders:: @@ -648,7 +648,7 @@ appropriate for presentation, and localized to the user's language, well formatted. For example, for a frequency table, @code{title1} and @code{title2} name the variable and @code{c} is simply ``Frequencies''. -@node PSV Light Member Caption +@node SPV Light Member Caption @subsection Caption @cartouche @@ -753,8 +753,8 @@ endianness. @code{show-grid-lines} is 1 to draw grid lines, otherwise 0. Each Border describes one kind of border. @code{n-borders} seems to -always be 19. 
Each @code{border-type} appears once in order, and they -correspond to the following borders: +always be 19. Each @code{border-type} appears once (although in an +unpredictable order), and they correspond to the following borders: @table @asis @item 0 @@ -886,32 +886,35 @@ TableSettings ends with an arbitrary number of null bytes. @cartouche @format Formats @result{} - int[@t{n4}] int*[@t{n4}] + int[@t{nwidths}] int*[@t{nwidths}] string[@t{encoding}] - (i0 @math{|} i-1) (00 @math{|} 01) 00 (00 @math{|} 01) - int + int (00 @math{|} 01) 00 (00 @math{|} 01) + int[@t{epoch}] byte[@t{decimal}] byte[@t{grouping}] - int[@t{n-ccs}] string*[@t{n-ccs}] + CustomCurrency v1(i0) v3(count(count(X5) count(X6))) +CustomCurrency @result{} int[@t{n-ccs}] string*[@t{n-ccs}] + X5 @result{} byte*33 int[@t{n}] int*[@t{n}] X6 @result{} 01 00 (03 @math{|} 04) 00 00 00 string[@t{command}] string[@t{subcommand}] string[@t{language}] string[@t{charset}] string[@t{locale}] - (00 @math{|} 01) 00 (00 @math{|} 01) (00 @math{|} 01) + (00 @math{|} 01) 00 bool bool int[@t{epoch}] byte[@t{decimal}] byte[@t{grouping}] - (2d 43 1c eb e2 36 1a 3f | 00*8) 01 + double[@t{small}] 01 (string[@t{dataset}] string[@t{datafile}] i0 int[@t{date}] i0)? - int[@t{n-ccs}] string*[@t{n-ccs}] - 2e (00 @math{|} 01) (i2000000 i0)? + CustomCurrency + byte[@t{missing}] bool (i2000000 i0)? @end format @end cartouche -Observed values of @code{n4} vary from 0 to 17. Out of 7,060 examples -in the corpus, it is nonzero only 36 times. +If @code{nwidths} is nonzero, then the accompanying integers are +column widths as manually adjusted by the user. (Row heights are +computed automatically based on the widths.) @code{encoding} is a character encoding, usually a Windows code page such as @code{en_US.windows-1252} or @code{it_IT.windows-1252}. The -- 2.30.2