From 9eeb4188822b51ae29568c666caed7f57d350194 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 4 Jul 2018 12:11:21 -0700 Subject: [PATCH] Refine understanding of legacy binary data format. --- dump.c | 7 ++- dump2.c | 116 ++++++++++++++++++++++++++++++++---------------- parse-all-heavy | 11 +++-- 3 files changed, 90 insertions(+), 44 deletions(-) diff --git a/dump.c b/dump.c index 62e9307d7c..7f3c2b8c75 100644 --- a/dump.c +++ b/dump.c @@ -508,6 +508,7 @@ dump_value(FILE *stream, int level) for (int i = 0; i <= level; i++) fprintf (stream, " "); + printf ("%02x: value (%d)\n", pos, data[pos]); if (match_byte (1)) { unsigned int format; @@ -589,6 +590,7 @@ dump_value(FILE *stream, int level) } else { + printf ("else %#x\n", pos); dump_value_modifier(stream); char *base = get_string(); @@ -1008,7 +1010,8 @@ dump_fonts(void) printf ("%d\n", get_u32()); if (!match_byte(0)) match_byte_assert(1); - match_byte_assert(0); + if (!match_byte(0)) + match_byte_assert(1); if (!match_byte(0)) match_byte_assert(1); printf("%d\n", get_u32()); @@ -1117,7 +1120,7 @@ dump_fonts(void) printf("%s\n", get_string()); get_bool(); - match_byte_assert(0); + get_bool(); get_bool(); get_bool(); diff --git a/dump2.c b/dump2.c index 740cbbedaa..4bd86e1b98 100644 --- a/dump2.c +++ b/dump2.c @@ -16,6 +16,8 @@ static size_t n, pos; #define STR(x) XSTR(x) #define WHERE __FILE__":" STR(__LINE__) +bool ok = true; + static unsigned int get_u32(void) { @@ -105,11 +107,31 @@ all_utf8(const uint8_t *p) static char * get_fixed_string(int len, const char *where) { - if (pos + len > n || !memchr(&data[pos], 0, len) || !all_utf8(&data[pos])) + size_t i; + for (i = 0; ; i++) + { + if (!data[pos + i]) + break; + if (i >= len) + { + fprintf(stderr, "%s: 0x%x: unterminated fixed-width string\n", where, pos); + exit(1); + } + } + if (!all_utf8(&data[pos])) { fprintf(stderr, "%s: 0x%x: bad fixed-width string\n", where, pos); exit(1); } + while (++i < len) + { + if (data[pos + i]) + { + fprintf(stderr, "%s: 0x%x: text in middle of fixed-width string\n", where, pos); + //exit(1); + break; + } + } char *s = (char *) &data[pos]; pos += len; return s; @@ -128,7 +150,7 @@ all_ascii2(const uint8_t *p, size_t n) static char * get_string(const char *where) { - if (1 + if (pos + 4 <= n /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/ /*&& all_ascii(&data[pos + 4], data[pos])*/) { @@ -154,7 +176,7 @@ dump_raw(FILE *stream, int start, int end, const char *separator) for (size_t i = start; i < end; ) { if (i + 5 <= n - && data[i] > 0 + && data[i] > 1 //&& !data[i + 1] && !data[i + 2] && !data[i + 3] @@ -217,8 +239,8 @@ dump_source(int end, int count, int n_series, const char *name) int n_sysmis = 0; for (int i = 0; i < n_series; i++) { - printf (" series %d: \"%s\", %d values:\n ", - i, get_fixed_string(288), count); + printf (" %08x: series %d: \"%s\", %d values:\n ", + pos, i, get_fixed_string(288), count); for (int i = 0; i < count; i++) { double d = get_double(); @@ -232,46 +254,53 @@ dump_source(int end, int count, int n_series, const char *name) } printf ("\n"); } +} - if (pos >= end) - return; - - match_u32_assert(1); - char *name2 = get_string(); - assert(!strcmp(name, name2)); +static void +dump_strings(void) - printf ("\n %08x:", pos); - int n_more_series = get_u32(); - if (n_series != n_more_series) - printf("different series counts: %d %d\n", n_series, n_more_series); - assert(n_more_series <= n_series); - printf (" %d series to come\n", n_more_series); +{ + if (pos >= n) + return; + int start = pos; + int offset = pos; + int n_maps = get_u32(); int max1 = -1; - int ofs = pos; - for (int i = 0; i < n_more_series; i++) + for (int k = 0; k < n_maps; k++) { - printf ("%08x:", pos); - printf (" \"%s\"", get_string()); - int n_pairs = get_u32(); - for (int j = 0; j < n_pairs; j++) + char *source_name = get_string(); + printf ("%08x: %s\n", offset, source_name); + + int n_series = get_u32(); + for (int i = 0; i < n_series; i++) { - int x = get_u32(); - int y = get_u32(); - printf (" (%d, %d)", x, y); - if (y > max1) - max1 = y; + printf ("%08x:", pos); + printf (" \"%s\"", get_string()); + int n_pairs = get_u32(); + for (int j = 0; j < n_pairs; j++) + { + int x = get_u32(); + int y = get_u32(); + printf (" (%d, %d)", x, y); + if (y > max1) + max1 = y; + } + printf ("\n"); } - printf ("\n"); } - printf ("\n%08x:", pos); int n_strings = get_u32(); - assert(n_strings == max1 + 1); + if (n_strings != max1 + 1) + { + fprintf (stderr, "n_strings=%d max1+1=%d (-s %#x -n %u)\n", n_strings, max1 + 1, start, n - start); + dump_raw (stderr, start, n, "\n"); + assert(n_strings == max1 + 1); + } printf (" %d strings\n", n_strings); - char **strings = malloc(n_strings * sizeof *strings); - for (int i = 0; i < n_strings; i++) + char **strings = malloc((max1 + 1) * sizeof *strings); + for (int i = 0; i <= max1; i++) { int frequency = get_u32(); char *s = get_string(); @@ -280,7 +309,8 @@ dump_source(int end, int count, int n_series, const char *name) } printf ("\n"); - assert (pos == end); + assert (pos == n); +#if 0 pos = ofs; printf("Strings:\n"); for (int i = 0; i < n_more_series; i++) @@ -296,7 +326,7 @@ dump_source(int end, int count, int n_series, const char *name) } printf ("\n"); } - pos = end; +#endif } int @@ -365,11 +395,21 @@ main(int argc, char **argv) if (pos != sources[i].offset) { fprintf (stderr, "pos=0x%x expected=0x%x reading source %d\n", pos, sources[i].offset, i); - exit(1); + //exit(1); } + printf ("source %d:\n", i); + pos = sources[i].offset; dump_source(i + 1 >= n_sources ? n : sources[i + 1].offset, sources[i].count, sources[i].n_series, sources[i].name); } + dump_strings(); +#if 0 + if (pos != n) + { + fprintf (stderr, "consumed %zu bytes, file has %zu bytes\n", pos, n); + ok = false; + } assert(pos == n); - - return 0; +#endif + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/parse-all-heavy b/parse-all-heavy index 757e46d200..a6fce9ebef 100755 --- a/parse-all-heavy +++ b/parse-all-heavy @@ -1,10 +1,13 @@ #! /bin/sh -heavy=`ls -1 unzipped/*/*.bin | grep -v light` -for d in $heavy; do - if ! ./dump2 < $d > /dev/null 2>&1; then +make || exit $? +for d in unzipped*/*/*.bin; do + case $d in + *light*) continue ;; + esac + if ! ./dump2 < $d > /dev/null; then echo $d - ./dump2 < $d +#./dump2 < $d >/dev/null echo fi done -- 2.30.2