From c6d72eb6a6bab66d9e951315f20e04af536e38e4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 2 Jan 2020 23:53:22 +0000 Subject: [PATCH] some success with dissecting data and dimensions --- dump-spo.c | 1259 +++++++++++++--------------------------------------- spo-notes | 138 ++++-- 2 files changed, 410 insertions(+), 987 deletions(-) diff --git a/dump-spo.c b/dump-spo.c index 2a75bef6a9..1ca9f0a03f 100644 --- a/dump-spo.c +++ b/dump-spo.c @@ -93,6 +93,15 @@ match_u32(uint32_t x) return false; } +bool +match_u16(uint16_t x) +{ + if (get_u16() == x) + return true; + pos -= 2; + return false; +} + static void match_u32_assert(uint32_t x, const char *where) { @@ -105,6 +114,18 @@ match_u32_assert(uint32_t x, const char *where) } #define match_u32_assert(x) match_u32_assert(x, WHERE) +static void +match_u16_assert(uint16_t x, const char *where) +{ + unsigned int y = get_u16(); + if (x != y) + { + fprintf(stderr, "%s: 0x%x: expected u16:%u, got u16:%u\n", where, pos - 2, x, y); + exit(1); + } +} +#define match_u16_assert(x) match_u16_assert(x, WHERE) + static bool __attribute__((unused)) match_u64(uint64_t x) { @@ -203,6 +224,15 @@ is_ascii(uint8_t p) return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t'; } +static int +count_zeros(const uint8_t *p) +{ + size_t n = 0; + while (p[n] == 0) + n++; + return n; +} + static bool __attribute__((unused)) all_utf8(const char *p_, size_t len) { @@ -218,6 +248,24 @@ all_utf8(const char *p_, size_t len) return true; } +static char * +get_string1(void) +{ + int len = data[pos++]; + char *s = xmemdup0(&data[pos], len); + pos += len; + return s; +} + +static char * +get_string2(void) +{ + int len = data[pos] + data[pos + 1] * 256; + char *s = xmemdup0(&data[pos + 2], len); + pos += 2 + len; + return s; +} + static char * get_string(const char *where) { @@ -304,190 +352,20 @@ char_dump(FILE *stream, int ofs, int n) putc('\n', stream); } -static char * -dump_counted_string(void) -{ - int inner_end = get_end(); - if (pos == inner_end) - return NULL; - - if (match_u32(5)) - { - match_u32_assert(0); - match_byte_assert(0x58); - } - else - match_u32_assert(0); - - char *s = NULL; - if (match_byte(0x31)) - s = get_string(); - else - match_byte_assert(0x58); - if (pos != inner_end) - { - fprintf(stderr, "inner end discrepancy\n"); - exit(1); - } - return s; -} - -static void -dump_style(FILE *stream) -{ - if (match_byte(0x58)) - return; - - match_byte_assert(0x31); - if (get_bool()) - printf (" bold=\"yes\""); - if (get_bool()) - printf (" italic=\"yes\""); - if (get_bool()) - printf (" underline=\"yes\""); - if (!get_bool()) - printf (" show=\"no\""); - char *fg = get_string(); /* foreground */ - char *bg = get_string(); /* background */ - char *font = get_string(); /* font */ - int size = get_byte() * (72. / 96.); - fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%dpt\"", - fg, bg, font, size); -} - -static void -dump_style2(FILE *stream) -{ - if (match_byte(0x58)) - return; - match_byte_assert(0x31); - uint32_t halign = get_u32(); - printf (" halign=\"%s\"", - halign == 0 ? "center" - : halign == 2 ? "left" - : halign == 4 ? "right" - : halign == 6 ? "decimal" - : halign == 0xffffffad ? "mixed" - : ""); - int valign = get_u32(); - printf (" valign=\"%s\"", - valign == 0 ? "center" - : valign == 1 ? "top" - : valign == 3 ? "bottom" - : ""); - printf (" offset=\"%gpt\"", get_double()); - int l = get_u16(); - int r = get_u16(); - int t = get_u16(); - int b = get_u16(); - printf (" margins=\"%d %d %d %d\"", l, r, t, b); -} - -static char * -dump_nested_string(FILE *stream) +static int +compare_int(const void *a_, const void *b_) { - char *s = NULL; - - match_byte_assert (0); - match_byte_assert (0); - int outer_end = get_end(); - s = dump_counted_string(); - if (s) - fprintf(stream, " \"%s\"", s); - dump_style(stream); - match_byte_assert(0x58); - if (pos != outer_end) - { - fprintf(stderr, "outer end discrepancy\n"); - exit(1); - } - - return s; + const int *a = a_; + const int *b = b_; + return *a < *b ? -1 : *a > *b; } -static void -dump_value_modifier(FILE *stream) -{ - if (match_byte (0x31)) - { - if (match_u32 (0)) - { - fprintf(stream, "\n"); - return; - } - - int outer_end = get_end(); - - /* This counted-string appears to be a template string, - e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */ - char *template = dump_counted_string(); - if (template) - fprintf(stream, " template=\"%s\"", template); - - dump_style(stream); - dump_style2(stream); - if (pos != outer_end) - { - fprintf(stderr, "outer end discrepancy\n"); - exit(1); - } - fprintf(stream, "/>\n"); - } - else - { - int count = get_u32(); - fprintf(stream, "\n"); - } - } - else - match_byte_assert (0x58); -} static const char * -format_to_string (int type) +format_name (int format, char *buf) { - static char tmp[16]; - switch (type) + switch (format) { case 1: return "A"; case 2: return "AHEX"; @@ -524,797 +402,265 @@ format_to_string (int type) case 37: return "CCE"; case 38: return "EDATE"; case 39: return "SDATE"; - default: - assert(false); - sprintf(tmp, "<%d>", type); - return tmp; + default: sprintf(buf, "(%d)", format); return buf; } } static void -dump_value(FILE *stream, int level) +dump_DspNumber(void) { - match_byte(0); - match_byte(0); - match_byte(0); - match_byte(0); + match_byte_assert(1); + int d = get_byte(); + int w = get_byte(); + int fmt = get_byte(); + char buf[64]; + printf ("%s%d.%d ", format_name (fmt, buf), w, d); - for (int i = 0; i <= level; i++) - fprintf (stream, " "); + match_byte_assert(0x80); + match_byte_assert(2); + printf ("%f ", get_double ()); + printf ("\"%s\"\n", get_string1 ()); - printf ("%02x: value (%d)\n", pos, data[pos]); - if (match_byte (1)) - { - unsigned int format; - double value; - - dump_value_modifier(stream); - format = get_u32 (); - value = get_double (); - fprintf (stream, "\n", - DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff); - } - else if (match_byte (2)) - { - unsigned int format; - char *var, *vallab; - double value; - - dump_value_modifier (stream); - format = get_u32 (); - value = get_double (); - var = get_string (); - vallab = get_string (); - fprintf (stream, "> 16), (format >> 8) & 0xff, format & 0xff); - if (var[0]) - fprintf (stream, " variable=\"%s\"", var); - if (vallab[0]) - fprintf (stream, " label=\"%s\"", vallab); - fprintf (stream, "/>\n"); - if (!match_byte (1) && !match_byte(2)) - match_byte_assert (3); - } - else if (match_byte (3)) - { - char *text = get_string(); - dump_value_modifier(stream); - char *identifier = get_string(); - char *text_eng = get_string(); - fprintf (stream, "\n"); - if (!match_byte (0)) - match_byte_assert(1); - } - else if (match_byte (4)) - { - unsigned int format; - char *var, *vallab, *value; - - dump_value_modifier(stream); - format = get_u32 (); - vallab = get_string (); - var = get_string (); - if (!match_byte(1) && !match_byte(2)) - match_byte_assert (3); - value = get_string (); - fprintf (stream, "> 16), (format >> 8) & 0xff, format & 0xff); - if (var[0]) - fprintf (stream, " variable=\"%s\"", var); - if (vallab[0]) - fprintf (stream, " label=\"%s\"/>\n", vallab); - fprintf (stream, "/>\n"); - } - else if (match_byte (5)) - { - dump_value_modifier(stream); - char *name = get_string (); - char *label = get_string (); - fprintf (stream, "\n"); - if (!match_byte(1) && !match_byte(2)) - match_byte_assert(3); - } - else + for (;;) { - printf ("else %#x\n", pos); - dump_value_modifier(stream); - - char *base = get_string(); - int x = get_u32(); - fprintf (stream, "\n"); - } -} -static int -compare_int(const void *a_, const void *b_) -{ - const int *a = a_; - const int *b = b_; - return *a < *b ? -1 : *a > *b; -} - -static void -check_permutation(int *a, int n, const char *name) -{ - int b[n]; - memcpy(b, a, n * sizeof *a); - qsort(b, n, sizeof *b, compare_int); - for (int i = 0; i < n; i++) - if (b[i] != i) - { - fprintf(stderr, "bad %s permutation:", name); - for (int i = 0; i < n; i++) - fprintf(stderr, " %d", a[i]); - putc('\n', stderr); - exit(1); - } -} - -static void -dump_category(FILE *stream, int level, int **indexes, int *allocated_indexes, - int *n_indexes) -{ - for (int i = 0; i <= level; i++) - fprintf (stream, " "); - printf ("\n"); - dump_value (stream, level + 1); - - bool merge = get_bool(); - match_byte_assert (0); - int unindexed = get_bool(); - - int x = get_u32 (); - pos -= 4; - if (!match_u32 (0)) - match_u32_assert (2); - - int indx = get_u32(); - int n_categories = get_u32(); - if (indx == -1) - { - if (merge) + if (data[pos] == 0x80 && data[pos + 1] == 1) { - for (int i = 0; i <= level + 1; i++) - fprintf (stream, " "); - fprintf (stream, "\n"); + pos += 2; + int d = get_byte(); + int w = get_byte(); + int fmt = get_byte(); + char buf[64]; + printf ("\n%% %s%d.%d\n", format_name (fmt, buf), w, d); } - assert (unindexed); - } - else - { - assert (!merge); - assert (!unindexed); - assert (x == 2); - assert (n_categories == 0); - if (*n_indexes >= *allocated_indexes) + else if (data[pos] == 0x80 && data[pos + 1] == 2) { - *allocated_indexes = *allocated_indexes ? 2 * *allocated_indexes : 16; - *indexes = realloc(*indexes, *allocated_indexes * sizeof **indexes); + pos += 2; + printf ("\n%f ", get_double ()); + printf ("'%s'\n", get_string1 ()); } - (*indexes)[(*n_indexes)++] = indx; - } - - if (n_categories == 0) - { - for (int i = 0; i <= level + 1; i++) - fprintf (stream, " "); - fprintf (stream, "%d\n", indx); - } - for (int i = 0; i < n_categories; i++) - dump_category (stream, level + 1, indexes, allocated_indexes, n_indexes); - for (int i = 0; i <= level; i++) - fprintf (stream, " "); - printf ("\n"); -} - -static int -dump_dim(int indx) -{ - int n_categories; - - printf ("\n", indx); - dump_value (stdout, 0); - - /* This byte is usually 0 but many other values have been spotted. - No visible effect. */ - pos++; - - /* This byte can cause data to be oddly replicated. */ - if (!match_byte(0) && !match_byte(1)) - match_byte_assert(2); - - if (!match_u32(0)) - match_u32_assert(2); - - bool show_dim_label = get_bool(); - if (show_dim_label) - printf(" \n"); - - bool hide_all_labels = get_bool(); - if (hide_all_labels) - printf(" \n"); - - match_byte_assert(1); - if (!match_u32(UINT32_MAX)) - match_u32_assert(indx); - - n_categories = get_u32(); - - int *indexes = NULL; - int n_indexes = 0; - int allocated_indexes = 0; - for (int i = 0; i < n_categories; i++) - dump_category (stdout, 0, &indexes, &allocated_indexes, &n_indexes); - check_permutation(indexes, n_indexes, "categories"); - - fprintf (stdout, "\n"); - return n_indexes; -} - -int n_dims; -static int dim_n_cats[64]; -#define MAX_DIMS (sizeof dim_n_cats / sizeof *dim_n_cats) - -static void -dump_dims(void) -{ - n_dims = get_u32(); - assert(n_dims < MAX_DIMS); - for (int i = 0; i < n_dims; i++) - dim_n_cats[i] = dump_dim (i); -} - -static void -dump_data(void) -{ - /* The first three numbers add to the number of dimensions. */ - int l = get_u32(); - int r = get_u32(); - int c = n_dims - l - r; - match_u32_assert(c); - - /* The next n_dims numbers are a permutation of the dimension numbers. */ - int a[n_dims]; - for (int i = 0; i < n_dims; i++) - { - int dim = get_u32(); - a[i] = dim; - - const char *name = i < l ? "layer" : i < l + r ? "row" : "column"; - printf ("<%s dimension=\"%d\"/>\n", name, dim); - } - check_permutation(a, n_dims, "dimensions"); - - int x = get_u32(); - printf ("\n"); - for (int i = 0; i < x; i++) - { - unsigned int indx = get_u32(); - printf (" \n"); - match_u32_assert(0); - if (version == 1) - match_byte(0); - dump_value(stdout, 1); - fprintf (stdout, " \n"); + else if (data[pos] == 0x1 && data[pos + 1]) + { + pos += 1; + printf ("\n\"%s\"\n", get_string1 ()); + } + else + printf ("%02x ", get_byte()); } - printf ("\n"); } static void -dump_title(void) +dump_cell(void) { - printf ("\n"); - dump_value(stdout, 0); - match_byte(1); - printf ("\n"); - - printf ("\n"); - dump_value(stdout, 0); - match_byte(1); - printf ("\n"); - - match_byte_assert(0x31); - - printf ("\n"); - dump_value(stdout, 0); - match_byte(1); - printf ("\n"); - - if (match_byte(0x31)) - { - printf ("\n"); - dump_value(stdout, 0); - printf ("\n"); - } - else - match_byte_assert(0x58); - if (match_byte(0x31)) + static const int cell_prefix[] = { + 0x00, 0x03, 0x80, + 0x00, 0x00, 0x00, 0x00, 0x00, -1 /* 00 or 10 */, 0x00, 0x00, 0x00, 0x00, + + /*13 14 15 16 17 18 19 */ + -1, 0x80, 0x01, -1, -1, -1, -1, + }; + size_t cell_prefix_len = sizeof cell_prefix / sizeof *cell_prefix; + if (!match_bytes(pos, cell_prefix, cell_prefix_len)) { - printf ("\n"); - dump_value(stdout, 0); - printf ("\n"); + printf ("match failed at %x\n", pos); + return; } - else - match_byte_assert(0x58); - int n_footnotes = get_u32(); - for (int i = 0; i < n_footnotes; i++) + char buf[64]; + printf ("cell %s%d.%d ", + format_name (data[pos + 18], buf), + data[pos + 17], + data[pos + 16]); + + int len = cell_prefix_len; + if (data[pos + 19] == 0) { - printf ("\n", i); - dump_value(stdout, 0); - /* Custom footnote marker string. */ - if (match_byte (0x31)) - dump_value(stdout, 0); - else - match_byte_assert (0x58); - int n = get_u32(); - if (n >= 0) + assert (data[pos + 13] == 5); + if (data[pos + 20] == 0) + { + int count = (data[pos + 22]); + printf ("%d %d \"%.*s\"", + data[pos + 21], data[pos + 22], + count, &data[pos + 23]); + len = 23 + count; + } + else if (data[pos + 20] == 1 + && data[pos + 21] == 0xff + && data[pos + 22] == 0xff) + { + int count = 255; + printf ("%d \"%.*s\"", count, data[pos + 23], + &data[pos + 24]); + len = 23 + count; + } + else if (data[pos + 20] == 1 && data[pos + 21] == 255) { - /* Appears to be the number of references to a footnote. */ - printf (" \n", n); + int count = data[pos + 22] + (data[pos + 23] << 8); + printf ("\"%.*s\"", + count, &data[pos + 24]); + len = 24 + count; } - else if (n == -2) + else if (data[pos + 20] == 1) { - /* The user deleted the footnote references. */ - printf (" \n"); + int count = (data[pos + 21]); + printf ("\"%.*s\"", + count, &data[pos + 22]); + len = 22 + count; } else - assert(0); - printf ("\n"); + assert (false); } -} - -static void -dump_fonts(void) -{ - match_byte(0); - for (int i = 1; i <= 8; i++) + else if (data[pos + 19] == 128 && data[pos + 20] == 2) { - printf ("