+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <float.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
+#include <time.h>
#include <unistd.h>
+#include "u8-mbtouc.h"
+static const char *filename;
static uint8_t *data;
static size_t n;
-static bool
-all_ascii(const uint8_t *p, size_t n)
-{
- for (size_t i = 0; i < n; i++)
- if (p[i] < 32 || p[i] > 126)
- return false;
- return true;
-}
-
-static size_t
-find(const char *target, size_t target_len)
-{
- const uint8_t *pos = (const uint8_t *) memmem (data, n, target, target_len);
- if (!pos)
- {
- fprintf (stderr, "not found\n");
- exit(1);
- }
- return pos - data;
-}
+int version;
size_t pos;
#define STR(x) XSTR(x)
#define WHERE __FILE__":" STR(__LINE__)
+/* Returns the byte at the current read offset and advances `pos` past it.
+   Exits with a diagnostic when the offset is already at or beyond the end
+   of the `n`-byte input instead of reading outside the buffer. */
+static uint8_t
+get_byte(void)
+{
+  if (pos >= n)
+    {
+      fprintf(stderr, "0x%zx: unexpected end of input\n", pos);
+      exit(1);
+    }
+  return data[pos++];
+}
+
static unsigned int
get_u32(void)
{
return x;
}
+/* Copies the eight bytes at the current offset into a 64-bit integer as-is
+   (host byte order), advances `pos` by eight, and returns the value. */
+static unsigned long long int
+get_u64(void)
+{
+  uint64_t value;
+  const uint8_t *src = data + pos;
+
+  pos += sizeof value;
+  memcpy(&value, src, sizeof value);
+  return value;
+}
+
+/* Assembles a 32-bit value from the four bytes at the current offset, most
+   significant byte first, advances `pos` by four, and returns it.
+
+   Each byte is widened to uint32_t before shifting: a uint8_t promotes to
+   (signed) int, and shifting a value >= 0x80 left by 24 would overflow it. */
+static unsigned int
+get_be32(void)
+{
+  uint32_t x = ((uint32_t) data[pos] << 24)
+               | ((uint32_t) data[pos + 1] << 16)
+               | ((uint32_t) data[pos + 2] << 8)
+               | (uint32_t) data[pos + 3];
+  pos += 4;
+  return x;
+}
+
+/* Copies the two bytes at the current offset into a 16-bit integer as-is
+   (host byte order), advances `pos` by two, and returns the value. */
+static unsigned int
+get_u16(void)
+{
+  uint16_t value;
+
+  memcpy(&value, data + pos, sizeof value);
+  pos += sizeof value;
+  return value;
+}
+
static double
get_double(void)
{
return x;
}
+/* Copies the four bytes at the current offset into a `float`, advances `pos`
+   by four, and returns the value widened to double. */
+static double __attribute__((unused))
+get_float(void)
+{
+  float value;
+  const uint8_t *src = data + pos;
+
+  pos += 4;
+  memcpy(&value, src, 4);
+  return value;
+}
+
static bool
match_u32(uint32_t x)
{
}
#define match_u32_assert(x) match_u32_assert(x, WHERE)
+/* Reads a 64-bit value with get_u64().  If it equals X the value stays
+   consumed and true is returned; otherwise `pos` is moved back to where it
+   was and false is returned. */
+static bool __attribute__((unused))
+match_u64(uint64_t x)
+{
+  bool matched = get_u64() == x;
+
+  if (!matched)
+    pos -= 8;
+  return matched;
+}
+
+/* Reads a 64-bit value with get_u64() and exits with a diagnostic unless it
+   equals X.  WHERE is the caller's source location, supplied by the wrapper
+   macro below.  The reported offset is where the value started.
+
+   `pos` is a size_t, so it is printed with %zx, and X is cast so that it
+   matches %llu whatever type uint64_t maps to. */
+static void __attribute__((unused))
+match_u64_assert(uint64_t x, const char *where)
+{
+  unsigned long long int y = get_u64();
+  if (x != y)
+    {
+      fprintf(stderr, "%s: 0x%zx: expected u64:%llu, got u64:%llu\n",
+              where, pos - 8, (unsigned long long int) x, y);
+      exit(1);
+    }
+}
+#define match_u64_assert(x) match_u64_assert(x, WHERE)
+
+/* Reads a big-endian 32-bit value with get_be32().  If it equals X the value
+   stays consumed and true is returned; otherwise `pos` is moved back to
+   where it was and false is returned. */
+static bool __attribute__((unused))
+match_be32(uint32_t x)
+{
+  bool matched = get_be32() == x;
+
+  if (!matched)
+    pos -= 4;
+  return matched;
+}
+
+/* Reads a big-endian 32-bit value with get_be32() and exits with a
+   diagnostic unless it equals X.  WHERE is the caller's source location,
+   supplied by the wrapper macro below.  `pos` is a size_t, hence %zx. */
+static void
+match_be32_assert(uint32_t x, const char *where)
+{
+  unsigned int y = get_be32();
+  if (x != y)
+    {
+      fprintf(stderr, "%s: 0x%zx: expected be%u, got be%u\n",
+              where, pos - 4, (unsigned int) x, y);
+      exit(1);
+    }
+}
+#define match_be32_assert(x) match_be32_assert(x, WHERE)
+
static bool
match_byte(uint8_t b)
{
- if (data[pos] == b)
+ if (pos < n && data[pos] == b)
{
pos++;
return true;
}
#define match_byte_assert(b) match_byte_assert(b, WHERE)
+/* Consumes one byte that must be 0 or 1 and returns it as a bool.  Any other
+   value is rejected by match_byte_assert(). */
+static bool
+get_bool(void)
+{
+  if (!match_byte(0))
+    {
+      match_byte_assert(1);
+      return true;
+    }
+  return false;
+}
+
+/* Decodes the null-terminated string P_ one character at a time with
+   u8_mbtouc() and returns false as soon as a decoded character is below 32
+   (other than '\n'), is 127, or is U+FFFD.  Returns true otherwise. */
+static bool __attribute__((unused))
+all_utf8(const char *p_)
+{
+  const uint8_t *bytes = (const uint8_t *) p_;
+  size_t len = strlen (p_);
+  size_t ofs = 0;
+
+  while (ofs < len)
+    {
+      ucs4_t uc;
+      size_t mblen = u8_mbtouc (&uc, bytes + ofs, len - ofs);
+      bool control = uc < 32 && uc != '\n';
+
+      if (control || uc == 127 || uc == 0xfffd)
+        return false;
+      ofs += mblen;
+    }
+  return true;
+}
+
+/* Reads a counted string at the current offset and returns it as a freshly
+ malloc()ed, null-terminated copy that the caller owns. The count is taken
+ from the first two bytes (low byte first) of a four-byte field; the text
+ follows that field. `pos` is advanced past both.
+
+ WHERE is the caller's source location (supplied by the wrapper macro
+ below). It is reported, and the program exits, when the length field or
+ the text would extend past the end of the `n`-byte input. */
static char *
-get_string(void)
+get_string(const char *where)
{
- if (data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0
- && all_ascii(&data[pos + 4], data[pos]))
+ if (pos + 4 <= n
+ && (size_t) (data[pos] + data[pos + 1] * 256) <= n - (pos + 4))
{
- int len = data[pos];
+ int len = data[pos] + data[pos + 1] * 256;
char *s = malloc(len + 1);
+ if (!s)
+ {
+ perror("malloc");
+ exit(1);
+ }
memcpy(s, &data[pos + 4], len);
s[len] = 0;
- pos += 4 + data[pos];
+ pos += 4 + len;
return s;
}
else
{
- fprintf(stderr, "0x%x: expected string\n", pos);
+ fprintf(stderr, "%s: 0x%zx: expected string\n", where, pos);
exit(1);
}
}
+#define get_string() get_string(WHERE)
+
+/* Like get_string(), but the count is taken from the last two bytes of the
+   four-byte length field, high byte first.  Returns a freshly malloc()ed,
+   null-terminated copy that the caller owns and advances `pos` past the
+   field and the text.
+
+   WHERE is the caller's source location (supplied by the wrapper macro
+   below).  It is reported, and the program exits, when the length field or
+   the text would extend past the end of the `n`-byte input. */
+static char *
+get_string_be(const char *where)
+{
+  if (pos + 4 > n
+      || (size_t) (data[pos + 2] * 256 + data[pos + 3]) > n - (pos + 4))
+    {
+      fprintf(stderr, "%s: 0x%zx: expected string\n", where, pos);
+      exit(1);
+    }
+
+  int len = data[pos + 2] * 256 + data[pos + 3];
+  char *s = malloc(len + 1);
+  if (!s)
+    {
+      perror("malloc");
+      exit(1);
+    }
+
+  memcpy(s, &data[pos + 4], len);
+  s[len] = 0;
+  pos += 4 + len;
+  return s;
+}
+#define get_string_be() get_string_be(WHERE)
+
+/* Reads a 32-bit length with get_u32() and returns the offset at which a
+   region of that length, starting right after the length field, ends. */
+static int
+get_end(void)
+{
+  const int length = get_u32();
+  const size_t end = pos + length;
+
+  return end;
+}
+
+/* Writes N bytes of `data` starting at offset OFS to STREAM on one line:
+   first each byte as two hex digits, preceded by '-' at every 16th byte
+   after the first and by a space otherwise, then the same bytes as
+   characters with anything outside 32..126 shown as '.'. */
+static void __attribute__((unused))
+hex_dump(FILE *stream, int ofs, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      int byte = data[ofs + i];
+      char separator = (i != 0 && i % 16 == 0) ? '-' : ' ';
+
+      putc(separator, stream);
+      fprintf(stream, "%02x", byte);
+    }
+
+  for (int i = 0; i < n; i++)
+    {
+      int byte = data[ofs + i];
+
+      if (byte < 32 || byte >= 127)
+        byte = '.';
+      putc(byte, stream);
+    }
+
+  putc('\n', stream);
+}
+
+/* Parses a length-prefixed region that optionally holds a string.
+
+   Returns NULL when the region is empty, or when it carries the 0x58 marker
+   in place of a string; otherwise returns the string read by get_string()
+   after a 0x31 marker.  Exits if parsing does not end exactly at the end of
+   the region. */
+static char *
+dump_counted_string(void)
+{
+  const int inner_end = get_end();
+  if (pos == inner_end)
+    return NULL;
+
+  /* Header: either u32 5, u32 0, byte 0x58, or just u32 0. */
+  const bool long_header = match_u32(5);
+  match_u32_assert(0);
+  if (long_header)
+    match_byte_assert(0x58);
+
+  char *text = NULL;
+  if (!match_byte(0x31))
+    match_byte_assert(0x58);
+  else
+    text = get_string();
+
+  if (pos == inner_end)
+    return text;
+
+  fprintf(stderr, "inner end discrepancy\n");
+  exit(1);
+}
+/* Parses an optional font-style record and writes it to STREAM as XML-like
+ attributes. A 0x58 byte means "no record" and writes nothing. Otherwise
+ a 0x31 byte is followed by four booleans (bold, italic, underline, show),
+ three strings (foreground, background, font) and a one-byte size that is
+ scaled by 72/96 before printing.
+
+ Every attribute goes to STREAM, and the strings are freed after use. */
static void
-dump_category(int level)
+dump_style(FILE *stream)
{
- for (int i = 0; i <= level; i++)
- printf (" ");
+ if (match_byte(0x58))
+ return;
+
+ match_byte_assert(0x31);
+ if (get_bool())
+ fprintf (stream, " bold=\"yes\"");
+ if (get_bool())
+ fprintf (stream, " italic=\"yes\"");
+ if (get_bool())
+ fprintf (stream, " underline=\"yes\"");
+ if (!get_bool())
+ fprintf (stream, " show=\"no\"");
+ char *fg = get_string(); /* foreground */
+ char *bg = get_string(); /* background */
+ char *font = get_string(); /* font */
+ int size = get_byte() * (72. / 96.);
+ fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%dpt\"",
+ fg, bg, font, size);
+ free(fg);
+ free(bg);
+ free(font);
+}
+
+/* Parses an optional cell-layout record and writes it to STREAM as XML-like
+   attributes.  A 0x58 byte means "no record" and writes nothing.  Otherwise
+   a 0x31 byte is followed by two 32-bit alignment codes, a double offset,
+   and four 16-bit margins, read in the order l, r, t, b.
+
+   Alignment codes not listed below are printed as "<error>". */
+static void
+dump_style2(FILE *stream)
+{
+  if (match_byte(0x58))
+    return;
+
+  match_byte_assert(0x31);
+  uint32_t halign = get_u32();
+  fprintf (stream, " halign=\"%s\"",
+           halign == 0 ? "center"
+           : halign == 2 ? "left"
+           : halign == 4 ? "right"
+           : halign == 6 ? "decimal"
+           : halign == 0xffffffad ? "mixed"
+           : "<error>");
+  int valign = get_u32();
+  fprintf (stream, " valign=\"%s\"",
+           valign == 0 ? "center"
+           : valign == 1 ? "top"
+           : valign == 3 ? "bottom"
+           : "<error>");
+  fprintf (stream, " offset=\"%gpt\"", get_double());
+  int l = get_u16();
+  int r = get_u16();
+  int t = get_u16();
+  int b = get_u16();
+  fprintf (stream, " margins=\"%d %d %d %d\"", l, r, t, b);
+}
+
+/* Parses two zero bytes followed by a length-prefixed region holding a
+ counted string, a style record and a 0x58 byte. The string, if any, is
+ written to STREAM in double quotes and also returned (NULL if absent).
+ Exits if parsing does not end exactly at the end of the region. */
+static char *
+dump_nested_string(FILE *stream)
+{
- match_byte (0);
- if (match_byte (3))
+ match_byte_assert (0);
+ match_byte_assert (0);
+ const int outer_end = get_end();
+ char *text = dump_counted_string();
+ if (text != NULL)
+ fprintf(stream, " \"%s\"", text);
+ dump_style(stream);
+ match_byte_assert(0x58);
+ if (pos != outer_end)
{
- get_string();
- match_byte_assert (0x58);
- get_string();
- printf("string \"%s\"", get_string());
- match_byte_assert (1);
- match_byte (0);
- match_byte (0);
- match_byte (0);
- match_byte (1);
+ fprintf(stderr, "outer end discrepancy\n");
+ exit(1);
}
- else if (match_byte (5))
+
+ return text;
+}
+
+/* Parses the optional modifier that precedes a value's payload and writes
+ it to STREAM. A 0x58 byte means there is no modifier. After a 0x31 byte
+ the modifier is either a "<special0" element (leading u32 0) or a
+ "<footnote-ref" element listing 16-bit footnote indexes. */
+static void
+dump_value_modifier(FILE *stream)
+{
+ if (match_byte (0x31))
{
- match_byte_assert (0x58);
- printf ("variable \"%s\"", get_string());
- get_string();
- if (!match_byte (3))
- match_byte_assert (2);
- match_byte (0);
- match_byte (0);
- match_byte (0);
+ if (!match_u32 (0))
+ {
+ int n_refs = get_u32();
+ fprintf(stream, "<footnote-ref indexes=\"");
+ for (int i = 0; i < n_refs; i++)
+ {
+ if (i > 0)
+ putc(' ', stream);
+ fprintf(stream, "%d", get_u16());
+ }
+ putc('"', stream);
+ match_byte_assert(0);
+ match_byte_assert(0);
+ dump_nested_string(stream);
+ fprintf(stream, "/>\n");
+ }
+ else
+ {
+ fprintf(stream, "<special0");
+ if (!match_u32 (1))
+ match_u32_assert (0);
+ else
+ {
+ /* Corpus frequencies:
+ 124 "a"
+ 12 "b"
+ 8 "a, b"
+
+ The given text is appended to the cell in a subscript font.
+ */
+ char *subscript = get_string();
+ fprintf(stream, " subscript=\"%s\"", subscript);
+ }
+
+ if (version == 1)
+ {
+ /* We only have one SPV file for this version (with many
+ tables). */
+ match_byte(0);
+ if (!match_u32(1))
+ match_u32_assert(2);
+ match_byte(0);
+ match_byte(0);
+
+ /* Accept any of 0 through 9, trying them in order; failing
+ that, the value must be 10. */
+ bool small = false;
+ for (uint32_t v = 0; v <= 9 && !small; v++)
+ small = match_u32(v);
+ if (!small)
+ match_u32_assert(10);
+
+ match_byte(0);
+ match_byte(0);
+ fprintf(stream, "/>\n");
+ return;
+ }
+
+ const int outer_end = get_end();
+
+ /* This counted-string appears to be a template string,
+ e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */
+ char *tmpl = dump_counted_string();
+ if (tmpl != NULL)
+ fprintf(stream, " template=\"%s\"", tmpl);
+
+ dump_style(stream);
+ dump_style2(stream);
+ if (pos != outer_end)
+ {
+ fprintf(stderr, "outer end discrepancy\n");
+ exit(1);
+ }
+ fprintf(stream, "/>\n");
+ }
}
- else if (match_byte (2))
+ else
+ match_byte_assert (0x58);
+}
+
+/* Maps a numeric format-type code to its name (for example 5 and 40 both
+   map to "F").
+
+   An unrecognized code is treated as fatal: the code is reported on stderr
+   and the program aborts.  The previous fallback that formatted "<N>" into a
+   static buffer sat after abort() and could never run, so it is gone. */
+static const char *
+format_to_string (int type)
+{
+  switch (type)
+    {
+    case 1: return "A";
+    case 2: return "AHEX";
+    case 3: return "COMMA";
+    case 4: return "DOLLAR";
+    case 5: case 40: return "F";
+    case 6: return "IB";
+    case 7: return "PIBHEX";
+    case 8: return "P";
+    case 9: return "PIB";
+    case 10: return "PK";
+    case 11: return "RB";
+    case 12: return "RBHEX";
+    case 15: return "Z";
+    case 16: return "N";
+    case 17: return "E";
+    case 20: return "DATE";
+    case 21: return "TIME";
+    case 22: return "DATETIME";
+    case 23: return "ADATE";
+    case 24: return "JDATE";
+    case 25: return "DTIME";
+    case 26: return "WKDAY";
+    case 27: return "MONTH";
+    case 28: return "MOYR";
+    case 29: return "QYR";
+    case 30: return "WKYR";
+    case 31: return "PCT";
+    case 32: return "DOT";
+    case 33: return "CCA";
+    case 34: return "CCB";
+    case 35: return "CCC";
+    case 36: return "CCD";
+    case 37: return "CCE";
+    case 38: return "EDATE";
+    case 39: return "SDATE";
+    default:
+      fprintf(stderr, "unknown format type %d\n", type);
+      abort();
+    }
+}
+
+/* Parses one value at the current offset and writes it to STREAM as an
+ XML-like element, indented according to LEVEL. After up to four optional
+ zero bytes, a type byte selects the form: 1 number, 2 numeric datum with
+ variable and value label, 3 string, 4 string datum, 5 variable; anything
+ else is parsed as a template whose substitutions are themselves values
+ (handled recursively).
+
+ A trace line giving the offset and type byte is written to stdout before
+ the element. */
+static void
+dump_value(FILE *stream, int level)
+{
+ match_byte(0);
+ match_byte(0);
+ match_byte(0);
+ match_byte(0);
+
+ for (int i = 0; i <= level; i++)
+ fprintf (stream, " ");
+
+ /* `pos` is a size_t, hence %zx; -1 is shown if no type byte is left. */
+ printf ("%02zx: value (%d)\n", pos, pos < n ? data[pos] : -1);
+ if (match_byte (1))
{
unsigned int format;
double value;
- char *var;
- match_byte_assert (0x58);
+ dump_value_modifier(stream);
format = get_u32 ();
value = get_double ();
- var = get_string ();
- get_string ();
- printf ("value %g format %d(%d.%d) var \"%s\"", value, format >> 16, (format >> 8) & 0xff, format & 0xff, var);
- match_u32_assert (3);
+ fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
}
- else
+ else if (match_byte (2))
{
unsigned int format;
+ char *var, *vallab;
double value;
- match_byte_assert (1);
- match_byte_assert (0x58);
+ dump_value_modifier (stream);
format = get_u32 ();
value = get_double ();
- printf ("value %g format %d(%d.%d)", value, format >> 16, (format >> 8) & 0xff, format & 0xff);
- match_byte (1);
- match_byte (0);
- match_byte (0);
- match_byte (0);
- match_byte (1);
+ var = get_string ();
+ vallab = get_string ();
+ fprintf (stream, "<numeric-datum value=\"%.*g\" format=\"%s%d.%d\"",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ if (var[0])
+ fprintf (stream, " variable=\"%s\"", var);
+ if (vallab[0])
+ fprintf (stream, " label=\"%s\"", vallab);
+ fprintf (stream, "/>\n");
+ if (!match_byte (1) && !match_byte(2))
+ match_byte_assert (3);
+ }
+ else if (match_byte (3))
+ {
+ char *text = get_string();
+ dump_value_modifier(stream);
+ char *identifier = get_string();
+ char *text_eng = get_string();
+ fprintf (stream, "<string c=\"%s\"", text_eng);
+ if (identifier[0])
+ fprintf (stream, " identifier=\"%s\"", identifier);
+ if (strcmp(text_eng, text))
+ fprintf (stream, " local=\"%s\"", text);
+ fprintf (stream, "/>\n");
+ if (!match_byte (0))
+ match_byte_assert(1);
}
+ else if (match_byte (4))
+ {
+ unsigned int format;
+ char *var, *vallab, *value;
- if (match_u32 (2))
- get_u32 ();
+ dump_value_modifier(stream);
+ format = get_u32 ();
+ vallab = get_string ();
+ var = get_string ();
+ if (!match_byte(1) && !match_byte(2))
+ match_byte_assert (3);
+ value = get_string ();
+ fprintf (stream, "<string-datum value=\"%s\" format=\"%s%d.%d\"",
+ value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ if (var[0])
+ fprintf (stream, " variable=\"%s\"", var);
+ /* The element is closed once, below, whether or not a label exists. */
+ if (vallab[0])
+ fprintf (stream, " label=\"%s\"", vallab);
+ fprintf (stream, "/>\n");
+ }
+ else if (match_byte (5))
+ {
+ dump_value_modifier(stream);
+ char *name = get_string ();
+ char *label = get_string ();
+ fprintf (stream, "<variable name=\"%s\"", name);
+ if (label[0])
+ fprintf (stream, " label=\"%s\"", label);
+ fprintf (stream, "/>\n");
+ if (!match_byte(1) && !match_byte(2))
+ match_byte_assert(3);
+ }
else
{
- match_u32_assert (1);
- match_byte (0);
- match_byte (0);
- match_byte (0);
- get_u32 ();
+ printf ("else %#zx\n", pos);
+ dump_value_modifier(stream);
+
+ char *base = get_string();
+ int x = get_u32();
+ fprintf (stream, "<template format=\"%s\">\n", base);
+ for (int i = 0; i < x; i++)
+ {
+ /* A zero count stands for a single value; a nonzero count is
+ followed by a u32 that must be zero. */
+ int y = get_u32();
+ if (!y)
+ y = 1;
+ else
+ match_u32_assert(0);
+ for (int j = 0; j <= level + 1; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
+ for (int j = 0; j < y; j++)
+ dump_value (stream, level + 2);
+ for (int j = 0; j <= level + 1; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "</substitution>\n");
+ }
+ for (int j = 0; j <= level; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "</template>\n");
}
- int n_categories = get_u32();
- if (n_categories > 0)
- printf (", %d subcategories:", n_categories);
- printf("\n");
- for (int i = 0; i < n_categories; i++)
- dump_category (level + 1);
+}
+
+/* qsort() comparison callback for arrays of int: returns -1, 0 or 1 as the
+ int at A_ is less than, equal to, or greater than the int at B_. */
+static int
+compare_int(const void *a_, const void *b_)
+{
+ const int lhs = *(const int *) a_;
+ const int rhs = *(const int *) b_;
+ return (lhs > rhs) - (lhs < rhs);
}
+/* Verifies that the N ints in A are a permutation of 0..N-1. On failure
+ prints "bad NAME permutation:" followed by the values of A to stderr and
+ exits. A itself is not modified; a sorted copy is checked.
+
+ The copy lives on the heap rather than in a variable-length array, so
+ N == 0 (where A may be NULL) and large N are both safe. */
static void
-dump_dim(void)
+check_permutation(int *a, int n, const char *name)
{
- int n_categories;
- if (match_byte(3))
+ if (n <= 0)
+ return;
+
+ int *b = malloc(n * sizeof *b);
+ if (!b)
+ {
+ perror("malloc");
+ exit(1);
+ }
+ memcpy(b, a, n * sizeof *a);
+ qsort(b, n, sizeof *b, compare_int);
+ for (int i = 0; i < n; i++)
+ if (b[i] != i)
+ {
+ fprintf(stderr, "bad %s permutation:", name);
+ for (int i = 0; i < n; i++)
+ fprintf(stderr, " %d", a[i]);
+ putc('\n', stderr);
+ exit(1);
+ }
+ free(b);
+}
+
+/* Parses one category (and, recursively, its subcategories) and writes it
+ to STREAM as a <category> element indented according to LEVEL.
+
+ A category whose index is not -1 is a leaf: its index is appended to the
+ growable array *INDEXES (capacity *ALLOCATED_INDEXES, length *N_INDEXES).
+ An index of -1 marks a group, which may carry a <merge/> flag.
+
+ The opening and closing tags go to STREAM like the rest of the element,
+ and a failed realloc() is reported instead of being dereferenced. */
+static void
+dump_category(FILE *stream, int level, int **indexes, int *allocated_indexes,
+ int *n_indexes)
+{
+ for (int i = 0; i <= level; i++)
+ fprintf (stream, " ");
+ fprintf (stream, "<category>\n");
+ dump_value (stream, level + 1);
+
+ bool merge = get_bool();
+ match_byte_assert (0);
+ int unindexed = get_bool();
+
+ /* Peek at the next u32 so it can be cross-checked below, then rewind and
+ require it to be 0 or 2. */
+ int x = get_u32 ();
+ pos -= 4;
+ if (!match_u32 (0))
+ match_u32_assert (2);
+
+ int indx = get_u32();
+ int n_categories = get_u32();
+ if (indx == -1)
{
- get_string();
- match_byte_assert(0x58);
- get_string();
- printf("string \"%s\": ", get_string());
- match_byte_assert(1);
+ if (merge)
+ {
+ for (int i = 0; i <= level + 1; i++)
+ fprintf (stream, " ");
+ fprintf (stream, "<merge/>\n");
+ }
+ assert (unindexed);
}
- else if (match_byte(5))
+ else
{
- match_byte_assert(0x58);
- printf("variable \"%s\": ", get_string());
- get_string();
- if (!match_byte(2))
- match_byte_assert(3);
+ assert (!merge);
+ assert (!unindexed);
+ assert (x == 2);
+ assert (n_categories == 0);
+ if (*n_indexes >= *allocated_indexes)
+ {
+ int new_allocated = *allocated_indexes ? 2 * *allocated_indexes : 16;
+ int *new_indexes = realloc(*indexes, new_allocated * sizeof **indexes);
+ if (!new_indexes)
+ {
+ perror("realloc");
+ exit(1);
+ }
+ *indexes = new_indexes;
+ *allocated_indexes = new_allocated;
+ }
+ (*indexes)[(*n_indexes)++] = indx;
}
- else
+
+ if (n_categories == 0)
{
- fprintf(stderr, "%08x: unexpected byte\n", pos);
- exit(1);
+ for (int i = 0; i <= level + 1; i++)
+ fprintf (stream, " ");
+ fprintf (stream, "<category-index>%d</category-index>\n", indx);
}
+ for (int i = 0; i < n_categories; i++)
+ dump_category (stream, level + 1, indexes, allocated_indexes, n_indexes);
+ for (int i = 0; i <= level; i++)
+ fprintf (stream, " ");
+ fprintf (stream, "</category>\n");
+}
- match_byte_assert(0);
+/* Parses dimension number INDX and prints it to stdout as a <dimension>
+ element: its label value, a few flags, and its tree of categories.
+ Checks that the leaf category indexes form a permutation and returns how
+ many leaves there were. The index array is freed before returning. */
+static int
+dump_dim(int indx)
+{
+ int n_categories;
+
+ printf ("<dimension index=\"%d\">\n", indx);
+ dump_value (stdout, 0);
+
+ /* This byte is usually 0 but many other values have been spotted.
+ No visible effect. */
+ pos++;
+
+ /* This byte can cause data to be oddly replicated. */
if (!match_byte(0) && !match_byte(1))
match_byte_assert(2);
- match_u32_assert(2);
- if (!match_byte(0))
- match_byte_assert(1);
- match_byte(0);
- match_byte(0);
- match_byte(0);
- match_byte(0);
- get_u32();
- match_byte(0);
- match_byte(0);
- match_byte(0);
- match_byte(0);
+
+ if (!match_u32(0))
+ match_u32_assert(2);
+
+ bool show_dim_label = get_bool();
+ if (show_dim_label)
+ printf(" <show-dim-label/>\n");
+
+ bool hide_all_labels = get_bool();
+ if (hide_all_labels)
+ printf(" <hide-all-labels/>\n");
+
+ match_byte_assert(1);
+ /* Either all-ones or this dimension's own index. */
+ if (!match_u32(UINT32_MAX))
+ match_u32_assert(indx);
+
n_categories = get_u32();
- printf("%d nested categories\n", n_categories);
+
+ int *indexes = NULL;
+ int n_indexes = 0;
+ int allocated_indexes = 0;
for (int i = 0; i < n_categories; i++)
- dump_category (0);
+ dump_category (stdout, 0, &indexes, &allocated_indexes, &n_indexes);
+ check_permutation(indexes, n_indexes, "categories");
+ free(indexes);
+
+ fprintf (stdout, "</dimension>\n");
+ return n_indexes;
}
+/* Number of dimensions in the table, set by dump_dims(). */
+int n_dims;
+/* Number of leaf categories in each dimension, as returned by dump_dim(). */
+static int dim_n_cats[64];
+#define MAX_DIMS (sizeof dim_n_cats / sizeof *dim_n_cats)
+
+/* Reads the dimension count and dumps each dimension in turn, recording each
+ one's leaf-category count in dim_n_cats[].
+
+ The count is validated with an explicit check rather than assert(), so an
+ out-of-range value cannot overrun dim_n_cats[] in an NDEBUG build. */
static void
dump_dims(void)
{
- int n_dims = get_u32();
+ n_dims = get_u32();
+ if (n_dims < 0 || n_dims >= (int) MAX_DIMS)
+ {
+ fprintf(stderr, "unsupported number of dimensions %d\n", n_dims);
+ exit(1);
+ }
+ for (int i = 0; i < n_dims; i++)
+ dim_n_cats[i] = dump_dim (i);
+}
- printf ("%u dimensions\n", n_dims);
+/* Parses the table body and prints it to stdout: first the assignment of
+ dimensions to layers, rows and columns, then each datum with its linear
+ index decomposed into one coordinate per dimension (last dimension varying
+ fastest) using the category counts in dim_n_cats[].
+
+ Exits if n_dims is out of range or if a datum refers to a dimension with
+ no categories, which would otherwise divide by zero. */
+static void
+dump_data(void)
+{
+ if (n_dims < 0 || n_dims > (int) MAX_DIMS)
+ {
+ fprintf(stderr, "unsupported number of dimensions %d\n", n_dims);
+ exit(1);
+ }
+
+ /* The first three numbers add to the number of dimensions. */
+ int l = get_u32();
+ int r = get_u32();
+ int c = n_dims - l - r;
+ match_u32_assert(c);
+
+ /* The next n_dims numbers are a permutation of the dimension numbers. */
+ int a[MAX_DIMS];
for (int i = 0; i < n_dims; i++)
{
- printf("\n");
- dump_dim ();
+ int dim = get_u32();
+ a[i] = dim;
+
+ const char *name = i < l ? "layer" : i < l + r ? "row" : "column";
+ printf ("<%s dimension=\"%d\"/>\n", name, dim);
+ }
+ check_permutation(a, n_dims, "dimensions");
+
+ int x = get_u32();
+ printf ("<data>\n");
+ for (int i = 0; i < x; i++)
+ {
+ unsigned int indx = get_u32();
+ printf (" <datum index=\"%u\" coords=", indx);
+
+ int coords[MAX_DIMS];
+ for (int i = n_dims; i-- > 0; )
+ {
+ if (dim_n_cats[i] <= 0)
+ {
+ fprintf(stderr, "dimension %d has no categories\n", i);
+ exit(1);
+ }
+ coords[i] = indx % dim_n_cats[i];
+ indx /= dim_n_cats[i];
+ }
+ for (int i = 0; i < n_dims; i++)
+ printf("%c%d", i ? ',' : '"', coords[i]);
+
+ printf ("\">\n");
+ match_u32_assert(0);
+ if (version == 1)
+ match_byte(0);
+ dump_value(stdout, 1);
+ fprintf (stdout, " </datum>\n");
}
+ printf ("</data>\n");
}
-int
-main(int argc, char *argv[])
+/* Parses the title section and prints it to stdout: the localized title,
+ the subtype, the title again (tagged <title-c>), an optional user caption,
+ an optional caption, and the footnotes. Each footnote has a text value,
+ an optional custom marker value, and a 32-bit count.
+
+ A count that is negative but not -2 is reported and the program exits. */
+static void
+dump_title(void)
{
- size_t start;
- struct stat s;
+ printf ("<title-local>\n");
+ dump_value(stdout, 0);
+ match_byte(1);
+ printf ("</title-local>\n");
+
+ printf ("<subtype>\n");
+ dump_value(stdout, 0);
+ match_byte(1);
+ printf ("</subtype>\n");
+
+ match_byte_assert(0x31);
+
+ printf ("<title-c>\n");
+ dump_value(stdout, 0);
+ match_byte(1);
+ printf ("</title-c>\n");
- if (isatty(STDIN_FILENO))
+ if (match_byte(0x31))
{
- fprintf(stderr, "redirect stdin from a .bin file\n");
- exit(1);
+ printf ("<user-caption>\n");
+ dump_value(stdout, 0);
+ printf ("</user-caption>\n");
}
- if (fstat(STDIN_FILENO, &s))
+ else
+ match_byte_assert(0x58);
+ if (match_byte(0x31))
{
- perror("fstat");
- exit(1);
+ printf ("<caption>\n");
+ dump_value(stdout, 0);
+ printf ("</caption>\n");
}
- n = s.st_size;
- data = malloc(n);
- if (!data)
+ else
+ match_byte_assert(0x58);
+
+ int n_footnotes = get_u32();
+ for (int i = 0; i < n_footnotes; i++)
{
- perror("malloc");
- exit(1);
+ printf ("<footnote index=\"%d\">\n", i);
+ dump_value(stdout, 0);
+ /* Custom footnote marker string. */
+ if (match_byte (0x31))
+ dump_value(stdout, 0);
+ else
+ match_byte_assert (0x58);
+ int n_refs = get_u32();
+ if (n_refs >= 0)
+ {
+ /* Appears to be the number of references to a footnote. */
+ printf (" <references n=\"%d\"/>\n", n_refs);
+ }
+ else if (n_refs == -2)
+ {
+ /* The user deleted the footnote references. */
+ printf (" <deleted/>\n");
+ }
+ else
+ {
+ fprintf (stderr, "0x%zx: unexpected footnote reference count %d\n",
+ pos - 4, n_refs);
+ exit (1);
+ }
+ printf ("</footnote>\n");
+ }
+}
+
+/* Parses everything between the title and the dimensions and prints it to
+ stdout: eight numbered <style> records, the <borders> block (big-endian),
+ a skipped block, the version-3 <settings> block, optional manual column
+ widths, locale, layer, epoch, number format, custom currency strings, and
+ a final outer envelope whose layout depends on `version`.
+
+ get_end() is never called inside assert(), so the reads still happen in
+ an NDEBUG build. */
+static void
+dump_fonts(void)
+{
+ match_byte(0);
+ for (int i = 1; i <= 8; i++)
+ {
+ printf ("<style index=\"%d\"", i);
+ match_byte_assert(i);
+ match_byte_assert(0x31);
+ printf(" font=\"%s\"", get_string());
+
+ printf(" size=\"%gpt\"", get_float());
+
+ int style = get_u32();
+ if (style & 1)
+ printf(" bold=\"true\"");
+ if (style & 2)
+ printf(" italic=\"true\"");
+
+ bool underline = data[pos++];
+ if (underline)
+ printf(" underline=\"true\"");
+
+ int halign = get_u32();
+ printf(" halign=%d", halign);
+
+ int valign = get_u32();
+ printf(" valign=%d", valign);
+
+ printf (" fgcolor=\"%s\"", get_string());
+ printf (" bgcolor=\"%s\"", get_string());
+
+ if (!match_byte(0))
+ match_byte_assert(1);
+
+ char *alt_fgcolor = get_string();
+ if (alt_fgcolor[0])
+ printf (" altfg=\"%s\"", alt_fgcolor);
+ char *alt_bgcolor = get_string();
+ if (alt_bgcolor[0])
+ printf (" altbg=\"%s\"", alt_bgcolor);
+
+ if (version > 1)
+ {
+ printf(" margins=\"");
+ for (int i = 0; i < 4; i++)
+ {
+ if (i)
+ putchar(' ');
+ printf("%d", get_u32());
+ }
+ putchar('"');
+ }
+
+ printf ("/>\n");
}
- if (read(STDIN_FILENO, data, n) != n)
+
+ int x1 = get_u32();
+ int x1_end = pos + x1;
+ printf("<borders>\n");
+ match_be32_assert(1);
+ int n_borders = get_be32();
+ for (int i = 0; i < n_borders; i++)
{
- perror("read");
- exit(1);
+ int type = get_be32();
+ int stroke = get_be32();
+ int color = get_be32();
+ printf(" <border type=\"%d\" stroke=\"%s\" color=\"#%06x\"/>\n",
+ type,
+ (stroke == 0 ? "none"
+ : stroke == 1 ? "solid"
+ : stroke == 2 ? "dashed"
+ : stroke == 3 ? "thick"
+ : stroke == 4 ? "thin"
+ : stroke == 5 ? "double"
+ : "<error>"),
+ color);
}
+ bool grid = get_byte();
+ pos += 3;
+ printf(" <grid show=\"%s\"/>\n", grid ? "yes" : "no");
+ printf("</borders>\n");
+ assert(pos == x1_end);
+
+ int skip = get_u32();
+ assert(skip == 18 || skip == 25);
+ pos += skip;
- if (argc > 1)
+ int x3 = get_u32();
+ int x3_end = pos + x3;
+ if (version == 3)
{
- if (!strcmp(argv[1], "title"))
+ match_be32_assert(1);
+ get_be32();
+ printf("<settings layer=\"%d\"", get_be32());
+ if (!get_bool())
+ printf(" skipempty=\"false\"");
+ if (!get_bool())
+ printf(" showdimensionincorner=\"false\"");
+ if (!get_bool())
+ printf(" markers=\"numeric\"");
+ if (!get_bool())
+ printf(" footnoteposition=\"subscript\"");
+ get_byte();
+ int nbytes = get_be32();
+ int end = pos + nbytes;
+ printf("\n");
+ while (pos + 4 <= end)
+ printf(" %d", get_be32());
+ pos = end;
+ printf("\n");
+ /* NOTE(review): `pos` was just set to `end`, so this skips a second
+ `nbytes` bytes before the strings. Left as found; confirm against
+ sample files whether that is intended. */
+ pos += nbytes;
+ char *notes = get_string_be();
+ if (notes[0])
+ printf(" notes=\"%s\"", notes);
+ char *look = get_string_be();
+ if (look[0])
+ printf(" look=\"%s\"", look);
+ printf(">\n");
+ }
+ pos = x3_end;
+
+ /* Manual column widths, if present. */
+ int count = get_u32();
+ if (count > 0)
+ {
+ printf("<columnwidths>");
+ for (int i = 0; i < count; i++)
+ {
+ if (i)
+ putchar(' ');
+ printf("%d", get_u32());
+ }
+ printf("</columnwidths>\n");
+ }
+
+ const char *locale = get_string();
+ printf ("<locale>%s</locale>\n", locale);
+
+ printf ("<layer>%d</layer>\n", get_u32());
+ if (!match_byte(0))
+ match_byte_assert(1);
+ if (!match_byte(0))
+ match_byte_assert(1);
+ if (!match_byte(0))
+ match_byte_assert(1);
+ printf("<epoch>%d</epoch>\n", get_u32());
+
+ int decimal = data[pos];
+ int grouping = data[pos + 1];
+ if (match_byte('.'))
+ {
+ if (!match_byte(',') && !match_byte('\''))
+ match_byte_assert(' ');
+ }
+ else
+ {
+ match_byte_assert(',');
+ if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
+ match_byte_assert(0);
+ }
+ printf("<format decimal=\"%c\"", decimal);
+ if (grouping)
+ printf(" grouping=\"%c\"", grouping);
+ /* Every attribute above already closes its own quote. */
+ printf("/>\n");
+ if (match_u32(5))
+ {
+ for (int i = 0; i < 5; i++)
+ printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
+ }
+ else
+ match_u32_assert(0);
+
+ /* The last chunk is an outer envelope that contains two inner envelopes.
+ The second inner envelope has some interesting data like the encoding and
+ the locale. */
+ int outer_end = get_end();
+ if (version == 3)
+ {
+ /* First inner envelope: byte*33 int[n] int*[n]. */
+ int inner_len = get_u32();
+ int inner_end = pos + inner_len;
+ int array_start = pos + 33;
+ match_byte_assert(0);
+ pos++; /* 0, 1, 10 seen. */
+ get_bool();
+
+ /* 0=en 1=de 2=es 3=it 5=ko 6=pl 8=zh-tw 10=pt_BR 11=fr */
+ printf("lang=%d ", get_byte());
+
+ printf ("variable_mode=%d\n", get_byte());
+ printf ("value_mode=%d\n", get_byte());
+ if (!match_u64(0))
+ match_u64_assert(UINT64_MAX);
+ match_u32_assert(0);
+ match_u32_assert(0);
+ match_u32_assert(0);
+ match_u32_assert(0);
+ match_byte_assert(0);
+ get_bool();
+ match_byte_assert(1);
+ pos = array_start;
+
+ /* Read outside assert() so the length is consumed under NDEBUG too. */
+ int heights_end = get_end();
+ assert(heights_end == inner_end);
+ (void) heights_end;
+ printf("<heights>");
+ int n_heights = get_u32();
+ for (int i = 0; i < n_heights; i++)
{
- const char fonts[] = "\x01\x31\x09\0\0\0SansSerif";
- start = 0x27;
- n = find(fonts, sizeof fonts - 1);
+ if (i)
+ putchar(' ');
+ printf("%d", get_u32());
}
- else if (!strcmp(argv[1], "fonts"))
+ printf("</heights>\n");
+
+ int n_style_map = get_u32();
+ for (int i = 0; i < n_style_map; i++)
{
- const char fonts[] = "\x01\x31\x09\0\0\0SansSerif";
- const char styles[] = "\xf0\0\0\0";
- start = find(fonts, sizeof fonts - 1);
- n = find(styles, sizeof styles - 1);
+ uint64_t cell = get_u64();
+ int style = get_u16();
+ printf("<style-map cell=\"%llu\" style=\"%d\"/>\n",
+ (unsigned long long int) cell, style);
}
- else if (!strcmp(argv[1], "styles"))
+
+ int n_styles = get_u32();
+ for (int i = 0; i < n_styles; i++)
{
- const char styles[] = "\xf0\0\0\0";
- const char dimensions[] = "-,,,.\0";
- start = find(styles, sizeof styles - 1);
- n = find(dimensions, sizeof dimensions - 1) + sizeof dimensions - 1;
+ printf("<cell-style index=\"%d\"", i);
+ dump_style(stdout);
+ dump_style2(stdout);
+ printf("/>\n");
}
- else if (!strcmp(argv[1], "dimensions"))
+
+ pos = get_end();
+ assert(pos == inner_end);
+
+ /* Second inner envelope. Read outside assert(), as above. */
+ int second_end = get_end();
+ assert(second_end == outer_end);
+ (void) second_end;
+
+ match_byte_assert(1);
+ match_byte_assert(0);
+ if (!match_byte(3) && !match_byte(4))
+ match_byte_assert(5);
+ match_byte_assert(0);
+ match_byte_assert(0);
+ match_byte_assert(0);
+
+ printf("<command>%s</command>\n", get_string());
+ printf("<command-local>%s</command-local>\n", get_string());
+ printf("<language>%s</language>\n", get_string());
+ printf("<charset>%s</charset>\n", get_string());
+ printf("<locale>%s</locale>\n", get_string());
+
+ get_bool();
+ get_bool();
+ get_bool();
+ get_bool();
+
+ printf("<epoch2>%d</epoch2>\n", get_u32());
+
+ if (match_byte('.'))
{
- const char dimensions[] = "-,,,.\0";
- start = find(dimensions, sizeof dimensions - 1) + sizeof dimensions - 1;
- pos = start;
- dump_dims ();
- return 0;
+ if (!match_byte(',') && !match_byte('\''))
+ match_byte_assert(' ');
}
else
{
- fprintf (stderr, "unknown section %s\n", argv[1]);
- exit(1);
+ match_byte_assert(',');
+ if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
+ match_byte_assert(0);
}
- }
- else
- start = 0x27;
- for (size_t i = start; i < n; )
- {
- if (i + 5 <= n
- && data[i]
- && !data[i + 1]
- && !data[i + 2]
- && !data[i + 3]
- && i + 4 + data[i] <= n
- && all_ascii(&data[i + 4], data[i]))
- {
- fputs("\n\"", stdout);
- fwrite(&data[i + 4], 1, data[i], stdout);
- fputs("\" ", stdout);
+ printf ("small: %g\n", get_double());
- i += 4 + data[i];
- }
- else if (i + 12 <= n
- && data[i + 1] == 40
- && data[i + 2] == 5
- && data[i + 3] == 0)
+ match_byte_assert(1);
+ if (outer_end - pos > 6)
{
- double d;
+ /* There might be a pair of strings representing a dataset and
+ datafile name, or there might be a set of custom currency strings.
+ The custom currency strings start with a pair of integers, so we
+ can distinguish these from a string by checking for a null byte; a
+ small 32-bit integer will always contain a null and a text string
+ never will. */
+ int save_pos = pos;
+ int len = get_u32();
+ bool has_dataset = !memchr(&data[pos], '\0', len);
+ pos = save_pos;
- memcpy (&d, &data[i + 4], 8);
- printf ("F40.%d(%.*f)\n", data[i], data[i], d);
- i += 12;
+ if (has_dataset)
+ {
+ printf("<dataset>%s</dataset>\n", get_string());
+ printf("<datafile>%s</datafile>\n", get_string());
+
+ match_u32_assert(0);
+
+ /* localtime() may return NULL; print an empty date then. */
+ time_t date = get_u32();
+ struct tm *tm = localtime(&date);
+ char s[128] = "";
+ if (tm != NULL)
+ strftime(s, sizeof s, "%a, %d %b %Y %H:%M:%S %z", tm);
+ printf("<date>%s</date>\n", s);
+
+ match_u32_assert(0);
+ }
}
- else if (i + 12 <= n
- && data[i + 1] == 40
- && data[i + 2] == 31
- && data[i + 3] == 0)
+
+ if (match_u32(5))
{
- double d;
+ for (int i = 0; i < 5; i++)
+ printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
+ }
+ else
+ match_u32_assert(0);
- memcpy (&d, &data[i + 4], 8);
- printf ("PCT40.%d(%.*f)\n", data[i], data[i], d);
- i += 12;
+ match_byte_assert('.');
+ get_bool();
+
+ if (pos < outer_end)
+ {
+ get_u32();
+ match_u32_assert(0);
}
- else if (i + 4 <= n
- && (data[i] && data[i] != 88 && data[i] != 0x41)
- && !data[i + 1]
- && !data[i + 2]
- && !data[i + 3])
+ assert(pos == outer_end);
+
+ pos = outer_end;
+ }
+ else if (outer_end != pos)
+ {
+ pos += 14;
+ printf("<command>%s</command>\n", get_string());
+ printf("<command-local>%s</command-local>\n", get_string());
+ printf("<language>%s</language>\n", get_string());
+ printf("<charset>%s</charset>\n", get_string());
+ printf("<locale>%s</locale>\n", get_string());
+ get_bool();
+ match_byte_assert(0);
+ get_bool();
+ get_bool();
+
+ printf("<epoch2>%d</epoch2>\n", get_u32());
+ int decimal = data[pos];
+ int grouping = data[pos + 1];
+ if (match_byte('.'))
{
- printf ("i%d ", data[i]);
- i += 4;
+ if (!match_byte(',') && !match_byte('\''))
+ match_byte_assert(' ');
}
else
{
- printf("%02x ", data[i]);
- i++;
+ match_byte_assert(',');
+ if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
+ match_byte_assert(0);
}
+ printf("<format decimal=\"%c\"", decimal);
+ if (grouping)
+ printf(" grouping=\"%c\"", grouping);
+ printf("/>\n");
+ if (match_u32(5))
+ {
+ for (int i = 0; i < 5; i++)
+ printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
+ }
+ else
+ match_u32_assert(0);
+
+ match_byte_assert('.');
+ get_bool();
+
+ assert(pos == outer_end);
+ pos = outer_end;
+ }
+}
+
+/* Entry point. Takes exactly one argument, the file to dump. Reads the
+ whole file into `data`, parses the fixed header (signature bytes, version,
+ footnote and label options, label widths, table id), then dumps the title,
+ fonts, dimensions and data sections in that order. Exits with status 1
+ on any error, including input left over after the data section. */
+int
+main(int argc, char *argv[])
+{
+ if (argc != 2)
+ {
+ fprintf (stderr, "usage: %s FILE.bin\n", argv[0]);
+ exit (1);
+ }
+
+ filename = argv[1];
+ int fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ {
+ fprintf (stderr, "%s: open failed (%s)\n", filename, strerror (errno));
+ exit (1);
+ }
+
+ struct stat s;
+ if (fstat(fd, &s))
+ {
+ perror("fstat");
+ exit(1);
+ }
+ n = s.st_size;
+ data = malloc(n);
+ if (!data)
+ {
+ perror("malloc");
+ exit(1);
+ }
+ /* read() returns a signed count; reject errors and short reads alike. */
+ ssize_t n_read = read(fd, data, n);
+ if (n_read < 0 || (size_t) n_read != n)
+ {
+ perror("read");
+ exit(1);
+ }
+ close(fd);
+
+ pos = 0;
+ match_byte_assert(1);
+ match_byte_assert(0);
+
+ version = get_u32();
+ if (version != 1 && version != 3)
+ {
+ fprintf (stderr, "%s: unsupported version %d\n", filename, version);
+ exit (1);
+ }
+
+ match_byte_assert(1);
+ bool number_footnotes = get_bool();
+ printf("<footnote markers=\"%s\"/>\n",
+ number_footnotes ? "number" : "letter");
+ bool rotate_inner_column_labels = get_bool();
+ bool rotate_outer_row_labels = get_bool();
+ printf("x=%d\n", get_bool());
+ printf("<rotate-labels inner-column=\"%s\" outer-row=\"%s\"/>",
+ rotate_inner_column_labels ? "yes" : "no",
+ rotate_outer_row_labels ? "yes" : "no");
+ //fprintf(stderr, "option-number=%d\n", get_u32());
+ get_u32();
+
+ int min_col_width = get_u32();
+ int max_col_width = get_u32();
+ int min_row_width = get_u32();
+ int max_row_width = get_u32();
+ printf("<label-width min-col=\"%d\" max-col=\"%d\" min-row=\"%d\" "
+ "max-row=\"%d\"/>\n",
+ min_col_width, max_col_width,
+ min_row_width, max_row_width);
+
+ /* Offset 31. */
+ printf("<tableid>%lld</tableid>", get_u64());
+
+ dump_title ();
+ dump_fonts();
+ dump_dims ();
+ dump_data ();
+ match_byte (1);
+ /* Anything left over means the parse went wrong somewhere. */
+ if (pos != n)
+ {
+ fprintf (stderr, "%zx / %zx\n", pos, n);
+ exit(1);
}
return 0;
}