+#include <float.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
return pos ? pos - data : 0;
}
-static size_t
-try_find_tail(const char *target, size_t target_len)
-{
- size_t pos = try_find(target, target_len);
- return pos ? pos + target_len : 0;
-}
-
static size_t
find(const char *target, size_t target_len)
{
return pos;
}
-static size_t
-find_tail(const char *target, size_t target_len)
-{
- size_t pos = try_find_tail(target, target_len);
- if (!pos)
- {
- fprintf (stderr, "not found\n");
- exit(1);
- }
- return pos;
-}
-
size_t pos;
#define XSTR(x) #x
}
#define get_string() get_string(WHERE)
-static void
-dump_nested(void)
+/* Parses a nested record at the current position and returns the string it
+   carries, or NULL if it carries none.
+
+   The record is two zero bytes, an outer length, an inner length, an optional
+   inner payload (a zero u32 followed by either 0x31 and a string, or 0x58),
+   and two 0x58 bytes.  Both lengths are checked against the position actually
+   reached; a mismatch prints a diagnostic to stderr and exits with status 1. */
+static char *
+dump_nested_string(void)
{
- int subn = get_u32 ();
-#if 0
- fprintf (stderr, "nested %d bytes: ", subn);
- dump_raw(stderr, pos, pos + subn, "");
- putc('\n', stderr);
-#endif
- pos += subn;
+ char *s = NULL;
+
+ match_byte_assert (0);
+ match_byte_assert (0);
+
+ /* Read each length in its own statement before adding it to pos.  In
+ `pos + get_u32()' the operands may be evaluated in either order, so if
+ get_u32() advances pos (presumably it does -- confirm against its
+ definition) the computed end offset would depend on the compiler. */
+ size_t outer_len = get_u32();
+ size_t outer_end = pos + outer_len;
+ size_t inner_len = get_u32();
+ size_t inner_end = pos + inner_len;
+ if (pos != inner_end)
+ {
+ match_u32_assert(0);
+ if (match_byte(0x31))
+ s = get_string();
+ else
+ match_byte_assert(0x58);
+ if (pos != inner_end)
+ {
+ fprintf(stderr, "inner end discrepancy\n");
+ exit(1);
+ }
+ }
+ match_byte_assert(0x58);
+ match_byte_assert(0x58);
+ if (pos != outer_end)
+ {
+ fprintf(stderr, "outer end discrepancy\n");
+ exit(1);
+ }
+
+ return s;
}
+/* Parses the optional record that starts with byte 0x31; if that byte is
+   absent, a single 0x58 byte is required instead.
+
+   The record's leading u32 selects one of four layouts (0, 1, 2 or 3).  Only
+   layouts 1-3 write anything to STREAM: "(footnote N) ", "(special 2)" or
+   "(special 3)".  Strings found in layout 0 are read and discarded.  Length
+   fields are checked against the position actually reached; a mismatch prints
+   a diagnostic to stderr and exits with status 1. */
static void
-dump_value_31(void)
+dump_value_31(FILE *stream)
{
if (match_byte (0x31))
{
if (match_u32 (0))
{
if (match_u32 (1))
- get_string();
+ {
+ /* Only "a" observed as a sample value (although it appears 44 times in the corpus). */
+ get_string();
+ }
else
match_u32_assert (0);
- dump_nested();
+
+ /* Read each length in its own statement before adding it to pos.  In
+ `pos + get_u32()' the operands may be evaluated in either order, so if
+ get_u32() advances pos (presumably it does -- confirm against its
+ definition) the computed end offset would depend on the compiler. */
+ size_t outer_len = get_u32();
+ size_t outer_end = pos + outer_len;
+ size_t inner_len = get_u32();
+ size_t inner_end = pos + inner_len;
+ match_u32_assert(0);
+ if (match_byte(0x31))
+ {
+ /* Appears to be a template string, e.g. '^1 cells (^2) expf < 5. Min exp = ^3...'.
+ Probably doesn't actually appear in output because many examples look unpolished,
+ e.g. 'partial list cases value ^1 shown upper...' */
+ get_string();
+ }
+ else
+ match_byte_assert(0x58);
+ if (pos != inner_end)
+ {
+ fprintf(stderr, "inner end discrepancy\n");
+ exit(1);
+ }
+
+ if (match_byte(0x31))
+ {
+ /* Only one example in the corpus. */
+ match_byte(0);
+ match_byte(0);
+ match_byte(0);
+ match_byte_assert(1);
+ get_string(); /* foreground */
+ get_string(); /* background */
+ get_string(); /* font */
+ match_byte_assert(12); /* size? */
+ }
+ else
+ match_byte_assert(0x58);
+ match_byte_assert(0x58);
+ if (pos != outer_end)
+ {
+ fprintf(stderr, "outer end discrepancy\n");
+ exit(1);
+ }
}
else if (match_u32 (1))
{
- printf("(footnote %d) ", get_u32());
- match_byte_assert (0);
- match_byte_assert (0);
- dump_nested();
+ fprintf(stream, "(footnote %d) ", get_u32());
+ dump_nested_string();
}
else if (match_u32 (2))
{
- printf("(special 2)");
+ fprintf(stream, "(special 2)");
match_byte_assert(0);
match_byte_assert(0);
if (!match_u32 (2))
match_u32_assert(1);
- match_byte_assert(0);
- match_byte_assert(0);
- dump_nested();
+ dump_nested_string(); /* Our corpus doesn't contain any examples with strings though. */
}
else
{
match_u32_assert(3);
- printf("(special 3)");
+ fprintf(stream, "(special 3)");
match_byte_assert(0);
match_byte_assert(0);
match_byte_assert(1);
match_byte_assert(0);
- dump_nested();
- dump_nested();
+ match_u32_assert(2);
+ dump_nested_string(); /* Our corpus doesn't contain any examples with strings though. */
}
}
else
match_byte_assert (0x58);
}
-static void
-dump_value__(int level, bool match1)
+/* Returns the name string for numeric format type code TYPE (for example,
+   both 5 and 40 yield "F").  Callers pass the upper 16 bits of a format word.
+
+   An unrecognized code is treated as fatal: the code is reported on stderr
+   and the program aborts, so this function never returns in that case. */
+static const char *
+format_to_string (int type)
{
- for (int i = 0; i <= level; i++)
- printf (" ");
+ switch (type)
+ {
+ case 1: return "A";
+ case 2: return "AHEX";
+ case 3: return "COMMA";
+ case 4: return "DOLLAR";
+ case 5: case 40: return "F";
+ case 6: return "IB";
+ case 7: return "PIBHEX";
+ case 8: return "P";
+ case 9: return "PIB";
+ case 10: return "PK";
+ case 11: return "RB";
+ case 12: return "RBHEX";
+ case 15: return "Z";
+ case 16: return "N";
+ case 17: return "E";
+ case 20: return "DATE";
+ case 21: return "TIME";
+ case 22: return "DATETIME";
+ case 23: return "ADATE";
+ case 24: return "JDATE";
+ case 25: return "DTIME";
+ case 26: return "WKDAY";
+ case 27: return "MONTH";
+ case 28: return "MOYR";
+ case 29: return "QYR";
+ case 30: return "WKYR";
+ case 31: return "PCT";
+ case 32: return "DOT";
+ case 33: return "CCA";
+ case 34: return "CCB";
+ case 35: return "CCC";
+ case 36: return "CCD";
+ case 37: return "CCE";
+ case 38: return "EDATE";
+ case 39: return "SDATE";
+ default:
+ /* Name the offending code before aborting; the "<%d>" fallback that
+ used to follow abort() could never run and has been dropped. */
+ fprintf (stderr, "unknown format type %d\n", type);
+ abort();
+ }
+}
+/* Parses one value at the current position and writes it to STREAM as an
+   XML-style element, preceded by indentation that grows with LEVEL.
+
+   The first non-zero byte selects the kind of value: strings, variables,
+   numbers with or without variable/label information, or (in the final
+   branch) a template whose substitutions are dumped recursively at
+   LEVEL + 2.  When MATCH1 is true an optional trailing byte 1 is also
+   consumed after some kinds of value. */
+static void
+dump_value(FILE *stream, int level, bool match1)
+{
match_byte(0);
match_byte(0);
match_byte(0);
match_byte(0);
+ for (int i = 0; i <= level; i++)
+ fprintf (stream, " ");
+
if (match_byte (3))
{
- char *s1 = get_string();
- dump_value_31();
- char *s2 = get_string();
- char *s3 = get_string();
- if (strcmp(s1, s3))
- printf("strings \"%s\", \"%s\" and \"%s\"", s1, s2, s3);
- else
- printf("string \"%s\" and \"%s\"", s1, s2);
+ char *text = get_string();
+ dump_value_31(stream);
+ char *identifier = get_string();
+ char *text_eng = get_string();
+ fprintf (stream, "<string c=\"%s\"", text_eng);
+ if (identifier[0])
+ fprintf (stream, " identifier=\"%s\"", identifier);
+ if (strcmp(text_eng, text))
+ fprintf (stream, " local=\"%s\"", text);
+ fprintf (stream, "/>\n");
if (!match_byte (0))
match_byte_assert(1);
if (match1)
}
else if (match_byte (5))
{
- dump_value_31();
- printf ("variable \"%s\"", get_string());
- get_string();
+ dump_value_31(stream);
+ char *name = get_string ();
+ char *label = get_string ();
+ fprintf (stream, "<variable name=\"%s\"", name);
+ if (label[0])
+ fprintf (stream, " label=\"%s\"", label);
+ fprintf (stream, "/>\n");
if (!match_byte(1) && !match_byte(2))
match_byte_assert(3);
}
value = get_double ();
var = get_string ();
vallab = get_string ();
- printf ("value %g format %d(%d.%d) var \"%s\" vallab \"%s\"",
- value, format >> 16, (format >> 8) & 0xff, format & 0xff, var, vallab);
+ fprintf (stream, "<numeric-datum value=\"%.*g\" format=\"%s%d.%d\"",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ if (var[0])
+ fprintf (stream, " variable=\"%s\"", var);
+ /* Only the attribute is written here; the element is closed exactly once
+ by the unconditional "/>" that follows. */
+ if (vallab[0])
+ fprintf (stream, " label=\"%s\"", vallab);
+ fprintf (stream, "/>\n");
if (!match_byte (1) && !match_byte(2))
match_byte_assert (3);
}
if (!match_byte(1) && !match_byte(2))
match_byte_assert (3);
value = get_string ();
- printf ("value \"%s\" format %d(%d.%d) var \"%s\" vallab \"%s\"",
- value, format >> 16, (format >> 8) & 0xff, format & 0xff, var, vallab);
+ fprintf (stream, "<string-datum value=\"%s\" format=\"%s%d.%d\"",
+ value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ if (var[0])
+ fprintf (stream, " variable=\"%s\"", var);
+ /* Only the attribute is written here; the element is closed exactly once
+ by the unconditional "/>" that follows. */
+ if (vallab[0])
+ fprintf (stream, " label=\"%s\"", vallab);
+ fprintf (stream, "/>\n");
}
else if (match_byte (1))
{
unsigned int format;
double value;
- dump_value_31();
+ dump_value_31(stream);
format = get_u32 ();
value = get_double ();
- printf ("value %g format %d(%d.%d)", value, format >> 16, (format >> 8) & 0xff, format & 0xff);
+ fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
if (match1)
match_byte (1);
}
else
{
- dump_value_31();
+ dump_value_31(stream);
char *base = get_string();
int x = get_u32();
- printf ("\"%s\" with %d variables:\n", base, x);
+ fprintf (stream, "<template format=\"%s\">\n", base);
for (int i = 0; i < x; i++)
{
int y = get_u32();
y = 1;
else
match_u32_assert(0);
- for (int j = 0; j <= level; j++)
- printf (" ");
- printf("variable %d has %d values:\n", i, y);
+ for (int j = 0; j <= level + 1; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
for (int j = 0; j < y; j++)
- {
- dump_value__ (level + 1, false);
- putchar('\n');
- }
+ dump_value (stream, level + 2, false);
+ for (int j = 0; j <= level + 1; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "</substitution>\n");
}
+ for (int j = 0; j <= level; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "</template>\n");
}
}
/* Dumps one category, and recursively its subcategories, to stdout as a
   <category> element indented according to LEVEL.

   The category's value is dumped at LEVEL + 1.  The category's index is
   appended to INDEXES, advancing *N_INDEXES; when the category has no
   subcategories that index is also printed as <category-index>.  The code
   that reads the index and the subcategory count is not visible in this
   excerpt. */
static void
dump_category(int level, int *indexes, int *n_indexes)
{
- dump_value__ (level, true);
+ for (int i = 0; i <= level; i++)
+ fprintf (stdout, " ");
+ printf ("<category>\n");
+ dump_value (stdout, level + 1, true);
match_byte(0);
match_byte(0);
match_byte(0);
}
indexes[(*n_indexes)++] = indx;
}
- if (n_categories > 0)
- printf (", %d subcategories:", n_categories);
- else
- printf (", index %d", indx);
- printf("\n");
+ if (n_categories == 0)
+ {
+ for (int i = 0; i <= level + 1; i++)
+ fprintf (stdout, " ");
+ fprintf (stdout, "<category-index>%d</category-index>\n", indx);
+ }
for (int i = 0; i < n_categories; i++)
dump_category (level + 1, indexes, n_indexes);
+ for (int i = 0; i <= level; i++)
+ fprintf (stdout, " ");
+ printf ("</category>\n");
}
/* Dumps one dimension to stdout as a <dimension> element.

   INDX is written as the element's index attribute and is also asserted to
   equal the index stored in the input.  The dimension's value is dumped
   first, then each of its top-level categories; the category indexes that
   were collected are passed to check_permutation(). */
static void
-dump_dim(void)
+dump_dim(int indx)
{
int n_categories;
- printf("next dim\n");
- dump_value__ (0, false);
+
+ printf ("<dimension index=\"%d\">\n", indx);
+ dump_value (stdout, 0, false);
/* This byte is usually 0x02 but 0x00 and 0x75 (!) have also been spotted. */
pos++;
if (!match_byte(0))
match_byte_assert(1);
match_byte_assert(1);
- static int dim_indx = 0;
- match_u32_assert(dim_indx++);
+ match_u32_assert(indx);
n_categories = get_u32();
- printf("%d nested categories\n", n_categories);
/* NOTE(review): fixed capacity of 1024 entries; no bounds check on it is visible in this excerpt -- confirm. */
int indexes[1024];
int n_indexes = 0;
for (int i = 0; i < n_categories; i++)
dump_category (0, indexes, &n_indexes);
check_permutation(indexes, n_indexes, "categories");
+
+ fprintf (stdout, "</dimension>\n");
}
int n_dims;
dump_dims(void)
{
/* Reads the dimension count into the file-scope n_dims, then dumps each
   dimension in order, passing its zero-based position as the index. */
n_dims = get_u32();
- printf ("%u dimensions\n", n_dims);
for (int i = 0; i < n_dims; i++)
- {
- printf("\n");
- dump_dim ();
- }
+ dump_dim (i);
}
static void
check_permutation(a, n_dims, "dimensions");
int x = get_u32();
- printf ("%d data values, starting at %08x\n", x, pos);
+ printf ("<data>\n");
for (int i = 0; i < x; i++)
{
- printf("%08x, index %d:\n", pos, get_u32());
+ printf (" <datum index=\"%d\">\n", get_u32());
match_u32_assert(0);
- dump_value__(0, false);
- putchar('\n');
+ dump_value(stdout, 1, false);
+ fprintf (stdout, " </datum>\n");
}
+ printf ("</data>\n");
}
/* Dumps the title section to stdout, starting at fixed offset 0x27.

   Three values are always emitted, wrapped in <title-local>, <subtype> and
   <title-c>.  A <caption> follows only when the input marks one with byte
   0x31.  Finally each footnote is emitted as a <footnote> element holding
   its value; an optional custom marker record is parsed but not printed. */
static void
dump_title(void)
{
pos = 0x27;
- dump_value__(0, true); putchar('\n');
- dump_value__(0, true); putchar('\n');
+ printf ("<title-local>\n");
+ dump_value(stdout, 0, true);
+ printf ("</title-local>\n");
+
+ printf ("<subtype>\n");
+ dump_value(stdout, 0, true);
+ printf ("</subtype>\n");
+
match_byte_assert(0x31);
- dump_value__(0, true); putchar('\n');
+
+ printf ("<title-c>\n");
+ dump_value(stdout, 0, true);
+ printf ("</title-c>\n");
+
match_byte(0);
match_byte_assert(0x58);
if (match_byte(0x31))
{
- dump_value__(0, false); putchar('\n');
+ printf ("<caption>\n");
+ dump_value(stdout, 0, false);
+ printf ("</caption>\n");
}
else
match_byte_assert(0x58);
int n_footnotes = get_u32();
- if (n_footnotes >= 20)
- {
- fprintf(stderr, "%08x: %d footnotes\n", pos - 4, n_footnotes);
- exit(1);
- }
-
- printf("------\n%d footnotes\n", n_footnotes);
- if (n_footnotes < 20)
+ for (int i = 0; i < n_footnotes; i++)
{
- for (int i = 0; i < n_footnotes; i++)
+ printf ("<footnote index=\"%d\">\n", i);
+ dump_value(stdout, 0, false);
+ if (match_byte (0x31))
{
- printf("footnote %d:\n", i);
- dump_value__(0, false);
- if (match_byte (0x31))
- {
- /* Custom footnote marker string. */
- match_byte_assert(3);
- get_string();
- match_byte_assert(0x58);
- match_u32_assert(0);
- get_string();
- }
- else
- match_byte_assert (0x58);
- printf("(%d)\n", get_u32());
+ /* Custom footnote marker string. */
+ match_byte_assert(3);
+ get_string();
+ match_byte_assert(0x58);
+ match_u32_assert(0);
+ get_string();
}
+ else
+ match_byte_assert (0x58);
+ printf("(%d)\n", get_u32()); /* NOTE(review): bare "(N)" text inside <footnote>, unlike the tagged output elsewhere -- confirm this is intended. */
+ printf ("</footnote>\n");
}
}
-static int
-find_dimensions(void)
-{
- {
- const char dimensions[] = "-,,,.\0";
- int x = try_find_tail(dimensions, sizeof dimensions - 1);
- if (x)
- return x;
- }
-
- const char dimensions[] = "-,,, .\0";
- return find_tail(dimensions, sizeof dimensions - 1);
-}
-
+/* Dumps the style section to stdout.
+
+   Eight records, numbered 1 through 8, are each written as a <style .../>
+   element with index, font, fgcolor and bgcolor attributes.  After them the
+   function skips a table of 4 * count bytes, prints the encoding name as
+   <encoding>, reads and discards five strings, and skips a final
+   length-prefixed region. */
static void
dump_fonts(void)
{
- printf("fonts: offset=%08x\n", pos);
match_byte(0);
for (int i = 1; i <= 8; i++)
{
- printf("%08x: font %d, ", pos, i);
+ printf ("<style index=\"%d\"", i);
match_byte_assert(i);
match_byte_assert(0x31);
- printf("%s, ", get_string());
+ printf(" font=\"%s\"", get_string());
match_byte_assert(0);
match_byte_assert(0);
if (!match_byte(0x40) && !match_byte(0x20) && !match_byte(0x80) && !match_byte(0x10))
if (!match_u32(0) && !match_u32(1) && !match_u32(2))
match_u32_assert(3);
- printf ("%s, ", get_string());
- printf ("%s, ", get_string());
+ printf (" fgcolor=\"%s\"", get_string());
+ printf (" bgcolor=\"%s\"", get_string());
match_u32_assert(0);
match_u32_assert(0);
match_byte_assert(0);
/* Who knows? Ranges from -1 to 8 with no obvious pattern. */
get_u32();
+
+ printf ("/>\n");
}
match_u32_assert(240);
int count = get_u32();
pos += 4 * count;
- char *encoding = get_string();
- printf("encoding=%s\n", encoding);
+ printf ("<encoding>%s</encoding>\n", get_string ());
if (!match_u32(0))
match_u32_assert(UINT32_MAX);
for (int i = 0; i < 5; i++)
get_string();
- pos += get_u32();
+ /* Read the length first, then skip.  In `pos += get_u32()' the read of pos
+ and the call are not ordered with respect to each other, so if get_u32()
+ advances pos (presumably it does -- confirm against its definition) the
+ result would depend on the compiler. */
+ size_t tail_len = get_u32();
+ pos += tail_len;
- if (pos != find_dimensions())
- fprintf (stderr, "%08x / %08x\n", pos, find_dimensions());
}
int
dump_title ();
dump_fonts();
dump_dims ();
- printf("\n\ndata:\n");
dump_data ();
match_byte (1);
if (pos != n)