return pos ? pos - data : 0;
}
-static size_t
-try_find_tail(const char *target, size_t target_len)
-{
- size_t pos = try_find(target, target_len);
- return pos ? pos + target_len : 0;
-}
-
static size_t
find(const char *target, size_t target_len)
{
return pos;
}
-static size_t
-find_tail(const char *target, size_t target_len)
-{
- size_t pos = try_find_tail(target, target_len);
- if (!pos)
- {
- fprintf (stderr, "not found\n");
- exit(1);
- }
- return pos;
-}
-
size_t pos;
#define XSTR(x) #x
#define match_byte_assert(b) match_byte_assert(b, WHERE)
static void
-dump_raw(FILE *stream, int start, int end, const char *separator)
+newline(FILE *stream, int pos)
+{ /* Ends the current line on STREAM and starts a new one labeled with POS
+     as a zero-padded 8-digit hex number followed by ": ".
+     NOTE(review): the POS parameter hides the file-scope `pos` variable
+     inside this function -- confirm that is intended. */
+ fprintf(stream, "\n%08x: ", pos);
+}
+
+static void
+dump_raw(FILE *stream, int start, int end)
{
for (size_t i = start; i < end; )
{
&& i + 4 + data[i] + data[i + 1] * 256 <= end
&& all_ascii(&data[i + 4], data[i] + data[i + 1] * 256))
{
- fprintf(stream, "%s\"", separator);
+ newline(stream, i);
+ fprintf(stream, "\"");
fwrite(&data[i + 4], 1, data[i] + data[i + 1] * 256, stream);
fputs("\" ", stream);
double d;
memcpy (&d, &data[i + 4], 8);
- fprintf (stream, "F40.%d(%.*f)%s", data[i], data[i], d, separator);
+ fprintf (stream, "F40.%d(%.*f)", data[i], data[i], d);
i += 12;
+ newline (stream, i);
}
else if (i + 12 <= end
&& data[i + 1] == 40
double d;
memcpy (&d, &data[i + 4], 8);
- fprintf (stream, "PCT40.%d(%.*f)%s", data[i], data[i], d, separator);
+ fprintf (stream, "PCT40.%d(%.*f)", data[i], data[i], d);
i += 12;
+ newline(stream, i);
}
else if (i + 4 <= end
&& (data[i] && data[i] != 88 && data[i] != 0x41)
if (match_byte(0x31))
{
/* Only one example in the corpus. */
+ match_byte(1);
match_byte(0);
match_byte(0);
match_byte(0);
get_string(); /* foreground */
get_string(); /* background */
get_string(); /* font */
- match_byte_assert(12); /* size? */
+ if (!match_byte(14))
+ match_byte_assert(12); /* size? */
+ }
+ else
+ match_byte_assert(0x58);
+ if (match_byte(0x31))
+ {
+ /* All the examples in the corpus, all from one SPV file, are
+ exactly like this. */
+ match_u32_assert(0);
+ match_u32_assert(0);
+ match_u32_assert(0);
+ match_u32_assert(0);
+ match_byte_assert(1);
+ match_byte_assert(0);
+ match_byte_assert(8);
+ match_byte_assert(0);
+ match_byte_assert(8);
+ match_byte_assert(0);
+ match_byte_assert(10);
+ match_byte_assert(0);
}
else
match_byte_assert(0x58);
- match_byte_assert(0x58);
if (pos != outer_end)
{
fprintf(stderr, "outer end discrepancy\n");
}
static void
-dump_value__(FILE *stream, int level, bool match1)
+dump_value(FILE *stream, int level, bool match1)
{
match_byte(0);
match_byte(0);
fprintf (stream, " ");
fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
for (int j = 0; j < y; j++)
- dump_value__ (stream, level + 2, false);
+ dump_value (stream, level + 2, false);
for (int j = 0; j <= level + 1; j++)
fprintf (stream, " ");
fprintf (stream, "</substitution>\n");
static void
dump_category(int level, int *indexes, int *n_indexes)
{
- dump_value__ (stdout, level, true);
+ for (int i = 0; i <= level; i++) /* Indentation for the opening tag: LEVEL + 1 units. */
+ fprintf (stdout, " ");
+ printf ("<category>\n");
+ dump_value (stdout, level + 1, true); /* The category's value, printed one level deeper than the tag. */
match_byte(0);
match_byte(0);
match_byte(0);
}
indexes[(*n_indexes)++] = indx;
}
- if (n_categories > 0)
- printf (", %d subcategories:", n_categories);
- else
- printf (", index %d", indx);
- printf("\n");
+ if (n_categories == 0) /* Only a category with no subcategories prints its index. */
+ {
+ for (int i = 0; i <= level + 1; i++)
+ fprintf (stdout, " ");
+ fprintf (stdout, "<category-index>%d</category-index>\n", indx);
+ }
for (int i = 0; i < n_categories; i++)
dump_category (level + 1, indexes, n_indexes);
+ for (int i = 0; i <= level; i++) /* Same indentation as the opening <category> tag. */
+ fprintf (stdout, " ");
+ printf ("</category>\n");
}
static void
-dump_dim(void)
+dump_dim(int indx) /* Writes one <dimension index="INDX"> element to stdout:
+   a value, then each top-level category via dump_category(), then the
+   closing tag.  INDX is also passed to match_u32_assert() below.
+   NOTE(review): indexes[] holds 1024 entries and no bound on the number
+   of stored indexes is visible in this function -- confirm that
+   dump_category() cannot overrun it. */
{
int n_categories;
- printf("next dim\n");
- dump_value__ (stdout, 0, false);
+
+ printf ("<dimension index=\"%d\">\n", indx);
+ dump_value (stdout, 0, false);
/* This byte is usually 0x02 but 0x00 and 0x75 (!) have also been spotted. */
pos++;
if (!match_byte(0))
match_byte_assert(1);
match_byte_assert(1);
- static int dim_indx = 0;
- match_u32_assert(dim_indx++);
+ match_u32_assert(indx); /* The caller-supplied INDX replaces the old static counter; presumably the u32 read here must equal it -- confirm against match_u32_assert(). */
n_categories = get_u32();
- printf("%d nested categories\n", n_categories);
int indexes[1024];
int n_indexes = 0;
for (int i = 0; i < n_categories; i++)
dump_category (0, indexes, &n_indexes);
check_permutation(indexes, n_indexes, "categories");
+
+ fprintf (stdout, "</dimension>\n"); /* Closes the element opened at the top of this function. */
}
int n_dims;
dump_dims(void)
{
n_dims = get_u32();
- printf ("%u dimensions\n", n_dims);
for (int i = 0; i < n_dims; i++)
- {
- printf("\n");
- dump_dim ();
- }
+ dump_dim (i); /* Pass the loop position I so dump_dim() can print it and compare it with the index read from the input. */
}
static void
check_permutation(a, n_dims, "dimensions");
int x = get_u32();
- printf ("%d data values, starting at %08x\n", x, pos);
+ printf ("<data>\n");
for (int i = 0; i < x; i++)
{
- printf("%08x, index %d:\n", pos, get_u32());
+ printf (" <datum index=\"%d\">\n", get_u32());
match_u32_assert(0);
- dump_value__(stdout, 0, false);
- putchar('\n');
+ dump_value(stdout, 1, false);
+ fprintf (stdout, " </datum>\n");
}
+ printf ("</data>\n");
}
static void
dump_title(void)
{
pos = 0x27;
- printf("text:\n");
- dump_value__(stdout, 0, true); putchar('\n');
- printf("subtype:\n");
- dump_value__(stdout, 0, true); putchar('\n');
+ printf ("<title-local>\n"); /* First value, read starting at offset 0x27 (set just above). */
+ dump_value(stdout, 0, true);
+ printf ("</title-local>\n");
+
+ printf ("<subtype>\n");
+ dump_value(stdout, 0, true);
+ printf ("</subtype>\n");
+
match_byte_assert(0x31);
- printf("text_eng:\n");
- dump_value__(stdout, 0, true); putchar('\n');
+
+ printf ("<title-c>\n"); /* Takes the place of the old "text_eng:" label. */
+ dump_value(stdout, 0, true);
+ printf ("</title-c>\n");
+
match_byte(0);
match_byte_assert(0x58);
if (match_byte(0x31))
{
- printf("caption:\n");
- dump_value__(stdout, 0, false); putchar('\n');
+ printf ("<caption>\n"); /* Emitted only when the 0x31 marker byte was matched. */
+ dump_value(stdout, 0, false);
+ printf ("</caption>\n");
}
else
match_byte_assert(0x58);
int n_footnotes = get_u32();
- if (n_footnotes >= 20)
- {
- fprintf(stderr, "%08x: %d footnotes\n", pos - 4, n_footnotes);
- exit(1);
- }
-
- printf("------\n%d footnotes\n", n_footnotes);
- if (n_footnotes < 20)
+ for (int i = 0; i < n_footnotes; i++) /* NOTE(review): the former n_footnotes >= 20 sanity check is removed; the count is now used exactly as read -- confirm that is intended. */
{
- for (int i = 0; i < n_footnotes; i++)
+ printf ("<footnote index=\"%d\">\n", i);
+ dump_value(stdout, 0, false);
+ if (match_byte (0x31))
{
- printf("footnote %d:\n", i);
- dump_value__(stdout, 0, false);
- if (match_byte (0x31))
- {
- /* Custom footnote marker string. */
- match_byte_assert(3);
- get_string();
- match_byte_assert(0x58);
- match_u32_assert(0);
- get_string();
- }
- else
- match_byte_assert (0x58);
- printf("(%d)\n", get_u32());
+ /* Custom footnote marker string. */
+ match_byte_assert(3);
+ get_string();
+ match_byte_assert(0x58);
+ match_u32_assert(0);
+ get_string();
}
+ else
+ match_byte_assert (0x58);
+ printf("(%d)\n", get_u32()); /* Written as bare "(N)" text inside the <footnote> element. */
+ printf ("</footnote>\n");
}
}
-static int
-find_dimensions(void)
-{
- {
- const char dimensions[] = "-,,,.\0";
- int x = try_find_tail(dimensions, sizeof dimensions - 1);
- if (x)
- return x;
- }
-
- const char dimensions[] = "-,,, .\0";
- return find_tail(dimensions, sizeof dimensions - 1);
-}
-
static void
dump_fonts(void)
{
- printf("fonts: offset=%08x\n", pos);
match_byte(0);
for (int i = 1; i <= 8; i++)
{
- printf("%08x: font %d, ", pos, i);
+ printf ("<style index=\"%d\"", i); /* Opens a <style> element; attributes are appended below and "/>" closes it at the end of the iteration. */
match_byte_assert(i);
match_byte_assert(0x31);
- printf("%s, ", get_string());
+ printf(" font=\"%s\"", get_string()); /* NOTE(review): the string is written into the attribute as-is, with no escaping visible here -- confirm it cannot contain '"', '<' or '&'. */
match_byte_assert(0);
match_byte_assert(0);
if (!match_byte(0x40) && !match_byte(0x20) && !match_byte(0x80) && !match_byte(0x10))
if (!match_u32(0) && !match_u32(1) && !match_u32(2))
match_u32_assert(3);
- printf ("%s, ", get_string());
- printf ("%s, ", get_string());
+ printf (" fgcolor=\"%s\"", get_string()); /* Two strings are read in sequence: this one is labeled fgcolor, the next bgcolor. */
+ printf (" bgcolor=\"%s\"", get_string());
match_u32_assert(0);
match_u32_assert(0);
match_byte_assert(0);
/* Who knows? Ranges from -1 to 8 with no obvious pattern. */
get_u32();
+
+ printf ("/>\n"); /* Ends the self-closing <style> element started at the top of the loop body. */
}
match_u32_assert(240);
int count = get_u32();
pos += 4 * count;
- char *encoding = get_string();
- printf("encoding=%s\n", encoding);
+ printf ("<encoding>%s</encoding>\n", get_string ()); /* The string is printed directly; the former `encoding` local is no longer needed. */
if (!match_u32(0))
match_u32_assert(UINT32_MAX);
match_byte_assert(0);
if (!match_byte(0))
match_byte_assert(1);
- if (!match_byte(0x99) && !match_byte(0x98))
- match_byte_assert(0x97);
+ if (!match_byte(0x97) && !match_byte(0x98) && !match_byte(0x99)) /* Accepts 0x97, 0x98 or 0x99, falling back to 0x9a; 0x9a is the value newly tolerated here. */
+ match_byte_assert(0x9a);
match_byte_assert(7);
match_byte_assert(0);
match_byte_assert(0);
for (int i = 0; i < 5; i++)
get_string();
pos += get_u32();
- if (pos != find_dimensions())
- fprintf (stderr, "%08x / %08x\n", pos, find_dimensions());
}
int
dump_title ();
dump_fonts();
dump_dims ();
- printf("\n\ndata:\n");
dump_data ();
match_byte (1);
if (pos != n)
else
start = 0x27;
- dump_raw(stdout, start, n, "\n");
+ dump_raw(stdout, start, n);
return 0;
}