#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
+#include "u8-mbtouc.h"
static uint8_t *data;
static size_t n;
}
+/* Returns true if the null-terminated string P_ consists only of acceptable
+   characters: every decoded code point must be at least 32 or be a new-line,
+   and must be neither 127 (DEL) nor U+FFFD.  U+FFFD is also what
+   u8_mbtouc() stores for a malformed sequence, so invalid UTF-8 is rejected
+   as well.  An empty string yields true. */
+static bool __attribute__((unused))
+all_utf8(const char *p_)
+{
+  const uint8_t *p = (const uint8_t *) p_;
+  /* Measure through the original pointer so that no cast has to drop
+     const. */
+  size_t len = strlen (p_);
+  /* n_bytes (not "mblen") so the local does not shadow mblen() from
+     <stdlib.h>. */
+  for (size_t ofs = 0, n_bytes; ofs < len; ofs += n_bytes)
+    {
+      ucs4_t uc;
+
+      n_bytes = u8_mbtouc (&uc, p + ofs, len - ofs);
+      if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
+        return false;
+    }
+  return true;
+}
+
static char *
get_string(const char *where)
{
}
/* Dumps one category record, and recursively its subcategories, as
   <category> XML.  LEVEL sets the nesting depth used for indentation.  Each
   category whose index is not -1 has that index appended to INDEXES, with
   *N_INDEXES counting the entries stored so far.  Format violations are
   reported on stderr.
   NOTE(review): MAX_INDEXES is not consulted in the lines visible here before
   the store into INDEXES -- confirm that a bounds check exists. */
static void
-dump_category(int level, int *indexes, int *n_indexes, int max_indexes)
+dump_category(FILE *stream, int level, int *indexes, int *n_indexes, int max_indexes)
{
for (int i = 0; i <= level; i++)
- fprintf (stdout, " ");
+ fprintf (stream, " ");
printf ("<category>\n"); /* NOTE(review): written to stdout, not STREAM, unlike the indentation above. */
- dump_value (stdout, level + 1);
- match_byte(1);
- match_byte(0);
- match_byte(0);
- match_byte(0);
+ dump_value (stream, level + 1);
- if (match_u32 (1))
- match_byte (0);
- else if (match_byte (1))
- {
- match_byte (0);
- if (!match_u32 (2))
- match_u32_assert (1);
- match_byte (0);
- }
- else if (!match_u32(2))
- match_u32_assert (0);
+ int merge = data[pos]; /* Saved before matching; the match calls below appear to accept only 0 or 1. */
+ if (!match_byte(0))
+ match_byte_assert (1);
+
+ match_byte_assert (0);
+
+ int unindexed = data[pos]; /* Later required to equal (indx == -1). */
+ if (!match_byte(0))
+ match_byte_assert (1);
+
+ int x = get_u32 ();
+ pos -= 4; /* Presumably rewinds over the u32 just read so the match below re-examines it. */
+ if (!match_u32 (0))
+ match_u32_assert (2);
int indx = get_u32();
int n_categories = get_u32();
- if (indx != -1)
+ if (indx == -1)
+ {
+ if (merge)
+ {
+ for (int i = 0; i <= level + 1; i++)
+ fprintf (stream, " ");
+ fprintf (stream, "<merge/>\n");
+ }
+ }
+ else
{
+ if (merge)
+ {
+ fprintf(stderr, "index not -1 but merged\n");
+ exit(1);
+ }
+ if (x != 2)
+ {
+ fprintf(stderr, "index not -1 but x != 2\n");
+ exit(1);
+ }
if (n_categories != 0)
{
fprintf(stderr, "index not -1 but subcategories\n"); /* NOTE(review): no exit(1) is visible after this message, unlike the sibling checks -- confirm. */
}
indexes[(*n_indexes)++] = indx;
}
+
+ int expected_unindexed = indx == -1;
+ if (unindexed != expected_unindexed)
+ {
+ fprintf(stderr, "unindexed (%d) mismatch with indx (%d)\n",
+ unindexed, indx);
+ exit(1);
+ }
+
if (n_categories == 0)
{
for (int i = 0; i <= level + 1; i++)
- fprintf (stdout, " ");
- fprintf (stdout, "<category-index>%d</category-index>\n", indx);
+ fprintf (stream, " ");
+ fprintf (stream, "<category-index>%d</category-index>\n", indx);
}
for (int i = 0; i < n_categories; i++)
- dump_category (level + 1, indexes, n_indexes, max_indexes);
+ dump_category (stream, level + 1, indexes, n_indexes, max_indexes);
for (int i = 0; i <= level; i++)
- fprintf (stdout, " ");
+ fprintf (stream, " ");
printf ("</category>\n"); /* NOTE(review): written to stdout, not STREAM, unlike the indentation above. */
}
printf ("<dimension index=\"%d\">\n", indx);
dump_value (stdout, 0);
- /* This byte is usually 0x02 but many other values have been spotted. */
+ /* This byte is usually 0 but many other values have been spotted. */
pos++;
if (!match_byte(0) && !match_byte(1))
int indexes[2048];
int n_indexes = 0;
for (int i = 0; i < n_categories; i++)
- dump_category (0, indexes, &n_indexes, sizeof indexes / sizeof *indexes);
+ dump_category (stdout, 0, indexes, &n_indexes, sizeof indexes / sizeof *indexes);
check_permutation(indexes, n_indexes, "categories");
fprintf (stdout, "</dimension>\n");
dump_data(void)
{
/* The first three numbers add to the number of dimensions. */
- int t = get_u32();
- t += get_u32();
- match_u32_assert(n_dims - t);
+ int l = get_u32();
+ int r = get_u32();
+ int c = n_dims - l - r;
+ match_u32_assert(c);
/* The next n_dims numbers are a permutation of the dimension numbers. */
int a[n_dims];
for (int i = 0; i < n_dims; i++)
- a[i] = get_u32();
+ {
+ int dim = get_u32();
+ a[i] = dim;
+
+ const char *name = i < l ? "layer" : i < l + r ? "row" : "column";
+ printf ("<%s dimension=\"%d\"/>\n", name, dim);
+ }
check_permutation(a, n_dims, "dimensions");
int x = get_u32();
match_byte_assert(0);
if (!match_byte(0x40) && !match_byte(0x20) && !match_byte(0x80) && !match_byte(0x10) && !match_byte(0x70))
match_byte_assert(0x50);
- if (!match_byte(0x41))
- match_byte_assert(0x51);
+ match_byte_assert(0x41);
if (!match_u32(0) && !match_u32(1))
match_u32_assert(2);
match_byte_assert(0);
/* OK, this seems really unlikely to be totally correct, but it matches my corpus... */
if (!match_u32(0) && !match_u32(2))
- match_u32_assert(0xfaad);
+ {
+ if (i == 7)
+ match_u32_assert(0xfaad);
+ else
+ match_u32_assert(0);
+ }
if (!match_u32(0) && !match_u32(1) && !match_u32(2))
match_u32_assert(3);
if (version > 1)
{
- /* These seem unlikely to be correct too. */
if (i != 3)
{
if (!match_u32(8))