+#include <assert.h>
#include <float.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
+#include "u8-mbtouc.h"
static uint8_t *data; /* Input bytes being parsed; indexed by 'pos'. */
static size_t n, pos; /* 'pos' is the current read offset into 'data'; readers check it against 'n' before touching data[pos]. */
#define STR(x) XSTR(x) /* Macro-expands X before passing it to XSTR (defined outside this view; presumably stringifies -- confirm). */
#define WHERE __FILE__":" STR(__LINE__) /* String literal naming the use site, intended as "file:line"; passed to the *_assert helpers for diagnostics. */
+bool ok = true; /* Overall result: main() returns EXIT_SUCCESS while this is true, EXIT_FAILURE otherwise. */
+
static unsigned int
get_u32(void)
{
}
#define match_u32_assert(x) match_u32_assert(x, WHERE)
+/* Consumes the next input byte if it equals B.
+
+   Returns true and advances 'pos' by one when 'pos' is still inside the
+   input (pos < n) and data[pos] == B.  Otherwise returns false and leaves
+   'pos' unchanged. */
+static bool
+match_byte(uint8_t b)
+{
+  if (pos >= n || data[pos] != b)
+    return false;
+
+  pos++;
+  return true;
+}
+
+/* Requires the next input byte to be B and consumes it.
+
+   On a mismatch, or when no input remains, writes a diagnostic to stderr
+   containing WHERE (the caller's location), the current offset, and the
+   expected byte, then exits with status 1.  The actual byte is reported only
+   when one exists, so the error path never reads past the end of 'data'. */
+static void
+match_byte_assert(uint8_t b, const char *where)
+{
+  if (match_byte(b))
+    return;
+
+  if (pos < n)
+    fprintf(stderr, "%s: 0x%zx: expected %02x, got %02x\n",
+            where, pos, (unsigned int) b, (unsigned int) data[pos]);
+  else
+    fprintf(stderr, "%s: 0x%zx: expected %02x, got end of input\n",
+            where, pos, (unsigned int) b);
+  exit(1);
+}
+#define match_byte_assert(b) match_byte_assert(b, WHERE)
+
static bool
all_ascii(const uint8_t *p)
{
return true;
}
+/* Checks whether the null-terminated string P contains only acceptable
+   characters.
+
+   Decodes P one character at a time with u8_mbtouc() and returns false as
+   soon as a decoded character is below 32, equal to 127, or equal to 0xfffd
+   (presumably what u8_mbtouc() yields for malformed input -- confirm against
+   the library documentation).  Returns true if the end of the string is
+   reached without finding such a character. */
+static bool
+all_utf8(const uint8_t *p)
+{
+  size_t len = strlen ((const char *) p);
+  size_t ofs = 0;
+
+  while (ofs < len)
+    {
+      ucs4_t uc;
+      size_t mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
+
+      bool is_control = uc < 32 || uc == 127;
+      if (is_control || uc == 0xfffd)
+        return false;
+
+      ofs += mblen;
+    }
+  return true;
+}
+
static char *
get_fixed_string(int len, const char *where)
{
- if (pos + len > n || !memchr(&data[pos], 0, len) || !all_ascii(&data[pos]))
+ size_t i;
+ for (i = 0; ; i++)
+ {
+ if (!data[pos + i])
+ break;
+ if (i >= len)
+ {
+ fprintf(stderr, "%s: 0x%x: unterminated fixed-width string\n", where, pos);
+ exit(1);
+ }
+ }
+ if (!all_utf8(&data[pos]))
{
fprintf(stderr, "%s: 0x%x: bad fixed-width string\n", where, pos);
exit(1);
}
+ while (++i < len)
+ {
+ if (data[pos + i])
+ {
+ fprintf(stderr, "%s: 0x%x: text in middle of fixed-width string\n", where, pos);
+ //exit(1);
+ break;
+ }
+ }
char *s = (char *) &data[pos];
pos += len;
return s;
static char *
get_string(const char *where)
{
- if (1
+ if (pos + 4 <= n
/*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
/*&& all_ascii(&data[pos + 4], data[pos])*/)
{
for (size_t i = start; i < end; )
{
if (i + 5 <= n
- && data[i] > 0
+ && data[i] > 1
//&& !data[i + 1]
&& !data[i + 2]
&& !data[i + 3]
}
}
+/* Prints N_SERIES data series, reading them from the current input position.
+
+   Each series is read as a 288-byte fixed-width string followed by COUNT
+   doubles.  A double that compares equal to the 'sysmis' constant below is
+   printed as "."; any other value is printed with DBL_DIG significant
+   digits.
+
+   END and NAME are accepted for the caller's convenience but are not used. */
+static void
+dump_source(int end, int count, int n_series, const char *name)
+{
+  /* Byte image of the value that is displayed as "." (on a little-endian
+     IEEE 754 host these bytes are -DBL_MAX). */
+  const union
+    {
+      uint8_t b[8];
+      double d;
+    }
+  sysmis = {.b = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff}};
+
+  (void) end;
+  (void) name;
+
+  for (int series = 0; series < n_series; series++)
+    {
+      /* Capture the offset before reading the name: get_fixed_string()
+         advances 'pos', and the order in which a call's arguments are
+         evaluated is unspecified, so both must not appear in one printf. */
+      size_t series_ofs = pos;
+      const char *series_name = get_fixed_string(288);
+      printf (" %08zx: series %d: \"%s\", %d values:\n ",
+              series_ofs, series, series_name, count);
+
+      for (int obs = 0; obs < count; obs++)
+        {
+          double d = get_double();
+          if (d == sysmis.d)
+            printf (" .");
+          else
+            printf (" %.*g", DBL_DIG, d);
+        }
+      printf ("\n");
+    }
+}
+
+/* Prints the section that follows the data sources, if there is one.
+
+   Does nothing when the input is already exhausted.  Otherwise reads:
+
+   - a count of maps; for each map, a name and a count of series; for each
+     series, a string and a count of (x, y) integer pairs;
+
+   - a count of strings, which must equal the largest y seen plus one (a
+     mismatch prints a diagnostic, dumps the section raw, and asserts);
+
+   - that many (frequency, string) entries.
+
+   The section must end exactly at the end of the input (asserted). */
+static void
+dump_strings(void)
+{
+  if (pos >= n)
+    return;
+
+  size_t start = pos;
+  int n_maps = get_u32();
+  int max1 = -1;
+  for (int k = 0; k < n_maps; k++)
+    {
+      /* Record where this map's name begins so that each map is reported
+         at its own offset. */
+      size_t offset = pos;
+      char *source_name = get_string();
+      printf ("%08zx: %s\n", offset, source_name);
+
+      int n_series = get_u32();
+      for (int i = 0; i < n_series; i++)
+        {
+          printf ("%08zx:", pos);
+          printf (" \"%s\"", get_string());
+          int n_pairs = get_u32();
+          for (int j = 0; j < n_pairs; j++)
+            {
+              int x = get_u32();
+              int y = get_u32();
+              printf (" (%d, %d)", x, y);
+              if (y > max1)
+                max1 = y;
+            }
+          printf ("\n");
+        }
+    }
+  printf ("\n%08zx:", pos);
+  int n_strings = get_u32();
+  if (n_strings != max1 + 1)
+    {
+      fprintf (stderr, "n_strings=%d max1+1=%d (-s %#zx -n %zu)\n",
+               n_strings, max1 + 1, start, n - start);
+      dump_raw (stderr, start, n, "\n");
+      assert(n_strings == max1 + 1);
+    }
+  printf (" %d strings\n", n_strings);
+
+  /* max1 is at least -1, so this is 0 when no pairs were seen.  calloc()
+     rejects a size that would overflow; a null result is only an error when
+     a nonzero number of elements was requested. */
+  size_t n_alloc = (size_t) max1 + 1;
+  char **strings = calloc(n_alloc, sizeof *strings);
+  if (!strings && n_alloc > 0)
+    {
+      fprintf (stderr, "out of memory allocating %zu strings\n", n_alloc);
+      exit (1);
+    }
+  for (int i = 0; i <= max1; i++)
+    {
+      int frequency = get_u32();
+      char *s = get_string();
+      printf ("%d: \"%s\" (%d)\n", i, s, frequency);
+      strings[i] = s;
+    }
+  printf ("\n");
+
+  assert (pos == n);
+#if 0
+  pos = ofs;
+  printf("Strings:\n");
+  for (int i = 0; i < n_more_series; i++)
+    {
+      printf (" \"%s\"\n", get_string());
+      int n_pairs = get_u32();
+      for (int j = 0; j < n_pairs; j++)
+        {
+          int x = get_u32();
+          //assert (x == j);
+          int y = get_u32();
+          printf (" %d: \"%s\"\n", x, strings[y]);
+        }
+      printf ("\n");
+    }
+#endif
+
+  /* Only the table itself is owned here; the entries are whatever
+     get_string() returned and are not freed. */
+  free (strings);
+}
+
int
main(int argc, char **argv)
{
}
pos = 0;
- match_u32_assert(0x1b000);
- match_u32_assert(s.st_size);
+ match_byte_assert(0);
+ int version = data[pos];
+ if (!match_byte(0xaf))
+ match_byte_assert(0xb0);
+ int n_sources = data[pos++];
+ match_byte_assert(0);
- printf ("%08x:", pos);
- int count = get_u32(), n_series = get_u32();
- printf (" %d series, %d observations per series\n", n_series, count);
- printf ("%08x\n\n", pos);
+ match_u32_assert(s.st_size);
+ printf ("%d sources\n", n_sources);
- const union
+ struct source
{
- uint8_t b[8];
- double d;
+ int offset, count, n_series;
+ char *name;
}
- sysmis = {.b = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff}};
- pos = 0x58;
- for (int i = 0; i < n_series; i++)
+ sources[n_sources];
+ for (int i = 0; i < n_sources; i++)
{
- printf ("%08x:", pos);
- printf (" %s\n", get_fixed_string(288));
- printf ("%08x:", pos);
- for (int i = 0; i < count; i++)
- {
- double d = get_double();
- if (d == sysmis.d)
- printf (" .");
- else
- printf (" %.*g", DBL_DIG, d);
- }
- printf ("\n");
+ int count = get_u32();
+ int n_series = get_u32();
+ int offset = get_u32();
+ char *name = get_fixed_string(version == 0xb0 ? 64 : 28);
+ int dunno = version == 0xb0 ? get_u32() : 0;
+ printf ("source %d: %d series, %d observations per series, offset %08x, \"%s\", %x\n",
+ i, n_series, count, offset, name, dunno);
+ sources[i].offset = offset;
+ sources[i].count = count;
+ sources[i].n_series = n_series;
+ sources[i].name = name;
}
- printf ("%08x:", pos);
- printf (" %d", get_u32());
- printf (", \"%s\"\n", get_string());
-
- printf ("\n%08x:", pos);
- int n_more_series = get_u32();
- printf (" %d series to come\n", n_more_series);
-
- for (int i = 0; i < n_more_series; i++)
+ for (int i = 0; i < n_sources; i++)
{
- printf ("%08x:", pos);
- printf (" \"%s\"", get_string());
- int n_pairs = get_u32();
- for (int j = 0; j < n_pairs; j++)
+ if (pos != sources[i].offset)
{
- int x = get_u32();
- int y = get_u32();
- printf (" (%d,%d)", x, y);
+ fprintf (stderr, "pos=0x%x expected=0x%x reading source %d\n", pos, sources[i].offset, i);
+ //exit(1);
}
- printf ("\n");
+ printf ("source %d:\n", i);
+ pos = sources[i].offset;
+ dump_source(i + 1 >= n_sources ? n : sources[i + 1].offset, sources[i].count, sources[i].n_series, sources[i].name);
}
-
- printf ("\n%08x:", pos);
- int n_strings = get_u32();
- printf (" %d strings\n", n_strings);
- for (int i = 0; i < n_strings; i++)
+ dump_strings();
+#if 0
+ if (pos != n)
{
- int x = get_u32();
- char *s = get_string();
- printf ("%d: \"%s\" (%d)\n", i, s, x);
+ fprintf (stderr, "consumed %zu bytes, file has %zu bytes\n", pos, n);
+ ok = false;
}
-
- dump_raw (stdout, pos, n, "\n");
- putchar('\n');
- return 0;
+ assert(pos == n);
+#endif
+
+ return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}