#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
+#include "u8-mbtouc.h"
static uint8_t *data;
static size_t n, pos;
return true;
}
+/* Checks the null-terminated byte string at P one decoded character at a
+   time.  Returns false as soon as a decoded code point is below 32, equal
+   to 127, or equal to 0xfffd; returns true if the end of the string is
+   reached without finding one (including for the empty string).
+
+   NOTE(review): per the libunistring documentation, u8_mbtouc stores 0xfffd
+   for malformed input, so the 0xfffd test presumably doubles as the
+   "not valid UTF-8" check -- confirm against the library version in use. */
+static bool
+all_utf8(const uint8_t *p)
+{
+  const size_t n_bytes = strlen ((const char *) p);
+  size_t i = 0;
+
+  while (i < n_bytes)
+    {
+      ucs4_t c;
+      size_t step = u8_mbtouc (&c, p + i, n_bytes - i);
+
+      bool acceptable = c >= 32 && c != 127 && c != 0xfffd;
+      if (!acceptable)
+        return false;
+
+      /* Advance by however many bytes the decoder consumed. */
+      i += step;
+    }
+  return true;
+}
+
static char *
get_fixed_string(int len, const char *where)
{
- if (pos + len > n || !memchr(&data[pos], 0, len) || !all_ascii(&data[pos]))
+ if (pos + len > n || !memchr(&data[pos], 0, len) || !all_utf8(&data[pos]))
{
fprintf(stderr, "%s: 0x%x: bad fixed-width string\n", where, pos);
exit(1);
}
static void
-dump_source(int count, int n_series)
+dump_source(int end, int count, int n_series)
{
const union
{
printf ("\n");
}
- if (!n_sysmis)
+ if (pos >= end)
return;
- printf ("\n %08x:", pos);
+ printf ("\n %08x: (%d sysmis)", pos, n_sysmis);
printf (" %d", get_u32());
printf (", \"%s\"\n", get_string());
pos = 0;
match_byte_assert(0);
+ int version = data[pos];
if (!match_byte(0xaf))
match_byte_assert(0xb0);
int n_sources = data[pos++];
match_u32_assert(s.st_size);
printf ("%d sources\n", n_sources);
+
+ struct source
+ {
+ int offset, count, n_series;
+ }
+ sources[n_sources];
for (int i = 0; i < n_sources; i++)
{
- pos = 8 + 80 * i;
int count = get_u32();
int n_series = get_u32();
int offset = get_u32();
- char *name = get_fixed_string(64);
- int dunno = get_u32();
+ char *name = get_fixed_string(version == 0xb0 ? 64 : 28);
+ int dunno = version == 0xb0 ? get_u32() : 0;
printf ("source %d: %d series, %d observations per series, offset %08x, \"%s\", %x\n",
i, n_series, count, offset, name, dunno);
- pos = offset;
- dump_source(count, n_series);
+ sources[i].offset = offset;
+ sources[i].count = count;
+ sources[i].n_series = n_series;
+ }
+
+ for (int i = 0; i < n_sources; i++)
+ {
+ if (pos != sources[i].offset)
+ {
+ fprintf (stderr, "pos=0x%x expected=0x%x reading source %d\n", pos, sources[i].offset, i);
+ exit(1);
+ }
+ dump_source(i + 1 >= n_sources ? n : sources[i + 1].offset, sources[i].count, sources[i].n_series);
}
+ assert(pos == n);
return 0;
}