+#include <float.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
}
#define get_string() get_string(WHERE)
-static void
+static char *
dump_nested_string(void)
{
+ char *s = NULL;
+
match_byte_assert (0);
match_byte_assert (0);
int outer_end = pos + get_u32();
{
match_u32_assert(0);
if (match_byte(0x31))
- get_string();
+ s = get_string();
else
match_byte_assert(0x58);
if (pos != inner_end)
fprintf(stderr, "outer end discrepancy\n");
exit(1);
}
+
+ return s;
}
static void
-dump_value_31(void)
+dump_value_31(FILE *stream)
{
if (match_byte (0x31))
{
if (match_u32 (0))
{
if (match_u32 (1))
- get_string();
+ {
+ /* Only "a" observed as a sample value (although it appears 44 times in the corpus). */
+ get_string();
+ }
else
match_u32_assert (0);
int inner_end = pos + get_u32();
match_u32_assert(0);
if (match_byte(0x31))
- get_string();
+ {
+ /* Appears to be a template string, e.g. '^1 cells (^2) expf < 5. Min exp = ^3...'.
+ Probably doesn't actually appear in output because many examples look unpolished,
+ e.g. 'partial list cases value ^1 shown upper...' */
+ get_string();
+ }
else
match_byte_assert(0x58);
if (pos != inner_end)
if (match_byte(0x31))
{
+ /* Only one example in the corpus. */
match_byte(0);
match_byte(0);
match_byte(0);
}
else if (match_u32 (1))
{
- printf("(footnote %d) ", get_u32());
+ fprintf(stream, "(footnote %d) ", get_u32());
dump_nested_string();
}
else if (match_u32 (2))
{
- printf("(special 2)");
+ fprintf(stream, "(special 2)");
match_byte_assert(0);
match_byte_assert(0);
if (!match_u32 (2))
else
{
match_u32_assert(3);
- printf("(special 3)");
+ fprintf(stream, "(special 3)");
match_byte_assert(0);
match_byte_assert(0);
match_byte_assert(1);
match_byte_assert (0x58);
}
-static void
-dump_value__(int level, bool match1)
+/* Returns the name string for the numeric format type code TYPE (callers
+   pass the upper 16 bits of a format word).  Codes 5 and 40 both map to
+   "F".  An unrecognized code is reported on stderr and the program
+   aborts. */
+static const char *
+format_to_string (int type)
{
- for (int i = 0; i <= level; i++)
- printf (" ");
+ switch (type)
+ {
+ case 1: return "A";
+ case 2: return "AHEX";
+ case 3: return "COMMA";
+ case 4: return "DOLLAR";
+ case 5: case 40: return "F";
+ case 6: return "IB";
+ case 7: return "PIBHEX";
+ case 8: return "P";
+ case 9: return "PIB";
+ case 10: return "PK";
+ case 11: return "RB";
+ case 12: return "RBHEX";
+ case 15: return "Z";
+ case 16: return "N";
+ case 17: return "E";
+ case 20: return "DATE";
+ case 21: return "TIME";
+ case 22: return "DATETIME";
+ case 23: return "ADATE";
+ case 24: return "JDATE";
+ case 25: return "DTIME";
+ case 26: return "WKDAY";
+ case 27: return "MONTH";
+ case 28: return "MOYR";
+ case 29: return "QYR";
+ case 30: return "WKYR";
+ case 31: return "PCT";
+ case 32: return "DOT";
+ case 33: return "CCA";
+ case 34: return "CCB";
+ case 35: return "CCC";
+ case 36: return "CCD";
+ case 37: return "CCE";
+ case 38: return "EDATE";
+ case 39: return "SDATE";
+ default:
+ /* Say which code was unexpected before stopping; abort() does not
+    return, so nothing may usefully follow it. */
+ fprintf (stderr, "unknown format type %d\n", type);
+ abort();
+ }
+}
+/* Parses one value at the current position and writes it to STREAM as an
+   XML-style element on its own line, preceded by LEVEL + 1 repetitions of
+   the indent string.  Template values recurse for each substitution with
+   MATCH1 false.  When MATCH1 is true, the plain-number case additionally
+   tries to consume a trailing byte 1.
+
+   NOTE(review): strings are written into attribute values verbatim, with
+   no escaping of quotes or markup characters. */
+static void
+dump_value__(FILE *stream, int level, bool match1)
+{
match_byte(0);
match_byte(0);
match_byte(0);
match_byte(0);
+ for (int i = 0; i <= level; i++)
+ fprintf (stream, " ");
+
if (match_byte (3))
{
- char *s1 = get_string();
- dump_value_31();
- char *s2 = get_string();
- char *s3 = get_string();
- if (strcmp(s1, s3))
- printf("strings \"%s\", \"%s\" and \"%s\"", s1, s2, s3);
- else
- printf("string \"%s\" and \"%s\"", s1, s2);
+ char *text = get_string();
+ dump_value_31(stream);
+ char *identifier = get_string();
+ char *text_eng = get_string();
+ fprintf (stream, "<string c=\"%s\"", text_eng);
+ if (identifier[0])
+ fprintf (stream, " identifier=\"%s\"", identifier);
+ if (strcmp(text_eng, text))
+ fprintf (stream, " local=\"%s\"", text);
+ fprintf (stream, "/>\n");
if (!match_byte (0))
match_byte_assert(1);
if (match1)
}
else if (match_byte (5))
{
- dump_value_31();
- printf ("variable \"%s\"", get_string());
- get_string();
+ dump_value_31(stream);
+ char *name = get_string ();
+ char *label = get_string ();
+ fprintf (stream, "<variable name=\"%s\"", name);
+ if (label[0])
+ fprintf (stream, " label=\"%s\"", label);
+ fprintf (stream, "/>\n");
if (!match_byte(1) && !match_byte(2))
match_byte_assert(3);
}
value = get_double ();
var = get_string ();
vallab = get_string ();
- printf ("value %g format %d(%d.%d) var \"%s\" vallab \"%s\"",
- value, format >> 16, (format >> 8) & 0xff, format & 0xff, var, vallab);
+ fprintf (stream, "<numeric-datum value=\"%.*g\" format=\"%s%d.%d\"",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ if (var[0])
+ fprintf (stream, " variable=\"%s\"", var);
+ /* The optional attributes only extend the open tag; the element is
+    closed exactly once by the unconditional write that follows. */
+ if (vallab[0])
+ fprintf (stream, " label=\"%s\"", vallab);
+ fprintf (stream, "/>\n");
if (!match_byte (1) && !match_byte(2))
match_byte_assert (3);
}
if (!match_byte(1) && !match_byte(2))
match_byte_assert (3);
value = get_string ();
- printf ("value \"%s\" format %d(%d.%d) var \"%s\" vallab \"%s\"",
- value, format >> 16, (format >> 8) & 0xff, format & 0xff, var, vallab);
+ fprintf (stream, "<string-datum value=\"%s\" format=\"%s%d.%d\"",
+ value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ if (var[0])
+ fprintf (stream, " variable=\"%s\"", var);
+ /* As for numeric data: attributes first, then a single closing "/>". */
+ if (vallab[0])
+ fprintf (stream, " label=\"%s\"", vallab);
+ fprintf (stream, "/>\n");
}
else if (match_byte (1))
{
unsigned int format;
double value;
- dump_value_31();
+ dump_value_31(stream);
format = get_u32 ();
value = get_double ();
- printf ("value %g format %d(%d.%d)", value, format >> 16, (format >> 8) & 0xff, format & 0xff);
+ fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
if (match1)
match_byte (1);
}
else
{
- dump_value_31();
+ dump_value_31(stream);
char *base = get_string();
int x = get_u32();
- printf ("\"%s\" with %d variables:\n", base, x);
+ fprintf (stream, "<template format=\"%s\">\n", base);
for (int i = 0; i < x; i++)
{
int y = get_u32();
y = 1;
else
match_u32_assert(0);
- for (int j = 0; j <= level; j++)
- printf (" ");
- printf("variable %d has %d values:\n", i, y);
+ for (int j = 0; j <= level + 1; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
for (int j = 0; j < y; j++)
- {
- dump_value__ (level + 1, false);
- putchar('\n');
- }
+ dump_value__ (stream, level + 2, false);
+ for (int j = 0; j <= level + 1; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "</substitution>\n");
}
+ for (int j = 0; j <= level; j++)
+ fprintf (stream, " ");
+ fprintf (stream, "</template>\n");
}
}
static void
dump_category(int level, int *indexes, int *n_indexes)
{
- dump_value__ (level, true);
+ dump_value__ (stdout, level, true);
match_byte(0);
match_byte(0);
match_byte(0);
{
int n_categories;
printf("next dim\n");
- dump_value__ (0, false);
+ dump_value__ (stdout, 0, false);
/* This byte is usually 0x02 but 0x00 and 0x75 (!) have also been spotted. */
pos++;
{
printf("%08x, index %d:\n", pos, get_u32());
match_u32_assert(0);
- dump_value__(0, false);
+ dump_value__(stdout, 0, false);
putchar('\n');
}
}
dump_title(void)
{
pos = 0x27;
- dump_value__(0, true); putchar('\n');
- dump_value__(0, true); putchar('\n');
+ printf("text:\n");
+ dump_value__(stdout, 0, true); putchar('\n');
+ printf("subtype:\n");
+ dump_value__(stdout, 0, true); putchar('\n');
match_byte_assert(0x31);
- dump_value__(0, true); putchar('\n');
+ printf("text_eng:\n");
+ dump_value__(stdout, 0, true); putchar('\n');
match_byte(0);
match_byte_assert(0x58);
if (match_byte(0x31))
{
- dump_value__(0, false); putchar('\n');
+ printf("caption:\n");
+ dump_value__(stdout, 0, false); putchar('\n');
}
else
match_byte_assert(0x58);
for (int i = 0; i < n_footnotes; i++)
{
printf("footnote %d:\n", i);
- dump_value__(0, false);
+ dump_value__(stdout, 0, false);
if (match_byte (0x31))
{
/* Custom footnote marker string. */