From: Ben Pfaff Date: Tue, 11 Nov 2014 06:23:46 +0000 (-0800) Subject: dump: Work on dumping to an XML-like format. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=016b5f18d03c3eafe8e0b14f0db2c9e23d6bfc03;p=pspp dump: Work on dumping to an XML-like format. --- diff --git a/dump.c b/dump.c index ff06d66747..e1446edeb8 100644 --- a/dump.c +++ b/dump.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -207,9 +208,11 @@ get_string(const char *where) } #define get_string() get_string(WHERE) -static void +static char * dump_nested_string(void) { + char *s = NULL; + match_byte_assert (0); match_byte_assert (0); int outer_end = pos + get_u32(); @@ -218,7 +221,7 @@ dump_nested_string(void) { match_u32_assert(0); if (match_byte(0x31)) - get_string(); + s = get_string(); else match_byte_assert(0x58); if (pos != inner_end) @@ -234,17 +237,22 @@ dump_nested_string(void) fprintf(stderr, "outer end discrepancy\n"); exit(1); } + + return s; } static void -dump_value_31(void) +dump_value_31(FILE *stream) { if (match_byte (0x31)) { if (match_u32 (0)) { if (match_u32 (1)) - get_string(); + { + /* Only "a" observed as a sample value (although it appears 44 times in the corpus). */ + get_string(); + } else match_u32_assert (0); @@ -252,7 +260,12 @@ dump_value_31(void) int inner_end = pos + get_u32(); match_u32_assert(0); if (match_byte(0x31)) - get_string(); + { + /* Appears to be a template string, e.g. '^1 cells (^2) expf < 5. Min exp = ^3...'. + Probably doesn't actually appear in output because many examples look unpolished, + e.g. 'partial list cases value ^1 shown upper...' */ + get_string(); + } else match_byte_assert(0x58); if (pos != inner_end) @@ -263,6 +276,7 @@ dump_value_31(void) if (match_byte(0x31)) { + /* Only one example in the corpus. */ match_byte(0); match_byte(0); match_byte(0); @@ -283,12 +297,12 @@ dump_value_31(void) } else if (match_u32 (1)) { - printf("(footnote %d) ", get_u32()); + fprintf(stream, "(footnote %d) ", get_u32()); dump_nested_string(); } else if (match_u32 (2)) { - printf("(special 2)"); + fprintf(stream, "(special 2)"); match_byte_assert(0); match_byte_assert(0); if (!match_u32 (2)) @@ -298,7 +312,7 @@ dump_value_31(void) else { match_u32_assert(3); - printf("(special 3)"); + fprintf(stream, "(special 3)"); match_byte_assert(0); match_byte_assert(0); match_byte_assert(1); @@ -311,27 +325,77 @@ dump_value_31(void) match_byte_assert (0x58); } -static void -dump_value__(int level, bool match1) +static const char * +format_to_string (int type) { - for (int i = 0; i <= level; i++) - printf (" "); + static char tmp[16]; + switch (type) + { + case 1: return "A"; + case 2: return "AHEX"; + case 3: return "COMMA"; + case 4: return "DOLLAR"; + case 5: case 40: return "F"; + case 6: return "IB"; + case 7: return "PIBHEX"; + case 8: return "P"; + case 9: return "PIB"; + case 10: return "PK"; + case 11: return "RB"; + case 12: return "RBHEX"; + case 15: return "Z"; + case 16: return "N"; + case 17: return "E"; + case 20: return "DATE"; + case 21: return "TIME"; + case 22: return "DATETIME"; + case 23: return "ADATE"; + case 24: return "JDATE"; + case 25: return "DTIME"; + case 26: return "WKDAY"; + case 27: return "MONTH"; + case 28: return "MOYR"; + case 29: return "QYR"; + case 30: return "WKYR"; + case 31: return "PCT"; + case 32: return "DOT"; + case 33: return "CCA"; + case 34: return "CCB"; + case 35: return "CCC"; + case 36: return "CCD"; + case 37: return "CCE"; + case 38: return "EDATE"; + case 39: return "SDATE"; + default: + abort(); + sprintf(tmp, "<%d>", type); + return tmp; + } +} +static void +dump_value__(FILE *stream, int level, bool match1) +{ match_byte(0); match_byte(0); match_byte(0); match_byte(0); + for (int i = 0; i <= level; i++) + fprintf (stream, " "); + if (match_byte (3)) { - char *s1 = get_string(); - dump_value_31(); - char *s2 = get_string(); - char *s3 = get_string(); - if (strcmp(s1, s3)) - printf("strings \"%s\", \"%s\" and \"%s\"", s1, s2, s3); - else - printf("string \"%s\" and \"%s\"", s1, s2); + char *text = get_string(); + dump_value_31(stream); + char *identifier = get_string(); + char *text_eng = get_string(); + fprintf (stream, "\n"); if (!match_byte (0)) match_byte_assert(1); if (match1) @@ -339,9 +403,13 @@ dump_value__(int level, bool match1) } else if (match_byte (5)) { - dump_value_31(); - printf ("variable \"%s\"", get_string()); - get_string(); + dump_value_31(stream); + char *name = get_string (); + char *label = get_string (); + fprintf (stream, "\n"); if (!match_byte(1) && !match_byte(2)) match_byte_assert(3); } @@ -356,8 +424,13 @@ dump_value__(int level, bool match1) value = get_double (); var = get_string (); vallab = get_string (); - printf ("value %g format %d(%d.%d) var \"%s\" vallab \"%s\"", - value, format >> 16, (format >> 8) & 0xff, format & 0xff, var, vallab); + fprintf (stream, "> 16), (format >> 8) & 0xff, format & 0xff); + if (var[0]) + fprintf (stream, " variable=\"%s\"", var); + if (vallab[0]) + fprintf (stream, " label=\"%s\"/>\n", vallab); + fprintf (stream, "/>\n"); if (!match_byte (1) && !match_byte(2)) match_byte_assert (3); } @@ -373,28 +446,34 @@ dump_value__(int level, bool match1) if (!match_byte(1) && !match_byte(2)) match_byte_assert (3); value = get_string (); - printf ("value \"%s\" format %d(%d.%d) var \"%s\" vallab \"%s\"", - value, format >> 16, (format >> 8) & 0xff, format & 0xff, var, vallab); + fprintf (stream, "> 16), (format >> 8) & 0xff, format & 0xff); + if (var[0]) + fprintf (stream, " variable=\"%s\"", var); + if (vallab[0]) + fprintf (stream, " label=\"%s\"/>\n", vallab); + fprintf (stream, "/>\n"); } else if (match_byte (1)) { unsigned int format; double value; - dump_value_31(); + dump_value_31(stream); format = get_u32 (); value = get_double (); - printf ("value %g format %d(%d.%d)", value, format >> 16, (format >> 8) & 0xff, format & 0xff); + fprintf (stream, "\n", + DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff); if (match1) match_byte (1); } else { - dump_value_31(); + dump_value_31(stream); char *base = get_string(); int x = get_u32(); - printf ("\"%s\" with %d variables:\n", base, x); + fprintf (stream, "\n"); } } @@ -442,7 +524,7 @@ check_permutation(int *a, int n, const char *name) static void dump_category(int level, int *indexes, int *n_indexes) { - dump_value__ (level, true); + dump_value__ (stdout, level, true); match_byte(0); match_byte(0); match_byte(0); @@ -484,7 +566,7 @@ dump_dim(void) { int n_categories; printf("next dim\n"); - dump_value__ (0, false); + dump_value__ (stdout, 0, false); /* This byte is usually 0x02 but 0x00 and 0x75 (!) have also been spotted. */ pos++; @@ -543,7 +625,7 @@ dump_data(void) { printf("%08x, index %d:\n", pos, get_u32()); match_u32_assert(0); - dump_value__(0, false); + dump_value__(stdout, 0, false); putchar('\n'); } } @@ -552,15 +634,19 @@ static void dump_title(void) { pos = 0x27; - dump_value__(0, true); putchar('\n'); - dump_value__(0, true); putchar('\n'); + printf("text:\n"); + dump_value__(stdout, 0, true); putchar('\n'); + printf("subtype:\n"); + dump_value__(stdout, 0, true); putchar('\n'); match_byte_assert(0x31); - dump_value__(0, true); putchar('\n'); + printf("text_eng:\n"); + dump_value__(stdout, 0, true); putchar('\n'); match_byte(0); match_byte_assert(0x58); if (match_byte(0x31)) { - dump_value__(0, false); putchar('\n'); + printf("caption:\n"); + dump_value__(stdout, 0, false); putchar('\n'); } else match_byte_assert(0x58); @@ -579,7 +665,7 @@ dump_title(void) for (int i = 0; i < n_footnotes; i++) { printf("footnote %d:\n", i); - dump_value__(0, false); + dump_value__(stdout, 0, false); if (match_byte (0x31)) { /* Custom footnote marker string. */