From: Ben Pfaff Date: Tue, 5 Jan 2016 07:49:48 +0000 (-0800) Subject: Start work on documenting values. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=38a126b38f4963ba033120754e7a4f026e9fa01e Start work on documenting values. --- diff --git a/dump.c b/dump.c index 0445597e63..c247c4af66 100644 --- a/dump.c +++ b/dump.c @@ -219,13 +219,9 @@ get_string(const char *where) #define get_string() get_string(WHERE) static char * -dump_nested_string(void) +dump_counted_string(void) { char *s = NULL; - - match_byte_assert (0); - match_byte_assert (0); - int outer_end = pos + get_u32(); int inner_end = pos + get_u32(); if (pos != inner_end) { @@ -240,6 +236,18 @@ dump_nested_string(void) exit(1); } } + return s; +} + +static char * +dump_nested_string(void) +{ + char *s = NULL; + + match_byte_assert (0); + match_byte_assert (0); + int outer_end = pos + get_u32(); + s = dump_counted_string(); match_byte_assert(0x58); match_byte_assert(0x58); if (pos != outer_end) @@ -260,7 +268,11 @@ dump_optional_value(FILE *stream) { if (match_u32 (1)) { - /* Only "a" observed as a sample value (although it appears 44 times in the corpus). */ + /* Corpus frequencies: + 124 "a" + 12 "b" + 8 "a, b" + */ get_string(); } else @@ -283,25 +295,10 @@ dump_optional_value(FILE *stream) } int outer_end = pos + get_u32(); - int inner_end = pos + get_u32(); - if (pos != inner_end) - { - match_u32_assert(0); - if (match_byte(0x31)) - { - /* Appears to be a template string, e.g. '^1 cells (^2) expf < 5. Min exp = ^3...'. - Probably doesn't actually appear in output because many examples look unpolished, - e.g. 'partial list cases value ^1 shown upper...' */ - get_string(); - } - else - match_byte_assert(0x58); - if (pos != inner_end) - { - fprintf(stderr, "inner end discrepancy\n"); - exit(1); - } - } + + /* This counted-string appears to be a template string, + e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". 
*/ + dump_counted_string(); if (match_byte(0x31)) { @@ -358,7 +355,7 @@ dump_optional_value(FILE *stream) match_byte_assert(0); if (!match_u32 (2) && !match_u32(1)) match_u32_assert(3); - dump_nested_string(); /* Our corpus doesn't contain any examples with strings though. */ + dump_nested_string(); } else { @@ -435,32 +432,16 @@ dump_value(FILE *stream, int level) for (int i = 0; i <= level; i++) fprintf (stream, " "); - if (match_byte (3)) - { - char *text = get_string(); - dump_optional_value(stream); - char *identifier = get_string(); - char *text_eng = get_string(); - fprintf (stream, "\n"); - if (!match_byte (0)) - match_byte_assert(1); - } - else if (match_byte (5)) + if (match_byte (1)) { + unsigned int format; + double value; + dump_optional_value(stream); - char *name = get_string (); - char *label = get_string (); - fprintf (stream, "\n"); - if (!match_byte(1) && !match_byte(2)) - match_byte_assert(3); + format = get_u32 (); + value = get_double (); + fprintf (stream, "\n", + DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff); } else if (match_byte (2)) { @@ -483,12 +464,27 @@ dump_value(FILE *stream, int level) if (!match_byte (1) && !match_byte(2)) match_byte_assert (3); } + else if (match_byte (3)) + { + char *text = get_string(); + dump_optional_value(stream); + char *identifier = get_string(); + char *text_eng = get_string(); + fprintf (stream, "\n"); + if (!match_byte (0)) + match_byte_assert(1); + } else if (match_byte (4)) { unsigned int format; char *var, *vallab, *value; - match_byte_assert (0x58); + dump_optional_value(stream); format = get_u32 (); vallab = get_string (); var = get_string (); @@ -503,16 +499,17 @@ dump_value(FILE *stream, int level) fprintf (stream, " label=\"%s\"/>\n", vallab); fprintf (stream, "/>\n"); } - else if (match_byte (1)) + else if (match_byte (5)) { - unsigned int format; - double value; - dump_optional_value(stream); - format = get_u32 (); - value = get_double (); - fprintf 
(stream, "\n", - DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff); + char *name = get_string (); + char *label = get_string (); + fprintf (stream, "\n"); + if (!match_byte(1) && !match_byte(2)) + match_byte_assert(3); } else { diff --git a/spv-file-format.texi b/spv-file-format.texi index a5f45e5b49..f9d698706e 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -411,8 +411,12 @@ In every example in the corpus, @code{x2} is 18 and the bytes that follow it are @code{00 00 00 01 00 00 00 00 00 00 00 00 00 02 00 00 00 00}. The meaning of these bytes is unknown. -Observed values of @code{x3} vary from 16 to 150. The bytes that -follow it vary somewhat. +In every example in the corpus for version 1, @code{x3} is 16 and the +bytes that follow it are @code{00 00 00 01 00 00 00 01 00 00 00 00 01 +01 01 01}. In version 3, observed @code{x3} varies from 117 to 150 and +the bytes that follow it vary somewhat and often include a readable +text string, e.g. ``Default'' or ``Academic'', which appears to be the +name of a ``TableLook''. Observed values of @code{x4} vary from 0 to 17. Out of 7060 examples in the corpus, it is nonzero only 36 times. @@ -574,10 +578,13 @@ When there is more than one dimension of a given kind, the inner dimensions are given first. @example -datum := int64[index] 00? value @r{# Version 1.} -datum := int64[index] value @r{# Version 3.} +datum := int64[index] 00? value /* @r{version 1} */ +datum := int64[index] value /* @r{version 3} */ @end example +The format of a datum varies slightly from version 1 to version 3: in +version 1 it allows for an extra optional 00 byte. + A datum consists of an index and a value. Suppose there are @math{d} dimensions and dimension @math{i} for @math{0 \le i < d} has @math{n_i} categories. Consider the datum at coordinates @math{x_i} @@ -594,5 +601,34 @@ For example, suppose there are 3 dimensions with 3, 4, and 5 categories, respectively. 
The datum at coordinates (1, 2, 3) has index @math{5 \times (4 \times (3 \times 0 + 1) + 2) + 3 = 33}. -The format of a datum varies slightly from version 1 to version 3, in -that version 1 has an extra optional 00 byte. +@example +value := 00? 00? 00? 00? raw-value +raw-value := 01 opt-value int32[format] double + | 02 opt-value int32[format] double string[varname] string[vallab] + (01 | 02 | 03) + | 03 string[local] opt-value string[id] string[c] (00 | 01) + | 04 opt-value int32[format] string[vallab] string[varname] + (01 | 02 | 03) string[value] + | 05 opt-value string[varname] string[varlabel] (01 | 02 | 03) + | opt-value string[format] int32[n-substs] substitution*[n-substs] +substitution := i0 value + | int32[x] value*[x + 1] /* @r{x > 0} */ +opt-value := 31 i0 (i0 | i1 string) opt-value-i0-v1 /* @r{version 1} */ + | 31 i0 (i0 | i1 string) opt-value-i0-v3 /* @r{version 3} */ + | 31 i1 int32[footnote-number] nested-string + | 31 i2 (00 | 02) 00 (i1 | i2 | i3) nested-string + | 31 i3 00 00 01 00 i2 nested-string + | 58 +opt-value-i0-v1 := 00 (i1 | i2) 00 00 int32 00 00 +opt-value-i0-v3 := count(counted-string + (58 + | 31 01? 00? 00? 00? 01 + string[fgcolor] string[bgcolor] string[typeface] + byte) + (58 + | 31 i0 i0 i0 i0 01 00 (01 | 02 | 08) + 00 08 00 0a 00)) + +nested-string := 00 00 count(counted-string 58 58) +counted-string := count((i0 (58 | 31 string))?) +@end example