#define get_string() get_string(WHERE)
static char *
-dump_nested_string(void)
+dump_counted_string(void)
{
char *s = NULL;
-
- match_byte_assert (0);
- match_byte_assert (0);
- int outer_end = pos + get_u32();
int inner_end = pos + get_u32();
if (pos != inner_end)
{
exit(1);
}
}
+ return s;
+}
+
+static char *
+dump_nested_string(void)
+{
+ char *s = NULL;
+
+ match_byte_assert (0);
+ match_byte_assert (0);
+ int outer_end = pos + get_u32();
+ s = dump_counted_string();
match_byte_assert(0x58);
match_byte_assert(0x58);
if (pos != outer_end)
{
if (match_u32 (1))
{
- /* Only "a" observed as a sample value (although it appears 44 times in the corpus). */
+ /* Corpus frequencies:
+ 124 "a"
+ 12 "b"
+ 8 "a, b"
+ */
get_string();
}
else
}
int outer_end = pos + get_u32();
- int inner_end = pos + get_u32();
- if (pos != inner_end)
- {
- match_u32_assert(0);
- if (match_byte(0x31))
- {
- /* Appears to be a template string, e.g. '^1 cells (^2) expf < 5. Min exp = ^3...'.
- Probably doesn't actually appear in output because many examples look unpolished,
- e.g. 'partial list cases value ^1 shown upper...' */
- get_string();
- }
- else
- match_byte_assert(0x58);
- if (pos != inner_end)
- {
- fprintf(stderr, "inner end discrepancy\n");
- exit(1);
- }
- }
+
+ /* This counted-string appears to be a template string,
+ e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */
+ dump_counted_string();
if (match_byte(0x31))
{
match_byte_assert(0);
if (!match_u32 (2) && !match_u32(1))
match_u32_assert(3);
- dump_nested_string(); /* Our corpus doesn't contain any examples with strings though. */
+ dump_nested_string();
}
else
{
for (int i = 0; i <= level; i++)
fprintf (stream, " ");
- if (match_byte (3))
- {
- char *text = get_string();
- dump_optional_value(stream);
- char *identifier = get_string();
- char *text_eng = get_string();
- fprintf (stream, "<string c=\"%s\"", text_eng);
- if (identifier[0])
- fprintf (stream, " identifier=\"%s\"", identifier);
- if (strcmp(text_eng, text))
- fprintf (stream, " local=\"%s\"", text);
- fprintf (stream, "/>\n");
- if (!match_byte (0))
- match_byte_assert(1);
- }
- else if (match_byte (5))
+ if (match_byte (1))
{
+ unsigned int format;
+ double value;
+
dump_optional_value(stream);
- char *name = get_string ();
- char *label = get_string ();
- fprintf (stream, "<variable name=\"%s\"", name);
- if (label[0])
- fprintf (stream, " label=\"%s\"", label);
- fprintf (stream, "/>\n");
- if (!match_byte(1) && !match_byte(2))
- match_byte_assert(3);
+ format = get_u32 ();
+ value = get_double ();
+ fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
+ DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
}
else if (match_byte (2))
{
if (!match_byte (1) && !match_byte(2))
match_byte_assert (3);
}
+ else if (match_byte (3))
+ {
+ char *text = get_string();
+ dump_optional_value(stream);
+ char *identifier = get_string();
+ char *text_eng = get_string();
+ fprintf (stream, "<string c=\"%s\"", text_eng);
+ if (identifier[0])
+ fprintf (stream, " identifier=\"%s\"", identifier);
+ if (strcmp(text_eng, text))
+ fprintf (stream, " local=\"%s\"", text);
+ fprintf (stream, "/>\n");
+ if (!match_byte (0))
+ match_byte_assert(1);
+ }
else if (match_byte (4))
{
unsigned int format;
char *var, *vallab, *value;
- match_byte_assert (0x58);
+ dump_optional_value(stream);
format = get_u32 ();
vallab = get_string ();
var = get_string ();
fprintf (stream, " label=\"%s\"/>\n", vallab);
fprintf (stream, "/>\n");
}
- else if (match_byte (1))
+ else if (match_byte (5))
{
- unsigned int format;
- double value;
-
dump_optional_value(stream);
- format = get_u32 ();
- value = get_double ();
- fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
- DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
+ char *name = get_string ();
+ char *label = get_string ();
+ fprintf (stream, "<variable name=\"%s\"", name);
+ if (label[0])
+ fprintf (stream, " label=\"%s\"", label);
+ fprintf (stream, "/>\n");
+ if (!match_byte(1) && !match_byte(2))
+ match_byte_assert(3);
}
else
{
follow it are @code{00 00 00 01 00 00 00 00 00 00 00 00 00 02 00 00 00
00}. The meaning of these bytes is unknown.
-Observed values of @code{x3} vary from 16 to 150. The bytes that
-follow it vary somewhat.
+In every example in the corpus for version 1, @code{x3} is 16 and the
+bytes that follow it are @code{00 00 00 01 00 00 00 01 00 00 00 00 01
+01 01 01}. In version 3, observed values of @code{x3} vary from 117 to
+150, and the bytes that follow it vary somewhat; they often include a
+readable text string, e.g. ``Default'' or ``Academic'', which appears
+to be the name of a ``TableLook''.
Observed values of @code{x4} vary from 0 to 17. Out of 7060 examples
in the corpus, it is nonzero only 36 times.
dimensions are given first.
@example
-datum := int64[index] 00? value @r{# Version 1.}
-datum := int64[index] value @r{# Version 3.}
+datum := int64[index] 00? value /* @r{version 1} */
+datum := int64[index] value /* @r{version 3} */
@end example
+The format of a datum varies slightly from version 1 to version 3: in
+version 1 it allows for an extra optional 00 byte.
+
A datum consists of an index and a value. Suppose there are @math{d}
dimensions and dimension @math{i} for @math{0 \le i < d} has
@math{n_i} categories. Consider the datum at coordinates @math{x_i}
categories, respectively. The datum at coordinates (1, 2, 3) has
index @math{5 \times (4 \times (3 \times 0 + 1) + 2) + 3 = 33}.
-The format of a datum varies slightly from version 1 to version 3, in
-that version 1 has an extra optional 00 byte.
+@example
+value := 00? 00? 00? 00? raw-value
+raw-value := 01 opt-value int32[format] double
+ | 02 opt-value int32[format] double string[varname] string[vallab]
+ (01 | 02 | 03)
+ | 03 string[local] opt-value string[id] string[c] (00 | 01)
+ | 04 opt-value int32[format] string[vallab] string[varname]
+ (01 | 02 | 03) string[value]
+ | 05 opt-value string[varname] string[varlabel] (01 | 02 | 03)
+ | opt-value string[format] int32[n-substs] substitution*[n-substs]
+substitution := i0 value
+ | int32[x] value*[x + 1] /* @r{x > 0} */
+opt-value := 31 i0 (i0 | i1 string) opt-value-i0-v1 /* @r{version 1} */
+ | 31 i0 (i0 | i1 string) opt-value-i0-v3 /* @r{version 3} */
+ | 31 i1 int32[footnote-number] nested-string
+ | 31 i2 (00 | 02) 00 (i1 | i2 | i3) nested-string
+ | 31 i3 00 00 01 00 i2 nested-string
+ | 58
+opt-value-i0-v1 := 00 (i1 | i2) 00 00 int32 00 00
+opt-value-i0-v3 := count(counted-string
+ (58
+ | 31 01? 00? 00? 00? 01
+ string[fgcolor] string[bgcolor] string[typeface]
+ byte)
+ (58
+ | 31 i0 i0 i0 i0 01 00 (01 | 02 | 08)
+ 00 08 00 0a 00))
+
+nested-string := 00 00 count(counted-string 58 58)
+counted-string := count((i0 (58 | 31 string))?)
+@end example