}
static void
-dump_style(void)
+dump_style(FILE *stream)
{
match_byte(1);
match_byte(0);
match_byte(0);
match_byte(0);
match_byte_assert(1);
- get_string(); /* foreground */
- get_string(); /* background */
- get_string(); /* font */
+ char *fg = get_string(); /* foreground; written below as the fgcolor attribute */
+ char *bg = get_string(); /* background; written below as the bgcolor attribute */
+ char *font = get_string(); /* font; written below as the font attribute */
+ int size = data[pos]; /* sample the byte at pos before the match calls below test it against 14 or 12 */
if (!match_byte(14))
match_byte_assert(12); /* size? */
+ fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%d\"",
+ fg, bg, font, size);
}
static char *
-dump_nested_string(void)
+dump_nested_string(FILE *stream)
{
char *s = NULL;
match_byte_assert (0);
int outer_end = get_end();
s = dump_counted_string();
+ if (s)
+ fprintf(stream, " \"%s\"", s);
if (match_byte(0x31))
- dump_style();
+ dump_style(stream);
else
match_byte_assert(0x58);
match_byte_assert(0x58);
}
static void
-dump_optional_value(FILE *stream)
+dump_value_modifier(FILE *stream)
{
if (match_byte (0x31))
{
if (match_u32 (0))
{
+ fprintf(stream, "<special0");
if (match_u32 (1))
{
/* Corpus frequencies:
124 "a"
12 "b"
8 "a, b"
+
+ The given text is appended to the cell in a subscript font.
*/
- get_string();
+ fprintf(stream, " subscript=\"%s\"", get_string());
}
else
match_u32_assert (0);
match_u32_assert(10);
match_byte(0);
match_byte(0);
+ fprintf(stream, "/>\n");
return;
}
/* This counted-string appears to be a template string,
e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */
- dump_counted_string();
+ char *template = dump_counted_string();
+ if (template)
+ fprintf(stream, " template=\"%s\"", template);
if (match_byte(0x31))
- dump_style();
+ dump_style(stream);
else
match_byte_assert(0x58);
if (match_byte(0x31))
fprintf(stderr, "outer end discrepancy\n");
exit(1);
}
+ fprintf(stream, "/>\n");
}
else if (match_u32 (1))
{
- fprintf(stream, "(footnote %d) ", get_u32());
- dump_nested_string();
+ fprintf(stream, "<footnote-ref index=%d", get_u32());
+ dump_nested_string(stream);
+ fprintf(stream, "/>\n");
}
else if (match_u32 (2))
{
- fprintf(stream, "(special 2)");
+ fprintf(stream, "<special2 %d %d", data[pos], data[pos + 2]);
if (!match_byte(0) && !match_byte(1))
match_byte_assert(2);
match_byte_assert(0);
if (!match_u32 (2) && !match_u32(1))
match_u32_assert(3);
- dump_nested_string();
+ dump_nested_string(stream);
+ fprintf(stream, "/>\n");
}
else
{
+ fprintf(stream, "<special3");
match_u32_assert(3);
- fprintf(stream, "(special 3)");
match_byte_assert(0);
match_byte_assert(0);
match_byte_assert(1);
match_byte_assert(0);
match_u32_assert(2);
- dump_nested_string(); /* Our corpus doesn't contain any examples with strings though. */
+ dump_nested_string(stream); /* Our corpus doesn't contain any examples with strings though. */
+ fprintf(stream, "/>\n");
}
}
else
unsigned int format;
double value;
- dump_optional_value(stream);
+ dump_value_modifier(stream);
format = get_u32 ();
value = get_double ();
fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
char *var, *vallab;
double value;
- dump_optional_value (stream);
+ dump_value_modifier (stream);
format = get_u32 ();
value = get_double ();
var = get_string ();
else if (match_byte (3))
{
char *text = get_string();
- dump_optional_value(stream);
+ dump_value_modifier(stream);
char *identifier = get_string();
char *text_eng = get_string();
fprintf (stream, "<string c=\"%s\"", text_eng);
unsigned int format;
char *var, *vallab, *value;
- dump_optional_value(stream);
+ dump_value_modifier(stream);
format = get_u32 ();
vallab = get_string ();
var = get_string ();
}
else if (match_byte (5))
{
- dump_optional_value(stream);
+ dump_value_modifier(stream);
char *name = get_string ();
char *label = get_string ();
fprintf (stream, "<variable name=\"%s\"", name);
}
else
{
- dump_optional_value(stream);
+ dump_value_modifier(stream);
char *base = get_string();
int x = get_u32();
/* This might be a version number of some kind, because value 1 seems
to only appear in an SPV file that also required its own weird
- special cases in dump_optional_value(). */
+ special cases in dump_value_modifier(). */
version = get_u32();
pos -= 4;
if (!match_u32(1))
-------------
The primary format specifier for substitution 1 is ^1.
-Substitutions with multiple values take the form [:^1:]1. The : are mandatory even though not apparently useful.
+
+Substitutions with multiple values take the form [:^1:]1, where ]1
+references substitution 1 and ^1 refers to the first value within the
+substitution. The two : are mandatory even though they have no apparent use.
+
+There can be extra text, e.g. [:^1\n:]1.
+
Substitutions with two forms for multiple values are written as [%1:, ^1:]1.
+
+The general form appears to be [a:b:]x. Apparently, % is used in a and
+^ is used in b, but there's no obvious reason why.
+
Substitutions can take two values at a time, like: [:^1 = ^2:]1
but more often there would be two forms: [%1 = %2:, ^1 = ^2:]1.
-
+A literal : is written as \: (a backslash followed by a colon).
+Presumably a literal ^ is written \^, but there are no examples.
WM is constant when Age = 19.00. It will be included in any boxplots produced but other output will be omitted.
<template format="^1 is constant when [%1 = %2:, ^1 = ^2:]2. It will be included in any boxplots produced but other output will be omitted.">
@example
value := 00? 00? 00? 00? raw-value
-raw-value := 01 opt-value int32[format] double[x]
- | 02 opt-value int32[format] double[x]
- string[varname] string[vallab] (01 | 02 | 03)
- | 03 string[local] opt-value string[id] string[c] (00 | 01)[type]
- | 04 opt-value int32[format] string[vallab] string[varname]
- (01 | 02 | 03) string[vallab]
- | 05 opt-value string[varname] string[varlabel] (01 | 02 | 03)
- | opt-value string[format] int32[n-substs] substitution*[n-substs]
-substitution := i0 value
- | int32[x] value*[x + 1] /* @r{x > 0} */
-opt-value := 31 i0 (i0 | i1 string) opt-value-i0-v1 /* @r{version 1} */
- | 31 i0 (i0 | i1 string) opt-value-i0-v3 /* @r{version 3} */
- | 31 i1 int32[footnote-number] nested-string
- | 31 i2 (00 | 01 | 02) 00 (i1 | i2 | i3) nested-string
- | 31 i3 00 00 01 00 i2 nested-string
- | 58
-opt-value-i0-v1 := 00 (i1 | i2) 00 00 int32 00 00
-opt-value-i0-v3 := count(counted-string
+raw-value :=
+ 01 value-mod int32[format] double[x]
+ | 02 value-mod int32[format] double[x]
+ string[varname] string[vallab] (01 | 02 | 03)
+ | 03 string[local] value-mod string[id] string[c] (00 | 01)[type]
+ | 04 value-mod int32[format] string[vallab] string[varname]
+ (01 | 02 | 03) string[s]
+ | 05 value-mod string[varname] string[varlabel] (01 | 02 | 03)
+ | value-mod string[format] int32[n-substs] substitution*[n-substs]
+substitution :=
+ i0 value
+ | int32[x] value*[x + 1] /* @r{x > 0} */
+value-mod :=
+ 31 i0 (i0 | i1 string[subscript]) value-mod-i0-v1 /* @r{version 1} */
+ | 31 i0 (i0 | i1 string[subscript]) value-mod-i0-v3 /* @r{version 3} */
+ | 31 i1 int32[footnote-number] template
+ | 31 i2 (00 | 01 | 02) 00 (i1 | i2 | i3) template
+ | 31 i3 00 00 01 00 i2 template
+ | 58
+value-mod-i0-v1 := 00 (i1 | i2) 00 00 int32 00 00
+value-mod-i0-v3 := count(template-string
(58 | 31 style)
(58
| 31 i0 i0 i0 i0 01 00 (01 | 02 | 08)
00 08 00 0a 00))
style := 01? 00? 00? 00? 01 string[fgcolor] string[bgcolor] string[font] byte
-nested-string := 00 00 count(counted-string (58 | 31 style) 58)
-counted-string := count((i0 (58 | 31 string))?)
+template := 00 00 count(template-string (58 | 31 style) 58)
+template-string := count((i0 (58 | 31 string))?)
@end example
A @code{value} boils down to a number or a string. There are several
whether the value or the label should be displayed.
@item 03
-A text string that originates from the software program (rather than
-from user data). The string is provided in two forms: @code{c} is in
-English and @code{local} is localized to the user's language
-environment. In an English-language locale, the two strings are often
-the same, and in cases where they differ @code{c} is often abbreviated
-or obscure and @code{local} is more appropriate for a user interface,
-e.g.@: @code{c} of ``Not a PxP table for MCN...'' versus @code{local}
-of ``Computed only for a PxP table, where P must be greater than 1.''
+A text string, in two forms: @code{c} is in English, and sometimes
+abbreviated or obscure, and @code{local} is localized to the user's
+locale. In an English-language locale, the two strings are often the
+same, and in the cases where they differ, @code{local} is more
+appropriate for a user interface, e.g.@: @code{c} of ``Not a PxP table
+for MCN...'' versus @code{local} of ``Computed only for a PxP table,
+where P must be greater than 1.''
+
+@code{c} and @code{local} are always either both empty or both
+nonempty.
@code{id} is a brief identifying string whose form seems to resemble a
programming language identifier, e.g.@: @code{cumulative_percent} or
@code{type} is 00 for text taken from user input, such as syntax
fragment, expressions, file names, data set names, and 01 for fixed
-text strings such as names of procedures or statistics.
+text strings such as names of procedures or statistics. In the former
+case, @code{id} is always the empty string; in the latter case,
+@code{id} is still sometimes empty.
@item 04
+The string value @code{s}, presented to the user formatted according
+to @code{format}. The format for a string is not too interesting, and
+clearly invalid formats like A16.39 or A255.127 or A134.1 abound in
+the corpus, so readers should probably ignore the format entirely.
+
+@code{s} is a value of variable @code{varname} and has value label
+@code{vallab}. @code{varname} is never empty but @code{vallab} is
+commonly empty.
+
+The meaning of the final byte is unknown.
+
+@item 05
+Variable @code{varname}, which is rarely observed as empty in the
+corpus, with variable label @code{varlabel}, which is often empty.
+
+The meaning of the final byte is unknown.
+@end itemize