From: Ben Pfaff Date: Sun, 10 Jan 2016 00:49:54 +0000 (-0800) Subject: Figured out more details, documented most values, work on substitutions. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=4d4f766d5ff71793b51f22c5fca3be7775de9991 Figured out more details, documented most values, work on substitutions. --- diff --git a/dump.c b/dump.c index b383f43590..a52cafddf0 100644 --- a/dump.c +++ b/dump.c @@ -265,22 +265,25 @@ hex_dump(int ofs, int n) } static void -dump_style(void) +dump_style(FILE *stream) { match_byte(1); match_byte(0); match_byte(0); match_byte(0); match_byte_assert(1); - get_string(); /* foreground */ - get_string(); /* background */ - get_string(); /* font */ + char *fg = get_string(); /* foreground */ + char *bg = get_string(); /* background */ + char *font = get_string(); /* font */ + int size = data[pos]; if (!match_byte(14)) match_byte_assert(12); /* size? */ + fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%d\"", + fg, bg, font, size); } static char * -dump_nested_string(void) +dump_nested_string(FILE *stream) { char *s = NULL; @@ -288,8 +291,10 @@ dump_nested_string(void) match_byte_assert (0); int outer_end = get_end(); s = dump_counted_string(); + if (s) + fprintf(stream, " \"%s\"", s); if (match_byte(0x31)) - dump_style(); + dump_style(stream); else match_byte_assert(0x58); match_byte_assert(0x58); @@ -303,20 +308,23 @@ dump_nested_string(void) } static void -dump_optional_value(FILE *stream) +dump_value_modifier(FILE *stream) { if (match_byte (0x31)) { if (match_u32 (0)) { + fprintf(stream, "\n"); return; } @@ -341,10 +350,12 @@ dump_optional_value(FILE *stream) /* This counted-string appears to be a template string, e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */ - dump_counted_string(); + char *template = dump_counted_string(); + if (template) + fprintf(stream, " template=\"%s\"", template); if (match_byte(0x31)) - dump_style(); + dump_style(stream); else match_byte_assert(0x58); if (match_byte(0x31)) @@ -372,32 +383,36 @@ dump_optional_value(FILE *stream) fprintf(stderr, "outer end discrepancy\n"); exit(1); } + fprintf(stream, "/>\n"); } else if (match_u32 (1)) { - fprintf(stream, "(footnote %d) ", get_u32()); - dump_nested_string(); + fprintf(stream, "\n"); } else if (match_u32 (2)) { - fprintf(stream, "(special 2)"); + fprintf(stream, "\n"); } else { + fprintf(stream, "\n"); } } else @@ -468,7 +483,7 @@ dump_value(FILE *stream, int level) unsigned int format; double value; - dump_optional_value(stream); + dump_value_modifier(stream); format = get_u32 (); value = get_double (); fprintf (stream, "\n", @@ -480,7 +495,7 @@ dump_value(FILE *stream, int level) char *var, *vallab; double value; - dump_optional_value (stream); + dump_value_modifier (stream); format = get_u32 (); value = get_double (); var = get_string (); @@ -498,7 +513,7 @@ dump_value(FILE *stream, int level) else if (match_byte (3)) { char *text = get_string(); - dump_optional_value(stream); + dump_value_modifier(stream); char *identifier = get_string(); char *text_eng = get_string(); fprintf (stream, " diff --git a/spv-file-format.texi b/spv-file-format.texi index 82367ddef4..cb2cae75b4 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -621,32 +621,35 @@ index @math{5 \times (4 \times (3 \times 0 + 1) + 2) + 3 = 33}. @example value := 00? 00? 00? 00? raw-value -raw-value := 01 opt-value int32[format] double[x] - | 02 opt-value int32[format] double[x] - string[varname] string[vallab] (01 | 02 | 03) - | 03 string[local] opt-value string[id] string[c] (00 | 01)[type] - | 04 opt-value int32[format] string[vallab] string[varname] - (01 | 02 | 03) string[vallab] - | 05 opt-value string[varname] string[varlabel] (01 | 02 | 03) - | opt-value string[format] int32[n-substs] substitution*[n-substs] -substitution := i0 value - | int32[x] value*[x + 1] /* @r{x > 0} */ -opt-value := 31 i0 (i0 | i1 string) opt-value-i0-v1 /* @r{version 1} */ - | 31 i0 (i0 | i1 string) opt-value-i0-v3 /* @r{version 3} */ - | 31 i1 int32[footnote-number] nested-string - | 31 i2 (00 | 01 | 02) 00 (i1 | i2 | i3) nested-string - | 31 i3 00 00 01 00 i2 nested-string - | 58 -opt-value-i0-v1 := 00 (i1 | i2) 00 00 int32 00 00 -opt-value-i0-v3 := count(counted-string +raw-value := + 01 value-mod int32[format] double[x] + | 02 value-mod int32[format] double[x] + string[varname] string[vallab] (01 | 02 | 03) + | 03 string[local] value-mod string[id] string[c] (00 | 01)[type] + | 04 value-mod int32[format] string[vallab] string[varname] + (01 | 02 | 03) string[s] + | 05 value-mod string[varname] string[varlabel] (01 | 02 | 03) + | value-mod string[format] int32[n-substs] substitution*[n-substs] +substitution := + i0 value + | int32[x] value*[x + 1] /* @r{x > 0} */ +value-mod := + 31 i0 (i0 | i1 string[subscript]) value-mod-i0-v1 /* @r{version 1} */ + | 31 i0 (i0 | i1 string[subscript]) value-mod-i0-v3 /* @r{version 3} */ + | 31 i1 int32[footnote-number] template + | 31 i2 (00 | 01 | 02) 00 (i1 | i2 | i3) template + | 31 i3 00 00 01 00 i2 template + | 58 +value-mod-i0-v1 := 00 (i1 | i2) 00 00 int32 00 00 +value-mod-i0-v3 := count(template-string (58 | 31 style) (58 | 31 i0 i0 i0 i0 01 00 (01 | 02 | 08) 00 08 00 0a 00)) style := 01? 00? 00? 00? 01 string[fgcolor] string[bgcolor] string[font] byte -nested-string := 00 00 count(counted-string (58 | 31 style) 58) -counted-string := count((i0 (58 | 31 string))?) +template := 00 00 count(template-string (58 | 31 style) 58) +template-string := count((i0 (58 | 31 string))?) @end example A @code{value} boils down to a number or a string. There are several @@ -675,14 +678,16 @@ The meaning of the final byte is unknown. Possibly it is connected to whether the value or the label should be displayed. @item 03 -A text string that originates from the software program (rather than -from user data). The string is provided in two forms: @code{c} is in -English and @code{local} is localized to the user's language -environment. In an English-language locale, the two strings are often -the same, and in cases where they differ @code{c} is often abbreviated -or obscure and @code{local} is more appropriate for a user interface, -e.g.@: @code{c} of ``Not a PxP table for MCN...'' versus @code{local} -of ``Computed only for a PxP table, where P must be greater than 1.'' +A text string, in two forms: @code{c} is in English, and sometimes +abbreviated or obscure, and @code{local} is localized to the user's +locale. In an English-language locale, the two strings are often the +same, and in the cases where they differ, @code{local} is more +appropriate for a user interface, e.g.@: @code{c} of ``Not a PxP table +for MCN...'' versus @code{local} of ``Computed only for a PxP table, +where P must be greater than 1.'' + +@code{c} and @code{local} are always either both empty or both +nonempty. @code{id} is a brief identifying string whose form seems to resemble a programming language identifier, e.g.@: @code{cumulative_percent} or @@ -690,6 +695,25 @@ programming language identifier, e.g.@: @code{cumulative_percent} or @code{type} is 00 for text taken from user input, such as syntax fragment, expressions, file names, data set names, and 01 for fixed -text strings such as names of procedures or statistics. +text strings such as names of procedures or statistics. In the former +case, @code{id} is always the empty string; in the latter case, +@code{id} is still sometimes empty. @item 04 +The string value @code{s}, presented to the user formatted according +to @code{format}. The format for a string is not too interesting, and +clearly invalid formats like A16.39 or A255.127 or A134.1 abound in +the corpus, so readers should probably ignore the format entirely. + +@code{s} is a value of variable @code{varname} and has value label +@code{vallab}. @code{varname} is never empty but @code{vallab} is +commonly empty. + +The meaning of the final byte is unknown. + +@item 05 +Variable @code{varname}, which is rarely observed as empty in the +corpus, with variable label @code{varlabel}, which is often empty. + +The meaning of the final byte is unknown. +@end itemize