From 9692b579384a142daf732706ad2f4401481a3077 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 9 Aug 2015 08:59:55 -0700 Subject: [PATCH] Clarify confusing category stuff. --- dump.c | 19 +++++++++---------- spv-file-format.texi | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/dump.c b/dump.c index 830a5dac1d..e37202000c 100644 --- a/dump.c +++ b/dump.c @@ -580,17 +580,16 @@ dump_category(int level, int *indexes, int *n_indexes, int max_indexes) match_byte(0); match_byte(0); - if (match_u32 (1)) - match_byte (0); - else if (match_byte (1)) + if (match_byte (1)) { - match_byte (0); - if (!match_u32 (2)) - match_u32_assert (1); - match_byte (0); + if (!match_byte (0) && !match_byte (1)) + match_byte_assert (2); } - else if (!match_u32(2)) - match_u32_assert (0); + else if (!match_byte (2)) + match_byte_assert (0); + match_byte_assert (0); + match_byte_assert (0); + match_byte_assert (0); int indx = get_u32(); int n_categories = get_u32(); @@ -629,7 +628,7 @@ dump_dim(int indx) printf ("\n", indx); dump_value (stdout, 0); - /* This byte is usually 0x02 but many other values have been spotted. */ + /* This byte is usually 0 but many other values have been spotted. */ pos++; if (!match_byte(0) && !match_byte(1)) diff --git a/spv-file-format.texi b/spv-file-format.texi index 89749de0da..e7c3f88890 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -489,3 +489,34 @@ font f1 f2 f3 f4 f5 f6 f7 f8 7 40 0 64173 0/1 8 10/11 1 1 8 40 0 2 3 8 10/11 1 4 @end example + +@example +dimensions := int[n-dims] dimension*[n-dims] +dimension := value[name] + byte[d1] + (00 | 01 | 02)[d2] + (i0 | i2)[d3] + (00 | 01)[d4] + (00 | 01)[d5] + 01 + int[d6] + int[n-categories] category*[n-categories] +@end example + +@code{name} is the name of the dimension, e.g. @code{Variables}, +@code{Statistics}, or a variable name. + +@code{d1} is usually 0 but many other values have been observed. + +@code{d3} is 2 over 99% of the time. + +@code{d5} is 0 over 99% of the time. + +@code{d6} is either -1 or the 0-based index of the dimension, e.g.@: 0 +for the first dimension, 1 for the second, and so on. The latter is +the case 98% of the time in the corpus. + +@example +category := value i1 + (00 | 01 (00 | 01 | 02) | 02) 00 00 00 +@end example -- 2.30.2