printf ("<category>\n");
dump_value (stream, level + 1);
- int merge = data[pos];
- if (!match_byte(0))
- match_byte_assert (1);
-
+ bool merge = get_bool();
match_byte_assert (0);
-
- int unindexed = data[pos];
- if (!match_byte(0))
- match_byte_assert (1);
+ int unindexed = get_bool();
int x = get_u32 ();
pos -= 4;
fprintf (stream, " ");
fprintf (stream, "<merge/>\n");
}
+ assert (unindexed);
}
else
{
- if (merge)
- {
- fprintf(stderr, "index not -1 but merged\n");
- exit(1);
- }
- if (x != 2)
- {
- fprintf(stderr, "index not -1 but x != 2\n");
- exit(1);
- }
- if (n_categories != 0)
- {
- fprintf(stderr, "index not -1 but subcategories\n");
- exit(1);
- }
+ assert (!merge);
+ assert (!unindexed);
+ assert (x == 2);
+ assert (n_categories == 0);
if (*n_indexes >= *allocated_indexes)
{
*allocated_indexes = *allocated_indexes ? 2 * *allocated_indexes : 16;
(*indexes)[(*n_indexes)++] = indx;
}
- int expected_unindexed = indx == -1;
- if (unindexed != expected_unindexed)
- {
- fprintf(stderr, "unindexed (%d) mismatch with indx (%d)\n",
- unindexed, indx);
- exit(1);
- }
-
if (n_categories == 0)
{
for (int i = 0; i <= level + 1; i++)
printf ("<dimension index=\"%d\">\n", indx);
dump_value (stdout, 0);
- /* This byte is usually 0 but many other values have been spotted. */
+ /* This byte is usually 0 but many other values have been spotted.
+ No visible effect. */
pos++;
+ /* This byte can cause data to be oddly replicated. */
if (!match_byte(0) && !match_byte(1))
match_byte_assert(2);
+
if (!match_u32(0))
match_u32_assert(2);
- if (!match_byte(0))
- match_byte_assert(1);
- if (!match_byte(0))
- match_byte_assert(1);
+
+ bool show_dim_label = get_bool();
+ if (show_dim_label)
+ printf(" <show-dim-label/>\n");
+
+ bool hide_all_labels = get_bool();
+ if (hide_all_labels)
+ printf(" <hide-all-labels/>\n");
+
match_byte_assert(1);
if (!match_u32(UINT32_MAX))
match_u32_assert(indx);
+
n_categories = get_u32();
int *indexes = NULL;
@cartouche
@format
Dimensions @result{} int[@t{n-dims}] Dimension*[@t{n-dims}]
-Dimension @result{} Value[@t{name}] DimUnknown int[@t{n-categories}] Category*[@t{n-categories}]
-DimUnknown @result{}
+Dimension @result{} Value[@t{name}] DimProperties int[@t{n-categories}] Category*[@t{n-categories}]
+DimProperties @result{}
byte[@t{d1}]
(00 @math{|} 01 @math{|} 02)[@t{d2}]
(i0 @math{|} i2)[@t{d3}]
- bool[@t{d4}]
- bool[@t{d5}]
- 01
- int[@t{d6}]
+ bool[@t{show-dim-label}]
+ bool[@t{hide-all-labels}]
+ 01 int[@t{dim-index}]
@end format
@end cartouche
@code{name} is the name of the dimension, e.g. @code{Variables},
@code{Statistics}, or a variable name.
+The meanings of @code{d1}, @code{d2}, and @code{d3} are unknown.
@code{d1} is usually 0 but many other values have been observed.
-@code{d3} is 2 over 99% of the time.
+If @code{show-dim-label} is 01, the pivot table displays a label for
+the dimension itself. Because the group and category labels are
+usually enough explanation, it is usually 00.
-@code{d5} is 0 over 99% of the time.
+If @code{hide-all-labels} is 01, the pivot table omits all labels for
+the dimension, including group and category labels. It is usually 00.
+When @code{hide-all-labels} is 01, @code{show-dim-label} is ignored.
-@code{d6} is either -1 or the 0-based index of the dimension, e.g.@: 0
-for the first dimension, 1 for the second, and so on. The latter is
-the case 98% of the time in the corpus.
+@code{dim-index} is usually the 0-based index of the dimension, e.g.@:
+0 for the first dimension, 1 for the second, and so on. Sometimes it
+is -1. There is no visible difference.
@node SPV Light Member Categories
@subsection Categories
@cartouche
@format
Category @result{} Value[@t{name}] (Leaf @math{|} Group)
-Leaf @result{} 00 00 00 i2 int[@t{index}] i0
+Leaf @result{} 00 00 00 i2 int[@t{cat-index}] i0
Group @result{}
bool[@t{merge}] 00 01 (i0 @math{|} i2)[@t{data}]
i-1 int[@t{n-subcategories}] Category*[@t{n-subcategories}]
@code{name} is the name of the category (or group).
-A Leaf represents a leaf category. The Leaf's @code{index} is a
+A Leaf represents a leaf category. The Leaf's @code{cat-index} is a
nonnegative integer less than @code{n-categories} in the Dimension in
-which the Category is nested (directly or indirectly).
+which the Category is nested (directly or indirectly). The
+@code{cat-index} values reflect the original order of the categories;
+if the user sorted or rearranged the categories, then the order of
+categories in the file reflects that change, but the
+@code{cat-index} values stay the same.
-A Group represents a Group of nested categories. Usually a Group
-contains at least one Category, so that @code{n-subcategories} is
-positive, but a few Groups with @code{n-subcategories} 0 has been
-observed.
+A Group is a group of nested categories. Usually a Group contains at
+least one Category, so that @code{n-subcategories} is positive, but a
+few Groups with @code{n-subcategories} 0 have been observed.
If a Group's @code{merge} is 00, the most common value, then the group
is really a distinct group that should be represented as such in the
Data @result{}
int[@t{layers}] int[@t{rows}] int[@t{columns}] int*[@t{n-dimensions}]
int[@t{n-data}] Datum*[@t{n-data}]
-Datum @result{} int64[@t{index}] v3(00?) Value
+Datum @result{} int64[@t{index}] v1(00?) Value
@end format
@end cartouche
-The values of @code{layers}, @code{rows}, and @code{columns} each
-specifies the number of dimensions displayed in layers, rows, and
+The values of @code{n-layers}, @code{n-rows}, and @code{n-columns}
+each specify the number of dimensions displayed in layers, rows, and
columns, respectively. Any of them may be zero. Their values sum to
@code{n-dimensions} from Dimensions (@pxref{SPV Light Member
Dimensions}).
The @code{n-dimensions} integers are a permutation of the 0-based
-dimension numbers. The first @code{layers} integers specify each of
-the dimensions represented by layers, the next @code{rows} integers
+dimension numbers. The first @code{n-layers} integers specify each of
+the dimensions represented by layers, the next @code{n-rows} integers
specify the dimensions represented by rows, and the final
-@code{columns} integers specify the dimensions represented by columns.
-When there is more than one dimension of a given kind, the inner
-dimensions are given first.
+@code{n-columns} integers specify the dimensions represented by
+columns. When there is more than one dimension of a given kind, the
+inner dimensions are given first.
The format of a Datum varies slightly from version 1 to version 3: in
version 1 it allows for an extra optional 00 byte.
For example, suppose there are 3 dimensions with 3, 4, and 5
categories, respectively. The datum at coordinates (1, 2, 3) has
index @math{5 \times (4 \times (3 \times 0 + 1) + 2) + 3 = 33}.
+Within a given dimension, the index is the @code{cat-index} in a Leaf.
@node SPV Light Member Value
@subsection Value