From: Ben Pfaff Date: Mon, 4 Jan 2016 06:55:46 +0000 (-0800) Subject: Figured out some meanings for category data. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=368df5e381894c9fa6bb2b51596a1a58b0d869a8 Figured out some meanings for category data. --- diff --git a/dump.c b/dump.c index 65955d8199..e9a068699e 100644 --- a/dump.c +++ b/dump.c @@ -569,38 +569,41 @@ check_permutation(int *a, int n, const char *name) } static void -dump_category(int level, int *indexes, int *n_indexes, int max_indexes) +dump_category(FILE *stream, int level, int *indexes, int *n_indexes, int max_indexes) { for (int i = 0; i <= level; i++) - fprintf (stdout, " "); + fprintf (stream, " "); printf ("\n"); - dump_value (stdout, level + 1); + dump_value (stream, level + 1); - int start = pos; - int d0 = data[pos]; - match_byte(1); - int c = 0; - c += match_byte(0); - c += match_byte(0); - c += match_byte(0); - - int d1 = data[pos]; - int d2 = 9; - if (match_byte (1)) - { - d2 = data[pos]; - if (!match_u32 (0) && !match_u32 (1)) - match_u32_assert (2); - } - else + int merge = data[pos]; + if (!match_byte(0)) + match_byte_assert (1); + match_byte_assert (0); + int unindexed = data[pos]; + if (!match_byte(0)) + match_byte_assert (1); + if (!match_u32 (0)) match_u32_assert (2); - int len = pos - start; int indx = get_u32(); int n_categories = get_u32(); - if (indx != -1) + if (indx == -1) + { + if (merge) + { + for (int i = 0; i <= level + 1; i++) + fprintf (stream, " "); + fprintf (stream, "\n"); + } + } + else { - fprintf (stderr, "category %d %d %d %d %d:", d0, c, d1, d2, len); + if (merge) + { + fprintf(stderr, "index not -1 but merged\n"); + exit(1); + } if (n_categories != 0) { fprintf(stderr, "index not -1 but subcategories\n"); @@ -613,22 +616,25 @@ dump_category(int level, int *indexes, int *n_indexes, int max_indexes) } indexes[(*n_indexes)++] = indx; } - else - fprintf (stderr, "group %d %d %d %d %d:", d0, c, d1, d2, len); - for (int i = 0; i < len; 
i++) - fprintf (stderr, " %02x", data[start + i]); - putc ('\n', stderr); + + int expected_unindexed = indx == -1; + if (unindexed != expected_unindexed) + { + fprintf(stderr, "unindexed (%d) mismatch with indx (%d)\n", + unindexed, indx); + exit(1); + } if (n_categories == 0) { for (int i = 0; i <= level + 1; i++) - fprintf (stdout, " "); - fprintf (stdout, "%d\n", indx); + fprintf (stream, " "); + fprintf (stream, "%d\n", indx); } for (int i = 0; i < n_categories; i++) - dump_category (level + 1, indexes, n_indexes, max_indexes); + dump_category (stream, level + 1, indexes, n_indexes, max_indexes); for (int i = 0; i <= level; i++) - fprintf (stdout, " "); + fprintf (stream, " "); printf ("\n"); } @@ -659,7 +665,7 @@ dump_dim(int indx) int indexes[2048]; int n_indexes = 0; for (int i = 0; i < n_categories; i++) - dump_category (0, indexes, &n_indexes, sizeof indexes / sizeof *indexes); + dump_category (stdout, 0, indexes, &n_indexes, sizeof indexes / sizeof *indexes); check_permutation(indexes, n_indexes, "categories"); fprintf (stdout, "\n"); diff --git a/spv-file-format.texi b/spv-file-format.texi index f6f8bb27dd..5ced97ce61 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -518,17 +518,27 @@ the case 98% of the time in the corpus. @example category := value[name] - 01? 00? 00? 00? - (01 (i0 | i1 | i2) | i2) + (00 | 01)[merge] 00 (00 | 01)[unindexed] (i0 | i2) int[index] int[n-subcategories] category*[n-subcategories] @end example @code{category} can represent a terminal category. 
In that case, -@code{name} is the name of the category, @code{index} is a nonnegative -integer less than @code{n-categories} in the @code{dimension} in which -the @code{category} is nested (directly or indirectly), and +@code{name} is the name of the category, @code{merge} is 00, +@code{unindexed} is 00, @code{index} is a nonnegative integer less +than @code{n-categories} in the @code{dimension} in which the +@code{category} is nested (directly or indirectly), and @code{n-subcategories} is 0. Alternatively, @code{category} can represent a group of nested categories. In that case, @code{name} is the name of the group, -@code{index} is -1, and @code{n-subcategories} is positive. +@code{unindexed} is 01, and @code{index} is -1. Ordinarily a group +has some nested content, so that @code{n-subcategories} is positive, +but a few instances of groups with @code{n-subcategories} 0 have been +observed. If @code{merge} is 00, the most common value, then the +group is really a distinct group that should be represented as such in +the visual representation and user interface. If @code{merge} is 01, +however, the categories in this group should be shown and treated as +if they were direct children of the group's parent group (or if it has +no parent group, then direct children of the dimension), and this +group's name is irrelevant and should not be displayed. (Merged +groups can be nested!)