projects
/
pspp
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
e10d92f
)
Work on improving distinction between groups and categories.
author
Ben Pfaff
<blp@cs.stanford.edu>
Sun, 9 Aug 2015 21:31:49 +0000
(14:31 -0700)
committer
Ben Pfaff
<blp@cs.stanford.edu>
Sun, 9 Aug 2015 21:31:49 +0000
(14:31 -0700)
dump.c
patch
|
blob
|
history
spv-file-format.texi
patch
|
blob
|
history
diff --git
a/dump.c
b/dump.c
index e37202000c64f310a309f872ccbbd0ae2f46cc96..65955d81995b16e50c631e430ee33fe95cf485d1 100644
(file)
--- a/
dump.c
+++ b/
dump.c
@@
-575,26
+575,32
@@
dump_category(int level, int *indexes, int *n_indexes, int max_indexes)
fprintf (stdout, " ");
printf ("<category>\n");
dump_value (stdout, level + 1);
fprintf (stdout, " ");
printf ("<category>\n");
dump_value (stdout, level + 1);
+
+ int start = pos;
+ int d0 = data[pos];
match_byte(1);
match_byte(1);
- match_byte(0);
- match_byte(0);
- match_byte(0);
+ int c = 0;
+ c += match_byte(0);
+ c += match_byte(0);
+ c += match_byte(0);
+ int d1 = data[pos];
+ int d2 = 9;
if (match_byte (1))
{
if (match_byte (1))
{
- if (!match_byte (0) && !match_byte (1))
- match_byte_assert (2);
+ d2 = data[pos];
+ if (!match_u32 (0) && !match_u32 (1))
+ match_u32_assert (2);
}
}
- else if (!match_byte (2))
- match_byte_assert (0);
- match_byte_assert (0);
- match_byte_assert (0);
- match_byte_assert (0);
+ else
+ match_u32_assert (2);
+ int len = pos - start;
int indx = get_u32();
int n_categories = get_u32();
if (indx != -1)
{
int indx = get_u32();
int n_categories = get_u32();
if (indx != -1)
{
+ fprintf (stderr, "category %d %d %d %d %d:", d0, c, d1, d2, len);
if (n_categories != 0)
{
fprintf(stderr, "index not -1 but subcategories\n");
if (n_categories != 0)
{
fprintf(stderr, "index not -1 but subcategories\n");
@@
-607,6
+613,12
@@
dump_category(int level, int *indexes, int *n_indexes, int max_indexes)
}
indexes[(*n_indexes)++] = indx;
}
}
indexes[(*n_indexes)++] = indx;
}
+ else
+ fprintf (stderr, "group %d %d %d %d %d:", d0, c, d1, d2, len);
+ for (int i = 0; i < len; i++)
+ fprintf (stderr, " %02x", data[start + i]);
+ putc ('\n', stderr);
+
if (n_categories == 0)
{
for (int i = 0; i <= level + 1; i++)
if (n_categories == 0)
{
for (int i = 0; i <= level + 1; i++)
diff --git
a/spv-file-format.texi
b/spv-file-format.texi
index e7c3f888900bcdae677436f01d42cc70e64d248a..f6f8bb27dd116771c1df96cad66f7499867d60d6 100644
(file)
--- a/
spv-file-format.texi
+++ b/
spv-file-format.texi
@@
-517,6
+517,18
@@
for the first dimension, 1 for the second, and so on. The latter is
the case 98% of the time in the corpus.
@example
the case 98% of the time in the corpus.
@example
-category := value i1
- (00 | 01 (00 | 01 | 02) | 02) 00 00 00
+category := value[name]
+ 01? 00? 00? 00?
+ (01 (i0 | i1 | i2) | i2)
+ int[index] int[n-subcategories] category*[n-subcategories]
@end example
@end example
+
+A @code{category} can represent a terminal category. In that case,
+@code{name} is the name of the category, @code{index} is a nonnegative
+integer less than the @code{n-categories} of the @code{dimension} in
+which the @code{category} is nested (directly or indirectly), and
+@code{n-subcategories} is 0.
+
+Alternatively, a @code{category} can represent a group of nested
+categories. In that case, @code{name} is the name of the group,
+@code{index} is -1, and @code{n-subcategories} is positive and gives
+the number of nested @code{category} records that follow.