}
}
+/* Writes the NUL-terminated string S to stdout one byte at a time.
+   Each newline byte is emitted as the two characters backslash and 'n';
+   every other byte is written unchanged. */
+static void
+print_string(xmlChar *s)
+{
+  const char *cursor = (char *) s;
+
+  while (*cursor != '\0')
+    {
+      char c = *cursor++;
+
+      if (c != '\n')
+        putchar (c);
+      else
+        printf ("\\n");
+    }
+}
+
+/* Visits A_NODE and each of its following siblings in order, descending
+   into every visited node's children after the node itself.  For each
+   node whose type is XML_CDATA_SECTION_NODE, prints its content with
+   print_string followed by a newline. */
+static void
+print_cdata (xmlNode * a_node)
+{
+  xmlNode *cur = a_node;
+
+  while (cur != NULL)
+    {
+      if (cur->type == XML_CDATA_SECTION_NODE)
+        {
+          print_string (cur->content);
+          putchar ('\n');
+        }
+
+      /* Recurse into the subtree before moving on to the next sibling. */
+      print_cdata (cur->children);
+      cur = cur->next;
+    }
+}
+
static void
print_attribute (xmlNode *node, const char *attr)
{
}
}
+/* Visits NODE and each of its following siblings in order, descending
+   into every visited node's children after the node itself.  For each
+   XML_ELEMENT_NODE, prints one line: the element name, then, for each
+   direct child of type XML_TEXT_NODE, a space followed by that child's
+   content as written by print_string. */
+static void
+print_text (xmlNode *node)
+{
+  while (node != NULL)
+    {
+      xmlNode *first_child = node->children;
+
+      if (node->type == XML_ELEMENT_NODE)
+        {
+          printf ("%s", node->name);
+
+          /* Only direct text children contribute to this element's line. */
+          xmlNode *kid = first_child;
+          while (kid != NULL)
+            {
+              if (kid->type == XML_TEXT_NODE)
+                {
+                  putchar (' ');
+                  print_string (kid->content);
+                }
+              kid = kid->next;
+            }
+
+          putchar ('\n');
+        }
+
+      /* Recurse into the subtree before moving on to the next sibling. */
+      print_text (first_child);
+      node = node->next;
+    }
+}
+
static void
usage (void)
{
print_containment (root);
else if (!strcmp(argv[2], "attributes"))
print_attributes (root);
+ else if (!strcmp(argv[2], "cdata"))
+ print_cdata (root);
+ else if (!strcmp(argv[2], "text"))
+ print_text (root);
else if (!strncmp(argv[2], "attr:", 5))
print_attribute (root, argv[2] + 5);
else if (!strcmp(argv[2], "labels"))
Parent: @code{text} @*
Contents: cdata
+The cdata contains an HTML document. In some cases, the document
+starts with @code{<html>} and ends with @code{</html>}; in others the
+@code{html} element is implied. Generally the HTML includes a
+@code{head} element with a CSS stylesheet. The HTML body often begins
+with @code{<BR>}. The actual content ranges from trivial to simple:
+just discarding the CSS and tags yields readable results.
+
+@table @asis
+@item Required attribute: @code{lang}
+This always contains @code{en} in the corpus.
+@end table
+
@item table
Parent: @code{container} @*
Contents: @code{tableStructure}
+@table @asis
+@item Required attribute: @code{commandName}
+As on the @code{heading} element.
+
+@item Required attribute: @code{type}
+One of @code{table}, @code{note}, or @code{warning}.
+
+@item Required attribute: @code{subType}
+The locale-invariant name for the particular kind of output that this
+table represents in the procedure. This can be the same as
+@code{commandName} e.g.@: @code{Frequencies}, or different, e.g.@:
+@code{Case Processing Summary}. Generic subtypes @code{Notes} and
+@code{Warnings} are often used.
+
+@item Required attribute: @code{tableId}
+A number that uniquely identifies the table within the SPV file,
+typically a large negative number such as @code{-4147135649387905023}.
+
+@item Optional attribute: @code{creator-version}
+As on the @code{heading} element. In the corpus, this is only present
+for version 21 and up and always includes all 8 digits.
+@end table
+
@item tableStructure
Parent: @code{table}
Contents: @code{dataPath}
@item dataPath
Parent: @code{tableStructure}
Contents: text
+
+Contains the name of the Zip member that holds the table details,
+e.g.@: @code{0000000001437_lightTableData.bin}.
@end table