printf 'Structure:\n' # section header for the containment summary
for d in $lightTables; do # $lightTables is left unquoted, so it is split into one word per loop iteration
./parse-xml $d containment
-done | sort | uniq | sort
+done | sort | uniq -c | sort -rn # count each distinct line, then list the most frequent first
printf '\nAttributes:\n'
for d in $lightTables; do
./parse-xml $d attributes
-done | sort | uniq | sort
+done | sort | uniq -c | sort -rn # same counting and descending numeric ordering as above
+
+printf '\nLabels:\n'
+for d in $lightTables; do
+ ./parse-xml $d labels
+done | sort | uniq -c | sort -rn # same counting and descending numeric ordering as above
#include <libxml/tree.h>
static void /* With the added lines applied: print one line per element, giving its name followed by a summary of its children. */
-print_containment (xmlNode * a_node)
+print_containment (xmlNode *node)
{
- for (xmlNode *node = a_node; node; node = node->next)
+ for (; node; node = node->next) /* walk NODE and each following sibling */
{
- const xmlNode *parent = node->parent;
- if (parent->type == XML_ELEMENT_NODE)
- printf ("%s ", parent->name);
- else
- printf ("<root> ");
-
if (node->type == XML_ELEMENT_NODE)
- printf ("%s", node->name);
- else if (node->type == XML_TEXT_NODE)
- printf("<text>");
- else if (node->type == XML_CDATA_SECTION_NODE)
- printf("<cdata>");
- else
- printf("<other>");
+ {
+ printf ("%s", node->name);
+ for (xmlNode *child = node->children; child; child = child->next)
+ {
+ putchar (' '); /* separator before every child entry */
+ if (child->type == XML_ELEMENT_NODE)
+ {
+ printf ("%s", child->name);
- putchar('\n');
+ int n = 0; /* number of immediately following siblings with the same element name */
+ while (child->next
+ && child->next->type == XML_ELEMENT_NODE
+ && !strcmp((char *) child->name, (char *) child->next->name))
+ {
+ child = child->next; /* skip the repeated sibling so the run is printed only once */
+ n++;
+ }
+ if (n > 0)
+ putchar ('+'); /* mark a run of two or more same-named elements */
+ }
+ else if (child->type == XML_TEXT_NODE)
+ printf ("<text>");
+ else if (child->type == XML_CDATA_SECTION_NODE)
+ printf ("<cdata>");
+ else
+ printf ("<%d>", child->type); /* any other node kind is shown by its numeric type value */
+ }
+ putchar ('\n');
+ }
print_containment (node->children); /* recurse into the children of every node, element or not */
}
}
+static void /* Print the text of each "label" element whose parent is a "container" element, one string per line. */
+print_labels (xmlNode *node)
+{
+ for (; node; node = node->next) /* walk NODE and each following sibling */
+ {
+ if (node->type == XML_ELEMENT_NODE
+ && !strcmp((char *) node->name, "label")
+ && node->parent->type == XML_ELEMENT_NODE /* NOTE(review): node->parent is dereferenced without a NULL check; confirm every node passed in has a parent */
+ && !strcmp((char *) node->parent->name, "container"))
+ {
+ for (xmlNode *child = node->children; child; child = child->next)
+ if (child->type == XML_TEXT_NODE) /* only text children are printed; every other child type is skipped */
+ puts ((char *) child->content); /* NOTE(review): assumes content is non-NULL for text nodes; verify */
+ }
+
+ print_labels (node->children); /* recurse into the children of every node, whether or not it matched */
+ }
+}
+
static void
print_attributes (xmlNode * a_node)
{
for (xmlNode *node = a_node; node; node = node->next)
{
- for (xmlAttr *attr = node->properties; attr; attr = attr->next)
- printf ("%s %s\n", node->name, attr->name);
+ if (node->properties)
+ {
+ printf ("%s", node->name);
+ for (xmlAttr *attr = node->properties; attr; attr = attr->next)
+ printf (" %s", attr->name);
+ putchar ('\n');
+ }
print_attributes (node->children);
}
print_containment (root);
else if (!strcmp(argv[2], "attributes"))
print_attributes (root);
+ else if (!strcmp(argv[2], "labels"))
+ print_labels (root);
else
usage ();
One may ignore all of the above in interpreting a structure member.
The actual XML has a simple and straightforward form that does not
-require a reader to take schemas or namespaces into account. The
+require a reader to take schemas or namespaces into account.
+
+@table @code
+@item heading
+Parent: Document root or @code{heading} @*
+Contents: @code{label} [@code{container} | @code{heading}]*
+
+The root of a structure member is a @code{heading}, which represents a
+section of output beginning with a title (the @code{label}) and
+ordinarily followed by a container for content and possibly further
+nested (sub)sections of output.
+
+@item label
+Parent: @code{heading} or @code{container} @*
+Contents: text
+
+Every @code{heading} and @code{container} holds a @code{label} as its
+first child. The root @code{heading} in a structure member always
+contains the string ``Output''. Otherwise, the text in @code{label}
+describes what it labels, often by naming the statistical procedure
+that was executed, e.g.@: ``Frequencies'' or ``T-Test''. Labels are
+often very generic, especially within a @code{container}, e.g.@:
+``Title'' or ``Warnings'' or ``Notes''. Label text is localized
+according to the output language, e.g.@: in Italian a frequency table
+procedure is labeled ``Frequenze''.
+
+The corpus contains one example of an empty label, one that contains
+no text.
+
+@item container
+Parent: @code{heading} @*
+Contents: @code{label} [@code{table} | @code{text}]
+
+A @code{container} is the immediate parent of a
+@end table