From 7674958d6669183799289f701e1148b6903b801a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 7 Aug 2015 00:17:46 -0700 Subject: [PATCH] Document some elements. --- parse-all-xml | 9 ++++-- parse-xml.c | 76 +++++++++++++++++++++++++++++++++----------- spv-file-format.texi | 36 ++++++++++++++++++++- 3 files changed, 100 insertions(+), 21 deletions(-) diff --git a/parse-all-xml b/parse-all-xml index 8b1fc366f6..a91f68eab6 100755 --- a/parse-all-xml +++ b/parse-all-xml @@ -9,9 +9,14 @@ lightTables=`ls -1 unzipped/*/*.xml |grep -vE 'notes|table|warning|chart|model' printf 'Structure:\n' for d in $lightTables; do ./parse-xml $d containment -done | sort | uniq | sort +done | sort | uniq -c | sort -rn printf '\nAttributes:\n' for d in $lightTables; do ./parse-xml $d attributes -done | sort | uniq | sort +done | sort | uniq -c | sort -rn + +printf '\nLabels:\n' +for d in $lightTables; do + ./parse-xml $d labels +done | sort | uniq -c | sort -rn diff --git a/parse-xml.c b/parse-xml.c index efc7bb3569..85fbaf05fe 100644 --- a/parse-xml.c +++ b/parse-xml.c @@ -4,38 +4,76 @@ #include static void -print_containment (xmlNode * a_node) +print_containment (xmlNode *node) { - for (xmlNode *node = a_node; node; node = node->next) + for (; node; node = node->next) { - const xmlNode *parent = node->parent; - if (parent->type == XML_ELEMENT_NODE) - printf ("%s ", parent->name); - else - printf (" "); - if (node->type == XML_ELEMENT_NODE) - printf ("%s", node->name); - else if (node->type == XML_TEXT_NODE) - printf(""); - else if (node->type == XML_CDATA_SECTION_NODE) - printf(""); - else - printf(""); + { + printf ("%s", node->name); + for (xmlNode *child = node->children; child; child = child->next) + { + putchar (' '); + if (child->type == XML_ELEMENT_NODE) + { + printf ("%s", child->name); - putchar('\n'); + int n = 0; + while (child->next + && child->next->type == XML_ELEMENT_NODE + && !strcmp((char *) child->name, (char *) child->next->name)) + { + child = child->next; + n++; 
+ } + if (n > 0) + putchar ('+'); + } + else if (child->type == XML_TEXT_NODE) + printf (""); + else if (child->type == XML_CDATA_SECTION_NODE) + printf (""); + else + printf ("<%d>", child->type); + } + putchar ('\n'); + } print_containment (node->children); } } +static void +print_labels (xmlNode *node) +{ + for (; node; node = node->next) + { + if (node->type == XML_ELEMENT_NODE + && !strcmp((char *) node->name, "label") + && node->parent->type == XML_ELEMENT_NODE + && !strcmp((char *) node->parent->name, "container")) + { + for (xmlNode *child = node->children; child; child = child->next) + if (child->type == XML_TEXT_NODE) + puts ((char *) child->content); + } + + print_labels (node->children); + } +} + static void print_attributes (xmlNode * a_node) { for (xmlNode *node = a_node; node; node = node->next) { - for (xmlAttr *attr = node->properties; attr; attr = attr->next) - printf ("%s %s\n", node->name, attr->name); + if (node->properties) + { + printf ("%s", node->name); + for (xmlAttr *attr = node->properties; attr; attr = attr->next) + printf (" %s", attr->name); + putchar ('\n'); + } print_attributes (node->children); } @@ -69,6 +107,8 @@ main (int argc, char **argv) print_containment (root); else if (!strcmp(argv[2], "attributes")) print_attributes (root); + else if (!strcmp(argv[2], "labels")) + print_labels (root); else usage (); diff --git a/spv-file-format.texi b/spv-file-format.texi index 3c40e76e21..1497c49b6d 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -88,4 +88,38 @@ not resolvable to obtain the schemas themselves. One may ignore all of the above in interpreting a structure member. The actual XML has a simple and straightforward form that does not -require a reader to take schemas or namespaces into account. The +require a reader to take schemas or namespaces into account. 
+ +@table @code +@item heading +Parent: Document root or @code{heading} @* +Contents: @code{label} [@code{container} | @code{heading}]* + +The root of a structure member is a @code{heading}, which represents a +section of output beginning with a title (the @code{label}) and +ordinarily followed by a container for content and possibly further +nested (sub)-sections of output. + +@item label +Parent: @code{heading} or @code{container} @* +Contents: text + +Every @code{heading} and @code{container} holds a @code{label} as its +first child. The root @code{heading} in a structure member always +contains the string ``Output''. Otherwise, the text in @code{label} +describes what it labels, often by naming the statistical procedure +that was executed, e.g.@: ``Frequencies'' or ``T-Test''. Labels are +often very generic, especially within a @code{container}, e.g.@: +``Title'' or ``Warnings'' or ``Notes''. Label text is localized +according to the output language, e.g.@: in Italian a frequency table +procedure is labeled ``Frequenze''. + +The corpus contains one example of an empty label, one that contains +no text. + +@item container +Parent: @code{heading} @* +Contents: @code{label} [@code{table} | @code{text}] + +A @code{container} is the immediate parent of a +@end table -- 2.30.2