#! /bin/sh
-for d in `ls -1 unzipped/*/*.xml |grep -vE 'notes|table|warning|chart|model'`
-do
- ./parse-xml $d
-done | sort -u
+
+# Parse the structure members that do not represent models or graphs
+# and that are not page setups (which are always the first structure member).
+# Also skip those with borderProperties, which indicate the non-"light"
+# format.
+lightTables=`ls -1 unzipped/*/*.xml |grep -vE 'notes|table|warning|chart|model' \
+ | xargs grep -EL '<([a-z]*:)?(model|graph|pageSetup|borderProperties)'`
+printf 'Structure:\n'
+for d in $lightTables; do
+ ./parse-xml "$d" containment
+done | sort -u  # Sorted, de-duplicated output lines.
+
+printf '\nAttributes:\n'
+for d in $lightTables; do
+ ./parse-xml "$d" attributes
+done | sort -u  # Sorted, de-duplicated output lines.
#include <stdio.h>
+#include <string.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
{
for (xmlNode *node = a_node; node; node = node->next)
{
+ const xmlNode *parent = node->parent;
+ if (parent->type == XML_ELEMENT_NODE)
+ printf ("%s ", parent->name);
+ else
+ printf ("<root> ");
+
if (node->type == XML_ELEMENT_NODE)
- {
- const xmlNode *parent = node->parent;
- if (parent->type == XML_ELEMENT_NODE)
- printf ("%s %s\n", parent->name, node->name);
- else if (parent->type == XML_DOCUMENT_NODE)
- printf ("<root> %s\n", node->name);
- }
+ printf ("%s", node->name);
+ else if (node->type == XML_TEXT_NODE)
+ printf("<text>");
+ else if (node->type == XML_CDATA_SECTION_NODE)
+ printf("<cdata>");
+ else
+ printf("<other>");
+
+ putchar('\n');
print_containment (node->children);
}
}
-int
-main (int argc, char **argv)
+static void
+print_attributes (xmlNode * a_node)
{
- if (argc != 2)
+ for (xmlNode *node = a_node; node; node = node->next)
{
- fprintf (stderr, "usage: %s FILE.xml\n", argv[0]);
- exit (1);
+ for (xmlAttr *attr = node->properties; attr; attr = attr->next)
+ printf ("%s %s\n", node->name, attr->name);
+
+ print_attributes (node->children);
}
+}
+
+static void
+usage (void)
+{
+ fprintf (stderr, "usage: parse-xml FILE.xml containment|attributes\n");
+ exit (1);
+}
+
+int
+main (int argc, char **argv)
+{
+ if (argc != 3)
+ usage ();
LIBXML_TEST_VERSION;
}
xmlNode *root = xmlDocGetRootElement(doc);
- print_containment(root);
+
+ if (!strcmp(argv[2], "containment"))
+ print_containment (root);
+ else if (!strcmp(argv[2], "attributes"))
+ print_attributes (root);
+ else
+ usage ();
+
xmlFreeDoc(doc);
xmlCleanupParser();
@node SPV Structure Member Format
@subsection Structure Member Format
+Structure member XML files claim conformance with a collection of XML
+Schemas. These schemas are distributed, under a nonfree license, with
+SPSS binaries. Fortunately, the schemas are not necessary to
+understand the structure members. To a degree, the schemas can even
+be deceptive because they document elements and attributes that are
+not in the corpus and lack documentation of elements and attributes
+that are commonly found in the corpus.
+
+Structure members use a different XML namespace for each schema, but
+these namespaces are not entirely consistent: in some SPV files, for
+example, the @code{viewer-tree} schema is associated with namespace
+@indicateurl{http://xml.spss.com/spss/viewer-tree} and in others with
+@indicateurl{http://xml.spss.com/spss/viewer/viewer-tree} (note the
+additional @file{viewer/} directory). In any case, the schema URIs are
+not resolvable to obtain the schemas themselves.
+
+One may ignore all of the above in interpreting a structure member.
+The actual XML has a simple and straightforward form that does not
+require a reader to take schemas or namespaces into account. The