# Also skip those with borderProperties, which indicate the non-"light"
# format.
lightTables=`ls -1 unzipped/*/*.xml |grep -vE 'notes|table|warning|chart|model' \
- | xargs grep -EL '<([a-z]*:)?(model|graph|pageSetup|borderProperties)'`
-#printf 'Structure:\n'
-#for d in $lightTables; do
-# ./parse-xml $d containment
-#done | sort | uniq -c | sort -rn
+ | xargs grep -EL '<([a-z]*:)?(model|graph)'`
-printf '\nAttributes:\n'
-for d in $lightTables; do
- ./parse-xml $d text
-done | sort | uniq -c | sort -rn
-
-#printf '\nLabels:\n'
-#for d in $lightTables; do
-# ./parse-xml $d labels
-#done | sort | uniq -c | sort -rn
+# If an argument was given, pass it to ./parse-xml as the second argument
+# for every file in $lightTables, then print each distinct output line
+# prefixed by its occurrence count, most frequent first.  With no
+# argument, nothing is run.
+if test -n "$1"; then
+ for d in $lightTables; do
+ ./parse-xml $d $1
+ done | sort | uniq -c | sort -rn
+fi
{
const char *s = (char *) xmlGetProp (node, (xmlChar *) attr);
if (s)
- printf ("%s %s\n", node->name, s);
+ printf ("%s %s=%s\n", node->name, attr, s);
print_attribute (node->children, attr);
}
}
+/* Searches NODE, each of its following siblings, and all of their
+   descendants, depth-first, for a node named "pageSetup".  Returns the
+   first such node found, or NULL if there is none (including when NODE
+   itself is NULL). */
+static xmlNode *
+find_page_setup (xmlNode *node)
+{
+ for (; node; node = node->next)
+ {
+ if (node->name && !strcmp ((char *) node->name, "pageSetup"))
+ return node;
+
+ /* No match here: look through this node's subtree before moving
+    on to the next sibling. */
+ xmlNode *ps = find_page_setup (node->children);
+ if (ps)
+ return ps;
+ }
+ return NULL;
+}
+
static void
print_text (xmlNode *node)
{
}
xmlNode *root = xmlDocGetRootElement(doc);
+ root = find_page_setup(root);
+ if (!root)
+ return 0;
+ root->next = NULL;
if (!strcmp(argv[2], "containment"))
print_containment (root);
@table @code
@item heading
Parent: Document root or @code{heading} @*
-Contents: @code{label} [@code{container} | @code{heading}]*
+Contents: [@code{pageSetup}] @code{label} [@code{container} | @code{heading}]*
The root of a structure member is a @code{heading}, which represents a
section of output beginning with a title (the @code{label}) and
-ordinarily followed by a container for content and possibly further
-nested (sub)-sections of output.
+ordinarily followed by content containers or further nested
+(sub)-sections of output.
+
+The document root heading may also contain a @code{pageSetup} element.
The following attributes have been observed on both document root and
nested @code{heading} elements:
Parent: @code{container} @*
Contents: @code{html}
+This @code{text} element is nested inside a @code{container}. There
+is a different @code{text} element that is nested inside a
+@code{pageParagraph}.
+
@table @asis
@item Required attribute: @code{type}
One of @code{title}, @code{log}, or @code{text}.
Contains the name of the Zip member that holds the table details,
e.g.@: @code{0000000001437_lightTableData.bin}.
+
+@item pageSetup
+Parent: @code{heading} @*
+Contents: @code{pageHeader} @code{pageFooter}
+
+@table @asis
+@item Required attribute: @code{initial-page-number}
+Always @code{1}.
+
+@item Optional attribute: @code{chart-size}
+Always @code{as-is} or a localization (!) of it (e.g.@: @code{dimensione
+attuale}, @code{Wie vorgegeben}).
+
+@item Optional attribute: @code{margin-left}
+@itemx Optional attribute: @code{margin-right}
+@itemx Optional attribute: @code{margin-top}
+@itemx Optional attribute: @code{margin-bottom}
+Margin sizes in the form @code{@var{size}in}, e.g.@: @code{0.25in}.
+
+@item Optional attribute: @code{paper-height}
+@itemx Optional attribute: @code{paper-width}
+Paper sizes in the form @code{@var{size}in}, e.g.@: @code{8.5in} by
+@code{11in} for letter paper or @code{8.267in} by @code{11.692in} for
+A4 paper.
+
+@item Optional attribute: @code{reference-orientation}
+Always @code{0deg}.
+
+@item Optional attribute: @code{space-after}
+Always @code{12pt}.
+@end table
+
+@item pageHeader
+@itemx pageFooter
+Parent: @code{pageSetup} @*
+Contents: @code{pageParagraph}*
+
+No attributes.
+
+@item pageParagraph
+Parent: @code{pageHeader} or @code{pageFooter} @*
+Contents: @code{text}
+
+Text to go at the top or bottom of a page, respectively.
+
+@item text
+Parent: @code{pageParagraph} @*
+Contents: [cdata]
+
+This @code{text} element is nested inside a @code{pageParagraph}. There
+is a different @code{text} element that is nested inside a
+@code{container}.
+
+The element is either empty, or contains cdata that holds almost-XHTML
+text: in the corpus, either an @code{html} or @code{p} element. It is
+@emph{almost}-XHTML because the @code{html} element designates the
+default namespace as
+@code{http://xml.spss.com/spss/viewer/viewer-tree} instead of an XHTML
+namespace.
+
+The cdata can contain substitution variables: @code{&[Page]} for the
+page number and @code{&[PageTitle]} for the page title.
+
+Typical contents (indented for clarity):
+
+@example
+<html xmlns="http://xml.spss.com/spss/viewer/viewer-tree">
+ <head></head>
+ <body>
+ <p style="text-align:right; margin-top: 0">Page &[Page]</p>
+ </body>
+</html>
+@end example
+
+@table @asis
+@item Required attribute: @code{type}
+Always @code{text}.
+@end table
@end table