From a44813e69593b8c3a8c1ee08c0f08a18a52af256 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 20 Jan 2016 20:47:22 -0800 Subject: [PATCH 01/16] Add graphviz file for structure of detail XML. --- detail-xml | 3 +- spv-detail.gv | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 spv-detail.gv diff --git a/detail-xml b/detail-xml index 1684915bf5..3735a657fa 100644 --- a/detail-xml +++ b/detail-xml @@ -119,9 +119,8 @@ style := textAlignment size?)? visible? - color? color2? + color? color2?] - layer := [id? value variable method? visible?] layerController := [id? source target] diff --git a/spv-detail.gv b/spv-detail.gv new file mode 100644 index 0000000000..e026a483e3 --- /dev/null +++ b/spv-detail.gv @@ -0,0 +1,92 @@ +digraph spv_detail { + "visualization" -> "extension" [label="?"]; + "visualization" -> "userSource"; + "visualization" -> "sourceVariable" [label="*"]; + "visualization" -> "derivedVariable" [label="*"]; + "visualization" -> "graph"; + "visualization" -> "labelFrame" [label="+"]; + "visualization" -> "container" [label="?"]; + "visualization" -> "style" [label="+"]; + "visualization" -> "layerController" [label="?"]; + + "sourceVariable" -> "extension" [label="*"]; + "sourceVariable" -> "format" [label="?"]; + "sourceVariable" -> "stringFormat" [label="?"]; + + "derivedVariable" -> "extension" [label="*"]; + "derivedVariable" -> "format" [label="?"]; + "derivedVariable" -> "stringFormat" [label="?"]; + "derivedVariable" -> "valueMapEntry" [label="*"]; + + "format" -> "affix" [label="*"]; + "format" -> "relabel" [label="?"]; + + "dateTimeFormat" -> "affix" [label="?"]; + + "stringFormat" -> "affix" [label="*"]; + "stringFormat" -> "relabel" [label="?"]; + + "graph" -> "location" [label="+"]; + "graph" -> "coordinates"; + "graph" -> "faceting"; + "graph" -> "facetLayout"; + "graph" -> "interval"; + + "faceting" -> "cross"; + "faceting" -> "layer" [label="*"]; + + "interval" -> "labeling"; + "interval" -> "footnotes" [label="?"]; + + "labeling" -> "format" [label="*"]; + "labeling" -> "formatting" [label="*"]; + "labeling" -> "footnotes" [label="*"]; + + "formatting" -> "formatMapping" [label="*"]; + + "formatMapping" -> "format"; + + "footnotes" -> "footnoteMapping" [label="+"]; + + "cross" -> "nest" [label="+"]; + + "nest" -> "variableReference" [label="+"]; + + "facetLayout" -> "tableLayout"; + "facetLayout" -> "facetLevel" [label="+"]; + "facetLayout" -> "setCellProperties" [label="*"]; + "facetLevel" -> "axis"; + + "axis" -> "majorTicks"; + + "label" -> "descriptionGroup" [label="*"]; + "label" -> "text" [label="*"]; + + "majorTicks" -> "gridline" [label="?"]; + + "descriptionGroup" -> "description" [label="+"]; + "descriptionGroup" -> "text"; + + "setCellProperties" -> "setMetadata"; + "setCellProperties" -> "setStyle" [label="*"]; + "setCellProperties" -> "setFormat" [label="+"]; + "setCellProperties" -> "union" [label="?"]; + + "setFormat" -> "dateTimeFormat" [label="?"]; + "setFormat" -> "format" [label="?"]; + "setFormat" -> "numberFormat" [label="?"]; + "setFormat" -> "stringFormat" [label="*"]; + + "labelFrame" -> "location" [label="+"]; + "labelFrame" -> "label"; + "labelFrame" -> "paragraph" [label="?"]; + + "container" -> "extension" [label="?"]; + "container" -> "location" [label="+"]; + "container" -> "labelFrame" [label="+"]; + + "union" -> "intersect" [label="+"]; + + "intersect" -> "intersectWhere" [label="?"]; + "intersect" -> "where" [label="*"]; +} -- 2.30.2 From 2fd5ae863380ca80272555ae606ac814dfd2e403 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 20 Jan 2016 22:47:05 -0800 Subject: [PATCH 02/16] Work on describing the detail XML format at a high level (What's a "facet"?) --- parse-xml.c | 20 ++++++++++++++++++++ spv-file-format.texi | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/parse-xml.c b/parse-xml.c index 2f6b33813e..3a32ee36e2 100644 --- a/parse-xml.c +++ b/parse-xml.c @@ -159,6 +159,24 @@ print_attribute (xmlNode *node, const char *attr) } } +static void +print_element (xmlDoc *doc, xmlNode *node, const char *element) +{ + for (; node; node = node->next) + { + if (!strcmp(element, (char *) node->name)) + { + xmlBuffer *buf = xmlBufferCreate(); + xmlNodeDump (buf, doc, node, 0, 1); + xmlBufferDump (stdout, buf); + xmlBufferFree (buf); + putchar ('\n'); + } + + print_element (doc, node->children, element); + } +} + static __attribute__((unused)) xmlNode * find_page_setup (xmlNode *node) { @@ -237,6 +255,8 @@ main (int argc, char **argv) print_cdata (root); else if (!strcmp(argv[2], "text")) print_text (root); + else if (!strncmp(argv[2], "element:", 8)) + print_element (doc, root, argv[2] + 8); else if (!strncmp(argv[2], "attr:", 5)) print_attribute (root, argv[2] + 5); else if (!strcmp(argv[2], "labels")) diff --git a/spv-file-format.texi b/spv-file-format.texi index 789a77707c..6e73aef309 100644 --- a/spv-file-format.texi +++ b/spv-file-format.texi @@ -1227,7 +1227,35 @@ label is the string @code{s}. Each label also includes a This format is still under investigation. -All elements have an optional @code{id} attribute. +The design of the detail XML format is not what one would end up with +for describing pivot tables. This is because it is a specialization +of a much more general format (``visualization XML'' or ``VizML'') +that can describe a wide range of visualizations. Most of this +generality is overkill for tables, and so we end up with a funny +subset of a general-purpose format. + +The important elements of the detail XML format are: + +@itemize @bullet +@item +Variables. Variables in detail XML roughly correspond to the +dimensions in a light detail member. There is one variable for each +dimension, plus one variable for each level of labeling along an axis. + +The bulk of variables are defined with @code{sourceVariable} elements. +The data for these variables comes from the associated +@code{tableData.bin} member. Some variables are defined, with +@code{derivedVariable} elements, as a constant or in terms of a +mapping function from a source variable. + +@item +Assignment of variables to axes. A variable can appear as columns, or +rows, or layers. The @code{faceting} element and its sub-elements +describe this assignment. +@end itemize + +All elements have an optional @code{id} attribute. In practice many +elements are assigned @code{id} attributes that are never referenced. @node SPV Detail visualization Element @subsection The @code{visualization} Element @@ -1309,7 +1337,8 @@ Contents: @code{extension}* (@code{format} @math{|} @code{stringFormat})? This element defines a variable whose values can be used elsewhere in the visualization. It ties this element's @code{id} to a variable -from the @file{.bin} member that corresponds to this @file{.xml}. +from the @file{tableData.bin} member that corresponds to this +@file{.xml}. This element has the following attributes. @@ -1319,12 +1348,14 @@ Always set to @code{true}. @defvr {Required} source Always set to @code{tableData}, the @code{source-name} in the -corresponding @file{.bin} member (@pxref{SPV Legacy Member Metadata}). +corresponding @file{tableData.bin} member (@pxref{SPV Legacy Member +Metadata}). @end defvr @defvr {Required} sourceName The name of a variable within the source, the @code{variable-name} in -the corresponding @file{.bin} member (@pxref{SPV Legacy Member Data}). +the corresponding @file{tableData.bin} member (@pxref{SPV Legacy +Member Data}). @end defvr @defvr {Optional} dependsOn -- 2.30.2 From adc35059e3ae7e8968bc6c7a835abc412920e9b5 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 31 Jan 2016 09:48:45 -0800 Subject: [PATCH 03/16] work on figuring more stuff out--may want to revert this commit --- dump.c | 2 +- dump2.c | 3 ++- parse-all-heavy | 11 +++++------ parse-detail-xml | 4 +++- parse-xml.c | 43 +++++++++++++++++++++++++++++++++++++------ 5 files changed, 48 insertions(+), 15 deletions(-) diff --git a/dump.c b/dump.c index a52cafddf0..c57057f025 100644 --- a/dump.c +++ b/dump.c @@ -823,7 +823,7 @@ dump_title(void) dump_value(stdout, 0); /* Custom footnote marker string. */ if (match_byte (0x31)) - dump_value(stdout, 0); + dump_value(stderr, 0); else match_byte_assert (0x58); get_u32 (); diff --git a/dump2.c b/dump2.c index 15670ecf4b..740cbbedaa 100644 --- a/dump2.c +++ b/dump2.c @@ -217,7 +217,8 @@ dump_source(int end, int count, int n_series, const char *name) int n_sysmis = 0; for (int i = 0; i < n_series; i++) { - printf (" series %d: \"%s\"\n ", i, get_fixed_string(288)); + printf (" series %d: \"%s\", %d values:\n ", + i, get_fixed_string(288), count); for (int i = 0; i < count; i++) { double d = get_double(); diff --git a/parse-all-heavy b/parse-all-heavy index 757e46d200..20370b1fea 100755 --- a/parse-all-heavy +++ b/parse-all-heavy @@ -1,10 +1,9 @@ #! /bin/sh -heavy=`ls -1 unzipped/*/*.bin | grep -v light` +heavy=`ls -1 unzipped*/*/*.bin | grep -v light` for d in $heavy; do - if ! ./dump2 < $d > /dev/null 2>&1; then - echo $d - ./dump2 < $d - echo - fi + echo $d + ./dump2 < $d + echo + echo done diff --git a/parse-detail-xml b/parse-detail-xml index f0b175e7ab..6d7858835e 100755 --- a/parse-detail-xml +++ b/parse-detail-xml @@ -2,8 +2,10 @@ # Parse the detail XML members. legacyXML=`ls -1 unzipped/*/*.xml |grep -vE 'outputViewer|stats|chart|model'` +for d in $legacyXML; do echo $d; done | wc +#grep -h -o '