Start working on detail XML.
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 17 Jan 2016 19:13:44 +0000 (11:13 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 17 Jan 2016 19:13:44 +0000 (11:13 -0800)
detail-xml [new file with mode: 0644]
parse-detail-xml [new file with mode: 0755]
parse-xml.c

diff --git a/detail-xml b/detail-xml
new file mode 100644 (file)
index 0000000..742b5f9
--- /dev/null
@@ -0,0 +1,74 @@
+visualization :=
+    extension?
+    userSource
+    (sourceVariable | derivedVariable)+
+    graph
+    labelFrame+
+    container?
+    style+
+    layerController?
+
+extension :=
+
+userSource :=
+
+ sourceVariable := extension* (format | stringFormat)?
+derivedVariable := extension* (format | stringFormat valueMapEntry*)?
+
+format := (affix+ | relabel)?
+dateTimeFormat :=
+numberFormat := affix?
+stringFormat := (affix | relabel+)?
+affix :=
+relabel :=
+
+valueMapEntry :=
+
+graph := location+ coordinates faceting facetLayout interval
+
+location :=
+
+coordinates :=
+faceting := cross layer*
+interval := labeling footnotes?
+labeling := (format | formatting | footnotes)*
+formatting := formatMapping*
+formatMapping := format
+footnotes := footnoteMapping+
+footnoteMapping :=
+
+cross := nest+
+nest := variableReference+
+variableReference :=
+
+facetLayout := tableLayout facetLevel+ setCellProperties*
+tableLayout :=
+facetLevel := axis
+
+axis := label? majorTicks
+label := descriptionGroup | text+
+majorTicks := gridline?
+gridline :=
+
+descriptionGroup := description+ text
+description :=
+text := <text>
+paragraph :=
+
+setCellProperties := setMetadata setStyle* setFormat+ union?
+setMetadata :=
+setStyle :=
+setFormat := dateTimeFormat | format | numberFormat | stringFormat+
+
+labelFrame := location+ label paragraph?
+container := extension? location+ labelFrame+
+
+style :=
+
+layer :=
+layerController :=
+
+union := intersect+
+intersect := intersectWhere | where+
+intersectWhere :=
+where :=
diff --git a/parse-detail-xml b/parse-detail-xml
new file mode 100755 (executable)
index 0000000..f0b175e
--- /dev/null
@@ -0,0 +1,9 @@
+#! /bin/sh
+
+# Parse the detail XML members.
+legacyXML=`ls -1 unzipped/*/*.xml |grep -vE 'outputViewer|stats|chart|model'`
+if test -n "$1"; then
+    for d in $legacyXML; do
+       ./parse-xml $d $1
+    done | sort | uniq
+fi
diff --git a/parse-xml.c b/parse-xml.c
index 9faa946f45a7ac64574312b8505d139d4e07f2ee..2f6b33813eb43bc4aa050d3bee9fd415ff32ccf5 100644 (file)
@@ -3,6 +3,32 @@
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 
+/* Walks NODE and its following siblings.  For each element among them,
+   prints one "PARENT CHILD" line per child node, where CHILD is the child
+   element's name, or <text>, <cdata>, <comment>, or <TYPE> (the numeric
+   node type) for any other kind of child.  Then recurses into the children
+   of every node visited, element or not. */
+static void
+print_parents(xmlNode *node)
+{
+  for (xmlNode *parent = node; parent != NULL; parent = parent->next)
+    {
+      /* Only elements report their children; the recursion below still
+         descends into every node. */
+      xmlNode *kid = parent->type == XML_ELEMENT_NODE ? parent->children : NULL;
+      for (; kid != NULL; kid = kid->next)
+        switch (kid->type)
+          {
+          case XML_ELEMENT_NODE:
+            printf ("%s %s\n", parent->name, kid->name);
+            break;
+          case XML_TEXT_NODE:
+            printf ("%s <text>\n", parent->name);
+            break;
+          case XML_CDATA_SECTION_NODE:
+            printf ("%s <cdata>\n", parent->name);
+            break;
+          case XML_COMMENT_NODE:
+            printf ("%s <comment>\n", parent->name);
+            break;
+          default:
+            /* Anything else is identified by its numeric node type. */
+            printf ("%s <%d>\n", parent->name, kid->type);
+            break;
+          }
+
+      print_parents (parent->children);
+    }
+}
+
 static void
 print_containment (xmlNode *node)
 {
@@ -10,33 +36,49 @@ print_containment (xmlNode *node)
     {
       if (node->type == XML_ELEMENT_NODE)
         {
-          printf ("%s", node->name);
+          const char *child_names[512];
+          int child_name_cnt[512];
+          int n_names = 0;
           for (xmlNode *child = node->children; child; child = child->next)
             {
-              putchar (' ');
+              const char *name;
+
               if (child->type == XML_ELEMENT_NODE)
-                {
-                  printf ("%s", child->name);
-
-                  int n = 0;
-                  while (child->next
-                         && child->next->type == XML_ELEMENT_NODE
-                         && !strcmp((char *) child->name, (char *) child->next->name))
-                    {
-                      child = child->next;
-                      n++;
-                    }
-                  if (n > 0)
-                    putchar ('+');
-                }
+                name = (char *) child->name;
               else if (child->type == XML_TEXT_NODE)
-                printf ("<text>");
+                name = "<text>";
               else if (child->type == XML_CDATA_SECTION_NODE)
-                printf ("<cdata>");
+                name = "<cdata>";
+              else if (child->type == XML_COMMENT_NODE)
+                {
+                  name = "<comment>";
+                  //printf ("comment %s\n", (char *) child->content);
+                  continue;
+                }
               else
-                printf ("<%d>", child->type);
+                name = "<other>";
+
+              for (int i = 0; i < n_names; i++)
+                if (!strcmp(name, child_names[i]))
+                  {
+                    child_name_cnt[i]++;
+                    goto next;
+                  }
+              child_names[n_names] = name;
+              child_name_cnt[n_names] = 1;
+              n_names++;
+
+            next:;
             }
-          putchar ('\n');
+
+          printf ("%s", node->name);
+          for (int i = 0; i < n_names; i++)
+            {
+              printf (" %s", child_names[i]);
+              if (child_name_cnt[i] > 1)
+                printf ("+");
+            }
+          printf ("\n");
         }
 
       print_containment (node->children);
@@ -168,7 +210,7 @@ main (int argc, char **argv)
 
   LIBXML_TEST_VERSION;
 
-  xmlDoc *doc = xmlReadFile(argv[1], NULL, 0);
+  xmlDoc *doc = xmlReadFile(argv[1], NULL, XML_PARSE_NOBLANKS);
   if (doc == NULL)
     {
       fprintf (stderr, "error: could not parse file %s\n", argv[1]);
@@ -185,7 +227,9 @@ main (int argc, char **argv)
   root->next = NULL;
 #endif
 
-  if (!strcmp(argv[2], "containment"))
+  if (!strcmp(argv[2], "parents"))
+    print_parents (root);
+  else if (!strcmp(argv[2], "containment"))
     print_containment (root);
   else if (!strcmp(argv[2], "attributes"))
     print_attributes (root);