Document epoch.

author Ben Pfaff <blp@cs.stanford.edu>

Sat, 27 May 2017 17:30:20 +0000 (10:30 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sat, 27 May 2017 17:30:20 +0000 (10:30 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 27 May 2017 17:30:20 +0000 (10:30 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 27 May 2017 17:30:20 +0000 (10:30 -0700)
diff --git a/dump.c b/dump.c

index 6cc9e52abe4e5a9767b5d008841ba462a14893cc..c737c01d1dd4f8252967d8e8b66785f992cf9a70 100644 (file)
--- a/dump.c
+++ b/dump.c
@@ -1031,19 +1031,7 @@ dump_fonts(void)
    match_byte_assert(0);
    if (!match_byte(0))
      match_byte_assert(1);
-  if (version > 1)
-    {
-      if (!match_byte(0x97) && !match_byte(0x98)
-          && !match_byte(0x99) && !match_byte(0x9a) && !match_byte(0x9b))
-        match_byte_assert(0x9c);
-      match_byte_assert(7);
-      match_byte_assert(0);
-      match_byte_assert(0);
-    }
-  else
-    {
-      printf("%x\n", get_u32());
-    }
+  printf("<epoch>%d</epoch>\n", get_u32());
  
    int decimal = data[pos];
    int grouping = data[pos + 1];
@@ -1105,12 +1093,7 @@ dump_fonts(void)
        if (!match_byte(0))
          match_byte_assert(1);
  
-      if (!match_byte(0x97) && !match_byte(0x98)
-          && !match_byte(0x99) && !match_byte(0x9a) && !match_byte(0x9b))
-        match_byte_assert(0x9c);
-      match_byte_assert(7);
-      match_byte_assert(0);
-      match_byte_assert(0);
+      printf("<epoch2>%d</epoch2>\n", get_u32());
  
        if (match_byte('.'))
          {
diff --git a/spv-file-format.texi b/spv-file-format.texi

index dff235859b6f9b711dd3362abaf26c3e9a8d3a63..192c1c3ea1384534518ece495cbbb3536463cf56 100644 (file)
--- a/spv-file-format.texi
+++ b/spv-file-format.texi
@@ -901,7 +901,7 @@ X6 @result{}
      string[@t{command}] string[@t{subcommand}]
      string[@t{language}] string[@t{charset}] string[@t{locale}]
      (00 @math{|} 01) 00 (00 @math{|} 01) (00 @math{|} 01)
-    int
+    int[@t{epoch}]
      byte[@t{decimal}] byte[@t{grouping}]
      byte*8 01
      (string[@t{dataset}] string[@t{data file}] i0 int i0)?
@@ -918,6 +918,12 @@ such as @code{en_US.windows-1252} or @code{it_IT.windows-1252}.  The
  rest of the character strings in the member use this encoding.  The
  encoding string is itself encoded in US-ASCII.
  
+@code{epoch} is the year that starts the epoch.  A 2-digit year is
+interpreted as belonging to the 100 years beginning at the epoch.  The
+default epoch year is 69 years prior to the current year; thus, in
+2017 this field by default contains 1948.  In the corpus, @code{epoch}
+ranges from 1943 to 1948, although some instances contain -1 instead.
+
  @code{decimal} is the decimal point character.  The observed values
  are @samp{.} and @samp{,}.
author	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 27 May 2017 17:30:20 +0000 (10:30 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 27 May 2017 17:30:20 +0000 (10:30 -0700)
dump.c		patch \| blob \| history
spv-file-format.texi		patch \| blob \| history