13 #include "u8-mbtouc.h"
23 #define STR(x) XSTR(x)
24 #define WHERE __FILE__":" STR(__LINE__)
36 memcpy(&x, &data[pos], 4);
41 static unsigned long long int
45 memcpy(&x, &data[pos], 8);
54 x = ((uint32_t) data[pos] << 24) | ((uint32_t) data[pos + 1] << 16) | ((uint32_t) data[pos + 2] << 8) | data[pos + 3]; /* shift in unsigned arithmetic so a top byte >= 0x80 cannot overflow int */
63 memcpy(&x, &data[pos], 2);
72 memcpy(&x, &data[pos], 8);
77 static double __attribute__((unused))
81 memcpy(&x, &data[pos], 4);
96 match_u32_assert(uint32_t x, const char *where)
98 unsigned int y = get_u32();
101 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u\n", where, pos - 4, x, y);
105 #define match_u32_assert(x) match_u32_assert(x, WHERE)
107 static bool __attribute__((unused))
108 match_u64(uint64_t x)
116 static void __attribute__((unused))
117 match_u64_assert(uint64_t x, const char *where)
119 unsigned long long int y = get_u64();
122 fprintf(stderr, "%s: 0x%x: expected u64:%llu, got u64:%llu\n", where, pos - 8, (unsigned long long int) x, y); /* %llu requires unsigned long long; uint64_t may be a different type */
126 #define match_u64_assert(x) match_u64_assert(x, WHERE)
128 static bool __attribute__((unused))
129 match_be32(uint32_t x)
138 match_be32_assert(uint32_t x, const char *where)
140 unsigned int y = get_be32();
143 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
147 #define match_be32_assert(x) match_be32_assert(x, WHERE)
150 match_byte(uint8_t b)
152 if (pos < n && data[pos] == b)
162 match_byte_assert(uint8_t b, const char *where)
166 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x\n", where, pos, b, data[pos]);
170 #define match_byte_assert(b) match_byte_assert(b, WHERE)
177 match_byte_assert(1);
181 static bool __attribute__((unused))
182 all_utf8(const char *p_)
184 const uint8_t *p = (const uint8_t *) p_;
185 size_t len = strlen ((char *) p);
186 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
190 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
191 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
198 get_string(const char *where)
201 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
202 /*&& all_ascii(&data[pos + 4], data[pos])*/)
204 int len = data[pos] + data[pos + 1] * 256;
205 char *s = malloc(len + 1);
207 memcpy(s, &data[pos + 4], len);
214 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
218 #define get_string() get_string(WHERE)
221 get_string_be(const char *where)
224 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
225 /*&& all_ascii(&data[pos + 4], data[pos])*/)
227 int len = data[pos + 2] * 256 + data[pos + 3];
228 char *s = malloc(len + 1);
230 memcpy(s, &data[pos + 4], len);
237 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
241 #define get_string_be() get_string_be(WHERE)
250 static void __attribute__((unused))
251 hex_dump(int ofs, int n)
253 for (int i = 0; i < n; i++)
255 int c = data[ofs + i];
264 for (int i = 0; i < n; i++)
266 int c = data[ofs + i];
267 printf("%c", c >= 32 && c < 127 ? c : '.');
272 dump_counted_string(void)
274 int inner_end = get_end();
275 if (pos == inner_end)
281 match_byte_assert(0x58);
287 if (match_byte(0x31))
290 match_byte_assert(0x58);
291 if (pos != inner_end)
293 fprintf(stderr, "inner end discrepancy\n");
300 dump_style(FILE *stream)
302 if (match_byte(0x58))
305 match_byte_assert(0x31);
307 fprintf (stream, " bold=\"yes\""); /* write to STREAM, like the fgcolor/bgcolor/font attributes of the same element */
309 fprintf (stream, " italic=\"yes\"");
311 fprintf (stream, " underline=\"yes\"");
313 fprintf (stream, " show=\"no\"");
314 char *fg = get_string(); /* foreground */
315 char *bg = get_string(); /* background */
316 char *font = get_string(); /* font */
317 int size = get_byte() * (72. / 96.);
318 fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%dpt\"",
323 dump_style2(FILE *stream)
325 if (match_byte(0x58))
328 match_byte_assert(0x31);
329 uint32_t halign = get_u32();
330 fprintf (stream, " halign=\"%s\"", /* attributes go to the STREAM argument rather than always to stdout */
331 halign == 0 ? "center"
332 : halign == 2 ? "left"
333 : halign == 4 ? "right"
334 : halign == 6 ? "decimal"
335 : halign == 0xffffffad ? "mixed"
337 int valign = get_u32();
338 fprintf (stream, " valign=\"%s\"",
339 valign == 0 ? "center"
340 : valign == 1 ? "top"
341 : valign == 3 ? "bottom"
343 fprintf (stream, " offset=\"%gpt\"", get_double());
348 fprintf (stream, " margins=\"%d %d %d %d\"", l, r, t, b);
352 dump_nested_string(FILE *stream)
356 match_byte_assert (0);
357 match_byte_assert (0);
358 int outer_end = get_end();
359 s = dump_counted_string();
361 fprintf(stream, " \"%s\"", s);
363 match_byte_assert(0x58);
364 if (pos != outer_end)
366 fprintf(stderr, "outer end discrepancy\n");
374 dump_value_modifier(FILE *stream)
376 if (match_byte (0x31))
380 fprintf(stream, "<special0");
383 /* Corpus frequencies:
388 The given text is appended to the cell in a subscript font.
390 fprintf(stream, " subscript=\"%s\"", get_string());
393 match_u32_assert (0);
397 /* We only have one SPV file for this version (with many
404 if (!match_u32(0) && !match_u32(1) && !match_u32(2) && !match_u32(3) && !match_u32(4) && !match_u32(5) && !match_u32(6) && !match_u32(7) && !match_u32(8) && !match_u32(9))
405 match_u32_assert(10);
408 fprintf(stream, "/>\n");
412 int outer_end = get_end();
414 /* This counted-string appears to be a template string,
415 e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */
416 char *template = dump_counted_string();
418 fprintf(stream, " template=\"%s\"", template);
422 if (pos != outer_end)
424 fprintf(stderr, "outer end discrepancy\n");
427 fprintf(stream, "/>\n");
431 int count = get_u32();
432 fprintf(stream, "<footnote-ref indexes=\"");
433 for (int i = 0; i < count; i++)
437 fprintf(stream, "%d", get_u16());
440 match_byte_assert(0);
441 match_byte_assert(0);
442 dump_nested_string(stream);
443 fprintf(stream, "/>\n");
447 match_byte_assert (0x58);
451 format_to_string (int type)
457 case 2: return "AHEX";
458 case 3: return "COMMA";
459 case 4: return "DOLLAR";
460 case 5: case 40: return "F";
462 case 7: return "PIBHEX";
464 case 9: return "PIB";
465 case 10: return "PK";
466 case 11: return "RB";
467 case 12: return "RBHEX";
471 case 20: return "DATE";
472 case 21: return "TIME";
473 case 22: return "DATETIME";
474 case 23: return "ADATE";
475 case 24: return "JDATE";
476 case 25: return "DTIME";
477 case 26: return "WKDAY";
478 case 27: return "MONTH";
479 case 28: return "MOYR";
480 case 29: return "QYR";
481 case 30: return "WKYR";
482 case 31: return "PCT";
483 case 32: return "DOT";
484 case 33: return "CCA";
485 case 34: return "CCB";
486 case 35: return "CCC";
487 case 36: return "CCD";
488 case 37: return "CCE";
489 case 38: return "EDATE";
490 case 39: return "SDATE";
493 sprintf(tmp, "<%d>", type);
499 dump_value(FILE *stream, int level)
506 for (int i = 0; i <= level; i++)
507 fprintf (stream, " ");
514 dump_value_modifier(stream);
516 value = get_double ();
517 fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
518 DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
520 else if (match_byte (2))
526 dump_value_modifier (stream);
528 value = get_double ();
530 vallab = get_string ();
531 fprintf (stream, "<numeric-datum value=\"%.*g\" format=\"%s%d.%d\"",
532 DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
534 fprintf (stream, " variable=\"%s\"", var);
536 fprintf (stream, " label=\"%s\"", vallab);
537 fprintf (stream, "/>\n");
538 if (!match_byte (1) && !match_byte(2))
539 match_byte_assert (3);
541 else if (match_byte (3))
543 char *text = get_string();
544 dump_value_modifier(stream);
545 char *identifier = get_string();
546 char *text_eng = get_string();
547 fprintf (stream, "<string c=\"%s\"", text_eng);
549 fprintf (stream, " identifier=\"%s\"", identifier);
550 if (strcmp(text_eng, text))
551 fprintf (stream, " local=\"%s\"", text);
552 fprintf (stream, "/>\n");
554 match_byte_assert(1);
556 else if (match_byte (4))
559 char *var, *vallab, *value;
561 dump_value_modifier(stream);
563 vallab = get_string ();
565 if (!match_byte(1) && !match_byte(2))
566 match_byte_assert (3);
567 value = get_string ();
568 fprintf (stream, "<string-datum value=\"%s\" format=\"%s%d.%d\"",
569 value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
571 fprintf (stream, " variable=\"%s\"", var);
573 fprintf (stream, " label=\"%s\"", vallab); /* attribute only: the following statement closes the element, so closing it here too emitted "/>" twice */
574 fprintf (stream, "/>\n");
576 else if (match_byte (5))
578 dump_value_modifier(stream);
579 char *name = get_string ();
580 char *label = get_string ();
581 fprintf (stream, "<variable name=\"%s\"", name);
583 fprintf (stream, " label=\"%s\"", label);
584 fprintf (stream, "/>\n");
585 if (!match_byte(1) && !match_byte(2))
586 match_byte_assert(3);
590 dump_value_modifier(stream);
592 char *base = get_string();
594 fprintf (stream, "<template format=\"%s\">\n", base);
595 for (int i = 0; i < x; i++)
602 for (int j = 0; j <= level + 1; j++)
603 fprintf (stream, " ");
604 fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
605 for (int j = 0; j < y; j++)
606 dump_value (stream, level + 2);
607 for (int j = 0; j <= level + 1; j++)
608 fprintf (stream, " ");
609 fprintf (stream, "</substitution>\n");
611 for (int j = 0; j <= level; j++)
612 fprintf (stream, " ");
613 fprintf (stream, "</template>\n");
618 compare_int(const void *a_, const void *b_)
622 return *a < *b ? -1 : *a > *b;
626 check_permutation(int *a, int n, const char *name)
629 memcpy(b, a, n * sizeof *a);
630 qsort(b, n, sizeof *b, compare_int);
631 for (int i = 0; i < n; i++)
634 fprintf(stderr, "bad %s permutation:", name);
635 for (int i = 0; i < n; i++)
636 fprintf(stderr, " %d", a[i]);
643 dump_category(FILE *stream, int level, int **indexes, int *allocated_indexes,
646 for (int i = 0; i <= level; i++)
647 fprintf (stream, " ");
648 fprintf (stream, "<category>\n"); /* opening tag goes to STREAM, where its indentation and children are written */
649 dump_value (stream, level + 1);
651 int merge = data[pos];
653 match_byte_assert (1);
655 match_byte_assert (0);
657 int unindexed = data[pos];
659 match_byte_assert (1);
664 match_u32_assert (2);
666 int indx = get_u32();
667 int n_categories = get_u32();
672 for (int i = 0; i <= level + 1; i++)
673 fprintf (stream, " ");
674 fprintf (stream, "<merge/>\n");
681 fprintf(stderr, "index not -1 but merged\n");
686 fprintf(stderr, "index not -1 but x != 2\n");
689 if (n_categories != 0)
691 fprintf(stderr, "index not -1 but subcategories\n");
694 if (*n_indexes >= *allocated_indexes)
696 *allocated_indexes = *allocated_indexes ? 2 * *allocated_indexes : 16;
697 *indexes = realloc(*indexes, *allocated_indexes * sizeof **indexes);
699 (*indexes)[(*n_indexes)++] = indx;
702 int expected_unindexed = indx == -1;
703 if (unindexed != expected_unindexed)
705 fprintf(stderr, "unindexed (%d) mismatch with indx (%d)\n",
710 if (n_categories == 0)
712 for (int i = 0; i <= level + 1; i++)
713 fprintf (stream, " ");
714 fprintf (stream, "<category-index>%d</category-index>\n", indx);
716 for (int i = 0; i < n_categories; i++)
717 dump_category (stream, level + 1, indexes, allocated_indexes, n_indexes);
718 for (int i = 0; i <= level; i++)
719 fprintf (stream, " ");
720 fprintf (stream, "</category>\n"); /* closing tag goes to STREAM as well */
728 printf ("<dimension index=\"%d\">\n", indx);
729 dump_value (stdout, 0);
731 /* This byte is usually 0 but many other values have been spotted. */
734 if (!match_byte(0) && !match_byte(1))
735 match_byte_assert(2);
739 match_byte_assert(1);
741 match_byte_assert(1);
742 match_byte_assert(1);
743 if (!match_u32(UINT32_MAX))
744 match_u32_assert(indx);
745 n_categories = get_u32();
749 int allocated_indexes = 0;
750 for (int i = 0; i < n_categories; i++)
751 dump_category (stdout, 0, &indexes, &allocated_indexes, &n_indexes);
752 check_permutation(indexes, n_indexes, "categories");
754 fprintf (stdout, "</dimension>\n");
759 static int dim_n_cats[64];
760 #define MAX_DIMS (sizeof dim_n_cats / sizeof *dim_n_cats)
766 assert(n_dims < MAX_DIMS);
767 for (int i = 0; i < n_dims; i++)
768 dim_n_cats[i] = dump_dim (i);
774 /* The first three numbers add to the number of dimensions. */
777 int c = n_dims - l - r;
780 /* The next n_dims numbers are a permutation of the dimension numbers. */
782 for (int i = 0; i < n_dims; i++)
787 const char *name = i < l ? "layer" : i < l + r ? "row" : "column";
788 printf ("<%s dimension=\"%d\"/>\n", name, dim);
790 check_permutation(a, n_dims, "dimensions");
794 for (int i = 0; i < x; i++)
796 unsigned int indx = get_u32();
797 printf (" <datum index=\"%d\" coords=", indx);
799 int coords[MAX_DIMS];
800 for (int i = n_dims; i-- > 0; )
802 coords[i] = indx % dim_n_cats[i];
803 indx /= dim_n_cats[i];
805 for (int i = 0; i < n_dims; i++)
806 printf("%c%d", i ? ',' : '"', coords[i]);
812 dump_value(stdout, 1);
813 fprintf (stdout, " </datum>\n");
815 printf ("</data>\n");
821 printf ("<title-local>\n");
822 dump_value(stdout, 0);
824 printf ("</title-local>\n");
826 printf ("<subtype>\n");
827 dump_value(stdout, 0);
829 printf ("</subtype>\n");
831 match_byte_assert(0x31);
833 printf ("<title-c>\n");
834 dump_value(stdout, 0);
836 printf ("</title-c>\n");
838 if (match_byte(0x31))
840 printf ("<user-caption>\n");
841 dump_value(stdout, 0);
842 printf ("</user-caption>\n");
845 match_byte_assert(0x58);
846 if (match_byte(0x31))
848 printf ("<caption>\n");
849 dump_value(stdout, 0);
850 printf ("</caption>\n");
853 match_byte_assert(0x58);
855 int n_footnotes = get_u32();
856 for (int i = 0; i < n_footnotes; i++)
858 printf ("<footnote index=\"%d\">\n", i);
859 dump_value(stdout, 0);
860 /* Custom footnote marker string. */
861 if (match_byte (0x31))
862 dump_value(stdout, 0);
864 match_byte_assert (0x58);
866 printf ("</footnote>\n");
874 for (int i = 1; i <= 8; i++)
876 printf ("<style index=\"%d\"", i);
877 match_byte_assert(i);
878 match_byte_assert(0x31);
879 printf(" font=\"%s\"", get_string());
881 printf(" size=\"%gpt\"", get_float());
883 int style = get_u32();
885 printf(" bold=\"true\"");
887 printf(" italic=\"true\"");
889 bool underline = data[pos++];
891 printf(" underline=\"true\"");
893 int halign = get_u32();
894 printf(" halign=%d", halign);
896 int valign = get_u32();
897 printf(" valign=%d", valign);
899 printf (" fgcolor=\"%s\"", get_string());
900 printf (" bgcolor=\"%s\"", get_string());
903 match_byte_assert(1);
905 char *alt_fgcolor = get_string();
907 printf (" altfg=\"%s\"", alt_fgcolor);
908 char *alt_bgcolor = get_string();
910 printf (" altbg=\"%s\"", alt_bgcolor);
914 printf(" margins=\"");
915 for (int i = 0; i < 4; i++)
919 printf("%d", get_u32());
928 int x1_end = pos + x1;
929 printf("<borders>\n");
930 match_be32_assert(1);
931 int n_borders = get_be32();
932 for (int i = 0; i < n_borders; i++)
934 int type = get_be32();
935 int stroke = get_be32();
936 int color = get_be32();
937 printf(" <border type=\"%d\" stroke=\"%s\" color=\"#%06x\"/>\n",
939 (stroke == 0 ? "none"
940 : stroke == 1 ? "solid"
941 : stroke == 2 ? "dashed"
942 : stroke == 3 ? "thick"
943 : stroke == 4 ? "thin"
944 : stroke == 5 ? "double"
948 bool grid = get_byte();
950 printf(" <grid show=\"%s\"/>\n", grid ? "yes" : "no");
951 printf("</borders>\n");
952 assert(pos == x1_end);
954 int skip = get_u32();
955 assert(skip == 18 || skip == 25);
959 int x3_end = pos + x3;
962 match_be32_assert(1);
964 printf("<settings layer=\"%d\"", get_be32());
966 printf(" skipempty=\"false\"");
968 printf(" showdimensionincorner=\"false\"");
970 printf(" markers=\"numeric\"");
972 printf(" footnoteposition=\"subscript\"");
974 int nbytes = get_be32();
976 hex_dump(pos, nbytes);
979 char *notes = get_string_be();
981 printf(" notes=\"%s\"", notes);
982 char *look = get_string_be();
984 printf(" look=\"%s\"", look);
989 /* Manual column widths, if present. */
990 int count = get_u32();
993 printf("<columnwidths>");
994 for (int i = 0; i < count; i++)
998 printf("%d", get_u32());
1000 printf("</columnwidths>\n");
1003 const char *locale = get_string();
1004 printf ("<locale>%s</locale>\n", locale);
1006 get_u32(); /* Seen: 0, UINT32_MAX, 2, 3, 4, 5, 6, 8, 9, 21, 24. */
1008 match_byte_assert(1);
1009 match_byte_assert(0);
1011 match_byte_assert(1);
1012 printf("<epoch>%d</epoch>\n", get_u32());
1014 int decimal = data[pos];
1015 int grouping = data[pos + 1];
1016 if (match_byte('.'))
1018 if (!match_byte(',') && !match_byte('\''))
1019 match_byte_assert(' ');
1023 match_byte_assert(',');
1024 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1025 match_byte_assert(0);
1027 printf("<format decimal=\"%c\" grouping=\"", decimal);
1033 for (int i = 0; i < 5; i++)
1034 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1037 match_u32_assert(0);
1039 /* The last chunk is an outer envelope that contains two inner envelopes.
1040 The second inner envelope has some interesting data like the encoding and
1044 int outer_end = get_end();
1046 /* First inner envelope: byte*33 int[n] int*[n]. */
1047 int inner_len = get_u32();
1048 int inner_end = pos + inner_len;
1049 int array_start = pos + 33;
1050 match_byte_assert(0);
1051 pos++; /* 0, 1, 10 seen. */
1052 match_byte_assert(0);
1053 pos++; /* 0...11 seen. */
1054 if (!match_byte(0) && !match_byte(1) && !match_byte(2))
1055 match_byte_assert(3);
1056 if (!match_byte(0) && !match_byte(2))
1057 match_byte_assert(3);
1059 match_u64_assert(UINT64_MAX);
1060 match_u32_assert(0);
1061 match_u32_assert(0);
1062 match_u32_assert(0);
1063 match_u32_assert(0);
1064 match_byte_assert(0);
1066 match_byte_assert(1);
1067 match_byte_assert(1);
1071 while (pos < inner_end)
1072 printf(" %d", get_u32());
1077 /* Second inner envelope. */
1078 assert(get_end() == outer_end);
1080 match_byte_assert(1);
1081 match_byte_assert(0);
1082 if (!match_byte(3) && !match_byte(4))
1083 match_byte_assert(5);
1084 match_byte_assert(0);
1085 match_byte_assert(0);
1086 match_byte_assert(0);
1088 printf("<command>%s</command>\n", get_string());
1089 printf("<subcommand>%s</subcommand>\n", get_string());
1090 printf("<language>%s</language>\n", get_string());
1091 printf("<charset>%s</charset>\n", get_string());
1092 printf("<locale>%s</locale>\n", get_string());
1095 match_byte_assert(1);
1096 match_byte_assert(0);
1098 match_byte_assert(1);
1100 match_byte_assert(1);
1102 printf("<epoch2>%d</epoch2>\n", get_u32());
1104 if (match_byte('.'))
1106 if (!match_byte(',') && !match_byte('\''))
1107 match_byte_assert(' ');
1111 match_byte_assert(',');
1112 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1113 match_byte_assert(0);
1116 printf ("small: %g\n", get_double());
1118 match_byte_assert(1);
1119 if (outer_end - pos > 6)
1121 /* There might be a pair of strings representing a dataset and
1122 datafile name, or there might be a set of custom currency strings.
1123 The custom currency strings start with a pair of integers, so we
1124 can distinguish these from a string by checking for a null byte; a
1125 small 32-bit integer will always contain a null and a text string
1128 int len = get_u32();
1129 bool has_dataset = !memchr(&data[pos], '\0', len);
1134 printf("<dataset>%s</dataset>\n", get_string());
1135 printf("<datafile>%s</datafile>\n", get_string());
1137 match_u32_assert(0);
1139 time_t date = get_u32();
1140 struct tm tm = *localtime(&date);
1142 strftime(s, sizeof s, "%a, %d %b %Y %H:%M:%S %z", &tm);
1143 printf("<date>%s</date>\n", s);
1145 match_u32_assert(0);
1151 for (int i = 0; i < 5; i++)
1152 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1155 match_u32_assert(0);
1157 match_byte_assert('.');
1159 match_byte_assert(1);
1161 if (pos < outer_end)
1163 printf("<seed>%d</seed>\n", get_u32());
1164 match_u32_assert(0);
1166 assert(pos == outer_end);
1177 main(int argc, char *argv[])
1181 fprintf (stderr, "usage: %s FILE.bin\n", argv[0]); /* terminate the diagnostic line */
1185 int fd = open(argv[1], O_RDONLY);
1188 fprintf (stderr, "%s: open failed (%s)\n", argv[1], strerror (errno)); /* terminate the diagnostic line */
1205 if (read(fd, data, n) != n)
1213 match_byte_assert(1);
1214 match_byte_assert(0);
1216 version = get_u32();
1217 assert(version == 1 || version == 3);
1219 match_byte_assert(1);
1220 for (int i = 0; i < 4; i++)
1224 int min_col_width = get_u32();
1225 int max_col_width = get_u32();
1226 int min_row_width = get_u32();
1227 int max_row_width = get_u32();
1228 printf("<label-width min-col=\"%d\" max-col=\"%d\" min-row=\"%d\" "
1229 "max-row=\"%d\"/>\n",
1230 min_col_width, max_col_width,
1231 min_row_width, max_row_width);
1234 printf("<tableid>%lld</tableid>", get_u64());
1243 fprintf (stderr, "%x / %x\n", pos, n);