13 #include "u8-mbtouc.h"
15 static const char *filename;
24 #define STR(x) XSTR(x)
25 #define WHERE __FILE__":" STR(__LINE__)
37 memcpy(&x, &data[pos], 4);
42 static unsigned long long int
46 memcpy(&x, &data[pos], 8);
55 x = (data[pos] << 24) | (data[pos + 1] << 16) | (data[pos + 2] << 8) | data[pos + 3];
64 memcpy(&x, &data[pos], 2);
73 memcpy(&x, &data[pos], 8);
78 static double __attribute__((unused))
82 memcpy(&x, &data[pos], 4);
97 match_u32_assert(uint32_t x, const char *where)
99 unsigned int y = get_u32();
102 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u\n", where, pos - 4, x, y);
106 #define match_u32_assert(x) match_u32_assert(x, WHERE)
108 static bool __attribute__((unused))
109 match_u64(uint64_t x)
117 static void __attribute__((unused))
118 match_u64_assert(uint64_t x, const char *where)
120 unsigned long long int y = get_u64();
123 fprintf(stderr, "%s: 0x%x: expected u64:%llu, got u64:%llu\n", where, pos - 8, (unsigned long long) x, y); /* x is uint64_t; cast so the argument matches %llu on every ABI (it is not always unsigned long) */
127 #define match_u64_assert(x) match_u64_assert(x, WHERE)
129 static bool __attribute__((unused))
130 match_be32(uint32_t x)
139 match_be32_assert(uint32_t x, const char *where)
141 unsigned int y = get_be32();
144 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
148 #define match_be32_assert(x) match_be32_assert(x, WHERE)
151 match_byte(uint8_t b)
153 if (pos < n && data[pos] == b)
163 match_byte_assert(uint8_t b, const char *where)
167 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x\n", where, pos, b, data[pos]);
171 #define match_byte_assert(b) match_byte_assert(b, WHERE)
174 match_bytes(int start, const int *bytes, size_t n_bytes)
176 for (size_t i = 0; i < n_bytes; i++)
177 if (bytes[i] >= 0 && data[start + i] != bytes[i])
183 xmemdup0(const void *p, size_t n)
185 char *s = malloc(n + 1);
196 match_byte_assert(1);
200 static bool __attribute__((unused))
203 return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t';
206 static bool __attribute__((unused))
207 all_utf8(const char *p_, size_t len)
209 const uint8_t *p = (const uint8_t *) p_;
210 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
214 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
215 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
222 get_string(const char *where)
225 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
226 /*&& all_ascii(&data[pos + 4], data[pos])*/)
228 int len = data[pos] + data[pos + 1] * 256;
229 char *s = malloc(len + 1);
231 memcpy(s, &data[pos + 4], len);
238 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
242 #define get_string() get_string(WHERE)
245 get_string_be(const char *where)
248 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
249 /*&& all_ascii(&data[pos + 4], data[pos])*/)
251 int len = data[pos + 2] * 256 + data[pos + 3];
252 char *s = malloc(len + 1);
254 memcpy(s, &data[pos + 4], len);
261 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
265 #define get_string_be() get_string_be(WHERE)
274 static void __attribute__((unused))
275 hex_dump(FILE *stream, int ofs, int n)
278 for (int i = 0; i < n; i++)
280 int c = data[ofs + i];
281 n_ascii += is_ascii(c);
282 fprintf(stream, " %02x", c);
287 for (int i = 0; i < n; i++)
289 int c = data[ofs + i];
290 putc(c >= 32 && c < 127 ? c : '.', stream);
296 static void __attribute__((unused))
297 char_dump(FILE *stream, int ofs, int n)
299 for (int i = 0; i < n; i++)
301 int c = data[ofs + i];
302 putc(c >= 32 && c < 127 ? c : '.', stream);
308 dump_counted_string(void)
310 int inner_end = get_end();
311 if (pos == inner_end)
317 match_byte_assert(0x58);
323 if (match_byte(0x31))
326 match_byte_assert(0x58);
327 if (pos != inner_end)
329 fprintf(stderr, "inner end discrepancy\n");
336 dump_style(FILE *stream)
338 if (match_byte(0x58))
341 match_byte_assert(0x31);
343 printf (" bold=\"yes\"");
345 printf (" italic=\"yes\"");
347 printf (" underline=\"yes\"");
349 printf (" show=\"no\"");
350 char *fg = get_string(); /* foreground */
351 char *bg = get_string(); /* background */
352 char *font = get_string(); /* font */
353 int size = get_byte() * (72. / 96.);
354 fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%dpt\"",
359 dump_style2(FILE *stream)
361 if (match_byte(0x58))
364 match_byte_assert(0x31);
365 uint32_t halign = get_u32();
366 printf (" halign=\"%s\"",
367 halign == 0 ? "center"
368 : halign == 2 ? "left"
369 : halign == 4 ? "right"
370 : halign == 6 ? "decimal"
371 : halign == 0xffffffad ? "mixed"
373 int valign = get_u32();
374 printf (" valign=\"%s\"",
375 valign == 0 ? "center"
376 : valign == 1 ? "top"
377 : valign == 3 ? "bottom"
379 printf (" offset=\"%gpt\"", get_double());
384 printf (" margins=\"%d %d %d %d\"", l, r, t, b);
388 dump_nested_string(FILE *stream)
392 match_byte_assert (0);
393 match_byte_assert (0);
394 int outer_end = get_end();
395 s = dump_counted_string();
397 fprintf(stream, " \"%s\"", s);
399 match_byte_assert(0x58);
400 if (pos != outer_end)
402 fprintf(stderr, "outer end discrepancy\n");
410 dump_value_modifier(FILE *stream)
412 if (match_byte (0x31))
416 fprintf(stream, "<special0");
419 /* Corpus frequencies:
424 The given text is appended to the cell in a subscript font.
426 fprintf(stream, " subscript=\"%s\"", get_string());
429 match_u32_assert (0);
433 /* We only have one SPV file for this version (with many
440 if (!match_u32(0) && !match_u32(1) && !match_u32(2) && !match_u32(3) && !match_u32(4) && !match_u32(5) && !match_u32(6) && !match_u32(7) && !match_u32(8) && !match_u32(9))
441 match_u32_assert(10);
444 fprintf(stream, "/>\n");
448 int outer_end = get_end();
450 /* This counted-string appears to be a template string,
451 e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */
452 char *template = dump_counted_string();
454 fprintf(stream, " template=\"%s\"", template);
458 if (pos != outer_end)
460 fprintf(stderr, "outer end discrepancy\n");
463 fprintf(stream, "/>\n");
467 int count = get_u32();
468 fprintf(stream, "<footnote-ref indexes=\"");
469 for (int i = 0; i < count; i++)
473 fprintf(stream, "%d", get_u16());
476 match_byte_assert(0);
477 match_byte_assert(0);
478 dump_nested_string(stream);
479 fprintf(stream, "/>\n");
483 match_byte_assert (0x58);
487 format_to_string (int type)
493 case 2: return "AHEX";
494 case 3: return "COMMA";
495 case 4: return "DOLLAR";
496 case 5: case 40: return "F";
498 case 7: return "PIBHEX";
500 case 9: return "PIB";
501 case 10: return "PK";
502 case 11: return "RB";
503 case 12: return "RBHEX";
507 case 20: return "DATE";
508 case 21: return "TIME";
509 case 22: return "DATETIME";
510 case 23: return "ADATE";
511 case 24: return "JDATE";
512 case 25: return "DTIME";
513 case 26: return "WKDAY";
514 case 27: return "MONTH";
515 case 28: return "MOYR";
516 case 29: return "QYR";
517 case 30: return "WKYR";
518 case 31: return "PCT";
519 case 32: return "DOT";
520 case 33: return "CCA";
521 case 34: return "CCB";
522 case 35: return "CCC";
523 case 36: return "CCD";
524 case 37: return "CCE";
525 case 38: return "EDATE";
526 case 39: return "SDATE";
529 sprintf(tmp, "<%d>", type);
535 dump_value(FILE *stream, int level)
542 for (int i = 0; i <= level; i++)
543 fprintf (stream, " ");
545 printf ("%02x: value (%d)\n", pos, data[pos]);
551 dump_value_modifier(stream);
553 value = get_double ();
554 fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
555 DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
557 else if (match_byte (2))
563 dump_value_modifier (stream);
565 value = get_double ();
567 vallab = get_string ();
568 fprintf (stream, "<numeric-datum value=\"%.*g\" format=\"%s%d.%d\"",
569 DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
571 fprintf (stream, " variable=\"%s\"", var);
573 fprintf (stream, " label=\"%s\"", vallab);
574 fprintf (stream, "/>\n");
575 if (!match_byte (1) && !match_byte(2))
576 match_byte_assert (3);
578 else if (match_byte (3))
580 char *text = get_string();
581 dump_value_modifier(stream);
582 char *identifier = get_string();
583 char *text_eng = get_string();
584 fprintf (stream, "<string c=\"%s\"", text_eng);
586 fprintf (stream, " identifier=\"%s\"", identifier);
587 if (strcmp(text_eng, text))
588 fprintf (stream, " local=\"%s\"", text);
589 fprintf (stream, "/>\n");
591 match_byte_assert(1);
593 else if (match_byte (4))
596 char *var, *vallab, *value;
598 dump_value_modifier(stream);
600 vallab = get_string ();
602 if (!match_byte(1) && !match_byte(2))
603 match_byte_assert (3);
604 value = get_string ();
605 fprintf (stream, "<string-datum value=\"%s\" format=\"%s%d.%d\"",
606 value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
608 fprintf (stream, " variable=\"%s\"", var);
610 fprintf (stream, " label=\"%s\"", vallab); /* only the attribute here: the following statement emits the single closing "/>" for <string-datum> */
611 fprintf (stream, "/>\n");
613 else if (match_byte (5))
615 dump_value_modifier(stream);
616 char *name = get_string ();
617 char *label = get_string ();
618 fprintf (stream, "<variable name=\"%s\"", name);
620 fprintf (stream, " label=\"%s\"", label);
621 fprintf (stream, "/>\n");
622 if (!match_byte(1) && !match_byte(2))
623 match_byte_assert(3);
627 printf ("else %#x\n", pos);
628 dump_value_modifier(stream);
630 char *base = get_string();
632 fprintf (stream, "<template format=\"%s\">\n", base);
633 for (int i = 0; i < x; i++)
640 for (int j = 0; j <= level + 1; j++)
641 fprintf (stream, " ");
642 fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
643 for (int j = 0; j < y; j++)
644 dump_value (stream, level + 2);
645 for (int j = 0; j <= level + 1; j++)
646 fprintf (stream, " ");
647 fprintf (stream, "</substitution>\n");
649 for (int j = 0; j <= level; j++)
650 fprintf (stream, " ");
651 fprintf (stream, "</template>\n");
656 compare_int(const void *a_, const void *b_)
660 return *a < *b ? -1 : *a > *b;
664 check_permutation(int *a, int n, const char *name)
667 memcpy(b, a, n * sizeof *a);
668 qsort(b, n, sizeof *b, compare_int);
669 for (int i = 0; i < n; i++)
672 fprintf(stderr, "bad %s permutation:", name);
673 for (int i = 0; i < n; i++)
674 fprintf(stderr, " %d", a[i]);
681 dump_category(FILE *stream, int level, int **indexes, int *allocated_indexes,
684 for (int i = 0; i <= level; i++)
685 fprintf (stream, " ");
686 printf ("<category>\n");
687 dump_value (stream, level + 1);
689 bool merge = get_bool();
690 match_byte_assert (0);
691 int unindexed = get_bool();
696 match_u32_assert (2);
698 int indx = get_u32();
699 int n_categories = get_u32();
704 for (int i = 0; i <= level + 1; i++)
705 fprintf (stream, " ");
706 fprintf (stream, "<merge/>\n");
715 assert (n_categories == 0);
716 if (*n_indexes >= *allocated_indexes)
718 *allocated_indexes = *allocated_indexes ? 2 * *allocated_indexes : 16;
719 *indexes = realloc(*indexes, *allocated_indexes * sizeof **indexes);
721 (*indexes)[(*n_indexes)++] = indx;
724 if (n_categories == 0)
726 for (int i = 0; i <= level + 1; i++)
727 fprintf (stream, " ");
728 fprintf (stream, "<category-index>%d</category-index>\n", indx);
730 for (int i = 0; i < n_categories; i++)
731 dump_category (stream, level + 1, indexes, allocated_indexes, n_indexes);
732 for (int i = 0; i <= level; i++)
733 fprintf (stream, " ");
734 printf ("</category>\n");
742 printf ("<dimension index=\"%d\">\n", indx);
743 dump_value (stdout, 0);
745 /* This byte is usually 0 but many other values have been spotted.
746 No visible effect. */
749 /* This byte can cause data to be oddly replicated. */
750 if (!match_byte(0) && !match_byte(1))
751 match_byte_assert(2);
756 bool show_dim_label = get_bool();
758 printf(" <show-dim-label/>\n");
760 bool hide_all_labels = get_bool();
762 printf(" <hide-all-labels/>\n");
764 match_byte_assert(1);
765 if (!match_u32(UINT32_MAX))
766 match_u32_assert(indx);
768 n_categories = get_u32();
772 int allocated_indexes = 0;
773 for (int i = 0; i < n_categories; i++)
774 dump_category (stdout, 0, &indexes, &allocated_indexes, &n_indexes);
775 check_permutation(indexes, n_indexes, "categories");
777 fprintf (stdout, "</dimension>\n");
782 static int dim_n_cats[64];
783 #define MAX_DIMS (sizeof dim_n_cats / sizeof *dim_n_cats)
789 assert(n_dims < MAX_DIMS);
790 for (int i = 0; i < n_dims; i++)
791 dim_n_cats[i] = dump_dim (i);
797 /* The first three numbers add to the number of dimensions. */
800 int c = n_dims - l - r;
803 /* The next n_dims numbers are a permutation of the dimension numbers. */
805 for (int i = 0; i < n_dims; i++)
810 const char *name = i < l ? "layer" : i < l + r ? "row" : "column";
811 printf ("<%s dimension=\"%d\"/>\n", name, dim);
813 check_permutation(a, n_dims, "dimensions");
817 for (int i = 0; i < x; i++)
819 unsigned int indx = get_u32();
820 printf (" <datum index=\"%d\" coords=", indx);
822 int coords[MAX_DIMS];
823 for (int i = n_dims; i-- > 0; )
825 coords[i] = indx % dim_n_cats[i];
826 indx /= dim_n_cats[i];
828 for (int i = 0; i < n_dims; i++)
829 printf("%c%d", i ? ',' : '"', coords[i]);
835 dump_value(stdout, 1);
836 fprintf (stdout, " </datum>\n");
838 printf ("</data>\n");
844 printf ("<title-local>\n");
845 dump_value(stdout, 0);
847 printf ("</title-local>\n");
849 printf ("<subtype>\n");
850 dump_value(stdout, 0);
852 printf ("</subtype>\n");
854 match_byte_assert(0x31);
856 printf ("<title-c>\n");
857 dump_value(stdout, 0);
859 printf ("</title-c>\n");
861 if (match_byte(0x31))
863 printf ("<user-caption>\n");
864 dump_value(stdout, 0);
865 printf ("</user-caption>\n");
868 match_byte_assert(0x58);
869 if (match_byte(0x31))
871 printf ("<caption>\n");
872 dump_value(stdout, 0);
873 printf ("</caption>\n");
876 match_byte_assert(0x58);
878 int n_footnotes = get_u32();
879 for (int i = 0; i < n_footnotes; i++)
881 printf ("<footnote index=\"%d\">\n", i);
882 dump_value(stdout, 0);
883 /* Custom footnote marker string. */
884 if (match_byte (0x31))
885 dump_value(stdout, 0);
887 match_byte_assert (0x58);
891 /* Appears to be the number of references to a footnote. */
892 printf (" <references n=\"%d\"/>\n", n);
896 /* The user deleted the footnote references. */
897 printf (" <deleted/>\n");
901 printf ("</footnote>\n");
909 for (int i = 1; i <= 8; i++)
911 printf ("<style index=\"%d\"", i);
912 match_byte_assert(i);
913 match_byte_assert(0x31);
914 printf(" font=\"%s\"", get_string());
916 printf(" size=\"%gpt\"", get_float());
918 int style = get_u32();
920 printf(" bold=\"true\"");
922 printf(" italic=\"true\"");
924 bool underline = data[pos++];
926 printf(" underline=\"true\"");
928 int halign = get_u32();
929 printf(" halign=%d", halign);
931 int valign = get_u32();
932 printf(" valign=%d", valign);
934 printf (" fgcolor=\"%s\"", get_string());
935 printf (" bgcolor=\"%s\"", get_string());
938 match_byte_assert(1);
940 char *alt_fgcolor = get_string();
942 printf (" altfg=\"%s\"", alt_fgcolor);
943 char *alt_bgcolor = get_string();
945 printf (" altbg=\"%s\"", alt_bgcolor);
949 printf(" margins=\"");
950 for (int i = 0; i < 4; i++)
954 printf("%d", get_u32());
963 int x1_end = pos + x1;
964 printf("<borders>\n");
965 match_be32_assert(1);
966 int n_borders = get_be32();
967 for (int i = 0; i < n_borders; i++)
969 int type = get_be32();
970 int stroke = get_be32();
971 int color = get_be32();
972 printf(" <border type=\"%d\" stroke=\"%s\" color=\"#%06x\"/>\n",
974 (stroke == 0 ? "none"
975 : stroke == 1 ? "solid"
976 : stroke == 2 ? "dashed"
977 : stroke == 3 ? "thick"
978 : stroke == 4 ? "thin"
979 : stroke == 5 ? "double"
983 bool grid = get_byte();
985 printf(" <grid show=\"%s\"/>\n", grid ? "yes" : "no");
986 printf("</borders>\n");
987 assert(pos == x1_end);
989 int skip = get_u32();
990 assert(skip == 18 || skip == 25);
994 int x3_end = pos + x3;
997 match_be32_assert(1);
999 printf("<settings layer=\"%d\"", get_be32());
1001 printf(" skipempty=\"false\"");
1003 printf(" showdimensionincorner=\"false\"");
1005 printf(" markers=\"numeric\"");
1007 printf(" footnoteposition=\"subscript\"");
1009 int nbytes = get_be32();
1010 int end = pos + nbytes;
1012 while (pos + 4 <= end)
1013 printf(" %d", get_be32());
1017 char *notes = get_string_be();
1019 printf(" notes=\"%s\"", notes);
1020 char *look = get_string_be();
1022 printf(" look=\"%s\"", look);
1027 /* Manual column widths, if present. */
1028 int count = get_u32();
1031 printf("<columnwidths>");
1032 for (int i = 0; i < count; i++)
1036 printf("%d", get_u32());
1038 printf("</columnwidths>\n");
1041 const char *locale = get_string();
1042 printf ("<locale>%s</locale>\n", locale);
1044 printf ("<layer>%d</layer>\n", get_u32());
1046 match_byte_assert(1);
1048 match_byte_assert(1);
1050 match_byte_assert(1);
1051 printf("<epoch>%d</epoch>\n", get_u32());
1053 int decimal = data[pos];
1054 int grouping = data[pos + 1];
1055 if (match_byte('.'))
1057 if (!match_byte(',') && !match_byte('\''))
1058 match_byte_assert(' ');
1062 match_byte_assert(',');
1063 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1064 match_byte_assert(0);
1066 printf("<format decimal=\"%c\"", decimal);
1068 printf(" grouping=\"%c\"", grouping);
1072 for (int i = 0; i < 5; i++)
1073 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1076 match_u32_assert(0);
1078 /* The last chunk is an outer envelope that contains two inner envelopes.
1079 The second inner envelope has some interesting data like the encoding and
1081 int outer_end = get_end();
1084 /* First inner envelope: byte*33 int[n] int*[n]. */
1085 int inner_len = get_u32();
1086 int inner_end = pos + inner_len;
1087 int array_start = pos + 33;
1088 match_byte_assert(0);
1089 pos++; /* 0, 1, 10 seen. */
1092 /* 0=en 1=de 2=es 3=it 5=ko 6=pl 8=zh-tw 10=pt_BR 11=fr */
1093 printf("lang=%d ", get_byte());
1095 printf ("variable_mode=%d\n", get_byte());
1096 printf ("value_mode=%d\n", get_byte());
1098 match_u64_assert(UINT64_MAX);
1099 match_u32_assert(0);
1100 match_u32_assert(0);
1101 match_u32_assert(0);
1102 match_u32_assert(0);
1103 match_byte_assert(0);
1105 match_byte_assert(1);
1108 assert(get_end() == inner_end);
1109 printf("<heights>");
1110 int n_heights = get_u32();
1111 for (int i = 0; i < n_heights; i++)
1115 printf("%d", get_u32());
1117 printf("</heights>\n");
1119 int n_style_map = get_u32();
1120 for (int i = 0; i < n_style_map; i++)
1122 uint64_t cell = get_u64();
1123 int style = get_u16();
1124 printf("<style-map cell=\"%llu\" style=\"%d\"/>\n", (unsigned long long) cell, style); /* cell is uint64_t; cast so the argument matches %llu portably */
1127 int n_styles = get_u32();
1128 for (int i = 0; i < n_styles; i++)
1130 printf("<cell-style index=\"%d\"", i);
1132 dump_style2(stdout);
1137 assert(pos == inner_end);
1139 /* Second inner envelope. */
1140 assert(get_end() == outer_end);
1142 match_byte_assert(1);
1143 match_byte_assert(0);
1144 if (!match_byte(3) && !match_byte(4))
1145 match_byte_assert(5);
1146 match_byte_assert(0);
1147 match_byte_assert(0);
1148 match_byte_assert(0);
1150 printf("<command>%s</command>\n", get_string());
1151 printf("<command-local>%s</command-local>\n", get_string());
1152 printf("<language>%s</language>\n", get_string());
1153 printf("<charset>%s</charset>\n", get_string());
1154 printf("<locale>%s</locale>\n", get_string());
1161 printf("<epoch2>%d</epoch2>\n", get_u32());
1163 if (match_byte('.'))
1165 if (!match_byte(',') && !match_byte('\''))
1166 match_byte_assert(' ');
1170 match_byte_assert(',');
1171 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1172 match_byte_assert(0);
1175 printf ("small: %g\n", get_double());
1177 match_byte_assert(1);
1178 if (outer_end - pos > 6)
1180 /* There might be a pair of strings representing a dataset and
1181 datafile name, or there might be a set of custom currency strings.
1182 The custom currency strings start with a pair of integers, so we
1183 can distinguish these from a string by checking for a null byte; a
1184 small 32-bit integer will always contain a null and a text string
1187 int len = get_u32();
1188 bool has_dataset = !memchr(&data[pos], '\0', len);
1193 printf("<dataset>%s</dataset>\n", get_string());
1194 printf("<datafile>%s</datafile>\n", get_string());
1196 match_u32_assert(0);
1198 time_t date = get_u32();
1199 struct tm tm = *localtime(&date);
1201 strftime(s, sizeof s, "%a, %d %b %Y %H:%M:%S %z", &tm);
1202 printf("<date>%s</date>\n", s);
1204 match_u32_assert(0);
1210 for (int i = 0; i < 5; i++)
1211 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1214 match_u32_assert(0);
1216 match_byte_assert('.');
1219 if (pos < outer_end)
1222 match_u32_assert(0);
1224 assert(pos == outer_end);
1228 else if (outer_end != pos)
1231 printf("<command>%s</command>\n", get_string());
1232 printf("<command-local>%s</command-local>\n", get_string());
1233 printf("<language>%s</language>\n", get_string()); /* closing tag must match the opening <language> tag */
1234 printf("<charset>%s</charset>\n", get_string());
1235 printf("<locale>%s</locale>\n", get_string());
1237 match_byte_assert(0);
1241 printf("<epoch2>%d</epoch2>\n", get_u32());
1242 int decimal = data[pos];
1243 int grouping = data[pos + 1];
1244 if (match_byte('.'))
1246 if (!match_byte(',') && !match_byte('\''))
1247 match_byte_assert(' ');
1251 match_byte_assert(',');
1252 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1253 match_byte_assert(0);
1255 printf("<format decimal=\"%c\"", decimal);
1257 printf(" grouping=\"%c\"", grouping);
1261 for (int i = 0; i < 5; i++)
1262 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1265 match_u32_assert(0);
1267 match_byte_assert('.');
1270 assert(pos == outer_end);
1276 format_name (int format, char *buf)
1281 case 2: return "AHEX";
1282 case 3: return "COMMA";
1283 case 4: return "DOLLAR";
1285 case 6: return "IB";
1286 case 7: return "PIBHEX";
1288 case 9: return "PIB";
1289 case 10: return "PK";
1290 case 11: return "RB";
1291 case 12: return "RBHEX";
1292 case 15: return "Z";
1293 case 16: return "N";
1294 case 17: return "E";
1295 case 20: return "DATE";
1296 case 21: return "TIME";
1297 case 22: return "DATETIME";
1298 case 23: return "ADATE";
1299 case 24: return "JDATE";
1300 case 25: return "DTIME";
1301 case 26: return "WKDAY";
1302 case 27: return "MONTH";
1303 case 28: return "MOYR";
1304 case 29: return "QYR";
1305 case 30: return "WKYR";
1306 case 31: return "PCT";
1307 case 32: return "DOT";
1308 case 33: return "CCA";
1309 case 34: return "CCB";
1310 case 35: return "CCC";
1311 case 36: return "CCD";
1312 case 37: return "CCE";
1313 case 38: return "EDATE";
1314 case 39: return "SDATE";
1315 case 40: return "MTIME";
1316 case 41: return "YMDHMS";
1317 default: sprintf(buf, "(%d)", format); return buf;
1322 main(int argc, char *argv[])
1324 bool print_offsets = false;
1327 int c = getopt (argc, argv, "o");
1334 print_offsets = true;
1341 if (argc - optind != 1)
1343 fprintf (stderr, "usage: %s FILE.bin\n", argv[0]); /* terminate the diagnostic with a newline */
1347 const char *filename = argv[optind];
1348 int fd = open(filename, O_RDONLY);
1351 fprintf (stderr, "%s: open failed (%s)\n", filename, strerror (errno)); /* terminate the diagnostic with a newline */
1368 if (read(fd, data, n) != n)
1375 setvbuf (stdout, NULL, _IOLBF, 0);
1380 unsigned int prev_end = 0;
1381 for (pos = 0; pos + 50 < n; pos++)
1383 if (data[pos + 0] == 0xff &&
1384 data[pos + 1] == 0xff &&
1385 data[pos + 2] == 0 &&
1388 int len = data[pos + 4] + (data[pos + 5] << 8);
1389 if (len < 3 || pos + len + 6 >= n || !all_utf8 ((char *) &data[pos + 6], len))
1392 printf ("+%04x %04x...%04x: %-25.*s\n",
1393 pos - prev_end, pos, pos + 6 + len,
1394 len < 50 ? (int) len : 50, &data[pos + 6]);
1395 prev_end = pos + 6 + len;
1400 for (pos = 0; pos + 50 < n; pos++)
1402 if (data[pos + 0] == 'L' &&
1403 data[pos + 1] == 'o' &&
1404 data[pos + 2] == 'g' &&
1405 !all_utf8((char *) &data[pos + 3], 1) &&
1406 (pos == 0 || data[pos - 1] != 'v')) /* the scan starts at pos 0, where there is no preceding byte to inspect */
1409 printf ("%04x: ", pos);
1410 unsigned int p = pos;
1411 while (all_utf8 ((char *) &data[p], 1))
1413 hex_dump (stdout, p - 28, 38);
1417 unsigned int prev_end = 0;
1419 for (pos = 2; pos + 50 < n; pos++)
1421 static const int cell_prefix[] = {
1423 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, -1 /* 00 or 10 */, 0x00, 0x00, 0x00, 0x00, -1,
1425 /*14 15 16 17 18 19 */
1426 0x80, 0x01, -1, -1, -1, -1,
1428 size_t cell_prefix_len = sizeof cell_prefix / sizeof *cell_prefix;
1429 if (match_bytes(pos, cell_prefix, cell_prefix_len))
1431 if (prev_end != pos)
1434 printf ("%04x ", prev_end);
1435 hex_dump (stdout, prev_end, pos - prev_end);
1437 if (!strcmp (title, "DspNumber")
1438 && pos - prev_end == 2
1439 && data[prev_end + 1] == 0x80)
1441 static int already = false;
1443 fprintf (stderr, " sum=%d %02x\n", sum, data[prev_end]);
1449 printf ("cell %s%d.%d ",
1450 format_name (data[pos + 18], buf),
1454 int len = cell_prefix_len;
1455 if (data[pos + 19] == 0)
1457 assert (data[pos + 13] == 5);
1458 if (data[pos + 20] == 0)
1460 int count = (data[pos + 22]);
1461 printf ("%d %d \"%.*s\"\n",
1462 data[pos + 21], data[pos + 22],
1463 count, &data[pos + 23]);
1466 else if (data[pos + 20] == 1
1467 && data[pos + 21] == 0xff
1468 && data[pos + 22] == 0xff)
1471 printf ("%d \"%.*s\"\n", count, data[pos + 23],
1475 else if (data[pos + 20] == 1)
1477 int count = (data[pos + 21]);
1478 printf ("\"%.*s\"\n",
1479 count, &data[pos + 22]);
1485 else if (data[pos + 19] == 128 && data[pos + 20] == 2)
1487 /* pos + 13 is usually 22...53, and it's 3 more than the
1488 " xx 80" separator between cells */
1489 printf ("xxx%x ", data[pos + 13]);
1490 double d = *(double *) &data[pos + 21];
1497 sysmis = {.b = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff}};
1503 if (data[pos + 29] < 0xff
1504 && all_utf8((char *) &data[pos + 30], data[pos + 29]))
1506 printf (" \"%.*s\"", (int) data[pos + 29],
1508 len += data[pos + 29] + 1;
1515 else if (data[pos + 19] == 128 && data[pos + 20] == 1 &&
1516 data[pos + 21] == 0)
1518 if (data[pos + 23] < 0xff
1519 && all_utf8((char *) &data[pos + 24], data[pos + 23]))
1521 printf (" \"%.*s\"\n", (int) data[pos + 23],
1523 len = 24 + data[pos + 23];
1530 printf ("xxx%d %d %d %d\n",
1531 data[pos + 19], data[pos + 20],
1532 data[pos + 21], data[pos + 22]);
1540 static const int record_prefix[] = {
1541 0xff, 0xff, 0x00, 0x00,
1543 size_t record_prefix_len = sizeof record_prefix / sizeof *record_prefix;
1544 if (match_bytes(pos, record_prefix, record_prefix_len))
1546 int len = record_prefix_len;
1547 int slen = data[pos + 4] + (data[pos + 5] << 8);
1548 if (slen >= 2 && slen < 256 && all_utf8((char *) &data[pos + 6], slen))
1550 if (prev_end != pos)
1553 printf ("%04x ", prev_end);
1554 hex_dump (stdout, prev_end, pos - prev_end);
1559 printf ("rec:%-20.*s ", slen, &data[pos + 6]);
1561 title = xmemdup0(&data[pos + 6], slen);
1562 fprintf (stderr, "%s%d ", title, data[pos + len]);
1563 sum += data[pos+len];
1571 static const int number_prefix[] = {
1574 size_t number_prefix_len = sizeof number_prefix / sizeof *number_prefix;
1575 if (match_bytes(pos, number_prefix, number_prefix_len))
1577 if (prev_end != pos)
1580 printf ("%04x ", prev_end);
1581 hex_dump (stdout, prev_end, pos - prev_end);
1585 double d = *(double *) &data[pos + number_prefix_len];
1586 printf ("float %f\n", d);
1593 if (!memcmp (&data[pos + 4], "{\\rtf", 5))
1595 int len = data[pos] + (data[pos + 1] << 8) + (data[pos + 2] << 16)
1596 + (data[pos + 3] << 24);
1597 if (len < n - pos - 4)
1599 if (prev_end != pos)
1602 printf ("%04x ", prev_end);
1603 hex_dump (stdout, prev_end, pos - prev_end);
1614 if (data[pos] && data[pos + 1] && data[pos + 2] >= 0xfe
1615 && data[pos + 3] == 0xff && data[pos + 4] && data[pos + 4] != 0xff)
1617 if (prev_end != pos)
1620 printf ("%04x ", prev_end);
1621 hex_dump (stdout, prev_end, pos - prev_end);
1625 static int prev_num;
1626 int32_t num = data[pos] + (data[pos + 1] << 8)
1627 + (data[pos + 2] << 16) + (data[pos + 3] << 24);
1628 printf ("%d (%+d) ", num, num - prev_num);
1635 static const int font_prefix[] =
1637 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x41, 0x72, 0x69, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1639 size_t font_prefix_len = sizeof font_prefix / sizeof *font_prefix;
1640 if (match_bytes(pos, font_prefix, font_prefix_len))
1642 if (prev_end != pos)
1645 printf ("%04x ", prev_end);
1646 hex_dump (stdout, prev_end, pos - prev_end);
1652 pos += font_prefix_len - 1;
1657 static const int string_prefix[] = {
1658 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01
1660 size_t string_prefix_len = sizeof string_prefix / sizeof *string_prefix;
1661 if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] != 255)
1663 if (prev_end != pos)
1666 printf ("%04x ", prev_end);
1667 hex_dump (stdout, prev_end, pos - prev_end);
1671 int len = data[pos + 7];
1672 printf ("string %.*s\n", len, &data[pos + 8]);
1677 if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] == 255)
1679 if (prev_end != pos)
1682 printf ("%04x ", prev_end);
1683 hex_dump (stdout, prev_end, pos - prev_end);
1687 int len = data[pos + 8] + (data[pos + 9] << 8);
1688 printf ("\nlongstring %.*s\n", len, &data[pos + 10]);
1689 pos += 10 + len - 1;
1697 if (!is_ascii(data[pos]))
1700 unsigned int start = pos;
1701 unsigned int end = pos + 1;
1702 while (is_ascii(data[end]))
1705 unsigned int len = end - start;
1709 unsigned int len2 = data[start - 2] + (data[start - 1] << 8);
1710 unsigned int len3 = data[start - 1];
1712 if (len2 && len2 <= len)
1717 else if (len3 && len3 <= len)
1728 unsigned real_start = start - length_bytes;
1729 if (prev_end != real_start)
1732 printf ("%04x ", prev_end);
1733 hex_dump (stdout, prev_end, real_start - prev_end);
1736 printf ("%04x ", real_start);
1737 printf ("\"%.*s\"\n",
1738 (int) end - start, (char *) &data[start]);