13 #include "u8-mbtouc.h"
15 static const char *filename;
24 #define STR(x) XSTR(x)
25 #define WHERE __FILE__":" STR(__LINE__)
37 memcpy(&x, &data[pos], 4);
42 static unsigned long long int
46 memcpy(&x, &data[pos], 8);
55 x = ((uint32_t) data[pos] << 24) | ((uint32_t) data[pos + 1] << 16) | ((uint32_t) data[pos + 2] << 8) | data[pos + 3]; /* Widen each byte to uint32_t before shifting so a set top bit is never shifted into the sign bit of a promoted int. */
64 memcpy(&x, &data[pos], 2);
73 memcpy(&x, &data[pos], 8);
78 static double __attribute__((unused))
82 memcpy(&x, &data[pos], 4);
97 match_u32_assert(uint32_t x, const char *where)
99 unsigned int y = get_u32();
102 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u\n", where, pos - 4, x, y);
106 #define match_u32_assert(x) match_u32_assert(x, WHERE)
108 static bool __attribute__((unused))
109 match_u64(uint64_t x)
117 static void __attribute__((unused))
118 match_u64_assert(uint64_t x, const char *where)
120 unsigned long long int y = get_u64();
123 fprintf(stderr, "%s: 0x%x: expected u64:%llu, got u64:%llu\n", where, pos - 8, (unsigned long long) x, y); /* x is uint64_t; cast so the argument matches %llu on every platform. */
127 #define match_u64_assert(x) match_u64_assert(x, WHERE)
129 static bool __attribute__((unused))
130 match_be32(uint32_t x)
139 match_be32_assert(uint32_t x, const char *where)
141 unsigned int y = get_be32();
144 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
148 #define match_be32_assert(x) match_be32_assert(x, WHERE)
151 match_byte(uint8_t b)
153 if (pos < n && data[pos] == b)
163 match_byte_assert(uint8_t b, const char *where)
167 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x\n", where, pos, b, data[pos]);
171 #define match_byte_assert(b) match_byte_assert(b, WHERE)
174 match_bytes(int start, const int *bytes, size_t n_bytes)
176 for (size_t i = 0; i < n_bytes; i++)
177 if (bytes[i] >= 0 && data[start + i] != bytes[i])
187 match_byte_assert(1);
191 static bool __attribute__((unused))
194 return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t';
197 static bool __attribute__((unused))
198 all_utf8(const char *p_, size_t len)
200 const uint8_t *p = (const uint8_t *) p_;
201 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
205 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
206 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
213 get_string(const char *where)
216 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
217 /*&& all_ascii(&data[pos + 4], data[pos])*/)
219 int len = data[pos] + data[pos + 1] * 256;
220 char *s = malloc(len + 1);
222 memcpy(s, &data[pos + 4], len);
229 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
233 #define get_string() get_string(WHERE)
236 get_string_be(const char *where)
239 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
240 /*&& all_ascii(&data[pos + 4], data[pos])*/)
242 int len = data[pos + 2] * 256 + data[pos + 3];
243 char *s = malloc(len + 1);
245 memcpy(s, &data[pos + 4], len);
252 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
256 #define get_string_be() get_string_be(WHERE)
265 static void __attribute__((unused))
266 hex_dump(FILE *stream, int ofs, int n)
269 for (int i = 0; i < n; i++)
271 int c = data[ofs + i];
272 n_ascii += is_ascii(c);
273 fprintf(stream, " %02x", c);
278 for (int i = 0; i < n; i++)
280 int c = data[ofs + i];
281 putc(c >= 32 && c < 127 ? c : '.', stream);
287 static void __attribute__((unused))
288 char_dump(FILE *stream, int ofs, int n)
290 for (int i = 0; i < n; i++)
292 int c = data[ofs + i];
293 putc(c >= 32 && c < 127 ? c : '.', stream);
299 dump_counted_string(void)
301 int inner_end = get_end();
302 if (pos == inner_end)
308 match_byte_assert(0x58);
314 if (match_byte(0x31))
317 match_byte_assert(0x58);
318 if (pos != inner_end)
320 fprintf(stderr, "inner end discrepancy\n");
327 dump_style(FILE *stream)
329 if (match_byte(0x58))
332 match_byte_assert(0x31);
334 fprintf (stream, " bold=\"yes\""); /* Attributes go to STREAM, like the fgcolor/bgcolor/font/size attributes written later in this function. */
336 fprintf (stream, " italic=\"yes\"");
338 fprintf (stream, " underline=\"yes\"");
340 fprintf (stream, " show=\"no\"");
341 char *fg = get_string(); /* foreground */
342 char *bg = get_string(); /* background */
343 char *font = get_string(); /* font */
344 int size = get_byte() * (72. / 96.);
345 fprintf(stream, " fgcolor=\"%s\" bgcolor=\"%s\" font=\"%s\" size=\"%dpt\"",
350 dump_style2(FILE *stream)
352 if (match_byte(0x58))
355 match_byte_assert(0x31);
356 uint32_t halign = get_u32();
357 printf (" halign=\"%s\"",
358 halign == 0 ? "center"
359 : halign == 2 ? "left"
360 : halign == 4 ? "right"
361 : halign == 6 ? "decimal"
362 : halign == 0xffffffad ? "mixed"
364 int valign = get_u32();
365 printf (" valign=\"%s\"",
366 valign == 0 ? "center"
367 : valign == 1 ? "top"
368 : valign == 3 ? "bottom"
370 printf (" offset=\"%gpt\"", get_double());
375 printf (" margins=\"%d %d %d %d\"", l, r, t, b);
379 dump_nested_string(FILE *stream)
383 match_byte_assert (0);
384 match_byte_assert (0);
385 int outer_end = get_end();
386 s = dump_counted_string();
388 fprintf(stream, " \"%s\"", s);
390 match_byte_assert(0x58);
391 if (pos != outer_end)
393 fprintf(stderr, "outer end discrepancy\n");
401 dump_value_modifier(FILE *stream)
403 if (match_byte (0x31))
407 fprintf(stream, "<special0");
410 /* Corpus frequencies:
415 The given text is appended to the cell in a subscript font.
417 fprintf(stream, " subscript=\"%s\"", get_string());
420 match_u32_assert (0);
424 /* We only have one SPV file for this version (with many
431 if (!match_u32(0) && !match_u32(1) && !match_u32(2) && !match_u32(3) && !match_u32(4) && !match_u32(5) && !match_u32(6) && !match_u32(7) && !match_u32(8) && !match_u32(9))
432 match_u32_assert(10);
435 fprintf(stream, "/>\n");
439 int outer_end = get_end();
441 /* This counted-string appears to be a template string,
442 e.g. "Design\: [:^1:]1 Within Subjects Design\: [:^1:]2". */
443 char *template = dump_counted_string();
445 fprintf(stream, " template=\"%s\"", template);
449 if (pos != outer_end)
451 fprintf(stderr, "outer end discrepancy\n");
454 fprintf(stream, "/>\n");
458 int count = get_u32();
459 fprintf(stream, "<footnote-ref indexes=\"");
460 for (int i = 0; i < count; i++)
464 fprintf(stream, "%d", get_u16());
467 match_byte_assert(0);
468 match_byte_assert(0);
469 dump_nested_string(stream);
470 fprintf(stream, "/>\n");
474 match_byte_assert (0x58);
478 format_to_string (int type)
484 case 2: return "AHEX";
485 case 3: return "COMMA";
486 case 4: return "DOLLAR";
487 case 5: case 40: return "F";
489 case 7: return "PIBHEX";
491 case 9: return "PIB";
492 case 10: return "PK";
493 case 11: return "RB";
494 case 12: return "RBHEX";
498 case 20: return "DATE";
499 case 21: return "TIME";
500 case 22: return "DATETIME";
501 case 23: return "ADATE";
502 case 24: return "JDATE";
503 case 25: return "DTIME";
504 case 26: return "WKDAY";
505 case 27: return "MONTH";
506 case 28: return "MOYR";
507 case 29: return "QYR";
508 case 30: return "WKYR";
509 case 31: return "PCT";
510 case 32: return "DOT";
511 case 33: return "CCA";
512 case 34: return "CCB";
513 case 35: return "CCC";
514 case 36: return "CCD";
515 case 37: return "CCE";
516 case 38: return "EDATE";
517 case 39: return "SDATE";
520 sprintf(tmp, "<%d>", type);
526 dump_value(FILE *stream, int level)
533 for (int i = 0; i <= level; i++)
534 fprintf (stream, " ");
536 printf ("%02x: value (%d)\n", pos, data[pos]);
542 dump_value_modifier(stream);
544 value = get_double ();
545 fprintf (stream, "<number value=\"%.*g\" format=\"%s%d.%d\"/>\n",
546 DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
548 else if (match_byte (2))
554 dump_value_modifier (stream);
556 value = get_double ();
558 vallab = get_string ();
559 fprintf (stream, "<numeric-datum value=\"%.*g\" format=\"%s%d.%d\"",
560 DBL_DIG, value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
562 fprintf (stream, " variable=\"%s\"", var);
564 fprintf (stream, " label=\"%s\"", vallab);
565 fprintf (stream, "/>\n");
566 if (!match_byte (1) && !match_byte(2))
567 match_byte_assert (3);
569 else if (match_byte (3))
571 char *text = get_string();
572 dump_value_modifier(stream);
573 char *identifier = get_string();
574 char *text_eng = get_string();
575 fprintf (stream, "<string c=\"%s\"", text_eng);
577 fprintf (stream, " identifier=\"%s\"", identifier);
578 if (strcmp(text_eng, text))
579 fprintf (stream, " local=\"%s\"", text);
580 fprintf (stream, "/>\n");
582 match_byte_assert(1);
584 else if (match_byte (4))
587 char *var, *vallab, *value;
589 dump_value_modifier(stream);
591 vallab = get_string ();
593 if (!match_byte(1) && !match_byte(2))
594 match_byte_assert (3);
595 value = get_string ();
596 fprintf (stream, "<string-datum value=\"%s\" format=\"%s%d.%d\"",
597 value, format_to_string(format >> 16), (format >> 8) & 0xff, format & 0xff);
599 fprintf (stream, " variable=\"%s\"", var);
601 fprintf (stream, " label=\"%s\"", vallab); /* Only the attribute is written here; the element is terminated exactly once by the following statement. */
602 fprintf (stream, "/>\n");
604 else if (match_byte (5))
606 dump_value_modifier(stream);
607 char *name = get_string ();
608 char *label = get_string ();
609 fprintf (stream, "<variable name=\"%s\"", name);
611 fprintf (stream, " label=\"%s\"", label);
612 fprintf (stream, "/>\n");
613 if (!match_byte(1) && !match_byte(2))
614 match_byte_assert(3);
618 printf ("else %#x\n", pos);
619 dump_value_modifier(stream);
621 char *base = get_string();
623 fprintf (stream, "<template format=\"%s\">\n", base);
624 for (int i = 0; i < x; i++)
631 for (int j = 0; j <= level + 1; j++)
632 fprintf (stream, " ");
633 fprintf (stream, "<substitution index=\"%d\">\n", i + 1);
634 for (int j = 0; j < y; j++)
635 dump_value (stream, level + 2);
636 for (int j = 0; j <= level + 1; j++)
637 fprintf (stream, " ");
638 fprintf (stream, "</substitution>\n");
640 for (int j = 0; j <= level; j++)
641 fprintf (stream, " ");
642 fprintf (stream, "</template>\n");
647 compare_int(const void *a_, const void *b_)
651 return *a < *b ? -1 : *a > *b;
655 check_permutation(int *a, int n, const char *name)
658 memcpy(b, a, n * sizeof *a);
659 qsort(b, n, sizeof *b, compare_int);
660 for (int i = 0; i < n; i++)
663 fprintf(stderr, "bad %s permutation:", name);
664 for (int i = 0; i < n; i++)
665 fprintf(stderr, " %d", a[i]);
672 dump_category(FILE *stream, int level, int **indexes, int *allocated_indexes,
675 for (int i = 0; i <= level; i++)
676 fprintf (stream, " ");
677 printf ("<category>\n");
678 dump_value (stream, level + 1);
680 bool merge = get_bool();
681 match_byte_assert (0);
682 int unindexed = get_bool();
687 match_u32_assert (2);
689 int indx = get_u32();
690 int n_categories = get_u32();
695 for (int i = 0; i <= level + 1; i++)
696 fprintf (stream, " ");
697 fprintf (stream, "<merge/>\n");
706 assert (n_categories == 0);
707 if (*n_indexes >= *allocated_indexes)
709 *allocated_indexes = *allocated_indexes ? 2 * *allocated_indexes : 16;
710 *indexes = realloc(*indexes, *allocated_indexes * sizeof **indexes);
712 (*indexes)[(*n_indexes)++] = indx;
715 if (n_categories == 0)
717 for (int i = 0; i <= level + 1; i++)
718 fprintf (stream, " ");
719 fprintf (stream, "<category-index>%d</category-index>\n", indx);
721 for (int i = 0; i < n_categories; i++)
722 dump_category (stream, level + 1, indexes, allocated_indexes, n_indexes);
723 for (int i = 0; i <= level; i++)
724 fprintf (stream, " ");
725 printf ("</category>\n");
733 printf ("<dimension index=\"%d\">\n", indx);
734 dump_value (stdout, 0);
736 /* This byte is usually 0 but many other values have been spotted.
737 No visible effect. */
740 /* This byte can cause data to be oddly replicated. */
741 if (!match_byte(0) && !match_byte(1))
742 match_byte_assert(2);
747 bool show_dim_label = get_bool();
749 printf(" <show-dim-label/>\n");
751 bool hide_all_labels = get_bool();
753 printf(" <hide-all-labels/>\n");
755 match_byte_assert(1);
756 if (!match_u32(UINT32_MAX))
757 match_u32_assert(indx);
759 n_categories = get_u32();
763 int allocated_indexes = 0;
764 for (int i = 0; i < n_categories; i++)
765 dump_category (stdout, 0, &indexes, &allocated_indexes, &n_indexes);
766 check_permutation(indexes, n_indexes, "categories");
768 fprintf (stdout, "</dimension>\n");
773 static int dim_n_cats[64];
774 #define MAX_DIMS (sizeof dim_n_cats / sizeof *dim_n_cats)
780 assert(n_dims < MAX_DIMS);
781 for (int i = 0; i < n_dims; i++)
782 dim_n_cats[i] = dump_dim (i);
788 /* The first three numbers add to the number of dimensions. */
791 int c = n_dims - l - r;
794 /* The next n_dims numbers are a permutation of the dimension numbers. */
796 for (int i = 0; i < n_dims; i++)
801 const char *name = i < l ? "layer" : i < l + r ? "row" : "column";
802 printf ("<%s dimension=\"%d\"/>\n", name, dim);
804 check_permutation(a, n_dims, "dimensions");
808 for (int i = 0; i < x; i++)
810 unsigned int indx = get_u32();
811 printf (" <datum index=\"%d\" coords=", indx);
813 int coords[MAX_DIMS];
814 for (int i = n_dims; i-- > 0; )
816 coords[i] = indx % dim_n_cats[i];
817 indx /= dim_n_cats[i];
819 for (int i = 0; i < n_dims; i++)
820 printf("%c%d", i ? ',' : '"', coords[i]);
826 dump_value(stdout, 1);
827 fprintf (stdout, " </datum>\n");
829 printf ("</data>\n");
835 printf ("<title-local>\n");
836 dump_value(stdout, 0);
838 printf ("</title-local>\n");
840 printf ("<subtype>\n");
841 dump_value(stdout, 0);
843 printf ("</subtype>\n");
845 match_byte_assert(0x31);
847 printf ("<title-c>\n");
848 dump_value(stdout, 0);
850 printf ("</title-c>\n");
852 if (match_byte(0x31))
854 printf ("<user-caption>\n");
855 dump_value(stdout, 0);
856 printf ("</user-caption>\n");
859 match_byte_assert(0x58);
860 if (match_byte(0x31))
862 printf ("<caption>\n");
863 dump_value(stdout, 0);
864 printf ("</caption>\n");
867 match_byte_assert(0x58);
869 int n_footnotes = get_u32();
870 for (int i = 0; i < n_footnotes; i++)
872 printf ("<footnote index=\"%d\">\n", i);
873 dump_value(stdout, 0);
874 /* Custom footnote marker string. */
875 if (match_byte (0x31))
876 dump_value(stdout, 0);
878 match_byte_assert (0x58);
882 /* Appears to be the number of references to a footnote. */
883 printf (" <references n=\"%d\"/>\n", n);
887 /* The user deleted the footnote references. */
888 printf (" <deleted/>\n");
892 printf ("</footnote>\n");
900 for (int i = 1; i <= 8; i++)
902 printf ("<style index=\"%d\"", i);
903 match_byte_assert(i);
904 match_byte_assert(0x31);
905 printf(" font=\"%s\"", get_string());
907 printf(" size=\"%gpt\"", get_float());
909 int style = get_u32();
911 printf(" bold=\"true\"");
913 printf(" italic=\"true\"");
915 bool underline = data[pos++];
917 printf(" underline=\"true\"");
919 int halign = get_u32();
920 printf(" halign=%d", halign);
922 int valign = get_u32();
923 printf(" valign=%d", valign);
925 printf (" fgcolor=\"%s\"", get_string());
926 printf (" bgcolor=\"%s\"", get_string());
929 match_byte_assert(1);
931 char *alt_fgcolor = get_string();
933 printf (" altfg=\"%s\"", alt_fgcolor);
934 char *alt_bgcolor = get_string();
936 printf (" altbg=\"%s\"", alt_bgcolor);
940 printf(" margins=\"");
941 for (int i = 0; i < 4; i++)
945 printf("%d", get_u32());
954 int x1_end = pos + x1;
955 printf("<borders>\n");
956 match_be32_assert(1);
957 int n_borders = get_be32();
958 for (int i = 0; i < n_borders; i++)
960 int type = get_be32();
961 int stroke = get_be32();
962 int color = get_be32();
963 printf(" <border type=\"%d\" stroke=\"%s\" color=\"#%06x\"/>\n",
965 (stroke == 0 ? "none"
966 : stroke == 1 ? "solid"
967 : stroke == 2 ? "dashed"
968 : stroke == 3 ? "thick"
969 : stroke == 4 ? "thin"
970 : stroke == 5 ? "double"
974 bool grid = get_byte();
976 printf(" <grid show=\"%s\"/>\n", grid ? "yes" : "no");
977 printf("</borders>\n");
978 assert(pos == x1_end);
980 int skip = get_u32();
981 assert(skip == 18 || skip == 25);
985 int x3_end = pos + x3;
988 match_be32_assert(1);
990 printf("<settings layer=\"%d\"", get_be32());
992 printf(" skipempty=\"false\"");
994 printf(" showdimensionincorner=\"false\"");
996 printf(" markers=\"numeric\"");
998 printf(" footnoteposition=\"subscript\"");
1000 int nbytes = get_be32();
1001 int end = pos + nbytes;
1003 while (pos + 4 <= end)
1004 printf(" %d", get_be32());
1008 char *notes = get_string_be();
1010 printf(" notes=\"%s\"", notes);
1011 char *look = get_string_be();
1013 printf(" look=\"%s\"", look);
1018 /* Manual column widths, if present. */
1019 int count = get_u32();
1022 printf("<columnwidths>");
1023 for (int i = 0; i < count; i++)
1027 printf("%d", get_u32());
1029 printf("</columnwidths>\n");
1032 const char *locale = get_string();
1033 printf ("<locale>%s</locale>\n", locale);
1035 printf ("<layer>%d</layer>\n", get_u32());
1037 match_byte_assert(1);
1039 match_byte_assert(1);
1041 match_byte_assert(1);
1042 printf("<epoch>%d</epoch>\n", get_u32());
1044 int decimal = data[pos];
1045 int grouping = data[pos + 1];
1046 if (match_byte('.'))
1048 if (!match_byte(',') && !match_byte('\''))
1049 match_byte_assert(' ');
1053 match_byte_assert(',');
1054 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1055 match_byte_assert(0);
1057 printf("<format decimal=\"%c\"", decimal);
1059 printf(" grouping=\"%c\"", grouping);
1063 for (int i = 0; i < 5; i++)
1064 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1067 match_u32_assert(0);
1069 /* The last chunk is an outer envelope that contains two inner envelopes.
1070 The second inner envelope has some interesting data like the encoding and
1072 int outer_end = get_end();
1075 /* First inner envelope: byte*33 int[n] int*[n]. */
1076 int inner_len = get_u32();
1077 int inner_end = pos + inner_len;
1078 int array_start = pos + 33;
1079 match_byte_assert(0);
1080 pos++; /* 0, 1, 10 seen. */
1083 /* 0=en 1=de 2=es 3=it 5=ko 6=pl 8=zh-tw 10=pt_BR 11=fr */
1084 printf("lang=%d ", get_byte());
1086 printf ("variable_mode=%d\n", get_byte());
1087 printf ("value_mode=%d\n", get_byte());
1089 match_u64_assert(UINT64_MAX);
1090 match_u32_assert(0);
1091 match_u32_assert(0);
1092 match_u32_assert(0);
1093 match_u32_assert(0);
1094 match_byte_assert(0);
1096 match_byte_assert(1);
1099 assert(get_end() == inner_end);
1100 printf("<heights>");
1101 int n_heights = get_u32();
1102 for (int i = 0; i < n_heights; i++)
1106 printf("%d", get_u32());
1108 printf("</heights>\n");
1110 int n_style_map = get_u32();
1111 for (int i = 0; i < n_style_map; i++)
1113 uint64_t cell = get_u64();
1114 int style = get_u16();
1115 printf("<style-map cell=\"%llu\" style=\"%d\"/>\n", (unsigned long long) cell, style); /* cell is uint64_t; cast so the argument matches %llu on every platform. */
1118 int n_styles = get_u32();
1119 for (int i = 0; i < n_styles; i++)
1121 printf("<cell-style index=\"%d\"", i);
1123 dump_style2(stdout);
1128 assert(pos == inner_end);
1130 /* Second inner envelope. */
1131 assert(get_end() == outer_end);
1133 match_byte_assert(1);
1134 match_byte_assert(0);
1135 if (!match_byte(3) && !match_byte(4))
1136 match_byte_assert(5);
1137 match_byte_assert(0);
1138 match_byte_assert(0);
1139 match_byte_assert(0);
1141 printf("<command>%s</command>\n", get_string());
1142 printf("<command-local>%s</command-local>\n", get_string());
1143 printf("<language>%s</language>\n", get_string());
1144 printf("<charset>%s</charset>\n", get_string());
1145 printf("<locale>%s</locale>\n", get_string());
1152 printf("<epoch2>%d</epoch2>\n", get_u32());
1154 if (match_byte('.'))
1156 if (!match_byte(',') && !match_byte('\''))
1157 match_byte_assert(' ');
1161 match_byte_assert(',');
1162 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1163 match_byte_assert(0);
1166 printf ("small: %g\n", get_double());
1168 match_byte_assert(1);
1169 if (outer_end - pos > 6)
1171 /* There might be a pair of strings representing a dataset and
1172 datafile name, or there might be a set of custom currency strings.
1173 The custom currency strings start with a pair of integers, so we
1174 can distinguish these from a string by checking for a null byte; a
1175 small 32-bit integer will always contain a null and a text string
1178 int len = get_u32();
1179 bool has_dataset = !memchr(&data[pos], '\0', len);
1184 printf("<dataset>%s</dataset>\n", get_string());
1185 printf("<datafile>%s</datafile>\n", get_string());
1187 match_u32_assert(0);
1189 time_t date = get_u32();
1190 struct tm tm = *localtime(&date);
1192 strftime(s, sizeof s, "%a, %d %b %Y %H:%M:%S %z", &tm);
1193 printf("<date>%s</date>\n", s);
1195 match_u32_assert(0);
1201 for (int i = 0; i < 5; i++)
1202 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1205 match_u32_assert(0);
1207 match_byte_assert('.');
1210 if (pos < outer_end)
1213 match_u32_assert(0);
1215 assert(pos == outer_end);
1219 else if (outer_end != pos)
1222 printf("<command>%s</command>\n", get_string());
1223 printf("<command-local>%s</command-local>\n", get_string());
1224 printf("<language>%s</language>\n", get_string()); /* Closing tag must match the opening <language> tag. */
1225 printf("<charset>%s</charset>\n", get_string());
1226 printf("<locale>%s</locale>\n", get_string());
1228 match_byte_assert(0);
1232 printf("<epoch2>%d</epoch2>\n", get_u32());
1233 int decimal = data[pos];
1234 int grouping = data[pos + 1];
1235 if (match_byte('.'))
1237 if (!match_byte(',') && !match_byte('\''))
1238 match_byte_assert(' ');
1242 match_byte_assert(',');
1243 if (!match_byte('.') && !match_byte(' ') && !match_byte(','))
1244 match_byte_assert(0);
1246 printf("<format decimal=\"%c\"", decimal);
1248 printf(" grouping=\"%c\"", grouping);
1252 for (int i = 0; i < 5; i++)
1253 printf("<CC%c>%s</CC%c>\n", 'A' + i, get_string(), 'A' + i);
1256 match_u32_assert(0);
1258 match_byte_assert('.');
1261 assert(pos == outer_end);
1267 format_name (int format, char *buf)
1272 case 2: return "AHEX";
1273 case 3: return "COMMA";
1274 case 4: return "DOLLAR";
1276 case 6: return "IB";
1277 case 7: return "PIBHEX";
1279 case 9: return "PIB";
1280 case 10: return "PK";
1281 case 11: return "RB";
1282 case 12: return "RBHEX";
1283 case 15: return "Z";
1284 case 16: return "N";
1285 case 17: return "E";
1286 case 20: return "DATE";
1287 case 21: return "TIME";
1288 case 22: return "DATETIME";
1289 case 23: return "ADATE";
1290 case 24: return "JDATE";
1291 case 25: return "DTIME";
1292 case 26: return "WKDAY";
1293 case 27: return "MONTH";
1294 case 28: return "MOYR";
1295 case 29: return "QYR";
1296 case 30: return "WKYR";
1297 case 31: return "PCT";
1298 case 32: return "DOT";
1299 case 33: return "CCA";
1300 case 34: return "CCB";
1301 case 35: return "CCC";
1302 case 36: return "CCD";
1303 case 37: return "CCE";
1304 case 38: return "EDATE";
1305 case 39: return "SDATE";
1306 case 40: return "MTIME";
1307 case 41: return "YMDHMS";
1308 default: sprintf(buf, "(%d)", format); return buf;
1313 main(int argc, char *argv[])
1317 fprintf (stderr, "usage: %s FILE.bin\n", argv[0]); /* Terminate the usage message with a newline. */
1322 int fd = open(filename, O_RDONLY);
1325 fprintf (stderr, "%s: open failed (%s)\n", filename, strerror (errno)); /* Terminate the diagnostic with a newline. */
1342 if (read(fd, data, n) != n)
1350 unsigned int prev_end = 0;
1351 for (pos = 0; pos + 50 < n; pos++)
1353 if (data[pos + 0] == 0xff &&
1354 data[pos + 1] == 0xff &&
1355 data[pos + 2] == 0 &&
1358 int len = data[pos + 4] + (data[pos + 5] << 8);
1359 if (len < 3 || pos + len + 6 >= n || !all_utf8 ((char *) &data[pos + 6], len))
1362 printf ("+%04x %04x...%04x: %-25.*s\n",
1363 pos - prev_end, pos, pos + 6 + len,
1364 len < 50 ? (int) len : 50, &data[pos + 6]);
1365 prev_end = pos + 6 + len;
1370 for (pos = 0; pos + 50 < n; pos++)
1372 if (data[pos + 0] == 'L' &&
1373 data[pos + 1] == 'o' &&
1374 data[pos + 2] == 'g' &&
1375 !all_utf8((char *) &data[pos + 3], 1) &&
1376 (pos == 0 || data[pos - 1] != 'v')) /* The scan starts at pos 0, where there is no preceding byte to inspect. */
1378 //printf ("%04x: ", pos);
1379 unsigned int p = pos;
1380 while (all_utf8 ((char *) &data[p], 1))
1382 hex_dump (stdout, p - 28, 38);
1386 unsigned int prev_end = 0;
1387 for (pos = 2; pos + 50 < n; pos++)
1389 static const int cell_prefix[] = {
1391 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1,
1392 0x80, 0x01, -1, -1, -1, -1,
1394 size_t cell_prefix_len = sizeof cell_prefix / sizeof *cell_prefix;
1395 if (match_bytes(pos, cell_prefix, cell_prefix_len))
1397 if (prev_end != pos)
1399 //printf ("%04x ", prev_end);
1400 hex_dump (stdout, prev_end, pos - prev_end);
1404 printf ("cell %s%d.%d ",
1405 format_name (data[pos + 18], buf),
1409 int len = cell_prefix_len;
1410 if (data[pos + 19] == 0)
1412 assert (data[pos + 13] == 5);
1413 if (data[pos + 20] == 0)
1415 int count = (data[pos + 22]);
1416 printf ("%d %d \"%.*s\"\n",
1417 data[pos + 21], data[pos + 22],
1418 count, &data[pos + 23]);
1421 else if (data[pos + 20] == 1
1422 && data[pos + 21] == 0xff
1423 && data[pos + 22] == 0xff)
1426 printf ("%d \"%.*s\"\n", count, data[pos + 23],
1430 else if (data[pos + 20] == 1)
1432 int count = (data[pos + 21]);
1433 printf ("\"%.*s\"\n",
1434 count, &data[pos + 22]);
1440 else if (data[pos + 19] == 128)
1442 /* pos + 13 is usually 22...53 but also 5 or 69 */
1443 /* pos + 20 is 2 most of the time, occasionally 1 */
1444 printf ("%d %d ", data[pos + 13], data[pos + 20]);
1445 double d = *(double *) &data[pos + 21];
1452 sysmis = {.b = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff}};
1454 printf ("sysmis\n");
1465 static const int col_prefix[] = {
1466 0x11, 0x80, 0x00, -1, 0x00, 0x00, 0x00, 0x01, 0x00
1468 size_t col_prefix_len = sizeof col_prefix / sizeof *col_prefix;
1469 if (match_bytes(pos, col_prefix, col_prefix_len))
1471 if (prev_end != pos)
1473 //printf ("%04x ", prev_end);
1474 hex_dump (stdout, prev_end, pos - prev_end);
1477 printf ("col %d\n", data[pos + 3]);
1478 pos += col_prefix_len - 1;
1483 static const int record_prefix[] = {
1484 0xff, 0xff, 0x00, 0x00,
1486 size_t record_prefix_len = sizeof record_prefix / sizeof *record_prefix;
1487 if (match_bytes(pos, record_prefix, record_prefix_len))
1489 if (prev_end != pos)
1491 //printf ("%04x ", prev_end);
1492 hex_dump (stdout, prev_end, pos - prev_end);
1497 //printf ("%d ", pos % 4);
1498 int len = record_prefix_len;
1499 int slen = data[pos + 4] + (data[pos + 5] << 8);
1500 if (slen > 2 && slen < 256 && all_utf8((char *) &data[pos + 6], slen))
1502 printf ("%.*s ", slen, &data[pos + 6]);
1506 printf ("notitle ");
1513 static const int number_prefix[] = {
1516 size_t number_prefix_len = sizeof number_prefix / sizeof *number_prefix;
1517 if (match_bytes(pos, number_prefix, number_prefix_len))
1519 if (prev_end != pos)
1521 //printf ("%04x ", prev_end);
1522 hex_dump (stdout, prev_end, pos - prev_end);
1526 double d = *(double *) &data[pos + number_prefix_len];
1527 printf ("float%f ", d);
1533 static const int string_prefix[] = {
1534 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01
1536 size_t string_prefix_len = sizeof string_prefix / sizeof *string_prefix;
1537 if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] != 255)
1539 if (prev_end != pos)
1541 //printf ("%04x ", prev_end);
1542 hex_dump (stdout, prev_end, pos - prev_end);
1546 printf ("\nstring %.*s\n", (int) data[pos + 7], &data[pos + 8]);
1548 if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] == 255)
1550 if (prev_end != pos)
1552 //printf ("%04x ", prev_end);
1553 hex_dump (stdout, prev_end, pos - prev_end);
1557 int len = data[pos + 8] + (data[pos + 9] << 8);
1558 printf ("\nlongstring %.*s\n", len, &data[pos + 10]);
1564 static const int heading_prefix[] = {
1565 -1, 0x00, 0x00, 0x00, 0x50, 0x80, 0x00, 0x52, 0x80, 0x00, -1, 0x00,
1566 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00,
1567 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
1568 0x00, 0x05, 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01
1570 size_t heading_prefix_len = sizeof heading_prefix / sizeof *heading_prefix;
1571 if (match_bytes(pos, heading_prefix, heading_prefix_len))
1573 if (prev_end != pos)
1575 //printf ("%04x ", prev_end);
1576 hex_dump (stdout, prev_end, pos - prev_end);
1579 printf ("heading %d %d\n", data[pos],data[pos + 10]);
1580 pos += heading_prefix_len - 1;
1585 static const int font_prefix[] = {
1586 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1, 0x80, 0x00, 0x01, 0x00,
1587 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, -1,
1588 0x80, 0x00, -1, 0x00, -1, 0x00, 0xc8, 0x00, -1, -1, -1, -1, -1,
1589 0x00, -1, 0x00, 0x00, 0x00, 0x01, 0x00, -1,
1590 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1591 0x00, 0x00, 0x00, -1, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1592 0x00, 0x00, -1 /* 12 or 22 */,
1594 size_t font_prefix_len = sizeof font_prefix / sizeof *font_prefix;
1595 if (match_bytes(pos, font_prefix, font_prefix_len))
1597 if (prev_end != pos)
1599 //printf ("%04x", prev_end);
1600 hex_dump (stdout, prev_end, pos - prev_end);
1603 printf ("font %d %d %d %d %d %d %d %d %d %d\n",
1604 data[pos + 24], data[pos + 26],
1605 data[pos + 30], data[pos + 31], data[pos + 32],
1606 data[pos + 33], data[pos + 34], data[pos + 36],
1607 data[pos + 58], data[pos + 59]);
1608 pos += font_prefix_len - 1;
1613 static const int table_prefix[] = {
1614 -1 /* ed or e9 */, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
1615 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbc, 0x02, 0x00, 0x00,
1616 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x41, 0x72, 0x69,
1617 0x61, 0x6c, 0x00, -1, 0x00, -1, 0x00, 0x00, 0x00, 0x00, 0x00,
1618 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1619 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
1620 0x00, 0x00, 0x00, 0x00, -1, 0x00, 0x00, 0x00, -1,
1622 size_t table_prefix_len = sizeof table_prefix / sizeof *table_prefix;
1623 if (match_bytes(pos, table_prefix, table_prefix_len))
1625 if (prev_end != pos)
1627 //printf ("%04x", prev_end);
1628 hex_dump (stdout, prev_end, pos - prev_end);
1631 printf ("table %d\n", data[pos + 72]);
1632 pos += table_prefix_len - 1;
1638 static const int dim_prefix[] = {
1639 0x00, 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, -1,
1640 0x00, 0x00, 0x00, 0x00, -1, 0x80, 0x01, 0x02, -1,
1641 -1, -1, -1 /* 00 or 01 */,
1643 size_t dim_prefix_len = sizeof dim_prefix / sizeof *dim_prefix;
1644 if (match_bytes(pos, dim_prefix, dim_prefix_len))
1646 if (prev_end != pos)
1648 //printf ("%04x", prev_end);
1649 hex_dump (stdout, prev_end, pos - prev_end);
1652 printf ("dim %d %d %d %d %d\n", data[pos + 8], data[pos + 13],
1653 data[pos + 17], data[pos + 18], data[pos + 19]);
1654 pos += dim_prefix_len - 1;
1659 static const int dim2_prefix[] = {
1660 0x50, 0x80, 0x00, 0x52, 0x80, 0x00, -1, 0x00, 0x00, 0x00, -1, 0, 0, 0,
1663 size_t dim2_prefix_len = sizeof dim2_prefix / sizeof *dim2_prefix;
1664 if (match_bytes(pos, dim2_prefix, dim2_prefix_len))
1666 if (prev_end != pos)
1668 //printf ("%04x", prev_end);
1669 hex_dump (stdout, prev_end, pos - prev_end);
1672 int16_t x = *(int16_t *) &data[pos + 14];
1673 int16_t y = *(int16_t *) &data[pos + 16];
1674 printf ("dim2 %d %d %d %d\n", data[pos + 6], data[pos + 10], x, y);
1675 pos += dim2_prefix_len - 1;
1680 if (!is_ascii(data[pos]))
1683 unsigned int start = pos;
1684 unsigned int end = pos + 1;
1685 while (is_ascii(data[end]))
1688 unsigned int len = end - start;
1692 unsigned int len2 = data[start - 2] + (data[start - 1] << 8);
1693 unsigned int len3 = data[start - 1];
1695 if (len2 && len2 <= len)
1700 else if (len3 && len3 <= len)
1711 unsigned real_start = start - length_bytes;
1712 if (prev_end != real_start)
1714 //printf ("%04x ", prev_end);
1715 hex_dump (stdout, prev_end, real_start - prev_end);
1717 //printf ("%04x ", real_start);
1718 printf ("\"%.*s\"\n",
1719 (int) (end - start), (char *) &data[start]); /* Cast the whole difference: the '*' precision argument must have type int. */