14 #include "u8-mbtouc.h"
16 static const char *filename;
25 #define STR(x) XSTR(x)
26 #define WHERE __FILE__":" STR(__LINE__)
28 static void __attribute__((unused))
29 hex_dump(FILE *stream, int ofs, int n);
/* NOTE(review): the next lines are fragments of several small fixed-width
   reader helpers.  Their headers and return statements are not visible in
   this excerpt, so the helper names are not asserted here. */
/* Copies the 4 raw bytes at data[pos] into x. */
41 memcpy(&x, &data[pos], 4);
46 static unsigned long long int
/* Copies the 8 raw bytes at data[pos] into x. */
50 memcpy(&x, &data[pos], 8);
/* Assembles four consecutive bytes with data[pos] as the most significant.
   NOTE(review): if data[] has an 8-bit unsigned element type, data[pos] << 24
   is evaluated as int and overflows for bytes >= 0x80 -- confirm the element
   type and whether a cast to uint32_t is needed before shifting. */
59 x = (data[pos] << 24) | (data[pos + 1] << 16) | (data[pos + 2] << 8) | data[pos + 3];
/* Copies the 2 raw bytes at data[pos] into x. */
68 memcpy(&x, &data[pos], 2);
/* Copies the 8 raw bytes at data[pos] into x. */
77 memcpy(&x, &data[pos], 8);
82 static double __attribute__((unused))
/* Copies 4 raw bytes at data[pos] into x, inside a helper declared to return
   double; the declared type of x is not visible here. */
86 memcpy(&x, &data[pos], 4);
/* match_u16: takes a 16-bit value x.  The body is not visible in this
   excerpt; presumably the non-aborting counterpart of match_u16_assert --
   TODO confirm. */
101 match_u16(uint16_t x)
/* match_u32_assert(x, where): reads a value with get_u32() and, on the
   reporting path shown below, writes `where`, the offset pos - 4, and the
   expected (x) and actual (y) values to stderr, followed by a 64-byte hex
   dump starting at that same offset.  The comparison that guards the report
   and whatever follows it are not visible in this excerpt. */
110 match_u32_assert(uint32_t x, const char *where)
112 unsigned int y = get_u32();
/* The message ends in ": " with no newline; the hex dump is emitted right
   after it. */
115 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u: ", where, pos - 4, x, y);
116 hex_dump(stderr, pos - 4, 64);
/* Wrapper macro: callers pass only x; WHERE supplies the call site's
   file:line string as the `where` argument. */
120 #define match_u32_assert(x) match_u32_assert(x, WHERE)
/* match_u16_assert(x, where): reads a value with get_u16() and, on the
   reporting path shown below, writes `where`, the offset pos - 2, and the
   expected and actual values to stderr.  The guarding comparison is not
   visible in this excerpt. */
123 match_u16_assert(uint16_t x, const char *where)
125 unsigned int y = get_u16();
128 fprintf(stderr, "%s: 0x%x: expected u16:%u, got u16:%u\n", where, pos - 2, x, y);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
132 #define match_u16_assert(x) match_u16_assert(x, WHERE)
/* match_u64: takes a 64-bit value x and returns bool.  Marked unused so the
   compiler does not warn when nothing calls it.  The body is not visible in
   this excerpt. */
134 static bool __attribute__((unused))
135 match_u64(uint64_t x)
/* match_u64_assert(x, where): reads a value with get_u64() and, on the
   reporting path shown below, writes `where`, the offset pos - 8, and the
   expected and actual values to stderr.  The guarding comparison is not
   visible in this excerpt. */
143 static void __attribute__((unused))
144 match_u64_assert(uint64_t x, const char *where)
146 unsigned long long int y = get_u64();
/* NOTE(review): x is uint64_t but is printed with %lu, while y uses %llu.
   On targets where uint64_t is not unsigned long this is a format/argument
   mismatch -- consider PRIu64 from <inttypes.h>. */
149 fprintf(stderr, "%s: 0x%x: expected u64:%lu, got u64:%llu\n", where, pos - 8, x, y);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
153 #define match_u64_assert(x) match_u64_assert(x, WHERE)
/* match_be32: takes a 32-bit value x and returns bool.  Marked unused so the
   compiler does not warn when nothing calls it.  The body is not visible in
   this excerpt. */
155 static bool __attribute__((unused))
156 match_be32(uint32_t x)
/* match_be32_assert(x, where): reads a value with get_be32() and, on the
   reporting path shown below, writes `where`, the offset pos - 4, and the
   expected and actual values to stderr.  The guarding comparison is not
   visible in this excerpt. */
165 match_be32_assert(uint32_t x, const char *where)
167 unsigned int y = get_be32();
170 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
174 #define match_be32_assert(x) match_be32_assert(x, WHERE)
/* match_byte(b): tests whether the current offset is still below n and the
   byte at data[pos] equals b.  What happens on success or failure (for
   example whether pos advances) is not visible in this excerpt. */
177 match_byte(uint8_t b)
179 if (pos < n && data[pos] == b)
/* match_byte_assert(b, where): on the reporting path shown below, writes
   `where`, the current offset, the expected byte b and the actual byte
   data[pos] to stderr, then hex-dumps 64 bytes from the current offset.
   The check that triggers the report is not visible in this excerpt. */
189 match_byte_assert(uint8_t b, const char *where)
/* The message ends in ": " with no newline; the hex dump is emitted right
   after it. */
193 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x: ", where, pos, b, data[pos]);
194 hex_dump(stderr, pos, 64);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
198 #define match_byte_assert(b) match_byte_assert(b, WHERE)
/* match_bytes(start, bytes, n_bytes): walks the n_bytes entries of `bytes`
   and compares each against data[start + i].  Entries that are negative are
   never compared, so they act as "match anything" positions.  The action
   taken on a mismatch and the return statements are not visible in this
   excerpt. */
201 match_bytes(int start, const int *bytes, size_t n_bytes)
203 for (size_t i = 0; i < n_bytes; i++)
204 if (bytes[i] >= 0 && data[start + i] != bytes[i])
/* xmemdup0(p, n): allocates n + 1 bytes.  Presumably copies n bytes from p
   and stores a terminating NUL in the extra byte, as the name suggests --
   TODO confirm, the copy is not visible in this excerpt.
   NOTE(review): no check of the malloc result is visible here. */
210 xmemdup0(const void *p, size_t n)
212 char *s = malloc(n + 1);
223 match_byte_assert(1);
/* Predicate whose header line is not visible in this excerpt (presumably
   is_ascii, which hex_dump calls -- TODO confirm).  Returns true when p is a
   printable 7-bit character (from space up to but excluding 127) or one of
   carriage return, newline or tab. */
227 static bool __attribute__((unused))
230 return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t';
/* count_zeros: takes a pointer to bytes.  The body is not visible in this
   excerpt; presumably counts zero bytes starting at p -- TODO confirm. */
234 count_zeros(const uint8_t *p)
/* all_utf8(p_, len): scans the len bytes at p_ one decoded character at a
   time using u8_mbtouc, advancing by the number of bytes each call reports.
   A character is flagged when it is a control character below 32 other than
   newline, is 127, or is U+FFFD.  What is returned for flagged and unflagged
   input is not visible in this excerpt. */
242 static bool __attribute__((unused))
243 all_utf8(const char *p_, size_t len)
/* Work on unsigned bytes so values above 0x7f are not sign-extended. */
245 const uint8_t *p = (const uint8_t *) p_;
246 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
250 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
251 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
/* Fragment of a string reader whose header is not visible in this excerpt
   (presumably get_string2 -- TODO confirm).  The length is a 16-bit value
   stored low byte first at data[pos]; the string bytes start two bytes
   later and are duplicated with xmemdup0. */
260 int len = data[pos] + data[pos + 1] * 256;
261 char *s = xmemdup0(&data[pos + 2], len);
/* Fragment of a string reader whose header is not visible in this excerpt
   (presumably get_string1 -- TODO confirm).  The length is a single byte,
   consumed from data[pos].  On some condition not visible here the reader
   defers to get_string2(); otherwise it duplicates len bytes from the
   current offset with xmemdup0. */
269 int len = data[pos++];
271 return get_string2();
274 char *s = xmemdup0(&data[pos], len);
/* match_string1_assert(exp, where): reads a string with get_string1() and
   compares it to exp with strcmp.  When they differ it reports `where`, a
   start offset, and both strings on stderr.  How `start` is set and what
   follows the report are not visible in this excerpt. */
281 match_string1_assert(const char *exp, const char *where)
284 char *act = get_string1();
285 if (strcmp(act, exp))
287 fprintf(stderr, "%s: 0x%x: expected \"%s\", got \"%s\"\n",
288 where, start, exp, act);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
292 #define match_string1_assert(x) match_string1_assert(x, WHERE)
/* match_string2_assert(exp, where): reads a string with get_string2() and
   compares it to exp with strcmp.  When they differ it reports `where`, a
   start offset, and both strings on stderr.  How `start` is set and what
   follows the report are not visible in this excerpt. */
295 match_string2_assert(const char *exp, const char *where)
298 char *act = get_string2();
299 if (strcmp(act, exp))
301 fprintf(stderr, "%s: 0x%x: expected \"%s\", got \"%s\"\n",
302 where, start, exp, act);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
306 #define match_string2_assert(x) match_string2_assert(x, WHERE)
/* get_string4(where): reads a string whose length occupies four bytes at
   data[pos], low byte first.  The fourth length byte is asserted to be zero
   and only the low three bytes contribute to the length.  The string bytes
   start at data[pos + 4] and are copied into a freshly allocated buffer of
   len + 1 bytes.  On the failure path it reports `where` and the offset.
   The condition selecting between the two paths is only partly visible in
   this excerpt (its remaining terms are commented out below).
   NOTE(review): no check of the malloc result is visible here. */
309 get_string4(const char *where)
312 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
313 /*&& all_ascii(&data[pos + 4], data[pos])*/)
315 assert(data[pos + 3] == 0);
316 int len = data[pos] + data[pos + 1] * 256 + data[pos + 2] * 65536;
317 char *s = malloc(len + 1);
319 memcpy(s, &data[pos + 4], len);
326 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Wrapper macro: callers pass no arguments; WHERE supplies the call site. */
330 #define get_string4() get_string4(WHERE)
/* get_padded_string(len): duplicates len bytes starting at the current
   offset with xmemdup0.  Any advance of pos and the return statement are
   not visible in this excerpt. */
333 get_padded_string(int len)
335 char *s = xmemdup0(&data[pos], len);
/* get_string_be(where): reads a string whose length is taken from the bytes
   at data[pos + 2] (high) and data[pos + 3] (low).  The first two bytes of
   the four-byte prefix do not contribute to the length.  The string bytes
   start at data[pos + 4] and are copied into a freshly allocated buffer of
   len + 1 bytes.  On the failure path it reports `where` and the offset.
   The condition selecting between the two paths is only partly visible in
   this excerpt (its remaining terms are commented out below).
   NOTE(review): no check of the malloc result is visible here. */
341 get_string_be(const char *where)
344 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
345 /*&& all_ascii(&data[pos + 4], data[pos])*/)
347 int len = data[pos + 2] * 256 + data[pos + 3];
348 char *s = malloc(len + 1);
350 memcpy(s, &data[pos + 4], len);
357 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Wrapper macro: callers pass no arguments; WHERE supplies the call site. */
361 #define get_string_be() get_string_be(WHERE)
/* hex_dump(stream, ofs, n): writes the n bytes at data[ofs] to stream.
   The first pass prints each byte as two hex digits preceded by a space and
   counts how many bytes is_ascii accepts.  The second pass prints each byte
   as a character, substituting '.' for anything outside 32..126.  Whether
   the second pass is conditional on the count is not visible in this
   excerpt.  No bounds check on ofs + i is visible here. */
370 static void __attribute__((unused))
371 hex_dump(FILE *stream, int ofs, int n)
374 for (int i = 0; i < n; i++)
376 int c = data[ofs + i];
377 n_ascii += is_ascii(c);
378 fprintf(stream, " %02x", c);
383 for (int i = 0; i < n; i++)
385 int c = data[ofs + i];
386 putc(c >= 32 && c < 127 ? c : '.', stream);
/* char_dump(stream, ofs, n): writes the n bytes at data[ofs] to stream as
   characters, substituting '.' for any byte outside 32..126. */
392 static void __attribute__((unused))
393 char_dump(FILE *stream, int ofs, int n)
395 for (int i = 0; i < n; i++)
397 int c = data[ofs + i];
398 putc(c >= 32 && c < 127 ? c : '.', stream);
/* compare_int(a_, b_): three-way comparison of the values a and b point to.
   Returns -1 when *a is smaller, 1 when it is larger and 0 when they are
   equal.  The signature has the shape of a qsort/bsearch comparator.  The
   declarations of a and b are not visible in this excerpt. */
405 compare_int(const void *a_, const void *b_)
/* Comparing rather than subtracting avoids overflow on extreme values. */
409 return *a < *b ? -1 : *a > *b;
/* format_name(format, buf): maps a numeric format code to its name.  Known
   codes return a string literal; codes 5 and 40 both map to "F".  Any code
   without a case is formatted into buf as "(N)" and buf is returned, so the
   result may or may not point into buf.  Some case labels are not visible in
   this excerpt.
   NOTE(review): the default case uses sprintf with no size; the capacity the
   caller provides for buf is not visible here -- confirm that it can hold
   the longest "(%d)" rendering, or switch to snprintf. */
414 format_name (int format, char *buf)
419 case 2: return "AHEX";
420 case 3: return "COMMA";
421 case 4: return "DOLLAR";
422 case 5: case 40: return "F";
424 case 7: return "PIBHEX";
426 case 9: return "PIB";
427 case 10: return "PK";
428 case 11: return "RB";
429 case 12: return "RBHEX";
433 case 20: return "DATE";
434 case 21: return "TIME";
435 case 22: return "DATETIME";
436 case 23: return "ADATE";
437 case 24: return "JDATE";
438 case 25: return "DTIME";
439 case 26: return "WKDAY";
440 case 27: return "MONTH";
441 case 28: return "MOYR";
442 case 29: return "QYR";
443 case 30: return "WKYR";
444 case 31: return "PCT";
445 case 32: return "DOT";
446 case 33: return "CCA";
447 case 34: return "CCB";
448 case 35: return "CCC";
449 case 36: return "CCD";
450 case 37: return "CCE";
451 case 38: return "EDATE";
452 case 39: return "SDATE";
453 default: sprintf(buf, "(%d)", format); return buf;
462 int fmt = data[pos++];
464 printf ("%s%d.%d", format_name(fmt, buf), w, d);
/* parse_heading(name): requires the 16-bit marker 0xffff and then a
   string (read via get_string2) equal to name.  Statements between and
   after these two checks are not visible in this excerpt. */
468 parse_heading(const char *name)
470 match_u16_assert(0xffff);
472 match_string2_assert(name);
/* match_zeros_assert(count, where): walks `count` bytes starting at the
   current offset.  On the reporting path it writes `where`, the offset, the
   expected count, the index of the offending byte and that byte's value to
   stderr, then hex-dumps 64 bytes from the current offset.  The per-byte
   test and the advance of pos are not visible in this excerpt. */
476 match_zeros_assert(int count, const char *where)
478 for (int i = 0; i < count; i++)
482 "%s: %#x: expected %d zeros here but offset %d is %#"PRIx8": ",
483 where, pos, count, i, data[pos + i]);
484 hex_dump (stderr, pos, 64);
/* Wrapper macro: supplies the call site's file:line string as `where`. */
489 #define match_zeros_assert(count) match_zeros_assert(count, WHERE)
/* put_safe(s): prints s to stdout with escaping.  In the branch shown, any
   character below 0x20 or above 0x7e is written as a backslash-x escape
   with two hex digits.  The earlier branches and the loop are not visible
   in this excerpt. */
492 put_safe(const char *s)
500 else if (*s < 0x20 || *s > 0x7e)
/* The cast to uint8_t keeps a negative char from printing sign-extended. */
501 printf ("\\x%02"PRIx8, (uint8_t) *s);
508 static void parse_flexible(void);
/* parse_DspString(): prints the current offset and a "DspString(" prefix,
   then decodes the record body.  Two shapes are visible: one prints a
   double followed by a quoted string read with get_string1; the other
   requires the bytes 1, then 0, 1, and prints a string through put_safe.
   The condition choosing between them is not visible in this excerpt.
   NOTE(review): the strings returned by get_string1() are used inline and
   never stored, so they cannot be freed -- presumably acceptable for a
   short-lived dump tool; confirm. */
511 parse_DspString(void)
513 printf("%#x: DspString(", pos);
516 printf("%f, \"", get_double());
517 printf("%s\")\n", get_string1());
521 match_byte_assert(1);
524 match_byte_assert(0);
525 match_byte_assert(1);
526 put_safe(get_string1());
/* match_DspString(): requires the two-byte tag 05 80.  What follows the tag
   check is not visible in this excerpt. */
532 match_DspString(void)
534 match_byte_assert(5);
535 match_byte_assert(0x80);
/* match_DspSimpleText(): requires the two-byte tag 03 80 followed by a zero
   byte, then a fixed layout of zero runs around a single byte that may be
   either 0x10 or 0.  Some statements are not visible in this excerpt. */
540 match_DspSimpleText(void)
542 match_byte_assert(3);
543 match_byte_assert(0x80);
544 match_byte_assert(0);
547 match_zeros_assert(3);
/* Accept 0x10 here; if it is absent the byte must be 0. */
548 if (!match_byte(0x10))
549 match_byte_assert(0);
550 match_zeros_assert(4);
/* parse_weirdness(): consumes a fixed layout made of a 1 byte, runs of
   zeros, one byte that is skipped without being checked, and finally a
   32-byte padded string that is printed with puts.  Some statements are not
   visible in this excerpt.
   NOTE(review): the string from get_padded_string() is passed straight to
   puts and never freed. */
555 parse_weirdness(void)
557 match_byte_assert(1);
559 match_zeros_assert(12);
560 pos++; /* 90 or BC */
562 match_byte_assert(1);
563 match_zeros_assert(5);
565 match_zeros_assert(3);
566 puts(get_padded_string(32));
/* match_NavTreeViewItem(): requires the two-byte tag 07 80 and then
   validates a long fixed layout of literal bytes, zero runs, three 32-bit
   values (11000, 11000, 8500) and two length-prefixed strings.  Several
   statements between the visible ones are missing from this excerpt, so the
   layout shown here is incomplete. */
570 match_NavTreeViewItem(void)
572 match_byte_assert(7);
573 match_byte_assert(0x80);
574 match_zeros_assert(1);
/* This position may hold 0, 7, 2 or 8: the assert on 8 only runs when none
   of the three match_byte attempts succeeds. */
575 if (!match_byte(0) && !match_byte(7) && !match_byte(2))
576 match_byte_assert(8);
577 match_zeros_assert(3);
579 match_byte_assert(0);
580 match_byte_assert(1);
581 match_byte_assert(0);
584 match_zeros_assert(7);
586 match_byte_assert(1);
587 match_zeros_assert(5);
592 match_byte_assert(0);
594 match_zeros_assert(11);
595 match_byte_assert(1);
596 match_zeros_assert(3);
598 match_byte_assert(0);
601 match_zeros_assert(2);
603 match_u32_assert(11000);
606 match_u32_assert(11000);
607 match_u32_assert(8500);
612 match_byte_assert(1);
/* NOTE(review): both get_string4() results below are discarded, so the
   allocated strings are never freed. */
617 get_string4(); /* page title */
618 match_byte_assert(1);
619 match_byte_assert(1);
620 match_zeros_assert(3);
621 get_string4(); /* page number */
622 match_byte_assert(0);
629 match_zeros_assert(3);
631 //fprintf(stderr, "%#x ", pos - 16);
/* parse_DspNumber(): requires a 1 byte, prints a "DspNumber(" prefix,
   requires a 0x80 byte, then prints a double and a quoted string read with
   get_string1.  Some statements are not visible in this excerpt.
   NOTE(review): the string from get_string1() is used inline and never
   freed. */
635 parse_DspNumber(void)
637 match_byte_assert(1);
638 printf("DspNumber(");
640 match_byte_assert(0x80);
642 printf (" %f", get_double());
643 printf (" \"%s\")\n", get_string1());
/* match_DspNumber(): requires the two-byte tag 2a 80.  What follows the tag
   check is not visible in this excerpt. */
647 match_DspNumber(void)
649 match_byte_assert(0x2a);
650 match_byte_assert(0x80);
657 match_byte_assert(0);
658 match_DspSimpleText();
659 parse_flexible(); /* DspString or DspNumber. */
665 match_byte_assert(0x27);
666 match_byte_assert(0x80);
673 match_byte_assert(2);
680 match_byte_assert(9);
681 match_byte_assert(0x80);
/* parse_PMModelItemInfo(): requires a zero byte, skips one byte without
   checking it, requires seven zero bytes, then the bytes 0x0e and 0.  Some
   statements are not visible in this excerpt. */
686 parse_PMModelItemInfo(void)
688 match_byte_assert(0);
689 pos += 1; /* Counter */
690 match_zeros_assert(7);
693 match_byte_assert(0xe);
694 match_byte_assert(0);
/* match_PMModelItemInfo(): requires the two-byte tag 54 80, then parses the
   record body with parse_PMModelItemInfo() followed by a DspSimpleText
   record.  Anything after that is not visible in this excerpt. */
698 match_PMModelItemInfo(void)
700 match_byte_assert(0x54);
701 match_byte_assert(0x80);
702 parse_PMModelItemInfo();
703 match_DspSimpleText();
/* match_PMPivotItemTree(): requires the two-byte tag 52 80 and a zero byte,
   then a PMModelItemInfo record, a fixed layout of literal bytes and zero
   runs, and finally a DspSimpleText record.  Some statements are not
   visible in this excerpt. */
708 match_PMPivotItemTree(void)
710 match_byte_assert(0x52);
711 match_byte_assert(0x80);
712 match_byte_assert(0);
713 match_PMModelItemInfo();
719 match_byte_assert(2);
720 match_zeros_assert(24);
721 match_byte_assert(1);
722 match_zeros_assert(3);
724 match_byte_assert(0);
725 match_zeros_assert(3);
726 match_DspSimpleText();
/* parse_NavOleItem(): validates a fixed layout consisting only of literal
   0 and 1 bytes and runs of zeros.  Several statements between the visible
   ones are missing from this excerpt, so the layout shown is incomplete. */
731 parse_NavOleItem(void)
733 match_byte_assert(0);
734 match_byte_assert(1);
735 match_zeros_assert(2);
737 match_zeros_assert(9);
738 match_byte_assert(1);
739 match_zeros_assert(10);
740 match_byte_assert(1);
741 match_zeros_assert(5);
743 match_byte_assert(1);
745 match_byte_assert(0);
747 match_zeros_assert(11);
748 match_byte_assert(1);
749 match_zeros_assert(3);
751 match_byte_assert(0);
/* match_NavOleItem(): requires the two-byte tag 0e 80.  What follows the tag
   check is not visible in this excerpt. */
755 match_NavOleItem(void)
757 match_byte_assert(0x0e);
758 match_byte_assert(0x80);
765 match_byte_assert(2);
766 match_zeros_assert(8);
767 match_u32_assert(24);
771 match_byte_assert(4);
772 match_zeros_assert(2);
782 match_byte_assert(2);
783 match_zeros_assert(8);
784 match_u32_assert(24);
786 match_u32_assert(-40);
/* NOTE(review): the header of this function is not visible in this excerpt;
   presumably this is the body of parse_flexible(), which is declared earlier
   in the file -- TODO confirm.
   The visible logic dispatches on the bytes at the current offset:
     - ff ff introduces a named heading, read as a string and compared
       against a list of known record names;
     - a second byte of 0x80 introduces a short two-byte tag, selected by
       the first byte;
     - a 0x0a byte is followed by five zero bytes;
     - anything else is reported as a bad record start.
   The action for most branches is not visible here. */
796 if (data[pos] == 0xff && data[pos + 1] == 0xff)
798 match_u16_assert(0xffff);
800 char *heading = get_string2();
801 if (!strcmp(heading, "DspCell"))
803 else if (!strcmp(heading, "DspNumber"))
805 else if (!strcmp(heading, "DspString"))
807 else if (!strcmp(heading, "NavHead"))
809 else if (!strcmp(heading, "IndexedCollection"))
810 match_zeros_assert(14);
811 else if (!strcmp(heading, "NavOleItem"))
813 else if (!strcmp(heading, "NavTitle"))
815 else if (!strcmp(heading, "NavNote"))
/* Unknown heading: the message reports `start`, while the hex dump begins at
   the current pos. */
819 fprintf(stderr, "don't know %s at offset 0x%x: ", heading, start);
820 hex_dump(stderr, pos, 64);
824 else if (data[pos + 1] == 0x80)
/* Each test below re-checks data[pos + 1] == 0x80 even though the enclosing
   condition above has already tested the same expression. */
826 if (data[pos] == 0x2a && data[pos + 1] == 0x80)
828 else if (data[pos] == 0x27 && data[pos + 1] == 0x80)
830 else if (data[pos] == 0x5 && data[pos + 1] == 0x80)
832 else if (data[pos] == 0x7 && data[pos + 1] == 0x80)
833 match_NavTreeViewItem();
834 else if (data[pos] == 0x3 && data[pos + 1] == 0x80)
835 match_DspSimpleText();
836 else if ((data[pos] == 0x3c || data[pos] == 0x39)
837 && data[pos + 1] == 0x80)
843 /* match_byte_assert(0x01);
844 match_byte_assert(0x02);
845 match_byte_assert(0x0d); */
847 else if (data[pos] == 0x15 && data[pos + 1] == 0x80)
851 match_byte_assert(2);
/* NOTE(review): the string from get_string1() is used inline and never
   freed. */
852 printf ("15 80(%f", get_double());
853 printf (" %s)\n", get_string1());
855 else if (data[pos] == 0x9 && data[pos + 1] == 0x80)
859 else if (data[pos] == 0xe)
863 fprintf (stderr, "bad record 0x%02x at offset %x: ",
865 hex_dump (stderr, pos, 64);
869 else if (match_byte(0xa))
871 match_zeros_assert(5);
877 fprintf (stderr, "bad record start at offset %x: ", pos);
878 hex_dump (stderr, pos, 64);
/* main(argc, argv): entry point of the dump tool.  It accepts an optional
   -o flag and exactly one file argument, loads the file into the global
   data buffer, and then walks a hard-coded sequence of expected headings,
   literal bytes, zero runs and 32-bit values.  The match_*_assert helpers
   report a mismatch as soon as the input deviates from that sequence.  On
   reaching the end of the sequence it prints the final offset.  Many
   statements are missing from this excerpt, so the sequence shown here is
   incomplete. */
886 main(int argc, char *argv[])
888 bool print_offsets = false;
/* The option string "o" declares a single flag, -o, taking no argument. */
891 int c = getopt (argc, argv, "o");
898 print_offsets = true;
/* Exactly one non-option argument (the input file) is required. */
905 if (argc - optind != 1)
/* NOTE(review): this message and the open-failure message below have no
   trailing newline. */
907 fprintf (stderr, "usage: %s FILE.bin", argv[0]);
/* NOTE(review): this local shadows the file-scope `filename` declared near
   the top of the file; code that reads the file-scope variable will not see
   this value -- confirm which one is intended. */
911 const char *filename = argv[optind];
912 int fd = open(filename, O_RDONLY);
915 fprintf (stderr, "%s: open failed (%s)", filename, strerror (errno));
/* The buffer is 256 bytes larger than the file.  The padding is filled
   below with alternating 0x55 (even index) and 0xaa (odd index) bytes,
   presumably so that reads slightly past the end of the file hit
   recognisable filler instead of unallocated memory -- TODO confirm.
   NOTE(review): no check of the malloc result is visible here. */
926 data = malloc(n + 256);
/* Any read that returns other than the full n bytes takes the guarded path,
   which is not visible in this excerpt. */
932 if (read(fd, data, n) != n)
937 for (int i = 0; i < 256; i++)
938 data[n + i] = i % 2 ? 0xaa : 0x55;
/* Line-buffer stdout. */
941 setvbuf (stdout, NULL, _IOLBF, 0);
/* File signature: byte 4, the string "SPSS Output Document", byte 0x63. */
943 match_byte_assert(4);
945 match_string1_assert("SPSS Output Document");
947 match_byte_assert(0x63);
949 parse_heading("NavRoot");
950 match_byte_assert(2);
951 match_zeros_assert(32);
953 parse_heading("DspSimpleText");
954 match_zeros_assert(10);
956 parse_heading("DspString");
959 parse_heading("NavTreeViewItem");
960 match_byte_assert(0);
963 match_byte_assert(2);
964 match_byte_assert(0);
965 match_byte_assert(1);
966 match_zeros_assert(9);
/* Values such as 0xffffffd8 and 0xffffffde are the 32-bit two's-complement
   encodings of -40 and -34. */
970 match_u32_assert(0x18);
972 match_u32_assert(0xffffffd8);
973 match_u32_assert(0xffffffde);
974 match_u32_assert(0x18);
976 match_u32_assert(0xffffffd8);
977 match_u32_assert(0x28);
978 match_u32_assert(0x28);
982 match_zeros_assert(5);
985 match_u32_assert(11000);
988 match_u32_assert(11000);
989 match_u32_assert(8500);
994 match_byte_assert(1);
/* NOTE(review): both get_string4() results below are discarded, so the
   allocated strings are never freed. */
999 get_string4(); /* page title */
1000 match_byte_assert(1);
1001 match_byte_assert(1);
1002 match_zeros_assert(3);
1003 get_string4(); /* page number */
1004 match_byte_assert(0);
1006 match_u16_assert(2);
/* Peeks nine bytes ahead for an 'L'; the branch taken when it is absent is
   not visible in this excerpt. */
1009 if (data[pos + 9] != 'L')
1011 parse_heading("NavLog");
1024 puts(get_padded_string(32));
1026 match_u32_assert(132);
1027 match_zeros_assert(8);
1028 match_u32_assert(1);
1029 printf ("0x%x\n", pos);
1031 match_byte_assert(0);
1033 parse_heading("NavHead");
1035 match_NavTreeViewItem();
1036 match_zeros_assert(3);
1038 parse_heading("NavTitle");
1040 match_DspSimpleText();
1042 match_NavTreeViewItem();
1044 match_byte_assert(1);
1045 match_byte_assert(1);
/* The negative literal converts to its 32-bit unsigned form (0xffffffed)
   when passed as the uint32_t parameter. */
1046 match_u32_assert(-19);
1047 match_zeros_assert(12);
1048 match_byte_assert(0xbc);
1049 match_byte_assert(2);
1050 match_zeros_assert(9);
1051 match_byte_assert(0x22);
1052 puts(get_padded_string(32));
1053 match_u32_assert(80);
1054 match_zeros_assert(8);
1055 match_u32_assert(1);
1057 match_byte_assert(0);
1059 parse_heading("NavNote");
1060 match_byte_assert(2);
1061 match_zeros_assert(8);
1062 match_u32_assert(24);
1064 match_u32_assert(-40);
1066 match_u32_assert(2);
1067 match_u32_assert(1);
1068 match_DspSimpleText();
1070 match_NavTreeViewItem();
1071 match_byte_assert(1);
1073 parse_heading("PTPivotController");
1074 match_byte_assert(2);
1076 match_u32_assert(100);
1077 match_u32_assert(100);
1078 match_u32_assert(100);
1079 match_u32_assert(100);
1081 parse_heading("PVPivotView");
1082 match_u32_assert(5);
1083 match_byte_assert(0);
1085 parse_heading("PMPivotModel");
1086 match_byte_assert(3);
1088 parse_heading("NDimensional__DspCell");
1089 match_byte_assert(0);
1090 match_u32_assert(1);
1092 parse_heading("IndexedCollection");
1093 match_byte_assert(0);
1095 match_zeros_assert(3);
1096 match_byte_assert(1);
1097 match_byte_assert(0);
1098 match_zeros_assert(7);
/* Repeats until the byte at the current offset is 1; the loop body is not
   visible in this excerpt. */
1100 while (data[pos] != 1)
1108 match_byte_assert(1);
1109 match_byte_assert(0);
1110 puts(get_string1());
1112 match_u32_assert(2);
1113 puts(get_string1());
1115 match_byte_assert(0);
1116 match_byte_assert(1);
1117 match_byte_assert(0);
1118 match_byte_assert(0);
1119 match_byte_assert(0);
1120 match_byte_assert(1);
1121 match_byte_assert(0);
1125 parse_heading("PMPivotItemTree");
1126 match_byte_assert(0);
1128 parse_heading("AbstractTreeBranch");
1129 match_byte_assert(0);
1131 parse_heading("PMModelItemInfo");
1132 parse_PMModelItemInfo();
1133 match_DspSimpleText();
/* From here on the visible lines alternate a 32-bit value with a
   PMPivotItemTree record.  The values are hard-coded for the sample being
   dissected. */
1136 match_u32_assert(7);
1137 match_PMPivotItemTree();
1139 match_u32_assert(0);
1140 match_PMPivotItemTree();
1142 match_u32_assert(0);
1143 match_PMPivotItemTree();
1145 match_u32_assert(6);
1146 match_PMPivotItemTree();
1148 match_u32_assert(0);
1149 match_PMPivotItemTree();
1151 match_u32_assert(0);
1152 match_PMPivotItemTree();
1154 match_u32_assert(0);
1155 match_PMPivotItemTree();
1157 match_u32_assert(0);
1158 match_PMPivotItemTree();
1160 match_u32_assert(0);
1161 match_PMPivotItemTree();
1163 match_u32_assert(0);
1164 match_PMPivotItemTree();
1166 match_u32_assert(2);
1167 match_PMPivotItemTree();
1169 match_u32_assert(0);
1170 match_PMPivotItemTree();
1172 match_u32_assert(0);
1173 match_PMPivotItemTree();
1175 match_u32_assert(0);
1176 match_PMPivotItemTree();
1178 match_u32_assert(0);
1179 match_PMPivotItemTree();
1181 match_u32_assert(2);
1182 match_PMPivotItemTree();
1184 match_u32_assert(0);
1185 match_PMPivotItemTree();
1187 match_u32_assert(0);
/* Repeats until the next ff ff heading marker is at the current offset; the
   loop body is not visible in this excerpt. */
1191 while (data[pos] != 0xff || data[pos + 1] != 0xff)
1193 parse_heading("PVViewDimension");
/* Measures the distance to the next ff ff marker and hex-dumps those bytes
   to stdout.  No upper bound on i is visible in this excerpt. */
1196 for (i = 0; data[pos + i] != 0xff || data[pos + i + 1] != 0xff; i++)
1198 hex_dump(stdout, pos, i);
1200 printf ("%#x: end of successful parse\n", pos);