14 #include "u8-mbtouc.h"
/* File-scope name of the input file.
   NOTE(review): main() declares a local variable that is also called
   `filename`, which shadows this one -- confirm whether this file-scope
   variable is ever assigned. */
16 static const char *filename;
/* STR(x) expands through XSTR, which is defined outside this excerpt
   (presumably the usual two-level stringizing helper -- confirm). */
25 #define STR(x) XSTR(x)
/* WHERE is a string literal built from __FILE__, a colon, and STR(__LINE__),
   evaluated at the point of expansion.  The *_assert wrapper macros below pass
   it as the `where` argument so diagnostics name the call site. */
26 #define WHERE __FILE__":" STR(__LINE__)
/* Forward declaration: hex_dump() is called by the assert helpers before its
   definition appears later in the file. */
28 static void __attribute__((unused))
29 hex_dump(FILE *stream, int ofs, int n);
41 memcpy(&x, &data[pos], 4);
46 static unsigned long long int
50 memcpy(&x, &data[pos], 8);
59 x = (data[pos] << 24) | (data[pos + 1] << 16) | (data[pos + 2] << 8) | data[pos + 3];
68 memcpy(&x, &data[pos], 2);
77 memcpy(&x, &data[pos], 8);
82 static double __attribute__((unused))
86 memcpy(&x, &data[pos], 4);
/* match_u16: takes the 16-bit value X to look for.  Only this declarator line
   is visible here; presumably it tests the next 16-bit input value against X
   -- TODO confirm against the body. */
101 match_u16(uint16_t x)
/* match_u32_assert: reads a value with get_u32() and, on the path shown,
   prints a diagnostic to stderr giving WHERE, an offset, the expected value X
   and the value actually read.  The comparison itself and whatever follows the
   message are not visible in this excerpt. */
110 match_u32_assert(uint32_t x, const char *where)
112 unsigned int y = get_u32();
/* The offset printed is pos - 4; this assumes get_u32() has already advanced
   pos past the four bytes it read -- confirm. */
115 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u\n", where, pos - 4, x, y);
/* Wrapper macro of the same name: supplies WHERE automatically so callers pass
   only the expected value.  A function-like macro is not re-expanded inside
   its own replacement, so this calls the function defined above. */
119 #define match_u32_assert(x) match_u32_assert(x, WHERE)
/* match_u16_assert: reads a value with get_u16() and, on the path shown,
   reports the expected value X and the value read to stderr, tagged with
   WHERE.  The comparison and the action taken after the message are not
   visible in this excerpt. */
122 match_u16_assert(uint16_t x, const char *where)
124 unsigned int y = get_u16();
/* The offset printed is pos - 2; this assumes get_u16() has already advanced
   pos past the two bytes it read -- confirm. */
127 fprintf(stderr, "%s: 0x%x: expected u16:%u, got u16:%u\n", where, pos - 2, x, y);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
131 #define match_u16_assert(x) match_u16_assert(x, WHERE)
/* match_u64: takes a 64-bit value X and returns bool.  The unused attribute
   keeps the compiler quiet if nothing calls it.  The body is not visible in
   this excerpt; presumably it tests the next 64-bit input value against X
   -- TODO confirm. */
133 static bool __attribute__((unused))
134 match_u64(uint64_t x)
/* match_u64_assert: reads a value with get_u64() and, on the path shown,
   reports the expected value X and the value read to stderr, tagged with
   WHERE.  The comparison and the action taken after the message are not
   visible in this excerpt. */
142 static void __attribute__((unused))
143 match_u64_assert(uint64_t x, const char *where)
145 unsigned long long int y = get_u64();
/* NOTE(review): X is a uint64_t but is printed with %lu, while Y uses %llu.
   Where uint64_t is not unsigned long the specifier does not match the
   argument; PRIu64 from <inttypes.h> would be the portable choice.
   The offset printed is pos - 8, assuming get_u64() already advanced pos
   -- confirm. */
148 fprintf(stderr, "%s: 0x%x: expected u64:%lu, got u64:%llu\n", where, pos - 8, x, y);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
152 #define match_u64_assert(x) match_u64_assert(x, WHERE)
/* match_be32: takes a 32-bit value X and returns bool.  The body is not
   visible in this excerpt; presumably it tests the next big-endian 32-bit
   input value against X -- TODO confirm. */
154 static bool __attribute__((unused))
155 match_be32(uint32_t x)
/* match_be32_assert: reads a value with get_be32() and, on the path shown,
   reports the expected value X and the value read to stderr, tagged with
   WHERE.  The comparison and the action taken after the message are not
   visible in this excerpt. */
164 match_be32_assert(uint32_t x, const char *where)
166 unsigned int y = get_be32();
/* The offset printed is pos - 4, assuming get_be32() already advanced pos
   -- confirm. */
169 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
173 #define match_be32_assert(x) match_be32_assert(x, WHERE)
/* match_byte: tests whether the byte at the current position equals B.  What
   is done on a match (and the return value) is not visible in this excerpt. */
176 match_byte(uint8_t b)
/* The pos < n test keeps the comparison from looking at or beyond offset n. */
178 if (pos < n && data[pos] == b)
/* match_byte_assert: on the path shown, reports to stderr that byte B was
   expected at offset pos but data[pos] was found, then hex-dumps 64 bytes
   starting at pos.  The test that leads here and what follows the dump are not
   visible in this excerpt. */
188 match_byte_assert(uint8_t b, const char *where)
192 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x: ", where, pos, b, data[pos]);
193 hex_dump(stderr, pos, 64);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
197 #define match_byte_assert(b) match_byte_assert(b, WHERE)
/* match_bytes: compares N_BYTES entries of BYTES against the input starting at
   offset START.  The result of a mismatch is not visible in this excerpt. */
200 match_bytes(int start, const int *bytes, size_t n_bytes)
202 for (size_t i = 0; i < n_bytes; i++)
/* A negative entry in BYTES is never compared, so it matches any input byte. */
203 if (bytes[i] >= 0 && data[start + i] != bytes[i])
/* xmemdup0: allocates N + 1 bytes for a copy of the N bytes at P; the extra
   byte is presumably for a terminating null, as the name suggests -- the copy
   and termination are not visible in this excerpt.
   NOTE(review): no check of the malloc() result is visible here -- confirm. */
209 xmemdup0(const void *p, size_t n)
211 char *s = malloc(n + 1);
222 match_byte_assert(1);
226 static bool __attribute__((unused))
229 return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t';
/* count_zeros: takes a pointer P to bytes.  Only this declarator line is
   visible here; presumably it counts zero bytes starting at P -- TODO confirm
   against the body. */
233 count_zeros(const uint8_t *p)
/* all_utf8: walks the LEN bytes at P_ one decoded character at a time using
   u8_mbtouc(), and tests each character against a set of rejected values.
   The action taken when a character is rejected and the final return value
   are not visible in this excerpt. */
241 static bool __attribute__((unused))
242 all_utf8(const char *p_, size_t len)
244 const uint8_t *p = (const uint8_t *) p_;
/* mblen is set inside the loop body by u8_mbtouc() before the increment
   expression uses it. */
245 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
249 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
/* Rejected: control characters below 32 other than newline, DEL (127), and
   U+FFFD. */
250 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
259 int len = data[pos] + data[pos + 1] * 256;
260 char *s = xmemdup0(&data[pos + 2], len);
268 int len = data[pos++];
270 return get_string2();
273 char *s = xmemdup0(&data[pos], len);
/* match_string1_assert: reads a string with get_string1() and compares it
   with EXP; when they differ, reports both strings to stderr together with
   WHERE and the offset START (set on a line not visible in this excerpt). */
280 match_string1_assert(const char *exp, const char *where)
283 char *act = get_string1();
284 if (strcmp(act, exp))
286 fprintf(stderr, "%s: 0x%x: expected \"%s\", got \"%s\"\n",
287 where, start, exp, act);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
291 #define match_string1_assert(x) match_string1_assert(x, WHERE)
/* match_string2_assert: reads a string with get_string2() and compares it
   with EXP; when they differ, reports both strings to stderr together with
   WHERE and the offset START (set on a line not visible in this excerpt). */
294 match_string2_assert(const char *exp, const char *where)
297 char *act = get_string2();
298 if (strcmp(act, exp))
300 fprintf(stderr, "%s: 0x%x: expected \"%s\", got \"%s\"\n",
301 where, start, exp, act);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
305 #define match_string2_assert(x) match_string2_assert(x, WHERE)
/* get_string4: reads a string preceded by a 4-byte length field at pos.  The
   visible lines take the length from the first three bytes (least significant
   first), assert that the fourth is zero, allocate LEN + 1 bytes and copy LEN
   bytes starting at pos + 4.  On the other path an "expected string" message
   tagged with WHERE is printed.  The guarding condition, termination of the
   copy, the update of pos and the return are not visible in this excerpt. */
308 get_string4(const char *where)
311 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
312 /*&& all_ascii(&data[pos + 4], data[pos])*/)
314 assert(data[pos + 3] == 0);
315 int len = data[pos] + data[pos + 1] * 256 + data[pos + 2] * 65536;
/* NOTE(review): no check of the malloc() result is visible here -- confirm. */
316 char *s = malloc(len + 1);
318 memcpy(s, &data[pos + 4], len);
325 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Wrapper macro: lets callers write get_string4() with no arguments; WHERE is
   supplied for them. */
329 #define get_string4() get_string4(WHERE)
/* get_padded_string: duplicates the LEN bytes at the current position with
   xmemdup0().  Any handling of padding, the update of pos and the return are
   not visible in this excerpt. */
332 get_padded_string(int len)
334 char *s = xmemdup0(&data[pos], len);
/* get_string_be: reads a string preceded by a 4-byte length field at pos.
   The visible lines take the length from bytes pos + 2 and pos + 3 (most
   significant first), allocate LEN + 1 bytes and copy LEN bytes starting at
   pos + 4.  On the other path an "expected string" message tagged with WHERE
   is printed.  The guarding condition, termination of the copy, the update of
   pos and the return are not visible in this excerpt. */
340 get_string_be(const char *where)
343 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
344 /*&& all_ascii(&data[pos + 4], data[pos])*/)
/* Bytes pos and pos + 1 do not contribute to the length computed here. */
346 int len = data[pos + 2] * 256 + data[pos + 3];
/* NOTE(review): no check of the malloc() result is visible here -- confirm. */
347 char *s = malloc(len + 1);
349 memcpy(s, &data[pos + 4], len);
356 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Wrapper macro: lets callers write get_string_be() with no arguments; WHERE
   is supplied for them. */
360 #define get_string_be() get_string_be(WHERE)
/* hex_dump: writes N bytes of the input, starting at offset OFS, to STREAM.
   The first loop prints each byte as two hex digits and tallies how many
   satisfy is_ascii(); the second loop prints each byte as a character.  How
   the tally is used between the loops is not visible in this excerpt.
   NOTE(review): the parameter N appears to hide a file-scope `n` that other
   functions (for example match_byte) use as the input length -- confirm. */
369 static void __attribute__((unused))
370 hex_dump(FILE *stream, int ofs, int n)
373 for (int i = 0; i < n; i++)
375 int c = data[ofs + i];
376 n_ascii += is_ascii(c);
377 fprintf(stream, " %02x", c);
382 for (int i = 0; i < n; i++)
384 int c = data[ofs + i];
/* Bytes outside 32..126 are shown as '.'. */
385 putc(c >= 32 && c < 127 ? c : '.', stream);
/* char_dump: writes N bytes of the input, starting at offset OFS, to STREAM
   as characters, substituting '.' for any byte outside 32..126. */
391 static void __attribute__((unused))
392 char_dump(FILE *stream, int ofs, int n)
394 for (int i = 0; i < n; i++)
396 int c = data[ofs + i];
397 putc(c >= 32 && c < 127 ? c : '.', stream);
/* compare_int: three-way comparison of the values that A and B point to.
   A and B are derived from A_ and B_ on lines not visible in this excerpt. */
404 compare_int(const void *a_, const void *b_)
/* Yields -1 when *a is smaller, otherwise 1 when *a is larger and 0 when the
   two are equal. */
408 return *a < *b ? -1 : *a > *b;
/* format_name: maps the numeric code FORMAT to a name string.  Known codes
   return a string literal; any other code is formatted into BUF as "(N)" and
   BUF is returned.  Some case labels fall on lines not visible in this
   excerpt. */
413 format_name (int format, char *buf)
418 case 2: return "AHEX";
419 case 3: return "COMMA";
420 case 4: return "DOLLAR";
/* Codes 5 and 40 share one name. */
421 case 5: case 40: return "F";
423 case 7: return "PIBHEX";
425 case 9: return "PIB";
426 case 10: return "PK";
427 case 11: return "RB";
428 case 12: return "RBHEX";
432 case 20: return "DATE";
433 case 21: return "TIME";
434 case 22: return "DATETIME";
435 case 23: return "ADATE";
436 case 24: return "JDATE";
437 case 25: return "DTIME";
438 case 26: return "WKDAY";
439 case 27: return "MONTH";
440 case 28: return "MOYR";
441 case 29: return "QYR";
442 case 30: return "WKYR";
443 case 31: return "PCT";
444 case 32: return "DOT";
445 case 33: return "CCA";
446 case 34: return "CCB";
447 case 35: return "CCC";
448 case 36: return "CCD";
449 case 37: return "CCE";
450 case 38: return "EDATE";
451 case 39: return "SDATE";
/* NOTE(review): sprintf() has no bound; BUF must be large enough for
   "(", the decimal digits of an int including a sign, ")" and the
   terminator.  The caller's buffer size is not visible here -- confirm, or
   pass a size and use snprintf(). */
452 default: sprintf(buf, "(%d)", format); return buf;
461 int fmt = data[pos++];
463 printf ("%s%d.%d", format_name(fmt, buf), w, d);
/* parse_heading: requires a 16-bit 0xffff marker and, after a step on a line
   not visible in this excerpt, a string (as read by get_string2) equal to
   NAME. */
467 parse_heading(const char *name)
469 match_u16_assert(0xffff);
471 match_string2_assert(name);
/* match_zeros_assert: examines COUNT bytes starting at pos.  On the path
   shown, it reports to stderr which offset I within the run holds a nonzero
   byte, then hex-dumps 64 bytes from pos.  The per-byte test and what happens
   after the dump or after a fully matching run are not visible in this
   excerpt. */
475 match_zeros_assert(int count, const char *where)
477 for (int i = 0; i < count; i++)
481 "%s: %#x: expected %d zeros here but offset %d is %#"PRIx8": ",
482 where, pos, count, i, data[pos + i]);
483 hex_dump (stderr, pos, 64);
/* Wrapper macro: passes WHERE as the second argument of the function above. */
488 #define match_zeros_assert(count) match_zeros_assert(count, WHERE)
/* put_safe: prints the string S to stdout with escaping.  The visible branch
   prints any character outside 0x20..0x7e as a \x escape with two hex digits;
   the earlier branches and the loop are not visible in this excerpt. */
491 put_safe(const char *s)
/* With a signed plain char, bytes of 0x80 and above are negative and are
   caught by the first comparison; with an unsigned one, by the second. */
499 else if (*s < 0x20 || *s > 0x7e)
/* The cast to uint8_t keeps a negative char from being printed sign-extended. */
500 printf ("\\x%02"PRIx8, (uint8_t) *s);
/* Forward declaration: parse_flexible() is called by functions that appear
   before its definition. */
507 static void parse_flexible(void);
/* parse_DspString: prints a "DspString(" record to stdout, prefixed with the
   current offset.  Two forms are visible: one prints a double followed by a
   quoted string; the other requires the bytes 1, 0, 1 (with gaps on lines not
   visible here) and prints a string through put_safe().  The conditions that
   choose between them are not visible in this excerpt.
   NOTE(review): the strings returned by get_string1() are not released on the
   lines shown; if they are heap-allocated this leaks -- confirm. */
510 parse_DspString(void)
512 printf("%#x: DspString(", pos);
515 printf("%f, \"", get_double());
516 printf("%s\")\n", get_string1());
520 match_byte_assert(1);
523 match_byte_assert(0);
524 match_byte_assert(1);
525 put_safe(get_string1());
/* match_DspString: requires the two-byte tag 0x05 0x80 at the current
   position.  What follows the tag is not visible in this excerpt. */
531 match_DspString(void)
533 match_byte_assert(5);
534 match_byte_assert(0x80);
/* match_DspSimpleText: requires the two-byte tag 0x03 0x80 followed by a
   fixed pattern of mostly zero bytes.  Some steps fall on lines not visible in
   this excerpt. */
539 match_DspSimpleText(void)
541 match_byte_assert(3);
542 match_byte_assert(0x80);
543 match_byte_assert(0);
546 match_zeros_assert(3);
/* This byte may be either 0x10 or 0. */
547 if (!match_byte(0x10))
548 match_byte_assert(0);
549 match_zeros_assert(4);
/* parse_weirdness: requires a fixed byte pattern (1, twelve zeros, 0x90, 1,
   runs of zeros) and then prints a 32-byte padded string to stdout.  Some
   steps fall on lines not visible in this excerpt. */
554 parse_weirdness(void)
556 match_byte_assert(1);
558 match_zeros_assert(12);
559 match_byte_assert(0x90);
560 match_byte_assert(1);
561 match_zeros_assert(5);
563 match_zeros_assert(3);
/* NOTE(review): the string from get_padded_string() is not released on the
   lines shown -- confirm whether that matters for this tool. */
564 puts(get_padded_string(32));
/* match_NavTreeViewItem: requires the two-byte tag 0x07 0x80 followed by a
   long fixed layout of bytes, 32-bit values and two length-prefixed strings.
   Several steps, including any branching, fall on lines not visible in this
   excerpt, so the sequence below is not necessarily linear. */
568 match_NavTreeViewItem(void)
570 match_byte_assert(7);
571 match_byte_assert(0x80);
572 match_zeros_assert(1);
/* This byte may be 0, 7, 2 or 8. */
573 if (!match_byte(0) && !match_byte(7) && !match_byte(2))
574 match_byte_assert(8);
575 match_zeros_assert(3);
577 match_byte_assert(0);
578 match_byte_assert(1);
579 match_byte_assert(0);
582 match_zeros_assert(7);
584 match_byte_assert(1);
585 match_zeros_assert(5);
590 match_byte_assert(0);
592 match_zeros_assert(11);
593 match_byte_assert(1);
594 match_zeros_assert(3);
596 match_byte_assert(0);
599 match_zeros_assert(2);
601 match_u32_assert(11000);
604 match_u32_assert(11000);
605 match_u32_assert(8500);
610 match_byte_assert(1);
/* NOTE(review): the results of both get_string4() calls are discarded;
   get_string4() allocates its buffer with malloc(), so this likely leaks
   -- confirm whether that matters for this tool. */
615 get_string4(); /* page title */
616 match_byte_assert(1);
617 match_byte_assert(1);
618 match_zeros_assert(3);
619 get_string4(); /* page number */
620 match_byte_assert(0);
627 match_zeros_assert(3);
629 //fprintf(stderr, "%#x ", pos - 16);
/* parse_DspNumber: requires byte 1, prints "DspNumber(" to stdout, requires
   byte 0x80 after a step not visible here, then prints a double and a quoted
   string read with get_string1().  Some steps fall on lines not visible in
   this excerpt. */
633 parse_DspNumber(void)
635 match_byte_assert(1);
636 printf("DspNumber(");
638 match_byte_assert(0x80);
640 printf (" %f", get_double());
641 printf (" \"%s\")\n", get_string1());
/* match_DspNumber: requires the two-byte tag 0x2a 0x80 at the current
   position.  What follows the tag is not visible in this excerpt. */
645 match_DspNumber(void)
647 match_byte_assert(0x2a);
648 match_byte_assert(0x80);
655 match_byte_assert(0);
656 match_DspSimpleText();
657 parse_flexible(); /* DspString or DspNumber. */
663 match_byte_assert(0x27);
664 match_byte_assert(0x80);
671 match_byte_assert(2);
678 match_byte_assert(9);
679 match_byte_assert(0x80);
/* parse_PMModelItemInfo: requires a zero byte, skips one byte, requires seven
   zero bytes and, after steps not visible in this excerpt, the bytes 0x0e and
   0. */
684 parse_PMModelItemInfo(void)
686 match_byte_assert(0);
/* The skipped byte is not validated. */
687 pos += 1; /* Counter */
688 match_zeros_assert(7);
691 match_byte_assert(0xe);
692 match_byte_assert(0);
/* match_PMModelItemInfo: requires the two-byte tag 0x54 0x80, then parses the
   item-info body with parse_PMModelItemInfo() followed by a DspSimpleText
   record. */
696 match_PMModelItemInfo(void)
698 match_byte_assert(0x54);
699 match_byte_assert(0x80);
700 parse_PMModelItemInfo();
701 match_DspSimpleText();
/* match_PMPivotItemTree: requires the two-byte tag 0x52 0x80 and a zero byte,
   then a PMModelItemInfo record, a fixed byte pattern and a DspSimpleText
   record.  Some steps fall on lines not visible in this excerpt. */
706 match_PMPivotItemTree(void)
708 match_byte_assert(0x52);
709 match_byte_assert(0x80);
710 match_byte_assert(0);
711 match_PMModelItemInfo();
717 match_byte_assert(2);
718 match_zeros_assert(24);
719 match_byte_assert(1);
720 match_zeros_assert(3);
722 match_byte_assert(0);
723 match_zeros_assert(3);
724 match_DspSimpleText();
/* parse_NavOleItem: requires a fixed layout made of single bytes, runs of
   zeros and one 32-bit value of 24.  Several steps, including any branching,
   fall on lines not visible in this excerpt, so the sequence below is not
   necessarily linear. */
729 parse_NavOleItem(void)
731 match_byte_assert(0);
732 match_byte_assert(1);
733 match_zeros_assert(2);
735 match_zeros_assert(9);
736 match_byte_assert(1);
737 match_zeros_assert(10);
738 match_byte_assert(1);
739 match_zeros_assert(6);
740 match_byte_assert(1);
742 match_byte_assert(0);
744 match_zeros_assert(11);
745 match_byte_assert(1);
746 match_zeros_assert(3);
748 match_byte_assert(0);
754 match_byte_assert(2);
755 match_zeros_assert(8);
756 match_u32_assert(24);
759 match_byte_assert(4);
760 match_zeros_assert(2);
770 if (data[pos] == 0xff && data[pos + 1] == 0xff)
772 match_u16_assert(0xffff);
774 char *heading = get_string2();
775 if (!strcmp(heading, "DspCell"))
777 else if (!strcmp(heading, "DspNumber"))
779 else if (!strcmp(heading, "DspString"))
781 else if (!strcmp(heading, "NavHead"))
783 else if (!strcmp(heading, "IndexedCollection"))
784 match_zeros_assert(14);
785 else if (!strcmp(heading, "NavOleItem"))
787 else if (!strcmp(heading, "NavTitle"))
791 fprintf(stderr, "don't know %s at offset 0x%x: ", heading, start);
792 hex_dump(stderr, pos, 64);
796 else if (data[pos + 1] == 0x80)
798 if (data[pos] == 0x2a && data[pos + 1] == 0x80)
800 else if (data[pos] == 0x27 && data[pos + 1] == 0x80)
802 else if (data[pos] == 0x5 && data[pos + 1] == 0x80)
804 else if (data[pos] == 0x7 && data[pos + 1] == 0x80)
805 match_NavTreeViewItem();
806 else if (data[pos] == 0x3 && data[pos + 1] == 0x80)
807 match_DspSimpleText();
808 else if ((data[pos] == 0x3c || data[pos] == 0x39)
809 && data[pos + 1] == 0x80)
815 /* match_byte_assert(0x01);
816 match_byte_assert(0x02);
817 match_byte_assert(0x0d); */
819 else if (data[pos] == 0x15 && data[pos + 1] == 0x80)
823 match_byte_assert(2);
824 printf ("15 80(%f", get_double());
825 printf (" %s)\n", get_string1());
827 else if (data[pos] == 0x9 && data[pos + 1] == 0x80)
833 fprintf (stderr, "bad record 0x%02x at offset %x\n",
835 hex_dump (stderr, pos, 64);
839 else if (match_byte(0xa))
841 match_zeros_assert(5);
847 fprintf (stderr, "bad record start at offset %x: ", pos);
848 hex_dump (stderr, pos, 64);
/* main: accepts the option "o" and exactly one FILE argument, opens the file,
   reads it into `data` with 256 bytes of filler appended, and then walks the
   input with a fixed sequence of match_*()/parse_*() calls, printing pieces to
   stdout along the way.  Many lines, including loop bodies and error exits,
   are not visible in this excerpt. */
856 main(int argc, char *argv[])
858 bool print_offsets = false;
861 int c = getopt (argc, argv, "o");
868 print_offsets = true;
875 if (argc - optind != 1)
/* NOTE(review): the usage message has no trailing newline. */
877 fprintf (stderr, "usage: %s FILE.bin", argv[0]);
/* NOTE(review): this local hides the file-scope `filename`; if other
   functions read the file-scope variable they will not see this value
   -- confirm. */
881 const char *filename = argv[optind];
882 int fd = open(filename, O_RDONLY);
/* NOTE(review): this message has no trailing newline either. */
885 fprintf (stderr, "%s: open failed (%s)", filename, strerror (errno));
/* 256 extra bytes are allocated beyond the n bytes of file content; they are
   filled with a pattern below. */
896 data = malloc(n + 256);
/* NOTE(review): read() may legitimately return fewer bytes than requested;
   a single call treats any short read as a failure -- confirm this is
   acceptable for the inputs used. */
902 if (read(fd, data, n) != n)
/* Fill the trailing 256 bytes with alternating 0x55 (even index) and 0xaa
   (odd index). */
907 for (int i = 0; i < 256; i++)
908 data[n + i] = i % 2 ? 0xaa : 0x55;
/* Line-buffer stdout. */
911 setvbuf (stdout, NULL, _IOLBF, 0);
913 match_byte_assert(4);
915 match_string1_assert("SPSS Output Document");
917 match_byte_assert(0x63);
919 parse_heading("NavRoot");
920 match_byte_assert(2);
921 match_zeros_assert(32);
923 parse_heading("DspSimpleText");
924 match_zeros_assert(10);
926 parse_heading("DspString");
929 parse_heading("NavTreeViewItem");
930 match_byte_assert(0);
933 match_byte_assert(2);
934 match_byte_assert(0);
935 match_byte_assert(1);
936 match_zeros_assert(9);
940 match_u32_assert(0x18);
942 match_u32_assert(0xffffffd8);
943 match_u32_assert(0xffffffde);
944 match_u32_assert(0x18);
946 match_u32_assert(0xffffffd8);
947 match_u32_assert(0x28);
948 match_u32_assert(0x28);
952 match_zeros_assert(5);
955 match_u32_assert(11000);
958 match_u32_assert(11000);
959 match_u32_assert(8500);
964 match_byte_assert(1);
969 get_string4(); /* page title */
970 match_byte_assert(1);
971 match_byte_assert(1);
972 match_zeros_assert(3);
973 get_string4(); /* page number */
974 match_byte_assert(0);
/* Peeks nine bytes ahead for the letter 'L'; what the test selects is not
   visible in this excerpt. */
979 if (data[pos + 9] != 'L')
981 parse_heading("NavLog");
990 puts(get_padded_string(32));
992 match_u32_assert(132);
993 match_zeros_assert(8);
995 printf ("0x%x\n", pos);
997 match_byte_assert(0);
999 parse_heading("NavHead");
1001 match_NavTreeViewItem();
1002 match_zeros_assert(3);
1004 parse_heading("NavTitle");
1006 match_DspSimpleText();
1008 match_NavTreeViewItem();
1010 match_byte_assert(1);
1011 match_byte_assert(1);
/* The negative constant is converted to the function's uint32_t parameter. */
1012 match_u32_assert(-19);
1013 match_zeros_assert(12);
1014 match_byte_assert(0xbc);
1015 match_byte_assert(2);
1016 match_zeros_assert(9);
1017 match_byte_assert(0x22);
1018 puts(get_padded_string(32));
1019 match_u32_assert(80);
1020 match_zeros_assert(8);
1021 match_u32_assert(1);
1023 match_byte_assert(0);
1025 parse_heading("NavNote");
1026 match_byte_assert(2);
1027 match_zeros_assert(8);
1028 match_u32_assert(24);
1030 match_u32_assert(-40);
1032 match_u32_assert(2);
1033 match_u32_assert(1);
1034 match_DspSimpleText();
1036 match_NavTreeViewItem();
1037 match_byte_assert(1);
1039 parse_heading("PTPivotController");
1040 match_byte_assert(2);
1042 match_u32_assert(100);
1043 match_u32_assert(100);
1044 match_u32_assert(100);
1045 match_u32_assert(100);
1047 parse_heading("PVPivotView");
1048 match_u32_assert(5);
1049 match_byte_assert(0);
1051 parse_heading("PMPivotModel");
1052 match_byte_assert(3);
1054 parse_heading("NDimensional__DspCell");
1055 match_byte_assert(0);
1056 match_u32_assert(1);
1058 parse_heading("IndexedCollection");
1059 match_byte_assert(0);
1061 match_zeros_assert(3);
1062 match_byte_assert(1);
1063 match_byte_assert(0);
1064 match_zeros_assert(7);
/* Repeats until the byte at pos is 1; the loop body is not visible in this
   excerpt. */
1066 while (data[pos] != 1)
1074 match_byte_assert(1);
1075 match_byte_assert(0);
1076 puts(get_string1());
1078 match_u32_assert(2);
1079 puts(get_string1());
1081 match_byte_assert(0);
1082 match_byte_assert(1);
1083 match_byte_assert(0);
1084 match_byte_assert(0);
1085 match_byte_assert(0);
1086 match_byte_assert(1);
1087 match_byte_assert(0);
1091 parse_heading("PMPivotItemTree");
1092 match_byte_assert(0);
1094 parse_heading("AbstractTreeBranch");
1095 match_byte_assert(0);
1097 parse_heading("PMModelItemInfo");
1098 parse_PMModelItemInfo();
1099 match_DspSimpleText();
/* A hard-coded run of (32-bit value, PMPivotItemTree) pairs follows. */
1102 match_u32_assert(7);
1103 match_PMPivotItemTree();
1105 match_u32_assert(0);
1106 match_PMPivotItemTree();
1108 match_u32_assert(0);
1109 match_PMPivotItemTree();
1111 match_u32_assert(6);
1112 match_PMPivotItemTree();
1114 match_u32_assert(0);
1115 match_PMPivotItemTree();
1117 match_u32_assert(0);
1118 match_PMPivotItemTree();
1120 match_u32_assert(0);
1121 match_PMPivotItemTree();
1123 match_u32_assert(0);
1124 match_PMPivotItemTree();
1126 match_u32_assert(0);
1127 match_PMPivotItemTree();
1129 match_u32_assert(0);
1130 match_PMPivotItemTree();
1132 match_u32_assert(2);
1133 match_PMPivotItemTree();
1135 match_u32_assert(0);
1136 match_PMPivotItemTree();
1138 match_u32_assert(0);
1139 match_PMPivotItemTree();
1141 match_u32_assert(0);
1142 match_PMPivotItemTree();
1144 match_u32_assert(0);
1145 match_PMPivotItemTree();
1147 match_u32_assert(2);
1148 match_PMPivotItemTree();
1150 match_u32_assert(0);
1151 match_PMPivotItemTree();
1153 match_u32_assert(0);
/* Repeats until the two bytes at pos are 0xff 0xff (the marker that
   parse_heading() requires); the loop body is not visible in this excerpt. */
1157 while (data[pos] != 0xff || data[pos + 1] != 0xff)
1159 parse_heading("PVViewDimension");
/* Counts the bytes up to the next 0xff 0xff pair and hex-dumps them to
   stdout.  NOTE(review): no upper bound on I is visible in this loop
   -- confirm the scan cannot run past the allocated buffer. */
1162 for (i = 0; data[pos + i] != 0xff || data[pos + i + 1] != 0xff; i++)
1164 hex_dump(stdout, pos, i);
1166 printf ("%#x: end of successful parse\n", pos);