14 #include "u8-mbtouc.h"
16 static const char *filename;
/* STR forwards its argument to XSTR (not visible in this excerpt; presumably
   the `#x` stringifier -- confirm).  The extra level of indirection lets a
   macro argument such as __LINE__ be expanded before it is stringified. */
25 #define STR(x) XSTR(x)
/* WHERE is a string literal built from __FILE__, a colon, and STR(__LINE__);
   it is passed to the *_assert helpers as their `where` argument. */
26 #define WHERE __FILE__":" STR(__LINE__)
/* Forward declaration of hex_dump(): the function is defined further down the
   file but is already called by assertion helpers that precede that
   definition.  It takes the output stream, a starting offset, and a count. */
28 static void __attribute__((unused))
29 hex_dump(FILE *stream, int ofs, int n);
41 memcpy(&x, &data[pos], 4);
46 static unsigned long long int
50 memcpy(&x, &data[pos], 8);
/* Assemble a 32-bit value from four bytes, most significant byte first.  Each
   byte is widened to uint32_t before shifting: a bare `data[pos] << 24`
   promotes the byte to int, and shifting a value >= 0x80 left by 24 overflows
   int, which is undefined behavior.  (data[] is treated as unsigned bytes, as
   its comparisons against 0xff elsewhere in the file imply.) */
59 x = ((uint32_t) data[pos] << 24) | ((uint32_t) data[pos + 1] << 16) | ((uint32_t) data[pos + 2] << 8) | (uint32_t) data[pos + 3];
68 memcpy(&x, &data[pos], 2);
77 memcpy(&x, &data[pos], 8);
82 static double __attribute__((unused))
86 memcpy(&x, &data[pos], 4);
101 match_u16(uint16_t x)
110 match_u32_assert(uint32_t x, const char *where)
112 unsigned int y = get_u32();
115 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u\n", where, pos - 4, x, y);
/* Wrapper macro with the same name as the function defined just above: it
   supplies WHERE as the `where` argument, so WHERE's __FILE__/__LINE__ are
   those of the call site.  A macro's own name is not re-expanded inside its
   expansion, so the inner match_u32_assert refers to the function. */
119 #define match_u32_assert(x) match_u32_assert(x, WHERE)
122 match_u16_assert(uint16_t x, const char *where)
124 unsigned int y = get_u16();
127 fprintf(stderr, "%s: 0x%x: expected u16:%u, got u16:%u\n", where, pos - 2, x, y);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument, so callers write match_u16_assert(x) and the failure
   message gets the call site's __FILE__/__LINE__. */
131 #define match_u16_assert(x) match_u16_assert(x, WHERE)
133 static bool __attribute__((unused))
134 match_u64(uint64_t x)
142 static void __attribute__((unused))
143 match_u64_assert(uint64_t x, const char *where)
145 unsigned long long int y = get_u64();
/* Report the mismatch.  x is a uint64_t, so it is printed with PRIu64 rather
   than %lu, which is only correct on platforms where unsigned long is 64 bits
   wide; y is declared unsigned long long and keeps %llu. */
148 fprintf(stderr, "%s: 0x%x: expected u64:%"PRIu64", got u64:%llu\n", where, pos - 8, x, y);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument, so the failure message gets the call site's
   __FILE__/__LINE__ without the caller spelling it out. */
152 #define match_u64_assert(x) match_u64_assert(x, WHERE)
154 static bool __attribute__((unused))
155 match_be32(uint32_t x)
164 match_be32_assert(uint32_t x, const char *where)
166 unsigned int y = get_be32();
169 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument, so the failure message gets the call site's
   __FILE__/__LINE__. */
173 #define match_be32_assert(x) match_be32_assert(x, WHERE)
176 match_byte(uint8_t b)
178 if (pos < n && data[pos] == b)
188 match_byte_assert(uint8_t b, const char *where)
192 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x: ", where, pos, b, data[pos]);
193 hex_dump(stderr, pos, 64);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument; every match_byte_assert(b) call below this point goes
   through it and so carries its own __FILE__/__LINE__. */
197 #define match_byte_assert(b) match_byte_assert(b, WHERE)
200 match_bytes(int start, const int *bytes, size_t n_bytes)
202 for (size_t i = 0; i < n_bytes; i++)
203 if (bytes[i] >= 0 && data[start + i] != bytes[i])
209 xmemdup0(const void *p, size_t n)
211 char *s = malloc(n + 1);
222 match_byte_assert(1);
226 static bool __attribute__((unused))
229 return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t';
233 count_zeros(const uint8_t *p)
241 static bool __attribute__((unused))
242 all_utf8(const char *p_, size_t len)
244 const uint8_t *p = (const uint8_t *) p_;
245 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
249 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
250 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
259 int len = data[pos] + data[pos + 1] * 256;
260 char *s = xmemdup0(&data[pos + 2], len);
268 int len = data[pos++];
270 return get_string2();
273 char *s = xmemdup0(&data[pos], len);
280 match_string1_assert(const char *exp, const char *where)
283 char *act = get_string1();
284 if (strcmp(act, exp))
286 fprintf(stderr, "%s: 0x%x: expected \"%s\", got \"%s\"\n",
287 where, start, exp, act);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument, so the "expected ..., got ..." message gets the call
   site's __FILE__/__LINE__. */
291 #define match_string1_assert(x) match_string1_assert(x, WHERE)
294 match_string2_assert(const char *exp, const char *where)
297 char *act = get_string2();
298 if (strcmp(act, exp))
300 fprintf(stderr, "%s: 0x%x: expected \"%s\", got \"%s\"\n",
301 where, start, exp, act);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument, so the "expected ..., got ..." message gets the call
   site's __FILE__/__LINE__. */
305 #define match_string2_assert(x) match_string2_assert(x, WHERE)
308 get_string4(const char *where)
311 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
312 /*&& all_ascii(&data[pos + 4], data[pos])*/)
314 assert(data[pos + 3] == 0);
315 int len = data[pos] + data[pos + 1] * 256 + data[pos + 2] * 65536;
316 char *s = malloc(len + 1);
318 memcpy(s, &data[pos + 4], len);
325 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Wrapper macro that lets callers write get_string4() with no arguments; it
   passes WHERE as the function's `where` parameter, which the function uses
   in its "expected string" diagnostic. */
329 #define get_string4() get_string4(WHERE)
332 get_padded_string(int len)
334 char *s = xmemdup0(&data[pos], len);
340 get_string_be(const char *where)
343 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
344 /*&& all_ascii(&data[pos + 4], data[pos])*/)
346 int len = data[pos + 2] * 256 + data[pos + 3];
347 char *s = malloc(len + 1);
349 memcpy(s, &data[pos + 4], len);
356 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Wrapper macro that lets callers write get_string_be() with no arguments; it
   passes WHERE as the function's `where` parameter, which the function uses
   in its "expected string" diagnostic. */
360 #define get_string_be() get_string_be(WHERE)
369 static void __attribute__((unused))
370 hex_dump(FILE *stream, int ofs, int n)
373 for (int i = 0; i < n; i++)
375 int c = data[ofs + i];
376 n_ascii += is_ascii(c);
377 fprintf(stream, " %02x", c);
382 for (int i = 0; i < n; i++)
384 int c = data[ofs + i];
385 putc(c >= 32 && c < 127 ? c : '.', stream);
391 static void __attribute__((unused))
392 char_dump(FILE *stream, int ofs, int n)
394 for (int i = 0; i < n; i++)
396 int c = data[ofs + i];
397 putc(c >= 32 && c < 127 ? c : '.', stream);
404 compare_int(const void *a_, const void *b_)
408 return *a < *b ? -1 : *a > *b;
413 format_name (int format, char *buf)
418 case 2: return "AHEX";
419 case 3: return "COMMA";
420 case 4: return "DOLLAR";
421 case 5: case 40: return "F";
423 case 7: return "PIBHEX";
425 case 9: return "PIB";
426 case 10: return "PK";
427 case 11: return "RB";
428 case 12: return "RBHEX";
432 case 20: return "DATE";
433 case 21: return "TIME";
434 case 22: return "DATETIME";
435 case 23: return "ADATE";
436 case 24: return "JDATE";
437 case 25: return "DTIME";
438 case 26: return "WKDAY";
439 case 27: return "MONTH";
440 case 28: return "MOYR";
441 case 29: return "QYR";
442 case 30: return "WKYR";
443 case 31: return "PCT";
444 case 32: return "DOT";
445 case 33: return "CCA";
446 case 34: return "CCB";
447 case 35: return "CCC";
448 case 36: return "CCD";
449 case 37: return "CCE";
450 case 38: return "EDATE";
451 case 39: return "SDATE";
452 default: sprintf(buf, "(%d)", format); return buf;
461 int fmt = data[pos++];
463 printf ("%s%d.%d", format_name(fmt, buf), w, d);
467 parse_heading(const char *name)
469 match_u16_assert(0xffff);
471 match_string2_assert(name);
475 match_zeros_assert(int count, const char *where)
477 for (int i = 0; i < count; i++)
481 "%s: %#x: expected %d zeros here but offset %d is %#"PRIx8": ",
482 where, pos, count, i, data[pos + i]);
483 hex_dump (stderr, pos, 64);
/* Wrapper macro that shares the function's name and passes WHERE as the
   `where` argument, so the "expected N zeros" diagnostic gets the call
   site's __FILE__/__LINE__. */
488 #define match_zeros_assert(count) match_zeros_assert(count, WHERE)
491 put_safe(const char *s)
499 else if (*s < 0x20 || *s > 0x7e)
500 printf ("\\x%02"PRIx8, (uint8_t) *s);
/* Forward declaration: parse_flexible() is called by record parsers below
   (for example where a record may hold either a DspString or a DspNumber).
   It is static, so its definition lives elsewhere in this same file. */
507 static void parse_flexible(void);
510 parse_DspString(void)
512 printf("%#x: DspString(", pos);
515 printf("%f, \"", get_double());
516 printf("%s\")\n", get_string1());
520 match_byte_assert(1);
523 match_byte_assert(0);
524 match_byte_assert(1);
525 put_safe(get_string1());
531 match_DspString(void)
533 match_byte_assert(5);
534 match_byte_assert(0x80);
539 match_DspSimpleText(void)
541 match_byte_assert(3);
542 match_byte_assert(0x80);
543 match_byte_assert(0);
546 match_zeros_assert(3);
547 if (!match_byte(0x10))
548 match_byte_assert(0);
549 match_zeros_assert(4);
554 parse_weirdness(void)
556 match_byte_assert(1);
558 match_zeros_assert(12);
559 match_byte_assert(0x90);
560 match_byte_assert(1);
561 match_zeros_assert(5);
563 match_zeros_assert(3);
564 puts(get_padded_string(32));
568 match_NavTreeViewItem(void)
570 match_byte_assert(7);
571 match_byte_assert(0x80);
572 match_zeros_assert(1);
573 if (!match_byte(0) && !match_byte(7) && !match_byte(2))
574 match_byte_assert(8);
575 match_zeros_assert(3);
577 match_byte_assert(0);
578 match_byte_assert(1);
579 match_byte_assert(0);
582 match_zeros_assert(7);
584 match_byte_assert(1);
585 match_zeros_assert(5);
590 match_byte_assert(0);
592 match_zeros_assert(11);
593 match_byte_assert(1);
594 match_zeros_assert(3);
596 match_byte_assert(0);
599 match_zeros_assert(2);
601 match_u32_assert(11000);
604 match_u32_assert(11000);
605 match_u32_assert(8500);
610 match_byte_assert(1);
615 get_string4(); /* page title */
616 match_byte_assert(1);
617 match_byte_assert(1);
618 match_zeros_assert(3);
619 get_string4(); /* page number */
620 match_byte_assert(0);
627 match_zeros_assert(3);
629 //fprintf(stderr, "%#x ", pos - 16);
633 parse_DspNumber(void)
635 match_byte_assert(1);
636 printf("DspNumber(");
638 match_byte_assert(0x80);
640 printf (" %f", get_double());
641 printf (" \"%s\")\n", get_string1());
645 match_DspNumber(void)
647 match_byte_assert(0x2a);
648 match_byte_assert(0x80);
655 match_byte_assert(0);
656 match_DspSimpleText();
657 parse_flexible(); /* DspString or DspNumber. */
663 match_byte_assert(0x27);
664 match_byte_assert(0x80);
671 match_byte_assert(2);
678 match_byte_assert(9);
679 match_byte_assert(0x80);
684 parse_PMModelItemInfo(void)
686 match_byte_assert(0);
687 pos += 1; /* Counter */
688 match_zeros_assert(7);
691 match_byte_assert(0xe);
692 match_byte_assert(0);
696 match_PMModelItemInfo(void)
698 match_byte_assert(0x54);
699 match_byte_assert(0x80);
700 parse_PMModelItemInfo();
701 match_DspSimpleText();
706 match_PMPivotItemTree(void)
708 match_byte_assert(0x52);
709 match_byte_assert(0x80);
710 match_byte_assert(0);
711 match_PMModelItemInfo();
717 match_byte_assert(2);
718 match_zeros_assert(24);
719 match_byte_assert(1);
720 match_zeros_assert(7);
721 match_DspSimpleText();
726 parse_NavOleItem(void)
728 match_byte_assert(0);
729 match_byte_assert(1);
730 match_zeros_assert(2);
732 match_zeros_assert(9);
733 match_byte_assert(1);
734 match_zeros_assert(10);
735 match_byte_assert(1);
736 match_zeros_assert(6);
737 match_byte_assert(1);
739 match_byte_assert(0);
741 match_zeros_assert(11);
742 match_byte_assert(1);
743 match_zeros_assert(3);
745 match_byte_assert(0);
752 if (data[pos] == 0xff && data[pos + 1] == 0xff)
754 match_u16_assert(0xffff);
756 char *heading = get_string2();
757 if (!strcmp(heading, "DspCell"))
759 else if (!strcmp(heading, "DspNumber"))
761 else if (!strcmp(heading, "DspString"))
763 else if (!strcmp(heading, "NavHead"))
765 else if (!strcmp(heading, "IndexedCollection"))
766 match_zeros_assert(14);
767 else if (!strcmp(heading, "NavOleItem"))
771 fprintf(stderr, "don't know %s at offset 0x%x: ", heading, start);
772 hex_dump(stderr, pos, 64);
776 else if (data[pos + 1] == 0x80)
778 if (data[pos] == 0x2a && data[pos + 1] == 0x80)
780 else if (data[pos] == 0x27 && data[pos + 1] == 0x80)
782 else if (data[pos] == 0x5 && data[pos + 1] == 0x80)
784 else if (data[pos] == 0x7 && data[pos + 1] == 0x80)
785 match_NavTreeViewItem();
786 else if (data[pos] == 0x3 && data[pos + 1] == 0x80)
787 match_DspSimpleText();
788 else if ((data[pos] == 0x3c || data[pos] == 0x39)
789 && data[pos + 1] == 0x80)
795 /* match_byte_assert(0x01);
796 match_byte_assert(0x02);
797 match_byte_assert(0x0d); */
799 else if (data[pos] == 0x15 && data[pos + 1] == 0x80)
803 match_byte_assert(2);
804 printf ("15 80(%f", get_double());
805 printf (" %s)\n", get_string1());
807 else if (data[pos] == 0x9 && data[pos + 1] == 0x80)
813 fprintf (stderr, "bad record 0x%02x at offset %x\n",
815 hex_dump (stderr, pos, 64);
819 else if (match_byte(0xa))
821 match_zeros_assert(5);
827 fprintf (stderr, "bad record start at offset %x: ", pos);
828 hex_dump (stderr, pos, 64);
836 main(int argc, char *argv[])
838 bool print_offsets = false;
841 int c = getopt (argc, argv, "o");
848 print_offsets = true;
855 if (argc - optind != 1)
/* Wrong number of non-option arguments: print the usage line.  The message
   now ends in a newline so it does not run into the shell prompt or any
   following output on stderr. */
857 fprintf (stderr, "usage: %s FILE.bin\n", argv[0]);
/* Store the input path in the file-scope `filename` instead of declaring a
   local of the same name: the local shadowed the file-scope variable, which
   was therefore left unset.  The uses of `filename` that follow in main()
   see the same value as before. */
861 filename = argv[optind];
862 int fd = open(filename, O_RDONLY);
/* open() failed: report the file name and the errno text.  The message now
   ends in a newline so the diagnostic is a complete line on stderr. */
865 fprintf (stderr, "%s: open failed (%s)\n", filename, strerror (errno));
876 data = malloc(n + 256);
882 if (read(fd, data, n) != n)
887 for (int i = 0; i < 256; i++)
888 data[n + i] = i % 2 ? 0xaa : 0x55;
891 setvbuf (stdout, NULL, _IOLBF, 0);
893 match_byte_assert(4);
895 match_string1_assert("SPSS Output Document");
897 match_byte_assert(0x63);
899 parse_heading("NavRoot");
900 match_byte_assert(2);
901 match_zeros_assert(32);
903 parse_heading("DspSimpleText");
904 match_zeros_assert(10);
906 parse_heading("DspString");
909 parse_heading("NavTreeViewItem");
910 match_byte_assert(0);
913 match_byte_assert(2);
914 match_byte_assert(0);
915 match_byte_assert(1);
916 match_zeros_assert(9);
920 match_u32_assert(0x18);
922 match_u32_assert(0xffffffd8);
923 match_u32_assert(0xffffffde);
924 match_u32_assert(0x18);
926 match_u32_assert(0xffffffd8);
927 match_u32_assert(0x28);
928 match_u32_assert(0x28);
932 match_zeros_assert(5);
935 match_u32_assert(11000);
938 match_u32_assert(11000);
939 match_u32_assert(8500);
944 match_byte_assert(1);
949 get_string4(); /* page title */
950 match_byte_assert(1);
951 match_byte_assert(1);
952 match_zeros_assert(3);
953 get_string4(); /* page number */
954 match_byte_assert(0);
959 if (data[pos + 9] != 'L')
961 parse_heading("NavLog");
968 puts(get_padded_string(32));
970 match_u32_assert(132);
971 match_zeros_assert(8);
973 printf ("0x%x\n", pos);
975 match_byte_assert(0);
977 parse_heading("NavHead");
979 match_NavTreeViewItem();
980 match_zeros_assert(3);
982 parse_heading("NavTitle");
984 match_DspSimpleText();
986 match_NavTreeViewItem();
988 match_byte_assert(1);
989 match_byte_assert(1);
990 match_u32_assert(-19);
991 match_zeros_assert(12);
992 match_byte_assert(0xbc);
993 match_byte_assert(2);
994 match_zeros_assert(9);
995 match_byte_assert(0x22);
996 puts(get_padded_string(32));
997 match_u32_assert(80);
998 match_zeros_assert(8);
1001 match_byte_assert(0);
1003 parse_heading("NavNote");
1004 match_byte_assert(2);
1005 match_zeros_assert(8);
1006 match_u32_assert(24);
1008 match_u32_assert(-40);
1010 match_u32_assert(2);
1011 match_u32_assert(1);
1012 match_DspSimpleText();
1014 match_NavTreeViewItem();
1015 match_byte_assert(1);
1017 parse_heading("PTPivotController");
1018 match_byte_assert(2);
1020 match_u32_assert(100);
1021 match_u32_assert(100);
1022 match_u32_assert(100);
1023 match_u32_assert(100);
1025 parse_heading("PVPivotView");
1026 match_u32_assert(5);
1027 match_byte_assert(0);
1029 parse_heading("PMPivotModel");
1030 match_byte_assert(3);
1032 parse_heading("NDimensional__DspCell");
1033 match_byte_assert(0);
1034 match_u32_assert(1);
1036 parse_heading("IndexedCollection");
1037 match_byte_assert(0);
1039 match_zeros_assert(3);
1040 match_byte_assert(1);
1041 match_byte_assert(0);
1042 match_zeros_assert(7);
1044 while (data[pos] != 1)
1052 match_byte_assert(1);
1053 match_byte_assert(0);
1054 puts(get_string1());
1056 match_u32_assert(2);
1057 puts(get_string1());
1059 match_byte_assert(0);
1060 match_byte_assert(1);
1061 match_byte_assert(0);
1062 match_byte_assert(0);
1063 match_byte_assert(0);
1064 match_byte_assert(1);
1065 match_byte_assert(0);
1069 parse_heading("PMPivotItemTree");
1070 match_byte_assert(0);
1072 parse_heading("AbstractTreeBranch");
1073 match_byte_assert(0);
1075 parse_heading("PMModelItemInfo");
1076 parse_PMModelItemInfo();
1077 match_DspSimpleText();
1080 match_u32_assert(7);
1081 match_PMPivotItemTree();
1083 match_u32_assert(0);
1084 match_PMPivotItemTree();
1086 match_u32_assert(0);
1087 match_PMPivotItemTree();
1089 match_u32_assert(6);
1090 match_PMPivotItemTree();
1092 match_u32_assert(0);
1093 match_PMPivotItemTree();
1095 match_u32_assert(0);
1096 match_PMPivotItemTree();
1098 match_u32_assert(0);
1099 match_PMPivotItemTree();
1101 match_u32_assert(0);
1102 match_PMPivotItemTree();
1104 match_u32_assert(0);
1105 match_PMPivotItemTree();
1107 match_u32_assert(0);
1108 match_PMPivotItemTree();
1110 match_u32_assert(2);
1111 match_PMPivotItemTree();
1113 match_u32_assert(0);
1114 match_PMPivotItemTree();
1116 match_u32_assert(0);
1117 match_PMPivotItemTree();
1119 match_u32_assert(0);
1120 match_PMPivotItemTree();
1122 match_u32_assert(0);
1123 match_PMPivotItemTree();
1125 match_u32_assert(2);
1126 match_PMPivotItemTree();
1128 match_u32_assert(0);
1129 match_PMPivotItemTree();
1131 match_u32_assert(0);
1135 while (data[pos] != 0xff || data[pos + 1] != 0xff)
1137 parse_heading("PVViewDimension");
1140 for (i = 0; data[pos + i] != 0xff || data[pos + i + 1] != 0xff; i++)
1142 hex_dump(stdout, pos, i);
1144 printf ("%#x: end of successful parse\n", pos);