13 #include "u8-mbtouc.h"
15 static const char *filename;
/* WHERE expands to a string literal naming the source location where it is
   expanded: __FILE__, a colon, then STR(__LINE__).  STR forwards to XSTR
   (defined outside this excerpt; presumably the `#x` stringifier -- confirm)
   so that __LINE__ is macro-expanded to its number before stringification. */
24 #define STR(x) XSTR(x)
25 #define WHERE __FILE__":" STR(__LINE__)
37 memcpy(&x, &data[pos], 4);
42 static unsigned long long int
46 memcpy(&x, &data[pos], 8);
/* Assemble a big-endian 32-bit value from four consecutive bytes.  Each byte
   is widened to uint32_t before shifting: a plain `data[pos] << 24` is
   evaluated as int and overflows (undefined behavior) when the byte is
   >= 0x80.  Assumes `data` holds unsigned bytes, as its comparisons against
   0xff elsewhere in the file imply. */
55 x = ((uint32_t) data[pos] << 24) | ((uint32_t) data[pos + 1] << 16) | ((uint32_t) data[pos + 2] << 8) | data[pos + 3];
64 memcpy(&x, &data[pos], 2);
73 memcpy(&x, &data[pos], 8);
78 static double __attribute__((unused))
82 memcpy(&x, &data[pos], 4);
106 match_u32_assert(uint32_t x, const char *where)
108 unsigned int y = get_u32();
111 fprintf(stderr, "%s: 0x%x: expected i%u, got i%u\n", where, pos - 4, x, y);
/* Lets callers write match_u32_assert(x): the macro appends WHERE as the
   `where` argument.  A macro name is not re-expanded inside its own
   replacement, so the inner name refers to the function of the same name. */
115 #define match_u32_assert(x) match_u32_assert(x, WHERE)
118 match_u16_assert(uint16_t x, const char *where)
120 unsigned int y = get_u16();
123 fprintf(stderr, "%s: 0x%x: expected u16:%u, got u16:%u\n", where, pos - 2, x, y);
/* Lets callers write match_u16_assert(x): the macro appends WHERE as the
   `where` argument.  A macro name is not re-expanded inside its own
   replacement, so the inner name refers to the function of the same name. */
127 #define match_u16_assert(x) match_u16_assert(x, WHERE)
129 static bool __attribute__((unused))
130 match_u64(uint64_t x)
138 static void __attribute__((unused))
139 match_u64_assert(uint64_t x, const char *where)
141 unsigned long long int y = get_u64();
/* Report the mismatch.  Both values are printed with %llu; `x` is a uint64_t,
   which is not `unsigned long` on every platform, so it is cast to match the
   conversion specifier instead of being passed to %lu. */
144 fprintf(stderr, "%s: 0x%x: expected u64:%llu, got u64:%llu\n", where, pos - 8, (unsigned long long) x, y);
/* Lets callers write match_u64_assert(x): the macro appends WHERE as the
   `where` argument.  A macro name is not re-expanded inside its own
   replacement, so the inner name refers to the function of the same name. */
148 #define match_u64_assert(x) match_u64_assert(x, WHERE)
150 static bool __attribute__((unused))
151 match_be32(uint32_t x)
160 match_be32_assert(uint32_t x, const char *where)
162 unsigned int y = get_be32();
165 fprintf(stderr, "%s: 0x%x: expected be%u, got be%u\n", where, pos - 4, x, y);
/* Lets callers write match_be32_assert(x): the macro appends WHERE as the
   `where` argument.  A macro name is not re-expanded inside its own
   replacement, so the inner name refers to the function of the same name. */
169 #define match_be32_assert(x) match_be32_assert(x, WHERE)
172 match_byte(uint8_t b)
174 if (pos < n && data[pos] == b)
184 match_byte_assert(uint8_t b, const char *where)
188 fprintf(stderr, "%s: 0x%x: expected %02x, got %02x\n", where, pos, b, data[pos]);
/* Lets callers write match_byte_assert(b): the macro appends WHERE as the
   `where` argument.  A macro name is not re-expanded inside its own
   replacement, so the inner name refers to the function of the same name. */
192 #define match_byte_assert(b) match_byte_assert(b, WHERE)
195 match_bytes(int start, const int *bytes, size_t n_bytes)
197 for (size_t i = 0; i < n_bytes; i++)
198 if (bytes[i] >= 0 && data[start + i] != bytes[i])
204 xmemdup0(const void *p, size_t n)
206 char *s = malloc(n + 1);
217 match_byte_assert(1);
221 static bool __attribute__((unused))
224 return (p >= ' ' && p < 127) || p == '\r' || p == '\n' || p == '\t';
228 count_zeros(const uint8_t *p)
236 static bool __attribute__((unused))
237 all_utf8(const char *p_, size_t len)
239 const uint8_t *p = (const uint8_t *) p_;
240 for (size_t ofs = 0, mblen; ofs < len; ofs += mblen)
244 mblen = u8_mbtouc (&uc, p + ofs, len - ofs);
245 if ((uc < 32 && uc != '\n') || uc == 127 || uc == 0xfffd)
254 int len = data[pos++];
255 char *s = xmemdup0(&data[pos], len);
263 int len = data[pos] + data[pos + 1] * 256;
264 char *s = xmemdup0(&data[pos + 2], len);
270 get_string(const char *where)
273 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
274 /*&& all_ascii(&data[pos + 4], data[pos])*/)
276 int len = data[pos] + data[pos + 1] * 256;
277 char *s = malloc(len + 1);
279 memcpy(s, &data[pos + 4], len);
286 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Lets callers write get_string() with no arguments: the macro passes WHERE
   as the `where` argument.  A macro name is not re-expanded inside its own
   replacement, so the inner name refers to the function of the same name. */
290 #define get_string() get_string(WHERE)
293 get_string_be(const char *where)
296 /*data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0*/
297 /*&& all_ascii(&data[pos + 4], data[pos])*/)
299 int len = data[pos + 2] * 256 + data[pos + 3];
300 char *s = malloc(len + 1);
302 memcpy(s, &data[pos + 4], len);
309 fprintf(stderr, "%s: 0x%x: expected string\n", where, pos);
/* Lets callers write get_string_be() with no arguments: the macro passes
   WHERE as the `where` argument.  A macro name is not re-expanded inside its
   own replacement, so the inner name refers to the function of the same
   name. */
313 #define get_string_be() get_string_be(WHERE)
322 static void __attribute__((unused))
323 hex_dump(FILE *stream, int ofs, int n)
326 for (int i = 0; i < n; i++)
328 int c = data[ofs + i];
329 n_ascii += is_ascii(c);
330 fprintf(stream, " %02x", c);
335 for (int i = 0; i < n; i++)
337 int c = data[ofs + i];
338 putc(c >= 32 && c < 127 ? c : '.', stream);
344 static void __attribute__((unused))
345 char_dump(FILE *stream, int ofs, int n)
347 for (int i = 0; i < n; i++)
349 int c = data[ofs + i];
350 putc(c >= 32 && c < 127 ? c : '.', stream);
357 compare_int(const void *a_, const void *b_)
361 return *a < *b ? -1 : *a > *b;
366 format_name (int format, char *buf)
371 case 2: return "AHEX";
372 case 3: return "COMMA";
373 case 4: return "DOLLAR";
374 case 5: case 40: return "F";
376 case 7: return "PIBHEX";
378 case 9: return "PIB";
379 case 10: return "PK";
380 case 11: return "RB";
381 case 12: return "RBHEX";
385 case 20: return "DATE";
386 case 21: return "TIME";
387 case 22: return "DATETIME";
388 case 23: return "ADATE";
389 case 24: return "JDATE";
390 case 25: return "DTIME";
391 case 26: return "WKDAY";
392 case 27: return "MONTH";
393 case 28: return "MOYR";
394 case 29: return "QYR";
395 case 30: return "WKYR";
396 case 31: return "PCT";
397 case 32: return "DOT";
398 case 33: return "CCA";
399 case 34: return "CCB";
400 case 35: return "CCC";
401 case 36: return "CCD";
402 case 37: return "CCE";
403 case 38: return "EDATE";
404 case 39: return "SDATE";
405 default: sprintf(buf, "(%d)", format); return buf;
412 match_byte_assert(1);
415 int fmt = get_byte();
417 printf ("%s%d.%d ", format_name (fmt, buf), w, d);
419 match_byte_assert(0x80);
420 match_byte_assert(2);
421 printf ("%f ", get_double ());
422 printf ("\"%s\"\n", get_string1 ());
426 if (data[pos] == 0xff)
428 printf ("\nff exit");
432 if (data[pos] == 0x80 && data[pos + 1] == 1)
437 int fmt = get_byte();
439 printf ("\n%% %s%d.%d\n", format_name (fmt, buf), w, d);
441 else if (data[pos] == 0x80 && data[pos + 1] == 2)
444 printf ("\n%f ", get_double ());
445 printf ("'%s'\n", get_string1 ());
447 else if (data[pos] == 0x80 && data[pos + 1] == 0 && data[pos + 2] == 3)
449 else if (data[pos] == 0x80 && count_zeros(&data[pos + 1]) == 10)
451 else if (data[pos] == 0x1 && data[pos + 1] == 0xff)
454 printf ("\n\"%s\"\n", get_string2 ());
456 else if (data[pos] == 0x1 && data[pos + 1])
459 printf ("\n\"%s\"\n", get_string1 ());
462 printf ("%02x ", get_byte());
469 static const int cell_prefix[] = {
471 0x00, 0x00, 0x00, 0x00, 0x00, -1 /* 00 or 10 */, 0x00, 0x00, 0x00, 0x00,
473 /*13 14 15 16 17 18 19 */
474 -1, 0x80, 0x01, -1, -1, -1, -1,
476 size_t cell_prefix_len = sizeof cell_prefix / sizeof *cell_prefix;
477 if (!match_bytes(pos, cell_prefix, cell_prefix_len))
479 printf ("match failed at %x\n", pos);
484 printf ("cell %s%d.%d ",
485 format_name (data[pos + 18], buf),
489 int len = cell_prefix_len;
490 if (data[pos + 19] == 0)
492 assert (data[pos + 13] == 5);
493 if (data[pos + 20] == 0)
495 int count = (data[pos + 22]);
496 printf ("%d %d \"%.*s\"",
497 data[pos + 21], data[pos + 22],
498 count, &data[pos + 23]);
501 else if (data[pos + 20] == 1
502 && data[pos + 21] == 0xff
503 && data[pos + 22] == 0xff)
506 printf ("%d \"%.*s\"", count, data[pos + 23],
510 else if (data[pos + 20] == 1 && data[pos + 21] == 255)
512 int count = data[pos + 22] + (data[pos + 23] << 8);
514 count, &data[pos + 24]);
517 else if (data[pos + 20] == 1)
519 int count = (data[pos + 21]);
521 count, &data[pos + 22]);
527 else if (data[pos + 19] == 128 && data[pos + 20] == 2)
529 /* pos + 13 is usually 22...53, and it's 3 more than the
530 " xx 80" separator between cells */
531 printf ("xxx%x ", data[pos + 13]);
532 double d = *(double *) &data[pos + 21];
539 sysmis = {.b = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff}};
545 if (data[pos + 29] < 0xff
546 && all_utf8((char *) &data[pos + 30], data[pos + 29]))
548 printf (" \"%.*s\"", (int) data[pos + 29],
550 len += data[pos + 29] + 1;
555 else if (data[pos + 19] == 128 && data[pos + 20] == 1 &&
558 if (data[pos + 23] < 0xff
559 && all_utf8((char *) &data[pos + 24], data[pos + 23]))
561 printf (" \"%.*s\"", (int) data[pos + 23],
563 len = 24 + data[pos + 23];
570 printf ("xxx%d %d %d %d",
571 data[pos + 19], data[pos + 20],
572 data[pos + 21], data[pos + 22]);
579 dump_category (int level, uint8_t *delim, int *delim_len)
581 int cat_index = get_u32();
582 assert (cat_index < 256);
585 match_u32_assert (1);
589 if (!match_u16(0xe74) && !match_u16(0xffff))
592 for (int i = 0; i < level; i++)
595 for (int i = 0; ; i++, pos++)
596 if (data[pos] == 5 && data[pos + 1] == 0x80)
601 match_byte_assert (5);
602 match_byte_assert (0x80);
607 match_byte_assert (1);
608 match_byte_assert (2);
609 match_byte_assert (0x28);
610 match_byte_assert (5);
611 match_byte_assert (0);
612 match_byte_assert (1);
614 printf (" \"%s\"", get_string1());
616 int n_children = get_u32();
617 assert (n_children < 256);
619 printf (" (group with %d children)", n_children);
621 printf (" (category #%d)", cat_index);
625 for (int i = 0; i < 2; i++)
627 if (data[pos + dlen] == 0xff)
635 memcpy(delim, &data[pos], dlen);
639 for (int i = 0; i < n_children; i++)
640 dump_category (level + 1, delim, delim_len);
644 dump_PMModelItemInfo(int ndims)
647 if (data[pos + 9] && data[pos + 9] != 0xff)//count_zeros (&data[pos + 9]) < 4)
651 match_byte_assert (0);
655 dump_category (0, delim, &delim_len);
658 for (int i = 1; i < ndims; i++)
660 for (int j = 0; ; j++, pos++)
662 assert (pos + j + delim_len < n);
663 if (!memcmp(&data[pos], delim, delim_len))
668 dump_category (0, delim, &delim_len);
673 main(int argc, char *argv[])
675 bool print_offsets = false;
678 int c = getopt (argc, argv, "o");
685 print_offsets = true;
692 if (argc - optind != 1)
/* Usage message; terminated with a newline so that whatever is printed next
   on the terminal does not run on from it. */
694 fprintf (stderr, "usage: %s FILE.bin\n", argv[0]);
698 const char *filename = argv[optind];
699 int fd = open(filename, O_RDONLY);
/* Report the open() failure with the file name and the errno text, ending
   the diagnostic with a newline. */
702 fprintf (stderr, "%s: open failed (%s)\n", filename, strerror (errno));
719 if (read(fd, data, n) != n)
726 setvbuf (stdout, NULL, _IOLBF, 0);
729 unsigned int rtflen = get_u32();
730 pos += rtflen - 1; /* Almost past SPSS syntax */
732 pos += 0x45 + data[pos + 0x44]; /* Just past the string */
733 pos += 0x1a + data[pos + 0x19]; /* Just past the string again */
734 pos += 0x66 + data[pos + 0x65]; /* Just past the third string */
737 pos += rtflen - 1; /* Almost past 2nd RTF */
738 pos += 0x64 + data[pos + 0x63]; /* Just past the fourth string */
740 if (data[pos + 0x114] == 0xff)
741 pos += 0x117 + data[pos + 0x115] + 256 * data[pos + 0x116];
743 pos += 0x115 + data[pos + 0x114];
744 pos += 0x18 + data[pos + 0x17]; /* Just past "<none>" or dataset name. */
745 pos += 0x18 + data[pos + 0x17]; /* Just past "<none>" or dataset name. */
746 pos += count_zeros(&data[pos]); /* Skip optional zeros. */
747 pos += 0x18 + data[pos + 0x17]; /* Just past "<none>" or dataset name. */
748 pos += 0x3e + data[pos + 0x3d]; /* Skip past "100" etc. */
749 pos += count_zeros(&data[pos]); /* Skip optional zeros. */
750 pos += 0x18 + data[pos + 0x17]; /* Just past "User-defined...". */
751 pos += 0x18 + data[pos + 0x17]; /* Just past "Statistics are based... */
752 if (data[pos + 0x19] == 0xff)
753 pos += 0x1c + data[pos + 0x1a] + 256 * data[pos + 0x1b];
755 pos += 0x1a + data[pos + 0x19];
756 pos += 0x61 + data[pos + 0x60]; /* Just past "Cluster_Notes". */
757 pos += 0x992 + data[pos + 0x991]; /* Just past "Cluster". */
760 pos += rtflen - 1; /* Almost past RTF with filesystem path */
761 pos += 0x45 + data[pos + 0x44]; /* Just past "Statistics" */
762 pos += 0x1a + data[pos + 0x19]; /* Just past "Cluster". */
763 fwrite (&data[pos], 1, n - pos, stdout);
769 unsigned int prev_end = 0;
770 for (pos = 0; pos + 50 < n; pos++)
772 if (data[pos + 0] == 0xff &&
773 data[pos + 1] == 0xff &&
774 data[pos + 2] == 0 &&
777 int len = data[pos + 4] + (data[pos + 5] << 8);
778 if (len < 3 || pos + len + 6 >= n || !all_utf8 ((char *) &data[pos + 6], len))
781 printf ("+%04x %04x...%04x: %-25.*s\n",
782 pos - prev_end, pos, pos + 6 + len,
783 len < 50 ? (int) len : 50, &data[pos + 6]);
784 prev_end = pos + 6 + len;
789 for (pos = 0; pos + 50 < n; pos++)
/* Match the bytes "Log" followed by a byte that all_utf8() rejects, and not
   preceded by 'v'.  The enclosing loop starts at pos == 0, where there is no
   preceding byte, so the look-behind is skipped there instead of reading
   data[pos - 1] outside the buffer. */
791 if (data[pos + 0] == 'L' &&
792 data[pos + 1] == 'o' &&
793 data[pos + 2] == 'g' &&
794 !all_utf8((char *) &data[pos + 3], 1) &&
795 (pos == 0 || data[pos - 1] != 'v'))
798 printf ("%04x: ", pos);
799 unsigned int p = pos;
800 while (all_utf8 ((char *) &data[p], 1))
802 hex_dump (stdout, p - 28, 38);
806 unsigned int prev_end = 0;
809 for (pos = 2; pos + 50 < n; )
811 static const int cell_prefix[] = {
813 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, -1 /* 00 or 10 */, 0x00, 0x00, 0x00, 0x00, -1,
815 /*14 15 16 17 18 19 */
816 0x80, 0x01, -1, -1, -1, -1,
818 size_t cell_prefix_len = sizeof cell_prefix / sizeof *cell_prefix;
819 if (match_bytes(pos, cell_prefix, cell_prefix_len))
824 printf ("%04x ", prev_end);
825 hex_dump (stdout, prev_end, pos - prev_end);
829 printf ("cell %s%d.%d ",
830 format_name (data[pos + 18], buf),
834 int len = cell_prefix_len;
835 if (data[pos + 19] == 0)
837 assert (data[pos + 13] == 5);
838 if (data[pos + 20] == 0)
840 int count = (data[pos + 22]);
841 printf ("%d %d \"%.*s\"\n",
842 data[pos + 21], data[pos + 22],
843 count, &data[pos + 23]);
846 else if (data[pos + 20] == 1
847 && data[pos + 21] == 0xff
848 && data[pos + 22] == 0xff)
851 printf ("%d \"%.*s\"\n", count, data[pos + 23],
855 else if (data[pos + 20] == 1 && data[pos + 21] == 255)
857 int count = data[pos + 22] + (data[pos + 23] << 8);
858 printf ("\"%.*s\"\n",
859 count, &data[pos + 24]);
862 else if (data[pos + 20] == 1)
864 int count = (data[pos + 21]);
865 printf ("\"%.*s\"\n",
866 count, &data[pos + 22]);
872 else if (data[pos + 19] == 128 && data[pos + 20] == 2)
874 /* pos + 13 is usually 22...53, and it's 3 more than the
875 " xx 80" separator between cells */
876 printf ("xxx%x ", data[pos + 13]);
877 double d = *(double *) &data[pos + 21];
884 sysmis = {.b = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff}};
890 if (data[pos + 29] < 0xff
891 && all_utf8((char *) &data[pos + 30], data[pos + 29]))
893 printf (" \"%.*s\"", (int) data[pos + 29],
895 len += data[pos + 29] + 1;
902 else if (data[pos + 19] == 128 && data[pos + 20] == 1 &&
905 if (data[pos + 23] < 0xff
906 && all_utf8((char *) &data[pos + 24], data[pos + 23]))
908 printf (" \"%.*s\"\n", (int) data[pos + 23],
910 len = 24 + data[pos + 23];
917 printf ("xxx%d %d %d %d\n",
918 data[pos + 19], data[pos + 20],
919 data[pos + 21], data[pos + 22]);
927 static const int record_prefix[] = {
928 0xff, 0xff, 0x00, 0x00,
930 size_t record_prefix_len = sizeof record_prefix / sizeof *record_prefix;
931 if (match_bytes(pos, record_prefix, record_prefix_len))
933 int len = record_prefix_len;
934 int slen = data[pos + 4] + (data[pos + 5] << 8);
935 if (slen >= 2 && slen < 256 && all_utf8((char *) &data[pos + 6], slen))
940 printf ("%04x ", prev_end);
941 hex_dump (stdout, prev_end, pos - prev_end);
946 printf ("rec:%-20.*s ", slen, &data[pos + 6]);
948 title = xmemdup0(&data[pos + 6], slen);
949 sum += data[pos+len];
953 if (!strcmp(title, "DspNumber"))
955 else if (!strcmp(title, "PMModelItemInfo"))
958 dump_PMModelItemInfo(ndims);
960 else if (!strcmp(title, "NDimensional__DspCell"))
962 match_byte_assert(0);
971 static const int number_prefix[] = {
974 size_t number_prefix_len = sizeof number_prefix / sizeof *number_prefix;
975 if (match_bytes(pos, number_prefix, number_prefix_len))
980 printf ("%04x ", prev_end);
981 hex_dump (stdout, prev_end, pos - prev_end);
985 double d = *(double *) &data[pos + number_prefix_len];
986 printf ("float %f\n", d);
993 if (!memcmp (&data[pos + 4], "{\\rtf", 5))
/* Little-endian 32-bit length stored in the 4 bytes at pos.  The bytes are
   combined as uint32_t so that shifting a byte >= 0x80 left by 24 does not
   overflow a signed int (undefined behavior); the result is then converted
   to int as before. */
995 int len = (int) (data[pos] + ((uint32_t) data[pos + 1] << 8) + ((uint32_t) data[pos + 2] << 16)
996 + ((uint32_t) data[pos + 3] << 24));
997 if (len < n - pos - 4)
1002 printf ("%04x ", prev_end);
1003 hex_dump (stdout, prev_end, pos - prev_end);
1014 if (data[pos] && data[pos + 1] && data[pos + 2] >= 0xfe
1015 && data[pos + 3] == 0xff && data[pos + 4] && data[pos + 4] != 0xff)
1017 if (prev_end != pos)
1020 printf ("%04x ", prev_end);
1021 hex_dump (stdout, prev_end, pos - prev_end);
1025 static int prev_num;
/* Little-endian 32-bit value stored in the 4 bytes at pos.  The bytes are
   combined as uint32_t so that shifting a byte >= 0x80 left by 24 does not
   overflow a signed int (undefined behavior); the result is then converted
   to int32_t. */
1026 int32_t num = (int32_t) (data[pos] + ((uint32_t) data[pos + 1] << 8)
1027 + ((uint32_t) data[pos + 2] << 16) + ((uint32_t) data[pos + 3] << 24));
1028 printf ("%d (%+d) ", num, num - prev_num);
1035 static const int font_prefix[] =
1037 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x41, 0x72, 0x69, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1039 size_t font_prefix_len = sizeof font_prefix / sizeof *font_prefix;
1040 if (match_bytes(pos, font_prefix, font_prefix_len))
1042 if (prev_end != pos)
1045 printf ("%04x ", prev_end);
1046 hex_dump (stdout, prev_end, pos - prev_end);
1052 pos += font_prefix_len;
1057 static const int string_prefix[] = {
1058 0x05, 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01
1060 size_t string_prefix_len = sizeof string_prefix / sizeof *string_prefix;
1061 if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] != 255)
1063 if (prev_end != pos)
1066 printf ("%04x ", prev_end);
1067 hex_dump (stdout, prev_end, pos - prev_end);
1071 int len = data[pos + 8];
1072 printf ("string %.*s\n", len, &data[pos + 9]);
1077 if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] == 255)
1079 if (prev_end != pos)
1082 printf ("%04x ", prev_end);
1083 hex_dump (stdout, prev_end, pos - prev_end);
1087 int len = data[pos + 9] + (data[pos + 10] << 8);
1088 printf ("\nlongstring %.*s\n", len, &data[pos + 11]);
1094 if (data[pos] == 0 && data[pos + 1] == 0xff
1095 && (!data[pos + 2] || data[pos + 2] == 0xff) && data[pos + 3] == 0xff
1096 && data[pos + 4] == 0)
1099 if (prev_end != pos)
1102 printf ("%04x ", prev_end);
1103 hex_dump (stdout, prev_end, pos - prev_end);
1110 if (!is_ascii(data[pos]))
1116 unsigned int start = pos;
1117 unsigned int end = pos + 1;
1118 while (is_ascii(data[end]))
1121 unsigned int len = end - start;
1128 unsigned int len2 = data[start - 2] + (data[start - 1] << 8);
1129 unsigned int len3 = data[start - 1];
1131 if (len2 && len2 <= len)
1136 else if (len3 && len3 <= len)
1153 unsigned real_start = start - length_bytes;
1154 if (prev_end != real_start)
1157 printf ("%04x ", prev_end);
1158 hex_dump (stdout, prev_end, real_start - prev_end);
1161 printf ("%04x ", real_start);
1162 printf ("\"%.*s\"\n",
1163 (int) end - start, (char *) &data[start]);