From ffcf1cd46432799392194fb6058bc52833acad3f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 2 Jan 2020 18:06:45 +0000 Subject: [PATCH] character encoding, node depth --- dump-spo.c | 144 ++++++++++---------------------------------- spo-notes | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 206 insertions(+), 112 deletions(-) diff --git a/dump-spo.c b/dump-spo.c index 35cd8b5dcc..2a75bef6a9 100644 --- a/dump-spo.c +++ b/dump-spo.c @@ -1537,27 +1537,6 @@ main(int argc, char *argv[]) continue; } -#if 0 - static const int col_prefix[] = { - 0x11, 0x80, 0x00, -1, 0x00, 0x00, 0x00, 0x01, 0x00 - }; - size_t col_prefix_len = sizeof col_prefix / sizeof *col_prefix; - if (match_bytes(pos, col_prefix, col_prefix_len)) - { - if (prev_end != pos) - { - if (print_offsets) - printf ("%04x ", prev_end); - hex_dump (stdout, prev_end, pos - prev_end); - } - - printf ("col %d\n", data[pos + 3]); - pos += col_prefix_len - 1; - prev_end = pos + 1; - continue; - } -#endif - static const int record_prefix[] = { 0xff, 0xff, 0x00, 0x00, }; @@ -1632,11 +1611,8 @@ main(int argc, char *argv[]) } } - static const int string_prefix[] = { - 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01 - }; - size_t string_prefix_len = sizeof string_prefix / sizeof *string_prefix; - if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] != 255) + if (data[pos] && data[pos + 1] && data[pos + 2] >= 0xfe + && data[pos + 3] == 0xff && data[pos + 4] && data[pos + 4] != 0xff) { if (prev_end != pos) { @@ -1646,13 +1622,22 @@ main(int argc, char *argv[]) } prev_end = pos; - int len = data[pos + 7]; - printf ("string %.*s\n", len, &data[pos + 8]); - pos += 8 + len - 1; + static int prev_num; + int32_t num = data[pos] + (data[pos + 1] << 8) + + (data[pos + 2] << 16) + (data[pos + 3] << 24); + printf ("%d (%+d) ", num, num - prev_num); + prev_num = num; + pos += 4 - 1; prev_end = pos + 1; continue; } - if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] == 255) + + static const int font_prefix[] = + { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x41, 0x72, 0x69, 0x61, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + }; + size_t font_prefix_len = sizeof font_prefix / sizeof *font_prefix; + if (match_bytes(pos, font_prefix, font_prefix_len)) { if (prev_end != pos) { @@ -1662,115 +1647,52 @@ main(int argc, char *argv[]) } prev_end = pos; - int len = data[pos + 8] + (data[pos + 9] << 8); - printf ("\nlongstring %.*s\n", len, &data[pos + 10]); - pos += 10 + len - 1; - prev_end = pos + 1; - continue; - } - - + printf ("font\n"); -#if 0 - static const int heading_prefix[] = { - -1, 0x00, 0x00, 0x00, 0x50, 0x80, 0x00, 0x52, 0x80, 0x00, -1, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, - 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x05, 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01 - }; - size_t heading_prefix_len = sizeof heading_prefix / sizeof *heading_prefix; - if (match_bytes(pos, heading_prefix, heading_prefix_len)) - { - if (prev_end != pos) - { - if (print_offsets) - printf ("%04x ", prev_end); - hex_dump (stdout, prev_end, pos - prev_end); - } - - printf ("heading %d %d\n", data[pos],data[pos + 10]); - pos += heading_prefix_len - 1; + pos += font_prefix_len - 1; prev_end = pos + 1; continue; } -#if 0 - static const int font_prefix[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1, 0x80, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, -1, - 0x80, 0x00, -1, 0x00, -1, 0x00, 0xc8, 0x00, -1, -1, -1, -1, -1, - 0x00, -1, 0x00, 0x00, 0x00, 0x01, 0x00, -1, - 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, -1, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, -1 /* 12 or 22 */, + static const int string_prefix[] = { + 0x80, 0x01, 0x02, 0x28, 0x05, 0x00, 0x01 }; - size_t font_prefix_len = sizeof font_prefix / sizeof *font_prefix; - if (match_bytes(pos, font_prefix, font_prefix_len)) + size_t string_prefix_len = sizeof string_prefix / sizeof *string_prefix; + if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] != 255) { if (prev_end != pos) { if (print_offsets) - printf ("%04x", prev_end); + printf ("%04x ", prev_end); hex_dump (stdout, prev_end, pos - prev_end); } + prev_end = pos; - printf ("font %d %d %d %d %d %d %d %d %d %d\n", - data[pos + 24], data[pos + 26], - data[pos + 30], data[pos + 31], data[pos + 32], - data[pos + 33], data[pos + 34], data[pos + 36], - data[pos + 58], data[pos + 59]); - pos += font_prefix_len - 1; + int len = data[pos + 7]; + printf ("string %.*s\n", len, &data[pos + 8]); + pos += 8 + len - 1; prev_end = pos + 1; continue; } -#endif - - static const int table_prefix[] = { - -1 /* ed or e9 */, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbc, 0x02, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x41, 0x72, 0x69, - 0x61, 0x6c, 0x00, -1, 0x00, -1, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, -1, 0x00, 0x00, 0x00, -1, - }; - size_t table_prefix_len = sizeof table_prefix / sizeof *table_prefix; - if (match_bytes(pos, table_prefix, table_prefix_len)) + if (match_bytes(pos, string_prefix, string_prefix_len) && data[pos + string_prefix_len] == 255) { if (prev_end != pos) { if (print_offsets) - printf ("%04x", prev_end); + printf ("%04x ", prev_end); hex_dump (stdout, prev_end, pos - prev_end); } + prev_end = pos; - printf ("table %d\n", data[pos + 72]); - pos += table_prefix_len - 1; + int len = data[pos + 8] + (data[pos + 9] << 8); + printf ("\nlongstring %.*s\n", len, &data[pos + 10]); + pos += 10 + len - 1; prev_end = pos + 1; continue; } - static const int dim_prefix[] = { - 0x00, 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, -1, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x80, 0x01, 0x02, 0x28, - 0x05, 0x00, - }; - size_t dim_prefix_len = sizeof dim_prefix / sizeof *dim_prefix; - if (match_bytes(pos, dim_prefix, dim_prefix_len)) - { - if (prev_end != pos) - { - if (print_offsets) - printf ("%04x", prev_end); - hex_dump (stdout, prev_end, pos - prev_end); - } + - printf ("dim %d\n", data[pos + 8]); - pos += dim_prefix_len - 1; - prev_end = pos + 1; - continue; - } -#endif if (!is_ascii(data[pos])) continue; diff --git a/spo-notes b/spo-notes index 25d124a71a..b7c1512e68 100644 --- a/spo-notes +++ b/spo-notes @@ -25,6 +25,20 @@ and the DspString case: 01 02 28 (00|05) 00 (00|01) often followed by a string +Charset numbers come from the list for \fcharset in the RTF +specification: +http://latex2rtf.sourceforge.net/rtfspec_6.html#rtfspec_10 The charset +number appears 4 bytes before the font name string, e.g. here it is +0xb2 or 178 (Arabic): + + 01 01 ed ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 bc 02 00 00 00 00 00 b2 00 00 00 22 41 72 61 62 69 63 20 54 72 61 6e 73 70 61 72 65 6e 74 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 ............................."Arabic Transparent..............P............... + +and here it is 0xee or 238 (Polish): + + 01 01 ed ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 bc 02 00 00 00 00 00 ee 00 00 00 22 41 72 69 61 6c 00 61 00 6c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 ............................."Arial.a.l.......................P............... + +It is usually 0 for "ANSI". + DspString is the label of the object that is currently selected. The currently selected object is always the first in the file @@ -322,7 +336,8 @@ rec:NavLog NavTreeViewItem: last bit is the number of children plus one, e.g. here it has 0x12+1 -children: +children. It's actually the total number of nodes including the root +Output node. rec:NavTreeViewItem .. .. .. .. .. 02 .. 01 .. .. .. .. .. .. .. .. .. 01 .. .. .. .. .. .. .. .. .. 18 .. .. .. .. .. .. .. de ff ff ff 18 .. .. .. .. .. .. .. 28 .. .. .. 28 .. .. .. cb 02 .. .. f0 86 .. .. .. .. .. .. .. f8 2a .. .. 34 21 .. .. 01 .. 0f .. 02 .. f6 04 .. .. f6 04 .. .. f6 04 .. .. f6 04 .. .. .. .. f0 .. .. .. 7c 02 01 .. ...............................................(...(.................*..4!..............................|... "(Continued)" @@ -331,3 +346,160 @@ rtf 01 01 .. .. .. rtf .. 13 .. + + +Node depth: + +The "04 00 00 00 04 00 00" is the number of nodes above the node, in +this case 4, three Headings and the Output node. The first 04 is +reliable, the second one is sometimes 01. + + 00 01 + float 0.000000 + 18 00 00 00 + -46782 5f 02 00 00 + -48782 04 00 00 00 04 00 00 + cell F40.2 "LogZ" + 07 80 00 02 00 00 00 7b 00 01 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 01 01 08 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 90 + font + rtf + +The node numbering is insane: in decimal each node's number decreases +(these numbers are negative) by 2000 then by 34 alternately, except +that if a node is collapsed it *increases* by 2000 then decreases by +2000. Below (test15.spo), LogI and LogN are collapsed: + + 5:rec:NavLog 02 00 00 00 00 00 00 00 00 18 00 00 00 + 6:-83394 (-83394) 5f 02 00 00 + 7:-85394 (-2000) 04 00 00 00 04 00 00 00 + 12:"LogAC" + 18:-34 (+85360) 18 00 00 00 00 00 00 00 28 00 00 00 28 00 00 00 cb 02 00 00 92 4d 01 00 00 00 00 00 00 f8 2a 00 00 34 21 00 00 01 00 0f 00 02 00 f6 04 00 00 f6 04 00 00 f6 04 00 00 f6 04 00 00 00 00 f0 00 00 00 7c 02 01 00 ........(...(........M........*..4!..............................|... + 31:-2000 (-1966) 01 00 00 00 01 00 00 + 32:cell F40.2 "LogA" + 39:-2034 (-34) b3 02 00 00 + 40:-4034 (-2000) 01 00 00 00 01 00 00 + 41:cell F40.2 "LogB" + 48:-4068 (-34) 5f 02 00 00 + 49:-6068 (-2000) 01 00 00 00 01 00 00 + 50:cell F40.2 "LogC" + 57:-6102 (-34) 5f 02 00 00 + 58:-8102 (-2000) 01 00 00 00 01 00 00 + 59:cell F40.2 "LogD" + 66:-8136 (-34) 5f 02 00 00 + 67:-10136 (-2000) 01 00 00 00 01 00 00 + 68:cell F40.2 "LogE" + 75:-10170 (-34) 5f 02 00 00 + 76:-12170 (-2000) 01 00 00 00 01 00 00 + 77:cell F40.2 "LogF" + 88:-12204 (-34) 5f 02 00 00 + 89:-14204 (-2000) 02 00 00 00 02 00 00 + 90:cell F40.2 "LogG" + 97:-14238 (-34) 5f 02 00 00 + 98:-16238 (-2000) 02 00 00 00 02 00 00 + 99:cell F40.2 "LogH" + 106:-16272 (-34) 5f 02 00 00 + 107:-18272 (-2000) 02 00 00 00 02 00 00 + 108:cell F40.2 "LogI" + 115:-16272 (+2000) 5f 02 00 00 + 116:-18272 (-2000) 02 00 00 00 02 00 00 + 117:cell F40.2 "LogJ" + 124:-18306 (-34) 5f 02 00 00 + 125:-20306 (-2000) 02 00 00 00 02 00 00 + 126:cell F40.2 "LogK" + 133:-20340 (-34) 5f 02 00 00 + 134:-22340 (-2000) 02 00 00 00 02 00 00 + 135:cell F40.2 "LogL" + 142:-22374 (-34) 5f 02 00 00 + 143:-24374 (-2000) 02 00 00 00 02 00 00 + 144:cell F40.2 "LogM" + 151:-24408 (-34) 5f 02 00 00 + 152:-26408 (-2000) 02 00 00 00 02 00 00 + 153:cell F40.2 "LogN" + 160:-24408 (+2000) 5f 02 00 00 + 161:-26408 (-2000) 02 00 00 00 02 00 00 + 162:cell F40.2 "LogO" + 169:-26442 (-34) 5f 02 00 00 + 170:-28442 (-2000) 02 00 00 00 02 00 00 + 171:cell F40.2 "LogP" + 178:-28476 (-34) 5f 02 00 00 + 179:-30476 (-2000) 02 00 00 00 02 00 00 + 180:cell F40.2 "LogQ" + 187:-30510 (-34) 5f 02 00 00 + 188:-32510 (-2000) 02 00 00 00 02 00 00 + 189:cell F40.2 "LogR" + 196:-32544 (-34) 5f 02 00 00 + 197:-34544 (-2000) 02 00 00 00 02 00 00 + 198:cell F40.2 "LogS" + 205:-34578 (-34) 5f 02 00 00 + 206:-36578 (-2000) 02 00 00 00 02 00 00 + 207:cell F40.2 "LogT" + 214:-36612 (-34) 5f 02 00 00 + 215:-38612 (-2000) 02 00 00 00 02 00 00 + 216:cell F40.2 "LogU" + 223:-38646 (-34) 5f 02 00 00 + 224:-40646 (-2000) 02 00 00 00 01 00 00 + 225:cell F40.2 "LogV" + 236:-40680 (-34) 5f 02 00 00 + 237:-42680 (-2000) 03 00 00 00 03 00 00 + 238:cell F40.2 "LogW" + 245:-42714 (-34) 5f 02 00 00 + 246:-44714 (-2000) 03 00 00 00 03 00 00 + 247:cell F40.2 "LogX" + 254:-44748 (-34) 5f 02 00 00 + 255:-46748 (-2000) 03 00 00 00 03 00 00 + 256:cell F40.2 "LogY" + 267:-46782 (-34) 5f 02 00 00 + 268:-48782 (-2000) 04 00 00 00 04 00 00 + 269:cell F40.2 "LogZ" + 276:-48816 (-34) 5f 02 00 00 + 277:-50816 (-2000) 04 00 00 00 04 00 00 + 278:cell F40.2 "LogAA" + 285:-50850 (-34) 5f 02 00 00 + 286:-52850 (-2000) 04 00 00 00 04 00 00 + 287:cell F40.2 "LogAB" + 294:-52884 (-34) 5f 02 00 00 + 295:-54884 (-2000) 04 00 00 00 01 00 00 + 296:cell F40.2 "LogAC" + 303:-54918 (-34) 5f 02 00 00 + 304:-56918 (-2000) 04 00 00 00 04 00 00 + 305:cell F40.2 "LogAC" + 312:-56952 (-34) 5f 02 00 00 + 313:-58952 (-2000) 04 00 00 00 04 00 00 + 314:cell F40.2 "LogAC" + 321:-58986 (-34) 5f 02 00 00 + 322:-60986 (-2000) 04 00 00 00 04 00 00 + 323:cell F40.2 "LogAC" + 330:-61020 (-34) 5f 02 00 00 + 331:-63020 (-2000) 04 00 00 00 04 00 00 + 332:cell F40.2 "LogAC" + 339:-63054 (-34) 5f 02 00 00 + 340:-65054 (-2000) 04 00 00 00 04 00 00 + 341:cell F40.2 "LogAC" + 348:-65088 (-34) 5f 02 00 00 + 349:-67088 (-2000) 04 00 00 00 04 00 00 + 350:cell F40.2 "LogAC" + 357:-67122 (-34) 5f 02 00 00 + 358:-69122 (-2000) 04 00 00 00 04 00 00 + 359:cell F40.2 "LogAC" + 366:-69156 (-34) 5f 02 00 00 + 367:-71156 (-2000) 04 00 00 00 04 00 00 + 368:cell F40.2 "LogAC" + 375:-71190 (-34) 5f 02 00 00 + 376:-73190 (-2000) 04 00 00 00 04 00 00 + 377:cell F40.2 "LogAC" + 384:-73224 (-34) 5f 02 00 00 + 385:-75224 (-2000) 04 00 00 00 04 00 00 + 386:cell F40.2 "LogAC" + 393:-75258 (-34) 5f 02 00 00 + 394:-77258 (-2000) 04 00 00 00 04 00 00 + 395:cell F40.2 "LogAC" + 402:-77292 (-34) 5f 02 00 00 + 403:-79292 (-2000) 04 00 00 00 04 00 00 + 404:cell F40.2 "LogAC" + 411:-79326 (-34) 5f 02 00 00 + 412:-81326 (-2000) 04 00 00 00 04 00 00 + 413:cell F40.2 "LogAC" + 420:-81360 (-34) 5f 02 00 00 + 421:-83360 (-2000) 04 00 00 00 04 00 00 + 422:cell F40.2 "LogAC" + -- 2.30.2