# Test group: converting text from one character encoding to another.
1 AT_BANNER([i18n recoding])
3 # CHECK_I18N_RECODE([TITLE], [FROM-CODING], [TO-CODING],
4 # [FROM-TEXT], [TO-TEXT])
6 # Converts FROM-TEXT from FROM-CODING to TO-CODING and checks that the result
7 # is TO-TEXT. The "printf" program is applied to both FROM-TEXT and TO-TEXT to
8 # allow for backslash-escapes. (Hex escapes are not portable; use octal escapes instead.)
10 m4_define([CHECK_I18N_RECODE],
11 [AT_SETUP([convert $1])
14 dnl Skip the test if this host doesn't know the source and target encodings.
15 AT_CHECK([i18n-test supports_encodings '$2' '$3'])
16 AT_CHECK_UNQUOTED([i18n-test recode '$2' '$3' `printf '$4'`], [0], [`printf '$5'`
# Plain ASCII text must come through unchanged, both when the source and
# target encodings are identical and when the target is UTF-8.
20 CHECK_I18N_RECODE([reflexively], [ASCII], [ASCII], [abc], [abc])
21 CHECK_I18N_RECODE([without any change], [ASCII], [UTF-8], [abc], [abc])
23 CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8], [ISO-8859-1], [UTF-8],
25 CHECK_I18N_RECODE([from UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
28 # 0xc0 == 0300 is invalid in UTF-8
29 CHECK_I18N_RECODE([invalid UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
31 # 0xc2 == 0302 is the first byte of a 2-byte UTF-8 sequence
32 CHECK_I18N_RECODE([truncated UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
35 dnl The input to this test is 7 bytes long and the expected output is 9 bytes,
36 dnl because each of the two non-ASCII ISO-8859-1 bytes becomes 2 bytes in UTF-8.
dnl The output is therefore longer than the input, so the conversion should
dnl exercise the E2BIG case (presumably the converter reporting that its
dnl output buffer is too small; confirm against the recoding implementation).
37 CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8 with overflow],
38 [ISO-8859-1], [UTF-8],
39 [Tsch\374\337!], [Tsch\303\274\303\237!])
41 AT_SETUP([convert unknown encoding])
43 AT_CHECK([i18n-test recode nonexistent1 nonexistent2 asdf], [0], [asdf
45 [Warning: cannot create a converter for `nonexistent1' to `nonexistent2': Invalid argument
# Test group: joining two strings while keeping the result within a byte
# limit that is measured in a chosen encoding.
49 AT_BANNER([i18n concatenation])
51 # CHECK_I18N_CONCAT([HEAD], [TAIL], [ENCODING], [MAX-LEN], [ANSWER])
53 # Concatenates HEAD and TAIL, omitting as many characters from HEAD as needed
54 # to make the result come out to no more than MAX-LEN bytes if it was expressed
55 # in ENCODING, and checks that the answer matches ANSWER. HEAD, TAIL, and
56 # ANSWER are all in UTF-8. The "printf" program is applied to HEAD, TAIL, and
57 # ANSWER to allow for backslash-escapes. (Hex escapes are not portable; use
58 # octal escapes instead.)
59 m4_define([CHECK_I18N_CONCAT],
60 [AT_SETUP([m4_if([$2], [], [truncate "$1" to $4 bytes in $3],
61 [truncate "$1" + "$2" to $4 bytes in $3])])
64 dnl Skip the test if this host doesn't know the encoding.
65 AT_CHECK([i18n-test supports_encodings '$3'])
67 [i18n-test concat "`printf '$1'`" "`printf '$2'`" '$3' '$4'], [0],
# Cases where HEAD, TAIL, or both are empty.  Only HEAD is ever shortened:
# abcdefghij is cut down to the 6-byte limit, while the 7-byte TAIL tuvwxyz
# is kept whole even though it is longer than MAX-LEN.
72 CHECK_I18N_CONCAT([abc], [], [UTF-8], [6], [abc])
73 CHECK_I18N_CONCAT([], [xyz], [UTF-8], [6], [xyz])
74 CHECK_I18N_CONCAT([], [], [UTF-8], [6], [])
75 CHECK_I18N_CONCAT([abcdefghij], [], [UTF-8], [6], [abcdef])
76 CHECK_I18N_CONCAT([], [tuvwxyz], [UTF-8], [6], [tuvwxyz])
# Cases where both HEAD and TAIL are present.  HEAD loses characters from its
# end until the result fits (abcd + xyz gives abcxyz), and it disappears
# completely when TAIL alone already fills the limit (abc + uvwxyz gives
# uvwxyz).
78 CHECK_I18N_CONCAT([abc], [xyz], [UTF-8], [6], [abcxyz])
79 CHECK_I18N_CONCAT([abcd], [xyz], [UTF-8], [6], [abcxyz])
80 CHECK_I18N_CONCAT([abc], [uvwxyz], [UTF-8], [6], [uvwxyz])
82 # x in a box ( x⃞ ) is U+0078, U+20DE, 4 bytes in UTF-8, and one grapheme
# cluster.  Together with the 1-byte TAIL the full result needs 5 bytes, so
# every limit below 5 yields just the TAIL: the cluster is omitted as a whole
# rather than split, even where the bare x would have fit next to y.  The
# TAIL itself is kept even when the limit is 0.
84 CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [0], [y])
85 CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [1], [y])
86 CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [2], [y])
87 CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [3], [y])
88 CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [4], [y])
89 CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [5], [x\342\203\236y])
91 # éèä is only 3 bytes in ISO-8859-1.
# (Written in UTF-8, as HEAD is here, it takes 6 bytes.)  The TAIL xyz
# already occupies 3 bytes, so with limits of 0 through 3 no character of
# HEAD can be kept and the answer is the TAIL alone.
92 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [0], [xyz])
93 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [1], [xyz])
94 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [2], [xyz])
95 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [3], [xyz])
96 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [4],
# Limits of 5 and 6 bytes leave room for two and then all three of the
# accented characters.  Each counts as one byte in ISO-8859-1, even though
# the expected answers, written in UTF-8, are 7 and 9 bytes long.
98 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [5],
99 [\303\251\303\250xyz])
100 CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [6],
101 [\303\251\303\250\303\244xyz])