X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tests%2Flibpspp%2Fi18n.at;h=5444a78ff6cfa9e059b2e1252284765598ecd7f7;hb=9d1d71e732eeed85ca3002b264e1269cdd005a3f;hp=ed29a7d69ca65cde5e28c23af21a1d80c3a2f737;hpb=f5099c58d17e8f66a74a84918e688ef17936d392;p=pspp-builds.git

diff --git a/tests/libpspp/i18n.at b/tests/libpspp/i18n.at
index ed29a7d6..5444a78f 100644
--- a/tests/libpspp/i18n.at
+++ b/tests/libpspp/i18n.at
@@ -1,50 +1,119 @@
-AT_BANNER([i18n routines])
+AT_BANNER([i18n recoding])
 
-# CHECK_I18N([TITLE], [FROM-CODING], [TO-CODING], [FROM-TEXT], [TO-TEXT])
+m4_divert_push([PREPARE_TESTS])
+# supports_encodings ENCODING...
+#
+# Returns 0 if this host is expected to support every ENCODING, otherwise 77
+# (the exit status that Autotest treats as a request to skip the test).  Writes
+# nothing to stdout, so that it can be used in a plain AT_CHECK.
+supports_encodings () {
+  case "$host" in
+    *-*-linux* | *-*-*-gnu*)
+      dnl GNU/Linux always has the encodings we want. We can't ask
+      dnl config.charset about them because it has a special case here
+      dnl too and won't tell us.
+      return 0
+      ;;
+    *)
+      for encoding in "$@"; do
+        dnl Search for the loop variable, not a fixed positional parameter,
+        dnl and discard the matching lines that grep would otherwise print.
+        $SHELL $top_srcdir/gl/config.charset "$host" \
+          | grep "$encoding" >/dev/null || return 77
+      done
+      ;;
+  esac
+}
+m4_divert_pop([PREPARE_TESTS])
+
+# CHECK_I18N_RECODE([TITLE], [FROM-CODING], [TO-CODING],
+#                   [FROM-TEXT], [TO-TEXT])
 #
 # Converts FROM-TEXT from FROM-CODING to TO-CODING and checks that the result
-# is TO-TEXT. The "printf" program is applied to both FROM-TEXT and TO-TEXT
-# to allow for backslash-escapes. (Be aware that hex escapes are not portable;
-# use octal escapes instead.)
-m4_define([CHECK_I18N],
+# is TO-TEXT. The "printf" program is applied to both FROM-TEXT and TO-TEXT to
+# allow for backslash-escapes. (Hex escapes are not portable; use octal
+# escapes instead.)
+m4_define([CHECK_I18N_RECODE],
  [AT_SETUP([convert $1])
   AT_KEYWORDS([i18n])
 
   dnl Skip the test if this host doesn't know the source and target encodings.
-  AT_CHECK(
-    [case "$host" in
-       *-*-linux* | *-*-*-gnu*)
-         dnl GNU/Linux always has the encodings we want. We can't ask
-         dnl config.charset about them because it has a special case here
-         dnl too and won't tell us.
-         ;;
-       *)
-         $SHELL $top_srcdir/gl/config.charset "$host" | grep '$2' || exit 77
-         $SHELL $top_srcdir/gl/config.charset "$host" | grep '$3' || exit 77
-         ;;
-     esac
-    ], [0], [ignore])
-  AT_CHECK_UNQUOTED([i18n-test '$2' '$3' `printf '$4'`], [0], [`printf '$5'`
+  AT_CHECK([supports_encodings '$2' '$3'])
+  AT_CHECK_UNQUOTED([i18n-test recode '$2' '$3' `printf '$4'`], [0], [`printf '$5'`
 ])
   AT_CLEANUP])
 
-CHECK_I18N([reflexively], [ASCII], [ASCII], [abc], [abc])
-CHECK_I18N([without any change], [ASCII], [UTF-8], [abc], [abc])
+CHECK_I18N_RECODE([reflexively], [ASCII], [ASCII], [abc], [abc])
+CHECK_I18N_RECODE([without any change], [ASCII], [UTF-8], [abc], [abc])
 
-CHECK_I18N([from ISO-8859-1 to UTF-8], [ISO-8859-1], [UTF-8],
-           [\242], [\302\242])
-CHECK_I18N([from UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
-           [\302\242], [\242])
+CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8], [ISO-8859-1], [UTF-8],
+                  [\242], [\302\242])
+CHECK_I18N_RECODE([from UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
+                  [\302\242], [\242])
 
 # 0xc0 == 0300 is invalid in UTF-8
-CHECK_I18N([invalid UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
-           [xy\300z], [xy?z])
+CHECK_I18N_RECODE([invalid UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
+                  [xy\300z], [xy?z])
 
 # 0xc2 == 0302 is the first byte of a 2-byte UTF-8 sequence
-CHECK_I18N([truncated UTF-8 to ISO-8559-1], [UTF-8], [ISO-8859-1],
-           [xy\302], [xy?])
+CHECK_I18N_RECODE([truncated UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
+                  [xy\302], [xy?])
 
 dnl The input to this test is 7 bytes long and the expected output is 9 bytes.
 dnl So it should exercise the E2BIG case
-CHECK_I18N([from ISO-8859-1 to UTF-8 with overflow], [ISO-8859-1], [UTF-8],
-           [Tsch\374\337!], [Tsch\303\274\303\237!])
+CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8 with overflow],
+                  [ISO-8859-1], [UTF-8],
+                  [Tsch\374\337!], [Tsch\303\274\303\237!])
+
+AT_BANNER([i18n concatenation])
+
+# CHECK_I18N_CONCAT([HEAD], [TAIL], [ENCODING], [MAX-LEN], [ANSWER])
+#
+# Concatenates HEAD and TAIL, omitting as many characters from HEAD as needed
+# to make the result come out to no more than MAX-LEN bytes if it was expressed
+# in ENCODING, and checks that the answer matches ANSWER. HEAD, TAIL, and
+# ANSWER are all in UTF-8. The "printf" program is applied to HEAD, TAIL, and
+# ANSWER to allow for backslash-escapes. (Hex escapes are not portable; use
+# octal escapes instead.)
+m4_define([CHECK_I18N_CONCAT],
+  [AT_SETUP([m4_if([$2], [], [truncate "$1" to $4 bytes in $3],
+                   [truncate "$1" + "$2" to $4 bytes in $3])])
+   AT_KEYWORDS([i18n])
+
+   dnl Skip the test if this host doesn't know the encoding.
+   AT_CHECK([supports_encodings '$3'])
+   AT_CHECK_UNQUOTED(
+     [i18n-test concat "`printf '$1'`" "`printf '$2'`" '$3' '$4'], [0],
+     [`printf '$5'`
+])
+   AT_CLEANUP])
+
+CHECK_I18N_CONCAT([abc], [], [UTF-8], [6], [abc])
+CHECK_I18N_CONCAT([], [xyz], [UTF-8], [6], [xyz])
+CHECK_I18N_CONCAT([], [], [UTF-8], [6], [])
+CHECK_I18N_CONCAT([abcdefghij], [], [UTF-8], [6], [abcdef])
+CHECK_I18N_CONCAT([], [tuvwxyz], [UTF-8], [6], [tuvwxyz])
+
+CHECK_I18N_CONCAT([abc], [xyz], [UTF-8], [6], [abcxyz])
+CHECK_I18N_CONCAT([abcd], [xyz], [UTF-8], [6], [abcxyz])
+CHECK_I18N_CONCAT([abc], [uvwxyz], [UTF-8], [6], [uvwxyz])
+
+# x in a box ( x⃞ ) is U+0078, U+20DE, 4 bytes in UTF-8, and one grapheme
+# cluster.
+CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [0], [y])
+CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [1], [y])
+CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [2], [y])
+CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [3], [y])
+CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [4], [y])
+CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [5], [x\342\203\236y])
+# éèä is only 3 bytes in ISO-8859-1.
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [0], [xyz])
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [1], [xyz])
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [2], [xyz])
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [3], [xyz])
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [4],
+                  [\303\251xyz])
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [5],
+                  [\303\251\303\250xyz])
+CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [6],
+                  [\303\251\303\250\303\244xyz])