dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
dnl GNU General Public License for more details.
dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
dnl
AT_BANNER([i18n recoding])

# CHECK_I18N_RECODE([TITLE], [FROM-CODING], [TO-CODING],
#                   [FROM-TEXT], [TO-TEXT])
#
# Converts FROM-TEXT from FROM-CODING to TO-CODING and checks that the result
# is TO-TEXT.  Octal backslash-escapes are supported in FROM-TEXT and TO-TEXT.
m4_define([CHECK_I18N_RECODE],
  [AT_SETUP([convert $1])
   AT_KEYWORDS([i18n])

   dnl Skip the test if this host doesn't know the source and target encodings.
   AT_CHECK([i18n-test supports_encodings '$2' '$3'])

   dnl The expected output is TO-TEXT with its escapes expanded by printf,
   dnl followed by a newline.
   AT_CHECK_UNQUOTED([i18n-test recode '$2' '$3' '$4'], [0], [`printf '$5'`
])
   AT_CLEANUP])

CHECK_I18N_RECODE([reflexively], [ASCII], [ASCII], [abc], [abc])
CHECK_I18N_RECODE([without any change], [ASCII], [UTF-8], [abc], [abc])

CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8], [ISO-8859-1], [UTF-8],
                  [\242], [\302\242])
CHECK_I18N_RECODE([from UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [\302\242], [\242])

# 0xc0 == 0300 is invalid in UTF-8
CHECK_I18N_RECODE([invalid UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [xy\300z], [xy?z])

# 0xc2 == 0302 is the first byte of a 2-byte UTF-8 sequence
CHECK_I18N_RECODE([truncated UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [xy\302], [xy?])

# Checks for a bug that caused the last character to be dropped in conversions
# from encodings that have combining diacritics (e.g. windows-1258).
CHECK_I18N_RECODE([dropped final character in windows-1258],
                  [windows-1258], [UTF-8], [aeiou], [aeiou])

dnl The input to this test is 7 bytes long and the expected output is 9 bytes.
dnl So it should exercise the E2BIG case
CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8 with overflow],
                  [ISO-8859-1], [UTF-8],
                  [Tsch\374\337!], [Tsch\303\274\303\237!])

AT_SETUP([convert unknown encoding])
AT_KEYWORDS([i18n])
AT_CHECK([i18n-test recode nonexistent1 nonexistent2 asdf], [0], [asdf
],
  [Warning: cannot create a converter for `nonexistent1' to `nonexistent2': Invalid argument
])
AT_CLEANUP

AT_BANNER([i18n concatenation])

# CHECK_I18N_CONCAT([HEAD], [TAIL], [ENCODING], [MAX-LEN], [ANSWER])
#
# Concatenates HEAD and TAIL, omitting as many characters from HEAD as needed
# to make the result come out to no more than MAX-LEN bytes if it was expressed
# in ENCODING, and checks that the answer matches ANSWER.  HEAD, TAIL, and
# ANSWER are all in UTF-8.  The "printf" program is applied to HEAD, TAIL, and
# ANSWER to allow for backslash-escapes.  (Hex escapes are not portable; use
# octal escapes instead.)
m4_define([CHECK_I18N_CONCAT],
  [dnl The test title omits the tail when the second argument is empty.
   AT_SETUP([m4_if([$2], [], [truncate "$1" to $4 bytes in $3],
                            [truncate "$1" + "$2" to $4 bytes in $3])])
   AT_KEYWORDS([i18n])

   dnl Skip the test if this host doesn't know the encoding.
   AT_CHECK([i18n-test supports_encodings '$3'])

   dnl The expected output is the answer with its escapes expanded by printf,
   dnl followed by a newline.
   AT_CHECK_UNQUOTED(
     [i18n-test concat '$1' '$2' '$3' '$4'], [0], [`printf '$5'`
])
   AT_CLEANUP])

CHECK_I18N_CONCAT([abc], [], [UTF-8], [6], [abc])
CHECK_I18N_CONCAT([], [xyz], [UTF-8], [6], [xyz])
CHECK_I18N_CONCAT([], [], [UTF-8], [6], [])
CHECK_I18N_CONCAT([abcdefghij], [], [UTF-8], [6], [abcdef])
CHECK_I18N_CONCAT([], [tuvwxyz], [UTF-8], [6], [tuvwxyz])

CHECK_I18N_CONCAT([abc], [xyz], [UTF-8], [6], [abcxyz])
CHECK_I18N_CONCAT([abcd], [xyz], [UTF-8], [6], [abcxyz])
CHECK_I18N_CONCAT([abc], [uvwxyz], [UTF-8], [6], [uvwxyz])

# x in a box ( x⃞ ) is U+0078, U+20DE, 4 bytes in UTF-8, and one grapheme
# cluster.
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [0], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [1], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [2], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [3], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [4], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [5], [x\342\203\236y])

# éèä is only 3 bytes in ISO-8859-1.
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [0], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [1], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [2], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [3], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [4],
                  [\303\251xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [5],
                  [\303\251\303\250xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [6],
                  [\303\251\303\250\303\244xyz])