From 7caed4c8228cf00ed5061b474f32a47b5ff7c40f Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Wed, 24 Apr 2013 22:26:06 -0700
Subject: [PATCH] u8-istream: Fix reading UTF-16 and UTF-32 files.

Before this commit, u8_istream_for_fd() would obtain the correct encoding
from encoding_guess_head_encoding() for UTF-16 and UTF-32 files, and then
it would ignore it and use the default encoding returned by
encoding_guess_parse_encoding().  This commit fixes the problem.
---
Review note: the prerequisite check of the "read UTF-32 as Auto" test now
names UTF-32, UTF-32BE and UTF-32LE (it previously named the UTF-16 family),
so that it matches the encodings the test input is actually written in.

 src/libpspp/u8-istream.c    |  8 +++++---
 tests/libpspp/u8-istream.at | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/src/libpspp/u8-istream.c b/src/libpspp/u8-istream.c
index 22135dbb33..2486ca1ada 100644
--- a/src/libpspp/u8-istream.c
+++ b/src/libpspp/u8-istream.c
@@ -134,12 +134,14 @@ u8_istream_for_fd (const char *fromcode, int fd)
     {
       if (encoding_guess_encoding_is_auto (fromcode)
           && !strcmp (encoding, "ASCII"))
-        is->state = S_AUTO;
+        {
+          is->state = S_AUTO;
+          encoding = encoding_guess_parse_encoding (fromcode);
+        }
       else
         is->state = S_CONVERT;
 
-      is->converter = iconv_open ("UTF-8",
-                                  encoding_guess_parse_encoding (fromcode));
+      is->converter = iconv_open ("UTF-8", encoding);
       if (is->converter == (iconv_t) -1)
         goto error;
     }
diff --git a/tests/libpspp/u8-istream.at b/tests/libpspp/u8-istream.at
index 9757f9605b..24af08cc7c 100644
--- a/tests/libpspp/u8-istream.at
+++ b/tests/libpspp/u8-istream.at
@@ -137,6 +137,40 @@ entrée
 ])
 AT_CLEANUP
 
+AT_SETUP([read UTF-16 as Auto])
+AT_KEYWORDS([u8_istream])
+AT_CHECK([i18n-test supports_encodings UTF-16 UTF-16BE UTF-16LE])
+AT_CHECK([printf '\0e\0n\0t\0r\0\351\0e\0\n' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CHECK([printf 'e\0n\0t\0r\0\351\0e\0\n\0' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CHECK([printf '\376\377\0e\0n\0t\0r\0\351\0e\0\n' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CHECK([printf '\377\376e\0n\0t\0r\0\351\0e\0\n\0' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CLEANUP
+
+AT_SETUP([read UTF-32 as Auto])
+AT_KEYWORDS([u8_istream])
+AT_CHECK([i18n-test supports_encodings UTF-32 UTF-32BE UTF-32LE])
+AT_CHECK([printf '\0\0\0e\0\0\0n\0\0\0t\0\0\0r\0\0\0\351\0\0\0e\0\0\0\n' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CHECK([printf 'e\0\0\0n\0\0\0t\0\0\0r\0\0\0\351\0\0\0e\0\0\0\n\0\0\0' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CHECK([printf '\0\0\376\377\0\0\0e\0\0\0n\0\0\0t\0\0\0r\0\0\0\351\0\0\0e\0\0\0\n' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CHECK([printf '\377\376\0\0e\0\0\0n\0\0\0t\0\0\0r\0\0\0\351\0\0\0e\0\0\0\n\0\0\0' | u8-istream-test read - Auto],
+  [0], [entrée
+])
+AT_CLEANUP
+
 AT_SETUP([read EUC-JP as Auto,EUC-JP])
 AT_KEYWORDS([u8_istream])
 AT_CHECK([i18n-test supports_encodings EUC-JP])
-- 
2.30.2