From 287f1aa484434ad1817de8623ba3b092e26733a9 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Tue, 9 Sep 2014 08:51:44 -0700
Subject: [PATCH] u8-istream: Fix handling of partial reads.

The u8-istream code did not retry upon a partial read, assuming that that
was the end of the file. When the partial read was shorter than
ENCODING_GUESS_MIN, this could cause the encoding guesser, in turn, to
guess the wrong encoding (especially if the encoding was really UTF-16 and
the partial read was an odd number of bytes).

Reported at https://bugs.debian.org/760841.

Reported by Friedrich Beckmann and Steven Chamberlain.
---
 src/libpspp/u8-istream.c    | 20 ++++++++++++++------
 tests/libpspp/u8-istream.at |  4 +++-
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/libpspp/u8-istream.c b/src/libpspp/u8-istream.c
index 2486ca1ada..6347a6701a 100644
--- a/src/libpspp/u8-istream.c
+++ b/src/libpspp/u8-istream.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -203,14 +203,22 @@ fill_buffer (struct u8_istream *is)
   is->head = is->buffer;
 
   /* Read more input. */
+  n = 0;
   do
     {
-      n = read (is->fd, is->buffer + is->length,
-                U8_ISTREAM_BUFFER_SIZE - is->length);
+      ssize_t retval = read (is->fd, is->buffer + is->length,
+                             U8_ISTREAM_BUFFER_SIZE - is->length);
+      if (retval > 0)
+        {
+          n += retval;
+          is->length += retval;
+        }
+      else if (retval == 0)
+        return n;
+      else if (errno != EINTR)
+        return n > 0 ? n : -1;
     }
-  while (n < 0 && errno == EINTR);
-  if (n > 0)
-    is->length += n;
+  while (is->length < U8_ISTREAM_BUFFER_SIZE);
   return n;
 }
 
diff --git a/tests/libpspp/u8-istream.at b/tests/libpspp/u8-istream.at
index 24af08cc7c..5b5b4e006e 100644
--- a/tests/libpspp/u8-istream.at
+++ b/tests/libpspp/u8-istream.at
@@ -140,7 +140,9 @@ AT_CLEANUP
 AT_SETUP([read UTF-16 as Auto])
 AT_KEYWORDS([u8_istream])
 AT_CHECK([i18n-test supports_encodings UTF-16 UTF-16BE UTF-16LE])
-AT_CHECK([printf '\0e\0n\0t\0r\0\351\0e\0\n' | u8-istream-test read - Auto],
+dnl The "sleep 1" checks for a bug in which u8-istream did not properly
+dnl handle receiving data in multiple chunks.
+AT_CHECK([{ printf '\0e\0n\0t\0'; sleep 1; printf 'r\0\351\0e\0\n'; } | u8-istream-test read - Auto],
   [0], [entrée
 ])
 AT_CHECK([printf 'e\0n\0t\0r\0\351\0e\0\n\0' | u8-istream-test read - Auto],
-- 
2.30.2