From 0697a4277a80981c438518e844ae8174ea9b8453 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@cs.stanford.edu>
Date: Wed, 24 Apr 2013 22:14:01 -0700
Subject: [PATCH] u8-istream: Fix handling of UTF-8 byte order marks.

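When the input was detected as UTF-8, u8_istream_for_fd() entered the UTF-8
special-case state without skipping over an initial byte order mark, so the
BOM was left at the head of the buffer as if it were part of the text.
Advance past the BOM when entering that state.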

Reported by Dr. Holger Handstein <post@handundstein.de>.
Signed-off-by: Ben Pfaff <blp@cs.stanford.edu>
---
 src/libpspp/u8-istream.c    | 11 +++++++++--
 tests/libpspp/u8-istream.at |  5 +++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/libpspp/u8-istream.c b/src/libpspp/u8-istream.c
index b172b164f5..22135dbb33 100644
--- a/src/libpspp/u8-istream.c
+++ b/src/libpspp/u8-istream.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -122,7 +122,14 @@ u8_istream_for_fd (const char *fromcode, int fd)
 
   encoding = encoding_guess_head_encoding (fromcode, is->buffer, is->length);
   if (is_encoding_utf8 (encoding))
-    is->state = S_UTF8;
+    {
+      unsigned int bom_len;
+
+      is->state = S_UTF8;
+      bom_len = encoding_guess_bom_length (encoding, is->buffer, is->length);
+      is->head += bom_len;
+      is->length -= bom_len;
+    }
   else
     {
       if (encoding_guess_encoding_is_auto (fromcode)
diff --git a/tests/libpspp/u8-istream.at b/tests/libpspp/u8-istream.at
index 842e77336d..9757f9605b 100644
--- a/tests/libpspp/u8-istream.at
+++ b/tests/libpspp/u8-istream.at
@@ -109,6 +109,11 @@ Auto mode
 entrée
 UTF-8 mode
 ])
+AT_CHECK([printf '\357\273\277entr\303\251e\n' | u8-istream-test read - Auto], [0], [dnl
+UTF-8 mode
+entrée
+UTF-8 mode
+])
 AT_CLEANUP
 
 AT_SETUP([read ISO-8859-1 as Auto,ISO-8859-1])
-- 
2.30.2