X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fencoding-guesser.c;h=7d10015e2ea2961febf0efb8edb47d66e30033a8;hb=refs%2Fbuilds%2F20110716030503%2Fpspp;hp=9042e93a2fd41db02012d19f206c378748adc7c8;hpb=f3668539947d5baed813a4f8436d6cf36abeedd2;p=pspp diff --git a/src/libpspp/encoding-guesser.c b/src/libpspp/encoding-guesser.c index 9042e93a2f..7d10015e2e 100644 --- a/src/libpspp/encoding-guesser.c +++ b/src/libpspp/encoding-guesser.c @@ -250,10 +250,6 @@ encoding_guess_head_encoding (const char *encoding, || !encoding_guess_tail_is_utf8 (data, n)) return fallback_encoding; - if (!c_strcasecmp (fallback_encoding, "UTF-8") - || !c_strcasecmp (fallback_encoding, "UTF8")) - return "UTF-8"; - return "ASCII"; } @@ -287,3 +283,30 @@ encoding_guess_tail_is_utf8 (const void *data, size_t n) : is_all_utf8_text (data, n)); } +/* Attempts to guess the encoding of a text file based on ENCODING, an encoding + name in one of the forms described at the top of encoding-guesser.h, and the + SIZE bytes in TEXT, which contains the entire contents of the file. Returns + the guessed encoding, which might be ENCODING itself or a suffix of it or a + statically allocated string. + + Encoding autodetection only takes place if ENCODING actually specifies + autodetection. See encoding-guesser.h for details. */ +const char * +encoding_guess_whole_file (const char *encoding, const void *text, size_t size) +{ + const char *guess; + + guess = encoding_guess_head_encoding (encoding, text, size); + if (!strcmp (guess, "ASCII") && encoding_guess_encoding_is_auto (encoding)) + { + size_t ofs = encoding_guess_count_ascii (text, size); + if (ofs < size) + return encoding_guess_tail_encoding (encoding, + (const char *) text + ofs, + size - ofs); + else + return encoding_guess_parse_encoding (encoding); + } + else + return guess; +}