From: Eric Blake Date: Tue, 5 Oct 2010 22:39:32 +0000 (-0600) Subject: memmem, strstr, strcasestr: fix bug with long periodic needle X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c823199df2cc03b6bd70d0a2fef5999af82792fe;p=pspp memmem, strstr, strcasestr: fix bug with long periodic needle * lib/str-two-way.h (two_way_long_needle): Avoid bug with long periodic needle having false positive. * m4/memmem.m4 (gl_FUNC_MEMMEM_SIMPLE): Detect bug in glibc 2.12 and cygwin 1.7.7. (gl_FUNC_MEMMEM): Be more pessimistic when cross-compiling. * m4/strcasestr.m4 (gl_FUNC_STRCASESTR_SIMPLE) (gl_FUNC_STRCASESTR): Likewise. * m4/strstr.m4 (gl_FUNC_STRSTR_SIMPLE, gl_FUNC_STRSTR): Likewise. * tests/test-memmem.c (main): Expose the bug. * tests/test-strcasestr.c (main): Likewise. * tests/test-strstr.c (main): Likewise. * tests/test-c-strcasestr.c (main): Likewise. * doc/glibc-functions/memmem.texi (memmem): Document the bug. * doc/posix-functions/strstr.texi (strstr): Likewise. * doc/glibc-functions/strcasestr.texi (strcasestr): Likewise. Reported via http://sourceware.org/bugzilla/show_bug.cgi?id=12092 Signed-off-by: Eric Blake --- diff --git a/ChangeLog b/ChangeLog index 53a861f991..19b5be8314 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2010-10-05 Eric Blake + + memmem, strstr, strcasestr: fix bug with long periodic needle + * lib/str-two-way.h (two_way_long_needle): Avoid bug with long + periodic needle having false positive. + * m4/memmem.m4 (gl_FUNC_MEMMEM_SIMPLE): Detect bug in glibc 2.12 + and cygwin 1.7.7. + (gl_FUNC_MEMMEM): Be more pessimistic when cross-compiling. + * m4/strcasestr.m4 (gl_FUNC_STRCASESTR_SIMPLE) + (gl_FUNC_STRCASESTR): Likewise. + * m4/strstr.m4 (gl_FUNC_STRSTR_SIMPLE, gl_FUNC_STRSTR): Likewise. + * tests/test-memmem.c (main): Expose the bug. + * tests/test-strcasestr.c (main): Likewise. + * tests/test-strstr.c (main): Likewise. + * tests/test-c-strcasestr.c (main): Likewise. 
+ * doc/glibc-functions/memmem.texi (memmem): Document the bug. + * doc/posix-functions/strstr.texi (strstr): Likewise. + * doc/glibc-functions/strcasestr.texi (strcasestr): Likewise. + Reported via http://sourceware.org/bugzilla/show_bug.cgi?id=12092 + 2010-10-05 Paul Eggert parse-datetime: do some more renaming diff --git a/doc/glibc-functions/memmem.texi b/doc/glibc-functions/memmem.texi index c7e3d739f4..0cfcdd9382 100644 --- a/doc/glibc-functions/memmem.texi +++ b/doc/glibc-functions/memmem.texi @@ -13,6 +13,10 @@ MacOS X 10.3, FreeBSD 5.2.1, OpenBSD 4.0, AIX 4.3.2, HP-UX 11, IRIX 6.5, OSF/1 5 @item This function has reversed arguments on some older platforms: Linux libc 5.0.9 +@item +This function can trigger false positives for long periodic needles on +some platforms: +glibc 2.12, Cygwin 1.7.7. @end itemize Portability problems fixed by Gnulib module @code{memmem}: diff --git a/doc/glibc-functions/strcasestr.texi b/doc/glibc-functions/strcasestr.texi index 7ce39f42e7..d75aeaadc9 100644 --- a/doc/glibc-functions/strcasestr.texi +++ b/doc/glibc-functions/strcasestr.texi @@ -15,6 +15,11 @@ mingw, BeOS. @item This function can trigger memchr bugs on some platforms: glibc 2.10. + +@item +This function can trigger false positives for long periodic needles on +some platforms: +glibc 2.12, Cygwin 1.7.7. @end itemize Portability problems fixed by Gnulib module @code{strcasestr}: diff --git a/doc/posix-functions/strstr.texi b/doc/posix-functions/strstr.texi index 61bb35b823..40d20b0274 100644 --- a/doc/posix-functions/strstr.texi +++ b/doc/posix-functions/strstr.texi @@ -12,6 +12,10 @@ or @code{strstr}: @item This function can trigger memchr bugs on some platforms: glibc 2.10. +@item +This function can trigger false positives for long periodic needles on +some platforms: +glibc 2.12, Cygwin 1.7.7. 
@end itemize Portability problems fixed by Gnulib @code{strstr}: diff --git a/lib/str-two-way.h b/lib/str-two-way.h index dbc2f889fb..5f150af6af 100644 --- a/lib/str-two-way.h +++ b/lib/str-two-way.h @@ -370,8 +370,8 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len, a byte out of place, there can be no match until after the mismatch. */ shift = needle_len - period; - memory = 0; } + memory = 0; j += shift; continue; } diff --git a/m4/memmem.m4 b/m4/memmem.m4 index d02b993593..553a81cedd 100644 --- a/m4/memmem.m4 +++ b/m4/memmem.m4 @@ -1,4 +1,4 @@ -# memmem.m4 serial 15 +# memmem.m4 serial 16 dnl Copyright (C) 2002, 2003, 2004, 2007, 2008, 2009, 2010 Free Software dnl Foundation, Inc. dnl This file is free software; the Free Software Foundation @@ -16,6 +16,45 @@ AC_DEFUN([gl_FUNC_MEMMEM_SIMPLE], AC_CHECK_DECLS_ONCE([memmem]) if test $ac_cv_have_decl_memmem = no; then HAVE_DECL_MEMMEM=0 + else + dnl Detect http://sourceware.org/bugzilla/show_bug.cgi?id=12092. + AC_CACHE_CHECK([whether memmem works], + [gl_cv_func_memmem_works_always], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ +#include <string.h> /* for memmem */ +#define P "_EF_BF_BD" +#define HAYSTACK "F_BD_CE_BD" P P P P "_C3_88_20" P P P "_C3_A7_20" P +#define NEEDLE P P P P P +]], [[return !!memmem (HAYSTACK, strlen (HAYSTACK), NEEDLE, strlen (NEEDLE)); + ]])], + [gl_cv_func_memmem_works_always=yes], + [gl_cv_func_memmem_works_always=no], + [dnl glibc 2.12 and cygwin 1.7.7 have a known bug. Assume that it + dnl works on all other platforms, even if it is not linear. 
+ AC_EGREP_CPP([Lucky user], + [ +#ifdef __GNU_LIBRARY__ + #include <features.h> + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12) || (__GLIBC__ > 2) + Lucky user + #endif +#elif defined __CYGWIN__ + #include <cygwin/version.h> + #if CYGWIN_VERSION_DLL_MAJOR >= 1007 && CYGWIN_VERSION_DLL_MINOR > 7 + Lucky user + #endif +#else + Lucky user +#endif + ], + [gl_cv_func_memmem_works_always=yes], + [gl_cv_func_memmem_works_always="guessing no"]) + ]) + ]) + if test "$gl_cv_func_memmem_works_always" != yes; then + REPLACE_MEMMEM=1 + AC_LIBOBJ([memmem]) + fi fi gl_PREREQ_MEMMEM ]) # gl_FUNC_MEMMEM_SIMPLE @@ -24,9 +63,9 @@ dnl Additionally, check that memmem is efficient and handles empty needles. AC_DEFUN([gl_FUNC_MEMMEM], [ AC_REQUIRE([gl_FUNC_MEMMEM_SIMPLE]) - if test $ac_cv_have_decl_memmem = yes; then + if test $HAVE_MEMMEM = 1 && test $REPLACE_MEMMEM = 0; then AC_CACHE_CHECK([whether memmem works in linear time], - [gl_cv_func_memmem_works], + [gl_cv_func_memmem_works_fast], [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ #include <signal.h> /* for signal */ #include <string.h> /* for memmem */ @@ -52,29 +91,29 @@ static void quit (int sig) { exit (sig + 128); } } /* Check for empty needle behavior. */ return !result || !memmem ("a", 1, 0, 0);]])], - [gl_cv_func_memmem_works=yes], [gl_cv_func_memmem_works=no], - [dnl Only glibc >= 2.9 and cygwin >= 1.7.0 are known to have a - dnl memmem that works in linear time. + [gl_cv_func_memmem_works_fast=yes], [gl_cv_func_memmem_works_fast=no], + [dnl Only glibc > 2.12 and cygwin > 1.7.7 are known to have a + dnl bug-free memmem that works in linear time. 
AC_EGREP_CPP([Lucky user], [ #include <features.h> #ifdef __GNU_LIBRARY__ - #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 9) || (__GLIBC__ > 2) + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12) || (__GLIBC__ > 2) Lucky user #endif #endif #ifdef __CYGWIN__ #include <cygwin/version.h> - #if CYGWIN_VERSION_DLL_MAJOR >= 1007 + #if CYGWIN_VERSION_DLL_MAJOR >= 1007 && CYGWIN_VERSION_DLL_MINOR > 7 Lucky user #endif #endif ], - [gl_cv_func_memmem_works=yes], - [gl_cv_func_memmem_works="guessing no"]) + [gl_cv_func_memmem_works_fast=yes], + [gl_cv_func_memmem_works_fast="guessing no"]) ]) ]) - if test "$gl_cv_func_memmem_works" != yes; then + if test "$gl_cv_func_memmem_works_fast" != yes; then REPLACE_MEMMEM=1 AC_LIBOBJ([memmem]) fi diff --git a/m4/strcasestr.m4 b/m4/strcasestr.m4 index 50ac0a7728..f0c4a6eaeb 100644 --- a/m4/strcasestr.m4 +++ b/m4/strcasestr.m4 @@ -1,4 +1,4 @@ -# strcasestr.m4 serial 14 +# strcasestr.m4 serial 15 dnl Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -19,6 +19,45 @@ AC_DEFUN([gl_FUNC_STRCASESTR_SIMPLE], else if test "$gl_cv_func_memchr_works" != yes; then REPLACE_STRCASESTR=1 + else + dnl Detect http://sourceware.org/bugzilla/show_bug.cgi?id=12092. + AC_CACHE_CHECK([whether strcasestr works], + [gl_cv_func_strcasestr_works_always], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ +#include <string.h> /* for strcasestr */ +#define P "_EF_BF_BD" +#define HAYSTACK "F_BD_CE_BD" P P P P "_C3_88_20" P P P "_C3_A7_20" P +#define NEEDLE P P P P P +]], [[return !!strcasestr (HAYSTACK, NEEDLE); + ]])], + [gl_cv_func_strcasestr_works_always=yes], + [gl_cv_func_strcasestr_works_always=no], + [dnl glibc 2.12 and cygwin 1.7.7 have a known bug. Assume that it + dnl works on all other platforms, even if it is not linear. 
+ AC_EGREP_CPP([Lucky user], + [ +#ifdef __GNU_LIBRARY__ + #include <features.h> + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12) || (__GLIBC__ > 2) + Lucky user + #endif +#elif defined __CYGWIN__ + #include <cygwin/version.h> + #if CYGWIN_VERSION_DLL_MAJOR >= 1007 && CYGWIN_VERSION_DLL_MINOR > 7 + Lucky user + #endif +#else + Lucky user +#endif + ], + [gl_cv_func_strcasestr_works_always=yes], + [gl_cv_func_strcasestr_works_always="guessing no"]) + ]) + ]) + if test "$gl_cv_func_strcasestr_works_always" != yes; then + REPLACE_STRCASESTR=1 + AC_LIBOBJ([strcasestr]) + fi fi fi if test $HAVE_STRCASESTR = 0 || test $REPLACE_STRCASESTR = 1; then @@ -36,7 +75,7 @@ AC_DEFUN([gl_FUNC_STRCASESTR], [gl_cv_func_strcasestr_linear], [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ #include <signal.h> /* for signal */ -#include <string.h> /* for memmem */ +#include <string.h> /* for strcasestr */ #include <stdlib.h> /* for malloc */ #include <unistd.h> /* for alarm */ static void quit (int sig) { exit (sig + 128); } @@ -61,19 +100,19 @@ static void quit (int sig) { exit (sig + 128); } } return !result;]])], [gl_cv_func_strcasestr_linear=yes], [gl_cv_func_strcasestr_linear=no], - [dnl Only glibc >= 2.9 and cygwin >= 1.7.0 are known to have a + [dnl Only glibc > 2.12 and cygwin > 1.7.7 are known to have a dnl strcasestr that works in linear time. AC_EGREP_CPP([Lucky user], [ #include <features.h> #ifdef __GNU_LIBRARY__ - #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 9) || (__GLIBC__ > 2) + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12) || (__GLIBC__ > 2) Lucky user #endif #endif #ifdef __CYGWIN__ #include <cygwin/version.h> - #if CYGWIN_VERSION_DLL_MAJOR >= 1007 + #if CYGWIN_VERSION_DLL_MAJOR >= 1007 && CYGWIN_VERSION_DLL_MINOR > 7 Lucky user #endif #endif diff --git a/m4/strstr.m4 b/m4/strstr.m4 index 3e45a7a082..1880150d00 100644 --- a/m4/strstr.m4 +++ b/m4/strstr.m4 @@ -1,4 +1,4 @@ -# strstr.m4 serial 8 +# strstr.m4 serial 9 dnl Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. 
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -12,6 +12,45 @@ AC_DEFUN([gl_FUNC_STRSTR_SIMPLE], if test "$gl_cv_func_memchr_works" != yes; then REPLACE_STRSTR=1 AC_LIBOBJ([strstr]) + else + dnl Detect http://sourceware.org/bugzilla/show_bug.cgi?id=12092. + AC_CACHE_CHECK([whether strstr works], + [gl_cv_func_strstr_works_always], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ +#include <string.h> /* for strstr */ +#define P "_EF_BF_BD" +#define HAYSTACK "F_BD_CE_BD" P P P P "_C3_88_20" P P P "_C3_A7_20" P +#define NEEDLE P P P P P +]], [[return !!strstr (HAYSTACK, NEEDLE); + ]])], + [gl_cv_func_strstr_works_always=yes], + [gl_cv_func_strstr_works_always=no], + [dnl glibc 2.12 and cygwin 1.7.7 have a known bug. Assume that it + dnl works on all other platforms, even if it is not linear. + AC_EGREP_CPP([Lucky user], + [ +#ifdef __GNU_LIBRARY__ + #include <features.h> + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12) || (__GLIBC__ > 2) + Lucky user + #endif +#elif defined __CYGWIN__ + #include <cygwin/version.h> + #if CYGWIN_VERSION_DLL_MAJOR >= 1007 && CYGWIN_VERSION_DLL_MINOR > 7 + Lucky user + #endif +#else + Lucky user +#endif + ], + [gl_cv_func_strstr_works_always=yes], + [gl_cv_func_strstr_works_always="guessing no"]) + ]) + ]) + if test "$gl_cv_func_strstr_works_always" != yes; then + REPLACE_STRSTR=1 + AC_LIBOBJ([strstr]) + fi fi ]) # gl_FUNC_STRSTR_SIMPLE @@ -24,7 +63,7 @@ AC_DEFUN([gl_FUNC_STRSTR], [gl_cv_func_strstr_linear], [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ #include <signal.h> /* for signal */ -#include <string.h> /* for memmem */ +#include <string.h> /* for strstr */ #include <stdlib.h> /* for malloc */ #include <unistd.h> /* for alarm */ static void quit (int sig) { exit (sig + 128); } @@ -49,19 +88,19 @@ static void quit (int sig) { exit (sig + 128); } } return !result;]])], [gl_cv_func_strstr_linear=yes], [gl_cv_func_strstr_linear=no], - [dnl Only glibc >= 2.9 and cygwin >= 1.7.0 are known to have a - dnl strstr that works in linear time. 
+ [dnl Only glibc > 2.12 and cygwin > 1.7.7 are known to have a + dnl bug-free strstr that works in linear time. AC_EGREP_CPP([Lucky user], [ #include <features.h> #ifdef __GNU_LIBRARY__ - #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 9) || (__GLIBC__ > 2) + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ > 12) || (__GLIBC__ > 2) Lucky user #endif #endif #ifdef __CYGWIN__ #include <cygwin/version.h> - #if CYGWIN_VERSION_DLL_MAJOR >= 1007 + #if CYGWIN_VERSION_DLL_MAJOR >= 1007 && CYGWIN_VERSION_DLL_MINOR > 7 Lucky user #endif #endif diff --git a/tests/test-c-strcasestr.c b/tests/test-c-strcasestr.c index 73723c729a..52d55e8044 100644 --- a/tests/test-c-strcasestr.c +++ b/tests/test-c-strcasestr.c @@ -58,6 +58,25 @@ main () ASSERT (result == input + 11); } + /* Check that a long periodic needle does not cause false positives. */ + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = c_strcasestr (input, need); + ASSERT (result == NULL); + } + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20" + "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = c_strcasestr (input, need); + ASSERT (result == input + 115); + } + /* Check that a very long haystack is handled quickly if the needle is short and occurs near the beginning. */ { diff --git a/tests/test-memmem.c b/tests/test-memmem.c index 3a21213b0d..da0d1be372 100644 --- a/tests/test-memmem.c +++ b/tests/test-memmem.c @@ -89,6 +89,25 @@ main (int argc, char *argv[]) ASSERT (result == input); } + /* Check that a long periodic needle does not cause false positives. 
*/ + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = memmem (input, strlen (input), need, strlen (need)); + ASSERT (result == NULL); + } + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20" + "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = memmem (input, strlen (input), need, strlen (need)); + ASSERT (result == input + 115); + } + /* Check that a very long haystack is handled quickly if the needle is short and occurs near the beginning. */ { diff --git a/tests/test-strcasestr.c b/tests/test-strcasestr.c index 1f38fad22e..c56fbae945 100644 --- a/tests/test-strcasestr.c +++ b/tests/test-strcasestr.c @@ -71,6 +71,25 @@ main () ASSERT (result == input + 11); } + /* Check that a long periodic needle does not cause false positives. */ + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = strcasestr (input, need); + ASSERT (result == NULL); + } + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20" + "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = strcasestr (input, need); + ASSERT (result == input + 115); + } + /* Check that a very long haystack is handled quickly if the needle is short and occurs near the beginning. 
*/ { diff --git a/tests/test-strstr.c b/tests/test-strstr.c index 1482e7de82..1ddd015276 100644 --- a/tests/test-strstr.c +++ b/tests/test-strstr.c @@ -91,6 +91,25 @@ main (int argc, char *argv[]) ASSERT (result == input + 11); } + /* Check that a long periodic needle does not cause false positives. */ + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = strstr (input, need); + ASSERT (result == NULL); + } + { + const char input[] = ("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20" + "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + const char need[] = "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"; + const char *result = strstr (input, need); + ASSERT (result == input + 115); + } + /* Check that a very long haystack is handled quickly if the needle is short and occurs near the beginning. */ {