-/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software
+ Foundation, Inc.
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+/* This particular implementation was written by Eric Blake, 2008. */
+
#ifndef _LIBC
# include <config.h>
#endif
-#include <stddef.h>
+/* Specification of memmem. */
#include <string.h>
-#include <stdbool.h>
-#include "malloca.h"
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
#ifndef _LIBC
# define __builtin_expect(expr, val) (expr)
#endif
-/* Knuth-Morris-Pratt algorithm.
- See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
- Return a boolean indicating success. */
+/* We use the Two-Way string matching algorithm, which guarantees
+ linear complexity with constant space. Additionally, for long
+ needles, we also use a bad character shift table similar to the
+ Boyer-Moore algorithm to achieve sub-linear performance.
+
+ See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
+ and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
+*/
+
+/* Point at which computing a bad-byte shift table is likely to be
+ worthwhile. Small needles should not compute a table, since it
+ adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a
+ speedup no greater than a factor of NEEDLE_LEN. The larger the
+ needle, the better the potential performance gain. On the other
+ hand, on non-POSIX systems with CHAR_BIT larger than eight, the
+ memory required for the table is prohibitive. */
+#if CHAR_BIT < 10
+# define LONG_NEEDLE_THRESHOLD 32U
+#else
+# define LONG_NEEDLE_THRESHOLD SIZE_MAX
+#endif
+
+#define MAX(a, b) ((a < b) ? (b) : (a))
+
+/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
+ Return the index of the first byte in the right half, and set
+ *PERIOD to the global period of the right half.
-static bool
-knuth_morris_pratt (const unsigned char *haystack,
- const unsigned char *last_haystack,
- const unsigned char *needle, size_t m,
- const unsigned char **resultp)
+ The global period of a string is the smallest index (possibly its
+ length) at which all remaining bytes in the string are repetitions
+ of the prefix (the last repetition may be a subset of the prefix).
+
+ When NEEDLE is factored into two halves, a local period is the
+ length of the smallest word that shares a suffix with the left half
+ and shares a prefix with the right half. All factorizations of a
+ non-empty NEEDLE have a local period of at least 1 and no greater
+ than NEEDLE_LEN.
+
+ A critical factorization has the property that the local period
+ equals the global period. All strings have at least one critical
+ factorization with the left half smaller than the global period.
+
+ Given an ordered alphabet, a critical factorization can be computed
+ in linear time, with 2 * NEEDLE_LEN comparisons, by computing the
+ larger of two ordered maximal suffixes. The ordered maximal
+ suffixes are determined by lexicographic comparison of
+ periodicity. */
+static size_t
+critical_factorization (const unsigned char *needle, size_t needle_len,
+ size_t *period)
{
- /* Allocate the table. */
- size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
- if (table == NULL)
- return false;
- /* Fill the table.
- For 0 < i < m:
- 0 < table[i] <= i is defined such that
- forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
- and table[i] is as large as possible with this property.
- This implies:
- 1) For 0 < i < m:
- If table[i] < i,
- needle[table[i]..i-1] = needle[0..i-1-table[i]].
- 2) For 0 < i < m:
- rhaystack[0..i-1] == needle[0..i-1]
- and exists h, i <= h < m: rhaystack[h] != needle[h]
- implies
- forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
- table[0] remains uninitialized. */
- {
- size_t i, j;
-
- /* i = 1: Nothing to verify for x = 0. */
- table[1] = 1;
- j = 0;
-
- for (i = 2; i < m; i++)
- {
- /* Here: j = i-1 - table[i-1].
- The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
- for x < table[i-1], by induction.
- Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */
- unsigned char b = needle[i - 1];
-
- for (;;)
- {
- /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
- is known to hold for x < i-1-j.
- Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */
- if (b == needle[j])
- {
- /* Set table[i] := i-1-j. */
- table[i] = i - ++j;
- break;
- }
- /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
- for x = i-1-j, because
- needle[i-1] != needle[j] = needle[i-1-x]. */
- if (j == 0)
- {
- /* The inequality holds for all possible x. */
- table[i] = i;
- break;
- }
- /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
- for i-1-j < x < i-1-j+table[j], because for these x:
- needle[x..i-2]
- = needle[x-(i-1-j)..j-1]
- != needle[0..j-1-(x-(i-1-j))] (by definition of table[j])
- = needle[0..i-2-x],
- hence needle[x..i-1] != needle[0..i-1-x].
- Furthermore
- needle[i-1-j+table[j]..i-2]
- = needle[table[j]..j-1]
- = needle[0..j-1-table[j]] (by definition of table[j]). */
- j = j - table[j];
- }
- /* Here: j = i - table[i]. */
- }
- }
-
- /* Search, using the table to accelerate the processing. */
- {
- size_t j;
- const unsigned char *rhaystack;
- const unsigned char *phaystack;
-
- *resultp = NULL;
- j = 0;
- rhaystack = haystack;
- phaystack = haystack;
- /* Invariant: phaystack = rhaystack + j. */
- while (phaystack != last_haystack)
- if (needle[j] == *phaystack)
+ /* Index of last byte of left half, or SIZE_MAX. */
+ size_t max_suffix, max_suffix_rev;
+ size_t j; /* Index into NEEDLE for current candidate suffix. */
+ size_t k; /* Offset into current period. */
+ size_t p; /* Intermediate period. */
+ unsigned char a, b; /* Current comparison bytes. */
+
+ /* Invariants:
+ 0 <= j < NEEDLE_LEN - 1
+ -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed)
+ min(max_suffix, max_suffix_rev) < global period of NEEDLE
+ 1 <= p <= global period of NEEDLE
+ p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j]
+ 1 <= k <= p
+ */
+
+ /* Perform lexicographic search. */
+ max_suffix = SIZE_MAX;
+ j = 0;
+ k = p = 1;
+ while (j + k < needle_len)
+ {
+ a = needle[j + k];
+ b = needle[max_suffix + k];
+ if (a < b)
+ {
+ /* Suffix is smaller, period is entire prefix so far. */
+ j += k;
+ k = 1;
+ p = j - max_suffix;
+ }
+ else if (a == b)
{
- j++;
- phaystack++;
- if (j == m)
+ /* Advance through repetition of the current period. */
+ if (k != p)
+ ++k;
+ else
{
- /* The entire needle has been found. */
- *resultp = rhaystack;
- break;
+ j += p;
+ k = 1;
}
}
- else if (j > 0)
+ else /* b < a */
+ {
+ /* Suffix is larger, start over from current location. */
+ max_suffix = j++;
+ k = p = 1;
+ }
+ }
+ *period = p;
+
+ /* Perform reverse lexicographic search. */
+ max_suffix_rev = SIZE_MAX;
+ j = 0;
+ k = p = 1;
+ while (j + k < needle_len)
+ {
+ a = needle[j + k];
+ b = needle[max_suffix_rev + k];
+ if (b < a)
+ {
+ /* Suffix is smaller, period is entire prefix so far. */
+ j += k;
+ k = 1;
+ p = j - max_suffix_rev;
+ }
+ else if (a == b)
+ {
+ /* Advance through repetition of the current period. */
+ if (k != p)
+ ++k;
+ else
+ {
+ j += p;
+ k = 1;
+ }
+ }
+ else /* a < b */
+ {
+ /* Suffix is larger, start over from current location. */
+ max_suffix_rev = j++;
+ k = p = 1;
+ }
+ }
+
+ /* Choose the longer suffix. Return the first byte of the right
+ half, rather than the last byte of the left half. */
+ if (max_suffix_rev + 1 < max_suffix + 1)
+ return max_suffix + 1;
+ *period = p;
+ return max_suffix_rev + 1;
+}
+
+/* Return the first location of NEEDLE within HAYSTACK, or NULL. This
+ method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized
+ for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. Performance is linear,
+ with 2 * NEEDLE_LEN comparisons in preparation, and at most 2 *
+ HAYSTACK_LEN - NEEDLE_LEN comparisons in searching. */
+static void *
+two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
+ const unsigned char *needle, size_t needle_len)
+{
+ size_t i; /* Index into current byte of NEEDLE. */
+ size_t j; /* Index into current window of HAYSTACK. */
+ size_t period; /* The period of the right half of needle. */
+ size_t suffix; /* The index of the right half of needle. */
+
+ /* Factor the needle into two halves, such that the left half is
+ smaller than the global period, and the right half is
+ periodic (with a period as large as NEEDLE_LEN - suffix). */
+ suffix = critical_factorization (needle, needle_len, &period);
+
+ /* Perform the search. Each iteration compares the right half
+ first. */
+ if (memcmp (needle, needle + period, suffix) == 0)
+ {
+ /* Entire needle is periodic; a mismatch can only advance by the
+ period, so use memory to avoid rescanning known occurrences
+ of the period. */
+ size_t memory = 0;
+ j = 0;
+ while (j <= haystack_len - needle_len)
{
- /* Found a match of needle[0..j-1], mismatch at needle[j]. */
- rhaystack += table[j];
- j -= table[j];
+ /* Scan for matches in right half. */
+ i = MAX (suffix, memory);
+ while (i < needle_len && needle[i] == haystack[i + j])
+ ++i;
+ if (needle_len <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (memory < i + 1 && needle[i] == haystack[i + j])
+ --i;
+ if (i + 1 < memory + 1)
+ return (void *) (haystack + j);
+ /* No match, so remember how many repetitions of period
+ on the right half were scanned. */
+ j += period;
+ memory = needle_len - period;
+ }
+ else
+ {
+ j += i - suffix + 1;
+ memory = 0;
+ }
}
- else
+ }
+ else
+ {
+ /* The two halves of needle are distinct; no extra memory is
+ required, and any mismatch results in a maximal shift. */
+ period = MAX (suffix, needle_len - suffix) + 1;
+ j = 0;
+ while (j <= haystack_len - needle_len)
{
- /* Found a mismatch at needle[0] already. */
- rhaystack++;
- phaystack++;
+ /* Scan for matches in right half. */
+ i = suffix;
+ while (i < needle_len && needle[i] == haystack[i + j])
+ ++i;
+ if (needle_len <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (i != SIZE_MAX && needle[i] == haystack[i + j])
+ --i;
+ if (i == SIZE_MAX)
+ return (void *) (haystack + j);
+ j += period;
+ }
+ else
+ j += i - suffix + 1;
}
- }
+ }
+ return NULL;
+}
+
+/* Return the first location of NEEDLE within HAYSTACK, or NULL. This
+ method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized
+ for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. Performance is linear,
+ with 3 * NEEDLE_LEN + (1U << CHAR_BIT) operations in preparation,
+ and at most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons in searching.
+ The extra initialization cost allows for potential sublinear
+ performance O(HAYSTACK_LEN / NEEDLE_LEN). */
+static void *
+two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
+ const unsigned char *needle, size_t needle_len)
+{
+ size_t i; /* Index into current byte of NEEDLE. */
+ size_t j; /* Index into current window of HAYSTACK. */
+ size_t period; /* The period of the right half of needle. */
+ size_t suffix; /* The index of the right half of needle. */
+ size_t shift_table[1U << CHAR_BIT]; /* See below. */
+
+ /* Factor the needle into two halves, such that the left half is
+ smaller than the global period, and the right half is
+ periodic (with a period as large as NEEDLE_LEN - suffix). */
+ suffix = critical_factorization (needle, needle_len, &period);
- freea (table);
- return true;
+ /* Populate shift_table. For each possible byte value c,
+ shift_table[c] is the distance from the last occurrence of c to
+ the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE.
+ shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */
+ for (i = 0; i < 1U << CHAR_BIT; i++)
+ shift_table[i] = needle_len;
+ for (i = 0; i < needle_len; i++)
+ shift_table[needle[i]] = needle_len - i - 1;
+
+ /* Perform the search. Each iteration compares the right half
+ first. */
+ if (memcmp (needle, needle + period, suffix) == 0)
+ {
+ /* Entire needle is periodic; a mismatch can only advance by the
+ period, so use memory to avoid rescanning known occurrences
+ of the period. */
+ size_t memory = 0;
+ j = 0;
+ while (j <= haystack_len - needle_len)
+ {
+ /* Check the last byte first; if it does not match, then
+ shift to the next possible match location. */
+ size_t shift = shift_table[haystack[j + needle_len - 1]];
+ if (0 < shift)
+ {
+ if (memory && shift < period)
+ {
+ /* Since needle is periodic, but the last period has
+ a byte out of place, there can be no match until
+ after the mismatch. */
+ shift = needle_len - period;
+ memory = 0;
+ }
+ j += shift;
+ continue;
+ }
+ /* Scan for matches in right half. The last byte has
+ already been matched, by virtue of the shift table. */
+ i = MAX (suffix, memory);
+ while (i < needle_len - 1 && needle[i] == haystack[i + j])
+ ++i;
+ if (needle_len - 1 <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (memory < i + 1 && needle[i] == haystack[i + j])
+ --i;
+ if (i + 1 < memory + 1)
+ return (void *) (haystack + j);
+ /* No match, so remember how many repetitions of period
+ on the right half were scanned. */
+ j += period;
+ memory = needle_len - period;
+ }
+ else
+ {
+ j += i - suffix + 1;
+ memory = 0;
+ }
+ }
+ }
+ else
+ {
+ /* The two halves of needle are distinct; no extra memory is
+ required, and any mismatch results in a maximal shift. */
+ period = MAX (suffix, needle_len - suffix) + 1;
+ j = 0;
+ while (j <= haystack_len - needle_len)
+ {
+ /* Check the last byte first; if it does not match, then
+ shift to the next possible match location. */
+ size_t shift = shift_table[haystack[j + needle_len - 1]];
+ if (0 < shift)
+ {
+ j += shift;
+ continue;
+ }
+ /* Scan for matches in right half. The last byte has
+ already been matched, by virtue of the shift table. */
+ i = suffix;
+ while (i < needle_len - 1 && needle[i] == haystack[i + j])
+ ++i;
+ if (needle_len - 1 <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (i != SIZE_MAX && needle[i] == haystack[i + j])
+ --i;
+ if (i == SIZE_MAX)
+ return (void *) (haystack + j);
+ j += period;
+ }
+ else
+ j += i - suffix + 1;
+ }
+ }
+ return NULL;
}
/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
const unsigned char *haystack = (const unsigned char *) haystack_start;
const unsigned char *needle = (const unsigned char *) needle_start;
- const unsigned char *last_haystack = haystack + haystack_len;
- const unsigned char *last_needle = needle + needle_len;
if (needle_len == 0)
/* The first occurrence of the empty string is deemed to occur at
if (__builtin_expect (haystack_len < needle_len, 0))
return NULL;
- /* Use optimizations in memchr when possible. */
- if (__builtin_expect (needle_len == 1, 0))
- return memchr (haystack, *needle, haystack_len);
-
- /* Minimizing the worst-case complexity:
- Let n = haystack_len, m = needle_len.
- The naïve algorithm is O(n*m) worst-case.
- The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
- memory allocation.
- To achieve linear complexity and yet amortize the cost of the
- memory allocation, we activate the Knuth-Morris-Pratt algorithm
- only once the naïve algorithm has already run for some time; more
- precisely, when
- - the outer loop count is >= 10,
- - the average number of comparisons per outer loop is >= 5,
- - the total number of comparisons is >= m.
- But we try it only once. If the memory allocation attempt failed,
- we don't retry it. */
- {
- bool try_kmp = true;
- size_t outer_loop_count = 0;
- size_t comparison_count = 0;
-
- /* Speed up the following searches of needle by caching its first
- byte. */
- unsigned char b = *needle++;
-
- for (;; haystack++)
- {
- if (haystack == last_haystack)
- /* No match. */
- return NULL;
-
- /* See whether it's advisable to use an asymptotically faster
- algorithm. */
- if (try_kmp
- && outer_loop_count >= 10
- && comparison_count >= 5 * outer_loop_count)
- {
- /* See if needle + comparison_count now reaches the end of
- needle. */
- if (comparison_count >= needle_len)
- {
- /* Try the Knuth-Morris-Pratt algorithm. */
- const unsigned char *result;
- if (knuth_morris_pratt (haystack, last_haystack,
- needle - 1, needle_len, &result))
- return (void *) result;
- try_kmp = false;
- }
- }
-
- outer_loop_count++;
- comparison_count++;
- if (*haystack == b)
- /* The first byte matches. */
- {
- const unsigned char *rhaystack = haystack + 1;
- const unsigned char *rneedle = needle;
-
- for (;; rhaystack++, rneedle++)
- {
- if (rneedle == last_needle)
- /* Found a match. */
- return (void *) haystack;
- if (rhaystack == last_haystack)
- /* No match. */
- return NULL;
- comparison_count++;
- if (*rhaystack != *rneedle)
- /* Nothing in this round. */
- break;
- }
- }
- }
- }
-
- return NULL;
+ /* Use optimizations in memchr when possible, to reduce the search
+ size of haystack using a linear algorithm with a smaller
+ coefficient. However, avoid memchr for long needles, since we
+ can often achieve sublinear performance. */
+ if (needle_len < LONG_NEEDLE_THRESHOLD)
+ {
+ haystack = memchr (haystack, *needle, haystack_len);
+ if (!haystack || __builtin_expect (needle_len == 1, 0))
+ return (void *) haystack;
+ haystack_len -= haystack - (const unsigned char *) haystack_start;
+ if (haystack_len < needle_len)
+ return NULL;
+ return two_way_short_needle (haystack, haystack_len, needle, needle_len);
+ }
+ else
+ return two_way_long_needle (haystack, haystack_len, needle, needle_len);
}
+
+#undef LONG_NEEDLE_THRESHOLD
+#undef MAX