From fffd5faca72521880531fdebe05675828fad0628 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 21 Jun 2010 16:16:44 -0600 Subject: [PATCH 1/1] memmem: slight optimization For any needle, the factorization 0/n has a local period of 1, so it is a critical factorization iff the entire needle consists only of a single repeated byte, in which case 1/n-1 would also be critical. Starting with a comparison of x[0] and x[1] in the maximal suffix check will either find the 0/n case or move on to something else, so we can optimize and start with the x[1] vs. x[2] case to begin with. To avoid out-of-bounds references, we must then special case needles of length two or less. However, for these cases, we can determine a critical factorization without any probes of the needle (we already require a non-empty needle; a 1-byte needle can factor as either 0/1 or 1/0 but the rest of our code assumes a non-empty suffix; and of the two 2-byte needle patterns, "aa" can factor as either 0/2 or 1/1 but with best performance for 1/1, and "ab" must be factored as 1/1). * lib/str-two-way.h (critical_factorization): Update comments. Reduce work during factorization phase. Reported by Carlos Bueno . Signed-off-by: Eric Blake --- ChangeLog | 7 ++++++ lib/str-two-way.h | 59 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 29c0096312..182d7b2aa3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2010-06-22 Eric Blake + + memmem: slight optimization + * lib/str-two-way.h (critical_factorization): Update comments. + Reduce work during factorization phase. + Reported by Carlos Bueno . + 2010-06-21 Bruno Haible Fix HAVE_CALLOC_POSIX misnomer. diff --git a/lib/str-two-way.h b/lib/str-two-way.h index 3f67741bb2..dbc2f889fb 100644 --- a/lib/str-two-way.h +++ b/lib/str-two-way.h @@ -95,26 +95,37 @@ A critical factorization has the property that the local period equals the global period. All strings have at least one critical factorization with the left half smaller than the global period. + And while some strings have more than one critical factorization, + it is provable that with an ordered alphabet, at least one of the + critical factorizations corresponds to a maximal suffix. Given an ordered alphabet, a critical factorization can be computed in linear time, with 2 * NEEDLE_LEN comparisons, by computing the - larger of two ordered maximal suffixes. The ordered maximal - suffixes are determined by lexicographic comparison of + shorter of two ordered maximal suffixes. The ordered maximal + suffixes are determined by lexicographic comparison while tracking periodicity. */ static size_t critical_factorization (const unsigned char *needle, size_t needle_len, size_t *period) { - /* Index of last byte of left half, or SIZE_MAX. */ + /* Index of last byte of left half. */ size_t max_suffix, max_suffix_rev; size_t j; /* Index into NEEDLE for current candidate suffix. */ size_t k; /* Offset into current period. */ size_t p; /* Intermediate period. */ unsigned char a, b; /* Current comparison bytes. */ + /* Special case NEEDLE_LEN of 1 or 2 (all callers already filtered + out 0-length needles. */ + if (needle_len < 3) + { + *period = 1; + return needle_len - 1; + } + /* Invariants: - 0 <= j < NEEDLE_LEN - 1 - -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) + 1 <= j < NEEDLE_LEN - 1 + 0 <= max_suffix{,_rev} < j min(max_suffix, max_suffix_rev) < global period of NEEDLE 1 <= p <= global period of NEEDLE p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] @@ -122,9 +133,8 @@ critical_factorization (const unsigned char *needle, size_t needle_len, */ /* Perform lexicographic search. */ - max_suffix = SIZE_MAX; - j = 0; - k = p = 1; + max_suffix = 0; + j = k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]); @@ -157,9 +167,8 @@ critical_factorization (const unsigned char *needle, size_t needle_len, *period = p; /* Perform reverse lexicographic search. */ - max_suffix_rev = SIZE_MAX; - j = 0; - k = p = 1; + max_suffix_rev = 0; + j = k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]); @@ -190,8 +199,20 @@ critical_factorization (const unsigned char *needle, size_t needle_len, } } - /* Choose the longer suffix. Return the first byte of the right - half, rather than the last byte of the left half. */ + /* Choose the shorter suffix. Return the index of the first byte of + the right half, rather than the last byte of the left half. + + For some examples, 'banana' has two critical factorizations, both + exposed by the two lexicographic extreme suffixes of 'anana' and + 'nana', where both suffixes have a period of 2. On the other + hand, with 'aab' and 'bba', both strings have a single critical + factorization of the last byte, with the suffix having a period + of 1. While the maximal lexicographic suffix of 'aab' is 'b', + the maximal lexicographic suffix of 'bba' is 'ba', which is not a + critical factorization. Conversely, the maximal reverse + lexicographic suffix of 'a' works for 'bba', but not 'ab' for + 'aab'. The shorter suffix of the two will always be a critical + factorization. */ if (max_suffix_rev + 1 < max_suffix + 1) return max_suffix + 1; *period = p; @@ -226,9 +247,9 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, first. */ if (CMP_FUNC (needle, needle + period, suffix) == 0) { - /* Entire needle is periodic; a mismatch can only advance by the - period, so use memory to avoid rescanning known occurrences - of the period. */ + /* Entire needle is periodic; a mismatch in the left half can + only advance by the period, so use memory to avoid rescanning + known occurrences of the period in the right half. */ size_t memory = 0; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) @@ -330,9 +351,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len, first. */ if (CMP_FUNC (needle, needle + period, suffix) == 0) { - /* Entire needle is periodic; a mismatch can only advance by the - period, so use memory to avoid rescanning known occurrences - of the period. */ + /* Entire needle is periodic; a mismatch in the left half can + only advance by the period, so use memory to avoid rescanning + known occurrences of the period in the right half. */ size_t memory = 0; size_t shift; j = 0; -- 2.30.2