- {
- /* Minimizing the worst-case complexity:
- Let n = mbslen(haystack), m = mbslen(needle).
- The naïve algorithm is O(n*m) worst-case.
- The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
- memory allocation.
- To achieve linear complexity and yet amortize the cost of the
- memory allocation, we activate the Knuth-Morris-Pratt algorithm
- only once the naïve algorithm has already run for some time; more
- precisely, when
- - the outer loop count is >= 10,
- - the average number of comparisons per outer loop is >= 5,
- - the total number of comparisons is >= m.
- But we try it only once. If the memory allocation attempt failed,
- we don't retry it. */
- bool try_kmp = true;
- size_t outer_loop_count = 0;
- size_t comparison_count = 0;
- size_t last_ccount = 0; /* last comparison count */
- mbui_iterator_t iter_needle_last_ccount; /* = needle + last_ccount */
-
- mbchar_t b;
- mbui_iterator_t iter_haystack;
-
- mbui_init (iter_needle_last_ccount, needle);
-
- mb_copy (&b, &mbui_cur (iter_needle));
- if (b.wc_valid)
- b.wc = towlower (b.wc);
-
- mbui_init (iter_haystack, haystack);
- for (;; mbui_advance (iter_haystack))
- {
- mbchar_t c;
-
- if (!mbui_avail (iter_haystack))
- /* No match. */
- return NULL;
-
- /* See whether it's advisable to use an asymptotically faster
- algorithm. */
- if (try_kmp
- && outer_loop_count >= 10
- && comparison_count >= 5 * outer_loop_count)
- {
- /* See if needle + comparison_count now reaches the end of
- needle. */
- size_t count = comparison_count - last_ccount;
- for (;
- count > 0 && mbui_avail (iter_needle_last_ccount);
- count--)
- mbui_advance (iter_needle_last_ccount);
- last_ccount = comparison_count;
- if (!mbui_avail (iter_needle_last_ccount))
- {
- /* Try the Knuth-Morris-Pratt algorithm. */
- const char *result;
- bool success =
- knuth_morris_pratt_multibyte (haystack, needle,
- &result);
- if (success)
- return (char *) result;
- try_kmp = false;
- }
- }
-
- outer_loop_count++;
- comparison_count++;
- mb_copy (&c, &mbui_cur (iter_haystack));
- if (c.wc_valid)
- c.wc = towlower (c.wc);
- if (mb_equal (c, b))
- /* The first character matches. */
- {
- mbui_iterator_t rhaystack;
- mbui_iterator_t rneedle;
-
- memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
- mbui_advance (rhaystack);
-
- mbui_init (rneedle, needle);
- if (!mbui_avail (rneedle))
- abort ();
- mbui_advance (rneedle);
-
- for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
- {
- if (!mbui_avail (rneedle))
- /* Found a match. */
- return (char *) mbui_cur_ptr (iter_haystack);
- if (!mbui_avail (rhaystack))
- /* No match. */
- return NULL;
- comparison_count++;
- if (!mb_caseequal (mbui_cur (rhaystack),
- mbui_cur (rneedle)))
- /* Nothing in this round. */
- break;
- }
- }
- }
- }
+ {
+ /* Minimizing the worst-case complexity:
+ Let n = mbslen(haystack), m = mbslen(needle).
+ The naïve algorithm is O(n*m) worst-case.
+ The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
+ memory allocation.
+ To achieve linear complexity and yet amortize the cost of the
+ memory allocation, we activate the Knuth-Morris-Pratt algorithm
+ only once the naïve algorithm has already run for some time; more
+ precisely, when
+ - the outer loop count is >= 10,
+ - the average number of comparisons per outer loop is >= 5,
+ - the total number of comparisons is >= m.
+ But we try it only once. If the memory allocation attempt failed,
+ we don't retry it. */
+ bool try_kmp = true;
+ size_t outer_loop_count = 0;
+ size_t comparison_count = 0;
+ size_t last_ccount = 0; /* last comparison count */
+ mbui_iterator_t iter_needle_last_ccount; /* = needle + last_ccount */
+
+ mbchar_t b;
+ mbui_iterator_t iter_haystack;
+
+ mbui_init (iter_needle_last_ccount, needle);
+
+ mb_copy (&b, &mbui_cur (iter_needle));
+ if (b.wc_valid)
+ b.wc = towlower (b.wc);
+
+ mbui_init (iter_haystack, haystack);
+ for (;; mbui_advance (iter_haystack))
+ {
+ mbchar_t c;
+
+ if (!mbui_avail (iter_haystack))
+ /* No match. */
+ return NULL;
+
+ /* See whether it's advisable to use an asymptotically faster
+ algorithm. */
+ if (try_kmp
+ && outer_loop_count >= 10
+ && comparison_count >= 5 * outer_loop_count)
+ {
+ /* See if needle + comparison_count now reaches the end of
+ needle. */
+ size_t count = comparison_count - last_ccount;
+ for (;
+ count > 0 && mbui_avail (iter_needle_last_ccount);
+ count--)
+ mbui_advance (iter_needle_last_ccount);
+ last_ccount = comparison_count;
+ if (!mbui_avail (iter_needle_last_ccount))
+ {
+ /* Try the Knuth-Morris-Pratt algorithm. */
+ const char *result;
+ bool success =
+ knuth_morris_pratt_multibyte (haystack, needle,
+ &result);
+ if (success)
+ return (char *) result;
+ try_kmp = false;
+ }
+ }
+
+ outer_loop_count++;
+ comparison_count++;
+ mb_copy (&c, &mbui_cur (iter_haystack));
+ if (c.wc_valid)
+ c.wc = towlower (c.wc);
+ if (mb_equal (c, b))
+ /* The first character matches. */
+ {
+ mbui_iterator_t rhaystack;
+ mbui_iterator_t rneedle;
+
+ memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
+ mbui_advance (rhaystack);
+
+ mbui_init (rneedle, needle);
+ if (!mbui_avail (rneedle))
+ abort ();
+ mbui_advance (rneedle);
+
+ for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
+ {
+ if (!mbui_avail (rneedle))
+ /* Found a match. */
+ return (char *) mbui_cur_ptr (iter_haystack);
+ if (!mbui_avail (rhaystack))
+ /* No match. */
+ return NULL;
+ comparison_count++;
+ if (!mb_caseequal (mbui_cur (rhaystack),
+ mbui_cur (rneedle)))
+ /* Nothing in this round. */
+ break;
+ }
+ }
+ }
+ }