1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation,
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 static void re_string_construct_common (const char *str, Idx len,
23 RE_TRANSLATE_TYPE trans, bool icase,
24 const re_dfa_t *dfa) internal_function;
25 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26 const re_node_set *nodes,
27 re_hashval_t hash) internal_function;
28 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29 const re_node_set *nodes,
31 re_hashval_t hash) internal_function;
33 /* Functions for string operation. */
35 /* This function allocate the buffers. It is necessary to call
36 re_string_reconstruct before using the object. */
40 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
41 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
46 /* Ensure at least one character fits into the buffers. */
47 if (init_len < dfa->mb_cur_max)
48 init_len = dfa->mb_cur_max;
49 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50 re_string_construct_common (str, len, pstr, trans, icase, dfa);
52 ret = re_string_realloc_buffers (pstr, init_buf_len);
53 if (BE (ret != REG_NOERROR, 0))
56 pstr->word_char = dfa->word_char;
57 pstr->word_ops_used = dfa->word_ops_used;
58 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60 pstr->valid_raw_len = pstr->valid_len;
64 /* This function allocate the buffers, and initialize them. */
68 re_string_construct (re_string_t *pstr, const char *str, Idx len,
69 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
72 memset (pstr, '\0', sizeof (re_string_t));
73 re_string_construct_common (str, len, pstr, trans, icase, dfa);
77 ret = re_string_realloc_buffers (pstr, len + 1);
78 if (BE (ret != REG_NOERROR, 0))
81 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
86 if (dfa->mb_cur_max > 1)
90 ret = build_wcs_upper_buffer (pstr);
91 if (BE (ret != REG_NOERROR, 0))
93 if (pstr->valid_raw_len >= len)
95 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
97 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98 if (BE (ret != REG_NOERROR, 0))
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr);
108 #ifdef RE_ENABLE_I18N
109 if (dfa->mb_cur_max > 1)
110 build_wcs_buffer (pstr);
112 #endif /* RE_ENABLE_I18N */
115 re_string_translate_buffer (pstr);
118 pstr->valid_len = pstr->bufs_len;
119 pstr->valid_raw_len = pstr->bufs_len;
127 /* Helper functions for re_string_allocate, and re_string_construct. */
131 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
133 #ifdef RE_ENABLE_I18N
134 if (pstr->mb_cur_max > 1)
138 /* Avoid overflow. */
139 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
140 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
143 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
144 if (BE (new_wcs == NULL, 0))
147 if (pstr->offsets != NULL)
149 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
150 if (BE (new_offsets == NULL, 0))
152 pstr->offsets = new_offsets;
155 #endif /* RE_ENABLE_I18N */
156 if (pstr->mbs_allocated)
158 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
160 if (BE (new_mbs == NULL, 0))
164 pstr->bufs_len = new_buf_len;
171 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
172 RE_TRANSLATE_TYPE trans, bool icase,
175 pstr->raw_mbs = (const unsigned char *) str;
180 pstr->mbs_allocated = (trans != NULL || icase);
181 pstr->mb_cur_max = dfa->mb_cur_max;
182 pstr->is_utf8 = dfa->is_utf8;
183 pstr->map_notascii = dfa->map_notascii;
184 pstr->stop = pstr->len;
185 pstr->raw_stop = pstr->stop;
188 #ifdef RE_ENABLE_I18N
190 /* Build wide character buffer PSTR->WCS.
191 If the byte sequence of the string are:
192 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
193 Then wide character buffer will be:
194 <wc1> , WEOF , <wc2> , WEOF , <wc3>
195 We use WEOF for padding, they indicate that the position isn't
196 a first byte of a multibyte character.
198 Note that this function assumes PSTR->VALID_LEN elements are already
199 built and starts from PSTR->VALID_LEN. */
203 build_wcs_buffer (re_string_t *pstr)
206 unsigned char buf[MB_LEN_MAX];
207 assert (MB_LEN_MAX >= pstr->mb_cur_max);
209 unsigned char buf[64];
212 Idx byte_idx, end_idx, remain_len;
215 /* Build the buffers from pstr->valid_len to either pstr->len or
217 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
218 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
223 remain_len = end_idx - byte_idx;
224 prev_st = pstr->cur_state;
225 /* Apply the translation if we need. */
226 if (BE (pstr->trans != NULL, 0))
230 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
232 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
233 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
235 p = (const char *) buf;
238 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
239 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240 if (BE (mbclen == (size_t) -2, 0))
242 /* The buffer doesn't have enough space, finish to build. */
243 pstr->cur_state = prev_st;
246 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
248 /* We treat these cases as a singlebyte character. */
250 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
251 if (BE (pstr->trans != NULL, 0))
252 wc = pstr->trans[wc];
253 pstr->cur_state = prev_st;
256 /* Write wide character and padding. */
257 pstr->wcs[byte_idx++] = wc;
258 /* Write paddings. */
259 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260 pstr->wcs[byte_idx++] = WEOF;
262 pstr->valid_len = byte_idx;
263 pstr->valid_raw_len = byte_idx;
266 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267 but for REG_ICASE. */
271 build_wcs_upper_buffer (re_string_t *pstr)
274 Idx src_idx, byte_idx, end_idx, remain_len;
277 char buf[MB_LEN_MAX];
278 assert (MB_LEN_MAX >= pstr->mb_cur_max);
283 byte_idx = pstr->valid_len;
284 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286 /* The following optimization assumes that ASCII characters can be
287 mapped to wide characters with a simple cast. */
288 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290 while (byte_idx < end_idx)
294 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295 && mbsinit (&pstr->cur_state))
297 /* In case of a singlebyte character. */
299 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300 /* The next step uses the assumption that wchar_t is encoded
301 ASCII-safe: all ASCII values can be converted like this. */
302 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
307 remain_len = end_idx - byte_idx;
308 prev_st = pstr->cur_state;
309 mbclen = __mbrtowc (&wc,
310 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311 + byte_idx), remain_len, &pstr->cur_state);
312 if (BE (mbclen < (size_t) -2, 1))
320 mbcdlen = wcrtomb (buf, wcu, &prev_st);
321 if (BE (mbclen == mbcdlen, 1))
322 memcpy (pstr->mbs + byte_idx, buf, mbclen);
330 memcpy (pstr->mbs + byte_idx,
331 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
332 pstr->wcs[byte_idx++] = wcu;
333 /* Write paddings. */
334 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335 pstr->wcs[byte_idx++] = WEOF;
337 else if (mbclen == (size_t) -1 || mbclen == 0)
339 /* It is an invalid character or '\0'. Just use the byte. */
340 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
341 pstr->mbs[byte_idx] = ch;
342 /* And also cast it to wide char. */
343 pstr->wcs[byte_idx++] = (wchar_t) ch;
344 if (BE (mbclen == (size_t) -1, 0))
345 pstr->cur_state = prev_st;
349 /* The buffer doesn't have enough space, finish to build. */
350 pstr->cur_state = prev_st;
354 pstr->valid_len = byte_idx;
355 pstr->valid_raw_len = byte_idx;
359 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
364 remain_len = end_idx - byte_idx;
365 prev_st = pstr->cur_state;
366 if (BE (pstr->trans != NULL, 0))
370 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
372 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
373 buf[i] = pstr->trans[ch];
375 p = (const char *) buf;
378 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
379 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
380 if (BE (mbclen < (size_t) -2, 1))
388 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
389 if (BE (mbclen == mbcdlen, 1))
390 memcpy (pstr->mbs + byte_idx, buf, mbclen);
391 else if (mbcdlen != (size_t) -1)
395 if (byte_idx + mbcdlen > pstr->bufs_len)
397 pstr->cur_state = prev_st;
401 if (pstr->offsets == NULL)
403 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
405 if (pstr->offsets == NULL)
408 if (!pstr->offsets_needed)
410 for (i = 0; i < (size_t) byte_idx; ++i)
411 pstr->offsets[i] = i;
412 pstr->offsets_needed = 1;
415 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
416 pstr->wcs[byte_idx] = wcu;
417 pstr->offsets[byte_idx] = src_idx;
418 for (i = 1; i < mbcdlen; ++i)
420 pstr->offsets[byte_idx + i]
421 = src_idx + (i < mbclen ? i : mbclen - 1);
422 pstr->wcs[byte_idx + i] = WEOF;
424 pstr->len += mbcdlen - mbclen;
425 if (pstr->raw_stop > src_idx)
426 pstr->stop += mbcdlen - mbclen;
427 end_idx = (pstr->bufs_len > pstr->len)
428 ? pstr->len : pstr->bufs_len;
434 memcpy (pstr->mbs + byte_idx, p, mbclen);
437 memcpy (pstr->mbs + byte_idx, p, mbclen);
439 if (BE (pstr->offsets_needed != 0, 0))
442 for (i = 0; i < mbclen; ++i)
443 pstr->offsets[byte_idx + i] = src_idx + i;
447 pstr->wcs[byte_idx++] = wcu;
448 /* Write paddings. */
449 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450 pstr->wcs[byte_idx++] = WEOF;
452 else if (mbclen == (size_t) -1 || mbclen == 0)
454 /* It is an invalid character or '\0'. Just use the byte. */
455 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
457 if (BE (pstr->trans != NULL, 0))
458 ch = pstr->trans [ch];
459 pstr->mbs[byte_idx] = ch;
461 if (BE (pstr->offsets_needed != 0, 0))
462 pstr->offsets[byte_idx] = src_idx;
465 /* And also cast it to wide char. */
466 pstr->wcs[byte_idx++] = (wchar_t) ch;
467 if (BE (mbclen == (size_t) -1, 0))
468 pstr->cur_state = prev_st;
472 /* The buffer doesn't have enough space, finish to build. */
473 pstr->cur_state = prev_st;
477 pstr->valid_len = byte_idx;
478 pstr->valid_raw_len = src_idx;
482 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
487 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
494 /* Skip the characters which are not necessary to check. */
495 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
496 rawbuf_idx < new_raw_idx;)
500 remain_len = pstr->len - rawbuf_idx;
501 prev_st = pstr->cur_state;
502 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
503 remain_len, &pstr->cur_state);
504 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
506 /* We treat these cases as a single byte character. */
507 if (mbclen == 0 || remain_len == 0)
510 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
512 pstr->cur_state = prev_st;
516 /* Then proceed the next character. */
517 rawbuf_idx += mbclen;
522 #endif /* RE_ENABLE_I18N */
524 /* Build the buffer PSTR->MBS, and apply the translation if we need.
525 This function is used in case of REG_ICASE. */
529 build_upper_buffer (re_string_t *pstr)
531 Idx char_idx, end_idx;
532 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
534 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
536 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537 if (BE (pstr->trans != NULL, 0))
538 ch = pstr->trans[ch];
540 pstr->mbs[char_idx] = toupper (ch);
542 pstr->mbs[char_idx] = ch;
544 pstr->valid_len = char_idx;
545 pstr->valid_raw_len = char_idx;
548 /* Apply TRANS to the buffer in PSTR. */
552 re_string_translate_buffer (re_string_t *pstr)
554 Idx buf_idx, end_idx;
555 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
557 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
559 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
560 pstr->mbs[buf_idx] = pstr->trans[ch];
563 pstr->valid_len = buf_idx;
564 pstr->valid_raw_len = buf_idx;
567 /* This function re-construct the buffers.
568 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
569 convert to upper case in case of REG_ICASE, apply translation. */
573 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
577 if (BE (pstr->raw_mbs_idx <= idx, 0))
578 offset = idx - pstr->raw_mbs_idx;
582 #ifdef RE_ENABLE_I18N
583 if (pstr->mb_cur_max > 1)
584 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
585 #endif /* RE_ENABLE_I18N */
586 pstr->len = pstr->raw_len;
587 pstr->stop = pstr->raw_stop;
589 pstr->raw_mbs_idx = 0;
590 pstr->valid_raw_len = 0;
591 pstr->offsets_needed = 0;
592 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
593 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
594 if (!pstr->mbs_allocated)
595 pstr->mbs = (unsigned char *) pstr->raw_mbs;
599 if (BE (offset != 0, 1))
601 /* Should the already checked characters be kept? */
602 if (BE (offset < pstr->valid_raw_len, 1))
604 /* Yes, move them to the front of the buffer. */
605 #ifdef RE_ENABLE_I18N
606 if (BE (pstr->offsets_needed, 0))
608 Idx low = 0, high = pstr->valid_len, mid;
611 mid = (high + low) / 2;
612 if (pstr->offsets[mid] > offset)
614 else if (pstr->offsets[mid] < offset)
620 if (pstr->offsets[mid] < offset)
622 pstr->tip_context = re_string_context_at (pstr, mid - 1,
624 /* This can be quite complicated, so handle specially
625 only the common and easy case where the character with
626 different length representation of lower and upper
627 case is present at or after offset. */
628 if (pstr->valid_len > offset
629 && mid == offset && pstr->offsets[mid] == offset)
631 memmove (pstr->wcs, pstr->wcs + offset,
632 (pstr->valid_len - offset) * sizeof (wint_t));
633 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
634 pstr->valid_len -= offset;
635 pstr->valid_raw_len -= offset;
636 for (low = 0; low < pstr->valid_len; low++)
637 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
641 /* Otherwise, just find out how long the partial multibyte
642 character at offset is and fill it with WEOF/255. */
643 pstr->len = pstr->raw_len - idx + offset;
644 pstr->stop = pstr->raw_stop - idx + offset;
645 pstr->offsets_needed = 0;
646 while (mid > 0 && pstr->offsets[mid - 1] == offset)
648 while (mid < pstr->valid_len)
649 if (pstr->wcs[mid] != WEOF)
653 if (mid == pstr->valid_len)
657 pstr->valid_len = pstr->offsets[mid] - offset;
660 for (low = 0; low < pstr->valid_len; ++low)
661 pstr->wcs[low] = WEOF;
662 memset (pstr->mbs, 255, pstr->valid_len);
665 pstr->valid_raw_len = pstr->valid_len;
671 pstr->tip_context = re_string_context_at (pstr, offset - 1,
673 #ifdef RE_ENABLE_I18N
674 if (pstr->mb_cur_max > 1)
675 memmove (pstr->wcs, pstr->wcs + offset,
676 (pstr->valid_len - offset) * sizeof (wint_t));
677 #endif /* RE_ENABLE_I18N */
678 if (BE (pstr->mbs_allocated, 0))
679 memmove (pstr->mbs, pstr->mbs + offset,
680 pstr->valid_len - offset);
681 pstr->valid_len -= offset;
682 pstr->valid_raw_len -= offset;
684 assert (pstr->valid_len > 0);
690 #ifdef RE_ENABLE_I18N
691 /* No, skip all characters until IDX. */
692 Idx prev_valid_len = pstr->valid_len;
694 if (BE (pstr->offsets_needed, 0))
696 pstr->len = pstr->raw_len - idx + offset;
697 pstr->stop = pstr->raw_stop - idx + offset;
698 pstr->offsets_needed = 0;
702 #ifdef RE_ENABLE_I18N
703 if (pstr->mb_cur_max > 1)
710 const unsigned char *raw, *p, *end;
712 /* Special case UTF-8. Multi-byte chars start with any
713 byte other than 0x80 - 0xbf. */
714 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
715 end = raw + (offset - pstr->mb_cur_max);
716 if (end < pstr->raw_mbs)
718 p = raw + offset - 1;
720 /* We know the wchar_t encoding is UCS4, so for the simple
721 case, ASCII characters, skip the conversion step. */
722 if (isascii (*p) && BE (pstr->trans == NULL, 1))
724 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
725 /* pstr->valid_len = 0; */
730 for (; p >= end; --p)
731 if ((*p & 0xc0) != 0x80)
735 Idx mlen = raw + pstr->len - p;
736 unsigned char buf[6];
739 if (BE (pstr->trans != NULL, 0))
741 int i = mlen < 6 ? mlen : 6;
743 buf[i] = pstr->trans[p[i]];
745 /* XXX Don't use mbrtowc, we know which conversion
746 to use (UTF-8 -> UCS4). */
747 memset (&cur_state, 0, sizeof (cur_state));
748 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
750 if (raw + offset - p <= mbclen
751 && mbclen < (size_t) -2)
753 memset (&pstr->cur_state, '\0',
755 pstr->valid_len = mbclen - (raw + offset - p);
763 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
766 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
768 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
769 && IS_WIDE_WORD_CHAR (wc))
771 : ((IS_WIDE_NEWLINE (wc)
772 && pstr->newline_anchor)
773 ? CONTEXT_NEWLINE : 0));
774 if (BE (pstr->valid_len, 0))
776 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
777 pstr->wcs[wcs_idx] = WEOF;
778 if (pstr->mbs_allocated)
779 memset (pstr->mbs, 255, pstr->valid_len);
781 pstr->valid_raw_len = pstr->valid_len;
784 #endif /* RE_ENABLE_I18N */
786 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
787 pstr->valid_raw_len = 0;
790 pstr->tip_context = (bitset_contain (pstr->word_char, c)
792 : ((IS_NEWLINE (c) && pstr->newline_anchor)
793 ? CONTEXT_NEWLINE : 0));
796 if (!BE (pstr->mbs_allocated, 0))
799 pstr->raw_mbs_idx = idx;
801 pstr->stop -= offset;
803 /* Then build the buffers. */
804 #ifdef RE_ENABLE_I18N
805 if (pstr->mb_cur_max > 1)
809 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
810 if (BE (ret != REG_NOERROR, 0))
814 build_wcs_buffer (pstr);
817 #endif /* RE_ENABLE_I18N */
818 if (BE (pstr->mbs_allocated, 0))
821 build_upper_buffer (pstr);
822 else if (pstr->trans != NULL)
823 re_string_translate_buffer (pstr);
826 pstr->valid_len = pstr->len;
833 internal_function __attribute ((pure))
834 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
839 /* Handle the common (easiest) cases first. */
840 if (BE (!pstr->mbs_allocated, 1))
841 return re_string_peek_byte (pstr, idx);
843 #ifdef RE_ENABLE_I18N
844 if (pstr->mb_cur_max > 1
845 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
846 return re_string_peek_byte (pstr, idx);
849 off = pstr->cur_idx + idx;
850 #ifdef RE_ENABLE_I18N
851 if (pstr->offsets_needed)
852 off = pstr->offsets[off];
855 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
857 #ifdef RE_ENABLE_I18N
858 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
859 this function returns CAPITAL LETTER I instead of first byte of
860 DOTLESS SMALL LETTER I. The latter would confuse the parser,
861 since peek_byte_case doesn't advance cur_idx in any way. */
862 if (pstr->offsets_needed && !isascii (ch))
863 return re_string_peek_byte (pstr, idx);
870 internal_function __attribute ((pure))
871 re_string_fetch_byte_case (re_string_t *pstr)
873 if (BE (!pstr->mbs_allocated, 1))
874 return re_string_fetch_byte (pstr);
876 #ifdef RE_ENABLE_I18N
877 if (pstr->offsets_needed)
882 /* For tr_TR.UTF-8 [[:islower:]] there is
883 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
884 in that case the whole multi-byte character and return
885 the original letter. On the other side, with
886 [[: DOTLESS SMALL LETTER I return [[:I, as doing
887 anything else would complicate things too much. */
889 if (!re_string_first_byte (pstr, pstr->cur_idx))
890 return re_string_fetch_byte (pstr);
892 off = pstr->offsets[pstr->cur_idx];
893 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
896 return re_string_fetch_byte (pstr);
898 re_string_skip_bytes (pstr,
899 re_string_char_size_at (pstr, pstr->cur_idx));
904 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
909 re_string_destruct (re_string_t *pstr)
911 #ifdef RE_ENABLE_I18N
913 re_free (pstr->offsets);
914 #endif /* RE_ENABLE_I18N */
915 if (pstr->mbs_allocated)
919 /* Return the context at IDX in INPUT. */
923 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
926 if (BE (! REG_VALID_INDEX (idx), 0))
927 /* In this case, we use the value stored in input->tip_context,
928 since we can't know the character in input->mbs[-1] here. */
929 return input->tip_context;
930 if (BE (idx == input->len, 0))
931 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
932 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
933 #ifdef RE_ENABLE_I18N
934 if (input->mb_cur_max > 1)
938 while(input->wcs[wc_idx] == WEOF)
941 /* It must not happen. */
942 assert (REG_VALID_INDEX (wc_idx));
945 if (! REG_VALID_INDEX (wc_idx))
946 return input->tip_context;
948 wc = input->wcs[wc_idx];
949 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
951 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
952 ? CONTEXT_NEWLINE : 0);
957 c = re_string_byte_at (input, idx);
958 if (bitset_contain (input->word_char, c))
960 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
964 /* Functions for set operation. */
968 re_node_set_alloc (re_node_set *set, Idx size)
972 set->elems = re_malloc (Idx, size);
973 if (BE (set->elems == NULL, 0))
980 re_node_set_init_1 (re_node_set *set, Idx elem)
984 set->elems = re_malloc (Idx, 1);
985 if (BE (set->elems == NULL, 0))
987 set->alloc = set->nelem = 0;
990 set->elems[0] = elem;
996 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
999 set->elems = re_malloc (Idx, 2);
1000 if (BE (set->elems == NULL, 0))
1005 set->elems[0] = elem1;
1012 set->elems[0] = elem1;
1013 set->elems[1] = elem2;
1017 set->elems[0] = elem2;
1018 set->elems[1] = elem1;
1024 static reg_errcode_t
1026 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1028 dest->nelem = src->nelem;
1031 dest->alloc = dest->nelem;
1032 dest->elems = re_malloc (Idx, dest->alloc);
1033 if (BE (dest->elems == NULL, 0))
1035 dest->alloc = dest->nelem = 0;
1038 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1041 re_node_set_init_empty (dest);
1045 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1046 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1047 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1049 static reg_errcode_t
1051 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1052 const re_node_set *src2)
1054 Idx i1, i2, is, id, delta, sbase;
1055 if (src1->nelem == 0 || src2->nelem == 0)
1058 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1059 conservative estimate. */
1060 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1062 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1063 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1064 if (BE (new_elems == NULL, 0))
1066 dest->elems = new_elems;
1067 dest->alloc = new_alloc;
1070 /* Find the items in the intersection of SRC1 and SRC2, and copy
1071 into the top of DEST those that are not already in DEST itself. */
1072 sbase = dest->nelem + src1->nelem + src2->nelem;
1073 i1 = src1->nelem - 1;
1074 i2 = src2->nelem - 1;
1075 id = dest->nelem - 1;
1078 if (src1->elems[i1] == src2->elems[i2])
1080 /* Try to find the item in DEST. Maybe we could binary search? */
1081 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1084 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1085 dest->elems[--sbase] = src1->elems[i1];
1087 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1091 /* Lower the highest of the two items. */
1092 else if (src1->elems[i1] < src2->elems[i2])
1094 if (! REG_VALID_INDEX (--i2))
1099 if (! REG_VALID_INDEX (--i1))
1104 id = dest->nelem - 1;
1105 is = dest->nelem + src1->nelem + src2->nelem - 1;
1106 delta = is - sbase + 1;
1108 /* Now copy. When DELTA becomes zero, the remaining
1109 DEST elements are already in place; this is more or
1110 less the same loop that is in re_node_set_merge. */
1111 dest->nelem += delta;
1112 if (delta > 0 && REG_VALID_INDEX (id))
1115 if (dest->elems[is] > dest->elems[id])
1117 /* Copy from the top. */
1118 dest->elems[id + delta--] = dest->elems[is--];
1124 /* Slide from the bottom. */
1125 dest->elems[id + delta] = dest->elems[id];
1126 if (! REG_VALID_INDEX (--id))
1131 /* Copy remaining SRC elements. */
1132 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1137 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1138 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1140 static reg_errcode_t
1142 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1143 const re_node_set *src2)
1146 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1148 dest->alloc = src1->nelem + src2->nelem;
1149 dest->elems = re_malloc (Idx, dest->alloc);
1150 if (BE (dest->elems == NULL, 0))
1155 if (src1 != NULL && src1->nelem > 0)
1156 return re_node_set_init_copy (dest, src1);
1157 else if (src2 != NULL && src2->nelem > 0)
1158 return re_node_set_init_copy (dest, src2);
1160 re_node_set_init_empty (dest);
1163 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1165 if (src1->elems[i1] > src2->elems[i2])
1167 dest->elems[id++] = src2->elems[i2++];
1170 if (src1->elems[i1] == src2->elems[i2])
1172 dest->elems[id++] = src1->elems[i1++];
1174 if (i1 < src1->nelem)
1176 memcpy (dest->elems + id, src1->elems + i1,
1177 (src1->nelem - i1) * sizeof (Idx));
1178 id += src1->nelem - i1;
1180 else if (i2 < src2->nelem)
1182 memcpy (dest->elems + id, src2->elems + i2,
1183 (src2->nelem - i2) * sizeof (Idx));
1184 id += src2->nelem - i2;
1190 /* Calculate the union set of the sets DEST and SRC. And store it to
1191 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1193 static reg_errcode_t
1195 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1197 Idx is, id, sbase, delta;
1198 if (src == NULL || src->nelem == 0)
1200 if (dest->alloc < 2 * src->nelem + dest->nelem)
1202 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1203 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1204 if (BE (new_buffer == NULL, 0))
1206 dest->elems = new_buffer;
1207 dest->alloc = new_alloc;
1210 if (BE (dest->nelem == 0, 0))
1212 dest->nelem = src->nelem;
1213 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1217 /* Copy into the top of DEST the items of SRC that are not
1218 found in DEST. Maybe we could binary search in DEST? */
1219 for (sbase = dest->nelem + 2 * src->nelem,
1220 is = src->nelem - 1, id = dest->nelem - 1;
1221 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1223 if (dest->elems[id] == src->elems[is])
1225 else if (dest->elems[id] < src->elems[is])
1226 dest->elems[--sbase] = src->elems[is--];
1227 else /* if (dest->elems[id] > src->elems[is]) */
1231 if (REG_VALID_INDEX (is))
1233 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1235 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1238 id = dest->nelem - 1;
1239 is = dest->nelem + 2 * src->nelem - 1;
1240 delta = is - sbase + 1;
1244 /* Now copy. When DELTA becomes zero, the remaining
1245 DEST elements are already in place. */
1246 dest->nelem += delta;
1249 if (dest->elems[is] > dest->elems[id])
1251 /* Copy from the top. */
1252 dest->elems[id + delta--] = dest->elems[is--];
1258 /* Slide from the bottom. */
1259 dest->elems[id + delta] = dest->elems[id];
1260 if (! REG_VALID_INDEX (--id))
1262 /* Copy remaining SRC elements. */
1263 memcpy (dest->elems, dest->elems + sbase,
1264 delta * sizeof (Idx));
1273 /* Insert the new element ELEM to the re_node_set* SET.
1274 SET should not already have ELEM.
1275 Return true if successful. */
1279 re_node_set_insert (re_node_set *set, Idx elem)
1282 /* In case the set is empty. */
1283 if (set->alloc == 0)
1284 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1286 if (BE (set->nelem, 0) == 0)
1288 /* We already guaranteed above that set->alloc != 0. */
1289 set->elems[0] = elem;
1294 /* Realloc if we need. */
1295 if (set->alloc == set->nelem)
1298 set->alloc = set->alloc * 2;
1299 new_elems = re_realloc (set->elems, Idx, set->alloc);
1300 if (BE (new_elems == NULL, 0))
1302 set->elems = new_elems;
1305 /* Move the elements which follows the new element. Test the
1306 first element separately to skip a check in the inner loop. */
1307 if (elem < set->elems[0])
1310 for (idx = set->nelem; idx > 0; idx--)
1311 set->elems[idx] = set->elems[idx - 1];
1315 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1316 set->elems[idx] = set->elems[idx - 1];
1319 /* Insert the new element. */
1320 set->elems[idx] = elem;
1325 /* Insert the new element ELEM to the re_node_set* SET.
1326 SET should not already have any element greater than or equal to ELEM.
1327 Return true if successful. */
1331 re_node_set_insert_last (re_node_set *set, Idx elem)
1333 /* Realloc if we need. */
1334 if (set->alloc == set->nelem)
1337 set->alloc = (set->alloc + 1) * 2;
1338 new_elems = re_realloc (set->elems, Idx, set->alloc);
1339 if (BE (new_elems == NULL, 0))
1341 set->elems = new_elems;
1344 /* Insert the new element. */
1345 set->elems[set->nelem++] = elem;
1349 /* Compare two node sets SET1 and SET2.
1350 Return true if SET1 and SET2 are equivalent. */
1353 internal_function __attribute ((pure))
1354 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1357 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1359 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1360 if (set1->elems[i] != set2->elems[i])
1365 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1368 internal_function __attribute ((pure))
1369 re_node_set_contains (const re_node_set *set, Idx elem)
1371 __re_size_t idx, right, mid;
1372 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1375 /* Binary search the element. */
1377 right = set->nelem - 1;
1380 mid = (idx + right) / 2;
1381 if (set->elems[mid] < elem)
1386 return set->elems[idx] == elem ? idx + 1 : 0;
1391 re_node_set_remove_at (re_node_set *set, Idx idx)
1393 if (idx < 0 || idx >= set->nelem)
1396 for (; idx < set->nelem; idx++)
1397 set->elems[idx] = set->elems[idx + 1];
1401 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1402 Or return REG_MISSING if an error occurred. */
1406 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1408 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1410 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1411 Idx *new_nexts, *new_indices;
1412 re_node_set *new_edests, *new_eclosures;
1413 re_token_t *new_nodes;
1414 size_t max_object_size =
1415 MAX (sizeof (re_token_t),
1416 MAX (sizeof (re_node_set),
1419 /* Avoid overflows. */
1420 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1423 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1424 if (BE (new_nodes == NULL, 0))
1426 dfa->nodes = new_nodes;
1427 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1428 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1429 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1430 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1431 if (BE (new_nexts == NULL || new_indices == NULL
1432 || new_edests == NULL || new_eclosures == NULL, 0))
1434 dfa->nexts = new_nexts;
1435 dfa->org_indices = new_indices;
1436 dfa->edests = new_edests;
1437 dfa->eclosures = new_eclosures;
1438 dfa->nodes_alloc = new_nodes_alloc;
1440 dfa->nodes[dfa->nodes_len] = token;
1441 dfa->nodes[dfa->nodes_len].constraint = 0;
1442 #ifdef RE_ENABLE_I18N
1444 int type = token.type;
1445 dfa->nodes[dfa->nodes_len].accept_mb =
1446 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1449 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1450 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1451 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1452 return dfa->nodes_len++;
1455 static inline re_hashval_t
1457 calc_state_hash (const re_node_set *nodes, unsigned int context)
1459 re_hashval_t hash = nodes->nelem + context;
1461 for (i = 0 ; i < nodes->nelem ; i++)
1462 hash += nodes->elems[i];
1466 /* Search for the state whose node_set is equivalent to NODES.
1467 Return the pointer to the state, if we found it in the DFA.
1468 Otherwise create the new one and return it. In case of an error
1469 return NULL and set the error code in ERR.
1470 Note: - We assume NULL as the invalid state, then it is possible that
1471 return value is NULL and ERR is REG_NOERROR.
1472 - We never return non-NULL value in case of any errors, it is for
1475 static re_dfastate_t *
1477 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1478 const re_node_set *nodes)
1481 re_dfastate_t *new_state;
1482 struct re_state_table_entry *spot;
1485 /* Suppress bogus uninitialized-variable warnings. */
1488 if (BE (nodes->nelem == 0, 0))
1493 hash = calc_state_hash (nodes, 0);
1494 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1496 for (i = 0 ; i < spot->num ; i++)
1498 re_dfastate_t *state = spot->array[i];
1499 if (hash != state->hash)
1501 if (re_node_set_compare (&state->nodes, nodes))
1505 /* There are no appropriate state in the dfa, create the new one. */
1506 new_state = create_ci_newstate (dfa, nodes, hash);
1507 if (BE (new_state == NULL, 0))
1513 /* Search for the state whose node_set is equivalent to NODES and
1514 whose context is equivalent to CONTEXT.
1515 Return the pointer to the state, if we found it in the DFA.
1516 Otherwise create the new one and return it. In case of an error
1517 return NULL and set the error code in ERR.
1518 Note: - We assume NULL as the invalid state, then it is possible that
1519 return value is NULL and ERR is REG_NOERROR.
1520 - We never return non-NULL value in case of any errors, it is for
1523 static re_dfastate_t *
1525 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1526 const re_node_set *nodes, unsigned int context)
1529 re_dfastate_t *new_state;
1530 struct re_state_table_entry *spot;
1533 /* Suppress bogus uninitialized-variable warnings. */
1536 if (nodes->nelem == 0)
1541 hash = calc_state_hash (nodes, context);
1542 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1544 for (i = 0 ; i < spot->num ; i++)
1546 re_dfastate_t *state = spot->array[i];
1547 if (state->hash == hash
1548 && state->context == context
1549 && re_node_set_compare (state->entrance_nodes, nodes))
1552 /* There are no appropriate state in `dfa', create the new one. */
1553 new_state = create_cd_newstate (dfa, nodes, context, hash);
1554 if (BE (new_state == NULL, 0))
1560 /* Finish initialization of the new state NEWSTATE, and using its hash value
1561 HASH put in the appropriate bucket of DFA's state table. Return value
1562 indicates the error code if failed. */
1564 static reg_errcode_t
1565 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1568 struct re_state_table_entry *spot;
1572 newstate->hash = hash;
1573 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1574 if (BE (err != REG_NOERROR, 0))
1576 for (i = 0; i < newstate->nodes.nelem; i++)
1578 Idx elem = newstate->nodes.elems[i];
1579 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1580 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1584 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1585 if (BE (spot->alloc <= spot->num, 0))
1587 Idx new_alloc = 2 * spot->num + 2;
1588 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1590 if (BE (new_array == NULL, 0))
1592 spot->array = new_array;
1593 spot->alloc = new_alloc;
1595 spot->array[spot->num++] = newstate;
1600 free_state (re_dfastate_t *state)
1602 re_node_set_free (&state->non_eps_nodes);
1603 re_node_set_free (&state->inveclosure);
1604 if (state->entrance_nodes != &state->nodes)
1606 re_node_set_free (state->entrance_nodes);
1607 re_free (state->entrance_nodes);
1609 re_node_set_free (&state->nodes);
1610 re_free (state->word_trtable);
1611 re_free (state->trtable);
1615 /* Create the new state which is independ of contexts.
1616 Return the new state if succeeded, otherwise return NULL. */
1618 static re_dfastate_t *
1620 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1625 re_dfastate_t *newstate;
1627 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1628 if (BE (newstate == NULL, 0))
1630 err = re_node_set_init_copy (&newstate->nodes, nodes);
1631 if (BE (err != REG_NOERROR, 0))
1637 newstate->entrance_nodes = &newstate->nodes;
1638 for (i = 0 ; i < nodes->nelem ; i++)
1640 re_token_t *node = dfa->nodes + nodes->elems[i];
1641 re_token_type_t type = node->type;
1642 if (type == CHARACTER && !node->constraint)
1644 #ifdef RE_ENABLE_I18N
1645 newstate->accept_mb |= node->accept_mb;
1646 #endif /* RE_ENABLE_I18N */
1648 /* If the state has the halt node, the state is a halt state. */
1649 if (type == END_OF_RE)
1651 else if (type == OP_BACK_REF)
1652 newstate->has_backref = 1;
1653 else if (type == ANCHOR || node->constraint)
1654 newstate->has_constraint = 1;
1656 err = register_state (dfa, newstate, hash);
1657 if (BE (err != REG_NOERROR, 0))
1659 free_state (newstate);
1665 /* Create the new state which is depend on the context CONTEXT.
1666 Return the new state if succeeded, otherwise return NULL. */
1668 static re_dfastate_t *
1670 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1671 unsigned int context, re_hashval_t hash)
1673 Idx i, nctx_nodes = 0;
1675 re_dfastate_t *newstate;
1677 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1678 if (BE (newstate == NULL, 0))
1680 err = re_node_set_init_copy (&newstate->nodes, nodes);
1681 if (BE (err != REG_NOERROR, 0))
1687 newstate->context = context;
1688 newstate->entrance_nodes = &newstate->nodes;
1690 for (i = 0 ; i < nodes->nelem ; i++)
1692 re_token_t *node = dfa->nodes + nodes->elems[i];
1693 re_token_type_t type = node->type;
1694 unsigned int constraint = node->constraint;
1696 if (type == CHARACTER && !constraint)
1698 #ifdef RE_ENABLE_I18N
1699 newstate->accept_mb |= node->accept_mb;
1700 #endif /* RE_ENABLE_I18N */
1702 /* If the state has the halt node, the state is a halt state. */
1703 if (type == END_OF_RE)
1705 else if (type == OP_BACK_REF)
1706 newstate->has_backref = 1;
1710 if (newstate->entrance_nodes == &newstate->nodes)
1712 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1713 if (BE (newstate->entrance_nodes == NULL, 0))
1715 free_state (newstate);
1718 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1720 newstate->has_constraint = 1;
1723 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1725 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1730 err = register_state (dfa, newstate, hash);
1731 if (BE (err != REG_NOERROR, 0))
1733 free_state (newstate);