pintos-os.org Git - pspp/blob - lib/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License along
  17    with this program; if not, write to the Free Software Foundation,
  18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  19
  20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  21                                      int n) internal_function;
  22 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  23 static void match_ctx_free (re_match_context_t *cache) internal_function;
  24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  25                                           int str_idx, int from, int to)
  26      internal_function;
  27 static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
  28      internal_function;
  29 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
  30                                            int str_idx) internal_function;
  31 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  32                                                    int node, int str_idx)
  33      internal_function;
  34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  35                            re_dfastate_t **limited_sts, int last_node,
  36                            int last_str_idx)
  37      internal_function;
  38 static reg_errcode_t re_search_internal (const regex_t *preg,
  39                                          const char *string, int length,
  40                                          int start, int range, int stop,
  41                                          size_t nmatch, regmatch_t pmatch[],
  42                                          int eflags) internal_function;
  43 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  44                              const char *string1, int length1,
  45                              const char *string2, int length2,
  46                              int start, int range, struct re_registers *regs,
  47                              int stop, int ret_len) internal_function;
  48 static int re_search_stub (struct re_pattern_buffer *bufp,
  49                            const char *string, int length, int start,
  50                            int range, int stop, struct re_registers *regs,
  51                            int ret_len) internal_function;
  52 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  53                               int nregs, int regs_allocated) internal_function;
  54 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  55      internal_function;
  56 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
  57                            int *p_match_first)
  58      internal_function;
  59 static int check_halt_state_context (const re_match_context_t *mctx,
  60                                      const re_dfastate_t *state, int idx)
  61      internal_function;
  62 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
  63                          regmatch_t *prev_idx_match, int cur_node,
  64                          int cur_idx, int nmatch) internal_function;
  65 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  66                                       int str_idx, int dest_node, int nregs,
  67                                       regmatch_t *regs,
  68                                       re_node_set *eps_via_nodes) internal_function;
  69 static reg_errcode_t set_regs (const regex_t *preg,
  70                                const re_match_context_t *mctx,
  71                                size_t nmatch, regmatch_t *pmatch,
  72                                int fl_backtrack) internal_function;
  73 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
  74
  75 #ifdef RE_ENABLE_I18N
  76 static int sift_states_iter_mb (const re_match_context_t *mctx,
  77                                 re_sift_context_t *sctx,
  78                                 int node_idx, int str_idx, int max_str_idx) internal_function;
  79 #endif /* RE_ENABLE_I18N */
  80 static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
  81                                            re_sift_context_t *sctx) internal_function;
  82 static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
  83                                           re_sift_context_t *sctx, int str_idx,
  84                                           re_node_set *cur_dest) internal_function;
  85 static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
  86                                               re_sift_context_t *sctx,
  87                                               int str_idx,
  88                                               re_node_set *dest_nodes) internal_function;
  89 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
  90                                             re_node_set *dest_nodes,
  91                                             const re_node_set *candidates) internal_function;
  92 static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
  93                              int dst_node, int dst_idx, int src_node,
  94                              int src_idx) internal_function;
  95 static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
  96                                         int boundaries, int subexp_idx,
  97                                         int from_node, int bkref_idx) internal_function;
  98 static int check_dst_limits_calc_pos (re_match_context_t *mctx,
  99                                       int limit, int subexp_idx,
 100                                       int node, int str_idx,
 101                                       int bkref_idx) internal_function;
 102 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
 103                                           re_node_set *dest_nodes,
 104                                           const re_node_set *candidates,
 105                                           re_node_set *limits,
 106                                           struct re_backref_cache_entry *bkref_ents,
 107                                           int str_idx) internal_function;
 108 static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
 109                                         re_sift_context_t *sctx,
 110                                         int str_idx, const re_node_set *candidates) internal_function;
 111 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
 112                                         re_dfastate_t **src, int num) internal_function;
 113 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 114                                          re_match_context_t *mctx) internal_function;
 115 static re_dfastate_t *transit_state (reg_errcode_t *err,
 116                                      re_match_context_t *mctx,
 117                                      re_dfastate_t *state) internal_function;
 118 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 119                                             re_match_context_t *mctx,
 120                                             re_dfastate_t *next_state) internal_function;
 121 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 122                                                 re_node_set *cur_nodes,
 123                                                 int str_idx) internal_function;
 124 #if 0
 125 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 126                                         re_match_context_t *mctx,
 127                                         re_dfastate_t *pstate) internal_function;
 128 #endif
 129 #ifdef RE_ENABLE_I18N
 130 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 131                                        re_dfastate_t *pstate) internal_function;
 132 #endif /* RE_ENABLE_I18N */
 133 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 134                                           const re_node_set *nodes) internal_function;
 135 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 136                                  int bkref_node, int bkref_str_idx) internal_function;
 137 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 138                                      const re_sub_match_top_t *sub_top,
 139                                      re_sub_match_last_t *sub_last,
 140                                      int bkref_node, int bkref_str) internal_function;
 141 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 142                              int subexp_idx, int type) internal_function;
 143 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 144                                     state_array_t *path, int top_node,
 145                                     int top_str, int last_node, int last_str,
 146                                     int type) internal_function;
 147 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 148                                                    int str_idx,
 149                                                    re_node_set *cur_nodes,
 150                                                    re_node_set *next_nodes) internal_function;
 151 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
 152                                                re_node_set *cur_nodes,
 153                                                int ex_subexp, int type) internal_function;
 154 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
 155                                                    re_node_set *dst_nodes,
 156                                                    int target, int ex_subexp,
 157                                                    int type) internal_function;
 158 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 159                                          re_node_set *cur_nodes, int cur_str,
 160                                          int subexp_num, int type) internal_function;
 161 static int build_trtable (re_dfa_t *dfa,
 162                           re_dfastate_t *state) internal_function;
 163 #ifdef RE_ENABLE_I18N
 164 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
 165                                     const re_string_t *input, int idx) internal_function;
 166 # ifdef _LIBC
 167 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 168                                                    size_t name_len) internal_function;
 169 # endif /* _LIBC */
 170 #endif /* RE_ENABLE_I18N */
 171 static int group_nodes_into_DFAstates (re_dfa_t *dfa,
 172                                        const re_dfastate_t *state,
 173                                        re_node_set *states_node,
 174                                        bitset *states_ch) internal_function;
 175 static int check_node_accept (const re_match_context_t *mctx,
 176                               const re_token_t *node, int idx) internal_function;
 177 static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
 178 \f
 179 /* Entry point for POSIX code.  */
 180
 181 /* regexec searches for a given pattern, specified by PREG, in the
 182    string STRING.
 183
 184    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 185    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 186    least NMATCH elements, and we set them to the offsets of the
 187    corresponding matched substrings.
 188
 189    EFLAGS specifies `execution flags' which affect matching: if
 190    REG_NOTBOL is set, then ^ does not match at the beginning of the
 191    string; if REG_NOTEOL is set, then $ does not match at the end.
 192
 193    We return 0 if we find a match and REG_NOMATCH if not.  */
 194
 195 int
 196 regexec (const regex_t *__restrict preg, const char *__restrict string,
 197          size_t nmatch, regmatch_t pmatch[], int eflags)
 198 {
 199   reg_errcode_t err;
 200   int start, length;
 201 #ifdef _LIBC
 202   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
 203 #endif
 204
 205   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 206     return REG_BADPAT;
 207
 208   if (eflags & REG_STARTEND)
 209     {
 210       start = pmatch[0].rm_so;
 211       length = pmatch[0].rm_eo;
 212     }
 213   else
 214     {
 215       start = 0;
 216       length = strlen (string);
 217     }
 218
 219   __libc_lock_lock (dfa->lock);
 220   if (preg->re_no_sub)
 221     err = re_search_internal (preg, string, length, start, length - start,
 222                               length, 0, NULL, eflags);
 223   else
 224     err = re_search_internal (preg, string, length, start, length - start,
 225                               length, nmatch, pmatch, eflags);
 226   __libc_lock_unlock (dfa->lock);
 227   return err != REG_NOERROR;
 228 }
 229
 230 #ifdef _LIBC
 231 # include <shlib-compat.h>
 232 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 233
 234 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 235 __typeof__ (__regexec) __compat_regexec;
 236
 237 int
 238 attribute_compat_text_section
 239 __compat_regexec (const regex_t *__restrict preg,
 240                   const char *__restrict string, size_t nmatch,
 241                   regmatch_t pmatch[], int eflags)
 242 {
 243   return regexec (preg, string, nmatch, pmatch,
 244                   eflags & (REG_NOTBOL | REG_NOTEOL));
 245 }
 246 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 247 # endif
 248 #endif
 249
 250 /* Entry points for GNU code.  */
 251
 252 /* re_match, re_search, re_match_2, re_search_2
 253
 254    The former two functions operate on STRING with length LENGTH,
 255    while the later two operate on concatenation of STRING1 and STRING2
 256    with lengths LENGTH1 and LENGTH2, respectively.
 257
 258    re_match() matches the compiled pattern in BUFP against the string,
 259    starting at index START.
 260
 261    re_search() first tries matching at index START, then it tries to match
 262    starting from index START + 1, and so on.  The last start position tried
 263    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 264    way as re_match().)
 265
 266    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 267    the first STOP characters of the concatenation of the strings should be
 268    concerned.
 269
 270    If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match
 271    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
 272    computed relative to the concatenation, not relative to the individual
 273    strings.)
 274
 275    On success, re_match* functions return the length of the match, re_search*
 276    return the position of the start of the match.  Return value -1 means no
 277    match was found and -2 indicates an internal error.  */
 278
 279 int
 280 re_match (struct re_pattern_buffer *bufp, const char *string,
 281           int length, int start, struct re_registers *regs)
 282 {
 283   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 284 }
 285 #ifdef _LIBC
 286 weak_alias (__re_match, re_match)
 287 #endif
 288
 289 int
 290 re_search (struct re_pattern_buffer *bufp, const char *string,
 291            int length, int start, int range, struct re_registers *regs)
 292 {
 293   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 294 }
 295 #ifdef _LIBC
 296 weak_alias (__re_search, re_search)
 297 #endif
 298
 299 int
 300 re_match_2 (struct re_pattern_buffer *bufp,
 301             const char *string1, int length1,
 302             const char *string2, int length2,
 303             int start, struct re_registers *regs, int stop)
 304 {
 305   return re_search_2_stub (bufp, string1, length1, string2, length2,
 306                            start, 0, regs, stop, 1);
 307 }
 308 #ifdef _LIBC
 309 weak_alias (__re_match_2, re_match_2)
 310 #endif
 311
 312 int
 313 re_search_2 (struct re_pattern_buffer *bufp,
 314              const char *string1, int length1,
 315              const char *string2, int length2,
 316              int start, int range, struct re_registers *regs, int stop)
 317 {
 318   return re_search_2_stub (bufp, string1, length1, string2, length2,
 319                            start, range, regs, stop, 0);
 320 }
 321 #ifdef _LIBC
 322 weak_alias (__re_search_2, re_search_2)
 323 #endif
 324
 325 static int
 326 internal_function
 327 re_search_2_stub (struct re_pattern_buffer *bufp,
 328                   const char *string1, int length1,
 329                   const char *string2, int length2,
 330                   int start, int range, struct re_registers *regs, int stop,
 331                   int ret_len)
 332 {
 333   const char *str;
 334   int rval;
 335   int len = length1 + length2;
 336   int free_str = 0;
 337
 338   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
 339     return -2;
 340
 341   /* Concatenate the strings.  */
 342   if (length2 > 0)
 343     if (length1 > 0)
 344       {
 345         char *s = re_malloc (char, len);
 346
 347         if (BE (s == NULL, 0))
 348           return -2;
 349         memcpy (s, string1, length1);
 350         memcpy (s + length1, string2, length2);
 351         str = s;
 352         free_str = 1;
 353       }
 354     else
 355       str = string2;
 356   else
 357     str = string1;
 358
 359   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 360                          ret_len);
 361   if (free_str)
 362     re_free ((char *) str);
 363   return rval;
 364 }
 365
 366 /* The parameters have the same meaning as those of re_search.
 367    Additional parameters:
 368    If RET_LEN is nonzero the length of the match is returned (re_match style);
 369    otherwise the position of the match is returned.  */
 370
 371 static int
 372 internal_function
 373 re_search_stub (struct re_pattern_buffer *bufp,
 374                 const char *string, int length,
 375                 int start, int range, int stop, struct re_registers *regs,
 376                 int ret_len)
 377 {
 378   reg_errcode_t result;
 379   regmatch_t *pmatch;
 380   int nregs, rval;
 381   int eflags = 0;
 382 #ifdef _LIBC
 383   re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
 384 #endif
 385
 386   /* Check for out-of-range.  */
 387   if (BE (start < 0 || start > length, 0))
 388     return -1;
 389   if (BE (start + range > length, 0))
 390     range = length - start;
 391   else if (BE (start + range < 0, 0))
 392     range = -start;
 393
 394   __libc_lock_lock (dfa->lock);
 395
 396   eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0;
 397   eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0;
 398
 399   /* Compile fastmap if we haven't yet.  */
 400   if (range > 0 && bufp->re_fastmap != NULL && !bufp->re_fastmap_accurate)
 401     re_compile_fastmap (bufp);
 402
 403   if (BE (bufp->re_no_sub, 0))
 404     regs = NULL;
 405
 406   /* We need at least 1 register.  */
 407   if (regs == NULL)
 408     nregs = 1;
 409   else if (BE (bufp->re_regs_allocated == REG_FIXED
 410                && regs->rm_num_regs < bufp->re_nsub + 1, 0))
 411     {
 412       nregs = regs->rm_num_regs;
 413       if (BE (nregs < 1, 0))
 414         {
 415           /* Nothing can be copied to regs.  */
 416           regs = NULL;
 417           nregs = 1;
 418         }
 419     }
 420   else
 421     nregs = bufp->re_nsub + 1;
 422   pmatch = re_malloc (regmatch_t, nregs);
 423   if (BE (pmatch == NULL, 0))
 424     {
 425       rval = -2;
 426       goto out;
 427     }
 428
 429   result = re_search_internal (bufp, string, length, start, range, stop,
 430                                nregs, pmatch, eflags);
 431
 432   rval = 0;
 433
 434   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 435   if (result != REG_NOERROR)
 436     rval = -1;
 437   else if (regs != NULL)
 438     {
 439       /* If caller wants register contents data back, copy them.  */
 440       bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs,
 441                                               bufp->re_regs_allocated);
 442       if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0))
 443         rval = -2;
 444     }
 445
 446   if (BE (rval == 0, 1))
 447     {
 448       if (ret_len)
 449         {
 450           assert (pmatch[0].rm_so == start);
 451           rval = pmatch[0].rm_eo - start;
 452         }
 453       else
 454         rval = pmatch[0].rm_so;
 455     }
 456   re_free (pmatch);
 457  out:
 458   __libc_lock_unlock (dfa->lock);
 459   return rval;
 460 }
 461
 462 static unsigned
 463 internal_function
 464 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
 465               int regs_allocated)
 466 {
 467   int rval = REG_REALLOCATE;
 468   int i;
 469   int need_regs = nregs + 1;
 470   /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code
 471      uses.  */
 472
 473   /* Have the register data arrays been allocated?  */
 474   if (regs_allocated == REG_UNALLOCATED)
 475     { /* No.  So allocate them with malloc.  */
 476       regs->rm_start = re_malloc (regoff_t, need_regs);
 477       regs->rm_end = re_malloc (regoff_t, need_regs);
 478       if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0))
 479         return REG_UNALLOCATED;
 480       regs->rm_num_regs = need_regs;
 481     }
 482   else if (regs_allocated == REG_REALLOCATE)
 483     { /* Yes.  If we need more elements than were already
 484          allocated, reallocate them.  If we need fewer, just
 485          leave it alone.  */
 486       if (BE (need_regs > regs->rm_num_regs, 0))
 487         {
 488           regoff_t *new_start =
 489             re_realloc (regs->rm_start, regoff_t, need_regs);
 490           regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs);
 491           if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
 492             return REG_UNALLOCATED;
 493           regs->rm_start = new_start;
 494           regs->rm_end = new_end;
 495           regs->rm_num_regs = need_regs;
 496         }
 497     }
 498   else
 499     {
 500       assert (regs_allocated == REG_FIXED);
 501       /* This function may not be called with REG_FIXED and nregs too big.  */
 502       assert (regs->rm_num_regs >= nregs);
 503       rval = REG_FIXED;
 504     }
 505
 506   /* Copy the regs.  */
 507   for (i = 0; i < nregs; ++i)
 508     {
 509       regs->rm_start[i] = pmatch[i].rm_so;
 510       regs->rm_end[i] = pmatch[i].rm_eo;
 511     }
 512   for ( ; i < regs->rm_num_regs; ++i)
 513     regs->rm_start[i] = regs->rm_end[i] = -1;
 514
 515   return rval;
 516 }
 517
 518 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 519    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 520    this memory for recording register information.  STARTS and ENDS
 521    must be allocated using the malloc library routine, and must each
 522    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 523
 524    If NUM_REGS == 0, then subsequent matches should allocate their own
 525    register data.
 526
 527    Unless this function is called, the first search or match using
 528    PATTERN_BUFFER will allocate its own register data, without
 529    freeing the old data.  */
 530
 531 void
 532 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
 533                   unsigned int num_regs, regoff_t *starts, regoff_t *ends)
 534 {
 535   if (num_regs)
 536     {
 537       bufp->re_regs_allocated = REG_REALLOCATE;
 538       regs->rm_num_regs = num_regs;
 539       regs->rm_start = starts;
 540       regs->rm_end = ends;
 541     }
 542   else
 543     {
 544       bufp->re_regs_allocated = REG_UNALLOCATED;
 545       regs->rm_num_regs = 0;
 546       regs->rm_start = regs->rm_end = NULL;
 547     }
 548 }
 549 #ifdef _LIBC
 550 weak_alias (__re_set_registers, re_set_registers)
 551 #endif
 552 \f
 553 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 554    them unless specifically requested.  */
 555
 556 #if defined _REGEX_RE_COMP || defined _LIBC
 557 int
 558 # ifdef _LIBC
 559 weak_function
 560 # endif
 561 re_exec (const char *s)
 562 {
 563   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 564 }
 565 #endif /* _REGEX_RE_COMP */
 566 \f
 567 /* Internal entry point.  */
 568
 569 /* Searches for a compiled pattern PREG in the string STRING, whose
 570    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 571    mingings with regexec.  START, and RANGE have the same meanings
 572    with re_search.
 573    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 574    otherwise return the error code.
 575    Note: We assume front end functions already check ranges.
 576    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 577
 578 static reg_errcode_t
 579 internal_function
 580 re_search_internal (const regex_t *preg,
 581                     const char *string, int length,
 582                     int start, int range, int stop,
 583                     size_t nmatch, regmatch_t pmatch[],
 584                     int eflags)
 585 {
 586   reg_errcode_t err;
 587   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
 588   int left_lim, right_lim, incr;
 589   int fl_longest_match, match_first, match_kind, match_last = -1;
 590   int extra_nmatch;
 591   int sb, ch;
 592 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 593   re_match_context_t mctx = { .dfa = dfa };
 594 #else
 595   re_match_context_t mctx;
 596 #endif
 597   char *fastmap = (preg->re_fastmap != NULL && preg->re_fastmap_accurate
 598                    && range && !preg->re_can_be_null) ? preg->re_fastmap : NULL;
 599   unsigned REG_TRANSLATE_TYPE t =
 600     (unsigned REG_TRANSLATE_TYPE) preg->re_translate;
 601
 602 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 603   memset (&mctx, '\0', sizeof (re_match_context_t));
 604   mctx.dfa = dfa;
 605 #endif
 606
 607   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 608   nmatch -= extra_nmatch;
 609
 610   /* Check if the DFA haven't been compiled.  */
 611   if (BE (preg->re_used == 0 || dfa->init_state == NULL
 612           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 613           || dfa->init_state_begbuf == NULL, 0))
 614     return REG_NOMATCH;
 615
 616 #ifdef DEBUG
 617   /* We assume front-end functions already check them.  */
 618   assert (start + range >= 0 && start + range <= length);
 619 #endif
 620
 621   /* If initial states with non-begbuf contexts have no elements,
 622      the regex must be anchored.  If preg->re_newline_anchor is set,
 623      we'll never use init_state_nl, so do not check it.  */
 624   if (dfa->init_state->nodes.nelem == 0
 625       && dfa->init_state_word->nodes.nelem == 0
 626       && (dfa->init_state_nl->nodes.nelem == 0
 627           || !preg->re_newline_anchor))
 628     {
 629       if (start != 0 && start + range != 0)
 630         return REG_NOMATCH;
 631       start = range = 0;
 632     }
 633
 634   /* We must check the longest matching, if nmatch > 0.  */
 635   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 636
 637   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 638                             preg->re_translate,
 639                             preg->re_syntax & REG_IGNORE_CASE, dfa);
 640   if (BE (err != REG_NOERROR, 0))
 641     goto free_return;
 642   mctx.input.stop = stop;
 643   mctx.input.raw_stop = stop;
 644   mctx.input.newline_anchor = preg->re_newline_anchor;
 645
 646   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 647   if (BE (err != REG_NOERROR, 0))
 648     goto free_return;
 649
 650   /* We will log all the DFA states through which the dfa pass,
 651      if nmatch > 1, or this dfa has "multibyte node", which is a
 652      back-reference or a node which can accept multibyte character or
 653      multi character collating element.  */
 654   if (nmatch > 1 || dfa->has_mb_node)
 655     {
 656       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 657       if (BE (mctx.state_log == NULL, 0))
 658         {
 659           err = REG_ESPACE;
 660           goto free_return;
 661         }
 662     }
 663   else
 664     mctx.state_log = NULL;
 665
 666   match_first = start;
 667   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 668                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 669
 670   /* Check incrementally whether of not the input string match.  */
 671   incr = (range < 0) ? -1 : 1;
 672   left_lim = (range < 0) ? start + range : start;
 673   right_lim = (range < 0) ? start : start + range;
 674   sb = dfa->mb_cur_max == 1;
 675   match_kind =
 676     (fastmap
 677      ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0)
 678         | (range >= 0 ? 2 : 0)
 679         | (t != NULL ? 1 : 0))
 680      : 8);
 681
 682   for (;; match_first += incr)
 683     {
 684       err = REG_NOMATCH;
 685       if (match_first < left_lim || right_lim < match_first)
 686         goto free_return;
 687
 688       /* Advance as rapidly as possible through the string, until we
 689          find a plausible place to start matching.  This may be done
 690          with varying efficiency, so there are various possibilities:
 691          only the most common of them are specialized, in order to
 692          save on code size.  We use a switch statement for speed.  */
 693       switch (match_kind)
 694         {
 695         case 8:
 696           /* No fastmap.  */
 697           break;
 698
 699         case 7:
 700           /* Fastmap with single-byte translation, match forward.  */
 701           while (BE (match_first < right_lim, 1)
 702                  && !fastmap[t[(unsigned char) string[match_first]]])
 703             ++match_first;
 704           goto forward_match_found_start_or_reached_end;
 705
 706         case 6:
 707           /* Fastmap without translation, match forward.  */
 708           while (BE (match_first < right_lim, 1)
 709                  && !fastmap[(unsigned char) string[match_first]])
 710             ++match_first;
 711
 712         forward_match_found_start_or_reached_end:
 713           if (BE (match_first == right_lim, 0))
 714             {
 715               ch = match_first >= length
 716                        ? 0 : (unsigned char) string[match_first];
 717               if (!fastmap[t ? t[ch] : ch])
 718                 goto free_return;
 719             }
 720           break;
 721
 722         case 4:
 723         case 5:
 724           /* Fastmap without multi-byte translation, match backwards.  */
 725           while (match_first >= left_lim)
 726             {
 727               ch = match_first >= length
 728                        ? 0 : (unsigned char) string[match_first];
 729               if (fastmap[t ? t[ch] : ch])
 730                 break;
 731               --match_first;
 732             }
 733           if (match_first < left_lim)
 734             goto free_return;
 735           break;
 736
 737         default:
 738           /* In this case, we can't determine easily the current byte,
 739              since it might be a component byte of a multibyte
 740              character.  Then we use the constructed buffer instead.  */
 741           for (;;)
 742             {
 743               /* If MATCH_FIRST is out of the valid range, reconstruct the
 744                  buffers.  */
 745               unsigned int offset = match_first - mctx.input.raw_mbs_idx;
 746               if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
 747                 {
 748                   err = re_string_reconstruct (&mctx.input, match_first,
 749                                                eflags);
 750                   if (BE (err != REG_NOERROR, 0))
 751                     goto free_return;
 752
 753                   offset = match_first - mctx.input.raw_mbs_idx;
 754                 }
 755               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 756                  Note that MATCH_FIRST must not be smaller than 0.  */
 757               ch = (match_first >= length
 758                     ? 0 : re_string_byte_at (&mctx.input, offset));
 759               if (fastmap[ch])
 760                 break;
 761               match_first += incr;
 762               if (match_first < left_lim || match_first > right_lim)
 763                 {
 764                   err = REG_NOMATCH;
 765                   goto free_return;
 766                 }
 767             }
 768           break;
 769         }
 770
 771       /* Reconstruct the buffers so that the matcher can assume that
 772          the matching starts from the beginning of the buffer.  */
 773       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 774       if (BE (err != REG_NOERROR, 0))
 775         goto free_return;
 776
 777 #ifdef RE_ENABLE_I18N
 778      /* Don't consider this char as a possible match start if it part,
 779         yet isn't the head, of a multibyte character.  */
 780       if (!sb && !re_string_first_byte (&mctx.input, 0))
 781         continue;
 782 #endif
 783
 784       /* It seems to be appropriate one, then use the matcher.  */
 785       /* We assume that the matching starts from 0.  */
 786       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 787       match_last = check_matching (&mctx, fl_longest_match,
 788                                    range >= 0 ? &match_first : NULL);
 789       if (match_last != -1)
 790         {
 791           if (BE (match_last == -2, 0))
 792             {
 793               err = REG_ESPACE;
 794               goto free_return;
 795             }
 796           else
 797             {
 798               mctx.match_last = match_last;
 799               if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref)
 800                 {
 801                   re_dfastate_t *pstate = mctx.state_log[match_last];
 802                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 803                                                              match_last);
 804                 }
 805               if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match)
 806                   || dfa->nbackref)
 807                 {
 808                   err = prune_impossible_nodes (&mctx);
 809                   if (err == REG_NOERROR)
 810                     break;
 811                   if (BE (err != REG_NOMATCH, 0))
 812                     goto free_return;
 813                   match_last = -1;
 814                 }
 815               else
 816                 break; /* We found a match.  */
 817             }
 818         }
 819
 820       match_ctx_clean (&mctx);
 821     }
 822
 823 #ifdef DEBUG
 824   assert (match_last != -1);
 825   assert (err == REG_NOERROR);
 826 #endif
 827
 828   /* Set pmatch[] if we need.  */
 829   if (nmatch > 0)
 830     {
 831       int reg_idx;
 832
 833       /* Initialize registers.  */
 834       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 835         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 836
 837       /* Set the points where matching start/end.  */
 838       pmatch[0].rm_so = 0;
 839       pmatch[0].rm_eo = mctx.match_last;
 840
 841       if (!preg->re_no_sub && nmatch > 1)
 842         {
 843           err = set_regs (preg, &mctx, nmatch, pmatch,
 844                           dfa->has_plural_match && dfa->nbackref > 0);
 845           if (BE (err != REG_NOERROR, 0))
 846             goto free_return;
 847         }
 848
 849       /* At last, add the offset to the each registers, since we slided
 850          the buffers so that we could assume that the matching starts
 851          from 0.  */
 852       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 853         if (pmatch[reg_idx].rm_so != -1)
 854           {
 855 #ifdef RE_ENABLE_I18N
 856             if (BE (mctx.input.offsets_needed != 0, 0))
 857               {
 858                 pmatch[reg_idx].rm_so =
 859                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 860                    ? mctx.input.valid_raw_len
 861                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 862                 pmatch[reg_idx].rm_eo =
 863                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 864                    ? mctx.input.valid_raw_len
 865                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 866               }
 867 #else
 868             assert (mctx.input.offsets_needed == 0);
 869 #endif
 870             pmatch[reg_idx].rm_so += match_first;
 871             pmatch[reg_idx].rm_eo += match_first;
 872           }
 873       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 874         {
 875           pmatch[nmatch + reg_idx].rm_so = -1;
 876           pmatch[nmatch + reg_idx].rm_eo = -1;
 877         }
 878
 879       if (dfa->subexp_map)
 880         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 881           if (dfa->subexp_map[reg_idx] != reg_idx)
 882             {
 883               pmatch[reg_idx + 1].rm_so
 884                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 885               pmatch[reg_idx + 1].rm_eo
 886                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 887             }
 888     }
 889
 890  free_return:
 891   re_free (mctx.state_log);
 892   if (dfa->nbackref)
 893     match_ctx_free (&mctx);
 894   re_string_destruct (&mctx.input);
 895   return err;
 896 }
 897
 898 static reg_errcode_t
 899 internal_function
 900 prune_impossible_nodes (re_match_context_t *mctx)
 901 {
 902   re_dfa_t *const dfa = mctx->dfa;
 903   int halt_node, match_last;
 904   reg_errcode_t ret;
 905   re_dfastate_t **sifted_states;
 906   re_dfastate_t **lim_states = NULL;
 907   re_sift_context_t sctx;
 908 #ifdef DEBUG
 909   assert (mctx->state_log != NULL);
 910 #endif
 911   match_last = mctx->match_last;
 912   halt_node = mctx->last_node;
 913   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 914   if (BE (sifted_states == NULL, 0))
 915     {
 916       ret = REG_ESPACE;
 917       goto free_return;
 918     }
 919   if (dfa->nbackref)
 920     {
 921       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 922       if (BE (lim_states == NULL, 0))
 923         {
 924           ret = REG_ESPACE;
 925           goto free_return;
 926         }
 927       while (1)
 928         {
 929           memset (lim_states, '\0',
 930                   sizeof (re_dfastate_t *) * (match_last + 1));
 931           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 932                          match_last);
 933           ret = sift_states_backward (mctx, &sctx);
 934           re_node_set_free (&sctx.limits);
 935           if (BE (ret != REG_NOERROR, 0))
 936               goto free_return;
 937           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 938             break;
 939           do
 940             {
 941               --match_last;
 942               if (match_last < 0)
 943                 {
 944                   ret = REG_NOMATCH;
 945                   goto free_return;
 946                 }
 947             } while (mctx->state_log[match_last] == NULL
 948                      || !mctx->state_log[match_last]->halt);
 949           halt_node = check_halt_state_context (mctx,
 950                                                 mctx->state_log[match_last],
 951                                                 match_last);
 952         }
 953       ret = merge_state_array (dfa, sifted_states, lim_states,
 954                                match_last + 1);
 955       re_free (lim_states);
 956       lim_states = NULL;
 957       if (BE (ret != REG_NOERROR, 0))
 958         goto free_return;
 959     }
 960   else
 961     {
 962       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
 963       ret = sift_states_backward (mctx, &sctx);
 964       re_node_set_free (&sctx.limits);
 965       if (BE (ret != REG_NOERROR, 0))
 966         goto free_return;
 967     }
 968   re_free (mctx->state_log);
 969   mctx->state_log = sifted_states;
 970   sifted_states = NULL;
 971   mctx->last_node = halt_node;
 972   mctx->match_last = match_last;
 973   ret = REG_NOERROR;
 974  free_return:
 975   re_free (sifted_states);
 976   re_free (lim_states);
 977   return ret;
 978 }
 979
 980 /* Acquire an initial state and return it.
 981    We must select appropriate initial state depending on the context,
 982    since initial states may have constraints like "\<", "^", etc..  */
 983
 984 static inline re_dfastate_t *
 985 __attribute ((always_inline)) internal_function
 986 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
 987                             int idx)
 988 {
 989   re_dfa_t *const dfa = mctx->dfa;
 990   if (dfa->init_state->has_constraint)
 991     {
 992       unsigned int context;
 993       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
 994       if (IS_WORD_CONTEXT (context))
 995         return dfa->init_state_word;
 996       else if (IS_ORDINARY_CONTEXT (context))
 997         return dfa->init_state;
 998       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 999         return dfa->init_state_begbuf;
1000       else if (IS_NEWLINE_CONTEXT (context))
1001         return dfa->init_state_nl;
1002       else if (IS_BEGBUF_CONTEXT (context))
1003         {
1004           /* It is relatively rare case, then calculate on demand.  */
1005           return re_acquire_state_context (err, dfa,
1006                                            dfa->init_state->entrance_nodes,
1007                                            context);
1008         }
1009       else
1010         /* Must not happen?  */
1011         return dfa->init_state;
1012     }
1013   else
1014     return dfa->init_state;
1015 }
1016
1017 /* Check whether the regular expression match input string INPUT or not,
1018    and return the index where the matching end, return -1 if not match,
1019    or return -2 in case of an error.
1020    FL_LONGEST_MATCH means we want the POSIX longest matching.
1021    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1022    next place where we may want to try matching.
1023    Note that the matcher assume that the maching starts from the current
1024    index of the buffer.  */
1025
1026 static int
1027 internal_function
1028 check_matching (re_match_context_t *mctx, int fl_longest_match,
1029                 int *p_match_first)
1030 {
1031   re_dfa_t *const dfa = mctx->dfa;
1032   reg_errcode_t err;
1033   int match = 0;
1034   int match_last = -1;
1035   int cur_str_idx = re_string_cur_idx (&mctx->input);
1036   re_dfastate_t *cur_state;
1037   int at_init_state = p_match_first != NULL;
1038   int next_start_idx = cur_str_idx;
1039
1040   err = REG_NOERROR;
1041   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1042   /* An initial state must not be NULL (invalid).  */
1043   if (BE (cur_state == NULL, 0))
1044     {
1045       assert (err == REG_ESPACE);
1046       return -2;
1047     }
1048
1049   if (mctx->state_log != NULL)
1050     {
1051       mctx->state_log[cur_str_idx] = cur_state;
1052
1053       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1054          later.  E.g. Processing back references.  */
1055       if (BE (dfa->nbackref, 0))
1056         {
1057           at_init_state = 0;
1058           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1059           if (BE (err != REG_NOERROR, 0))
1060             return err;
1061
1062           if (cur_state->has_backref)
1063             {
1064               err = transit_state_bkref (mctx, &cur_state->nodes);
1065               if (BE (err != REG_NOERROR, 0))
1066                 return err;
1067             }
1068         }
1069     }
1070
1071   /* If the RE accepts NULL string.  */
1072   if (BE (cur_state->halt, 0))
1073     {
1074       if (!cur_state->has_constraint
1075           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1076         {
1077           if (!fl_longest_match)
1078             return cur_str_idx;
1079           else
1080             {
1081               match_last = cur_str_idx;
1082               match = 1;
1083             }
1084         }
1085     }
1086
1087   while (!re_string_eoi (&mctx->input))
1088     {
1089       re_dfastate_t *old_state = cur_state;
1090       int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1091
1092       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1093           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1094               && mctx->input.valid_len < mctx->input.len))
1095         {
1096           err = extend_buffers (mctx);
1097           if (BE (err != REG_NOERROR, 0))
1098             {
1099               assert (err == REG_ESPACE);
1100               return -2;
1101             }
1102         }
1103
1104       cur_state = transit_state (&err, mctx, cur_state);
1105       if (mctx->state_log != NULL)
1106         cur_state = merge_state_with_log (&err, mctx, cur_state);
1107
1108       if (cur_state == NULL)
1109         {
1110           /* Reached the invalid state or an error.  Try to recover a valid
1111              state using the state log, if available and if we have not
1112              already found a valid (even if not the longest) match.  */
1113           if (BE (err != REG_NOERROR, 0))
1114             return -2;
1115
1116           if (mctx->state_log == NULL
1117               || (match && !fl_longest_match)
1118               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1119             break;
1120         }
1121
1122       if (BE (at_init_state, 0))
1123         {
1124           if (old_state == cur_state)
1125             next_start_idx = next_char_idx;
1126           else
1127             at_init_state = 0;
1128         }
1129
1130       if (cur_state->halt)
1131         {
1132           /* Reached a halt state.
1133              Check the halt state can satisfy the current context.  */
1134           if (!cur_state->has_constraint
1135               || check_halt_state_context (mctx, cur_state,
1136                                            re_string_cur_idx (&mctx->input)))
1137             {
1138               /* We found an appropriate halt state.  */
1139               match_last = re_string_cur_idx (&mctx->input);
1140               match = 1;
1141
1142               /* We found a match, do not modify match_first below.  */
1143               p_match_first = NULL;
1144               if (!fl_longest_match)
1145                 break;
1146             }
1147         }
1148     }
1149
1150   if (p_match_first)
1151     *p_match_first += next_start_idx;
1152
1153   return match_last;
1154 }
1155
1156 /* Check NODE match the current context.  */
1157
1158 static int
1159 internal_function
1160 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1161 {
1162   re_token_type_t type = dfa->nodes[node].type;
1163   unsigned int constraint = dfa->nodes[node].constraint;
1164   if (type != END_OF_RE)
1165     return 0;
1166   if (!constraint)
1167     return 1;
1168   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1169     return 0;
1170   return 1;
1171 }
1172
1173 /* Check the halt state STATE match the current context.
1174    Return 0 if not match, if the node, STATE has, is a halt node and
1175    match the context, return the node.  */
1176
1177 static int
1178 internal_function
1179 check_halt_state_context (const re_match_context_t *mctx,
1180                           const re_dfastate_t *state, int idx)
1181 {
1182   int i;
1183   unsigned int context;
1184 #ifdef DEBUG
1185   assert (state->halt);
1186 #endif
1187   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1188   for (i = 0; i < state->nodes.nelem; ++i)
1189     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1190       return state->nodes.elems[i];
1191   return 0;
1192 }
1193
1194 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1195    corresponding to the DFA).
1196    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1197    of errors.  */
1198
1199 static int
1200 internal_function
1201 proceed_next_node (const re_match_context_t *mctx,
1202                    int nregs, regmatch_t *regs, int *pidx, int node,
1203                    re_node_set *eps_via_nodes, struct re_fail_stack_t *fs)
1204 {
1205   re_dfa_t *const dfa = mctx->dfa;
1206   int i, err;
1207   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1208     {
1209       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1210       re_node_set *edests = &dfa->edests[node];
1211       int dest_node;
1212       err = re_node_set_insert (eps_via_nodes, node);
1213       if (BE (err < 0, 0))
1214         return -2;
1215       /* Pick up a valid destination, or return -1 if none is found.  */
1216       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1217         {
1218           int candidate = edests->elems[i];
1219           if (!re_node_set_contains (cur_nodes, candidate))
1220             continue;
1221           if (dest_node == -1)
1222             dest_node = candidate;
1223
1224           else
1225             {
1226               /* In order to avoid infinite loop like "(a*)*", return the second
1227                  epsilon-transition if the first was already considered.  */
1228               if (re_node_set_contains (eps_via_nodes, dest_node))
1229                 return candidate;
1230
1231               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1232               else if (fs != NULL
1233                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1234                                            eps_via_nodes))
1235                 return -2;
1236
1237               /* We know we are going to exit.  */
1238               break;
1239             }
1240         }
1241       return dest_node;
1242     }
1243   else
1244     {
1245       int naccepted = 0;
1246       re_token_type_t type = dfa->nodes[node].type;
1247
1248 #ifdef RE_ENABLE_I18N
1249       if (dfa->nodes[node].accept_mb)
1250         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1251       else
1252 #endif /* RE_ENABLE_I18N */
1253       if (type == OP_BACK_REF)
1254         {
1255           int subexp_idx = dfa->nodes[node].opr.idx + 1;
1256           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1257           if (fs != NULL)
1258             {
1259               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1260                 return -1;
1261               else if (naccepted)
1262                 {
1263                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1264                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1265                               naccepted) != 0)
1266                     return -1;
1267                 }
1268             }
1269
1270           if (naccepted == 0)
1271             {
1272               int dest_node;
1273               err = re_node_set_insert (eps_via_nodes, node);
1274               if (BE (err < 0, 0))
1275                 return -2;
1276               dest_node = dfa->edests[node].elems[0];
1277               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1278                                         dest_node))
1279                 return dest_node;
1280             }
1281         }
1282
1283       if (naccepted != 0
1284           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1285         {
1286           int dest_node = dfa->nexts[node];
1287           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1288           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1289                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1290                                                dest_node)))
1291             return -1;
1292           re_node_set_empty (eps_via_nodes);
1293           return dest_node;
1294         }
1295     }
1296   return -1;
1297 }
1298
1299 static reg_errcode_t
1300 internal_function
1301 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1302                  int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1303 {
1304   reg_errcode_t err;
1305   int num = fs->num++;
1306   if (fs->num == fs->alloc)
1307     {
1308       struct re_fail_stack_ent_t *new_array =
1309         re_realloc (fs->stack, struct re_fail_stack_ent_t, fs->alloc * 2);
1310       if (new_array == NULL)
1311         return REG_ESPACE;
1312       fs->alloc *= 2;
1313       fs->stack = new_array;
1314     }
1315   fs->stack[num].idx = str_idx;
1316   fs->stack[num].node = dest_node;
1317   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1318   if (fs->stack[num].regs == NULL)
1319     return REG_ESPACE;
1320   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1321   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1322   return err;
1323 }
1324
1325 static int
1326 internal_function
1327 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx,
1328                 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1329 {
1330   int num = --fs->num;
1331   assert (num >= 0);
1332   *pidx = fs->stack[num].idx;
1333   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1334   re_node_set_free (eps_via_nodes);
1335   re_free (fs->stack[num].regs);
1336   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1337   return fs->stack[num].node;
1338 }
1339
1340 /* Set the positions where the subexpressions are starts/ends to registers
1341    PMATCH.
1342    Note: We assume that pmatch[0] is already set, and
1343    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1344
1345 static reg_errcode_t
1346 internal_function
1347 set_regs (const regex_t *preg, const re_match_context_t *mctx,
1348           size_t nmatch, regmatch_t *pmatch, int fl_backtrack)
1349 {
1350   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
1351   int idx, cur_node;
1352   re_node_set eps_via_nodes;
1353   struct re_fail_stack_t *fs;
1354   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1355   regmatch_t *prev_idx_match;
1356   int prev_idx_match_malloced = 0;
1357
1358 #ifdef DEBUG
1359   assert (nmatch > 1);
1360   assert (mctx->state_log != NULL);
1361 #endif
1362   if (fl_backtrack)
1363     {
1364       fs = &fs_body;
1365       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1366       if (fs->stack == NULL)
1367         return REG_ESPACE;
1368     }
1369   else
1370     fs = NULL;
1371
1372   cur_node = dfa->init_node;
1373   re_node_set_init_empty (&eps_via_nodes);
1374
1375   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1376     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1377   else
1378     {
1379       prev_idx_match = re_malloc (regmatch_t, nmatch);
1380       if (prev_idx_match == NULL)
1381         {
1382           free_fail_stack_return (fs);
1383           return REG_ESPACE;
1384         }
1385       prev_idx_match_malloced = 1;
1386     }
1387   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1388
1389   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1390     {
1391       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1392
1393       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1394         {
1395           int reg_idx;
1396           if (fs)
1397             {
1398               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1399                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1400                   break;
1401               if (reg_idx == nmatch)
1402                 {
1403                   re_node_set_free (&eps_via_nodes);
1404                   if (prev_idx_match_malloced)
1405                     re_free (prev_idx_match);
1406                   return free_fail_stack_return (fs);
1407                 }
1408               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1409                                          &eps_via_nodes);
1410             }
1411           else
1412             {
1413               re_node_set_free (&eps_via_nodes);
1414               if (prev_idx_match_malloced)
1415                 re_free (prev_idx_match);
1416               return REG_NOERROR;
1417             }
1418         }
1419
1420       /* Proceed to next node.  */
1421       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1422                                     &eps_via_nodes, fs);
1423
1424       if (BE (cur_node < 0, 0))
1425         {
1426           if (BE (cur_node == -2, 0))
1427             {
1428               re_node_set_free (&eps_via_nodes);
1429               if (prev_idx_match_malloced)
1430                 re_free (prev_idx_match);
1431               free_fail_stack_return (fs);
1432               return REG_ESPACE;
1433             }
1434           if (fs)
1435             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1436                                        &eps_via_nodes);
1437           else
1438             {
1439               re_node_set_free (&eps_via_nodes);
1440               if (prev_idx_match_malloced)
1441                 re_free (prev_idx_match);
1442               return REG_NOMATCH;
1443             }
1444         }
1445     }
1446   re_node_set_free (&eps_via_nodes);
1447   if (prev_idx_match_malloced)
1448     re_free (prev_idx_match);
1449   return free_fail_stack_return (fs);
1450 }
1451
1452 static reg_errcode_t
1453 internal_function
1454 free_fail_stack_return (struct re_fail_stack_t *fs)
1455 {
1456   if (fs)
1457     {
1458       int fs_idx;
1459       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1460         {
1461           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1462           re_free (fs->stack[fs_idx].regs);
1463         }
1464       re_free (fs->stack);
1465     }
1466   return REG_NOERROR;
1467 }
1468
1469 static void
1470 internal_function
1471 update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match,
1472              int cur_node, int cur_idx, int nmatch)
1473 {
1474   int type = dfa->nodes[cur_node].type;
1475   if (type == OP_OPEN_SUBEXP)
1476     {
1477       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1478
1479       /* We are at the first node of this sub expression.  */
1480       if (reg_num < nmatch)
1481         {
1482           pmatch[reg_num].rm_so = cur_idx;
1483           pmatch[reg_num].rm_eo = -1;
1484         }
1485     }
1486   else if (type == OP_CLOSE_SUBEXP)
1487     {
1488       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1489       if (reg_num < nmatch)
1490         {
1491           /* We are at the last node of this sub expression.  */
1492           if (pmatch[reg_num].rm_so < cur_idx)
1493             {
1494               pmatch[reg_num].rm_eo = cur_idx;
1495               /* This is a non-empty match or we are not inside an optional
1496                  subexpression.  Accept this right away.  */
1497               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1498             }
1499           else
1500             {
1501               if (dfa->nodes[cur_node].opt_subexp
1502                   && prev_idx_match[reg_num].rm_so != -1)
1503                 /* We transited through an empty match for an optional
1504                    subexpression, like (a?)*, and this is not the subexp's
1505                    first match.  Copy back the old content of the registers
1506                    so that matches of an inner subexpression are undone as
1507                    well, like in ((a?))*.  */
1508                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1509               else
1510                 /* We completed a subexpression, but it may be part of
1511                    an optional one, so do not update PREV_IDX_MATCH.  */
1512                 pmatch[reg_num].rm_eo = cur_idx;
1513             }
1514         }
1515     }
1516 }
1517
1518 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1519    and sift the nodes in each states according to the following rules.
1520    Updated state_log will be wrote to STATE_LOG.
1521
1522    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1523      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1524         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1525         the LAST_NODE, we throw away the node `a'.
1526      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1527         string `s' and transit to `b':
1528         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1529            away the node `a'.
1530         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1531             thrown away, we throw away the node `a'.
1532      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1533         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1534            node `a'.
1535         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1536             we throw away the node `a'.  */
1537
1538 #define STATE_NODE_CONTAINS(state,node) \
1539   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1540
1541 static reg_errcode_t
1542 internal_function
1543 sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx)
1544 {
1545   reg_errcode_t err;
1546   int null_cnt = 0;
1547   int str_idx = sctx->last_str_idx;
1548   re_node_set cur_dest;
1549
1550 #ifdef DEBUG
1551   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1552 #endif
1553
1554   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1555      transit to the last_node and the last_node itself.  */
1556   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1557   if (BE (err != REG_NOERROR, 0))
1558     return err;
1559   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1560   if (BE (err != REG_NOERROR, 0))
1561     goto free_return;
1562
1563   /* Then check each states in the state_log.  */
1564   while (str_idx > 0)
1565     {
1566       /* Update counters.  */
1567       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1568       if (null_cnt > mctx->max_mb_elem_len)
1569         {
1570           memset (sctx->sifted_states, '\0',
1571                   sizeof (re_dfastate_t *) * str_idx);
1572           re_node_set_free (&cur_dest);
1573           return REG_NOERROR;
1574         }
1575       re_node_set_empty (&cur_dest);
1576       --str_idx;
1577
1578       if (mctx->state_log[str_idx])
1579         {
1580           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1581           if (BE (err != REG_NOERROR, 0))
1582             goto free_return;
1583         }
1584
1585       /* Add all the nodes which satisfy the following conditions:
1586          - It can epsilon transit to a node in CUR_DEST.
1587          - It is in CUR_SRC.
1588          And update state_log.  */
1589       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1590       if (BE (err != REG_NOERROR, 0))
1591         goto free_return;
1592     }
1593   err = REG_NOERROR;
1594  free_return:
1595   re_node_set_free (&cur_dest);
1596   return err;
1597 }
1598
1599 static reg_errcode_t
1600 internal_function
1601 build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx,
1602                      int str_idx, re_node_set *cur_dest)
1603 {
1604   re_dfa_t *const dfa = mctx->dfa;
1605   re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1606   int i;
1607
1608   /* Then build the next sifted state.
1609      We build the next sifted state on `cur_dest', and update
1610      `sifted_states[str_idx]' with `cur_dest'.
1611      Note:
1612      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1613      `cur_src' points the node_set of the old `state_log[str_idx]'
1614      (with the epsilon nodes pre-filtered out).  */
1615   for (i = 0; i < cur_src->nelem; i++)
1616     {
1617       int prev_node = cur_src->elems[i];
1618       int naccepted = 0;
1619       int ret;
1620
1621 #ifdef DEBUG
1622       re_token_type_t type = dfa->nodes[prev_node].type;
1623       assert (!IS_EPSILON_NODE (type));
1624 #endif
1625 #ifdef RE_ENABLE_I18N
1626       /* If the node may accept `multi byte'.  */
1627       if (dfa->nodes[prev_node].accept_mb)
1628         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1629                                          str_idx, sctx->last_str_idx);
1630 #endif /* RE_ENABLE_I18N */
1631
1632       /* We don't check backreferences here.
1633          See update_cur_sifted_state().  */
1634       if (!naccepted
1635           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1636           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1637                                   dfa->nexts[prev_node]))
1638         naccepted = 1;
1639
1640       if (naccepted == 0)
1641         continue;
1642
1643       if (sctx->limits.nelem)
1644         {
1645           int to_idx = str_idx + naccepted;
1646           if (check_dst_limits (mctx, &sctx->limits,
1647                                 dfa->nexts[prev_node], to_idx,
1648                                 prev_node, str_idx))
1649             continue;
1650         }
1651       ret = re_node_set_insert (cur_dest, prev_node);
1652       if (BE (ret == -1, 0))
1653         return REG_ESPACE;
1654     }
1655
1656   return REG_NOERROR;
1657 }
1658
1659 /* Helper functions.  */
1660
1661 static reg_errcode_t
1662 internal_function
1663 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1664 {
1665   int top = mctx->state_log_top;
1666
1667   if (next_state_log_idx >= mctx->input.bufs_len
1668       || (next_state_log_idx >= mctx->input.valid_len
1669           && mctx->input.valid_len < mctx->input.len))
1670     {
1671       reg_errcode_t err;
1672       err = extend_buffers (mctx);
1673       if (BE (err != REG_NOERROR, 0))
1674         return err;
1675     }
1676
1677   if (top < next_state_log_idx)
1678     {
1679       memset (mctx->state_log + top + 1, '\0',
1680               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1681       mctx->state_log_top = next_state_log_idx;
1682     }
1683   return REG_NOERROR;
1684 }
1685
1686 static reg_errcode_t
1687 internal_function
1688 merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src,
1689                    int num)
1690 {
1691   int st_idx;
1692   reg_errcode_t err;
1693   for (st_idx = 0; st_idx < num; ++st_idx)
1694     {
1695       if (dst[st_idx] == NULL)
1696         dst[st_idx] = src[st_idx];
1697       else if (src[st_idx] != NULL)
1698         {
1699           re_node_set merged_set;
1700           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1701                                         &src[st_idx]->nodes);
1702           if (BE (err != REG_NOERROR, 0))
1703             return err;
1704           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1705           re_node_set_free (&merged_set);
1706           if (BE (err != REG_NOERROR, 0))
1707             return err;
1708         }
1709     }
1710   return REG_NOERROR;
1711 }
1712
1713 static reg_errcode_t
1714 internal_function
1715 update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx,
1716                          int str_idx, re_node_set *dest_nodes)
1717 {
1718   re_dfa_t *const dfa = mctx->dfa;
1719   reg_errcode_t err;
1720   const re_node_set *candidates;
1721   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1722                 : &mctx->state_log[str_idx]->nodes);
1723
1724   if (dest_nodes->nelem == 0)
1725     sctx->sifted_states[str_idx] = NULL;
1726   else
1727     {
1728       if (candidates)
1729         {
1730           /* At first, add the nodes which can epsilon transit to a node in
1731              DEST_NODE.  */
1732           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1733           if (BE (err != REG_NOERROR, 0))
1734             return err;
1735
1736           /* Then, check the limitations in the current sift_context.  */
1737           if (sctx->limits.nelem)
1738             {
1739               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1740                                          mctx->bkref_ents, str_idx);
1741               if (BE (err != REG_NOERROR, 0))
1742                 return err;
1743             }
1744         }
1745
1746       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1747       if (BE (err != REG_NOERROR, 0))
1748         return err;
1749     }
1750
1751   if (candidates && mctx->state_log[str_idx]->has_backref)
1752     {
1753       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1754       if (BE (err != REG_NOERROR, 0))
1755         return err;
1756     }
1757   return REG_NOERROR;
1758 }
1759
1760 static reg_errcode_t
1761 internal_function
1762 add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes,
1763                        const re_node_set *candidates)
1764 {
1765   reg_errcode_t err = REG_NOERROR;
1766   int i;
1767
1768   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1769   if (BE (err != REG_NOERROR, 0))
1770     return err;
1771
1772   if (!state->inveclosure.alloc)
1773     {
1774       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1775       if (BE (err != REG_NOERROR, 0))
1776         return REG_ESPACE;
1777       for (i = 0; i < dest_nodes->nelem; i++)
1778         re_node_set_merge (&state->inveclosure,
1779                            dfa->inveclosures + dest_nodes->elems[i]);
1780     }
1781   return re_node_set_add_intersect (dest_nodes, candidates,
1782                                     &state->inveclosure);
1783 }
1784
1785 static reg_errcode_t
1786 internal_function
1787 sub_epsilon_src_nodes (re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1788                        const re_node_set *candidates)
1789 {
1790     int ecl_idx;
1791     reg_errcode_t err;
1792     re_node_set *inv_eclosure = dfa->inveclosures + node;
1793     re_node_set except_nodes;
1794     re_node_set_init_empty (&except_nodes);
1795     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1796       {
1797         int cur_node = inv_eclosure->elems[ecl_idx];
1798         if (cur_node == node)
1799           continue;
1800         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1801           {
1802             int edst1 = dfa->edests[cur_node].elems[0];
1803             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1804                          ? dfa->edests[cur_node].elems[1] : -1);
1805             if ((!re_node_set_contains (inv_eclosure, edst1)
1806                  && re_node_set_contains (dest_nodes, edst1))
1807                 || (edst2 > 0
1808                     && !re_node_set_contains (inv_eclosure, edst2)
1809                     && re_node_set_contains (dest_nodes, edst2)))
1810               {
1811                 err = re_node_set_add_intersect (&except_nodes, candidates,
1812                                                  dfa->inveclosures + cur_node);
1813                 if (BE (err != REG_NOERROR, 0))
1814                   {
1815                     re_node_set_free (&except_nodes);
1816                     return err;
1817                   }
1818               }
1819           }
1820       }
1821     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1822       {
1823         int cur_node = inv_eclosure->elems[ecl_idx];
1824         if (!re_node_set_contains (&except_nodes, cur_node))
1825           {
1826             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1827             re_node_set_remove_at (dest_nodes, idx);
1828           }
1829       }
1830     re_node_set_free (&except_nodes);
1831     return REG_NOERROR;
1832 }
1833
1834 static int
1835 internal_function
1836 check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
1837                   int dst_node, int dst_idx, int src_node, int src_idx)
1838 {
1839   re_dfa_t *const dfa = mctx->dfa;
1840   int lim_idx, src_pos, dst_pos;
1841
1842   int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1843   int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1844   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1845     {
1846       int subexp_idx;
1847       struct re_backref_cache_entry *ent;
1848       ent = mctx->bkref_ents + limits->elems[lim_idx];
1849       subexp_idx = dfa->nodes[ent->node].opr.idx;
1850
1851       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1852                                            subexp_idx, dst_node, dst_idx,
1853                                            dst_bkref_idx);
1854       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1855                                            subexp_idx, src_node, src_idx,
1856                                            src_bkref_idx);
1857
1858       /* In case of:
1859          <src> <dst> ( <subexp> )
1860          ( <subexp> ) <src> <dst>
1861          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1862       if (src_pos == dst_pos)
1863         continue; /* This is unrelated limitation.  */
1864       else
1865         return 1;
1866     }
1867   return 0;
1868 }
1869
1870 static int
1871 internal_function
1872 check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries,
1873                              int subexp_idx, int from_node, int bkref_idx)
1874 {
1875   re_dfa_t *const dfa = mctx->dfa;
1876   re_node_set *eclosures = dfa->eclosures + from_node;
1877   int node_idx;
1878
1879   /* Else, we are on the boundary: examine the nodes on the epsilon
1880      closure.  */
1881   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1882     {
1883       int node = eclosures->elems[node_idx];
1884       switch (dfa->nodes[node].type)
1885         {
1886         case OP_BACK_REF:
1887           if (bkref_idx != -1)
1888             {
1889               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1890               do
1891                 {
1892                   int dst, cpos;
1893
1894                   if (ent->node != node)
1895                     continue;
1896
1897                   if (subexp_idx
1898                       < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
1899                       && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
1900                     continue;
1901
1902                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1903                      OP_CLOSE_SUBEXP cases below.  But, if the
1904                      destination node is the same node as the source
1905                      node, don't recurse because it would cause an
1906                      infinite loop: a regex that exhibits this behavior
1907                      is ()\1*\1*  */
1908                   dst = dfa->edests[node].elems[0];
1909                   if (dst == from_node)
1910                     {
1911                       if (boundaries & 1)
1912                         return -1;
1913                       else /* if (boundaries & 2) */
1914                         return 0;
1915                     }
1916
1917                   cpos =
1918                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1919                                                  dst, bkref_idx);
1920                   if (cpos == -1 /* && (boundaries & 1) */)
1921                     return -1;
1922                   if (cpos == 0 && (boundaries & 2))
1923                     return 0;
1924
1925                   if (subexp_idx
1926                       < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
1927                     ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
1928                 }
1929               while (ent++->more);
1930             }
1931           break;
1932
1933         case OP_OPEN_SUBEXP:
1934           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1935             return -1;
1936           break;
1937
1938         case OP_CLOSE_SUBEXP:
1939           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1940             return 0;
1941           break;
1942
1943         default:
1944             break;
1945         }
1946     }
1947
1948   return (boundaries & 2) ? 1 : 0;
1949 }
1950
1951 static int
1952 internal_function
1953 check_dst_limits_calc_pos (re_match_context_t *mctx, int limit, int subexp_idx,
1954                            int from_node, int str_idx, int bkref_idx)
1955 {
1956   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1957   int boundaries;
1958
1959   /* If we are outside the range of the subexpression, return -1 or 1.  */
1960   if (str_idx < lim->subexp_from)
1961     return -1;
1962
1963   if (lim->subexp_to < str_idx)
1964     return 1;
1965
1966   /* If we are within the subexpression, return 0.  */
1967   boundaries = (str_idx == lim->subexp_from);
1968   boundaries |= (str_idx == lim->subexp_to) << 1;
1969   if (boundaries == 0)
1970     return 0;
1971
1972   /* Else, examine epsilon closure.  */
1973   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1974                                       from_node, bkref_idx);
1975 }
1976
1977 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1978    which are against limitations from DEST_NODES. */
1979
1980 static reg_errcode_t
1981 internal_function
1982 check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes,
1983                      const re_node_set *candidates, re_node_set *limits,
1984                      struct re_backref_cache_entry *bkref_ents, int str_idx)
1985 {
1986   reg_errcode_t err;
1987   int node_idx, lim_idx;
1988
1989   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1990     {
1991       int subexp_idx;
1992       struct re_backref_cache_entry *ent;
1993       ent = bkref_ents + limits->elems[lim_idx];
1994
1995       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1996         continue; /* This is unrelated limitation.  */
1997
1998       subexp_idx = dfa->nodes[ent->node].opr.idx;
1999       if (ent->subexp_to == str_idx)
2000         {
2001           int ops_node = -1;
2002           int cls_node = -1;
2003           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2004             {
2005               int node = dest_nodes->elems[node_idx];
2006               re_token_type_t type = dfa->nodes[node].type;
2007               if (type == OP_OPEN_SUBEXP
2008                   && subexp_idx == dfa->nodes[node].opr.idx)
2009                 ops_node = node;
2010               else if (type == OP_CLOSE_SUBEXP
2011                        && subexp_idx == dfa->nodes[node].opr.idx)
2012                 cls_node = node;
2013             }
2014
2015           /* Check the limitation of the open subexpression.  */
2016           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2017           if (ops_node >= 0)
2018             {
2019               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2020                                            candidates);
2021               if (BE (err != REG_NOERROR, 0))
2022                 return err;
2023             }
2024
2025           /* Check the limitation of the close subexpression.  */
2026           if (cls_node >= 0)
2027             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2028               {
2029                 int node = dest_nodes->elems[node_idx];
2030                 if (!re_node_set_contains (dfa->inveclosures + node,
2031                                            cls_node)
2032                     && !re_node_set_contains (dfa->eclosures + node,
2033                                               cls_node))
2034                   {
2035                     /* It is against this limitation.
2036                        Remove it form the current sifted state.  */
2037                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2038                                                  candidates);
2039                     if (BE (err != REG_NOERROR, 0))
2040                       return err;
2041                     --node_idx;
2042                   }
2043               }
2044         }
2045       else /* (ent->subexp_to != str_idx)  */
2046         {
2047           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2048             {
2049               int node = dest_nodes->elems[node_idx];
2050               re_token_type_t type = dfa->nodes[node].type;
2051               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2052                 {
2053                   if (subexp_idx != dfa->nodes[node].opr.idx)
2054                     continue;
2055                   /* It is against this limitation.
2056                      Remove it form the current sifted state.  */
2057                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2058                                                candidates);
2059                   if (BE (err != REG_NOERROR, 0))
2060                     return err;
2061                 }
2062             }
2063         }
2064     }
2065   return REG_NOERROR;
2066 }
2067
2068 static reg_errcode_t
2069 internal_function
2070 sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx,
2071                    int str_idx, const re_node_set *candidates)
2072 {
2073   re_dfa_t *const dfa = mctx->dfa;
2074   reg_errcode_t err;
2075   int node_idx, node;
2076   re_sift_context_t local_sctx;
2077   int first_idx = search_cur_bkref_entry (mctx, str_idx);
2078
2079   if (first_idx == -1)
2080     return REG_NOERROR;
2081
2082   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2083
2084   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2085     {
2086       int enabled_idx;
2087       re_token_type_t type;
2088       struct re_backref_cache_entry *entry;
2089       node = candidates->elems[node_idx];
2090       type = dfa->nodes[node].type;
2091       /* Avoid infinite loop for the REs like "()\1+".  */
2092       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2093         continue;
2094       if (type != OP_BACK_REF)
2095         continue;
2096
2097       entry = mctx->bkref_ents + first_idx;
2098       enabled_idx = first_idx;
2099       do
2100         {
2101           int subexp_len, to_idx, dst_node, ret;
2102           re_dfastate_t *cur_state;
2103
2104           if (entry->node != node)
2105             continue;
2106           subexp_len = entry->subexp_to - entry->subexp_from;
2107           to_idx = str_idx + subexp_len;
2108           dst_node = (subexp_len ? dfa->nexts[node]
2109                       : dfa->edests[node].elems[0]);
2110
2111           if (to_idx > sctx->last_str_idx
2112               || sctx->sifted_states[to_idx] == NULL
2113               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2114               || check_dst_limits (mctx, &sctx->limits, node,
2115                                    str_idx, dst_node, to_idx))
2116             continue;
2117
2118           if (local_sctx.sifted_states == NULL)
2119             {
2120               local_sctx = *sctx;
2121               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2122               if (BE (err != REG_NOERROR, 0))
2123                 goto free_return;
2124             }
2125           local_sctx.last_node = node;
2126           local_sctx.last_str_idx = str_idx;
2127           ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2128           if (BE (ret < 0, 0))
2129             {
2130               err = REG_ESPACE;
2131               goto free_return;
2132             }
2133           cur_state = local_sctx.sifted_states[str_idx];
2134           err = sift_states_backward (mctx, &local_sctx);
2135           if (BE (err != REG_NOERROR, 0))
2136             goto free_return;
2137           if (sctx->limited_states != NULL)
2138             {
2139               err = merge_state_array (dfa, sctx->limited_states,
2140                                        local_sctx.sifted_states,
2141                                        str_idx + 1);
2142               if (BE (err != REG_NOERROR, 0))
2143                 goto free_return;
2144             }
2145           local_sctx.sifted_states[str_idx] = cur_state;
2146           re_node_set_remove (&local_sctx.limits, enabled_idx);
2147
2148           /* mctx->bkref_ents may have changed, reload the pointer.  */
2149           entry = mctx->bkref_ents + enabled_idx;
2150         }
2151       while (enabled_idx++, entry++->more);
2152     }
2153   err = REG_NOERROR;
2154  free_return:
2155   if (local_sctx.sifted_states != NULL)
2156     {
2157       re_node_set_free (&local_sctx.limits);
2158     }
2159
2160   return err;
2161 }
2162
2163
2164 #ifdef RE_ENABLE_I18N
2165 static int
2166 internal_function
2167 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2168                      int node_idx, int str_idx, int max_str_idx)
2169 {
2170   re_dfa_t *const dfa = mctx->dfa;
2171   int naccepted;
2172   /* Check the node can accept `multi byte'.  */
2173   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2174   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2175       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2176                             dfa->nexts[node_idx]))
2177     /* The node can't accept the `multi byte', or the
2178        destination was already thrown away, then the node
2179        could't accept the current input `multi byte'.   */
2180     naccepted = 0;
2181   /* Otherwise, it is sure that the node could accept
2182      `naccepted' bytes input.  */
2183   return naccepted;
2184 }
2185 #endif /* RE_ENABLE_I18N */
2186
2187 \f
2188 /* Functions for state transition.  */
2189
2190 /* Return the next state to which the current state STATE will transit by
2191    accepting the current input byte, and update STATE_LOG if necessary.
2192    If STATE can accept a multibyte char/collating element/back reference
2193    update the destination of STATE_LOG.  */
2194
2195 static re_dfastate_t *
2196 internal_function
2197 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2198                re_dfastate_t *state)
2199 {
2200   re_dfastate_t **trtable;
2201   unsigned char ch;
2202
2203 #ifdef RE_ENABLE_I18N
2204   /* If the current state can accept multibyte.  */
2205   if (BE (state->accept_mb, 0))
2206     {
2207       *err = transit_state_mb (mctx, state);
2208       if (BE (*err != REG_NOERROR, 0))
2209         return NULL;
2210     }
2211 #endif /* RE_ENABLE_I18N */
2212
2213   /* Then decide the next state with the single byte.  */
2214 #if 0
2215   if (0)
2216     /* don't use transition table  */
2217     return transit_state_sb (err, mctx, state);
2218 #endif
2219
2220   /* Use transition table  */
2221   ch = re_string_fetch_byte (&mctx->input);
2222   for (;;)
2223     {
2224       trtable = state->trtable;
2225       if (BE (trtable != NULL, 1))
2226         return trtable[ch];
2227
2228       trtable = state->word_trtable;
2229       if (BE (trtable != NULL, 1))
2230         {
2231           unsigned int context;
2232           context
2233             = re_string_context_at (&mctx->input,
2234                                     re_string_cur_idx (&mctx->input) - 1,
2235                                     mctx->eflags);
2236           if (IS_WORD_CONTEXT (context))
2237             return trtable[ch + SBC_MAX];
2238           else
2239             return trtable[ch];
2240         }
2241
2242       if (!build_trtable (mctx->dfa, state))
2243         {
2244           *err = REG_ESPACE;
2245           return NULL;
2246         }
2247
2248       /* Retry, we now have a transition table.  */
2249     }
2250 }
2251
2252 /* Update the state_log if we need */
2253 re_dfastate_t *
2254 internal_function
2255 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2256                       re_dfastate_t *next_state)
2257 {
2258   re_dfa_t *const dfa = mctx->dfa;
2259   int cur_idx = re_string_cur_idx (&mctx->input);
2260
2261   if (cur_idx > mctx->state_log_top)
2262     {
2263       mctx->state_log[cur_idx] = next_state;
2264       mctx->state_log_top = cur_idx;
2265     }
2266   else if (mctx->state_log[cur_idx] == 0)
2267     {
2268       mctx->state_log[cur_idx] = next_state;
2269     }
2270   else
2271     {
2272       re_dfastate_t *pstate;
2273       unsigned int context;
2274       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2275       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2276          the destination of a multibyte char/collating element/
2277          back reference.  Then the next state is the union set of
2278          these destinations and the results of the transition table.  */
2279       pstate = mctx->state_log[cur_idx];
2280       log_nodes = pstate->entrance_nodes;
2281       if (next_state != NULL)
2282         {
2283           table_nodes = next_state->entrance_nodes;
2284           *err = re_node_set_init_union (&next_nodes, table_nodes,
2285                                              log_nodes);
2286           if (BE (*err != REG_NOERROR, 0))
2287             return NULL;
2288         }
2289       else
2290         next_nodes = *log_nodes;
2291       /* Note: We already add the nodes of the initial state,
2292          then we don't need to add them here.  */
2293
2294       context = re_string_context_at (&mctx->input,
2295                                       re_string_cur_idx (&mctx->input) - 1,
2296                                       mctx->eflags);
2297       next_state = mctx->state_log[cur_idx]
2298         = re_acquire_state_context (err, dfa, &next_nodes, context);
2299       /* We don't need to check errors here, since the return value of
2300          this function is next_state and ERR is already set.  */
2301
2302       if (table_nodes != NULL)
2303         re_node_set_free (&next_nodes);
2304     }
2305
2306   if (BE (dfa->nbackref, 0) && next_state != NULL)
2307     {
2308       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2309          later.  We must check them here, since the back references in the
2310          next state might use them.  */
2311       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2312                                         cur_idx);
2313       if (BE (*err != REG_NOERROR, 0))
2314         return NULL;
2315
2316       /* If the next state has back references.  */
2317       if (next_state->has_backref)
2318         {
2319           *err = transit_state_bkref (mctx, &next_state->nodes);
2320           if (BE (*err != REG_NOERROR, 0))
2321             return NULL;
2322           next_state = mctx->state_log[cur_idx];
2323         }
2324     }
2325
2326   return next_state;
2327 }
2328
2329 /* Skip bytes in the input that correspond to part of a
2330    multi-byte match, then look in the log for a state
2331    from which to restart matching.  */
2332 static re_dfastate_t *
2333 internal_function
2334 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2335 {
2336   re_dfastate_t *cur_state = NULL;
2337   do
2338     {
2339       int max = mctx->state_log_top;
2340       int cur_str_idx = re_string_cur_idx (&mctx->input);
2341
2342       do
2343         {
2344           if (++cur_str_idx > max)
2345             return NULL;
2346           re_string_skip_bytes (&mctx->input, 1);
2347         }
2348       while (mctx->state_log[cur_str_idx] == NULL);
2349
2350       cur_state = merge_state_with_log (err, mctx, NULL);
2351     }
2352   while (*err == REG_NOERROR && cur_state == NULL);
2353   return cur_state;
2354 }
2355
2356 /* Helper functions for transit_state.  */
2357
2358 /* From the node set CUR_NODES, pick up the nodes whose types are
2359    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2360    expression. And register them to use them later for evaluating the
2361    correspoding back references.  */
2362
2363 static reg_errcode_t
2364 internal_function
2365 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2366                            int str_idx)
2367 {
2368   re_dfa_t *const dfa = mctx->dfa;
2369   int node_idx;
2370   reg_errcode_t err;
2371
2372   /* TODO: This isn't efficient.
2373            Because there might be more than one nodes whose types are
2374            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2375            nodes.
2376            E.g. RE: (a){2}  */
2377   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2378     {
2379       int node = cur_nodes->elems[node_idx];
2380       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2381           && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
2382           && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
2383         {
2384           err = match_ctx_add_subtop (mctx, node, str_idx);
2385           if (BE (err != REG_NOERROR, 0))
2386             return err;
2387         }
2388     }
2389   return REG_NOERROR;
2390 }
2391
2392 #if 0
2393 /* Return the next state to which the current state STATE will transit by
2394    accepting the current input byte.  */
2395
2396 static re_dfastate_t *
2397 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2398                   re_dfastate_t *state)
2399 {
2400   re_dfa_t *const dfa = mctx->dfa;
2401   re_node_set next_nodes;
2402   re_dfastate_t *next_state;
2403   int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2404   unsigned int context;
2405
2406   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2407   if (BE (*err != REG_NOERROR, 0))
2408     return NULL;
2409   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2410     {
2411       int cur_node = state->nodes.elems[node_cnt];
2412       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2413         {
2414           *err = re_node_set_merge (&next_nodes,
2415                                     dfa->eclosures + dfa->nexts[cur_node]);
2416           if (BE (*err != REG_NOERROR, 0))
2417             {
2418               re_node_set_free (&next_nodes);
2419               return NULL;
2420             }
2421         }
2422     }
2423   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2424   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2425   /* We don't need to check errors here, since the return value of
2426      this function is next_state and ERR is already set.  */
2427
2428   re_node_set_free (&next_nodes);
2429   re_string_skip_bytes (&mctx->input, 1);
2430   return next_state;
2431 }
2432 #endif
2433
2434 #ifdef RE_ENABLE_I18N
2435 static reg_errcode_t
2436 internal_function
2437 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2438 {
2439   re_dfa_t *const dfa = mctx->dfa;
2440   reg_errcode_t err;
2441   int i;
2442
2443   for (i = 0; i < pstate->nodes.nelem; ++i)
2444     {
2445       re_node_set dest_nodes, *new_nodes;
2446       int cur_node_idx = pstate->nodes.elems[i];
2447       int naccepted, dest_idx;
2448       unsigned int context;
2449       re_dfastate_t *dest_state;
2450
2451       if (!dfa->nodes[cur_node_idx].accept_mb)
2452         continue;
2453
2454       if (dfa->nodes[cur_node_idx].constraint)
2455         {
2456           context = re_string_context_at (&mctx->input,
2457                                           re_string_cur_idx (&mctx->input),
2458                                           mctx->eflags);
2459           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2460                                            context))
2461             continue;
2462         }
2463
2464       /* How many bytes the node can accept?  */
2465       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2466                                            re_string_cur_idx (&mctx->input));
2467       if (naccepted == 0)
2468         continue;
2469
2470       /* The node can accepts `naccepted' bytes.  */
2471       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2472       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2473                                : mctx->max_mb_elem_len);
2474       err = clean_state_log_if_needed (mctx, dest_idx);
2475       if (BE (err != REG_NOERROR, 0))
2476         return err;
2477 #ifdef DEBUG
2478       assert (dfa->nexts[cur_node_idx] != -1);
2479 #endif
2480       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2481
2482       dest_state = mctx->state_log[dest_idx];
2483       if (dest_state == NULL)
2484         dest_nodes = *new_nodes;
2485       else
2486         {
2487           err = re_node_set_init_union (&dest_nodes,
2488                                         dest_state->entrance_nodes, new_nodes);
2489           if (BE (err != REG_NOERROR, 0))
2490             return err;
2491         }
2492       context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2493       mctx->state_log[dest_idx]
2494         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2495       if (dest_state != NULL)
2496         re_node_set_free (&dest_nodes);
2497       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2498         return err;
2499     }
2500   return REG_NOERROR;
2501 }
2502 #endif /* RE_ENABLE_I18N */
2503
2504 static reg_errcode_t
2505 internal_function
2506 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2507 {
2508   re_dfa_t *const dfa = mctx->dfa;
2509   reg_errcode_t err;
2510   int i;
2511   int cur_str_idx = re_string_cur_idx (&mctx->input);
2512
2513   for (i = 0; i < nodes->nelem; ++i)
2514     {
2515       int dest_str_idx, prev_nelem, bkc_idx;
2516       int node_idx = nodes->elems[i];
2517       unsigned int context;
2518       const re_token_t *node = dfa->nodes + node_idx;
2519       re_node_set *new_dest_nodes;
2520
2521       /* Check whether `node' is a backreference or not.  */
2522       if (node->type != OP_BACK_REF)
2523         continue;
2524
2525       if (node->constraint)
2526         {
2527           context = re_string_context_at (&mctx->input, cur_str_idx,
2528                                           mctx->eflags);
2529           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2530             continue;
2531         }
2532
2533       /* `node' is a backreference.
2534          Check the substring which the substring matched.  */
2535       bkc_idx = mctx->nbkref_ents;
2536       err = get_subexp (mctx, node_idx, cur_str_idx);
2537       if (BE (err != REG_NOERROR, 0))
2538         goto free_return;
2539
2540       /* And add the epsilon closures (which is `new_dest_nodes') of
2541          the backreference to appropriate state_log.  */
2542 #ifdef DEBUG
2543       assert (dfa->nexts[node_idx] != -1);
2544 #endif
2545       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2546         {
2547           int subexp_len;
2548           re_dfastate_t *dest_state;
2549           struct re_backref_cache_entry *bkref_ent;
2550           bkref_ent = mctx->bkref_ents + bkc_idx;
2551           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2552             continue;
2553           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2554           new_dest_nodes = (subexp_len == 0
2555                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2556                             : dfa->eclosures + dfa->nexts[node_idx]);
2557           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2558                           - bkref_ent->subexp_from);
2559           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2560                                           mctx->eflags);
2561           dest_state = mctx->state_log[dest_str_idx];
2562           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2563                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2564           /* Add `new_dest_node' to state_log.  */
2565           if (dest_state == NULL)
2566             {
2567               mctx->state_log[dest_str_idx]
2568                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2569                                             context);
2570               if (BE (mctx->state_log[dest_str_idx] == NULL
2571                       && err != REG_NOERROR, 0))
2572                 goto free_return;
2573             }
2574           else
2575             {
2576               re_node_set dest_nodes;
2577               err = re_node_set_init_union (&dest_nodes,
2578                                             dest_state->entrance_nodes,
2579                                             new_dest_nodes);
2580               if (BE (err != REG_NOERROR, 0))
2581                 {
2582                   re_node_set_free (&dest_nodes);
2583                   goto free_return;
2584                 }
2585               mctx->state_log[dest_str_idx]
2586                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2587               re_node_set_free (&dest_nodes);
2588               if (BE (mctx->state_log[dest_str_idx] == NULL
2589                       && err != REG_NOERROR, 0))
2590                 goto free_return;
2591             }
2592           /* We need to check recursively if the backreference can epsilon
2593              transit.  */
2594           if (subexp_len == 0
2595               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2596             {
2597               err = check_subexp_matching_top (mctx, new_dest_nodes,
2598                                                cur_str_idx);
2599               if (BE (err != REG_NOERROR, 0))
2600                 goto free_return;
2601               err = transit_state_bkref (mctx, new_dest_nodes);
2602               if (BE (err != REG_NOERROR, 0))
2603                 goto free_return;
2604             }
2605         }
2606     }
2607   err = REG_NOERROR;
2608  free_return:
2609   return err;
2610 }
2611
2612 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2613    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2614    Note that we might collect inappropriate candidates here.
2615    However, the cost of checking them strictly here is too high, then we
2616    delay these checking for prune_impossible_nodes().  */
2617
2618 static reg_errcode_t
2619 internal_function
2620 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2621 {
2622   re_dfa_t *const dfa = mctx->dfa;
2623   int subexp_num, sub_top_idx;
2624   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2625   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2626   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2627   if (cache_idx != -1)
2628     {
2629       const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2630       do
2631         if (entry->node == bkref_node)
2632           return REG_NOERROR; /* We already checked it.  */
2633       while (entry++->more);
2634     }
2635
2636   subexp_num = dfa->nodes[bkref_node].opr.idx;
2637
2638   /* For each sub expression  */
2639   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2640     {
2641       reg_errcode_t err;
2642       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2643       re_sub_match_last_t *sub_last;
2644       int sub_last_idx, sl_str, bkref_str_off;
2645
2646       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2647         continue; /* It isn't related.  */
2648
2649       sl_str = sub_top->str_idx;
2650       bkref_str_off = bkref_str_idx;
2651       /* At first, check the last node of sub expressions we already
2652          evaluated.  */
2653       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2654         {
2655           int sl_str_diff;
2656           sub_last = sub_top->lasts[sub_last_idx];
2657           sl_str_diff = sub_last->str_idx - sl_str;
2658           /* The matched string by the sub expression match with the substring
2659              at the back reference?  */
2660           if (sl_str_diff > 0)
2661             {
2662               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2663                 {
2664                   /* Not enough chars for a successful match.  */
2665                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2666                     break;
2667
2668                   err = clean_state_log_if_needed (mctx,
2669                                                    bkref_str_off
2670                                                    + sl_str_diff);
2671                   if (BE (err != REG_NOERROR, 0))
2672                     return err;
2673                   buf = (const char *) re_string_get_buffer (&mctx->input);
2674                 }
2675               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2676                 break; /* We don't need to search this sub expression any more.  */
2677             }
2678           bkref_str_off += sl_str_diff;
2679           sl_str += sl_str_diff;
2680           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2681                                 bkref_str_idx);
2682
2683           /* Reload buf, since the preceding call might have reallocated
2684              the buffer.  */
2685           buf = (const char *) re_string_get_buffer (&mctx->input);
2686
2687           if (err == REG_NOMATCH)
2688             continue;
2689           if (BE (err != REG_NOERROR, 0))
2690             return err;
2691         }
2692
2693       if (sub_last_idx < sub_top->nlasts)
2694         continue;
2695       if (sub_last_idx > 0)
2696         ++sl_str;
2697       /* Then, search for the other last nodes of the sub expression.  */
2698       for (; sl_str <= bkref_str_idx; ++sl_str)
2699         {
2700           int cls_node, sl_str_off;
2701           const re_node_set *nodes;
2702           sl_str_off = sl_str - sub_top->str_idx;
2703           /* The matched string by the sub expression match with the substring
2704              at the back reference?  */
2705           if (sl_str_off > 0)
2706             {
2707               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2708                 {
2709                   /* If we are at the end of the input, we cannot match.  */
2710                   if (bkref_str_off >= mctx->input.len)
2711                     break;
2712
2713                   err = extend_buffers (mctx);
2714                   if (BE (err != REG_NOERROR, 0))
2715                     return err;
2716
2717                   buf = (const char *) re_string_get_buffer (&mctx->input);
2718                 }
2719               if (buf [bkref_str_off++] != buf[sl_str - 1])
2720                 break; /* We don't need to search this sub expression
2721                           any more.  */
2722             }
2723           if (mctx->state_log[sl_str] == NULL)
2724             continue;
2725           /* Does this state have a ')' of the sub expression?  */
2726           nodes = &mctx->state_log[sl_str]->nodes;
2727           cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2728           if (cls_node == -1)
2729             continue; /* No.  */
2730           if (sub_top->path == NULL)
2731             {
2732               sub_top->path = re_calloc (state_array_t,
2733                                          sl_str - sub_top->str_idx + 1);
2734               if (sub_top->path == NULL)
2735                 return REG_ESPACE;
2736             }
2737           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2738              in the current context?  */
2739           err = check_arrival (mctx, sub_top->path, sub_top->node,
2740                                sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2741           if (err == REG_NOMATCH)
2742               continue;
2743           if (BE (err != REG_NOERROR, 0))
2744               return err;
2745           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2746           if (BE (sub_last == NULL, 0))
2747             return REG_ESPACE;
2748           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2749                                 bkref_str_idx);
2750           if (err == REG_NOMATCH)
2751             continue;
2752         }
2753     }
2754   return REG_NOERROR;
2755 }
2756
2757 /* Helper functions for get_subexp().  */
2758
2759 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2760    If it can arrive, register the sub expression expressed with SUB_TOP
2761    and SUB_LAST.  */
2762
2763 static reg_errcode_t
2764 internal_function
2765 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2766                 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2767 {
2768   reg_errcode_t err;
2769   int to_idx;
2770   /* Can the subexpression arrive the back reference?  */
2771   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2772                        sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2773   if (err != REG_NOERROR)
2774     return err;
2775   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2776                              sub_last->str_idx);
2777   if (BE (err != REG_NOERROR, 0))
2778     return err;
2779   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2780   return clean_state_log_if_needed (mctx, to_idx);
2781 }
2782
2783 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2784    Search '(' if FL_OPEN, or search ')' otherwise.
2785    TODO: This function isn't efficient...
2786          Because there might be more than one nodes whose types are
2787          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2788          nodes.
2789          E.g. RE: (a){2}  */
2790
2791 static int
2792 internal_function
2793 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2794                   int subexp_idx, int type)
2795 {
2796   int cls_idx;
2797   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2798     {
2799       int cls_node = nodes->elems[cls_idx];
2800       const re_token_t *node = dfa->nodes + cls_node;
2801       if (node->type == type
2802           && node->opr.idx == subexp_idx)
2803         return cls_node;
2804     }
2805   return -1;
2806 }
2807
2808 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2809    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2810    heavily reused.
2811    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2812
2813 static reg_errcode_t
2814 internal_function
2815 check_arrival (re_match_context_t *mctx, state_array_t *path,
2816                int top_node, int top_str, int last_node, int last_str,
2817                int type)
2818 {
2819   re_dfa_t *const dfa = mctx->dfa;
2820   reg_errcode_t err;
2821   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2822   re_dfastate_t *cur_state = NULL;
2823   re_node_set *cur_nodes, next_nodes;
2824   re_dfastate_t **backup_state_log;
2825   unsigned int context;
2826
2827   subexp_num = dfa->nodes[top_node].opr.idx;
2828   /* Extend the buffer if we need.  */
2829   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2830     {
2831       re_dfastate_t **new_array;
2832       int old_alloc = path->alloc;
2833       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2834       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2835       if (new_array == NULL)
2836         {
2837           path->alloc = old_alloc;
2838           return REG_ESPACE;
2839         }
2840       path->array = new_array;
2841       memset (new_array + old_alloc, '\0',
2842               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2843     }
2844
2845   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2846
2847   /* Temporary modify MCTX.  */
2848   backup_state_log = mctx->state_log;
2849   backup_cur_idx = mctx->input.cur_idx;
2850   mctx->state_log = path->array;
2851   mctx->input.cur_idx = str_idx;
2852
2853   /* Setup initial node set.  */
2854   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2855   if (str_idx == top_str)
2856     {
2857       err = re_node_set_init_1 (&next_nodes, top_node);
2858       if (BE (err != REG_NOERROR, 0))
2859         return err;
2860       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2861       if (BE (err != REG_NOERROR, 0))
2862         {
2863           re_node_set_free (&next_nodes);
2864           return err;
2865         }
2866     }
2867   else
2868     {
2869       cur_state = mctx->state_log[str_idx];
2870       if (cur_state && cur_state->has_backref)
2871         {
2872           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2873           if (BE ( err != REG_NOERROR, 0))
2874             return err;
2875         }
2876       else
2877         re_node_set_init_empty (&next_nodes);
2878     }
2879   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2880     {
2881       if (next_nodes.nelem)
2882         {
2883           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2884                                     subexp_num, type);
2885           if (BE ( err != REG_NOERROR, 0))
2886             {
2887               re_node_set_free (&next_nodes);
2888               return err;
2889             }
2890         }
2891       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2892       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2893         {
2894           re_node_set_free (&next_nodes);
2895           return err;
2896         }
2897       mctx->state_log[str_idx] = cur_state;
2898     }
2899
2900   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2901     {
2902       re_node_set_empty (&next_nodes);
2903       if (mctx->state_log[str_idx + 1])
2904         {
2905           err = re_node_set_merge (&next_nodes,
2906                                    &mctx->state_log[str_idx + 1]->nodes);
2907           if (BE (err != REG_NOERROR, 0))
2908             {
2909               re_node_set_free (&next_nodes);
2910               return err;
2911             }
2912         }
2913       if (cur_state)
2914         {
2915           err = check_arrival_add_next_nodes (mctx, str_idx,
2916                                               &cur_state->non_eps_nodes, &next_nodes);
2917           if (BE (err != REG_NOERROR, 0))
2918             {
2919               re_node_set_free (&next_nodes);
2920               return err;
2921             }
2922         }
2923       ++str_idx;
2924       if (next_nodes.nelem)
2925         {
2926           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2927           if (BE (err != REG_NOERROR, 0))
2928             {
2929               re_node_set_free (&next_nodes);
2930               return err;
2931             }
2932           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2933                                     subexp_num, type);
2934           if (BE ( err != REG_NOERROR, 0))
2935             {
2936               re_node_set_free (&next_nodes);
2937               return err;
2938             }
2939         }
2940       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2941       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2942       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2943         {
2944           re_node_set_free (&next_nodes);
2945           return err;
2946         }
2947       mctx->state_log[str_idx] = cur_state;
2948       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2949     }
2950   re_node_set_free (&next_nodes);
2951   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2952                : &mctx->state_log[last_str]->nodes);
2953   path->next_idx = str_idx;
2954
2955   /* Fix MCTX.  */
2956   mctx->state_log = backup_state_log;
2957   mctx->input.cur_idx = backup_cur_idx;
2958
2959   /* Then check the current node set has the node LAST_NODE.  */
2960   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2961     return REG_NOERROR;
2962
2963   return REG_NOMATCH;
2964 }
2965
2966 /* Helper functions for check_arrival.  */
2967
2968 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2969    to NEXT_NODES.
2970    TODO: This function is similar to the functions transit_state*(),
2971          however this function has many additional works.
2972          Can't we unify them?  */
2973
2974 static reg_errcode_t
2975 internal_function
2976 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
2977                               re_node_set *cur_nodes,
2978                               re_node_set *next_nodes)
2979 {
2980   re_dfa_t *const dfa = mctx->dfa;
2981   int result;
2982   int cur_idx;
2983   reg_errcode_t err;
2984   re_node_set union_set;
2985   re_node_set_init_empty (&union_set);
2986   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2987     {
2988       int naccepted = 0;
2989       int cur_node = cur_nodes->elems[cur_idx];
2990 #ifdef DEBUG
2991       re_token_type_t type = dfa->nodes[cur_node].type;
2992       assert (!IS_EPSILON_NODE (type));
2993 #endif
2994 #ifdef RE_ENABLE_I18N
2995       /* If the node may accept `multi byte'.  */
2996       if (dfa->nodes[cur_node].accept_mb)
2997         {
2998           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2999                                                str_idx);
3000           if (naccepted > 1)
3001             {
3002               re_dfastate_t *dest_state;
3003               int next_node = dfa->nexts[cur_node];
3004               int next_idx = str_idx + naccepted;
3005               dest_state = mctx->state_log[next_idx];
3006               re_node_set_empty (&union_set);
3007               if (dest_state)
3008                 {
3009                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3010                   if (BE (err != REG_NOERROR, 0))
3011                     {
3012                       re_node_set_free (&union_set);
3013                       return err;
3014                     }
3015                 }
3016               result = re_node_set_insert (&union_set, next_node);
3017               if (BE (result < 0, 0))
3018                 {
3019                   re_node_set_free (&union_set);
3020                   return REG_ESPACE;
3021                 }
3022               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3023                                                             &union_set);
3024               if (BE (mctx->state_log[next_idx] == NULL
3025                       && err != REG_NOERROR, 0))
3026                 {
3027                   re_node_set_free (&union_set);
3028                   return err;
3029                 }
3030             }
3031         }
3032 #endif /* RE_ENABLE_I18N */
3033       if (naccepted
3034           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3035         {
3036           result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3037           if (BE (result < 0, 0))
3038             {
3039               re_node_set_free (&union_set);
3040               return REG_ESPACE;
3041             }
3042         }
3043     }
3044   re_node_set_free (&union_set);
3045   return REG_NOERROR;
3046 }
3047
3048 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3049    CUR_NODES, however exclude the nodes which are:
3050     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3051     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3052 */
3053
3054 static reg_errcode_t
3055 internal_function
3056 check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes,
3057                           int ex_subexp, int type)
3058 {
3059   reg_errcode_t err;
3060   int idx, outside_node;
3061   re_node_set new_nodes;
3062 #ifdef DEBUG
3063   assert (cur_nodes->nelem);
3064 #endif
3065   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3066   if (BE (err != REG_NOERROR, 0))
3067     return err;
3068   /* Create a new node set NEW_NODES with the nodes which are epsilon
3069      closures of the node in CUR_NODES.  */
3070
3071   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3072     {
3073       int cur_node = cur_nodes->elems[idx];
3074       re_node_set *eclosure = dfa->eclosures + cur_node;
3075       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3076       if (outside_node == -1)
3077         {
3078           /* There are no problematic nodes, just merge them.  */
3079           err = re_node_set_merge (&new_nodes, eclosure);
3080           if (BE (err != REG_NOERROR, 0))
3081             {
3082               re_node_set_free (&new_nodes);
3083               return err;
3084             }
3085         }
3086       else
3087         {
3088           /* There are problematic nodes, re-calculate incrementally.  */
3089           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3090                                               ex_subexp, type);
3091           if (BE (err != REG_NOERROR, 0))
3092             {
3093               re_node_set_free (&new_nodes);
3094               return err;
3095             }
3096         }
3097     }
3098   re_node_set_free (cur_nodes);
3099   *cur_nodes = new_nodes;
3100   return REG_NOERROR;
3101 }
3102
3103 /* Helper function for check_arrival_expand_ecl.
3104    Check incrementally the epsilon closure of TARGET, and if it isn't
3105    problematic append it to DST_NODES.  */
3106
3107 static reg_errcode_t
3108 internal_function
3109 check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes,
3110                               int target, int ex_subexp, int type)
3111 {
3112   int cur_node;
3113   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3114     {
3115       int err;
3116
3117       if (dfa->nodes[cur_node].type == type
3118           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3119         {
3120           if (type == OP_CLOSE_SUBEXP)
3121             {
3122               err = re_node_set_insert (dst_nodes, cur_node);
3123               if (BE (err == -1, 0))
3124                 return REG_ESPACE;
3125             }
3126           break;
3127         }
3128       err = re_node_set_insert (dst_nodes, cur_node);
3129       if (BE (err == -1, 0))
3130         return REG_ESPACE;
3131       if (dfa->edests[cur_node].nelem == 0)
3132         break;
3133       if (dfa->edests[cur_node].nelem == 2)
3134         {
3135           reg_errcode_t ret =
3136             check_arrival_expand_ecl_sub (dfa, dst_nodes,
3137                                           dfa->edests[cur_node].elems[1],
3138                                           ex_subexp, type);
3139           if (BE (ret != REG_NOERROR, 0))
3140             return ret;
3141         }
3142       cur_node = dfa->edests[cur_node].elems[0];
3143     }
3144   return REG_NOERROR;
3145 }
3146
3147
3148 /* For all the back references in the current state, calculate the
3149    destination of the back references by the appropriate entry
3150    in MCTX->BKREF_ENTS.  */
3151
3152 static reg_errcode_t
3153 internal_function
3154 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3155                     int cur_str, int subexp_num, int type)
3156 {
3157   re_dfa_t *const dfa = mctx->dfa;
3158   reg_errcode_t err;
3159   int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3160   struct re_backref_cache_entry *ent;
3161
3162   if (cache_idx_start == -1)
3163     return REG_NOERROR;
3164
3165  restart:
3166   ent = mctx->bkref_ents + cache_idx_start;
3167   do
3168     {
3169       int to_idx, next_node;
3170
3171       /* Is this entry ENT is appropriate?  */
3172       if (!re_node_set_contains (cur_nodes, ent->node))
3173         continue; /* No.  */
3174
3175       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3176       /* Calculate the destination of the back reference, and append it
3177          to MCTX->STATE_LOG.  */
3178       if (to_idx == cur_str)
3179         {
3180           /* The backreference did epsilon transit, we must re-check all the
3181              node in the current state.  */
3182           re_node_set new_dests;
3183           reg_errcode_t err2, err3;
3184           next_node = dfa->edests[ent->node].elems[0];
3185           if (re_node_set_contains (cur_nodes, next_node))
3186             continue;
3187           err = re_node_set_init_1 (&new_dests, next_node);
3188           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3189           err3 = re_node_set_merge (cur_nodes, &new_dests);
3190           re_node_set_free (&new_dests);
3191           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3192                   || err3 != REG_NOERROR, 0))
3193             {
3194               err = (err != REG_NOERROR ? err
3195                      : (err2 != REG_NOERROR ? err2 : err3));
3196               return err;
3197             }
3198           /* TODO: It is still inefficient...  */
3199           goto restart;
3200         }
3201       else
3202         {
3203           re_node_set union_set;
3204           next_node = dfa->nexts[ent->node];
3205           if (mctx->state_log[to_idx])
3206             {
3207               int ret;
3208               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3209                                         next_node))
3210                 continue;
3211               err = re_node_set_init_copy (&union_set,
3212                                            &mctx->state_log[to_idx]->nodes);
3213               ret = re_node_set_insert (&union_set, next_node);
3214               if (BE (err != REG_NOERROR || ret < 0, 0))
3215                 {
3216                   re_node_set_free (&union_set);
3217                   err = err != REG_NOERROR ? err : REG_ESPACE;
3218                   return err;
3219                 }
3220             }
3221           else
3222             {
3223               err = re_node_set_init_1 (&union_set, next_node);
3224               if (BE (err != REG_NOERROR, 0))
3225                 return err;
3226             }
3227           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3228           re_node_set_free (&union_set);
3229           if (BE (mctx->state_log[to_idx] == NULL
3230                   && err != REG_NOERROR, 0))
3231             return err;
3232         }
3233     }
3234   while (ent++->more);
3235   return REG_NOERROR;
3236 }
3237
3238 /* Build transition table for the state.
3239    Return 1 if succeeded, otherwise return NULL.  */
3240
3241 static int
3242 internal_function
3243 build_trtable (re_dfa_t *dfa, re_dfastate_t *state)
3244 {
3245   reg_errcode_t err;
3246   int i, j, ch, need_word_trtable = 0;
3247   unsigned int elem, mask;
3248   int dests_node_malloced = 0, dest_states_malloced = 0;
3249   int ndests; /* Number of the destination states from `state'.  */
3250   re_dfastate_t **trtable;
3251   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3252   re_node_set follows, *dests_node;
3253   bitset *dests_ch;
3254   bitset acceptable;
3255
3256   /* We build DFA states which corresponds to the destination nodes
3257      from `state'.  `dests_node[i]' represents the nodes which i-th
3258      destination state contains, and `dests_ch[i]' represents the
3259      characters which i-th destination state accepts.  */
3260   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3261     dests_node = (re_node_set *)
3262       alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3263   else
3264     {
3265       dests_node = (re_node_set *)
3266         malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3267       if (BE (dests_node == NULL, 0))
3268         return 0;
3269       dests_node_malloced = 1;
3270     }
3271   dests_ch = (bitset *) (dests_node + SBC_MAX);
3272
3273   /* Initialize transiton table.  */
3274   state->word_trtable = state->trtable = NULL;
3275
3276   /* At first, group all nodes belonging to `state' into several
3277      destinations.  */
3278   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3279   if (BE (ndests <= 0, 0))
3280     {
3281       if (dests_node_malloced)
3282         free (dests_node);
3283       /* Return 0 in case of an error, 1 otherwise.  */
3284       if (ndests == 0)
3285         {
3286           state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3287           return 1;
3288         }
3289       return 0;
3290     }
3291
3292   err = re_node_set_alloc (&follows, ndests + 1);
3293   if (BE (err != REG_NOERROR, 0))
3294     goto out_free;
3295
3296   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3297                          + ndests * 3 * sizeof (re_dfastate_t *)))
3298     dest_states = (re_dfastate_t **)
3299       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3300   else
3301     {
3302       dest_states = (re_dfastate_t **)
3303         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3304       if (BE (dest_states == NULL, 0))
3305         {
3306 out_free:
3307           if (dest_states_malloced)
3308             free (dest_states);
3309           re_node_set_free (&follows);
3310           for (i = 0; i < ndests; ++i)
3311             re_node_set_free (dests_node + i);
3312           if (dests_node_malloced)
3313             free (dests_node);
3314           return 0;
3315         }
3316       dest_states_malloced = 1;
3317     }
3318   dest_states_word = dest_states + ndests;
3319   dest_states_nl = dest_states_word + ndests;
3320   bitset_empty (acceptable);
3321
3322   /* Then build the states for all destinations.  */
3323   for (i = 0; i < ndests; ++i)
3324     {
3325       int next_node;
3326       re_node_set_empty (&follows);
3327       /* Merge the follows of this destination states.  */
3328       for (j = 0; j < dests_node[i].nelem; ++j)
3329         {
3330           next_node = dfa->nexts[dests_node[i].elems[j]];
3331           if (next_node != -1)
3332             {
3333               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3334               if (BE (err != REG_NOERROR, 0))
3335                 goto out_free;
3336             }
3337         }
3338       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3339       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3340         goto out_free;
3341       /* If the new state has context constraint,
3342          build appropriate states for these contexts.  */
3343       if (dest_states[i]->has_constraint)
3344         {
3345           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3346                                                           CONTEXT_WORD);
3347           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3348             goto out_free;
3349
3350           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3351             need_word_trtable = 1;
3352
3353           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3354                                                         CONTEXT_NEWLINE);
3355           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3356             goto out_free;
3357         }
3358       else
3359         {
3360           dest_states_word[i] = dest_states[i];
3361           dest_states_nl[i] = dest_states[i];
3362         }
3363       bitset_merge (acceptable, dests_ch[i]);
3364     }
3365
3366   if (!BE (need_word_trtable, 0))
3367     {
3368       /* We don't care about whether the following character is a word
3369          character, or we are in a single-byte character set so we can
3370          discern by looking at the character code: allocate a
3371          256-entry transition table.  */
3372       trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3373       if (BE (trtable == NULL, 0))
3374         goto out_free;
3375
3376       /* For all characters ch...:  */
3377       for (i = 0; i < BITSET_UINTS; ++i)
3378         for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3379              elem;
3380              mask <<= 1, elem >>= 1, ++ch)
3381           if (BE (elem & 1, 0))
3382             {
3383               /* There must be exactly one destination which accepts
3384                  character ch.  See group_nodes_into_DFAstates.  */
3385               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3386                 ;
3387
3388               /* j-th destination accepts the word character ch.  */
3389               if (dfa->word_char[i] & mask)
3390                 trtable[ch] = dest_states_word[j];
3391               else
3392                 trtable[ch] = dest_states[j];
3393             }
3394     }
3395   else
3396     {
3397       /* We care about whether the following character is a word
3398          character, and we are in a multi-byte character set: discern
3399          by looking at the character code: build two 256-entry
3400          transition tables, one starting at trtable[0] and one
3401          starting at trtable[SBC_MAX].  */
3402       trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX);
3403       if (BE (trtable == NULL, 0))
3404         goto out_free;
3405
3406       /* For all characters ch...:  */
3407       for (i = 0; i < BITSET_UINTS; ++i)
3408         for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3409              elem;
3410              mask <<= 1, elem >>= 1, ++ch)
3411           if (BE (elem & 1, 0))
3412             {
3413               /* There must be exactly one destination which accepts
3414                  character ch.  See group_nodes_into_DFAstates.  */
3415               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3416                 ;
3417
3418               /* j-th destination accepts the word character ch.  */
3419               trtable[ch] = dest_states[j];
3420               trtable[ch + SBC_MAX] = dest_states_word[j];
3421             }
3422     }
3423
3424   /* new line */
3425   if (bitset_contain (acceptable, NEWLINE_CHAR))
3426     {
3427       /* The current state accepts newline character.  */
3428       for (j = 0; j < ndests; ++j)
3429         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3430           {
3431             /* k-th destination accepts newline character.  */
3432             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3433             if (need_word_trtable)
3434               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3435             /* There must be only one destination which accepts
3436                newline.  See group_nodes_into_DFAstates.  */
3437             break;
3438           }
3439     }
3440
3441   if (dest_states_malloced)
3442     free (dest_states);
3443
3444   re_node_set_free (&follows);
3445   for (i = 0; i < ndests; ++i)
3446     re_node_set_free (dests_node + i);
3447
3448   if (dests_node_malloced)
3449     free (dests_node);
3450
3451   return 1;
3452 }
3453
3454 /* Group all nodes belonging to STATE into several destinations.
3455    Then for all destinations, set the nodes belonging to the destination
3456    to DESTS_NODE[i] and set the characters accepted by the destination
3457    to DEST_CH[i].  This function return the number of destinations.  */
3458
3459 static int
3460 internal_function
3461 group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state,
3462                             re_node_set *dests_node, bitset *dests_ch)
3463 {
3464   reg_errcode_t err;
3465   int result;
3466   int i, j, k;
3467   int ndests; /* Number of the destinations from `state'.  */
3468   bitset accepts; /* Characters a node can accept.  */
3469   const re_node_set *cur_nodes = &state->nodes;
3470   bitset_empty (accepts);
3471   ndests = 0;
3472
3473   /* For all the nodes belonging to `state',  */
3474   for (i = 0; i < cur_nodes->nelem; ++i)
3475     {
3476       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3477       re_token_type_t type = node->type;
3478       unsigned int constraint = node->constraint;
3479
3480       /* Enumerate all single byte character this node can accept.  */
3481       if (type == CHARACTER)
3482         bitset_set (accepts, node->opr.c);
3483       else if (type == SIMPLE_BRACKET)
3484         {
3485           bitset_merge (accepts, node->opr.sbcset);
3486         }
3487       else if (type == OP_PERIOD)
3488         {
3489 #ifdef RE_ENABLE_I18N
3490           if (dfa->mb_cur_max > 1)
3491             bitset_merge (accepts, dfa->sb_char);
3492           else
3493 #endif
3494             bitset_set_all (accepts);
3495           if (!(dfa->syntax & REG_DOT_NEWLINE))
3496             bitset_clear (accepts, '\n');
3497           if (dfa->syntax & REG_DOT_NOT_NULL)
3498             bitset_clear (accepts, '\0');
3499         }
3500 #ifdef RE_ENABLE_I18N
3501       else if (type == OP_UTF8_PERIOD)
3502         {
3503           memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3504           if (!(dfa->syntax & REG_DOT_NEWLINE))
3505             bitset_clear (accepts, '\n');
3506           if (dfa->syntax & REG_DOT_NOT_NULL)
3507             bitset_clear (accepts, '\0');
3508         }
3509 #endif
3510       else
3511         continue;
3512
3513       /* Check the `accepts' and sift the characters which are not
3514          match it the context.  */
3515       if (constraint)
3516         {
3517           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3518             {
3519               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3520               bitset_empty (accepts);
3521               if (accepts_newline)
3522                 bitset_set (accepts, NEWLINE_CHAR);
3523               else
3524                 continue;
3525             }
3526           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3527             {
3528               bitset_empty (accepts);
3529               continue;
3530             }
3531
3532           if (constraint & NEXT_WORD_CONSTRAINT)
3533             {
3534               unsigned int any_set = 0;
3535               if (type == CHARACTER && !node->word_char)
3536                 {
3537                   bitset_empty (accepts);
3538                   continue;
3539                 }
3540 #ifdef RE_ENABLE_I18N
3541               if (dfa->mb_cur_max > 1)
3542                 for (j = 0; j < BITSET_UINTS; ++j)
3543                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3544               else
3545 #endif
3546                 for (j = 0; j < BITSET_UINTS; ++j)
3547                   any_set |= (accepts[j] &= dfa->word_char[j]);
3548               if (!any_set)
3549                 continue;
3550             }
3551           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3552             {
3553               unsigned int any_set = 0;
3554               if (type == CHARACTER && node->word_char)
3555                 {
3556                   bitset_empty (accepts);
3557                   continue;
3558                 }
3559 #ifdef RE_ENABLE_I18N
3560               if (dfa->mb_cur_max > 1)
3561                 for (j = 0; j < BITSET_UINTS; ++j)
3562                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3563               else
3564 #endif
3565                 for (j = 0; j < BITSET_UINTS; ++j)
3566                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3567               if (!any_set)
3568                 continue;
3569             }
3570         }
3571
3572       /* Then divide `accepts' into DFA states, or create a new
3573          state.  Above, we make sure that accepts is not empty.  */
3574       for (j = 0; j < ndests; ++j)
3575         {
3576           bitset intersec; /* Intersection sets, see below.  */
3577           bitset remains;
3578           /* Flags, see below.  */
3579           int has_intersec, not_subset, not_consumed;
3580
3581           /* Optimization, skip if this state doesn't accept the character.  */
3582           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3583             continue;
3584
3585           /* Enumerate the intersection set of this state and `accepts'.  */
3586           has_intersec = 0;
3587           for (k = 0; k < BITSET_UINTS; ++k)
3588             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3589           /* And skip if the intersection set is empty.  */
3590           if (!has_intersec)
3591             continue;
3592
3593           /* Then check if this state is a subset of `accepts'.  */
3594           not_subset = not_consumed = 0;
3595           for (k = 0; k < BITSET_UINTS; ++k)
3596             {
3597               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3598               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3599             }
3600
3601           /* If this state isn't a subset of `accepts', create a
3602              new group state, which has the `remains'. */
3603           if (not_subset)
3604             {
3605               bitset_copy (dests_ch[ndests], remains);
3606               bitset_copy (dests_ch[j], intersec);
3607               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3608               if (BE (err != REG_NOERROR, 0))
3609                 goto error_return;
3610               ++ndests;
3611             }
3612
3613           /* Put the position in the current group. */
3614           result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3615           if (BE (result < 0, 0))
3616             goto error_return;
3617
3618           /* If all characters are consumed, go to next node. */
3619           if (!not_consumed)
3620             break;
3621         }
3622       /* Some characters remain, create a new group. */
3623       if (j == ndests)
3624         {
3625           bitset_copy (dests_ch[ndests], accepts);
3626           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3627           if (BE (err != REG_NOERROR, 0))
3628             goto error_return;
3629           ++ndests;
3630           bitset_empty (accepts);
3631         }
3632     }
3633   return ndests;
3634  error_return:
3635   for (j = 0; j < ndests; ++j)
3636     re_node_set_free (dests_node + j);
3637   return -1;
3638 }
3639
3640 #ifdef RE_ENABLE_I18N
3641 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3642    Return the number of the bytes the node accepts.
3643    STR_IDX is the current index of the input string.
3644
3645    This function handles the nodes which can accept one character, or
3646    one collating element like '.', '[a-z]', opposite to the other nodes
3647    can only accept one byte.  */
3648
3649 static int
3650 internal_function
3651 check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
3652                          const re_string_t *input, int str_idx)
3653 {
3654   const re_token_t *node = dfa->nodes + node_idx;
3655   int char_len, elem_len;
3656   int i;
3657
3658   if (BE (node->type == OP_UTF8_PERIOD, 0))
3659     {
3660       unsigned char c = re_string_byte_at (input, str_idx), d;
3661       if (BE (c < 0xc2, 1))
3662         return 0;
3663
3664       if (str_idx + 2 > input->len)
3665         return 0;
3666
3667       d = re_string_byte_at (input, str_idx + 1);
3668       if (c < 0xe0)
3669         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3670       else if (c < 0xf0)
3671         {
3672           char_len = 3;
3673           if (c == 0xe0 && d < 0xa0)
3674             return 0;
3675         }
3676       else if (c < 0xf8)
3677         {
3678           char_len = 4;
3679           if (c == 0xf0 && d < 0x90)
3680             return 0;
3681         }
3682       else if (c < 0xfc)
3683         {
3684           char_len = 5;
3685           if (c == 0xf8 && d < 0x88)
3686             return 0;
3687         }
3688       else if (c < 0xfe)
3689         {
3690           char_len = 6;
3691           if (c == 0xfc && d < 0x84)
3692             return 0;
3693         }
3694       else
3695         return 0;
3696
3697       if (str_idx + char_len > input->len)
3698         return 0;
3699
3700       for (i = 1; i < char_len; ++i)
3701         {
3702           d = re_string_byte_at (input, str_idx + i);
3703           if (d < 0x80 || d > 0xbf)
3704             return 0;
3705         }
3706       return char_len;
3707     }
3708
3709   char_len = re_string_char_size_at (input, str_idx);
3710   if (node->type == OP_PERIOD)
3711     {
3712       if (char_len <= 1)
3713         return 0;
3714       /* FIXME: I don't think this if is needed, as both '\n'
3715          and '\0' are char_len == 1.  */
3716       /* '.' accepts any one character except the following two cases.  */
3717       if ((!(dfa->syntax & REG_DOT_NEWLINE) &&
3718            re_string_byte_at (input, str_idx) == '\n') ||
3719           ((dfa->syntax & REG_DOT_NOT_NULL) &&
3720            re_string_byte_at (input, str_idx) == '\0'))
3721         return 0;
3722       return char_len;
3723     }
3724
3725   elem_len = re_string_elem_size_at (input, str_idx);
3726   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3727     return 0;
3728
3729   if (node->type == COMPLEX_BRACKET)
3730     {
3731       const re_charset_t *cset = node->opr.mbcset;
3732 # ifdef _LIBC
3733       const unsigned char *pin
3734         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3735       int j;
3736       uint32_t nrules;
3737 # endif /* _LIBC */
3738       int match_len = 0;
3739       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3740                     ? re_string_wchar_at (input, str_idx) : 0);
3741
3742       /* match with multibyte character?  */
3743       for (i = 0; i < cset->nmbchars; ++i)
3744         if (wc == cset->mbchars[i])
3745           {
3746             match_len = char_len;
3747             goto check_node_accept_bytes_match;
3748           }
3749       /* match with character_class?  */
3750       for (i = 0; i < cset->nchar_classes; ++i)
3751         {
3752           wctype_t wt = cset->char_classes[i];
3753           if (__iswctype (wc, wt))
3754             {
3755               match_len = char_len;
3756               goto check_node_accept_bytes_match;
3757             }
3758         }
3759
3760 # ifdef _LIBC
3761       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3762       if (nrules != 0)
3763         {
3764           unsigned int in_collseq = 0;
3765           const int32_t *table, *indirect;
3766           const unsigned char *weights, *extra;
3767           const char *collseqwc;
3768           int32_t idx;
3769           /* This #include defines a local function!  */
3770 #  include <locale/weight.h>
3771
3772           /* match with collating_symbol?  */
3773           if (cset->ncoll_syms)
3774             extra = (const unsigned char *)
3775               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3776           for (i = 0; i < cset->ncoll_syms; ++i)
3777             {
3778               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3779               /* Compare the length of input collating element and
3780                  the length of current collating element.  */
3781               if (*coll_sym != elem_len)
3782                 continue;
3783               /* Compare each bytes.  */
3784               for (j = 0; j < *coll_sym; j++)
3785                 if (pin[j] != coll_sym[1 + j])
3786                   break;
3787               if (j == *coll_sym)
3788                 {
3789                   /* Match if every bytes is equal.  */
3790                   match_len = j;
3791                   goto check_node_accept_bytes_match;
3792                 }
3793             }
3794
3795           if (cset->nranges)
3796             {
3797               if (elem_len <= char_len)
3798                 {
3799                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3800                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3801                 }
3802               else
3803                 in_collseq = find_collation_sequence_value (pin, elem_len);
3804             }
3805           /* match with range expression?  */
3806           for (i = 0; i < cset->nranges; ++i)
3807             if (cset->range_starts[i] <= in_collseq
3808                 && in_collseq <= cset->range_ends[i])
3809               {
3810                 match_len = elem_len;
3811                 goto check_node_accept_bytes_match;
3812               }
3813
3814           /* match with equivalence_class?  */
3815           if (cset->nequiv_classes)
3816             {
3817               const unsigned char *cp = pin;
3818               table = (const int32_t *)
3819                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3820               weights = (const unsigned char *)
3821                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3822               extra = (const unsigned char *)
3823                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3824               indirect = (const int32_t *)
3825                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3826               idx = findidx (&cp);
3827               if (idx > 0)
3828                 for (i = 0; i < cset->nequiv_classes; ++i)
3829                   {
3830                     int32_t equiv_class_idx = cset->equiv_classes[i];
3831                     size_t weight_len = weights[idx];
3832                     if (weight_len == weights[equiv_class_idx])
3833                       {
3834                         int cnt = 0;
3835                         while (cnt <= weight_len
3836                                && (weights[equiv_class_idx + 1 + cnt]
3837                                    == weights[idx + 1 + cnt]))
3838                           ++cnt;
3839                         if (cnt > weight_len)
3840                           {
3841                             match_len = elem_len;
3842                             goto check_node_accept_bytes_match;
3843                           }
3844                       }
3845                   }
3846             }
3847         }
3848       else
3849 # endif /* _LIBC */
3850         {
3851           /* match with range expression?  */
3852 #if __GNUC__ >= 2
3853           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3854 #else
3855           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3856           cmp_buf[2] = wc;
3857 #endif
3858           for (i = 0; i < cset->nranges; ++i)
3859             {
3860               cmp_buf[0] = cset->range_starts[i];
3861               cmp_buf[4] = cset->range_ends[i];
3862               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3863                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3864                 {
3865                   match_len = char_len;
3866                   goto check_node_accept_bytes_match;
3867                 }
3868             }
3869         }
3870     check_node_accept_bytes_match:
3871       if (!cset->non_match)
3872         return match_len;
3873       else
3874         {
3875           if (match_len > 0)
3876             return 0;
3877           else
3878             return (elem_len > char_len) ? elem_len : char_len;
3879         }
3880     }
3881   return 0;
3882 }
3883
3884 # ifdef _LIBC
3885 static unsigned int
3886 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3887 {
3888   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3889   if (nrules == 0)
3890     {
3891       if (mbs_len == 1)
3892         {
3893           /* No valid character.  Match it as a single byte character.  */
3894           const unsigned char *collseq = (const unsigned char *)
3895             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3896           return collseq[mbs[0]];
3897         }
3898       return UINT_MAX;
3899     }
3900   else
3901     {
3902       int32_t idx;
3903       const unsigned char *extra = (const unsigned char *)
3904         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3905       int32_t extrasize = (const unsigned char *)
3906         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3907
3908       for (idx = 0; idx < extrasize;)
3909         {
3910           int mbs_cnt, found = 0;
3911           int32_t elem_mbs_len;
3912           /* Skip the name of collating element name.  */
3913           idx = idx + extra[idx] + 1;
3914           elem_mbs_len = extra[idx++];
3915           if (mbs_len == elem_mbs_len)
3916             {
3917               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3918                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3919                   break;
3920               if (mbs_cnt == elem_mbs_len)
3921                 /* Found the entry.  */
3922                 found = 1;
3923             }
3924           /* Skip the byte sequence of the collating element.  */
3925           idx += elem_mbs_len;
3926           /* Adjust for the alignment.  */
3927           idx = (idx + 3) & ~3;
3928           /* Skip the collation sequence value.  */
3929           idx += sizeof (uint32_t);
3930           /* Skip the wide char sequence of the collating element.  */
3931           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3932           /* If we found the entry, return the sequence value.  */
3933           if (found)
3934             return *(uint32_t *) (extra + idx);
3935           /* Skip the collation sequence value.  */
3936           idx += sizeof (uint32_t);
3937         }
3938       return UINT_MAX;
3939     }
3940 }
3941 # endif /* _LIBC */
3942 #endif /* RE_ENABLE_I18N */
3943
3944 /* Check whether the node accepts the byte which is IDX-th
3945    byte of the INPUT.  */
3946
3947 static int
3948 internal_function
3949 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3950                    int idx)
3951 {
3952   unsigned char ch;
3953   ch = re_string_byte_at (&mctx->input, idx);
3954   switch (node->type)
3955     {
3956     case CHARACTER:
3957       if (node->opr.c != ch)
3958         return 0;
3959       break;
3960
3961     case SIMPLE_BRACKET:
3962       if (!bitset_contain (node->opr.sbcset, ch))
3963         return 0;
3964       break;
3965
3966 #ifdef RE_ENABLE_I18N
3967     case OP_UTF8_PERIOD:
3968       if (ch >= 0x80)
3969         return 0;
3970       /* FALLTHROUGH */
3971 #endif
3972     case OP_PERIOD:
3973       if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE))
3974           || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL)))
3975         return 0;
3976       break;
3977
3978     default:
3979       return 0;
3980     }
3981
3982   if (node->constraint)
3983     {
3984       /* The node has constraints.  Check whether the current context
3985          satisfies the constraints.  */
3986       unsigned int context = re_string_context_at (&mctx->input, idx,
3987                                                    mctx->eflags);
3988       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3989         return 0;
3990     }
3991
3992   return 1;
3993 }
3994
3995 /* Extend the buffers, if the buffers have run out.  */
3996
3997 static reg_errcode_t
3998 internal_function
3999 extend_buffers (re_match_context_t *mctx)
4000 {
4001   reg_errcode_t ret;
4002   re_string_t *pstr = &mctx->input;
4003
4004   /* Double the lengthes of the buffers.  */
4005   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4006   if (BE (ret != REG_NOERROR, 0))
4007     return ret;
4008
4009   if (mctx->state_log != NULL)
4010     {
4011       /* And double the length of state_log.  */
4012       /* XXX We have no indication of the size of this buffer.  If this
4013          allocation fail we have no indication that the state_log array
4014          does not have the right size.  */
4015       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4016                                               pstr->bufs_len + 1);
4017       if (BE (new_array == NULL, 0))
4018         return REG_ESPACE;
4019       mctx->state_log = new_array;
4020     }
4021
4022   /* Then reconstruct the buffers.  */
4023   if (pstr->icase)
4024     {
4025 #ifdef RE_ENABLE_I18N
4026       if (pstr->mb_cur_max > 1)
4027         {
4028           ret = build_wcs_upper_buffer (pstr);
4029           if (BE (ret != REG_NOERROR, 0))
4030             return ret;
4031         }
4032       else
4033 #endif /* RE_ENABLE_I18N  */
4034         build_upper_buffer (pstr);
4035     }
4036   else
4037     {
4038 #ifdef RE_ENABLE_I18N
4039       if (pstr->mb_cur_max > 1)
4040         build_wcs_buffer (pstr);
4041       else
4042 #endif /* RE_ENABLE_I18N  */
4043         {
4044           if (pstr->trans != NULL)
4045             re_string_translate_buffer (pstr);
4046         }
4047     }
4048   return REG_NOERROR;
4049 }
4050
4051 \f
4052 /* Functions for matching context.  */
4053
4054 /* Initialize MCTX.  */
4055
4056 static reg_errcode_t
4057 internal_function
4058 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4059 {
4060   mctx->eflags = eflags;
4061   mctx->match_last = -1;
4062   if (n > 0)
4063     {
4064       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4065       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4066       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4067         return REG_ESPACE;
4068     }
4069   /* Already zero-ed by the caller.
4070      else
4071        mctx->bkref_ents = NULL;
4072      mctx->nbkref_ents = 0;
4073      mctx->nsub_tops = 0;  */
4074   mctx->abkref_ents = n;
4075   mctx->max_mb_elem_len = 1;
4076   mctx->asub_tops = n;
4077   return REG_NOERROR;
4078 }
4079
4080 /* Clean the entries which depend on the current input in MCTX.
4081    This function must be invoked when the matcher changes the start index
4082    of the input, or changes the input string.  */
4083
4084 static void
4085 internal_function
4086 match_ctx_clean (re_match_context_t *mctx)
4087 {
4088   int st_idx;
4089   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4090     {
4091       int sl_idx;
4092       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4093       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4094         {
4095           re_sub_match_last_t *last = top->lasts[sl_idx];
4096           re_free (last->path.array);
4097           re_free (last);
4098         }
4099       re_free (top->lasts);
4100       if (top->path)
4101         {
4102           re_free (top->path->array);
4103           re_free (top->path);
4104         }
4105       free (top);
4106     }
4107
4108   mctx->nsub_tops = 0;
4109   mctx->nbkref_ents = 0;
4110 }
4111
4112 /* Free all the memory associated with MCTX.  */
4113
4114 static void
4115 internal_function
4116 match_ctx_free (re_match_context_t *mctx)
4117 {
4118   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4119   match_ctx_clean (mctx);
4120   re_free (mctx->sub_tops);
4121   re_free (mctx->bkref_ents);
4122 }
4123
4124 /* Add a new backreference entry to MCTX.
4125    Note that we assume that caller never call this function with duplicate
4126    entry, and call with STR_IDX which isn't smaller than any existing entry.
4127 */
4128
4129 static reg_errcode_t
4130 internal_function
4131 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx,
4132                      int from, int to)
4133 {
4134   if (mctx->nbkref_ents >= mctx->abkref_ents)
4135     {
4136       struct re_backref_cache_entry* new_entry;
4137       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4138                               mctx->abkref_ents * 2);
4139       if (BE (new_entry == NULL, 0))
4140         {
4141           re_free (mctx->bkref_ents);
4142           return REG_ESPACE;
4143         }
4144       mctx->bkref_ents = new_entry;
4145       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4146               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4147       mctx->abkref_ents *= 2;
4148     }
4149   if (mctx->nbkref_ents > 0
4150       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4151     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4152
4153   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4154   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4155   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4156   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4157
4158   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4159      If bit N is clear, means that this entry won't epsilon-transition to
4160      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4161      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4162      such node.
4163
4164      A backreference does not epsilon-transition unless it is empty, so set
4165      to all zeros if FROM != TO.  */
4166   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4167     = (from == to ? -1 : 0);
4168
4169   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4170   if (mctx->max_mb_elem_len < to - from)
4171     mctx->max_mb_elem_len = to - from;
4172   return REG_NOERROR;
4173 }
4174
4175 /* Search for the first entry which has the same str_idx, or -1 if none is
4176    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4177
4178 static int
4179 internal_function
4180 search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
4181 {
4182   int left, right, mid, last;
4183   last = right = mctx->nbkref_ents;
4184   for (left = 0; left < right;)
4185     {
4186       mid = (left + right) / 2;
4187       if (mctx->bkref_ents[mid].str_idx < str_idx)
4188         left = mid + 1;
4189       else
4190         right = mid;
4191     }
4192   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4193     return left;
4194   else
4195     return -1;
4196 }
4197
4198 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4199    at STR_IDX.  */
4200
4201 static reg_errcode_t
4202 internal_function
4203 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4204 {
4205 #ifdef DEBUG
4206   assert (mctx->sub_tops != NULL);
4207   assert (mctx->asub_tops > 0);
4208 #endif
4209   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4210     {
4211       int new_asub_tops = mctx->asub_tops * 2;
4212       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4213                                                    re_sub_match_top_t *,
4214                                                    new_asub_tops);
4215       if (BE (new_array == NULL, 0))
4216         return REG_ESPACE;
4217       mctx->sub_tops = new_array;
4218       mctx->asub_tops = new_asub_tops;
4219     }
4220   mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1);
4221   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4222     return REG_ESPACE;
4223   mctx->sub_tops[mctx->nsub_tops]->node = node;
4224   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4225   return REG_NOERROR;
4226 }
4227
4228 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4229    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4230
4231 static re_sub_match_last_t *
4232 internal_function
4233 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4234 {
4235   re_sub_match_last_t *new_entry;
4236   if (BE (subtop->nlasts == subtop->alasts, 0))
4237     {
4238       int new_alasts = 2 * subtop->alasts + 1;
4239       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4240                                                     re_sub_match_last_t *,
4241                                                     new_alasts);
4242       if (BE (new_array == NULL, 0))
4243         return NULL;
4244       subtop->lasts = new_array;
4245       subtop->alasts = new_alasts;
4246     }
4247   new_entry = re_calloc (re_sub_match_last_t, 1);
4248   if (BE (new_entry != NULL, 1))
4249     {
4250       subtop->lasts[subtop->nlasts] = new_entry;
4251       new_entry->node = node;
4252       new_entry->str_idx = str_idx;
4253       ++subtop->nlasts;
4254     }
4255   return new_entry;
4256 }
4257
4258 static void
4259 internal_function
4260 sift_ctx_init (re_sift_context_t *sctx,
4261                re_dfastate_t **sifted_sts,
4262                re_dfastate_t **limited_sts,
4263                int last_node, int last_str_idx)
4264 {
4265   sctx->sifted_states = sifted_sts;
4266   sctx->limited_states = limited_sts;
4267   sctx->last_node = last_node;
4268   sctx->last_str_idx = last_str_idx;
4269   re_node_set_init_empty (&sctx->limits);
4270 }