1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation,
   3    Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   6 
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11 
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16 
  17    You should have received a copy of the GNU General Public License along
  18    with this program; if not, write to the Free Software Foundation,
  19    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  20 
  21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  22                                      Idx n) internal_function;
  23 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  24 static void match_ctx_free (re_match_context_t *cache) internal_function;
  25 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
  26                                           Idx str_idx, Idx from, Idx to)
  27      internal_function;
  28 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
  29      internal_function;
  30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
  31                                            Idx str_idx) internal_function;
  32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  33                                                     Idx node, Idx str_idx)
  34      internal_function;
  35 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  36                            re_dfastate_t **limited_sts, Idx last_node,
  37                            Idx last_str_idx)
  38      internal_function;
  39 static reg_errcode_t re_search_internal (const regex_t *preg,
  40                                          const char *string, Idx length,
  41                                          Idx start, Idx last_start, Idx stop,
  42                                          size_t nmatch, regmatch_t pmatch[],
  43                                          int eflags) internal_function;
  44 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
  45                                   const char *string1, Idx length1,
  46                                   const char *string2, Idx length2,
  47                                   Idx start, regoff_t range,
  48                                   struct re_registers *regs,
  49                                   Idx stop, bool ret_len) internal_function;
  50 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
  51                                 const char *string, Idx length, Idx start,
  52                                 regoff_t range, Idx stop,
  53                                 struct re_registers *regs,
  54                                 bool ret_len) internal_function;
  55 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  56                                   Idx nregs, int regs_allocated)
  57      internal_function;
  58 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  59      internal_function;
  60 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
  61                            Idx *p_match_first) internal_function;
  62 static Idx check_halt_state_context (const re_match_context_t *mctx,
  63                                      const re_dfastate_t *state, Idx idx)
  64      internal_function;
  65 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  66                          regmatch_t *prev_idx_match, Idx cur_node,
  67                          Idx cur_idx, Idx nmatch) internal_function;
  68 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  69                                       Idx str_idx, Idx dest_node, Idx nregs,
  70                                       regmatch_t *regs,
  71                                       re_node_set *eps_via_nodes)
  72      internal_function;
  73 static reg_errcode_t set_regs (const regex_t *preg,
  74                                const re_match_context_t *mctx,
  75                                size_t nmatch, regmatch_t *pmatch,
  76                                bool fl_backtrack) internal_function;
  77 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
  78      internal_function;
  79 
  80 #ifdef RE_ENABLE_I18N
  81 static int sift_states_iter_mb (const re_match_context_t *mctx,
  82                                 re_sift_context_t *sctx,
  83                                 Idx node_idx, Idx str_idx, Idx max_str_idx)
  84      internal_function;
  85 #endif /* RE_ENABLE_I18N */
  86 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  87                                            re_sift_context_t *sctx)
  88      internal_function;
  89 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  90                                           re_sift_context_t *sctx, Idx str_idx,
  91                                           re_node_set *cur_dest)
  92      internal_function;
  93 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  94                                               re_sift_context_t *sctx,
  95                                               Idx str_idx,
  96                                               re_node_set *dest_nodes)
  97      internal_function;
  98 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
  99                                             re_node_set *dest_nodes,
 100                                             const re_node_set *candidates)
 101      internal_function;
 102 static bool check_dst_limits (const re_match_context_t *mctx,
 103                               const re_node_set *limits,
 104                               Idx dst_node, Idx dst_idx, Idx src_node,
 105                               Idx src_idx) internal_function;
 106 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
 107                                         int boundaries, Idx subexp_idx,
 108                                         Idx from_node, Idx bkref_idx)
 109      internal_function;
 110 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
 111                                       Idx limit, Idx subexp_idx,
 112                                       Idx node, Idx str_idx,
 113                                       Idx bkref_idx) internal_function;
 114 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
 115                                           re_node_set *dest_nodes,
 116                                           const re_node_set *candidates,
 117                                           re_node_set *limits,
 118                                           struct re_backref_cache_entry *bkref_ents,
 119                                           Idx str_idx) internal_function;
 120 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 121                                         re_sift_context_t *sctx,
 122                                         Idx str_idx, const re_node_set *candidates)
 123      internal_function;
 124 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 125                                         re_dfastate_t **dst,
 126                                         re_dfastate_t **src, Idx num)
 127      internal_function;
 128 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 129                                          re_match_context_t *mctx) internal_function;
 130 static re_dfastate_t *transit_state (reg_errcode_t *err,
 131                                      re_match_context_t *mctx,
 132                                      re_dfastate_t *state) internal_function;
 133 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 134                                             re_match_context_t *mctx,
 135                                             re_dfastate_t *next_state)
 136      internal_function;
 137 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 138                                                 re_node_set *cur_nodes,
 139                                                 Idx str_idx) internal_function;
 140 #if 0
 141 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 142                                         re_match_context_t *mctx,
 143                                         re_dfastate_t *pstate)
 144      internal_function;
 145 #endif
 146 #ifdef RE_ENABLE_I18N
 147 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 148                                        re_dfastate_t *pstate)
 149      internal_function;
 150 #endif /* RE_ENABLE_I18N */
 151 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 152                                           const re_node_set *nodes)
 153      internal_function;
 154 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 155                                  Idx bkref_node, Idx bkref_str_idx)
 156      internal_function;
 157 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 158                                      const re_sub_match_top_t *sub_top,
 159                                      re_sub_match_last_t *sub_last,
 160                                      Idx bkref_node, Idx bkref_str)
 161      internal_function;
 162 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 163                              Idx subexp_idx, int type) internal_function;
 164 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 165                                     state_array_t *path, Idx top_node,
 166                                     Idx top_str, Idx last_node, Idx last_str,
 167                                     int type) internal_function;
 168 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 169                                                    Idx str_idx,
 170                                                    re_node_set *cur_nodes,
 171                                                    re_node_set *next_nodes)
 172      internal_function;
 173 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 174                                                re_node_set *cur_nodes,
 175                                                Idx ex_subexp, int type)
 176      internal_function;
 177 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 178                                                    re_node_set *dst_nodes,
 179                                                    Idx target, Idx ex_subexp,
 180                                                    int type) internal_function;
 181 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 182                                          re_node_set *cur_nodes, Idx cur_str,
 183                                          Idx subexp_num, int type)
 184      internal_function;
 185 static bool build_trtable (const re_dfa_t *dfa,
 186                            re_dfastate_t *state) internal_function;
 187 #ifdef RE_ENABLE_I18N
 188 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 189                                     const re_string_t *input, Idx idx)
 190      internal_function;
 191 # ifdef _LIBC
 192 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 193                                                    size_t name_len)
 194      internal_function;
 195 # endif /* _LIBC */
 196 #endif /* RE_ENABLE_I18N */
 197 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 198                                        const re_dfastate_t *state,
 199                                        re_node_set *states_node,
 200                                        bitset_t *states_ch) internal_function;
 201 static bool check_node_accept (const re_match_context_t *mctx,
 202                                const re_token_t *node, Idx idx)
 203      internal_function;
 204 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
 205      internal_function;
 206 
 207 /* Entry point for POSIX code.  */
 208 
 209 /* regexec searches for a given pattern, specified by PREG, in the
 210    string STRING.
 211 
 212    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 213    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 214    least NMATCH elements, and we set them to the offsets of the
 215    corresponding matched substrings.
 216 
 217    EFLAGS specifies `execution flags' which affect matching: if
 218    REG_NOTBOL is set, then ^ does not match at the beginning of the
 219    string; if REG_NOTEOL is set, then $ does not match at the end.
 220 
 221    We return 0 if we find a match and REG_NOMATCH if not.  */
 222 
 223 int
 224 regexec (preg, string, nmatch, pmatch, eflags)
 225     const regex_t *_Restrict_ preg;
 226     const char *_Restrict_ string;
 227     size_t nmatch;
 228     regmatch_t pmatch[_Restrict_arr_];
 229     int eflags;
 230 {
 231   reg_errcode_t err;
 232   Idx start, length;
 233 #ifdef _LIBC
 234   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 235 #endif
 236 
 237   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 238     return REG_BADPAT;
 239 
 240   if (eflags & REG_STARTEND)
 241     {
 242       start = pmatch[0].rm_so;
 243       length = pmatch[0].rm_eo;
 244     }
 245   else
 246     {
 247       start = 0;
 248       length = strlen (string);
 249     }
 250 
 251   __libc_lock_lock (dfa->lock);
 252   if (preg->no_sub)
 253     err = re_search_internal (preg, string, length, start, length,
 254                               length, 0, NULL, eflags);
 255   else
 256     err = re_search_internal (preg, string, length, start, length,
 257                               length, nmatch, pmatch, eflags);
 258   __libc_lock_unlock (dfa->lock);
 259   return err != REG_NOERROR;
 260 }
 261 
 262 #ifdef _LIBC
 263 # include <shlib-compat.h>
 264 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 265 
 266 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 267 __typeof__ (__regexec) __compat_regexec;
 268 
 269 int
 270 attribute_compat_text_section
 271 __compat_regexec (const regex_t *_Restrict_ preg,
 272                   const char *_Restrict_ string, size_t nmatch,
 273                   regmatch_t pmatch[], int eflags)
 274 {
 275   return regexec (preg, string, nmatch, pmatch,
 276                   eflags & (REG_NOTBOL | REG_NOTEOL));
 277 }
 278 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 279 # endif
 280 #endif
 281 
 282 /* Entry points for GNU code.  */
 283 
 284 /* re_match, re_search, re_match_2, re_search_2
 285 
 286    The former two functions operate on STRING with length LENGTH,
 287    while the later two operate on concatenation of STRING1 and STRING2
 288    with lengths LENGTH1 and LENGTH2, respectively.
 289 
 290    re_match() matches the compiled pattern in BUFP against the string,
 291    starting at index START.
 292 
 293    re_search() first tries matching at index START, then it tries to match
 294    starting from index START + 1, and so on.  The last start position tried
 295    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 296    way as re_match().)
 297 
 298    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 299    the first STOP characters of the concatenation of the strings should be
 300    concerned.
 301 
 302    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 303    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
 304    computed relative to the concatenation, not relative to the individual
 305    strings.)
 306 
 307    On success, re_match* functions return the length of the match, re_search*
 308    return the position of the start of the match.  Return value -1 means no
 309    match was found and -2 indicates an internal error.  */
 310 
 311 regoff_t
 312 re_match (bufp, string, length, start, regs)
 313     struct re_pattern_buffer *bufp;
 314     const char *string;
 315     Idx length, start;
 316     struct re_registers *regs;
 317 {
 318   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
 319 }
 320 #ifdef _LIBC
 321 weak_alias (__re_match, re_match)
 322 #endif
 323 
 324 regoff_t
 325 re_search (bufp, string, length, start, range, regs)
 326     struct re_pattern_buffer *bufp;
 327     const char *string;
 328     Idx length, start;
 329     regoff_t range;
 330     struct re_registers *regs;
 331 {
 332   return re_search_stub (bufp, string, length, start, range, length, regs,
 333                          false);
 334 }
 335 #ifdef _LIBC
 336 weak_alias (__re_search, re_search)
 337 #endif
 338 
 339 regoff_t
 340 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 341     struct re_pattern_buffer *bufp;
 342     const char *string1, *string2;
 343     Idx length1, length2, start, stop;
 344     struct re_registers *regs;
 345 {
 346   return re_search_2_stub (bufp, string1, length1, string2, length2,
 347                            start, 0, regs, stop, true);
 348 }
 349 #ifdef _LIBC
 350 weak_alias (__re_match_2, re_match_2)
 351 #endif
 352 
 353 regoff_t
 354 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 355     struct re_pattern_buffer *bufp;
 356     const char *string1, *string2;
 357     Idx length1, length2, start, stop;
 358     regoff_t range;
 359     struct re_registers *regs;
 360 {
 361   return re_search_2_stub (bufp, string1, length1, string2, length2,
 362                            start, range, regs, stop, false);
 363 }
 364 #ifdef _LIBC
 365 weak_alias (__re_search_2, re_search_2)
 366 #endif
 367 
 368 static regoff_t
 369 internal_function
 370 re_search_2_stub (struct re_pattern_buffer *bufp,
 371                   const char *string1, Idx length1,
 372                   const char *string2, Idx length2,
 373                   Idx start, regoff_t range, struct re_registers *regs,
 374                   Idx stop, bool ret_len)
 375 {
 376   const char *str;
 377   regoff_t rval;
 378   Idx len = length1 + length2;
 379   char *s = NULL;
 380 
 381   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
 382     return -2;
 383 
 384   /* Concatenate the strings.  */
 385   if (length2 > 0)
 386     if (length1 > 0)
 387       {
 388         s = re_malloc (char, len);
 389 
 390         if (BE (s == NULL, 0))
 391           return -2;
 392 #ifdef _LIBC
 393         memcpy (__mempcpy (s, string1, length1), string2, length2);
 394 #else
 395         memcpy (s, string1, length1);
 396         memcpy (s + length1, string2, length2);
 397 #endif
 398         str = s;
 399       }
 400     else
 401       str = string2;
 402   else
 403     str = string1;
 404 
 405   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 406                          ret_len);
 407   re_free (s);
 408   return rval;
 409 }
 410 
 411 /* The parameters have the same meaning as those of re_search.
 412    Additional parameters:
 413    If RET_LEN is true the length of the match is returned (re_match style);
 414    otherwise the position of the match is returned.  */
 415 
 416 static regoff_t
 417 internal_function
 418 re_search_stub (struct re_pattern_buffer *bufp,
 419                 const char *string, Idx length,
 420                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
 421                 bool ret_len)
 422 {
 423   reg_errcode_t result;
 424   regmatch_t *pmatch;
 425   Idx nregs;
 426   regoff_t rval;
 427   int eflags = 0;
 428 #ifdef _LIBC
 429   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
 430 #endif
 431   Idx last_start = start + range;
 432 
 433   /* Check for out-of-range.  */
 434   if (BE (start < 0 || start > length, 0))
 435     return -1;
 436   if (BE (length < last_start || (0 <= range && last_start < start), 0))
 437     last_start = length;
 438   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
 439     last_start = 0;
 440 
 441   __libc_lock_lock (dfa->lock);
 442 
 443   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 444   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 445 
 446   /* Compile fastmap if we haven't yet.  */
 447   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 448     re_compile_fastmap (bufp);
 449 
 450   if (BE (bufp->no_sub, 0))
 451     regs = NULL;
 452 
 453   /* We need at least 1 register.  */
 454   if (regs == NULL)
 455     nregs = 1;
 456   else if (BE (bufp->regs_allocated == REGS_FIXED
 457                && regs->num_regs <= bufp->re_nsub, 0))
 458     {
 459       nregs = regs->num_regs;
 460       if (BE (nregs < 1, 0))
 461         {
 462           /* Nothing can be copied to regs.  */
 463           regs = NULL;
 464           nregs = 1;
 465         }
 466     }
 467   else
 468     nregs = bufp->re_nsub + 1;
 469   pmatch = re_malloc (regmatch_t, nregs);
 470   if (BE (pmatch == NULL, 0))
 471     {
 472       rval = -2;
 473       goto out;
 474     }
 475 
 476   result = re_search_internal (bufp, string, length, start, last_start, stop,
 477                                nregs, pmatch, eflags);
 478 
 479   rval = 0;
 480 
 481   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 482   if (result != REG_NOERROR)
 483     rval = -1;
 484   else if (regs != NULL)
 485     {
 486       /* If caller wants register contents data back, copy them.  */
 487       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 488                                            bufp->regs_allocated);
 489       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 490         rval = -2;
 491     }
 492 
 493   if (BE (rval == 0, 1))
 494     {
 495       if (ret_len)
 496         {
 497           assert (pmatch[0].rm_so == start);
 498           rval = pmatch[0].rm_eo - start;
 499         }
 500       else
 501         rval = pmatch[0].rm_so;
 502     }
 503   re_free (pmatch);
 504  out:
 505   __libc_lock_unlock (dfa->lock);
 506   return rval;
 507 }
 508 
 509 static unsigned int
 510 internal_function
 511 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
 512               int regs_allocated)
 513 {
 514   int rval = REGS_REALLOCATE;
 515   Idx i;
 516   Idx need_regs = nregs + 1;
 517   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 518      uses.  */
 519 
 520   /* Have the register data arrays been allocated?  */
 521   if (regs_allocated == REGS_UNALLOCATED)
 522     { /* No.  So allocate them with malloc.  */
 523       regs->start = re_malloc (regoff_t, need_regs);
 524       if (BE (regs->start == NULL, 0))
 525         return REGS_UNALLOCATED;
 526       regs->end = re_malloc (regoff_t, need_regs);
 527       if (BE (regs->end == NULL, 0))
 528         {
 529           re_free (regs->start);
 530           return REGS_UNALLOCATED;
 531         }
 532       regs->num_regs = need_regs;
 533     }
 534   else if (regs_allocated == REGS_REALLOCATE)
 535     { /* Yes.  If we need more elements than were already
 536          allocated, reallocate them.  If we need fewer, just
 537          leave it alone.  */
 538       if (BE (need_regs > regs->num_regs, 0))
 539         {
 540           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 541           regoff_t *new_end;
 542           if (BE (new_start == NULL, 0))
 543             return REGS_UNALLOCATED;
 544           new_end = re_realloc (regs->end, regoff_t, need_regs);
 545           if (BE (new_end == NULL, 0))
 546             {
 547               re_free (new_start);
 548               return REGS_UNALLOCATED;
 549             }
 550           regs->start = new_start;
 551           regs->end = new_end;
 552           regs->num_regs = need_regs;
 553         }
 554     }
 555   else
 556     {
 557       assert (regs_allocated == REGS_FIXED);
 558       /* This function may not be called with REGS_FIXED and nregs too big.  */
 559       assert (regs->num_regs >= nregs);
 560       rval = REGS_FIXED;
 561     }
 562 
 563   /* Copy the regs.  */
 564   for (i = 0; i < nregs; ++i)
 565     {
 566       regs->start[i] = pmatch[i].rm_so;
 567       regs->end[i] = pmatch[i].rm_eo;
 568     }
 569   for ( ; i < regs->num_regs; ++i)
 570     regs->start[i] = regs->end[i] = -1;
 571 
 572   return rval;
 573 }
 574 
 575 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 576    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 577    this memory for recording register information.  STARTS and ENDS
 578    must be allocated using the malloc library routine, and must each
 579    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 580 
 581    If NUM_REGS == 0, then subsequent matches should allocate their own
 582    register data.
 583 
 584    Unless this function is called, the first search or match using
 585    PATTERN_BUFFER will allocate its own register data, without
 586    freeing the old data.  */
 587 
 588 void
 589 re_set_registers (bufp, regs, num_regs, starts, ends)
 590     struct re_pattern_buffer *bufp;
 591     struct re_registers *regs;
 592     __re_size_t num_regs;
 593     regoff_t *starts, *ends;
 594 {
 595   if (num_regs)
 596     {
 597       bufp->regs_allocated = REGS_REALLOCATE;
 598       regs->num_regs = num_regs;
 599       regs->start = starts;
 600       regs->end = ends;
 601     }
 602   else
 603     {
 604       bufp->regs_allocated = REGS_UNALLOCATED;
 605       regs->num_regs = 0;
 606       regs->start = regs->end = NULL;
 607     }
 608 }
 609 #ifdef _LIBC
 610 weak_alias (__re_set_registers, re_set_registers)
 611 #endif
 612 
 613 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 614    them unless specifically requested.  */
 615 
 616 #if defined _REGEX_RE_COMP || defined _LIBC
 617 int
 618 # ifdef _LIBC
 619 weak_function
 620 # endif
 621 re_exec (s)
 622      const char *s;
 623 {
 624   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 625 }
 626 #endif /* _REGEX_RE_COMP */
 627 
 628 /* Internal entry point.  */
 629 
 630 /* Searches for a compiled pattern PREG in the string STRING, whose
 631    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 632    meaning as with regexec.  LAST_START is START + RANGE, where
 633    START and RANGE have the same meaning as with re_search.
 634    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 635    otherwise return the error code.
 636    Note: We assume front end functions already check ranges.
 637    (0 <= LAST_START && LAST_START <= LENGTH)  */
 638 
 639 static reg_errcode_t
 640 internal_function
 641 re_search_internal (const regex_t *preg,
 642                     const char *string, Idx length,
 643                     Idx start, Idx last_start, Idx stop,
 644                     size_t nmatch, regmatch_t pmatch[],
 645                     int eflags)
 646 {
 647   reg_errcode_t err;
 648   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
 649   Idx left_lim, right_lim;
 650   int incr;
 651   bool fl_longest_match;
 652   int match_kind;
 653   Idx match_first;
 654   Idx match_last = REG_MISSING;
 655   Idx extra_nmatch;
 656   bool sb;
 657   int ch;
 658 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 659   re_match_context_t mctx = { .dfa = dfa };
 660 #else
 661   re_match_context_t mctx;
 662 #endif
 663   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 664                     && start != last_start && !preg->can_be_null)
 665                    ? preg->fastmap : NULL);
 666   RE_TRANSLATE_TYPE t = preg->translate;
 667 
 668 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 669   memset (&mctx, '\0', sizeof (re_match_context_t));
 670   mctx.dfa = dfa;
 671 #endif
 672 
 673   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 674   nmatch -= extra_nmatch;
 675 
 676   /* Check if the DFA haven't been compiled.  */
 677   if (BE (preg->used == 0 || dfa->init_state == NULL
 678           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 679           || dfa->init_state_begbuf == NULL, 0))
 680     return REG_NOMATCH;
 681 
 682 #ifdef DEBUG
 683   /* We assume front-end functions already check them.  */
 684   assert (0 <= last_start && last_start <= length);
 685 #endif
 686 
 687   /* If initial states with non-begbuf contexts have no elements,
 688      the regex must be anchored.  If preg->newline_anchor is set,
 689      we'll never use init_state_nl, so do not check it.  */
 690   if (dfa->init_state->nodes.nelem == 0
 691       && dfa->init_state_word->nodes.nelem == 0
 692       && (dfa->init_state_nl->nodes.nelem == 0
 693           || !preg->newline_anchor))
 694     {
 695       if (start != 0 && last_start != 0)
 696         return REG_NOMATCH;
 697       start = last_start = 0;
 698     }
 699 
 700   /* We must check the longest matching, if nmatch > 0.  */
 701   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 702 
 703   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 704                             preg->translate, preg->syntax & RE_ICASE, dfa);
 705   if (BE (err != REG_NOERROR, 0))
 706     goto free_return;
 707   mctx.input.stop = stop;
 708   mctx.input.raw_stop = stop;
 709   mctx.input.newline_anchor = preg->newline_anchor;
 710 
 711   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 712   if (BE (err != REG_NOERROR, 0))
 713     goto free_return;
 714 
 715   /* We will log all the DFA states through which the dfa pass,
 716      if nmatch > 1, or this dfa has "multibyte node", which is a
 717      back-reference or a node which can accept multibyte character or
 718      multi character collating element.  */
 719   if (nmatch > 1 || dfa->has_mb_node)
 720     {
 721       /* Avoid overflow.  */
 722       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
 723         {
 724           err = REG_ESPACE;
 725           goto free_return;
 726         }
 727 
 728       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 729       if (BE (mctx.state_log == NULL, 0))
 730         {
 731           err = REG_ESPACE;
 732           goto free_return;
 733         }
 734     }
 735   else
 736     mctx.state_log = NULL;
 737 
 738   match_first = start;
 739   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 740                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 741 
 742   /* Check incrementally whether of not the input string match.  */
 743   incr = (last_start < start) ? -1 : 1;
 744   left_lim = (last_start < start) ? last_start : start;
 745   right_lim = (last_start < start) ? start : last_start;
 746   sb = dfa->mb_cur_max == 1;
 747   match_kind =
 748     (fastmap
 749      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 750         | (start <= last_start ? 2 : 0)
 751         | (t != NULL ? 1 : 0))
 752      : 8);
 753 
 754   for (;; match_first += incr)
 755     {
 756       err = REG_NOMATCH;
 757       if (match_first < left_lim || right_lim < match_first)
 758         goto free_return;
 759 
 760       /* Advance as rapidly as possible through the string, until we
 761          find a plausible place to start matching.  This may be done
 762          with varying efficiency, so there are various possibilities:
 763          only the most common of them are specialized, in order to
 764          save on code size.  We use a switch statement for speed.  */
 765       switch (match_kind)
 766         {
 767         case 8:
 768           /* No fastmap.  */
 769           break;
 770 
 771         case 7:
 772           /* Fastmap with single-byte translation, match forward.  */
 773           while (BE (match_first < right_lim, 1)
 774                  && !fastmap[t[(unsigned char) string[match_first]]])
 775             ++match_first;
 776           goto forward_match_found_start_or_reached_end;
 777 
 778         case 6:
 779           /* Fastmap without translation, match forward.  */
 780           while (BE (match_first < right_lim, 1)
 781                  && !fastmap[(unsigned char) string[match_first]])
 782             ++match_first;
 783 
 784         forward_match_found_start_or_reached_end:
 785           if (BE (match_first == right_lim, 0))
 786             {
 787               ch = match_first >= length
 788                        ? 0 : (unsigned char) string[match_first];
 789               if (!fastmap[t ? t[ch] : ch])
 790                 goto free_return;
 791             }
 792           break;
 793 
 794         case 4:
 795         case 5:
 796           /* Fastmap without multi-byte translation, match backwards.  */
 797           while (match_first >= left_lim)
 798             {
 799               ch = match_first >= length
 800                        ? 0 : (unsigned char) string[match_first];
 801               if (fastmap[t ? t[ch] : ch])
 802                 break;
 803               --match_first;
 804             }
 805           if (match_first < left_lim)
 806             goto free_return;
 807           break;
 808 
 809         default:
 810           /* In this case, we can't determine easily the current byte,
 811              since it might be a component byte of a multibyte
 812              character.  Then we use the constructed buffer instead.  */
 813           for (;;)
 814             {
 815               /* If MATCH_FIRST is out of the valid range, reconstruct the
 816                  buffers.  */
 817               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
 818               if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
 819                 {
 820                   err = re_string_reconstruct (&mctx.input, match_first,
 821                                                eflags);
 822                   if (BE (err != REG_NOERROR, 0))
 823                     goto free_return;
 824 
 825                   offset = match_first - mctx.input.raw_mbs_idx;
 826                 }
 827               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 828                  Note that MATCH_FIRST must not be smaller than 0.  */
 829               ch = (match_first >= length
 830                     ? 0 : re_string_byte_at (&mctx.input, offset));
 831               if (fastmap[ch])
 832                 break;
 833               match_first += incr;
 834               if (match_first < left_lim || match_first > right_lim)
 835                 {
 836                   err = REG_NOMATCH;
 837                   goto free_return;
 838                 }
 839             }
 840           break;
 841         }
 842 
 843       /* Reconstruct the buffers so that the matcher can assume that
 844          the matching starts from the beginning of the buffer.  */
 845       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 846       if (BE (err != REG_NOERROR, 0))
 847         goto free_return;
 848 
 849 #ifdef RE_ENABLE_I18N
 850      /* Don't consider this char as a possible match start if it part,
 851         yet isn't the head, of a multibyte character.  */
 852       if (!sb && !re_string_first_byte (&mctx.input, 0))
 853         continue;
 854 #endif
 855 
 856       /* It seems to be appropriate one, then use the matcher.  */
 857       /* We assume that the matching starts from 0.  */
 858       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 859       match_last = check_matching (&mctx, fl_longest_match,
 860                                    start <= last_start ? &match_first : NULL);
 861       if (match_last != REG_MISSING)
 862         {
 863           if (BE (match_last == REG_ERROR, 0))
 864             {
 865               err = REG_ESPACE;
 866               goto free_return;
 867             }
 868           else
 869             {
 870               mctx.match_last = match_last;
 871               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 872                 {
 873                   re_dfastate_t *pstate = mctx.state_log[match_last];
 874                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 875                                                              match_last);
 876                 }
 877               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 878                   || dfa->nbackref)
 879                 {
 880                   err = prune_impossible_nodes (&mctx);
 881                   if (err == REG_NOERROR)
 882                     break;
 883                   if (BE (err != REG_NOMATCH, 0))
 884                     goto free_return;
 885                   match_last = REG_MISSING;
 886                 }
 887               else
 888                 break; /* We found a match.  */
 889             }
 890         }
 891 
 892       match_ctx_clean (&mctx);
 893     }
 894 
 895 #ifdef DEBUG
 896   assert (match_last != REG_MISSING);
 897   assert (err == REG_NOERROR);
 898 #endif
 899 
 900   /* Set pmatch[] if we need.  */
 901   if (nmatch > 0)
 902     {
 903       Idx reg_idx;
 904 
 905       /* Initialize registers.  */
 906       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 907         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 908 
 909       /* Set the points where matching start/end.  */
 910       pmatch[0].rm_so = 0;
 911       pmatch[0].rm_eo = mctx.match_last;
 912       /* FIXME: This function should fail if mctx.match_last exceeds
 913          the maximum possible regoff_t value.  We need a new error
 914          code REG_OVERFLOW.  */
 915 
 916       if (!preg->no_sub && nmatch > 1)
 917         {
 918           err = set_regs (preg, &mctx, nmatch, pmatch,
 919                           dfa->has_plural_match && dfa->nbackref > 0);
 920           if (BE (err != REG_NOERROR, 0))
 921             goto free_return;
 922         }
 923 
 924       /* At last, add the offset to the each registers, since we slided
 925          the buffers so that we could assume that the matching starts
 926          from 0.  */
 927       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 928         if (pmatch[reg_idx].rm_so != -1)
 929           {
 930 #ifdef RE_ENABLE_I18N
 931             if (BE (mctx.input.offsets_needed != 0, 0))
 932               {
 933                 pmatch[reg_idx].rm_so =
 934                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 935                    ? mctx.input.valid_raw_len
 936                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 937                 pmatch[reg_idx].rm_eo =
 938                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 939                    ? mctx.input.valid_raw_len
 940                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 941               }
 942 #else
 943             assert (mctx.input.offsets_needed == 0);
 944 #endif
 945             pmatch[reg_idx].rm_so += match_first;
 946             pmatch[reg_idx].rm_eo += match_first;
 947           }
 948       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 949         {
 950           pmatch[nmatch + reg_idx].rm_so = -1;
 951           pmatch[nmatch + reg_idx].rm_eo = -1;
 952         }
 953 
 954       if (dfa->subexp_map)
 955         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 956           if (dfa->subexp_map[reg_idx] != reg_idx)
 957             {
 958               pmatch[reg_idx + 1].rm_so
 959                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 960               pmatch[reg_idx + 1].rm_eo
 961                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 962             }
 963     }
 964 
 965  free_return:
 966   re_free (mctx.state_log);
 967   if (dfa->nbackref)
 968     match_ctx_free (&mctx);
 969   re_string_destruct (&mctx.input);
 970   return err;
 971 }
 972 
 973 static reg_errcode_t
 974 internal_function
 975 prune_impossible_nodes (re_match_context_t *mctx)
 976 {
 977   const re_dfa_t *const dfa = mctx->dfa;
 978   Idx halt_node, match_last;
 979   reg_errcode_t ret;
 980   re_dfastate_t **sifted_states;
 981   re_dfastate_t **lim_states = NULL;
 982   re_sift_context_t sctx;
 983 #ifdef DEBUG
 984   assert (mctx->state_log != NULL);
 985 #endif
 986   match_last = mctx->match_last;
 987   halt_node = mctx->last_node;
 988 
 989   /* Avoid overflow.  */
 990   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
 991     return REG_ESPACE;
 992 
 993   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 994   if (BE (sifted_states == NULL, 0))
 995     {
 996       ret = REG_ESPACE;
 997       goto free_return;
 998     }
 999   if (dfa->nbackref)
1000     {
1001       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1002       if (BE (lim_states == NULL, 0))
1003         {
1004           ret = REG_ESPACE;
1005           goto free_return;
1006         }
1007       while (1)
1008         {
1009           memset (lim_states, '\0',
1010                   sizeof (re_dfastate_t *) * (match_last + 1));
1011           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1012                          match_last);
1013           ret = sift_states_backward (mctx, &sctx);
1014           re_node_set_free (&sctx.limits);
1015           if (BE (ret != REG_NOERROR, 0))
1016               goto free_return;
1017           if (sifted_states[0] != NULL || lim_states[0] != NULL)
1018             break;
1019           do
1020             {
1021               --match_last;
1022               if (! REG_VALID_INDEX (match_last))
1023                 {
1024                   ret = REG_NOMATCH;
1025                   goto free_return;
1026                 }
1027             } while (mctx->state_log[match_last] == NULL
1028                      || !mctx->state_log[match_last]->halt);
1029           halt_node = check_halt_state_context (mctx,
1030                                                 mctx->state_log[match_last],
1031                                                 match_last);
1032         }
1033       ret = merge_state_array (dfa, sifted_states, lim_states,
1034                                match_last + 1);
1035       re_free (lim_states);
1036       lim_states = NULL;
1037       if (BE (ret != REG_NOERROR, 0))
1038         goto free_return;
1039     }
1040   else
1041     {
1042       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1043       ret = sift_states_backward (mctx, &sctx);
1044       re_node_set_free (&sctx.limits);
1045       if (BE (ret != REG_NOERROR, 0))
1046         goto free_return;
1047     }
1048   re_free (mctx->state_log);
1049   mctx->state_log = sifted_states;
1050   sifted_states = NULL;
1051   mctx->last_node = halt_node;
1052   mctx->match_last = match_last;
1053   ret = REG_NOERROR;
1054  free_return:
1055   re_free (sifted_states);
1056   re_free (lim_states);
1057   return ret;
1058 }
1059 
1060 /* Acquire an initial state and return it.
1061    We must select appropriate initial state depending on the context,
1062    since initial states may have constraints like "\<", "^", etc..  */
1063 
1064 static inline re_dfastate_t *
1065 __attribute ((always_inline)) internal_function
1066 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1067                             Idx idx)
1068 {
1069   const re_dfa_t *const dfa = mctx->dfa;
1070   if (dfa->init_state->has_constraint)
1071     {
1072       unsigned int context;
1073       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1074       if (IS_WORD_CONTEXT (context))
1075         return dfa->init_state_word;
1076       else if (IS_ORDINARY_CONTEXT (context))
1077         return dfa->init_state;
1078       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1079         return dfa->init_state_begbuf;
1080       else if (IS_NEWLINE_CONTEXT (context))
1081         return dfa->init_state_nl;
1082       else if (IS_BEGBUF_CONTEXT (context))
1083         {
1084           /* It is relatively rare case, then calculate on demand.  */
1085           return re_acquire_state_context (err, dfa,
1086                                            dfa->init_state->entrance_nodes,
1087                                            context);
1088         }
1089       else
1090         /* Must not happen?  */
1091         return dfa->init_state;
1092     }
1093   else
1094     return dfa->init_state;
1095 }
1096 
1097 /* Check whether the regular expression match input string INPUT or not,
1098    and return the index where the matching end.  Return REG_MISSING if
1099    there is no match, and return REG_ERROR in case of an error.
1100    FL_LONGEST_MATCH means we want the POSIX longest matching.
1101    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1102    next place where we may want to try matching.
1103    Note that the matcher assume that the maching starts from the current
1104    index of the buffer.  */
1105 
1106 static Idx
1107 internal_function
1108 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1109                 Idx *p_match_first)
1110 {
1111   const re_dfa_t *const dfa = mctx->dfa;
1112   reg_errcode_t err;
1113   Idx match = 0;
1114   Idx match_last = REG_MISSING;
1115   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1116   re_dfastate_t *cur_state;
1117   bool at_init_state = p_match_first != NULL;
1118   Idx next_start_idx = cur_str_idx;
1119 
1120   err = REG_NOERROR;
1121   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1122   /* An initial state must not be NULL (invalid).  */
1123   if (BE (cur_state == NULL, 0))
1124     {
1125       assert (err == REG_ESPACE);
1126       return REG_ERROR;
1127     }
1128 
1129   if (mctx->state_log != NULL)
1130     {
1131       mctx->state_log[cur_str_idx] = cur_state;
1132 
1133       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1134          later.  E.g. Processing back references.  */
1135       if (BE (dfa->nbackref, 0))
1136         {
1137           at_init_state = false;
1138           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1139           if (BE (err != REG_NOERROR, 0))
1140             return err;
1141 
1142           if (cur_state->has_backref)
1143             {
1144               err = transit_state_bkref (mctx, &cur_state->nodes);
1145               if (BE (err != REG_NOERROR, 0))
1146                 return err;
1147             }
1148         }
1149     }
1150 
1151   /* If the RE accepts NULL string.  */
1152   if (BE (cur_state->halt, 0))
1153     {
1154       if (!cur_state->has_constraint
1155           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1156         {
1157           if (!fl_longest_match)
1158             return cur_str_idx;
1159           else
1160             {
1161               match_last = cur_str_idx;
1162               match = 1;
1163             }
1164         }
1165     }
1166 
1167   while (!re_string_eoi (&mctx->input))
1168     {
1169       re_dfastate_t *old_state = cur_state;
1170       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1171 
1172       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1173           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1174               && mctx->input.valid_len < mctx->input.len))
1175         {
1176           err = extend_buffers (mctx);
1177           if (BE (err != REG_NOERROR, 0))
1178             {
1179               assert (err == REG_ESPACE);
1180               return REG_ERROR;
1181             }
1182         }
1183 
1184       cur_state = transit_state (&err, mctx, cur_state);
1185       if (mctx->state_log != NULL)
1186         cur_state = merge_state_with_log (&err, mctx, cur_state);
1187 
1188       if (cur_state == NULL)
1189         {
1190           /* Reached the invalid state or an error.  Try to recover a valid
1191              state using the state log, if available and if we have not
1192              already found a valid (even if not the longest) match.  */
1193           if (BE (err != REG_NOERROR, 0))
1194             return REG_ERROR;
1195 
1196           if (mctx->state_log == NULL
1197               || (match && !fl_longest_match)
1198               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1199             break;
1200         }
1201 
1202       if (BE (at_init_state, 0))
1203         {
1204           if (old_state == cur_state)
1205             next_start_idx = next_char_idx;
1206           else
1207             at_init_state = false;
1208         }
1209 
1210       if (cur_state->halt)
1211         {
1212           /* Reached a halt state.
1213              Check the halt state can satisfy the current context.  */
1214           if (!cur_state->has_constraint
1215               || check_halt_state_context (mctx, cur_state,
1216                                            re_string_cur_idx (&mctx->input)))
1217             {
1218               /* We found an appropriate halt state.  */
1219               match_last = re_string_cur_idx (&mctx->input);
1220               match = 1;
1221 
1222               /* We found a match, do not modify match_first below.  */
1223               p_match_first = NULL;
1224               if (!fl_longest_match)
1225                 break;
1226             }
1227         }
1228     }
1229 
1230   if (p_match_first)
1231     *p_match_first += next_start_idx;
1232 
1233   return match_last;
1234 }
1235 
1236 /* Check NODE match the current context.  */
1237 
1238 static bool
1239 internal_function
1240 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1241 {
1242   re_token_type_t type = dfa->nodes[node].type;
1243   unsigned int constraint = dfa->nodes[node].constraint;
1244   if (type != END_OF_RE)
1245     return false;
1246   if (!constraint)
1247     return true;
1248   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1249     return false;
1250   return true;
1251 }
1252 
1253 /* Check the halt state STATE match the current context.
1254    Return 0 if not match, if the node, STATE has, is a halt node and
1255    match the context, return the node.  */
1256 
1257 static Idx
1258 internal_function
1259 check_halt_state_context (const re_match_context_t *mctx,
1260                           const re_dfastate_t *state, Idx idx)
1261 {
1262   Idx i;
1263   unsigned int context;
1264 #ifdef DEBUG
1265   assert (state->halt);
1266 #endif
1267   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1268   for (i = 0; i < state->nodes.nelem; ++i)
1269     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1270       return state->nodes.elems[i];
1271   return 0;
1272 }
1273 
1274 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1275    corresponding to the DFA).
1276    Return the destination node, and update EPS_VIA_NODES;
1277    return REG_MISSING in case of errors.  */
1278 
1279 static Idx
1280 internal_function
1281 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1282                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1283                    struct re_fail_stack_t *fs)
1284 {
1285   const re_dfa_t *const dfa = mctx->dfa;
1286   Idx i;
1287   bool ok;
1288   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1289     {
1290       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1291       re_node_set *edests = &dfa->edests[node];
1292       Idx dest_node;
1293       ok = re_node_set_insert (eps_via_nodes, node);
1294       if (BE (! ok, 0))
1295         return REG_ERROR;
1296       /* Pick up a valid destination, or return REG_MISSING if none
1297          is found.  */
1298       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1299         {
1300           Idx candidate = edests->elems[i];
1301           if (!re_node_set_contains (cur_nodes, candidate))
1302             continue;
1303           if (dest_node == REG_MISSING)
1304             dest_node = candidate;
1305 
1306           else
1307             {
1308               /* In order to avoid infinite loop like "(a*)*", return the second
1309                  epsilon-transition if the first was already considered.  */
1310               if (re_node_set_contains (eps_via_nodes, dest_node))
1311                 return candidate;
1312 
1313               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1314               else if (fs != NULL
1315                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1316                                            eps_via_nodes))
1317                 return REG_ERROR;
1318 
1319               /* We know we are going to exit.  */
1320               break;
1321             }
1322         }
1323       return dest_node;
1324     }
1325   else
1326     {
1327       Idx naccepted = 0;
1328       re_token_type_t type = dfa->nodes[node].type;
1329 
1330 #ifdef RE_ENABLE_I18N
1331       if (dfa->nodes[node].accept_mb)
1332         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1333       else
1334 #endif /* RE_ENABLE_I18N */
1335       if (type == OP_BACK_REF)
1336         {
1337           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1338           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1339           if (fs != NULL)
1340             {
1341               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1342                 return REG_MISSING;
1343               else if (naccepted)
1344                 {
1345                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1346                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1347                               naccepted) != 0)
1348                     return REG_MISSING;
1349                 }
1350             }
1351 
1352           if (naccepted == 0)
1353             {
1354               Idx dest_node;
1355               ok = re_node_set_insert (eps_via_nodes, node);
1356               if (BE (! ok, 0))
1357                 return REG_ERROR;
1358               dest_node = dfa->edests[node].elems[0];
1359               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1360                                         dest_node))
1361                 return dest_node;
1362             }
1363         }
1364 
1365       if (naccepted != 0
1366           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1367         {
1368           Idx dest_node = dfa->nexts[node];
1369           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1370           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1371                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1372                                                dest_node)))
1373             return REG_MISSING;
1374           re_node_set_empty (eps_via_nodes);
1375           return dest_node;
1376         }
1377     }
1378   return REG_MISSING;
1379 }
1380 
1381 static reg_errcode_t
1382 internal_function
1383 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1384                  Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1385 {
1386   reg_errcode_t err;
1387   Idx num = fs->num++;
1388   if (fs->num == fs->alloc)
1389     {
1390       struct re_fail_stack_ent_t *new_array;
1391       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1392                                        * fs->alloc * 2));
1393       if (new_array == NULL)
1394         return REG_ESPACE;
1395       fs->alloc *= 2;
1396       fs->stack = new_array;
1397     }
1398   fs->stack[num].idx = str_idx;
1399   fs->stack[num].node = dest_node;
1400   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1401   if (fs->stack[num].regs == NULL)
1402     return REG_ESPACE;
1403   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1404   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1405   return err;
1406 }
1407 
1408 static Idx
1409 internal_function
1410 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1411                 regmatch_t *regs, re_node_set *eps_via_nodes)
1412 {
1413   Idx num = --fs->num;
1414   assert (REG_VALID_INDEX (num));
1415   *pidx = fs->stack[num].idx;
1416   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1417   re_node_set_free (eps_via_nodes);
1418   re_free (fs->stack[num].regs);
1419   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1420   return fs->stack[num].node;
1421 }
1422 
1423 /* Set the positions where the subexpressions are starts/ends to registers
1424    PMATCH.
1425    Note: We assume that pmatch[0] is already set, and
1426    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1427 
1428 static reg_errcode_t
1429 internal_function
1430 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1431           regmatch_t *pmatch, bool fl_backtrack)
1432 {
1433   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1434   Idx idx, cur_node;
1435   re_node_set eps_via_nodes;
1436   struct re_fail_stack_t *fs;
1437   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1438   regmatch_t *prev_idx_match;
1439   bool prev_idx_match_malloced = false;
1440 
1441 #ifdef DEBUG
1442   assert (nmatch > 1);
1443   assert (mctx->state_log != NULL);
1444 #endif
1445   if (fl_backtrack)
1446     {
1447       fs = &fs_body;
1448       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1449       if (fs->stack == NULL)
1450         return REG_ESPACE;
1451     }
1452   else
1453     fs = NULL;
1454 
1455   cur_node = dfa->init_node;
1456   re_node_set_init_empty (&eps_via_nodes);
1457 
1458   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1459     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1460   else
1461     {
1462       prev_idx_match = re_malloc (regmatch_t, nmatch);
1463       if (prev_idx_match == NULL)
1464         {
1465           free_fail_stack_return (fs);
1466           return REG_ESPACE;
1467         }
1468       prev_idx_match_malloced = true;
1469     }
1470   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1471 
1472   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1473     {
1474       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1475 
1476       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1477         {
1478           Idx reg_idx;
1479           if (fs)
1480             {
1481               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1482                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1483                   break;
1484               if (reg_idx == nmatch)
1485                 {
1486                   re_node_set_free (&eps_via_nodes);
1487                   if (prev_idx_match_malloced)
1488                     re_free (prev_idx_match);
1489                   return free_fail_stack_return (fs);
1490                 }
1491               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1492                                          &eps_via_nodes);
1493             }
1494           else
1495             {
1496               re_node_set_free (&eps_via_nodes);
1497               if (prev_idx_match_malloced)
1498                 re_free (prev_idx_match);
1499               return REG_NOERROR;
1500             }
1501         }
1502 
1503       /* Proceed to next node.  */
1504       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1505                                     &eps_via_nodes, fs);
1506 
1507       if (BE (! REG_VALID_INDEX (cur_node), 0))
1508         {
1509           if (BE (cur_node == REG_ERROR, 0))
1510             {
1511               re_node_set_free (&eps_via_nodes);
1512               if (prev_idx_match_malloced)
1513                 re_free (prev_idx_match);
1514               free_fail_stack_return (fs);
1515               return REG_ESPACE;
1516             }
1517           if (fs)
1518             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1519                                        &eps_via_nodes);
1520           else
1521             {
1522               re_node_set_free (&eps_via_nodes);
1523               if (prev_idx_match_malloced)
1524                 re_free (prev_idx_match);
1525               return REG_NOMATCH;
1526             }
1527         }
1528     }
1529   re_node_set_free (&eps_via_nodes);
1530   if (prev_idx_match_malloced)
1531     re_free (prev_idx_match);
1532   return free_fail_stack_return (fs);
1533 }
1534 
1535 static reg_errcode_t
1536 internal_function
1537 free_fail_stack_return (struct re_fail_stack_t *fs)
1538 {
1539   if (fs)
1540     {
1541       Idx fs_idx;
1542       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1543         {
1544           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1545           re_free (fs->stack[fs_idx].regs);
1546         }
1547       re_free (fs->stack);
1548     }
1549   return REG_NOERROR;
1550 }
1551 
1552 static void
1553 internal_function
1554 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1555              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1556 {
1557   int type = dfa->nodes[cur_node].type;
1558   if (type == OP_OPEN_SUBEXP)
1559     {
1560       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1561 
1562       /* We are at the first node of this sub expression.  */
1563       if (reg_num < nmatch)
1564         {
1565           pmatch[reg_num].rm_so = cur_idx;
1566           pmatch[reg_num].rm_eo = -1;
1567         }
1568     }
1569   else if (type == OP_CLOSE_SUBEXP)
1570     {
1571       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1572       if (reg_num < nmatch)
1573         {
1574           /* We are at the last node of this sub expression.  */
1575           if (pmatch[reg_num].rm_so < cur_idx)
1576             {
1577               pmatch[reg_num].rm_eo = cur_idx;
1578               /* This is a non-empty match or we are not inside an optional
1579                  subexpression.  Accept this right away.  */
1580               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1581             }
1582           else
1583             {
1584               if (dfa->nodes[cur_node].opt_subexp
1585                   && prev_idx_match[reg_num].rm_so != -1)
1586                 /* We transited through an empty match for an optional
1587                    subexpression, like (a?)*, and this is not the subexp's
1588                    first match.  Copy back the old content of the registers
1589                    so that matches of an inner subexpression are undone as
1590                    well, like in ((a?))*.  */
1591                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1592               else
1593                 /* We completed a subexpression, but it may be part of
1594                    an optional one, so do not update PREV_IDX_MATCH.  */
1595                 pmatch[reg_num].rm_eo = cur_idx;
1596             }
1597         }
1598     }
1599 }
1600 
1601 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1602    and sift the nodes in each states according to the following rules.
1603    Updated state_log will be wrote to STATE_LOG.
1604 
1605    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1606      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1607         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1608         the LAST_NODE, we throw away the node `a'.
1609      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1610         string `s' and transit to `b':
1611         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1612            away the node `a'.
1613         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1614             thrown away, we throw away the node `a'.
1615      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1616         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1617            node `a'.
1618         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1619             we throw away the node `a'.  */
1620 
1621 #define STATE_NODE_CONTAINS(state,node) \
1622   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1623 
1624 static reg_errcode_t
1625 internal_function
1626 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1627 {
1628   reg_errcode_t err;
1629   int null_cnt = 0;
1630   Idx str_idx = sctx->last_str_idx;
1631   re_node_set cur_dest;
1632 
1633 #ifdef DEBUG
1634   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1635 #endif
1636 
1637   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1638      transit to the last_node and the last_node itself.  */
1639   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1640   if (BE (err != REG_NOERROR, 0))
1641     return err;
1642   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1643   if (BE (err != REG_NOERROR, 0))
1644     goto free_return;
1645 
1646   /* Then check each states in the state_log.  */
1647   while (str_idx > 0)
1648     {
1649       /* Update counters.  */
1650       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1651       if (null_cnt > mctx->max_mb_elem_len)
1652         {
1653           memset (sctx->sifted_states, '\0',
1654                   sizeof (re_dfastate_t *) * str_idx);
1655           re_node_set_free (&cur_dest);
1656           return REG_NOERROR;
1657         }
1658       re_node_set_empty (&cur_dest);
1659       --str_idx;
1660 
1661       if (mctx->state_log[str_idx])
1662         {
1663           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1664           if (BE (err != REG_NOERROR, 0))
1665             goto free_return;
1666         }
1667 
1668       /* Add all the nodes which satisfy the following conditions:
1669          - It can epsilon transit to a node in CUR_DEST.
1670          - It is in CUR_SRC.
1671          And update state_log.  */
1672       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1673       if (BE (err != REG_NOERROR, 0))
1674         goto free_return;
1675     }
1676   err = REG_NOERROR;
1677  free_return:
1678   re_node_set_free (&cur_dest);
1679   return err;
1680 }
1681 
1682 static reg_errcode_t
1683 internal_function
1684 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1685                      Idx str_idx, re_node_set *cur_dest)
1686 {
1687   const re_dfa_t *const dfa = mctx->dfa;
1688   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1689   Idx i;
1690 
1691   /* Then build the next sifted state.
1692      We build the next sifted state on `cur_dest', and update
1693      `sifted_states[str_idx]' with `cur_dest'.
1694      Note:
1695      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1696      `cur_src' points the node_set of the old `state_log[str_idx]'
1697      (with the epsilon nodes pre-filtered out).  */
1698   for (i = 0; i < cur_src->nelem; i++)
1699     {
1700       Idx prev_node = cur_src->elems[i];
1701       int naccepted = 0;
1702       bool ok;
1703 
1704 #ifdef DEBUG
1705       re_token_type_t type = dfa->nodes[prev_node].type;
1706       assert (!IS_EPSILON_NODE (type));
1707 #endif
1708 #ifdef RE_ENABLE_I18N
1709       /* If the node may accept `multi byte'.  */
1710       if (dfa->nodes[prev_node].accept_mb)
1711         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1712                                          str_idx, sctx->last_str_idx);
1713 #endif /* RE_ENABLE_I18N */
1714 
1715       /* We don't check backreferences here.
1716          See update_cur_sifted_state().  */
1717       if (!naccepted
1718           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1719           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1720                                   dfa->nexts[prev_node]))
1721         naccepted = 1;
1722 
1723       if (naccepted == 0)
1724         continue;
1725 
1726       if (sctx->limits.nelem)
1727         {
1728           Idx to_idx = str_idx + naccepted;
1729           if (check_dst_limits (mctx, &sctx->limits,
1730                                 dfa->nexts[prev_node], to_idx,
1731                                 prev_node, str_idx))
1732             continue;
1733         }
1734       ok = re_node_set_insert (cur_dest, prev_node);
1735       if (BE (! ok, 0))
1736         return REG_ESPACE;
1737     }
1738 
1739   return REG_NOERROR;
1740 }
1741 
1742 /* Helper functions.  */
1743 
1744 static reg_errcode_t
1745 internal_function
1746 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1747 {
1748   Idx top = mctx->state_log_top;
1749 
1750   if (next_state_log_idx >= mctx->input.bufs_len
1751       || (next_state_log_idx >= mctx->input.valid_len
1752           && mctx->input.valid_len < mctx->input.len))
1753     {
1754       reg_errcode_t err;
1755       err = extend_buffers (mctx);
1756       if (BE (err != REG_NOERROR, 0))
1757         return err;
1758     }
1759 
1760   if (top < next_state_log_idx)
1761     {
1762       memset (mctx->state_log + top + 1, '\0',
1763               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1764       mctx->state_log_top = next_state_log_idx;
1765     }
1766   return REG_NOERROR;
1767 }
1768 
1769 static reg_errcode_t
1770 internal_function
1771 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1772                    re_dfastate_t **src, Idx num)
1773 {
1774   Idx st_idx;
1775   reg_errcode_t err;
1776   for (st_idx = 0; st_idx < num; ++st_idx)
1777     {
1778       if (dst[st_idx] == NULL)
1779         dst[st_idx] = src[st_idx];
1780       else if (src[st_idx] != NULL)
1781         {
1782           re_node_set merged_set;
1783           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1784                                         &src[st_idx]->nodes);
1785           if (BE (err != REG_NOERROR, 0))
1786             return err;
1787           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1788           re_node_set_free (&merged_set);
1789           if (BE (err != REG_NOERROR, 0))
1790             return err;
1791         }
1792     }
1793   return REG_NOERROR;
1794 }
1795 
1796 static reg_errcode_t
1797 internal_function
1798 update_cur_sifted_state (const re_match_context_t *mctx,
1799                          re_sift_context_t *sctx, Idx str_idx,
1800                          re_node_set *dest_nodes)
1801 {
1802   const re_dfa_t *const dfa = mctx->dfa;
1803   reg_errcode_t err = REG_NOERROR;
1804   const re_node_set *candidates;
1805   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1806                 : &mctx->state_log[str_idx]->nodes);
1807 
1808   if (dest_nodes->nelem == 0)
1809     sctx->sifted_states[str_idx] = NULL;
1810   else
1811     {
1812       if (candidates)
1813         {
1814           /* At first, add the nodes which can epsilon transit to a node in
1815              DEST_NODE.  */
1816           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1817           if (BE (err != REG_NOERROR, 0))
1818             return err;
1819 
1820           /* Then, check the limitations in the current sift_context.  */
1821           if (sctx->limits.nelem)
1822             {
1823               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1824                                          mctx->bkref_ents, str_idx);
1825               if (BE (err != REG_NOERROR, 0))
1826                 return err;
1827             }
1828         }
1829 
1830       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1831       if (BE (err != REG_NOERROR, 0))
1832         return err;
1833     }
1834 
1835   if (candidates && mctx->state_log[str_idx]->has_backref)
1836     {
1837       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1838       if (BE (err != REG_NOERROR, 0))
1839         return err;
1840     }
1841   return REG_NOERROR;
1842 }
1843 
1844 static reg_errcode_t
1845 internal_function
1846 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1847                        const re_node_set *candidates)
1848 {
1849   reg_errcode_t err = REG_NOERROR;
1850   Idx i;
1851 
1852   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1853   if (BE (err != REG_NOERROR, 0))
1854     return err;
1855 
1856   if (!state->inveclosure.alloc)
1857     {
1858       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1859       if (BE (err != REG_NOERROR, 0))
1860         return REG_ESPACE;
1861       for (i = 0; i < dest_nodes->nelem; i++)
1862         re_node_set_merge (&state->inveclosure,
1863                            dfa->inveclosures + dest_nodes->elems[i]);
1864     }
1865   return re_node_set_add_intersect (dest_nodes, candidates,
1866                                     &state->inveclosure);
1867 }
1868 
1869 static reg_errcode_t
1870 internal_function
1871 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1872                        const re_node_set *candidates)
1873 {
1874     Idx ecl_idx;
1875     reg_errcode_t err;
1876     re_node_set *inv_eclosure = dfa->inveclosures + node;
1877     re_node_set except_nodes;
1878     re_node_set_init_empty (&except_nodes);
1879     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1880       {
1881         Idx cur_node = inv_eclosure->elems[ecl_idx];
1882         if (cur_node == node)
1883           continue;
1884         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1885           {
1886             Idx edst1 = dfa->edests[cur_node].elems[0];
1887             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1888                          ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1889             if ((!re_node_set_contains (inv_eclosure, edst1)
1890                  && re_node_set_contains (dest_nodes, edst1))
1891                 || (REG_VALID_NONZERO_INDEX (edst2)
1892                     && !re_node_set_contains (inv_eclosure, edst2)
1893                     && re_node_set_contains (dest_nodes, edst2)))
1894               {
1895                 err = re_node_set_add_intersect (&except_nodes, candidates,
1896                                                  dfa->inveclosures + cur_node);
1897                 if (BE (err != REG_NOERROR, 0))
1898                   {
1899                     re_node_set_free (&except_nodes);
1900                     return err;
1901                   }
1902               }
1903           }
1904       }
1905     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1906       {
1907         Idx cur_node = inv_eclosure->elems[ecl_idx];
1908         if (!re_node_set_contains (&except_nodes, cur_node))
1909           {
1910             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1911             re_node_set_remove_at (dest_nodes, idx);
1912           }
1913       }
1914     re_node_set_free (&except_nodes);
1915     return REG_NOERROR;
1916 }
1917 
1918 static bool
1919 internal_function
1920 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1921                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1922 {
1923   const re_dfa_t *const dfa = mctx->dfa;
1924   Idx lim_idx, src_pos, dst_pos;
1925 
1926   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1927   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1928   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1929     {
1930       Idx subexp_idx;
1931       struct re_backref_cache_entry *ent;
1932       ent = mctx->bkref_ents + limits->elems[lim_idx];
1933       subexp_idx = dfa->nodes[ent->node].opr.idx;
1934 
1935       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1936                                            subexp_idx, dst_node, dst_idx,
1937                                            dst_bkref_idx);
1938       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1939                                            subexp_idx, src_node, src_idx,
1940                                            src_bkref_idx);
1941 
1942       /* In case of:
1943          <src> <dst> ( <subexp> )
1944          ( <subexp> ) <src> <dst>
1945          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1946       if (src_pos == dst_pos)
1947         continue; /* This is unrelated limitation.  */
1948       else
1949         return true;
1950     }
1951   return false;
1952 }
1953 
1954 static int
1955 internal_function
1956 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1957                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
1958 {
1959   const re_dfa_t *const dfa = mctx->dfa;
1960   const re_node_set *eclosures = dfa->eclosures + from_node;
1961   Idx node_idx;
1962 
1963   /* Else, we are on the boundary: examine the nodes on the epsilon
1964      closure.  */
1965   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1966     {
1967       Idx node = eclosures->elems[node_idx];
1968       switch (dfa->nodes[node].type)
1969         {
1970         case OP_BACK_REF:
1971           if (bkref_idx != REG_MISSING)
1972             {
1973               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1974               do
1975                 {
1976                   Idx dst;
1977                   int cpos;
1978 
1979                   if (ent->node != node)
1980                     continue;
1981 
1982                   if (subexp_idx < BITSET_WORD_BITS
1983                       && !(ent->eps_reachable_subexps_map
1984                            & ((bitset_word_t) 1 << subexp_idx)))
1985                     continue;
1986 
1987                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1988                      OP_CLOSE_SUBEXP cases below.  But, if the
1989                      destination node is the same node as the source
1990                      node, don't recurse because it would cause an
1991                      infinite loop: a regex that exhibits this behavior
1992                      is ()\1*\1*  */
1993                   dst = dfa->edests[node].elems[0];
1994                   if (dst == from_node)
1995                     {
1996                       if (boundaries & 1)
1997                         return -1;
1998                       else /* if (boundaries & 2) */
1999                         return 0;
2000                     }
2001 
2002                   cpos =
2003                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2004                                                  dst, bkref_idx);
2005                   if (cpos == -1 /* && (boundaries & 1) */)
2006                     return -1;
2007                   if (cpos == 0 && (boundaries & 2))
2008                     return 0;
2009 
2010                   if (subexp_idx < BITSET_WORD_BITS)
2011                     ent->eps_reachable_subexps_map
2012                       &= ~((bitset_word_t) 1 << subexp_idx);
2013                 }
2014               while (ent++->more);
2015             }
2016           break;
2017 
2018         case OP_OPEN_SUBEXP:
2019           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2020             return -1;
2021           break;
2022 
2023         case OP_CLOSE_SUBEXP:
2024           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2025             return 0;
2026           break;
2027 
2028         default:
2029             break;
2030         }
2031     }
2032 
2033   return (boundaries & 2) ? 1 : 0;
2034 }
2035 
2036 static int
2037 internal_function
2038 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2039                            Idx subexp_idx, Idx from_node, Idx str_idx,
2040                            Idx bkref_idx)
2041 {
2042   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2043   int boundaries;
2044 
2045   /* If we are outside the range of the subexpression, return -1 or 1.  */
2046   if (str_idx < lim->subexp_from)
2047     return -1;
2048 
2049   if (lim->subexp_to < str_idx)
2050     return 1;
2051 
2052   /* If we are within the subexpression, return 0.  */
2053   boundaries = (str_idx == lim->subexp_from);
2054   boundaries |= (str_idx == lim->subexp_to) << 1;
2055   if (boundaries == 0)
2056     return 0;
2057 
2058   /* Else, examine epsilon closure.  */
2059   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2060                                       from_node, bkref_idx);
2061 }
2062 
2063 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2064    which are against limitations from DEST_NODES. */
2065 
2066 static reg_errcode_t
2067 internal_function
2068 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2069                      const re_node_set *candidates, re_node_set *limits,
2070                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2071 {
2072   reg_errcode_t err;
2073   Idx node_idx, lim_idx;
2074 
2075   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2076     {
2077       Idx subexp_idx;
2078       struct re_backref_cache_entry *ent;
2079       ent = bkref_ents + limits->elems[lim_idx];
2080 
2081       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2082         continue; /* This is unrelated limitation.  */
2083 
2084       subexp_idx = dfa->nodes[ent->node].opr.idx;
2085       if (ent->subexp_to == str_idx)
2086         {
2087           Idx ops_node = REG_MISSING;
2088           Idx cls_node = REG_MISSING;
2089           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2090             {
2091               Idx node = dest_nodes->elems[node_idx];
2092               re_token_type_t type = dfa->nodes[node].type;
2093               if (type == OP_OPEN_SUBEXP
2094                   && subexp_idx == dfa->nodes[node].opr.idx)
2095                 ops_node = node;
2096               else if (type == OP_CLOSE_SUBEXP
2097                        && subexp_idx == dfa->nodes[node].opr.idx)
2098                 cls_node = node;
2099             }
2100 
2101           /* Check the limitation of the open subexpression.  */
2102           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2103           if (REG_VALID_INDEX (ops_node))
2104             {
2105               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2106                                            candidates);
2107               if (BE (err != REG_NOERROR, 0))
2108                 return err;
2109             }
2110 
2111           /* Check the limitation of the close subexpression.  */
2112           if (REG_VALID_INDEX (cls_node))
2113             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2114               {
2115                 Idx node = dest_nodes->elems[node_idx];
2116                 if (!re_node_set_contains (dfa->inveclosures + node,
2117                                            cls_node)
2118                     && !re_node_set_contains (dfa->eclosures + node,
2119                                               cls_node))
2120                   {
2121                     /* It is against this limitation.
2122                        Remove it form the current sifted state.  */
2123                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2124                                                  candidates);
2125                     if (BE (err != REG_NOERROR, 0))
2126                       return err;
2127                     --node_idx;
2128                   }
2129               }
2130         }
2131       else /* (ent->subexp_to != str_idx)  */
2132         {
2133           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2134             {
2135               Idx node = dest_nodes->elems[node_idx];
2136               re_token_type_t type = dfa->nodes[node].type;
2137               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2138                 {
2139                   if (subexp_idx != dfa->nodes[node].opr.idx)
2140                     continue;
2141                   /* It is against this limitation.
2142                      Remove it form the current sifted state.  */
2143                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2144                                                candidates);
2145                   if (BE (err != REG_NOERROR, 0))
2146                     return err;
2147                 }
2148             }
2149         }
2150     }
2151   return REG_NOERROR;
2152 }
2153 
2154 static reg_errcode_t
2155 internal_function
2156 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2157                    Idx str_idx, const re_node_set *candidates)
2158 {
2159   const re_dfa_t *const dfa = mctx->dfa;
2160   reg_errcode_t err;
2161   Idx node_idx, node;
2162   re_sift_context_t local_sctx;
2163   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2164 
2165   if (first_idx == REG_MISSING)
2166     return REG_NOERROR;
2167 
2168   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2169 
2170   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2171     {
2172       Idx enabled_idx;
2173       re_token_type_t type;
2174       struct re_backref_cache_entry *entry;
2175       node = candidates->elems[node_idx];
2176       type = dfa->nodes[node].type;
2177       /* Avoid infinite loop for the REs like "()\1+".  */
2178       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2179         continue;
2180       if (type != OP_BACK_REF)
2181         continue;
2182 
2183       entry = mctx->bkref_ents + first_idx;
2184       enabled_idx = first_idx;
2185       do
2186         {
2187           Idx subexp_len;
2188           Idx to_idx;
2189           Idx dst_node;
2190           bool ok;
2191           re_dfastate_t *cur_state;
2192 
2193           if (entry->node != node)
2194             continue;
2195           subexp_len = entry->subexp_to - entry->subexp_from;
2196           to_idx = str_idx + subexp_len;
2197           dst_node = (subexp_len ? dfa->nexts[node]
2198                       : dfa->edests[node].elems[0]);
2199 
2200           if (to_idx > sctx->last_str_idx
2201               || sctx->sifted_states[to_idx] == NULL
2202               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2203               || check_dst_limits (mctx, &sctx->limits, node,
2204                                    str_idx, dst_node, to_idx))
2205             continue;
2206 
2207           if (local_sctx.sifted_states == NULL)
2208             {
2209               local_sctx = *sctx;
2210               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2211               if (BE (err != REG_NOERROR, 0))
2212                 goto free_return;
2213             }
2214           local_sctx.last_node = node;
2215           local_sctx.last_str_idx = str_idx;
2216           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2217           if (BE (! ok, 0))
2218             {
2219               err = REG_ESPACE;
2220               goto free_return;
2221             }
2222           cur_state = local_sctx.sifted_states[str_idx];
2223           err = sift_states_backward (mctx, &local_sctx);
2224           if (BE (err != REG_NOERROR, 0))
2225             goto free_return;
2226           if (sctx->limited_states != NULL)
2227             {
2228               err = merge_state_array (dfa, sctx->limited_states,
2229                                        local_sctx.sifted_states,
2230                                        str_idx + 1);
2231               if (BE (err != REG_NOERROR, 0))
2232                 goto free_return;
2233             }
2234           local_sctx.sifted_states[str_idx] = cur_state;
2235           re_node_set_remove (&local_sctx.limits, enabled_idx);
2236 
2237           /* mctx->bkref_ents may have changed, reload the pointer.  */
2238           entry = mctx->bkref_ents + enabled_idx;
2239         }
2240       while (enabled_idx++, entry++->more);
2241     }
2242   err = REG_NOERROR;
2243  free_return:
2244   if (local_sctx.sifted_states != NULL)
2245     {
2246       re_node_set_free (&local_sctx.limits);
2247     }
2248 
2249   return err;
2250 }
2251 
2252 
2253 #ifdef RE_ENABLE_I18N
2254 static int
2255 internal_function
2256 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2257                      Idx node_idx, Idx str_idx, Idx max_str_idx)
2258 {
2259   const re_dfa_t *const dfa = mctx->dfa;
2260   int naccepted;
2261   /* Check the node can accept `multi byte'.  */
2262   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2263   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2264       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2265                             dfa->nexts[node_idx]))
2266     /* The node can't accept the `multi byte', or the
2267        destination was already thrown away, then the node
2268        could't accept the current input `multi byte'.   */
2269     naccepted = 0;
2270   /* Otherwise, it is sure that the node could accept
2271      `naccepted' bytes input.  */
2272   return naccepted;
2273 }
2274 #endif /* RE_ENABLE_I18N */
2275 
2276 
2277 /* Functions for state transition.  */
2278 
2279 /* Return the next state to which the current state STATE will transit by
2280    accepting the current input byte, and update STATE_LOG if necessary.
2281    If STATE can accept a multibyte char/collating element/back reference
2282    update the destination of STATE_LOG.  */
2283 
2284 static re_dfastate_t *
2285 internal_function
2286 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2287                re_dfastate_t *state)
2288 {
2289   re_dfastate_t **trtable;
2290   unsigned char ch;
2291 
2292 #ifdef RE_ENABLE_I18N
2293   /* If the current state can accept multibyte.  */
2294   if (BE (state->accept_mb, 0))
2295     {
2296       *err = transit_state_mb (mctx, state);
2297       if (BE (*err != REG_NOERROR, 0))
2298         return NULL;
2299     }
2300 #endif /* RE_ENABLE_I18N */
2301 
2302   /* Then decide the next state with the single byte.  */
2303 #if 0
2304   if (0)
2305     /* don't use transition table  */
2306     return transit_state_sb (err, mctx, state);
2307 #endif
2308 
2309   /* Use transition table  */
2310   ch = re_string_fetch_byte (&mctx->input);
2311   for (;;)
2312     {
2313       trtable = state->trtable;
2314       if (BE (trtable != NULL, 1))
2315         return trtable[ch];
2316 
2317       trtable = state->word_trtable;
2318       if (BE (trtable != NULL, 1))
2319         {
2320           unsigned int context;
2321           context
2322             = re_string_context_at (&mctx->input,
2323                                     re_string_cur_idx (&mctx->input) - 1,
2324                                     mctx->eflags);
2325           if (IS_WORD_CONTEXT (context))
2326             return trtable[ch + SBC_MAX];
2327           else
2328             return trtable[ch];
2329         }
2330 
2331       if (!build_trtable (mctx->dfa, state))
2332         {
2333           *err = REG_ESPACE;
2334           return NULL;
2335         }
2336 
2337       /* Retry, we now have a transition table.  */
2338     }
2339 }
2340 
2341 /* Update the state_log if we need */
2342 static re_dfastate_t *
2343 internal_function
2344 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2345                       re_dfastate_t *next_state)
2346 {
2347   const re_dfa_t *const dfa = mctx->dfa;
2348   Idx cur_idx = re_string_cur_idx (&mctx->input);
2349 
2350   if (cur_idx > mctx->state_log_top)
2351     {
2352       mctx->state_log[cur_idx] = next_state;
2353       mctx->state_log_top = cur_idx;
2354     }
2355   else if (mctx->state_log[cur_idx] == 0)
2356     {
2357       mctx->state_log[cur_idx] = next_state;
2358     }
2359   else
2360     {
2361       re_dfastate_t *pstate;
2362       unsigned int context;
2363       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2364       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2365          the destination of a multibyte char/collating element/
2366          back reference.  Then the next state is the union set of
2367          these destinations and the results of the transition table.  */
2368       pstate = mctx->state_log[cur_idx];
2369       log_nodes = pstate->entrance_nodes;
2370       if (next_state != NULL)
2371         {
2372           table_nodes = next_state->entrance_nodes;
2373           *err = re_node_set_init_union (&next_nodes, table_nodes,
2374                                              log_nodes);
2375           if (BE (*err != REG_NOERROR, 0))
2376             return NULL;
2377         }
2378       else
2379         next_nodes = *log_nodes;
2380       /* Note: We already add the nodes of the initial state,
2381          then we don't need to add them here.  */
2382 
2383       context = re_string_context_at (&mctx->input,
2384                                       re_string_cur_idx (&mctx->input) - 1,
2385                                       mctx->eflags);
2386       next_state = mctx->state_log[cur_idx]
2387         = re_acquire_state_context (err, dfa, &next_nodes, context);
2388       /* We don't need to check errors here, since the return value of
2389          this function is next_state and ERR is already set.  */
2390 
2391       if (table_nodes != NULL)
2392         re_node_set_free (&next_nodes);
2393     }
2394 
2395   if (BE (dfa->nbackref, 0) && next_state != NULL)
2396     {
2397       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2398          later.  We must check them here, since the back references in the
2399          next state might use them.  */
2400       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2401                                         cur_idx);
2402       if (BE (*err != REG_NOERROR, 0))
2403         return NULL;
2404 
2405       /* If the next state has back references.  */
2406       if (next_state->has_backref)
2407         {
2408           *err = transit_state_bkref (mctx, &next_state->nodes);
2409           if (BE (*err != REG_NOERROR, 0))
2410             return NULL;
2411           next_state = mctx->state_log[cur_idx];
2412         }
2413     }
2414 
2415   return next_state;
2416 }
2417 
2418 /* Skip bytes in the input that correspond to part of a
2419    multi-byte match, then look in the log for a state
2420    from which to restart matching.  */
2421 static re_dfastate_t *
2422 internal_function
2423 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2424 {
2425   re_dfastate_t *cur_state;
2426   do
2427     {
2428       Idx max = mctx->state_log_top;
2429       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2430 
2431       do
2432         {
2433           if (++cur_str_idx > max)
2434             return NULL;
2435           re_string_skip_bytes (&mctx->input, 1);
2436         }
2437       while (mctx->state_log[cur_str_idx] == NULL);
2438 
2439       cur_state = merge_state_with_log (err, mctx, NULL);
2440     }
2441   while (*err == REG_NOERROR && cur_state == NULL);
2442   return cur_state;
2443 }
2444 
2445 /* Helper functions for transit_state.  */
2446 
2447 /* From the node set CUR_NODES, pick up the nodes whose types are
2448    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2449    expression. And register them to use them later for evaluating the
2450    correspoding back references.  */
2451 
2452 static reg_errcode_t
2453 internal_function
2454 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2455                            Idx str_idx)
2456 {
2457   const re_dfa_t *const dfa = mctx->dfa;
2458   Idx node_idx;
2459   reg_errcode_t err;
2460 
2461   /* TODO: This isn't efficient.
2462            Because there might be more than one nodes whose types are
2463            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2464            nodes.
2465            E.g. RE: (a){2}  */
2466   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2467     {
2468       Idx node = cur_nodes->elems[node_idx];
2469       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2470           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2471           && (dfa->used_bkref_map
2472               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2473         {
2474           err = match_ctx_add_subtop (mctx, node, str_idx);
2475           if (BE (err != REG_NOERROR, 0))
2476             return err;
2477         }
2478     }
2479   return REG_NOERROR;
2480 }
2481 
2482 #if 0
2483 /* Return the next state to which the current state STATE will transit by
2484    accepting the current input byte.  */
2485 
2486 static re_dfastate_t *
2487 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2488                   re_dfastate_t *state)
2489 {
2490   const re_dfa_t *const dfa = mctx->dfa;
2491   re_node_set next_nodes;
2492   re_dfastate_t *next_state;
2493   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2494   unsigned int context;
2495 
2496   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2497   if (BE (*err != REG_NOERROR, 0))
2498     return NULL;
2499   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2500     {
2501       Idx cur_node = state->nodes.elems[node_cnt];
2502       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2503         {
2504           *err = re_node_set_merge (&next_nodes,
2505                                     dfa->eclosures + dfa->nexts[cur_node]);
2506           if (BE (*err != REG_NOERROR, 0))
2507             {
2508               re_node_set_free (&next_nodes);
2509               return NULL;
2510             }
2511         }
2512     }
2513   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2514   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2515   /* We don't need to check errors here, since the return value of
2516      this function is next_state and ERR is already set.  */
2517 
2518   re_node_set_free (&next_nodes);
2519   re_string_skip_bytes (&mctx->input, 1);
2520   return next_state;
2521 }
2522 #endif
2523 
2524 #ifdef RE_ENABLE_I18N
2525 static reg_errcode_t
2526 internal_function
2527 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2528 {
2529   const re_dfa_t *const dfa = mctx->dfa;
2530   reg_errcode_t err;
2531   Idx i;
2532 
2533   for (i = 0; i < pstate->nodes.nelem; ++i)
2534     {
2535       re_node_set dest_nodes, *new_nodes;
2536       Idx cur_node_idx = pstate->nodes.elems[i];
2537       int naccepted;
2538       Idx dest_idx;
2539       unsigned int context;
2540       re_dfastate_t *dest_state;
2541 
2542       if (!dfa->nodes[cur_node_idx].accept_mb)
2543         continue;
2544 
2545       if (dfa->nodes[cur_node_idx].constraint)
2546         {
2547           context = re_string_context_at (&mctx->input,
2548                                           re_string_cur_idx (&mctx->input),
2549                                           mctx->eflags);
2550           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2551                                            context))
2552             continue;
2553         }
2554 
2555       /* How many bytes the node can accept?  */
2556       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2557                                            re_string_cur_idx (&mctx->input));
2558       if (naccepted == 0)
2559         continue;
2560 
2561       /* The node can accepts `naccepted' bytes.  */
2562       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2563       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2564                                : mctx->max_mb_elem_len);
2565       err = clean_state_log_if_needed (mctx, dest_idx);
2566       if (BE (err != REG_NOERROR, 0))
2567         return err;
2568 #ifdef DEBUG
2569       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2570 #endif
2571       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2572 
2573       dest_state = mctx->state_log[dest_idx];
2574       if (dest_state == NULL)
2575         dest_nodes = *new_nodes;
2576       else
2577         {
2578           err = re_node_set_init_union (&dest_nodes,
2579                                         dest_state->entrance_nodes, new_nodes);
2580           if (BE (err != REG_NOERROR, 0))
2581             return err;
2582         }
2583       context = re_string_context_at (&mctx->input, dest_idx - 1,
2584                                       mctx->eflags);
2585       mctx->state_log[dest_idx]
2586         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2587       if (dest_state != NULL)
2588         re_node_set_free (&dest_nodes);
2589       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2590         return err;
2591     }
2592   return REG_NOERROR;
2593 }
2594 #endif /* RE_ENABLE_I18N */
2595 
2596 static reg_errcode_t
2597 internal_function
2598 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2599 {
2600   const re_dfa_t *const dfa = mctx->dfa;
2601   reg_errcode_t err;
2602   Idx i;
2603   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2604 
2605   for (i = 0; i < nodes->nelem; ++i)
2606     {
2607       Idx dest_str_idx, prev_nelem, bkc_idx;
2608       Idx node_idx = nodes->elems[i];
2609       unsigned int context;
2610       const re_token_t *node = dfa->nodes + node_idx;
2611       re_node_set *new_dest_nodes;
2612 
2613       /* Check whether `node' is a backreference or not.  */
2614       if (node->type != OP_BACK_REF)
2615         continue;
2616 
2617       if (node->constraint)
2618         {
2619           context = re_string_context_at (&mctx->input, cur_str_idx,
2620                                           mctx->eflags);
2621           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2622             continue;
2623         }
2624 
2625       /* `node' is a backreference.
2626          Check the substring which the substring matched.  */
2627       bkc_idx = mctx->nbkref_ents;
2628       err = get_subexp (mctx, node_idx, cur_str_idx);
2629       if (BE (err != REG_NOERROR, 0))
2630         goto free_return;
2631 
2632       /* And add the epsilon closures (which is `new_dest_nodes') of
2633          the backreference to appropriate state_log.  */
2634 #ifdef DEBUG
2635       assert (dfa->nexts[node_idx] != REG_MISSING);
2636 #endif
2637       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2638         {
2639           Idx subexp_len;
2640           re_dfastate_t *dest_state;
2641           struct re_backref_cache_entry *bkref_ent;
2642           bkref_ent = mctx->bkref_ents + bkc_idx;
2643           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2644             continue;
2645           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2646           new_dest_nodes = (subexp_len == 0
2647                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2648                             : dfa->eclosures + dfa->nexts[node_idx]);
2649           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2650                           - bkref_ent->subexp_from);
2651           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2652                                           mctx->eflags);
2653           dest_state = mctx->state_log[dest_str_idx];
2654           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2655                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2656           /* Add `new_dest_node' to state_log.  */
2657           if (dest_state == NULL)
2658             {
2659               mctx->state_log[dest_str_idx]
2660                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2661                                             context);
2662               if (BE (mctx->state_log[dest_str_idx] == NULL
2663                       && err != REG_NOERROR, 0))
2664                 goto free_return;
2665             }
2666           else
2667             {
2668               re_node_set dest_nodes;
2669               err = re_node_set_init_union (&dest_nodes,
2670                                             dest_state->entrance_nodes,
2671                                             new_dest_nodes);
2672               if (BE (err != REG_NOERROR, 0))
2673                 {
2674                   re_node_set_free (&dest_nodes);
2675                   goto free_return;
2676                 }
2677               mctx->state_log[dest_str_idx]
2678                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2679               re_node_set_free (&dest_nodes);
2680               if (BE (mctx->state_log[dest_str_idx] == NULL
2681                       && err != REG_NOERROR, 0))
2682                 goto free_return;
2683             }
2684           /* We need to check recursively if the backreference can epsilon
2685              transit.  */
2686           if (subexp_len == 0
2687               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2688             {
2689               err = check_subexp_matching_top (mctx, new_dest_nodes,
2690                                                cur_str_idx);
2691               if (BE (err != REG_NOERROR, 0))
2692                 goto free_return;
2693               err = transit_state_bkref (mctx, new_dest_nodes);
2694               if (BE (err != REG_NOERROR, 0))
2695                 goto free_return;
2696             }
2697         }
2698     }
2699   err = REG_NOERROR;
2700  free_return:
2701   return err;
2702 }
2703 
2704 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2705    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2706    Note that we might collect inappropriate candidates here.
2707    However, the cost of checking them strictly here is too high, then we
2708    delay these checking for prune_impossible_nodes().  */
2709 
2710 static reg_errcode_t
2711 internal_function
2712 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2713 {
2714   const re_dfa_t *const dfa = mctx->dfa;
2715   Idx subexp_num, sub_top_idx;
2716   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2717   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2718   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2719   if (cache_idx != REG_MISSING)
2720     {
2721       const struct re_backref_cache_entry *entry
2722         = mctx->bkref_ents + cache_idx;
2723       do
2724         if (entry->node == bkref_node)
2725           return REG_NOERROR; /* We already checked it.  */
2726       while (entry++->more);
2727     }
2728 
2729   subexp_num = dfa->nodes[bkref_node].opr.idx;
2730 
2731   /* For each sub expression  */
2732   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2733     {
2734       reg_errcode_t err;
2735       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2736       re_sub_match_last_t *sub_last;
2737       Idx sub_last_idx, sl_str, bkref_str_off;
2738 
2739       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2740         continue; /* It isn't related.  */
2741 
2742       sl_str = sub_top->str_idx;
2743       bkref_str_off = bkref_str_idx;
2744       /* At first, check the last node of sub expressions we already
2745          evaluated.  */
2746       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2747         {
2748           regoff_t sl_str_diff;
2749           sub_last = sub_top->lasts[sub_last_idx];
2750           sl_str_diff = sub_last->str_idx - sl_str;
2751           /* The matched string by the sub expression match with the substring
2752              at the back reference?  */
2753           if (sl_str_diff > 0)
2754             {
2755               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2756                 {
2757                   /* Not enough chars for a successful match.  */
2758                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2759                     break;
2760 
2761                   err = clean_state_log_if_needed (mctx,
2762                                                    bkref_str_off
2763                                                    + sl_str_diff);
2764                   if (BE (err != REG_NOERROR, 0))
2765                     return err;
2766                   buf = (const char *) re_string_get_buffer (&mctx->input);
2767                 }
2768               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2769                 /* We don't need to search this sub expression any more.  */
2770                 break;
2771             }
2772           bkref_str_off += sl_str_diff;
2773           sl_str += sl_str_diff;
2774           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2775                                 bkref_str_idx);
2776 
2777           /* Reload buf, since the preceding call might have reallocated
2778              the buffer.  */
2779           buf = (const char *) re_string_get_buffer (&mctx->input);
2780 
2781           if (err == REG_NOMATCH)
2782             continue;
2783           if (BE (err != REG_NOERROR, 0))
2784             return err;
2785         }
2786 
2787       if (sub_last_idx < sub_top->nlasts)
2788         continue;
2789       if (sub_last_idx > 0)
2790         ++sl_str;
2791       /* Then, search for the other last nodes of the sub expression.  */
2792       for (; sl_str <= bkref_str_idx; ++sl_str)
2793         {
2794           Idx cls_node;
2795           regoff_t sl_str_off;
2796           const re_node_set *nodes;
2797           sl_str_off = sl_str - sub_top->str_idx;
2798           /* The matched string by the sub expression match with the substring
2799              at the back reference?  */
2800           if (sl_str_off > 0)
2801             {
2802               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2803                 {
2804                   /* If we are at the end of the input, we cannot match.  */
2805                   if (bkref_str_off >= mctx->input.len)
2806                     break;
2807 
2808                   err = extend_buffers (mctx);
2809                   if (BE (err != REG_NOERROR, 0))
2810                     return err;
2811 
2812                   buf = (const char *) re_string_get_buffer (&mctx->input);
2813                 }
2814               if (buf [bkref_str_off++] != buf[sl_str - 1])
2815                 break; /* We don't need to search this sub expression
2816                           any more.  */
2817             }
2818           if (mctx->state_log[sl_str] == NULL)
2819             continue;
2820           /* Does this state have a ')' of the sub expression?  */
2821           nodes = &mctx->state_log[sl_str]->nodes;
2822           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2823                                        OP_CLOSE_SUBEXP);
2824           if (cls_node == REG_MISSING)
2825             continue; /* No.  */
2826           if (sub_top->path == NULL)
2827             {
2828               sub_top->path = calloc (sizeof (state_array_t),
2829                                       sl_str - sub_top->str_idx + 1);
2830               if (sub_top->path == NULL)
2831                 return REG_ESPACE;
2832             }
2833           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2834              in the current context?  */
2835           err = check_arrival (mctx, sub_top->path, sub_top->node,
2836                                sub_top->str_idx, cls_node, sl_str,
2837                                OP_CLOSE_SUBEXP);
2838           if (err == REG_NOMATCH)
2839               continue;
2840           if (BE (err != REG_NOERROR, 0))
2841               return err;
2842           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2843           if (BE (sub_last == NULL, 0))
2844             return REG_ESPACE;
2845           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2846                                 bkref_str_idx);
2847           if (err == REG_NOMATCH)
2848             continue;
2849         }
2850     }
2851   return REG_NOERROR;
2852 }
2853 
2854 /* Helper functions for get_subexp().  */
2855 
2856 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2857    If it can arrive, register the sub expression expressed with SUB_TOP
2858    and SUB_LAST.  */
2859 
2860 static reg_errcode_t
2861 internal_function
2862 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2863                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2864 {
2865   reg_errcode_t err;
2866   Idx to_idx;
2867   /* Can the subexpression arrive the back reference?  */
2868   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2869                        sub_last->str_idx, bkref_node, bkref_str,
2870                        OP_OPEN_SUBEXP);
2871   if (err != REG_NOERROR)
2872     return err;
2873   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2874                              sub_last->str_idx);
2875   if (BE (err != REG_NOERROR, 0))
2876     return err;
2877   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2878   return clean_state_log_if_needed (mctx, to_idx);
2879 }
2880 
2881 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2882    Search '(' if FL_OPEN, or search ')' otherwise.
2883    TODO: This function isn't efficient...
2884          Because there might be more than one nodes whose types are
2885          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2886          nodes.
2887          E.g. RE: (a){2}  */
2888 
2889 static Idx
2890 internal_function
2891 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2892                   Idx subexp_idx, int type)
2893 {
2894   Idx cls_idx;
2895   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2896     {
2897       Idx cls_node = nodes->elems[cls_idx];
2898       const re_token_t *node = dfa->nodes + cls_node;
2899       if (node->type == type
2900           && node->opr.idx == subexp_idx)
2901         return cls_node;
2902     }
2903   return REG_MISSING;
2904 }
2905 
2906 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2907    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2908    heavily reused.
2909    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2910 
2911 static reg_errcode_t
2912 internal_function
2913 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2914                Idx top_str, Idx last_node, Idx last_str, int type)
2915 {
2916   const re_dfa_t *const dfa = mctx->dfa;
2917   reg_errcode_t err = REG_NOERROR;
2918   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2919   re_dfastate_t *cur_state = NULL;
2920   re_node_set *cur_nodes, next_nodes;
2921   re_dfastate_t **backup_state_log;
2922   unsigned int context;
2923 
2924   subexp_num = dfa->nodes[top_node].opr.idx;
2925   /* Extend the buffer if we need.  */
2926   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2927     {
2928       re_dfastate_t **new_array;
2929       Idx old_alloc = path->alloc;
2930       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2931       if (BE (new_alloc < old_alloc, 0)
2932           || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2933         return REG_ESPACE;
2934       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2935       if (BE (new_array == NULL, 0))
2936         return REG_ESPACE;
2937       path->array = new_array;
2938       path->alloc = new_alloc;
2939       memset (new_array + old_alloc, '\0',
2940               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2941     }
2942 
2943   str_idx = path->next_idx ? path->next_idx : top_str;
2944 
2945   /* Temporary modify MCTX.  */
2946   backup_state_log = mctx->state_log;
2947   backup_cur_idx = mctx->input.cur_idx;
2948   mctx->state_log = path->array;
2949   mctx->input.cur_idx = str_idx;
2950 
2951   /* Setup initial node set.  */
2952   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2953   if (str_idx == top_str)
2954     {
2955       err = re_node_set_init_1 (&next_nodes, top_node);
2956       if (BE (err != REG_NOERROR, 0))
2957         return err;
2958       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2959       if (BE (err != REG_NOERROR, 0))
2960         {
2961           re_node_set_free (&next_nodes);
2962           return err;
2963         }
2964     }
2965   else
2966     {
2967       cur_state = mctx->state_log[str_idx];
2968       if (cur_state && cur_state->has_backref)
2969         {
2970           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2971           if (BE (err != REG_NOERROR, 0))
2972             return err;
2973         }
2974       else
2975         re_node_set_init_empty (&next_nodes);
2976     }
2977   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2978     {
2979       if (next_nodes.nelem)
2980         {
2981           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2982                                     subexp_num, type);
2983           if (BE (err != REG_NOERROR, 0))
2984             {
2985               re_node_set_free (&next_nodes);
2986               return err;
2987             }
2988         }
2989       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2990       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2991         {
2992           re_node_set_free (&next_nodes);
2993           return err;
2994         }
2995       mctx->state_log[str_idx] = cur_state;
2996     }
2997 
2998   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2999     {
3000       re_node_set_empty (&next_nodes);
3001       if (mctx->state_log[str_idx + 1])
3002         {
3003           err = re_node_set_merge (&next_nodes,
3004                                    &mctx->state_log[str_idx + 1]->nodes);
3005           if (BE (err != REG_NOERROR, 0))
3006             {
3007               re_node_set_free (&next_nodes);
3008               return err;
3009             }
3010         }
3011       if (cur_state)
3012         {
3013           err = check_arrival_add_next_nodes (mctx, str_idx,
3014                                               &cur_state->non_eps_nodes,
3015                                               &next_nodes);
3016           if (BE (err != REG_NOERROR, 0))
3017             {
3018               re_node_set_free (&next_nodes);
3019               return err;
3020             }
3021         }
3022       ++str_idx;
3023       if (next_nodes.nelem)
3024         {
3025           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3026           if (BE (err != REG_NOERROR, 0))
3027             {
3028               re_node_set_free (&next_nodes);
3029               return err;
3030             }
3031           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3032                                     subexp_num, type);
3033           if (BE (err != REG_NOERROR, 0))
3034             {
3035               re_node_set_free (&next_nodes);
3036               return err;
3037             }
3038         }
3039       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3040       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3041       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3042         {
3043           re_node_set_free (&next_nodes);
3044           return err;
3045         }
3046       mctx->state_log[str_idx] = cur_state;
3047       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3048     }
3049   re_node_set_free (&next_nodes);
3050   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3051                : &mctx->state_log[last_str]->nodes);
3052   path->next_idx = str_idx;
3053 
3054   /* Fix MCTX.  */
3055   mctx->state_log = backup_state_log;
3056   mctx->input.cur_idx = backup_cur_idx;
3057 
3058   /* Then check the current node set has the node LAST_NODE.  */
3059   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3060     return REG_NOERROR;
3061 
3062   return REG_NOMATCH;
3063 }
3064 
3065 /* Helper functions for check_arrival.  */
3066 
3067 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3068    to NEXT_NODES.
3069    TODO: This function is similar to the functions transit_state*(),
3070          however this function has many additional works.
3071          Can't we unify them?  */
3072 
3073 static reg_errcode_t
3074 internal_function
3075 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3076                               re_node_set *cur_nodes, re_node_set *next_nodes)
3077 {
3078   const re_dfa_t *const dfa = mctx->dfa;
3079   bool ok;
3080   Idx cur_idx;
3081   reg_errcode_t err = REG_NOERROR;
3082   re_node_set union_set;
3083   re_node_set_init_empty (&union_set);
3084   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3085     {
3086       int naccepted = 0;
3087       Idx cur_node = cur_nodes->elems[cur_idx];
3088 #ifdef DEBUG
3089       re_token_type_t type = dfa->nodes[cur_node].type;
3090       assert (!IS_EPSILON_NODE (type));
3091 #endif
3092 #ifdef RE_ENABLE_I18N
3093       /* If the node may accept `multi byte'.  */
3094       if (dfa->nodes[cur_node].accept_mb)
3095         {
3096           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3097                                                str_idx);
3098           if (naccepted > 1)
3099             {
3100               re_dfastate_t *dest_state;
3101               Idx next_node = dfa->nexts[cur_node];
3102               Idx next_idx = str_idx + naccepted;
3103               dest_state = mctx->state_log[next_idx];
3104               re_node_set_empty (&union_set);
3105               if (dest_state)
3106                 {
3107                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3108                   if (BE (err != REG_NOERROR, 0))
3109                     {
3110                       re_node_set_free (&union_set);
3111                       return err;
3112                     }
3113                 }
3114               ok = re_node_set_insert (&union_set, next_node);
3115               if (BE (! ok, 0))
3116                 {
3117                   re_node_set_free (&union_set);
3118                   return REG_ESPACE;
3119                 }
3120               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3121                                                             &union_set);
3122               if (BE (mctx->state_log[next_idx] == NULL
3123                       && err != REG_NOERROR, 0))
3124                 {
3125                   re_node_set_free (&union_set);
3126                   return err;
3127                 }
3128             }
3129         }
3130 #endif /* RE_ENABLE_I18N */
3131       if (naccepted
3132           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3133         {
3134           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3135           if (BE (! ok, 0))
3136             {
3137               re_node_set_free (&union_set);
3138               return REG_ESPACE;
3139             }
3140         }
3141     }
3142   re_node_set_free (&union_set);
3143   return REG_NOERROR;
3144 }
3145 
3146 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3147    CUR_NODES, however exclude the nodes which are:
3148     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3149     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3150 */
3151 
3152 static reg_errcode_t
3153 internal_function
3154 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3155                           Idx ex_subexp, int type)
3156 {
3157   reg_errcode_t err;
3158   Idx idx, outside_node;
3159   re_node_set new_nodes;
3160 #ifdef DEBUG
3161   assert (cur_nodes->nelem);
3162 #endif
3163   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3164   if (BE (err != REG_NOERROR, 0))
3165     return err;
3166   /* Create a new node set NEW_NODES with the nodes which are epsilon
3167      closures of the node in CUR_NODES.  */
3168 
3169   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3170     {
3171       Idx cur_node = cur_nodes->elems[idx];
3172       const re_node_set *eclosure = dfa->eclosures + cur_node;
3173       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3174       if (outside_node == REG_MISSING)
3175         {
3176           /* There are no problematic nodes, just merge them.  */
3177           err = re_node_set_merge (&new_nodes, eclosure);
3178           if (BE (err != REG_NOERROR, 0))
3179             {
3180               re_node_set_free (&new_nodes);
3181               return err;
3182             }
3183         }
3184       else
3185         {
3186           /* There are problematic nodes, re-calculate incrementally.  */
3187           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3188                                               ex_subexp, type);
3189           if (BE (err != REG_NOERROR, 0))
3190             {
3191               re_node_set_free (&new_nodes);
3192               return err;
3193             }
3194         }
3195     }
3196   re_node_set_free (cur_nodes);
3197   *cur_nodes = new_nodes;
3198   return REG_NOERROR;
3199 }
3200 
3201 /* Helper function for check_arrival_expand_ecl.
3202    Check incrementally the epsilon closure of TARGET, and if it isn't
3203    problematic append it to DST_NODES.  */
3204 
3205 static reg_errcode_t
3206 internal_function
3207 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3208                               Idx target, Idx ex_subexp, int type)
3209 {
3210   Idx cur_node;
3211   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3212     {
3213       bool ok;
3214 
3215       if (dfa->nodes[cur_node].type == type
3216           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3217         {
3218           if (type == OP_CLOSE_SUBEXP)
3219             {
3220               ok = re_node_set_insert (dst_nodes, cur_node);
3221               if (BE (! ok, 0))
3222                 return REG_ESPACE;
3223             }
3224           break;
3225         }
3226       ok = re_node_set_insert (dst_nodes, cur_node);
3227       if (BE (! ok, 0))
3228         return REG_ESPACE;
3229       if (dfa->edests[cur_node].nelem == 0)
3230         break;
3231       if (dfa->edests[cur_node].nelem == 2)
3232         {
3233           reg_errcode_t err;
3234           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3235                                               dfa->edests[cur_node].elems[1],
3236                                               ex_subexp, type);
3237           if (BE (err != REG_NOERROR, 0))
3238             return err;
3239         }
3240       cur_node = dfa->edests[cur_node].elems[0];
3241     }
3242   return REG_NOERROR;
3243 }
3244 
3245 
3246 /* For all the back references in the current state, calculate the
3247    destination of the back references by the appropriate entry
3248    in MCTX->BKREF_ENTS.  */
3249 
3250 static reg_errcode_t
3251 internal_function
3252 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3253                     Idx cur_str, Idx subexp_num, int type)
3254 {
3255   const re_dfa_t *const dfa = mctx->dfa;
3256   reg_errcode_t err;
3257   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3258   struct re_backref_cache_entry *ent;
3259 
3260   if (cache_idx_start == REG_MISSING)
3261     return REG_NOERROR;
3262 
3263  restart:
3264   ent = mctx->bkref_ents + cache_idx_start;
3265   do
3266     {
3267       Idx to_idx, next_node;
3268 
3269       /* Is this entry ENT is appropriate?  */
3270       if (!re_node_set_contains (cur_nodes, ent->node))
3271         continue; /* No.  */
3272 
3273       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3274       /* Calculate the destination of the back reference, and append it
3275          to MCTX->STATE_LOG.  */
3276       if (to_idx == cur_str)
3277         {
3278           /* The backreference did epsilon transit, we must re-check all the
3279              node in the current state.  */
3280           re_node_set new_dests;
3281           reg_errcode_t err2, err3;
3282           next_node = dfa->edests[ent->node].elems[0];
3283           if (re_node_set_contains (cur_nodes, next_node))
3284             continue;
3285           err = re_node_set_init_1 (&new_dests, next_node);
3286           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3287           err3 = re_node_set_merge (cur_nodes, &new_dests);
3288           re_node_set_free (&new_dests);
3289           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3290                   || err3 != REG_NOERROR, 0))
3291             {
3292               err = (err != REG_NOERROR ? err
3293                      : (err2 != REG_NOERROR ? err2 : err3));
3294               return err;
3295             }
3296           /* TODO: It is still inefficient...  */
3297           goto restart;
3298         }
3299       else
3300         {
3301           re_node_set union_set;
3302           next_node = dfa->nexts[ent->node];
3303           if (mctx->state_log[to_idx])
3304             {
3305               bool ok;
3306               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3307                                         next_node))
3308                 continue;
3309               err = re_node_set_init_copy (&union_set,
3310                                            &mctx->state_log[to_idx]->nodes);
3311               ok = re_node_set_insert (&union_set, next_node);
3312               if (BE (err != REG_NOERROR || ! ok, 0))
3313                 {
3314                   re_node_set_free (&union_set);
3315                   err = err != REG_NOERROR ? err : REG_ESPACE;
3316                   return err;
3317                 }
3318             }
3319           else
3320             {
3321               err = re_node_set_init_1 (&union_set, next_node);
3322               if (BE (err != REG_NOERROR, 0))
3323                 return err;
3324             }
3325           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3326           re_node_set_free (&union_set);
3327           if (BE (mctx->state_log[to_idx] == NULL
3328                   && err != REG_NOERROR, 0))
3329             return err;
3330         }
3331     }
3332   while (ent++->more);
3333   return REG_NOERROR;
3334 }
3335 
3336 /* Build transition table for the state.
3337    Return true if successful.  */
3338 
3339 static bool
3340 internal_function
3341 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3342 {
3343   reg_errcode_t err;
3344   Idx i, j;
3345   int ch;
3346   bool need_word_trtable = false;
3347   bitset_word_t elem, mask;
3348   bool dests_node_malloced = false;
3349   bool dest_states_malloced = false;
3350   Idx ndests; /* Number of the destination states from `state'.  */
3351   re_dfastate_t **trtable;
3352   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3353   re_node_set follows, *dests_node;
3354   bitset_t *dests_ch;
3355   bitset_t acceptable;
3356 
3357   struct dests_alloc
3358   {
3359     re_node_set dests_node[SBC_MAX];
3360     bitset_t dests_ch[SBC_MAX];
3361   } *dests_alloc;
3362 
3363   /* We build DFA states which corresponds to the destination nodes
3364      from `state'.  `dests_node[i]' represents the nodes which i-th
3365      destination state contains, and `dests_ch[i]' represents the
3366      characters which i-th destination state accepts.  */
3367   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3368     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3369   else
3370     {
3371       dests_alloc = re_malloc (struct dests_alloc, 1);
3372       if (BE (dests_alloc == NULL, 0))
3373         return false;
3374       dests_node_malloced = true;
3375     }
3376   dests_node = dests_alloc->dests_node;
3377   dests_ch = dests_alloc->dests_ch;
3378 
3379   /* Initialize transiton table.  */
3380   state->word_trtable = state->trtable = NULL;
3381 
3382   /* At first, group all nodes belonging to `state' into several
3383      destinations.  */
3384   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3385   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3386     {
3387       if (dests_node_malloced)
3388         free (dests_alloc);
3389       if (ndests == 0)
3390         {
3391           state->trtable = (re_dfastate_t **)
3392             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3393           return true;
3394         }
3395       return false;
3396     }
3397 
3398   err = re_node_set_alloc (&follows, ndests + 1);
3399   if (BE (err != REG_NOERROR, 0))
3400     goto out_free;
3401 
3402   /* Avoid arithmetic overflow in size calculation.  */
3403   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3404             / (3 * sizeof (re_dfastate_t *)))
3405            < ndests),
3406           0))
3407     goto out_free;
3408 
3409   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3410                          + ndests * 3 * sizeof (re_dfastate_t *)))
3411     dest_states = (re_dfastate_t **)
3412       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3413   else
3414     {
3415       dest_states = (re_dfastate_t **)
3416         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3417       if (BE (dest_states == NULL, 0))
3418         {
3419 out_free:
3420           if (dest_states_malloced)
3421             free (dest_states);
3422           re_node_set_free (&follows);
3423           for (i = 0; i < ndests; ++i)
3424             re_node_set_free (dests_node + i);
3425           if (dests_node_malloced)
3426             free (dests_alloc);
3427           return false;
3428         }
3429       dest_states_malloced = true;
3430     }
3431   dest_states_word = dest_states + ndests;
3432   dest_states_nl = dest_states_word + ndests;
3433   bitset_empty (acceptable);
3434 
3435   /* Then build the states for all destinations.  */
3436   for (i = 0; i < ndests; ++i)
3437     {
3438       Idx next_node;
3439       re_node_set_empty (&follows);
3440       /* Merge the follows of this destination states.  */
3441       for (j = 0; j < dests_node[i].nelem; ++j)
3442         {
3443           next_node = dfa->nexts[dests_node[i].elems[j]];
3444           if (next_node != REG_MISSING)
3445             {
3446               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3447               if (BE (err != REG_NOERROR, 0))
3448                 goto out_free;
3449             }
3450         }
3451       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3452       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3453         goto out_free;
3454       /* If the new state has context constraint,
3455          build appropriate states for these contexts.  */
3456       if (dest_states[i]->has_constraint)
3457         {
3458           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3459                                                           CONTEXT_WORD);
3460           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3461             goto out_free;
3462 
3463           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3464             need_word_trtable = true;
3465 
3466           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3467                                                         CONTEXT_NEWLINE);
3468           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3469             goto out_free;
3470         }
3471       else
3472         {
3473           dest_states_word[i] = dest_states[i];
3474           dest_states_nl[i] = dest_states[i];
3475         }
3476       bitset_merge (acceptable, dests_ch[i]);
3477     }
3478 
3479   if (!BE (need_word_trtable, 0))
3480     {
3481       /* We don't care about whether the following character is a word
3482          character, or we are in a single-byte character set so we can
3483          discern by looking at the character code: allocate a
3484          256-entry transition table.  */
3485       trtable = state->trtable =
3486         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3487       if (BE (trtable == NULL, 0))
3488         goto out_free;
3489 
3490       /* For all characters ch...:  */
3491       for (i = 0; i < BITSET_WORDS; ++i)
3492         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3493              elem;
3494              mask <<= 1, elem >>= 1, ++ch)
3495           if (BE (elem & 1, 0))
3496             {
3497               /* There must be exactly one destination which accepts
3498                  character ch.  See group_nodes_into_DFAstates.  */
3499               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3500                 ;
3501 
3502               /* j-th destination accepts the word character ch.  */
3503               if (dfa->word_char[i] & mask)
3504                 trtable[ch] = dest_states_word[j];
3505               else
3506                 trtable[ch] = dest_states[j];
3507             }
3508     }
3509   else
3510     {
3511       /* We care about whether the following character is a word
3512          character, and we are in a multi-byte character set: discern
3513          by looking at the character code: build two 256-entry
3514          transition tables, one starting at trtable[0] and one
3515          starting at trtable[SBC_MAX].  */
3516       trtable = state->word_trtable =
3517         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3518       if (BE (trtable == NULL, 0))
3519         goto out_free;
3520 
3521       /* For all characters ch...:  */
3522       for (i = 0; i < BITSET_WORDS; ++i)
3523         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3524              elem;
3525              mask <<= 1, elem >>= 1, ++ch)
3526           if (BE (elem & 1, 0))
3527             {
3528               /* There must be exactly one destination which accepts
3529                  character ch.  See group_nodes_into_DFAstates.  */
3530               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3531                 ;
3532 
3533               /* j-th destination accepts the word character ch.  */
3534               trtable[ch] = dest_states[j];
3535               trtable[ch + SBC_MAX] = dest_states_word[j];
3536             }
3537     }
3538 
3539   /* new line */
3540   if (bitset_contain (acceptable, NEWLINE_CHAR))
3541     {
3542       /* The current state accepts newline character.  */
3543       for (j = 0; j < ndests; ++j)
3544         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3545           {
3546             /* k-th destination accepts newline character.  */
3547             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3548             if (need_word_trtable)
3549               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3550             /* There must be only one destination which accepts
3551                newline.  See group_nodes_into_DFAstates.  */
3552             break;
3553           }
3554     }
3555 
3556   if (dest_states_malloced)
3557     free (dest_states);
3558 
3559   re_node_set_free (&follows);
3560   for (i = 0; i < ndests; ++i)
3561     re_node_set_free (dests_node + i);
3562 
3563   if (dests_node_malloced)
3564     free (dests_alloc);
3565 
3566   return true;
3567 }
3568 
3569 /* Group all nodes belonging to STATE into several destinations.
3570    Then for all destinations, set the nodes belonging to the destination
3571    to DESTS_NODE[i] and set the characters accepted by the destination
3572    to DEST_CH[i].  This function return the number of destinations.  */
3573 
3574 static Idx
3575 internal_function
3576 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3577                             re_node_set *dests_node, bitset_t *dests_ch)
3578 {
3579   reg_errcode_t err;
3580   bool ok;
3581   Idx i, j, k;
3582   Idx ndests; /* Number of the destinations from `state'.  */
3583   bitset_t accepts; /* Characters a node can accept.  */
3584   const re_node_set *cur_nodes = &state->nodes;
3585   bitset_empty (accepts);
3586   ndests = 0;
3587 
3588   /* For all the nodes belonging to `state',  */
3589   for (i = 0; i < cur_nodes->nelem; ++i)
3590     {
3591       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3592       re_token_type_t type = node->type;
3593       unsigned int constraint = node->constraint;
3594 
3595       /* Enumerate all single byte character this node can accept.  */
3596       if (type == CHARACTER)
3597         bitset_set (accepts, node->opr.c);
3598       else if (type == SIMPLE_BRACKET)
3599         {
3600           bitset_merge (accepts, node->opr.sbcset);
3601         }
3602       else if (type == OP_PERIOD)
3603         {
3604 #ifdef RE_ENABLE_I18N
3605           if (dfa->mb_cur_max > 1)
3606             bitset_merge (accepts, dfa->sb_char);
3607           else
3608 #endif
3609             bitset_set_all (accepts);
3610           if (!(dfa->syntax & RE_DOT_NEWLINE))
3611             bitset_clear (accepts, '\n');
3612           if (dfa->syntax & RE_DOT_NOT_NULL)
3613             bitset_clear (accepts, '\0');
3614         }
3615 #ifdef RE_ENABLE_I18N
3616       else if (type == OP_UTF8_PERIOD)
3617         {
3618           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3619             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3620           else
3621             bitset_merge (accepts, utf8_sb_map);
3622           if (!(dfa->syntax & RE_DOT_NEWLINE))
3623             bitset_clear (accepts, '\n');
3624           if (dfa->syntax & RE_DOT_NOT_NULL)
3625             bitset_clear (accepts, '\0');
3626         }
3627 #endif
3628       else
3629         continue;
3630 
3631       /* Check the `accepts' and sift the characters which are not
3632          match it the context.  */
3633       if (constraint)
3634         {
3635           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3636             {
3637               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3638               bitset_empty (accepts);
3639               if (accepts_newline)
3640                 bitset_set (accepts, NEWLINE_CHAR);
3641               else
3642                 continue;
3643             }
3644           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3645             {
3646               bitset_empty (accepts);
3647               continue;
3648             }
3649 
3650           if (constraint & NEXT_WORD_CONSTRAINT)
3651             {
3652               bitset_word_t any_set = 0;
3653               if (type == CHARACTER && !node->word_char)
3654                 {
3655                   bitset_empty (accepts);
3656                   continue;
3657                 }
3658 #ifdef RE_ENABLE_I18N
3659               if (dfa->mb_cur_max > 1)
3660                 for (j = 0; j < BITSET_WORDS; ++j)
3661                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3662               else
3663 #endif
3664                 for (j = 0; j < BITSET_WORDS; ++j)
3665                   any_set |= (accepts[j] &= dfa->word_char[j]);
3666               if (!any_set)
3667                 continue;
3668             }
3669           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3670             {
3671               bitset_word_t any_set = 0;
3672               if (type == CHARACTER && node->word_char)
3673                 {
3674                   bitset_empty (accepts);
3675                   continue;
3676                 }
3677 #ifdef RE_ENABLE_I18N
3678               if (dfa->mb_cur_max > 1)
3679                 for (j = 0; j < BITSET_WORDS; ++j)
3680                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3681               else
3682 #endif
3683                 for (j = 0; j < BITSET_WORDS; ++j)
3684                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3685               if (!any_set)
3686                 continue;
3687             }
3688         }
3689 
3690       /* Then divide `accepts' into DFA states, or create a new
3691          state.  Above, we make sure that accepts is not empty.  */
3692       for (j = 0; j < ndests; ++j)
3693         {
3694           bitset_t intersec; /* Intersection sets, see below.  */
3695           bitset_t remains;
3696           /* Flags, see below.  */
3697           bitset_word_t has_intersec, not_subset, not_consumed;
3698 
3699           /* Optimization, skip if this state doesn't accept the character.  */
3700           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3701             continue;
3702 
3703           /* Enumerate the intersection set of this state and `accepts'.  */
3704           has_intersec = 0;
3705           for (k = 0; k < BITSET_WORDS; ++k)
3706             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3707           /* And skip if the intersection set is empty.  */
3708           if (!has_intersec)
3709             continue;
3710 
3711           /* Then check if this state is a subset of `accepts'.  */
3712           not_subset = not_consumed = 0;
3713           for (k = 0; k < BITSET_WORDS; ++k)
3714             {
3715               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3716               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3717             }
3718 
3719           /* If this state isn't a subset of `accepts', create a
3720              new group state, which has the `remains'. */
3721           if (not_subset)
3722             {
3723               bitset_copy (dests_ch[ndests], remains);
3724               bitset_copy (dests_ch[j], intersec);
3725               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3726               if (BE (err != REG_NOERROR, 0))
3727                 goto error_return;
3728               ++ndests;
3729             }
3730 
3731           /* Put the position in the current group. */
3732           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3733           if (BE (! ok, 0))
3734             goto error_return;
3735 
3736           /* If all characters are consumed, go to next node. */
3737           if (!not_consumed)
3738             break;
3739         }
3740       /* Some characters remain, create a new group. */
3741       if (j == ndests)
3742         {
3743           bitset_copy (dests_ch[ndests], accepts);
3744           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3745           if (BE (err != REG_NOERROR, 0))
3746             goto error_return;
3747           ++ndests;
3748           bitset_empty (accepts);
3749         }
3750     }
3751   return ndests;
3752  error_return:
3753   for (j = 0; j < ndests; ++j)
3754     re_node_set_free (dests_node + j);
3755   return REG_MISSING;
3756 }
3757 
3758 #ifdef RE_ENABLE_I18N
3759 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3760    Return the number of the bytes the node accepts.
3761    STR_IDX is the current index of the input string.
3762 
3763    This function handles the nodes which can accept one character, or
3764    one collating element like '.', '[a-z]', opposite to the other nodes
3765    can only accept one byte.  */
3766 
3767 static int
3768 internal_function
3769 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3770                          const re_string_t *input, Idx str_idx)
3771 {
3772   const re_token_t *node = dfa->nodes + node_idx;
3773   int char_len, elem_len;
3774   Idx i;
3775 
3776   if (BE (node->type == OP_UTF8_PERIOD, 0))
3777     {
3778       unsigned char c = re_string_byte_at (input, str_idx), d;
3779       if (BE (c < 0xc2, 1))
3780         return 0;
3781 
3782       if (str_idx + 2 > input->len)
3783         return 0;
3784 
3785       d = re_string_byte_at (input, str_idx + 1);
3786       if (c < 0xe0)
3787         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3788       else if (c < 0xf0)
3789         {
3790           char_len = 3;
3791           if (c == 0xe0 && d < 0xa0)
3792             return 0;
3793         }
3794       else if (c < 0xf8)
3795         {
3796           char_len = 4;
3797           if (c == 0xf0 && d < 0x90)
3798             return 0;
3799         }
3800       else if (c < 0xfc)
3801         {
3802           char_len = 5;
3803           if (c == 0xf8 && d < 0x88)
3804             return 0;
3805         }
3806       else if (c < 0xfe)
3807         {
3808           char_len = 6;
3809           if (c == 0xfc && d < 0x84)
3810             return 0;
3811         }
3812       else
3813         return 0;
3814 
3815       if (str_idx + char_len > input->len)
3816         return 0;
3817 
3818       for (i = 1; i < char_len; ++i)
3819         {
3820           d = re_string_byte_at (input, str_idx + i);
3821           if (d < 0x80 || d > 0xbf)
3822             return 0;
3823         }
3824       return char_len;
3825     }
3826 
3827   char_len = re_string_char_size_at (input, str_idx);
3828   if (node->type == OP_PERIOD)
3829     {
3830       if (char_len <= 1)
3831         return 0;
3832       /* FIXME: I don't think this if is needed, as both '\n'
3833          and '\0' are char_len == 1.  */
3834       /* '.' accepts any one character except the following two cases.  */
3835       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3836            re_string_byte_at (input, str_idx) == '\n') ||
3837           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3838            re_string_byte_at (input, str_idx) == '\0'))
3839         return 0;
3840       return char_len;
3841     }
3842 
3843   elem_len = re_string_elem_size_at (input, str_idx);
3844   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3845     return 0;
3846 
3847   if (node->type == COMPLEX_BRACKET)
3848     {
3849       const re_charset_t *cset = node->opr.mbcset;
3850 # ifdef _LIBC
3851       const unsigned char *pin
3852         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3853       Idx j;
3854       uint32_t nrules;
3855 # endif /* _LIBC */
3856       int match_len = 0;
3857       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3858                     ? re_string_wchar_at (input, str_idx) : 0);
3859 
3860       /* match with multibyte character?  */
3861       for (i = 0; i < cset->nmbchars; ++i)
3862         if (wc == cset->mbchars[i])
3863           {
3864             match_len = char_len;
3865             goto check_node_accept_bytes_match;
3866           }
3867       /* match with character_class?  */
3868       for (i = 0; i < cset->nchar_classes; ++i)
3869         {
3870           wctype_t wt = cset->char_classes[i];
3871           if (__iswctype (wc, wt))
3872             {
3873               match_len = char_len;
3874               goto check_node_accept_bytes_match;
3875             }
3876         }
3877 
3878 # ifdef _LIBC
3879       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3880       if (nrules != 0)
3881         {
3882           unsigned int in_collseq = 0;
3883           const int32_t *table, *indirect;
3884           const unsigned char *weights, *extra;
3885           const char *collseqwc;
3886           int32_t idx;
3887           /* This #include defines a local function!  */
3888 #  include <locale/weight.h>
3889 
3890           /* match with collating_symbol?  */
3891           if (cset->ncoll_syms)
3892             extra = (const unsigned char *)
3893               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3894           for (i = 0; i < cset->ncoll_syms; ++i)
3895             {
3896               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3897               /* Compare the length of input collating element and
3898                  the length of current collating element.  */
3899               if (*coll_sym != elem_len)
3900                 continue;
3901               /* Compare each bytes.  */
3902               for (j = 0; j < *coll_sym; j++)
3903                 if (pin[j] != coll_sym[1 + j])
3904                   break;
3905               if (j == *coll_sym)
3906                 {
3907                   /* Match if every bytes is equal.  */
3908                   match_len = j;
3909                   goto check_node_accept_bytes_match;
3910                 }
3911             }
3912 
3913           if (cset->nranges)
3914             {
3915               if (elem_len <= char_len)
3916                 {
3917                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3918                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3919                 }
3920               else
3921                 in_collseq = find_collation_sequence_value (pin, elem_len);
3922             }
3923           /* match with range expression?  */
3924           for (i = 0; i < cset->nranges; ++i)
3925             if (cset->range_starts[i] <= in_collseq
3926                 && in_collseq <= cset->range_ends[i])
3927               {
3928                 match_len = elem_len;
3929                 goto check_node_accept_bytes_match;
3930               }
3931 
3932           /* match with equivalence_class?  */
3933           if (cset->nequiv_classes)
3934             {
3935               const unsigned char *cp = pin;
3936               table = (const int32_t *)
3937                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3938               weights = (const unsigned char *)
3939                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3940               extra = (const unsigned char *)
3941                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3942               indirect = (const int32_t *)
3943                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3944               idx = findidx (&cp);
3945               if (idx > 0)
3946                 for (i = 0; i < cset->nequiv_classes; ++i)
3947                   {
3948                     int32_t equiv_class_idx = cset->equiv_classes[i];
3949                     size_t weight_len = weights[idx];
3950                     if (weight_len == weights[equiv_class_idx])
3951                       {
3952                         Idx cnt = 0;
3953                         while (cnt <= weight_len
3954                                && (weights[equiv_class_idx + 1 + cnt]
3955                                    == weights[idx + 1 + cnt]))
3956                           ++cnt;
3957                         if (cnt > weight_len)
3958                           {
3959                             match_len = elem_len;
3960                             goto check_node_accept_bytes_match;
3961                           }
3962                       }
3963                   }
3964             }
3965         }
3966       else
3967 # endif /* _LIBC */
3968         {
3969           /* match with range expression?  */
3970 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3971           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3972 #else
3973           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3974           cmp_buf[2] = wc;
3975 #endif
3976           for (i = 0; i < cset->nranges; ++i)
3977             {
3978               cmp_buf[0] = cset->range_starts[i];
3979               cmp_buf[4] = cset->range_ends[i];
3980               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3981                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3982                 {
3983                   match_len = char_len;
3984                   goto check_node_accept_bytes_match;
3985                 }
3986             }
3987         }
3988     check_node_accept_bytes_match:
3989       if (!cset->non_match)
3990         return match_len;
3991       else
3992         {
3993           if (match_len > 0)
3994             return 0;
3995           else
3996             return (elem_len > char_len) ? elem_len : char_len;
3997         }
3998     }
3999   return 0;
4000 }
4001 
4002 # ifdef _LIBC
4003 static unsigned int
4004 internal_function
4005 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4006 {
4007   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4008   if (nrules == 0)
4009     {
4010       if (mbs_len == 1)
4011         {
4012           /* No valid character.  Match it as a single byte character.  */
4013           const unsigned char *collseq = (const unsigned char *)
4014             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4015           return collseq[mbs[0]];
4016         }
4017       return UINT_MAX;
4018     }
4019   else
4020     {
4021       int32_t idx;
4022       const unsigned char *extra = (const unsigned char *)
4023         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4024       int32_t extrasize = (const unsigned char *)
4025         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4026 
4027       for (idx = 0; idx < extrasize;)
4028         {
4029           int mbs_cnt;
4030           bool found = false;
4031           int32_t elem_mbs_len;
4032           /* Skip the name of collating element name.  */
4033           idx = idx + extra[idx] + 1;
4034           elem_mbs_len = extra[idx++];
4035           if (mbs_len == elem_mbs_len)
4036             {
4037               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4038                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4039                   break;
4040               if (mbs_cnt == elem_mbs_len)
4041                 /* Found the entry.  */
4042                 found = true;
4043             }
4044           /* Skip the byte sequence of the collating element.  */
4045           idx += elem_mbs_len;
4046           /* Adjust for the alignment.  */
4047           idx = (idx + 3) & ~3;
4048           /* Skip the collation sequence value.  */
4049           idx += sizeof (uint32_t);
4050           /* Skip the wide char sequence of the collating element.  */
4051           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4052           /* If we found the entry, return the sequence value.  */
4053           if (found)
4054             return *(uint32_t *) (extra + idx);
4055           /* Skip the collation sequence value.  */
4056           idx += sizeof (uint32_t);
4057         }
4058       return UINT_MAX;
4059     }
4060 }
4061 # endif /* _LIBC */
4062 #endif /* RE_ENABLE_I18N */
4063 
4064 /* Check whether the node accepts the byte which is IDX-th
4065    byte of the INPUT.  */
4066 
4067 static bool
4068 internal_function
4069 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4070                    Idx idx)
4071 {
4072   unsigned char ch;
4073   ch = re_string_byte_at (&mctx->input, idx);
4074   switch (node->type)
4075     {
4076     case CHARACTER:
4077       if (node->opr.c != ch)
4078         return false;
4079       break;
4080 
4081     case SIMPLE_BRACKET:
4082       if (!bitset_contain (node->opr.sbcset, ch))
4083         return false;
4084       break;
4085 
4086 #ifdef RE_ENABLE_I18N
4087     case OP_UTF8_PERIOD:
4088       if (ch >= ASCII_CHARS)
4089         return false;
4090       /* FALLTHROUGH */
4091 #endif
4092     case OP_PERIOD:
4093       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4094           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4095         return false;
4096       break;
4097 
4098     default:
4099       return false;
4100     }
4101 
4102   if (node->constraint)
4103     {
4104       /* The node has constraints.  Check whether the current context
4105          satisfies the constraints.  */
4106       unsigned int context = re_string_context_at (&mctx->input, idx,
4107                                                    mctx->eflags);
4108       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4109         return false;
4110     }
4111 
4112   return true;
4113 }
4114 
4115 /* Extend the buffers, if the buffers have run out.  */
4116 
4117 static reg_errcode_t
4118 internal_function
4119 extend_buffers (re_match_context_t *mctx)
4120 {
4121   reg_errcode_t ret;
4122   re_string_t *pstr = &mctx->input;
4123 
4124   /* Avoid overflow.  */
4125   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4126     return REG_ESPACE;
4127 
4128   /* Double the lengthes of the buffers.  */
4129   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4130   if (BE (ret != REG_NOERROR, 0))
4131     return ret;
4132 
4133   if (mctx->state_log != NULL)
4134     {
4135       /* And double the length of state_log.  */
4136       /* XXX We have no indication of the size of this buffer.  If this
4137          allocation fail we have no indication that the state_log array
4138          does not have the right size.  */
4139       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4140                                               pstr->bufs_len + 1);
4141       if (BE (new_array == NULL, 0))
4142         return REG_ESPACE;
4143       mctx->state_log = new_array;
4144     }
4145 
4146   /* Then reconstruct the buffers.  */
4147   if (pstr->icase)
4148     {
4149 #ifdef RE_ENABLE_I18N
4150       if (pstr->mb_cur_max > 1)
4151         {
4152           ret = build_wcs_upper_buffer (pstr);
4153           if (BE (ret != REG_NOERROR, 0))
4154             return ret;
4155         }
4156       else
4157 #endif /* RE_ENABLE_I18N  */
4158         build_upper_buffer (pstr);
4159     }
4160   else
4161     {
4162 #ifdef RE_ENABLE_I18N
4163       if (pstr->mb_cur_max > 1)
4164         build_wcs_buffer (pstr);
4165       else
4166 #endif /* RE_ENABLE_I18N  */
4167         {
4168           if (pstr->trans != NULL)
4169             re_string_translate_buffer (pstr);
4170         }
4171     }
4172   return REG_NOERROR;
4173 }
4174 
4175 
4176 /* Functions for matching context.  */
4177 
4178 /* Initialize MCTX.  */
4179 
4180 static reg_errcode_t
4181 internal_function
4182 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4183 {
4184   mctx->eflags = eflags;
4185   mctx->match_last = REG_MISSING;
4186   if (n > 0)
4187     {
4188       /* Avoid overflow.  */
4189       size_t max_object_size =
4190         MAX (sizeof (struct re_backref_cache_entry),
4191              sizeof (re_sub_match_top_t *));
4192       if (BE (SIZE_MAX / max_object_size < n, 0))
4193         return REG_ESPACE;
4194 
4195       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4196       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4197       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4198         return REG_ESPACE;
4199     }
4200   /* Already zero-ed by the caller.
4201      else
4202        mctx->bkref_ents = NULL;
4203      mctx->nbkref_ents = 0;
4204      mctx->nsub_tops = 0;  */
4205   mctx->abkref_ents = n;
4206   mctx->max_mb_elem_len = 1;
4207   mctx->asub_tops = n;
4208   return REG_NOERROR;
4209 }
4210 
4211 /* Clean the entries which depend on the current input in MCTX.
4212    This function must be invoked when the matcher changes the start index
4213    of the input, or changes the input string.  */
4214 
4215 static void
4216 internal_function
4217 match_ctx_clean (re_match_context_t *mctx)
4218 {
4219   Idx st_idx;
4220   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4221     {
4222       Idx sl_idx;
4223       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4224       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4225         {
4226           re_sub_match_last_t *last = top->lasts[sl_idx];
4227           re_free (last->path.array);
4228           re_free (last);
4229         }
4230       re_free (top->lasts);
4231       if (top->path)
4232         {
4233           re_free (top->path->array);
4234           re_free (top->path);
4235         }
4236       free (top);
4237     }
4238 
4239   mctx->nsub_tops = 0;
4240   mctx->nbkref_ents = 0;
4241 }
4242 
4243 /* Free all the memory associated with MCTX.  */
4244 
4245 static void
4246 internal_function
4247 match_ctx_free (re_match_context_t *mctx)
4248 {
4249   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4250   match_ctx_clean (mctx);
4251   re_free (mctx->sub_tops);
4252   re_free (mctx->bkref_ents);
4253 }
4254 
4255 /* Add a new backreference entry to MCTX.
4256    Note that we assume that caller never call this function with duplicate
4257    entry, and call with STR_IDX which isn't smaller than any existing entry.
4258 */
4259 
4260 static reg_errcode_t
4261 internal_function
4262 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4263                      Idx to)
4264 {
4265   if (mctx->nbkref_ents >= mctx->abkref_ents)
4266     {
4267       struct re_backref_cache_entry* new_entry;
4268       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4269                               mctx->abkref_ents * 2);
4270       if (BE (new_entry == NULL, 0))
4271         {
4272           re_free (mctx->bkref_ents);
4273           return REG_ESPACE;
4274         }
4275       mctx->bkref_ents = new_entry;
4276       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4277               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4278       mctx->abkref_ents *= 2;
4279     }
4280   if (mctx->nbkref_ents > 0
4281       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4282     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4283 
4284   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4285   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4286   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4287   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4288 
4289   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4290      If bit N is clear, means that this entry won't epsilon-transition to
4291      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4292      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4293      such node.
4294 
4295      A backreference does not epsilon-transition unless it is empty, so set
4296      to all zeros if FROM != TO.  */
4297   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4298     = (from == to ? -1 : 0);
4299 
4300   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4301   if (mctx->max_mb_elem_len < to - from)
4302     mctx->max_mb_elem_len = to - from;
4303   return REG_NOERROR;
4304 }
4305 
4306 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4307    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4308 
4309 static Idx
4310 internal_function
4311 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4312 {
4313   Idx left, right, mid, last;
4314   last = right = mctx->nbkref_ents;
4315   for (left = 0; left < right;)
4316     {
4317       mid = (left + right) / 2;
4318       if (mctx->bkref_ents[mid].str_idx < str_idx)
4319         left = mid + 1;
4320       else
4321         right = mid;
4322     }
4323   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4324     return left;
4325   else
4326     return REG_MISSING;
4327 }
4328 
4329 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4330    at STR_IDX.  */
4331 
4332 static reg_errcode_t
4333 internal_function
4334 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4335 {
4336 #ifdef DEBUG
4337   assert (mctx->sub_tops != NULL);
4338   assert (mctx->asub_tops > 0);
4339 #endif
4340   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4341     {
4342       Idx new_asub_tops = mctx->asub_tops * 2;
4343       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4344                                                    re_sub_match_top_t *,
4345                                                    new_asub_tops);
4346       if (BE (new_array == NULL, 0))
4347         return REG_ESPACE;
4348       mctx->sub_tops = new_array;
4349       mctx->asub_tops = new_asub_tops;
4350     }
4351   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4352   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4353     return REG_ESPACE;
4354   mctx->sub_tops[mctx->nsub_tops]->node = node;
4355   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4356   return REG_NOERROR;
4357 }
4358 
4359 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4360    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4361 
4362 static re_sub_match_last_t *
4363 internal_function
4364 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4365 {
4366   re_sub_match_last_t *new_entry;
4367   if (BE (subtop->nlasts == subtop->alasts, 0))
4368     {
4369       Idx new_alasts = 2 * subtop->alasts + 1;
4370       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4371                                                     re_sub_match_last_t *,
4372                                                     new_alasts);
4373       if (BE (new_array == NULL, 0))
4374         return NULL;
4375       subtop->lasts = new_array;
4376       subtop->alasts = new_alasts;
4377     }
4378   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4379   if (BE (new_entry != NULL, 1))
4380     {
4381       subtop->lasts[subtop->nlasts] = new_entry;
4382       new_entry->node = node;
4383       new_entry->str_idx = str_idx;
4384       ++subtop->nlasts;
4385     }
4386   return new_entry;
4387 }
4388 
4389 static void
4390 internal_function
4391 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4392                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4393 {
4394   sctx->sifted_states = sifted_sts;
4395   sctx->limited_states = limited_sts;
4396   sctx->last_node = last_node;
4397   sctx->last_str_idx = last_str_idx;
4398   re_node_set_init_empty (&sctx->limits);
4399 }