Print this page
8993 sync regcomp(3C) with upstream

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libc/port/regex/engine.c
          +++ new/usr/src/lib/libc/port/regex/engine.c
↓ open down ↓ 26 lines elided ↑ open up ↑
  27   27   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28   28   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29   29   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30   30   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31   31   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32   32   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33   33   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34   34   * SUCH DAMAGE.
  35   35   */
  36   36  
       37 +#include <stdbool.h>
       38 +
  37   39  /*
  38   40   * The matching engine and friends.  This file is #included by regexec.c
  39   41   * after suitable #defines of a variety of macros used herein, so that
  40   42   * different state representations can be used without duplicating masses
  41   43   * of code.
  42   44   */
  43   45  
  44   46  #ifdef SNAMES
  45   47  #define matcher smatcher
  46      -#define fast    sfast
  47      -#define slow    sslow
       48 +#define walk    swalk
  48   49  #define dissect sdissect
  49   50  #define backref sbackref
  50   51  #define step    sstep
  51   52  #define print   sprint
  52   53  #define at      sat
  53   54  #define match   smat
  54   55  #endif
  55   56  #ifdef LNAMES
  56   57  #define matcher lmatcher
  57      -#define fast    lfast
  58      -#define slow    lslow
       58 +#define walk    lwalk
  59   59  #define dissect ldissect
  60   60  #define backref lbackref
  61   61  #define step    lstep
  62   62  #define print   lprint
  63   63  #define at      lat
  64   64  #define match   lmat
  65   65  #endif
  66   66  #ifdef MNAMES
  67   67  #define matcher mmatcher
  68      -#define fast    mfast
  69      -#define slow    mslow
       68 +#define walk    mwalk
  70   69  #define dissect mdissect
  71   70  #define backref mbackref
  72   71  #define step    mstep
  73   72  #define print   mprint
  74   73  #define at      mat
  75   74  #define match   mmat
  76   75  #endif
  77   76  
  78   77  /* another structure passed up and down to avoid zillions of parameters */
  79   78  struct match {
↓ open down ↓ 17 lines elided ↑ open up ↑
  97   96  #ifdef __cplusplus
  98   97  extern "C" {
  99   98  #endif
 100   99  
 101  100  /* === engine.c === */
 102  101  static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
 103  102  static const char *dissect(struct match *, const char *, const char *,
 104  103      sopno, sopno);
 105  104  static const char *backref(struct match *, const char *, const char *, sopno,
 106  105      sopno, sopno, int);
 107      -static const char *fast(struct match *, const char *, const char *, sopno,
 108      -    sopno);
 109      -static const char *slow(struct match *, const char *, const char *, sopno,
 110      -    sopno);
      106 +static const char *walk(struct match *m, const char *start, const char *stop,
      107 +    sopno startst, sopno stopst, bool fast);
 111  108  static states step(struct re_guts *, sopno, sopno, states, wint_t, states);
 112  109  #define MAX_RECURSION   100
 113  110  #define BOL     (OUT-1)
 114  111  #define EOL     (BOL-1)
 115  112  #define BOLEOL  (BOL-2)
 116  113  #define NOTHING (BOL-3)
 117  114  #define BOW     (BOL-4)
 118  115  #define EOW     (BOL-5)
 119  116  #define BADCHAR (BOL-6)
 120  117  #define NONCHAR(c)      ((c) <= OUT)
↓ open down ↓ 125 lines elided ↑ open up ↑
 246  243          ZAPSTATE(&m->mbs);
 247  244  
 248  245          /* Adjust start according to moffset, to speed things up */
 249  246          if (dp != NULL && g->moffset > -1)
 250  247                  start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
 251  248  
 252  249          SP("mloop", m->st, *start);
 253  250  
 254  251          /* this loop does only one repetition except for backrefs */
 255  252          for (;;) {
 256      -                endp = fast(m, start, stop, gf, gl);
      253 +                endp = walk(m, start, stop, gf, gl, true);
 257  254                  if (endp == NULL) {             /* a miss */
 258  255                          if (m->pmatch != NULL)
 259  256                                  free((char *)m->pmatch);
 260  257                          if (m->lastpos != NULL)
 261  258                                  free((char *)m->lastpos);
 262  259                          STATETEARDOWN(m);
 263  260                          return (REG_NOMATCH);
 264  261                  }
 265  262                  if (nmatch == 0 && !g->backrefs)
 266  263                          break;          /* no further info needed */
 267  264  
 268  265                  /* where? */
 269  266                  assert(m->coldp != NULL);
 270  267                  for (;;) {
 271  268                          NOTE("finding start");
 272      -                        endp = slow(m, m->coldp, stop, gf, gl);
      269 +                        endp = walk(m, m->coldp, stop, gf, gl, false);
 273  270                          if (endp != NULL)
 274  271                                  break;
 275  272                          assert(m->coldp < m->endp);
 276  273                          m->coldp += XMBRTOWC(NULL, m->coldp,
 277  274                              m->endp - m->coldp, &m->mbs, 0);
 278  275                  }
 279  276                  if (nmatch == 1 && !g->backrefs)
 280  277                          break;          /* no further info needed */
 281  278  
 282  279                  /* oh my, it wants the subexpressions... */
↓ open down ↓ 24 lines elided ↑ open up ↑
 307  304                  if (dp != NULL)
 308  305                          break;
 309  306  
 310  307                  /* uh-oh... we couldn't find a subexpression-level match */
 311  308                  assert(g->backrefs);    /* must be back references doing it */
 312  309                  assert(g->nplus == 0 || m->lastpos != NULL);
 313  310                  for (;;) {
 314  311                          if (dp != NULL || endp <= m->coldp)
 315  312                                  break;          /* defeat */
 316  313                          NOTE("backoff");
 317      -                        endp = slow(m, m->coldp, endp-1, gf, gl);
      314 +                        endp = walk(m, m->coldp, endp-1, gf, gl, false);
 318  315                          if (endp == NULL)
 319  316                                  break;          /* defeat */
 320  317                          /* try it on a shorter possibility */
 321  318  #ifndef NDEBUG
 322  319                          for (i = 1; i <= m->g->nsub; i++) {
 323  320                                  assert(m->pmatch[i].rm_so == -1);
 324  321                                  assert(m->pmatch[i].rm_eo == -1);
 325  322                          }
 326  323  #endif
 327  324                          NOTE("backoff dissect");
↓ open down ↓ 47 lines elided ↑ open up ↑
 375  372          sopno es;               /* end sop of current subRE */
 376  373          const char *sp;         /* start of string matched by it */
 377  374          const char *stp;        /* string matched by it cannot pass here */
 378  375          const char *rest;       /* start of rest of string */
 379  376          const char *tail;       /* string unmatched by rest of RE */
 380  377          sopno ssub;             /* start sop of subsubRE */
 381  378          sopno esub;             /* end sop of subsubRE */
 382  379          const char *ssp;        /* start of string matched by subsubRE */
 383  380          const char *sep;        /* end of string matched by subsubRE */
 384  381          const char *oldssp;     /* previous ssp */
 385      -        const char *dp __unused;
      382 +        const char *dp;
 386  383  
      384 +        (void) dp;
 387  385          AT("diss", start, stop, startst, stopst);
 388  386          sp = start;
 389  387          for (ss = startst; ss < stopst; ss = es) {
 390  388                  /* identify end of subRE */
 391  389                  es = ss;
 392  390                  switch (OP(m->g->strip[es])) {
 393  391                  case OPLUS_:
 394  392                  case OQUEST_:
 395  393                          es += OPND(m->g->strip[es]);
 396  394                          break;
 397  395                  case OCH_:
 398      -                        while (OP(m->g->strip[es]) != O_CH)
      396 +                        while (OP(m->g->strip[es]) != (sop)O_CH)
 399  397                                  es += OPND(m->g->strip[es]);
 400  398                          break;
 401  399                  }
 402  400                  es++;
 403  401  
 404  402                  /* figure out what it matched */
 405  403                  switch (OP(m->g->strip[ss])) {
 406  404                  case OEND:
 407  405                          assert(0);
 408  406                          break;
↓ open down ↓ 11 lines elided ↑ open up ↑
 420  418                          break;
 421  419                  case OBACK_:
 422  420                  case O_BACK:
 423  421                          assert(0);
 424  422                          break;
 425  423                  /* cases where length of match is hard to find */
 426  424                  case OQUEST_:
 427  425                          stp = stop;
 428  426                          for (;;) {
 429  427                                  /* how long could this one be? */
 430      -                                rest = slow(m, sp, stp, ss, es);
      428 +                                rest = walk(m, sp, stp, ss, es, false);
 431  429                                  assert(rest != NULL);   /* it did match */
 432  430                                  /* could the rest match the rest? */
 433      -                                tail = slow(m, rest, stop, es, stopst);
      431 +                                tail = walk(m, rest, stop, es, stopst, false);
 434  432                                  if (tail == stop)
 435  433                                          break;          /* yes! */
 436  434                                  /* no -- try a shorter match for this one */
 437  435                                  stp = rest - 1;
 438  436                                  assert(stp >= sp);      /* it did work */
 439  437                          }
 440  438                          ssub = ss + 1;
 441  439                          esub = es - 1;
 442  440                          /* did innards match? */
 443      -                        if (slow(m, sp, rest, ssub, esub) != NULL) {
      441 +                        if (walk(m, sp, rest, ssub, esub, false) != NULL) {
 444  442                                  dp = dissect(m, sp, rest, ssub, esub);
 445  443                                  assert(dp == rest);
 446      -#if defined(__lint)
 447      -                                (void) dp;
 448      -#endif
 449  444                          } else          /* no */
 450  445                                  assert(sp == rest);
 451  446                          sp = rest;
 452  447                          break;
 453  448                  case OPLUS_:
 454  449                          stp = stop;
 455  450                          for (;;) {
 456  451                                  /* how long could this one be? */
 457      -                                rest = slow(m, sp, stp, ss, es);
      452 +                                rest = walk(m, sp, stp, ss, es, false);
 458  453                                  assert(rest != NULL);   /* it did match */
 459  454                                  /* could the rest match the rest? */
 460      -                                tail = slow(m, rest, stop, es, stopst);
      455 +                                tail = walk(m, rest, stop, es, stopst, false);
 461  456                                  if (tail == stop)
 462  457                                          break;          /* yes! */
 463  458                                  /* no -- try a shorter match for this one */
 464  459                                  stp = rest - 1;
 465  460                                  assert(stp >= sp);      /* it did work */
 466  461                          }
 467  462                          ssub = ss + 1;
 468  463                          esub = es - 1;
 469  464                          ssp = sp;
 470  465                          oldssp = ssp;
 471  466                          for (;;) {      /* find last match of innards */
 472      -                                sep = slow(m, ssp, rest, ssub, esub);
      467 +                                sep = walk(m, ssp, rest, ssub, esub, false);
 473  468                                  if (sep == NULL || sep == ssp)
 474  469                                          break;  /* failed or matched null */
 475  470                                  oldssp = ssp;   /* on to next try */
 476  471                                  ssp = sep;
 477  472                          }
 478  473                          if (sep == NULL) {
 479  474                                  /* last successful match */
 480  475                                  sep = ssp;
 481  476                                  ssp = oldssp;
 482  477                          }
 483  478                          assert(sep == rest);    /* must exhaust substring */
 484      -                        assert(slow(m, ssp, sep, ssub, esub) == rest);
      479 +                        assert(walk(m, ssp, sep, ssub, esub, false) == rest);
 485  480                          dp = dissect(m, ssp, sep, ssub, esub);
 486  481                          assert(dp == sep);
 487  482                          sp = rest;
 488  483                          break;
 489  484                  case OCH_:
 490  485                          stp = stop;
 491  486                          for (;;) {
 492  487                                  /* how long could this one be? */
 493      -                                rest = slow(m, sp, stp, ss, es);
      488 +                                rest = walk(m, sp, stp, ss, es, false);
 494  489                                  assert(rest != NULL);   /* it did match */
 495  490                                  /* could the rest match the rest? */
 496      -                                tail = slow(m, rest, stop, es, stopst);
      491 +                                tail = walk(m, rest, stop, es, stopst, false);
 497  492                                  if (tail == stop)
 498  493                                          break;          /* yes! */
 499  494                                  /* no -- try a shorter match for this one */
 500  495                                  stp = rest - 1;
 501  496                                  assert(stp >= sp);      /* it did work */
 502  497                          }
 503  498                          ssub = ss + 1;
 504  499                          esub = ss + OPND(m->g->strip[ss]) - 1;
 505  500                          assert(OP(m->g->strip[esub]) == OOR1);
 506  501                          for (;;) {      /* find first matching branch */
 507      -                                if (slow(m, sp, rest, ssub, esub) == rest)
      502 +                                if (walk(m, sp, rest, ssub, esub,
      503 +                                    false) == rest)
 508  504                                          break;  /* it matched all of it */
 509  505                                  /* that one missed, try next one */
 510  506                                  assert(OP(m->g->strip[esub]) == OOR1);
 511  507                                  esub++;
 512  508                                  assert(OP(m->g->strip[esub]) == OOR2);
 513  509                                  ssub = esub + 1;
 514  510                                  esub += OPND(m->g->strip[esub]);
 515      -                                if (OP(m->g->strip[esub]) == OOR2)
      511 +                                if (OP(m->g->strip[esub]) == (sop)OOR2)
 516  512                                          esub--;
 517  513                                  else
 518  514                                          assert(OP(m->g->strip[esub]) == O_CH);
 519  515                          }
 520  516                          dp = dissect(m, sp, rest, ssub, esub);
 521  517                          assert(dp == rest);
 522  518                          sp = rest;
 523  519                          break;
 524  520                  case O_PLUS:
 525  521                  case O_QUEST:
↓ open down ↓ 104 lines elided ↑ open up ↑
 630  626                          }
 631  627                          return (NULL);
 632  628                  case O_QUEST:
 633  629                          break;
 634  630                  case OOR1:      /* matches null but needs to skip */
 635  631                          ss++;
 636  632                          s = m->g->strip[ss];
 637  633                          do {
 638  634                                  assert(OP(s) == OOR2);
 639  635                                  ss += OPND(s);
 640      -                        } while (OP(s = m->g->strip[ss]) != O_CH);
      636 +                        } while (OP(s = m->g->strip[ss]) != (sop)O_CH);
 641  637                          /* note that the ss++ gets us past the O_CH */
 642  638                          break;
 643  639                  default:        /* have to make a choice */
 644  640                          hard = 1;
 645  641                          break;
 646  642                  }
 647  643          if (!hard) {            /* that was it! */
 648  644                  if (sp != stop)
 649  645                          return (NULL);
 650  646                  return (sp);
↓ open down ↓ 12 lines elided ↑ open up ↑
 663  659                  assert(m->pmatch[i].rm_so != -1);
 664  660                  len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
 665  661                  if (len == 0 && rec++ > MAX_RECURSION)
 666  662                          return (NULL);
 667  663                  assert(stop - m->beginp >= len);
 668  664                  if (sp > stop - len)
 669  665                          return (NULL);  /* not enough left to match */
 670  666                  ssp = m->offp + m->pmatch[i].rm_so;
 671  667                  if (memcmp(sp, ssp, len) != 0)
 672  668                          return (NULL);
 673      -                while (m->g->strip[ss] != SOP(O_BACK, i))
      669 +                while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
 674  670                          ss++;
 675  671                  return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
 676  672          case OQUEST_:           /* to null or not */
 677  673                  dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 678  674                  if (dp != NULL)
 679  675                          return (dp);    /* not */
 680  676                  return (backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
 681  677          case OPLUS_:
 682  678                  assert(m->lastpos != NULL);
 683  679                  assert(lev+1 <= m->g->nplus);
↓ open down ↓ 10 lines elided ↑ open up ↑
 694  690                  return (dp);
 695  691          case OCH_:              /* find the right one, if any */
 696  692                  ssub = ss + 1;
 697  693                  esub = ss + OPND(s) - 1;
 698  694                  assert(OP(m->g->strip[esub]) == OOR1);
 699  695                  for (;;) {      /* find first matching branch */
 700  696                          dp = backref(m, sp, stop, ssub, esub, lev, rec);
 701  697                          if (dp != NULL)
 702  698                                  return (dp);
 703  699                          /* that one missed, try next one */
 704      -                        if (OP(m->g->strip[esub]) == O_CH)
      700 +                        if (OP(m->g->strip[esub]) == (sop)O_CH)
 705  701                                  return (NULL);  /* there is none */
 706  702                          esub++;
 707      -                        assert(OP(m->g->strip[esub]) == OOR2);
      703 +                        assert(OP(m->g->strip[esub]) == (sop)OOR2);
 708  704                          ssub = esub + 1;
 709  705                          esub += OPND(m->g->strip[esub]);
 710      -                        if (OP(m->g->strip[esub]) == OOR2)
      706 +                        if (OP(m->g->strip[esub]) == (sop)OOR2)
 711  707                                  esub--;
 712  708                          else
 713  709                                  assert(OP(m->g->strip[esub]) == O_CH);
 714  710                  }
 715  711                  /* NOTREACHED */
 716  712                  break;
 717  713          case OLPAREN:           /* must undo assignment if rest fails */
 718  714                  i = OPND(s);
 719  715                  assert(0 < i && i <= m->g->nsub);
 720  716                  offsave = m->pmatch[i].rm_so;
↓ open down ↓ 17 lines elided ↑ open up ↑
 738  734                  assert(0);
 739  735                  break;
 740  736          }
 741  737  
 742  738          /* "can't happen" */
 743  739          assert(0);
 744  740          return (NULL);
 745  741  }
 746  742  
 747  743  /*
 748      - * fast - step through the string at top speed
      744 + * Step through the string either quickly or slowly.  Returns where it ended
      745 + * or NULL.
 749  746   */
 750  747  static const char *
 751      -fast(struct match *m, const char *start, const char *stop, sopno startst,
 752      -    sopno stopst)
      748 +walk(struct match *m, const char *start, const char *stop, sopno startst,
      749 +    sopno stopst, bool fast)
 753  750  {
 754  751          states st = m->st;
 755  752          states fresh = m->fresh;
 756      -        states tmp = m->tmp;
 757      -        const char *p = start;
 758      -        wint_t c;
 759      -        wint_t lastc;           /* previous c */
 760      -        wint_t flagch;
 761      -        int i;
 762      -        const char *coldp;      /* last p after which no match was underway */
 763      -        size_t clen;
 764      -
 765      -        CLEAR(st);
 766      -        SET1(st, startst);
 767      -        SP("fast", st, *p);
 768      -        st = step(m->g, startst, stopst, st, NOTHING, st);
 769      -        ASSIGN(fresh, st);
 770      -        SP("start", st, *p);
 771      -        coldp = NULL;
 772      -        if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 773      -                c = OUT;
 774      -        else {
 775      -                /*
 776      -                 * XXX Wrong if the previous character was multi-byte.
 777      -                 * Newline never is (in encodings supported by FreeBSD),
 778      -                 * so this only breaks the ISWORD tests below.
 779      -                 */
 780      -                c = (uch)*(start - 1);
 781      -        }
 782      -        for (;;) {
 783      -                /* next character */
 784      -                lastc = c;
 785      -                if (p == m->endp) {
 786      -                        clen = 0;
 787      -                        c = OUT;
 788      -                } else
 789      -                        clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 790      -                if (EQ(st, fresh))
 791      -                        coldp = p;
 792      -
 793      -                /* is there an EOL and/or BOL between lastc and c? */
 794      -                flagch = '\0';
 795      -                i = 0;
 796      -                if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 797      -                    (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
 798      -                        flagch = BOL;
 799      -                        i = m->g->nbol;
 800      -                }
 801      -                if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
 802      -                    (c == OUT && !(m->eflags&REG_NOTEOL))) {
 803      -                        flagch = (flagch == BOL) ? BOLEOL : EOL;
 804      -                        i += m->g->neol;
 805      -                }
 806      -                if (i != 0) {
 807      -                        for (; i > 0; i--)
 808      -                                st = step(m->g, startst, stopst, st,
 809      -                                    flagch, st);
 810      -                        SP("boleol", st, c);
 811      -                }
 812      -
 813      -                /* how about a word boundary? */
 814      -                if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
 815      -                    (c != OUT && ISWORD(c))) {
 816      -                        flagch = BOW;
 817      -                }
 818      -                if ((lastc != OUT && ISWORD(lastc)) &&
 819      -                    (flagch == EOL || (c != OUT && !ISWORD(c)))) {
 820      -                        flagch = EOW;
 821      -                }
 822      -                if (flagch == BOW || flagch == EOW) {
 823      -                        st = step(m->g, startst, stopst, st, flagch, st);
 824      -                        SP("boweow", st, c);
 825      -                }
 826      -
 827      -                /* are we done? */
 828      -                if (ISSET(st, stopst) || p == stop || clen > stop - p)
 829      -                        break;          /* NOTE BREAK OUT */
 830      -
 831      -                /* no, we must deal with this character */
 832      -                ASSIGN(tmp, st);
 833      -                ASSIGN(st, fresh);
 834      -                assert(c != OUT);
 835      -                st = step(m->g, startst, stopst, tmp, c, st);
 836      -                SP("aft", st, c);
 837      -                assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 838      -                p += clen;
 839      -        }
 840      -
 841      -        assert(coldp != NULL);
 842      -        m->coldp = coldp;
 843      -        if (ISSET(st, stopst))
 844      -                return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
 845      -        else
 846      -                return (NULL);
 847      -}
 848      -
 849      -/*
 850      - * slow - step through the string more deliberately
 851      - */
 852      -static const char *
 853      -slow(struct match *m, const char *start, const char *stop, sopno startst,
 854      -    sopno stopst)
 855      -{
 856      -        states st = m->st;
 857  753          states empty = m->empty;
 858  754          states tmp = m->tmp;
 859  755          const char *p = start;
 860  756          wint_t c;
 861  757          wint_t lastc;           /* previous c */
 862  758          wint_t flagch;
 863  759          int i;
 864  760          const char *matchp;     /* last p at which a match ended */
 865  761          size_t clen;
 866  762  
 867  763          AT("slow", start, stop, startst, stopst);
 868  764          CLEAR(st);
 869  765          SET1(st, startst);
 870  766          SP("sstart", st, *p);
 871  767          st = step(m->g, startst, stopst, st, NOTHING, st);
      768 +        if (fast)
      769 +                ASSIGN(fresh, st);
 872  770          matchp = NULL;
 873  771          if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 874  772                  c = OUT;
 875  773          else {
 876  774                  /*
 877  775                   * XXX Wrong if the previous character was multi-byte.
 878  776                   * Newline never is (in encodings supported by FreeBSD),
 879  777                   * so this only breaks the ISWORD tests below.
 880  778                   */
 881  779                  c = (uch)*(start - 1);
 882  780          }
 883  781          for (;;) {
 884  782                  /* next character */
 885  783                  lastc = c;
 886  784                  if (p == m->endp) {
 887  785                          c = OUT;
 888  786                          clen = 0;
 889  787                  } else
 890  788                          clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 891  789  
      790 +                if (fast && EQ(st, fresh))
      791 +                        matchp = p;
      792 +
 892  793                  /* is there an EOL and/or BOL between lastc and c? */
 893  794                  flagch = '\0';
 894  795                  i = 0;
 895  796                  if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 896  797                      (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
 897  798                          flagch = BOL;
 898  799                          i = m->g->nbol;
 899  800                  }
 900  801                  if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
 901  802                      (c == OUT && !(m->eflags&REG_NOTEOL))) {
↓ open down ↓ 15 lines elided ↑ open up ↑
 917  818                  if ((lastc != OUT && ISWORD(lastc)) &&
 918  819                      (flagch == EOL || (c != OUT && !ISWORD(c)))) {
 919  820                          flagch = EOW;
 920  821                  }
 921  822                  if (flagch == BOW || flagch == EOW) {
 922  823                          st = step(m->g, startst, stopst, st, flagch, st);
 923  824                          SP("sboweow", st, c);
 924  825                  }
 925  826  
 926  827                  /* are we done? */
 927      -                if (ISSET(st, stopst))
 928      -                        matchp = p;
 929      -                if (EQ(st, empty) || p == stop || clen > stop - p)
      828 +                if (ISSET(st, stopst)) {
      829 +                        if (fast)
      830 +                                break;
      831 +                        else
      832 +                                matchp = p;
      833 +                }
      834 +                if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
 930  835                          break;          /* NOTE BREAK OUT */
 931  836  
 932  837                  /* no, we must deal with this character */
 933  838                  ASSIGN(tmp, st);
 934      -                ASSIGN(st, empty);
      839 +                if (fast)
      840 +                        ASSIGN(st, fresh);
      841 +                else
      842 +                        ASSIGN(st, empty);
 935  843                  assert(c != OUT);
 936  844                  st = step(m->g, startst, stopst, tmp, c, st);
 937  845                  SP("saft", st, c);
 938  846                  assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 939  847                  p += clen;
 940  848          }
 941  849  
 942      -        return (matchp);
      850 +        if (fast) {
      851 +                assert(matchp != NULL);
      852 +                m->coldp = matchp;
      853 +                if (ISSET(st, stopst))
      854 +                        return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
      855 +                else
      856 +                        return (NULL);
      857 +        } else
      858 +                return (matchp);
 943  859  }
 944  860  
 945      -
 946  861  /*
 947  862   * step - map set of states reachable before char to set reachable after
 948  863   */
 949  864  static states
 950  865  step(struct re_guts *g,
 951  866      sopno start,        /* start state within strip */
 952  867      sopno stop,         /* state after stop state within strip */
 953  868      states bef,         /* states reachable before */
 954  869      wint_t ch,          /* character or NONCHAR code */
 955  870      states aft)         /* states already known reachable after */
↓ open down ↓ 65 lines elided ↑ open up ↑
1021  936                          break;
1022  937                  case O_QUEST:           /* just an empty */
1023  938                          FWD(aft, aft, 1);
1024  939                          break;
1025  940                  case OLPAREN:           /* not significant here */
1026  941                  case ORPAREN:
1027  942                          FWD(aft, aft, 1);
1028  943                          break;
1029  944                  case OCH_:              /* mark the first two branches */
1030  945                          FWD(aft, aft, 1);
1031      -                        assert(OP(g->strip[pc+OPND(s)]) == OOR2);
      946 +                        assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
1032  947                          FWD(aft, aft, OPND(s));
1033  948                          break;
1034  949                  case OOR1:              /* done a branch, find the O_CH */
1035  950                          if (ISSTATEIN(aft, here)) {
1036  951                                  for (look = 1;
1037      -                                    OP(s = g->strip[pc+look]) != O_CH;
      952 +                                    OP(s = g->strip[pc+look]) != (sop)O_CH;
1038  953                                      look += OPND(s))
1039      -                                        assert(OP(s) == OOR2);
      954 +                                        assert(OP(s) == (sop)OOR2);
1040  955                                  FWD(aft, aft, look + 1);
1041  956                          }
1042  957                          break;
1043  958                  case OOR2:              /* propagate OCH_'s marking */
1044  959                          FWD(aft, aft, 1);
1045      -                        if (OP(g->strip[pc+OPND(s)]) != O_CH) {
1046      -                                assert(OP(g->strip[pc+OPND(s)]) == OOR2);
      960 +                        if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
      961 +                                assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
1047  962                                  FWD(aft, aft, OPND(s));
1048  963                          }
1049  964                          break;
1050  965                  case O_CH:              /* just empty */
1051  966                          FWD(aft, aft, 1);
1052  967                          break;
1053  968                  default:                /* ooooops... */
1054  969                          assert(0);
1055  970                          break;
1056  971                  }
↓ open down ↓ 60 lines elided ↑ open up ↑
1117 1032          if (isprint((uch)ch) || ch == ' ')
1118 1033                  (void) sprintf(pbuf, "%c", ch);
1119 1034          else
1120 1035                  (void) sprintf(pbuf, "\\%o", ch);
1121 1036          return (pbuf);
1122 1037  }
1123 1038  #endif
1124 1039  #endif
1125 1040  
1126 1041  #undef  matcher
1127      -#undef  fast
1128      -#undef  slow
     1042 +#undef  walk
1129 1043  #undef  dissect
1130 1044  #undef  backref
1131 1045  #undef  step
1132 1046  #undef  print
1133 1047  #undef  at
1134 1048  #undef  match
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX