Print this page
8993 sync regcomp(3C) with upstream


  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. Neither the name of the University nor the names of its contributors
  21  *    may be used to endorse or promote products derived from this software
  22  *    without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  */
  36 


  37 /*
  38  * The matching engine and friends.  This file is #included by regexec.c
  39  * after suitable #defines of a variety of macros used herein, so that
  40  * different state representations can be used without duplicating masses
  41  * of code.
  42  */
  43 
  44 #ifdef SNAMES
  45 #define matcher smatcher
  46 #define fast    sfast
  47 #define slow    sslow
  48 #define dissect sdissect
  49 #define backref sbackref
  50 #define step    sstep
  51 #define print   sprint
  52 #define at      sat
  53 #define match   smat
  54 #endif
  55 #ifdef LNAMES
  56 #define matcher lmatcher
  57 #define fast    lfast
  58 #define slow    lslow
  59 #define dissect ldissect
  60 #define backref lbackref
  61 #define step    lstep
  62 #define print   lprint
  63 #define at      lat
  64 #define match   lmat
  65 #endif
  66 #ifdef MNAMES
  67 #define matcher mmatcher
  68 #define fast    mfast
  69 #define slow    mslow
  70 #define dissect mdissect
  71 #define backref mbackref
  72 #define step    mstep
  73 #define print   mprint
  74 #define at      mat
  75 #define match   mmat
  76 #endif
  77 
  78 /* another structure passed up and down to avoid zillions of parameters */
  79 struct match {
  80         struct re_guts *g;
  81         int eflags;
  82         regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
  83         const char *offp;       /* offsets work from here */
  84         const char *beginp;     /* start of string -- virtual NUL precedes */
  85         const char *endp;       /* end of string -- virtual NUL here */
  86         const char *coldp;      /* can be no match starting before here */
  87         const char **lastpos;   /* [nplus+1] */
  88         STATEVARS;
  89         states st;              /* current states */
  90         states fresh;           /* states for a fresh start */
  91         states tmp;             /* temporary */
  92         states empty;           /* empty set of states */
  93         mbstate_t mbs;          /* multibyte conversion state */
  94 };
  95 
  96 /* ========= begin header generated by ./mkh ========= */
  97 #ifdef __cplusplus
  98 extern "C" {
  99 #endif
 100 
 101 /* === engine.c === */
 102 static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
 103 static const char *dissect(struct match *, const char *, const char *,
 104     sopno, sopno);
 105 static const char *backref(struct match *, const char *, const char *, sopno,
 106     sopno, sopno, int);
 107 static const char *fast(struct match *, const char *, const char *, sopno,
 108     sopno);
 109 static const char *slow(struct match *, const char *, const char *, sopno,
 110     sopno);
 111 static states step(struct re_guts *, sopno, sopno, states, wint_t, states);
 112 #define MAX_RECURSION   100
 113 #define BOL     (OUT-1)
 114 #define EOL     (BOL-1)
 115 #define BOLEOL  (BOL-2)
 116 #define NOTHING (BOL-3)
 117 #define BOW     (BOL-4)
 118 #define EOW     (BOL-5)
 119 #define BADCHAR (BOL-6)
 120 #define NONCHAR(c)      ((c) <= OUT)
 121 #ifdef REDEBUG
 122 static void print(struct match *, const char *, states, int, FILE *);
 123 #endif
 124 #ifdef REDEBUG
 125 static void at(struct match *, const char *, const char *, const char *,
 126     sopno, sopno);
 127 #endif
 128 #ifdef REDEBUG
 129 static const char *pchar(int ch);
 130 #endif


 236         m->lastpos = NULL;
 237         m->offp = string;
 238         m->beginp = start;
 239         m->endp = stop;
 240         STATESETUP(m, 4);
 241         SETUP(m->st);
 242         SETUP(m->fresh);
 243         SETUP(m->tmp);
 244         SETUP(m->empty);
 245         CLEAR(m->empty);
 246         ZAPSTATE(&m->mbs);
 247 
 248         /* Adjust start according to moffset, to speed things up */
 249         if (dp != NULL && g->moffset > -1)
 250                 start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
 251 
 252         SP("mloop", m->st, *start);
 253 
 254         /* this loop does only one repetition except for backrefs */
 255         for (;;) {
 256                 endp = fast(m, start, stop, gf, gl);
 257                 if (endp == NULL) {             /* a miss */
 258                         if (m->pmatch != NULL)
 259                                 free((char *)m->pmatch);
 260                         if (m->lastpos != NULL)
 261                                 free((char *)m->lastpos);
 262                         STATETEARDOWN(m);
 263                         return (REG_NOMATCH);
 264                 }
 265                 if (nmatch == 0 && !g->backrefs)
 266                         break;          /* no further info needed */
 267 
 268                 /* where? */
 269                 assert(m->coldp != NULL);
 270                 for (;;) {
 271                         NOTE("finding start");
 272                         endp = slow(m, m->coldp, stop, gf, gl);
 273                         if (endp != NULL)
 274                                 break;
 275                         assert(m->coldp < m->endp);
 276                         m->coldp += XMBRTOWC(NULL, m->coldp,
 277                             m->endp - m->coldp, &m->mbs, 0);
 278                 }
 279                 if (nmatch == 1 && !g->backrefs)
 280                         break;          /* no further info needed */
 281 
 282                 /* oh my, it wants the subexpressions... */
 283                 if (m->pmatch == NULL)
 284                         m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
 285                             sizeof (regmatch_t));
 286                 if (m->pmatch == NULL) {
 287                         STATETEARDOWN(m);
 288                         return (REG_ESPACE);
 289                 }
 290                 for (i = 1; i <= m->g->nsub; i++)
 291                         m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
 292                 if (!g->backrefs && !(m->eflags&REG_BACKR)) {


 297                                 m->lastpos = malloc((g->nplus+1) *
 298                                     sizeof (const char *));
 299                         if (g->nplus > 0 && m->lastpos == NULL) {
 300                                 free(m->pmatch);
 301                                 STATETEARDOWN(m);
 302                                 return (REG_ESPACE);
 303                         }
 304                         NOTE("backref dissect");
 305                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
 306                 }
 307                 if (dp != NULL)
 308                         break;
 309 
 310                 /* uh-oh... we couldn't find a subexpression-level match */
 311                 assert(g->backrefs); /* must be back references doing it */
 312                 assert(g->nplus == 0 || m->lastpos != NULL);
 313                 for (;;) {
 314                         if (dp != NULL || endp <= m->coldp)
 315                                 break;          /* defeat */
 316                         NOTE("backoff");
 317                         endp = slow(m, m->coldp, endp-1, gf, gl);
 318                         if (endp == NULL)
 319                                 break;          /* defeat */
 320                         /* try it on a shorter possibility */
 321 #ifndef NDEBUG
 322                         for (i = 1; i <= m->g->nsub; i++) {
 323                                 assert(m->pmatch[i].rm_so == -1);
 324                                 assert(m->pmatch[i].rm_eo == -1);
 325                         }
 326 #endif
 327                         NOTE("backoff dissect");
 328                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
 329                 }
 330                 assert(dp == NULL || dp == endp);
 331                 if (dp != NULL)         /* found a shorter one */
 332                         break;
 333 
 334                 /* despite initial appearances, there is no match here */
 335                 NOTE("false alarm");
 336                 /* recycle starting later */
 337                 start = m->coldp + XMBRTOWC(NULL, m->coldp,


 365 
 366 /*
 367  * dissect - figure out what matched what, no back references
 368  */
 369 static const char *
 370 dissect(struct match *m, const char *start, const char *stop, sopno startst,
 371     sopno stopst)
 372 {
 373         int i;
 374         sopno ss;               /* start sop of current subRE */
 375         sopno es;               /* end sop of current subRE */
 376         const char *sp;         /* start of string matched by it */
 377         const char *stp;        /* string matched by it cannot pass here */
 378         const char *rest;       /* start of rest of string */
 379         const char *tail;       /* string unmatched by rest of RE */
 380         sopno ssub;             /* start sop of subsubRE */
 381         sopno esub;             /* end sop of subsubRE */
 382         const char *ssp;        /* start of string matched by subsubRE */
 383         const char *sep;        /* end of string matched by subsubRE */
 384         const char *oldssp;     /* previous ssp */
 385         const char *dp __unused;
 386 

 387         AT("diss", start, stop, startst, stopst);
 388         sp = start;
 389         for (ss = startst; ss < stopst; ss = es) {
 390                 /* identify end of subRE */
 391                 es = ss;
 392                 switch (OP(m->g->strip[es])) {
 393                 case OPLUS_:
 394                 case OQUEST_:
 395                         es += OPND(m->g->strip[es]);
 396                         break;
 397                 case OCH_:
 398                         while (OP(m->g->strip[es]) != O_CH)
 399                                 es += OPND(m->g->strip[es]);
 400                         break;
 401                 }
 402                 es++;
 403 
 404                 /* figure out what it matched */
 405                 switch (OP(m->g->strip[ss])) {
 406                 case OEND:
 407                         assert(0);
 408                         break;
 409                 case OCHAR:
 410                         sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
 411                         break;
 412                 case OBOL:
 413                 case OEOL:
 414                 case OBOW:
 415                 case OEOW:
 416                         break;
 417                 case OANY:
 418                 case OANYOF:
 419                         sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
 420                         break;
 421                 case OBACK_:
 422                 case O_BACK:
 423                         assert(0);
 424                         break;
 425                 /* cases where length of match is hard to find */
 426                 case OQUEST_:
 427                         stp = stop;
 428                         for (;;) {
 429                                 /* how long could this one be? */
 430                                 rest = slow(m, sp, stp, ss, es);
 431                                 assert(rest != NULL);   /* it did match */
 432                                 /* could the rest match the rest? */
 433                                 tail = slow(m, rest, stop, es, stopst);
 434                                 if (tail == stop)
 435                                         break;          /* yes! */
 436                                 /* no -- try a shorter match for this one */
 437                                 stp = rest - 1;
 438                                 assert(stp >= sp);   /* it did work */
 439                         }
 440                         ssub = ss + 1;
 441                         esub = es - 1;
 442                         /* did innards match? */
 443                         if (slow(m, sp, rest, ssub, esub) != NULL) {
 444                                 dp = dissect(m, sp, rest, ssub, esub);
 445                                 assert(dp == rest);
 446 #if defined(__lint)
 447                                 (void) dp;
 448 #endif
 449                         } else          /* no */
 450                                 assert(sp == rest);
 451                         sp = rest;
 452                         break;
 453                 case OPLUS_:
 454                         stp = stop;
 455                         for (;;) {
 456                                 /* how long could this one be? */
 457                                 rest = slow(m, sp, stp, ss, es);
 458                                 assert(rest != NULL);   /* it did match */
 459                                 /* could the rest match the rest? */
 460                                 tail = slow(m, rest, stop, es, stopst);
 461                                 if (tail == stop)
 462                                         break;          /* yes! */
 463                                 /* no -- try a shorter match for this one */
 464                                 stp = rest - 1;
 465                                 assert(stp >= sp);   /* it did work */
 466                         }
 467                         ssub = ss + 1;
 468                         esub = es - 1;
 469                         ssp = sp;
 470                         oldssp = ssp;
 471                         for (;;) {      /* find last match of innards */
 472                                 sep = slow(m, ssp, rest, ssub, esub);
 473                                 if (sep == NULL || sep == ssp)
 474                                         break;  /* failed or matched null */
 475                                 oldssp = ssp;   /* on to next try */
 476                                 ssp = sep;
 477                         }
 478                         if (sep == NULL) {
 479                                 /* last successful match */
 480                                 sep = ssp;
 481                                 ssp = oldssp;
 482                         }
 483                         assert(sep == rest);    /* must exhaust substring */
 484                         assert(slow(m, ssp, sep, ssub, esub) == rest);
 485                         dp = dissect(m, ssp, sep, ssub, esub);
 486                         assert(dp == sep);
 487                         sp = rest;
 488                         break;
 489                 case OCH_:
 490                         stp = stop;
 491                         for (;;) {
 492                                 /* how long could this one be? */
 493                                 rest = slow(m, sp, stp, ss, es);
 494                                 assert(rest != NULL);   /* it did match */
 495                                 /* could the rest match the rest? */
 496                                 tail = slow(m, rest, stop, es, stopst);
 497                                 if (tail == stop)
 498                                         break;          /* yes! */
 499                                 /* no -- try a shorter match for this one */
 500                                 stp = rest - 1;
 501                                 assert(stp >= sp);   /* it did work */
 502                         }
 503                         ssub = ss + 1;
 504                         esub = ss + OPND(m->g->strip[ss]) - 1;
 505                         assert(OP(m->g->strip[esub]) == OOR1);
 506                         for (;;) {      /* find first matching branch */
 507                                 if (slow(m, sp, rest, ssub, esub) == rest)

 508                                         break;  /* it matched all of it */
 509                                 /* that one missed, try next one */
 510                                 assert(OP(m->g->strip[esub]) == OOR1);
 511                                 esub++;
 512                                 assert(OP(m->g->strip[esub]) == OOR2);
 513                                 ssub = esub + 1;
 514                                 esub += OPND(m->g->strip[esub]);
 515                                 if (OP(m->g->strip[esub]) == OOR2)
 516                                         esub--;
 517                                 else
 518                                         assert(OP(m->g->strip[esub]) == O_CH);
 519                         }
 520                         dp = dissect(m, sp, rest, ssub, esub);
 521                         assert(dp == rest);
 522                         sp = rest;
 523                         break;
 524                 case O_PLUS:
 525                 case O_QUEST:
 526                 case OOR1:
 527                 case OOR2:
 528                 case O_CH:
 529                         assert(0);
 530                         break;
 531                 case OLPAREN:
 532                         i = OPND(m->g->strip[ss]);
 533                         assert(0 < i && i <= m->g->nsub);
 534                         m->pmatch[i].rm_so = sp - m->offp;
 535                         break;


 620                                 break;
 621                         }
 622                         return (NULL);
 623                 case OEOW:
 624                         if (((sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
 625                             (sp < m->endp && *sp == '\n' &&
 626                             (m->g->cflags&REG_NEWLINE)) ||
 627                             (sp < m->endp && !ISWORD(*sp))) &&
 628                             (sp > m->beginp && ISWORD(*(sp-1)))) {
 629                                 break;
 630                         }
 631                         return (NULL);
 632                 case O_QUEST:
 633                         break;
 634                 case OOR1:      /* matches null but needs to skip */
 635                         ss++;
 636                         s = m->g->strip[ss];
 637                         do {
 638                                 assert(OP(s) == OOR2);
 639                                 ss += OPND(s);
 640                         } while (OP(s = m->g->strip[ss]) != O_CH);
 641                         /* note that the ss++ gets us past the O_CH */
 642                         break;
 643                 default:        /* have to make a choice */
 644                         hard = 1;
 645                         break;
 646                 }
 647         if (!hard) {            /* that was it! */
 648                 if (sp != stop)
 649                         return (NULL);
 650                 return (sp);
 651         }
 652         ss--;                   /* adjust for the for's final increment */
 653 
 654         /* the hard stuff */
 655         AT("hard", sp, stop, ss, stopst);
 656         s = m->g->strip[ss];
 657         switch (OP(s)) {
 658         case OBACK_:            /* the vilest depths */
 659                 i = OPND(s);
 660                 assert(0 < i && i <= m->g->nsub);
 661                 if (m->pmatch[i].rm_eo == -1)
 662                         return (NULL);
 663                 assert(m->pmatch[i].rm_so != -1);
 664                 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
 665                 if (len == 0 && rec++ > MAX_RECURSION)
 666                         return (NULL);
 667                 assert(stop - m->beginp >= len);
 668                 if (sp > stop - len)
 669                         return (NULL);  /* not enough left to match */
 670                 ssp = m->offp + m->pmatch[i].rm_so;
 671                 if (memcmp(sp, ssp, len) != 0)
 672                         return (NULL);
 673                 while (m->g->strip[ss] != SOP(O_BACK, i))
 674                         ss++;
 675                 return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
 676         case OQUEST_:           /* to null or not */
 677                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 678                 if (dp != NULL)
 679                         return (dp);    /* not */
 680                 return (backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
 681         case OPLUS_:
 682                 assert(m->lastpos != NULL);
 683                 assert(lev+1 <= m->g->nplus);
 684                 m->lastpos[lev+1] = sp;
 685                 return (backref(m, sp, stop, ss+1, stopst, lev+1, rec));
 686         case O_PLUS:
 687                 if (sp == m->lastpos[lev])   /* last pass matched null */
 688                         return (backref(m, sp, stop, ss+1, stopst, lev-1, rec));
 689                 /* try another pass */
 690                 m->lastpos[lev] = sp;
 691                 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
 692                 if (dp == NULL)
 693                         return (backref(m, sp, stop, ss+1, stopst, lev-1, rec));
 694                 return (dp);
 695         case OCH_:              /* find the right one, if any */
 696                 ssub = ss + 1;
 697                 esub = ss + OPND(s) - 1;
 698                 assert(OP(m->g->strip[esub]) == OOR1);
 699                 for (;;) {      /* find first matching branch */
 700                         dp = backref(m, sp, stop, ssub, esub, lev, rec);
 701                         if (dp != NULL)
 702                                 return (dp);
 703                         /* that one missed, try next one */
 704                         if (OP(m->g->strip[esub]) == O_CH)
 705                                 return (NULL);  /* there is none */
 706                         esub++;
 707                         assert(OP(m->g->strip[esub]) == OOR2);
 708                         ssub = esub + 1;
 709                         esub += OPND(m->g->strip[esub]);
 710                         if (OP(m->g->strip[esub]) == OOR2)
 711                                 esub--;
 712                         else
 713                                 assert(OP(m->g->strip[esub]) == O_CH);
 714                 }
 715                 /* NOTREACHED */
 716                 break;
 717         case OLPAREN:           /* must undo assignment if rest fails */
 718                 i = OPND(s);
 719                 assert(0 < i && i <= m->g->nsub);
 720                 offsave = m->pmatch[i].rm_so;
 721                 m->pmatch[i].rm_so = sp - m->offp;
 722                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 723                 if (dp != NULL)
 724                         return (dp);
 725                 m->pmatch[i].rm_so = offsave;
 726                 return (NULL);
 727         case ORPAREN:           /* must undo assignment if rest fails */
 728                 i = OPND(s);
 729                 assert(0 < i && i <= m->g->nsub);
 730                 offsave = m->pmatch[i].rm_eo;
 731                 m->pmatch[i].rm_eo = sp - m->offp;
 732                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 733                 if (dp != NULL)
 734                         return (dp);
 735                 m->pmatch[i].rm_eo = offsave;
 736                 return (NULL);
 737         default:                /* uh oh */
 738                 assert(0);
 739                 break;
 740         }
 741 
 742         /* "can't happen" */
 743         assert(0);
 744         return (NULL);
 745 }
 746 
 747 /*
 748  * fast - step through the string at top speed

 749  */
 750 static const char *
 751 fast(struct match *m, const char *start, const char *stop, sopno startst,
 752     sopno stopst)
 753 {
 754         states st = m->st;
 755         states fresh = m->fresh;
 756         states tmp = m->tmp;
 757         const char *p = start;
 758         wint_t c;
 759         wint_t lastc;           /* previous c */
 760         wint_t flagch;
 761         int i;
 762         const char *coldp;      /* last p after which no match was underway */
 763         size_t clen;
 764 
 765         CLEAR(st);
 766         SET1(st, startst);
 767         SP("fast", st, *p);
 768         st = step(m->g, startst, stopst, st, NOTHING, st);
 769         ASSIGN(fresh, st);
 770         SP("start", st, *p);
 771         coldp = NULL;
 772         if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 773                 c = OUT;
 774         else {
 775                 /*
 776                  * XXX Wrong if the previous character was multi-byte.
 777                  * Newline never is (in encodings supported by FreeBSD),
 778                  * so this only breaks the ISWORD tests below.
 779                  */
 780                 c = (uch)*(start - 1);
 781         }
 782         for (;;) {
 783                 /* next character */
 784                 lastc = c;
 785                 if (p == m->endp) {
 786                         clen = 0;
 787                         c = OUT;
 788                 } else
 789                         clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 790                 if (EQ(st, fresh))
 791                         coldp = p;
 792 
 793                 /* is there an EOL and/or BOL between lastc and c? */
 794                 flagch = '\0';
 795                 i = 0;
 796                 if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 797                     (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
 798                         flagch = BOL;
 799                         i = m->g->nbol;
 800                 }
 801                 if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
 802                     (c == OUT && !(m->eflags&REG_NOTEOL))) {
 803                         flagch = (flagch == BOL) ? BOLEOL : EOL;
 804                         i += m->g->neol;
 805                 }
 806                 if (i != 0) {
 807                         for (; i > 0; i--)
 808                                 st = step(m->g, startst, stopst, st,
 809                                     flagch, st);
 810                         SP("boleol", st, c);
 811                 }
 812 
 813                 /* how about a word boundary? */
 814                 if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
 815                     (c != OUT && ISWORD(c))) {
 816                         flagch = BOW;
 817                 }
 818                 if ((lastc != OUT && ISWORD(lastc)) &&
 819                     (flagch == EOL || (c != OUT && !ISWORD(c)))) {
 820                         flagch = EOW;
 821                 }
 822                 if (flagch == BOW || flagch == EOW) {
 823                         st = step(m->g, startst, stopst, st, flagch, st);
 824                         SP("boweow", st, c);
 825                 }
 826 
 827                 /* are we done? */
 828                 if (ISSET(st, stopst) || p == stop || clen > stop - p)
 829                         break;          /* NOTE BREAK OUT */
 830 
 831                 /* no, we must deal with this character */
 832                 ASSIGN(tmp, st);
 833                 ASSIGN(st, fresh);
 834                 assert(c != OUT);
 835                 st = step(m->g, startst, stopst, tmp, c, st);
 836                 SP("aft", st, c);
 837                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 838                 p += clen;
 839         }
 840 
 841         assert(coldp != NULL);
 842         m->coldp = coldp;
 843         if (ISSET(st, stopst))
 844                 return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
 845         else
 846                 return (NULL);
 847 }
 848 
 849 /*
 850  * slow - step through the string more deliberately
 851  */
 852 static const char *
 853 slow(struct match *m, const char *start, const char *stop, sopno startst,
 854     sopno stopst)
 855 {
 856         states st = m->st;
 857         states empty = m->empty;
 858         states tmp = m->tmp;
 859         const char *p = start;
 860         wint_t c;
 861         wint_t lastc;           /* previous c */
 862         wint_t flagch;
 863         int i;
 864         const char *matchp;     /* last p at which a match ended */
 865         size_t clen;
 866 
 867         AT("slow", start, stop, startst, stopst);
 868         CLEAR(st);
 869         SET1(st, startst);
 870         SP("sstart", st, *p);
 871         st = step(m->g, startst, stopst, st, NOTHING, st);


 872         matchp = NULL;
 873         if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 874                 c = OUT;
 875         else {
 876                 /*
 877                  * XXX Wrong if the previous character was multi-byte.
 878                  * Newline never is (in encodings supported by FreeBSD),
 879                  * so this only breaks the ISWORD tests below.
 880                  */
 881                 c = (uch)*(start - 1);
 882         }
 883         for (;;) {
 884                 /* next character */
 885                 lastc = c;
 886                 if (p == m->endp) {
 887                         c = OUT;
 888                         clen = 0;
 889                 } else
 890                         clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 891 



 892                 /* is there an EOL and/or BOL between lastc and c? */
 893                 flagch = '\0';
 894                 i = 0;
 895                 if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 896                     (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
 897                         flagch = BOL;
 898                         i = m->g->nbol;
 899                 }
 900                 if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
 901                     (c == OUT && !(m->eflags&REG_NOTEOL))) {
 902                         flagch = (flagch == BOL) ? BOLEOL : EOL;
 903                         i += m->g->neol;
 904                 }
 905                 if (i != 0) {
 906                         for (; i > 0; i--)
 907                                 st = step(m->g, startst, stopst, st,
 908                                     flagch, st);
 909                         SP("sboleol", st, c);
 910                 }
 911 
 912                 /* how about a word boundary? */
 913                 if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
 914                     (c != OUT && ISWORD(c))) {
 915                         flagch = BOW;
 916                 }
 917                 if ((lastc != OUT && ISWORD(lastc)) &&
 918                     (flagch == EOL || (c != OUT && !ISWORD(c)))) {
 919                         flagch = EOW;
 920                 }
 921                 if (flagch == BOW || flagch == EOW) {
 922                         st = step(m->g, startst, stopst, st, flagch, st);
 923                         SP("sboweow", st, c);
 924                 }
 925 
 926                 /* are we done? */
 927                 if (ISSET(st, stopst))



 928                         matchp = p;
 929                 if (EQ(st, empty) || p == stop || clen > stop - p)

 930                         break;          /* NOTE BREAK OUT */
 931 
 932                 /* no, we must deal with this character */
 933                 ASSIGN(tmp, st);



 934                 ASSIGN(st, empty);
 935                 assert(c != OUT);
 936                 st = step(m->g, startst, stopst, tmp, c, st);
 937                 SP("saft", st, c);
 938                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 939                 p += clen;
 940         }
 941 








 942         return (matchp);
 943 }
 944 
 945 
 946 /*
 947  * step - map set of states reachable before char to set reachable after
 948  */
 949 static states
 950 step(struct re_guts *g,
 951     sopno start,        /* start state within strip */
 952     sopno stop,         /* state after stop state within strip */
 953     states bef,         /* states reachable before */
 954     wint_t ch,          /* character or NONCHAR code */
 955     states aft)         /* states already known reachable after */
 956 {
 957         cset *cs;
 958         sop s;
 959         sopno pc;
 960         onestate here;          /* note, macros know this name */
 961         sopno look;
 962         int i;
 963 
 964         for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
 965                 s = g->strip[pc];


1011                         BACK(aft, aft, OPND(s));
1012                         if (!i && ISSETBACK(aft, OPND(s))) {
1013                                 /* oho, must reconsider loop body */
1014                                 pc -= OPND(s) + 1;
1015                                 INIT(here, pc);
1016                         }
1017                         break;
1018                 case OQUEST_:           /* two branches, both forward */
1019                         FWD(aft, aft, 1);
1020                         FWD(aft, aft, OPND(s));
1021                         break;
1022                 case O_QUEST:           /* just an empty */
1023                         FWD(aft, aft, 1);
1024                         break;
1025                 case OLPAREN:           /* not significant here */
1026                 case ORPAREN:
1027                         FWD(aft, aft, 1);
1028                         break;
1029                 case OCH_:              /* mark the first two branches */
1030                         FWD(aft, aft, 1);
1031                         assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1032                         FWD(aft, aft, OPND(s));
1033                         break;
1034                 case OOR1:              /* done a branch, find the O_CH */
1035                         if (ISSTATEIN(aft, here)) {
1036                                 for (look = 1;
1037                                     OP(s = g->strip[pc+look]) != O_CH;
1038                                     look += OPND(s))
1039                                         assert(OP(s) == OOR2);
1040                                 FWD(aft, aft, look + 1);
1041                         }
1042                         break;
1043                 case OOR2:              /* propagate OCH_'s marking */
1044                         FWD(aft, aft, 1);
1045                         if (OP(g->strip[pc+OPND(s)]) != O_CH) {
1046                                 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1047                                 FWD(aft, aft, OPND(s));
1048                         }
1049                         break;
1050                 case O_CH:              /* just empty */
1051                         FWD(aft, aft, 1);
1052                         break;
1053                 default:                /* ooooops... */
1054                         assert(0);
1055                         break;
1056                 }
1057         }
1058 
1059         return (aft);
1060 }
1061 
1062 #ifdef REDEBUG
1063 /*
1064  * print - print a set of states
1065  */
1066 static void


1107  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1108  * duplicate here avoids having a debugging-capable regexec.o tied to
1109  * a matching debug.o, and this is convenient.  It all disappears in
1110  * the non-debug compilation anyway, so it doesn't matter much.
1111  */
1112 static const char *
1113 pchar(int ch)
1114 {
1115         static char pbuf[10];
1116 
1117         if (isprint((uch)ch) || ch == ' ')
1118                 (void) sprintf(pbuf, "%c", ch);
1119         else
1120                 (void) sprintf(pbuf, "\\%o", ch);
1121         return (pbuf);
1122 }
1123 #endif
1124 #endif
1125 
1126 #undef  matcher
1127 #undef  fast
1128 #undef  slow
1129 #undef  dissect
1130 #undef  backref
1131 #undef  step
1132 #undef  print
1133 #undef  at
1134 #undef  match


  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. Neither the name of the University nor the names of its contributors
  21  *    may be used to endorse or promote products derived from this software
  22  *    without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  */
  36 
  37 #include <stdbool.h>
  38 
  39 /*
  40  * The matching engine and friends.  This file is #included by regexec.c
  41  * after suitable #defines of a variety of macros used herein, so that
  42  * different state representations can be used without duplicating masses
  43  * of code.
  44  */
  45 
  46 #ifdef SNAMES
  47 #define matcher smatcher
  48 #define walk    swalk

  49 #define dissect sdissect
  50 #define backref sbackref
  51 #define step    sstep
  52 #define print   sprint
  53 #define at      sat
  54 #define match   smat
  55 #endif
  56 #ifdef LNAMES
  57 #define matcher lmatcher
  58 #define walk    lwalk

  59 #define dissect ldissect
  60 #define backref lbackref
  61 #define step    lstep
  62 #define print   lprint
  63 #define at      lat
  64 #define match   lmat
  65 #endif
  66 #ifdef MNAMES
  67 #define matcher mmatcher
  68 #define walk    mwalk

  69 #define dissect mdissect
  70 #define backref mbackref
  71 #define step    mstep
  72 #define print   mprint
  73 #define at      mat
  74 #define match   mmat
  75 #endif
  76 
  77 /* another structure passed up and down to avoid zillions of parameters */
  78 struct match {
  79         struct re_guts *g;
  80         int eflags;
  81         regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
  82         const char *offp;       /* offsets work from here */
  83         const char *beginp;     /* start of string -- virtual NUL precedes */
  84         const char *endp;       /* end of string -- virtual NUL here */
  85         const char *coldp;      /* can be no match starting before here */
  86         const char **lastpos;   /* [nplus+1] */
  87         STATEVARS;
  88         states st;              /* current states */
  89         states fresh;           /* states for a fresh start */
  90         states tmp;             /* temporary */
  91         states empty;           /* empty set of states */
  92         mbstate_t mbs;          /* multibyte conversion state */
  93 };
  94 
  95 /* ========= begin header generated by ./mkh ========= */
  96 #ifdef __cplusplus
  97 extern "C" {
  98 #endif
  99 
 100 /* === engine.c === */
 101 static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
 102 static const char *dissect(struct match *, const char *, const char *,
 103     sopno, sopno);
 104 static const char *backref(struct match *, const char *, const char *, sopno,
 105     sopno, sopno, int);
 106 static const char *walk(struct match *m, const char *start, const char *stop,
 107     sopno startst, sopno stopst, bool fast);


 108 static states step(struct re_guts *, sopno, sopno, states, wint_t, states);
 109 #define MAX_RECURSION   100
 110 #define BOL     (OUT-1)
 111 #define EOL     (BOL-1)
 112 #define BOLEOL  (BOL-2)
 113 #define NOTHING (BOL-3)
 114 #define BOW     (BOL-4)
 115 #define EOW     (BOL-5)
 116 #define BADCHAR (BOL-6)
 117 #define NONCHAR(c)      ((c) <= OUT)
 118 #ifdef REDEBUG
 119 static void print(struct match *, const char *, states, int, FILE *);
 120 #endif
 121 #ifdef REDEBUG
 122 static void at(struct match *, const char *, const char *, const char *,
 123     sopno, sopno);
 124 #endif
 125 #ifdef REDEBUG
 126 static const char *pchar(int ch);
 127 #endif


 233         m->lastpos = NULL;
 234         m->offp = string;
 235         m->beginp = start;
 236         m->endp = stop;
 237         STATESETUP(m, 4);
 238         SETUP(m->st);
 239         SETUP(m->fresh);
 240         SETUP(m->tmp);
 241         SETUP(m->empty);
 242         CLEAR(m->empty);
 243         ZAPSTATE(&m->mbs);
 244 
 245         /* Adjust start according to moffset, to speed things up */
 246         if (dp != NULL && g->moffset > -1)
 247                 start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
 248 
 249         SP("mloop", m->st, *start);
 250 
 251         /* this loop does only one repetition except for backrefs */
 252         for (;;) {
 253                 endp = walk(m, start, stop, gf, gl, true);
 254                 if (endp == NULL) {             /* a miss */
 255                         if (m->pmatch != NULL)
 256                                 free((char *)m->pmatch);
 257                         if (m->lastpos != NULL)
 258                                 free((char *)m->lastpos);
 259                         STATETEARDOWN(m);
 260                         return (REG_NOMATCH);
 261                 }
 262                 if (nmatch == 0 && !g->backrefs)
 263                         break;          /* no further info needed */
 264 
 265                 /* where? */
 266                 assert(m->coldp != NULL);
 267                 for (;;) {
 268                         NOTE("finding start");
 269                         endp = walk(m, m->coldp, stop, gf, gl, false);
 270                         if (endp != NULL)
 271                                 break;
 272                         assert(m->coldp < m->endp);
 273                         m->coldp += XMBRTOWC(NULL, m->coldp,
 274                             m->endp - m->coldp, &m->mbs, 0);
 275                 }
 276                 if (nmatch == 1 && !g->backrefs)
 277                         break;          /* no further info needed */
 278 
 279                 /* oh my, it wants the subexpressions... */
 280                 if (m->pmatch == NULL)
 281                         m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
 282                             sizeof (regmatch_t));
 283                 if (m->pmatch == NULL) {
 284                         STATETEARDOWN(m);
 285                         return (REG_ESPACE);
 286                 }
 287                 for (i = 1; i <= m->g->nsub; i++)
 288                         m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
 289                 if (!g->backrefs && !(m->eflags&REG_BACKR)) {


 294                                 m->lastpos = malloc((g->nplus+1) *
 295                                     sizeof (const char *));
 296                         if (g->nplus > 0 && m->lastpos == NULL) {
 297                                 free(m->pmatch);
 298                                 STATETEARDOWN(m);
 299                                 return (REG_ESPACE);
 300                         }
 301                         NOTE("backref dissect");
 302                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
 303                 }
 304                 if (dp != NULL)
 305                         break;
 306 
 307                 /* uh-oh... we couldn't find a subexpression-level match */
 308                 assert(g->backrefs); /* must be back references doing it */
 309                 assert(g->nplus == 0 || m->lastpos != NULL);
 310                 for (;;) {
 311                         if (dp != NULL || endp <= m->coldp)
 312                                 break;          /* defeat */
 313                         NOTE("backoff");
 314                         endp = walk(m, m->coldp, endp-1, gf, gl, false);
 315                         if (endp == NULL)
 316                                 break;          /* defeat */
 317                         /* try it on a shorter possibility */
 318 #ifndef NDEBUG
 319                         for (i = 1; i <= m->g->nsub; i++) {
 320                                 assert(m->pmatch[i].rm_so == -1);
 321                                 assert(m->pmatch[i].rm_eo == -1);
 322                         }
 323 #endif
 324                         NOTE("backoff dissect");
 325                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
 326                 }
 327                 assert(dp == NULL || dp == endp);
 328                 if (dp != NULL)         /* found a shorter one */
 329                         break;
 330 
 331                 /* despite initial appearances, there is no match here */
 332                 NOTE("false alarm");
 333                 /* recycle starting later */
 334                 start = m->coldp + XMBRTOWC(NULL, m->coldp,


 362 
 363 /*
 364  * dissect - figure out what matched what, no back references
 365  */
 366 static const char *
 367 dissect(struct match *m, const char *start, const char *stop, sopno startst,
 368     sopno stopst)
 369 {
 370         int i;
 371         sopno ss;               /* start sop of current subRE */
 372         sopno es;               /* end sop of current subRE */
 373         const char *sp;         /* start of string matched by it */
 374         const char *stp;        /* string matched by it cannot pass here */
 375         const char *rest;       /* start of rest of string */
 376         const char *tail;       /* string unmatched by rest of RE */
 377         sopno ssub;             /* start sop of subsubRE */
 378         sopno esub;             /* end sop of subsubRE */
 379         const char *ssp;        /* start of string matched by subsubRE */
 380         const char *sep;        /* end of string matched by subsubRE */
 381         const char *oldssp;     /* previous ssp */
 382         const char *dp;
 383 
 384         (void) dp;
 385         AT("diss", start, stop, startst, stopst);
 386         sp = start;
 387         for (ss = startst; ss < stopst; ss = es) {
 388                 /* identify end of subRE */
 389                 es = ss;
 390                 switch (OP(m->g->strip[es])) {
 391                 case OPLUS_:
 392                 case OQUEST_:
 393                         es += OPND(m->g->strip[es]);
 394                         break;
 395                 case OCH_:
 396                         while (OP(m->g->strip[es]) != (sop)O_CH)
 397                                 es += OPND(m->g->strip[es]);
 398                         break;
 399                 }
 400                 es++;
 401 
 402                 /* figure out what it matched */
 403                 switch (OP(m->g->strip[ss])) {
 404                 case OEND:
 405                         assert(0);
 406                         break;
 407                 case OCHAR:
 408                         sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
 409                         break;
 410                 case OBOL:
 411                 case OEOL:
 412                 case OBOW:
 413                 case OEOW:
 414                         break;
 415                 case OANY:
 416                 case OANYOF:
 417                         sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
 418                         break;
 419                 case OBACK_:
 420                 case O_BACK:
 421                         assert(0);
 422                         break;
 423                 /* cases where length of match is hard to find */
 424                 case OQUEST_:
 425                         stp = stop;
 426                         for (;;) {
 427                                 /* how long could this one be? */
 428                                 rest = walk(m, sp, stp, ss, es, false);
 429                                 assert(rest != NULL);   /* it did match */
 430                                 /* could the rest match the rest? */
 431                                 tail = walk(m, rest, stop, es, stopst, false);
 432                                 if (tail == stop)
 433                                         break;          /* yes! */
 434                                 /* no -- try a shorter match for this one */
 435                                 stp = rest - 1;
 436                                 assert(stp >= sp);   /* it did work */
 437                         }
 438                         ssub = ss + 1;
 439                         esub = es - 1;
 440                         /* did innards match? */
 441                         if (walk(m, sp, rest, ssub, esub, false) != NULL) {
 442                                 dp = dissect(m, sp, rest, ssub, esub);
 443                                 assert(dp == rest);



 444                         } else          /* no */
 445                                 assert(sp == rest);
 446                         sp = rest;
 447                         break;
 448                 case OPLUS_:
 449                         stp = stop;
 450                         for (;;) {
 451                                 /* how long could this one be? */
 452                                 rest = walk(m, sp, stp, ss, es, false);
 453                                 assert(rest != NULL);   /* it did match */
 454                                 /* could the rest match the rest? */
 455                                 tail = walk(m, rest, stop, es, stopst, false);
 456                                 if (tail == stop)
 457                                         break;          /* yes! */
 458                                 /* no -- try a shorter match for this one */
 459                                 stp = rest - 1;
 460                                 assert(stp >= sp);   /* it did work */
 461                         }
 462                         ssub = ss + 1;
 463                         esub = es - 1;
 464                         ssp = sp;
 465                         oldssp = ssp;
 466                         for (;;) {      /* find last match of innards */
 467                                 sep = walk(m, ssp, rest, ssub, esub, false);
 468                                 if (sep == NULL || sep == ssp)
 469                                         break;  /* failed or matched null */
 470                                 oldssp = ssp;   /* on to next try */
 471                                 ssp = sep;
 472                         }
 473                         if (sep == NULL) {
 474                                 /* last successful match */
 475                                 sep = ssp;
 476                                 ssp = oldssp;
 477                         }
 478                         assert(sep == rest);    /* must exhaust substring */
 479                         assert(walk(m, ssp, sep, ssub, esub, false) == rest);
 480                         dp = dissect(m, ssp, sep, ssub, esub);
 481                         assert(dp == sep);
 482                         sp = rest;
 483                         break;
 484                 case OCH_:
 485                         stp = stop;
 486                         for (;;) {
 487                                 /* how long could this one be? */
 488                                 rest = walk(m, sp, stp, ss, es, false);
 489                                 assert(rest != NULL);   /* it did match */
 490                                 /* could the rest match the rest? */
 491                                 tail = walk(m, rest, stop, es, stopst, false);
 492                                 if (tail == stop)
 493                                         break;          /* yes! */
 494                                 /* no -- try a shorter match for this one */
 495                                 stp = rest - 1;
 496                                 assert(stp >= sp);   /* it did work */
 497                         }
 498                         ssub = ss + 1;
 499                         esub = ss + OPND(m->g->strip[ss]) - 1;
 500                         assert(OP(m->g->strip[esub]) == OOR1);
 501                         for (;;) {      /* find first matching branch */
 502                                 if (walk(m, sp, rest, ssub, esub,
 503                                     false) == rest)
 504                                         break;  /* it matched all of it */
 505                                 /* that one missed, try next one */
 506                                 assert(OP(m->g->strip[esub]) == OOR1);
 507                                 esub++;
 508                                 assert(OP(m->g->strip[esub]) == OOR2);
 509                                 ssub = esub + 1;
 510                                 esub += OPND(m->g->strip[esub]);
 511                                 if (OP(m->g->strip[esub]) == (sop)OOR2)
 512                                         esub--;
 513                                 else
 514                                         assert(OP(m->g->strip[esub]) == O_CH);
 515                         }
 516                         dp = dissect(m, sp, rest, ssub, esub);
 517                         assert(dp == rest);
 518                         sp = rest;
 519                         break;
 520                 case O_PLUS:
 521                 case O_QUEST:
 522                 case OOR1:
 523                 case OOR2:
 524                 case O_CH:
 525                         assert(0);
 526                         break;
 527                 case OLPAREN:
 528                         i = OPND(m->g->strip[ss]);
 529                         assert(0 < i && i <= m->g->nsub);
 530                         m->pmatch[i].rm_so = sp - m->offp;
 531                         break;


 616                                 break;
 617                         }
 618                         return (NULL);
 619                 case OEOW:
 620                         if (((sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
 621                             (sp < m->endp && *sp == '\n' &&
 622                             (m->g->cflags&REG_NEWLINE)) ||
 623                             (sp < m->endp && !ISWORD(*sp))) &&
 624                             (sp > m->beginp && ISWORD(*(sp-1)))) {
 625                                 break;
 626                         }
 627                         return (NULL);
 628                 case O_QUEST:
 629                         break;
 630                 case OOR1:      /* matches null but needs to skip */
 631                         ss++;
 632                         s = m->g->strip[ss];
 633                         do {
 634                                 assert(OP(s) == OOR2);
 635                                 ss += OPND(s);
 636                         } while (OP(s = m->g->strip[ss]) != (sop)O_CH);
 637                         /* note that the ss++ gets us past the O_CH */
 638                         break;
 639                 default:        /* have to make a choice */
 640                         hard = 1;
 641                         break;
 642                 }
 643         if (!hard) {            /* that was it! */
 644                 if (sp != stop)
 645                         return (NULL);
 646                 return (sp);
 647         }
 648         ss--;                   /* adjust for the for's final increment */
 649 
 650         /* the hard stuff */
 651         AT("hard", sp, stop, ss, stopst);
 652         s = m->g->strip[ss];
 653         switch (OP(s)) {
 654         case OBACK_:            /* the vilest depths */
 655                 i = OPND(s);
 656                 assert(0 < i && i <= m->g->nsub);
 657                 if (m->pmatch[i].rm_eo == -1)
 658                         return (NULL);
 659                 assert(m->pmatch[i].rm_so != -1);
 660                 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
 661                 if (len == 0 && rec++ > MAX_RECURSION)
 662                         return (NULL);
 663                 assert(stop - m->beginp >= len);
 664                 if (sp > stop - len)
 665                         return (NULL);  /* not enough left to match */
 666                 ssp = m->offp + m->pmatch[i].rm_so;
 667                 if (memcmp(sp, ssp, len) != 0)
 668                         return (NULL);
 669                 while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
 670                         ss++;
 671                 return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
 672         case OQUEST_:           /* to null or not */
 673                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 674                 if (dp != NULL)
 675                         return (dp);    /* not */
 676                 return (backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
 677         case OPLUS_:
 678                 assert(m->lastpos != NULL);
 679                 assert(lev+1 <= m->g->nplus);
 680                 m->lastpos[lev+1] = sp;
 681                 return (backref(m, sp, stop, ss+1, stopst, lev+1, rec));
 682         case O_PLUS:
 683                 if (sp == m->lastpos[lev])   /* last pass matched null */
 684                         return (backref(m, sp, stop, ss+1, stopst, lev-1, rec));
 685                 /* try another pass */
 686                 m->lastpos[lev] = sp;
 687                 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
 688                 if (dp == NULL)
 689                         return (backref(m, sp, stop, ss+1, stopst, lev-1, rec));
 690                 return (dp);
 691         case OCH_:              /* find the right one, if any */
 692                 ssub = ss + 1;
 693                 esub = ss + OPND(s) - 1;
 694                 assert(OP(m->g->strip[esub]) == OOR1);
 695                 for (;;) {      /* find first matching branch */
 696                         dp = backref(m, sp, stop, ssub, esub, lev, rec);
 697                         if (dp != NULL)
 698                                 return (dp);
 699                         /* that one missed, try next one */
 700                         if (OP(m->g->strip[esub]) == (sop)O_CH)
 701                                 return (NULL);  /* there is none */
 702                         esub++;
 703                         assert(OP(m->g->strip[esub]) == (sop)OOR2);
 704                         ssub = esub + 1;
 705                         esub += OPND(m->g->strip[esub]);
 706                         if (OP(m->g->strip[esub]) == (sop)OOR2)
 707                                 esub--;
 708                         else
 709                                 assert(OP(m->g->strip[esub]) == O_CH);
 710                 }
 711                 /* NOTREACHED */
 712                 break;
 713         case OLPAREN:           /* must undo assignment if rest fails */
 714                 i = OPND(s);
 715                 assert(0 < i && i <= m->g->nsub);
 716                 offsave = m->pmatch[i].rm_so;
 717                 m->pmatch[i].rm_so = sp - m->offp;
 718                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 719                 if (dp != NULL)
 720                         return (dp);
 721                 m->pmatch[i].rm_so = offsave;
 722                 return (NULL);
 723         case ORPAREN:           /* must undo assignment if rest fails */
 724                 i = OPND(s);
 725                 assert(0 < i && i <= m->g->nsub);
 726                 offsave = m->pmatch[i].rm_eo;
 727                 m->pmatch[i].rm_eo = sp - m->offp;
 728                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 729                 if (dp != NULL)
 730                         return (dp);
 731                 m->pmatch[i].rm_eo = offsave;
 732                 return (NULL);
 733         default:                /* uh oh */
 734                 assert(0);
 735                 break;
 736         }
 737 
 738         /* "can't happen" */
 739         assert(0);
 740         return (NULL);
 741 }
 742 
 743 /*
 744  * Step through the string either quickly or slowly.  Returns where it ended
 745  * or NULL.
 746  */
 747 static const char *
 748 walk(struct match *m, const char *start, const char *stop, sopno startst,
 749     sopno stopst, bool fast)
 750 {
 751         states st = m->st;
 752         states fresh = m->fresh;





































































































 753         states empty = m->empty;
 754         states tmp = m->tmp;
 755         const char *p = start;
 756         wint_t c;
 757         wint_t lastc;           /* previous c */
 758         wint_t flagch;
 759         int i;
 760         const char *matchp;     /* last p at which a match ended */
 761         size_t clen;
 762 
 763         AT("slow", start, stop, startst, stopst);
 764         CLEAR(st);
 765         SET1(st, startst);
 766         SP("sstart", st, *p);
 767         st = step(m->g, startst, stopst, st, NOTHING, st);
 768         if (fast)
 769                 ASSIGN(fresh, st);
 770         matchp = NULL;
 771         if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 772                 c = OUT;
 773         else {
 774                 /*
 775                  * XXX Wrong if the previous character was multi-byte.
 776                  * Newline never is (in encodings supported by FreeBSD),
 777                  * so this only breaks the ISWORD tests below.
 778                  */
 779                 c = (uch)*(start - 1);
 780         }
 781         for (;;) {
 782                 /* next character */
 783                 lastc = c;
 784                 if (p == m->endp) {
 785                         c = OUT;
 786                         clen = 0;
 787                 } else
 788                         clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 789 
 790                 if (fast && EQ(st, fresh))
 791                         matchp = p;
 792 
 793                 /* is there an EOL and/or BOL between lastc and c? */
 794                 flagch = '\0';
 795                 i = 0;
 796                 if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 797                     (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
 798                         flagch = BOL;
 799                         i = m->g->nbol;
 800                 }
 801                 if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
 802                     (c == OUT && !(m->eflags&REG_NOTEOL))) {
 803                         flagch = (flagch == BOL) ? BOLEOL : EOL;
 804                         i += m->g->neol;
 805                 }
 806                 if (i != 0) {
 807                         for (; i > 0; i--)
 808                                 st = step(m->g, startst, stopst, st,
 809                                     flagch, st);
 810                         SP("sboleol", st, c);
 811                 }
 812 
 813                 /* how about a word boundary? */
 814                 if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
 815                     (c != OUT && ISWORD(c))) {
 816                         flagch = BOW;
 817                 }
 818                 if ((lastc != OUT && ISWORD(lastc)) &&
 819                     (flagch == EOL || (c != OUT && !ISWORD(c)))) {
 820                         flagch = EOW;
 821                 }
 822                 if (flagch == BOW || flagch == EOW) {
 823                         st = step(m->g, startst, stopst, st, flagch, st);
 824                         SP("sboweow", st, c);
 825                 }
 826 
 827                 /* are we done? */
 828                 if (ISSET(st, stopst)) {
 829                         if (fast)
 830                                 break;
 831                         else
 832                                 matchp = p;
 833                 }
 834                 if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
 835                         break;          /* NOTE BREAK OUT */
 836 
 837                 /* no, we must deal with this character */
 838                 ASSIGN(tmp, st);
 839                 if (fast)
 840                         ASSIGN(st, fresh);
 841                 else
 842                         ASSIGN(st, empty);
 843                 assert(c != OUT);
 844                 st = step(m->g, startst, stopst, tmp, c, st);
 845                 SP("saft", st, c);
 846                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 847                 p += clen;
 848         }
 849 
 850         if (fast) {
 851                 assert(matchp != NULL);
 852                 m->coldp = matchp;
 853                 if (ISSET(st, stopst))
 854                         return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
 855                 else
 856                         return (NULL);
 857         } else
 858                 return (matchp);
 859 }
 860 

 861 /*
 862  * step - map set of states reachable before char to set reachable after
 863  */
 864 static states
 865 step(struct re_guts *g,
 866     sopno start,        /* start state within strip */
 867     sopno stop,         /* state after stop state within strip */
 868     states bef,         /* states reachable before */
 869     wint_t ch,          /* character or NONCHAR code */
 870     states aft)         /* states already known reachable after */
 871 {
 872         cset *cs;
 873         sop s;
 874         sopno pc;
 875         onestate here;          /* note, macros know this name */
 876         sopno look;
 877         int i;
 878 
 879         for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
 880                 s = g->strip[pc];


 926                         BACK(aft, aft, OPND(s));
 927                         if (!i && ISSETBACK(aft, OPND(s))) {
 928                                 /* oho, must reconsider loop body */
 929                                 pc -= OPND(s) + 1;
 930                                 INIT(here, pc);
 931                         }
 932                         break;
 933                 case OQUEST_:           /* two branches, both forward */
 934                         FWD(aft, aft, 1);
 935                         FWD(aft, aft, OPND(s));
 936                         break;
 937                 case O_QUEST:           /* just an empty */
 938                         FWD(aft, aft, 1);
 939                         break;
 940                 case OLPAREN:           /* not significant here */
 941                 case ORPAREN:
 942                         FWD(aft, aft, 1);
 943                         break;
 944                 case OCH_:              /* mark the first two branches */
 945                         FWD(aft, aft, 1);
 946                         assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
 947                         FWD(aft, aft, OPND(s));
 948                         break;
 949                 case OOR1:              /* done a branch, find the O_CH */
 950                         if (ISSTATEIN(aft, here)) {
 951                                 for (look = 1;
 952                                     OP(s = g->strip[pc+look]) != (sop)O_CH;
 953                                     look += OPND(s))
 954                                         assert(OP(s) == (sop)OOR2);
 955                                 FWD(aft, aft, look + 1);
 956                         }
 957                         break;
 958                 case OOR2:              /* propagate OCH_'s marking */
 959                         FWD(aft, aft, 1);
 960                         if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
 961                                 assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
 962                                 FWD(aft, aft, OPND(s));
 963                         }
 964                         break;
 965                 case O_CH:              /* just empty */
 966                         FWD(aft, aft, 1);
 967                         break;
 968                 default:                /* ooooops... */
 969                         assert(0);
 970                         break;
 971                 }
 972         }
 973 
 974         return (aft);
 975 }
 976 
 977 #ifdef REDEBUG
 978 /*
 979  * print - print a set of states
 980  */
 981 static void


1022  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1023  * duplicate here avoids having a debugging-capable regexec.o tied to
1024  * a matching debug.o, and this is convenient.  It all disappears in
1025  * the non-debug compilation anyway, so it doesn't matter much.
1026  */
1027 static const char *
1028 pchar(int ch)
1029 {
1030         static char pbuf[10];
1031 
1032         if (isprint((uch)ch) || ch == ' ')
1033                 (void) sprintf(pbuf, "%c", ch);
1034         else
1035                 (void) sprintf(pbuf, "\\%o", ch);
1036         return (pbuf);
1037 }
1038 #endif
1039 #endif
1040 
1041 #undef  matcher
1042 #undef  walk

1043 #undef  dissect
1044 #undef  backref
1045 #undef  step
1046 #undef  print
1047 #undef  at
1048 #undef  match