Print this page
8993 sync regcomp(3C) with upstream

@@ -32,43 +32,42 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
+#include <stdbool.h>
+
 /*
  * The matching engine and friends.  This file is #included by regexec.c
  * after suitable #defines of a variety of macros used herein, so that
  * different state representations can be used without duplicating masses
  * of code.
  */
 
 #ifdef SNAMES
 #define matcher smatcher
-#define fast    sfast
-#define slow    sslow
+#define walk    swalk
 #define dissect sdissect
 #define backref sbackref
 #define step    sstep
 #define print   sprint
 #define at      sat
 #define match   smat
 #endif
 #ifdef LNAMES
 #define matcher lmatcher
-#define fast    lfast
-#define slow    lslow
+#define walk    lwalk
 #define dissect ldissect
 #define backref lbackref
 #define step    lstep
 #define print   lprint
 #define at      lat
 #define match   lmat
 #endif
 #ifdef MNAMES
 #define matcher mmatcher
-#define fast    mfast
-#define slow    mslow
+#define walk    mwalk
 #define dissect mdissect
 #define backref mbackref
 #define step    mstep
 #define print   mprint
 #define at      mat

@@ -102,14 +101,12 @@
 static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
 static const char *dissect(struct match *, const char *, const char *,
     sopno, sopno);
 static const char *backref(struct match *, const char *, const char *, sopno,
     sopno, sopno, int);
-static const char *fast(struct match *, const char *, const char *, sopno,
-    sopno);
-static const char *slow(struct match *, const char *, const char *, sopno,
-    sopno);
+static const char *walk(struct match *m, const char *start, const char *stop,
+    sopno startst, sopno stopst, bool fast);
 static states step(struct re_guts *, sopno, sopno, states, wint_t, states);
 #define MAX_RECURSION   100
 #define BOL     (OUT-1)
 #define EOL     (BOL-1)
 #define BOLEOL  (BOL-2)

@@ -251,11 +248,11 @@
 
         SP("mloop", m->st, *start);
 
         /* this loop does only one repetition except for backrefs */
         for (;;) {
-                endp = fast(m, start, stop, gf, gl);
+                endp = walk(m, start, stop, gf, gl, true);
                 if (endp == NULL) {             /* a miss */
                         if (m->pmatch != NULL)
                                 free((char *)m->pmatch);
                         if (m->lastpos != NULL)
                                 free((char *)m->lastpos);

@@ -267,11 +264,11 @@
 
                 /* where? */
                 assert(m->coldp != NULL);
                 for (;;) {
                         NOTE("finding start");
-                        endp = slow(m, m->coldp, stop, gf, gl);
+                        endp = walk(m, m->coldp, stop, gf, gl, false);
                         if (endp != NULL)
                                 break;
                         assert(m->coldp < m->endp);
                         m->coldp += XMBRTOWC(NULL, m->coldp,
                             m->endp - m->coldp, &m->mbs, 0);

@@ -312,11 +309,11 @@
                 assert(g->nplus == 0 || m->lastpos != NULL);
                 for (;;) {
                         if (dp != NULL || endp <= m->coldp)
                                 break;          /* defeat */
                         NOTE("backoff");
-                        endp = slow(m, m->coldp, endp-1, gf, gl);
+                        endp = walk(m, m->coldp, endp-1, gf, gl, false);
                         if (endp == NULL)
                                 break;          /* defeat */
                         /* try it on a shorter possibility */
 #ifndef NDEBUG
                         for (i = 1; i <= m->g->nsub; i++) {

@@ -380,12 +377,13 @@
         sopno ssub;             /* start sop of subsubRE */
         sopno esub;             /* end sop of subsubRE */
         const char *ssp;        /* start of string matched by subsubRE */
         const char *sep;        /* end of string matched by subsubRE */
         const char *oldssp;     /* previous ssp */
-        const char *dp __unused;
+        const char *dp;
 
+        (void) dp;
         AT("diss", start, stop, startst, stopst);
         sp = start;
         for (ss = startst; ss < stopst; ss = es) {
                 /* identify end of subRE */
                 es = ss;

@@ -393,11 +391,11 @@
                 case OPLUS_:
                 case OQUEST_:
                         es += OPND(m->g->strip[es]);
                         break;
                 case OCH_:
-                        while (OP(m->g->strip[es]) != O_CH)
+                        while (OP(m->g->strip[es]) != (sop)O_CH)
                                 es += OPND(m->g->strip[es]);
                         break;
                 }
                 es++;
 

@@ -425,41 +423,38 @@
                 /* cases where length of match is hard to find */
                 case OQUEST_:
                         stp = stop;
                         for (;;) {
                                 /* how long could this one be? */
-                                rest = slow(m, sp, stp, ss, es);
+                                rest = walk(m, sp, stp, ss, es, false);
                                 assert(rest != NULL);   /* it did match */
                                 /* could the rest match the rest? */
-                                tail = slow(m, rest, stop, es, stopst);
+                                tail = walk(m, rest, stop, es, stopst, false);
                                 if (tail == stop)
                                         break;          /* yes! */
                                 /* no -- try a shorter match for this one */
                                 stp = rest - 1;
                                 assert(stp >= sp);      /* it did work */
                         }
                         ssub = ss + 1;
                         esub = es - 1;
                         /* did innards match? */
-                        if (slow(m, sp, rest, ssub, esub) != NULL) {
+                        if (walk(m, sp, rest, ssub, esub, false) != NULL) {
                                 dp = dissect(m, sp, rest, ssub, esub);
                                 assert(dp == rest);
-#if defined(__lint)
-                                (void) dp;
-#endif
                         } else          /* no */
                                 assert(sp == rest);
                         sp = rest;
                         break;
                 case OPLUS_:
                         stp = stop;
                         for (;;) {
                                 /* how long could this one be? */
-                                rest = slow(m, sp, stp, ss, es);
+                                rest = walk(m, sp, stp, ss, es, false);
                                 assert(rest != NULL);   /* it did match */
                                 /* could the rest match the rest? */
-                                tail = slow(m, rest, stop, es, stopst);
+                                tail = walk(m, rest, stop, es, stopst, false);
                                 if (tail == stop)
                                         break;          /* yes! */
                                 /* no -- try a shorter match for this one */
                                 stp = rest - 1;
                                 assert(stp >= sp);      /* it did work */

@@ -467,11 +462,11 @@
                         ssub = ss + 1;
                         esub = es - 1;
                         ssp = sp;
                         oldssp = ssp;
                         for (;;) {      /* find last match of innards */
-                                sep = slow(m, ssp, rest, ssub, esub);
+                                sep = walk(m, ssp, rest, ssub, esub, false);
                                 if (sep == NULL || sep == ssp)
                                         break;  /* failed or matched null */
                                 oldssp = ssp;   /* on to next try */
                                 ssp = sep;
                         }

@@ -479,23 +474,23 @@
                                 /* last successful match */
                                 sep = ssp;
                                 ssp = oldssp;
                         }
                         assert(sep == rest);    /* must exhaust substring */
-                        assert(slow(m, ssp, sep, ssub, esub) == rest);
+                        assert(walk(m, ssp, sep, ssub, esub, false) == rest);
                         dp = dissect(m, ssp, sep, ssub, esub);
                         assert(dp == sep);
                         sp = rest;
                         break;
                 case OCH_:
                         stp = stop;
                         for (;;) {
                                 /* how long could this one be? */
-                                rest = slow(m, sp, stp, ss, es);
+                                rest = walk(m, sp, stp, ss, es, false);
                                 assert(rest != NULL);   /* it did match */
                                 /* could the rest match the rest? */
-                                tail = slow(m, rest, stop, es, stopst);
+                                tail = walk(m, rest, stop, es, stopst, false);
                                 if (tail == stop)
                                         break;          /* yes! */
                                 /* no -- try a shorter match for this one */
                                 stp = rest - 1;
                                 assert(stp >= sp);      /* it did work */

@@ -502,19 +497,20 @@
                         }
                         ssub = ss + 1;
                         esub = ss + OPND(m->g->strip[ss]) - 1;
                         assert(OP(m->g->strip[esub]) == OOR1);
                         for (;;) {      /* find first matching branch */
-                                if (slow(m, sp, rest, ssub, esub) == rest)
+                                if (walk(m, sp, rest, ssub, esub,
+                                    false) == rest)
                                         break;  /* it matched all of it */
                                 /* that one missed, try next one */
                                 assert(OP(m->g->strip[esub]) == OOR1);
                                 esub++;
                                 assert(OP(m->g->strip[esub]) == OOR2);
                                 ssub = esub + 1;
                                 esub += OPND(m->g->strip[esub]);
-                                if (OP(m->g->strip[esub]) == OOR2)
+                                if (OP(m->g->strip[esub]) == (sop)OOR2)
                                         esub--;
                                 else
                                         assert(OP(m->g->strip[esub]) == O_CH);
                         }
                         dp = dissect(m, sp, rest, ssub, esub);

@@ -635,11 +631,11 @@
                         ss++;
                         s = m->g->strip[ss];
                         do {
                                 assert(OP(s) == OOR2);
                                 ss += OPND(s);
-                        } while (OP(s = m->g->strip[ss]) != O_CH);
+                        } while (OP(s = m->g->strip[ss]) != (sop)O_CH);
                         /* note that the ss++ gets us past the O_CH */
                         break;
                 default:        /* have to make a choice */
                         hard = 1;
                         break;

@@ -668,11 +664,11 @@
                 if (sp > stop - len)
                         return (NULL);  /* not enough left to match */
                 ssp = m->offp + m->pmatch[i].rm_so;
                 if (memcmp(sp, ssp, len) != 0)
                         return (NULL);
-                while (m->g->strip[ss] != SOP(O_BACK, i))
+                while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
                         ss++;
                 return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
         case OQUEST_:           /* to null or not */
                 dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                 if (dp != NULL)

@@ -699,17 +695,17 @@
                 for (;;) {      /* find first matching branch */
                         dp = backref(m, sp, stop, ssub, esub, lev, rec);
                         if (dp != NULL)
                                 return (dp);
                         /* that one missed, try next one */
-                        if (OP(m->g->strip[esub]) == O_CH)
+                        if (OP(m->g->strip[esub]) == (sop)O_CH)
                                 return (NULL);  /* there is none */
                         esub++;
-                        assert(OP(m->g->strip[esub]) == OOR2);
+                        assert(OP(m->g->strip[esub]) == (sop)OOR2);
                         ssub = esub + 1;
                         esub += OPND(m->g->strip[esub]);
-                        if (OP(m->g->strip[esub]) == OOR2)
+                        if (OP(m->g->strip[esub]) == (sop)OOR2)
                                 esub--;
                         else
                                 assert(OP(m->g->strip[esub]) == O_CH);
                 }
                 /* NOTREACHED */

@@ -743,119 +739,19 @@
         assert(0);
         return (NULL);
 }
 
 /*
- * fast - step through the string at top speed
+ * Step through the string either quickly or slowly.  Returns where it ended
+ * or NULL.
  */
 static const char *
-fast(struct match *m, const char *start, const char *stop, sopno startst,
-    sopno stopst)
+walk(struct match *m, const char *start, const char *stop, sopno startst,
+    sopno stopst, bool fast)
 {
         states st = m->st;
         states fresh = m->fresh;
-        states tmp = m->tmp;
-        const char *p = start;
-        wint_t c;
-        wint_t lastc;           /* previous c */
-        wint_t flagch;
-        int i;
-        const char *coldp;      /* last p after which no match was underway */
-        size_t clen;
-
-        CLEAR(st);
-        SET1(st, startst);
-        SP("fast", st, *p);
-        st = step(m->g, startst, stopst, st, NOTHING, st);
-        ASSIGN(fresh, st);
-        SP("start", st, *p);
-        coldp = NULL;
-        if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
-                c = OUT;
-        else {
-                /*
-                 * XXX Wrong if the previous character was multi-byte.
-                 * Newline never is (in encodings supported by FreeBSD),
-                 * so this only breaks the ISWORD tests below.
-                 */
-                c = (uch)*(start - 1);
-        }
-        for (;;) {
-                /* next character */
-                lastc = c;
-                if (p == m->endp) {
-                        clen = 0;
-                        c = OUT;
-                } else
-                        clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
-                if (EQ(st, fresh))
-                        coldp = p;
-
-                /* is there an EOL and/or BOL between lastc and c? */
-                flagch = '\0';
-                i = 0;
-                if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
-                    (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
-                        flagch = BOL;
-                        i = m->g->nbol;
-                }
-                if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
-                    (c == OUT && !(m->eflags&REG_NOTEOL))) {
-                        flagch = (flagch == BOL) ? BOLEOL : EOL;
-                        i += m->g->neol;
-                }
-                if (i != 0) {
-                        for (; i > 0; i--)
-                                st = step(m->g, startst, stopst, st,
-                                    flagch, st);
-                        SP("boleol", st, c);
-                }
-
-                /* how about a word boundary? */
-                if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-                    (c != OUT && ISWORD(c))) {
-                        flagch = BOW;
-                }
-                if ((lastc != OUT && ISWORD(lastc)) &&
-                    (flagch == EOL || (c != OUT && !ISWORD(c)))) {
-                        flagch = EOW;
-                }
-                if (flagch == BOW || flagch == EOW) {
-                        st = step(m->g, startst, stopst, st, flagch, st);
-                        SP("boweow", st, c);
-                }
-
-                /* are we done? */
-                if (ISSET(st, stopst) || p == stop || clen > stop - p)
-                        break;          /* NOTE BREAK OUT */
-
-                /* no, we must deal with this character */
-                ASSIGN(tmp, st);
-                ASSIGN(st, fresh);
-                assert(c != OUT);
-                st = step(m->g, startst, stopst, tmp, c, st);
-                SP("aft", st, c);
-                assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
-                p += clen;
-        }
-
-        assert(coldp != NULL);
-        m->coldp = coldp;
-        if (ISSET(st, stopst))
-                return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
-        else
-                return (NULL);
-}
-
-/*
- * slow - step through the string more deliberately
- */
-static const char *
-slow(struct match *m, const char *start, const char *stop, sopno startst,
-    sopno stopst)
-{
-        states st = m->st;
         states empty = m->empty;
         states tmp = m->tmp;
         const char *p = start;
         wint_t c;
         wint_t lastc;           /* previous c */

@@ -867,10 +763,12 @@
         AT("slow", start, stop, startst, stopst);
         CLEAR(st);
         SET1(st, startst);
         SP("sstart", st, *p);
         st = step(m->g, startst, stopst, st, NOTHING, st);
+        if (fast)
+                ASSIGN(fresh, st);
         matchp = NULL;
         if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
                 c = OUT;
         else {
                 /*

@@ -887,10 +785,13 @@
                         c = OUT;
                         clen = 0;
                 } else
                         clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 
+                if (fast && EQ(st, fresh))
+                        matchp = p;
+
                 /* is there an EOL and/or BOL between lastc and c? */
                 flagch = '\0';
                 i = 0;
                 if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
                     (lastc == OUT && !(m->eflags&REG_NOTBOL))) {

@@ -922,29 +823,43 @@
                         st = step(m->g, startst, stopst, st, flagch, st);
                         SP("sboweow", st, c);
                 }
 
                 /* are we done? */
-                if (ISSET(st, stopst))
+                if (ISSET(st, stopst)) {
+                        if (fast)
+                                break;
+                        else
                         matchp = p;
-                if (EQ(st, empty) || p == stop || clen > stop - p)
+                }
+                if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
                         break;          /* NOTE BREAK OUT */
 
                 /* no, we must deal with this character */
                 ASSIGN(tmp, st);
+                if (fast)
+                        ASSIGN(st, fresh);
+                else
                 ASSIGN(st, empty);
                 assert(c != OUT);
                 st = step(m->g, startst, stopst, tmp, c, st);
                 SP("saft", st, c);
                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
                 p += clen;
         }
 
+        if (fast) {
+                assert(matchp != NULL);
+                m->coldp = matchp;
+                if (ISSET(st, stopst))
+                        return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
+                else
+                        return (NULL);
+        } else
         return (matchp);
 }
 
-
 /*
  * step - map set of states reachable before char to set reachable after
  */
 static states
 step(struct re_guts *g,

@@ -1026,26 +941,26 @@
                 case ORPAREN:
                         FWD(aft, aft, 1);
                         break;
                 case OCH_:              /* mark the first two branches */
                         FWD(aft, aft, 1);
-                        assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+                        assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
                         FWD(aft, aft, OPND(s));
                         break;
                 case OOR1:              /* done a branch, find the O_CH */
                         if (ISSTATEIN(aft, here)) {
                                 for (look = 1;
-                                    OP(s = g->strip[pc+look]) != O_CH;
+                                    OP(s = g->strip[pc+look]) != (sop)O_CH;
                                     look += OPND(s))
-                                        assert(OP(s) == OOR2);
+                                        assert(OP(s) == (sop)OOR2);
                                 FWD(aft, aft, look + 1);
                         }
                         break;
                 case OOR2:              /* propagate OCH_'s marking */
                         FWD(aft, aft, 1);
-                        if (OP(g->strip[pc+OPND(s)]) != O_CH) {
-                                assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+                        if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
+                                assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
                                 FWD(aft, aft, OPND(s));
                         }
                         break;
                 case O_CH:              /* just empty */
                         FWD(aft, aft, 1);

@@ -1122,12 +1037,11 @@
 }
 #endif
 #endif
 
 #undef  matcher
-#undef  fast
-#undef  slow
+#undef  walk
 #undef  dissect
 #undef  backref
 #undef  step
 #undef  print
 #undef  at