Print this page
8993 sync regcomp(3C) with upstream
@@ -32,43 +32,42 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include <stdbool.h>
+
/*
* The matching engine and friends. This file is #included by regexec.c
* after suitable #defines of a variety of macros used herein, so that
* different state representations can be used without duplicating masses
* of code.
*/
#ifdef SNAMES
#define matcher smatcher
-#define fast sfast
-#define slow sslow
+#define walk swalk
#define dissect sdissect
#define backref sbackref
#define step sstep
#define print sprint
#define at sat
#define match smat
#endif
#ifdef LNAMES
#define matcher lmatcher
-#define fast lfast
-#define slow lslow
+#define walk lwalk
#define dissect ldissect
#define backref lbackref
#define step lstep
#define print lprint
#define at lat
#define match lmat
#endif
#ifdef MNAMES
#define matcher mmatcher
-#define fast mfast
-#define slow mslow
+#define walk mwalk
#define dissect mdissect
#define backref mbackref
#define step mstep
#define print mprint
#define at mat
@@ -102,14 +101,12 @@
static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
static const char *dissect(struct match *, const char *, const char *,
sopno, sopno);
static const char *backref(struct match *, const char *, const char *, sopno,
sopno, sopno, int);
-static const char *fast(struct match *, const char *, const char *, sopno,
- sopno);
-static const char *slow(struct match *, const char *, const char *, sopno,
- sopno);
+static const char *walk(struct match *m, const char *start, const char *stop,
+ sopno startst, sopno stopst, bool fast);
static states step(struct re_guts *, sopno, sopno, states, wint_t, states);
#define MAX_RECURSION 100
#define BOL (OUT-1)
#define EOL (BOL-1)
#define BOLEOL (BOL-2)
@@ -251,11 +248,11 @@
SP("mloop", m->st, *start);
/* this loop does only one repetition except for backrefs */
for (;;) {
- endp = fast(m, start, stop, gf, gl);
+ endp = walk(m, start, stop, gf, gl, true);
if (endp == NULL) { /* a miss */
if (m->pmatch != NULL)
free((char *)m->pmatch);
if (m->lastpos != NULL)
free((char *)m->lastpos);
@@ -267,11 +264,11 @@
/* where? */
assert(m->coldp != NULL);
for (;;) {
NOTE("finding start");
- endp = slow(m, m->coldp, stop, gf, gl);
+ endp = walk(m, m->coldp, stop, gf, gl, false);
if (endp != NULL)
break;
assert(m->coldp < m->endp);
m->coldp += XMBRTOWC(NULL, m->coldp,
m->endp - m->coldp, &m->mbs, 0);
@@ -312,11 +309,11 @@
assert(g->nplus == 0 || m->lastpos != NULL);
for (;;) {
if (dp != NULL || endp <= m->coldp)
break; /* defeat */
NOTE("backoff");
- endp = slow(m, m->coldp, endp-1, gf, gl);
+ endp = walk(m, m->coldp, endp-1, gf, gl, false);
if (endp == NULL)
break; /* defeat */
/* try it on a shorter possibility */
#ifndef NDEBUG
for (i = 1; i <= m->g->nsub; i++) {
@@ -380,12 +377,13 @@
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
const char *ssp; /* start of string matched by subsubRE */
const char *sep; /* end of string matched by subsubRE */
const char *oldssp; /* previous ssp */
- const char *dp __unused;
+ const char *dp;
+ (void) dp;
AT("diss", start, stop, startst, stopst);
sp = start;
for (ss = startst; ss < stopst; ss = es) {
/* identify end of subRE */
es = ss;
@@ -393,11 +391,11 @@
case OPLUS_:
case OQUEST_:
es += OPND(m->g->strip[es]);
break;
case OCH_:
- while (OP(m->g->strip[es]) != O_CH)
+ while (OP(m->g->strip[es]) != (sop)O_CH)
es += OPND(m->g->strip[es]);
break;
}
es++;
@@ -425,41 +423,38 @@
/* cases where length of match is hard to find */
case OQUEST_:
stp = stop;
for (;;) {
/* how long could this one be? */
- rest = slow(m, sp, stp, ss, es);
+ rest = walk(m, sp, stp, ss, es, false);
assert(rest != NULL); /* it did match */
/* could the rest match the rest? */
- tail = slow(m, rest, stop, es, stopst);
+ tail = walk(m, rest, stop, es, stopst, false);
if (tail == stop)
break; /* yes! */
/* no -- try a shorter match for this one */
stp = rest - 1;
assert(stp >= sp); /* it did work */
}
ssub = ss + 1;
esub = es - 1;
/* did innards match? */
- if (slow(m, sp, rest, ssub, esub) != NULL) {
+ if (walk(m, sp, rest, ssub, esub, false) != NULL) {
dp = dissect(m, sp, rest, ssub, esub);
assert(dp == rest);
-#if defined(__lint)
- (void) dp;
-#endif
} else /* no */
assert(sp == rest);
sp = rest;
break;
case OPLUS_:
stp = stop;
for (;;) {
/* how long could this one be? */
- rest = slow(m, sp, stp, ss, es);
+ rest = walk(m, sp, stp, ss, es, false);
assert(rest != NULL); /* it did match */
/* could the rest match the rest? */
- tail = slow(m, rest, stop, es, stopst);
+ tail = walk(m, rest, stop, es, stopst, false);
if (tail == stop)
break; /* yes! */
/* no -- try a shorter match for this one */
stp = rest - 1;
assert(stp >= sp); /* it did work */
@@ -467,11 +462,11 @@
ssub = ss + 1;
esub = es - 1;
ssp = sp;
oldssp = ssp;
for (;;) { /* find last match of innards */
- sep = slow(m, ssp, rest, ssub, esub);
+ sep = walk(m, ssp, rest, ssub, esub, false);
if (sep == NULL || sep == ssp)
break; /* failed or matched null */
oldssp = ssp; /* on to next try */
ssp = sep;
}
@@ -479,23 +474,23 @@
/* last successful match */
sep = ssp;
ssp = oldssp;
}
assert(sep == rest); /* must exhaust substring */
- assert(slow(m, ssp, sep, ssub, esub) == rest);
+ assert(walk(m, ssp, sep, ssub, esub, false) == rest);
dp = dissect(m, ssp, sep, ssub, esub);
assert(dp == sep);
sp = rest;
break;
case OCH_:
stp = stop;
for (;;) {
/* how long could this one be? */
- rest = slow(m, sp, stp, ss, es);
+ rest = walk(m, sp, stp, ss, es, false);
assert(rest != NULL); /* it did match */
/* could the rest match the rest? */
- tail = slow(m, rest, stop, es, stopst);
+ tail = walk(m, rest, stop, es, stopst, false);
if (tail == stop)
break; /* yes! */
/* no -- try a shorter match for this one */
stp = rest - 1;
assert(stp >= sp); /* it did work */
@@ -502,19 +497,20 @@
}
ssub = ss + 1;
esub = ss + OPND(m->g->strip[ss]) - 1;
assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */
- if (slow(m, sp, rest, ssub, esub) == rest)
+ if (walk(m, sp, rest, ssub, esub,
+ false) == rest)
break; /* it matched all of it */
/* that one missed, try next one */
assert(OP(m->g->strip[esub]) == OOR1);
esub++;
assert(OP(m->g->strip[esub]) == OOR2);
ssub = esub + 1;
esub += OPND(m->g->strip[esub]);
- if (OP(m->g->strip[esub]) == OOR2)
+ if (OP(m->g->strip[esub]) == (sop)OOR2)
esub--;
else
assert(OP(m->g->strip[esub]) == O_CH);
}
dp = dissect(m, sp, rest, ssub, esub);
@@ -635,11 +631,11 @@
ss++;
s = m->g->strip[ss];
do {
assert(OP(s) == OOR2);
ss += OPND(s);
- } while (OP(s = m->g->strip[ss]) != O_CH);
+ } while (OP(s = m->g->strip[ss]) != (sop)O_CH);
/* note that the ss++ gets us past the O_CH */
break;
default: /* have to make a choice */
hard = 1;
break;
@@ -668,11 +664,11 @@
if (sp > stop - len)
return (NULL); /* not enough left to match */
ssp = m->offp + m->pmatch[i].rm_so;
if (memcmp(sp, ssp, len) != 0)
return (NULL);
- while (m->g->strip[ss] != SOP(O_BACK, i))
+ while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
ss++;
return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
case OQUEST_: /* to null or not */
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
@@ -699,17 +695,17 @@
for (;;) { /* find first matching branch */
dp = backref(m, sp, stop, ssub, esub, lev, rec);
if (dp != NULL)
return (dp);
/* that one missed, try next one */
- if (OP(m->g->strip[esub]) == O_CH)
+ if (OP(m->g->strip[esub]) == (sop)O_CH)
return (NULL); /* there is none */
esub++;
- assert(OP(m->g->strip[esub]) == OOR2);
+ assert(OP(m->g->strip[esub]) == (sop)OOR2);
ssub = esub + 1;
esub += OPND(m->g->strip[esub]);
- if (OP(m->g->strip[esub]) == OOR2)
+ if (OP(m->g->strip[esub]) == (sop)OOR2)
esub--;
else
assert(OP(m->g->strip[esub]) == O_CH);
}
/* NOTREACHED */
@@ -743,119 +739,19 @@
assert(0);
return (NULL);
}
/*
- * fast - step through the string at top speed
+ * Step through the string either quickly or slowly. Returns where it ended
+ * or NULL.
*/
static const char *
-fast(struct match *m, const char *start, const char *stop, sopno startst,
- sopno stopst)
+walk(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst, bool fast)
{
states st = m->st;
states fresh = m->fresh;
- states tmp = m->tmp;
- const char *p = start;
- wint_t c;
- wint_t lastc; /* previous c */
- wint_t flagch;
- int i;
- const char *coldp; /* last p after which no match was underway */
- size_t clen;
-
- CLEAR(st);
- SET1(st, startst);
- SP("fast", st, *p);
- st = step(m->g, startst, stopst, st, NOTHING, st);
- ASSIGN(fresh, st);
- SP("start", st, *p);
- coldp = NULL;
- if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL)))
- c = OUT;
- else {
- /*
- * XXX Wrong if the previous character was multi-byte.
- * Newline never is (in encodings supported by FreeBSD),
- * so this only breaks the ISWORD tests below.
- */
- c = (uch)*(start - 1);
- }
- for (;;) {
- /* next character */
- lastc = c;
- if (p == m->endp) {
- clen = 0;
- c = OUT;
- } else
- clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
- if (EQ(st, fresh))
- coldp = p;
-
- /* is there an EOL and/or BOL between lastc and c? */
- flagch = '\0';
- i = 0;
- if ((lastc == '\n' && m->g->cflags®_NEWLINE) ||
- (lastc == OUT && !(m->eflags®_NOTBOL))) {
- flagch = BOL;
- i = m->g->nbol;
- }
- if ((c == '\n' && m->g->cflags®_NEWLINE) ||
- (c == OUT && !(m->eflags®_NOTEOL))) {
- flagch = (flagch == BOL) ? BOLEOL : EOL;
- i += m->g->neol;
- }
- if (i != 0) {
- for (; i > 0; i--)
- st = step(m->g, startst, stopst, st,
- flagch, st);
- SP("boleol", st, c);
- }
-
- /* how about a word boundary? */
- if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
- (c != OUT && ISWORD(c))) {
- flagch = BOW;
- }
- if ((lastc != OUT && ISWORD(lastc)) &&
- (flagch == EOL || (c != OUT && !ISWORD(c)))) {
- flagch = EOW;
- }
- if (flagch == BOW || flagch == EOW) {
- st = step(m->g, startst, stopst, st, flagch, st);
- SP("boweow", st, c);
- }
-
- /* are we done? */
- if (ISSET(st, stopst) || p == stop || clen > stop - p)
- break; /* NOTE BREAK OUT */
-
- /* no, we must deal with this character */
- ASSIGN(tmp, st);
- ASSIGN(st, fresh);
- assert(c != OUT);
- st = step(m->g, startst, stopst, tmp, c, st);
- SP("aft", st, c);
- assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p += clen;
- }
-
- assert(coldp != NULL);
- m->coldp = coldp;
- if (ISSET(st, stopst))
- return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
- else
- return (NULL);
-}
-
-/*
- * slow - step through the string more deliberately
- */
-static const char *
-slow(struct match *m, const char *start, const char *stop, sopno startst,
- sopno stopst)
-{
- states st = m->st;
states empty = m->empty;
states tmp = m->tmp;
const char *p = start;
wint_t c;
wint_t lastc; /* previous c */
@@ -867,10 +763,12 @@
AT("slow", start, stop, startst, stopst);
CLEAR(st);
SET1(st, startst);
SP("sstart", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
+ if (fast)
+ ASSIGN(fresh, st);
matchp = NULL;
if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL)))
c = OUT;
else {
/*
@@ -887,10 +785,13 @@
c = OUT;
clen = 0;
} else
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
+ if (fast && EQ(st, fresh))
+ matchp = p;
+
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
i = 0;
if ((lastc == '\n' && m->g->cflags®_NEWLINE) ||
(lastc == OUT && !(m->eflags®_NOTBOL))) {
@@ -922,29 +823,43 @@
st = step(m->g, startst, stopst, st, flagch, st);
SP("sboweow", st, c);
}
/* are we done? */
- if (ISSET(st, stopst))
+ if (ISSET(st, stopst)) {
+ if (fast)
+ break;
+ else
matchp = p;
- if (EQ(st, empty) || p == stop || clen > stop - p)
+ }
+ if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
ASSIGN(tmp, st);
+ if (fast)
+ ASSIGN(st, fresh);
+ else
ASSIGN(st, empty);
assert(c != OUT);
st = step(m->g, startst, stopst, tmp, c, st);
SP("saft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
p += clen;
}
+ if (fast) {
+ assert(matchp != NULL);
+ m->coldp = matchp;
+ if (ISSET(st, stopst))
+ return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
+ else
+ return (NULL);
+ } else
return (matchp);
}
-
/*
* step - map set of states reachable before char to set reachable after
*/
static states
step(struct re_guts *g,
@@ -1026,26 +941,26 @@
case ORPAREN:
FWD(aft, aft, 1);
break;
case OCH_: /* mark the first two branches */
FWD(aft, aft, 1);
- assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
FWD(aft, aft, OPND(s));
break;
case OOR1: /* done a branch, find the O_CH */
if (ISSTATEIN(aft, here)) {
for (look = 1;
- OP(s = g->strip[pc+look]) != O_CH;
+ OP(s = g->strip[pc+look]) != (sop)O_CH;
look += OPND(s))
- assert(OP(s) == OOR2);
+ assert(OP(s) == (sop)OOR2);
FWD(aft, aft, look + 1);
}
break;
case OOR2: /* propagate OCH_'s marking */
FWD(aft, aft, 1);
- if (OP(g->strip[pc+OPND(s)]) != O_CH) {
- assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
+ assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
FWD(aft, aft, OPND(s));
}
break;
case O_CH: /* just empty */
FWD(aft, aft, 1);
@@ -1122,12 +1037,11 @@
}
#endif
#endif
#undef matcher
-#undef fast
-#undef slow
+#undef walk
#undef dissect
#undef backref
#undef step
#undef print
#undef at