Print this page
8993 sync regcomp(3C) with upstream

*** 32,74 **** * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The matching engine and friends. This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that * different state representations can be used without duplicating masses * of code. */ #ifdef SNAMES #define matcher smatcher ! #define fast sfast ! #define slow sslow #define dissect sdissect #define backref sbackref #define step sstep #define print sprint #define at sat #define match smat #endif #ifdef LNAMES #define matcher lmatcher ! #define fast lfast ! #define slow lslow #define dissect ldissect #define backref lbackref #define step lstep #define print lprint #define at lat #define match lmat #endif #ifdef MNAMES #define matcher mmatcher ! #define fast mfast ! #define slow mslow #define dissect mdissect #define backref mbackref #define step mstep #define print mprint #define at mat --- 32,73 ---- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + #include <stdbool.h> + /* * The matching engine and friends. This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that * different state representations can be used without duplicating masses * of code. */ #ifdef SNAMES #define matcher smatcher ! #define walk swalk #define dissect sdissect #define backref sbackref #define step sstep #define print sprint #define at sat #define match smat #endif #ifdef LNAMES #define matcher lmatcher ! #define walk lwalk #define dissect ldissect #define backref lbackref #define step lstep #define print lprint #define at lat #define match lmat #endif #ifdef MNAMES #define matcher mmatcher ! #define walk mwalk #define dissect mdissect #define backref mbackref #define step mstep #define print mprint #define at mat
*** 102,115 **** static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int); static const char *dissect(struct match *, const char *, const char *, sopno, sopno); static const char *backref(struct match *, const char *, const char *, sopno, sopno, sopno, int); ! static const char *fast(struct match *, const char *, const char *, sopno, ! sopno); ! static const char *slow(struct match *, const char *, const char *, sopno, ! sopno); static states step(struct re_guts *, sopno, sopno, states, wint_t, states); #define MAX_RECURSION 100 #define BOL (OUT-1) #define EOL (BOL-1) #define BOLEOL (BOL-2) --- 101,112 ---- static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int); static const char *dissect(struct match *, const char *, const char *, sopno, sopno); static const char *backref(struct match *, const char *, const char *, sopno, sopno, sopno, int); ! static const char *walk(struct match *m, const char *start, const char *stop, ! sopno startst, sopno stopst, bool fast); static states step(struct re_guts *, sopno, sopno, states, wint_t, states); #define MAX_RECURSION 100 #define BOL (OUT-1) #define EOL (BOL-1) #define BOLEOL (BOL-2)
*** 251,261 **** SP("mloop", m->st, *start); /* this loop does only one repetition except for backrefs */ for (;;) { ! endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ if (m->pmatch != NULL) free((char *)m->pmatch); if (m->lastpos != NULL) free((char *)m->lastpos); --- 248,258 ---- SP("mloop", m->st, *start); /* this loop does only one repetition except for backrefs */ for (;;) { ! endp = walk(m, start, stop, gf, gl, true); if (endp == NULL) { /* a miss */ if (m->pmatch != NULL) free((char *)m->pmatch); if (m->lastpos != NULL) free((char *)m->lastpos);
*** 267,277 **** /* where? */ assert(m->coldp != NULL); for (;;) { NOTE("finding start"); ! endp = slow(m, m->coldp, stop, gf, gl); if (endp != NULL) break; assert(m->coldp < m->endp); m->coldp += XMBRTOWC(NULL, m->coldp, m->endp - m->coldp, &m->mbs, 0); --- 264,274 ---- /* where? */ assert(m->coldp != NULL); for (;;) { NOTE("finding start"); ! endp = walk(m, m->coldp, stop, gf, gl, false); if (endp != NULL) break; assert(m->coldp < m->endp); m->coldp += XMBRTOWC(NULL, m->coldp, m->endp - m->coldp, &m->mbs, 0);
*** 312,322 **** assert(g->nplus == 0 || m->lastpos != NULL); for (;;) { if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); ! endp = slow(m, m->coldp, endp-1, gf, gl); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */ #ifndef NDEBUG for (i = 1; i <= m->g->nsub; i++) { --- 309,319 ---- assert(g->nplus == 0 || m->lastpos != NULL); for (;;) { if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); ! endp = walk(m, m->coldp, endp-1, gf, gl, false); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */ #ifndef NDEBUG for (i = 1; i <= m->g->nsub; i++) {
*** 380,391 **** sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ const char *ssp; /* start of string matched by subsubRE */ const char *sep; /* end of string matched by subsubRE */ const char *oldssp; /* previous ssp */ ! const char *dp __unused; AT("diss", start, stop, startst, stopst); sp = start; for (ss = startst; ss < stopst; ss = es) { /* identify end of subRE */ es = ss; --- 377,389 ---- sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ const char *ssp; /* start of string matched by subsubRE */ const char *sep; /* end of string matched by subsubRE */ const char *oldssp; /* previous ssp */ ! const char *dp; + (void) dp; AT("diss", start, stop, startst, stopst); sp = start; for (ss = startst; ss < stopst; ss = es) { /* identify end of subRE */ es = ss;
*** 393,403 **** case OPLUS_: case OQUEST_: es += OPND(m->g->strip[es]); break; case OCH_: ! while (OP(m->g->strip[es]) != O_CH) es += OPND(m->g->strip[es]); break; } es++; --- 391,401 ---- case OPLUS_: case OQUEST_: es += OPND(m->g->strip[es]); break; case OCH_: ! while (OP(m->g->strip[es]) != (sop)O_CH) es += OPND(m->g->strip[es]); break; } es++;
*** 425,465 **** /* cases where length of match is hard to find */ case OQUEST_: stp = stop; for (;;) { /* how long could this one be? */ ! rest = slow(m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ ! tail = slow(m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ } ssub = ss + 1; esub = es - 1; /* did innards match? */ ! if (slow(m, sp, rest, ssub, esub) != NULL) { dp = dissect(m, sp, rest, ssub, esub); assert(dp == rest); - #if defined(__lint) - (void) dp; - #endif } else /* no */ assert(sp == rest); sp = rest; break; case OPLUS_: stp = stop; for (;;) { /* how long could this one be? */ ! rest = slow(m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ ! tail = slow(m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ --- 423,460 ---- /* cases where length of match is hard to find */ case OQUEST_: stp = stop; for (;;) { /* how long could this one be? */ ! rest = walk(m, sp, stp, ss, es, false); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ ! tail = walk(m, rest, stop, es, stopst, false); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ } ssub = ss + 1; esub = es - 1; /* did innards match? */ ! if (walk(m, sp, rest, ssub, esub, false) != NULL) { dp = dissect(m, sp, rest, ssub, esub); assert(dp == rest); } else /* no */ assert(sp == rest); sp = rest; break; case OPLUS_: stp = stop; for (;;) { /* how long could this one be? */ ! rest = walk(m, sp, stp, ss, es, false); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ ! tail = walk(m, rest, stop, es, stopst, false); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */
*** 467,477 **** ssub = ss + 1; esub = es - 1; ssp = sp; oldssp = ssp; for (;;) { /* find last match of innards */ ! sep = slow(m, ssp, rest, ssub, esub); if (sep == NULL || sep == ssp) break; /* failed or matched null */ oldssp = ssp; /* on to next try */ ssp = sep; } --- 462,472 ---- ssub = ss + 1; esub = es - 1; ssp = sp; oldssp = ssp; for (;;) { /* find last match of innards */ ! sep = walk(m, ssp, rest, ssub, esub, false); if (sep == NULL || sep == ssp) break; /* failed or matched null */ oldssp = ssp; /* on to next try */ ssp = sep; }
*** 479,501 **** /* last successful match */ sep = ssp; ssp = oldssp; } assert(sep == rest); /* must exhaust substring */ ! assert(slow(m, ssp, sep, ssub, esub) == rest); dp = dissect(m, ssp, sep, ssub, esub); assert(dp == sep); sp = rest; break; case OCH_: stp = stop; for (;;) { /* how long could this one be? */ ! rest = slow(m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ ! tail = slow(m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */ --- 474,496 ---- /* last successful match */ sep = ssp; ssp = oldssp; } assert(sep == rest); /* must exhaust substring */ ! assert(walk(m, ssp, sep, ssub, esub, false) == rest); dp = dissect(m, ssp, sep, ssub, esub); assert(dp == sep); sp = rest; break; case OCH_: stp = stop; for (;;) { /* how long could this one be? */ ! rest = walk(m, sp, stp, ss, es, false); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ ! tail = walk(m, rest, stop, es, stopst, false); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ stp = rest - 1; assert(stp >= sp); /* it did work */
*** 502,520 **** } ssub = ss + 1; esub = ss + OPND(m->g->strip[ss]) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ ! if (slow(m, sp, rest, ssub, esub) == rest) break; /* it matched all of it */ /* that one missed, try next one */ assert(OP(m->g->strip[esub]) == OOR1); esub++; assert(OP(m->g->strip[esub]) == OOR2); ssub = esub + 1; esub += OPND(m->g->strip[esub]); ! if (OP(m->g->strip[esub]) == OOR2) esub--; else assert(OP(m->g->strip[esub]) == O_CH); } dp = dissect(m, sp, rest, ssub, esub); --- 497,516 ---- } ssub = ss + 1; esub = ss + OPND(m->g->strip[ss]) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ ! if (walk(m, sp, rest, ssub, esub, ! false) == rest) break; /* it matched all of it */ /* that one missed, try next one */ assert(OP(m->g->strip[esub]) == OOR1); esub++; assert(OP(m->g->strip[esub]) == OOR2); ssub = esub + 1; esub += OPND(m->g->strip[esub]); ! if (OP(m->g->strip[esub]) == (sop)OOR2) esub--; else assert(OP(m->g->strip[esub]) == O_CH); } dp = dissect(m, sp, rest, ssub, esub);
*** 635,645 **** ss++; s = m->g->strip[ss]; do { assert(OP(s) == OOR2); ss += OPND(s); ! } while (OP(s = m->g->strip[ss]) != O_CH); /* note that the ss++ gets us past the O_CH */ break; default: /* have to make a choice */ hard = 1; break; --- 631,641 ---- ss++; s = m->g->strip[ss]; do { assert(OP(s) == OOR2); ss += OPND(s); ! } while (OP(s = m->g->strip[ss]) != (sop)O_CH); /* note that the ss++ gets us past the O_CH */ break; default: /* have to make a choice */ hard = 1; break;
*** 668,678 **** if (sp > stop - len) return (NULL); /* not enough left to match */ ssp = m->offp + m->pmatch[i].rm_so; if (memcmp(sp, ssp, len) != 0) return (NULL); ! while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; return (backref(m, sp+len, stop, ss+1, stopst, lev, rec)); case OQUEST_: /* to null or not */ dp = backref(m, sp, stop, ss+1, stopst, lev, rec); if (dp != NULL) --- 664,674 ---- if (sp > stop - len) return (NULL); /* not enough left to match */ ssp = m->offp + m->pmatch[i].rm_so; if (memcmp(sp, ssp, len) != 0) return (NULL); ! while (m->g->strip[ss] != (sop)SOP(O_BACK, i)) ss++; return (backref(m, sp+len, stop, ss+1, stopst, lev, rec)); case OQUEST_: /* to null or not */ dp = backref(m, sp, stop, ss+1, stopst, lev, rec); if (dp != NULL)
*** 699,715 **** for (;;) { /* find first matching branch */ dp = backref(m, sp, stop, ssub, esub, lev, rec); if (dp != NULL) return (dp); /* that one missed, try next one */ ! if (OP(m->g->strip[esub]) == O_CH) return (NULL); /* there is none */ esub++; ! assert(OP(m->g->strip[esub]) == OOR2); ssub = esub + 1; esub += OPND(m->g->strip[esub]); ! if (OP(m->g->strip[esub]) == OOR2) esub--; else assert(OP(m->g->strip[esub]) == O_CH); } /* NOTREACHED */ --- 695,711 ---- for (;;) { /* find first matching branch */ dp = backref(m, sp, stop, ssub, esub, lev, rec); if (dp != NULL) return (dp); /* that one missed, try next one */ ! if (OP(m->g->strip[esub]) == (sop)O_CH) return (NULL); /* there is none */ esub++; ! assert(OP(m->g->strip[esub]) == (sop)OOR2); ssub = esub + 1; esub += OPND(m->g->strip[esub]); ! if (OP(m->g->strip[esub]) == (sop)OOR2) esub--; else assert(OP(m->g->strip[esub]) == O_CH); } /* NOTREACHED */
*** 743,861 **** assert(0); return (NULL); } /* ! * fast - step through the string at top speed */ static const char * ! fast(struct match *m, const char *start, const char *stop, sopno startst, ! sopno stopst) { states st = m->st; states fresh = m->fresh; - states tmp = m->tmp; - const char *p = start; - wint_t c; - wint_t lastc; /* previous c */ - wint_t flagch; - int i; - const char *coldp; /* last p after which no match was underway */ - size_t clen; - - CLEAR(st); - SET1(st, startst); - SP("fast", st, *p); - st = step(m->g, startst, stopst, st, NOTHING, st); - ASSIGN(fresh, st); - SP("start", st, *p); - coldp = NULL; - if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL))) - c = OUT; - else { - /* - * XXX Wrong if the previous character was multi-byte. - * Newline never is (in encodings supported by FreeBSD), - * so this only breaks the ISWORD tests below. - */ - c = (uch)*(start - 1); - } - for (;;) { - /* next character */ - lastc = c; - if (p == m->endp) { - clen = 0; - c = OUT; - } else - clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); - if (EQ(st, fresh)) - coldp = p; - - /* is there an EOL and/or BOL between lastc and c? */ - flagch = '\0'; - i = 0; - if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) || - (lastc == OUT && !(m->eflags&REG_NOTBOL))) { - flagch = BOL; - i = m->g->nbol; - } - if ((c == '\n' && m->g->cflags&REG_NEWLINE) || - (c == OUT && !(m->eflags&REG_NOTEOL))) { - flagch = (flagch == BOL) ? BOLEOL : EOL; - i += m->g->neol; - } - if (i != 0) { - for (; i > 0; i--) - st = step(m->g, startst, stopst, st, - flagch, st); - SP("boleol", st, c); - } - - /* how about a word boundary? */ - if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c))) { - flagch = BOW; - } - if ((lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c)))) { - flagch = EOW; - } - if (flagch == BOW || flagch == EOW) { - st = step(m->g, startst, stopst, st, flagch, st); - SP("boweow", st, c); - } - - /* are we done? */ - if (ISSET(st, stopst) || p == stop || clen > stop - p) - break; /* NOTE BREAK OUT */ - - /* no, we must deal with this character */ - ASSIGN(tmp, st); - ASSIGN(st, fresh); - assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); - SP("aft", st, c); - assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p += clen; - } - - assert(coldp != NULL); - m->coldp = coldp; - if (ISSET(st, stopst)) - return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); - else - return (NULL); - } - - /* - * slow - step through the string more deliberately - */ - static const char * - slow(struct match *m, const char *start, const char *stop, sopno startst, - sopno stopst) - { - states st = m->st; states empty = m->empty; states tmp = m->tmp; const char *p = start; wint_t c; wint_t lastc; /* previous c */ --- 739,757 ---- assert(0); return (NULL); } /* ! * Step through the string either quickly or slowly. Returns where it ended ! * or NULL. */ static const char * ! walk(struct match *m, const char *start, const char *stop, sopno startst, ! sopno stopst, bool fast) { states st = m->st; states fresh = m->fresh; states empty = m->empty; states tmp = m->tmp; const char *p = start; wint_t c; wint_t lastc; /* previous c */
*** 867,876 **** --- 763,774 ---- AT("slow", start, stop, startst, stopst); CLEAR(st); SET1(st, startst); SP("sstart", st, *p); st = step(m->g, startst, stopst, st, NOTHING, st); + if (fast) + ASSIGN(fresh, st); matchp = NULL; if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL))) c = OUT; else { /*
*** 887,896 **** --- 785,797 ---- c = OUT; clen = 0; } else clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + if (fast && EQ(st, fresh)) + matchp = p; + /* is there an EOL and/or BOL between lastc and c? */ flagch = '\0'; i = 0; if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) || (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
*** 922,950 **** st = step(m->g, startst, stopst, st, flagch, st); SP("sboweow", st, c); } /* are we done? */ ! if (ISSET(st, stopst)) matchp = p; ! if (EQ(st, empty) || p == stop || clen > stop - p) break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ ASSIGN(tmp, st); ASSIGN(st, empty); assert(c != OUT); st = step(m->g, startst, stopst, tmp, c, st); SP("saft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); p += clen; } return (matchp); } - /* * step - map set of states reachable before char to set reachable after */ static states step(struct re_guts *g, --- 823,865 ---- st = step(m->g, startst, stopst, st, flagch, st); SP("sboweow", st, c); } /* are we done? */ ! if (ISSET(st, stopst)) { ! if (fast) ! break; ! else matchp = p; ! } ! if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p)) break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ ASSIGN(tmp, st); + if (fast) + ASSIGN(st, fresh); + else ASSIGN(st, empty); assert(c != OUT); st = step(m->g, startst, stopst, tmp, c, st); SP("saft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); p += clen; } + if (fast) { + assert(matchp != NULL); + m->coldp = matchp; + if (ISSET(st, stopst)) + return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); + else + return (NULL); + } else return (matchp); } /* * step - map set of states reachable before char to set reachable after */ static states step(struct re_guts *g,
*** 1026,1051 **** case ORPAREN: FWD(aft, aft, 1); break; case OCH_: /* mark the first two branches */ FWD(aft, aft, 1); ! assert(OP(g->strip[pc+OPND(s)]) == OOR2); FWD(aft, aft, OPND(s)); break; case OOR1: /* done a branch, find the O_CH */ if (ISSTATEIN(aft, here)) { for (look = 1; ! OP(s = g->strip[pc+look]) != O_CH; look += OPND(s)) ! assert(OP(s) == OOR2); FWD(aft, aft, look + 1); } break; case OOR2: /* propagate OCH_'s marking */ FWD(aft, aft, 1); ! if (OP(g->strip[pc+OPND(s)]) != O_CH) { ! assert(OP(g->strip[pc+OPND(s)]) == OOR2); FWD(aft, aft, OPND(s)); } break; case O_CH: /* just empty */ FWD(aft, aft, 1); --- 941,966 ---- case ORPAREN: FWD(aft, aft, 1); break; case OCH_: /* mark the first two branches */ FWD(aft, aft, 1); ! assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2); FWD(aft, aft, OPND(s)); break; case OOR1: /* done a branch, find the O_CH */ if (ISSTATEIN(aft, here)) { for (look = 1; ! OP(s = g->strip[pc+look]) != (sop)O_CH; look += OPND(s)) ! assert(OP(s) == (sop)OOR2); FWD(aft, aft, look + 1); } break; case OOR2: /* propagate OCH_'s marking */ FWD(aft, aft, 1); ! if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) { ! assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2); FWD(aft, aft, OPND(s)); } break; case O_CH: /* just empty */ FWD(aft, aft, 1);
*** 1122,1133 **** } #endif #endif #undef matcher ! #undef fast ! #undef slow #undef dissect #undef backref #undef step #undef print #undef at --- 1037,1047 ---- } #endif #endif #undef matcher ! #undef walk #undef dissect #undef backref #undef step #undef print #undef at