Print this page
3731 Update nawk to version 20121220

*** 23,52 **** /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ ! /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ ! /* All Rights Reserved */ ! ! #pragma ident "%Z%%M% %I% %E% SMI" #define DEBUG #include "awk.h" #include "y.tab.h" ! #define HAT (NCHARS-1) /* matches ^ in regular expr */ /* NCHARS is 2**n */ ! #define MAXLIN (3 * LINE_MAX) ! #define type(v) (v)->nobj #define left(v) (v)->narg[0] #define right(v) (v)->narg[1] #define parent(v) (v)->nnext #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: #define UNARY case STAR: case PLUS: case QUEST: /* * encoding in tree Nodes: * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): --- 23,73 ---- /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ ! /* ! * Copyright (C) Lucent Technologies 1997 ! * All Rights Reserved ! * ! * Permission to use, copy, modify, and distribute this software and ! * its documentation for any purpose and without fee is hereby ! * granted, provided that the above copyright notice appear in all ! * copies and that both that the copyright notice and this ! * permission notice and warranty disclaimer appear in supporting ! * documentation, and that the name Lucent Technologies or any of ! * its entities not be used in advertising or publicity pertaining ! * to distribution of the software without specific, written prior ! * permission. ! * ! * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, ! * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. ! * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY ! * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ! * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER ! * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ! * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ! * THIS SOFTWARE. ! */ #define DEBUG #include "awk.h" #include "y.tab.h" ! #define HAT (NCHARS+2) /* matches ^ in regular expr */ /* NCHARS is 2**n */ ! #define MAXLIN 22 ! #define type(v) (v)->nobj /* badly overloaded here */ ! #define info(v) (v)->ntype /* badly overloaded here */ #define left(v) (v)->narg[0] #define right(v) (v)->narg[1] #define parent(v) (v)->nnext #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: + #define ELEAF case EMPTYRE: /* empty string in regexp */ #define UNARY case STAR: case PLUS: case QUEST: /* * encoding in tree Nodes: * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
*** 54,72 **** * unary (STAR, PLUS, QUEST): left is child, right is null * binary (CAT, OR): left and right are children * parent contains pointer to parent */ ! int setvec[MAXLIN]; ! int tmpset[MAXLIN]; ! Node *point[MAXLIN]; int rtok; /* next token in current re */ int rlxval; ! uchar *rlxstr; ! uchar *prestr; /* current position in current re */ ! uchar *lastre; /* origin of last re */ static int setcnt; static int poscnt; uchar *patbeg; --- 75,93 ---- * unary (STAR, PLUS, QUEST): left is child, right is null * binary (CAT, OR): left and right are children * parent contains pointer to parent */ ! int *setvec; ! int *tmpset; ! int maxsetvec = 0; int rtok; /* next token in current re */ int rlxval; ! static uchar *rlxstr; ! static uchar *prestr; /* current position in current re */ ! static uchar *lastre; /* origin of last re */ static int setcnt; static int poscnt; uchar *patbeg;
*** 74,134 **** #define NFA 20 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ ! static fa *mkdfa(uchar *, int); static int makeinit(fa *, int); static void penter(Node *); static void freetr(Node *); ! static void overflo(char *); static void cfoll(fa *, Node *); static void follow(Node *); ! static Node *reparse(uchar *); static int relex(void); static void freefa(fa *); static int cgoto(fa *, int, int); fa * ! makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; if (compile_time) /* a constant for sure */ return (mkdfa(s, anchor)); for (i = 0; i < nfatab; i++) { /* is it there already? */ if (fatab[i]->anchor == anchor && ! strcmp((char *)fatab[i]->restr, (char *)s) == 0) { ! fatab[i]->use++; return (fatab[i]); } } pfa = mkdfa(s, anchor); if (nfatab < NFA) { /* room for another */ fatab[nfatab] = pfa; ! fatab[nfatab]->use = 1; nfatab++; return (pfa); } use = fatab[0]->use; /* replace least-recently used */ nuse = 0; ! for (i = 1; i < nfatab; i++) if (fatab[i]->use < use) { use = fatab[i]->use; nuse = i; } freefa(fatab[nuse]); fatab[nuse] = pfa; ! pfa->use = 1; return (pfa); } fa * ! mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ /* anchor = 1 for anchored matches, else 0 */ - { Node *p, *p1; fa *f; p = reparse(s); p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); --- 95,170 ---- #define NFA 20 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ ! static fa *mkdfa(const uchar *, int); static int makeinit(fa *, int); static void penter(Node *); static void freetr(Node *); ! static void overflo(const char *); static void cfoll(fa *, Node *); static void follow(Node *); ! static Node *reparse(const uchar *); static int relex(void); static void freefa(fa *); static int cgoto(fa *, int, int); + static Node *regexp(void); + static Node *primary(void); + static Node *concat(Node *); + static Node *alt(Node *); + static Node *unary(Node *); fa * ! makedfa(const uchar *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; + static int now = 1; + + if (setvec == 0) { /* first time through any RE */ + maxsetvec = MAXLIN; + setvec = (int *)malloc(maxsetvec * sizeof (int)); + tmpset = (int *)malloc(maxsetvec * sizeof (int)); + if (setvec == 0 || tmpset == 0) + overflo("out of space initializing makedfa"); + } if (compile_time) /* a constant for sure */ return (mkdfa(s, anchor)); for (i = 0; i < nfatab; i++) { /* is it there already? */ if (fatab[i]->anchor == anchor && ! strcmp((const char *)fatab[i]->restr, (char *)s) == 0) { ! fatab[i]->use = now++; return (fatab[i]); } } pfa = mkdfa(s, anchor); if (nfatab < NFA) { /* room for another */ fatab[nfatab] = pfa; ! fatab[nfatab]->use = now++; nfatab++; return (pfa); } use = fatab[0]->use; /* replace least-recently used */ nuse = 0; ! for (i = 1; i < nfatab; i++) { if (fatab[i]->use < use) { use = fatab[i]->use; nuse = i; } + } freefa(fatab[nuse]); fatab[nuse] = pfa; ! pfa->use = now++; return (pfa); } fa * ! mkdfa(const uchar *s, int anchor) ! { /* does the real work of making a dfa */ /* anchor = 1 for anchored matches, else 0 */ Node *p, *p1; fa *f; p = reparse(s); p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
*** 137,147 **** /* put FINAL after reg. exp. */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL) ! overflo("no room for fa"); /* penter has computed number of positions in re */ f->accept = poscnt-1; cfoll(f, p1); /* set up follow sets */ freetr(p1); if ((f->posns[0] = --- 173,183 ---- /* put FINAL after reg. exp. */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL) ! overflo("out of space for fa"); /* penter has computed number of positions in re */ f->accept = poscnt-1; cfoll(f, p1); /* set up follow sets */ freetr(p1); if ((f->posns[0] =
*** 158,168 **** } static int makeinit(fa *f, int anchor) { ! register int i, k; f->curstat = 2; f->out[2] = 0; f->reset = 0; k = *(f->re[0].lfollow); --- 194,204 ---- } static int makeinit(fa *f, int anchor) { ! int i, k; f->curstat = 2; f->out[2] = 0; f->reset = 0; k = *(f->re[0].lfollow);
*** 192,204 **** void penter(Node *p) /* set up parent pointers and leaf indices */ { switch (type(p)) { LEAF ! left(p) = (Node *) poscnt; ! point[poscnt++] = p; break; UNARY penter(left(p)); parent(left(p)) = p; break; --- 228,241 ---- void penter(Node *p) /* set up parent pointers and leaf indices */ { switch (type(p)) { + ELEAF LEAF ! info(p) = poscnt; ! poscnt++; break; UNARY penter(left(p)); parent(left(p)) = p; break;
*** 207,226 **** penter(left(p)); penter(right(p)); parent(left(p)) = p; parent(right(p)) = p; break; ! default: ! ERROR "unknown type %d in penter", type(p) FATAL; break; } } static void freetr(Node *p) /* free parse tree */ { switch (type(p)) { LEAF xfree(p); break; UNARY freetr(left(p)); --- 244,264 ---- penter(left(p)); penter(right(p)); parent(left(p)) = p; parent(right(p)) = p; break; ! default: /* can't happen */ ! FATAL("can't happen: unknown type %d in penter", type(p)); break; } } static void freetr(Node *p) /* free parse tree */ { switch (type(p)) { + ELEAF LEAF xfree(p); break; UNARY freetr(left(p));
*** 230,257 **** case OR: freetr(left(p)); freetr(right(p)); xfree(p); break; ! default: ! ERROR "unknown type %d in freetr", type(p) FATAL; break; } } ! uchar * ! cclenter(uchar *p) ! { ! register int i, c; ! uchar *op, *chars, *ret; ! size_t bsize; - init_buf(&chars, &bsize, LINE_INCR); - op = p; - i = 0; - while ((c = *p++) != 0) { - if (c == '\\') { if ((c = *p++) == 't') c = '\t'; else if (c == 'n') c = '\n'; else if (c == 'f') --- 268,316 ---- case OR: freetr(left(p)); freetr(right(p)); xfree(p); break; ! default: /* can't happen */ ! FATAL("can't happen: unknown type %d in freetr", type(p)); break; } } ! /* ! * in the parsing of regular expressions, metacharacters like . have ! * to be seen literally; \056 is not a metacharacter. ! */ ! int ! hexstr(uchar **pp) /* find and eval hex string at pp, return new p */ ! { /* only pick up one 8-bit byte (2 chars) */ ! uchar *p; ! int n = 0; ! int i; ! ! for (i = 0, p = (uchar *)*pp; i < 2 && isxdigit(*p); i++, p++) { ! if (isdigit(*p)) ! n = 16 * n + *p - '0'; ! else if (*p >= 'a' && *p <= 'f') ! n = 16 * n + *p - 'a' + 10; ! else if (*p >= 'A' && *p <= 'F') ! n = 16 * n + *p - 'A' + 10; ! } ! *pp = (uchar *)p; ! return (n); ! } ! ! /* multiple use of arg */ ! #define isoctdigit(c) ((c) >= '0' && (c) <= '7') ! ! int ! quoted(uchar **pp) /* pick up next thing after a \\ */ ! { /* and increment *pp */ ! ! uchar *p = *pp; ! int c; if ((c = *p++) == 't') c = '\t'; else if (c == 'n') c = '\n'; else if (c == 'f')
*** 260,325 **** c = '\r'; else if (c == 'b') c = '\b'; else if (c == '\\') c = '\\'; ! else if (isdigit(c)) { int n = c - '0'; ! if (isdigit(*p)) { n = 8 * n + *p++ - '0'; ! if (isdigit(*p)) n = 8 * n + *p++ - '0'; } c = n; } /* else */ /* c = c; */ ! } else if (c == '-' && i > 0 && chars[i-1] != 0) { if (*p != 0) { ! c = chars[i-1]; ! while ((uchar)c < *p) { /* fails if *p is \\ */ ! expand_buf(&chars, &bsize, i); ! chars[i++] = ++c; } - p++; continue; } } ! expand_buf(&chars, &bsize, i); ! chars[i++] = c; } ! chars[i++] = '\0'; ! dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars)); xfree(op); ! ret = tostring(chars); ! free(chars); ! return (ret); } static void ! overflo(char *s) { ! ERROR "regular expression too big: %s", gettext((char *)s) FATAL; } /* enter follow set of each leaf of vertex v into lfollow[leaf] */ static void cfoll(fa *f, Node *v) { ! register int i; ! register int *p; switch (type(v)) { LEAF ! f->re[(int)left(v)].ltype = type(v); ! f->re[(int)left(v)].lval = (int)right(v); for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL) ! overflo("follow set overflow"); ! f->re[(int)left(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) { if (setvec[i] == 1) *++p = i; } --- 319,429 ---- c = '\r'; else if (c == 'b') c = '\b'; else if (c == '\\') c = '\\'; ! else if (c == 'x') { /* hexadecimal goo follows */ ! c = hexstr(&p); /* this adds a null if number is invalid */ ! } else if (isoctdigit(c)) { /* \d \dd \ddd */ int n = c - '0'; ! if (isoctdigit(*p)) { n = 8 * n + *p++ - '0'; ! if (isoctdigit(*p)) n = 8 * n + *p++ - '0'; } c = n; } /* else */ /* c = c; */ ! *pp = p; ! return (c); ! } ! ! uchar * ! cclenter(const uchar *argp) /* add a character class */ ! { ! int i, c, c2; ! uchar *p = (uchar *)argp; ! uchar *op, *bp; ! static uchar *buf = NULL; ! size_t bufsz = 100; ! ! op = p; ! if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL) ! FATAL("out of space for character class [%.10s...] 1", p); ! bp = buf; ! for (i = 0; (c = *p++) != 0; ) { ! if (c == '\\') { ! c = quoted(&p); ! } else if (c == '-' && i > 0 && bp[-1] != 0) { if (*p != 0) { ! c = bp[-1]; ! c2 = *p++; ! if (c2 == '\\') ! c2 = quoted(&p); ! if (c > c2) { /* empty; ignore */ ! bp--; ! i--; ! continue; ! } ! while (c < c2) { ! if (adjbuf(&buf, &bufsz, ! bp - buf + 2, 100, &bp, ! "cclenter1") == 0) ! FATAL( ! "out of space for character class [%.10s...] 2", p); ! *bp++ = ++c; ! i++; } continue; } } ! if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter2")) ! FATAL( ! "out of space for character class [%.10s...] 3", p); ! *bp++ = c; ! i++; } ! *bp = 0; ! dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf)); xfree(op); ! return (tostring(buf)); } static void ! overflo(const char *s) { ! FATAL("regular expression too big: %.30s...", gettext((char *)s)); } /* enter follow set of each leaf of vertex v into lfollow[leaf] */ static void cfoll(fa *f, Node *v) { ! int i; ! int *p; switch (type(v)) { + ELEAF LEAF ! f->re[info(v)].ltype = type(v); ! f->re[info(v)].lval.np = right(v); ! while (f->accept >= maxsetvec) { /* guessing here! */ ! maxsetvec *= 4; ! setvec = (int *)realloc(setvec, maxsetvec * ! sizeof (int)); ! tmpset = (int *)realloc(tmpset, maxsetvec * ! sizeof (int)); ! if (setvec == 0 || tmpset == 0) ! overflo("out of space in cfoll()"); ! } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL) ! overflo("out of space building follow set"); ! f->re[info(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) { if (setvec[i] == 1) *++p = i; }
*** 330,357 **** case CAT: case OR: cfoll(f, left(v)); cfoll(f, right(v)); break; ! default: ! ERROR "unknown type %d in cfoll", type(v) FATAL; } } /* * collects initially active leaves of p into setvec * returns 0 or 1 depending on whether p matches empty string */ static int first(Node *p) { ! register int b; switch (type(p)) { LEAF ! if (setvec[(int)left(p)] != 1) { ! setvec[(int)left(p)] = 1; setcnt++; } if (type(p) == CCL && (*(uchar *)right(p)) == '\0') return (0); /* empty CCL */ else --- 434,478 ---- case CAT: case OR: cfoll(f, left(v)); cfoll(f, right(v)); break; ! default: /* can't happen */ ! FATAL("can't happen: unknown type %d in cfoll", type(v)); } } /* * collects initially active leaves of p into setvec * returns 0 or 1 depending on whether p matches empty string */ static int first(Node *p) { ! int b, lp; switch (type(p)) { + ELEAF LEAF ! /* look for high-water mark of subscripts */ ! lp = info(p); ! /* guessing here! */ ! while (setcnt >= maxsetvec || lp >= maxsetvec) { ! maxsetvec *= 4; ! setvec = (int *)realloc(setvec, maxsetvec * ! sizeof (int)); ! tmpset = (int *)realloc(tmpset, maxsetvec * ! sizeof (int)); ! if (setvec == 0 || tmpset == 0) ! overflo("out of space in first()"); ! } ! if (type(p) == EMPTYRE) { ! setvec[lp] = 0; ! return (0); ! } ! if (setvec[lp] != 1) { ! setvec[lp] = 1; setcnt++; } if (type(p) == CCL && (*(uchar *)right(p)) == '\0') return (0); /* empty CCL */ else
*** 372,382 **** b = first(right(p)); if (first(left(p)) == 0 || b == 0) return (0); return (1); } ! ERROR "unknown type %d in first", type(p) FATAL; return (-1); } /* collects leaves that can follow v into setvec */ static void --- 493,505 ---- b = first(right(p)); if (first(left(p)) == 0 || b == 0) return (0); return (1); } ! /* can't happen */ ! FATAL("can't happen: unknown type %d in first", type(p)); ! /*NOTREACHED*/ return (-1); } /* collects leaves that can follow v into setvec */ static void
*** 406,440 **** return; } } else /* v is right child */ follow(p); return; - default: - ERROR "unknown type %d in follow", type(p) FATAL; - break; } } static int ! member(uchar c, uchar *s) /* is c in s? */ { while (*s) if (c == *s++) return (1); return (0); } int ! match(fa *f, uchar *p) { ! register int s, ns; s = f->reset ? makeinit(f, 0) : f->initstat; if (f->out[s]) return (1); do { if ((ns = f->gototab[s][*p]) != 0) s = ns; else s = cgoto(f, s, *p); if (f->out[s]) --- 529,564 ---- return; } } else /* v is right child */ follow(p); return; } } static int ! member(int c, const uchar *sarg) /* is c in s? */ { + uchar *s = (uchar *)sarg; + while (*s) if (c == *s++) return (1); return (0); } int ! match(fa *f, const uchar *p0) /* shortest match ? */ { ! int s, ns; ! uchar *p = (uchar *)p0; s = f->reset ? makeinit(f, 0) : f->initstat; if (f->out[s]) return (1); do { + /* assert(*p < NCHARS); */ if ((ns = f->gototab[s][*p]) != 0) s = ns; else s = cgoto(f, s, *p); if (f->out[s])
*** 442,457 **** } while (*p++ != 0); return (0); } int ! pmatch(fa *f, uchar *p) { ! register int s, ns; ! register uchar *q; int i, k; if (f->reset) { f->initstat = s = makeinit(f, 1); } else { s = f->initstat; } --- 566,583 ---- } while (*p++ != 0); return (0); } int ! pmatch(fa *f, const uchar *p0) /* longest match, for sub */ { ! int s, ns; ! uchar *p = (uchar *)p0; ! uchar *q; int i, k; + /* s = f->reset ? makeinit(f,1) : f->initstat; */ if (f->reset) { f->initstat = s = makeinit(f, 1); } else { s = f->initstat; }
*** 460,469 **** --- 586,596 ---- do { q = p; do { if (f->out[s]) /* final state */ patlen = q-p; + /* assert(*q < NCHARS); */ if ((ns = f->gototab[s][*q]) != 0) s = ns; else s = cgoto(f, s, *q); if (s == 1) { /* no transition */
*** 478,488 **** patlen = q - p - 1; /* don't count $ */ if (patlen >= 0) { patbeg = p; return (1); } ! nextin: s = 2; if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0]; --- 605,615 ---- patlen = q - p - 1; /* don't count $ */ if (patlen >= 0) { patbeg = p; return (1); } ! nextin: s = 2; if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0];
*** 500,526 **** } while (*p++ != 0); return (0); } int ! nematch(fa *f, uchar *p) { ! register int s, ns; ! register uchar *q; int i, k; if (f->reset) { f->initstat = s = makeinit(f, 1); } else { s = f->initstat; } patlen = -1; while (*p) { q = p; do { if (f->out[s]) /* final state */ ! patlen = q-p; if ((ns = f->gototab[s][*q]) != 0) s = ns; else s = cgoto(f, s, *q); if (s == 1) { /* no transition */ --- 627,656 ---- } while (*p++ != 0); return (0); } int ! nematch(fa *f, const uchar *p0) /* non-empty match, for sub */ { ! int s, ns; ! uchar *p = (uchar *)p0; ! uchar *q; int i, k; + /* s = f->reset ? makeinit(f,1) : f->initstat; */ if (f->reset) { f->initstat = s = makeinit(f, 1); } else { s = f->initstat; } patlen = -1; while (*p) { q = p; do { if (f->out[s]) /* final state */ ! patlen = q - p; ! /* assert(*q < NCHARS); */ if ((ns = f->gototab[s][*q]) != 0) s = ns; else s = cgoto(f, s, *q); if (s == 1) { /* no transition */
*** 535,545 **** patlen = q - p - 1; /* don't count $ */ if (patlen > 0) { patbeg = p; return (1); } ! nnextin: s = 2; if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0]; --- 665,675 ---- patlen = q - p - 1; /* don't count $ */ if (patlen > 0) { patbeg = p; return (1); } ! nnextin: s = 2; if (f->reset) { for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0];
*** 557,594 **** p++; } return (0); } - static Node *regexp(void), *primary(void), *concat(Node *); - static Node *alt(Node *), *unary(Node *); - static Node * ! reparse(uchar *p) { /* parses regular expression pointed to by p */ /* uses relex() to scan regular expression */ Node *np; dprintf(("reparse <%s>\n", p)); ! lastre = prestr = p; /* prestr points to string to be parsed */ rtok = relex(); ! if (rtok == '\0') ! ERROR "empty regular expression" FATAL; ! np = regexp(); if (rtok == '\0') { ! return (np); ! } else { ! ERROR "syntax error in regular expression %s at %s", ! lastre, prestr FATAL; } ! /*NOTREACHED*/ ! return (NULL); } static Node * ! regexp(void) { return (alt(concat(primary()))); } static Node * --- 687,722 ---- p++; } return (0); } static Node * ! reparse(const uchar *p) { /* parses regular expression pointed to by p */ /* uses relex() to scan regular expression */ Node *np; dprintf(("reparse <%s>\n", p)); ! /* prestr points to string to be parsed */ ! lastre = prestr = (uchar *)p; rtok = relex(); ! /* GNU compatibility: an empty regexp matches anything */ if (rtok == '\0') { ! /* FATAL("empty regular expression"); previous */ ! return (op2(EMPTYRE, NIL, NIL)); } ! np = regexp(); ! if (rtok != '\0') { ! FATAL("syntax error in regular expression %s at %s", ! lastre, prestr); ! } ! return (np); } static Node * ! regexp(void) /* top-level parse of reg expr */ { return (alt(concat(primary()))); } static Node *
*** 596,611 **** { Node *np; switch (rtok) { case CHAR: ! np = op2(CHAR, NIL, (Node *)rlxval); rtok = relex(); return (unary(np)); case ALL: rtok = relex(); return (unary(op2(ALL, NIL, NIL))); case DOT: rtok = relex(); return (unary(op2(DOT, NIL, NIL))); case CCL: /*LINTED align*/ --- 724,742 ---- { Node *np; switch (rtok) { case CHAR: ! np = op2(CHAR, NIL, itonp(rlxval)); rtok = relex(); return (unary(np)); case ALL: rtok = relex(); return (unary(op2(ALL, NIL, NIL))); + case EMPTYRE: + rtok = relex(); + return (unary(op2(ALL, NIL, NIL))); case DOT: rtok = relex(); return (unary(op2(DOT, NIL, NIL))); case CCL: /*LINTED align*/
*** 617,627 **** np = op2(NCCL, NIL, (Node *)cclenter(rlxstr)); rtok = relex(); return (unary(np)); case '^': rtok = relex(); ! return (unary(op2(CHAR, NIL, (Node *)HAT))); case '$': rtok = relex(); return (unary(op2(CHAR, NIL, NIL))); case '(': rtok = relex(); --- 748,758 ---- np = op2(NCCL, NIL, (Node *)cclenter(rlxstr)); rtok = relex(); return (unary(np)); case '^': rtok = relex(); ! return (unary(op2(CHAR, NIL, itonp(HAT)))); case '$': rtok = relex(); return (unary(op2(CHAR, NIL, NIL))); case '(': rtok = relex();
*** 634,659 **** np = regexp(); if (rtok == ')') { rtok = relex(); return (unary(np)); } else { ! ERROR "syntax error in regular expression %s at %s", ! lastre, prestr FATAL; } default: ! ERROR "illegal primary in regular expression %s at %s", ! lastre, prestr FATAL; } /*NOTREACHED*/ return (NULL); } static Node * concat(Node *np) { switch (rtok) { ! case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': return (concat(op2(CAT, np, primary()))); default: return (np); } } --- 765,791 ---- np = regexp(); if (rtok == ')') { rtok = relex(); return (unary(np)); } else { ! FATAL("syntax error in regular expression %s at %s", ! lastre, prestr); } default: ! FATAL("illegal primary in regular expression %s at %s", ! lastre, prestr); } /*NOTREACHED*/ return (NULL); } static Node * concat(Node *np) { switch (rtok) { ! case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: ! case '$': case '(': return (concat(op2(CAT, np, primary()))); default: return (np); } }
*** 684,699 **** default: return (np); } } static int relex(void) /* lexical analyzer for reparse */ { ! register int c; ! uchar *cbuf; ! int clen, cflag; switch (c = *prestr++) { case '|': return OR; case '*': return STAR; case '+': return PLUS; --- 816,891 ---- default: return (np); } } + /* + * Character class definitions conformant to the POSIX locale as + * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source + * and operating character sets are both ASCII (ISO646) or supersets + * thereof. + * + * Note that to avoid overflowing the temporary buffer used in + * relex(), the expanded character class (prior to range expansion) + * must be less than twice the size of their full name. + */ + + /* + * Because isblank doesn't show up in any of the header files on any + * system i use, it's defined here. if some other locale has a richer + * definition of "blank", define HAS_ISBLANK and provide your own + * version. + * the parentheses here are an attempt to find a path through the maze + * of macro definition and/or function and/or version provided. thanks + * to nelson beebe for the suggestion; let's see if it works everywhere. + */ + + /* #define HAS_ISBLANK */ + #ifndef HAS_ISBLANK + + int + (xisblank)(int c) + { + return (c == ' ' || c == '\t'); + } + + #endif + + struct charclass { + const char *cc_name; + int cc_namelen; + int (*cc_func)(int); + } charclasses[] = { + { "alnum", 5, isalnum }, + { "alpha", 5, isalpha }, + #ifndef HAS_ISBLANK + { "blank", 5, isspace }, /* was isblank */ + #else + { "blank", 5, isblank }, + #endif + { "cntrl", 5, iscntrl }, + { "digit", 5, isdigit }, + { "graph", 5, isgraph }, + { "lower", 5, islower }, + { "print", 5, isprint }, + { "punct", 5, ispunct }, + { "space", 5, isspace }, + { "upper", 5, isupper }, + { "xdigit", 6, isxdigit }, + { NULL, 0, NULL }, + }; + static int relex(void) /* lexical analyzer for reparse */ { ! int c, n; ! int cflag; ! uchar *buf = 0; ! size_t bufsz = 100; ! uchar *bp; ! struct charclass *cc; ! int i; switch (c = *prestr++) { case '|': return OR; case '*': return STAR; case '+': return PLUS;
*** 704,800 **** case '$': case '(': case ')': return (c); case '\\': ! if ((c = *prestr++) == 't') ! c = '\t'; ! else if (c == 'n') ! c = '\n'; ! else if (c == 'f') ! c = '\f'; ! else if (c == 'r') ! c = '\r'; ! else if (c == 'b') ! c = '\b'; ! else if (c == '\\') ! c = '\\'; ! else if (isdigit(c)) { ! int n = c - '0'; ! if (isdigit(*prestr)) { ! n = 8 * n + *prestr++ - '0'; ! if (isdigit(*prestr)) ! n = 8 * n + *prestr++ - '0'; ! } ! c = n; ! } /* else it's now in c */ ! rlxval = c; return (CHAR); default: rlxval = c; return (CHAR); case '[': ! clen = 0; if (*prestr == '^') { cflag = 1; prestr++; } else cflag = 0; ! init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1); for (;;) { if ((c = *prestr++) == '\\') { ! cbuf[clen++] = '\\'; if ((c = *prestr++) == '\0') { ! ERROR ! "nonterminated character class %s", lastre FATAL; } ! cbuf[clen++] = c; } else if (c == ']') { ! cbuf[clen] = 0; ! rlxstr = tostring(cbuf); ! free(cbuf); if (cflag == 0) return (CCL); else return (NCCL); - } else if (c == '\n') { - ERROR "newline in character class %s...", - lastre FATAL; - } else if (c == '\0') { - ERROR "nonterminated character class %s", - lastre FATAL; } else ! cbuf[clen++] = c; } /*NOTREACHED*/ } return (0); } static int cgoto(fa *f, int s, int c) { ! register int i, j, k; ! register int *p, *q; for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; /* compute positions of gototab[s,c] into setvec */ p = f->posns[s]; for (i = 1; i <= *p; i++) { if ((k = f->re[p[i]].ltype) != FINAL) { ! if (k == CHAR && c == f->re[p[i]].lval || ! k == DOT && c != 0 && c != HAT || ! k == ALL && c != 0 || ! k == CCL && ! member(c, (uchar *)f->re[p[i]].lval) || ! k == NCCL && ! !member(c, (uchar *)f->re[p[i]].lval) && ! c != 0 && c != HAT) { q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { if (setvec[q[j]] == 0) { setcnt++; setvec[q[j]] = 1; } } --- 896,1020 ---- case '$': case '(': case ')': return (c); case '\\': ! rlxval = quoted(&prestr); return (CHAR); default: rlxval = c; return (CHAR); case '[': ! if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL) ! FATAL("out of space in reg expr %.10s..", lastre); ! bp = buf; if (*prestr == '^') { cflag = 1; prestr++; } else cflag = 0; ! n = 2 * strlen((const char *) prestr)+1; ! if (!adjbuf(&buf, &bufsz, n, n, &bp, "relex1")) ! FATAL("out of space for reg expr %.10s...", lastre); for (;;) { if ((c = *prestr++) == '\\') { ! *bp++ = '\\'; if ((c = *prestr++) == '\0') { ! FATAL( ! "nonterminated character class %s", lastre); } ! *bp++ = c; ! } else if (c == '[' && *prestr == ':') { ! /* ! * POSIX char class names, Dag-Erling ! * Smorgrav, des@ofug.org ! */ ! for (cc = charclasses; cc->cc_name; cc++) ! if (strncmp((const char *) prestr + 1, ! (const char *) cc->cc_name, ! cc->cc_namelen) == 0) ! break; ! if (cc->cc_name != NULL && ! prestr[1 + cc->cc_namelen] == ':' && ! prestr[2 + cc->cc_namelen] == ']') { ! prestr += cc->cc_namelen + 3; ! for (i = 0; i < NCHARS; i++) { ! if (!adjbuf(&buf, &bufsz, ! bp - buf + 1, 100, &bp, ! "relex2")) ! FATAL( ! "out of space for reg expr %.10s...", lastre); ! if (cc->cc_func(i)) { ! *bp++ = i; ! n++; ! } ! } ! } else ! *bp++ = c; ! } else if (c == '\0') { ! FATAL("nonterminated character class %.20s", ! lastre); ! } else if (bp == buf) { /* 1st char is special */ ! *bp++ = c; } else if (c == ']') { ! *bp++ = 0; ! rlxstr = tostring(buf); if (cflag == 0) return (CCL); else return (NCCL); } else ! *bp++ = c; } /*NOTREACHED*/ } return (0); } static int cgoto(fa *f, int s, int c) { ! int i, j, k; ! int *p, *q; + assert(c == HAT || c < NCHARS); + while (f->accept >= maxsetvec) { /* guessing here! */ + maxsetvec *= 4; + setvec = (int *)realloc(setvec, maxsetvec * sizeof (int)); + tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int)); + if (setvec == 0 || tmpset == 0) + overflo("out of space in cgoto()"); + } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; /* compute positions of gototab[s,c] into setvec */ p = f->posns[s]; for (i = 1; i <= *p; i++) { if ((k = f->re[p[i]].ltype) != FINAL) { ! if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) || ! (k == DOT && c != 0 && c != HAT) || ! (k == ALL && c != 0) || ! (k == EMPTYRE && c != 0) || ! (k == CCL && ! member(c, (uchar *)f->re[p[i]].lval.up)) || ! ((k == NCCL && ! !member(c, (uchar *)f->re[p[i]].lval.up)) && ! c != 0 && c != HAT)) { q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { + if (q[j] >= maxsetvec) { + maxsetvec *= 4; + setvec = (int *)realloc(setvec, + maxsetvec * sizeof (int)); + tmpset = (int *)realloc(tmpset, + maxsetvec * sizeof (int)); + if (setvec == 0 || + tmpset == 0) + overflo( + "cgoto overflow"); + } if (setvec[q[j]] == 0) { setcnt++; setvec[q[j]] = 1; } }
*** 821,831 **** return (i); different:; } /* add tmpset to current set of states */ ! if (f->curstat >= NSTATES-1) { f->curstat = 2; f->reset = 1; for (i = 2; i < NSTATES; i++) xfree(f->posns[i]); } else --- 1041,1051 ---- return (i); different:; } /* add tmpset to current set of states */ ! if (f->curstat >= NSTATES - 1) { f->curstat = 2; f->reset = 1; for (i = 2; i < NSTATES; i++) xfree(f->posns[i]); } else
*** 846,864 **** f->out[f->curstat] = 0; return (f->curstat); } static void ! freefa(fa *f) { ! ! register int i; if (f == NULL) return; for (i = 0; i <= f->curstat; i++) xfree(f->posns[i]); ! for (i = 0; i <= f->accept; i++) xfree(f->re[i].lfollow); xfree(f->restr); xfree(f); } --- 1066,1086 ---- f->out[f->curstat] = 0; return (f->curstat); } static void ! freefa(fa *f) /* free a finite automaton */ { ! int i; if (f == NULL) return; for (i = 0; i <= f->curstat; i++) xfree(f->posns[i]); ! for (i = 0; i <= f->accept; i++) { xfree(f->re[i].lfollow); + if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) + xfree((f->re[i].lval.np)); + } xfree(f->restr); xfree(f); }