Print this page
3731 Update nawk to version 20121220
*** 23,52 ****
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
! /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
! /* All Rights Reserved */
!
! #pragma ident "%Z%%M% %I% %E% SMI"
#define DEBUG
#include "awk.h"
#include "y.tab.h"
! #define HAT (NCHARS-1) /* matches ^ in regular expr */
/* NCHARS is 2**n */
! #define MAXLIN (3 * LINE_MAX)
! #define type(v) (v)->nobj
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
#define UNARY case STAR: case PLUS: case QUEST:
/*
* encoding in tree Nodes:
* leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
--- 23,73 ----
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
! /*
! * Copyright (C) Lucent Technologies 1997
! * All Rights Reserved
! *
! * Permission to use, copy, modify, and distribute this software and
! * its documentation for any purpose and without fee is hereby
! * granted, provided that the above copyright notice appear in all
! * copies and that both that the copyright notice and this
! * permission notice and warranty disclaimer appear in supporting
! * documentation, and that the name Lucent Technologies or any of
! * its entities not be used in advertising or publicity pertaining
! * to distribution of the software without specific, written prior
! * permission.
! *
! * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
! * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
! * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
! * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
! * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
! * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
! * THIS SOFTWARE.
! */
#define DEBUG
#include "awk.h"
#include "y.tab.h"
! #define HAT (NCHARS+2) /* matches ^ in regular expr */
/* NCHARS is 2**n */
! #define MAXLIN 22
! #define type(v) (v)->nobj /* badly overloaded here */
! #define info(v) (v)->ntype /* badly overloaded here */
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
+ #define ELEAF case EMPTYRE: /* empty string in regexp */
#define UNARY case STAR: case PLUS: case QUEST:
/*
* encoding in tree Nodes:
* leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
*** 54,72 ****
* unary (STAR, PLUS, QUEST): left is child, right is null
* binary (CAT, OR): left and right are children
* parent contains pointer to parent
*/
! int setvec[MAXLIN];
! int tmpset[MAXLIN];
! Node *point[MAXLIN];
int rtok; /* next token in current re */
int rlxval;
! uchar *rlxstr;
! uchar *prestr; /* current position in current re */
! uchar *lastre; /* origin of last re */
static int setcnt;
static int poscnt;
uchar *patbeg;
--- 75,93 ----
* unary (STAR, PLUS, QUEST): left is child, right is null
* binary (CAT, OR): left and right are children
* parent contains pointer to parent
*/
! int *setvec;
! int *tmpset;
! int maxsetvec = 0;
int rtok; /* next token in current re */
int rlxval;
! static uchar *rlxstr;
! static uchar *prestr; /* current position in current re */
! static uchar *lastre; /* origin of last re */
static int setcnt;
static int poscnt;
uchar *patbeg;
*** 74,134 ****
#define NFA 20 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
! static fa *mkdfa(uchar *, int);
static int makeinit(fa *, int);
static void penter(Node *);
static void freetr(Node *);
! static void overflo(char *);
static void cfoll(fa *, Node *);
static void follow(Node *);
! static Node *reparse(uchar *);
static int relex(void);
static void freefa(fa *);
static int cgoto(fa *, int, int);
fa *
! makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
if (compile_time) /* a constant for sure */
return (mkdfa(s, anchor));
for (i = 0; i < nfatab; i++) { /* is it there already? */
if (fatab[i]->anchor == anchor &&
! strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
! fatab[i]->use++;
return (fatab[i]);
}
}
pfa = mkdfa(s, anchor);
if (nfatab < NFA) { /* room for another */
fatab[nfatab] = pfa;
! fatab[nfatab]->use = 1;
nfatab++;
return (pfa);
}
use = fatab[0]->use; /* replace least-recently used */
nuse = 0;
! for (i = 1; i < nfatab; i++)
if (fatab[i]->use < use) {
use = fatab[i]->use;
nuse = i;
}
freefa(fatab[nuse]);
fatab[nuse] = pfa;
! pfa->use = 1;
return (pfa);
}
fa *
! mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
/* anchor = 1 for anchored matches, else 0 */
- {
Node *p, *p1;
fa *f;
p = reparse(s);
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
--- 95,170 ----
#define NFA 20 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
! static fa *mkdfa(const uchar *, int);
static int makeinit(fa *, int);
static void penter(Node *);
static void freetr(Node *);
! static void overflo(const char *);
static void cfoll(fa *, Node *);
static void follow(Node *);
! static Node *reparse(const uchar *);
static int relex(void);
static void freefa(fa *);
static int cgoto(fa *, int, int);
+ static Node *regexp(void);
+ static Node *primary(void);
+ static Node *concat(Node *);
+ static Node *alt(Node *);
+ static Node *unary(Node *);
fa *
! makedfa(const uchar *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
+ static int now = 1;
+
+ if (setvec == 0) { /* first time through any RE */
+ maxsetvec = MAXLIN;
+ setvec = (int *)malloc(maxsetvec * sizeof (int));
+ tmpset = (int *)malloc(maxsetvec * sizeof (int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space initializing makedfa");
+ }
if (compile_time) /* a constant for sure */
return (mkdfa(s, anchor));
for (i = 0; i < nfatab; i++) { /* is it there already? */
if (fatab[i]->anchor == anchor &&
! strcmp((const char *)fatab[i]->restr, (char *)s) == 0) {
! fatab[i]->use = now++;
return (fatab[i]);
}
}
pfa = mkdfa(s, anchor);
if (nfatab < NFA) { /* room for another */
fatab[nfatab] = pfa;
! fatab[nfatab]->use = now++;
nfatab++;
return (pfa);
}
use = fatab[0]->use; /* replace least-recently used */
nuse = 0;
! for (i = 1; i < nfatab; i++) {
if (fatab[i]->use < use) {
use = fatab[i]->use;
nuse = i;
}
+ }
freefa(fatab[nuse]);
fatab[nuse] = pfa;
! pfa->use = now++;
return (pfa);
}
fa *
! mkdfa(const uchar *s, int anchor)
! { /* does the real work of making a dfa */
/* anchor = 1 for anchored matches, else 0 */
Node *p, *p1;
fa *f;
p = reparse(s);
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
*** 137,147 ****
/* put FINAL after reg. exp. */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
! overflo("no room for fa");
/* penter has computed number of positions in re */
f->accept = poscnt-1;
cfoll(f, p1); /* set up follow sets */
freetr(p1);
if ((f->posns[0] =
--- 173,183 ----
/* put FINAL after reg. exp. */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
! overflo("out of space for fa");
/* penter has computed number of positions in re */
f->accept = poscnt-1;
cfoll(f, p1); /* set up follow sets */
freetr(p1);
if ((f->posns[0] =
*** 158,168 ****
}
static int
makeinit(fa *f, int anchor)
{
! register int i, k;
f->curstat = 2;
f->out[2] = 0;
f->reset = 0;
k = *(f->re[0].lfollow);
--- 194,204 ----
}
static int
makeinit(fa *f, int anchor)
{
! int i, k;
f->curstat = 2;
f->out[2] = 0;
f->reset = 0;
k = *(f->re[0].lfollow);
*** 192,204 ****
void
penter(Node *p) /* set up parent pointers and leaf indices */
{
switch (type(p)) {
LEAF
! left(p) = (Node *) poscnt;
! point[poscnt++] = p;
break;
UNARY
penter(left(p));
parent(left(p)) = p;
break;
--- 228,241 ----
void
penter(Node *p) /* set up parent pointers and leaf indices */
{
switch (type(p)) {
+ ELEAF
LEAF
! info(p) = poscnt;
! poscnt++;
break;
UNARY
penter(left(p));
parent(left(p)) = p;
break;
*** 207,226 ****
penter(left(p));
penter(right(p));
parent(left(p)) = p;
parent(right(p)) = p;
break;
! default:
! ERROR "unknown type %d in penter", type(p) FATAL;
break;
}
}
static void
freetr(Node *p) /* free parse tree */
{
switch (type(p)) {
LEAF
xfree(p);
break;
UNARY
freetr(left(p));
--- 244,264 ----
penter(left(p));
penter(right(p));
parent(left(p)) = p;
parent(right(p)) = p;
break;
! default: /* can't happen */
! FATAL("can't happen: unknown type %d in penter", type(p));
break;
}
}
static void
freetr(Node *p) /* free parse tree */
{
switch (type(p)) {
+ ELEAF
LEAF
xfree(p);
break;
UNARY
freetr(left(p));
*** 230,257 ****
case OR:
freetr(left(p));
freetr(right(p));
xfree(p);
break;
! default:
! ERROR "unknown type %d in freetr", type(p) FATAL;
break;
}
}
! uchar *
! cclenter(uchar *p)
! {
! register int i, c;
! uchar *op, *chars, *ret;
! size_t bsize;
- init_buf(&chars, &bsize, LINE_INCR);
- op = p;
- i = 0;
- while ((c = *p++) != 0) {
- if (c == '\\') {
if ((c = *p++) == 't')
c = '\t';
else if (c == 'n')
c = '\n';
else if (c == 'f')
--- 268,316 ----
case OR:
freetr(left(p));
freetr(right(p));
xfree(p);
break;
! default: /* can't happen */
! FATAL("can't happen: unknown type %d in freetr", type(p));
break;
}
}
! /*
! * in the parsing of regular expressions, metacharacters like . have
! * to be seen literally; \056 is not a metacharacter.
! */
! int
! hexstr(uchar **pp) /* find and eval hex string at pp, return new p */
! { /* only pick up one 8-bit byte (2 chars) */
! uchar *p;
! int n = 0;
! int i;
!
! for (i = 0, p = (uchar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
! if (isdigit(*p))
! n = 16 * n + *p - '0';
! else if (*p >= 'a' && *p <= 'f')
! n = 16 * n + *p - 'a' + 10;
! else if (*p >= 'A' && *p <= 'F')
! n = 16 * n + *p - 'A' + 10;
! }
! *pp = (uchar *)p;
! return (n);
! }
!
! /* multiple use of arg */
! #define isoctdigit(c) ((c) >= '0' && (c) <= '7')
!
! int
! quoted(uchar **pp) /* pick up next thing after a \\ */
! { /* and increment *pp */
!
! uchar *p = *pp;
! int c;
if ((c = *p++) == 't')
c = '\t';
else if (c == 'n')
c = '\n';
else if (c == 'f')
*** 260,325 ****
c = '\r';
else if (c == 'b')
c = '\b';
else if (c == '\\')
c = '\\';
! else if (isdigit(c)) {
int n = c - '0';
! if (isdigit(*p)) {
n = 8 * n + *p++ - '0';
! if (isdigit(*p))
n = 8 * n + *p++ - '0';
}
c = n;
} /* else */
/* c = c; */
! } else if (c == '-' && i > 0 && chars[i-1] != 0) {
if (*p != 0) {
! c = chars[i-1];
! while ((uchar)c < *p) { /* fails if *p is \\ */
! expand_buf(&chars, &bsize, i);
! chars[i++] = ++c;
}
- p++;
continue;
}
}
! expand_buf(&chars, &bsize, i);
! chars[i++] = c;
}
! chars[i++] = '\0';
! dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
xfree(op);
! ret = tostring(chars);
! free(chars);
! return (ret);
}
static void
! overflo(char *s)
{
! ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
}
/* enter follow set of each leaf of vertex v into lfollow[leaf] */
static void
cfoll(fa *f, Node *v)
{
! register int i;
! register int *p;
switch (type(v)) {
LEAF
! f->re[(int)left(v)].ltype = type(v);
! f->re[(int)left(v)].lval = (int)right(v);
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
! overflo("follow set overflow");
! f->re[(int)left(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--) {
if (setvec[i] == 1)
*++p = i;
}
--- 319,429 ----
c = '\r';
else if (c == 'b')
c = '\b';
else if (c == '\\')
c = '\\';
! else if (c == 'x') { /* hexadecimal goo follows */
! c = hexstr(&p); /* this adds a null if number is invalid */
! } else if (isoctdigit(c)) { /* \d \dd \ddd */
int n = c - '0';
! if (isoctdigit(*p)) {
n = 8 * n + *p++ - '0';
! if (isoctdigit(*p))
n = 8 * n + *p++ - '0';
}
c = n;
} /* else */
/* c = c; */
! *pp = p;
! return (c);
! }
!
! uchar *
! cclenter(const uchar *argp) /* add a character class */
! {
! int i, c, c2;
! uchar *p = (uchar *)argp;
! uchar *op, *bp;
! static uchar *buf = NULL;
! size_t bufsz = 100;
!
! op = p;
! if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL)
! FATAL("out of space for character class [%.10s...] 1", p);
! bp = buf;
! for (i = 0; (c = *p++) != 0; ) {
! if (c == '\\') {
! c = quoted(&p);
! } else if (c == '-' && i > 0 && bp[-1] != 0) {
if (*p != 0) {
! c = bp[-1];
! c2 = *p++;
! if (c2 == '\\')
! c2 = quoted(&p);
! if (c > c2) { /* empty; ignore */
! bp--;
! i--;
! continue;
! }
! while (c < c2) {
! if (adjbuf(&buf, &bufsz,
! bp - buf + 2, 100, &bp,
! "cclenter1") == 0)
! FATAL(
! "out of space for character class [%.10s...] 2", p);
! *bp++ = ++c;
! i++;
}
continue;
}
}
! if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter2"))
! FATAL(
! "out of space for character class [%.10s...] 3", p);
! *bp++ = c;
! i++;
}
! *bp = 0;
! dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
xfree(op);
! return (tostring(buf));
}
static void
! overflo(const char *s)
{
! FATAL("regular expression too big: %.30s...", gettext((char *)s));
}
/* enter follow set of each leaf of vertex v into lfollow[leaf] */
static void
cfoll(fa *f, Node *v)
{
! int i;
! int *p;
switch (type(v)) {
+ ELEAF
LEAF
! f->re[info(v)].ltype = type(v);
! f->re[info(v)].lval.np = right(v);
! while (f->accept >= maxsetvec) { /* guessing here! */
! maxsetvec *= 4;
! setvec = (int *)realloc(setvec, maxsetvec *
! sizeof (int));
! tmpset = (int *)realloc(tmpset, maxsetvec *
! sizeof (int));
! if (setvec == 0 || tmpset == 0)
! overflo("out of space in cfoll()");
! }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
! overflo("out of space building follow set");
! f->re[info(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--) {
if (setvec[i] == 1)
*++p = i;
}
*** 330,357 ****
case CAT:
case OR:
cfoll(f, left(v));
cfoll(f, right(v));
break;
! default:
! ERROR "unknown type %d in cfoll", type(v) FATAL;
}
}
/*
* collects initially active leaves of p into setvec
* returns 0 or 1 depending on whether p matches empty string
*/
static int
first(Node *p)
{
! register int b;
switch (type(p)) {
LEAF
! if (setvec[(int)left(p)] != 1) {
! setvec[(int)left(p)] = 1;
setcnt++;
}
if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
return (0); /* empty CCL */
else
--- 434,478 ----
case CAT:
case OR:
cfoll(f, left(v));
cfoll(f, right(v));
break;
! default: /* can't happen */
! FATAL("can't happen: unknown type %d in cfoll", type(v));
}
}
/*
* collects initially active leaves of p into setvec
* returns 0 or 1 depending on whether p matches empty string
*/
static int
first(Node *p)
{
! int b, lp;
switch (type(p)) {
+ ELEAF
LEAF
! /* look for high-water mark of subscripts */
! lp = info(p);
! /* guessing here! */
! while (setcnt >= maxsetvec || lp >= maxsetvec) {
! maxsetvec *= 4;
! setvec = (int *)realloc(setvec, maxsetvec *
! sizeof (int));
! tmpset = (int *)realloc(tmpset, maxsetvec *
! sizeof (int));
! if (setvec == 0 || tmpset == 0)
! overflo("out of space in first()");
! }
! if (type(p) == EMPTYRE) {
! setvec[lp] = 0;
! return (0);
! }
! if (setvec[lp] != 1) {
! setvec[lp] = 1;
setcnt++;
}
if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
return (0); /* empty CCL */
else
*** 372,382 ****
b = first(right(p));
if (first(left(p)) == 0 || b == 0)
return (0);
return (1);
}
! ERROR "unknown type %d in first", type(p) FATAL;
return (-1);
}
/* collects leaves that can follow v into setvec */
static void
--- 493,505 ----
b = first(right(p));
if (first(left(p)) == 0 || b == 0)
return (0);
return (1);
}
! /* can't happen */
! FATAL("can't happen: unknown type %d in first", type(p));
! /*NOTREACHED*/
return (-1);
}
/* collects leaves that can follow v into setvec */
static void
*** 406,440 ****
return;
}
} else /* v is right child */
follow(p);
return;
- default:
- ERROR "unknown type %d in follow", type(p) FATAL;
- break;
}
}
static int
! member(uchar c, uchar *s) /* is c in s? */
{
while (*s)
if (c == *s++)
return (1);
return (0);
}
int
! match(fa *f, uchar *p)
{
! register int s, ns;
s = f->reset ? makeinit(f, 0) : f->initstat;
if (f->out[s])
return (1);
do {
if ((ns = f->gototab[s][*p]) != 0)
s = ns;
else
s = cgoto(f, s, *p);
if (f->out[s])
--- 529,564 ----
return;
}
} else /* v is right child */
follow(p);
return;
}
}
static int
! member(int c, const uchar *sarg) /* is c in s? */
{
+ uchar *s = (uchar *)sarg;
+
while (*s)
if (c == *s++)
return (1);
return (0);
}
int
! match(fa *f, const uchar *p0) /* shortest match ? */
{
! int s, ns;
! uchar *p = (uchar *)p0;
s = f->reset ? makeinit(f, 0) : f->initstat;
if (f->out[s])
return (1);
do {
+ /* assert(*p < NCHARS); */
if ((ns = f->gototab[s][*p]) != 0)
s = ns;
else
s = cgoto(f, s, *p);
if (f->out[s])
*** 442,457 ****
} while (*p++ != 0);
return (0);
}
int
! pmatch(fa *f, uchar *p)
{
! register int s, ns;
! register uchar *q;
int i, k;
if (f->reset) {
f->initstat = s = makeinit(f, 1);
} else {
s = f->initstat;
}
--- 566,583 ----
} while (*p++ != 0);
return (0);
}
int
! pmatch(fa *f, const uchar *p0) /* longest match, for sub */
{
! int s, ns;
! uchar *p = (uchar *)p0;
! uchar *q;
int i, k;
+ /* s = f->reset ? makeinit(f,1) : f->initstat; */
if (f->reset) {
f->initstat = s = makeinit(f, 1);
} else {
s = f->initstat;
}
*** 460,469 ****
--- 586,596 ----
do {
q = p;
do {
if (f->out[s]) /* final state */
patlen = q-p;
+ /* assert(*q < NCHARS); */
if ((ns = f->gototab[s][*q]) != 0)
s = ns;
else
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
*** 478,488 ****
patlen = q - p - 1; /* don't count $ */
if (patlen >= 0) {
patbeg = p;
return (1);
}
! nextin:
s = 2;
if (f->reset) {
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
--- 605,615 ----
patlen = q - p - 1; /* don't count $ */
if (patlen >= 0) {
patbeg = p;
return (1);
}
! nextin:
s = 2;
if (f->reset) {
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
*** 500,526 ****
} while (*p++ != 0);
return (0);
}
int
! nematch(fa *f, uchar *p)
{
! register int s, ns;
! register uchar *q;
int i, k;
if (f->reset) {
f->initstat = s = makeinit(f, 1);
} else {
s = f->initstat;
}
patlen = -1;
while (*p) {
q = p;
do {
if (f->out[s]) /* final state */
! patlen = q-p;
if ((ns = f->gototab[s][*q]) != 0)
s = ns;
else
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
--- 627,656 ----
} while (*p++ != 0);
return (0);
}
int
! nematch(fa *f, const uchar *p0) /* non-empty match, for sub */
{
! int s, ns;
! uchar *p = (uchar *)p0;
! uchar *q;
int i, k;
+ /* s = f->reset ? makeinit(f,1) : f->initstat; */
if (f->reset) {
f->initstat = s = makeinit(f, 1);
} else {
s = f->initstat;
}
patlen = -1;
while (*p) {
q = p;
do {
if (f->out[s]) /* final state */
! patlen = q - p;
! /* assert(*q < NCHARS); */
if ((ns = f->gototab[s][*q]) != 0)
s = ns;
else
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
*** 535,545 ****
patlen = q - p - 1; /* don't count $ */
if (patlen > 0) {
patbeg = p;
return (1);
}
! nnextin:
s = 2;
if (f->reset) {
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
--- 665,675 ----
patlen = q - p - 1; /* don't count $ */
if (patlen > 0) {
patbeg = p;
return (1);
}
! nnextin:
s = 2;
if (f->reset) {
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
*** 557,594 ****
p++;
}
return (0);
}
- static Node *regexp(void), *primary(void), *concat(Node *);
- static Node *alt(Node *), *unary(Node *);
-
static Node *
! reparse(uchar *p)
{
/* parses regular expression pointed to by p */
/* uses relex() to scan regular expression */
Node *np;
dprintf(("reparse <%s>\n", p));
! lastre = prestr = p; /* prestr points to string to be parsed */
rtok = relex();
! if (rtok == '\0')
! ERROR "empty regular expression" FATAL;
! np = regexp();
if (rtok == '\0') {
! return (np);
! } else {
! ERROR "syntax error in regular expression %s at %s",
! lastre, prestr FATAL;
}
! /*NOTREACHED*/
! return (NULL);
}
static Node *
! regexp(void)
{
return (alt(concat(primary())));
}
static Node *
--- 687,722 ----
p++;
}
return (0);
}
static Node *
! reparse(const uchar *p)
{
/* parses regular expression pointed to by p */
/* uses relex() to scan regular expression */
Node *np;
dprintf(("reparse <%s>\n", p));
! /* prestr points to string to be parsed */
! lastre = prestr = (uchar *)p;
rtok = relex();
! /* GNU compatibility: an empty regexp matches anything */
if (rtok == '\0') {
! /* FATAL("empty regular expression"); previous */
! return (op2(EMPTYRE, NIL, NIL));
}
! np = regexp();
! if (rtok != '\0') {
! FATAL("syntax error in regular expression %s at %s",
! lastre, prestr);
! }
! return (np);
}
static Node *
! regexp(void) /* top-level parse of reg expr */
{
return (alt(concat(primary())));
}
static Node *
*** 596,611 ****
{
Node *np;
switch (rtok) {
case CHAR:
! np = op2(CHAR, NIL, (Node *)rlxval);
rtok = relex();
return (unary(np));
case ALL:
rtok = relex();
return (unary(op2(ALL, NIL, NIL)));
case DOT:
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
/*LINTED align*/
--- 724,742 ----
{
Node *np;
switch (rtok) {
case CHAR:
! np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
case ALL:
rtok = relex();
return (unary(op2(ALL, NIL, NIL)));
+ case EMPTYRE:
+ rtok = relex();
+ return (unary(op2(ALL, NIL, NIL)));
case DOT:
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
/*LINTED align*/
*** 617,627 ****
np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
! return (unary(op2(CHAR, NIL, (Node *)HAT)));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
case '(':
rtok = relex();
--- 748,758 ----
np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
! return (unary(op2(CHAR, NIL, itonp(HAT))));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
case '(':
rtok = relex();
*** 634,659 ****
np = regexp();
if (rtok == ')') {
rtok = relex();
return (unary(np));
} else {
! ERROR "syntax error in regular expression %s at %s",
! lastre, prestr FATAL;
}
default:
! ERROR "illegal primary in regular expression %s at %s",
! lastre, prestr FATAL;
}
/*NOTREACHED*/
return (NULL);
}
static Node *
concat(Node *np)
{
switch (rtok) {
! case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
return (concat(op2(CAT, np, primary())));
default:
return (np);
}
}
--- 765,791 ----
np = regexp();
if (rtok == ')') {
rtok = relex();
return (unary(np));
} else {
! FATAL("syntax error in regular expression %s at %s",
! lastre, prestr);
}
default:
! FATAL("illegal primary in regular expression %s at %s",
! lastre, prestr);
}
/*NOTREACHED*/
return (NULL);
}
static Node *
concat(Node *np)
{
switch (rtok) {
! case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL:
! case '$': case '(':
return (concat(op2(CAT, np, primary())));
default:
return (np);
}
}
*** 684,699 ****
default:
return (np);
}
}
static int
relex(void) /* lexical analyzer for reparse */
{
! register int c;
! uchar *cbuf;
! int clen, cflag;
switch (c = *prestr++) {
case '|': return OR;
case '*': return STAR;
case '+': return PLUS;
--- 816,891 ----
default:
return (np);
}
}
+ /*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), the expanded character class (prior to range expansion)
+ * must be less than twice the size of their full name.
+ */
+
+ /*
+ * Because isblank doesn't show up in any of the header files on any
+ * system i use, it's defined here. if some other locale has a richer
+ * definition of "blank", define HAS_ISBLANK and provide your own
+ * version.
+ * the parentheses here are an attempt to find a path through the maze
+ * of macro definition and/or function and/or version provided. thanks
+ * to nelson beebe for the suggestion; let's see if it works everywhere.
+ */
+
+ /* #define HAS_ISBLANK */
+ #ifndef HAS_ISBLANK
+
+ int
+ (xisblank)(int c)
+ {
+ return (c == ' ' || c == '\t');
+ }
+
+ #endif
+
+ struct charclass {
+ const char *cc_name;
+ int cc_namelen;
+ int (*cc_func)(int);
+ } charclasses[] = {
+ { "alnum", 5, isalnum },
+ { "alpha", 5, isalpha },
+ #ifndef HAS_ISBLANK
+ { "blank", 5, isspace }, /* was isblank */
+ #else
+ { "blank", 5, isblank },
+ #endif
+ { "cntrl", 5, iscntrl },
+ { "digit", 5, isdigit },
+ { "graph", 5, isgraph },
+ { "lower", 5, islower },
+ { "print", 5, isprint },
+ { "punct", 5, ispunct },
+ { "space", 5, isspace },
+ { "upper", 5, isupper },
+ { "xdigit", 6, isxdigit },
+ { NULL, 0, NULL },
+ };
+
static int
relex(void) /* lexical analyzer for reparse */
{
! int c, n;
! int cflag;
! uchar *buf = 0;
! size_t bufsz = 100;
! uchar *bp;
! struct charclass *cc;
! int i;
switch (c = *prestr++) {
case '|': return OR;
case '*': return STAR;
case '+': return PLUS;
*** 704,800 ****
case '$':
case '(':
case ')':
return (c);
case '\\':
! if ((c = *prestr++) == 't')
! c = '\t';
! else if (c == 'n')
! c = '\n';
! else if (c == 'f')
! c = '\f';
! else if (c == 'r')
! c = '\r';
! else if (c == 'b')
! c = '\b';
! else if (c == '\\')
! c = '\\';
! else if (isdigit(c)) {
! int n = c - '0';
! if (isdigit(*prestr)) {
! n = 8 * n + *prestr++ - '0';
! if (isdigit(*prestr))
! n = 8 * n + *prestr++ - '0';
! }
! c = n;
! } /* else it's now in c */
! rlxval = c;
return (CHAR);
default:
rlxval = c;
return (CHAR);
case '[':
! clen = 0;
if (*prestr == '^') {
cflag = 1;
prestr++;
} else
cflag = 0;
! init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
for (;;) {
if ((c = *prestr++) == '\\') {
! cbuf[clen++] = '\\';
if ((c = *prestr++) == '\0') {
! ERROR
! "nonterminated character class %s", lastre FATAL;
}
! cbuf[clen++] = c;
} else if (c == ']') {
! cbuf[clen] = 0;
! rlxstr = tostring(cbuf);
! free(cbuf);
if (cflag == 0)
return (CCL);
else
return (NCCL);
- } else if (c == '\n') {
- ERROR "newline in character class %s...",
- lastre FATAL;
- } else if (c == '\0') {
- ERROR "nonterminated character class %s",
- lastre FATAL;
} else
! cbuf[clen++] = c;
}
/*NOTREACHED*/
}
return (0);
}
static int
cgoto(fa *f, int s, int c)
{
! register int i, j, k;
! register int *p, *q;
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
/* compute positions of gototab[s,c] into setvec */
p = f->posns[s];
for (i = 1; i <= *p; i++) {
if ((k = f->re[p[i]].ltype) != FINAL) {
! if (k == CHAR && c == f->re[p[i]].lval ||
! k == DOT && c != 0 && c != HAT ||
! k == ALL && c != 0 ||
! k == CCL &&
! member(c, (uchar *)f->re[p[i]].lval) ||
! k == NCCL &&
! !member(c, (uchar *)f->re[p[i]].lval) &&
! c != 0 && c != HAT) {
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
if (setvec[q[j]] == 0) {
setcnt++;
setvec[q[j]] = 1;
}
}
--- 896,1020 ----
case '$':
case '(':
case ')':
return (c);
case '\\':
! rlxval = quoted(&prestr);
return (CHAR);
default:
rlxval = c;
return (CHAR);
case '[':
! if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL)
! FATAL("out of space in reg expr %.10s..", lastre);
! bp = buf;
if (*prestr == '^') {
cflag = 1;
prestr++;
} else
cflag = 0;
! n = 2 * strlen((const char *) prestr)+1;
! if (!adjbuf(&buf, &bufsz, n, n, &bp, "relex1"))
! FATAL("out of space for reg expr %.10s...", lastre);
for (;;) {
if ((c = *prestr++) == '\\') {
! *bp++ = '\\';
if ((c = *prestr++) == '\0') {
! FATAL(
! "nonterminated character class %s", lastre);
}
! *bp++ = c;
! } else if (c == '[' && *prestr == ':') {
! /*
! * POSIX char class names, Dag-Erling
! * Smorgrav, des@ofug.org
! */
! for (cc = charclasses; cc->cc_name; cc++)
! if (strncmp((const char *) prestr + 1,
! (const char *) cc->cc_name,
! cc->cc_namelen) == 0)
! break;
! if (cc->cc_name != NULL &&
! prestr[1 + cc->cc_namelen] == ':' &&
! prestr[2 + cc->cc_namelen] == ']') {
! prestr += cc->cc_namelen + 3;
! for (i = 0; i < NCHARS; i++) {
! if (!adjbuf(&buf, &bufsz,
! bp - buf + 1, 100, &bp,
! "relex2"))
! FATAL(
! "out of space for reg expr %.10s...", lastre);
! if (cc->cc_func(i)) {
! *bp++ = i;
! n++;
! }
! }
! } else
! *bp++ = c;
! } else if (c == '\0') {
! FATAL("nonterminated character class %.20s",
! lastre);
! } else if (bp == buf) { /* 1st char is special */
! *bp++ = c;
} else if (c == ']') {
! *bp++ = 0;
! rlxstr = tostring(buf);
if (cflag == 0)
return (CCL);
else
return (NCCL);
} else
! *bp++ = c;
}
/*NOTREACHED*/
}
return (0);
}
static int
cgoto(fa *f, int s, int c)
{
! int i, j, k;
! int *p, *q;
+ assert(c == HAT || c < NCHARS);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
+ tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space in cgoto()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
/* compute positions of gototab[s,c] into setvec */
p = f->posns[s];
for (i = 1; i <= *p; i++) {
if ((k = f->re[p[i]].ltype) != FINAL) {
! if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
! (k == DOT && c != 0 && c != HAT) ||
! (k == ALL && c != 0) ||
! (k == EMPTYRE && c != 0) ||
! (k == CCL &&
! member(c, (uchar *)f->re[p[i]].lval.up)) ||
! ((k == NCCL &&
! !member(c, (uchar *)f->re[p[i]].lval.up)) &&
! c != 0 && c != HAT)) {
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
+ if (q[j] >= maxsetvec) {
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec,
+ maxsetvec * sizeof (int));
+ tmpset = (int *)realloc(tmpset,
+ maxsetvec * sizeof (int));
+ if (setvec == 0 ||
+ tmpset == 0)
+ overflo(
+ "cgoto overflow");
+ }
if (setvec[q[j]] == 0) {
setcnt++;
setvec[q[j]] = 1;
}
}
*** 821,831 ****
return (i);
different:;
}
/* add tmpset to current set of states */
! if (f->curstat >= NSTATES-1) {
f->curstat = 2;
f->reset = 1;
for (i = 2; i < NSTATES; i++)
xfree(f->posns[i]);
} else
--- 1041,1051 ----
return (i);
different:;
}
/* add tmpset to current set of states */
! if (f->curstat >= NSTATES - 1) {
f->curstat = 2;
f->reset = 1;
for (i = 2; i < NSTATES; i++)
xfree(f->posns[i]);
} else
*** 846,864 ****
f->out[f->curstat] = 0;
return (f->curstat);
}
static void
! freefa(fa *f)
{
!
! register int i;
if (f == NULL)
return;
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
! for (i = 0; i <= f->accept; i++)
xfree(f->re[i].lfollow);
xfree(f->restr);
xfree(f);
}
--- 1066,1086 ----
f->out[f->curstat] = 0;
return (f->curstat);
}
static void
! freefa(fa *f) /* free a finite automaton */
{
! int i;
if (f == NULL)
return;
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
! for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
+ if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
+ xfree((f->re[i].lval.np));
+ }
xfree(f->restr);
xfree(f);
}