Print this page
3731 Update nawk to version 20121220
@@ -23,30 +23,51 @@
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
#define DEBUG
#include "awk.h"
#include "y.tab.h"
-#define HAT (NCHARS-1) /* matches ^ in regular expr */
+#define HAT (NCHARS+2) /* matches ^ in regular expr */
/* NCHARS is 2**n */
-#define MAXLIN (3 * LINE_MAX)
+#define MAXLIN 22
-#define type(v) (v)->nobj
+#define type(v) (v)->nobj /* badly overloaded here */
+#define info(v) (v)->ntype /* badly overloaded here */
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
+#define ELEAF case EMPTYRE: /* empty string in regexp */
#define UNARY case STAR: case PLUS: case QUEST:
/*
* encoding in tree Nodes:
* leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
@@ -54,19 +75,19 @@
* unary (STAR, PLUS, QUEST): left is child, right is null
* binary (CAT, OR): left and right are children
* parent contains pointer to parent
*/
-int setvec[MAXLIN];
-int tmpset[MAXLIN];
-Node *point[MAXLIN];
+int *setvec;
+int *tmpset;
+int maxsetvec = 0;
int rtok; /* next token in current re */
int rlxval;
-uchar *rlxstr;
-uchar *prestr; /* current position in current re */
-uchar *lastre; /* origin of last re */
+static uchar *rlxstr;
+static uchar *prestr; /* current position in current re */
+static uchar *lastre; /* origin of last re */
static int setcnt;
static int poscnt;
uchar *patbeg;
@@ -74,61 +95,76 @@
#define NFA 20 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
-static fa *mkdfa(uchar *, int);
+static fa *mkdfa(const uchar *, int);
static int makeinit(fa *, int);
static void penter(Node *);
static void freetr(Node *);
-static void overflo(char *);
+static void overflo(const char *);
static void cfoll(fa *, Node *);
static void follow(Node *);
-static Node *reparse(uchar *);
+static Node *reparse(const uchar *);
static int relex(void);
static void freefa(fa *);
static int cgoto(fa *, int, int);
+static Node *regexp(void);
+static Node *primary(void);
+static Node *concat(Node *);
+static Node *alt(Node *);
+static Node *unary(Node *);
fa *
-makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
+makedfa(const uchar *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
+ static int now = 1;
+
+ if (setvec == 0) { /* first time through any RE */
+ maxsetvec = MAXLIN;
+ setvec = (int *)malloc(maxsetvec * sizeof (int));
+ tmpset = (int *)malloc(maxsetvec * sizeof (int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space initializing makedfa");
+ }
if (compile_time) /* a constant for sure */
return (mkdfa(s, anchor));
for (i = 0; i < nfatab; i++) { /* is it there already? */
if (fatab[i]->anchor == anchor &&
- strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
- fatab[i]->use++;
+ strcmp((const char *)fatab[i]->restr, (char *)s) == 0) {
+ fatab[i]->use = now++;
return (fatab[i]);
}
}
pfa = mkdfa(s, anchor);
if (nfatab < NFA) { /* room for another */
fatab[nfatab] = pfa;
- fatab[nfatab]->use = 1;
+ fatab[nfatab]->use = now++;
nfatab++;
return (pfa);
}
use = fatab[0]->use; /* replace least-recently used */
nuse = 0;
- for (i = 1; i < nfatab; i++)
+ for (i = 1; i < nfatab; i++) {
if (fatab[i]->use < use) {
use = fatab[i]->use;
nuse = i;
}
+ }
freefa(fatab[nuse]);
fatab[nuse] = pfa;
- pfa->use = 1;
+ pfa->use = now++;
return (pfa);
}
fa *
-mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
+mkdfa(const uchar *s, int anchor)
+{ /* does the real work of making a dfa */
/* anchor = 1 for anchored matches, else 0 */
-{
Node *p, *p1;
fa *f;
p = reparse(s);
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
@@ -137,11 +173,11 @@
/* put FINAL after reg. exp. */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
- overflo("no room for fa");
+ overflo("out of space for fa");
/* penter has computed number of positions in re */
f->accept = poscnt-1;
cfoll(f, p1); /* set up follow sets */
freetr(p1);
if ((f->posns[0] =
@@ -158,11 +194,11 @@
}
static int
makeinit(fa *f, int anchor)
{
- register int i, k;
+ int i, k;
f->curstat = 2;
f->out[2] = 0;
f->reset = 0;
k = *(f->re[0].lfollow);
@@ -192,13 +228,14 @@
void
penter(Node *p) /* set up parent pointers and leaf indices */
{
switch (type(p)) {
+ ELEAF
LEAF
- left(p) = (Node *) poscnt;
- point[poscnt++] = p;
+ info(p) = poscnt;
+ poscnt++;
break;
UNARY
penter(left(p));
parent(left(p)) = p;
break;
@@ -207,20 +244,21 @@
penter(left(p));
penter(right(p));
parent(left(p)) = p;
parent(right(p)) = p;
break;
- default:
- ERROR "unknown type %d in penter", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in penter", type(p));
break;
}
}
static void
freetr(Node *p) /* free parse tree */
{
switch (type(p)) {
+ ELEAF
LEAF
xfree(p);
break;
UNARY
freetr(left(p));
@@ -230,28 +268,49 @@
case OR:
freetr(left(p));
freetr(right(p));
xfree(p);
break;
- default:
- ERROR "unknown type %d in freetr", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in freetr", type(p));
break;
}
}
-uchar *
-cclenter(uchar *p)
-{
- register int i, c;
- uchar *op, *chars, *ret;
- size_t bsize;
+/*
+ * in the parsing of regular expressions, metacharacters like . have
+ * to be seen literally; \056 is not a metacharacter.
+ */
+int
+hexstr(uchar **pp) /* find and eval hex string at pp, return new p */
+{ /* only pick up one 8-bit byte (2 chars) */
+ uchar *p;
+ int n = 0;
+ int i;
+
+ for (i = 0, p = (uchar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
+ if (isdigit(*p))
+ n = 16 * n + *p - '0';
+ else if (*p >= 'a' && *p <= 'f')
+ n = 16 * n + *p - 'a' + 10;
+ else if (*p >= 'A' && *p <= 'F')
+ n = 16 * n + *p - 'A' + 10;
+ }
+ *pp = (uchar *)p;
+ return (n);
+}
+
+/* multiple use of arg */
+#define isoctdigit(c) ((c) >= '0' && (c) <= '7')
+
+int
+quoted(uchar **pp) /* pick up next thing after a \\ */
+{ /* and increment *pp */
+
+ uchar *p = *pp;
+ int c;
- init_buf(&chars, &bsize, LINE_INCR);
- op = p;
- i = 0;
- while ((c = *p++) != 0) {
- if (c == '\\') {
if ((c = *p++) == 't')
c = '\t';
else if (c == 'n')
c = '\n';
else if (c == 'f')
@@ -260,66 +319,111 @@
c = '\r';
else if (c == 'b')
c = '\b';
else if (c == '\\')
c = '\\';
- else if (isdigit(c)) {
+ else if (c == 'x') { /* hexadecimal goo follows */
+ c = hexstr(&p); /* this adds a null if number is invalid */
+ } else if (isoctdigit(c)) { /* \d \dd \ddd */
int n = c - '0';
- if (isdigit(*p)) {
+ if (isoctdigit(*p)) {
n = 8 * n + *p++ - '0';
- if (isdigit(*p))
+ if (isoctdigit(*p))
n = 8 * n + *p++ - '0';
}
c = n;
} /* else */
/* c = c; */
- } else if (c == '-' && i > 0 && chars[i-1] != 0) {
+ *pp = p;
+ return (c);
+}
+
+uchar *
+cclenter(const uchar *argp) /* add a character class */
+{
+ int i, c, c2;
+ uchar *p = (uchar *)argp;
+ uchar *op, *bp;
+ static uchar *buf = NULL;
+ size_t bufsz = 100;
+
+ op = p;
+ if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL)
+ FATAL("out of space for character class [%.10s...] 1", p);
+ bp = buf;
+ for (i = 0; (c = *p++) != 0; ) {
+ if (c == '\\') {
+ c = quoted(&p);
+ } else if (c == '-' && i > 0 && bp[-1] != 0) {
if (*p != 0) {
- c = chars[i-1];
- while ((uchar)c < *p) { /* fails if *p is \\ */
- expand_buf(&chars, &bsize, i);
- chars[i++] = ++c;
+ c = bp[-1];
+ c2 = *p++;
+ if (c2 == '\\')
+ c2 = quoted(&p);
+ if (c > c2) { /* empty; ignore */
+ bp--;
+ i--;
+ continue;
+ }
+ while (c < c2) {
+ if (adjbuf(&buf, &bufsz,
+ bp - buf + 2, 100, &bp,
+ "cclenter1") == 0)
+ FATAL(
+ "out of space for character class [%.10s...] 2", p);
+ *bp++ = ++c;
+ i++;
}
- p++;
continue;
}
}
- expand_buf(&chars, &bsize, i);
- chars[i++] = c;
+ if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter2"))
+ FATAL(
+ "out of space for character class [%.10s...] 3", p);
+ *bp++ = c;
+ i++;
}
- chars[i++] = '\0';
- dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
+ *bp = 0;
+ dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
xfree(op);
- ret = tostring(chars);
- free(chars);
- return (ret);
+ return (tostring(buf));
}
static void
-overflo(char *s)
+overflo(const char *s)
{
- ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
+ FATAL("regular expression too big: %.30s...", gettext((char *)s));
}
/* enter follow set of each leaf of vertex v into lfollow[leaf] */
static void
cfoll(fa *f, Node *v)
{
- register int i;
- register int *p;
+ int i;
+ int *p;
switch (type(v)) {
+ ELEAF
LEAF
- f->re[(int)left(v)].ltype = type(v);
- f->re[(int)left(v)].lval = (int)right(v);
+ f->re[info(v)].ltype = type(v);
+ f->re[info(v)].lval.np = right(v);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec, maxsetvec *
+ sizeof (int));
+ tmpset = (int *)realloc(tmpset, maxsetvec *
+ sizeof (int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space in cfoll()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
- overflo("follow set overflow");
- f->re[(int)left(v)].lfollow = p;
+ overflo("out of space building follow set");
+ f->re[info(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--) {
if (setvec[i] == 1)
*++p = i;
}
@@ -330,28 +434,45 @@
case CAT:
case OR:
cfoll(f, left(v));
cfoll(f, right(v));
break;
- default:
- ERROR "unknown type %d in cfoll", type(v) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in cfoll", type(v));
}
}
/*
* collects initially active leaves of p into setvec
* returns 0 or 1 depending on whether p matches empty string
*/
static int
first(Node *p)
{
- register int b;
+ int b, lp;
switch (type(p)) {
+ ELEAF
LEAF
- if (setvec[(int)left(p)] != 1) {
- setvec[(int)left(p)] = 1;
+ /* look for high-water mark of subscripts */
+ lp = info(p);
+ /* guessing here! */
+ while (setcnt >= maxsetvec || lp >= maxsetvec) {
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec, maxsetvec *
+ sizeof (int));
+ tmpset = (int *)realloc(tmpset, maxsetvec *
+ sizeof (int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space in first()");
+ }
+ if (type(p) == EMPTYRE) {
+ setvec[lp] = 0;
+ return (0);
+ }
+ if (setvec[lp] != 1) {
+ setvec[lp] = 1;
setcnt++;
}
if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
return (0); /* empty CCL */
else
@@ -372,11 +493,13 @@
b = first(right(p));
if (first(left(p)) == 0 || b == 0)
return (0);
return (1);
}
- ERROR "unknown type %d in first", type(p) FATAL;
+ /* can't happen */
+ FATAL("can't happen: unknown type %d in first", type(p));
+ /*NOTREACHED*/
return (-1);
}
/* collects leaves that can follow v into setvec */
static void
@@ -406,35 +529,36 @@
return;
}
} else /* v is right child */
follow(p);
return;
- default:
- ERROR "unknown type %d in follow", type(p) FATAL;
- break;
}
}
static int
-member(uchar c, uchar *s) /* is c in s? */
+member(int c, const uchar *sarg) /* is c in s? */
{
+ uchar *s = (uchar *)sarg;
+
while (*s)
if (c == *s++)
return (1);
return (0);
}
int
-match(fa *f, uchar *p)
+match(fa *f, const uchar *p0) /* shortest match ? */
{
- register int s, ns;
+ int s, ns;
+ uchar *p = (uchar *)p0;
s = f->reset ? makeinit(f, 0) : f->initstat;
if (f->out[s])
return (1);
do {
+ /* assert(*p < NCHARS); */
if ((ns = f->gototab[s][*p]) != 0)
s = ns;
else
s = cgoto(f, s, *p);
if (f->out[s])
@@ -442,16 +566,18 @@
} while (*p++ != 0);
return (0);
}
int
-pmatch(fa *f, uchar *p)
+pmatch(fa *f, const uchar *p0) /* longest match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uchar *p = (uchar *)p0;
+ uchar *q;
int i, k;
+ /* s = f->reset ? makeinit(f,1) : f->initstat; */
if (f->reset) {
f->initstat = s = makeinit(f, 1);
} else {
s = f->initstat;
}
@@ -460,10 +586,11 @@
do {
q = p;
do {
if (f->out[s]) /* final state */
patlen = q-p;
+ /* assert(*q < NCHARS); */
if ((ns = f->gototab[s][*q]) != 0)
s = ns;
else
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
@@ -478,11 +605,11 @@
patlen = q - p - 1; /* don't count $ */
if (patlen >= 0) {
patbeg = p;
return (1);
}
- nextin:
+nextin:
s = 2;
if (f->reset) {
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
@@ -500,27 +627,30 @@
} while (*p++ != 0);
return (0);
}
int
-nematch(fa *f, uchar *p)
+nematch(fa *f, const uchar *p0) /* non-empty match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uchar *p = (uchar *)p0;
+ uchar *q;
int i, k;
+ /* s = f->reset ? makeinit(f,1) : f->initstat; */
if (f->reset) {
f->initstat = s = makeinit(f, 1);
} else {
s = f->initstat;
}
patlen = -1;
while (*p) {
q = p;
do {
if (f->out[s]) /* final state */
- patlen = q-p;
+ patlen = q - p;
+ /* assert(*q < NCHARS); */
if ((ns = f->gototab[s][*q]) != 0)
s = ns;
else
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
@@ -535,11 +665,11 @@
patlen = q - p - 1; /* don't count $ */
if (patlen > 0) {
patbeg = p;
return (1);
}
- nnextin:
+nnextin:
s = 2;
if (f->reset) {
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
@@ -557,38 +687,36 @@
p++;
}
return (0);
}
-static Node *regexp(void), *primary(void), *concat(Node *);
-static Node *alt(Node *), *unary(Node *);
-
static Node *
-reparse(uchar *p)
+reparse(const uchar *p)
{
/* parses regular expression pointed to by p */
/* uses relex() to scan regular expression */
Node *np;
dprintf(("reparse <%s>\n", p));
- lastre = prestr = p; /* prestr points to string to be parsed */
+ /* prestr points to string to be parsed */
+ lastre = prestr = (uchar *)p;
rtok = relex();
- if (rtok == '\0')
- ERROR "empty regular expression" FATAL;
- np = regexp();
+ /* GNU compatibility: an empty regexp matches anything */
if (rtok == '\0') {
- return (np);
- } else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ /* FATAL("empty regular expression"); previous */
+ return (op2(EMPTYRE, NIL, NIL));
}
- /*NOTREACHED*/
- return (NULL);
+ np = regexp();
+ if (rtok != '\0') {
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
+ }
+ return (np);
}
static Node *
-regexp(void)
+regexp(void) /* top-level parse of reg expr */
{
return (alt(concat(primary())));
}
static Node *
@@ -596,16 +724,19 @@
{
Node *np;
switch (rtok) {
case CHAR:
- np = op2(CHAR, NIL, (Node *)rlxval);
+ np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
case ALL:
rtok = relex();
return (unary(op2(ALL, NIL, NIL)));
+ case EMPTYRE:
+ rtok = relex();
+ return (unary(op2(ALL, NIL, NIL)));
case DOT:
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
/*LINTED align*/
@@ -617,11 +748,11 @@
np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
- return (unary(op2(CHAR, NIL, (Node *)HAT)));
+ return (unary(op2(CHAR, NIL, itonp(HAT))));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
case '(':
rtok = relex();
@@ -634,26 +765,27 @@
np = regexp();
if (rtok == ')') {
rtok = relex();
return (unary(np));
} else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
}
default:
- ERROR "illegal primary in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("illegal primary in regular expression %s at %s",
+ lastre, prestr);
}
/*NOTREACHED*/
return (NULL);
}
static Node *
concat(Node *np)
{
switch (rtok) {
- case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
+ case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL:
+ case '$': case '(':
return (concat(op2(CAT, np, primary())));
default:
return (np);
}
}
@@ -684,16 +816,76 @@
default:
return (np);
}
}
+/*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), the expanded character class (prior to range expansion)
+ * must be less than twice the size of their full name.
+ */
+
+/*
+ * Because isblank doesn't show up in any of the header files on any
+ * system i use, it's defined here. if some other locale has a richer
+ * definition of "blank", define HAS_ISBLANK and provide your own
+ * version.
+ * the parentheses here are an attempt to find a path through the maze
+ * of macro definition and/or function and/or version provided. thanks
+ * to nelson beebe for the suggestion; let's see if it works everywhere.
+ */
+
+/* #define HAS_ISBLANK */
+#ifndef HAS_ISBLANK
+
+int
+(xisblank)(int c)
+{
+ return (c == ' ' || c == '\t');
+}
+
+#endif
+
+struct charclass {
+ const char *cc_name;
+ int cc_namelen;
+ int (*cc_func)(int);
+} charclasses[] = {
+ { "alnum", 5, isalnum },
+ { "alpha", 5, isalpha },
+#ifndef HAS_ISBLANK
+ { "blank", 5, isspace }, /* was isblank */
+#else
+ { "blank", 5, isblank },
+#endif
+ { "cntrl", 5, iscntrl },
+ { "digit", 5, isdigit },
+ { "graph", 5, isgraph },
+ { "lower", 5, islower },
+ { "print", 5, isprint },
+ { "punct", 5, ispunct },
+ { "space", 5, isspace },
+ { "upper", 5, isupper },
+ { "xdigit", 6, isxdigit },
+ { NULL, 0, NULL },
+};
+
static int
relex(void) /* lexical analyzer for reparse */
{
- register int c;
- uchar *cbuf;
- int clen, cflag;
+ int c, n;
+ int cflag;
+ uchar *buf = 0;
+ size_t bufsz = 100;
+ uchar *bp;
+ struct charclass *cc;
+ int i;
switch (c = *prestr++) {
case '|': return OR;
case '*': return STAR;
case '+': return PLUS;
@@ -704,97 +896,125 @@
case '$':
case '(':
case ')':
return (c);
case '\\':
- if ((c = *prestr++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*prestr)) {
- n = 8 * n + *prestr++ - '0';
- if (isdigit(*prestr))
- n = 8 * n + *prestr++ - '0';
- }
- c = n;
- } /* else it's now in c */
- rlxval = c;
+ rlxval = quoted(&prestr);
return (CHAR);
default:
rlxval = c;
return (CHAR);
case '[':
- clen = 0;
+ if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL)
+ FATAL("out of space in reg expr %.10s..", lastre);
+ bp = buf;
if (*prestr == '^') {
cflag = 1;
prestr++;
} else
cflag = 0;
- init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
+ n = 2 * strlen((const char *) prestr)+1;
+ if (!adjbuf(&buf, &bufsz, n, n, &bp, "relex1"))
+ FATAL("out of space for reg expr %.10s...", lastre);
for (;;) {
if ((c = *prestr++) == '\\') {
- cbuf[clen++] = '\\';
+ *bp++ = '\\';
if ((c = *prestr++) == '\0') {
- ERROR
- "nonterminated character class %s", lastre FATAL;
+ FATAL(
+ "nonterminated character class %s", lastre);
}
- cbuf[clen++] = c;
+ *bp++ = c;
+ } else if (c == '[' && *prestr == ':') {
+ /*
+ * POSIX char class names, Dag-Erling
+ * Smorgrav, des@ofug.org
+ */
+ for (cc = charclasses; cc->cc_name; cc++)
+ if (strncmp((const char *) prestr + 1,
+ (const char *) cc->cc_name,
+ cc->cc_namelen) == 0)
+ break;
+ if (cc->cc_name != NULL &&
+ prestr[1 + cc->cc_namelen] == ':' &&
+ prestr[2 + cc->cc_namelen] == ']') {
+ prestr += cc->cc_namelen + 3;
+ for (i = 0; i < NCHARS; i++) {
+ if (!adjbuf(&buf, &bufsz,
+ bp - buf + 1, 100, &bp,
+ "relex2"))
+ FATAL(
+ "out of space for reg expr %.10s...", lastre);
+ if (cc->cc_func(i)) {
+ *bp++ = i;
+ n++;
+ }
+ }
+ } else
+ *bp++ = c;
+ } else if (c == '\0') {
+ FATAL("nonterminated character class %.20s",
+ lastre);
+ } else if (bp == buf) { /* 1st char is special */
+ *bp++ = c;
} else if (c == ']') {
- cbuf[clen] = 0;
- rlxstr = tostring(cbuf);
- free(cbuf);
+ *bp++ = 0;
+ rlxstr = tostring(buf);
if (cflag == 0)
return (CCL);
else
return (NCCL);
- } else if (c == '\n') {
- ERROR "newline in character class %s...",
- lastre FATAL;
- } else if (c == '\0') {
- ERROR "nonterminated character class %s",
- lastre FATAL;
} else
- cbuf[clen++] = c;
+ *bp++ = c;
}
/*NOTREACHED*/
}
return (0);
}
static int
cgoto(fa *f, int s, int c)
{
- register int i, j, k;
- register int *p, *q;
+ int i, j, k;
+ int *p, *q;
+ assert(c == HAT || c < NCHARS);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
+ tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
+ if (setvec == 0 || tmpset == 0)
+ overflo("out of space in cgoto()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
/* compute positions of gototab[s,c] into setvec */
p = f->posns[s];
for (i = 1; i <= *p; i++) {
if ((k = f->re[p[i]].ltype) != FINAL) {
- if (k == CHAR && c == f->re[p[i]].lval ||
- k == DOT && c != 0 && c != HAT ||
- k == ALL && c != 0 ||
- k == CCL &&
- member(c, (uchar *)f->re[p[i]].lval) ||
- k == NCCL &&
- !member(c, (uchar *)f->re[p[i]].lval) &&
- c != 0 && c != HAT) {
+ if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
+ (k == DOT && c != 0 && c != HAT) ||
+ (k == ALL && c != 0) ||
+ (k == EMPTYRE && c != 0) ||
+ (k == CCL &&
+ member(c, (uchar *)f->re[p[i]].lval.up)) ||
+ ((k == NCCL &&
+ !member(c, (uchar *)f->re[p[i]].lval.up)) &&
+ c != 0 && c != HAT)) {
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
+ if (q[j] >= maxsetvec) {
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec,
+ maxsetvec * sizeof (int));
+ tmpset = (int *)realloc(tmpset,
+ maxsetvec * sizeof (int));
+ if (setvec == 0 ||
+ tmpset == 0)
+ overflo(
+ "cgoto overflow");
+ }
if (setvec[q[j]] == 0) {
setcnt++;
setvec[q[j]] = 1;
}
}
@@ -821,11 +1041,11 @@
return (i);
different:;
}
/* add tmpset to current set of states */
- if (f->curstat >= NSTATES-1) {
+ if (f->curstat >= NSTATES - 1) {
f->curstat = 2;
f->reset = 1;
for (i = 2; i < NSTATES; i++)
xfree(f->posns[i]);
} else
@@ -846,19 +1066,21 @@
f->out[f->curstat] = 0;
return (f->curstat);
}
static void
-freefa(fa *f)
+freefa(fa *f) /* free a finite automaton */
{
-
- register int i;
+ int i;
if (f == NULL)
return;
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
- for (i = 0; i <= f->accept; i++)
+ for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
+ if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
+ xfree((f->re[i].lval.np));
+ }
xfree(f->restr);
xfree(f);
}