Print this page
3731 Update nawk to version 20121220

@@ -23,30 +23,51 @@
 /*
  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-/*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/*        All Rights Reserved   */
-
-#pragma ident   "%Z%%M% %I%     %E% SMI"
+/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
 
 #define DEBUG
 
 #include "awk.h"
 #include "y.tab.h"
 
-#define HAT     (NCHARS-1)      /* matches ^ in regular expr */
+#define HAT     (NCHARS+2)      /* matches ^ in regular expr */
                                 /* NCHARS is 2**n */
-#define MAXLIN (3 * LINE_MAX)
+#define MAXLIN  22
 
-#define type(v)         (v)->nobj
+#define type(v)         (v)->nobj       /* badly overloaded here */
+#define info(v)         (v)->ntype      /* badly overloaded here */
 #define left(v)         (v)->narg[0]
 #define right(v)        (v)->narg[1]
 #define parent(v)       (v)->nnext
 
 #define LEAF    case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
+#define ELEAF   case EMPTYRE:           /* empty string in regexp */
 #define UNARY   case STAR: case PLUS: case QUEST:
 
 /*
  * encoding in tree Nodes:
  *      leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):

@@ -54,19 +75,19 @@
  *      unary (STAR, PLUS, QUEST): left is child, right is null
  *      binary (CAT, OR): left and right are children
  *      parent contains pointer to parent
  */
 
-int     setvec[MAXLIN];
-int     tmpset[MAXLIN];
-Node    *point[MAXLIN];
+int     *setvec;
+int     *tmpset;
+int     maxsetvec = 0;
 
 int     rtok;           /* next token in current re */
 int     rlxval;
-uchar   *rlxstr;
-uchar   *prestr;        /* current position in current re */
-uchar   *lastre;        /* origin of last re */
+static  uchar   *rlxstr;
+static  uchar   *prestr;        /* current position in current re */
+static  uchar   *lastre;        /* origin of last re */
 
 static  int setcnt;
 static  int poscnt;
 
 uchar   *patbeg;

@@ -74,61 +95,76 @@
 
 #define NFA     20      /* cache this many dynamic fa's */
 fa      *fatab[NFA];
 int     nfatab  = 0;    /* entries in fatab */
 
-static fa       *mkdfa(uchar *, int);
+static fa       *mkdfa(const uchar *, int);
 static int      makeinit(fa *, int);
 static void     penter(Node *);
 static void     freetr(Node *);
-static void     overflo(char *);
+static void     overflo(const char *);
 static void     cfoll(fa *, Node *);
 static void     follow(Node *);
-static Node     *reparse(uchar *);
+static Node     *reparse(const uchar *);
 static int      relex(void);
 static void     freefa(fa *);
 static int      cgoto(fa *, int, int);
+static Node     *regexp(void);
+static Node     *primary(void);
+static Node     *concat(Node *);
+static Node     *alt(Node *);
+static Node     *unary(Node *);
 
 fa *
-makedfa(uchar *s, int anchor)   /* returns dfa for reg expr s */
+makedfa(const uchar *s, int anchor)     /* returns dfa for reg expr s */
 {
         int i, use, nuse;
         fa *pfa;
+        static int now = 1;
+
+        if (setvec == 0) {      /* first time through any RE */
+                maxsetvec = MAXLIN;
+                setvec = (int *)malloc(maxsetvec * sizeof (int));
+                tmpset = (int *)malloc(maxsetvec * sizeof (int));
+                if (setvec == 0 || tmpset == 0)
+                        overflo("out of space initializing makedfa");
+        }
 
         if (compile_time)       /* a constant for sure */
                 return (mkdfa(s, anchor));
         for (i = 0; i < nfatab; i++) {  /* is it there already? */
                 if (fatab[i]->anchor == anchor &&
-                    strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
-                        fatab[i]->use++;
+                    strcmp((const char *)fatab[i]->restr, (char *)s) == 0) {
+                        fatab[i]->use = now++;
                         return (fatab[i]);
                 }
         }
         pfa = mkdfa(s, anchor);
         if (nfatab < NFA) {     /* room for another */
                 fatab[nfatab] = pfa;
-                fatab[nfatab]->use = 1;
+                fatab[nfatab]->use = now++;
                 nfatab++;
                 return (pfa);
         }
         use = fatab[0]->use;    /* replace least-recently used */
         nuse = 0;
-        for (i = 1; i < nfatab; i++)
+        for (i = 1; i < nfatab; i++) {
                 if (fatab[i]->use < use) {
                         use = fatab[i]->use;
                         nuse = i;
                 }
+        }
         freefa(fatab[nuse]);
         fatab[nuse] = pfa;
-        pfa->use = 1;
+        pfa->use = now++;
         return (pfa);
 }
 
 fa *
-mkdfa(uchar *s, int anchor)     /* does the real work of making a dfa */
+mkdfa(const uchar *s, int anchor)
+{                               /* does the real work of making a dfa */
         /* anchor = 1 for anchored matches, else 0 */
-{
         Node *p, *p1;
         fa *f;
 
         p = reparse(s);
         p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);

@@ -137,11 +173,11 @@
                 /* put FINAL after reg.  exp. */
 
         poscnt = 0;
         penter(p1);     /* enter parent pointers and leaf indices */
         if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
-                overflo("no room for fa");
+                overflo("out of space for fa");
         /* penter has computed number of positions in re */
         f->accept = poscnt-1;
         cfoll(f, p1);   /* set up follow sets */
         freetr(p1);
         if ((f->posns[0] =

@@ -158,11 +194,11 @@
 }
 
 static int
 makeinit(fa *f, int anchor)
 {
-        register int i, k;
+        int i, k;
 
         f->curstat = 2;
         f->out[2] = 0;
         f->reset = 0;
         k = *(f->re[0].lfollow);

@@ -192,13 +228,14 @@
 
 void
 penter(Node *p) /* set up parent pointers and leaf indices */
 {
         switch (type(p)) {
+        ELEAF
         LEAF
-                left(p) = (Node *) poscnt;
-                point[poscnt++] = p;
+                info(p) = poscnt;
+                poscnt++;
                 break;
         UNARY
                 penter(left(p));
                 parent(left(p)) = p;
                 break;

@@ -207,20 +244,21 @@
                 penter(left(p));
                 penter(right(p));
                 parent(left(p)) = p;
                 parent(right(p)) = p;
                 break;
-        default:
-                ERROR "unknown type %d in penter", type(p) FATAL;
+        default:        /* can't happen */
+                FATAL("can't happen: unknown type %d in penter", type(p));
                 break;
         }
 }
 
 static void
 freetr(Node *p) /* free parse tree */
 {
         switch (type(p)) {
+        ELEAF
         LEAF
                 xfree(p);
                 break;
         UNARY
                 freetr(left(p));

@@ -230,28 +268,49 @@
         case OR:
                 freetr(left(p));
                 freetr(right(p));
                 xfree(p);
                 break;
-        default:
-                ERROR "unknown type %d in freetr", type(p) FATAL;
+        default:        /* can't happen */
+                FATAL("can't happen: unknown type %d in freetr", type(p));
                 break;
         }
 }
 
-uchar *
-cclenter(uchar *p)
-{
-        register int i, c;
-        uchar *op, *chars, *ret;
-        size_t  bsize;
+/*
+ * in the parsing of regular expressions, metacharacters like . have
+ * to be seen literally;  \056 is not a metacharacter.
+ */
+int
+hexstr(uchar **pp)      /* find and eval hex string at pp, return new p */
+{                       /* only pick up one 8-bit byte (2 chars) */
+        uchar *p;
+        int n = 0;
+        int i;
+
+        for (i = 0, p = (uchar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
+                if (isdigit(*p))
+                        n = 16 * n + *p - '0';
+                else if (*p >= 'a' && *p <= 'f')
+                        n = 16 * n + *p - 'a' + 10;
+                else if (*p >= 'A' && *p <= 'F')
+                        n = 16 * n + *p - 'A' + 10;
+        }
+        *pp = (uchar *)p;
+        return (n);
+}
+
+/* multiple use of arg */
+#define isoctdigit(c)   ((c) >= '0' && (c) <= '7')
+
+int
+quoted(uchar **pp)      /* pick up next thing after a \\ */
+{                       /* and increment *pp */
+
+        uchar *p = *pp;
+        int c;
 
-        init_buf(&chars, &bsize, LINE_INCR);
-        op = p;
-        i = 0;
-        while ((c = *p++) != 0) {
-                if (c == '\\') {
                         if ((c = *p++) == 't')
                                 c = '\t';
                         else if (c == 'n')
                                 c = '\n';
                         else if (c == 'f')

@@ -260,66 +319,111 @@
                                 c = '\r';
                         else if (c == 'b')
                                 c = '\b';
                         else if (c == '\\')
                                 c = '\\';
-                        else if (isdigit(c)) {
+        else if (c == 'x') {    /* hexadecimal goo follows */
+                c = hexstr(&p); /* this adds a null if number is invalid */
+        } else if (isoctdigit(c)) {     /* \d \dd \ddd */
                                 int n = c - '0';
-                                if (isdigit(*p)) {
+                if (isoctdigit(*p)) {
                                         n = 8 * n + *p++ - '0';
-                                        if (isdigit(*p))
+                        if (isoctdigit(*p))
                                                 n = 8 * n + *p++ - '0';
                                 }
                                 c = n;
                         } /* else */
                                 /* c = c; */
-                } else if (c == '-' && i > 0 && chars[i-1] != 0) {
+        *pp = p;
+        return (c);
+}
+
+uchar *
+cclenter(const uchar *argp)             /* add a character class */
+{
+        int i, c, c2;
+        uchar  *p = (uchar *)argp;
+        uchar *op, *bp;
+        static uchar *buf = NULL;
+        size_t  bufsz = 100;
+
+        op = p;
+        if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL)
+                FATAL("out of space for character class [%.10s...] 1", p);
+        bp = buf;
+        for (i = 0; (c = *p++) != 0; ) {
+                if (c == '\\') {
+                        c = quoted(&p);
+                } else if (c == '-' && i > 0 && bp[-1] != 0) {
                         if (*p != 0) {
-                                c = chars[i-1];
-                                while ((uchar)c < *p) { /* fails if *p is \\ */
-                                        expand_buf(&chars, &bsize, i);
-                                        chars[i++] = ++c;
+                                c = bp[-1];
+                                c2 = *p++;
+                                if (c2 == '\\')
+                                        c2 = quoted(&p);
+                                if (c > c2) {   /* empty; ignore */
+                                        bp--;
+                                        i--;
+                                        continue;
+                                }
+                                while (c < c2) {
+                                        if (adjbuf(&buf, &bufsz,
+                                            bp - buf + 2, 100, &bp,
+                                            "cclenter1") == 0)
+                                                FATAL(
+                        "out of space for character class [%.10s...] 2", p);
+                                        *bp++ = ++c;
+                                        i++;
                                 }
-                                p++;
                                 continue;
                         }
                 }
-                expand_buf(&chars, &bsize, i);
-                chars[i++] = c;
+                if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter2"))
+                        FATAL(
+                "out of space for character class [%.10s...] 3", p);
+                *bp++ = c;
+                i++;
         }
-        chars[i++] = '\0';
-        dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
+        *bp = 0;
+        dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
         xfree(op);
-        ret = tostring(chars);
-        free(chars);
-        return (ret);
+        return (tostring(buf));
 }
 
 static void
-overflo(char *s)
+overflo(const char *s)
 {
-        ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
+        FATAL("regular expression too big: %.30s...", gettext((char *)s));
 }
 
 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
 static void
 cfoll(fa *f, Node *v)
 {
-        register int i;
-        register int *p;
+        int i;
+        int *p;
 
         switch (type(v)) {
+        ELEAF
         LEAF
-                f->re[(int)left(v)].ltype = type(v);
-                f->re[(int)left(v)].lval = (int)right(v);
+                f->re[info(v)].ltype = type(v);
+                f->re[info(v)].lval.np = right(v);
+                while (f->accept >= maxsetvec) {        /* guessing here! */
+                        maxsetvec *= 4;
+                        setvec = (int *)realloc(setvec, maxsetvec *
+                            sizeof (int));
+                        tmpset = (int *)realloc(tmpset, maxsetvec *
+                            sizeof (int));
+                        if (setvec == 0 || tmpset == 0)
+                                overflo("out of space in cfoll()");
+                }
                 for (i = 0; i <= f->accept; i++)
                         setvec[i] = 0;
                 setcnt = 0;
                 follow(v);      /* computes setvec and setcnt */
                 if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
-                        overflo("follow set overflow");
-                f->re[(int)left(v)].lfollow = p;
+                        overflo("out of space building follow set");
+                f->re[info(v)].lfollow = p;
                 *p = setcnt;
                 for (i = f->accept; i >= 0; i--) {
                         if (setvec[i] == 1)
                                 *++p = i;
                 }

@@ -330,28 +434,45 @@
         case CAT:
         case OR:
                 cfoll(f, left(v));
                 cfoll(f, right(v));
                 break;
-        default:
-                ERROR "unknown type %d in cfoll", type(v) FATAL;
+        default:        /* can't happen */
+                FATAL("can't happen: unknown type %d in cfoll", type(v));
         }
 }
 
 /*
  * collects initially active leaves of p into setvec
  * returns 0 or 1 depending on whether p matches empty string
  */
 static int
 first(Node *p)
 {
-        register int b;
+        int b, lp;
 
         switch (type(p)) {
+        ELEAF
         LEAF
-                if (setvec[(int)left(p)] != 1) {
-                        setvec[(int)left(p)] = 1;
+                /* look for high-water mark of subscripts */
+                lp = info(p);
+                /* guessing here! */
+                while (setcnt >= maxsetvec || lp >= maxsetvec) {
+                        maxsetvec *= 4;
+                        setvec = (int *)realloc(setvec, maxsetvec *
+                            sizeof (int));
+                        tmpset = (int *)realloc(tmpset, maxsetvec *
+                            sizeof (int));
+                        if (setvec == 0 || tmpset == 0)
+                                overflo("out of space in first()");
+                }
+                if (type(p) == EMPTYRE) {
+                        setvec[lp] = 0;
+                        return (0);
+                }
+                if (setvec[lp] != 1) {
+                        setvec[lp] = 1;
                         setcnt++;
                 }
                 if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
                         return (0);             /* empty CCL */
                 else

@@ -372,11 +493,13 @@
                 b = first(right(p));
                 if (first(left(p)) == 0 || b == 0)
                         return (0);
                 return (1);
         }
-        ERROR "unknown type %d in first", type(p) FATAL;
+        /* can't happen */
+        FATAL("can't happen: unknown type %d in first", type(p));
+        /*NOTREACHED*/
         return (-1);
 }
 
 /* collects leaves that can follow v into setvec */
 static void

@@ -406,35 +529,36 @@
                                 return;
                         }
                 } else          /* v is right child */
                         follow(p);
                 return;
-        default:
-                ERROR "unknown type %d in follow", type(p) FATAL;
-                break;
         }
 }
 
 static int
-member(uchar c, uchar *s)       /* is c in s? */
+member(int c, const uchar *sarg)        /* is c in s? */
 {
+        uchar *s = (uchar *)sarg;
+
         while (*s)
                 if (c == *s++)
                         return (1);
         return (0);
 }
 
 
 int
-match(fa *f, uchar *p)
+match(fa *f, const uchar *p0)   /* shortest match ? */
 {
-        register int s, ns;
+        int s, ns;
+        uchar *p = (uchar *)p0;
 
         s = f->reset ? makeinit(f, 0) : f->initstat;
         if (f->out[s])
                 return (1);
         do {
+                /* assert(*p < NCHARS); */
                 if ((ns = f->gototab[s][*p]) != 0)
                         s = ns;
                 else
                         s = cgoto(f, s, *p);
                 if (f->out[s])

@@ -442,16 +566,18 @@
         } while (*p++ != 0);
         return (0);
 }
 
 int
-pmatch(fa *f, uchar *p)
+pmatch(fa *f, const uchar *p0)          /* longest match, for sub */
 {
-        register int s, ns;
-        register uchar *q;
+        int s, ns;
+        uchar *p = (uchar *)p0;
+        uchar *q;
         int i, k;
 
+        /* s = f->reset ? makeinit(f,1) : f->initstat; */
         if (f->reset) {
                 f->initstat = s = makeinit(f, 1);
         } else {
                 s = f->initstat;
         }

@@ -460,10 +586,11 @@
         do {
                 q = p;
                 do {
                         if (f->out[s])          /* final state */
                                 patlen = q-p;
+                        /* assert(*q < NCHARS); */
                         if ((ns = f->gototab[s][*q]) != 0)
                                 s = ns;
                         else
                                 s = cgoto(f, s, *q);
                         if (s == 1) {   /* no transition */

@@ -478,11 +605,11 @@
                         patlen = q - p - 1;     /* don't count $ */
                 if (patlen >= 0) {
                         patbeg = p;
                         return (1);
                 }
-        nextin:
+nextin:
                 s = 2;
                 if (f->reset) {
                         for (i = 2; i <= f->curstat; i++)
                                 xfree(f->posns[i]);
                         k = *f->posns[0];

@@ -500,27 +627,30 @@
         } while (*p++ != 0);
         return (0);
 }
 
 int
-nematch(fa *f, uchar *p)
+nematch(fa *f, const uchar *p0)         /* non-empty match, for sub */
 {
-        register int s, ns;
-        register uchar *q;
+        int s, ns;
+        uchar *p = (uchar *)p0;
+        uchar *q;
         int i, k;
 
+        /* s = f->reset ? makeinit(f,1) : f->initstat; */
         if (f->reset) {
                 f->initstat = s = makeinit(f, 1);
         } else {
                 s = f->initstat;
         }
         patlen = -1;
         while (*p) {
                 q = p;
                 do {
                         if (f->out[s])          /* final state */
-                                patlen = q-p;
+                                patlen = q - p;
+                        /* assert(*q < NCHARS); */
                         if ((ns = f->gototab[s][*q]) != 0)
                                 s = ns;
                         else
                                 s = cgoto(f, s, *q);
                         if (s == 1) {   /* no transition */

@@ -535,11 +665,11 @@
                         patlen = q - p - 1;     /* don't count $ */
                 if (patlen > 0) {
                         patbeg = p;
                         return (1);
                 }
-        nnextin:
+nnextin:
                 s = 2;
                 if (f->reset) {
                         for (i = 2; i <= f->curstat; i++)
                                 xfree(f->posns[i]);
                         k = *f->posns[0];

@@ -557,38 +687,36 @@
                 p++;
         }
         return (0);
 }
 
-static Node *regexp(void), *primary(void), *concat(Node *);
-static Node *alt(Node *), *unary(Node *);
-
 static Node *
-reparse(uchar *p)
+reparse(const uchar *p)
 {
         /* parses regular expression pointed to by p */
         /* uses relex() to scan regular expression */
         Node *np;
 
         dprintf(("reparse <%s>\n", p));
-        lastre = prestr = p;    /* prestr points to string to be parsed */
+        /* prestr points to string to be parsed */
+        lastre = prestr = (uchar *)p;
         rtok = relex();
-        if (rtok == '\0')
-                ERROR "empty regular expression" FATAL;
-        np = regexp();
+        /* GNU compatibility: an empty regexp matches anything */
         if (rtok == '\0') {
-                return (np);
-        } else {
-                ERROR "syntax error in regular expression %s at %s",
-                    lastre, prestr FATAL;
+                /* FATAL("empty regular expression"); previous */
+                return (op2(EMPTYRE, NIL, NIL));
         }
-        /*NOTREACHED*/
-        return (NULL);
+        np = regexp();
+        if (rtok != '\0') {
+                FATAL("syntax error in regular expression %s at %s",
+                    lastre, prestr);
+        }
+        return (np);
 }
 
 static Node *
-regexp(void)
+regexp(void)    /* top-level parse of reg expr */
 {
         return (alt(concat(primary())));
 }
 
 static Node *

@@ -596,16 +724,19 @@
 {
         Node *np;
 
         switch (rtok) {
         case CHAR:
-                np = op2(CHAR, NIL, (Node *)rlxval);
+                np = op2(CHAR, NIL, itonp(rlxval));
                 rtok = relex();
                 return (unary(np));
         case ALL:
                 rtok = relex();
                 return (unary(op2(ALL, NIL, NIL)));
+        case EMPTYRE:
+                rtok = relex();
+                return (unary(op2(ALL, NIL, NIL)));
         case DOT:
                 rtok = relex();
                 return (unary(op2(DOT, NIL, NIL)));
         case CCL:
                 /*LINTED align*/

@@ -617,11 +748,11 @@
                 np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
                 rtok = relex();
                 return (unary(np));
         case '^':
                 rtok = relex();
-                return (unary(op2(CHAR, NIL, (Node *)HAT)));
+                return (unary(op2(CHAR, NIL, itonp(HAT))));
         case '$':
                 rtok = relex();
                 return (unary(op2(CHAR, NIL, NIL)));
         case '(':
                 rtok = relex();

@@ -634,26 +765,27 @@
                 np = regexp();
                 if (rtok == ')') {
                         rtok = relex();
                         return (unary(np));
                 } else {
-                        ERROR "syntax error in regular expression %s at %s",
-                            lastre, prestr FATAL;
+                        FATAL("syntax error in regular expression %s at %s",
+                            lastre, prestr);
                 }
         default:
-                ERROR "illegal primary in regular expression %s at %s",
-                    lastre, prestr FATAL;
+                FATAL("illegal primary in regular expression %s at %s",
+                    lastre, prestr);
         }
         /*NOTREACHED*/
         return (NULL);
 }
 
 static Node *
 concat(Node *np)
 {
         switch (rtok) {
-        case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
+        case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL:
+        case '$': case '(':
                 return (concat(op2(CAT, np, primary())));
         default:
                 return (np);
         }
 }

@@ -684,16 +816,76 @@
         default:
                 return (np);
         }
 }
 
+/*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), the expanded character class (prior to range expansion)
+ * must be less than twice the size of their full name.
+ */
+
+/*
+ * Because isblank doesn't show up in any of the header files on any
+ * system i use, it's defined here.  if some other locale has a richer
+ * definition of "blank", define HAS_ISBLANK and provide your own
+ * version.
+ * the parentheses here are an attempt to find a path through the maze
+ * of macro definition and/or function and/or version provided.  thanks
+ * to nelson beebe for the suggestion; let's see if it works everywhere.
+ */
+
+/* #define HAS_ISBLANK */
+#ifndef HAS_ISBLANK
+
+int
+(xisblank)(int c)
+{
+        return (c == ' ' || c == '\t');
+}
+
+#endif
+
+struct charclass {
+        const char *cc_name;
+        int cc_namelen;
+        int (*cc_func)(int);
+} charclasses[] = {
+        { "alnum",      5,      isalnum },
+        { "alpha",      5,      isalpha },
+#ifndef HAS_ISBLANK
+        { "blank",      5,      isspace }, /* was isblank */
+#else
+        { "blank",      5,      isblank },
+#endif
+        { "cntrl",      5,      iscntrl },
+        { "digit",      5,      isdigit },
+        { "graph",      5,      isgraph },
+        { "lower",      5,      islower },
+        { "print",      5,      isprint },
+        { "punct",      5,      ispunct },
+        { "space",      5,      isspace },
+        { "upper",      5,      isupper },
+        { "xdigit",     6,      isxdigit },
+        { NULL,         0,      NULL },
+};
+
 static int
 relex(void)             /* lexical analyzer for reparse */
 {
-        register int c;
-        uchar *cbuf;
-        int clen, cflag;
+        int c, n;
+        int cflag;
+        uchar *buf = 0;
+        size_t bufsz = 100;
+        uchar *bp;
+        struct charclass *cc;
+        int i;
 
         switch (c = *prestr++) {
         case '|': return OR;
         case '*': return STAR;
         case '+': return PLUS;

@@ -704,97 +896,125 @@
         case '$':
         case '(':
         case ')':
                 return (c);
         case '\\':
-                if ((c = *prestr++) == 't')
-                        c = '\t';
-                else if (c == 'n')
-                        c = '\n';
-                else if (c == 'f')
-                        c = '\f';
-                else if (c == 'r')
-                        c = '\r';
-                else if (c == 'b')
-                        c = '\b';
-                else if (c == '\\')
-                        c = '\\';
-                else if (isdigit(c)) {
-                        int n = c - '0';
-                        if (isdigit(*prestr)) {
-                                n = 8 * n + *prestr++ - '0';
-                                if (isdigit(*prestr))
-                                        n = 8 * n + *prestr++ - '0';
-                        }
-                        c = n;
-                } /* else it's now in c */
-                rlxval = c;
+                rlxval = quoted(&prestr);
                 return (CHAR);
         default:
                 rlxval = c;
                 return (CHAR);
         case '[':
-                clen = 0;
+                if (buf == 0 && (buf = (uchar *)malloc(bufsz)) == NULL)
+                        FATAL("out of space in reg expr %.10s..", lastre);
+                bp = buf;
                 if (*prestr == '^') {
                         cflag = 1;
                         prestr++;
                 } else
                         cflag = 0;
-                init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
+                n = 2 * strlen((const char *) prestr)+1;
+                if (!adjbuf(&buf, &bufsz, n, n, &bp, "relex1"))
+                        FATAL("out of space for reg expr %.10s...", lastre);
                 for (;;) {
                         if ((c = *prestr++) == '\\') {
-                                cbuf[clen++] = '\\';
+                                *bp++ = '\\';
                                 if ((c = *prestr++) == '\0') {
-                                        ERROR
-                        "nonterminated character class %s", lastre FATAL;
+                                        FATAL(
+                        "nonterminated character class %s", lastre);
                                 }
-                                cbuf[clen++] = c;
+                                *bp++ = c;
+                        } else if (c == '[' && *prestr == ':') {
+                                /*
+                                 * POSIX char class names, Dag-Erling
+                                 * Smorgrav, des@ofug.org
+                                 */
+                                for (cc = charclasses; cc->cc_name; cc++)
+                                        if (strncmp((const char *) prestr + 1,
+                                            (const char *) cc->cc_name,
+                                            cc->cc_namelen) == 0)
+                                                break;
+                                if (cc->cc_name != NULL &&
+                                    prestr[1 + cc->cc_namelen] == ':' &&
+                                    prestr[2 + cc->cc_namelen] == ']') {
+                                        prestr += cc->cc_namelen + 3;
+                                        for (i = 0; i < NCHARS; i++) {
+                                                if (!adjbuf(&buf, &bufsz,
+                                                    bp - buf + 1, 100, &bp,
+                                                    "relex2"))
+                                                        FATAL(
+                                "out of space for reg expr %.10s...", lastre);
+                                                if (cc->cc_func(i)) {
+                                                        *bp++ = i;
+                                                        n++;
+                                                }
+                                        }
+                                } else
+                                        *bp++ = c;
+                        } else if (c == '\0') {
+                                FATAL("nonterminated character class %.20s",
+                                    lastre);
+                        } else if (bp == buf) { /* 1st char is special */
+                                *bp++ = c;
                         } else if (c == ']') {
-                                cbuf[clen] = 0;
-                                rlxstr = tostring(cbuf);
-                                free(cbuf);
+                                *bp++ = 0;
+                                rlxstr = tostring(buf);
                                 if (cflag == 0)
                                         return (CCL);
                                 else
                                         return (NCCL);
-                        } else if (c == '\n') {
-                                ERROR "newline in character class %s...",
-                                    lastre FATAL;
-                        } else if (c == '\0') {
-                                ERROR "nonterminated character class %s",
-                                    lastre FATAL;
                         } else
-                                cbuf[clen++] = c;
+                                *bp++ = c;
                 }
                 /*NOTREACHED*/
         }
         return (0);
 }
 
 static int
 cgoto(fa *f, int s, int c)
 {
-        register int i, j, k;
-        register int *p, *q;
+        int i, j, k;
+        int *p, *q;
 
+        assert(c == HAT || c < NCHARS);
+        while (f->accept >= maxsetvec) {        /* guessing here! */
+                maxsetvec *= 4;
+                setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
+                tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
+                if (setvec == 0 || tmpset == 0)
+                        overflo("out of space in cgoto()");
+        }
         for (i = 0; i <= f->accept; i++)
                 setvec[i] = 0;
         setcnt = 0;
         /* compute positions of gototab[s,c] into setvec */
         p = f->posns[s];
         for (i = 1; i <= *p; i++) {
                 if ((k = f->re[p[i]].ltype) != FINAL) {
-                        if (k == CHAR && c == f->re[p[i]].lval ||
-                            k == DOT && c != 0 && c != HAT ||
-                            k == ALL && c != 0 ||
-                            k == CCL &&
-                            member(c, (uchar *)f->re[p[i]].lval) ||
-                            k == NCCL &&
-                            !member(c, (uchar *)f->re[p[i]].lval) &&
-                            c != 0 && c != HAT) {
+                        if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
+                            (k == DOT && c != 0 && c != HAT) ||
+                            (k == ALL && c != 0) ||
+                            (k == EMPTYRE && c != 0) ||
+                            (k == CCL &&
+                            member(c, (uchar *)f->re[p[i]].lval.up)) ||
+                            ((k == NCCL &&
+                            !member(c, (uchar *)f->re[p[i]].lval.up)) &&
+                            c != 0 && c != HAT)) {
                                 q = f->re[p[i]].lfollow;
                                 for (j = 1; j <= *q; j++) {
+                                        if (q[j] >= maxsetvec) {
+                                                maxsetvec *= 4;
+                                                setvec = (int *)realloc(setvec,
+                                                    maxsetvec * sizeof (int));
+                                                tmpset = (int *)realloc(tmpset,
+                                                    maxsetvec * sizeof (int));
+                                                if (setvec == 0 ||
+                                                    tmpset == 0)
+                                                        overflo(
+                                                "cgoto overflow");
+                                        }
                                         if (setvec[q[j]] == 0) {
                                                 setcnt++;
                                                 setvec[q[j]] = 1;
                                         }
                                 }

@@ -821,11 +1041,11 @@
                 return (i);
         different:;
         }
 
         /* add tmpset to current set of states */
-        if (f->curstat >= NSTATES-1) {
+        if (f->curstat >= NSTATES - 1) {
                 f->curstat = 2;
                 f->reset = 1;
                 for (i = 2; i < NSTATES; i++)
                         xfree(f->posns[i]);
         } else

@@ -846,19 +1066,21 @@
                 f->out[f->curstat] = 0;
         return (f->curstat);
 }
 
 static void
-freefa(fa *f)
+freefa(fa *f)   /* free a finite automaton */
 {
-
-        register int i;
+        int i;
 
         if (f == NULL)
                 return;
         for (i = 0; i <= f->curstat; i++)
                 xfree(f->posns[i]);
-        for (i = 0; i <= f->accept; i++)
+        for (i = 0; i <= f->accept; i++) {
                 xfree(f->re[i].lfollow);
+                if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
+                        xfree((f->re[i].lval.np));
+        }
         xfree(f->restr);
         xfree(f);
 }