1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 
  23 /*
  24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  25  * Use is subject to license terms.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 #pragma ident   "%Z%%M% %I%     %E% SMI"
  32 
  33 #define DEBUG
  34 
  35 #include "awk.h"
  36 #include "y.tab.h"
  37 
  38 #define HAT     (NCHARS-1)      /* matches ^ in regular expr */
  39                                 /* NCHARS is 2**n */
  40 #define MAXLIN (3 * LINE_MAX)
  41 
  42 #define type(v)         (v)->nobj
  43 #define left(v)         (v)->narg[0]
  44 #define right(v)        (v)->narg[1]
  45 #define parent(v)       (v)->nnext
  46 
  47 #define LEAF    case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
  48 #define UNARY   case STAR: case PLUS: case QUEST:
  49 
  50 /*
  51  * encoding in tree Nodes:
  52  *      leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
  53  *              left is index, right contains value or pointer to value
  54  *      unary (STAR, PLUS, QUEST): left is child, right is null
  55  *      binary (CAT, OR): left and right are children
  56  *      parent contains pointer to parent
  57  */
  58 
  59 int     setvec[MAXLIN];
  60 int     tmpset[MAXLIN];
  61 Node    *point[MAXLIN];
  62 
  63 int     rtok;           /* next token in current re */
  64 int     rlxval;
  65 uchar   *rlxstr;
  66 uchar   *prestr;        /* current position in current re */
  67 uchar   *lastre;        /* origin of last re */
  68 
  69 static  int setcnt;
  70 static  int poscnt;
  71 
  72 uchar   *patbeg;
  73 int     patlen;
  74 
  75 #define NFA     20      /* cache this many dynamic fa's */
  76 fa      *fatab[NFA];
  77 int     nfatab  = 0;    /* entries in fatab */
  78 
  79 static fa       *mkdfa(uchar *, int);
  80 static int      makeinit(fa *, int);
  81 static void     penter(Node *);
  82 static void     freetr(Node *);
  83 static void     overflo(char *);
  84 static void     cfoll(fa *, Node *);
  85 static void     follow(Node *);
  86 static Node     *reparse(uchar *);
  87 static int      relex(void);
  88 static void     freefa(fa *);
  89 static int      cgoto(fa *, int, int);
  90 
  91 fa *
  92 makedfa(uchar *s, int anchor)   /* returns dfa for reg expr s */
  93 {
  94         int i, use, nuse;
  95         fa *pfa;
  96 
  97         if (compile_time)       /* a constant for sure */
  98                 return (mkdfa(s, anchor));
  99         for (i = 0; i < nfatab; i++) {       /* is it there already? */
 100                 if (fatab[i]->anchor == anchor &&
 101                     strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
 102                         fatab[i]->use++;
 103                         return (fatab[i]);
 104                 }
 105         }
 106         pfa = mkdfa(s, anchor);
 107         if (nfatab < NFA) {  /* room for another */
 108                 fatab[nfatab] = pfa;
 109                 fatab[nfatab]->use = 1;
 110                 nfatab++;
 111                 return (pfa);
 112         }
 113         use = fatab[0]->use; /* replace least-recently used */
 114         nuse = 0;
 115         for (i = 1; i < nfatab; i++)
 116                 if (fatab[i]->use < use) {
 117                         use = fatab[i]->use;
 118                         nuse = i;
 119                 }
 120         freefa(fatab[nuse]);
 121         fatab[nuse] = pfa;
 122         pfa->use = 1;
 123         return (pfa);
 124 }
 125 
 126 fa *
 127 mkdfa(uchar *s, int anchor)     /* does the real work of making a dfa */
 128         /* anchor = 1 for anchored matches, else 0 */
 129 {
 130         Node *p, *p1;
 131         fa *f;
 132 
 133         p = reparse(s);
 134         p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
 135                 /* put ALL STAR in front of reg.  exp. */
 136         p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
 137                 /* put FINAL after reg.  exp. */
 138 
 139         poscnt = 0;
 140         penter(p1);     /* enter parent pointers and leaf indices */
 141         if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
 142                 overflo("no room for fa");
 143         /* penter has computed number of positions in re */
 144         f->accept = poscnt-1;
 145         cfoll(f, p1);   /* set up follow sets */
 146         freetr(p1);
 147         if ((f->posns[0] =
 148             (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
 149                         overflo("out of space in makedfa");
 150         }
 151         if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
 152                 overflo("out of space in makedfa");
 153         *f->posns[1] = 0;
 154         f->initstat = makeinit(f, anchor);
 155         f->anchor = anchor;
 156         f->restr = tostring(s);
 157         return (f);
 158 }
 159 
 160 static int
 161 makeinit(fa *f, int anchor)
 162 {
 163         register int i, k;
 164 
 165         f->curstat = 2;
 166         f->out[2] = 0;
 167         f->reset = 0;
 168         k = *(f->re[0].lfollow);
 169         xfree(f->posns[2]);
 170         if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
 171                 overflo("out of space in makeinit");
 172         for (i = 0; i <= k; i++) {
 173                 (f->posns[2])[i] = (f->re[0].lfollow)[i];
 174         }
 175         if ((f->posns[2])[1] == f->accept)
 176                 f->out[2] = 1;
 177         for (i = 0; i < NCHARS; i++)
 178                 f->gototab[2][i] = 0;
 179         f->curstat = cgoto(f, 2, HAT);
 180         if (anchor) {
 181                 *f->posns[2] = k-1;  /* leave out position 0 */
 182                 for (i = 0; i < k; i++) {
 183                         (f->posns[0])[i] = (f->posns[2])[i];
 184                 }
 185 
 186                 f->out[0] = f->out[2];
 187                 if (f->curstat != 2)
 188                         --(*f->posns[f->curstat]);
 189         }
 190         return (f->curstat);
 191 }
 192 
 193 void
 194 penter(Node *p) /* set up parent pointers and leaf indices */
 195 {
 196         switch (type(p)) {
 197         LEAF
 198                 left(p) = (Node *) poscnt;
 199                 point[poscnt++] = p;
 200                 break;
 201         UNARY
 202                 penter(left(p));
 203                 parent(left(p)) = p;
 204                 break;
 205         case CAT:
 206         case OR:
 207                 penter(left(p));
 208                 penter(right(p));
 209                 parent(left(p)) = p;
 210                 parent(right(p)) = p;
 211                 break;
 212         default:
 213                 ERROR "unknown type %d in penter", type(p) FATAL;
 214                 break;
 215         }
 216 }
 217 
 218 static void
 219 freetr(Node *p) /* free parse tree */
 220 {
 221         switch (type(p)) {
 222         LEAF
 223                 xfree(p);
 224                 break;
 225         UNARY
 226                 freetr(left(p));
 227                 xfree(p);
 228                 break;
 229         case CAT:
 230         case OR:
 231                 freetr(left(p));
 232                 freetr(right(p));
 233                 xfree(p);
 234                 break;
 235         default:
 236                 ERROR "unknown type %d in freetr", type(p) FATAL;
 237                 break;
 238         }
 239 }
 240 
 241 uchar *
 242 cclenter(uchar *p)
 243 {
 244         register int i, c;
 245         uchar *op, *chars, *ret;
 246         size_t  bsize;
 247 
 248         init_buf(&chars, &bsize, LINE_INCR);
 249         op = p;
 250         i = 0;
 251         while ((c = *p++) != 0) {
 252                 if (c == '\\') {
 253                         if ((c = *p++) == 't')
 254                                 c = '\t';
 255                         else if (c == 'n')
 256                                 c = '\n';
 257                         else if (c == 'f')
 258                                 c = '\f';
 259                         else if (c == 'r')
 260                                 c = '\r';
 261                         else if (c == 'b')
 262                                 c = '\b';
 263                         else if (c == '\\')
 264                                 c = '\\';
 265                         else if (isdigit(c)) {
 266                                 int n = c - '0';
 267                                 if (isdigit(*p)) {
 268                                         n = 8 * n + *p++ - '0';
 269                                         if (isdigit(*p))
 270                                                 n = 8 * n + *p++ - '0';
 271                                 }
 272                                 c = n;
 273                         } /* else */
 274                                 /* c = c; */
 275                 } else if (c == '-' && i > 0 && chars[i-1] != 0) {
 276                         if (*p != 0) {
 277                                 c = chars[i-1];
 278                                 while ((uchar)c < *p) {      /* fails if *p is \\ */
 279                                         expand_buf(&chars, &bsize, i);
 280                                         chars[i++] = ++c;
 281                                 }
 282                                 p++;
 283                                 continue;
 284                         }
 285                 }
 286                 expand_buf(&chars, &bsize, i);
 287                 chars[i++] = c;
 288         }
 289         chars[i++] = '\0';
 290         dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
 291         xfree(op);
 292         ret = tostring(chars);
 293         free(chars);
 294         return (ret);
 295 }
 296 
 297 static void
 298 overflo(char *s)
 299 {
 300         ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
 301 }
 302 
 303 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
 304 static void
 305 cfoll(fa *f, Node *v)
 306 {
 307         register int i;
 308         register int *p;
 309 
 310         switch (type(v)) {
 311         LEAF
 312                 f->re[(int)left(v)].ltype = type(v);
 313                 f->re[(int)left(v)].lval = (int)right(v);
 314                 for (i = 0; i <= f->accept; i++)
 315                         setvec[i] = 0;
 316                 setcnt = 0;
 317                 follow(v);      /* computes setvec and setcnt */
 318                 if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
 319                         overflo("follow set overflow");
 320                 f->re[(int)left(v)].lfollow = p;
 321                 *p = setcnt;
 322                 for (i = f->accept; i >= 0; i--) {
 323                         if (setvec[i] == 1)
 324                                 *++p = i;
 325                 }
 326                 break;
 327         UNARY
 328                 cfoll(f, left(v));
 329                 break;
 330         case CAT:
 331         case OR:
 332                 cfoll(f, left(v));
 333                 cfoll(f, right(v));
 334                 break;
 335         default:
 336                 ERROR "unknown type %d in cfoll", type(v) FATAL;
 337         }
 338 }
 339 
 340 /*
 341  * collects initially active leaves of p into setvec
 342  * returns 0 or 1 depending on whether p matches empty string
 343  */
 344 static int
 345 first(Node *p)
 346 {
 347         register int b;
 348 
 349         switch (type(p)) {
 350         LEAF
 351                 if (setvec[(int)left(p)] != 1) {
 352                         setvec[(int)left(p)] = 1;
 353                         setcnt++;
 354                 }
 355                 if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
 356                         return (0);             /* empty CCL */
 357                 else
 358                         return (1);
 359         case PLUS:
 360                 if (first(left(p)) == 0)
 361                         return (0);
 362                 return (1);
 363         case STAR:
 364         case QUEST:
 365                 (void) first(left(p));
 366                 return (0);
 367         case CAT:
 368                 if (first(left(p)) == 0 && first(right(p)) == 0)
 369                         return (0);
 370                 return (1);
 371         case OR:
 372                 b = first(right(p));
 373                 if (first(left(p)) == 0 || b == 0)
 374                         return (0);
 375                 return (1);
 376         }
 377         ERROR "unknown type %d in first", type(p) FATAL;
 378         return (-1);
 379 }
 380 
 381 /* collects leaves that can follow v into setvec */
 382 static void
 383 follow(Node *v)
 384 {
 385         Node *p;
 386 
 387         if (type(v) == FINAL)
 388                 return;
 389         p = parent(v);
 390         switch (type(p)) {
 391         case STAR:
 392         case PLUS:
 393                 (void) first(v);
 394                 follow(p);
 395                 return;
 396 
 397         case OR:
 398         case QUEST:
 399                 follow(p);
 400                 return;
 401 
 402         case CAT:
 403                 if (v == left(p)) {     /* v is left child of p */
 404                         if (first(right(p)) == 0) {
 405                                 follow(p);
 406                                 return;
 407                         }
 408                 } else          /* v is right child */
 409                         follow(p);
 410                 return;
 411         default:
 412                 ERROR "unknown type %d in follow", type(p) FATAL;
 413                 break;
 414         }
 415 }
 416 
 417 static int
 418 member(uchar c, uchar *s)       /* is c in s? */
 419 {
 420         while (*s)
 421                 if (c == *s++)
 422                         return (1);
 423         return (0);
 424 }
 425 
 426 
 427 int
 428 match(fa *f, uchar *p)
 429 {
 430         register int s, ns;
 431 
 432         s = f->reset ? makeinit(f, 0) : f->initstat;
 433         if (f->out[s])
 434                 return (1);
 435         do {
 436                 if ((ns = f->gototab[s][*p]) != 0)
 437                         s = ns;
 438                 else
 439                         s = cgoto(f, s, *p);
 440                 if (f->out[s])
 441                         return (1);
 442         } while (*p++ != 0);
 443         return (0);
 444 }
 445 
 446 int
 447 pmatch(fa *f, uchar *p)
 448 {
 449         register int s, ns;
 450         register uchar *q;
 451         int i, k;
 452 
 453         if (f->reset) {
 454                 f->initstat = s = makeinit(f, 1);
 455         } else {
 456                 s = f->initstat;
 457         }
 458         patbeg = p;
 459         patlen = -1;
 460         do {
 461                 q = p;
 462                 do {
 463                         if (f->out[s])               /* final state */
 464                                 patlen = q-p;
 465                         if ((ns = f->gototab[s][*q]) != 0)
 466                                 s = ns;
 467                         else
 468                                 s = cgoto(f, s, *q);
 469                         if (s == 1) {   /* no transition */
 470                                 if (patlen >= 0) {
 471                                         patbeg = p;
 472                                         return (1);
 473                                 } else
 474                                         goto nextin;    /* no match */
 475                         }
 476                 } while (*q++ != 0);
 477                 if (f->out[s])
 478                         patlen = q - p - 1;     /* don't count $ */
 479                 if (patlen >= 0) {
 480                         patbeg = p;
 481                         return (1);
 482                 }
 483         nextin:
 484                 s = 2;
 485                 if (f->reset) {
 486                         for (i = 2; i <= f->curstat; i++)
 487                                 xfree(f->posns[i]);
 488                         k = *f->posns[0];
 489                         if ((f->posns[2] =
 490                             (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
 491                                 overflo("out of space in pmatch");
 492                         }
 493                         for (i = 0; i <= k; i++)
 494                                 (f->posns[2])[i] = (f->posns[0])[i];
 495                         f->initstat = f->curstat = 2;
 496                         f->out[2] = f->out[0];
 497                         for (i = 0; i < NCHARS; i++)
 498                                 f->gototab[2][i] = 0;
 499                 }
 500         } while (*p++ != 0);
 501         return (0);
 502 }
 503 
 504 int
 505 nematch(fa *f, uchar *p)
 506 {
 507         register int s, ns;
 508         register uchar *q;
 509         int i, k;
 510 
 511         if (f->reset) {
 512                 f->initstat = s = makeinit(f, 1);
 513         } else {
 514                 s = f->initstat;
 515         }
 516         patlen = -1;
 517         while (*p) {
 518                 q = p;
 519                 do {
 520                         if (f->out[s])               /* final state */
 521                                 patlen = q-p;
 522                         if ((ns = f->gototab[s][*q]) != 0)
 523                                 s = ns;
 524                         else
 525                                 s = cgoto(f, s, *q);
 526                         if (s == 1) {   /* no transition */
 527                                 if (patlen > 0) {
 528                                         patbeg = p;
 529                                         return (1);
 530                                 } else
 531                                         goto nnextin;   /* no nonempty match */
 532                         }
 533                 } while (*q++ != 0);
 534                 if (f->out[s])
 535                         patlen = q - p - 1;     /* don't count $ */
 536                 if (patlen > 0) {
 537                         patbeg = p;
 538                         return (1);
 539                 }
 540         nnextin:
 541                 s = 2;
 542                 if (f->reset) {
 543                         for (i = 2; i <= f->curstat; i++)
 544                                 xfree(f->posns[i]);
 545                         k = *f->posns[0];
 546                         if ((f->posns[2] =
 547                             (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
 548                                 overflo("out of state space");
 549                         }
 550                         for (i = 0; i <= k; i++)
 551                                 (f->posns[2])[i] = (f->posns[0])[i];
 552                         f->initstat = f->curstat = 2;
 553                         f->out[2] = f->out[0];
 554                         for (i = 0; i < NCHARS; i++)
 555                                 f->gototab[2][i] = 0;
 556                 }
 557                 p++;
 558         }
 559         return (0);
 560 }
 561 
 562 static Node *regexp(void), *primary(void), *concat(Node *);
 563 static Node *alt(Node *), *unary(Node *);
 564 
 565 static Node *
 566 reparse(uchar *p)
 567 {
 568         /* parses regular expression pointed to by p */
 569         /* uses relex() to scan regular expression */
 570         Node *np;
 571 
 572         dprintf(("reparse <%s>\n", p));
 573         lastre = prestr = p;    /* prestr points to string to be parsed */
 574         rtok = relex();
 575         if (rtok == '\0')
 576                 ERROR "empty regular expression" FATAL;
 577         np = regexp();
 578         if (rtok == '\0') {
 579                 return (np);
 580         } else {
 581                 ERROR "syntax error in regular expression %s at %s",
 582                     lastre, prestr FATAL;
 583         }
 584         /*NOTREACHED*/
 585         return (NULL);
 586 }
 587 
 588 static Node *
 589 regexp(void)
 590 {
 591         return (alt(concat(primary())));
 592 }
 593 
 594 static Node *
 595 primary(void)
 596 {
 597         Node *np;
 598 
 599         switch (rtok) {
 600         case CHAR:
 601                 np = op2(CHAR, NIL, (Node *)rlxval);
 602                 rtok = relex();
 603                 return (unary(np));
 604         case ALL:
 605                 rtok = relex();
 606                 return (unary(op2(ALL, NIL, NIL)));
 607         case DOT:
 608                 rtok = relex();
 609                 return (unary(op2(DOT, NIL, NIL)));
 610         case CCL:
 611                 /*LINTED align*/
 612                 np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
 613                 rtok = relex();
 614                 return (unary(np));
 615         case NCCL:
 616                 /*LINTED align*/
 617                 np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
 618                 rtok = relex();
 619                 return (unary(np));
 620         case '^':
 621                 rtok = relex();
 622                 return (unary(op2(CHAR, NIL, (Node *)HAT)));
 623         case '$':
 624                 rtok = relex();
 625                 return (unary(op2(CHAR, NIL, NIL)));
 626         case '(':
 627                 rtok = relex();
 628                 if (rtok == ')') {      /* special pleading for () */
 629                         rtok = relex();
 630                         return (unary(op2(CCL, NIL,
 631                             /*LINTED align*/
 632                             (Node *)tostring((uchar *)""))));
 633                 }
 634                 np = regexp();
 635                 if (rtok == ')') {
 636                         rtok = relex();
 637                         return (unary(np));
 638                 } else {
 639                         ERROR "syntax error in regular expression %s at %s",
 640                             lastre, prestr FATAL;
 641                 }
 642         default:
 643                 ERROR "illegal primary in regular expression %s at %s",
 644                     lastre, prestr FATAL;
 645         }
 646         /*NOTREACHED*/
 647         return (NULL);
 648 }
 649 
 650 static Node *
 651 concat(Node *np)
 652 {
 653         switch (rtok) {
 654         case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
 655                 return (concat(op2(CAT, np, primary())));
 656         default:
 657                 return (np);
 658         }
 659 }
 660 
 661 static Node *
 662 alt(Node *np)
 663 {
 664         if (rtok == OR) {
 665                 rtok = relex();
 666                 return (alt(op2(OR, np, concat(primary()))));
 667         }
 668         return (np);
 669 }
 670 
 671 static Node *
 672 unary(Node *np)
 673 {
 674         switch (rtok) {
 675         case STAR:
 676                 rtok = relex();
 677                 return (unary(op2(STAR, np, NIL)));
 678         case PLUS:
 679                 rtok = relex();
 680                 return (unary(op2(PLUS, np, NIL)));
 681         case QUEST:
 682                 rtok = relex();
 683                 return (unary(op2(QUEST, np, NIL)));
 684         default:
 685                 return (np);
 686         }
 687 }
 688 
 689 static int
 690 relex(void)             /* lexical analyzer for reparse */
 691 {
 692         register int c;
 693         uchar *cbuf;
 694         int clen, cflag;
 695 
 696         switch (c = *prestr++) {
 697         case '|': return OR;
 698         case '*': return STAR;
 699         case '+': return PLUS;
 700         case '?': return QUEST;
 701         case '.': return DOT;
 702         case '\0': prestr--; return '\0';
 703         case '^':
 704         case '$':
 705         case '(':
 706         case ')':
 707                 return (c);
 708         case '\\':
 709                 if ((c = *prestr++) == 't')
 710                         c = '\t';
 711                 else if (c == 'n')
 712                         c = '\n';
 713                 else if (c == 'f')
 714                         c = '\f';
 715                 else if (c == 'r')
 716                         c = '\r';
 717                 else if (c == 'b')
 718                         c = '\b';
 719                 else if (c == '\\')
 720                         c = '\\';
 721                 else if (isdigit(c)) {
 722                         int n = c - '0';
 723                         if (isdigit(*prestr)) {
 724                                 n = 8 * n + *prestr++ - '0';
 725                                 if (isdigit(*prestr))
 726                                         n = 8 * n + *prestr++ - '0';
 727                         }
 728                         c = n;
 729                 } /* else it's now in c */
 730                 rlxval = c;
 731                 return (CHAR);
 732         default:
 733                 rlxval = c;
 734                 return (CHAR);
 735         case '[':
 736                 clen = 0;
 737                 if (*prestr == '^') {
 738                         cflag = 1;
 739                         prestr++;
 740                 } else
 741                         cflag = 0;
 742                 init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
 743                 for (;;) {
 744                         if ((c = *prestr++) == '\\') {
 745                                 cbuf[clen++] = '\\';
 746                                 if ((c = *prestr++) == '\0') {
 747                                         ERROR
 748                         "nonterminated character class %s", lastre FATAL;
 749                                 }
 750                                 cbuf[clen++] = c;
 751                         } else if (c == ']') {
 752                                 cbuf[clen] = 0;
 753                                 rlxstr = tostring(cbuf);
 754                                 free(cbuf);
 755                                 if (cflag == 0)
 756                                         return (CCL);
 757                                 else
 758                                         return (NCCL);
 759                         } else if (c == '\n') {
 760                                 ERROR "newline in character class %s...",
 761                                     lastre FATAL;
 762                         } else if (c == '\0') {
 763                                 ERROR "nonterminated character class %s",
 764                                     lastre FATAL;
 765                         } else
 766                                 cbuf[clen++] = c;
 767                 }
 768                 /*NOTREACHED*/
 769         }
 770         return (0);
 771 }
 772 
 773 static int
 774 cgoto(fa *f, int s, int c)
 775 {
 776         register int i, j, k;
 777         register int *p, *q;
 778 
 779         for (i = 0; i <= f->accept; i++)
 780                 setvec[i] = 0;
 781         setcnt = 0;
 782         /* compute positions of gototab[s,c] into setvec */
 783         p = f->posns[s];
 784         for (i = 1; i <= *p; i++) {
 785                 if ((k = f->re[p[i]].ltype) != FINAL) {
 786                         if (k == CHAR && c == f->re[p[i]].lval ||
 787                             k == DOT && c != 0 && c != HAT ||
 788                             k == ALL && c != 0 ||
 789                             k == CCL &&
 790                             member(c, (uchar *)f->re[p[i]].lval) ||
 791                             k == NCCL &&
 792                             !member(c, (uchar *)f->re[p[i]].lval) &&
 793                             c != 0 && c != HAT) {
 794                                 q = f->re[p[i]].lfollow;
 795                                 for (j = 1; j <= *q; j++) {
 796                                         if (setvec[q[j]] == 0) {
 797                                                 setcnt++;
 798                                                 setvec[q[j]] = 1;
 799                                         }
 800                                 }
 801                         }
 802                 }
 803         }
 804         /* determine if setvec is a previous state */
 805         tmpset[0] = setcnt;
 806         j = 1;
 807         for (i = f->accept; i >= 0; i--)
 808                 if (setvec[i]) {
 809                         tmpset[j++] = i;
 810                 }
 811         /* tmpset == previous state? */
 812         for (i = 1; i <= f->curstat; i++) {
 813                 p = f->posns[i];
 814                 if ((k = tmpset[0]) != p[0])
 815                         goto different;
 816                 for (j = 1; j <= k; j++)
 817                         if (tmpset[j] != p[j])
 818                                 goto different;
 819                 /* setvec is state i */
 820                 f->gototab[s][c] = i;
 821                 return (i);
 822         different:;
 823         }
 824 
 825         /* add tmpset to current set of states */
 826         if (f->curstat >= NSTATES-1) {
 827                 f->curstat = 2;
 828                 f->reset = 1;
 829                 for (i = 2; i < NSTATES; i++)
 830                         xfree(f->posns[i]);
 831         } else
 832                 ++(f->curstat);
 833         for (i = 0; i < NCHARS; i++)
 834                 f->gototab[f->curstat][i] = 0;
 835         xfree(f->posns[f->curstat]);
 836         if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
 837                 overflo("out of space in cgoto");
 838 
 839         f->posns[f->curstat] = p;
 840         f->gototab[s][c] = f->curstat;
 841         for (i = 0; i <= setcnt; i++)
 842                 p[i] = tmpset[i];
 843         if (setvec[f->accept])
 844                 f->out[f->curstat] = 1;
 845         else
 846                 f->out[f->curstat] = 0;
 847         return (f->curstat);
 848 }
 849 
 850 static void
 851 freefa(fa *f)
 852 {
 853 
 854         register int i;
 855 
 856         if (f == NULL)
 857                 return;
 858         for (i = 0; i <= f->curstat; i++)
 859                 xfree(f->posns[i]);
 860         for (i = 0; i <= f->accept; i++)
 861                 xfree(f->re[i].lfollow);
 862         xfree(f->restr);
 863         xfree(f);
 864 }