1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*      Copyright (c) 1988 AT&T     */
  22 /*        All Rights Reserved   */
  23 
  24 
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #ifndef _REGEXP_H
  31 #define _REGEXP_H
  32 
  33 #pragma ident   "%Z%%M% %I%     %E% SMI"        /* SVr4.0 1.9   */
  34 
  35 #include <string.h>
  36 
  37 #ifdef  __cplusplus
  38 extern "C" {
  39 #endif
  40 
  41 #define CBRA    2
  42 #define CCHR    4
  43 #define CDOT    8
  44 #define CCL     12
  45 #define CXCL    16
  46 #define CDOL    20
  47 #define CCEOF   22
  48 #define CKET    24
  49 #define CBACK   36
  50 #define NCCL    40
  51 
  52 #define STAR    01
  53 #define RNGE    03
  54 
  55 #define NBRA    9
  56 
  57 #define PLACE(c)        ep[c >> 3] |= bittab[c & 07]
  58 #define ISTHERE(c)      (ep[c >> 3] & bittab[c & 07])
  59 #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0)
  60 
  61 static char     *braslist[NBRA];
  62 static char     *braelist[NBRA];
  63 int     sed, nbra;
  64 char    *loc1, *loc2, *locs;
  65 static int      nodelim;
  66 
  67 int     circf;
  68 static int      low;
  69 static int      size;
  70 
  71 static unsigned char    bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  72 
  73 #ifdef  __STDC__
  74 int advance(const char *lp, const char *ep);
  75 static void getrnge(const char *str);
  76 #else
  77 int advance();
  78 static void getrnge();
  79 #endif
  80 
  81 char *
  82 #ifdef  __STDC__
  83 compile(char *instring, char *ep, const char *endbuf, int seof)
  84 #else
  85 compile(instring, ep, endbuf, seof)
  86 register char *ep;
  87 char *instring, *endbuf;
  88 int seof;
  89 #endif
  90 {
  91         INIT    /* Dependent declarations and initializations */
  92         register int c;
  93         register int eof = seof;
  94         char *lastep;
  95         int cclcnt;
  96         char bracket[NBRA], *bracketp;
  97         int closed;
  98         int neg;
  99         int lc;
 100         int i, cflg;
 101         int iflag; /* used for non-ascii characters in brackets */
 102 
 103 #ifdef __lint
 104         /* make lint happy */
 105         c = nodelim;
 106 #endif
 107 
 108         lastep = NULL;
 109         if ((c = GETC()) == eof || c == '\n') {
 110                 if (c == '\n') {
 111                         UNGETC(c);
 112                         nodelim = 1;
 113                 }
 114                 if (*ep == 0 && !sed)
 115                         ERROR(41);
 116                 RETURN(ep);
 117         }
 118         bracketp = bracket;
 119         circf = closed = nbra = 0;
 120         if (c == '^')
 121                 circf++;
 122         else
 123                 UNGETC(c);
 124         for (;;) {
 125                 if (ep >= endbuf)
 126                         ERROR(50);
 127                 c = GETC();
 128                 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
 129                         lastep = ep;
 130                 if (c == eof) {
 131                         *ep++ = CCEOF;
 132                         if (bracketp != bracket)
 133                                 ERROR(42);
 134                         RETURN(ep);
 135                 }
 136                 switch (c) {
 137 
 138                 case '.':
 139                         *ep++ = CDOT;
 140                         continue;
 141 
 142                 case '\n':
 143                         if (!sed) {
 144                                 UNGETC(c);
 145                                 *ep++ = CCEOF;
 146                                 nodelim = 1;
 147                                 if (bracketp != bracket)
 148                                         ERROR(42);
 149                                 RETURN(ep);
 150                         } else ERROR(36);
 151                 case '*':
 152                         if (lastep == NULL || *lastep == CBRA ||
 153                             *lastep == CKET)
 154                                 goto defchar;
 155                         *lastep |= STAR;
 156                         continue;
 157 
 158                 case '$':
 159                         if (PEEKC() != eof && PEEKC() != '\n')
 160                                 goto defchar;
 161                         *ep++ = CDOL;
 162                         continue;
 163 
 164                 case '[':
 165                         if (&ep[17] >= endbuf)
 166                                 ERROR(50);
 167 
 168                         *ep++ = CCL;
 169                         lc = 0;
 170                         for (i = 0; i < 16; i++)
 171                                 ep[i] = 0;
 172 
 173                         neg = 0;
 174                         if ((c = GETC()) == '^') {
 175                                 neg = 1;
 176                                 c = GETC();
 177                         }
 178                         iflag = 1;
 179                         do {
 180                                 c &= 0377;
 181                                 if (c == '\0' || c == '\n')
 182                                         ERROR(49);
 183                                 if ((c & 0200) && iflag) {
 184                                         iflag = 0;
 185                                         if (&ep[32] >= endbuf)
 186                                                 ERROR(50);
 187                                         ep[-1] = CXCL;
 188                                         for (i = 16; i < 32; i++)
 189                                                 ep[i] = 0;
 190                                 }
 191                                 if (c == '-' && lc != 0) {
 192                                         if ((c = GETC()) == ']') {
 193                                                 PLACE('-');
 194                                                 break;
 195                                         }
 196                                         if ((c & 0200) && iflag) {
 197                                                 iflag = 0;
 198                                                 if (&ep[32] >= endbuf)
 199                                                         ERROR(50);
 200                                                 ep[-1] = CXCL;
 201                                                 for (i = 16; i < 32; i++)
 202                                                         ep[i] = 0;
 203                                         }
 204                                         while (lc < c) {
 205                                                 PLACE(lc);
 206                                                 lc++;
 207                                         }
 208                                 }
 209                                 lc = c;
 210                                 PLACE(c);
 211                         } while ((c = GETC()) != ']');
 212 
 213                         if (iflag)
 214                                 iflag = 16;
 215                         else
 216                                 iflag = 32;
 217 
 218                         if (neg) {
 219                                 if (iflag == 32) {
 220                                         for (cclcnt = 0; cclcnt < iflag;
 221                                             cclcnt++)
 222                                                 ep[cclcnt] ^= 0377;
 223                                         ep[0] &= 0376;
 224                                 } else {
 225                                         ep[-1] = NCCL;
 226                                         /* make nulls match so test fails */
 227                                         ep[0] |= 01;
 228                                 }
 229                         }
 230 
 231                         ep += iflag;
 232 
 233                         continue;
 234 
 235                 case '\\':
 236                         switch (c = GETC()) {
 237 
 238                         case '(':
 239                                 if (nbra >= NBRA)
 240                                         ERROR(43);
 241                                 *bracketp++ = (char)nbra;
 242                                 *ep++ = CBRA;
 243                                 *ep++ = (char)nbra++;
 244                                 continue;
 245 
 246                         case ')':
 247                                 if (bracketp <= bracket)
 248                                         ERROR(42);
 249                                 *ep++ = CKET;
 250                                 *ep++ = *--bracketp;
 251                                 closed++;
 252                                 continue;
 253 
 254                         case '{':
 255                                 if (lastep == NULL)
 256                                         goto defchar;
 257                                 *lastep |= RNGE;
 258                                 cflg = 0;
 259                         nlim:
 260                                 c = GETC();
 261                                 i = 0;
 262                                 do {
 263                                         if ('0' <= c && c <= '9')
 264                                                 i = 10 * i + c - '0';
 265                                         else
 266                                                 ERROR(16);
 267                                 } while (((c = GETC()) != '\\') && (c != ','));
 268                                 if (i >= 255)
 269                                         ERROR(11);
 270                                 *ep++ = (char)i;
 271                                 if (c == ',') {
 272                                         if (cflg++)
 273                                                 ERROR(44);
 274                                         if ((c = GETC()) == '\\')
 275                                                 *ep++ = (char)255;
 276                                         else {
 277                                                 UNGETC(c);
 278                                                 goto nlim;
 279                                                 /* get 2'nd number */
 280                                         }
 281                                 }
 282                                 if (GETC() != '}')
 283                                         ERROR(45);
 284                                 if (!cflg)      /* one number */
 285                                         *ep++ = (char)i;
 286                                 else if ((ep[-1] & 0377) < (ep[-2] & 0377))
 287                                         ERROR(46);
 288                                 continue;
 289 
 290                         case '\n':
 291                                 ERROR(36);
 292 
 293                         case 'n':
 294                                 c = '\n';
 295                                 goto defchar;
 296 
 297                         default:
 298                                 if (c >= '1' && c <= '9') {
 299                                         if ((c -= '1') >= closed)
 300                                                 ERROR(25);
 301                                         *ep++ = CBACK;
 302                                         *ep++ = (char)c;
 303                                         continue;
 304                                 }
 305                         }
 306         /* Drop through to default to use \ to turn off special chars */
 307 
 308                 defchar:
 309                 default:
 310                         lastep = ep;
 311                         *ep++ = CCHR;
 312                         *ep++ = (char)c;
 313                 }
 314         }
 315         /*NOTREACHED*/
 316 }
 317 
 318 #ifdef  __STDC__
 319 int
 320 step(const char *p1, const char *p2)
 321 #else
 322 int
 323 step(p1, p2)
 324 register char *p1, *p2;
 325 #endif
 326 {
 327         char c;
 328 
 329 
 330         if (circf) {
 331                 loc1 = (char *)p1;
 332                 return (advance(p1, p2));
 333         }
 334         /* fast check for first character */
 335         if (*p2 == CCHR) {
 336                 c = p2[1];
 337                 do {
 338                         if (*p1 != c)
 339                                 continue;
 340                         if (advance(p1, p2)) {
 341                                 loc1 = (char *)p1;
 342                                 return (1);
 343                         }
 344                 } while (*p1++);
 345                 return (0);
 346         }
 347                 /* regular algorithm */
 348         do {
 349                 if (advance(p1, p2)) {
 350                         loc1 = (char *)p1;
 351                         return (1);
 352                 }
 353         } while (*p1++);
 354         return (0);
 355 }
 356 
 357 int
 358 #ifdef  __STDC__
 359 advance(const char *lp, const char *ep)
 360 #else
 361 advance(lp, ep)
 362 register char *lp, *ep;
 363 #endif
 364 {
 365 #ifdef  __STDC__
 366         const char *curlp;
 367 #else
 368         register char *curlp;
 369 #endif
 370         int c;
 371         char *bbeg;
 372         register char neg;
 373         size_t ct;
 374 
 375         for (;;) {
 376                 neg = 0;
 377                 switch (*ep++) {
 378 
 379                 case CCHR:
 380                         if (*ep++ == *lp++)
 381                                 continue;
 382                         return (0);
 383                         /*FALLTHRU*/
 384 
 385                 case CDOT:
 386                         if (*lp++)
 387                                 continue;
 388                         return (0);
 389                         /*FALLTHRU*/
 390 
 391                 case CDOL:
 392                         if (*lp == 0)
 393                                 continue;
 394                         return (0);
 395                         /*FALLTHRU*/
 396 
 397                 case CCEOF:
 398                         loc2 = (char *)lp;
 399                         return (1);
 400                         /*FALLTHRU*/
 401 
 402                 case CXCL:
 403                         c = (unsigned char)*lp++;
 404                         if (ISTHERE(c)) {
 405                                 ep += 32;
 406                                 continue;
 407                         }
 408                         return (0);
 409                         /*FALLTHRU*/
 410 
 411                 case NCCL:
 412                         neg = 1;
 413                         /*FALLTHRU*/
 414 
 415                 case CCL:
 416                         c = *lp++;
 417                         if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
 418                                 ep += 16;
 419                                 continue;
 420                         }
 421                         return (0);
 422                         /*FALLTHRU*/
 423 
 424                 case CBRA:
 425                         braslist[(int)*ep++] = (char *)lp;
 426                         continue;
 427                         /*FALLTHRU*/
 428 
 429                 case CKET:
 430                         braelist[(int)*ep++] = (char *)lp;
 431                         continue;
 432                         /*FALLTHRU*/
 433 
 434                 case CCHR | RNGE:
 435                         c = *ep++;
 436                         getrnge(ep);
 437                         while (low--)
 438                                 if (*lp++ != c)
 439                                         return (0);
 440                         curlp = lp;
 441                         while (size--)
 442                                 if (*lp++ != c)
 443                                         break;
 444                         if (size < 0)
 445                                 lp++;
 446                         ep += 2;
 447                         goto star;
 448                         /*FALLTHRU*/
 449 
 450                 case CDOT | RNGE:
 451                         getrnge(ep);
 452                         while (low--)
 453                                 if (*lp++ == '\0')
 454                                         return (0);
 455                         curlp = lp;
 456                         while (size--)
 457                                 if (*lp++ == '\0')
 458                                         break;
 459                         if (size < 0)
 460                                 lp++;
 461                         ep += 2;
 462                         goto star;
 463                         /*FALLTHRU*/
 464 
 465                 case CXCL | RNGE:
 466                         getrnge(ep + 32);
 467                         while (low--) {
 468                                 c = (unsigned char)*lp++;
 469                                 if (!ISTHERE(c))
 470                                         return (0);
 471                         }
 472                         curlp = lp;
 473                         while (size--) {
 474                                 c = (unsigned char)*lp++;
 475                                 if (!ISTHERE(c))
 476                                         break;
 477                         }
 478                         if (size < 0)
 479                                 lp++;
 480                         ep += 34;               /* 32 + 2 */
 481                         goto star;
 482                         /*FALLTHRU*/
 483 
 484                 case NCCL | RNGE:
 485                         neg = 1;
 486                         /*FALLTHRU*/
 487 
 488                 case CCL | RNGE:
 489                         getrnge(ep + 16);
 490                         while (low--) {
 491                                 c = *lp++;
 492                                 if (((c & 0200) || !ISTHERE(c)) ^ neg)
 493                                         return (0);
 494                         }
 495                         curlp = lp;
 496                         while (size--) {
 497                                 c = *lp++;
 498                                 if (((c & 0200) || !ISTHERE(c)) ^ neg)
 499                                         break;
 500                         }
 501                         if (size < 0)
 502                                 lp++;
 503                         ep += 18;               /* 16 + 2 */
 504                         goto star;
 505                         /*FALLTHRU*/
 506 
 507                 case CBACK:
 508                         bbeg = braslist[(int)*ep];
 509                         ct = braelist[(int)*ep++] - bbeg;
 510 
 511                         if (ecmp(bbeg, lp, ct)) {
 512                                 lp += ct;
 513                                 continue;
 514                         }
 515                         return (0);
 516                         /*FALLTHRU*/
 517 
 518                 case CBACK | STAR:
 519                         bbeg = braslist[(int)*ep];
 520                         ct = braelist[(int)*ep++] - bbeg;
 521                         curlp = lp;
 522                         while (ecmp(bbeg, lp, ct))
 523                                 lp += ct;
 524 
 525                         while (lp >= curlp) {
 526                                 if (advance(lp, ep))
 527                                         return (1);
 528                                 lp -= ct;
 529                         }
 530                         return (0);
 531                         /*FALLTHRU*/
 532 
 533                 case CDOT | STAR:
 534                         curlp = lp;
 535                         while (*lp++);
 536                         goto star;
 537                         /*FALLTHRU*/
 538 
 539                 case CCHR | STAR:
 540                         curlp = lp;
 541                         while (*lp++ == *ep);
 542                         ep++;
 543                         goto star;
 544                         /*FALLTHRU*/
 545 
 546                 case CXCL | STAR:
 547                         curlp = lp;
 548                         do {
 549                                 c = (unsigned char)*lp++;
 550                         } while (ISTHERE(c));
 551                         ep += 32;
 552                         goto star;
 553                         /*FALLTHRU*/
 554 
 555                 case NCCL | STAR:
 556                         neg = 1;
 557                         /*FALLTHRU*/
 558 
 559                 case CCL | STAR:
 560                         curlp = lp;
 561                         do {
 562                                 c = *lp++;
 563                         } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
 564                         ep += 16;
 565                         goto star;
 566                         /*FALLTHRU*/
 567 
 568                 star:
 569                         do {
 570                                 if (--lp == locs)
 571                                         break;
 572                                 if (advance(lp, ep))
 573                                         return (1);
 574                         } while (lp > curlp);
 575                         return (0);
 576 
 577                 }
 578         }
 579         /*NOTREACHED*/
 580 }
 581 
 582 static void
 583 #ifdef  __STDC__
 584 getrnge(const char *str)
 585 #else
 586 getrnge(str)
 587 register char *str;
 588 #endif
 589 {
 590         low = *str++ & 0377;
 591         size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
 592 }
 593 
 594 #ifdef  __cplusplus
 595 }
 596 #endif
 597 
 598 #endif  /* _REGEXP_H */