1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*      Copyright (c) 1988 AT&T     */
  22 /*        All Rights Reserved   */
  23 
  24 /*
  25  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  26  *
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #ifndef _REGEXP_H
  32 #define _REGEXP_H
  33 
  34 #include <string.h>
  35 
  36 #ifdef  __cplusplus
  37 extern "C" {
  38 #endif
  39 
  40 #define CBRA    2
  41 #define CCHR    4
  42 #define CDOT    8
  43 #define CCL     12
  44 #define CXCL    16
  45 #define CDOL    20
  46 #define CCEOF   22
  47 #define CKET    24
  48 #define CBACK   36
  49 #define NCCL    40
  50 
  51 #define STAR    01
  52 #define RNGE    03
  53 
  54 #define NBRA    9
  55 
  56 #define PLACE(c)        ep[c >> 3] |= bittab[c & 07]
  57 #define ISTHERE(c)      (ep[c >> 3] & bittab[c & 07])
  58 #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0)
  59 
  60 static char     *braslist[NBRA];
  61 static char     *braelist[NBRA];
  62 int     sed, nbra;
  63 char    *loc1, *loc2, *locs;
  64 static int      nodelim;
  65 
  66 int     circf;
  67 static int      low;
  68 static int      size;
  69 
  70 static unsigned char    bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  71 
  72 int advance(const char *lp, const char *ep);
  73 static void getrnge(const char *str);
  74 
  75 char *
  76 compile(char *instring, char *ep, const char *endbuf, int seof)
  77 {
  78         INIT    /* Dependent declarations and initializations */
  79         register int c;
  80         register int eof = seof;
  81         char *lastep;
  82         int cclcnt;
  83         char bracket[NBRA], *bracketp;
  84         int closed;
  85         int neg;
  86         int lc;
  87         int i, cflg;
  88         int iflag; /* used for non-ascii characters in brackets */
  89 
  90 #ifdef __lint
  91         /* make lint happy */
  92         c = nodelim;
  93 #endif
  94 
  95         lastep = NULL;
  96         if ((c = GETC()) == eof || c == '\n') {
  97                 if (c == '\n') {
  98                         UNGETC(c);
  99                         nodelim = 1;
 100                 }
 101                 if (*ep == 0 && !sed)
 102                         ERROR(41);
 103                 RETURN(ep);
 104         }
 105         bracketp = bracket;
 106         circf = closed = nbra = 0;
 107         if (c == '^')
 108                 circf++;
 109         else
 110                 UNGETC(c);
 111         for (;;) {
 112                 if (ep >= endbuf)
 113                         ERROR(50);
 114                 c = GETC();
 115                 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
 116                         lastep = ep;
 117                 if (c == eof) {
 118                         *ep++ = CCEOF;
 119                         if (bracketp != bracket)
 120                                 ERROR(42);
 121                         RETURN(ep);
 122                 }
 123                 switch (c) {
 124 
 125                 case '.':
 126                         *ep++ = CDOT;
 127                         continue;
 128 
 129                 case '\n':
 130                         if (!sed) {
 131                                 UNGETC(c);
 132                                 *ep++ = CCEOF;
 133                                 nodelim = 1;
 134                                 if (bracketp != bracket)
 135                                         ERROR(42);
 136                                 RETURN(ep);
 137                         } else ERROR(36);
 138                 case '*':
 139                         if (lastep == NULL || *lastep == CBRA ||
 140                             *lastep == CKET)
 141                                 goto defchar;
 142                         *lastep |= STAR;
 143                         continue;
 144 
 145                 case '$':
 146                         if (PEEKC() != eof && PEEKC() != '\n')
 147                                 goto defchar;
 148                         *ep++ = CDOL;
 149                         continue;
 150 
 151                 case '[':
 152                         if (&ep[17] >= endbuf)
 153                                 ERROR(50);
 154 
 155                         *ep++ = CCL;
 156                         lc = 0;
 157                         for (i = 0; i < 16; i++)
 158                                 ep[i] = 0;
 159 
 160                         neg = 0;
 161                         if ((c = GETC()) == '^') {
 162                                 neg = 1;
 163                                 c = GETC();
 164                         }
 165                         iflag = 1;
 166                         do {
 167                                 c &= 0377;
 168                                 if (c == '\0' || c == '\n')
 169                                         ERROR(49);
 170                                 if ((c & 0200) && iflag) {
 171                                         iflag = 0;
 172                                         if (&ep[32] >= endbuf)
 173                                                 ERROR(50);
 174                                         ep[-1] = CXCL;
 175                                         for (i = 16; i < 32; i++)
 176                                                 ep[i] = 0;
 177                                 }
 178                                 if (c == '-' && lc != 0) {
 179                                         if ((c = GETC()) == ']') {
 180                                                 PLACE('-');
 181                                                 break;
 182                                         }
 183                                         if ((c & 0200) && iflag) {
 184                                                 iflag = 0;
 185                                                 if (&ep[32] >= endbuf)
 186                                                         ERROR(50);
 187                                                 ep[-1] = CXCL;
 188                                                 for (i = 16; i < 32; i++)
 189                                                         ep[i] = 0;
 190                                         }
 191                                         while (lc < c) {
 192                                                 PLACE(lc);
 193                                                 lc++;
 194                                         }
 195                                 }
 196                                 lc = c;
 197                                 PLACE(c);
 198                         } while ((c = GETC()) != ']');
 199 
 200                         if (iflag)
 201                                 iflag = 16;
 202                         else
 203                                 iflag = 32;
 204 
 205                         if (neg) {
 206                                 if (iflag == 32) {
 207                                         for (cclcnt = 0; cclcnt < iflag;
 208                                             cclcnt++)
 209                                                 ep[cclcnt] ^= 0377;
 210                                         ep[0] &= 0376;
 211                                 } else {
 212                                         ep[-1] = NCCL;
 213                                         /* make nulls match so test fails */
 214                                         ep[0] |= 01;
 215                                 }
 216                         }
 217 
 218                         ep += iflag;
 219 
 220                         continue;
 221 
 222                 case '\\':
 223                         switch (c = GETC()) {
 224 
 225                         case '(':
 226                                 if (nbra >= NBRA)
 227                                         ERROR(43);
 228                                 *bracketp++ = (char)nbra;
 229                                 *ep++ = CBRA;
 230                                 *ep++ = (char)nbra++;
 231                                 continue;
 232 
 233                         case ')':
 234                                 if (bracketp <= bracket)
 235                                         ERROR(42);
 236                                 *ep++ = CKET;
 237                                 *ep++ = *--bracketp;
 238                                 closed++;
 239                                 continue;
 240 
 241                         case '{':
 242                                 if (lastep == NULL)
 243                                         goto defchar;
 244                                 *lastep |= RNGE;
 245                                 cflg = 0;
 246                         nlim:
 247                                 c = GETC();
 248                                 i = 0;
 249                                 do {
 250                                         if ('0' <= c && c <= '9')
 251                                                 i = 10 * i + c - '0';
 252                                         else
 253                                                 ERROR(16);
 254                                 } while (((c = GETC()) != '\\') && (c != ','));
 255                                 if (i >= 255)
 256                                         ERROR(11);
 257                                 *ep++ = (char)i;
 258                                 if (c == ',') {
 259                                         if (cflg++)
 260                                                 ERROR(44);
 261                                         if ((c = GETC()) == '\\')
 262                                                 *ep++ = (char)255;
 263                                         else {
 264                                                 UNGETC(c);
 265                                                 goto nlim;
 266                                                 /* get 2'nd number */
 267                                         }
 268                                 }
 269                                 if (GETC() != '}')
 270                                         ERROR(45);
 271                                 if (!cflg)      /* one number */
 272                                         *ep++ = (char)i;
 273                                 else if ((ep[-1] & 0377) < (ep[-2] & 0377))
 274                                         ERROR(46);
 275                                 continue;
 276 
 277                         case '\n':
 278                                 ERROR(36);
 279 
 280                         case 'n':
 281                                 c = '\n';
 282                                 goto defchar;
 283 
 284                         default:
 285                                 if (c >= '1' && c <= '9') {
 286                                         if ((c -= '1') >= closed)
 287                                                 ERROR(25);
 288                                         *ep++ = CBACK;
 289                                         *ep++ = (char)c;
 290                                         continue;
 291                                 }
 292                         }
 293         /* Drop through to default to use \ to turn off special chars */
 294 
 295                 defchar:
 296                 default:
 297                         lastep = ep;
 298                         *ep++ = CCHR;
 299                         *ep++ = (char)c;
 300                 }
 301         }
 302         /*NOTREACHED*/
 303 }
 304 
 305 int
 306 step(const char *p1, const char *p2)
 307 {
 308         char c;
 309 
 310 
 311         if (circf) {
 312                 loc1 = (char *)p1;
 313                 return (advance(p1, p2));
 314         }
 315         /* fast check for first character */
 316         if (*p2 == CCHR) {
 317                 c = p2[1];
 318                 do {
 319                         if (*p1 != c)
 320                                 continue;
 321                         if (advance(p1, p2)) {
 322                                 loc1 = (char *)p1;
 323                                 return (1);
 324                         }
 325                 } while (*p1++);
 326                 return (0);
 327         }
 328                 /* regular algorithm */
 329         do {
 330                 if (advance(p1, p2)) {
 331                         loc1 = (char *)p1;
 332                         return (1);
 333                 }
 334         } while (*p1++);
 335         return (0);
 336 }
 337 
 338 int
 339 advance(const char *lp, const char *ep)
 340 {
 341         const char *curlp;
 342         int c;
 343         char *bbeg;
 344         register char neg;
 345         size_t ct;
 346 
 347         for (;;) {
 348                 neg = 0;
 349                 switch (*ep++) {
 350 
 351                 case CCHR:
 352                         if (*ep++ == *lp++)
 353                                 continue;
 354                         return (0);
 355                         /*FALLTHRU*/
 356 
 357                 case CDOT:
 358                         if (*lp++)
 359                                 continue;
 360                         return (0);
 361                         /*FALLTHRU*/
 362 
 363                 case CDOL:
 364                         if (*lp == 0)
 365                                 continue;
 366                         return (0);
 367                         /*FALLTHRU*/
 368 
 369                 case CCEOF:
 370                         loc2 = (char *)lp;
 371                         return (1);
 372                         /*FALLTHRU*/
 373 
 374                 case CXCL:
 375                         c = (unsigned char)*lp++;
 376                         if (ISTHERE(c)) {
 377                                 ep += 32;
 378                                 continue;
 379                         }
 380                         return (0);
 381                         /*FALLTHRU*/
 382 
 383                 case NCCL:
 384                         neg = 1;
 385                         /*FALLTHRU*/
 386 
 387                 case CCL:
 388                         c = *lp++;
 389                         if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
 390                                 ep += 16;
 391                                 continue;
 392                         }
 393                         return (0);
 394                         /*FALLTHRU*/
 395 
 396                 case CBRA:
 397                         braslist[*ep++] = (char *)lp;
 398                         continue;
 399                         /*FALLTHRU*/
 400 
 401                 case CKET:
 402                         braelist[*ep++] = (char *)lp;
 403                         continue;
 404                         /*FALLTHRU*/
 405 
 406                 case CCHR | RNGE:
 407                         c = *ep++;
 408                         getrnge(ep);
 409                         while (low--)
 410                                 if (*lp++ != c)
 411                                         return (0);
 412                         curlp = lp;
 413                         while (size--)
 414                                 if (*lp++ != c)
 415                                         break;
 416                         if (size < 0)
 417                                 lp++;
 418                         ep += 2;
 419                         goto star;
 420                         /*FALLTHRU*/
 421 
 422                 case CDOT | RNGE:
 423                         getrnge(ep);
 424                         while (low--)
 425                                 if (*lp++ == '\0')
 426                                         return (0);
 427                         curlp = lp;
 428                         while (size--)
 429                                 if (*lp++ == '\0')
 430                                         break;
 431                         if (size < 0)
 432                                 lp++;
 433                         ep += 2;
 434                         goto star;
 435                         /*FALLTHRU*/
 436 
 437                 case CXCL | RNGE:
 438                         getrnge(ep + 32);
 439                         while (low--) {
 440                                 c = (unsigned char)*lp++;
 441                                 if (!ISTHERE(c))
 442                                         return (0);
 443                         }
 444                         curlp = lp;
 445                         while (size--) {
 446                                 c = (unsigned char)*lp++;
 447                                 if (!ISTHERE(c))
 448                                         break;
 449                         }
 450                         if (size < 0)
 451                                 lp++;
 452                         ep += 34;               /* 32 + 2 */
 453                         goto star;
 454                         /*FALLTHRU*/
 455 
 456                 case NCCL | RNGE:
 457                         neg = 1;
 458                         /*FALLTHRU*/
 459 
 460                 case CCL | RNGE:
 461                         getrnge(ep + 16);
 462                         while (low--) {
 463                                 c = *lp++;
 464                                 if (((c & 0200) || !ISTHERE(c)) ^ neg)
 465                                         return (0);
 466                         }
 467                         curlp = lp;
 468                         while (size--) {
 469                                 c = *lp++;
 470                                 if (((c & 0200) || !ISTHERE(c)) ^ neg)
 471                                         break;
 472                         }
 473                         if (size < 0)
 474                                 lp++;
 475                         ep += 18;               /* 16 + 2 */
 476                         goto star;
 477                         /*FALLTHRU*/
 478 
 479                 case CBACK:
 480                         bbeg = braslist[*ep];
 481                         ct = braelist[*ep++] - bbeg;
 482 
 483                         if (ecmp(bbeg, lp, ct)) {
 484                                 lp += ct;
 485                                 continue;
 486                         }
 487                         return (0);
 488                         /*FALLTHRU*/
 489 
 490                 case CBACK | STAR:
 491                         bbeg = braslist[*ep];
 492                         ct = braelist[*ep++] - bbeg;
 493                         curlp = lp;
 494                         while (ecmp(bbeg, lp, ct))
 495                                 lp += ct;
 496 
 497                         while (lp >= curlp) {
 498                                 if (advance(lp, ep))
 499                                         return (1);
 500                                 lp -= ct;
 501                         }
 502                         return (0);
 503                         /*FALLTHRU*/
 504 
 505                 case CDOT | STAR:
 506                         curlp = lp;
 507                         while (*lp++);
 508                         goto star;
 509                         /*FALLTHRU*/
 510 
 511                 case CCHR | STAR:
 512                         curlp = lp;
 513                         while (*lp++ == *ep);
 514                         ep++;
 515                         goto star;
 516                         /*FALLTHRU*/
 517 
 518                 case CXCL | STAR:
 519                         curlp = lp;
 520                         do {
 521                                 c = (unsigned char)*lp++;
 522                         } while (ISTHERE(c));
 523                         ep += 32;
 524                         goto star;
 525                         /*FALLTHRU*/
 526 
 527                 case NCCL | STAR:
 528                         neg = 1;
 529                         /*FALLTHRU*/
 530 
 531                 case CCL | STAR:
 532                         curlp = lp;
 533                         do {
 534                                 c = *lp++;
 535                         } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
 536                         ep += 16;
 537                         goto star;
 538                         /*FALLTHRU*/
 539 
 540                 star:
 541                         do {
 542                                 if (--lp == locs)
 543                                         break;
 544                                 if (advance(lp, ep))
 545                                         return (1);
 546                         } while (lp > curlp);
 547                         return (0);
 548 
 549                 }
 550         }
 551         /*NOTREACHED*/
 552 }
 553 
 554 static void
 555 getrnge(const char *str)
 556 {
 557         low = *str++ & 0377;
 558         size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
 559 }
 560 
 561 #ifdef  __cplusplus
 562 }
 563 #endif
 564 
 565 #endif  /* _REGEXP_H */