/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1988 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_REGEXP_H
#define	_REGEXP_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.9 */

#include <string.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Simple regular expression compile and match routines.
 *
 * compile() uses the macros INIT, GETC(), PEEKC(), UNGETC(c), RETURN(ptr)
 * and ERROR(code), none of which is defined in this header; the including
 * file has to supply them before the #include.
 */

/*
 * Opcodes stored in the compiled expression.
 */
#define	CBRA	2	/* start of a \( group, followed by group index */
#define	CCHR	4	/* literal character, followed by the character */
#define	CDOT	8	/* '.' */
#define	CCL	12	/* bracket expression, followed by 16-byte bitmap */
#define	CXCL	16	/* bracket expression, followed by 32-byte bitmap */
#define	CDOL	20	/* '$' at the end of the pattern */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* end of a \) group, followed by group index */
#define	CBACK	36	/* back-reference \1..\9, followed by group index */
#define	NCCL	40	/* negated bracket expression, 16-byte bitmap */

/*
 * Modifiers or'ed into the opcode of the preceding item.
 */
#define	STAR	01	/* item was followed by '*' */
#define	RNGE	03	/* item was followed by \{m\}, \{m,\} or \{m,n\} */

#define	NBRA	9	/* maximum number of \( \) groups */

/*
 * Bitmap helpers; both rely on a variable named 'ep' in the calling scope
 * that points at the bitmap of the bracket expression being handled.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)

static char	*braslist[NBRA];	/* group start, stored at CBRA */
static char	*braelist[NBRA];	/* group end, stored at CKET */
int	sed, nbra;	/* sed: read by compile(); nbra: count of \( seen */
char	*loc1, *loc2, *locs;	/* match start, match end, backtrack stop */
static int	nodelim;	/* set when a newline ended the pattern */

int	circf;		/* set when the pattern began with '^' */
static int	low;	/* minimum repeat count, set by getrnge() */
static int	size;	/* additional repeats allowed, set by getrnge() */

static const unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

#ifdef	__STDC__
int advance(const char *lp, const char *ep);
static void getrnge(const char *str);
#else
int advance();
static void getrnge();
#endif

/*
 * compile() - translate a regular expression into its compiled form.
 *
 * The pattern text is read exclusively through the includer's GETC(),
 * PEEKC() and UNGETC() macros.  'instring' is not referenced here directly;
 * it is available to the includer's INIT macro.  The compiled form is
 * written starting at 'ep'; nothing is stored at or beyond 'endbuf'.
 * 'seof' is the character that terminates the pattern.
 *
 * Leaves through RETURN(ep), with ep pointing just past the CCEOF byte, or
 * through ERROR(code):
 *	11  repeat count in \{ \} is 255 or larger
 *	16  non-digit inside \{ \}
 *	25  \digit refers to a group that has not been closed
 *	36  newline in the pattern (after a backslash, or when 'sed' is set)
 *	41  empty pattern while *ep is 0 and 'sed' is not set
 *	42  unbalanced \( \)
 *	43  more than NBRA groups
 *	44  more than one comma inside \{ \}
 *	45  \{ not closed by \}
 *	46  second number in \{m,n\} smaller than the first
 *	49  NUL or newline inside a bracket expression
 *	50  compiled expression does not fit in front of endbuf
 *
 * Every ERROR()/UNGETC() use is braced so that includers may define the
 * macros as more than one statement.
 */
char *
#ifdef	__STDC__
compile(char *instring, char *ep, const char *endbuf, int seof)
#else
compile(instring, ep, endbuf, seof)
register char *ep;
char *instring, *endbuf;
int seof;
#endif
{
	INIT	/* Dependent declarations and initializations */
	int c;
	int eof = seof;
	char *lastep;		/* item that a following * or \{ modifies */
	int cclcnt;
	char bracket[NBRA], *bracketp;	/* stack of currently open groups */
	int closed;		/* number of \) seen so far */
	int neg;
	int lc;			/* previous bracket member, low end of a range */
	int i, cflg;
	int iflag; /* used for non-ascii characters in brackets */

#ifdef __lint
	/* make lint happy */
	c = nodelim;
#endif

	lastep = NULL;
	if ((c = GETC()) == eof || c == '\n') {
		/* empty pattern: the buffer is left as the caller passed it */
		if (c == '\n') {
			UNGETC(c);
			nodelim = 1;
		}
		if (*ep == 0 && !sed) {
			ERROR(41);
		}
		RETURN(ep);
	}
	bracketp = bracket;
	circf = closed = nbra = 0;
	if (c == '^') {
		circf++;
	} else {
		UNGETC(c);
	}
	for (;;) {
		if (ep >= endbuf) {
			ERROR(50);
		}
		c = GETC();
		/* '*' and "\{" modify the previous item; keep lastep on it */
		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
			lastep = ep;
		if (c == eof) {
			*ep++ = CCEOF;
			if (bracketp != bracket) {
				ERROR(42);
			}
			RETURN(ep);
		}
		switch (c) {

		case '.':
			*ep++ = CDOT;
			continue;

		case '\n':
			if (!sed) {
				UNGETC(c);
				*ep++ = CCEOF;
				nodelim = 1;
				if (bracketp != bracket) {
					ERROR(42);
				}
				RETURN(ep);
			} else {
				ERROR(36);
			}
			/*FALLTHRU*/
		case '*':
			if (lastep == NULL || *lastep == CBRA ||
			    *lastep == CKET)
				goto defchar;
			*lastep |= STAR;
			continue;

		case '$':
			/* '$' is an anchor only as the last pattern char */
			if (PEEKC() != eof && PEEKC() != '\n')
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
			/* same limit as the historical &ep[17] >= endbuf */
			if (endbuf - ep <= 17) {
				ERROR(50);
			}

			*ep++ = CCL;
			lc = 0;
			for (i = 0; i < 16; i++)
				ep[i] = 0;

			neg = 0;
			if ((c = GETC()) == '^') {
				neg = 1;
				c = GETC();
			}
			iflag = 1;
			do {
				c &= 0377;
				if (c == '\0' || c == '\n') {
					ERROR(49);
				}
				if ((c & 0200) && iflag) {
					/* first 8-bit member: widen bitmap */
					iflag = 0;
					if (endbuf - ep <= 32) {
						ERROR(50);
					}
					ep[-1] = CXCL;
					for (i = 16; i < 32; i++)
						ep[i] = 0;
				}
				if (c == '-' && lc != 0) {
					if ((c = GETC()) == ']') {
						PLACE('-');
						break;
					}
					/*
					 * The range end needs the same
					 * treatment as any other member:
					 * unmasked, a negative value would
					 * index in front of the bitmap, and
					 * an unchecked NUL would let the
					 * loop read past the pattern.
					 */
					c &= 0377;
					if (c == '\0' || c == '\n') {
						ERROR(49);
					}
					if ((c & 0200) && iflag) {
						iflag = 0;
						if (endbuf - ep <= 32) {
							ERROR(50);
						}
						ep[-1] = CXCL;
						for (i = 16; i < 32; i++)
							ep[i] = 0;
					}
					while (lc < c) {
						PLACE(lc);
						lc++;
					}
				}
				lc = c;
				PLACE(c);
			} while ((c = GETC()) != ']');

			/* from here on iflag is the bitmap size in bytes */
			if (iflag)
				iflag = 16;
			else
				iflag = 32;

			if (neg) {
				if (iflag == 32) {
					for (cclcnt = 0; cclcnt < iflag;
					    cclcnt++)
						ep[cclcnt] ^= 0377;
					/* bit 0 off: NUL is never a member */
					ep[0] &= 0376;
				} else {
					ep[-1] = NCCL;
					/* make nulls match so test fails */
					ep[0] |= 01;
				}
			}

			ep += iflag;

			continue;

		case '\\':
			switch (c = GETC()) {

			case '(':
				if (nbra >= NBRA) {
					ERROR(43);
				}
				if (endbuf - ep < 2) {
					ERROR(50);
				}
				*bracketp++ = (char)nbra;
				*ep++ = CBRA;
				*ep++ = (char)nbra++;
				continue;

			case ')':
				if (bracketp <= bracket) {
					ERROR(42);
				}
				if (endbuf - ep < 2) {
					ERROR(50);
				}
				*ep++ = CKET;
				*ep++ = *--bracketp;
				closed++;
				continue;

			case '{':
				if (lastep == NULL)
					goto defchar;
				*lastep |= RNGE;
				cflg = 0;
			nlim:
				c = GETC();
				i = 0;
				do {
					if ('0' <= c && c <= '9') {
						/*
						 * Stop accumulating once the
						 * value is out of range; it
						 * is rejected below, and this
						 * keeps a long digit string
						 * from overflowing i.
						 */
						if (i < 255)
							i = 10 * i + c - '0';
					} else {
						ERROR(16);
					}
				} while (((c = GETC()) != '\\') && (c != ','));
				if (i >= 255) {
					ERROR(11);
				}
				if (ep >= endbuf) {
					ERROR(50);
				}
				*ep++ = (char)i;
				if (c == ',') {
					if (cflg++) {
						ERROR(44);
					}
					if ((c = GETC()) == '\\') {
						/* "\{m,\}": 255 = no limit */
						if (ep >= endbuf) {
							ERROR(50);
						}
						*ep++ = (char)255;
					} else {
						UNGETC(c);
						goto nlim;
						/* get 2'nd number */
					}
				}
				if (GETC() != '}') {
					ERROR(45);
				}
				if (!cflg) {	/* one number */
					if (ep >= endbuf) {
						ERROR(50);
					}
					*ep++ = (char)i;
				} else if ((ep[-1] & 0377) < (ep[-2] & 0377)) {
					ERROR(46);
				}
				continue;

			case '\n':
				ERROR(36);
				/*FALLTHRU*/
			case 'n':
				c = '\n';
				goto defchar;

			default:
				if (c >= '1' && c <= '9') {
					if ((c -= '1') >= closed) {
						ERROR(25);
					}
					if (endbuf - ep < 2) {
						ERROR(50);
					}
					*ep++ = CBACK;
					*ep++ = (char)c;
					continue;
				}
			}
	/* Drop through to default to use \ to turn off special chars */

		defchar:
		default:
			/* a literal takes two bytes: opcode and character */
			if (endbuf - ep < 2) {
				ERROR(50);
			}
			lastep = ep;
			*ep++ = CCHR;
			*ep++ = (char)c;
		}
	}
	/*NOTREACHED*/
}

/*
 * step() - search the string p1 for a match of the compiled expression p2.
 *
 * When circf is set only the start of p1 is tried; otherwise every
 * position of p1, including the terminating NUL, is tried in turn.
 * Returns 1 with loc1 set to the start of the match (advance() sets loc2
 * to its end), or 0 when nothing matches.
 */
#ifdef	__STDC__
int
step(const char *p1, const char *p2)
#else
int
step(p1, p2)
register char *p1, *p2;
#endif
{
	char c;


	if (circf) {
		loc1 = (char *)p1;
		return (advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2 == CCHR) {
		c = p2[1];
		do {
			if (*p1 != c)
				continue;
			if (advance(p1, p2)) {
				loc1 = (char *)p1;
				return (1);
			}
		} while (*p1++);
		return (0);
	}
	/* regular algorithm */
	do {
		if (advance(p1, p2)) {
			loc1 = (char *)p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}

/*
 * advance() - match the compiled expression ep against the string lp,
 * anchored at lp.
 *
 * Returns 1 and sets loc2 to the first character after the match, or 0.
 * Repeated items first take as many characters as they can and then give
 * them back one at a time (label "star"), calling advance() recursively
 * for the rest of the expression; giving back stops early when lp
 * reaches locs.
 *
 * Opcode bytes that have no case below are skipped by the loop.
 */
int
#ifdef	__STDC__
advance(const char *lp, const char *ep)
#else
advance(lp, ep)
register char *lp, *ep;
#endif
{
#ifdef	__STDC__
	const char *curlp;
#else
	register char *curlp;
#endif
	int c;
	char *bbeg;
	char neg;
	size_t ct;

	for (;;) {
		neg = 0;
		switch (*ep++) {

		case CCHR:
			if (*ep++ == *lp++)
				continue;
			return (0);

		case CDOT:
			if (*lp++)
				continue;
			return (0);

		case CDOL:
			if (*lp == 0)
				continue;
			return (0);

		case CCEOF:
			loc2 = (char *)lp;
			return (1);

		case CXCL:
			c = (unsigned char)*lp++;
			if (ISTHERE(c)) {
				ep += 32;
				continue;
			}
			return (0);

		case NCCL:
			neg = 1;
			/*FALLTHRU*/

		case CCL:
			/* 16-byte bitmap: 8-bit characters are never members */
			c = *lp++;
			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
				ep += 16;
				continue;
			}
			return (0);

		case CBRA:
			braslist[*ep++] = (char *)lp;
			continue;

		case CKET:
			braelist[*ep++] = (char *)lp;
			continue;

		case CCHR | RNGE:
			c = *ep++;
			getrnge(ep);
			while (low--)
				if (*lp++ != c)
					return (0);
			curlp = lp;
			while (size--)
				if (*lp++ != c)
					break;
			/* "star" starts with --lp; compensate on a full run */
			if (size < 0)
				lp++;
			ep += 2;
			goto star;

		case CDOT | RNGE:
			getrnge(ep);
			while (low--)
				if (*lp++ == '\0')
					return (0);
			curlp = lp;
			while (size--)
				if (*lp++ == '\0')
					break;
			if (size < 0)
				lp++;
			ep += 2;
			goto star;

		case CXCL | RNGE:
			getrnge(ep + 32);
			while (low--) {
				c = (unsigned char)*lp++;
				if (!ISTHERE(c))
					return (0);
			}
			curlp = lp;
			while (size--) {
				c = (unsigned char)*lp++;
				if (!ISTHERE(c))
					break;
			}
			if (size < 0)
				lp++;
			ep += 34;		/* 32 + 2 */
			goto star;

		case NCCL | RNGE:
			neg = 1;
			/*FALLTHRU*/

		case CCL | RNGE:
			getrnge(ep + 16);
			while (low--) {
				c = *lp++;
				if (((c & 0200) || !ISTHERE(c)) ^ neg)
					return (0);
			}
			curlp = lp;
			while (size--) {
				c = *lp++;
				if (((c & 0200) || !ISTHERE(c)) ^ neg)
					break;
			}
			if (size < 0)
				lp++;
			ep += 18;		/* 16 + 2 */
			goto star;

		case CBACK:
			bbeg = braslist[*ep];
			ct = braelist[*ep++] - bbeg;

			if (ecmp(bbeg, lp, ct)) {
				lp += ct;
				continue;
			}
			return (0);

		case CBACK | STAR:
			bbeg = braslist[*ep];
			ct = braelist[*ep++] - bbeg;
			/*
			 * An empty group matches the empty string any number
			 * of times without consuming input.  Without this
			 * test both loops below would spin forever, because
			 * a zero-length compare always succeeds and lp would
			 * never move.
			 */
			if (ct == 0)
				continue;
			curlp = lp;
			while (ecmp(bbeg, lp, ct))
				lp += ct;

			while (lp >= curlp) {
				if (advance(lp, ep))
					return (1);
				lp -= ct;
			}
			return (0);

		case CDOT | STAR:
			curlp = lp;
			while (*lp++)
				;
			goto star;

		case CCHR | STAR:
			curlp = lp;
			while (*lp++ == *ep)
				;
			ep++;
			goto star;

		case CXCL | STAR:
			curlp = lp;
			do {
				c = (unsigned char)*lp++;
			} while (ISTHERE(c));
			ep += 32;
			goto star;

		case NCCL | STAR:
			neg = 1;
			/*FALLTHRU*/

		case CCL | STAR:
			curlp = lp;
			do {
				c = *lp++;
			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
			ep += 16;
			goto star;

		star:
			/* give back one character at a time, longest first */
			do {
				if (--lp == locs)
					break;
				if (advance(lp, ep))
					return (1);
			} while (lp > curlp);
			return (0);

		}
	}
	/*NOTREACHED*/
}

/*
 * getrnge() - decode the two count bytes of a \{ \} item found at str.
 *
 * Sets low to the first byte and size to the number of further repeats
 * allowed beyond low; a second byte of 255 (written for "\{m,\}") yields
 * 20000.
 */
static void
#ifdef	__STDC__
getrnge(const char *str)
#else
getrnge(str)
register char *str;
#endif
{
	low = *str++ & 0377;
	size = ((*str & 0377) == 255) ? 20000 : (*str & 0377) - low;
}

#ifdef	__cplusplus
}
#endif

#endif	/* _REGEXP_H */