/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */

/*
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 *
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_REGEXP_H
#define	_REGEXP_H

/*
 * Simple regular expression compiler and matcher, provided as source that
 * is textually included by its user.
 *
 * compile() expands the macros INIT, GETC(), PEEKC(), UNGETC(c), RETURN(p)
 * and ERROR(n).  None of them is defined in this file, so the including
 * source must define all six before the #include.  Every ERROR() use below
 * is written as a single unbraced statement ("if (x) ERROR(n);").
 */

#include <string.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Opcodes of the compiled expression.  The low two bits of an opcode are
 * left free so that STAR or RNGE can be or-ed into it.
 */
#define	CBRA	2	/* start of \( group; followed by group index */
#define	CCHR	4	/* literal character; followed by that character */
#define	CDOT	8	/* '.' */
#define	CCL	12	/* bracket class; followed by a 16 byte bitmap */
#define	CXCL	16	/* bracket class; followed by a 32 byte bitmap */
#define	CDOL	20	/* '$' at the end of the pattern */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* end of \) group; followed by group index */
#define	CBACK	36	/* \1 .. \9; followed by group index */
#define	NCCL	40	/* negated 16 byte bracket class */

#define	STAR	01	/* item is followed by '*' */
#define	RNGE	03	/* item is followed by \{m,n\}; two bounds follow */

#define	NBRA	9	/* maximum number of \( \) groups */

/* Set / test the bit for character code c in the bitmap that ep points at. */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)

static char *braslist[NBRA];	/* where each group started matching */
static char *braelist[NBRA];	/* where each group stopped matching */
int sed, nbra;
char *loc1, *loc2, *locs;
static int nodelim;		/* set when a newline ended the pattern */

int circf;			/* pattern began with '^' */
static int low;			/* \{m,n\} lower bound, set by getrnge() */
static int size;		/* \{m,n\} optional repetitions, ditto */

static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

int advance(const char *lp, const char *ep);
static void getrnge(const char *str);

/*
 * Compile the regular expression delivered by GETC()/PEEKC()/UNGETC() into
 * the buffer starting at ep.
 *
 *	instring	not used by this function itself; it is in scope for
 *			the caller-supplied INIT macro
 *	ep		where the compiled form is written
 *	endbuf		write limit; no byte is stored at or beyond it
 *	seof		character that terminates the pattern
 *
 * On success RETURN() is invoked with a pointer just past the compiled
 * expression.  Failures invoke ERROR() with these codes:
 *	11  \{ \} bound is 255 or larger	16  non-digit inside \{ \}
 *	25  \digit refers to an unclosed group	36  newline inside the pattern
 *					    (sed set, or escaped newline)
 *	41  empty pattern while *ep is 0 and sed is unset
 *	42  \( \) imbalance			43  more than NBRA groups
 *	44  more than one ',' in \{ \}		45  '}' missing after '\'
 *	46  first bound larger than second	49  NUL or newline inside [ ]
 *	50  compiled expression does not fit
 *
 * Side effects: sets circf, nbra and nodelim.
 */
char *
compile(char *instring, char *ep, const char *endbuf, int seof)
{
	INIT	/* Dependent declarations and initializations */
	int c;
	int eof = seof;
	char *lastep;
	int cclcnt;
	char bracket[NBRA], *bracketp;
	int closed;
	int neg;
	int lc;
	int i, cflg;
	int iflag; /* used for non-ascii characters in brackets */

#ifdef __lint
	/* make lint happy */
	c = nodelim;
#endif

	lastep = NULL;
	if ((c = GETC()) == eof || c == '\n') {
		/* Empty pattern: the buffer is left exactly as it was. */
		if (c == '\n') {
			UNGETC(c);
			nodelim = 1;
		}
		if (*ep == 0 && !sed)
			ERROR(41);
		RETURN(ep);
	}
	bracketp = bracket;
	circf = closed = nbra = 0;
	if (c == '^')
		circf++;
	else
		UNGETC(c);
	for (;;) {
		if (ep >= endbuf)
			ERROR(50);
		c = GETC();
		/*
		 * lastep tracks the item a following '*' or \{ applies to,
		 * so it must not move when one of those two is read.
		 */
		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
			lastep = ep;
		if (c == eof) {
			*ep++ = CCEOF;
			if (bracketp != bracket)
				ERROR(42);
			RETURN(ep);
		}
		switch (c) {

		case '.':
			*ep++ = CDOT;
			continue;

		case '\n':
			if (!sed) {
				UNGETC(c);
				*ep++ = CCEOF;
				nodelim = 1;
				if (bracketp != bracket)
					ERROR(42);
				RETURN(ep);
			} else ERROR(36);
			/* reached only if ERROR() hands control back */
			/*FALLTHRU*/

		case '*':
			/* A '*' with nothing to repeat is a literal '*'. */
			if (lastep == NULL || *lastep == CBRA ||
			    *lastep == CKET)
				goto defchar;
			*lastep |= STAR;
			continue;

		case '$':
			/* '$' is an anchor only as the last character. */
			if (PEEKC() != eof && PEEKC() != '\n')
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
			if (&ep[17] >= endbuf)
				ERROR(50);

			*ep++ = CCL;
			lc = 0;
			for (i = 0; i < 16; i++)
				ep[i] = 0;

			neg = 0;
			if ((c = GETC()) == '^') {
				neg = 1;
				c = GETC();
			}
			/* iflag stays 1 until a character >= 0200 is seen */
			iflag = 1;
			do {
				c &= 0377;
				if (c == '\0' || c == '\n')
					ERROR(49);
				if ((c & 0200) && iflag) {
					/* widen to the 32 byte bitmap */
					iflag = 0;
					if (&ep[32] >= endbuf)
						ERROR(50);
					ep[-1] = CXCL;
					for (i = 16; i < 32; i++)
						ep[i] = 0;
				}
				if (c == '-' && lc != 0) {
					if ((c = GETC()) == ']') {
						PLACE('-');
						break;
					}
					/*
					 * The range end gets the same masking
					 * and validation as any other member.
					 * Unmasked, a negative value from
					 * GETC() would make PLACE() write in
					 * front of the bitmap, and a NUL would
					 * let the scan run past the pattern.
					 */
					c &= 0377;
					if (c == '\0' || c == '\n')
						ERROR(49);
					if ((c & 0200) && iflag) {
						iflag = 0;
						if (&ep[32] >= endbuf)
							ERROR(50);
						ep[-1] = CXCL;
						for (i = 16; i < 32; i++)
							ep[i] = 0;
					}
					while (lc < c) {
						PLACE(lc);
						lc++;
					}
				}
				lc = c;
				PLACE(c);
			} while ((c = GETC()) != ']');

			/* from here on iflag is the bitmap size in bytes */
			if (iflag)
				iflag = 16;
			else
				iflag = 32;

			if (neg) {
				if (iflag == 32) {
					for (cclcnt = 0; cclcnt < iflag;
					    cclcnt++)
						ep[cclcnt] ^= 0377;
					ep[0] &= 0376;
				} else {
					ep[-1] = NCCL;
					/* make nulls match so test fails */
					ep[0] |= 01;
				}
			}

			ep += iflag;

			continue;

		case '\\':
			switch (c = GETC()) {

			case '(':
				if (nbra >= NBRA)
					ERROR(43);
				/* two bytes follow; keep both below endbuf */
				if (&ep[1] >= endbuf)
					ERROR(50);
				*bracketp++ = (char)nbra;
				*ep++ = CBRA;
				*ep++ = (char)nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					ERROR(42);
				if (&ep[1] >= endbuf)
					ERROR(50);
				*ep++ = CKET;
				*ep++ = *--bracketp;
				closed++;
				continue;

			case '{':
				if (lastep == NULL)
					goto defchar;
				*lastep |= RNGE;
				cflg = 0;
			nlim:
				c = GETC();
				i = 0;
				do {
					if ('0' <= c && c <= '9') {
						/*
						 * Saturate instead of letting
						 * a long digit string overflow
						 * the int; anything >= 255 is
						 * rejected just below.
						 */
						if (i < 255)
							i = 10 * i + c - '0';
					} else
						ERROR(16);
				} while (((c = GETC()) != '\\') && (c != ','));
				if (i >= 255)
					ERROR(11);
				if (ep >= endbuf)
					ERROR(50);
				*ep++ = (char)i;
				if (c == ',') {
					if (cflg++)
						ERROR(44);
					if ((c = GETC()) == '\\') {
						/* "\{m,\}": no upper bound */
						if (ep >= endbuf)
							ERROR(50);
						*ep++ = (char)255;
					} else {
						UNGETC(c);
						goto nlim;
						/* get 2'nd number */
					}
				}
				if (GETC() != '}')
					ERROR(45);
				if (!cflg) {	/* one number */
					if (ep >= endbuf)
						ERROR(50);
					*ep++ = (char)i;
				} else if ((ep[-1] & 0377) < (ep[-2] & 0377))
					ERROR(46);
				continue;

			case '\n':
				ERROR(36);
				/* reached only if ERROR() hands control back */
				/*FALLTHRU*/

			case 'n':
				c = '\n';
				goto defchar;

			default:
				if (c >= '1' && c <= '9') {
					if ((c -= '1') >= closed)
						ERROR(25);
					if (&ep[1] >= endbuf)
						ERROR(50);
					*ep++ = CBACK;
					*ep++ = (char)c;
					continue;
				}
			}
			/* Drop through to default to use \ to turn off special chars */
			/*FALLTHRU*/

		defchar:
		default:
			if (&ep[1] >= endbuf)
				ERROR(50);
			lastep = ep;
			*ep++ = CCHR;
			*ep++ = (char)c;
		}
	}
	/*NOTREACHED*/
}

/*
 * Search the string p1 for the first match of the compiled expression p2.
 *
 * Returns 1 on a match, with loc1 set to its first character and loc2 (set
 * by advance()) to the character after it; returns 0 otherwise.  When the
 * pattern was anchored with '^' (circf) only the start of p1 is tried.
 */
int
step(const char *p1, const char *p2)
{
	char c;

	if (circf) {
		loc1 = (char *)p1;
		return (advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2 == CCHR) {
		c = p2[1];
		do {
			if (*p1 != c)
				continue;
			if (advance(p1, p2)) {
				loc1 = (char *)p1;
				return (1);
			}
		} while (*p1++);
		return (0);
	}
	/* regular algorithm */
	do {
		if (advance(p1, p2)) {
			loc1 = (char *)p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}

/*
 * Match the compiled expression ep against the string lp, anchored at lp.
 *
 * Returns 1 and sets loc2 to the character following the match, or returns
 * 0.  Group boundaries are recorded in braslist[] / braelist[].  Repeated
 * items take as many characters as they can and then give them back one at
 * a time (label "star"), recursing for the rest of the expression; giving
 * back stops early if the position reaches locs.
 */
int
advance(const char *lp, const char *ep)
{
	const char *curlp;
	int c;
	char *bbeg;
	char neg;
	size_t ct;

	for (;;) {
		neg = 0;
		switch (*ep++) {

		case CCHR:
			if (*ep++ == *lp++)
				continue;
			return (0);

		case CDOT:
			if (*lp++)
				continue;
			return (0);

		case CDOL:
			if (*lp == 0)
				continue;
			return (0);

		case CCEOF:
			loc2 = (char *)lp;
			return (1);

		case CXCL:
			c = (unsigned char)*lp++;
			if (ISTHERE(c)) {
				ep += 32;
				continue;
			}
			return (0);

		case NCCL:
			neg = 1;
			/*FALLTHRU*/

		case CCL:
			/* the 16 byte bitmap only covers codes below 0200 */
			c = *lp++;
			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
				ep += 16;
				continue;
			}
			return (0);

		case CBRA:
			braslist[(unsigned char)*ep++] = (char *)lp;
			continue;

		case CKET:
			braelist[(unsigned char)*ep++] = (char *)lp;
			continue;

		case CCHR | RNGE:
			c = *ep++;
			getrnge(ep);
			while (low--)
				if (*lp++ != c)
					return (0);
			curlp = lp;
			while (size--)
				if (*lp++ != c)
					break;
			/*
			 * If every optional repetition matched, step once more
			 * so that the "--lp" at star lands after the last one.
			 */
			if (size < 0)
				lp++;
			ep += 2;
			goto star;

		case CDOT | RNGE:
			getrnge(ep);
			while (low--)
				if (*lp++ == '\0')
					return (0);
			curlp = lp;
			while (size--)
				if (*lp++ == '\0')
					break;
			if (size < 0)
				lp++;
			ep += 2;
			goto star;

		case CXCL | RNGE:
			getrnge(ep + 32);
			while (low--) {
				c = (unsigned char)*lp++;
				if (!ISTHERE(c))
					return (0);
			}
			curlp = lp;
			while (size--) {
				c = (unsigned char)*lp++;
				if (!ISTHERE(c))
					break;
			}
			if (size < 0)
				lp++;
			ep += 34;		/* 32 + 2 */
			goto star;

		case NCCL | RNGE:
			neg = 1;
			/*FALLTHRU*/

		case CCL | RNGE:
			getrnge(ep + 16);
			while (low--) {
				c = *lp++;
				if (((c & 0200) || !ISTHERE(c)) ^ neg)
					return (0);
			}
			curlp = lp;
			while (size--) {
				c = *lp++;
				if (((c & 0200) || !ISTHERE(c)) ^ neg)
					break;
			}
			if (size < 0)
				lp++;
			ep += 18;		/* 16 + 2 */
			goto star;

		case CBACK:
			bbeg = braslist[(unsigned char)*ep];
			ct = braelist[(unsigned char)*ep++] - bbeg;

			if (ecmp(bbeg, lp, ct)) {
				lp += ct;
				continue;
			}
			return (0);

		case CBACK | STAR:
			bbeg = braslist[(unsigned char)*ep];
			ct = braelist[(unsigned char)*ep++] - bbeg;
			/*
			 * If the group matched the empty string, any number of
			 * repetitions of it is still the empty string.  Both
			 * loops below would then spin forever (comparing zero
			 * bytes always succeeds, and stepping by zero never
			 * moves lp), so just go on with the rest.
			 */
			if (ct == 0)
				continue;
			curlp = lp;
			while (ecmp(bbeg, lp, ct))
				lp += ct;

			while (lp >= curlp) {
				if (advance(lp, ep))
					return (1);
				lp -= ct;
			}
			return (0);

		case CDOT | STAR:
			curlp = lp;
			while (*lp++)
				;
			goto star;

		case CCHR | STAR:
			curlp = lp;
			while (*lp++ == *ep)
				;
			ep++;
			goto star;

		case CXCL | STAR:
			curlp = lp;
			do {
				c = (unsigned char)*lp++;
			} while (ISTHERE(c));
			ep += 32;
			goto star;

		case NCCL | STAR:
			neg = 1;
			/*FALLTHRU*/

		case CCL | STAR:
			curlp = lp;
			do {
				c = *lp++;
			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
			ep += 16;
			goto star;

		star:
			/* lp is one past the first character that failed */
			do {
				if (--lp == locs)
					break;
				if (advance(lp, ep))
					return (1);
			} while (lp > curlp);
			return (0);

		}
	}
	/*NOTREACHED*/
}

/*
 * Decode the two bound bytes of a \{m,n\} item at str into low (required
 * repetitions) and size (additional optional repetitions).  An upper bound
 * byte of 255 stands for "no upper bound" and yields a size of 20000.
 */
static void
getrnge(const char *str)
{
	low = *str++ & 0377;
	size = ((*str & 0377) == 255) ? 20000 : (*str & 0377) - low;
}

#ifdef	__cplusplus
}
#endif

#endif	/* _REGEXP_H */