1 %{ 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License, Version 1.0 only 7 * (the "License"). You may not use this file except in compliance 8 * with the License. 9 * 10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 11 * or http://www.opensolaris.org/os/licensing. 12 * See the License for the specific language governing permissions 13 * and limitations under the License. 14 * 15 * When distributing Covered Code, include this CDDL HEADER in each 16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 17 * If applicable, add the following below this CDDL HEADER, with the 18 * fields enclosed by brackets "[]" replaced with your own identifying 19 * information: Portions Copyright [yyyy] [name of copyright owner] 20 * 21 * CDDL HEADER END 22 */ 23 %} 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 30 /* All Rights Reserved */ 31 32 /* Copyright (c) 1987, 1988 Microsoft Corporation */ 33 /* All Rights Reserved */ 34 35 /* 36 * Copyright 2013 Damian Bogel. All rights reserved. 37 */ 38 39 /* 40 * egrep -- print lines containing (or not containing) a regular expression 41 * 42 * status returns: 43 * 0 - ok, and some matches 44 * 1 - ok, but no matches 45 * 2 - some error; matches irrelevant 46 */ 47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST 48 %left OR 49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '(' 50 %left CAT 51 %left STAR PLUS QUEST 52 53 %{ 54 #include <stdio.h> 55 #include <ctype.h> 56 #include <memory.h> 57 #include <wchar.h> 58 #include <wctype.h> 59 #include <widec.h> 60 #include <stdlib.h> 61 #include <limits.h> 62 #include <locale.h> 63 64 #define STDIN_FILENAME gettext("(standard input)") 65 66 #define BLKSIZE 512 /* size of reported disk blocks */ 67 #define EBUFSIZ 8192 68 #define MAXLIN 350 69 #define NCHARS 256 70 #define MAXPOS 4000 71 #define NSTATES 64 72 #define FINAL -1 73 #define RIGHT '\n' /* serves as record separator and as $ */ 74 #define LEFT '\n' /* beginning of line */ 75 int gotofn[NSTATES][NCHARS]; 76 int state[NSTATES]; 77 int out[NSTATES]; 78 int line = 1; 79 int *name; 80 int *left; 81 int *right; 82 int *parent; 83 int *foll; 84 int *positions; 85 char *chars; 86 wchar_t *lower; 87 wchar_t *upper; 88 int maxlin, maxclin, maxwclin, maxpos; 89 int nxtpos = 0; 90 int inxtpos; 91 int nxtchar = 0; 92 int *tmpstat; 93 int *initstat; 94 int istat; 95 int nstate = 1; 96 int xstate; 97 int count; 98 int icount; 99 char *input; 100 101 102 wchar_t lyylval; 103 wchar_t nextch(); 104 wchar_t maxmin(); 105 int compare(); 106 void overflo(); 107 108 char reinit = 0; 109 110 long long lnum; 111 int bflag; 112 int cflag; 113 int eflag; 114 int fflag; 115 int Hflag; 116 int hflag; 117 int iflag; 118 int lflag; 119 int nflag; 120 int qflag; 121 int vflag; 122 int nfile; 123 long long blkno; 124 long long tln; 125 int nsucc; 126 int badbotch; 127 extern char *optarg; 128 extern int optind; 129 130 int f; 131 FILE *expfile; 132 %} 133 134 %% 135 s: t 136 { 137 unary(FINAL, $1); 138 line--; 139 } 140 ; 141 t: b r 142 { $$ = node(CAT, $1, $2); } 143 | OR b r OR 144 { $$ = node(CAT, $2, $3); } 145 | OR b r 146 { $$ = node(CAT, $2, $3); } 147 | b r OR 148 { $$ = node(CAT, $1, $2); } 149 ; 150 b: 151 { /* if(multibyte) 152 $$ = mdotenter(); 153 else */ 154 $$ = enter(DOT); 155 $$ = unary(STAR, $$); 156 } 157 ; 158 r: CHAR 159 { $$ = iflag && isalpha($1) ? 160 node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); } 161 | MCHAR 162 { $$ = (iflag && iswalpha(lyylval)) ? 163 node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) : 164 mchar(lyylval); } 165 | DOT 166 { if(multibyte) 167 $$ = mdotenter(); 168 else 169 $$ = enter(DOT); 170 } 171 | CCL 172 { $$ = cclenter(CCL); } 173 | NCCL 174 { $$ = cclenter(NCCL); } 175 | MCCL 176 { $$ = ccl(CCL); } 177 | NMCCL 178 { $$ = ccl(NCCL); } 179 ; 180 181 r: r OR r 182 { $$ = node(OR, $1, $3); } 183 | r r %prec CAT 184 { $$ = node(CAT, $1, $2); } 185 | r STAR 186 { $$ = unary(STAR, $1); } 187 | r PLUS 188 { $$ = unary(PLUS, $1); } 189 | r QUEST 190 { $$ = unary(QUEST, $1); } 191 | '(' r ')' 192 { $$ = $2; } 193 | error 194 ; 195 196 %% 197 void add(int *, int); 198 void clearg(void); 199 void execute(char *); 200 void follow(int); 201 int mgetc(void); 202 void synerror(void); 203 204 205 void 206 yyerror(char *s) 207 { 208 fprintf(stderr, "egrep: %s\n", s); 209 exit(2); 210 } 211 212 int 213 yylex(void) 214 { 215 extern int yylval; 216 int cclcnt, x, ccount, oldccount; 217 wchar_t c, lc; 218 219 c = nextch(); 220 switch(c) { 221 case '^': 222 yylval = LEFT; 223 return(CHAR); 224 case '$': 225 c = RIGHT; 226 goto defchar; 227 case '|': return (OR); 228 case '*': return (STAR); 229 case '+': return (PLUS); 230 case '?': return (QUEST); 231 case '(': return (c); 232 case ')': return (c); 233 case '.': return(DOT); 234 case '\0': return (0); 235 case RIGHT: return (OR); 236 case '[': 237 x = (multibyte ? MCCL : CCL); 238 cclcnt = 0; 239 count = nxtchar++; 240 if ((c = nextch()) == '^') { 241 x = (multibyte ? NMCCL : NCCL); 242 c = nextch(); 243 } 244 lc = 0; 245 do { 246 if (iflag && iswalpha(c)) 247 c = towlower(c); 248 if (c == '\0') synerror(); 249 if (c == '-' && cclcnt > 0 && lc != 0) { 250 if ((c = nextch()) != 0) { 251 if(c == ']') { 252 chars[nxtchar++] = '-'; 253 cclcnt++; 254 break; 255 } 256 if (iflag && iswalpha(c)) 257 c = towlower(c); 258 if (!multibyte || 259 (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) && 260 lc < c && 261 !iswcntrl(c) && !iswcntrl(lc)) { 262 if (nxtchar >= maxclin) 263 if (allocchars() == 0) 264 overflo(); 265 chars[nxtchar++] = '-'; 266 cclcnt++; 267 } 268 } 269 } 270 ccount = oldccount = nxtchar; 271 if(ccount + MB_LEN_MAX >= maxclin) 272 if(allocchars() == 0) 273 overflo(); 274 ccount += wctomb(&chars[ccount], c); 275 cclcnt += ccount - oldccount; 276 nxtchar += ccount - oldccount; 277 lc = c; 278 } while ((c = nextch()) != ']'); 279 chars[count] = cclcnt; 280 return(x); 281 282 case '\\': 283 if ((c = nextch()) == '\0') synerror(); 284 defchar: 285 default: 286 if (c <= 0177) { 287 yylval = c; 288 return (CHAR); 289 } else { 290 lyylval = c; 291 return (MCHAR); 292 } 293 } 294 } 295 296 wchar_t 297 nextch(void) 298 { 299 wchar_t lc; 300 char multic[MB_LEN_MAX]; 301 int length, d; 302 if (fflag) { 303 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0) 304 synerror(); 305 if(length == 0) 306 lc = '\0'; 307 } 308 else { 309 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1) 310 synerror(); 311 if(length == 0) 312 return(0); 313 input += length; 314 } 315 return(lc); 316 } 317 318 int 319 mgetc(void) 320 { 321 return(getc(expfile)); 322 } 323 324 void 325 synerror(void) 326 { 327 fprintf(stderr, gettext("egrep: syntax error\n")); 328 exit(2); 329 } 330 331 int 332 enter(int x) 333 { 334 if(line >= maxlin) 335 if(alloctree() == 0) 336 overflo(); 337 name[line] = x; 338 left[line] = 0; 339 right[line] = 0; 340 return(line++); 341 } 342 343 int 344 cclenter(int x) 345 { 346 int linno; 347 linno = enter(x); 348 right[linno] = count; 349 return (linno); 350 } 351 352 int 353 node(int x, int l, int r) 354 { 355 if(line >= maxlin) 356 if(alloctree() == 0) 357 overflo(); 358 name[line] = x; 359 left[line] = l; 360 right[line] = r; 361 parent[l] = line; 362 parent[r] = line; 363 return(line++); 364 } 365 366 int 367 unary(int x, int d) 368 { 369 if(line >= maxlin) 370 if(alloctree() == 0) 371 overflo(); 372 name[line] = x; 373 left[line] = d; 374 right[line] = 0; 375 parent[d] = line; 376 return(line++); 377 } 378 379 int 380 allocchars(void) 381 { 382 maxclin += MAXLIN; 383 if((chars = realloc(chars, maxclin)) == (char *)0) 384 return 0; 385 return 1; 386 } 387 388 int 389 alloctree(void) 390 { 391 maxlin += MAXLIN; 392 if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0) 393 return 0; 394 if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0) 395 return 0; 396 if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0) 397 return 0; 398 if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0) 399 return 0; 400 if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0) 401 return 0; 402 if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0) 403 return 0; 404 if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0) 405 return 0; 406 return 1; 407 } 408 409 void 410 overflo(void) 411 { 412 fprintf(stderr, gettext("egrep: regular expression too long\n")); 413 exit(2); 414 } 415 416 void 417 cfoll(int v) 418 { 419 int i; 420 if (left[v] == 0) { 421 count = 0; 422 for (i=1; i<=line; i++) tmpstat[i] = 0; 423 follow(v); 424 add(foll, v); 425 } 426 else if (right[v] == 0) cfoll(left[v]); 427 else { 428 cfoll(left[v]); 429 cfoll(right[v]); 430 } 431 } 432 433 void 434 cgotofn(void) 435 { 436 int i; 437 count = 0; 438 inxtpos = nxtpos; 439 for (i=3; i<=line; i++) tmpstat[i] = 0; 440 if (cstate(line-1)==0) { 441 tmpstat[line] = 1; 442 count++; 443 out[1] = 1; 444 } 445 for (i=3; i<=line; i++) initstat[i] = tmpstat[i]; 446 count--; /*leave out position 1 */ 447 icount = count; 448 tmpstat[1] = 0; 449 add(state, 1); 450 istat = nxtst(1, LEFT); 451 } 452 453 int 454 nxtst(int s, int c) 455 { 456 int i, num, k; 457 int pos, curpos, number, newpos; 458 num = positions[state[s]]; 459 count = icount; 460 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 461 pos = state[s] + 1; 462 for (i=0; i<num; i++) { 463 curpos = positions[pos]; 464 k = name[curpos]; 465 if (k >= 0) 466 if ( 467 (k == c) 468 || (k == DOT && dot(c)) 469 || (k == MDOT && mdot(c)) 470 || (k == CCL && dot(c) && member(c, right[curpos], 1)) 471 || (k == NCCL && dot(c) && member(c, right[curpos], 0)) 472 || (k == MCCL && mdot(c) && member(c, right[curpos], 1)) 473 ) { 474 number = positions[foll[curpos]]; 475 newpos = foll[curpos] + 1; 476 for (k=0; k<number; k++) { 477 if (tmpstat[positions[newpos]] != 1) { 478 tmpstat[positions[newpos]] = 1; 479 count++; 480 } 481 newpos++; 482 } 483 } 484 pos++; 485 } 486 if (notin(nstate)) { 487 if (++nstate >= NSTATES) { 488 for (i=1; i<NSTATES; i++) 489 out[i] = 0; 490 for (i=1; i<NSTATES; i++) 491 for (k=0; k<NCHARS; k++) 492 gotofn[i][k] = 0; 493 nstate = 1; 494 nxtpos = inxtpos; 495 reinit = 1; 496 add(state, nstate); 497 if (tmpstat[line] == 1) out[nstate] = 1; 498 return nstate; 499 } 500 add(state, nstate); 501 if (tmpstat[line] == 1) out[nstate] = 1; 502 gotofn[s][c] = nstate; 503 return nstate; 504 } 505 else { 506 gotofn[s][c] = xstate; 507 return xstate; 508 } 509 } 510 511 512 int 513 cstate(int v) 514 { 515 int b; 516 if (left[v] == 0) { 517 if (tmpstat[v] != 1) { 518 tmpstat[v] = 1; 519 count++; 520 } 521 return(1); 522 } 523 else if (right[v] == 0) { 524 if (cstate(left[v]) == 0) return (0); 525 else if (name[v] == PLUS) return (1); 526 else return (0); 527 } 528 else if (name[v] == CAT) { 529 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 530 else return (1); 531 } 532 else { /* name[v] == OR */ 533 b = cstate(right[v]); 534 if (cstate(left[v]) == 0 || b == 0) return (0); 535 else return (1); 536 } 537 } 538 539 540 int 541 dot(int c) 542 { 543 if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3)) 544 return(0); 545 if(c == RIGHT || c == LEFT) 546 return(0); 547 return(1); 548 } 549 550 int 551 mdot(int c) 552 { 553 if(c >= 0200 && !iscntrl(c)) 554 return(1); 555 return(0); 556 } 557 558 int 559 member(int symb, int set, int torf) 560 { 561 int i, num, pos, c, lc; 562 if(symb == RIGHT || symb == LEFT) 563 return(0); 564 num = chars[set]; 565 pos = set + 1; 566 lc = 0; 567 if(iflag) 568 symb = tolower(symb); 569 for (i=0; i<num; i++) { 570 c = (unsigned char)chars[pos++]; 571 if(c == '-' && lc != 0 && ++i < num) { 572 c = (unsigned char)chars[pos++]; 573 if(lc <= symb && symb <= c) 574 return(torf); 575 } 576 if (symb == c) 577 return (torf); 578 lc = c; 579 } 580 return(!torf); 581 } 582 583 int 584 notin(int n) 585 { 586 int i, j, pos; 587 for (i=1; i<=n; i++) { 588 if (positions[state[i]] == count) { 589 pos = state[i] + 1; 590 for (j=0; j < count; j++) 591 if (tmpstat[positions[pos++]] != 1) goto nxt; 592 xstate = i; 593 return (0); 594 } 595 nxt: ; 596 } 597 return (1); 598 } 599 600 void 601 add(int *array, int n) 602 { 603 int i; 604 if (nxtpos + count >= maxpos) { 605 maxpos += MAXPOS + count; 606 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0) 607 overflo(); 608 } 609 array[n] = nxtpos; 610 positions[nxtpos++] = count; 611 for (i=3; i <= line; i++) { 612 if (tmpstat[i] == 1) { 613 positions[nxtpos++] = i; 614 } 615 } 616 } 617 618 void 619 follow(int v) 620 { 621 int p; 622 if (v == line) return; 623 p = parent[v]; 624 switch(name[p]) { 625 case STAR: 626 case PLUS: cstate(v); 627 follow(p); 628 return; 629 630 case OR: 631 case QUEST: follow(p); 632 return; 633 634 case CAT: if (v == left[p]) { 635 if (cstate(right[p]) == 0) { 636 follow(p); 637 return; 638 } 639 } 640 else follow(p); 641 return; 642 case FINAL: if (tmpstat[line] != 1) { 643 tmpstat[line] = 1; 644 count++; 645 } 646 return; 647 } 648 } 649 650 #define USAGE "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..." 651 652 int 653 main(int argc, char **argv) 654 { 655 char c; 656 char nl = '\n'; 657 int errflag = 0; 658 659 (void)setlocale(LC_ALL, ""); 660 661 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 662 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */ 663 #endif 664 (void) textdomain(TEXT_DOMAIN); 665 666 while((c = getopt(argc, argv, "ybcie:f:Hhlnvs")) != -1) 667 switch(c) { 668 669 case 'b': 670 bflag++; 671 continue; 672 673 case 'c': 674 cflag++; 675 continue; 676 677 case 'e': 678 eflag++; 679 input = optarg; 680 continue; 681 682 case 'f': 683 fflag++; 684 expfile = fopen(optarg, "r"); 685 if(expfile == NULL) { 686 fprintf(stderr, 687 gettext("egrep: can't open %s\n"), optarg); 688 exit(2); 689 } 690 continue; 691 692 case 'H': 693 if (!lflag) /* H is excluded by l as in GNU grep */ 694 Hflag++; 695 hflag = 0; /* H excludes h */ 696 continue; 697 698 case 'h': 699 hflag++; 700 Hflag = 0; /* h excludes H */ 701 continue; 702 703 case 'y': 704 case 'i': 705 iflag++; 706 continue; 707 708 case 'l': 709 lflag++; 710 Hflag = 0; /* l excludes H */ 711 continue; 712 713 case 'n': 714 nflag++; 715 continue; 716 717 case 'q': 718 case 's': /* Solaris: legacy option */ 719 qflag++; 720 continue; 721 722 case 'v': 723 vflag++; 724 continue; 725 726 case '?': 727 errflag++; 728 } 729 if (errflag || ((argc <= 0) && !fflag && !eflag)) { 730 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE)); 731 exit(2); 732 } 733 if(!eflag && !fflag) { 734 input = argv[optind]; 735 optind++; 736 } 737 738 argc -= optind; 739 argv = &argv[optind]; 740 741 /* allocate initial space for arrays */ 742 if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 743 overflo(); 744 if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 745 overflo(); 746 if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 747 overflo(); 748 if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 749 overflo(); 750 if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 751 overflo(); 752 if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 753 overflo(); 754 if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0) 755 overflo(); 756 if((chars = (char *)malloc(MAXLIN)) == (char *)0) 757 overflo(); 758 if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0) 759 overflo(); 760 if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0) 761 overflo(); 762 if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0) 763 overflo(); 764 maxlin = MAXLIN; 765 maxclin = MAXLIN; 766 maxwclin = MAXLIN; 767 maxpos = MAXPOS; 768 769 yyparse(); 770 771 cfoll(line-1); 772 cgotofn(); 773 nfile = argc; 774 if (argc<=0) { 775 execute(0); 776 } 777 else while (--argc >= 0) { 778 if (reinit == 1) clearg(); 779 execute(*argv++); 780 } 781 return (badbotch ? 2 : nsucc==0); 782 } 783 784 void 785 execute(char *file) 786 { 787 char *p; 788 int cstat; 789 wchar_t c; 790 int t; 791 long count; 792 long count1, count2; 793 long nchars; 794 int succ; 795 char *ptr, *ptrend, *lastptr; 796 char *buf; 797 long lBufSiz; 798 FILE *f; 799 int nlflag; 800 801 lBufSiz = EBUFSIZ; 802 if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) { 803 exit (2); /* out of memory - BAIL */ 804 } 805 806 if (file) { 807 if ((f = fopen(file, "r")) == NULL) { 808 fprintf(stderr, 809 gettext("egrep: can't open %s\n"), file); 810 badbotch=1; 811 return; 812 } 813 } else { 814 f = stdin; 815 file = STDIN_FILENAME; 816 } 817 lnum = 1; 818 tln = 0; 819 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) { 820 fclose(f); 821 822 if (cflag && !qflag) { 823 if (Hflag || (nfile > 1 && !hflag)) 824 fprintf(stdout, "%s:", file); 825 fprintf(stdout, "%lld\n", tln); 826 } 827 return; 828 } 829 830 blkno = count; 831 ptr = buf; 832 for(;;) { 833 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) { 834 /* 835 move the unused partial record to the head of the buffer 836 */ 837 if (ptr > buf) { 838 count = buf + count - ptr; 839 memmove (buf, ptr, count); 840 ptr = buf; 841 } 842 843 /* 844 Get a bigger buffer if this one is full 845 */ 846 if(count > lBufSiz) { 847 /* 848 expand the buffer 849 */ 850 lBufSiz += EBUFSIZ; 851 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) { 852 exit (2); /* out of memory - BAIL */ 853 } 854 855 ptr = buf; 856 } 857 858 p = buf + count; 859 if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) { 860 count += count1; 861 blkno += count1; 862 continue; 863 } 864 ptrend = ptr + count; 865 nlflag = 0; 866 } else 867 nlflag = 1; 868 *ptrend = '\n'; 869 p = ptr; 870 lastptr = ptr; 871 cstat = istat; 872 succ = 0; 873 for(;;) { 874 if(out[cstat]) { 875 if(multibyte && p > ptr) { 876 wchar_t wchar; 877 int length; 878 char *endptr = p; 879 p = lastptr; 880 while(p < endptr) { 881 length = mbtowc(&wchar, p, MB_LEN_MAX); 882 if(length <= 1) 883 p++; 884 else 885 p += length; 886 } 887 if(p == endptr) { 888 succ = !vflag; 889 break; 890 } 891 cstat = 1; 892 length = mbtowc(&wchar, lastptr, MB_LEN_MAX); 893 if(length <= 1) 894 lastptr++; 895 else 896 lastptr += length; 897 p = lastptr; 898 continue; 899 } 900 succ = !vflag; 901 break; 902 } 903 c = (unsigned char)*p++; 904 if ((t = gotofn[cstat][c]) == 0) 905 cstat = nxtst(cstat, c); 906 else 907 cstat = t; 908 if(c == RIGHT) { 909 if(out[cstat]) { 910 succ = !vflag; 911 break; 912 } 913 succ = vflag; 914 break; 915 } 916 } 917 if (succ) { 918 nsucc = 1; 919 if (lflag || qflag) { 920 if (!qflag) 921 (void) printf("%s\n", file); 922 fclose(f); 923 return; 924 } 925 if (cflag) { 926 tln++; 927 } else { 928 if (Hflag || (nfile > 1 && !hflag)) 929 printf("%s:", file); 930 if (bflag) { 931 nchars = blkno - (buf + count - ptrend) - 2; 932 if(nlflag) 933 nchars++; 934 printf("%lld:", nchars/BLKSIZE); 935 } 936 if (nflag) 937 printf("%lld:", lnum); 938 if(nlflag) 939 nchars = ptrend - ptr + 1; 940 else 941 nchars = ptrend - ptr; 942 fwrite(ptr, (size_t)1, (size_t)nchars, stdout); 943 } 944 } 945 if(!nlflag) 946 break; 947 ptr = ptrend + 1; 948 if(ptr >= buf + count) { 949 ptr = buf; 950 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) 951 break; 952 blkno += count; 953 } 954 lnum++; 955 if (reinit == 1) 956 clearg(); 957 } 958 fclose(f); 959 if (cflag && !qflag) { 960 if (Hflag || (nfile > 1 && !hflag)) 961 printf("%s:", file); 962 printf("%lld\n", tln); 963 } 964 } 965 966 void 967 clearg(void) 968 { 969 int i, k; 970 for (i=1; i<=nstate; i++) 971 out[i] = 0; 972 for (i=1; i<=nstate; i++) 973 for (k=0; k<NCHARS; k++) 974 gotofn[i][k] = 0; 975 nstate = 1; 976 nxtpos = inxtpos; 977 reinit = 0; 978 count = 0; 979 for (i=3; i<=line; i++) tmpstat[i] = 0; 980 if (cstate(line-1)==0) { 981 tmpstat[line] = 1; 982 count++; 983 out[1] = 1; 984 } 985 for (i=3; i<=line; i++) initstat[i] = tmpstat[i]; 986 count--; /*leave out position 1 */ 987 icount = count; 988 tmpstat[1] = 0; 989 add(state, 1); 990 istat = nxtst(1, LEFT); 991 } 992 993 int 994 mdotenter(void) 995 { 996 int i, x1, x2; 997 x1 = enter(DOT); 998 x2 = enter(MDOT); 999 for(i = 1; i < (int) eucw1; i++) 1000 x2 = node(CAT, x2, enter(MDOT)); 1001 x1 = node(OR, x1, x2); 1002 if(eucw2) { 1003 x2 = enter('\216'); 1004 for(i = 1; i <= (int) eucw2; i++) 1005 x2 = node(CAT, x2, enter(MDOT)); 1006 x1 = node(OR, x1, x2); 1007 } 1008 if(eucw3) { 1009 x2 = enter('\217'); 1010 for(i = 1; i <= (int) eucw3; i++) 1011 x2 = node(CAT, x2, enter(MDOT)); 1012 x1 = node(OR, x1, x2); 1013 } 1014 return(x1); 1015 } 1016 1017 int 1018 mchar(wchar_t c) 1019 { 1020 char multichar[MB_LEN_MAX+1]; 1021 char *p; 1022 int x1, lc, length; 1023 1024 length = wctomb(multichar, c); 1025 p = multichar; 1026 *(p + length) = '\0'; 1027 x1 = enter((unsigned char)*p++); 1028 while(lc = (unsigned char)*p++) 1029 x1 = node(CAT, x1, enter(lc)); 1030 return(x1); 1031 } 1032 1033 int 1034 ccl(int type) 1035 { 1036 wchar_t c, lc; 1037 char multic1[MB_LEN_MAX]; 1038 char multic2[MB_LEN_MAX]; 1039 int x1, x2, length, current, last, cclcnt; 1040 x2 = 0; 1041 current = 0; 1042 last = genrange(type); 1043 nxtchar = count + 1; 1044 cclcnt = 0; 1045 /* create usual character class for single byte characters */ 1046 while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) { 1047 cclcnt++; 1048 chars[nxtchar++] = c; 1049 if(lower[current] != upper[current]) { 1050 chars[nxtchar++] = '-'; 1051 chars[nxtchar++] = upper[current]; 1052 cclcnt += 2; 1053 } 1054 current++; 1055 } 1056 1057 if(cclcnt) 1058 chars[count] = cclcnt; 1059 else 1060 nxtchar = count; 1061 if(current > 0) 1062 /* single byte part of character class */ 1063 x2 = cclenter(type); 1064 else if(type == NCCL) 1065 /* all single byte characters match */ 1066 x2 = enter(DOT); 1067 while(current <= last) { 1068 if(upper[current] == lower[current]) 1069 x1 = mchar(lower[current]); 1070 else { 1071 length = wctomb(multic1, lower[current]); 1072 wctomb(multic2, upper[current]); 1073 x1 = range((unsigned char *)multic1, 1074 (unsigned char *)multic2, length); 1075 } 1076 if(x2) 1077 x2 = node(OR, x2, x1); 1078 else 1079 x2 = x1; 1080 current++; 1081 } 1082 return x2; 1083 } 1084 1085 int 1086 range(unsigned char *p1, unsigned char *p2, int length) 1087 { 1088 char multic[MB_LEN_MAX+1]; 1089 char *p; 1090 int i, x1, x2; 1091 if(length == 1) 1092 return(classenter(*p1, *p2)); 1093 if(p1[0] == p2[0]) 1094 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1))); 1095 p = multic; 1096 for(i = 1; i < length; i++) 1097 *p++ = 0377; 1098 x1 = node(CAT, enter(p1[0]), 1099 range(p1+1, (unsigned char *)multic, length - 1)); 1100 if((unsigned char)(p1[0] + 1) < p2[0]) { 1101 x2 = classenter(p1[0] + 1, p2[0] - 1); 1102 for(i = 1; i < length; i++) 1103 x2 = node(CAT, x2, enter(MDOT)); 1104 x1 = node(OR, x1, x2); 1105 } 1106 p = multic; 1107 for(i = 1; i < length; i++) 1108 *p++ = 0200; 1109 x2 = node(CAT, enter(p2[0]), 1110 range((unsigned char *)multic, p2+1, length - 1)); 1111 return node(OR, x1, x2); 1112 } 1113 1114 int 1115 classenter(int x1, int x2) 1116 { 1117 static int max, min; 1118 if(!max) { 1119 int i; 1120 for(i = 0200; i <= 0377; i++) 1121 if(!iscntrl(i)) 1122 break; 1123 min = i; 1124 for(i = 0377; i >= 0200; i--) 1125 if(!iscntrl(i)) 1126 break; 1127 max = i; 1128 } 1129 if(x1 <= min && x2 >= max) 1130 return enter(MDOT); 1131 if(nxtchar + 4 >= maxclin) 1132 if(allocchars() == 0) 1133 overflo(); 1134 count = nxtchar++; 1135 chars[nxtchar++] = x1; 1136 chars[nxtchar++] = '-'; 1137 chars[nxtchar++] = x2; 1138 chars[count] = 3; 1139 return cclenter(MCCL); 1140 } 1141 1142 int 1143 genrange(int type) 1144 { 1145 char *p, *endp; 1146 int current, nel, i, last, length; 1147 wchar_t c, lc; 1148 1149 current = 0; 1150 p = &chars[count+1]; 1151 endp = &chars[count+1] + chars[count]; 1152 lc = 0; 1153 1154 /* convert character class into union of ranges */ 1155 while(p < endp) { 1156 length = mbtowc(&c, p, MB_LEN_MAX); 1157 p += length; 1158 if(c == '-' && lc != 0) { 1159 length = mbtowc(&c, p, MB_LEN_MAX); 1160 upper[current-1] = c; 1161 p += length; 1162 } else { 1163 lower[current] = c; 1164 upper[current++] = c; 1165 } 1166 lc = c; 1167 } 1168 nel = current; 1169 /* sort lower and upper bounds of ranges */ 1170 qsort((char *)lower, nel, sizeof(wchar_t), compare); 1171 qsort((char *)upper, nel, sizeof(wchar_t), compare); 1172 last = current - 1; 1173 current = 0; 1174 /* combine overlapping or adjacent ranges */ 1175 for(i = 0; i < last; i++) 1176 if(upper[i] >= lower[i+1] - 1) 1177 upper[current] = upper[i+1]; 1178 else { 1179 lower[++current] = lower[i+1]; 1180 upper[current] = upper[i+1]; 1181 } 1182 if(type == NCCL) { 1183 /* find complement of character class */ 1184 int j, next; 1185 i = 0; 1186 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c)) 1187 i++; 1188 if(i > current) { 1189 /* match all multibyte characters */ 1190 if(eucw2) { 1191 lower[i] = maxmin(WCHAR_CS2, 0); 1192 upper[i++] = maxmin(WCHAR_CS2, 1); 1193 } 1194 if(eucw3) { 1195 lower[i] = maxmin(WCHAR_CS3, 0); 1196 upper[i++] = maxmin(WCHAR_CS3, 1); 1197 } 1198 lower[i] = maxmin(WCHAR_CS1, 0); 1199 upper[i++] = maxmin(WCHAR_CS1, 1); 1200 return i - 1; 1201 } 1202 next = current + 1; 1203 if(next + current + 2 >= maxwclin) { 1204 maxwclin += MAXLIN + next + current + 2; 1205 if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 || 1206 (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0) 1207 overflo(); 1208 } 1209 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) { 1210 lower[next] = maxmin(WCHAR_CS2, 0); 1211 if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) { 1212 upper[next++] = maxmin(WCHAR_CS2, 1); 1213 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) { 1214 lower[next] = maxmin(WCHAR_CS3, 0); 1215 upper[next++] = maxmin(WCHAR_CS3, 1); 1216 } 1217 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) { 1218 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0); 1219 upper[next++] = lower[i] - 1; 1220 } 1221 } else 1222 upper[next++] = lower[i] - 1; 1223 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) { 1224 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0); 1225 upper[next++] = lower[i] - 1; 1226 } 1227 for(j = i; j < current; j++) { 1228 if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) { 1229 lower[next] = upper[j] + 1; 1230 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) { 1231 upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1); 1232 if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) { 1233 lower[next] = maxmin(WCHAR_CS3, 0); 1234 upper[next++] = maxmin(WCHAR_CS3, 1); 1235 } 1236 if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) { 1237 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0); 1238 upper[next++] = lower[j+1] - 1; 1239 } 1240 } else 1241 upper[next++] = lower[j+1] - 1; 1242 } else if(lower[j+1] > maxmin(lower[j+1], 0)) { 1243 lower[next] = maxmin(lower[j+1], 0); 1244 upper[next++] = lower[j+1] - 1; 1245 } 1246 } 1247 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) { 1248 lower[next] = upper[current] + 1; 1249 upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1); 1250 } 1251 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) { 1252 if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) { 1253 lower[next] = maxmin(WCHAR_CS3, 0); 1254 upper[next++] = maxmin(WCHAR_CS3, 1); 1255 } 1256 lower[next] = maxmin(WCHAR_CS1, 0); 1257 upper[next++] = maxmin(WCHAR_CS1, 1); 1258 } 1259 for(j = current + 1; j < next; j++) { 1260 lower[i] = lower[j]; 1261 upper[i++] = upper[j]; 1262 } 1263 current = i - 1; 1264 } 1265 return(current); 1266 } 1267 1268 int 1269 compare(wchar_t *c, wchar_t *d) 1270 { 1271 if(*c < *d) 1272 return -1; 1273 if(*c == *d) 1274 return 0; 1275 return 1; 1276 } 1277 1278 wchar_t 1279 maxmin(wchar_t c, int flag) 1280 { 1281 static wchar_t minmax1[2], minmax2[2], minmax3[2]; 1282 1283 if(!minmax1[0]) { 1284 /* compute min and max process codes for all code sets */ 1285 int length, i; 1286 char multic[MB_LEN_MAX], minmax[2]; 1287 for(i = 0377; i >= 0200; i--) 1288 if(!iscntrl(i)) 1289 break; 1290 minmax[1] = i; 1291 for(i = 0240; i <= 0377; i++) 1292 if(!iscntrl(i)) 1293 break; 1294 minmax[0] = i; 1295 for(i = 0; i <= 1; i++) { 1296 length = MB_LEN_MAX; 1297 while(length--) 1298 multic[length] = minmax[i]; 1299 mbtowc(&minmax1[i], multic, MB_LEN_MAX); 1300 if(eucw2) { 1301 multic[0] = SS2; 1302 mbtowc(&minmax2[i], multic, MB_LEN_MAX); 1303 } 1304 if(eucw3) { 1305 multic[0] = SS3; 1306 mbtowc(&minmax3[i], multic, MB_LEN_MAX); 1307 } 1308 } 1309 } 1310 switch(c) { 1311 case WCHAR_CS1: return minmax1[flag]; 1312 case WCHAR_CS2: return minmax2[flag]; 1313 case WCHAR_CS3: return minmax3[flag]; 1314 } 1315 1316 /* NOTREACHED */ 1317 return (0); 1318 }