1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License, Version 1.0 only
   7  * (the "License").  You may not use this file except in compliance
   8  * with the License.
   9  *
  10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  11  * or http://www.opensolaris.org/os/licensing.
  12  * See the License for the specific language governing permissions
  13  * and limitations under the License.
  14  *
  15  * When distributing Covered Code, include this CDDL HEADER in each
  16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  17  * If applicable, add the following below this CDDL HEADER, with the
  18  * fields enclosed by brackets "[]" replaced with your own identifying
  19  * information: Portions Copyright [yyyy] [name of copyright owner]
  20  *
  21  * CDDL HEADER END
  22  */
  23 %}
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 /*        All Rights Reserved   */
  31 
  32 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  33 /*        All Rights Reserved   */
  34 
  35 %{
  36 #pragma ident   "%Z%%M% %I%     %E% SMI"
  37 %}
  38 
  39 /*
  40  * egrep -- print lines containing (or not containing) a regular expression
  41  *
  42  *      status returns:
  43  *              0 - ok, and some matches
  44  *              1 - ok, but no matches
  45  *              2 - some error; matches irrelevant
  46  */
  47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
  48 %left OR
  49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
  50 %left CAT
  51 %left STAR PLUS QUEST
  52 
  53 %{
  54 #include <stdio.h>
  55 #include <ctype.h>
  56 #include <memory.h>
  57 #include <wchar.h>
  58 #include <wctype.h>
  59 #include <widec.h>
  60 #include <stdlib.h>
  61 #include <limits.h>
  62 #include <locale.h>
  63 
  64 #define BLKSIZE 512     /* size of reported disk blocks */
  65 #define EBUFSIZ 8192
  66 #define MAXLIN 350
  67 #define NCHARS 256
  68 #define MAXPOS 4000
  69 #define NSTATES 64
  70 #define FINAL -1
  71 #define RIGHT '\n'      /* serves as record separator and as $ */
  72 #define LEFT '\n'       /* beginning of line */
  73 int gotofn[NSTATES][NCHARS];
  74 int state[NSTATES];
  75 int out[NSTATES];
  76 int line  = 1;
  77 int *name;
  78 int *left;
  79 int *right;
  80 int *parent;
  81 int *foll;
  82 int *positions;
  83 char *chars;
  84 wchar_t *lower;
  85 wchar_t *upper;
  86 int maxlin, maxclin, maxwclin, maxpos;
  87 int nxtpos = 0;
  88 int inxtpos;
  89 int nxtchar = 0;
  90 int *tmpstat;
  91 int *initstat;
  92 int istat;
  93 int nstate = 1;
  94 int xstate;
  95 int count;
  96 int icount;
  97 char *input;
  98 
  99 
 100 wchar_t lyylval;
 101 wchar_t nextch();
 102 wchar_t maxmin();
 103 int compare();
 104 void overflo();
 105 
 106 char reinit = 0;
 107 
 108 long long lnum;
 109 int     bflag;
 110 int     cflag;
 111 int     eflag;
 112 int     fflag;
 113 int     hflag;
 114 int     iflag;
 115 int     lflag;
 116 int     nflag;
 117 int     sflag;
 118 int     vflag;
 119 int     nfile;
 120 long long blkno;
 121 long long tln;
 122 int     nsucc;
 123 int     badbotch;
 124 extern  char *optarg;
 125 extern  int optind;
 126 
 127 int     f;
 128 FILE    *expfile;
 129 %}
 130 
 131 %%
 132 s:      t
 133                 { 
 134                   unary(FINAL, $1);
 135                   line--;
 136                 }
 137         ;
 138 t:      b r
 139                 { $$ = node(CAT, $1, $2); }
 140         | OR b r OR
 141                 { $$ = node(CAT, $2, $3); }
 142         | OR b r
 143                 { $$ = node(CAT, $2, $3); }
 144         | b r OR
 145                 { $$ = node(CAT, $1, $2); }
 146         ;
 147 b:
 148                 { /* if(multibyte)
 149                         $$ = mdotenter();
 150                   else */
 151                         $$ = enter(DOT);
 152                   $$ = unary(STAR, $$); 
 153                 }
 154         ;
 155 r:      CHAR
 156                 { $$ = iflag && isalpha($1) ?
 157                 node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
 158         | MCHAR
 159                 { $$ = (iflag && iswalpha(lyylval)) ?
 160                 node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) : 
 161                 mchar(lyylval); }
 162         | DOT
 163                 { if(multibyte)
 164                         $$ = mdotenter();
 165                   else
 166                         $$ = enter(DOT); 
 167                 }
 168         | CCL
 169                 { $$ = cclenter(CCL); }
 170         | NCCL
 171                 { $$ = cclenter(NCCL); }
 172         | MCCL
 173                 { $$ = ccl(CCL); }
 174         | NMCCL
 175                 { $$ = ccl(NCCL); }
 176         ;
 177 
 178 r:      r OR r
 179                 { $$ = node(OR, $1, $3); }
 180         | r r %prec CAT
 181                 { $$ = node(CAT, $1, $2); }
 182         | r STAR
 183                 { $$ = unary(STAR, $1); }
 184         | r PLUS
 185                 { $$ = unary(PLUS, $1); }
 186         | r QUEST
 187                 { $$ = unary(QUEST, $1); }
 188         | '(' r ')'
 189                 { $$ = $2; }
 190         | error 
 191         ;
 192 
 193 %%
 194 void    add(int *, int); 
 195 void    clearg(void);
 196 void    execute(char *);
 197 void    follow(int);
 198 int     mgetc(void);
 199 void    synerror(void);
 200 
 201 
 202 void
 203 yyerror(char *s)
 204 {
 205         fprintf(stderr, "egrep: %s\n", s);
 206         exit(2);
 207 }
 208 
 209 int
 210 yylex(void)
 211 {
 212         extern int yylval;
 213         int cclcnt, x, ccount, oldccount;
 214         wchar_t c, lc;
 215                 
 216         c = nextch();
 217         switch(c) {
 218                 case '^': 
 219                         yylval = LEFT;
 220                         return(CHAR);
 221                 case '$': 
 222                         c = RIGHT;
 223                         goto defchar;
 224                 case '|': return (OR);
 225                 case '*': return (STAR);
 226                 case '+': return (PLUS);
 227                 case '?': return (QUEST);
 228                 case '(': return (c);
 229                 case ')': return (c);
 230                 case '.': return(DOT);
 231                 case '\0': return (0);
 232                 case RIGHT: return (OR);
 233                 case '[': 
 234                         x = (multibyte ? MCCL : CCL);
 235                         cclcnt = 0;
 236                         count = nxtchar++;
 237                         if ((c = nextch()) == '^') {
 238                                 x = (multibyte ? NMCCL : NCCL);
 239                                 c = nextch();
 240                         }
 241                         lc = 0;
 242                         do {
 243                                 if (iflag && iswalpha(c))
 244                                         c = towlower(c);
 245                                 if (c == '\0') synerror();
 246                                 if (c == '-' && cclcnt > 0 && lc != 0) {
 247                                         if ((c = nextch()) != 0) {
 248                                                 if(c == ']') {
 249                                                         chars[nxtchar++] = '-';
 250                                                         cclcnt++;
 251                                                         break;
 252                                                 }
 253                                                 if (iflag && iswalpha(c))
 254                                                         c = towlower(c);
 255                                                 if (!multibyte ||
 256                                                 (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) &&
 257                                                 lc < c &&
 258                                                 !iswcntrl(c) && !iswcntrl(lc)) {
 259                                                         if (nxtchar >= maxclin)
 260                                                                 if (allocchars() == 0)
 261                                                                         overflo();
 262                                                         chars[nxtchar++] = '-';
 263                                                         cclcnt++;
 264                                                 }
 265                                         }
 266                                 }
 267                                 ccount = oldccount = nxtchar;
 268                                 if(ccount + MB_LEN_MAX >= maxclin)
 269                                         if(allocchars() == 0)
 270                                                 overflo();
 271                                 ccount += wctomb(&chars[ccount], c);
 272                                 cclcnt += ccount - oldccount;
 273                                 nxtchar += ccount - oldccount;
 274                                 lc = c;
 275                         } while ((c = nextch()) != ']');
 276                         chars[count] = cclcnt;
 277                         return(x);
 278                 
 279                 case '\\':
 280                         if ((c = nextch()) == '\0') synerror();
 281                 defchar:
 282                 default:
 283                         if (c <= 0177) {
 284                                 yylval = c;
 285                                 return (CHAR);
 286                         } else {
 287                                 lyylval = c;
 288                                 return (MCHAR);
 289                         }
 290         }
 291 }
 292 
 293 wchar_t
 294 nextch(void)
 295 {
 296         wchar_t lc;
 297         char multic[MB_LEN_MAX];
 298         int length, d;
 299         if (fflag) {
 300                 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0)
 301                         synerror();
 302                 if(length == 0)
 303                         lc = '\0';
 304         }
 305         else  {
 306                 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1)
 307                         synerror();
 308                 if(length == 0)
 309                         return(0);
 310                 input += length;
 311         }
 312         return(lc);
 313 }
 314 
 315 int
 316 mgetc(void)
 317 {
 318         return(getc(expfile));
 319 }
 320         
 321 void
 322 synerror(void)
 323 {
 324         fprintf(stderr, gettext("egrep: syntax error\n"));
 325         exit(2);
 326 }
 327 
 328 int
 329 enter(int x)
 330 {
 331         if(line >= maxlin) 
 332                 if(alloctree() == 0)
 333                         overflo();
 334         name[line] = x;
 335         left[line] = 0;
 336         right[line] = 0;
 337         return(line++);
 338 }
 339 
 340 int
 341 cclenter(int x)
 342 {
 343         int linno;
 344         linno = enter(x);
 345         right[linno] = count;
 346         return (linno);
 347 }
 348 
 349 int
 350 node(int x, int l, int r)
 351 {
 352         if(line >= maxlin) 
 353                 if(alloctree() == 0)
 354                         overflo();
 355         name[line] = x;
 356         left[line] = l;
 357         right[line] = r;
 358         parent[l] = line;
 359         parent[r] = line;
 360         return(line++);
 361 }
 362 
 363 int
 364 unary(int x, int d)
 365 {
 366         if(line >= maxlin) 
 367                 if(alloctree() == 0)
 368                         overflo();
 369         name[line] = x;
 370         left[line] = d;
 371         right[line] = 0;
 372         parent[d] = line;
 373         return(line++);
 374 }
 375 
 376 int
 377 allocchars(void)
 378 {
 379         maxclin += MAXLIN;
 380         if((chars = realloc(chars, maxclin)) == (char *)0)
 381                 return 0;
 382         return 1;
 383 }
 384 
 385 int
 386 alloctree(void)
 387 {
 388         maxlin += MAXLIN;
 389         if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0)
 390                 return 0;
 391         if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0)
 392                 return 0;
 393         if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0)
 394                 return 0;
 395         if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0)
 396                 return 0;
 397         if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0)
 398                 return 0;
 399         if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0)
 400                 return 0;
 401         if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0)
 402                 return 0;
 403         return 1;
 404 }
 405 
 406 void
 407 overflo(void) 
 408 {
 409         fprintf(stderr, gettext("egrep: regular expression too long\n"));
 410         exit(2);
 411 }
 412 
 413 void
 414 cfoll(int v)
 415 {
 416         int i;
 417         if (left[v] == 0) {
 418                 count = 0;
 419                 for (i=1; i<=line; i++) tmpstat[i] = 0;
 420                 follow(v);
 421                 add(foll, v);
 422         }
 423         else if (right[v] == 0) cfoll(left[v]);
 424         else {
 425                 cfoll(left[v]);
 426                 cfoll(right[v]);
 427         }
 428 }
 429 
 430 void
 431 cgotofn(void)
 432 {
 433         int i;
 434         count = 0;
 435         inxtpos = nxtpos;
 436         for (i=3; i<=line; i++) tmpstat[i] = 0;
 437         if (cstate(line-1)==0) {
 438                 tmpstat[line] = 1;
 439                 count++;
 440                 out[1] = 1;
 441         }
 442         for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
 443         count--;                /*leave out position 1 */
 444         icount = count;
 445         tmpstat[1] = 0;
 446         add(state, 1);
 447         istat = nxtst(1, LEFT);
 448 }
 449 
 450 int
 451 nxtst(int s, int c)
 452 {
 453         int i, num, k;
 454         int pos, curpos, number, newpos;
 455         num = positions[state[s]];
 456         count = icount;
 457         for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
 458         pos = state[s] + 1;
 459         for (i=0; i<num; i++) {
 460                 curpos = positions[pos];
 461                 k = name[curpos];
 462                 if (k >= 0)
 463                         if (
 464                                 (k == c)
 465                                 || (k == DOT && dot(c))
 466                                 || (k == MDOT && mdot(c))
 467                                 || (k == CCL && dot(c) && member(c, right[curpos], 1))
 468                                 || (k == NCCL && dot(c) && member(c, right[curpos], 0))
 469                                 || (k == MCCL && mdot(c) && member(c, right[curpos], 1))
 470                         ) {
 471                                 number = positions[foll[curpos]];
 472                                 newpos = foll[curpos] + 1;
 473                                 for (k=0; k<number; k++) {
 474                                         if (tmpstat[positions[newpos]] != 1) {
 475                                                 tmpstat[positions[newpos]] = 1;
 476                                                 count++;
 477                                         }
 478                                         newpos++;
 479                                 }
 480                         }
 481                 pos++;
 482         }
 483         if (notin(nstate)) {
 484                 if (++nstate >= NSTATES) {
 485                         for (i=1; i<NSTATES; i++)
 486                                 out[i] = 0;
 487                         for (i=1; i<NSTATES; i++)
 488                                 for (k=0; k<NCHARS; k++)
 489                                         gotofn[i][k] = 0;
 490                         nstate = 1;
 491                         nxtpos = inxtpos;
 492                         reinit = 1;
 493                         add(state, nstate);
 494                         if (tmpstat[line] == 1) out[nstate] = 1;
 495                         return nstate;
 496                 }
 497                 add(state, nstate);
 498                 if (tmpstat[line] == 1) out[nstate] = 1;
 499                 gotofn[s][c] = nstate;
 500                 return nstate;
 501         }
 502         else {
 503                 gotofn[s][c] = xstate;
 504                 return xstate;
 505         }
 506 }
 507 
 508 
 509 int
 510 cstate(int v) 
 511 {
 512         int b;
 513         if (left[v] == 0) {
 514                 if (tmpstat[v] != 1) {
 515                         tmpstat[v] = 1;
 516                         count++;
 517                 }
 518                 return(1);
 519         }
 520         else if (right[v] == 0) {
 521                 if (cstate(left[v]) == 0) return (0);
 522                 else if (name[v] == PLUS) return (1);
 523                 else return (0);
 524         }
 525         else if (name[v] == CAT) {
 526                 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
 527                 else return (1);
 528         }
 529         else { /* name[v] == OR */
 530                 b = cstate(right[v]);
 531                 if (cstate(left[v]) == 0 || b == 0) return (0);
 532                 else return (1);
 533         }
 534 }
 535 
 536 
 537 int
 538 dot(int c)
 539 {
 540         if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3))
 541                 return(0);
 542         if(c == RIGHT || c == LEFT)
 543                 return(0);
 544         return(1);
 545 }
 546 
 547 int
 548 mdot(int c)
 549 {
 550         if(c >= 0200 && !iscntrl(c))
 551                 return(1);
 552         return(0);
 553 }
 554 
 555 int
 556 member(int symb, int set, int torf) 
 557 {
 558         int i, num, pos, c, lc;
 559         if(symb == RIGHT || symb == LEFT)
 560                 return(0);
 561         num = chars[set];
 562         pos = set + 1;
 563         lc = 0;
 564         if(iflag) 
 565                 symb = tolower(symb);
 566         for (i=0; i<num; i++) {
 567                 c = (unsigned char)chars[pos++];
 568                 if(c == '-' && lc != 0 && ++i < num) {
 569                         c = (unsigned char)chars[pos++];
 570                         if(lc <= symb && symb <= c)
 571                                 return(torf);
 572                 }
 573                 if (symb == c)
 574                         return (torf);
 575                 lc = c;
 576         }
 577         return(!torf);
 578 }
 579 
 580 int
 581 notin(int n)
 582 {
 583         int i, j, pos;
 584         for (i=1; i<=n; i++) {
 585                 if (positions[state[i]] == count) {
 586                         pos = state[i] + 1;
 587                         for (j=0; j < count; j++)
 588                                 if (tmpstat[positions[pos++]] != 1) goto nxt;
 589                         xstate = i;
 590                         return (0);
 591                 }
 592                 nxt: ;
 593         }
 594         return (1);
 595 }
 596 
 597 void
 598 add(int *array, int n) 
 599 {
 600         int i;
 601         if (nxtpos + count >= maxpos) { 
 602                 maxpos += MAXPOS + count;
 603                 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0)
 604                         overflo();
 605         }
 606         array[n] = nxtpos;
 607         positions[nxtpos++] = count;
 608         for (i=3; i <= line; i++) {
 609                 if (tmpstat[i] == 1) {
 610                         positions[nxtpos++] = i;
 611                 }
 612         }
 613 }
 614 
 615 void
 616 follow(int v) 
 617 {
 618         int p;
 619         if (v == line) return;
 620         p = parent[v];
 621         switch(name[p]) {
 622                 case STAR:
 623                 case PLUS:      cstate(v);
 624                                 follow(p);
 625                                 return;
 626 
 627                 case OR:
 628                 case QUEST:     follow(p);
 629                                 return;
 630 
 631                 case CAT:       if (v == left[p]) {
 632                                         if (cstate(right[p]) == 0) {
 633                                                 follow(p);
 634                                                 return;
 635                                         }
 636                                 }
 637                                 else follow(p);
 638                                 return;
 639                 case FINAL:     if (tmpstat[line] != 1) {
 640                                         tmpstat[line] = 1;
 641                                         count++;
 642                                 }
 643                                 return;
 644         }
 645 }
 646 
 647 #define USAGE "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..." 
 648 
 649 int
 650 main(int argc, char **argv)
 651 {
 652         char c;
 653         char nl = '\n';
 654         int errflag = 0;
 655         
 656         (void)setlocale(LC_ALL, "");
 657 
 658 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
 659         #define TEXT_DOMAIN "SYS_TEST"  /* Use this only if it weren't. */
 660 #endif
 661         (void) textdomain(TEXT_DOMAIN);
 662 
 663         while((c = getopt(argc, argv, "ybcie:f:hlnvs")) != -1)
 664                 switch(c) {
 665 
 666                 case 'b':
 667                         bflag++;
 668                         continue;
 669 
 670                 case 'c':
 671                         cflag++;
 672                         continue;
 673 
 674                 case 'e':
 675                         eflag++;
 676                         input = optarg;
 677                         continue;
 678 
 679                 case 'f':
 680                         fflag++;
 681                         expfile = fopen(optarg, "r");
 682                         if(expfile == NULL) {
 683                                 fprintf(stderr, 
 684                                   gettext("egrep: can't open %s\n"), optarg);
 685                                 exit(2);
 686                         }
 687                         continue;
 688 
 689                 case 'h':
 690                         hflag++;
 691                         continue;
 692 
 693                 case 'y':
 694                 case 'i':
 695                         iflag++;
 696                         continue;
 697 
 698                 case 'l':
 699                         lflag++;
 700                         continue;
 701 
 702                 case 'n':
 703                         nflag++;
 704                         continue;
 705 
 706                 case 's':
 707                         sflag++;
 708                         continue;
 709 
 710                 case 'v':
 711                         vflag++;
 712                         continue;
 713 
 714                 case '?':
 715                         errflag++;
 716                 }
 717         if (errflag || ((argc <= 0) && !fflag && !eflag)) {
 718                 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE));
 719                 exit(2);
 720         }
 721         if(!eflag && !fflag) {
 722                 input = argv[optind];
 723                 optind++;
 724         }
 725 
 726         argc -= optind;
 727         argv = &argv[optind];
 728         
 729         /* allocate initial space for arrays */
 730         if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 731                 overflo();
 732         if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 733                 overflo();
 734         if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 735                 overflo();
 736         if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 737                 overflo();
 738         if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 739                 overflo();
 740         if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 741                 overflo();
 742         if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 743                 overflo();
 744         if((chars = (char *)malloc(MAXLIN)) == (char *)0)
 745                 overflo();
 746         if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
 747                 overflo();
 748         if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
 749                 overflo();
 750         if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
 751                 overflo();
 752         maxlin = MAXLIN;
 753         maxclin = MAXLIN;
 754         maxwclin = MAXLIN;
 755         maxpos = MAXPOS;
 756                 
 757         yyparse();
 758 
 759         cfoll(line-1);
 760         cgotofn();
 761         nfile = argc;
 762         if (argc<=0) {
 763                 execute(0);
 764         }
 765         else while (--argc >= 0) {
 766                 if (reinit == 1) clearg();
 767                 execute(*argv++);
 768         }
 769         return (badbotch ? 2 : nsucc==0);
 770 }
 771 
 772 void
 773 execute(char *file)
 774 {
 775         char *p;
 776         int cstat;
 777         wchar_t c;
 778         int t;
 779         long count;
 780         long count1, count2;
 781         long nchars;
 782         int succ;
 783         char *ptr, *ptrend, *lastptr;
 784         char *buf;
 785         long lBufSiz;
 786         FILE *f;
 787         int nlflag;
 788 
 789         lBufSiz = EBUFSIZ;
 790         if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) {
 791                 exit (2); /* out of memory - BAIL */
 792         }
 793 
 794         if (file) {
 795                 if ((f = fopen(file, "r")) == NULL) {
 796                         fprintf(stderr, 
 797                                 gettext("egrep: can't open %s\n"), file);
 798                         badbotch=1;
 799                         return;
 800                 }
 801         } else {
 802                 file = "<stdin>";
 803                 f = stdin;
 804         }
 805         lnum = 1;
 806         tln = 0;
 807         if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) {
 808                 fclose(f);
 809 
 810                 if (cflag) {
 811                         if (nfile>1 && !hflag)
 812                                 fprintf(stdout, "%s:", file);
 813                         fprintf(stdout, "%lld\n", tln);
 814                 }
 815                 return;
 816         }
 817 
 818         blkno = count;
 819         ptr = buf;
 820         for(;;) {
 821                 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
 822                         /* 
 823                                 move the unused partial record to the head of the buffer 
 824                         */
 825                         if (ptr > buf) {
 826                                 count = buf + count - ptr;
 827                                 memmove (buf, ptr, count);
 828                                 ptr = buf;
 829                         }
 830 
 831                         /*
 832                                 Get a bigger buffer if this one is full
 833                         */
 834                         if(count > lBufSiz) {
 835                                 /*
 836                                         expand the buffer       
 837                                 */
 838                                 lBufSiz += EBUFSIZ;
 839                                 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) {
 840                                         exit (2); /* out of memory - BAIL */
 841                                 }
 842 
 843                                 ptr = buf;
 844                         }
 845 
 846                         p = buf + count;
 847                         if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) {
 848                                 count += count1;
 849                                 blkno += count1;
 850                                 continue;
 851                         }
 852                         ptrend = ptr + count;
 853                         nlflag = 0;
 854                 } else
 855                         nlflag = 1;
 856                 *ptrend = '\n';
 857                 p = ptr;
 858                 lastptr = ptr;
 859                 cstat = istat;
 860                 succ = 0;
 861                 for(;;) {
 862                         if(out[cstat]) {
 863                                 if(multibyte && p > ptr) {
 864                                         wchar_t wchar;
 865                                         int length;
 866                                         char *endptr = p;
 867                                         p = lastptr;
 868                                         while(p < endptr) {
 869                                                 length = mbtowc(&wchar, p, MB_LEN_MAX);
 870                                                 if(length <= 1)
 871                                                         p++;
 872                                                 else
 873                                                         p += length;
 874                                         }
 875                                         if(p == endptr) {
 876                                                 succ = !vflag;
 877                                                 break;
 878                                         }
 879                                         cstat = 1;
 880                                         length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
 881                                         if(length <= 1)
 882                                                 lastptr++;
 883                                         else
 884                                                 lastptr += length;
 885                                         p = lastptr;
 886                                         continue;
 887                                 }
 888                                 succ = !vflag;
 889                                 break;
 890                         }
 891                         c = (unsigned char)*p++;
 892                         if ((t = gotofn[cstat][c]) == 0)
 893                                 cstat = nxtst(cstat, c);
 894                         else
 895                                 cstat = t;
 896                         if(c == RIGHT) {
 897                                 if(out[cstat]) {
 898                                         succ = !vflag;
 899                                         break;
 900                                 }
 901                                 succ = vflag;
 902                                 break;
 903                         }
 904                 }
 905                 if(succ) {
 906                         nsucc = 1;
 907                         if (cflag) tln++;
 908                         else if (sflag)
 909                                 ;       /* ugh */
 910                         else if (lflag) {
 911                                 printf("%s\n", file);
 912                                 fclose(f);
 913                                 return;
 914                         }
 915                         else {
 916                                 if (nfile > 1 && !hflag) 
 917                                         printf(gettext("%s:"), file);
 918                                 if (bflag) {
 919                                         nchars = blkno - (buf + count - ptrend) - 2;
 920                                         if(nlflag)
 921                                                 nchars++;
 922                                         printf("%lld:", nchars/BLKSIZE);
 923                                 }
 924                                 if (nflag) 
 925                                         printf("%lld:", lnum);
 926                                 if(nlflag)
 927                                         nchars = ptrend - ptr + 1;
 928                                 else
 929                                         nchars = ptrend - ptr;
 930                                 fwrite(ptr, (size_t)1, (size_t)nchars, stdout);
 931                         }
 932                 }
 933                 if(!nlflag)
 934                         break;
 935                 ptr = ptrend + 1;
 936                 if(ptr >= buf + count) {
 937                         ptr = buf;
 938                         if((count = read(fileno(f), buf, EBUFSIZ)) <= 0)
 939                                 break;
 940                         blkno += count;
 941                 }
 942                 lnum++;
 943                 if (reinit == 1) 
 944                         clearg();
 945         }
 946         fclose(f);
 947         if (cflag) {
 948                 if (nfile > 1 && !hflag)
 949                         printf(gettext("%s:"), file);
 950                 printf("%lld\n", tln);
 951         }
 952 }
 953 
 954 void
 955 clearg(void)
 956 {
 957         int i, k;
 958         for (i=1; i<=nstate; i++)
 959                 out[i] = 0;
 960         for (i=1; i<=nstate; i++)
 961                 for (k=0; k<NCHARS; k++)
 962                         gotofn[i][k] = 0;
 963         nstate = 1;
 964         nxtpos = inxtpos;
 965         reinit = 0;
 966         count = 0;
 967         for (i=3; i<=line; i++) tmpstat[i] = 0;
 968         if (cstate(line-1)==0) {
 969                 tmpstat[line] = 1;
 970                 count++;
 971                 out[1] = 1;
 972         }
 973         for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
 974         count--;                /*leave out position 1 */
 975         icount = count;
 976         tmpstat[1] = 0;
 977         add(state, 1);
 978         istat = nxtst(1, LEFT);
 979 }
 980 
 981 int
 982 mdotenter(void)
 983 {
 984         int i, x1, x2;
 985         x1 = enter(DOT);
 986         x2 = enter(MDOT);
 987         for(i = 1; i < (int) eucw1; i++) 
 988                 x2 = node(CAT, x2, enter(MDOT));
 989         x1 = node(OR, x1, x2);
 990         if(eucw2) {
 991                 x2 = enter('\216');
 992                 for(i = 1; i <= (int) eucw2; i++) 
 993                         x2 = node(CAT, x2, enter(MDOT));
 994                 x1 = node(OR, x1, x2);
 995         }
 996         if(eucw3) {
 997                 x2 = enter('\217');
 998                 for(i = 1; i <= (int) eucw3; i++) 
 999                         x2 = node(CAT, x2, enter(MDOT));
1000                 x1 = node(OR, x1, x2);
1001         }
1002         return(x1);
1003 }
1004 
1005 int
1006 mchar(wchar_t c)
1007 {
1008         char multichar[MB_LEN_MAX+1];
1009         char *p;
1010         int x1, lc, length;
1011         
1012         length = wctomb(multichar, c);
1013         p = multichar;
1014         *(p + length) = '\0';
1015         x1 = enter((unsigned char)*p++);
1016         while(lc = (unsigned char)*p++)
1017                 x1 = node(CAT, x1, enter(lc));
1018         return(x1);
1019 }
1020 
1021 int
1022 ccl(int type) 
1023 {
1024         wchar_t c, lc;
1025         char multic1[MB_LEN_MAX];
1026         char multic2[MB_LEN_MAX];
1027         int x1, x2, length, current, last, cclcnt;
1028         x2 = 0;
1029         current = 0;
1030         last = genrange(type);
1031         nxtchar = count + 1;
1032         cclcnt = 0;
1033         /* create usual character class for single byte characters */
1034         while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) {
1035                 cclcnt++;
1036                 chars[nxtchar++] = c;
1037                 if(lower[current] != upper[current]) {
1038                         chars[nxtchar++] = '-';
1039                         chars[nxtchar++] = upper[current];
1040                         cclcnt += 2;
1041                 }
1042                 current++;
1043         }
1044         
1045         if(cclcnt)
1046                 chars[count] = cclcnt;
1047         else
1048                 nxtchar = count;
1049         if(current > 0)
1050                 /* single byte part of character class */
1051                 x2 = cclenter(type);
1052         else if(type == NCCL)
1053                 /* all single byte characters match */
1054                 x2 = enter(DOT);
1055         while(current <= last) {
1056                 if(upper[current] == lower[current]) 
1057                         x1 = mchar(lower[current]);
1058                 else {
1059                         length = wctomb(multic1, lower[current]);
1060                         wctomb(multic2, upper[current]);
1061                         x1 = range((unsigned char *)multic1,
1062                             (unsigned char *)multic2, length);
1063                 }
1064                 if(x2)
1065                         x2 = node(OR, x2, x1);
1066                 else
1067                         x2 = x1;
1068                 current++;
1069         }
1070         return x2;
1071 }
1072 
1073 int
1074 range(unsigned char *p1, unsigned char *p2, int length)
1075 {
1076         char multic[MB_LEN_MAX+1];
1077         char *p;
1078         int i, x1, x2;
1079         if(length == 1)
1080                 return(classenter(*p1, *p2));
1081         if(p1[0] == p2[0]) 
1082                 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));         
1083         p = multic;
1084         for(i = 1; i < length; i++)
1085                 *p++ = 0377;
1086         x1 = node(CAT, enter(p1[0]),
1087             range(p1+1, (unsigned char *)multic, length - 1));
1088         if((unsigned char)(p1[0] + 1) < p2[0]) {
1089                 x2 = classenter(p1[0] + 1, p2[0] - 1);
1090                 for(i = 1; i < length; i++)
1091                         x2 = node(CAT, x2, enter(MDOT));
1092                 x1 = node(OR, x1, x2);
1093         }
1094         p = multic;
1095         for(i = 1; i < length; i++) 
1096                 *p++ = 0200;
1097         x2 = node(CAT, enter(p2[0]),
1098             range((unsigned char *)multic, p2+1, length - 1));
1099         return node(OR, x1, x2);
1100 }
1101 
1102 int
1103 classenter(int x1, int x2)
1104 {
1105         static int max, min;
1106         if(!max) {
1107                 int i;
1108                 for(i = 0200; i <= 0377; i++)
1109                         if(!iscntrl(i))
1110                                 break;
1111                 min = i;
1112                 for(i = 0377; i >= 0200; i--)
1113                         if(!iscntrl(i))
1114                                 break;
1115                 max = i;
1116         }
1117         if(x1 <= min && x2 >= max)
1118                 return enter(MDOT);
1119         if(nxtchar + 4 >= maxclin)
1120                 if(allocchars() == 0)   
1121                         overflo();
1122         count = nxtchar++;
1123         chars[nxtchar++] = x1;
1124         chars[nxtchar++] = '-';
1125         chars[nxtchar++] = x2;
1126         chars[count] = 3;
1127         return cclenter(MCCL);
1128 }
1129 
1130 int
1131 genrange(int type)
1132 {
1133         char *p, *endp;
1134         int current, nel, i, last, length;
1135         wchar_t c, lc;
1136 
1137         current = 0;
1138         p = &chars[count+1];
1139         endp = &chars[count+1] + chars[count];
1140         lc = 0;
1141                 
1142         /* convert character class into union of ranges */
1143         while(p < endp) {
1144                 length = mbtowc(&c, p, MB_LEN_MAX);
1145                 p += length;
1146                 if(c == '-' && lc != 0) {
1147                         length = mbtowc(&c, p, MB_LEN_MAX);
1148                         upper[current-1] = c;
1149                         p += length;
1150                 } else {
1151                         lower[current] = c;
1152                         upper[current++] = c;
1153                 }
1154                 lc = c;
1155         }
1156         nel = current;
1157         /* sort lower and upper bounds of ranges */
1158         qsort((char *)lower, nel, sizeof(wchar_t), compare);
1159         qsort((char *)upper, nel, sizeof(wchar_t), compare);
1160         last = current - 1;
1161         current = 0;
1162         /* combine overlapping or adjacent ranges */
1163         for(i = 0; i < last; i++)
1164                 if(upper[i] >= lower[i+1] - 1)
1165                         upper[current] = upper[i+1];
1166                 else {
1167                         lower[++current] = lower[i+1];
1168                         upper[current] = upper[i+1];
1169                 }
1170         if(type == NCCL) {
1171                 /* find complement of character class */
1172                 int j, next;
1173                 i = 0;
1174                 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c))
1175                         i++;
1176                 if(i > current) {
1177                         /* match all multibyte characters */
1178                         if(eucw2) {
1179                                 lower[i] = maxmin(WCHAR_CS2, 0);
1180                                 upper[i++] = maxmin(WCHAR_CS2, 1);
1181                         }
1182                         if(eucw3) {
1183                                 lower[i] = maxmin(WCHAR_CS3, 0);
1184                                 upper[i++] = maxmin(WCHAR_CS3, 1);
1185                         }
1186                         lower[i] = maxmin(WCHAR_CS1, 0);
1187                         upper[i++] = maxmin(WCHAR_CS1, 1);
1188                         return i - 1;
1189                 }
1190                 next = current + 1;
1191                 if(next + current + 2 >= maxwclin) {
1192                         maxwclin += MAXLIN + next + current + 2;
1193                         if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
1194                            (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
1195                                 overflo();
1196                 }
1197                 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) {
1198                         lower[next] = maxmin(WCHAR_CS2, 0);
1199                         if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) {
1200                                 upper[next++] = maxmin(WCHAR_CS2, 1);
1201                                 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) {
1202                                         lower[next] = maxmin(WCHAR_CS3, 0);
1203                                         upper[next++] = maxmin(WCHAR_CS3, 1);
1204                                 }
1205                                 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1206                                         lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1207                                         upper[next++] = lower[i] - 1;
1208                                 }
1209                         } else
1210                                 upper[next++] = lower[i] - 1;
1211                 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1212                         lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1213                         upper[next++] = lower[i] - 1;
1214                 }
1215                 for(j = i; j < current; j++) {
1216                         if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) {
1217                                 lower[next] = upper[j] + 1;
1218                                 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) {
1219                                         upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1);
1220                                         if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) {
1221                                                 lower[next] = maxmin(WCHAR_CS3, 0);
1222                                                 upper[next++] = maxmin(WCHAR_CS3, 1);
1223                                         }
1224                                         if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) {
1225                                                 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0);
1226                                                 upper[next++] = lower[j+1] - 1;
1227                                         }
1228                                 } else
1229                                         upper[next++] = lower[j+1] - 1;
1230                         } else if(lower[j+1] > maxmin(lower[j+1], 0)) {
1231                                 lower[next] = maxmin(lower[j+1], 0);
1232                                 upper[next++] = lower[j+1] - 1;
1233                         }
1234                 }
1235                 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) {
1236                         lower[next] = upper[current] + 1;
1237                         upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1); 
1238                 }
1239                 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) {
1240                         if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) {
1241                                 lower[next] = maxmin(WCHAR_CS3, 0);
1242                                 upper[next++] = maxmin(WCHAR_CS3, 1);
1243                         }
1244                         lower[next] = maxmin(WCHAR_CS1, 0);
1245                         upper[next++] = maxmin(WCHAR_CS1, 1);
1246                 }
1247                 for(j = current + 1; j < next; j++) {
1248                         lower[i] = lower[j];
1249                         upper[i++] = upper[j];
1250                 }
1251                 current = i - 1;
1252         }
1253         return(current);
1254 }
1255 
1256 int
1257 compare(wchar_t *c, wchar_t *d)
1258 {
1259         if(*c < *d)
1260                 return -1;
1261         if(*c == *d)
1262                 return 0;
1263         return 1;
1264 }
1265 
1266 wchar_t
1267 maxmin(wchar_t c, int flag)
1268 {
1269         static wchar_t minmax1[2], minmax2[2], minmax3[2];
1270 
1271         if(!minmax1[0]) {
1272                 /* compute min and max process codes for all code sets */
1273                 int length, i;
1274                 char multic[MB_LEN_MAX], minmax[2];
1275                 for(i = 0377; i >= 0200; i--)
1276                         if(!iscntrl(i))
1277                                 break;
1278                 minmax[1] = i;
1279                 for(i = 0240; i <= 0377; i++)
1280                         if(!iscntrl(i))
1281                                 break;
1282                 minmax[0] = i;
1283                 for(i = 0; i <= 1; i++) {
1284                         length = MB_LEN_MAX;
1285                         while(length--)
1286                                 multic[length] = minmax[i];
1287                         mbtowc(&minmax1[i], multic, MB_LEN_MAX);
1288                         if(eucw2) {
1289                                 multic[0] = SS2;
1290                                 mbtowc(&minmax2[i], multic, MB_LEN_MAX);
1291                         }
1292                         if(eucw3) {
1293                                 multic[0] = SS3;
1294                                 mbtowc(&minmax3[i], multic, MB_LEN_MAX);
1295                         }
1296                 }
1297         }
1298         switch(c) {
1299                 case WCHAR_CS1: return minmax1[flag];
1300                 case WCHAR_CS2: return minmax2[flag];
1301                 case WCHAR_CS3: return minmax3[flag];
1302         }
1303 
1304         /* NOTREACHED */
1305         return (0);
1306 }