1 %{
   2 /*
   3  * CDDL HEADER START
   4  *
   5  * The contents of this file are subject to the terms of the
   6  * Common Development and Distribution License, Version 1.0 only
   7  * (the "License").  You may not use this file except in compliance
   8  * with the License.
   9  *
  10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  11  * or http://www.opensolaris.org/os/licensing.
  12  * See the License for the specific language governing permissions
  13  * and limitations under the License.
  14  *
  15  * When distributing Covered Code, include this CDDL HEADER in each
  16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  17  * If applicable, add the following below this CDDL HEADER, with the
  18  * fields enclosed by brackets "[]" replaced with your own identifying
  19  * information: Portions Copyright [yyyy] [name of copyright owner]
  20  *
  21  * CDDL HEADER END
  22  */
  23 %}
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 /*        All Rights Reserved   */
  31 
  32 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  33 /*        All Rights Reserved   */
  34 
  35 %{
  36 #pragma ident   "%Z%%M% %I%     %E% SMI"
  37 %}
  38 
  39 /*
  40  * egrep -- print lines containing (or not containing) a regular expression
  41  *
  42  *      status returns:
  43  *              0 - ok, and some matches
  44  *              1 - ok, but no matches
  45  *              2 - some error; matches irrelevant
  46  */
/*
 * Token kinds.  The same values are stored in name[] as node kinds of
 * the syntax tree built by the grammar actions.
 */
%token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
/*
 * Precedence, lowest first: alternation, the tokens that can start an
 * operand, concatenation (applied with %prec CAT), postfix operators.
 */
%left OR
%left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
%left CAT
%left STAR PLUS QUEST
  52 
  53 %{
  54 #include <stdio.h>
  55 #include <ctype.h>
  56 #include <memory.h>
  57 #include <wchar.h>
  58 #include <wctype.h>
  59 #include <widec.h>
  60 #include <stdlib.h>
  61 #include <limits.h>
  62 #include <locale.h>
  63 
/* name reported for standard input when execute() is given no file */
#define STDIN_FILENAME gettext("(standard input)")

#define BLKSIZE 512     /* size of reported disk blocks */
#define EBUFSIZ 8192    /* number of bytes requested per read() in execute() */
#define MAXLIN 350      /* initial size and growth step of the tree and chars arrays */
#define NCHARS 256      /* number of columns in gotofn[][] */
#define MAXPOS 4000     /* initial size of positions[]; also part of its growth step */
#define NSTATES 64      /* number of rows in gotofn[][], state[] and out[] */
#define FINAL -1        /* node kind of the root node created by the `s' rule */
#define RIGHT '\n'      /* serves as record separator and as $ */
#define LEFT '\n'       /* beginning of line */
/* gotofn[s][c]: cached next state; 0 means "not computed yet" (see execute()) */
int gotofn[NSTATES][NCHARS];
/* state[s]: index in positions[] of the position set recorded for state s */
int state[NSTATES];
/* out[s]: non-zero when state s is accepting */
int out[NSTATES];
/* next free node index while parsing; index of the FINAL root afterwards */
int line  = 1;
/* syntax tree, indexed by node number: kind, children and parent */
int *name;
int *left;
int *right;
int *parent;
/* foll[v]: index in positions[] of the follow set recorded for leaf v */
int *foll;
/* pool of position sets; each set is a count followed by its members */
int *positions;
/* character class storage: a length byte followed by the class bytes */
char *chars;
/* allocated in main(); not referenced elsewhere in this part of the file */
wchar_t *lower;
wchar_t *upper;
/* current capacities of the tree arrays, chars[], lower/upper, positions[] */
int maxlin, maxclin, maxwclin, maxpos;
int nxtpos = 0;         /* next free slot in positions[] */
int inxtpos;            /* nxtpos as saved by cgotofn(); restored when nxtst() resets */
int nxtchar = 0;        /* next free slot in chars[] */
int *tmpstat;           /* scratch marks: tmpstat[i] == 1 when position i is in the set */
int *initstat;          /* marks saved by cgotofn() and reloaded by every nxtst() call */
int istat;              /* start state used by execute() for every line */
int nstate = 1;         /* highest state number in use */
int xstate;             /* set by notin() to the existing state that matched */
/*
 * Number of positions marked in tmpstat; yylex() and cclenter() also use
 * it to hold the chars[] index of the class being scanned.
 */
int count;
int icount;             /* count as saved by cgotofn() */
char *input;            /* unread part of the pattern when it is not read from a file */


wchar_t lyylval;        /* value of the most recent MCHAR token */
wchar_t nextch();
wchar_t maxmin();
int compare();
void overflo();

char reinit = 0;        /* set to 1 by nxtst() after it has reset the state tables */

long long lnum;         /* current line number within the file */
/* option flags, one per command line option letter */
int     bflag;
int     cflag;
int     eflag;
int     fflag;
int     Hflag;
int     hflag;
int     iflag;
int     lflag;
int     nflag;
int     qflag;
int     vflag;
int     nfile;          /* number of file operands */
long long blkno;        /* number of bytes read from the current file so far */
long long tln;          /* number of selected lines, printed for -c */
int     nsucc;          /* set to 1 once any line has been selected */
int     badbotch;       /* set to 1 when a file cannot be opened; forces exit status 2 */
extern  char *optarg;
extern  int optind;

/* not referenced in this part of the file; execute() declares its own local f */
int     f;
FILE    *expfile;       /* pattern file opened for -f */
 132 %}
 133 
 134 %%
/*
 * s: the complete pattern.  Caps the tree with a FINAL root node and
 * steps `line' back so that it is the index of that root.
 */
s:      t
                { 
                  unary(FINAL, $1);
                  line--;
                }
        ;
/*
 * t: the expression r preceded by the implicit prefix b.  OR tokens
 * (yylex() returns OR for '|' and for a newline) before or after the
 * expression are accepted and dropped.
 */
t:      b r
                { $$ = node(CAT, $1, $2); }
        | OR b r OR
                { $$ = node(CAT, $2, $3); }
        | OR b r
                { $$ = node(CAT, $2, $3); }
        | b r OR
                { $$ = node(CAT, $1, $2); }
        ;
/*
 * b: empty rule that builds STAR(DOT), i.e. an implicit leading ".*".
 * The multibyte variant is commented out, so a single-byte DOT is
 * always used here.
 */
b:
                { /* if(multibyte)
                        $$ = mdotenter();
                  else */
                        $$ = enter(DOT);
                  $$ = unary(STAR, $$); 
                }
        ;
/*
 * r: operands.  With iflag set, an alphabetic CHAR or MCHAR becomes an
 * OR node over its lower-case and upper-case forms.
 */
r:      CHAR
                { $$ = iflag && isalpha($1) ?
                node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
        | MCHAR
                { $$ = (iflag && iswalpha(lyylval)) ?
                node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) : 
                mchar(lyylval); }
        | DOT
                { if(multibyte)
                        $$ = mdotenter();
                  else
                        $$ = enter(DOT); 
                }
        | CCL
                { $$ = cclenter(CCL); }
        | NCCL
                { $$ = cclenter(NCCL); }
        /* multibyte classes are built by ccl(), not cclenter() */
        | MCCL
                { $$ = ccl(CCL); }
        | NMCCL
                { $$ = ccl(NCCL); }
        ;

/*
 * r: operators -- alternation, concatenation (juxtaposition, given the
 * precedence of CAT), the postfix operators and parenthesized grouping.
 * The `error' alternative has no action.
 */
r:      r OR r
                { $$ = node(OR, $1, $3); }
        | r r %prec CAT
                { $$ = node(CAT, $1, $2); }
        | r STAR
                { $$ = unary(STAR, $1); }
        | r PLUS
                { $$ = unary(PLUS, $1); }
        | r QUEST
                { $$ = unary(QUEST, $1); }
        | '(' r ')'
                { $$ = $2; }
        | error 
        ;
 195 
 196 %%
/* Prototypes for helper routines referenced ahead of their definitions. */
void    add(int *, int); 
void    clearg(void);
void    execute(char *);
void    follow(int);
int     mgetc(void);
void    synerror(void);
 203 
 204 
 205 void
 206 yyerror(char *s)
 207 {
 208         fprintf(stderr, "egrep: %s\n", s);
 209         exit(2);
 210 }
 211 
/*
 * yylex -- lexical analyzer for the pattern.
 *
 * Fetches the next character of the pattern with nextch() and returns
 * the matching token; 0 is returned at the end of the pattern.  For
 * CHAR the character is passed in yylval, for MCHAR in lyylval.  For a
 * bracket expression the members are appended to chars[] and the global
 * `count' is left holding the index of the class's length byte.
 */
int
yylex(void)
{
        extern int yylval;
        int cclcnt, x, ccount, oldccount;
        wchar_t c, lc;
                
        c = nextch();
        switch(c) {
                case '^': 
                        /* beginning of line is matched as the LEFT character */
                        yylval = LEFT;
                        return(CHAR);
                case '$': 
                        /* end of line is matched as the RIGHT character */
                        c = RIGHT;
                        goto defchar;
                case '|': return (OR);
                case '*': return (STAR);
                case '+': return (PLUS);
                case '?': return (QUEST);
                case '(': return (c);
                case ')': return (c);
                case '.': return(DOT);
                case '\0': return (0);
                /* a newline inside the pattern separates alternatives */
                case RIGHT: return (OR);
                case '[': 
                        x = (multibyte ? MCCL : CCL);
                        cclcnt = 0;     /* number of bytes stored for this class */
                        /* reserve one byte for the class length, filled in below */
                        count = nxtchar++;
                        if ((c = nextch()) == '^') {
                                x = (multibyte ? NMCCL : NCCL);
                                c = nextch();
                        }
                        lc = 0;         /* previous member; 0 means none yet */
                        do {
                                if (iflag && iswalpha(c))
                                        c = towlower(c);
                                /* pattern ended before the closing ']' */
                                if (c == '\0') synerror();
                                if (c == '-' && cclcnt > 0 && lc != 0) {
                                        if ((c = nextch()) != 0) {
                                                /* "-]": the '-' is a literal last member */
                                                if(c == ']') {
                                                        chars[nxtchar++] = '-';
                                                        cclcnt++;
                                                        break;
                                                }
                                                if (iflag && iswalpha(c))
                                                        c = towlower(c);
                                                /*
                                                 * Keep the '-' as a range marker when not
                                                 * in multibyte mode, or when (&& binds
                                                 * tighter than ||) both ends share the
                                                 * WCHAR_CSMASK bits, lc < c and neither is
                                                 * a control character.  Otherwise the '-'
                                                 * is dropped and c is stored as an
                                                 * ordinary member below.
                                                 */
                                                if (!multibyte ||
                                                (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) &&
                                                lc < c &&
                                                !iswcntrl(c) && !iswcntrl(lc)) {
                                                        if (nxtchar >= maxclin)
                                                                if (allocchars() == 0)
                                                                        overflo();
                                                        chars[nxtchar++] = '-';
                                                        cclcnt++;
                                                }
                                        }
                                }
                                /* make room for one multibyte character, then append c */
                                ccount = oldccount = nxtchar;
                                if(ccount + MB_LEN_MAX >= maxclin)
                                        if(allocchars() == 0)
                                                overflo();
                                ccount += wctomb(&chars[ccount], c);
                                cclcnt += ccount - oldccount;
                                nxtchar += ccount - oldccount;
                                lc = c;
                        } while ((c = nextch()) != ']');
                        /* NOTE(review): cclcnt is an int stored into a single char */
                        chars[count] = cclcnt;
                        return(x);
                
                case '\\':
                        /* the escaped character is taken literally */
                        if ((c = nextch()) == '\0') synerror();
                defchar:
                default:
                        /* values up to 0177 are CHAR tokens, anything larger is MCHAR */
                        if (c <= 0177) {
                                yylval = c;
                                return (CHAR);
                        } else {
                                lyylval = c;
                                return (MCHAR);
                        }
        }
}
 295 
 296 wchar_t
 297 nextch(void)
 298 {
 299         wchar_t lc;
 300         char multic[MB_LEN_MAX];
 301         int length, d;
 302         if (fflag) {
 303                 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0)
 304                         synerror();
 305                 if(length == 0)
 306                         lc = '\0';
 307         }
 308         else  {
 309                 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1)
 310                         synerror();
 311                 if(length == 0)
 312                         return(0);
 313                 input += length;
 314         }
 315         return(lc);
 316 }
 317 
 318 int
 319 mgetc(void)
 320 {
 321         return(getc(expfile));
 322 }
 323         
 324 void
 325 synerror(void)
 326 {
 327         fprintf(stderr, gettext("egrep: syntax error\n"));
 328         exit(2);
 329 }
 330 
 331 int
 332 enter(int x)
 333 {
 334         if(line >= maxlin) 
 335                 if(alloctree() == 0)
 336                         overflo();
 337         name[line] = x;
 338         left[line] = 0;
 339         right[line] = 0;
 340         return(line++);
 341 }
 342 
 343 int
 344 cclenter(int x)
 345 {
 346         int linno;
 347         linno = enter(x);
 348         right[linno] = count;
 349         return (linno);
 350 }
 351 
 352 int
 353 node(int x, int l, int r)
 354 {
 355         if(line >= maxlin) 
 356                 if(alloctree() == 0)
 357                         overflo();
 358         name[line] = x;
 359         left[line] = l;
 360         right[line] = r;
 361         parent[l] = line;
 362         parent[r] = line;
 363         return(line++);
 364 }
 365 
 366 int
 367 unary(int x, int d)
 368 {
 369         if(line >= maxlin) 
 370                 if(alloctree() == 0)
 371                         overflo();
 372         name[line] = x;
 373         left[line] = d;
 374         right[line] = 0;
 375         parent[d] = line;
 376         return(line++);
 377 }
 378 
 379 int
 380 allocchars(void)
 381 {
 382         maxclin += MAXLIN;
 383         if((chars = realloc(chars, maxclin)) == (char *)0)
 384                 return 0;
 385         return 1;
 386 }
 387 
 388 int
 389 alloctree(void)
 390 {
 391         maxlin += MAXLIN;
 392         if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0)
 393                 return 0;
 394         if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0)
 395                 return 0;
 396         if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0)
 397                 return 0;
 398         if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0)
 399                 return 0;
 400         if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0)
 401                 return 0;
 402         if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0)
 403                 return 0;
 404         if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0)
 405                 return 0;
 406         return 1;
 407 }
 408 
 409 void
 410 overflo(void) 
 411 {
 412         fprintf(stderr, gettext("egrep: regular expression too long\n"));
 413         exit(2);
 414 }
 415 
 416 void
 417 cfoll(int v)
 418 {
 419         int i;
 420         if (left[v] == 0) {
 421                 count = 0;
 422                 for (i=1; i<=line; i++) tmpstat[i] = 0;
 423                 follow(v);
 424                 add(foll, v);
 425         }
 426         else if (right[v] == 0) cfoll(left[v]);
 427         else {
 428                 cfoll(left[v]);
 429                 cfoll(right[v]);
 430         }
 431 }
 432 
 433 void
 434 cgotofn(void)
 435 {
 436         int i;
 437         count = 0;
 438         inxtpos = nxtpos;
 439         for (i=3; i<=line; i++) tmpstat[i] = 0;
 440         if (cstate(line-1)==0) {
 441                 tmpstat[line] = 1;
 442                 count++;
 443                 out[1] = 1;
 444         }
 445         for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
 446         count--;                /*leave out position 1 */
 447         icount = count;
 448         tmpstat[1] = 0;
 449         add(state, 1);
 450         istat = nxtst(1, LEFT);
 451 }
 452 
 453 int
 454 nxtst(int s, int c)
 455 {
 456         int i, num, k;
 457         int pos, curpos, number, newpos;
 458         num = positions[state[s]];
 459         count = icount;
 460         for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
 461         pos = state[s] + 1;
 462         for (i=0; i<num; i++) {
 463                 curpos = positions[pos];
 464                 k = name[curpos];
 465                 if (k >= 0)
 466                         if (
 467                                 (k == c)
 468                                 || (k == DOT && dot(c))
 469                                 || (k == MDOT && mdot(c))
 470                                 || (k == CCL && dot(c) && member(c, right[curpos], 1))
 471                                 || (k == NCCL && dot(c) && member(c, right[curpos], 0))
 472                                 || (k == MCCL && mdot(c) && member(c, right[curpos], 1))
 473                         ) {
 474                                 number = positions[foll[curpos]];
 475                                 newpos = foll[curpos] + 1;
 476                                 for (k=0; k<number; k++) {
 477                                         if (tmpstat[positions[newpos]] != 1) {
 478                                                 tmpstat[positions[newpos]] = 1;
 479                                                 count++;
 480                                         }
 481                                         newpos++;
 482                                 }
 483                         }
 484                 pos++;
 485         }
 486         if (notin(nstate)) {
 487                 if (++nstate >= NSTATES) {
 488                         for (i=1; i<NSTATES; i++)
 489                                 out[i] = 0;
 490                         for (i=1; i<NSTATES; i++)
 491                                 for (k=0; k<NCHARS; k++)
 492                                         gotofn[i][k] = 0;
 493                         nstate = 1;
 494                         nxtpos = inxtpos;
 495                         reinit = 1;
 496                         add(state, nstate);
 497                         if (tmpstat[line] == 1) out[nstate] = 1;
 498                         return nstate;
 499                 }
 500                 add(state, nstate);
 501                 if (tmpstat[line] == 1) out[nstate] = 1;
 502                 gotofn[s][c] = nstate;
 503                 return nstate;
 504         }
 505         else {
 506                 gotofn[s][c] = xstate;
 507                 return xstate;
 508         }
 509 }
 510 
 511 
 512 int
 513 cstate(int v) 
 514 {
 515         int b;
 516         if (left[v] == 0) {
 517                 if (tmpstat[v] != 1) {
 518                         tmpstat[v] = 1;
 519                         count++;
 520                 }
 521                 return(1);
 522         }
 523         else if (right[v] == 0) {
 524                 if (cstate(left[v]) == 0) return (0);
 525                 else if (name[v] == PLUS) return (1);
 526                 else return (0);
 527         }
 528         else if (name[v] == CAT) {
 529                 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
 530                 else return (1);
 531         }
 532         else { /* name[v] == OR */
 533                 b = cstate(right[v]);
 534                 if (cstate(left[v]) == 0 || b == 0) return (0);
 535                 else return (1);
 536         }
 537 }
 538 
 539 
 540 int
 541 dot(int c)
 542 {
 543         if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3))
 544                 return(0);
 545         if(c == RIGHT || c == LEFT)
 546                 return(0);
 547         return(1);
 548 }
 549 
 550 int
 551 mdot(int c)
 552 {
 553         if(c >= 0200 && !iscntrl(c))
 554                 return(1);
 555         return(0);
 556 }
 557 
 558 int
 559 member(int symb, int set, int torf) 
 560 {
 561         int i, num, pos, c, lc;
 562         if(symb == RIGHT || symb == LEFT)
 563                 return(0);
 564         num = chars[set];
 565         pos = set + 1;
 566         lc = 0;
 567         if(iflag) 
 568                 symb = tolower(symb);
 569         for (i=0; i<num; i++) {
 570                 c = (unsigned char)chars[pos++];
 571                 if(c == '-' && lc != 0 && ++i < num) {
 572                         c = (unsigned char)chars[pos++];
 573                         if(lc <= symb && symb <= c)
 574                                 return(torf);
 575                 }
 576                 if (symb == c)
 577                         return (torf);
 578                 lc = c;
 579         }
 580         return(!torf);
 581 }
 582 
 583 int
 584 notin(int n)
 585 {
 586         int i, j, pos;
 587         for (i=1; i<=n; i++) {
 588                 if (positions[state[i]] == count) {
 589                         pos = state[i] + 1;
 590                         for (j=0; j < count; j++)
 591                                 if (tmpstat[positions[pos++]] != 1) goto nxt;
 592                         xstate = i;
 593                         return (0);
 594                 }
 595                 nxt: ;
 596         }
 597         return (1);
 598 }
 599 
 600 void
 601 add(int *array, int n) 
 602 {
 603         int i;
 604         if (nxtpos + count >= maxpos) { 
 605                 maxpos += MAXPOS + count;
 606                 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0)
 607                         overflo();
 608         }
 609         array[n] = nxtpos;
 610         positions[nxtpos++] = count;
 611         for (i=3; i <= line; i++) {
 612                 if (tmpstat[i] == 1) {
 613                         positions[nxtpos++] = i;
 614                 }
 615         }
 616 }
 617 
 618 void
 619 follow(int v) 
 620 {
 621         int p;
 622         if (v == line) return;
 623         p = parent[v];
 624         switch(name[p]) {
 625                 case STAR:
 626                 case PLUS:      cstate(v);
 627                                 follow(p);
 628                                 return;
 629 
 630                 case OR:
 631                 case QUEST:     follow(p);
 632                                 return;
 633 
 634                 case CAT:       if (v == left[p]) {
 635                                         if (cstate(right[p]) == 0) {
 636                                                 follow(p);
 637                                                 return;
 638                                         }
 639                                 }
 640                                 else follow(p);
 641                                 return;
 642                 case FINAL:     if (tmpstat[line] != 1) {
 643                                         tmpstat[line] = 1;
 644                                         count++;
 645                                 }
 646                                 return;
 647         }
 648 }
 649 
 650 #define USAGE "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..." 
 651 
 652 int
 653 main(int argc, char **argv)
 654 {
 655         char c;
 656         char nl = '\n';
 657         int errflag = 0;
 658         
 659         (void)setlocale(LC_ALL, "");
 660 
 661 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
 662         #define TEXT_DOMAIN "SYS_TEST"  /* Use this only if it weren't. */
 663 #endif
 664         (void) textdomain(TEXT_DOMAIN);
 665 
 666         while((c = getopt(argc, argv, "ybcie:f:Hhlnvs")) != -1)
 667                 switch(c) {
 668 
 669                 case 'b':
 670                         bflag++;
 671                         continue;
 672 
 673                 case 'c':
 674                         cflag++;
 675                         continue;
 676 
 677                 case 'e':
 678                         eflag++;
 679                         input = optarg;
 680                         continue;
 681 
 682                 case 'f':
 683                         fflag++;
 684                         expfile = fopen(optarg, "r");
 685                         if(expfile == NULL) {
 686                                 fprintf(stderr, 
 687                                   gettext("egrep: can't open %s\n"), optarg);
 688                                 exit(2);
 689                         }
 690                         continue;
 691 
 692                 case 'H':
 693                         if (!lflag) /* H is excluded by l as in GNU grep */
 694                                 Hflag++;
 695                         hflag = 0; /* H excludes h */
 696                         continue;
 697 
 698                 case 'h':
 699                         hflag++;
 700                         Hflag = 0; /* h excludes H */
 701                         continue;
 702 
 703                 case 'y':
 704                 case 'i':
 705                         iflag++;
 706                         continue;
 707 
 708                 case 'l':
 709                         lflag++;
 710                         Hflag = 0; /* l excludes H */
 711                         continue;
 712 
 713                 case 'n':
 714                         nflag++;
 715                         continue;
 716 
 717                 case 'q':
 718                 case 's': /* Solaris: legacy option */
 719                         qflag++;
 720                         continue;
 721 
 722                 case 'v':
 723                         vflag++;
 724                         continue;
 725 
 726                 case '?':
 727                         errflag++;
 728                 }
 729         if (errflag || ((argc <= 0) && !fflag && !eflag)) {
 730                 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE));
 731                 exit(2);
 732         }
 733         if(!eflag && !fflag) {
 734                 input = argv[optind];
 735                 optind++;
 736         }
 737 
 738         argc -= optind;
 739         argv = &argv[optind];
 740         
 741         /* allocate initial space for arrays */
 742         if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 743                 overflo();
 744         if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 745                 overflo();
 746         if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 747                 overflo();
 748         if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 749                 overflo();
 750         if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 751                 overflo();
 752         if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 753                 overflo();
 754         if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
 755                 overflo();
 756         if((chars = (char *)malloc(MAXLIN)) == (char *)0)
 757                 overflo();
 758         if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
 759                 overflo();
 760         if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
 761                 overflo();
 762         if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
 763                 overflo();
 764         maxlin = MAXLIN;
 765         maxclin = MAXLIN;
 766         maxwclin = MAXLIN;
 767         maxpos = MAXPOS;
 768                 
 769         yyparse();
 770 
 771         cfoll(line-1);
 772         cgotofn();
 773         nfile = argc;
 774         if (argc<=0) {
 775                 execute(0);
 776         }
 777         else while (--argc >= 0) {
 778                 if (reinit == 1) clearg();
 779                 execute(*argv++);
 780         }
 781         return (badbotch ? 2 : nsucc==0);
 782 }
 783 
 784 void
 785 execute(char *file)
 786 {
 787         char *p;
 788         int cstat;
 789         wchar_t c;
 790         int t;
 791         long count;
 792         long count1, count2;
 793         long nchars;
 794         int succ;
 795         char *ptr, *ptrend, *lastptr;
 796         char *buf;
 797         long lBufSiz;
 798         FILE *f;
 799         int nlflag;
 800 
 801         lBufSiz = EBUFSIZ;
 802         if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) {
 803                 exit (2); /* out of memory - BAIL */
 804         }
 805 
 806         if (file) {
 807                 if ((f = fopen(file, "r")) == NULL) {
 808                         fprintf(stderr, 
 809                                 gettext("egrep: can't open %s\n"), file);
 810                         badbotch=1;
 811                         return;
 812                 }
 813         } else {
 814                 f = stdin;
 815                 file = STDIN_FILENAME;
 816         }
 817         lnum = 1;
 818         tln = 0;
 819         if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) {
 820                 fclose(f);
 821 
 822                 if (cflag && !qflag) {
 823                         if (Hflag || (nfile > 1 && !hflag))
 824                                 fprintf(stdout, "%s:", file);
 825                         fprintf(stdout, "%lld\n", tln);
 826                 }
 827                 return;
 828         }
 829 
 830         blkno = count;
 831         ptr = buf;
 832         for(;;) {
 833                 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
 834                         /* 
 835                                 move the unused partial record to the head of the buffer 
 836                         */
 837                         if (ptr > buf) {
 838                                 count = buf + count - ptr;
 839                                 memmove (buf, ptr, count);
 840                                 ptr = buf;
 841                         }
 842 
 843                         /*
 844                                 Get a bigger buffer if this one is full
 845                         */
 846                         if(count > lBufSiz) {
 847                                 /*
 848                                         expand the buffer       
 849                                 */
 850                                 lBufSiz += EBUFSIZ;
 851                                 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) {
 852                                         exit (2); /* out of memory - BAIL */
 853                                 }
 854 
 855                                 ptr = buf;
 856                         }
 857 
 858                         p = buf + count;
 859                         if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) {
 860                                 count += count1;
 861                                 blkno += count1;
 862                                 continue;
 863                         }
 864                         ptrend = ptr + count;
 865                         nlflag = 0;
 866                 } else
 867                         nlflag = 1;
 868                 *ptrend = '\n';
 869                 p = ptr;
 870                 lastptr = ptr;
 871                 cstat = istat;
 872                 succ = 0;
 873                 for(;;) {
 874                         if(out[cstat]) {
 875                                 if(multibyte && p > ptr) {
 876                                         wchar_t wchar;
 877                                         int length;
 878                                         char *endptr = p;
 879                                         p = lastptr;
 880                                         while(p < endptr) {
 881                                                 length = mbtowc(&wchar, p, MB_LEN_MAX);
 882                                                 if(length <= 1)
 883                                                         p++;
 884                                                 else
 885                                                         p += length;
 886                                         }
 887                                         if(p == endptr) {
 888                                                 succ = !vflag;
 889                                                 break;
 890                                         }
 891                                         cstat = 1;
 892                                         length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
 893                                         if(length <= 1)
 894                                                 lastptr++;
 895                                         else
 896                                                 lastptr += length;
 897                                         p = lastptr;
 898                                         continue;
 899                                 }
 900                                 succ = !vflag;
 901                                 break;
 902                         }
 903                         c = (unsigned char)*p++;
 904                         if ((t = gotofn[cstat][c]) == 0)
 905                                 cstat = nxtst(cstat, c);
 906                         else
 907                                 cstat = t;
 908                         if(c == RIGHT) {
 909                                 if(out[cstat]) {
 910                                         succ = !vflag;
 911                                         break;
 912                                 }
 913                                 succ = vflag;
 914                                 break;
 915                         }
 916                 }
 917                 if (succ) {
 918                         nsucc = 1;
 919                         if (lflag || qflag) {
 920                                 if (!qflag)
 921                                         (void) printf("%s\n", file);
 922                                 fclose(f);
 923                                 return;
 924                         }
 925                         if (cflag) {
 926                                 tln++;
 927                         } else {
 928                                 if (Hflag || (nfile > 1 && !hflag))
 929                                         printf("%s:", file);
 930                                 if (bflag) {
 931                                         nchars = blkno - (buf + count - ptrend) - 2;
 932                                         if(nlflag)
 933                                                 nchars++;
 934                                         printf("%lld:", nchars/BLKSIZE);
 935                                 }
 936                                 if (nflag) 
 937                                         printf("%lld:", lnum);
 938                                 if(nlflag)
 939                                         nchars = ptrend - ptr + 1;
 940                                 else
 941                                         nchars = ptrend - ptr;
 942                                 fwrite(ptr, (size_t)1, (size_t)nchars, stdout);
 943                         }
 944                 }
 945                 if(!nlflag)
 946                         break;
 947                 ptr = ptrend + 1;
 948                 if(ptr >= buf + count) {
 949                         ptr = buf;
 950                         if((count = read(fileno(f), buf, EBUFSIZ)) <= 0)
 951                                 break;
 952                         blkno += count;
 953                 }
 954                 lnum++;
 955                 if (reinit == 1) 
 956                         clearg();
 957         }
 958         fclose(f);
 959         if (cflag && !qflag) {
 960                 if (Hflag || (nfile > 1 && !hflag))
 961                         printf("%s:", file);
 962                 printf("%lld\n", tln);
 963         }
 964 }
 965 
 966 void
 967 clearg(void)
 968 {
 969         int i, k;
 970         for (i=1; i<=nstate; i++)
 971                 out[i] = 0;
 972         for (i=1; i<=nstate; i++)
 973                 for (k=0; k<NCHARS; k++)
 974                         gotofn[i][k] = 0;
 975         nstate = 1;
 976         nxtpos = inxtpos;
 977         reinit = 0;
 978         count = 0;
 979         for (i=3; i<=line; i++) tmpstat[i] = 0;
 980         if (cstate(line-1)==0) {
 981                 tmpstat[line] = 1;
 982                 count++;
 983                 out[1] = 1;
 984         }
 985         for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
 986         count--;                /*leave out position 1 */
 987         icount = count;
 988         tmpstat[1] = 0;
 989         add(state, 1);
 990         istat = nxtst(1, LEFT);
 991 }
 992 
 993 int
 994 mdotenter(void)
 995 {
 996         int i, x1, x2;
 997         x1 = enter(DOT);
 998         x2 = enter(MDOT);
 999         for(i = 1; i < (int) eucw1; i++) 
1000                 x2 = node(CAT, x2, enter(MDOT));
1001         x1 = node(OR, x1, x2);
1002         if(eucw2) {
1003                 x2 = enter('\216');
1004                 for(i = 1; i <= (int) eucw2; i++) 
1005                         x2 = node(CAT, x2, enter(MDOT));
1006                 x1 = node(OR, x1, x2);
1007         }
1008         if(eucw3) {
1009                 x2 = enter('\217');
1010                 for(i = 1; i <= (int) eucw3; i++) 
1011                         x2 = node(CAT, x2, enter(MDOT));
1012                 x1 = node(OR, x1, x2);
1013         }
1014         return(x1);
1015 }
1016 
1017 int
1018 mchar(wchar_t c)
1019 {
1020         char multichar[MB_LEN_MAX+1];
1021         char *p;
1022         int x1, lc, length;
1023         
1024         length = wctomb(multichar, c);
1025         p = multichar;
1026         *(p + length) = '\0';
1027         x1 = enter((unsigned char)*p++);
1028         while(lc = (unsigned char)*p++)
1029                 x1 = node(CAT, x1, enter(lc));
1030         return(x1);
1031 }
1032 
1033 int
1034 ccl(int type) 
1035 {
1036         wchar_t c, lc;
1037         char multic1[MB_LEN_MAX];
1038         char multic2[MB_LEN_MAX];
1039         int x1, x2, length, current, last, cclcnt;
1040         x2 = 0;
1041         current = 0;
1042         last = genrange(type);
1043         nxtchar = count + 1;
1044         cclcnt = 0;
1045         /* create usual character class for single byte characters */
1046         while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) {
1047                 cclcnt++;
1048                 chars[nxtchar++] = c;
1049                 if(lower[current] != upper[current]) {
1050                         chars[nxtchar++] = '-';
1051                         chars[nxtchar++] = upper[current];
1052                         cclcnt += 2;
1053                 }
1054                 current++;
1055         }
1056         
1057         if(cclcnt)
1058                 chars[count] = cclcnt;
1059         else
1060                 nxtchar = count;
1061         if(current > 0)
1062                 /* single byte part of character class */
1063                 x2 = cclenter(type);
1064         else if(type == NCCL)
1065                 /* all single byte characters match */
1066                 x2 = enter(DOT);
1067         while(current <= last) {
1068                 if(upper[current] == lower[current]) 
1069                         x1 = mchar(lower[current]);
1070                 else {
1071                         length = wctomb(multic1, lower[current]);
1072                         wctomb(multic2, upper[current]);
1073                         x1 = range((unsigned char *)multic1,
1074                             (unsigned char *)multic2, length);
1075                 }
1076                 if(x2)
1077                         x2 = node(OR, x2, x1);
1078                 else
1079                         x2 = x1;
1080                 current++;
1081         }
1082         return x2;
1083 }
1084 
1085 int
1086 range(unsigned char *p1, unsigned char *p2, int length)
1087 {
1088         char multic[MB_LEN_MAX+1];
1089         char *p;
1090         int i, x1, x2;
1091         if(length == 1)
1092                 return(classenter(*p1, *p2));
1093         if(p1[0] == p2[0]) 
1094                 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));         
1095         p = multic;
1096         for(i = 1; i < length; i++)
1097                 *p++ = 0377;
1098         x1 = node(CAT, enter(p1[0]),
1099             range(p1+1, (unsigned char *)multic, length - 1));
1100         if((unsigned char)(p1[0] + 1) < p2[0]) {
1101                 x2 = classenter(p1[0] + 1, p2[0] - 1);
1102                 for(i = 1; i < length; i++)
1103                         x2 = node(CAT, x2, enter(MDOT));
1104                 x1 = node(OR, x1, x2);
1105         }
1106         p = multic;
1107         for(i = 1; i < length; i++) 
1108                 *p++ = 0200;
1109         x2 = node(CAT, enter(p2[0]),
1110             range((unsigned char *)multic, p2+1, length - 1));
1111         return node(OR, x1, x2);
1112 }
1113 
1114 int
1115 classenter(int x1, int x2)
1116 {
1117         static int max, min;
1118         if(!max) {
1119                 int i;
1120                 for(i = 0200; i <= 0377; i++)
1121                         if(!iscntrl(i))
1122                                 break;
1123                 min = i;
1124                 for(i = 0377; i >= 0200; i--)
1125                         if(!iscntrl(i))
1126                                 break;
1127                 max = i;
1128         }
1129         if(x1 <= min && x2 >= max)
1130                 return enter(MDOT);
1131         if(nxtchar + 4 >= maxclin)
1132                 if(allocchars() == 0)   
1133                         overflo();
1134         count = nxtchar++;
1135         chars[nxtchar++] = x1;
1136         chars[nxtchar++] = '-';
1137         chars[nxtchar++] = x2;
1138         chars[count] = 3;
1139         return cclenter(MCCL);
1140 }
1141 
1142 int
1143 genrange(int type)
1144 {
1145         char *p, *endp;
1146         int current, nel, i, last, length;
1147         wchar_t c, lc;
1148 
1149         current = 0;
1150         p = &chars[count+1];
1151         endp = &chars[count+1] + chars[count];
1152         lc = 0;
1153                 
1154         /* convert character class into union of ranges */
1155         while(p < endp) {
1156                 length = mbtowc(&c, p, MB_LEN_MAX);
1157                 p += length;
1158                 if(c == '-' && lc != 0) {
1159                         length = mbtowc(&c, p, MB_LEN_MAX);
1160                         upper[current-1] = c;
1161                         p += length;
1162                 } else {
1163                         lower[current] = c;
1164                         upper[current++] = c;
1165                 }
1166                 lc = c;
1167         }
1168         nel = current;
1169         /* sort lower and upper bounds of ranges */
1170         qsort((char *)lower, nel, sizeof(wchar_t), compare);
1171         qsort((char *)upper, nel, sizeof(wchar_t), compare);
1172         last = current - 1;
1173         current = 0;
1174         /* combine overlapping or adjacent ranges */
1175         for(i = 0; i < last; i++)
1176                 if(upper[i] >= lower[i+1] - 1)
1177                         upper[current] = upper[i+1];
1178                 else {
1179                         lower[++current] = lower[i+1];
1180                         upper[current] = upper[i+1];
1181                 }
1182         if(type == NCCL) {
1183                 /* find complement of character class */
1184                 int j, next;
1185                 i = 0;
1186                 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c))
1187                         i++;
1188                 if(i > current) {
1189                         /* match all multibyte characters */
1190                         if(eucw2) {
1191                                 lower[i] = maxmin(WCHAR_CS2, 0);
1192                                 upper[i++] = maxmin(WCHAR_CS2, 1);
1193                         }
1194                         if(eucw3) {
1195                                 lower[i] = maxmin(WCHAR_CS3, 0);
1196                                 upper[i++] = maxmin(WCHAR_CS3, 1);
1197                         }
1198                         lower[i] = maxmin(WCHAR_CS1, 0);
1199                         upper[i++] = maxmin(WCHAR_CS1, 1);
1200                         return i - 1;
1201                 }
1202                 next = current + 1;
1203                 if(next + current + 2 >= maxwclin) {
1204                         maxwclin += MAXLIN + next + current + 2;
1205                         if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
1206                            (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
1207                                 overflo();
1208                 }
1209                 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) {
1210                         lower[next] = maxmin(WCHAR_CS2, 0);
1211                         if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) {
1212                                 upper[next++] = maxmin(WCHAR_CS2, 1);
1213                                 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) {
1214                                         lower[next] = maxmin(WCHAR_CS3, 0);
1215                                         upper[next++] = maxmin(WCHAR_CS3, 1);
1216                                 }
1217                                 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1218                                         lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1219                                         upper[next++] = lower[i] - 1;
1220                                 }
1221                         } else
1222                                 upper[next++] = lower[i] - 1;
1223                 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1224                         lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1225                         upper[next++] = lower[i] - 1;
1226                 }
1227                 for(j = i; j < current; j++) {
1228                         if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) {
1229                                 lower[next] = upper[j] + 1;
1230                                 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) {
1231                                         upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1);
1232                                         if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) {
1233                                                 lower[next] = maxmin(WCHAR_CS3, 0);
1234                                                 upper[next++] = maxmin(WCHAR_CS3, 1);
1235                                         }
1236                                         if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) {
1237                                                 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0);
1238                                                 upper[next++] = lower[j+1] - 1;
1239                                         }
1240                                 } else
1241                                         upper[next++] = lower[j+1] - 1;
1242                         } else if(lower[j+1] > maxmin(lower[j+1], 0)) {
1243                                 lower[next] = maxmin(lower[j+1], 0);
1244                                 upper[next++] = lower[j+1] - 1;
1245                         }
1246                 }
1247                 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) {
1248                         lower[next] = upper[current] + 1;
1249                         upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1); 
1250                 }
1251                 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) {
1252                         if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) {
1253                                 lower[next] = maxmin(WCHAR_CS3, 0);
1254                                 upper[next++] = maxmin(WCHAR_CS3, 1);
1255                         }
1256                         lower[next] = maxmin(WCHAR_CS1, 0);
1257                         upper[next++] = maxmin(WCHAR_CS1, 1);
1258                 }
1259                 for(j = current + 1; j < next; j++) {
1260                         lower[i] = lower[j];
1261                         upper[i++] = upper[j];
1262                 }
1263                 current = i - 1;
1264         }
1265         return(current);
1266 }
1267 
/*
 * compare -- three-way comparison of two wide characters, used as the
 * qsort() ordering for the lower[] and upper[] range tables.
 *
 * Returns -1 if *c < *d, 0 if they are equal, 1 if *c > *d.
 */
int
compare(wchar_t *c, wchar_t *d)
{
        wchar_t lhs = *c;
        wchar_t rhs = *d;

        /* each relational test yields 0 or 1, so the difference is -1, 0 or 1 */
        return ((lhs > rhs) - (lhs < rhs));
}
1277 
1278 wchar_t
1279 maxmin(wchar_t c, int flag)
1280 {
1281         static wchar_t minmax1[2], minmax2[2], minmax3[2];
1282 
1283         if(!minmax1[0]) {
1284                 /* compute min and max process codes for all code sets */
1285                 int length, i;
1286                 char multic[MB_LEN_MAX], minmax[2];
1287                 for(i = 0377; i >= 0200; i--)
1288                         if(!iscntrl(i))
1289                                 break;
1290                 minmax[1] = i;
1291                 for(i = 0240; i <= 0377; i++)
1292                         if(!iscntrl(i))
1293                                 break;
1294                 minmax[0] = i;
1295                 for(i = 0; i <= 1; i++) {
1296                         length = MB_LEN_MAX;
1297                         while(length--)
1298                                 multic[length] = minmax[i];
1299                         mbtowc(&minmax1[i], multic, MB_LEN_MAX);
1300                         if(eucw2) {
1301                                 multic[0] = SS2;
1302                                 mbtowc(&minmax2[i], multic, MB_LEN_MAX);
1303                         }
1304                         if(eucw3) {
1305                                 multic[0] = SS3;
1306                                 mbtowc(&minmax3[i], multic, MB_LEN_MAX);
1307                         }
1308                 }
1309         }
1310         switch(c) {
1311                 case WCHAR_CS1: return minmax1[flag];
1312                 case WCHAR_CS2: return minmax2[flag];
1313                 case WCHAR_CS3: return minmax3[flag];
1314         }
1315 
1316         /* NOTREACHED */
1317         return (0);
1318 }