1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  31 /*        All Rights Reserved   */
  32 
  33 #pragma ident   "%Z%%M% %I%     %E% SMI"
  34 
  35 /*
  36  * fgrep -- print all lines containing any of a set of keywords
  37  *
  38  *      status returns:
  39  *              0 - ok, and some matches
  40  *              1 - ok, but no matches
  41  *              2 - some error
  42  */
  43 
  44 #include <stdio.h>
  45 #include <ctype.h>
  46 #include <sys/types.h>
  47 #include <stdlib.h>
  48 #include <string.h>
  49 #include <locale.h>
  50 #include <libintl.h>
  51 #include <euc.h>
  52 #include <sys/stat.h>
  53 #include <fcntl.h>
  54 
  55 #include <getwidth.h>
  56 
  57 eucwidth_t WW;
  58 #define WIDTH1  WW._eucw1
  59 #define WIDTH2  WW._eucw2
  60 #define WIDTH3  WW._eucw3
  61 #define MULTI_BYTE      WW._multibyte
  62 #define GETONE(lc, p) \
  63         cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
  64                 (ISSET2(lc) ? WIDTH2 :                       \
  65                 (ISSET3(lc) ? WIDTH3 : WIDTH1));             \
  66         if (--cw > --ccount) {                           \
  67                 cw -= ccount;                                \
  68                 while (ccount--)                             \
  69                         lc = (lc << 7) | ((*p++) & 0177);        \
  70                         if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
  71                         if (nlp == buf) {                        \
  72                                 /* Increase the buffer size */       \
  73                                 fw_lBufsiz += BUFSIZ;                \
  74                                 if ((buf = realloc(buf,              \
  75                                         fw_lBufsiz + BUFSIZ)) == NULL) { \
  76                                         exit(2); /* out of memory */     \
  77                                 }                                    \
  78                                 nlp = buf;                           \
  79                                 p = &buf[fw_lBufsiz];                \
  80                         } else {                                 \
  81                                 /* shift the buffer contents down */ \
  82                                 (void) memmove(buf, nlp,             \
  83                                         &buf[fw_lBufsiz + BUFSIZ] - nlp);\
  84                                 p -= nlp - buf;                      \
  85                                 nlp = buf;                           \
  86                         }                                        \
  87                 }                                            \
  88                 if (p > &buf[fw_lBufsiz]) {                  \
  89                         if ((ccount = fread(p, sizeof (char),    \
  90                             &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
  91                                 <= 0) break;                         \
  92                 } else if ((ccount = fread(p,                \
  93                         sizeof (char),  BUFSIZ, fptr)) <= 0)     \
  94                         break;                                   \
  95                 blkno += (long long)ccount;                  \
  96         }                                                \
  97         ccount -= cw;                                    \
  98         while (cw--)                                     \
  99                 lc = (lc << 7) | ((*p++) & 0177)
 100 
 101 /*
 102  * The same() macro and letter() function were inserted to allow for
 103  * the -i option work for the multi-byte environment.
 104  */
 105 wchar_t letter();
 106 #define same(a, b) \
 107         (a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
 108         letter(a) == letter(b))
 109 
 110 
 111 #define QSIZE 400
 112 struct words {
 113         wchar_t inp;
 114         char    out;
 115         struct  words *nst;
 116         struct  words *link;
 117         struct  words *fail;
 118 } *w = NULL, *smax, *q;
 119 
 120 FILE *fptr;
 121 long long lnum;
 122 int     bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, sflag;
 123 int     hflag, iflag;
 124 int     retcode = 0;
 125 int     nfile;
 126 long long blkno;
 127 int     nsucc;
 128 long long tln;
 129 FILE    *wordf;
 130 char    *argptr;
 131 off_t input_size = 0;
 132 
 133 void    execute(char *);
 134 void    cgotofn(void);
 135 void    overflo(void);
 136 void    cfail(void);
 137 
 138 static long fw_lBufsiz = 0;
 139 
 140 int
 141 main(int argc, char **argv)
 142 {
 143         int c;
 144         int errflg = 0;
 145         struct stat file_stat;
 146 
 147         (void) setlocale(LC_ALL, "");
 148 #if !defined(TEXT_DOMAIN)       /* Should be defined by cc -D */
 149 #define TEXT_DOMAIN "SYS_TEST"  /* Use this only if it weren't */
 150 #endif
 151         (void) textdomain(TEXT_DOMAIN);
 152 
 153         while ((c = getopt(argc, argv, "hybcie:f:lnvxs")) != EOF)
 154                 switch (c) {
 155 
 156                 case 's':
 157                         sflag++;
 158                         continue;
 159                 case 'h':
 160                         hflag++;
 161                         continue;
 162                 case 'b':
 163                         bflag++;
 164                         continue;
 165 
 166                 case 'i':
 167                 case 'y':
 168                         iflag++;
 169                         continue;
 170 
 171                 case 'c':
 172                         cflag++;
 173                         continue;
 174 
 175                 case 'e':
 176                         eflag++;
 177                         argptr = optarg;
 178                         input_size = strlen(argptr);
 179                         continue;
 180 
 181                 case 'f':
 182                         fflag++;
 183                         wordf = fopen(optarg, "r");
 184                         if (wordf == NULL) {
 185                                 (void) fprintf(stderr,
 186                                         gettext("fgrep: can't open %s\n"),
 187                                         optarg);
 188                                 exit(2);
 189                         }
 190 
 191                         if (fstat(fileno(wordf), &file_stat) == 0) {
 192                             input_size = file_stat.st_size;
 193                         } else {
 194                                 (void) fprintf(stderr,
 195                                         gettext("fgrep: can't fstat %s\n"),
 196                                         optarg);
 197                                 exit(2);
 198                         }
 199 
 200                         continue;
 201 
 202                 case 'l':
 203                         lflag++;
 204                         continue;
 205 
 206                 case 'n':
 207                         nflag++;
 208                         continue;
 209 
 210                 case 'v':
 211                         vflag++;
 212                         continue;
 213 
 214                 case 'x':
 215                         xflag++;
 216                         continue;
 217 
 218                 case '?':
 219                         errflg++;
 220         }
 221 
 222         argc -= optind;
 223         if (errflg || ((argc <= 0) && !fflag && !eflag)) {
 224                 (void) printf(gettext("usage: fgrep [ -bchilnsvx ] "
 225                         "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
 226                 exit(2);
 227         }
 228         if (!eflag && !fflag) {
 229                 argptr = argv[optind];
 230                 input_size = strlen(argptr);
 231                 input_size++;
 232                 optind++;
 233                 argc--;
 234         }
 235 
 236 /*
 237  * Normally we need one struct words for each letter in the pattern
 238  * plus one terminating struct words with outp = 1, but when -x option
 239  * is specified we require one more struct words for `\n` character so we
 240  * calculate the input_size as below. We add extra 1 because
 241  * (input_size/2) rounds off odd numbers
 242  */
 243 
 244         if (xflag) {
 245                 input_size = input_size + (input_size/2) + 1;
 246         }
 247 
 248         input_size++;
 249 
 250         w = (struct words *)calloc(input_size, sizeof (struct words));
 251         if (w == NULL) {
 252                 (void) fprintf(stderr,
 253                         gettext("fgrep: could not allocate "
 254                                 "memory for wordlist\n"));
 255                 exit(2);
 256         }
 257 
 258         getwidth(&WW);
 259         if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
 260                 (WIDTH3 == 0)) {
 261                 /*
 262                  * If non EUC-based locale,
 263                  * assume WIDTH1 is 1.
 264                  */
 265                 WIDTH1 = 1;
 266         }
 267         WIDTH2++;
 268         WIDTH3++;
 269 
 270         cgotofn();
 271         cfail();
 272         nfile = argc;
 273         argv = &argv[optind];
 274         if (argc <= 0) {
 275                 execute((char *)NULL);
 276         } else
 277                 while (--argc >= 0) {
 278                         execute(*argv);
 279                         argv++;
 280                 }
 281 
 282         if (w != NULL) {
 283                 free(w);
 284         }
 285 
 286         return (retcode != 0 ? retcode : nsucc == 0);
 287 }
 288 
 289 void
 290 execute(char *file)
 291 {
 292         char *p;
 293         struct words *c;
 294         int ccount;
 295         static char *buf = NULL;
 296         int failed;
 297         char *nlp;
 298         wchar_t lc;
 299         int cw;
 300 
 301         if (buf == NULL) {
 302                 fw_lBufsiz = BUFSIZ;
 303                 if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
 304                         exit(2); /* out of memory */
 305                 }
 306         }
 307 
 308         if (file) {
 309                 if ((fptr = fopen(file, "r")) == NULL) {
 310                         (void) fprintf(stderr,
 311                                 gettext("fgrep: can't open %s\n"), file);
 312                         retcode = 2;
 313                         return;
 314                 }
 315         } else {
 316                 file = "<stdin>";
 317                 fptr = stdin;
 318         }
 319         ccount = 0;
 320         failed = 0;
 321         lnum = 1;
 322         tln = 0;
 323         blkno = 0;
 324         p = buf;
 325         nlp = p;
 326         c = w;
 327         for (;;) {
 328                 if (c == 0)
 329                         break;
 330                 if (ccount <= 0) {
 331                         if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
 332                                 if (nlp == buf) {
 333                                         /* increase the buffer size */
 334                                         fw_lBufsiz += BUFSIZ;
 335                                         if ((buf = realloc(buf,
 336                                                 fw_lBufsiz + BUFSIZ)) == NULL) {
 337                                                 exit(2); /* out of memory */
 338                                         }
 339                                         nlp = buf;
 340                                         p = &buf[fw_lBufsiz];
 341                                 } else {
 342                                         /* shift the buffer down */
 343                                         (void) memmove(buf, nlp,
 344                                                 &buf[fw_lBufsiz + BUFSIZ]
 345                                                 - nlp);
 346                                         p -= nlp - buf;
 347                                         nlp = buf;
 348                                 }
 349 
 350                         }
 351                         if (p > &buf[fw_lBufsiz]) {
 352                                 if ((ccount = fread(p, sizeof (char),
 353                                         &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
 354                                         <= 0)
 355                                         break;
 356                         } else if ((ccount = fread(p, sizeof (char),
 357                                 BUFSIZ, fptr)) <= 0)
 358                                 break;
 359                         blkno += (long long)ccount;
 360                 }
 361                 GETONE(lc, p);
 362 nstate:
 363                 if (same(c->inp, lc)) {
 364                         c = c->nst;
 365                 } else if (c->link != 0) {
 366                         c = c->link;
 367                         goto nstate;
 368                 } else {
 369                         c = c->fail;
 370                         failed = 1;
 371                         if (c == 0) {
 372                                 c = w;
 373 istate:
 374                                 if (same(c->inp, lc)) {
 375                                         c = c->nst;
 376                                 } else if (c->link != 0) {
 377                                         c = c->link;
 378                                         goto istate;
 379                                 }
 380                         } else
 381                                 goto nstate;
 382                 }
 383 
 384                 if (c == 0)
 385                         break;
 386 
 387                 if (c->out) {
 388                         while (lc != '\n') {
 389                                 if (ccount <= 0) {
 390 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
 391         if (nlp == buf) {
 392                 /* increase buffer size */
 393                 fw_lBufsiz += BUFSIZ;
 394                 if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
 395                         exit(2); /* out of memory */
 396                 }
 397                 nlp = buf;
 398                 p = &buf[fw_lBufsiz];
 399         } else {
 400                 /* shift buffer down */
 401                 (void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
 402                 p -= nlp - buf;
 403                 nlp = buf;
 404         }
 405 }
 406 if (p > &buf[fw_lBufsiz]) {
 407         if ((ccount = fread(p, sizeof (char),
 408                 &buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
 409         } else if ((ccount = fread(p, sizeof (char), BUFSIZ,
 410                 fptr)) <= 0) break;
 411                 blkno += (long long)ccount;
 412         }
 413         GETONE(lc, p);
 414 }
 415                         if ((vflag && (failed == 0 || xflag == 0)) ||
 416                                 (vflag == 0 && xflag && failed))
 417                                 goto nomatch;
 418 succeed:
 419                         nsucc = 1;
 420                         if (cflag)
 421                                 tln++;
 422                         else if (lflag && !sflag) {
 423                                 (void) printf("%s\n", file);
 424                                 (void) fclose(fptr);
 425                                 return;
 426                         } else if (!sflag) {
 427                                 if (nfile > 1 && !hflag)
 428                                         (void) printf("%s:", file);
 429                                 if (bflag)
 430                                         (void) printf("%lld:",
 431                                                 (blkno - (long long)(ccount-1))
 432                                                 / BUFSIZ);
 433                                 if (nflag)
 434                                         (void) printf("%lld:", lnum);
 435                                 if (p <= nlp) {
 436                                         while (nlp < &buf[fw_lBufsiz + BUFSIZ])
 437                                                 (void) putchar(*nlp++);
 438                                         nlp = buf;
 439                                 }
 440                                 while (nlp < p)
 441                                         (void) putchar(*nlp++);
 442                         }
 443 nomatch:
 444                         lnum++;
 445                         nlp = p;
 446                         c = w;
 447                         failed = 0;
 448                         continue;
 449                 }
 450                 if (lc == '\n')
 451                         if (vflag)
 452                                 goto succeed;
 453                         else {
 454                                 lnum++;
 455                                 nlp = p;
 456                                 c = w;
 457                                 failed = 0;
 458                         }
 459         }
 460         (void) fclose(fptr);
 461         if (cflag) {
 462                 if ((nfile > 1) && !hflag)
 463                         (void) printf("%s:", file);
 464                 (void) printf("%lld\n", tln);
 465         }
 466 }
 467 
 468 
 469 wchar_t
 470 getargc(void)
 471 {
 472         /* appends a newline to shell quoted argument list so */
 473         /* the list looks like it came from an ed style file  */
 474         wchar_t c;
 475         int cw;
 476         int b;
 477         static int endflg;
 478 
 479 
 480         if (wordf) {
 481                 if ((b = getc(wordf)) == EOF)
 482                         return (EOF);
 483                 cw = ISASCII(c = (wchar_t)b) ? 1 :
 484                         (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
 485                 while (--cw) {
 486                         if ((b = getc(wordf)) == EOF)
 487                                 return (EOF);
 488                         c = (c << 7) | (b & 0177);
 489                 }
 490                 return (iflag ? letter(c) : c);
 491         }
 492 
 493         if (endflg)
 494                 return (EOF);
 495 
 496         {
 497                 cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
 498                         (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
 499 
 500                 while (--cw)
 501                         c = (c << 7) | ((*argptr++) & 0177);
 502                 if (c == '\0') {
 503                         endflg++;
 504                         return ('\n');
 505                 }
 506         }
 507         return (iflag ? letter(c) : c);
 508 
 509 
 510 }
 511 
 512 void
 513 cgotofn(void)
 514 {
 515         int c;
 516         struct words *s;
 517 
 518         s = smax = w;
 519 nword:
 520         for (;;) {
 521                 c = getargc();
 522                 if (c == EOF)
 523                         return;
 524                 if (c == 0)
 525                         goto enter;
 526                 if (c == '\n') {
 527                         if (xflag) {
 528                                 for (;;) {
 529                                         if (s->inp == c) {
 530                                                 s = s->nst;
 531                                                 break;
 532                                         }
 533                                         if (s->inp == 0)
 534                                                 goto nenter;
 535                                         if (s->link == 0) {
 536                                                 if (smax >= &w[input_size -1])
 537                                                         overflo();
 538                                                 s->link = ++smax;
 539                                                 s = smax;
 540                                                 goto nenter;
 541                                         }
 542                                         s = s->link;
 543                                 }
 544                         }
 545                         s->out = 1;
 546                         s = w;
 547                 } else {
 548 loop:
 549                         if (s->inp == c) {
 550                                 s = s->nst;
 551                                 continue;
 552                         }
 553                         if (s->inp == 0)
 554                                 goto enter;
 555                         if (s->link == 0) {
 556                                 if (smax >= &w[input_size -1])
 557                                         overflo();
 558                                 s->link = ++smax;
 559                                 s = smax;
 560                                 goto enter;
 561                         }
 562                         s = s->link;
 563                         goto loop;
 564                 }
 565         }
 566 
 567 enter:
 568         do {
 569                 s->inp = c;
 570                 if (smax >= &w[input_size -1])
 571                         overflo();
 572                 s->nst = ++smax;
 573                 s = smax;
 574         } while ((c = getargc()) != '\n' && c != EOF);
 575         if (xflag) {
 576 nenter:
 577                 s->inp = '\n';
 578                 if (smax >= &w[input_size -1])
 579                         overflo();
 580                 s->nst = ++smax;
 581         }
 582         smax->out = 1;
 583         s = w;
 584         if (c != EOF)
 585                 goto nword;
 586 }
 587 
 588 /*
 589  * This function is an unexpected condition, since input_size should have been
 590  * calculated correctly before hand.
 591  */
 592 
 593 void
 594 overflo(void)
 595 {
 596         (void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
 597         exit(2);
 598 }
 599 
 600 void
 601 cfail(void)
 602 {
 603         int qsize = QSIZE;
 604         struct words **queue = NULL;
 605 
 606         /*
 607          * front and rear are pointers used to traverse the global words
 608          * structure "w" which contains the data of input pattern file
 609          */
 610         struct words **front, **rear;
 611         struct words *state;
 612         unsigned long frontoffset = 0, rearoffset = 0;
 613         char c;
 614         struct words *s;
 615         s = w;
 616         if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
 617                                 == NULL) {
 618                 perror("fgrep");
 619                 exit(2);
 620         }
 621         front = rear = queue;
 622 init:
 623         if ((s->inp) != 0) {
 624                 *rear++ = s->nst;
 625         /*
 626          * Reallocates the queue if the number of distinct starting
 627          * character of patterns exceeds the qsize value
 628          */
 629                 if (rear >= &queue[qsize - 1]) {
 630                         frontoffset = front - queue;
 631                         rearoffset = rear - queue;
 632                         qsize += QSIZE;
 633                         if ((queue = (struct words **)realloc(queue,
 634                                 qsize * sizeof (struct words *))) == NULL) {
 635                                 perror("fgrep");
 636                                 exit(2);
 637                         }
 638                         front = queue + frontoffset;
 639                         rear = queue + rearoffset;
 640                 }
 641         }
 642         if ((s = s->link) != 0) {
 643                 goto init;
 644         }
 645 
 646         while (rear != front) {
 647                 s = *front++;
 648 cloop:
 649                 if ((c = s->inp) != 0) {
 650                         *rear++ = (q = s->nst);
 651                 /*
 652                  * Reallocate the queue if the rear pointer reaches the end
 653                  * queue
 654                  */
 655                         if (rear >= &queue[qsize - 1]) {
 656                                 frontoffset = front - queue;
 657                                 rearoffset = rear - queue;
 658                                 qsize += QSIZE;
 659                                 if ((queue = (struct words **)realloc(queue,
 660                                     qsize * sizeof (struct words *))) == NULL) {
 661                                         perror("fgrep");
 662                                         exit(2);
 663                                 }
 664                                 front = queue + frontoffset;
 665                                 rear = queue + rearoffset;
 666                         }
 667                         state = s->fail;
 668 floop:
 669                         if (state == 0)
 670                                 state = w;
 671                         if (state->inp == c) {
 672 qloop:
 673                                 q->fail = state->nst;
 674                                 if ((state->nst)->out == 1)
 675                                         q->out = 1;
 676                                 if ((q = q->link) != 0)
 677                                         goto qloop;
 678                         } else if ((state = state->link) != 0)
 679                                 goto floop;
 680                 }
 681                 if ((s = s->link) != 0)
 682                         goto cloop;
 683         }
 684 }
 685 
 686 wchar_t
 687 letter(wchar_t c)
 688 {
 689         if (c >= 'a' && c <= 'z')
 690                 return (c);
 691         if (c >= 'A' && c <= 'Z')
 692                 return (c + 'a' - 'A');
 693         return (c);
 694 }